diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 00000000000..49760995dfc --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,66 @@ +name: Lightweight GithubActions +on: # yamllint disable-line rule:truthy + pull_request: + types: + - labeled + - unlabeled + - synchronize + - reopened + - opened + branches: + - master +jobs: + CheckLabels: + runs-on: [self-hosted] + steps: + - name: Check out repository code + uses: actions/checkout@v2 + - name: Labels check + run: cd $GITHUB_WORKSPACE/tests/ci && python3 run_check.py + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + DockerHubPush: + needs: CheckLabels + runs-on: [self-hosted] + steps: + - name: Check out repository code + uses: actions/checkout@v2 + - name: Images check + run: cd $GITHUB_WORKSPACE/tests/ci && python3 docker_images_check.py + env: + YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} + YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + DOCKER_ROBOT_PASSWORD: ${{ secrets.DOCKER_ROBOT_PASSWORD }} + - name: Upload image files to artifacts + uses: actions/upload-artifact@v2 + with: + name: changed_images + path: ${{ runner.temp }}/docker_images_check/changed_images.json + StyleCheck: + needs: DockerHubPush + runs-on: [self-hosted] + steps: + - name: Download changed images + uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ runner.temp }}/style_check + - name: Check out repository code + uses: actions/checkout@v2 + - name: Style Check + env: + YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} + YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: cd $GITHUB_WORKSPACE/tests/ci && python3 style_check.py + FinishCheck: + needs: [StyleCheck, DockerHubPush, CheckLabels] + runs-on: [self-hosted] + steps: + - name: Check out repository code + uses: actions/checkout@v2 + - name: Finish label + run: cd $GITHUB_WORKSPACE/tests/ci && python3 finish_check.py + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.gitmodules b/.gitmodules index f7845d03162..e0404c1269d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -140,7 +140,7 @@ url = https://github.com/ClickHouse-Extras/libc-headers.git [submodule "contrib/replxx"] path = contrib/replxx - url = https://github.com/ClickHouse-Extras/replxx.git + url = https://github.com/AmokHuginnsson/replxx.git [submodule "contrib/avro"] path = contrib/avro url = https://github.com/ClickHouse-Extras/avro.git diff --git a/base/base/ReplxxLineReader.cpp b/base/base/ReplxxLineReader.cpp index 9bf6ec41255..38867faf5d5 100644 --- a/base/base/ReplxxLineReader.cpp +++ b/base/base/ReplxxLineReader.cpp @@ -177,6 +177,10 @@ ReplxxLineReader::ReplxxLineReader( /// bind C-p/C-n to history-previous/history-next like readline. rx.bind_key(Replxx::KEY::control('N'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::HISTORY_NEXT, code); }); rx.bind_key(Replxx::KEY::control('P'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::HISTORY_PREVIOUS, code); }); + + /// bind C-j to ENTER action. + rx.bind_key(Replxx::KEY::control('J'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::COMMIT_LINE, code); }); + /// By default COMPLETE_NEXT/COMPLETE_PREV was bound to C-p/C-n, re-bind /// to M-P/M-N (that was used for HISTORY_COMMON_PREFIX_SEARCH before, but /// it was also bound to M-p/M-n). 
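Side note on the ReplxxLineReader hunk above: it binds Ctrl-J to replxx's COMMIT_LINE action, so C-j accepts the current line exactly like Enter. Below is a minimal standalone sketch of the same binding pattern; it uses only the replxx calls already visible in the hunk (bind_key, KEY::control, invoke, ACTION::COMMIT_LINE), and the prompt string is an invented example, not ClickHouse's.

```cpp
#include <replxx.hxx>

using Replxx = replxx::Replxx;

int main()
{
    Replxx rx;

    /// Same pattern as in ReplxxLineReader.cpp: forward the key press to a
    /// built-in action; COMMIT_LINE accepts the current input line.
    rx.bind_key(
        Replxx::KEY::control('J'),
        [&rx](char32_t code) { return rx.invoke(Replxx::ACTION::COMMIT_LINE, code); });

    /// Read lines until EOF; both Enter and C-j now commit the line.
    while (const char * line = rx.input("sketch> "))
        rx.history_add(line);
}
```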
diff --git a/cmake/find/capnp.cmake b/cmake/find/capnp.cmake index b0e4cc419f6..3d0d767bd0c 100644 --- a/cmake/find/capnp.cmake +++ b/cmake/find/capnp.cmake @@ -34,8 +34,6 @@ endif() if (CAPNP_LIBRARIES) set (USE_CAPNP 1) elseif(NOT MISSING_INTERNAL_CAPNP_LIBRARY) - add_subdirectory(contrib/capnproto-cmake) - set (CAPNP_LIBRARIES capnpc) set (USE_CAPNP 1) set (USE_INTERNAL_CAPNP_LIBRARY 1) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 67d7703f31c..676654452de 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -1,16 +1,5 @@ # Third-party libraries may have substandard code. -# Put all targets defined here and in added subfolders under "contrib/" folder in GUI-based IDEs by default. -# Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they will -# appear not in "contrib/" as originally planned here. -get_filename_component (_current_dir_name "${CMAKE_CURRENT_LIST_DIR}" NAME) -if (CMAKE_FOLDER) - set (CMAKE_FOLDER "${CMAKE_FOLDER}/${_current_dir_name}") -else () - set (CMAKE_FOLDER "${_current_dir_name}") -endif () -unset (_current_dir_name) - set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -w") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w") @@ -49,6 +38,10 @@ add_subdirectory (replxx-cmake) add_subdirectory (unixodbc-cmake) add_subdirectory (nanodbc-cmake) +if (USE_INTERNAL_CAPNP_LIBRARY AND NOT MISSING_INTERNAL_CAPNP_LIBRARY) + add_subdirectory(capnproto-cmake) +endif () + if (ENABLE_FUZZING) add_subdirectory (libprotobuf-mutator-cmake) endif() @@ -352,3 +345,76 @@ endif() if (USE_S2_GEOMETRY) add_subdirectory(s2geometry-cmake) endif() + +# Put all targets defined here and in subdirectories under "contrib/" folders in GUI-based IDEs. +# Some third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear +# in "contrib/..." as originally planned, so we work around this by fixing FOLDER properties of all targets manually, +# instead of controlling it via CMAKE_FOLDER. + +function (ensure_target_rooted_in _target _folder) + # Skip INTERFACE library targets, since FOLDER property is not available for them. + get_target_property (_target_type "${_target}" TYPE) + if (_target_type STREQUAL "INTERFACE_LIBRARY") + return () + endif () + + # Read the original FOLDER property value, if any. + get_target_property (_folder_prop "${_target}" FOLDER) + + # Normalize that value, so we avoid possible repetitions in folder names. + + if (NOT _folder_prop) + set (_folder_prop "") + endif () + + if (CMAKE_FOLDER AND _folder_prop MATCHES "^${CMAKE_FOLDER}/(.*)\$") + set (_folder_prop "${CMAKE_MATCH_1}") + endif () + + if (_folder AND _folder_prop MATCHES "^${_folder}/(.*)\$") + set (_folder_prop "${CMAKE_MATCH_1}") + endif () + + if (_folder) + set (_folder_prop "${_folder}/${_folder_prop}") + endif () + + if (CMAKE_FOLDER) + set (_folder_prop "${CMAKE_FOLDER}/${_folder_prop}") + endif () + + # Set the updated FOLDER property value back. 
+ set_target_properties ("${_target}" PROPERTIES FOLDER "${_folder_prop}") +endfunction () + +function (ensure_own_targets_are_rooted_in _dir _folder) + get_directory_property (_targets DIRECTORY "${_dir}" BUILDSYSTEM_TARGETS) + foreach (_target IN LISTS _targets) + ensure_target_rooted_in ("${_target}" "${_folder}") + endforeach () +endfunction () + +function (ensure_all_targets_are_rooted_in _dir _folder) + ensure_own_targets_are_rooted_in ("${_dir}" "${_folder}") + + get_property (_sub_dirs DIRECTORY "${_dir}" PROPERTY SUBDIRECTORIES) + foreach (_sub_dir IN LISTS _sub_dirs) + ensure_all_targets_are_rooted_in ("${_sub_dir}" "${_folder}") + endforeach () +endfunction () + +function (organize_ide_folders_2_level _dir) + get_filename_component (_dir_name "${_dir}" NAME) + ensure_own_targets_are_rooted_in ("${_dir}" "${_dir_name}") + + # Note that we respect only the first two levels of nesting; we don't want to + # reorganize target folders further within each third-party dir. + + get_property (_sub_dirs DIRECTORY "${_dir}" PROPERTY SUBDIRECTORIES) + foreach (_sub_dir IN LISTS _sub_dirs) + get_filename_component (_sub_dir_name "${_sub_dir}" NAME) + ensure_all_targets_are_rooted_in ("${_sub_dir}" "${_dir_name}/${_sub_dir_name}") + endforeach () +endfunction () + +organize_ide_folders_2_level ("${CMAKE_CURRENT_LIST_DIR}") diff --git a/contrib/capnproto b/contrib/capnproto index a00ccd91b37..c8189ec3c27 160000 --- a/contrib/capnproto +++ b/contrib/capnproto @@ -1 +1 @@ -Subproject commit a00ccd91b3746ef2ab51d40fe3265829949d1ace +Subproject commit c8189ec3c27dacbd4a3288e682473010e377f593 diff --git a/contrib/capnproto-cmake/CMakeLists.txt b/contrib/capnproto-cmake/CMakeLists.txt index 9f6e076cc7d..05446355535 100644 --- a/contrib/capnproto-cmake/CMakeLists.txt +++ b/contrib/capnproto-cmake/CMakeLists.txt @@ -45,6 +45,7 @@ set (CAPNP_SRCS "${CAPNPROTO_SOURCE_DIR}/capnp/serialize-packed.c++" "${CAPNPROTO_SOURCE_DIR}/capnp/schema.c++" + "${CAPNPROTO_SOURCE_DIR}/capnp/stream.capnp.c++" "${CAPNPROTO_SOURCE_DIR}/capnp/schema-loader.c++" "${CAPNPROTO_SOURCE_DIR}/capnp/dynamic.c++" "${CAPNPROTO_SOURCE_DIR}/capnp/stringify.c++" @@ -63,6 +64,7 @@ set (CAPNPC_SRCS "${CAPNPROTO_SOURCE_DIR}/capnp/compiler/lexer.c++" "${CAPNPROTO_SOURCE_DIR}/capnp/compiler/grammar.capnp.c++" "${CAPNPROTO_SOURCE_DIR}/capnp/compiler/parser.c++" + "${CAPNPROTO_SOURCE_DIR}/capnp/compiler/generics.c++" "${CAPNPROTO_SOURCE_DIR}/capnp/compiler/node-translator.c++" "${CAPNPROTO_SOURCE_DIR}/capnp/compiler/compiler.c++" "${CAPNPROTO_SOURCE_DIR}/capnp/schema-parser.c++" diff --git a/contrib/libcxx-cmake/CMakeLists.txt b/contrib/libcxx-cmake/CMakeLists.txt index ac67f2563a3..2ec6dbff1a1 100644 --- a/contrib/libcxx-cmake/CMakeLists.txt +++ b/contrib/libcxx-cmake/CMakeLists.txt @@ -47,6 +47,7 @@ set(SRCS ) add_library(cxx ${SRCS}) +set_target_properties(cxx PROPERTIES FOLDER "contrib/libcxx-cmake") target_include_directories(cxx SYSTEM BEFORE PUBLIC $) target_compile_definitions(cxx PRIVATE -D_LIBCPP_BUILDING_LIBRARY -DLIBCXX_BUILDING_LIBCXXABI) diff --git a/contrib/libcxxabi-cmake/CMakeLists.txt b/contrib/libcxxabi-cmake/CMakeLists.txt index 0bb5d663633..425111d9b26 100644 --- a/contrib/libcxxabi-cmake/CMakeLists.txt +++ b/contrib/libcxxabi-cmake/CMakeLists.txt @@ -22,6 +22,7 @@ set(SRCS ) add_library(cxxabi ${SRCS}) +set_target_properties(cxxabi PROPERTIES FOLDER "contrib/libcxxabi-cmake") # Third party library may have substandard code. 
target_compile_options(cxxabi PRIVATE -w) diff --git a/contrib/libunwind-cmake/CMakeLists.txt b/contrib/libunwind-cmake/CMakeLists.txt index 1a9f5e50abd..155853a0bca 100644 --- a/contrib/libunwind-cmake/CMakeLists.txt +++ b/contrib/libunwind-cmake/CMakeLists.txt @@ -39,6 +39,7 @@ set(LIBUNWIND_SOURCES ${LIBUNWIND_ASM_SOURCES}) add_library(unwind ${LIBUNWIND_SOURCES}) +set_target_properties(unwind PROPERTIES FOLDER "contrib/libunwind-cmake") target_include_directories(unwind SYSTEM BEFORE PUBLIC $) target_compile_definitions(unwind PRIVATE -D_LIBUNWIND_NO_HEAP=1 -D_DEBUG -D_LIBUNWIND_IS_NATIVE_ONLY) diff --git a/contrib/replxx b/contrib/replxx index f97765df14f..b0c266c2d8a 160000 --- a/contrib/replxx +++ b/contrib/replxx @@ -1 +1 @@ -Subproject commit f97765df14f4a6236d69b8f14b53ef2051ebd95a +Subproject commit b0c266c2d8a835784181e17292b421848c78c6b8 diff --git a/docker/test/integration/kerberized_hadoop/Dockerfile b/docker/test/integration/kerberized_hadoop/Dockerfile index 11da590f901..00944cbfc00 100644 --- a/docker/test/integration/kerberized_hadoop/Dockerfile +++ b/docker/test/integration/kerberized_hadoop/Dockerfile @@ -1,16 +1,22 @@ # docker build -t clickhouse/kerberized-hadoop . FROM sequenceiq/hadoop-docker:2.7.0 -RUN sed -i -e 's/^\#baseurl/baseurl/' /etc/yum.repos.d/CentOS-Base.repo -RUN sed -i -e 's/^mirrorlist/#mirrorlist/' /etc/yum.repos.d/CentOS-Base.repo -RUN sed -i -e 's#http://mirror.centos.org/#http://vault.centos.org/#' /etc/yum.repos.d/CentOS-Base.repo + +RUN sed -i -e 's/^\#baseurl/baseurl/' /etc/yum.repos.d/CentOS-Base.repo && \ + sed -i -e 's/^mirrorlist/#mirrorlist/' /etc/yum.repos.d/CentOS-Base.repo && \ + sed -i -e 's#http://mirror.centos.org/#http://vault.centos.org/#' /etc/yum.repos.d/CentOS-Base.repo + +# https://community.letsencrypt.org/t/rhel-centos-6-openssl-client-compatibility-after-dst-root-ca-x3-expiration/161032/81 +RUN sed -i s/xMDkzMDE0MDExNVow/0MDkzMDE4MTQwM1ow/ /etc/pki/tls/certs/ca-bundle.crt + RUN yum clean all && \ rpm --rebuilddb && \ yum -y update && \ yum -y install yum-plugin-ovl && \ yum --quiet -y install krb5-workstation.x86_64 + RUN cd /tmp && \ - curl http://archive.apache.org/dist/commons/daemon/source/commons-daemon-1.0.15-src.tar.gz -o commons-daemon-1.0.15-src.tar.gz && \ + curl http://archive.apache.org/dist/commons/daemon/source/commons-daemon-1.0.15-src.tar.gz -o commons-daemon-1.0.15-src.tar.gz && \ tar xzf commons-daemon-1.0.15-src.tar.gz && \ cd commons-daemon-1.0.15-src/src/native/unix && \ ./configure && \ diff --git a/docker/test/pvs/Dockerfile b/docker/test/pvs/Dockerfile index f4675d35819..77cbd910922 100644 --- a/docker/test/pvs/Dockerfile +++ b/docker/test/pvs/Dockerfile @@ -37,7 +37,9 @@ RUN set -x \ || echo "WARNING: Some file was just downloaded from the internet without any validation and we are installing it into the system"; } \ && dpkg -i "${PKG_VERSION}.deb" -CMD echo "Running PVS version $PKG_VERSION" && cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \ +ENV CCACHE_DIR=/test_output/ccache + +CMD echo "Running PVS version $PKG_VERSION" && mkdir -p $CCACHE_DIR && cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \ && cmake . 
-D"ENABLE_EMBEDDED_COMPILER"=OFF -D"USE_INTERNAL_PROTOBUF_LIBRARY"=OFF -D"USE_INTERNAL_GRPC_LIBRARY"=OFF -DCMAKE_C_COMPILER=clang-13 -DCMAKE_CXX_COMPILER=clang\+\+-13 \ && ninja re2_st clickhouse_grpc_protos \ && pvs-studio-analyzer analyze -o pvs-studio.log -e contrib -j 4 -l ./licence.lic; \ diff --git a/docker/test/style/run.sh b/docker/test/style/run.sh index 424bfe71b15..0118e6df764 100755 --- a/docker/test/style/run.sh +++ b/docker/test/style/run.sh @@ -1,5 +1,7 @@ #!/bin/bash +# yaml check is not the best one + cd /ClickHouse/utils/check-style || echo -e "failure\tRepo not found" > /test_output/check_status.tsv ./check-style -n |& tee /test_output/style_output.txt ./check-typos |& tee /test_output/typos_output.txt diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md index c4305d229cf..eaf7a96ce42 100644 --- a/docs/en/interfaces/cli.md +++ b/docs/en/interfaces/cli.md @@ -128,6 +128,8 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va - `--history_file` — Path to a file containing command history. - `--param_` — Value for a [query with parameters](#cli-queries-with-parameters). - `--hardware-utilization` — Print hardware utilization information in progress bar. +- `--print-profile-events` – Print `ProfileEvents` packets. +- `--profile-events-delay-ms` – Delay between printing `ProfileEvents` packets (-1 - print only totals, 0 - print every single packet). Since version 20.5, `clickhouse-client` has automatic syntax highlighting (always enabled). diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 11459ff4a4a..98d3044f007 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -25,9 +25,6 @@ #endif #include #include -#include -#include -#include #include #include #include "Common/MemoryTracker.h" @@ -35,13 +32,11 @@ #include #include #include -#include #include #include #include #include -#include #include #include @@ -51,9 +46,6 @@ #include #include #include -#include -#include -#include #include @@ -86,7 +78,6 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; extern const int TOO_DEEP_RECURSION; extern const int NETWORK_ERROR; - extern const int UNRECOGNIZED_ARGUMENTS; extern const int AUTHENTICATION_FAILED; } @@ -993,7 +984,7 @@ void Client::printHelpMessage(const OptionsDescription & options_description) } -void Client::addAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) +void Client::addOptions(OptionsDescription & options_description) { /// Main commandline options related to client functionality and all parameters from Settings. options_description.main_description->add_options() @@ -1050,14 +1041,6 @@ void Client::addAndCheckOptions(OptionsDescription & options_description, po::va ( "types", po::value(), "types" ); - - cmd_settings.addProgramOptions(options_description.main_description.value()); - /// Parse main commandline options. 
- po::parsed_options parsed = po::command_line_parser(arguments).options(options_description.main_description.value()).run(); - auto unrecognized_options = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional); - if (unrecognized_options.size() > 1) - throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'", unrecognized_options[1]); - po::store(parsed, options); } @@ -1235,16 +1218,16 @@ int mainEntryClickHouseClient(int argc, char ** argv) client.init(argc, argv); return client.run(); } - catch (const boost::program_options::error & e) - { - std::cerr << "Bad arguments: " << e.what() << std::endl; - return 1; - } catch (const DB::Exception & e) { std::cerr << DB::getExceptionMessage(e, false) << std::endl; return 1; } + catch (const boost::program_options::error & e) + { + std::cerr << "Bad arguments: " << e.what() << std::endl; + return DB::ErrorCodes::BAD_ARGUMENTS; + } catch (...) { std::cerr << DB::getCurrentExceptionMessage(true) << std::endl; diff --git a/programs/client/Client.h b/programs/client/Client.h index 43f6deae0b5..2def74ef3fc 100644 --- a/programs/client/Client.h +++ b/programs/client/Client.h @@ -24,7 +24,7 @@ protected: String getName() const override { return "client"; } void printHelpMessage(const OptionsDescription & options_description) override; - void addAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) override; + void addOptions(OptionsDescription & options_description) override; void processOptions(const OptionsDescription & options_description, const CommandLineOptions & options, const std::vector & external_tables_arguments) override; void processConfig() override; diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 766123339af..4d8977c9e62 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -1,8 +1,6 @@ #include "LocalServer.h" #include -#include -#include #include #include #include @@ -10,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -20,17 +17,12 @@ #include #include #include -#include -#include #include -#include -#include #include #include #include #include #include -#include #include #include #include @@ -42,9 +34,7 @@ #include #include #include -#include #include -#include #include #include @@ -512,19 +502,16 @@ void LocalServer::processConfig() format = config().getString("output-format", config().getString("format", is_interactive ? "PrettyCompact" : "TSV")); insert_format = "Values"; + /// Setting value from cmd arg overrides one from config if (global_context->getSettingsRef().max_insert_block_size.changed) insert_format_max_block_size = global_context->getSettingsRef().max_insert_block_size; else insert_format_max_block_size = config().getInt("insert_format_max_block_size", global_context->getSettingsRef().max_insert_block_size); - /// Skip networking - /// Sets external authenticators config (LDAP, Kerberos). global_context->setExternalAuthenticatorsConfig(config()); - global_context->initializeBackgroundExecutors(); - setupUsers(); /// Limit on total number of concurrently executing queries. 
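Context for the removals above: the boost::program_options parsing deleted from Client::addAndCheckOptions (and, below, from LocalServer::addAndCheckOptions) is centralized later in this patch as ClientBase::parseAndCheckOptions, which parses with allow_unregistered() and then runs two collect_unrecognized passes. A self-contained sketch of that pattern, using a toy option set (the option name and messages are illustrative, not ClickHouse's):

```cpp
#include <boost/program_options.hpp>
#include <iostream>
#include <string>

namespace po = boost::program_options;

int main(int argc, char ** argv)
{
    po::options_description desc("Options");
    desc.add_options()("query,q", po::value<std::string>(), "query to execute");

    /// allow_unregistered() keeps unknown tokens instead of throwing,
    /// so they can be reported with a custom message.
    po::parsed_options parsed = po::command_line_parser(argc, argv).options(desc).allow_unregistered().run();

    /// Pass 1: unknown named options, positional tokens excluded.
    auto unrecognized = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::exclude_positional);
    if (!unrecognized.empty())
    {
        std::cerr << "Unrecognized option '" << unrecognized[0] << "'\n";
        return 1;
    }

    /// Pass 2: positional tokens (e.g. everything after " -- ").
    /// Mirroring the patch, a single positional token is tolerated; more are not.
    unrecognized = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional);
    if (unrecognized.size() > 1)
    {
        std::cerr << "Positional options are not supported.\n";
        return 1;
    }

    po::variables_map options;
    po::store(parsed, options);
    po::notify(options);
    return 0;
}
```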
@@ -660,7 +647,7 @@ void LocalServer::printHelpMessage(const OptionsDescription & options_descriptio } -void LocalServer::addAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) +void LocalServer::addOptions(OptionsDescription & options_description) { options_description.main_description->add_options() ("database,d", po::value(), "database") @@ -678,11 +665,8 @@ void LocalServer::addAndCheckOptions(OptionsDescription & options_description, p ("logger.level", po::value(), "Log level") ("no-system-tables", "do not attach system tables (better startup time)") + ("path", po::value(), "Storage path") ; - - cmd_settings.addProgramOptions(options_description.main_description.value()); - po::parsed_options parsed = po::command_line_parser(arguments).options(options_description.main_description.value()).run(); - po::store(parsed, options); } @@ -737,6 +721,17 @@ int mainEntryClickHouseLocal(int argc, char ** argv) app.init(argc, argv); return app.run(); } + catch (const DB::Exception & e) + { + std::cerr << DB::getExceptionMessage(e, false) << std::endl; + auto code = DB::getCurrentExceptionCode(); + return code ? code : 1; + } + catch (const boost::program_options::error & e) + { + std::cerr << "Bad arguments: " << e.what() << std::endl; + return DB::ErrorCodes::BAD_ARGUMENTS; + } catch (...) { std::cerr << DB::getCurrentExceptionMessage(true) << '\n'; diff --git a/programs/local/LocalServer.h b/programs/local/LocalServer.h index e14e18adced..ce0df06c86a 100644 --- a/programs/local/LocalServer.h +++ b/programs/local/LocalServer.h @@ -40,7 +40,7 @@ protected: String getQueryTextPrefix() override; void printHelpMessage(const OptionsDescription & options_description) override; - void addAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) override; + void addOptions(OptionsDescription & options_description) override; void processOptions(const OptionsDescription & options_description, const CommandLineOptions & options, const std::vector &) override; void processConfig() override; diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index b33e80eb57a..647ed4891f8 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -922,7 +922,7 @@ if (ThreadFuzzer::instance().isEffective()) /// Initialize background executors after we load default_profile config. /// This is needed to load proper values of background_pool_size etc. 
- global_context->initializeBackgroundExecutors(); + global_context->initializeBackgroundExecutorsIfNeeded(); if (settings.async_insert_threads) global_context->setAsynchronousInsertQueue(std::make_shared( diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 7bcff4f5ef7..a7c8fdb4641 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -71,6 +71,7 @@ namespace ErrorCodes extern const int UNEXPECTED_PACKET_FROM_SERVER; extern const int INVALID_USAGE_OF_INPUT; extern const int CANNOT_SET_SIGNAL_HANDLER; + extern const int UNRECOGNIZED_ARGUMENTS; } } @@ -266,7 +267,7 @@ void ClientBase::onLogData(Block & block) { initLogsOutputStream(); progress_indication.clearProgressOutput(); - logs_out_stream->write(block); + logs_out_stream->writeLogs(block); logs_out_stream->flush(); } @@ -668,39 +669,61 @@ void ClientBase::onEndOfStream() void ClientBase::onProfileEvents(Block & block) { const auto rows = block.rows(); - if (rows == 0 || !progress_indication.print_hardware_utilization) + if (rows == 0) return; - const auto & array_thread_id = typeid_cast(*block.getByName("thread_id").column).getData(); - const auto & names = typeid_cast(*block.getByName("name").column); - const auto & host_names = typeid_cast(*block.getByName("host_name").column); - const auto & array_values = typeid_cast(*block.getByName("value").column).getData(); - const auto * user_time_name = ProfileEvents::getName(ProfileEvents::UserTimeMicroseconds); - const auto * system_time_name = ProfileEvents::getName(ProfileEvents::SystemTimeMicroseconds); - - HostToThreadTimesMap thread_times; - for (size_t i = 0; i < rows; ++i) + if (progress_indication.print_hardware_utilization) { - auto thread_id = array_thread_id[i]; - auto host_name = host_names.getDataAt(i).toString(); - if (thread_id != 0) - progress_indication.addThreadIdToList(host_name, thread_id); - auto event_name = names.getDataAt(i); - auto value = array_values[i]; - if (event_name == user_time_name) + const auto & array_thread_id = typeid_cast(*block.getByName("thread_id").column).getData(); + const auto & names = typeid_cast(*block.getByName("name").column); + const auto & host_names = typeid_cast(*block.getByName("host_name").column); + const auto & array_values = typeid_cast(*block.getByName("value").column).getData(); + + const auto * user_time_name = ProfileEvents::getName(ProfileEvents::UserTimeMicroseconds); + const auto * system_time_name = ProfileEvents::getName(ProfileEvents::SystemTimeMicroseconds); + + HostToThreadTimesMap thread_times; + for (size_t i = 0; i < rows; ++i) { - thread_times[host_name][thread_id].user_ms = value; + auto thread_id = array_thread_id[i]; + auto host_name = host_names.getDataAt(i).toString(); + if (thread_id != 0) + progress_indication.addThreadIdToList(host_name, thread_id); + auto event_name = names.getDataAt(i); + auto value = array_values[i]; + if (event_name == user_time_name) + { + thread_times[host_name][thread_id].user_ms = value; + } + else if (event_name == system_time_name) + { + thread_times[host_name][thread_id].system_ms = value; + } + else if (event_name == MemoryTracker::USAGE_EVENT_NAME) + { + thread_times[host_name][thread_id].memory_usage = value; + } } - else if (event_name == system_time_name) + progress_indication.updateThreadEventData(thread_times); + } + + if (profile_events.print) + { + if (profile_events.watch.elapsedMilliseconds() >= profile_events.delay_ms) { - thread_times[host_name][thread_id].system_ms = value; + initLogsOutputStream(); + 
progress_indication.clearProgressOutput(); + logs_out_stream->writeProfileEvents(block); + logs_out_stream->flush(); + + profile_events.watch.restart(); + profile_events.last_block = {}; } - else if (event_name == MemoryTracker::USAGE_EVENT_NAME) + else { - thread_times[host_name][thread_id].memory_usage = value; + profile_events.last_block = block; } } - progress_indication.updateThreadEventData(thread_times); } @@ -1023,6 +1046,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin processed_rows = 0; written_first_block = false; progress_indication.resetProgress(); + profile_events.watch.restart(); { /// Temporarily apply query settings to context. @@ -1091,6 +1115,15 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin } } + /// Always print last block (if it was not printed already) + if (profile_events.last_block) + { + initLogsOutputStream(); + progress_indication.clearProgressOutput(); + logs_out_stream->writeProfileEvents(profile_events.last_block); + logs_out_stream->flush(); + } + if (is_interactive) { std::cout << std::endl << processed_rows << " rows in set. Elapsed: " << progress_indication.elapsedSeconds() << " sec. "; @@ -1505,6 +1538,26 @@ void ClientBase::readArguments(int argc, char ** argv, Arguments & common_argume } } +void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) +{ + cmd_settings.addProgramOptions(options_description.main_description.value()); + /// Parse main commandline options. + auto parser = po::command_line_parser(arguments).options(options_description.main_description.value()).allow_unregistered(); + po::parsed_options parsed = parser.run(); + + /// Check unrecognized options without positional options. + auto unrecognized_options = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::exclude_positional); + if (!unrecognized_options.empty()) + throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'", unrecognized_options[0]); + + /// Check positional options (options after ' -- ', ex: clickhouse-client -- ). 
+ unrecognized_options = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional); + if (unrecognized_options.size() > 1) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Positional options are not supported."); + + po::store(parsed, options); +} + void ClientBase::init(int argc, char ** argv) { @@ -1561,9 +1614,12 @@ void ClientBase::init(int argc, char ** argv) ("ignore-error", "do not stop processing in multiquery mode") ("stacktrace", "print stack traces of exceptions") ("hardware-utilization", "print hardware utilization information in progress bar") + ("print-profile-events", po::value(&profile_events.print)->zero_tokens(), "Printing ProfileEvents packets") + ("profile-events-delay-ms", po::value()->default_value(profile_events.delay_ms), "Delay between printing `ProfileEvents` packets (-1 - print only totals, 0 - print every single packet)") ; - addAndCheckOptions(options_description, options, common_arguments); + addOptions(options_description); + parseAndCheckOptions(options_description, options, common_arguments); po::notify(options); if (options.count("version") || options.count("V")) @@ -1611,6 +1667,10 @@ void ClientBase::init(int argc, char ** argv) config().setBool("vertical", true); if (options.count("stacktrace")) config().setBool("stacktrace", true); + if (options.count("print-profile-events")) + config().setBool("print-profile-events", true); + if (options.count("profile-events-delay-ms")) + config().setInt("profile-events-delay-ms", options["profile-events-delay-ms"].as()); if (options.count("progress")) config().setBool("progress", true); if (options.count("echo")) @@ -1631,6 +1691,8 @@ void ClientBase::init(int argc, char ** argv) progress_indication.print_hardware_utilization = true; query_processing_stage = QueryProcessingStage::fromString(options["stage"].as()); + profile_events.print = options.count("print-profile-events"); + profile_events.delay_ms = options["profile-events-delay-ms"].as(); processOptions(options_description, options, external_tables_arguments); argsToConfig(common_arguments, config(), 100); diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index fce706d7cf8..2e6cd1f66a6 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -91,7 +92,7 @@ protected: }; virtual void printHelpMessage(const OptionsDescription & options_description) = 0; - virtual void addAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) = 0; + virtual void addOptions(OptionsDescription & options_description) = 0; virtual void processOptions(const OptionsDescription & options_description, const CommandLineOptions & options, const std::vector & external_tables_arguments) = 0; @@ -132,6 +133,7 @@ private: void resetOutput(); void outputQueryInfo(bool echo_query_); void readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector & external_tables_arguments); + void parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments); protected: bool is_interactive = false; /// Use either interactive line editing interface or batch mode. @@ -217,6 +219,16 @@ protected: QueryFuzzer fuzzer; int query_fuzzer_runs = 0; + struct + { + bool print = false; + /// UINT64_MAX -- print only last + UInt64 delay_ms = 0; + Stopwatch watch; + /// For printing only last (delay_ms == 0). 
+ Block last_block; + } profile_events; + QueryProcessingStage::Enum query_processing_stage; }; diff --git a/src/Client/ClientBaseHelpers.cpp b/src/Client/ClientBaseHelpers.cpp index a530e48ee35..e1c1481c5b4 100644 --- a/src/Client/ClientBaseHelpers.cpp +++ b/src/Client/ClientBaseHelpers.cpp @@ -109,29 +109,29 @@ void highlight(const String & query, std::vector & colors {TokenType::OpeningSquareBracket, Replxx::Color::BROWN}, {TokenType::ClosingSquareBracket, Replxx::Color::BROWN}, {TokenType::DoubleColon, Replxx::Color::BROWN}, - {TokenType::OpeningCurlyBrace, Replxx::Color::INTENSE}, - {TokenType::ClosingCurlyBrace, Replxx::Color::INTENSE}, + {TokenType::OpeningCurlyBrace, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::ClosingCurlyBrace, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Comma, Replxx::Color::INTENSE}, - {TokenType::Semicolon, Replxx::Color::INTENSE}, - {TokenType::Dot, Replxx::Color::INTENSE}, - {TokenType::Asterisk, Replxx::Color::INTENSE}, + {TokenType::Comma, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Semicolon, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Dot, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Asterisk, replxx::color::bold(Replxx::Color::DEFAULT)}, {TokenType::HereDoc, Replxx::Color::CYAN}, - {TokenType::Plus, Replxx::Color::INTENSE}, - {TokenType::Minus, Replxx::Color::INTENSE}, - {TokenType::Slash, Replxx::Color::INTENSE}, - {TokenType::Percent, Replxx::Color::INTENSE}, - {TokenType::Arrow, Replxx::Color::INTENSE}, - {TokenType::QuestionMark, Replxx::Color::INTENSE}, - {TokenType::Colon, Replxx::Color::INTENSE}, - {TokenType::Equals, Replxx::Color::INTENSE}, - {TokenType::NotEquals, Replxx::Color::INTENSE}, - {TokenType::Less, Replxx::Color::INTENSE}, - {TokenType::Greater, Replxx::Color::INTENSE}, - {TokenType::LessOrEquals, Replxx::Color::INTENSE}, - {TokenType::GreaterOrEquals, Replxx::Color::INTENSE}, - {TokenType::Concatenation, Replxx::Color::INTENSE}, - {TokenType::At, Replxx::Color::INTENSE}, + {TokenType::Plus, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Minus, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Slash, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Percent, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Arrow, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::QuestionMark, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Colon, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Equals, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::NotEquals, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Less, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Greater, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::LessOrEquals, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::GreaterOrEquals, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Concatenation, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::At, replxx::color::bold(Replxx::Color::DEFAULT)}, {TokenType::DoubleAt, Replxx::Color::MAGENTA}, {TokenType::EndOfStream, Replxx::Color::DEFAULT}, @@ -142,7 +142,7 @@ void highlight(const String & query, std::vector & colors {TokenType::ErrorDoubleQuoteIsNotClosed, Replxx::Color::RED}, {TokenType::ErrorSinglePipeMark, Replxx::Color::RED}, {TokenType::ErrorWrongNumber, Replxx::Color::RED}, - { TokenType::ErrorMaxQuerySizeExceeded, Replxx::Color::RED }}; + {TokenType::ErrorMaxQuerySizeExceeded, Replxx::Color::RED}}; 
const Replxx::Color unknown_token_color = Replxx::Color::RED; diff --git a/src/Client/InternalTextLogs.cpp b/src/Client/InternalTextLogs.cpp index 65592fee670..430ba6daf0a 100644 --- a/src/Client/InternalTextLogs.cpp +++ b/src/Client/InternalTextLogs.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -13,7 +14,7 @@ namespace DB { -void InternalTextLogs::write(const Block & block) +void InternalTextLogs::writeLogs(const Block & block) { const auto & array_event_time = typeid_cast(*block.getByName("event_time").column).getData(); const auto & array_microseconds = typeid_cast(*block.getByName("event_time_microseconds").column).getData(); @@ -97,4 +98,69 @@ void InternalTextLogs::write(const Block & block) } } +void InternalTextLogs::writeProfileEvents(const Block & block) +{ + const auto & column_host_name = typeid_cast(*block.getByName("host_name").column); + const auto & array_current_time = typeid_cast(*block.getByName("current_time").column).getData(); + const auto & array_thread_id = typeid_cast(*block.getByName("thread_id").column).getData(); + const auto & array_type = typeid_cast(*block.getByName("type").column).getData(); + const auto & column_name = typeid_cast(*block.getByName("name").column); + const auto & array_value = typeid_cast(*block.getByName("value").column).getData(); + + for (size_t row_num = 0; row_num < block.rows(); ++row_num) + { + /// host_name + auto host_name = column_host_name.getDataAt(row_num); + if (host_name.size) + { + writeCString("[", wb); + if (color) + writeString(setColor(StringRefHash()(host_name)), wb); + writeString(host_name, wb); + if (color) + writeCString(resetColor(), wb); + writeCString("] ", wb); + } + + /// current_time + auto current_time = array_current_time[row_num]; + writeDateTimeText<'.', ':'>(current_time, wb); + + /// thread_id + UInt64 thread_id = array_thread_id[row_num]; + writeCString(" [ ", wb); + if (color) + writeString(setColor(intHash64(thread_id)), wb); + writeIntText(thread_id, wb); + if (color) + writeCString(resetColor(), wb); + writeCString(" ] ", wb); + + /// name + auto name = column_name.getDataAt(row_num); + if (color) + writeString(setColor(StringRefHash()(name)), wb); + DB::writeString(name, wb); + if (color) + writeCString(resetColor(), wb); + writeCString(": ", wb); + + /// value + UInt64 value = array_value[row_num]; + writeIntText(value, wb); + + //// type + Int8 type = array_type[row_num]; + writeCString(" (", wb); + if (color) + writeString(setColor(intHash64(type)), wb); + writeString(toString(ProfileEvents::TypeEnum->castToName(type)), wb); + if (color) + writeCString(resetColor(), wb); + writeCString(")", wb); + + writeChar('\n', wb); + } +} + } diff --git a/src/Client/InternalTextLogs.h b/src/Client/InternalTextLogs.h index a8b119b0f69..0690211fd24 100644 --- a/src/Client/InternalTextLogs.h +++ b/src/Client/InternalTextLogs.h @@ -6,16 +6,37 @@ namespace DB { -/// Prints internal server logs -/// Input blocks have to have the same structure as SystemLogsQueue::getSampleBlock() +/// Prints internal server logs or profile events with colored output (if requested). 
/// NOTE: IRowOutputFormat is not well suited for this case class InternalTextLogs { public: InternalTextLogs(WriteBuffer & buf_out, bool color_) : wb(buf_out), color(color_) {} - - void write(const Block & block); + /// Print internal server logs + /// + /// Input blocks have to have the same structure as SystemLogsQueue::getSampleBlock(): + /// - event_time + /// - event_time_microseconds + /// - host_name + /// - query_id + /// - thread_id + /// - priority + /// - source + /// - text + void writeLogs(const Block & block); + /// Print profile events. + /// + /// Block: + /// - host_name + /// - current_time + /// - thread_id + /// - type + /// - name + /// - value + /// + /// See also TCPHandler::sendProfileEvents() for block columns. + void writeProfileEvents(const Block & block); void flush() { diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index 4455ba3b9ad..abce1d27a45 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -267,19 +267,19 @@ bool LocalConnection::poll(size_t) } } - if (state->is_finished && send_progress && !state->sent_progress) - { - state->sent_progress = true; - next_packet_type = Protocol::Server::Progress; - return true; - } - if (state->is_finished) { finishQuery(); return true; } + if (send_progress && !state->sent_progress) + { + state->sent_progress = true; + next_packet_type = Protocol::Server::Progress; + return true; + } + if (state->block && state->block.value()) { next_packet_type = Protocol::Server::Data; @@ -293,7 +293,8 @@ bool LocalConnection::pollImpl() { Block block; auto next_read = pullBlock(block); - if (block) + + if (block && !state->io.null_format) { state->block.emplace(block); } diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index b6d9b65c28b..1aff1460125 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -589,6 +589,8 @@ M(619, POSTGRESQL_REPLICATION_INTERNAL_ERROR) \ M(620, QUERY_NOT_ALLOWED) \ M(621, CANNOT_NORMALIZE_STRING) \ + M(622, CANNOT_PARSE_CAPN_PROTO_SCHEMA) \ + M(623, CAPN_PROTO_BAD_CAST) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Common/filesystemHelpers.cpp b/src/Common/filesystemHelpers.cpp index 89214ad496e..f9fe8c97a14 100644 --- a/src/Common/filesystemHelpers.cpp +++ b/src/Common/filesystemHelpers.cpp @@ -118,7 +118,7 @@ bool pathStartsWith(const std::filesystem::path & path, const std::filesystem::p return absolute_path.starts_with(absolute_prefix_path); } -bool symlinkStartsWith(const std::filesystem::path & path, const std::filesystem::path & prefix_path) +bool fileOrSymlinkPathStartsWith(const std::filesystem::path & path, const std::filesystem::path & prefix_path) { /// Differs from pathStartsWith in how `path` is normalized before comparison. 
/// Make `path` absolute if it was relative and put it into normalized form: remove @@ -140,13 +140,14 @@ bool pathStartsWith(const String & path, const String & prefix_path) return pathStartsWith(filesystem_path, filesystem_prefix_path); } -bool symlinkStartsWith(const String & path, const String & prefix_path) +bool fileOrSymlinkPathStartsWith(const String & path, const String & prefix_path) { auto filesystem_path = std::filesystem::path(path); auto filesystem_prefix_path = std::filesystem::path(prefix_path); - return symlinkStartsWith(filesystem_path, filesystem_prefix_path); + return fileOrSymlinkPathStartsWith(filesystem_path, filesystem_prefix_path); } + } diff --git a/src/Common/filesystemHelpers.h b/src/Common/filesystemHelpers.h index de5802cde6d..fc3a4f15573 100644 --- a/src/Common/filesystemHelpers.h +++ b/src/Common/filesystemHelpers.h @@ -35,8 +35,9 @@ bool pathStartsWith(const std::filesystem::path & path, const std::filesystem::p /// Returns true if path starts with prefix path bool pathStartsWith(const String & path, const String & prefix_path); -/// Returns true if symlink starts with prefix path -bool symlinkStartsWith(const String & path, const String & prefix_path); +/// Same as pathStartsWith, but without canonization, i.e. allowed to check symlinks. +/// (Path is made absolute and normalized.) +bool fileOrSymlinkPathStartsWith(const String & path, const String & prefix_path); } diff --git a/src/Core/Settings.h b/src/Core/Settings.h index a5767955045..f91bf684c85 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -625,7 +625,8 @@ class IColumn; M(Bool, cross_to_inner_join_rewrite, true, "Use inner join instead of comma/cross join if possible", 0) \ \ M(Bool, output_format_arrow_low_cardinality_as_dictionary, false, "Enable output LowCardinality type as Dictionary Arrow type", 0) \ - + \ + M(EnumComparingMode, format_capn_proto_enum_comparising_mode, FormatSettings::EnumComparingMode::BY_VALUES, "How to map ClickHouse Enum and CapnProto Enum", 0)\ // End of FORMAT_FACTORY_SETTINGS // Please add settings non-related to formats into the COMMON_SETTINGS above. 
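The format_capn_proto_enum_comparising_mode setting added above controls how a ClickHouse Enum is matched against a CapnProto Enum; its three values are registered in the SettingsEnums.cpp hunk that follows, and checkEnums in CapnProtoUtils.cpp implements the comparison as set equality over names or values. A toy sketch of the three modes, deliberately independent of the ClickHouse and Cap'n Proto APIs (every identifier here is illustrative):

```cpp
#include <algorithm>
#include <cctype>
#include <set>
#include <string>
#include <utility>
#include <vector>

enum class EnumComparingMode { BY_NAMES, BY_VALUES, BY_NAMES_CASE_INSENSITIVE };

static std::string toLower(std::string s)
{
    std::transform(s.begin(), s.end(), s.begin(),
                   [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
    return s;
}

/// True if two enum definitions are considered compatible under the given mode.
bool enumsMatch(const std::vector<std::pair<std::string, int>> & lhs,
                const std::vector<std::pair<std::string, int>> & rhs,
                EnumComparingMode mode)
{
    if (mode == EnumComparingMode::BY_VALUES)
    {
        /// Compare only the sets of numeric values; names are ignored.
        std::set<int> lv, rv;
        for (const auto & kv : lhs) lv.insert(kv.second);
        for (const auto & kv : rhs) rv.insert(kv.second);
        return lv == rv;
    }

    /// Compare the sets of names, lowercased in the case-insensitive mode.
    bool ci = mode == EnumComparingMode::BY_NAMES_CASE_INSENSITIVE;
    std::set<std::string> ln, rn;
    for (const auto & kv : lhs) ln.insert(ci ? toLower(kv.first) : kv.first);
    for (const auto & kv : rhs) rn.insert(ci ? toLower(kv.first) : kv.first);
    return ln == rn;
}
```

The real checkEnums further below additionally rejects a CapnProto enum whose ordinals don't fit into the value range of the target ClickHouse Enum type.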
diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 8e588b62326..f5497588891 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -116,4 +116,9 @@ IMPLEMENT_SETTING_ENUM(ShortCircuitFunctionEvaluation, ErrorCodes::BAD_ARGUMENTS {{"enable", ShortCircuitFunctionEvaluation::ENABLE}, {"force_enable", ShortCircuitFunctionEvaluation::FORCE_ENABLE}, {"disable", ShortCircuitFunctionEvaluation::DISABLE}}) + +IMPLEMENT_SETTING_ENUM(EnumComparingMode, ErrorCodes::BAD_ARGUMENTS, + {{"by_names", FormatSettings::EnumComparingMode::BY_NAMES}, + {"by_values", FormatSettings::EnumComparingMode::BY_VALUES}, + {"by_names_case_insensitive", FormatSettings::EnumComparingMode::BY_NAMES_CASE_INSENSITIVE}}) } diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 33c5a6d8645..4bdb3c83ea5 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -168,4 +168,6 @@ enum class ShortCircuitFunctionEvaluation DECLARE_SETTING_ENUM(ShortCircuitFunctionEvaluation) +DECLARE_SETTING_ENUM_WITH_RENAME(EnumComparingMode, FormatSettings::EnumComparingMode) + } diff --git a/src/DataTypes/EnumValues.cpp b/src/DataTypes/EnumValues.cpp index 6df899ba9a2..ab5ea0ca249 100644 --- a/src/DataTypes/EnumValues.cpp +++ b/src/DataTypes/EnumValues.cpp @@ -1,4 +1,5 @@ #include +#include namespace DB { @@ -82,6 +83,24 @@ Names EnumValues::getAllRegisteredNames() const return result; } +template +std::unordered_set EnumValues::getSetOfAllNames(bool to_lower) const +{ + std::unordered_set result; + for (const auto & value : values) + result.insert(to_lower ? boost::algorithm::to_lower_copy(value.first) : value.first); + return result; +} + +template +std::unordered_set EnumValues::getSetOfAllValues() const +{ + std::unordered_set result; + for (const auto & value : values) + result.insert(value.second); + return result; +} + template class EnumValues; template class EnumValues; diff --git a/src/DataTypes/EnumValues.h b/src/DataTypes/EnumValues.h index 1e5e4f55ea7..17c292c5551 100644 --- a/src/DataTypes/EnumValues.h +++ b/src/DataTypes/EnumValues.h @@ -80,6 +80,10 @@ public: } Names getAllRegisteredNames() const override; + + std::unordered_set getSetOfAllNames(bool to_lower) const; + + std::unordered_set getSetOfAllValues() const; }; } diff --git a/src/Dictionaries/ExecutablePoolDictionarySource.cpp b/src/Dictionaries/ExecutablePoolDictionarySource.cpp index 8d1122b1194..dce2ce94b93 100644 --- a/src/Dictionaries/ExecutablePoolDictionarySource.cpp +++ b/src/Dictionaries/ExecutablePoolDictionarySource.cpp @@ -100,7 +100,7 @@ Pipe ExecutablePoolDictionarySource::getStreamForBlock(const Block & block) config.terminate_in_destructor_strategy = ShellCommand::DestructorStrategy{ true /*terminate_in_destructor*/, configuration.command_termination_timeout }; auto shell_command = ShellCommand::execute(config); return shell_command; - }, configuration.max_command_execution_time * 1000); + }, configuration.max_command_execution_time * 10000); if (!result) throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, diff --git a/src/Dictionaries/FileDictionarySource.cpp b/src/Dictionaries/FileDictionarySource.cpp index 8c1f099f344..7fd2dbf80f1 100644 --- a/src/Dictionaries/FileDictionarySource.cpp +++ b/src/Dictionaries/FileDictionarySource.cpp @@ -31,7 +31,7 @@ FileDictionarySource::FileDictionarySource( , context(context_) { auto user_files_path = context->getUserFilesPath(); - if (created_from_ddl && !pathStartsWith(filepath, user_files_path)) + if (created_from_ddl && 
!fileOrSymlinkPathStartsWith(filepath, user_files_path)) throw Exception(ErrorCodes::PATH_ACCESS_DENIED, "File path {} is not inside {}", filepath, user_files_path); } diff --git a/src/Dictionaries/LibraryDictionarySource.cpp b/src/Dictionaries/LibraryDictionarySource.cpp index f117cfb179e..42683fb884c 100644 --- a/src/Dictionaries/LibraryDictionarySource.cpp +++ b/src/Dictionaries/LibraryDictionarySource.cpp @@ -41,13 +41,7 @@ LibraryDictionarySource::LibraryDictionarySource( , context(Context::createCopy(context_)) { auto dictionaries_lib_path = context->getDictionariesLibPath(); - bool path_checked = false; - if (fs::is_symlink(path)) - path_checked = symlinkStartsWith(path, dictionaries_lib_path); - else - path_checked = pathStartsWith(path, dictionaries_lib_path); - - if (created_from_ddl && !path_checked) + if (created_from_ddl && !fileOrSymlinkPathStartsWith(path, dictionaries_lib_path)) throw Exception(ErrorCodes::PATH_ACCESS_DENIED, "File path {} is not inside {}", path, dictionaries_lib_path); if (!fs::exists(path)) diff --git a/src/Formats/CapnProtoUtils.cpp b/src/Formats/CapnProtoUtils.cpp new file mode 100644 index 00000000000..ecfa5df8351 --- /dev/null +++ b/src/Formats/CapnProtoUtils.cpp @@ -0,0 +1,432 @@ +#include + +#if USE_CAPNP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_PARSE_CAPN_PROTO_SCHEMA; + extern const int THERE_IS_NO_COLUMN; + extern const int BAD_TYPE_OF_FIELD; + extern const int CAPN_PROTO_BAD_CAST; + extern const int FILE_DOESNT_EXIST; + extern const int UNKNOWN_EXCEPTION; + extern const int INCORRECT_DATA; +} + +capnp::StructSchema CapnProtoSchemaParser::getMessageSchema(const FormatSchemaInfo & schema_info) +{ + capnp::ParsedSchema schema; + try + { + int fd; + KJ_SYSCALL(fd = open(schema_info.schemaDirectory().data(), O_RDONLY)); + auto schema_dir = kj::newDiskDirectory(kj::OsFileHandle(fd)); + schema = impl.parseFromDirectory(*schema_dir, kj::Path::parse(schema_info.schemaPath()), {}); + } + catch (const kj::Exception & e) + { + /// It's not ideal to determine the type of error from its description, but + /// this is the only way to do it here, because kj doesn't specify the type of error. 
+ auto description = std::string_view(e.getDescription().cStr()); + if (description.find("No such file or directory") != String::npos || description.find("no such directory") != String::npos) + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot open CapnProto schema, file {} doesn't exist", schema_info.absoluteSchemaPath()); + + if (description.find("Parse error") != String::npos) + throw Exception(ErrorCodes::CANNOT_PARSE_CAPN_PROTO_SCHEMA, "Cannot parse CapnProto schema {}:{}", schema_info.schemaPath(), e.getLine()); + + throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Unknown exception while parsing CapnProto schema: {}, schema dir and file: {}, {}", description, schema_info.schemaDirectory(), schema_info.schemaPath()); + } + + auto message_maybe = schema.findNested(schema_info.messageName()); + auto * message_schema = kj::_::readMaybe(message_maybe); + if (!message_schema) + throw Exception(ErrorCodes::CANNOT_PARSE_CAPN_PROTO_SCHEMA, "CapnProto schema doesn't contain message with name {}", schema_info.messageName()); + return message_schema->asStruct(); +} + +bool compareEnumNames(const String & first, const String & second, FormatSettings::EnumComparingMode mode) +{ + if (mode == FormatSettings::EnumComparingMode::BY_NAMES_CASE_INSENSITIVE) + return boost::algorithm::to_lower_copy(first) == boost::algorithm::to_lower_copy(second); + return first == second; +} + +static const std::map capnp_simple_type_names = +{ + {capnp::schema::Type::Which::BOOL, "Bool"}, + {capnp::schema::Type::Which::VOID, "Void"}, + {capnp::schema::Type::Which::INT8, "Int8"}, + {capnp::schema::Type::Which::INT16, "Int16"}, + {capnp::schema::Type::Which::INT32, "Int32"}, + {capnp::schema::Type::Which::INT64, "Int64"}, + {capnp::schema::Type::Which::UINT8, "UInt8"}, + {capnp::schema::Type::Which::UINT16, "UInt16"}, + {capnp::schema::Type::Which::UINT32, "UInt32"}, + {capnp::schema::Type::Which::UINT64, "UInt64"}, + {capnp::schema::Type::Which::FLOAT32, "Float32"}, + {capnp::schema::Type::Which::FLOAT64, "Float64"}, + {capnp::schema::Type::Which::TEXT, "Text"}, + {capnp::schema::Type::Which::DATA, "Data"}, + {capnp::schema::Type::Which::INTERFACE, "Interface"}, + {capnp::schema::Type::Which::ANY_POINTER, "AnyPointer"}, +}; + +static bool checkIfStructContainsUnnamedUnion(const capnp::StructSchema & struct_schema) +{ + return struct_schema.getFields().size() != struct_schema.getNonUnionFields().size(); +} + +static bool checkIfStructIsNamedUnion(const capnp::StructSchema & struct_schema) +{ + return struct_schema.getFields().size() == struct_schema.getUnionFields().size(); +} + +/// Get full name of type for better exception messages. +static String getCapnProtoFullTypeName(const capnp::Type & type) +{ + switch (type.which()) + { + case capnp::schema::Type::Which::STRUCT: + { + auto struct_schema = type.asStruct(); + + auto non_union_fields = struct_schema.getNonUnionFields(); + std::vector non_union_field_names; + for (auto nested_field : non_union_fields) + non_union_field_names.push_back(String(nested_field.getProto().getName()) + " " + getCapnProtoFullTypeName(nested_field.getType())); + + auto union_fields = struct_schema.getUnionFields(); + std::vector union_field_names; + for (auto nested_field : union_fields) + union_field_names.push_back(String(nested_field.getProto().getName()) + " " + getCapnProtoFullTypeName(nested_field.getType())); + + String union_name = "Union(" + boost::algorithm::join(union_field_names, ", ") + ")"; + /// Check if the struct is a named union. 
+ if (non_union_field_names.empty()) + return union_name; + + String type_name = "Struct(" + boost::algorithm::join(non_union_field_names, ", "); + /// Check if the struct contains an unnamed union. + if (!union_field_names.empty()) + type_name += ", " + union_name; + type_name += ")"; + return type_name; + } + case capnp::schema::Type::Which::LIST: + return "List(" + getCapnProtoFullTypeName(type.asList().getElementType()) + ")"; + case capnp::schema::Type::Which::ENUM: + { + auto enum_schema = type.asEnum(); + String enum_name = "Enum("; + auto enumerants = enum_schema.getEnumerants(); + for (size_t i = 0; i != enumerants.size(); ++i) + { + enum_name += String(enumerants[i].getProto().getName()) + " = " + std::to_string(enumerants[i].getOrdinal()); + if (i + 1 != enumerants.size()) + enum_name += ", "; + } + enum_name += ")"; + return enum_name; + } + default: + auto it = capnp_simple_type_names.find(type.which()); + if (it == capnp_simple_type_names.end()) + throw Exception(ErrorCodes::BAD_TYPE_OF_FIELD, "Unknown CapnProto type"); + return it->second; + } +} + +template +static bool checkEnums(const capnp::Type & capnp_type, const DataTypePtr column_type, FormatSettings::EnumComparingMode mode, UInt64 max_value, String & error_message) +{ + if (!capnp_type.isEnum()) + return false; + + auto enum_schema = capnp_type.asEnum(); + bool to_lower = mode == FormatSettings::EnumComparingMode::BY_NAMES_CASE_INSENSITIVE; + const auto * enum_type = assert_cast *>(column_type.get()); + const auto & enum_values = dynamic_cast &>(*enum_type); + + auto enumerants = enum_schema.getEnumerants(); + if (mode == FormatSettings::EnumComparingMode::BY_VALUES) + { + /// In CapnProto, Enum fields are numbered sequentially starting from zero. + if (enumerants.size() > max_value) + { + error_message += "Enum from CapnProto schema contains values that are out of range for ClickHouse Enum"; + return false; + } + + auto values = enum_values.getSetOfAllValues(); + std::unordered_set capn_enum_values; + for (auto enumerant : enumerants) + capn_enum_values.insert(Type(enumerant.getOrdinal())); + auto result = values == capn_enum_values; + if (!result) + error_message += "The set of values in Enum from CapnProto schema is different from the set of values in ClickHouse Enum"; + return result; + } + + auto names = enum_values.getSetOfAllNames(to_lower); + std::unordered_set capn_enum_names; + + for (auto enumerant : enumerants) + { + String name = enumerant.getProto().getName(); + capn_enum_names.insert(to_lower ? boost::algorithm::to_lower_copy(name) : name); + } + + auto result = names == capn_enum_names; + if (!result) + error_message += "The set of names in Enum from CapnProto schema is different from the set of names in ClickHouse Enum"; + return result; +} + +static bool checkCapnProtoType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message); + +static bool checkNullableType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message) +{ + if (!capnp_type.isStruct()) + return false; + + /// Check that the struct is a named union of type VOID and one arbitrary type. 
+ auto struct_schema = capnp_type.asStruct(); + if (!checkIfStructIsNamedUnion(struct_schema)) + return false; + + auto union_fields = struct_schema.getUnionFields(); + if (union_fields.size() != 2) + return false; + + auto first = union_fields[0]; + auto second = union_fields[1]; + + auto nested_type = assert_cast(data_type.get())->getNestedType(); + if (first.getType().isVoid()) + return checkCapnProtoType(second.getType(), nested_type, mode, error_message); + if (second.getType().isVoid()) + return checkCapnProtoType(first.getType(), nested_type, mode, error_message); + return false; +} + +static bool checkTupleType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message) +{ + if (!capnp_type.isStruct()) + return false; + auto struct_schema = capnp_type.asStruct(); + + if (checkIfStructIsNamedUnion(struct_schema)) + return false; + + if (checkIfStructContainsUnnamedUnion(struct_schema)) + { + error_message += "CapnProto struct contains unnamed union"; + return false; + } + + const auto * tuple_data_type = assert_cast(data_type.get()); + auto nested_types = tuple_data_type->getElements(); + if (nested_types.size() != struct_schema.getFields().size()) + { + error_message += "Tuple and Struct types have different sizes"; + return false; + } + + if (!tuple_data_type->haveExplicitNames()) + { + error_message += "Only named Tuple can be converted to CapnProto Struct"; + return false; + } + for (const auto & name : tuple_data_type->getElementNames()) + { + KJ_IF_MAYBE(field, struct_schema.findFieldByName(name)) + { + if (!checkCapnProtoType(field->getType(), nested_types[tuple_data_type->getPositionByName(name)], mode, error_message)) + return false; + } + else + { + error_message += "CapnProto struct doesn't contain a field with name " + name; + return false; + } + } + + return true; +} + +static bool checkArrayType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message) +{ + if (!capnp_type.isList()) + return false; + auto list_schema = capnp_type.asList(); + auto nested_type = assert_cast(data_type.get())->getNestedType(); + return checkCapnProtoType(list_schema.getElementType(), nested_type, mode, error_message); +} + +static bool checkCapnProtoType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message) +{ + switch (data_type->getTypeId()) + { + case TypeIndex::UInt8: + return capnp_type.isBool() || capnp_type.isUInt8(); + case TypeIndex::Date: [[fallthrough]]; + case TypeIndex::UInt16: + return capnp_type.isUInt16(); + case TypeIndex::DateTime: [[fallthrough]]; + case TypeIndex::UInt32: + return capnp_type.isUInt32(); + case TypeIndex::UInt64: + return capnp_type.isUInt64(); + case TypeIndex::Int8: + return capnp_type.isInt8(); + case TypeIndex::Int16: + return capnp_type.isInt16(); + case TypeIndex::Date32: [[fallthrough]]; + case TypeIndex::Int32: + return capnp_type.isInt32(); + case TypeIndex::DateTime64: [[fallthrough]]; + case TypeIndex::Int64: + return capnp_type.isInt64(); + case TypeIndex::Float32: + return capnp_type.isFloat32(); + case TypeIndex::Float64: + return capnp_type.isFloat64(); + case TypeIndex::Enum8: + return checkEnums(capnp_type, data_type, mode, INT8_MAX, error_message); + case TypeIndex::Enum16: + return checkEnums(capnp_type, data_type, mode, INT16_MAX, error_message); + case TypeIndex::Tuple: + return checkTupleType(capnp_type, data_type, mode, 
error_message); + case TypeIndex::Nullable: + { + auto result = checkNullableType(capnp_type, data_type, mode, error_message); + if (!result) + error_message += "Nullable can be represented only as a named union of type Void and nested type"; + return result; + } + case TypeIndex::Array: + return checkArrayType(capnp_type, data_type, mode, error_message); + case TypeIndex::LowCardinality: + return checkCapnProtoType(capnp_type, assert_cast<const DataTypeLowCardinality *>(data_type.get())->getDictionaryType(), mode, error_message); + case TypeIndex::FixedString: [[fallthrough]]; + case TypeIndex::String: + return capnp_type.isText() || capnp_type.isData(); + default: + return false; + } +} + +static std::pair<String, String> splitFieldName(const String & name) +{ + const auto * begin = name.data(); + const auto * end = name.data() + name.size(); + const auto * it = find_first_symbols<'_', '.'>(begin, end); + String first = String(begin, it); + String second = it == end ? "" : String(it + 1, end); + return {first, second}; +} + +capnp::DynamicValue::Reader getReaderByColumnName(const capnp::DynamicStruct::Reader & struct_reader, const String & name) +{ + auto [field_name, nested_name] = splitFieldName(name); + KJ_IF_MAYBE(field, struct_reader.getSchema().findFieldByName(field_name)) + { + capnp::DynamicValue::Reader field_reader; + try + { + field_reader = struct_reader.get(*field); + } + catch (const kj::Exception & e) + { + throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot extract field value from struct by provided schema, error: {}. Perhaps the data was generated by another schema", String(e.getDescription().cStr())); + } + + if (nested_name.empty()) + return field_reader; + + if (field_reader.getType() != capnp::DynamicValue::STRUCT) + throw Exception(ErrorCodes::CAPN_PROTO_BAD_CAST, "Field {} is not a struct", field_name); + + return getReaderByColumnName(field_reader.as<capnp::DynamicStruct>(), nested_name); + } + + throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "Capnproto struct doesn't contain field with name {}", field_name); +} + +std::pair<capnp::DynamicStruct::Builder, capnp::StructSchema::Field> getStructBuilderAndFieldByColumnName(capnp::DynamicStruct::Builder struct_builder, const String & name) +{ + auto [field_name, nested_name] = splitFieldName(name); + KJ_IF_MAYBE(field, struct_builder.getSchema().findFieldByName(field_name)) + { + if (nested_name.empty()) + return {struct_builder, *field}; + + auto field_builder = struct_builder.get(*field); + if (field_builder.getType() != capnp::DynamicValue::STRUCT) + throw Exception(ErrorCodes::CAPN_PROTO_BAD_CAST, "Field {} is not a struct", field_name); + + return getStructBuilderAndFieldByColumnName(field_builder.as<capnp::DynamicStruct>(), nested_name); + } + + throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "Capnproto struct doesn't contain field with name {}", field_name); +} + +static capnp::StructSchema::Field getFieldByName(const capnp::StructSchema & schema, const String & name) +{ + auto [field_name, nested_name] = splitFieldName(name); + KJ_IF_MAYBE(field, schema.findFieldByName(field_name)) + { + if (nested_name.empty()) + return *field; + + if (!field->getType().isStruct()) + throw Exception(ErrorCodes::CAPN_PROTO_BAD_CAST, "Field {} is not a struct", field_name); + + return getFieldByName(field->getType().asStruct(), nested_name); + } + + throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "Capnproto schema doesn't contain field with name {}", field_name); +} + +void checkCapnProtoSchemaStructure(const capnp::StructSchema & schema, const Block & header, FormatSettings::EnumComparingMode mode) +{ + /// First, check that the struct doesn't contain an unnamed union, because we 
don't support it. + if (checkIfStructContainsUnnamedUnion(schema)) + throw Exception(ErrorCodes::CAPN_PROTO_BAD_CAST, "Schema contains unnamed union that is not supported"); + auto names_and_types = header.getNamesAndTypesList(); + String additional_error_message; + for (auto & [name, type] : names_and_types) + { + auto field = getFieldByName(schema, name); + if (!checkCapnProtoType(field.getType(), type, mode, additional_error_message)) + { + auto e = Exception( + ErrorCodes::CAPN_PROTO_BAD_CAST, + "Cannot convert ClickHouse type {} to CapnProto type {}", + type->getName(), + getCapnProtoFullTypeName(field.getType())); + if (!additional_error_message.empty()) + e.addMessage(additional_error_message); + throw std::move(e); + } + } +} + +} + +#endif diff --git a/src/Formats/CapnProtoUtils.h b/src/Formats/CapnProtoUtils.h new file mode 100644 index 00000000000..93ca0a5e616 --- /dev/null +++ b/src/Formats/CapnProtoUtils.h @@ -0,0 +1,43 @@ +#pragma once + +#include "config_formats.h" +#if USE_CAPNP + +#include +#include +#include +#include +#include + +namespace DB +{ +// Wrapper for classes that could throw in destructor +// https://github.com/capnproto/capnproto/issues/553 +template <typename T> +struct DestructorCatcher +{ + T impl; + template <typename ... Arg> + DestructorCatcher(Arg && ... args) : impl(kj::fwd<Arg>(args)...) {} + ~DestructorCatcher() noexcept try { } catch (...) { return; } +}; + +class CapnProtoSchemaParser : public DestructorCatcher<capnp::SchemaParser> +{ +public: + CapnProtoSchemaParser() {} + + capnp::StructSchema getMessageSchema(const FormatSchemaInfo & schema_info); +}; + +bool compareEnumNames(const String & first, const String & second, FormatSettings::EnumComparingMode mode); + +std::pair<capnp::DynamicStruct::Builder, capnp::StructSchema::Field> getStructBuilderAndFieldByColumnName(capnp::DynamicStruct::Builder struct_builder, const String & name); + +capnp::DynamicValue::Reader getReaderByColumnName(const capnp::DynamicStruct::Reader & struct_reader, const String & name); + +void checkCapnProtoSchemaStructure(const capnp::StructSchema & schema, const Block & header, FormatSettings::EnumComparingMode mode); + +} + +#endif diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index ec7fa0a9e80..d2dc18a03fd 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -111,6 +111,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.arrow.low_cardinality_as_dictionary = settings.output_format_arrow_low_cardinality_as_dictionary; format_settings.arrow.import_nested = settings.input_format_arrow_import_nested; format_settings.orc.import_nested = settings.input_format_orc_import_nested; + format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode; /// Validate avro_schema_registry_url with RemoteHostFilter when non-empty and in Server context if (format_settings.schema.is_server) diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index d5784219c6a..ee3824081bb 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -42,7 +42,7 @@ FormatSettings getFormatSettings(ContextPtr context); template <typename T> FormatSettings getFormatSettings(ContextPtr context, const T & settings); -/** Allows to create an IBlockInputStream or IBlockOutputStream by the name of the format. +/** Allows to create an IInputFormat or IOutputFormat by the name of the format. * Note: format and compression are independent things. 
*/ class FormatFactory final : private boost::noncopyable diff --git a/src/Formats/FormatSchemaInfo.cpp b/src/Formats/FormatSchemaInfo.cpp index 2605c0bdf04..24c8dfc14f2 100644 --- a/src/Formats/FormatSchemaInfo.cpp +++ b/src/Formats/FormatSchemaInfo.cpp @@ -99,4 +99,10 @@ FormatSchemaInfo::FormatSchemaInfo(const String & format_schema, const String & } } +FormatSchemaInfo::FormatSchemaInfo(const FormatSettings & settings, const String & format, bool require_message) + : FormatSchemaInfo( + settings.schema.format_schema, format, require_message, settings.schema.is_server, settings.schema.format_schema_path) +{ +} + } diff --git a/src/Formats/FormatSchemaInfo.h b/src/Formats/FormatSchemaInfo.h index cb041e02116..8c430218af0 100644 --- a/src/Formats/FormatSchemaInfo.h +++ b/src/Formats/FormatSchemaInfo.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB { @@ -11,6 +12,7 @@ class FormatSchemaInfo { public: FormatSchemaInfo(const String & format_schema, const String & format, bool require_message, bool is_server, const std::string & format_schema_path); + FormatSchemaInfo(const FormatSettings & settings, const String & format, bool require_message); /// Returns path to the schema file. const String & schemaPath() const { return schema_path; } diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 8c894c77e82..403ccbc6763 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -183,6 +183,20 @@ struct FormatSettings { bool import_nested = false; } orc; + + /// For capnProto format we should determine how to + /// compare ClickHouse Enum and Enum from schema. + enum class EnumComparingMode + { + BY_NAMES, // Names in enums should be the same, values can be different. + BY_NAMES_CASE_INSENSITIVE, // Case-insensitive name comparison. + BY_VALUES, // Values should be the same, names can be different. + }; + + struct + { + EnumComparingMode enum_comparing_mode = EnumComparingMode::BY_VALUES; + } capn_proto; }; } diff --git a/src/Formats/NativeReader.cpp b/src/Formats/NativeReader.cpp index 2d8fdc160f5..9ef248dc904 100644 --- a/src/Formats/NativeReader.cpp +++ b/src/Formats/NativeReader.cpp @@ -56,7 +56,6 @@ NativeReader::NativeReader(ReadBuffer & istr_, UInt64 server_revision_, } } -// also resets few vars from IBlockInputStream (I didn't want to propagate resetParser upthere) void NativeReader::resetParser() { istr_concrete = nullptr; diff --git a/src/Formats/registerFormats.cpp b/src/Formats/registerFormats.cpp index 3e4c0366e8a..acaf6f28492 100644 --- a/src/Formats/registerFormats.cpp +++ b/src/Formats/registerFormats.cpp @@ -67,6 +67,7 @@ void registerOutputFormatNull(FormatFactory & factory); void registerOutputFormatMySQLWire(FormatFactory & factory); void registerOutputFormatMarkdown(FormatFactory & factory); void registerOutputFormatPostgreSQLWire(FormatFactory & factory); +void registerOutputFormatCapnProto(FormatFactory & factory); /// Input only formats. 
@@ -139,6 +140,7 @@ void registerFormats() registerOutputFormatMySQLWire(factory); registerOutputFormatMarkdown(factory); registerOutputFormatPostgreSQLWire(factory); + registerOutputFormatCapnProto(factory); registerInputFormatRegexp(factory); registerInputFormatJSONAsString(factory); diff --git a/src/Functions/ReplaceRegexpImpl.h b/src/Functions/ReplaceRegexpImpl.h index 9ba7e822bfe..46ebab7f5fe 100644 --- a/src/Functions/ReplaceRegexpImpl.h +++ b/src/Functions/ReplaceRegexpImpl.h @@ -96,6 +96,9 @@ struct ReplaceRegexpImpl re2_st::StringPiece matches[max_captures]; size_t start_pos = 0; + bool is_first_match = true; + bool is_start_pos_added_one = false; + while (start_pos < static_cast(input.length())) { /// If no more replacements possible for current string @@ -103,6 +106,9 @@ struct ReplaceRegexpImpl if (searcher.Match(input, start_pos, input.length(), re2_st::RE2::Anchor::UNANCHORED, matches, num_captures)) { + if (is_start_pos_added_one) + start_pos -= 1; + const auto & match = matches[0]; size_t bytes_to_copy = (match.data() - input.data()) - start_pos; @@ -112,6 +118,13 @@ struct ReplaceRegexpImpl res_offset += bytes_to_copy; start_pos += bytes_to_copy + match.length(); + /// To avoid infinite loop. + if (is_first_match && match.length() == 0 && !replace_one && input.length() > 1) + { + start_pos += 1; + is_start_pos_added_one = true; + } + /// Do substitution instructions for (const auto & it : instructions) { @@ -129,8 +142,9 @@ struct ReplaceRegexpImpl } } - if (replace_one || match.length() == 0) /// Stop after match of zero length, to avoid infinite loop. + if (replace_one || (!is_first_match && match.length() == 0)) can_finish_current_string = true; + is_first_match = false; } else can_finish_current_string = true; diff --git a/src/Functions/readWkt.cpp b/src/Functions/readWkt.cpp index 14e12fb310c..c3ae6516e0f 100644 --- a/src/Functions/readWkt.cpp +++ b/src/Functions/readWkt.cpp @@ -18,10 +18,10 @@ namespace ErrorCodes template -class FunctionReadWkt : public IFunction +class FunctionReadWKT : public IFunction { public: - explicit FunctionReadWkt() = default; + explicit FunctionReadWKT() = default; static constexpr const char * name = NameHolder::name; @@ -72,36 +72,36 @@ public: static FunctionPtr create(ContextPtr) { - return std::make_shared>(); + return std::make_shared>(); } }; -struct ReadWktPointNameHolder +struct ReadWKTPointNameHolder { - static constexpr const char * name = "readWktPoint"; + static constexpr const char * name = "readWKTPoint"; }; -struct ReadWktRingNameHolder +struct ReadWKTRingNameHolder { - static constexpr const char * name = "readWktRing"; + static constexpr const char * name = "readWKTRing"; }; -struct ReadWktPolygonNameHolder +struct ReadWKTPolygonNameHolder { - static constexpr const char * name = "readWktPolygon"; + static constexpr const char * name = "readWKTPolygon"; }; -struct ReadWktMultiPolygonNameHolder +struct ReadWKTMultiPolygonNameHolder { - static constexpr const char * name = "readWktMultiPolygon"; + static constexpr const char * name = "readWKTMultiPolygon"; }; -void registerFunctionReadWkt(FunctionFactory & factory) +void registerFunctionReadWKT(FunctionFactory & factory) { - factory.registerFunction, ReadWktPointNameHolder>>(); - factory.registerFunction, ReadWktRingNameHolder>>(); - factory.registerFunction, ReadWktPolygonNameHolder>>(); - factory.registerFunction, ReadWktMultiPolygonNameHolder>>(); + factory.registerFunction, ReadWKTPointNameHolder>>(); + factory.registerFunction, ReadWKTRingNameHolder>>(); + 
factory.registerFunction, ReadWKTPolygonNameHolder>>(); + factory.registerFunction, ReadWKTMultiPolygonNameHolder>>(); } } diff --git a/src/Functions/registerFunctionsGeo.cpp b/src/Functions/registerFunctionsGeo.cpp index a0ae38f6b85..fd55c9cc20a 100644 --- a/src/Functions/registerFunctionsGeo.cpp +++ b/src/Functions/registerFunctionsGeo.cpp @@ -23,7 +23,7 @@ void registerFunctionGeohashEncode(FunctionFactory & factory); void registerFunctionGeohashDecode(FunctionFactory & factory); void registerFunctionGeohashesInBox(FunctionFactory & factory); void registerFunctionWkt(FunctionFactory & factory); -void registerFunctionReadWkt(FunctionFactory & factory); +void registerFunctionReadWKT(FunctionFactory & factory); void registerFunctionSvg(FunctionFactory & factory); #if USE_H3 @@ -79,7 +79,7 @@ void registerFunctionsGeo(FunctionFactory & factory) registerFunctionGeohashDecode(factory); registerFunctionGeohashesInBox(factory); registerFunctionWkt(factory); - registerFunctionReadWkt(factory); + registerFunctionReadWKT(factory); registerFunctionSvg(factory); #if USE_H3 diff --git a/src/Functions/svg.cpp b/src/Functions/svg.cpp index 4495e668add..b3a89c0393c 100644 --- a/src/Functions/svg.cpp +++ b/src/Functions/svg.cpp @@ -102,6 +102,7 @@ public: void registerFunctionSvg(FunctionFactory & factory) { factory.registerFunction(); + factory.registerAlias("SVG", "svg"); } } diff --git a/src/IO/Progress.h b/src/IO/Progress.h index 7118de844f2..c00eea98ff4 100644 --- a/src/IO/Progress.h +++ b/src/IO/Progress.h @@ -121,7 +121,7 @@ struct Progress /** Callback to track the progress of the query. - * Used in IBlockInputStream and Context. + * Used in QueryPipeline and Context. * The function takes the number of rows in the last block, the number of bytes in the last block. * Note that the callback can be called from different threads. */ diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index 6d6bf61834b..3c53769e128 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -43,8 +43,6 @@ namespace ErrorCodes extern const int UNKNOWN_AGGREGATED_DATA_VARIANT; } -class IBlockOutputStream; - /** Different data structures that can be used for aggregation * For efficiency, the aggregation data itself is put into the pool. * Data and pool ownership (states of aggregate functions) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 78137b957df..d61bf2fdc31 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -2982,8 +2982,12 @@ void Context::setAsynchronousInsertQueue(const std::shared_ptrasync_insert_queue = ptr; } -void Context::initializeBackgroundExecutors() +void Context::initializeBackgroundExecutorsIfNeeded() { + auto lock = getLock(); + if (is_background_executors_initialized) + return; + const size_t max_merges_and_mutations = getSettingsRef().background_pool_size * getSettingsRef().background_merges_mutations_concurrency_ratio; /// With this executor we can execute more tasks than threads we have @@ -3030,6 +3034,8 @@ void Context::initializeBackgroundExecutors() LOG_INFO(shared->log, "Initialized background executor for common operations (e.g. 
clearing old parts) with num_threads={}, num_tasks={}", getSettingsRef().background_common_pool_size, getSettingsRef().background_common_pool_size); + + is_background_executors_initialized = true; } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 041ee34dc5a..223c87be545 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -293,6 +293,8 @@ private: /// A flag, used to distinguish between user query and internal query to a database engine (MaterializedPostgreSQL). bool is_internal_query = false; + /// Has initializeBackgroundExecutorsIfNeeded() method been executed? + bool is_background_executors_initialized = false; public: @@ -636,13 +638,13 @@ public: const Settings & getSettingsRef() const { return settings; } void setProgressCallback(ProgressCallback callback); - /// Used in InterpreterSelectQuery to pass it to the IBlockInputStream. + /// Used in executeQuery() to pass it to the QueryPipeline. ProgressCallback getProgressCallback() const; void setFileProgressCallback(FileProgressCallback && callback) { file_progress_callback = callback; } FileProgressCallback getFileProgressCallback() const { return file_progress_callback; } - /** Set in executeQuery and InterpreterSelectQuery. Then it is used in IBlockInputStream, + /** Set in executeQuery and InterpreterSelectQuery. Then it is used in QueryPipeline, * to update and monitor information about the total number of resources spent for the query. */ void setProcessListElement(QueryStatus * elem); @@ -869,7 +871,7 @@ public: void setReadTaskCallback(ReadTaskCallback && callback); /// Background executors related methods - void initializeBackgroundExecutors(); + void initializeBackgroundExecutorsIfNeeded(); MergeMutateBackgroundExecutorPtr getMergeMutateExecutor() const; OrdinaryBackgroundExecutorPtr getMovesExecutor() const; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 5dbde2344d3..35c8c32c65b 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -2303,14 +2303,12 @@ void InterpreterSelectQuery::executeOrderOptimized(QueryPlan & query_plan, Input { const Settings & settings = context->getSettingsRef(); - const auto & query = getSelectQuery(); auto finish_sorting_step = std::make_unique<FinishSortingStep>( query_plan.getCurrentDataStream(), input_sorting_info->order_key_prefix_descr, output_order_descr, settings.max_block_size, - limit, - query.hasFiltration()); + limit); query_plan.addStep(std::move(finish_sorting_step)); } diff --git a/src/Interpreters/ProfileEventsExt.cpp b/src/Interpreters/ProfileEventsExt.cpp index 4386c294316..472efc109fb 100644 --- a/src/Interpreters/ProfileEventsExt.cpp +++ b/src/Interpreters/ProfileEventsExt.cpp @@ -11,6 +11,11 @@ namespace ProfileEvents { +std::shared_ptr<DB::DataTypeEnum8> TypeEnum = std::make_shared<DB::DataTypeEnum8>(DB::DataTypeEnum8::Values{ + { "increment", static_cast<Int8>(INCREMENT)}, + { "gauge", static_cast<Int8>(GAUGE)}, +}); + /// Put implementation here to avoid extra linking dependencies for clickhouse_common_io void dumpToMapColumn(const Counters::Snapshot & counters, DB::IColumn * column, bool nonzero_only) { diff --git a/src/Interpreters/ProfileEventsExt.h b/src/Interpreters/ProfileEventsExt.h index 699c997d904..8a92eadec79 100644 --- a/src/Interpreters/ProfileEventsExt.h +++ b/src/Interpreters/ProfileEventsExt.h @@ -1,5 +1,6 @@ #pragma once #include +#include #include @@ -9,4 +10,13 @@ namespace ProfileEvents /// Dumps profile events to columns Map(String, UInt64) void 
dumpToMapColumn(const Counters::Snapshot & counters, DB::IColumn * column, bool nonzero_only = true); +/// This is for ProfileEvents packets. +enum Type : int8_t +{ + INCREMENT = 1, + GAUGE = 2, +}; + +extern std::shared_ptr<DB::DataTypeEnum8> TypeEnum; + } diff --git a/src/Interpreters/loadMetadata.cpp b/src/Interpreters/loadMetadata.cpp index 6a3db48e835..65b2065b2ad 100644 --- a/src/Interpreters/loadMetadata.cpp +++ b/src/Interpreters/loadMetadata.cpp @@ -161,7 +161,7 @@ void loadMetadata(ContextMutablePtr context, const String & default_database_nam bool create_default_db_if_not_exists = !default_database_name.empty(); bool metadata_dir_for_default_db_already_exists = databases.count(default_database_name); if (create_default_db_if_not_exists && !metadata_dir_for_default_db_already_exists) - databases.emplace(default_database_name, path + "/" + escapeForFileName(default_database_name)); + databases.emplace(default_database_name, std::filesystem::path(path) / escapeForFileName(default_database_name)); TablesLoader::Databases loaded_databases; for (const auto & [name, db_path] : databases) diff --git a/src/Processors/Formats/IOutputFormat.h b/src/Processors/Formats/IOutputFormat.h index b647338d6fb..ba4dcee6f70 100644 --- a/src/Processors/Formats/IOutputFormat.h +++ b/src/Processors/Formats/IOutputFormat.h @@ -72,7 +72,8 @@ public: InputPort & getPort(PortKind kind) { return *std::next(inputs.begin(), kind); } - /// Compatible to IBlockOutputStream interface + /// Compatibility with old interface. + /// TODO: separate formats and processors. void write(const Block & block); diff --git a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp index fd4b2870bea..4d000bb1f35 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp @@ -1,7 +1,6 @@ #include "CapnProtoRowInputFormat.h" #if USE_CAPNP -#include #include #include #include @@ -9,198 +8,40 @@ #include #include #include -#include -#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include namespace DB { namespace ErrorCodes { - extern const int BAD_TYPE_OF_FIELD; - extern const int THERE_IS_NO_COLUMN; extern const int LOGICAL_ERROR; } -static CapnProtoRowInputFormat::NestedField split(const Block & header, size_t i) -{ - CapnProtoRowInputFormat::NestedField field = {{}, i}; - - // Remove leading dot in field definition, e.g. ".msg" -> "msg" - String name(header.safeGetByPosition(i).name); - if (!name.empty() && name[0] == '.') - name.erase(0, 1); - - splitInto<'.', '_'>(field.tokens, name); - return field; -} - - -static Field convertNodeToField(const capnp::DynamicValue::Reader & value) -{ - switch (value.getType()) - { - case capnp::DynamicValue::UNKNOWN: - throw Exception("Unknown field type", ErrorCodes::BAD_TYPE_OF_FIELD); - case capnp::DynamicValue::VOID: - return Field(); - case capnp::DynamicValue::BOOL: - return value.as<bool>() ? 
1u : 0u; - case capnp::DynamicValue::INT: - return value.as<Int64>(); - case capnp::DynamicValue::UINT: - return value.as<UInt64>(); - case capnp::DynamicValue::FLOAT: - return value.as<Float64>(); - case capnp::DynamicValue::TEXT: - { - auto arr = value.as<capnp::Text>(); - return String(arr.begin(), arr.size()); - } - case capnp::DynamicValue::DATA: - { - auto arr = value.as<capnp::Data>().asChars(); - return String(arr.begin(), arr.size()); - } - case capnp::DynamicValue::LIST: - { - auto list_value = value.as<capnp::DynamicList>(); - Array res(list_value.size()); - for (auto i : kj::indices(list_value)) - res[i] = convertNodeToField(list_value[i]); - - return res; - } - case capnp::DynamicValue::ENUM: - return value.as<capnp::DynamicEnum>().getRaw(); - case capnp::DynamicValue::STRUCT: - { - auto struct_value = value.as<capnp::DynamicStruct>(); - const auto & fields = struct_value.getSchema().getFields(); - - Tuple tuple(fields.size()); - for (auto i : kj::indices(fields)) - tuple[i] = convertNodeToField(struct_value.get(fields[i])); - - return tuple; - } - case capnp::DynamicValue::CAPABILITY: - throw Exception("CAPABILITY type not supported", ErrorCodes::BAD_TYPE_OF_FIELD); - case capnp::DynamicValue::ANY_POINTER: - throw Exception("ANY_POINTER type not supported", ErrorCodes::BAD_TYPE_OF_FIELD); - } - return Field(); -} - -static capnp::StructSchema::Field getFieldOrThrow(capnp::StructSchema node, const std::string & field) -{ - KJ_IF_MAYBE(child, node.findFieldByName(field)) - return *child; - else - throw Exception("Field " + field + " doesn't exist in schema " + node.getShortDisplayName().cStr(), ErrorCodes::THERE_IS_NO_COLUMN); -} - - -void CapnProtoRowInputFormat::createActions(const NestedFieldList & sorted_fields, capnp::StructSchema reader) -{ - /// Columns in a table can map to fields in Cap'n'Proto or to structs. - - /// Store common parents and their tokens in order to backtrack. 
- std::vector<capnp::StructSchema::Field> parents; - std::vector<std::string> parent_tokens; - - capnp::StructSchema cur_reader = reader; - - for (const auto & field : sorted_fields) - { - if (field.tokens.empty()) - throw Exception("Logical error in CapnProtoRowInputFormat", ErrorCodes::LOGICAL_ERROR); - - // Backtrack to common parent - while (field.tokens.size() < parent_tokens.size() + 1 - || !std::equal(parent_tokens.begin(), parent_tokens.end(), field.tokens.begin())) - { - actions.push_back({Action::POP}); - parents.pop_back(); - parent_tokens.pop_back(); - - if (parents.empty()) - { - cur_reader = reader; - break; - } - else - cur_reader = parents.back().getType().asStruct(); - } - - // Go forward - while (parent_tokens.size() + 1 < field.tokens.size()) - { - const auto & token = field.tokens[parents.size()]; - auto node = getFieldOrThrow(cur_reader, token); - if (node.getType().isStruct()) - { - // Descend to field structure - parents.emplace_back(node); - parent_tokens.emplace_back(token); - cur_reader = node.getType().asStruct(); - actions.push_back({Action::PUSH, node}); - } - else if (node.getType().isList()) - { - break; // Collect list - } - else - throw Exception("Field " + token + " is neither Struct nor List", ErrorCodes::BAD_TYPE_OF_FIELD); - } - - // Read field from the structure - auto node = getFieldOrThrow(cur_reader, field.tokens[parents.size()]); - if (node.getType().isList() && !actions.empty() && actions.back().field == node) - { - // The field list here flattens Nested elements into multiple arrays - // In order to map Nested types in Cap'nProto back, they need to be collected - // Since the field names are sorted, the order of field positions must be preserved - // For example, if the fields are { b @0 :Text, a @1 :Text }, the `a` would come first - // even though it's position is second. - auto & columns = actions.back().columns; - auto it = std::upper_bound(columns.cbegin(), columns.cend(), field.pos); - columns.insert(it, field.pos); - } - else - { - actions.push_back({Action::READ, node, {field.pos}}); - } - } -} - -CapnProtoRowInputFormat::CapnProtoRowInputFormat(ReadBuffer & in_, Block header, Params params_, const FormatSchemaInfo & info) - : IRowInputFormat(std::move(header), in_, std::move(params_)), parser(std::make_shared<SchemaParser>()) +CapnProtoRowInputFormat::CapnProtoRowInputFormat(ReadBuffer & in_, Block header, Params params_, const FormatSchemaInfo & info, const FormatSettings & format_settings_) + : IRowInputFormat(std::move(header), in_, std::move(params_)) + , parser(std::make_shared<CapnProtoSchemaParser>()) + , format_settings(format_settings_) + , column_types(getPort().getHeader().getDataTypes()) + , column_names(getPort().getHeader().getNames()) { // Parse the schema and fetch the root object - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - auto schema = parser->impl.parseDiskFile(info.schemaPath(), info.absoluteSchemaPath(), {}); -#pragma GCC diagnostic pop - - root = schema.getNested(info.messageName()).asStruct(); - - /** - * The schema typically consists of fields in various nested structures. - * Here we gather the list of fields and sort them in a way so that fields in the same structure are adjacent, - * and the nesting level doesn't decrease to make traversal easier. - */ - const auto & sample = getPort().getHeader(); - NestedFieldList list; - size_t num_columns = sample.columns(); - for (size_t i = 0; i < num_columns; ++i) - list.push_back(split(sample, i)); - - // Order list first by value of strings then by length of string vector. 
- std::sort(list.begin(), list.end(), [](const NestedField & a, const NestedField & b) { return a.tokens < b.tokens; }); - createActions(list, root); + root = parser->getMessageSchema(info); + checkCapnProtoSchemaStructure(root, getPort().getHeader(), format_settings.capn_proto.enum_comparing_mode); } kj::Array<capnp::word> CapnProtoRowInputFormat::readMessage() @@ -233,6 +74,191 @@ kj::Array<capnp::word> CapnProtoRowInputFormat::readMessage() return msg; } +static void insertSignedInteger(IColumn & column, const DataTypePtr & column_type, Int64 value) +{ + switch (column_type->getTypeId()) + { + case TypeIndex::Int8: + assert_cast<ColumnInt8 &>(column).insertValue(value); + break; + case TypeIndex::Int16: + assert_cast<ColumnInt16 &>(column).insertValue(value); + break; + case TypeIndex::Int32: + assert_cast<ColumnInt32 &>(column).insertValue(value); + break; + case TypeIndex::Int64: + assert_cast<ColumnInt64 &>(column).insertValue(value); + break; + case TypeIndex::DateTime64: + assert_cast<ColumnDecimal<DateTime64> &>(column).insertValue(value); + break; + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Column type is not a signed integer."); + } +} + +static void insertUnsignedInteger(IColumn & column, const DataTypePtr & column_type, UInt64 value) +{ + switch (column_type->getTypeId()) + { + case TypeIndex::UInt8: + assert_cast<ColumnUInt8 &>(column).insertValue(value); + break; + case TypeIndex::Date: [[fallthrough]]; + case TypeIndex::UInt16: + assert_cast<ColumnUInt16 &>(column).insertValue(value); + break; + case TypeIndex::DateTime: [[fallthrough]]; + case TypeIndex::UInt32: + assert_cast<ColumnUInt32 &>(column).insertValue(value); + break; + case TypeIndex::UInt64: + assert_cast<ColumnUInt64 &>(column).insertValue(value); + break; + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Column type is not an unsigned integer."); + } +} + +static void insertFloat(IColumn & column, const DataTypePtr & column_type, Float64 value) +{ + switch (column_type->getTypeId()) + { + case TypeIndex::Float32: + assert_cast<ColumnFloat32 &>(column).insertValue(value); + break; + case TypeIndex::Float64: + assert_cast<ColumnFloat64 &>(column).insertValue(value); + break; + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Column type is not a float."); + } +} + +template <typename Value> +static void insertString(IColumn & column, Value value) +{ + column.insertData(reinterpret_cast<const char *>(value.begin()), value.size()); +} + +template <typename Type> +static void insertEnum(IColumn & column, const DataTypePtr & column_type, const capnp::DynamicEnum & enum_value, FormatSettings::EnumComparingMode enum_comparing_mode) +{ + auto enumerant = *kj::_::readMaybe(enum_value.getEnumerant()); + auto enum_type = assert_cast<const DataTypeEnum<Type> *>(column_type.get()); + DataTypePtr nested_type = std::make_shared<DataTypeNumber<Type>>(); + switch (enum_comparing_mode) + { + case FormatSettings::EnumComparingMode::BY_VALUES: + insertSignedInteger(column, nested_type, Int64(enumerant.getOrdinal())); + return; + case FormatSettings::EnumComparingMode::BY_NAMES: + insertSignedInteger(column, nested_type, Int64(enum_type->getValue(String(enumerant.getProto().getName())))); + return; + case FormatSettings::EnumComparingMode::BY_NAMES_CASE_INSENSITIVE: + { + /// Find the same enum name case-insensitively.
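+ /// E.g. a ClickHouse Enum8('first' = 1) value would match a Cap'n Proto enumerant named "First" + /// (an illustrative pairing: in this mode any casing of the same name is accepted).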
+ String enum_name = enumerant.getProto().getName(); + for (auto & name : enum_type->getAllRegisteredNames()) + { + if (compareEnumNames(name, enum_name, enum_comparing_mode)) + { + insertSignedInteger(column, nested_type, Int64(enum_type->getValue(name))); + break; + } + } + } + } +} + +static void insertValue(IColumn & column, const DataTypePtr & column_type, const capnp::DynamicValue::Reader & value, FormatSettings::EnumComparingMode enum_comparing_mode) +{ + if (column_type->lowCardinality()) + { + auto & lc_column = assert_cast<ColumnLowCardinality &>(column); + auto tmp_column = lc_column.getDictionary().getNestedColumn()->cloneEmpty(); + auto dict_type = assert_cast<const DataTypeLowCardinality *>(column_type.get())->getDictionaryType(); + insertValue(*tmp_column, dict_type, value, enum_comparing_mode); + lc_column.insertFromFullColumn(*tmp_column, 0); + return; + } + + switch (value.getType()) + { + case capnp::DynamicValue::Type::INT: + insertSignedInteger(column, column_type, value.as<Int64>()); + break; + case capnp::DynamicValue::Type::UINT: + insertUnsignedInteger(column, column_type, value.as<UInt64>()); + break; + case capnp::DynamicValue::Type::FLOAT: + insertFloat(column, column_type, value.as<Float64>()); + break; + case capnp::DynamicValue::Type::BOOL: + insertUnsignedInteger(column, column_type, UInt64(value.as<bool>())); + break; + case capnp::DynamicValue::Type::DATA: + insertString(column, value.as<capnp::Data>()); + break; + case capnp::DynamicValue::Type::TEXT: + insertString(column, value.as<capnp::Text>()); + break; + case capnp::DynamicValue::Type::ENUM: + if (column_type->getTypeId() == TypeIndex::Enum8) + insertEnum<Int8>(column, column_type, value.as<capnp::DynamicEnum>(), enum_comparing_mode); + else + insertEnum<Int16>(column, column_type, value.as<capnp::DynamicEnum>(), enum_comparing_mode); + break; + case capnp::DynamicValue::LIST: + { + auto list_value = value.as<capnp::DynamicList>(); + auto & column_array = assert_cast<ColumnArray &>(column); + auto & offsets = column_array.getOffsets(); + offsets.push_back(offsets.back() + list_value.size()); + + auto & nested_column = column_array.getData(); + auto nested_type = assert_cast<const DataTypeArray *>(column_type.get())->getNestedType(); + for (const auto & nested_value : list_value) + insertValue(nested_column, nested_type, nested_value, enum_comparing_mode); + break; + } + case capnp::DynamicValue::Type::STRUCT: + { + auto struct_value = value.as<capnp::DynamicStruct>(); + if (column_type->isNullable()) + { + auto & nullable_column = assert_cast<ColumnNullable &>(column); + auto field = *kj::_::readMaybe(struct_value.which()); + if (field.getType().isVoid()) + nullable_column.insertDefault(); + else + { + auto & nested_column = nullable_column.getNestedColumn(); + auto nested_type = assert_cast<const DataTypeNullable *>(column_type.get())->getNestedType(); + auto nested_value = struct_value.get(field); + insertValue(nested_column, nested_type, nested_value, enum_comparing_mode); + nullable_column.getNullMapData().push_back(0); + } + } + else + { + auto & tuple_column = assert_cast<ColumnTuple &>(column); + const auto * tuple_type = assert_cast<const DataTypeTuple *>(column_type.get()); + for (size_t i = 0; i != tuple_column.tupleSize(); ++i) + insertValue( + tuple_column.getColumn(i), + tuple_type->getElements()[i], + struct_value.get(tuple_type->getElementNames()[i]), + enum_comparing_mode); + } + break; + } + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected CapnProto value type."); + } +} + bool CapnProtoRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &) { if (in->eof()) @@ -245,51 +271,12 @@ bool CapnProtoRowInputFormat::readRow(MutableColumns & columns, RowReadExtension #else capnp::FlatArrayMessageReader msg(array); #endif - std::vector<capnp::DynamicStruct::Reader> stack; - stack.push_back(msg.getRoot<capnp::DynamicStruct>(root)); - 
for (auto action : actions) + auto root_reader = msg.getRoot<capnp::DynamicStruct>(root); + for (size_t i = 0; i != columns.size(); ++i) { - switch (action.type) - { - case Action::READ: - { - Field value = convertNodeToField(stack.back().get(action.field)); - if (action.columns.size() > 1) - { - // Nested columns must be flattened into several arrays - // e.g. Array(Tuple(x ..., y ...)) -> Array(x ...), Array(y ...) - const auto & collected = DB::get<const Array &>(value); - size_t size = collected.size(); - // The flattened array contains an array of a part of the nested tuple - Array flattened(size); - for (size_t column_index = 0; column_index < action.columns.size(); ++column_index) - { - // Populate array with a single tuple elements - for (size_t off = 0; off < size; ++off) - { - const auto & tuple = DB::get<const Tuple &>(collected[off]); - flattened[off] = tuple[column_index]; - } - auto & col = columns[action.columns[column_index]]; - col->insert(flattened); - } - } - else - { - auto & col = columns[action.columns[0]]; - col->insert(value); - } - - break; - } - case Action::POP: - stack.pop_back(); - break; - case Action::PUSH: - stack.push_back(stack.back().get(action.field).as<capnp::DynamicStruct>()); - break; - } + auto value = getReaderByColumnName(root_reader, column_names[i]); + insertValue(*columns[i], column_types[i], value, format_settings.capn_proto.enum_comparing_mode); } return true; @@ -302,8 +289,7 @@ void registerInputFormatCapnProto(FormatFactory & factory) [](ReadBuffer & buf, const Block & sample, IRowInputFormat::Params params, const FormatSettings & settings) { return std::make_shared<CapnProtoRowInputFormat>(buf, sample, std::move(params), - FormatSchemaInfo(settings.schema.format_schema, "CapnProto", true, - settings.schema.is_server, settings.schema.format_schema_path), + FormatSchemaInfo(settings, "CapnProto", true), settings); }); } diff --git a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h index 0957cd1d681..fc30cf11237 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h +++ b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h @@ -4,8 +4,8 @@ #if USE_CAPNP #include +#include #include -#include namespace DB { @@ -22,18 +22,7 @@ class ReadBuffer; class CapnProtoRowInputFormat : public IRowInputFormat { public: - struct NestedField - { - std::vector<String> tokens; - size_t pos; - }; - using NestedFieldList = std::vector<NestedField>; - - /** schema_dir - base path for schema files * schema_file - location of the capnproto schema, e.g. "schema.capnp" * root_object - name to the root object, e.g. "Message" */ - CapnProtoRowInputFormat(ReadBuffer & in_, Block header, Params params_, const FormatSchemaInfo & info); + CapnProtoRowInputFormat(ReadBuffer & in_, Block header, Params params_, const FormatSchemaInfo & info, const FormatSettings & format_settings_); String getName() const override { return "CapnProtoRowInputFormat"; } @@ -42,34 +31,11 @@ public: private: kj::Array<capnp::word> readMessage(); - // Build a traversal plan from a sorted list of fields - void createActions(const NestedFieldList & sorted_fields, capnp::StructSchema reader); - - /* Action for state machine for traversing nested structures. */ - using BlockPositionList = std::vector<size_t>; - struct Action - { - enum Type { POP, PUSH, READ }; - Type type{}; - capnp::StructSchema::Field field{}; - BlockPositionList columns{}; - }; - - // Wrapper for classes that could throw in destructor - // https://github.com/capnproto/capnproto/issues/553 - template <typename T> - struct DestructorCatcher - { - T impl; - template <typename ... Arg> - DestructorCatcher(Arg && ... 
args) : impl(kj::fwd(args)...) {} - ~DestructorCatcher() noexcept try { } catch (...) { return; } - }; - using SchemaParser = DestructorCatcher<capnp::SchemaParser>; - - std::shared_ptr<SchemaParser> parser; + std::shared_ptr<CapnProtoSchemaParser> parser; capnp::StructSchema root; - std::vector<Action> actions; + const FormatSettings format_settings; + DataTypes column_types; + Names column_names; }; } diff --git a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp new file mode 100644 index 00000000000..58f88c5c7cf --- /dev/null +++ b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp @@ -0,0 +1,268 @@ +#include +#if USE_CAPNP + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + + +CapnProtoOutputStream::CapnProtoOutputStream(WriteBuffer & out_) : out(out_) +{ +} + +void CapnProtoOutputStream::write(const void * buffer, size_t size) +{ + out.write(reinterpret_cast<const char *>(buffer), size); +} + +CapnProtoRowOutputFormat::CapnProtoRowOutputFormat( + WriteBuffer & out_, + const Block & header_, + const RowOutputFormatParams & params_, + const FormatSchemaInfo & info, + const FormatSettings & format_settings_) + : IRowOutputFormat(header_, out_, params_), column_names(header_.getNames()), column_types(header_.getDataTypes()), output_stream(std::make_unique<CapnProtoOutputStream>(out_)), format_settings(format_settings_) +{ + schema = schema_parser.getMessageSchema(info); + checkCapnProtoSchemaStructure(schema, getPort(PortKind::Main).getHeader(), format_settings.capn_proto.enum_comparing_mode); +} + +template <typename EnumValue> +static capnp::DynamicEnum getDynamicEnum( + const ColumnPtr & column, + const DataTypePtr & data_type, + size_t row_num, + const capnp::EnumSchema & enum_schema, + FormatSettings::EnumComparingMode mode) +{ + const auto * enum_data_type = assert_cast<const DataTypeEnum<EnumValue> *>(data_type.get()); + EnumValue enum_value = column->getInt(row_num); + if (mode == FormatSettings::EnumComparingMode::BY_VALUES) + return capnp::DynamicEnum(enum_schema, enum_value); + + auto enum_name = enum_data_type->getNameForValue(enum_value); + for (const auto enumerant : enum_schema.getEnumerants()) + { + if (compareEnumNames(String(enum_name), enumerant.getProto().getName(), mode)) + return capnp::DynamicEnum(enumerant); + } + + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot convert ClickHouse Enum value to CapnProto Enum"); +} + +static capnp::DynamicValue::Builder initStructFieldBuilder(const ColumnPtr & column, size_t row_num, capnp::DynamicStruct::Builder & struct_builder, capnp::StructSchema::Field field) +{ + if (const auto * array_column = checkAndGetColumn<ColumnArray>(*column)) + { + size_t size = array_column->getOffsets()[row_num] - array_column->getOffsets()[row_num - 1]; + return struct_builder.init(field, size); + } + + if (field.getType().isStruct()) + return struct_builder.init(field); + + return struct_builder.get(field); +} + +static std::optional<capnp::DynamicValue::Reader> convertToDynamicValue( + const ColumnPtr & column, + const DataTypePtr & data_type, + size_t row_num, + capnp::DynamicValue::Builder builder, + FormatSettings::EnumComparingMode enum_comparing_mode, + std::vector<std::unique_ptr<String>> & temporary_text_data_storage) +{ + /// Here we don't do any type validation, because we did it in the CapnProtoRowOutputFormat constructor.
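+ /// E.g. when the builder expects UINT, the column is already known to be one of the unsigned + /// integer types accepted by checkCapnProtoType(), so the plain getUInt() call below is safe + /// (an illustrative consequence of the up-front check, not additional logic).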
+ + if (data_type->lowCardinality()) + { + const auto * lc_column = assert_cast<const ColumnLowCardinality *>(column.get()); + const auto & dict_type = assert_cast<const DataTypeLowCardinality *>(data_type.get())->getDictionaryType(); + size_t index = lc_column->getIndexAt(row_num); + return convertToDynamicValue(lc_column->getDictionary().getNestedColumn(), dict_type, index, builder, enum_comparing_mode, temporary_text_data_storage); + } + + switch (builder.getType()) + { + case capnp::DynamicValue::Type::INT: + /// We allow outputting DateTime64 as Int64. + if (WhichDataType(data_type).isDateTime64()) + return capnp::DynamicValue::Reader(assert_cast<const ColumnDecimal<DateTime64> *>(column.get())->getElement(row_num)); + return capnp::DynamicValue::Reader(column->getInt(row_num)); + case capnp::DynamicValue::Type::UINT: + return capnp::DynamicValue::Reader(column->getUInt(row_num)); + case capnp::DynamicValue::Type::BOOL: + return capnp::DynamicValue::Reader(column->getBool(row_num)); + case capnp::DynamicValue::Type::FLOAT: + return capnp::DynamicValue::Reader(column->getFloat64(row_num)); + case capnp::DynamicValue::Type::ENUM: + { + auto enum_schema = builder.as<capnp::DynamicEnum>().getSchema(); + if (data_type->getTypeId() == TypeIndex::Enum8) + return capnp::DynamicValue::Reader( + getDynamicEnum<Int8>(column, data_type, row_num, enum_schema, enum_comparing_mode)); + return capnp::DynamicValue::Reader( + getDynamicEnum<Int16>(column, data_type, row_num, enum_schema, enum_comparing_mode)); + } + case capnp::DynamicValue::Type::DATA: + { + auto data = column->getDataAt(row_num); + return capnp::DynamicValue::Reader(capnp::Data::Reader(reinterpret_cast<const kj::byte *>(data.data), data.size)); + } + case capnp::DynamicValue::Type::TEXT: + { + /// In TEXT type data should be null-terminated, but ClickHouse String data may not be. + /// To make data null-terminated we should copy it to a temporary String object, but + /// capnp::Text::Reader works only with a pointer to the data and its size, so we should + /// guarantee that the new String object's lifetime is longer than the capnp::Text::Reader's lifetime. + /// To do this we store the new String object in a temporary storage, passed into this function + /// by reference. We use unique_ptr instead of just String to avoid pointer + /// invalidation on vector reallocation. + temporary_text_data_storage.push_back(std::make_unique<String>(column->getDataAt(row_num))); + auto & data = temporary_text_data_storage.back(); + return capnp::DynamicValue::Reader(capnp::Text::Reader(data->data(), data->size())); + } + case capnp::DynamicValue::Type::STRUCT: + { + auto struct_builder = builder.as<capnp::DynamicStruct>(); + auto nested_struct_schema = struct_builder.getSchema(); + /// Struct can represent Tuple or Nullable (named union with two fields) + if (data_type->isNullable()) + { + const auto * nullable_type = assert_cast<const DataTypeNullable *>(data_type.get()); + const auto * nullable_column = assert_cast<const ColumnNullable *>(column.get()); + auto fields = nested_struct_schema.getUnionFields(); + if (nullable_column->isNullAt(row_num)) + { + auto null_field = fields[0].getType().isVoid() ? fields[0] : fields[1]; + struct_builder.set(null_field, capnp::Void()); + } + else + { + auto value_field = fields[0].getType().isVoid() ? 
fields[1] : fields[0]; + struct_builder.clear(value_field); + const auto & nested_column = nullable_column->getNestedColumnPtr(); + auto value_builder = initStructFieldBuilder(nested_column, row_num, struct_builder, value_field); + auto value = convertToDynamicValue(nested_column, nullable_type->getNestedType(), row_num, value_builder, enum_comparing_mode, temporary_text_data_storage); + if (value) + struct_builder.set(value_field, std::move(*value)); + } + } + else + { + const auto * tuple_data_type = assert_cast<const DataTypeTuple *>(data_type.get()); + auto nested_types = tuple_data_type->getElements(); + const auto & nested_columns = assert_cast<const ColumnTuple *>(column.get())->getColumns(); + for (const auto & name : tuple_data_type->getElementNames()) + { + auto pos = tuple_data_type->getPositionByName(name); + auto field_builder + = initStructFieldBuilder(nested_columns[pos], row_num, struct_builder, nested_struct_schema.getFieldByName(name)); + auto value = convertToDynamicValue(nested_columns[pos], nested_types[pos], row_num, field_builder, enum_comparing_mode, temporary_text_data_storage); + if (value) + struct_builder.set(name, std::move(*value)); + } + } + return std::nullopt; + } + case capnp::DynamicValue::Type::LIST: + { + auto list_builder = builder.as<capnp::DynamicList>(); + const auto * array_column = assert_cast<const ColumnArray *>(column.get()); + const auto & nested_column = array_column->getDataPtr(); + const auto & nested_type = assert_cast<const DataTypeArray *>(data_type.get())->getNestedType(); + const auto & offsets = array_column->getOffsets(); + auto offset = offsets[row_num - 1]; + size_t size = offsets[row_num] - offset; + + const auto * nested_array_column = checkAndGetColumn<ColumnArray>(*nested_column); + for (size_t i = 0; i != size; ++i) + { + capnp::DynamicValue::Builder value_builder; + /// For nested arrays we need to initialize nested list builder. + if (nested_array_column) + { + const auto & nested_offset = nested_array_column->getOffsets(); + size_t nested_array_size = nested_offset[offset + i] - nested_offset[offset + i - 1]; + value_builder = list_builder.init(i, nested_array_size); + } + else + value_builder = list_builder[i]; + + auto value = convertToDynamicValue(nested_column, nested_type, offset + i, value_builder, enum_comparing_mode, temporary_text_data_storage); + if (value) + list_builder.set(i, std::move(*value)); + } + return std::nullopt; + } + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected CapnProto type."); + } +} + +void CapnProtoRowOutputFormat::write(const Columns & columns, size_t row_num) +{ + capnp::MallocMessageBuilder message; + /// Temporary storage for data that will be output in fields with CapnProto type TEXT. + /// See comment in convertToDynamicValue() for more details. 
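+ /// (The Readers created in convertToDynamicValue() only borrow those bytes, so the strings are + /// kept alive in this vector for the whole row rather than on the stack of the recursive calls.)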
+ std::vector<std::unique_ptr<String>> temporary_text_data_storage; + capnp::DynamicStruct::Builder root = message.initRoot<capnp::DynamicStruct>(schema); + for (size_t i = 0; i != columns.size(); ++i) + { + auto [struct_builder, field] = getStructBuilderAndFieldByColumnName(root, column_names[i]); + auto field_builder = initStructFieldBuilder(columns[i], row_num, struct_builder, field); + auto value = convertToDynamicValue(columns[i], column_types[i], row_num, field_builder, format_settings.capn_proto.enum_comparing_mode, temporary_text_data_storage); + if (value) + struct_builder.set(field, *value); + } + + capnp::writeMessage(*output_stream, message); +} + +void registerOutputFormatCapnProto(FormatFactory & factory) +{ + factory.registerOutputFormat("CapnProto", []( + WriteBuffer & buf, + const Block & sample, + const RowOutputFormatParams & params, + const FormatSettings & format_settings) + { + return std::make_shared<CapnProtoRowOutputFormat>(buf, sample, params, FormatSchemaInfo(format_settings, "CapnProto", true), format_settings); + }); +} + +} + +#else + +namespace DB +{ +class FormatFactory; +void registerOutputFormatCapnProto(FormatFactory &) {} +} + +#endif // USE_CAPNP diff --git a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h new file mode 100644 index 00000000000..0f321071d62 --- /dev/null +++ b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h @@ -0,0 +1,53 @@ +#pragma once + +#include "config_formats.h" +#if USE_CAPNP + +#include +#include +#include +#include +#include +#include + +namespace DB +{ +class CapnProtoOutputStream : public kj::OutputStream +{ +public: + CapnProtoOutputStream(WriteBuffer & out_); + + void write(const void * buffer, size_t size) override; + +private: + WriteBuffer & out; +}; + +class CapnProtoRowOutputFormat : public IRowOutputFormat +{ +public: + CapnProtoRowOutputFormat( + WriteBuffer & out_, + const Block & header_, + const RowOutputFormatParams & params_, + const FormatSchemaInfo & info, + const FormatSettings & format_settings_); + + String getName() const override { return "CapnProtoRowOutputFormat"; } + + void write(const Columns & columns, size_t row_num) override; + + void writeField(const IColumn &, const ISerialization &, size_t) override { } + +private: + Names column_names; + DataTypes column_types; + capnp::StructSchema schema; + std::unique_ptr<CapnProtoOutputStream> output_stream; + const FormatSettings format_settings; + CapnProtoSchemaParser schema_parser; +}; + +} + +#endif // USE_CAPNP diff --git a/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp b/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp index a5e6b7ec480..df7b7102739 100644 --- a/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp @@ -67,8 +67,7 @@ void registerInputFormatProtobuf(FormatFactory & factory) const FormatSettings & settings) { return std::make_shared<ProtobufRowInputFormat>(buf, sample, std::move(params), - FormatSchemaInfo(settings.schema.format_schema, "Protobuf", true, - settings.schema.is_server, settings.schema.format_schema_path), + FormatSchemaInfo(settings, "Protobuf", true), with_length_delimiter); }); } diff --git a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp index 12c5e98797a..29cd9be79bc 100644 --- a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp @@ -64,9 +64,7 @@ void registerOutputFormatProtobuf(FormatFactory & factory) { return std::make_shared<ProtobufRowOutputFormat>( buf, header, params, - 
FormatSchemaInfo(settings.schema.format_schema, "Protobuf", - true, settings.schema.is_server, - settings.schema.format_schema_path), + FormatSchemaInfo(settings, "Protobuf", true), settings, with_length_delimiter); }); diff --git a/src/Processors/Merges/AggregatingSortedTransform.h b/src/Processors/Merges/AggregatingSortedTransform.h index e8bf90c2b31..b0cdf4c8a3c 100644 --- a/src/Processors/Merges/AggregatingSortedTransform.h +++ b/src/Processors/Merges/AggregatingSortedTransform.h @@ -16,7 +16,7 @@ public: const Block & header, size_t num_inputs, SortDescription description_, size_t max_block_size) : IMergingTransform( - num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false, + num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, header, num_inputs, std::move(description_), diff --git a/src/Processors/Merges/CollapsingSortedTransform.h b/src/Processors/Merges/CollapsingSortedTransform.h index 87c466f31e8..a37e1c8402f 100644 --- a/src/Processors/Merges/CollapsingSortedTransform.h +++ b/src/Processors/Merges/CollapsingSortedTransform.h @@ -20,7 +20,7 @@ public: WriteBuffer * out_row_sources_buf_ = nullptr, bool use_average_block_sizes = false) : IMergingTransform( - num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false, + num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, header, num_inputs, std::move(description_), diff --git a/src/Processors/Merges/FinishAggregatingInOrderTransform.h b/src/Processors/Merges/FinishAggregatingInOrderTransform.h index 6d5e334311f..58bd399afda 100644 --- a/src/Processors/Merges/FinishAggregatingInOrderTransform.h +++ b/src/Processors/Merges/FinishAggregatingInOrderTransform.h @@ -19,7 +19,7 @@ public: SortDescription description, size_t max_block_size) : IMergingTransform( - num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false, + num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, header, num_inputs, params, diff --git a/src/Processors/Merges/GraphiteRollupSortedTransform.h b/src/Processors/Merges/GraphiteRollupSortedTransform.h index 46272f00eed..e6307c629ea 100644 --- a/src/Processors/Merges/GraphiteRollupSortedTransform.h +++ b/src/Processors/Merges/GraphiteRollupSortedTransform.h @@ -15,7 +15,7 @@ public: SortDescription description_, size_t max_block_size, Graphite::Params params_, time_t time_of_merge_) : IMergingTransform( - num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false, + num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, header, num_inputs, std::move(description_), diff --git a/src/Processors/Merges/IMergingTransform.cpp b/src/Processors/Merges/IMergingTransform.cpp index cba78390c97..52acf36a4d7 100644 --- a/src/Processors/Merges/IMergingTransform.cpp +++ b/src/Processors/Merges/IMergingTransform.cpp @@ -15,10 +15,10 @@ IMergingTransformBase::IMergingTransformBase( const Block & input_header, const Block & output_header, bool have_all_inputs_, - bool has_limit_below_one_block_) + UInt64 limit_hint_) : IProcessor(InputPorts(num_inputs, input_header), {output_header}) , have_all_inputs(have_all_inputs_) - , has_limit_below_one_block(has_limit_below_one_block_) + , limit_hint(limit_hint_) { } @@ -79,7 +79,10 @@ IProcessor::Status IMergingTransformBase::prepareInitializeInputs() /// setNotNeeded after reading first chunk, because in optimismtic case /// (e.g. 
with optimized 'ORDER BY primary_key LIMIT n' and small 'n') /// we won't have to read any chunks anymore; - auto chunk = input.pull(has_limit_below_one_block); + auto chunk = input.pull(limit_hint != 0); + if (limit_hint && chunk.getNumRows() < limit_hint) + input.setNeeded(); + if (!chunk.hasRows()) { if (!input.isFinished()) diff --git a/src/Processors/Merges/IMergingTransform.h b/src/Processors/Merges/IMergingTransform.h index 8b0a44ae025..4da49b8155c 100644 --- a/src/Processors/Merges/IMergingTransform.h +++ b/src/Processors/Merges/IMergingTransform.h @@ -17,7 +17,7 @@ public: const Block & input_header, const Block & output_header, bool have_all_inputs_, - bool has_limit_below_one_block_); + UInt64 limit_hint_); OutputPort & getOutputPort() { return outputs.front(); } @@ -67,7 +67,7 @@ private: std::vector input_states; std::atomic have_all_inputs; bool is_initialized = false; - bool has_limit_below_one_block = false; + UInt64 limit_hint = 0; IProcessor::Status prepareInitializeInputs(); }; @@ -83,9 +83,9 @@ public: const Block & input_header, const Block & output_header, bool have_all_inputs_, - bool has_limit_below_one_block_, + UInt64 limit_hint_, Args && ... args) - : IMergingTransformBase(num_inputs, input_header, output_header, have_all_inputs_, has_limit_below_one_block_) + : IMergingTransformBase(num_inputs, input_header, output_header, have_all_inputs_, limit_hint_) , algorithm(std::forward(args) ...) { } diff --git a/src/Processors/Merges/MergingSortedTransform.cpp b/src/Processors/Merges/MergingSortedTransform.cpp index 667972e3cf6..ec864b561e9 100644 --- a/src/Processors/Merges/MergingSortedTransform.cpp +++ b/src/Processors/Merges/MergingSortedTransform.cpp @@ -13,13 +13,12 @@ MergingSortedTransform::MergingSortedTransform( SortDescription description_, size_t max_block_size, UInt64 limit_, - bool has_limit_below_one_block_, WriteBuffer * out_row_sources_buf_, bool quiet_, bool use_average_block_sizes, bool have_all_inputs_) : IMergingTransform( - num_inputs, header, header, have_all_inputs_, has_limit_below_one_block_, + num_inputs, header, header, have_all_inputs_, limit_, header, num_inputs, std::move(description_), diff --git a/src/Processors/Merges/MergingSortedTransform.h b/src/Processors/Merges/MergingSortedTransform.h index 1fa9b1275bd..93bd36d8aec 100644 --- a/src/Processors/Merges/MergingSortedTransform.h +++ b/src/Processors/Merges/MergingSortedTransform.h @@ -17,7 +17,6 @@ public: SortDescription description, size_t max_block_size, UInt64 limit_ = 0, - bool has_limit_below_one_block_ = false, WriteBuffer * out_row_sources_buf_ = nullptr, bool quiet_ = false, bool use_average_block_sizes = false, diff --git a/src/Processors/Merges/ReplacingSortedTransform.h b/src/Processors/Merges/ReplacingSortedTransform.h index e760cdf0d2b..dfb386684fc 100644 --- a/src/Processors/Merges/ReplacingSortedTransform.h +++ b/src/Processors/Merges/ReplacingSortedTransform.h @@ -18,7 +18,7 @@ public: WriteBuffer * out_row_sources_buf_ = nullptr, bool use_average_block_sizes = false) : IMergingTransform( - num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false, + num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, header, num_inputs, std::move(description_), diff --git a/src/Processors/Merges/SummingSortedTransform.h b/src/Processors/Merges/SummingSortedTransform.h index 0287caed5aa..0530ac2e96b 100644 --- a/src/Processors/Merges/SummingSortedTransform.h +++ b/src/Processors/Merges/SummingSortedTransform.h @@ -19,7 +19,7 
@@ public: const Names & partition_key_columns, size_t max_block_size) : IMergingTransform( - num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false, + num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, header, num_inputs, std::move(description_), diff --git a/src/Processors/Merges/VersionedCollapsingTransform.h b/src/Processors/Merges/VersionedCollapsingTransform.h index f260e20f1da..5eced1cb58d 100644 --- a/src/Processors/Merges/VersionedCollapsingTransform.h +++ b/src/Processors/Merges/VersionedCollapsingTransform.h @@ -19,7 +19,7 @@ public: WriteBuffer * out_row_sources_buf_ = nullptr, bool use_average_block_sizes = false) : IMergingTransform( - num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false, + num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, header, num_inputs, std::move(description_), diff --git a/src/Processors/QueryPlan/FinishSortingStep.cpp b/src/Processors/QueryPlan/FinishSortingStep.cpp index 6347b69901c..9002c804e7b 100644 --- a/src/Processors/QueryPlan/FinishSortingStep.cpp +++ b/src/Processors/QueryPlan/FinishSortingStep.cpp @@ -31,14 +31,12 @@ FinishSortingStep::FinishSortingStep( SortDescription prefix_description_, SortDescription result_description_, size_t max_block_size_, - UInt64 limit_, - bool has_filtration_) + UInt64 limit_) : ITransformingStep(input_stream_, input_stream_.header, getTraits(limit_)) , prefix_description(std::move(prefix_description_)) , result_description(std::move(result_description_)) , max_block_size(max_block_size_) , limit(limit_) - , has_filtration(has_filtration_) { /// TODO: check input_stream is sorted by prefix_description. output_stream->sort_description = result_description; @@ -60,14 +58,12 @@ void FinishSortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const if (pipeline.getNumStreams() > 1) { UInt64 limit_for_merging = (need_finish_sorting ? 
0 : limit); - bool has_limit_below_one_block = !has_filtration && limit_for_merging && limit_for_merging < max_block_size; auto transform = std::make_shared<MergingSortedTransform>( pipeline.getHeader(), pipeline.getNumStreams(), prefix_description, max_block_size, - limit_for_merging, - has_limit_below_one_block); + limit_for_merging); pipeline.addTransform(std::move(transform)); }
diff --git a/src/Processors/QueryPlan/FinishSortingStep.h b/src/Processors/QueryPlan/FinishSortingStep.h index ac34aea9df4..fd56c4353e7 100644 --- a/src/Processors/QueryPlan/FinishSortingStep.h +++ b/src/Processors/QueryPlan/FinishSortingStep.h @@ -14,8 +14,7 @@ public: SortDescription prefix_description_, SortDescription result_description_, size_t max_block_size_, - UInt64 limit_, - bool has_filtration_); + UInt64 limit_); String getName() const override { return "FinishSorting"; } @@ -32,7 +31,6 @@ private: SortDescription result_description; size_t max_block_size; UInt64 limit; - bool has_filtration; }; }
diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index cc400aacf2a..57785a5cc2d 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -485,8 +485,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder( pipe.getHeader(), pipe.numOutputPorts(), sort_description, - max_block_size, - 0, true); + max_block_size); pipe.addTransform(std::move(transform)); }
diff --git a/src/Processors/Sources/SourceWithProgress.cpp b/src/Processors/Sources/SourceWithProgress.cpp index 15d64dee3ee..0ebdd968997 100644 --- a/src/Processors/Sources/SourceWithProgress.cpp +++ b/src/Processors/Sources/SourceWithProgress.cpp @@ -69,8 +69,7 @@ void SourceWithProgress::work() } } -/// Aggregated copy-paste from IBlockInputStream::progressImpl. -/// Most of this must be done in PipelineExecutor outside. Now it's done for compatibility with IBlockInputStream. +/// TODO: Most of this must be done in PipelineExecutor outside. void SourceWithProgress::progress(const Progress & value) { was_progress_called = true; @@ -135,14 +134,12 @@ void SourceWithProgress::progress(const Progress & value) if (last_profile_events_update_time + profile_events_update_period_microseconds < total_elapsed_microseconds) { - /// Should be done in PipelineExecutor. - /// It is here for compatibility with IBlockInputsStream. + /// TODO: Should be done in PipelineExecutor. CurrentThread::updatePerformanceCounters(); last_profile_events_update_time = total_elapsed_microseconds; } - /// Should be done in PipelineExecutor. - /// It is here for compatibility with IBlockInputsStream. + /// TODO: Should be done in PipelineExecutor.
limits.speed_limits.throttle(progress.read_rows, progress.read_bytes, total_rows, total_elapsed_microseconds); if (quota && limits.mode == LimitsMode::LIMITS_TOTAL)
diff --git a/src/Processors/Transforms/ColumnGathererTransform.cpp b/src/Processors/Transforms/ColumnGathererTransform.cpp index ddb8a5a0d68..d0cb4975290 100644 --- a/src/Processors/Transforms/ColumnGathererTransform.cpp +++ b/src/Processors/Transforms/ColumnGathererTransform.cpp @@ -127,7 +127,7 @@ ColumnGathererTransform::ColumnGathererTransform( ReadBuffer & row_sources_buf_, size_t block_preferred_size_) : IMergingTransform( - num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false, + num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, num_inputs, row_sources_buf_, block_preferred_size_) , log(&Poco::Logger::get("ColumnGathererStream")) {
diff --git a/src/Processors/Transforms/MergeSortingTransform.cpp b/src/Processors/Transforms/MergeSortingTransform.cpp index e272fd0f183..73817d7de4a 100644 --- a/src/Processors/Transforms/MergeSortingTransform.cpp +++ b/src/Processors/Transforms/MergeSortingTransform.cpp @@ -197,7 +197,6 @@ void MergeSortingTransform::consume(Chunk chunk) description, max_merged_block_size, limit, - false, nullptr, quiet, use_average_block_sizes,
diff --git a/src/QueryPipeline/ProfileInfo.h b/src/QueryPipeline/ProfileInfo.h index 335092ce244..0a5800cd409 100644 --- a/src/QueryPipeline/ProfileInfo.h +++ b/src/QueryPipeline/ProfileInfo.h @@ -12,7 +12,7 @@ class Block; class ReadBuffer; class WriteBuffer; -/// Information for profiling. See IBlockInputStream.h +/// Information for profiling. See SourceWithProgress.h struct ProfileInfo { bool started = false;
diff --git a/src/QueryPipeline/QueryPipelineBuilder.h b/src/QueryPipeline/QueryPipelineBuilder.h index d2bbea03ce5..12f74805173 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.h +++ b/src/QueryPipeline/QueryPipelineBuilder.h @@ -129,7 +129,6 @@ public: void setLeafLimits(const SizeLimits & limits) { pipe.setLeafLimits(limits); } void setQuota(const std::shared_ptr<const EnabledQuota> & quota) { pipe.setQuota(quota); } - /// For compatibility with IBlockInputStream.
void setProgressCallback(const ProgressCallback & callback); void setProcessListElement(QueryStatus * elem);
diff --git a/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp b/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp index d39c49bb61c..fb10601216e 100644 --- a/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp +++ b/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp @@ -83,7 +83,7 @@ TEST(MergingSortedTest, SimpleBlockSizeTest) EXPECT_EQ(pipe.numOutputPorts(), 3); auto transform = std::make_shared<MergingSortedTransform>(pipe.getHeader(), pipe.numOutputPorts(), sort_description, - DEFAULT_MERGE_BLOCK_SIZE, 0, false, nullptr, false, true); + DEFAULT_MERGE_BLOCK_SIZE, 0, nullptr, false, true); pipe.addTransform(std::move(transform)); @@ -130,7 +130,7 @@ TEST(MergingSortedTest, MoreInterestingBlockSizes) EXPECT_EQ(pipe.numOutputPorts(), 3); auto transform = std::make_shared<MergingSortedTransform>(pipe.getHeader(), pipe.numOutputPorts(), sort_description, - DEFAULT_MERGE_BLOCK_SIZE, 0, false, nullptr, false, true); + DEFAULT_MERGE_BLOCK_SIZE, 0, nullptr, false, true); pipe.addTransform(std::move(transform));
diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 87cc76b1812..729cb33371a 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -30,6 +30,7 @@ #include #include #include +#include <Interpreters/ProfileEventsExt.h> #include #include #include @@ -831,12 +832,6 @@ namespace { using namespace ProfileEvents; - enum ProfileEventTypes : int8_t - { - INCREMENT = 1, - GAUGE = 2, - }; - constexpr size_t NAME_COLUMN_INDEX = 4; constexpr size_t VALUE_COLUMN_INDEX = 5; @@ -879,7 +874,7 @@ namespace columns[i++]->insertData(host_name.data(), host_name.size()); columns[i++]->insert(UInt64(snapshot.current_time)); columns[i++]->insert(UInt64{snapshot.thread_id}); - columns[i++]->insert(ProfileEventTypes::INCREMENT); + columns[i++]->insert(ProfileEvents::Type::INCREMENT); } } @@ -893,7 +888,7 @@ namespace columns[i++]->insertData(host_name.data(), host_name.size()); columns[i++]->insert(UInt64(snapshot.current_time)); columns[i++]->insert(UInt64{snapshot.thread_id}); - columns[i++]->insert(ProfileEventTypes::GAUGE); + columns[i++]->insert(ProfileEvents::Type::GAUGE); columns[i++]->insertData(MemoryTracker::USAGE_EVENT_NAME, strlen(MemoryTracker::USAGE_EVENT_NAME)); columns[i++]->insert(snapshot.memory_usage); @@ -907,18 +902,11 @@ void TCPHandler::sendProfileEvents() if (client_tcp_protocol_version < DBMS_MIN_PROTOCOL_VERSION_WITH_PROFILE_EVENTS) return; - auto profile_event_type = std::make_shared<DataTypeEnum8>( - DataTypeEnum8::Values - { - { "increment", static_cast<Int8>(INCREMENT)}, - { "gauge", static_cast<Int8>(GAUGE)}, - }); - NamesAndTypesList column_names_and_types = { { "host_name", std::make_shared<DataTypeString>() }, { "current_time", std::make_shared<DataTypeDateTime>() }, { "thread_id", std::make_shared<DataTypeUInt64>() }, - { "type", profile_event_type }, + { "type", ProfileEvents::TypeEnum }, { "name", std::make_shared<DataTypeString>() }, { "value", std::make_shared<DataTypeUInt64>() }, };
diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index ea4821fc33d..5cb819c44a4 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -778,7 +778,7 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() { case MergeTreeData::MergingParams::Ordinary: merged_transform = std::make_shared<MergingSortedTransform>( - header, pipes.size(), sort_description, merge_block_size, 0, false, ctx->rows_sources_write_buf.get(), true, ctx->blocks_are_granules_size); + header, pipes.size(), sort_description, merge_block_size, 0,
ctx->rows_sources_write_buf.get(), true, ctx->blocks_are_granules_size); break; case MergeTreeData::MergingParams::Collapsing:
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 10fa18186ee..8b03c1e614d 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -205,6 +205,8 @@ MergeTreeData::MergeTreeData( , background_operations_assignee(*this, BackgroundJobsAssignee::Type::DataProcessing, getContext()) , background_moves_assignee(*this, BackgroundJobsAssignee::Type::Moving, getContext()) { + context_->getGlobalContext()->initializeBackgroundExecutorsIfNeeded(); + const auto settings = getSettings(); allow_nullable_key = attach || settings->allow_nullable_key;
diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index 9b879283c10..15c5795ee7b 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -160,9 +160,10 @@ size_t MergeTreeReaderCompact::readRows(size_t from_mark, bool continue_reading, readData(column_from_part, column, from_mark, *column_positions[pos], rows_to_read, read_only_offsets[pos]); size_t read_rows_in_column = column->size() - column_size_before_reading; - if (read_rows_in_column < rows_to_read) - throw Exception("Cannot read all data in MergeTreeReaderCompact. Rows read: " + toString(read_rows_in_column) + - ". Rows expected: " + toString(rows_to_read) + ".", ErrorCodes::CANNOT_READ_ALL_DATA); + if (read_rows_in_column != rows_to_read) + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, + "Cannot read all data in MergeTreeReaderCompact. Rows read: {}. Rows expected: {}.", + read_rows_in_column, rows_to_read); } catch (Exception & e) {
diff --git a/src/Storages/MergeTree/MergeTreeReaderStream.cpp b/src/Storages/MergeTree/MergeTreeReaderStream.cpp index fc57b48e86d..f225ecae8fa 100644 --- a/src/Storages/MergeTree/MergeTreeReaderStream.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderStream.cpp @@ -10,6 +10,7 @@ namespace DB namespace ErrorCodes { extern const int ARGUMENT_OUT_OF_BOUND; + extern const int CANNOT_READ_ALL_DATA; } @@ -76,6 +77,10 @@ MergeTreeReaderStream::MergeTreeReaderStream( if (max_mark_range_bytes != 0) read_settings = read_settings.adjustBufferSize(max_mark_range_bytes); + /// An empty buffer does not make progress. + if (!read_settings.local_fs_buffer_size || !read_settings.remote_fs_buffer_size) + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read to empty buffer."); + /// Initialize the objects that shall be used to perform read operations. if (uncompressed_cache) {
diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index 206469da7be..29cc45a5c60 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -69,10 +69,6 @@ size_t MergeTreeReaderWide::readRows(size_t from_mark, bool continue_reading, si size_t num_columns = columns.size(); checkNumberOfColumns(num_columns); - /// Pointers to offset columns that are common to the nested data structure columns. - /// If append is true, then the value will be equal to nullptr and will be used only to - /// check that the offsets column has been already read.
- OffsetColumns offset_columns; std::unordered_map caches; std::unordered_set prefetched_streams; diff --git a/src/Storages/StorageExecutable.cpp b/src/Storages/StorageExecutable.cpp index 76be3353808..16647d0b60f 100644 --- a/src/Storages/StorageExecutable.cpp +++ b/src/Storages/StorageExecutable.cpp @@ -3,6 +3,8 @@ #include #include +#include + #include #include @@ -111,9 +113,16 @@ Pipe StorageExecutable::read( { auto user_scripts_path = context->getUserScriptsPath(); auto script_path = user_scripts_path + '/' + script_name; - if (!std::filesystem::exists(std::filesystem::path(script_path))) + + if (!pathStartsWith(script_path, user_scripts_path)) throw Exception(ErrorCodes::UNSUPPORTED_METHOD, - "Executable file {} does not exists inside {}", + "Executable file {} must be inside user scripts folder {}", + script_name, + user_scripts_path); + + if (!std::filesystem::exists(std::filesystem::path(script_path))) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Executable file {} does not exist inside user scripts folder {}", script_name, user_scripts_path); @@ -139,9 +148,9 @@ Pipe StorageExecutable::read( bool result = process_pool->tryBorrowObject(process, [&config, this]() { config.terminate_in_destructor_strategy = ShellCommand::DestructorStrategy{ true /*terminate_in_destructor*/, settings.command_termination_timeout }; - auto shell_command = ShellCommand::execute(config); + auto shell_command = ShellCommand::executeDirect(config); return shell_command; - }, settings.max_command_execution_time * 1000); + }, settings.max_command_execution_time * 10000); if (!result) throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 7c6543c5519..978d161852b 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -124,8 +125,8 @@ void checkCreationIsAllowed(ContextPtr context_global, const std::string & db_di return; /// "/dev/null" is allowed for perf testing - if (!startsWith(table_path, db_dir_path) && table_path != "/dev/null") - throw Exception("File is not inside " + db_dir_path, ErrorCodes::DATABASE_ACCESS_DENIED); + if (!fileOrSymlinkPathStartsWith(table_path, db_dir_path) && table_path != "/dev/null") + throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "File `{}` is not inside `{}`", table_path, db_dir_path); if (fs::exists(table_path) && fs::is_directory(table_path)) throw Exception("File must not be a directory", ErrorCodes::INCORRECT_FILE_NAME); @@ -140,7 +141,10 @@ Strings StorageFile::getPathsList(const String & table_path, const String & user fs_table_path = user_files_absolute_path / fs_table_path; Strings paths; - const String path = fs::weakly_canonical(fs_table_path); + /// Do not use fs::canonical or fs::weakly_canonical. + /// Otherwise it will not allow to work with symlinks in `user_files_path` directory. + String path = fs::absolute(fs_table_path); + path = fs::path(path).lexically_normal(); /// Normalize path. 
if (path.find_first_of("*?{") == std::string::npos) { std::error_code error;
diff --git a/tests/ci/compress_files.py b/tests/ci/compress_files.py new file mode 100644 index 00000000000..f095b04872b --- /dev/null +++ b/tests/ci/compress_files.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 +import subprocess +import logging +import os + +def compress_file_fast(path, archive_path): + if os.path.exists('/usr/bin/pigz'): + subprocess.check_call("pigz < {} > {}".format(path, archive_path), shell=True) + else: + subprocess.check_call("gzip < {} > {}".format(path, archive_path), shell=True) + + +def compress_fast(path, archive_path, exclude=None): + pigz_part = '' + if os.path.exists('/usr/bin/pigz'): + logging.info("pigz found, will compress and decompress faster") + pigz_part = "--use-compress-program='pigz'" + else: + pigz_part = '-z' + logging.info("no pigz, compressing with default tar") + + if exclude is None: + exclude_part = "" + elif isinstance(exclude, list): + exclude_part = " ".join(["--exclude {}".format(x) for x in exclude]) + else: + exclude_part = "--exclude {}".format(str(exclude)) + + fname = os.path.basename(path) + if os.path.isfile(path): + path = os.path.dirname(path) + else: + path += "/.." + cmd = "tar {} {} -cf {} -C {} {}".format(pigz_part, exclude_part, archive_path, path, fname) + logging.debug("compress_fast cmd: {}".format(cmd)) + subprocess.check_call(cmd, shell=True) + + +def decompress_fast(archive_path, result_path=None): + pigz_part = '' + if os.path.exists('/usr/bin/pigz'): + logging.info("pigz found, will compress and decompress faster ('{}' -> '{}')".format(archive_path, result_path)) + pigz_part = "--use-compress-program='pigz'" + else: + pigz_part = '-z' + logging.info("no pigz, decompressing with default tar ('{}' -> '{}')".format(archive_path, result_path)) + + if result_path is None: + subprocess.check_call("tar {} -xf {}".format(pigz_part, archive_path), shell=True) + else: + subprocess.check_call("tar {} -xf {} -C {}".format(pigz_part, archive_path, result_path), shell=True)
diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py new file mode 100644 index 00000000000..141d075cc6d --- /dev/null +++ b/tests/ci/docker_images_check.py @@ -0,0 +1,233 @@ +#!/usr/bin/env python3 +import subprocess +import logging +from report import create_test_html_report +from s3_helper import S3Helper +import json +import os +import time +from pr_info import PRInfo +from github import Github +import shutil + +NAME = "Push to Dockerhub (actions)" + +def get_changed_docker_images(pr_info, repo_path, image_file_path): + images_dict = {} + path_to_images_file = os.path.join(repo_path, image_file_path) + if os.path.exists(path_to_images_file): + with open(path_to_images_file, 'r') as dict_file: + images_dict = json.load(dict_file) + else: + logging.info("Image file %s doesn't exist in repo %s", image_file_path, repo_path) + + dockerhub_repo_name = 'yandex' + if not images_dict: + return [], dockerhub_repo_name + + files_changed = pr_info.changed_files + + logging.info("Changed files for PR %s @ %s: %s", pr_info.number, pr_info.sha, str(files_changed)) + + changed_images = [] + + for dockerfile_dir, image_description in images_dict.items(): + if image_description['name'].startswith('clickhouse/'): + dockerhub_repo_name = 'clickhouse' + + for f in files_changed: + if f.startswith(dockerfile_dir): + logging.info( + "Found changed file '%s' which affects docker image '%s' with path '%s'", + f, image_description['name'], dockerfile_dir) + changed_images.append(dockerfile_dir)
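+                # One changed file is enough to mark this image as needing a rebuild, so stop scanning its files.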
+ break + + # The order is important: dependents should go later than bases, so that + # they are built with updated base versions. + index = 0 + while index < len(changed_images): + image = changed_images[index] + for dependent in images_dict[image]['dependent']: + logging.info( + "Marking docker image '%s' as changed because it depends on changed docker image '%s'", + dependent, image) + changed_images.append(dependent) + index += 1 + if index > 100: + # Sanity check to prevent infinite loop. + raise Exception("Too many changed docker images, this is a bug. " + str(changed_images)) + + # If a dependent image was already in the list because its own files + # changed, but then it was added as a dependent of a changed base, we + # must remove the earlier entry so that it doesn't go earlier than its + # base. This way, the dependent will be rebuilt later than the base, and + # will correctly use the updated version of the base. + seen = set() + no_dups_reversed = [] + for x in reversed(changed_images): + if x not in seen: + seen.add(x) + no_dups_reversed.append(x) + + result = [(x, images_dict[x]['name']) for x in reversed(no_dups_reversed)] + logging.info("Changed docker images for PR %s @ %s: '%s'", pr_info.number, pr_info.sha, result) + return result, dockerhub_repo_name + +def build_and_push_one_image(path_to_dockerfile_folder, image_name, version_string): + logging.info("Building docker image %s with version %s from path %s", image_name, version_string, path_to_dockerfile_folder) + build_log = None + push_log = None + with open('build_log_' + str(image_name).replace('/', '_') + "_" + version_string, 'w') as pl: + cmd = "docker build --network=host -t {im}:{ver} {path}".format(im=image_name, ver=version_string, path=path_to_dockerfile_folder) + retcode = subprocess.Popen(cmd, shell=True, stderr=pl, stdout=pl).wait() + build_log = str(pl.name) + if retcode != 0: + return False, build_log, None + + with open('tag_log_' + str(image_name).replace('/', '_') + "_" + version_string, 'w') as pl: + cmd = "docker build --network=host -t {im} {path}".format(im=image_name, path=path_to_dockerfile_folder) + retcode = subprocess.Popen(cmd, shell=True, stderr=pl, stdout=pl).wait() + build_log = str(pl.name) + if retcode != 0: + return False, build_log, None + + logging.info("Pushing image %s to dockerhub", image_name) + + with open('push_log_' + str(image_name).replace('/', '_') + "_" + version_string, 'w') as pl: + cmd = "docker push {im}:{ver}".format(im=image_name, ver=version_string) + retcode = subprocess.Popen(cmd, shell=True, stderr=pl, stdout=pl).wait() + push_log = str(pl.name) + if retcode != 0: + return False, build_log, push_log + + logging.info("Processing of %s successfully finished", image_name) + return True, build_log, push_log + +def process_single_image(versions, path_to_dockerfile_folder, image_name): + logging.info("Image will be pushed with versions %s", ', '.join(versions)) + result = [] + for ver in versions: + for i in range(5): + success, build_log, push_log = build_and_push_one_image(path_to_dockerfile_folder, image_name, ver) + if success: + result.append((image_name + ":" + ver, build_log, push_log, 'OK')) + break + logging.info("Got an error, will retry %s time(s) and sleep for %s seconds", i, i * 5) + time.sleep(i * 5) + else: + result.append((image_name + ":" + ver, build_log, push_log, 'FAIL')) + + logging.info("Processing finished") + return result + + +def process_test_results(s3_client, test_results, s3_path_prefix): + overall_status = 'success' + processed_test_results = [] + for
image, build_log, push_log, status in test_results: + if status != 'OK': + overall_status = 'failure' + url_part = '' + if build_log is not None and os.path.exists(build_log): + build_url = s3_client.upload_test_report_to_s3( + build_log, + s3_path_prefix + "/" + os.path.basename(build_log)) + url_part += '<a href="{}">build_log</a>'.format(build_url) + if push_log is not None and os.path.exists(push_log): + push_url = s3_client.upload_test_report_to_s3( + push_log, + s3_path_prefix + "/" + os.path.basename(push_log)) + if url_part: + url_part += ', ' + url_part += '<a href="{}">push_log</a>'.format(push_url) + if url_part: + test_name = image + ' (' + url_part + ')' + else: + test_name = image + processed_test_results.append((test_name, status)) + return overall_status, processed_test_results + +def upload_results(s3_client, pr_number, commit_sha, test_results): + s3_path_prefix = f"{pr_number}/{commit_sha}/" + NAME.lower().replace(' ', '_') + + branch_url = "https://github.com/ClickHouse/ClickHouse/commits/master" + branch_name = "master" + if pr_number != 0: + branch_name = "PR #{}".format(pr_number) + branch_url = "https://github.com/ClickHouse/ClickHouse/pull/" + str(pr_number) + commit_url = f"https://github.com/ClickHouse/ClickHouse/commit/{commit_sha}" + + task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" + + html_report = create_test_html_report(NAME, test_results, "https://hub.docker.com/u/clickhouse", task_url, branch_url, branch_name, commit_url) + with open('report.html', 'w') as f: + f.write(html_report) + + url = s3_client.upload_test_report_to_s3('report.html', s3_path_prefix + ".html") + logging.info("Search result in url %s", url) + return url + +def get_commit(gh, commit_sha): + repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) + commit = repo.get_commit(commit_sha) + return commit + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + repo_path = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")) + temp_path = os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'docker_images_check') + dockerhub_password = os.getenv('DOCKER_ROBOT_PASSWORD') + + if os.path.exists(temp_path): + shutil.rmtree(temp_path) + + if not os.path.exists(temp_path): + os.makedirs(temp_path) + + with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: + event = json.load(event_file) + + pr_info = PRInfo(event, False, True) + changed_images, dockerhub_repo_name = get_changed_docker_images(pr_info, repo_path, "docker/images.json") + logging.info("Has changed images %s", ', '.join([str(image[0]) for image in changed_images])) + pr_commit_version = str(pr_info.number) + '-' + pr_info.sha + + versions = [str(pr_info.number), pr_commit_version] + + subprocess.check_output("docker login --username 'robotclickhouse' --password '{}'".format(dockerhub_password), shell=True) + + result_images = {} + images_processing_result = [] + for rel_path, image_name in changed_images: + full_path = os.path.join(repo_path, rel_path) + images_processing_result += process_single_image(versions, full_path, image_name) + result_images[image_name] = pr_commit_version + + if len(changed_images): + description = "Updated " + ','.join([im[1] for im in changed_images]) + else: + description = "Nothing to update" + + # GitHub limits commit status descriptions to 140 characters, so truncate longer ones. + if len(description) >= 140: + description = description[:136] + "..."
+ + aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") + aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") + + s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) + + s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + NAME.lower().replace(' ', '_') + status, test_results = process_test_results(s3_helper, images_processing_result, s3_path_prefix) + + url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results) + + gh = Github(os.getenv("GITHUB_TOKEN")) + commit = get_commit(gh, pr_info.sha) + commit.create_status(context=NAME, description=description, state=status, target_url=url) + + with open(os.path.join(temp_path, 'changed_images.json'), 'w') as images_file: + json.dump(result_images, images_file) + + print("::notice ::Report url: {}".format(url)) + print("::set-output name=url_output::\"{}\"".format(url))
diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py new file mode 100644 index 00000000000..89139468fd6 --- /dev/null +++ b/tests/ci/finish_check.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python3 +import logging +from github import Github +from pr_info import PRInfo +import json +import os + +NAME = 'Run Check (actions)' + +def filter_statuses(statuses): + """ + Squash statuses to latest state + 1. context="first", state="success", update_time=1 + 2. context="second", state="success", update_time=2 + 3. context="first", state="failure", update_time=3 + =========> + 1. context="second", state="success" + 2. context="first", state="failure" + """ + filt = {} + for status in sorted(statuses, key=lambda x: x.updated_at): + filt[status.context] = status + return filt + + +def get_commit(gh, commit_sha): + repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) + commit = repo.get_commit(commit_sha) + return commit + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: + event = json.load(event_file) + + pr_info = PRInfo(event, need_orgs=True) + gh = Github(os.getenv("GITHUB_TOKEN")) + commit = get_commit(gh, pr_info.sha) + + url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" + statuses = filter_statuses(list(commit.get_statuses())) + if NAME in statuses and statuses[NAME].state == "pending": + commit.create_status(context=NAME, description="All checks finished", state="success", target_url=url)
diff --git a/tests/ci/metrics_lambda/Dockerfile b/tests/ci/metrics_lambda/Dockerfile new file mode 100644 index 00000000000..f53be71a893 --- /dev/null +++ b/tests/ci/metrics_lambda/Dockerfile @@ -0,0 +1,13 @@ +FROM public.ecr.aws/lambda/python:3.9 + +# Copy function code +COPY app.py ${LAMBDA_TASK_ROOT} + +# Install the function's dependencies using file requirements.txt +# from your project folder. + +COPY requirements.txt .
+RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}" + +# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile) +CMD [ "app.handler" ] diff --git a/tests/ci/metrics_lambda/app.py b/tests/ci/metrics_lambda/app.py new file mode 100644 index 00000000000..d2fb048638b --- /dev/null +++ b/tests/ci/metrics_lambda/app.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python3 + +import requests +import argparse +import jwt +import sys +import json +import time +from collections import namedtuple + +def get_key_and_app_from_aws(): + import boto3 + secret_name = "clickhouse_github_secret_key" + session = boto3.session.Session() + client = session.client( + service_name='secretsmanager', + ) + get_secret_value_response = client.get_secret_value( + SecretId=secret_name + ) + data = json.loads(get_secret_value_response['SecretString']) + return data['clickhouse-app-key'], int(data['clickhouse-app-id']) + +def handler(event, context): + private_key, app_id = get_key_and_app_from_aws() + main(private_key, app_id, True) + +def get_installation_id(jwt_token): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.get("https://api.github.com/app/installations", headers=headers) + response.raise_for_status() + data = response.json() + return data[0]['id'] + +def get_access_token(jwt_token, installation_id): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.post(f"https://api.github.com/app/installations/{installation_id}/access_tokens", headers=headers) + response.raise_for_status() + data = response.json() + return data['token'] + + +RunnerDescription = namedtuple('RunnerDescription', ['id', 'name', 'tags', 'offline', 'busy']) + +def list_runners(access_token): + headers = { + "Authorization": f"token {access_token}", + "Accept": "application/vnd.github.v3+json", + } + + response = requests.get("https://api.github.com/orgs/ClickHouse/actions/runners", headers=headers) + response.raise_for_status() + data = response.json() + print("Total runners", data['total_count']) + runners = data['runners'] + result = [] + for runner in runners: + tags = [tag['name'] for tag in runner['labels']] + desc = RunnerDescription(id=runner['id'], name=runner['name'], tags=tags, + offline=runner['status']=='offline', busy=runner['busy']) + result.append(desc) + return result + +def push_metrics_to_cloudwatch(listed_runners, namespace): + import boto3 + client = boto3.client('cloudwatch') + metrics_data = [] + busy_runners = sum(1 for runner in listed_runners if runner.busy) + metrics_data.append({ + 'MetricName': 'BusyRunners', + 'Value': busy_runners, + 'Unit': 'Count', + }) + total_active_runners = sum(1 for runner in listed_runners if not runner.offline) + metrics_data.append({ + 'MetricName': 'ActiveRunners', + 'Value': total_active_runners, + 'Unit': 'Count', + }) + total_runners = len(listed_runners) + metrics_data.append({ + 'MetricName': 'TotalRunners', + 'Value': total_runners, + 'Unit': 'Count', + }) + if total_active_runners == 0: + busy_ratio = 100 + else: + busy_ratio = busy_runners / total_active_runners * 100 + + metrics_data.append({ + 'MetricName': 'BusyRunnersRatio', + 'Value': busy_ratio, + 'Unit': 'Percent', + }) + + client.put_metric_data(Namespace='RunnersMetrics', MetricData=metrics_data) + +def main(github_secret_key, github_app_id, push_to_cloudwatch): + payload = { + "iat": int(time.time()) - 60, + "exp": 
int(time.time()) + (10 * 60), + "iss": github_app_id, + } + + encoded_jwt = jwt.encode(payload, github_secret_key, algorithm="RS256") + installation_id = get_installation_id(encoded_jwt) + access_token = get_access_token(encoded_jwt, installation_id) + runners = list_runners(access_token) + if push_to_cloudwatch: + push_metrics_to_cloudwatch(runners, 'RunnersMetrics') + else: + print(runners) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Get list of runners and their states') + parser.add_argument('-p', '--private-key-path', help='Path to file with private key') + parser.add_argument('-k', '--private-key', help='Private key') + parser.add_argument('-a', '--app-id', type=int, help='GitHub application ID', required=True) + parser.add_argument('--push-to-cloudwatch', action='store_true', help='Push metrics to CloudWatch') + + args = parser.parse_args() + + if not args.private_key_path and not args.private_key: + print("Either --private-key-path or --private-key must be specified", file=sys.stderr) + sys.exit(1) + + if args.private_key_path and args.private_key: + print("Only one of --private-key-path and --private-key may be specified", file=sys.stderr) + sys.exit(1) + + if args.private_key: + private_key = args.private_key + else: + with open(args.private_key_path, 'r') as key_file: + private_key = key_file.read() + + main(private_key, args.app_id, args.push_to_cloudwatch)
diff --git a/tests/ci/metrics_lambda/requirements.txt b/tests/ci/metrics_lambda/requirements.txt new file mode 100644 index 00000000000..c0dcf4a4dde --- /dev/null +++ b/tests/ci/metrics_lambda/requirements.txt @@ -0,0 +1,3 @@ +requests +PyJWT +cryptography
diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py new file mode 100644 index 00000000000..8feedb2d4d7 --- /dev/null +++ b/tests/ci/pr_info.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python3 +import requests +import json +import os +import subprocess +import urllib.request +from unidiff import PatchSet + + +class PRInfo: + def __init__(self, github_event, need_orgs=False, need_changed_files=False): + self.number = github_event['number'] + if 'after' in github_event: + self.sha = github_event['after'] + else: + self.sha = github_event['pull_request']['head']['sha'] + + self.labels = set([l['name'] for l in github_event['pull_request']['labels']]) + self.user_login = github_event['pull_request']['user']['login'] + self.user_orgs = set([]) + if need_orgs: + user_orgs_response = requests.get(github_event['pull_request']['user']['organizations_url']) + if user_orgs_response.ok: + response_json = user_orgs_response.json() + self.user_orgs = set(org['id'] for org in response_json) + + self.changed_files = set([]) + if need_changed_files: + diff_url = github_event['pull_request']['diff_url'] + diff = urllib.request.urlopen(diff_url) + diff_object = PatchSet(diff, diff.headers.get_charsets()[0]) + self.changed_files = set([f.path for f in diff_object]) + + def get_dict(self): + return { + 'sha': self.sha, + 'number': self.number, + 'labels': self.labels, + 'user_login': self.user_login, + 'user_orgs': self.user_orgs, + }
diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py new file mode 100644 index 00000000000..c254ad74ae4 --- /dev/null +++ b/tests/ci/pvs_check.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python3 +import subprocess +import os +import json +import logging +from github import Github +from report import create_test_html_report +from s3_helper import S3Helper +from pr_info import PRInfo +import shutil +import sys + +NAME = 'PVS
Studio (actions)' +LICENCE_NAME = 'Free license: ClickHouse, Yandex' +HTML_REPORT_FOLDER = 'pvs-studio-html-report' +TXT_REPORT_NAME = 'pvs-studio-task-report.txt' + +def process_logs(s3_client, additional_logs, s3_path_prefix): + additional_urls = [] + for log_path in additional_logs: + if log_path: + additional_urls.append( + s3_client.upload_test_report_to_s3( + log_path, + s3_path_prefix + "/" + os.path.basename(log_path))) + + return additional_urls + +def _process_txt_report(path): + warnings = [] + errors = [] + with open(path, 'r') as report_file: + for line in report_file: + if 'viva64' in line: + continue + elif 'warn' in line: + warnings.append(':'.join(line.split('\t')[0:2])) + elif 'err' in line: + errors.append(':'.join(line.split('\t')[0:2])) + return warnings, errors + +def get_commit(gh, commit_sha): + repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) + commit = repo.get_commit(commit_sha) + return commit + +def upload_results(s3_client, pr_number, commit_sha, test_results, additional_files): + s3_path_prefix = str(pr_number) + "/" + commit_sha + "/" + NAME.lower().replace(' ', '_') + additional_urls = process_logs(s3_client, additional_files, s3_path_prefix) + + branch_url = "https://github.com/ClickHouse/ClickHouse/commits/master" + branch_name = "master" + if pr_number != 0: + branch_name = "PR #{}".format(pr_number) + branch_url = "https://github.com/ClickHouse/ClickHouse/pull/" + str(pr_number) + commit_url = f"https://github.com/ClickHouse/ClickHouse/commit/{commit_sha}" + + task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" + + raw_log_url = additional_urls[0] + additional_urls.pop(0) + + html_report = create_test_html_report(NAME, test_results, raw_log_url, task_url, branch_url, branch_name, commit_url, additional_urls) + with open('report.html', 'w') as f: + f.write(html_report) + + url = s3_client.upload_test_report_to_s3('report.html', s3_path_prefix + ".html") + logging.info("Search result in url %s", url) + return url + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + repo_path = os.path.join(os.getenv("REPO_COPY", os.path.abspath("../../"))) + temp_path = os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'pvs_check') + + with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: + event = json.load(event_file) + pr_info = PRInfo(event) + # this check modifies the repository, so copy it to the temp directory + logging.info("Repo copy path %s", repo_path) + + aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") + aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") + + gh = Github(os.getenv("GITHUB_TOKEN")) + + images_path = os.path.join(temp_path, 'changed_images.json') + docker_image = 'clickhouse/pvs-test' + if os.path.exists(images_path): + logging.info("Images file exists") + with open(images_path, 'r') as images_fd: + images = json.load(images_fd) + logging.info("Got images %s", images) + if 'clickhouse/pvs-test' in images: + docker_image += ':' + images['clickhouse/pvs-test'] + + logging.info("Got docker image %s", docker_image) + + if not aws_secret_key_id or not aws_secret_key: + logging.info("No secrets, will not upload anything to S3") + + s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) + + licence_key = os.getenv('PVS_STUDIO_KEY') + cmd = f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --volume={repo_path}:/repo_folder
--volume={temp_path}:/test_output -e LICENCE_NAME='{LICENCE_NAME}' -e LICENCE_KEY='{licence_key}' {docker_image}" + commit = get_commit(gh, pr_info.sha) + + try: + subprocess.check_output(cmd, shell=True) + except Exception: + commit.create_status(context=NAME, description='PVS report failed to build', state='failure', target_url=f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}") + sys.exit(1) + + try: + s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + NAME.lower().replace(' ', '_') + html_urls = s3_helper.upload_test_folder_to_s3(os.path.join(temp_path, HTML_REPORT_FOLDER), s3_path_prefix) + index_html = None + + for url in html_urls: + if 'index.html' in url: + index_html = '<a href="{}">HTML report</a>'.format(url) + break + + if not index_html: + commit.create_status(context=NAME, description='PVS report failed to build', state='failure', target_url=f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}") + sys.exit(1) + + txt_report = os.path.join(temp_path, TXT_REPORT_NAME) + warnings, errors = _process_txt_report(txt_report) + errors = errors + warnings + + status = 'success' + test_results = [(index_html, "Look at the report"), ("Errors count not checked", "OK")] + description = "Total errors {}".format(len(errors)) + additional_logs = [txt_report, os.path.join(temp_path, 'pvs-studio.log')] + report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_logs) + + print("::notice ::Report url: {}".format(report_url)) + commit = get_commit(gh, pr_info.sha) + commit.create_status(context=NAME, description=description, state=status, target_url=report_url) + except Exception as ex: + print("Got an exception", ex) + sys.exit(1)
diff --git a/tests/ci/report.py b/tests/ci/report.py new file mode 100644 index 00000000000..5c9b174599d --- /dev/null +++ b/tests/ci/report.py @@ -0,0 +1,298 @@ +# -*- coding: utf-8 -*- +import os +import datetime + +### FIXME: BEST FRONTEND PRACTICES BELOW + +HTML_BASE_TEST_TEMPLATE = """ +<html> +<head> +<title>{title}</title> +</head> +<body> +<div class="main"> + +<h1>{header}</h1> + +<p> +<a href="{raw_log_url}">{raw_log_name}</a> +<a href="{commit_url}">Commit</a> +{additional_urls} +<a href="{task_url}">Task (github actions)</a> +</p> + +{test_part} + +</div> +</body> +</html> +"""
+
+HTML_TEST_PART = """ +<table> +<tr> +{headers} +</tr> +{rows} +</table> +"""
+
+BASE_HEADERS = ['Test name', 'Test status'] + + +def _format_header(header, branch_name, branch_url=None): + result = ' '.join([w.capitalize() for w in header.split(' ')]) + result = result.replace("Clickhouse", "ClickHouse") + result = result.replace("clickhouse", "ClickHouse") + if 'ClickHouse' not in result: + result = 'ClickHouse ' + result + result += ' for ' + if branch_url: + result += '<a href="{url}">{name}</a>'.format(url=branch_url, name=branch_name) + else: + result += branch_name + return result + + +def _get_status_style(status): + style = "font-weight: bold;" + if status in ('OK', 'success', 'PASSED'): + style += 'color: #0A0;' + elif status in ('FAIL', 'failure', 'error', 'FAILED', 'Timeout'): + style += 'color: #F00;' + else: + style += 'color: #FFB400;' + return style + + +def _get_html_url(url): + if isinstance(url, str): + return '<a href="{url}">{name}</a>'.format(url=url, name=os.path.basename(url)) + if isinstance(url, tuple): + return '<a href="{url}">{name}</a>'.format(url=url[0], name=url[1]) + return '' + + +def create_test_html_report(header, test_result, raw_log_url, task_url, branch_url, branch_name, commit_url, additional_urls=[]): + if test_result: + rows_part = "" + num_fails = 0 + has_test_time = False + has_test_logs = False + for result in test_result: + test_name = result[0] + test_status = result[1] + + test_logs = None + test_time = None + if len(result) > 2: + test_time = result[2] + has_test_time = True + + if len(result) > 3: + test_logs = result[3] + has_test_logs = True + + row = "<tr>" + row += "<td>" + test_name + "</td>" + style = _get_status_style(test_status) + + # Allow to quickly scroll to the first failure. + is_fail = test_status == "FAIL" or test_status == 'FLAKY' + is_fail_id = "" + if is_fail: + num_fails = num_fails + 1 + is_fail_id = 'id="fail' + str(num_fails) + '" ' + + row += '<td ' + is_fail_id + 'style="{}">'.format(style) + test_status + "</td>" + + if test_time is not None: + row += "<td>" + test_time + "</td>" + + if test_logs is not None: + test_logs_html = "<br>".join([_get_html_url(url) for url in test_logs]) + row += "<td>" + test_logs_html + "</td>" + + row += "</tr>" + rows_part += row + + headers = BASE_HEADERS + if has_test_time: + headers.append('Test time, sec.') + if has_test_logs: + headers.append('Logs') + + headers = ''.join(['<th>' + h + '</th>' for h in headers]) + test_part = HTML_TEST_PART.format(headers=headers, rows=rows_part) + else: + test_part = "" + + additional_html_urls = "" + for url in additional_urls: + additional_html_urls += ' ' + _get_html_url(url) + + result = HTML_BASE_TEST_TEMPLATE.format( + title=_format_header(header, branch_name), + header=_format_header(header, branch_name, branch_url), + raw_log_name=os.path.basename(raw_log_url), + raw_log_url=raw_log_url, + task_url=task_url, + test_part=test_part, + branch_name=branch_name, + commit_url=commit_url, + additional_urls=additional_html_urls + ) + return result + + +HTML_BASE_BUILD_TEMPLATE = """
+<html> +<head> +<title>{title}</title> +</head> +<body> +<div class="main"> + +<h1>{header}</h1> + +<table> +<tr> +<th>Compiler</th> +<th>Build type</th> +<th>Sanitizer</th> +<th>Bundled</th> +<th>Splitted</th> +<th>Status</th> +<th>Build log</th> +<th>Build time</th> +<th>Artifacts</th> +</tr> +{rows} +</table> + +<p> +<a href="{commit_url}">Commit</a> +<a href="{task_url}">Task (github actions)</a> +</p> + +</div> +</body> +</html> +"""
+
+LINK_TEMPLATE = '<a href="{url}">{text}</a>' + + +def create_build_html_report(header, build_results, build_logs_urls, artifact_urls_list, task_url, branch_url, branch_name, commit_url): + rows = "" + for (build_result, build_log_url, artifact_urls) in zip(build_results, build_logs_urls, artifact_urls_list): + row = "<tr>" + row += "<td>{}</td>".format(build_result.compiler) + if build_result.build_type: + row += "<td>{}</td>".format(build_result.build_type) + else: + row += "<td>{}</td>".format("relwithdebuginfo") + if build_result.sanitizer: + row += "<td>{}</td>".format(build_result.sanitizer) + else: + row += "<td>{}</td>".format("none") + + row += "<td>{}</td>".format(build_result.bundled) + row += "<td>{}</td>".format(build_result.splitted) + + if build_result.status: + style = _get_status_style(build_result.status) + row += '<td style="{}">{}</td>'.format(style, build_result.status) + else: + style = _get_status_style("error") + row += '<td style="{}">{}</td>'.format(style, "error") + + row += '<td><a href="{}">link</a></td>'.format(build_log_url) + + if build_result.elapsed_seconds: + delta = datetime.timedelta(seconds=build_result.elapsed_seconds) + else: + delta = 'unknown' + + row += '<td>{}</td>'.format(str(delta)) + + links = "" + link_separator = "<br/>" + if artifact_urls: + for artifact_url in artifact_urls: + links += LINK_TEMPLATE.format(text=os.path.basename(artifact_url), url=artifact_url) + links += link_separator + if links: + links = links[:-len(link_separator)] + row += "<td>{}</td>".format(links) + + row += "</tr>" + rows += row + return HTML_BASE_BUILD_TEMPLATE.format( + title=_format_header(header, branch_name), + header=_format_header(header, branch_name, branch_url), + rows=rows, + task_url=task_url, + branch_name=branch_name, + commit_url=commit_url)
diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py new file mode 100644 index 00000000000..95e827671ca --- /dev/null +++ b/tests/ci/run_check.py @@ -0,0 +1,125 @@ +#!/usr/bin/env python3 +import os +import json +import requests +from pr_info import PRInfo +import sys +import logging +from github import Github + +NAME = 'Run Check (actions)' + +TRUSTED_ORG_IDS = { + 7409213, # yandex + 28471076, # altinity + 54801242, # clickhouse +} + +OK_TEST_LABEL = set(["can be tested", "release", "pr-documentation", "pr-doc-fix"]) +DO_NOT_TEST_LABEL = "do not test" + +# Individual trusted contributors who are not in any trusted organization. +# Can be changed at runtime: we will append users that we learned to be in +# a trusted org, to save GitHub API calls. +TRUSTED_CONTRIBUTORS = { + "achimbab", + "adevyatova", # DOCSUP + "Algunenano", # Raúl Marín, Tinybird + "AnaUvarova", # DOCSUP + "anauvarova", # technical writer, Yandex + "annvsh", # technical writer, Yandex + "atereh", # DOCSUP + "azat", + "bharatnc", # Newbie, but already with many contributions. + "bobrik", # Seasoned contributor, CloudFlare + "BohuTANG", + "damozhaeva", # DOCSUP + "den-crane", + "gyuton", # DOCSUP, technical writer, Yandex + "hagen1778", # Roman Khavronenko, seasoned contributor + "hczhcz", + "hexiaoting", # Seasoned contributor + "ildus", # adjust, ex-pgpro + "javisantana", # a Spanish ClickHouse enthusiast, ex-Carto + "ka1bi4", # DOCSUP + "kirillikoff", # DOCSUP + "kitaisreal", # Seasoned contributor + "kreuzerkrieg", + "lehasm", # DOCSUP + "michon470", # DOCSUP + "MyroTk", # Tester in Altinity + "myrrc", # Michael Kot, Altinity + "nikvas0", + "nvartolomei", + "olgarev", # DOCSUP + "otrazhenia", # Yandex docs contractor + "pdv-ru", # DOCSUP + "podshumok", # cmake expert from QRator Labs + "s-mx", # Maxim Sabyanin, former employee, present contributor + "sevirov", # technical writer, Yandex + "spongedu", # Seasoned contributor + "ucasFL", # Amos Bird's friend + "vdimir", # Employee + "vzakaznikov", + "YiuRULE", + "zlobober" # Developer of YT +} + + +def pr_is_by_trusted_user(pr_user_login, pr_user_orgs): + if pr_user_login in TRUSTED_CONTRIBUTORS: + logging.info("User '{}' is trusted".format(pr_user_login)) + return True + + logging.info("User '{}' is not trusted".format(pr_user_login)) + + for org_id in pr_user_orgs: + if org_id in TRUSTED_ORG_IDS: + logging.info("Org '{}' is trusted; will mark user {} as trusted".format(org_id, pr_user_login)) + return True + logging.info("Org '{}' is not trusted".format(org_id)) + + return False + +# Returns whether we should look into individual checks for this PR. If not, it +# can be skipped entirely. +def should_run_checks_for_pr(pr_info): + # Consider the labels and whether the user is trusted. Order matters: + # an explicit 'force tests' label always wins, 'do not test' skips everything, + # untrusted authors need the 'can be tested' label, and release/backport/cherry-pick + # PRs are left to the old CI.
+ force_labels = set(['force tests']).intersection(pr_info.labels) + if force_labels: + return True, "Labeled '{}'".format(', '.join(force_labels)) + + if 'do not test' in pr_info.labels: + return False, "Labeled 'do not test'" + + if 'can be tested' not in pr_info.labels and not pr_is_by_trusted_user(pr_info.user_login, pr_info.user_orgs): + return False, "Needs 'can be tested' label" + + if 'release' in pr_info.labels or 'pr-backport' in pr_info.labels or 'pr-cherrypick' in pr_info.labels: + return False, "Don't try new checks for release/backports/cherry-picks" + + return True, "No special conditions apply" + +def get_commit(gh, commit_sha): + repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) + commit = repo.get_commit(commit_sha) + return commit + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: + event = json.load(event_file) + + pr_info = PRInfo(event, need_orgs=True) + can_run, description = should_run_checks_for_pr(pr_info) + gh = Github(os.getenv("GITHUB_TOKEN")) + commit = get_commit(gh, pr_info.sha) + url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" + if not can_run: + print("::notice ::Cannot run") + commit.create_status(context=NAME, description=description, state="failure", target_url=url) + sys.exit(1) + else: + print("::notice ::Can run") + commit.create_status(context=NAME, description=description, state="pending", target_url=url)
diff --git a/tests/ci/s3_helper.py b/tests/ci/s3_helper.py new file mode 100644 index 00000000000..b9ae0de6e02 --- /dev/null +++ b/tests/ci/s3_helper.py @@ -0,0 +1,99 @@ +# -*- coding: utf-8 -*- +import hashlib +import logging +import os +import boto3 +from botocore.exceptions import ClientError, BotoCoreError +from multiprocessing.dummy import Pool +from compress_files import compress_file_fast + +def _md5(fname): + hash_md5 = hashlib.md5() + with open(fname, "rb") as f: + for chunk in iter(lambda: f.read(4096), b""): + hash_md5.update(chunk) + logging.debug("MD5 for {} is {}".format(fname, hash_md5.hexdigest())) + return hash_md5.hexdigest() + + +def _flatten_list(lst): + result = [] + for elem in lst: + if isinstance(elem, list): + result += _flatten_list(elem) + else: + result.append(elem) + return result + + +class S3Helper(object): + def __init__(self, host, aws_access_key_id, aws_secret_access_key): + self.session = boto3.session.Session(aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key) + self.client = self.session.client('s3', endpoint_url=host) + + def _upload_file_to_s3(self, bucket_name, file_path, s3_path): + logging.debug("Start uploading {} to bucket={} path={}".format(file_path, bucket_name, s3_path)) + metadata = {} + if os.path.getsize(file_path) < 64 * 1024 * 1024: + if s3_path.endswith("txt") or s3_path.endswith("log") or s3_path.endswith("err") or s3_path.endswith("out"): + metadata['ContentType'] = "text/plain; charset=utf-8" + logging.info("Content type %s for file path %s", "text/plain; charset=utf-8", file_path) + elif s3_path.endswith("html"): + metadata['ContentType'] = "text/html; charset=utf-8" + logging.info("Content type %s for file path %s", "text/html; charset=utf-8", file_path) + else: + logging.info("No content type provided for %s", file_path) + else: + if s3_path.endswith("txt") or s3_path.endswith("log") or s3_path.endswith("err") or s3_path.endswith("out"): + logging.info("Going to compress log file %s to %s", file_path,
file_path + ".gz") + compress_file_fast(file_path, file_path + ".gz") + file_path += ".gz" + s3_path += ".gz" + else: + logging.info("Processing file without compression") + logging.info("File is too large, do not provide content type") + + self.client.upload_file(file_path, bucket_name, s3_path, ExtraArgs=metadata) + logging.info("Upload {} to {}. Meta: {}".format(file_path, s3_path, metadata)) + return "https://storage.yandexcloud.net/{bucket}/{path}".format(bucket=bucket_name, path=s3_path) + + def upload_test_report_to_s3(self, file_path, s3_path): + return self._upload_file_to_s3('clickhouse-test-reports', file_path, s3_path) + + def upload_build_file_to_s3(self, file_path, s3_path): + return self._upload_file_to_s3('clickhouse-builds', file_path, s3_path) + + def _upload_folder_to_s3(self, folder_path, s3_folder_path, bucket_name, keep_dirs_in_s3_path, upload_symlinks): + logging.info("Upload folder '{}' to bucket={} of s3 folder '{}'".format(folder_path, bucket_name, s3_folder_path)) + if not os.path.exists(folder_path): + return [] + files = os.listdir(folder_path) + if not files: + return [] + + p = Pool(min(len(files), 5)) + + def task(file_name): + full_fs_path = os.path.join(folder_path, file_name) + if keep_dirs_in_s3_path: + full_s3_path = s3_folder_path + "/" + os.path.basename(folder_path) + else: + full_s3_path = s3_folder_path + + if os.path.isdir(full_fs_path): + return self._upload_folder_to_s3(full_fs_path, full_s3_path, bucket_name, keep_dirs_in_s3_path, upload_symlinks) + + if os.path.islink(full_fs_path): + if upload_symlinks: + return self._upload_file_to_s3(bucket_name, full_fs_path, full_s3_path + "/" + file_name) + return [] + + return self._upload_file_to_s3(bucket_name, full_fs_path, full_s3_path + "/" + file_name) + + return sorted(_flatten_list(list(p.map(task, files)))) + + def upload_build_folder_to_s3(self, folder_path, s3_folder_path, keep_dirs_in_s3_path=True, upload_symlinks=True): + return self._upload_folder_to_s3(folder_path, s3_folder_path, 'clickhouse-builds', keep_dirs_in_s3_path, upload_symlinks) + + def upload_test_folder_to_s3(self, folder_path, s3_folder_path): + return self._upload_folder_to_s3(folder_path, s3_folder_path, 'clickhouse-test-reports', True, True) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py new file mode 100644 index 00000000000..71978379099 --- /dev/null +++ b/tests/ci/style_check.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python3 +from github import Github +from report import create_test_html_report +import shutil +import logging +import subprocess +import os +import csv +from s3_helper import S3Helper +import time +import json +from pr_info import PRInfo + +NAME = "Style Check (actions)" + + +def process_logs(s3_client, additional_logs, s3_path_prefix): + additional_urls = [] + for log_path in additional_logs: + if log_path: + additional_urls.append( + s3_client.upload_test_report_to_s3( + log_path, + s3_path_prefix + "/" + os.path.basename(log_path))) + + return additional_urls + + +def process_result(result_folder): + test_results = [] + additional_files = [] + # Just upload all files from result_folder. + # If task provides processed results, then it's responsible for content of result_folder. 
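+    # Expected layout: check_status.tsv with a single overall "state <tab> description" row, and test_results.tsv with one row per test.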
+    if os.path.exists(result_folder):
+        test_files = [f for f in os.listdir(result_folder) if os.path.isfile(os.path.join(result_folder, f))]
+        additional_files = [os.path.join(result_folder, f) for f in test_files]
+
+    status_path = os.path.join(result_folder, "check_status.tsv")
+    logging.info("Found check_status.tsv")
+    status = list(csv.reader(open(status_path, 'r'), delimiter='\t'))
+    if len(status) != 1 or len(status[0]) != 2:
+        return "error", "Invalid check_status.tsv", test_results, additional_files
+    state, description = status[0][0], status[0][1]
+
+    try:
+        results_path = os.path.join(result_folder, "test_results.tsv")
+        test_results = list(csv.reader(open(results_path, 'r'), delimiter='\t'))
+        if len(test_results) == 0:
+            raise Exception("Empty results")
+
+        return state, description, test_results, additional_files
+    except Exception:
+        if state == "success":
+            state, description = "error", "Failed to read test_results.tsv"
+        return state, description, test_results, additional_files
+
+def upload_results(s3_client, pr_number, commit_sha, test_results, additional_files):
+    s3_path_prefix = f"{pr_number}/{commit_sha}/style_check"
+    additional_urls = process_logs(s3_client, additional_files, s3_path_prefix)
+
+    branch_url = "https://github.com/ClickHouse/ClickHouse/commits/master"
+    branch_name = "master"
+    if pr_number != 0:
+        branch_name = "PR #{}".format(pr_number)
+        branch_url = "https://github.com/ClickHouse/ClickHouse/pull/" + str(pr_number)
+    commit_url = f"https://github.com/ClickHouse/ClickHouse/commit/{commit_sha}"
+
+    task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}"
+
+    raw_log_url = additional_urls[0]
+    additional_urls.pop(0)
+
+    html_report = create_test_html_report(NAME, test_results, raw_log_url, task_url, branch_url, branch_name, commit_url, additional_urls)
+    with open('report.html', 'w') as f:
+        f.write(html_report)
+
+    url = s3_client.upload_test_report_to_s3('report.html', s3_path_prefix + ".html")
+    logging.info("Report is available at %s", url)
+    return url
+
+
+def get_commit(gh, commit_sha):
+    repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse"))
+    commit = repo.get_commit(commit_sha)
+    return commit
+
+def update_check_with_curl(check_id):
+    cmd_template = ("curl -v --request PATCH --url https://api.github.com/repos/ClickHouse/ClickHouse/check-runs/{} "
+                    "--header 'authorization: Bearer {}' "
+                    "--header 'Accept: application/vnd.github.v3+json' "
+                    "--header 'content-type: application/json' "
+                    "-d '{{\"name\" : \"hello-world-name\"}}'")
+    cmd = cmd_template.format(check_id, os.getenv("GITHUB_TOKEN"))
+    subprocess.check_call(cmd, shell=True)
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+    repo_path = os.path.join(os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")))
+    temp_path = os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'style_check')
+
+    with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file:
+        event = json.load(event_file)
+    pr_info = PRInfo(event)
+
+    if not os.path.exists(temp_path):
+        os.makedirs(temp_path)
+
+    aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "")
+    aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "")
+
+    gh = Github(os.getenv("GITHUB_TOKEN"))
+
+    images_path = os.path.join(temp_path, 'changed_images.json')
+    docker_image = 'clickhouse/style-test'
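+    # changed_images.json (downloaded from the DockerHubPush artifacts) is expected to
+    # map image names to freshly built tags, e.g. {"clickhouse/style-test": "12345"};
+    # it is produced by docker_images_check.py.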
logging.info("Got images %s", images) + if 'clickhouse/style-test' in images: + docker_image += ':' + images['clickhouse/style-test'] + + logging.info("Got docker image %s", docker_image) + for i in range(10): + try: + subprocess.check_output(f"docker pull {docker_image}", shell=True) + break + except Exception as ex: + time.sleep(i * 3) + logging.info("Got execption pulling docker %s", ex) + else: + raise Exception(f"Cannot pull dockerhub for image {docker_image}") + + if not aws_secret_key_id or not aws_secret_key: + logging.info("No secrets, will not upload anything to S3") + + s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) + + subprocess.check_output(f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output {docker_image}", shell=True) + state, description, test_results, additional_files = process_result(temp_path) + report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_files) + print("::notice ::Report url: {}".format(report_url)) + commit = get_commit(gh, pr_info.sha) + commit.create_status(context=NAME, description=description, state=state, target_url=report_url) diff --git a/tests/ci/termination_lambda/Dockerfile b/tests/ci/termination_lambda/Dockerfile new file mode 100644 index 00000000000..f53be71a893 --- /dev/null +++ b/tests/ci/termination_lambda/Dockerfile @@ -0,0 +1,13 @@ +FROM public.ecr.aws/lambda/python:3.9 + +# Copy function code +COPY app.py ${LAMBDA_TASK_ROOT} + +# Install the function's dependencies using file requirements.txt +# from your project folder. + +COPY requirements.txt . +RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}" + +# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile) +CMD [ "app.handler" ] diff --git a/tests/ci/termination_lambda/app.py b/tests/ci/termination_lambda/app.py new file mode 100644 index 00000000000..0b39cf73f25 --- /dev/null +++ b/tests/ci/termination_lambda/app.py @@ -0,0 +1,275 @@ +#!/usr/bin/env python3 + +import requests +import argparse +import jwt +import sys +import json +import time +from collections import namedtuple + +def get_key_and_app_from_aws(): + import boto3 + secret_name = "clickhouse_github_secret_key" + session = boto3.session.Session() + client = session.client( + service_name='secretsmanager', + ) + get_secret_value_response = client.get_secret_value( + SecretId=secret_name + ) + data = json.loads(get_secret_value_response['SecretString']) + return data['clickhouse-app-key'], int(data['clickhouse-app-id']) + +def get_installation_id(jwt_token): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.get("https://api.github.com/app/installations", headers=headers) + response.raise_for_status() + data = response.json() + return data[0]['id'] + +def get_access_token(jwt_token, installation_id): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.post(f"https://api.github.com/app/installations/{installation_id}/access_tokens", headers=headers) + response.raise_for_status() + data = response.json() + return data['token'] + + +RunnerDescription = namedtuple('RunnerDescription', ['id', 'name', 'tags', 'offline', 'busy']) + +def list_runners(access_token): + headers = { + "Authorization": f"token 
{access_token}", + "Accept": "application/vnd.github.v3+json", + } + + response = requests.get("https://api.github.com/orgs/ClickHouse/actions/runners", headers=headers) + response.raise_for_status() + data = response.json() + print("Total runners", data['total_count']) + runners = data['runners'] + result = [] + for runner in runners: + tags = [tag['name'] for tag in runner['labels']] + desc = RunnerDescription(id=runner['id'], name=runner['name'], tags=tags, + offline=runner['status']=='offline', busy=runner['busy']) + result.append(desc) + return result + +def push_metrics_to_cloudwatch(listed_runners, namespace): + import boto3 + client = boto3.client('cloudwatch') + metrics_data = [] + busy_runners = sum(1 for runner in listed_runners if runner.busy) + metrics_data.append({ + 'MetricName': 'BusyRunners', + 'Value': busy_runners, + 'Unit': 'Count', + }) + total_active_runners = sum(1 for runner in listed_runners if not runner.offline) + metrics_data.append({ + 'MetricName': 'ActiveRunners', + 'Value': total_active_runners, + 'Unit': 'Count', + }) + total_runners = len(listed_runners) + metrics_data.append({ + 'MetricName': 'TotalRunners', + 'Value': total_runners, + 'Unit': 'Count', + }) + if total_active_runners == 0: + busy_ratio = 100 + else: + busy_ratio = busy_runners / total_active_runners * 100 + + metrics_data.append({ + 'MetricName': 'BusyRunnersRatio', + 'Value': busy_ratio, + 'Unit': 'Percent', + }) + + client.put_metric_data(Namespace='RunnersMetrics', MetricData=metrics_data) + + +def how_many_instances_to_kill(event_data): + data_array = event_data['CapacityToTerminate'] + to_kill_by_zone = {} + for av_zone in data_array: + zone_name = av_zone['AvailabilityZone'] + to_kill = av_zone['Capacity'] + if zone_name not in to_kill_by_zone: + to_kill_by_zone[zone_name] = 0 + + to_kill_by_zone[zone_name] += to_kill + return to_kill_by_zone + +def get_candidates_to_be_killed(event_data): + data_array = event_data['Instances'] + instances_by_zone = {} + for instance in data_array: + zone_name = instance['AvailabilityZone'] + instance_id = instance['InstanceId'] + if zone_name not in instances_by_zone: + instances_by_zone[zone_name] = [] + instances_by_zone[zone_name].append(instance_id) + + return instances_by_zone + +def delete_runner(access_token, runner): + headers = { + "Authorization": f"token {access_token}", + "Accept": "application/vnd.github.v3+json", + } + + response = requests.delete(f"https://api.github.com/orgs/ClickHouse/actions/runners/{runner.id}", headers=headers) + response.raise_for_status() + print(f"Response code deleting {runner.name} is {response.status_code}") + return response.status_code == 204 + + +def main(github_secret_key, github_app_id, event): + print("Got event", json.dumps(event, sort_keys=True, indent=4)) + to_kill_by_zone = how_many_instances_to_kill(event) + instances_by_zone = get_candidates_to_be_killed(event) + + payload = { + "iat": int(time.time()) - 60, + "exp": int(time.time()) + (10 * 60), + "iss": github_app_id, + } + + encoded_jwt = jwt.encode(payload, github_secret_key, algorithm="RS256") + installation_id = get_installation_id(encoded_jwt) + access_token = get_access_token(encoded_jwt, installation_id) + + runners = list_runners(access_token) + + to_delete_runners = [] + instances_to_kill = [] + for zone in to_kill_by_zone: + num_to_kill = to_kill_by_zone[zone] + candidates = instances_by_zone[zone] + if num_to_kill > len(candidates): + raise Exception(f"Required to kill {num_to_kill}, but have only {len(candidates)} candidates in AV 
{zone}") + + delete_for_av = [] + for candidate in candidates: + if candidate not in set([runner.name for runner in runners]): + print(f"Candidate {candidate} was not in runners list, simply delete it") + instances_to_kill.append(candidate) + + for candidate in candidates: + if len(delete_for_av) + len(instances_to_kill) == num_to_kill: + break + if candidate in instances_to_kill: + continue + + for runner in runners: + if runner.name == candidate: + if not runner.busy: + print(f"Runner {runner.name} is not busy and can be deleted from AV {zone}") + delete_for_av.append(runner) + else: + print(f"Runner {runner.name} is busy, not going to delete it") + break + + if len(delete_for_av) < num_to_kill: + print(f"Checked all candidates for av {zone}, get to delete {len(delete_for_av)}, but still cannot get required {num_to_kill}") + to_delete_runners += delete_for_av + + print("Got instances to kill: ", ', '.join(instances_to_kill)) + print("Going to delete runners:", ', '.join([runner.name for runner in to_delete_runners])) + for runner in to_delete_runners: + if delete_runner(access_token, runner): + print(f"Runner {runner.name} successfuly deleted from github") + instances_to_kill.append(runner.name) + else: + print(f"Cannot delete {runner.name} from github") + + ## push metrics + #runners = list_runners(access_token) + #push_metrics_to_cloudwatch(runners, 'RunnersMetrics') + + response = { + "InstanceIDs": instances_to_kill + } + print(response) + return response + +def handler(event, context): + private_key, app_id = get_key_and_app_from_aws() + return main(private_key, app_id, event) + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Get list of runners and their states') + parser.add_argument('-p', '--private-key-path', help='Path to file with private key') + parser.add_argument('-k', '--private-key', help='Private key') + parser.add_argument('-a', '--app-id', type=int, help='GitHub application ID', required=True) + + args = parser.parse_args() + + if not args.private_key_path and not args.private_key: + print("Either --private-key-path or --private-key must be specified", file=sys.stderr) + + if args.private_key_path and args.private_key: + print("Either --private-key-path or --private-key must be specified", file=sys.stderr) + + if args.private_key: + private_key = args.private_key + else: + with open(args.private_key_path, 'r') as key_file: + private_key = key_file.read() + + sample_event = { + "AutoScalingGroupARN": "arn:aws:autoscaling:us-east-1::autoScalingGroup:d4738357-2d40-4038-ae7e-b00ae0227003:autoScalingGroupName/my-asg", + "AutoScalingGroupName": "my-asg", + "CapacityToTerminate": [ + { + "AvailabilityZone": "us-east-1b", + "Capacity": 1, + "InstanceMarketOption": "OnDemand" + }, + { + "AvailabilityZone": "us-east-1c", + "Capacity": 2, + "InstanceMarketOption": "OnDemand" + } + ], + "Instances": [ + { + "AvailabilityZone": "us-east-1b", + "InstanceId": "i-08d0b3c1a137e02a5", + "InstanceType": "t2.nano", + "InstanceMarketOption": "OnDemand" + }, + { + "AvailabilityZone": "us-east-1c", + "InstanceId": "ip-172-31-45-253.eu-west-1.compute.internal", + "InstanceType": "t2.nano", + "InstanceMarketOption": "OnDemand" + }, + { + "AvailabilityZone": "us-east-1c", + "InstanceId": "ip-172-31-27-227.eu-west-1.compute.internal", + "InstanceType": "t2.nano", + "InstanceMarketOption": "OnDemand" + }, + { + "AvailabilityZone": "us-east-1c", + "InstanceId": "ip-172-31-45-253.eu-west-1.compute.internal", + "InstanceType": "t2.nano", + "InstanceMarketOption": 
"OnDemand" + } + ], + "Cause": "SCALE_IN" + } + + main(private_key, args.app_id, sample_event) diff --git a/tests/ci/termination_lambda/requirements.txt b/tests/ci/termination_lambda/requirements.txt new file mode 100644 index 00000000000..c0dcf4a4dde --- /dev/null +++ b/tests/ci/termination_lambda/requirements.txt @@ -0,0 +1,3 @@ +requests +PyJWT +cryptography diff --git a/tests/ci/token_lambda/Dockerfile b/tests/ci/token_lambda/Dockerfile new file mode 100644 index 00000000000..f53be71a893 --- /dev/null +++ b/tests/ci/token_lambda/Dockerfile @@ -0,0 +1,13 @@ +FROM public.ecr.aws/lambda/python:3.9 + +# Copy function code +COPY app.py ${LAMBDA_TASK_ROOT} + +# Install the function's dependencies using file requirements.txt +# from your project folder. + +COPY requirements.txt . +RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}" + +# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile) +CMD [ "app.handler" ] diff --git a/tests/ci/token_lambda/app.py b/tests/ci/token_lambda/app.py new file mode 100644 index 00000000000..731d6c040de --- /dev/null +++ b/tests/ci/token_lambda/app.py @@ -0,0 +1,106 @@ +#!/usr/bin/env python3 + +import requests +import argparse +import jwt +import sys +import json +import time + +def get_installation_id(jwt_token): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.get("https://api.github.com/app/installations", headers=headers) + response.raise_for_status() + data = response.json() + return data[0]['id'] + +def get_access_token(jwt_token, installation_id): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.post(f"https://api.github.com/app/installations/{installation_id}/access_tokens", headers=headers) + response.raise_for_status() + data = response.json() + return data['token'] + +def get_runner_registration_token(access_token): + headers = { + "Authorization": f"token {access_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.post("https://api.github.com/orgs/ClickHouse/actions/runners/registration-token", headers=headers) + response.raise_for_status() + data = response.json() + return data['token'] + +def get_key_and_app_from_aws(): + import boto3 + secret_name = "clickhouse_github_secret_key" + session = boto3.session.Session() + client = session.client( + service_name='secretsmanager', + ) + get_secret_value_response = client.get_secret_value( + SecretId=secret_name + ) + data = json.loads(get_secret_value_response['SecretString']) + return data['clickhouse-app-key'], int(data['clickhouse-app-id']) + + +def main(github_secret_key, github_app_id, push_to_ssm, ssm_parameter_name): + payload = { + "iat": int(time.time()) - 60, + "exp": int(time.time()) + (10 * 60), + "iss": github_app_id, + } + + encoded_jwt = jwt.encode(payload, github_secret_key, algorithm="RS256") + installation_id = get_installation_id(encoded_jwt) + access_token = get_access_token(encoded_jwt, installation_id) + runner_registration_token = get_runner_registration_token(access_token) + + if push_to_ssm: + import boto3 + + print("Trying to put params into ssm manager") + client = boto3.client('ssm') + client.put_parameter( + Name=ssm_parameter_name, + Value=runner_registration_token, + Type='SecureString', + Overwrite=True) + else: + print("Not push token to AWS Parameter Store, just print:", runner_registration_token) + + +def handler(event, 
+def handler(event, context):
+    private_key, app_id = get_key_and_app_from_aws()
+    main(private_key, app_id, True, 'github_runner_registration_token')
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Get new token from GitHub to add runners')
+    parser.add_argument('-p', '--private-key-path', help='Path to file with private key')
+    parser.add_argument('-k', '--private-key', help='Private key')
+    parser.add_argument('-a', '--app-id', type=int, help='GitHub application ID', required=True)
+    parser.add_argument('--push-to-ssm', action='store_true', help='Store received token in parameter store')
+    parser.add_argument('--ssm-parameter-name', default='github_runner_registration_token', help='AWS Parameter Store parameter name')
+
+    args = parser.parse_args()
+
+    if not args.private_key_path and not args.private_key:
+        print("Either --private-key-path or --private-key must be specified", file=sys.stderr)
+
+    if args.private_key_path and args.private_key:
+        print("Only one of --private-key-path and --private-key must be specified", file=sys.stderr)
+
+    if args.private_key:
+        private_key = args.private_key
+    else:
+        with open(args.private_key_path, 'r') as key_file:
+            private_key = key_file.read()
+
+    main(private_key, args.app_id, args.push_to_ssm, args.ssm_parameter_name)
diff --git a/tests/ci/token_lambda/requirements.txt b/tests/ci/token_lambda/requirements.txt
new file mode 100644
index 00000000000..c0dcf4a4dde
--- /dev/null
+++ b/tests/ci/token_lambda/requirements.txt
@@ -0,0 +1,3 @@
+requests
+PyJWT
+cryptography
diff --git a/tests/ci/worker/init.sh b/tests/ci/worker/init.sh
new file mode 100644
index 00000000000..2f6638f14b5
--- /dev/null
+++ b/tests/ci/worker/init.sh
@@ -0,0 +1,20 @@
+#!/usr/bin/bash
+set -euo pipefail
+
+echo "Running init script"
+export DEBIAN_FRONTEND=noninteractive
+export RUNNER_HOME=/home/ubuntu/actions-runner
+
+echo "Receiving token"
+export RUNNER_TOKEN=`/usr/local/bin/aws ssm get-parameter --name github_runner_registration_token --with-decryption --output text --query Parameter.Value`
+export RUNNER_URL="https://github.com/ClickHouse"
+# Funny fact, but the metadata service has a fixed IP
+export INSTANCE_ID=`curl -s http://169.254.169.254/latest/meta-data/instance-id`
+
+cd $RUNNER_HOME
+
+echo "Going to configure runner"
+sudo -u ubuntu ./config.sh --url $RUNNER_URL --token $RUNNER_TOKEN --name $INSTANCE_ID --runnergroup Default --labels 'self-hosted,Linux,X64' --work _work
+
+echo "Run"
+sudo -u ubuntu ./run.sh
diff --git a/tests/ci/worker/ubuntu_ami.sh b/tests/ci/worker/ubuntu_ami.sh
new file mode 100644
index 00000000000..2609c1a69f3
--- /dev/null
+++ b/tests/ci/worker/ubuntu_ami.sh
@@ -0,0 +1,47 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+echo "Running prepare script"
+export DEBIAN_FRONTEND=noninteractive
+export RUNNER_VERSION=2.283.1
+export RUNNER_HOME=/home/ubuntu/actions-runner
+
+apt-get update
+
+apt-get install --yes --no-install-recommends \
+    apt-transport-https \
+    ca-certificates \
+    curl \
+    gnupg \
+    lsb-release \
+    python3-pip \
+    unzip
+
+curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
+
+echo "deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null
+
+apt-get update
+
+apt-get install --yes --no-install-recommends docker-ce docker-ce-cli containerd.io
+
+usermod -aG docker ubuntu
+
+pip install boto3 pygithub requests urllib3 
unidiff + +mkdir -p $RUNNER_HOME && cd $RUNNER_HOME + +curl -O -L https://github.com/actions/runner/releases/download/v$RUNNER_VERSION/actions-runner-linux-x64-$RUNNER_VERSION.tar.gz + +tar xzf ./actions-runner-linux-x64-$RUNNER_VERSION.tar.gz +rm -f ./actions-runner-linux-x64-$RUNNER_VERSION.tar.gz +./bin/installdependencies.sh + +chown -R ubuntu:ubuntu $RUNNER_HOME + +cd /home/ubuntu +curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" +unzip awscliv2.zip +./aws/install + +rm -rf /home/ubuntu/awscliv2.zip /home/ubuntu/aws diff --git a/tests/fuzz/all.dict b/tests/fuzz/all.dict index 4a9afc348cf..bf25f1fa484 100644 --- a/tests/fuzz/all.dict +++ b/tests/fuzz/all.dict @@ -985,10 +985,10 @@ "RANGE" "rank" "rankCorr" -"readWktMultiPolygon" -"readWktPoint" -"readWktPolygon" -"readWktRing" +"readWKTMultiPolygon" +"readWKTPoint" +"readWKTPolygon" +"readWKTRing" "REAL" "REFRESH" "regexpQuoteMeta" @@ -1177,6 +1177,7 @@ "sumWithOverflow" "SUSPEND" "svg" +"SVG" "SYNC" "synonyms" "SYNTAX" diff --git a/tests/fuzz/dictionaries/functions.dict b/tests/fuzz/dictionaries/functions.dict index fb35375f284..722e931dc09 100644 --- a/tests/fuzz/dictionaries/functions.dict +++ b/tests/fuzz/dictionaries/functions.dict @@ -52,6 +52,7 @@ "h3GetResolution" "h3EdgeLengthM" "svg" +"SVG" "equals" "geohashesInBox" "polygonsIntersectionCartesian" @@ -114,7 +115,7 @@ "replaceOne" "emptyArrayInt32" "extract" -"readWktPolygon" +"readWKTPolygon" "notILike" "geohashDecode" "toModifiedJulianDay" @@ -164,7 +165,7 @@ "lessOrEquals" "subtractQuarters" "ngramSearch" -"readWktRing" +"readWKTRing" "trimRight" "endsWith" "ngramDistanceCaseInsensitive" @@ -713,13 +714,13 @@ "s2RectContains" "toDate" "regexpQuoteMeta" -"readWktMultiPolygon" +"readWKTMultiPolygon" "emptyArrayString" "bitmapOr" "cutWWW" "emptyArrayInt8" "less" -"readWktPoint" +"readWKTPoint" "reinterpretAsDateTime" "notEquals" "geoToS2" diff --git a/tests/queries/0_stateless/01300_read_wkt.sql b/tests/queries/0_stateless/01300_read_wkt.sql index 8121bdf6084..1995c5153d7 100644 --- a/tests/queries/0_stateless/01300_read_wkt.sql +++ b/tests/queries/0_stateless/01300_read_wkt.sql @@ -1,14 +1,14 @@ -SELECT readWktPoint('POINT(0 0)'); -SELECT readWktPolygon('POLYGON((1 0,10 0,10 10,0 10,1 0))'); -SELECT readWktPolygon('POLYGON((0 0,10 0,10 10,0 10,0 0),(4 4,5 4,5 5,4 5,4 4))'); -SELECT readWktMultiPolygon('MULTIPOLYGON(((2 0,10 0,10 10,0 10,2 0),(4 4,5 4,5 5,4 5,4 4)),((-10 -10,-10 -9,-9 10,-10 -10)))'); +SELECT readWKTPoint('POINT(0 0)'); +SELECT readWKTPolygon('POLYGON((1 0,10 0,10 10,0 10,1 0))'); +SELECT readWKTPolygon('POLYGON((0 0,10 0,10 10,0 10,0 0),(4 4,5 4,5 5,4 5,4 4))'); +SELECT readWKTMultiPolygon('MULTIPOLYGON(((2 0,10 0,10 10,0 10,2 0),(4 4,5 4,5 5,4 5,4 4)),((-10 -10,-10 -9,-9 10,-10 -10)))'); DROP TABLE IF EXISTS geo; CREATE TABLE geo (s String, id Int) engine=Memory(); INSERT INTO geo VALUES ('POINT(0 0)', 1); INSERT INTO geo VALUES ('POINT(1 0)', 2); INSERT INTO geo VALUES ('POINT(2 0)', 3); -SELECT readWktPoint(s) FROM geo ORDER BY id; +SELECT readWKTPoint(s) FROM geo ORDER BY id; DROP TABLE IF EXISTS geo; CREATE TABLE geo (s String, id Int) engine=Memory(); @@ -18,13 +18,13 @@ INSERT INTO geo VALUES ('POLYGON((2 0,10 0,10 10,0 10,2 0))', 3); INSERT INTO geo VALUES ('POLYGON((0 0,10 0,10 10,0 10,0 0),(4 4,5 4,5 5,4 5,4 4))', 4); INSERT INTO geo VALUES ('POLYGON((2 0,10 0,10 10,0 10,2 0),(4 4,5 4,5 5,4 5,4 4))', 5); INSERT INTO geo VALUES ('POLYGON((1 0,10 0,10 10,0 10,1 0),(4 4,5 4,5 5,4 5,4 4))', 6); -SELECT readWktPolygon(s) 
FROM geo ORDER BY id; +SELECT readWKTPolygon(s) FROM geo ORDER BY id; DROP TABLE IF EXISTS geo; CREATE TABLE geo (s String, id Int) engine=Memory(); INSERT INTO geo VALUES ('MULTIPOLYGON(((1 0,10 0,10 10,0 10,1 0),(4 4,5 4,5 5,4 5,4 4)),((-10 -10,-10 -9,-9 10,-10 -10)))', 1); INSERT INTO geo VALUES ('MULTIPOLYGON(((0 0,10 0,10 10,0 10,0 0),(4 4,5 4,5 5,4 5,4 4)),((-10 -10,-10 -9,-9 10,-10 -10)))', 2); INSERT INTO geo VALUES ('MULTIPOLYGON(((2 0,10 0,10 10,0 10,2 0),(4 4,5 4,5 5,4 5,4 4)),((-10 -10,-10 -9,-9 10,-10 -10)))', 3); -SELECT readWktMultiPolygon(s) FROM geo ORDER BY id; +SELECT readWKTMultiPolygon(s) FROM geo ORDER BY id; DROP TABLE geo; diff --git a/tests/queries/0_stateless/01300_svg.sql b/tests/queries/0_stateless/01300_svg.sql index a1deb1745c3..cf794f2190b 100644 --- a/tests/queries/0_stateless/01300_svg.sql +++ b/tests/queries/0_stateless/01300_svg.sql @@ -1,50 +1,50 @@ -SELECT svg((0., 0.)); -SELECT svg([(0., 0.), (10, 0), (10, 10), (0, 10)]); -SELECT svg([[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]]); -SELECT svg([[[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], [[(-10., -10.), (-10, -9), (-9, 10)]]]); -SELECT svg((0., 0.), 'b'); -SELECT svg([(0., 0.), (10, 0), (10, 10), (0, 10)], 'b'); -SELECT svg([[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], 'b'); -SELECT svg([[[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], [[(-10., -10.), (-10, -9), (-9, 10)]]], 'b'); +SELECT SVG((0., 0.)); +SELECT SVG([(0., 0.), (10, 0), (10, 10), (0, 10)]); +SELECT SVG([[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]]); +SELECT SVG([[[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], [[(-10., -10.), (-10, -9), (-9, 10)]]]); +SELECT SVG((0., 0.), 'b'); +SELECT SVG([(0., 0.), (10, 0), (10, 10), (0, 10)], 'b'); +SELECT SVG([[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], 'b'); +SELECT SVG([[[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], [[(-10., -10.), (-10, -9), (-9, 10)]]], 'b'); DROP TABLE IF EXISTS geo; CREATE TABLE geo (p Tuple(Float64, Float64), s String, id Int) engine=Memory(); INSERT INTO geo VALUES ((0., 0.), 'b', 1); INSERT INTO geo VALUES ((1., 0.), 'c', 2); INSERT INTO geo VALUES ((2., 0.), 'd', 3); -SELECT svg(p) FROM geo ORDER BY id; -SELECT svg(p, 'b') FROM geo ORDER BY id; -SELECT svg((0., 0.), s) FROM geo ORDER BY id; -SELECT svg(p, s) FROM geo ORDER BY id; +SELECT SVG(p) FROM geo ORDER BY id; +SELECT SVG(p, 'b') FROM geo ORDER BY id; +SELECT SVG((0., 0.), s) FROM geo ORDER BY id; +SELECT SVG(p, s) FROM geo ORDER BY id; DROP TABLE IF EXISTS geo; CREATE TABLE geo (p Array(Tuple(Float64, Float64)), s String, id Int) engine=Memory(); INSERT INTO geo VALUES ([(0., 0.), (10, 0), (10, 10), (0, 10)], 'b', 1); INSERT INTO geo VALUES ([(1., 0.), (10, 0), (10, 10), (0, 10)], 'c', 2); INSERT INTO geo VALUES ([(2., 0.), (10, 0), (10, 10), (0, 10)], 'd', 3); -SELECT svg(p) FROM geo ORDER BY id; -SELECT svg(p, 'b') FROM geo ORDER BY id; -SELECT svg([(0., 0.), (10, 0), (10, 10), (0, 10)], s) FROM geo ORDER BY id; -SELECT svg(p, s) FROM geo ORDER BY id; +SELECT SVG(p) FROM geo ORDER BY id; +SELECT SVG(p, 'b') FROM geo ORDER BY id; +SELECT SVG([(0., 0.), (10, 0), (10, 10), (0, 10)], s) FROM geo ORDER BY id; +SELECT SVG(p, s) FROM geo ORDER BY id; DROP TABLE IF EXISTS geo; CREATE TABLE geo (p Array(Array(Tuple(Float64, Float64))), s String, id Int) engine=Memory(); INSERT 
INTO geo VALUES ([[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4, 4), (5, 4), (5, 5), (4, 5)]], 'b', 1); INSERT INTO geo VALUES ([[(1., 0.), (10, 0), (10, 10), (0, 10)], [(4, 4), (5, 4), (5, 5), (4, 5)]], 'c', 2); INSERT INTO geo VALUES ([[(2., 0.), (10, 0), (10, 10), (0, 10)], [(4, 4), (5, 4), (5, 5), (4, 5)]], 'd', 3); -SELECT svg(p) FROM geo ORDER BY id; -SELECT svg(p, 'b') FROM geo ORDER BY id; -SELECT svg([[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], s) FROM geo ORDER BY id; -SELECT svg(p, s) FROM geo ORDER BY id; +SELECT SVG(p) FROM geo ORDER BY id; +SELECT SVG(p, 'b') FROM geo ORDER BY id; +SELECT SVG([[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], s) FROM geo ORDER BY id; +SELECT SVG(p, s) FROM geo ORDER BY id; DROP TABLE IF EXISTS geo; CREATE TABLE geo (p Array(Array(Array(Tuple(Float64, Float64)))), s String, id Int) engine=Memory(); INSERT INTO geo VALUES ([[[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], [[(-10., -10.), (-10, -9), (-9, 10)]]], 'b', 1); INSERT INTO geo VALUES ([[[(1., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], [[(-10., -10.), (-10, -9), (-9, 10)]]], 'c', 2); INSERT INTO geo VALUES ([[[(2., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], [[(-10., -10.), (-10, -9), (-9, 10)]]], 'd', 3); -SELECT svg(p) FROM geo ORDER BY id; -SELECT svg(p, 'b') FROM geo ORDER BY id; -SELECT svg([[[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], [[(-10., -10.), (-10, -9), (-9, 10)]]], s) FROM geo ORDER BY id; -SELECT svg(p, s) FROM geo ORDER BY id; +SELECT SVG(p) FROM geo ORDER BY id; +SELECT SVG(p, 'b') FROM geo ORDER BY id; +SELECT SVG([[[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], [[(-10., -10.), (-10, -9), (-9, 10)]]], s) FROM geo ORDER BY id; +SELECT SVG(p, s) FROM geo ORDER BY id; DROP TABLE geo; diff --git a/tests/queries/0_stateless/01339_client_unrecognized_option.sh b/tests/queries/0_stateless/01339_client_unrecognized_option.sh index f88d890f33c..00c153ec915 100755 --- a/tests/queries/0_stateless/01339_client_unrecognized_option.sh +++ b/tests/queries/0_stateless/01339_client_unrecognized_option.sh @@ -5,14 +5,14 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT xyzgarbage 2>&1 | grep -q "Code: 552" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT xyzgarbage 2>&1 | grep -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL' -$CLICKHOUSE_CLIENT -xyzgarbage 2>&1 | grep -q "Bad arguments" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -xyzgarbage 2>&1 | grep -q "UNRECOGNIZED_ARGUMENTS" && echo 'OK' || echo 'FAIL' -$CLICKHOUSE_CLIENT --xyzgarbage 2>&1 | grep -q "Bad arguments" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT --xyzgarbage 2>&1 | grep -q "UNRECOGNIZED_ARGUMENTS" && echo 'OK' || echo 'FAIL' -cat /etc/passwd | sed 's/:/\t/g' | $CLICKHOUSE_CLIENT --query="SELECT shell, count() AS c FROM passwd GROUP BY shell ORDER BY c DESC" --external --file=- --name=passwd --structure='login String, unused String, uid UInt16, gid UInt16, comment String, home String, shell String' xyzgarbage 2>&1 | grep -q "Code: 552" && echo 'OK' || echo 'FAIL' +cat /etc/passwd | sed 's/:/\t/g' | $CLICKHOUSE_CLIENT --query="SELECT shell, count() AS c FROM passwd GROUP BY shell ORDER BY c DESC" --external --file=- --name=passwd --structure='login String, unused String, uid UInt16, gid UInt16, comment String, home String, shell String' xyzgarbage 2>&1 | grep -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL' -cat /etc/passwd | sed 's/:/\t/g' | $CLICKHOUSE_CLIENT --query="SELECT shell, count() AS c FROM passwd GROUP BY shell ORDER BY c DESC" --external -xyzgarbage --file=- --name=passwd --structure='login String, unused String, uid UInt16, gid UInt16, comment String, home String, shell String' 2>&1 | grep -q "Bad arguments" && echo 'OK' || echo 'FAIL' +cat /etc/passwd | sed 's/:/\t/g' | $CLICKHOUSE_CLIENT --query="SELECT shell, count() AS c FROM passwd GROUP BY shell ORDER BY c DESC" --external -xyzgarbage --file=- --name=passwd --structure='login String, unused String, uid UInt16, gid UInt16, comment String, home String, shell String' 2>&1 | grep -q "UNRECOGNIZED_ARGUMENTS" && echo 'OK' || echo 'FAIL' -cat /etc/passwd | sed 's/:/\t/g' | $CLICKHOUSE_CLIENT --query="SELECT shell, count() AS c FROM passwd GROUP BY shell ORDER BY c DESC" --external --xyzgarbage --file=- --name=passwd --structure='login String, unused String, uid UInt16, gid UInt16, comment String, home String, shell String' 2>&1 | grep -q "Bad arguments" && echo 'OK' || echo 'FAIL' +cat /etc/passwd | sed 's/:/\t/g' | $CLICKHOUSE_CLIENT --query="SELECT shell, count() AS c FROM passwd GROUP BY shell ORDER BY c DESC" --external --xyzgarbage --file=- --name=passwd --structure='login String, unused String, uid UInt16, gid UInt16, comment String, home String, shell String' 2>&1 | grep -q "UNRECOGNIZED_ARGUMENTS" && echo 'OK' || echo 'FAIL' diff --git a/tests/queries/0_stateless/01527_clickhouse_local_optimize.sh b/tests/queries/0_stateless/01527_clickhouse_local_optimize.sh index d63765fc179..c1d5c357308 100755 --- a/tests/queries/0_stateless/01527_clickhouse_local_optimize.sh +++ b/tests/queries/0_stateless/01527_clickhouse_local_optimize.sh @@ -10,6 +10,6 @@ rm -rf "${WORKING_FOLDER_01527}" mkdir -p "${WORKING_FOLDER_01527}" # OPTIMIZE was crashing due to lack of temporary volume in local -${CLICKHOUSE_LOCAL} --query "drop database if exists d; create database d; create table d.t engine MergeTree order by a as select 1 a; optimize table d.t final" -- --path="${WORKING_FOLDER_01527}" +${CLICKHOUSE_LOCAL} --query "drop database if exists d; create database d; create table d.t engine MergeTree order by a as select 1 a; optimize table d.t final" --path="${WORKING_FOLDER_01527}" rm 
-rf "${WORKING_FOLDER_01527}" diff --git a/tests/queries/0_stateless/01528_clickhouse_local_prepare_parts.sh b/tests/queries/0_stateless/01528_clickhouse_local_prepare_parts.sh index 8684582ad45..95ecbf09cf5 100755 --- a/tests/queries/0_stateless/01528_clickhouse_local_prepare_parts.sh +++ b/tests/queries/0_stateless/01528_clickhouse_local_prepare_parts.sh @@ -36,10 +36,10 @@ ATTACH TABLE local.data_csv (id UInt64, d Date, s String) Engine=File(CSV, '${WO EOF ## feed the table -${CLICKHOUSE_LOCAL} --query "INSERT INTO local.test SELECT * FROM local.data_csv;" -- --path="${WORKING_FOLDER_01528}" +${CLICKHOUSE_LOCAL} --query "INSERT INTO local.test SELECT * FROM local.data_csv;" --path="${WORKING_FOLDER_01528}" ## check the parts were created -${CLICKHOUSE_LOCAL} --query "SELECT * FROM local.test WHERE id < 10 ORDER BY id;" -- --path="${WORKING_FOLDER_01528}" +${CLICKHOUSE_LOCAL} --query "SELECT * FROM local.test WHERE id < 10 ORDER BY id;" --path="${WORKING_FOLDER_01528}" ################# @@ -49,36 +49,36 @@ cat < "${WORKING_FOLDER_01528}/metadata/local/stdin.sql" ATTACH TABLE local.stdin (id UInt64, d Date, s String) Engine=File(CSV, stdin); EOF -cat <&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +CAPN_PROTO_FILE=$USER_FILES_PATH/data.capnp +touch $CAPN_PROTO_FILE + +SCHEMADIR=$(clickhouse-client --query "select * from file('data.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)") +CLIENT_SCHEMADIR=$CURDIR/format_schemas +SERVER_SCHEMADIR=test_02030 +mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR +cp -r $CLIENT_SCHEMADIR/02030_* $SCHEMADIR/$SERVER_SCHEMADIR/ + + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS capnp_simple_types"; +$CLICKHOUSE_CLIENT --query="CREATE TABLE capnp_simple_types (int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64, float32 Float32, float64 Float64, string String, fixed FixedString(5), data String, date Date, datetime DateTime, datetime64 DateTime64(3)) ENGINE=Memory" +$CLICKHOUSE_CLIENT --query="INSERT INTO capnp_simple_types values (-1, 1, -1000, 1000, -10000000, 1000000, -1000000000, 1000000000, 123.123, 123123123.123123123, 'Some string', 'fixed', 'Some data', '2000-01-06', '2000-06-01 19:42:42', '2000-04-01 11:21:33.123')" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_simple_types FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_simple_types:Message'" | $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_simple_types FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_simple_types:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_simple_types" +$CLICKHOUSE_CLIENT --query="DROP TABLE capnp_simple_types" + + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS capnp_tuples" +$CLICKHOUSE_CLIENT --query="CREATE TABLE capnp_tuples (value UInt64, tuple1 Tuple(one UInt64, two Tuple(three UInt64, four UInt64)), tuple2 Tuple(nested1 Tuple(nested2 Tuple(x UInt64)))) ENGINE=Memory"; +$CLICKHOUSE_CLIENT --query="INSERT INTO capnp_tuples VALUES (1, (2, (3, 4)), (((5))))" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_tuples FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_tuples:Message'" | $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_tuples FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_tuples:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_tuples" +$CLICKHOUSE_CLIENT --query="DROP TABLE capnp_tuples" + + 
+$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS capnp_lists" +$CLICKHOUSE_CLIENT --query="CREATE TABLE capnp_lists (value UInt64, list1 Array(UInt64), list2 Array(Array(Array(UInt64)))) ENGINE=Memory"; +$CLICKHOUSE_CLIENT --query="INSERT INTO capnp_lists VALUES (1, [1, 2, 3], [[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], []], []])" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_lists FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_lists:Message'" | $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_lists FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_lists:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_lists" +$CLICKHOUSE_CLIENT --query="DROP TABLE capnp_lists" + + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS capnp_nested_lists_and_tuples" +$CLICKHOUSE_CLIENT --query="CREATE TABLE capnp_nested_lists_and_tuples (value UInt64, nested Tuple(a Tuple(b UInt64, c Array(Array(UInt64))), d Array(Tuple(e Array(Array(Tuple(f UInt64, g UInt64))), h Array(Tuple(k Array(UInt64))))))) ENGINE=Memory"; +$CLICKHOUSE_CLIENT --query="INSERT INTO capnp_nested_lists_and_tuples VALUES (1, ((2, [[3, 4], [5, 6], []]), [([[(7, 8), (9, 10)], [(11, 12), (13, 14)], []], [([15, 16, 17]), ([])])]))" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_nested_lists_and_tuples FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nested_lists_and_tuples:Message'" | $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_nested_lists_and_tuples FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nested_lists_and_tuples:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_nested_lists_and_tuples" +$CLICKHOUSE_CLIENT --query="DROP TABLE capnp_nested_lists_and_tuples" + + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS capnp_nested_table" +$CLICKHOUSE_CLIENT --query="CREATE TABLE capnp_nested_table (nested Nested(value UInt64, array Array(UInt64), tuple Tuple(one UInt64, two UInt64))) ENGINE=Memory"; +$CLICKHOUSE_CLIENT --query="INSERT INTO capnp_nested_table VALUES ([1, 2, 3], [[4, 5, 6], [], [7, 8]], [(9, 10), (11, 12), (13, 14)])" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_nested_table FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nested_table:Message'" | $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_nested_table FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nested_table:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_nested_table" +$CLICKHOUSE_CLIENT --query="DROP TABLE capnp_nested_table" + + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS capnp_nullable" +$CLICKHOUSE_CLIENT --query="CREATE TABLE capnp_nullable (nullable Nullable(UInt64), array Array(Nullable(UInt64)), tuple Tuple(nullable Nullable(UInt64))) ENGINE=Memory"; +$CLICKHOUSE_CLIENT --query="INSERT INTO capnp_nullable VALUES (1, [1, Null, 2], (1)), (Null, [Null, Null, 42], (Null))" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_nullable FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nullable:Message'" | $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_nullable FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nullable:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_nullable" +$CLICKHOUSE_CLIENT --query="DROP TABLE capnp_nullable" + + +$CLICKHOUSE_CLIENT --query="SELECT CAST(number, 'Enum(\'one\' = 0, \'two\' = 1, \'tHrEe\' = 2)') AS value FROM numbers(3) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_enum:Message'" > 
$CAPN_PROTO_FILE + +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'value Enum(\'one\' = 1, \'two\' = 2, \'tHrEe\' = 3)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_enum:Message', format_capn_proto_enum_comparising_mode='by_names'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'value Enum(\'oNe\' = 1, \'tWo\' = 2, \'threE\' = 3)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_enum:Message', format_capn_proto_enum_comparising_mode='by_names_case_insensitive'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'value Enum(\'first\' = 0, \'second\' = 1, \'third\' = 2)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_enum:Message', format_capn_proto_enum_comparising_mode='by_values'" + +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'value Enum(\'one\' = 0, \'two\' = 1, \'three\' = 2)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_enum:Message', format_capn_proto_enum_comparising_mode='by_names'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'value Enum(\'one\' = 0, \'two\' = 1, \'tHrEe\' = 2, \'four\' = 3)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_enum:Message', format_capn_proto_enum_comparising_mode='by_names'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'value Enum(\'one\' = 1, \'two\' = 2, \'tHrEe\' = 3)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_enum:Message', format_capn_proto_enum_comparising_mode='by_values'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'value Enum(\'first\' = 1, \'two\' = 2, \'three\' = 3)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_enum:Message', format_capn_proto_enum_comparising_mode='by_names_case_insensitive'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS capnp_low_cardinality" +$CLICKHOUSE_CLIENT --query="CREATE TABLE capnp_low_cardinality (lc1 LowCardinality(String), lc2 LowCardinality(Nullable(String)), lc3 Array(LowCardinality(Nullable(String)))) ENGINE=Memory" +$CLICKHOUSE_CLIENT --query="INSERT INTO capnp_low_cardinality VALUES ('one', 'two', ['one', Null, 'two', Null]), ('two', Null, [Null])" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_low_cardinality FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_low_cardinality:Message'" | $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_low_cardinality FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_low_cardinality:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_low_cardinality" +$CLICKHOUSE_CLIENT --query="DROP TABLE capnp_low_cardinality" + + +$CLICKHOUSE_CLIENT --query="SELECT CAST(tuple(number, tuple(number + 1, tuple(number + 2))), 'Tuple(b UInt64, c Tuple(d UInt64, e Tuple(f UInt64)))') AS a FROM numbers(5) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nested_tuples:Message'" > $CAPN_PROTO_FILE +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'a_b UInt64, a_c_d UInt64, a_c_e_f UInt64') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_nested_tuples:Message'" + + +$CLICKHOUSE_CLIENT --query="SELECT number AS a_b, number + 1 AS a_c_d, number + 2 AS a_c_e_f 
FROM numbers(5) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nested_tuples:Message'" > $CAPN_PROTO_FILE +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'a Tuple(b UInt64, c Tuple(d UInt64, e Tuple(f UInt64)))') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_nested_tuples:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'a Tuple(bb UInt64, c Tuple(d UInt64, e Tuple(f UInt64)))') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_nested_tuples:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'a Tuple(b UInt64, c Tuple(d UInt64, e Tuple(ff UInt64)))') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_nested_tuples:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; + + +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'string String') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL'; + + +$CLICKHOUSE_CLIENT --query="SELECT number AS uint64 FROM numbers(5) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_simple_types:Message'" > $CAPN_PROTO_FILE +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 String') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 Array(UInt64)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 Enum(\'one\' = 1)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 Tuple(UInt64)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 Nullable(UInt64)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 Int32') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; + + +$CLICKHOUSE_CLIENT --query="SELECT number AS a, toString(number) as b FROM numbers(5) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_unnamed_union:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT toNullable(toString(number)) as nullable1 FROM numbers(5) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_fake_nullable:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT toNullable(toString(number)) as nullable2 FROM numbers(5) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_fake_nullable:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 
'FAIL'; + +rm $CAPN_PROTO_FILE +rm -rf ${SCHEMADIR:?}/${SERVER_SCHEMADIR:?} diff --git a/tests/queries/0_stateless/02049_clickhouse_local_merge_tree.expect b/tests/queries/0_stateless/02049_clickhouse_local_merge_tree.expect new file mode 100755 index 00000000000..17b98b077d5 --- /dev/null +++ b/tests/queries/0_stateless/02049_clickhouse_local_merge_tree.expect @@ -0,0 +1,53 @@ +#!/usr/bin/expect -f +# Tags: no-fasttest + +log_user 0 +set timeout 20 +match_max 100000 + +# A default timeout action is to fail +expect_after { + timeout { + exit 1 + } + +} + +set basedir [file dirname $argv0] +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_LOCAL --disable_suggestion" +expect ":) " + +send -- "drop table if exists t\r" +expect "Ok." + +send -- "create table t engine=MergeTree() order by tuple() as select 1\r" +expect "Ok." + +send -- "set optimize_on_insert = 0\r" +expect "Ok." + +send -- "drop table if exists tt\r" +expect "Ok." + +send -- "create table tt (date Date, version UInt64, val UInt64) engine = ReplacingMergeTree(version) partition by date order by date\r" +expect "Ok." + +send -- "insert into tt values ('2020-01-01', 2, 2), ('2020-01-01', 1, 1)\r" +expect "Ok." + +send -- "insert into tt values ('2020-01-01', 0, 0)\r" +expect "Ok." + +send -- "OPTIMIZE TABLE tt\r" +expect "Ok." + +send -- "select * from tt order by version format TSV\r" +expect "2020-01-01\t2\t2" + +send -- "drop table tt\r" +expect "Ok." +send -- "drop table t\r" +expect "Ok." + +send -- "\4" +expect eof diff --git a/tests/queries/0_stateless/02049_clickhouse_local_merge_tree.reference b/tests/queries/0_stateless/02049_clickhouse_local_merge_tree.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02050_client_profile_events.reference b/tests/queries/0_stateless/02050_client_profile_events.reference new file mode 100644 index 00000000000..00fc3b5d06a --- /dev/null +++ b/tests/queries/0_stateless/02050_client_profile_events.reference @@ -0,0 +1,4 @@ +0 +SelectedRows: 131010 (increment) +OK +OK diff --git a/tests/queries/0_stateless/02050_client_profile_events.sh b/tests/queries/0_stateless/02050_client_profile_events.sh new file mode 100755 index 00000000000..5c3887cf5fb --- /dev/null +++ b/tests/queries/0_stateless/02050_client_profile_events.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +# Tags: long + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +# do not print any ProfileEvents packets +$CLICKHOUSE_CLIENT -q 'select * from numbers(1e5) format Null' |& grep -c 'SelectedRows' +# print only last +$CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -q 'select * from numbers(1e5) format Null' |& grep -o 'SelectedRows: .*$' +# print everything +test "$($CLICKHOUSE_CLIENT --print-profile-events -q 'select * from numbers(1e9) format Null' |& grep -c 'SelectedRows')" -gt 1 && echo OK || echo FAIL +# print each 100 ms +test "$($CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=100 -q 'select * from numbers(1e9) format Null' |& grep -c 'SelectedRows')" -gt 1 && echo OK || echo FAIL diff --git a/tests/queries/0_stateless/02051_symlinks_to_user_files.reference b/tests/queries/0_stateless/02051_symlinks_to_user_files.reference new file mode 100644 index 00000000000..d86bac9de59 --- /dev/null +++ b/tests/queries/0_stateless/02051_symlinks_to_user_files.reference @@ -0,0 +1 @@ +OK diff --git a/tests/queries/0_stateless/02051_symlinks_to_user_files.sh b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh new file mode 100755 index 00000000000..dfdc71e0f0b --- /dev/null +++ b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# See 01658_read_file_to_string_column.sh +user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') + +FILE_PATH="${user_files_path}/file/" +mkdir -p ${FILE_PATH} +chmod 777 ${FILE_PATH} + +FILE="test_symlink_${CLICKHOUSE_DATABASE}" + +symlink_path=${FILE_PATH}/${FILE} +file_path=$CUR_DIR/${FILE} + +touch ${file_path} +ln -s ${file_path} ${symlink_path} +chmod ugo+w ${symlink_path} + +function cleanup() +{ + rm ${symlink_path} ${file_path} +} +trap cleanup EXIT + +${CLICKHOUSE_CLIENT} --query="insert into table function file('${symlink_path}', 'Values', 'a String') select 'OK'"; +${CLICKHOUSE_CLIENT} --query="select * from file('${symlink_path}', 'Values', 'a String')"; + diff --git a/tests/queries/0_stateless/02052_last_granula_adjust_LOGICAL_ERROR.reference b/tests/queries/0_stateless/02052_last_granula_adjust_LOGICAL_ERROR.reference new file mode 100644 index 00000000000..d7d3ee8f362 --- /dev/null +++ b/tests/queries/0_stateless/02052_last_granula_adjust_LOGICAL_ERROR.reference @@ -0,0 +1,8 @@ +1 +1 +10 +10 +100 +100 +10000 +10000 diff --git a/tests/queries/0_stateless/02052_last_granula_adjust_LOGICAL_ERROR.sql.j2 b/tests/queries/0_stateless/02052_last_granula_adjust_LOGICAL_ERROR.sql.j2 new file mode 100644 index 00000000000..465aa22beb3 --- /dev/null +++ b/tests/queries/0_stateless/02052_last_granula_adjust_LOGICAL_ERROR.sql.j2 @@ -0,0 +1,19 @@ +-- Tags: long + +{% for rows_in_table in [1, 10, 100, 10000] %} +{% for wide in [0, 100000000] %} +drop table if exists data_02052_{{ rows_in_table }}_wide{{ wide }}; +create table data_02052_{{ rows_in_table }}_wide{{ wide }} (key Int, value String) +engine=MergeTree() +order by key +settings + min_bytes_for_wide_part={{ wide }} +as select number, repeat(toString(number), 5) from numbers({{ rows_in_table }}); + +-- avoid any optimizations with ignore(*) +select count(ignore(*)) from data_02052_{{ rows_in_table }}_wide{{ wide }} settings max_read_buffer_size=1, max_threads=1; 
+select count(ignore(*)) from data_02052_{{ rows_in_table }}_wide{{ wide }} settings max_read_buffer_size=0, max_threads=1; -- { serverError CANNOT_READ_ALL_DATA } + +drop table data_02052_{{ rows_in_table }}_wide{{ wide }}; +{% endfor %} +{% endfor %} diff --git a/tests/queries/0_stateless/02096_bad_options_in_client_and_local.reference b/tests/queries/0_stateless/02096_bad_options_in_client_and_local.reference new file mode 100644 index 00000000000..c4c0901b9df --- /dev/null +++ b/tests/queries/0_stateless/02096_bad_options_in_client_and_local.reference @@ -0,0 +1,12 @@ +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK diff --git a/tests/queries/0_stateless/02096_bad_options_in_client_and_local.sh b/tests/queries/0_stateless/02096_bad_options_in_client_and_local.sh new file mode 100755 index 00000000000..d37155e8506 --- /dev/null +++ b/tests/queries/0_stateless/02096_bad_options_in_client_and_local.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +# shellcheck disable=SC2206 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_LOCAL} --unknown-option 2>&1 | grep -F -q "UNRECOGNIZED_ARGUMENTS" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_LOCAL} --unknown-option-1 --unknown-option-2 2>&1 | grep -F -q "UNRECOGNIZED_ARGUMENTS" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_LOCAL} -- --unknown-option 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_LOCAL} -- 'positional-argument' 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_LOCAL} -f 2>&1 | grep -F -q "Bad arguments" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_LOCAL} --query 2>&1 | grep -F -q "Bad arguments" && echo "OK" || echo "FAIL" + + +${CLICKHOUSE_CLIENT} --unknown-option 2>&1 | grep -F -q "UNRECOGNIZED_ARGUMENTS" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_CLIENT} --unknown-option-1 --unknown-option-2 2>&1 | grep -F -q "UNRECOGNIZED_ARGUMENTS" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_CLIENT} -- --unknown-option 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_CLIENT} -- 'positional-argument' 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_CLIENT} --j 2>&1 | grep -F -q "Bad arguments" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_CLIENT} --query 2>&1 | grep -F -q "Bad arguments" && echo "OK" || echo "FAIL" + + + diff --git a/tests/queries/0_stateless/02100_replaceRegexpAll_bug.reference b/tests/queries/0_stateless/02100_replaceRegexpAll_bug.reference new file mode 100644 index 00000000000..993dd9b1cde --- /dev/null +++ b/tests/queries/0_stateless/02100_replaceRegexpAll_bug.reference @@ -0,0 +1,11 @@ +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02100_replaceRegexpAll_bug.sql b/tests/queries/0_stateless/02100_replaceRegexpAll_bug.sql new file mode 100644 index 00000000000..32f7f63f6d0 --- /dev/null +++ b/tests/queries/0_stateless/02100_replaceRegexpAll_bug.sql @@ -0,0 +1,14 @@ +SELECT 'aaaabb ' == trim(leading 'b ' FROM 'b aaaabb ') x; +SELECT 'b aaaa' == trim(trailing 'b ' FROM 'b aaaabb ') x; +SELECT 'aaaa' == trim(both 'b ' FROM 'b aaaabb ') x; + +SELECT '1' == replaceRegexpAll(',,1,,', '^[,]*|[,]*$', '') x; +SELECT '1' == replaceRegexpAll(',,1', '^[,]*|[,]*$', '') x; +SELECT '1' == replaceRegexpAll('1,,', '^[,]*|[,]*$', '') x; + +SELECT '1,,' == replaceRegexpOne(',,1,,', '^[,]*|[,]*$', '') x; +SELECT '1' == replaceRegexpOne(',,1', '^[,]*|[,]*$', '') x; +SELECT '1,,' == replaceRegexpOne('1,,', '^[,]*|[,]*$', '') 
x; + +SELECT '5935,5998,6014' == trim(BOTH ', ' FROM '5935,5998,6014, ') x; +SELECT '5935,5998,6014' == replaceRegexpAll('5935,5998,6014, ', concat('^[', regexpQuoteMeta(', '), ']*|[', regexpQuoteMeta(', '), ']*$'), '') AS x; diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_enum.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_enum.capnp new file mode 100644 index 00000000000..f033b177a45 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_enum.capnp @@ -0,0 +1,13 @@ +@0x9ef128e10a8010b2; + +struct Message +{ + value @0 : EnumType; + + enum EnumType + { + one @0; + two @1; + tHrEe @2; + } +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_fake_nullable.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_fake_nullable.capnp new file mode 100644 index 00000000000..a027692e4bc --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_fake_nullable.capnp @@ -0,0 +1,23 @@ +@0xd8dd7b35452d1c4c; + +struct FakeNullable1 +{ + union + { + value @0 : Text; + null @1 : Void; + trash @2 : Text; + } +} + +struct FakeNullable2 +{ + value @0 : Text; + null @1 : Void; +} + +struct Message +{ + nullable1 @0 : FakeNullable1; + nullable2 @1 : FakeNullable2; +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_lists.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_lists.capnp new file mode 100644 index 00000000000..78fe3cf551e --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_lists.capnp @@ -0,0 +1,8 @@ +@0x9ef128e10a8010b7; + +struct Message +{ + value @0 : UInt64; + list1 @1 : List(UInt64); + list2 @2 : List(List(List(UInt64))); +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_low_cardinality.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_low_cardinality.capnp new file mode 100644 index 00000000000..0958889f0d8 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_low_cardinality.capnp @@ -0,0 +1,17 @@ +@0x9ef128e10a8010b7; + +struct NullableText +{ + union + { + value @0 : Text; + null @1 : Void; + } +} + +struct Message +{ + lc1 @0 : Text; + lc2 @1 : NullableText; + lc3 @2 : List(NullableText); +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_nested_lists_and_tuples.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_nested_lists_and_tuples.capnp new file mode 100644 index 00000000000..11fa99f62f5 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_nested_lists_and_tuples.capnp @@ -0,0 +1,36 @@ +@0x9ef128e10a8010b2; + +struct Nested1 +{ + b @0 : UInt64; + c @1 : List(List(UInt64)); +} + +struct Nested2 +{ + e @0 : List(List(Nested3)); + h @1 : List(Nested4); +} + +struct Nested3 +{ + f @0 : UInt64; + g @1 : UInt64; +} + +struct Nested4 +{ + k @0 : List(UInt64); +} + +struct Nested +{ + a @0 : Nested1; + d @1 : List(Nested2); +} + +struct Message +{ + value @0 : UInt64; + nested @1 : Nested; +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_nested_table.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_nested_table.capnp new file mode 100644 index 00000000000..42f17246d58 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_nested_table.capnp @@ -0,0 +1,20 @@ +@0x9ef128e10a8010b3; + + +struct Nested1 +{ + one @0 : UInt64; + two @1 : UInt64; +} + +struct Nested +{ + value @0 : List(UInt64); + array @1 : List(List(UInt64)); + tuple @2 : List(Nested1); +} + +struct Message +{ + nested @0 : Nested; +} diff --git 
a/tests/queries/0_stateless/format_schemas/02030_capnp_nested_tuples.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_nested_tuples.capnp new file mode 100644 index 00000000000..161c1bbaea6 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_nested_tuples.capnp @@ -0,0 +1,23 @@ +@0x9ef128e12a8010b2; + +struct Nested1 +{ + d @0 : UInt64; + e @1 : Nested2; +} + +struct Nested2 +{ + f @0 : UInt64; +} + +struct Nested +{ + b @0 : UInt64; + c @1 : Nested1; +} + +struct Message +{ + a @0 : Nested; +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_nullable.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_nullable.capnp new file mode 100644 index 00000000000..41254911710 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_nullable.capnp @@ -0,0 +1,22 @@ +@0x9ef128e10a8010b2; + +struct NullableUInt64 +{ + union + { + value @0 : UInt64; + null @1 : Void; + } +} + +struct Tuple +{ + nullable @0 : NullableUInt64; +} + +struct Message +{ + nullable @0 : NullableUInt64; + array @1 : List(NullableUInt64); + tuple @2 : Tuple; +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_simple_types.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_simple_types.capnp new file mode 100644 index 00000000000..a85bbbc511b --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_simple_types.capnp @@ -0,0 +1,21 @@ +@0xd9dd7b35452d1c4f; + +struct Message +{ + int8 @0 : Int8; + uint8 @1 : UInt8; + int16 @2 : Int16; + uint16 @3 : UInt16; + int32 @4 : Int32; + uint32 @5 : UInt32; + int64 @6 : Int64; + uint64 @7 : UInt64; + float32 @8 : Float32; + float64 @9 : Float64; + string @10 : Text; + fixed @11 : Text; + data @12 : Data; + date @13 : UInt16; + datetime @14 : UInt32; + datetime64 @15 : Int64; +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_tuples.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_tuples.capnp new file mode 100644 index 00000000000..21c3f0eb2e1 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_tuples.capnp @@ -0,0 +1,35 @@ +@0x9ef128e10a8010b8; + +struct Nested5 +{ + x @0 : UInt64; +} + +struct Nested4 +{ + nested2 @0 : Nested5; +} + +struct Nested3 +{ + nested1 @0 : Nested4; +} + +struct Nested2 +{ + three @0 : UInt64; + four @1 : UInt64; +} + +struct Nested1 +{ + one @0 : UInt64; + two @1 : Nested2; +} + +struct Message +{ + value @0 : UInt64; + tuple1 @1 : Nested1; + tuple2 @2 : Nested3; +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_unnamed_union.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_unnamed_union.capnp new file mode 100644 index 00000000000..9fb5e37bfea --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_unnamed_union.capnp @@ -0,0 +1,10 @@ +@0xd8dd7b35452d1c4f; + +struct Message +{ + union + { + a @0 : UInt64; + b @1 : Text; + } +} diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 6916139f952..7137d04a568 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -70,7 +70,7 @@ find $ROOT_PATH/{src,base,programs,utils} -name '*.xml' | xargs xmllint --noout --nonet # FIXME: for now only clickhouse-test -pylint --rcfile=$ROOT_PATH/.pylintrc --score=n $ROOT_PATH/tests/clickhouse-test +pylint --rcfile=$ROOT_PATH/.pylintrc --persistent=no --score=n $ROOT_PATH/tests/clickhouse-test find $ROOT_PATH -not -path $ROOT_PATH'/contrib*' \( -name '*.yaml' -or -name '*.yml' \) -type f | grep -vP $EXCLUDE_DIRS | @@ 
-162,7 +162,7 @@ find $ROOT_PATH -name '.gitmodules' | while read i; do grep -F 'url = ' $i | gre find $ROOT_PATH/{src,base,programs} -name '*.h' -or -name '*.cpp' 2>/dev/null | xargs grep -i -F 'General Public License' && echo "There shouldn't be any code snippets under GPL or LGPL" # There shouldn't be any docker containers outside docker directory -find $ROOT_PATH -not -path $ROOT_PATH'/docker*' -not -path $ROOT_PATH'/contrib*' -name Dockerfile -type f 2>/dev/null | xargs --no-run-if-empty -n1 echo "Please move Dockerfile to docker directory:" +find $ROOT_PATH -not -path $ROOT_PATH'/tests/ci*' -not -path $ROOT_PATH'/docker*' -not -path $ROOT_PATH'/contrib*' -name Dockerfile -type f 2>/dev/null | xargs --no-run-if-empty -n1 echo "Please move Dockerfile to docker directory:" # There shouldn't be any docker compose files outside docker directory #find $ROOT_PATH -not -path $ROOT_PATH'/tests/testflows*' -not -path $ROOT_PATH'/docker*' -not -path $ROOT_PATH'/contrib*' -name '*compose*.yml' -type f 2>/dev/null | xargs --no-run-if-empty grep -l "version:" | xargs --no-run-if-empty -n1 echo "Please move docker compose to docker directory:"
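
The 02050 test above exercises the new client flags end to end: with no flag the client prints no ProfileEvents packets, --profile-events-delay-ms=-1 prints only the final cumulative packet, and a positive delay flushes packets periodically. A minimal usage sketch (illustrative, not part of the patch; assumes a reachable server, and the log file name is hypothetical):

    # Stream ProfileEvents packets every 100 ms while a long query runs.
    # The packets appear to go to stderr (hence the test's |& redirections), so capture fd 2.
    clickhouse-client --print-profile-events --profile-events-delay-ms=100 \
        --query 'SELECT * FROM numbers(1e9) FORMAT Null' 2> profile_events.log

    # With periodic flushing enabled, more than one packet should have arrived.
    grep -c 'SelectedRows' profile_events.log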
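
The 02096 test pins down two distinct failure modes of option parsing in clickhouse-client and clickhouse-local: an unrecognized --long-option is rejected with UNRECOGNIZED_ARGUMENTS, while anything after the bare -- separator is treated as a positional argument and rejected with BAD_ARGUMENTS. The distinction in two lines (a sketch, not part of the patch; error names exactly as the test greps for them):

    clickhouse-local --unknown-option 2>&1 | grep -o 'UNRECOGNIZED_ARGUMENTS'   # parsed as an option and rejected
    clickhouse-local -- --unknown-option 2>&1 | grep -o 'BAD_ARGUMENTS'         # after --, treated as a positional argument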
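
The 02100 test asserts that trim with a multi-character trim set agrees with the anchored regexp it corresponds to (the set escaped via regexpQuoteMeta and anchored as '^[...]*|[...]*$'), and that replaceRegexpAll strips both ends where replaceRegexpOne replaces only the first match. The final pair of queries can be reproduced standalone; a sketch using clickhouse-local (the \$ keeps the anchor literal inside the double-quoted shell string):

    clickhouse-local --query "
        SELECT
            trim(BOTH ', ' FROM '5935,5998,6014, ') AS via_trim,
            replaceRegexpAll('5935,5998,6014, ',
                concat('^[', regexpQuoteMeta(', '), ']*|[', regexpQuoteMeta(', '), ']*\$'),
                '') AS via_regexp
    "
    # Both columns should read: 5935,5998,6014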
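
The 02030_capnp_* schemas back the CapnProto format tests; a schema is referenced through the format_schema setting using the 'file_name:StructName' convention, with the file resolved against the server's format_schemas directory. A rough sketch of how 02030_capnp_lists.capnp might be driven (illustrative, not part of the patch; assumes the schema is installed server-side and that CapnProto is available as an output format, which is what these tests cover; the output file name is hypothetical):

    # Serialize rows shaped like the Message struct from 02030_capnp_lists.capnp.
    # format_schema is a per-query setting, passed here as a client option.
    clickhouse-client --format_schema='02030_capnp_lists:Message' --query "
        SELECT
            number           AS value,
            [number, number] AS list1,
            [[[number]]]     AS list2
        FROM numbers(3)
        FORMAT CapnProto
    " > lists.bin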