Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-24 08:32:02 +00:00)

Commit 35b5cf23cb: Merge branch 'master' into update_keeper_config
.github/workflows/main.yml (vendored, new file, 66 lines)
@@ -0,0 +1,66 @@
name: Ligthweight GithubActions
on:  # yamllint disable-line rule:truthy
  pull_request:
    types:
      - labeled
      - unlabeled
      - synchronize
      - reopened
      - opened
    branches:
      - master
jobs:
  CheckLabels:
    runs-on: [self-hosted]
    steps:
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Labels check
        run: cd $GITHUB_WORKSPACE/tests/ci && python3 run_check.py
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  DockerHubPush:
    needs: CheckLabels
    runs-on: [self-hosted]
    steps:
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Images check
        run: cd $GITHUB_WORKSPACE/tests/ci && python3 docker_images_check.py
        env:
          YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }}
          YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }}
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          DOCKER_ROBOT_PASSWORD: ${{ secrets.DOCKER_ROBOT_PASSWORD }}
      - name: Upload images files to artifacts
        uses: actions/upload-artifact@v2
        with:
          name: changed_images
          path: ${{ runner.temp }}/docker_images_check/changed_images.json
  StyleCheck:
    needs: DockerHubPush
    runs-on: [self-hosted]
    steps:
      - name: Download changed images
        uses: actions/download-artifact@v2
        with:
          name: changed_images
          path: ${{ runner.temp }}/style_check
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Style Check
        env:
          YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }}
          YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }}
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: cd $GITHUB_WORKSPACE/tests/ci && python3 style_check.py
  FinishCheck:
    needs: [StyleCheck, DockerHubPush, CheckLabels]
    runs-on: [self-hosted]
    steps:
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Finish label
        run: cd $GITHUB_WORKSPACE/tests/ci && python3 finish_check.py
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
.gitmodules (vendored, 2 lines changed)
@@ -140,7 +140,7 @@
	url = https://github.com/ClickHouse-Extras/libc-headers.git
[submodule "contrib/replxx"]
	path = contrib/replxx
	url = https://github.com/ClickHouse-Extras/replxx.git
	url = https://github.com/AmokHuginnsson/replxx.git
[submodule "contrib/avro"]
	path = contrib/avro
	url = https://github.com/ClickHouse-Extras/avro.git
@@ -177,6 +177,10 @@ ReplxxLineReader::ReplxxLineReader(
    /// bind C-p/C-n to history-previous/history-next like readline.
    rx.bind_key(Replxx::KEY::control('N'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::HISTORY_NEXT, code); });
    rx.bind_key(Replxx::KEY::control('P'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::HISTORY_PREVIOUS, code); });

    /// bind C-j to ENTER action.
    rx.bind_key(Replxx::KEY::control('J'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::COMMIT_LINE, code); });

    /// By default COMPLETE_NEXT/COMPLETE_PREV was binded to C-p/C-n, re-bind
    /// to M-P/M-N (that was used for HISTORY_COMMON_PREFIX_SEARCH before, but
    /// it also binded to M-p/M-n).
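The bindings above use replxx's public key-binding API (the same calls visible in this hunk). A minimal standalone sketch of those calls, assuming replxx is installed and linked; the prompt string is invented:

```cpp
#include <replxx.hxx>

using Replxx = replxx::Replxx;

int main()
{
    Replxx rx;
    /// Make C-j commit the current line, mirroring the binding added in the diff above.
    rx.bind_key(Replxx::KEY::control('J'),
                [&rx](char32_t code) { return rx.invoke(Replxx::ACTION::COMMIT_LINE, code); });
    /// input() returns nullptr on EOF (C-d).
    while (const char * line = rx.input("sql> "))
        rx.history_add(line);
    return 0;
}
```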
@@ -34,8 +34,6 @@ endif()
if (CAPNP_LIBRARIES)
    set (USE_CAPNP 1)
elseif(NOT MISSING_INTERNAL_CAPNP_LIBRARY)
    add_subdirectory(contrib/capnproto-cmake)

    set (CAPNP_LIBRARIES capnpc)
    set (USE_CAPNP 1)
    set (USE_INTERNAL_CAPNP_LIBRARY 1)
contrib/CMakeLists.txt (vendored, 88 lines changed)
@@ -1,16 +1,5 @@
# Third-party libraries may have substandard code.

# Put all targets defined here and in added subfolders under "contrib/" folder in GUI-based IDEs by default.
# Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they will
# appear not in "contrib/" as originally planned here.
get_filename_component (_current_dir_name "${CMAKE_CURRENT_LIST_DIR}" NAME)
if (CMAKE_FOLDER)
    set (CMAKE_FOLDER "${CMAKE_FOLDER}/${_current_dir_name}")
else ()
    set (CMAKE_FOLDER "${_current_dir_name}")
endif ()
unset (_current_dir_name)

set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -w")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w")

@@ -49,6 +38,10 @@ add_subdirectory (replxx-cmake)
add_subdirectory (unixodbc-cmake)
add_subdirectory (nanodbc-cmake)

if (USE_INTERNAL_CAPNP_LIBRARY AND NOT MISSING_INTERNAL_CAPNP_LIBRARY)
    add_subdirectory(capnproto-cmake)
endif ()

if (ENABLE_FUZZING)
    add_subdirectory (libprotobuf-mutator-cmake)
endif()

@@ -352,3 +345,76 @@ endif()
if (USE_S2_GEOMETRY)
    add_subdirectory(s2geometry-cmake)
endif()

# Put all targets defined here and in subdirectories under "contrib/<immediate-subdir>" folders in GUI-based IDEs.
# Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear
# in "contrib/..." as originally planned, so we workaround this by fixing FOLDER properties of all targets manually,
# instead of controlling it via CMAKE_FOLDER.

function (ensure_target_rooted_in _target _folder)
    # Skip INTERFACE library targets, since FOLDER property is not available for them.
    get_target_property (_target_type "${_target}" TYPE)
    if (_target_type STREQUAL "INTERFACE_LIBRARY")
        return ()
    endif ()

    # Read the original FOLDER property value, if any.
    get_target_property (_folder_prop "${_target}" FOLDER)

    # Normalize that value, so we avoid possible repetitions in folder names.

    if (NOT _folder_prop)
        set (_folder_prop "")
    endif ()

    if (CMAKE_FOLDER AND _folder_prop MATCHES "^${CMAKE_FOLDER}/(.*)\$")
        set (_folder_prop "${CMAKE_MATCH_1}")
    endif ()

    if (_folder AND _folder_prop MATCHES "^${_folder}/(.*)\$")
        set (_folder_prop "${CMAKE_MATCH_1}")
    endif ()

    if (_folder)
        set (_folder_prop "${_folder}/${_folder_prop}")
    endif ()

    if (CMAKE_FOLDER)
        set (_folder_prop "${CMAKE_FOLDER}/${_folder_prop}")
    endif ()

    # Set the updated FOLDER property value back.
    set_target_properties ("${_target}" PROPERTIES FOLDER "${_folder_prop}")
endfunction ()

function (ensure_own_targets_are_rooted_in _dir _folder)
    get_directory_property (_targets DIRECTORY "${_dir}" BUILDSYSTEM_TARGETS)
    foreach (_target IN LISTS _targets)
        ensure_target_rooted_in ("${_target}" "${_folder}")
    endforeach ()
endfunction ()

function (ensure_all_targets_are_rooted_in _dir _folder)
    ensure_own_targets_are_rooted_in ("${_dir}" "${_folder}")

    get_property (_sub_dirs DIRECTORY "${_dir}" PROPERTY SUBDIRECTORIES)
    foreach (_sub_dir IN LISTS _sub_dirs)
        ensure_all_targets_are_rooted_in ("${_sub_dir}" "${_folder}")
    endforeach ()
endfunction ()

function (organize_ide_folders_2_level _dir)
    get_filename_component (_dir_name "${_dir}" NAME)
    ensure_own_targets_are_rooted_in ("${_dir}" "${_dir_name}")

    # Note, that we respect only first two levels of nesting, we don't want to
    # reorganize target folders further within each third-party dir.

    get_property (_sub_dirs DIRECTORY "${_dir}" PROPERTY SUBDIRECTORIES)
    foreach (_sub_dir IN LISTS _sub_dirs)
        get_filename_component (_sub_dir_name "${_sub_dir}" NAME)
        ensure_all_targets_are_rooted_in ("${_sub_dir}" "${_dir_name}/${_sub_dir_name}")
    endforeach ()
endfunction ()

organize_ide_folders_2_level ("${CMAKE_CURRENT_LIST_DIR}")
contrib/capnproto (vendored submodule, 2 lines changed)
@@ -1 +1 @@
Subproject commit a00ccd91b3746ef2ab51d40fe3265829949d1ace
Subproject commit c8189ec3c27dacbd4a3288e682473010e377f593
@@ -45,6 +45,7 @@ set (CAPNP_SRCS
    "${CAPNPROTO_SOURCE_DIR}/capnp/serialize-packed.c++"

    "${CAPNPROTO_SOURCE_DIR}/capnp/schema.c++"
    "${CAPNPROTO_SOURCE_DIR}/capnp/stream.capnp.c++"
    "${CAPNPROTO_SOURCE_DIR}/capnp/schema-loader.c++"
    "${CAPNPROTO_SOURCE_DIR}/capnp/dynamic.c++"
    "${CAPNPROTO_SOURCE_DIR}/capnp/stringify.c++"
@@ -63,6 +64,7 @@ set (CAPNPC_SRCS
    "${CAPNPROTO_SOURCE_DIR}/capnp/compiler/lexer.c++"
    "${CAPNPROTO_SOURCE_DIR}/capnp/compiler/grammar.capnp.c++"
    "${CAPNPROTO_SOURCE_DIR}/capnp/compiler/parser.c++"
    "${CAPNPROTO_SOURCE_DIR}/capnp/compiler/generics.c++"
    "${CAPNPROTO_SOURCE_DIR}/capnp/compiler/node-translator.c++"
    "${CAPNPROTO_SOURCE_DIR}/capnp/compiler/compiler.c++"
    "${CAPNPROTO_SOURCE_DIR}/capnp/schema-parser.c++"
@@ -47,6 +47,7 @@ set(SRCS
)

add_library(cxx ${SRCS})
set_target_properties(cxx PROPERTIES FOLDER "contrib/libcxx-cmake")

target_include_directories(cxx SYSTEM BEFORE PUBLIC $<BUILD_INTERFACE:${LIBCXX_SOURCE_DIR}/include>)
target_compile_definitions(cxx PRIVATE -D_LIBCPP_BUILDING_LIBRARY -DLIBCXX_BUILDING_LIBCXXABI)
@@ -22,6 +22,7 @@ set(SRCS
)

add_library(cxxabi ${SRCS})
set_target_properties(cxxabi PROPERTIES FOLDER "contrib/libcxxabi-cmake")

# Third party library may have substandard code.
target_compile_options(cxxabi PRIVATE -w)
@@ -39,6 +39,7 @@ set(LIBUNWIND_SOURCES
    ${LIBUNWIND_ASM_SOURCES})

add_library(unwind ${LIBUNWIND_SOURCES})
set_target_properties(unwind PROPERTIES FOLDER "contrib/libunwind-cmake")

target_include_directories(unwind SYSTEM BEFORE PUBLIC $<BUILD_INTERFACE:${LIBUNWIND_SOURCE_DIR}/include>)
target_compile_definitions(unwind PRIVATE -D_LIBUNWIND_NO_HEAP=1 -D_DEBUG -D_LIBUNWIND_IS_NATIVE_ONLY)
contrib/replxx (vendored submodule, 2 lines changed)
@@ -1 +1 @@
Subproject commit f97765df14f4a6236d69b8f14b53ef2051ebd95a
Subproject commit b0c266c2d8a835784181e17292b421848c78c6b8
@@ -1,16 +1,22 @@
# docker build -t clickhouse/kerberized-hadoop .

FROM sequenceiq/hadoop-docker:2.7.0
RUN sed -i -e 's/^\#baseurl/baseurl/' /etc/yum.repos.d/CentOS-Base.repo
RUN sed -i -e 's/^mirrorlist/#mirrorlist/' /etc/yum.repos.d/CentOS-Base.repo
RUN sed -i -e 's#http://mirror.centos.org/#http://vault.centos.org/#' /etc/yum.repos.d/CentOS-Base.repo

RUN sed -i -e 's/^\#baseurl/baseurl/' /etc/yum.repos.d/CentOS-Base.repo && \
    sed -i -e 's/^mirrorlist/#mirrorlist/' /etc/yum.repos.d/CentOS-Base.repo && \
    sed -i -e 's#http://mirror.centos.org/#http://vault.centos.org/#' /etc/yum.repos.d/CentOS-Base.repo

# https://community.letsencrypt.org/t/rhel-centos-6-openssl-client-compatibility-after-dst-root-ca-x3-expiration/161032/81
RUN sed -i s/xMDkzMDE0MDExNVow/0MDkzMDE4MTQwM1ow/ /etc/pki/tls/certs/ca-bundle.crt

RUN yum clean all && \
    rpm --rebuilddb && \
    yum -y update && \
    yum -y install yum-plugin-ovl && \
    yum --quiet -y install krb5-workstation.x86_64

RUN cd /tmp && \
    curl http://archive.apache.org/dist/commons/daemon/source/commons-daemon-1.0.15-src.tar.gz -o commons-daemon-1.0.15-src.tar.gz && \
    curl http://archive.apache.org/dist/commons/daemon/source/commons-daemon-1.0.15-src.tar.gz -o commons-daemon-1.0.15-src.tar.gz && \
    tar xzf commons-daemon-1.0.15-src.tar.gz && \
    cd commons-daemon-1.0.15-src/src/native/unix && \
    ./configure && \
@@ -37,7 +37,9 @@ RUN set -x \
    || echo "WARNING: Some file was just downloaded from the internet without any validation and we are installing it into the system"; } \
    && dpkg -i "${PKG_VERSION}.deb"

CMD echo "Running PVS version $PKG_VERSION" && cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \
ENV CCACHE_DIR=/test_output/ccache

CMD echo "Running PVS version $PKG_VERSION" && mkdir -p $CCACHE_DIR && cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \
    && cmake . -D"ENABLE_EMBEDDED_COMPILER"=OFF -D"USE_INTERNAL_PROTOBUF_LIBRARY"=OFF -D"USE_INTERNAL_GRPC_LIBRARY"=OFF -DCMAKE_C_COMPILER=clang-13 -DCMAKE_CXX_COMPILER=clang\+\+-13 \
    && ninja re2_st clickhouse_grpc_protos \
    && pvs-studio-analyzer analyze -o pvs-studio.log -e contrib -j 4 -l ./licence.lic; \
@@ -1,5 +1,7 @@
#!/bin/bash

# yaml check is not the best one

cd /ClickHouse/utils/check-style || echo -e "failure\tRepo not found" > /test_output/check_status.tsv
./check-style -n |& tee /test_output/style_output.txt
./check-typos |& tee /test_output/typos_output.txt
@@ -128,6 +128,8 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va
- `--history_file` — Path to a file containing command history.
- `--param_<name>` — Value for a [query with parameters](#cli-queries-with-parameters).
- `--hardware-utilization` — Print hardware utilization information in progress bar.
- `--print-profile-events` – Print `ProfileEvents` packets.
- `--profile-events-delay-ms` – Delay between printing `ProfileEvents` packets (-1 - print only totals, 0 - print every single packet).

Since version 20.5, `clickhouse-client` has automatic syntax highlighting (always enabled).
@@ -25,9 +25,6 @@
#endif
#include <Common/Exception.h>
#include <Common/formatReadable.h>
#include <Common/NetException.h>
#include <Common/Config/ConfigProcessor.h>
#include <Common/PODArray.h>
#include <Common/TerminalSize.h>
#include <Common/Config/configReadClient.h>
#include "Common/MemoryTracker.h"
@@ -35,13 +32,11 @@
#include <Core/QueryProcessingStage.h>
#include <Client/TestHint.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <Poco/Util/Application.h>

#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <IO/Operators.h>
#include <IO/WriteBufferFromOStream.h>
#include <IO/UseSSL.h>

@@ -51,9 +46,6 @@
#include <Parsers/ASTUseQuery.h>
#include <Parsers/ASTInsertQuery.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/formatAST.h>

#include <Interpreters/InterpreterSetQuery.h>

@@ -86,7 +78,6 @@ namespace ErrorCodes
    extern const int SYNTAX_ERROR;
    extern const int TOO_DEEP_RECURSION;
    extern const int NETWORK_ERROR;
    extern const int UNRECOGNIZED_ARGUMENTS;
    extern const int AUTHENTICATION_FAILED;
}
@@ -993,7 +984,7 @@ void Client::printHelpMessage(const OptionsDescription & options_description)
}


void Client::addAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments)
void Client::addOptions(OptionsDescription & options_description)
{
    /// Main commandline options related to client functionality and all parameters from Settings.
    options_description.main_description->add_options()
@@ -1050,14 +1041,6 @@ void Client::addAndCheckOptions(OptionsDescription & options_description, po::va
    (
        "types", po::value<std::string>(), "types"
    );

    cmd_settings.addProgramOptions(options_description.main_description.value());
    /// Parse main commandline options.
    po::parsed_options parsed = po::command_line_parser(arguments).options(options_description.main_description.value()).run();
    auto unrecognized_options = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional);
    if (unrecognized_options.size() > 1)
        throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'", unrecognized_options[1]);
    po::store(parsed, options);
}
@@ -1235,16 +1218,16 @@ int mainEntryClickHouseClient(int argc, char ** argv)
        client.init(argc, argv);
        return client.run();
    }
    catch (const boost::program_options::error & e)
    {
        std::cerr << "Bad arguments: " << e.what() << std::endl;
        return 1;
    }
    catch (const DB::Exception & e)
    {
        std::cerr << DB::getExceptionMessage(e, false) << std::endl;
        return 1;
    }
    catch (const boost::program_options::error & e)
    {
        std::cerr << "Bad arguments: " << e.what() << std::endl;
        return DB::ErrorCodes::BAD_ARGUMENTS;
    }
    catch (...)
    {
        std::cerr << DB::getCurrentExceptionMessage(true) << std::endl;
@@ -24,7 +24,7 @@ protected:
    String getName() const override { return "client"; }

    void printHelpMessage(const OptionsDescription & options_description) override;
    void addAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) override;
    void addOptions(OptionsDescription & options_description) override;
    void processOptions(const OptionsDescription & options_description, const CommandLineOptions & options,
        const std::vector<Arguments> & external_tables_arguments) override;
    void processConfig() override;
@@ -1,8 +1,6 @@
#include "LocalServer.h"

#include <Poco/Util/XMLConfiguration.h>
#include <Poco/Util/HelpFormatter.h>
#include <Poco/Util/OptionCallback.h>
#include <Poco/String.h>
#include <Poco/Logger.h>
#include <Poco/NullChannel.h>
@@ -10,7 +8,6 @@
#include <Storages/System/attachSystemTables.h>
#include <Storages/System/attachInformationSchemaTables.h>
#include <Interpreters/ProcessList.h>
#include <Interpreters/executeQuery.h>
#include <Interpreters/loadMetadata.h>
#include <Interpreters/DatabaseCatalog.h>
#include <base/getFQDNOrHostName.h>
@@ -20,17 +17,12 @@
#include <Common/Exception.h>
#include <Common/Macros.h>
#include <Common/Config/ConfigProcessor.h>
#include <Common/escapeForFileName.h>
#include <Common/ClickHouseRevision.h>
#include <Common/ThreadStatus.h>
#include <Common/UnicodeBar.h>
#include <Common/config_version.h>
#include <Common/quoteString.h>
#include <loggers/Loggers.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/ReadBufferFromString.h>
#include <IO/WriteBufferFromFileDescriptor.h>
#include <IO/ReadHelpers.h>
#include <IO/UseSSL.h>
#include <Parsers/IAST.h>
#include <base/ErrorHandlers.h>
@@ -42,9 +34,7 @@
#include <Disks/registerDisks.h>
#include <Formats/registerFormats.h>
#include <boost/program_options/options_description.hpp>
#include <boost/program_options.hpp>
#include <base/argsToConfig.h>
#include <Common/TerminalSize.h>
#include <Common/randomSeed.h>
#include <filesystem>
@@ -512,19 +502,16 @@ void LocalServer::processConfig()

    format = config().getString("output-format", config().getString("format", is_interactive ? "PrettyCompact" : "TSV"));
    insert_format = "Values";

    /// Setting value from cmd arg overrides one from config
    if (global_context->getSettingsRef().max_insert_block_size.changed)
        insert_format_max_block_size = global_context->getSettingsRef().max_insert_block_size;
    else
        insert_format_max_block_size = config().getInt("insert_format_max_block_size", global_context->getSettingsRef().max_insert_block_size);

    /// Skip networking

    /// Sets external authenticators config (LDAP, Kerberos).
    global_context->setExternalAuthenticatorsConfig(config());

    global_context->initializeBackgroundExecutors();

    setupUsers();

    /// Limit on total number of concurrently executing queries.
@@ -660,7 +647,7 @@ void LocalServer::printHelpMessage(const OptionsDescription & options_descriptio
}


void LocalServer::addAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments)
void LocalServer::addOptions(OptionsDescription & options_description)
{
    options_description.main_description->add_options()
        ("database,d", po::value<std::string>(), "database")
@@ -678,11 +665,8 @@ void LocalServer::addAndCheckOptions(OptionsDescription & options_description, p
        ("logger.level", po::value<std::string>(), "Log level")

        ("no-system-tables", "do not attach system tables (better startup time)")
        ("path", po::value<std::string>(), "Storage path")
    ;

    cmd_settings.addProgramOptions(options_description.main_description.value());
    po::parsed_options parsed = po::command_line_parser(arguments).options(options_description.main_description.value()).run();
    po::store(parsed, options);
}
@@ -737,6 +721,17 @@ int mainEntryClickHouseLocal(int argc, char ** argv)
        app.init(argc, argv);
        return app.run();
    }
    catch (const DB::Exception & e)
    {
        std::cerr << DB::getExceptionMessage(e, false) << std::endl;
        auto code = DB::getCurrentExceptionCode();
        return code ? code : 1;
    }
    catch (const boost::program_options::error & e)
    {
        std::cerr << "Bad arguments: " << e.what() << std::endl;
        return DB::ErrorCodes::BAD_ARGUMENTS;
    }
    catch (...)
    {
        std::cerr << DB::getCurrentExceptionMessage(true) << '\n';
@@ -40,7 +40,7 @@ protected:
    String getQueryTextPrefix() override;
    void printHelpMessage(const OptionsDescription & options_description) override;

    void addAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) override;
    void addOptions(OptionsDescription & options_description) override;
    void processOptions(const OptionsDescription & options_description, const CommandLineOptions & options,
        const std::vector<Arguments> &) override;
    void processConfig() override;
@@ -922,7 +922,7 @@ if (ThreadFuzzer::instance().isEffective())

    /// Initialize background executors after we load default_profile config.
    /// This is needed to load proper values of background_pool_size etc.
    global_context->initializeBackgroundExecutors();
    global_context->initializeBackgroundExecutorsIfNeeded();

    if (settings.async_insert_threads)
        global_context->setAsynchronousInsertQueue(std::make_shared<AsynchronousInsertQueue>(
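The rename to initializeBackgroundExecutorsIfNeeded(), together with the plain initializeBackgroundExecutors() call added to LocalServer::processConfig() earlier in this diff, implies the initialization can now be reached from more than one entry point and must be safe to repeat. A guarded-initialization sketch of that shape; the class and member names below are hypothetical, not ClickHouse's:

```cpp
#include <mutex>

class ContextSketch
{
public:
    /// Safe to call from any number of entry points: only the first call initializes.
    void initializeBackgroundExecutorsIfNeeded()
    {
        std::call_once(executors_initialized, [this] { initializeBackgroundExecutors(); });
    }

private:
    void initializeBackgroundExecutors() { /* create pools, start threads, ... */ }
    std::once_flag executors_initialized;
};

int main()
{
    ContextSketch ctx;
    ctx.initializeBackgroundExecutorsIfNeeded();
    ctx.initializeBackgroundExecutorsIfNeeded();  /// second call is a no-op
}
```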
@@ -71,6 +71,7 @@ namespace ErrorCodes
    extern const int UNEXPECTED_PACKET_FROM_SERVER;
    extern const int INVALID_USAGE_OF_INPUT;
    extern const int CANNOT_SET_SIGNAL_HANDLER;
    extern const int UNRECOGNIZED_ARGUMENTS;
}

}
@@ -266,7 +267,7 @@ void ClientBase::onLogData(Block & block)
{
    initLogsOutputStream();
    progress_indication.clearProgressOutput();
    logs_out_stream->write(block);
    logs_out_stream->writeLogs(block);
    logs_out_stream->flush();
}
@@ -668,39 +669,61 @@ void ClientBase::onEndOfStream()
void ClientBase::onProfileEvents(Block & block)
{
    const auto rows = block.rows();
    if (rows == 0 || !progress_indication.print_hardware_utilization)
    if (rows == 0)
        return;
    const auto & array_thread_id = typeid_cast<const ColumnUInt64 &>(*block.getByName("thread_id").column).getData();
    const auto & names = typeid_cast<const ColumnString &>(*block.getByName("name").column);
    const auto & host_names = typeid_cast<const ColumnString &>(*block.getByName("host_name").column);
    const auto & array_values = typeid_cast<const ColumnUInt64 &>(*block.getByName("value").column).getData();

    const auto * user_time_name = ProfileEvents::getName(ProfileEvents::UserTimeMicroseconds);
    const auto * system_time_name = ProfileEvents::getName(ProfileEvents::SystemTimeMicroseconds);

    HostToThreadTimesMap thread_times;
    for (size_t i = 0; i < rows; ++i)
    if (progress_indication.print_hardware_utilization)
    {
        auto thread_id = array_thread_id[i];
        auto host_name = host_names.getDataAt(i).toString();
        if (thread_id != 0)
            progress_indication.addThreadIdToList(host_name, thread_id);
        auto event_name = names.getDataAt(i);
        auto value = array_values[i];
        if (event_name == user_time_name)
        const auto & array_thread_id = typeid_cast<const ColumnUInt64 &>(*block.getByName("thread_id").column).getData();
        const auto & names = typeid_cast<const ColumnString &>(*block.getByName("name").column);
        const auto & host_names = typeid_cast<const ColumnString &>(*block.getByName("host_name").column);
        const auto & array_values = typeid_cast<const ColumnUInt64 &>(*block.getByName("value").column).getData();

        const auto * user_time_name = ProfileEvents::getName(ProfileEvents::UserTimeMicroseconds);
        const auto * system_time_name = ProfileEvents::getName(ProfileEvents::SystemTimeMicroseconds);

        HostToThreadTimesMap thread_times;
        for (size_t i = 0; i < rows; ++i)
        {
            thread_times[host_name][thread_id].user_ms = value;
            auto thread_id = array_thread_id[i];
            auto host_name = host_names.getDataAt(i).toString();
            if (thread_id != 0)
                progress_indication.addThreadIdToList(host_name, thread_id);
            auto event_name = names.getDataAt(i);
            auto value = array_values[i];
            if (event_name == user_time_name)
            {
                thread_times[host_name][thread_id].user_ms = value;
            }
            else if (event_name == system_time_name)
            {
                thread_times[host_name][thread_id].system_ms = value;
            }
            else if (event_name == MemoryTracker::USAGE_EVENT_NAME)
            {
                thread_times[host_name][thread_id].memory_usage = value;
            }
        }
        else if (event_name == system_time_name)
        progress_indication.updateThreadEventData(thread_times);
    }

    if (profile_events.print)
    {
        if (profile_events.watch.elapsedMilliseconds() >= profile_events.delay_ms)
        {
            thread_times[host_name][thread_id].system_ms = value;
            initLogsOutputStream();
            progress_indication.clearProgressOutput();
            logs_out_stream->writeProfileEvents(block);
            logs_out_stream->flush();

            profile_events.watch.restart();
            profile_events.last_block = {};
        }
        else if (event_name == MemoryTracker::USAGE_EVENT_NAME)
        else
        {
            thread_times[host_name][thread_id].memory_usage = value;
            profile_events.last_block = block;
        }
    }
    progress_indication.updateThreadEventData(thread_times);
}
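The restructured function now does two independent jobs: feed hardware-utilization data to the progress bar, and print ProfileEvents packets rate-limited by profile_events.delay_ms, stashing the newest block when a packet arrives too soon and flushing it at end of query. The throttling half distills to a small, general pattern; a sketch using std::chrono and a string as a stand-in for DB::Block (not ClickHouse code):

```cpp
#include <chrono>
#include <cstdint>
#include <iostream>
#include <optional>
#include <string>

struct ProfileEventsPrinter
{
    using Clock = std::chrono::steady_clock;

    std::uint64_t delay_ms = 0;
    Clock::time_point last_print = Clock::now();
    std::optional<std::string> last_block;  /// stand-in for the stashed Block

    void onPacket(const std::string & block)
    {
        auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(Clock::now() - last_print).count();
        if (static_cast<std::uint64_t>(elapsed) >= delay_ms)
        {
            std::cout << block << '\n';  /// print right away...
            last_print = Clock::now();
            last_block.reset();          /// ...and drop any stashed packet
        }
        else
        {
            last_block = block;          /// too soon: remember only the latest packet
        }
    }

    void onQueryEnd()
    {
        if (last_block)                  /// always print the final state once
            std::cout << *last_block << '\n';
        last_block.reset();
    }
};

int main()
{
    ProfileEventsPrinter printer;
    printer.delay_ms = 100;
    printer.onPacket("CPU: 10%");
    printer.onPacket("CPU: 50%");  /// likely throttled and stashed
    printer.onQueryEnd();          /// flushes the stashed packet
}
```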
@@ -1023,6 +1046,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
    processed_rows = 0;
    written_first_block = false;
    progress_indication.resetProgress();
    profile_events.watch.restart();

    {
        /// Temporarily apply query settings to context.
@@ -1091,6 +1115,15 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
        }
    }

    /// Always print last block (if it was not printed already)
    if (profile_events.last_block)
    {
        initLogsOutputStream();
        progress_indication.clearProgressOutput();
        logs_out_stream->writeProfileEvents(profile_events.last_block);
        logs_out_stream->flush();
    }

    if (is_interactive)
    {
        std::cout << std::endl << processed_rows << " rows in set. Elapsed: " << progress_indication.elapsedSeconds() << " sec. ";
@@ -1505,6 +1538,26 @@ void ClientBase::readArguments(int argc, char ** argv, Arguments & common_argume
    }
}

void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments)
{
    cmd_settings.addProgramOptions(options_description.main_description.value());
    /// Parse main commandline options.
    auto parser = po::command_line_parser(arguments).options(options_description.main_description.value()).allow_unregistered();
    po::parsed_options parsed = parser.run();

    /// Check unrecognized options without positional options.
    auto unrecognized_options = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::exclude_positional);
    if (!unrecognized_options.empty())
        throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'", unrecognized_options[0]);

    /// Check positional options (options after ' -- ', ex: clickhouse-client -- <options>).
    unrecognized_options = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional);
    if (unrecognized_options.size() > 1)
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Positional options are not supported.");

    po::store(parsed, options);
}


void ClientBase::init(int argc, char ** argv)
{
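parseAndCheckOptions leans on Boost.Program_options: allow_unregistered() lets unknown tokens survive parsing, and collect_unrecognized() then retrieves them so the client can raise its own error instead of Boost's. A minimal standalone sketch of the same mechanism (Boost is already a dependency here; the "query" option is invented for illustration):

```cpp
#include <boost/program_options.hpp>
#include <iostream>
#include <string>

namespace po = boost::program_options;

int main(int argc, char ** argv)
{
    po::options_description desc("options");
    desc.add_options()("query,q", po::value<std::string>(), "query to run");

    /// allow_unregistered() keeps unknown tokens instead of throwing inside Boost.
    po::parsed_options parsed = po::command_line_parser(argc, argv)
                                    .options(desc)
                                    .allow_unregistered()
                                    .run();

    /// Unknown --flags (positionals excluded), as in the UNRECOGNIZED_ARGUMENTS branch above.
    auto unrecognized = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::exclude_positional);
    if (!unrecognized.empty())
    {
        std::cerr << "Unrecognized option '" << unrecognized[0] << "'\n";
        return 1;
    }

    /// Positional arguments; the code above tolerates at most one of them.
    unrecognized = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional);
    if (unrecognized.size() > 1)
    {
        std::cerr << "Positional options are not supported.\n";
        return 1;
    }

    po::variables_map options;
    po::store(parsed, options);
    po::notify(options);
    return 0;
}
```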
@@ -1561,9 +1614,12 @@ void ClientBase::init(int argc, char ** argv)
        ("ignore-error", "do not stop processing in multiquery mode")
        ("stacktrace", "print stack traces of exceptions")
        ("hardware-utilization", "print hardware utilization information in progress bar")
        ("print-profile-events", po::value(&profile_events.print)->zero_tokens(), "Printing ProfileEvents packets")
        ("profile-events-delay-ms", po::value<UInt64>()->default_value(profile_events.delay_ms), "Delay between printing `ProfileEvents` packets (-1 - print only totals, 0 - print every single packet)")
    ;

    addAndCheckOptions(options_description, options, common_arguments);
    addOptions(options_description);
    parseAndCheckOptions(options_description, options, common_arguments);
    po::notify(options);

    if (options.count("version") || options.count("V"))
@@ -1611,6 +1667,10 @@ void ClientBase::init(int argc, char ** argv)
        config().setBool("vertical", true);
    if (options.count("stacktrace"))
        config().setBool("stacktrace", true);
    if (options.count("print-profile-events"))
        config().setBool("print-profile-events", true);
    if (options.count("profile-events-delay-ms"))
        config().setInt("profile-events-delay-ms", options["profile-events-delay-ms"].as<UInt64>());
    if (options.count("progress"))
        config().setBool("progress", true);
    if (options.count("echo"))
@@ -1631,6 +1691,8 @@ void ClientBase::init(int argc, char ** argv)
        progress_indication.print_hardware_utilization = true;

    query_processing_stage = QueryProcessingStage::fromString(options["stage"].as<std::string>());
    profile_events.print = options.count("print-profile-events");
    profile_events.delay_ms = options["profile-events-delay-ms"].as<UInt64>();

    processOptions(options_description, options, external_tables_arguments);
    argsToConfig(common_arguments, config(), 100);
@@ -3,6 +3,7 @@
#include <Common/ProgressIndication.h>
#include <Common/InterruptListener.h>
#include <Common/ShellCommand.h>
#include <Common/Stopwatch.h>
#include <Core/ExternalTable.h>
#include <Poco/Util/Application.h>
#include <Interpreters/Context.h>
@@ -91,7 +92,7 @@ protected:
    };

    virtual void printHelpMessage(const OptionsDescription & options_description) = 0;
    virtual void addAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) = 0;
    virtual void addOptions(OptionsDescription & options_description) = 0;
    virtual void processOptions(const OptionsDescription & options_description,
                                const CommandLineOptions & options,
                                const std::vector<Arguments> & external_tables_arguments) = 0;
@@ -132,6 +133,7 @@ private:
    void resetOutput();
    void outputQueryInfo(bool echo_query_);
    void readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector<Arguments> & external_tables_arguments);
    void parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments);

protected:
    bool is_interactive = false; /// Use either interactive line editing interface or batch mode.
@@ -217,6 +219,16 @@ protected:
    QueryFuzzer fuzzer;
    int query_fuzzer_runs = 0;

    struct
    {
        bool print = false;
        /// UINT64_MAX -- print only last
        UInt64 delay_ms = 0;
        Stopwatch watch;
        /// For printing only last (delay_ms == 0).
        Block last_block;
    } profile_events;

    QueryProcessingStage::Enum query_processing_stage;
};
@@ -109,29 +109,29 @@ void highlight(const String & query, std::vector<replxx::Replxx::Color> & colors
    {TokenType::OpeningSquareBracket, Replxx::Color::BROWN},
    {TokenType::ClosingSquareBracket, Replxx::Color::BROWN},
    {TokenType::DoubleColon, Replxx::Color::BROWN},
    {TokenType::OpeningCurlyBrace, Replxx::Color::INTENSE},
    {TokenType::ClosingCurlyBrace, Replxx::Color::INTENSE},
    {TokenType::OpeningCurlyBrace, replxx::color::bold(Replxx::Color::DEFAULT)},
    {TokenType::ClosingCurlyBrace, replxx::color::bold(Replxx::Color::DEFAULT)},

    {TokenType::Comma, Replxx::Color::INTENSE},
    {TokenType::Semicolon, Replxx::Color::INTENSE},
    {TokenType::Dot, Replxx::Color::INTENSE},
    {TokenType::Asterisk, Replxx::Color::INTENSE},
    {TokenType::Comma, replxx::color::bold(Replxx::Color::DEFAULT)},
    {TokenType::Semicolon, replxx::color::bold(Replxx::Color::DEFAULT)},
    {TokenType::Dot, replxx::color::bold(Replxx::Color::DEFAULT)},
    {TokenType::Asterisk, replxx::color::bold(Replxx::Color::DEFAULT)},
    {TokenType::HereDoc, Replxx::Color::CYAN},
    {TokenType::Plus, Replxx::Color::INTENSE},
    {TokenType::Minus, Replxx::Color::INTENSE},
    {TokenType::Slash, Replxx::Color::INTENSE},
    {TokenType::Percent, Replxx::Color::INTENSE},
    {TokenType::Arrow, Replxx::Color::INTENSE},
    {TokenType::QuestionMark, Replxx::Color::INTENSE},
    {TokenType::Colon, Replxx::Color::INTENSE},
    {TokenType::Equals, Replxx::Color::INTENSE},
    {TokenType::NotEquals, Replxx::Color::INTENSE},
    {TokenType::Less, Replxx::Color::INTENSE},
    {TokenType::Greater, Replxx::Color::INTENSE},
    {TokenType::LessOrEquals, Replxx::Color::INTENSE},
    {TokenType::GreaterOrEquals, Replxx::Color::INTENSE},
    {TokenType::Concatenation, Replxx::Color::INTENSE},
    {TokenType::At, Replxx::Color::INTENSE},
    {TokenType::Plus, replxx::color::bold(Replxx::Color::DEFAULT)},
    {TokenType::Minus, replxx::color::bold(Replxx::Color::DEFAULT)},
    {TokenType::Slash, replxx::color::bold(Replxx::Color::DEFAULT)},
    {TokenType::Percent, replxx::color::bold(Replxx::Color::DEFAULT)},
    {TokenType::Arrow, replxx::color::bold(Replxx::Color::DEFAULT)},
    {TokenType::QuestionMark, replxx::color::bold(Replxx::Color::DEFAULT)},
    {TokenType::Colon, replxx::color::bold(Replxx::Color::DEFAULT)},
    {TokenType::Equals, replxx::color::bold(Replxx::Color::DEFAULT)},
    {TokenType::NotEquals, replxx::color::bold(Replxx::Color::DEFAULT)},
    {TokenType::Less, replxx::color::bold(Replxx::Color::DEFAULT)},
    {TokenType::Greater, replxx::color::bold(Replxx::Color::DEFAULT)},
    {TokenType::LessOrEquals, replxx::color::bold(Replxx::Color::DEFAULT)},
    {TokenType::GreaterOrEquals, replxx::color::bold(Replxx::Color::DEFAULT)},
    {TokenType::Concatenation, replxx::color::bold(Replxx::Color::DEFAULT)},
    {TokenType::At, replxx::color::bold(Replxx::Color::DEFAULT)},
    {TokenType::DoubleAt, Replxx::Color::MAGENTA},

    {TokenType::EndOfStream, Replxx::Color::DEFAULT},
@@ -142,7 +142,7 @@ void highlight(const String & query, std::vector<replxx::Replxx::Color> & colors
    {TokenType::ErrorDoubleQuoteIsNotClosed, Replxx::Color::RED},
    {TokenType::ErrorSinglePipeMark, Replxx::Color::RED},
    {TokenType::ErrorWrongNumber, Replxx::Color::RED},
    { TokenType::ErrorMaxQuerySizeExceeded, Replxx::Color::RED }};
    {TokenType::ErrorMaxQuerySizeExceeded, Replxx::Color::RED}};

    const Replxx::Color unknown_token_color = Replxx::Color::RED;
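These color-table changes ride on the replxx submodule bump above: the Replxx::Color::INTENSE palette entry is gone, and bold is instead expressed by composing an attribute over a base color with replxx::color::bold(), apparently part of replxx's newer rich-color support. A hedged sketch of how such colors are applied through replxx's highlighter callback; the per-character token logic is invented for illustration:

```cpp
#include <replxx.hxx>
#include <string>

using Replxx = replxx::Replxx;

int main()
{
    Replxx rx;
    /// One color slot per character of the input line (replxx sizes the vector).
    rx.set_highlighter_callback(
        [](const std::string & input, Replxx::colors_t & colors)
        {
            for (size_t i = 0; i < input.size() && i < colors.size(); ++i)
                if (input[i] == '*' || input[i] == '+')
                    colors[i] = replxx::color::bold(Replxx::Color::DEFAULT);  /// bold, default foreground
        });
    while (const char * line = rx.input("sql> "))
        rx.history_add(line);
}
```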
@@ -1,6 +1,7 @@
#include <Client/InternalTextLogs.h>
#include <Core/Block.h>
#include <Interpreters/InternalTextLogsQueue.h>
#include <Interpreters/ProfileEventsExt.h>
#include <Common/typeid_cast.h>
#include <Common/HashTable/Hash.h>
#include <DataTypes/IDataType.h>
@@ -13,7 +14,7 @@
namespace DB
{

void InternalTextLogs::write(const Block & block)
void InternalTextLogs::writeLogs(const Block & block)
{
    const auto & array_event_time = typeid_cast<const ColumnUInt32 &>(*block.getByName("event_time").column).getData();
    const auto & array_microseconds = typeid_cast<const ColumnUInt32 &>(*block.getByName("event_time_microseconds").column).getData();
@@ -97,4 +98,69 @@ void InternalTextLogs::write(const Block & block)
    }
}

void InternalTextLogs::writeProfileEvents(const Block & block)
{
    const auto & column_host_name = typeid_cast<const ColumnString &>(*block.getByName("host_name").column);
    const auto & array_current_time = typeid_cast<const ColumnUInt32 &>(*block.getByName("current_time").column).getData();
    const auto & array_thread_id = typeid_cast<const ColumnUInt64 &>(*block.getByName("thread_id").column).getData();
    const auto & array_type = typeid_cast<const ColumnInt8 &>(*block.getByName("type").column).getData();
    const auto & column_name = typeid_cast<const ColumnString &>(*block.getByName("name").column);
    const auto & array_value = typeid_cast<const ColumnUInt64 &>(*block.getByName("value").column).getData();

    for (size_t row_num = 0; row_num < block.rows(); ++row_num)
    {
        /// host_name
        auto host_name = column_host_name.getDataAt(row_num);
        if (host_name.size)
        {
            writeCString("[", wb);
            if (color)
                writeString(setColor(StringRefHash()(host_name)), wb);
            writeString(host_name, wb);
            if (color)
                writeCString(resetColor(), wb);
            writeCString("] ", wb);
        }

        /// current_time
        auto current_time = array_current_time[row_num];
        writeDateTimeText<'.', ':'>(current_time, wb);

        /// thread_id
        UInt64 thread_id = array_thread_id[row_num];
        writeCString(" [ ", wb);
        if (color)
            writeString(setColor(intHash64(thread_id)), wb);
        writeIntText(thread_id, wb);
        if (color)
            writeCString(resetColor(), wb);
        writeCString(" ] ", wb);

        /// name
        auto name = column_name.getDataAt(row_num);
        if (color)
            writeString(setColor(StringRefHash()(name)), wb);
        DB::writeString(name, wb);
        if (color)
            writeCString(resetColor(), wb);
        writeCString(": ", wb);

        /// value
        UInt64 value = array_value[row_num];
        writeIntText(value, wb);

        //// type
        Int8 type = array_type[row_num];
        writeCString(" (", wb);
        if (color)
            writeString(setColor(intHash64(type)), wb);
        writeString(toString(ProfileEvents::TypeEnum->castToName(type)), wb);
        if (color)
            writeCString(resetColor(), wb);
        writeCString(")", wb);

        writeChar('\n', wb);
    }
}

}
@@ -6,16 +6,37 @@
namespace DB
{

/// Prints internal server logs
/// Input blocks have to have the same structure as SystemLogsQueue::getSampleBlock()
/// Prints internal server logs or profile events with colored output (if requested).
/// NOTE: IRowOutputFormat does not suite well for this case
class InternalTextLogs
{
public:
    InternalTextLogs(WriteBuffer & buf_out, bool color_) : wb(buf_out), color(color_) {}


    void write(const Block & block);
    /// Print internal server logs
    ///
    /// Input blocks have to have the same structure as SystemLogsQueue::getSampleBlock():
    /// - event_time
    /// - event_time_microseconds
    /// - host_name
    /// - query_id
    /// - thread_id
    /// - priority
    /// - source
    /// - text
    void writeLogs(const Block & block);
    /// Print profile events.
    ///
    /// Block:
    /// - host_name
    /// - current_time
    /// - thread_id
    /// - type
    /// - name
    /// - value
    ///
    /// See also TCPHandler::sendProfileEvents() for block columns.
    void writeProfileEvents(const Block & block);

    void flush()
    {
@@ -267,19 +267,19 @@ bool LocalConnection::poll(size_t)
        }
    }

    if (state->is_finished && send_progress && !state->sent_progress)
    {
        state->sent_progress = true;
        next_packet_type = Protocol::Server::Progress;
        return true;
    }

    if (state->is_finished)
    {
        finishQuery();
        return true;
    }

    if (send_progress && !state->sent_progress)
    {
        state->sent_progress = true;
        next_packet_type = Protocol::Server::Progress;
        return true;
    }

    if (state->block && state->block.value())
    {
        next_packet_type = Protocol::Server::Data;
@@ -293,7 +293,8 @@ bool LocalConnection::pollImpl()
{
    Block block;
    auto next_read = pullBlock(block);
    if (block)

    if (block && !state->io.null_format)
    {
        state->block.emplace(block);
    }
@@ -589,6 +589,8 @@
    M(619, POSTGRESQL_REPLICATION_INTERNAL_ERROR) \
    M(620, QUERY_NOT_ALLOWED) \
    M(621, CANNOT_NORMALIZE_STRING) \
    M(622, CANNOT_PARSE_CAPN_PROTO_SCHEMA) \
    M(623, CAPN_PROTO_BAD_CAST) \
    \
    M(999, KEEPER_EXCEPTION) \
    M(1000, POCO_EXCEPTION) \
@@ -118,7 +118,7 @@ bool pathStartsWith(const std::filesystem::path & path, const std::filesystem::p
    return absolute_path.starts_with(absolute_prefix_path);
}

bool symlinkStartsWith(const std::filesystem::path & path, const std::filesystem::path & prefix_path)
bool fileOrSymlinkPathStartsWith(const std::filesystem::path & path, const std::filesystem::path & prefix_path)
{
    /// Differs from pathStartsWith in how `path` is normalized before comparison.
    /// Make `path` absolute if it was relative and put it into normalized form: remove
@@ -140,13 +140,14 @@ bool pathStartsWith(const String & path, const String & prefix_path)
    return pathStartsWith(filesystem_path, filesystem_prefix_path);
}

bool symlinkStartsWith(const String & path, const String & prefix_path)
bool fileOrSymlinkPathStartsWith(const String & path, const String & prefix_path)
{
    auto filesystem_path = std::filesystem::path(path);
    auto filesystem_prefix_path = std::filesystem::path(prefix_path);

    return symlinkStartsWith(filesystem_path, filesystem_prefix_path);
    return fileOrSymlinkPathStartsWith(filesystem_path, filesystem_prefix_path);
}

}
@@ -35,8 +35,9 @@ bool pathStartsWith(const std::filesystem::path & path, const std::filesystem::p
/// Returns true if path starts with prefix path
bool pathStartsWith(const String & path, const String & prefix_path);

/// Returns true if symlink starts with prefix path
bool symlinkStartsWith(const String & path, const String & prefix_path);
/// Same as pathStartsWith, but without canonization, i.e. allowed to check symlinks.
/// (Path is made absolute and normalized.)
bool fileOrSymlinkPathStartsWith(const String & path, const String & prefix_path);

}
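The rename marks a real semantic difference: pathStartsWith() canonizes the path, resolving symlinks, while fileOrSymlinkPathStartsWith() only makes it absolute and lexically normal, so the check applies to the symlink file itself rather than its target. A standalone sketch of the two normalizations using std::filesystem (C++20 for starts_with; this is an illustration, not the ClickHouse implementation):

```cpp
#include <filesystem>
#include <iostream>

namespace fs = std::filesystem;

/// Resolves symlinks before comparing: a symlink under `prefix` whose target
/// lies elsewhere will NOT count as being inside `prefix`.
bool pathStartsWithSketch(const fs::path & path, const fs::path & prefix)
{
    auto canonical = fs::weakly_canonical(path);          /// follows symlinks
    auto canonical_prefix = fs::weakly_canonical(prefix);
    return canonical.string().starts_with(canonical_prefix.string());
}

/// Only absolutizes and lexically normalizes ("a/./b/.." -> "a"), without
/// touching symlinks: the symlink file itself just has to live under `prefix`.
bool fileOrSymlinkPathStartsWithSketch(const fs::path & path, const fs::path & prefix)
{
    auto normalized = fs::absolute(path).lexically_normal();
    auto normalized_prefix = fs::absolute(prefix).lexically_normal();
    return normalized.string().starts_with(normalized_prefix.string());
}

int main()
{
    /// Hypothetical dictionary-library path, as in the DDL checks below.
    std::cout << fileOrSymlinkPathStartsWithSketch("/var/lib/dict/lib.so", "/var/lib/dict") << '\n';  // 1
}
```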
@@ -625,7 +625,8 @@ class IColumn;
    M(Bool, cross_to_inner_join_rewrite, true, "Use inner join instead of comma/cross join if possible", 0) \
    \
    M(Bool, output_format_arrow_low_cardinality_as_dictionary, false, "Enable output LowCardinality type as Dictionary Arrow type", 0) \

    \
    M(EnumComparingMode, format_capn_proto_enum_comparising_mode, FormatSettings::EnumComparingMode::BY_VALUES, "How to map ClickHouse Enum and CapnProto Enum", 0)\
    // End of FORMAT_FACTORY_SETTINGS
    // Please add settings non-related to formats into the COMMON_SETTINGS above.
@@ -116,4 +116,9 @@ IMPLEMENT_SETTING_ENUM(ShortCircuitFunctionEvaluation, ErrorCodes::BAD_ARGUMENTS
    {{"enable", ShortCircuitFunctionEvaluation::ENABLE},
     {"force_enable", ShortCircuitFunctionEvaluation::FORCE_ENABLE},
     {"disable", ShortCircuitFunctionEvaluation::DISABLE}})

IMPLEMENT_SETTING_ENUM(EnumComparingMode, ErrorCodes::BAD_ARGUMENTS,
    {{"by_names", FormatSettings::EnumComparingMode::BY_NAMES},
     {"by_values", FormatSettings::EnumComparingMode::BY_VALUES},
     {"by_names_case_insensitive", FormatSettings::EnumComparingMode::BY_NAMES_CASE_INSENSITIVE}})
}
@@ -168,4 +168,6 @@ enum class ShortCircuitFunctionEvaluation

DECLARE_SETTING_ENUM(ShortCircuitFunctionEvaluation)

DECLARE_SETTING_ENUM_WITH_RENAME(EnumComparingMode, FormatSettings::EnumComparingMode)

}
@@ -1,4 +1,5 @@
#include <DataTypes/EnumValues.h>
#include <boost/algorithm/string.hpp>

namespace DB
{
@@ -82,6 +83,24 @@ Names EnumValues<T>::getAllRegisteredNames() const
    return result;
}

template <typename T>
std::unordered_set<String> EnumValues<T>::getSetOfAllNames(bool to_lower) const
{
    std::unordered_set<String> result;
    for (const auto & value : values)
        result.insert(to_lower ? boost::algorithm::to_lower_copy(value.first) : value.first);
    return result;
}

template <typename T>
std::unordered_set<T> EnumValues<T>::getSetOfAllValues() const
{
    std::unordered_set<T> result;
    for (const auto & value : values)
        result.insert(value.second);
    return result;
}

template class EnumValues<Int8>;
template class EnumValues<Int16>;

@@ -80,6 +80,10 @@ public:
    }

    Names getAllRegisteredNames() const override;

    std::unordered_set<String> getSetOfAllNames(bool to_lower) const;

    std::unordered_set<T> getSetOfAllValues() const;
};

}
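These two helpers exist so the CapnProto enum compatibility check (in CapnProtoUtils.cpp, below) reduces to a plain set comparison. A toy version of the name-based half, with std::tolower standing in for boost::algorithm::to_lower_copy and hand-written name lists standing in for real enum definitions:

```cpp
#include <algorithm>
#include <cctype>
#include <iostream>
#include <string>
#include <unordered_set>
#include <vector>

std::string toLower(std::string s)
{
    std::transform(s.begin(), s.end(), s.begin(), [](unsigned char c) { return std::tolower(c); });
    return s;
}

/// Build the set of enum names, optionally lower-cased (the BY_NAMES_CASE_INSENSITIVE mode).
std::unordered_set<std::string> setOfAllNames(const std::vector<std::string> & names, bool to_lower)
{
    std::unordered_set<std::string> result;
    for (const auto & name : names)
        result.insert(to_lower ? toLower(name) : name);
    return result;
}

int main()
{
    auto clickhouse_names = setOfAllNames({"Hello", "World"}, /*to_lower=*/true);
    auto capnp_names = setOfAllNames({"hello", "world"}, /*to_lower=*/true);
    /// Equal sets => the two enums are compatible by (case-insensitive) names.
    std::cout << (clickhouse_names == capnp_names ? "compatible" : "mismatch") << '\n';
}
```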
@@ -100,7 +100,7 @@ Pipe ExecutablePoolDictionarySource::getStreamForBlock(const Block & block)
        config.terminate_in_destructor_strategy = ShellCommand::DestructorStrategy{ true /*terminate_in_destructor*/, configuration.command_termination_timeout };
        auto shell_command = ShellCommand::execute(config);
        return shell_command;
    }, configuration.max_command_execution_time * 1000);
    }, configuration.max_command_execution_time * 10000);

    if (!result)
        throw Exception(ErrorCodes::TIMEOUT_EXCEEDED,
@@ -31,7 +31,7 @@ FileDictionarySource::FileDictionarySource(
    , context(context_)
{
    auto user_files_path = context->getUserFilesPath();
    if (created_from_ddl && !pathStartsWith(filepath, user_files_path))
    if (created_from_ddl && !fileOrSymlinkPathStartsWith(filepath, user_files_path))
        throw Exception(ErrorCodes::PATH_ACCESS_DENIED, "File path {} is not inside {}", filepath, user_files_path);
}
@@ -41,13 +41,7 @@ LibraryDictionarySource::LibraryDictionarySource(
    , context(Context::createCopy(context_))
{
    auto dictionaries_lib_path = context->getDictionariesLibPath();
    bool path_checked = false;
    if (fs::is_symlink(path))
        path_checked = symlinkStartsWith(path, dictionaries_lib_path);
    else
        path_checked = pathStartsWith(path, dictionaries_lib_path);

    if (created_from_ddl && !path_checked)
    if (created_from_ddl && !fileOrSymlinkPathStartsWith(path, dictionaries_lib_path))
        throw Exception(ErrorCodes::PATH_ACCESS_DENIED, "File path {} is not inside {}", path, dictionaries_lib_path);

    if (!fs::exists(path))
src/Formats/CapnProtoUtils.cpp (new file, 432 lines)
@@ -0,0 +1,432 @@
#include <Formats/CapnProtoUtils.h>

#if USE_CAPNP

#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeEnum.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/IDataType.h>
#include <boost/algorithm/string.hpp>
#include <boost/algorithm/string/join.hpp>
#include <capnp/schema.h>
#include <capnp/schema-parser.h>
#include <fcntl.h>

namespace DB
{

namespace ErrorCodes
{
    extern const int CANNOT_PARSE_CAPN_PROTO_SCHEMA;
    extern const int THERE_IS_NO_COLUMN;
    extern const int BAD_TYPE_OF_FIELD;
    extern const int CAPN_PROTO_BAD_CAST;
    extern const int FILE_DOESNT_EXIST;
    extern const int UNKNOWN_EXCEPTION;
    extern const int INCORRECT_DATA;
}

capnp::StructSchema CapnProtoSchemaParser::getMessageSchema(const FormatSchemaInfo & schema_info)
{
    capnp::ParsedSchema schema;
    try
    {
        int fd;
        KJ_SYSCALL(fd = open(schema_info.schemaDirectory().data(), O_RDONLY));
        auto schema_dir = kj::newDiskDirectory(kj::OsFileHandle(fd));
        schema = impl.parseFromDirectory(*schema_dir, kj::Path::parse(schema_info.schemaPath()), {});
    }
    catch (const kj::Exception & e)
    {
        /// That's not good to determine the type of error by its description, but
        /// this is the only way to do it here, because kj doesn't specify the type of error.
        auto description = std::string_view(e.getDescription().cStr());
        if (description.find("No such file or directory") != String::npos || description.find("no such directory") != String::npos)
            throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot open CapnProto schema, file {} doesn't exists", schema_info.absoluteSchemaPath());

        if (description.find("Parse error") != String::npos)
            throw Exception(ErrorCodes::CANNOT_PARSE_CAPN_PROTO_SCHEMA, "Cannot parse CapnProto schema {}:{}", schema_info.schemaPath(), e.getLine());

        throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Unknown exception while parsing CapnProro schema: {}, schema dir and file: {}, {}", description, schema_info.schemaDirectory(), schema_info.schemaPath());
    }

    auto message_maybe = schema.findNested(schema_info.messageName());
    auto * message_schema = kj::_::readMaybe(message_maybe);
    if (!message_schema)
        throw Exception(ErrorCodes::CANNOT_PARSE_CAPN_PROTO_SCHEMA, "CapnProto schema doesn't contain message with name {}", schema_info.messageName());
    return message_schema->asStruct();
}

bool compareEnumNames(const String & first, const String & second, FormatSettings::EnumComparingMode mode)
{
    if (mode == FormatSettings::EnumComparingMode::BY_NAMES_CASE_INSENSITIVE)
        return boost::algorithm::to_lower_copy(first) == boost::algorithm::to_lower_copy(second);
    return first == second;
}

static const std::map<capnp::schema::Type::Which, String> capnp_simple_type_names =
{
    {capnp::schema::Type::Which::BOOL, "Bool"},
    {capnp::schema::Type::Which::VOID, "Void"},
    {capnp::schema::Type::Which::INT8, "Int8"},
    {capnp::schema::Type::Which::INT16, "Int16"},
    {capnp::schema::Type::Which::INT32, "Int32"},
    {capnp::schema::Type::Which::INT64, "Int64"},
    {capnp::schema::Type::Which::UINT8, "UInt8"},
    {capnp::schema::Type::Which::UINT16, "UInt16"},
    {capnp::schema::Type::Which::UINT32, "UInt32"},
    {capnp::schema::Type::Which::UINT64, "UInt64"},
    {capnp::schema::Type::Which::FLOAT32, "Float32"},
    {capnp::schema::Type::Which::FLOAT64, "Float64"},
    {capnp::schema::Type::Which::TEXT, "Text"},
    {capnp::schema::Type::Which::DATA, "Data"},
    {capnp::schema::Type::Which::INTERFACE, "Interface"},
    {capnp::schema::Type::Which::ANY_POINTER, "AnyPointer"},
};

static bool checkIfStructContainsUnnamedUnion(const capnp::StructSchema & struct_schema)
{
    return struct_schema.getFields().size() != struct_schema.getNonUnionFields().size();
}

static bool checkIfStructIsNamedUnion(const capnp::StructSchema & struct_schema)
{
    return struct_schema.getFields().size() == struct_schema.getUnionFields().size();
}
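/// For intuition (hypothetical .capnp schema, not from the ClickHouse repo):
///
///   struct WithUnnamedUnion { x @0 :Int64; union { a @1 :Int64; b @2 :Text; } }
///
/// Union members land in the struct's own field list, so getFields() counts
/// x, a and b while getNonUnionFields() counts only x: the sizes differ and
/// checkIfStructContainsUnnamedUnion() returns true. If instead every field
/// of the inspected struct is a union member, getFields() and getUnionFields()
/// have equal sizes and checkIfStructIsNamedUnion() returns true.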
||||
/// Get full name of type for better exception messages.
|
||||
static String getCapnProtoFullTypeName(const capnp::Type & type)
|
||||
{
|
||||
switch (type.which())
|
||||
{
|
||||
case capnp::schema::Type::Which::STRUCT:
|
||||
{
|
||||
auto struct_schema = type.asStruct();
|
||||
|
||||
auto non_union_fields = struct_schema.getNonUnionFields();
|
||||
std::vector<String> non_union_field_names;
|
||||
for (auto nested_field : non_union_fields)
|
||||
non_union_field_names.push_back(String(nested_field.getProto().getName()) + " " + getCapnProtoFullTypeName(nested_field.getType()));
|
||||
|
||||
auto union_fields = struct_schema.getUnionFields();
|
||||
std::vector<String> union_field_names;
|
||||
for (auto nested_field : union_fields)
|
||||
union_field_names.push_back(String(nested_field.getProto().getName()) + " " + getCapnProtoFullTypeName(nested_field.getType()));
|
||||
|
||||
String union_name = "Union(" + boost::algorithm::join(union_field_names, ", ") + ")";
|
||||
/// Check if the struct is a named union.
|
||||
if (non_union_field_names.empty())
|
||||
return union_name;
|
||||
|
||||
String type_name = "Struct(" + boost::algorithm::join(non_union_field_names, ", ");
|
||||
/// Check if the struct contains unnamed union.
|
||||
if (!union_field_names.empty())
|
||||
type_name += ", " + union_name;
|
||||
type_name += ")";
|
||||
return type_name;
|
||||
}
|
||||
case capnp::schema::Type::Which::LIST:
|
||||
return "List(" + getCapnProtoFullTypeName(type.asList().getElementType()) + ")";
|
||||
case capnp::schema::Type::Which::ENUM:
|
||||
{
|
||||
auto enum_schema = type.asEnum();
|
||||
String enum_name = "Enum(";
|
||||
auto enumerants = enum_schema.getEnumerants();
|
||||
for (size_t i = 0; i != enumerants.size(); ++i)
|
||||
{
|
||||
enum_name += String(enumerants[i].getProto().getName()) + " = " + std::to_string(enumerants[i].getOrdinal());
|
||||
if (i + 1 != enumerants.size())
|
||||
enum_name += ", ";
|
||||
}
|
||||
enum_name += ")";
|
||||
return enum_name;
|
||||
}
|
||||
default:
|
||||
auto it = capnp_simple_type_names.find(type.which());
|
||||
if (it == capnp_simple_type_names.end())
|
||||
throw Exception(ErrorCodes::BAD_TYPE_OF_FIELD, "Unknown CapnProto type");
|
||||
return it->second;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Type>
|
||||
static bool checkEnums(const capnp::Type & capnp_type, const DataTypePtr column_type, FormatSettings::EnumComparingMode mode, UInt64 max_value, String & error_message)
|
||||
{
|
||||
if (!capnp_type.isEnum())
|
||||
return false;
|
||||
|
||||
auto enum_schema = capnp_type.asEnum();
|
||||
bool to_lower = mode == FormatSettings::EnumComparingMode::BY_NAMES_CASE_INSENSITIVE;
|
||||
const auto * enum_type = assert_cast<const DataTypeEnum<Type> *>(column_type.get());
|
||||
const auto & enum_values = dynamic_cast<const EnumValues<Type> &>(*enum_type);
|
||||
|
||||
auto enumerants = enum_schema.getEnumerants();
|
||||
if (mode == FormatSettings::EnumComparingMode::BY_VALUES)
|
||||
{
|
||||
/// In CapnProto Enum fields are numbered sequentially starting from zero.
|
||||
if (enumerants.size() > max_value)
|
||||
{
|
||||
error_message += "Enum from CapnProto schema contains values that is out of range for Clickhouse Enum";
|
||||
return false;
|
||||
}
|
||||
|
||||
auto values = enum_values.getSetOfAllValues();
|
||||
std::unordered_set<Type> capn_enum_values;
|
||||
for (auto enumerant : enumerants)
|
||||
capn_enum_values.insert(Type(enumerant.getOrdinal()));
|
||||
auto result = values == capn_enum_values;
|
||||
if (!result)
|
||||
error_message += "The set of values in Enum from CapnProto schema is different from the set of values in ClickHouse Enum";
|
||||
return result;
|
||||
}
|
||||
|
||||
auto names = enum_values.getSetOfAllNames(to_lower);
|
||||
std::unordered_set<String> capn_enum_names;
|
||||
|
||||
for (auto enumerant : enumerants)
|
||||
{
|
||||
String name = enumerant.getProto().getName();
|
||||
capn_enum_names.insert(to_lower ? boost::algorithm::to_lower_copy(name) : name);
|
||||
}
|
||||
|
||||
auto result = names == capn_enum_names;
|
||||
if (!result)
|
||||
error_message += "The set of names in Enum from CapnProto schema is different from the set of names in ClickHouse Enum";
|
||||
return result;
|
||||
}
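
The three comparing modes are easiest to see on a concrete pair of enums. Below is a standalone sketch in plain standard C++ (the hardcoded sets stand in for a hypothetical CapnProto enum with members first = 0, second = 1 and a ClickHouse Enum8('FIRST' = 1, 'Second' = 2); none of it is repo code) that replays the set comparisons checkEnums performs. Only the case-insensitive mode accepts this particular pair.

#include <algorithm>
#include <cassert>
#include <cctype>
#include <set>
#include <string>

static std::string toLower(std::string s)
{
    std::transform(s.begin(), s.end(), s.begin(), [](unsigned char c) { return std::tolower(c); });
    return s;
}

int main()
{
    /// Hypothetical CapnProto enum: enum Level { first @0; second @1; }
    std::set<std::string> capnp_names = {"first", "second"};
    std::set<int> capnp_values = {0, 1};

    /// Hypothetical ClickHouse column: Enum8('FIRST' = 1, 'Second' = 2)
    std::set<std::string> ch_names = {"FIRST", "Second"};
    std::set<int> ch_values = {1, 2};

    /// BY_VALUES: {0, 1} != {1, 2}, so the pair is rejected.
    assert(capnp_values != ch_values);

    /// BY_NAMES: the exact name sets differ, so the pair is rejected.
    assert(capnp_names != ch_names);

    /// BY_NAMES_CASE_INSENSITIVE: the lowercased name sets coincide, so the pair is accepted.
    std::set<std::string> a, b;
    for (const auto & n : capnp_names) a.insert(toLower(n));
    for (const auto & n : ch_names) b.insert(toLower(n));
    assert(a == b);
    return 0;
}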

static bool checkCapnProtoType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message);

static bool checkNullableType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message)
{
    if (!capnp_type.isStruct())
        return false;

    /// Check that struct is a named union of type VOID and one arbitrary type.
    auto struct_schema = capnp_type.asStruct();
    if (!checkIfStructIsNamedUnion(struct_schema))
        return false;

    auto union_fields = struct_schema.getUnionFields();
    if (union_fields.size() != 2)
        return false;

    auto first = union_fields[0];
    auto second = union_fields[1];

    auto nested_type = assert_cast<const DataTypeNullable *>(data_type.get())->getNestedType();
    if (first.getType().isVoid())
        return checkCapnProtoType(second.getType(), nested_type, mode, error_message);
    if (second.getType().isVoid())
        return checkCapnProtoType(first.getType(), nested_type, mode, error_message);
    return false;
}
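
For reference, here is a hypothetical schema fragment this check is meant to accept, together with a simplified standalone mirror of the predicate built from the same public capnp calls used above. The struct and field names are illustrative only, and the real check additionally recurses into the non-Void member's type.

/// A hypothetical schema fragment (not from the repo):
///
///     struct NullableUInt64
///     {
///         union
///         {
///             null  @0 : Void;
///             value @1 : UInt64;
///         }
///     }
#include <capnp/schema.h>

static bool looksLikeNullable(const capnp::StructSchema & schema)
{
    /// All fields must belong to the union...
    if (schema.getFields().size() != schema.getUnionFields().size())
        return false;
    auto fields = schema.getUnionFields();
    /// ...and the union must have exactly two members, one of which is Void.
    if (fields.size() != 2)
        return false;
    return fields[0].getType().isVoid() || fields[1].getType().isVoid();
}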

static bool checkTupleType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message)
{
    if (!capnp_type.isStruct())
        return false;
    auto struct_schema = capnp_type.asStruct();

    if (checkIfStructIsNamedUnion(struct_schema))
        return false;

    if (checkIfStructContainsUnnamedUnion(struct_schema))
    {
        error_message += "CapnProto struct contains unnamed union";
        return false;
    }

    const auto * tuple_data_type = assert_cast<const DataTypeTuple *>(data_type.get());
    auto nested_types = tuple_data_type->getElements();
    if (nested_types.size() != struct_schema.getFields().size())
    {
        error_message += "Tuple and Struct types have different sizes";
        return false;
    }

    if (!tuple_data_type->haveExplicitNames())
    {
        error_message += "Only named Tuple can be converted to CapnProto Struct";
        return false;
    }
    for (const auto & name : tuple_data_type->getElementNames())
    {
        KJ_IF_MAYBE(field, struct_schema.findFieldByName(name))
        {
            if (!checkCapnProtoType(field->getType(), nested_types[tuple_data_type->getPositionByName(name)], mode, error_message))
                return false;
        }
        else
        {
            error_message += "CapnProto struct doesn't contain a field with name " + name;
            return false;
        }
    }

    return true;
}

static bool checkArrayType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message)
{
    if (!capnp_type.isList())
        return false;
    auto list_schema = capnp_type.asList();
    auto nested_type = assert_cast<const DataTypeArray *>(data_type.get())->getNestedType();
    return checkCapnProtoType(list_schema.getElementType(), nested_type, mode, error_message);
}

static bool checkCapnProtoType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message)
{
    switch (data_type->getTypeId())
    {
        case TypeIndex::UInt8:
            return capnp_type.isBool() || capnp_type.isUInt8();
        case TypeIndex::Date: [[fallthrough]];
        case TypeIndex::UInt16:
            return capnp_type.isUInt16();
        case TypeIndex::DateTime: [[fallthrough]];
        case TypeIndex::UInt32:
            return capnp_type.isUInt32();
        case TypeIndex::UInt64:
            return capnp_type.isUInt64();
        case TypeIndex::Int8:
            return capnp_type.isInt8();
        case TypeIndex::Int16:
            return capnp_type.isInt16();
        case TypeIndex::Date32: [[fallthrough]];
        case TypeIndex::Int32:
            return capnp_type.isInt32();
        case TypeIndex::DateTime64: [[fallthrough]];
        case TypeIndex::Int64:
            return capnp_type.isInt64();
        case TypeIndex::Float32:
            return capnp_type.isFloat32();
        case TypeIndex::Float64:
            return capnp_type.isFloat64();
        case TypeIndex::Enum8:
            return checkEnums<Int8>(capnp_type, data_type, mode, INT8_MAX, error_message);
        case TypeIndex::Enum16:
            return checkEnums<Int16>(capnp_type, data_type, mode, INT16_MAX, error_message);
        case TypeIndex::Tuple:
            return checkTupleType(capnp_type, data_type, mode, error_message);
        case TypeIndex::Nullable:
        {
            auto result = checkNullableType(capnp_type, data_type, mode, error_message);
            if (!result)
                error_message += "Nullable can be represented only as a named union of type Void and nested type";
            return result;
        }
        case TypeIndex::Array:
            return checkArrayType(capnp_type, data_type, mode, error_message);
        case TypeIndex::LowCardinality:
            return checkCapnProtoType(capnp_type, assert_cast<const DataTypeLowCardinality *>(data_type.get())->getDictionaryType(), mode, error_message);
        case TypeIndex::FixedString: [[fallthrough]];
        case TypeIndex::String:
            return capnp_type.isText() || capnp_type.isData();
        default:
            return false;
    }
}

static std::pair<String, String> splitFieldName(const String & name)
{
    const auto * begin = name.data();
    const auto * end = name.data() + name.size();
    const auto * it = find_first_symbols<'_', '.'>(begin, end);
    String first = String(begin, it);
    String second = it == end ? "" : String(it + 1, end);
    return {first, second};
}
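
Since getReaderByColumnName and getFieldByName below recurse on the second component returned here, the splitting contract is worth pinning down. A standalone approximation, using std::string::find_first_of in place of find_first_symbols<'_', '.'> (behavior should be identical for these inputs):

#include <cassert>
#include <string>
#include <utility>

/// Splits a column name into the first path component and the remaining tail.
static std::pair<std::string, std::string> splitFieldNameDemo(const std::string & name)
{
    size_t pos = name.find_first_of("_.");
    if (pos == std::string::npos)
        return {name, ""};
    return {name.substr(0, pos), name.substr(pos + 1)};
}

int main()
{
    /// No separator: the whole name is the field, the tail is empty.
    assert(splitFieldNameDemo("plain") == std::make_pair(std::string("plain"), std::string("")));
    /// Only the first separator splits; the rest stays in the tail for recursion.
    assert(splitFieldNameDemo("outer.inner.leaf") == std::make_pair(std::string("outer"), std::string("inner.leaf")));
    /// '_' is treated the same way, which is how flattened Nested columns resolve.
    assert(splitFieldNameDemo("nested_value") == std::make_pair(std::string("nested"), std::string("value")));
    return 0;
}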

capnp::DynamicValue::Reader getReaderByColumnName(const capnp::DynamicStruct::Reader & struct_reader, const String & name)
{
    auto [field_name, nested_name] = splitFieldName(name);
    KJ_IF_MAYBE(field, struct_reader.getSchema().findFieldByName(field_name))
    {
        capnp::DynamicValue::Reader field_reader;
        try
        {
            field_reader = struct_reader.get(*field);
        }
        catch (const kj::Exception & e)
        {
            throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot extract field value from struct by provided schema, error: {}. Perhaps the data was generated by another schema", String(e.getDescription().cStr()));
        }

        if (nested_name.empty())
            return field_reader;

        if (field_reader.getType() != capnp::DynamicValue::STRUCT)
            throw Exception(ErrorCodes::CAPN_PROTO_BAD_CAST, "Field {} is not a struct", field_name);

        return getReaderByColumnName(field_reader.as<capnp::DynamicStruct>(), nested_name);
    }

    throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "Capnproto struct doesn't contain field with name {}", field_name);
}

std::pair<capnp::DynamicStruct::Builder, capnp::StructSchema::Field> getStructBuilderAndFieldByColumnName(capnp::DynamicStruct::Builder struct_builder, const String & name)
{
    auto [field_name, nested_name] = splitFieldName(name);
    KJ_IF_MAYBE(field, struct_builder.getSchema().findFieldByName(field_name))
    {
        if (nested_name.empty())
            return {struct_builder, *field};

        auto field_builder = struct_builder.get(*field);
        if (field_builder.getType() != capnp::DynamicValue::STRUCT)
            throw Exception(ErrorCodes::CAPN_PROTO_BAD_CAST, "Field {} is not a struct", field_name);

        return getStructBuilderAndFieldByColumnName(field_builder.as<capnp::DynamicStruct>(), nested_name);
    }

    throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "Capnproto struct doesn't contain field with name {}", field_name);
}

static capnp::StructSchema::Field getFieldByName(const capnp::StructSchema & schema, const String & name)
{
    auto [field_name, nested_name] = splitFieldName(name);
    KJ_IF_MAYBE(field, schema.findFieldByName(field_name))
    {
        if (nested_name.empty())
            return *field;

        if (!field->getType().isStruct())
            throw Exception(ErrorCodes::CAPN_PROTO_BAD_CAST, "Field {} is not a struct", field_name);

        return getFieldByName(field->getType().asStruct(), nested_name);
    }

    throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "Capnproto schema doesn't contain field with name {}", field_name);
}

void checkCapnProtoSchemaStructure(const capnp::StructSchema & schema, const Block & header, FormatSettings::EnumComparingMode mode)
{
    /// First check that the struct doesn't contain an unnamed union, because we don't support it.
    if (checkIfStructContainsUnnamedUnion(schema))
        throw Exception(ErrorCodes::CAPN_PROTO_BAD_CAST, "Schema contains unnamed union that is not supported");
    auto names_and_types = header.getNamesAndTypesList();
    String additional_error_message;
    for (auto & [name, type] : names_and_types)
    {
        auto field = getFieldByName(schema, name);
        if (!checkCapnProtoType(field.getType(), type, mode, additional_error_message))
        {
            auto e = Exception(
                ErrorCodes::CAPN_PROTO_BAD_CAST,
                "Cannot convert ClickHouse type {} to CapnProto type {}",
                type->getName(),
                getCapnProtoFullTypeName(field.getType()));
            if (!additional_error_message.empty())
                e.addMessage(additional_error_message);
            throw std::move(e);
        }
    }
}
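
To see how this validation is driven end to end, here is a minimal hedged sketch of obtaining the StructSchema that checkCapnProtoSchemaStructure receives. The file name message.capnp and the root struct name Message are hypothetical; the capnp calls are the same ones this commit uses elsewhere (parseDiskFile is deprecated upstream, which is why the format code wraps it in pragmas).

#include <capnp/schema-parser.h>
#include <iostream>

int main()
{
    capnp::SchemaParser parser;
    /// Hypothetical schema file; second argument is the on-disk path, third the import path list.
    auto parsed = parser.parseDiskFile("message.capnp", "/tmp/message.capnp", {});
    auto root = parsed.getNested("Message").asStruct();
    /// The resulting StructSchema is what gets checked against the table header.
    for (auto field : root.getFields())
        std::cout << field.getProto().getName().cStr() << '\n';
    return 0;
}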

}

#endif
43
src/Formats/CapnProtoUtils.h
Normal file
@ -0,0 +1,43 @@
#pragma once

#include "config_formats.h"
#if USE_CAPNP

#include <Formats/FormatSchemaInfo.h>
#include <Formats/FormatSettings.h>
#include <Core/Block.h>
#include <capnp/schema-parser.h>
#include <capnp/dynamic.h>

namespace DB
{

// Wrapper for classes that could throw in destructor
// https://github.com/capnproto/capnproto/issues/553
template <typename T>
struct DestructorCatcher
{
    T impl;
    template <typename ... Arg>
    DestructorCatcher(Arg && ... args) : impl(kj::fwd<Arg>(args)...) {}
    ~DestructorCatcher() noexcept try { } catch (...) { return; }
};
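
A brief usage sketch of the wrapper; ThrowingDtor is a stand-in type for illustration, not part of capnp or ClickHouse. Without the wrapper, an exception escaping a destructor would normally call std::terminate.

struct ThrowingDtor
{
    ~ThrowingDtor() noexcept(false) { throw 42; }
};

void example()
{
    DestructorCatcher<ThrowingDtor> guard;
    /// At scope exit, guard.impl is destroyed and throws. The function-try-block
    /// in ~DestructorCatcher catches it, and the explicit `return` suppresses the
    /// implicit rethrow that destructor handlers otherwise perform, so the
    /// exception is swallowed and the noexcept destructor completes normally.
}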

class CapnProtoSchemaParser : public DestructorCatcher<capnp::SchemaParser>
{
public:
    CapnProtoSchemaParser() {}

    capnp::StructSchema getMessageSchema(const FormatSchemaInfo & schema_info);
};

bool compareEnumNames(const String & first, const String & second, FormatSettings::EnumComparingMode mode);

std::pair<capnp::DynamicStruct::Builder, capnp::StructSchema::Field> getStructBuilderAndFieldByColumnName(capnp::DynamicStruct::Builder struct_builder, const String & name);

capnp::DynamicValue::Reader getReaderByColumnName(const capnp::DynamicStruct::Reader & struct_reader, const String & name);

void checkCapnProtoSchemaStructure(const capnp::StructSchema & schema, const Block & header, FormatSettings::EnumComparingMode mode);

}

#endif
@ -111,6 +111,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
    format_settings.arrow.low_cardinality_as_dictionary = settings.output_format_arrow_low_cardinality_as_dictionary;
    format_settings.arrow.import_nested = settings.input_format_arrow_import_nested;
    format_settings.orc.import_nested = settings.input_format_orc_import_nested;
    format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode;

    /// Validate avro_schema_registry_url with RemoteHostFilter when non-empty and in Server context
    if (format_settings.schema.is_server)

@ -42,7 +42,7 @@ FormatSettings getFormatSettings(ContextPtr context);
template <typename T>
FormatSettings getFormatSettings(ContextPtr context, const T & settings);

/** Allows to create an IBlockInputStream or IBlockOutputStream by the name of the format.
/** Allows to create an IInputFormat or IOutputFormat by the name of the format.
  * Note: format and compression are independent things.
  */
class FormatFactory final : private boost::noncopyable

@ -99,4 +99,10 @@ FormatSchemaInfo::FormatSchemaInfo(const String & format_schema, const String &
    }
}

FormatSchemaInfo::FormatSchemaInfo(const FormatSettings & settings, const String & format, bool require_message)
    : FormatSchemaInfo(
        settings.schema.format_schema, format, require_message, settings.schema.is_server, settings.schema.format_schema_path)
{
}

}

@ -1,6 +1,7 @@
#pragma once

#include <base/types.h>
#include <Formats/FormatSettings.h>

namespace DB
{

@ -11,6 +12,7 @@ class FormatSchemaInfo
{
public:
    FormatSchemaInfo(const String & format_schema, const String & format, bool require_message, bool is_server, const std::string & format_schema_path);
    FormatSchemaInfo(const FormatSettings & settings, const String & format, bool require_message);

    /// Returns path to the schema file.
    const String & schemaPath() const { return schema_path; }

@ -183,6 +183,20 @@ struct FormatSettings
    {
        bool import_nested = false;
    } orc;

    /// For the CapnProto format we should determine how to
    /// compare a ClickHouse Enum with an Enum from the schema.
    enum class EnumComparingMode
    {
        BY_NAMES, // Names in enums should be the same, values can be different.
        BY_NAMES_CASE_INSENSITIVE, // Case-insensitive name comparison.
        BY_VALUES, // Values should be the same, names can be different.
    };

    struct
    {
        EnumComparingMode enum_comparing_mode = EnumComparingMode::BY_VALUES;
    } capn_proto;
};

}

@ -56,7 +56,6 @@ NativeReader::NativeReader(ReadBuffer & istr_, UInt64 server_revision_,
    }
}

// also resets few vars from IBlockInputStream (I didn't want to propagate resetParser upthere)
void NativeReader::resetParser()
{
    istr_concrete = nullptr;

@ -67,6 +67,7 @@ void registerOutputFormatNull(FormatFactory & factory);
void registerOutputFormatMySQLWire(FormatFactory & factory);
void registerOutputFormatMarkdown(FormatFactory & factory);
void registerOutputFormatPostgreSQLWire(FormatFactory & factory);
void registerOutputFormatCapnProto(FormatFactory & factory);

/// Input only formats.

@ -139,6 +140,7 @@ void registerFormats()
    registerOutputFormatMySQLWire(factory);
    registerOutputFormatMarkdown(factory);
    registerOutputFormatPostgreSQLWire(factory);
    registerOutputFormatCapnProto(factory);

    registerInputFormatRegexp(factory);
    registerInputFormatJSONAsString(factory);
@ -96,6 +96,9 @@ struct ReplaceRegexpImpl
        re2_st::StringPiece matches[max_captures];

        size_t start_pos = 0;
        bool is_first_match = true;
        bool is_start_pos_added_one = false;

        while (start_pos < static_cast<size_t>(input.length()))
        {
            /// If no more replacements possible for current string
@ -103,6 +106,9 @@ struct ReplaceRegexpImpl

            if (searcher.Match(input, start_pos, input.length(), re2_st::RE2::Anchor::UNANCHORED, matches, num_captures))
            {
                if (is_start_pos_added_one)
                    start_pos -= 1;

                const auto & match = matches[0];
                size_t bytes_to_copy = (match.data() - input.data()) - start_pos;

@ -112,6 +118,13 @@ struct ReplaceRegexpImpl
                res_offset += bytes_to_copy;
                start_pos += bytes_to_copy + match.length();

                /// To avoid an infinite loop.
                if (is_first_match && match.length() == 0 && !replace_one && input.length() > 1)
                {
                    start_pos += 1;
                    is_start_pos_added_one = true;
                }

                /// Do substitution instructions
                for (const auto & it : instructions)
                {
@ -129,8 +142,9 @@ struct ReplaceRegexpImpl
                    }
                }

                if (replace_one || match.length() == 0) /// Stop after match of zero length, to avoid infinite loop.
                if (replace_one || (!is_first_match && match.length() == 0))
                    can_finish_current_string = true;
                is_first_match = false;
            }
            else
                can_finish_current_string = true;
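
The is_first_match and is_start_pos_added_one bookkeeping above exists because a pattern such as a* matches the empty string at every offset, so a replace-all loop that never advances past an empty match would spin forever. The standalone illustration below uses standard <regex> instead of re2_st so it compiles in isolation; it demonstrates the advance-by-one rule and the result replaceRegexpAll('bcd', 'a*', '!') is expected to produce after this fix.

#include <cassert>
#include <regex>
#include <string>

int main()
{
    const std::string input = "bcd";
    const std::regex re("a*"); /// Can match the empty string at any position.

    std::string result;
    size_t pos = 0;
    while (pos <= input.size())
    {
        std::smatch m;
        const std::string tail = input.substr(pos);
        if (!std::regex_search(tail, m, re))
            break;

        result.append(tail, 0, m.position(0)); /// Text before the match.
        result += '!';                         /// The substitution.

        const size_t advance = m.position(0) + m.length(0);
        if (advance == 0)
        {
            /// Empty match at the current position: keep the next character
            /// and step over it, otherwise we would match here forever.
            if (!tail.empty())
                result += tail[0];
            pos += 1;
        }
        else
            pos += advance;
    }

    assert(result == "!b!c!d!");
    return 0;
}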

@ -18,10 +18,10 @@ namespace ErrorCodes


template <class DataTypeName, class Geometry, class Serializer, class NameHolder>
class FunctionReadWkt : public IFunction
class FunctionReadWKT : public IFunction
{
public:
    explicit FunctionReadWkt() = default;
    explicit FunctionReadWKT() = default;

    static constexpr const char * name = NameHolder::name;

@ -72,36 +72,36 @@ public:

    static FunctionPtr create(ContextPtr)
    {
        return std::make_shared<FunctionReadWkt<DataTypeName, Geometry, Serializer, NameHolder>>();
        return std::make_shared<FunctionReadWKT<DataTypeName, Geometry, Serializer, NameHolder>>();
    }
};

struct ReadWktPointNameHolder
struct ReadWKTPointNameHolder
{
    static constexpr const char * name = "readWktPoint";
    static constexpr const char * name = "readWKTPoint";
};

struct ReadWktRingNameHolder
struct ReadWKTRingNameHolder
{
    static constexpr const char * name = "readWktRing";
    static constexpr const char * name = "readWKTRing";
};

struct ReadWktPolygonNameHolder
struct ReadWKTPolygonNameHolder
{
    static constexpr const char * name = "readWktPolygon";
    static constexpr const char * name = "readWKTPolygon";
};

struct ReadWktMultiPolygonNameHolder
struct ReadWKTMultiPolygonNameHolder
{
    static constexpr const char * name = "readWktMultiPolygon";
    static constexpr const char * name = "readWKTMultiPolygon";
};

void registerFunctionReadWkt(FunctionFactory & factory)
void registerFunctionReadWKT(FunctionFactory & factory)
{
    factory.registerFunction<FunctionReadWkt<DataTypePointName, CartesianPoint, PointSerializer<CartesianPoint>, ReadWktPointNameHolder>>();
    factory.registerFunction<FunctionReadWkt<DataTypeRingName, CartesianRing, RingSerializer<CartesianPoint>, ReadWktRingNameHolder>>();
    factory.registerFunction<FunctionReadWkt<DataTypePolygonName, CartesianPolygon, PolygonSerializer<CartesianPoint>, ReadWktPolygonNameHolder>>();
    factory.registerFunction<FunctionReadWkt<DataTypeMultiPolygonName, CartesianMultiPolygon, MultiPolygonSerializer<CartesianPoint>, ReadWktMultiPolygonNameHolder>>();
    factory.registerFunction<FunctionReadWKT<DataTypePointName, CartesianPoint, PointSerializer<CartesianPoint>, ReadWKTPointNameHolder>>();
    factory.registerFunction<FunctionReadWKT<DataTypeRingName, CartesianRing, RingSerializer<CartesianPoint>, ReadWKTRingNameHolder>>();
    factory.registerFunction<FunctionReadWKT<DataTypePolygonName, CartesianPolygon, PolygonSerializer<CartesianPoint>, ReadWKTPolygonNameHolder>>();
    factory.registerFunction<FunctionReadWKT<DataTypeMultiPolygonName, CartesianMultiPolygon, MultiPolygonSerializer<CartesianPoint>, ReadWKTMultiPolygonNameHolder>>();
}

}
@ -23,7 +23,7 @@ void registerFunctionGeohashEncode(FunctionFactory & factory);
void registerFunctionGeohashDecode(FunctionFactory & factory);
void registerFunctionGeohashesInBox(FunctionFactory & factory);
void registerFunctionWkt(FunctionFactory & factory);
void registerFunctionReadWkt(FunctionFactory & factory);
void registerFunctionReadWKT(FunctionFactory & factory);
void registerFunctionSvg(FunctionFactory & factory);

#if USE_H3
@ -79,7 +79,7 @@ void registerFunctionsGeo(FunctionFactory & factory)
    registerFunctionGeohashDecode(factory);
    registerFunctionGeohashesInBox(factory);
    registerFunctionWkt(factory);
    registerFunctionReadWkt(factory);
    registerFunctionReadWKT(factory);
    registerFunctionSvg(factory);

#if USE_H3

@ -102,6 +102,7 @@ public:
void registerFunctionSvg(FunctionFactory & factory)
{
    factory.registerFunction<FunctionSvg>();
    factory.registerAlias("SVG", "svg");
}

}
@ -121,7 +121,7 @@ struct Progress


/** Callback to track the progress of the query.
  * Used in IBlockInputStream and Context.
  * Used in QueryPipeline and Context.
  * The function takes the number of rows in the last block, the number of bytes in the last block.
  * Note that the callback can be called from different threads.
  */

@ -43,8 +43,6 @@ namespace ErrorCodes
    extern const int UNKNOWN_AGGREGATED_DATA_VARIANT;
}

class IBlockOutputStream;

/** Different data structures that can be used for aggregation
  * For efficiency, the aggregation data itself is put into the pool.
  * Data and pool ownership (states of aggregate functions)
@ -2982,8 +2982,12 @@ void Context::setAsynchronousInsertQueue(const std::shared_ptr<AsynchronousInser
    shared->async_insert_queue = ptr;
}

void Context::initializeBackgroundExecutors()
void Context::initializeBackgroundExecutorsIfNeeded()
{
    auto lock = getLock();
    if (is_background_executors_initialized)
        return;

    const size_t max_merges_and_mutations = getSettingsRef().background_pool_size * getSettingsRef().background_merges_mutations_concurrency_ratio;

    /// With this executor we can execute more tasks than threads we have
@ -3030,6 +3034,8 @@ void Context::initializeBackgroundExecutors()

    LOG_INFO(shared->log, "Initialized background executor for common operations (e.g. clearing old parts) with num_threads={}, num_tasks={}",
        getSettingsRef().background_common_pool_size, getSettingsRef().background_common_pool_size);

    is_background_executors_initialized = true;
}


@ -293,6 +293,8 @@ private:

    /// A flag, used to distinguish between user query and internal query to a database engine (MaterializedPostgreSQL).
    bool is_internal_query = false;
    /// Has initializeBackgroundExecutorsIfNeeded() method been executed?
    bool is_background_executors_initialized = false;


public:
@ -636,13 +638,13 @@ public:
    const Settings & getSettingsRef() const { return settings; }

    void setProgressCallback(ProgressCallback callback);
    /// Used in InterpreterSelectQuery to pass it to the IBlockInputStream.
    /// Used in executeQuery() to pass it to the QueryPipeline.
    ProgressCallback getProgressCallback() const;

    void setFileProgressCallback(FileProgressCallback && callback) { file_progress_callback = callback; }
    FileProgressCallback getFileProgressCallback() const { return file_progress_callback; }

    /** Set in executeQuery and InterpreterSelectQuery. Then it is used in IBlockInputStream,
    /** Set in executeQuery and InterpreterSelectQuery. Then it is used in QueryPipeline,
      * to update and monitor information about the total number of resources spent for the query.
      */
    void setProcessListElement(QueryStatus * elem);
@ -869,7 +871,7 @@ public:
    void setReadTaskCallback(ReadTaskCallback && callback);

    /// Background executors related methods
    void initializeBackgroundExecutors();
    void initializeBackgroundExecutorsIfNeeded();

    MergeMutateBackgroundExecutorPtr getMergeMutateExecutor() const;
    OrdinaryBackgroundExecutorPtr getMovesExecutor() const;
@ -2303,14 +2303,12 @@ void InterpreterSelectQuery::executeOrderOptimized(QueryPlan & query_plan, Input
{
    const Settings & settings = context->getSettingsRef();

    const auto & query = getSelectQuery();
    auto finish_sorting_step = std::make_unique<FinishSortingStep>(
        query_plan.getCurrentDataStream(),
        input_sorting_info->order_key_prefix_descr,
        output_order_descr,
        settings.max_block_size,
        limit,
        query.hasFiltration());
        limit);

    query_plan.addStep(std::move(finish_sorting_step));
}
@ -11,6 +11,11 @@
namespace ProfileEvents
{

std::shared_ptr<DB::DataTypeEnum8> TypeEnum = std::make_shared<DB::DataTypeEnum8>(DB::DataTypeEnum8::Values{
    { "increment", static_cast<Int8>(INCREMENT)},
    { "gauge", static_cast<Int8>(GAUGE)},
});

/// Put implementation here to avoid extra linking dependencies for clickhouse_common_io
void dumpToMapColumn(const Counters::Snapshot & counters, DB::IColumn * column, bool nonzero_only)
{

@ -1,5 +1,6 @@
#pragma once
#include <Common/ProfileEvents.h>
#include <DataTypes/DataTypeEnum.h>
#include <Columns/IColumn.h>


@ -9,4 +10,13 @@ namespace ProfileEvents
    /// Dumps profile events to columns Map(String, UInt64)
    void dumpToMapColumn(const Counters::Snapshot & counters, DB::IColumn * column, bool nonzero_only = true);

    /// This is for ProfileEvents packets.
    enum Type : int8_t
    {
        INCREMENT = 1,
        GAUGE = 2,
    };

    extern std::shared_ptr<DB::DataTypeEnum8> TypeEnum;

}
@ -161,7 +161,7 @@ void loadMetadata(ContextMutablePtr context, const String & default_database_nam
    bool create_default_db_if_not_exists = !default_database_name.empty();
    bool metadata_dir_for_default_db_already_exists = databases.count(default_database_name);
    if (create_default_db_if_not_exists && !metadata_dir_for_default_db_already_exists)
        databases.emplace(default_database_name, path + "/" + escapeForFileName(default_database_name));
        databases.emplace(default_database_name, std::filesystem::path(path) / escapeForFileName(default_database_name));

    TablesLoader::Databases loaded_databases;
    for (const auto & [name, db_path] : databases)

@ -72,7 +72,8 @@ public:

    InputPort & getPort(PortKind kind) { return *std::next(inputs.begin(), kind); }

    /// Compatible to IBlockOutputStream interface
    /// Compatibility with old interface.
    /// TODO: separate formats and processors.

    void write(const Block & block);
@ -1,7 +1,6 @@
#include "CapnProtoRowInputFormat.h"
#if USE_CAPNP

#include <Core/Field.h>
#include <IO/ReadBuffer.h>
#include <Interpreters/Context.h>
#include <Formats/FormatFactory.h>
@ -9,198 +8,40 @@
#include <capnp/serialize.h>
#include <capnp/dynamic.h>
#include <capnp/common.h>
#include <base/logger_useful.h>
#include <base/find_symbols.h>

#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnLowCardinality.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnDecimal.h>

#include <DataTypes/DataTypeEnum.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeLowCardinality.h>

namespace DB
{

namespace ErrorCodes
{
    extern const int BAD_TYPE_OF_FIELD;
    extern const int THERE_IS_NO_COLUMN;
    extern const int LOGICAL_ERROR;
}

static CapnProtoRowInputFormat::NestedField split(const Block & header, size_t i)
{
    CapnProtoRowInputFormat::NestedField field = {{}, i};

    // Remove leading dot in field definition, e.g. ".msg" -> "msg"
    String name(header.safeGetByPosition(i).name);
    if (!name.empty() && name[0] == '.')
        name.erase(0, 1);

    splitInto<'.', '_'>(field.tokens, name);
    return field;
}

static Field convertNodeToField(const capnp::DynamicValue::Reader & value)
{
    switch (value.getType())
    {
        case capnp::DynamicValue::UNKNOWN:
            throw Exception("Unknown field type", ErrorCodes::BAD_TYPE_OF_FIELD);
        case capnp::DynamicValue::VOID:
            return Field();
        case capnp::DynamicValue::BOOL:
            return value.as<bool>() ? 1u : 0u;
        case capnp::DynamicValue::INT:
            return value.as<int64_t>();
        case capnp::DynamicValue::UINT:
            return value.as<uint64_t>();
        case capnp::DynamicValue::FLOAT:
            return value.as<double>();
        case capnp::DynamicValue::TEXT:
        {
            auto arr = value.as<capnp::Text>();
            return String(arr.begin(), arr.size());
        }
        case capnp::DynamicValue::DATA:
        {
            auto arr = value.as<capnp::Data>().asChars();
            return String(arr.begin(), arr.size());
        }
        case capnp::DynamicValue::LIST:
        {
            auto list_value = value.as<capnp::DynamicList>();
            Array res(list_value.size());
            for (auto i : kj::indices(list_value))
                res[i] = convertNodeToField(list_value[i]);

            return res;
        }
        case capnp::DynamicValue::ENUM:
            return value.as<capnp::DynamicEnum>().getRaw();
        case capnp::DynamicValue::STRUCT:
        {
            auto struct_value = value.as<capnp::DynamicStruct>();
            const auto & fields = struct_value.getSchema().getFields();

            Tuple tuple(fields.size());
            for (auto i : kj::indices(fields))
                tuple[i] = convertNodeToField(struct_value.get(fields[i]));

            return tuple;
        }
        case capnp::DynamicValue::CAPABILITY:
            throw Exception("CAPABILITY type not supported", ErrorCodes::BAD_TYPE_OF_FIELD);
        case capnp::DynamicValue::ANY_POINTER:
            throw Exception("ANY_POINTER type not supported", ErrorCodes::BAD_TYPE_OF_FIELD);
    }
    return Field();
}

static capnp::StructSchema::Field getFieldOrThrow(capnp::StructSchema node, const std::string & field)
{
    KJ_IF_MAYBE(child, node.findFieldByName(field))
        return *child;
    else
        throw Exception("Field " + field + " doesn't exist in schema " + node.getShortDisplayName().cStr(), ErrorCodes::THERE_IS_NO_COLUMN);
}

void CapnProtoRowInputFormat::createActions(const NestedFieldList & sorted_fields, capnp::StructSchema reader)
{
    /// Columns in a table can map to fields in Cap'n'Proto or to structs.

    /// Store common parents and their tokens in order to backtrack.
    std::vector<capnp::StructSchema::Field> parents;
    std::vector<std::string> parent_tokens;

    capnp::StructSchema cur_reader = reader;

    for (const auto & field : sorted_fields)
    {
        if (field.tokens.empty())
            throw Exception("Logical error in CapnProtoRowInputFormat", ErrorCodes::LOGICAL_ERROR);

        // Backtrack to common parent
        while (field.tokens.size() < parent_tokens.size() + 1
            || !std::equal(parent_tokens.begin(), parent_tokens.end(), field.tokens.begin()))
        {
            actions.push_back({Action::POP});
            parents.pop_back();
            parent_tokens.pop_back();

            if (parents.empty())
            {
                cur_reader = reader;
                break;
            }
            else
                cur_reader = parents.back().getType().asStruct();
        }

        // Go forward
        while (parent_tokens.size() + 1 < field.tokens.size())
        {
            const auto & token = field.tokens[parents.size()];
            auto node = getFieldOrThrow(cur_reader, token);
            if (node.getType().isStruct())
            {
                // Descend to field structure
                parents.emplace_back(node);
                parent_tokens.emplace_back(token);
                cur_reader = node.getType().asStruct();
                actions.push_back({Action::PUSH, node});
            }
            else if (node.getType().isList())
            {
                break; // Collect list
            }
            else
                throw Exception("Field " + token + " is neither Struct nor List", ErrorCodes::BAD_TYPE_OF_FIELD);
        }

        // Read field from the structure
        auto node = getFieldOrThrow(cur_reader, field.tokens[parents.size()]);
        if (node.getType().isList() && !actions.empty() && actions.back().field == node)
        {
            // The field list here flattens Nested elements into multiple arrays.
            // In order to map Nested types in Cap'n'Proto back, they need to be collected.
            // Since the field names are sorted, the order of field positions must be preserved.
            // For example, if the fields are { b @0 :Text, a @1 :Text }, `a` would come first
            // even though its position is second.
            auto & columns = actions.back().columns;
            auto it = std::upper_bound(columns.cbegin(), columns.cend(), field.pos);
            columns.insert(it, field.pos);
        }
        else
        {
            actions.push_back({Action::READ, node, {field.pos}});
        }
    }
}

CapnProtoRowInputFormat::CapnProtoRowInputFormat(ReadBuffer & in_, Block header, Params params_, const FormatSchemaInfo & info)
    : IRowInputFormat(std::move(header), in_, std::move(params_)), parser(std::make_shared<SchemaParser>())
CapnProtoRowInputFormat::CapnProtoRowInputFormat(ReadBuffer & in_, Block header, Params params_, const FormatSchemaInfo & info, const FormatSettings & format_settings_)
    : IRowInputFormat(std::move(header), in_, std::move(params_))
    , parser(std::make_shared<CapnProtoSchemaParser>())
    , format_settings(format_settings_)
    , column_types(getPort().getHeader().getDataTypes())
    , column_names(getPort().getHeader().getNames())
{
    // Parse the schema and fetch the root object

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
    auto schema = parser->impl.parseDiskFile(info.schemaPath(), info.absoluteSchemaPath(), {});
#pragma GCC diagnostic pop

    root = schema.getNested(info.messageName()).asStruct();

    /**
     * The schema typically consists of fields in various nested structures.
     * Here we gather the list of fields and sort them in a way so that fields in the same structure are adjacent,
     * and the nesting level doesn't decrease to make traversal easier.
     */
    const auto & sample = getPort().getHeader();
    NestedFieldList list;
    size_t num_columns = sample.columns();
    for (size_t i = 0; i < num_columns; ++i)
        list.push_back(split(sample, i));

    // Order list first by value of strings then by length of string vector.
    std::sort(list.begin(), list.end(), [](const NestedField & a, const NestedField & b) { return a.tokens < b.tokens; });
    createActions(list, root);
    root = parser->getMessageSchema(info);
    checkCapnProtoSchemaStructure(root, getPort().getHeader(), format_settings.capn_proto.enum_comparing_mode);
}

kj::Array<capnp::word> CapnProtoRowInputFormat::readMessage()
@ -233,6 +74,191 @@ kj::Array<capnp::word> CapnProtoRowInputFormat::readMessage()
    return msg;
}

static void insertSignedInteger(IColumn & column, const DataTypePtr & column_type, Int64 value)
{
    switch (column_type->getTypeId())
    {
        case TypeIndex::Int8:
            assert_cast<ColumnInt8 &>(column).insertValue(value);
            break;
        case TypeIndex::Int16:
            assert_cast<ColumnInt16 &>(column).insertValue(value);
            break;
        case TypeIndex::Int32:
            assert_cast<ColumnInt32 &>(column).insertValue(value);
            break;
        case TypeIndex::Int64:
            assert_cast<ColumnInt64 &>(column).insertValue(value);
            break;
        case TypeIndex::DateTime64:
            assert_cast<ColumnDecimal<DateTime64> &>(column).insertValue(value);
            break;
        default:
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Column type is not a signed integer.");
    }
}

static void insertUnsignedInteger(IColumn & column, const DataTypePtr & column_type, UInt64 value)
{
    switch (column_type->getTypeId())
    {
        case TypeIndex::UInt8:
            assert_cast<ColumnUInt8 &>(column).insertValue(value);
            break;
        case TypeIndex::Date: [[fallthrough]];
        case TypeIndex::UInt16:
            assert_cast<ColumnUInt16 &>(column).insertValue(value);
            break;
        case TypeIndex::DateTime: [[fallthrough]];
        case TypeIndex::UInt32:
            assert_cast<ColumnUInt32 &>(column).insertValue(value);
            break;
        case TypeIndex::UInt64:
            assert_cast<ColumnUInt64 &>(column).insertValue(value);
            break;
        default:
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Column type is not an unsigned integer.");
    }
}

static void insertFloat(IColumn & column, const DataTypePtr & column_type, Float64 value)
{
    switch (column_type->getTypeId())
    {
        case TypeIndex::Float32:
            assert_cast<ColumnFloat32 &>(column).insertValue(value);
            break;
        case TypeIndex::Float64:
            assert_cast<ColumnFloat64 &>(column).insertValue(value);
            break;
        default:
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Column type is not a float.");
    }
}

template <typename Value>
static void insertString(IColumn & column, Value value)
{
    column.insertData(reinterpret_cast<const char *>(value.begin()), value.size());
}

template <typename ValueType>
static void insertEnum(IColumn & column, const DataTypePtr & column_type, const capnp::DynamicEnum & enum_value, FormatSettings::EnumComparingMode enum_comparing_mode)
{
    auto enumerant = *kj::_::readMaybe(enum_value.getEnumerant());
    auto enum_type = assert_cast<const DataTypeEnum<ValueType> *>(column_type.get());
    DataTypePtr nested_type = std::make_shared<DataTypeNumber<ValueType>>();
    switch (enum_comparing_mode)
    {
        case FormatSettings::EnumComparingMode::BY_VALUES:
            insertSignedInteger(column, nested_type, Int64(enumerant.getOrdinal()));
            return;
        case FormatSettings::EnumComparingMode::BY_NAMES:
            insertSignedInteger(column, nested_type, Int64(enum_type->getValue(String(enumerant.getProto().getName()))));
            return;
        case FormatSettings::EnumComparingMode::BY_NAMES_CASE_INSENSITIVE:
        {
            /// Find the matching enum name case-insensitively.
            String enum_name = enumerant.getProto().getName();
            for (auto & name : enum_type->getAllRegisteredNames())
            {
                if (compareEnumNames(name, enum_name, enum_comparing_mode))
                {
                    insertSignedInteger(column, nested_type, Int64(enum_type->getValue(name)));
                    break;
                }
            }
        }
    }
}

static void insertValue(IColumn & column, const DataTypePtr & column_type, const capnp::DynamicValue::Reader & value, FormatSettings::EnumComparingMode enum_comparing_mode)
{
    if (column_type->lowCardinality())
    {
        auto & lc_column = assert_cast<ColumnLowCardinality &>(column);
        auto tmp_column = lc_column.getDictionary().getNestedColumn()->cloneEmpty();
        auto dict_type = assert_cast<const DataTypeLowCardinality *>(column_type.get())->getDictionaryType();
        insertValue(*tmp_column, dict_type, value, enum_comparing_mode);
        lc_column.insertFromFullColumn(*tmp_column, 0);
        return;
    }

    switch (value.getType())
    {
        case capnp::DynamicValue::Type::INT:
            insertSignedInteger(column, column_type, value.as<Int64>());
            break;
        case capnp::DynamicValue::Type::UINT:
            insertUnsignedInteger(column, column_type, value.as<UInt64>());
            break;
        case capnp::DynamicValue::Type::FLOAT:
            insertFloat(column, column_type, value.as<Float64>());
            break;
        case capnp::DynamicValue::Type::BOOL:
            insertUnsignedInteger(column, column_type, UInt64(value.as<bool>()));
            break;
        case capnp::DynamicValue::Type::DATA:
            insertString(column, value.as<capnp::Data>());
            break;
        case capnp::DynamicValue::Type::TEXT:
            insertString(column, value.as<capnp::Text>());
            break;
        case capnp::DynamicValue::Type::ENUM:
            if (column_type->getTypeId() == TypeIndex::Enum8)
                insertEnum<Int8>(column, column_type, value.as<capnp::DynamicEnum>(), enum_comparing_mode);
            else
                insertEnum<Int16>(column, column_type, value.as<capnp::DynamicEnum>(), enum_comparing_mode);
            break;
        case capnp::DynamicValue::LIST:
        {
            auto list_value = value.as<capnp::DynamicList>();
            auto & column_array = assert_cast<ColumnArray &>(column);
            auto & offsets = column_array.getOffsets();
            offsets.push_back(offsets.back() + list_value.size());

            auto & nested_column = column_array.getData();
            auto nested_type = assert_cast<const DataTypeArray *>(column_type.get())->getNestedType();
            for (const auto & nested_value : list_value)
                insertValue(nested_column, nested_type, nested_value, enum_comparing_mode);
            break;
        }
        case capnp::DynamicValue::Type::STRUCT:
        {
            auto struct_value = value.as<capnp::DynamicStruct>();
            if (column_type->isNullable())
            {
                auto & nullable_column = assert_cast<ColumnNullable &>(column);
                auto field = *kj::_::readMaybe(struct_value.which());
                if (field.getType().isVoid())
                    nullable_column.insertDefault();
                else
                {
                    auto & nested_column = nullable_column.getNestedColumn();
                    auto nested_type = assert_cast<const DataTypeNullable *>(column_type.get())->getNestedType();
                    auto nested_value = struct_value.get(field);
                    insertValue(nested_column, nested_type, nested_value, enum_comparing_mode);
                    nullable_column.getNullMapData().push_back(0);
                }
            }
            else
            {
                auto & tuple_column = assert_cast<ColumnTuple &>(column);
                const auto * tuple_type = assert_cast<const DataTypeTuple *>(column_type.get());
                for (size_t i = 0; i != tuple_column.tupleSize(); ++i)
                    insertValue(
                        tuple_column.getColumn(i),
                        tuple_type->getElements()[i],
                        struct_value.get(tuple_type->getElementNames()[i]),
                        enum_comparing_mode);
            }
            break;
        }
        default:
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected CapnProto value type.");
    }
}

bool CapnProtoRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &)
{
    if (in->eof())
@ -245,51 +271,12 @@ bool CapnProtoRowInputFormat::readRow(MutableColumns & columns, RowReadExtension
#else
    capnp::FlatArrayMessageReader msg(array);
#endif
    std::vector<capnp::DynamicStruct::Reader> stack;
    stack.push_back(msg.getRoot<capnp::DynamicStruct>(root));

    for (auto action : actions)
    auto root_reader = msg.getRoot<capnp::DynamicStruct>(root);
    for (size_t i = 0; i != columns.size(); ++i)
    {
        switch (action.type)
        {
            case Action::READ:
            {
                Field value = convertNodeToField(stack.back().get(action.field));
                if (action.columns.size() > 1)
                {
                    // Nested columns must be flattened into several arrays,
                    // e.g. Array(Tuple(x ..., y ...)) -> Array(x ...), Array(y ...)
                    const auto & collected = DB::get<const Array &>(value);
                    size_t size = collected.size();
                    // The flattened array contains an array of a part of the nested tuple
                    Array flattened(size);
                    for (size_t column_index = 0; column_index < action.columns.size(); ++column_index)
                    {
                        // Populate the array with single tuple elements
                        for (size_t off = 0; off < size; ++off)
                        {
                            const auto & tuple = DB::get<const Tuple &>(collected[off]);
                            flattened[off] = tuple[column_index];
                        }
                        auto & col = columns[action.columns[column_index]];
                        col->insert(flattened);
                    }
                }
                else
                {
                    auto & col = columns[action.columns[0]];
                    col->insert(value);
                }

                break;
            }
            case Action::POP:
                stack.pop_back();
                break;
            case Action::PUSH:
                stack.push_back(stack.back().get(action.field).as<capnp::DynamicStruct>());
                break;
        }
        auto value = getReaderByColumnName(root_reader, column_names[i]);
        insertValue(*columns[i], column_types[i], value, format_settings.capn_proto.enum_comparing_mode);
    }

    return true;
@ -302,8 +289,7 @@ void registerInputFormatCapnProto(FormatFactory & factory)
        [](ReadBuffer & buf, const Block & sample, IRowInputFormat::Params params, const FormatSettings & settings)
        {
            return std::make_shared<CapnProtoRowInputFormat>(buf, sample, std::move(params),
                FormatSchemaInfo(settings.schema.format_schema, "CapnProto", true,
                    settings.schema.is_server, settings.schema.format_schema_path));
                FormatSchemaInfo(settings, "CapnProto", true), settings);
        });
}

@ -4,8 +4,8 @@
#if USE_CAPNP

#include <Core/Block.h>
#include <Formats/CapnProtoUtils.h>
#include <Processors/Formats/IRowInputFormat.h>
#include <capnp/schema-parser.h>

namespace DB
{
@ -22,18 +22,7 @@ class ReadBuffer;
class CapnProtoRowInputFormat : public IRowInputFormat
{
public:
    struct NestedField
    {
        std::vector<std::string> tokens;
        size_t pos;
    };
    using NestedFieldList = std::vector<NestedField>;

    /** schema_dir - base path for schema files
      * schema_file - location of the capnproto schema, e.g. "schema.capnp"
      * root_object - name of the root object, e.g. "Message"
      */
    CapnProtoRowInputFormat(ReadBuffer & in_, Block header, Params params_, const FormatSchemaInfo & info);
    CapnProtoRowInputFormat(ReadBuffer & in_, Block header, Params params_, const FormatSchemaInfo & info, const FormatSettings & format_settings_);

    String getName() const override { return "CapnProtoRowInputFormat"; }

@ -42,34 +31,11 @@ public:
private:
    kj::Array<capnp::word> readMessage();

    // Build a traversal plan from a sorted list of fields
    void createActions(const NestedFieldList & sorted_fields, capnp::StructSchema reader);

    /* Action for the state machine for traversing nested structures. */
    using BlockPositionList = std::vector<size_t>;
    struct Action
    {
        enum Type { POP, PUSH, READ };
        Type type{};
        capnp::StructSchema::Field field{};
        BlockPositionList columns{};
    };

    // Wrapper for classes that could throw in destructor
    // https://github.com/capnproto/capnproto/issues/553
    template <typename T>
    struct DestructorCatcher
    {
        T impl;
        template <typename ... Arg>
        DestructorCatcher(Arg && ... args) : impl(kj::fwd<Arg>(args)...) {}
        ~DestructorCatcher() noexcept try { } catch (...) { return; }
    };
    using SchemaParser = DestructorCatcher<capnp::SchemaParser>;

    std::shared_ptr<SchemaParser> parser;
    std::shared_ptr<CapnProtoSchemaParser> parser;
    capnp::StructSchema root;
    std::vector<Action> actions;
    const FormatSettings format_settings;
    DataTypes column_types;
    Names column_names;
};

}
268
src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp
Normal file
@ -0,0 +1,268 @@
#include <Processors/Formats/Impl/CapnProtoRowOutputFormat.h>
#if USE_CAPNP

#include <Formats/CapnProtoUtils.h>
#include <Formats/FormatSettings.h>
#include <IO/WriteBuffer.h>
#include <capnp/dynamic.h>
#include <capnp/serialize-packed.h>

#include <Columns/ColumnArray.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnLowCardinality.h>
#include <Columns/ColumnDecimal.h>

#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeEnum.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeLowCardinality.h>

namespace DB
{

namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
}


CapnProtoOutputStream::CapnProtoOutputStream(WriteBuffer & out_) : out(out_)
{
}

void CapnProtoOutputStream::write(const void * buffer, size_t size)
{
    out.write(reinterpret_cast<const char *>(buffer), size);
}

CapnProtoRowOutputFormat::CapnProtoRowOutputFormat(
    WriteBuffer & out_,
    const Block & header_,
    const RowOutputFormatParams & params_,
    const FormatSchemaInfo & info,
    const FormatSettings & format_settings_)
    : IRowOutputFormat(header_, out_, params_), column_names(header_.getNames()), column_types(header_.getDataTypes()), output_stream(std::make_unique<CapnProtoOutputStream>(out_)), format_settings(format_settings_)
{
    schema = schema_parser.getMessageSchema(info);
    checkCapnProtoSchemaStructure(schema, getPort(PortKind::Main).getHeader(), format_settings.capn_proto.enum_comparing_mode);
}

template <typename EnumValue>
static capnp::DynamicEnum getDynamicEnum(
    const ColumnPtr & column,
    const DataTypePtr & data_type,
    size_t row_num,
    const capnp::EnumSchema & enum_schema,
    FormatSettings::EnumComparingMode mode)
{
    const auto * enum_data_type = assert_cast<const DataTypeEnum<EnumValue> *>(data_type.get());
    EnumValue enum_value = column->getInt(row_num);
    if (mode == FormatSettings::EnumComparingMode::BY_VALUES)
        return capnp::DynamicEnum(enum_schema, enum_value);

    auto enum_name = enum_data_type->getNameForValue(enum_value);
    for (const auto enumerant : enum_schema.getEnumerants())
    {
        if (compareEnumNames(String(enum_name), enumerant.getProto().getName(), mode))
            return capnp::DynamicEnum(enumerant);
    }

    throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot convert ClickHouse Enum value to CapnProto Enum");
}

static capnp::DynamicValue::Builder initStructFieldBuilder(const ColumnPtr & column, size_t row_num, capnp::DynamicStruct::Builder & struct_builder, capnp::StructSchema::Field field)
{
    if (const auto * array_column = checkAndGetColumn<ColumnArray>(*column))
    {
        size_t size = array_column->getOffsets()[row_num] - array_column->getOffsets()[row_num - 1];
        return struct_builder.init(field, size);
    }

    if (field.getType().isStruct())
        return struct_builder.init(field);

    return struct_builder.get(field);
}

static std::optional<capnp::DynamicValue::Reader> convertToDynamicValue(
    const ColumnPtr & column,
    const DataTypePtr & data_type,
    size_t row_num,
    capnp::DynamicValue::Builder builder,
    FormatSettings::EnumComparingMode enum_comparing_mode,
    std::vector<std::unique_ptr<String>> & temporary_text_data_storage)
{
    /// We don't do any type validation here, because it was done in the CapnProtoRowOutputFormat constructor.

    if (data_type->lowCardinality())
    {
        const auto * lc_column = assert_cast<const ColumnLowCardinality *>(column.get());
        const auto & dict_type = assert_cast<const DataTypeLowCardinality *>(data_type.get())->getDictionaryType();
        size_t index = lc_column->getIndexAt(row_num);
        return convertToDynamicValue(lc_column->getDictionary().getNestedColumn(), dict_type, index, builder, enum_comparing_mode, temporary_text_data_storage);
    }

    switch (builder.getType())
    {
        case capnp::DynamicValue::Type::INT:
            /// We allow outputting DateTime64 as Int64.
            if (WhichDataType(data_type).isDateTime64())
                return capnp::DynamicValue::Reader(assert_cast<const ColumnDecimal<DateTime64> *>(column.get())->getElement(row_num));
            return capnp::DynamicValue::Reader(column->getInt(row_num));
        case capnp::DynamicValue::Type::UINT:
            return capnp::DynamicValue::Reader(column->getUInt(row_num));
        case capnp::DynamicValue::Type::BOOL:
            return capnp::DynamicValue::Reader(column->getBool(row_num));
        case capnp::DynamicValue::Type::FLOAT:
            return capnp::DynamicValue::Reader(column->getFloat64(row_num));
        case capnp::DynamicValue::Type::ENUM:
        {
            auto enum_schema = builder.as<capnp::DynamicEnum>().getSchema();
            if (data_type->getTypeId() == TypeIndex::Enum8)
                return capnp::DynamicValue::Reader(
                    getDynamicEnum<Int8>(column, data_type, row_num, enum_schema, enum_comparing_mode));
            return capnp::DynamicValue::Reader(
                getDynamicEnum<Int16>(column, data_type, row_num, enum_schema, enum_comparing_mode));
        }
        case capnp::DynamicValue::Type::DATA:
        {
            auto data = column->getDataAt(row_num);
            return capnp::DynamicValue::Reader(capnp::Data::Reader(reinterpret_cast<const kj::byte *>(data.data), data.size));
        }
        case capnp::DynamicValue::Type::TEXT:
        {
            /// In the TEXT type the data should be null-terminated, but ClickHouse String data may not be.
            /// To make the data null-terminated we copy it into a temporary String object, but
            /// capnp::Text::Reader works only with a pointer to the data and its size, so we must
            /// guarantee that the new String object outlives the capnp::Text::Reader.
            /// To do this we store the new String object in a temporary storage, passed into this function
            /// by reference. We use unique_ptr<String> instead of just String to avoid pointer
            /// invalidation on vector reallocation.
            temporary_text_data_storage.push_back(std::make_unique<String>(column->getDataAt(row_num)));
            auto & data = temporary_text_data_storage.back();
            return capnp::DynamicValue::Reader(capnp::Text::Reader(data->data(), data->size()));
        }
        case capnp::DynamicValue::Type::STRUCT:
        {
            auto struct_builder = builder.as<capnp::DynamicStruct>();
            auto nested_struct_schema = struct_builder.getSchema();
            /// A struct can represent a Tuple or a Nullable (a named union with two fields).
            if (data_type->isNullable())
            {
                const auto * nullable_type = assert_cast<const DataTypeNullable *>(data_type.get());
                const auto * nullable_column = assert_cast<const ColumnNullable *>(column.get());
                auto fields = nested_struct_schema.getUnionFields();
                if (nullable_column->isNullAt(row_num))
                {
                    auto null_field = fields[0].getType().isVoid() ? fields[0] : fields[1];
                    struct_builder.set(null_field, capnp::Void());
                }
                else
                {
                    auto value_field = fields[0].getType().isVoid() ? fields[1] : fields[0];
                    struct_builder.clear(value_field);
                    const auto & nested_column = nullable_column->getNestedColumnPtr();
                    auto value_builder = initStructFieldBuilder(nested_column, row_num, struct_builder, value_field);
                    auto value = convertToDynamicValue(nested_column, nullable_type->getNestedType(), row_num, value_builder, enum_comparing_mode, temporary_text_data_storage);
                    if (value)
                        struct_builder.set(value_field, std::move(*value));
                }
            }
            else
            {
                const auto * tuple_data_type = assert_cast<const DataTypeTuple *>(data_type.get());
                auto nested_types = tuple_data_type->getElements();
                const auto & nested_columns = assert_cast<const ColumnTuple *>(column.get())->getColumns();
                for (const auto & name : tuple_data_type->getElementNames())
                {
                    auto pos = tuple_data_type->getPositionByName(name);
                    auto field_builder
                        = initStructFieldBuilder(nested_columns[pos], row_num, struct_builder, nested_struct_schema.getFieldByName(name));
                    auto value = convertToDynamicValue(nested_columns[pos], nested_types[pos], row_num, field_builder, enum_comparing_mode, temporary_text_data_storage);
                    if (value)
                        struct_builder.set(name, std::move(*value));
|
||||
}
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
case capnp::DynamicValue::Type::LIST:
|
||||
{
|
||||
auto list_builder = builder.as<capnp::DynamicList>();
|
||||
const auto * array_column = assert_cast<const ColumnArray *>(column.get());
|
||||
const auto & nested_column = array_column->getDataPtr();
|
||||
const auto & nested_type = assert_cast<const DataTypeArray *>(data_type.get())->getNestedType();
|
||||
const auto & offsets = array_column->getOffsets();
|
||||
auto offset = offsets[row_num - 1];
|
||||
size_t size = offsets[row_num] - offset;
|
||||
|
||||
const auto * nested_array_column = checkAndGetColumn<ColumnArray>(*nested_column);
|
||||
for (size_t i = 0; i != size; ++i)
|
||||
{
|
||||
capnp::DynamicValue::Builder value_builder;
|
||||
/// For nested arrays we need to initialize nested list builder.
|
||||
if (nested_array_column)
|
||||
{
|
||||
const auto & nested_offset = nested_array_column->getOffsets();
|
||||
size_t nested_array_size = nested_offset[offset + i] - nested_offset[offset + i - 1];
|
||||
value_builder = list_builder.init(i, nested_array_size);
|
||||
}
|
||||
else
|
||||
value_builder = list_builder[i];
|
||||
|
||||
auto value = convertToDynamicValue(nested_column, nested_type, offset + i, value_builder, enum_comparing_mode, temporary_text_data_storage);
|
||||
if (value)
|
||||
list_builder.set(i, std::move(*value));
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
default:
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected CapnProto type.");
|
||||
}
|
||||
}
|
||||
|
||||
void CapnProtoRowOutputFormat::write(const Columns & columns, size_t row_num)
|
||||
{
|
||||
capnp::MallocMessageBuilder message;
|
||||
/// Temporary storage for data that will be outputted in fields with CapnProto type TEXT.
|
||||
/// See comment in convertToDynamicValue() for more details.
|
||||
std::vector<std::unique_ptr<String>> temporary_text_data_storage;
|
||||
capnp::DynamicStruct::Builder root = message.initRoot<capnp::DynamicStruct>(schema);
|
||||
for (size_t i = 0; i != columns.size(); ++i)
|
||||
{
|
||||
auto [struct_builder, field] = getStructBuilderAndFieldByColumnName(root, column_names[i]);
|
||||
auto field_builder = initStructFieldBuilder(columns[i], row_num, struct_builder, field);
|
||||
auto value = convertToDynamicValue(columns[i], column_types[i], row_num, field_builder, format_settings.capn_proto.enum_comparing_mode, temporary_text_data_storage);
|
||||
if (value)
|
||||
struct_builder.set(field, *value);
|
||||
}
|
||||
|
||||
capnp::writeMessage(*output_stream, message);
|
||||
}
|
||||
|
||||
void registerOutputFormatCapnProto(FormatFactory & factory)
|
||||
{
|
||||
factory.registerOutputFormat("CapnProto", [](
|
||||
WriteBuffer & buf,
|
||||
const Block & sample,
|
||||
const RowOutputFormatParams & params,
|
||||
const FormatSettings & format_settings)
|
||||
{
|
||||
return std::make_shared<CapnProtoRowOutputFormat>(buf, sample, params, FormatSchemaInfo(format_settings, "CapnProto", true), format_settings);
|
||||
});
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
namespace DB
|
||||
{
|
||||
class FormatFactory;
|
||||
void registerOutputFormatCapnProto(FormatFactory &) {}
|
||||
}
|
||||
|
||||
#endif // USE_CAPNP
|
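The enum conversion earlier in this file either trusts the numeric value (BY_VALUES) or matches the ClickHouse Enum's name against the schema's enumerants via compareEnumNames (BY_NAMES, or its case-insensitive variant). A minimal Python sketch of that decision logic; the function and variable names here are illustrative, not the real ClickHouse or Cap'n Proto API:

# Toy model of getDynamicEnum(): resolve a ClickHouse Enum value to a
# Cap'n Proto enumerant either by value or by (optionally case-insensitive) name.
BY_VALUES, BY_NAMES, BY_NAMES_CASE_INSENSITIVE = range(3)

def compare_enum_names(ch_name, capnp_name, mode):
    if mode == BY_NAMES_CASE_INSENSITIVE:
        return ch_name.lower() == capnp_name.lower()
    return ch_name == capnp_name

def get_dynamic_enum(ch_value, ch_value_to_name, capnp_enumerants, mode):
    """capnp_enumerants: list of (name, ordinal) pairs from the schema."""
    if mode == BY_VALUES:
        return ch_value  # trust the numeric value as-is
    ch_name = ch_value_to_name[ch_value]
    for name, ordinal in capnp_enumerants:
        if compare_enum_names(ch_name, name, mode):
            return ordinal
    raise ValueError("Cannot convert ClickHouse Enum value to CapnProto Enum")

# ClickHouse Enum8('ok' = 1, 'fail' = 2) vs. schema enum { OK @0; FAIL @1; }
assert get_dynamic_enum(2, {1: 'ok', 2: 'fail'}, [('OK', 0), ('FAIL', 1)],
                        BY_NAMES_CASE_INSENSITIVE) == 1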
53
src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h
Normal file
@ -0,0 +1,53 @@
#pragma once

#include "config_formats.h"
#if USE_CAPNP

#include <Processors/Formats/IRowOutputFormat.h>
#include <Formats/FormatSchemaInfo.h>
#include <Formats/CapnProtoUtils.h>
#include <capnp/schema.h>
#include <capnp/dynamic.h>
#include <kj/io.h>

namespace DB
{
class CapnProtoOutputStream : public kj::OutputStream
{
public:
    CapnProtoOutputStream(WriteBuffer & out_);

    void write(const void * buffer, size_t size) override;

private:
    WriteBuffer & out;
};

class CapnProtoRowOutputFormat : public IRowOutputFormat
{
public:
    CapnProtoRowOutputFormat(
        WriteBuffer & out_,
        const Block & header_,
        const RowOutputFormatParams & params_,
        const FormatSchemaInfo & info,
        const FormatSettings & format_settings_);

    String getName() const override { return "CapnProtoRowOutputFormat"; }

    void write(const Columns & columns, size_t row_num) override;

    void writeField(const IColumn &, const ISerialization &, size_t) override { }

private:
    Names column_names;
    DataTypes column_types;
    capnp::StructSchema schema;
    std::unique_ptr<CapnProtoOutputStream> output_stream;
    const FormatSettings format_settings;
    CapnProtoSchemaParser schema_parser;
};

}

#endif // USE_CAPNP
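With both files in place the format can be exercised end to end. A minimal sketch, assuming a local clickhouse-client on PATH, a schema file message.capnp (containing a struct Message with fields matching the query's column names) in the server's format_schemas directory, and that settings such as format_schema can be passed as client flags; all names and paths here are illustrative:

import subprocess

# Hypothetical smoke test for the CapnProto output format.
query = "SELECT number AS id, toString(number) AS text FROM numbers(3) FORMAT CapnProto"
result = subprocess.run(
    ["clickhouse-client", "--query", query, "--format_schema", "message:Message"],
    capture_output=True, check=True)
print(len(result.stdout), "bytes of Cap'n Proto data")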
@ -67,8 +67,7 @@ void registerInputFormatProtobuf(FormatFactory & factory)
            const FormatSettings & settings)
        {
            return std::make_shared<ProtobufRowInputFormat>(buf, sample, std::move(params),
                FormatSchemaInfo(settings.schema.format_schema, "Protobuf", true,
                    settings.schema.is_server, settings.schema.format_schema_path),
                FormatSchemaInfo(settings, "Protobuf", true),
                with_length_delimiter);
        });
}

@ -64,9 +64,7 @@ void registerOutputFormatProtobuf(FormatFactory & factory)
        {
            return std::make_shared<ProtobufRowOutputFormat>(
                buf, header, params,
                FormatSchemaInfo(settings.schema.format_schema, "Protobuf",
                    true, settings.schema.is_server,
                    settings.schema.format_schema_path),
                FormatSchemaInfo(settings, "Protobuf", true),
                settings,
                with_length_delimiter);
        });
@ -16,7 +16,7 @@ public:
        const Block & header, size_t num_inputs,
        SortDescription description_, size_t max_block_size)
        : IMergingTransform(
            num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false,
            num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0,
            header,
            num_inputs,
            std::move(description_),

@ -20,7 +20,7 @@ public:
        WriteBuffer * out_row_sources_buf_ = nullptr,
        bool use_average_block_sizes = false)
        : IMergingTransform(
            num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false,
            num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0,
            header,
            num_inputs,
            std::move(description_),

@ -19,7 +19,7 @@ public:
        SortDescription description,
        size_t max_block_size)
        : IMergingTransform(
            num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false,
            num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0,
            header,
            num_inputs,
            params,

@ -15,7 +15,7 @@ public:
        SortDescription description_, size_t max_block_size,
        Graphite::Params params_, time_t time_of_merge_)
        : IMergingTransform(
            num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false,
            num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0,
            header,
            num_inputs,
            std::move(description_),
@ -15,10 +15,10 @@ IMergingTransformBase::IMergingTransformBase(
    const Block & input_header,
    const Block & output_header,
    bool have_all_inputs_,
    bool has_limit_below_one_block_)
    UInt64 limit_hint_)
    : IProcessor(InputPorts(num_inputs, input_header), {output_header})
    , have_all_inputs(have_all_inputs_)
    , has_limit_below_one_block(has_limit_below_one_block_)
    , limit_hint(limit_hint_)
{
}

@ -79,7 +79,10 @@ IProcessor::Status IMergingTransformBase::prepareInitializeInputs()
        /// setNotNeeded after reading the first chunk, because in the optimistic case
        /// (e.g. with optimized 'ORDER BY primary_key LIMIT n' and small 'n')
        /// we won't have to read any chunks anymore;
        auto chunk = input.pull(has_limit_below_one_block);
        auto chunk = input.pull(limit_hint != 0);
        if (limit_hint && chunk.getNumRows() < limit_hint)
            input.setNeeded();

        if (!chunk.hasRows())
        {
            if (!input.isFinished())

@ -17,7 +17,7 @@ public:
        const Block & input_header,
        const Block & output_header,
        bool have_all_inputs_,
        bool has_limit_below_one_block_);
        UInt64 limit_hint_);

    OutputPort & getOutputPort() { return outputs.front(); }

@ -67,7 +67,7 @@ private:
    std::vector<InputState> input_states;
    std::atomic<bool> have_all_inputs;
    bool is_initialized = false;
    bool has_limit_below_one_block = false;
    UInt64 limit_hint = 0;

    IProcessor::Status prepareInitializeInputs();
};

@ -83,9 +83,9 @@ public:
        const Block & input_header,
        const Block & output_header,
        bool have_all_inputs_,
        bool has_limit_below_one_block_,
        UInt64 limit_hint_,
        Args && ... args)
        : IMergingTransformBase(num_inputs, input_header, output_header, have_all_inputs_, has_limit_below_one_block_)
        : IMergingTransformBase(num_inputs, input_header, output_header, have_all_inputs_, limit_hint_)
        , algorithm(std::forward<Args>(args) ...)
    {
    }
@ -13,13 +13,12 @@ MergingSortedTransform::MergingSortedTransform(
    SortDescription description_,
    size_t max_block_size,
    UInt64 limit_,
    bool has_limit_below_one_block_,
    WriteBuffer * out_row_sources_buf_,
    bool quiet_,
    bool use_average_block_sizes,
    bool have_all_inputs_)
    : IMergingTransform(
        num_inputs, header, header, have_all_inputs_, has_limit_below_one_block_,
        num_inputs, header, header, have_all_inputs_, limit_,
        header,
        num_inputs,
        std::move(description_),

@ -17,7 +17,6 @@ public:
        SortDescription description,
        size_t max_block_size,
        UInt64 limit_ = 0,
        bool has_limit_below_one_block_ = false,
        WriteBuffer * out_row_sources_buf_ = nullptr,
        bool quiet_ = false,
        bool use_average_block_sizes = false,

@ -18,7 +18,7 @@ public:
        WriteBuffer * out_row_sources_buf_ = nullptr,
        bool use_average_block_sizes = false)
        : IMergingTransform(
            num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false,
            num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0,
            header,
            num_inputs,
            std::move(description_),

@ -19,7 +19,7 @@ public:
        const Names & partition_key_columns,
        size_t max_block_size)
        : IMergingTransform(
            num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false,
            num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0,
            header,
            num_inputs,
            std::move(description_),

@ -19,7 +19,7 @@ public:
        WriteBuffer * out_row_sources_buf_ = nullptr,
        bool use_average_block_sizes = false)
        : IMergingTransform(
            num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false,
            num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0,
            header,
            num_inputs,
            std::move(description_),
@ -31,14 +31,12 @@ FinishSortingStep::FinishSortingStep(
    SortDescription prefix_description_,
    SortDescription result_description_,
    size_t max_block_size_,
    UInt64 limit_,
    bool has_filtration_)
    UInt64 limit_)
    : ITransformingStep(input_stream_, input_stream_.header, getTraits(limit_))
    , prefix_description(std::move(prefix_description_))
    , result_description(std::move(result_description_))
    , max_block_size(max_block_size_)
    , limit(limit_)
    , has_filtration(has_filtration_)
{
    /// TODO: check input_stream is sorted by prefix_description.
    output_stream->sort_description = result_description;

@ -60,14 +58,12 @@ void FinishSortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const
    if (pipeline.getNumStreams() > 1)
    {
        UInt64 limit_for_merging = (need_finish_sorting ? 0 : limit);
        bool has_limit_below_one_block = !has_filtration && limit_for_merging && limit_for_merging < max_block_size;
        auto transform = std::make_shared<MergingSortedTransform>(
            pipeline.getHeader(),
            pipeline.getNumStreams(),
            prefix_description,
            max_block_size,
            limit_for_merging,
            has_limit_below_one_block);
            limit_for_merging);

        pipeline.addTransform(std::move(transform));
    }

@ -14,8 +14,7 @@ public:
    SortDescription prefix_description_,
    SortDescription result_description_,
    size_t max_block_size_,
    UInt64 limit_,
    bool has_filtration_);
    UInt64 limit_);

    String getName() const override { return "FinishSorting"; }

@ -32,7 +31,6 @@ private:
    SortDescription result_description;
    size_t max_block_size;
    UInt64 limit;
    bool has_filtration;
};

}
@ -485,8 +485,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder(
        pipe.getHeader(),
        pipe.numOutputPorts(),
        sort_description,
        max_block_size,
        0, true);
        max_block_size);

    pipe.addTransform(std::move(transform));
}
@ -69,8 +69,7 @@ void SourceWithProgress::work()
    }
}

/// Aggregated copy-paste from IBlockInputStream::progressImpl.
/// Most of this must be done in PipelineExecutor outside. Now it's done for compatibility with IBlockInputStream.
/// TODO: Most of this must be done in PipelineExecutor outside.
void SourceWithProgress::progress(const Progress & value)
{
    was_progress_called = true;

@ -135,14 +134,12 @@ void SourceWithProgress::progress(const Progress & value)

    if (last_profile_events_update_time + profile_events_update_period_microseconds < total_elapsed_microseconds)
    {
        /// Should be done in PipelineExecutor.
        /// It is here for compatibility with IBlockInputsStream.
        /// TODO: Should be done in PipelineExecutor.
        CurrentThread::updatePerformanceCounters();
        last_profile_events_update_time = total_elapsed_microseconds;
    }

    /// Should be done in PipelineExecutor.
    /// It is here for compatibility with IBlockInputsStream.
    /// TODO: Should be done in PipelineExecutor.
    limits.speed_limits.throttle(progress.read_rows, progress.read_bytes, total_rows, total_elapsed_microseconds);

    if (quota && limits.mode == LimitsMode::LIMITS_TOTAL)
@ -127,7 +127,7 @@ ColumnGathererTransform::ColumnGathererTransform(
    ReadBuffer & row_sources_buf_,
    size_t block_preferred_size_)
    : IMergingTransform<ColumnGathererStream>(
        num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false,
        num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0,
        num_inputs, row_sources_buf_, block_preferred_size_)
    , log(&Poco::Logger::get("ColumnGathererStream"))
{

@ -197,7 +197,6 @@ void MergeSortingTransform::consume(Chunk chunk)
    description,
    max_merged_block_size,
    limit,
    false,
    nullptr,
    quiet,
    use_average_block_sizes,
@ -12,7 +12,7 @@ class Block;
class ReadBuffer;
class WriteBuffer;

/// Information for profiling. See IBlockInputStream.h
/// Information for profiling. See SourceWithProgress.h
struct ProfileInfo
{
    bool started = false;

@ -129,7 +129,6 @@ public:
    void setLeafLimits(const SizeLimits & limits) { pipe.setLeafLimits(limits); }
    void setQuota(const std::shared_ptr<const EnabledQuota> & quota) { pipe.setQuota(quota); }

    /// For compatibility with IBlockInputStream.
    void setProgressCallback(const ProgressCallback & callback);
    void setProcessListElement(QueryStatus * elem);

@ -83,7 +83,7 @@ TEST(MergingSortedTest, SimpleBlockSizeTest)
    EXPECT_EQ(pipe.numOutputPorts(), 3);

    auto transform = std::make_shared<MergingSortedTransform>(pipe.getHeader(), pipe.numOutputPorts(), sort_description,
        DEFAULT_MERGE_BLOCK_SIZE, 0, false, nullptr, false, true);
        DEFAULT_MERGE_BLOCK_SIZE, 0, nullptr, false, true);

    pipe.addTransform(std::move(transform));

@ -130,7 +130,7 @@ TEST(MergingSortedTest, MoreInterestingBlockSizes)
    EXPECT_EQ(pipe.numOutputPorts(), 3);

    auto transform = std::make_shared<MergingSortedTransform>(pipe.getHeader(), pipe.numOutputPorts(), sort_description,
        DEFAULT_MERGE_BLOCK_SIZE, 0, false, nullptr, false, true);
        DEFAULT_MERGE_BLOCK_SIZE, 0, nullptr, false, true);

    pipe.addTransform(std::move(transform));
@ -30,6 +30,7 @@
#include <Interpreters/InternalTextLogsQueue.h>
#include <Interpreters/OpenTelemetrySpanLog.h>
#include <Interpreters/Session.h>
#include <Interpreters/ProfileEventsExt.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/MergeTree/MergeTreeDataPartUUID.h>
#include <Storages/StorageS3Cluster.h>

@ -831,12 +832,6 @@ namespace
{
    using namespace ProfileEvents;

    enum ProfileEventTypes : int8_t
    {
        INCREMENT = 1,
        GAUGE = 2,
    };

    constexpr size_t NAME_COLUMN_INDEX = 4;
    constexpr size_t VALUE_COLUMN_INDEX = 5;

@ -879,7 +874,7 @@ namespace
        columns[i++]->insertData(host_name.data(), host_name.size());
        columns[i++]->insert(UInt64(snapshot.current_time));
        columns[i++]->insert(UInt64{snapshot.thread_id});
        columns[i++]->insert(ProfileEventTypes::INCREMENT);
        columns[i++]->insert(ProfileEvents::Type::INCREMENT);
    }
}

@ -893,7 +888,7 @@ namespace
    columns[i++]->insertData(host_name.data(), host_name.size());
    columns[i++]->insert(UInt64(snapshot.current_time));
    columns[i++]->insert(UInt64{snapshot.thread_id});
    columns[i++]->insert(ProfileEventTypes::GAUGE);
    columns[i++]->insert(ProfileEvents::Type::GAUGE);

    columns[i++]->insertData(MemoryTracker::USAGE_EVENT_NAME, strlen(MemoryTracker::USAGE_EVENT_NAME));
    columns[i++]->insert(snapshot.memory_usage);

@ -907,18 +902,11 @@ void TCPHandler::sendProfileEvents()
    if (client_tcp_protocol_version < DBMS_MIN_PROTOCOL_VERSION_WITH_PROFILE_EVENTS)
        return;

    auto profile_event_type = std::make_shared<DataTypeEnum8>(
        DataTypeEnum8::Values
        {
            { "increment", static_cast<Int8>(INCREMENT)},
            { "gauge", static_cast<Int8>(GAUGE)},
        });

    NamesAndTypesList column_names_and_types = {
        { "host_name", std::make_shared<DataTypeString>() },
        { "current_time", std::make_shared<DataTypeDateTime>() },
        { "thread_id", std::make_shared<DataTypeUInt64>() },
        { "type", profile_event_type },
        { "type", ProfileEvents::TypeEnum },
        { "name", std::make_shared<DataTypeString>() },
        { "value", std::make_shared<DataTypeUInt64>() },
    };
|
||||
{
|
||||
case MergeTreeData::MergingParams::Ordinary:
|
||||
merged_transform = std::make_shared<MergingSortedTransform>(
|
||||
header, pipes.size(), sort_description, merge_block_size, 0, false, ctx->rows_sources_write_buf.get(), true, ctx->blocks_are_granules_size);
|
||||
header, pipes.size(), sort_description, merge_block_size, 0, ctx->rows_sources_write_buf.get(), true, ctx->blocks_are_granules_size);
|
||||
break;
|
||||
|
||||
case MergeTreeData::MergingParams::Collapsing:
|
||||
|
@ -205,6 +205,8 @@ MergeTreeData::MergeTreeData(
    , background_operations_assignee(*this, BackgroundJobsAssignee::Type::DataProcessing, getContext())
    , background_moves_assignee(*this, BackgroundJobsAssignee::Type::Moving, getContext())
{
    context_->getGlobalContext()->initializeBackgroundExecutorsIfNeeded();

    const auto settings = getSettings();
    allow_nullable_key = attach || settings->allow_nullable_key;
@ -160,9 +160,10 @@ size_t MergeTreeReaderCompact::readRows(size_t from_mark, bool continue_reading,
    readData(column_from_part, column, from_mark, *column_positions[pos], rows_to_read, read_only_offsets[pos]);

    size_t read_rows_in_column = column->size() - column_size_before_reading;
    if (read_rows_in_column < rows_to_read)
        throw Exception("Cannot read all data in MergeTreeReaderCompact. Rows read: " + toString(read_rows_in_column) +
            ". Rows expected: " + toString(rows_to_read) + ".", ErrorCodes::CANNOT_READ_ALL_DATA);
    if (read_rows_in_column != rows_to_read)
        throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA,
            "Cannot read all data in MergeTreeReaderCompact. Rows read: {}. Rows expected: {}.",
            read_rows_in_column, rows_to_read);
}
catch (Exception & e)
{
@ -10,6 +10,7 @@ namespace DB
namespace ErrorCodes
{
    extern const int ARGUMENT_OUT_OF_BOUND;
    extern const int CANNOT_READ_ALL_DATA;
}

@ -76,6 +77,10 @@ MergeTreeReaderStream::MergeTreeReaderStream(
    if (max_mark_range_bytes != 0)
        read_settings = read_settings.adjustBufferSize(max_mark_range_bytes);

    /// An empty buffer does not make progress.
    if (!read_settings.local_fs_buffer_size || !read_settings.remote_fs_buffer_size)
        throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read to empty buffer.");

    /// Initialize the objects that shall be used to perform read operations.
    if (uncompressed_cache)
    {
@ -69,10 +69,6 @@ size_t MergeTreeReaderWide::readRows(size_t from_mark, bool continue_reading, si
    size_t num_columns = columns.size();
    checkNumberOfColumns(num_columns);

    /// Pointers to offset columns that are common to the nested data structure columns.
    /// If append is true, then the value will be equal to nullptr and will be used only to
    /// check that the offsets column has been already read.
    OffsetColumns offset_columns;
    std::unordered_map<String, ISerialization::SubstreamsCache> caches;

    std::unordered_set<std::string> prefetched_streams;
|
||||
#include <filesystem>
|
||||
|
||||
#include <Common/ShellCommand.h>
|
||||
#include <Common/filesystemHelpers.h>
|
||||
|
||||
#include <Core/Block.h>
|
||||
|
||||
#include <IO/ReadHelpers.h>
|
||||
@ -111,9 +113,16 @@ Pipe StorageExecutable::read(
|
||||
{
|
||||
auto user_scripts_path = context->getUserScriptsPath();
|
||||
auto script_path = user_scripts_path + '/' + script_name;
|
||||
if (!std::filesystem::exists(std::filesystem::path(script_path)))
|
||||
|
||||
if (!pathStartsWith(script_path, user_scripts_path))
|
||||
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
|
||||
"Executable file {} does not exists inside {}",
|
||||
"Executable file {} must be inside user scripts folder {}",
|
||||
script_name,
|
||||
user_scripts_path);
|
||||
|
||||
if (!std::filesystem::exists(std::filesystem::path(script_path)))
|
||||
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
|
||||
"Executable file {} does not exist inside user scripts folder {}",
|
||||
script_name,
|
||||
user_scripts_path);
|
||||
|
||||
@ -139,9 +148,9 @@ Pipe StorageExecutable::read(
|
||||
bool result = process_pool->tryBorrowObject(process, [&config, this]()
|
||||
{
|
||||
config.terminate_in_destructor_strategy = ShellCommand::DestructorStrategy{ true /*terminate_in_destructor*/, settings.command_termination_timeout };
|
||||
auto shell_command = ShellCommand::execute(config);
|
||||
auto shell_command = ShellCommand::executeDirect(config);
|
||||
return shell_command;
|
||||
}, settings.max_command_execution_time * 1000);
|
||||
}, settings.max_command_execution_time * 10000);
|
||||
|
||||
if (!result)
|
||||
throw Exception(ErrorCodes::TIMEOUT_EXCEEDED,
|
||||
|
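The new pathStartsWith() guard rejects scripts that resolve outside the user scripts folder (for example via '../' components) before the existence check runs. A standard-library Python sketch of the same containment idea; the helper name mirrors the C++ one but is otherwise illustrative:

import os

def path_starts_with(path, prefix_path):
    # Normalize both paths, then check that the candidate stays under the root.
    path = os.path.normpath(os.path.abspath(path))
    prefix_path = os.path.normpath(os.path.abspath(prefix_path))
    return os.path.commonpath([path, prefix_path]) == prefix_path

assert path_starts_with("/var/lib/clickhouse/user_scripts/test.sh",
                        "/var/lib/clickhouse/user_scripts")
assert not path_starts_with("/var/lib/clickhouse/user_scripts/../../../etc/passwd",
                            "/var/lib/clickhouse/user_scripts")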
@ -22,6 +22,7 @@
#include <Common/escapeForFileName.h>
#include <Common/typeid_cast.h>
#include <Common/parseGlobs.h>
#include <Common/filesystemHelpers.h>
#include <Storages/ColumnsDescription.h>
#include <Storages/StorageInMemoryMetadata.h>

@ -124,8 +125,8 @@ void checkCreationIsAllowed(ContextPtr context_global, const std::string & db_di
        return;

    /// "/dev/null" is allowed for perf testing
    if (!startsWith(table_path, db_dir_path) && table_path != "/dev/null")
        throw Exception("File is not inside " + db_dir_path, ErrorCodes::DATABASE_ACCESS_DENIED);
    if (!fileOrSymlinkPathStartsWith(table_path, db_dir_path) && table_path != "/dev/null")
        throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "File `{}` is not inside `{}`", table_path, db_dir_path);

    if (fs::exists(table_path) && fs::is_directory(table_path))
        throw Exception("File must not be a directory", ErrorCodes::INCORRECT_FILE_NAME);

@ -140,7 +141,10 @@ Strings StorageFile::getPathsList(const String & table_path, const String & user
    fs_table_path = user_files_absolute_path / fs_table_path;

    Strings paths;
    const String path = fs::weakly_canonical(fs_table_path);
    /// Do not use fs::canonical or fs::weakly_canonical:
    /// otherwise symlinks in the `user_files_path` directory would stop working.
    String path = fs::absolute(fs_table_path);
    path = fs::path(path).lexically_normal(); /// Normalize path.
    if (path.find_first_of("*?{") == std::string::npos)
    {
        std::error_code error;
51
tests/ci/compress_files.py
Normal file
@ -0,0 +1,51 @@
#!/usr/bin/env python3
import subprocess
import logging
import os

def compress_file_fast(path, archive_path):
    if os.path.exists('/usr/bin/pigz'):
        subprocess.check_call("pigz < {} > {}".format(path, archive_path), shell=True)
    else:
        subprocess.check_call("gzip < {} > {}".format(path, archive_path), shell=True)


def compress_fast(path, archive_path, exclude=None):
    pigz_part = ''
    if os.path.exists('/usr/bin/pigz'):
        logging.info("pigz found, will compress and decompress faster")
        pigz_part = "--use-compress-program='pigz'"
    else:
        pigz_part = '-z'
        logging.info("no pigz, compressing with default tar")

    if exclude is None:
        exclude_part = ""
    elif isinstance(exclude, list):
        exclude_part = " ".join(["--exclude {}".format(x) for x in exclude])
    else:
        exclude_part = "--exclude {}".format(str(exclude))

    fname = os.path.basename(path)
    if os.path.isfile(path):
        path = os.path.dirname(path)
    else:
        path += "/.."
    cmd = "tar {} {} -cf {} -C {} {}".format(pigz_part, exclude_part, archive_path, path, fname)
    logging.debug("compress_fast cmd:{}".format(cmd))
    subprocess.check_call(cmd, shell=True)


def decompress_fast(archive_path, result_path=None):
    pigz_part = ''
    if os.path.exists('/usr/bin/pigz'):
        logging.info("pigz found, will compress and decompress faster ('{}' -> '{}')".format(archive_path, result_path))
        pigz_part = "--use-compress-program='pigz'"
    else:
        pigz_part = '-z'
        logging.info("no pigz, decompressing with default tar ('{}' -> '{}')".format(archive_path, result_path))

    if result_path is None:
        subprocess.check_call("tar {} -xf {}".format(pigz_part, archive_path), shell=True)
    else:
        subprocess.check_call("tar {} -xf {} -C {}".format(pigz_part, archive_path, result_path), shell=True)
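A short sketch of how these helpers compose into a round trip; the archive and directory paths are made up, and the import assumes the script is run from tests/ci:

import logging
from compress_files import compress_fast, decompress_fast

logging.basicConfig(level=logging.INFO)
compress_fast("/tmp/build_artifacts", "/tmp/artifacts.tar.gz", exclude=["*.o"])
decompress_fast("/tmp/artifacts.tar.gz", "/tmp/restored")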
233
tests/ci/docker_images_check.py
Normal file
@ -0,0 +1,233 @@
#!/usr/bin/env python3
import subprocess
import logging
from report import create_test_html_report
from s3_helper import S3Helper
import json
import os
import time
from pr_info import PRInfo
from github import Github
import shutil

NAME = "Push to Dockerhub (actions)"

def get_changed_docker_images(pr_info, repo_path, image_file_path):
    images_dict = {}
    path_to_images_file = os.path.join(repo_path, image_file_path)
    if os.path.exists(path_to_images_file):
        with open(path_to_images_file, 'r') as dict_file:
            images_dict = json.load(dict_file)
    else:
        logging.info("Image file %s doesn't exist in repo %s", image_file_path, repo_path)

    dockerhub_repo_name = 'yandex'
    if not images_dict:
        return [], dockerhub_repo_name

    files_changed = pr_info.changed_files

    logging.info("Changed files for PR %s @ %s: %s", pr_info.number, pr_info.sha, str(files_changed))

    changed_images = []

    for dockerfile_dir, image_description in images_dict.items():
        if image_description['name'].startswith('clickhouse/'):
            dockerhub_repo_name = 'clickhouse'

        for f in files_changed:
            if f.startswith(dockerfile_dir):
                logging.info(
                    "Found changed file '%s' which affects docker image '%s' with path '%s'",
                    f, image_description['name'], dockerfile_dir)
                changed_images.append(dockerfile_dir)
                break

    # The order is important: dependents should go later than bases, so that
    # they are built with updated base versions.
    index = 0
    while index < len(changed_images):
        image = changed_images[index]
        for dependent in images_dict[image]['dependent']:
            logging.info(
                "Marking docker image '%s' as changed because it depends on changed docker image '%s'",
                dependent, image)
            changed_images.append(dependent)
        index += 1
        if index > 100:
            # Sanity check to prevent an infinite loop.
            raise RuntimeError("Too many changed docker images, this is a bug: " + str(changed_images))

    # If a dependent image was already in the list because its own files
    # changed, but then it was added as a dependent of a changed base, we
    # must remove the earlier entry so that it doesn't go earlier than its
    # base. This way, the dependent will be rebuilt later than the base, and
    # will correctly use the updated version of the base.
    seen = set()
    no_dups_reversed = []
    for x in reversed(changed_images):
        if x not in seen:
            seen.add(x)
            no_dups_reversed.append(x)

    result = [(x, images_dict[x]['name']) for x in reversed(no_dups_reversed)]
    logging.info("Changed docker images for PR %s @ %s: '%s'", pr_info.number, pr_info.sha, result)
    return result, dockerhub_repo_name

def build_and_push_one_image(path_to_dockerfile_folder, image_name, version_string):
    logging.info("Building docker image %s with version %s from path %s", image_name, version_string, path_to_dockerfile_folder)
    build_log = None
    push_log = None
    with open('build_log_' + str(image_name).replace('/', '_') + "_" + version_string, 'w') as pl:
        cmd = "docker build --network=host -t {im}:{ver} {path}".format(im=image_name, ver=version_string, path=path_to_dockerfile_folder)
        retcode = subprocess.Popen(cmd, shell=True, stderr=pl, stdout=pl).wait()
        build_log = str(pl.name)
        if retcode != 0:
            return False, build_log, None

    with open('tag_log_' + str(image_name).replace('/', '_') + "_" + version_string, 'w') as pl:
        cmd = "docker build --network=host -t {im} {path}".format(im=image_name, path=path_to_dockerfile_folder)
        retcode = subprocess.Popen(cmd, shell=True, stderr=pl, stdout=pl).wait()
        build_log = str(pl.name)
        if retcode != 0:
            return False, build_log, None

    logging.info("Pushing image %s to dockerhub", image_name)

    with open('push_log_' + str(image_name).replace('/', '_') + "_" + version_string, 'w') as pl:
        cmd = "docker push {im}:{ver}".format(im=image_name, ver=version_string)
        retcode = subprocess.Popen(cmd, shell=True, stderr=pl, stdout=pl).wait()
        push_log = str(pl.name)
        if retcode != 0:
            return False, build_log, push_log

    logging.info("Processing of %s successfully finished", image_name)
    return True, build_log, push_log

def process_single_image(versions, path_to_dockerfile_folder, image_name):
    logging.info("Image will be pushed with versions %s", ', '.join(versions))
    result = []
    for ver in versions:
        for i in range(5):
            success, build_log, push_log = build_and_push_one_image(path_to_dockerfile_folder, image_name, ver)
            if success:
                result.append((image_name + ":" + ver, build_log, push_log, 'OK'))
                break
            logging.info("Got error, will retry (attempt %s) and sleep for %s seconds", i, i * 5)
            time.sleep(i * 5)
        else:
            result.append((image_name + ":" + ver, build_log, push_log, 'FAIL'))

    logging.info("Processing finished")
    return result


def process_test_results(s3_client, test_results, s3_path_prefix):
    overall_status = 'success'
    processed_test_results = []
    for image, build_log, push_log, status in test_results:
        if status != 'OK':
            overall_status = 'failure'
        url_part = ''
        if build_log is not None and os.path.exists(build_log):
            build_url = s3_client.upload_test_report_to_s3(
                build_log,
                s3_path_prefix + "/" + os.path.basename(build_log))
            url_part += '<a href="{}">build_log</a>'.format(build_url)
        if push_log is not None and os.path.exists(push_log):
            push_url = s3_client.upload_test_report_to_s3(
                push_log,
                s3_path_prefix + "/" + os.path.basename(push_log))
            if url_part:
                url_part += ', '
            url_part += '<a href="{}">push_log</a>'.format(push_url)
        if url_part:
            test_name = image + ' (' + url_part + ')'
        else:
            test_name = image
        processed_test_results.append((test_name, status))
    return overall_status, processed_test_results

def upload_results(s3_client, pr_number, commit_sha, test_results):
    s3_path_prefix = f"{pr_number}/{commit_sha}/" + NAME.lower().replace(' ', '_')

    branch_url = "https://github.com/ClickHouse/ClickHouse/commits/master"
    branch_name = "master"
    if pr_number != 0:
        branch_name = "PR #{}".format(pr_number)
        branch_url = "https://github.com/ClickHouse/ClickHouse/pull/" + str(pr_number)
    commit_url = f"https://github.com/ClickHouse/ClickHouse/commit/{commit_sha}"

    task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}"

    html_report = create_test_html_report(NAME, test_results, "https://hub.docker.com/u/clickhouse", task_url, branch_url, branch_name, commit_url)
    with open('report.html', 'w') as f:
        f.write(html_report)

    url = s3_client.upload_test_report_to_s3('report.html', s3_path_prefix + ".html")
    logging.info("Search result in url %s", url)
    return url

def get_commit(gh, commit_sha):
    repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse"))
    commit = repo.get_commit(commit_sha)
    return commit

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    repo_path = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../"))
    temp_path = os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'docker_images_check')
    dockerhub_password = os.getenv('DOCKER_ROBOT_PASSWORD')

    if os.path.exists(temp_path):
        shutil.rmtree(temp_path)

    if not os.path.exists(temp_path):
        os.makedirs(temp_path)

    with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file:
        event = json.load(event_file)

    pr_info = PRInfo(event, False, True)
    changed_images, dockerhub_repo_name = get_changed_docker_images(pr_info, repo_path, "docker/images.json")
    logging.info("Has changed images %s", ', '.join([str(image[0]) for image in changed_images]))
    pr_commit_version = str(pr_info.number) + '-' + pr_info.sha

    versions = [str(pr_info.number), pr_commit_version]

    subprocess.check_output("docker login --username 'robotclickhouse' --password '{}'".format(dockerhub_password), shell=True)

    result_images = {}
    images_processing_result = []
    for rel_path, image_name in changed_images:
        full_path = os.path.join(repo_path, rel_path)
        images_processing_result += process_single_image(versions, full_path, image_name)
        result_images[image_name] = pr_commit_version

    if len(changed_images):
        description = "Updated " + ','.join([im[1] for im in changed_images])
    else:
        description = "Nothing to update"

    if len(description) >= 140:
        description = description[:136] + "..."

    aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "")
    aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "")

    s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key)

    s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + NAME.lower().replace(' ', '_')
    status, test_results = process_test_results(s3_helper, images_processing_result, s3_path_prefix)

    url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results)

    gh = Github(os.getenv("GITHUB_TOKEN"))
    commit = get_commit(gh, pr_info.sha)
    commit.create_status(context=NAME, description=description, state=status, target_url=url)

    with open(os.path.join(temp_path, 'changed_images.json'), 'w') as images_file:
        json.dump(result_images, images_file)

    print("::notice ::Report url: {}".format(url))
    print("::set-output name=url_output::\"{}\"".format(url))
tests/ci/finish_check.py
Normal file
43
tests/ci/finish_check.py
Normal file
@ -0,0 +1,43 @@
|
#!/usr/bin/env python3
import logging
from github import Github
from pr_info import PRInfo
import json
import os

NAME = 'Run Check (actions)'

def filter_statuses(statuses):
    """
    Squash statuses to the latest state:
    1. context="first", state="success", update_time=1
    2. context="second", state="success", update_time=2
    3. context="first", state="failure", update_time=3
    =========>
    1. context="second", state="success"
    2. context="first", state="failure"
    """
    filt = {}
    for status in sorted(statuses, key=lambda x: x.updated_at):
        filt[status.context] = status
    return filt


def get_commit(gh, commit_sha):
    repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse"))
    commit = repo.get_commit(commit_sha)
    return commit

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file:
        event = json.load(event_file)

    pr_info = PRInfo(event, need_orgs=True)
    gh = Github(os.getenv("GITHUB_TOKEN"))
    commit = get_commit(gh, pr_info.sha)

    url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}"
    statuses = filter_statuses(list(commit.get_statuses()))
    if NAME in statuses and statuses[NAME].state == "pending":
        commit.create_status(context=NAME, description="All checks finished", state="success", target_url=url)
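A quick sanity check of filter_statuses() with namedtuple stand-ins for the PyGithub commit status objects; only the attributes the function actually reads are modeled, and the import assumes the module's dependencies are installed:

from collections import namedtuple
from finish_check import filter_statuses

Status = namedtuple('Status', ['context', 'state', 'updated_at'])

statuses = [
    Status('first', 'success', 1),
    Status('second', 'success', 2),
    Status('first', 'failure', 3),
]
latest = filter_statuses(statuses)
assert latest['first'].state == 'failure'
assert latest['second'].state == 'success'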
13
tests/ci/metrics_lambda/Dockerfile
Normal file
@ -0,0 +1,13 @@
FROM public.ecr.aws/lambda/python:3.9

# Copy function code
COPY app.py ${LAMBDA_TASK_ROOT}

# Install the function's dependencies using file requirements.txt
# from your project folder.

COPY requirements.txt .
RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}"

# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile)
CMD [ "app.handler" ]
143
tests/ci/metrics_lambda/app.py
Normal file
@ -0,0 +1,143 @@
#!/usr/bin/env python3

import requests
import argparse
import jwt
import sys
import json
import time
from collections import namedtuple

def get_key_and_app_from_aws():
    import boto3
    secret_name = "clickhouse_github_secret_key"
    session = boto3.session.Session()
    client = session.client(
        service_name='secretsmanager',
    )
    get_secret_value_response = client.get_secret_value(
        SecretId=secret_name
    )
    data = json.loads(get_secret_value_response['SecretString'])
    return data['clickhouse-app-key'], int(data['clickhouse-app-id'])

def handler(event, context):
    private_key, app_id = get_key_and_app_from_aws()
    main(private_key, app_id, True)

def get_installation_id(jwt_token):
    headers = {
        "Authorization": f"Bearer {jwt_token}",
        "Accept": "application/vnd.github.v3+json",
    }
    response = requests.get("https://api.github.com/app/installations", headers=headers)
    response.raise_for_status()
    data = response.json()
    return data[0]['id']

def get_access_token(jwt_token, installation_id):
    headers = {
        "Authorization": f"Bearer {jwt_token}",
        "Accept": "application/vnd.github.v3+json",
    }
    response = requests.post(f"https://api.github.com/app/installations/{installation_id}/access_tokens", headers=headers)
    response.raise_for_status()
    data = response.json()
    return data['token']


RunnerDescription = namedtuple('RunnerDescription', ['id', 'name', 'tags', 'offline', 'busy'])

def list_runners(access_token):
    headers = {
        "Authorization": f"token {access_token}",
        "Accept": "application/vnd.github.v3+json",
    }

    response = requests.get("https://api.github.com/orgs/ClickHouse/actions/runners", headers=headers)
    response.raise_for_status()
    data = response.json()
    print("Total runners", data['total_count'])
    runners = data['runners']
    result = []
    for runner in runners:
        tags = [tag['name'] for tag in runner['labels']]
        desc = RunnerDescription(id=runner['id'], name=runner['name'], tags=tags,
                                 offline=runner['status'] == 'offline', busy=runner['busy'])
        result.append(desc)
    return result

def push_metrics_to_cloudwatch(listed_runners, namespace):
    import boto3
    client = boto3.client('cloudwatch')
    metrics_data = []
    busy_runners = sum(1 for runner in listed_runners if runner.busy)
    metrics_data.append({
        'MetricName': 'BusyRunners',
        'Value': busy_runners,
        'Unit': 'Count',
    })
    total_active_runners = sum(1 for runner in listed_runners if not runner.offline)
    metrics_data.append({
        'MetricName': 'ActiveRunners',
        'Value': total_active_runners,
        'Unit': 'Count',
    })
    total_runners = len(listed_runners)
    metrics_data.append({
        'MetricName': 'TotalRunners',
        'Value': total_runners,
        'Unit': 'Count',
    })
    if total_active_runners == 0:
        busy_ratio = 100
    else:
        busy_ratio = busy_runners / total_active_runners * 100

    metrics_data.append({
        'MetricName': 'BusyRunnersRatio',
        'Value': busy_ratio,
        'Unit': 'Percent',
    })

    client.put_metric_data(Namespace=namespace, MetricData=metrics_data)

def main(github_secret_key, github_app_id, push_to_cloudwatch):
    payload = {
        "iat": int(time.time()) - 60,
        "exp": int(time.time()) + (10 * 60),
        "iss": github_app_id,
    }

    encoded_jwt = jwt.encode(payload, github_secret_key, algorithm="RS256")
    installation_id = get_installation_id(encoded_jwt)
    access_token = get_access_token(encoded_jwt, installation_id)
    runners = list_runners(access_token)
    if push_to_cloudwatch:
        push_metrics_to_cloudwatch(runners, 'RunnersMetrics')
    else:
        print(runners)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Get list of runners and their states')
    parser.add_argument('-p', '--private-key-path', help='Path to file with private key')
    parser.add_argument('-k', '--private-key', help='Private key')
    parser.add_argument('-a', '--app-id', type=int, help='GitHub application ID', required=True)
    parser.add_argument('--push-to-cloudwatch', action='store_true', help='Push runner metrics to CloudWatch')

    args = parser.parse_args()

    if not args.private_key_path and not args.private_key:
        print("Either --private-key-path or --private-key must be specified", file=sys.stderr)
        sys.exit(1)

    if args.private_key_path and args.private_key:
        print("Only one of --private-key-path and --private-key may be specified", file=sys.stderr)
        sys.exit(1)

    if args.private_key:
        private_key = args.private_key
    else:
        with open(args.private_key_path, 'r') as key_file:
            private_key = key_file.read()

    main(private_key, args.app_id, args.push_to_cloudwatch)
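The CloudWatch metrics reduce to simple counts over the runner list. A toy check of that counting logic with fake runners, requiring no AWS or GitHub access:

from collections import namedtuple

RunnerDescription = namedtuple('RunnerDescription', ['id', 'name', 'tags', 'offline', 'busy'])
runners = [
    RunnerDescription(1, 'runner-1', ['self-hosted'], offline=False, busy=True),
    RunnerDescription(2, 'runner-2', ['self-hosted'], offline=False, busy=False),
    RunnerDescription(3, 'runner-3', ['self-hosted'], offline=True, busy=False),
]
busy = sum(1 for r in runners if r.busy)
active = sum(1 for r in runners if not r.offline)
print(busy, active, busy / active * 100)  # -> 1 2 50.0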
3
tests/ci/metrics_lambda/requirements.txt
Normal file
@ -0,0 +1,3 @@
requests
PyJWT
cryptography
41
tests/ci/pr_info.py
Normal file
@ -0,0 +1,41 @@
#!/usr/bin/env python3
import requests
import json
import os
import subprocess
import urllib.request
from unidiff import PatchSet


class PRInfo:
    def __init__(self, github_event, need_orgs=False, need_changed_files=False):
        self.number = github_event['number']
        if 'after' in github_event:
            self.sha = github_event['after']
        else:
            self.sha = github_event['pull_request']['head']['sha']

        self.labels = set([l['name'] for l in github_event['pull_request']['labels']])
        self.user_login = github_event['pull_request']['user']['login']
        self.user_orgs = set([])
        if need_orgs:
            user_orgs_response = requests.get(github_event['pull_request']['user']['organizations_url'])
            if user_orgs_response.ok:
                response_json = user_orgs_response.json()
                self.user_orgs = set(org['id'] for org in response_json)

        self.changed_files = set([])
        if need_changed_files:
            diff_url = github_event['pull_request']['diff_url']
            diff = urllib.request.urlopen(diff_url)
            diff_object = PatchSet(diff, diff.headers.get_charsets()[0])
            self.changed_files = set([f.path for f in diff_object])

    def get_dict(self):
        return {
            'sha': self.sha,
            'number': self.number,
            'labels': self.labels,
            'user_login': self.user_login,
            'user_orgs': self.user_orgs,
        }
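To see what PRInfo extracts, it can be fed a hand-written event dict covering just the fields it reads; this is a minimal, made-up subset of a real pull_request webhook payload, and the import assumes unidiff and requests are installed:

from pr_info import PRInfo

event = {
    'number': 12345,
    'after': 'abcdef0123456789',
    'pull_request': {
        'head': {'sha': 'abcdef0123456789'},
        'labels': [{'name': 'can be tested'}],
        'user': {'login': 'example-user', 'organizations_url': ''},
    },
}
pr_info = PRInfo(event)
print(pr_info.get_dict())
# {'sha': 'abcdef0123456789', 'number': 12345, 'labels': {'can be tested'},
#  'user_login': 'example-user', 'user_orgs': set()}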
Some files were not shown because too many files have changed in this diff.