diff --git a/README.md b/README.md index 2008e5d2750..ef39a163807 100644 --- a/README.md +++ b/README.md @@ -13,9 +13,3 @@ ClickHouse is an open-source column-oriented database management system that all * [Yandex.Messenger channel](https://yandex.ru/chat/#/join/20e380d9-c7be-4123-ab06-e95fb946975e) shares announcements and useful links in Russian. * [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any. * You can also [fill this form](https://clickhouse.tech/#meet) to meet Yandex ClickHouse team in person. - -## Upcoming Events - -* [ClickHouse Online Meetup (in Russian)](https://events.yandex.ru/events/click-house-onlajn-vs-18-06-2020) on June 18, 2020. -* [ClickHouse Workshop in Novosibirsk](https://2020.codefest.ru/lecture/1628) on TBD date. -* [Yandex C++ Open-Source Sprints in Moscow](https://events.yandex.ru/events/otkrytyj-kod-v-yandek-28-03-2020) on TBD date. diff --git a/base/common/DateLUT.cpp b/base/common/DateLUT.cpp index 750900493aa..6ff0884701c 100644 --- a/base/common/DateLUT.cpp +++ b/base/common/DateLUT.cpp @@ -75,7 +75,7 @@ std::string determineDefaultTimeZone() try { - tz_database_path = fs::canonical(tz_database_path); + tz_database_path = fs::weakly_canonical(tz_database_path); /// The tzdata file exists. If it is inside the tz_database_dir, /// then the relative path is the time zone id. @@ -91,7 +91,7 @@ std::string determineDefaultTimeZone() if (!tz_file_path.is_absolute()) tz_file_path = tz_database_path / tz_file_path; - tz_file_path = fs::canonical(tz_file_path); + tz_file_path = fs::weakly_canonical(tz_file_path); fs::path relative_path = tz_file_path.lexically_relative(tz_database_path); if (!relative_path.empty() && *relative_path.begin() != ".." && *relative_path.begin() != ".") diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp index 2a36777218e..33dee5d4a63 100644 --- a/base/daemon/BaseDaemon.cpp +++ b/base/daemon/BaseDaemon.cpp @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -50,6 +51,7 @@ #include #include #include +#include #if !defined(ARCADIA_BUILD) # include @@ -83,7 +85,8 @@ static const size_t signal_pipe_buf_size = + sizeof(ucontext_t) + sizeof(StackTrace) + sizeof(UInt32) - + max_query_id_size + 1; /// query_id + varint encoded length + + max_query_id_size + 1 /// query_id + varint encoded length + + sizeof(void*); using signal_function = void(int, siginfo_t*, void*); @@ -133,13 +136,14 @@ static void signalHandler(int sig, siginfo_t * info, void * context) DB::writePODBinary(stack_trace, out); DB::writeBinary(UInt32(getThreadId()), out); DB::writeStringBinary(query_id, out); + DB::writePODBinary(DB::current_thread, out); out.next(); if (sig != SIGTSTP) /// This signal is used for debugging. { /// The time that is usually enough for separate thread to print info into log. - ::sleep(10); + sleepForSeconds(10); call_default_signal_handler(sig); } @@ -216,16 +220,18 @@ public: StackTrace stack_trace(NoCapture{}); UInt32 thread_num; std::string query_id; + DB::ThreadStatus * thread_ptr{}; DB::readPODBinary(info, in); DB::readPODBinary(context, in); DB::readPODBinary(stack_trace, in); DB::readBinary(thread_num, in); DB::readBinary(query_id, in); + DB::readPODBinary(thread_ptr, in); /// This allows to receive more signals if failure happens inside onFault function. /// Example: segfault while symbolizing stack trace. 
- std::thread([=, this] { onFault(sig, info, context, stack_trace, thread_num, query_id); }).detach(); + std::thread([=, this] { onFault(sig, info, context, stack_trace, thread_num, query_id, thread_ptr); }).detach(); } } } @@ -236,7 +242,8 @@ private: void onTerminate(const std::string & message, UInt32 thread_num) const { - LOG_FATAL(log, "(version {}{}) (from thread {}) {}", VERSION_STRING, VERSION_OFFICIAL, thread_num, message); + LOG_FATAL(log, "(version {}{}, {}) (from thread {}) {}", + VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, thread_num, message); } void onFault( @@ -245,21 +252,30 @@ private: const ucontext_t & context, const StackTrace & stack_trace, UInt32 thread_num, - const std::string & query_id) const + const std::string & query_id, + DB::ThreadStatus * thread_ptr) const { + DB::ThreadStatus thread_status; + + /// Send logs from this thread to client if possible. + /// It will allow client to see failure messages directly. + if (thread_ptr) + { + if (auto logs_queue = thread_ptr->getInternalTextLogsQueue()) + DB::CurrentThread::attachInternalTextLogsQueue(logs_queue, DB::LogsLevel::trace); + } + LOG_FATAL(log, "########################################"); + if (query_id.empty()) { - std::stringstream message; - message << "(version " << VERSION_STRING << VERSION_OFFICIAL << ")"; - message << " (from thread " << thread_num << ")"; - if (query_id.empty()) - message << " (no query)"; - else - message << " (query_id: " << query_id << ")"; - message << " Received signal " << strsignal(sig) << " (" << sig << ")."; - - LOG_FATAL(log, message.str()); + LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (no query) Received signal {} ({})", + VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, thread_num, strsignal(sig), sig); + } + else + { + LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (query_id: {}) Received signal {} ({})", + VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, thread_num, query_id, strsignal(sig), sig); } LOG_FATAL(log, signalToErrorMessage(sig, info, context)); @@ -279,6 +295,10 @@ private: /// Write symbolized stack trace line by line for better grep-ability. stack_trace.toStringEveryLine([&](const std::string & s) { LOG_FATAL(log, s); }); + + /// When everything is done, we will try to send these error messages to client. + if (thread_ptr) + thread_ptr->onFatalError(); } }; @@ -292,17 +312,15 @@ static void sanitizerDeathCallback() StringRef query_id = DB::CurrentThread::getQueryId(); /// This is signal safe. + if (query_id.size == 0) { - std::stringstream message; - message << "(version " << VERSION_STRING << VERSION_OFFICIAL << ")"; - message << " (from thread " << getThreadId() << ")"; - if (query_id.size == 0) - message << " (no query)"; - else - message << " (query_id: " << query_id << ")"; - message << " Sanitizer trap."; - - LOG_FATAL(log, message.str()); + LOG_FATAL(log, "(version {}{}) (from thread {}) (no query) Sanitizer trap.", + VERSION_STRING, VERSION_OFFICIAL, getThreadId()); + } + else + { + LOG_FATAL(log, "(version {}{}) (from thread {}) (query_id: {}) Sanitizer trap.", + VERSION_STRING, VERSION_OFFICIAL, getThreadId(), query_id); } /// Just in case print our own stack trace. In case when llvm-symbolizer does not work. 
@@ -711,12 +729,23 @@ void BaseDaemon::initializeTerminationAndSignalProcessing() signal_listener = std::make_unique(*this); signal_listener_thread.start(*signal_listener); + +#if defined(__ELF__) && !defined(__FreeBSD__) + String build_id_hex = DB::SymbolIndex::instance().getBuildIDHex(); + if (build_id_hex.empty()) + build_id_info = "no build id"; + else + build_id_info = "build id: " + build_id_hex; +#else + build_id_info = "no build id"; +#endif } void BaseDaemon::logRevision() const { Poco::Logger::root().information("Starting " + std::string{VERSION_FULL} + " with revision " + std::to_string(ClickHouseRevision::get()) + + ", " + build_id_info + ", PID " + std::to_string(getpid())); } diff --git a/base/daemon/BaseDaemon.h b/base/daemon/BaseDaemon.h index 39332cfe963..2a3262dd26f 100644 --- a/base/daemon/BaseDaemon.h +++ b/base/daemon/BaseDaemon.h @@ -198,6 +198,8 @@ protected: std::string config_path; DB::ConfigProcessor::LoadedConfig loaded_config; Poco::Util::AbstractConfiguration * last_configuration = nullptr; + + String build_id_info; }; diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 72959d3c1d8..27dd8cc8f15 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -1,9 +1,9 @@ # This strings autochanged from release_lib.sh: -SET(VERSION_REVISION 54435) +SET(VERSION_REVISION 54436) SET(VERSION_MAJOR 20) -SET(VERSION_MINOR 5) +SET(VERSION_MINOR 6) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 91df18a906dcffdbee6816e5389df6c65f86e35f) -SET(VERSION_DESCRIBE v20.5.1.1-prestable) -SET(VERSION_STRING 20.5.1.1) +SET(VERSION_GITHASH efc57fb063b3fb4df968d916720ec4d4ced4642e) +SET(VERSION_DESCRIBE v20.6.1.1-prestable) +SET(VERSION_STRING 20.6.1.1) # end of autochange diff --git a/cmake/linux/default_libs.cmake b/cmake/linux/default_libs.cmake index 0ecdfd2a3ad..da91ccaa0c2 100644 --- a/cmake/linux/default_libs.cmake +++ b/cmake/linux/default_libs.cmake @@ -18,7 +18,7 @@ message(STATUS "Default libraries: ${DEFAULT_LIBS}") set(CMAKE_CXX_STANDARD_LIBRARIES ${DEFAULT_LIBS}) set(CMAKE_C_STANDARD_LIBRARIES ${DEFAULT_LIBS}) -# glibc-compatibility library relies to fixed version of libc headers +# glibc-compatibility library relies to constant version of libc headers # (because minor changes in function attributes between different glibc versions will introduce incompatibilities) # This is for x86_64. For other architectures we have separate toolchains. 
if (ARCH_AMD64 AND NOT_UNBUNDLED) diff --git a/contrib/boost-cmake/CMakeLists.txt b/contrib/boost-cmake/CMakeLists.txt index fb7b236d30d..e92fe4b7159 100644 --- a/contrib/boost-cmake/CMakeLists.txt +++ b/contrib/boost-cmake/CMakeLists.txt @@ -97,5 +97,37 @@ if (USE_INTERNAL_BOOST_LIBRARY) add_library (boost::system ALIAS _boost_system) target_include_directories (_boost_system PRIVATE ${LIBRARY_DIR}) else () - message (FATAL_ERROR "TODO: external Boost library is not supported!") + # 1.70 like in contrib/boost + # 1.67 on CI + set(BOOST_VERSION 1.67) + + find_package(Boost ${BOOST_VERSION} COMPONENTS + system + filesystem + iostreams + program_options + regex + REQUIRED) + + add_library (_boost_headers_only INTERFACE) + add_library (boost::headers_only ALIAS _boost_headers_only) + target_include_directories (_boost_headers_only SYSTEM BEFORE INTERFACE ${Boost_INCLUDE_DIR}) + + add_library (_boost_filesystem INTERFACE) + add_library (_boost_iostreams INTERFACE) + add_library (_boost_program_options INTERFACE) + add_library (_boost_regex INTERFACE) + add_library (_boost_system INTERFACE) + + target_link_libraries (_boost_filesystem INTERFACE ${Boost_FILESYSTEM_LIBRARY}) + target_link_libraries (_boost_iostreams INTERFACE ${Boost_IOSTREAMS_LIBRARY}) + target_link_libraries (_boost_program_options INTERFACE ${Boost_PROGRAM_OPTIONS_LIBRARY}) + target_link_libraries (_boost_regex INTERFACE ${Boost_REGEX_LIBRARY}) + target_link_libraries (_boost_system INTERFACE ${Boost_SYSTEM_LIBRARY}) + + add_library (boost::filesystem ALIAS _boost_filesystem) + add_library (boost::iostreams ALIAS _boost_iostreams) + add_library (boost::program_options ALIAS _boost_program_options) + add_library (boost::regex ALIAS _boost_regex) + add_library (boost::system ALIAS _boost_system) endif () diff --git a/contrib/hyperscan b/contrib/hyperscan index 3058c9c20cb..3907fd00ee8 160000 --- a/contrib/hyperscan +++ b/contrib/hyperscan @@ -1 +1 @@ -Subproject commit 3058c9c20cba3accdf92544d8513a26240c4ff70 +Subproject commit 3907fd00ee8b2538739768fa9533f8635a276531 diff --git a/contrib/hyperscan-cmake/CMakeLists.txt b/contrib/hyperscan-cmake/CMakeLists.txt index 1f30bfccbe8..bc2f6c4e89d 100644 --- a/contrib/hyperscan-cmake/CMakeLists.txt +++ b/contrib/hyperscan-cmake/CMakeLists.txt @@ -219,7 +219,9 @@ if (ENABLE_HYPERSCAN) target_compile_definitions (hyperscan PUBLIC USE_HYPERSCAN=1) target_compile_options (hyperscan - PRIVATE -g0 -march=corei7 # library has too much debug information + PRIVATE -g0 # Library has too much debug information + -march=corei7 -O2 -fno-strict-aliasing -fno-omit-frame-pointer -fvisibility=hidden # The options from original build system + -fno-sanitize=undefined # Assume the library takes care of itself ) target_include_directories (hyperscan PRIVATE diff --git a/debian/changelog b/debian/changelog index 4ce601e18aa..0c0a59005f2 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (20.5.1.1) unstable; urgency=low +clickhouse (20.6.1.1) unstable; urgency=low * Modified source code - -- clickhouse-release Tue, 28 Apr 2020 20:12:13 +0300 + -- clickhouse-release Mon, 22 Jun 2020 20:40:23 +0300 diff --git a/debian/control b/debian/control index 3ce12b504c2..58efd711d27 100644 --- a/debian/control +++ b/debian/control @@ -28,7 +28,7 @@ Description: Client binary for ClickHouse Package: clickhouse-common-static Architecture: any -Depends: ${shlibs:Depends}, ${misc:Depends}, tzdata +Depends: ${shlibs:Depends}, ${misc:Depends} Suggests: clickhouse-common-static-dbg Replaces: 
clickhouse-common, clickhouse-server-base Provides: clickhouse-common, clickhouse-server-base diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index 493cdaac543..c4683d35e99 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=20.5.1.* +ARG version=20.6.1.* RUN apt-get update \ && apt-get install --yes --no-install-recommends \ @@ -17,7 +17,6 @@ RUN apt-get update \ clickhouse-client=$version \ clickhouse-common-static=$version \ locales \ - tzdata \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf \ && apt-get clean diff --git a/docker/packager/packager b/docker/packager/packager index 85dd3cc421c..ccb01a4df92 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -142,7 +142,7 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ if unbundled: # TODO: fix build with ENABLE_RDKAFKA - cmake_flags.append('-DUNBUNDLED=1 -DENABLE_MYSQL=0 -DENABLE_ODBC=0 -DENABLE_REPLXX=0 -DENABLE_RDKAFKA=0 -DUSE_INTERNAL_BOOST_LIBRARY=1') + cmake_flags.append('-DUNBUNDLED=1 -DENABLE_MYSQL=0 -DENABLE_ODBC=0 -DENABLE_REPLXX=0 -DENABLE_RDKAFKA=0') if split_binary: cmake_flags.append('-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1') diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 3a16a1fd158..132a5d89959 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=20.5.1.* +ARG version=20.6.1.* ARG gosu_ver=1.10 RUN apt-get update \ @@ -21,7 +21,6 @@ RUN apt-get update \ locales \ ca-certificates \ wget \ - tzdata \ && rm -rf \ /var/lib/apt/lists/* \ /var/cache/debconf \ diff --git a/docker/server/entrypoint.sh b/docker/server/entrypoint.sh index 059f3cb631b..8fc9c670b06 100644 --- a/docker/server/entrypoint.sh +++ b/docker/server/entrypoint.sh @@ -110,7 +110,7 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then # create default database, if defined if [ -n "$CLICKHOUSE_DB" ]; then echo "$0: create database '$CLICKHOUSE_DB'" - "${clickhouseclient[@]}" "CREATE DATABASE IF NOT EXISTS $CLICKHOUSE_DB"; + "${clickhouseclient[@]}" -q "CREATE DATABASE IF NOT EXISTS $CLICKHOUSE_DB"; fi for f in /docker-entrypoint-initdb.d/*; do diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index 6673d32c2e2..be6d6d73793 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=20.5.1.* +ARG version=20.6.1.* RUN apt-get update && \ apt-get install -y apt-transport-https dirmngr && \ diff --git a/docker/test/integration/base/Dockerfile b/docker/test/integration/base/Dockerfile index f6fc3594594..a0fb6ac8d2f 100644 --- a/docker/test/integration/base/Dockerfile +++ b/docker/test/integration/base/Dockerfile @@ -17,8 +17,7 @@ RUN apt-get update \ odbc-postgresql \ sqlite3 \ curl \ - tar \ - tzdata + tar RUN rm -rf \ /var/lib/apt/lists/* \ /var/cache/debconf \ diff --git a/docker/test/integration/compose/docker_compose_postgres.yml b/docker/test/integration/compose/docker_compose_postgres.yml index ffc4d3164b5..fff4fb1fa42 100644 --- a/docker/test/integration/compose/docker_compose_postgres.yml +++ b/docker/test/integration/compose/docker_compose_postgres.yml @@ -7,3 +7,7 @@ services: POSTGRES_PASSWORD: 
mysecretpassword ports: - 5432:5432 + networks: + default: + aliases: + - postgre-sql.local diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index a2760907cb3..bc7ef0f2324 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -22,7 +22,7 @@ function configure echo all killed set -m # Spawn temporary in its own process groups - left/clickhouse-server --config-file=left/config/config.xml -- --path db0 &> setup-server-log.log & + left/clickhouse-server --config-file=left/config/config.xml -- --path db0 --user_files_path db0/user_files &> setup-server-log.log & left_pid=$! kill -0 $left_pid disown $left_pid @@ -59,12 +59,12 @@ function restart set -m # Spawn servers in their own process groups - left/clickhouse-server --config-file=left/config/config.xml -- --path left/db &>> left-server-log.log & + left/clickhouse-server --config-file=left/config/config.xml -- --path left/db --user_files_path left/db/user_files &>> left-server-log.log & left_pid=$! kill -0 $left_pid disown $left_pid - right/clickhouse-server --config-file=right/config/config.xml -- --path right/db &>> right-server-log.log & + right/clickhouse-server --config-file=right/config/config.xml -- --path right/db --user_files_path right/db/user_files &>> right-server-log.log & right_pid=$! kill -0 $right_pid disown $right_pid diff --git a/docker/test/performance-comparison/entrypoint.sh b/docker/test/performance-comparison/entrypoint.sh index 5afaf725c50..1a9438cd60f 100755 --- a/docker/test/performance-comparison/entrypoint.sh +++ b/docker/test/performance-comparison/entrypoint.sh @@ -17,7 +17,7 @@ function find_reference_sha # If not master, try to fetch pull/.../{head,merge} if [ "$PR_TO_TEST" != "0" ] then - git -C ch fetch origin "refs/pull/$PR_TO_TEST/*:refs/heads/pr/*" + git -C ch fetch origin "refs/pull/$PR_TO_TEST/*:refs/heads/pull/$PR_TO_TEST/*" fi # Go back from the revision to be tested, trying to find the closest published @@ -28,9 +28,9 @@ function find_reference_sha # and SHA_TO_TEST, but a revision that is merged with recent master, given # by pull/.../merge ref. # Master is the first parent of the pull/.../merge. - if git -C ch rev-parse pr/merge + if git -C ch rev-parse "pull/$PR_TO_TEST/merge" then - start_ref=pr/merge~ + start_ref="pull/$PR_TO_TEST/merge~" fi while : @@ -73,11 +73,11 @@ if [ "$REF_PR" == "" ]; then echo Reference PR is not specified ; exit 1 ; fi ( git -C ch log -1 --decorate "$SHA_TO_TEST" ||: - if git -C ch rev-parse pr/merge &> /dev/null + if git -C ch rev-parse "pull/$PR_TO_TEST/merge" &> /dev/null then echo echo Real tested commit is: - git -C ch log -1 --decorate pr/merge + git -C ch log -1 --decorate "pull/$PR_TO_TEST/merge" fi ) | tee right-commit.txt @@ -87,7 +87,7 @@ then # tests for use by compare.sh. Compare to merge base, because master might be # far in the future and have unrelated test changes. 
base=$(git -C ch merge-base "$SHA_TO_TEST" master) - git -C ch diff --name-only "$SHA_TO_TEST" "$base" | tee changed-tests.txt + git -C ch diff --name-only "$base" "$SHA_TO_TEST" | tee changed-tests.txt if grep -vq '^tests/performance' changed-tests.txt then # Have some other changes besides the tests, so truncate the test list, @@ -131,5 +131,8 @@ done dmesg -T > dmesg.log -7z a '-x!*/tmp' /output/output.7z ./*.{log,tsv,html,txt,rep,svg,columns} {right,left}/{performance,db/preprocessed_configs,scripts} report analyze +7z a '-x!*/tmp' /output/output.7z ./*.{log,tsv,html,txt,rep,svg,columns} \ + {right,left}/{performance,scripts} {{right,left}/db,db0}/preprocessed_configs \ + report analyze benchmark + cp compare.log /output diff --git a/docker/test/pvs/Dockerfile b/docker/test/pvs/Dockerfile index 6e573b4648f..0c86917013f 100644 --- a/docker/test/pvs/Dockerfile +++ b/docker/test/pvs/Dockerfile @@ -20,9 +20,9 @@ RUN apt-get --allow-unauthenticated update -y \ # apt-get --allow-unauthenticated install --yes --no-install-recommends \ # pvs-studio -ENV PKG_VERSION="pvs-studio-7.07.38234.46-amd64.deb" +ENV PKG_VERSION="pvs-studio-7.07.38234.48-amd64.deb" -RUN wget "http://files.viva64.com/$PKG_VERSION" +RUN wget "https://files.viva64.com/$PKG_VERSION" RUN sudo dpkg -i "$PKG_VERSION" CMD cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \ diff --git a/docs/_description_templates/template-function.md b/docs/_description_templates/template-function.md index 1acf92cb501..b69d7ed5309 100644 --- a/docs/_description_templates/template-function.md +++ b/docs/_description_templates/template-function.md @@ -1,4 +1,4 @@ -## function-name {#function-name-in-lower-case} +## functionName {#functionname-in-lower-case} Short description. diff --git a/docs/_description_templates/template-setting.md b/docs/_description_templates/template-setting.md index 5a33716f899..fc912aba3e1 100644 --- a/docs/_description_templates/template-setting.md +++ b/docs/_description_templates/template-setting.md @@ -1,4 +1,4 @@ -## setting-name {#setting-name-in-lower-case} +## setting_name {#setting_name} Description. diff --git a/docs/en/development/build-cross-arm.md b/docs/en/development/build-cross-arm.md index b2b5fa6b1e0..9e64a58ff14 100644 --- a/docs/en/development/build-cross-arm.md +++ b/docs/en/development/build-cross-arm.md @@ -7,7 +7,7 @@ toc_title: How to Build ClickHouse on Linux for AARCH64 (ARM64) This is for the case when you have Linux machine and want to use it to build `clickhouse` binary that will run on another Linux machine with AARCH64 CPU architecture. This is intended for continuous integration checks that run on Linux servers. -The cross-build for AARCH64 is based on the [Build instructions](build.md), follow them first. +The cross-build for AARCH64 is based on the [Build instructions](../development/build.md), follow them first. # Install Clang-8 {#install-clang-8} diff --git a/docs/en/development/build-cross-osx.md b/docs/en/development/build-cross-osx.md index 7c49a5609f2..bf0032a4cb2 100644 --- a/docs/en/development/build-cross-osx.md +++ b/docs/en/development/build-cross-osx.md @@ -5,9 +5,9 @@ toc_title: How to Build ClickHouse on Linux for Mac OS X # How to Build ClickHouse on Linux for Mac OS X {#how-to-build-clickhouse-on-linux-for-mac-os-x} -This is for the case when you have Linux machine and want to use it to build `clickhouse` binary that will run on OS X. This is intended for continuous integration checks that run on Linux servers. 
If you want to build ClickHouse directly on Mac OS X, then proceed with [another instruction](build-osx.md). +This is for the case when you have Linux machine and want to use it to build `clickhouse` binary that will run on OS X. This is intended for continuous integration checks that run on Linux servers. If you want to build ClickHouse directly on Mac OS X, then proceed with [another instruction](../development/build-osx.md). -The cross-build for Mac OS X is based on the [Build instructions](build.md), follow them first. +The cross-build for Mac OS X is based on the [Build instructions](../development/build.md), follow them first. # Install Clang-8 {#install-clang-8} diff --git a/docs/en/development/build.md b/docs/en/development/build.md index 842e565b132..fd3c21e95db 100644 --- a/docs/en/development/build.md +++ b/docs/en/development/build.md @@ -28,10 +28,9 @@ There are several ways to do this. ### Install from Repository {#install-from-repository} On Ubuntu 19.10 or newer: -``` -$ sudo apt-get update -$ sudo apt-get install gcc-9 g++-9 -``` + + $ sudo apt-get update + $ sudo apt-get install gcc-9 g++-9 ### Install from a PPA Package {#install-from-a-ppa-package} diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index 3776c9b513f..7e6a0fd6a17 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -3,11 +3,11 @@ toc_priority: 61 toc_title: For Beginners --- -# The Beginner ClickHouse Developer Instruction +# The Beginner ClickHouse Developer Instruction {#the-beginner-clickhouse-developer-instruction} Building of ClickHouse is supported on Linux, FreeBSD and Mac OS X. -If you use Windows, you need to create a virtual machine with Ubuntu. To start working with a virtual machine please install VirtualBox. You can download Ubuntu from the website: https://www.ubuntu.com/#download. Please create a virtual machine from the downloaded image (you should reserve at least 4GB of RAM for it). To run a command-line terminal in Ubuntu, please locate a program containing the word “terminal” in its name (gnome-terminal, konsole etc.) or just press Ctrl+Alt+T. +If you use Windows, you need to create a virtual machine with Ubuntu. To start working with a virtual machine please install VirtualBox. You can download Ubuntu from the website: https://www.ubuntu.com/\#download. Please create a virtual machine from the downloaded image (you should reserve at least 4GB of RAM for it). To run a command-line terminal in Ubuntu, please locate a program containing the word “terminal” in its name (gnome-terminal, konsole etc.) or just press Ctrl+Alt+T. ClickHouse cannot work or build on a 32-bit system. You should acquire access to a 64-bit system and you can continue reading. @@ -137,7 +137,7 @@ Official Yandex builds currently use GCC because it generates machine code of sl To install GCC on Ubuntu run: `sudo apt install gcc g++` -Check the version of gcc: `gcc --version`. If it is below 9, then follow the instruction here: https://clickhouse.tech/docs/en/development/build/#install-gcc-9. +Check the version of gcc: `gcc --version`. If it is below 9, then follow the instruction here: https://clickhouse.tech/docs/en/development/build/\#install-gcc-9. Mac OS X build is supported only for Clang. 
Just run `brew install llvm` diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md index eefced2e684..2ed3020c3c6 100644 --- a/docs/en/development/tests.md +++ b/docs/en/development/tests.md @@ -200,7 +200,7 @@ Debug version of `jemalloc` is used for debug build. ClickHouse fuzzing is implemented both using [libFuzzer](https://llvm.org/docs/LibFuzzer.html) and random SQL queries. All the fuzz testing should be performed with sanitizers (Address and Undefined). -LibFuzzer is used for isolated fuzz testing of library code. Fuzzers are implemented as part of test code and have "\_fuzzer" name postfixes. +LibFuzzer is used for isolated fuzz testing of library code. Fuzzers are implemented as part of test code and have “\_fuzzer” name postfixes. Fuzzer example can be found at `src/Parsers/tests/lexer_fuzzer.cpp`. LibFuzzer-specific configs, dictionaries and corpus are stored at `tests/fuzz`. We encourage you to write fuzz tests for every functionality that handles user input. @@ -211,7 +211,6 @@ Google OSS-Fuzz can be found at `docker/fuzz`. We also use simple fuzz test to generate random SQL queries and to check that the server doesn’t die executing them. You can find it in `00746_sql_fuzzy.pl`. This test should be run continuously (overnight and longer). - ## Security Audit {#security-audit} People from Yandex Security Team do some basic overview of ClickHouse capabilities from the security standpoint. diff --git a/docs/en/engines/database-engines/index.md b/docs/en/engines/database-engines/index.md index 87646a34ac2..fdf2a4d03b7 100644 --- a/docs/en/engines/database-engines/index.md +++ b/docs/en/engines/database-engines/index.md @@ -12,8 +12,8 @@ By default, ClickHouse uses its native database engine, which provides configura You can also use the following database engines: -- [MySQL](mysql.md) +- [MySQL](../../engines/database-engines/mysql.md) -- [Lazy](lazy.md) +- [Lazy](../../engines/database-engines/lazy.md) [Original article](https://clickhouse.tech/docs/en/database_engines/) diff --git a/docs/en/engines/index.md b/docs/en/engines/index.md index 0ab3b5fe172..66d2fdcb074 100644 --- a/docs/en/engines/index.md +++ b/docs/en/engines/index.md @@ -1,8 +1,15 @@ --- toc_folder_title: Engines +toc_hidden: true toc_priority: 25 toc_title: hidden -toc_hidden: true --- +# ClickHouse Engines + +There are two key engine kinds in ClickHouse: + +- [Table engines](table-engines/index.md) +- [Database engines](database-engines/index.md) + {## [Original article](https://clickhouse.tech/docs/en/engines/) ##} diff --git a/docs/en/engines/table-engines/index.md b/docs/en/engines/table-engines/index.md index ee28bfda905..0008fd36b09 100644 --- a/docs/en/engines/table-engines/index.md +++ b/docs/en/engines/table-engines/index.md @@ -19,27 +19,27 @@ The table engine (type of table) determines: ### MergeTree {#mergetree} -The most universal and functional table engines for high-load tasks. The property shared by these engines is quick data insertion with subsequent background data processing. `MergeTree` family engines support data replication (with [Replicated*](mergetree-family/replication.md#table_engines-replication) versions of engines), partitioning, and other features not supported in other engines. +The most universal and functional table engines for high-load tasks. The property shared by these engines is quick data insertion with subsequent background data processing. 
`MergeTree` family engines support data replication (with [Replicated\*](../../engines/table-engines/mergetree-family/replication.md#table_engines-replication) versions of engines), partitioning, secondary data-skipping indexes, and other features not supported in other engines. Engines in the family: -- [MergeTree](mergetree-family/mergetree.md#mergetree) -- [ReplacingMergeTree](mergetree-family/replacingmergetree.md#replacingmergetree) -- [SummingMergeTree](mergetree-family/summingmergetree.md#summingmergetree) -- [AggregatingMergeTree](mergetree-family/aggregatingmergetree.md#aggregatingmergetree) -- [CollapsingMergeTree](mergetree-family/collapsingmergetree.md#table_engine-collapsingmergetree) -- [VersionedCollapsingMergeTree](mergetree-family/versionedcollapsingmergetree.md#versionedcollapsingmergetree) -- [GraphiteMergeTree](mergetree-family/graphitemergetree.md#graphitemergetree) +- [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#mergetree) +- [ReplacingMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md#replacingmergetree) +- [SummingMergeTree](../../engines/table-engines/mergetree-family/summingmergetree.md#summingmergetree) +- [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md#aggregatingmergetree) +- [CollapsingMergeTree](../../engines/table-engines/mergetree-family/collapsingmergetree.md#table_engine-collapsingmergetree) +- [VersionedCollapsingMergeTree](../../engines/table-engines/mergetree-family/versionedcollapsingmergetree.md#versionedcollapsingmergetree) +- [GraphiteMergeTree](../../engines/table-engines/mergetree-family/graphitemergetree.md#graphitemergetree) ### Log {#log} -Lightweight [engines](log-family/index.md) with minimum functionality. They’re the most effective when you need to quickly write many small tables (up to approximately 1 million rows) and read them later as a whole. +Lightweight [engines](../../engines/table-engines/log-family/index.md) with minimum functionality. They’re the most effective when you need to quickly write many small tables (up to approximately 1 million rows) and read them later as a whole. Engines in the family: -- [TinyLog](log-family/tinylog.md#tinylog) -- [StripeLog](log-family/stripelog.md#stripelog) -- [Log](log-family/log.md#log) +- [TinyLog](../../engines/table-engines/log-family/tinylog.md#tinylog) +- [StripeLog](../../engines/table-engines/log-family/stripelog.md#stripelog) +- [Log](../../engines/table-engines/log-family/log.md#log) ### Integration Engines {#integration-engines} @@ -47,28 +47,28 @@ Engines for communicating with other data storage and processing systems. 
Engines in the family: -- [Kafka](integrations/kafka.md#kafka) -- [MySQL](integrations/mysql.md#mysql) -- [ODBC](integrations/odbc.md#table-engine-odbc) -- [JDBC](integrations/jdbc.md#table-engine-jdbc) -- [HDFS](integrations/hdfs.md#hdfs) +- [Kafka](../../engines/table-engines/integrations/kafka.md#kafka) +- [MySQL](../../engines/table-engines/integrations/mysql.md#mysql) +- [ODBC](../../engines/table-engines/integrations/odbc.md#table-engine-odbc) +- [JDBC](../../engines/table-engines/integrations/jdbc.md#table-engine-jdbc) +- [HDFS](../../engines/table-engines/integrations/hdfs.md#hdfs) ### Special Engines {#special-engines} Engines in the family: -- [Distributed](special/distributed.md#distributed) -- [MaterializedView](special/materializedview.md#materializedview) -- [Dictionary](special/dictionary.md#dictionary) -- [Merge](special/merge.md#merge) -- [File](special/file.md#file) -- [Null](special/null.md#null) -- [Set](special/set.md#set) -- [Join](special/join.md#join) -- [URL](special/url.md#table_engines-url) -- [View](special/view.md#table_engines-view) -- [Memory](special/memory.md#memory) -- [Buffer](special/buffer.md#buffer) +- [Distributed](../../engines/table-engines/special/distributed.md#distributed) +- [MaterializedView](../../engines/table-engines/special/materializedview.md#materializedview) +- [Dictionary](../../engines/table-engines/special/dictionary.md#dictionary) +- [Merge](../../engines/table-engines/special/merge.md#merge) +- [File](../../engines/table-engines/special/file.md#file) +- [Null](../../engines/table-engines/special/null.md#null) +- [Set](../../engines/table-engines/special/set.md#set) +- [Join](../../engines/table-engines/special/join.md#join) +- [URL](../../engines/table-engines/special/url.md#table_engines-url) +- [View](../../engines/table-engines/special/view.md#table_engines-view) +- [Memory](../../engines/table-engines/special/memory.md#memory) +- [Buffer](../../engines/table-engines/special/buffer.md#buffer) ## Virtual Columns {#table_engines-virtual_columns} @@ -80,4 +80,4 @@ To select data from a virtual column, you must specify its name in the `SELECT` If you create a table with a column that has the same name as one of the table virtual columns, the virtual column becomes inaccessible. We don’t recommend doing this. To help avoid conflicts, virtual column names are usually prefixed with an underscore. -[Original article](https://clickhouse.tech/docs/en/operations/table_engines/) +[Original article](https://clickhouse.tech/docs/en/engines/table-engines/) diff --git a/docs/en/engines/table-engines/integrations/hdfs.md b/docs/en/engines/table-engines/integrations/hdfs.md index 045b235bb5e..847b6fa90ee 100644 --- a/docs/en/engines/table-engines/integrations/hdfs.md +++ b/docs/en/engines/table-engines/integrations/hdfs.md @@ -6,7 +6,7 @@ toc_title: HDFS # HDFS {#table_engines-hdfs} This engine provides integration with [Apache Hadoop](https://en.wikipedia.org/wiki/Apache_Hadoop) ecosystem by allowing to manage data on [HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)via ClickHouse. This engine is similar -to the [File](../special/file.md#table_engines-file) and [URL](../special/url.md#table_engines-url) engines, but provides Hadoop-specific features. +to the [File](../../../engines/table-engines/special/file.md#table_engines-file) and [URL](../../../engines/table-engines/special/url.md#table_engines-url) engines, but provides Hadoop-specific features. 
## Usage {#usage} @@ -116,6 +116,6 @@ CREARE TABLE big_table (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9 **See Also** -- [Virtual columns](../index.md#table_engines-virtual_columns) +- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns) [Original article](https://clickhouse.tech/docs/en/operations/table_engines/hdfs/) diff --git a/docs/en/engines/table-engines/integrations/kafka.md b/docs/en/engines/table-engines/integrations/kafka.md index 97d5333b0ad..cff9ab3a0c4 100644 --- a/docs/en/engines/table-engines/integrations/kafka.md +++ b/docs/en/engines/table-engines/integrations/kafka.md @@ -173,7 +173,7 @@ For a list of possible configuration options, see the [librdkafka configuration **See Also** -- [Virtual columns](../index.md#table_engines-virtual_columns) -- [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size) +- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns) +- [background\_schedule\_pool\_size](../../../operations/settings/settings.md#background_schedule_pool_size) [Original article](https://clickhouse.tech/docs/en/operations/table_engines/kafka/) diff --git a/docs/en/engines/table-engines/log-family/log-family.md b/docs/en/engines/table-engines/log-family/log-family.md index af87625393c..2a5096c62b3 100644 --- a/docs/en/engines/table-engines/log-family/log-family.md +++ b/docs/en/engines/table-engines/log-family/log-family.md @@ -9,9 +9,9 @@ These engines were developed for scenarios when you need to quickly write many s Engines of the family: -- [StripeLog](stripelog.md) -- [Log](log.md) -- [TinyLog](tinylog.md) +- [StripeLog](../../../engines/table-engines/log-family/stripelog.md) +- [Log](../../../engines/table-engines/log-family/log.md) +- [TinyLog](../../../engines/table-engines/log-family/tinylog.md) ## Common Properties {#common-properties} diff --git a/docs/en/engines/table-engines/log-family/log.md b/docs/en/engines/table-engines/log-family/log.md index 1bc229837a4..faf4c5889f4 100644 --- a/docs/en/engines/table-engines/log-family/log.md +++ b/docs/en/engines/table-engines/log-family/log.md @@ -5,9 +5,9 @@ toc_title: Log # Log {#log} -Engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](log-family.md) article. +Engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](../../../engines/table-engines/log-family/log-family.md) article. -Log differs from [TinyLog](tinylog.md) in that a small file of “marks” resides with the column files. These marks are written on every data block and contain offsets that indicate where to start reading the file in order to skip the specified number of rows. This makes it possible to read table data in multiple threads. +Log differs from [TinyLog](../../../engines/table-engines/log-family/tinylog.md) in that a small file of “marks” resides with the column files. These marks are written on every data block and contain offsets that indicate where to start reading the file in order to skip the specified number of rows. This makes it possible to read table data in multiple threads. For concurrent data access, the read operations can be performed simultaneously, while write operations block reads and each other. The Log engine does not support indexes. Similarly, if writing to a table failed, the table is broken, and reading from it returns an error. 
The Log engine is appropriate for temporary data, write-once tables, and for testing or demonstration purposes. diff --git a/docs/en/engines/table-engines/log-family/stripelog.md b/docs/en/engines/table-engines/log-family/stripelog.md index 87e8999fb51..68d8f52d68f 100644 --- a/docs/en/engines/table-engines/log-family/stripelog.md +++ b/docs/en/engines/table-engines/log-family/stripelog.md @@ -5,7 +5,7 @@ toc_title: StripeLog # Stripelog {#stripelog} -This engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](log-family.md) article. +This engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](../../../engines/table-engines/log-family/log-family.md) article. Use this engine in scenarios when you need to write many tables with a small amount of data (less than 1 million rows). diff --git a/docs/en/engines/table-engines/log-family/tinylog.md b/docs/en/engines/table-engines/log-family/tinylog.md index 8dc5a837028..0a0d2ef3761 100644 --- a/docs/en/engines/table-engines/log-family/tinylog.md +++ b/docs/en/engines/table-engines/log-family/tinylog.md @@ -5,10 +5,10 @@ toc_title: TinyLog # TinyLog {#tinylog} -The engine belongs to the log engine family. See [Log Engine Family](log-family.md) for common properties of log engines and their differences. +The engine belongs to the log engine family. See [Log Engine Family](../../../engines/table-engines/log-family/log-family.md) for common properties of log engines and their differences. This table engine is typically used with the write-once method: write data one time, then read it as many times as necessary. For example, you can use `TinyLog`-type tables for intermediary data that is processed in small batches. Note that storing data in a large number of small tables is inefficient. -Queries are executed in a single stream. In other words, this engine is intended for relatively small tables (up to about 1,000,000 rows). It makes sense to use this table engine if you have many small tables, since it’s simpler than the [Log](log.md) engine (fewer files need to be opened). +Queries are executed in a single stream. In other words, this engine is intended for relatively small tables (up to about 1,000,000 rows). It makes sense to use this table engine if you have many small tables, since it’s simpler than the [Log](../../../engines/table-engines/log-family/log.md) engine (fewer files need to be opened). [Original article](https://clickhouse.tech/docs/en/operations/table_engines/tinylog/) diff --git a/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md b/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md index 7e1c04a17e7..abc5b19b930 100644 --- a/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md @@ -5,7 +5,7 @@ toc_title: AggregatingMergeTree # Aggregatingmergetree {#aggregatingmergetree} -The engine inherits from [MergeTree](mergetree.md#table_engines-mergetree), altering the logic for data parts merging. ClickHouse replaces all rows with the same primary key (or more accurately, with the same [sorting key](mergetree.md)) with a single row (within a one data part) that stores a combination of states of aggregate functions. 
+The engine inherits from [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engines-mergetree), altering the logic for data parts merging. ClickHouse replaces all rows with the same primary key (or more accurately, with the same [sorting key](../../../engines/table-engines/mergetree-family/mergetree.md)) with a single row (within a one data part) that stores a combination of states of aggregate functions. You can use `AggregatingMergeTree` tables for incremental data aggregation, including for aggregated materialized views. @@ -36,7 +36,7 @@ For a description of request parameters, see [request description](../../../sql- **Query clauses** -When creating a `AggregatingMergeTree` table the same [clauses](mergetree.md) are required, as when creating a `MergeTree` table. +When creating a `AggregatingMergeTree` table the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md) are required, as when creating a `MergeTree` table.
diff --git a/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md b/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md index 26e9dfe91c3..d35d249120a 100644 --- a/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md @@ -5,7 +5,7 @@ toc_title: CollapsingMergeTree # CollapsingMergeTree {#table_engine-collapsingmergetree} -The engine inherits from [MergeTree](mergetree.md) and adds the logic of rows collapsing to data parts merge algorithm. +The engine inherits from [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) and adds the logic of rows collapsing to data parts merge algorithm. `CollapsingMergeTree` asynchronously deletes (collapses) pairs of rows if all of the fields in a sorting key (`ORDER BY`) are equivalent excepting the particular field `Sign` which can have `1` and `-1` values. Rows without a pair are kept. For more details see the [Collapsing](#table_engine-collapsingmergetree-collapsing) section of the document. @@ -36,7 +36,7 @@ For a description of query parameters, see [query description](../../../sql-refe **Query clauses** -When creating a `CollapsingMergeTree` table, the same [query clauses](mergetree.md#table_engine-mergetree-creating-a-table) are required, as when creating a `MergeTree` table. +When creating a `CollapsingMergeTree` table, the same [query clauses](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) are required, as when creating a `MergeTree` table.
diff --git a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md index 91ae0dfbc44..28494d37bf6 100644 --- a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md +++ b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md @@ -5,11 +5,11 @@ toc_title: Custom Partitioning Key # Custom Partitioning Key {#custom-partitioning-key} -Partitioning is available for the [MergeTree](mergetree.md) family tables (including [replicated](replication.md) tables). [Materialized views](../special/materializedview.md#materializedview) based on MergeTree tables support partitioning, as well. +Partitioning is available for the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) family tables (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) tables). [Materialized views](../../../engines/table-engines/special/materializedview.md#materializedview) based on MergeTree tables support partitioning, as well. A partition is a logical combination of records in a table by a specified criterion. You can set a partition by an arbitrary criterion, such as by month, by day, or by event type. Each partition is stored separately to simplify manipulations of this data. When accessing the data, ClickHouse uses the smallest subset of partitions possible. -The partition is specified in the `PARTITION BY expr` clause when [creating a table](mergetree.md#table_engine-mergetree-creating-a-table). The partition key can be any expression from the table columns. For example, to specify partitioning by month, use the expression `toYYYYMM(date_column)`: +The partition is specified in the `PARTITION BY expr` clause when [creating a table](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table). The partition key can be any expression from the table columns. For example, to specify partitioning by month, use the expression `toYYYYMM(date_column)`: ``` sql CREATE TABLE visits @@ -23,7 +23,7 @@ PARTITION BY toYYYYMM(VisitDate) ORDER BY Hour; ``` -The partition key can also be a tuple of expressions (similar to the [primary key](mergetree.md#primary-keys-and-indexes-in-queries)). For example: +The partition key can also be a tuple of expressions (similar to the [primary key](../../../engines/table-engines/mergetree-family/mergetree.md#primary-keys-and-indexes-in-queries)). For example: ``` sql ENGINE = ReplicatedCollapsingMergeTree('/clickhouse/tables/name', 'replica1', Sign) @@ -38,7 +38,7 @@ When inserting new data to a table, this data is stored as a separate part (chun !!! info "Info" A merge only works for data parts that have the same value for the partitioning expression. This means **you shouldn’t make overly granular partitions** (more than about a thousand partitions). Otherwise, the `SELECT` query performs poorly because of an unreasonably large number of files in the file system and open file descriptors. -Use the [system.parts](../../../operations/system-tables.md#system_tables-parts) table to view the table parts and partitions. For example, let’s assume that we have a `visits` table with partitioning by month. Let’s perform the `SELECT` query for the `system.parts` table: +Use the [system.parts](../../../operations/system-tables/parts.md#system_tables-parts) table to view the table parts and partitions. For example, let’s assume that we have a `visits` table with partitioning by month. 
Let’s perform the `SELECT` query for the `system.parts` table: ``` sql SELECT diff --git a/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md b/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md index db761a01be3..b9f341892ad 100644 --- a/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md @@ -9,7 +9,7 @@ This engine is designed for thinning and aggregating/averaging (rollup) [Graphit You can use any ClickHouse table engine to store the Graphite data if you don’t need rollup, but if you need a rollup use `GraphiteMergeTree`. The engine reduces the volume of storage and increases the efficiency of queries from Graphite. -The engine inherits properties from [MergeTree](mergetree.md). +The engine inherits properties from [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md). ## Creating a Table {#creating-table} @@ -50,7 +50,7 @@ The names of these columns should be set in the rollup configuration. **Query clauses** -When creating a `GraphiteMergeTree` table, the same [clauses](mergetree.md#table_engine-mergetree-creating-a-table) are required, as when creating a `MergeTree` table. +When creating a `GraphiteMergeTree` table, the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) are required, as when creating a `MergeTree` table.
diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 33d12293172..92903f3167e 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -15,20 +15,20 @@ Main features: This allows you to create a small sparse index that helps find data faster. -- Partitions can be used if the [partitioning key](custom-partitioning-key.md) is specified. +- Partitions can be used if the [partitioning key](../../../engines/table-engines/mergetree-family/custom-partitioning-key.md) is specified. ClickHouse supports certain operations with partitions that are more effective than general operations on the same data with the same result. ClickHouse also automatically cuts off the partition data where the partitioning key is specified in the query. This also improves query performance. - Data replication support. - The family of `ReplicatedMergeTree` tables provides data replication. For more information, see [Data replication](replication.md). + The family of `ReplicatedMergeTree` tables provides data replication. For more information, see [Data replication](../../../engines/table-engines/mergetree-family/replication.md). - Data sampling support. If necessary, you can set the data sampling method in the table. !!! info "Info" - The [Merge](../special/merge.md#merge) engine does not belong to the `*MergeTree` family. + The [Merge](../../../engines/table-engines/special/merge.md#merge) engine does not belong to the `*MergeTree` family. ## Creating a Table {#table_engine-mergetree-creating-a-table} @@ -51,9 +51,6 @@ ORDER BY expr For a description of parameters, see the [CREATE query description](../../../sql-reference/statements/create.md). -!!! note "Note" - `INDEX` is an experimental feature, see [Data Skipping Indexes](#table_engine-mergetree-data_skipping-indexes). - ### Query Clauses {#mergetree-query-clauses} - `ENGINE` — Name and parameters of the engine. `ENGINE = MergeTree()`. The `MergeTree` engine does not have parameters. @@ -62,11 +59,11 @@ For a description of parameters, see the [CREATE query description](../../../sql A tuple of column names or arbitrary expressions. Example: `ORDER BY (CounterID, EventDate)`. - ClickHouse uses the sorting key as a primary key if the primary key is not defined obviously by the `PRIMARY KEY` clause. - - Use the `ORDER BY tuple()` syntax, if you don't need sorting. See [Selecting the Primary Key](#selecting-the-primary-key). + ClickHouse uses the sorting key as a primary key if the primary key is not defined obviously by the `PRIMARY KEY` clause. -- `PARTITION BY` — The [partitioning key](custom-partitioning-key.md). Optional. + Use the `ORDER BY tuple()` syntax, if you don’t need sorting. See [Selecting the Primary Key](#selecting-the-primary-key). + +- `PARTITION BY` — The [partitioning key](../../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Optional. For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](../../../sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format. @@ -196,22 +193,22 @@ The number of columns in the primary key is not explicitly limited. Depending on ClickHouse sorts data by primary key, so the higher the consistency, the better the compression. 
-- Provide additional logic when merging data parts in the [CollapsingMergeTree](collapsingmergetree.md#table_engine-collapsingmergetree) and [SummingMergeTree](summingmergetree.md) engines. +- Provide additional logic when merging data parts in the [CollapsingMergeTree](../../../engines/table-engines/mergetree-family/collapsingmergetree.md#table_engine-collapsingmergetree) and [SummingMergeTree](../../../engines/table-engines/mergetree-family/summingmergetree.md) engines. In this case it makes sense to specify the *sorting key* that is different from the primary key. A long primary key will negatively affect the insert performance and memory consumption, but extra columns in the primary key do not affect ClickHouse performance during `SELECT` queries. -You can create a table without a primary key using the `ORDER BY tuple()` syntax. In this case, ClickHouse stores data in the order of inserting. If you want to save data order when inserting data by `INSERT ... SELECT` queries, set [max_insert_threads = 1](../../../operations/settings/settings.md#settings-max-insert-threads). - +You can create a table without a primary key using the `ORDER BY tuple()` syntax. In this case, ClickHouse stores data in the order of inserting. If you want to save data order when inserting data by `INSERT ... SELECT` queries, set [max\_insert\_threads = 1](../../../operations/settings/settings.md#settings-max-insert-threads). + To select data in the initial order, use [single-threaded](../../../operations/settings/settings.md#settings-max_threads) `SELECT` queries. ### Choosing a Primary Key that Differs from the Sorting Key {#choosing-a-primary-key-that-differs-from-the-sorting-key} It is possible to specify a primary key (an expression with values that are written in the index file for each mark) that is different from the sorting key (an expression for sorting the rows in data parts). In this case the primary key expression tuple must be a prefix of the sorting key expression tuple. -This feature is helpful when using the [SummingMergeTree](summingmergetree.md) and -[AggregatingMergeTree](aggregatingmergetree.md) table engines. In a common case when using these engines, the table has two types of columns: *dimensions* and *measures*. Typical queries aggregate values of measure columns with arbitrary `GROUP BY` and filtering by dimensions. Because SummingMergeTree and AggregatingMergeTree aggregate rows with the same value of the sorting key, it is natural to add all dimensions to it. As a result, the key expression consists of a long list of columns and this list must be frequently updated with newly added dimensions. +This feature is helpful when using the [SummingMergeTree](../../../engines/table-engines/mergetree-family/summingmergetree.md) and +[AggregatingMergeTree](../../../engines/table-engines/mergetree-family/aggregatingmergetree.md) table engines. In a common case when using these engines, the table has two types of columns: *dimensions* and *measures*. Typical queries aggregate values of measure columns with arbitrary `GROUP BY` and filtering by dimensions. Because SummingMergeTree and AggregatingMergeTree aggregate rows with the same value of the sorting key, it is natural to add all dimensions to it. As a result, the key expression consists of a long list of columns and this list must be frequently updated with newly added dimensions. 
In this case it makes sense to leave only a few columns in the primary key that will provide efficient range scans and add the remaining dimension columns to the sorting key tuple. @@ -257,7 +254,7 @@ ClickHouse cannot use an index if the values of the primary key in the query par ClickHouse uses this logic not only for days of the month sequences, but for any primary key that represents a partially-monotonic sequence. -### Data Skipping Indexes (experimental) {#table_engine-mergetree-data_skipping-indexes} +### Data Skipping Indexes {#table_engine-mergetree-data_skipping-indexes} The index declaration is in the columns section of the `CREATE` query. @@ -487,7 +484,7 @@ When ClickHouse see that data is expired, it performs an off-schedule merge. To If you perform the `SELECT` query between merges, you may get expired data. To avoid it, use the [OPTIMIZE](../../../sql-reference/statements/misc.md#misc_operations-optimize) query before `SELECT`. -## Using Multiple Block Devices for Data Storage {#table_engine-mergetree-multiple-volumes} +## Using Multiple Block Devices for Data Storage {#table_engine-mergetree-multiple-volumes} ### Introduction {#introduction} @@ -502,7 +499,7 @@ Data part is the minimum movable unit for `MergeTree`-engine tables. The data be - Volume — Ordered set of equal disks (similar to [JBOD](https://en.wikipedia.org/wiki/Non-RAID_drive_architectures)). - Storage policy — Set of volumes and the rules for moving data between them. -The names given to the described entities can be found in the system tables, [system.storage\_policies](../../../operations/system-tables.md#system_tables-storage_policies) and [system.disks](../../../operations/system-tables.md#system_tables-disks). To apply one of the configured storage policies for a table, use the `storage_policy` setting of `MergeTree`-engine family tables. +The names given to the described entities can be found in the system tables, [system.storage\_policies](../../../operations/system-tables/storage_policies.md#system_tables-storage_policies) and [system.disks](../../../operations/system-tables/disks.md#system_tables-disks). To apply one of the configured storage policies for a table, use the `storage_policy` setting of `MergeTree`-engine family tables. ### Configuration {#table_engine-mergetree-multiple-volumes_configure} @@ -632,7 +629,7 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd' The `default` storage policy implies using only one volume, which consists of only one disk given in ``. Once a table is created, its storage policy cannot be changed. -The number of threads performing background moves of data parts can be changed by [background_move_pool_size](../../../operations/settings/settings.md#background_move_pool_size) setting. +The number of threads performing background moves of data parts can be changed by [background\_move\_pool\_size](../../../operations/settings/settings.md#background_move_pool_size) setting. ### Details {#details} @@ -651,7 +648,7 @@ In all these cases except for mutations and partition freezing, a part is stored Under the hood, mutations and partition freezing make use of [hard links](https://en.wikipedia.org/wiki/Hard_link). Hard links between different disks are not supported, therefore in such cases the resulting parts are stored on the same disks as the initial ones. In the background, parts are moved between volumes on the basis of the amount of free space (`move_factor` parameter) according to the order the volumes are declared in the configuration file. 
-Data is never transferred from the last one and into the first one. One may use system tables [system.part\_log](../../../operations/system-tables.md#system_tables-part-log) (field `type = MOVE_PART`) and [system.parts](../../../operations/system-tables.md#system_tables-parts) (fields `path` and `disk`) to monitor background moves. Also, the detailed information can be found in server logs. +Data is never transferred from the last one and into the first one. One may use system tables [system.part\_log](../../../operations/system-tables/part_log.md#system_tables-part-log) (field `type = MOVE_PART`) and [system.parts](../../../operations/system-tables/parts.md#system_tables-parts) (fields `path` and `disk`) to monitor background moves. Also, the detailed information can be found in server logs. User can force moving a part or a partition from one volume to another using the query [ALTER TABLE … MOVE PART\|PARTITION … TO VOLUME\|DISK …](../../../sql-reference/statements/alter.md#alter_move-partition), all the restrictions for background operations are taken into account. The query initiates a move on its own and does not wait for background operations to be completed. User will get an error message if not enough free space is available or if any of the required conditions are not met. diff --git a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md index 6ca8be1912e..a218fa1876f 100644 --- a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md @@ -5,7 +5,7 @@ toc_title: ReplacingMergeTree # ReplacingMergeTree {#replacingmergetree} -The engine differs from [MergeTree](mergetree.md#table_engines-mergetree) in that it removes duplicate entries with the same primary key value (or more accurately, with the same [sorting key](mergetree.md) value). +The engine differs from [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engines-mergetree) in that it removes duplicate entries with the same [sorting key](../../../engines/table-engines/mergetree-family/mergetree.md) value. Data deduplication occurs only during a merge. Merging occurs in the background at an unknown time, so you can’t plan for it. Some of the data may remain unprocessed. Although you can run an unscheduled merge using the `OPTIMIZE` query, don’t count on using it, because the `OPTIMIZE` query will read and write a large amount of data. @@ -33,14 +33,14 @@ For a description of request parameters, see [request description](../../../sql- - `ver` — column with version. Type `UInt*`, `Date` or `DateTime`. Optional parameter. - When merging, `ReplacingMergeTree` from all the rows with the same primary key leaves only one: + When merging, `ReplacingMergeTree` from all the rows with the same sorting key leaves only one: - Last in the selection, if `ver` not set. - With the maximum version, if `ver` specified. **Query clauses** -When creating a `ReplacingMergeTree` table the same [clauses](mergetree.md) are required, as when creating a `MergeTree` table. +When creating a `ReplacingMergeTree` table the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md) are required, as when creating a `MergeTree` table.
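For illustration, a minimal sketch of such a table (the table and column names are assumptions): rows sharing the sorting key `(UserID, PageID)` are deduplicated during merges, and the row with the largest `Version` survives.

``` sql
CREATE TABLE page_visits
(
    UserID UInt64,
    PageID UInt64,
    Duration UInt32,
    Version UInt32
)
ENGINE = ReplacingMergeTree(Version)
ORDER BY (UserID, PageID);

-- Deduplication happens only when parts are merged in the background.
-- Forcing a merge is expensive and should not be relied on:
OPTIMIZE TABLE page_visits FINAL;
```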
diff --git a/docs/en/engines/table-engines/mergetree-family/replication.md b/docs/en/engines/table-engines/mergetree-family/replication.md index aa4e2e87d95..5da096ef02e 100644 --- a/docs/en/engines/table-engines/mergetree-family/replication.md +++ b/docs/en/engines/table-engines/mergetree-family/replication.md @@ -63,7 +63,7 @@ For each `INSERT` query, approximately ten entries are added to ZooKeeper throug For very large clusters, you can use different ZooKeeper clusters for different shards. However, this hasn’t proven necessary on the Yandex.Metrica cluster (approximately 300 servers). -Replication is asynchronous and multi-master. `INSERT` queries (as well as `ALTER`) can be sent to any available server. Data is inserted on the server where the query is run, and then it is copied to the other servers. Because it is asynchronous, recently inserted data appears on the other replicas with some latency. If part of the replicas are not available, the data is written when they become available. If a replica is available, the latency is the amount of time it takes to transfer the block of compressed data over the network. The number of threads performing background tasks for replicated tables can be set by [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size) setting. +Replication is asynchronous and multi-master. `INSERT` queries (as well as `ALTER`) can be sent to any available server. Data is inserted on the server where the query is run, and then it is copied to the other servers. Because it is asynchronous, recently inserted data appears on the other replicas with some latency. If part of the replicas are not available, the data is written when they become available. If a replica is available, the latency is the amount of time it takes to transfer the block of compressed data over the network. The number of threads performing background tasks for replicated tables can be set by [background\_schedule\_pool\_size](../../../operations/settings/settings.md#background_schedule_pool_size) setting. By default, an INSERT query waits for confirmation of writing the data from only one replica. If the data was successfully written to only one replica and the server with this replica ceases to exist, the stored data will be lost. To enable getting confirmation of data writes from multiple replicas, use the `insert_quorum` option. @@ -217,6 +217,6 @@ If the data in ZooKeeper was lost or damaged, you can save data by moving it to **See also** -- [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size) +- [background\_schedule\_pool\_size](../../../operations/settings/settings.md#background_schedule_pool_size) [Original article](https://clickhouse.tech/docs/en/operations/table_engines/replication/) diff --git a/docs/en/engines/table-engines/mergetree-family/summingmergetree.md b/docs/en/engines/table-engines/mergetree-family/summingmergetree.md index 53c76044051..e8e60b629b3 100644 --- a/docs/en/engines/table-engines/mergetree-family/summingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/summingmergetree.md @@ -5,7 +5,7 @@ toc_title: SummingMergeTree # SummingMergeTree {#summingmergetree} -The engine inherits from [MergeTree](mergetree.md#table_engines-mergetree). 
The difference is that when merging data parts for `SummingMergeTree` tables ClickHouse replaces all the rows with the same primary key (or more accurately, with the same [sorting key](mergetree.md)) with one row which contains summarized values for the columns with the numeric data type. If the sorting key is composed in a way that a single key value corresponds to large number of rows, this significantly reduces storage volume and speeds up data selection. +The engine inherits from [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engines-mergetree). The difference is that when merging data parts for `SummingMergeTree` tables ClickHouse replaces all the rows with the same primary key (or more accurately, with the same [sorting key](../../../engines/table-engines/mergetree-family/mergetree.md)) with one row which contains summarized values for the columns with the numeric data type. If the sorting key is composed in a way that a single key value corresponds to large number of rows, this significantly reduces storage volume and speeds up data selection. We recommend to use the engine together with `MergeTree`. Store complete data in `MergeTree` table, and use `SummingMergeTree` for aggregated data storing, for example, when preparing reports. Such an approach will prevent you from losing valuable data due to an incorrectly composed primary key. @@ -35,7 +35,7 @@ For a description of request parameters, see [request description](../../../sql- **Query clauses** -When creating a `SummingMergeTree` table the same [clauses](mergetree.md) are required, as when creating a `MergeTree` table. +When creating a `SummingMergeTree` table the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md) are required, as when creating a `MergeTree` table.
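As a rough sketch of that recommendation (all table and column names here are assumptions): the complete data is kept in a `MergeTree` table, and a `SummingMergeTree` table stores a pre-aggregated copy that is cheap to query for reports.

``` sql
-- Full, unaggregated data.
CREATE TABLE hits_raw
(
    EventDate Date,
    CounterID UInt32,
    UserID UInt64,
    Requests UInt64
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate, UserID);

-- Pre-aggregated copy: rows with equal (CounterID, EventDate) are summed during merges.
CREATE TABLE hits_daily
(
    EventDate Date,
    CounterID UInt32,
    Requests UInt64
)
ENGINE = SummingMergeTree()
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate);

INSERT INTO hits_daily SELECT EventDate, CounterID, Requests FROM hits_raw;
```

Queries against the aggregated table should still use `sum()` with `GROUP BY`, since the background summation may be incomplete at any given moment.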
@@ -96,7 +96,7 @@ SELECT key, sum(value) FROM summtt GROUP BY key When data are inserted into a table, they are saved as-is. ClickHouse merges the inserted parts of data periodically and this is when rows with the same primary key are summed and replaced with one for each resulting part of data. -ClickHouse can merge the data parts so that different resulting parts of data cat consist rows with the same primary key, i.e. the summation will be incomplete. Therefore (`SELECT`) an aggregate function [sum()](../../../sql-reference/aggregate-functions/reference.md#agg_function-sum) and `GROUP BY` clause should be used in a query as described in the example above. +ClickHouse can merge the data parts so that different resulting parts of data can consist of rows with the same primary key, i.e. the summation will be incomplete. Therefore, an aggregate function [sum()](../../../sql-reference/aggregate-functions/reference/sum.md#agg_function-sum) and a `GROUP BY` clause should be used in `SELECT` queries, as described in the example above. ### Common Rules for Summation {#common-rules-for-summation} @@ -110,7 +110,7 @@ The values are not summarized for columns in the primary key. ### The Summation in the Aggregatefunction Columns {#the-summation-in-the-aggregatefunction-columns} -For columns of [AggregateFunction type](../../../sql-reference/data-types/aggregatefunction.md) ClickHouse behaves as [AggregatingMergeTree](aggregatingmergetree.md) engine aggregating according to the function. +For columns of [AggregateFunction type](../../../sql-reference/data-types/aggregatefunction.md) ClickHouse behaves as [AggregatingMergeTree](../../../engines/table-engines/mergetree-family/aggregatingmergetree.md) engine aggregating according to the function. ### Nested Structures {#nested-structures} @@ -132,7 +132,7 @@ Examples: [(1, 100), (2, 150)] + [(1, -100)] -> [(2, 150)] ``` -When requesting data, use the [sumMap(key, value)](../../../sql-reference/aggregate-functions/reference.md) function for aggregation of `Map`. +When requesting data, use the [sumMap(key, value)](../../../sql-reference/aggregate-functions/reference/summap.md) function for aggregation of `Map`. For nested data structure, you do not need to specify its columns in the tuple of columns for summation. diff --git a/docs/en/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md b/docs/en/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md index c9e1da3d454..986a1fc05b6 100644 --- a/docs/en/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md @@ -12,7 +12,7 @@ This engine: See the section [Collapsing](#table_engines_versionedcollapsingmergetree) for details. -The engine inherits from [MergeTree](mergetree.md#table_engines-mergetree) and adds the logic for collapsing rows to the algorithm for merging data parts. `VersionedCollapsingMergeTree` serves the same purpose as [CollapsingMergeTree](collapsingmergetree.md) but uses a different collapsing algorithm that allows inserting the data in any order with multiple threads. In particular, the `Version` column helps to collapse the rows properly even if they are inserted in the wrong order. In contrast, `CollapsingMergeTree` allows only strictly consecutive insertion. +The engine inherits from [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engines-mergetree) and adds the logic for collapsing rows to the algorithm for merging data parts.
`VersionedCollapsingMergeTree` serves the same purpose as [CollapsingMergeTree](../../../engines/table-engines/mergetree-family/collapsingmergetree.md) but uses a different collapsing algorithm that allows inserting the data in any order with multiple threads. In particular, the `Version` column helps to collapse the rows properly even if they are inserted in the wrong order. In contrast, `CollapsingMergeTree` allows only strictly consecutive insertion. ## Creating a Table {#creating-a-table} @@ -47,7 +47,7 @@ VersionedCollapsingMergeTree(sign, version) **Query Clauses** -When creating a `VersionedCollapsingMergeTree` table, the same [clauses](mergetree.md) are required as when creating a `MergeTree` table. +When creating a `VersionedCollapsingMergeTree` table, the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md) are required as when creating a `MergeTree` table.
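For illustration, a minimal sketch with assumed column names: `Sign` marks state rows (`1`) and cancel rows (`-1`), and `Version` lets rows collapse correctly even when they arrive out of order.

``` sql
CREATE TABLE user_activity
(
    UserID UInt64,
    PageViews UInt32,
    Duration UInt32,
    Sign Int8,
    Version UInt8
)
ENGINE = VersionedCollapsingMergeTree(Sign, Version)
ORDER BY UserID;
```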
diff --git a/docs/en/engines/table-engines/special/dictionary.md b/docs/en/engines/table-engines/special/dictionary.md index 086ad53fab5..a6d6f296673 100644 --- a/docs/en/engines/table-engines/special/dictionary.md +++ b/docs/en/engines/table-engines/special/dictionary.md @@ -7,7 +7,7 @@ toc_title: Dictionary The `Dictionary` engine displays the [dictionary](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) data as a ClickHouse table. -## Example +## Example {#example} As an example, consider a dictionary of `products` with the following configuration: diff --git a/docs/en/engines/table-engines/special/distributed.md b/docs/en/engines/table-engines/special/distributed.md index 11245bbf262..3446f820a71 100644 --- a/docs/en/engines/table-engines/special/distributed.md +++ b/docs/en/engines/table-engines/special/distributed.md @@ -10,7 +10,7 @@ Reading is automatically parallelized. During a read, the table indexes on remot The Distributed engine accepts parameters: -- the cluster name in the server's config file +- the cluster name in the server’s config file - the name of a remote database @@ -23,7 +23,7 @@ The Distributed engine accepts parameters: See also: - `insert_distributed_sync` setting - - [MergeTree](../mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) for the examples + - [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) for the examples Example: @@ -37,7 +37,7 @@ For example, for a query with GROUP BY, data will be aggregated on remote server Instead of the database name, you can use a constant expression that returns a string. For example: currentDatabase(). -logs – The cluster name in the server's config file. +logs – The cluster name in the server’s config file. Clusters are set like this: @@ -82,7 +82,7 @@ Replicas are duplicating servers (in order to read all the data, you can access Cluster names must not contain dots. The parameters `host`, `port`, and optionally `user`, `password`, `secure`, `compression` are specified for each server: -- `host` – The address of the remote server. You can use either the domain or the IPv4 or IPv6 address. If you specify the domain, the server makes a DNS request when it starts, and the result is stored as long as the server is running. If the DNS request fails, the server doesn't start. If you change the DNS record, restart the server. +- `host` – The address of the remote server. You can use either the domain or the IPv4 or IPv6 address. If you specify the domain, the server makes a DNS request when it starts, and the result is stored as long as the server is running. If the DNS request fails, the server doesn’t start. If you change the DNS record, restart the server. - `port` – The TCP port for messenger activity (`tcp_port` in the config, usually set to 9000). Do not confuse it with http\_port. - `user` – Name of the user for connecting to a remote server. Default value: default. This user must have access to connect to the specified server. Access is configured in the users.xml file. For more information, see the section [Access rights](../../../operations/access-rights.md). - `password` – The password for connecting to a remote server (not masked). Default value: empty string. @@ -99,38 +99,38 @@ You can specify as many clusters as you wish in the configuration. To view your clusters, use the `system.clusters` table. -The Distributed engine allows working with a cluster like a local server. 
However, the cluster is inextensible: you must write its configuration in the server config file (even better, for all the cluster's servers). +The Distributed engine allows working with a cluster like a local server. However, the cluster is inextensible: you must write its configuration in the server config file (even better, for all the cluster’s servers). -The Distributed engine requires writing clusters to the config file. Clusters from the config file are updated on the fly, without restarting the server. If you need to send a query to an unknown set of shards and replicas each time, you don't need to create a Distributed table – use the `remote` table function instead. See the section [Table functions](../../../sql-reference/table-functions/index.md). +The Distributed engine requires writing clusters to the config file. Clusters from the config file are updated on the fly, without restarting the server. If you need to send a query to an unknown set of shards and replicas each time, you don’t need to create a Distributed table – use the `remote` table function instead. See the section [Table functions](../../../sql-reference/table-functions/index.md). There are two methods for writing data to a cluster: -First, you can define which servers to write which data to and perform the write directly on each shard. In other words, perform INSERT in the tables that the distributed table "looks at". This is the most flexible solution as you can use any sharding scheme, which could be non-trivial due to the requirements of the subject area. This is also the most optimal solution since data can be written to different shards completely independently. +First, you can define which servers to write which data to and perform the write directly on each shard. In other words, perform INSERT in the tables that the distributed table “looks at”. This is the most flexible solution as you can use any sharding scheme, which could be non-trivial due to the requirements of the subject area. This is also the most optimal solution since data can be written to different shards completely independently. -Second, you can perform INSERT in a Distributed table. In this case, the table will distribute the inserted data across the servers itself. In order to write to a Distributed table, it must have a sharding key set (the last parameter). In addition, if there is only one shard, the write operation works without specifying the sharding key, since it doesn't mean anything in this case. +Second, you can perform INSERT in a Distributed table. In this case, the table will distribute the inserted data across the servers itself. In order to write to a Distributed table, it must have a sharding key set (the last parameter). In addition, if there is only one shard, the write operation works without specifying the sharding key, since it doesn’t mean anything in this case. Each shard can have a weight defined in the config file. By default, the weight is equal to one. Data is distributed across shards in the amount proportional to the shard weight. For example, if there are two shards and the first has a weight of 9 while the second has a weight of 10, the first will be sent 9 / 19 parts of the rows, and the second will be sent 10 / 19. Each shard can have the `internal_replication` parameter defined in the config file. -If this parameter is set to `true`, the write operation selects the first healthy replica and writes data to it. Use this alternative if the Distributed table "looks at" replicated tables. 
In other words, if the table where data will be written is going to replicate them itself. +If this parameter is set to `true`, the write operation selects the first healthy replica and writes data to it. Use this alternative if the Distributed table “looks at” replicated tables. In other words, if the table where data will be written is going to replicate them itself. If it is set to `false` (the default), data is written to all replicas. In essence, this means that the Distributed table replicates data itself. This is worse than using replicated tables, because the consistency of replicas is not checked, and over time they will contain slightly different data. To select the shard that a row of data is sent to, the sharding expression is analyzed, and its remainder is taken from dividing it by the total weight of the shards. The row is sent to the shard that corresponds to the half-interval of the remainders from `prev_weight` to `prev_weights + weight`, where `prev_weights` is the total weight of the shards with the smallest number, and `weight` is the weight of this shard. For example, if there are two shards, and the first has a weight of 9 while the second has a weight of 10, the row will be sent to the first shard for the remainders from the range \[0, 9), and to the second for the remainders from the range \[9, 19). -The sharding expression can be any expression from constants and table columns that returns an integer. For example, you can use the expression `rand()` for random distribution of data, or `UserID` for distribution by the remainder from dividing the user's ID (then the data of a single user will reside on a single shard, which simplifies running IN and JOIN by users). If one of the columns is not distributed evenly enough, you can wrap it in a hash function: intHash64(UserID). +The sharding expression can be any expression from constants and table columns that returns an integer. For example, you can use the expression `rand()` for random distribution of data, or `UserID` for distribution by the remainder from dividing the user’s ID (then the data of a single user will reside on a single shard, which simplifies running IN and JOIN by users). If one of the columns is not distributed evenly enough, you can wrap it in a hash function: intHash64(UserID). -A simple reminder from the division is a limited solution for sharding and isn't always appropriate. It works for medium and large volumes of data (dozens of servers), but not for very large volumes of data (hundreds of servers or more). In the latter case, use the sharding scheme required by the subject area, rather than using entries in Distributed tables. +A simple remainder from the division is a limited solution for sharding and isn’t always appropriate. It works for medium and large volumes of data (dozens of servers), but not for very large volumes of data (hundreds of servers or more). In the latter case, use the sharding scheme required by the subject area, rather than using entries in Distributed tables. -SELECT queries are sent to all the shards and work regardless of how data is distributed across the shards (they can be distributed completely randomly). When you add a new shard, you don't have to transfer the old data to it. You can write new data with a heavier weight – the data will be distributed slightly unevenly, but queries will work correctly and efficiently.
+SELECT queries are sent to all the shards and work regardless of how data is distributed across the shards (they can be distributed completely randomly). When you add a new shard, you don’t have to transfer the old data to it. You can write new data with a heavier weight – the data will be distributed slightly unevenly, but queries will work correctly and efficiently. You should be concerned about the sharding scheme in the following cases: - Queries are used that require joining data (IN or JOIN) by a specific key. If data is sharded by this key, you can use local IN or JOIN instead of GLOBAL IN or GLOBAL JOIN, which is much more efficient. -- A large number of servers is used (hundreds or more) with a large number of small queries (queries of individual clients - websites, advertisers, or partners). In order for the small queries to not affect the entire cluster, it makes sense to locate data for a single client on a single shard. Alternatively, as we've done in Yandex.Metrica, you can set up bi-level sharding: divide the entire cluster into "layers", where a layer may consist of multiple shards. Data for a single client is located on a single layer, but shards can be added to a layer as necessary, and data is randomly distributed within them. Distributed tables are created for each layer, and a single shared distributed table is created for global queries. +- A large number of servers is used (hundreds or more) with a large number of small queries (queries of individual clients - websites, advertisers, or partners). In order for the small queries to not affect the entire cluster, it makes sense to locate data for a single client on a single shard. Alternatively, as we’ve done in Yandex.Metrica, you can set up bi-level sharding: divide the entire cluster into “layers”, where a layer may consist of multiple shards. Data for a single client is located on a single layer, but shards can be added to a layer as necessary, and data is randomly distributed within them. Distributed tables are created for each layer, and a single shared distributed table is created for global queries. -Data is written asynchronously. When inserted in the table, the data block is just written to the local file system. The data is sent to the remote servers in the background as soon as possible. The period for sending data is managed by the [distributed\_directory\_monitor\_sleep\_time\_ms](../../../operations/settings/settings.md#distributed_directory_monitor_sleep_time_ms) and [distributed\_directory\_monitor\_max\_sleep\_time\_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms) settings. The `Distributed` engine sends each file with inserted data separately, but you can enable batch sending of files with the [distributed\_directory\_monitor\_batch\_inserts](../../../operations/settings/settings.md#distributed_directory_monitor_batch_inserts) setting. This setting improves cluster performance by better utilizing local server and network resources. You should check whether data is sent successfully by checking the list of files (data waiting to be sent) in the table directory: `/var/lib/clickhouse/data/database/table/`. The number of threads performing background tasks can be set by [background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size) setting. +Data is written asynchronously. When inserted in the table, the data block is just written to the local file system. 
The data is sent to the remote servers in the background as soon as possible. The period for sending data is managed by the [distributed\_directory\_monitor\_sleep\_time\_ms](../../../operations/settings/settings.md#distributed_directory_monitor_sleep_time_ms) and [distributed\_directory\_monitor\_max\_sleep\_time\_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms) settings. The `Distributed` engine sends each file with inserted data separately, but you can enable batch sending of files with the [distributed\_directory\_monitor\_batch\_inserts](../../../operations/settings/settings.md#distributed_directory_monitor_batch_inserts) setting. This setting improves cluster performance by better utilizing local server and network resources. You should check whether data is sent successfully by checking the list of files (data waiting to be sent) in the table directory: `/var/lib/clickhouse/data/database/table/`. The number of threads performing background tasks can be set by [background\_distributed\_schedule\_pool\_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size) setting. If the server ceased to exist or had a rough restart (for example, after a device failure) after an INSERT to a Distributed table, the inserted data might be lost. If a damaged data part is detected in the table directory, it is transferred to the `broken` subdirectory and no longer used. @@ -145,7 +145,7 @@ When the `max_parallel_replicas` option is enabled, query processing is parallel **See Also** -- [Virtual columns](index.md#table_engines-virtual_columns) -- [background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size) +- [Virtual columns](../../../engines/table-engines/special/index.md#table_engines-virtual_columns) +- [background\_distributed\_schedule\_pool\_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size) [Original article](https://clickhouse.tech/docs/en/operations/table_engines/distributed/) diff --git a/docs/en/engines/table-engines/special/file.md b/docs/en/engines/table-engines/special/file.md index afccf0a4552..79fd2e94c4a 100644 --- a/docs/en/engines/table-engines/special/file.md +++ b/docs/en/engines/table-engines/special/file.md @@ -33,7 +33,7 @@ You may manually create this subfolder and file in server filesystem and then [A !!! warning "Warning" Be careful with this functionality, because ClickHouse does not keep track of external changes to such files. The result of simultaneous writes via ClickHouse and outside of ClickHouse is undefined. -## Example +## Example {#example} **1.** Set up the `file_engine_table` table: diff --git a/docs/en/engines/table-engines/special/generate.md b/docs/en/engines/table-engines/special/generate.md index aa12092367c..393f7ccab23 100644 --- a/docs/en/engines/table-engines/special/generate.md +++ b/docs/en/engines/table-engines/special/generate.md @@ -25,7 +25,7 @@ Generate table engine supports only `SELECT` queries. It supports all [DataTypes](../../../sql-reference/data-types/index.md) that can be stored in a table except `LowCardinality` and `AggregateFunction`. 
-## Example +## Example {#example} **1.** Set up the `generate_engine_table` table: diff --git a/docs/en/engines/table-engines/special/merge.md b/docs/en/engines/table-engines/special/merge.md index 5dca7f8602d..53615ce07ec 100644 --- a/docs/en/engines/table-engines/special/merge.md +++ b/docs/en/engines/table-engines/special/merge.md @@ -11,7 +11,7 @@ Reading is automatically parallelized. Writing to a table is not supported. When The `Merge` engine accepts parameters: the database name and a regular expression for tables. -## Examples +## Examples {#examples} Example 1: @@ -67,6 +67,6 @@ FROM WatchLog **See Also** -- [Virtual columns](index.md#table_engines-virtual_columns) +- [Virtual columns](../../../engines/table-engines/special/index.md#table_engines-virtual_columns) [Original article](https://clickhouse.tech/docs/en/operations/table_engines/merge/) diff --git a/docs/en/engines/table-engines/special/url.md b/docs/en/engines/table-engines/special/url.md index 4fa1a50df38..0e22bfb7008 100644 --- a/docs/en/engines/table-engines/special/url.md +++ b/docs/en/engines/table-engines/special/url.md @@ -5,7 +5,7 @@ toc_title: URL # URL Table Engine {#table_engines-url} -Queries data to/from a remote HTTP/HTTPS server. This engine is similar to the [File](file.md) engine. +Queries data to/from a remote HTTP/HTTPS server. This engine is similar to the [File](../../../engines/table-engines/special/file.md) engine. Syntax: `URL(URL, Format)` @@ -25,7 +25,7 @@ respectively. For processing `POST` requests, the remote server must support You can limit the maximum number of HTTP GET redirect hops using the [max\_http\_get\_redirects](../../../operations/settings/settings.md#setting-max_http_get_redirects) setting. -## Example +## Example {#example} **1.** Create a `url_engine_table` table on the server : diff --git a/docs/en/faq/general.md b/docs/en/faq/general.md deleted file mode 100644 index 53cb583e25f..00000000000 --- a/docs/en/faq/general.md +++ /dev/null @@ -1,58 +0,0 @@ ---- -toc_priority: 78 -toc_title: General Questions ---- - -# General Questions {#general-questions} - -## Why Not Use Something Like MapReduce? {#why-not-use-something-like-mapreduce} - -We can refer to systems like MapReduce as distributed computing systems in which the reduce operation is based on distributed sorting. The most common open-source solution in this class is [Apache Hadoop](http://hadoop.apache.org). Yandex uses its in-house solution, YT. - -These systems aren’t appropriate for online queries due to their high latency. In other words, they can’t be used as the back-end for a web interface. These types of systems aren’t useful for real-time data updates. Distributed sorting isn’t the best way to perform reduce operations if the result of the operation and all the intermediate results (if there are any) are located in the RAM of a single server, which is usually the case for online queries. In such a case, a hash table is an optimal way to perform reduce operations. A common approach to optimizing map-reduce tasks is pre-aggregation (partial reduce) using a hash table in RAM. The user performs this optimization manually. Distributed sorting is one of the main causes of reduced performance when running simple map-reduce tasks. - -Most MapReduce implementations allow you to execute arbitrary code on a cluster. But a declarative query language is better suited to OLAP to run experiments quickly. For example, Hadoop has Hive and Pig. 
Also consider Cloudera Impala or Shark (outdated) for Spark, as well as Spark SQL, Presto, and Apache Drill. Performance when running such tasks is highly sub-optimal compared to specialized systems, but relatively high latency makes it unrealistic to use these systems as the backend for a web interface. - -## What If I Have a Problem with Encodings When Using Oracle Through ODBC? {#oracle-odbc-encodings} - -If you use Oracle through the ODBC driver as a source of external dictionaries, you need to set the correct value for the `NLS_LANG` environment variable in `/etc/default/clickhouse`. For more information, see the [Oracle NLS\_LANG FAQ](https://www.oracle.com/technetwork/products/globalization/nls-lang-099431.html). - -**Example** - -``` sql -NLS_LANG=RUSSIAN_RUSSIA.UTF8 -``` - -## How Do I Export Data from ClickHouse to a File? {#how-to-export-to-file} - -### Using INTO OUTFILE Clause {#using-into-outfile-clause} - -Add an [INTO OUTFILE](../sql-reference/statements/select/into-outfile.md#into-outfile-clause) clause to your query. - -For example: - -``` sql -SELECT * FROM table INTO OUTFILE 'file' -``` - -By default, ClickHouse uses the [TabSeparated](../interfaces/formats.md#tabseparated) format for output data. To select the [data format](../interfaces/formats.md), use the [FORMAT clause](../sql-reference/statements/select/format.md#format-clause). - -For example: - -``` sql -SELECT * FROM table INTO OUTFILE 'file' FORMAT CSV -``` - -### Using a File-Engine Table {#using-a-file-engine-table} - -See [File](../engines/table-engines/special/file.md). - -### Using Command-Line Redirection {#using-command-line-redirection} - -``` sql -$ clickhouse-client --query "SELECT * from table" --format FormatName > result.txt -``` - -See [clickhouse-client](../interfaces/cli.md). - -{## [Original article](https://clickhouse.tech/docs/en/faq/general/) ##} diff --git a/docs/en/faq/general/dbms-naming.md b/docs/en/faq/general/dbms-naming.md new file mode 100644 index 00000000000..f6139b8faf1 --- /dev/null +++ b/docs/en/faq/general/dbms-naming.md @@ -0,0 +1,11 @@ +--- +toc_hidden: true +toc_priority: 10 +--- + +# What Does “ClickHouse” Mean? {#what-does-clickhouse-mean} + +It’s a combination of “**Click**stream” and “Data ware**house**”. It comes from the original use case at Yandex.Metrica, where ClickHouse was supposed to keep records of all clicks by people from all over the Internet and it still does the job. You can read more about this use case on [ClickHouse history](../../introduction/history.md) page. + +!!! info "Fun fact" + Many years after ClickHouse got its name, this approach of combining two words that are meaningful on their own has been highlighted as the best way to name a database in a [research by Andy Pavlo](https://www.cs.cmu.edu/~pavlo/blog/2020/03/on-naming-a-database-management-system.html), an Associate Professor of Databases at Carnegie Mellon University. ClickHouse shared his “best database name of all time” award with Postgres. 
diff --git a/docs/en/faq/general/index.md b/docs/en/faq/general/index.md new file mode 100644 index 00000000000..a456ee1f057 --- /dev/null +++ b/docs/en/faq/general/index.md @@ -0,0 +1,18 @@ +--- +toc_hidden_folder: true +toc_priority: 1 +toc_title: General +--- + +# General Questions About ClickHouse {#general-questions} + +Questions: + +- [What does “ClickHouse” mean?](../../faq/general/dbms-naming.md) +- [What does “Не тормозит” mean?](../../faq/general/ne-tormozit.md) +- [Why not use something like MapReduce?](../../faq/general/mapreduce.md) + +!!! info "Don’t see what you were looking for?" + Check out [other F.A.Q. categories](../../faq/index.md) or browse around main documentation articles found in the left sidebar. + +{## [Original article](https://clickhouse.tech/docs/en/faq/general/) ##} diff --git a/docs/en/faq/general/mapreduce.md b/docs/en/faq/general/mapreduce.md new file mode 100644 index 00000000000..83fcd99ab81 --- /dev/null +++ b/docs/en/faq/general/mapreduce.md @@ -0,0 +1,12 @@ +--- +toc_hidden: true +toc_priority: 20 +--- + +# Why Not Use Something Like MapReduce? {#why-not-use-something-like-mapreduce} + +We can refer to systems like MapReduce as distributed computing systems in which the reduce operation is based on distributed sorting. The most common open-source solution in this class is [Apache Hadoop](http://hadoop.apache.org). Yandex uses its in-house solution, YT. + +These systems aren’t appropriate for online queries due to their high latency. In other words, they can’t be used as the back-end for a web interface. These types of systems aren’t useful for real-time data updates. Distributed sorting isn’t the best way to perform reduce operations if the result of the operation and all the intermediate results (if there are any) are located in the RAM of a single server, which is usually the case for online queries. In such a case, a hash table is an optimal way to perform reduce operations. A common approach to optimizing map-reduce tasks is pre-aggregation (partial reduce) using a hash table in RAM. The user performs this optimization manually. Distributed sorting is one of the main causes of reduced performance when running simple map-reduce tasks. + +Most MapReduce implementations allow you to execute arbitrary code on a cluster. But a declarative query language is better suited to OLAP to run experiments quickly. For example, Hadoop has Hive and Pig. Also consider Cloudera Impala or Shark (outdated) for Spark, as well as Spark SQL, Presto, and Apache Drill. Performance when running such tasks is highly sub-optimal compared to specialized systems, but relatively high latency makes it unrealistic to use these systems as the backend for a web interface. diff --git a/docs/en/faq/general/ne-tormozit.md b/docs/en/faq/general/ne-tormozit.md new file mode 100644 index 00000000000..dd42694ece9 --- /dev/null +++ b/docs/en/faq/general/ne-tormozit.md @@ -0,0 +1,24 @@ +--- +toc_hidden: true +toc_priority: 11 +--- + +# What Does “Не тормозит” mean? {#what-does-ne-tormozit-mean} + +This question usually arises when people see official ClickHouse t-shirts. They have large words **“ClickHouse не тормозит”** on the front. + +Before ClickHouse became open-source, it has been developed as an in-house storage system by the largest Russian IT company, [Yandex](https://yandex.com/company/). That’s why it initially got its slogan in Russian, which is “не тормозит”. 
After the open-source release we first produced some of those t-shirts for events in Russia and it was a no-brainer to use the slogan as-is. + +One of the following batches of those t-shirts was supposed to be given away at events outside of Russia and we tried to make the English version of the slogan. Unfortunately, the Russian language is kind of elegant in terms of expressing stuff and there was a restriction of limited space on a t-shirt, so we failed to come up with a good enough translation (most options appeared to be either long or inaccurate) and decided to keep the slogan in Russian even on t-shirts produced for international events. It appeared to be a great decision because people all over the world get positively surprised and curious when they see it. + +So, what does it mean? Here are some ways to translate *“не тормозит”*: + +- If you translate it literally, it’d be something like *“ClickHouse doesn’t press the brake pedal”*. +- If you’d want to express it as close to how it sounds to a Russian person with IT background, it’d be something like *“If your larger system lags, it’s not because it uses ClickHouse”*. +- Shorter, but not so precise versions could be *“ClickHouse is not slow”*, *“ClickHouse doesn’t lag”* or just *“ClickHouse is fast”*. + +If you haven’t seen one of those t-shirts in person, you can check them out online in many ClickHouse-related videos. For example, this one: + +![iframe](https://www.youtube.com/embed/bSyQahMVZ7w) + +P.S. These t-shirts are not for sale, they are given away for free on most [ClickHouse Meetups](https://clickhouse.tech/#meet), usually for best questions or other forms of active participation. diff --git a/docs/en/faq/index.md b/docs/en/faq/index.md index bdbd59f7880..08683c329b3 100644 --- a/docs/en/faq/index.md +++ b/docs/en/faq/index.md @@ -1,9 +1,17 @@ --- toc_folder_title: F.A.Q. -toc_priority: 76 -toc_title: hidden toc_hidden: true +toc_priority: 76 --- +# ClickHouse F.A.Q {#clickhouse-f-a-q} + +This section of the documentation is a place to collect answers to ClickHouse-related questions that arise often. + +Categories: + +- [General](../faq/general/index.md) +- [Operations](../faq/operations/index.md) +- [Integration](../faq/integration/index.md) {## [Original article](https://clickhouse.tech/docs/en/faq) ##} diff --git a/docs/en/faq/integration/file-export.md b/docs/en/faq/integration/file-export.md new file mode 100644 index 00000000000..669297f36d7 --- /dev/null +++ b/docs/en/faq/integration/file-export.md @@ -0,0 +1,36 @@ +--- +toc_hidden: true +toc_priority: 10 +--- + +# How Do I Export Data from ClickHouse to a File? {#how-to-export-to-file} + +## Using INTO OUTFILE Clause {#using-into-outfile-clause} + +Add an [INTO OUTFILE](../../sql-reference/statements/select/into-outfile.md#into-outfile-clause) clause to your query. + +For example: + +``` sql +SELECT * FROM table INTO OUTFILE 'file' +``` + +By default, ClickHouse uses the [TabSeparated](../../interfaces/formats.md#tabseparated) format for output data. To select the [data format](../../interfaces/formats.md), use the [FORMAT clause](../../sql-reference/statements/select/format.md#format-clause). + +For example: + +``` sql +SELECT * FROM table INTO OUTFILE 'file' FORMAT CSV +``` + +## Using a File-Engine Table {#using-a-file-engine-table} + +See [File](../../engines/table-engines/special/file.md) table engine.
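A hedged sketch of that approach (the table name, columns, and inserted values are assumptions): a `File`-engine table is backed by a file in the chosen format inside the table directory under the server data path, so inserting into it effectively writes an export file on the server.

``` sql
CREATE TABLE csv_export (name String, value UInt32) ENGINE = File(CSV);

-- Every inserted block is appended to the CSV file that backs the table;
-- a typical export would use INSERT ... SELECT from the source table instead.
INSERT INTO csv_export VALUES ('a', 1), ('b', 2);
```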
+ +## Using Command-Line Redirection {#using-command-line-redirection} + +``` sql +$ clickhouse-client --query "SELECT * from table" --format FormatName > result.txt +``` + +See [clickhouse-client](../../interfaces/cli.md). diff --git a/docs/en/faq/integration/index.md b/docs/en/faq/integration/index.md new file mode 100644 index 00000000000..f74738d316c --- /dev/null +++ b/docs/en/faq/integration/index.md @@ -0,0 +1,17 @@ +--- +toc_hidden_folder: true +toc_priority: 3 +toc_title: Integration +--- + +# Question About Integrating ClickHouse and Other Systems {#question-about-integrating-clickhouse-and-other-systems} + +Questions: + +- [How do I export data from ClickHouse to a file?](../../faq/integration/file-export.md) +- [What if I Have a problem with encodings when connecting to Oracle via ODBC?](../../faq/integration/oracle-odbc.md) + +!!! info "Don’t see what you were looking for?" + Check out [other F.A.Q. categories](../../faq/index.md) or browse around main documentation articles found in the left sidebar. + +{## [Original article](https://clickhouse.tech/docs/en/faq/integration/) ##} diff --git a/docs/en/faq/integration/oracle-odbc.md b/docs/en/faq/integration/oracle-odbc.md new file mode 100644 index 00000000000..d6e4ed02424 --- /dev/null +++ b/docs/en/faq/integration/oracle-odbc.md @@ -0,0 +1,14 @@ +--- +toc_hidden: true +toc_priority: 20 +--- + +# What If I Have a Problem with Encodings When Using Oracle Via ODBC? {#oracle-odbc-encodings} + +If you use Oracle as a source of ClickHouse external dictionaries via Oracle ODBC driver, you need to set the correct value for the `NLS_LANG` environment variable in `/etc/default/clickhouse`. For more information, see the [Oracle NLS\_LANG FAQ](https://www.oracle.com/technetwork/products/globalization/nls-lang-099431.html). + +**Example** + +``` sql +NLS_LANG=RUSSIAN_RUSSIA.UTF8 +``` diff --git a/docs/en/faq/operations/index.md b/docs/en/faq/operations/index.md new file mode 100644 index 00000000000..1d294c56611 --- /dev/null +++ b/docs/en/faq/operations/index.md @@ -0,0 +1,16 @@ +--- +toc_hidden_folder: true +toc_priority: 2 +toc_title: Operations +--- + +# Question About Operating ClickHouse Servers and Clusters {#question-about-operating-clickhouse-servers-and-clusters} + +Questions: + +- [Which ClickHouse version to use in production?](../../faq/operations/production.md) + +!!! info "Don’t see what you were looking for?" + Check out [other F.A.Q. categories](../../faq/index.md) or browse around main documentation articles found in the left sidebar. + +{## [Original article](https://clickhouse.tech/docs/en/faq/production/) ##} diff --git a/docs/en/faq/operations/production.md b/docs/en/faq/operations/production.md new file mode 100644 index 00000000000..83341a3423b --- /dev/null +++ b/docs/en/faq/operations/production.md @@ -0,0 +1,69 @@ +--- +toc_hidden: true +toc_priority: 10 +--- + +# Which ClickHouse Version to Use in Production? {#which-clickhouse-version-to-use-in-production} + +First of all, let’s discuss why people ask this question in the first place. There are two key reasons: + +1. ClickHouse is developed with pretty high velocity and usually, there are 10+ stable releases per year. It makes a wide range of releases to choose from, which is not so trivial choice. +2. Some users want to avoid spending time figuring out which version works best for their use case and just follow someone else’s advice. 
+ +The second reason is more fundamental, so we’ll start with it and then get back to navigating through various ClickHouse releases. + +## Which ClickHouse Version Do You Recommend? {#which-clickhouse-version-do-you-recommend} + +It’s tempting to hire consultants or trust some known experts to get rid of responsibility for your production environment. You install some specific ClickHouse version that someone else recommended, now if there’s some issue with it - it’s not your fault, it’s someone else’s. This line of reasoning is a big trap. No external person knows better what’s going on in your company’s production environment. + +So how to properly choose which ClickHouse version to upgrade to? Or how to choose your first ClickHouse version? First of all, you need to invest in setting up a **realistic pre-production environment**. In an ideal world, it could be a completely identical shadow copy, but that’s usually expensive. + +Here’re some key points to get reasonable fidelity in a pre-production environment with not so high costs: + +- Pre-production environment needs to run an as close set of queries as you intend to run in production: + - Don’t make it read-only with some frozen data. + - Don’t make it write-only with just copying data without building some typical reports. + - Don’t wipe it clean instead of applying schema migrations. +- Use a sample of real production data and queries. Try to choose a sample that’s still representative and makes `SELECT` queries return reasonable results. Use obfuscation if your data is sensitive and internal policies don’t allow it to leave the production environment. +- Make sure that pre-production is covered by your monitoring and alerting software the same way as your production environment does. +- If your production spans across multiple datacenters or regions, make your pre-production does the same. +- If your production uses complex features like replication, distributed table, cascading materialize views, make sure they are configured similarly in pre-production. +- There’s a trade-off on using the roughly same number of servers or VMs in pre-production as in production, but of smaller size, or much less of them, but of the same size. The first option might catch extra network-related issues, while the latter is easier to manage. + +The second area to invest in is **automated testing infrastructure**. Don’t assume that if some kind of query has executed successfully once, it’ll continue to do so forever. It’s ok to have some unit tests where ClickHouse is mocked but make sure your product has a reasonable set of automated tests that are run against real ClickHouse and check that all important use cases are still working as expected. + +Extra step forward could be contributing those automated tests to [ClickHouse’s open-source test infrastructure](https://github.com/ClickHouse/ClickHouse/tree/master/tests) that’s continuously used in its day-to-day development. It definitely will take some additional time and effort to learn [how to run it](../../development/tests.md) and then how to adapt your tests to this framework, but it’ll pay off by ensuring that ClickHouse releases are already tested against them when they are announced stable, instead of repeatedly losing time on reporting the issue after the fact and then waiting for a bugfix to be implemented, backported and released. 
Some companies even have such test contributions to infrastructure by its use as an internal policy, most notably it’s called [Beyonce’s Rule](https://www.oreilly.com/library/view/software-engineering-at/9781492082781/ch01.html#policies_that_scale_well) at Google. + +When you have your pre-production environment and testing infrastructure in place, choosing the best version is straightforward: + +1. Routinely run your automated tests against new ClickHouse releases. You can do it even for ClickHouse releases that are marked as `testing`, but going forward to the next steps with them is not recommended. +2. Deploy the ClickHouse release that passed the tests to pre-production and check that all processes are running as expected. +3. Report any issues you discovered to [ClickHouse GitHub Issues](https://github.com/ClickHouse/ClickHouse/issues). +4. If there were no major issues, it should be safe to start deploying ClickHouse release to your production environment. Investing in gradual release automation that implements an approach similar to [canary releases](https://martinfowler.com/bliki/CanaryRelease.html) or [green-blue deployments](https://martinfowler.com/bliki/BlueGreenDeployment.html) might further reduce the risk of issues in production. + +As you might have noticed, there’s nothing specific to ClickHouse in the approach described above, people do that for any piece of infrastructure they rely on if they take their production environment seriously. + +## How to Choose Between ClickHouse Releases? {#how-to-choose-between-clickhouse-releases} + +If you look into contents of ClickHouse package repository, you’ll see four kinds of packages: + +1. `testing` +2. `prestable` +3. `stable` +4. `lts` (long-term support) + +As was mentioned earlier, `testing` is good mostly to notice issues early, running them in production is not recommended because each of them is not tested as thoroughly as other kinds of packages. + +`prestable` is a release candidate which generally looks promising and is likely to become announced as `stable` soon. You can try them out in pre-production and report issues if you see any. + +For production use, there are two key options: `stable` and `lts`. Here is some guidance on how to choose between them: + +- `stable` is the kind of package we recommend by default. They are released roughly monthly (and thus provide new features with reasonable delay) and three latest stable releases are supported in terms of diagnostics and backporting of bugfixes. +- `lts` are released twice a year and are supported for a year after their initial release. You might prefer them over `stable` in the following cases: + - Your company has some internal policies that don’t allow for frequent upgrades or using non-LTS software. + - You are using ClickHouse in some secondary products that either doesn’t require any complex ClickHouse features and don’t have enough resources to keep it updated. + +Many teams who initially thought that `lts` is the way to go, often switch to `stable` anyway because of some recent feature that’s important for their product. + +!!! warning "Important" + One more thing to keep in mind when upgrading ClickHouse: we’re always keeping eye on compatibility across releases, but sometimes it’s not reasonable to keep and some minor details might change. So make sure you check the [changelog](../../whats-new/changelog/index.md) before upgrading to see if there are any notes about backward-incompatible changes. 
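A small companion to this process (a hedged sketch, not tied to any particular release): both functions below are built into ClickHouse and can be used to confirm what a server is actually running before and after an upgrade.

``` sql
-- Exact server version string.
SELECT version();

-- Seconds since the server started; useful to confirm it restarted on the new build.
SELECT uptime();
```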
diff --git a/docs/en/getting-started/example-datasets/index.md b/docs/en/getting-started/example-datasets/index.md index c81b272d029..654fd6ff95f 100644 --- a/docs/en/getting-started/example-datasets/index.md +++ b/docs/en/getting-started/example-datasets/index.md @@ -9,12 +9,12 @@ toc_title: Introduction This section describes how to obtain example datasets and import them into ClickHouse. For some datasets example queries are also available. -- [Anonymized Yandex.Metrica Dataset](metrica.md) -- [Star Schema Benchmark](star-schema.md) -- [WikiStat](wikistat.md) -- [Terabyte of Click Logs from Criteo](criteo.md) -- [AMPLab Big Data Benchmark](amplab-benchmark.md) -- [New York Taxi Data](nyc-taxi.md) -- [OnTime](ontime.md) +- [Anonymized Yandex.Metrica Dataset](../../getting-started/example-datasets/metrica.md) +- [Star Schema Benchmark](../../getting-started/example-datasets/star-schema.md) +- [WikiStat](../../getting-started/example-datasets/wikistat.md) +- [Terabyte of Click Logs from Criteo](../../getting-started/example-datasets/criteo.md) +- [AMPLab Big Data Benchmark](../../getting-started/example-datasets/amplab-benchmark.md) +- [New York Taxi Data](../../getting-started/example-datasets/nyc-taxi.md) +- [OnTime](../../getting-started/example-datasets/ontime.md) [Original article](https://clickhouse.tech/docs/en/getting_started/example_datasets) diff --git a/docs/en/getting-started/example-datasets/metrica.md b/docs/en/getting-started/example-datasets/metrica.md index 4131dca78fe..a9cf80716ce 100644 --- a/docs/en/getting-started/example-datasets/metrica.md +++ b/docs/en/getting-started/example-datasets/metrica.md @@ -7,7 +7,7 @@ toc_title: Yandex.Metrica Data Dataset consists of two tables containing anonymized data about hits (`hits_v1`) and visits (`visits_v1`) of Yandex.Metrica. You can read more about Yandex.Metrica in [ClickHouse history](../../introduction/history.md) section. -The dataset consists of two tables, either of them can be downloaded as a compressed `tsv.xz` file or as prepared partitions. In addition to that, an extended version of the `hits` table containing 100 million rows is available as TSV at https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_100m_obfuscated_v1.tsv.xz and as prepared partitions at https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_100m_obfuscated_v1.tar.xz. +The dataset consists of two tables, either of them can be downloaded as a compressed `tsv.xz` file or as prepared partitions. In addition to that, an extended version of the `hits` table containing 100 million rows is available as TSV at https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits\_100m\_obfuscated\_v1.tsv.xz and as prepared partitions at https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits\_100m\_obfuscated\_v1.tar.xz. ## Obtaining Tables from Prepared Partitions {#obtaining-tables-from-prepared-partitions} diff --git a/docs/en/getting-started/index.md b/docs/en/getting-started/index.md index c84b3c7c9a6..066809aac89 100644 --- a/docs/en/getting-started/index.md +++ b/docs/en/getting-started/index.md @@ -7,9 +7,9 @@ toc_title: hidden # Getting Started {#getting-started} -If you are new to ClickHouse and want to get a hands-on feeling of its performance, first of all, you need to go through the [installation process](install.md). After that you can: +If you are new to ClickHouse and want to get a hands-on feeling of its performance, first of all, you need to go through the [installation process](../getting-started/install.md). 
After that you can: -- [Go through detailed tutorial](tutorial.md) -- [Experiment with example datasets](example-datasets/ontime.md) +- [Go through detailed tutorial](../getting-started/tutorial.md) +- [Experiment with example datasets](../getting-started/example-datasets/ontime.md) [Original article](https://clickhouse.tech/docs/en/getting_started/) diff --git a/docs/en/getting-started/playground.md b/docs/en/getting-started/playground.md index bed1618314b..a0411428865 100644 --- a/docs/en/getting-started/playground.md +++ b/docs/en/getting-started/playground.md @@ -6,13 +6,13 @@ toc_title: Playground # ClickHouse Playground {#clickhouse-playground} [ClickHouse Playground](https://play.clickhouse.tech) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster. -Several example datasets are available in the Playground as well as sample queries that show ClickHouse features. There's also a selection of ClickHouse LTS releases to experiment with. +Several example datasets are available in the Playground as well as sample queries that show ClickHouse features. There’s also a selection of ClickHouse LTS releases to experiment with. ClickHouse Playground gives the experience of m2.small [Managed Service for ClickHouse](https://cloud.yandex.com/services/managed-clickhouse) instance (4 vCPU, 32 GB RAM) hosted in [Yandex.Cloud](https://cloud.yandex.com/). More information about [cloud providers](../commercial/cloud.md). You can make queries to playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces/index.md). -## Credentials +## Credentials {#credentials} | Parameter | Value | |:--------------------|:----------------------------------------| @@ -23,13 +23,13 @@ You can make queries to playground using any HTTP client, for example [curl](htt There are additional endpoints with specific ClickHouse releases to experiment with their differences (ports and user/password are the same as above): -* 20.3 LTS: `play-api-v20-3.clickhouse.tech` -* 19.14 LTS: `play-api-v19-14.clickhouse.tech` +- 20.3 LTS: `play-api-v20-3.clickhouse.tech` +- 19.14 LTS: `play-api-v19-14.clickhouse.tech` !!! note "Note" All these endpoints require a secure TLS connection. -## Limitations +## Limitations {#limitations} The queries are executed as a read-only user. It implies some limitations: @@ -37,12 +37,12 @@ The queries are executed as a read-only user. 
It implies some limitations: - INSERT queries are not allowed The following settings are also enforced: -- [max_result_bytes=10485760](../operations/settings/query_complexity/#max-result-bytes) -- [max_result_rows=2000](../operations/settings/query_complexity/#setting-max_result_rows) -- [result_overflow_mode=break](../operations/settings/query_complexity/#result-overflow-mode) -- [max_execution_time=60000](../operations/settings/query_complexity/#max-execution-time) +- [max\_result\_bytes=10485760](../operations/settings/query_complexity/#max-result-bytes) +- [max\_result\_rows=2000](../operations/settings/query_complexity/#setting-max_result_rows) +- [result\_overflow\_mode=break](../operations/settings/query_complexity/#result-overflow-mode) +- [max\_execution\_time=60000](../operations/settings/query_complexity/#max-execution-time) -## Examples +## Examples {#examples} HTTPS endpoint example with `curl`: @@ -51,11 +51,12 @@ curl "https://play-api.clickhouse.tech:8443/?query=SELECT+'Play+ClickHouse!';&us ``` TCP endpoint example with [CLI](../interfaces/cli.md): + ``` bash clickhouse client --secure -h play-api.clickhouse.tech --port 9440 -u playground --password clickhouse -q "SELECT 'Play ClickHouse!'" ``` -## Implementation Details +## Implementation Details {#implementation-details} ClickHouse Playground web interface makes requests via ClickHouse [HTTP API](../interfaces/http.md). The Playground backend is just a ClickHouse cluster without any additional server-side application. As mentioned above, ClickHouse HTTPS and TCP/TLS endpoints are also publicly available as a part of the Playground, both are proxied through [Cloudflare Spectrum](https://www.cloudflare.com/products/cloudflare-spectrum/) to add extra layer of protection and improved global connectivity. diff --git a/docs/en/getting-started/tutorial.md b/docs/en/getting-started/tutorial.md index 952161bdb0a..bc71d762138 100644 --- a/docs/en/getting-started/tutorial.md +++ b/docs/en/getting-started/tutorial.md @@ -11,7 +11,7 @@ By going through this tutorial, you’ll learn how to set up a simple ClickHouse ## Single Node Setup {#single-node-setup} -To postpone the complexities of a distributed environment, we’ll start with deploying ClickHouse on a single server or virtual machine. ClickHouse is usually installed from [deb](install.md#install-from-deb-packages) or [rpm](install.md#from-rpm-packages) packages, but there are [alternatives](install.md#from-docker-image) for the operating systems that do no support them. +To postpone the complexities of a distributed environment, we’ll start with deploying ClickHouse on a single server or virtual machine. ClickHouse is usually installed from [deb](../getting-started/install.md#install-from-deb-packages) or [rpm](../getting-started/install.md#from-rpm-packages) packages, but there are [alternatives](../getting-started/install.md#from-docker-image) for the operating systems that do no support them. For example, you have chosen `deb` packages and executed: @@ -80,7 +80,7 @@ clickhouse-client --query='INSERT INTO table FORMAT TabSeparated' < data.tsv ## Import Sample Dataset {#import-sample-dataset} -Now it’s time to fill our ClickHouse server with some sample data. In this tutorial, we’ll use the anonymized data of Yandex.Metrica, the first service that runs ClickHouse in production way before it became open-source (more on that in [history section](../introduction/history.md)). 
There are [multiple ways to import Yandex.Metrica dataset](example-datasets/metrica.md), and for the sake of the tutorial, we’ll go with the most realistic one. +Now it’s time to fill our ClickHouse server with some sample data. In this tutorial, we’ll use the anonymized data of Yandex.Metrica, the first service that runs ClickHouse in production way before it became open-source (more on that in [history section](../introduction/history.md)). There are [multiple ways to import Yandex.Metrica dataset](../getting-started/example-datasets/metrica.md), and for the sake of the tutorial, we’ll go with the most realistic one. ### Download and Extract Table Data {#download-and-extract-table-data} diff --git a/docs/en/guides/apply-catboost-model.md b/docs/en/guides/apply-catboost-model.md index 835f4dda7fa..f614b121714 100644 --- a/docs/en/guides/apply-catboost-model.md +++ b/docs/en/guides/apply-catboost-model.md @@ -232,6 +232,6 @@ FROM ``` !!! note "Note" - More info about [avg()](../sql-reference/aggregate-functions/reference.md#agg_function-avg) and [log()](../sql-reference/functions/math-functions.md) functions. + More info about [avg()](../sql-reference/aggregate-functions/reference/avg.md#agg_function-avg) and [log()](../sql-reference/functions/math-functions.md) functions. [Original article](https://clickhouse.tech/docs/en/guides/apply_catboost_model/) diff --git a/docs/en/guides/index.md b/docs/en/guides/index.md index ea20a606cac..8a48a411b7a 100644 --- a/docs/en/guides/index.md +++ b/docs/en/guides/index.md @@ -9,6 +9,6 @@ toc_title: Overview List of detailed step-by-step instructions that help to solve various tasks using ClickHouse: - [Tutorial on simple cluster set-up](../getting-started/tutorial.md) -- [Applying a CatBoost model in ClickHouse](apply-catboost-model.md) +- [Applying a CatBoost model in ClickHouse](../guides/apply-catboost-model.md) [Original article](https://clickhouse.tech/docs/en/guides/) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index a08f2db7149..3be3490cffe 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1147,11 +1147,11 @@ To exchange data with Hadoop, you can use [HDFS table engine](../engines/table-e [Apache Arrow](https://arrow.apache.org/) comes with two built-in columnar storage formats. ClickHouse supports read and write operations for these formats. -`Arrow` is Apache Arrow's "file mode" format. It is designed for in-memory random access. +`Arrow` is Apache Arrow’s “file mode” format. It is designed for in-memory random access. ## ArrowStream {#data-format-arrow-stream} -`ArrowStream` is Apache Arrow's "stream mode" format. It is designed for in-memory stream processing. +`ArrowStream` is Apache Arrow’s “stream mode” format. It is designed for in-memory stream processing. ## ORC {#data-format-orc} diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md index 98de4a560fb..69699be52ec 100644 --- a/docs/en/interfaces/http.md +++ b/docs/en/interfaces/http.md @@ -275,7 +275,7 @@ Use buffering to avoid situations where a query processing error occurred after ### Queries with Parameters {#cli-queries-with-parameters} -You can create a query with parameters and pass values for them from the corresponding HTTP request parameters. For more information, see [Queries with Parameters for CLI](cli.md#cli-queries-with-parameters). +You can create a query with parameters and pass values for them from the corresponding HTTP request parameters. 
For more information, see [Queries with Parameters for CLI](../interfaces/cli.md#cli-queries-with-parameters). ### Example {#example} @@ -291,7 +291,7 @@ ClickHouse supports specific queries through the HTTP interface. For example, yo $ echo '(4),(5),(6)' | curl 'http://localhost:8123/?query=INSERT%20INTO%20t%20VALUES' --data-binary @- ``` -ClickHouse also supports Predefined HTTP Interface which can help you more easy integration with third party tools like [Prometheus exporter](https://github.com/percona-lab/clickhouse_exporter). +ClickHouse also supports a Predefined HTTP Interface which can help you more easily integrate with third-party tools like [Prometheus exporter](https://github.com/percona-lab/clickhouse_exporter). Example: @@ -314,7 +314,7 @@ Example: ``` -- You can now request the url directly for data in the Prometheus format: +- You can now request the URL directly for data in the Prometheus format: @@ -361,41 +361,40 @@ $ curl -v 'http://localhost:8123/predefined_query' * Connection #0 to host localhost left intact - * Connection #0 to host localhost left intact ``` -As you can see from the example, if `` is configured in the config.xml file and `` can contain many `s`. ClickHouse will match the HTTP requests received to the predefined type in `` and the first matched runs the handler. Then ClickHouse will execute the corresponding predefined query if the match is successful. +As you can see from the example, `http_handlers` is configured in the config.xml file and can contain many `rules`. ClickHouse matches the HTTP requests it receives against the predefined types in each `rule`, and the first rule that matches runs the handler. If the match is successful, ClickHouse then executes the corresponding predefined query. -> Now `` can configure ``, ``, ``,``: -> `` is responsible for matching the method part of the HTTP request. `` fully conforms to the definition of [method](https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods) in the HTTP protocol. It is an optional configuration. If it is not defined in the configuration file, it does not match the method portion of the HTTP request. -> -> `` is responsible for matching the url part of the HTTP request. It is compatible with [RE2](https://github.com/google/re2)’s regular expressions. It is an optional configuration. If it is not defined in the configuration file, it does not match the url portion of the HTTP request. -> -> `` is responsible for matching the header part of the HTTP request. It is compatible with RE2’s regular expressions. It is an optional configuration. If it is not defined in the configuration file, it does not match the header portion of the HTTP request. -> -> `` contains the main processing part. Now `` can configure ``, ``, ``, ``, ``, ``. -> \> `` currently supports three types: **predefined\_query\_handler**, **dynamic\_query\_handler**, **static**. -> \> -> \> `` - use with predefined\_query\_handler type, executes query when the handler is called. -> \> -> \> `` - use with dynamic\_query\_handler type, extracts and executes the value corresponding to the `` value in HTTP request params. -> \> -> \> `` - use with static type, response status code. -> \> -> \> `` - use with static type, response [content-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type). -> \> -> \> `` - use with static type, Response content sent to client, when using the prefix ‘file://’ or ‘config://’, find the content from the file or configuration send to client.
+Now `rule` can configure `method`, `headers`, `url`, `handler`: + - `method` is responsible for matching the method part of the HTTP request. `method` fully conforms to the definition of [method](https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods) in the HTTP protocol. It is an optional configuration. If it is not defined in the configuration file, it does not match the method portion of the HTTP request. -Next are the configuration methods for the different ``. + - `url` is responsible for matching the URL part of the HTTP request. It is compatible with [RE2](https://github.com/google/re2)’s regular expressions. It is an optional configuration. If it is not defined in the configuration file, it does not match the URL portion of the HTTP request. -## predefined\_query\_handler {#predefined_query_handler} + - `headers` are responsible for matching the header part of the HTTP request. It is compatible with RE2’s regular expressions. It is an optional configuration. If it is not defined in the configuration file, it does not match the header portion of the HTTP request. -`` supports setting Settings and query\_params values. You can configure `` in the type of ``. + - `handler` contains the main processing part. Now `handler` can configure `type`, `status`, `content_type`, `response_content`, `query`, `query_param_name`. + `type` currently supports three types: [predefined_query_handler](#predefined_query_handler), [dynamic_query_handler](#dynamic_query_handler), [static](#static). + + - `query` — use with `predefined_query_handler` type, executes query when the handler is called. + + - `query_param_name` — use with `dynamic_query_handler` type, extracts and executes the value corresponding to the `query_param_name` value in HTTP request params. + + - `status` — use with `static` type, response status code. + + - `content_type` — use with `static` type, response [content-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type). -`` value is a predefined query of ``, which is executed by ClickHouse when an HTTP request is matched and the result of the query is returned. It is a must configuration. + - `response_content` — use with `static` type, response content sent to the client; when the prefix ‘file://’ or ‘config://’ is used, the content is taken from the file or configuration and sent to the client. -The following example defines the values of `max_threads` and `max_alter_threads` settings, then queries the system table to check whether these settings were set successfully. +Next are the configuration methods for different `type`. + +### predefined_query_handler {#predefined_query_handler} + +`predefined_query_handler` supports setting `Settings` and `query_params` values. You can configure `query` in the `predefined_query_handler` type. + +The `query` value is a predefined query of `predefined_query_handler`, which is executed by ClickHouse when an HTTP request is matched, and the result of the query is returned. It is a required configuration. + +The following example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_alter_threads` settings, then queries the system table to check whether these settings were set successfully. Example: @@ -424,15 +423,15 @@ max_alter_threads 2 ``` !!! note "caution" - In one `` only supports one `` of an insert type. + One `predefined_query_handler` supports only one `query` of an insert type.
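As a rough usage sketch of the handler described above: the `/report` path, the custom header name, and its value are all hypothetical here and would come from the `rule` you define in your own config.xml.

```bash
# Hypothetical rule: GET /report is matched by <url> and a custom header,
# and its <handler> is a predefined_query_handler with a fixed <query>.
# Settings such as max_threads can additionally be passed as URL parameters,
# since predefined_query_handler supports setting Settings values.
curl -H 'X-Report-Token: TEST_VALUE' 'http://localhost:8123/report?max_threads=1'
```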
-## dynamic\_query\_handler {#dynamic_query_handler} +### dynamic_query_handler {#dynamic_query_handler} -In ``, query is written in the form of param of the HTTP request. The difference is that in ``, query is wrote in the configuration file. You can configure `` in ``. +In `dynamic_query_handler`, the query is written in the form of a parameter of the HTTP request. The difference is that in `predefined_query_handler`, the query is written in the configuration file. You can configure `query_param_name` in `dynamic_query_handler`. -ClickHouse extracts and executes the value corresponding to the `` value in the url of the HTTP request. The default value of `` is `/query` . It is an optional configuration. If there is no definition in the configuration file, the param is not passed in. +ClickHouse extracts and executes the value corresponding to the `query_param_name` value in the URL of the HTTP request. The default value of `query_param_name` is `/query`. It is an optional configuration. If there is no definition in the configuration file, the param is not passed in. -To experiment with this functionality, the example defines the values of max\_threads and max\_alter\_threads and queries whether the Settings were set successfully. +To experiment with this functionality, the example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_alter_threads` and queries whether the settings were set successfully. Example: @@ -455,9 +454,9 @@ max_threads 1 max_alter_threads 2 -## static {#static} +### static {#static} -`` can return [content\_type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type), [status](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status) and response\_content. response\_content can return the specified content +`static` can return [content_type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type), [status](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status) and `response_content`. `response_content` can return the specified content. Example: diff --git a/docs/en/interfaces/index.md index e1b8a639db2..1613e8508a5 100644 --- a/docs/en/interfaces/index.md +++ b/docs/en/interfaces/index.md @@ -9,19 +9,19 @@ toc_title: Introduction ClickHouse provides two network interfaces (both can be optionally wrapped in TLS for additional security): - [HTTP](http.md), which is documented and easy to use directly. -- [Native TCP](tcp.md), which has less overhead. +- [Native TCP](../interfaces/tcp.md), which has less overhead. In most cases it is recommended to use appropriate tool or library instead of interacting with those directly.
Officially supported by Yandex are the following: -- [Command-line client](cli.md) -- [JDBC driver](jdbc.md) -- [ODBC driver](odbc.md) -- [C++ client library](cpp.md) +- [Command-line client](../interfaces/cli.md) +- [JDBC driver](../interfaces/jdbc.md) +- [ODBC driver](../interfaces/odbc.md) +- [C++ client library](../interfaces/cpp.md) There are also a wide range of third-party libraries for working with ClickHouse: -- [Client libraries](third-party/client-libraries.md) -- [Integrations](third-party/integrations.md) -- [Visual interfaces](third-party/gui.md) +- [Client libraries](../interfaces/third-party/client-libraries.md) +- [Integrations](../interfaces/third-party/integrations.md) +- [Visual interfaces](../interfaces/third-party/gui.md) [Original article](https://clickhouse.tech/docs/en/interfaces/) diff --git a/docs/en/interfaces/tcp.md b/docs/en/interfaces/tcp.md index 6be4d560ef1..75ca3e3fd91 100644 --- a/docs/en/interfaces/tcp.md +++ b/docs/en/interfaces/tcp.md @@ -5,6 +5,6 @@ toc_title: Native Interface (TCP) # Native Interface (TCP) {#native-interface-tcp} -The native protocol is used in the [command-line client](cli.md), for inter-server communication during distributed query processing, and also in other C++ programs. Unfortunately, native ClickHouse protocol does not have formal specification yet, but it can be reverse-engineered from ClickHouse source code (starting [around here](https://github.com/ClickHouse/ClickHouse/tree/master/src/Client)) and/or by intercepting and analyzing TCP traffic. +The native protocol is used in the [command-line client](../interfaces/cli.md), for inter-server communication during distributed query processing, and also in other C++ programs. Unfortunately, native ClickHouse protocol does not have formal specification yet, but it can be reverse-engineered from ClickHouse source code (starting [around here](https://github.com/ClickHouse/ClickHouse/tree/master/src/Client)) and/or by intercepting and analyzing TCP traffic. [Original article](https://clickhouse.tech/docs/en/interfaces/tcp/) diff --git a/docs/en/interfaces/third-party/integrations.md b/docs/en/interfaces/third-party/integrations.md index 716e774871b..4ec748ee7ee 100644 --- a/docs/en/interfaces/third-party/integrations.md +++ b/docs/en/interfaces/third-party/integrations.md @@ -12,6 +12,7 @@ toc_title: Integrations - Relational database management systems - [MySQL](https://www.mysql.com) + - [mysql2ch](https://github.com/long2ice/mysql2ch) - [ProxySQL](https://github.com/sysown/proxysql/wiki/ClickHouse-Support) - [clickhouse-mysql-data-reader](https://github.com/Altinity/clickhouse-mysql-data-reader) - [horgh-replicator](https://github.com/larsnovikov/horgh-replicator) @@ -97,5 +98,12 @@ toc_title: Integrations - Elixir - [Ecto](https://github.com/elixir-ecto/ecto) - [clickhouse\_ecto](https://github.com/appodeal/clickhouse_ecto) +- Ruby + - [Ruby on Rails](https://rubyonrails.org/) + - [activecube](https://github.com/bitquery/activecube) + - [ActiveRecord](https://github.com/PNixx/clickhouse-activerecord) + - [GraphQL](https://github.com/graphql) + - [activecube-graphql](https://github.com/bitquery/activecube-graphql) + [Original article](https://clickhouse.tech/docs/en/interfaces/third-party/integrations/) diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index 081f963f74f..3ebadd6d002 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -8,78 +8,78 @@ toc_title: Adopters !!! 
warning "Disclaimer" The following list of companies using ClickHouse and their success stories is assembled from public sources, thus might differ from current reality. We’d appreciate it if you share the story of adopting ClickHouse in your company and [add it to the list](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/introduction/adopters.md), but please make sure you won’t have any NDA issues by doing so. Providing updates with publications from other companies is also useful. -| Company | Industry | Usecase | Cluster Size | (Un)Compressed Data Size\* | Reference | -|---------------------------------------------------------------------|---------------------------------|-----------------------|------------------------------------------------------------|------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| [2gis](https://2gis.ru){.favicon} | Maps | Monitoring | — | — | [Talk in Russian, July 2019](https://youtu.be/58sPkXfq6nw) | -| [Aloha Browser](https://alohabrowser.com/){.favicon} | Mobile App | Browser backend | — | — | [Slides in Russian, May 2019](https://presentations.clickhouse.tech/meetup22/aloha.pdf) | -| [Amadeus](https://amadeus.com/){.favicon} | Travel | Analytics | — | — | [Press Release, April 2018](https://www.altinity.com/blog/2018/4/5/amadeus-technologies-launches-investment-and-insights-tool-based-on-machine-learning-and-strategy-algorithms) | -| [Appsflyer](https://www.appsflyer.com){.favicon} | Mobile analytics | Main product | — | — | [Talk in Russian, July 2019](https://www.youtube.com/watch?v=M3wbRlcpBbY) | -| [ArenaData](https://arenadata.tech/){.favicon} | Data Platform | Main product | — | — | [Slides in Russian, December 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup38/indexes.pdf) | -| [Badoo](https://badoo.com){.favicon} | Dating | Timeseries | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/forecast.pdf) | -| [Benocs](https://www.benocs.com/){.favicon} | Network Telemetry and Analytics | Main Product | — | — | [Slides in English, October 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) | -| [Bloomberg](https://www.bloomberg.com/){.favicon} | Finance, Media | Monitoring | 102 servers | — | [Slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) | -| [Bloxy](https://bloxy.info){.favicon} | Blockchain | Analytics | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/4_bloxy.pptx) | -| [Dataliance for China Telecom](https://www.chinatelecomglobal.com/){.favicon} | Telecom | Analytics | — | — | [Slides in Chinese, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/telecom.pdf) | -| [CARTO](https://carto.com/){.favicon} | Business Intelligence | Geo analytics | — | — | [Geospatial processing with ClickHouse](https://carto.com/blog/geospatial-processing-with-clickhouse/) | -| [CERN](http://public.web.cern.ch/public/){.favicon} | Research | Experiment | — | — | [Press release, April 2012](https://www.yandex.com/company/press_center/press_releases/2012/2012-04-10/) | -| [Cisco](http://cisco.com/){.favicon} | 
Networking | Traffic analysis | — | — | [Lightning talk, October 2019](https://youtu.be/-hI1vDR2oPY?t=5057) | -| [Citadel Securities](https://www.citadelsecurities.com/){.favicon} | Finance | — | — | — | [Contribution, March 2019](https://github.com/ClickHouse/ClickHouse/pull/4774) | -| [Citymobil](https://city-mobil.ru){.favicon} | Taxi | Analytics | — | — | [Blog Post in Russian, March 2020](https://habr.com/en/company/citymobil/blog/490660/) | -| [ContentSquare](https://contentsquare.com){.favicon} | Web analytics | Main product | — | — | [Blog post in French, November 2018](http://souslecapot.net/2018/11/21/patrick-chatain-vp-engineering-chez-contentsquare-penser-davantage-amelioration-continue-que-revolution-constante/) | -| [Cloudflare](https://cloudflare.com){.favicon} | CDN | Traffic analysis | 36 servers | — | [Blog post, May 2017](https://blog.cloudflare.com/how-cloudflare-analyzes-1m-dns-queries-per-second/), [Blog post, March 2018](https://blog.cloudflare.com/http-analytics-for-6m-requests-per-second-using-clickhouse/) | -| [Corunet](https://coru.net/){.favicon} | Analytics | Main product | — | — | [Slides in English, April 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup21/predictive_models.pdf) | -| [CraiditX 氪信](https://www.creditx.com){.favicon} | Finance AI | Analysis | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/udf.pptx) | -| [Criteo](https://www.criteo.com/){.favicon} | Retail | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/3_storetail.pptx) | -| [Deutsche Bank](https://db.com){.favicon} | Finance | BI Analytics | — | — | [Slides in English, October 2019](https://bigdatadays.ru/wp-content/uploads/2019/10/D2-H3-3_Yakunin-Goihburg.pdf) | -| [Diva-e](https://www.diva-e.com){.favicon} | Digital consulting | Main Product | — | — | [Slides in English, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup29/ClickHouse-MeetUp-Unusual-Applications-sd-2019-09-17.pdf) | -| [Exness](https://www.exness.com){.favicon} | Trading | Metrics, Logging | — | — | [Talk in Russian, May 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) | -| [Geniee](https://geniee.co.jp){.favicon} | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) | -| [HUYA](https://www.huya.com/){.favicon} | Video Streaming | Analytics | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) | -| [Idealista](https://www.idealista.com){.favicon} | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.yandex/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | -| [Infovista](https://www.infovista.com/){.favicon} | Networks | Analytics | — | — | [Slides in English, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) | -| [InnoGames](https://www.innogames.com){.favicon} | Games | Metrics, Logging | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) | -| [Integros](https://integros.com){.favicon} | Platform for video services | Analytics | — | — | [Slides in Russian, May 
2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | -| [Kodiak Data](https://www.kodiakdata.com/){.favicon} | Clouds | Main product | — | — | [Slides in Engish, April 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup13/kodiak_data.pdf) | -| [Kontur](https://kontur.ru){.favicon} | Software Development | Metrics | — | — | [Talk in Russian, November 2018](https://www.youtube.com/watch?v=U4u4Bd0FtrY) | -| [Lawrence Berkeley National Laboratory](https://www.lbl.gov){.favicon} | Research | Traffic analysis | 1 server | 11.8 TiB | [Slides in English, April 2019](https://www.smitasin.com/presentations/2019-04-17_DOE-NSM.pdf) | -| [LifeStreet](https://lifestreet.com/){.favicon} | Ad network | Main product | 75 servers (3 replicas) | 5.27 PiB | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/) | -| [Mail.ru Cloud Solutions](https://mcs.mail.ru/){.favicon} | Cloud services | Main product | — | — | [Article in Russian](https://mcs.mail.ru/help/db-create/clickhouse#) | -| [MessageBird](https://www.messagebird.com){.favicon} | Telecommunications | Statistics | — | — | [Slides in English, November 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup20/messagebird.pdf) | -| [MGID](https://www.mgid.com/){.favicon} | Ad network | Web-analytics | — | — | [Blog post in Russian, April 2020](http://gs-studio.com/news-about-it/32777----clickhouse---c) | -| [OneAPM](https://www.oneapm.com/){.favicon} | Monitorings and Data Analysis | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) | -| [Pragma Innovation](http://www.pragma-innovation.fr/){.favicon} | Telemetry and Big Data Analysis | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/4_pragma_innovation.pdf) | -| [QINGCLOUD](https://www.qingcloud.com/){.favicon} | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) | -| [Qrator](https://qrator.net){.favicon} | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) | -| [Percent 百分点](https://www.percent.cn/){.favicon} | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) | -| [Rambler](https://rambler.ru){.favicon} | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) | -| [Tencent](https://www.tencent.com){.favicon} | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) | -| [Traffic Stars](https://trafficstars.com/){.favicon} | AD network | — | — | — | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) | -| [S7 Airlines](https://www.s7.ru){.favicon} | Airlines | Metrics, Logging | — | — | [Talk in Russian, March 2019](https://www.youtube.com/watch?v=nwG68klRpPg&t=15s) | -| [SEMrush](https://www.semrush.com/){.favicon} | Marketing | Main product | — | — | [Slides in Russian, August 
2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/5_semrush.pdf) | -| [scireum GmbH](https://www.scireum.de/){.favicon} | e-Commerce | Main product | — | — | [Talk in German, February 2020](https://www.youtube.com/watch?v=7QWAn5RbyR4) | -| [Sentry](https://sentry.io/){.favicon} | Software Development | Main product | — | — | [Blog Post in English, May 2019](https://blog.sentry.io/2019/05/16/introducing-snuba-sentrys-new-search-infrastructure) | -| [SGK](http://www.sgk.gov.tr/wps/portal/sgk/tr){.favicon} | Goverment Social Security | Analytics | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/ClickHouse%20Meetup-Ramazan%20POLAT.pdf) | -| [seo.do](https://seo.do/){.favicon} | Analytics | Main product | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/CH%20Presentation-%20Metehan%20Çetinkaya.pdf) | -| [Sina](http://english.sina.com/index.html){.favicon} | News | — | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/6.%20ClickHouse最佳实践%20高鹏_新浪.pdf) | -| [SMI2](https://smi2.ru/){.favicon} | News | Analytics | — | — | [Blog Post in Russian, November 2017](https://habr.com/ru/company/smi2/blog/314558/) | -| [Splunk](https://www.splunk.com/){.favicon} | Business Analytics | Main product | — | — | [Slides in English, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/splunk.pdf) | -| [Spotify](https://www.spotify.com){.favicon} | Music | Experimentation | — | — | [Slides, July 2018](https://www.slideshare.net/glebus/using-clickhouse-for-experimentation-104247173) | -| [Tencent](https://www.tencent.com){.favicon} | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) | -| [Uber](https://www.uber.com){.favicon} | Taxi | Logging | — | — | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/uber.pdf) | -| [VKontakte](https://vk.com){.favicon} | Social Network | Statistics, Logging | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) | -| [Wisebits](https://wisebits.com/){.favicon} | IT Solutions | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | -| [Xiaoxin Tech](http://www.xiaoxintech.cn/){.favicon} | Education | Common purpose | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/sync-clickhouse-with-mysql-mongodb.pptx) | -| [Ximalaya](https://www.ximalaya.com/){.favicon} | Audio sharing | OLAP | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/ximalaya.pdf) | -| [Yandex Cloud](https://cloud.yandex.ru/services/managed-clickhouse){.favicon} | Public Cloud | Main product | — | — | [Talk in Russian, December 2019](https://www.youtube.com/watch?v=pgnak9e_E0o) | -| [Yandex DataLens](https://cloud.yandex.ru/services/datalens){.favicon} | Business Intelligence | Main product | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/datalens.pdf) | -| [Yandex Market](https://market.yandex.ru/){.favicon} | e-Commerce | Metrics, Logging | — | — | 
[Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) | -| [Yandex Metrica](https://metrica.yandex.com){.favicon} | Web analytics | Main product | 360 servers in one cluster, 1862 servers in one department | 66.41 PiB / 5.68 PiB | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/introduction/#13) | -| [ЦВТ](https://htc-cs.ru/){.favicon} | Software Development | Metrics, Logging | — | — | [Blog Post, March 2019, in Russian](https://vc.ru/dev/62715-kak-my-stroili-monitoring-na-prometheus-clickhouse-i-elk) | -| [МКБ](https://mkb.ru/){.favicon} | Bank | Web-system monitoring | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf) | -| [Jinshuju 金数据](https://jinshuju.net){.favicon} | BI Analytics | Main product | — | — | [Slides in Chinese, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/3.%20金数据数据架构调整方案Public.pdf) | -| [Instana](https://www.instana.com){.favicon} | APM Platform | Main product | — | — | [Twitter post](https://twitter.com/mieldonkers/status/1248884119158882304) | -| [Wargaming](https://wargaming.com/en/){.favicon} | Games | | — | — | [Interview](https://habr.com/en/post/496954/) | -| [Crazypanda](https://crazypanda.ru/en/){.favicon} | Games | | — | — | Live session on ClickHouse meetup | -| [FunCorp](https://fun.co/rp){.favicon} | Games | | — | — | [Article](https://www.altinity.com/blog/migrating-from-redshift-to-clickhouse) | +| Company | Industry | Usecase | Cluster Size | (Un)Compressed Data Size\* | Reference | +|------------------------------------------------------------------------------------------------|---------------------------------|-----------------------|------------------------------------------------------------|------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| 2gis | Maps | Monitoring | — | — | [Talk in Russian, July 2019](https://youtu.be/58sPkXfq6nw) | +| Aloha Browser | Mobile App | Browser backend | — | — | [Slides in Russian, May 2019](https://presentations.clickhouse.tech/meetup22/aloha.pdf) | +| Amadeus | Travel | Analytics | — | — | [Press Release, April 2018](https://www.altinity.com/blog/2018/4/5/amadeus-technologies-launches-investment-and-insights-tool-based-on-machine-learning-and-strategy-algorithms) | +| Appsflyer | Mobile analytics | Main product | — | — | [Talk in Russian, July 2019](https://www.youtube.com/watch?v=M3wbRlcpBbY) | +| ArenaData | Data Platform | Main product | — | — | [Slides in Russian, December 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup38/indexes.pdf) | +| Badoo | Dating | Timeseries | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/forecast.pdf) | +| Benocs | Network Telemetry and Analytics | Main Product | — | — | [Slides in English, October 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) | +| Bloomberg | Finance, Media | Monitoring | 102 servers | — | [Slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) | +| Bloxy | Blockchain | Analytics | — | — | [Slides in Russian, August 
2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/4_bloxy.pptx) | +| Dataliance for China Telecom | Telecom | Analytics | — | — | [Slides in Chinese, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/telecom.pdf) | +| CARTO | Business Intelligence | Geo analytics | — | — | [Geospatial processing with ClickHouse](https://carto.com/blog/geospatial-processing-with-clickhouse/) | +| CERN | Research | Experiment | — | — | [Press release, April 2012](https://www.yandex.com/company/press_center/press_releases/2012/2012-04-10/) | +| Cisco | Networking | Traffic analysis | — | — | [Lightning talk, October 2019](https://youtu.be/-hI1vDR2oPY?t=5057) | +| Citadel Securities | Finance | — | — | — | [Contribution, March 2019](https://github.com/ClickHouse/ClickHouse/pull/4774) | +| Citymobil | Taxi | Analytics | — | — | [Blog Post in Russian, March 2020](https://habr.com/en/company/citymobil/blog/490660/) | +| ContentSquare | Web analytics | Main product | — | — | [Blog post in French, November 2018](http://souslecapot.net/2018/11/21/patrick-chatain-vp-engineering-chez-contentsquare-penser-davantage-amelioration-continue-que-revolution-constante/) | +| Cloudflare | CDN | Traffic analysis | 36 servers | — | [Blog post, May 2017](https://blog.cloudflare.com/how-cloudflare-analyzes-1m-dns-queries-per-second/), [Blog post, March 2018](https://blog.cloudflare.com/http-analytics-for-6m-requests-per-second-using-clickhouse/) | +| Corunet | Analytics | Main product | — | — | [Slides in English, April 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup21/predictive_models.pdf) | +| CraiditX 氪信 | Finance AI | Analysis | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/udf.pptx) | +| Criteo | Retail | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/3_storetail.pptx) | +| Deutsche Bank | Finance | BI Analytics | — | — | [Slides in English, October 2019](https://bigdatadays.ru/wp-content/uploads/2019/10/D2-H3-3_Yakunin-Goihburg.pdf) | +| Diva-e | Digital consulting | Main Product | — | — | [Slides in English, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup29/ClickHouse-MeetUp-Unusual-Applications-sd-2019-09-17.pdf) | +| Exness | Trading | Metrics, Logging | — | — | [Talk in Russian, May 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) | +| Geniee | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) | +| HUYA | Video Streaming | Analytics | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) | +| Idealista | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.tech/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | +| Infovista | Networks | Analytics | — | — | [Slides in English, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) | +| InnoGames | Games | Metrics, Logging | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) | +| Integros | Platform for video services | Analytics | — | — | [Slides in Russian, May 
2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | +| Kodiak Data | Clouds | Main product | — | — | [Slides in Engish, April 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup13/kodiak_data.pdf) | +| Kontur | Software Development | Metrics | — | — | [Talk in Russian, November 2018](https://www.youtube.com/watch?v=U4u4Bd0FtrY) | +| Lawrence Berkeley National Laboratory | Research | Traffic analysis | 1 server | 11.8 TiB | [Slides in English, April 2019](https://www.smitasin.com/presentations/2019-04-17_DOE-NSM.pdf) | +| LifeStreet | Ad network | Main product | 75 servers (3 replicas) | 5.27 PiB | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/) | +| Mail.ru Cloud Solutions | Cloud services | Main product | — | — | [Article in Russian](https://mcs.mail.ru/help/db-create/clickhouse#) | +| MessageBird | Telecommunications | Statistics | — | — | [Slides in English, November 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup20/messagebird.pdf) | +| MGID | Ad network | Web-analytics | — | — | [Blog post in Russian, April 2020](http://gs-studio.com/news-about-it/32777----clickhouse---c) | +| OneAPM | Monitorings and Data Analysis | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) | +| Pragma Innovation | Telemetry and Big Data Analysis | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/4_pragma_innovation.pdf) | +| QINGCLOUD | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) | +| Qrator | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) | +| Percent 百分点 | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) | +| Rambler | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) | +| Tencent | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) | +| Traffic Stars | AD network | — | — | — | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) | +| S7 Airlines | Airlines | Metrics, Logging | — | — | [Talk in Russian, March 2019](https://www.youtube.com/watch?v=nwG68klRpPg&t=15s) | +| SEMrush | Marketing | Main product | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/5_semrush.pdf) | +| scireum GmbH | e-Commerce | Main product | — | — | [Talk in German, February 2020](https://www.youtube.com/watch?v=7QWAn5RbyR4) | +| Sentry | Software Development | Main product | — | — | [Blog Post in English, May 2019](https://blog.sentry.io/2019/05/16/introducing-snuba-sentrys-new-search-infrastructure) | +| SGK | Goverment Social Security | Analytics | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/ClickHouse%20Meetup-Ramazan%20POLAT.pdf) | +| seo.do | Analytics | 
Main product | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/CH%20Presentation-%20Metehan%20Çetinkaya.pdf) | +| Sina | News | — | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/6.%20ClickHouse最佳实践%20高鹏_新浪.pdf) | +| SMI2 | News | Analytics | — | — | [Blog Post in Russian, November 2017](https://habr.com/ru/company/smi2/blog/314558/) | +| Splunk | Business Analytics | Main product | — | — | [Slides in English, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/splunk.pdf) | +| Spotify | Music | Experimentation | — | — | [Slides, July 2018](https://www.slideshare.net/glebus/using-clickhouse-for-experimentation-104247173) | +| Tencent | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) | +| Uber | Taxi | Logging | — | — | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/uber.pdf) | +| VKontakte | Social Network | Statistics, Logging | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) | +| Wisebits | IT Solutions | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | +| Xiaoxin Tech | Education | Common purpose | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/sync-clickhouse-with-mysql-mongodb.pptx) | +| Ximalaya | Audio sharing | OLAP | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/ximalaya.pdf) | +| Yandex Cloud | Public Cloud | Main product | — | — | [Talk in Russian, December 2019](https://www.youtube.com/watch?v=pgnak9e_E0o) | +| Yandex DataLens | Business Intelligence | Main product | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/datalens.pdf) | +| Yandex Market | e-Commerce | Metrics, Logging | — | — | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) | +| Yandex Metrica | Web analytics | Main product | 360 servers in one cluster, 1862 servers in one department | 66.41 PiB / 5.68 PiB | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/introduction/#13) | +| ЦВТ | Software Development | Metrics, Logging | — | — | [Blog Post, March 2019, in Russian](https://vc.ru/dev/62715-kak-my-stroili-monitoring-na-prometheus-clickhouse-i-elk) | +| МКБ | Bank | Web-system monitoring | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf) | +| Jinshuju 金数据 | BI Analytics | Main product | — | — | [Slides in Chinese, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/3.%20金数据数据架构调整方案Public.pdf) | +| Instana | APM Platform | Main product | — | — | [Twitter post](https://twitter.com/mieldonkers/status/1248884119158882304) | +| Wargaming | Games | | — | — | [Interview](https://habr.com/en/post/496954/) | +| Crazypanda | Games | | — | — | Live session on ClickHouse meetup | +| FunCorp | Games | | — | — | [Article](https://www.altinity.com/blog/migrating-from-redshift-to-clickhouse) | [Original article](https://clickhouse.tech/docs/en/introduction/adopters/) diff --git 
a/docs/en/introduction/distinctive-features.md b/docs/en/introduction/distinctive-features.md index c1c44194a5f..686d73f1c06 100644 --- a/docs/en/introduction/distinctive-features.md +++ b/docs/en/introduction/distinctive-features.md @@ -5,7 +5,7 @@ toc_title: Distinctive Features # Distinctive Features of ClickHouse {#distinctive-features-of-clickhouse} -## True Column-Oriented DBMS {#true-column-oriented-dbms} +## True Column-Oriented Database Management System {#true-column-oriented-dbms} In a true column-oriented DBMS, no extra data is stored with the values. Among other things, this means that constant-length values must be supported, to avoid storing their length “number” next to the values. As an example, a billion UInt8-type values should consume around 1 GB uncompressed, or this strongly affects the CPU use. It is essential to store data compactly (without any “garbage”) even when uncompressed, since the speed of decompression (CPU usage) depends mainly on the volume of uncompressed data. @@ -15,11 +15,15 @@ It’s also worth noting that ClickHouse is a database management system, not a ## Data Compression {#data-compression} -Some column-oriented DBMSs (InfiniDB CE and MonetDB) do not use data compression. However, data compression does play a key role in achieving excellent performance. +Some column-oriented DBMSs do not use data compression. However, data compression does play a key role in achieving excellent performance. + +In addition to efficient general-purpose compression codecs with different trade-offs between disk space and CPU consumption, ClickHouse provides [specialized codecs](../sql-reference/statements/create.md#create-query-specialized-codecs) for specific kinds of data, which allow ClickHouse to compete with and outperform more niche databases, like time-series ones. ## Disk Storage of Data {#disk-storage-of-data} -Keeping data physically sorted by primary key makes it possible to extract data for its specific values or value ranges with low latency, less than a few dozen milliseconds. Some column-oriented DBMSs (such as SAP HANA and Google PowerDrill) can only work in RAM. This approach encourages the allocation of a larger hardware budget than is necessary for real-time analysis. ClickHouse is designed to work on regular hard drives, which means the cost per GB of data storage is low, but SSD and additional RAM are also fully used if available. +Keeping data physically sorted by primary key makes it possible to extract data for its specific values or value ranges with low latency, less than a few dozen milliseconds. Some column-oriented DBMSs (such as SAP HANA and Google PowerDrill) can only work in RAM. This approach encourages the allocation of a larger hardware budget than is necessary for real-time analysis. + +ClickHouse is designed to work on regular hard drives, which means the cost per GB of data storage is low, but SSD and additional RAM are also fully used if available. ## Parallel Processing on Multiple Cores {#parallel-processing-on-multiple-cores} @@ -28,15 +32,18 @@ Large queries are parallelized naturally, taking all the necessary resources ava ## Distributed Processing on Multiple Servers {#distributed-processing-on-multiple-servers} Almost none of the columnar DBMSs mentioned above have support for distributed query processing. + In ClickHouse, data can reside on different shards. Each shard can be a group of replicas used for fault tolerance. All shards are used to run a query in parallel, transparently for the user. 
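As a small illustration of the specialized codecs mentioned in the Data Compression section above, here is a sketch with made-up table and column names, showing per-column codecs that suit time-series-like data:

```bash
# Hypothetical table: Delta+LZ4 for a monotonically increasing timestamp and
# Gorilla for a slowly changing float, the kind of per-column specialization
# that general-purpose compression alone does not provide.
clickhouse-client --query="
    CREATE TABLE ts_example
    (
        ts  DateTime CODEC(Delta, LZ4),
        key UInt32,
        val Float64 CODEC(Gorilla)
    )
    ENGINE = MergeTree()
    ORDER BY (key, ts)"
```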
## SQL Support {#sql-support} -ClickHouse supports a declarative query language based on SQL that is identical to the SQL standard in many cases. -Supported queries include GROUP BY, ORDER BY, subqueries in FROM, IN, and JOIN clauses, and scalar subqueries. -Dependent subqueries and window functions are not supported. +ClickHouse supports a [declarative query language based on SQL](../sql-reference/index.md) that is identical to the ANSI SQL standard in [many cases](../sql-reference/ansi.md). -## Vector Engine {#vector-engine} +Supported queries include [GROUP BY](../sql-reference/statements/select/group-by.md), [ORDER BY](../sql-reference/statements/select/order-by.md), subqueries in [FROM](../sql-reference/statements/select/from.md), [JOIN](../sql-reference/statements/select/join.md) clause, [IN](../sql-reference/operators/in.md) operator, and scalar subqueries. + +Correlated (dependent) subqueries and window functions are not supported at the time of writing but might become available in the future. + +## Vector Computation Engine {#vector-engine} Data is not only stored by columns but is processed by vectors (parts of columns), which allows achieving high CPU efficiency. @@ -44,13 +51,19 @@ Data is not only stored by columns but is processed by vectors (parts of columns ClickHouse supports tables with a primary key. To quickly perform queries on the range of the primary key, the data is sorted incrementally using the merge tree. Due to this, data can continually be added to the table. No locks are taken when new data is ingested. -## Index {#index} +## Primary Index {#primary-index} Having a data physically sorted by primary key makes it possible to extract data for its specific values or value ranges with low latency, less than a few dozen milliseconds. +## Secondary Indexes {#secondary-indexes} + +Unlike other database management systems, secondary indexes in ClickHouse do not point to specific rows or row ranges. Instead, they allow the database to know in advance that all rows in some data parts wouldn't match the query filtering conditions, so it does not read them at all; this is why they are called [data skipping indexes](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-data_skipping-indexes). + ## Suitable for Online Queries {#suitable-for-online-queries} -Low latency means that queries can be processed without delay and without trying to prepare an answer in advance, right at the same moment while the user interface page is loading. In other words, online. +Most OLAP database management systems don't aim for online queries with sub-second latencies. In alternative systems, report building time of tens of seconds or even minutes is often considered acceptable. Sometimes it takes even longer, which forces them to prepare reports offline (in advance or by responding with "come back later"). + +In ClickHouse, low latency means that queries can be processed without delay and without trying to prepare an answer in advance, right at the same moment while the user interface page is loading. In other words, online. ## Support for Approximated Calculations {#support-for-approximated-calculations} @@ -60,16 +73,24 @@ ClickHouse provides various ways to trade accuracy for performance: 2. Running a query based on a part (sample) of data and getting an approximated result. In this case, proportionally less data is retrieved from the disk. 3. Running an aggregation for a limited number of random keys, instead of for all keys.
Under certain conditions for key distribution in the data, this provides a reasonably accurate result while using fewer resources. +## Adaptive Join Algorithm + +ClickHouse adaptively chooses how to [JOIN](../sql-reference/statements/select/join.md) multiple tables, by preferring hash-join algorithm and falling back to the merge-join algorithm if there's more than one large table. + ## Data Replication and Data Integrity Support {#data-replication-and-data-integrity-support} ClickHouse uses asynchronous multi-master replication. After being written to any available replica, all the remaining replicas retrieve their copy in the background. The system maintains identical data on different replicas. Recovery after most failures is performed automatically, or semi-automatically in complex cases. For more information, see the section [Data replication](../engines/table-engines/mergetree-family/replication.md). +## Role-Based Access Control + +ClickHouse implements user account management using SQL queries and allows for [role-based access control configuration](../operations/access-rights.md) similar to what can be found in ANSI SQL standard and popular relational database management systems. + ## Features that Can Be Considered Disadvantages {#clickhouse-features-that-can-be-considered-disadvantages} 1. No full-fledged transactions. -2. Lack of ability to modify or delete already inserted data with high rate and low latency. There are batch deletes and updates available to clean up or modify data, for example to comply with [GDPR](https://gdpr-info.eu). -3. The sparse index makes ClickHouse not so suitable for point queries retrieving single rows by their keys. +2. Lack of ability to modify or delete already inserted data with a high rate and low latency. There are batch deletes and updates available to clean up or modify data, for example, to comply with [GDPR](https://gdpr-info.eu). +3. The sparse index makes ClickHouse not so efficient for point queries retrieving single rows by their keys. -[Original article](https://clickhouse.tech/docs/en/introduction/distinctive_features/) +[Original article](https://clickhouse.tech/docs/en/introduction/distinctive-features/) diff --git a/docs/en/operations/access-rights.md b/docs/en/operations/access-rights.md index 001afd29fcb..78db369e8e8 100644 --- a/docs/en/operations/access-rights.md +++ b/docs/en/operations/access-rights.md @@ -16,17 +16,16 @@ ClickHouse access entities: You can configure access entities using: -- SQL-driven workflow. +- SQL-driven workflow. You need to [enable](#enabling-access-control) this functionality. -- Server [configuration files](configuration-files.md) `users.xml` and `config.xml`. +- Server [configuration files](../operations/configuration-files.md) `users.xml` and `config.xml`. We recommend using SQL-driven workflow. Both of the configuration methods work simultaneously, so if you use the server configuration files for managing accounts and access rights, you can smoothly switch to SQL-driven workflow. !!! note "Warning" - You can't manage the same access entity by both configuration methods simultaneously. - + You can’t manage the same access entity by both configuration methods simultaneously. ## Usage {#access-control-usage} @@ -34,45 +33,44 @@ By default, the ClickHouse server provides the `default` user account which is n If you just started using ClickHouse, consider the following scenario: -1. [Enable](#enabling-access-control) SQL-driven access control and account management for the `default` user. -2. 
Log in to the `default` user account and create all the required users. Don't forget to create an administrator account (`GRANT ALL ON *.* WITH GRANT OPTION TO admin_user_account`). -3. [Restrict permissions](settings/permissions-for-queries.md#permissions_for_queries) for the `default` user and disable SQL-driven access control and account management for it. +1. [Enable](#enabling-access-control) SQL-driven access control and account management for the `default` user. +2. Log in to the `default` user account and create all the required users. Don’t forget to create an administrator account (`GRANT ALL ON *.* WITH GRANT OPTION TO admin_user_account`). +3. [Restrict permissions](../operations/settings/permissions-for-queries.md#permissions_for_queries) for the `default` user and disable SQL-driven access control and account management for it. ### Properties of Current Solution {#access-control-properties} -- You can grant permissions for databases and tables even if they do not exist. -- If a table was deleted, all the privileges that correspond to this table are not revoked. This means that even if you create a new table with the same name later, all the privileges remain valid. To revoke privileges corresponding to the deleted table, you need to execute, for example, the `REVOKE ALL PRIVILEGES ON db.table FROM ALL` query. -- There are no lifetime settings for privileges. +- You can grant permissions for databases and tables even if they do not exist. +- If a table was deleted, all the privileges that correspond to this table are not revoked. This means that even if you create a new table with the same name later, all the privileges remain valid. To revoke privileges corresponding to the deleted table, you need to execute, for example, the `REVOKE ALL PRIVILEGES ON db.table FROM ALL` query. +- There are no lifetime settings for privileges. -## User account {#user-account-management} +## User Account {#user-account-management} A user account is an access entity that allows to authorize someone in ClickHouse. A user account contains: -- Identification information. -- [Privileges](../sql-reference/statements/grant.md#grant-privileges) that define a scope of queries the user can execute. -- Hosts allowed to connect to the ClickHouse server. -- Assigned and default roles. -- Settings with their constraints applied by default at user login. -- Assigned settings profiles. +- Identification information. +- [Privileges](../sql-reference/statements/grant.md#grant-privileges) that define a scope of queries the user can execute. +- Hosts allowed to connect to the ClickHouse server. +- Assigned and default roles. +- Settings with their constraints applied by default at user login. +- Assigned settings profiles. Privileges can be granted to a user account by the [GRANT](../sql-reference/statements/grant.md) query or by assigning [roles](#role-management). To revoke privileges from a user, ClickHouse provides the [REVOKE](../sql-reference/statements/revoke.md) query. To list privileges for a user, use the [SHOW GRANTS](../sql-reference/statements/show.md#show-grants-statement) statement. 
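For illustration, a minimal SQL-driven sequence covering the statements mentioned above might look like the following sketch; the user name `john` and the database `db` are placeholders, not values from the documentation.

``` sql
-- Hypothetical names: user 'john', database 'db'.
CREATE USER john IDENTIFIED WITH sha256_password BY 'secret';
GRANT SELECT ON db.* TO john;       -- grant a privilege
SHOW GRANTS FOR john;               -- list granted privileges
REVOKE SELECT ON db.* FROM john;    -- revoke the privilege
```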
Management queries: -- [CREATE USER](../sql-reference/statements/create.md#create-user-statement) -- [ALTER USER](../sql-reference/statements/alter.md#alter-user-statement) -- [DROP USER](../sql-reference/statements/misc.md#drop-user-statement) -- [SHOW CREATE USER](../sql-reference/statements/show.md#show-create-user-statement) +- [CREATE USER](../sql-reference/statements/create.md#create-user-statement) +- [ALTER USER](../sql-reference/statements/alter.md#alter-user-statement) +- [DROP USER](../sql-reference/statements/misc.md#drop-user-statement) +- [SHOW CREATE USER](../sql-reference/statements/show.md#show-create-user-statement) ### Settings Applying {#access-control-settings-applying} Settings can be configured differently: for a user account, in its granted roles and in settings profiles. At user login, if a setting is configured for different access entities, the value and constraints of this setting are applied as follows (from higher to lower priority): -1. User account settings. -2. The settings of default roles of the user account. If a setting is configured in some roles, then order of the setting application is undefined. -3. The settings from settings profiles assigned to a user or to its default roles. If a setting is configured in some profiles, then order of setting application is undefined. -4. Settings applied to all the server by default or from the [default profile](server-configuration-parameters/settings.md#default-profile). - +1. User account settings. +2. The settings of default roles of the user account. If a setting is configured in some roles, then order of the setting application is undefined. +3. The settings from settings profiles assigned to a user or to its default roles. If a setting is configured in some profiles, then order of setting application is undefined. +4. Settings applied to all the server by default or from the [default profile](../operations/server-configuration-parameters/settings.md#default-profile). ## Role {#role-management} @@ -80,18 +78,18 @@ Role is a container for access entities that can be granted to a user account. Role contains: -- [Privileges](../sql-reference/statements/grant.md#grant-privileges) -- Settings and constraints -- List of assigned roles +- [Privileges](../sql-reference/statements/grant.md#grant-privileges) +- Settings and constraints +- List of assigned roles Management queries: -- [CREATE ROLE](../sql-reference/statements/create.md#create-role-statement) -- [ALTER ROLE](../sql-reference/statements/alter.md#alter-role-statement) -- [DROP ROLE](../sql-reference/statements/misc.md#drop-role-statement) -- [SET ROLE](../sql-reference/statements/misc.md#set-role-statement) -- [SET DEFAULT ROLE](../sql-reference/statements/misc.md#set-default-role-statement) -- [SHOW CREATE ROLE](../sql-reference/statements/show.md#show-create-role-statement) +- [CREATE ROLE](../sql-reference/statements/create.md#create-role-statement) +- [ALTER ROLE](../sql-reference/statements/alter.md#alter-role-statement) +- [DROP ROLE](../sql-reference/statements/misc.md#drop-role-statement) +- [SET ROLE](../sql-reference/statements/misc.md#set-role-statement) +- [SET DEFAULT ROLE](../sql-reference/statements/misc.md#set-default-role-statement) +- [SHOW CREATE ROLE](../sql-reference/statements/show.md#show-create-role-statement) Privileges can be granted to a role by the [GRANT](../sql-reference/statements/grant.md) query. To revoke privileges from a role ClickHouse provides the [REVOKE](../sql-reference/statements/revoke.md) query. 
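A hedged sketch of the role workflow described above; the role `accountant`, the database `db`, and the user `john` are assumptions used only to show the order of the statements.

``` sql
CREATE ROLE accountant;                 -- create a role
GRANT SELECT ON db.* TO accountant;     -- grant privileges to the role
GRANT accountant TO john;               -- assign the role to a user
SET DEFAULT ROLE accountant TO john;    -- make it the user's default role
```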
@@ -101,47 +99,43 @@ Row policy is a filter that defines which of the rows are available to a user or Management queries: -- [CREATE ROW POLICY](../sql-reference/statements/create.md#create-row-policy-statement) -- [ALTER ROW POLICY](../sql-reference/statements/alter.md#alter-row-policy-statement) -- [DROP ROW POLICY](../sql-reference/statements/misc.md#drop-row-policy-statement) -- [SHOW CREATE ROW POLICY](../sql-reference/statements/show.md#show-create-row-policy-statement) - +- [CREATE ROW POLICY](../sql-reference/statements/create.md#create-row-policy-statement) +- [ALTER ROW POLICY](../sql-reference/statements/alter.md#alter-row-policy-statement) +- [DROP ROW POLICY](../sql-reference/statements/misc.md#drop-row-policy-statement) +- [SHOW CREATE ROW POLICY](../sql-reference/statements/show.md#show-create-row-policy-statement) ## Settings Profile {#settings-profiles-management} -Settings profile is a collection of [settings](settings/index.md). Settings profile contains settings and constraints, as well as a list of roles and/or users to which this profile is applied. +Settings profile is a collection of [settings](../operations/settings/index.md). Settings profile contains settings and constraints, as well as a list of roles and/or users to which this profile is applied. Management queries: -- [CREATE SETTINGS PROFILE](../sql-reference/statements/create.md#create-settings-profile-statement) -- [ALTER SETTINGS PROFILE](../sql-reference/statements/alter.md#alter-settings-profile-statement) -- [DROP SETTINGS PROFILE](../sql-reference/statements/misc.md#drop-settings-profile-statement) -- [SHOW CREATE SETTINGS PROFILE](../sql-reference/statements/show.md#show-create-settings-profile-statement) - +- [CREATE SETTINGS PROFILE](../sql-reference/statements/create.md#create-settings-profile-statement) +- [ALTER SETTINGS PROFILE](../sql-reference/statements/alter.md#alter-settings-profile-statement) +- [DROP SETTINGS PROFILE](../sql-reference/statements/misc.md#drop-settings-profile-statement) +- [SHOW CREATE SETTINGS PROFILE](../sql-reference/statements/show.md#show-create-settings-profile-statement) ## Quota {#quotas-management} -Quota limits resource usage. See [Quotas](quotas.md). +Quota limits resource usage. See [Quotas](../operations/quotas.md). Quota contains a set of limits for some durations, as well as a list of roles and/or users which should use this quota. Management queries: -- [CREATE QUOTA](../sql-reference/statements/create.md#create-quota-statement) -- [ALTER QUOTA](../sql-reference/statements/alter.md#alter-quota-statement) -- [DROP QUOTA](../sql-reference/statements/misc.md#drop-quota-statement) -- [SHOW CREATE QUOTA](../sql-reference/statements/show.md#show-create-quota-statement) - +- [CREATE QUOTA](../sql-reference/statements/create.md#create-quota-statement) +- [ALTER QUOTA](../sql-reference/statements/alter.md#alter-quota-statement) +- [DROP QUOTA](../sql-reference/statements/misc.md#drop-quota-statement) +- [SHOW CREATE QUOTA](../sql-reference/statements/show.md#show-create-quota-statement) ## Enabling SQL-driven Access Control and Account Management {#enabling-access-control} -- Setup a directory for configurations storage. +- Setup a directory for configurations storage. - ClickHouse stores access entity configurations in the folder set in the [access_control_path](server-configuration-parameters/settings.md#access_control_path) server configuration parameter. 
+ ClickHouse stores access entity configurations in the folder set in the [access\_control\_path](../operations/server-configuration-parameters/settings.md#access_control_path) server configuration parameter. -- Enable SQL-driven access control and account management for at least one user account. - - By default, SQL-driven access control and account management is disabled for all users. You need to configure at least one user in the `users.xml` configuration file and set the value of the [access_management](settings/settings-users.md#access_management-user-setting) setting to 1. +- Enable SQL-driven access control and account management for at least one user account. + By default, SQL-driven access control and account management is disabled for all users. You need to configure at least one user in the `users.xml` configuration file and set the value of the [access\_management](../operations/settings/settings-users.md#access_management-user-setting) setting to 1. [Original article](https://clickhouse.tech/docs/en/operations/access_rights/) diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md index 72316284e3b..30c9fd158bf 100644 --- a/docs/en/operations/backup.md +++ b/docs/en/operations/backup.md @@ -24,7 +24,7 @@ Some local filesystems provide snapshot functionality (for example, [ZFS](https: ## clickhouse-copier {#clickhouse-copier} -[clickhouse-copier](utilities/clickhouse-copier.md) is a versatile tool that was initially created to re-shard petabyte-sized tables. It can also be used for backup and restore purposes because it reliably copies data between ClickHouse tables and clusters. +[clickhouse-copier](../operations/utilities/clickhouse-copier.md) is a versatile tool that was initially created to re-shard petabyte-sized tables. It can also be used for backup and restore purposes because it reliably copies data between ClickHouse tables and clusters. For smaller volumes of data, a simple `INSERT INTO ... SELECT ...` to remote tables might work as well. diff --git a/docs/en/operations/configuration-files.md b/docs/en/operations/configuration-files.md index f574240ea39..091ed3fca10 100644 --- a/docs/en/operations/configuration-files.md +++ b/docs/en/operations/configuration-files.md @@ -9,7 +9,7 @@ ClickHouse supports multi-file configuration management. The main server configu All the configuration files should be in XML format. Also, they should have the same root element, usually ``. -## Override +## Override {#override} Some settings specified in the main configuration file can be overridden in other configuration files: @@ -18,13 +18,13 @@ Some settings specified in the main configuration file can be overridden in othe - If `replace` is specified, it replaces the entire element with the specified one. - If `remove` is specified, it deletes the element. -## Substitution +## Substitution {#substitution} -The config can also define "substitutions". If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include\_from](server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/yandex/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. 
To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](server-configuration-parameters/settings.md)). +The config can also define “substitutions”. If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include\_from](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/yandex/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](../operations/server-configuration-parameters/settings.md)). Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node and it will be fully inserted into the source element. -## User Settings +## User Settings {#user-settings} The `config.xml` file can specify a separate config with user settings, profiles, and quotas. The relative path to this config is set in the `users_config` element. By default, it is `users.xml`. If `users_config` is omitted, the user settings, profiles, and quotas are specified directly in `config.xml`. @@ -32,7 +32,7 @@ Users configuration can be splitted into separate files similar to `config.xml` Directory name is defined as `users_config` setting without `.xml` postfix concatenated with `.d`. Directory `users.d` is used by default, as `users_config` defaults to `users.xml`. -## Example +## Example {#example} For example, you can have separate config file for each user like this: @@ -55,7 +55,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml ``` -## Implementation Details +## Implementation Details {#implementation-details} For each config file, the server also generates `file-preprocessed.xml` files when starting. These files contain all the completed substitutions and overrides, and they are intended for informational use. If ZooKeeper substitutions were used in the config files but ZooKeeper is not available on the server start, the server loads the configuration from the preprocessed file. 
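As a purely illustrative sketch of such a per-user file under `users.d` (not the documentation's own example), the file name, user name, and all values below are placeholders:

``` xml
<!-- Hypothetical /etc/clickhouse-server/users.d/alice.xml; every value is a placeholder. -->
<yandex>
    <users>
        <alice>
            <password>REPLACE_WITH_A_REAL_PASSWORD</password>
            <profile>default</profile>
            <quota>default</quota>
            <networks>
                <ip>::/0</ip>
            </networks>
        </alice>
    </users>
</yandex>
```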
diff --git a/docs/en/operations/index.md b/docs/en/operations/index.md index 7c0518610fb..3364598ebc2 100644 --- a/docs/en/operations/index.md +++ b/docs/en/operations/index.md @@ -8,19 +8,19 @@ toc_title: Introduction ClickHouse operations manual consists of the following major sections: -- [Requirements](requirements.md) -- [Monitoring](monitoring.md) -- [Troubleshooting](troubleshooting.md) -- [Usage Recommendations](tips.md) -- [Update Procedure](update.md) -- [Access Rights](access-rights.md) -- [Data Backup](backup.md) -- [Configuration Files](configuration-files.md) -- [Quotas](quotas.md) -- [System Tables](system-tables.md) -- [Server Configuration Parameters](server-configuration-parameters/index.md) -- [How To Test Your Hardware With ClickHouse](performance-test.md) -- [Settings](settings/index.md) -- [Utilities](utilities/index.md) +- [Requirements](../operations/requirements.md) +- [Monitoring](../operations/monitoring.md) +- [Troubleshooting](../operations/troubleshooting.md) +- [Usage Recommendations](../operations/tips.md) +- [Update Procedure](../operations/update.md) +- [Access Rights](../operations/access-rights.md) +- [Data Backup](../operations/backup.md) +- [Configuration Files](../operations/configuration-files.md) +- [Quotas](../operations/quotas.md) +- [System Tables](../operations/system-tables/index.md) +- [Server Configuration Parameters](../operations/server-configuration-parameters/index.md) +- [How To Test Your Hardware With ClickHouse](../operations/performance-test.md) +- [Settings](../operations/settings/index.md) +- [Utilities](../operations/utilities/index.md) {## [Original article](https://clickhouse.tech/docs/en/operations/) ##} diff --git a/docs/en/operations/monitoring.md b/docs/en/operations/monitoring.md index 25e29197646..d0d16b526ba 100644 --- a/docs/en/operations/monitoring.md +++ b/docs/en/operations/monitoring.md @@ -26,19 +26,19 @@ It is highly recommended to set up monitoring for: ClickHouse server has embedded instruments for self-state monitoring. -To track server events use server logs. See the [logger](server-configuration-parameters/settings.md#server_configuration_parameters-logger) section of the configuration file. +To track server events use server logs. See the [logger](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-logger) section of the configuration file. ClickHouse collects: - Different metrics of how the server uses computational resources. - Common statistics on query processing. -You can find metrics in the [system.metrics](../operations/system-tables.md#system_tables-metrics), [system.events](../operations/system-tables.md#system_tables-events), and [system.asynchronous\_metrics](../operations/system-tables.md#system_tables-asynchronous_metrics) tables. +You can find metrics in the [system.metrics](../operations/system-tables/metrics.md#system_tables-metrics), [system.events](../operations/system-tables/events.md#system_tables-events), and [system.asynchronous\_metrics](../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) tables. -You can configure ClickHouse to export metrics to [Graphite](https://github.com/graphite-project). See the [Graphite section](server-configuration-parameters/settings.md#server_configuration_parameters-graphite) in the ClickHouse server configuration file. Before configuring export of metrics, you should set up Graphite by following their official [guide](https://graphite.readthedocs.io/en/latest/install.html). 
+You can configure ClickHouse to export metrics to [Graphite](https://github.com/graphite-project). See the [Graphite section](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-graphite) in the ClickHouse server configuration file. Before configuring export of metrics, you should set up Graphite by following their official [guide](https://graphite.readthedocs.io/en/latest/install.html). -You can configure ClickHouse to export metrics to [Prometheus](https://prometheus.io). See the [Prometheus section](server-configuration-parameters/settings.md#server_configuration_parameters-prometheus) in the ClickHouse server configuration file. Before configuring export of metrics, you should set up Prometheus by following their official [guide](https://prometheus.io/docs/prometheus/latest/installation/). +You can configure ClickHouse to export metrics to [Prometheus](https://prometheus.io). See the [Prometheus section](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-prometheus) in the ClickHouse server configuration file. Before configuring export of metrics, you should set up Prometheus by following their official [guide](https://prometheus.io/docs/prometheus/latest/installation/). Additionally, you can monitor server availability through the HTTP API. Send the `HTTP GET` request to `/ping`. If the server is available, it responds with `200 OK`. -To monitor servers in a cluster configuration, you should set the [max\_replica\_delay\_for\_distributed\_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries) parameter and use the HTTP resource `/replicas_status`. A request to `/replicas_status` returns `200 OK` if the replica is available and is not delayed behind the other replicas. If a replica is delayed, it returns `503 HTTP_SERVICE_UNAVAILABLE` with information about the gap. +To monitor servers in a cluster configuration, you should set the [max\_replica\_delay\_for\_distributed\_queries](../operations/settings/settings.md#settings-max_replica_delay_for_distributed_queries) parameter and use the HTTP resource `/replicas_status`. A request to `/replicas_status` returns `200 OK` if the replica is available and is not delayed behind the other replicas. If a replica is delayed, it returns `503 HTTP_SERVICE_UNAVAILABLE` with information about the gap. diff --git a/docs/en/operations/optimizing-performance/sampling-query-profiler.md b/docs/en/operations/optimizing-performance/sampling-query-profiler.md index 3e9365c281a..5a907fb3332 100644 --- a/docs/en/operations/optimizing-performance/sampling-query-profiler.md +++ b/docs/en/operations/optimizing-performance/sampling-query-profiler.md @@ -9,11 +9,11 @@ ClickHouse runs sampling profiler that allows analyzing query execution. Using p To use profiler: -- Setup the [trace\_log](../server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) section of the server configuration. +- Setup the [trace\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) section of the server configuration. - This section configures the [trace\_log](../../operations/system-tables.md#system_tables-trace_log) system table containing the results of the profiler functioning. It is configured by default. Remember that data in this table is valid only for a running server. After the server restart, ClickHouse doesn’t clean up the table and all the stored virtual memory address may become invalid. 
+ This section configures the [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table containing the results of the profiler functioning. It is configured by default. Remember that data in this table is valid only for a running server. After the server restart, ClickHouse doesn’t clean up the table and all the stored virtual memory address may become invalid. -- Setup the [query\_profiler\_cpu\_time\_period\_ns](../settings/settings.md#query_profiler_cpu_time_period_ns) or [query\_profiler\_real\_time\_period\_ns](../settings/settings.md#query_profiler_real_time_period_ns) settings. Both settings can be used simultaneously. +- Setup the [query\_profiler\_cpu\_time\_period\_ns](../../operations/settings/settings.md#query_profiler_cpu_time_period_ns) or [query\_profiler\_real\_time\_period\_ns](../../operations/settings/settings.md#query_profiler_real_time_period_ns) settings. Both settings can be used simultaneously. These settings allow you to configure profiler timers. As these are the session settings, you can get different sampling frequency for the whole server, individual users or user profiles, for your interactive session, and for each individual query. @@ -23,7 +23,7 @@ To analyze the `trace_log` system table: - Install the `clickhouse-common-static-dbg` package. See [Install from DEB Packages](../../getting-started/install.md#install-from-deb-packages). -- Allow introspection functions by the [allow\_introspection\_functions](../settings/settings.md#settings-allow_introspection_functions) setting. +- Allow introspection functions by the [allow\_introspection\_functions](../../operations/settings/settings.md#settings-allow_introspection_functions) setting. For security reasons, introspection functions are disabled by default. diff --git a/docs/en/operations/performance-test.md b/docs/en/operations/performance-test.md index a3beccdaab5..9a637b68756 100644 --- a/docs/en/operations/performance-test.md +++ b/docs/en/operations/performance-test.md @@ -24,7 +24,7 @@ With this instruction you can run basic ClickHouse performance test on any serve # Then do: chmod a+x clickhouse -5. Download configs: +1. Download configs: @@ -34,7 +34,7 @@ With this instruction you can run basic ClickHouse performance test on any serve wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/programs/server/config.d/path.xml -O config.d/path.xml wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/programs/server/config.d/log_to_console.xml -O config.d/log_to_console.xml -6. Download benchmark files: +1. Download benchmark files: @@ -42,7 +42,7 @@ With this instruction you can run basic ClickHouse performance test on any serve chmod a+x benchmark-new.sh wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/clickhouse/queries.sql -7. Download test data according to the [Yandex.Metrica dataset](../getting-started/example-datasets/metrica.md) instruction (“hits” table containing 100 million rows). +1. Download test data according to the [Yandex.Metrica dataset](../getting-started/example-datasets/metrica.md) instruction (“hits” table containing 100 million rows). @@ -50,31 +50,31 @@ With this instruction you can run basic ClickHouse performance test on any serve tar xvf hits_100m_obfuscated_v1.tar.xz -C . mv hits_100m_obfuscated_v1/* . -8. Run the server: +1. Run the server: ./clickhouse server -9. Check the data: ssh to the server in another terminal +1. 
Check the data: ssh to the server in another terminal ./clickhouse client --query "SELECT count() FROM hits_100m_obfuscated" 100000000 -10. Edit the benchmark-new.sh, change `clickhouse-client` to `./clickhouse client` and add `–-max_memory_usage 100000000000` parameter. +1. Edit the benchmark-new.sh, change `clickhouse-client` to `./clickhouse client` and add `–-max_memory_usage 100000000000` parameter. mcedit benchmark-new.sh -11. Run the benchmark: +1. Run the benchmark: ./benchmark-new.sh hits_100m_obfuscated -12. Send the numbers and the info about your hardware configuration to clickhouse-feedback@yandex-team.com +1. Send the numbers and the info about your hardware configuration to clickhouse-feedback@yandex-team.com All the results are published here: https://clickhouse.tech/benchmark/hardware/ diff --git a/docs/en/operations/server-configuration-parameters/index.md b/docs/en/operations/server-configuration-parameters/index.md index 0a4d3ac613d..02bbe7362cf 100644 --- a/docs/en/operations/server-configuration-parameters/index.md +++ b/docs/en/operations/server-configuration-parameters/index.md @@ -10,8 +10,8 @@ This section contains descriptions of server settings that cannot be changed at These settings are stored in the `config.xml` file on the ClickHouse server. -Other settings are described in the “[Settings](../settings/index.md#session-settings-intro)” section. +Other settings are described in the “[Settings](../../operations/settings/index.md#session-settings-intro)” section. -Before studying the settings, read the [Configuration files](../configuration-files.md#configuration_files) section and note the use of substitutions (the `incl` and `optional` attributes). +Before studying the settings, read the [Configuration files](../../operations/configuration-files.md#configuration_files) section and note the use of substitutions (the `incl` and `optional` attributes). [Original article](https://clickhouse.tech/docs/en/operations/server_configuration_parameters/) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index b90b432da6c..e54208c89a8 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -145,10 +145,10 @@ Settings: - interval – The interval for sending, in seconds. - timeout – The timeout for sending data, in seconds. - root\_path – Prefix for keys. -- metrics – Sending data from the [system.metrics](../../operations/system-tables.md#system_tables-metrics) table. -- events – Sending deltas data accumulated for the time period from the [system.events](../../operations/system-tables.md#system_tables-events) table. -- events\_cumulative – Sending cumulative data from the [system.events](../../operations/system-tables.md#system_tables-events) table. -- asynchronous\_metrics – Sending data from the [system.asynchronous\_metrics](../../operations/system-tables.md#system_tables-asynchronous_metrics) table. +- metrics – Sending data from the [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) table. +- events – Sending deltas data accumulated for the time period from the [system.events](../../operations/system-tables/events.md#system_tables-events) table. +- events\_cumulative – Sending cumulative data from the [system.events](../../operations/system-tables/events.md#system_tables-events) table. 
+- asynchronous\_metrics – Sending data from the [system.asynchronous\_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) table. You can configure multiple `` clauses. For instance, you can use this for sending different data at different intervals. @@ -229,7 +229,7 @@ Opens `https://tabix.io/` when accessing `http://localhost: http_port`. The path to the file with substitutions. -For more information, see the section “[Configuration files](../configuration-files.md#configuration_files)”. +For more information, see the section “[Configuration files](../../operations/configuration-files.md#configuration_files)”. **Example** @@ -503,7 +503,7 @@ Keys for server/client settings: Logging events that are associated with [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md). For instance, adding or merging data. You can use the log to simulate merge algorithms and compare their characteristics. You can visualize the merge process. -Queries are logged in the [system.part\_log](../../operations/system-tables.md#system_tables-part-log) table, not in a separate file. You can configure the name of this table in the `table` parameter (see below). +Queries are logged in the [system.part\_log](../../operations/system-tables/part_log.md#system_tables-part-log) table, not in a separate file. You can configure the name of this table in the `table` parameter (see below). Use the following parameters to configure logging: @@ -544,9 +544,9 @@ Settings: - `endpoint` – HTTP endpoint for scraping metrics by prometheus server. Start from ‘/’. - `port` – Port for `endpoint`. -- `metrics` – Flag that sets to expose metrics from the [system.metrics](../system-tables.md#system_tables-metrics) table. -- `events` – Flag that sets to expose metrics from the [system.events](../system-tables.md#system_tables-events) table. -- `asynchronous_metrics` – Flag that sets to expose current metrics values from the [system.asynchronous\_metrics](../system-tables.md#system_tables-asynchronous_metrics) table. +- `metrics` – Flag that sets to expose metrics from the [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) table. +- `events` – Flag that sets to expose metrics from the [system.events](../../operations/system-tables/events.md#system_tables-events) table. +- `asynchronous_metrics` – Flag that sets to expose current metrics values from the [system.asynchronous\_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) table. **Example** @@ -562,9 +562,9 @@ Settings: ## query\_log {#server_configuration_parameters-query-log} -Setting for logging queries received with the [log\_queries=1](../settings/settings.md) setting. +Setting for logging queries received with the [log\_queries=1](../../operations/settings/settings.md) setting. -Queries are logged in the [system.query\_log](../../operations/system-tables.md#system_tables-query_log) table, not in a separate file. You can change the name of the table in the `table` parameter (see below). +Queries are logged in the [system.query\_log](../../operations/system-tables/query_log.md#system_tables-query_log) table, not in a separate file. You can change the name of the table in the `table` parameter (see below). Use the following parameters to configure logging: @@ -588,9 +588,9 @@ If the table doesn’t exist, ClickHouse will create it. 
If the structure of the ## query\_thread\_log {#server_configuration_parameters-query_thread_log} -Setting for logging threads of queries received with the [log\_query\_threads=1](../settings/settings.md#settings-log-query-threads) setting. +Setting for logging threads of queries received with the [log\_query\_threads=1](../../operations/settings/settings.md#settings-log-query-threads) setting. -Queries are logged in the [system.query\_thread\_log](../../operations/system-tables.md#system_tables-query_thread_log) table, not in a separate file. You can change the name of the table in the `table` parameter (see below). +Queries are logged in the [system.query\_thread\_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) table, not in a separate file. You can change the name of the table in the `table` parameter (see below). Use the following parameters to configure logging: @@ -614,7 +614,7 @@ If the table doesn’t exist, ClickHouse will create it. If the structure of the ## trace\_log {#server_configuration_parameters-trace_log} -Settings for the [trace\_log](../../operations/system-tables.md#system_tables-trace_log) system table operation. +Settings for the [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table operation. Parameters: @@ -675,11 +675,11 @@ Configuration of clusters used by the [Distributed](../../engines/table-engines/ ``` -For the value of the `incl` attribute, see the section “[Configuration files](../configuration-files.md#configuration_files)”. +For the value of the `incl` attribute, see the section “[Configuration files](../../operations/configuration-files.md#configuration_files)”. **See Also** -- [skip\_unavailable\_shards](../settings/settings.md#settings-skip_unavailable_shards) +- [skip\_unavailable\_shards](../../operations/settings/settings.md#settings-skip_unavailable_shards) ## timezone {#server_configuration_parameters-timezone} @@ -705,7 +705,7 @@ Port for communicating with clients over the TCP protocol. 9000 ``` -## tcp_port_secure {#server_configuration_parameters-tcp_port_secure} +## tcp\_port\_secure {#server_configuration_parameters-tcp_port_secure} TCP port for secure communication with clients. Use it with [OpenSSL](#server_configuration_parameters-openssl) settings. @@ -733,7 +733,7 @@ Example 9004 ``` -## tmp_path {#tmp-path} +## tmp\_path {#tmp-path} Path to temporary data for processing large queries. @@ -746,23 +746,23 @@ Path to temporary data for processing large queries. /var/lib/clickhouse/tmp/ ``` -## tmp_policy {#tmp-policy} +## tmp\_policy {#tmp-policy} -Policy from [storage_configuration](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) to store temporary files. +Policy from [storage\_configuration](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) to store temporary files. -If not set, [tmp_path](#tmp-path) is used, otherwise it is ignored. +If not set, [tmp\_path](#tmp-path) is used, otherwise it is ignored. !!! note "Note" - `move_factor` is ignored. - - `keep_free_space_bytes` is ignored. - - `max_data_part_size_bytes` is ignored. - - Уou must have exactly one volume in that policy. +- `keep_free_space_bytes` is ignored. +- `max_data_part_size_bytes` is ignored. +- Уou must have exactly one volume in that policy. 
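As a sketch of the `tmp_policy` mechanics described above, the snippet below declares a storage policy with exactly one volume and points `tmp_policy` at it; the disk path, disk name, and policy name are assumptions, not recommended values.

``` xml
<!-- Sketch only: names and the path are assumptions. -->
<storage_configuration>
    <disks>
        <fast_ssd>
            <path>/mnt/fast_ssd/clickhouse/</path>
        </fast_ssd>
    </disks>
    <policies>
        <tmp_on_ssd>
            <volumes>
                <single> <!-- exactly one volume, as required above -->
                    <disk>fast_ssd</disk>
                </single>
            </volumes>
        </tmp_on_ssd>
    </policies>
</storage_configuration>

<tmp_policy>tmp_on_ssd</tmp_policy>
```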
## uncompressed\_cache\_size {#server-settings-uncompressed_cache_size} Cache size (in bytes) for uncompressed data used by table engines from the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md). -There is one shared cache for the server. Memory is allocated on demand. The cache is used if the option [use\_uncompressed\_cache](../settings/settings.md#setting-use_uncompressed_cache) is enabled. +There is one shared cache for the server. Memory is allocated on demand. The cache is used if the option [use\_uncompressed\_cache](../../operations/settings/settings.md#setting-use_uncompressed_cache) is enabled. The uncompressed cache is advantageous for very short queries in individual cases. @@ -894,9 +894,9 @@ The update is performed asynchronously, in a separate system thread. **See also** -- [background_schedule_pool_size](../settings/settings.md#background_schedule_pool_size) +- [background\_schedule\_pool\_size](../../operations/settings/settings.md#background_schedule_pool_size) -## access_control_path {#access_control_path} +## access\_control\_path {#access_control_path} Path to a folder where a ClickHouse server stores user and role configurations created by SQL commands. @@ -904,6 +904,6 @@ Default value: `/var/lib/clickhouse/access/`. **See also** -- [Access Control and Account Management](../access-rights.md#access-control) +- [Access Control and Account Management](../../operations/access-rights.md#access-control) [Original article](https://clickhouse.tech/docs/en/operations/server_configuration_parameters/settings/) diff --git a/docs/en/operations/settings/permissions-for-queries.md b/docs/en/operations/settings/permissions-for-queries.md index 1f697989f5d..b94301bf103 100644 --- a/docs/en/operations/settings/permissions-for-queries.md +++ b/docs/en/operations/settings/permissions-for-queries.md @@ -37,7 +37,7 @@ After setting `readonly = 1`, the user can’t change `readonly` and `allow_ddl` When using the `GET` method in the [HTTP interface](../../interfaces/http.md), `readonly = 1` is set automatically. To modify data, use the `POST` method. Setting `readonly = 1` prohibit the user from changing all the settings. There is a way to prohibit the user -from changing only specific settings, for details see [constraints on settings](constraints-on-settings.md). +from changing only specific settings, for details see [constraints on settings](../../operations/settings/constraints-on-settings.md). Default value: 0 diff --git a/docs/en/operations/settings/query-complexity.md b/docs/en/operations/settings/query-complexity.md index 83b6054d642..812056785da 100644 --- a/docs/en/operations/settings/query-complexity.md +++ b/docs/en/operations/settings/query-complexity.md @@ -113,7 +113,7 @@ Limit on the number of bytes in the result. The same as the previous setting. What to do if the volume of the result exceeds one of the limits: ‘throw’ or ‘break’. By default, throw. -Using ‘break’ is similar to using LIMIT. `Break` interrupts execution only at the block level. This means that amount of returned rows is greater than [max\_result\_rows](#setting-max_result_rows), multiple of [max\_block\_size](settings.md#setting-max_block_size) and depends on [max_threads](settings.md#settings-max_threads). +Using ‘break’ is similar to using LIMIT. `Break` interrupts execution only at the block level. 
This means that amount of returned rows is greater than [max\_result\_rows](#setting-max_result_rows), multiple of [max\_block\_size](../../operations/settings/settings.md#setting-max_block_size) and depends on [max\_threads](../../operations/settings/settings.md#settings-max_threads). Example: diff --git a/docs/en/operations/settings/settings-profiles.md b/docs/en/operations/settings/settings-profiles.md index 3e5d1a02cbd..c7a01466462 100644 --- a/docs/en/operations/settings/settings-profiles.md +++ b/docs/en/operations/settings/settings-profiles.md @@ -8,8 +8,7 @@ toc_title: Settings Profiles A settings profile is a collection of settings grouped under the same name. !!! note "Information" - ClickHouse also supports [SQL-driven workflow](../access-rights.md#access-control) for managing settings profiles. We recommend using it. - + ClickHouse also supports [SQL-driven workflow](../../operations/access-rights.md#access-control) for managing settings profiles. We recommend using it. The profile can have any name. You can specify the same profile for different users. The most important thing you can write in the settings profile is `readonly=1`, which ensures read-only access. @@ -71,9 +70,9 @@ Example: ``` -The example specifies two profiles: `default` and `web`. +The example specifies two profiles: `default` and `web`. -The `default` profile has a special purpose: it must always be present and is applied when starting the server. In other words, the `default` profile contains default settings. +The `default` profile has a special purpose: it must always be present and is applied when starting the server. In other words, the `default` profile contains default settings. The `web` profile is a regular profile that can be set using the `SET` query or using a URL parameter in an HTTP query. diff --git a/docs/en/operations/settings/settings-users.md b/docs/en/operations/settings/settings-users.md index 3c104202801..7d6d39e6d88 100644 --- a/docs/en/operations/settings/settings-users.md +++ b/docs/en/operations/settings/settings-users.md @@ -8,8 +8,7 @@ toc_title: User Settings The `users` section of the `user.xml` configuration file contains user settings. !!! note "Information" - ClickHouse also supports [SQL-driven workflow](../access-rights.md#access-control) for managing users. We recommend using it. - + ClickHouse also supports [SQL-driven workflow](../../operations/access-rights.md#access-control) for managing users. We recommend using it. Structure of the `users` section: @@ -74,14 +73,14 @@ Password can be specified in plaintext or in SHA256 (hex format). The first line of the result is the password. The second line is the corresponding double SHA1 hash. -### access_management {#access_management-user-setting} +### access\_management {#access_management-user-setting} -This setting enables or disables using of SQL-driven [access control and account management](../access-rights.md#access-control) for the user. +This setting enables or disables using of SQL-driven [access control and account management](../../operations/access-rights.md#access-control) for the user. Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: 0. @@ -129,14 +128,14 @@ To open access only from localhost, specify: ### user\_name/profile {#user-nameprofile} -You can assign a settings profile for the user. Settings profiles are configured in a separate section of the `users.xml` file. For more information, see [Profiles of Settings](settings-profiles.md). 
+You can assign a settings profile for the user. Settings profiles are configured in a separate section of the `users.xml` file. For more information, see [Profiles of Settings](../../operations/settings/settings-profiles.md). ### user\_name/quota {#user-namequota} Quotas allow you to track or limit resource usage over a period of time. Quotas are configured in the `quotas` section of the `users.xml` configuration file. -You can assign a quotas set for the user. For a detailed description of quotas configuration, see [Quotas](../quotas.md#quotas). +You can assign a quotas set for the user. For a detailed description of quotas configuration, see [Quotas](../../operations/quotas.md#quotas). ### user\_name/databases {#user-namedatabases} diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index f29866d4980..10dbed3cddb 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -404,11 +404,11 @@ Possible values: Default value: 0. -## partial_merge_join_optimizations {#partial_merge_join_optimizations} +## partial\_merge\_join\_optimizations {#partial_merge_join_optimizations} Disables optimizations in partial merge join algorithm for [JOIN](../../sql-reference/statements/select/join.md) queries. -By default, this setting enables improvements that could lead to wrong results. If you see suspicious results in your queries, disable optimizations by this setting. Optimizations can be different in different versions of the ClickHouse server. +By default, this setting enables improvements that could lead to wrong results. If you see suspicious results in your queries, disable optimizations by this setting. Optimizations can be different in different versions of the ClickHouse server. Possible values: @@ -417,35 +417,35 @@ Possible values: Default value: 1. -## partial_merge_join_rows_in_right_blocks {#partial_merge_join_rows_in_right_blocks} +## partial\_merge\_join\_rows\_in\_right\_blocks {#partial_merge_join_rows_in_right_blocks} Limits sizes of right-hand join data blocks in partial merge join algorithm for [JOIN](../../sql-reference/statements/select/join.md) queries. ClickHouse server: -1. Splits right-hand join data into blocks with up to the specified number of rows. -2. Indexes each block with their minimum and maximum values -3. Unloads prepared blocks to disk if possible. +1. Splits right-hand join data into blocks with up to the specified number of rows. +2. Indexes each block with their minimum and maximum values +3. Unloads prepared blocks to disk if possible. Possible values: -- Any positive integer. Recommended range of values: [1000, 100000]. +- Any positive integer. Recommended range of values: \[1000, 100000\]. Default value: 65536. -## join_on_disk_max_files_to_merge {#join_on_disk_max_files_to_merge} +## join\_on\_disk\_max\_files\_to\_merge {#join_on_disk_max_files_to_merge} -Limits the number of files allowed for parallel sorting in MergeJoin operations when they are executed on disk. +Limits the number of files allowed for parallel sorting in MergeJoin operations when they are executed on disk. The bigger the value of the setting, the more RAM used and the less disk I/O needed. Possible values: -- Any positive integer, starting from 2. +- Any positive integer, starting from 2. Default value: 64. 
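To make the interplay of these join settings concrete, here is a hedged sketch; the table names are hypothetical, the values simply echo the defaults mentioned above, and the `join_algorithm = 'partial_merge'` line assumes that setting is available to select the partial merge join.

``` sql
-- Sketch only: 'big_left' and 'big_right' are hypothetical tables.
SET join_algorithm = 'partial_merge';
SET partial_merge_join_rows_in_right_blocks = 65536;
SET join_on_disk_max_files_to_merge = 64;

SELECT count()
FROM big_left AS l
INNER JOIN big_right AS r ON l.key = r.key;
```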
-## any_join_distinct_right_table_keys {#any_join_distinct_right_table_keys} +## any\_join\_distinct\_right\_table\_keys {#any_join_distinct_right_table_keys} Enables legacy ClickHouse server behavior in `ANY INNER|LEFT JOIN` operations. @@ -454,19 +454,18 @@ Enables legacy ClickHouse server behavior in `ANY INNER|LEFT JOIN` operations. When the legacy behavior enabled: -- Results of `t1 ANY LEFT JOIN t2` and `t2 ANY RIGHT JOIN t1` operations are not equal because ClickHouse uses the logic with many-to-one left-to-right table keys mapping. -- Results of `ANY INNER JOIN` operations contain all rows from the left table like the `SEMI LEFT JOIN` operations do. +- Results of `t1 ANY LEFT JOIN t2` and `t2 ANY RIGHT JOIN t1` operations are not equal because ClickHouse uses the logic with many-to-one left-to-right table keys mapping. +- Results of `ANY INNER JOIN` operations contain all rows from the left table like the `SEMI LEFT JOIN` operations do. When the legacy behavior disabled: -- Results of `t1 ANY LEFT JOIN t2` and `t2 ANY RIGHT JOIN t1` operations are equal because ClickHouse uses the logic which provides one-to-many keys mapping in `ANY RIGHT JOIN` operations. -- Results of `ANY INNER JOIN` operations contain one row per key from both left and right tables. +- Results of `t1 ANY LEFT JOIN t2` and `t2 ANY RIGHT JOIN t1` operations are equal because ClickHouse uses the logic which provides one-to-many keys mapping in `ANY RIGHT JOIN` operations. +- Results of `ANY INNER JOIN` operations contain one row per key from both left and right tables. Possible values: -- 0 — Legacy behavior is disabled. -- 1 — Legacy behavior is enabled. - +- 0 — Legacy behavior is disabled. +- 1 — Legacy behavior is enabled. Default value: 0. @@ -474,19 +473,17 @@ See also: - [JOIN strictness](../../sql-reference/statements/select/join.md#select-join-strictness) - -## temporary_files_codec {#temporary_files_codec} +## temporary\_files\_codec {#temporary_files_codec} Sets compression codec for temporary files used in sorting and joining operations on disk. -Possible values: +Possible values: -- LZ4 — [LZ4](https://en.wikipedia.org/wiki/LZ4_(compression_algorithm)) compression is applied. -- NONE — No compression is applied. +- LZ4 — [LZ4](https://en.wikipedia.org/wiki/LZ4_(compression_algorithm)) compression is applied. +- NONE — No compression is applied. Default value: LZ4. - ## max\_block\_size {#setting-max_block_size} In ClickHouse, data is processed by blocks (sets of column parts). The internal processing cycles for a single block are efficient enough, but there are noticeable expenditures on each block. The `max_block_size` setting is a recommendation for what size of the block (in a count of rows) to load from tables. The block size shouldn’t be too small, so that the expenditures on each block are still noticeable, but not too large so that the query with LIMIT that is completed after the first block is processed quickly. The goal is to avoid consuming too much memory when extracting a large number of columns in multiple threads and to preserve at least some cache locality. @@ -555,7 +552,7 @@ Default value: 8. If ClickHouse should read more than `merge_tree_max_rows_to_use_cache` rows in one query, it doesn’t use the cache of uncompressed blocks. -The cache of uncompressed blocks stores data extracted for queries. ClickHouse uses this cache to speed up responses to repeated small queries. This setting protects the cache from trashing by queries that read a large amount of data. 
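For example, a query that only needs a handful of rows can ask for smaller blocks via a per-query setting; the table name `hits` and the chosen value are illustrative only.

``` sql
-- Illustrative only: smaller blocks let a LIMIT query finish after the
-- first block without reading large chunks of every column.
SELECT *
FROM hits
LIMIT 10
SETTINGS max_block_size = 4096;
```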
The [uncompressed\_cache\_size](../server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size) server setting defines the size of the cache of uncompressed blocks. +The cache of uncompressed blocks stores data extracted for queries. ClickHouse uses this cache to speed up responses to repeated small queries. This setting protects the cache from trashing by queries that read a large amount of data. The [uncompressed\_cache\_size](../../operations/server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size) server setting defines the size of the cache of uncompressed blocks. Possible values: @@ -567,7 +564,7 @@ Default value: 128 ✕ 8192. If ClickHouse should read more than `merge_tree_max_bytes_to_use_cache` bytes in one query, it doesn’t use the cache of uncompressed blocks. -The cache of uncompressed blocks stores data extracted for queries. ClickHouse uses this cache to speed up responses to repeated small queries. This setting protects the cache from trashing by queries that read a large amount of data. The [uncompressed\_cache\_size](../server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size) server setting defines the size of the cache of uncompressed blocks. +The cache of uncompressed blocks stores data extracted for queries. ClickHouse uses this cache to speed up responses to repeated small queries. This setting protects the cache from trashing by queries that read a large amount of data. The [uncompressed\_cache\_size](../../operations/server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size) server setting defines the size of the cache of uncompressed blocks. Possible value: @@ -592,7 +589,7 @@ Default value: 0. Setting up query logging. -Queries sent to ClickHouse with this setup are logged according to the rules in the [query\_log](../server-configuration-parameters/settings.md#server_configuration_parameters-query-log) server configuration parameter. +Queries sent to ClickHouse with this setup are logged according to the rules in the [query\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log) server configuration parameter. Example: @@ -622,7 +619,7 @@ log_queries_min_type='EXCEPTION_WHILE_PROCESSING' Setting up query threads logging. -Queries’ threads runned by ClickHouse with this setup are logged according to the rules in the [query\_thread\_log](../server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server configuration parameter. +Queries’ threads runned by ClickHouse with this setup are logged according to the rules in the [query\_thread\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server configuration parameter. Example: @@ -789,7 +786,7 @@ For more information, see the section “Extreme values”. ## use\_uncompressed\_cache {#setting-use_uncompressed_cache} Whether to use a cache of uncompressed blocks. Accepts 0 or 1. By default, 0 (disabled). -Using the uncompressed cache (only for tables in the MergeTree family) can significantly reduce latency and increase throughput when working with a large number of short queries. Enable this setting for users who send frequent short requests. 
Also pay attention to the [uncompressed\_cache\_size](../server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size) configuration parameter (only set in the config file) – the size of uncompressed cache blocks. By default, it is 8 GiB. The uncompressed cache is filled in as needed and the least-used data is automatically deleted. +Using the uncompressed cache (only for tables in the MergeTree family) can significantly reduce latency and increase throughput when working with a large number of short queries. Enable this setting for users who send frequent short requests. Also pay attention to the [uncompressed\_cache\_size](../../operations/server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size) configuration parameter (only set in the config file) – the size of uncompressed cache blocks. By default, it is 8 GiB. The uncompressed cache is filled in as needed and the least-used data is automatically deleted. For queries that read at least a somewhat large volume of data (one million rows or more), the uncompressed cache is disabled automatically to save space for truly small queries. This means that you can keep the ‘use\_uncompressed\_cache’ setting always set to 1. @@ -822,6 +819,11 @@ ClickHouse supports the following algorithms of choosing replicas: - [Nearest hostname](#load_balancing-nearest_hostname) - [In order](#load_balancing-in_order) - [First or random](#load_balancing-first_or_random) +- [Round robin](#load_balancing-round_robin) + +See also: + +- [distributed\_replica\_max\_ignored\_errors](#settings-distributed_replica_max_ignored_errors) ### Random (by Default) {#load_balancing-random} @@ -865,6 +867,14 @@ This algorithm chooses the first replica in the set or a random replica if the f The `first_or_random` algorithm solves the problem of the `in_order` algorithm. With `in_order`, if one replica goes down, the next one gets a double load while the remaining replicas handle the usual amount of traffic. When using the `first_or_random` algorithm, the load is evenly distributed among replicas that are still available. +### Round Robin {#load_balancing-round_robin} + +``` sql +load_balancing = round_robin +``` + +This algorithm uses round robin policy across replicas with the same number of errors (only the queries with `round_robin` policy is accounted). + ## prefer\_localhost\_replica {#settings-prefer-localhost-replica} Enables/disables preferable using the localhost replica when processing distributed queries. @@ -955,10 +965,10 @@ ClickHouse generates an exception See also: -- [insert_quorum_timeout](#settings-insert_quorum_timeout) -- [select_sequential_consistency](#settings-select_sequential_consistency) +- [insert\_quorum\_timeout](#settings-insert_quorum_timeout) +- [select\_sequential\_consistency](#settings-select_sequential_consistency) -## insert_quorum_timeout {#settings-insert_quorum_timeout} +## insert\_quorum\_timeout {#settings-insert_quorum_timeout} Write to quorum timeout in seconds. If the timeout has passed and no write has taken place yet, ClickHouse will generate an exception and the client must repeat the query to write the same block to the same or any other replica. 
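+
+A minimal sketch of a quorum write with a timeout (the table name is a placeholder and the values are examples only):
+
+```sql
+SET insert_quorum = 2;          -- wait until at least 2 replicas confirm the write
+SET insert_quorum_timeout = 60; -- give up with an exception after 60 seconds
+
+INSERT INTO replicated_table VALUES (1, 'a');
+```
+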
@@ -986,8 +996,8 @@ When sequential consistency is enabled, ClickHouse allows the client to execute See also: -- [insert_quorum](#settings-insert_quorum) -- [insert_quorum_timeout](#settings-insert_quorum_timeout) +- [insert\_quorum](#settings-insert_quorum) +- [insert\_quorum\_timeout](#settings-insert_quorum_timeout) ## insert\_deduplicate {#settings-insert-deduplicate} @@ -1002,7 +1012,6 @@ Default value: 1. By default, blocks inserted into replicated tables by the `INSERT` statement are deduplicated (see [Data Replication](../../engines/table-engines/mergetree-family/replication.md)). - ## deduplicate\_blocks\_in\_dependent\_materialized\_views {#settings-deduplicate-blocks-in-dependent-materialized-views} Enables or disables the deduplication check for materialized views that receive data from Replicated\* tables. @@ -1067,15 +1076,15 @@ Default value: 0. ## count\_distinct\_implementation {#settings-count_distinct_implementation} -Specifies which of the `uniq*` functions should be used to perform the [COUNT(DISTINCT …)](../../sql-reference/aggregate-functions/reference.md#agg_function-count) construction. +Specifies which of the `uniq*` functions should be used to perform the [COUNT(DISTINCT …)](../../sql-reference/aggregate-functions/reference/count.md#agg_function-count) construction. Possible values: -- [uniq](../../sql-reference/aggregate-functions/reference.md#agg_function-uniq) -- [uniqCombined](../../sql-reference/aggregate-functions/reference.md#agg_function-uniqcombined) -- [uniqCombined64](../../sql-reference/aggregate-functions/reference.md#agg_function-uniqcombined64) -- [uniqHLL12](../../sql-reference/aggregate-functions/reference.md#agg_function-uniqhll12) -- [uniqExact](../../sql-reference/aggregate-functions/reference.md#agg_function-uniqexact) +- [uniq](../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) +- [uniqCombined](../../sql-reference/aggregate-functions/reference/uniqcombined.md#agg_function-uniqcombined) +- [uniqCombined64](../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64) +- [uniqHLL12](../../sql-reference/aggregate-functions/reference/uniqhll12.md#agg_function-uniqhll12) +- [uniqExact](../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact) Default value: `uniqExact`. @@ -1109,24 +1118,24 @@ Possible values: Default value: 0. -## optimize_skip_unused_shards {#optimize-skip-unused-shards} +## optimize\_skip\_unused\_shards {#optimize-skip-unused-shards} Enables or disables skipping of unused shards for [SELECT](../../sql-reference/statements/select/index.md) queries that have sharding key condition in `WHERE/PREWHERE` (assuming that the data is distributed by sharding key, otherwise does nothing). Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: 0 -## force_optimize_skip_unused_shards {#force-optimize-skip-unused-shards} +## force\_optimize\_skip\_unused\_shards {#force-optimize-skip-unused-shards} -Enables or disables query execution if [optimize_skip_unused_shards](#optimize-skip-unused-shards) is enabled and skipping of unused shards is not possible. If the skipping is not possible and the setting is enabled, an exception will be thrown. +Enables or disables query execution if [optimize\_skip\_unused\_shards](#optimize-skip-unused-shards) is enabled and skipping of unused shards is not possible. If the skipping is not possible and the setting is enabled, an exception will be thrown. 
Possible values: -- 0 — Disabled. ClickHouse doesn't throw an exception. +- 0 — Disabled. ClickHouse doesn’t throw an exception. - 1 — Enabled. Query execution is disabled only if the table has a sharding key. - 2 — Enabled. Query execution is disabled regardless of whether a sharding key is defined for the table. @@ -1165,8 +1174,10 @@ Controls how fast errors in distributed tables are zeroed. If a replica is unava See also: +- [load\_balancing](#load_balancing-round_robin) - [Table engine Distributed](../../engines/table-engines/special/distributed.md) - [distributed\_replica\_error\_cap](#settings-distributed_replica_error_cap) +- [distributed\_replica\_max\_ignored\_errors](#settings-distributed_replica_max_ignored_errors) ## distributed\_replica\_error\_cap {#settings-distributed_replica_error_cap} @@ -1177,8 +1188,24 @@ Error count of each replica is capped at this value, preventing a single replica See also: +- [load\_balancing](#load_balancing-round_robin) - [Table engine Distributed](../../engines/table-engines/special/distributed.md) - [distributed\_replica\_error\_half\_life](#settings-distributed_replica_error_half_life) +- [distributed\_replica\_max\_ignored\_errors](#settings-distributed_replica_max_ignored_errors) + +## distributed\_replica\_max\_ignored\_errors {#settings-distributed_replica_max_ignored_errors} + +- Type: unsigned int +- Default value: 0 + +Number of errors that will be ignored while choosing replicas (according to `load_balancing` algorithm). + +See also: + +- [load\_balancing](#load_balancing-round_robin) +- [Table engine Distributed](../../engines/table-engines/special/distributed.md) +- [distributed\_replica\_error\_cap](#settings-distributed_replica_error_cap) +- [distributed\_replica\_error\_half\_life](#settings-distributed_replica_error_half_life) ## distributed\_directory\_monitor\_sleep\_time\_ms {#distributed_directory_monitor_sleep_time_ms} @@ -1249,7 +1276,7 @@ Default value: 1000000000 nanoseconds (once a second). See also: -- System table [trace\_log](../../operations/system-tables.md#system_tables-trace_log) +- System table [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) ## query\_profiler\_cpu\_time\_period\_ns {#query_profiler_cpu_time_period_ns} @@ -1272,7 +1299,7 @@ Default value: 1000000000 nanoseconds. See also: -- System table [trace\_log](../../operations/system-tables.md#system_tables-trace_log) +- System table [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) ## allow\_introspection\_functions {#settings-allow_introspection_functions} @@ -1287,8 +1314,8 @@ Default value: 0. **See Also** -- [Sampling Query Profiler](../optimizing-performance/sampling-query-profiler.md) -- System table [trace\_log](../../operations/system-tables.md#system_tables-trace_log) +- [Sampling Query Profiler](../../operations/optimizing-performance/sampling-query-profiler.md) +- System table [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) ## input\_format\_parallel\_parsing {#input-format-parallel-parsing} @@ -1336,7 +1363,7 @@ Type: URL Default value: Empty -## background_pool_size {#background_pool_size} +## background\_pool\_size {#background_pool_size} Sets the number of threads performing background operations in table engines (for example, merges in [MergeTree engine](../../engines/table-engines/mergetree-family/index.md) tables). This setting is applied from `default` profile at ClickHouse server start and can’t be changed in a user session. 
By adjusting this setting, you manage CPU and disk load. Smaller pool size utilizes less CPU and disk resources, but background processes advance slower which might eventually impact query performance. @@ -1348,9 +1375,9 @@ Possible values: Default value: 16. -## background_buffer_flush_schedule_pool_size {#background_buffer_flush_schedule_pool_size} +## background\_buffer\_flush\_schedule\_pool\_size {#background_buffer_flush_schedule_pool_size} -Sets the number of threads performing background flush in [Buffer](../../engines/table-engines/special/buffer.md)-engine tables. This setting is applied at ClickHouse server start and can't be changed in a user session. +Sets the number of threads performing background flush in [Buffer](../../engines/table-engines/special/buffer.md)-engine tables. This setting is applied at ClickHouse server start and can’t be changed in a user session. Possible values: @@ -1358,7 +1385,7 @@ Possible values: Default value: 16. -## background_move_pool_size {#background_move_pool_size} +## background\_move\_pool\_size {#background_move_pool_size} Sets the number of threads performing background moves of data parts for [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes)-engine tables. This setting is applied at ClickHouse server start and can’t be changed in a user session. @@ -1368,9 +1395,9 @@ Possible values: Default value: 8. -## background_schedule_pool_size {#background_schedule_pool_size} +## background\_schedule\_pool\_size {#background_schedule_pool_size} -Sets the number of threads performing background tasks for [replicated](../../engines/table-engines/mergetree-family/replication.md) tables, [Kafka](../../engines/table-engines/integrations/kafka.md) streaming, [DNS cache updates](../server-configuration-parameters/settings.md#server-settings-dns-cache-update-period). This setting is applied at ClickHouse server start and can’t be changed in a user session. +Sets the number of threads performing background tasks for [replicated](../../engines/table-engines/mergetree-family/replication.md) tables, [Kafka](../../engines/table-engines/integrations/kafka.md) streaming, [DNS cache updates](../../operations/server-configuration-parameters/settings.md#server-settings-dns-cache-update-period). This setting is applied at ClickHouse server start and can’t be changed in a user session. Possible values: @@ -1378,7 +1405,7 @@ Possible values: Default value: 16. -## background_distributed_schedule_pool_size {#background_distributed_schedule_pool_size} +## background\_distributed\_schedule\_pool\_size {#background_distributed_schedule_pool_size} Sets the number of threads performing background tasks for [distributed](../../engines/table-engines/special/distributed.md) sends. This setting is applied at ClickHouse server start and can’t be changed in a user session. @@ -1388,9 +1415,68 @@ Possible values: Default value: 16. -## low_cardinality_max_dictionary_size {#low_cardinality_max_dictionary_size} +## transform_null_in {#transform_null_in} -Sets a maximum size in rows of a shared global dictionary for the [LowCardinality](../../sql-reference/data-types/lowcardinality.md) data type that can be written to a storage file system. This setting prevents issues with RAM in case of unlimited dictionary growth. All the data that can't be encoded due to maximum dictionary size limitation ClickHouse writes in an ordinary method. 
+Enables equality of [NULL](../../sql-reference/syntax.md#null-literal) values for [IN](../../sql-reference/operators/in.md) operator. + +By default, `NULL` values can't be compared because `NULL` means undefined value. Thus, comparison `expr = NULL` must always return `false`. With this setting `NULL = NULL` returns `true` for `IN` operator. + +Possible values: + +- 0 — Comparison of `NULL` values in `IN` operator returns `false`. +- 1 — Comparison of `NULL` values in `IN` operator returns `true`. + +Default value: 0. + +**Example** + +Consider the `null_in` table: + +```text +┌──idx─┬─────i─┐ +│ 1 │ 1 │ +│ 2 │ NULL │ +│ 3 │ 3 │ +└──────┴───────┘ +``` + +Query: + +```sql +SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 0; +``` + +Result: + +```text +┌──idx─┬────i─┐ +│ 1 │ 1 │ +└──────┴──────┘ +``` + +Query: + +```sql +SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 1; +``` + +Result: + +```text +┌──idx─┬─────i─┐ +│ 1 │ 1 │ +│ 2 │ NULL │ +└──────┴───────┘ +``` + +**See Also** + +- [NULL Processing in IN Operators](../../sql-reference/operators/in.md#in-null-processing) + + +## low\_cardinality\_max\_dictionary\_size {#low_cardinality_max_dictionary_size} + +Sets a maximum size in rows of a shared global dictionary for the [LowCardinality](../../sql-reference/data-types/lowcardinality.md) data type that can be written to a storage file system. This setting prevents issues with RAM in case of unlimited dictionary growth. All the data that can’t be encoded due to maximum dictionary size limitation ClickHouse writes in an ordinary method. Possible values: @@ -1398,7 +1484,7 @@ Possible values: Default value: 8192. -## low_cardinality_use_single_dictionary_for_part {#low_cardinality_use_single_dictionary_for_part} +## low\_cardinality\_use\_single\_dictionary\_for\_part {#low_cardinality_use_single_dictionary_for_part} Turns on or turns off using of single dictionary for the data part. @@ -1406,44 +1492,73 @@ By default, ClickHouse server monitors the size of dictionaries and if a diction Possible values: -- 1 — Creating several dictionaries for the data part is prohibited. -- 0 — Creating several dictionaries for the data part is not prohibited. +- 1 — Creating several dictionaries for the data part is prohibited. +- 0 — Creating several dictionaries for the data part is not prohibited. Default value: 0. -## low_cardinality_allow_in_native_format {#low_cardinality_allow_in_native_format} +## low\_cardinality\_allow\_in\_native\_format {#low_cardinality_allow_in_native_format} Allows or restricts using the [LowCardinality](../../sql-reference/data-types/lowcardinality.md) data type with the [Native](../../interfaces/formats.md#native) format. If usage of `LowCardinality` is restricted, ClickHouse server converts `LowCardinality`-columns to ordinary ones for `SELECT` queries, and convert ordinary columns to `LowCardinality`-columns for `INSERT` queries. -This setting is required mainly for third-party clients which don't support `LowCardinality` data type. +This setting is required mainly for third-party clients which don’t support `LowCardinality` data type. Possible values: -- 1 — Usage of `LowCardinality` is not restricted. -- 0 — Usage of `LowCardinality` is restricted. +- 1 — Usage of `LowCardinality` is not restricted. +- 0 — Usage of `LowCardinality` is restricted. Default value: 1. 
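+
+For illustration, a session that serves a client without `LowCardinality` support might look like the following sketch (the table and columns are hypothetical):
+
+```sql
+-- With the setting disabled, SELECT results are sent to the client as ordinary columns,
+-- and ordinary columns received in INSERT queries are converted back to LowCardinality.
+SET low_cardinality_allow_in_native_format = 0;
+
+SELECT id, tag FROM events_with_low_cardinality_tags;
+```
+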
- -## allow_suspicious_low_cardinality_types {#allow_suspicious_low_cardinality_types} +## allow\_suspicious\_low\_cardinality\_types {#allow_suspicious_low_cardinality_types} Allows or restricts using [LowCardinality](../../sql-reference/data-types/lowcardinality.md) with data types with fixed size of 8 bytes or less: numeric data types and `FixedString(8_bytes_or_less)`. For small fixed values using of `LowCardinality` is usually inefficient, because ClickHouse stores a numeric index for each row. As a result: -- Disk space usage can rise. -- RAM consumption can be higher, depending on a dictionary size. -- Some functions can work slower due to extra coding/encoding operations. +- Disk space usage can rise. +- RAM consumption can be higher, depending on a dictionary size. +- Some functions can work slower due to extra coding/encoding operations. Merge times in [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md)-engine tables can grow due to all the reasons described above. Possible values: -- 1 — Usage of `LowCardinality` is not restricted. -- 0 — Usage of `LowCardinality` is restricted. +- 1 — Usage of `LowCardinality` is not restricted. +- 0 — Usage of `LowCardinality` is restricted. Default value: 0. +## min_insert_block_size_rows_for_materialized_views {#min-insert-block-size-rows-for-materialized-views} + +Sets minimum number of rows in block which can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones. This setting is applied only for blocks inserted into [materialized view](../../sql-reference/statements/create.md#create-view). By adjusting this setting, you control blocks squashing while pushing to materialized view and avoid excessive memory usage. + +Possible values: + +- Any positive integer. +- 0 — Squashing disabled. + +Default value: 1048576. + +**See Also** + +- [min_insert_block_size_rows](#min-insert-block-size-rows) + +## min_insert_block_size_bytes_for_materialized_views {#min-insert-block-size-bytes-for-materialized-views} + +Sets minimum number of bytes in block which can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones. This setting is applied only for blocks inserted into [materialized view](../../sql-reference/statements/create.md#create-view). By adjusting this setting, you control blocks squashing while pushing to materialized view and avoid excessive memory usage. + +Possible values: + +- Any positive integer. +- 0 — Squashing disabled. + +Default value: 268435456. + +**See also** + +- [min_insert_block_size_bytes](#min-insert-block-size-bytes) + [Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) diff --git a/docs/en/operations/system-tables.md b/docs/en/operations/system-tables.md deleted file mode 100644 index 28f448b632c..00000000000 --- a/docs/en/operations/system-tables.md +++ /dev/null @@ -1,1342 +0,0 @@ ---- -toc_priority: 52 -toc_title: System Tables ---- - -# System Tables {#system-tables} - -## Introduction {#system-tables-introduction} - -System tables provide information about: - -- Server states, processes, and environment. -- Server's internal processes. - -System tables: - -- Located in the `system` database. -- Available only for reading data. -- Can't be dropped or altered, but can be detached. - -Most of system tables store their data in RAM. A ClickHouse server creates such system tables at the start. 
- -Unlike other system tables, the system tables [metric_log](#system_tables-metric_log), [query_log](#system_tables-query_log), [query_thread_log](#system_tables-query_thread_log), [trace_log](#system_tables-trace_log) are served by [MergeTree](../engines/table-engines/mergetree-family/mergetree.md) table engine and store their data in a storage filesystem. If you remove a table from a filesystem, the ClickHouse server creates the empty one again at the time of the next data writing. If system table schema changed in a new release, then ClickHouse renames the current table and creates a new one. - -By default, table growth is unlimited. To control a size of a table, you can use [TTL](../sql-reference/statements/alter.md#manipulations-with-table-ttl) settings for removing outdated log records. Also you can use the partitioning feature of `MergeTree`-engine tables. - - -### Sources of System Metrics {#system-tables-sources-of-system-metrics} - -For collecting system metrics ClickHouse server uses: - -- `CAP_NET_ADMIN` capability. -- [procfs](https://en.wikipedia.org/wiki/Procfs) (only in Linux). - -**procfs** - -If ClickHouse server doesn't have `CAP_NET_ADMIN` capability, it tries to fall back to `ProcfsMetricsProvider`. `ProcfsMetricsProvider` allows collecting per-query system metrics (for CPU and I/O). - -If procfs is supported and enabled on the system, ClickHouse server collects these metrics: - -- `OSCPUVirtualTimeMicroseconds` -- `OSCPUWaitMicroseconds` -- `OSIOWaitMicroseconds` -- `OSReadChars` -- `OSWriteChars` -- `OSReadBytes` -- `OSWriteBytes` - -## system.asynchronous\_metrics {#system_tables-asynchronous_metrics} - -Contains metrics that are calculated periodically in the background. For example, the amount of RAM in use. - -Columns: - -- `metric` ([String](../sql-reference/data-types/string.md)) — Metric name. -- `value` ([Float64](../sql-reference/data-types/float.md)) — Metric value. - -**Example** - -``` sql -SELECT * FROM system.asynchronous_metrics LIMIT 10 -``` - -``` text -┌─metric──────────────────────────────────┬──────value─┐ -│ jemalloc.background_thread.run_interval │ 0 │ -│ jemalloc.background_thread.num_runs │ 0 │ -│ jemalloc.background_thread.num_threads │ 0 │ -│ jemalloc.retained │ 422551552 │ -│ jemalloc.mapped │ 1682989056 │ -│ jemalloc.resident │ 1656446976 │ -│ jemalloc.metadata_thp │ 0 │ -│ jemalloc.metadata │ 10226856 │ -│ UncompressedCacheCells │ 0 │ -│ MarkCacheFiles │ 0 │ -└─────────────────────────────────────────┴────────────┘ -``` - -**See Also** - -- [Monitoring](monitoring.md) — Base concepts of ClickHouse monitoring. -- [system.metrics](#system_tables-metrics) — Contains instantly calculated metrics. -- [system.events](#system_tables-events) — Contains a number of events that have occurred. -- [system.metric\_log](#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`. - -## system.asynchronous_metric_log {#system-tables-async-log} - -Contains the historical values for `system.asynchronous_log` (see [system.asynchronous_metrics](#system_tables-asynchronous_metrics)) - -## system.clusters {#system-clusters} - -Contains information about clusters available in the config file and the servers in them. - -Columns: - -- `cluster` (String) — The cluster name. -- `shard_num` (UInt32) — The shard number in the cluster, starting from 1. -- `shard_weight` (UInt32) — The relative weight of the shard when writing data. -- `replica_num` (UInt32) — The replica number in the shard, starting from 1. 
-- `host_name` (String) — The host name, as specified in the config. -- `host_address` (String) — The host IP address obtained from DNS. -- `port` (UInt16) — The port to use for connecting to the server. -- `user` (String) — The name of the user for connecting to the server. -- `errors_count` (UInt32) - number of times this host failed to reach replica. -- `estimated_recovery_time` (UInt32) - seconds left until replica error count is zeroed and it is considered to be back to normal. - -Please note that `errors_count` is updated once per query to the cluster, but `estimated_recovery_time` is recalculated on-demand. So there could be a case of non-zero `errors_count` and zero `estimated_recovery_time`, that next query will zero `errors_count` and try to use replica as if it has no errors. - -**See also** - -- [Table engine Distributed](../engines/table-engines/special/distributed.md) -- [distributed\_replica\_error\_cap setting](settings/settings.md#settings-distributed_replica_error_cap) -- [distributed\_replica\_error\_half\_life setting](settings/settings.md#settings-distributed_replica_error_half_life) - -## system.columns {#system-columns} - -Contains information about columns in all the tables. - -You can use this table to get information similar to the [DESCRIBE TABLE](../sql-reference/statements/misc.md#misc-describe-table) query, but for multiple tables at once. - -The `system.columns` table contains the following columns (the column type is shown in brackets): - -- `database` (String) — Database name. -- `table` (String) — Table name. -- `name` (String) — Column name. -- `type` (String) — Column type. -- `default_kind` (String) — Expression type (`DEFAULT`, `MATERIALIZED`, `ALIAS`) for the default value, or an empty string if it is not defined. -- `default_expression` (String) — Expression for the default value, or an empty string if it is not defined. -- `data_compressed_bytes` (UInt64) — The size of compressed data, in bytes. -- `data_uncompressed_bytes` (UInt64) — The size of decompressed data, in bytes. -- `marks_bytes` (UInt64) — The size of marks, in bytes. -- `comment` (String) — Comment on the column, or an empty string if it is not defined. -- `is_in_partition_key` (UInt8) — Flag that indicates whether the column is in the partition expression. -- `is_in_sorting_key` (UInt8) — Flag that indicates whether the column is in the sorting key expression. -- `is_in_primary_key` (UInt8) — Flag that indicates whether the column is in the primary key expression. -- `is_in_sampling_key` (UInt8) — Flag that indicates whether the column is in the sampling key expression. - -## system.contributors {#system-contributors} - -Contains information about contributors. All constributors in random order. The order is random at query execution time. - -Columns: - -- `name` (String) — Contributor (author) name from git log. - -**Example** - -``` sql -SELECT * FROM system.contributors LIMIT 10 -``` - -``` text -┌─name─────────────┐ -│ Olga Khvostikova │ -│ Max Vetrov │ -│ LiuYangkuan │ -│ svladykin │ -│ zamulla │ -│ Šimon Podlipský │ -│ BayoNet │ -│ Ilya Khomutov │ -│ Amy Krishnevsky │ -│ Loud_Scream │ -└──────────────────┘ -``` - -To find out yourself in the table, use a query: - -``` sql -SELECT * FROM system.contributors WHERE name='Olga Khvostikova' -``` - -``` text -┌─name─────────────┐ -│ Olga Khvostikova │ -└──────────────────┘ -``` - -## system.databases {#system-databases} - -This table contains a single String column called ‘name’ – the name of a database. 
-Each database that the server knows about has a corresponding entry in the table. -This system table is used for implementing the `SHOW DATABASES` query. - -## system.data_type_families {#system_tables-data_type_families} - -Contains information about supported [data types](../sql-reference/data-types/). - -Columns: - -- `name` ([String](../sql-reference/data-types/string.md)) — Data type name. -- `case_insensitive` ([UInt8](../sql-reference/data-types/int-uint.md)) — Property that shows whether you can use a data type name in a query in case insensitive manner or not. For example, `Date` and `date` are both valid. -- `alias_to` ([String](../sql-reference/data-types/string.md)) — Data type name for which `name` is an alias. - -**Example** - -``` sql -SELECT * FROM system.data_type_families WHERE alias_to = 'String' -``` - -``` text -┌─name───────┬─case_insensitive─┬─alias_to─┐ -│ LONGBLOB │ 1 │ String │ -│ LONGTEXT │ 1 │ String │ -│ TINYTEXT │ 1 │ String │ -│ TEXT │ 1 │ String │ -│ VARCHAR │ 1 │ String │ -│ MEDIUMBLOB │ 1 │ String │ -│ BLOB │ 1 │ String │ -│ TINYBLOB │ 1 │ String │ -│ CHAR │ 1 │ String │ -│ MEDIUMTEXT │ 1 │ String │ -└────────────┴──────────────────┴──────────┘ -``` - -**See Also** - -- [Syntax](../sql-reference/syntax.md) — Information about supported syntax. - -## system.detached\_parts {#system_tables-detached_parts} - -Contains information about detached parts of [MergeTree](../engines/table-engines/mergetree-family/mergetree.md) tables. The `reason` column specifies why the part was detached. For user-detached parts, the reason is empty. Such parts can be attached with [ALTER TABLE ATTACH PARTITION\|PART](../sql-reference/statements/alter.md#alter_attach-partition) command. For the description of other columns, see [system.parts](#system_tables-parts). If part name is invalid, values of some columns may be `NULL`. Such parts can be deleted with [ALTER TABLE DROP DETACHED PART](../sql-reference/statements/alter.md#alter_drop-detached). - -## system.dictionaries {#system_tables-dictionaries} - -Contains information about [external dictionaries](../sql-reference/dictionaries/external-dictionaries/external-dicts.md). - -Columns: - -- `database` ([String](../sql-reference/data-types/string.md)) — Name of the database containing the dictionary created by DDL query. Empty string for other dictionaries. -- `name` ([String](../sql-reference/data-types/string.md)) — [Dictionary name](../sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md). -- `status` ([Enum8](../sql-reference/data-types/enum.md)) — Dictionary status. Possible values: - - `NOT_LOADED` — Dictionary was not loaded because it was not used. - - `LOADED` — Dictionary loaded successfully. - - `FAILED` — Unable to load the dictionary as a result of an error. - - `LOADING` — Dictionary is loading now. - - `LOADED_AND_RELOADING` — Dictionary is loaded successfully, and is being reloaded right now (frequent reasons: [SYSTEM RELOAD DICTIONARY](../sql-reference/statements/system.md#query_language-system-reload-dictionary) query, timeout, dictionary config has changed). - - `FAILED_AND_RELOADING` — Could not load the dictionary as a result of an error and is loading now. -- `origin` ([String](../sql-reference/data-types/string.md)) — Path to the configuration file that describes the dictionary. -- `type` ([String](../sql-reference/data-types/string.md)) — Type of a dictionary allocation. [Storing Dictionaries in Memory](../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md). 
-- `key` — [Key type](../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-key): Numeric Key ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) or Сomposite key ([String](../sql-reference/data-types/string.md)) — form “(type 1, type 2, …, type n)”. -- `attribute.names` ([Array](../sql-reference/data-types/array.md)([String](../sql-reference/data-types/string.md))) — Array of [attribute names](../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes) provided by the dictionary. -- `attribute.types` ([Array](../sql-reference/data-types/array.md)([String](../sql-reference/data-types/string.md))) — Corresponding array of [attribute types](../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes) that are provided by the dictionary. -- `bytes_allocated` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — Amount of RAM allocated for the dictionary. -- `query_count` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of queries since the dictionary was loaded or since the last successful reboot. -- `hit_rate` ([Float64](../sql-reference/data-types/float.md)) — For cache dictionaries, the percentage of uses for which the value was in the cache. -- `element_count` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of items stored in the dictionary. -- `load_factor` ([Float64](../sql-reference/data-types/float.md)) — Percentage filled in the dictionary (for a hashed dictionary, the percentage filled in the hash table). -- `source` ([String](../sql-reference/data-types/string.md)) — Text describing the [data source](../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md) for the dictionary. -- `lifetime_min` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — Minimum [lifetime](../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if `invalidate_query` is set, then only if it has changed). Set in seconds. -- `lifetime_max` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — Maximum [lifetime](../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if `invalidate_query` is set, then only if it has changed). Set in seconds. -- `loading_start_time` ([DateTime](../sql-reference/data-types/datetime.md)) — Start time for loading the dictionary. -- `last_successful_update_time` ([DateTime](../sql-reference/data-types/datetime.md)) — End time for loading or updating the dictionary. Helps to monitor some troubles with external sources and investigate causes. -- `loading_duration` ([Float32](../sql-reference/data-types/float.md)) — Duration of a dictionary loading. -- `last_exception` ([String](../sql-reference/data-types/string.md)) — Text of the error that occurs when creating or reloading the dictionary if the dictionary couldn’t be created. - -**Example** - -Configure the dictionary. 
- -``` sql -CREATE DICTIONARY dictdb.dict -( - `key` Int64 DEFAULT -1, - `value_default` String DEFAULT 'world', - `value_expression` String DEFAULT 'xxx' EXPRESSION 'toString(127 * 172)' -) -PRIMARY KEY key -SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'dicttbl' DB 'dictdb')) -LIFETIME(MIN 0 MAX 1) -LAYOUT(FLAT()) -``` - -Make sure that the dictionary is loaded. - -``` sql -SELECT * FROM system.dictionaries -``` - -``` text -┌─database─┬─name─┬─status─┬─origin──────┬─type─┬─key────┬─attribute.names──────────────────────┬─attribute.types─────┬─bytes_allocated─┬─query_count─┬─hit_rate─┬─element_count─┬───────────load_factor─┬─source─────────────────────┬─lifetime_min─┬─lifetime_max─┬──loading_start_time─┌──last_successful_update_time─┬──────loading_duration─┬─last_exception─┐ -│ dictdb │ dict │ LOADED │ dictdb.dict │ Flat │ UInt64 │ ['value_default','value_expression'] │ ['String','String'] │ 74032 │ 0 │ 1 │ 1 │ 0.0004887585532746823 │ ClickHouse: dictdb.dicttbl │ 0 │ 1 │ 2020-03-04 04:17:34 │ 2020-03-04 04:30:34 │ 0.002 │ │ -└──────────┴──────┴────────┴─────────────┴──────┴────────┴──────────────────────────────────────┴─────────────────────┴─────────────────┴─────────────┴──────────┴───────────────┴───────────────────────┴────────────────────────────┴──────────────┴──────────────┴─────────────────────┴──────────────────────────────┘───────────────────────┴────────────────┘ -``` - -## system.events {#system_tables-events} - -Contains information about the number of events that have occurred in the system. For example, in the table, you can find how many `SELECT` queries were processed since the ClickHouse server started. - -Columns: - -- `event` ([String](../sql-reference/data-types/string.md)) — Event name. -- `value` ([UInt64](../sql-reference/data-types/int-uint.md)) — Number of events occurred. -- `description` ([String](../sql-reference/data-types/string.md)) — Event description. - -**Example** - -``` sql -SELECT * FROM system.events LIMIT 5 -``` - -``` text -┌─event─────────────────────────────────┬─value─┬─description────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ -│ Query │ 12 │ Number of queries to be interpreted and potentially executed. Does not include queries that failed to parse or were rejected due to AST size limits, quota limits or limits on the number of simultaneously running queries. May include internal queries initiated by ClickHouse itself. Does not count subqueries. │ -│ SelectQuery │ 8 │ Same as Query, but only for SELECT queries. │ -│ FileOpen │ 73 │ Number of files opened. │ -│ ReadBufferFromFileDescriptorRead │ 155 │ Number of reads (read/pread) from a file descriptor. Does not include sockets. │ -│ ReadBufferFromFileDescriptorReadBytes │ 9931 │ Number of bytes read from file descriptors. If the file is compressed, this will show the compressed data size. │ -└───────────────────────────────────────┴───────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ -``` - -**See Also** - -- [system.asynchronous\_metrics](#system_tables-asynchronous_metrics) — Contains periodically calculated metrics. 
-- [system.metrics](#system_tables-metrics) — Contains instantly calculated metrics. -- [system.metric\_log](#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`. -- [Monitoring](monitoring.md) — Base concepts of ClickHouse monitoring. - -## system.functions {#system-functions} - -Contains information about normal and aggregate functions. - -Columns: - -- `name`(`String`) – The name of the function. -- `is_aggregate`(`UInt8`) — Whether the function is aggregate. - -## system.graphite\_retentions {#system-graphite-retentions} - -Contains information about parameters [graphite\_rollup](server-configuration-parameters/settings.md#server_configuration_parameters-graphite) which are used in tables with [\*GraphiteMergeTree](../engines/table-engines/mergetree-family/graphitemergetree.md) engines. - -Columns: - -- `config_name` (String) - `graphite_rollup` parameter name. -- `regexp` (String) - A pattern for the metric name. -- `function` (String) - The name of the aggregating function. -- `age` (UInt64) - The minimum age of the data in seconds. -- `precision` (UInt64) - How precisely to define the age of the data in seconds. -- `priority` (UInt16) - Pattern priority. -- `is_default` (UInt8) - Whether the pattern is the default. -- `Tables.database` (Array(String)) - Array of names of database tables that use the `config_name` parameter. -- `Tables.table` (Array(String)) - Array of table names that use the `config_name` parameter. - -## system.merges {#system-merges} - -Contains information about merges and part mutations currently in process for tables in the MergeTree family. - -Columns: - -- `database` (String) — The name of the database the table is in. -- `table` (String) — Table name. -- `elapsed` (Float64) — The time elapsed (in seconds) since the merge started. -- `progress` (Float64) — The percentage of completed work from 0 to 1. -- `num_parts` (UInt64) — The number of pieces to be merged. -- `result_part_name` (String) — The name of the part that will be formed as the result of merging. -- `is_mutation` (UInt8) - 1 if this process is a part mutation. -- `total_size_bytes_compressed` (UInt64) — The total size of the compressed data in the merged chunks. -- `total_size_marks` (UInt64) — The total number of marks in the merged parts. -- `bytes_read_uncompressed` (UInt64) — Number of bytes read, uncompressed. -- `rows_read` (UInt64) — Number of rows read. -- `bytes_written_uncompressed` (UInt64) — Number of bytes written, uncompressed. -- `rows_written` (UInt64) — Number of rows written. - -## system.metrics {#system_tables-metrics} - -Contains metrics which can be calculated instantly, or have a current value. For example, the number of simultaneously processed queries or the current replica delay. This table is always up to date. - -Columns: - -- `metric` ([String](../sql-reference/data-types/string.md)) — Metric name. -- `value` ([Int64](../sql-reference/data-types/int-uint.md)) — Metric value. -- `description` ([String](../sql-reference/data-types/string.md)) — Metric description. - -The list of supported metrics you can find in the [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp) source file of ClickHouse. 
- -**Example** - -``` sql -SELECT * FROM system.metrics LIMIT 10 -``` - -``` text -┌─metric─────────────────────┬─value─┬─description──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ -│ Query │ 1 │ Number of executing queries │ -│ Merge │ 0 │ Number of executing background merges │ -│ PartMutation │ 0 │ Number of mutations (ALTER DELETE/UPDATE) │ -│ ReplicatedFetch │ 0 │ Number of data parts being fetched from replicas │ -│ ReplicatedSend │ 0 │ Number of data parts being sent to replicas │ -│ ReplicatedChecks │ 0 │ Number of data parts checking for consistency │ -│ BackgroundPoolTask │ 0 │ Number of active tasks in BackgroundProcessingPool (merges, mutations, fetches, or replication queue bookkeeping) │ -│ BackgroundSchedulePoolTask │ 0 │ Number of active tasks in BackgroundSchedulePool. This pool is used for periodic ReplicatedMergeTree tasks, like cleaning old data parts, altering data parts, replica re-initialization, etc. │ -│ DiskSpaceReservedForMerge │ 0 │ Disk space reserved for currently running background merges. It is slightly more than the total size of currently merging parts. │ -│ DistributedSend │ 0 │ Number of connections to remote servers sending data that was INSERTed into Distributed tables. Both synchronous and asynchronous mode. │ -└────────────────────────────┴───────┴──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ -``` - -**See Also** - -- [system.asynchronous\_metrics](#system_tables-asynchronous_metrics) — Contains periodically calculated metrics. -- [system.events](#system_tables-events) — Contains a number of events that occurred. -- [system.metric\_log](#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`. -- [Monitoring](monitoring.md) — Base concepts of ClickHouse monitoring. - -## system.metric\_log {#system_tables-metric_log} - -Contains history of metrics values from tables `system.metrics` and `system.events`, periodically flushed to disk. -To turn on metrics history collection on `system.metric_log`, create `/etc/clickhouse-server/config.d/metric_log.xml` with following content: - -``` xml - - - system - metric_log
-        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
-        <collect_interval_milliseconds>1000</collect_interval_milliseconds>
-    </metric_log>
-</yandex>
-``` - -**Example** - -``` sql -SELECT * FROM system.metric_log LIMIT 1 FORMAT Vertical; -``` - -``` text -Row 1: -────── -event_date: 2020-02-18 -event_time: 2020-02-18 07:15:33 -milliseconds: 554 -ProfileEvent_Query: 0 -ProfileEvent_SelectQuery: 0 -ProfileEvent_InsertQuery: 0 -ProfileEvent_FileOpen: 0 -ProfileEvent_Seek: 0 -ProfileEvent_ReadBufferFromFileDescriptorRead: 1 -ProfileEvent_ReadBufferFromFileDescriptorReadFailed: 0 -ProfileEvent_ReadBufferFromFileDescriptorReadBytes: 0 -ProfileEvent_WriteBufferFromFileDescriptorWrite: 1 -ProfileEvent_WriteBufferFromFileDescriptorWriteFailed: 0 -ProfileEvent_WriteBufferFromFileDescriptorWriteBytes: 56 -... -CurrentMetric_Query: 0 -CurrentMetric_Merge: 0 -CurrentMetric_PartMutation: 0 -CurrentMetric_ReplicatedFetch: 0 -CurrentMetric_ReplicatedSend: 0 -CurrentMetric_ReplicatedChecks: 0 -... -``` - -**See also** - -- [system.asynchronous\_metrics](#system_tables-asynchronous_metrics) — Contains periodically calculated metrics. -- [system.events](#system_tables-events) — Contains a number of events that occurred. -- [system.metrics](#system_tables-metrics) — Contains instantly calculated metrics. -- [Monitoring](monitoring.md) — Base concepts of ClickHouse monitoring. - -## system.numbers {#system-numbers} - -This table contains a single UInt64 column named ‘number’ that contains almost all the natural numbers starting from zero. -You can use this table for tests, or if you need to do a brute force search. -Reads from this table are not parallelized. - -## system.numbers\_mt {#system-numbers-mt} - -The same as ‘system.numbers’ but reads are parallelized. The numbers can be returned in any order. -Used for tests. - -## system.one {#system-one} - -This table contains a single row with a single ‘dummy’ UInt8 column containing the value 0. -This table is used if a SELECT query doesn’t specify the FROM clause. -This is similar to the DUAL table found in other DBMSs. - -## system.parts {#system_tables-parts} - -Contains information about parts of [MergeTree](../engines/table-engines/mergetree-family/mergetree.md) tables. - -Each row describes one data part. - -Columns: - -- `partition` (String) – The partition name. To learn what a partition is, see the description of the [ALTER](../sql-reference/statements/alter.md#query_language_queries_alter) query. - - Formats: - - - `YYYYMM` for automatic partitioning by month. - - `any_string` when partitioning manually. - -- `name` (`String`) – Name of the data part. - -- `active` (`UInt8`) – Flag that indicates whether the data part is active. If a data part is active, it’s used in a table. Otherwise, it’s deleted. Inactive data parts remain after merging. - -- `marks` (`UInt64`) – The number of marks. To get the approximate number of rows in a data part, multiply `marks` by the index granularity (usually 8192) (this hint doesn’t work for adaptive granularity). - -- `rows` (`UInt64`) – The number of rows. - -- `bytes_on_disk` (`UInt64`) – Total size of all the data part files in bytes. - -- `data_compressed_bytes` (`UInt64`) – Total size of compressed data in the data part. All the auxiliary files (for example, files with marks) are not included. - -- `data_uncompressed_bytes` (`UInt64`) – Total size of uncompressed data in the data part. All the auxiliary files (for example, files with marks) are not included. - -- `marks_bytes` (`UInt64`) – The size of the file with marks. - -- `modification_time` (`DateTime`) – The time the directory with the data part was modified. 
This usually corresponds to the time of data part creation.\| - -- `remove_time` (`DateTime`) – The time when the data part became inactive. - -- `refcount` (`UInt32`) – The number of places where the data part is used. A value greater than 2 indicates that the data part is used in queries or merges. - -- `min_date` (`Date`) – The minimum value of the date key in the data part. - -- `max_date` (`Date`) – The maximum value of the date key in the data part. - -- `min_time` (`DateTime`) – The minimum value of the date and time key in the data part. - -- `max_time`(`DateTime`) – The maximum value of the date and time key in the data part. - -- `partition_id` (`String`) – ID of the partition. - -- `min_block_number` (`UInt64`) – The minimum number of data parts that make up the current part after merging. - -- `max_block_number` (`UInt64`) – The maximum number of data parts that make up the current part after merging. - -- `level` (`UInt32`) – Depth of the merge tree. Zero means that the current part was created by insert rather than by merging other parts. - -- `data_version` (`UInt64`) – Number that is used to determine which mutations should be applied to the data part (mutations with a version higher than `data_version`). - -- `primary_key_bytes_in_memory` (`UInt64`) – The amount of memory (in bytes) used by primary key values. - -- `primary_key_bytes_in_memory_allocated` (`UInt64`) – The amount of memory (in bytes) reserved for primary key values. - -- `is_frozen` (`UInt8`) – Flag that shows that a partition data backup exists. 1, the backup exists. 0, the backup doesn’t exist. For more details, see [FREEZE PARTITION](../sql-reference/statements/alter.md#alter_freeze-partition) - -- `database` (`String`) – Name of the database. - -- `table` (`String`) – Name of the table. - -- `engine` (`String`) – Name of the table engine without parameters. - -- `path` (`String`) – Absolute path to the folder with data part files. - -- `disk` (`String`) – Name of a disk that stores the data part. - -- `hash_of_all_files` (`String`) – [sipHash128](../sql-reference/functions/hash-functions.md#hash_functions-siphash128) of compressed files. - -- `hash_of_uncompressed_files` (`String`) – [sipHash128](../sql-reference/functions/hash-functions.md#hash_functions-siphash128) of uncompressed files (files with marks, index file etc.). - -- `uncompressed_hash_of_compressed_files` (`String`) – [sipHash128](../sql-reference/functions/hash-functions.md#hash_functions-siphash128) of data in the compressed files as if they were uncompressed. - -- `bytes` (`UInt64`) – Alias for `bytes_on_disk`. - -- `marks_size` (`UInt64`) – Alias for `marks_bytes`. - -## system.part\_log {#system_tables-part-log} - -The `system.part_log` table is created only if the [part\_log](server-configuration-parameters/settings.md#server_configuration_parameters-part-log) server setting is specified. - -This table contains information about events that occurred with [data parts](../engines/table-engines/mergetree-family/custom-partitioning-key.md) in the [MergeTree](../engines/table-engines/mergetree-family/mergetree.md) family tables, such as adding or merging data. - -The `system.part_log` table contains the following columns: - -- `event_type` (Enum) — Type of the event that occurred with the data part. Can have one of the following values: - - `NEW_PART` — Inserting of a new data part. - - `MERGE_PARTS` — Merging of data parts. - - `DOWNLOAD_PART` — Downloading a data part. 
- - `REMOVE_PART` — Removing or detaching a data part using [DETACH PARTITION](../sql-reference/statements/alter.md#alter_detach-partition). - - `MUTATE_PART` — Mutating of a data part. - - `MOVE_PART` — Moving the data part from the one disk to another one. -- `event_date` (Date) — Event date. -- `event_time` (DateTime) — Event time. -- `duration_ms` (UInt64) — Duration. -- `database` (String) — Name of the database the data part is in. -- `table` (String) — Name of the table the data part is in. -- `part_name` (String) — Name of the data part. -- `partition_id` (String) — ID of the partition that the data part was inserted to. The column takes the ‘all’ value if the partitioning is by `tuple()`. -- `rows` (UInt64) — The number of rows in the data part. -- `size_in_bytes` (UInt64) — Size of the data part in bytes. -- `merged_from` (Array(String)) — An array of names of the parts which the current part was made up from (after the merge). -- `bytes_uncompressed` (UInt64) — Size of uncompressed bytes. -- `read_rows` (UInt64) — The number of rows was read during the merge. -- `read_bytes` (UInt64) — The number of bytes was read during the merge. -- `error` (UInt16) — The code number of the occurred error. -- `exception` (String) — Text message of the occurred error. - -The `system.part_log` table is created after the first inserting data to the `MergeTree` table. - -## system.processes {#system_tables-processes} - -This system table is used for implementing the `SHOW PROCESSLIST` query. - -Columns: - -- `user` (String) – The user who made the query. Keep in mind that for distributed processing, queries are sent to remote servers under the `default` user. The field contains the username for a specific query, not for a query that this query initiated. -- `address` (String) – The IP address the request was made from. The same for distributed processing. To track where a distributed query was originally made from, look at `system.processes` on the query requestor server. -- `elapsed` (Float64) – The time in seconds since request execution started. -- `rows_read` (UInt64) – The number of rows read from the table. For distributed processing, on the requestor server, this is the total for all remote servers. -- `bytes_read` (UInt64) – The number of uncompressed bytes read from the table. For distributed processing, on the requestor server, this is the total for all remote servers. -- `total_rows_approx` (UInt64) – The approximation of the total number of rows that should be read. For distributed processing, on the requestor server, this is the total for all remote servers. It can be updated during request processing, when new sources to process become known. -- `memory_usage` (UInt64) – Amount of RAM the request uses. It might not include some types of dedicated memory. See the [max\_memory\_usage](../operations/settings/query-complexity.md#settings_max_memory_usage) setting. -- `query` (String) – The query text. For `INSERT`, it doesn’t include the data to insert. -- `query_id` (String) – Query ID, if defined. - -## system.text\_log {#system-tables-text-log} - -Contains logging entries. Logging level which goes to this table can be limited with `text_log.level` server setting. - -Columns: - -- `event_date` (Date) — Date of the entry. -- `event_time` (DateTime) — Time of the entry. -- `microseconds` (UInt32) — Microseconds of the entry. -- `thread_name` (String) — Name of the thread from which the logging was done. -- `thread_id` (UInt64) — OS thread ID. -- `level` (`Enum8`) — Entry level. 
-    - `1` or `'Fatal'`.
-    - `2` or `'Critical'`.
-    - `3` or `'Error'`.
-    - `4` or `'Warning'`.
-    - `5` or `'Notice'`.
-    - `6` or `'Information'`.
-    - `7` or `'Debug'`.
-    - `8` or `'Trace'`.
-- `query_id` (String) — ID of the query.
-- `logger_name` (LowCardinality(String)) — Name of the logger (i.e. `DDLWorker`).
-- `message` (String) — The message itself.
-- `revision` (UInt32) — ClickHouse revision.
-- `source_file` (LowCardinality(String)) — Source file from which the logging was done.
-- `source_line` (UInt64) — Source line from which the logging was done.
-
-## system.query_log {#system_tables-query_log}
-
-Contains information about executed queries, for example, start time, duration of processing, and error messages.
-
-!!! note "Note"
-    The table doesn’t contain input data for `INSERT` queries.
-
-You can change the settings of query logging in the [query_log](server-configuration-parameters/settings.md#server_configuration_parameters-query-log) section of the server configuration.
-
-You can disable query logging by setting [log_queries = 0](settings/settings.md#settings-log-queries). We don't recommend turning off logging because information in this table is important for solving issues.
-
-The flushing period of data is set in the `flush_interval_milliseconds` parameter of the [query_log](server-configuration-parameters/settings.md#server_configuration_parameters-query-log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../sql-reference/statements/system.md#query_language-system-flush_logs) query.
-
-ClickHouse doesn't delete data from the table automatically. See [Introduction](#system-tables-introduction) for more details.
-
-The `system.query_log` table registers two kinds of queries:
-
-1. Initial queries that were run directly by the client.
-2. Child queries that were initiated by other queries (for distributed query execution). For these types of queries, information about the parent queries is shown in the `initial_*` columns.
-
-Each query creates one or two rows in the `query_log` table, depending on the status (see the `type` column) of the query:
-
-1. If the query execution was successful, two rows with the `QueryStart` and `QueryFinish` types are created.
-2. If an error occurred during query processing, two events with the `QueryStart` and `ExceptionWhileProcessing` types are created.
-3. If an error occurred before launching the query, a single event with the `ExceptionBeforeStart` type is created.
-
-Columns:
-
-- `type` ([Enum8](../sql-reference/data-types/enum.md)) — Type of an event that occurred when executing the query. Values:
-    - `'QueryStart' = 1` — Successful start of query execution.
-    - `'QueryFinish' = 2` — Successful end of query execution.
-    - `'ExceptionBeforeStart' = 3` — Exception before the start of query execution.
-    - `'ExceptionWhileProcessing' = 4` — Exception during the query execution.
-- `event_date` ([Date](../sql-reference/data-types/date.md)) — Query starting date.
-- `event_time` ([DateTime](../sql-reference/data-types/datetime.md)) — Query starting time.
-- `query_start_time` ([DateTime](../sql-reference/data-types/datetime.md)) — Start time of query execution.
-- `query_duration_ms` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of query execution in milliseconds.
-- `read_rows` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — Total number of rows read from all tables and table functions that participated in the query. It includes usual subqueries and subqueries for `IN` and `JOIN`. For distributed queries, `read_rows` includes the total number of rows read at all replicas. Each replica sends its `read_rows` value, and the server-initiator of the query summarizes all received and local values. Cache volumes don't affect this value.
-- `read_bytes` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — Total number of bytes read from all tables and table functions that participated in the query. It includes usual subqueries and subqueries for `IN` and `JOIN`. For distributed queries, `read_bytes` includes the total number of bytes read at all replicas. Each replica sends its `read_bytes` value, and the server-initiator of the query summarizes all received and local values. Cache volumes don't affect this value.
-- `written_rows` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0.
-- `written_bytes` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0.
-- `result_rows` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of rows in the result of a `SELECT` query, or the number of rows in an `INSERT` query.
-- `result_bytes` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — RAM volume in bytes used to store a query result.
-- `memory_usage` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — Memory consumption by the query.
-- `query` ([String](../sql-reference/data-types/string.md)) — Query string.
-- `exception` ([String](../sql-reference/data-types/string.md)) — Exception message.
-- `exception_code` ([Int32](../sql-reference/data-types/int-uint.md)) — Code of an exception.
-- `stack_trace` ([String](../sql-reference/data-types/string.md)) — [Stack trace](https://en.wikipedia.org/wiki/Stack_trace). An empty string, if the query was completed successfully.
-- `is_initial_query` ([UInt8](../sql-reference/data-types/int-uint.md)) — Query type. Possible values:
-    - 1 — Query was initiated by the client.
-    - 0 — Query was initiated by another query as part of distributed query execution.
-- `user` ([String](../sql-reference/data-types/string.md)) — Name of the user who initiated the current query.
-- `query_id` ([String](../sql-reference/data-types/string.md)) — ID of the query.
-- `address` ([IPv6](../sql-reference/data-types/domains/ipv6.md)) — IP address that was used to make the query.
-- `port` ([UInt16](../sql-reference/data-types/int-uint.md)) — The client port that was used to make the query.
-- `initial_user` ([String](../sql-reference/data-types/string.md)) — Name of the user who ran the initial query (for distributed query execution).
-- `initial_query_id` ([String](../sql-reference/data-types/string.md)) — ID of the initial query (for distributed query execution).
-- `initial_address` ([IPv6](../sql-reference/data-types/domains/ipv6.md)) — IP address that the parent query was launched from.
-- `initial_port` ([UInt16](../sql-reference/data-types/int-uint.md)) — The client port that was used to make the parent query.
-- `interface` ([UInt8](../sql-reference/data-types/int-uint.md)) — Interface that the query was initiated from. Possible values:
-    - 1 — TCP.
-    - 2 — HTTP.
-- `os_user` ([String](../sql-reference/data-types/string.md)) — Operating system username of the user who runs [clickhouse-client](../interfaces/cli.md).
-- `client_hostname` ([String](../sql-reference/data-types/string.md)) — Hostname of the client machine where the [clickhouse-client](../interfaces/cli.md) or another TCP client is run. -- `client_name` ([String](../sql-reference/data-types/string.md)) — The [clickhouse-client](../interfaces/cli.md) or another TCP client name. -- `client_revision` ([UInt32](../sql-reference/data-types/int-uint.md)) — Revision of the [clickhouse-client](../interfaces/cli.md) or another TCP client. -- `client_version_major` ([UInt32](../sql-reference/data-types/int-uint.md)) — Major version of the [clickhouse-client](../interfaces/cli.md) or another TCP client. -- `client_version_minor` ([UInt32](../sql-reference/data-types/int-uint.md)) — Minor version of the [clickhouse-client](../interfaces/cli.md) or another TCP client. -- `client_version_patch` ([UInt32](../sql-reference/data-types/int-uint.md)) — Patch component of the [clickhouse-client](../interfaces/cli.md) or another TCP client version. -- `http_method` (UInt8) — HTTP method that initiated the query. Possible values: - - 0 — The query was launched from the TCP interface. - - 1 — `GET` method was used. - - 2 — `POST` method was used. -- `http_user_agent` ([String](../sql-reference/data-types/string.md)) — The `UserAgent` header passed in the HTTP request. -- `quota_key` ([String](../sql-reference/data-types/string.md)) — The “quota key” specified in the [quotas](quotas.md) setting (see `keyed`). -- `revision` ([UInt32](../sql-reference/data-types/int-uint.md)) — ClickHouse revision. -- `thread_numbers` ([Array(UInt32)](../sql-reference/data-types/array.md)) — Number of threads that are participating in query execution. -- `ProfileEvents.Names` ([Array(String)](../sql-reference/data-types/array.md)) — Counters that measure different metrics. The description of them could be found in the table [system.events](#system_tables-events) -- `ProfileEvents.Values` ([Array(UInt64)](../sql-reference/data-types/array.md)) — Values of metrics that are listed in the `ProfileEvents.Names` column. -- `Settings.Names` ([Array(String)](../sql-reference/data-types/array.md)) — Names of settings that were changed when the client ran the query. To enable logging changes to settings, set the `log_query_settings` parameter to 1. -- `Settings.Values` ([Array(String)](../sql-reference/data-types/array.md)) — Values of settings that are listed in the `Settings.Names` column. 
- -**Example** - -``` sql -SELECT * FROM system.query_log LIMIT 1 FORMAT Vertical; -``` - -``` text -Row 1: -────── -type: QueryStart -event_date: 2020-05-13 -event_time: 2020-05-13 14:02:28 -query_start_time: 2020-05-13 14:02:28 -query_duration_ms: 0 -read_rows: 0 -read_bytes: 0 -written_rows: 0 -written_bytes: 0 -result_rows: 0 -result_bytes: 0 -memory_usage: 0 -query: SELECT 1 -exception_code: 0 -exception: -stack_trace: -is_initial_query: 1 -user: default -query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a -address: ::ffff:127.0.0.1 -port: 57720 -initial_user: default -initial_query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a -initial_address: ::ffff:127.0.0.1 -initial_port: 57720 -interface: 1 -os_user: bayonet -client_hostname: clickhouse.ru-central1.internal -client_name: ClickHouse client -client_revision: 54434 -client_version_major: 20 -client_version_minor: 4 -client_version_patch: 1 -http_method: 0 -http_user_agent: -quota_key: -revision: 54434 -thread_ids: [] -ProfileEvents.Names: [] -ProfileEvents.Values: [] -Settings.Names: ['use_uncompressed_cache','load_balancing','log_queries','max_memory_usage'] -Settings.Values: ['0','random','1','10000000000'] - -``` -**See Also** - -- [system.query_thread_log](#system_tables-query_thread_log) — This table contains information about each query execution thread. - -## system.query_thread_log {#system_tables-query_thread_log} - -Contains information about threads which execute queries, for example, thread name, thread start time, duration of query processing. - -To start logging: - -1. Configure parameters in the [query_thread_log](server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) section. -2. Set [log_query_threads](settings/settings.md#settings-log-query-threads) to 1. - -The flushing period of data is set in `flush_interval_milliseconds` parameter of the [query_thread_log](server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../sql-reference/statements/system.md#query_language-system-flush_logs) query. - -ClickHouse doesn't delete data from the table automatically. See [Introduction](#system-tables-introduction) for more details. - -Columns: - -- `event_date` ([Date](../sql-reference/data-types/date.md)) — The date when the thread has finished execution of the query. -- `event_time` ([DateTime](../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query. -- `query_start_time` ([DateTime](../sql-reference/data-types/datetime.md)) — Start time of query execution. -- `query_duration_ms` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of query execution. -- `read_rows` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read rows. -- `read_bytes` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read bytes. -- `written_rows` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0. -- `written_bytes` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0. -- `memory_usage` ([Int64](../sql-reference/data-types/int-uint.md)) — The difference between the amount of allocated and freed memory in context of this thread. 
-- `peak_memory_usage` ([Int64](../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in context of this thread. -- `thread_name` ([String](../sql-reference/data-types/string.md)) — Name of the thread. -- `thread_number` ([UInt32](../sql-reference/data-types/int-uint.md)) — Internal thread ID. -- `thread_id` ([Int32](../sql-reference/data-types/int-uint.md)) — thread ID. -- `master_thread_id` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — OS initial ID of initial thread. -- `query` ([String](../sql-reference/data-types/string.md)) — Query string. -- `is_initial_query` ([UInt8](../sql-reference/data-types/int-uint.md#uint-ranges)) — Query type. Possible values: - - 1 — Query was initiated by the client. - - 0 — Query was initiated by another query for distributed query execution. -- `user` ([String](../sql-reference/data-types/string.md)) — Name of the user who initiated the current query. -- `query_id` ([String](../sql-reference/data-types/string.md)) — ID of the query. -- `address` ([IPv6](../sql-reference/data-types/domains/ipv6.md)) — IP address that was used to make the query. -- `port` ([UInt16](../sql-reference/data-types/int-uint.md#uint-ranges)) — The client port that was used to make the query. -- `initial_user` ([String](../sql-reference/data-types/string.md)) — Name of the user who ran the initial query (for distributed query execution). -- `initial_query_id` ([String](../sql-reference/data-types/string.md)) — ID of the initial query (for distributed query execution). -- `initial_address` ([IPv6](../sql-reference/data-types/domains/ipv6.md)) — IP address that the parent query was launched from. -- `initial_port` ([UInt16](../sql-reference/data-types/int-uint.md#uint-ranges)) — The client port that was used to make the parent query. -- `interface` ([UInt8](../sql-reference/data-types/int-uint.md#uint-ranges)) — Interface that the query was initiated from. Possible values: - - 1 — TCP. - - 2 — HTTP. -- `os_user` ([String](../sql-reference/data-types/string.md)) — OS’s username who runs [clickhouse-client](../interfaces/cli.md). -- `client_hostname` ([String](../sql-reference/data-types/string.md)) — Hostname of the client machine where the [clickhouse-client](../interfaces/cli.md) or another TCP client is run. -- `client_name` ([String](../sql-reference/data-types/string.md)) — The [clickhouse-client](../interfaces/cli.md) or another TCP client name. -- `client_revision` ([UInt32](../sql-reference/data-types/int-uint.md)) — Revision of the [clickhouse-client](../interfaces/cli.md) or another TCP client. -- `client_version_major` ([UInt32](../sql-reference/data-types/int-uint.md)) — Major version of the [clickhouse-client](../interfaces/cli.md) or another TCP client. -- `client_version_minor` ([UInt32](../sql-reference/data-types/int-uint.md)) — Minor version of the [clickhouse-client](../interfaces/cli.md) or another TCP client. -- `client_version_patch` ([UInt32](../sql-reference/data-types/int-uint.md)) — Patch component of the [clickhouse-client](../interfaces/cli.md) or another TCP client version. -- `http_method` ([UInt8](../sql-reference/data-types/int-uint.md#uint-ranges)) — HTTP method that initiated the query. Possible values: - - 0 — The query was launched from the TCP interface. - - 1 — `GET` method was used. - - 2 — `POST` method was used. -- `http_user_agent` ([String](../sql-reference/data-types/string.md)) — The `UserAgent` header passed in the HTTP request. 
-- `quota_key` ([String](../sql-reference/data-types/string.md)) — The “quota key” specified in the [quotas](quotas.md) setting (see `keyed`). -- `revision` ([UInt32](../sql-reference/data-types/int-uint.md)) — ClickHouse revision. -- `ProfileEvents.Names` ([Array(String)](../sql-reference/data-types/array.md)) — Counters that measure different metrics for this thread. The description of them could be found in the table [system.events](#system_tables-events). -- `ProfileEvents.Values` ([Array(UInt64)](../sql-reference/data-types/array.md)) — Values of metrics for this thread that are listed in the `ProfileEvents.Names` column. - -**Example** - -``` sql - SELECT * FROM system.query_thread_log LIMIT 1 FORMAT Vertical -``` - -``` text -Row 1: -────── -event_date: 2020-05-13 -event_time: 2020-05-13 14:02:28 -query_start_time: 2020-05-13 14:02:28 -query_duration_ms: 0 -read_rows: 1 -read_bytes: 1 -written_rows: 0 -written_bytes: 0 -memory_usage: 0 -peak_memory_usage: 0 -thread_name: QueryPipelineEx -thread_id: 28952 -master_thread_id: 28924 -query: SELECT 1 -is_initial_query: 1 -user: default -query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a -address: ::ffff:127.0.0.1 -port: 57720 -initial_user: default -initial_query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a -initial_address: ::ffff:127.0.0.1 -initial_port: 57720 -interface: 1 -os_user: bayonet -client_hostname: clickhouse.ru-central1.internal -client_name: ClickHouse client -client_revision: 54434 -client_version_major: 20 -client_version_minor: 4 -client_version_patch: 1 -http_method: 0 -http_user_agent: -quota_key: -revision: 54434 -ProfileEvents.Names: ['ContextLock','RealTimeMicroseconds','UserTimeMicroseconds','OSCPUWaitMicroseconds','OSCPUVirtualTimeMicroseconds'] -ProfileEvents.Values: [1,97,81,5,81] -... -``` - -**See Also** - -- [system.query_log](#system_tables-query_log) — Description of the `query_log` system table which contains common information about queries execution. - -## system.trace\_log {#system_tables-trace_log} - -Contains stack traces collected by the sampling query profiler. - -ClickHouse creates this table when the [trace\_log](server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) server configuration section is set. Also the [query\_profiler\_real\_time\_period\_ns](settings/settings.md#query_profiler_real_time_period_ns) and [query\_profiler\_cpu\_time\_period\_ns](settings/settings.md#query_profiler_cpu_time_period_ns) settings should be set. - -To analyze logs, use the `addressToLine`, `addressToSymbol` and `demangle` introspection functions. - -Columns: - -- `event_date` ([Date](../sql-reference/data-types/date.md)) — Date of sampling moment. - -- `event_time` ([DateTime](../sql-reference/data-types/datetime.md)) — Timestamp of the sampling moment. - -- `timestamp_ns` ([UInt64](../sql-reference/data-types/int-uint.md)) — Timestamp of the sampling moment in nanoseconds. - -- `revision` ([UInt32](../sql-reference/data-types/int-uint.md)) — ClickHouse server build revision. - - When connecting to server by `clickhouse-client`, you see the string similar to `Connected to ClickHouse server version 19.18.1 revision 54429.`. This field contains the `revision`, but not the `version` of a server. - -- `timer_type` ([Enum8](../sql-reference/data-types/enum.md)) — Timer type: - - - `Real` represents wall-clock time. - - `CPU` represents CPU time. - -- `thread_number` ([UInt32](../sql-reference/data-types/int-uint.md)) — Thread identifier. 
- -- `query_id` ([String](../sql-reference/data-types/string.md)) — Query identifier that can be used to get details about a query that was running from the [query\_log](#system_tables-query_log) system table. - -- `trace` ([Array(UInt64)](../sql-reference/data-types/array.md)) — Stack trace at the moment of sampling. Each element is a virtual memory address inside ClickHouse server process. - -**Example** - -``` sql -SELECT * FROM system.trace_log LIMIT 1 \G -``` - -``` text -Row 1: -────── -event_date: 2019-11-15 -event_time: 2019-11-15 15:09:38 -revision: 54428 -timer_type: Real -thread_number: 48 -query_id: acc4d61f-5bd1-4a3e-bc91-2180be37c915 -trace: [94222141367858,94222152240175,94222152325351,94222152329944,94222152330796,94222151449980,94222144088167,94222151682763,94222144088167,94222151682763,94222144088167,94222144058283,94222144059248,94222091840750,94222091842302,94222091831228,94222189631488,140509950166747,140509942945935] -``` - -## system.replicas {#system_tables-replicas} - -Contains information and status for replicated tables residing on the local server. -This table can be used for monitoring. The table contains a row for every Replicated\* table. - -Example: - -``` sql -SELECT * -FROM system.replicas -WHERE table = 'visits' -FORMAT Vertical -``` - -``` text -Row 1: -────── -database: merge -table: visits -engine: ReplicatedCollapsingMergeTree -is_leader: 1 -can_become_leader: 1 -is_readonly: 0 -is_session_expired: 0 -future_parts: 1 -parts_to_check: 0 -zookeeper_path: /clickhouse/tables/01-06/visits -replica_name: example01-06-1.yandex.ru -replica_path: /clickhouse/tables/01-06/visits/replicas/example01-06-1.yandex.ru -columns_version: 9 -queue_size: 1 -inserts_in_queue: 0 -merges_in_queue: 1 -part_mutations_in_queue: 0 -queue_oldest_time: 2020-02-20 08:34:30 -inserts_oldest_time: 0000-00-00 00:00:00 -merges_oldest_time: 2020-02-20 08:34:30 -part_mutations_oldest_time: 0000-00-00 00:00:00 -oldest_part_to_get: -oldest_part_to_merge_to: 20200220_20284_20840_7 -oldest_part_to_mutate_to: -log_max_index: 596273 -log_pointer: 596274 -last_queue_update: 2020-02-20 08:34:32 -absolute_delay: 0 -total_replicas: 2 -active_replicas: 2 -``` - -Columns: - -- `database` (`String`) - Database name -- `table` (`String`) - Table name -- `engine` (`String`) - Table engine name -- `is_leader` (`UInt8`) - Whether the replica is the leader. - Only one replica at a time can be the leader. The leader is responsible for selecting background merges to perform. - Note that writes can be performed to any replica that is available and has a session in ZK, regardless of whether it is a leader. -- `can_become_leader` (`UInt8`) - Whether the replica can be elected as a leader. -- `is_readonly` (`UInt8`) - Whether the replica is in read-only mode. - This mode is turned on if the config doesn’t have sections with ZooKeeper, if an unknown error occurred when reinitializing sessions in ZooKeeper, and during session reinitialization in ZooKeeper. -- `is_session_expired` (`UInt8`) - the session with ZooKeeper has expired. Basically the same as `is_readonly`. -- `future_parts` (`UInt32`) - The number of data parts that will appear as the result of INSERTs or merges that haven’t been done yet. -- `parts_to_check` (`UInt32`) - The number of data parts in the queue for verification. A part is put in the verification queue if there is suspicion that it might be damaged. -- `zookeeper_path` (`String`) - Path to table data in ZooKeeper. -- `replica_name` (`String`) - Replica name in ZooKeeper. 
Different replicas of the same table have different names. -- `replica_path` (`String`) - Path to replica data in ZooKeeper. The same as concatenating ‘zookeeper\_path/replicas/replica\_path’. -- `columns_version` (`Int32`) - Version number of the table structure. Indicates how many times ALTER was performed. If replicas have different versions, it means some replicas haven’t made all of the ALTERs yet. -- `queue_size` (`UInt32`) - Size of the queue for operations waiting to be performed. Operations include inserting blocks of data, merges, and certain other actions. It usually coincides with `future_parts`. -- `inserts_in_queue` (`UInt32`) - Number of inserts of blocks of data that need to be made. Insertions are usually replicated fairly quickly. If this number is large, it means something is wrong. -- `merges_in_queue` (`UInt32`) - The number of merges waiting to be made. Sometimes merges are lengthy, so this value may be greater than zero for a long time. -- `part_mutations_in_queue` (`UInt32`) - The number of mutations waiting to be made. -- `queue_oldest_time` (`DateTime`) - If `queue_size` greater than 0, shows when the oldest operation was added to the queue. -- `inserts_oldest_time` (`DateTime`) - See `queue_oldest_time` -- `merges_oldest_time` (`DateTime`) - See `queue_oldest_time` -- `part_mutations_oldest_time` (`DateTime`) - See `queue_oldest_time` - -The next 4 columns have a non-zero value only where there is an active session with ZK. - -- `log_max_index` (`UInt64`) - Maximum entry number in the log of general activity. -- `log_pointer` (`UInt64`) - Maximum entry number in the log of general activity that the replica copied to its execution queue, plus one. If `log_pointer` is much smaller than `log_max_index`, something is wrong. -- `last_queue_update` (`DateTime`) - When the queue was updated last time. -- `absolute_delay` (`UInt64`) - How big lag in seconds the current replica has. -- `total_replicas` (`UInt8`) - The total number of known replicas of this table. -- `active_replicas` (`UInt8`) - The number of replicas of this table that have a session in ZooKeeper (i.e., the number of functioning replicas). - -If you request all the columns, the table may work a bit slowly, since several reads from ZooKeeper are made for each row. -If you don’t request the last 4 columns (log\_max\_index, log\_pointer, total\_replicas, active\_replicas), the table works quickly. - -For example, you can check that everything is working correctly like this: - -``` sql -SELECT - database, - table, - is_leader, - is_readonly, - is_session_expired, - future_parts, - parts_to_check, - columns_version, - queue_size, - inserts_in_queue, - merges_in_queue, - log_max_index, - log_pointer, - total_replicas, - active_replicas -FROM system.replicas -WHERE - is_readonly - OR is_session_expired - OR future_parts > 20 - OR parts_to_check > 10 - OR queue_size > 20 - OR inserts_in_queue > 10 - OR log_max_index - log_pointer > 10 - OR total_replicas < 2 - OR active_replicas < total_replicas -``` - -If this query doesn’t return anything, it means that everything is fine. - -## system.settings {#system-tables-system-settings} - -Contains information about session settings for current user. - -Columns: - -- `name` ([String](../sql-reference/data-types/string.md)) — Setting name. -- `value` ([String](../sql-reference/data-types/string.md)) — Setting value. -- `changed` ([UInt8](../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting is changed from its default value. 
-- `description` ([String](../sql-reference/data-types/string.md)) — Short setting description.
-- `min` ([Nullable](../sql-reference/data-types/nullable.md)([String](../sql-reference/data-types/string.md))) — Minimum value of the setting, if any is set via [constraints](settings/constraints-on-settings.md#constraints-on-settings). If the setting has no minimum value, contains [NULL](../sql-reference/syntax.md#null-literal).
-- `max` ([Nullable](../sql-reference/data-types/nullable.md)([String](../sql-reference/data-types/string.md))) — Maximum value of the setting, if any is set via [constraints](settings/constraints-on-settings.md#constraints-on-settings). If the setting has no maximum value, contains [NULL](../sql-reference/syntax.md#null-literal).
-- `readonly` ([UInt8](../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether the current user can change the setting:
-    - `0` — Current user can change the setting.
-    - `1` — Current user can’t change the setting.
-
-**Example**
-
-The following example shows how to get information about settings whose names contain `min_i`.
-
-``` sql
-SELECT *
-FROM system.settings
-WHERE name LIKE '%min_i%'
-```
-
-``` text
-┌─name────────────────────────────────────────┬─value─────┬─changed─┬─description───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─min──┬─max──┬─readonly─┐
-│ min_insert_block_size_rows │ 1048576 │ 0 │ Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │
-│ min_insert_block_size_bytes │ 268435456 │ 0 │ Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │
-│ read_backoff_min_interval_between_events_ms │ 1000 │ 0 │ Settings to reduce the number of threads in case of slow reads. Do not pay attention to the event, if the previous one has passed less than a certain amount of time. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │
-└─────────────────────────────────────────────┴───────────┴─────────┴───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴──────┴──────┴──────────┘
-```
-
-Using `WHERE changed` can be useful, for example, when you want to check:
-
-- Whether settings in configuration files are loaded correctly and are in use.
-- Settings that changed in the current session.
-
-The names and values of settings changed in the current session might look like this:
-
-``` text
-┌─name───────────────────┬─value───────┐
-│ max_threads            │ 8           │
-│ use_uncompressed_cache │ 0           │
-│ load_balancing         │ random      │
-│ max_memory_usage       │ 10000000000 │
-└────────────────────────┴─────────────┘
-```
-
-``` sql
-SELECT * FROM system.settings WHERE changed AND name='load_balancing'
-```
-
-**See also**
-
-- [Settings](settings/index.md#session-settings-intro)
-- [Permissions for Queries](settings/permissions-for-queries.md#settings_readonly)
-- [Constraints on Settings](settings/constraints-on-settings.md)
-
-## system.merge\_tree\_settings {#system-merge_tree_settings}
-
-Contains information about settings for `MergeTree` tables.
-
-Columns:
-
-- `name` (String) — Setting name.
-- `value` (String) — Setting value.
-- `description` (String) — Setting description.
-- `type` (String) — Setting type (implementation specific string value).
-- `changed` (UInt8) — Whether the setting was explicitly defined in the config or explicitly changed.
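As a quick illustrative sketch (an editorial addition, not part of the original page), the columns listed above are enough to check which `MergeTree`-level settings were changed explicitly:

``` sql
SELECT name, value, changed
FROM system.merge_tree_settings
WHERE changed
ORDER BY name
```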
-
-## system.table\_engines {#system-table-engines}
-
-Contains the description of table engines supported by the server and their feature support information.
-
-This table contains the following columns (the column type is shown in brackets):
-
-- `name` (String) — The name of the table engine.
-- `supports_settings` (UInt8) — Flag that indicates if table engine supports `SETTINGS` clause.
-- `supports_skipping_indices` (UInt8) — Flag that indicates if table engine supports [skipping indices](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-data_skipping-indexes).
-- `supports_ttl` (UInt8) — Flag that indicates if table engine supports [TTL](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
-- `supports_sort_order` (UInt8) — Flag that indicates if table engine supports clauses `PARTITION_BY`, `PRIMARY_KEY`, `ORDER_BY` and `SAMPLE_BY`.
-- `supports_replication` (UInt8) — Flag that indicates if table engine supports [data replication](../engines/table-engines/mergetree-family/replication.md).
-- `supports_deduplication` (UInt8) — Flag that indicates if table engine supports data deduplication.
-
-Example:
-
-``` sql
-SELECT *
-FROM system.table_engines
-WHERE name in ('Kafka', 'MergeTree', 'ReplicatedCollapsingMergeTree')
-```
-
-``` text
-┌─name──────────────────────────┬─supports_settings─┬─supports_skipping_indices─┬─supports_sort_order─┬─supports_ttl─┬─supports_replication─┬─supports_deduplication─┐
-│ Kafka │ 1 │ 0 │ 0 │ 0 │ 0 │ 0 │
-│ MergeTree │ 1 │ 1 │ 1 │ 1 │ 0 │ 0 │
-│ ReplicatedCollapsingMergeTree │ 1 │ 1 │ 1 │ 1 │ 1 │ 1 │
-└───────────────────────────────┴───────────────────┴───────────────────────────┴─────────────────────┴──────────────┴──────────────────────┴────────────────────────┘
-```
-
-**See also**
-
-- MergeTree family [query clauses](../engines/table-engines/mergetree-family/mergetree.md#mergetree-query-clauses)
-- Kafka [settings](../engines/table-engines/integrations/kafka.md#table_engine-kafka-creating-a-table)
-- Join [settings](../engines/table-engines/special/join.md#join-limitations-and-settings)
-
-## system.tables {#system-tables}
-
-Contains metadata of each table that the server knows about. Detached tables are not shown in `system.tables`.
-
-This table contains the following columns (the column type is shown in brackets):
-
-- `database` (String) — The name of the database the table is in.
-
-- `name` (String) — Table name.
-
-- `engine` (String) — Table engine name (without parameters).
-
-- `is_temporary` (UInt8) - Flag that indicates whether the table is temporary.
-
-- `data_path` (String) - Path to the table data in the file system.
-
-- `metadata_path` (String) - Path to the table metadata in the file system.
-
-- `metadata_modification_time` (DateTime) - Time of latest modification of the table metadata.
-
-- `dependencies_database` (Array(String)) - Database dependencies.
-
-- `dependencies_table` (Array(String)) - Table dependencies ([MaterializedView](../engines/table-engines/special/materializedview.md) tables based on the current table).
-
-- `create_table_query` (String) - The query that was used to create the table.
-
-- `engine_full` (String) - Parameters of the table engine.
-
-- `partition_key` (String) - The partition key expression specified in the table.
-
-- `sorting_key` (String) - The sorting key expression specified in the table.
-
-- `primary_key` (String) - The primary key expression specified in the table.
-
-- `sampling_key` (String) - The sampling key expression specified in the table.
-
-- `storage_policy` (String) - The storage policy:
-
-    - [MergeTree](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes)
-    - [Distributed](../engines/table-engines/special/distributed.md#distributed)
-
-- `total_rows` (Nullable(UInt64)) - Total number of rows, if it is possible to quickly determine the exact number of rows in the table, otherwise `Null` (including the underlying `Buffer` table).
-
-- `total_bytes` (Nullable(UInt64)) - Total number of bytes, if it is possible to quickly determine the exact number of bytes for the table on storage, otherwise `Null` (**does not** include any underlying storage).
-
-    - If the table stores data on disk, returns used space on disk (i.e. compressed).
-    - If the table stores data in memory, returns the approximate number of used bytes in memory.
-
-The `system.tables` table is used in `SHOW TABLES` query implementation.
-
-## system.zookeeper {#system-zookeeper}
-
-The table does not exist if ZooKeeper is not configured. Allows reading data from the ZooKeeper cluster defined in the config.
-The query must have a ‘path’ equality condition in the WHERE clause. This is the path in ZooKeeper for the children that you want to get data for.
-
-The query `SELECT * FROM system.zookeeper WHERE path = '/clickhouse'` outputs data for all children on the `/clickhouse` node.
-To output data for all root nodes, write path = ‘/’.
-If the path specified in ‘path’ doesn’t exist, an exception will be thrown.
-
-Columns:
-
-- `name` (String) — The name of the node.
-- `path` (String) — The path to the node.
-- `value` (String) — Node value.
-- `dataLength` (Int32) — Size of the value.
-- `numChildren` (Int32) — Number of descendants.
-- `czxid` (Int64) — ID of the transaction that created the node.
-- `mzxid` (Int64) — ID of the transaction that last changed the node.
-- `pzxid` (Int64) — ID of the transaction that last deleted or added descendants.
-- `ctime` (DateTime) — Time of node creation.
-- `mtime` (DateTime) — Time of the last modification of the node.
-- `version` (Int32) — Node version: the number of times the node was changed.
-- `cversion` (Int32) — Number of added or removed descendants.
-- `aversion` (Int32) — Number of changes to the ACL.
-- `ephemeralOwner` (Int64) — For ephemeral nodes, the ID of the session that owns this node.
-
-Example:
-
-``` sql
-SELECT *
-FROM system.zookeeper
-WHERE path = '/clickhouse/tables/01-08/visits/replicas'
-FORMAT Vertical
-```
-
-``` text
-Row 1:
-──────
-name: example01-08-1.yandex.ru
-value:
-czxid: 932998691229
-mzxid: 932998691229
-ctime: 2015-03-27 16:49:51
-mtime: 2015-03-27 16:49:51
-version: 0
-cversion: 47
-aversion: 0
-ephemeralOwner: 0
-dataLength: 0
-numChildren: 7
-pzxid: 987021031383
-path: /clickhouse/tables/01-08/visits/replicas
-
-Row 2:
-──────
-name: example01-08-2.yandex.ru
-value:
-czxid: 933002738135
-mzxid: 933002738135
-ctime: 2015-03-27 16:57:01
-mtime: 2015-03-27 16:57:01
-version: 0
-cversion: 37
-aversion: 0
-ephemeralOwner: 0
-dataLength: 0
-numChildren: 7
-pzxid: 987021252247
-path: /clickhouse/tables/01-08/visits/replicas
-```
-
-## system.mutations {#system_tables-mutations}
-
-The table contains information about [mutations](../sql-reference/statements/alter.md#alter-mutations) of MergeTree tables and their progress. Each mutation command is represented by a single row.
The table has the following columns: - -**database**, **table** - The name of the database and table to which the mutation was applied. - -**mutation\_id** - The ID of the mutation. For replicated tables these IDs correspond to znode names in the `/mutations/` directory in ZooKeeper. For unreplicated tables the IDs correspond to file names in the data directory of the table. - -**command** - The mutation command string (the part of the query after `ALTER TABLE [db.]table`). - -**create\_time** - When this mutation command was submitted for execution. - -**block\_numbers.partition\_id**, **block\_numbers.number** - A nested column. For mutations of replicated tables, it contains one record for each partition: the partition ID and the block number that was acquired by the mutation (in each partition, only parts that contain blocks with numbers less than the block number acquired by the mutation in that partition will be mutated). In non-replicated tables, block numbers in all partitions form a single sequence. This means that for mutations of non-replicated tables, the column will contain one record with a single block number acquired by the mutation. - -**parts\_to\_do** - The number of data parts that need to be mutated for the mutation to finish. - -**is\_done** - Is the mutation done? Note that even if `parts_to_do = 0` it is possible that a mutation of a replicated table is not done yet because of a long-running INSERT that will create a new data part that will need to be mutated. - -If there were problems with mutating some parts, the following columns contain additional information: - -**latest\_failed\_part** - The name of the most recent part that could not be mutated. - -**latest\_fail\_time** - The time of the most recent part mutation failure. - -**latest\_fail\_reason** - The exception message that caused the most recent part mutation failure. - -## system.disks {#system_tables-disks} - -Contains information about disks defined in the [server configuration](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes_configure). - -Columns: - -- `name` ([String](../sql-reference/data-types/string.md)) — Name of a disk in the server configuration. -- `path` ([String](../sql-reference/data-types/string.md)) — Path to the mount point in the file system. -- `free_space` ([UInt64](../sql-reference/data-types/int-uint.md)) — Free space on disk in bytes. -- `total_space` ([UInt64](../sql-reference/data-types/int-uint.md)) — Disk volume in bytes. -- `keep_free_space` ([UInt64](../sql-reference/data-types/int-uint.md)) — Amount of disk space that should stay free on disk in bytes. Defined in the `keep_free_space_bytes` parameter of disk configuration. - -## system.storage\_policies {#system_tables-storage_policies} - -Contains information about storage policies and volumes defined in the [server configuration](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes_configure). - -Columns: - -- `policy_name` ([String](../sql-reference/data-types/string.md)) — Name of the storage policy. -- `volume_name` ([String](../sql-reference/data-types/string.md)) — Volume name defined in the storage policy. -- `volume_priority` ([UInt64](../sql-reference/data-types/int-uint.md)) — Volume order number in the configuration. -- `disks` ([Array(String)](../sql-reference/data-types/array.md)) — Disk names, defined in the storage policy. 
-- `max_data_part_size` ([UInt64](../sql-reference/data-types/int-uint.md)) — Maximum size of a data part that can be stored on volume disks (0 — no limit).
-- `move_factor` ([Float64](../sql-reference/data-types/float.md)) — Ratio of free disk space. When the ratio exceeds the value of the configuration parameter, ClickHouse starts to move data to the next volume in order.
-
-If the storage policy contains more than one volume, then information for each volume is stored in an individual row of the table.
-
-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/)
diff --git a/docs/en/operations/system-tables/asynchronous_metric_log.md b/docs/en/operations/system-tables/asynchronous_metric_log.md
new file mode 100644
index 00000000000..afc71af1114
--- /dev/null
+++ b/docs/en/operations/system-tables/asynchronous_metric_log.md
@@ -0,0 +1,3 @@
+## system.asynchronous\_metric\_log {#system-tables-async-log}
+
+Contains the historical values for `system.asynchronous_metrics` (see [system.asynchronous\_metrics](asynchronous_metrics.md#system_tables-asynchronous_metrics)).
diff --git a/docs/en/operations/system-tables/asynchronous_metrics.md b/docs/en/operations/system-tables/asynchronous_metrics.md
new file mode 100644
index 00000000000..438dfb62fb8
--- /dev/null
+++ b/docs/en/operations/system-tables/asynchronous_metrics.md
@@ -0,0 +1,36 @@
+# system.asynchronous\_metrics {#system_tables-asynchronous_metrics}
+
+Contains metrics that are calculated periodically in the background. For example, the amount of RAM in use.
+
+Columns:
+
+- `metric` ([String](../../sql-reference/data-types/string.md)) — Metric name.
+- `value` ([Float64](../../sql-reference/data-types/float.md)) — Metric value.
+
+**Example**
+
+``` sql
+SELECT * FROM system.asynchronous_metrics LIMIT 10
+```
+
+``` text
+┌─metric──────────────────────────────────┬──────value─┐
+│ jemalloc.background_thread.run_interval │ 0 │
+│ jemalloc.background_thread.num_runs │ 0 │
+│ jemalloc.background_thread.num_threads │ 0 │
+│ jemalloc.retained │ 422551552 │
+│ jemalloc.mapped │ 1682989056 │
+│ jemalloc.resident │ 1656446976 │
+│ jemalloc.metadata_thp │ 0 │
+│ jemalloc.metadata │ 10226856 │
+│ UncompressedCacheCells │ 0 │
+│ MarkCacheFiles │ 0 │
+└─────────────────────────────────────────┴────────────┘
+```
+
+**See Also**
+
+- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring.
+- [system.metrics](metrics.md#system_tables-metrics) — Contains instantly calculated metrics.
+- [system.events](events.md#system_tables-events) — Contains a number of events that have occurred.
+- [system.metric\_log](metric_log.md#system_tables-metric_log) — Contains a history of metrics values from the `system.metrics` and `system.events` tables.
diff --git a/docs/en/operations/system-tables/clusters.md b/docs/en/operations/system-tables/clusters.md
new file mode 100644
index 00000000000..bba695cff35
--- /dev/null
+++ b/docs/en/operations/system-tables/clusters.md
@@ -0,0 +1,24 @@
+# system.clusters {#system-clusters}
+
+Contains information about clusters available in the config file and the servers in them.
+
+Columns:
+
+- `cluster` (String) — The cluster name.
+- `shard_num` (UInt32) — The shard number in the cluster, starting from 1.
+- `shard_weight` (UInt32) — The relative weight of the shard when writing data.
+- `replica_num` (UInt32) — The replica number in the shard, starting from 1.
+- `host_name` (String) — The host name, as specified in the config.
+- `host_address` (String) — The host IP address obtained from DNS. +- `port` (UInt16) — The port to use for connecting to the server. +- `user` (String) — The name of the user for connecting to the server. +- `errors_count` (UInt32) - number of times this host failed to reach replica. +- `estimated_recovery_time` (UInt32) - seconds left until replica error count is zeroed and it is considered to be back to normal. + +Please note that `errors_count` is updated once per query to the cluster, but `estimated_recovery_time` is recalculated on-demand. So there could be a case of non-zero `errors_count` and zero `estimated_recovery_time`, that next query will zero `errors_count` and try to use replica as if it has no errors. + +**See also** + +- [Table engine Distributed](../../engines/table-engines/special/distributed.md) +- [distributed\_replica\_error\_cap setting](../../operations/settings/settings.md#settings-distributed_replica_error_cap) +- [distributed\_replica\_error\_half\_life setting](../../operations/settings/settings.md#settings-distributed_replica_error_half_life) diff --git a/docs/en/operations/system-tables/columns.md b/docs/en/operations/system-tables/columns.md new file mode 100644 index 00000000000..fa96d8ec209 --- /dev/null +++ b/docs/en/operations/system-tables/columns.md @@ -0,0 +1,22 @@ +# system.columns {#system-columns} + +Contains information about columns in all the tables. + +You can use this table to get information similar to the [DESCRIBE TABLE](../../sql-reference/statements/misc.md#misc-describe-table) query, but for multiple tables at once. + +The `system.columns` table contains the following columns (the column type is shown in brackets): + +- `database` (String) — Database name. +- `table` (String) — Table name. +- `name` (String) — Column name. +- `type` (String) — Column type. +- `default_kind` (String) — Expression type (`DEFAULT`, `MATERIALIZED`, `ALIAS`) for the default value, or an empty string if it is not defined. +- `default_expression` (String) — Expression for the default value, or an empty string if it is not defined. +- `data_compressed_bytes` (UInt64) — The size of compressed data, in bytes. +- `data_uncompressed_bytes` (UInt64) — The size of decompressed data, in bytes. +- `marks_bytes` (UInt64) — The size of marks, in bytes. +- `comment` (String) — Comment on the column, or an empty string if it is not defined. +- `is_in_partition_key` (UInt8) — Flag that indicates whether the column is in the partition expression. +- `is_in_sorting_key` (UInt8) — Flag that indicates whether the column is in the sorting key expression. +- `is_in_primary_key` (UInt8) — Flag that indicates whether the column is in the primary key expression. +- `is_in_sampling_key` (UInt8) — Flag that indicates whether the column is in the sampling key expression. diff --git a/docs/en/operations/system-tables/contributors.md b/docs/en/operations/system-tables/contributors.md new file mode 100644 index 00000000000..54d543c8cc7 --- /dev/null +++ b/docs/en/operations/system-tables/contributors.md @@ -0,0 +1,40 @@ +# system.contributors {#system-contributors} + +Contains information about contributors. The order is random at query execution time. + +Columns: + +- `name` (String) — Contributor (author) name from git log. 
+ +**Example** + +``` sql +SELECT * FROM system.contributors LIMIT 10 +``` + +``` text +┌─name─────────────┐ +│ Olga Khvostikova │ +│ Max Vetrov │ +│ LiuYangkuan │ +│ svladykin │ +│ zamulla │ +│ Šimon Podlipský │ +│ BayoNet │ +│ Ilya Khomutov │ +│ Amy Krishnevsky │ +│ Loud_Scream │ +└──────────────────┘ +``` + +To find out yourself in the table, use a query: + +``` sql +SELECT * FROM system.contributors WHERE name = 'Olga Khvostikova' +``` + +``` text +┌─name─────────────┐ +│ Olga Khvostikova │ +└──────────────────┘ +``` diff --git a/docs/en/operations/system-tables/data_type_families.md b/docs/en/operations/system-tables/data_type_families.md new file mode 100644 index 00000000000..76dea7a9379 --- /dev/null +++ b/docs/en/operations/system-tables/data_type_families.md @@ -0,0 +1,34 @@ +# system.data\_type\_families {#system_tables-data_type_families} + +Contains information about supported [data types](../../sql-reference/data-types/). + +Columns: + +- `name` ([String](../../sql-reference/data-types/string.md)) — Data type name. +- `case_insensitive` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Property that shows whether you can use a data type name in a query in case insensitive manner or not. For example, `Date` and `date` are both valid. +- `alias_to` ([String](../../sql-reference/data-types/string.md)) — Data type name for which `name` is an alias. + +**Example** + +``` sql +SELECT * FROM system.data_type_families WHERE alias_to = 'String' +``` + +``` text +┌─name───────┬─case_insensitive─┬─alias_to─┐ +│ LONGBLOB │ 1 │ String │ +│ LONGTEXT │ 1 │ String │ +│ TINYTEXT │ 1 │ String │ +│ TEXT │ 1 │ String │ +│ VARCHAR │ 1 │ String │ +│ MEDIUMBLOB │ 1 │ String │ +│ BLOB │ 1 │ String │ +│ TINYBLOB │ 1 │ String │ +│ CHAR │ 1 │ String │ +│ MEDIUMTEXT │ 1 │ String │ +└────────────┴──────────────────┴──────────┘ +``` + +**See Also** + +- [Syntax](../../sql-reference/syntax.md) — Information about supported syntax. diff --git a/docs/en/operations/system-tables/databases.md b/docs/en/operations/system-tables/databases.md new file mode 100644 index 00000000000..5bcfde9dee3 --- /dev/null +++ b/docs/en/operations/system-tables/databases.md @@ -0,0 +1,7 @@ +# system.databases {#system-databases} + +This table contains a single String column called ‘name’ – the name of a database. + +Each database that the server knows about has a corresponding entry in the table. + +This system table is used for implementing the `SHOW DATABASES` query. diff --git a/docs/en/operations/system-tables/detached_parts.md b/docs/en/operations/system-tables/detached_parts.md new file mode 100644 index 00000000000..4c4c1f85413 --- /dev/null +++ b/docs/en/operations/system-tables/detached_parts.md @@ -0,0 +1,9 @@ +# system.detached\_parts {#system_tables-detached_parts} + +Contains information about detached parts of [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables. The `reason` column specifies why the part was detached. + +For user-detached parts, the reason is empty. Such parts can be attached with [ALTER TABLE ATTACH PARTITION\|PART](../../sql-reference/statements/alter.md#alter_attach-partition) command. + +For the description of other columns, see [system.parts](parts.md#system_tables-parts). + +If part name is invalid, values of some columns may be `NULL`. Such parts can be deleted with [ALTER TABLE DROP DETACHED PART](../../sql-reference/statements/alter.md#alter_drop-detached). 
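An illustrative query (an editorial sketch; it assumes the `database` and `table` columns mirror `system.parts`, as the description above implies) for summarizing detached parts by reason:

``` sql
SELECT database, table, reason, count() AS parts_count
FROM system.detached_parts
GROUP BY database, table, reason
ORDER BY parts_count DESC
```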
diff --git a/docs/en/operations/system-tables/dictionaries.md b/docs/en/operations/system-tables/dictionaries.md new file mode 100644 index 00000000000..59c3eb7f03e --- /dev/null +++ b/docs/en/operations/system-tables/dictionaries.md @@ -0,0 +1,61 @@ +# system.dictionaries {#system_tables-dictionaries} + +Contains information about [external dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md). + +Columns: + +- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database containing the dictionary created by DDL query. Empty string for other dictionaries. +- `name` ([String](../../sql-reference/data-types/string.md)) — [Dictionary name](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md). +- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — Dictionary status. Possible values: + - `NOT_LOADED` — Dictionary was not loaded because it was not used. + - `LOADED` — Dictionary loaded successfully. + - `FAILED` — Unable to load the dictionary as a result of an error. + - `LOADING` — Dictionary is loading now. + - `LOADED_AND_RELOADING` — Dictionary is loaded successfully, and is being reloaded right now (frequent reasons: [SYSTEM RELOAD DICTIONARY](../../sql-reference/statements/system.md#query_language-system-reload-dictionary) query, timeout, dictionary config has changed). + - `FAILED_AND_RELOADING` — Could not load the dictionary as a result of an error and is loading now. +- `origin` ([String](../../sql-reference/data-types/string.md)) — Path to the configuration file that describes the dictionary. +- `type` ([String](../../sql-reference/data-types/string.md)) — Type of a dictionary allocation. [Storing Dictionaries in Memory](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md). +- `key` — [Key type](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-key): Numeric Key ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) or Сomposite key ([String](../../sql-reference/data-types/string.md)) — form “(type 1, type 2, …, type n)”. +- `attribute.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of [attribute names](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes) provided by the dictionary. +- `attribute.types` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Corresponding array of [attribute types](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes) that are provided by the dictionary. +- `bytes_allocated` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Amount of RAM allocated for the dictionary. +- `query_count` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of queries since the dictionary was loaded or since the last successful reboot. +- `hit_rate` ([Float64](../../sql-reference/data-types/float.md)) — For cache dictionaries, the percentage of uses for which the value was in the cache. +- `element_count` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of items stored in the dictionary. +- `load_factor` ([Float64](../../sql-reference/data-types/float.md)) — Percentage filled in the dictionary (for a hashed dictionary, the percentage filled in the hash table). 
+- `source` ([String](../../sql-reference/data-types/string.md)) — Text describing the [data source](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md) for the dictionary. +- `lifetime_min` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Minimum [lifetime](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if `invalidate_query` is set, then only if it has changed). Set in seconds. +- `lifetime_max` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Maximum [lifetime](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if `invalidate_query` is set, then only if it has changed). Set in seconds. +- `loading_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time for loading the dictionary. +- `last_successful_update_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — End time for loading or updating the dictionary. Helps to monitor some troubles with external sources and investigate causes. +- `loading_duration` ([Float32](../../sql-reference/data-types/float.md)) — Duration of a dictionary loading. +- `last_exception` ([String](../../sql-reference/data-types/string.md)) — Text of the error that occurs when creating or reloading the dictionary if the dictionary couldn’t be created. + +**Example** + +Configure the dictionary. + +``` sql +CREATE DICTIONARY dictdb.dict +( + `key` Int64 DEFAULT -1, + `value_default` String DEFAULT 'world', + `value_expression` String DEFAULT 'xxx' EXPRESSION 'toString(127 * 172)' +) +PRIMARY KEY key +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'dicttbl' DB 'dictdb')) +LIFETIME(MIN 0 MAX 1) +LAYOUT(FLAT()) +``` + +Make sure that the dictionary is loaded. + +``` sql +SELECT * FROM system.dictionaries +``` + +``` text +┌─database─┬─name─┬─status─┬─origin──────┬─type─┬─key────┬─attribute.names──────────────────────┬─attribute.types─────┬─bytes_allocated─┬─query_count─┬─hit_rate─┬─element_count─┬───────────load_factor─┬─source─────────────────────┬─lifetime_min─┬─lifetime_max─┬──loading_start_time─┌──last_successful_update_time─┬──────loading_duration─┬─last_exception─┐ +│ dictdb │ dict │ LOADED │ dictdb.dict │ Flat │ UInt64 │ ['value_default','value_expression'] │ ['String','String'] │ 74032 │ 0 │ 1 │ 1 │ 0.0004887585532746823 │ ClickHouse: dictdb.dicttbl │ 0 │ 1 │ 2020-03-04 04:17:34 │ 2020-03-04 04:30:34 │ 0.002 │ │ +└──────────┴──────┴────────┴─────────────┴──────┴────────┴──────────────────────────────────────┴─────────────────────┴─────────────────┴─────────────┴──────────┴───────────────┴───────────────────────┴────────────────────────────┴──────────────┴──────────────┴─────────────────────┴──────────────────────────────┘───────────────────────┴────────────────┘ +``` diff --git a/docs/en/operations/system-tables/disks.md b/docs/en/operations/system-tables/disks.md new file mode 100644 index 00000000000..69909138ee8 --- /dev/null +++ b/docs/en/operations/system-tables/disks.md @@ -0,0 +1,26 @@ +# system.disks {#system_tables-disks} + +Contains information about disks defined in the [server configuration](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes_configure). 
+ +Columns: + +- `name` ([String](../../sql-reference/data-types/string.md)) — Name of a disk in the server configuration. +- `path` ([String](../../sql-reference/data-types/string.md)) — Path to the mount point in the file system. +- `free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Free space on disk in bytes. +- `total_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Disk volume in bytes. +- `keep_free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Amount of disk space that should stay free on disk in bytes. Defined in the `keep_free_space_bytes` parameter of disk configuration. + +## system.storage\_policies {#system_tables-storage_policies} + +Contains information about storage policies and volumes defined in the [server configuration](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes_configure). + +Columns: + +- `policy_name` ([String](../../sql-reference/data-types/string.md)) — Name of the storage policy. +- `volume_name` ([String](../../sql-reference/data-types/string.md)) — Volume name defined in the storage policy. +- `volume_priority` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Volume order number in the configuration. +- `disks` ([Array(String)](../../sql-reference/data-types/array.md)) — Disk names, defined in the storage policy. +- `max_data_part_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Maximum size of a data part that can be stored on volume disks (0 — no limit). +- `move_factor` ([Float64](../../sql-reference/data-types/float.md)) — Ratio of free disk space. When the ratio exceeds the value of configuration parameter, ClickHouse start to move data to the next volume in order. + +If the storage policy contains more then one volume, then information for each volume is stored in the individual row of the table. diff --git a/docs/en/operations/system-tables/events.md b/docs/en/operations/system-tables/events.md new file mode 100644 index 00000000000..0a073a1ab1a --- /dev/null +++ b/docs/en/operations/system-tables/events.md @@ -0,0 +1,32 @@ +# system.events {#system_tables-events} + +Contains information about the number of events that have occurred in the system. For example, in the table, you can find how many `SELECT` queries were processed since the ClickHouse server started. + +Columns: + +- `event` ([String](../../sql-reference/data-types/string.md)) — Event name. +- `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of events occurred. +- `description` ([String](../../sql-reference/data-types/string.md)) — Event description. + +**Example** + +``` sql +SELECT * FROM system.events LIMIT 5 +``` + +``` text +┌─event─────────────────────────────────┬─value─┬─description────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ Query │ 12 │ Number of queries to be interpreted and potentially executed. Does not include queries that failed to parse or were rejected due to AST size limits, quota limits or limits on the number of simultaneously running queries. May include internal queries initiated by ClickHouse itself. Does not count subqueries. │ +│ SelectQuery │ 8 │ Same as Query, but only for SELECT queries. │ +│ FileOpen │ 73 │ Number of files opened. 
│ +│ ReadBufferFromFileDescriptorRead │ 155 │ Number of reads (read/pread) from a file descriptor. Does not include sockets. │ +│ ReadBufferFromFileDescriptorReadBytes │ 9931 │ Number of bytes read from file descriptors. If the file is compressed, this will show the compressed data size. │ +└───────────────────────────────────────┴───────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +**See Also** + +- [system.asynchronous\_metrics](asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics. +- [system.metrics](metrics.md#system_tables-metrics) — Contains instantly calculated metrics. +- [system.metric\_log](metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`. +- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. diff --git a/docs/en/operations/system-tables/functions.md b/docs/en/operations/system-tables/functions.md new file mode 100644 index 00000000000..c76e806505a --- /dev/null +++ b/docs/en/operations/system-tables/functions.md @@ -0,0 +1,8 @@ +# system.functions {#system-functions} + +Contains information about normal and aggregate functions. + +Columns: + +- `name`(`String`) – The name of the function. +- `is_aggregate`(`UInt8`) — Whether the function is aggregate. diff --git a/docs/en/operations/system-tables/graphite_retentions.md b/docs/en/operations/system-tables/graphite_retentions.md new file mode 100644 index 00000000000..00e016ffe64 --- /dev/null +++ b/docs/en/operations/system-tables/graphite_retentions.md @@ -0,0 +1,15 @@ +# system.graphite\_retentions {#system-graphite-retentions} + +Contains information about parameters [graphite\_rollup](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-graphite) which are used in tables with [\*GraphiteMergeTree](../../engines/table-engines/mergetree-family/graphitemergetree.md) engines. + +Columns: + +- `config_name` (String) - `graphite_rollup` parameter name. +- `regexp` (String) - A pattern for the metric name. +- `function` (String) - The name of the aggregating function. +- `age` (UInt64) - The minimum age of the data in seconds. +- `precision` (UInt64) - How precisely to define the age of the data in seconds. +- `priority` (UInt16) - Pattern priority. +- `is_default` (UInt8) - Whether the pattern is the default. +- `Tables.database` (Array(String)) - Array of names of database tables that use the `config_name` parameter. +- `Tables.table` (Array(String)) - Array of table names that use the `config_name` parameter. diff --git a/docs/en/operations/system-tables/index.md b/docs/en/operations/system-tables/index.md new file mode 100644 index 00000000000..395c98ee8b4 --- /dev/null +++ b/docs/en/operations/system-tables/index.md @@ -0,0 +1,49 @@ +--- +toc_priority: 52 +toc_title: System Tables +--- + +# System Tables {#system-tables} + +## Introduction {#system-tables-introduction} + +System tables provide information about: + +- Server states, processes, and environment. +- Server’s internal processes. + +System tables: + +- Located in the `system` database. +- Available only for reading data. +- Can’t be dropped or altered, but can be detached. + +Most of system tables store their data in RAM. 
The ClickHouse server creates such system tables at startup.
+
+Unlike other system tables, the system tables [metric\_log](metric_log.md#system_tables-metric_log), [query\_log](query_log.md#system_tables-query_log), [query\_thread\_log](query_thread_log.md#system_tables-query_thread_log), and [trace\_log](trace_log.md#system_tables-trace_log) use the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table engine and store their data in the filesystem. If you remove such a table from the filesystem, the ClickHouse server creates an empty one again at the time of the next data write. If the schema of a system table changed in a new release, ClickHouse renames the current table and creates a new one.
+
+By default, table growth is unlimited. To control the size of a table, you can use [TTL](../../sql-reference/statements/alter.md#manipulations-with-table-ttl) settings to remove outdated log records. You can also use the partitioning feature of `MergeTree`-engine tables.
+
+## Sources of System Metrics {#system-tables-sources-of-system-metrics}
+
+To collect system metrics, the ClickHouse server uses:
+
+- The `CAP_NET_ADMIN` capability.
+- [procfs](https://en.wikipedia.org/wiki/Procfs) (Linux only).
+
+**procfs**
+
+If the ClickHouse server does not have the `CAP_NET_ADMIN` capability, it tries to fall back to `ProcfsMetricsProvider`. `ProcfsMetricsProvider` allows collecting per-query system metrics (for CPU and I/O).
+
+If procfs is supported and enabled on the system, the ClickHouse server collects these metrics:
+
+- `OSCPUVirtualTimeMicroseconds`
+- `OSCPUWaitMicroseconds`
+- `OSIOWaitMicroseconds`
+- `OSReadChars`
+- `OSWriteChars`
+- `OSReadBytes`
+- `OSWriteBytes`
+
+[Original article](https://clickhouse.tech/docs/en/operations/system-tables/)
diff --git a/docs/en/operations/system-tables/merge_tree_settings.md b/docs/en/operations/system-tables/merge_tree_settings.md
new file mode 100644
index 00000000000..e8452fe6dae
--- /dev/null
+++ b/docs/en/operations/system-tables/merge_tree_settings.md
@@ -0,0 +1,11 @@
+# system.merge\_tree\_settings {#system-merge_tree_settings}
+
+Contains information about settings for `MergeTree` tables.
+
+Columns:
+
+- `name` (String) — Setting name.
+- `value` (String) — Setting value.
+- `description` (String) — Setting description.
+- `type` (String) — Setting type (an implementation-specific string value).
+- `changed` (UInt8) — Whether the setting was explicitly defined in the config or explicitly changed.
diff --git a/docs/en/operations/system-tables/merges.md b/docs/en/operations/system-tables/merges.md
new file mode 100644
index 00000000000..2ff70cceb44
--- /dev/null
+++ b/docs/en/operations/system-tables/merges.md
@@ -0,0 +1,19 @@
+# system.merges {#system-merges}
+
+Contains information about merges and part mutations currently in progress for tables in the MergeTree family.
+
+Columns:
+
+- `database` (String) — The name of the database the table is in.
+- `table` (String) — Table name.
+- `elapsed` (Float64) — The time elapsed (in seconds) since the merge started.
+- `progress` (Float64) — The percentage of completed work from 0 to 1.
+- `num_parts` (UInt64) — The number of parts to be merged.
+- `result_part_name` (String) — The name of the part that will be formed as the result of merging.
+- `is_mutation` (UInt8) - 1 if this process is a part mutation.
+- `total_size_bytes_compressed` (UInt64) — The total size of the compressed data in the merged parts.
+- `total_size_marks` (UInt64) — The total number of marks in the merged parts.
+- `bytes_read_uncompressed` (UInt64) — Number of uncompressed bytes read.
+- `rows_read` (UInt64) — Number of rows read.
+- `bytes_written_uncompressed` (UInt64) — Number of uncompressed bytes written.
+- `rows_written` (UInt64) — Number of rows written.
diff --git a/docs/en/operations/system-tables/metric_log.md b/docs/en/operations/system-tables/metric_log.md
new file mode 100644
index 00000000000..028b5fe5065
--- /dev/null
+++ b/docs/en/operations/system-tables/metric_log.md
@@ -0,0 +1,55 @@
+# system.metric\_log {#system_tables-metric_log}
+
+Contains a history of metrics values from the `system.metrics` and `system.events` tables, periodically flushed to disk.
+To turn on metrics history collection in `system.metric_log`, create `/etc/clickhouse-server/config.d/metric_log.xml` with the following content:
+
+``` xml
+<yandex>
+    <metric_log>
+        <database>system</database>
+        <table>metric_log</table>
+        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+        <collect_interval_milliseconds>1000</collect_interval_milliseconds>
+    </metric_log>
+</yandex>
+``` + +**Example** + +``` sql +SELECT * FROM system.metric_log LIMIT 1 FORMAT Vertical; +``` + +``` text +Row 1: +────── +event_date: 2020-02-18 +event_time: 2020-02-18 07:15:33 +milliseconds: 554 +ProfileEvent_Query: 0 +ProfileEvent_SelectQuery: 0 +ProfileEvent_InsertQuery: 0 +ProfileEvent_FileOpen: 0 +ProfileEvent_Seek: 0 +ProfileEvent_ReadBufferFromFileDescriptorRead: 1 +ProfileEvent_ReadBufferFromFileDescriptorReadFailed: 0 +ProfileEvent_ReadBufferFromFileDescriptorReadBytes: 0 +ProfileEvent_WriteBufferFromFileDescriptorWrite: 1 +ProfileEvent_WriteBufferFromFileDescriptorWriteFailed: 0 +ProfileEvent_WriteBufferFromFileDescriptorWriteBytes: 56 +... +CurrentMetric_Query: 0 +CurrentMetric_Merge: 0 +CurrentMetric_PartMutation: 0 +CurrentMetric_ReplicatedFetch: 0 +CurrentMetric_ReplicatedSend: 0 +CurrentMetric_ReplicatedChecks: 0 +... +``` + +**See also** + +- [system.asynchronous\_metrics](asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics. +- [system.events](events.md#system_tables-events) — Contains a number of events that occurred. +- [system.metrics](metrics.md#system_tables-metrics) — Contains instantly calculated metrics. +- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. diff --git a/docs/en/operations/system-tables/metrics.md b/docs/en/operations/system-tables/metrics.md new file mode 100644 index 00000000000..2dc54167fb1 --- /dev/null +++ b/docs/en/operations/system-tables/metrics.md @@ -0,0 +1,39 @@ +# system.metrics {#system_tables-metrics} + +Contains metrics which can be calculated instantly, or have a current value. For example, the number of simultaneously processed queries or the current replica delay. This table is always up to date. + +Columns: + +- `metric` ([String](../../sql-reference/data-types/string.md)) — Metric name. +- `value` ([Int64](../../sql-reference/data-types/int-uint.md)) — Metric value. +- `description` ([String](../../sql-reference/data-types/string.md)) — Metric description. + +The list of supported metrics you can find in the [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp) source file of ClickHouse. + +**Example** + +``` sql +SELECT * FROM system.metrics LIMIT 10 +``` + +``` text +┌─metric─────────────────────┬─value─┬─description──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ Query │ 1 │ Number of executing queries │ +│ Merge │ 0 │ Number of executing background merges │ +│ PartMutation │ 0 │ Number of mutations (ALTER DELETE/UPDATE) │ +│ ReplicatedFetch │ 0 │ Number of data parts being fetched from replicas │ +│ ReplicatedSend │ 0 │ Number of data parts being sent to replicas │ +│ ReplicatedChecks │ 0 │ Number of data parts checking for consistency │ +│ BackgroundPoolTask │ 0 │ Number of active tasks in BackgroundProcessingPool (merges, mutations, fetches, or replication queue bookkeeping) │ +│ BackgroundSchedulePoolTask │ 0 │ Number of active tasks in BackgroundSchedulePool. This pool is used for periodic ReplicatedMergeTree tasks, like cleaning old data parts, altering data parts, replica re-initialization, etc. │ +│ DiskSpaceReservedForMerge │ 0 │ Disk space reserved for currently running background merges. It is slightly more than the total size of currently merging parts. 
│ +│ DistributedSend │ 0 │ Number of connections to remote servers sending data that was INSERTed into Distributed tables. Both synchronous and asynchronous mode. │ +└────────────────────────────┴───────┴──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +**See Also** + +- [system.asynchronous\_metrics](asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics. +- [system.events](events.md#system_tables-events) — Contains a number of events that occurred. +- [system.metric\_log](metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`. +- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. diff --git a/docs/en/operations/system-tables/mutations.md b/docs/en/operations/system-tables/mutations.md new file mode 100644 index 00000000000..b9375a78354 --- /dev/null +++ b/docs/en/operations/system-tables/mutations.md @@ -0,0 +1,25 @@ +# system.mutations {#system_tables-mutations} + +The table contains information about [mutations](../../sql-reference/statements/alter.md#alter-mutations) of MergeTree tables and their progress. Each mutation command is represented by a single row. The table has the following columns: + +**database**, **table** - The name of the database and table to which the mutation was applied. + +**mutation\_id** - The ID of the mutation. For replicated tables these IDs correspond to znode names in the `/mutations/` directory in ZooKeeper. For unreplicated tables the IDs correspond to file names in the data directory of the table. + +**command** - The mutation command string (the part of the query after `ALTER TABLE [db.]table`). + +**create\_time** - When this mutation command was submitted for execution. + +**block\_numbers.partition\_id**, **block\_numbers.number** - A nested column. For mutations of replicated tables, it contains one record for each partition: the partition ID and the block number that was acquired by the mutation (in each partition, only parts that contain blocks with numbers less than the block number acquired by the mutation in that partition will be mutated). In non-replicated tables, block numbers in all partitions form a single sequence. This means that for mutations of non-replicated tables, the column will contain one record with a single block number acquired by the mutation. + +**parts\_to\_do** - The number of data parts that need to be mutated for the mutation to finish. + +**is\_done** - Is the mutation done? Note that even if `parts_to_do = 0` it is possible that a mutation of a replicated table is not done yet because of a long-running INSERT that will create a new data part that will need to be mutated. + +If there were problems with mutating some parts, the following columns contain additional information: + +**latest\_failed\_part** - The name of the most recent part that could not be mutated. + +**latest\_fail\_time** - The time of the most recent part mutation failure. + +**latest\_fail\_reason** - The exception message that caused the most recent part mutation failure. 
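+
+For example, a query along the following lines can be used to list mutations that have not finished yet and to see why the most recent part could not be mutated (an illustrative query based on the columns described above):
+
+``` sql
+SELECT
+    database,
+    table,
+    mutation_id,
+    command,
+    parts_to_do,          -- how many parts are still waiting to be mutated
+    latest_fail_reason    -- last error, if any part failed to mutate
+FROM system.mutations
+WHERE is_done = 0
+```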
diff --git a/docs/en/operations/system-tables/numbers.md b/docs/en/operations/system-tables/numbers.md new file mode 100644 index 00000000000..de517447913 --- /dev/null +++ b/docs/en/operations/system-tables/numbers.md @@ -0,0 +1,7 @@ +# system.numbers {#system-numbers} + +This table contains a single UInt64 column named `number` that contains almost all the natural numbers starting from zero. + +You can use this table for tests, or if you need to do a brute force search. + +Reads from this table are not parallelized. diff --git a/docs/en/operations/system-tables/numbers_mt.md b/docs/en/operations/system-tables/numbers_mt.md new file mode 100644 index 00000000000..c23bc810cfc --- /dev/null +++ b/docs/en/operations/system-tables/numbers_mt.md @@ -0,0 +1,5 @@ +# system.numbers\_mt {#system-numbers-mt} + +The same as [system.numbers](numbers.md) but reads are parallelized. The numbers can be returned in any order. + +Used for tests. diff --git a/docs/en/operations/system-tables/one.md b/docs/en/operations/system-tables/one.md new file mode 100644 index 00000000000..2bf0a20696d --- /dev/null +++ b/docs/en/operations/system-tables/one.md @@ -0,0 +1,7 @@ +# system.one {#system-one} + +This table contains a single row with a single `dummy` UInt8 column containing the value 0. + +This table is used if a `SELECT` query doesn’t specify the `FROM` clause. + +This is similar to the `DUAL` table found in other DBMSs. diff --git a/docs/en/operations/system-tables/part_log.md b/docs/en/operations/system-tables/part_log.md new file mode 100644 index 00000000000..2d457486f06 --- /dev/null +++ b/docs/en/operations/system-tables/part_log.md @@ -0,0 +1,32 @@ +# system.part\_log {#system_tables-part-log} + +The `system.part_log` table is created only if the [part\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-part-log) server setting is specified. + +This table contains information about events that occurred with [data parts](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) in the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) family tables, such as adding or merging data. + +The `system.part_log` table contains the following columns: + +- `event_type` (Enum) — Type of the event that occurred with the data part. Can have one of the following values: + - `NEW_PART` — Inserting of a new data part. + - `MERGE_PARTS` — Merging of data parts. + - `DOWNLOAD_PART` — Downloading a data part. + - `REMOVE_PART` — Removing or detaching a data part using [DETACH PARTITION](../../sql-reference/statements/alter.md#alter_detach-partition). + - `MUTATE_PART` — Mutating of a data part. + - `MOVE_PART` — Moving the data part from the one disk to another one. +- `event_date` (Date) — Event date. +- `event_time` (DateTime) — Event time. +- `duration_ms` (UInt64) — Duration. +- `database` (String) — Name of the database the data part is in. +- `table` (String) — Name of the table the data part is in. +- `part_name` (String) — Name of the data part. +- `partition_id` (String) — ID of the partition that the data part was inserted to. The column takes the ‘all’ value if the partitioning is by `tuple()`. +- `rows` (UInt64) — The number of rows in the data part. +- `size_in_bytes` (UInt64) — Size of the data part in bytes. +- `merged_from` (Array(String)) — An array of names of the parts which the current part was made up from (after the merge). +- `bytes_uncompressed` (UInt64) — Size of uncompressed bytes. 
+- `read_rows` (UInt64) — The number of rows read during the merge.
+- `read_bytes` (UInt64) — The number of bytes read during the merge.
+- `error` (UInt16) — The code of the error that occurred.
+- `exception` (String) — Text message of the error that occurred.
+
+The `system.part_log` table is created after the first insert of data into a `MergeTree` table.
diff --git a/docs/en/operations/system-tables/parts.md b/docs/en/operations/system-tables/parts.md
new file mode 100644
index 00000000000..be14090654c
--- /dev/null
+++ b/docs/en/operations/system-tables/parts.md
@@ -0,0 +1,80 @@
+# system.parts {#system_tables-parts}
+
+Contains information about parts of [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables.
+
+Each row describes one data part.
+
+Columns:
+
+- `partition` (String) – The partition name. To learn what a partition is, see the description of the [ALTER](../../sql-reference/statements/alter.md#query_language_queries_alter) query.
+
+    Formats:
+
+    - `YYYYMM` for automatic partitioning by month.
+    - `any_string` when partitioning manually.
+
+- `name` (`String`) – Name of the data part.
+
+- `active` (`UInt8`) – Flag that indicates whether the data part is active. If a data part is active, it’s used in a table. Otherwise, it’s deleted. Inactive data parts remain after merging.
+
+- `marks` (`UInt64`) – The number of marks. To get the approximate number of rows in a data part, multiply `marks` by the index granularity (usually 8192). This hint doesn’t work for adaptive granularity.
+
+- `rows` (`UInt64`) – The number of rows.
+
+- `bytes_on_disk` (`UInt64`) – Total size of all the data part files in bytes.
+
+- `data_compressed_bytes` (`UInt64`) – Total size of compressed data in the data part. All the auxiliary files (for example, files with marks) are not included.
+
+- `data_uncompressed_bytes` (`UInt64`) – Total size of uncompressed data in the data part. All the auxiliary files (for example, files with marks) are not included.
+
+- `marks_bytes` (`UInt64`) – The size of the file with marks.
+
+- `modification_time` (`DateTime`) – The time the directory with the data part was modified. This usually corresponds to the time of data part creation.
+
+- `remove_time` (`DateTime`) – The time when the data part became inactive.
+
+- `refcount` (`UInt32`) – The number of places where the data part is used. A value greater than 2 indicates that the data part is used in queries or merges.
+
+- `min_date` (`Date`) – The minimum value of the date key in the data part.
+
+- `max_date` (`Date`) – The maximum value of the date key in the data part.
+
+- `min_time` (`DateTime`) – The minimum value of the date and time key in the data part.
+
+- `max_time` (`DateTime`) – The maximum value of the date and time key in the data part.
+
+- `partition_id` (`String`) – ID of the partition.
+
+- `min_block_number` (`UInt64`) – The minimum number of data parts that make up the current part after merging.
+
+- `max_block_number` (`UInt64`) – The maximum number of data parts that make up the current part after merging.
+
+- `level` (`UInt32`) – Depth of the merge tree. Zero means that the current part was created by an insert rather than by merging other parts.
+
+- `data_version` (`UInt64`) – Number that is used to determine which mutations should be applied to the data part (mutations with a version higher than `data_version`).
+
+- `primary_key_bytes_in_memory` (`UInt64`) – The amount of memory (in bytes) used by primary key values.
+ +- `primary_key_bytes_in_memory_allocated` (`UInt64`) – The amount of memory (in bytes) reserved for primary key values. + +- `is_frozen` (`UInt8`) – Flag that shows that a partition data backup exists. 1, the backup exists. 0, the backup doesn’t exist. For more details, see [FREEZE PARTITION](../../sql-reference/statements/alter.md#alter_freeze-partition) + +- `database` (`String`) – Name of the database. + +- `table` (`String`) – Name of the table. + +- `engine` (`String`) – Name of the table engine without parameters. + +- `path` (`String`) – Absolute path to the folder with data part files. + +- `disk` (`String`) – Name of a disk that stores the data part. + +- `hash_of_all_files` (`String`) – [sipHash128](../../sql-reference/functions/hash-functions.md#hash_functions-siphash128) of compressed files. + +- `hash_of_uncompressed_files` (`String`) – [sipHash128](../../sql-reference/functions/hash-functions.md#hash_functions-siphash128) of uncompressed files (files with marks, index file etc.). + +- `uncompressed_hash_of_compressed_files` (`String`) – [sipHash128](../../sql-reference/functions/hash-functions.md#hash_functions-siphash128) of data in the compressed files as if they were uncompressed. + +- `bytes` (`UInt64`) – Alias for `bytes_on_disk`. + +- `marks_size` (`UInt64`) – Alias for `marks_bytes`. diff --git a/docs/en/operations/system-tables/processes.md b/docs/en/operations/system-tables/processes.md new file mode 100644 index 00000000000..16681784c46 --- /dev/null +++ b/docs/en/operations/system-tables/processes.md @@ -0,0 +1,15 @@ +# system.processes {#system_tables-processes} + +This system table is used for implementing the `SHOW PROCESSLIST` query. + +Columns: + +- `user` (String) – The user who made the query. Keep in mind that for distributed processing, queries are sent to remote servers under the `default` user. The field contains the username for a specific query, not for a query that this query initiated. +- `address` (String) – The IP address the request was made from. The same for distributed processing. To track where a distributed query was originally made from, look at `system.processes` on the query requestor server. +- `elapsed` (Float64) – The time in seconds since request execution started. +- `rows_read` (UInt64) – The number of rows read from the table. For distributed processing, on the requestor server, this is the total for all remote servers. +- `bytes_read` (UInt64) – The number of uncompressed bytes read from the table. For distributed processing, on the requestor server, this is the total for all remote servers. +- `total_rows_approx` (UInt64) – The approximation of the total number of rows that should be read. For distributed processing, on the requestor server, this is the total for all remote servers. It can be updated during request processing, when new sources to process become known. +- `memory_usage` (UInt64) – Amount of RAM the request uses. It might not include some types of dedicated memory. See the [max\_memory\_usage](../../operations/settings/query-complexity.md#settings_max_memory_usage) setting. +- `query` (String) – The query text. For `INSERT`, it doesn’t include the data to insert. +- `query_id` (String) – Query ID, if defined. 
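+
+For example, a query like the following shows the currently executing queries ordered by how long they have been running (an illustrative query using the columns above; `SHOW PROCESSLIST` returns similar information):
+
+``` sql
+SELECT
+    query_id,
+    user,
+    elapsed,       -- seconds since the query started
+    memory_usage,  -- RAM currently used by the query
+    query
+FROM system.processes
+ORDER BY elapsed DESC
+```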
diff --git a/docs/en/operations/system-tables/query_log.md b/docs/en/operations/system-tables/query_log.md new file mode 100644 index 00000000000..9ee34b0e516 --- /dev/null +++ b/docs/en/operations/system-tables/query_log.md @@ -0,0 +1,138 @@ +# system.query\_log {#system_tables-query_log} + +Contains information about executed queries, for example, start time, duration of processing, error messages. + +!!! note "Note" + This table doesn’t contain the ingested data for `INSERT` queries. + +You can change settings of queries logging in the [query\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log) section of the server configuration. + +You can disable queries logging by setting [log\_queries = 0](../../operations/settings/settings.md#settings-log-queries). We don’t recommend to turn off logging because information in this table is important for solving issues. + +The flushing period of data is set in `flush_interval_milliseconds` parameter of the [query\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query. + +ClickHouse doesn’t delete data from the table automatically. See [Introduction](index.md#system-tables-introduction) for more details. + +The `system.query_log` table registers two kinds of queries: + +1. Initial queries that were run directly by the client. +2. Child queries that were initiated by other queries (for distributed query execution). For these types of queries, information about the parent queries is shown in the `initial_*` columns. + +Each query creates one or two rows in the `query_log` table, depending on the status (see the `type` column) of the query: + +1. If the query execution was successful, two rows with the `QueryStart` and `QueryFinish` types are created . +2. If an error occurred during query processing, two events with the `QueryStart` and `ExceptionWhileProcessing` types are created . +3. If an error occurred before launching the query, a single event with the `ExceptionBeforeStart` type is created. + +Columns: + +- `type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of an event that occurred when executing the query. Values: + - `'QueryStart' = 1` — Successful start of query execution. + - `'QueryFinish' = 2` — Successful end of query execution. + - `'ExceptionBeforeStart' = 3` — Exception before the start of query execution. + - `'ExceptionWhileProcessing' = 4` — Exception during the query execution. +- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Query starting date. +- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query starting time. +- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time of query execution. +- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of query execution in milliseconds. +- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total number or rows read from all tables and table functions participated in query. It includes usual subqueries, subqueries for `IN` and `JOIN`. For distributed queries `read_rows` includes the total number of rows read at all replicas. Each replica sends it’s `read_rows` value, and the server-initiator of the query summarize all received and local values. 
Cache volumes do not affect this value.
+- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total number of bytes read from all tables and table functions that participated in the query. It includes the usual subqueries, and subqueries for `IN` and `JOIN`. For distributed queries `read_bytes` includes the total number of bytes read at all replicas. Each replica sends its `read_bytes` value, and the initiator server of the query sums up all the received and local values. Cache volumes do not affect this value.
+- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0.
+- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0.
+- `result_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of rows in the result of a `SELECT` query, or the number of rows in an `INSERT` query.
+- `result_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — RAM volume in bytes used to store the query result.
+- `memory_usage` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Memory consumption by the query.
+- `query` ([String](../../sql-reference/data-types/string.md)) — Query string.
+- `exception` ([String](../../sql-reference/data-types/string.md)) — Exception message.
+- `exception_code` ([Int32](../../sql-reference/data-types/int-uint.md)) — Code of an exception.
+- `stack_trace` ([String](../../sql-reference/data-types/string.md)) — [Stack trace](https://en.wikipedia.org/wiki/Stack_trace). An empty string if the query was completed successfully.
+- `is_initial_query` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Query type. Possible values:
+    - 1 — Query was initiated by the client.
+    - 0 — Query was initiated by another query as part of distributed query execution.
+- `user` ([String](../../sql-reference/data-types/string.md)) — Name of the user who initiated the current query.
+- `query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the query.
+- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that was used to make the query.
+- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The client port that was used to make the query.
+- `initial_user` ([String](../../sql-reference/data-types/string.md)) — Name of the user who ran the initial query (for distributed query execution).
+- `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the initial query (for distributed query execution).
+- `initial_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that the parent query was launched from.
+- `initial_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The client port that was used to make the parent query.
+- `interface` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Interface that the query was initiated from. Possible values:
+    - 1 — TCP.
+    - 2 — HTTP.
+- `os_user` ([String](../../sql-reference/data-types/string.md)) — Operating system user name of the user who runs [clickhouse-client](../../interfaces/cli.md).
+- `client_hostname` ([String](../../sql-reference/data-types/string.md)) — Hostname of the client machine where the [clickhouse-client](../../interfaces/cli.md) or another TCP client is run.
+- `client_name` ([String](../../sql-reference/data-types/string.md)) — The [clickhouse-client](../../interfaces/cli.md) or another TCP client name. +- `client_revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Revision of the [clickhouse-client](../../interfaces/cli.md) or another TCP client. +- `client_version_major` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Major version of the [clickhouse-client](../../interfaces/cli.md) or another TCP client. +- `client_version_minor` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Minor version of the [clickhouse-client](../../interfaces/cli.md) or another TCP client. +- `client_version_patch` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Patch component of the [clickhouse-client](../../interfaces/cli.md) or another TCP client version. +- `http_method` (UInt8) — HTTP method that initiated the query. Possible values: + - 0 — The query was launched from the TCP interface. + - 1 — `GET` method was used. + - 2 — `POST` method was used. +- `http_user_agent` ([String](../../sql-reference/data-types/string.md)) — The `UserAgent` header passed in the HTTP request. +- `quota_key` ([String](../../sql-reference/data-types/string.md)) — The “quota key” specified in the [quotas](../../operations/quotas.md) setting (see `keyed`). +- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision. +- `thread_numbers` ([Array(UInt32)](../../sql-reference/data-types/array.md)) — Number of threads that are participating in query execution. +- `ProfileEvents.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — Counters that measure different metrics. The description of them could be found in the table [system.events](events.md#system_tables-events) +- `ProfileEvents.Values` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Values of metrics that are listed in the `ProfileEvents.Names` column. +- `Settings.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — Names of settings that were changed when the client ran the query. To enable logging changes to settings, set the `log_query_settings` parameter to 1. +- `Settings.Values` ([Array(String)](../../sql-reference/data-types/array.md)) — Values of settings that are listed in the `Settings.Names` column. 
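+
+In practice, this table is often used to investigate recently failed queries. A query along the following lines returns the latest errors for the current day (an illustrative sketch built only from the columns described above):
+
+``` sql
+SELECT
+    event_time,
+    query_id,
+    exception_code,
+    exception,
+    query
+FROM system.query_log
+WHERE type = 'ExceptionWhileProcessing'  -- only queries that failed during execution
+  AND event_date = today()
+ORDER BY event_time DESC
+LIMIT 10
+```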
+ +**Example** + +``` sql +SELECT * FROM system.query_log LIMIT 1 FORMAT Vertical; +``` + +``` text +Row 1: +────── +type: QueryStart +event_date: 2020-05-13 +event_time: 2020-05-13 14:02:28 +query_start_time: 2020-05-13 14:02:28 +query_duration_ms: 0 +read_rows: 0 +read_bytes: 0 +written_rows: 0 +written_bytes: 0 +result_rows: 0 +result_bytes: 0 +memory_usage: 0 +query: SELECT 1 +exception_code: 0 +exception: +stack_trace: +is_initial_query: 1 +user: default +query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a +address: ::ffff:127.0.0.1 +port: 57720 +initial_user: default +initial_query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a +initial_address: ::ffff:127.0.0.1 +initial_port: 57720 +interface: 1 +os_user: bayonet +client_hostname: clickhouse.ru-central1.internal +client_name: ClickHouse client +client_revision: 54434 +client_version_major: 20 +client_version_minor: 4 +client_version_patch: 1 +http_method: 0 +http_user_agent: +quota_key: +revision: 54434 +thread_ids: [] +ProfileEvents.Names: [] +ProfileEvents.Values: [] +Settings.Names: ['use_uncompressed_cache','load_balancing','log_queries','max_memory_usage'] +Settings.Values: ['0','random','1','10000000000'] +``` + +**See Also** + +- [system.query\_thread\_log](query_thread_log.md#system_tables-query_thread_log) — This table contains information about each query execution thread. diff --git a/docs/en/operations/system-tables/query_thread_log.md b/docs/en/operations/system-tables/query_thread_log.md new file mode 100644 index 00000000000..370257639b3 --- /dev/null +++ b/docs/en/operations/system-tables/query_thread_log.md @@ -0,0 +1,113 @@ +# system.query\_thread\_log {#system_tables-query_thread_log} + +Contains information about threads which execute queries, for example, thread name, thread start time, duration of query processing. + +To start logging: + +1. Configure parameters in the [query\_thread\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) section. +2. Set [log\_query\_threads](../../operations/settings/settings.md#settings-log-query-threads) to 1. + +The flushing period of data is set in `flush_interval_milliseconds` parameter of the [query\_thread\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query. + +ClickHouse doesn’t delete data from the table automatically. See [Introduction](index.md#system-tables-introduction) for more details. + +Columns: + +- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the thread has finished execution of the query. +- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query. +- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time of query execution. +- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of query execution. +- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read rows. +- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read bytes. +- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0. 
+- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0. +- `memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The difference between the amount of allocated and freed memory in context of this thread. +- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in context of this thread. +- `thread_name` ([String](../../sql-reference/data-types/string.md)) — Name of the thread. +- `thread_number` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Internal thread ID. +- `thread_id` ([Int32](../../sql-reference/data-types/int-uint.md)) — thread ID. +- `master_thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — OS initial ID of initial thread. +- `query` ([String](../../sql-reference/data-types/string.md)) — Query string. +- `is_initial_query` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Query type. Possible values: + - 1 — Query was initiated by the client. + - 0 — Query was initiated by another query for distributed query execution. +- `user` ([String](../../sql-reference/data-types/string.md)) — Name of the user who initiated the current query. +- `query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the query. +- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that was used to make the query. +- `port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The client port that was used to make the query. +- `initial_user` ([String](../../sql-reference/data-types/string.md)) — Name of the user who ran the initial query (for distributed query execution). +- `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the initial query (for distributed query execution). +- `initial_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that the parent query was launched from. +- `initial_port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The client port that was used to make the parent query. +- `interface` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Interface that the query was initiated from. Possible values: + - 1 — TCP. + - 2 — HTTP. +- `os_user` ([String](../../sql-reference/data-types/string.md)) — OS’s username who runs [clickhouse-client](../../interfaces/cli.md). +- `client_hostname` ([String](../../sql-reference/data-types/string.md)) — Hostname of the client machine where the [clickhouse-client](../../interfaces/cli.md) or another TCP client is run. +- `client_name` ([String](../../sql-reference/data-types/string.md)) — The [clickhouse-client](../../interfaces/cli.md) or another TCP client name. +- `client_revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Revision of the [clickhouse-client](../../interfaces/cli.md) or another TCP client. +- `client_version_major` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Major version of the [clickhouse-client](../../interfaces/cli.md) or another TCP client. +- `client_version_minor` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Minor version of the [clickhouse-client](../../interfaces/cli.md) or another TCP client. 
+- `client_version_patch` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Patch component of the [clickhouse-client](../../interfaces/cli.md) or another TCP client version. +- `http_method` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — HTTP method that initiated the query. Possible values: + - 0 — The query was launched from the TCP interface. + - 1 — `GET` method was used. + - 2 — `POST` method was used. +- `http_user_agent` ([String](../../sql-reference/data-types/string.md)) — The `UserAgent` header passed in the HTTP request. +- `quota_key` ([String](../../sql-reference/data-types/string.md)) — The “quota key” specified in the [quotas](../../operations/quotas.md) setting (see `keyed`). +- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision. +- `ProfileEvents.Names` ([Array(String)](../../sql-reference/data-types/array.md)) — Counters that measure different metrics for this thread. The description of them could be found in the table [system.events](#system_tables-events). +- `ProfileEvents.Values` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Values of metrics for this thread that are listed in the `ProfileEvents.Names` column. + +**Example** + +``` sql + SELECT * FROM system.query_thread_log LIMIT 1 FORMAT Vertical +``` + +``` text +Row 1: +────── +event_date: 2020-05-13 +event_time: 2020-05-13 14:02:28 +query_start_time: 2020-05-13 14:02:28 +query_duration_ms: 0 +read_rows: 1 +read_bytes: 1 +written_rows: 0 +written_bytes: 0 +memory_usage: 0 +peak_memory_usage: 0 +thread_name: QueryPipelineEx +thread_id: 28952 +master_thread_id: 28924 +query: SELECT 1 +is_initial_query: 1 +user: default +query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a +address: ::ffff:127.0.0.1 +port: 57720 +initial_user: default +initial_query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a +initial_address: ::ffff:127.0.0.1 +initial_port: 57720 +interface: 1 +os_user: bayonet +client_hostname: clickhouse.ru-central1.internal +client_name: ClickHouse client +client_revision: 54434 +client_version_major: 20 +client_version_minor: 4 +client_version_patch: 1 +http_method: 0 +http_user_agent: +quota_key: +revision: 54434 +ProfileEvents.Names: ['ContextLock','RealTimeMicroseconds','UserTimeMicroseconds','OSCPUWaitMicroseconds','OSCPUVirtualTimeMicroseconds'] +ProfileEvents.Values: [1,97,81,5,81] +... +``` + +**See Also** + +- [system.query\_log](query_log.md#system_tables-query_log) — Description of the `query_log` system table which contains common information about queries execution. diff --git a/docs/en/operations/system-tables/replicas.md b/docs/en/operations/system-tables/replicas.md new file mode 100644 index 00000000000..a3719375a5c --- /dev/null +++ b/docs/en/operations/system-tables/replicas.md @@ -0,0 +1,121 @@ +# system.replicas {#system_tables-replicas} + +Contains information and status for replicated tables residing on the local server. +This table can be used for monitoring. The table contains a row for every Replicated\* table. 
+ +Example: + +``` sql +SELECT * +FROM system.replicas +WHERE table = 'visits' +FORMAT Vertical +``` + +``` text +Row 1: +────── +database: merge +table: visits +engine: ReplicatedCollapsingMergeTree +is_leader: 1 +can_become_leader: 1 +is_readonly: 0 +is_session_expired: 0 +future_parts: 1 +parts_to_check: 0 +zookeeper_path: /clickhouse/tables/01-06/visits +replica_name: example01-06-1.yandex.ru +replica_path: /clickhouse/tables/01-06/visits/replicas/example01-06-1.yandex.ru +columns_version: 9 +queue_size: 1 +inserts_in_queue: 0 +merges_in_queue: 1 +part_mutations_in_queue: 0 +queue_oldest_time: 2020-02-20 08:34:30 +inserts_oldest_time: 0000-00-00 00:00:00 +merges_oldest_time: 2020-02-20 08:34:30 +part_mutations_oldest_time: 0000-00-00 00:00:00 +oldest_part_to_get: +oldest_part_to_merge_to: 20200220_20284_20840_7 +oldest_part_to_mutate_to: +log_max_index: 596273 +log_pointer: 596274 +last_queue_update: 2020-02-20 08:34:32 +absolute_delay: 0 +total_replicas: 2 +active_replicas: 2 +``` + +Columns: + +- `database` (`String`) - Database name +- `table` (`String`) - Table name +- `engine` (`String`) - Table engine name +- `is_leader` (`UInt8`) - Whether the replica is the leader. + Only one replica at a time can be the leader. The leader is responsible for selecting background merges to perform. + Note that writes can be performed to any replica that is available and has a session in ZK, regardless of whether it is a leader. +- `can_become_leader` (`UInt8`) - Whether the replica can be elected as a leader. +- `is_readonly` (`UInt8`) - Whether the replica is in read-only mode. + This mode is turned on if the config doesn’t have sections with ZooKeeper, if an unknown error occurred when reinitializing sessions in ZooKeeper, and during session reinitialization in ZooKeeper. +- `is_session_expired` (`UInt8`) - the session with ZooKeeper has expired. Basically the same as `is_readonly`. +- `future_parts` (`UInt32`) - The number of data parts that will appear as the result of INSERTs or merges that haven’t been done yet. +- `parts_to_check` (`UInt32`) - The number of data parts in the queue for verification. A part is put in the verification queue if there is suspicion that it might be damaged. +- `zookeeper_path` (`String`) - Path to table data in ZooKeeper. +- `replica_name` (`String`) - Replica name in ZooKeeper. Different replicas of the same table have different names. +- `replica_path` (`String`) - Path to replica data in ZooKeeper. The same as concatenating ‘zookeeper\_path/replicas/replica\_path’. +- `columns_version` (`Int32`) - Version number of the table structure. Indicates how many times ALTER was performed. If replicas have different versions, it means some replicas haven’t made all of the ALTERs yet. +- `queue_size` (`UInt32`) - Size of the queue for operations waiting to be performed. Operations include inserting blocks of data, merges, and certain other actions. It usually coincides with `future_parts`. +- `inserts_in_queue` (`UInt32`) - Number of inserts of blocks of data that need to be made. Insertions are usually replicated fairly quickly. If this number is large, it means something is wrong. +- `merges_in_queue` (`UInt32`) - The number of merges waiting to be made. Sometimes merges are lengthy, so this value may be greater than zero for a long time. +- `part_mutations_in_queue` (`UInt32`) - The number of mutations waiting to be made. +- `queue_oldest_time` (`DateTime`) - If `queue_size` greater than 0, shows when the oldest operation was added to the queue. 
+- `inserts_oldest_time` (`DateTime`) - See `queue_oldest_time` +- `merges_oldest_time` (`DateTime`) - See `queue_oldest_time` +- `part_mutations_oldest_time` (`DateTime`) - See `queue_oldest_time` + +The next 4 columns have a non-zero value only where there is an active session with ZK. + +- `log_max_index` (`UInt64`) - Maximum entry number in the log of general activity. +- `log_pointer` (`UInt64`) - Maximum entry number in the log of general activity that the replica copied to its execution queue, plus one. If `log_pointer` is much smaller than `log_max_index`, something is wrong. +- `last_queue_update` (`DateTime`) - When the queue was updated last time. +- `absolute_delay` (`UInt64`) - How big lag in seconds the current replica has. +- `total_replicas` (`UInt8`) - The total number of known replicas of this table. +- `active_replicas` (`UInt8`) - The number of replicas of this table that have a session in ZooKeeper (i.e., the number of functioning replicas). + +If you request all the columns, the table may work a bit slowly, since several reads from ZooKeeper are made for each row. +If you don’t request the last 4 columns (log\_max\_index, log\_pointer, total\_replicas, active\_replicas), the table works quickly. + +For example, you can check that everything is working correctly like this: + +``` sql +SELECT + database, + table, + is_leader, + is_readonly, + is_session_expired, + future_parts, + parts_to_check, + columns_version, + queue_size, + inserts_in_queue, + merges_in_queue, + log_max_index, + log_pointer, + total_replicas, + active_replicas +FROM system.replicas +WHERE + is_readonly + OR is_session_expired + OR future_parts > 20 + OR parts_to_check > 10 + OR queue_size > 20 + OR inserts_in_queue > 10 + OR log_max_index - log_pointer > 10 + OR total_replicas < 2 + OR active_replicas < total_replicas +``` + +If this query doesn’t return anything, it means that everything is fine. diff --git a/docs/en/operations/system-tables/settings.md b/docs/en/operations/system-tables/settings.md new file mode 100644 index 00000000000..87079148c47 --- /dev/null +++ b/docs/en/operations/system-tables/settings.md @@ -0,0 +1,50 @@ +# system.settings {#system-tables-system-settings} + +Contains information about session settings for current user. + +Columns: + +- `name` ([String](../../sql-reference/data-types/string.md)) — Setting name. +- `value` ([String](../../sql-reference/data-types/string.md)) — Setting value. +- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting is changed from its default value. +- `description` ([String](../../sql-reference/data-types/string.md)) — Short setting description. +- `min` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Minimum value of the setting, if any is set via [constraints](../../operations/settings/constraints-on-settings.md#constraints-on-settings). If the setting has no minimum value, contains [NULL](../../sql-reference/syntax.md#null-literal). +- `max` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Maximum value of the setting, if any is set via [constraints](../../operations/settings/constraints-on-settings.md#constraints-on-settings). If the setting has no maximum value, contains [NULL](../../sql-reference/syntax.md#null-literal). 
+- `readonly` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether the current user can change the setting: + - `0` — Current user can change the setting. + - `1` — Current user can’t change the setting. + +**Example** + +The following example shows how to get information about settings which name contains `min_i`. + +``` sql +SELECT * +FROM system.settings +WHERE name LIKE '%min_i%' +``` + +``` text +┌─name────────────────────────────────────────┬─value─────┬─changed─┬─description───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─min──┬─max──┬─readonly─┐ +│ min_insert_block_size_rows │ 1048576 │ 0 │ Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ +│ min_insert_block_size_bytes │ 268435456 │ 0 │ Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ +│ read_backoff_min_interval_between_events_ms │ 1000 │ 0 │ Settings to reduce the number of threads in case of slow reads. Do not pay attention to the event, if the previous one has passed less than a certain amount of time. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ +└─────────────────────────────────────────────┴───────────┴─────────┴───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴──────┴──────┴──────────┘ +``` + +Using of `WHERE changed` can be useful, for example, when you want to check: + +- Whether settings in configuration files are loaded correctly and are in use. +- Settings that changed in the current session. + + + +``` sql +SELECT * FROM system.settings WHERE changed AND name='load_balancing' +``` + +**See also** + +- [Settings](../../operations/settings/index.md#session-settings-intro) +- [Permissions for Queries](../../operations/settings/permissions-for-queries.md#settings_readonly) +- [Constraints on Settings](../../operations/settings/constraints-on-settings.md) diff --git a/docs/en/operations/system-tables/storage_policies.md b/docs/en/operations/system-tables/storage_policies.md new file mode 100644 index 00000000000..4376577f798 --- /dev/null +++ b/docs/en/operations/system-tables/storage_policies.md @@ -0,0 +1,14 @@ +# system.storage\_policies {#system_tables-storage_policies} + +Contains information about storage policies and volumes defined in the [server configuration](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes_configure). + +Columns: + +- `policy_name` ([String](../../sql-reference/data-types/string.md)) — Name of the storage policy. +- `volume_name` ([String](../../sql-reference/data-types/string.md)) — Volume name defined in the storage policy. +- `volume_priority` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Volume order number in the configuration. +- `disks` ([Array(String)](../../sql-reference/data-types/array.md)) — Disk names, defined in the storage policy. +- `max_data_part_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Maximum size of a data part that can be stored on volume disks (0 — no limit). +- `move_factor` ([Float64](../../sql-reference/data-types/float.md)) — Ratio of free disk space. When the ratio exceeds the value of configuration parameter, ClickHouse start to move data to the next volume in order. 
+
+If the storage policy contains more than one volume, then information for each volume is stored in an individual row of the table.
diff --git a/docs/en/operations/system-tables/table_engines.md b/docs/en/operations/system-tables/table_engines.md
new file mode 100644
index 00000000000..453010d01a6
--- /dev/null
+++ b/docs/en/operations/system-tables/table_engines.md
@@ -0,0 +1,35 @@
+# system.table\_engines {#system-table-engines}
+
+Contains descriptions of table engines supported by the server and their feature support information.
+
+This table contains the following columns (the column type is shown in brackets):
+
+- `name` (String) — The name of the table engine.
+- `supports_settings` (UInt8) — Flag that indicates if the table engine supports the `SETTINGS` clause.
+- `supports_skipping_indices` (UInt8) — Flag that indicates if the table engine supports [skipping indices](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-data_skipping-indexes).
+- `supports_ttl` (UInt8) — Flag that indicates if the table engine supports [TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
+- `supports_sort_order` (UInt8) — Flag that indicates if the table engine supports the `PARTITION_BY`, `PRIMARY_KEY`, `ORDER_BY` and `SAMPLE_BY` clauses.
+- `supports_replication` (UInt8) — Flag that indicates if the table engine supports [data replication](../../engines/table-engines/mergetree-family/replication.md).
+- `supports_deduplication` (UInt8) — Flag that indicates if the table engine supports data deduplication.
+
+Example:
+
+``` sql
+SELECT *
+FROM system.table_engines
+WHERE name in ('Kafka', 'MergeTree', 'ReplicatedCollapsingMergeTree')
+```
+
+``` text
+┌─name──────────────────────────┬─supports_settings─┬─supports_skipping_indices─┬─supports_sort_order─┬─supports_ttl─┬─supports_replication─┬─supports_deduplication─┐
+│ Kafka                         │                 1 │                         0 │                   0 │            0 │                    0 │                      0 │
+│ MergeTree                     │                 1 │                         1 │                   1 │            1 │                    0 │                      0 │
+│ ReplicatedCollapsingMergeTree │                 1 │                         1 │                   1 │            1 │                    1 │                      1 │
+└───────────────────────────────┴───────────────────┴───────────────────────────┴─────────────────────┴──────────────┴──────────────────────┴────────────────────────┘
+```
+
+**See also**
+
+- MergeTree family [query clauses](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-query-clauses)
+- Kafka [settings](../../engines/table-engines/integrations/kafka.md#table_engine-kafka-creating-a-table)
+- Join [settings](../../engines/table-engines/special/join.md#join-limitations-and-settings)
diff --git a/docs/en/operations/system-tables/tables.md b/docs/en/operations/system-tables/tables.md
new file mode 100644
index 00000000000..d92db720f12
--- /dev/null
+++ b/docs/en/operations/system-tables/tables.md
@@ -0,0 +1,49 @@
+# system.tables {#system-tables}
+
+Contains metadata of each table that the server knows about. Detached tables are not shown in `system.tables`.
+
+This table contains the following columns (the column type is shown in brackets):
+
+- `database` (String) — The name of the database the table is in.
+
+- `name` (String) — Table name.
+
+- `engine` (String) — Table engine name (without parameters).
+
+- `is_temporary` (UInt8) - Flag that indicates whether the table is temporary.
+
+- `data_path` (String) - Path to the table data in the file system.
+
+- `metadata_path` (String) - Path to the table metadata in the file system.
+
+- `metadata_modification_time` (DateTime) - Time of the latest modification of the table metadata.
+
+- `dependencies_database` (Array(String)) - Database dependencies.
+
+- `dependencies_table` (Array(String)) - Table dependencies ([MaterializedView](../../engines/table-engines/special/materializedview.md) tables based on the current table).
+
+- `create_table_query` (String) - The query that was used to create the table.
+
+- `engine_full` (String) - Parameters of the table engine.
+
+- `partition_key` (String) - The partition key expression specified in the table.
+
+- `sorting_key` (String) - The sorting key expression specified in the table.
+
+- `primary_key` (String) - The primary key expression specified in the table.
+
+- `sampling_key` (String) - The sampling key expression specified in the table.
+
+- `storage_policy` (String) - The storage policy:
+
+    - [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes)
+    - [Distributed](../../engines/table-engines/special/distributed.md#distributed)
+
+- `total_rows` (Nullable(UInt64)) - Total number of rows, if it is possible to quickly determine the exact number of rows in the table, otherwise `Null` (including the underlying `Buffer` table).
+
+- `total_bytes` (Nullable(UInt64)) - Total number of bytes, if it is possible to quickly determine the exact number of bytes for the table on storage, otherwise `Null` (it **does not** include any underlying storage).
+
+    - If the table stores data on disk, returns used space on disk (i.e. compressed).
+    - If the table stores data in memory, returns approximated number of used bytes in memory.
+
+The `system.tables` table is used in the `SHOW TABLES` query implementation.
diff --git a/docs/en/operations/system-tables/text_log.md b/docs/en/operations/system-tables/text_log.md
new file mode 100644
index 00000000000..aeb1d6e91d2
--- /dev/null
+++ b/docs/en/operations/system-tables/text_log.md
@@ -0,0 +1,26 @@
+# system.text\_log {#system-tables-text-log}
+
+Contains logging entries. The logging level which goes to this table can be limited with the `text_log.level` server setting.
+
+Columns:
+
+- `event_date` (Date) — Date of the entry.
+- `event_time` (DateTime) — Time of the entry.
+- `microseconds` (UInt32) — Microseconds of the entry.
+- `thread_name` (String) — Name of the thread from which the logging was done.
+- `thread_id` (UInt64) — OS thread ID.
+- `level` (`Enum8`) — Entry level. Possible values:
+    - `1` or `'Fatal'`.
+    - `2` or `'Critical'`.
+    - `3` or `'Error'`.
+    - `4` or `'Warning'`.
+    - `5` or `'Notice'`.
+    - `6` or `'Information'`.
+    - `7` or `'Debug'`.
+    - `8` or `'Trace'`.
+- `query_id` (String) — ID of the query.
+- `logger_name` (LowCardinality(String)) — Name of the logger (e.g. `DDLWorker`).
+- `message` (String) — The message itself.
+- `revision` (UInt32) — ClickHouse revision.
+- `source_file` (LowCardinality(String)) — Source file from which the logging was done.
+- `source_line` (UInt64) — Source line from which the logging was done.
diff --git a/docs/en/operations/system-tables/trace_log.md b/docs/en/operations/system-tables/trace_log.md
new file mode 100644
index 00000000000..4814c3132be
--- /dev/null
+++ b/docs/en/operations/system-tables/trace_log.md
@@ -0,0 +1,48 @@
+# system.trace\_log {#system_tables-trace_log}
+
+Contains stack traces collected by the sampling query profiler.
+
+ClickHouse creates this table when the [trace\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) server configuration section is set.
Also the [query\_profiler\_real\_time\_period\_ns](../../operations/settings/settings.md#query_profiler_real_time_period_ns) and [query\_profiler\_cpu\_time\_period\_ns](../../operations/settings/settings.md#query_profiler_cpu_time_period_ns) settings should be set.
+
+To analyze logs, use the `addressToLine`, `addressToSymbol` and `demangle` introspection functions.
+
+Columns:
+
+- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Date of the sampling moment.
+
+- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Timestamp of the sampling moment.
+
+- `timestamp_ns` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Timestamp of the sampling moment in nanoseconds.
+
+- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse server build revision.
+
+    When connecting to the server with `clickhouse-client`, you see a string similar to `Connected to ClickHouse server version 19.18.1 revision 54429.`. This field contains the `revision`, but not the `version` of the server.
+
+- `timer_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Timer type:
+
+    - `Real` represents wall-clock time.
+    - `CPU` represents CPU time.
+
+- `thread_number` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Thread identifier.
+
+- `query_id` ([String](../../sql-reference/data-types/string.md)) — Query identifier that can be used to get details about a query that was running from the [query\_log](../../operations/system-tables/query_log.md#system_tables-query_log) system table.
+
+- `trace` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Stack trace at the moment of sampling. Each element is a virtual memory address inside the ClickHouse server process.
+
+**Example**
+
+``` sql
+SELECT * FROM system.trace_log LIMIT 1 \G
+```
+
+``` text
+Row 1:
+──────
+event_date: 2019-11-15
+event_time: 2019-11-15 15:09:38
+revision: 54428
+timer_type: Real
+thread_number: 48
+query_id: acc4d61f-5bd1-4a3e-bc91-2180be37c915
+trace: [94222141367858,94222152240175,94222152325351,94222152329944,94222152330796,94222151449980,94222144088167,94222151682763,94222144088167,94222151682763,94222144088167,94222144058283,94222144059248,94222091840750,94222091842302,94222091831228,94222189631488,140509950166747,140509942945935]
+```
diff --git a/docs/en/operations/system-tables/zookeeper.md b/docs/en/operations/system-tables/zookeeper.md
new file mode 100644
index 00000000000..4146d22d1dd
--- /dev/null
+++ b/docs/en/operations/system-tables/zookeeper.md
@@ -0,0 +1,70 @@
+# system.zookeeper {#system-zookeeper}
+
+The table does not exist if ZooKeeper is not configured. It allows reading data from the ZooKeeper cluster defined in the config.
+The query must have a ‘path’ equality condition in the WHERE clause. This is the path in ZooKeeper for the children that you want to get data for.
+
+The query `SELECT * FROM system.zookeeper WHERE path = '/clickhouse'` outputs data for all children on the `/clickhouse` node.
+To output data for all root nodes, write path = ‘/’.
+If the path specified in ‘path’ doesn’t exist, an exception will be thrown.
+
+Columns:
+
+- `name` (String) — The name of the node.
+- `path` (String) — The path to the node.
+- `value` (String) — Node value.
+- `dataLength` (Int32) — Size of the value.
+- `numChildren` (Int32) — Number of descendants.
+- `czxid` (Int64) — ID of the transaction that created the node.
+- `mzxid` (Int64) — ID of the transaction that last changed the node.
+- `pzxid` (Int64) — ID of the transaction that last deleted or added descendants.
+- `ctime` (DateTime) — Time of node creation. +- `mtime` (DateTime) — Time of the last modification of the node. +- `version` (Int32) — Node version: the number of times the node was changed. +- `cversion` (Int32) — Number of added or removed descendants. +- `aversion` (Int32) — Number of changes to the ACL. +- `ephemeralOwner` (Int64) — For ephemeral nodes, the ID of the session that owns this node. + +Example: + +``` sql +SELECT * +FROM system.zookeeper +WHERE path = '/clickhouse/tables/01-08/visits/replicas' +FORMAT Vertical +``` + +``` text +Row 1: +────── +name: example01-08-1.yandex.ru +value: +czxid: 932998691229 +mzxid: 932998691229 +ctime: 2015-03-27 16:49:51 +mtime: 2015-03-27 16:49:51 +version: 0 +cversion: 47 +aversion: 0 +ephemeralOwner: 0 +dataLength: 0 +numChildren: 7 +pzxid: 987021031383 +path: /clickhouse/tables/01-08/visits/replicas + +Row 2: +────── +name: example01-08-2.yandex.ru +value: +czxid: 933002738135 +mzxid: 933002738135 +ctime: 2015-03-27 16:57:01 +mtime: 2015-03-27 16:57:01 +version: 0 +cversion: 37 +aversion: 0 +ephemeralOwner: 0 +dataLength: 0 +numChildren: 7 +pzxid: 987021252247 +path: /clickhouse/tables/01-08/visits/replicas +``` diff --git a/docs/en/operations/troubleshooting.md b/docs/en/operations/troubleshooting.md index 1a33aa3f689..6be27f8cb9f 100644 --- a/docs/en/operations/troubleshooting.md +++ b/docs/en/operations/troubleshooting.md @@ -103,7 +103,7 @@ Check: - Endpoint settings. - Check [listen\_host](server-configuration-parameters/settings.md#server_configuration_parameters-listen_host) and [tcp\_port](server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port) settings. + Check [listen\_host](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-listen_host) and [tcp\_port](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port) settings. ClickHouse server accepts localhost connections only by default. @@ -115,8 +115,8 @@ Check: Check: - - The [tcp\_port\_secure](server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port_secure) setting. - - Settings for [SSL certificates](server-configuration-parameters/settings.md#server_configuration_parameters-openssl). + - The [tcp\_port\_secure](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port_secure) setting. + - Settings for [SSL certificates](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-openssl). Use proper parameters while connecting. For example, use the `port_secure` parameter with `clickhouse_client`. diff --git a/docs/en/operations/update.md b/docs/en/operations/update.md index 1131703765b..edacf1ff973 100644 --- a/docs/en/operations/update.md +++ b/docs/en/operations/update.md @@ -5,7 +5,7 @@ toc_title: ClickHouse Update # ClickHouse Update {#clickhouse-update} -If ClickHouse was installed from deb packages, execute the following commands on the server: +If ClickHouse was installed from `deb` packages, execute the following commands on the server: ``` bash $ sudo apt-get update @@ -13,6 +13,6 @@ $ sudo apt-get install clickhouse-client clickhouse-server $ sudo service clickhouse-server restart ``` -If you installed ClickHouse using something other than the recommended deb packages, use the appropriate update method. +If you installed ClickHouse using something other than the recommended `deb` packages, use the appropriate update method. 
ClickHouse does not support a distributed update. The operation should be performed consecutively on each separate server. Do not update all the servers on a cluster simultaneously, or the cluster will be unavailable for some time. diff --git a/docs/en/operations/utilities/index.md b/docs/en/operations/utilities/index.md index eb5fe33f1fe..fe5048f7044 100644 --- a/docs/en/operations/utilities/index.md +++ b/docs/en/operations/utilities/index.md @@ -6,8 +6,8 @@ toc_title: Overview # ClickHouse Utility {#clickhouse-utility} -- [clickhouse-local](clickhouse-local.md) — Allows running SQL queries on data without stopping the ClickHouse server, similar to how `awk` does this. -- [clickhouse-copier](clickhouse-copier.md) — Copies (and reshards) data from one cluster to another cluster. -- [clickhouse-benchmark](clickhouse-benchmark.md) — Loads server with the custom queries and settings. +- [clickhouse-local](../../operations/utilities/clickhouse-local.md) — Allows running SQL queries on data without stopping the ClickHouse server, similar to how `awk` does this. +- [clickhouse-copier](../../operations/utilities/clickhouse-copier.md) — Copies (and reshards) data from one cluster to another cluster. +- [clickhouse-benchmark](../../operations/utilities/clickhouse-benchmark.md) — Loads server with the custom queries and settings. [Original article](https://clickhouse.tech/docs/en/operations/utils/) diff --git a/docs/en/sql-reference/aggregate-functions/combinators.md b/docs/en/sql-reference/aggregate-functions/combinators.md index 8573d55a33c..6d70637236b 100644 --- a/docs/en/sql-reference/aggregate-functions/combinators.md +++ b/docs/en/sql-reference/aggregate-functions/combinators.md @@ -27,7 +27,7 @@ Example 2: `uniqArray(arr)` – Counts the number of unique elements in all ‘a ## -State {#agg-functions-combinator-state} -If you apply this combinator, the aggregate function doesn’t return the resulting value (such as the number of unique values for the [uniq](reference.md#agg_function-uniq) function), but an intermediate state of the aggregation (for `uniq`, this is the hash table for calculating the number of unique values). This is an `AggregateFunction(...)` that can be used for further processing or stored in a table to finish aggregating later. +If you apply this combinator, the aggregate function doesn’t return the resulting value (such as the number of unique values for the [uniq](../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) function), but an intermediate state of the aggregation (for `uniq`, this is the hash table for calculating the number of unique values). This is an `AggregateFunction(...)` that can be used for further processing or stored in a table to finish aggregating later. To work with these states, use: @@ -53,11 +53,11 @@ Converts an aggregate function for tables into an aggregate function for arrays Changes behavior of an aggregate function. -If an aggregate function doesn't have input values, with this combinator it returns the default value for its return data type. Applies to the aggregate functions that can take empty input data. +If an aggregate function doesn’t have input values, with this combinator it returns the default value for its return data type. Applies to the aggregate functions that can take empty input data. `-OrDefault` can be used with other combinators. 
-**Syntax** +**Syntax** ``` sql OrDefault(x) @@ -65,10 +65,10 @@ If an aggregate function doesn't have input values, with this combinator it retu **Parameters** -- `x` — Aggregate function parameters. +- `x` — Aggregate function parameters. + +**Returned values** -**Returned values** - Returns the default value of an aggregate function’s return type if there is nothing to aggregate. Type depends on the aggregate function used. @@ -109,16 +109,15 @@ Result: └───────────────────────────────────┘ ``` - ## -OrNull {#agg-functions-combinator-ornull} Changes behavior of an aggregate function. -This combinator converts a result of an aggregate function to the [Nullable](../data-types/nullable.md) data type. If the aggregate function does not have values to calculate it returns [NULL](../syntax.md#null-literal). +This combinator converts a result of an aggregate function to the [Nullable](../../sql-reference/data-types/nullable.md) data type. If the aggregate function does not have values to calculate it returns [NULL](../../sql-reference/syntax.md#null-literal). `-OrNull` can be used with other combinators. -**Syntax** +**Syntax** ``` sql OrNull(x) @@ -126,12 +125,12 @@ This combinator converts a result of an aggregate function to the [Nullable](../ **Parameters** -- `x` — Aggregate function parameters. - -**Returned values** +- `x` — Aggregate function parameters. -- The result of the aggregate function, converted to the `Nullable` data type. -- `NULL`, if there is nothing to aggregate. +**Returned values** + +- The result of the aggregate function, converted to the `Nullable` data type. +- `NULL`, if there is nothing to aggregate. Type: `Nullable(aggregate function return type)`. @@ -210,7 +209,7 @@ Consider the `people` table with the following data: Let’s get the names of the people whose age lies in the intervals of `[30,60)` and `[60,75)`. Since we use integer representation for age, we get ages in the `[30, 59]` and `[60,74]` intervals. -To aggregate names in an array, we use the [groupArray](reference.md#agg_function-grouparray) aggregate function. It takes one argument. In our case, it’s the `name` column. The `groupArrayResample` function should use the `age` column to aggregate names by age. To define the required intervals, we pass the `30, 75, 30` arguments into the `groupArrayResample` function. +To aggregate names in an array, we use the [groupArray](../../sql-reference/aggregate-functions/reference/grouparray.md#agg_function-grouparray) aggregate function. It takes one argument. In our case, it’s the `name` column. The `groupArrayResample` function should use the `age` column to aggregate names by age. To define the required intervals, we pass the `30, 75, 30` arguments into the `groupArrayResample` function. ``` sql SELECT groupArrayResample(30, 75, 30)(name, age) FROM people diff --git a/docs/en/sql-reference/aggregate-functions/index.md b/docs/en/sql-reference/aggregate-functions/index.md index aa1045b123c..a75cbc6cac5 100644 --- a/docs/en/sql-reference/aggregate-functions/index.md +++ b/docs/en/sql-reference/aggregate-functions/index.md @@ -10,8 +10,8 @@ Aggregate functions work in the [normal](http://www.sql-tutorial.com/sql-aggrega ClickHouse also supports: -- [Parametric aggregate functions](parametric-functions.md#aggregate_functions_parametric), which accept other parameters in addition to columns. -- [Combinators](combinators.md#aggregate_functions_combinators), which change the behavior of aggregate functions. 
+- [Parametric aggregate functions](../../sql-reference/aggregate-functions/parametric-functions.md#aggregate_functions_parametric), which accept other parameters in addition to columns. +- [Combinators](../../sql-reference/aggregate-functions/combinators.md#aggregate_functions_combinators), which change the behavior of aggregate functions. ## NULL Processing {#null-processing} diff --git a/docs/en/sql-reference/aggregate-functions/parametric-functions.md b/docs/en/sql-reference/aggregate-functions/parametric-functions.md index 3dec141d736..fb9d390d2e7 100644 --- a/docs/en/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/en/sql-reference/aggregate-functions/parametric-functions.md @@ -20,7 +20,7 @@ The functions uses [A Streaming Parallel Decision Tree Algorithm](http://jmlr.or **Parameters** `number_of_bins` — Upper limit for the number of bins in the histogram. The function automatically calculates the number of bins. It tries to reach the specified number of bins, but if it fails, it uses fewer bins. -`values` — [Expression](../syntax.md#syntax-expressions) resulting in input values. +`values` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in input values. **Returned values** @@ -316,7 +316,7 @@ Result: The function takes as arguments a set of conditions from 1 to 32 arguments of type `UInt8` that indicate whether a certain condition was met for the event. Any condition can be specified as an argument (as in [WHERE](../../sql-reference/statements/select/where.md#select-where)). -The conditions, except the first, apply in pairs: the result of the second will be true if the first and second are true, of the third if the first and fird are true, etc. +The conditions, except the first, apply in pairs: the result of the second will be true if the first and second are true, of the third if the first and third are true, etc. **Syntax** @@ -494,4 +494,4 @@ Solution: Write in the GROUP BY query SearchPhrase HAVING uniqUpTo(4)(UserID) >= ## sumMapFiltered(keys\_to\_keep)(keys, values) {#summapfilteredkeys-to-keepkeys-values} -Same behavior as [sumMap](reference.md#agg_functions-summap) except that an array of keys is passed as a parameter. This can be especially useful when working with a high cardinality of keys. +Same behavior as [sumMap](../../sql-reference/aggregate-functions/reference/summap.md#agg_functions-summap) except that an array of keys is passed as a parameter. This can be especially useful when working with a high cardinality of keys. diff --git a/docs/en/sql-reference/aggregate-functions/reference.md b/docs/en/sql-reference/aggregate-functions/reference.md deleted file mode 100644 index 4c505a46fd1..00000000000 --- a/docs/en/sql-reference/aggregate-functions/reference.md +++ /dev/null @@ -1,1975 +0,0 @@ ---- -toc_priority: 36 -toc_title: Reference ---- - -# Aggregate Function Reference {#aggregate-functions-reference} - -## count {#agg_function-count} - -Counts the number of rows or not-NULL values. - -ClickHouse supports the following syntaxes for `count`: -- `count(expr)` or `COUNT(DISTINCT expr)`. -- `count()` or `COUNT(*)`. The `count()` syntax is ClickHouse-specific. - -**Parameters** - -The function can take: - -- Zero parameters. -- One [expression](../syntax.md#syntax-expressions). - -**Returned value** - -- If the function is called without parameters it counts the number of rows. -- If the [expression](../syntax.md#syntax-expressions) is passed, then the function counts how many times this expression returned not null. 
If the expression returns a [Nullable](../../sql-reference/data-types/nullable.md)-type value, then the result of `count` stays not `Nullable`. The function returns 0 if the expression returned `NULL` for all the rows. - -In both cases the type of the returned value is [UInt64](../../sql-reference/data-types/int-uint.md). - -**Details** - -ClickHouse supports the `COUNT(DISTINCT ...)` syntax. The behavior of this construction depends on the [count\_distinct\_implementation](../../operations/settings/settings.md#settings-count_distinct_implementation) setting. It defines which of the [uniq\*](#agg_function-uniq) functions is used to perform the operation. The default is the [uniqExact](#agg_function-uniqexact) function. - -The `SELECT count() FROM table` query is not optimized, because the number of entries in the table is not stored separately. It chooses a small column from the table and counts the number of values in it. - -**Examples** - -Example 1: - -``` sql -SELECT count() FROM t -``` - -``` text -┌─count()─┐ -│ 5 │ -└─────────┘ -``` - -Example 2: - -``` sql -SELECT name, value FROM system.settings WHERE name = 'count_distinct_implementation' -``` - -``` text -┌─name──────────────────────────┬─value─────┐ -│ count_distinct_implementation │ uniqExact │ -└───────────────────────────────┴───────────┘ -``` - -``` sql -SELECT count(DISTINCT num) FROM t -``` - -``` text -┌─uniqExact(num)─┐ -│ 3 │ -└────────────────┘ -``` - -This example shows that `count(DISTINCT num)` is performed by the `uniqExact` function according to the `count_distinct_implementation` setting value. - -## any(x) {#agg_function-any} - -Selects the first encountered value. -The query can be executed in any order and even in a different order each time, so the result of this function is indeterminate. -To get a determinate result, you can use the ‘min’ or ‘max’ function instead of ‘any’. - -In some cases, you can rely on the order of execution. This applies to cases when SELECT comes from a subquery that uses ORDER BY. - -When a `SELECT` query has the `GROUP BY` clause or at least one aggregate function, ClickHouse (in contrast to MySQL) requires that all expressions in the `SELECT`, `HAVING`, and `ORDER BY` clauses be calculated from keys or from aggregate functions. In other words, each column selected from the table must be used either in keys or inside aggregate functions. To get behavior like in MySQL, you can put the other columns in the `any` aggregate function. - -## anyHeavy(x) {#anyheavyx} - -Selects a frequently occurring value using the [heavy hitters](http://www.cs.umd.edu/~samir/498/karp.pdf) algorithm. If there is a value that occurs more than in half the cases in each of the query’s execution threads, this value is returned. Normally, the result is nondeterministic. - -``` sql -anyHeavy(column) -``` - -**Arguments** - -- `column` – The column name. - -**Example** - -Take the [OnTime](../../getting-started/example-datasets/ontime.md) data set and select any frequently occurring value in the `AirlineID` column. - -``` sql -SELECT anyHeavy(AirlineID) AS res -FROM ontime -``` - -``` text -┌───res─┐ -│ 19690 │ -└───────┘ -``` - -## anyLast(x) {#anylastx} - -Selects the last value encountered. -The result is just as indeterminate as for the `any` function. - -## groupBitAnd {#groupbitand} - -Applies bitwise `AND` for series of numbers. - -``` sql -groupBitAnd(expr) -``` - -**Parameters** - -`expr` – An expression that results in `UInt*` type. - -**Return value** - -Value of the `UInt*` type. 
- -**Example** - -Test data: - -``` text -binary decimal -00101100 = 44 -00011100 = 28 -00001101 = 13 -01010101 = 85 -``` - -Query: - -``` sql -SELECT groupBitAnd(num) FROM t -``` - -Where `num` is the column with the test data. - -Result: - -``` text -binary decimal -00000100 = 4 -``` - -## groupBitOr {#groupbitor} - -Applies bitwise `OR` for series of numbers. - -``` sql -groupBitOr(expr) -``` - -**Parameters** - -`expr` – An expression that results in `UInt*` type. - -**Return value** - -Value of the `UInt*` type. - -**Example** - -Test data: - -``` text -binary decimal -00101100 = 44 -00011100 = 28 -00001101 = 13 -01010101 = 85 -``` - -Query: - -``` sql -SELECT groupBitOr(num) FROM t -``` - -Where `num` is the column with the test data. - -Result: - -``` text -binary decimal -01111101 = 125 -``` - -## groupBitXor {#groupbitxor} - -Applies bitwise `XOR` for series of numbers. - -``` sql -groupBitXor(expr) -``` - -**Parameters** - -`expr` – An expression that results in `UInt*` type. - -**Return value** - -Value of the `UInt*` type. - -**Example** - -Test data: - -``` text -binary decimal -00101100 = 44 -00011100 = 28 -00001101 = 13 -01010101 = 85 -``` - -Query: - -``` sql -SELECT groupBitXor(num) FROM t -``` - -Where `num` is the column with the test data. - -Result: - -``` text -binary decimal -01101000 = 104 -``` - -## groupBitmap {#groupbitmap} - -Bitmap or Aggregate calculations from a unsigned integer column, return cardinality of type UInt64, if add suffix -State, then return [bitmap object](../../sql-reference/functions/bitmap-functions.md). - -``` sql -groupBitmap(expr) -``` - -**Parameters** - -`expr` – An expression that results in `UInt*` type. - -**Return value** - -Value of the `UInt64` type. - -**Example** - -Test data: - -``` text -UserID -1 -1 -2 -3 -``` - -Query: - -``` sql -SELECT groupBitmap(UserID) as num FROM t -``` - -Result: - -``` text -num -3 -``` - -## min(x) {#agg_function-min} - -Calculates the minimum. - -## max(x) {#agg_function-max} - -Calculates the maximum. - -## argMin(arg, val) {#agg-function-argmin} - -Calculates the ‘arg’ value for a minimal ‘val’ value. If there are several different values of ‘arg’ for minimal values of ‘val’, the first of these values encountered is output. - -**Example:** - -``` text -┌─user─────┬─salary─┐ -│ director │ 5000 │ -│ manager │ 3000 │ -│ worker │ 1000 │ -└──────────┴────────┘ -``` - -``` sql -SELECT argMin(user, salary) FROM salary -``` - -``` text -┌─argMin(user, salary)─┐ -│ worker │ -└──────────────────────┘ -``` - -## argMax(arg, val) {#agg-function-argmax} - -Calculates the ‘arg’ value for a maximum ‘val’ value. If there are several different values of ‘arg’ for maximum values of ‘val’, the first of these values encountered is output. - -## sum(x) {#agg_function-sum} - -Calculates the sum. -Only works for numbers. - -## sumWithOverflow(x) {#sumwithoverflowx} - -Computes the sum of the numbers, using the same data type for the result as for the input parameters. If the sum exceeds the maximum value for this data type, the function returns an error. - -Only works for numbers. - -## sumMap(key, value), sumMap(Tuple(key, value)) {#agg_functions-summap} - -Totals the ‘value’ array according to the keys specified in the ‘key’ array. -Passing tuple of keys and values arrays is synonymical to passing two arrays of keys and values. -The number of elements in ‘key’ and ‘value’ must be the same for each row that is totaled. -Returns a tuple of two arrays: keys in sorted order, and values ​​summed for the corresponding keys. 
- -Example: - -``` sql -CREATE TABLE sum_map( - date Date, - timeslot DateTime, - statusMap Nested( - status UInt16, - requests UInt64 - ), - statusMapTuple Tuple(Array(Int32), Array(Int32)) -) ENGINE = Log; -INSERT INTO sum_map VALUES - ('2000-01-01', '2000-01-01 00:00:00', [1, 2, 3], [10, 10, 10], ([1, 2, 3], [10, 10, 10])), - ('2000-01-01', '2000-01-01 00:00:00', [3, 4, 5], [10, 10, 10], ([3, 4, 5], [10, 10, 10])), - ('2000-01-01', '2000-01-01 00:01:00', [4, 5, 6], [10, 10, 10], ([4, 5, 6], [10, 10, 10])), - ('2000-01-01', '2000-01-01 00:01:00', [6, 7, 8], [10, 10, 10], ([6, 7, 8], [10, 10, 10])); - -SELECT - timeslot, - sumMap(statusMap.status, statusMap.requests), - sumMap(statusMapTuple) -FROM sum_map -GROUP BY timeslot -``` - -``` text -┌────────────timeslot─┬─sumMap(statusMap.status, statusMap.requests)─┬─sumMap(statusMapTuple)─────────┐ -│ 2000-01-01 00:00:00 │ ([1,2,3,4,5],[10,10,20,10,10]) │ ([1,2,3,4,5],[10,10,20,10,10]) │ -│ 2000-01-01 00:01:00 │ ([4,5,6,7,8],[10,10,20,10,10]) │ ([4,5,6,7,8],[10,10,20,10,10]) │ -└─────────────────────┴──────────────────────────────────────────────┴────────────────────────────────┘ -``` - -## skewPop {#skewpop} - -Computes the [skewness](https://en.wikipedia.org/wiki/Skewness) of a sequence. - -``` sql -skewPop(expr) -``` - -**Parameters** - -`expr` — [Expression](../syntax.md#syntax-expressions) returning a number. - -**Returned value** - -The skewness of the given distribution. Type — [Float64](../../sql-reference/data-types/float.md) - -**Example** - -``` sql -SELECT skewPop(value) FROM series_with_value_column -``` - -## skewSamp {#skewsamp} - -Computes the [sample skewness](https://en.wikipedia.org/wiki/Skewness) of a sequence. - -It represents an unbiased estimate of the skewness of a random variable if passed values form its sample. - -``` sql -skewSamp(expr) -``` - -**Parameters** - -`expr` — [Expression](../syntax.md#syntax-expressions) returning a number. - -**Returned value** - -The skewness of the given distribution. Type — [Float64](../../sql-reference/data-types/float.md). If `n <= 1` (`n` is the size of the sample), then the function returns `nan`. - -**Example** - -``` sql -SELECT skewSamp(value) FROM series_with_value_column -``` - -## kurtPop {#kurtpop} - -Computes the [kurtosis](https://en.wikipedia.org/wiki/Kurtosis) of a sequence. - -``` sql -kurtPop(expr) -``` - -**Parameters** - -`expr` — [Expression](../syntax.md#syntax-expressions) returning a number. - -**Returned value** - -The kurtosis of the given distribution. Type — [Float64](../../sql-reference/data-types/float.md) - -**Example** - -``` sql -SELECT kurtPop(value) FROM series_with_value_column -``` - -## kurtSamp {#kurtsamp} - -Computes the [sample kurtosis](https://en.wikipedia.org/wiki/Kurtosis) of a sequence. - -It represents an unbiased estimate of the kurtosis of a random variable if passed values form its sample. - -``` sql -kurtSamp(expr) -``` - -**Parameters** - -`expr` — [Expression](../syntax.md#syntax-expressions) returning a number. - -**Returned value** - -The kurtosis of the given distribution. Type — [Float64](../../sql-reference/data-types/float.md). If `n <= 1` (`n` is a size of the sample), then the function returns `nan`. - -**Example** - -``` sql -SELECT kurtSamp(value) FROM series_with_value_column -``` - -## timeSeriesGroupSum(uid, timestamp, value) {#agg-function-timeseriesgroupsum} - -`timeSeriesGroupSum` can aggregate different time series that sample timestamp not alignment. 
-It will use linear interpolation between two sample timestamp and then sum time-series together. - -- `uid` is the time series unique id, `UInt64`. -- `timestamp` is Int64 type in order to support millisecond or microsecond. -- `value` is the metric. - -The function returns array of tuples with `(timestamp, aggregated_value)` pairs. - -Before using this function make sure `timestamp` is in ascending order. - -Example: - -``` text -┌─uid─┬─timestamp─┬─value─┐ -│ 1 │ 2 │ 0.2 │ -│ 1 │ 7 │ 0.7 │ -│ 1 │ 12 │ 1.2 │ -│ 1 │ 17 │ 1.7 │ -│ 1 │ 25 │ 2.5 │ -│ 2 │ 3 │ 0.6 │ -│ 2 │ 8 │ 1.6 │ -│ 2 │ 12 │ 2.4 │ -│ 2 │ 18 │ 3.6 │ -│ 2 │ 24 │ 4.8 │ -└─────┴───────────┴───────┘ -``` - -``` sql -CREATE TABLE time_series( - uid UInt64, - timestamp Int64, - value Float64 -) ENGINE = Memory; -INSERT INTO time_series VALUES - (1,2,0.2),(1,7,0.7),(1,12,1.2),(1,17,1.7),(1,25,2.5), - (2,3,0.6),(2,8,1.6),(2,12,2.4),(2,18,3.6),(2,24,4.8); - -SELECT timeSeriesGroupSum(uid, timestamp, value) -FROM ( - SELECT * FROM time_series order by timestamp ASC -); -``` - -And the result will be: - -``` text -[(2,0.2),(3,0.9),(7,2.1),(8,2.4),(12,3.6),(17,5.1),(18,5.4),(24,7.2),(25,2.5)] -``` - -## timeSeriesGroupRateSum(uid, ts, val) {#agg-function-timeseriesgroupratesum} - -Similarly to `timeSeriesGroupSum`, `timeSeriesGroupRateSum` calculates the rate of time-series and then sum rates together. -Also, timestamp should be in ascend order before use this function. - -Applying this function to the data from the `timeSeriesGroupSum` example, you get the following result: - -``` text -[(2,0),(3,0.1),(7,0.3),(8,0.3),(12,0.3),(17,0.3),(18,0.3),(24,0.3),(25,0.1)] -``` - -## avg(x) {#agg_function-avg} - -Calculates the average. -Only works for numbers. -The result is always Float64. - -## avgWeighted {#avgweighted} - -Calculates the [weighted arithmetic mean](https://en.wikipedia.org/wiki/Weighted_arithmetic_mean). - -**Syntax** - -``` sql -avgWeighted(x, weight) -``` - -**Parameters** - -- `x` — Values. [Integer](../data-types/int-uint.md) or [floating-point](../data-types/float.md). -- `weight` — Weights of the values. [Integer](../data-types/int-uint.md) or [floating-point](../data-types/float.md). - -Type of `x` and `weight` must be the same. - -**Returned value** - -- Weighted mean. -- `NaN`. If all the weights are equal to 0. - -Type: [Float64](../data-types/float.md). - -**Example** - -Query: - -``` sql -SELECT avgWeighted(x, w) -FROM values('x Int8, w Int8', (4, 1), (1, 0), (10, 2)) -``` - -Result: - -``` text -┌─avgWeighted(x, weight)─┐ -│ 8 │ -└────────────────────────┘ -``` - -## uniq {#agg_function-uniq} - -Calculates the approximate number of different values of the argument. - -``` sql -uniq(x[, ...]) -``` - -**Parameters** - -The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types. - -**Returned value** - -- A [UInt64](../../sql-reference/data-types/int-uint.md)-type number. - -**Implementation details** - -Function: - -- Calculates a hash for all parameters in the aggregate, then uses it in calculations. - -- Uses an adaptive sampling algorithm. For the calculation state, the function uses a sample of element hash values up to 65536. - - This algorithm is very accurate and very efficient on the CPU. When the query contains several of these functions, using `uniq` is almost as fast as using other aggregate functions. - -- Provides the result deterministically (it doesn’t depend on the query processing order). 
- -We recommend using this function in almost all scenarios. - -**See Also** - -- [uniqCombined](#agg_function-uniqcombined) -- [uniqCombined64](#agg_function-uniqcombined64) -- [uniqHLL12](#agg_function-uniqhll12) -- [uniqExact](#agg_function-uniqexact) - -## uniqCombined {#agg_function-uniqcombined} - -Calculates the approximate number of different argument values. - -``` sql -uniqCombined(HLL_precision)(x[, ...]) -``` - -The `uniqCombined` function is a good choice for calculating the number of different values. - -**Parameters** - -The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types. - -`HLL_precision` is the base-2 logarithm of the number of cells in [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog). Optional, you can use the function as `uniqCombined(x[, ...])`. The default value for `HLL_precision` is 17, which is effectively 96 KiB of space (2^17 cells, 6 bits each). - -**Returned value** - -- A number [UInt64](../../sql-reference/data-types/int-uint.md)-type number. - -**Implementation details** - -Function: - -- Calculates a hash (64-bit hash for `String` and 32-bit otherwise) for all parameters in the aggregate, then uses it in calculations. - -- Uses a combination of three algorithms: array, hash table, and HyperLogLog with an error correction table. - - For a small number of distinct elements, an array is used. When the set size is larger, a hash table is used. For a larger number of elements, HyperLogLog is used, which will occupy a fixed amount of memory. - -- Provides the result deterministically (it doesn’t depend on the query processing order). - -!!! note "Note" - Since it uses 32-bit hash for non-`String` type, the result will have very high error for cardinalities significantly larger than `UINT_MAX` (error will raise quickly after a few tens of billions of distinct values), hence in this case you should use [uniqCombined64](#agg_function-uniqcombined64) - -Compared to the [uniq](#agg_function-uniq) function, the `uniqCombined`: - -- Consumes several times less memory. -- Calculates with several times higher accuracy. -- Usually has slightly lower performance. In some scenarios, `uniqCombined` can perform better than `uniq`, for example, with distributed queries that transmit a large number of aggregation states over the network. - -**See Also** - -- [uniq](#agg_function-uniq) -- [uniqCombined64](#agg_function-uniqcombined64) -- [uniqHLL12](#agg_function-uniqhll12) -- [uniqExact](#agg_function-uniqexact) - -## uniqCombined64 {#agg_function-uniqcombined64} - -Same as [uniqCombined](#agg_function-uniqcombined), but uses 64-bit hash for all data types. - -## uniqHLL12 {#agg_function-uniqhll12} - -Calculates the approximate number of different argument values, using the [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) algorithm. - -``` sql -uniqHLL12(x[, ...]) -``` - -**Parameters** - -The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types. - -**Returned value** - -- A [UInt64](../../sql-reference/data-types/int-uint.md)-type number. - -**Implementation details** - -Function: - -- Calculates a hash for all parameters in the aggregate, then uses it in calculations. - -- Uses the HyperLogLog algorithm to approximate the number of different argument values. - - 212 5-bit cells are used. The size of the state is slightly more than 2.5 KB. 
The result is not very accurate (up to ~10% error) for small data sets (<10K elements). However, the result is fairly accurate for high-cardinality data sets (10K-100M), with a maximum error of ~1.6%. Starting from 100M, the estimation error increases, and the function will return very inaccurate results for data sets with extremely high cardinality (1B+ elements). - -- Provides the determinate result (it doesn’t depend on the query processing order). - -We don’t recommend using this function. In most cases, use the [uniq](#agg_function-uniq) or [uniqCombined](#agg_function-uniqcombined) function. - -**See Also** - -- [uniq](#agg_function-uniq) -- [uniqCombined](#agg_function-uniqcombined) -- [uniqExact](#agg_function-uniqexact) - -## uniqExact {#agg_function-uniqexact} - -Calculates the exact number of different argument values. - -``` sql -uniqExact(x[, ...]) -``` - -Use the `uniqExact` function if you absolutely need an exact result. Otherwise use the [uniq](#agg_function-uniq) function. - -The `uniqExact` function uses more memory than `uniq`, because the size of the state has unbounded growth as the number of different values increases. - -**Parameters** - -The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types. - -**See Also** - -- [uniq](#agg_function-uniq) -- [uniqCombined](#agg_function-uniqcombined) -- [uniqHLL12](#agg_function-uniqhll12) - -## groupArray(x), groupArray(max\_size)(x) {#agg_function-grouparray} - -Creates an array of argument values. -Values can be added to the array in any (indeterminate) order. - -The second version (with the `max_size` parameter) limits the size of the resulting array to `max_size` elements. -For example, `groupArray (1) (x)` is equivalent to `[any (x)]`. - -In some cases, you can still rely on the order of execution. This applies to cases when `SELECT` comes from a subquery that uses `ORDER BY`. - -## groupArrayInsertAt {#grouparrayinsertat} - -Inserts a value into the array at the specified position. - -**Syntax** - -```sql -groupArrayInsertAt(default_x, size)(x, pos); -``` - -If in one query several values are inserted into the same position, the function behaves in the following ways: - -- If a query is executed in a single thread, the first one of the inserted values is used. -- If a query is executed in multiple threads, the resulting value is an undetermined one of the inserted values. - -**Parameters** - -- `x` — Value to be inserted. [Expression](../syntax.md#syntax-expressions) resulting in one of the [supported data types](../../sql-reference/data-types/index.md). -- `pos` — Position at which the specified element `x` is to be inserted. Index numbering in the array starts from zero. [UInt32](../../sql-reference/data-types/int-uint.md#uint-ranges). -- `default_x`— Default value for substituting in empty positions. Optional parameter. [Expression](../syntax.md#syntax-expressions) resulting in the data type configured for the `x` parameter. If `default_x` is not defined, the [default values](../../sql-reference/statements/create.md#create-default-values) are used. -- `size`— Length of the resulting array. Optional parameter. When using this parameter, the default value `default_x` must be specified. [UInt32](../../sql-reference/data-types/int-uint.md#uint-ranges). - -**Returned value** - -- Array with inserted values. - -Type: [Array](../../sql-reference/data-types/array.md#data-type-array). 
- -**Example** - -Query: - -```sql -SELECT groupArrayInsertAt(toString(number), number * 2) FROM numbers(5); -``` - -Result: - -```text -┌─groupArrayInsertAt(toString(number), multiply(number, 2))─┐ -│ ['0','','1','','2','','3','','4'] │ -└───────────────────────────────────────────────────────────┘ -``` - -Query: - -```sql -SELECT groupArrayInsertAt('-')(toString(number), number * 2) FROM numbers(5); -``` - -Result: - -```text -┌─groupArrayInsertAt('-')(toString(number), multiply(number, 2))─┐ -│ ['0','-','1','-','2','-','3','-','4'] │ -└────────────────────────────────────────────────────────────────┘ -``` - -Query: - -```sql -SELECT groupArrayInsertAt('-', 5)(toString(number), number * 2) FROM numbers(5); -``` - -Result: - -```text -┌─groupArrayInsertAt('-', 5)(toString(number), multiply(number, 2))─┐ -│ ['0','-','1','-','2'] │ -└───────────────────────────────────────────────────────────────────┘ -``` - -Multi-threaded insertion of elements into one position. - -Query: - -```sql -SELECT groupArrayInsertAt(number, 0) FROM numbers_mt(10) SETTINGS max_block_size = 1; -``` - -As a result of this query you get random integer in the `[0,9]` range. For example: - -```text -┌─groupArrayInsertAt(number, 0)─┐ -│ [7] │ -└───────────────────────────────┘ -``` - -## groupArrayMovingSum {#agg_function-grouparraymovingsum} - -Calculates the moving sum of input values. - -``` sql -groupArrayMovingSum(numbers_for_summing) -groupArrayMovingSum(window_size)(numbers_for_summing) -``` - -The function can take the window size as a parameter. If left unspecified, the function takes the window size equal to the number of rows in the column. - -**Parameters** - -- `numbers_for_summing` — [Expression](../syntax.md#syntax-expressions) resulting in a numeric data type value. -- `window_size` — Size of the calculation window. - -**Returned values** - -- Array of the same size and type as the input data. - -**Example** - -The sample table: - -``` sql -CREATE TABLE t -( - `int` UInt8, - `float` Float32, - `dec` Decimal32(2) -) -ENGINE = TinyLog -``` - -``` text -┌─int─┬─float─┬──dec─┐ -│ 1 │ 1.1 │ 1.10 │ -│ 2 │ 2.2 │ 2.20 │ -│ 4 │ 4.4 │ 4.40 │ -│ 7 │ 7.77 │ 7.77 │ -└─────┴───────┴──────┘ -``` - -The queries: - -``` sql -SELECT - groupArrayMovingSum(int) AS I, - groupArrayMovingSum(float) AS F, - groupArrayMovingSum(dec) AS D -FROM t -``` - -``` text -┌─I──────────┬─F───────────────────────────────┬─D──────────────────────┐ -│ [1,3,7,14] │ [1.1,3.3000002,7.7000003,15.47] │ [1.10,3.30,7.70,15.47] │ -└────────────┴─────────────────────────────────┴────────────────────────┘ -``` - -``` sql -SELECT - groupArrayMovingSum(2)(int) AS I, - groupArrayMovingSum(2)(float) AS F, - groupArrayMovingSum(2)(dec) AS D -FROM t -``` - -``` text -┌─I──────────┬─F───────────────────────────────┬─D──────────────────────┐ -│ [1,3,6,11] │ [1.1,3.3000002,6.6000004,12.17] │ [1.10,3.30,6.60,12.17] │ -└────────────┴─────────────────────────────────┴────────────────────────┘ -``` - -## groupArrayMovingAvg {#agg_function-grouparraymovingavg} - -Calculates the moving average of input values. - -``` sql -groupArrayMovingAvg(numbers_for_summing) -groupArrayMovingAvg(window_size)(numbers_for_summing) -``` - -The function can take the window size as a parameter. If left unspecified, the function takes the window size equal to the number of rows in the column. - -**Parameters** - -- `numbers_for_summing` — [Expression](../syntax.md#syntax-expressions) resulting in a numeric data type value. -- `window_size` — Size of the calculation window. 
- -**Returned values** - -- Array of the same size and type as the input data. - -The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero). It truncates the decimal places insignificant for the resulting data type. - -**Example** - -The sample table `b`: - -``` sql -CREATE TABLE t -( - `int` UInt8, - `float` Float32, - `dec` Decimal32(2) -) -ENGINE = TinyLog -``` - -``` text -┌─int─┬─float─┬──dec─┐ -│ 1 │ 1.1 │ 1.10 │ -│ 2 │ 2.2 │ 2.20 │ -│ 4 │ 4.4 │ 4.40 │ -│ 7 │ 7.77 │ 7.77 │ -└─────┴───────┴──────┘ -``` - -The queries: - -``` sql -SELECT - groupArrayMovingAvg(int) AS I, - groupArrayMovingAvg(float) AS F, - groupArrayMovingAvg(dec) AS D -FROM t -``` - -``` text -┌─I─────────┬─F───────────────────────────────────┬─D─────────────────────┐ -│ [0,0,1,3] │ [0.275,0.82500005,1.9250001,3.8675] │ [0.27,0.82,1.92,3.86] │ -└───────────┴─────────────────────────────────────┴───────────────────────┘ -``` - -``` sql -SELECT - groupArrayMovingAvg(2)(int) AS I, - groupArrayMovingAvg(2)(float) AS F, - groupArrayMovingAvg(2)(dec) AS D -FROM t -``` - -``` text -┌─I─────────┬─F────────────────────────────────┬─D─────────────────────┐ -│ [0,1,3,5] │ [0.55,1.6500001,3.3000002,6.085] │ [0.55,1.65,3.30,6.08] │ -└───────────┴──────────────────────────────────┴───────────────────────┘ -``` - -## groupUniqArray(x), groupUniqArray(max\_size)(x) {#groupuniqarrayx-groupuniqarraymax-sizex} - -Creates an array from different argument values. Memory consumption is the same as for the `uniqExact` function. - -The second version (with the `max_size` parameter) limits the size of the resulting array to `max_size` elements. -For example, `groupUniqArray(1)(x)` is equivalent to `[any(x)]`. - -## quantile {#quantile} - -Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence. - -This function applies [reservoir sampling](https://en.wikipedia.org/wiki/Reservoir_sampling) with a reservoir size up to 8192 and a random number generator for sampling. The result is non-deterministic. To get an exact quantile, use the [quantileExact](#quantileexact) function. - -When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](#quantiles) function. - -**Syntax** - -``` sql -quantile(level)(expr) -``` - -Alias: `median`. - -**Parameters** - -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). -- `expr` — Expression over the column values resulting in numeric [data types](../../sql-reference/data-types/index.md#data_types), [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). - -**Returned value** - -- Approximate quantile of the specified level. - -Type: - -- [Float64](../../sql-reference/data-types/float.md) for numeric data type input. -- [Date](../../sql-reference/data-types/date.md) if input values have the `Date` type. -- [DateTime](../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. 
- -**Example** - -Input table: - -``` text -┌─val─┐ -│ 1 │ -│ 1 │ -│ 2 │ -│ 3 │ -└─────┘ -``` - -Query: - -``` sql -SELECT quantile(val) FROM t -``` - -Result: - -``` text -┌─quantile(val)─┐ -│ 1.5 │ -└───────────────┘ -``` - -**See Also** - -- [median](#median) -- [quantiles](#quantiles) - -## quantileDeterministic {#quantiledeterministic} - -Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence. - -This function applies [reservoir sampling](https://en.wikipedia.org/wiki/Reservoir_sampling) with a reservoir size up to 8192 and deterministic algorithm of sampling. The result is deterministic. To get an exact quantile, use the [quantileExact](#quantileexact) function. - -When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](#quantiles) function. - -**Syntax** - -``` sql -quantileDeterministic(level)(expr, determinator) -``` - -Alias: `medianDeterministic`. - -**Parameters** - -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). -- `expr` — Expression over the column values resulting in numeric [data types](../../sql-reference/data-types/index.md#data_types), [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). -- `determinator` — Number whose hash is used instead of a random number generator in the reservoir sampling algorithm to make the result of sampling deterministic. As a determinator you can use any deterministic positive number, for example, a user id or an event id. If the same determinator value occures too often, the function works incorrectly. - -**Returned value** - -- Approximate quantile of the specified level. - -Type: - -- [Float64](../../sql-reference/data-types/float.md) for numeric data type input. -- [Date](../../sql-reference/data-types/date.md) if input values have the `Date` type. -- [DateTime](../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. - -**Example** - -Input table: - -``` text -┌─val─┐ -│ 1 │ -│ 1 │ -│ 2 │ -│ 3 │ -└─────┘ -``` - -Query: - -``` sql -SELECT quantileDeterministic(val, 1) FROM t -``` - -Result: - -``` text -┌─quantileDeterministic(val, 1)─┐ -│ 1.5 │ -└───────────────────────────────┘ -``` - -**See Also** - -- [median](#median) -- [quantiles](#quantiles) - -## quantileExact {#quantileexact} - -Exactly computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence. - -To get exact value, all the passed values ​​are combined into an array, which is then partially sorted. Therefore, the function consumes `O(n)` memory, where `n` is a number of values that were passed. However, for a small number of values, the function is very effective. - -When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](#quantiles) function. - -**Syntax** - -``` sql -quantileExact(level)(expr) -``` - -Alias: `medianExact`. - -**Parameters** - -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. 
Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). -- `expr` — Expression over the column values resulting in numeric [data types](../../sql-reference/data-types/index.md#data_types), [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). - -**Returned value** - -- Quantile of the specified level. - -Type: - -- [Float64](../../sql-reference/data-types/float.md) for numeric data type input. -- [Date](../../sql-reference/data-types/date.md) if input values have the `Date` type. -- [DateTime](../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. - -**Example** - -Query: - -``` sql -SELECT quantileExact(number) FROM numbers(10) -``` - -Result: - -``` text -┌─quantileExact(number)─┐ -│ 5 │ -└───────────────────────┘ -``` - -**See Also** - -- [median](#median) -- [quantiles](#quantiles) - -## quantileExactWeighted {#quantileexactweighted} - -Exactly computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence, taking into account the weight of each element. - -To get exact value, all the passed values ​​are combined into an array, which is then partially sorted. Each value is counted with its weight, as if it is present `weight` times. A hash table is used in the algorithm. Because of this, if the passed values ​​are frequently repeated, the function consumes less RAM than [quantileExact](#quantileexact). You can use this function instead of `quantileExact` and specify the weight 1. - -When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](#quantiles) function. - -**Syntax** - -``` sql -quantileExactWeighted(level)(expr, weight) -``` - -Alias: `medianExactWeighted`. - -**Parameters** - -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). -- `expr` — Expression over the column values resulting in numeric [data types](../../sql-reference/data-types/index.md#data_types), [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). -- `weight` — Column with weights of sequence members. Weight is a number of value occurrences. - -**Returned value** - -- Quantile of the specified level. - -Type: - -- [Float64](../../sql-reference/data-types/float.md) for numeric data type input. -- [Date](../../sql-reference/data-types/date.md) if input values have the `Date` type. -- [DateTime](../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. - -**Example** - -Input table: - -``` text -┌─n─┬─val─┐ -│ 0 │ 3 │ -│ 1 │ 2 │ -│ 2 │ 1 │ -│ 5 │ 4 │ -└───┴─────┘ -``` - -Query: - -``` sql -SELECT quantileExactWeighted(n, val) FROM t -``` - -Result: - -``` text -┌─quantileExactWeighted(n, val)─┐ -│ 1 │ -└───────────────────────────────┘ -``` - -**See Also** - -- [median](#median) -- [quantiles](#quantiles) - -## quantileTiming {#quantiletiming} - -With the determined precision computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence. - -The result is deterministic (it doesn’t depend on the query processing order). 
The function is optimized for working with sequences which describe distributions like loading web pages times or backend response times. - -When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](#quantiles) function. - -**Syntax** - -``` sql -quantileTiming(level)(expr) -``` - -Alias: `medianTiming`. - -**Parameters** - -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). - -- `expr` — [Expression](../syntax.md#syntax-expressions) over a column values returning a [Float\*](../../sql-reference/data-types/float.md)-type number. - - - If negative values are passed to the function, the behavior is undefined. - - If the value is greater than 30,000 (a page loading time of more than 30 seconds), it is assumed to be 30,000. - -**Accuracy** - -The calculation is accurate if: - -- Total number of values doesn’t exceed 5670. -- Total number of values exceeds 5670, but the page loading time is less than 1024ms. - -Otherwise, the result of the calculation is rounded to the nearest multiple of 16 ms. - -!!! note "Note" - For calculating page loading time quantiles, this function is more effective and accurate than [quantile](#quantile). - -**Returned value** - -- Quantile of the specified level. - -Type: `Float32`. - -!!! note "Note" - If no values are passed to the function (when using `quantileTimingIf`), [NaN](../../sql-reference/data-types/float.md#data_type-float-nan-inf) is returned. The purpose of this is to differentiate these cases from cases that result in zero. See [ORDER BY clause](../statements/select/order-by.md#select-order-by) for notes on sorting `NaN` values. - -**Example** - -Input table: - -``` text -┌─response_time─┐ -│ 72 │ -│ 112 │ -│ 126 │ -│ 145 │ -│ 104 │ -│ 242 │ -│ 313 │ -│ 168 │ -│ 108 │ -└───────────────┘ -``` - -Query: - -``` sql -SELECT quantileTiming(response_time) FROM t -``` - -Result: - -``` text -┌─quantileTiming(response_time)─┐ -│ 126 │ -└───────────────────────────────┘ -``` - -**See Also** - -- [median](#median) -- [quantiles](#quantiles) - -## quantileTimingWeighted {#quantiletimingweighted} - -With the determined precision computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence according to the weight of each sequence member. - -The result is deterministic (it doesn’t depend on the query processing order). The function is optimized for working with sequences which describe distributions like loading web pages times or backend response times. - -When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](#quantiles) function. - -**Syntax** - -``` sql -quantileTimingWeighted(level)(expr, weight) -``` - -Alias: `medianTimingWeighted`. - -**Parameters** - -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). 
- -- `expr` — [Expression](../syntax.md#syntax-expressions) over a column values returning a [Float\*](../../sql-reference/data-types/float.md)-type number. - - - If negative values are passed to the function, the behavior is undefined. - - If the value is greater than 30,000 (a page loading time of more than 30 seconds), it is assumed to be 30,000. - -- `weight` — Column with weights of sequence elements. Weight is a number of value occurrences. - -**Accuracy** - -The calculation is accurate if: - -- Total number of values doesn’t exceed 5670. -- Total number of values exceeds 5670, but the page loading time is less than 1024ms. - -Otherwise, the result of the calculation is rounded to the nearest multiple of 16 ms. - -!!! note "Note" - For calculating page loading time quantiles, this function is more effective and accurate than [quantile](#quantile). - -**Returned value** - -- Quantile of the specified level. - -Type: `Float32`. - -!!! note "Note" - If no values are passed to the function (when using `quantileTimingIf`), [NaN](../../sql-reference/data-types/float.md#data_type-float-nan-inf) is returned. The purpose of this is to differentiate these cases from cases that result in zero. See [ORDER BY clause](../statements/select/order-by.md#select-order-by) for notes on sorting `NaN` values. - -**Example** - -Input table: - -``` text -┌─response_time─┬─weight─┐ -│ 68 │ 1 │ -│ 104 │ 2 │ -│ 112 │ 3 │ -│ 126 │ 2 │ -│ 138 │ 1 │ -│ 162 │ 1 │ -└───────────────┴────────┘ -``` - -Query: - -``` sql -SELECT quantileTimingWeighted(response_time, weight) FROM t -``` - -Result: - -``` text -┌─quantileTimingWeighted(response_time, weight)─┐ -│ 112 │ -└───────────────────────────────────────────────┘ -``` - -**See Also** - -- [median](#median) -- [quantiles](#quantiles) - -## quantileTDigest {#quantiletdigest} - -Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence using the [t-digest](https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf) algorithm. - -The maximum error is 1%. Memory consumption is `log(n)`, where `n` is a number of values. The result depends on the order of running the query, and is nondeterministic. - -The performance of the function is lower than performance of [quantile](#quantile) or [quantileTiming](#quantiletiming). In terms of the ratio of State size to precision, this function is much better than `quantile`. - -When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](#quantiles) function. - -**Syntax** - -``` sql -quantileTDigest(level)(expr) -``` - -Alias: `medianTDigest`. - -**Parameters** - -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). -- `expr` — Expression over the column values resulting in numeric [data types](../../sql-reference/data-types/index.md#data_types), [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). - -**Returned value** - -- Approximate quantile of the specified level. - -Type: - -- [Float64](../../sql-reference/data-types/float.md) for numeric data type input. -- [Date](../../sql-reference/data-types/date.md) if input values have the `Date` type. 
-- [DateTime](../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. - -**Example** - -Query: - -``` sql -SELECT quantileTDigest(number) FROM numbers(10) -``` - -Result: - -``` text -┌─quantileTDigest(number)─┐ -│ 4.5 │ -└─────────────────────────┘ -``` - -**See Also** - -- [median](#median) -- [quantiles](#quantiles) - -## quantileTDigestWeighted {#quantiletdigestweighted} - -Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence using the [t-digest](https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf) algorithm. The function takes into account the weight of each sequence member. The maximum error is 1%. Memory consumption is `log(n)`, where `n` is a number of values. - -The performance of the function is lower than performance of [quantile](#quantile) or [quantileTiming](#quantiletiming). In terms of the ratio of State size to precision, this function is much better than `quantile`. - -The result depends on the order of running the query, and is nondeterministic. - -When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](#quantiles) function. - -**Syntax** - -``` sql -quantileTDigest(level)(expr) -``` - -Alias: `medianTDigest`. - -**Parameters** - -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). -- `expr` — Expression over the column values resulting in numeric [data types](../../sql-reference/data-types/index.md#data_types), [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). -- `weight` — Column with weights of sequence elements. Weight is a number of value occurrences. - -**Returned value** - -- Approximate quantile of the specified level. - -Type: - -- [Float64](../../sql-reference/data-types/float.md) for numeric data type input. -- [Date](../../sql-reference/data-types/date.md) if input values have the `Date` type. -- [DateTime](../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. - -**Example** - -Query: - -``` sql -SELECT quantileTDigestWeighted(number, 1) FROM numbers(10) -``` - -Result: - -``` text -┌─quantileTDigestWeighted(number, 1)─┐ -│ 4.5 │ -└────────────────────────────────────┘ -``` - -**See Also** - -- [median](#median) -- [quantiles](#quantiles) - -## median {#median} - -The `median*` functions are the aliases for the corresponding `quantile*` functions. They calculate median of a numeric data sample. - -Functions: - -- `median` — Alias for [quantile](#quantile). -- `medianDeterministic` — Alias for [quantileDeterministic](#quantiledeterministic). -- `medianExact` — Alias for [quantileExact](#quantileexact). -- `medianExactWeighted` — Alias for [quantileExactWeighted](#quantileexactweighted). -- `medianTiming` — Alias for [quantileTiming](#quantiletiming). -- `medianTimingWeighted` — Alias for [quantileTimingWeighted](#quantiletimingweighted). -- `medianTDigest` — Alias for [quantileTDigest](#quantiletdigest). -- `medianTDigestWeighted` — Alias for [quantileTDigestWeighted](#quantiletdigestweighted). 
- -**Example** - -Input table: - -``` text -┌─val─┐ -│ 1 │ -│ 1 │ -│ 2 │ -│ 3 │ -└─────┘ -``` - -Query: - -``` sql -SELECT medianDeterministic(val, 1) FROM t -``` - -Result: - -``` text -┌─medianDeterministic(val, 1)─┐ -│ 1.5 │ -└─────────────────────────────┘ -``` - -## quantiles(level1, level2, …)(x) {#quantiles} - -All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesTDigest`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values. - -## varSamp(x) {#varsampx} - -Calculates the amount `Σ((x - x̅)^2) / (n - 1)`, where `n` is the sample size and `x̅`is the average value of `x`. - -It represents an unbiased estimate of the variance of a random variable if passed values form its sample. - -Returns `Float64`. When `n <= 1`, returns `+∞`. - -!!! note "Note" - This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varSampStable` function. It works slower but provides a lower computational error. - -## varPop(x) {#varpopx} - -Calculates the amount `Σ((x - x̅)^2) / n`, where `n` is the sample size and `x̅`is the average value of `x`. - -In other words, dispersion for a set of values. Returns `Float64`. - -!!! note "Note" - This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varPopStable` function. It works slower but provides a lower computational error. - -## stddevSamp(x) {#stddevsampx} - -The result is equal to the square root of `varSamp(x)`. - -!!! note "Note" - This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `stddevSampStable` function. It works slower but provides a lower computational error. - -## stddevPop(x) {#stddevpopx} - -The result is equal to the square root of `varPop(x)`. - -!!! note "Note" - This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `stddevPopStable` function. It works slower but provides a lower computational error. - -## topK(N)(x) {#topknx} - -Returns an array of the approximately most frequent values in the specified column. The resulting array is sorted in descending order of approximate frequency of values (not by the values themselves). - -Implements the [Filtered Space-Saving](http://www.l2f.inesc-id.pt/~fmmb/wiki/uploads/Work/misnis.ref0a.pdf) algorithm for analyzing TopK, based on the reduce-and-combine algorithm from [Parallel Space Saving](https://arxiv.org/pdf/1401.0702.pdf). - -``` sql -topK(N)(column) -``` - -This function doesn’t provide a guaranteed result. In certain situations, errors might occur and it might return frequent values that aren’t the most frequent values. - -We recommend using the `N < 10` value; performance is reduced with large `N` values. Maximum value of `N = 65536`. - -**Parameters** - -- ‘N’ is the number of elements to return. - -If the parameter is omitted, default value 10 is used. - -**Arguments** - -- ’ x ’ – The value to calculate frequency. 
- -**Example** - -Take the [OnTime](../../getting-started/example-datasets/ontime.md) data set and select the three most frequently occurring values in the `AirlineID` column. - -``` sql -SELECT topK(3)(AirlineID) AS res -FROM ontime -``` - -``` text -┌─res─────────────────┐ -│ [19393,19790,19805] │ -└─────────────────────┘ -``` - -## topKWeighted {#topkweighted} - -Similar to `topK` but takes one additional argument of integer type - `weight`. Every value is accounted `weight` times for frequency calculation. - -**Syntax** - -``` sql -topKWeighted(N)(x, weight) -``` - -**Parameters** - -- `N` — The number of elements to return. - -**Arguments** - -- `x` – The value. -- `weight` — The weight. [UInt8](../../sql-reference/data-types/int-uint.md). - -**Returned value** - -Returns an array of the values with maximum approximate sum of weights. - -**Example** - -Query: - -``` sql -SELECT topKWeighted(10)(number, number) FROM numbers(1000) -``` - -Result: - -``` text -┌─topKWeighted(10)(number, number)──────────┐ -│ [999,998,997,996,995,994,993,992,991,990] │ -└───────────────────────────────────────────┘ -``` - -## covarSamp(x, y) {#covarsampx-y} - -Calculates the value of `Σ((x - x̅)(y - y̅)) / (n - 1)`. - -Returns Float64. When `n <= 1`, returns +∞. - -!!! note "Note" - This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarSampStable` function. It works slower but provides a lower computational error. - -## covarPop(x, y) {#covarpopx-y} - -Calculates the value of `Σ((x - x̅)(y - y̅)) / n`. - -!!! note "Note" - This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarPopStable` function. It works slower but provides a lower computational error. - -## corr(x, y) {#corrx-y} - -Calculates the Pearson correlation coefficient: `Σ((x - x̅)(y - y̅)) / sqrt(Σ((x - x̅)^2) * Σ((y - y̅)^2))`. - -!!! note "Note" - This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `corrStable` function. It works slower but provides a lower computational error. - -## categoricalInformationValue {#categoricalinformationvalue} - -Calculates the value of `(P(tag = 1) - P(tag = 0))(log(P(tag = 1)) - log(P(tag = 0)))` for each category. - -``` sql -categoricalInformationValue(category1, category2, ..., tag) -``` - -The result indicates how a discrete (categorical) feature `[category1, category2, ...]` contribute to a learning model which predicting the value of `tag`. - -## simpleLinearRegression {#simplelinearregression} - -Performs simple (unidimensional) linear regression. - -``` sql -simpleLinearRegression(x, y) -``` - -Parameters: - -- `x` — Column with dependent variable values. -- `y` — Column with explanatory variable values. - -Returned values: - -Constants `(a, b)` of the resulting line `y = a*x + b`. 
- -**Examples** - -``` sql -SELECT arrayReduce('simpleLinearRegression', [0, 1, 2, 3], [0, 1, 2, 3]) -``` - -``` text -┌─arrayReduce('simpleLinearRegression', [0, 1, 2, 3], [0, 1, 2, 3])─┐ -│ (1,0) │ -└───────────────────────────────────────────────────────────────────┘ -``` - -``` sql -SELECT arrayReduce('simpleLinearRegression', [0, 1, 2, 3], [3, 4, 5, 6]) -``` - -``` text -┌─arrayReduce('simpleLinearRegression', [0, 1, 2, 3], [3, 4, 5, 6])─┐ -│ (1,3) │ -└───────────────────────────────────────────────────────────────────┘ -``` - -## stochasticLinearRegression {#agg_functions-stochasticlinearregression} - -This function implements stochastic linear regression. It supports custom parameters for learning rate, L2 regularization coefficient, mini-batch size and has few methods for updating weights ([Adam](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Adam) (used by default), [simple SGD](https://en.wikipedia.org/wiki/Stochastic_gradient_descent), [Momentum](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Momentum), [Nesterov](https://mipt.ru/upload/medialibrary/d7e/41-91.pdf)). - -### Parameters {#agg_functions-stochasticlinearregression-parameters} - -There are 4 customizable parameters. They are passed to the function sequentially, but there is no need to pass all four - default values will be used, however good model required some parameter tuning. - -``` text -stochasticLinearRegression(1.0, 1.0, 10, 'SGD') -``` - -1. `learning rate` is the coefficient on step length, when gradient descent step is performed. Too big learning rate may cause infinite weights of the model. Default is `0.00001`. -2. `l2 regularization coefficient` which may help to prevent overfitting. Default is `0.1`. -3. `mini-batch size` sets the number of elements, which gradients will be computed and summed to perform one step of gradient descent. Pure stochastic descent uses one element, however having small batches(about 10 elements) make gradient steps more stable. Default is `15`. -4. `method for updating weights`, they are: `Adam` (by default), `SGD`, `Momentum`, `Nesterov`. `Momentum` and `Nesterov` require little bit more computations and memory, however they happen to be useful in terms of speed of convergance and stability of stochastic gradient methods. - -### Usage {#agg_functions-stochasticlinearregression-usage} - -`stochasticLinearRegression` is used in two steps: fitting the model and predicting on new data. In order to fit the model and save its state for later usage we use `-State` combinator, which basically saves the state (model weights, etc). -To predict we use function [evalMLMethod](../functions/machine-learning-functions.md#machine_learning_methods-evalmlmethod), which takes a state as an argument as well as features to predict on. - - - -**1.** Fitting - -Such query may be used. - -``` sql -CREATE TABLE IF NOT EXISTS train_data -( - param1 Float64, - param2 Float64, - target Float64 -) ENGINE = Memory; - -CREATE TABLE your_model ENGINE = Memory AS SELECT -stochasticLinearRegressionState(0.1, 0.0, 5, 'SGD')(target, param1, param2) -AS state FROM train_data; -``` - -Here we also need to insert data into `train_data` table. The number of parameters is not fixed, it depends only on number of arguments, passed into `linearRegressionState`. They all must be numeric values. -Note that the column with target value(which we would like to learn to predict) is inserted as the first argument. 
- -**2.** Predicting - -After saving a state into the table, we may use it multiple times for prediction, or even merge with other states and create new even better models. - -``` sql -WITH (SELECT state FROM your_model) AS model SELECT -evalMLMethod(model, param1, param2) FROM test_data -``` - -The query will return a column of predicted values. Note that first argument of `evalMLMethod` is `AggregateFunctionState` object, next are columns of features. - -`test_data` is a table like `train_data` but may not contain target value. - -### Notes {#agg_functions-stochasticlinearregression-notes} - -1. To merge two models user may create such query: - `sql SELECT state1 + state2 FROM your_models` - where `your_models` table contains both models. This query will return new `AggregateFunctionState` object. - -2. User may fetch weights of the created model for its own purposes without saving the model if no `-State` combinator is used. - `sql SELECT stochasticLinearRegression(0.01)(target, param1, param2) FROM train_data` - Such query will fit the model and return its weights - first are weights, which correspond to the parameters of the model, the last one is bias. So in the example above the query will return a column with 3 values. - -**See Also** - -- [stochasticLogisticRegression](#agg_functions-stochasticlogisticregression) -- [Difference between linear and logistic regressions](https://stackoverflow.com/questions/12146914/what-is-the-difference-between-linear-regression-and-logistic-regression) - -## stochasticLogisticRegression {#agg_functions-stochasticlogisticregression} - -This function implements stochastic logistic regression. It can be used for binary classification problem, supports the same custom parameters as stochasticLinearRegression and works the same way. - -### Parameters {#agg_functions-stochasticlogisticregression-parameters} - -Parameters are exactly the same as in stochasticLinearRegression: -`learning rate`, `l2 regularization coefficient`, `mini-batch size`, `method for updating weights`. -For more information see [parameters](#agg_functions-stochasticlinearregression-parameters). - -``` text -stochasticLogisticRegression(1.0, 1.0, 10, 'SGD') -``` - -1. Fitting - - - - See the `Fitting` section in the [stochasticLinearRegression](#stochasticlinearregression-usage-fitting) description. - - Predicted labels have to be in \[-1, 1\]. - -1. Predicting - - - - Using saved state we can predict probability of object having label `1`. - - ``` sql - WITH (SELECT state FROM your_model) AS model SELECT - evalMLMethod(model, param1, param2) FROM test_data - ``` - - The query will return a column of probabilities. Note that first argument of `evalMLMethod` is `AggregateFunctionState` object, next are columns of features. - - We can also set a bound of probability, which assigns elements to different labels. - - ``` sql - SELECT ans < 1.1 AND ans > 0.5 FROM - (WITH (SELECT state FROM your_model) AS model SELECT - evalMLMethod(model, param1, param2) AS ans FROM test_data) - ``` - - Then the result will be labels. - - `test_data` is a table like `train_data` but may not contain target value. 
- -**See Also** - -- [stochasticLinearRegression](#agg_functions-stochasticlinearregression) -- [Difference between linear and logistic regressions.](https://stackoverflow.com/questions/12146914/what-is-the-difference-between-linear-regression-and-logistic-regression) - -## groupBitmapAnd {#groupbitmapand} - -Calculations the AND of a bitmap column, return cardinality of type UInt64, if add suffix -State, then return [bitmap object](../../sql-reference/functions/bitmap-functions.md). - -``` sql -groupBitmapAnd(expr) -``` - -**Parameters** - -`expr` – An expression that results in `AggregateFunction(groupBitmap, UInt*)` type. - -**Return value** - -Value of the `UInt64` type. - -**Example** - -``` sql -DROP TABLE IF EXISTS bitmap_column_expr_test2; -CREATE TABLE bitmap_column_expr_test2 -( - tag_id String, - z AggregateFunction(groupBitmap, UInt32) -) -ENGINE = MergeTree -ORDER BY tag_id; - -INSERT INTO bitmap_column_expr_test2 VALUES ('tag1', bitmapBuild(cast([1,2,3,4,5,6,7,8,9,10] as Array(UInt32)))); -INSERT INTO bitmap_column_expr_test2 VALUES ('tag2', bitmapBuild(cast([6,7,8,9,10,11,12,13,14,15] as Array(UInt32)))); -INSERT INTO bitmap_column_expr_test2 VALUES ('tag3', bitmapBuild(cast([2,4,6,8,10,12] as Array(UInt32)))); - -SELECT groupBitmapAnd(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%'); -┌─groupBitmapAnd(z)─┐ -│ 3 │ -└───────────────────┘ - -SELECT arraySort(bitmapToArray(groupBitmapAndState(z))) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%'); -┌─arraySort(bitmapToArray(groupBitmapAndState(z)))─┐ -│ [6,8,10] │ -└──────────────────────────────────────────────────┘ -``` - -## groupBitmapOr {#groupbitmapor} - -Calculations the OR of a bitmap column, return cardinality of type UInt64, if add suffix -State, then return [bitmap object](../../sql-reference/functions/bitmap-functions.md). This is equivalent to `groupBitmapMerge`. - -``` sql -groupBitmapOr(expr) -``` - -**Parameters** - -`expr` – An expression that results in `AggregateFunction(groupBitmap, UInt*)` type. - -**Return value** - -Value of the `UInt64` type. - -**Example** - -``` sql -DROP TABLE IF EXISTS bitmap_column_expr_test2; -CREATE TABLE bitmap_column_expr_test2 -( - tag_id String, - z AggregateFunction(groupBitmap, UInt32) -) -ENGINE = MergeTree -ORDER BY tag_id; - -INSERT INTO bitmap_column_expr_test2 VALUES ('tag1', bitmapBuild(cast([1,2,3,4,5,6,7,8,9,10] as Array(UInt32)))); -INSERT INTO bitmap_column_expr_test2 VALUES ('tag2', bitmapBuild(cast([6,7,8,9,10,11,12,13,14,15] as Array(UInt32)))); -INSERT INTO bitmap_column_expr_test2 VALUES ('tag3', bitmapBuild(cast([2,4,6,8,10,12] as Array(UInt32)))); - -SELECT groupBitmapOr(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%'); -┌─groupBitmapOr(z)─┐ -│ 15 │ -└──────────────────┘ - -SELECT arraySort(bitmapToArray(groupBitmapOrState(z))) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%'); -┌─arraySort(bitmapToArray(groupBitmapOrState(z)))─┐ -│ [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] │ -└─────────────────────────────────────────────────┘ -``` - -## groupBitmapXor {#groupbitmapxor} - -Calculations the XOR of a bitmap column, return cardinality of type UInt64, if add suffix -State, then return [bitmap object](../../sql-reference/functions/bitmap-functions.md). - -``` sql -groupBitmapOr(expr) -``` - -**Parameters** - -`expr` – An expression that results in `AggregateFunction(groupBitmap, UInt*)` type. - -**Return value** - -Value of the `UInt64` type. 
- -**Example** - -``` sql -DROP TABLE IF EXISTS bitmap_column_expr_test2; -CREATE TABLE bitmap_column_expr_test2 -( - tag_id String, - z AggregateFunction(groupBitmap, UInt32) -) -ENGINE = MergeTree -ORDER BY tag_id; - -INSERT INTO bitmap_column_expr_test2 VALUES ('tag1', bitmapBuild(cast([1,2,3,4,5,6,7,8,9,10] as Array(UInt32)))); -INSERT INTO bitmap_column_expr_test2 VALUES ('tag2', bitmapBuild(cast([6,7,8,9,10,11,12,13,14,15] as Array(UInt32)))); -INSERT INTO bitmap_column_expr_test2 VALUES ('tag3', bitmapBuild(cast([2,4,6,8,10,12] as Array(UInt32)))); - -SELECT groupBitmapXor(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%'); -┌─groupBitmapXor(z)─┐ -│ 10 │ -└───────────────────┘ - -SELECT arraySort(bitmapToArray(groupBitmapXorState(z))) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%'); -┌─arraySort(bitmapToArray(groupBitmapXorState(z)))─┐ -│ [1,3,5,6,8,10,11,13,14,15] │ -└──────────────────────────────────────────────────┘ -``` - -[Original article](https://clickhouse.tech/docs/en/query_language/agg_functions/reference/) diff --git a/docs/en/sql-reference/aggregate-functions/reference/any.md b/docs/en/sql-reference/aggregate-functions/reference/any.md new file mode 100644 index 00000000000..16306597983 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/any.md @@ -0,0 +1,13 @@ +--- +toc_priority: 6 +--- + +# any {#agg_function-any} + +Selects the first encountered value. +The query can be executed in any order and even in a different order each time, so the result of this function is indeterminate. +To get a determinate result, you can use the ‘min’ or ‘max’ function instead of ‘any’. + +In some cases, you can rely on the order of execution. This applies to cases when SELECT comes from a subquery that uses ORDER BY. + +When a `SELECT` query has the `GROUP BY` clause or at least one aggregate function, ClickHouse (in contrast to MySQL) requires that all expressions in the `SELECT`, `HAVING`, and `ORDER BY` clauses be calculated from keys or from aggregate functions. In other words, each column selected from the table must be used either in keys or inside aggregate functions. To get behavior like in MySQL, you can put the other columns in the `any` aggregate function. diff --git a/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md b/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md new file mode 100644 index 00000000000..dbfda63c146 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md @@ -0,0 +1,29 @@ +--- +toc_priority: 103 +--- +# anyHeavy {#anyheavyx} + +Selects a frequently occurring value using the [heavy hitters](http://www.cs.umd.edu/~samir/498/karp.pdf) algorithm. If there is a value that occurs more than in half the cases in each of the query’s execution threads, this value is returned. Normally, the result is nondeterministic. + +``` sql +anyHeavy(column) +``` + +**Arguments** + +- `column` – The column name. + +**Example** + +Take the [OnTime](../../../getting-started/example-datasets/ontime.md) data set and select any frequently occurring value in the `AirlineID` column. 
+ +``` sql +SELECT anyHeavy(AirlineID) AS res +FROM ontime +``` + +``` text +┌───res─┐ +│ 19690 │ +└───────┘ +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/anylast.md b/docs/en/sql-reference/aggregate-functions/reference/anylast.md new file mode 100644 index 00000000000..298ec063924 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/anylast.md @@ -0,0 +1,8 @@ +--- +toc_priority: 104 +--- + +## anyLast {#anylastx} + +Selects the last value encountered. +The result is just as indeterminate as for the [any](any.md) function. diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmax.md b/docs/en/sql-reference/aggregate-functions/reference/argmax.md new file mode 100644 index 00000000000..3093a4f67ef --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/argmax.md @@ -0,0 +1,9 @@ +--- +toc_priority: 106 +--- + +# argMax {#agg-function-argmax} + +Syntax: `argMax(arg, val)` + +Calculates the `arg` value for a maximum `val` value. If there are several different values of `arg` for maximum values of `val`, the first of these values encountered is output. diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmin.md b/docs/en/sql-reference/aggregate-functions/reference/argmin.md new file mode 100644 index 00000000000..315c7b6c29a --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/argmin.md @@ -0,0 +1,29 @@ +--- +toc_priority: 105 +--- + +# argMin {#agg-function-argmin} + +Syntax: `argMin(arg, val)` + +Calculates the `arg` value for a minimal `val` value. If there are several different values of `arg` for minimal values of `val`, the first of these values encountered is output. + +**Example:** + +``` text +┌─user─────┬─salary─┐ +│ director │ 5000 │ +│ manager │ 3000 │ +│ worker │ 1000 │ +└──────────┴────────┘ +``` + +``` sql +SELECT argMin(user, salary) FROM salary +``` + +``` text +┌─argMin(user, salary)─┐ +│ worker │ +└──────────────────────┘ +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/avg.md b/docs/en/sql-reference/aggregate-functions/reference/avg.md new file mode 100644 index 00000000000..4ebae95b79d --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/avg.md @@ -0,0 +1,7 @@ +--- +toc_priority: 5 +--- + +# avg {#agg_function-avg} + +Calculates the average. Only works for numbers. The result is always Float64. diff --git a/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md b/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md new file mode 100644 index 00000000000..20b7187a744 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md @@ -0,0 +1,44 @@ +--- +toc_priority: 107 +--- + +# avgWeighted {#avgweighted} + +Calculates the [weighted arithmetic mean](https://en.wikipedia.org/wiki/Weighted_arithmetic_mean). + +**Syntax** + +``` sql +avgWeighted(x, weight) +``` + +**Parameters** + +- `x` — Values. [Integer](../../../sql-reference/data-types/int-uint.md) or [floating-point](../../../sql-reference/data-types/float.md). +- `weight` — Weights of the values. [Integer](../../../sql-reference/data-types/int-uint.md) or [floating-point](../../../sql-reference/data-types/float.md). + +Type of `x` and `weight` must be the same. + +**Returned value** + +- Weighted mean. +- `NaN`. If all the weights are equal to 0. + +Type: [Float64](../../../sql-reference/data-types/float.md). 
+
+**Example**
+
+Query:
+
+``` sql
+SELECT avgWeighted(x, w)
+FROM values('x Int8, w Int8', (4, 1), (1, 0), (10, 2))
+```
+
+Result:
+
+``` text
+┌─avgWeighted(x, w)─┐
+│                 8 │
+└───────────────────┘
+```
diff --git a/docs/en/sql-reference/aggregate-functions/reference/categoricalinformationvalue.md b/docs/en/sql-reference/aggregate-functions/reference/categoricalinformationvalue.md
new file mode 100644
index 00000000000..2e9001dec19
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/categoricalinformationvalue.md
@@ -0,0 +1,13 @@
+---
+toc_priority: 250
+---
+
+# categoricalInformationValue {#categoricalinformationvalue}
+
+Calculates the value of `(P(tag = 1) - P(tag = 0))(log(P(tag = 1)) - log(P(tag = 0)))` for each category.
+
+``` sql
+categoricalInformationValue(category1, category2, ..., tag)
+```
+
+The result indicates how a discrete (categorical) feature `[category1, category2, ...]` contributes to a learning model that predicts the value of `tag`.
diff --git a/docs/en/sql-reference/aggregate-functions/reference/corr.md b/docs/en/sql-reference/aggregate-functions/reference/corr.md
new file mode 100644
index 00000000000..88f9295a8f2
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/corr.md
@@ -0,0 +1,12 @@
+---
+toc_priority: 107
+---
+
+# corr {#corrx-y}
+
+Syntax: `corr(x, y)`
+
+Calculates the Pearson correlation coefficient: `Σ((x - x̅)(y - y̅)) / sqrt(Σ((x - x̅)^2) * Σ((y - y̅)^2))`.
+
+!!! note "Note"
+    This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `corrStable` function. It works slower but provides a lower computational error.
diff --git a/docs/en/sql-reference/aggregate-functions/reference/count.md b/docs/en/sql-reference/aggregate-functions/reference/count.md
new file mode 100644
index 00000000000..12b99814661
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/count.md
@@ -0,0 +1,69 @@
+---
+toc_priority: 1
+---
+
+# count {#agg_function-count}
+
+Counts the number of rows or not-NULL values.
+
+ClickHouse supports the following syntaxes for `count`:
+- `count(expr)` or `COUNT(DISTINCT expr)`.
+- `count()` or `COUNT(*)`. The `count()` syntax is ClickHouse-specific.
+
+**Parameters**
+
+The function can take:
+
+- Zero parameters.
+- One [expression](../../../sql-reference/syntax.md#syntax-expressions).
+
+**Returned value**
+
+- If the function is called without parameters, it counts the number of rows.
+- If the [expression](../../../sql-reference/syntax.md#syntax-expressions) is passed, then the function counts how many times this expression returned not null. If the expression returns a [Nullable](../../../sql-reference/data-types/nullable.md)-type value, the result of `count` is still not `Nullable`. The function returns 0 if the expression returned `NULL` for all the rows.
+
+In both cases the type of the returned value is [UInt64](../../../sql-reference/data-types/int-uint.md).
+
+**Details**
+
+ClickHouse supports the `COUNT(DISTINCT ...)` syntax. The behavior of this construction depends on the [count\_distinct\_implementation](../../../operations/settings/settings.md#settings-count_distinct_implementation) setting. It defines which of the [uniq\*](uniq.md#agg_function-uniq) functions is used to perform the operation. The default is the [uniqExact](uniqexact.md#agg_function-uniqexact) function.
+ +The `SELECT count() FROM table` query is not optimized, because the number of entries in the table is not stored separately. It chooses a small column from the table and counts the number of values in it. + +**Examples** + +Example 1: + +``` sql +SELECT count() FROM t +``` + +``` text +┌─count()─┐ +│ 5 │ +└─────────┘ +``` + +Example 2: + +``` sql +SELECT name, value FROM system.settings WHERE name = 'count_distinct_implementation' +``` + +``` text +┌─name──────────────────────────┬─value─────┐ +│ count_distinct_implementation │ uniqExact │ +└───────────────────────────────┴───────────┘ +``` + +``` sql +SELECT count(DISTINCT num) FROM t +``` + +``` text +┌─uniqExact(num)─┐ +│ 3 │ +└────────────────┘ +``` + +This example shows that `count(DISTINCT num)` is performed by the `uniqExact` function according to the `count_distinct_implementation` setting value. diff --git a/docs/en/sql-reference/aggregate-functions/reference/covarpop.md b/docs/en/sql-reference/aggregate-functions/reference/covarpop.md new file mode 100644 index 00000000000..2a7d805763e --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/covarpop.md @@ -0,0 +1,12 @@ +--- +toc_priority: 36 +--- + +# covarPop {#covarpop} + +Syntax: `covarPop(x, y)` + +Calculates the value of `Σ((x - x̅)(y - y̅)) / n`. + +!!! note "Note" + This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarPopStable` function. It works slower but provides a lower computational error. diff --git a/docs/en/sql-reference/aggregate-functions/reference/covarsamp.md b/docs/en/sql-reference/aggregate-functions/reference/covarsamp.md new file mode 100644 index 00000000000..4bdb1b02d40 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/covarsamp.md @@ -0,0 +1,12 @@ +--- +toc_priority: 37 +--- + +# covarSamp {#covarsamp} + +Calculates the value of `Σ((x - x̅)(y - y̅)) / (n - 1)`. + +Returns Float64. When `n <= 1`, returns +∞. + +!!! note "Note" + This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarSampStable` function. It works slower but provides a lower computational error. diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparray.md b/docs/en/sql-reference/aggregate-functions/reference/grouparray.md new file mode 100644 index 00000000000..86b7b83022b --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparray.md @@ -0,0 +1,14 @@ +--- +toc_priority: 110 +--- + +# groupArray {#agg_function-grouparray} + +Syntax: `groupArray(x)` or `groupArray(max_size)(x)` + +Creates an array of argument values. +Values can be added to the array in any (indeterminate) order. + +The second version (with the `max_size` parameter) limits the size of the resulting array to `max_size` elements. For example, `groupArray(1)(x)` is equivalent to `[any (x)]`. + +In some cases, you can still rely on the order of execution. This applies to cases when `SELECT` comes from a subquery that uses `ORDER BY`. 
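+
+**Example**
+
+A minimal query sketch; `numbers(5)` is used here only as a convenient source, and with a single-threaded read the values are typically collected in input order:
+
+``` sql
+-- The element order is not guaranteed in general; this is one possible result.
+SELECT groupArray(number) FROM numbers(5)
+```
+
+``` text
+┌─groupArray(number)─┐
+│ [0,1,2,3,4]        │
+└────────────────────┘
+```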
diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md b/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md new file mode 100644 index 00000000000..c35d3bd2004 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md @@ -0,0 +1,91 @@ +--- +toc_priority: 112 +--- + +# groupArrayInsertAt {#grouparrayinsertat} + +Inserts a value into the array at the specified position. + +**Syntax** + +``` sql +groupArrayInsertAt(default_x, size)(x, pos); +``` + +If in one query several values are inserted into the same position, the function behaves in the following ways: + +- If a query is executed in a single thread, the first one of the inserted values is used. +- If a query is executed in multiple threads, the resulting value is an undetermined one of the inserted values. + +**Parameters** + +- `x` — Value to be inserted. [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in one of the [supported data types](../../../sql-reference/data-types/index.md). +- `pos` — Position at which the specified element `x` is to be inserted. Index numbering in the array starts from zero. [UInt32](../../../sql-reference/data-types/int-uint.md#uint-ranges). +- `default_x`— Default value for substituting in empty positions. Optional parameter. [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in the data type configured for the `x` parameter. If `default_x` is not defined, the [default values](../../../sql-reference/statements/create.md#create-default-values) are used. +- `size`— Length of the resulting array. Optional parameter. When using this parameter, the default value `default_x` must be specified. [UInt32](../../../sql-reference/data-types/int-uint.md#uint-ranges). + +**Returned value** + +- Array with inserted values. + +Type: [Array](../../../sql-reference/data-types/array.md#data-type-array). + +**Example** + +Query: + +``` sql +SELECT groupArrayInsertAt(toString(number), number * 2) FROM numbers(5); +``` + +Result: + +``` text +┌─groupArrayInsertAt(toString(number), multiply(number, 2))─┐ +│ ['0','','1','','2','','3','','4'] │ +└───────────────────────────────────────────────────────────┘ +``` + +Query: + +``` sql +SELECT groupArrayInsertAt('-')(toString(number), number * 2) FROM numbers(5); +``` + +Result: + +``` text +┌─groupArrayInsertAt('-')(toString(number), multiply(number, 2))─┐ +│ ['0','-','1','-','2','-','3','-','4'] │ +└────────────────────────────────────────────────────────────────┘ +``` + +Query: + +``` sql +SELECT groupArrayInsertAt('-', 5)(toString(number), number * 2) FROM numbers(5); +``` + +Result: + +``` text +┌─groupArrayInsertAt('-', 5)(toString(number), multiply(number, 2))─┐ +│ ['0','-','1','-','2'] │ +└───────────────────────────────────────────────────────────────────┘ +``` + +Multi-threaded insertion of elements into one position. + +Query: + +``` sql +SELECT groupArrayInsertAt(number, 0) FROM numbers_mt(10) SETTINGS max_block_size = 1; +``` + +As a result of this query you get random integer in the `[0,9]` range. 
For example: + +``` text +┌─groupArrayInsertAt(number, 0)─┐ +│ [7] │ +└───────────────────────────────┘ +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md new file mode 100644 index 00000000000..1cd40c2002f --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md @@ -0,0 +1,78 @@ +--- +toc_priority: 114 +--- + +# groupArrayMovingAvg {#agg_function-grouparraymovingavg} + +Calculates the moving average of input values. + +``` sql +groupArrayMovingAvg(numbers_for_summing) +groupArrayMovingAvg(window_size)(numbers_for_summing) +``` + +The function can take the window size as a parameter. If left unspecified, the function takes the window size equal to the number of rows in the column. + +**Parameters** + +- `numbers_for_summing` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in a numeric data type value. +- `window_size` — Size of the calculation window. + +**Returned values** + +- Array of the same size and type as the input data. + +The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero). It truncates the decimal places insignificant for the resulting data type. + +**Example** + +The sample table `b`: + +``` sql +CREATE TABLE t +( + `int` UInt8, + `float` Float32, + `dec` Decimal32(2) +) +ENGINE = TinyLog +``` + +``` text +┌─int─┬─float─┬──dec─┐ +│ 1 │ 1.1 │ 1.10 │ +│ 2 │ 2.2 │ 2.20 │ +│ 4 │ 4.4 │ 4.40 │ +│ 7 │ 7.77 │ 7.77 │ +└─────┴───────┴──────┘ +``` + +The queries: + +``` sql +SELECT + groupArrayMovingAvg(int) AS I, + groupArrayMovingAvg(float) AS F, + groupArrayMovingAvg(dec) AS D +FROM t +``` + +``` text +┌─I─────────┬─F───────────────────────────────────┬─D─────────────────────┐ +│ [0,0,1,3] │ [0.275,0.82500005,1.9250001,3.8675] │ [0.27,0.82,1.92,3.86] │ +└───────────┴─────────────────────────────────────┴───────────────────────┘ +``` + +``` sql +SELECT + groupArrayMovingAvg(2)(int) AS I, + groupArrayMovingAvg(2)(float) AS F, + groupArrayMovingAvg(2)(dec) AS D +FROM t +``` + +``` text +┌─I─────────┬─F────────────────────────────────┬─D─────────────────────┐ +│ [0,1,3,5] │ [0.55,1.6500001,3.3000002,6.085] │ [0.55,1.65,3.30,6.08] │ +└───────────┴──────────────────────────────────┴───────────────────────┘ +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md new file mode 100644 index 00000000000..ef979cd5f6a --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md @@ -0,0 +1,76 @@ +--- +toc_priority: 113 +--- + +# groupArrayMovingSum {#agg_function-grouparraymovingsum} + +Calculates the moving sum of input values. + +``` sql +groupArrayMovingSum(numbers_for_summing) +groupArrayMovingSum(window_size)(numbers_for_summing) +``` + +The function can take the window size as a parameter. If left unspecified, the function takes the window size equal to the number of rows in the column. + +**Parameters** + +- `numbers_for_summing` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in a numeric data type value. +- `window_size` — Size of the calculation window. + +**Returned values** + +- Array of the same size and type as the input data. 
+
+**Example**
+
+The sample table:
+
+``` sql
+CREATE TABLE t
+(
+    `int` UInt8,
+    `float` Float32,
+    `dec` Decimal32(2)
+)
+ENGINE = TinyLog
+```
+
+``` text
+┌─int─┬─float─┬──dec─┐
+│   1 │   1.1 │ 1.10 │
+│   2 │   2.2 │ 2.20 │
+│   4 │   4.4 │ 4.40 │
+│   7 │  7.77 │ 7.77 │
+└─────┴───────┴──────┘
+```
+
+The queries:
+
+``` sql
+SELECT
+    groupArrayMovingSum(int) AS I,
+    groupArrayMovingSum(float) AS F,
+    groupArrayMovingSum(dec) AS D
+FROM t
+```
+
+``` text
+┌─I──────────┬─F───────────────────────────────┬─D──────────────────────┐
+│ [1,3,7,14] │ [1.1,3.3000002,7.7000003,15.47] │ [1.10,3.30,7.70,15.47] │
+└────────────┴─────────────────────────────────┴────────────────────────┘
+```
+
+``` sql
+SELECT
+    groupArrayMovingSum(2)(int) AS I,
+    groupArrayMovingSum(2)(float) AS F,
+    groupArrayMovingSum(2)(dec) AS D
+FROM t
+```
+
+``` text
+┌─I──────────┬─F───────────────────────────────┬─D──────────────────────┐
+│ [1,3,6,11] │ [1.1,3.3000002,6.6000004,12.17] │ [1.10,3.30,6.60,12.17] │
+└────────────┴─────────────────────────────────┴────────────────────────┘
+```
diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md
new file mode 100644
index 00000000000..9be73fd54ec
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md
@@ -0,0 +1,46 @@
+---
+toc_priority: 125
+---
+
+# groupBitAnd {#groupbitand}
+
+Applies bitwise `AND` to a series of numbers.
+
+``` sql
+groupBitAnd(expr)
+```
+
+**Parameters**
+
+`expr` – An expression that results in `UInt*` type.
+
+**Return value**
+
+Value of the `UInt*` type.
+
+**Example**
+
+Test data:
+
+``` text
+binary     decimal
+00101100 = 44
+00011100 = 28
+00001101 = 13
+01010101 = 85
+```
+
+Query:
+
+``` sql
+SELECT groupBitAnd(num) FROM t
+```
+
+Where `num` is the column with the test data.
+
+Result:
+
+``` text
+binary     decimal
+00000100 = 4
+```
diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md
new file mode 100644
index 00000000000..9367652db38
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md
@@ -0,0 +1,44 @@
+---
+toc_priority: 128
+---
+
+# groupBitmap {#groupbitmap}
+
+Performs bitmap (aggregate) calculations over an unsigned integer column and returns the cardinality as a `UInt64` value. If the `-State` suffix is added, it returns a [bitmap object](../../../sql-reference/functions/bitmap-functions.md).
+
+``` sql
+groupBitmap(expr)
+```
+
+**Parameters**
+
+`expr` – An expression that results in `UInt*` type.
+
+**Return value**
+
+Value of the `UInt64` type.
+
+**Example**
+
+Test data:
+
+``` text
+UserID
+1
+1
+2
+3
+```
+
+Query:
+
+``` sql
+SELECT groupBitmap(UserID) as num FROM t
+```
+
+Result:
+
+``` text
+num
+3
+```
diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md
new file mode 100644
index 00000000000..7c0c89040bb
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md
@@ -0,0 +1,46 @@
+---
+toc_priority: 129
+---
+
+# groupBitmapAnd {#groupbitmapand}
+
+Calculates the AND of a bitmap column and returns the cardinality as a `UInt64` value. If the `-State` suffix is added, it returns a [bitmap object](../../../sql-reference/functions/bitmap-functions.md).
+
+``` sql
+groupBitmapAnd(expr)
+```
+
+**Parameters**
+
+`expr` – An expression that results in `AggregateFunction(groupBitmap, UInt*)` type.
+
+**Return value**
+
+Value of the `UInt64` type.
+
+**Example**
+
+``` sql
+DROP TABLE IF EXISTS bitmap_column_expr_test2;
+CREATE TABLE bitmap_column_expr_test2
+(
+    tag_id String,
+    z AggregateFunction(groupBitmap, UInt32)
+)
+ENGINE = MergeTree
+ORDER BY tag_id;
+
+INSERT INTO bitmap_column_expr_test2 VALUES ('tag1', bitmapBuild(cast([1,2,3,4,5,6,7,8,9,10] as Array(UInt32))));
+INSERT INTO bitmap_column_expr_test2 VALUES ('tag2', bitmapBuild(cast([6,7,8,9,10,11,12,13,14,15] as Array(UInt32))));
+INSERT INTO bitmap_column_expr_test2 VALUES ('tag3', bitmapBuild(cast([2,4,6,8,10,12] as Array(UInt32))));
+
+SELECT groupBitmapAnd(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%');
+┌─groupBitmapAnd(z)─┐
+│                 3 │
+└───────────────────┘
+
+SELECT arraySort(bitmapToArray(groupBitmapAndState(z))) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%');
+┌─arraySort(bitmapToArray(groupBitmapAndState(z)))─┐
+│ [6,8,10]                                         │
+└──────────────────────────────────────────────────┘
+```
diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md
new file mode 100644
index 00000000000..894c6c90aab
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md
@@ -0,0 +1,46 @@
+---
+toc_priority: 130
+---
+
+# groupBitmapOr {#groupbitmapor}
+
+Calculates the OR of a bitmap column and returns the cardinality as a `UInt64` value. If the `-State` suffix is added, it returns a [bitmap object](../../../sql-reference/functions/bitmap-functions.md). This is equivalent to `groupBitmapMerge`.
+
+``` sql
+groupBitmapOr(expr)
+```
+
+**Parameters**
+
+`expr` – An expression that results in `AggregateFunction(groupBitmap, UInt*)` type.
+
+**Return value**
+
+Value of the `UInt64` type.
+
+**Example**
+
+``` sql
+DROP TABLE IF EXISTS bitmap_column_expr_test2;
+CREATE TABLE bitmap_column_expr_test2
+(
+    tag_id String,
+    z AggregateFunction(groupBitmap, UInt32)
+)
+ENGINE = MergeTree
+ORDER BY tag_id;
+
+INSERT INTO bitmap_column_expr_test2 VALUES ('tag1', bitmapBuild(cast([1,2,3,4,5,6,7,8,9,10] as Array(UInt32))));
+INSERT INTO bitmap_column_expr_test2 VALUES ('tag2', bitmapBuild(cast([6,7,8,9,10,11,12,13,14,15] as Array(UInt32))));
+INSERT INTO bitmap_column_expr_test2 VALUES ('tag3', bitmapBuild(cast([2,4,6,8,10,12] as Array(UInt32))));
+
+SELECT groupBitmapOr(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%');
+┌─groupBitmapOr(z)─┐
+│               15 │
+└──────────────────┘
+
+SELECT arraySort(bitmapToArray(groupBitmapOrState(z))) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%');
+┌─arraySort(bitmapToArray(groupBitmapOrState(z)))─┐
+│ [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]           │
+└─────────────────────────────────────────────────┘
+```
diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md
new file mode 100644
index 00000000000..5d0ec0fb097
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md
@@ -0,0 +1,46 @@
+---
+toc_priority: 131
+---
+
+# groupBitmapXor {#groupbitmapxor}
+
+Calculates the XOR of a bitmap column and returns the cardinality as a `UInt64` value. If the `-State` suffix is added, it returns a [bitmap object](../../../sql-reference/functions/bitmap-functions.md).
+
+``` sql
+groupBitmapXor(expr)
+```
+
+**Parameters**
+
+`expr` – An expression that results in `AggregateFunction(groupBitmap, UInt*)` type.
+
+**Return value**
+
+Value of the `UInt64` type.
+ +**Example** + +``` sql +DROP TABLE IF EXISTS bitmap_column_expr_test2; +CREATE TABLE bitmap_column_expr_test2 +( + tag_id String, + z AggregateFunction(groupBitmap, UInt32) +) +ENGINE = MergeTree +ORDER BY tag_id; + +INSERT INTO bitmap_column_expr_test2 VALUES ('tag1', bitmapBuild(cast([1,2,3,4,5,6,7,8,9,10] as Array(UInt32)))); +INSERT INTO bitmap_column_expr_test2 VALUES ('tag2', bitmapBuild(cast([6,7,8,9,10,11,12,13,14,15] as Array(UInt32)))); +INSERT INTO bitmap_column_expr_test2 VALUES ('tag3', bitmapBuild(cast([2,4,6,8,10,12] as Array(UInt32)))); + +SELECT groupBitmapXor(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%'); +┌─groupBitmapXor(z)─┐ +│ 10 │ +└───────────────────┘ + +SELECT arraySort(bitmapToArray(groupBitmapXorState(z))) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%'); +┌─arraySort(bitmapToArray(groupBitmapXorState(z)))─┐ +│ [1,3,5,6,8,10,11,13,14,15] │ +└──────────────────────────────────────────────────┘ +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md new file mode 100644 index 00000000000..7383e620060 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md @@ -0,0 +1,46 @@ +--- +toc_priority: 126 +--- + +# groupBitOr {#groupbitor} + +Applies bitwise `OR` for series of numbers. + +``` sql +groupBitOr(expr) +``` + +**Parameters** + +`expr` – An expression that results in `UInt*` type. + +**Return value** + +Value of the `UInt*` type. + +**Example** + +Test data: + +``` text +binary decimal +00101100 = 44 +00011100 = 28 +00001101 = 13 +01010101 = 85 +``` + +Query: + +``` sql +SELECT groupBitOr(num) FROM t +``` + +Where `num` is the column with the test data. + +Result: + +``` text +binary decimal +01111101 = 125 +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md new file mode 100644 index 00000000000..01026012b91 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md @@ -0,0 +1,46 @@ +--- +toc_priority: 127 +--- + +# groupBitXor {#groupbitxor} + +Applies bitwise `XOR` for series of numbers. + +``` sql +groupBitXor(expr) +``` + +**Parameters** + +`expr` – An expression that results in `UInt*` type. + +**Return value** + +Value of the `UInt*` type. + +**Example** + +Test data: + +``` text +binary decimal +00101100 = 44 +00011100 = 28 +00001101 = 13 +01010101 = 85 +``` + +Query: + +``` sql +SELECT groupBitXor(num) FROM t +``` + +Where `num` is the column with the test data. + +Result: + +``` text +binary decimal +01101000 = 104 +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupuniqarray.md b/docs/en/sql-reference/aggregate-functions/reference/groupuniqarray.md new file mode 100644 index 00000000000..49f4d477d17 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/groupuniqarray.md @@ -0,0 +1,12 @@ +--- +toc_priority: 111 +--- + +# groupUniqArray + +Syntax: `groupUniqArray(x)` or `groupUniqArray(max_size)(x)` + +Creates an array from different argument values. Memory consumption is the same as for the [uniqExact](uniqexact.md) function. + +The second version (with the `max_size` parameter) limits the size of the resulting array to `max_size` elements. +For example, `groupUniqArray(1)(x)` is equivalent to `[any(x)]`. 
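+
+**Example**
+
+A minimal query sketch; `numbers(10)` is used here only as a convenient source, and since the order of elements in the result is not defined, the output below is just one possible arrangement:
+
+``` sql
+-- Collects the distinct remainders of division by 3.
+SELECT groupUniqArray(number % 3) FROM numbers(10)
+```
+
+``` text
+┌─groupUniqArray(modulo(number, 3))─┐
+│ [0,1,2]                           │
+└───────────────────────────────────┘
+```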
diff --git a/docs/en/sql-reference/aggregate-functions/reference/index.md b/docs/en/sql-reference/aggregate-functions/reference/index.md
new file mode 100644
index 00000000000..282642bc32d
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/index.md
@@ -0,0 +1,75 @@
+---
+toc_priority: 36
+toc_title: Reference
+toc_folder_title: Reference
+---
+
+# Aggregate Function Reference {#aggregate-functions-reference}
+
+Standard aggregate functions:
+
+- [count](../../../sql-reference/aggregate-functions/reference/count.md)
+- [min](../../../sql-reference/aggregate-functions/reference/min.md)
+- [max](../../../sql-reference/aggregate-functions/reference/max.md)
+- [sum](../../../sql-reference/aggregate-functions/reference/sum.md)
+- [avg](../../../sql-reference/aggregate-functions/reference/avg.md)
+- [any](../../../sql-reference/aggregate-functions/reference/any.md)
+- [stddevPop](../../../sql-reference/aggregate-functions/reference/stddevpop.md)
+- [stddevSamp](../../../sql-reference/aggregate-functions/reference/stddevsamp.md)
+- [varPop](../../../sql-reference/aggregate-functions/reference/varpop.md)
+- [varSamp](../../../sql-reference/aggregate-functions/reference/varsamp.md)
+- [covarPop](../../../sql-reference/aggregate-functions/reference/covarpop.md)
+- [covarSamp](../../../sql-reference/aggregate-functions/reference/covarsamp.md)
+
+ClickHouse-specific aggregate functions:
+
+- [anyHeavy](../../../sql-reference/aggregate-functions/reference/anyheavy.md)
+- [anyLast](../../../sql-reference/aggregate-functions/reference/anylast.md)
+- [argMin](../../../sql-reference/aggregate-functions/reference/argmin.md)
+- [argMax](../../../sql-reference/aggregate-functions/reference/argmax.md)
+- [avgWeighted](../../../sql-reference/aggregate-functions/reference/avgweighted.md)
+- [topK](../../../sql-reference/aggregate-functions/reference/topk.md)
+- [topKWeighted](../../../sql-reference/aggregate-functions/reference/topkweighted.md)
+- [groupArray](../../../sql-reference/aggregate-functions/reference/grouparray.md)
+- [groupUniqArray](../../../sql-reference/aggregate-functions/reference/groupuniqarray.md)
+- [groupArrayInsertAt](../../../sql-reference/aggregate-functions/reference/grouparrayinsertat.md)
+- [groupArrayMovingAvg](../../../sql-reference/aggregate-functions/reference/grouparraymovingavg.md)
+- [groupArrayMovingSum](../../../sql-reference/aggregate-functions/reference/grouparraymovingsum.md)
+- [groupBitAnd](../../../sql-reference/aggregate-functions/reference/groupbitand.md)
+- [groupBitOr](../../../sql-reference/aggregate-functions/reference/groupbitor.md)
+- [groupBitXor](../../../sql-reference/aggregate-functions/reference/groupbitxor.md)
+- [groupBitmap](../../../sql-reference/aggregate-functions/reference/groupbitmap.md)
+- [groupBitmapAnd](../../../sql-reference/aggregate-functions/reference/groupbitmapand.md)
+- [groupBitmapOr](../../../sql-reference/aggregate-functions/reference/groupbitmapor.md)
+- [groupBitmapXor](../../../sql-reference/aggregate-functions/reference/groupbitmapxor.md)
+- [sumWithOverflow](../../../sql-reference/aggregate-functions/reference/sumwithoverflow.md)
+- [sumMap](../../../sql-reference/aggregate-functions/reference/summap.md)
+- [minMap](../../../sql-reference/aggregate-functions/reference/minmap.md)
+- [maxMap](../../../sql-reference/aggregate-functions/reference/maxmap.md)
+- [skewSamp](../../../sql-reference/aggregate-functions/reference/skewsamp.md)
+- 
[skewPop](../../../sql-reference/aggregate-functions/reference/skewpop.md) +- [kurtSamp](../../../sql-reference/aggregate-functions/reference/kurtsamp.md) +- [kurtPop](../../../sql-reference/aggregate-functions/reference/kurtpop.md) +- [timeSeriesGroupSum](../../../sql-reference/aggregate-functions/reference/timeseriesgroupsum.md) +- [timeSeriesGroupRateSum](../../../sql-reference/aggregate-functions/reference/timeseriesgroupratesum.md) +- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md) +- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md) +- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md) +- [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md) +- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md) +- [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md) +- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md) +- [quantileExact](../../../sql-reference/aggregate-functions/reference/quantileexact.md) +- [quantileExactWeighted](../../../sql-reference/aggregate-functions/reference/quantileexactweighted.md) +- [quantileTiming](../../../sql-reference/aggregate-functions/reference/quantiletiming.md) +- [quantileTimingWeighted](../../../sql-reference/aggregate-functions/reference/quantiletimingweighted.md) +- [quantileDeterministic](../../../sql-reference/aggregate-functions/reference/quantiledeterministic.md) +- [quantileTDigest](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md) +- [quantileTDigestWeighted](../../../sql-reference/aggregate-functions/reference/quantiletdigestweighted.md) +- [simpleLinearRegression](../../../sql-reference/aggregate-functions/reference/simplelinearregression.md) +- [stochasticLinearRegression](../../../sql-reference/aggregate-functions/reference/stochasticlinearregression.md) +- [stochasticLogisticRegression](../../../sql-reference/aggregate-functions/reference/stochasticlogisticregression.md) + +- [categoricalInformationValue](../../../sql-reference/aggregate-functions/reference/categoricalinformationvalue.md) + +[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/) diff --git a/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md b/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md new file mode 100644 index 00000000000..65e7e31b9b4 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md @@ -0,0 +1,25 @@ +--- +toc_priority: 153 +--- + +# kurtPop {#kurtpop} + +Computes the [kurtosis](https://en.wikipedia.org/wiki/Kurtosis) of a sequence. + +``` sql +kurtPop(expr) +``` + +**Parameters** + +`expr` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) returning a number. + +**Returned value** + +The kurtosis of the given distribution. Type — [Float64](../../../sql-reference/data-types/float.md) + +**Example** + +``` sql +SELECT kurtPop(value) FROM series_with_value_column +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md b/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md new file mode 100644 index 00000000000..224bbbdb9e7 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md @@ -0,0 +1,27 @@ +--- +toc_priority: 154 +--- + +# kurtSamp {#kurtsamp} + +Computes the [sample kurtosis](https://en.wikipedia.org/wiki/Kurtosis) of a sequence. 
+ +It represents an unbiased estimate of the kurtosis of a random variable if passed values form its sample. + +``` sql +kurtSamp(expr) +``` + +**Parameters** + +`expr` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) returning a number. + +**Returned value** + +The kurtosis of the given distribution. Type — [Float64](../../../sql-reference/data-types/float.md). If `n <= 1` (`n` is a size of the sample), then the function returns `nan`. + +**Example** + +``` sql +SELECT kurtSamp(value) FROM series_with_value_column +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/max.md b/docs/en/sql-reference/aggregate-functions/reference/max.md new file mode 100644 index 00000000000..c462dd590a6 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/max.md @@ -0,0 +1,7 @@ +--- +toc_priority: 3 +--- + +# max {#agg_function-max} + +Calculates the maximum. diff --git a/docs/en/sql-reference/aggregate-functions/reference/maxmap.md b/docs/en/sql-reference/aggregate-functions/reference/maxmap.md new file mode 100644 index 00000000000..4dca13ed1b4 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/maxmap.md @@ -0,0 +1,25 @@ +--- +toc_priority: 143 +--- + +# maxMap {#agg_functions-maxmap} + +Syntax: `maxMap(key, value)` or `maxMap(Tuple(key, value))` + +Calculates the maximum from `value` array according to the keys specified in the ‘key’ array. +Passing tuple of keys and values arrays is synonymical to passing two arrays of keys and values. +The number of elements in ‘key’ and ‘value’ must be the same for each row that is totaled. +Returns a tuple of two arrays: keys in sorted order, and values calculated for the corresponding keys. + +Example: + +``` sql +SELECT maxMap(a, b) +FROM values('a Array(Int32), b Array(Int64)', ([1, 2], [2, 2]), ([2, 3], [1, 1])) +``` + +``` text +┌─maxMap(a, b)──────┐ +│ ([1,2,3],[2,2,1]) │ +└───────────────────┘ +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/median.md b/docs/en/sql-reference/aggregate-functions/reference/median.md new file mode 100644 index 00000000000..b4f38a9b562 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/median.md @@ -0,0 +1,41 @@ +# median {#median} + +The `median*` functions are the aliases for the corresponding `quantile*` functions. They calculate median of a numeric data sample. + +Functions: + +- `median` — Alias for [quantile](#quantile). +- `medianDeterministic` — Alias for [quantileDeterministic](#quantiledeterministic). +- `medianExact` — Alias for [quantileExact](#quantileexact). +- `medianExactWeighted` — Alias for [quantileExactWeighted](#quantileexactweighted). +- `medianTiming` — Alias for [quantileTiming](#quantiletiming). +- `medianTimingWeighted` — Alias for [quantileTimingWeighted](#quantiletimingweighted). +- `medianTDigest` — Alias for [quantileTDigest](#quantiletdigest). +- `medianTDigestWeighted` — Alias for [quantileTDigestWeighted](#quantiletdigestweighted). 
+ +**Example** + +Input table: + +``` text +┌─val─┐ +│ 1 │ +│ 1 │ +│ 2 │ +│ 3 │ +└─────┘ +``` + +Query: + +``` sql +SELECT medianDeterministic(val, 1) FROM t +``` + +Result: + +``` text +┌─medianDeterministic(val, 1)─┐ +│ 1.5 │ +└─────────────────────────────┘ +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/min.md b/docs/en/sql-reference/aggregate-functions/reference/min.md new file mode 100644 index 00000000000..56b03468243 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/min.md @@ -0,0 +1,7 @@ +--- +toc_priority: 2 +--- + +## min {#agg_function-min} + +Calculates the minimum. diff --git a/docs/en/sql-reference/aggregate-functions/reference/minmap.md b/docs/en/sql-reference/aggregate-functions/reference/minmap.md new file mode 100644 index 00000000000..1b946dea209 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/minmap.md @@ -0,0 +1,28 @@ +--- +toc_priority: 142 +--- + +# minMap {#agg_functions-minmap} + +Syntax: `minMap(key, value)` or `minMap(Tuple(key, value))` + +Calculates the minimum from `value` array according to the keys specified in the `key` array. + +Passing tuple of keys and values arrays is a synonym to passing two arrays of keys and values. + +The number of elements in `key` and `value` must be the same for each row that is totaled. + +Returns a tuple of two arrays: keys in sorted order, and values calculated for the corresponding keys. + +Example: + +``` sql +SELECT minMap(a, b) +FROM values('a Array(Int32), b Array(Int64)', ([1, 2], [2, 2]), ([2, 3], [1, 1])) +``` + +``` text +┌─minMap(a, b)──────┐ +│ ([1,2,3],[2,1,1]) │ +└───────────────────┘ +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantile.md b/docs/en/sql-reference/aggregate-functions/reference/quantile.md new file mode 100644 index 00000000000..454925779f7 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/quantile.md @@ -0,0 +1,66 @@ +--- +toc_priority: 200 +--- + +# quantile {#quantile} + +Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence. + +This function applies [reservoir sampling](https://en.wikipedia.org/wiki/Reservoir_sampling) with a reservoir size up to 8192 and a random number generator for sampling. The result is non-deterministic. To get an exact quantile, use the [quantileExact](quantileexact.md#quantileexact) function. + +When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](quantiles.md#quantiles) function. + +**Syntax** + +``` sql +quantile(level)(expr) +``` + +Alias: `median`. + +**Parameters** + +- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). +- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). + +**Returned value** + +- Approximate quantile of the specified level. + +Type: + +- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. +- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. 
+- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. + +**Example** + +Input table: + +``` text +┌─val─┐ +│ 1 │ +│ 1 │ +│ 2 │ +│ 3 │ +└─────┘ +``` + +Query: + +``` sql +SELECT quantile(val) FROM t +``` + +Result: + +``` text +┌─quantile(val)─┐ +│ 1.5 │ +└───────────────┘ +``` + +**See Also** + +- [median](median.md#median) +- [quantiles](quantiles.md#quantiles) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md b/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md new file mode 100644 index 00000000000..b413332de34 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md @@ -0,0 +1,67 @@ +--- +toc_priority: 206 +--- + +# quantileDeterministic {#quantiledeterministic} + +Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence. + +This function applies [reservoir sampling](https://en.wikipedia.org/wiki/Reservoir_sampling) with a reservoir size up to 8192 and deterministic algorithm of sampling. The result is deterministic. To get an exact quantile, use the [quantileExact](quantileexact.md#quantileexact) function. + +When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](quantiles.md#quantiles) function. + +**Syntax** + +``` sql +quantileDeterministic(level)(expr, determinator) +``` + +Alias: `medianDeterministic`. + +**Parameters** + +- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). +- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). +- `determinator` — Number whose hash is used instead of a random number generator in the reservoir sampling algorithm to make the result of sampling deterministic. As a determinator you can use any deterministic positive number, for example, a user id or an event id. If the same determinator value occures too often, the function works incorrectly. + +**Returned value** + +- Approximate quantile of the specified level. + +Type: + +- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. +- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. +- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. 
+ +**Example** + +Input table: + +``` text +┌─val─┐ +│ 1 │ +│ 1 │ +│ 2 │ +│ 3 │ +└─────┘ +``` + +Query: + +``` sql +SELECT quantileDeterministic(val, 1) FROM t +``` + +Result: + +``` text +┌─quantileDeterministic(val, 1)─┐ +│ 1.5 │ +└───────────────────────────────┘ +``` + +**See Also** + +- [median](median.md#median) +- [quantiles](quantiles.md#quantiles) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md new file mode 100644 index 00000000000..e2780df6bbf --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md @@ -0,0 +1,55 @@ +--- +toc_priority: 202 +--- + +# quantileExact {#quantileexact} + +Exactly computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence. + +To get exact value, all the passed values ​​are combined into an array, which is then partially sorted. Therefore, the function consumes `O(n)` memory, where `n` is a number of values that were passed. However, for a small number of values, the function is very effective. + +When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](quantiles.md#quantiles) function. + +**Syntax** + +``` sql +quantileExact(level)(expr) +``` + +Alias: `medianExact`. + +**Parameters** + +- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). +- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). + +**Returned value** + +- Quantile of the specified level. + +Type: + +- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. +- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. +- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. + +**Example** + +Query: + +``` sql +SELECT quantileExact(number) FROM numbers(10) +``` + +Result: + +``` text +┌─quantileExact(number)─┐ +│ 5 │ +└───────────────────────┘ +``` + +**See Also** + +- [median](median.md#median) +- [quantiles](quantiles.md#quantiles) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md new file mode 100644 index 00000000000..98152c9c73f --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md @@ -0,0 +1,68 @@ +--- +toc_priority: 203 +--- + + +# quantileExactWeighted {#quantileexactweighted} + +Exactly computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence, taking into account the weight of each element. + +To get exact value, all the passed values ​​are combined into an array, which is then partially sorted. Each value is counted with its weight, as if it is present `weight` times. A hash table is used in the algorithm. Because of this, if the passed values ​​are frequently repeated, the function consumes less RAM than [quantileExact](quantileexact.md#quantileexact). 
You can use this function instead of `quantileExact` and specify the weight 1. + +When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](quantiles.md#quantiles) function. + +**Syntax** + +``` sql +quantileExactWeighted(level)(expr, weight) +``` + +Alias: `medianExactWeighted`. + +**Parameters** + +- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). +- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). +- `weight` — Column with weights of sequence members. Weight is a number of value occurrences. + +**Returned value** + +- Quantile of the specified level. + +Type: + +- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. +- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. +- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. + +**Example** + +Input table: + +``` text +┌─n─┬─val─┐ +│ 0 │ 3 │ +│ 1 │ 2 │ +│ 2 │ 1 │ +│ 5 │ 4 │ +└───┴─────┘ +``` + +Query: + +``` sql +SELECT quantileExactWeighted(n, val) FROM t +``` + +Result: + +``` text +┌─quantileExactWeighted(n, val)─┐ +│ 1 │ +└───────────────────────────────┘ +``` + +**See Also** + +- [median](median.md#median) +- [quantiles](quantiles.md#quantiles) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md new file mode 100644 index 00000000000..abce6a9e7f0 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md @@ -0,0 +1,9 @@ +--- +toc_priority: 201 +--- + +# quantiles {#quantiles} + +Syntax: `quantiles(level1, level2, …)(x)` + +All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesTDigest`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values. diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md new file mode 100644 index 00000000000..18a4e5be4e0 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md @@ -0,0 +1,57 @@ +--- +toc_priority: 207 +--- + +# quantileTDigest {#quantiletdigest} + +Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence using the [t-digest](https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf) algorithm. + +The maximum error is 1%. Memory consumption is `log(n)`, where `n` is a number of values. The result depends on the order of running the query, and is nondeterministic. + +The performance of the function is lower than performance of [quantile](quantile.md#quantile) or [quantileTiming](quantiletiming.md#quantiletiming). 
In terms of the ratio of State size to precision, this function is much better than `quantile`.
+
+When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](quantiles.md#quantiles) function.
+
+**Syntax**
+
+``` sql
+quantileTDigest(level)(expr)
+```
+
+Alias: `medianTDigest`.
+
+**Parameters**
+
+- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median).
+- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
+
+**Returned value**
+
+- Approximate quantile of the specified level.
+
+Type:
+
+- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input.
+- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type.
+- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type.
+
+**Example**
+
+Query:
+
+``` sql
+SELECT quantileTDigest(number) FROM numbers(10)
+```
+
+Result:
+
+``` text
+┌─quantileTDigest(number)─┐
+│                     4.5 │
+└─────────────────────────┘
+```
+
+**See Also**
+
+- [median](median.md#median)
+- [quantiles](quantiles.md#quantiles)
diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md
new file mode 100644
index 00000000000..712e49dfbcb
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md
@@ -0,0 +1,58 @@
+---
+toc_priority: 208
+---
+
+# quantileTDigestWeighted {#quantiletdigestweighted}
+
+Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence using the [t-digest](https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf) algorithm. The function takes into account the weight of each sequence member. The maximum error is 1%. Memory consumption is `log(n)`, where `n` is a number of values.
+
+The performance of the function is lower than performance of [quantile](quantile.md#quantile) or [quantileTiming](quantiletiming.md#quantiletiming). In terms of the ratio of State size to precision, this function is much better than `quantile`.
+
+The result depends on the order of running the query, and is nondeterministic.
+
+When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](quantiles.md#quantiles) function.
+
+**Syntax**
+
+``` sql
+quantileTDigestWeighted(level)(expr, weight)
+```
+
+Alias: `medianTDigestWeighted`.
+
+**Parameters**
+
+- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median).
+- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). +- `weight` — Column with weights of sequence elements. Weight is a number of value occurrences. + +**Returned value** + +- Approximate quantile of the specified level. + +Type: + +- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. +- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. +- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. + +**Example** + +Query: + +``` sql +SELECT quantileTDigestWeighted(number, 1) FROM numbers(10) +``` + +Result: + +``` text +┌─quantileTDigestWeighted(number, 1)─┐ +│ 4.5 │ +└────────────────────────────────────┘ +``` + +**See Also** + +- [median](median.md#median) +- [quantiles](quantiles.md#quantiles) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md new file mode 100644 index 00000000000..7e887cba40d --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md @@ -0,0 +1,86 @@ +--- +toc_priority: 204 +--- + +# quantileTiming {#quantiletiming} + +With the determined precision computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence. + +The result is deterministic (it doesn’t depend on the query processing order). The function is optimized for working with sequences which describe distributions like loading web pages times or backend response times. + +When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](quantiles.md#quantiles) function. + +**Syntax** + +``` sql +quantileTiming(level)(expr) +``` + +Alias: `medianTiming`. + +**Parameters** + +- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). + +- `expr` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) over a column values returning a [Float\*](../../../sql-reference/data-types/float.md)-type number. + + - If negative values are passed to the function, the behavior is undefined. + - If the value is greater than 30,000 (a page loading time of more than 30 seconds), it is assumed to be 30,000. + +**Accuracy** + +The calculation is accurate if: + +- Total number of values doesn’t exceed 5670. +- Total number of values exceeds 5670, but the page loading time is less than 1024ms. + +Otherwise, the result of the calculation is rounded to the nearest multiple of 16 ms. + +!!! note "Note" + For calculating page loading time quantiles, this function is more effective and accurate than [quantile](quantile.md#quantile). + +**Returned value** + +- Quantile of the specified level. + +Type: `Float32`. + +!!! note "Note" + If no values are passed to the function (when using `quantileTimingIf`), [NaN](../../../sql-reference/data-types/float.md#data_type-float-nan-inf) is returned. The purpose of this is to differentiate these cases from cases that result in zero. 
See [ORDER BY clause](../../../sql-reference/statements/select/order-by.md#select-order-by) for notes on sorting `NaN` values. + +**Example** + +Input table: + +``` text +┌─response_time─┐ +│ 72 │ +│ 112 │ +│ 126 │ +│ 145 │ +│ 104 │ +│ 242 │ +│ 313 │ +│ 168 │ +│ 108 │ +└───────────────┘ +``` + +Query: + +``` sql +SELECT quantileTiming(response_time) FROM t +``` + +Result: + +``` text +┌─quantileTiming(response_time)─┐ +│ 126 │ +└───────────────────────────────┘ +``` + +**See Also** + +- [median](median.md#median) +- [quantiles](quantiles.md#quantiles) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md new file mode 100644 index 00000000000..60853558e64 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md @@ -0,0 +1,85 @@ +--- +toc_priority: 205 +--- + +# quantileTimingWeighted {#quantiletimingweighted} + +With the determined precision computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence according to the weight of each sequence member. + +The result is deterministic (it doesn’t depend on the query processing order). The function is optimized for working with sequences which describe distributions like loading web pages times or backend response times. + +When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](quantiles.md#quantiles) function. + +**Syntax** + +``` sql +quantileTimingWeighted(level)(expr, weight) +``` + +Alias: `medianTimingWeighted`. + +**Parameters** + +- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). + +- `expr` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) over a column values returning a [Float\*](../../../sql-reference/data-types/float.md)-type number. + + - If negative values are passed to the function, the behavior is undefined. + - If the value is greater than 30,000 (a page loading time of more than 30 seconds), it is assumed to be 30,000. + +- `weight` — Column with weights of sequence elements. Weight is a number of value occurrences. + +**Accuracy** + +The calculation is accurate if: + +- Total number of values doesn’t exceed 5670. +- Total number of values exceeds 5670, but the page loading time is less than 1024ms. + +Otherwise, the result of the calculation is rounded to the nearest multiple of 16 ms. + +!!! note "Note" + For calculating page loading time quantiles, this function is more effective and accurate than [quantile](quantile.md#quantile). + +**Returned value** + +- Quantile of the specified level. + +Type: `Float32`. + +!!! note "Note" + If no values are passed to the function (when using `quantileTimingIf`), [NaN](../../../sql-reference/data-types/float.md#data_type-float-nan-inf) is returned. The purpose of this is to differentiate these cases from cases that result in zero. See [ORDER BY clause](../../../sql-reference/statements/select/order-by.md#select-order-by) for notes on sorting `NaN` values. 
+
+**Example**
+
+Input table:
+
+``` text
+┌─response_time─┬─weight─┐
+│            68 │      1 │
+│           104 │      2 │
+│           112 │      3 │
+│           126 │      2 │
+│           138 │      1 │
+│           162 │      1 │
+└───────────────┴────────┘
+```
+
+Query:
+
+``` sql
+SELECT quantileTimingWeighted(response_time, weight) FROM t
+```
+
+Result:
+
+``` text
+┌─quantileTimingWeighted(response_time, weight)─┐
+│                                           112 │
+└───────────────────────────────────────────────┘
+```
+
+**See Also**
+
+- [median](median.md#median)
+- [quantiles](quantiles.md#quantiles)
diff --git a/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md b/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md
new file mode 100644
index 00000000000..fee71cdeb49
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md
@@ -0,0 +1,42 @@
+---
+toc_priority: 220
+---
+
+# simpleLinearRegression {#simplelinearregression}
+
+Performs simple (unidimensional) linear regression.
+
+``` sql
+simpleLinearRegression(x, y)
+```
+
+Parameters:
+
+- `x` — Column with explanatory variable values.
+- `y` — Column with dependent variable values.
+
+Returned values:
+
+Constants `(a, b)` of the resulting line `y = a*x + b`.
+
+**Examples**
+
+``` sql
+SELECT arrayReduce('simpleLinearRegression', [0, 1, 2, 3], [0, 1, 2, 3])
+```
+
+``` text
+┌─arrayReduce('simpleLinearRegression', [0, 1, 2, 3], [0, 1, 2, 3])─┐
+│ (1,0)                                                             │
+└───────────────────────────────────────────────────────────────────┘
+```
+
+``` sql
+SELECT arrayReduce('simpleLinearRegression', [0, 1, 2, 3], [3, 4, 5, 6])
+```
+
+``` text
+┌─arrayReduce('simpleLinearRegression', [0, 1, 2, 3], [3, 4, 5, 6])─┐
+│ (1,3)                                                             │
+└───────────────────────────────────────────────────────────────────┘
+```
diff --git a/docs/en/sql-reference/aggregate-functions/reference/skewpop.md b/docs/en/sql-reference/aggregate-functions/reference/skewpop.md
new file mode 100644
index 00000000000..d15a5ffdd47
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/skewpop.md
@@ -0,0 +1,25 @@
+---
+toc_priority: 150
+---
+
+# skewPop {#skewpop}
+
+Computes the [skewness](https://en.wikipedia.org/wiki/Skewness) of a sequence.
+
+``` sql
+skewPop(expr)
+```
+
+**Parameters**
+
+`expr` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) returning a number.
+
+**Returned value**
+
+The skewness of the given distribution. Type — [Float64](../../../sql-reference/data-types/float.md).
+
+**Example**
+
+``` sql
+SELECT skewPop(value) FROM series_with_value_column
+```
diff --git a/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md b/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md
new file mode 100644
index 00000000000..cb323f4b142
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md
@@ -0,0 +1,27 @@
+---
+toc_priority: 151
+---
+
+# skewSamp {#skewsamp}
+
+Computes the [sample skewness](https://en.wikipedia.org/wiki/Skewness) of a sequence.
+
+It represents an unbiased estimate of the skewness of a random variable if passed values form its sample.
+
+``` sql
+skewSamp(expr)
+```
+
+**Parameters**
+
+`expr` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) returning a number.
+
+**Returned value**
+
+The skewness of the given distribution. Type — [Float64](../../../sql-reference/data-types/float.md). If `n <= 1` (`n` is the size of the sample), then the function returns `nan`.
+
+**Example**
+
+``` sql
+SELECT skewSamp(value) FROM series_with_value_column
+```
diff --git a/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md b/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md
new file mode 100644
index 00000000000..c64ebe1cbcf
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md
@@ -0,0 +1,10 @@
+---
+toc_priority: 30
+---
+
+# stddevPop {#stddevpop}
+
+The result is equal to the square root of [varPop](varpop.md).
+
+!!! note "Note"
+    This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `stddevPopStable` function. It works slower but provides a lower computational error.
diff --git a/docs/en/sql-reference/aggregate-functions/reference/stddevsamp.md b/docs/en/sql-reference/aggregate-functions/reference/stddevsamp.md
new file mode 100644
index 00000000000..78aa89a0757
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/stddevsamp.md
@@ -0,0 +1,10 @@
+---
+toc_priority: 31
+---
+
+# stddevSamp {#stddevsamp}
+
+The result is equal to the square root of [varSamp](varsamp.md).
+
+!!! note "Note"
+    This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `stddevSampStable` function. It works slower but provides a lower computational error.
diff --git a/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md b/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md
new file mode 100644
index 00000000000..948614d4e68
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md
@@ -0,0 +1,75 @@
+---
+toc_priority: 221
+---
+
+# stochasticLinearRegression {#agg_functions-stochasticlinearregression}
+
+This function implements stochastic linear regression. It supports custom parameters for learning rate, L2 regularization coefficient and mini-batch size, and has several methods for updating weights ([Adam](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Adam) (used by default), [simple SGD](https://en.wikipedia.org/wiki/Stochastic_gradient_descent), [Momentum](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Momentum), [Nesterov](https://mipt.ru/upload/medialibrary/d7e/41-91.pdf)).
+
+### Parameters {#agg_functions-stochasticlinearregression-parameters}
+
+There are 4 customizable parameters. They are passed to the function sequentially, but there is no need to pass all four: default values are used for the omitted ones. However, a good model usually requires some parameter tuning.
+
+``` text
+stochasticLinearRegression(1.0, 1.0, 10, 'SGD')
+```
+
+1. `learning rate` is the coefficient on the step length when the gradient descent step is performed. A learning rate that is too big may cause infinite weights of the model. Default is `0.00001`.
+2. `l2 regularization coefficient` which may help to prevent overfitting. Default is `0.1`.
+3. `mini-batch size` sets the number of elements whose gradients are computed and summed to perform one step of gradient descent. Pure stochastic descent uses one element; however, having small batches (about 10 elements) makes gradient steps more stable. Default is `15`.
+4. `method for updating weights`: `Adam` (by default), `SGD`, `Momentum`, `Nesterov`.
`Momentum` and `Nesterov` require a bit more computation and memory, but they are often useful in terms of convergence speed and stability of stochastic gradient methods.
+
+### Usage {#agg_functions-stochasticlinearregression-usage}
+
+`stochasticLinearRegression` is used in two steps: fitting the model and predicting on new data. In order to fit the model and save its state for later usage, we use the `-State` combinator, which saves the state (model weights, etc.).
+To predict, we use the function [evalMLMethod](../../../sql-reference/functions/machine-learning-functions.md#machine_learning_methods-evalmlmethod), which takes a state as an argument as well as the features to predict on.
+
+
+**1.** Fitting
+
+The following query may be used.
+
+``` sql
+CREATE TABLE IF NOT EXISTS train_data
+(
+    param1 Float64,
+    param2 Float64,
+    target Float64
+) ENGINE = Memory;
+
+CREATE TABLE your_model ENGINE = Memory AS SELECT
+stochasticLinearRegressionState(0.1, 0.0, 5, 'SGD')(target, param1, param2)
+AS state FROM train_data;
+```
+
+Here we also need to insert data into the `train_data` table. The number of parameters is not fixed; it depends only on the number of arguments passed into `stochasticLinearRegressionState`. They all must be numeric values.
+Note that the column with the target value (which we would like to learn to predict) is passed as the first argument.
+
+**2.** Predicting
+
+After saving a state into the table, we may use it multiple times for prediction, or even merge it with other states to create new, even better models.
+
+``` sql
+WITH (SELECT state FROM your_model) AS model SELECT
+evalMLMethod(model, param1, param2) FROM test_data
+```
+
+The query will return a column of predicted values. Note that the first argument of `evalMLMethod` is an `AggregateFunctionState` object; the next arguments are columns of features.
+
+`test_data` is a table like `train_data`, but it does not have to contain the target value.
+
+### Notes {#agg_functions-stochasticlinearregression-notes}
+
+1. To merge two models, the user may create a query such as:
+    `sql SELECT state1 + state2 FROM your_models`
+    where the `your_models` table contains both models. This query will return a new `AggregateFunctionState` object.
+
+2. The user may fetch the weights of the created model for their own purposes without saving the model if no `-State` combinator is used.
+    `sql SELECT stochasticLinearRegression(0.01)(target, param1, param2) FROM train_data`
+    Such a query will fit the model and return its weights: first come the weights that correspond to the parameters of the model, and the last one is the bias. So in the example above the query will return a column with 3 values.
+
+**See Also**
+
+- [stochasticLogisticRegression](stochasticlogisticregression.md#agg_functions-stochasticlogisticregression)
+- [Difference between linear and logistic regressions](https://stackoverflow.com/questions/12146914/what-is-the-difference-between-linear-regression-and-logistic-regression)
diff --git a/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md b/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md
new file mode 100644
index 00000000000..2e86eeed881
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md
@@ -0,0 +1,55 @@
+---
+toc_priority: 222
+---
+
+# stochasticLogisticRegression {#agg_functions-stochasticlogisticregression}
+
+This function implements stochastic logistic regression.
It can be used for binary classification problem, supports the same custom parameters as stochasticLinearRegression and works the same way. + +### Parameters {#agg_functions-stochasticlogisticregression-parameters} + +Parameters are exactly the same as in stochasticLinearRegression: +`learning rate`, `l2 regularization coefficient`, `mini-batch size`, `method for updating weights`. +For more information see [parameters](#agg_functions-stochasticlinearregression-parameters). + +``` text +stochasticLogisticRegression(1.0, 1.0, 10, 'SGD') +``` + +**1.** Fitting + + + + See the `Fitting` section in the [stochasticLinearRegression](#stochasticlinearregression-usage-fitting) description. + + Predicted labels have to be in \[-1, 1\]. + +**2.** Predicting + + + + Using saved state we can predict probability of object having label `1`. + + ``` sql + WITH (SELECT state FROM your_model) AS model SELECT + evalMLMethod(model, param1, param2) FROM test_data + ``` + + The query will return a column of probabilities. Note that first argument of `evalMLMethod` is `AggregateFunctionState` object, next are columns of features. + + We can also set a bound of probability, which assigns elements to different labels. + + ``` sql + SELECT ans < 1.1 AND ans > 0.5 FROM + (WITH (SELECT state FROM your_model) AS model SELECT + evalMLMethod(model, param1, param2) AS ans FROM test_data) + ``` + + Then the result will be labels. + + `test_data` is a table like `train_data` but may not contain target value. + +**See Also** + +- [stochasticLinearRegression](stochasticlinearregression.md#agg_functions-stochasticlinearregression) +- [Difference between linear and logistic regressions.](https://stackoverflow.com/questions/12146914/what-is-the-difference-between-linear-regression-and-logistic-regression) diff --git a/docs/en/sql-reference/aggregate-functions/reference/sum.md b/docs/en/sql-reference/aggregate-functions/reference/sum.md new file mode 100644 index 00000000000..77d38a2c7b2 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/sum.md @@ -0,0 +1,7 @@ +--- +toc_priority: 4 +--- + +# sum {#agg_function-sum} + +Calculates the sum. Only works for numbers. diff --git a/docs/en/sql-reference/aggregate-functions/reference/summap.md b/docs/en/sql-reference/aggregate-functions/reference/summap.md new file mode 100644 index 00000000000..4ccbc22de35 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/summap.md @@ -0,0 +1,48 @@ +--- +toc_priority: 141 +--- + +# sumMap {#agg_functions-summap} + +Syntax: `sumMap(key, value)` or `sumMap(Tuple(key, value))` + +Totals the `value` array according to the keys specified in the `key` array. + +Passing tuple of keys and values arrays is a synonym to passing two arrays of keys and values. + +The number of elements in `key` and `value` must be the same for each row that is totaled. + +Returns a tuple of two arrays: keys in sorted order, and values ​​summed for the corresponding keys. 
+
+Example:
+
+``` sql
+CREATE TABLE sum_map(
+    date Date,
+    timeslot DateTime,
+    statusMap Nested(
+        status UInt16,
+        requests UInt64
+    ),
+    statusMapTuple Tuple(Array(Int32), Array(Int32))
+) ENGINE = Log;
+INSERT INTO sum_map VALUES
+    ('2000-01-01', '2000-01-01 00:00:00', [1, 2, 3], [10, 10, 10], ([1, 2, 3], [10, 10, 10])),
+    ('2000-01-01', '2000-01-01 00:00:00', [3, 4, 5], [10, 10, 10], ([3, 4, 5], [10, 10, 10])),
+    ('2000-01-01', '2000-01-01 00:01:00', [4, 5, 6], [10, 10, 10], ([4, 5, 6], [10, 10, 10])),
+    ('2000-01-01', '2000-01-01 00:01:00', [6, 7, 8], [10, 10, 10], ([6, 7, 8], [10, 10, 10]));
+
+SELECT
+    timeslot,
+    sumMap(statusMap.status, statusMap.requests),
+    sumMap(statusMapTuple)
+FROM sum_map
+GROUP BY timeslot
+```
+
+``` text
+┌────────────timeslot─┬─sumMap(statusMap.status, statusMap.requests)─┬─sumMap(statusMapTuple)─────────┐
+│ 2000-01-01 00:00:00 │ ([1,2,3,4,5],[10,10,20,10,10])               │ ([1,2,3,4,5],[10,10,20,10,10]) │
+│ 2000-01-01 00:01:00 │ ([4,5,6,7,8],[10,10,20,10,10])               │ ([4,5,6,7,8],[10,10,20,10,10]) │
+└─────────────────────┴──────────────────────────────────────────────┴────────────────────────────────┘
+```
diff --git a/docs/en/sql-reference/aggregate-functions/reference/sumwithoverflow.md b/docs/en/sql-reference/aggregate-functions/reference/sumwithoverflow.md
new file mode 100644
index 00000000000..fa603b4b155
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/sumwithoverflow.md
@@ -0,0 +1,9 @@
+---
+toc_priority: 140
+---
+
+# sumWithOverflow {#sumwithoverflowx}
+
+Computes the sum of the numbers, using the same data type for the result as for the input parameters. If the sum exceeds the maximum value for this data type, the function returns an error.
+
+Only works for numbers.
diff --git a/docs/en/sql-reference/aggregate-functions/reference/timeseriesgroupratesum.md b/docs/en/sql-reference/aggregate-functions/reference/timeseriesgroupratesum.md
new file mode 100644
index 00000000000..4cbe37bc635
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/timeseriesgroupratesum.md
@@ -0,0 +1,16 @@
+---
+toc_priority: 171
+---
+
+# timeSeriesGroupRateSum {#agg-function-timeseriesgroupratesum}
+
+Syntax: `timeSeriesGroupRateSum(uid, ts, val)`
+
+Similarly to [timeSeriesGroupSum](timeseriesgroupsum.md), `timeSeriesGroupRateSum` calculates the rate of change of each time series and then sums the rates together.
+Also, the timestamps should be in ascending order before using this function.
+
+Applying this function to the data from the `timeSeriesGroupSum` example, you get the following result:
+
+``` text
+[(2,0),(3,0.1),(7,0.3),(8,0.3),(12,0.3),(17,0.3),(18,0.3),(24,0.3),(25,0.1)]
+```
diff --git a/docs/en/sql-reference/aggregate-functions/reference/timeseriesgroupsum.md b/docs/en/sql-reference/aggregate-functions/reference/timeseriesgroupsum.md
new file mode 100644
index 00000000000..aa90c7956df
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/timeseriesgroupsum.md
@@ -0,0 +1,57 @@
+---
+toc_priority: 170
+---
+
+# timeSeriesGroupSum {#agg-function-timeseriesgroupsum}
+
+Syntax: `timeSeriesGroupSum(uid, timestamp, value)`
+
+`timeSeriesGroupSum` can aggregate different time series whose sample timestamps are not aligned.
+It uses linear interpolation between two sample timestamps and then sums the time series together.
+
+- `uid` is the time series unique id, `UInt64`.
+- `timestamp` is of `Int64` type in order to support millisecond or microsecond precision.
+- `value` is the metric.
+ +The function returns array of tuples with `(timestamp, aggregated_value)` pairs. + +Before using this function make sure `timestamp` is in ascending order. + +Example: + +``` text +┌─uid─┬─timestamp─┬─value─┐ +│ 1 │ 2 │ 0.2 │ +│ 1 │ 7 │ 0.7 │ +│ 1 │ 12 │ 1.2 │ +│ 1 │ 17 │ 1.7 │ +│ 1 │ 25 │ 2.5 │ +│ 2 │ 3 │ 0.6 │ +│ 2 │ 8 │ 1.6 │ +│ 2 │ 12 │ 2.4 │ +│ 2 │ 18 │ 3.6 │ +│ 2 │ 24 │ 4.8 │ +└─────┴───────────┴───────┘ +``` + +``` sql +CREATE TABLE time_series( + uid UInt64, + timestamp Int64, + value Float64 +) ENGINE = Memory; +INSERT INTO time_series VALUES + (1,2,0.2),(1,7,0.7),(1,12,1.2),(1,17,1.7),(1,25,2.5), + (2,3,0.6),(2,8,1.6),(2,12,2.4),(2,18,3.6),(2,24,4.8); + +SELECT timeSeriesGroupSum(uid, timestamp, value) +FROM ( + SELECT * FROM time_series order by timestamp ASC +); +``` + +And the result will be: + +``` text +[(2,0.2),(3,0.9),(7,2.1),(8,2.4),(12,3.6),(17,5.1),(18,5.4),(24,7.2),(25,2.5)] +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/topk.md b/docs/en/sql-reference/aggregate-functions/reference/topk.md new file mode 100644 index 00000000000..004a67d33af --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/topk.md @@ -0,0 +1,42 @@ +--- +toc_priority: 108 +--- + +# topK {#topk} + +Returns an array of the approximately most frequent values in the specified column. The resulting array is sorted in descending order of approximate frequency of values (not by the values themselves). + +Implements the [Filtered Space-Saving](http://www.l2f.inesc-id.pt/~fmmb/wiki/uploads/Work/misnis.ref0a.pdf) algorithm for analyzing TopK, based on the reduce-and-combine algorithm from [Parallel Space Saving](https://arxiv.org/pdf/1401.0702.pdf). + +``` sql +topK(N)(column) +``` + +This function doesn’t provide a guaranteed result. In certain situations, errors might occur and it might return frequent values that aren’t the most frequent values. + +We recommend using the `N < 10` value; performance is reduced with large `N` values. Maximum value of `N = 65536`. + +**Parameters** + +- ‘N’ is the number of elements to return. + +If the parameter is omitted, default value 10 is used. + +**Arguments** + +- ’ x ’ – The value to calculate frequency. + +**Example** + +Take the [OnTime](../../../getting-started/example-datasets/ontime.md) data set and select the three most frequently occurring values in the `AirlineID` column. + +``` sql +SELECT topK(3)(AirlineID) AS res +FROM ontime +``` + +``` text +┌─res─────────────────┐ +│ [19393,19790,19805] │ +└─────────────────────┘ +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md b/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md new file mode 100644 index 00000000000..b597317f44e --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md @@ -0,0 +1,42 @@ +--- +toc_priority: 109 +--- + +# topKWeighted {#topkweighted} + +Similar to `topK` but takes one additional argument of integer type - `weight`. Every value is accounted `weight` times for frequency calculation. + +**Syntax** + +``` sql +topKWeighted(N)(x, weight) +``` + +**Parameters** + +- `N` — The number of elements to return. + +**Arguments** + +- `x` – The value. +- `weight` — The weight. [UInt8](../../../sql-reference/data-types/int-uint.md). + +**Returned value** + +Returns an array of the values with maximum approximate sum of weights. 
+ +**Example** + +Query: + +``` sql +SELECT topKWeighted(10)(number, number) FROM numbers(1000) +``` + +Result: + +``` text +┌─topKWeighted(10)(number, number)──────────┐ +│ [999,998,997,996,995,994,993,992,991,990] │ +└───────────────────────────────────────────┘ +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniq.md b/docs/en/sql-reference/aggregate-functions/reference/uniq.md new file mode 100644 index 00000000000..ed831b9c25b --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/uniq.md @@ -0,0 +1,40 @@ +--- +toc_priority: 190 +--- + +# uniq {#agg_function-uniq} + +Calculates the approximate number of different values of the argument. + +``` sql +uniq(x[, ...]) +``` + +**Parameters** + +The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types. + +**Returned value** + +- A [UInt64](../../../sql-reference/data-types/int-uint.md)-type number. + +**Implementation details** + +Function: + +- Calculates a hash for all parameters in the aggregate, then uses it in calculations. + +- Uses an adaptive sampling algorithm. For the calculation state, the function uses a sample of element hash values up to 65536. + + This algorithm is very accurate and very efficient on the CPU. When the query contains several of these functions, using `uniq` is almost as fast as using other aggregate functions. + +- Provides the result deterministically (it doesn’t depend on the query processing order). + +We recommend using this function in almost all scenarios. + +**See Also** + +- [uniqCombined](uniqcombined.md#agg_function-uniqcombined) +- [uniqCombined64](uniqcombined64.md#agg_function-uniqcombined64) +- [uniqHLL12](uniqhll12.md#agg_function-uniqhll12) +- [uniqExact](uniqexact.md#agg_function-uniqexact) diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md new file mode 100644 index 00000000000..c7600334da8 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md @@ -0,0 +1,51 @@ +--- +toc_priority: 192 +--- + +# uniqCombined {#agg_function-uniqcombined} + +Calculates the approximate number of different argument values. + +``` sql +uniqCombined(HLL_precision)(x[, ...]) +``` + +The `uniqCombined` function is a good choice for calculating the number of different values. + +**Parameters** + +The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types. + +`HLL_precision` is the base-2 logarithm of the number of cells in [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog). Optional, you can use the function as `uniqCombined(x[, ...])`. The default value for `HLL_precision` is 17, which is effectively 96 KiB of space (2^17 cells, 6 bits each). + +**Returned value** + +- A number [UInt64](../../../sql-reference/data-types/int-uint.md)-type number. + +**Implementation details** + +Function: + +- Calculates a hash (64-bit hash for `String` and 32-bit otherwise) for all parameters in the aggregate, then uses it in calculations. + +- Uses a combination of three algorithms: array, hash table, and HyperLogLog with an error correction table. + + For a small number of distinct elements, an array is used. When the set size is larger, a hash table is used. For a larger number of elements, HyperLogLog is used, which will occupy a fixed amount of memory. 
+
+- Provides the result deterministically (it doesn’t depend on the query processing order).
+
+!!! note "Note"
+    Since it uses a 32-bit hash for non-`String` types, the result has a very high error for cardinalities significantly larger than `UINT_MAX` (the error rises quickly after a few tens of billions of distinct values), so in this case you should use [uniqCombined64](uniqcombined64.md#agg_function-uniqcombined64).
+
+Compared to the [uniq](uniq.md#agg_function-uniq) function, `uniqCombined`:
+
+- Consumes several times less memory.
+- Provides several times higher accuracy.
+- Usually has slightly lower performance. In some scenarios, `uniqCombined` can perform better than `uniq`, for example, with distributed queries that transmit a large number of aggregation states over the network.
+
+**See Also**
+
+- [uniq](uniq.md#agg_function-uniq)
+- [uniqCombined64](uniqcombined64.md#agg_function-uniqcombined64)
+- [uniqHLL12](uniqhll12.md#agg_function-uniqhll12)
+- [uniqExact](uniqexact.md#agg_function-uniqexact)
diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64.md b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64.md
new file mode 100644
index 00000000000..5bf2e283e6e
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64.md
@@ -0,0 +1,7 @@
+---
+toc_priority: 193
+---
+
+# uniqCombined64 {#agg_function-uniqcombined64}
+
+Same as [uniqCombined](uniqcombined.md#agg_function-uniqcombined), but uses a 64-bit hash for all data types.
diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md b/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md
new file mode 100644
index 00000000000..9ccd065bb62
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md
@@ -0,0 +1,25 @@
+---
+toc_priority: 191
+---
+
+# uniqExact {#agg_function-uniqexact}
+
+Calculates the exact number of different argument values.
+
+``` sql
+uniqExact(x[, ...])
+```
+
+Use the `uniqExact` function if you absolutely need an exact result. Otherwise, use the [uniq](uniq.md#agg_function-uniq) function.
+
+The `uniqExact` function uses more memory than `uniq`, because the size of the state grows without bound as the number of different values increases.
+
+**Parameters**
+
+The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types.
+
+**See Also**
+
+- [uniq](uniq.md#agg_function-uniq)
+- [uniqCombined](uniqcombined.md#agg_function-uniqcombined)
+- [uniqHLL12](uniqhll12.md#agg_function-uniqhll12)
diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md b/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md
new file mode 100644
index 00000000000..10263a65071
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md
@@ -0,0 +1,39 @@
+---
+toc_priority: 194
+---
+
+# uniqHLL12 {#agg_function-uniqhll12}
+
+Calculates the approximate number of different argument values, using the [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) algorithm.
+
+``` sql
+uniqHLL12(x[, ...])
+```
+
+**Parameters**
+
+The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types.
+
+**Returned value**
+
+- A [UInt64](../../../sql-reference/data-types/int-uint.md)-type number.
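+
+**Example**
+
+A minimal sketch comparing the approximation with the exact count on synthetic data. The exact count is 1000, and the `uniqHLL12` estimate should land close to it (it may deviate slightly because the result is approximate):
+
+``` sql
+SELECT
+    uniqHLL12(number % 1000) AS approximate,
+    uniqExact(number % 1000) AS exact
+FROM numbers(100000)
+```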
+
+**Implementation details**
+
+Function:
+
+- Calculates a hash for all parameters in the aggregate, then uses it in calculations.
+
+- Uses the HyperLogLog algorithm to approximate the number of different argument values.
+
+    2^12 5-bit cells are used. The size of the state is slightly more than 2.5 KB. The result is not very accurate (up to ~10% error) for small data sets (<10K elements). However, the result is fairly accurate for high-cardinality data sets (10K-100M), with a maximum error of ~1.6%. Starting from 100M, the estimation error increases, and the function will return very inaccurate results for data sets with extremely high cardinality (1B+ elements).
+
+- Provides the result deterministically (it doesn’t depend on the query processing order).
+
+We don’t recommend using this function. In most cases, use the [uniq](uniq.md#agg_function-uniq) or [uniqCombined](uniqcombined.md#agg_function-uniqcombined) function.
+
+**See Also**
+
+- [uniq](uniq.md#agg_function-uniq)
+- [uniqCombined](uniqcombined.md#agg_function-uniqcombined)
+- [uniqExact](uniqexact.md#agg_function-uniqexact)
diff --git a/docs/en/sql-reference/aggregate-functions/reference/varpop.md b/docs/en/sql-reference/aggregate-functions/reference/varpop.md
new file mode 100644
index 00000000000..c08dcfd9bfd
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/varpop.md
@@ -0,0 +1,12 @@
+---
+toc_priority: 32
+---
+
+# varPop(x) {#varpopx}
+
+Calculates the value of `Σ((x - x̅)^2) / n`, where `n` is the sample size and `x̅` is the average value of `x`.
+
+In other words, it calculates the dispersion for a set of values. Returns `Float64`.
+
+!!! note "Note"
+    This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varPopStable` function. It works slower but provides a lower computational error.
diff --git a/docs/en/sql-reference/aggregate-functions/reference/varsamp.md b/docs/en/sql-reference/aggregate-functions/reference/varsamp.md
new file mode 100644
index 00000000000..78bc545a5d0
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/varsamp.md
@@ -0,0 +1,14 @@
+---
+toc_priority: 33
+---
+
+# varSamp {#varsamp}
+
+Calculates the value of `Σ((x - x̅)^2) / (n - 1)`, where `n` is the sample size and `x̅` is the average value of `x`.
+
+It represents an unbiased estimate of the variance of a random variable if the passed values form its sample.
+
+Returns `Float64`. When `n <= 1`, returns `+∞`.
+
+!!! note "Note"
+    This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varSampStable` function. It works slower but provides a lower computational error.
diff --git a/docs/en/sql-reference/data-types/aggregatefunction.md b/docs/en/sql-reference/data-types/aggregatefunction.md
index f214db20ea7..8bde9e8011b 100644
--- a/docs/en/sql-reference/data-types/aggregatefunction.md
+++ b/docs/en/sql-reference/data-types/aggregatefunction.md
@@ -28,7 +28,7 @@ CREATE TABLE t
 ) ENGINE = ...
 ```
 
-[uniq](../../sql-reference/aggregate-functions/reference.md#agg_function-uniq), anyIf ([any](../../sql-reference/aggregate-functions/reference.md#agg_function-any)+[If](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-if)) and [quantiles](../../sql-reference/aggregate-functions/reference.md) are the aggregate functions supported in ClickHouse.
+[uniq](../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq), anyIf ([any](../../sql-reference/aggregate-functions/reference/any.md#agg_function-any)+[If](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-if)) and [quantiles](../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) are the aggregate functions supported in ClickHouse. ## Usage {#usage} diff --git a/docs/en/sql-reference/data-types/array.md b/docs/en/sql-reference/data-types/array.md index 98c36d228e3..48957498d63 100644 --- a/docs/en/sql-reference/data-types/array.md +++ b/docs/en/sql-reference/data-types/array.md @@ -45,7 +45,7 @@ SELECT [1, 2] AS x, toTypeName(x) ## Working with Data Types {#working-with-data-types} -When creating an array on the fly, ClickHouse automatically defines the argument type as the narrowest data type that can store all the listed arguments. If there are any [Nullable](nullable.md#data_type-nullable) or literal [NULL](../../sql-reference/syntax.md#null-literal) values, the type of an array element also becomes [Nullable](nullable.md). +When creating an array on the fly, ClickHouse automatically defines the argument type as the narrowest data type that can store all the listed arguments. If there are any [Nullable](../../sql-reference/data-types/nullable.md#data_type-nullable) or literal [NULL](../../sql-reference/syntax.md#null-literal) values, the type of an array element also becomes [Nullable](../../sql-reference/data-types/nullable.md). If ClickHouse couldn’t determine the data type, it generates an exception. For instance, this happens when trying to create an array with strings and numbers simultaneously (`SELECT array(1, 'a')`). diff --git a/docs/en/sql-reference/data-types/datetime.md b/docs/en/sql-reference/data-types/datetime.md index 4e14f90c029..a2ae68ebf14 100644 --- a/docs/en/sql-reference/data-types/datetime.md +++ b/docs/en/sql-reference/data-types/datetime.md @@ -122,6 +122,6 @@ FROM dt - [The `date_time_input_format` setting](../../operations/settings/settings.md#settings-date_time_input_format) - [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) - [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime) -- [The `Date` data type](date.md) +- [The `Date` data type](../../sql-reference/data-types/date.md) [Original article](https://clickhouse.tech/docs/en/data_types/datetime/) diff --git a/docs/en/sql-reference/data-types/datetime64.md b/docs/en/sql-reference/data-types/datetime64.md index 0529dcbf6a3..9a3b198b5e4 100644 --- a/docs/en/sql-reference/data-types/datetime64.md +++ b/docs/en/sql-reference/data-types/datetime64.md @@ -15,7 +15,7 @@ Syntax: DateTime64(precision, [timezone]) ``` -Internally, stores data as a number of ‘ticks’ since epoch start (1970-01-01 00:00:00 UTC) as Int64. The tick resolution is determined by the precision parameter. Additionally, the `DateTime64` type can store time zone that is the same for the entire column, that affects how the values of the `DateTime64` type values are displayed in text format and how the values specified as strings are parsed (‘2020-01-01 05:00:01.000’). The time zone is not stored in the rows of the table (or in resultset), but is stored in the column metadata. See details in [DateTime](datetime.md). +Internally, stores data as a number of ‘ticks’ since epoch start (1970-01-01 00:00:00 UTC) as Int64. 
The tick resolution is determined by the precision parameter. Additionally, the `DateTime64` type can store time zone that is the same for the entire column, that affects how the values of the `DateTime64` type values are displayed in text format and how the values specified as strings are parsed (‘2020-01-01 05:00:01.000’). The time zone is not stored in the rows of the table (or in resultset), but is stored in the column metadata. See details in [DateTime](../../sql-reference/data-types/datetime.md). ## Examples {#examples} @@ -98,5 +98,5 @@ FROM dt - [The `date_time_input_format` setting](../../operations/settings/settings.md#settings-date_time_input_format) - [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) - [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime) -- [`Date` data type](date.md) -- [`DateTime` data type](datetime.md) +- [`Date` data type](../../sql-reference/data-types/date.md) +- [`DateTime` data type](../../sql-reference/data-types/datetime.md) diff --git a/docs/en/sql-reference/data-types/domains/ipv4.md b/docs/en/sql-reference/data-types/domains/ipv4.md index d8735d70b29..1237514b9e7 100644 --- a/docs/en/sql-reference/data-types/domains/ipv4.md +++ b/docs/en/sql-reference/data-types/domains/ipv4.md @@ -31,7 +31,7 @@ CREATE TABLE hits (url String, from IPv4) ENGINE = MergeTree() ORDER BY from; `IPv4` domain supports custom input format as IPv4-strings: ``` sql -INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '116.253.40.133')('https://clickhouse.tech', '183.247.232.58')('https://clickhouse.yandex/docs/en/', '116.106.34.242'); +INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '116.253.40.133')('https://clickhouse.tech', '183.247.232.58')('https://clickhouse.tech/docs/en/', '116.106.34.242'); SELECT * FROM hits; ``` diff --git a/docs/en/sql-reference/data-types/domains/ipv6.md b/docs/en/sql-reference/data-types/domains/ipv6.md index 7fd88887acc..bc57202bf66 100644 --- a/docs/en/sql-reference/data-types/domains/ipv6.md +++ b/docs/en/sql-reference/data-types/domains/ipv6.md @@ -31,7 +31,7 @@ CREATE TABLE hits (url String, from IPv6) ENGINE = MergeTree() ORDER BY from; `IPv6` domain supports custom input as IPv6-strings: ``` sql -INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '2a02:aa08:e000:3100::2')('https://clickhouse.tech', '2001:44c8:129:2632:33:0:252:2')('https://clickhouse.yandex/docs/en/', '2a02:e980:1e::1'); +INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '2a02:aa08:e000:3100::2')('https://clickhouse.tech', '2001:44c8:129:2632:33:0:252:2')('https://clickhouse.tech/docs/en/', '2a02:e980:1e::1'); SELECT * FROM hits; ``` diff --git a/docs/en/sql-reference/data-types/enum.md b/docs/en/sql-reference/data-types/enum.md index 6ab6c4fe57a..8c1e42aec79 100644 --- a/docs/en/sql-reference/data-types/enum.md +++ b/docs/en/sql-reference/data-types/enum.md @@ -93,7 +93,7 @@ Each of the values is assigned a number in the range `-128 ... 127` for `Enum8` Neither the string nor the numeric value in an `Enum` can be [NULL](../../sql-reference/syntax.md). -An `Enum` can be contained in [Nullable](nullable.md) type. So if you create a table using the query +An `Enum` can be contained in [Nullable](../../sql-reference/data-types/nullable.md) type. 
So if you create a table using the query ``` sql CREATE TABLE t_enum_nullable diff --git a/docs/en/sql-reference/data-types/fixedstring.md b/docs/en/sql-reference/data-types/fixedstring.md index a3912e9a85f..c813bd66a67 100644 --- a/docs/en/sql-reference/data-types/fixedstring.md +++ b/docs/en/sql-reference/data-types/fixedstring.md @@ -24,7 +24,7 @@ Examples of the values that can be efficiently stored in `FixedString`-typed col - Currency codes (USD, RUB … ). - Binary representation of hashes (`FixedString(16)` for MD5, `FixedString(32)` for SHA256). -To store UUID values, use the [UUID](uuid.md) data type. +To store UUID values, use the [UUID](../../sql-reference/data-types/uuid.md) data type. When inserting the data, ClickHouse: diff --git a/docs/en/sql-reference/data-types/index.md b/docs/en/sql-reference/data-types/index.md index 63ec877b703..202d5122ab6 100644 --- a/docs/en/sql-reference/data-types/index.md +++ b/docs/en/sql-reference/data-types/index.md @@ -10,6 +10,6 @@ ClickHouse can store various kinds of data in table cells. This section describes the supported data types and special considerations for using and/or implementing them if any. -You can check whether data type name is case-sensitive in the [system.data_type_families](../../operations/system-tables.md#system_tables-data_type_families) table. +You can check whether data type name is case-sensitive in the [system.data\_type\_families](../../operations/system-tables/data_type_families.md#system_tables-data_type_families) table. [Original article](https://clickhouse.tech/docs/en/data_types/) diff --git a/docs/en/sql-reference/data-types/lowcardinality.md b/docs/en/sql-reference/data-types/lowcardinality.md index 74eac6b54cd..7ccac61e4d7 100644 --- a/docs/en/sql-reference/data-types/lowcardinality.md +++ b/docs/en/sql-reference/data-types/lowcardinality.md @@ -5,55 +5,55 @@ toc_title: LowCardinality # LowCardinality Data Type {#lowcardinality-data-type} -Changes the internal representation of other data types to be dictionary-encoded. +Changes the internal representation of other data types to be dictionary-encoded. ## Syntax {#lowcardinality-syntax} -```sql +``` sql LowCardinality(data_type) ``` **Parameters** -- `data_type` — [String](string.md), [FixedString](fixedstring.md), [Date](date.md), [DateTime](datetime.md), and numbers excepting [Decimal](decimal.md). `LowCardinality` is not efficient for some data types, see the [allow_suspicious_low_cardinality_types](../../operations/settings/settings.md#allow_suspicious_low_cardinality_types) setting description. +- `data_type` — [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), and numbers excepting [Decimal](../../sql-reference/data-types/decimal.md). `LowCardinality` is not efficient for some data types, see the [allow\_suspicious\_low\_cardinality\_types](../../operations/settings/settings.md#allow_suspicious_low_cardinality_types) setting description. ## Description {#lowcardinality-dscr} -`LowCardinality` is a superstructure that changes a data storage method and rules of data processing. ClickHouse applies [dictionary coding](https://en.wikipedia.org/wiki/Dictionary_coder) to `LowCardinality`-columns. Operating with dictionary encoded data significantly increases performance of [SELECT](../statements/select/index.md) queries for many applications. 
+`LowCardinality` is a superstructure that changes a data storage method and rules of data processing. ClickHouse applies [dictionary coding](https://en.wikipedia.org/wiki/Dictionary_coder) to `LowCardinality`-columns. Operating with dictionary encoded data significantly increases performance of [SELECT](../../sql-reference/statements/select/index.md) queries for many applications. The efficiency of using `LowCarditality` data type depends on data diversity. If a dictionary contains less than 10,000 distinct values, then ClickHouse mostly shows higher efficiency of data reading and storing. If a dictionary contains more than 100,000 distinct values, then ClickHouse can perform worse in comparison with using ordinary data types. -Consider using `LowCardinality` instead of [Enum](enum.md) when working with strings. `LowCardinality` provides more flexibility in use and often reveals the same or higher efficiency. +Consider using `LowCardinality` instead of [Enum](../../sql-reference/data-types/enum.md) when working with strings. `LowCardinality` provides more flexibility in use and often reveals the same or higher efficiency. -## Example +## Example {#example} Create a table with a `LowCardinality`-column: -```sql +``` sql CREATE TABLE lc_t ( - `id` UInt16, + `id` UInt16, `strings` LowCardinality(String) ) ENGINE = MergeTree() ORDER BY id ``` -## Related Settings and Functions +## Related Settings and Functions {#related-settings-and-functions} Settings: -- [low_cardinality_max_dictionary_size](../../operations/settings/settings.md#low_cardinality_max_dictionary_size) -- [low_cardinality_use_single_dictionary_for_part](../../operations/settings/settings.md#low_cardinality_use_single_dictionary_for_part) -- [low_cardinality_allow_in_native_format](../../operations/settings/settings.md#low_cardinality_allow_in_native_format) -- [allow_suspicious_low_cardinality_types](../../operations/settings/settings.md#allow_suspicious_low_cardinality_types) +- [low\_cardinality\_max\_dictionary\_size](../../operations/settings/settings.md#low_cardinality_max_dictionary_size) +- [low\_cardinality\_use\_single\_dictionary\_for\_part](../../operations/settings/settings.md#low_cardinality_use_single_dictionary_for_part) +- [low\_cardinality\_allow\_in\_native\_format](../../operations/settings/settings.md#low_cardinality_allow_in_native_format) +- [allow\_suspicious\_low\_cardinality\_types](../../operations/settings/settings.md#allow_suspicious_low_cardinality_types) Functions: -- [toLowCardinality](../functions/type-conversion-functions.md#tolowcardinality) +- [toLowCardinality](../../sql-reference/functions/type-conversion-functions.md#tolowcardinality) -## See Also +## See Also {#see-also} -- [A Magical Mystery Tour of the LowCardinality Data Type](https://www.altinity.com/blog/2019/3/27/low-cardinality). -- [Reducing Clickhouse Storage Cost with the Low Cardinality Type – Lessons from an Instana Engineer](https://www.instana.com/blog/reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer/). -- [String Optimization (video presentation in Russian)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [Slides in English](https://github.com/yandex/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf). \ No newline at end of file +- [A Magical Mystery Tour of the LowCardinality Data Type](https://www.altinity.com/blog/2019/3/27/low-cardinality). 
+- [Reducing Clickhouse Storage Cost with the Low Cardinality Type – Lessons from an Instana Engineer](https://www.instana.com/blog/reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer/). +- [String Optimization (video presentation in Russian)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [Slides in English](https://github.com/yandex/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf). diff --git a/docs/en/sql-reference/data-types/nullable.md b/docs/en/sql-reference/data-types/nullable.md index dfa2d8a3b35..2cf5e41867e 100644 --- a/docs/en/sql-reference/data-types/nullable.md +++ b/docs/en/sql-reference/data-types/nullable.md @@ -7,7 +7,7 @@ toc_title: Nullable Allows to store special marker ([NULL](../../sql-reference/syntax.md)) that denotes “missing value” alongside normal values allowed by `TypeName`. For example, a `Nullable(Int8)` type column can store `Int8` type values, and the rows that don’t have a value will store `NULL`. -For a `TypeName`, you can’t use composite data types [Array](array.md) and [Tuple](tuple.md). Composite data types can contain `Nullable` type values, such as `Array(Nullable(Int8))`. +For a `TypeName`, you can’t use composite data types [Array](../../sql-reference/data-types/array.md) and [Tuple](../../sql-reference/data-types/tuple.md). Composite data types can contain `Nullable` type values, such as `Array(Nullable(Int8))`. A `Nullable` type field can’t be included in table indexes. diff --git a/docs/en/sql-reference/data-types/simpleaggregatefunction.md b/docs/en/sql-reference/data-types/simpleaggregatefunction.md index 5f4c408f939..1e7eea76c7f 100644 --- a/docs/en/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/en/sql-reference/data-types/simpleaggregatefunction.md @@ -1,17 +1,20 @@ # SimpleAggregateFunction {#data-type-simpleaggregatefunction} -`SimpleAggregateFunction(name, types_of_arguments…)` data type stores current value of the aggregate function, and does not store its full state as [`AggregateFunction`](aggregatefunction.md) does. This optimization can be applied to functions for which the following property holds: the result of applying a function `f` to a row set `S1 UNION ALL S2` can be obtained by applying `f` to parts of the row set separately, and then again applying `f` to the results: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`. This property guarantees that partial aggregation results are enough to compute the combined one, so we don’t have to store and process any extra data. +`SimpleAggregateFunction(name, types_of_arguments…)` data type stores current value of the aggregate function, and does not store its full state as [`AggregateFunction`](../../sql-reference/data-types/aggregatefunction.md) does. This optimization can be applied to functions for which the following property holds: the result of applying a function `f` to a row set `S1 UNION ALL S2` can be obtained by applying `f` to parts of the row set separately, and then again applying `f` to the results: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`. This property guarantees that partial aggregation results are enough to compute the combined one, so we don’t have to store and process any extra data. 
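+
+For example, `max` has this property: the maximum of a union of row sets is the maximum of the per-set maximums, so it is enough to keep only the current maximum per key. A minimal illustrative sketch (the table and column names are made up):
+
+``` sql
+CREATE TABLE simple_agg_example
+(
+    id UInt64,
+    max_value SimpleAggregateFunction(max, UInt64)
+)
+ENGINE = AggregatingMergeTree() ORDER BY id;
+
+INSERT INTO simple_agg_example VALUES (1, 10), (1, 42), (2, 7);
+
+-- Rows for the same id may not be merged yet, so aggregate on read;
+-- the stored column behaves like a plain UInt64.
+SELECT id, max(max_value) AS max_value FROM simple_agg_example GROUP BY id;
+```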
The following aggregate functions are supported: -- [`any`](../../sql-reference/aggregate-functions/reference.md#agg_function-any) -- [`anyLast`](../../sql-reference/aggregate-functions/reference.md#anylastx) -- [`min`](../../sql-reference/aggregate-functions/reference.md#agg_function-min) -- [`max`](../../sql-reference/aggregate-functions/reference.md#agg_function-max) -- [`sum`](../../sql-reference/aggregate-functions/reference.md#agg_function-sum) -- [`groupBitAnd`](../../sql-reference/aggregate-functions/reference.md#groupbitand) -- [`groupBitOr`](../../sql-reference/aggregate-functions/reference.md#groupbitor) -- [`groupBitXor`](../../sql-reference/aggregate-functions/reference.md#groupbitxor) +- [`any`](../../sql-reference/aggregate-functions/reference/any.md#agg_function-any) +- [`anyLast`](../../sql-reference/aggregate-functions/reference/anylast.md#anylastx) +- [`min`](../../sql-reference/aggregate-functions/reference/min.md#agg_function-min) +- [`max`](../../sql-reference/aggregate-functions/reference/max.md#agg_function-max) +- [`sum`](../../sql-reference/aggregate-functions/reference/sum.md#agg_function-sum) +- [`sumWithOverflow`](../../sql-reference/aggregate-functions/reference/sumwithoverflow.md#sumwithoverflowx) +- [`groupBitAnd`](../../sql-reference/aggregate-functions/reference/groupbitand.md#groupbitand) +- [`groupBitOr`](../../sql-reference/aggregate-functions/reference/groupbitor.md#groupbitor) +- [`groupBitXor`](../../sql-reference/aggregate-functions/reference/groupbitxor.md#groupbitxor) +- [`groupArrayArray`](../../sql-reference/aggregate-functions/reference/grouparray.md#agg_function-grouparray) +- [`groupUniqArrayArray`](../../sql-reference/aggregate-functions/reference/groupuniqarray.md) Values of the `SimpleAggregateFunction(func, Type)` look and stored the same way as `Type`, so you do not need to apply functions with `-Merge`/`-State` suffixes. `SimpleAggregateFunction` has better performance than `AggregateFunction` with same aggregation function. diff --git a/docs/en/sql-reference/data-types/special-data-types/set.md b/docs/en/sql-reference/data-types/special-data-types/set.md index ad15b5d33b9..fdc03ad3de1 100644 --- a/docs/en/sql-reference/data-types/special-data-types/set.md +++ b/docs/en/sql-reference/data-types/special-data-types/set.md @@ -5,6 +5,6 @@ toc_title: Set # Set {#set} -Used for the right half of an [IN](../../operators/in.md#select-in-operators) expression. +Used for the right half of an [IN](../../../sql-reference/operators/in.md#select-in-operators) expression. [Original article](https://clickhouse.tech/docs/en/data_types/special_data_types/set/) diff --git a/docs/en/sql-reference/data-types/tuple.md b/docs/en/sql-reference/data-types/tuple.md index 66908e2c530..60adb942925 100644 --- a/docs/en/sql-reference/data-types/tuple.md +++ b/docs/en/sql-reference/data-types/tuple.md @@ -5,7 +5,7 @@ toc_title: Tuple(T1, T2, ...) # Tuple(t1, T2, …) {#tuplet1-t2} -A tuple of elements, each having an individual [type](index.md#data_types). +A tuple of elements, each having an individual [type](../../sql-reference/data-types/index.md#data_types). Tuples are used for temporary column grouping. Columns can be grouped when an IN expression is used in a query, and for specifying certain formal parameters of lambda functions. For more information, see the sections [IN operators](../../sql-reference/operators/in.md) and [Higher order functions](../../sql-reference/functions/higher-order-functions.md). 
@@ -33,7 +33,7 @@ SELECT tuple(1,'a') AS x, toTypeName(x) ## Working with Data Types {#working-with-data-types} -When creating a tuple on the fly, ClickHouse automatically detects the type of each argument as the minimum of the types which can store the argument value. If the argument is [NULL](../../sql-reference/syntax.md#null-literal), the type of the tuple element is [Nullable](nullable.md). +When creating a tuple on the fly, ClickHouse automatically detects the type of each argument as the minimum of the types which can store the argument value. If the argument is [NULL](../../sql-reference/syntax.md#null-literal), the type of the tuple element is [Nullable](../../sql-reference/data-types/nullable.md). Example of automatic data type detection: diff --git a/docs/en/sql-reference/data-types/uuid.md b/docs/en/sql-reference/data-types/uuid.md index 86a64cc2381..1e22b41b508 100644 --- a/docs/en/sql-reference/data-types/uuid.md +++ b/docs/en/sql-reference/data-types/uuid.md @@ -68,8 +68,8 @@ SELECT * FROM t_uuid ## Restrictions {#restrictions} -The UUID data type only supports functions which [String](string.md) data type also supports (for example, [min](../../sql-reference/aggregate-functions/reference.md#agg_function-min), [max](../../sql-reference/aggregate-functions/reference.md#agg_function-max), and [count](../../sql-reference/aggregate-functions/reference.md#agg_function-count)). +The UUID data type only supports functions which [String](../../sql-reference/data-types/string.md) data type also supports (for example, [min](../../sql-reference/aggregate-functions/reference/min.md#agg_function-min), [max](../../sql-reference/aggregate-functions/reference/max.md#agg_function-max), and [count](../../sql-reference/aggregate-functions/reference/count.md#agg_function-count)). -The UUID data type is not supported by arithmetic operations (for example, [abs](../../sql-reference/functions/arithmetic-functions.md#arithm_func-abs)) or aggregate functions, such as [sum](../../sql-reference/aggregate-functions/reference.md#agg_function-sum) and [avg](../../sql-reference/aggregate-functions/reference.md#agg_function-avg). +The UUID data type is not supported by arithmetic operations (for example, [abs](../../sql-reference/functions/arithmetic-functions.md#arithm_func-abs)) or aggregate functions, such as [sum](../../sql-reference/aggregate-functions/reference/sum.md#agg_function-sum) and [avg](../../sql-reference/aggregate-functions/reference/avg.md#agg_function-avg). [Original article](https://clickhouse.tech/docs/en/data_types/uuid/) diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-hierarchical.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-hierarchical.md index 2d7fd564ed0..894ffe38d47 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-hierarchical.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-hierarchical.md @@ -5,7 +5,7 @@ toc_title: Hierarchical dictionaries # Hierarchical Dictionaries {#hierarchical-dictionaries} -ClickHouse supports hierarchical dictionaries with a [numeric key](external-dicts-dict-structure.md#ext_dict-numeric-key). +ClickHouse supports hierarchical dictionaries with a [numeric key](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict-numeric-key). 
Look at the following hierarchical structure: @@ -35,7 +35,7 @@ This hierarchy can be expressed as the following dictionary table. This table contains a column `parent_region` that contains the key of the nearest parent for the element. -ClickHouse supports the [hierarchical](external-dicts-dict-structure.md#hierarchical-dict-attr) property for [external dictionary](index.md) attributes. This property allows you to configure the hierarchical dictionary similar to described above. +ClickHouse supports the [hierarchical](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#hierarchical-dict-attr) property for [external dictionary](../../../sql-reference/dictionaries/external-dictionaries/index.md) attributes. This property allows you to configure the hierarchical dictionary similar to described above. The [dictGetHierarchy](../../../sql-reference/functions/ext-dict-functions.md#dictgethierarchy) function allows you to get the parent chain of an element. diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md index bdadf97cd11..9617a8dfa70 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md @@ -39,7 +39,7 @@ The configuration looks like this: ``` -Corresponding [DDL-query](../../statements/create.md#create-dictionary-query): +Corresponding [DDL-query](../../../sql-reference/statements/create.md#create-dictionary-query): ``` sql CREATE DICTIONARY (...) @@ -123,7 +123,7 @@ LAYOUT(SPARSE_HASHED()) ### complex\_key\_hashed {#complex-key-hashed} -This type of storage is for use with composite [keys](external-dicts-dict-structure.md). Similar to `hashed`. +This type of storage is for use with composite [keys](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). Similar to `hashed`. Configuration example: @@ -157,7 +157,7 @@ Example: The table contains discounts for each advertiser in the format: +---------|-------------|-------------|------+ ``` -To use a sample for date ranges, define the `range_min` and `range_max` elements in the [structure](external-dicts-dict-structure.md). These elements must contain elements `name` and`type` (if `type` is not specified, the default type will be used - Date). `type` can be any numeric type (Date / DateTime / UInt64 / Int32 / others). +To use a sample for date ranges, define the `range_min` and `range_max` elements in the [structure](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). These elements must contain elements `name` and`type` (if `type` is not specified, the default type will be used - Date). `type` can be any numeric type (Date / DateTime / UInt64 / Int32 / others). Example: @@ -258,12 +258,12 @@ The dictionary is stored in a cache that has a fixed number of cells. These cell When searching for a dictionary, the cache is searched first. For each block of data, all keys that are not found in the cache or are outdated are requested from the source using `SELECT attrs... FROM db.table WHERE id IN (k1, k2, ...)`. The received data is then written to the cache. -For cache dictionaries, the expiration [lifetime](external-dicts-dict-lifetime.md) of data in the cache can be set. 
If more time than `lifetime` has passed since loading the data in a cell, the cell’s value is not used, and it is re-requested the next time it needs to be used. +For cache dictionaries, the expiration [lifetime](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) of data in the cache can be set. If more time than `lifetime` has passed since loading the data in a cell, the cell’s value is not used, and it is re-requested the next time it needs to be used. This is the least effective of all the ways to store dictionaries. The speed of the cache depends strongly on correct settings and the usage scenario. A cache type dictionary performs well only when the hit rates are high enough (recommended 99% and higher). You can view the average hit rate in the `system.dictionaries` table. To improve cache performance, use a subquery with `LIMIT`, and call the function with the dictionary externally. -Supported [sources](external-dicts-dict-sources.md): MySQL, ClickHouse, executable, HTTP. +Supported [sources](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md): MySQL, ClickHouse, executable, HTTP. Example of settings: @@ -294,7 +294,7 @@ Set a large enough cache size. You need to experiment to select the number of ce ### complex\_key\_cache {#complex-key-cache} -This type of storage is for use with composite [keys](external-dicts-dict-structure.md). Similar to `cache`. +This type of storage is for use with composite [keys](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). Similar to `cache`. ### direct {#direct} @@ -302,7 +302,7 @@ The dictionary is not stored in memory and directly goes to the source during th The dictionary key has the `UInt64` type. -All types of [sources](external-dicts-dict-sources.md), except local files, are supported. +All types of [sources](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), except local files, are supported. Configuration example: @@ -320,7 +320,7 @@ LAYOUT(DIRECT()) ### complex\_key\_direct {#complex-key-direct} -This type of storage is for use with composite [keys](external-dicts-dict-structure.md). Similar to `direct`. +This type of storage is for use with composite [keys](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). Similar to `direct`. ### ip\_trie {#ip-trie} diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md index a59cdf72551..21f4238045d 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md @@ -52,9 +52,9 @@ LIFETIME(MIN 300 MAX 360) If `0` and `0`, ClickHouse does not reload the dictionary by timeout. In this case, ClickHouse can reload the dictionary earlier if the dictionary configuration file was changed or the `SYSTEM RELOAD DICTIONARY` command was executed. 
-When upgrading the dictionaries, the ClickHouse server applies different logic depending on the type of [source](external-dicts-dict-sources.md): +When upgrading the dictionaries, the ClickHouse server applies different logic depending on the type of [source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md): -When upgrading the dictionaries, the ClickHouse server applies different logic depending on the type of [source](external-dicts-dict-sources.md): +When upgrading the dictionaries, the ClickHouse server applies different logic depending on the type of [source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md): - For a text file, it checks the time of modification. If the time differs from the previously recorded time, the dictionary is updated. - For MyISAM tables, the time of modification is checked using a `SHOW TABLE STATUS` query. @@ -63,7 +63,7 @@ When upgrading the dictionaries, the ClickHouse server applies different logic d For MySQL (InnoDB), ODBC and ClickHouse sources, you can set up a query that will update the dictionaries only if they really changed, rather than each time. To do this, follow these steps: - The dictionary table must have a field that always changes when the source data is updated. -- The settings of the source must specify a query that retrieves the changing field. The ClickHouse server interprets the query result as a row, and if this row has changed relative to its previous state, the dictionary is updated. Specify the query in the `` field in the settings for the [source](external-dicts-dict-sources.md). +- The settings of the source must specify a query that retrieves the changing field. The ClickHouse server interprets the query result as a row, and if this row has changed relative to its previous state, the dictionary is updated. Specify the query in the `` field in the settings for the [source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md). Example of settings: diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md index 71b719ce996..29c76223059 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md @@ -24,7 +24,7 @@ If dictionary is configured using xml-file, the configuration looks like this: ``` -In case of [DDL-query](../../statements/create.md#create-dictionary-query), equal configuration will looks like: +In case of [DDL-query](../../../sql-reference/statements/create.md#create-dictionary-query), equal configuration will looks like: ``` sql CREATE DICTIONARY dict_name (...) @@ -95,7 +95,7 @@ Setting fields: ## Executable File {#dicts-external_dicts_dict_sources-executable} -Working with executable files depends on [how the dictionary is stored in memory](external-dicts-dict-layout.md). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request to the executable file’s STDIN. Otherwise, ClickHouse starts executable file and treats its output as dictionary data. +Working with executable files depends on [how the dictionary is stored in memory](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md). 
If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request to the executable file’s STDIN. Otherwise, ClickHouse starts executable file and treats its output as dictionary data. Example of settings: @@ -121,7 +121,7 @@ Setting fields: ## Http(s) {#dicts-external_dicts_dict_sources-http} -Working with an HTTP(s) server depends on [how the dictionary is stored in memory](external-dicts-dict-layout.md). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request via the `POST` method. +Working with an HTTP(s) server depends on [how the dictionary is stored in memory](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request via the `POST` method. Example of settings: @@ -202,11 +202,11 @@ Setting fields: - `db` – Name of the database. Omit it if the database name is set in the `` parameters. - `table` – Name of the table and schema if exists. - `connection_string` – Connection string. -- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](external-dicts-dict-lifetime.md). +- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md). ClickHouse receives quoting symbols from ODBC-driver and quote all settings in queries to driver, so it’s necessary to set table name accordingly to table name case in database. -If you have a problems with encodings when using Oracle, see the corresponding [FAQ](../../../faq/general.md#oracle-odbc-encodings) article. +If you have a problems with encodings when using Oracle, see the corresponding [F.A.Q.](../../../faq/integration/oracle-odbc.md) item. ### Known Vulnerability of the ODBC Dictionary Functionality {#known-vulnerability-of-the-odbc-dictionary-functionality} @@ -474,7 +474,7 @@ Setting fields: - `where` – The selection criteria. The syntax for conditions is the same as for `WHERE` clause in MySQL, for example, `id > 10 AND id < 20`. Optional parameter. -- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](external-dicts-dict-lifetime.md). +- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md). MySQL can be connected on a local host via sockets. To do this, set `host` and `socket`. @@ -551,7 +551,7 @@ Setting fields: - `db` – Name of the database. - `table` – Name of the table. - `where` – The selection criteria. May be omitted. -- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](external-dicts-dict-lifetime.md). +- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md). 
### Mongodb {#dicts-external_dicts_dict_sources-mongodb} @@ -629,7 +629,7 @@ Setting fields: Example of settings: -```xml +``` xml localhost @@ -648,20 +648,19 @@ Example of settings: ``` Setting fields: -- `host` – The Cassandra host or comma-separated list of hosts. -- `port` – The port on the Cassandra servers. If not specified, default port is used. -- `user` – Name of the Cassandra user. -- `password` – Password of the Cassandra user. -- `keyspace` – Name of the keyspace (database). -- `column_family` – Name of the column family (table). -- `allow_filering` – Flag to allow or not potentially expensive conditions on clustering key columns. Default value is 1. -- `partition_key_prefix` – Number of partition key columns in primary key of the Cassandra table. - Required for compose key dictionaries. Order of key columns in the dictionary definition must be the same as in Cassandra. - Default value is 1 (the first key column is a partition key and other key columns are clustering key). -- `consistency` – Consistency level. Possible values: `One`, `Two`, `Three`, - `All`, `EachQuorum`, `Quorum`, `LocalQuorum`, `LocalOne`, `Serial`, `LocalSerial`. Default is `One`. -- `where` – Optional selection criteria. -- `max_threads` – The maximum number of threads to use for loading data from multiple partitions in compose key dictionaries. - +- `host` – The Cassandra host or comma-separated list of hosts. +- `port` – The port on the Cassandra servers. If not specified, default port is used. +- `user` – Name of the Cassandra user. +- `password` – Password of the Cassandra user. +- `keyspace` – Name of the keyspace (database). +- `column_family` – Name of the column family (table). +- `allow_filering` – Flag to allow or not potentially expensive conditions on clustering key columns. Default value is 1. +- `partition_key_prefix` – Number of partition key columns in primary key of the Cassandra table. +Required for compose key dictionaries. Order of key columns in the dictionary definition must be the same as in Cassandra. +Default value is 1 (the first key column is a partition key and other key columns are clustering key). +- `consistency` – Consistency level. Possible values: `One`, `Two`, `Three`, +`All`, `EachQuorum`, `Quorum`, `LocalQuorum`, `LocalOne`, `Serial`, `LocalSerial`. Default is `One`. +- `where` – Optional selection criteria. +- `max_threads` – The maximum number of threads to use for loading data from multiple partitions in compose key dictionaries. [Original article](https://clickhouse.tech/docs/en/query_language/dicts/external_dicts_dict_sources/) diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md index 2e3a7496ae4..e25b3ab78c3 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md @@ -28,8 +28,8 @@ XML description: Attributes are described in the elements: -- `` — [Key column](external-dicts-dict-structure.md#ext_dict_structure-key). -- `` — [Data column](external-dicts-dict-structure.md#ext_dict_structure-attributes). There can be a multiple number of attributes. +- `` — [Key column](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-key). 
+- `` — [Data column](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes). There can be a multiple number of attributes. DDL query: @@ -44,8 +44,8 @@ PRIMARY KEY Id Attributes are described in the query body: -- `PRIMARY KEY` — [Key column](external-dicts-dict-structure.md#ext_dict_structure-key) -- `AttrName AttrType` — [Data column](external-dicts-dict-structure.md#ext_dict_structure-attributes). There can be a multiple number of attributes. +- `PRIMARY KEY` — [Key column](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-key) +- `AttrName AttrType` — [Data column](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes). There can be a multiple number of attributes. ## Key {#ext_dict_structure-key} @@ -90,12 +90,12 @@ PRIMARY KEY Id ### Composite Key {#composite-key} -The key can be a `tuple` from any types of fields. The [layout](external-dicts-dict-layout.md) in this case must be `complex_key_hashed` or `complex_key_cache`. +The key can be a `tuple` from any types of fields. The [layout](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) in this case must be `complex_key_hashed` or `complex_key_cache`. !!! tip "Tip" A composite key can consist of a single element. This makes it possible to use a string as the key, for instance. -The key structure is set in the element ``. Key fields are specified in the same format as the dictionary [attributes](external-dicts-dict-structure.md). Example: +The key structure is set in the element ``. Key fields are specified in the same format as the dictionary [attributes](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). Example: ``` xml @@ -161,8 +161,8 @@ Configuration fields: | `name` | Column name. | Yes | | `type` | ClickHouse data type.
ClickHouse tries to cast value from dictionary to the specified data type. For example, for MySQL, the field might be `TEXT`, `VARCHAR`, or `BLOB` in the MySQL source table, but it can be uploaded as `String` in ClickHouse.
[Nullable](../../../sql-reference/data-types/nullable.md) is not supported. | Yes | | `null_value` | Default value for a non-existing element.
In the example, it is an empty string. You cannot use `NULL` in this field. | Yes | -| `expression` | [Expression](../../syntax.md#syntax-expressions) that ClickHouse executes on the value.
The expression can be a column name in the remote SQL database. Thus, you can use it to create an alias for the remote column.

Default value: no expression. | No | -| `hierarchical` | If `true`, the attribute contains the value of a parent key for the current key. See [Hierarchical Dictionaries](external-dicts-dict-hierarchical.md).

Default value: `false`. | No | +| `expression` | [Expression](../../../sql-reference/syntax.md#syntax-expressions) that ClickHouse executes on the value.
The expression can be a column name in the remote SQL database. Thus, you can use it to create an alias for the remote column.

Default value: no expression. | No | +| `hierarchical` | If `true`, the attribute contains the value of a parent key for the current key. See [Hierarchical Dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-hierarchical.md).

Default value: `false`. | No | | `injective` | Flag that shows whether the `id -> attribute` image is [injective](https://en.wikipedia.org/wiki/Injective_function).
If `true`, ClickHouse can automatically place after the `GROUP BY` clause the requests to dictionaries with injection. Usually it significantly reduces the amount of such requests.

Default value: `false`. | No | | `is_object_id` | Flag that shows whether the query is executed for a MongoDB document by `ObjectID`.

Default value: `false`. | No | diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md index bdefa264ed9..8dad24df6a7 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md @@ -29,7 +29,7 @@ If dictionary is configured using xml file, than dictionary configuration has th ``` -Corresponding [DDL-query](../../statements/create.md#create-dictionary-query) has the following structure: +Corresponding [DDL-query](../../../sql-reference/statements/create.md#create-dictionary-query) has the following structure: ``` sql CREATE DICTIONARY dict_name @@ -43,9 +43,9 @@ LIFETIME(...) -- Lifetime of dictionary in memory ``` - `name` – The identifier that can be used to access the dictionary. Use the characters `[a-zA-Z0-9_\-]`. -- [source](external-dicts-dict-sources.md) — Source of the dictionary. -- [layout](external-dicts-dict-layout.md) — Dictionary layout in memory. -- [structure](external-dicts-dict-structure.md) — Structure of the dictionary . A key and attributes that can be retrieved by this key. -- [lifetime](external-dicts-dict-lifetime.md) — Frequency of dictionary updates. +- [source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md) — Source of the dictionary. +- [layout](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) — Dictionary layout in memory. +- [structure](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md) — Structure of the dictionary . A key and attributes that can be retrieved by this key. +- [lifetime](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) — Frequency of dictionary updates. [Original article](https://clickhouse.tech/docs/en/query_language/dicts/external_dicts_dict/) diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md index afd9bcdcbce..d279ecdd165 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md @@ -5,23 +5,23 @@ toc_title: General Description # External Dictionaries {#dicts-external-dicts} -You can add your own dictionaries from various data sources. The data source for a dictionary can be a local text or executable file, an HTTP(s) resource, or another DBMS. For more information, see “[Sources for external dictionaries](external-dicts-dict-sources.md)”. +You can add your own dictionaries from various data sources. The data source for a dictionary can be a local text or executable file, an HTTP(s) resource, or another DBMS. For more information, see “[Sources for external dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md)”. ClickHouse: - Fully or partially stores dictionaries in RAM. - Periodically updates dictionaries and dynamically loads missing values. In other words, dictionaries can be loaded dynamically. -- Allows to create external dictionaries with xml files or [DDL queries](../../statements/create.md#create-dictionary-query). +- Allows to create external dictionaries with xml files or [DDL queries](../../../sql-reference/statements/create.md#create-dictionary-query). 
The configuration of external dictionaries can be located in one or more xml-files. The path to the configuration is specified in the [dictionaries\_config](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_config) parameter. Dictionaries can be loaded at server startup or at first use, depending on the [dictionaries\_lazy\_load](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_lazy_load) setting. -The [dictionaries](../../../operations/system-tables.md#system_tables-dictionaries) system table contains information about dictionaries configured at server. For each dictionary you can find there: +The [dictionaries](../../../operations/system-tables/dictionaries.md#system_tables-dictionaries) system table contains information about dictionaries configured at server. For each dictionary you can find there: -- Status of the dictionary. -- Configuration parameters. -- Metrics like amount of RAM allocated for the dictionary or a number of queries since the dictionary was successfully loaded. +- Status of the dictionary. +- Configuration parameters. +- Metrics like amount of RAM allocated for the dictionary or a number of queries since the dictionary was successfully loaded. The dictionary configuration file has the following format: @@ -41,20 +41,20 @@ The dictionary configuration file has the following format: ``` -You can [configure](external-dicts-dict.md) any number of dictionaries in the same file. +You can [configure](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md) any number of dictionaries in the same file. -[DDL queries for dictionaries](../../statements/create.md#create-dictionary-query) doesn’t require any additional records in server configuration. They allow to work with dictionaries as first-class entities, like tables or views. +[DDL queries for dictionaries](../../../sql-reference/statements/create.md#create-dictionary-query) doesn’t require any additional records in server configuration. They allow to work with dictionaries as first-class entities, like tables or views. !!! attention "Attention" You can convert values for a small dictionary by describing it in a `SELECT` query (see the [transform](../../../sql-reference/functions/other-functions.md) function). This functionality is not related to external dictionaries. 
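+
+As an illustrative sketch of the DDL approach mentioned above (the dictionary name, source table, and connection parameters are hypothetical), a dictionary can be defined directly in SQL:
+
+``` sql
+CREATE DICTIONARY country_dict
+(
+    id UInt64,
+    name String DEFAULT ''
+)
+PRIMARY KEY id
+SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' PASSWORD '' DB 'default' TABLE 'countries'))
+LAYOUT(FLAT())
+LIFETIME(MIN 300 MAX 360)
+```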
## See Also {#ext-dicts-see-also} -- [Configuring an External Dictionary](external-dicts-dict.md) -- [Storing Dictionaries in Memory](external-dicts-dict-layout.md) -- [Dictionary Updates](external-dicts-dict-lifetime.md) -- [Sources of External Dictionaries](external-dicts-dict-sources.md) -- [Dictionary Key and Fields](external-dicts-dict-structure.md) +- [Configuring an External Dictionary](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md) +- [Storing Dictionaries in Memory](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) +- [Dictionary Updates](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) +- [Sources of External Dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md) +- [Dictionary Key and Fields](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md) - [Functions for Working with External Dictionaries](../../../sql-reference/functions/ext-dict-functions.md) [Original article](https://clickhouse.tech/docs/en/query_language/dicts/external_dicts/) diff --git a/docs/en/sql-reference/dictionaries/index.md b/docs/en/sql-reference/dictionaries/index.md index df8701e035e..420182642bb 100644 --- a/docs/en/sql-reference/dictionaries/index.md +++ b/docs/en/sql-reference/dictionaries/index.md @@ -14,7 +14,7 @@ ClickHouse supports special functions for working with dictionaries that can be ClickHouse supports: -- [Built-in dictionaries](internal-dicts.md#internal_dicts) with a specific [set of functions](../../sql-reference/functions/ym-dict-functions.md). -- [Plug-in (external) dictionaries](external-dictionaries/external-dicts.md#dicts-external-dicts) with a [set of functions](../../sql-reference/functions/ext-dict-functions.md). +- [Built-in dictionaries](../../sql-reference/dictionaries/internal-dicts.md#internal_dicts) with a specific [set of functions](../../sql-reference/functions/ym-dict-functions.md). +- [Plug-in (external) dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md#dicts-external-dicts) with a [set of functions](../../sql-reference/functions/ext-dict-functions.md). [Original article](https://clickhouse.tech/docs/en/query_language/dicts/) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 4f449eea516..8db398f7a15 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -1,6 +1,6 @@ --- toc_priority: 46 -toc_title: Working with Arrays +toc_title: Arrays --- # Functions for Working with Arrays {#functions-for-working-with-arrays} @@ -187,7 +187,6 @@ SELECT indexOf([1, 3, NULL, NULL], NULL) ``` ``` text - ┌─indexOf([1, 3, NULL, NULL], NULL)─┐ │ 3 │ └───────────────────────────────────┘ @@ -522,7 +521,7 @@ SELECT arraySort([1, nan, 2, NULL, 3, nan, -4, NULL, inf, -inf]); - `NaN` values are right before `NULL`. - `Inf` values are right before `NaN`. -Note that `arraySort` is a [higher-order function](higher-order-functions.md). You can pass a lambda function to it as the first argument. In this case, sorting order is determined by the result of the lambda function applied to the elements of the array. +Note that `arraySort` is a [higher-order function](../../sql-reference/functions/higher-order-functions.md). You can pass a lambda function to it as the first argument. 
In this case, sorting order is determined by the result of the lambda function applied to the elements of the array. Let’s consider the following example: @@ -622,7 +621,7 @@ SELECT arrayReverseSort([1, nan, 2, NULL, 3, nan, -4, NULL, inf, -inf]) as res; - `NaN` values are right before `NULL`. - `-Inf` values are right before `NaN`. -Note that the `arrayReverseSort` is a [higher-order function](higher-order-functions.md). You can pass a lambda function to it as the first argument. Example is shown below. +Note that the `arrayReverseSort` is a [higher-order function](../../sql-reference/functions/higher-order-functions.md). You can pass a lambda function to it as the first argument. Example is shown below. ``` sql SELECT arrayReverseSort((x) -> -x, [1, 2, 3]) as res; @@ -687,7 +686,7 @@ If you want to get a list of unique items in an array, you can use arrayReduce( ## arrayJoin(arr) {#array-functions-join} -A special function. See the section [“ArrayJoin function”](array-join.md#functions_arrayjoin). +A special function. See the section [“ArrayJoin function”](../../sql-reference/functions/array-join.md#functions_arrayjoin). ## arrayDifference {#arraydifference} @@ -701,13 +700,13 @@ arrayDifference(array) **Parameters** -- `array` – [Array](https://clickhouse.yandex/docs/en/data_types/array/). +- `array` – [Array](https://clickhouse.tech/docs/en/data_types/array/). **Returned values** Returns an array of differences between adjacent elements. -Type: [UInt\*](https://clickhouse.yandex/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.yandex/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.yandex/docs/en/data_types/float/). +Type: [UInt\*](https://clickhouse.tech/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.tech/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.tech/docs/en/data_types/float/). **Example** @@ -753,7 +752,7 @@ arrayDistinct(array) **Parameters** -- `array` – [Array](https://clickhouse.yandex/docs/en/data_types/array/). +- `array` – [Array](https://clickhouse.tech/docs/en/data_types/array/). **Returned values** @@ -999,15 +998,15 @@ arrayZip(arr1, arr2, ..., arrN) **Parameters** -- `arrN` — [Array](../data-types/array.md). +- `arrN` — [Array](../../sql-reference/data-types/array.md). The function can take any number of arrays of different types. All the input arrays must be of equal size. **Returned value** -- Array with elements from the source arrays grouped into [tuples](../data-types/tuple.md). Data types in the tuple are the same as types of the input arrays and in the same order as arrays are passed. +- Array with elements from the source arrays grouped into [tuples](../../sql-reference/data-types/tuple.md). Data types in the tuple are the same as types of the input arrays and in the same order as arrays are passed. -Type: [Array](../data-types/array.md). +Type: [Array](../../sql-reference/data-types/array.md). **Example** diff --git a/docs/en/sql-reference/functions/conditional-functions.md b/docs/en/sql-reference/functions/conditional-functions.md index b253a699c1f..446a4729ff2 100644 --- a/docs/en/sql-reference/functions/conditional-functions.md +++ b/docs/en/sql-reference/functions/conditional-functions.md @@ -109,11 +109,11 @@ Returns `then` if the `cond` evaluates to be true (greater than zero), otherwise **See also** -- [ifNotFinite](other-functions.md#ifnotfinite). +- [ifNotFinite](../../sql-reference/functions/other-functions.md#ifnotfinite). 
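As a quick, self-contained illustration of the array functions described above (no table is required and the literal values are arbitrary):

``` sql
SELECT
    arrayDifference([1, 4, 9, 16])       AS diffs,   -- [0, 3, 5, 7]
    arrayDistinct([1, 2, 2, 3, 1])       AS uniq,    -- [1, 2, 3]
    arrayZip(['a', 'b', 'c'], [1, 2, 3]) AS zipped;  -- [('a', 1), ('b', 2), ('c', 3)]
```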
## multiIf {#multiif} -Allows you to write the [CASE](../operators/index.md#operator_case) operator more compactly in the query. +Allows you to write the [CASE](../../sql-reference/operators/index.md#operator_case) operator more compactly in the query. Syntax: `multiIf(cond_1, then_1, cond_2, then_2, ..., else)` diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 5a9dbf4d17e..3cbc7c73543 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -1,6 +1,6 @@ --- toc_priority: 39 -toc_title: Working with Dates and Times +toc_title: Dates and Times --- # Functions for Working with Dates and Times {#functions-for-working-with-dates-and-times} @@ -364,7 +364,7 @@ dateDiff('unit', startdate, enddate, [timezone]) **Parameters** -- `unit` — Time unit, in which the returned value is expressed. [String](../syntax.md#syntax-string-literal). +- `unit` — Time unit, in which the returned value is expressed. [String](../../sql-reference/syntax.md#syntax-string-literal). Supported values: diff --git a/docs/en/sql-reference/functions/ext-dict-functions.md b/docs/en/sql-reference/functions/ext-dict-functions.md index 591d88dd985..49b1c2dda2c 100644 --- a/docs/en/sql-reference/functions/ext-dict-functions.md +++ b/docs/en/sql-reference/functions/ext-dict-functions.md @@ -1,6 +1,6 @@ --- toc_priority: 58 -toc_title: Working with External Dictionaries +toc_title: External Dictionaries --- # Functions for Working with External Dictionaries {#ext_dict_functions} @@ -18,10 +18,10 @@ dictGetOrDefault('dict_name', 'attr_name', id_expr, default_value_expr) **Parameters** -- `dict_name` — Name of the dictionary. [String literal](../syntax.md#syntax-string-literal). -- `attr_name` — Name of the column of the dictionary. [String literal](../syntax.md#syntax-string-literal). -- `id_expr` — Key value. [Expression](../syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md) or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration. -- `default_value_expr` — Value returned if the dictionary doesn’t contain a row with the `id_expr` key. [Expression](../syntax.md#syntax-expressions) returning the value in the data type configured for the `attr_name` attribute. +- `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). +- `attr_name` — Name of the column of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). +- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md) or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration. +- `default_value_expr` — Value returned if the dictionary doesn’t contain a row with the `id_expr` key. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning the value in the data type configured for the `attr_name` attribute. **Returned value** @@ -107,8 +107,8 @@ dictHas('dict_name', id_expr) **Parameters** -- `dict_name` — Name of the dictionary. [String literal](../syntax.md#syntax-string-literal). -- `id_expr` — Key value. [Expression](../syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. +- `dict_name` — Name of the dictionary. 
[String literal](../../sql-reference/syntax.md#syntax-string-literal). +- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. **Returned value** @@ -129,8 +129,8 @@ dictGetHierarchy('dict_name', key) **Parameters** -- `dict_name` — Name of the dictionary. [String literal](../syntax.md#syntax-string-literal). -- `key` — Key value. [Expression](../syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. +- `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). +- `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. **Returned value** @@ -148,9 +148,9 @@ dictIsIn('dict_name', child_id_expr, ancestor_id_expr) **Parameters** -- `dict_name` — Name of the dictionary. [String literal](../syntax.md#syntax-string-literal). -- `child_id_expr` — Key to be checked. [Expression](../syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. -- `ancestor_id_expr` — Alleged ancestor of the `child_id_expr` key. [Expression](../syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. +- `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). +- `child_id_expr` — Key to be checked. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. +- `ancestor_id_expr` — Alleged ancestor of the `child_id_expr` key. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. **Returned value** @@ -184,10 +184,10 @@ dictGet[Type]OrDefault('dict_name', 'attr_name', id_expr, default_value_expr) **Parameters** -- `dict_name` — Name of the dictionary. [String literal](../syntax.md#syntax-string-literal). -- `attr_name` — Name of the column of the dictionary. [String literal](../syntax.md#syntax-string-literal). -- `id_expr` — Key value. [Expression](../syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. -- `default_value_expr` — Value which is returned if the dictionary doesn’t contain a row with the `id_expr` key. [Expression](../syntax.md#syntax-expressions) returning a value in the data type configured for the `attr_name` attribute. +- `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). +- `attr_name` — Name of the column of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). +- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. +- `default_value_expr` — Value which is returned if the dictionary doesn’t contain a row with the `id_expr` key. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a value in the data type configured for the `attr_name` attribute. 
**Returned value** diff --git a/docs/en/sql-reference/functions/functions-for-nulls.md b/docs/en/sql-reference/functions/functions-for-nulls.md index 07b7d701177..c32af7194fb 100644 --- a/docs/en/sql-reference/functions/functions-for-nulls.md +++ b/docs/en/sql-reference/functions/functions-for-nulls.md @@ -1,9 +1,9 @@ --- toc_priority: 63 -toc_title: Working with Nullable arguments +toc_title: Nullable --- -# Functions for Working with Nullable Aggregates {#functions-for-working-with-nullable-aggregates} +# Functions for Working with Nullable Values {#functions-for-working-with-nullable-aggregates} ## isNull {#isnull} diff --git a/docs/en/sql-reference/functions/geo.md b/docs/en/sql-reference/functions/geo.md index 942b951c7c8..65925f8a64b 100644 --- a/docs/en/sql-reference/functions/geo.md +++ b/docs/en/sql-reference/functions/geo.md @@ -1,6 +1,6 @@ --- toc_priority: 62 -toc_title: Working with geographical coordinates +toc_title: Geographical Coordinates --- # Functions for Working with Geographical Coordinates {#functions-for-working-with-geographical-coordinates} diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index dd568e68754..50dfe1d2110 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -423,7 +423,7 @@ murmurHash3_128( expr ) **Parameters** -- `expr` — [Expressions](../syntax.md#syntax-expressions) returning a [String](../../sql-reference/data-types/string.md)-type value. +- `expr` — [Expressions](../../sql-reference/syntax.md#syntax-expressions) returning a [String](../../sql-reference/data-types/string.md)-type value. **Returned Value** diff --git a/docs/en/sql-reference/functions/higher-order-functions.md b/docs/en/sql-reference/functions/higher-order-functions.md index cfa0e3263b9..484bdaa12e6 100644 --- a/docs/en/sql-reference/functions/higher-order-functions.md +++ b/docs/en/sql-reference/functions/higher-order-functions.md @@ -239,7 +239,7 @@ SELECT arraySort((x, y) -> y, ['hello', 'world'], [2, 1]); └────────────────────┘ ``` -For more information about the `arraySort` method, see the [Functions for Working With Arrays](array-functions.md#array_functions-sort) section. +For more information about the `arraySort` method, see the [Functions for Working With Arrays](../../sql-reference/functions/array-functions.md#array_functions-sort) section. ### arrayReverseSort(\[func,\] arr1, …) {#arrayreversesortfunc-arr1} @@ -257,6 +257,6 @@ SELECT arrayReverseSort((x, y) -> y, ['hello', 'world'], [2, 1]) as res; └───────────────────┘ ``` -For more information about the `arrayReverseSort` method, see the [Functions for Working With Arrays](array-functions.md#array_functions-reverse-sort) section. +For more information about the `arrayReverseSort` method, see the [Functions for Working With Arrays](../../sql-reference/functions/array-functions.md#array_functions-reverse-sort) section. 
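For a self-contained taste of two of the function families touched above (Nullable helpers and hashing), the following query needs no tables; the input values are arbitrary:

``` sql
SELECT
    isNull(NULL)                       AS is_null,   -- 1
    hex(murmurHash3_128('ClickHouse')) AS hash_hex;  -- 128-bit hash shown as hex
```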
[Original article](https://clickhouse.tech/docs/en/query_language/functions/higher_order_functions/) diff --git a/docs/en/sql-reference/functions/in-functions.md b/docs/en/sql-reference/functions/in-functions.md index 6465c59e916..065805a36ae 100644 --- a/docs/en/sql-reference/functions/in-functions.md +++ b/docs/en/sql-reference/functions/in-functions.md @@ -1,13 +1,13 @@ --- toc_priority: 60 -toc_title: Implementing the IN Operator +toc_title: IN Operator --- # Functions for Implementing the IN Operator {#functions-for-implementing-the-in-operator} ## in, notIn, globalIn, globalNotIn {#in-functions} -See the section [IN operators](../operators/in.md#select-in-operators). +See the section [IN operators](../../sql-reference/operators/in.md#select-in-operators). ## tuple(x, y, …), operator (x, y, …) {#tuplex-y-operator-x-y} diff --git a/docs/en/sql-reference/functions/introspection.md b/docs/en/sql-reference/functions/introspection.md index 4bf846da03c..6848f74da1f 100644 --- a/docs/en/sql-reference/functions/introspection.md +++ b/docs/en/sql-reference/functions/introspection.md @@ -18,7 +18,7 @@ For proper operation of introspection functions: For security reasons introspection functions are disabled by default. -ClickHouse saves profiler reports to the [trace\_log](../../operations/system-tables.md#system_tables-trace_log) system table. Make sure the table and profiler are configured properly. +ClickHouse saves profiler reports to the [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table. Make sure the table and profiler are configured properly. ## addressToLine {#addresstoline} @@ -98,7 +98,7 @@ LIMIT 1 \G ``` -The [arrayMap](higher-order-functions.md#higher_order_functions-array-map) function allows to process each individual element of the `trace` array by the `addressToLine` function. The result of this processing you see in the `trace_source_code_lines` column of output. +The [arrayMap](../../sql-reference/functions/higher-order-functions.md#higher_order_functions-array-map) function allows to process each individual element of the `trace` array by the `addressToLine` function. The result of this processing you see in the `trace_source_code_lines` column of output. ``` text Row 1: @@ -184,7 +184,7 @@ LIMIT 1 \G ``` -The [arrayMap](higher-order-functions.md#higher_order_functions-array-map) function allows to process each individual element of the `trace` array by the `addressToSymbols` function. The result of this processing you see in the `trace_symbols` column of output. +The [arrayMap](../../sql-reference/functions/higher-order-functions.md#higher_order_functions-array-map) function allows to process each individual element of the `trace` array by the `addressToSymbols` function. The result of this processing you see in the `trace_symbols` column of output. ``` text Row 1: @@ -281,7 +281,7 @@ LIMIT 1 \G ``` -The [arrayMap](higher-order-functions.md#higher_order_functions-array-map) function allows to process each individual element of the `trace` array by the `demangle` function. The result of this processing you see in the `trace_functions` column of output. +The [arrayMap](../../sql-reference/functions/higher-order-functions.md#higher_order_functions-array-map) function allows to process each individual element of the `trace` array by the `demangle` function. The result of this processing you see in the `trace_functions` column of output. 
``` text Row 1: diff --git a/docs/en/sql-reference/functions/ip-address-functions.md b/docs/en/sql-reference/functions/ip-address-functions.md index 9330bb9c307..8c894fe6311 100644 --- a/docs/en/sql-reference/functions/ip-address-functions.md +++ b/docs/en/sql-reference/functions/ip-address-functions.md @@ -1,9 +1,9 @@ --- toc_priority: 55 -toc_title: Working with IP Addresses +toc_title: IP Addresses --- -# Functions for Working with IP Addresses {#functions-for-working-with-ip-addresses} +# Functions for Working with IPv4 and IPv6 Addresses {#functions-for-working-with-ip-addresses} ## IPv4NumToString(num) {#ipv4numtostringnum} diff --git a/docs/en/sql-reference/functions/json-functions.md b/docs/en/sql-reference/functions/json-functions.md index cf3e352eba9..6f9c643c565 100644 --- a/docs/en/sql-reference/functions/json-functions.md +++ b/docs/en/sql-reference/functions/json-functions.md @@ -1,6 +1,6 @@ --- toc_priority: 56 -toc_title: Working with JSON +toc_title: JSON --- # Functions for Working with JSON {#functions-for-working-with-json} @@ -238,15 +238,15 @@ JSONExtractKeysAndValuesRaw(json[, p, a, t, h]) **Parameters** -- `json` — [String](../data-types/string.md) with valid JSON. -- `p, a, t, h` — Comma-separated indices or keys that specify the path to the inner field in a nested JSON object. Each argument can be either a [string](../data-types/string.md) to get the field by the key or an [integer](../data-types/int-uint.md) to get the N-th field (indexed from 1, negative integers count from the end). If not set, the whole JSON is parsed as the top-level object. Optional parameter. +- `json` — [String](../../sql-reference/data-types/string.md) with valid JSON. +- `p, a, t, h` — Comma-separated indices or keys that specify the path to the inner field in a nested JSON object. Each argument can be either a [string](../../sql-reference/data-types/string.md) to get the field by the key or an [integer](../../sql-reference/data-types/int-uint.md) to get the N-th field (indexed from 1, negative integers count from the end). If not set, the whole JSON is parsed as the top-level object. Optional parameter. **Returned values** -- Array with `('key', 'value')` tuples. Both tuple members are strings. -- Empty array if the requested object does not exist, or input JSON is invalid. +- Array with `('key', 'value')` tuples. Both tuple members are strings. +- Empty array if the requested object does not exist, or input JSON is invalid. -Type: [Array](../data-types/array.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md), [String](../data-types/string.md)). +Type: [Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md), [String](../../sql-reference/data-types/string.md)). 
**Examples**

@@ -292,5 +292,4 @@ Result:
└───────────────────────────────────────────────────────────────────────────────────────────────────────┘
```

-
[Original article](https://clickhouse.tech/docs/en/query_language/functions/json_functions/)

diff --git a/docs/en/sql-reference/functions/machine-learning-functions.md b/docs/en/sql-reference/functions/machine-learning-functions.md
index 1e72da5c0b0..9de3854c3e3 100644
--- a/docs/en/sql-reference/functions/machine-learning-functions.md
+++ b/docs/en/sql-reference/functions/machine-learning-functions.md
@@ -1,18 +1,18 @@
---
toc_priority: 64
-toc_title: Machine Learning Functions
+toc_title: Machine Learning
---

# Machine Learning Functions {#machine-learning-functions}

-## evalMLMethod (prediction) {#machine_learning_methods-evalmlmethod}
+## evalMLMethod {#machine_learning_methods-evalmlmethod}

Prediction using fitted regression models uses `evalMLMethod` function. See link in `linearRegression`.

-### Stochastic Linear Regression {#stochastic-linear-regression}
+## stochasticLinearRegression {#stochastic-linear-regression}

-The [stochasticLinearRegression](../../sql-reference/aggregate-functions/reference.md#agg_functions-stochasticlinearregression) aggregate function implements stochastic gradient descent method using linear model and MSE loss function. Uses `evalMLMethod` to predict on new data.
+The [stochasticLinearRegression](../../sql-reference/aggregate-functions/reference/stochasticlinearregression.md#agg_functions-stochasticlinearregression) aggregate function implements the stochastic gradient descent method using a linear model and the MSE loss function. Uses `evalMLMethod` to predict on new data.

-### Stochastic Logistic Regression {#stochastic-logistic-regression}
+## stochasticLogisticRegression {#stochastic-logistic-regression}

-The [stochasticLogisticRegression](../../sql-reference/aggregate-functions/reference.md#agg_functions-stochasticlogisticregression) aggregate function implements stochastic gradient descent method for binary classification problem. Uses `evalMLMethod` to predict on new data.
+The [stochasticLogisticRegression](../../sql-reference/aggregate-functions/reference/stochasticlogisticregression.md#agg_functions-stochasticlogisticregression) aggregate function implements the stochastic gradient descent method for the binary classification problem. Uses `evalMLMethod` to predict on new data.

diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md
index 9aa26f32b18..c67cec6f7d4 100644
--- a/docs/en/sql-reference/functions/other-functions.md
+++ b/docs/en/sql-reference/functions/other-functions.md
@@ -13,19 +13,19 @@ Returns a string with the name of the host that this function was performed on.

Gets a named value from the [macros](../../operations/server-configuration-parameters/settings.md#macros) section of the server configuration.

-**Syntax**
+**Syntax**

-```sql
+``` sql
getMacro(name);
```

**Parameters**

-- `name` — Name to retrieve from the `macros` section. [String](../../sql-reference/data-types/string.md#string).
+- `name` — Name to retrieve from the `macros` section. [String](../../sql-reference/data-types/string.md#string).

**Returned value**

-- Value of the specified macro.
+- Value of the specified macro.

Type: [String](../../sql-reference/data-types/string.md).

@@ -33,7 +33,7 @@
The example `macros` section in the server configuration file: -```xml +``` xml Value @@ -41,13 +41,13 @@ The example `macros` section in the server configuration file: Query: -```sql +``` sql SELECT getMacro('test'); ``` Result: -```text +``` text ┌─getMacro('test')─┐ │ Value │ └──────────────────┘ @@ -55,12 +55,12 @@ Result: An alternative way to get the same value: -```sql +``` sql SELECT * FROM system.macros WHERE macro = 'test'; ``` -```text +``` text ┌─macro─┬─substitution─┐ │ test │ Value │ └───────┴──────────────┘ @@ -242,7 +242,7 @@ Result: Checks whether the argument is a constant expression. -A constant expression means an expression whose resulting value is known at the query analysis (i.e. before execution). For example, expressions over [literals](../syntax.md#literals) are constant expressions. +A constant expression means an expression whose resulting value is known at the query analysis (i.e. before execution). For example, expressions over [literals](../../sql-reference/syntax.md#literals) are constant expressions. The function is intended for development, debugging and demonstration. @@ -254,26 +254,26 @@ isConstant(x) **Parameters** -- `x` — Expression to check. +- `x` — Expression to check. **Returned values** -- `1` — `x` is constant. -- `0` — `x` is non-constant. +- `1` — `x` is constant. +- `0` — `x` is non-constant. -Type: [UInt8](../data-types/int-uint.md). +Type: [UInt8](../../sql-reference/data-types/int-uint.md). **Examples** Query: -```sql +``` sql SELECT isConstant(x + 1) FROM (SELECT 43 AS x) ``` Result: -```text +``` text ┌─isConstant(plus(x, 1))─┐ │ 1 │ └────────────────────────┘ @@ -281,13 +281,13 @@ Result: Query: -```sql +``` sql WITH 3.14 AS pi SELECT isConstant(cos(pi)) ``` Result: -```text +``` text ┌─isConstant(cos(pi))─┐ │ 1 │ └─────────────────────┘ @@ -295,13 +295,13 @@ Result: Query: -```sql +``` sql SELECT isConstant(number) FROM numbers(1) ``` Result: -```text +``` text ┌─isConstant(number)─┐ │ 0 │ └────────────────────┘ @@ -345,7 +345,7 @@ Result: │ inf │ 42 │ └─────────┴───────────────────────────────┘ -You can get similar result by using [ternary operator](conditional-functions.md#ternary-operator): `isFinite(x) ? x : y`. +You can get similar result by using [ternary operator](../../sql-reference/functions/conditional-functions.md#ternary-operator): `isFinite(x) ? x : y`. ## isNaN(x) {#isnanx} @@ -732,7 +732,7 @@ WHERE diff != 1 ## runningDifferenceStartingWithFirstValue {#runningdifferencestartingwithfirstvalue} -Same as for [runningDifference](./other-functions.md#other_functions-runningdifference), the difference is the value of the first row, returned the value of the first row, and each subsequent row returns the difference from the previous row. +Same as for [runningDifference](../../sql-reference/functions/other-functions.md#other_functions-runningdifference), the difference is the value of the first row, returned the value of the first row, and each subsequent row returns the difference from the previous row. ## MACNumToString(num) {#macnumtostringnum} @@ -917,7 +917,7 @@ SELECT defaultValueOfArgumentType( CAST(1 AS Nullable(Int8) ) ) Creates an array with a single value. -Used for internal implementation of [arrayJoin](array-join.md#functions_arrayjoin). +Used for internal implementation of [arrayJoin](../../sql-reference/functions/array-join.md#functions_arrayjoin). 
``` sql SELECT replicate(x, arr); @@ -1072,7 +1072,7 @@ joinGet(join_storage_table_name, `value_column`, join_keys) **Parameters** -- `join_storage_table_name` — an [identifier](../syntax.md#syntax-identifiers) indicates where search is performed. The identifier is searched in the default database (see parameter `default_database` in the config file). To override the default database, use the `USE db_name` or specify the database and the table through the separator `db_name.db_table`, see the example. +- `join_storage_table_name` — an [identifier](../../sql-reference/syntax.md#syntax-identifiers) indicates where search is performed. The identifier is searched in the default database (see parameter `default_database` in the config file). To override the default database, use the `USE db_name` or specify the database and the table through the separator `db_name.db_table`, see the example. - `value_column` — name of the column of the table that contains required data. - `join_keys` — list of keys. @@ -1200,4 +1200,51 @@ SELECT number, randomPrintableASCII(30) as str, length(str) FROM system.numbers └────────┴────────────────────────────────┴──────────────────────────────────┘ ``` +## randomString {#randomstring} + +Generates a binary string of the specified length filled with random bytes (including zero bytes). + +**Syntax** + +``` sql +randomString(length) +``` + +**Parameters** + +- `length` — String length. Positive integer. + +**Returned value** + +- String filled with random bytes. + +Type: [String](../../sql-reference/data-types/string.md). + +**Example** + +Query: + +``` sql +SELECT randomString(30) AS str, length(str) AS len FROM numbers(2) FORMAT Vertical; +``` + +Result: + +``` text +Row 1: +────── +str: 3 G : pT ?w тi k aV f6 +len: 30 + +Row 2: +────── +str: 9 ,] ^ ) ]?? 8 +len: 30 +``` + +**See Also** + +- [generateRandom](../../sql-reference/table-functions/generate.md#generaterandom) +- [randomPrintableASCII](../../sql-reference/functions/other-functions.md#randomascii) + [Original article](https://clickhouse.tech/docs/en/query_language/functions/other_functions/) diff --git a/docs/en/sql-reference/functions/random-functions.md b/docs/en/sql-reference/functions/random-functions.md index 05fb982138c..68998928398 100644 --- a/docs/en/sql-reference/functions/random-functions.md +++ b/docs/en/sql-reference/functions/random-functions.md @@ -1,24 +1,25 @@ --- toc_priority: 51 -toc_title: Generating Pseudo-Random Numbers +toc_title: Pseudo-Random Numbers --- -# Functions for Generating Pseudo-random Numbers {#functions-for-generating-pseudo-random-numbers} +# Functions for Generating Pseudo-Random Numbers {#functions-for-generating-pseudo-random-numbers} -Non-cryptographic generators of pseudo-random numbers are used. +All the functions accept zero arguments or one argument. If an argument is passed, it can be any type, and its value is not used for anything. The only purpose of this argument is to prevent common subexpression elimination, so that two different instances of the same function return different columns with different random numbers. -All the functions accept zero arguments or one argument. -If an argument is passed, it can be any type, and its value is not used for anything. -The only purpose of this argument is to prevent common subexpression elimination, so that two different instances of the same function return different columns with different random numbers. +!!! note "Note" + Non-cryptographic generators of pseudo-random numbers are used. 
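A short sketch of the behaviour described in the note above, using the generators documented just below; the dummy arguments are only there to defeat common subexpression elimination, so each column gets its own values:

``` sql
SELECT
    rand(1)  AS r32_a,  -- pseudo-random UInt32
    rand(2)  AS r32_b,  -- a separate instance, so separate values
    rand64() AS r64     -- pseudo-random UInt64
FROM numbers(3);
```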
## rand, rand32 {#rand} Returns a pseudo-random UInt32 number, evenly distributed among all UInt32-type numbers. + Uses a linear congruential generator. ## rand64 {#rand64} Returns a pseudo-random UInt64 number, evenly distributed among all UInt64-type numbers. + Uses a linear congruential generator. ## randConstant {#randconstant} @@ -33,13 +34,13 @@ randConstant([x]) **Parameters** -- `x` — [Expression](../syntax.md#syntax-expressions) resulting in any of the [supported data types](../data-types/index.md#data_types). The resulting value is discarded, but the expression itself if used for bypassing [common subexpression elimination](index.md#common-subexpression-elimination) if the function is called multiple times in one query. Optional parameter. +- `x` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in any of the [supported data types](../../sql-reference/data-types/index.md#data_types). The resulting value is discarded, but the expression itself if used for bypassing [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in one query. Optional parameter. **Returned value** - Pseudo-random number. -Type: [UInt32](../data-types/int-uint.md). +Type: [UInt32](../../sql-reference/data-types/int-uint.md). **Example** @@ -60,7 +61,7 @@ Result: └────────────┴────────────┴──────────────┴────────────────┴─────────────────┴──────────────────────┘ ``` -# Random functions for working with strings {#random-functions-for-working-with-strings} +# Random Functions for Working with Strings {#random-functions-for-working-with-strings} ## randomString {#random-string} @@ -77,6 +78,7 @@ Result: ``` sql fuzzBits([s], [prob]) ``` + Inverts bits of `s`, each with probability `prob`. **Parameters** @@ -92,11 +94,12 @@ Fuzzed string with same as s type. SELECT fuzzBits(materialize('abacaba'), 0.1) FROM numbers(3) ``` -``` text -┌─fuzzBits(materialize('abacaba'), 0.1)─┐ -│ abaaaja │ -│ a*cjab+ │ -│ aeca2A │ + +\`\`\` text +┌─fuzzBits(materialize(‘abacaba’), 0.1)─┐ +│ abaaaja │ +│ a\*cjab+ │ +│ aeca2A │ └───────────────────────────────────────┘ [Original article](https://clickhouse.tech/docs/en/query_language/functions/random_functions/) diff --git a/docs/en/sql-reference/functions/rounding-functions.md b/docs/en/sql-reference/functions/rounding-functions.md index 3ce7aa64f3f..922cf7374d7 100644 --- a/docs/en/sql-reference/functions/rounding-functions.md +++ b/docs/en/sql-reference/functions/rounding-functions.md @@ -37,7 +37,7 @@ round(expression [, decimal_places]) **Parameters:** -- `expression` — A number to be rounded. Can be any [expression](../syntax.md#syntax-expressions) returning the numeric [data type](../../sql-reference/data-types/index.md#data_types). +- `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../../sql-reference/data-types/index.md#data_types). - `decimal-places` — An integer value. - If `decimal-places > 0` then the function rounds the value to the right of the decimal point. - If `decimal-places < 0` then the function rounds the value to the left of the decimal point. @@ -116,7 +116,7 @@ roundBankers(expression [, decimal_places]) **Parameters** -- `expression` — A number to be rounded. Can be any [expression](../syntax.md#syntax-expressions) returning the numeric [data type](../../sql-reference/data-types/index.md#data_types). +- `expression` — A number to be rounded. 
Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../../sql-reference/data-types/index.md#data_types). - `decimal-places` — Decimal places. An integer number. - `decimal-places > 0` — The function rounds the number to the given position right of the decimal point. Example: `roundBankers(3.55, 1) = 3.6`. - `decimal-places < 0` — The function rounds the number to the given position left of the decimal point. Example: `roundBankers(24.55, -1) = 20`. diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 32e85799ab1..a19544f868f 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -1,10 +1,13 @@ --- toc_priority: 40 -toc_title: Working with strings +toc_title: Strings --- # Functions for Working with Strings {#functions-for-working-with-strings} +!!! note "Note" + Functions for [searching](string-search-functions.md) and [replacing](string-replace-functions.md) in strings are described separately. + ## empty {#empty} Returns 1 for an empty string or 0 for a non-empty string. diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index 2d29e394aba..98b9943cdab 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -5,6 +5,9 @@ toc_title: For Replacing in Strings # Functions for Searching and Replacing in Strings {#functions-for-searching-and-replacing-in-strings} +!!! note "Note" + Functions for [searching](string-search-functions.md) and [other manipulations with strings](string-functions.md) are described separately. + ## replaceOne(haystack, pattern, replacement) {#replaceonehaystack-pattern-replacement} Replaces the first occurrence, if it exists, of the ‘pattern’ substring in ‘haystack’ with the ‘replacement’ substring. diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 2a49e18febe..815d74a2ccb 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -1,12 +1,15 @@ --- toc_priority: 41 -toc_title: For Searching Strings +toc_title: For Searching in Strings --- -# Functions for Searching Strings {#functions-for-searching-strings} +# Functions for Searching in Strings {#functions-for-searching-strings} The search is case-sensitive by default in all these functions. There are separate variants for case insensitive search. +!!! note "Note" + Functions for [replacing](string-replace-functions.md) and [other manipulations with strings](string-functions.md) are described separately. + ## position(haystack, needle), locate(haystack, needle) {#position} Returns the position (in bytes) of the found substring in the string, starting from 1. @@ -25,8 +28,8 @@ Alias: `locate(haystack, needle)`. **Parameters** -- `haystack` — string, in which substring will to be searched. [String](../syntax.md#syntax-string-literal). -- `needle` — substring to be searched. [String](../syntax.md#syntax-string-literal). +- `haystack` — string, in which substring will to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). 
**Returned values** @@ -83,8 +86,8 @@ positionCaseInsensitive(haystack, needle) **Parameters** -- `haystack` — string, in which substring will to be searched. [String](../syntax.md#syntax-string-literal). -- `needle` — substring to be searched. [String](../syntax.md#syntax-string-literal). +- `haystack` — string, in which substring will to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). **Returned values** @@ -125,8 +128,8 @@ positionUTF8(haystack, needle) **Parameters** -- `haystack` — string, in which substring will to be searched. [String](../syntax.md#syntax-string-literal). -- `needle` — substring to be searched. [String](../syntax.md#syntax-string-literal). +- `haystack` — string, in which substring will to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). **Returned values** @@ -153,7 +156,7 @@ Result: └───────────────────────────────────┘ ``` -The phrase “Salut, étudiante!”, where character `é` can be represented using a one point (`U+00E9`) or two points (`U+0065U+0301`) the function can be returned some unexpected result: +The phrase “Salut, étudiante!”, where character `é` can be represented using a one point (`U+00E9`) or two points (`U+0065U+0301`) the function can be returned some unexpected result: Query for the letter `é`, which is represented one Unicode point `U+00E9`: @@ -169,16 +172,16 @@ Result: └────────────────────────────────────────┘ ``` -Query for the letter `é`, which is represented two Unicode points `U+0065U+0301`: +Query for the letter `é`, which is represented two Unicode points `U+0065U+0301`: ``` sql -SELECT positionUTF8('Salut, étudiante!', '!') +SELECT positionUTF8('Salut, étudiante!', '!') ``` Result: ``` text -┌─positionUTF8('Salut, étudiante!', '!')─┐ +┌─positionUTF8('Salut, étudiante!', '!')─┐ │ 18 │ └────────────────────────────────────────┘ ``` @@ -197,8 +200,8 @@ positionCaseInsensitiveUTF8(haystack, needle) **Parameters** -- `haystack` — string, in which substring will to be searched. [String](../syntax.md#syntax-string-literal). -- `needle` — substring to be searched. [String](../syntax.md#syntax-string-literal). +- `haystack` — string, in which substring will to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). **Returned value** @@ -225,7 +228,7 @@ Result: ## multiSearchAllPositions {#multisearchallpositions} -The same as [position](string-search-functions.md#position) but returns `Array` of positions (in bytes) of the found corresponding substrings in the string. Positions are indexed starting from 1. +The same as [position](../../sql-reference/functions/string-search-functions.md#position) but returns `Array` of positions (in bytes) of the found corresponding substrings in the string. Positions are indexed starting from 1. The search is performed on sequences of bytes without respect to string encoding and collation. @@ -241,8 +244,8 @@ multiSearchAllPositions(haystack, [needle1, needle2, ..., needlen]) **Parameters** -- `haystack` — string, in which substring will to be searched. [String](../syntax.md#syntax-string-literal). -- `needle` — substring to be searched. [String](../syntax.md#syntax-string-literal). 
+- `haystack` — string, in which substring will to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). **Returned values** diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 5a3bb264a84..254dceef29b 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -22,7 +22,7 @@ Converts an input value to the [Int](../../sql-reference/data-types/int-uint.md) **Parameters** -- `expr` — [Expression](../syntax.md#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +- `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. **Returned value** @@ -87,7 +87,7 @@ Converts an input value to the [UInt](../../sql-reference/data-types/int-uint.md **Parameters** -- `expr` — [Expression](../syntax.md#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +- `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. **Returned value** @@ -151,7 +151,7 @@ These functions should be used instead of `toDecimal*()` functions, if you prefe **Parameters** -- `expr` — [Expression](../syntax.md#syntax-expressions), returns a value in the [String](../../sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. +- `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions), returns a value in the [String](../../sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. - `S` — Scale, the number of decimal places in the resulting value. **Returned value** @@ -195,7 +195,7 @@ These functions should be used instead of `toDecimal*()` functions, if you prefe **Parameters** -- `expr` — [Expression](../syntax.md#syntax-expressions), returns a value in the [String](../../sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. +- `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions), returns a value in the [String](../../sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. - `S` — Scale, the number of decimal places in the resulting value. 
**Returned value** @@ -516,7 +516,7 @@ Result: **See Also** -- [ISO 8601 announcement by @xkcd](https://xkcd.com/1179/) +- \[ISO 8601 announcement by @xkcd\](https://xkcd.com/1179/) - [RFC 1123](https://tools.ietf.org/html/rfc1123) - [toDate](#todate) - [toDateTime](#todatetime) @@ -531,24 +531,23 @@ Same as for [parseDateTimeBestEffort](#parsedatetimebesteffort) except that it r ## toLowCardinality {#tolowcardinality} -Converts input parameter to the [LowCardianlity](../data-types/lowcardinality.md) version of same data type. +Converts input parameter to the [LowCardianlity](../../sql-reference/data-types/lowcardinality.md) version of same data type. To convert data from the `LowCardinality` data type use the [CAST](#type_conversion_function-cast) function. For example, `CAST(x as String)`. **Syntax** -```sql +``` sql toLowCardinality(expr) ``` **Parameters** -- `expr` — [Expression](../syntax.md#syntax-expressions) resulting in one of the [supported data types](../data-types/index.md#data_types). - +- `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in one of the [supported data types](../../sql-reference/data-types/index.md#data_types). **Returned values** -- Result of `expr`. +- Result of `expr`. Type: `LowCardinality(expr_result_type)` @@ -556,22 +555,23 @@ Type: `LowCardinality(expr_result_type)` Query: -```sql +``` sql SELECT toLowCardinality('1') ``` Result: -```text +``` text ┌─toLowCardinality('1')─┐ │ 1 │ └───────────────────────┘ ``` +## toUnixTimestamp64Milli {#tounixtimestamp64milli} -## toUnixTimestamp64Milli -## toUnixTimestamp64Micro -## toUnixTimestamp64Nano +## toUnixTimestamp64Micro {#tounixtimestamp64micro} + +## toUnixTimestamp64Nano {#tounixtimestamp64nano} Converts a `DateTime64` to a `Int64` value with fixed sub-second precision. Input value is scaled up or down appropriately depending on it precision. Please note that output value is a timestamp in UTC, not in timezone of `DateTime64`. @@ -619,11 +619,13 @@ Result: └─────────────────────────────┘ ``` -## fromUnixTimestamp64Milli -## fromUnixTimestamp64Micro -## fromUnixTimestamp64Nano +## fromUnixTimestamp64Milli {#fromunixtimestamp64milli} -Converts an `Int64` to a `DateTime64` value with fixed sub-second precision and optional timezone. Input value is scaled up or down appropriately depending on it's precision. Please note that input value is treated as UTC timestamp, not timestamp at given (or implicit) timezone. +## fromUnixTimestamp64Micro {#fromunixtimestamp64micro} + +## fromUnixTimestamp64Nano {#fromunixtimestamp64nano} + +Converts an `Int64` to a `DateTime64` value with fixed sub-second precision and optional timezone. Input value is scaled up or down appropriately depending on it’s precision. Please note that input value is treated as UTC timestamp, not timestamp at given (or implicit) timezone. 
**Syntax** @@ -653,5 +655,4 @@ SELECT fromUnixTimestamp64Milli(i64, 'UTC') └──────────────────────────────────────┘ ``` - [Original article](https://clickhouse.tech/docs/en/query_language/functions/type_conversion_functions/) diff --git a/docs/en/sql-reference/functions/url-functions.md b/docs/en/sql-reference/functions/url-functions.md index 34477529649..620c15d9d83 100644 --- a/docs/en/sql-reference/functions/url-functions.md +++ b/docs/en/sql-reference/functions/url-functions.md @@ -1,6 +1,6 @@ --- toc_priority: 54 -toc_title: Working with URLs +toc_title: URLs --- # Functions for Working with URLs {#functions-for-working-with-urls} @@ -117,7 +117,7 @@ Returns the part of the domain that includes top-level subdomains up to the “f For example, `cutToFirstSignificantSubdomain('https://news.yandex.com.tr/') = 'yandex.com.tr'`. -### port(URL[, default_port = 0]) {#port} +### port(URL\[, default\_port = 0\]) {#port} Returns the port or `default_port` if there is no port in the URL (or in case of validation error). diff --git a/docs/en/sql-reference/functions/uuid-functions.md b/docs/en/sql-reference/functions/uuid-functions.md index 15d7e0685fa..f608c643ee8 100644 --- a/docs/en/sql-reference/functions/uuid-functions.md +++ b/docs/en/sql-reference/functions/uuid-functions.md @@ -1,6 +1,6 @@ --- toc_priority: 53 -toc_title: Working with UUID +toc_title: UUID --- # Functions for Working with UUID {#functions-for-working-with-uuid} @@ -82,7 +82,6 @@ SELECT ``` ``` text - ┌─uuid─────────────────────────────────┬─bytes────────────┐ │ 612f3c40-5d3b-217e-707b-6a546a3d7b29 │ a/<@];!~p{jTj={) │ └──────────────────────────────────────┴──────────────────┘ @@ -116,6 +115,6 @@ SELECT ## See Also {#see-also} -- [dictGetUUID](ext-dict-functions.md#ext_dict_functions-other) +- [dictGetUUID](../../sql-reference/functions/ext-dict-functions.md#ext_dict_functions-other) [Original article](https://clickhouse.tech/docs/en/query_language/functions/uuid_function/) diff --git a/docs/en/sql-reference/functions/ym-dict-functions.md b/docs/en/sql-reference/functions/ym-dict-functions.md index c3335e94e1e..79de37bf3d3 100644 --- a/docs/en/sql-reference/functions/ym-dict-functions.md +++ b/docs/en/sql-reference/functions/ym-dict-functions.md @@ -1,6 +1,6 @@ --- toc_priority: 59 -toc_title: Working with Yandex.Metrica Dictionaries +toc_title: Yandex.Metrica Dictionaries --- # Functions for Working with Yandex.Metrica Dictionaries {#functions-for-working-with-yandex-metrica-dictionaries} diff --git a/docs/en/sql-reference/index.md b/docs/en/sql-reference/index.md index 8b460fddd6b..aeb3fb3ccdb 100644 --- a/docs/en/sql-reference/index.md +++ b/docs/en/sql-reference/index.md @@ -9,10 +9,10 @@ toc_title: hidden ClickHouse supports the following types of queries: -- [SELECT](statements/select/index.md) -- [INSERT INTO](statements/insert-into.md) -- [CREATE](statements/create.md) -- [ALTER](statements/alter.md#query_language_queries_alter) -- [Other types of queries](statements/misc.md) +- [SELECT](../sql-reference/statements/select/index.md) +- [INSERT INTO](../sql-reference/statements/insert-into.md) +- [CREATE](../sql-reference/statements/create.md) +- [ALTER](../sql-reference/statements/alter.md#query_language_queries_alter) +- [Other types of queries](../sql-reference/statements/misc.md) [Original article](https://clickhouse.tech/docs/en/sql-reference/) diff --git a/docs/en/sql-reference/operators/in.md b/docs/en/sql-reference/operators/in.md index e08eb540b1e..58797853454 100644 --- a/docs/en/sql-reference/operators/in.md 
+++ b/docs/en/sql-reference/operators/in.md @@ -1,4 +1,4 @@ -### IN Operators {#select-in-operators} +# IN Operators {#select-in-operators} The `IN`, `NOT IN`, `GLOBAL IN`, and `GLOBAL NOT IN` operators are covered separately, since their functionality is quite rich. @@ -62,9 +62,9 @@ ORDER BY EventDate ASC For each day after March 17th, count the percentage of pageviews made by users who visited the site on March 17th. A subquery in the IN clause is always run just one time on a single server. There are no dependent subqueries. -## NULL Processing {#null-processing-1} +## NULL Processing {#in-null-processing} -During request processing, the IN operator assumes that the result of an operation with [NULL](../syntax.md#null-literal) is always equal to `0`, regardless of whether `NULL` is on the right or left side of the operator. `NULL` values are not included in any dataset, do not correspond to each other and cannot be compared. +During request processing, the `IN` operator assumes that the result of an operation with [NULL](../../sql-reference/syntax.md#null-literal) always equals `0`, regardless of whether `NULL` is on the right or left side of the operator. `NULL` values are not included in any dataset, do not correspond to each other and cannot be compared if [transform_null_in = 0](../../operations/settings/settings.md#transform_null_in). Here is an example with the `t_null` table: diff --git a/docs/en/sql-reference/operators/index.md b/docs/en/sql-reference/operators/index.md index 1758cfa22d9..e07febf9ec9 100644 --- a/docs/en/sql-reference/operators/index.md +++ b/docs/en/sql-reference/operators/index.md @@ -59,7 +59,7 @@ ClickHouse transforms operators to their corresponding functions at the query pa ## Operators for Working with Data Sets {#operators-for-working-with-data-sets} -*See [IN operators](in.md).* +*See [IN operators](../../sql-reference/operators/in.md).* `a IN ...` – The `in(a, b)` function. diff --git a/docs/en/sql-reference/statements/alter.md b/docs/en/sql-reference/statements/alter.md index 44485555bda..52d821a8ab8 100644 --- a/docs/en/sql-reference/statements/alter.md +++ b/docs/en/sql-reference/statements/alter.md @@ -34,7 +34,7 @@ These actions are described in detail below. ADD COLUMN [IF NOT EXISTS] name [type] [default_expr] [codec] [AFTER name_after] ``` -Adds a new column to the table with the specified `name`, `type`, [`codec`](create.md#codecs) and `default_expr` (see the section [Default expressions](create.md#create-default-values)). +Adds a new column to the table with the specified `name`, `type`, [`codec`](../../sql-reference/statements/create.md#codecs) and `default_expr` (see the section [Default expressions](../../sql-reference/statements/create.md#create-default-values)). If the `IF NOT EXISTS` clause is included, the query won’t return an error if the column already exists. If you specify `AFTER name_after` (the name of another column), the column is added after the specified one in the list of table columns. Otherwise, the column is added to the end of the table. Note that there is no way to add a column to the beginning of a table. For a chain of actions, `name_after` can be the name of a column that is added in one of the previous actions. @@ -90,7 +90,7 @@ Adds a comment to the column. If the `IF EXISTS` clause is specified, the query Each column can have one comment. If a comment already exists for the column, a new comment overwrites the previous comment. 
-Comments are stored in the `comment_expression` column returned by the [DESCRIBE TABLE](misc.md#misc-describe-table) query. +Comments are stored in the `comment_expression` column returned by the [DESCRIBE TABLE](../../sql-reference/statements/misc.md#misc-describe-table) query. Example: @@ -144,7 +144,7 @@ The `ALTER` query lets you create and delete separate elements (columns) in nest There is no support for deleting columns in the primary key or the sampling key (columns that are used in the `ENGINE` expression). Changing the type for columns that are included in the primary key is only possible if this change does not cause the data to be modified (for example, you are allowed to add values to an Enum or to change a type from `DateTime` to `UInt32`). -If the `ALTER` query is not sufficient to make the table changes you need, you can create a new table, copy the data to it using the [INSERT SELECT](insert-into.md#insert_query_insert-select) query, then switch the tables using the [RENAME](misc.md#misc_operations-rename) query and delete the old table. You can use the [clickhouse-copier](../../operations/utilities/clickhouse-copier.md) as an alternative to the `INSERT SELECT` query. +If the `ALTER` query is not sufficient to make the table changes you need, you can create a new table, copy the data to it using the [INSERT SELECT](../../sql-reference/statements/insert-into.md#insert_query_insert-select) query, then switch the tables using the [RENAME](../../sql-reference/statements/misc.md#misc_operations-rename) query and delete the old table. You can use the [clickhouse-copier](../../operations/utilities/clickhouse-copier.md) as an alternative to the `INSERT SELECT` query. The `ALTER` query blocks all reads and writes for the table. In other words, if a long `SELECT` is running at the time of the `ALTER` query, the `ALTER` query will wait for it to complete. At the same time, all new queries to the same table will wait while this `ALTER` is running. @@ -182,7 +182,7 @@ Also, they are replicated (syncing indices metadata through ZooKeeper). ### Manipulations with Constraints {#manipulations-with-constraints} -See more on [constraints](create.md#constraints) +See more on [constraints](../../sql-reference/statements/create.md#constraints) Constraints could be added or deleted using following syntax: @@ -233,7 +233,7 @@ Read about setting the partition expression in a section [How to specify the par After the query is executed, you can do whatever you want with the data in the `detached` directory — delete it from the file system, or just leave it. -This query is replicated – it moves the data to the `detached` directory on all replicas. Note that you can execute this query only on a leader replica. To find out if a replica is a leader, perform the `SELECT` query to the [system.replicas](../../operations/system-tables.md#system_tables-replicas) table. Alternatively, it is easier to make a `DETACH` query on all replicas - all the replicas throw an exception, except the leader replica. +This query is replicated – it moves the data to the `detached` directory on all replicas. Note that you can execute this query only on a leader replica. To find out if a replica is a leader, perform the `SELECT` query to the [system.replicas](../../operations/system-tables/replicas.md#system_tables-replicas) table. Alternatively, it is easier to make a `DETACH` query on all replicas - all the replicas throw an exception, except the leader replica. 
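+For example, a minimal way to check which replica is currently the leader (the `visits` table name here is only an illustration) is to query `system.replicas` directly:
+
+``` sql
+SELECT database, table, replica_name, is_leader
+FROM system.replicas
+WHERE table = 'visits'
+```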
#### DROP PARTITION {#alter_drop-partition} @@ -434,13 +434,13 @@ You can specify the partition expression in `ALTER ... PARTITION` queries in dif - As a value from the `partition` column of the `system.parts` table. For example, `ALTER TABLE visits DETACH PARTITION 201901`. - As the expression from the table column. Constants and constant expressions are supported. For example, `ALTER TABLE visits DETACH PARTITION toYYYYMM(toDate('2019-01-25'))`. - Using the partition ID. Partition ID is a string identifier of the partition (human-readable, if possible) that is used as the names of partitions in the file system and in ZooKeeper. The partition ID must be specified in the `PARTITION ID` clause, in a single quotes. For example, `ALTER TABLE visits DETACH PARTITION ID '201901'`. -- In the [ALTER ATTACH PART](#alter_attach-partition) and [DROP DETACHED PART](#alter_drop-detached) query, to specify the name of a part, use string literal with a value from the `name` column of the [system.detached\_parts](../../operations/system-tables.md#system_tables-detached_parts) table. For example, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`. +- In the [ALTER ATTACH PART](#alter_attach-partition) and [DROP DETACHED PART](#alter_drop-detached) query, to specify the name of a part, use string literal with a value from the `name` column of the [system.detached\_parts](../../operations/system-tables/detached_parts.md#system_tables-detached_parts) table. For example, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`. Usage of quotes when specifying the partition depends on the type of partition expression. For example, for the `String` type, you have to specify its name in quotes (`'`). For the `Date` and `Int*` types no quotes are needed. For old-style tables, you can specify the partition either as a number `201901` or a string `'201901'`. The syntax for the new-style tables is stricter with types (similar to the parser for the VALUES input format). -All the rules above are also true for the [OPTIMIZE](misc.md#misc_operations-optimize) query. If you need to specify the only partition when optimizing a non-partitioned table, set the expression `PARTITION tuple()`. For example: +All the rules above are also true for the [OPTIMIZE](../../sql-reference/statements/misc.md#misc_operations-optimize) query. If you need to specify the only partition when optimizing a non-partitioned table, set the expression `PARTITION tuple()`. For example: ``` sql OPTIMIZE TABLE table_not_partitioned PARTITION tuple() FINAL; @@ -495,7 +495,7 @@ For \*MergeTree tables mutations execute by rewriting whole data parts. There is Mutations are totally ordered by their creation order and are applied to each part in that order. Mutations are also partially ordered with INSERTs - data that was inserted into the table before the mutation was submitted will be mutated and data that was inserted after that will not be mutated. Note that mutations do not block INSERTs in any way. -A mutation query returns immediately after the mutation entry is added (in case of replicated tables to ZooKeeper, for nonreplicated tables - to the filesystem). The mutation itself executes asynchronously using the system profile settings. To track the progress of mutations you can use the [`system.mutations`](../../operations/system-tables.md#system_tables-mutations) table. A mutation that was successfully submitted will continue to execute even if ClickHouse servers are restarted. 
There is no way to roll back the mutation once it is submitted, but if the mutation is stuck for some reason it can be cancelled with the [`KILL MUTATION`](misc.md#kill-mutation) query. +A mutation query returns immediately after the mutation entry is added (in case of replicated tables to ZooKeeper, for nonreplicated tables - to the filesystem). The mutation itself executes asynchronously using the system profile settings. To track the progress of mutations you can use the [`system.mutations`](../../operations/system-tables/mutations.md#system_tables-mutations) table. A mutation that was successfully submitted will continue to execute even if ClickHouse servers are restarted. There is no way to roll back the mutation once it is submitted, but if the mutation is stuck for some reason it can be cancelled with the [`KILL MUTATION`](../../sql-reference/statements/misc.md#kill-mutation) query. Entries for finished mutations are not deleted right away (the number of preserved entries is determined by the `finished_mutations_to_keep` storage engine parameter). Older mutation entries are deleted. @@ -512,11 +512,11 @@ ALTER USER [IF EXISTS] name [ON CLUSTER cluster_name] [[ADD|DROP] HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] [DEFAULT ROLE role [,...] | ALL | ALL EXCEPT role [,...] ] [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] -``` +``` ### Description {#alter-user-dscr} -To use `ALTER USER` you must have the [ALTER USER](grant.md#grant-access-management) privilege. +To use `ALTER USER` you must have the [ALTER USER](../../sql-reference/statements/grant.md#grant-access-management) privilege. ### Examples {#alter-user-examples} @@ -526,7 +526,7 @@ Set assigned roles as default: ALTER USER user DEFAULT ROLE role1, role2 ``` -If roles aren't previously assigned to a user, ClickHouse throws an exception. +If roles aren’t previously assigned to a user, ClickHouse throws an exception. Set all the assigned roles to default: @@ -542,7 +542,6 @@ Set all the assigned roles to default, excepting `role1` and `role2`: ALTER USER user DEFAULT ROLE ALL EXCEPT role1, role2 ``` - ## ALTER ROLE {#alter-role-statement} Changes roles. @@ -555,7 +554,6 @@ ALTER ROLE [IF EXISTS] name [ON CLUSTER cluster_name] [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] ``` - ## ALTER ROW POLICY {#alter-row-policy-statement} Changes row policy. @@ -571,7 +569,6 @@ ALTER [ROW] POLICY [IF EXISTS] name [ON CLUSTER cluster_name] ON [database.]tabl [TO {role [,...] | ALL | ALL EXCEPT role [,...]}] ``` - ## ALTER QUOTA {#alter-quota-statement} Changes quotas. @@ -588,7 +585,6 @@ ALTER QUOTA [IF EXISTS] name [ON CLUSTER cluster_name] [TO {role [,...] | ALL | ALL EXCEPT role [,...]}] ``` - ## ALTER SETTINGS PROFILE {#alter-settings-profile-statement} Changes settings profiles. @@ -601,5 +597,4 @@ ALTER SETTINGS PROFILE [IF EXISTS] name [ON CLUSTER cluster_name] [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | INHERIT 'profile_name'] [,...] 
``` - [Original article](https://clickhouse.tech/docs/en/query_language/alter/) diff --git a/docs/en/sql-reference/statements/create.md b/docs/en/sql-reference/statements/create.md index 7a7f4635ec3..051fa23e1bb 100644 --- a/docs/en/sql-reference/statements/create.md +++ b/docs/en/sql-reference/statements/create.md @@ -44,7 +44,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ``` Creates a table named ‘name’ in the ‘db’ database or the current database if ‘db’ is not set, with the structure specified in brackets and the ‘engine’ engine. -The structure of the table is a list of column descriptions. If indexes are supported by the engine, they are indicated as parameters for the table engine. +The structure of the table is a list of column descriptions, secondary indexes and constraints . If primary key is supported by the engine, it will be indicated as parameter for the table engine. A column description is `name type` in the simplest case. Example: `RegionID UInt32`. Expressions can also be defined for default values (see below). @@ -59,7 +59,7 @@ Creates a table with the same structure as another table. You can specify a diff CREATE TABLE [IF NOT EXISTS] [db.]table_name AS table_function() ``` -Creates a table with the structure and data returned by a [table function](../table-functions/index.md#table-functions). +Creates a table with the structure and data returned by a [table function](../../sql-reference/table-functions/index.md#table-functions). ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name ENGINE = engine AS SELECT ... @@ -73,7 +73,7 @@ There can be other clauses after the `ENGINE` clause in the query. See detailed ### Default Values {#create-default-values} -The column description can specify an expression for a default value, in one of the following ways:`DEFAULT expr`, `MATERIALIZED expr`, `ALIAS expr`. +The column description can specify an expression for a default value, in one of the following ways: `DEFAULT expr`, `MATERIALIZED expr`, `ALIAS expr`. Example: `URLDomain String DEFAULT domain(URL)`. If an expression for the default value is not defined, the default values will be set to zeros for numbers, empty strings for strings, empty arrays for arrays, and `0000-00-00` for dates or `0000-00-00 00:00:00` for dates with time. NULLs are not supported. @@ -298,7 +298,7 @@ External dictionary structure consists of attributes. Dictionary attributes are Depending on dictionary [layout](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) one or more attributes can be specified as dictionary keys. -For more information, see [External Dictionaries](../dictionaries/external-dictionaries/external-dicts.md) section. +For more information, see [External Dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) section. ## CREATE USER {#create-user-statement} @@ -306,7 +306,7 @@ Creates a [user account](../../operations/access-rights.md#user-account-manageme ### Syntax {#create-user-syntax} -```sql +``` sql CREATE USER [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name] [IDENTIFIED [WITH {NO_PASSWORD|PLAINTEXT_PASSWORD|SHA256_PASSWORD|SHA256_HASH|DOUBLE_SHA1_PASSWORD|DOUBLE_SHA1_HASH}] BY {'password'|'hash'}] [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] 
| ANY | NONE] @@ -314,44 +314,42 @@ CREATE USER [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name] [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] ``` -#### Identification +#### Identification {#identification} There are multiple ways of user identification: -- `IDENTIFIED WITH no_password` -- `IDENTIFIED WITH plaintext_password BY 'qwerty'` -- `IDENTIFIED WITH sha256_password BY 'qwerty'` or `IDENTIFIED BY 'password'` -- `IDENTIFIED WITH sha256_hash BY 'hash'` -- `IDENTIFIED WITH double_sha1_password BY 'qwerty'` -- `IDENTIFIED WITH double_sha1_hash BY 'hash'` +- `IDENTIFIED WITH no_password` +- `IDENTIFIED WITH plaintext_password BY 'qwerty'` +- `IDENTIFIED WITH sha256_password BY 'qwerty'` or `IDENTIFIED BY 'password'` +- `IDENTIFIED WITH sha256_hash BY 'hash'` +- `IDENTIFIED WITH double_sha1_password BY 'qwerty'` +- `IDENTIFIED WITH double_sha1_hash BY 'hash'` -#### User Host +#### User Host {#user-host} User host is a host from which a connection to ClickHouse server could be established. The host can be specified in the `HOST` query section in the following ways: -- `HOST IP 'ip_address_or_subnetwork'` — User can connect to ClickHouse server only from the specified IP address or a [subnetwork](https://en.wikipedia.org/wiki/Subnetwork). Examples: `HOST IP '192.168.0.0/16'`, `HOST IP '2001:DB8::/32'`. For use in production, only specify `HOST IP` elements (IP addresses and their masks), since using `host` and `host_regexp` might cause extra latency. -- `HOST ANY` — User can connect from any location. This is a default option. -- `HOST LOCAL` — User can connect only locally. -- `HOST NAME 'fqdn'` — User host can be specified as FQDN. For example, `HOST NAME 'mysite.com'`. -- `HOST NAME REGEXP 'regexp'` — You can use [pcre](http://www.pcre.org/) regular expressions when specifying user hosts. For example, `HOST NAME REGEXP '.*\.mysite\.com'`. -- `HOST LIKE 'template'` — Allows you to use the [LIKE](../functions/string-search-functions.md#function-like) operator to filter the user hosts. For example, `HOST LIKE '%'` is equivalent to `HOST ANY`, `HOST LIKE '%.mysite.com'` filters all the hosts in the `mysite.com` domain. +- `HOST IP 'ip_address_or_subnetwork'` — User can connect to ClickHouse server only from the specified IP address or a [subnetwork](https://en.wikipedia.org/wiki/Subnetwork). Examples: `HOST IP '192.168.0.0/16'`, `HOST IP '2001:DB8::/32'`. For use in production, only specify `HOST IP` elements (IP addresses and their masks), since using `host` and `host_regexp` might cause extra latency. +- `HOST ANY` — User can connect from any location. This is a default option. +- `HOST LOCAL` — User can connect only locally. +- `HOST NAME 'fqdn'` — User host can be specified as FQDN. For example, `HOST NAME 'mysite.com'`. +- `HOST NAME REGEXP 'regexp'` — You can use [pcre](http://www.pcre.org/) regular expressions when specifying user hosts. For example, `HOST NAME REGEXP '.*\.mysite\.com'`. +- `HOST LIKE 'template'` — Allows you to use the [LIKE](../../sql-reference/functions/string-search-functions.md#function-like) operator to filter the user hosts. For example, `HOST LIKE '%'` is equivalent to `HOST ANY`, `HOST LIKE '%.mysite.com'` filters all the hosts in the `mysite.com` domain. Another way of specifying host is to use `@` syntax following the username. Examples: -- `CREATE USER mira@'127.0.0.1'` — Equivalent to the `HOST IP` syntax. 
-- `CREATE USER mira@'localhost'` — Equivalent to the `HOST LOCAL` syntax. -- `CREATE USER mira@'192.168.%.%'` — Equivalent to the `HOST LIKE` syntax. +- `CREATE USER mira@'127.0.0.1'` — Equivalent to the `HOST IP` syntax. +- `CREATE USER mira@'localhost'` — Equivalent to the `HOST LOCAL` syntax. +- `CREATE USER mira@'192.168.%.%'` — Equivalent to the `HOST LIKE` syntax. !!! info "Warning" - ClickHouse treats `user_name@'address'` as a username as a whole. Thus, technically you can create multiple users with the same `user_name` and different constructions after `@`. However, we don't recommend to do so. - + ClickHouse treats `user_name@'address'` as a username as a whole. Thus, technically you can create multiple users with the same `user_name` and different constructions after `@`. However, we don’t recommend to do so. ### Examples {#create-user-examples} - Create the user account `mira` protected by the password `qwerty`: -```sql +``` sql CREATE USER mira HOST IP '127.0.0.1' IDENTIFIED WITH sha256_password BY 'qwerty' ``` @@ -377,33 +375,32 @@ Create the user account `john` and make all his future roles default excepting ` ALTER USER john DEFAULT ROLE ALL EXCEPT role1, role2 ``` - ## CREATE ROLE {#create-role-statement} Creates a [role](../../operations/access-rights.md#role-management). ### Syntax {#create-role-syntax} -```sql +``` sql CREATE ROLE [IF NOT EXISTS | OR REPLACE] name [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] ``` ### Description {#create-role-description} -Role is a set of [privileges](grant.md#grant-privileges). A user assigned a role gets all the privileges of this role. +Role is a set of [privileges](../../sql-reference/statements/grant.md#grant-privileges). A user assigned a role gets all the privileges of this role. -A user can be assigned multiple roles. Users can apply their assigned roles in arbitrary combinations by the [SET ROLE](misc.md#set-role-statement) statement. The final scope of privileges is a combined set of all the privileges of all the applied roles. If a user has privileges granted directly to it's user account, they are also combined with the privileges granted by roles. +A user can be assigned multiple roles. Users can apply their assigned roles in arbitrary combinations by the [SET ROLE](../../sql-reference/statements/misc.md#set-role-statement) statement. The final scope of privileges is a combined set of all the privileges of all the applied roles. If a user has privileges granted directly to it’s user account, they are also combined with the privileges granted by roles. -User can have default roles which apply at user login. To set default roles, use the [SET DEFAULT ROLE](misc.md#set-default-role-statement) statement or the [ALTER USER](alter.md#alter-user-statement) statement. +User can have default roles which apply at user login. To set default roles, use the [SET DEFAULT ROLE](../../sql-reference/statements/misc.md#set-default-role-statement) statement or the [ALTER USER](../../sql-reference/statements/alter.md#alter-user-statement) statement. -To revoke a role, use the [REVOKE](revoke.md) statement. +To revoke a role, use the [REVOKE](../../sql-reference/statements/revoke.md) statement. -To delete role, use the [DROP ROLE](misc.md#drop-role-statement) statement. The deleted role is being automatically revoked from all the users and roles to which it was assigned. 
+To delete role, use the [DROP ROLE](../../sql-reference/statements/misc.md#drop-role-statement) statement. The deleted role is being automatically revoked from all the users and roles to which it was assigned. ### Examples {#create-role-examples} -```sql +``` sql CREATE ROLE accountant; GRANT SELECT ON db.* TO accountant; ``` @@ -412,13 +409,13 @@ This sequence of queries creates the role `accountant` that has the privilege of Assigning the role to the user `mira`: -```sql +``` sql GRANT accountant TO mira; ``` After the role is assigned, the user can apply it and execute the allowed queries. For example: -```sql +``` sql SET ROLE accountant; SELECT * FROM db.*; ``` @@ -445,7 +442,7 @@ Permissive policy grants access to rows. Permissive policies which apply to the Restrictive policy restricts access to rows. Restrictive policies which apply to the same table are combined together using the boolean `AND` operator. -Restrictive policies apply to rows that passed the permissive filters. If you set restrictive policies but no permissive policies, the user can't get any row from the table. +Restrictive policies apply to rows that passed the permissive filters. If you set restrictive policies but no permissive policies, the user can’t get any row from the table. #### Section TO {#create-row-policy-to} @@ -453,11 +450,10 @@ In the section `TO` you can provide a mixed list of roles and users, for example Keyword `ALL` means all the ClickHouse users including current user. Keywords `ALL EXCEPT` allow to exclude some users from the all users list, for example, `CREATE ROW POLICY ... TO ALL EXCEPT accountant, john@localhost` -### Examples - -- `CREATE ROW POLICY filter ON mydb.mytable FOR SELECT USING a<1000 TO accountant, john@localhost` -- `CREATE ROW POLICY filter ON mydb.mytable FOR SELECT USING a<1000 TO ALL EXCEPT mira` +### Examples {#examples} +- `CREATE ROW POLICY filter ON mydb.mytable FOR SELECT USING a<1000 TO accountant, john@localhost` +- `CREATE ROW POLICY filter ON mydb.mytable FOR SELECT USING a<1000 TO ALL EXCEPT mira` ## CREATE QUOTA {#create-quota-statement} @@ -482,7 +478,6 @@ Limit the maximum number of queries for the current user with 123 queries in 15 CREATE QUOTA qA FOR INTERVAL 15 MONTH MAX QUERIES 123 TO CURRENT_USER ``` - ## CREATE SETTINGS PROFILE {#create-settings-profile-statement} Creates a [settings profile](../../operations/access-rights.md#settings-profiles-management) that can be assigned to a user or a role. @@ -502,5 +497,4 @@ Create the `max_memory_usage_profile` settings profile with value and constraint CREATE SETTINGS PROFILE max_memory_usage_profile SETTINGS max_memory_usage = 100000001 MIN 90000000 MAX 110000000 TO robin ``` - [Original article](https://clickhouse.tech/docs/en/query_language/create/) diff --git a/docs/en/sql-reference/statements/grant.md b/docs/en/sql-reference/statements/grant.md index d9e4f2f9309..72266561c1b 100644 --- a/docs/en/sql-reference/statements/grant.md +++ b/docs/en/sql-reference/statements/grant.md @@ -3,35 +3,33 @@ toc_priority: 39 toc_title: GRANT --- +# GRANT {#grant} -# GRANT +- Grants [privileges](#grant-privileges) to ClickHouse user accounts or roles. +- Assigns roles to user accounts or to the other roles. -- Grants [privileges](#grant-privileges) to ClickHouse user accounts or roles. -- Assigns roles to user accounts or to the other roles. - -To revoke privileges, use the [REVOKE](revoke.md) statement. Also you can list granted privileges with the [SHOW GRANTS](show.md#show-grants-statement) statement. 
+To revoke privileges, use the [REVOKE](../../sql-reference/statements/revoke.md) statement. Also you can list granted privileges with the [SHOW GRANTS](../../sql-reference/statements/show.md#show-grants-statement) statement. ## Granting Privilege Syntax {#grant-privigele-syntax} -```sql +``` sql GRANT [ON CLUSTER cluster_name] privilege[(column_name [,...])] [,...] ON {db.table|db.*|*.*|table|*} TO {user | role | CURRENT_USER} [,...] [WITH GRANT OPTION] ``` -- `privilege` — Type of privilege. -- `role` — ClickHouse user role. -- `user` — ClickHouse user account. +- `privilege` — Type of privilege. +- `role` — ClickHouse user role. +- `user` — ClickHouse user account. The `WITH GRANT OPTION` clause grants `user` or `role` with permission to execute the `GRANT` query. Users can grant privileges of the same scope they have and less. - ## Assigning Role Syntax {#assign-role-syntax} -```sql +``` sql GRANT [ON CLUSTER cluster_name] role [,...] TO {user | another_role | CURRENT_USER} [,...] [WITH ADMIN OPTION] ``` -- `role` — ClickHouse user role. -- `user` — ClickHouse user account. +- `role` — ClickHouse user role. +- `user` — ClickHouse user account. The `WITH ADMIN OPTION` clause grants [ADMIN OPTION](#admin-option-privilege) privilege to `user` or `role`. @@ -41,17 +39,17 @@ To use `GRANT`, your account must have the `GRANT OPTION` privilege. You can gra For example, administrator has granted privileges to the `john` account by the query: -```sql +``` sql GRANT SELECT(x,y) ON db.table TO john WITH GRANT OPTION ``` It means that `john` has the permission to execute: -- `SELECT x,y FROM db.table`. -- `SELECT x FROM db.table`. -- `SELECT y FROM db.table`. +- `SELECT x,y FROM db.table`. +- `SELECT x FROM db.table`. +- `SELECT y FROM db.table`. -`john` can't execute `SELECT z FROM db.table`. The `SELECT * FROM db.table` also is not available. Processing this query, ClickHouse doesn't return any data, even `x` and `y`. The only exception is if a table contains only `x` and `y` columns. In this case ClickHouse returns all the data. +`john` can’t execute `SELECT z FROM db.table`. The `SELECT * FROM db.table` also is not available. Processing this query, ClickHouse doesn’t return any data, even `x` and `y`. The only exception is if a table contains only `x` and `y` columns. In this case ClickHouse returns all the data. Also `john` has the `GRANT OPTION` privilege, so it can grant other users with privileges of the same or smaller scope. @@ -61,7 +59,6 @@ Access to the `system` database is always allowed (since this database is used f You can grant multiple privileges to multiple accounts in one query. The query `GRANT SELECT, INSERT ON *.* TO john, robin` allows accounts `john` and `robin` to execute the `INSERT` and `SELECT` queries over all the tables in all the databases on the server. - ## Privileges {#grant-privileges} Privilege is a permission to execute specific kind of queries. @@ -70,143 +67,143 @@ Privileges have a hierarchical structure. 
A set of permitted queries depends on Hierarchy of privileges: -- [SELECT](#grant-select) -- [INSERT](#grant-insert) -- [ALTER](#grant-alter) - - `ALTER TABLE` - - `ALTER UPDATE` - - `ALTER DELETE` - - `ALTER COLUMN` - - `ALTER ADD COLUMN` - - `ALTER DROP COLUMN` - - `ALTER MODIFY COLUMN` - - `ALTER COMMENT COLUMN` - - `ALTER CLEAR COLUMN` - - `ALTER RENAME COLUMN` - - `ALTER INDEX` - - `ALTER ORDER BY` - - `ALTER ADD INDEX` - - `ALTER DROP INDEX` - - `ALTER MATERIALIZE INDEX` - - `ALTER CLEAR INDEX` - - `ALTER CONSTRAINT` - - `ALTER ADD CONSTRAINT` - - `ALTER DROP CONSTRAINT` - - `ALTER TTL` - - `ALTER MATERIALIZE TTL` - - `ALTER SETTINGS` - - `ALTER MOVE PARTITION` - - `ALTER FETCH PARTITION` - - `ALTER FREEZE PARTITION` - - `ALTER VIEW` - - `ALTER VIEW REFRESH ` - - `ALTER VIEW MODIFY QUERY` -- [CREATE](#grant-create) - - `CREATE DATABASE` - - `CREATE TABLE` - - `CREATE VIEW` - - `CREATE DICTIONARY` - - `CREATE TEMPORARY TABLE` -- [DROP](#grant-drop) - - `DROP DATABASE` - - `DROP TABLE` - - `DROP VIEW` - - `DROP DICTIONARY` -- [TRUNCATE](#grant-truncate) -- [OPTIMIZE](#grant-optimize) -- [SHOW](#grant-show) - - `SHOW DATABASES` - - `SHOW TABLES` - - `SHOW COLUMNS` - - `SHOW DICTIONARIES` -- [KILL QUERY](#grant-kill-query) -- [ACCESS MANAGEMENT](#grant-access-management) - - `CREATE USER` - - `ALTER USER` - - `DROP USER` - - `CREATE ROLE` - - `ALTER ROLE` - - `DROP ROLE` - - `CREATE ROW POLICY` - - `ALTER ROW POLICY` - - `DROP ROW POLICY` - - `CREATE QUOTA` - - `ALTER QUOTA` - - `DROP QUOTA` - - `CREATE SETTINGS PROFILE` - - `ALTER SETTINGS PROFILE` - - `DROP SETTINGS PROFILE` - - `SHOW ACCESS` - - `SHOW_USERS` - - `SHOW_ROLES` - - `SHOW_ROW_POLICIES` - - `SHOW_QUOTAS` - - `SHOW_SETTINGS_PROFILES` - - `ROLE ADMIN` -- [SYSTEM](#grant-system) - - `SYSTEM SHUTDOWN` - - `SYSTEM DROP CACHE` - - `SYSTEM DROP DNS CACHE` - - `SYSTEM DROP MARK CACHE` - - `SYSTEM DROP UNCOMPRESSED CACHE` - - `SYSTEM RELOAD` - - `SYSTEM RELOAD CONFIG` - - `SYSTEM RELOAD DICTIONARY` - - `SYSTEM RELOAD EMBEDDED DICTIONARIES` - - `SYSTEM MERGES` - - `SYSTEM TTL MERGES` - - `SYSTEM FETCHES` - - `SYSTEM MOVES` - - `SYSTEM SENDS` - - `SYSTEM DISTRIBUTED SENDS` - - `SYSTEM REPLICATED SENDS` - - `SYSTEM REPLICATION QUEUES` - - `SYSTEM SYNC REPLICA` - - `SYSTEM RESTART REPLICA` - - `SYSTEM FLUSH` - - `SYSTEM FLUSH DISTRIBUTED` - - `SYSTEM FLUSH LOGS` -- [INTROSPECTION](#grant-introspection) - - `addressToLine` - - `addressToSymbol` - - `demangle` -- [SOURCES](#grant-sources) - - `FILE` - - `URL` - - `REMOTE` - - `YSQL` - - `ODBC` - - `JDBC` - - `HDFS` - - `S3` -- [dictGet](#grant-dictget) +- [SELECT](#grant-select) +- [INSERT](#grant-insert) +- [ALTER](#grant-alter) + - `ALTER TABLE` + - `ALTER UPDATE` + - `ALTER DELETE` + - `ALTER COLUMN` + - `ALTER ADD COLUMN` + - `ALTER DROP COLUMN` + - `ALTER MODIFY COLUMN` + - `ALTER COMMENT COLUMN` + - `ALTER CLEAR COLUMN` + - `ALTER RENAME COLUMN` + - `ALTER INDEX` + - `ALTER ORDER BY` + - `ALTER ADD INDEX` + - `ALTER DROP INDEX` + - `ALTER MATERIALIZE INDEX` + - `ALTER CLEAR INDEX` + - `ALTER CONSTRAINT` + - `ALTER ADD CONSTRAINT` + - `ALTER DROP CONSTRAINT` + - `ALTER TTL` + - `ALTER MATERIALIZE TTL` + - `ALTER SETTINGS` + - `ALTER MOVE PARTITION` + - `ALTER FETCH PARTITION` + - `ALTER FREEZE PARTITION` + - `ALTER VIEW` + - `ALTER VIEW REFRESH` + - `ALTER VIEW MODIFY QUERY` +- [CREATE](#grant-create) + - `CREATE DATABASE` + - `CREATE TABLE` + - `CREATE VIEW` + - `CREATE DICTIONARY` + - `CREATE TEMPORARY TABLE` +- [DROP](#grant-drop) + - `DROP DATABASE` + - `DROP TABLE` + - `DROP 
VIEW` + - `DROP DICTIONARY` +- [TRUNCATE](#grant-truncate) +- [OPTIMIZE](#grant-optimize) +- [SHOW](#grant-show) + - `SHOW DATABASES` + - `SHOW TABLES` + - `SHOW COLUMNS` + - `SHOW DICTIONARIES` +- [KILL QUERY](#grant-kill-query) +- [ACCESS MANAGEMENT](#grant-access-management) + - `CREATE USER` + - `ALTER USER` + - `DROP USER` + - `CREATE ROLE` + - `ALTER ROLE` + - `DROP ROLE` + - `CREATE ROW POLICY` + - `ALTER ROW POLICY` + - `DROP ROW POLICY` + - `CREATE QUOTA` + - `ALTER QUOTA` + - `DROP QUOTA` + - `CREATE SETTINGS PROFILE` + - `ALTER SETTINGS PROFILE` + - `DROP SETTINGS PROFILE` + - `SHOW ACCESS` + - `SHOW_USERS` + - `SHOW_ROLES` + - `SHOW_ROW_POLICIES` + - `SHOW_QUOTAS` + - `SHOW_SETTINGS_PROFILES` + - `ROLE ADMIN` +- [SYSTEM](#grant-system) + - `SYSTEM SHUTDOWN` + - `SYSTEM DROP CACHE` + - `SYSTEM DROP DNS CACHE` + - `SYSTEM DROP MARK CACHE` + - `SYSTEM DROP UNCOMPRESSED CACHE` + - `SYSTEM RELOAD` + - `SYSTEM RELOAD CONFIG` + - `SYSTEM RELOAD DICTIONARY` + - `SYSTEM RELOAD EMBEDDED DICTIONARIES` + - `SYSTEM MERGES` + - `SYSTEM TTL MERGES` + - `SYSTEM FETCHES` + - `SYSTEM MOVES` + - `SYSTEM SENDS` + - `SYSTEM DISTRIBUTED SENDS` + - `SYSTEM REPLICATED SENDS` + - `SYSTEM REPLICATION QUEUES` + - `SYSTEM SYNC REPLICA` + - `SYSTEM RESTART REPLICA` + - `SYSTEM FLUSH` + - `SYSTEM FLUSH DISTRIBUTED` + - `SYSTEM FLUSH LOGS` +- [INTROSPECTION](#grant-introspection) + - `addressToLine` + - `addressToSymbol` + - `demangle` +- [SOURCES](#grant-sources) + - `FILE` + - `URL` + - `REMOTE` + - `YSQL` + - `ODBC` + - `JDBC` + - `HDFS` + - `S3` +- [dictGet](#grant-dictget) Examples of how this hierarchy is treated: -- The `ALTER` privilege includes all other `ALTER*` privileges. -- `ALTER CONSTRAINT` includes `ALTER ADD CONSTRAINT` and `ALTER DROP CONSTRAINT` privileges. +- The `ALTER` privilege includes all other `ALTER*` privileges. +- `ALTER CONSTRAINT` includes `ALTER ADD CONSTRAINT` and `ALTER DROP CONSTRAINT` privileges. Privileges are applied at different levels. Knowing of a level suggests syntax available for privilege. Levels (from lower to higher): -- `COLUMN` — Privilege can be granted for column, table, database, or globally. -- `TABLE` — Privilege can be granted for table, database, or globally. -- `VIEW` — Privilege can be granted for view, database, or globally. -- `DICTIONARY` — Privilege can be granted for dictionary, database, or globally. -- `DATABASE` — Privilege can be granted for database or globally. -- `GLOBAL` — Privilege can be granted only globally. -- `GROUP` — Groups privileges of different levels. When `GROUP`-level privilege is granted, only that privileges from the group are granted which correspond to the used syntax. +- `COLUMN` — Privilege can be granted for column, table, database, or globally. +- `TABLE` — Privilege can be granted for table, database, or globally. +- `VIEW` — Privilege can be granted for view, database, or globally. +- `DICTIONARY` — Privilege can be granted for dictionary, database, or globally. +- `DATABASE` — Privilege can be granted for database or globally. +- `GLOBAL` — Privilege can be granted only globally. +- `GROUP` — Groups privileges of different levels. When `GROUP`-level privilege is granted, only that privileges from the group are granted which correspond to the used syntax. 
Examples of allowed syntax: -- `GRANT SELECT(x) ON db.table TO user` -- `GRANT SELECT ON db.* TO user` +- `GRANT SELECT(x) ON db.table TO user` +- `GRANT SELECT ON db.* TO user` Examples of disallowed syntax: -- `GRANT CREATE USER(x) ON db.table TO user` -- `GRANT CREATE USER ON db.* TO user` +- `GRANT CREATE USER(x) ON db.table TO user` +- `GRANT CREATE USER ON db.* TO user` The special privilege [ALL](#grant-all) grants all the privileges to a user account or a role. @@ -214,40 +211,39 @@ By default, a user account or a role has no privileges. If a user or a role has no privileges, it is displayed as [NONE](#grant-none) privilege. -Some queries by their implementation require a set of privileges. For example, to execute the [RENAME](misc.md#misc_operations-rename) query you need the following privileges: `SELECT`, `CREATE TABLE`, `INSERT` and `DROP TABLE`. - +Some queries by their implementation require a set of privileges. For example, to execute the [RENAME](../../sql-reference/statements/misc.md#misc_operations-rename) query you need the following privileges: `SELECT`, `CREATE TABLE`, `INSERT` and `DROP TABLE`. ### SELECT {#grant-select} -Allows executing [SELECT](select/index.md) queries. +Allows executing [SELECT](../../sql-reference/statements/select/index.md) queries. Privilege level: `COLUMN`. **Description** -User granted with this privilege can execute `SELECT` queries over a specified list of columns in the specified table and database. If user includes other columns then specified a query returns no data. +User granted with this privilege can execute `SELECT` queries over a specified list of columns in the specified table and database. If user includes other columns then specified a query returns no data. Consider the following privilege: -```sql +``` sql GRANT SELECT(x,y) ON db.table TO john ``` -This privilege allows `john` to execute any `SELECT` query that involves data from the `x` and/or `y` columns in `db.table`, for example, `SELECT x FROM db.table`. `john` can't execute `SELECT z FROM db.table`. The `SELECT * FROM db.table` also is not available. Processing this query, ClickHouse doesn't return any data, even `x` and `y`. The only exception is if a table contains only `x` and `y` columns, in this case ClickHouse returns all the data. +This privilege allows `john` to execute any `SELECT` query that involves data from the `x` and/or `y` columns in `db.table`, for example, `SELECT x FROM db.table`. `john` can’t execute `SELECT z FROM db.table`. The `SELECT * FROM db.table` also is not available. Processing this query, ClickHouse doesn’t return any data, even `x` and `y`. The only exception is if a table contains only `x` and `y` columns, in this case ClickHouse returns all the data. ### INSERT {#grant-insert} -Allows executing [INSERT](insert-into.md) queries. +Allows executing [INSERT](../../sql-reference/statements/insert-into.md) queries. Privilege level: `COLUMN`. **Description** -User granted with this privilege can execute `INSERT` queries over a specified list of columns in the specified table and database. If user includes other columns then specified a query doesn't insert any data. +User granted with this privilege can execute `INSERT` queries over a specified list of columns in the specified table and database. If user includes other columns then specified a query doesn’t insert any data. 
**Example** -```sql +``` sql GRANT INSERT(x,y) ON db.table TO john ``` @@ -255,85 +251,84 @@ The granted privilege allows `john` to insert data to the `x` and/or `y` columns ### ALTER {#grant-alter} -Allows executing [ALTER](alter.md) queries according to the following hierarchy of privileges: +Allows executing [ALTER](../../sql-reference/statements/alter.md) queries according to the following hierarchy of privileges: -- `ALTER`. Level: `COLUMN`. - - `ALTER TABLE`. Level: `GROUP` - - `ALTER UPDATE`. Level: `COLUMN`. Aliases: `UPDATE` - - `ALTER DELETE`. Level: `COLUMN`. Aliases: `DELETE` - - `ALTER COLUMN`. Level: `GROUP` - - `ALTER ADD COLUMN`. Level: `COLUMN`. Aliases: `ADD COLUMN` - - `ALTER DROP COLUMN`. Level: `COLUMN`. Aliases: `DROP COLUMN` - - `ALTER MODIFY COLUMN`. Level: `COLUMN`. Aliases: `MODIFY COLUMN` - - `ALTER COMMENT COLUMN`. Level: `COLUMN`. Aliases: `COMMENT COLUMN` - - `ALTER CLEAR COLUMN`. Level: `COLUMN`. Aliases: `CLEAR COLUMN` - - `ALTER RENAME COLUMN`. Level: `COLUMN`. Aliases: `RENAME COLUMN` - - `ALTER INDEX`. Level: `GROUP`. Aliases: `INDEX` - - `ALTER ORDER BY`. Level: `TABLE`. Aliases: `ALTER MODIFY ORDER BY`, `MODIFY ORDER BY` - - `ALTER ADD INDEX`. Level: `TABLE`. Aliases: `ADD INDEX` - - `ALTER DROP INDEX`. Level: `TABLE`. Aliases: `DROP INDEX` - - `ALTER MATERIALIZE INDEX`. Level: `TABLE`. Aliases: `MATERIALIZE INDEX` - - `ALTER CLEAR INDEX`. Level: `TABLE`. Aliases: `CLEAR INDEX` - - `ALTER CONSTRAINT`. Level: `GROUP`. Aliases: `CONSTRAINT` - - `ALTER ADD CONSTRAINT`. Level: `TABLE`. Aliases: `ADD CONSTRAINT` - - `ALTER DROP CONSTRAINT`. Level: `TABLE`. Aliases: `DROP CONSTRAINT` - - `ALTER TTL`. Level: `TABLE`. Aliases: `ALTER MODIFY TTL`, `MODIFY TTL` - - `ALTER MATERIALIZE TTL`. Level: `TABLE`. Aliases: `MATERIALIZE TTL` - - `ALTER SETTINGS`. Level: `TABLE`. Aliases: `ALTER SETTING`, `ALTER MODIFY SETTING`, `MODIFY SETTING` - - `ALTER MOVE PARTITION`. Level: `TABLE`. Aliases: `ALTER MOVE PART`, `MOVE PARTITION`, `MOVE PART` - - `ALTER FETCH PARTITION`. Level: `TABLE`. Aliases: `FETCH PARTITION` - - `ALTER FREEZE PARTITION`. Level: `TABLE`. Aliases: `FREEZE PARTITION` - - `ALTER VIEW` Level: `GROUP` - - `ALTER VIEW REFRESH `. Level: `VIEW`. Aliases: `ALTER LIVE VIEW REFRESH`, `REFRESH VIEW` - - `ALTER VIEW MODIFY QUERY`. Level: `VIEW`. Aliases: `ALTER TABLE MODIFY QUERY` +- `ALTER`. Level: `COLUMN`. + - `ALTER TABLE`. Level: `GROUP` + - `ALTER UPDATE`. Level: `COLUMN`. Aliases: `UPDATE` + - `ALTER DELETE`. Level: `COLUMN`. Aliases: `DELETE` + - `ALTER COLUMN`. Level: `GROUP` + - `ALTER ADD COLUMN`. Level: `COLUMN`. Aliases: `ADD COLUMN` + - `ALTER DROP COLUMN`. Level: `COLUMN`. Aliases: `DROP COLUMN` + - `ALTER MODIFY COLUMN`. Level: `COLUMN`. Aliases: `MODIFY COLUMN` + - `ALTER COMMENT COLUMN`. Level: `COLUMN`. Aliases: `COMMENT COLUMN` + - `ALTER CLEAR COLUMN`. Level: `COLUMN`. Aliases: `CLEAR COLUMN` + - `ALTER RENAME COLUMN`. Level: `COLUMN`. Aliases: `RENAME COLUMN` + - `ALTER INDEX`. Level: `GROUP`. Aliases: `INDEX` + - `ALTER ORDER BY`. Level: `TABLE`. Aliases: `ALTER MODIFY ORDER BY`, `MODIFY ORDER BY` + - `ALTER ADD INDEX`. Level: `TABLE`. Aliases: `ADD INDEX` + - `ALTER DROP INDEX`. Level: `TABLE`. Aliases: `DROP INDEX` + - `ALTER MATERIALIZE INDEX`. Level: `TABLE`. Aliases: `MATERIALIZE INDEX` + - `ALTER CLEAR INDEX`. Level: `TABLE`. Aliases: `CLEAR INDEX` + - `ALTER CONSTRAINT`. Level: `GROUP`. Aliases: `CONSTRAINT` + - `ALTER ADD CONSTRAINT`. Level: `TABLE`. Aliases: `ADD CONSTRAINT` + - `ALTER DROP CONSTRAINT`. Level: `TABLE`. 
Aliases: `DROP CONSTRAINT` + - `ALTER TTL`. Level: `TABLE`. Aliases: `ALTER MODIFY TTL`, `MODIFY TTL` + - `ALTER MATERIALIZE TTL`. Level: `TABLE`. Aliases: `MATERIALIZE TTL` + - `ALTER SETTINGS`. Level: `TABLE`. Aliases: `ALTER SETTING`, `ALTER MODIFY SETTING`, `MODIFY SETTING` + - `ALTER MOVE PARTITION`. Level: `TABLE`. Aliases: `ALTER MOVE PART`, `MOVE PARTITION`, `MOVE PART` + - `ALTER FETCH PARTITION`. Level: `TABLE`. Aliases: `FETCH PARTITION` + - `ALTER FREEZE PARTITION`. Level: `TABLE`. Aliases: `FREEZE PARTITION` + - `ALTER VIEW` Level: `GROUP` + - `ALTER VIEW REFRESH`. Level: `VIEW`. Aliases: `ALTER LIVE VIEW REFRESH`, `REFRESH VIEW` + - `ALTER VIEW MODIFY QUERY`. Level: `VIEW`. Aliases: `ALTER TABLE MODIFY QUERY` Examples of how this hierarchy is treated: -- The `ALTER` privilege includes all other `ALTER*` privileges. -- `ALTER CONSTRAINT` includes `ALTER ADD CONSTRAINT` and `ALTER DROP CONSTRAINT` privileges. +- The `ALTER` privilege includes all other `ALTER*` privileges. +- `ALTER CONSTRAINT` includes `ALTER ADD CONSTRAINT` and `ALTER DROP CONSTRAINT` privileges. **Notes** -- The `MODIFY SETTING` privilege allows modifying table engine settings. It doesn't affect settings or server configuration parameters. -- The `ATTACH` operation needs the [CREATE](#grant-create) privilege. -- The `DETACH` operation needs the [DROP](#grant-drop) privilege. -- To stop mutation by the [KILL MUTATION](misc.md#kill-mutation) query, you need to have a privilege to start this mutation. For example, if you want to stop the `ALTER UPDATE` query, you need the `ALTER UPDATE`, `ALTER TABLE`, or `ALTER` privilege. +- The `MODIFY SETTING` privilege allows modifying table engine settings. It doesn’t affect settings or server configuration parameters. +- The `ATTACH` operation needs the [CREATE](#grant-create) privilege. +- The `DETACH` operation needs the [DROP](#grant-drop) privilege. +- To stop mutation by the [KILL MUTATION](../../sql-reference/statements/misc.md#kill-mutation) query, you need to have a privilege to start this mutation. For example, if you want to stop the `ALTER UPDATE` query, you need the `ALTER UPDATE`, `ALTER TABLE`, or `ALTER` privilege. ### CREATE {#grant-create} -Allows executing [CREATE](create.md) and [ATTACH](misc.md#attach) DDL-queries according to the following hierarchy of privileges: +Allows executing [CREATE](../../sql-reference/statements/create.md) and [ATTACH](../../sql-reference/statements/misc.md#attach) DDL-queries according to the following hierarchy of privileges: -- `CREATE`. Level: `GROUP` - - `CREATE DATABASE`. Level: `DATABASE` - - `CREATE TABLE`. Level: `TABLE` - - `CREATE VIEW`. Level: `VIEW` - - `CREATE DICTIONARY`. Level: `DICTIONARY` - - `CREATE TEMPORARY TABLE`. Level: `GLOBAL` +- `CREATE`. Level: `GROUP` + - `CREATE DATABASE`. Level: `DATABASE` + - `CREATE TABLE`. Level: `TABLE` + - `CREATE VIEW`. Level: `VIEW` + - `CREATE DICTIONARY`. Level: `DICTIONARY` + - `CREATE TEMPORARY TABLE`. Level: `GLOBAL` **Notes** -- To delete the created table, a user needs [DROP](#grant-drop). +- To delete the created table, a user needs [DROP](#grant-drop). ### DROP {#grant-drop} -Allows executing [DROP](misc.md#drop) and [DETACH](misc.md#detach) queries according to the following hierarchy of privileges: - -- `DROP`. Level: - - `DROP DATABASE`. Level: `DATABASE` - - `DROP TABLE`. Level: `TABLE` - - `DROP VIEW`. Level: `VIEW` - - `DROP DICTIONARY`. 
Level: `DICTIONARY` +Allows executing [DROP](../../sql-reference/statements/misc.md#drop) and [DETACH](../../sql-reference/statements/misc.md#detach) queries according to the following hierarchy of privileges: +- `DROP`. Level: + - `DROP DATABASE`. Level: `DATABASE` + - `DROP TABLE`. Level: `TABLE` + - `DROP VIEW`. Level: `VIEW` + - `DROP DICTIONARY`. Level: `DICTIONARY` ### TRUNCATE {#grant-truncate} -Allows executing [TRUNCATE](misc.md#truncate-statement) queries. +Allows executing [TRUNCATE](../../sql-reference/statements/misc.md#truncate-statement) queries. Privilege level: `TABLE`. ### OPTIMIZE {#grant-optimize} -Allows executing [OPTIMIZE TABLE](misc.md#misc_operations-optimize) queries. +Allows executing [OPTIMIZE TABLE](../../sql-reference/statements/misc.md#misc_operations-optimize) queries. Privilege level: `TABLE`. @@ -341,20 +336,19 @@ Privilege level: `TABLE`. Allows executing `SHOW`, `DESCRIBE`, `USE`, and `EXISTS` queries according to the following hierarchy of privileges: -- `SHOW`. Level: `GROUP` - - `SHOW DATABASES`. Level: `DATABASE`. Allows to execute `SHOW DATABASES`, `SHOW CREATE DATABASE`, `USE ` queries. - - `SHOW TABLES`. Level: `TABLE`. Allows to execute `SHOW TABLES`, `EXISTS `, `CHECK
` queries. - - `SHOW COLUMNS`. Level: `COLUMN`. Allows to execute `SHOW CREATE TABLE`, `DESCRIBE` queries. - - `SHOW DICTIONARIES`. Level: `DICTIONARY`. Allows to execute `SHOW DICTIONARIES`, `SHOW CREATE DICTIONARY`, `EXISTS ` queries. +- `SHOW`. Level: `GROUP` + - `SHOW DATABASES`. Level: `DATABASE`. Allows to execute `SHOW DATABASES`, `SHOW CREATE DATABASE`, `USE ` queries. + - `SHOW TABLES`. Level: `TABLE`. Allows to execute `SHOW TABLES`, `EXISTS
`, `CHECK
` queries. + - `SHOW COLUMNS`. Level: `COLUMN`. Allows to execute `SHOW CREATE TABLE`, `DESCRIBE` queries. + - `SHOW DICTIONARIES`. Level: `DICTIONARY`. Allows to execute `SHOW DICTIONARIES`, `SHOW CREATE DICTIONARY`, `EXISTS ` queries. **Notes** A user has the `SHOW` privilege if it has any other privilege concerning the specified table, dictionary or database. - ### KILL QUERY {#grant-kill-query} -Allows executing [KILL](misc.md#kill-query-statement) queries according to the following hierarchy of privileges: +Allows executing [KILL](../../sql-reference/statements/misc.md#kill-query-statement) queries according to the following hierarchy of privileges: Privilege level: `GLOBAL`. @@ -362,121 +356,116 @@ Privilege level: `GLOBAL`. `KILL QUERY` privilege allows one user to kill queries of other users. - ### ACCESS MANAGEMENT {#grant-access-management} Allows a user to execute queries that manage users, roles and row policies. -- `ACCESS MANAGEMENT`. Level: `GROUP` - - `CREATE USER`. Level: `GLOBAL` - - `ALTER USER`. Level: `GLOBAL` - - `DROP USER`. Level: `GLOBAL` - - `CREATE ROLE`. Level: `GLOBAL` - - `ALTER ROLE`. Level: `GLOBAL` - - `DROP ROLE`. Level: `GLOBAL` - - `ROLE ADMIN`. Level: `GLOBAL` - - `CREATE ROW POLICY`. Level: `GLOBAL`. Aliases: `CREATE POLICY` - - `ALTER ROW POLICY`. Level: `GLOBAL`. Aliases: `ALTER POLICY` - - `DROP ROW POLICY`. Level: `GLOBAL`. Aliases: `DROP POLICY` - - `CREATE QUOTA`. Level: `GLOBAL` - - `ALTER QUOTA`. Level: `GLOBAL` - - `DROP QUOTA`. Level: `GLOBAL` - - `CREATE SETTINGS PROFILE`. Level: `GLOBAL`. Aliases: `CREATE PROFILE` - - `ALTER SETTINGS PROFILE`. Level: `GLOBAL`. Aliases: `ALTER PROFILE` - - `DROP SETTINGS PROFILE`. Level: `GLOBAL`. Aliases: `DROP PROFILE` - - `SHOW ACCESS`. Level: `GROUP` - - `SHOW_USERS`. Level: `GLOBAL`. Aliases: `SHOW CREATE USER` - - `SHOW_ROLES`. Level: `GLOBAL`. Aliases: `SHOW CREATE ROLE` - - `SHOW_ROW_POLICIES`. Level: `GLOBAL`. Aliases: `SHOW POLICIES`, `SHOW CREATE ROW POLICY`, `SHOW CREATE POLICY` - - `SHOW_QUOTAS`. Level: `GLOBAL`. Aliases: `SHOW CREATE QUOTA` - - `SHOW_SETTINGS_PROFILES`. Level: `GLOBAL`. Aliases: `SHOW PROFILES`, `SHOW CREATE SETTINGS PROFILE`, `SHOW CREATE PROFILE` +- `ACCESS MANAGEMENT`. Level: `GROUP` + - `CREATE USER`. Level: `GLOBAL` + - `ALTER USER`. Level: `GLOBAL` + - `DROP USER`. Level: `GLOBAL` + - `CREATE ROLE`. Level: `GLOBAL` + - `ALTER ROLE`. Level: `GLOBAL` + - `DROP ROLE`. Level: `GLOBAL` + - `ROLE ADMIN`. Level: `GLOBAL` + - `CREATE ROW POLICY`. Level: `GLOBAL`. Aliases: `CREATE POLICY` + - `ALTER ROW POLICY`. Level: `GLOBAL`. Aliases: `ALTER POLICY` + - `DROP ROW POLICY`. Level: `GLOBAL`. Aliases: `DROP POLICY` + - `CREATE QUOTA`. Level: `GLOBAL` + - `ALTER QUOTA`. Level: `GLOBAL` + - `DROP QUOTA`. Level: `GLOBAL` + - `CREATE SETTINGS PROFILE`. Level: `GLOBAL`. Aliases: `CREATE PROFILE` + - `ALTER SETTINGS PROFILE`. Level: `GLOBAL`. Aliases: `ALTER PROFILE` + - `DROP SETTINGS PROFILE`. Level: `GLOBAL`. Aliases: `DROP PROFILE` + - `SHOW ACCESS`. Level: `GROUP` + - `SHOW_USERS`. Level: `GLOBAL`. Aliases: `SHOW CREATE USER` + - `SHOW_ROLES`. Level: `GLOBAL`. Aliases: `SHOW CREATE ROLE` + - `SHOW_ROW_POLICIES`. Level: `GLOBAL`. Aliases: `SHOW POLICIES`, `SHOW CREATE ROW POLICY`, `SHOW CREATE POLICY` + - `SHOW_QUOTAS`. Level: `GLOBAL`. Aliases: `SHOW CREATE QUOTA` + - `SHOW_SETTINGS_PROFILES`. Level: `GLOBAL`. 
Aliases: `SHOW PROFILES`, `SHOW CREATE SETTINGS PROFILE`, `SHOW CREATE PROFILE` The `ROLE ADMIN` privilege allows a user to assign and revoke any roles including those which are not assigned to the user with the admin option. ### SYSTEM {#grant-system} -Allows a user to execute [SYSTEM](system.md) queries according to the following hierarchy of privileges. +Allows a user to execute [SYSTEM](../../sql-reference/statements/system.md) queries according to the following hierarchy of privileges. -- `SYSTEM`. Level: `GROUP` - - `SYSTEM SHUTDOWN`. Level: `GLOBAL`. Aliases: `SYSTEM KILL`, `SHUTDOWN` - - `SYSTEM DROP CACHE`. Aliases: `DROP CACHE` - - `SYSTEM DROP DNS CACHE`. Level: `GLOBAL`. Aliases: `SYSTEM DROP DNS`, `DROP DNS CACHE`, `DROP DNS` - - `SYSTEM DROP MARK CACHE`. Level: `GLOBAL`. Aliases: `SYSTEM DROP MARK`, `DROP MARK CACHE`, `DROP MARKS` - - `SYSTEM DROP UNCOMPRESSED CACHE`. Level: `GLOBAL`. Aliases: `SYSTEM DROP UNCOMPRESSED`, `DROP UNCOMPRESSED CACHE`, `DROP UNCOMPRESSED` - - `SYSTEM RELOAD`. Level: `GROUP` - - `SYSTEM RELOAD CONFIG`. Level: `GLOBAL`. Aliases: `RELOAD CONFIG` - - `SYSTEM RELOAD DICTIONARY`. Level: `GLOBAL`. Aliases: `SYSTEM RELOAD DICTIONARIES`, `RELOAD DICTIONARY`, `RELOAD DICTIONARIES` - - `SYSTEM RELOAD EMBEDDED DICTIONARIES`. Level: `GLOBAL`. Aliases: R`ELOAD EMBEDDED DICTIONARIES` - - `SYSTEM MERGES`. Level: `TABLE`. Aliases: `SYSTEM STOP MERGES`, `SYSTEM START MERGES`, `STOP MERGES`, `START MERGES` - - `SYSTEM TTL MERGES`. Level: `TABLE`. Aliases: `SYSTEM STOP TTL MERGES`, `SYSTEM START TTL MERGES`, `STOP TTL MERGES`, `START TTL MERGES` - - `SYSTEM FETCHES`. Level: `TABLE`. Aliases: `SYSTEM STOP FETCHES`, `SYSTEM START FETCHES`, `STOP FETCHES`, `START FETCHES` - - `SYSTEM MOVES`. Level: `TABLE`. Aliases: `SYSTEM STOP MOVES`, `SYSTEM START MOVES`, `STOP MOVES`, `START MOVES` - - `SYSTEM SENDS`. Level: `GROUP`. Aliases: `SYSTEM STOP SENDS`, `SYSTEM START SENDS`, `STOP SENDS`, `START SENDS` - - `SYSTEM DISTRIBUTED SENDS`. Level: `TABLE`. Aliases: `SYSTEM STOP DISTRIBUTED SENDS`, `SYSTEM START DISTRIBUTED SENDS`, `STOP DISTRIBUTED SENDS`, `START DISTRIBUTED SENDS` - - `SYSTEM REPLICATED SENDS`. Level: `TABLE`. Aliases: `SYSTEM STOP REPLICATED SENDS`, `SYSTEM START REPLICATED SENDS`, `STOP REPLICATED SENDS`, `START REPLICATED SENDS` - - `SYSTEM REPLICATION QUEUES`. Level: `TABLE`. Aliases: `SYSTEM STOP REPLICATION QUEUES`, `SYSTEM START REPLICATION QUEUES`, `STOP REPLICATION QUEUES`, `START REPLICATION QUEUES` - - `SYSTEM SYNC REPLICA`. Level: `TABLE`. Aliases: `SYNC REPLICA` - - `SYSTEM RESTART REPLICA`. Level: `TABLE`. Aliases: `RESTART REPLICA` - - `SYSTEM FLUSH`. Level: `GROUP` - - `SYSTEM FLUSH DISTRIBUTED`. Level: `TABLE`. Aliases: `FLUSH DISTRIBUTED` - - `SYSTEM FLUSH LOGS`. Level: `GLOBAL`. Aliases: `FLUSH LOGS` +- `SYSTEM`. Level: `GROUP` + - `SYSTEM SHUTDOWN`. Level: `GLOBAL`. Aliases: `SYSTEM KILL`, `SHUTDOWN` + - `SYSTEM DROP CACHE`. Aliases: `DROP CACHE` + - `SYSTEM DROP DNS CACHE`. Level: `GLOBAL`. Aliases: `SYSTEM DROP DNS`, `DROP DNS CACHE`, `DROP DNS` + - `SYSTEM DROP MARK CACHE`. Level: `GLOBAL`. Aliases: `SYSTEM DROP MARK`, `DROP MARK CACHE`, `DROP MARKS` + - `SYSTEM DROP UNCOMPRESSED CACHE`. Level: `GLOBAL`. Aliases: `SYSTEM DROP UNCOMPRESSED`, `DROP UNCOMPRESSED CACHE`, `DROP UNCOMPRESSED` + - `SYSTEM RELOAD`. Level: `GROUP` + - `SYSTEM RELOAD CONFIG`. Level: `GLOBAL`. Aliases: `RELOAD CONFIG` + - `SYSTEM RELOAD DICTIONARY`. Level: `GLOBAL`. 
Aliases: `SYSTEM RELOAD DICTIONARIES`, `RELOAD DICTIONARY`, `RELOAD DICTIONARIES` + - `SYSTEM RELOAD EMBEDDED DICTIONARIES`. Level: `GLOBAL`. Aliases: R`ELOAD EMBEDDED DICTIONARIES` + - `SYSTEM MERGES`. Level: `TABLE`. Aliases: `SYSTEM STOP MERGES`, `SYSTEM START MERGES`, `STOP MERGES`, `START MERGES` + - `SYSTEM TTL MERGES`. Level: `TABLE`. Aliases: `SYSTEM STOP TTL MERGES`, `SYSTEM START TTL MERGES`, `STOP TTL MERGES`, `START TTL MERGES` + - `SYSTEM FETCHES`. Level: `TABLE`. Aliases: `SYSTEM STOP FETCHES`, `SYSTEM START FETCHES`, `STOP FETCHES`, `START FETCHES` + - `SYSTEM MOVES`. Level: `TABLE`. Aliases: `SYSTEM STOP MOVES`, `SYSTEM START MOVES`, `STOP MOVES`, `START MOVES` + - `SYSTEM SENDS`. Level: `GROUP`. Aliases: `SYSTEM STOP SENDS`, `SYSTEM START SENDS`, `STOP SENDS`, `START SENDS` + - `SYSTEM DISTRIBUTED SENDS`. Level: `TABLE`. Aliases: `SYSTEM STOP DISTRIBUTED SENDS`, `SYSTEM START DISTRIBUTED SENDS`, `STOP DISTRIBUTED SENDS`, `START DISTRIBUTED SENDS` + - `SYSTEM REPLICATED SENDS`. Level: `TABLE`. Aliases: `SYSTEM STOP REPLICATED SENDS`, `SYSTEM START REPLICATED SENDS`, `STOP REPLICATED SENDS`, `START REPLICATED SENDS` + - `SYSTEM REPLICATION QUEUES`. Level: `TABLE`. Aliases: `SYSTEM STOP REPLICATION QUEUES`, `SYSTEM START REPLICATION QUEUES`, `STOP REPLICATION QUEUES`, `START REPLICATION QUEUES` + - `SYSTEM SYNC REPLICA`. Level: `TABLE`. Aliases: `SYNC REPLICA` + - `SYSTEM RESTART REPLICA`. Level: `TABLE`. Aliases: `RESTART REPLICA` + - `SYSTEM FLUSH`. Level: `GROUP` + - `SYSTEM FLUSH DISTRIBUTED`. Level: `TABLE`. Aliases: `FLUSH DISTRIBUTED` + - `SYSTEM FLUSH LOGS`. Level: `GLOBAL`. Aliases: `FLUSH LOGS` The `SYSTEM RELOAD EMBEDDED DICTIONARIES` privilege implicitly granted by the `SYSTEM RELOAD DICTIONARY ON *.*` privilege. - ### INTROSPECTION {#grant-introspection} Allows using [introspection](../../operations/optimizing-performance/sampling-query-profiler.md) functions. -- `INTROSPECTION`. Level: `GROUP`. Aliases: `INTROSPECTION FUNCTIONS` - - `addressToLine`. Level: `GLOBAL` - - `addressToSymbol`. Level: `GLOBAL` - - `demangle`. Level: `GLOBAL` - +- `INTROSPECTION`. Level: `GROUP`. Aliases: `INTROSPECTION FUNCTIONS` + - `addressToLine`. Level: `GLOBAL` + - `addressToSymbol`. Level: `GLOBAL` + - `demangle`. Level: `GLOBAL` ### SOURCES {#grant-sources} -Allows using external data sources. Applies to [table engines](../../engines/table-engines/index.md) and [table functions](../table-functions/index.md#table-functions). +Allows using external data sources. Applies to [table engines](../../engines/table-engines/index.md) and [table functions](../../sql-reference/table-functions/index.md#table-functions). -- `SOURCES`. Level: `GROUP` - - `FILE`. Level: `GLOBAL` - - `URL`. Level: `GLOBAL` - - `REMOTE`. Level: `GLOBAL` - - `YSQL`. Level: `GLOBAL` - - `ODBC`. Level: `GLOBAL` - - `JDBC`. Level: `GLOBAL` - - `HDFS`. Level: `GLOBAL` - - `S3`. Level: `GLOBAL` +- `SOURCES`. Level: `GROUP` + - `FILE`. Level: `GLOBAL` + - `URL`. Level: `GLOBAL` + - `REMOTE`. Level: `GLOBAL` + - `YSQL`. Level: `GLOBAL` + - `ODBC`. Level: `GLOBAL` + - `JDBC`. Level: `GLOBAL` + - `HDFS`. Level: `GLOBAL` + - `S3`. Level: `GLOBAL` The `SOURCES` privilege enables use of all the sources. Also you can grant a privilege for each source individually. To use sources, you need additional privileges. Examples: -- To create a table with the [MySQL table engine](../../engines/table-engines/integrations/mysql.md), you need `CREATE TABLE (ON db.table_name)` and `MYSQL` privileges. 
-- To use the [mysql table function](../table-functions/mysql.md), you need `CREATE TEMPORARY TABLE` and `MYSQL` privileges. +- To create a table with the [MySQL table engine](../../engines/table-engines/integrations/mysql.md), you need `CREATE TABLE (ON db.table_name)` and `MYSQL` privileges. +- To use the [mysql table function](../../sql-reference/table-functions/mysql.md), you need `CREATE TEMPORARY TABLE` and `MYSQL` privileges. ### dictGet {#grant-dictget} -- `dictGet`. Aliases: `dictHas`, `dictGetHierarchy`, `dictIsIn` +- `dictGet`. Aliases: `dictHas`, `dictGetHierarchy`, `dictIsIn` -Allows a user to execute [dictGet](../functions/ext-dict-functions.md#dictget), [dictHas](../functions/ext-dict-functions.md#dicthas), [dictGetHierarchy](../functions/ext-dict-functions.md#dictgethierarchy), [dictIsIn](../functions/ext-dict-functions.md#dictisin) functions. +Allows a user to execute [dictGet](../../sql-reference/functions/ext-dict-functions.md#dictget), [dictHas](../../sql-reference/functions/ext-dict-functions.md#dicthas), [dictGetHierarchy](../../sql-reference/functions/ext-dict-functions.md#dictgethierarchy), [dictIsIn](../../sql-reference/functions/ext-dict-functions.md#dictisin) functions. Privilege level: `DICTIONARY`. **Examples** -- `GRANT dictGet ON mydb.mydictionary TO john` -- `GRANT dictGet ON mydictionary TO john` +- `GRANT dictGet ON mydb.mydictionary TO john` +- `GRANT dictGet ON mydictionary TO john` ### ALL {#grant-all} Grants all the privileges on regulated entity to a user account or a role. - ### NONE {#grant-none} -Doesn't grant any privileges. - +Doesn’t grant any privileges. ### ADMIN OPTION {#admin-option-privilege} diff --git a/docs/en/sql-reference/statements/insert-into.md b/docs/en/sql-reference/statements/insert-into.md index 6f576ad7eb9..28a05ff2870 100644 --- a/docs/en/sql-reference/statements/insert-into.md +++ b/docs/en/sql-reference/statements/insert-into.md @@ -46,7 +46,7 @@ You can insert data separately from the query by using the command-line client o ### Constraints {#constraints} -If table has [constraints](create.md#constraints), their expressions will be checked for each row of inserted data. If any of those constraints is not satisfied — server will raise an exception containing constraint name and expression, the query will be stopped. +If table has [constraints](../../sql-reference/statements/create.md#constraints), their expressions will be checked for each row of inserted data. If any of those constraints is not satisfied — server will raise an exception containing constraint name and expression, the query will be stopped. ### Inserting the Results of `SELECT` {#insert_query_insert-select} @@ -61,7 +61,7 @@ None of the data formats except Values allow setting values to expressions such Other queries for modifying data parts are not supported: `UPDATE`, `DELETE`, `REPLACE`, `MERGE`, `UPSERT`, `INSERT UPDATE`. However, you can delete old data using `ALTER TABLE ... DROP PARTITION`. -`FORMAT` clause must be specified in the end of query if `SELECT` clause contains table function [input()](../table-functions/input.md). +`FORMAT` clause must be specified in the end of query if `SELECT` clause contains table function [input()](../../sql-reference/table-functions/input.md). 
### Performance Considerations {#performance-considerations} diff --git a/docs/en/sql-reference/statements/misc.md b/docs/en/sql-reference/statements/misc.md index bd978908588..59d07f5eebb 100644 --- a/docs/en/sql-reference/statements/misc.md +++ b/docs/en/sql-reference/statements/misc.md @@ -69,7 +69,7 @@ Returns the following `String` type columns: - `name` — Column name. - `type`— Column type. -- `default_type` — Clause that is used in [default expression](create.md#create-default-values) (`DEFAULT`, `MATERIALIZED` or `ALIAS`). Column contains an empty string, if the default expression isn’t specified. +- `default_type` — Clause that is used in [default expression](../../sql-reference/statements/create.md#create-default-values) (`DEFAULT`, `MATERIALIZED` or `ALIAS`). Column contains an empty string, if the default expression isn’t specified. - `default_expression` — Value specified in the `DEFAULT` clause. - `comment_expression` — Comment text. @@ -117,11 +117,10 @@ Deletes a user. ### Syntax {#drop-user-syntax} -```sql +``` sql DROP USER [IF EXISTS] name [,...] [ON CLUSTER cluster_name] ``` - ## DROP ROLE {#drop-role-statement} Deletes a role. @@ -130,7 +129,7 @@ Deleted role is revoked from all the entities where it was assigned. ### Syntax {#drop-role-syntax} -```sql +``` sql DROP ROLE [IF EXISTS] name [,...] [ON CLUSTER cluster_name] ``` @@ -146,7 +145,6 @@ Deleted row policy is revoked from all the entities where it was assigned. DROP [ROW] POLICY [IF EXISTS] name [,...] ON [database.]table [,...] [ON CLUSTER cluster_name] ``` - ## DROP QUOTA {#drop-quota-statement} Deletes a quota. @@ -159,7 +157,6 @@ Deleted quota is revoked from all the entities where it was assigned. DROP QUOTA [IF EXISTS] name [,...] [ON CLUSTER cluster_name] ``` - ## DROP SETTINGS PROFILE {#drop-settings-profile-statement} Deletes a settings profile. @@ -172,7 +169,6 @@ Deleted settings profile is revoked from all the entities where it was assigned. DROP [SETTINGS] PROFILE [IF EXISTS] name [,...] [ON CLUSTER cluster_name] ``` - ## EXISTS {#exists-statement} ``` sql @@ -225,7 +221,7 @@ KILL MUTATION [ON CLUSTER cluster] [FORMAT format] ``` -Tries to cancel and remove [mutations](alter.md#alter-mutations) that are currently executing. Mutations to cancel are selected from the [`system.mutations`](../../operations/system-tables.md#system_tables-mutations) table using the filter specified by the `WHERE` clause of the `KILL` query. +Tries to cancel and remove [mutations](../../sql-reference/statements/alter.md#alter-mutations) that are currently executing. Mutations to cancel are selected from the [`system.mutations`](../../operations/system-tables/mutations.md#system_tables-mutations) table using the filter specified by the `WHERE` clause of the `KILL` query. A test query (`TEST`) only checks the user’s rights and displays a list of queries to stop. @@ -256,7 +252,7 @@ The `OPTMIZE` query is also supported for the [MaterializedView](../../engines/t When `OPTIMIZE` is used with the [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md) family of table engines, ClickHouse creates a task for merging and waits for execution on all nodes (if the `replication_alter_partitions_sync` setting is enabled). - If `OPTIMIZE` doesn’t perform a merge for any reason, it doesn’t notify the client. To enable notifications, use the [optimize\_throw\_if\_noop](../../operations/settings/settings.md#setting-optimize_throw_if_noop) setting. 
-- If you specify a `PARTITION`, only the specified partition is optimized. [How to set partition expression](alter.md#alter-how-to-specify-part-expr). +- If you specify a `PARTITION`, only the specified partition is optimized. [How to set partition expression](../../sql-reference/statements/alter.md#alter-how-to-specify-part-expr). - If you specify `FINAL`, optimization is performed even when all the data is already in one part. - If you specify `DEDUPLICATE`, then completely identical rows will be deduplicated (all columns are compared), it makes sense only for the MergeTree engine. @@ -303,8 +299,7 @@ SET ROLE {DEFAULT | NONE | role [,...] | ALL | ALL EXCEPT role [,...]} Sets default roles to a user. -Default roles are automatically activated at user login. You can set as default only the previously granted roles. If the role isn't granted to a user, ClickHouse throws an exception. - +Default roles are automatically activated at user login. You can set as default only the previously granted roles. If the role isn’t granted to a user, ClickHouse throws an exception. ### Syntax {#set-default-role-syntax} @@ -312,7 +307,6 @@ Default roles are automatically activated at user login. You can set as default SET DEFAULT ROLE {NONE | role [,...] | ALL | ALL EXCEPT role [,...]} TO {user|CURRENT_USER} [,...] ``` - ### Examples {#set-default-role-examples} Set multiple default roles to a user: @@ -335,11 +329,10 @@ SET DEFAULT ROLE NONE TO user Set all the granted roles as default excepting some of them: -```sql +``` sql SET DEFAULT ROLE ALL EXCEPT role1, role2 TO user ``` - ## TRUNCATE {#truncate-statement} ``` sql diff --git a/docs/en/sql-reference/statements/revoke.md b/docs/en/sql-reference/statements/revoke.md index 66ff978ddfb..8b322689ad0 100644 --- a/docs/en/sql-reference/statements/revoke.md +++ b/docs/en/sql-reference/statements/revoke.md @@ -3,7 +3,7 @@ toc_priority: 40 toc_title: REVOKE --- -# REVOKE +# REVOKE {#revoke} Revokes privileges from users or roles. @@ -25,7 +25,6 @@ REVOKE [ON CLUSTER cluster_name] [ADMIN OPTION FOR] role [,...] FROM {user | rol To revoke some privilege you can use a privilege of a wider scope than you plan to revoke. For example, if a user has the `SELECT (x,y)` privilege, administrator can execute `REVOKE SELECT(x,y) ...`, or `REVOKE SELECT * ...`, or even `REVOKE ALL PRIVILEGES ...` query to revoke this privilege. - ### Partial Revokes {#partial-revokes-dscr} You can revoke a part of a privilege. For example, if a user has the `SELECT *.*` privilege you can revoke from it a privilege to read data from some table or a database. diff --git a/docs/en/sql-reference/statements/select/array-join.md b/docs/en/sql-reference/statements/select/array-join.md index 21f6ea40492..b4d99aaf6b2 100644 --- a/docs/en/sql-reference/statements/select/array-join.md +++ b/docs/en/sql-reference/statements/select/array-join.md @@ -6,7 +6,7 @@ toc_title: ARRAY JOIN It is a common operation for tables that contain an array column to produce a new table that has a column with each individual array element of that initial column, while values of other columns are duplicated. This is the basic case of what `ARRAY JOIN` clause does. -Its name comes from the fact that it can be looked at as executing `JOIN` with an array or nested data structure. The intent is similar to the [arrayJoin](../../functions/array-join.md#functions_arrayjoin) function, but the clause functionality is broader. 
+Its name comes from the fact that it can be looked at as executing `JOIN` with an array or nested data structure. The intent is similar to the [arrayJoin](../../../sql-reference/functions/array-join.md#functions_arrayjoin) function, but the clause functionality is broader. Syntax: @@ -25,7 +25,7 @@ Supported types of `ARRAY JOIN` are listed below: - `ARRAY JOIN` - In base case, empty arrays are not included in the result of `JOIN`. - `LEFT ARRAY JOIN` - The result of `JOIN` contains rows with empty arrays. The value for an empty array is set to the default value for the array element type (usually 0, empty string or NULL). -## Basic ARRAY JOIN Examples +## Basic ARRAY JOIN Examples {#basic-array-join-examples} The examples below demonstrate the usage of the `ARRAY JOIN` and `LEFT ARRAY JOIN` clauses. Let’s create a table with an [Array](../../../sql-reference/data-types/array.md) type column and insert values into it: @@ -276,6 +276,6 @@ ARRAY JOIN nest AS n, arrayEnumerate(`nest.x`) AS num; └───────┴─────┴─────┴─────────┴────────────┴─────┘ ``` -## Implementation Details +## Implementation Details {#implementation-details} -The query execution order is optimized when running `ARRAY JOIN`. Although `ARRAY JOIN` must always be specified before the [WHERE](where.md)/[PREWHERE](prewhere.md) clause in a query, technically they can be performed in any order, unless result of `ARRAY JOIN` is used for filtering. The processing order is controlled by the query optimizer. +The query execution order is optimized when running `ARRAY JOIN`. Although `ARRAY JOIN` must always be specified before the [WHERE](../../../sql-reference/statements/select/where.md)/[PREWHERE](../../../sql-reference/statements/select/prewhere.md) clause in a query, technically they can be performed in any order, unless result of `ARRAY JOIN` is used for filtering. The processing order is controlled by the query optimizer. diff --git a/docs/en/sql-reference/statements/select/distinct.md b/docs/en/sql-reference/statements/select/distinct.md index d7799a8343e..71365b18855 100644 --- a/docs/en/sql-reference/statements/select/distinct.md +++ b/docs/en/sql-reference/statements/select/distinct.md @@ -6,23 +6,23 @@ toc_title: DISTINCT If `SELECT DISTINCT` is specified, only unique rows will remain in a query result. Thus only a single row will remain out of all the sets of fully matching rows in the result. -## Null Processing +## Null Processing {#null-processing} -`DISTINCT` works with [NULL](../../syntax.md#null-literal) as if `NULL` were a specific value, and `NULL==NULL`. In other words, in the `DISTINCT` results, different combinations with `NULL` occur only once. It differs from `NULL` processing in most other contexts. +`DISTINCT` works with [NULL](../../../sql-reference/syntax.md#null-literal) as if `NULL` were a specific value, and `NULL==NULL`. In other words, in the `DISTINCT` results, different combinations with `NULL` occur only once. It differs from `NULL` processing in most other contexts. -## Alternatives +## Alternatives {#alternatives} -It is possible to obtain the same result by applying [GROUP BY](group-by.md) across the same set of values as specified as `SELECT` clause, without using any aggregate functions. But there are few differences from `GROUP BY` approach: +It is possible to obtain the same result by applying [GROUP BY](../../../sql-reference/statements/select/group-by.md) across the same set of values as specified as `SELECT` clause, without using any aggregate functions. 
But there are few differences from `GROUP BY` approach: - `DISTINCT` can be applied together with `GROUP BY`. -- When [ORDER BY](order-by.md) is omitted and [LIMIT](limit.md) is defined, the query stops running immediately after the required number of different rows has been read. +- When [ORDER BY](../../../sql-reference/statements/select/order-by.md) is omitted and [LIMIT](../../../sql-reference/statements/select/limit.md) is defined, the query stops running immediately after the required number of different rows has been read. - Data blocks are output as they are processed, without waiting for the entire query to finish running. -## Limitations +## Limitations {#limitations} `DISTINCT` is not supported if `SELECT` has at least one array column. -## Examples +## Examples {#examples} ClickHouse supports using the `DISTINCT` and `ORDER BY` clauses for different columns in one query. The `DISTINCT` clause is executed before the `ORDER BY` clause. diff --git a/docs/en/sql-reference/statements/select/format.md b/docs/en/sql-reference/statements/select/format.md index ca4b89fae71..c3104bd12fe 100644 --- a/docs/en/sql-reference/statements/select/format.md +++ b/docs/en/sql-reference/statements/select/format.md @@ -8,10 +8,10 @@ ClickHouse supports a wide range of [serialization formats](../../../interfaces/ Specific format might be used either for convenience, integration with other systems or performance gain. -## Default Format +## Default Format {#default-format} -If the `FORMAT` clause is omitted, the default format is used, which depends on both the settings and the interface used for accessing the ClickHouse server. For the [HTTP interface](../../../interfaces/http.md) and the [command-line client ](../../../interfaces/cli.md) in batch mode, the default format is `TabSeparated`. For the command-line client in interactive mode, the default format is `PrettyCompact` (it produces compact human-readable tables). +If the `FORMAT` clause is omitted, the default format is used, which depends on both the settings and the interface used for accessing the ClickHouse server. For the [HTTP interface](../../../interfaces/http.md) and the [command-line client](../../../interfaces/cli.md) in batch mode, the default format is `TabSeparated`. For the command-line client in interactive mode, the default format is `PrettyCompact` (it produces compact human-readable tables). -## Implementation Details +## Implementation Details {#implementation-details} When using the command-line client, data is always passed over the network in an internal efficient format (`Native`). The client independently interprets the `FORMAT` clause of the query and formats the data itself (thus relieving the network and the server from the extra load). 
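As a hedged illustration of overriding the default output format (the `hits` table and `EventDate` column are hypothetical), the clause simply follows the rest of the query:

``` sql
SELECT EventDate, count() AS c
FROM hits
GROUP BY EventDate
ORDER BY EventDate
FORMAT TabSeparated
```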
diff --git a/docs/en/sql-reference/statements/select/from.md b/docs/en/sql-reference/statements/select/from.md index fa29576276c..dbfd5431861 100644 --- a/docs/en/sql-reference/statements/select/from.md +++ b/docs/en/sql-reference/statements/select/from.md @@ -7,14 +7,14 @@ toc_title: FROM The `FROM` clause specifies the source to read data from: - [Table](../../../engines/table-engines/index.md) -- [Subquery](index.md) {## TODO: better link ##} -- [Table function](../../table-functions/index.md#table-functions) +- [Subquery](../../../sql-reference/statements/select/index.md) {## TODO: better link ##} +- [Table function](../../../sql-reference/table-functions/index.md#table-functions) -[JOIN](join.md) and [ARRAY JOIN](array-join.md) clauses may also be used to extend the functionality of the `FROM` clause. +[JOIN](../../../sql-reference/statements/select/join.md) and [ARRAY JOIN](../../../sql-reference/statements/select/array-join.md) clauses may also be used to extend the functionality of the `FROM` clause. Subquery is another `SELECT` query that may be specified in parenthesis inside `FROM` clause. -`FROM` clause can contain multiple data sources, separated by commas, which is equivalent of performing [CROSS JOIN](join.md) on them. +`FROM` clause can contain multiple data sources, separated by commas, which is equivalent of performing [CROSS JOIN](../../../sql-reference/statements/select/join.md) on them. ## FINAL Modifier {#select-from-final} @@ -25,16 +25,16 @@ It is applicable when selecting data from tables that use the [MergeTree](../../ - [Replicated](../../../engines/table-engines/mergetree-family/replication.md) versions of `MergeTree` engines. - [View](../../../engines/table-engines/special/view.md), [Buffer](../../../engines/table-engines/special/buffer.md), [Distributed](../../../engines/table-engines/special/distributed.md), and [MaterializedView](../../../engines/table-engines/special/materializedview.md) engines that operate over other engines, provided they were created over `MergeTree`-engine tables. -### Drawbacks +### Drawbacks {#drawbacks} Queries that use `FINAL` are executed not as fast as similar queries that don’t, because: - Query is executed in a single thread and data is merged during query execution. - Queries with `FINAL` read primary key columns in addition to the columns specified in the query. -**In most cases, avoid using `FINAL`.** The common approach is to use different queries that assume the background processes of the `MergeTree` engine have't happened yet and deal with it by applying aggregation (for example, to discard duplicates). {## TODO: examples ##} +**In most cases, avoid using `FINAL`.** The common approach is to use different queries that assume the background processes of the `MergeTree` engine have’t happened yet and deal with it by applying aggregation (for example, to discard duplicates). {## TODO: examples ##} -## Implementation Details +## Implementation Details {#implementation-details} If the `FROM` clause is omitted, data will be read from the `system.one` table. The `system.one` table contains exactly one row (this table fulfills the same purpose as the DUAL table found in other DBMSs). 
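A sketch of the recommendation above, assuming a hypothetical `orders` table on a `ReplacingMergeTree` engine with `user_id`, `status` and `version` columns: the first query forces merging at read time with `FINAL`, the second reaches a similar result with aggregation and is usually cheaper.

``` sql
-- Reads with merging at query time; simple but slower.
SELECT * FROM orders FINAL WHERE user_id = 42;

-- Alternative: keep only the latest row per key by aggregating.
SELECT user_id, argMax(status, version) AS status
FROM orders
WHERE user_id = 42
GROUP BY user_id;
```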
diff --git a/docs/en/sql-reference/statements/select/group-by.md b/docs/en/sql-reference/statements/select/group-by.md index 49d3a2da8a4..0c2b0a2dc4a 100644 --- a/docs/en/sql-reference/statements/select/group-by.md +++ b/docs/en/sql-reference/statements/select/group-by.md @@ -7,15 +7,15 @@ toc_title: GROUP BY `GROUP BY` clause switches the `SELECT` query into an aggregation mode, which works as follows: - `GROUP BY` clause contains a list of expressions (or a single expression, which is considered to be the list of length one). This list acts as a “grouping key”, while each individual expression will be referred to as a “key expressions”. -- All the expressions in the [SELECT](index.md), [HAVING](having.md), and [ORDER BY](order-by.md) clauses **must** be calculated based on key expressions **or** on [aggregate functions](../../../sql-reference/aggregate-functions/index.md) over non-key expressions (including plain columns). In other words, each column selected from the table must be used either in a key expression or inside an aggregate function, but not both. -- Result of aggregating `SELECT` query will contain as many rows as there were unique values of “grouping key” in source table. Usually this signficantly reduces the row count, often by orders of magnitude, but not necessarily: row count stays the same if all “grouping key” values were distinct. +- All the expressions in the [SELECT](../../../sql-reference/statements/select/index.md), [HAVING](../../../sql-reference/statements/select/having.md), and [ORDER BY](../../../sql-reference/statements/select/order-by.md) clauses **must** be calculated based on key expressions **or** on [aggregate functions](../../../sql-reference/aggregate-functions/index.md) over non-key expressions (including plain columns). In other words, each column selected from the table must be used either in a key expression or inside an aggregate function, but not both. +- Result of aggregating `SELECT` query will contain as many rows as there were unique values of “grouping key” in source table. Usually this signficantly reduces the row count, often by orders of magnitude, but not necessarily: row count stays the same if all “grouping key” values were distinct. !!! note "Note" - There's an additional way to run aggregation over a table. If a query contains table columns only inside aggregate functions, the `GROUP BY clause` can be omitted, and aggregation by an empty set of keys is assumed. Such queries always return exactly one row. + There’s an additional way to run aggregation over a table. If a query contains table columns only inside aggregate functions, the `GROUP BY clause` can be omitted, and aggregation by an empty set of keys is assumed. Such queries always return exactly one row. ## NULL Processing {#null-processing} -For grouping, ClickHouse interprets [NULL](../../syntax.md#null-literal) as a value, and `NULL==NULL`. It differs from `NULL` processing in most other contexts. +For grouping, ClickHouse interprets [NULL](../../../sql-reference/syntax.md#null-literal) as a value, and `NULL==NULL`. It differs from `NULL` processing in most other contexts. Here’s an example to show what this means. @@ -56,9 +56,9 @@ This extra row is only produced in `JSON*`, `TabSeparated*`, and `Pretty*` forma - In `Pretty*` formats, the row is output as a separate table after the main result. - In the other formats it is not available. -`WITH TOTALS` can be run in different ways when [HAVING](having.md) is present. The behavior depends on the `totals_mode` setting. 
+`WITH TOTALS` can be run in different ways when [HAVING](../../../sql-reference/statements/select/having.md) is present. The behavior depends on the `totals_mode` setting. -### Configuring Totals Processing +### Configuring Totals Processing {#configuring-totals-processing} By default, `totals_mode = 'before_having'`. In this case, ‘totals’ is calculated across all rows, including the ones that don’t pass through HAVING and `max_rows_to_group_by`. @@ -74,9 +74,9 @@ The other alternatives include only the rows that pass through HAVING in ‘tota If `max_rows_to_group_by` and `group_by_overflow_mode = 'any'` are not used, all variations of `after_having` are the same, and you can use any of them (for example, `after_having_auto`). -You can use `WITH TOTALS` in subqueries, including subqueries in the [JOIN](join.md) clause (in this case, the respective total values are combined). +You can use `WITH TOTALS` in subqueries, including subqueries in the [JOIN](../../../sql-reference/statements/select/join.md) clause (in this case, the respective total values are combined). -## Examples +## Examples {#examples} Example: @@ -109,9 +109,9 @@ For every different key value encountered, `GROUP BY` calculates a set of aggreg A constant can’t be specified as arguments for aggregate functions. Example: `sum(1)`. Instead of this, you can get rid of the constant. Example: `count()`. -## Implementation Details +## Implementation Details {#implementation-details} -Aggregation is one of the most important features of a column-oriented DBMS, and thus it's implementation is one of the most heavily optimized parts of ClickHouse. By default, aggregation is done in memory using a hash-table. It has 40+ specializations that are chosen automatically depending on “grouping key” data types. +Aggregation is one of the most important features of a column-oriented DBMS, and thus it’s implementation is one of the most heavily optimized parts of ClickHouse. By default, aggregation is done in memory using a hash-table. It has 40+ specializations that are chosen automatically depending on “grouping key” data types. ### GROUP BY in External Memory {#select-group-by-in-external-memory} @@ -128,4 +128,4 @@ When merging data flushed to the disk, as well as when merging results from remo When external aggregation is enabled, if there was less than `max_bytes_before_external_group_by` of data (i.e. data was not flushed), the query runs just as fast as without external aggregation. If any temporary data was flushed, the run time will be several times longer (approximately three times). -If you have an [ORDER BY](order-by.md) with a [LIMIT](limit.md) after `GROUP BY`, then the amount of used RAM depends on the amount of data in `LIMIT`, not in the whole table. But if the `ORDER BY` doesn’t have `LIMIT`, don’t forget to enable external sorting (`max_bytes_before_external_sort`). +If you have an [ORDER BY](../../../sql-reference/statements/select/order-by.md) with a [LIMIT](../../../sql-reference/statements/select/limit.md) after `GROUP BY`, then the amount of used RAM depends on the amount of data in `LIMIT`, not in the whole table. But if the `ORDER BY` doesn’t have `LIMIT`, don’t forget to enable external sorting (`max_bytes_before_external_sort`). 
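A hedged sketch of enabling external aggregation (the `visits` table, `UserID` column and byte thresholds are illustrative only):

``` sql
-- Spill aggregation state to disk after roughly 10 GB,
-- keeping max_memory_usage about twice as large.
SET max_bytes_before_external_group_by = 10000000000;
SET max_memory_usage = 20000000000;

SELECT UserID, count() AS hits
FROM visits
GROUP BY UserID
ORDER BY hits DESC
LIMIT 10;
```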
diff --git a/docs/en/sql-reference/statements/select/having.md b/docs/en/sql-reference/statements/select/having.md index 7a42f43bf53..93d56097b11 100644 --- a/docs/en/sql-reference/statements/select/having.md +++ b/docs/en/sql-reference/statements/select/having.md @@ -4,10 +4,10 @@ toc_title: HAVING # HAVING Clause {#having-clause} -Allows filtering the aggregation results produced by [GROUP BY](group-by.md). It is similar to the [WHERE](where.md) clause, but the difference is that `WHERE` is performed before aggregation, while `HAVING` is performed after it. +Allows filtering the aggregation results produced by [GROUP BY](../../../sql-reference/statements/select/group-by.md). It is similar to the [WHERE](../../../sql-reference/statements/select/where.md) clause, but the difference is that `WHERE` is performed before aggregation, while `HAVING` is performed after it. It is possible to reference aggregation results from `SELECT` clause in `HAVING` clause by their alias. Alternatively, `HAVING` clause can filter on results of additional aggregates that are not returned in query results. -## Limitations +## Limitations {#limitations} `HAVING` can’t be used if aggregation is not performed. Use `WHERE` instead. diff --git a/docs/en/sql-reference/statements/select/index.md b/docs/en/sql-reference/statements/select/index.md index a2dbeaded95..8db1b5ae835 100644 --- a/docs/en/sql-reference/statements/select/index.md +++ b/docs/en/sql-reference/statements/select/index.md @@ -1,15 +1,15 @@ --- -toc_priority: 33 -toc_folder_title: SELECT -toc_title: Overview title: SELECT Query +toc_folder_title: SELECT +toc_priority: 33 +toc_title: Overview --- # SELECT Query {#select-queries-syntax} `SELECT` queries perform data retrieval. By default, the requested data is returned to the client, while in conjunction with [INSERT INTO](../../../sql-reference/statements/insert-into.md) it can be forwarded to a different table. 
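For example (a sketch with hypothetical `raw_events` and `daily_totals` tables), the same `SELECT` can either return rows to the client or feed another table:

``` sql
-- Returned to the client.
SELECT toDate(event_time) AS day, count() AS events
FROM raw_events
GROUP BY day;

-- Forwarded to another table instead.
INSERT INTO daily_totals
SELECT toDate(event_time) AS day, count() AS events
FROM raw_events
GROUP BY day;
```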
-## Syntax +## Syntax {#syntax} ``` sql [WITH expr_list|(subquery)] @@ -34,25 +34,25 @@ All clauses are optional, except for the required list of expressions immediatel Specifics of each optional clause are covered in separate sections, which are listed in the same order as they are executed: -- [WITH clause](with.md) -- [FROM clause](from.md) -- [SAMPLE clause](sample.md) -- [JOIN clause](join.md) -- [PREWHERE clause](prewhere.md) -- [WHERE clause](where.md) -- [GROUP BY clause](group-by.md) -- [LIMIT BY clause](limit-by.md) -- [HAVING clause](having.md) +- [WITH clause](../../../sql-reference/statements/select/with.md) +- [FROM clause](../../../sql-reference/statements/select/from.md) +- [SAMPLE clause](../../../sql-reference/statements/select/sample.md) +- [JOIN clause](../../../sql-reference/statements/select/join.md) +- [PREWHERE clause](../../../sql-reference/statements/select/prewhere.md) +- [WHERE clause](../../../sql-reference/statements/select/where.md) +- [GROUP BY clause](../../../sql-reference/statements/select/group-by.md) +- [LIMIT BY clause](../../../sql-reference/statements/select/limit-by.md) +- [HAVING clause](../../../sql-reference/statements/select/having.md) - [SELECT clause](#select-clause) -- [DISTINCT clause](distinct.md) -- [LIMIT clause](limit.md) -- [UNION ALL clause](union-all.md) -- [INTO OUTFILE clause](into-outfile.md) -- [FORMAT clause](format.md) +- [DISTINCT clause](../../../sql-reference/statements/select/distinct.md) +- [LIMIT clause](../../../sql-reference/statements/select/limit.md) +- [UNION ALL clause](../../../sql-reference/statements/select/union-all.md) +- [INTO OUTFILE clause](../../../sql-reference/statements/select/into-outfile.md) +- [FORMAT clause](../../../sql-reference/statements/select/format.md) ## SELECT Clause {#select-clause} -[Expressions](../../syntax.md#syntax-expressions) specified in the `SELECT` clause are calculated after all the operations in the clauses described above are finished. These expressions work as if they apply to separate rows in the result. If expressions in the `SELECT` clause contain aggregate functions, then ClickHouse processes aggregate functions and expressions used as their arguments during the [GROUP BY](group-by.md) aggregation. +[Expressions](../../../sql-reference/syntax.md#syntax-expressions) specified in the `SELECT` clause are calculated after all the operations in the clauses described above are finished. These expressions work as if they apply to separate rows in the result. If expressions in the `SELECT` clause contain aggregate functions, then ClickHouse processes aggregate functions and expressions used as their arguments during the [GROUP BY](../../../sql-reference/statements/select/group-by.md) aggregation. If you want to include all columns in the result, use the asterisk (`*`) symbol. For example, `SELECT * FROM ...`. @@ -113,7 +113,7 @@ In this example, `COLUMNS('a')` returns two columns: `aa` and `ab`. `COLUMNS('c' Columns that matched the `COLUMNS` expression can have different data types. If `COLUMNS` doesn’t match any columns and is the only expression in `SELECT`, ClickHouse throws an exception. -### Asterisk +### Asterisk {#asterisk} You can put an asterisk in any part of a query instead of an expression. When the query is analyzed, the asterisk is expanded to a list of all table columns (excluding the `MATERIALIZED` and `ALIAS` columns). There are only a few cases when using an asterisk is justified: @@ -141,8 +141,7 @@ You can use synonyms (`AS` aliases) in any part of a query. 
The `GROUP BY` and `ORDER BY` clauses do not support positional arguments. This contradicts MySQL, but conforms to standard SQL. For example, `GROUP BY 1, 2` will be interpreted as grouping by constants (i.e. aggregation of all rows into one). - -## Implementation Details +## Implementation Details {#implementation-details} If the query omits the `DISTINCT`, `GROUP BY` and `ORDER BY` clauses and the `IN` and `JOIN` subqueries, the query will be completely stream processed, using O(1) amount of RAM. Otherwise, the query might consume a lot of RAM if the appropriate restrictions are not specified: @@ -150,7 +149,7 @@ If the query omits the `DISTINCT`, `GROUP BY` and `ORDER BY` clauses and the `IN - `max_rows_to_group_by` - `max_rows_to_sort` - `max_rows_in_distinct` -- `max_bytes_in_distinct` +- `max_bytes_in_distinct` - `max_rows_in_set` - `max_bytes_in_set` - `max_rows_in_join` @@ -160,5 +159,4 @@ If the query omits the `DISTINCT`, `GROUP BY` and `ORDER BY` clauses and the `IN For more information, see the section “Settings”. It is possible to use external sorting (saving temporary tables to a disk) and external aggregation. - {## [Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/) ##} diff --git a/docs/en/sql-reference/statements/select/into-outfile.md b/docs/en/sql-reference/statements/select/into-outfile.md index 26b9c2cf8cb..4380cb11fa3 100644 --- a/docs/en/sql-reference/statements/select/into-outfile.md +++ b/docs/en/sql-reference/statements/select/into-outfile.md @@ -6,7 +6,7 @@ toc_title: INTO OUTFILE Add the `INTO OUTFILE filename` clause (where filename is a string literal) to `SELECT query` to redirect its output to the specified file on the client-side. -## Implementation Details +## Implementation Details {#implementation-details} - This functionality is available in the [command-line client](../../../interfaces/cli.md) and [clickhouse-local](../../../operations/utilities/clickhouse-local.md). Thus a query sent via [HTTP interface](../../../interfaces/http.md) will fail. - The query will fail if a file with the same filename already exists. diff --git a/docs/en/sql-reference/statements/select/join.md b/docs/en/sql-reference/statements/select/join.md index 5ac3f4a0e25..0b42ed1a0d2 100644 --- a/docs/en/sql-reference/statements/select/join.md +++ b/docs/en/sql-reference/statements/select/join.md @@ -4,9 +4,10 @@ toc_title: JOIN # JOIN Clause {#select-join} -Join produces a new table by combining columns from one or multiple tables by using values common to each. It is a common operation in databases with SQL support, which corresponds to [relational algebra](https://en.wikipedia.org/wiki/Relational_algebra#Joins_and_join-like_operators) join. The special case of one table join is often referred to as "self-join". +Join produces a new table by combining columns from one or multiple tables by using values common to each. It is a common operation in databases with SQL support, which corresponds to [relational algebra](https://en.wikipedia.org/wiki/Relational_algebra#Joins_and_join-like_operators) join. The special case of one table join is often referred to as “self-join”. Syntax: + ``` sql SELECT FROM @@ -14,7 +15,7 @@ FROM (ON )|(USING ) ... ``` -Expressions from `ON` clause and columns from `USING` clause are called "join keys". Unless otherwise stated, join produces a [Cartesian product](https://en.wikipedia.org/wiki/Cartesian_product) from rows with matching "join keys", which might produce results with much more rows than the source tables. 
+Expressions from `ON` clause and columns from `USING` clause are called “join keys”. Unless otherwise stated, join produces a [Cartesian product](https://en.wikipedia.org/wiki/Cartesian_product) from rows with matching “join keys”, which might produce results with much more rows than the source tables. ## Supported Types of JOIN {#select-join-types} @@ -24,21 +25,21 @@ All standard [SQL JOIN](https://en.wikipedia.org/wiki/Join_(SQL)) types are supp - `LEFT OUTER JOIN`, non-matching rows from left table are returned in addition to matching rows. - `RIGHT OUTER JOIN`, non-matching rows from right table are returned in addition to matching rows. - `FULL OUTER JOIN`, non-matching rows from both tables are returned in addition to matching rows. -- `CROSS JOIN`, produces cartesian product of whole tables, "join keys" are **not** specified. +- `CROSS JOIN`, produces cartesian product of whole tables, “join keys” are **not** specified. -`JOIN` without specified type implies `INNER`. Keyword `OUTER` can be safely omitted. Alternative syntax for `CROSS JOIN` is specifying multiple tables in [FROM clause](from.md) separated by commas. +`JOIN` without specified type implies `INNER`. Keyword `OUTER` can be safely omitted. Alternative syntax for `CROSS JOIN` is specifying multiple tables in [FROM clause](../../../sql-reference/statements/select/from.md) separated by commas. Additional join types available in ClickHouse: -- `LEFT SEMI JOIN` and `RIGHT SEMI JOIN`, a whitelist on "join keys", without producing a cartesian product. -- `LEFT ANTI JOIN` and `RIGHT ANTI JOIN`, a blacklist on "join keys", without producing a cartesian product. +- `LEFT SEMI JOIN` and `RIGHT SEMI JOIN`, a whitelist on “join keys”, without producing a cartesian product. +- `LEFT ANTI JOIN` and `RIGHT ANTI JOIN`, a blacklist on “join keys”, without producing a cartesian product. ## Strictness {#select-join-strictness} -Modifies how matching by "join keys" is performed +Modifies how matching by “join keys” is performed - `ALL` — The standard `JOIN` behavior in SQL as described above. The default. -- `ANY` — Partially (for opposite side of `LEFT` and `RIGHT`) or completely (for `INNER` and `FULL`) disables the cartesian product for standard `JOIN` types. +- `ANY` — Partially (for opposite side of `LEFT` and `RIGHT`) or completely (for `INNER` and `FULL`) disables the cartesian product for standard `JOIN` types. - `ASOF` — For joining sequences with a non-exact match. `ASOF JOIN` usage is described below. !!! note "Note" @@ -46,12 +47,15 @@ Modifies how matching by "join keys" is performed Also the behavior of ClickHouse server for `ANY JOIN` operations depends on the [any_join_distinct_right_table_keys](../../../operations/settings/settings.md#any_join_distinct_right_table_keys) setting. - -### ASOF JOIN Usage +### ASOF JOIN Usage {#asof-join-usage} `ASOF JOIN` is useful when you need to join records that have no exact match. -Tables for `ASOF JOIN` must have an ordered sequence column. This column cannot be alone in a table, and should be one of the data types: `UInt32`, `UInt64`, `Float32`, `Float64`, `Date`, and `DateTime`. +Algorithm requires the special column in tables. This column: + +- Must contain an ordered sequence. 
+- Can be one of the following types: [Int*, UInt*](../../../sql-reference/data-types/int-uint.md), [Float\*](../../../sql-reference/data-types/float.md), [Date](../../../sql-reference/data-types/date.md), [DateTime](../../../sql-reference/data-types/datetime.md), [Decimal\*](../../../sql-reference/data-types/decimal.md). +- Can’t be the only column in the `JOIN` clause. Syntax `ASOF JOIN ... ON`: @@ -100,7 +104,7 @@ There are two ways to execute join involving distributed tables: - When using a normal `JOIN`, the query is sent to remote servers. Subqueries are run on each of them in order to make the right table, and the join is performed with this table. In other words, the right table is formed on each server separately. - When using `GLOBAL ... JOIN`, first the requestor server runs a subquery to calculate the right table. This temporary table is passed to each remote server, and queries are run on them using the temporary data that was transmitted. -Be careful when using `GLOBAL`. For more information, see the [Distributed subqueries](../../operators/in.md#select-distributed-subqueries) section. +Be careful when using `GLOBAL`. For more information, see the [Distributed subqueries](../../../sql-reference/operators/in.md#select-distributed-subqueries) section. ## Usage Recommendations {#usage-recommendations} @@ -108,9 +112,9 @@ Be careful when using `GLOBAL`. For more information, see the [Distributed subqu While joining tables, the empty cells may appear. The setting [join\_use\_nulls](../../../operations/settings/settings.md#join_use_nulls) define how ClickHouse fills these cells. -If the `JOIN` keys are [Nullable](../../data-types/nullable.md) fields, the rows where at least one of the keys has the value [NULL](../../../sql-reference/syntax.md#null-literal) are not joined. +If the `JOIN` keys are [Nullable](../../../sql-reference/data-types/nullable.md) fields, the rows where at least one of the keys has the value [NULL](../../../sql-reference/syntax.md#null-literal) are not joined. -### Syntax +### Syntax {#syntax} The columns specified in `USING` must have the same names in both subqueries, and the other columns must be named differently. You can use aliases to change the names of columns in subqueries. @@ -127,17 +131,17 @@ For `ON`, `WHERE`, and `GROUP BY` clauses: - Arbitrary expressions cannot be used in `ON`, `WHERE`, and `GROUP BY` clauses, but you can define an expression in a `SELECT` clause and then use it in these clauses via an alias. -### Performance +### Performance {#performance} -When running a `JOIN`, there is no optimization of the order of execution in relation to other stages of the query. The join (a search in the right table) is run before filtering in `WHERE` and before aggregation. +When running a `JOIN`, there is no optimization of the order of execution in relation to other stages of the query. The join (a search in the right table) is run before filtering in `WHERE` and before aggregation. Each time a query is run with the same `JOIN`, the subquery is run again because the result is not cached. To avoid this, use the special [Join](../../../engines/table-engines/special/join.md) table engine, which is a prepared array for joining that is always in RAM. -In some cases, it is more efficient to use [IN](../../operators/in.md) instead of `JOIN`. +In some cases, it is more efficient to use [IN](../../../sql-reference/operators/in.md) instead of `JOIN`. 
-If you need a `JOIN` for joining with dimension tables (these are relatively small tables that contain dimension properties, such as names for advertising campaigns), a `JOIN` might not be very convenient due to the fact that the right table is re-accessed for every query. For such cases, there is an “external dictionaries” feature that you should use instead of `JOIN`. For more information, see the [External dictionaries](../../dictionaries/external-dictionaries/external-dicts.md) section. +If you need a `JOIN` for joining with dimension tables (these are relatively small tables that contain dimension properties, such as names for advertising campaigns), a `JOIN` might not be very convenient due to the fact that the right table is re-accessed for every query. For such cases, there is an “external dictionaries” feature that you should use instead of `JOIN`. For more information, see the [External dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) section. -### Memory Limitations +### Memory Limitations {#memory-limitations} By default, ClickHouse uses the [hash join](https://en.wikipedia.org/wiki/Hash_join) algorithm. ClickHouse takes the `` and creates a hash table for it in RAM. After some threshold of memory consumption, ClickHouse falls back to merge join algorithm. @@ -148,7 +152,7 @@ If you need to restrict join operation memory consumption use the following sett When any of these limits is reached, ClickHouse acts as the [join\_overflow\_mode](../../../operations/settings/query-complexity.md#settings-join_overflow_mode) setting instructs. -## Examples +## Examples {#examples} Example: diff --git a/docs/en/sql-reference/statements/select/limit-by.md b/docs/en/sql-reference/statements/select/limit-by.md index 05b9e7b9151..34645b68b03 100644 --- a/docs/en/sql-reference/statements/select/limit-by.md +++ b/docs/en/sql-reference/statements/select/limit-by.md @@ -4,19 +4,19 @@ toc_title: LIMIT BY # LIMIT BY Clause {#limit-by-clause} -A query with the `LIMIT n BY expressions` clause selects the first `n` rows for each distinct value of `expressions`. The key for `LIMIT BY` can contain any number of [expressions](../../syntax.md#syntax-expressions). +A query with the `LIMIT n BY expressions` clause selects the first `n` rows for each distinct value of `expressions`. The key for `LIMIT BY` can contain any number of [expressions](../../../sql-reference/syntax.md#syntax-expressions). ClickHouse supports the following syntax variants: - `LIMIT [offset_value, ]n BY expressions` - `LIMIT n OFFSET offset_value BY expressions` -During query processing, ClickHouse selects data ordered by sorting key. The sorting key is set explicitly using an [ORDER BY](order-by.md) clause or implicitly as a property of the table engine. Then ClickHouse applies `LIMIT n BY expressions` and returns the first `n` rows for each distinct combination of `expressions`. If `OFFSET` is specified, then for each data block that belongs to a distinct combination of `expressions`, ClickHouse skips `offset_value` number of rows from the beginning of the block and returns a maximum of `n` rows as a result. If `offset_value` is bigger than the number of rows in the data block, ClickHouse returns zero rows from the block. +During query processing, ClickHouse selects data ordered by sorting key. The sorting key is set explicitly using an [ORDER BY](../../../sql-reference/statements/select/order-by.md) clause or implicitly as a property of the table engine. 
Then ClickHouse applies `LIMIT n BY expressions` and returns the first `n` rows for each distinct combination of `expressions`. If `OFFSET` is specified, then for each data block that belongs to a distinct combination of `expressions`, ClickHouse skips `offset_value` number of rows from the beginning of the block and returns a maximum of `n` rows as a result. If `offset_value` is bigger than the number of rows in the data block, ClickHouse returns zero rows from the block. !!! note "Note" - `LIMIT BY` is not related to [LIMIT](limit.md). They can both be used in the same query. + `LIMIT BY` is not related to [LIMIT](../../../sql-reference/statements/select/limit.md). They can both be used in the same query. -## Examples +## Examples {#examples} Sample table: diff --git a/docs/en/sql-reference/statements/select/limit.md b/docs/en/sql-reference/statements/select/limit.md index 3f43e1b155e..3b19f5fae4e 100644 --- a/docs/en/sql-reference/statements/select/limit.md +++ b/docs/en/sql-reference/statements/select/limit.md @@ -10,4 +10,4 @@ toc_title: LIMIT `n` and `m` must be non-negative integers. -If there is no [ORDER BY](order-by.md) clause that explicitly sorts results, the choice of rows for the result may be arbitrary and non-deterministic. +If there is no [ORDER BY](../../../sql-reference/statements/select/order-by.md) clause that explicitly sorts results, the choice of rows for the result may be arbitrary and non-deterministic. diff --git a/docs/en/sql-reference/statements/select/order-by.md b/docs/en/sql-reference/statements/select/order-by.md index 10c1f234057..318dd143f99 100644 --- a/docs/en/sql-reference/statements/select/order-by.md +++ b/docs/en/sql-reference/statements/select/order-by.md @@ -4,19 +4,19 @@ toc_title: ORDER BY # ORDER BY Clause {#select-order-by} -The `ORDER BY` clause contains a list of expressions, which can each be attributed with `DESC` (descending) or `ASC` (ascending) modifier which determine the sorting direction. If the direction is not specified, `ASC` is assumed, so it's usually omitted. The sorting direction applies to a single expression, not to the entire list. Example: `ORDER BY Visits DESC, SearchPhrase` +The `ORDER BY` clause contains a list of expressions, which can each be attributed with `DESC` (descending) or `ASC` (ascending) modifier which determine the sorting direction. If the direction is not specified, `ASC` is assumed, so it’s usually omitted. The sorting direction applies to a single expression, not to the entire list. Example: `ORDER BY Visits DESC, SearchPhrase` Rows that have identical values for the list of sorting expressions are output in an arbitrary order, which can also be non-deterministic (different each time). If the ORDER BY clause is omitted, the order of the rows is also undefined, and may be non-deterministic as well. -## Sorting of Special Values +## Sorting of Special Values {#sorting-of-special-values} There are two approaches to `NaN` and `NULL` sorting order: - By default or with the `NULLS LAST` modifier: first the values, then `NaN`, then `NULL`. - With the `NULLS FIRST` modifier: first `NULL`, then `NaN`, then other values. -### Example +### Example {#example} For the table @@ -54,15 +54,15 @@ Run the query `SELECT * FROM t_null_nan ORDER BY y NULLS FIRST` to get: When floating point numbers are sorted, NaNs are separate from the other values. Regardless of the sorting order, NaNs come at the end. 
In other words, for ascending sorting they are placed as if they are larger than all the other numbers, while for descending sorting they are placed as if they are smaller than the rest. -## Collation Support +## Collation Support {#collation-support} For sorting by String values, you can specify collation (comparison). Example: `ORDER BY SearchPhrase COLLATE 'tr'` - for sorting by keyword in ascending order, using the Turkish alphabet, case insensitive, assuming that strings are UTF-8 encoded. `COLLATE` can be specified or not for each expression in ORDER BY independently. If `ASC` or `DESC` is specified, `COLLATE` is specified after it. When using `COLLATE`, sorting is always case-insensitive. We only recommend using `COLLATE` for final sorting of a small number of rows, since sorting with `COLLATE` is less efficient than normal sorting by bytes. -## Implementation Details +## Implementation Details {#implementation-details} -Less RAM is used if a small enough [LIMIT](limit.md) is specified in addition to `ORDER BY`. Otherwise, the amount of memory spent is proportional to the volume of data for sorting. For distributed query processing, if [GROUP BY](group-by.md) is omitted, sorting is partially done on remote servers, and the results are merged on the requestor server. This means that for distributed sorting, the volume of data to sort can be greater than the amount of memory on a single server. +Less RAM is used if a small enough [LIMIT](../../../sql-reference/statements/select/limit.md) is specified in addition to `ORDER BY`. Otherwise, the amount of memory spent is proportional to the volume of data for sorting. For distributed query processing, if [GROUP BY](../../../sql-reference/statements/select/group-by.md) is omitted, sorting is partially done on remote servers, and the results are merged on the requestor server. This means that for distributed sorting, the volume of data to sort can be greater than the amount of memory on a single server. If there is not enough RAM, it is possible to perform sorting in external memory (creating temporary files on a disk). Use the setting `max_bytes_before_external_sort` for this purpose. If it is set to 0 (the default), external sorting is disabled. If it is enabled, when the volume of data to sort reaches the specified number of bytes, the collected data is sorted and dumped into a temporary file. After all data is read, all the sorted files are merged and the results are output. Files are written to the `/var/lib/clickhouse/tmp/` directory in the config (by default, but you can use the `tmp_path` parameter to change this setting). diff --git a/docs/en/sql-reference/statements/select/prewhere.md b/docs/en/sql-reference/statements/select/prewhere.md index 38ff11dc548..fc43d1de0a1 100644 --- a/docs/en/sql-reference/statements/select/prewhere.md +++ b/docs/en/sql-reference/statements/select/prewhere.md @@ -4,11 +4,11 @@ toc_title: PREWHERE # PREWHERE Clause {#prewhere-clause} -Prewhere is an optimization to apply filtering more efficiently. It is enabled by default even if `PREWHERE` clause is not specified explicitly. It works by automatically moving part of [WHERE](where.md) condition to prewhere stage. The role of `PREWHERE` clause is only to control this optimization if you think that you know how to do it better than it happens by default. +Prewhere is an optimization to apply filtering more efficiently. It is enabled by default even if `PREWHERE` clause is not specified explicitly. 
It works by automatically moving part of [WHERE](../../../sql-reference/statements/select/where.md) condition to prewhere stage. The role of `PREWHERE` clause is only to control this optimization if you think that you know how to do it better than it happens by default. -With prewhere optimization, at first only the columns necessary for executing prewhere expression are read. Then the other columns are read that are needed for running the rest of the query, but only those blocks where the prewhere expression is "true" at least for some rows. If there are a lot of blocks where prewhere expression is "false" for all rows and prewhere needs less columns than other parts of query, this often allows to read a lot less data from disk for query execution. +With prewhere optimization, at first only the columns necessary for executing prewhere expression are read. Then the other columns are read that are needed for running the rest of the query, but only those blocks where the prewhere expression is “true” at least for some rows. If there are a lot of blocks where prewhere expression is “false” for all rows and prewhere needs less columns than other parts of query, this often allows to read a lot less data from disk for query execution. -## Controlling Prewhere Manually +## Controlling Prewhere Manually {#controlling-prewhere-manually} The clause has the same meaning as the `WHERE` clause. The difference is in which data is read from the table. When manually controlling `PREWHERE` for filtration conditions that are used by a minority of the columns in the query, but that provide strong data filtration. This reduces the volume of data to read. @@ -16,7 +16,6 @@ A query may simultaneously specify `PREWHERE` and `WHERE`. In this case, `PREWHE If the `optimize_move_to_prewhere` setting is set to 0, heuristics to automatically move parts of expressions from `WHERE` to `PREWHERE` are disabled. -## Limitations +## Limitations {#limitations} `PREWHERE` is only supported by tables from the `*MergeTree` family. - diff --git a/docs/en/sql-reference/statements/select/sample.md b/docs/en/sql-reference/statements/select/sample.md index 00431d8e44a..55c1919b81d 100644 --- a/docs/en/sql-reference/statements/select/sample.md +++ b/docs/en/sql-reference/statements/select/sample.md @@ -20,7 +20,7 @@ Approximated query processing can be useful in the following cases: The features of data sampling are listed below: - Data sampling is a deterministic mechanism. The result of the same `SELECT .. SAMPLE` query is always the same. -- Sampling works consistently for different tables. For tables with a single sampling key, a sample with the same coefficient always selects the same subset of possible data. For example, a sample of user IDs takes rows with the same subset of all the possible user IDs from different tables. This means that you can use the sample in subqueries in the [IN](../../operators/in.md) clause. Also, you can join samples using the [JOIN](join.md) clause. +- Sampling works consistently for different tables. For tables with a single sampling key, a sample with the same coefficient always selects the same subset of possible data. For example, a sample of user IDs takes rows with the same subset of all the possible user IDs from different tables. This means that you can use the sample in subqueries in the [IN](../../../sql-reference/operators/in.md) clause. Also, you can join samples using the [JOIN](../../../sql-reference/statements/select/join.md) clause. - Sampling allows reading less data from a disk. 
Note that you must specify the sampling key correctly. For more information, see [Creating a MergeTree Table](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table). For the `SAMPLE` clause the following syntax is supported: diff --git a/docs/en/sql-reference/statements/select/union-all.md b/docs/en/sql-reference/statements/select/union-all.md index 9c5be93a345..5230363609e 100644 --- a/docs/en/sql-reference/statements/select/union-all.md +++ b/docs/en/sql-reference/statements/select/union-all.md @@ -23,12 +23,12 @@ Result columns are matched by their index (order inside `SELECT`). If column nam Type casting is performed for unions. For example, if two queries being combined have the same field with non-`Nullable` and `Nullable` types from a compatible type, the resulting `UNION ALL` has a `Nullable` type field. -Queries that are parts of `UNION ALL` can’t be enclosed in round brackets. [ORDER BY](order-by.md) and [LIMIT](limit.md) are applied to separate queries, not to the final result. If you need to apply a conversion to the final result, you can put all the queries with `UNION ALL` in a subquery in the [FROM](from.md) clause. +Queries that are parts of `UNION ALL` can’t be enclosed in round brackets. [ORDER BY](../../../sql-reference/statements/select/order-by.md) and [LIMIT](../../../sql-reference/statements/select/limit.md) are applied to separate queries, not to the final result. If you need to apply a conversion to the final result, you can put all the queries with `UNION ALL` in a subquery in the [FROM](../../../sql-reference/statements/select/from.md) clause. -## Limitations +## Limitations {#limitations} Only `UNION ALL` is supported. The regular `UNION` (`UNION DISTINCT`) is not supported. If you need `UNION DISTINCT`, you can write `SELECT DISTINCT` from a subquery containing `UNION ALL`. -## Implementation Details +## Implementation Details {#implementation-details} Queries that are parts of `UNION ALL` can be run simultaneously, and their results can be mixed together. diff --git a/docs/en/sql-reference/statements/select/where.md b/docs/en/sql-reference/statements/select/where.md index 84618be5002..f1532115e55 100644 --- a/docs/en/sql-reference/statements/select/where.md +++ b/docs/en/sql-reference/statements/select/where.md @@ -4,11 +4,11 @@ toc_title: WHERE # WHERE Clause {#select-where} -`WHERE` clause allows to filter the data that is coming from [FROM](from.md) clause of `SELECT`. +`WHERE` clause allows to filter the data that is coming from [FROM](../../../sql-reference/statements/select/from.md) clause of `SELECT`. If there is a `WHERE` clause, it must contain an expression with the `UInt8` type. This is usually an expression with comparison and logical operators. Rows where this expression evaluates to 0 are expluded from further transformations or result. `WHERE` expression is evaluated on the ability to use indexes and partition pruning, if the underlying table engine supports that. !!! note "Note" - There's a filtering optimization called [prewhere](prewhere.md). + There’s a filtering optimization called [prewhere](../../../sql-reference/statements/select/prewhere.md). 
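A short hedged example of the `WHERE` clause (the `hits` table with `CounterID` and `URL` columns is hypothetical); the whole condition is a single expression that evaluates to `UInt8`:

``` sql
SELECT CounterID, URL
FROM hits
WHERE CounterID = 34 AND URL LIKE '%clickhouse%'
```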
diff --git a/docs/en/sql-reference/statements/select/with.md b/docs/en/sql-reference/statements/select/with.md index ac04ce69ae3..a507d5224aa 100644 --- a/docs/en/sql-reference/statements/select/with.md +++ b/docs/en/sql-reference/statements/select/with.md @@ -6,13 +6,13 @@ toc_title: WITH This section provides support for Common Table Expressions ([CTE](https://en.wikipedia.org/wiki/Hierarchical_and_recursive_queries_in_SQL)), so the results of `WITH` clause can be used in the rest of `SELECT` query. -## Limitations +## Limitations {#limitations} -1. Recursive queries are not supported. -2. When subquery is used inside WITH section, it’s result should be scalar with exactly one row. -3. Expression’s results are not available in subqueries. +1. Recursive queries are not supported. +2. When subquery is used inside WITH section, it’s result should be scalar with exactly one row. +3. Expression’s results are not available in subqueries. -## Examples +## Examples {#examples} **Example 1:** Using constant expression as “variable” diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md index e892ee7e51b..65af3f75e73 100644 --- a/docs/en/sql-reference/statements/show.md +++ b/docs/en/sql-reference/statements/show.md @@ -28,7 +28,7 @@ This query is identical to `SELECT name FROM system.databases [INTO OUTFILE file SHOW PROCESSLIST [INTO OUTFILE filename] [FORMAT format] ``` -Outputs the content of the [system.processes](../../operations/system-tables.md#system_tables-processes) table, that contains a list of queries that is being processed at the moment, excepting `SHOW PROCESSLIST` queries. +Outputs the content of the [system.processes](../../operations/system-tables/processes.md#system_tables-processes) table, that contains a list of queries that is being processed at the moment, excepting `SHOW PROCESSLIST` queries. The `SELECT * FROM system.processes` query returns data about all the current queries. @@ -100,7 +100,6 @@ SHOW DICTIONARIES FROM db LIKE '%reg%' LIMIT 2 └──────────────┘ ``` - ## SHOW GRANTS {#show-grants-statement} Shows privileges for a user. @@ -113,13 +112,11 @@ SHOW GRANTS [FOR user] If user is not specified, the query returns privileges for the current user. - - ## SHOW CREATE USER {#show-create-user-statement} -Shows parameters that were used at a [user creation](create.md#create-user-statement). +Shows parameters that were used at a [user creation](../../sql-reference/statements/create.md#create-user-statement). -`SHOW CREATE USER` doesn't output user passwords. +`SHOW CREATE USER` doesn’t output user passwords. ### Syntax {#show-create-user-syntax} @@ -127,11 +124,9 @@ Shows parameters that were used at a [user creation](create.md#create-user-state SHOW CREATE USER [name | CURRENT_USER] ``` - - ## SHOW CREATE ROLE {#show-create-role-statement} -Shows parameters that were used at a [role creation](create.md#create-role-statement). +Shows parameters that were used at a [role creation](../../sql-reference/statements/create.md#create-role-statement). ### Syntax {#show-create-role-syntax} @@ -139,37 +134,33 @@ Shows parameters that were used at a [role creation](create.md#create-role-state SHOW CREATE ROLE name ``` - - ## SHOW CREATE ROW POLICY {#show-create-row-policy-statement} -Shows parameters that were used at a [row policy creation](create.md#create-row-policy-statement). +Shows parameters that were used at a [row policy creation](../../sql-reference/statements/create.md#create-row-policy-statement). 
### Syntax {#show-create-row-policy-syntax} -```sql +``` sql SHOW CREATE [ROW] POLICY name ON [database.]table ``` - ## SHOW CREATE QUOTA {#show-create-quota-statement} -Shows parameters that were used at a [quota creation](create.md#create-quota-statement). +Shows parameters that were used at a [quota creation](../../sql-reference/statements/create.md#create-quota-statement). ### Syntax {#show-create-row-policy-syntax} -```sql +``` sql SHOW CREATE QUOTA [name | CURRENT] ``` - ## SHOW CREATE SETTINGS PROFILE {#show-create-settings-profile-statement} -Shows parameters that were used at a [settings profile creation](create.md#create-settings-profile-statement). +Shows parameters that were used at a [settings profile creation](../../sql-reference/statements/create.md#create-settings-profile-statement). ### Syntax {#show-create-row-policy-syntax} -```sql +``` sql SHOW CREATE [SETTINGS] PROFILE name ``` diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index e4823686c68..0987c15bcdd 100644 --- a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -5,12 +5,12 @@ toc_title: SYSTEM # SYSTEM Queries {#query-language-system} -- [RELOAD EMBEDDED DICTIONARIES](#query_language-system-reload-emdedded-dictionaries) +- [RELOAD EMBEDDED DICTIONARIES](#query_language-system-reload-emdedded-dictionaries) - [RELOAD DICTIONARIES](#query_language-system-reload-dictionaries) - [RELOAD DICTIONARY](#query_language-system-reload-dictionary) - [DROP DNS CACHE](#query_language-system-drop-dns-cache) - [DROP MARK CACHE](#query_language-system-drop-mark-cache) -- [DROP UNCOMPRESSED CACHE](#query_language-system-drop-uncompressed-cache) +- [DROP UNCOMPRESSED CACHE](#query_language-system-drop-uncompressed-cache) - [DROP COMPILED EXPRESSION CACHE](#query_language-system-drop-compiled-expression-cache) - [FLUSH LOGS](#query_language-system-flush_logs) - [RELOAD CONFIG](#query_language-system-reload-config) @@ -21,10 +21,10 @@ toc_title: SYSTEM - [START DISTRIBUTED SENDS](#query_language-system-start-distributed-sends) - [STOP MERGES](#query_language-system-stop-merges) - [START MERGES](#query_language-system-start-merges) -- [STOP TTL MERGES](#query_language-stop-ttl-merges) -- [START TTL MERGES](#query_language-start-ttl-merges) -- [STOP MOVES](#query_language-stop-moves) -- [START MOVES](#query_language-start-moves) +- [STOP TTL MERGES](#query_language-stop-ttl-merges) +- [START TTL MERGES](#query_language-start-ttl-merges) +- [STOP MOVES](#query_language-stop-moves) +- [START MOVES](#query_language-start-moves) - [STOP FETCHES](#query_language-system-stop-fetches) - [START FETCHES](#query_language-system-start-fetches) - [STOP REPLICATED SENDS](#query_language-system-start-replicated-sends) @@ -33,13 +33,14 @@ toc_title: SYSTEM - [START REPLICATION QUEUES](#query_language-system-start-replication-queues) - [SYNC REPLICA](#query_language-system-sync-replica) - [RESTART REPLICA](#query_language-system-restart-replica) -- [RESTART REPLICAS](#query_language-system-restart-replicas) +- [RESTART REPLICAS](#query_language-system-restart-replicas) -## RELOAD EMBEDDED DICTIONARIES] {#query_language-system-reload-emdedded-dictionaries} -Reload all [Internal dictionaries](../dictionaries/internal-dicts.md). -By default, internal dictionaries are disabled. +## RELOAD EMBEDDED DICTIONARIES\] {#query_language-system-reload-emdedded-dictionaries} + +Reload all [Internal dictionaries](../../sql-reference/dictionaries/internal-dicts.md). 
+By default, internal dictionaries are disabled. Always returns `Ok.` regardless of the result of the internal dictionary update. - + ## RELOAD DICTIONARIES {#query_language-system-reload-dictionaries} Reloads all dictionaries that have been successfully loaded before. @@ -69,16 +70,17 @@ Resets the mark cache. Used in development of ClickHouse and performance tests. ## DROP UNCOMPRESSED CACHE {#query_language-system-drop-uncompressed-cache} Reset the uncompressed data cache. Used in development of ClickHouse and performance tests. -For manage uncompressed data cache parameters use following server level settings [uncompressed_cache_size](../../operations/server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size) and query/user/profile level settings [use_uncompressed_cache](../../operations/settings/settings.md#setting-use_uncompressed_cache) - +For manage uncompressed data cache parameters use following server level settings [uncompressed\_cache\_size](../../operations/server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size) and query/user/profile level settings [use\_uncompressed\_cache](../../operations/settings/settings.md#setting-use_uncompressed_cache) ## DROP COMPILED EXPRESSION CACHE {#query_language-system-drop-compiled-expression-cache} + Reset the compiled expression cache. Used in development of ClickHouse and performance tests. Complied expression cache used when query/user/profile enable option [compile](../../operations/settings/settings.md#compile) ## FLUSH LOGS {#query_language-system-flush_logs} Flushes buffers of log messages to system tables (e.g. system.query\_log). Allows you to not wait 7.5 seconds when debugging. +This will also create system tables even if message queue is empty. ## RELOAD CONFIG {#query_language-system-reload-config} @@ -146,34 +148,34 @@ SYSTEM START MERGES [[db.]merge_tree_family_table_name] ### STOP TTL MERGES {#query_language-stop-ttl-merges} Provides possibility to stop background delete old data according to [TTL expression](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) for tables in the MergeTree family: -Return `Ok.` even table doesn't exists or table have not MergeTree engine. Return error when database doesn't exists: +Return `Ok.` even table doesn’t exists or table have not MergeTree engine. Return error when database doesn’t exists: ``` sql SYSTEM STOP TTL MERGES [[db.]merge_tree_family_table_name] ``` -### START TTL MERGES {#query_language-start-ttl-merges} +### START TTL MERGES {#query_language-start-ttl-merges} Provides possibility to start background delete old data according to [TTL expression](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) for tables in the MergeTree family: -Return `Ok.` even table doesn't exists. Return error when database doesn't exists: +Return `Ok.` even table doesn’t exists. Return error when database doesn’t exists: ``` sql SYSTEM START TTL MERGES [[db.]merge_tree_family_table_name] ``` -### STOP MOVES {#query_language-stop-moves} +### STOP MOVES {#query_language-stop-moves} Provides possibility to stop background move data according to [TTL table expression with TO VOLUME or TO DISK clause](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl) for tables in the MergeTree family: -Return `Ok.` even table doesn't exists. Return error when database doesn't exists: +Return `Ok.` even table doesn’t exists. 
Return error when database doesn’t exists: ``` sql SYSTEM STOP MOVES [[db.]merge_tree_family_table_name] ``` -### START MOVES {#query_language-start-moves} +### START MOVES {#query_language-start-moves} Provides possibility to start background move data according to [TTL table expression with TO VOLUME and TO DISK clause](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl) for tables in the MergeTree family: -Return `Ok.` even table doesn't exists. Return error when database doesn't exists: +Return `Ok.` even table doesn’t exists. Return error when database doesn’t exists: ``` sql SYSTEM STOP MOVES [[db.]merge_tree_family_table_name] @@ -184,22 +186,25 @@ SYSTEM STOP MOVES [[db.]merge_tree_family_table_name] ClickHouse can manage background replication related processes in [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md) tables. ### STOP FETCHES {#query_language-system-stop-fetches} + Provides possibility to stop background fetches for inserted parts for tables in the `ReplicatedMergeTree` family: -Always returns `Ok.` regardless of the table engine and even table or database doesn't exists. +Always returns `Ok.` regardless of the table engine and even table or database doesn’t exists. ``` sql SYSTEM STOP FETCHES [[db.]replicated_merge_tree_family_table_name] ``` ### START FETCHES {#query_language-system-start-fetches} + Provides possibility to start background fetches for inserted parts for tables in the `ReplicatedMergeTree` family: -Always returns `Ok.` regardless of the table engine and even table or database doesn't exists. +Always returns `Ok.` regardless of the table engine and even table or database doesn’t exists. ``` sql SYSTEM START FETCHES [[db.]replicated_merge_tree_family_table_name] ``` ### STOP REPLICATED SENDS {#query_language-system-start-replicated-sends} + Provides possibility to stop background sends to other replicas in cluster for new inserted parts for tables in the `ReplicatedMergeTree` family: ``` sql @@ -207,6 +212,7 @@ SYSTEM STOP REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name] ``` ### START REPLICATED SENDS {#query_language-system-start-replicated-sends} + Provides possibility to start background sends to other replicas in cluster for new inserted parts for tables in the `ReplicatedMergeTree` family: ``` sql @@ -214,20 +220,23 @@ SYSTEM START REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name] ``` ### STOP REPLICATION QUEUES {#query_language-system-stop-replication-queues} -Provides possibility to stop background fetch tasks from replication queues which stored in Zookeeper for tables in the `ReplicatedMergeTree` family. Possible background tasks types - merges, fetches, mutation, DDL statements with ON CLUSTER clause: + +Provides possibility to stop background fetch tasks from replication queues which stored in Zookeeper for tables in the `ReplicatedMergeTree` family. Possible background tasks types - merges, fetches, mutation, DDL statements with ON CLUSTER clause: ``` sql SYSTEM STOP REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name] ``` ### START REPLICATION QUEUES {#query_language-system-start-replication-queues} -Provides possibility to start background fetch tasks from replication queues which stored in Zookeeper for tables in the `ReplicatedMergeTree` family. 
Possible background tasks types - merges, fetches, mutation, DDL statements with ON CLUSTER clause: + +Provides possibility to start background fetch tasks from replication queues which stored in Zookeeper for tables in the `ReplicatedMergeTree` family. Possible background tasks types - merges, fetches, mutation, DDL statements with ON CLUSTER clause: ``` sql SYSTEM START REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name] ``` ### SYNC REPLICA {#query_language-system-sync-replica} + Wait until a `ReplicatedMergeTree` table will be synced with other replicas in a cluster. Will run until `receive_timeout` if fetches currently disabled for the table. ``` sql @@ -235,7 +244,8 @@ SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name ``` ### RESTART REPLICA {#query_language-system-restart-replica} -Provides possibility to reinitialize Zookeeper sessions state for `ReplicatedMergeTree` table, will compare current state with Zookeeper as source of true and add tasks to Zookeeper queue if needed + +Provides possibility to reinitialize Zookeeper sessions state for `ReplicatedMergeTree` table, will compare current state with Zookeeper as source of true and add tasks to Zookeeper queue if needed Initialization replication quene based on ZooKeeper date happens in the same way as `ATTACH TABLE` statement. For a short time the table will be unavailable for any operations. ``` sql @@ -243,7 +253,8 @@ SYSTEM RESTART REPLICA [db.]replicated_merge_tree_family_table_name ``` ### RESTART REPLICAS {#query_language-system-restart-replicas} -Provides possibility to reinitialize Zookeeper sessions state for all `ReplicatedMergeTree` tables, will compare current state with Zookeeper as source of true and add tasks to Zookeeper queue if needed + +Provides possibility to reinitialize Zookeeper sessions state for all `ReplicatedMergeTree` tables, will compare current state with Zookeeper as source of true and add tasks to Zookeeper queue if needed ``` sql SYSTEM RESTART QUEUES [db.]replicated_merge_tree_family_table_name diff --git a/docs/en/sql-reference/syntax.md b/docs/en/sql-reference/syntax.md index cca01114681..65a4f7b7332 100644 --- a/docs/en/sql-reference/syntax.md +++ b/docs/en/sql-reference/syntax.md @@ -40,7 +40,7 @@ Keywords are case-insensitive when they correspond to: - SQL standard. For example, `SELECT`, `select` and `SeLeCt` are all valid. - Implementation in some popular DBMS (MySQL or Postgres). For example, `DateTime` is the same as `datetime`. -You can check whether a data type name is case-sensitive in the [system.data_type_families](../operations/system-tables.md#system_tables-data_type_families) table. +You can check whether a data type name is case-sensitive in the [system.data\_type\_families](../operations/system-tables/data_type_families.md#system_tables-data_type_families) table. In contrast to standard SQL, all other keywords (including functions names) are **case-sensitive**. @@ -102,7 +102,7 @@ Depending on the data format (input or output), `NULL` may have a different repr There are many nuances to processing `NULL`. For example, if at least one of the arguments of a comparison operation is `NULL`, the result of this operation is also `NULL`. The same is true for multiplication, addition, and other operations. For more information, read the documentation for each operation. -In queries, you can check `NULL` using the [IS NULL](operators/index.md#operator-is-null) and [IS NOT NULL](operators/index.md) operators and the related functions `isNull` and `isNotNull`. 
+In queries, you can check `NULL` using the [IS NULL](../sql-reference/operators/index.md#operator-is-null) and [IS NOT NULL](../sql-reference/operators/index.md) operators and the related functions `isNull` and `isNotNull`. ## Functions {#functions} diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md index 7ffb09c5bd9..a67d9bbfd90 100644 --- a/docs/en/sql-reference/table-functions/file.md +++ b/docs/en/sql-reference/table-functions/file.md @@ -5,7 +5,7 @@ toc_title: file # file {#file} -Creates a table from a file. This table function is similar to [url](url.md) and [hdfs](hdfs.md) ones. +Creates a table from a file. This table function is similar to [url](../../sql-reference/table-functions/url.md) and [hdfs](../../sql-reference/table-functions/hdfs.md) ones. ``` sql file(path, format, structure) diff --git a/docs/en/sql-reference/table-functions/hdfs.md b/docs/en/sql-reference/table-functions/hdfs.md index 250f6112b51..8cfdb1eedf2 100644 --- a/docs/en/sql-reference/table-functions/hdfs.md +++ b/docs/en/sql-reference/table-functions/hdfs.md @@ -5,7 +5,7 @@ toc_title: hdfs # hdfs {#hdfs} -Creates a table from files in HDFS. This table function is similar to [url](url.md) and [file](file.md) ones. +Creates a table from files in HDFS. This table function is similar to [url](../../sql-reference/table-functions/url.md) and [file](../../sql-reference/table-functions/file.md) ones. ``` sql hdfs(URI, format, structure) diff --git a/docs/en/sql-reference/table-functions/index.md b/docs/en/sql-reference/table-functions/index.md index 515e7083e5e..630bf6d8c27 100644 --- a/docs/en/sql-reference/table-functions/index.md +++ b/docs/en/sql-reference/table-functions/index.md @@ -10,27 +10,27 @@ Table functions are methods for constructing tables. You can use table functions in: -- [FROM](../statements/select/from.md) clause of the `SELECT` query. +- [FROM](../../sql-reference/statements/select/from.md) clause of the `SELECT` query. The method for creating a temporary table that is available only in the current query. The table is deleted when the query finishes. -- [CREATE TABLE AS \](../statements/create.md#create-table-query) query. +- [CREATE TABLE AS \](../../sql-reference/statements/create.md#create-table-query) query. It's one of the methods of creating a table. !!! warning "Warning" You can’t use table functions if the [allow\_ddl](../../operations/settings/permissions-for-queries.md#settings_allow_ddl) setting is disabled. -| Function | Description | -|-----------------------|----------------------------------------------------------------------------------------------------------------------------------------| -| [file](file.md) | Creates a [File](../../engines/table-engines/special/file.md)-engine table. | -| [merge](merge.md) | Creates a [Merge](../../engines/table-engines/special/merge.md)-engine table. | -| [numbers](numbers.md) | Creates a table with a single column filled with integer numbers. | -| [remote](remote.md) | Allows you to access remote servers without creating a [Distributed](../../engines/table-engines/special/distributed.md)-engine table. | -| [url](url.md) | Creates a [Url](../../engines/table-engines/special/url.md)-engine table. | -| [mysql](mysql.md) | Creates a [MySQL](../../engines/table-engines/integrations/mysql.md)-engine table. | -| [jdbc](jdbc.md) | Creates a [JDBC](../../engines/table-engines/integrations/jdbc.md)-engine table. 
| -| [odbc](odbc.md) | Creates a [ODBC](../../engines/table-engines/integrations/odbc.md)-engine table. | -| [hdfs](hdfs.md) | Creates a [HDFS](../../engines/table-engines/integrations/hdfs.md)-engine table. | +| Function | Description | +|-----------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------| +| [file](../../sql-reference/table-functions/file.md) | Creates a [File](../../engines/table-engines/special/file.md)-engine table. | +| [merge](../../sql-reference/table-functions/merge.md) | Creates a [Merge](../../engines/table-engines/special/merge.md)-engine table. | +| [numbers](../../sql-reference/table-functions/numbers.md) | Creates a table with a single column filled with integer numbers. | +| [remote](../../sql-reference/table-functions/remote.md) | Allows you to access remote servers without creating a [Distributed](../../engines/table-engines/special/distributed.md)-engine table. | +| [url](../../sql-reference/table-functions/url.md) | Creates a [Url](../../engines/table-engines/special/url.md)-engine table. | +| [mysql](../../sql-reference/table-functions/mysql.md) | Creates a [MySQL](../../engines/table-engines/integrations/mysql.md)-engine table. | +| [jdbc](../../sql-reference/table-functions/jdbc.md) | Creates a [JDBC](../../engines/table-engines/integrations/jdbc.md)-engine table. | +| [odbc](../../sql-reference/table-functions/odbc.md) | Creates a [ODBC](../../engines/table-engines/integrations/odbc.md)-engine table. | +| [hdfs](../../sql-reference/table-functions/hdfs.md) | Creates a [HDFS](../../engines/table-engines/integrations/hdfs.md)-engine table. | [Original article](https://clickhouse.tech/docs/en/query_language/table_functions/) diff --git a/docs/en/whats-new/changelog/2017.md b/docs/en/whats-new/changelog/2017.md index d819324b07a..3b48e23233f 100644 --- a/docs/en/whats-new/changelog/2017.md +++ b/docs/en/whats-new/changelog/2017.md @@ -24,7 +24,7 @@ This release contains bug fixes for the previous release 1.1.54310: #### New Features: {#new-features} - Custom partitioning key for the MergeTree family of table engines. -- [Kafka](https://clickhouse.yandex/docs/en/operations/table_engines/kafka/) table engine. +- [Kafka](https://clickhouse.tech/docs/en/operations/table_engines/kafka/) table engine. - Added support for loading [CatBoost](https://catboost.yandex/) models and applying them to data stored in ClickHouse. - Added support for time zones with non-integer offsets from UTC. - Added support for arithmetic operations with time intervals. diff --git a/docs/en/whats-new/changelog/2018.md b/docs/en/whats-new/changelog/2018.md index 38e1932707f..5ddd9a10f0f 100644 --- a/docs/en/whats-new/changelog/2018.md +++ b/docs/en/whats-new/changelog/2018.md @@ -1058,4 +1058,4 @@ This release contains bug fixes for the previous release 1.1.54337: - When doing a rolling update on a cluster, at the point when some of the replicas are running the old version of ClickHouse and some are running the new version, replication is temporarily stopped and the message `unknown parameter 'shard'` appears in the log. Replication will continue after all replicas of the cluster are updated. - If different versions of ClickHouse are running on the cluster servers, it is possible that distributed queries using the following functions will have incorrect results: `varSamp`, `varPop`, `stddevSamp`, `stddevPop`, `covarSamp`, `covarPop`, `corr`. 
You should update all cluster nodes. -## [Changelog for 2017](./2017.md#clickhouse-release-1-1-54327-2017-12-21) {#changelog-for-2017} +## [Changelog for 2017](../../whats-new/changelog/2017.md#clickhouse-release-1-1-54327-2017-12-21) {#changelog-for-2017} diff --git a/docs/en/whats-new/changelog/2019.md b/docs/en/whats-new/changelog/2019.md index d5a984905b7..ee4e66bc2f3 100644 --- a/docs/en/whats-new/changelog/2019.md +++ b/docs/en/whats-new/changelog/2019.md @@ -2069,4 +2069,4 @@ This release contains exactly the same set of patches as 19.3.6. - Fixed misspells in comments and string literals under `dbms`. [\#4122](https://github.com/ClickHouse/ClickHouse/pull/4122) ([maiha](https://github.com/maiha)) - Fixed typos in comments. [\#4089](https://github.com/ClickHouse/ClickHouse/pull/4089) ([Evgenii Pravda](https://github.com/kvinty)) -## [Changelog for 2018](./2018.md#clickhouse-release-18-16) {#changelog-for-2018} +## [Changelog for 2018](../../whats-new/changelog/2018.md#clickhouse-release-18-16) {#changelog-for-2018} diff --git a/docs/es/interfaces/third-party/integrations.md b/docs/es/interfaces/third-party/integrations.md index 716e774871b..e752725d727 100644 --- a/docs/es/interfaces/third-party/integrations.md +++ b/docs/es/interfaces/third-party/integrations.md @@ -12,6 +12,7 @@ toc_title: Integrations - Relational database management systems - [MySQL](https://www.mysql.com) + - [mysql2ch](https://github.com/long2ice/mysql2ch) - [ProxySQL](https://github.com/sysown/proxysql/wiki/ClickHouse-Support) - [clickhouse-mysql-data-reader](https://github.com/Altinity/clickhouse-mysql-data-reader) - [horgh-replicator](https://github.com/larsnovikov/horgh-replicator) @@ -97,5 +98,11 @@ toc_title: Integrations - Elixir - [Ecto](https://github.com/elixir-ecto/ecto) - [clickhouse\_ecto](https://github.com/appodeal/clickhouse_ecto) +- Ruby + - [Ruby on Rails](https://rubyonrails.org/) + - [activecube](https://github.com/bitquery/activecube) + - [ActiveRecord](https://github.com/PNixx/clickhouse-activerecord) + - [GraphQL](https://github.com/graphql) + - [activecube-graphql](https://github.com/bitquery/activecube-graphql) [Original article](https://clickhouse.tech/docs/en/interfaces/third-party/integrations/) diff --git a/docs/es/introduction/adopters.md b/docs/es/introduction/adopters.md index e41e8005cc7..4c0aa78d57b 100644 --- a/docs/es/introduction/adopters.md +++ b/docs/es/introduction/adopters.md @@ -37,7 +37,7 @@ toc_title: Adoptante | Exness | Comercio | Métricas, Registro | — | — | [Charla en ruso, mayo 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) | | Sistema abierto. 
| Red Ad | Producto principal | — | — | [Publicación de blog en japonés, julio 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) | | HUYA | Video Streaming | Analítica | — | — | [Diapositivas en chino, octubre 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) | -| Idealista | Inmobiliario | Analítica | — | — | [Blog Post en Inglés, Abril 2019](https://clickhouse.yandex/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | +| Idealista | Inmobiliario | Analítica | — | — | [Blog Post en Inglés, Abril 2019](https://clickhouse.tech/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | | Infovista | Red | Analítica | — | — | [Diapositivas en español, octubre 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) | | InnoGames | Juego | Métricas, Registro | — | — | [Diapositivas en ruso, septiembre 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) | | Integros | Plataforma para servicios de video | Analítica | — | — | [Diapositivas en ruso, mayo 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | diff --git a/docs/es/sql-reference/data-types/domains/ipv4.md b/docs/es/sql-reference/data-types/domains/ipv4.md index c97229610d3..6e271f10fd2 100644 --- a/docs/es/sql-reference/data-types/domains/ipv4.md +++ b/docs/es/sql-reference/data-types/domains/ipv4.md @@ -33,7 +33,7 @@ CREATE TABLE hits (url String, from IPv4) ENGINE = MergeTree() ORDER BY from; `IPv4` domain admite formato de entrada personalizado como cadenas IPv4: ``` sql -INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '116.253.40.133')('https://clickhouse.tech', '183.247.232.58')('https://clickhouse.yandex/docs/en/', '116.106.34.242'); +INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '116.253.40.133')('https://clickhouse.tech', '183.247.232.58')('https://clickhouse.tech/docs/en/', '116.106.34.242'); SELECT * FROM hits; ``` diff --git a/docs/es/sql-reference/data-types/domains/ipv6.md b/docs/es/sql-reference/data-types/domains/ipv6.md index bee82ff2898..2f45a353053 100644 --- a/docs/es/sql-reference/data-types/domains/ipv6.md +++ b/docs/es/sql-reference/data-types/domains/ipv6.md @@ -33,7 +33,7 @@ CREATE TABLE hits (url String, from IPv6) ENGINE = MergeTree() ORDER BY from; `IPv6` domain admite entradas personalizadas como cadenas IPv6: ``` sql -INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '2a02:aa08:e000:3100::2')('https://clickhouse.tech', '2001:44c8:129:2632:33:0:252:2')('https://clickhouse.yandex/docs/en/', '2a02:e980:1e::1'); +INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '2a02:aa08:e000:3100::2')('https://clickhouse.tech', '2001:44c8:129:2632:33:0:252:2')('https://clickhouse.tech/docs/en/', '2a02:e980:1e::1'); SELECT * FROM hits; ``` diff --git a/docs/es/sql-reference/functions/array-functions.md b/docs/es/sql-reference/functions/array-functions.md index 3a0ad14b24e..677996efabd 100644 --- a/docs/es/sql-reference/functions/array-functions.md +++ b/docs/es/sql-reference/functions/array-functions.md @@ -702,13 +702,13 @@ arrayDifference(array) **Parámetros** -- `array` – [Matriz](https://clickhouse.yandex/docs/en/data_types/array/). +- `array` – [Matriz](https://clickhouse.tech/docs/en/data_types/array/). **Valores devueltos** Devuelve una matriz de diferencias entre los elementos adyacentes. 
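For reference, the adjacent-difference behaviour documented in this (Spanish) `arrayDifference` section can be sketched as follows; the expected output is shown as a comment and follows the semantics stated above:

``` sql
SELECT arrayDifference([1, 2, 3, 25]) AS deltas;
-- Result: [0, 1, 1, 22]. The first element is 0; each following element is the difference from its predecessor.
```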
-Tipo: [UInt\*](https://clickhouse.yandex/docs/en/data_types/int_uint/#uint-ranges), [En\*](https://clickhouse.yandex/docs/en/data_types/int_uint/#int-ranges), [Flotante\*](https://clickhouse.yandex/docs/en/data_types/float/). +Tipo: [UInt\*](https://clickhouse.tech/docs/en/data_types/int_uint/#uint-ranges), [En\*](https://clickhouse.tech/docs/en/data_types/int_uint/#int-ranges), [Flotante\*](https://clickhouse.tech/docs/en/data_types/float/). **Ejemplo** @@ -754,7 +754,7 @@ arrayDistinct(array) **Parámetros** -- `array` – [Matriz](https://clickhouse.yandex/docs/en/data_types/array/). +- `array` – [Matriz](https://clickhouse.tech/docs/en/data_types/array/). **Valores devueltos** diff --git a/docs/es/whats-new/changelog/2017.md b/docs/es/whats-new/changelog/2017.md index 97b2cafd198..33e48b0409f 100644 --- a/docs/es/whats-new/changelog/2017.md +++ b/docs/es/whats-new/changelog/2017.md @@ -26,7 +26,7 @@ Esta versión contiene correcciones de errores para la versión anterior 1.1.543 #### Novedad: {#new-features} - Clave de partición personalizada para la familia MergeTree de motores de tabla. -- [Kafka](https://clickhouse.yandex/docs/en/operations/table_engines/kafka/) motor de mesa. +- [Kafka](https://clickhouse.tech/docs/en/operations/table_engines/kafka/) motor de mesa. - Se agregó soporte para cargar [CatBoost](https://catboost.yandex/) modelos y aplicarlos a los datos almacenados en ClickHouse. - Se agregó soporte para zonas horarias con desplazamientos no enteros de UTC. - Se agregó soporte para operaciones aritméticas con intervalos de tiempo. diff --git a/docs/fa/interfaces/third-party/integrations.md b/docs/fa/interfaces/third-party/integrations.md index 657432c7958..d198909095d 100644 --- a/docs/fa/interfaces/third-party/integrations.md +++ b/docs/fa/interfaces/third-party/integrations.md @@ -14,6 +14,7 @@ toc_title: "\u06CC\u06A9\u067E\u0627\u0631\u0686\u06AF\u06CC" - سیستم های مدیریت پایگاه داده رابطه ای - [MySQL](https://www.mysql.com) + - [mysql2ch](https://github.com/long2ice/mysql2ch) - [در حال بارگذاری](https://github.com/sysown/proxysql/wiki/ClickHouse-Support) - [تاتر-خروجی زیر-داده خوان](https://github.com/Altinity/clickhouse-mysql-data-reader) - [horgh-replicator](https://github.com/larsnovikov/horgh-replicator) @@ -99,5 +100,11 @@ toc_title: "\u06CC\u06A9\u067E\u0627\u0631\u0686\u06AF\u06CC" - اکسیر - [Ecto](https://github.com/elixir-ecto/ecto) - [حذف جستجو](https://github.com/appodeal/clickhouse_ecto) - +- Ruby + - [Ruby on Rails](https://rubyonrails.org/) + - [activecube](https://github.com/bitquery/activecube) + - [ActiveRecord](https://github.com/PNixx/clickhouse-activerecord) + - [GraphQL](https://github.com/graphql) + - [activecube-graphql](https://github.com/bitquery/activecube-graphql) + [مقاله اصلی](https://clickhouse.tech/docs/en/interfaces/third-party/integrations/) diff --git a/docs/fa/introduction/adopters.md b/docs/fa/introduction/adopters.md index a4ad16faf6c..654f3a24736 100644 --- a/docs/fa/introduction/adopters.md +++ b/docs/fa/introduction/adopters.md @@ -37,7 +37,7 @@ toc_title: "\u067E\u0630\u06CC\u0631\u0627" | اعمال | بازرگانی | معیارهای ورود به سیستم | — | — | [بحث در روسیه, بیشتر 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) | | ژنی | شبکه تبلیغاتی | محصول اصلی | — | — | [پست وبلاگ در ژاپن, جولای 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) | | HUYA | جریان ویدیو | تجزیه و تحلیل | — | — | [اسلاید در چین, اکتبر 
2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) | -| Idealista | املاک و مستغلات | تجزیه و تحلیل | — | — | [پست وبلاگ به زبان انگلیسی, مارس 2019](https://clickhouse.yandex/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | +| Idealista | املاک و مستغلات | تجزیه و تحلیل | — | — | [پست وبلاگ به زبان انگلیسی, مارس 2019](https://clickhouse.tech/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | | اینفویستا | شبکه ها | تجزیه و تحلیل | — | — | [اسلاید به زبان انگلیسی, اکتبر 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) | | نام | بازی ها | معیارهای ورود به سیستم | — | — | [اسلاید در روسیه, سپتامبر 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) | | پوششی | بستر های نرم افزاری برای خدمات تصویری | تجزیه و تحلیل | — | — | [اسلاید در روسیه, بیشتر 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | diff --git a/docs/fa/sql-reference/data-types/domains/ipv4.md b/docs/fa/sql-reference/data-types/domains/ipv4.md index 645e839f6d8..a010409d58b 100644 --- a/docs/fa/sql-reference/data-types/domains/ipv4.md +++ b/docs/fa/sql-reference/data-types/domains/ipv4.md @@ -33,7 +33,7 @@ CREATE TABLE hits (url String, from IPv4) ENGINE = MergeTree() ORDER BY from; `IPv4` دامنه پشتیبانی از فرمت ورودی سفارشی به عنوان ایپو4 رشته: ``` sql -INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '116.253.40.133')('https://clickhouse.tech', '183.247.232.58')('https://clickhouse.yandex/docs/en/', '116.106.34.242'); +INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '116.253.40.133')('https://clickhouse.tech', '183.247.232.58')('https://clickhouse.tech/docs/en/', '116.106.34.242'); SELECT * FROM hits; ``` diff --git a/docs/fa/sql-reference/data-types/domains/ipv6.md b/docs/fa/sql-reference/data-types/domains/ipv6.md index 6677916c49b..64a9487cb07 100644 --- a/docs/fa/sql-reference/data-types/domains/ipv6.md +++ b/docs/fa/sql-reference/data-types/domains/ipv6.md @@ -33,7 +33,7 @@ CREATE TABLE hits (url String, from IPv6) ENGINE = MergeTree() ORDER BY from; `IPv6` دامنه پشتیبانی از ورودی های سفارشی به عنوان ایپو6 رشته: ``` sql -INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '2a02:aa08:e000:3100::2')('https://clickhouse.tech', '2001:44c8:129:2632:33:0:252:2')('https://clickhouse.yandex/docs/en/', '2a02:e980:1e::1'); +INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '2a02:aa08:e000:3100::2')('https://clickhouse.tech', '2001:44c8:129:2632:33:0:252:2')('https://clickhouse.tech/docs/en/', '2a02:e980:1e::1'); SELECT * FROM hits; ``` diff --git a/docs/fa/sql-reference/functions/array-functions.md b/docs/fa/sql-reference/functions/array-functions.md index 1988ed4266e..6f4e8326557 100644 --- a/docs/fa/sql-reference/functions/array-functions.md +++ b/docs/fa/sql-reference/functions/array-functions.md @@ -702,13 +702,13 @@ arrayDifference(array) **پارامترها** -- `array` – [& حذف](https://clickhouse.yandex/docs/en/data_types/array/). +- `array` – [& حذف](https://clickhouse.tech/docs/en/data_types/array/). **مقادیر بازگشتی** بازگرداندن مجموعه ای از تفاوت بین عناصر مجاور. -نوع: [اینترنت\*](https://clickhouse.yandex/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.yandex/docs/en/data_types/int_uint/#int-ranges), [شناور\*](https://clickhouse.yandex/docs/en/data_types/float/). 
+نوع: [اینترنت\*](https://clickhouse.tech/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.tech/docs/en/data_types/int_uint/#int-ranges), [شناور\*](https://clickhouse.tech/docs/en/data_types/float/). **مثال** @@ -754,7 +754,7 @@ arrayDistinct(array) **پارامترها** -- `array` – [& حذف](https://clickhouse.yandex/docs/en/data_types/array/). +- `array` – [& حذف](https://clickhouse.tech/docs/en/data_types/array/). **مقادیر بازگشتی** diff --git a/docs/fa/whats-new/changelog/2017.md b/docs/fa/whats-new/changelog/2017.md index 939ed966c22..ea4946cf185 100644 --- a/docs/fa/whats-new/changelog/2017.md +++ b/docs/fa/whats-new/changelog/2017.md @@ -26,7 +26,7 @@ toc_title: '2017' #### ویژگی های جدید: {#new-features} - کلید پارتیشن بندی سفارشی برای خانواده ادغام موتورهای جدول. -- [کافکا](https://clickhouse.yandex/docs/en/operations/table_engines/kafka/) موتور جدول. +- [کافکا](https://clickhouse.tech/docs/en/operations/table_engines/kafka/) موتور جدول. - اضافه شدن پشتیبانی برای بارگذاری [مانتو](https://catboost.yandex/) مدل ها و استفاده از داده های ذخیره شده در کلیک. - اضافه شدن پشتیبانی برای مناطق زمانی با شیپور خاموشی غیر عدد صحیح از مجموعه مقالات. - اضافه شدن پشتیبانی برای عملیات ریاضی با فواصل زمانی. diff --git a/docs/fr/interfaces/third-party/integrations.md b/docs/fr/interfaces/third-party/integrations.md index f252fd6229b..889408909b5 100644 --- a/docs/fr/interfaces/third-party/integrations.md +++ b/docs/fr/interfaces/third-party/integrations.md @@ -14,6 +14,7 @@ toc_title: "Int\xE9gration" - Systèmes de gestion de bases de données relationnelles - [MySQL](https://www.mysql.com) + - [mysql2ch](https://github.com/long2ice/mysql2ch) - [ProxySQL](https://github.com/sysown/proxysql/wiki/ClickHouse-Support) - [clickhouse-mysql-lecteur de données](https://github.com/Altinity/clickhouse-mysql-data-reader) - [horgh-réplicateur](https://github.com/larsnovikov/horgh-replicator) @@ -99,5 +100,11 @@ toc_title: "Int\xE9gration" - Elixir - [Ecto](https://github.com/elixir-ecto/ecto) - [clickhouse\_ecto](https://github.com/appodeal/clickhouse_ecto) +- Ruby + - [Ruby on Rails](https://rubyonrails.org/) + - [activecube](https://github.com/bitquery/activecube) + - [ActiveRecord](https://github.com/PNixx/clickhouse-activerecord) + - [GraphQL](https://github.com/graphql) + - [activecube-graphql](https://github.com/bitquery/activecube-graphql) [Article Original](https://clickhouse.tech/docs/en/interfaces/third-party/integrations/) diff --git a/docs/fr/introduction/adopters.md b/docs/fr/introduction/adopters.md index 833fc111fbe..e970c61955c 100644 --- a/docs/fr/introduction/adopters.md +++ b/docs/fr/introduction/adopters.md @@ -37,7 +37,7 @@ toc_title: Adoptant | Exness | Trading | Métriques, Journalisation | — | — | [Parler en russe, mai 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) | | Geniee | Réseau publicitaire | Produit principal | — | — | [Billet de Blog en japonais, juillet 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) | | HUYA | Le Streaming Vidéo | Analytics | — | — | [Diapositives en chinois, octobre 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) | -| Idealista | Immobilier | Analytics | — | — | [Billet de Blog en anglais, avril 2019](https://clickhouse.yandex/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | +| Idealista | Immobilier | Analytics | — | — | [Billet de Blog en anglais, avril 2019](https://clickhouse.tech/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | | Infovista | 
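Similarly, the `arrayDistinct` function covered by the (French) hunk below can be sketched as follows; the output comment reflects the documented behaviour, and no guarantee about element order is implied:

``` sql
SELECT arrayDistinct([1, 2, 2, 3, 1]) AS unique_values;
-- Result: [1, 2, 3] (duplicates removed)
```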
Réseau | Analytics | — | — | [Diapositives en anglais, octobre 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) | | InnoGames | Jeu | Métriques, Journalisation | — | — | [Diapositives en russe, septembre 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) | | Integros | Plate-forme pour les services vidéo | Analytics | — | — | [Diapositives en russe, mai 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | diff --git a/docs/fr/sql-reference/data-types/domains/ipv4.md b/docs/fr/sql-reference/data-types/domains/ipv4.md index 7cf36c0aaef..12895992e77 100644 --- a/docs/fr/sql-reference/data-types/domains/ipv4.md +++ b/docs/fr/sql-reference/data-types/domains/ipv4.md @@ -33,7 +33,7 @@ CREATE TABLE hits (url String, from IPv4) ENGINE = MergeTree() ORDER BY from; `IPv4` le domaine prend en charge le format d'entrée personnalisé en tant que chaînes IPv4: ``` sql -INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '116.253.40.133')('https://clickhouse.tech', '183.247.232.58')('https://clickhouse.yandex/docs/en/', '116.106.34.242'); +INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '116.253.40.133')('https://clickhouse.tech', '183.247.232.58')('https://clickhouse.tech/docs/en/', '116.106.34.242'); SELECT * FROM hits; ``` diff --git a/docs/fr/sql-reference/data-types/domains/ipv6.md b/docs/fr/sql-reference/data-types/domains/ipv6.md index 1d0f3cd47fd..77510a950cb 100644 --- a/docs/fr/sql-reference/data-types/domains/ipv6.md +++ b/docs/fr/sql-reference/data-types/domains/ipv6.md @@ -33,7 +33,7 @@ CREATE TABLE hits (url String, from IPv6) ENGINE = MergeTree() ORDER BY from; `IPv6` le domaine prend en charge l'entrée personnalisée en tant que chaînes IPv6: ``` sql -INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '2a02:aa08:e000:3100::2')('https://clickhouse.tech', '2001:44c8:129:2632:33:0:252:2')('https://clickhouse.yandex/docs/en/', '2a02:e980:1e::1'); +INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '2a02:aa08:e000:3100::2')('https://clickhouse.tech', '2001:44c8:129:2632:33:0:252:2')('https://clickhouse.tech/docs/en/', '2a02:e980:1e::1'); SELECT * FROM hits; ``` diff --git a/docs/fr/sql-reference/functions/array-functions.md b/docs/fr/sql-reference/functions/array-functions.md index 5590774732d..ef09800614f 100644 --- a/docs/fr/sql-reference/functions/array-functions.md +++ b/docs/fr/sql-reference/functions/array-functions.md @@ -702,13 +702,13 @@ arrayDifference(array) **Paramètre** -- `array` – [Tableau](https://clickhouse.yandex/docs/en/data_types/array/). +- `array` – [Tableau](https://clickhouse.tech/docs/en/data_types/array/). **Valeurs renvoyées** Renvoie un tableau de différences entre les éléments adjacents. -Type: [UInt\*](https://clickhouse.yandex/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.yandex/docs/en/data_types/int_uint/#int-ranges), [Flottant\*](https://clickhouse.yandex/docs/en/data_types/float/). +Type: [UInt\*](https://clickhouse.tech/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.tech/docs/en/data_types/int_uint/#int-ranges), [Flottant\*](https://clickhouse.tech/docs/en/data_types/float/). **Exemple** @@ -754,7 +754,7 @@ arrayDistinct(array) **Paramètre** -- `array` – [Tableau](https://clickhouse.yandex/docs/en/data_types/array/). +- `array` – [Tableau](https://clickhouse.tech/docs/en/data_types/array/). 
**Valeurs renvoyées** diff --git a/docs/fr/whats-new/changelog/2017.md b/docs/fr/whats-new/changelog/2017.md index be2cb7de9f4..c812f345fdd 100644 --- a/docs/fr/whats-new/changelog/2017.md +++ b/docs/fr/whats-new/changelog/2017.md @@ -26,7 +26,7 @@ Cette version contient des corrections de bugs pour la version précédente 1.1. #### Nouveauté: {#new-features} - Clé de partitionnement personnalisée pour la famille MergeTree des moteurs de table. -- [Kafka](https://clickhouse.yandex/docs/en/operations/table_engines/kafka/) tableau moteur. +- [Kafka](https://clickhouse.tech/docs/en/operations/table_engines/kafka/) tableau moteur. - Ajout du support pour le chargement [CatBoost](https://catboost.yandex/) modèles et les appliquer aux données stockées dans ClickHouse. - Ajout du support pour les fuseaux horaires avec des décalages non entiers de UTC. - Ajout du support pour les opérations arithmétiques avec des intervalles de temps. diff --git a/docs/ja/interfaces/third-party/integrations.md b/docs/ja/interfaces/third-party/integrations.md index 3e38d578093..f88adfd4f4f 100644 --- a/docs/ja/interfaces/third-party/integrations.md +++ b/docs/ja/interfaces/third-party/integrations.md @@ -14,6 +14,7 @@ toc_title: "\u7D71\u5408" - リレーショナルデータベース管理システム - [MySQL](https://www.mysql.com) + - [mysql2ch](https://github.com/long2ice/mysql2ch) - [ProxySQL](https://github.com/sysown/proxysql/wiki/ClickHouse-Support) - [clickhouse-mysql-データリーダー](https://github.com/Altinity/clickhouse-mysql-data-reader) - [horgh-レプリケーター](https://github.com/larsnovikov/horgh-replicator) @@ -99,5 +100,11 @@ toc_title: "\u7D71\u5408" - エリクサー - [Ecto](https://github.com/elixir-ecto/ecto) - [clickhouse\_ecto](https://github.com/appodeal/clickhouse_ecto) +- Ruby + - [Ruby on rails](https://rubyonrails.org/) + - [activecube](https://github.com/bitquery/activecube) + - [ActiveRecord](https://github.com/PNixx/clickhouse-activerecord) + - [GraphQL](https://github.com/graphql) + - [activecube-graphql](https://github.com/bitquery/activecube-graphql) [元の記事](https://clickhouse.tech/docs/en/interfaces/third-party/integrations/) diff --git a/docs/ja/introduction/adopters.md b/docs/ja/introduction/adopters.md index 084b5034a62..a1a89f6795f 100644 --- a/docs/ja/introduction/adopters.md +++ b/docs/ja/introduction/adopters.md @@ -37,7 +37,7 @@ toc_title: "\u30A2\u30C0\u30D7\u30BF\u30FC" | Exness | 取引 | 指標、ロギング | — | — | [ロシア語で話す,May2019](https://youtu.be/_rpU-TvSfZ8?t=3215) | | 魔神 | 広告ネットワーク | 主な製品 | — | — | [ブログ投稿日本語,July2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) | | HUYA | ビデオストリーミング | 分析 | — | — | [中国語でのスライド,October2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) | -| イデアリスタ | 不動産 | 分析 | — | — | [ブログ投稿英語,April2019](https://clickhouse.yandex/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | +| イデアリスタ | 不動産 | 分析 | — | — | [ブログ投稿英語,April2019](https://clickhouse.tech/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | | インフォビスタ | ネット | 分析 | — | — | [2019年のスライド](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) | | InnoGames | ゲーム | 指標、ロギング | — | — | [2019年ロシア](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) | | インテグロス | Platformビデオサービス | 分析 | — | — | [ロシア語でのスライド,月2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | diff --git a/docs/ja/introduction/distinctive-features.md 
b/docs/ja/introduction/distinctive-features.md index 5cf44ee0002..88dc91e0a3b 100644 --- a/docs/ja/introduction/distinctive-features.md +++ b/docs/ja/introduction/distinctive-features.md @@ -69,4 +69,4 @@ ClickHouseには、精度を犠牲にしてパフォーマンスを得るため 2. 既に挿入されたデータの変更または削除を、高頻度かつ低遅延に行う機能はありません。 [GDPR](https://gdpr-info.eu)に準拠するなど、データをクリーンアップまたは変更するために、バッチ削除およびバッチ更新が利用可能です。 3. インデックスが疎であるため、ClickHouseは、キーで単一行を取得するようなクエリにはあまり適していません。 -[Original article](https://clickhouse.yandex/docs/en/introduction/distinctive_features/) +[Original article](https://clickhouse.tech/docs/en/introduction/distinctive_features/) diff --git a/docs/ja/introduction/history.md b/docs/ja/introduction/history.md index af5dc40145d..162ed3ba415 100644 --- a/docs/ja/introduction/history.md +++ b/docs/ja/introduction/history.md @@ -48,4 +48,4 @@ Yandex.Metricaには、Metrageと呼ばれるデータを集計するための OLAPServerの制限を取り除き、レポートのための非集計データを扱う問題を解決するために、私達は ClickHouse DBMSを開発しました。 -[Original article](https://clickhouse.yandex/docs/en/introduction/history/) +[Original article](https://clickhouse.tech/docs/en/introduction/history/) diff --git a/docs/ja/introduction/performance.md b/docs/ja/introduction/performance.md index d6404853ccd..7750a10c0ec 100644 --- a/docs/ja/introduction/performance.md +++ b/docs/ja/introduction/performance.md @@ -5,9 +5,9 @@ toc_title: "\u30D1\u30D5\u30A9\u30FC\u30DE\u30F3\u30B9" # パフォーマンス {#pahuomansu} -Yandexの内部テスト結果によると、ClickHouseは、テスト可能なクラスのシステム間で同等の動作シナリオで最高のパフォーマンス(長時間のクエリで最も高いスループットと、短時間のクエリで最小のレイテンシの両方)を示します。 [別のページで](https://clickhouse.yandex/benchmark/dbms/)テスト結果を表示できます 。 +Yandexの内部テスト結果によると、ClickHouseは、テスト可能なクラスのシステム間で同等の動作シナリオで最高のパフォーマンス(長時間のクエリで最も高いスループットと、短時間のクエリで最小のレイテンシの両方)を示します。 [別のページで](https://clickhouse.tech/benchmark/dbms/)テスト結果を表示できます 。 -これは、多数の独立したベンチマークでも確認されています。インターネット検索で見つけることは難しくありませんし、 [私達がまとめた関連リンク集](https://clickhouse.yandex/#independent-benchmarks) から見つけることもできます。 +これは、多数の独立したベンチマークでも確認されています。インターネット検索で見つけることは難しくありませんし、 [私達がまとめた関連リンク集](https://clickhouse.tech/#independent-benchmarks) から見つけることもできます。 ## 単一の巨大なクエリのスループット {#dan-yi-noju-da-nakuerinosurupututo} @@ -27,4 +27,4 @@ Yandexの内部テスト結果によると、ClickHouseは、テスト可能な 少なくとも1000行のパケットにデータを挿入することをお勧めします。または、1秒あたり1回のリクエストを超えないでください。タブ区切りのダンプデータをMergeTreeテーブルに挿入する場合、挿入速度は50〜200MB/sになります。挿入された行のサイズが約1Kbの場合、速度は毎秒50,000〜200,000行になります。行が小さい場合、パフォーマンスは1秒あたりの行数で高くなります(Banner System データ- `>` 500,000行/秒、Graphite データ- `>` 1,000,000行/秒)。パフォーマンスを向上させるために、複数のINSERTクエリを並行して作成することで、パフォーマンスを線形に向上できます。 -[Original article](https://clickhouse.yandex/docs/ja/introduction/performance/) +[Original article](https://clickhouse.tech/docs/ja/introduction/performance/) diff --git a/docs/ja/sql-reference/data-types/domains/ipv4.md b/docs/ja/sql-reference/data-types/domains/ipv4.md index e355ae4f70f..c329028ad40 100644 --- a/docs/ja/sql-reference/data-types/domains/ipv4.md +++ b/docs/ja/sql-reference/data-types/domains/ipv4.md @@ -33,7 +33,7 @@ CREATE TABLE hits (url String, from IPv4) ENGINE = MergeTree() ORDER BY from; `IPv4` ドメインはIPv4文字列としてカスタム入力形式をサポート: ``` sql -INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '116.253.40.133')('https://clickhouse.tech', '183.247.232.58')('https://clickhouse.yandex/docs/en/', '116.106.34.242'); +INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '116.253.40.133')('https://clickhouse.tech', '183.247.232.58')('https://clickhouse.tech/docs/en/', '116.106.34.242'); SELECT * FROM hits; ``` diff --git a/docs/ja/sql-reference/data-types/domains/ipv6.md b/docs/ja/sql-reference/data-types/domains/ipv6.md index 73227e7a2b7..26583429ec8 
100644 --- a/docs/ja/sql-reference/data-types/domains/ipv6.md +++ b/docs/ja/sql-reference/data-types/domains/ipv6.md @@ -33,7 +33,7 @@ CREATE TABLE hits (url String, from IPv6) ENGINE = MergeTree() ORDER BY from; `IPv6` ドメイ: ``` sql -INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '2a02:aa08:e000:3100::2')('https://clickhouse.tech', '2001:44c8:129:2632:33:0:252:2')('https://clickhouse.yandex/docs/en/', '2a02:e980:1e::1'); +INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '2a02:aa08:e000:3100::2')('https://clickhouse.tech', '2001:44c8:129:2632:33:0:252:2')('https://clickhouse.tech/docs/en/', '2a02:e980:1e::1'); SELECT * FROM hits; ``` diff --git a/docs/ja/sql-reference/data-types/simpleaggregatefunction.md b/docs/ja/sql-reference/data-types/simpleaggregatefunction.md deleted file mode 120000 index 76a7ef3b802..00000000000 --- a/docs/ja/sql-reference/data-types/simpleaggregatefunction.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/sql-reference/data-types/simpleaggregatefunction.md \ No newline at end of file diff --git a/docs/ja/sql-reference/data-types/simpleaggregatefunction.md b/docs/ja/sql-reference/data-types/simpleaggregatefunction.md new file mode 100644 index 00000000000..9d28bcdb62c --- /dev/null +++ b/docs/ja/sql-reference/data-types/simpleaggregatefunction.md @@ -0,0 +1,40 @@ +--- +machine_translated: true +machine_translated_rev: 71d72c1f237f4a553fe91ba6c6c633e81a49e35b +--- + +# SimpleAggregateFunction {#data-type-simpleaggregatefunction} + +`SimpleAggregateFunction(name, types_of_arguments…)` データ型は、集計関数の現在の値を格納し、その完全な状態を次のように格納しません [`AggregateFunction`](../../sql-reference/data-types/aggregatefunction.md) そうだ この最適化は、次のプロパティが保持される関数に適用できます。 `f` 行セットに `S1 UNION ALL S2` 取得できるよ `f` 行の一部に別々に設定し、再び適用します `f` 結果に: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`. このプロパティは、部分集計の結果が結合された結果を計算するのに十分であることを保証するため、余分なデータを格納して処理する必要はあり + +次の集計関数がサポートされます: + +- [`any`](../../sql-reference/aggregate-functions/reference.md#agg_function-any) +- [`anyLast`](../../sql-reference/aggregate-functions/reference.md#anylastx) +- [`min`](../../sql-reference/aggregate-functions/reference.md#agg_function-min) +- [`max`](../../sql-reference/aggregate-functions/reference.md#agg_function-max) +- [`sum`](../../sql-reference/aggregate-functions/reference.md#agg_function-sum) +- [`groupBitAnd`](../../sql-reference/aggregate-functions/reference.md#groupbitand) +- [`groupBitOr`](../../sql-reference/aggregate-functions/reference.md#groupbitor) +- [`groupBitXor`](../../sql-reference/aggregate-functions/reference.md#groupbitxor) +- [`groupArrayArray`](../../sql-reference/aggregate-functions/reference.md#agg_function-grouparray) +- [`groupUniqArrayArray`](../../sql-reference/aggregate-functions/reference.md#groupuniqarrayx-groupuniqarraymax-sizex) + +の値 `SimpleAggregateFunction(func, Type)` 見て、同じように格納 `Type` したがって、次の関数を適用する必要はありません `-Merge`/`-State` 接尾辞。 `SimpleAggregateFunction` は以上のパフォーマンス `AggregateFunction` 同じ集計機能を使って。 + +**パラメータ** + +- 集計関数の名前。 +- 集計関数の引数の型。 + +**例** + +``` sql +CREATE TABLE t +( + column1 SimpleAggregateFunction(sum, UInt64), + column2 SimpleAggregateFunction(any, String) +) ENGINE = ... 
+``` + +[元の記事](https://clickhouse.tech/docs/en/data_types/simpleaggregatefunction/) diff --git a/docs/ja/sql-reference/functions/array-functions.md b/docs/ja/sql-reference/functions/array-functions.md index 5a70770a54b..bd30262cc1e 100644 --- a/docs/ja/sql-reference/functions/array-functions.md +++ b/docs/ja/sql-reference/functions/array-functions.md @@ -702,13 +702,13 @@ arrayDifference(array) **パラメータ** -- `array` – [配列](https://clickhouse.yandex/docs/en/data_types/array/). +- `array` – [配列](https://clickhouse.tech/docs/en/data_types/array/). **戻り値** 隣接する要素間の差分の配列を返します。 -タイプ: [UInt\*](https://clickhouse.yandex/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.yandex/docs/en/data_types/int_uint/#int-ranges), [フロート\*](https://clickhouse.yandex/docs/en/data_types/float/). +タイプ: [UInt\*](https://clickhouse.tech/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.tech/docs/en/data_types/int_uint/#int-ranges), [フロート\*](https://clickhouse.tech/docs/en/data_types/float/). **例** @@ -754,7 +754,7 @@ arrayDistinct(array) **パラメータ** -- `array` – [配列](https://clickhouse.yandex/docs/en/data_types/array/). +- `array` – [配列](https://clickhouse.tech/docs/en/data_types/array/). **戻り値** diff --git a/docs/ja/whats-new/changelog/2017.md b/docs/ja/whats-new/changelog/2017.md index ada7b74e431..9561062f31d 100644 --- a/docs/ja/whats-new/changelog/2017.md +++ b/docs/ja/whats-new/changelog/2017.md @@ -26,7 +26,7 @@ toc_title: '2017' #### 新しい機能: {#new-features} - カスタムパーティショニングキーのMergeTree家族のテーブルエンジンです。 -- [カフカ](https://clickhouse.yandex/docs/en/operations/table_engines/kafka/) テーブルエンジン。 +- [カフカ](https://clickhouse.tech/docs/en/operations/table_engines/kafka/) テーブルエンジン。 - ロードのサポートを追加 [CatBoost](https://catboost.yandex/) モデルとClickHouseに格納されたデータにそれらを適用します。 - サポートが追加された時間帯と非整数オフセットからのUTCです。 - 時間間隔での算術演算のサポートが追加されました。 diff --git a/docs/ru/interfaces/http.md b/docs/ru/interfaces/http.md index afe38132cc9..afd4d083365 100644 --- a/docs/ru/interfaces/http.md +++ b/docs/ru/interfaces/http.md @@ -244,7 +244,7 @@ X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_ro - `read_bytes` — объём прочитанных данных в байтах. - `total_rows_to_read` — общее количество строк для чтения. - `written_rows` — количество записанных строк. -- `written_bytes` — объём прочитанных данных в байтах. +- `written_bytes` — объём записанных данных в байтах. Запущенные запросы не останавливаются автоматически при разрыве HTTP соединения. Парсинг и форматирование данных производится на стороне сервера и использование сети может быть неэффективным. Может быть передан необязательный параметр query\_id - идентификатор запроса, произвольная строка. Подробнее смотрите раздел «Настройки, replace\_running\_query». @@ -276,7 +276,335 @@ $ curl -sS 'http://localhost:8123/?max_result_bytes=4000000&buffer_size=3000000& ### Пример {#primer} ``` bash -$ curl -sS "
?param_id=2¶m_phrase=test" -d "SELECT * FROM table WHERE int_column = {id:UInt8} and string_column = {phrase:String}" +$ curl -sS "http://localhost:8123/?param_id=2¶m_phrase=test" -d "SELECT * FROM table WHERE int_column = {id:UInt8} and string_column = {phrase:String}" +``` + +## Предопределенный HTTP интерфейс {#predefined_http_interface} + +ClickHouse поддерживает определенные запросы через HTTP-интерфейс. Например, вы можете записать данные в таблицу следующим образом: + +``` bash +$ echo '(4),(5),(6)' | curl 'http://localhost:8123/?query=INSERT%20INTO%20t%20VALUES' --data-binary @- +``` + +ClickHouse также поддерживает предопределенный HTTP-интерфейс, который может помочь вам легче интегрироваться со сторонними инструментами, такими как [Prometheus exporter](https://github.com/percona-lab/clickhouse_exporter). + +Пример: + +- Прежде всего, добавьте раздел в конфигурационный файл сервера: + + + +``` xml + + + /predefined_query + POST,GET + + predefined_query_handler + SELECT * FROM system.metrics LIMIT 5 FORMAT Template SETTINGS format_template_resultset = 'prometheus_template_output_format_resultset', format_template_row = 'prometheus_template_output_format_row', format_template_rows_between_delimiter = '\n' + + + ... + ... + +``` + +- Теперь вы можете напрямую запросить URL-адрес для получения данных в формате Prometheus: + + + +``` bash +$ curl -v 'http://localhost:8123/predefined_query' +* Trying ::1... +* Connected to localhost (::1) port 8123 (#0) +> GET /predefined_query HTTP/1.1 +> Host: localhost:8123 +> User-Agent: curl/7.47.0 +> Accept: */* +> +< HTTP/1.1 200 OK +< Date: Tue, 28 Apr 2020 08:52:56 GMT +< Connection: Keep-Alive +< Content-Type: text/plain; charset=UTF-8 +< X-ClickHouse-Server-Display-Name: i-mloy5trc +< Transfer-Encoding: chunked +< X-ClickHouse-Query-Id: 96fe0052-01e6-43ce-b12a-6b7370de6e8a +< X-ClickHouse-Format: Template +< X-ClickHouse-Timezone: Asia/Shanghai +< Keep-Alive: timeout=3 +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< +# HELP "Query" "Number of executing queries" +# TYPE "Query" counter +"Query" 1 + +# HELP "Merge" "Number of executing background merges" +# TYPE "Merge" counter +"Merge" 0 + +# HELP "PartMutation" "Number of mutations (ALTER DELETE/UPDATE)" +# TYPE "PartMutation" counter +"PartMutation" 0 + +# HELP "ReplicatedFetch" "Number of data parts being fetched from replica" +# TYPE "ReplicatedFetch" counter +"ReplicatedFetch" 0 + +# HELP "ReplicatedSend" "Number of data parts being sent to replicas" +# TYPE "ReplicatedSend" counter +"ReplicatedSend" 0 + +* Connection #0 to host localhost left intact + +* Connection #0 to host localhost left intact +``` + +Как вы можете видеть из примера, `http_handlers` настраивается в файле config.xml и может содержать несколько правил. ClickHouse будет сопоставлять полученные HTTP-запросы с предопределенным типом в правиле, и первое совпадение запустит обработчик. Затем ClickHouse выполнит соответствующий предопределенный запрос. + +В настоящий момент с помощью `rule` можно настроить `method`, `headers`, `url`, `handler`: + - `method` отвечает за соответствие метода HTTP-запроса. `method` соответствует методу [method](https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods) протокола HTTP. Это необязательная настройка. Если она не определена в файле конфигурации, она не соответствует методу HTTP-запроса. + + - `url` отвечает за соответствие URL HTTP-запроса. 
Она совместима с регулярными выражениями [RE2](https://github.com/google/re2). Это необязательная настройка. Если она не определена в файле конфигурации, она не соответствует URL-адресу HTTP-запроса.
+
+   - `headers` отвечают за соответствие заголовков HTTP-запроса. Они совместимы с регулярными выражениями RE2. Это необязательная настройка. Если они не определены в файле конфигурации, они не соответствуют заголовкам HTTP-запроса.
+
+   - `handler` содержит основную часть обработчика. Сейчас `handler` может настраивать `type`, `status`, `content_type`, `response_content`, `query`, `query_param_name`.
+    `type` на данный момент поддерживает три типа: [predefined_query_handler](#predefined_query_handler), [dynamic_query_handler](#dynamic_query_handler), [static](#static).
+
+   - `query` — используется с типом `predefined_query_handler`, выполняет запрос при вызове обработчика.
+
+   - `query_param_name` — используется с типом `dynamic_query_handler`, извлекает и выполняет значение, соответствующее значению `query_param_name` в параметрах HTTP-запроса.
+
+   - `status` — используется с типом `static`, возвращает код состояния ответа.
+
+   - `content_type` — используется с типом `static`, возвращает [content-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type).
+
+   - `response_content` — используется с типом `static`; задаёт содержимое ответа, отправляемое клиенту. При использовании префикса ‘file://’ или ‘config://’ содержимое берётся из файла или конфигурации и отправляется клиенту.
+
+Далее приведены методы настройки для различных типов.
+
+### predefined_query_handler {#predefined_query_handler}
+
+`predefined_query_handler` поддерживает настройку значений `Settings` и `query_params`. Вы можете настроить запрос в типе `predefined_query_handler`.
+
+Значение `query` — это предопределенный запрос `predefined_query_handler`, который ClickHouse выполняет при совпадении HTTP-запроса, возвращая результат запроса. Это обязательная настройка.
+
+В следующем примере определяются настройки [max_threads](../operations/settings/settings.md#settings-max_threads) и `max_alter_threads`, а затем запрашивается системная таблица, чтобы проверить, были ли эти параметры успешно установлены.
+
+Пример:
+
+``` xml
+
+
+    [^/]+)(/(?P[^/]+))?]]>
+    GET
+    
+        TEST_HEADER_VALUE
+        [^/]+)(/(?P[^/]+))?]]>
+    
+    
+        predefined_query_handler
+        SELECT value FROM system.settings WHERE name = {name_1:String}
+        SELECT name, value FROM system.settings WHERE name = {name_2:String}
+
+
+
+```
+
+``` bash
+$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_alter_threads?max_threads=1&max_alter_threads=2'
+1
+max_alter_threads 2
+```
+
+!!! note "Предупреждение"
+    В одном `predefined_query_handler` поддерживается только один запрос типа `INSERT`.
+
+### dynamic_query_handler {#dynamic_query_handler}
+
+В `dynamic_query_handler` запрос пишется в виде параметра HTTP-запроса. Разница в том, что в `predefined_query_handler` запрос записывается в конфигурационный файл. Вы можете настроить `query_param_name` в `dynamic_query_handler`.
+
+ClickHouse извлекает и выполняет значение, соответствующее параметру `query_param_name` в URL-адресе HTTP-запроса. Значение по умолчанию `query_param_name` — это `/query`. Это необязательная настройка. Если в файле конфигурации нет определения, параметр не передается.
+ +Чтобы поэкспериментировать с этой функциональностью, в примере определяются значения [max_threads](../operations/settings/settings.md#settings-max_threads) и `max_alter_threads` и запрашивается, успешно ли были установлены настройки. + +Пример: + +``` xml + + + + TEST_HEADER_VALUE_DYNAMIC + + dynamic_query_handler + query_param + + + +``` + +``` bash +$ curl -H 'XXX:TEST_HEADER_VALUE_DYNAMIC' 'http://localhost:8123/own?max_threads=1&max_alter_threads=2¶m_name_1=max_threads¶m_name_2=max_alter_threads&query_param=SELECT%20name,value%20FROM%20system.settings%20where%20name%20=%20%7Bname_1:String%7D%20OR%20name%20=%20%7Bname_2:String%7D' +max_threads 1 +max_alter_threads 2 +``` + +### static {#static} + +`static` может возвращать [content_type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type), [status](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status) и `response_content`. `response_content` может возвращать конкретное содержимое. + +Пример: + +Возвращает сообщение. + +``` xml + + + GET + xxx + /hi + + static + 402 + text/html; charset=UTF-8 + Say Hi! + + + +``` + +``` bash +$ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi' +* Trying ::1... +* Connected to localhost (::1) port 8123 (#0) +> GET /hi HTTP/1.1 +> Host: localhost:8123 +> User-Agent: curl/7.47.0 +> Accept: */* +> XXX:xxx +> +< HTTP/1.1 402 Payment Required +< Date: Wed, 29 Apr 2020 03:51:26 GMT +< Connection: Keep-Alive +< Content-Type: text/html; charset=UTF-8 +< Transfer-Encoding: chunked +< Keep-Alive: timeout=3 +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< +* Connection #0 to host localhost left intact +Say Hi!% +``` + +Находит содержимое настроек отправленных клиенту. + +``` xml +
]]>
+ + + + GET + xxx + /get_config_static_handler + + static + config://get_config_static_handler + + + +``` + +``` bash +$ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler' +* Trying ::1... +* Connected to localhost (::1) port 8123 (#0) +> GET /get_config_static_handler HTTP/1.1 +> Host: localhost:8123 +> User-Agent: curl/7.47.0 +> Accept: */* +> XXX:xxx +> +< HTTP/1.1 200 OK +< Date: Wed, 29 Apr 2020 04:01:24 GMT +< Connection: Keep-Alive +< Content-Type: text/plain; charset=UTF-8 +< Transfer-Encoding: chunked +< Keep-Alive: timeout=3 +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< +* Connection #0 to host localhost left intact +
% +``` + +Находит содержимое файла, отправленного клиенту. + +``` xml + + + GET + xxx + /get_absolute_path_static_handler + + static + text/html; charset=UTF-8 + file:///absolute_path_file.html + + + + GET + xxx + /get_relative_path_static_handler + + static + text/html; charset=UTF-8 + file://./relative_path_file.html + + + +``` + +``` bash +$ user_files_path='/var/lib/clickhouse/user_files' +$ sudo echo "Relative Path File" > $user_files_path/relative_path_file.html +$ sudo echo "Absolute Path File" > $user_files_path/absolute_path_file.html +$ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler' +* Trying ::1... +* Connected to localhost (::1) port 8123 (#0) +> GET /get_absolute_path_static_handler HTTP/1.1 +> Host: localhost:8123 +> User-Agent: curl/7.47.0 +> Accept: */* +> XXX:xxx +> +< HTTP/1.1 200 OK +< Date: Wed, 29 Apr 2020 04:18:16 GMT +< Connection: Keep-Alive +< Content-Type: text/html; charset=UTF-8 +< Transfer-Encoding: chunked +< Keep-Alive: timeout=3 +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< +Absolute Path File +* Connection #0 to host localhost left intact +$ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler' +* Trying ::1... +* Connected to localhost (::1) port 8123 (#0) +> GET /get_relative_path_static_handler HTTP/1.1 +> Host: localhost:8123 +> User-Agent: curl/7.47.0 +> Accept: */* +> XXX:xxx +> +< HTTP/1.1 200 OK +< Date: Wed, 29 Apr 2020 04:18:31 GMT +< Connection: Keep-Alive +< Content-Type: text/html; charset=UTF-8 +< Transfer-Encoding: chunked +< Keep-Alive: timeout=3 +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< +Relative Path File +* Connection #0 to host localhost left intact ``` [Оригинальная статья](https://clickhouse.tech/docs/ru/interfaces/http_interface/) diff --git a/docs/ru/interfaces/third-party/integrations.md b/docs/ru/interfaces/third-party/integrations.md index 39449b54df8..106cd6c859b 100644 --- a/docs/ru/interfaces/third-party/integrations.md +++ b/docs/ru/interfaces/third-party/integrations.md @@ -7,6 +7,7 @@ - Реляционные системы управления базами данных - [MySQL](https://www.mysql.com) + - [mysql2ch](https://github.com/long2ice/mysql2ch) - [ProxySQL](https://github.com/sysown/proxysql/wiki/ClickHouse-Support) - [clickhouse-mysql-data-reader](https://github.com/Altinity/clickhouse-mysql-data-reader) - [horgh-replicator](https://github.com/larsnovikov/horgh-replicator) @@ -92,5 +93,11 @@ - Elixir - [Ecto](https://github.com/elixir-ecto/ecto) - [clickhouse\_ecto](https://github.com/appodeal/clickhouse_ecto) - +- Ruby + - [Ruby on Rails](https://rubyonrails.org/) + - [activecube](https://github.com/bitquery/activecube) + - [ActiveRecord](https://github.com/PNixx/clickhouse-activerecord) + - [GraphQL](https://github.com/graphql) + - [activecube-graphql](https://github.com/bitquery/activecube-graphql) + [Оригинальная статья](https://clickhouse.tech/docs/ru/interfaces/third-party/integrations/) diff --git a/docs/ru/operations/optimizing-performance/sampling-query-profiler.md b/docs/ru/operations/optimizing-performance/sampling-query-profiler.md deleted file mode 120000 index fe5373bfe13..00000000000 --- a/docs/ru/operations/optimizing-performance/sampling-query-profiler.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/operations/optimizing-performance/sampling-query-profiler.md \ No newline at end of file diff --git 
a/docs/ru/operations/optimizing-performance/sampling-query-profiler.md b/docs/ru/operations/optimizing-performance/sampling-query-profiler.md new file mode 100644 index 00000000000..327b254ddf8 --- /dev/null +++ b/docs/ru/operations/optimizing-performance/sampling-query-profiler.md @@ -0,0 +1,62 @@ +--- +toc_priority: 54 +toc_title: Query Profiling +--- + +# Sampling Query Profiler {#sampling-query-profiler} + +ClickHouse runs sampling profiler that allows analyzing query execution. Using profiler you can find source code routines that used the most frequently during query execution. You can trace CPU time and wall-clock time spent including idle time. + +To use profiler: + +- Setup the [trace\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) section of the server configuration. + + This section configures the [trace\_log](../../operations/system-tables.md#system_tables-trace_log) system table containing the results of the profiler functioning. It is configured by default. Remember that data in this table is valid only for a running server. After the server restart, ClickHouse doesn’t clean up the table and all the stored virtual memory address may become invalid. + +- Setup the [query\_profiler\_cpu\_time\_period\_ns](../../operations/settings/settings.md#query_profiler_cpu_time_period_ns) or [query\_profiler\_real\_time\_period\_ns](../../operations/settings/settings.md#query_profiler_real_time_period_ns) settings. Both settings can be used simultaneously. + + These settings allow you to configure profiler timers. As these are the session settings, you can get different sampling frequency for the whole server, individual users or user profiles, for your interactive session, and for each individual query. + +The default sampling frequency is one sample per second and both CPU and real timers are enabled. This frequency allows collecting enough information about ClickHouse cluster. At the same time, working with this frequency, profiler doesn’t affect ClickHouse server’s performance. If you need to profile each individual query try to use higher sampling frequency. + +To analyze the `trace_log` system table: + +- Install the `clickhouse-common-static-dbg` package. See [Install from DEB Packages](../../getting-started/install.md#install-from-deb-packages). + +- Allow introspection functions by the [allow\_introspection\_functions](../../operations/settings/settings.md#settings-allow_introspection_functions) setting. + + For security reasons, introspection functions are disabled by default. + +- Use the `addressToLine`, `addressToSymbol` and `demangle` [introspection functions](../../sql-reference/functions/introspection.md) to get function names and their positions in ClickHouse code. To get a profile for some query, you need to aggregate data from the `trace_log` table. You can aggregate data by individual functions or by the whole stack traces. + +If you need to visualize `trace_log` info, try [flamegraph](../../interfaces/third-party/gui/#clickhouse-flamegraph) and [speedscope](https://github.com/laplab/clickhouse-speedscope). + +## Example {#example} + +In this example we: + +- Filtering `trace_log` data by a query identifier and the current date. + +- Aggregating by stack trace. + +- Using introspection functions, we will get a report of: + + - Names of symbols and corresponding source code functions. + - Source code locations of these functions. 
+
+
+
+``` sql
+SELECT
+    count(),
+    arrayStringConcat(arrayMap(x -> concat(demangle(addressToSymbol(x)), '\n    ', addressToLine(x)), trace), '\n') AS sym
+FROM system.trace_log
+WHERE (query_id = 'ebca3574-ad0a-400a-9cbc-dca382f5998c') AND (event_date = today())
+GROUP BY trace
+ORDER BY count() DESC
+LIMIT 10
+```
+
+``` text
+{% include "examples/sampling_query_profiler_result.txt" %}
+```
diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md
index e3c1629a46a..5bfedf4c520 100644
--- a/docs/ru/operations/server-configuration-parameters/settings.md
+++ b/docs/ru/operations/server-configuration-parameters/settings.md
@@ -78,7 +78,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat
 default
 ```
 
-## dictionaries\_config {#dictionaries-config}
+## dictionaries\_config {#server_configuration_parameters-dictionaries_config}
 
 Путь к конфигурации внешних словарей.
 
@@ -95,7 +95,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat
 *_dictionary.xml
 ```
 
-## dictionaries\_lazy\_load {#dictionaries-lazy-load}
+## dictionaries\_lazy\_load {#server_configuration_parameters-dictionaries_lazy_load}
 
 Отложенная загрузка словарей.
 
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index 5e34affcaac..4af14455d4f 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -1181,6 +1181,74 @@ Default value: 0.
 
 Значение по умолчанию: 16.
 
+## transform_null_in {#transform_null_in}
+
+Разрешает сравнивать значения [NULL](../../sql-reference/syntax.md#null-literal) в операторе [IN](../../sql-reference/operators/in.md).
+
+По умолчанию значения `NULL` нельзя сравнивать, поскольку `NULL` обозначает неопределённое значение. Следовательно, сравнение `expr = NULL` должно всегда возвращать `false`. С этой настройкой `NULL = NULL` возвращает `true` в операторе `IN`.
+
+Возможные значения:
+
+- 0 — Сравнение значений `NULL` в операторе `IN` возвращает `false`.
+- 1 — Сравнение значений `NULL` в операторе `IN` возвращает `true`.
+
+Значение по умолчанию: 0.
+
+**Пример**
+
+Рассмотрим таблицу `null_in`:
+
+```text
+┌──idx─┬─────i─┐
+│    1 │     1 │
+│    2 │  NULL │
+│    3 │     3 │
+└──────┴───────┘
+```
+
+Запрос:
+
+```sql
+SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 0;
+```
+
+Ответ:
+
+```text
+┌──idx─┬────i─┐
+│    1 │    1 │
+└──────┴──────┘
+```
+
+Запрос:
+
+```sql
+SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 1;
+```
+
+Ответ:
+
+```text
+┌──idx─┬─────i─┐
+│    1 │     1 │
+│    2 │  NULL │
+└──────┴───────┘
+```
+
+**См. также**
+
+- [Обработка значения NULL в операторе IN](../../sql-reference/operators/in.md#in-null-processing)
+
 ## background_buffer_flush_schedule_pool_size {#background_buffer_flush_schedule_pool_size}
 
 Задает количество потоков для выполнения фонового сброса данных в таблицах с движком [Buffer](../../engines/table-engines/special/buffer.md). Настройка применяется при запуске сервера ClickHouse и не может быть изменена в пользовательском сеансе.
 
@@ -1221,4 +1289,34 @@ Default value: 0.
 
 Значение по умолчанию: 16.
 
+## min_insert_block_size_rows_for_materialized_views {#min-insert-block-size-rows-for-materialized-views}
+
+Устанавливает минимальное количество строк в блоке, который может быть вставлен в таблицу запросом `INSERT`.
Блоки меньшего размера склеиваются в блоки большего размера. Настройка применяется только для блоков, вставляемых в [материализованное представление](../../sql-reference/statements/create.md#create-view). Настройка позволяет избежать избыточного потребления памяти. + +Допустимые значения: + +- Положительное целое число. +- 0 — Склейка блоков выключена. + +Значение по умолчанию: 1048576. + +**См. также:** + +- [min_insert_block_size_rows](#min-insert-block-size-rows) + +## min_insert_block_size_bytes_for_materialized_views {#min-insert-block-size-bytes-for-materialized-views} + +Устанавливает минимальное количество байтов в блоке, который может быть вставлен в таблицу запросом `INSERT`. Блоки меньшего размера склеиваются в блоки большего размера. Настройка применяется только для блоков, вставляемых в [материализованное представление](../../sql-reference/statements/create.md#create-view). Настройка позволяет избежать избыточного потребления памяти. + +Допустимые значения: + +- Положительное целое число. +- 0 — Склейка блоков выключена. + +Значение по умолчанию: 268435456. + +**См. также:** + +- [min_insert_block_size_bytes](#min-insert-block-size-bytes) + [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/settings/settings/) diff --git a/docs/ru/sql-reference/data-types/domains/ipv4.md b/docs/ru/sql-reference/data-types/domains/ipv4.md index 2903404774b..68b67bcca60 100644 --- a/docs/ru/sql-reference/data-types/domains/ipv4.md +++ b/docs/ru/sql-reference/data-types/domains/ipv4.md @@ -26,7 +26,7 @@ CREATE TABLE hits (url String, from IPv4) ENGINE = MergeTree() ORDER BY from; `IPv4` поддерживает вставку в виде строк с текстовым представлением IPv4 адреса: ``` sql -INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '116.253.40.133')('https://clickhouse.tech', '183.247.232.58')('https://clickhouse.yandex/docs/en/', '116.106.34.242'); +INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '116.253.40.133')('https://clickhouse.tech', '183.247.232.58')('https://clickhouse.tech/docs/en/', '116.106.34.242'); SELECT * FROM hits; ``` diff --git a/docs/ru/sql-reference/data-types/domains/ipv6.md b/docs/ru/sql-reference/data-types/domains/ipv6.md index 045a2ad1960..c88ee74adea 100644 --- a/docs/ru/sql-reference/data-types/domains/ipv6.md +++ b/docs/ru/sql-reference/data-types/domains/ipv6.md @@ -26,7 +26,7 @@ CREATE TABLE hits (url String, from IPv6) ENGINE = MergeTree() ORDER BY from; `IPv6` поддерживает вставку в виде строк с текстовым представлением IPv6 адреса: ``` sql -INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '2a02:aa08:e000:3100::2')('https://clickhouse.tech', '2001:44c8:129:2632:33:0:252:2')('https://clickhouse.yandex/docs/en/', '2a02:e980:1e::1'); +INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '2a02:aa08:e000:3100::2')('https://clickhouse.tech', '2001:44c8:129:2632:33:0:252:2')('https://clickhouse.tech/docs/en/', '2a02:e980:1e::1'); SELECT * FROM hits; ``` diff --git a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md deleted file mode 120000 index 76a7ef3b802..00000000000 --- a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/sql-reference/data-types/simpleaggregatefunction.md \ No newline at end of file diff --git a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md new file mode 100644 index 00000000000..dc3286d035a --- /dev/null 
+++ b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md @@ -0,0 +1,35 @@ +# SimpleAggregateFunction {#data-type-simpleaggregatefunction} + +`SimpleAggregateFunction(name, types_of_arguments…)` data type stores current value of the aggregate function, and does not store its full state as [`AggregateFunction`](../../sql-reference/data-types/aggregatefunction.md) does. This optimization can be applied to functions for which the following property holds: the result of applying a function `f` to a row set `S1 UNION ALL S2` can be obtained by applying `f` to parts of the row set separately, and then again applying `f` to the results: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`. This property guarantees that partial aggregation results are enough to compute the combined one, so we don’t have to store and process any extra data. + +The following aggregate functions are supported: + +- [`any`](../../sql-reference/aggregate-functions/reference.md#agg_function-any) +- [`anyLast`](../../sql-reference/aggregate-functions/reference.md#anylastx) +- [`min`](../../sql-reference/aggregate-functions/reference.md#agg_function-min) +- [`max`](../../sql-reference/aggregate-functions/reference.md#agg_function-max) +- [`sum`](../../sql-reference/aggregate-functions/reference.md#agg_function-sum) +- [`groupBitAnd`](../../sql-reference/aggregate-functions/reference.md#groupbitand) +- [`groupBitOr`](../../sql-reference/aggregate-functions/reference.md#groupbitor) +- [`groupBitXor`](../../sql-reference/aggregate-functions/reference.md#groupbitxor) +- [`groupArrayArray`](../../sql-reference/aggregate-functions/reference.md#agg_function-grouparray) +- [`groupUniqArrayArray`](../../sql-reference/aggregate-functions/reference.md#groupuniqarrayx-groupuniqarraymax-sizex) + +Values of the `SimpleAggregateFunction(func, Type)` look and stored the same way as `Type`, so you do not need to apply functions with `-Merge`/`-State` suffixes. `SimpleAggregateFunction` has better performance than `AggregateFunction` with same aggregation function. + +**Parameters** + +- Name of the aggregate function. +- Types of the aggregate function arguments. + +**Example** + +``` sql +CREATE TABLE t +( + column1 SimpleAggregateFunction(sum, UInt64), + column2 SimpleAggregateFunction(any, String) +) ENGINE = ... +``` + +[Original article](https://clickhouse.tech/docs/en/data_types/simpleaggregatefunction/) diff --git a/docs/ru/sql-reference/functions/array-functions.md b/docs/ru/sql-reference/functions/array-functions.md index 71b6bda47d0..7abebc6a059 100644 --- a/docs/ru/sql-reference/functions/array-functions.md +++ b/docs/ru/sql-reference/functions/array-functions.md @@ -692,7 +692,7 @@ arrayDifference(array) **Параметры** -- `array` – [Массив](https://clickhouse.yandex/docs/ru/data_types/array/). +- `array` – [Массив](https://clickhouse.tech/docs/ru/data_types/array/). **Возвращаемое значение** @@ -742,7 +742,7 @@ arrayDistinct(array) **Параметры** -- `array` – [Массив](https://clickhouse.yandex/docs/ru/data_types/array/). +- `array` – [Массив](https://clickhouse.tech/docs/ru/data_types/array/). 
**Возвращаемое значение** diff --git a/docs/ru/sql-reference/functions/other-functions.md b/docs/ru/sql-reference/functions/other-functions.md index 2c715cd15a5..7161b1a2468 100644 --- a/docs/ru/sql-reference/functions/other-functions.md +++ b/docs/ru/sql-reference/functions/other-functions.md @@ -1153,4 +1153,52 @@ SELECT number, randomPrintableASCII(30) as str, length(str) FROM system.numbers └────────┴────────────────────────────────┴──────────────────────────────────┘ ``` +## randomString {#randomstring} + +Генерирует бинарную строку заданной длины, заполненную случайными байтами (в том числе нулевыми). + +**Синтаксис** + +``` sql +randomString(length) +``` + +**Параметры** + +- `length` — длина строки. Положительное целое число. + +**Возвращаемое значение** + +- Строка, заполненная случайными байтами. + +Type: [String](../../sql-reference/data-types/string.md). + +**Пример** + +Запрос: + +``` sql +SELECT randomString(30) AS str, length(str) AS len FROM numbers(2) FORMAT Vertical; +``` + +Ответ: + +``` text +Row 1: +────── +str: 3 G : pT ?w тi k aV f6 +len: 30 + +Row 2: +────── +str: 9 ,] ^ ) ]?? 8 +len: 30 +``` + +**Смотрите также** + +- [generateRandom](../../sql-reference/table-functions/generate.md#generaterandom) +- [randomPrintableASCII](../../sql-reference/functions/other-functions.md#randomascii) + + [Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/other_functions/) diff --git a/docs/ru/sql-reference/operators/in.md b/docs/ru/sql-reference/operators/in.md deleted file mode 120000 index 3a2feda2f61..00000000000 --- a/docs/ru/sql-reference/operators/in.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/sql-reference/operators/in.md \ No newline at end of file diff --git a/docs/ru/sql-reference/operators/in.md b/docs/ru/sql-reference/operators/in.md new file mode 100644 index 00000000000..98006288b05 --- /dev/null +++ b/docs/ru/sql-reference/operators/in.md @@ -0,0 +1,199 @@ +# Операторы IN {#select-in-operators} + +Операторы `IN`, `NOT IN`, `GLOBAL IN`, `GLOBAL NOT IN` рассматриваются отдельно, так как их функциональность достаточно богатая. + +В качестве левой части оператора, может присутствовать как один столбец, так и кортеж. + +Примеры: + +``` sql +SELECT UserID IN (123, 456) FROM ... +SELECT (CounterID, UserID) IN ((34, 123), (101500, 456)) FROM ... +``` + +Если слева стоит один столбец, входящий в индекс, а справа - множество констант, то при выполнении запроса, система воспользуется индексом. + +Не перечисляйте слишком большое количество значений (миллионы) явно. Если множество большое - лучше загрузить его во временную таблицу (например, смотрите раздел «Внешние данные для обработки запроса»), и затем воспользоваться подзапросом. + +В качестве правой части оператора может быть множество константных выражений, множество кортежей с константными выражениями (показано в примерах выше), а также имя таблицы или подзапрос SELECT в скобках. + +Если в качестве правой части оператора указано имя таблицы (например, `UserID IN users`), то это эквивалентно подзапросу `UserID IN (SELECT * FROM users)`. Это используется при работе с внешними данными, отправляемым вместе с запросом. Например, вместе с запросом может быть отправлено множество идентификаторов посетителей, загруженное во временную таблицу users, по которому следует выполнить фильтрацию. + +Если в качестве правой части оператора, указано имя таблицы, имеющий движок Set (подготовленное множество, постоянно находящееся в оперативке), то множество не будет создаваться заново при каждом запросе. 
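+
+Например, использование такого множества могло бы выглядеть примерно так (набросок: имена таблиц `userid_set` и `hits` и столбца `UserID` выбраны условно, только для иллюстрации):
+
+``` sql
+-- Подготовленное множество, постоянно находящееся в оперативной памяти
+CREATE TABLE userid_set (UserID UInt64) ENGINE = Set;
+INSERT INTO userid_set VALUES (123), (456);
+
+-- Справа от IN указана таблица с движком Set; множество не создаётся заново при каждом запросе
+SELECT count() FROM hits WHERE UserID IN userid_set;
+```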
+ +В подзапросе может быть указано более одного столбца для фильтрации кортежей. +Пример: + +``` sql +SELECT (CounterID, UserID) IN (SELECT CounterID, UserID FROM ...) FROM ... +``` + +Типы столбцов слева и справа оператора IN, должны совпадать. + +Оператор IN и подзапрос могут встречаться в любой части запроса, в том числе в агрегатных и лямбда функциях. +Пример: + +``` sql +SELECT + EventDate, + avg(UserID IN + ( + SELECT UserID + FROM test.hits + WHERE EventDate = toDate('2014-03-17') + )) AS ratio +FROM test.hits +GROUP BY EventDate +ORDER BY EventDate ASC +``` + +``` text +┌──EventDate─┬────ratio─┐ +│ 2014-03-17 │ 1 │ +│ 2014-03-18 │ 0.807696 │ +│ 2014-03-19 │ 0.755406 │ +│ 2014-03-20 │ 0.723218 │ +│ 2014-03-21 │ 0.697021 │ +│ 2014-03-22 │ 0.647851 │ +│ 2014-03-23 │ 0.648416 │ +└────────────┴──────────┘ +``` + +за каждый день после 17 марта считаем долю хитов, сделанных посетителями, которые заходили на сайт 17 марта. +Подзапрос в секции IN на одном сервере всегда выполняется только один раз. Зависимых подзапросов не существует. + +## Обработка NULL {#in-null-processing} + +При обработке запроса оператор IN будет считать, что результат операции с [NULL](../syntax.md#null-literal) всегда равен `0`, независимо от того, находится `NULL` в правой или левой части оператора. Значения `NULL` не входят ни в какое множество, не соответствуют друг другу и не могут сравниваться, если [transform_null_in = 0](../../operations/settings/settings.md#transform_null_in). + +Рассмотрим для примера таблицу `t_null`: + +``` text +┌─x─┬────y─┐ +│ 1 │ ᴺᵁᴸᴸ │ +│ 2 │ 3 │ +└───┴──────┘ +``` + +При выполнении запроса `SELECT x FROM t_null WHERE y IN (NULL,3)` получим следующий результат: + +``` text +┌─x─┐ +│ 2 │ +└───┘ +``` + +Видно, что строка, в которой `y = NULL`, выброшена из результатов запроса. Это произошло потому, что ClickHouse не может решить входит ли `NULL` в множество `(NULL,3)`, возвращает результат операции `0`, а `SELECT` выбрасывает эту строку из финальной выдачи. + +``` sql +SELECT y IN (NULL, 3) +FROM t_null +``` + +``` text +┌─in(y, tuple(NULL, 3))─┐ +│ 0 │ +│ 1 │ +└───────────────────────┘ +``` + +## Распределённые подзапросы {#select-distributed-subqueries} + +Существует два варианта IN-ов с подзапросами (аналогично для JOIN-ов): обычный `IN` / `JOIN` и `GLOBAL IN` / `GLOBAL JOIN`. Они отличаются способом выполнения при распределённой обработке запроса. + +!!! attention "Attention" + Помните, что алгоритмы, описанные ниже, могут работать иначе в зависимости от [настройки](../../operations/settings/settings.md) `distributed_product_mode`. + +При использовании обычного IN-а, запрос отправляется на удалённые серверы, и на каждом из них выполняются подзапросы в секциях `IN` / `JOIN`. + +При использовании `GLOBAL IN` / `GLOBAL JOIN-а`, сначала выполняются все подзапросы для `GLOBAL IN` / `GLOBAL JOIN-ов`, и результаты складываются во временные таблицы. Затем эти временные таблицы передаются на каждый удалённый сервер, и на них выполняются запросы, с использованием этих переданных временных данных. + +Если запрос не распределённый, используйте обычный `IN` / `JOIN`. + +Следует быть внимательным при использовании подзапросов в секции `IN` / `JOIN` в случае распределённой обработки запроса. + +Рассмотрим это на примерах. Пусть на каждом сервере кластера есть обычная таблица **local\_table**. Пусть также есть таблица **distributed\_table** типа **Distributed**, которая смотрит на все серверы кластера. 
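+
+Например, такая распределённая таблица могла бы быть создана примерно так (набросок: имя кластера `my_cluster` и база данных `default` выбраны условно):
+
+``` sql
+-- Предполагается, что local_table уже существует на каждом сервере кластера my_cluster
+CREATE TABLE distributed_table AS local_table
+ENGINE = Distributed(my_cluster, default, local_table, rand());
+```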
+ +При запросе к распределённой таблице **distributed\_table**, запрос будет отправлен на все удалённые серверы, и на них будет выполнен с использованием таблицы **local\_table**. + +Например, запрос + +``` sql +SELECT uniq(UserID) FROM distributed_table +``` + +будет отправлен на все удалённые серверы в виде + +``` sql +SELECT uniq(UserID) FROM local_table +``` + +, выполнен параллельно на каждом из них до стадии, позволяющей объединить промежуточные результаты; затем промежуточные результаты вернутся на сервер-инициатор запроса, будут на нём объединены, и финальный результат будет отправлен клиенту. + +Теперь рассмотрим запрос с IN-ом: + +``` sql +SELECT uniq(UserID) FROM distributed_table WHERE CounterID = 101500 AND UserID IN (SELECT UserID FROM local_table WHERE CounterID = 34) +``` + +- расчёт пересечения аудиторий двух сайтов. + +Этот запрос будет отправлен на все удалённые серверы в виде + +``` sql +SELECT uniq(UserID) FROM local_table WHERE CounterID = 101500 AND UserID IN (SELECT UserID FROM local_table WHERE CounterID = 34) +``` + +То есть, множество в секции IN будет собрано на каждом сервере независимо, только по тем данным, которые есть локально на каждом из серверов. + +Это будет работать правильно и оптимально, если вы предусмотрели такой случай, и раскладываете данные по серверам кластера таким образом, чтобы данные одного UserID-а лежали только на одном сервере. В таком случае все необходимые данные будут присутствовать на каждом сервере локально. В противном случае результат будет посчитан неточно. Назовём этот вариант запроса «локальный IN». + +Чтобы исправить работу запроса, когда данные размазаны по серверам кластера произвольным образом, можно было бы указать **distributed\_table** внутри подзапроса. Запрос будет выглядеть так: + +``` sql +SELECT uniq(UserID) FROM distributed_table WHERE CounterID = 101500 AND UserID IN (SELECT UserID FROM distributed_table WHERE CounterID = 34) +``` + +Этот запрос будет отправлен на все удалённые серверы в виде + +``` sql +SELECT uniq(UserID) FROM local_table WHERE CounterID = 101500 AND UserID IN (SELECT UserID FROM distributed_table WHERE CounterID = 34) +``` + +На каждом удалённом сервере начнёт выполняться подзапрос. Так как в подзапросе используется распределённая таблица, то подзапрос будет, на каждом удалённом сервере, снова отправлен на каждый удалённый сервер, в виде + +``` sql +SELECT UserID FROM local_table WHERE CounterID = 34 +``` + +Например, если у вас кластер из 100 серверов, то выполнение всего запроса потребует 10 000 элементарных запросов, что, как правило, является неприемлемым. + +В таких случаях всегда следует использовать GLOBAL IN вместо IN. Рассмотрим его работу для запроса + +``` sql +SELECT uniq(UserID) FROM distributed_table WHERE CounterID = 101500 AND UserID GLOBAL IN (SELECT UserID FROM distributed_table WHERE CounterID = 34) +``` + +На сервере-инициаторе запроса будет выполнен подзапрос + +``` sql +SELECT UserID FROM distributed_table WHERE CounterID = 34 +``` + +, и результат будет сложен во временную таблицу в оперативке. Затем запрос будет отправлен на каждый удалённый сервер в виде + +``` sql +SELECT uniq(UserID) FROM local_table WHERE CounterID = 101500 AND UserID GLOBAL IN _data1 +``` + +, и вместе с запросом, на каждый удалённый сервер будет отправлена временная таблица `_data1` (имя временной таблицы - implementation defined). + +Это гораздо более оптимально, чем при использовании обычного IN. Но при этом, следует помнить о нескольких вещах: + +1. 
При создании временной таблицы данные не уникализируются. Чтобы уменьшить объём передаваемых по сети данных, укажите в подзапросе DISTINCT (для обычного IN-а этого делать не нужно). +2. Временная таблица будет передана на все удалённые серверы. Передача не учитывает топологию сети. Например, если 10 удалённых серверов расположены в удалённом относительно сервера-инициатора запроса дата-центре, то по каналу в удалённый дата-центр данные будет переданы 10 раз. Старайтесь не использовать большие множества при использовании GLOBAL IN. +3. При передаче данных на удалённые серверы не настраивается ограничение использования сетевой полосы. Вы можете перегрузить сеть. +4. Старайтесь распределять данные по серверам так, чтобы в GLOBAL IN-ах не было частой необходимости. +5. Если в GLOBAL IN есть частая необходимость, то спланируйте размещение кластера ClickHouse таким образом, чтобы в каждом дата-центре была хотя бы одна реплика каждого шарда, и среди них была быстрая сеть - чтобы запрос целиком можно было бы выполнить, передавая данные в пределах одного дата-центра. + +В секции `GLOBAL IN` также имеет смысл указывать локальную таблицу - в случае, если эта локальная таблица есть только на сервере-инициаторе запроса, и вы хотите воспользоваться данными из неё на удалённых серверах. diff --git a/docs/ru/sql-reference/statements/select/join.md b/docs/ru/sql-reference/statements/select/join.md index 60f391d888b..26e7ae8257e 100644 --- a/docs/ru/sql-reference/statements/select/join.md +++ b/docs/ru/sql-reference/statements/select/join.md @@ -45,7 +45,11 @@ FROM `ASOF JOIN` применим в том случае, когда необходимо объединять записи, которые не имеют точного совпадения. -Таблицы для `ASOF JOIN` должны иметь столбец с отсортированной последовательностью. Этот столбец не может быть единственным в таблице и должен быть одного из типов: `UInt32`, `UInt64`, `Float32`, `Float64`, `Date` и `DateTime`. +Для работы алгоритма необходим специальный столбец в таблицах. Этот столбец: + +- Должен содержать упорядоченную последовательность. +- Может быть одного из следующих типов: [Int*, UInt*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Date](../../data-types/date.md), [DateTime](../../data-types/datetime.md), [Decimal*](../../data-types/decimal.md). +- Не может быть единственным столбцом в секции `JOIN`. Синтаксис `ASOF JOIN ... ON`: diff --git a/docs/ru/sql-reference/statements/system.md b/docs/ru/sql-reference/statements/system.md index 1b66fa039d9..5e168c09074 100644 --- a/docs/ru/sql-reference/statements/system.md +++ b/docs/ru/sql-reference/statements/system.md @@ -38,7 +38,7 @@ ## RELOAD DICTIONARIES {#query_language-system-reload-dictionaries} Перегружает все словари, которые были успешно загружены до этого. -По умолчанию включена ленивая загрузка [dictionaries\_lazy\_load](../../sql-reference/statements/system.md#dictionaries-lazy-load), поэтому словари не загружаются автоматически при старте, а только при первом обращении через dictGet или SELECT к ENGINE=Dictionary. После этого такие словари (LOADED) будут перегружаться командой `system reload dictionaries`. +По умолчанию включена ленивая загрузка [dictionaries\_lazy\_load](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_lazy_load), поэтому словари не загружаются автоматически при старте, а только при первом обращении через dictGet или SELECT к ENGINE=Dictionary. После этого такие словари (LOADED) будут перегружаться командой `system reload dictionaries`. 
Всегда возвращает `Ok.`, вне зависимости от результата обновления словарей. ## RELOAD DICTIONARY Dictionary\_name {#query_language-system-reload-dictionary} @@ -74,6 +74,7 @@ SELECT name, status FROM system.dictionaries; ## FLUSH LOGS {#query_language-system-flush_logs} Записывает буферы логов в системные таблицы (например system.query\_log). Позволяет не ждать 7.5 секунд при отладке. +Если буфер логов пустой, то этот запрос просто создаст системные таблицы. ## RELOAD CONFIG {#query_language-system-reload-config} diff --git a/docs/ru/sql-reference/table-functions/generate.md b/docs/ru/sql-reference/table-functions/generate.md index b1abdbf1d63..9e6d36b2a4b 100644 --- a/docs/ru/sql-reference/table-functions/generate.md +++ b/docs/ru/sql-reference/table-functions/generate.md @@ -1,4 +1,4 @@ -# generateRandom {#generateRandom} +# generateRandom {#generaterandom} Генерирует случайные данные с заданной схемой. Позволяет заполнять тестовые таблицы данными. diff --git a/docs/ru/whats-new/extended-roadmap.md b/docs/ru/whats-new/extended-roadmap.md index 20ebe28fe5b..07f2c8f4cd6 100644 --- a/docs/ru/whats-new/extended-roadmap.md +++ b/docs/ru/whats-new/extended-roadmap.md @@ -344,13 +344,14 @@ Upd. Появилась вторая версия LTS - 20.3. Исправление долгоживущей проблемы с дрифтом учёта оперативки. Нужна для Метрики и БК. -### 6.4. Поддержка perf events как метрик запроса {#podderzhka-perf-events-kak-metrik-zaprosa} +### 6.4. + Поддержка perf events как метрик запроса {#podderzhka-perf-events-kak-metrik-zaprosa} Делает Андрей Скобцов, ВШЭ. В Linux существует возможность получать в программе информацию о счётчиках производительности и событиях, относящихся к CPU и ядру ОС. Подробнее смотрите `man perf_event_open`. Предлагается добавить эти метрики в ClickHouse для инструментирования запросов. -Есть прототип. +Есть прототип. +Сделано. ### 6.5. Эксперименты с LLVM X-Ray {#eksperimenty-s-llvm-x-ray} @@ -706,7 +707,7 @@ Upd. Задача взята в работу. Сделал [Andrey Bodrov](https://github.com/apbodrov) -### 8.10. Запись в табличную функцию ODBC {#zapis-v-tablichnuiu-funktsiiu-odbc} +### 8.10. + Запись в табличную функцию ODBC {#zapis-v-tablichnuiu-funktsiiu-odbc} Артемий Бобровский, ВШЭ Есть pull request. @@ -718,15 +719,11 @@ Upd. Задача взята в работу. ### 8.12. Пропуск столбцов в форматах Parquet, ORC {#propusk-stolbtsov-v-formatakh-parquet-orc} -Артемий Бобровский, ВШЭ или другой человек. - ### 8.13. Поддержка массивов в Parquet, ORC {#podderzhka-massivov-v-parquet-orc} -Артемий Бобровский, ВШЭ - ### 8.14. Запись данных в ORC {#zapis-dannykh-v-orc} -Возможно, Андрей Коняев, ArenaData (зависит от желания). +Павел Круглов, ВШЭ. Есть pull request. ### 8.15. Запись данных в CapNProto {#zapis-dannykh-v-capnproto} @@ -856,6 +853,8 @@ Upd. Ура, нашли причину и исправили. ### 10.6. Словари из Cassandra и Couchbase {#slovari-iz-cassandra-i-couchbase} +Готова Cassandra. + ### 10.7. Поддержка Nullable в словарях {#podderzhka-nullable-v-slovariakh} Артём Стрельцов, Николай Дегтеринский, Наталия Михненко, ВШЭ. @@ -876,7 +875,7 @@ Upd. Ура, нашли причину и исправили. Никита Михайлов. Q1. Нужно для БК и Метрики. Требует 10.10. -### 10.12. Layout direct для словарей {#layout-direct-dlia-slovarei} +### 10.12. + Layout direct для словарей {#layout-direct-dlia-slovarei} Артём Стрельцов, Николай Дегтеринский, Наталия Михненко, ВШЭ. Приступили к этой задаче. @@ -888,7 +887,7 @@ Upd. Ура, нашли причину и исправили. ### 10.14. Поддержка всех типов в функции transform {#podderzhka-vsekh-tipov-v-funktsii-transform} -### 10.15. 
Использование словарей как специализированного layout для Join {#ispolzovanie-slovarei-kak-spetsializirovannogo-layout-dlia-join} +### 10.15. + Использование словарей как специализированного layout для Join {#ispolzovanie-slovarei-kak-spetsializirovannogo-layout-dlia-join} ### 10.16. Словари на локальном SSD {#slovari-na-lokalnom-ssd} @@ -1001,14 +1000,14 @@ Q2. Вариант реализации выбрал Александр Казаков. Upd. Не уследили, и задачу стали обсуждать менеджеры. Upd. Задачу смотрит Александр Казаков. +Upd. Задача взята в работу. ## 14. Диалект SQL {#dialekt-sql} -### 14.1. Исправление семантики CAST для Nullable {#ispravlenie-semantiki-cast-dlia-nullable} +### 14.1. + Исправление семантики CAST для Nullable {#ispravlenie-semantiki-cast-dlia-nullable} Нужно для DataLens. А также для внедрения в BI инструмент Looker. -Павел Потёмкин, ВШЭ. ### 14.2. Поддержка WITH для подзапросов {#podderzhka-with-dlia-podzaprosov} @@ -1025,12 +1024,8 @@ zhang2014 ### 14.6. Глобальный scope для WITH {#globalnyi-scope-dlia-with} -Павел Потёмкин, ВШЭ. - ### 14.7. Nullable для WITH ROLLUP, WITH CUBE, WITH TOTALS {#nullable-dlia-with-rollup-with-cube-with-totals} -Павел Потёмкин, ВШЭ. - Простая задача. ### 14.8. Модификаторы DISTINCT, ORDER BY для агрегатных функций {#modifikatory-distinct-order-by-dlia-agregatnykh-funktsii} @@ -1039,7 +1034,7 @@ zhang2014 Результат некоторых агрегатных функций зависит от порядка данных. Предлагается реализовать модификатор ORDER BY, задающий порядок явно. Пример: groupArray(x ORDER BY y, z). -Upd. Есть pull request на DISTINCT. +Upd. Есть pull request-ы. ### 14.9. Поддержка запроса EXPLAIN {#podderzhka-zaprosa-explain} @@ -1061,7 +1056,8 @@ Upd. Есть pull request на DISTINCT. ### 14.14. Неявные преобразования типов констант {#neiavnye-preobrazovaniia-tipov-konstant} -Требует 2.12. +Сделано для операторов сравнения с константами (подавляющее большинство use cases). +В общем виде требует 2.12. ### 14.15. Неявные преобразования типов под настройкой {#neiavnye-preobrazovaniia-tipov-pod-nastroikoi} @@ -1077,7 +1073,6 @@ zhang2014. ### 14.18. UNION DISTINCT и возможность включить его по-умолчанию {#union-distinct-i-vozmozhnost-vkliuchit-ego-po-umolchaniiu} -Павел Потёмкин, ВШЭ. Для BI систем. ### 14.19. Совместимость парсера типов данных с SQL {#sovmestimost-parsera-tipov-dannykh-s-sql} @@ -1087,7 +1082,6 @@ zhang2014. ### 14.20. Позиционные аргументы для GROUP BY и ORDER BY {#pozitsionnye-argumenty-dlia-group-by-i-order-by} -Павел Потёмкин, ВШЭ. Тривиально и используется многими системами, но не входит в стандарт SQL. ### 14.21. Приведение типов для IN (подзапрос) и для JOIN {#privedenie-tipov-dlia-in-podzapros-i-dlia-join} @@ -1104,11 +1098,12 @@ Q1. Сделали адаптивный вариант, но вроде он ч ### 15.1.1. Алгоритм two-level merge JOIN {#algoritm-two-level-merge-join} -Александр Кузьменков. В очереди. +Также известен как grace hash join. +Артём Зуйков. В очереди. ### 15.1.2. Тестирование реализации JOIN в Greenplum {#testirovanie-realizatsii-join-v-greenplum} -В очереди. +В очереди. Как будто задача самоотменилась. ### 15.2. Прокидывание условий в OUTER JOIN {#prokidyvanie-uslovii-v-outer-join} @@ -1167,6 +1162,7 @@ ClickHouse не является geospatial СУБД. Тем не менее, в Upd. Андрей сделал прототип интерфейса и реализацию-заглушку внутри него. Upd. Андрей сделал прототип более оптимальной структуры данных. Upd. Есть обнадёживающие результаты. +Upd. В ревью. ### 17.2. GIS типы данных и операции {#gis-tipy-dannykh-i-operatsii} @@ -1201,7 +1197,7 @@ Upd. Есть обнадёживающие результаты. 
Предлагается реализовать в ClickHouse статистические тесты (Analysis of Variance, тесты нормальности распределения и т. п.) в виде агрегатных функций. Пример: `welchTTest(value, sample_idx)`. -Сделали прототип одного теста, есть pull request. +Сделали прототип двух тестов, есть pull request. ### 18.3. Инфраструктура для тренировки моделей в ClickHouse {#infrastruktura-dlia-trenirovki-modelei-v-clickhouse} @@ -1212,8 +1208,6 @@ Upd. Есть обнадёживающие результаты. ### 19.1. Параллельные кворумные вставки без линеаризуемости {#parallelnye-kvorumnye-vstavki-bez-linearizuemosti} -Александра Латышева, ВШЭ и Яндекс. - Репликация данных в ClickHouse по-умолчанию является асинхронной без выделенного мастера. Это значит, что клиент, осуществляющий вставку данных, получает успешный ответ после того, как данные попали на один сервер; репликация данных по остальным серверам осуществляется в другой момент времени. Это ненадёжно, потому что допускает потерю только что вставленных данных при потере лишь одного сервера. Для решения этой проблемы, в ClickHouse есть возможность включить «кворумную» вставку. Это значит, что клиент, осуществляющий вставку данных, получает успешный ответ после того, как данные попали на несколько (кворум) серверов. Обеспечивается линеаризуемость: клиент, получает успешный ответ после того, как данные попали на несколько реплик, *которые содержат все предыдущие данные, вставленные с кворумом* (такие реплики можно называть «синхронными»), и при запросе SELECT можно выставить настройку, разрешающую только чтение с синхронных реплик. @@ -1249,9 +1243,9 @@ Hold. Полезно для заказчиков внутри Яндекса, н Предлагается реализовать «движок баз данных», который осуществляет репликацию метаданных (множество имеющихся таблиц и лог DDL операций над ними: CREATE, DROP, RENAME, ALTER). Пользователь сможет создать реплицируемую базу данных; при её создании или восстановлении на другом сервере, все реплицируемые таблицы будут созданы автоматически. -### 19.6. Одновременный выбор кусков для слияния многими репликами, отказ от leader election в ZK {#odnovremennyi-vybor-kuskov-dlia-sliianiia-mnogimi-replikami-otkaz-ot-leader-election-v-zk} +### 19.6. + Одновременный выбор кусков для слияния многими репликами, отказ от leader election в ZK {#odnovremennyi-vybor-kuskov-dlia-sliianiia-mnogimi-replikami-otkaz-ot-leader-election-v-zk} -Обсуждается. Возможно, будет делать Александр Сапин. +Готово. ### 19.7. Возможность записи данных при недоступности ZK и отказ от линейного порядка кусков в большинстве случаев {#vozmozhnost-zapisi-dannykh-pri-nedostupnosti-zk-i-otkaz-ot-lineinogo-poriadka-kuskov-v-bolshinstve-sluchaev} @@ -1289,6 +1283,7 @@ Hold. Полезно для заказчиков внутри Яндекса, н ### 21.2. Параллельное форматирование форматов {#parallelnoe-formatirovanie-formatov} После 21.1, предположительно Никита Михайлов. Задача сильно проще чем 21.1. +В процессе. ### 21.3. + Исправление низкой производительности анализа индекса в случае большого множества в секции IN {#ispravlenie-nizkoi-proizvoditelnosti-analiza-indeksa-v-sluchae-bolshogo-mnozhestva-v-sektsii-in} @@ -1307,6 +1302,7 @@ Upd. Антон делает эту задачу. Большая часть уж В прошлом году, аналогичное решение сделали для операции ORDER BY. Upd. Есть pull request для GROUP BY. Приличные результаты. +Upd. Для GROUP BY готово, в процессе для DISTINCT. ### 21.5. + Распараллеливание INSERT при INSERT SELECT, если это необходимо {#rasparallelivanie-insert-pri-insert-select-esli-eto-neobkhodimo} @@ -1354,6 +1350,7 @@ Amos Bird. 
- При GROUP BY по transform или if по строкам, замена строк на Enum. Сделана замена цепочек if на multiIf, но внезапно оказалось, что это является не оптимизацией, а наоборот. +Сделано ещё несколько оптимизаций. ### 21.12. Алгебраические оптимизации запросов {#algebraicheskie-optimizatsii-zaprosov} @@ -1603,7 +1600,7 @@ Altinity. Q1. [Николай Кочетов](https://github.com/KochetovNicolai). -### 23.10. Включение mlock бинарника {#vkliuchenie-mlock-binarnika} +### 23.10. + Включение mlock бинарника {#vkliuchenie-mlock-binarnika} Возможность mlock бинарника сделал Олег Алексеенков [\#3553](https://github.com/ClickHouse/ClickHouse/pull/3553) . Поможет, когда на серверах кроме ClickHouse работает много посторонних программ (мы иногда называем их в шутку «треш-программами»). @@ -1669,6 +1666,8 @@ RAID позволяет одновременно увеличить надёжн Для преодоления этих ограничений, предлагается реализовать в ClickHouse встроенный алгоритм расположения данных на дисках. +Есть pull request на начальной стадии. + ### 24.7. Вероятностные структуры данных для фильтрации по подзапросам {#veroiatnostnye-struktury-dannykh-dlia-filtratsii-po-podzaprosam} Рузель Ибрагимов, ВШЭ и Яндекс. @@ -1679,6 +1678,8 @@ RAID позволяет одновременно увеличить надёжн Предлагается реализовать это в языке запросов ClickHouse с помощью специального синтаксиса, например `x IN BLOOM FILTER (n, m) (SELECT ...)`. +Есть pull request на стадии работающего прототипа. + ### 24.8. Специализация векторизованного кода для AVX/AVX2/AVX512 и ARM NEON {#spetsializatsiia-vektorizovannogo-koda-dlia-avxavx2avx512-i-arm-neon} [\#1017](https://github.com/ClickHouse/ClickHouse/issues/1017) @@ -1697,7 +1698,7 @@ RAID позволяет одновременно увеличить надёжн Продолжение 24.8. -Upd. Есть pull request. +Upd. Есть pull request. В стадии ревью. ### 24.10. Поддержка типов half/bfloat16/unum {#podderzhka-tipov-halfbfloat16unum} @@ -1705,6 +1706,8 @@ Upd. Есть pull request. Рустам Гусейн-заде, ВШЭ. +Есть pull request на промежуточной стадии. + ### 24.11. User Defined Functions {#user-defined-functions} Игорь Минеев, ВШЭ. @@ -1725,6 +1728,8 @@ ClickHouse предоставляет достаточно богатый наб Также рассматривается возможность написания UDF на Rust, а также использование Web Assembly. Отдельно можно рассмотреть подключение NumPy и R и других технологий, которые предоставляют операции над целыми массивами. +Upd. В работе два варианта реализации UDF. + ### 24.12. GPU offloading {#gpu-offloading} Риск состоит в том, что даже известные GPU базы, такие как OmniSci, работают медленнее, чем ClickHouse. @@ -1763,6 +1768,7 @@ Upd. Прототип bitonic sort помержен, но целесообраз Задача в работе. Upd. Достигнуты обнадёживающие результаты. +Upd. Есть pull request - в большинстве случаев одновременно и ускорение и снижение потребления памяти, но требуются доработки. ### 24.18. Не TCP протокол передачи файлов при репликации {#ne-tcp-protokol-peredachi-failov-pri-replikatsii} @@ -1804,7 +1810,7 @@ ClickHouse также может использоваться для быстр [\#6874](https://github.com/ClickHouse/ClickHouse/issues/6874) -Артём Вишняков, ВШЭ. +Артём Вишняков, ВШЭ. Есть pull request. ### 24.25. Интеграция в ClickHouse функциональности обработки HTTP User Agent {#integratsiia-v-clickhouse-funktsionalnosti-obrabotki-http-user-agent} @@ -1841,9 +1847,9 @@ ucasFL, ICT. Жанна Зосимова, ВШЭ. Upd. Пока поддержали Arrow как формат ввода-вывода. -### 24.30. ClickHouse как графовая СУБД {#clickhouse-kak-grafovaia-subd} +### - 24.30. 
ClickHouse как графовая СУБД {#clickhouse-kak-grafovaia-subd} -Amos Bird, но его решение слишком громоздкое и пока не open-source. +Amos Bird, но его решение слишком громоздкое и пока не open-source. Отменено. ### 24.31. Кореллированные подзапросы {#korellirovannye-podzaprosy} diff --git a/docs/tools/amp.py b/docs/tools/amp.py index ec2484405cd..b08b58d3cba 100644 --- a/docs/tools/amp.py +++ b/docs/tools/amp.py @@ -85,6 +85,15 @@ def html_to_amp(content): tag.attrs['width'] = '640' if not tag.attrs.get('height'): tag.attrs['height'] = '320' + if tag.name == 'iframe': + tag.name = 'amp-iframe' + tag.attrs['layout'] = 'responsive' + del tag.attrs['alt'] + del tag.attrs['allowfullscreen'] + if not tag.attrs.get('width'): + tag.attrs['width'] = '640' + if not tag.attrs.get('height'): + tag.attrs['height'] = '320' elif tag.name == 'a': href = tag.attrs.get('href') if href: diff --git a/docs/tools/blog.py b/docs/tools/blog.py new file mode 100644 index 00000000000..f5415bec608 --- /dev/null +++ b/docs/tools/blog.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python3 +import datetime +import logging +import os +import time + +import nav # monkey patches mkdocs + +import mkdocs.commands +from mkdocs import config +from mkdocs import exceptions + +import mdx_clickhouse +import redirects + +import util + + +def build_for_lang(lang, args): + logging.info(f'Building {lang} blog') + + try: + theme_cfg = { + 'name': None, + 'custom_dir': os.path.join(os.path.dirname(__file__), '..', args.theme_dir), + 'language': lang, + 'direction': 'ltr', + 'static_templates': ['404.html'], + 'extra': { + 'now': int(time.mktime(datetime.datetime.now().timetuple())) # TODO better way to avoid caching + } + } + + # the following list of languages is sorted according to + # https://en.wikipedia.org/wiki/List_of_languages_by_total_number_of_speakers + languages = { + 'en': 'English', + 'ru': 'Русский' + } + + site_names = { + 'en': 'ClickHouse Blog', + 'ru': 'Блог ClickHouse ' + } + + assert len(site_names) == len(languages) + + site_dir = os.path.join(args.blog_output_dir, lang) + + plugins = ['macros'] + if args.htmlproofer: + plugins.append('htmlproofer') + + website_url = 'https://clickhouse.tech' + site_name = site_names.get(lang, site_names['en']) + blog_nav, post_meta = nav.build_blog_nav(lang, args) + raw_config = dict( + site_name=site_name, + site_url=f'{website_url}/blog/{lang}/', + docs_dir=os.path.join(args.blog_dir, lang), + site_dir=site_dir, + strict=True, + theme=theme_cfg, + nav=blog_nav, + copyright='©2016–2020 Yandex LLC', + use_directory_urls=True, + repo_name='ClickHouse/ClickHouse', + repo_url='https://github.com/ClickHouse/ClickHouse/', + edit_uri=f'edit/master/website/blog/{lang}', + markdown_extensions=mdx_clickhouse.MARKDOWN_EXTENSIONS, + plugins=plugins, + extra=dict( + now=datetime.datetime.now().isoformat(), + rev=args.rev, + rev_short=args.rev_short, + rev_url=args.rev_url, + website_url=website_url, + events=args.events, + languages=languages, + includes_dir=os.path.join(os.path.dirname(__file__), '..', '_includes'), + is_amp=False, + is_blog=True, + post_meta=post_meta + ) + ) + + cfg = config.load_config(**raw_config) + mkdocs.commands.build.build(cfg) + + redirects.build_blog_redirects(args) + + # TODO: AMP for blog + # if not args.skip_amp: + # amp.build_amp(lang, args, cfg) + + logging.info(f'Finished building {lang} blog') + + except exceptions.ConfigurationError as e: + raise SystemExit('\n' + str(e)) + + +def build_blog(args): + tasks = [] + for lang in args.blog_lang.split(','): + if lang: + 
tasks.append((lang, args,)) + util.run_function_in_parallel(build_for_lang, tasks, threads=False) diff --git a/docs/tools/build.py b/docs/tools/build.py index 95e887f046f..1c8165fb36f 100755 --- a/docs/tools/build.py +++ b/docs/tools/build.py @@ -20,8 +20,8 @@ from mkdocs import exceptions import mkdocs.commands.build import amp +import blog import mdx_clickhouse - import redirects import single_page import test @@ -95,25 +95,6 @@ def build_for_lang(lang, args): else: site_dir = os.path.join(args.docs_output_dir, lang) - markdown_extensions = [ - 'mdx_clickhouse', - 'admonition', - 'attr_list', - 'codehilite', - 'nl2br', - 'sane_lists', - 'pymdownx.details', - 'pymdownx.magiclink', - 'pymdownx.superfences', - 'extra', - { - 'toc': { - 'permalink': True, - 'slugify': mdx_clickhouse.slugify - } - } - ] - plugins = ['macros'] if args.htmlproofer: plugins.append('htmlproofer') @@ -133,7 +114,7 @@ def build_for_lang(lang, args): repo_name='ClickHouse/ClickHouse', repo_url='https://github.com/ClickHouse/ClickHouse/', edit_uri=f'edit/master/docs/{lang}', - markdown_extensions=markdown_extensions, + markdown_extensions=mdx_clickhouse.MARKDOWN_EXTENSIONS, plugins=plugins, extra=dict( now=datetime.datetime.now().isoformat(), @@ -147,14 +128,15 @@ def build_for_lang(lang, args): events=args.events, languages=languages, includes_dir=os.path.join(os.path.dirname(__file__), '..', '_includes'), - is_amp=False + is_amp=False, + is_blog=False ) ) if os.path.exists(config_path): raw_config['config_file'] = config_path else: - raw_config['nav'] = nav.build_nav(lang, args) + raw_config['nav'] = nav.build_docs_nav(lang, args) cfg = config.load_config(**raw_config) @@ -187,7 +169,7 @@ def build_docs(args): if lang: tasks.append((lang, args,)) util.run_function_in_parallel(build_for_lang, tasks, threads=False) - redirects.build_redirects(args) + redirects.build_docs_redirects(args) def build(args): @@ -204,6 +186,9 @@ def build(args): from github import build_releases build_releases(args, build_docs) + if not args.skip_blog: + blog.build_blog(args) + if not args.skip_website: website.process_benchmark_results(args) website.minify_website(args) @@ -215,12 +200,14 @@ if __name__ == '__main__': website_dir = os.path.join('..', 'website') arg_parser = argparse.ArgumentParser() arg_parser.add_argument('--lang', default='en,es,fr,ru,zh,ja,tr,fa') + arg_parser.add_argument('--blog-lang', default='en,ru') arg_parser.add_argument('--docs-dir', default='.') arg_parser.add_argument('--theme-dir', default=website_dir) arg_parser.add_argument('--website-dir', default=website_dir) + arg_parser.add_argument('--blog-dir', default=os.path.join(website_dir, 'blog')) arg_parser.add_argument('--output-dir', default='build') arg_parser.add_argument('--enable-stable-releases', action='store_true') - arg_parser.add_argument('--stable-releases-limit', type=int, default='4') + arg_parser.add_argument('--stable-releases-limit', type=int, default='3') arg_parser.add_argument('--lts-releases-limit', type=int, default='2') arg_parser.add_argument('--nav-limit', type=int, default='0') arg_parser.add_argument('--version-prefix', type=str, default='') @@ -230,6 +217,7 @@ if __name__ == '__main__': arg_parser.add_argument('--skip-amp', action='store_true') arg_parser.add_argument('--skip-pdf', action='store_true') arg_parser.add_argument('--skip-website', action='store_true') + arg_parser.add_argument('--skip-blog', action='store_true') arg_parser.add_argument('--skip-git-log', action='store_true') arg_parser.add_argument('--test-only', 
action='store_true') arg_parser.add_argument('--minify', action='store_true') @@ -249,6 +237,7 @@ if __name__ == '__main__': logging.getLogger('MARKDOWN').setLevel(logging.INFO) args.docs_output_dir = os.path.join(os.path.abspath(args.output_dir), 'docs') + args.blog_output_dir = os.path.join(os.path.abspath(args.output_dir), 'blog') from github import choose_latest_releases, get_events args.stable_releases = choose_latest_releases(args) if args.enable_stable_releases else [] @@ -259,6 +248,7 @@ if __name__ == '__main__': if args.test_only: args.skip_multi_page = True + args.skip_blog = True args.skip_website = True args.skip_pdf = True args.skip_amp = True diff --git a/docs/tools/mdx_clickhouse.py b/docs/tools/mdx_clickhouse.py index 393658be2d7..5ea93002cd2 100755 --- a/docs/tools/mdx_clickhouse.py +++ b/docs/tools/mdx_clickhouse.py @@ -18,6 +18,30 @@ import amp import website +def slugify(value, separator): + return slugify_impl.slugify(value, separator=separator, word_boundary=True, save_order=True) + + +MARKDOWN_EXTENSIONS = [ + 'mdx_clickhouse', + 'admonition', + 'attr_list', + 'codehilite', + 'nl2br', + 'sane_lists', + 'pymdownx.details', + 'pymdownx.magiclink', + 'pymdownx.superfences', + 'extra', + { + 'toc': { + 'permalink': True, + 'slugify': slugify + } + } +] + + class ClickHouseLinkMixin(object): def handleMatch(self, m, data): @@ -72,10 +96,6 @@ def makeExtension(**kwargs): return ClickHouseMarkdown(**kwargs) -def slugify(value, separator): - return slugify_impl.slugify(value, separator=separator, word_boundary=True, save_order=True) - - def get_translations(dirname, lang): import babel.support return babel.support.Translations.load( diff --git a/docs/tools/nav.py b/docs/tools/nav.py index 3c4fd304bd3..0e90bae6b81 100644 --- a/docs/tools/nav.py +++ b/docs/tools/nav.py @@ -1,4 +1,6 @@ import collections +import datetime +import hashlib import logging import os @@ -19,7 +21,8 @@ def build_nav_entry(root, args): return None, None, None result_items = [] index_meta, index_content = util.read_md_file(os.path.join(root, 'index.md')) - current_title = index_meta.get('toc_folder_title', index_meta.get('toc_title', find_first_header(index_content))) + current_title = index_meta.get('toc_folder_title', index_meta.get('toc_title')) + current_title = current_title or index_meta.get('title', find_first_header(index_content)) for filename in os.listdir(root): path = os.path.join(root, filename) if os.path.isdir(path): @@ -37,17 +40,21 @@ def build_nav_entry(root, args): title = meta.get('toc_folder_title', 'hidden') prio = meta.get('toc_priority', 9999) logging.debug(f'Nav entry: {prio}, {title}, {path}') - if not content.strip(): + if meta.get('toc_hidden') or not content.strip(): title = 'hidden' + if title == 'hidden': + title = 'hidden-' + hashlib.sha1(content.encode('utf-8')).hexdigest() if args.nav_limit and len(result_items) >= args.nav_limit: break result_items.append((prio, title, path)) result_items = sorted(result_items, key=lambda x: (x[0], x[1])) result = collections.OrderedDict([(item[1], item[2]) for item in result_items]) + if index_meta.get('toc_hidden_folder'): + current_title += '|hidden-folder' return index_meta.get('toc_priority', 10000), current_title, result -def build_nav(lang, args): +def build_docs_nav(lang, args): docs_dir = os.path.join(args.docs_dir, lang) _, _, nav = build_nav_entry(docs_dir, args) result = [] @@ -64,10 +71,50 @@ def build_nav(lang, args): key = list(result[0].keys())[0] result[0][key][index_key] = 'index.md' 
result[0][key].move_to_end(index_key, last=False) - print('result', result) return result +def build_blog_nav(lang, args): + blog_dir = os.path.join(args.blog_dir, lang) + years = sorted(os.listdir(blog_dir), reverse=True) + result_nav = [{'hidden': 'index.md'}] + post_meta = collections.OrderedDict() + for year in years: + year_dir = os.path.join(blog_dir, year) + if not os.path.isdir(year_dir): + continue + result_nav.append({year: collections.OrderedDict()}) + posts = [] + post_meta_items = [] + for post in os.listdir(year_dir): + meta, _ = util.read_md_file(os.path.join(year_dir, post)) + post_date = meta['date'] + post_title = meta['title'] + if datetime.date.fromisoformat(post_date) > datetime.date.today(): + continue + posts.append( + (post_date, post_title, os.path.join(year, post),) + ) + if post_title in post_meta: + raise RuntimeError(f'Duplicate post title: {post_title}') + if not post_date.startswith(f'{year}-'): + raise RuntimeError(f'Post date {post_date} doesn\'t match the folder year {year}: {post_title}') + post_url_part = post.replace('.md', '') + post_meta_items.append((post_date, { + 'date': post_date, + 'title': post_title, + 'image': meta.get('image'), + 'url': f'/blog/{lang}/{year}/{post_url_part}/' + },)) + for _, title, path in sorted(posts, reverse=True): + result_nav[-1][year][title] = path + for _, post_meta_item in sorted(post_meta_items, + reverse=True, + key=lambda item: item[0]): + post_meta[post_meta_item['title']] = post_meta_item + return result_nav, post_meta + + def _custom_get_navigation(files, config): nav_config = config['nav'] or mkdocs.structure.nav.nest_paths(f.src_path for f in files.documentation_pages()) items = mkdocs.structure.nav._data_to_navigation(nav_config, files, config) diff --git a/docs/tools/redirects.py b/docs/tools/redirects.py index fc4d60aaf5a..2f5ebc8a620 100644 --- a/docs/tools/redirects.py +++ b/docs/tools/redirects.py @@ -25,24 +25,34 @@ def write_redirect_html(out_path, to_url): ''') -def build_redirect_html(args, from_path, to_path): - for lang in args.lang.split(','): - out_path = os.path.join( - args.docs_output_dir, lang, - from_path.replace('/index.md', '/index.html').replace('.md', '/index.html') - ) - version_prefix = f'/{args.version_prefix}/' if args.version_prefix else '/' - target_path = to_path.replace('/index.md', '/').replace('.md', '/') - to_url = f'/docs{version_prefix}{lang}/{target_path}' - to_url = to_url.strip() - write_redirect_html(out_path, to_url) +def build_redirect_html(args, base_prefix, lang, output_dir, from_path, to_path): + out_path = os.path.join( + output_dir, lang, + from_path.replace('/index.md', '/index.html').replace('.md', '/index.html') + ) + version_prefix = f'/{args.version_prefix}/' if args.version_prefix else '/' + target_path = to_path.replace('/index.md', '/').replace('.md', '/') + to_url = f'/{base_prefix}{version_prefix}{lang}/{target_path}' + to_url = to_url.strip() + write_redirect_html(out_path, to_url) -def build_redirects(args): +def build_docs_redirects(args): with open(os.path.join(args.docs_dir, 'redirects.txt'), 'r') as f: for line in f: - from_path, to_path = line.split(' ', 1) - build_redirect_html(args, from_path, to_path) + for lang in args.lang.split(','): + from_path, to_path = line.split(' ', 1) + build_redirect_html(args, 'docs', lang, args.docs_output_dir, from_path, to_path) + + +def build_blog_redirects(args): + for lang in args.blog_lang.split(','): + redirects_path = os.path.join(args.blog_dir, lang, 'redirects.txt') + if os.path.exists(redirects_path): + 
with open(redirects_path, 'r') as f: + for line in f: + from_path, to_path = line.split(' ', 1) + build_redirect_html(args, 'blog', lang, args.blog_output_dir, from_path, to_path) def build_static_redirects(args): diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt index 570dcf0aaf2..d46c9891e89 100644 --- a/docs/tools/requirements.txt +++ b/docs/tools/requirements.txt @@ -28,7 +28,7 @@ pymdown-extensions==7.1 python-slugify==1.2.6 PyYAML==5.3.1 repackage==0.7.3 -requests==2.23.0 +requests==2.24.0 singledispatch==3.4.0.3 six==1.15.0 soupsieve==2.0.1 diff --git a/docs/tools/test.py b/docs/tools/test.py index 63b84885d9f..5c0cf4b799d 100755 --- a/docs/tools/test.py +++ b/docs/tools/test.py @@ -92,9 +92,11 @@ def test_single_page(input_path, lang): logging.warning('Found %d duplicate anchor points' % duplicate_anchor_points) if links_to_nowhere: - logging.warning(f'Found {links_to_nowhere} links to nowhere in {lang}') if lang == 'en': # TODO: check all languages again + logging.error(f'Found {links_to_nowhere} links to nowhere in {lang}') sys.exit(1) + else: + logging.warning(f'Found {links_to_nowhere} links to nowhere in {lang}') if len(anchor_points) <= 10: logging.error('Html parsing is probably broken') diff --git a/docs/tools/translate/filter.py b/docs/tools/translate/filter.py index fe421381700..1d927ca6341 100755 --- a/docs/tools/translate/filter.py +++ b/docs/tools/translate/filter.py @@ -160,6 +160,15 @@ def translate_filter(key, value, _format, _): attempts = 10 if '#' in href: href, anchor = href.split('#', 1) + if href.endswith('.md') and not href.startswith('/'): + parts = [part for part in os.environ['INPUT'].split('/') if len(part) == 2] + lang = parts[-1] + script_path = os.path.dirname(__file__) + base_path = os.path.abspath(f'{script_path}/../../{lang}') + href = os.path.join( + os.path.relpath(base_path, os.path.dirname(os.environ['INPUT'])), + os.path.relpath(href, base_path) + ) if anchor: href = f'{href}#{anchor}' value[2][0] = href @@ -178,6 +187,7 @@ def translate_filter(key, value, _format, _): if __name__ == "__main__": + os.environ['INPUT'] = os.path.abspath(os.environ['INPUT']) pwd = os.path.dirname(filename or '.') if pwd: with util.cd(pwd): diff --git a/docs/tools/translate/requirements.txt b/docs/tools/translate/requirements.txt index 0c9d44a346e..98892cf4afd 100644 --- a/docs/tools/translate/requirements.txt +++ b/docs/tools/translate/requirements.txt @@ -1,12 +1,12 @@ Babel==2.8.0 certifi==2020.4.5.2 chardet==3.0.4 -googletrans==2.4.0 +googletrans==3.0.0 idna==2.9 Jinja2==2.11.2 pandocfilters==1.4.2 python-slugify==4.0.0 PyYAML==5.3.1 -requests==2.23.0 +requests==2.24.0 text-unidecode==1.3 urllib3==1.25.9 diff --git a/docs/tools/website.py b/docs/tools/website.py index ed950bd06e3..bd1120e2d80 100644 --- a/docs/tools/website.py +++ b/docs/tools/website.py @@ -17,20 +17,56 @@ import jsmin import mdx_clickhouse +def handle_iframe(iframe, soup): + if not iframe.attrs['src'].startswith('https://www.youtube.com/'): + raise RuntimeError('iframes are allowed only for YouTube') + wrapper = soup.new_tag('div') + wrapper.attrs['class'] = ['embed-responsive', 'embed-responsive-16by9'] + iframe.insert_before(wrapper) + iframe.extract() + wrapper.insert(0, iframe) + if 'width' in iframe.attrs: + del iframe.attrs['width'] + if 'height' in iframe.attrs: + del iframe.attrs['height'] + iframe.attrs['allow'] = 'accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture' + iframe.attrs['class'] = 'embed-responsive-item' + 
iframe.attrs['frameborder'] = '0' + iframe.attrs['allowfullscreen'] = '1' + + def adjust_markdown_html(content): soup = bs4.BeautifulSoup( content, features='html.parser' ) + for a in soup.find_all('a'): a_class = a.attrs.get('class') if a_class and 'headerlink' in a_class: a.string = '\xa0' + + for iframe in soup.find_all('iframe'): + handle_iframe(iframe, soup) + + for img in soup.find_all('img'): + if img.attrs.get('alt') == 'iframe': + img.name = 'iframe' + img.string = '' + handle_iframe(img, soup) + continue + img_class = img.attrs.get('class') + if img_class: + img.attrs['class'] = img_class + ['img-fluid'] + else: + img.attrs['class'] = 'img-fluid' + for details in soup.find_all('details'): for summary in details.find_all('summary'): if summary.parent != details: summary.extract() details.insert(0, summary) + for div in soup.find_all('div'): div_class = div.attrs.get('class') is_admonition = div_class and 'admonition' in div.attrs.get('class') @@ -41,10 +77,12 @@ def adjust_markdown_html(content): a.attrs['class'] = a_class + ['alert-link'] else: a.attrs['class'] = 'alert-link' + for p in div.find_all('p'): p_class = p.attrs.get('class') if is_admonition and p_class and ('admonition-title' in p_class): p.attrs['class'] = p_class + ['alert-heading', 'display-6', 'mb-2'] + if is_admonition: div.attrs['role'] = 'alert' if ('info' in div_class) or ('note' in div_class): @@ -107,10 +145,13 @@ def build_website(args): 'public', 'node_modules', 'templates', - 'feathericons', 'locale' ) ) + shutil.copy2( + os.path.join(args.website_dir, 'js', 'embedd.min.js'), + os.path.join(args.output_dir, 'js', 'embedd.min.js') + ) for root, _, filenames in os.walk(args.output_dir): for filename in filenames: @@ -136,6 +177,7 @@ def get_css_in(args): f"'{args.website_dir}/css/bootstrap.css'", f"'{args.website_dir}/css/docsearch.css'", f"'{args.website_dir}/css/base.css'", + f"'{args.website_dir}/css/blog.css'", f"'{args.website_dir}/css/docs.css'", f"'{args.website_dir}/css/highlight.css'" ] @@ -236,6 +278,10 @@ def minify_website(args): def process_benchmark_results(args): benchmark_root = os.path.join(args.website_dir, 'benchmark') + required_keys = { + 'dbms': ['result'], + 'hardware': ['result', 'system', 'system_full', 'kind'] + } for benchmark_kind in ['dbms', 'hardware']: results = [] results_root = os.path.join(benchmark_root, benchmark_kind, 'results') @@ -243,7 +289,11 @@ def process_benchmark_results(args): result_file = os.path.join(results_root, result) logging.debug(f'Reading benchmark result from {result_file}') with open(result_file, 'r') as f: - results += json.loads(f.read()) + result = json.loads(f.read()) + for item in result: + for required_key in required_keys[benchmark_kind]: + assert required_key in item, f'No "{required_key}" in {result_file}' + results += result results_js = os.path.join(args.output_dir, 'benchmark', benchmark_kind, 'results.js') with open(results_js, 'w') as f: data = json.dumps(results) diff --git a/docs/tr/interfaces/third-party/integrations.md b/docs/tr/interfaces/third-party/integrations.md index 8a1d5c239f6..d28046272b7 100644 --- a/docs/tr/interfaces/third-party/integrations.md +++ b/docs/tr/interfaces/third-party/integrations.md @@ -14,6 +14,7 @@ toc_title: Entegrasyonlar - İlişkisel veritabanı yönetim sistemleri - [MySQL](https://www.mysql.com) + - [mysql2ch](https://github.com/long2ice/mysql2ch) - [ProxySQL](https://github.com/sysown/proxysql/wiki/ClickHouse-Support) - 
[clickhouse-mysql-data-reader](https://github.com/Altinity/clickhouse-mysql-data-reader) - [horgh-çoğaltıcı](https://github.com/larsnovikov/horgh-replicator) @@ -99,5 +100,11 @@ toc_title: Entegrasyonlar - İksir - [Ecto](https://github.com/elixir-ecto/ecto) - [clickhouse\_ecto](https://github.com/appodeal/clickhouse_ecto) +- Ruby + - [Ruby on Rails](https://rubyonrails.org/) + - [activecube](https://github.com/bitquery/activecube) + - [ActiveRecord](https://github.com/PNixx/clickhouse-activerecord) + - [GraphQL](https://github.com/graphql) + - [activecube-graphql](https://github.com/bitquery/activecube-graphql) [Orijinal makale](https://clickhouse.tech/docs/en/interfaces/third-party/integrations/) diff --git a/docs/tr/introduction/adopters.md b/docs/tr/introduction/adopters.md index 444902e0b96..1da65ebb903 100644 --- a/docs/tr/introduction/adopters.md +++ b/docs/tr/introduction/adopters.md @@ -37,7 +37,7 @@ toc_title: Benimseyenler | Exness | Ticaret | Metrikler, Günlük Kaydı | — | — | [Rusça konuşun, Mayıs 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) | | Geniee | Reklam Ağı | Ana ürün | — | — | [Japonca Blog yazısı, Temmuz 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) | | HUYA | Video Akışı | Analiz | — | — | [Çince slaytlar, Ekim 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) | -| Idealista | Emlak | Analiz | — | — | [İngilizce Blog yazısı, Nisan 2019](https://clickhouse.yandex/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | +| Idealista | Emlak | Analiz | — | — | [İngilizce Blog yazısı, Nisan 2019](https://clickhouse.tech/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | | Infovista | Ağlar | Analiz | — | — | [İngilizce slaytlar, Ekim 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) | | Innogames | Oyun | Metrikler, Günlük Kaydı | — | — | [Rusça slaytlar, Eylül 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) | | Integros | Video hizmetleri platformu | Analiz | — | — | [Rusça slaytlar, Mayıs 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | diff --git a/docs/tr/sql-reference/data-types/domains/ipv4.md b/docs/tr/sql-reference/data-types/domains/ipv4.md index 22ca6e7240c..4caf031c0c3 100644 --- a/docs/tr/sql-reference/data-types/domains/ipv4.md +++ b/docs/tr/sql-reference/data-types/domains/ipv4.md @@ -33,7 +33,7 @@ CREATE TABLE hits (url String, from IPv4) ENGINE = MergeTree() ORDER BY from; `IPv4` etki alanı IPv4 dizeleri olarak özel giriş biçimini destekler: ``` sql -INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '116.253.40.133')('https://clickhouse.tech', '183.247.232.58')('https://clickhouse.yandex/docs/en/', '116.106.34.242'); +INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '116.253.40.133')('https://clickhouse.tech', '183.247.232.58')('https://clickhouse.tech/docs/en/', '116.106.34.242'); SELECT * FROM hits; ``` diff --git a/docs/tr/sql-reference/data-types/domains/ipv6.md b/docs/tr/sql-reference/data-types/domains/ipv6.md index 642fe397e52..7f721cc07f6 100644 --- a/docs/tr/sql-reference/data-types/domains/ipv6.md +++ b/docs/tr/sql-reference/data-types/domains/ipv6.md @@ -33,7 +33,7 @@ CREATE TABLE hits (url String, from IPv6) ENGINE = MergeTree() ORDER BY from; `IPv6` etki alanı IPv6 dizeleri olarak özel girişi destekler: ``` sql -INSERT INTO hits (url, from) VALUES 
('https://wikipedia.org', '2a02:aa08:e000:3100::2')('https://clickhouse.tech', '2001:44c8:129:2632:33:0:252:2')('https://clickhouse.yandex/docs/en/', '2a02:e980:1e::1'); +INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '2a02:aa08:e000:3100::2')('https://clickhouse.tech', '2001:44c8:129:2632:33:0:252:2')('https://clickhouse.tech/docs/en/', '2a02:e980:1e::1'); SELECT * FROM hits; ``` diff --git a/docs/tr/sql-reference/data-types/simpleaggregatefunction.md b/docs/tr/sql-reference/data-types/simpleaggregatefunction.md deleted file mode 120000 index 76a7ef3b802..00000000000 --- a/docs/tr/sql-reference/data-types/simpleaggregatefunction.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/sql-reference/data-types/simpleaggregatefunction.md \ No newline at end of file diff --git a/docs/tr/sql-reference/data-types/simpleaggregatefunction.md b/docs/tr/sql-reference/data-types/simpleaggregatefunction.md new file mode 100644 index 00000000000..8ecaf174688 --- /dev/null +++ b/docs/tr/sql-reference/data-types/simpleaggregatefunction.md @@ -0,0 +1,40 @@ +--- +machine_translated: true +machine_translated_rev: 71d72c1f237f4a553fe91ba6c6c633e81a49e35b +--- + +# SimpleAggregateFunction {#data-type-simpleaggregatefunction} + +`SimpleAggregateFunction(name, types_of_arguments…)` veri türü, toplama işlevinin geçerli değerini depolar ve tam durumunu şu şekilde depolamaz [`AggregateFunction`](../../sql-reference/data-types/aggregatefunction.md) yapar. Bu optimizasyon, aşağıdaki özelliğin bulunduğu işlevlere uygulanabilir: bir işlev uygulama sonucu `f` bir satır kümesi için `S1 UNION ALL S2` uygulayarak elde edilebilir `f` satır parçalarına ayrı ayrı ayarlayın ve sonra tekrar uygulayın `f` sonuçlara: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`. Bu özellik, kısmi toplama sonuçlarının Birleşik olanı hesaplamak için yeterli olduğunu garanti eder, bu nedenle herhangi bir ek veri depolamak ve işlemek zorunda kalmayız. + +Aşağıdaki toplama işlevleri desteklenir: + +- [`any`](../../sql-reference/aggregate-functions/reference.md#agg_function-any) +- [`anyLast`](../../sql-reference/aggregate-functions/reference.md#anylastx) +- [`min`](../../sql-reference/aggregate-functions/reference.md#agg_function-min) +- [`max`](../../sql-reference/aggregate-functions/reference.md#agg_function-max) +- [`sum`](../../sql-reference/aggregate-functions/reference.md#agg_function-sum) +- [`groupBitAnd`](../../sql-reference/aggregate-functions/reference.md#groupbitand) +- [`groupBitOr`](../../sql-reference/aggregate-functions/reference.md#groupbitor) +- [`groupBitXor`](../../sql-reference/aggregate-functions/reference.md#groupbitxor) +- [`groupArrayArray`](../../sql-reference/aggregate-functions/reference.md#agg_function-grouparray) +- [`groupUniqArrayArray`](../../sql-reference/aggregate-functions/reference.md#groupuniqarrayx-groupuniqarraymax-sizex) + +Değerleri `SimpleAggregateFunction(func, Type)` bak ve aynı şekilde saklanır `Type`, bu yüzden fonksiyonları ile uygulamak gerekmez `-Merge`/`-State` sonekler. `SimpleAggregateFunction` daha iyi performans vardır `AggregateFunction` aynı toplama fonksiyonu ile. + +**Parametre** + +- Toplama işlevinin adı. +- Toplama işlevi bağımsız değişkenleri türleri. + +**Örnek** + +``` sql +CREATE TABLE t +( + column1 SimpleAggregateFunction(sum, UInt64), + column2 SimpleAggregateFunction(any, String) +) ENGINE = ... 
+``` + +[Orijinal makale](https://clickhouse.tech/docs/en/data_types/simpleaggregatefunction/) diff --git a/docs/tr/sql-reference/functions/array-functions.md b/docs/tr/sql-reference/functions/array-functions.md index 9ecb255ebbe..9638481db52 100644 --- a/docs/tr/sql-reference/functions/array-functions.md +++ b/docs/tr/sql-reference/functions/array-functions.md @@ -702,13 +702,13 @@ arrayDifference(array) **Parametre** -- `array` – [Dizi](https://clickhouse.yandex/docs/en/data_types/array/). +- `array` – [Dizi](https://clickhouse.tech/docs/en/data_types/array/). **Döndürülen değerler** Bitişik öğeler arasındaki farklar dizisini döndürür. -Tür: [Uİnt\*](https://clickhouse.yandex/docs/en/data_types/int_uint/#uint-ranges), [Tamsayı\*](https://clickhouse.yandex/docs/en/data_types/int_uint/#int-ranges), [Yüzdürmek\*](https://clickhouse.yandex/docs/en/data_types/float/). +Tür: [Uİnt\*](https://clickhouse.tech/docs/en/data_types/int_uint/#uint-ranges), [Tamsayı\*](https://clickhouse.tech/docs/en/data_types/int_uint/#int-ranges), [Yüzdürmek\*](https://clickhouse.tech/docs/en/data_types/float/). **Örnek** @@ -754,7 +754,7 @@ arrayDistinct(array) **Parametre** -- `array` – [Dizi](https://clickhouse.yandex/docs/en/data_types/array/). +- `array` – [Dizi](https://clickhouse.tech/docs/en/data_types/array/). **Döndürülen değerler** diff --git a/docs/tr/whats-new/changelog/2017.md b/docs/tr/whats-new/changelog/2017.md index 98643fe449a..1011ebadb84 100644 --- a/docs/tr/whats-new/changelog/2017.md +++ b/docs/tr/whats-new/changelog/2017.md @@ -26,7 +26,7 @@ Bu sürüm önceki sürüm 1.1.54310 için hata düzeltmeleri içerir: #### Yenilik: {#new-features} - Tablo motorları MergeTree ailesi için özel bölümleme anahtarı. -- [Kafka](https://clickhouse.yandex/docs/en/operations/table_engines/kafka/) masa motoru. +- [Kafka](https://clickhouse.tech/docs/en/operations/table_engines/kafka/) masa motoru. - Yükleme için destek eklendi [CatBoost](https://catboost.yandex/) modelleri ve ClickHouse saklanan verilere uygulayarak. - UTC olmayan tamsayı uzaklıklar ile saat dilimleri için destek eklendi. - Zaman aralıklarıyla aritmetik işlemler için destek eklendi. diff --git a/docs/zh/commercial/support.md b/docs/zh/commercial/support.md deleted file mode 120000 index 1eb20ccf36a..00000000000 --- a/docs/zh/commercial/support.md +++ /dev/null @@ -1 +0,0 @@ -../../en/commercial/support.md \ No newline at end of file diff --git a/docs/zh/commercial/support.md b/docs/zh/commercial/support.md new file mode 100644 index 00000000000..e8462fc962e --- /dev/null +++ b/docs/zh/commercial/support.md @@ -0,0 +1,23 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +toc_priority: 3 +toc_title: "\u788C\u83BD\u7984Support:" +--- + +# ClickHouse商业支持服务提供商 {#clickhouse-commercial-support-service-providers} + +!!! info "信息" + 如果您已经推出ClickHouse商业支持服务,请随时 [打开拉取请求](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/commercial/support.md) 将其添加到以下列表。 + +## 敏锐性 {#altinity} + +隆隆隆隆路虏脢..陇.貌.垄拢卢虏禄and陇.貌路.隆拢脳枚脢虏 隆隆隆隆路虏脢..陇.貌.垄拢卢虏禄.陇 访问 [www.altinity.com](https://www.altinity.com/) 欲了解更多信息. 
+ +## Mafiree {#mafiree} + +[服务说明](http://mafiree.com/clickhouse-analytics-services.php) + +## MinervaDB {#minervadb} + +[服务说明](https://minervadb.com/index.php/clickhouse-consulting-and-support-by-minervadb/) diff --git a/docs/zh/engines/database-engines/lazy.md b/docs/zh/engines/database-engines/lazy.md index c0a08e37559..bca7eaeda6c 100644 --- a/docs/zh/engines/database-engines/lazy.md +++ b/docs/zh/engines/database-engines/lazy.md @@ -1,15 +1,13 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd toc_priority: 31 -toc_title: "\u61D2\u60F0" +toc_title: "延时引擎" --- -# 懒惰 {#lazy} +# 延时引擎Lazy {#lazy} -仅将表保留在RAM中 `expiration_time_in_seconds` 上次访问后几秒钟。 只能与\*日志表一起使用。 +在距最近一次访问间隔`expiration_time_in_seconds`时间段内,将表保存在内存中,仅适用于 \*Log引擎表 -它针对存储许多小\*日志表进行了优化,访问之间存在较长的时间间隔。 +由于针对这类表的访问间隔较长,对保存大量小的 \*Log引擎表进行了优化, ## 创建数据库 {#creating-a-database} diff --git a/docs/zh/engines/table-engines/integrations/jdbc.md b/docs/zh/engines/table-engines/integrations/jdbc.md index 774afcc56bc..179d78e5a3b 100644 --- a/docs/zh/engines/table-engines/integrations/jdbc.md +++ b/docs/zh/engines/table-engines/integrations/jdbc.md @@ -1,19 +1,19 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd toc_priority: 34 -toc_title: JDBC +toc_title: JDBC表引擎 --- # JDBC {#table-engine-jdbc} -允许ClickHouse通过以下方式连接到外部数据库 [JDBC](https://en.wikipedia.org/wiki/Java_Database_Connectivity). +允许CH通过 [JDBC](https://en.wikipedia.org/wiki/Java_Database_Connectivity) 连接到外部数据库。 -要实现JDBC连接,ClickHouse使用单独的程序 [ツ暗ェツ氾环催ツ団ツ法ツ人](https://github.com/alex-krash/clickhouse-jdbc-bridge) 这应该作为守护进程运行。 -该引擎支持 [可为空](../../../sql-reference/data-types/nullable.md) 数据类型。 +要实现JDBC连接,CH需要使用以后台进程运行的程序 [clickhouse-jdbc-bridge](https://github.com/alex-krash/clickhouse-jdbc-bridge)。 -## 创建表 {#creating-a-table} +该引擎支持 [Nullable](../../../sql-reference/data-types/nullable.md) 数据类型。 + + +## 建表 {#creating-a-table} ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name @@ -23,20 +23,22 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name ENGINE = JDBC(dbms_uri, external_database, external_table) ``` -**发动机参数** +**引擎参数** -- `dbms_uri` — URI of an external DBMS. +- `dbms_uri` — 外部DBMS的uri. 格式: `jdbc:://:/?user=&password=`. - Mysql的示例: `jdbc:mysql://localhost:3306/?user=root&password=root`. + MySQL示例: `jdbc:mysql://localhost:3306/?user=root&password=root`. -- `external_database` — Database in an external DBMS. +- `external_database` — 外部DBMS的数据库名. -- `external_table` — Name of the table in `external_database`. +- `external_table` — `external_database`中的外部表名. ## 用法示例 {#usage-example} -通过直接与它的控制台客户端连接在MySQL服务器中创建一个表: +通过mysql控制台客户端来创建表 + +Creating a table in MySQL server by connecting directly with it’s console client: ``` text mysql> CREATE TABLE `test`.`test` ( @@ -59,7 +61,7 @@ mysql> select * from test; 1 row in set (0,00 sec) ``` -在ClickHouse服务器中创建表并从中选择数据: +在CH服务端创建表,并从中查询数据: ``` sql CREATE TABLE jdbc_table @@ -83,8 +85,8 @@ FROM jdbc_table └────────┴──────────────┴───────┴────────────────┘ ``` -## 另请参阅 {#see-also} +## 参见 {#see-also} - [JDBC表函数](../../../sql-reference/table-functions/jdbc.md). 
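
作为补充,下面给出一个简要示意:除了 JDBC 表引擎,也可以直接用上文"参见"中提到的 jdbc 表函数按需查询同一张外部表,无需预先建表。连接串和 `test`.`test` 库表沿用本页前面的示例,仅作演示,并非本页原有内容:

``` sql
-- 示意:通过 jdbc 表函数直接查询 MySQL 中的 test.test 表
SELECT *
FROM jdbc('jdbc:mysql://localhost:3306/?user=root&password=root', 'test', 'test')
```
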
-[原始文章](https://clickhouse.tech/docs/en/operations/table_engines/jdbc/) +[原始文档](https://clickhouse.tech/docs/en/operations/table_engines/jdbc/) diff --git a/docs/zh/engines/table-engines/mergetree-family/graphitemergetree.md b/docs/zh/engines/table-engines/mergetree-family/graphitemergetree.md index 1ca762e8537..9af39bcf964 100644 --- a/docs/zh/engines/table-engines/mergetree-family/graphitemergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/graphitemergetree.md @@ -1,17 +1,16 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd toc_priority: 38 toc_title: GraphiteMergeTree --- # GraphiteMergeTree {#graphitemergetree} -此引擎专为细化和聚合/平均(rollup) [石墨](http://graphite.readthedocs.io/en/latest/index.html) 戴达 对于想要使用ClickHouse作为Graphite的数据存储的开发人员来说,这可能会有所帮助。 +该引擎用来对 [Graphite](http://graphite.readthedocs.io/en/latest/index.html)数据进行瘦身及汇总。对于想使用CH来存储Graphite数据的开发者来说可能有用。 -您可以使用任何ClickHouse表引擎来存储石墨数据,如果你不需要汇总,但如果你需要一个汇总使用 `GraphiteMergeTree`. 该引擎减少了存储量,并提高了Graphite查询的效率。 -引擎继承从属性 [MergeTree](mergetree.md). +如果不需要对Graphite数据做汇总,那么可以使用任意的CH表引擎;但若需要,那就采用 `GraphiteMergeTree` 引擎。它能减少存储空间,同时能提高Graphite数据的查询效率。 + +该引擎继承自 [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md). ## 创建表 {#creating-table} @@ -30,36 +29,32 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] [SETTINGS name=value, ...] ``` -请参阅的详细说明 [CREATE TABLE](../../../sql-reference/statements/create.md#create-table-query) 查询。 +建表语句的详细说明请参见 [创建表](../../../sql-reference/statements/create.md#create-table-query) -Graphite数据的表应具有以下数据的列: +含有Graphite数据集的表应该包含以下的数据列: +- 指标名称(Graphite sensor),数据类型:`String` +- 指标的时间度量,数据类型: `DateTime` +- 指标的值,数据类型:任意数值类型 +- 指标的版本号,数据类型: 任意数值类型 -- 公制名称(石墨传感器)。 数据类型: `String`. + CH以最大的版本号保存行记录,若版本号相同,保留最后写入的数据。 -- 测量度量的时间。 数据类型: `DateTime`. +以上列必须设置在汇总参数配置中。 -- 度量值。 数据类型:任何数字。 -- 指标的版本。 数据类型:任何数字。 +**GraphiteMergeTree 参数** +- `config_section` - 配置文件中标识汇总规则的节点名称 - 如果版本相同,ClickHouse会保存版本最高或最后写入的行。 其他行在数据部分合并期间被删除。 +**建表语句** -应在汇总配置中设置这些列的名称。 - -**GraphiteMergeTree参数** - -- `config_section` — Name of the section in the configuration file, where are the rules of rollup set. - -**查询子句** - -当创建一个 `GraphiteMergeTree` 表,相同 [条款](mergetree.md#table_engine-mergetree-creating-a-table) 是必需的,因为当创建 `MergeTree` 桌子 +在创建 `GraphiteMergeTree` 表时,需要采用和 [clauses](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) 相同的语句,就像创建 `MergeTree` 一样。
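
下面是一个简化的建表示意(假设服务器配置中已经定义了名为 `graphite_rollup` 的汇总规则节点;列名采用下文"所需的列"中的默认名称,表名仅为演示):

``` sql
-- 示意:用默认列名存储 Graphite 数据,'graphite_rollup' 为假设的配置节点名
CREATE TABLE graphite_data
(
    Path String,
    Time DateTime,
    Value Float64,
    Timestamp UInt32
)
ENGINE = GraphiteMergeTree('graphite_rollup')
PARTITION BY toYYYYMM(Time)
ORDER BY (Path, Time)
```
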
-不推荐使用的创建表的方法 +已废弃的建表语句 -!!! attention "注意" - 不要在新项目中使用此方法,如果可能的话,请将旧项目切换到上述方法。 +!!! 注意 "Attention" + 请不要在新项目中使用;如有可能,请将旧的项目按上述的方法进行替换。 ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] @@ -73,31 +68,30 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ) ENGINE [=] GraphiteMergeTree(date-column [, sampling_expression], (primary, key), index_granularity, config_section) ``` -所有参数除外 `config_section` 具有相同的含义 `MergeTree`. +除了`config_section`,其它所有参数和`MergeTree`的相应参数一样. -- `config_section` — Name of the section in the configuration file, where are the rules of rollup set. +- `config_section` —配置文件中设置汇总规则的节点
-## 汇总配置 {#rollup-configuration} +## 汇总配置的参数 {#rollup-configuration} +汇总的配置参数由服务器配置的 [graphite\_rollup](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-graphite) 参数定义。参数名称可以是任意的。允许为多个不同表创建多组配置并使用。 -汇总的设置由 [graphite\_rollup](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-graphite) 服务器配置中的参数。 参数的名称可以是any。 您可以创建多个配置并将它们用于不同的表。 -汇总配置结构: +汇总配置的结构如下: + 所需的列 + 模式Patterns - required-columns - patterns -### 必填列 {#required-columns} +### 所需的列 {#required-columns} +- `path_column_name` — 保存指标名称的列名 (Graphite sensor). 默认值: `Path`. +- `time_column_name` — 保存指标时间度量的列名. Default value: `Time`. +- `value_column_name` — The name of the column storing the value of the metric at the time set in `time_column_name`.默认值: `Value`. +- `version_column_name` - 保存指标的版本号列. 默认值: `Timestamp`. -- `path_column_name` — The name of the column storing the metric name (Graphite sensor). Default value: `Path`. -- `time_column_name` — The name of the column storing the time of measuring the metric. Default value: `Time`. -- `value_column_name` — The name of the column storing the value of the metric at the time set in `time_column_name`. 默认值: `Value`. -- `version_column_name` — The name of the column storing the version of the metric. Default value: `Timestamp`. -### 模式 {#patterns} - -的结构 `patterns` 科: +### 模式Patterns {#patterns} +`patterns` 的结构: ``` text pattern @@ -120,21 +114,20 @@ default ... ``` -!!! warning "注意" - 模式必须严格排序: +!!! 注意 "Attention" + 模式必须严格按顺序配置: + 1. 不含`function` or `retention`的Patterns + 1. 同时含有`function` and `retention`的Patterns + 1. `default`的Patterns. - 1. Patterns without `function` or `retention`. - 1. Patterns with both `function` and `retention`. - 1. Pattern `default`. +CH在处理行记录时,会检查 `pattern`节点的规则。每个 `pattern`(含`default`)节点可以包含 `function` 用于聚合操作,或`retention`参数,或者两者都有。如果指标名称和 `regexp`相匹配,相应 `pattern`的规则会生效;否则,使用 `default` 节点的规则。 -在处理行时,ClickHouse会检查以下内容中的规则 `pattern` 部分。 每个 `pattern` (包括 `default`)部分可以包含 `function` 聚合参数, `retention` 参数或两者兼而有之。 如果指标名称匹配 `regexp`,从规则 `pattern` 部分(sections节)的应用;否则,从规则 `default` 部分被使用。 +`pattern` 和 `default` 节点的字段设置: -字段为 `pattern` 和 `default` 科: - -- `regexp`– A pattern for the metric name. -- `age` – The minimum age of the data in seconds. -- `precision`– How precisely to define the age of the data in seconds. Should be a divisor for 86400 (seconds in a day). -- `function` – The name of the aggregating function to apply to data whose age falls within the range `[age, age + precision]`. +- `regexp`– 指标名的pattern. +- `age` – 数据的最小存活时间(按秒算). +- `precision`– 按秒来衡量数据存活时间时的精确程度. 必须能被86400整除 (一天的秒数). 
+- `function` – 对于存活时间在 `[age, age + precision]`之内的数据,需要使用的聚合函数 ### 配置示例 {#configuration-example} @@ -171,4 +164,4 @@ default ``` -[原始文章](https://clickhouse.tech/docs/en/operations/table_engines/graphitemergetree/) +[原始文档](https://clickhouse.tech/docs/en/operations/table_engines/graphitemergetree/) diff --git a/docs/zh/engines/table-engines/mergetree-family/index.md b/docs/zh/engines/table-engines/mergetree-family/index.md index 746d9f03281..c24dd02bb72 100644 --- a/docs/zh/engines/table-engines/mergetree-family/index.md +++ b/docs/zh/engines/table-engines/mergetree-family/index.md @@ -1,7 +1,5 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd -toc_folder_title: "\u6885\u6811\u5BB6\u65CF" +toc_folder_title: "合并树家族" toc_priority: 28 --- diff --git a/docs/zh/engines/table-engines/mergetree-family/replacingmergetree.md b/docs/zh/engines/table-engines/mergetree-family/replacingmergetree.md index 8cf1ab8af57..626597eeaf0 100644 --- a/docs/zh/engines/table-engines/mergetree-family/replacingmergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/replacingmergetree.md @@ -1,4 +1,4 @@ -# 更换麦树 {#replacingmergetree} +# 替换合并树 {#replacingmergetree} 该引擎和[MergeTree](mergetree.md)的不同之处在于它会删除具有相同主键的重复项。 @@ -23,7 +23,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] 请求参数的描述,参考[请求参数](../../../engines/table-engines/mergetree-family/replacingmergetree.md)。 -**替换树参数** +**参数** - `ver` — 版本列。类型为 `UInt*`, `Date` 或 `DateTime`。可选参数。 diff --git a/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md b/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md index 19caae5e1a1..257bc2ad203 100644 --- a/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md @@ -1,6 +1,4 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd toc_priority: 37 toc_title: "\u7248\u672C\u96C6\u5408\u5728\u65B0\u6811" --- @@ -33,23 +31,23 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] 有关查询参数的说明,请参阅 [查询说明](../../../sql-reference/statements/create.md). -**发动机参数** +**引擎参数** ``` sql VersionedCollapsingMergeTree(sign, version) ``` -- `sign` — Name of the column with the type of row: `1` 是一个 “state” 行, `-1` 是一个 “cancel” 划 +- `sign` — 指定行类型的列名: `1` 是一个 “state” 行, `-1` 是一个 “cancel” 划 列数据类型应为 `Int8`. -- `version` — Name of the column with the version of the object state. +- `version` — 指定对象状态版本的列名。 列数据类型应为 `UInt*`. -**查询子句** +**查询 Clauses** -当创建一个 `VersionedCollapsingMergeTree` 表,相同 [条款](mergetree.md) 需要创建一个时 `MergeTree` 桌子 +当创建一个 `VersionedCollapsingMergeTree` 表时,跟创建一个 `MergeTree`表的时候需要相同 [Clause](mergetree.md)
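
例如,可以按下面的示意建表(与下文"使用示例"一节中的 `UAct` 表一致,此处仅为演示):

``` sql
-- 示意:Sign 标识行类型(1 为 state,-1 为 cancel),Version 标识对象状态版本
CREATE TABLE UAct
(
    UserID UInt64,
    PageViews UInt8,
    Duration UInt8,
    Sign Int8,
    Version UInt8
)
ENGINE = VersionedCollapsingMergeTree(Sign, Version)
ORDER BY UserID
```
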
@@ -69,17 +67,17 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] 所有的参数,除了 `sign` 和 `version` 具有相同的含义 `MergeTree`. -- `sign` — Name of the column with the type of row: `1` 是一个 “state” 行, `-1` 是一个 “cancel” 划 +- `sign` — 指定行类型的列名: `1` 是一个 “state” 行, `-1` 是一个 “cancel” 划 Column Data Type — `Int8`. -- `version` — Name of the column with the version of the object state. +- `version` — 指定对象状态版本的列名。 列数据类型应为 `UInt*`.
-## 崩溃 {#table_engines_versionedcollapsingmergetree} +## 折叠 {#table_engines_versionedcollapsingmergetree} ### 数据 {#data} @@ -125,23 +123,23 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] 1. 写入数据的程序应该记住对象的状态以取消它。 该 “cancel” 字符串应该是 “state” 与相反的字符串 `Sign`. 这增加了存储的初始大小,但允许快速写入数据。 2. 列中长时间增长的数组由于写入负载而降低了引擎的效率。 数据越简单,效率就越高。 -3. `SELECT` 结果很大程度上取决于对象变化历史的一致性。 准备插入数据时要准确。 您可以通过不一致的数据获得不可预测的结果,例如会话深度等非负指标的负值。 +3. `SELECT` 结果很大程度上取决于对象变化历史的一致性。 准备插入数据时要准确。 不一致的数据将导致不可预测的结果,例如会话深度等非负指标的负值。 ### 算法 {#table_engines-versionedcollapsingmergetree-algorithm} -当ClickHouse合并数据部分时,它会删除具有相同主键和版本且不同主键和版本的每对行 `Sign`. 行的顺序并不重要。 +当ClickHouse合并数据部分时,它会删除具有相同主键和版本但 `Sign`值不同的一对行. 行的顺序并不重要。 当ClickHouse插入数据时,它会按主键对行进行排序。 如果 `Version` 列不在主键中,ClickHouse将其隐式添加到主键作为最后一个字段并使用它进行排序。 ## 选择数据 {#selecting-data} -ClickHouse不保证具有相同主键的所有行都将位于相同的结果数据部分中,甚至位于相同的物理服务器上。 对于写入数据和随后合并数据部分都是如此。 此外,ClickHouse流程 `SELECT` 具有多个线程的查询,并且无法预测结果中的行顺序。 这意味着聚合是必需的,如果有必要得到完全 “collapsed” 从数据 `VersionedCollapsingMergeTree` 桌子 +ClickHouse不保证具有相同主键的所有行都将位于相同的结果数据部分中,甚至位于相同的物理服务器上。 对于写入数据和随后合并数据部分都是如此。 此外,ClickHouse流程 `SELECT` 具有多个线程的查询,并且无法预测结果中的行顺序。 这意味着,如果有必要从`VersionedCollapsingMergeTree` 表中得到完全 “collapsed” 的数据,聚合是必需的。 要完成折叠,请使用 `GROUP BY` 考虑符号的子句和聚合函数。 例如,要计算数量,请使用 `sum(Sign)` 而不是 `count()`. 要计算的东西的总和,使用 `sum(Sign * x)` 而不是 `sum(x)`,并添加 `HAVING sum(Sign) > 0`. 聚合 `count`, `sum` 和 `avg` 可以这样计算。 聚合 `uniq` 如果对象至少具有一个非折叠状态,则可以计算。 聚合 `min` 和 `max` 无法计算是因为 `VersionedCollapsingMergeTree` 不保存折叠状态值的历史记录。 -如果您需要提取数据 “collapsing” 但是,如果没有聚合(例如,要检查是否存在其最新值与某些条件匹配的行),则可以使用 `FINAL` 修饰符 `FROM` 条款 这种方法效率低下,不应与大型表一起使用。 +如果您需要提取数据 “collapsing” 但是,如果没有聚合(例如,要检查是否存在其最新值与某些条件匹配的行),则可以使用 `FINAL` 修饰 `FROM` 条件这种方法效率低下,不应与大型表一起使用。 ## 使用示例 {#example-of-use} @@ -233,6 +231,6 @@ SELECT * FROM UAct FINAL └─────────────────────┴───────────┴──────────┴──────┴─────────┘ ``` -这是一个非常低效的方式来选择数据。 不要把它用于大桌子。 +这是一个非常低效的方式来选择数据。 不要把它用于数据量大的表。 [原始文章](https://clickhouse.tech/docs/en/operations/table_engines/versionedcollapsingmergetree/) diff --git a/docs/zh/engines/table-engines/special/generate.md b/docs/zh/engines/table-engines/special/generate.md index 41bd6d66918..80966767462 100644 --- a/docs/zh/engines/table-engines/special/generate.md +++ b/docs/zh/engines/table-engines/special/generate.md @@ -1,35 +1,31 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd toc_priority: 46 -toc_title: GenerateRandom +toc_title: 随机数生成 --- -# Generaterandom {#table_engines-generate} +# 随机数生成表引擎 {#table_engines-generate} -GenerateRandom表引擎为给定的表架构生成随机数据。 +随机数生成表引擎为指定的表模式生成随机数 使用示例: +- 测试时生成可复写的大表 +- 为复杂测试生成随机输入 -- 在测试中使用填充可重复的大表。 -- 为模糊测试生成随机输入。 - -## 在ClickHouse服务器中的使用 {#usage-in-clickhouse-server} +## CH服务端的用法 {#usage-in-clickhouse-server} ``` sql ENGINE = GenerateRandom(random_seed, max_string_length, max_array_length) ``` -该 `max_array_length` 和 `max_string_length` 参数指定所有的最大长度 -数组列和字符串相应地在生成的数据中。 +生成数据时,通过`max_array_length` 设置array列的最大长度, `max_string_length`设置string数据的最大长度 -生成表引擎仅支持 `SELECT` 查询。 +该引擎仅支持 `SELECT` 查询语句. -它支持所有 [数据类型](../../../sql-reference/data-types/index.md) 可以存储在一个表中,除了 `LowCardinality` 和 `AggregateFunction`. +该引擎支持能在表中存储的所有数据类型 [DataTypes](../../../sql-reference/data-types/index.md) ,除了 `LowCardinality` 和 `AggregateFunction`. 
-**示例:** +## 示例 {#example} -**1.** 设置 `generate_engine_table` 表: +**1.** 设置 `generate_engine_table` 引擎表: ``` sql CREATE TABLE generate_engine_table (name String, value UInt32) ENGINE = GenerateRandom(1, 5, 3) @@ -49,13 +45,13 @@ SELECT * FROM generate_engine_table LIMIT 3 └──────┴────────────┘ ``` -## 实施细节 {#details-of-implementation} +## 实现细节 {#details-of-implementation} -- 不支持: +- 以下特性不支持: - `ALTER` - `SELECT ... SAMPLE` - `INSERT` - - 指数 - - 复制 + - Indices + - Replication -[原始文章](https://clickhouse.tech/docs/en/operations/table_engines/generate/) +[原始文档](https://clickhouse.tech/docs/en/operations/table_engines/generate/) diff --git a/docs/zh/engines/table-engines/special/join.md b/docs/zh/engines/table-engines/special/join.md index f9621069353..a94803a401b 100644 --- a/docs/zh/engines/table-engines/special/join.md +++ b/docs/zh/engines/table-engines/special/join.md @@ -1,28 +1,115 @@ -# 加入我们 {#join} +--- +toc_priority: 40 +toc_title: 关联表引擎 +--- -加载好的 JOIN 表数据会常驻内存中。 +# 关联表引擎 {#join} - Join(ANY|ALL, LEFT|INNER, k1[, k2, ...]) +使用 [JOIN](../../../sql-reference/statements/select/join.md#select-join)操作的一种可选的数据结构。 -引擎参数:`ANY|ALL` – 连接修饰;`LEFT|INNER` – 连接类型。更多信息可参考 [JOIN子句](../../../engines/table-engines/special/join.md#select-join)。 -这些参数设置不用带引号,但必须与要 JOIN 表匹配。 k1,k2,……是 USING 子句中要用于连接的关键列。 +!!! 注意 "Note" + 该文档和 [JOIN 语句](../../../sql-reference/statements/select/join.md#select-join) 无关. -此引擎表不能用于 GLOBAL JOIN 。 +## 建表语句 {#creating-a-table} -类似于 Set 引擎,可以使用 INSERT 向表中添加数据。设置为 ANY 时,重复键的数据会被忽略(仅一条用于连接)。设置为 ALL 时,重复键的数据都会用于连接。不能直接对 JOIN 表进行 SELECT。检索其数据的唯一方法是将其作为 JOIN 语句右边的表。 +``` sql +CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] +( + name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1], + name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2], +) ENGINE = Join(join_strictness, join_type, k1[, k2, ...]) +``` -跟 Set 引擎类似,Join 引擎把数据存储在磁盘中。 +建表语句详情参见[创建表](../../../sql-reference/statements/create.md#create-table-query). -### 限制和设置 {#join-limitations-and-settings} +**引擎参数** -创建表时,将应用以下设置: +- `join_strictness` – [JOIN 限制](../../../sql-reference/statements/select/join.md#select-join-strictness). +- `join_type` – [JOIN 类型](../../../sql-reference/statements/select/join.md#select-join-types). +- `k1[, k2, ...]` – 进行`JOIN` 操作时 `USING`语句用到的key列 -- join\_use\_nulls -- max\_rows\_in\_join -- max\_bytes\_in\_join -- join\_overflow\_mode -- join\_any\_take\_last\_row +使用`join_strictness` 和 `join_type` 参数时不需要用引号, 例如, `Join(ANY, LEFT, col1)`. 
这些参数必须和进行join操作的表相匹配。否则,CH不会报错,但是可能返回错误的数据。 -该 `Join`-发动机表不能用于 `GLOBAL JOIN` 操作。 +## 表用法 {#table-usage} -[来源文章](https://clickhouse.tech/docs/en/operations/table_engines/join/) +### 示例 {#example} + +创建左关联表: + +``` sql +CREATE TABLE id_val(`id` UInt32, `val` UInt32) ENGINE = TinyLog +``` + +``` sql +INSERT INTO id_val VALUES (1,11)(2,12)(3,13) +``` + +创建 `Join` 右边的表: + +``` sql +CREATE TABLE id_val_join(`id` UInt32, `val` UInt8) ENGINE = Join(ANY, LEFT, id) +``` + +``` sql +INSERT INTO id_val_join VALUES (1,21)(1,22)(3,23) +``` + +表关联: + +``` sql +SELECT * FROM id_val ANY LEFT JOIN id_val_join USING (id) SETTINGS join_use_nulls = 1 +``` + +``` text +┌─id─┬─val─┬─id_val_join.val─┐ +│ 1 │ 11 │ 21 │ +│ 2 │ 12 │ ᴺᵁᴸᴸ │ +│ 3 │ 13 │ 23 │ +└────┴─────┴─────────────────┘ +``` + +作为一种替换方式,可以从 `Join`表获取数据,需要设置好join的key字段值。 + +``` sql +SELECT joinGet('id_val_join', 'val', toUInt32(1)) +``` + +``` text +┌─joinGet('id_val_join', 'val', toUInt32(1))─┐ +│ 21 │ +└────────────────────────────────────────────┘ +``` + +### 数据查询及插入 {#selecting-and-inserting-data} + +可以使用 `INSERT`语句向 `Join`引擎表中添加数据。如果表是通过指定 `ANY`限制参数来创建的,那么重复key的数据会被忽略。指定 `ALL`限制参数时,所有行记录都会被添加进去。 + +不能通过 `SELECT` 语句直接从表中获取数据。请使用下面的方式: +- 将表放在 `JOIN` 的右边进行查询 +- 调用 [joinGet](../../../sql-reference/functions/other-functions.md#joinget)函数,就像从字典中获取数据一样来查询表。 + + +### 使用限制及参数设置 {#join-limitations-and-settings} + +创建表时,会应用下列设置参数: + +- [join\_use\_nulls](../../../operations/settings/settings.md#join_use_nulls) +- [max\_rows\_in\_join](../../../operations/settings/query-complexity.md#settings-max_rows_in_join) +- [max\_bytes\_in\_join](../../../operations/settings/query-complexity.md#settings-max_bytes_in_join) +- [join\_overflow\_mode](../../../operations/settings/query-complexity.md#settings-join_overflow_mode) +- [join\_any\_take\_last\_row](../../../operations/settings/settings.md#settings-join_any_take_last_row) + + +`Join`表不能在 `GLOBAL JOIN`操作中使用 + + `Join`表创建及 [查询](../../../sql-reference/statements/select/index.md)时,允许使用[join\_use\_nulls](../../../operations/settings/settings.md#join_use_nulls)参数。如果使用不同的`join_use_nulls`设置,会导致表关联异常(取决于join的类型)。当使用函数 [joinGet](../../../sql-reference/functions/other-functions.md#joinget)时,请在建表和查询语句中使用相同的 `join_use_nulls` 参数设置。 + + +## 数据存储 {#data-storage} + +`Join`表的数据总是保存在内存中。当往表中插入行记录时,CH会将数据块保存在硬盘目录中,这样服务器重启时数据可以恢复。 + +如果服务器非正常重启,保存在硬盘上的数据块会丢失或被损坏。这种情况下,需要手动删除被损坏的数据文件。 + + +[原始文档](https://clickhouse.tech/docs/en/operations/table_engines/join/) diff --git a/docs/zh/engines/table-engines/special/memory.md b/docs/zh/engines/table-engines/special/memory.md index a7ae74d355c..becf0f3b04f 100644 --- a/docs/zh/engines/table-engines/special/memory.md +++ b/docs/zh/engines/table-engines/special/memory.md @@ -1,6 +1,6 @@ -# 记忆 {#memory} +# 内存表 {#memory} -Memory 引擎以未压缩的形式将数据存储在 RAM 中。数据完全以读取时获得的形式存储。换句话说,从这张表中读取是很轻松的。并发数据访问是同步的。锁范围小:读写操作不会相互阻塞。不支持索引。阅读是并行化的。在简单查询上达到最大生产率(超过10 GB /秒),因为没有磁盘读取,不需要解压缩或反序列化数据。(值得注意的是,在许多情况下,与 MergeTree 引擎的性能几乎一样高)。重新启动服务器时,表中的数据消失,表将变为空。通常,使用此表引擎是不合理的。但是,它可用于测试,以及在相对较少的行(最多约100,000,000)上需要最高性能的查询。 +Memory 引擎以未压缩的形式将数据存储在 RAM 中。数据完全以读取时获得的形式存储。换句话说,从这张表中读取是很轻松的。并发数据访问是同步的。锁范围小:读写操作不会相互阻塞。不支持索引。查询是并行化的。在简单查询上达到最大速率(超过10 GB /秒),因为没有磁盘读取,不需要解压缩或反序列化数据。(值得注意的是,在许多情况下,与 MergeTree 引擎的性能几乎一样高)。重新启动服务器时,表中的数据消失,表将变为空。通常,使用此表引擎是不合理的。但是,它可用于测试,以及在相对较少的行(最多约100,000,000)上需要最高性能的查询。 Memory 引擎是由系统用于临时表进行外部数据的查询(请参阅 «外部数据用于请求处理» 部分),以及用于实现 `GLOBAL IN`(请参见 «IN 运算符» 部分)。 diff --git a/docs/zh/engines/table-engines/special/view.md b/docs/zh/engines/table-engines/special/view.md index 1c501b819c0..e2dca177e4e 100644 
--- a/docs/zh/engines/table-engines/special/view.md +++ b/docs/zh/engines/table-engines/special/view.md @@ -1,4 +1,4 @@ -# 查看 {#table_engines-view} +# 视图 {#table_engines-view} 用于构建视图(有关更多信息,请参阅 `CREATE VIEW 查询`)。 它不存储数据,仅存储指定的 `SELECT` 查询。 从表中读取时,它会运行此查询(并从查询中删除所有不必要的列)。 diff --git a/docs/zh/interfaces/third-party/integrations.md b/docs/zh/interfaces/third-party/integrations.md index 014fdc88304..374cecb4c9a 100644 --- a/docs/zh/interfaces/third-party/integrations.md +++ b/docs/zh/interfaces/third-party/integrations.md @@ -7,6 +7,7 @@ - 关系数据库管理系统 - [MySQL](https://www.mysql.com) + - [mysql2ch](https://github.com/long2ice/mysql2ch) - [ProxySQL](https://github.com/sysown/proxysql/wiki/ClickHouse-Support) - [clickhouse-mysql-data-reader](https://github.com/Altinity/clickhouse-mysql-data-reader) - [horgh-复制器](https://github.com/larsnovikov/horgh-replicator) @@ -90,5 +91,10 @@ - 仙丹 - [Ecto](https://github.com/elixir-ecto/ecto) - [clickhouse\_ecto](https://github.com/appodeal/clickhouse_ecto) +- Ruby + - [Ruby on Rails](https://rubyonrails.org/) + - [activecube](https://github.com/bitquery/activecube) + - [GraphQL](https://github.com/graphql) + - [activecube-graphql](https://github.com/bitquery/activecube-graphql) [来源文章](https://clickhouse.tech/docs/zh/interfaces/third-party/integrations/) diff --git a/docs/zh/introduction/adopters.md b/docs/zh/introduction/adopters.md index 895ec961751..38b9ca690e3 100644 --- a/docs/zh/introduction/adopters.md +++ b/docs/zh/introduction/adopters.md @@ -35,7 +35,7 @@ toc_title: "\u91C7\u7528\u8005" | [Exness](https://www.exness.com) | 交易 | 指标,日志记录 | — | — | [俄语交谈,2019年5月](https://youtu.be/_rpU-TvSfZ8?t=3215) | | [精灵](https://geniee.co.jp) | 广告网络 | 主要产品 | — | — | [日文博客,2017年7月](https://tech.geniee.co.jp/entry/2017/07/20/160100) | | [虎牙](https://www.huya.com/) | 视频流 | 分析 | — | — | [中文幻灯片,2018年10月](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) | -| [Idealista](https://www.idealista.com) | 房地产 | 分析 | — | — | [英文博客文章,四月2019](https://clickhouse.yandex/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | +| [Idealista](https://www.idealista.com) | 房地产 | 分析 | — | — | [英文博客文章,四月2019](https://clickhouse.tech/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | | [Infovista](https://www.infovista.com/) | 网络 | 分析 | — | — | [英文幻灯片,十月2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) | | [InnoGames](https://www.innogames.com) | 游戏 | 指标,日志记录 | — | — | [俄文幻灯片,2019年9月](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) | | [Integros](https://integros.com) | 视频服务平台 | 分析 | — | — | [俄文幻灯片,2019年5月](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | diff --git a/docs/zh/operations/monitoring.md b/docs/zh/operations/monitoring.md index 0bf8556a870..ee913f998ca 100644 --- a/docs/zh/operations/monitoring.md +++ b/docs/zh/operations/monitoring.md @@ -33,7 +33,7 @@ ClickHouse 收集的指标项: - 服务用于计算的资源占用的各种指标。 - 关于查询处理的常见统计信息。 -可以在 [系统指标](system-tables.md#system_tables-metrics) ,[系统事件](system-tables.md#system_tables-events) 以及[系统异步指标](system-tables.md#system_tables-asynchronous_metrics) 等系统表查看所有的指标项。 +可以在 [系统指标](system-tables/metrics.md#system_tables-metrics) ,[系统事件](system-tables/events.md#system_tables-events) 以及[系统异步指标](system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) 等系统表查看所有的指标项。 可以配置ClickHouse 往 [石墨](https://github.com/graphite-project)导入指标。 
参考 [石墨部分](server-configuration-parameters/settings.md#server_configuration_parameters-graphite) 配置文件。在配置指标导出之前,需要参考Graphite[官方教程](https://graphite.readthedocs.io/en/latest/install.html)搭建服务。 diff --git a/docs/zh/operations/optimizing-performance/sampling-query-profiler.md b/docs/zh/operations/optimizing-performance/sampling-query-profiler.md index e4242199713..a1d75fda0c3 100644 --- a/docs/zh/operations/optimizing-performance/sampling-query-profiler.md +++ b/docs/zh/operations/optimizing-performance/sampling-query-profiler.md @@ -13,7 +13,7 @@ ClickHouse运行允许分析查询执行的采样探查器。 使用探查器, - 设置 [trace\_log](../server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) 服务器配置部分。 - 本节配置 [trace\_log](../../operations/system-tables.md#system_tables-trace_log) 系统表包含探查器运行的结果。 它是默认配置的。 请记住,此表中的数据仅对正在运行的服务器有效。 服务器重新启动后,ClickHouse不会清理表,所有存储的虚拟内存地址都可能无效。 + 本节配置 [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) 系统表包含探查器运行的结果。 它是默认配置的。 请记住,此表中的数据仅对正在运行的服务器有效。 服务器重新启动后,ClickHouse不会清理表,所有存储的虚拟内存地址都可能无效。 - 设置 [query\_profiler\_cpu\_time\_period\_ns](../settings/settings.md#query_profiler_cpu_time_period_ns) 或 [query\_profiler\_real\_time\_period\_ns](../settings/settings.md#query_profiler_real_time_period_ns) 设置。 这两种设置可以同时使用。 diff --git a/docs/zh/operations/server-configuration-parameters/settings.md b/docs/zh/operations/server-configuration-parameters/settings.md index 2c9d611b6a7..252323351b2 100644 --- a/docs/zh/operations/server-configuration-parameters/settings.md +++ b/docs/zh/operations/server-configuration-parameters/settings.md @@ -145,10 +145,10 @@ ClickHouse每x秒重新加载内置字典。 这使得编辑字典 “on the fly - interval – The interval for sending, in seconds. - timeout – The timeout for sending data, in seconds. - root\_path – Prefix for keys. -- metrics – Sending data from the [系统。指标](../../operations/system-tables.md#system_tables-metrics) 桌子 -- events – Sending deltas data accumulated for the time period from the [系统。活动](../../operations/system-tables.md#system_tables-events) 桌子 -- events\_cumulative – Sending cumulative data from the [系统。活动](../../operations/system-tables.md#system_tables-events) 桌子 -- asynchronous\_metrics – Sending data from the [系统。asynchronous\_metrics](../../operations/system-tables.md#system_tables-asynchronous_metrics) 桌子 +- metrics – Sending data from the [系统。指标](../../operations/system-tables/metrics.md#system_tables-metrics) 桌子 +- events – Sending deltas data accumulated for the time period from the [系统。活动](../../operations/system-tables/events.md#system_tables-events) 桌子 +- events\_cumulative – Sending cumulative data from the [系统。活动](../../operations/system-tables/events.md#system_tables-events) 桌子 +- asynchronous\_metrics – Sending data from the [系统。asynchronous\_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) 桌子 您可以配置多个 `` 条款 例如,您可以使用它以不同的时间间隔发送不同的数据。 @@ -503,7 +503,7 @@ SSL客户端/服务器配置。 记录与之关联的事件 [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md). 
例如,添加或合并数据。 您可以使用日志来模拟合并算法并比较它们的特征。 您可以可视化合并过程。 -查询记录在 [系统。part\_log](../../operations/system-tables.md#system_tables-part-log) 表,而不是在一个单独的文件。 您可以在以下命令中配置此表的名称 `table` 参数(见下文)。 +查询记录在 [系统。part\_log](../../operations/system-tables/part_log.md#system_tables-part-log) 表,而不是在一个单独的文件。 您可以在以下命令中配置此表的名称 `table` 参数(见下文)。 使用以下参数配置日志记录: @@ -540,7 +540,7 @@ SSL客户端/服务器配置。 用于记录接收到的查询的设置 [log\_queries=1](../settings/settings.md) 设置。 -查询记录在 [系统。query\_log](../../operations/system-tables.md#system_tables-query_log) 表,而不是在一个单独的文件。 您可以更改表的名称 `table` 参数(见下文)。 +查询记录在 [系统。query\_log](../../operations/system-tables/query_log.md#system_tables-query_log) 表,而不是在一个单独的文件。 您可以更改表的名称 `table` 参数(见下文)。 使用以下参数配置日志记录: @@ -566,7 +566,7 @@ SSL客户端/服务器配置。 设置用于记录接收到的查询的线程 [log\_query\_threads=1](../settings/settings.md#settings-log-query-threads) 设置。 -查询记录在 [系统。query\_thread\_log](../../operations/system-tables.md#system_tables-query-thread-log) 表,而不是在一个单独的文件。 您可以更改表的名称 `table` 参数(见下文)。 +查询记录在 [系统。query\_thread\_log](../../operations/system-tables/query_thread_log.md#system_tables-query-thread-log) 表,而不是在一个单独的文件。 您可以更改表的名称 `table` 参数(见下文)。 使用以下参数配置日志记录: @@ -590,7 +590,7 @@ SSL客户端/服务器配置。 ## trace\_log {#server_configuration_parameters-trace_log} -设置为 [trace\_log](../../operations/system-tables.md#system_tables-trace_log) 系统表操作。 +设置为 [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) 系统表操作。 参数: diff --git a/docs/zh/operations/settings/settings.md b/docs/zh/operations/settings/settings.md index 07362dcaceb..d6c411c70fb 100644 --- a/docs/zh/operations/settings/settings.md +++ b/docs/zh/operations/settings/settings.md @@ -1165,7 +1165,7 @@ ClickHouse生成异常 另请参阅: -- 系统表 [trace\_log](../../operations/system-tables.md#system_tables-trace_log) +- 系统表 [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) ## query\_profiler\_cpu\_time\_period\_ns {#query_profiler_cpu_time_period_ns} @@ -1188,7 +1188,7 @@ ClickHouse生成异常 另请参阅: -- 系统表 [trace\_log](../../operations/system-tables.md#system_tables-trace_log) +- 系统表 [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) ## allow\_introspection\_functions {#settings-allow_introspection_functions} @@ -1204,7 +1204,7 @@ ClickHouse生成异常 **另请参阅** - [采样查询探查器](../optimizing-performance/sampling-query-profiler.md) -- 系统表 [trace\_log](../../operations/system-tables.md#system_tables-trace_log) +- 系统表 [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) ## input\_format\_parallel\_parsing {#input-format-parallel-parsing} diff --git a/docs/zh/operations/system-tables.md b/docs/zh/operations/system-tables.md deleted file mode 100644 index 5070d3455ab..00000000000 --- a/docs/zh/operations/system-tables.md +++ /dev/null @@ -1,1168 +0,0 @@ ---- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd -toc_priority: 52 -toc_title: "\u7CFB\u7EDF\u8868" ---- - -# 系统表 {#system-tables} - -系统表用于实现系统的部分功能,并提供对有关系统如何工作的信息的访问。 -您无法删除系统表(但可以执行分离)。 -系统表没有包含磁盘上数据的文件或包含元数据的文件。 服务器在启动时创建所有系统表。 -系统表是只读的。 -它们位于 ‘system’ 数据库。 - -## 系统。asynchronous\_metrics {#system_tables-asynchronous_metrics} - -包含在后台定期计算的指标。 例如,在使用的RAM量。 - -列: - -- `metric` ([字符串](../sql-reference/data-types/string.md)) — Metric name. -- `value` ([Float64](../sql-reference/data-types/float.md)) — Metric value. 
- -**示例** - -``` sql -SELECT * FROM system.asynchronous_metrics LIMIT 10 -``` - -``` text -┌─metric──────────────────────────────────┬──────value─┐ -│ jemalloc.background_thread.run_interval │ 0 │ -│ jemalloc.background_thread.num_runs │ 0 │ -│ jemalloc.background_thread.num_threads │ 0 │ -│ jemalloc.retained │ 422551552 │ -│ jemalloc.mapped │ 1682989056 │ -│ jemalloc.resident │ 1656446976 │ -│ jemalloc.metadata_thp │ 0 │ -│ jemalloc.metadata │ 10226856 │ -│ UncompressedCacheCells │ 0 │ -│ MarkCacheFiles │ 0 │ -└─────────────────────────────────────────┴────────────┘ -``` - -**另请参阅** - -- [监测](monitoring.md) — Base concepts of ClickHouse monitoring. -- [系统。指标](#system_tables-metrics) — Contains instantly calculated metrics. -- [系统。活动](#system_tables-events) — Contains a number of events that have occurred. -- [系统。metric\_log](#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`. - -## 系统。集群 {#system-clusters} - -包含有关配置文件中可用的集群及其中的服务器的信息。 - -列: - -- `cluster` (String) — The cluster name. -- `shard_num` (UInt32) — The shard number in the cluster, starting from 1. -- `shard_weight` (UInt32) — The relative weight of the shard when writing data. -- `replica_num` (UInt32) — The replica number in the shard, starting from 1. -- `host_name` (String) — The host name, as specified in the config. -- `host_address` (String) — The host IP address obtained from DNS. -- `port` (UInt16) — The port to use for connecting to the server. -- `user` (String) — The name of the user for connecting to the server. -- `errors_count` (UInt32)-此主机无法到达副本的次数。 -- `estimated_recovery_time` (UInt32)-剩下的秒数,直到副本错误计数归零,它被认为是恢复正常。 - -请注意 `errors_count` 每个查询集群更新一次,但 `estimated_recovery_time` 按需重新计算。 所以有可能是非零的情况 `errors_count` 和零 `estimated_recovery_time`,下一个查询将为零 `errors_count` 并尝试使用副本,就好像它没有错误。 - -**另请参阅** - -- [表引擎分布式](../engines/table-engines/special/distributed.md) -- [distributed\_replica\_error\_cap设置](settings/settings.md#settings-distributed_replica_error_cap) -- [distributed\_replica\_error\_half\_life设置](settings/settings.md#settings-distributed_replica_error_half_life) - -## 系统。列 {#system-columns} - -包含有关所有表中列的信息。 - -您可以使用此表获取类似于以下内容的信息 [DESCRIBE TABLE](../sql-reference/statements/misc.md#misc-describe-table) 查询,但对于多个表一次。 - -该 `system.columns` 表包含以下列(列类型显示在括号中): - -- `database` (String) — Database name. -- `table` (String) — Table name. -- `name` (String) — Column name. -- `type` (String) — Column type. -- `default_kind` (String) — Expression type (`DEFAULT`, `MATERIALIZED`, `ALIAS`)为默认值,如果没有定义,则为空字符串。 -- `default_expression` (String) — Expression for the default value, or an empty string if it is not defined. -- `data_compressed_bytes` (UInt64) — The size of compressed data, in bytes. -- `data_uncompressed_bytes` (UInt64) — The size of decompressed data, in bytes. -- `marks_bytes` (UInt64) — The size of marks, in bytes. -- `comment` (String) — Comment on the column, or an empty string if it is not defined. -- `is_in_partition_key` (UInt8) — Flag that indicates whether the column is in the partition expression. -- `is_in_sorting_key` (UInt8) — Flag that indicates whether the column is in the sorting key expression. -- `is_in_primary_key` (UInt8) — Flag that indicates whether the column is in the primary key expression. -- `is_in_sampling_key` (UInt8) — Flag that indicates whether the column is in the sampling key expression. 
- -## 系统。贡献者 {#system-contributors} - -包含有关贡献者的信息。 按随机顺序排列所有构造。 该顺序在查询执行时是随机的。 - -列: - -- `name` (String) — Contributor (author) name from git log. - -**示例** - -``` sql -SELECT * FROM system.contributors LIMIT 10 -``` - -``` text -┌─name─────────────┐ -│ Olga Khvostikova │ -│ Max Vetrov │ -│ LiuYangkuan │ -│ svladykin │ -│ zamulla │ -│ Šimon Podlipský │ -│ BayoNet │ -│ Ilya Khomutov │ -│ Amy Krishnevsky │ -│ Loud_Scream │ -└──────────────────┘ -``` - -要在表中找出自己,请使用查询: - -``` sql -SELECT * FROM system.contributors WHERE name='Olga Khvostikova' -``` - -``` text -┌─name─────────────┐ -│ Olga Khvostikova │ -└──────────────────┘ -``` - -## 系统。数据库 {#system-databases} - -此表包含一个名为"字符串"的列 ‘name’ – the name of a database. -服务器知道的每个数据库在表中都有相应的条目。 -该系统表用于实现 `SHOW DATABASES` 查询。 - -## 系统。detached\_parts {#system_tables-detached_parts} - -包含有关分离部分的信息 [MergeTree](../engines/table-engines/mergetree-family/mergetree.md) 桌子 该 `reason` 列指定分离部件的原因。 对于用户分离的部件,原因是空的。 这些部件可以附加 [ALTER TABLE ATTACH PARTITION\|PART](../sql-reference/statements/alter.md#alter_attach-partition) 指挥部 有关其他列的说明,请参阅 [系统。零件](#system_tables-parts). 如果部件名称无效,某些列的值可能为 `NULL`. 这些部分可以删除 [ALTER TABLE DROP DETACHED PART](../sql-reference/statements/alter.md#alter_drop-detached). - -## 系统。字典 {#system_tables-dictionaries} - -包含以下信息 [外部字典](../sql-reference/dictionaries/external-dictionaries/external-dicts.md). - -列: - -- `database` ([字符串](../sql-reference/data-types/string.md)) — Name of the database containing the dictionary created by DDL query. Empty string for other dictionaries. -- `name` ([字符串](../sql-reference/data-types/string.md)) — [字典名称](../sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md). -- `status` ([枚举8](../sql-reference/data-types/enum.md)) — Dictionary status. Possible values: - - `NOT_LOADED` — Dictionary was not loaded because it was not used. - - `LOADED` — Dictionary loaded successfully. - - `FAILED` — Unable to load the dictionary as a result of an error. - - `LOADING` — Dictionary is loading now. - - `LOADED_AND_RELOADING` — Dictionary is loaded successfully, and is being reloaded right now (frequent reasons: [SYSTEM RELOAD DICTIONARY](../sql-reference/statements/system.md#query_language-system-reload-dictionary) 查询,超时,字典配置已更改)。 - - `FAILED_AND_RELOADING` — Could not load the dictionary as a result of an error and is loading now. -- `origin` ([字符串](../sql-reference/data-types/string.md)) — Path to the configuration file that describes the dictionary. -- `type` ([字符串](../sql-reference/data-types/string.md)) — Type of a dictionary allocation. [在内存中存储字典](../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md). -- `key` — [密钥类型](../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-key):数字键 ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) or Сomposite key ([字符串](../sql-reference/data-types/string.md)) — form “(type 1, type 2, …, type n)”. 
-- `attribute.names` ([阵列](../sql-reference/data-types/array.md)([字符串](../sql-reference/data-types/string.md))) — Array of [属性名称](../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes) 由字典提供。 -- `attribute.types` ([阵列](../sql-reference/data-types/array.md)([字符串](../sql-reference/data-types/string.md))) — Corresponding array of [属性类型](../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes) 这是由字典提供。 -- `bytes_allocated` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — Amount of RAM allocated for the dictionary. -- `query_count` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of queries since the dictionary was loaded or since the last successful reboot. -- `hit_rate` ([Float64](../sql-reference/data-types/float.md)) — For cache dictionaries, the percentage of uses for which the value was in the cache. -- `element_count` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of items stored in the dictionary. -- `load_factor` ([Float64](../sql-reference/data-types/float.md)) — Percentage filled in the dictionary (for a hashed dictionary, the percentage filled in the hash table). -- `source` ([字符串](../sql-reference/data-types/string.md)) — Text describing the [数据源](../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md) 为了字典 -- `lifetime_min` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — Minimum [使用寿命](../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) 在内存中的字典,之后ClickHouse尝试重新加载字典(如果 `invalidate_query` 被设置,那么只有当它已经改变)。 在几秒钟内设置。 -- `lifetime_max` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — Maximum [使用寿命](../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) 在内存中的字典,之后ClickHouse尝试重新加载字典(如果 `invalidate_query` 被设置,那么只有当它已经改变)。 在几秒钟内设置。 -- `loading_start_time` ([日期时间](../sql-reference/data-types/datetime.md)) — Start time for loading the dictionary. -- `last_successful_update_time` ([日期时间](../sql-reference/data-types/datetime.md)) — End time for loading or updating the dictionary. Helps to monitor some troubles with external sources and investigate causes. -- `loading_duration` ([Float32](../sql-reference/data-types/float.md)) — Duration of a dictionary loading. -- `last_exception` ([字符串](../sql-reference/data-types/string.md)) — Text of the error that occurs when creating or reloading the dictionary if the dictionary couldn't be created. 
- -**示例** - -配置字典。 - -``` sql -CREATE DICTIONARY dictdb.dict -( - `key` Int64 DEFAULT -1, - `value_default` String DEFAULT 'world', - `value_expression` String DEFAULT 'xxx' EXPRESSION 'toString(127 * 172)' -) -PRIMARY KEY key -SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'dicttbl' DB 'dictdb')) -LIFETIME(MIN 0 MAX 1) -LAYOUT(FLAT()) -``` - -确保字典已加载。 - -``` sql -SELECT * FROM system.dictionaries -``` - -``` text -┌─database─┬─name─┬─status─┬─origin──────┬─type─┬─key────┬─attribute.names──────────────────────┬─attribute.types─────┬─bytes_allocated─┬─query_count─┬─hit_rate─┬─element_count─┬───────────load_factor─┬─source─────────────────────┬─lifetime_min─┬─lifetime_max─┬──loading_start_time─┌──last_successful_update_time─┬──────loading_duration─┬─last_exception─┐ -│ dictdb │ dict │ LOADED │ dictdb.dict │ Flat │ UInt64 │ ['value_default','value_expression'] │ ['String','String'] │ 74032 │ 0 │ 1 │ 1 │ 0.0004887585532746823 │ ClickHouse: dictdb.dicttbl │ 0 │ 1 │ 2020-03-04 04:17:34 │ 2020-03-04 04:30:34 │ 0.002 │ │ -└──────────┴──────┴────────┴─────────────┴──────┴────────┴──────────────────────────────────────┴─────────────────────┴─────────────────┴─────────────┴──────────┴───────────────┴───────────────────────┴────────────────────────────┴──────────────┴──────────────┴─────────────────────┴──────────────────────────────┘───────────────────────┴────────────────┘ -``` - -## 系统。活动 {#system_tables-events} - -包含有关系统中发生的事件数的信息。 例如,在表中,您可以找到多少 `SELECT` 自ClickHouse服务器启动以来已处理查询。 - -列: - -- `event` ([字符串](../sql-reference/data-types/string.md)) — Event name. -- `value` ([UInt64](../sql-reference/data-types/int-uint.md)) — Number of events occurred. -- `description` ([字符串](../sql-reference/data-types/string.md)) — Event description. - -**示例** - -``` sql -SELECT * FROM system.events LIMIT 5 -``` - -``` text -┌─event─────────────────────────────────┬─value─┬─description────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ -│ Query │ 12 │ Number of queries to be interpreted and potentially executed. Does not include queries that failed to parse or were rejected due to AST size limits, quota limits or limits on the number of simultaneously running queries. May include internal queries initiated by ClickHouse itself. Does not count subqueries. │ -│ SelectQuery │ 8 │ Same as Query, but only for SELECT queries. │ -│ FileOpen │ 73 │ Number of files opened. │ -│ ReadBufferFromFileDescriptorRead │ 155 │ Number of reads (read/pread) from a file descriptor. Does not include sockets. │ -│ ReadBufferFromFileDescriptorReadBytes │ 9931 │ Number of bytes read from file descriptors. If the file is compressed, this will show the compressed data size. │ -└───────────────────────────────────────┴───────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ -``` - -**另请参阅** - -- [系统。asynchronous\_metrics](#system_tables-asynchronous_metrics) — Contains periodically calculated metrics. -- [系统。指标](#system_tables-metrics) — Contains instantly calculated metrics. -- [系统。metric\_log](#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`. 
-- [监测](monitoring.md) — Base concepts of ClickHouse monitoring. - -## 系统。功能 {#system-functions} - -包含有关正常函数和聚合函数的信息。 - -列: - -- `name`(`String`) – The name of the function. -- `is_aggregate`(`UInt8`) — Whether the function is aggregate. - -## 系统。graphite\_retentions {#system-graphite-retentions} - -包含有关参数的信息 [graphite\_rollup](server-configuration-parameters/settings.md#server_configuration_parameters-graphite) 这是在表中使用 [\*GraphiteMergeTree](../engines/table-engines/mergetree-family/graphitemergetree.md) 引擎 - -列: - -- `config_name` (字符串) - `graphite_rollup` 参数名称。 -- `regexp` (String)-指标名称的模式。 -- `function` (String)-聚合函数的名称。 -- `age` (UInt64)-以秒为单位的数据的最小期限。 -- `precision` (UInt64)-如何精确地定义以秒为单位的数据的年龄。 -- `priority` (UInt16)-模式优先级。 -- `is_default` (UInt8)-模式是否为默认值。 -- `Tables.database` (Array(String))-使用数据库表名称的数组 `config_name` 参数。 -- `Tables.table` (Array(String))-使用表名称的数组 `config_name` 参数。 - -## 系统。合并 {#system-merges} - -包含有关MergeTree系列中表当前正在进行的合并和部件突变的信息。 - -列: - -- `database` (String) — The name of the database the table is in. -- `table` (String) — Table name. -- `elapsed` (Float64) — The time elapsed (in seconds) since the merge started. -- `progress` (Float64) — The percentage of completed work from 0 to 1. -- `num_parts` (UInt64) — The number of pieces to be merged. -- `result_part_name` (String) — The name of the part that will be formed as the result of merging. -- `is_mutation` (UInt8)-1如果这个过程是一个部分突变. -- `total_size_bytes_compressed` (UInt64) — The total size of the compressed data in the merged chunks. -- `total_size_marks` (UInt64) — The total number of marks in the merged parts. -- `bytes_read_uncompressed` (UInt64) — Number of bytes read, uncompressed. -- `rows_read` (UInt64) — Number of rows read. -- `bytes_written_uncompressed` (UInt64) — Number of bytes written, uncompressed. -- `rows_written` (UInt64) — Number of rows written. - -## 系统。指标 {#system_tables-metrics} - -包含可以立即计算或具有当前值的指标。 例如,同时处理的查询的数量或当前副本的延迟。 此表始终是最新的。 - -列: - -- `metric` ([字符串](../sql-reference/data-types/string.md)) — Metric name. -- `value` ([Int64](../sql-reference/data-types/int-uint.md)) — Metric value. -- `description` ([字符串](../sql-reference/data-types/string.md)) — Metric description. - -支持的指标列表,您可以在 [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp) ClickHouse的源文件。 - -**示例** - -``` sql -SELECT * FROM system.metrics LIMIT 10 -``` - -``` text -┌─metric─────────────────────┬─value─┬─description──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ -│ Query │ 1 │ Number of executing queries │ -│ Merge │ 0 │ Number of executing background merges │ -│ PartMutation │ 0 │ Number of mutations (ALTER DELETE/UPDATE) │ -│ ReplicatedFetch │ 0 │ Number of data parts being fetched from replicas │ -│ ReplicatedSend │ 0 │ Number of data parts being sent to replicas │ -│ ReplicatedChecks │ 0 │ Number of data parts checking for consistency │ -│ BackgroundPoolTask │ 0 │ Number of active tasks in BackgroundProcessingPool (merges, mutations, fetches, or replication queue bookkeeping) │ -│ BackgroundSchedulePoolTask │ 0 │ Number of active tasks in BackgroundSchedulePool. This pool is used for periodic ReplicatedMergeTree tasks, like cleaning old data parts, altering data parts, replica re-initialization, etc. │ -│ DiskSpaceReservedForMerge │ 0 │ Disk space reserved for currently running background merges. 
It is slightly more than the total size of currently merging parts. │ -│ DistributedSend │ 0 │ Number of connections to remote servers sending data that was INSERTed into Distributed tables. Both synchronous and asynchronous mode. │ -└────────────────────────────┴───────┴──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ -``` - -**另请参阅** - -- [系统。asynchronous\_metrics](#system_tables-asynchronous_metrics) — Contains periodically calculated metrics. -- [系统。活动](#system_tables-events) — Contains a number of events that occurred. -- [系统。metric\_log](#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`. -- [监测](monitoring.md) — Base concepts of ClickHouse monitoring. - -## 系统。metric\_log {#system_tables-metric_log} - -包含表中度量值的历史记录 `system.metrics` 和 `system.events`,定期刷新到磁盘。 -打开指标历史记录收集 `system.metric_log`,创建 `/etc/clickhouse-server/config.d/metric_log.xml` 具有以下内容: - -``` xml - - - system -
metric_log
- 7500 - 1000 - - -``` - -**示例** - -``` sql -SELECT * FROM system.metric_log LIMIT 1 FORMAT Vertical; -``` - -``` text -Row 1: -────── -event_date: 2020-02-18 -event_time: 2020-02-18 07:15:33 -milliseconds: 554 -ProfileEvent_Query: 0 -ProfileEvent_SelectQuery: 0 -ProfileEvent_InsertQuery: 0 -ProfileEvent_FileOpen: 0 -ProfileEvent_Seek: 0 -ProfileEvent_ReadBufferFromFileDescriptorRead: 1 -ProfileEvent_ReadBufferFromFileDescriptorReadFailed: 0 -ProfileEvent_ReadBufferFromFileDescriptorReadBytes: 0 -ProfileEvent_WriteBufferFromFileDescriptorWrite: 1 -ProfileEvent_WriteBufferFromFileDescriptorWriteFailed: 0 -ProfileEvent_WriteBufferFromFileDescriptorWriteBytes: 56 -... -CurrentMetric_Query: 0 -CurrentMetric_Merge: 0 -CurrentMetric_PartMutation: 0 -CurrentMetric_ReplicatedFetch: 0 -CurrentMetric_ReplicatedSend: 0 -CurrentMetric_ReplicatedChecks: 0 -... -``` - -**另请参阅** - -- [系统。asynchronous\_metrics](#system_tables-asynchronous_metrics) — Contains periodically calculated metrics. -- [系统。活动](#system_tables-events) — Contains a number of events that occurred. -- [系统。指标](#system_tables-metrics) — Contains instantly calculated metrics. -- [监测](monitoring.md) — Base concepts of ClickHouse monitoring. - -## 系统。数字 {#system-numbers} - -此表包含一个名为UInt64的列 ‘number’ 它包含几乎所有从零开始的自然数。 -您可以使用此表进行测试,或者如果您需要进行暴力搜索。 -从此表中读取的内容不是并行的。 - -## 系统。numbers\_mt {#system-numbers-mt} - -一样的 ‘system.numbers’ 但读取是并行的。 这些数字可以以任何顺序返回。 -用于测试。 - -## 系统。一 {#system-one} - -此表包含一行,其中包含一行 ‘dummy’ UInt8列包含值0。 -如果SELECT查询未指定FROM子句,则使用此表。 -这与其他Dbms中的双表类似。 - -## 系统。零件 {#system_tables-parts} - -包含有关的部分信息 [MergeTree](../engines/table-engines/mergetree-family/mergetree.md) 桌子 - -每行描述一个数据部分。 - -列: - -- `partition` (String) – The partition name. To learn what a partition is, see the description of the [ALTER](../sql-reference/statements/alter.md#query_language_queries_alter) 查询。 - - 格式: - - - `YYYYMM` 用于按月自动分区。 - - `any_string` 手动分区时。 - -- `name` (`String`) – Name of the data part. - -- `active` (`UInt8`) – Flag that indicates whether the data part is active. If a data part is active, it's used in a table. Otherwise, it's deleted. Inactive data parts remain after merging. - -- `marks` (`UInt64`) – The number of marks. To get the approximate number of rows in a data part, multiply `marks` 通过索引粒度(通常为8192)(此提示不适用于自适应粒度)。 - -- `rows` (`UInt64`) – The number of rows. - -- `bytes_on_disk` (`UInt64`) – Total size of all the data part files in bytes. - -- `data_compressed_bytes` (`UInt64`) – Total size of compressed data in the data part. All the auxiliary files (for example, files with marks) are not included. - -- `data_uncompressed_bytes` (`UInt64`) – Total size of uncompressed data in the data part. All the auxiliary files (for example, files with marks) are not included. - -- `marks_bytes` (`UInt64`) – The size of the file with marks. - -- `modification_time` (`DateTime`) – The time the directory with the data part was modified. This usually corresponds to the time of data part creation.\| - -- `remove_time` (`DateTime`) – The time when the data part became inactive. - -- `refcount` (`UInt32`) – The number of places where the data part is used. A value greater than 2 indicates that the data part is used in queries or merges. - -- `min_date` (`Date`) – The minimum value of the date key in the data part. - -- `max_date` (`Date`) – The maximum value of the date key in the data part. - -- `min_time` (`DateTime`) – The minimum value of the date and time key in the data part. 
- -- `max_time`(`DateTime`) – The maximum value of the date and time key in the data part. - -- `partition_id` (`String`) – ID of the partition. - -- `min_block_number` (`UInt64`) – The minimum number of data parts that make up the current part after merging. - -- `max_block_number` (`UInt64`) – The maximum number of data parts that make up the current part after merging. - -- `level` (`UInt32`) – Depth of the merge tree. Zero means that the current part was created by insert rather than by merging other parts. - -- `data_version` (`UInt64`) – Number that is used to determine which mutations should be applied to the data part (mutations with a version higher than `data_version`). - -- `primary_key_bytes_in_memory` (`UInt64`) – The amount of memory (in bytes) used by primary key values. - -- `primary_key_bytes_in_memory_allocated` (`UInt64`) – The amount of memory (in bytes) reserved for primary key values. - -- `is_frozen` (`UInt8`) – Flag that shows that a partition data backup exists. 1, the backup exists. 0, the backup doesn't exist. For more details, see [FREEZE PARTITION](../sql-reference/statements/alter.md#alter_freeze-partition) - -- `database` (`String`) – Name of the database. - -- `table` (`String`) – Name of the table. - -- `engine` (`String`) – Name of the table engine without parameters. - -- `path` (`String`) – Absolute path to the folder with data part files. - -- `disk` (`String`) – Name of a disk that stores the data part. - -- `hash_of_all_files` (`String`) – [sipHash128](../sql-reference/functions/hash-functions.md#hash_functions-siphash128) 的压缩文件。 - -- `hash_of_uncompressed_files` (`String`) – [sipHash128](../sql-reference/functions/hash-functions.md#hash_functions-siphash128) 未压缩的文件(带标记的文件,索引文件等。). - -- `uncompressed_hash_of_compressed_files` (`String`) – [sipHash128](../sql-reference/functions/hash-functions.md#hash_functions-siphash128) 压缩文件中的数据,就好像它们是未压缩的。 - -- `bytes` (`UInt64`) – Alias for `bytes_on_disk`. - -- `marks_size` (`UInt64`) – Alias for `marks_bytes`. - -## 系统。part\_log {#system_tables-part-log} - -该 `system.part_log` 表只有当创建 [part\_log](server-configuration-parameters/settings.md#server_configuration_parameters-part-log) 指定了服务器设置。 - -此表包含与以下情况发生的事件有关的信息 [数据部分](../engines/table-engines/mergetree-family/custom-partitioning-key.md) 在 [MergeTree](../engines/table-engines/mergetree-family/mergetree.md) 家庭表,例如添加或合并数据。 - -该 `system.part_log` 表包含以下列: - -- `event_type` (Enum) — Type of the event that occurred with the data part. Can have one of the following values: - - `NEW_PART` — Inserting of a new data part. - - `MERGE_PARTS` — Merging of data parts. - - `DOWNLOAD_PART` — Downloading a data part. - - `REMOVE_PART` — Removing or detaching a data part using [DETACH PARTITION](../sql-reference/statements/alter.md#alter_detach-partition). - - `MUTATE_PART` — Mutating of a data part. - - `MOVE_PART` — Moving the data part from the one disk to another one. -- `event_date` (Date) — Event date. -- `event_time` (DateTime) — Event time. -- `duration_ms` (UInt64) — Duration. -- `database` (String) — Name of the database the data part is in. -- `table` (String) — Name of the table the data part is in. -- `part_name` (String) — Name of the data part. -- `partition_id` (String) — ID of the partition that the data part was inserted to. The column takes the ‘all’ 值,如果分区是由 `tuple()`. -- `rows` (UInt64) — The number of rows in the data part. -- `size_in_bytes` (UInt64) — Size of the data part in bytes. 
-- `merged_from` (Array(String)) — An array of names of the parts which the current part was made up from (after the merge). -- `bytes_uncompressed` (UInt64) — Size of uncompressed bytes. -- `read_rows` (UInt64) — The number of rows was read during the merge. -- `read_bytes` (UInt64) — The number of bytes was read during the merge. -- `error` (UInt16) — The code number of the occurred error. -- `exception` (String) — Text message of the occurred error. - -该 `system.part_log` 表的第一个插入数据到后创建 `MergeTree` 桌子 - -## 系统。流程 {#system_tables-processes} - -该系统表用于实现 `SHOW PROCESSLIST` 查询。 - -列: - -- `user` (String) – The user who made the query. Keep in mind that for distributed processing, queries are sent to remote servers under the `default` 用户。 该字段包含特定查询的用户名,而不是此查询启动的查询的用户名。 -- `address` (String) – The IP address the request was made from. The same for distributed processing. To track where a distributed query was originally made from, look at `system.processes` 查询请求者服务器上。 -- `elapsed` (Float64) – The time in seconds since request execution started. -- `rows_read` (UInt64) – The number of rows read from the table. For distributed processing, on the requestor server, this is the total for all remote servers. -- `bytes_read` (UInt64) – The number of uncompressed bytes read from the table. For distributed processing, on the requestor server, this is the total for all remote servers. -- `total_rows_approx` (UInt64) – The approximation of the total number of rows that should be read. For distributed processing, on the requestor server, this is the total for all remote servers. It can be updated during request processing, when new sources to process become known. -- `memory_usage` (UInt64) – Amount of RAM the request uses. It might not include some types of dedicated memory. See the [max\_memory\_usage](../operations/settings/query-complexity.md#settings_max_memory_usage) 设置。 -- `query` (String) – The query text. For `INSERT`,它不包括要插入的数据。 -- `query_id` (String) – Query ID, if defined. - -## 系统。text\_log {#system-tables-text-log} - -包含日志记录条目。 进入该表的日志记录级别可以通过以下方式进行限制 `text_log.level` 服务器设置。 - -列: - -- `event_date` (`Date`)-条目的日期。 -- `event_time` (`DateTime`)-条目的时间。 -- `microseconds` (`UInt32`)-条目的微秒。 -- `thread_name` (String) — Name of the thread from which the logging was done. -- `thread_id` (UInt64) — OS thread ID. -- `level` (`Enum8`)-入门级。 - - `'Fatal' = 1` - - `'Critical' = 2` - - `'Error' = 3` - - `'Warning' = 4` - - `'Notice' = 5` - - `'Information' = 6` - - `'Debug' = 7` - - `'Trace' = 8` -- `query_id` (`String`)-查询的ID。 -- `logger_name` (`LowCardinality(String)`) - Name of the logger (i.e. `DDLWorker`) -- `message` (`String`)-消息本身。 -- `revision` (`UInt32`)-ClickHouse修订。 -- `source_file` (`LowCardinality(String)`)-从中完成日志记录的源文件。 -- `source_line` (`UInt64`)-从中完成日志记录的源代码行。 - -## 系统。query\_log {#system_tables-query_log} - -包含有关查询执行的信息。 对于每个查询,您可以看到处理开始时间,处理持续时间,错误消息和其他信息。 - -!!! note "注" - 该表不包含以下内容的输入数据 `INSERT` 查询。 - -ClickHouse仅在以下情况下创建此表 [query\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query-log) 指定服务器参数。 此参数设置日志记录规则,例如日志记录间隔或将记录查询的表的名称。 - -要启用查询日志记录,请设置 [log\_queries](settings/settings.md#settings-log-queries) 参数为1。 有关详细信息,请参阅 [设置](settings/settings.md) 科。 - -该 `system.query_log` 表注册两种查询: - -1. 客户端直接运行的初始查询。 -2. 由其他查询启动的子查询(用于分布式查询执行)。 对于这些类型的查询,有关父查询的信息显示在 `initial_*` 列。 - -列: - -- `type` (`Enum8`) — Type of event that occurred when executing the query. Values: - - `'QueryStart' = 1` — Successful start of query execution. 
- - `'QueryFinish' = 2` — Successful end of query execution. - - `'ExceptionBeforeStart' = 3` — Exception before the start of query execution. - - `'ExceptionWhileProcessing' = 4` — Exception during the query execution. -- `event_date` (Date) — Query starting date. -- `event_time` (DateTime) — Query starting time. -- `query_start_time` (DateTime) — Start time of query execution. -- `query_duration_ms` (UInt64) — Duration of query execution. -- `read_rows` (UInt64) — Number of read rows. -- `read_bytes` (UInt64) — Number of read bytes. -- `written_rows` (UInt64) — For `INSERT` 查询,写入的行数。 对于其他查询,列值为0。 -- `written_bytes` (UInt64) — For `INSERT` 查询时,写入的字节数。 对于其他查询,列值为0。 -- `result_rows` (UInt64) — Number of rows in the result. -- `result_bytes` (UInt64) — Number of bytes in the result. -- `memory_usage` (UInt64) — Memory consumption by the query. -- `query` (String) — Query string. -- `exception` (String) — Exception message. -- `stack_trace` (String) — Stack trace (a list of methods called before the error occurred). An empty string, if the query is completed successfully. -- `is_initial_query` (UInt8) — Query type. Possible values: - - 1 — Query was initiated by the client. - - 0 — Query was initiated by another query for distributed query execution. -- `user` (String) — Name of the user who initiated the current query. -- `query_id` (String) — ID of the query. -- `address` (IPv6) — IP address that was used to make the query. -- `port` (UInt16) — The client port that was used to make the query. -- `initial_user` (String) — Name of the user who ran the initial query (for distributed query execution). -- `initial_query_id` (String) — ID of the initial query (for distributed query execution). -- `initial_address` (IPv6) — IP address that the parent query was launched from. -- `initial_port` (UInt16) — The client port that was used to make the parent query. -- `interface` (UInt8) — Interface that the query was initiated from. Possible values: - - 1 — TCP. - - 2 — HTTP. -- `os_user` (String) — OS's username who runs [ツ环板clientョツ嘉ッツ偲](../interfaces/cli.md). -- `client_hostname` (String) — Hostname of the client machine where the [ツ环板clientョツ嘉ッツ偲](../interfaces/cli.md) 或者运行另一个TCP客户端。 -- `client_name` (String) — The [ツ环板clientョツ嘉ッツ偲](../interfaces/cli.md) 或另一个TCP客户端名称。 -- `client_revision` (UInt32) — Revision of the [ツ环板clientョツ嘉ッツ偲](../interfaces/cli.md) 或另一个TCP客户端。 -- `client_version_major` (UInt32) — Major version of the [ツ环板clientョツ嘉ッツ偲](../interfaces/cli.md) 或另一个TCP客户端。 -- `client_version_minor` (UInt32) — Minor version of the [ツ环板clientョツ嘉ッツ偲](../interfaces/cli.md) 或另一个TCP客户端。 -- `client_version_patch` (UInt32) — Patch component of the [ツ环板clientョツ嘉ッツ偲](../interfaces/cli.md) 或另一个TCP客户端版本。 -- `http_method` (UInt8) — HTTP method that initiated the query. Possible values: - - 0 — The query was launched from the TCP interface. - - 1 — `GET` 方法被使用。 - - 2 — `POST` 方法被使用。 -- `http_user_agent` (String) — The `UserAgent` http请求中传递的标头。 -- `quota_key` (String) — The “quota key” 在指定 [配额](quotas.md) 设置(见 `keyed`). -- `revision` (UInt32) — ClickHouse revision. -- `thread_numbers` (Array(UInt32)) — Number of threads that are participating in query execution. -- `ProfileEvents.Names` (Array(String)) — Counters that measure different metrics. 
The description of them could be found in the table [系统。活动](#system_tables-events) -- `ProfileEvents.Values` (Array(UInt64)) — Values of metrics that are listed in the `ProfileEvents.Names` 列。 -- `Settings.Names` (Array(String)) — Names of settings that were changed when the client ran the query. To enable logging changes to settings, set the `log_query_settings` 参数为1。 -- `Settings.Values` (Array(String)) — Values of settings that are listed in the `Settings.Names` 列。 - -每个查询创建一个或两个行中 `query_log` 表,具体取决于查询的状态: - -1. 如果查询执行成功,将创建两个类型为1和2的事件(请参阅 `type` 列)。 -2. 如果在查询处理过程中发生错误,将创建两个类型为1和4的事件。 -3. 如果在启动查询之前发生错误,将创建类型为3的单个事件。 - -默认情况下,日志以7.5秒的间隔添加到表中。 您可以在设置此时间间隔 [query\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query-log) 服务器设置(请参阅 `flush_interval_milliseconds` 参数)。 要强制将日志从内存缓冲区刷新到表中,请使用 `SYSTEM FLUSH LOGS` 查询。 - -当手动删除表时,它将自动动态创建。 请注意,所有以前的日志将被删除。 - -!!! note "注" - 日志的存储周期是无限的。 日志不会自动从表中删除。 您需要自己组织删除过时的日志。 - -您可以指定一个任意的分区键 `system.query_log` 表中的 [query\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query-log) 服务器设置(请参阅 `partition_by` 参数)。 - -## 系统。query\_thread\_log {#system_tables-query-thread-log} - -该表包含有关每个查询执行线程的信息。 - -ClickHouse仅在以下情况下创建此表 [query\_thread\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query-thread-log) 指定服务器参数。 此参数设置日志记录规则,例如日志记录间隔或将记录查询的表的名称。 - -要启用查询日志记录,请设置 [log\_query\_threads](settings/settings.md#settings-log-query-threads) 参数为1。 有关详细信息,请参阅 [设置](settings/settings.md) 科。 - -列: - -- `event_date` (Date) — the date when the thread has finished execution of the query. -- `event_time` (DateTime) — the date and time when the thread has finished execution of the query. -- `query_start_time` (DateTime) — Start time of query execution. -- `query_duration_ms` (UInt64) — Duration of query execution. -- `read_rows` (UInt64) — Number of read rows. -- `read_bytes` (UInt64) — Number of read bytes. -- `written_rows` (UInt64) — For `INSERT` 查询,写入的行数。 对于其他查询,列值为0。 -- `written_bytes` (UInt64) — For `INSERT` 查询时,写入的字节数。 对于其他查询,列值为0。 -- `memory_usage` (Int64) — The difference between the amount of allocated and freed memory in context of this thread. -- `peak_memory_usage` (Int64) — The maximum difference between the amount of allocated and freed memory in context of this thread. -- `thread_name` (String) — Name of the thread. -- `thread_number` (UInt32) — Internal thread ID. -- `os_thread_id` (Int32) — OS thread ID. -- `master_thread_id` (UInt64) — OS initial ID of initial thread. -- `query` (String) — Query string. -- `is_initial_query` (UInt8) — Query type. Possible values: - - 1 — Query was initiated by the client. - - 0 — Query was initiated by another query for distributed query execution. -- `user` (String) — Name of the user who initiated the current query. -- `query_id` (String) — ID of the query. -- `address` (IPv6) — IP address that was used to make the query. -- `port` (UInt16) — The client port that was used to make the query. -- `initial_user` (String) — Name of the user who ran the initial query (for distributed query execution). -- `initial_query_id` (String) — ID of the initial query (for distributed query execution). -- `initial_address` (IPv6) — IP address that the parent query was launched from. -- `initial_port` (UInt16) — The client port that was used to make the parent query. -- `interface` (UInt8) — Interface that the query was initiated from. Possible values: - - 1 — TCP. - - 2 — HTTP. 
-- `os_user` (String) — OS's username who runs [ツ环板clientョツ嘉ッツ偲](../interfaces/cli.md). -- `client_hostname` (String) — Hostname of the client machine where the [ツ环板clientョツ嘉ッツ偲](../interfaces/cli.md) 或者运行另一个TCP客户端。 -- `client_name` (String) — The [ツ环板clientョツ嘉ッツ偲](../interfaces/cli.md) 或另一个TCP客户端名称。 -- `client_revision` (UInt32) — Revision of the [ツ环板clientョツ嘉ッツ偲](../interfaces/cli.md) 或另一个TCP客户端。 -- `client_version_major` (UInt32) — Major version of the [ツ环板clientョツ嘉ッツ偲](../interfaces/cli.md) 或另一个TCP客户端。 -- `client_version_minor` (UInt32) — Minor version of the [ツ环板clientョツ嘉ッツ偲](../interfaces/cli.md) 或另一个TCP客户端。 -- `client_version_patch` (UInt32) — Patch component of the [ツ环板clientョツ嘉ッツ偲](../interfaces/cli.md) 或另一个TCP客户端版本。 -- `http_method` (UInt8) — HTTP method that initiated the query. Possible values: - - 0 — The query was launched from the TCP interface. - - 1 — `GET` 方法被使用。 - - 2 — `POST` 方法被使用。 -- `http_user_agent` (String) — The `UserAgent` http请求中传递的标头。 -- `quota_key` (String) — The “quota key” 在指定 [配额](quotas.md) 设置(见 `keyed`). -- `revision` (UInt32) — ClickHouse revision. -- `ProfileEvents.Names` (Array(String)) — Counters that measure different metrics for this thread. The description of them could be found in the table [系统。活动](#system_tables-events) -- `ProfileEvents.Values` (Array(UInt64)) — Values of metrics for this thread that are listed in the `ProfileEvents.Names` 列。 - -默认情况下,日志以7.5秒的间隔添加到表中。 您可以在设置此时间间隔 [query\_thread\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query-thread-log) 服务器设置(请参阅 `flush_interval_milliseconds` 参数)。 要强制将日志从内存缓冲区刷新到表中,请使用 `SYSTEM FLUSH LOGS` 查询。 - -当手动删除表时,它将自动动态创建。 请注意,所有以前的日志将被删除。 - -!!! note "注" - 日志的存储周期是无限的。 日志不会自动从表中删除。 您需要自己组织删除过时的日志。 - -您可以指定一个任意的分区键 `system.query_thread_log` 表中的 [query\_thread\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query-thread-log) 服务器设置(请参阅 `partition_by` 参数)。 - -## 系统。trace\_log {#system_tables-trace_log} - -包含采样查询探查器收集的堆栈跟踪。 - -ClickHouse创建此表时 [trace\_log](server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) 服务器配置部分被设置。 也是 [query\_profiler\_real\_time\_period\_ns](settings/settings.md#query_profiler_real_time_period_ns) 和 [query\_profiler\_cpu\_time\_period\_ns](settings/settings.md#query_profiler_cpu_time_period_ns) 应设置设置。 - -要分析日志,请使用 `addressToLine`, `addressToSymbol` 和 `demangle` 内省功能。 - -列: - -- `event_date` ([日期](../sql-reference/data-types/date.md)) — Date of sampling moment. - -- `event_time` ([日期时间](../sql-reference/data-types/datetime.md)) — Timestamp of the sampling moment. - -- `timestamp_ns` ([UInt64](../sql-reference/data-types/int-uint.md)) — Timestamp of the sampling moment in nanoseconds. - -- `revision` ([UInt32](../sql-reference/data-types/int-uint.md)) — ClickHouse server build revision. - - 通过以下方式连接到服务器 `clickhouse-client`,你看到的字符串类似于 `Connected to ClickHouse server version 19.18.1 revision 54429.`. 该字段包含 `revision`,但不是 `version` 的服务器。 - -- `timer_type` ([枚举8](../sql-reference/data-types/enum.md)) — Timer type: - - - `Real` 表示挂钟时间。 - - `CPU` 表示CPU时间。 - -- `thread_number` ([UInt32](../sql-reference/data-types/int-uint.md)) — Thread identifier. - -- `query_id` ([字符串](../sql-reference/data-types/string.md)) — Query identifier that can be used to get details about a query that was running from the [query\_log](#system_tables-query_log) 系统表. - -- `trace` ([数组(UInt64)](../sql-reference/data-types/array.md)) — Stack trace at the moment of sampling. 
Each element is a virtual memory address inside ClickHouse server process. - -**示例** - -``` sql -SELECT * FROM system.trace_log LIMIT 1 \G -``` - -``` text -Row 1: -────── -event_date: 2019-11-15 -event_time: 2019-11-15 15:09:38 -revision: 54428 -timer_type: Real -thread_number: 48 -query_id: acc4d61f-5bd1-4a3e-bc91-2180be37c915 -trace: [94222141367858,94222152240175,94222152325351,94222152329944,94222152330796,94222151449980,94222144088167,94222151682763,94222144088167,94222151682763,94222144088167,94222144058283,94222144059248,94222091840750,94222091842302,94222091831228,94222189631488,140509950166747,140509942945935] -``` - -## 系统。副本 {#system_tables-replicas} - -包含驻留在本地服务器上的复制表的信息和状态。 -此表可用于监视。 该表对于每个已复制的\*表都包含一行。 - -示例: - -``` sql -SELECT * -FROM system.replicas -WHERE table = 'visits' -FORMAT Vertical -``` - -``` text -Row 1: -────── -database: merge -table: visits -engine: ReplicatedCollapsingMergeTree -is_leader: 1 -can_become_leader: 1 -is_readonly: 0 -is_session_expired: 0 -future_parts: 1 -parts_to_check: 0 -zookeeper_path: /clickhouse/tables/01-06/visits -replica_name: example01-06-1.yandex.ru -replica_path: /clickhouse/tables/01-06/visits/replicas/example01-06-1.yandex.ru -columns_version: 9 -queue_size: 1 -inserts_in_queue: 0 -merges_in_queue: 1 -part_mutations_in_queue: 0 -queue_oldest_time: 2020-02-20 08:34:30 -inserts_oldest_time: 0000-00-00 00:00:00 -merges_oldest_time: 2020-02-20 08:34:30 -part_mutations_oldest_time: 0000-00-00 00:00:00 -oldest_part_to_get: -oldest_part_to_merge_to: 20200220_20284_20840_7 -oldest_part_to_mutate_to: -log_max_index: 596273 -log_pointer: 596274 -last_queue_update: 2020-02-20 08:34:32 -absolute_delay: 0 -total_replicas: 2 -active_replicas: 2 -``` - -列: - -- `database` (`String`)-数据库名称 -- `table` (`String`)-表名 -- `engine` (`String`)-表引擎名称 -- `is_leader` (`UInt8`)-副本是否是领导者。 - 一次只有一个副本可以成为领导者。 领导者负责选择要执行的后台合并。 - 请注意,可以对任何可用且在ZK中具有会话的副本执行写操作,而不管该副本是否为leader。 -- `can_become_leader` (`UInt8`)-副本是否可以当选为领导者。 -- `is_readonly` (`UInt8`)-副本是否处于只读模式。 - 如果配置没有ZooKeeper的部分,如果在ZooKeeper中重新初始化会话时发生未知错误,以及在ZooKeeper中重新初始化会话时发生未知错误,则此模式将打开。 -- `is_session_expired` (`UInt8`)-与ZooKeeper的会话已经过期。 基本上一样 `is_readonly`. -- `future_parts` (`UInt32`)-由于尚未完成的插入或合并而显示的数据部分的数量。 -- `parts_to_check` (`UInt32`)-队列中用于验证的数据部分的数量。 如果怀疑零件可能已损坏,则将其放入验证队列。 -- `zookeeper_path` (`String`)-在ZooKeeper中的表数据路径。 -- `replica_name` (`String`)-在动物园管理员副本名称. 同一表的不同副本具有不同的名称。 -- `replica_path` (`String`)-在ZooKeeper中的副本数据的路径。 与连接相同 ‘zookeeper\_path/replicas/replica\_path’. -- `columns_version` (`Int32`)-表结构的版本号。 指示执行ALTER的次数。 如果副本有不同的版本,这意味着一些副本还没有做出所有的改变。 -- `queue_size` (`UInt32`)-等待执行的操作的队列大小。 操作包括插入数据块、合并和某些其他操作。 它通常与 `future_parts`. 
-- `inserts_in_queue` (`UInt32`)-需要插入数据块的数量。 插入通常复制得相当快。 如果这个数字很大,这意味着有什么不对劲。 -- `merges_in_queue` (`UInt32`)-等待进行合并的数量。 有时合并时间很长,因此此值可能长时间大于零。 -- `part_mutations_in_queue` (`UInt32`)-等待进行的突变的数量。 -- `queue_oldest_time` (`DateTime`)-如果 `queue_size` 大于0,显示何时将最旧的操作添加到队列中。 -- `inserts_oldest_time` (`DateTime`)-看 `queue_oldest_time` -- `merges_oldest_time` (`DateTime`)-看 `queue_oldest_time` -- `part_mutations_oldest_time` (`DateTime`)-看 `queue_oldest_time` - -接下来的4列只有在有ZK活动会话的情况下才具有非零值。 - -- `log_max_index` (`UInt64`)-一般活动日志中的最大条目数。 -- `log_pointer` (`UInt64`)-副本复制到其执行队列的常规活动日志中的最大条目数加一。 如果 `log_pointer` 比 `log_max_index`,有点不对劲。 -- `last_queue_update` (`DateTime`)-上次更新队列时。 -- `absolute_delay` (`UInt64`)-当前副本有多大滞后秒。 -- `total_replicas` (`UInt8`)-此表的已知副本总数。 -- `active_replicas` (`UInt8`)-在ZooKeeper中具有会话的此表的副本的数量(即正常运行的副本的数量)。 - -如果您请求所有列,表可能会工作得有点慢,因为每行都会从ZooKeeper进行几次读取。 -如果您没有请求最后4列(log\_max\_index,log\_pointer,total\_replicas,active\_replicas),表工作得很快。 - -例如,您可以检查一切是否正常工作,如下所示: - -``` sql -SELECT - database, - table, - is_leader, - is_readonly, - is_session_expired, - future_parts, - parts_to_check, - columns_version, - queue_size, - inserts_in_queue, - merges_in_queue, - log_max_index, - log_pointer, - total_replicas, - active_replicas -FROM system.replicas -WHERE - is_readonly - OR is_session_expired - OR future_parts > 20 - OR parts_to_check > 10 - OR queue_size > 20 - OR inserts_in_queue > 10 - OR log_max_index - log_pointer > 10 - OR total_replicas < 2 - OR active_replicas < total_replicas -``` - -如果这个查询没有返回任何东西,这意味着一切都很好。 - -## 系统。设置 {#system-tables-system-settings} - -包含有关当前用户的会话设置的信息。 - -列: - -- `name` ([字符串](../sql-reference/data-types/string.md)) — Setting name. -- `value` ([字符串](../sql-reference/data-types/string.md)) — Setting value. -- `changed` ([UInt8](../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting is changed from its default value. -- `description` ([字符串](../sql-reference/data-types/string.md)) — Short setting description. -- `min` ([可为空](../sql-reference/data-types/nullable.md)([字符串](../sql-reference/data-types/string.md))) — Minimum value of the setting, if any is set via [制约因素](settings/constraints-on-settings.md#constraints-on-settings). 如果设置没有最小值,则包含 [NULL](../sql-reference/syntax.md#null-literal). -- `max` ([可为空](../sql-reference/data-types/nullable.md)([字符串](../sql-reference/data-types/string.md))) — Maximum value of the setting, if any is set via [制约因素](settings/constraints-on-settings.md#constraints-on-settings). 如果设置没有最大值,则包含 [NULL](../sql-reference/syntax.md#null-literal). -- `readonly` ([UInt8](../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether the current user can change the setting: - - `0` — Current user can change the setting. - - `1` — Current user can't change the setting. - -**示例** - -下面的示例演示如何获取有关名称包含的设置的信息 `min_i`. - -``` sql -SELECT * -FROM system.settings -WHERE name LIKE '%min_i%' -``` - -``` text -┌─name────────────────────────────────────────┬─value─────┬─changed─┬─description───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─min──┬─max──┬─readonly─┐ -│ min_insert_block_size_rows │ 1048576 │ 0 │ Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ -│ min_insert_block_size_bytes │ 268435456 │ 0 │ Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough. 
│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ -│ read_backoff_min_interval_between_events_ms │ 1000 │ 0 │ Settings to reduce the number of threads in case of slow reads. Do not pay attention to the event, if the previous one has passed less than a certain amount of time. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ -└─────────────────────────────────────────────┴───────────┴─────────┴───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴──────┴──────┴──────────┘ -``` - -使用 `WHERE changed` 可以是有用的,例如,当你想检查: - -- 配置文件中的设置是否正确加载并正在使用。 -- 在当前会话中更改的设置。 - - - -``` sql -SELECT * FROM system.settings WHERE changed AND name='load_balancing' -``` - -**另请参阅** - -- [设置](settings/index.md#session-settings-intro) -- [查询权限](settings/permissions-for-queries.md#settings_readonly) -- [对设置的限制](settings/constraints-on-settings.md) - -## 系统。表\_engines {#system.table_engines} - -``` text -┌─name───────────────────┬─value───────┐ -│ max_threads │ 8 │ -│ use_uncompressed_cache │ 0 │ -│ load_balancing │ random │ -│ max_memory_usage │ 10000000000 │ -└────────────────────────┴─────────────┘ -``` - -## 系统。merge\_tree\_settings {#system-merge_tree_settings} - -包含有关以下设置的信息 `MergeTree` 桌子 - -列: - -- `name` (String) — Setting name. -- `value` (String) — Setting value. -- `description` (String) — Setting description. -- `type` (String) — Setting type (implementation specific string value). -- `changed` (UInt8) — Whether the setting was explicitly defined in the config or explicitly changed. - -## 系统。表\_engines {#system-table-engines} - -包含服务器支持的表引擎的描述及其功能支持信息。 - -此表包含以下列(列类型显示在括号中): - -- `name` (String) — The name of table engine. -- `supports_settings` (UInt8) — Flag that indicates if table engine supports `SETTINGS` 条款 -- `supports_skipping_indices` (UInt8) — Flag that indicates if table engine supports [跳过索引](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-data_skipping-indexes). -- `supports_ttl` (UInt8) — Flag that indicates if table engine supports [TTL](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl). -- `supports_sort_order` (UInt8) — Flag that indicates if table engine supports clauses `PARTITION_BY`, `PRIMARY_KEY`, `ORDER_BY` 和 `SAMPLE_BY`. -- `supports_replication` (UInt8) — Flag that indicates if table engine supports [数据复制](../engines/table-engines/mergetree-family/replication.md). -- `supports_duduplication` (UInt8) — Flag that indicates if table engine supports data deduplication. - -示例: - -``` sql -SELECT * -FROM system.table_engines -WHERE name in ('Kafka', 'MergeTree', 'ReplicatedCollapsingMergeTree') -``` - -``` text -┌─name──────────────────────────┬─supports_settings─┬─supports_skipping_indices─┬─supports_sort_order─┬─supports_ttl─┬─supports_replication─┬─supports_deduplication─┐ -│ Kafka │ 1 │ 0 │ 0 │ 0 │ 0 │ 0 │ -│ MergeTree │ 1 │ 1 │ 1 │ 1 │ 0 │ 0 │ -│ ReplicatedCollapsingMergeTree │ 1 │ 1 │ 1 │ 1 │ 1 │ 1 │ -└───────────────────────────────┴───────────────────┴───────────────────────────┴─────────────────────┴──────────────┴──────────────────────┴────────────────────────┘ -``` - -**另请参阅** - -- 梅树家族 [查询子句](../engines/table-engines/mergetree-family/mergetree.md#mergetree-query-clauses) -- 卡夫卡 [设置](../engines/table-engines/integrations/kafka.md#table_engine-kafka-creating-a-table) -- 加入我们 [设置](../engines/table-engines/special/join.md#join-limitations-and-settings) - -## 系统。表 {#system-tables} - -包含服务器知道的每个表的元数据。 分离的表不显示在 `system.tables`. 
- -此表包含以下列(列类型显示在括号中): - -- `database` (String) — The name of the database the table is in. - -- `name` (String) — Table name. - -- `engine` (String) — Table engine name (without parameters). - -- `is_temporary` (UInt8)-指示表是否是临时的标志。 - -- `data_path` (String)-文件系统中表数据的路径。 - -- `metadata_path` (String)-文件系统中表元数据的路径。 - -- `metadata_modification_time` (DateTime)-表元数据的最新修改时间。 - -- `dependencies_database` (数组(字符串))-数据库依赖关系. - -- `dependencies_table` (数组(字符串))-表依赖关系 ([MaterializedView](../engines/table-engines/special/materializedview.md) 基于当前表的表)。 - -- `create_table_query` (String)-用于创建表的查询。 - -- `engine_full` (String)-表引擎的参数。 - -- `partition_key` (String)-表中指定的分区键表达式。 - -- `sorting_key` (String)-表中指定的排序键表达式。 - -- `primary_key` (String)-表中指定的主键表达式。 - -- `sampling_key` (String)-表中指定的采样键表达式。 - -- `storage_policy` (字符串)-存储策略: - - - [MergeTree](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) - - [分布](../engines/table-engines/special/distributed.md#distributed) - -- `total_rows` (Nullable(UInt64))-总行数,如果可以快速确定表中的确切行数,否则 `Null` (包括内衣 `Buffer` 表)。 - -- `total_bytes` (Nullable(UInt64))-总字节数,如果可以快速确定存储表的确切字节数,否则 `Null` (**不** 包括任何底层存储)。 - - - If the table stores data on disk, returns used space on disk (i.e. compressed). - - 如果表在内存中存储数据,返回在内存中使用的近似字节数. - -该 `system.tables` 表中使用 `SHOW TABLES` 查询实现。 - -## 系统。动物园管理员 {#system-zookeeper} - -如果未配置ZooKeeper,则表不存在。 允许从配置中定义的ZooKeeper集群读取数据。 -查询必须具有 ‘path’ WHERE子句中的平等条件。 这是ZooKeeper中您想要获取数据的孩子的路径。 - -查询 `SELECT * FROM system.zookeeper WHERE path = '/clickhouse'` 输出对所有孩子的数据 `/clickhouse` 节点。 -要输出所有根节点的数据,write path= ‘/’. -如果在指定的路径 ‘path’ 不存在,将引发异常。 - -列: - -- `name` (String) — The name of the node. -- `path` (String) — The path to the node. -- `value` (String) — Node value. -- `dataLength` (Int32) — Size of the value. -- `numChildren` (Int32) — Number of descendants. -- `czxid` (Int64) — ID of the transaction that created the node. -- `mzxid` (Int64) — ID of the transaction that last changed the node. -- `pzxid` (Int64) — ID of the transaction that last deleted or added descendants. -- `ctime` (DateTime) — Time of node creation. -- `mtime` (DateTime) — Time of the last modification of the node. -- `version` (Int32) — Node version: the number of times the node was changed. -- `cversion` (Int32) — Number of added or removed descendants. -- `aversion` (Int32) — Number of changes to the ACL. -- `ephemeralOwner` (Int64) — For ephemeral nodes, the ID of the session that owns this node. 
- -示例: - -``` sql -SELECT * -FROM system.zookeeper -WHERE path = '/clickhouse/tables/01-08/visits/replicas' -FORMAT Vertical -``` - -``` text -Row 1: -────── -name: example01-08-1.yandex.ru -value: -czxid: 932998691229 -mzxid: 932998691229 -ctime: 2015-03-27 16:49:51 -mtime: 2015-03-27 16:49:51 -version: 0 -cversion: 47 -aversion: 0 -ephemeralOwner: 0 -dataLength: 0 -numChildren: 7 -pzxid: 987021031383 -path: /clickhouse/tables/01-08/visits/replicas - -Row 2: -────── -name: example01-08-2.yandex.ru -value: -czxid: 933002738135 -mzxid: 933002738135 -ctime: 2015-03-27 16:57:01 -mtime: 2015-03-27 16:57:01 -version: 0 -cversion: 37 -aversion: 0 -ephemeralOwner: 0 -dataLength: 0 -numChildren: 7 -pzxid: 987021252247 -path: /clickhouse/tables/01-08/visits/replicas -``` - -## 系统。突变 {#system_tables-mutations} - -该表包含以下信息 [突变](../sql-reference/statements/alter.md#alter-mutations) MergeTree表及其进展。 每个突变命令由一行表示。 该表具有以下列: - -**数据库**, **表** -应用突变的数据库和表的名称。 - -**mutation\_id** -变异的ID 对于复制的表,这些Id对应于znode中的名称 `/mutations/` 动物园管理员的目录。 对于未复制的表,Id对应于表的数据目录中的文件名。 - -**命令** -Mutation命令字符串(查询后的部分 `ALTER TABLE [db.]table`). - -**create\_time** -当这个突变命令被提交执行。 - -**block\_numbers.partition\_id**, **block\_numbers.编号** -嵌套列。 对于复制表的突变,它包含每个分区的一条记录:分区ID和通过突变获取的块编号(在每个分区中,只有包含编号小于该分区中突变获取的块编号的块的 在非复制表中,所有分区中的块编号形成一个序列。 这意味着对于非复制表的突变,该列将包含一条记录,其中包含由突变获取的单个块编号。 - -**parts\_to\_do** -为了完成突变,需要突变的数据部分的数量。 - -**is\_done** -变异完成了?? 请注意,即使 `parts_to_do = 0` 由于长时间运行的INSERT将创建需要突变的新数据部分,因此可能尚未完成复制表的突变。 - -如果在改变某些部分时出现问题,以下列将包含其他信息: - -**latest\_failed\_part** -不能变异的最新部分的名称。 - -**latest\_fail\_time** -最近的部分突变失败的时间。 - -**latest\_fail\_reason** -导致最近部件变异失败的异常消息。 - -## 系统。磁盘 {#system_tables-disks} - -包含有关在定义的磁盘信息 [服务器配置](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes_configure). - -列: - -- `name` ([字符串](../sql-reference/data-types/string.md)) — Name of a disk in the server configuration. -- `path` ([字符串](../sql-reference/data-types/string.md)) — Path to the mount point in the file system. -- `free_space` ([UInt64](../sql-reference/data-types/int-uint.md)) — Free space on disk in bytes. -- `total_space` ([UInt64](../sql-reference/data-types/int-uint.md)) — Disk volume in bytes. -- `keep_free_space` ([UInt64](../sql-reference/data-types/int-uint.md)) — Amount of disk space that should stay free on disk in bytes. Defined in the `keep_free_space_bytes` 磁盘配置参数。 - -## 系统。storage\_policies {#system_tables-storage_policies} - -包含有关存储策略和卷中定义的信息 [服务器配置](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes_configure). - -列: - -- `policy_name` ([字符串](../sql-reference/data-types/string.md)) — Name of the storage policy. -- `volume_name` ([字符串](../sql-reference/data-types/string.md)) — Volume name defined in the storage policy. -- `volume_priority` ([UInt64](../sql-reference/data-types/int-uint.md)) — Volume order number in the configuration. -- `disks` ([数组(字符串)](../sql-reference/data-types/array.md)) — Disk names, defined in the storage policy. -- `max_data_part_size` ([UInt64](../sql-reference/data-types/int-uint.md)) — Maximum size of a data part that can be stored on volume disks (0 — no limit). -- `move_factor` ([Float64](../sql-reference/data-types/float.md)) — Ratio of free disk space. When the ratio exceeds the value of configuration parameter, ClickHouse start to move data to the next volume in order. 
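A minimal query sketch for reviewing how policies, volumes and disks relate (the result depends entirely on the server's storage configuration):

``` sql
SELECT policy_name, volume_name, volume_priority, disks, move_factor
FROM system.storage_policies
ORDER BY policy_name, volume_priority
```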
- -如果存储策略包含多个卷,则每个卷的信息将存储在表的单独行中。 - -[原始文章](https://clickhouse.tech/docs/en/operations/system_tables/) diff --git a/docs/zh/operations/system-tables/asynchronous_metric_log.md b/docs/zh/operations/system-tables/asynchronous_metric_log.md new file mode 100644 index 00000000000..9f6c697a18e --- /dev/null +++ b/docs/zh/operations/system-tables/asynchronous_metric_log.md @@ -0,0 +1,8 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +--- + +## 系统。asynchronous\_metric\_log {#system-tables-async-log} + +包含以下内容的历史值 `system.asynchronous_log` (见 [系统。asynchronous\_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics)) diff --git a/docs/zh/operations/system-tables/asynchronous_metrics.md b/docs/zh/operations/system-tables/asynchronous_metrics.md new file mode 100644 index 00000000000..2bd615085a8 --- /dev/null +++ b/docs/zh/operations/system-tables/asynchronous_metrics.md @@ -0,0 +1,41 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +--- + +# 系统。asynchronous\_metrics {#system_tables-asynchronous_metrics} + +包含在后台定期计算的指标。 例如,在使用的RAM量。 + +列: + +- `metric` ([字符串](../../sql-reference/data-types/string.md)) — Metric name. +- `value` ([Float64](../../sql-reference/data-types/float.md)) — Metric value. + +**示例** + +``` sql +SELECT * FROM system.asynchronous_metrics LIMIT 10 +``` + +``` text +┌─metric──────────────────────────────────┬──────value─┐ +│ jemalloc.background_thread.run_interval │ 0 │ +│ jemalloc.background_thread.num_runs │ 0 │ +│ jemalloc.background_thread.num_threads │ 0 │ +│ jemalloc.retained │ 422551552 │ +│ jemalloc.mapped │ 1682989056 │ +│ jemalloc.resident │ 1656446976 │ +│ jemalloc.metadata_thp │ 0 │ +│ jemalloc.metadata │ 10226856 │ +│ UncompressedCacheCells │ 0 │ +│ MarkCacheFiles │ 0 │ +└─────────────────────────────────────────┴────────────┘ +``` + +**另请参阅** + +- [监测](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. +- [系统。指标](../../operations/system-tables/metrics.md#system_tables-metrics) — Contains instantly calculated metrics. +- [系统。活动](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that have occurred. +- [系统。metric\_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`. diff --git a/docs/zh/operations/system-tables/clusters.md b/docs/zh/operations/system-tables/clusters.md new file mode 100644 index 00000000000..4bc8d4210ff --- /dev/null +++ b/docs/zh/operations/system-tables/clusters.md @@ -0,0 +1,29 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +--- + +# 系统。集群 {#system-clusters} + +包含有关配置文件中可用的集群及其中的服务器的信息。 + +列: + +- `cluster` (String) — The cluster name. +- `shard_num` (UInt32) — The shard number in the cluster, starting from 1. +- `shard_weight` (UInt32) — The relative weight of the shard when writing data. +- `replica_num` (UInt32) — The replica number in the shard, starting from 1. +- `host_name` (String) — The host name, as specified in the config. +- `host_address` (String) — The host IP address obtained from DNS. +- `port` (UInt16) — The port to use for connecting to the server. +- `user` (String) — The name of the user for connecting to the server. 
+- `errors_count` (UInt32)-此主机无法到达副本的次数。 +- `estimated_recovery_time` (UInt32)-剩下的秒数,直到副本错误计数归零,它被认为是恢复正常。 + +请注意 `errors_count` 每个查询集群更新一次,但 `estimated_recovery_time` 按需重新计算。 所以有可能是非零的情况 `errors_count` 和零 `estimated_recovery_time`,下一个查询将为零 `errors_count` 并尝试使用副本,就好像它没有错误。 + +**另请参阅** + +- [表引擎分布式](../../engines/table-engines/special/distributed.md) +- [distributed\_replica\_error\_cap设置](../../operations/settings/settings.md#settings-distributed_replica_error_cap) +- [distributed\_replica\_error\_half\_life设置](../../operations/settings/settings.md#settings-distributed_replica_error_half_life) diff --git a/docs/zh/operations/system-tables/columns.md b/docs/zh/operations/system-tables/columns.md new file mode 100644 index 00000000000..24296dc715c --- /dev/null +++ b/docs/zh/operations/system-tables/columns.md @@ -0,0 +1,27 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +--- + +# 系统。列 {#system-columns} + +包含有关所有表中列的信息。 + +您可以使用此表获取类似于以下内容的信息 [DESCRIBE TABLE](../../sql-reference/statements/misc.md#misc-describe-table) 查询,但对于多个表一次。 + +该 `system.columns` 表包含以下列(列类型显示在括号中): + +- `database` (String) — Database name. +- `table` (String) — Table name. +- `name` (String) — Column name. +- `type` (String) — Column type. +- `default_kind` (String) — Expression type (`DEFAULT`, `MATERIALIZED`, `ALIAS`)为默认值,如果没有定义,则为空字符串。 +- `default_expression` (String) — Expression for the default value, or an empty string if it is not defined. +- `data_compressed_bytes` (UInt64) — The size of compressed data, in bytes. +- `data_uncompressed_bytes` (UInt64) — The size of decompressed data, in bytes. +- `marks_bytes` (UInt64) — The size of marks, in bytes. +- `comment` (String) — Comment on the column, or an empty string if it is not defined. +- `is_in_partition_key` (UInt8) — Flag that indicates whether the column is in the partition expression. +- `is_in_sorting_key` (UInt8) — Flag that indicates whether the column is in the sorting key expression. +- `is_in_primary_key` (UInt8) — Flag that indicates whether the column is in the primary key expression. +- `is_in_sampling_key` (UInt8) — Flag that indicates whether the column is in the sampling key expression. diff --git a/docs/zh/operations/system-tables/contributors.md b/docs/zh/operations/system-tables/contributors.md new file mode 100644 index 00000000000..e9374a7dc9c --- /dev/null +++ b/docs/zh/operations/system-tables/contributors.md @@ -0,0 +1,45 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +--- + +# 系统。贡献者 {#system-contributors} + +包含有关贡献者的信息。 该顺序在查询执行时是随机的。 + +列: + +- `name` (String) — Contributor (author) name from git log. 
+ +**示例** + +``` sql +SELECT * FROM system.contributors LIMIT 10 +``` + +``` text +┌─name─────────────┐ +│ Olga Khvostikova │ +│ Max Vetrov │ +│ LiuYangkuan │ +│ svladykin │ +│ zamulla │ +│ Šimon Podlipský │ +│ BayoNet │ +│ Ilya Khomutov │ +│ Amy Krishnevsky │ +│ Loud_Scream │ +└──────────────────┘ +``` + +要在表中找出自己,请使用查询: + +``` sql +SELECT * FROM system.contributors WHERE name = 'Olga Khvostikova' +``` + +``` text +┌─name─────────────┐ +│ Olga Khvostikova │ +└──────────────────┘ +``` diff --git a/docs/zh/operations/system-tables/data_type_families.md b/docs/zh/operations/system-tables/data_type_families.md new file mode 100644 index 00000000000..e6ec3fdbfce --- /dev/null +++ b/docs/zh/operations/system-tables/data_type_families.md @@ -0,0 +1,39 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +--- + +# 系统。data\_type\_families {#system_tables-data_type_families} + +包含有关受支持的信息 [数据类型](../../sql-reference/data-types/). + +列: + +- `name` ([字符串](../../sql-reference/data-types/string.md)) — Data type name. +- `case_insensitive` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Property that shows whether you can use a data type name in a query in case insensitive manner or not. For example, `Date` 和 `date` 都是有效的。 +- `alias_to` ([字符串](../../sql-reference/data-types/string.md)) — Data type name for which `name` 是个化名 + +**示例** + +``` sql +SELECT * FROM system.data_type_families WHERE alias_to = 'String' +``` + +``` text +┌─name───────┬─case_insensitive─┬─alias_to─┐ +│ LONGBLOB │ 1 │ String │ +│ LONGTEXT │ 1 │ String │ +│ TINYTEXT │ 1 │ String │ +│ TEXT │ 1 │ String │ +│ VARCHAR │ 1 │ String │ +│ MEDIUMBLOB │ 1 │ String │ +│ BLOB │ 1 │ String │ +│ TINYBLOB │ 1 │ String │ +│ CHAR │ 1 │ String │ +│ MEDIUMTEXT │ 1 │ String │ +└────────────┴──────────────────┴──────────┘ +``` + +**另请参阅** + +- [语法](../../sql-reference/syntax.md) — Information about supported syntax. diff --git a/docs/zh/operations/system-tables/databases.md b/docs/zh/operations/system-tables/databases.md new file mode 100644 index 00000000000..134b8ebc7ab --- /dev/null +++ b/docs/zh/operations/system-tables/databases.md @@ -0,0 +1,12 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +--- + +# 系统。数据库 {#system-databases} + +此表包含一个名为"字符串"的列 ‘name’ – the name of a database. + +服务器知道的每个数据库在表中都有相应的条目。 + +该系统表用于实现 `SHOW DATABASES` 查询。 diff --git a/docs/zh/operations/system-tables/detached_parts.md b/docs/zh/operations/system-tables/detached_parts.md new file mode 100644 index 00000000000..dd561dec6f3 --- /dev/null +++ b/docs/zh/operations/system-tables/detached_parts.md @@ -0,0 +1,14 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +--- + +# 系统。detached\_parts {#system_tables-detached_parts} + +包含有关分离部分的信息 [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) 桌子 该 `reason` 列指定分离部件的原因。 + +对于用户分离的部件,原因是空的。 这些部件可以附加 [ALTER TABLE ATTACH PARTITION\|PART](../../sql-reference/statements/alter.md#alter_attach-partition) 指挥部 + +有关其他列的说明,请参阅 [系统。零件](../../operations/system-tables/parts.md#system_tables-parts). + +如果部件名称无效,某些列的值可能为 `NULL`. 这些部分可以删除 [ALTER TABLE DROP DETACHED PART](../../sql-reference/statements/alter.md#alter_drop-detached). 
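A rough sketch of the workflow described above (`db.my_table` and the part name are placeholders): inspect the detached parts first, then re-attach or drop them.

``` sql
-- See which parts are detached and why (an empty reason means the part was detached by a user)
SELECT database, table, partition_id, name, reason
FROM system.detached_parts;

-- Re-attach a part
ALTER TABLE db.my_table ATTACH PART 'all_1_1_0';

-- Or remove it permanently (requires the allow_drop_detached setting)
SET allow_drop_detached = 1;
ALTER TABLE db.my_table DROP DETACHED PART 'all_1_1_0';
```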
diff --git a/docs/zh/operations/system-tables/dictionaries.md b/docs/zh/operations/system-tables/dictionaries.md new file mode 100644 index 00000000000..6cfe71de3cb --- /dev/null +++ b/docs/zh/operations/system-tables/dictionaries.md @@ -0,0 +1,66 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +--- + +# 系统。字典 {#system_tables-dictionaries} + +包含以下信息 [外部字典](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md). + +列: + +- `database` ([字符串](../../sql-reference/data-types/string.md)) — Name of the database containing the dictionary created by DDL query. Empty string for other dictionaries. +- `name` ([字符串](../../sql-reference/data-types/string.md)) — [字典名称](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md). +- `status` ([枚举8](../../sql-reference/data-types/enum.md)) — Dictionary status. Possible values: + - `NOT_LOADED` — Dictionary was not loaded because it was not used. + - `LOADED` — Dictionary loaded successfully. + - `FAILED` — Unable to load the dictionary as a result of an error. + - `LOADING` — Dictionary is loading now. + - `LOADED_AND_RELOADING` — Dictionary is loaded successfully, and is being reloaded right now (frequent reasons: [SYSTEM RELOAD DICTIONARY](../../sql-reference/statements/system.md#query_language-system-reload-dictionary) 查询,超时,字典配置已更改)。 + - `FAILED_AND_RELOADING` — Could not load the dictionary as a result of an error and is loading now. +- `origin` ([字符串](../../sql-reference/data-types/string.md)) — Path to the configuration file that describes the dictionary. +- `type` ([字符串](../../sql-reference/data-types/string.md)) — Type of a dictionary allocation. [在内存中存储字典](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md). +- `key` — [密钥类型](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-key):数字键 ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) or Сomposite key ([字符串](../../sql-reference/data-types/string.md)) — form “(type 1, type 2, …, type n)”. +- `attribute.names` ([阵列](../../sql-reference/data-types/array.md)([字符串](../../sql-reference/data-types/string.md))) — Array of [属性名称](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes) 由字典提供。 +- `attribute.types` ([阵列](../../sql-reference/data-types/array.md)([字符串](../../sql-reference/data-types/string.md))) — Corresponding array of [属性类型](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes) 这是由字典提供。 +- `bytes_allocated` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Amount of RAM allocated for the dictionary. +- `query_count` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of queries since the dictionary was loaded or since the last successful reboot. +- `hit_rate` ([Float64](../../sql-reference/data-types/float.md)) — For cache dictionaries, the percentage of uses for which the value was in the cache. +- `element_count` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of items stored in the dictionary. +- `load_factor` ([Float64](../../sql-reference/data-types/float.md)) — Percentage filled in the dictionary (for a hashed dictionary, the percentage filled in the hash table). 
+- `source` ([字符串](../../sql-reference/data-types/string.md)) — Text describing the [数据源](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md) 为了字典 +- `lifetime_min` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Minimum [使用寿命](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) 在内存中的字典,之后ClickHouse尝试重新加载字典(如果 `invalidate_query` 被设置,那么只有当它已经改变)。 在几秒钟内设置。 +- `lifetime_max` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Maximum [使用寿命](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) 在内存中的字典,之后ClickHouse尝试重新加载字典(如果 `invalidate_query` 被设置,那么只有当它已经改变)。 在几秒钟内设置。 +- `loading_start_time` ([日期时间](../../sql-reference/data-types/datetime.md)) — Start time for loading the dictionary. +- `last_successful_update_time` ([日期时间](../../sql-reference/data-types/datetime.md)) — End time for loading or updating the dictionary. Helps to monitor some troubles with external sources and investigate causes. +- `loading_duration` ([Float32](../../sql-reference/data-types/float.md)) — Duration of a dictionary loading. +- `last_exception` ([字符串](../../sql-reference/data-types/string.md)) — Text of the error that occurs when creating or reloading the dictionary if the dictionary couldn't be created. + +**示例** + +配置字典。 + +``` sql +CREATE DICTIONARY dictdb.dict +( + `key` Int64 DEFAULT -1, + `value_default` String DEFAULT 'world', + `value_expression` String DEFAULT 'xxx' EXPRESSION 'toString(127 * 172)' +) +PRIMARY KEY key +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'dicttbl' DB 'dictdb')) +LIFETIME(MIN 0 MAX 1) +LAYOUT(FLAT()) +``` + +确保字典已加载。 + +``` sql +SELECT * FROM system.dictionaries +``` + +``` text +┌─database─┬─name─┬─status─┬─origin──────┬─type─┬─key────┬─attribute.names──────────────────────┬─attribute.types─────┬─bytes_allocated─┬─query_count─┬─hit_rate─┬─element_count─┬───────────load_factor─┬─source─────────────────────┬─lifetime_min─┬─lifetime_max─┬──loading_start_time─┌──last_successful_update_time─┬──────loading_duration─┬─last_exception─┐ +│ dictdb │ dict │ LOADED │ dictdb.dict │ Flat │ UInt64 │ ['value_default','value_expression'] │ ['String','String'] │ 74032 │ 0 │ 1 │ 1 │ 0.0004887585532746823 │ ClickHouse: dictdb.dicttbl │ 0 │ 1 │ 2020-03-04 04:17:34 │ 2020-03-04 04:30:34 │ 0.002 │ │ +└──────────┴──────┴────────┴─────────────┴──────┴────────┴──────────────────────────────────────┴─────────────────────┴─────────────────┴─────────────┴──────────┴───────────────┴───────────────────────┴────────────────────────────┴──────────────┴──────────────┴─────────────────────┴──────────────────────────────┘───────────────────────┴────────────────┘ +``` diff --git a/docs/zh/operations/system-tables/disks.md b/docs/zh/operations/system-tables/disks.md new file mode 100644 index 00000000000..39cacccb4db --- /dev/null +++ b/docs/zh/operations/system-tables/disks.md @@ -0,0 +1,31 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +--- + +# 系统。磁盘 {#system_tables-disks} + +包含有关在定义的磁盘信息 [服务器配置](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes_configure). + +列: + +- `name` ([字符串](../../sql-reference/data-types/string.md)) — Name of a disk in the server configuration. +- `path` ([字符串](../../sql-reference/data-types/string.md)) — Path to the mount point in the file system. 
+- `free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Free space on disk in bytes. +- `total_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Disk volume in bytes. +- `keep_free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Amount of disk space that should stay free on disk in bytes. Defined in the `keep_free_space_bytes` 磁盘配置参数。 + +## 系统。storage\_policies {#system_tables-storage_policies} + +包含有关存储策略和卷中定义的信息 [服务器配置](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes_configure). + +列: + +- `policy_name` ([字符串](../../sql-reference/data-types/string.md)) — Name of the storage policy. +- `volume_name` ([字符串](../../sql-reference/data-types/string.md)) — Volume name defined in the storage policy. +- `volume_priority` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Volume order number in the configuration. +- `disks` ([数组(字符串)](../../sql-reference/data-types/array.md)) — Disk names, defined in the storage policy. +- `max_data_part_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Maximum size of a data part that can be stored on volume disks (0 — no limit). +- `move_factor` ([Float64](../../sql-reference/data-types/float.md)) — Ratio of free disk space. When the ratio exceeds the value of configuration parameter, ClickHouse start to move data to the next volume in order. + +如果存储策略包含多个卷,则每个卷的信息将存储在表的单独行中。 diff --git a/docs/zh/operations/system-tables/events.md b/docs/zh/operations/system-tables/events.md new file mode 100644 index 00000000000..21b787c6064 --- /dev/null +++ b/docs/zh/operations/system-tables/events.md @@ -0,0 +1,37 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +--- + +# 系统。活动 {#system_tables-events} + +包含有关系统中发生的事件数的信息。 例如,在表中,您可以找到多少 `SELECT` 自ClickHouse服务器启动以来已处理查询。 + +列: + +- `event` ([字符串](../../sql-reference/data-types/string.md)) — Event name. +- `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of events occurred. +- `description` ([字符串](../../sql-reference/data-types/string.md)) — Event description. + +**示例** + +``` sql +SELECT * FROM system.events LIMIT 5 +``` + +``` text +┌─event─────────────────────────────────┬─value─┬─description────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ Query │ 12 │ Number of queries to be interpreted and potentially executed. Does not include queries that failed to parse or were rejected due to AST size limits, quota limits or limits on the number of simultaneously running queries. May include internal queries initiated by ClickHouse itself. Does not count subqueries. │ +│ SelectQuery │ 8 │ Same as Query, but only for SELECT queries. │ +│ FileOpen │ 73 │ Number of files opened. │ +│ ReadBufferFromFileDescriptorRead │ 155 │ Number of reads (read/pread) from a file descriptor. Does not include sockets. │ +│ ReadBufferFromFileDescriptorReadBytes │ 9931 │ Number of bytes read from file descriptors. If the file is compressed, this will show the compressed data size. 
│ +└───────────────────────────────────────┴───────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +**另请参阅** + +- [系统。asynchronous\_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics. +- [系统。指标](../../operations/system-tables/metrics.md#system_tables-metrics) — Contains instantly calculated metrics. +- [系统。metric\_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`. +- [监测](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. diff --git a/docs/zh/operations/system-tables/functions.md b/docs/zh/operations/system-tables/functions.md new file mode 100644 index 00000000000..ff716b0bc6c --- /dev/null +++ b/docs/zh/operations/system-tables/functions.md @@ -0,0 +1,13 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +--- + +# 系统。功能 {#system-functions} + +包含有关正常函数和聚合函数的信息。 + +列: + +- `name`(`String`) – The name of the function. +- `is_aggregate`(`UInt8`) — Whether the function is aggregate. diff --git a/docs/zh/operations/system-tables/graphite_retentions.md b/docs/zh/operations/system-tables/graphite_retentions.md new file mode 100644 index 00000000000..b2579541920 --- /dev/null +++ b/docs/zh/operations/system-tables/graphite_retentions.md @@ -0,0 +1,20 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +--- + +# 系统。graphite\_retentions {#system-graphite-retentions} + +包含有关参数的信息 [graphite\_rollup](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-graphite) 这是在表中使用 [\*GraphiteMergeTree](../../engines/table-engines/mergetree-family/graphitemergetree.md) 引擎 + +列: + +- `config_name` (字符串) - `graphite_rollup` 参数名称。 +- `regexp` (String)-指标名称的模式。 +- `function` (String)-聚合函数的名称。 +- `age` (UInt64)-以秒为单位的数据的最小期限。 +- `precision` (UInt64)-如何精确地定义以秒为单位的数据的年龄。 +- `priority` (UInt16)-模式优先级。 +- `is_default` (UInt8)-模式是否为默认值。 +- `Tables.database` (Array(String))-使用数据库表名称的数组 `config_name` 参数。 +- `Tables.table` (Array(String))-使用表名称的数组 `config_name` 参数。 diff --git a/docs/zh/operations/system-tables/index.md b/docs/zh/operations/system-tables/index.md new file mode 100644 index 00000000000..73a57300de8 --- /dev/null +++ b/docs/zh/operations/system-tables/index.md @@ -0,0 +1,50 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +toc_priority: 52 +toc_title: "\u7CFB\u7EDF\u8868" +--- + +# 系统表 {#system-tables} + +## 导言 {#system-tables-introduction} + +系统表提供以下信息: + +- 服务器状态、进程和环境。 +- 服务器的内部进程。 + +系统表: + +- 坐落于 `system` 数据库。 +- 仅适用于读取数据。 +- 不能删除或更改,但可以分离。 + +大多数系统表将数据存储在RAM中。 ClickHouse服务器在开始时创建此类系统表。 + +与其他系统表不同,系统表 [metric\_log](../../operations/system-tables/metric_log.md#system_tables-metric_log), [query\_log](../../operations/system-tables/query_log.md#system_tables-query_log), [query\_thread\_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log), [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) 由 [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) 表引擎并将其数据存储在存储文件系统中。 如果从文件系统中删除表,ClickHouse服务器会在下一次写入数据时再次创建空表。 
如果系统表架构在新版本中发生更改,则ClickHouse会重命名当前表并创建一个新表。
+
+默认情况下,表增长是无限的。 要控制表的大小,可以使用 [TTL](../../sql-reference/statements/alter.md#manipulations-with-table-ttl) 设置来删除过期的日志记录。 你也可以对基于 `MergeTree` 引擎的表使用分区功能。
+
+## 系统指标的来源 {#system-tables-sources-of-system-metrics}
+
+用于收集ClickHouse服务器使用的系统指标:
+
+- `CAP_NET_ADMIN` 能力。
+- [procfs](https://en.wikipedia.org/wiki/Procfs) (仅在Linux中)。
+
+**procfs**
+
+如果ClickHouse服务器没有 `CAP_NET_ADMIN` 能力,它会尝试回退到 `ProcfsMetricsProvider`。 `ProcfsMetricsProvider` 允许收集每个查询的系统指标(用于CPU和I/O)。
+
+如果系统上支持并启用procfs,ClickHouse server将收集这些指标:
+
+- `OSCPUVirtualTimeMicroseconds`
+- `OSCPUWaitMicroseconds`
+- `OSIOWaitMicroseconds`
+- `OSReadChars`
+- `OSWriteChars`
+- `OSReadBytes`
+- `OSWriteBytes`
+
+[原始文章](https://clickhouse.tech/docs/en/operations/system-tables/)
diff --git a/docs/zh/operations/system-tables/merge_tree_settings.md b/docs/zh/operations/system-tables/merge_tree_settings.md
new file mode 100644
index 00000000000..d2a5f64ba21
--- /dev/null
+++ b/docs/zh/operations/system-tables/merge_tree_settings.md
@@ -0,0 +1,16 @@
+---
+machine_translated: true
+machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
+---
+
+# 系统。merge\_tree\_settings {#system-merge_tree_settings}
+
+包含有关 `MergeTree` 表的设置信息。
+
+列:
+
+- `name` (String) — Setting name.
+- `value` (String) — Setting value.
+- `description` (String) — Setting description.
+- `type` (String) — Setting type (implementation specific string value).
+- `changed` (UInt8) — Whether the setting was explicitly defined in the config or explicitly changed.
diff --git a/docs/zh/operations/system-tables/merges.md b/docs/zh/operations/system-tables/merges.md
new file mode 100644
index 00000000000..f5cf2a56118
--- /dev/null
+++ b/docs/zh/operations/system-tables/merges.md
@@ -0,0 +1,24 @@
+---
+machine_translated: true
+machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
+---
+
+# 系统。合并 {#system-merges}
+
+包含有关MergeTree系列中表当前正在进行的合并和数据部分突变的信息。
+
+列:
+
+- `database` (String) — The name of the database the table is in.
+- `table` (String) — Table name.
+- `elapsed` (Float64) — The time elapsed (in seconds) since the merge started.
+- `progress` (Float64) — The percentage of completed work from 0 to 1.
+- `num_parts` (UInt64) — The number of pieces to be merged.
+- `result_part_name` (String) — The name of the part that will be formed as the result of merging.
+- `is_mutation` (UInt8)-如果该过程是一次数据部分突变,则为 1。
+- `total_size_bytes_compressed` (UInt64) — The total size of the compressed data in the merged chunks.
+- `total_size_marks` (UInt64) — The total number of marks in the merged parts.
+- `bytes_read_uncompressed` (UInt64) — Number of bytes read, uncompressed.
+- `rows_read` (UInt64) — Number of rows read.
+- `bytes_written_uncompressed` (UInt64) — Number of bytes written, uncompressed.
+- `rows_written` (UInt64) — Number of rows written.
diff --git a/docs/zh/operations/system-tables/metric_log.md b/docs/zh/operations/system-tables/metric_log.md
new file mode 100644
index 00000000000..46b28f8d2f8
--- /dev/null
+++ b/docs/zh/operations/system-tables/metric_log.md
@@ -0,0 +1,60 @@
+---
+machine_translated: true
+machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
+---
+
+# 系统。metric\_log {#system_tables-metric_log}
+
+包含表 `system.metrics` 和 `system.events` 中指标值的历史记录,定期刷新到磁盘。
+要开启 `system.metric_log` 的指标历史记录收集,请创建 `/etc/clickhouse-server/config.d/metric_log.xml`,内容如下:
+
+``` xml
+<yandex>
+    <metric_log>
+        <database>system</database>
+        <table>metric_log</table>
+        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+        <collect_interval_milliseconds>1000</collect_interval_milliseconds>
+    </metric_log>
+</yandex>
+``` + +**示例** + +``` sql +SELECT * FROM system.metric_log LIMIT 1 FORMAT Vertical; +``` + +``` text +Row 1: +────── +event_date: 2020-02-18 +event_time: 2020-02-18 07:15:33 +milliseconds: 554 +ProfileEvent_Query: 0 +ProfileEvent_SelectQuery: 0 +ProfileEvent_InsertQuery: 0 +ProfileEvent_FileOpen: 0 +ProfileEvent_Seek: 0 +ProfileEvent_ReadBufferFromFileDescriptorRead: 1 +ProfileEvent_ReadBufferFromFileDescriptorReadFailed: 0 +ProfileEvent_ReadBufferFromFileDescriptorReadBytes: 0 +ProfileEvent_WriteBufferFromFileDescriptorWrite: 1 +ProfileEvent_WriteBufferFromFileDescriptorWriteFailed: 0 +ProfileEvent_WriteBufferFromFileDescriptorWriteBytes: 56 +... +CurrentMetric_Query: 0 +CurrentMetric_Merge: 0 +CurrentMetric_PartMutation: 0 +CurrentMetric_ReplicatedFetch: 0 +CurrentMetric_ReplicatedSend: 0 +CurrentMetric_ReplicatedChecks: 0 +... +``` + +**另请参阅** + +- [系统。asynchronous\_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics. +- [系统。活动](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that occurred. +- [系统。指标](../../operations/system-tables/metrics.md#system_tables-metrics) — Contains instantly calculated metrics. +- [监测](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. diff --git a/docs/zh/operations/system-tables/metrics.md b/docs/zh/operations/system-tables/metrics.md new file mode 100644 index 00000000000..1bf74524785 --- /dev/null +++ b/docs/zh/operations/system-tables/metrics.md @@ -0,0 +1,44 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +--- + +# 系统。指标 {#system_tables-metrics} + +包含可以立即计算或具有当前值的指标。 例如,同时处理的查询的数量或当前副本的延迟。 此表始终是最新的。 + +列: + +- `metric` ([字符串](../../sql-reference/data-types/string.md)) — Metric name. +- `value` ([Int64](../../sql-reference/data-types/int-uint.md)) — Metric value. +- `description` ([字符串](../../sql-reference/data-types/string.md)) — Metric description. + +支持的指标列表,您可以在 [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp) ClickHouse的源文件。 + +**示例** + +``` sql +SELECT * FROM system.metrics LIMIT 10 +``` + +``` text +┌─metric─────────────────────┬─value─┬─description──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ Query │ 1 │ Number of executing queries │ +│ Merge │ 0 │ Number of executing background merges │ +│ PartMutation │ 0 │ Number of mutations (ALTER DELETE/UPDATE) │ +│ ReplicatedFetch │ 0 │ Number of data parts being fetched from replicas │ +│ ReplicatedSend │ 0 │ Number of data parts being sent to replicas │ +│ ReplicatedChecks │ 0 │ Number of data parts checking for consistency │ +│ BackgroundPoolTask │ 0 │ Number of active tasks in BackgroundProcessingPool (merges, mutations, fetches, or replication queue bookkeeping) │ +│ BackgroundSchedulePoolTask │ 0 │ Number of active tasks in BackgroundSchedulePool. This pool is used for periodic ReplicatedMergeTree tasks, like cleaning old data parts, altering data parts, replica re-initialization, etc. │ +│ DiskSpaceReservedForMerge │ 0 │ Disk space reserved for currently running background merges. It is slightly more than the total size of currently merging parts. │ +│ DistributedSend │ 0 │ Number of connections to remote servers sending data that was INSERTed into Distributed tables. 
Both synchronous and asynchronous mode. │ +└────────────────────────────┴───────┴──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +**另请参阅** + +- [系统。asynchronous\_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics. +- [系统。活动](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that occurred. +- [系统。metric\_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`. +- [监测](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. diff --git a/docs/zh/operations/system-tables/mutations.md b/docs/zh/operations/system-tables/mutations.md new file mode 100644 index 00000000000..8e2d66a42e8 --- /dev/null +++ b/docs/zh/operations/system-tables/mutations.md @@ -0,0 +1,30 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +--- + +# 系统。突变 {#system_tables-mutations} + +该表包含以下信息 [突变](../../sql-reference/statements/alter.md#alter-mutations) MergeTree表及其进展。 每个突变命令由一行表示。 该表具有以下列: + +**数据库**, **表** -应用突变的数据库和表的名称。 + +**mutation\_id** -变异的ID 对于复制的表,这些Id对应于znode中的名称 `/mutations/` 动物园管理员的目录。 对于未复制的表,Id对应于表的数据目录中的文件名。 + +**命令** -Mutation命令字符串(查询后的部分 `ALTER TABLE [db.]table`). + +**create\_time** -当这个突变命令被提交执行。 + +**block\_numbers.partition\_id**, **block\_numbers.编号** -嵌套列。 对于复制表的突变,它包含每个分区的一条记录:分区ID和通过突变获取的块编号(在每个分区中,只有包含编号小于该分区中突变获取的块编号的块的 在非复制表中,所有分区中的块编号形成一个序列。 这意味着对于非复制表的突变,该列将包含一条记录,其中包含由突变获取的单个块编号。 + +**parts\_to\_do** -为了完成突变,需要突变的数据部分的数量。 + +**is\_done** -变异完成了?? 
请注意,即使 `parts_to_do = 0` 由于长时间运行的INSERT将创建需要突变的新数据部分,因此可能尚未完成复制表的突变。 + +如果在改变某些部分时出现问题,以下列将包含其他信息: + +**latest\_failed\_part** -不能变异的最新部分的名称。 + +**latest\_fail\_time** -最近的部分突变失败的时间。 + +**latest\_fail\_reason** -导致最近部件变异失败的异常消息。 diff --git a/docs/zh/operations/system-tables/numbers.md b/docs/zh/operations/system-tables/numbers.md new file mode 100644 index 00000000000..c42c87053ca --- /dev/null +++ b/docs/zh/operations/system-tables/numbers.md @@ -0,0 +1,12 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +--- + +# 系统。数字 {#system-numbers} + +此表包含一个名为UInt64的列 `number` 它包含几乎所有从零开始的自然数。 + +您可以使用此表进行测试,或者如果您需要进行暴力搜索。 + +从此表中读取的内容不是并行的。 diff --git a/docs/zh/operations/system-tables/numbers_mt.md b/docs/zh/operations/system-tables/numbers_mt.md new file mode 100644 index 00000000000..90addea157d --- /dev/null +++ b/docs/zh/operations/system-tables/numbers_mt.md @@ -0,0 +1,10 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +--- + +# 系统。numbers\_mt {#system-numbers-mt} + +一样的 [系统。数字](../../operations/system-tables/numbers.md) 但读取是并行的。 这些数字可以以任何顺序返回。 + +用于测试。 diff --git a/docs/zh/operations/system-tables/one.md b/docs/zh/operations/system-tables/one.md new file mode 100644 index 00000000000..a8dc64c18c7 --- /dev/null +++ b/docs/zh/operations/system-tables/one.md @@ -0,0 +1,12 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +--- + +# 系统。一 {#system-one} + +此表包含一行,其中包含一行 `dummy` UInt8列包含值0。 + +如果使用此表 `SELECT` 查询不指定 `FROM` 条款 + +这类似于 `DUAL` 表在其他Dbms中找到。 diff --git a/docs/zh/operations/system-tables/part_log.md b/docs/zh/operations/system-tables/part_log.md new file mode 100644 index 00000000000..b8388455948 --- /dev/null +++ b/docs/zh/operations/system-tables/part_log.md @@ -0,0 +1,37 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +--- + +# 系统。part\_log {#system_tables-part-log} + +该 `system.part_log` 表只有当创建 [part\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-part-log) 指定了服务器设置。 + +此表包含与以下情况发生的事件有关的信息 [数据部分](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) 在 [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) 家庭表,例如添加或合并数据。 + +该 `system.part_log` 表包含以下列: + +- `event_type` (Enum) — Type of the event that occurred with the data part. Can have one of the following values: + - `NEW_PART` — Inserting of a new data part. + - `MERGE_PARTS` — Merging of data parts. + - `DOWNLOAD_PART` — Downloading a data part. + - `REMOVE_PART` — Removing or detaching a data part using [DETACH PARTITION](../../sql-reference/statements/alter.md#alter_detach-partition). + - `MUTATE_PART` — Mutating of a data part. + - `MOVE_PART` — Moving the data part from the one disk to another one. +- `event_date` (Date) — Event date. +- `event_time` (DateTime) — Event time. +- `duration_ms` (UInt64) — Duration. +- `database` (String) — Name of the database the data part is in. +- `table` (String) — Name of the table the data part is in. +- `part_name` (String) — Name of the data part. +- `partition_id` (String) — ID of the partition that the data part was inserted to. The column takes the ‘all’ 值,如果分区是由 `tuple()`. +- `rows` (UInt64) — The number of rows in the data part. +- `size_in_bytes` (UInt64) — Size of the data part in bytes. 
+- `merged_from` (Array(String)) — An array of names of the parts which the current part was made up from (after the merge). +- `bytes_uncompressed` (UInt64) — Size of uncompressed bytes. +- `read_rows` (UInt64) — The number of rows was read during the merge. +- `read_bytes` (UInt64) — The number of bytes was read during the merge. +- `error` (UInt16) — The code number of the occurred error. +- `exception` (String) — Text message of the occurred error. + +该 `system.part_log` 表的第一个插入数据到后创建 `MergeTree` 桌子 diff --git a/docs/zh/operations/system-tables/parts.md b/docs/zh/operations/system-tables/parts.md new file mode 100644 index 00000000000..e924ee27df3 --- /dev/null +++ b/docs/zh/operations/system-tables/parts.md @@ -0,0 +1,85 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +--- + +# 系统。零件 {#system_tables-parts} + +包含有关的部分信息 [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) 桌子 + +每行描述一个数据部分。 + +列: + +- `partition` (String) – The partition name. To learn what a partition is, see the description of the [ALTER](../../sql-reference/statements/alter.md#query_language_queries_alter) 查询。 + + 格式: + + - `YYYYMM` 用于按月自动分区。 + - `any_string` 手动分区时。 + +- `name` (`String`) – Name of the data part. + +- `active` (`UInt8`) – Flag that indicates whether the data part is active. If a data part is active, it's used in a table. Otherwise, it's deleted. Inactive data parts remain after merging. + +- `marks` (`UInt64`) – The number of marks. To get the approximate number of rows in a data part, multiply `marks` 通过索引粒度(通常为8192)(此提示不适用于自适应粒度)。 + +- `rows` (`UInt64`) – The number of rows. + +- `bytes_on_disk` (`UInt64`) – Total size of all the data part files in bytes. + +- `data_compressed_bytes` (`UInt64`) – Total size of compressed data in the data part. All the auxiliary files (for example, files with marks) are not included. + +- `data_uncompressed_bytes` (`UInt64`) – Total size of uncompressed data in the data part. All the auxiliary files (for example, files with marks) are not included. + +- `marks_bytes` (`UInt64`) – The size of the file with marks. + +- `modification_time` (`DateTime`) – The time the directory with the data part was modified. This usually corresponds to the time of data part creation.\| + +- `remove_time` (`DateTime`) – The time when the data part became inactive. + +- `refcount` (`UInt32`) – The number of places where the data part is used. A value greater than 2 indicates that the data part is used in queries or merges. + +- `min_date` (`Date`) – The minimum value of the date key in the data part. + +- `max_date` (`Date`) – The maximum value of the date key in the data part. + +- `min_time` (`DateTime`) – The minimum value of the date and time key in the data part. + +- `max_time`(`DateTime`) – The maximum value of the date and time key in the data part. + +- `partition_id` (`String`) – ID of the partition. + +- `min_block_number` (`UInt64`) – The minimum number of data parts that make up the current part after merging. + +- `max_block_number` (`UInt64`) – The maximum number of data parts that make up the current part after merging. + +- `level` (`UInt32`) – Depth of the merge tree. Zero means that the current part was created by insert rather than by merging other parts. + +- `data_version` (`UInt64`) – Number that is used to determine which mutations should be applied to the data part (mutations with a version higher than `data_version`). 
+ +- `primary_key_bytes_in_memory` (`UInt64`) – The amount of memory (in bytes) used by primary key values. + +- `primary_key_bytes_in_memory_allocated` (`UInt64`) – The amount of memory (in bytes) reserved for primary key values. + +- `is_frozen` (`UInt8`) – Flag that shows that a partition data backup exists. 1, the backup exists. 0, the backup doesn't exist. For more details, see [FREEZE PARTITION](../../sql-reference/statements/alter.md#alter_freeze-partition) + +- `database` (`String`) – Name of the database. + +- `table` (`String`) – Name of the table. + +- `engine` (`String`) – Name of the table engine without parameters. + +- `path` (`String`) – Absolute path to the folder with data part files. + +- `disk` (`String`) – Name of a disk that stores the data part. + +- `hash_of_all_files` (`String`) – [sipHash128](../../sql-reference/functions/hash-functions.md#hash_functions-siphash128) 的压缩文件。 + +- `hash_of_uncompressed_files` (`String`) – [sipHash128](../../sql-reference/functions/hash-functions.md#hash_functions-siphash128) 未压缩的文件(带标记的文件,索引文件等。). + +- `uncompressed_hash_of_compressed_files` (`String`) – [sipHash128](../../sql-reference/functions/hash-functions.md#hash_functions-siphash128) 压缩文件中的数据,就好像它们是未压缩的。 + +- `bytes` (`UInt64`) – Alias for `bytes_on_disk`. + +- `marks_size` (`UInt64`) – Alias for `marks_bytes`. diff --git a/docs/zh/operations/system-tables/processes.md b/docs/zh/operations/system-tables/processes.md new file mode 100644 index 00000000000..c42b7e59827 --- /dev/null +++ b/docs/zh/operations/system-tables/processes.md @@ -0,0 +1,20 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +--- + +# 系统。流程 {#system_tables-processes} + +该系统表用于实现 `SHOW PROCESSLIST` 查询。 + +列: + +- `user` (String) – The user who made the query. Keep in mind that for distributed processing, queries are sent to remote servers under the `default` 用户。 该字段包含特定查询的用户名,而不是此查询启动的查询的用户名。 +- `address` (String) – The IP address the request was made from. The same for distributed processing. To track where a distributed query was originally made from, look at `system.processes` 查询请求者服务器上。 +- `elapsed` (Float64) – The time in seconds since request execution started. +- `rows_read` (UInt64) – The number of rows read from the table. For distributed processing, on the requestor server, this is the total for all remote servers. +- `bytes_read` (UInt64) – The number of uncompressed bytes read from the table. For distributed processing, on the requestor server, this is the total for all remote servers. +- `total_rows_approx` (UInt64) – The approximation of the total number of rows that should be read. For distributed processing, on the requestor server, this is the total for all remote servers. It can be updated during request processing, when new sources to process become known. +- `memory_usage` (UInt64) – Amount of RAM the request uses. It might not include some types of dedicated memory. See the [max\_memory\_usage](../../operations/settings/query-complexity.md#settings_max_memory_usage) 设置。 +- `query` (String) – The query text. For `INSERT`,它不包括要插入的数据。 +- `query_id` (String) – Query ID, if defined. 
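+
+作为补充,下面给出一个最小示意查询(非权威示例,仅演示上述列的用法),效果与 `SHOW PROCESSLIST` 类似:
+
+``` sql
+-- 按已执行时间倒序查看当前正在执行的查询
+SELECT query_id, user, address, elapsed, read_rows, memory_usage, query
+FROM system.processes
+ORDER BY elapsed DESC
+```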
diff --git a/docs/zh/operations/system-tables/query_log.md b/docs/zh/operations/system-tables/query_log.md new file mode 100644 index 00000000000..7658196b81b --- /dev/null +++ b/docs/zh/operations/system-tables/query_log.md @@ -0,0 +1,143 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +--- + +# 系统。query\_log {#system_tables-query_log} + +包含有关已执行查询的信息,例如,开始时间、处理持续时间、错误消息。 + +!!! note "注" + 此表不包含以下内容的摄取数据 `INSERT` 查询。 + +您可以更改查询日志记录的设置 [query\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log) 服务器配置部分。 + +您可以通过设置禁用查询日志记录 [log\_queries=0](../../operations/settings/settings.md#settings-log-queries). 我们不建议关闭日志记录,因为此表中的信息对于解决问题很重要。 + +数据的冲洗周期设置在 `flush_interval_milliseconds` 的参数 [query\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log) 服务器设置部分。 要强制冲洗,请使用 [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) 查询。 + +ClickHouse不会自动从表中删除数据。 看 [导言](../../operations/system-tables/index.md#system-tables-introduction) 欲了解更多详情。 + +该 `system.query_log` 表注册两种查询: + +1. 客户端直接运行的初始查询。 +2. 由其他查询启动的子查询(用于分布式查询执行)。 对于这些类型的查询,有关父查询的信息显示在 `initial_*` 列。 + +每个查询创建一个或两个行中 `query_log` 表,这取决于状态(见 `type` 列)的查询: + +1. 如果查询执行成功,则两行具有 `QueryStart` 和 `QueryFinish` 创建类型。 +2. 如果在查询处理过程中发生错误,两个事件与 `QueryStart` 和 `ExceptionWhileProcessing` 创建类型。 +3. 如果在启动查询之前发生错误,则单个事件具有 `ExceptionBeforeStart` 创建类型。 + +列: + +- `type` ([枚举8](../../sql-reference/data-types/enum.md)) — Type of an event that occurred when executing the query. Values: + - `'QueryStart' = 1` — Successful start of query execution. + - `'QueryFinish' = 2` — Successful end of query execution. + - `'ExceptionBeforeStart' = 3` — Exception before the start of query execution. + - `'ExceptionWhileProcessing' = 4` — Exception during the query execution. +- `event_date` ([日期](../../sql-reference/data-types/date.md)) — Query starting date. +- `event_time` ([日期时间](../../sql-reference/data-types/datetime.md)) — Query starting time. +- `query_start_time` ([日期时间](../../sql-reference/data-types/datetime.md)) — Start time of query execution. +- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of query execution in milliseconds. +- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total number or rows read from all tables and table functions participated in query. It includes usual subqueries, subqueries for `IN` 和 `JOIN`. 对于分布式查询 `read_rows` 包括在所有副本上读取的行总数。 每个副本发送它的 `read_rows` 值,并且查询的服务器-发起方汇总所有接收到的和本地的值。 缓存卷不会影响此值。 +- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total number or bytes read from all tables and table functions participated in query. It includes usual subqueries, subqueries for `IN` 和 `JOIN`. 对于分布式查询 `read_bytes` 包括在所有副本上读取的行总数。 每个副本发送它的 `read_bytes` 值,并且查询的服务器-发起方汇总所有接收到的和本地的值。 缓存卷不会影响此值。 +- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` 查询,写入的行数。 对于其他查询,列值为0。 +- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` 查询时,写入的字节数。 对于其他查询,列值为0。 +- `result_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of rows in a result of the `SELECT` 查询,或者在一些行 `INSERT` 查询。 +- `result_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — RAM volume in bytes used to store a query result. 
+- `memory_usage` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Memory consumption by the query.
+- `query` ([字符串](../../sql-reference/data-types/string.md)) — Query string.
+- `exception` ([字符串](../../sql-reference/data-types/string.md)) — Exception message.
+- `exception_code` ([Int32](../../sql-reference/data-types/int-uint.md)) — Code of an exception.
+- `stack_trace` ([字符串](../../sql-reference/data-types/string.md)) — [堆栈跟踪](https://en.wikipedia.org/wiki/Stack_trace). 如果查询成功完成,则为空字符串。
+- `is_initial_query` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Query type. Possible values:
+    - 1 — Query was initiated by the client.
+    - 0 — Query was initiated by another query as part of distributed query execution.
+- `user` ([字符串](../../sql-reference/data-types/string.md)) — Name of the user who initiated the current query.
+- `query_id` ([字符串](../../sql-reference/data-types/string.md)) — ID of the query.
+- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that was used to make the query.
+- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The client port that was used to make the query.
+- `initial_user` ([字符串](../../sql-reference/data-types/string.md)) — Name of the user who ran the initial query (for distributed query execution).
+- `initial_query_id` ([字符串](../../sql-reference/data-types/string.md)) — ID of the initial query (for distributed query execution).
+- `initial_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that the parent query was launched from.
+- `initial_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The client port that was used to make the parent query.
+- `interface` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Interface that the query was initiated from. Possible values:
+    - 1 — TCP.
+    - 2 — HTTP.
+- `os_user` ([字符串](../../sql-reference/data-types/string.md)) — Operating system username who runs [命令行客户端](../../interfaces/cli.md).
+- `client_hostname` ([字符串](../../sql-reference/data-types/string.md)) — Hostname of the client machine where the [命令行客户端](../../interfaces/cli.md) 或者另一个TCP客户端运行。
+- `client_name` ([字符串](../../sql-reference/data-types/string.md)) — The [命令行客户端](../../interfaces/cli.md) 或另一个TCP客户端的名称。
+- `client_revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Revision of the [命令行客户端](../../interfaces/cli.md) 或另一个TCP客户端。
+- `client_version_major` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Major version of the [命令行客户端](../../interfaces/cli.md) 或另一个TCP客户端。
+- `client_version_minor` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Minor version of the [命令行客户端](../../interfaces/cli.md) 或另一个TCP客户端。
+- `client_version_patch` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Patch component of the [命令行客户端](../../interfaces/cli.md) 或另一个TCP客户端的版本。
+- `http_method` (UInt8) — HTTP method that initiated the query. Possible values:
+    - 0 — The query was launched from the TCP interface.
+    - 1 — `GET` 方法被使用。
+    - 2 — `POST` 方法被使用。
+- `http_user_agent` ([字符串](../../sql-reference/data-types/string.md)) — HTTP 请求中传递的 `UserAgent` 标头。
+- `quota_key` ([字符串](../../sql-reference/data-types/string.md)) — 在 [配额](../../operations/quotas.md) 设置中指定的 “quota key”(参见 `keyed`)。
+- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision.
+- `thread_numbers` ([数组(UInt32)](../../sql-reference/data-types/array.md)) — Number of threads that are participating in query execution. +- `ProfileEvents.Names` ([数组(字符串)](../../sql-reference/data-types/array.md)) — Counters that measure different metrics. The description of them could be found in the table [系统。活动](../../operations/system-tables/events.md#system_tables-events) +- `ProfileEvents.Values` ([数组(UInt64)](../../sql-reference/data-types/array.md)) — Values of metrics that are listed in the `ProfileEvents.Names` 列。 +- `Settings.Names` ([数组(字符串)](../../sql-reference/data-types/array.md)) — Names of settings that were changed when the client ran the query. To enable logging changes to settings, set the `log_query_settings` 参数为1。 +- `Settings.Values` ([数组(字符串)](../../sql-reference/data-types/array.md)) — Values of settings that are listed in the `Settings.Names` 列。 + +**示例** + +``` sql +SELECT * FROM system.query_log LIMIT 1 FORMAT Vertical; +``` + +``` text +Row 1: +────── +type: QueryStart +event_date: 2020-05-13 +event_time: 2020-05-13 14:02:28 +query_start_time: 2020-05-13 14:02:28 +query_duration_ms: 0 +read_rows: 0 +read_bytes: 0 +written_rows: 0 +written_bytes: 0 +result_rows: 0 +result_bytes: 0 +memory_usage: 0 +query: SELECT 1 +exception_code: 0 +exception: +stack_trace: +is_initial_query: 1 +user: default +query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a +address: ::ffff:127.0.0.1 +port: 57720 +initial_user: default +initial_query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a +initial_address: ::ffff:127.0.0.1 +initial_port: 57720 +interface: 1 +os_user: bayonet +client_hostname: clickhouse.ru-central1.internal +client_name: ClickHouse client +client_revision: 54434 +client_version_major: 20 +client_version_minor: 4 +client_version_patch: 1 +http_method: 0 +http_user_agent: +quota_key: +revision: 54434 +thread_ids: [] +ProfileEvents.Names: [] +ProfileEvents.Values: [] +Settings.Names: ['use_uncompressed_cache','load_balancing','log_queries','max_memory_usage'] +Settings.Values: ['0','random','1','10000000000'] +``` + +**另请参阅** + +- [系统。query\_thread\_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) — This table contains information about each query execution thread. diff --git a/docs/zh/operations/system-tables/query_thread_log.md b/docs/zh/operations/system-tables/query_thread_log.md new file mode 100644 index 00000000000..115e69ec93c --- /dev/null +++ b/docs/zh/operations/system-tables/query_thread_log.md @@ -0,0 +1,118 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +--- + +# 系统。query\_thread\_log {#system_tables-query_thread_log} + +包含有关执行查询的线程的信息,例如,线程名称、线程开始时间、查询处理的持续时间。 + +开始记录: + +1. 在配置参数 [query\_thread\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) 科。 +2. 设置 [log\_query\_threads](../../operations/settings/settings.md#settings-log-query-threads) 到1。 + +数据的冲洗周期设置在 `flush_interval_milliseconds` 的参数 [query\_thread\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) 服务器设置部分。 要强制冲洗,请使用 [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) 查询。 + +ClickHouse不会自动从表中删除数据。 看 [导言](../../operations/system-tables/index.md#system-tables-introduction) 欲了解更多详情。 + +列: + +- `event_date` ([日期](../../sql-reference/data-types/date.md)) — The date when the thread has finished execution of the query. 
+- `event_time` ([日期时间](../../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query. +- `query_start_time` ([日期时间](../../sql-reference/data-types/datetime.md)) — Start time of query execution. +- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of query execution. +- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read rows. +- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read bytes. +- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` 查询,写入的行数。 对于其他查询,列值为0。 +- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` 查询时,写入的字节数。 对于其他查询,列值为0。 +- `memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The difference between the amount of allocated and freed memory in context of this thread. +- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in context of this thread. +- `thread_name` ([字符串](../../sql-reference/data-types/string.md)) — Name of the thread. +- `thread_number` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Internal thread ID. +- `thread_id` ([Int32](../../sql-reference/data-types/int-uint.md)) — thread ID. +- `master_thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — OS initial ID of initial thread. +- `query` ([字符串](../../sql-reference/data-types/string.md)) — Query string. +- `is_initial_query` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Query type. Possible values: + - 1 — Query was initiated by the client. + - 0 — Query was initiated by another query for distributed query execution. +- `user` ([字符串](../../sql-reference/data-types/string.md)) — Name of the user who initiated the current query. +- `query_id` ([字符串](../../sql-reference/data-types/string.md)) — ID of the query. +- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that was used to make the query. +- `port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The client port that was used to make the query. +- `initial_user` ([字符串](../../sql-reference/data-types/string.md)) — Name of the user who ran the initial query (for distributed query execution). +- `initial_query_id` ([字符串](../../sql-reference/data-types/string.md)) — ID of the initial query (for distributed query execution). +- `initial_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that the parent query was launched from. +- `initial_port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The client port that was used to make the parent query. +- `interface` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Interface that the query was initiated from. Possible values: + - 1 — TCP. + - 2 — HTTP. +- `os_user` ([字符串](../../sql-reference/data-types/string.md)) — OS's username who runs [ツ环板clientョツ嘉ッツ偲](../../interfaces/cli.md). 
+- `client_hostname` ([字符串](../../sql-reference/data-types/string.md)) — Hostname of the client machine where the [ツ环板clientョツ嘉ッツ偲](../../interfaces/cli.md) 或者运行另一个TCP客户端。 +- `client_name` ([字符串](../../sql-reference/data-types/string.md)) — The [ツ环板clientョツ嘉ッツ偲](../../interfaces/cli.md) 或另一个TCP客户端名称。 +- `client_revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Revision of the [ツ环板clientョツ嘉ッツ偲](../../interfaces/cli.md) 或另一个TCP客户端。 +- `client_version_major` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Major version of the [ツ环板clientョツ嘉ッツ偲](../../interfaces/cli.md) 或另一个TCP客户端。 +- `client_version_minor` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Minor version of the [ツ环板clientョツ嘉ッツ偲](../../interfaces/cli.md) 或另一个TCP客户端。 +- `client_version_patch` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Patch component of the [ツ环板clientョツ嘉ッツ偲](../../interfaces/cli.md) 或另一个TCP客户端版本。 +- `http_method` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — HTTP method that initiated the query. Possible values: + - 0 — The query was launched from the TCP interface. + - 1 — `GET` 方法被使用。 + - 2 — `POST` 方法被使用。 +- `http_user_agent` ([字符串](../../sql-reference/data-types/string.md)) — The `UserAgent` http请求中传递的标头。 +- `quota_key` ([字符串](../../sql-reference/data-types/string.md)) — The “quota key” 在指定 [配额](../../operations/quotas.md) 设置(见 `keyed`). +- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision. +- `ProfileEvents.Names` ([数组(字符串)](../../sql-reference/data-types/array.md)) — Counters that measure different metrics for this thread. The description of them could be found in the table [系统。活动](#system_tables-events). +- `ProfileEvents.Values` ([数组(UInt64)](../../sql-reference/data-types/array.md)) — Values of metrics for this thread that are listed in the `ProfileEvents.Names` 列。 + +**示例** + +``` sql + SELECT * FROM system.query_thread_log LIMIT 1 FORMAT Vertical +``` + +``` text +Row 1: +────── +event_date: 2020-05-13 +event_time: 2020-05-13 14:02:28 +query_start_time: 2020-05-13 14:02:28 +query_duration_ms: 0 +read_rows: 1 +read_bytes: 1 +written_rows: 0 +written_bytes: 0 +memory_usage: 0 +peak_memory_usage: 0 +thread_name: QueryPipelineEx +thread_id: 28952 +master_thread_id: 28924 +query: SELECT 1 +is_initial_query: 1 +user: default +query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a +address: ::ffff:127.0.0.1 +port: 57720 +initial_user: default +initial_query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a +initial_address: ::ffff:127.0.0.1 +initial_port: 57720 +interface: 1 +os_user: bayonet +client_hostname: clickhouse.ru-central1.internal +client_name: ClickHouse client +client_revision: 54434 +client_version_major: 20 +client_version_minor: 4 +client_version_patch: 1 +http_method: 0 +http_user_agent: +quota_key: +revision: 54434 +ProfileEvents.Names: ['ContextLock','RealTimeMicroseconds','UserTimeMicroseconds','OSCPUWaitMicroseconds','OSCPUVirtualTimeMicroseconds'] +ProfileEvents.Values: [1,97,81,5,81] +... 
+``` + +**另请参阅** + +- [系统。query\_log](../../operations/system-tables/query_log.md#system_tables-query_log) — Description of the `query_log` 系统表,其中包含有关查询执行的公共信息。 diff --git a/docs/zh/operations/system-tables/replicas.md b/docs/zh/operations/system-tables/replicas.md new file mode 100644 index 00000000000..4be74b118c5 --- /dev/null +++ b/docs/zh/operations/system-tables/replicas.md @@ -0,0 +1,126 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +--- + +# 系统。副本 {#system_tables-replicas} + +包含驻留在本地服务器上的复制表的信息和状态。 +此表可用于监视。 该表对于每个已复制的\*表都包含一行。 + +示例: + +``` sql +SELECT * +FROM system.replicas +WHERE table = 'visits' +FORMAT Vertical +``` + +``` text +Row 1: +────── +database: merge +table: visits +engine: ReplicatedCollapsingMergeTree +is_leader: 1 +can_become_leader: 1 +is_readonly: 0 +is_session_expired: 0 +future_parts: 1 +parts_to_check: 0 +zookeeper_path: /clickhouse/tables/01-06/visits +replica_name: example01-06-1.yandex.ru +replica_path: /clickhouse/tables/01-06/visits/replicas/example01-06-1.yandex.ru +columns_version: 9 +queue_size: 1 +inserts_in_queue: 0 +merges_in_queue: 1 +part_mutations_in_queue: 0 +queue_oldest_time: 2020-02-20 08:34:30 +inserts_oldest_time: 0000-00-00 00:00:00 +merges_oldest_time: 2020-02-20 08:34:30 +part_mutations_oldest_time: 0000-00-00 00:00:00 +oldest_part_to_get: +oldest_part_to_merge_to: 20200220_20284_20840_7 +oldest_part_to_mutate_to: +log_max_index: 596273 +log_pointer: 596274 +last_queue_update: 2020-02-20 08:34:32 +absolute_delay: 0 +total_replicas: 2 +active_replicas: 2 +``` + +列: + +- `database` (`String`)-数据库名称 +- `table` (`String`)-表名 +- `engine` (`String`)-表引擎名称 +- `is_leader` (`UInt8`)-副本是否是领导者。 + 一次只有一个副本可以成为领导者。 领导者负责选择要执行的后台合并。 + 请注意,可以对任何可用且在ZK中具有会话的副本执行写操作,而不管该副本是否为leader。 +- `can_become_leader` (`UInt8`)-副本是否可以当选为领导者。 +- `is_readonly` (`UInt8`)-副本是否处于只读模式。 + 如果配置没有ZooKeeper的部分,如果在ZooKeeper中重新初始化会话时发生未知错误,以及在ZooKeeper中重新初始化会话时发生未知错误,则此模式将打开。 +- `is_session_expired` (`UInt8`)-与ZooKeeper的会话已经过期。 基本上一样 `is_readonly`. +- `future_parts` (`UInt32`)-由于尚未完成的插入或合并而显示的数据部分的数量。 +- `parts_to_check` (`UInt32`)-队列中用于验证的数据部分的数量。 如果怀疑零件可能已损坏,则将其放入验证队列。 +- `zookeeper_path` (`String`)-在ZooKeeper中的表数据路径。 +- `replica_name` (`String`)-在动物园管理员副本名称. 同一表的不同副本具有不同的名称。 +- `replica_path` (`String`)-在ZooKeeper中的副本数据的路径。 与连接相同 ‘zookeeper\_path/replicas/replica\_path’. +- `columns_version` (`Int32`)-表结构的版本号。 指示执行ALTER的次数。 如果副本有不同的版本,这意味着一些副本还没有做出所有的改变。 +- `queue_size` (`UInt32`)-等待执行的操作的队列大小。 操作包括插入数据块、合并和某些其他操作。 它通常与 `future_parts`. 
+- `inserts_in_queue` (`UInt32`)-需要插入数据块的数量。 插入通常复制得相当快。 如果这个数字很大,这意味着有什么不对劲。 +- `merges_in_queue` (`UInt32`)-等待进行合并的数量。 有时合并时间很长,因此此值可能长时间大于零。 +- `part_mutations_in_queue` (`UInt32`)-等待进行的突变的数量。 +- `queue_oldest_time` (`DateTime`)-如果 `queue_size` 大于0,显示何时将最旧的操作添加到队列中。 +- `inserts_oldest_time` (`DateTime`)-看 `queue_oldest_time` +- `merges_oldest_time` (`DateTime`)-看 `queue_oldest_time` +- `part_mutations_oldest_time` (`DateTime`)-看 `queue_oldest_time` + +接下来的4列只有在有ZK活动会话的情况下才具有非零值。 + +- `log_max_index` (`UInt64`)-一般活动日志中的最大条目数。 +- `log_pointer` (`UInt64`)-副本复制到其执行队列的常规活动日志中的最大条目数加一。 如果 `log_pointer` 比 `log_max_index`,有点不对劲。 +- `last_queue_update` (`DateTime`)-上次更新队列时。 +- `absolute_delay` (`UInt64`)-当前副本有多大滞后秒。 +- `total_replicas` (`UInt8`)-此表的已知副本总数。 +- `active_replicas` (`UInt8`)-在ZooKeeper中具有会话的此表的副本的数量(即正常运行的副本的数量)。 + +如果您请求所有列,表可能会工作得有点慢,因为每行都会从ZooKeeper进行几次读取。 +如果您没有请求最后4列(log\_max\_index,log\_pointer,total\_replicas,active\_replicas),表工作得很快。 + +例如,您可以检查一切是否正常工作,如下所示: + +``` sql +SELECT + database, + table, + is_leader, + is_readonly, + is_session_expired, + future_parts, + parts_to_check, + columns_version, + queue_size, + inserts_in_queue, + merges_in_queue, + log_max_index, + log_pointer, + total_replicas, + active_replicas +FROM system.replicas +WHERE + is_readonly + OR is_session_expired + OR future_parts > 20 + OR parts_to_check > 10 + OR queue_size > 20 + OR inserts_in_queue > 10 + OR log_max_index - log_pointer > 10 + OR total_replicas < 2 + OR active_replicas < total_replicas +``` + +如果这个查询没有返回任何东西,这意味着一切都很好。 diff --git a/docs/zh/operations/system-tables/settings.md b/docs/zh/operations/system-tables/settings.md new file mode 100644 index 00000000000..c717c8c9562 --- /dev/null +++ b/docs/zh/operations/system-tables/settings.md @@ -0,0 +1,55 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +--- + +# 系统。设置 {#system-tables-system-settings} + +包含有关当前用户的会话设置的信息。 + +列: + +- `name` ([字符串](../../sql-reference/data-types/string.md)) — Setting name. +- `value` ([字符串](../../sql-reference/data-types/string.md)) — Setting value. +- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting is changed from its default value. +- `description` ([字符串](../../sql-reference/data-types/string.md)) — Short setting description. +- `min` ([可为空](../../sql-reference/data-types/nullable.md)([字符串](../../sql-reference/data-types/string.md))) — Minimum value of the setting, if any is set via [制约因素](../../operations/settings/constraints-on-settings.md#constraints-on-settings). 如果设置没有最小值,则包含 [NULL](../../sql-reference/syntax.md#null-literal). +- `max` ([可为空](../../sql-reference/data-types/nullable.md)([字符串](../../sql-reference/data-types/string.md))) — Maximum value of the setting, if any is set via [制约因素](../../operations/settings/constraints-on-settings.md#constraints-on-settings). 如果设置没有最大值,则包含 [NULL](../../sql-reference/syntax.md#null-literal). +- `readonly` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether the current user can change the setting: + - `0` — Current user can change the setting. + - `1` — Current user can't change the setting. + +**示例** + +下面的示例演示如何获取有关名称包含的设置的信息 `min_i`. 
+ +``` sql +SELECT * +FROM system.settings +WHERE name LIKE '%min_i%' +``` + +``` text +┌─name────────────────────────────────────────┬─value─────┬─changed─┬─description───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─min──┬─max──┬─readonly─┐ +│ min_insert_block_size_rows │ 1048576 │ 0 │ Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ +│ min_insert_block_size_bytes │ 268435456 │ 0 │ Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ +│ read_backoff_min_interval_between_events_ms │ 1000 │ 0 │ Settings to reduce the number of threads in case of slow reads. Do not pay attention to the event, if the previous one has passed less than a certain amount of time. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ +└─────────────────────────────────────────────┴───────────┴─────────┴───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴──────┴──────┴──────────┘ +``` + +使用 `WHERE changed` 可以是有用的,例如,当你想检查: + +- 配置文件中的设置是否正确加载并正在使用。 +- 在当前会话中更改的设置。 + + + +``` sql +SELECT * FROM system.settings WHERE changed AND name='load_balancing' +``` + +**另请参阅** + +- [设置](../../operations/settings/index.md#session-settings-intro) +- [查询权限](../../operations/settings/permissions-for-queries.md#settings_readonly) +- [对设置的限制](../../operations/settings/constraints-on-settings.md) diff --git a/docs/zh/operations/system-tables/storage_policies.md b/docs/zh/operations/system-tables/storage_policies.md new file mode 100644 index 00000000000..29347aa36c2 --- /dev/null +++ b/docs/zh/operations/system-tables/storage_policies.md @@ -0,0 +1,19 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +--- + +# 系统。storage\_policies {#system_tables-storage_policies} + +包含有关存储策略和卷中定义的信息 [服务器配置](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes_configure). + +列: + +- `policy_name` ([字符串](../../sql-reference/data-types/string.md)) — Name of the storage policy. +- `volume_name` ([字符串](../../sql-reference/data-types/string.md)) — Volume name defined in the storage policy. +- `volume_priority` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Volume order number in the configuration. +- `disks` ([数组(字符串)](../../sql-reference/data-types/array.md)) — Disk names, defined in the storage policy. +- `max_data_part_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Maximum size of a data part that can be stored on volume disks (0 — no limit). +- `move_factor` ([Float64](../../sql-reference/data-types/float.md)) — Ratio of free disk space. When the ratio exceeds the value of configuration parameter, ClickHouse start to move data to the next volume in order. + +如果存储策略包含多个卷,则每个卷的信息将存储在表的单独行中。 diff --git a/docs/zh/operations/system-tables/table_engines.md b/docs/zh/operations/system-tables/table_engines.md new file mode 100644 index 00000000000..401a3a8616d --- /dev/null +++ b/docs/zh/operations/system-tables/table_engines.md @@ -0,0 +1,40 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +--- + +# 系统。表\_engines {#system-table-engines} + +包含服务器支持的表引擎的描述及其功能支持信息。 + +此表包含以下列(列类型显示在括号中): + +- `name` (String) — The name of table engine. 
+- `supports_settings` (UInt8) — Flag that indicates if table engine supports `SETTINGS` 条款 +- `supports_skipping_indices` (UInt8) — Flag that indicates if table engine supports [跳过索引](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-data_skipping-indexes). +- `supports_ttl` (UInt8) — Flag that indicates if table engine supports [TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl). +- `supports_sort_order` (UInt8) — Flag that indicates if table engine supports clauses `PARTITION_BY`, `PRIMARY_KEY`, `ORDER_BY` 和 `SAMPLE_BY`. +- `supports_replication` (UInt8) — Flag that indicates if table engine supports [数据复制](../../engines/table-engines/mergetree-family/replication.md). +- `supports_duduplication` (UInt8) — Flag that indicates if table engine supports data deduplication. + +示例: + +``` sql +SELECT * +FROM system.table_engines +WHERE name in ('Kafka', 'MergeTree', 'ReplicatedCollapsingMergeTree') +``` + +``` text +┌─name──────────────────────────┬─supports_settings─┬─supports_skipping_indices─┬─supports_sort_order─┬─supports_ttl─┬─supports_replication─┬─supports_deduplication─┐ +│ Kafka │ 1 │ 0 │ 0 │ 0 │ 0 │ 0 │ +│ MergeTree │ 1 │ 1 │ 1 │ 1 │ 0 │ 0 │ +│ ReplicatedCollapsingMergeTree │ 1 │ 1 │ 1 │ 1 │ 1 │ 1 │ +└───────────────────────────────┴───────────────────┴───────────────────────────┴─────────────────────┴──────────────┴──────────────────────┴────────────────────────┘ +``` + +**另请参阅** + +- 梅树家族 [查询子句](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-query-clauses) +- 卡夫卡 [设置](../../engines/table-engines/integrations/kafka.md#table_engine-kafka-creating-a-table) +- 加入我们 [设置](../../engines/table-engines/special/join.md#join-limitations-and-settings) diff --git a/docs/zh/operations/system-tables/tables.md b/docs/zh/operations/system-tables/tables.md new file mode 100644 index 00000000000..a690e938a3a --- /dev/null +++ b/docs/zh/operations/system-tables/tables.md @@ -0,0 +1,54 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +--- + +# 系统。表 {#system-tables} + +包含服务器知道的每个表的元数据。 分离的表不显示在 `system.tables`. + +此表包含以下列(列类型显示在括号中): + +- `database` (String) — The name of the database the table is in. + +- `name` (String) — Table name. + +- `engine` (String) — Table engine name (without parameters). + +- `is_temporary` (UInt8)-指示表是否是临时的标志。 + +- `data_path` (String)-文件系统中表数据的路径。 + +- `metadata_path` (String)-文件系统中表元数据的路径。 + +- `metadata_modification_time` (DateTime)-表元数据的最新修改时间。 + +- `dependencies_database` (数组(字符串))-数据库依赖关系. + +- `dependencies_table` (数组(字符串))-表依赖关系 ([MaterializedView](../../engines/table-engines/special/materializedview.md) 基于当前表的表)。 + +- `create_table_query` (String)-用于创建表的查询。 + +- `engine_full` (String)-表引擎的参数。 + +- `partition_key` (String)-表中指定的分区键表达式。 + +- `sorting_key` (String)-表中指定的排序键表达式。 + +- `primary_key` (String)-表中指定的主键表达式。 + +- `sampling_key` (String)-表中指定的采样键表达式。 + +- `storage_policy` (字符串)-存储策略: + + - [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) + - [分布](../../engines/table-engines/special/distributed.md#distributed) + +- `total_rows` (Nullable(UInt64))-总行数,如果可以快速确定表中的确切行数,否则 `Null` (包括内衣 `Buffer` 表)。 + +- `total_bytes` (Nullable(UInt64))-总字节数,如果可以快速确定存储表的确切字节数,否则 `Null` (**不** 包括任何底层存储)。 + + - If the table stores data on disk, returns used space on disk (i.e. compressed). + - 如果表在内存中存储数据,返回在内存中使用的近似字节数. 
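+
+下面是一个简单的查询示例(仅作演示:假设服务器中存在 `system` 数据库,列名取自上面的说明,实际输出因环境而异):
+
+``` sql
+SELECT database, name, engine, total_rows
+FROM system.tables
+WHERE database = 'system'
+LIMIT 3
+```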
+ +该 `system.tables` 表中使用 `SHOW TABLES` 查询实现。 diff --git a/docs/zh/operations/system-tables/text_log.md b/docs/zh/operations/system-tables/text_log.md new file mode 100644 index 00000000000..60c7a3ed90c --- /dev/null +++ b/docs/zh/operations/system-tables/text_log.md @@ -0,0 +1,31 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +--- + +# 系统。text\_log {#system-tables-text-log} + +包含日志记录条目。 进入该表的日志记录级别可以通过以下方式进行限制 `text_log.level` 服务器设置。 + +列: + +- `event_date` (Date) — Date of the entry. +- `event_time` (DateTime) — Time of the entry. +- `microseconds` (UInt32) — Microseconds of the entry. +- `thread_name` (String) — Name of the thread from which the logging was done. +- `thread_id` (UInt64) — OS thread ID. +- `level` (`Enum8`) — Entry level. Possible values: + - `1` 或 `'Fatal'`. + - `2` 或 `'Critical'`. + - `3` 或 `'Error'`. + - `4` 或 `'Warning'`. + - `5` 或 `'Notice'`. + - `6` 或 `'Information'`. + - `7` 或 `'Debug'`. + - `8` 或 `'Trace'`. +- `query_id` (String) — ID of the query. +- `logger_name` (LowCardinality(String)) — Name of the logger (i.e. `DDLWorker`). +- `message` (String) — The message itself. +- `revision` (UInt32) — ClickHouse revision. +- `source_file` (LowCardinality(String)) — Source file from which the logging was done. +- `source_line` (UInt64) — Source line from which the logging was done. diff --git a/docs/zh/operations/system-tables/trace_log.md b/docs/zh/operations/system-tables/trace_log.md new file mode 100644 index 00000000000..3004dca707a --- /dev/null +++ b/docs/zh/operations/system-tables/trace_log.md @@ -0,0 +1,53 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +--- + +# 系统。trace\_log {#system_tables-trace_log} + +包含采样查询探查器收集的堆栈跟踪。 + +ClickHouse创建此表时 [trace\_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) 服务器配置部分被设置。 也是 [query\_profiler\_real\_time\_period\_ns](../../operations/settings/settings.md#query_profiler_real_time_period_ns) 和 [query\_profiler\_cpu\_time\_period\_ns](../../operations/settings/settings.md#query_profiler_cpu_time_period_ns) 应设置设置。 + +要分析日志,请使用 `addressToLine`, `addressToSymbol` 和 `demangle` 内省功能。 + +列: + +- `event_date` ([日期](../../sql-reference/data-types/date.md)) — Date of sampling moment. + +- `event_time` ([日期时间](../../sql-reference/data-types/datetime.md)) — Timestamp of the sampling moment. + +- `timestamp_ns` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Timestamp of the sampling moment in nanoseconds. + +- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse server build revision. + + 通过以下方式连接到服务器 `clickhouse-client`,你看到的字符串类似于 `Connected to ClickHouse server version 19.18.1 revision 54429.`. 该字段包含 `revision`,但不是 `version` 的服务器。 + +- `timer_type` ([枚举8](../../sql-reference/data-types/enum.md)) — Timer type: + + - `Real` 表示挂钟时间。 + - `CPU` 表示CPU时间。 + +- `thread_number` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Thread identifier. + +- `query_id` ([字符串](../../sql-reference/data-types/string.md)) — Query identifier that can be used to get details about a query that was running from the [query\_log](#system_tables-query_log) 系统表. + +- `trace` ([数组(UInt64)](../../sql-reference/data-types/array.md)) — Stack trace at the moment of sampling. Each element is a virtual memory address inside ClickHouse server process. 
+ +**示例** + +``` sql +SELECT * FROM system.trace_log LIMIT 1 \G +``` + +``` text +Row 1: +────── +event_date: 2019-11-15 +event_time: 2019-11-15 15:09:38 +revision: 54428 +timer_type: Real +thread_number: 48 +query_id: acc4d61f-5bd1-4a3e-bc91-2180be37c915 +trace: [94222141367858,94222152240175,94222152325351,94222152329944,94222152330796,94222151449980,94222144088167,94222151682763,94222144088167,94222151682763,94222144088167,94222144058283,94222144059248,94222091840750,94222091842302,94222091831228,94222189631488,140509950166747,140509942945935] +``` diff --git a/docs/zh/operations/system-tables/zookeeper.md b/docs/zh/operations/system-tables/zookeeper.md new file mode 100644 index 00000000000..b66e5262df3 --- /dev/null +++ b/docs/zh/operations/system-tables/zookeeper.md @@ -0,0 +1,75 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +--- + +# 系统。动物园管理员 {#system-zookeeper} + +如果未配置ZooKeeper,则表不存在。 允许从配置中定义的ZooKeeper集群读取数据。 +查询必须具有 ‘path’ WHERE子句中的平等条件。 这是ZooKeeper中您想要获取数据的孩子的路径。 + +查询 `SELECT * FROM system.zookeeper WHERE path = '/clickhouse'` 输出对所有孩子的数据 `/clickhouse` 节点。 +要输出所有根节点的数据,write path= ‘/’. +如果在指定的路径 ‘path’ 不存在,将引发异常。 + +列: + +- `name` (String) — The name of the node. +- `path` (String) — The path to the node. +- `value` (String) — Node value. +- `dataLength` (Int32) — Size of the value. +- `numChildren` (Int32) — Number of descendants. +- `czxid` (Int64) — ID of the transaction that created the node. +- `mzxid` (Int64) — ID of the transaction that last changed the node. +- `pzxid` (Int64) — ID of the transaction that last deleted or added descendants. +- `ctime` (DateTime) — Time of node creation. +- `mtime` (DateTime) — Time of the last modification of the node. +- `version` (Int32) — Node version: the number of times the node was changed. +- `cversion` (Int32) — Number of added or removed descendants. +- `aversion` (Int32) — Number of changes to the ACL. +- `ephemeralOwner` (Int64) — For ephemeral nodes, the ID of the session that owns this node. 
+ +示例: + +``` sql +SELECT * +FROM system.zookeeper +WHERE path = '/clickhouse/tables/01-08/visits/replicas' +FORMAT Vertical +``` + +``` text +Row 1: +────── +name: example01-08-1.yandex.ru +value: +czxid: 932998691229 +mzxid: 932998691229 +ctime: 2015-03-27 16:49:51 +mtime: 2015-03-27 16:49:51 +version: 0 +cversion: 47 +aversion: 0 +ephemeralOwner: 0 +dataLength: 0 +numChildren: 7 +pzxid: 987021031383 +path: /clickhouse/tables/01-08/visits/replicas + +Row 2: +────── +name: example01-08-2.yandex.ru +value: +czxid: 933002738135 +mzxid: 933002738135 +ctime: 2015-03-27 16:57:01 +mtime: 2015-03-27 16:57:01 +version: 0 +cversion: 37 +aversion: 0 +ephemeralOwner: 0 +dataLength: 0 +numChildren: 7 +pzxid: 987021252247 +path: /clickhouse/tables/01-08/visits/replicas +``` diff --git a/docs/zh/operations/utilities/clickhouse-benchmark.md b/docs/zh/operations/utilities/clickhouse-benchmark.md index d1e83cb9789..1c255f621c0 100644 --- a/docs/zh/operations/utilities/clickhouse-benchmark.md +++ b/docs/zh/operations/utilities/clickhouse-benchmark.md @@ -1,11 +1,9 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd toc_priority: 61 -toc_title: "\uFF82\u6697\uFF6A\uFF82\u6C3E\u73AF\u50AC\uFF82\u56E3" +toc_title: "性能测试" --- -# ツ暗ェツ氾环催ツ団 {#clickhouse-benchmark} +# 性能测试 {#clickhouse-benchmark} 连接到ClickHouse服务器并重复发送指定的查询。 @@ -21,7 +19,7 @@ $ echo "single query" | clickhouse-benchmark [keys] $ clickhouse-benchmark [keys] <<< "single query" ``` -如果要发送一组查询,请创建一个文本文件,并将每个查询放在此文件中的单个字符串上。 例如: +如果要发送一组查询,请创建一个文本文件,并将每个查询的字符串放在此文件中。 例如: ``` sql SELECT * FROM system.numbers LIMIT 10000000 @@ -34,15 +32,15 @@ SELECT 1 clickhouse-benchmark [keys] < queries_file ``` -## 键 {#clickhouse-benchmark-keys} +## keys参数 {#clickhouse-benchmark-keys} - `-c N`, `--concurrency=N` — Number of queries that `clickhouse-benchmark` 同时发送。 默认值:1。 - `-d N`, `--delay=N` — Interval in seconds between intermediate reports (set 0 to disable reports). Default value: 1. -- `-h WORD`, `--host=WORD` — Server host. Default value: `localhost`. 为 [比较模式](#clickhouse-benchmark-comparison-mode) 您可以使用多个 `-h` 钥匙 +- `-h WORD`, `--host=WORD` — Server host. Default value: `localhost`. 为 [比较模式](#clickhouse-benchmark-comparison-mode) 您可以使用多个 `-h` 参数 - `-p N`, `--port=N` — Server port. Default value: 9000. For the [比较模式](#clickhouse-benchmark-comparison-mode) 您可以使用多个 `-p` 钥匙 -- `-i N`, `--iterations=N` — Total number of queries. Default value: 0. -- `-r`, `--randomize` — Random order of queries execution if there is more then one input query. -- `-s`, `--secure` — Using TLS connection. +- `-i N`, `--iterations=N` — 查询的总次数. Default value: 0. +- `-r`, `--randomize` — 有多个查询时,以随机顺序执行. +- `-s`, `--secure` — 使用TLS安全连接. - `-t N`, `--timelimit=N` — Time limit in seconds. `clickhouse-benchmark` 达到指定的时间限制时停止发送查询。 默认值:0(禁用时间限制)。 - `--confidence=N` — Level of confidence for T-test. Possible values: 0 (80%), 1 (90%), 2 (95%), 3 (98%), 4 (99%), 5 (99.5%). Default value: 5. In the [比较模式](#clickhouse-benchmark-comparison-mode) `clickhouse-benchmark` 执行 [独立双样本学生的t测试](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test) 测试以确定两个分布是否与所选置信水平没有不同。 - `--cumulative` — Printing cumulative data instead of data per interval. @@ -51,14 +49,14 @@ clickhouse-benchmark [keys] < queries_file - `--user=USERNAME` — ClickHouse user name. Default value: `default`. - `--password=PSWD` — ClickHouse user password. Default value: empty string. - `--stacktrace` — Stack traces output. 
When the key is set, `clickhouse-bencmark` 输出异常的堆栈跟踪。 -- `--stage=WORD` — Query processing stage at server. ClickHouse stops query processing and returns answer to `clickhouse-benchmark` 在指定的阶段。 可能的值: `complete`, `fetch_columns`, `with_mergeable_state`. 默认值: `complete`. +- `--stage=WORD` — 查询请求的服务端处理状态. 在特定阶段Clickhouse会停止查询处理,并返回结果给`clickhouse-benchmark`。 可能的值: `complete`, `fetch_columns`, `with_mergeable_state`. 默认值: `complete`. - `--help` — Shows the help message. -如果你想申请一些 [设置](../../operations/settings/index.md) 对于查询,请将它们作为键传递 `--= SETTING_VALUE`. 例如, `--max_memory_usage=1048576`. +如果你想在查询时应用上述的部分参数 [设置](../../operations/settings/index.md) ,请将它们作为键传递 `--= SETTING_VALUE`. 例如, `--max_memory_usage=1048576`. ## 输出 {#clickhouse-benchmark-output} -默认情况下, `clickhouse-benchmark` 每个报表 `--delay` 间隔。 +默认情况下, `clickhouse-benchmark` 按照 `--delay` 参数间隔输出结果。 报告示例: @@ -83,27 +81,27 @@ localhost:9000, queries 10, QPS: 6.772, RPS: 67904487.440, MiB/s: 518.070, resul 99.990% 0.150 sec. ``` -在报告中,您可以找到: +在结果报告中,您可以找到: -- 在查询的数量 `Queries executed:` 场。 +- 查询数量:参见`Queries executed:`字段。 -- 状态字符串包含(按顺序): +- 状态码(按顺序给出): - - ClickHouse服务器的端点。 + - ClickHouse服务器的连接信息。 - 已处理的查询数。 - - QPS:QPS:在指定的时间段内每秒执行多少个查询服务器 `--delay` 争论。 - - RPS:在指定的时间段内,服务器每秒读取多少行 `--delay` 争论。 - - MiB/s:在指定的时间段内每秒读取多少mebibytes服务器 `--delay` 争论。 - - 结果RPS:在指定的时间段内,服务器每秒放置到查询结果的行数 `--delay` 争论。 - - 结果MiB/s.在指定的时间段内,服务器每秒将多少mebibytes放置到查询结果中 `--delay` 争论。 + - QPS:服务端每秒处理的查询数量 + - RPS:服务器每秒读取多少行 + - MiB/s:服务器每秒读取多少字节的数据 + - 结果RPS:服务端每秒生成多少行的结果集数据 + - 结果MiB/s.服务端每秒生成多少字节的结果集数据 -- 查询执行时间的百分位数。 +- 查询执行时间的百分比。 -## 比较模式 {#clickhouse-benchmark-comparison-mode} +## 对比模式 {#clickhouse-benchmark-comparison-mode} `clickhouse-benchmark` 可以比较两个正在运行的ClickHouse服务器的性能。 -要使用比较模式,请通过以下两对指定两个服务器的端点 `--host`, `--port` 钥匙 键在参数列表中的位置匹配在一起,第一 `--host` 与第一匹配 `--port` 等等。 `clickhouse-benchmark` 建立到两个服务器的连接,然后发送查询。 每个查询寻址到随机选择的服务器。 每个服务器的结果分别显示。 +要使用对比模式,分别为每个服务器配置各自的`--host`, `--port`参数。`clickhouse-benchmark` 会根据设置的参数建立到各个Server的连接并发送请求。每个查询请求会随机发送到某个服务器。输出结果会按服务器分组输出 ## 示例 {#clickhouse-benchmark-example} diff --git a/docs/zh/sql-reference/aggregate-functions/combinators.md b/docs/zh/sql-reference/aggregate-functions/combinators.md index c5c3e8a9577..c458097a5fb 100644 --- a/docs/zh/sql-reference/aggregate-functions/combinators.md +++ b/docs/zh/sql-reference/aggregate-functions/combinators.md @@ -1,51 +1,49 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd toc_priority: 37 -toc_title: "\u7EC4\u5408\u5668" +toc_title: 聚合函数组合器 --- # 聚合函数组合器 {#aggregate_functions_combinators} 聚合函数的名称可以附加一个后缀。 这改变了聚合函数的工作方式。 -## -如果 {#agg-functions-combinator-if} +## -If {#agg-functions-combinator-if} -The suffix -If can be appended to the name of any aggregate function. In this case, the aggregate function accepts an extra argument – a condition (Uint8 type). The aggregate function processes only the rows that trigger the condition. If the condition was not triggered even once, it returns a default value (usually zeros or empty strings). 
+-If可以加到任何聚合函数之后。加了-If之后聚合函数需要接受一个额外的参数,一个条件(Uint8类型),如果条件满足,那聚合函数处理当前的行数据,如果不满足,那返回默认值(通常是0或者空字符串)。 -例: `sumIf(column, cond)`, `countIf(cond)`, `avgIf(x, cond)`, `quantilesTimingIf(level1, level2)(x, cond)`, `argMinIf(arg, val, cond)` 等等。 +例: `sumIf(column, cond)`, `countIf(cond)`, `avgIf(x, cond)`, `quantilesTimingIf(level1, level2)(x, cond)`, `argMinIf(arg, val, cond)` 等等。 -使用条件聚合函数,您可以一次计算多个条件的聚合,而无需使用子查询和 `JOIN`例如,在Yandex的。Metrica,条件聚合函数用于实现段比较功能。 +使用条件聚合函数,您可以一次计算多个条件的聚合,而无需使用子查询和 `JOIN`例如,在Yandex.Metrica,条件聚合函数用于实现段比较功能。 -## -阵列 {#agg-functions-combinator-array} +## -Array {#agg-functions-combinator-array} -Array后缀可以附加到任何聚合函数。 在这种情况下,聚合函数采用的参数 ‘Array(T)’ 类型(数组)而不是 ‘T’ 类型参数。 如果聚合函数接受多个参数,则它必须是长度相等的数组。 在处理数组时,聚合函数的工作方式与所有数组元素的原始聚合函数类似。 -示例1: `sumArray(arr)` -总计所有的所有元素 ‘arr’ 阵列。 在这个例子中,它可以更简单地编写: `sum(arraySum(arr))`. +示例1: `sumArray(arr)` -总计所有的所有元素 ‘arr’ 阵列。在这个例子中,它可以更简单地编写: `sum(arraySum(arr))`. -示例2: `uniqArray(arr)` – Counts the number of unique elements in all ‘arr’ 阵列。 这可以做一个更简单的方法: `uniq(arrayJoin(arr))`,但它并不总是可以添加 ‘arrayJoin’ 到查询。 +示例2: `uniqArray(arr)` – 计算‘arr’中唯一元素的个数。这可以是一个更简单的方法: `uniq(arrayJoin(arr))`,但它并不总是可以添加 ‘arrayJoin’ 到查询。 --如果和-阵列可以组合。 然而, ‘Array’ 必须先来,然后 ‘If’. 例: `uniqArrayIf(arr, cond)`, `quantilesTimingArrayIf(level1, level2)(arr, cond)`. 由于这个顺序,该 ‘cond’ 参数不会是数组。 +如果和-If组合,‘Array’ 必须先来,然后 ‘If’. 例: `uniqArrayIf(arr, cond)`, `quantilesTimingArrayIf(level1, level2)(arr, cond)`。由于这个顺序,该 ‘cond’ 参数不会是数组。 -## -州 {#agg-functions-combinator-state} +## -State {#agg-functions-combinator-state} -如果应用此combinator,则聚合函数不会返回结果值(例如唯一值的数量 [uniq](reference.md#agg_function-uniq) 函数),但聚合的中间状态(用于 `uniq`,这是用于计算唯一值的数量的散列表)。 这是一个 `AggregateFunction(...)` 可用于进一步处理或存储在表中以完成聚合。 +如果应用此combinator,则聚合函数不会返回结果值(例如唯一值的数量 [uniq](reference.md#agg_function-uniq) 函数),但是返回聚合的中间状态(对于 `uniq`,返回的是计算唯一值的数量的哈希表)。 这是一个 `AggregateFunction(...)` 可用于进一步处理或存储在表中以完成稍后的聚合。 要使用这些状态,请使用: - [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md) 表引擎。 -- [最后聚会](../../sql-reference/functions/other-functions.md#function-finalizeaggregation) 功能。 -- [跑累积](../../sql-reference/functions/other-functions.md#function-runningaccumulate) 功能。 -- [-合并](#aggregate_functions_combinators-merge) combinator +- [finalizeAggregation](../../sql-reference/functions/other-functions.md#function-finalizeaggregation) 功能。 +- [runningAccumulate](../../sql-reference/functions/other-functions.md#function-runningaccumulate) 功能。 +- [-Merge](#aggregate_functions_combinators-merge) combinator - [-MergeState](#aggregate_functions_combinators-mergestate) combinator -## -合并 {#aggregate_functions_combinators-merge} +## -Merge {#aggregate_functions_combinators-merge} 如果应用此组合器,则聚合函数将中间聚合状态作为参数,组合状态以完成聚合,并返回结果值。 ## -MergeState {#aggregate_functions_combinators-mergestate} -以与-Merge combinator相同的方式合并中间聚合状态。 但是,它不会返回结果值,而是返回中间聚合状态,类似于-State combinator。 +以与-Merge 相同的方式合并中间聚合状态。 但是,它不会返回结果值,而是返回中间聚合状态,类似于-State。 ## -ForEach {#agg-functions-combinator-foreach} @@ -55,7 +53,7 @@ The suffix -If can be appended to the name of any aggregate function. In this ca 更改聚合函数的行为。 -如果聚合函数没有输入值,则使用此combinator,它返回其返回数据类型的默认值。 适用于可以采用空输入数据的聚合函数。 +如果聚合函数没有输入值,则使用此组合器它返回其返回数据类型的默认值。 适用于可以采用空输入数据的聚合函数。 `-OrDefault` 可与其他组合器一起使用。 @@ -67,7 +65,7 @@ The suffix -If can be appended to the name of any aggregate function. In this ca **参数** -- `x` — Aggregate function parameters. 
+- `x` — 聚合函数参数。 **返回值** @@ -174,7 +172,7 @@ FROM └────────────────────────────────┘ ``` -## -重新采样 {#agg-functions-combinator-resample} +## -Resample {#agg-functions-combinator-resample} 允许您将数据划分为组,然后单独聚合这些组中的数据。 通过将一列中的值拆分为间隔来创建组。 @@ -184,19 +182,19 @@ FROM **参数** -- `start` — Starting value of the whole required interval for `resampling_key` 值。 -- `stop` — Ending value of the whole required interval for `resampling_key` 值。 整个时间间隔不包括 `stop` 价值 `[start, stop)`. -- `step` — Step for separating the whole interval into subintervals. The `aggFunction` 在每个子区间上独立执行。 -- `resampling_key` — Column whose values are used for separating data into intervals. +- `start` — `resampling_key` 开始值。 +- `stop` — `resampling_key` 结束边界。 区间内部不包含 `stop` 值,即 `[start, stop)`. +- `step` — 分组的步长。 The `aggFunction` 在每个子区间上独立执行。 +- `resampling_key` — 取样列,被用来分组. - `aggFunction_params` — `aggFunction` 参数。 **返回值** -- 阵列 `aggFunction` 每个子区间的结果。 +- `aggFunction` 每个子区间的结果,结果为数组。 **示例** -考虑一下 `people` 具有以下数据的表: +考虑一下 `people` 表具有以下数据的表结构: ``` text ┌─name───┬─age─┬─wage─┐ @@ -209,9 +207,9 @@ FROM └────────┴─────┴──────┘ ``` -让我们得到的人的名字,他们的年龄在于的时间间隔 `[30,60)` 和 `[60,75)`. 由于我们使用整数表示的年龄,我们得到的年龄 `[30, 59]` 和 `[60,74]` 间隔。 +让我们得到的人的名字,他们的年龄在于的时间间隔 `[30,60)` 和 `[60,75)`。 由于我们使用整数表示的年龄,我们得到的年龄 `[30, 59]` 和 `[60,74]` 间隔。 -要在数组中聚合名称,我们使用 [groupArray](reference.md#agg_function-grouparray) 聚合函数。 这需要一个参数。 在我们的例子中,它是 `name` 列。 该 `groupArrayResample` 函数应该使用 `age` 按年龄聚合名称的列。 要定义所需的时间间隔,我们通过 `30, 75, 30` 参数到 `groupArrayResample` 功能。 +要在数组中聚合名称,我们使用 [groupArray](reference.md#agg_function-grouparray) 聚合函数。 这需要一个参数。 在我们的例子中,它是 `name` 列。 `groupArrayResample` 函数应该使用 `age` 按年龄聚合名称, 要定义所需的时间间隔,我们传入 `30, 75, 30` 参数给 `groupArrayResample` 函数。 ``` sql SELECT groupArrayResample(30, 75, 30)(name, age) FROM people @@ -225,7 +223,7 @@ SELECT groupArrayResample(30, 75, 30)(name, age) FROM people 考虑结果。 -`Jonh` 是因为他太年轻了 其他人按照指定的年龄间隔进行分配。 +`Jonh` 没有被选中,因为他太年轻了。 其他人按照指定的年龄间隔进行分配。 现在让我们计算指定年龄间隔内的总人数和平均工资。 diff --git a/docs/zh/sql-reference/aggregate-functions/index.md b/docs/zh/sql-reference/aggregate-functions/index.md index 06666c49d03..57d8e362d99 100644 --- a/docs/zh/sql-reference/aggregate-functions/index.md +++ b/docs/zh/sql-reference/aggregate-functions/index.md @@ -1,9 +1,6 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd -toc_folder_title: "\u805A\u5408\u51FD\u6570" toc_priority: 33 -toc_title: "\u5BFC\u8A00" +toc_title: 简介 --- # 聚合函数 {#aggregate-functions} diff --git a/docs/zh/sql-reference/aggregate-functions/parametric-functions.md b/docs/zh/sql-reference/aggregate-functions/parametric-functions.md index 830581beba7..69572086549 100644 --- a/docs/zh/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/zh/sql-reference/aggregate-functions/parametric-functions.md @@ -1,15 +1,13 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd toc_priority: 38 -toc_title: "\u53C2\u6570" +toc_title: 参数聚合函数 --- # 参数聚合函数 {#aggregate_functions_parametric} -Some aggregate functions can accept not only argument columns (used for compression), but a set of parameters – constants for initialization. The syntax is two pairs of brackets instead of one. The first is for parameters, and the second is for arguments. 
+一些聚合函数不仅可以接受参数列(用于压缩),也可以接收常量的初始化参数。这种语法是接受两个括号的参数,第一个数初始化参数,第二个是入参。 -## 直方图 {#histogram} +## histogram {#histogram} 计算自适应直方图。 它不能保证精确的结果。 @@ -21,20 +19,21 @@ histogram(number_of_bins)(values) **参数** -`number_of_bins` — Upper limit for the number of bins in the histogram. The function automatically calculates the number of bins. It tries to reach the specified number of bins, but if it fails, it uses fewer bins. -`values` — [表达式](../syntax.md#syntax-expressions) 导致输入值。 +`number_of_bins` — 直方图bin个数,这个函数会自动计算bin的数量,而且会尽量使用指定值,如果无法做到,那就使用更小的bin个数。 + +`values` — [表达式](../syntax.md#syntax-expressions) 输入值。 **返回值** -- [阵列](../../sql-reference/data-types/array.md) 的 [元组](../../sql-reference/data-types/tuple.md) 下面的格式: +- [Array](../../sql-reference/data-types/array.md) 的 [Tuples](../../sql-reference/data-types/tuple.md) 如下: ``` [(lower_1, upper_1, height_1), ... (lower_N, upper_N, height_N)] ``` - - `lower` — Lower bound of the bin. - - `upper` — Upper bound of the bin. - - `height` — Calculated height of the bin. + - `lower` — bin的下边界。 + - `upper` — bin的上边界。 + - `height` — bin的计算权重。 **示例** @@ -53,7 +52,7 @@ FROM ( └─────────────────────────────────────────────────────────────────────────┘ ``` -您可以使用 [酒吧](../../sql-reference/functions/other-functions.md#function-bar) 功能,例如: +您可以使用 [bar](../../sql-reference/functions/other-functions.md#function-bar) 功能,例如: ``` sql WITH histogram(5)(rand() % 100) AS hist @@ -93,11 +92,11 @@ sequenceMatch(pattern)(timestamp, cond1, cond2, ...) **参数** -- `pattern` — Pattern string. See [模式语法](#sequence-function-pattern-syntax). +- `pattern` — 模式字符串。 参考 [模式语法](#sequence-function-pattern-syntax). -- `timestamp` — Column considered to contain time data. Typical data types are `Date` 和 `DateTime`. 您还可以使用任何支持的 [UInt](../../sql-reference/data-types/int-uint.md) 数据类型。 +- `timestamp` — 包含时间的列。典型的时间类型是: `Date` 和 `DateTime`。您还可以使用任何支持的 [UInt](../../sql-reference/data-types/int-uint.md) 数据类型。 -- `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. 最多可以传递32个条件参数。 该函数只考虑这些条件中描述的事件。 如果序列包含未在条件中描述的数据,则函数将跳过这些数据。 +- `cond1`, `cond2` — 事件链的约束条件。 数据类型是: `UInt8`。 最多可以传递32个条件参数。 该函数只考虑这些条件中描述的事件。 如果序列包含未在条件中描述的数据,则函数将跳过这些数据。 **返回值** @@ -109,11 +108,11 @@ sequenceMatch(pattern)(timestamp, cond1, cond2, ...) **模式语法** -- `(?N)` — Matches the condition argument at position `N`. 条件在编号 `[1, 32]` 范围。 例如, `(?1)` 匹配传递给 `cond1` 参数。 +- `(?N)` — 在位置`N`匹配条件参数。 条件在编号 `[1, 32]` 范围。 例如, `(?1)` 匹配传递给 `cond1` 参数。 -- `.*` — Matches any number of events. You don't need conditional arguments to match this element of the pattern. +- `.*` — 匹配任何事件的数字。 不需要条件参数来匹配这个模式。 -- `(?t operator value)` — Sets the time in seconds that should separate two events. For example, pattern `(?1)(?t>1800)(?2)` 匹配彼此发生超过1800秒的事件。 这些事件之间可以存在任意数量的任何事件。 您可以使用 `>=`, `>`, `<`, `<=` 运营商。 +- `(?t operator value)` — 分开两个事件的时间。 例如: `(?1)(?t>1800)(?2)` 匹配彼此发生超过1800秒的事件。 这些事件之间可以存在任意数量的任何事件。 您可以使用 `>=`, `>`, `<`, `<=` 运算符。 **例** @@ -169,7 +168,7 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM ## sequenceCount(pattern)(time, cond1, cond2, …) {#function-sequencecount} -计数与模式匹配的事件链的数量。 该函数搜索不重叠的事件链。 当前链匹配后,它开始搜索下一个链。 +计算与模式匹配的事件链的数量。该函数搜索不重叠的事件链。当前链匹配后,它开始搜索下一个链。 !!! warning "警告" 在同一秒钟发生的事件可能以未定义的顺序排列在序列中,影响结果。 @@ -180,11 +179,11 @@ sequenceCount(pattern)(timestamp, cond1, cond2, ...) **参数** -- `pattern` — Pattern string. See [模式语法](#sequence-function-pattern-syntax). +- `pattern` — 模式字符串。 参考:[模式语法](#sequence-function-pattern-syntax). 
-- `timestamp` — Column considered to contain time data. Typical data types are `Date` 和 `DateTime`. 您还可以使用任何支持的 [UInt](../../sql-reference/data-types/int-uint.md) 数据类型。 +- `timestamp` — 包含时间的列。典型的时间类型是: `Date` 和 `DateTime`。您还可以使用任何支持的 [UInt](../../sql-reference/data-types/int-uint.md) 数据类型。 -- `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. 最多可以传递32个条件参数。 该函数只考虑这些条件中描述的事件。 如果序列包含未在条件中描述的数据,则函数将跳过这些数据。 +- `cond1`, `cond2` — 事件链的约束条件。 数据类型是: `UInt8`。 最多可以传递32个条件参数。该函数只考虑这些条件中描述的事件。 如果序列包含未在条件中描述的数据,则函数将跳过这些数据。 **返回值** @@ -227,9 +226,9 @@ SELECT sequenceCount('(?1).*(?2)')(time, number = 1, number = 2) FROM t 搜索滑动时间窗中的事件链,并计算从链中发生的最大事件数。 -该函数根据算法工作: +该函数采用如下算法: -- 该函数搜索触发链中的第一个条件并将事件计数器设置为1的数据。 这是滑动窗口启动的时刻。 +- 该函数搜索触发链中的第一个条件并将事件计数器设置为1。 这是滑动窗口启动的时刻。 - 如果来自链的事件在窗口内顺序发生,则计数器将递增。 如果事件序列中断,则计数器不会增加。 @@ -243,11 +242,11 @@ windowFunnel(window, [mode])(timestamp, cond1, cond2, ..., condN) **参数** -- `window` — Length of the sliding window in seconds. -- `mode` -这是一个可选的参数。 - - `'strict'` -当 `'strict'` 设置时,windowFunnel()仅对唯一值应用条件。 -- `timestamp` — Name of the column containing the timestamp. Data types supported: [日期](../../sql-reference/data-types/date.md), [日期时间](../../sql-reference/data-types/datetime.md#data_type-datetime) 和其他无符号整数类型(请注意,即使时间戳支持 `UInt64` 类型,它的值不能超过Int64最大值,即2^63-1)。 -- `cond` — Conditions or data describing the chain of events. [UInt8](../../sql-reference/data-types/int-uint.md). +- `window` — 滑动窗户的大小,单位是秒。 +- `mode` - 这是一个可选的参数。 + - `'strict'` - 当 `'strict'` 设置时,windowFunnel()仅对唯一值应用匹配条件。 +- `timestamp` — 包含时间的列。 数据类型支持: [日期](../../sql-reference/data-types/date.md), [日期时间](../../sql-reference/data-types/datetime.md#data_type-datetime) 和其他无符号整数类型(请注意,即使时间戳支持 `UInt64` 类型,它的值不能超过Int64最大值,即2^63-1)。 +- `cond` — 事件链的约束条件。 [UInt8](../../sql-reference/data-types/int-uint.md) 类型。 **返回值** @@ -284,7 +283,7 @@ windowFunnel(window, [mode])(timestamp, cond1, cond2, ..., condN) └────────────┴─────────┴─────────────────────┴─────────┴─────────┘ ``` -了解用户有多远 `user_id` 可以在2019的1-2月期间通过链条。 +了解用户`user_id` 可以在2019的1-2月期间通过链条多远。 查询: @@ -315,10 +314,10 @@ ORDER BY level ASC ## Retention {#retention} -该函数将一组条件作为参数,类型为1到32个参数 `UInt8` 表示事件是否满足特定条件。 +该函数将一组条件作为参数,类型为1到32个 `UInt8` 类型的参数,用来表示事件是否满足特定条件。 任何条件都可以指定为参数(如 [WHERE](../../sql-reference/statements/select/where.md#select-where)). -除了第一个以外,条件成对适用:如果第一个和第二个是真的,第二个结果将是真的,如果第一个和fird是真的,第三个结果将是真的,等等。 +除了第一个以外,条件成对适用:如果第一个和第二个是真的,第二个结果将是真的,如果第一个和第三个是真的,第三个结果将是真的,等等。 **语法** @@ -328,22 +327,22 @@ retention(cond1, cond2, ..., cond32); **参数** -- `cond` — an expression that returns a `UInt8` 结果(1或0)。 +- `cond` — 返回 `UInt8` 结果(1或0)的表达式。 **返回值** 数组为1或0。 -- 1 — condition was met for the event. -- 0 — condition wasn't met for the event. +- 1 — 条件满足。 +- 0 — 条件不满足。 类型: `UInt8`. **示例** -让我们考虑计算的一个例子 `retention` 功能,以确定网站流量。 +让我们考虑使用 `retention` 功能的一个例子 ,以确定网站流量。 -**1.** Сreate a table to illustrate an example. +**1.** 举例说明,先创建一张表。 ``` sql CREATE TABLE retention_test(date Date, uid Int32) ENGINE = Memory; @@ -402,7 +401,7 @@ SELECT * FROM retention_test └────────────┴─────┘ ``` -**2.** 按唯一ID对用户进行分组 `uid` 使用 `retention` 功能。 +**2.** 按唯一ID `uid` 对用户进行分组,使用 `retention` 功能。 查询: @@ -466,7 +465,7 @@ FROM └────┴────┴────┘ ``` -哪里: +条件: - `r1`-2020-01-01期间访问该网站的独立访问者数量( `cond1` 条件)。 - `r2`-在2020-01-01和2020-01-02之间的特定时间段内访问该网站的唯一访问者的数量 (`cond1` 和 `cond2` 条件)。 @@ -474,9 +473,9 @@ FROM ## uniqUpTo(N)(x) {#uniquptonx} -Calculates the number of different argument values ​​if it is less than or equal to N. 
If the number of different argument values is greater than N, it returns N + 1. +计算小于或者等于N的不同参数的个数。如果结果大于N,那返回N+1。 -建议使用小Ns,高达10。 N的最大值为100。 +建议使用较小的Ns,比如:10。N的最大值为100。 对于聚合函数的状态,它使用的内存量等于1+N\*一个字节值的大小。 对于字符串,它存储8个字节的非加密哈希。 也就是说,计算是近似的字符串。 @@ -488,12 +487,12 @@ Calculates the number of different argument values ​​if it is less than or e 用法示例: ``` text -Problem: Generate a report that shows only keywords that produced at least 5 unique users. -Solution: Write in the GROUP BY query SearchPhrase HAVING uniqUpTo(4)(UserID) >= 5 +问题:产出一个不少于五个唯一用户的关键字报告 +解决方案: 写group by查询语句 HAVING uniqUpTo(4)(UserID) >= 5 ``` +## sumMapFiltered(keys\_to\_keep)(keys, values) {#summapfilteredkeys-to-keepkeys-values} + +和 [sumMap](reference.md#agg_functions-summap) 基本一致, 除了一个键数组作为参数传递。这在使用高基数key时尤其有用。 + [原始文章](https://clickhouse.tech/docs/en/query_language/agg_functions/parametric_functions/) - -## sumMapFiltered(keys\_to\_keep)(键值) {#summapfilteredkeys-to-keepkeys-values} - -同样的行为 [sumMap](reference.md#agg_functions-summap) 除了一个键数组作为参数传递。 这在使用高基数密钥时尤其有用。 diff --git a/docs/zh/sql-reference/aggregate-functions/reference.md b/docs/zh/sql-reference/aggregate-functions/reference.md index 53510900536..7d5ecda7bb4 100644 --- a/docs/zh/sql-reference/aggregate-functions/reference.md +++ b/docs/zh/sql-reference/aggregate-functions/reference.md @@ -1,13 +1,11 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd toc_priority: 36 -toc_title: "\u53C2\u8003\u8D44\u6599" +toc_title: 聚合函数 --- # 聚合函数引用 {#aggregate-functions-reference} -## 计数 {#agg_function-count} +## count {#agg_function-count} 计数行数或非空值。 @@ -73,7 +71,7 @@ SELECT count(DISTINCT num) FROM t 这个例子表明 `count(DISTINCT num)` 由执行 `uniqExact` 根据功能 `count_distinct_implementation` 设定值。 -## 任何(x) {#agg_function-any} +## any(x) {#agg_function-any} 选择第一个遇到的值。 查询可以以任何顺序执行,甚至每次都以不同的顺序执行,因此此函数的结果是不确定的。 @@ -115,7 +113,7 @@ FROM ontime 选择遇到的最后一个值。 其结果是一样不确定的 `any` 功能。 -## 集团比特 {#groupbitand} +## groupBitAnd {#groupbitand} 按位应用 `AND` 对于一系列的数字。 @@ -337,7 +335,7 @@ SELECT argMin(user, salary) FROM salary 总计 ‘value’ 数组根据在指定的键 ‘key’ 阵列。 传递键和值数组的元组与传递两个键和值数组是同义的。 元素的数量 ‘key’ 和 ‘value’ 总计的每一行必须相同。 -Returns a tuple of two arrays: keys in sorted order, and values ​​summed for the corresponding keys. +返回两个数组的一个二元组: key是排好序的,value是对应key的求和。 示例: @@ -374,7 +372,7 @@ GROUP BY timeslot ## skewPop {#skewpop} -计算 [歪斜](https://en.wikipedia.org/wiki/Skewness) 的序列。 +计算的序列[偏度](https://en.wikipedia.org/wiki/Skewness)。 ``` sql skewPop(expr) @@ -386,7 +384,7 @@ skewPop(expr) **返回值** -The skewness of the given distribution. Type — [Float64](../../sql-reference/data-types/float.md) +给定序列的偏度。类型 — [Float64](../../sql-reference/data-types/float.md) **示例** @@ -410,7 +408,7 @@ skewSamp(expr) **返回值** -The skewness of the given distribution. Type — [Float64](../../sql-reference/data-types/float.md). 如果 `n <= 1` (`n` 是样本的大小),则该函数返回 `nan`. +给定序列的偏度。 类型 — [Float64](../../sql-reference/data-types/float.md). 如果 `n <= 1` (`n` 是样本的大小),则该函数返回 `nan`. **示例** @@ -432,7 +430,7 @@ kurtPop(expr) **返回值** -The kurtosis of the given distribution. Type — [Float64](../../sql-reference/data-types/float.md) +给定序列的峰度。 类型 — [Float64](../../sql-reference/data-types/float.md) **示例** @@ -456,7 +454,7 @@ kurtSamp(expr) **返回值** -The kurtosis of the given distribution. Type — [Float64](../../sql-reference/data-types/float.md). 如果 `n <= 1` (`n` 是样本的大小),则该函数返回 `nan`. +给定序列的峰度。类型 — [Float64](../../sql-reference/data-types/float.md). 如果 `n <= 1` (`n` 是样本的大小),则该函数返回 `nan`. 
**示例** @@ -533,7 +531,7 @@ FROM ( 只适用于数字。 结果总是Float64。 -## 平均加权 {#avgweighted} +## avgWeighted {#avgweighted} 计算 [加权算术平均值](https://en.wikipedia.org/wiki/Weighted_arithmetic_mean). @@ -545,10 +543,10 @@ avgWeighted(x, weight) **参数** -- `x` — Values. [整数](../data-types/int-uint.md) 或 [浮点](../data-types/float.md). -- `weight` — Weights of the values. [整数](../data-types/int-uint.md) 或 [浮点](../data-types/float.md). +- `x` — 值。 [整数](../data-types/int-uint.md) 或 [浮点](../data-types/float.md). +- `weight` — 值的加权。 [整数](../data-types/int-uint.md) 或 [浮点](../data-types/float.md). -类型 `x` 和 `weight` 一定是一样的 +`x` 和 `weight` 的类型一定是一样的 **返回值** @@ -590,7 +588,7 @@ uniq(x[, ...]) - A [UInt64](../../sql-reference/data-types/int-uint.md)-键入号码。 -**实施细节** +**实现细节** 功能: @@ -598,7 +596,7 @@ uniq(x[, ...]) - 使用自适应采样算法。 对于计算状态,该函数使用最多65536个元素哈希值的样本。 - This algorithm is very accurate and very efficient on the CPU. When the query contains several of these functions, using `uniq` is almost as fast as using other aggregate functions. + 这个算法是非常精确的,并且对于CPU来说非常高效。如果查询包含一些这样的函数,那和其他聚合函数相比 `uniq` 将是几乎一样快。 - 确定性地提供结果(它不依赖于查询处理顺序)。 @@ -629,17 +627,17 @@ uniqCombined(HLL_precision)(x[, ...]) **返回值** -- 一个数字 [UInt64](../../sql-reference/data-types/int-uint.md)-键入号码。 +- 一个[UInt64](../../sql-reference/data-types/int-uint.md)类型的数字。 -**实施细节** +**实现细节** 功能: - 计算散列(64位散列 `String` 否则32位)对于聚合中的所有参数,然后在计算中使用它。 -- 使用三种算法的组合:数组、哈希表和HyperLogLog与error错表。 +- 使用三种算法的组合:数组、哈希表和包含错误修正表的HyperLogLog。 - For a small number of distinct elements, an array is used. When the set size is larger, a hash table is used. For a larger number of elements, HyperLogLog is used, which will occupy a fixed amount of memory. + 少量的不同的值,使用数组。 值再多一些,使用哈希表。对于大量的数据来说,使用HyperLogLog,HyperLogLog占用一个固定的内存空间。 - 确定性地提供结果(它不依赖于查询处理顺序)。 @@ -650,7 +648,7 @@ uniqCombined(HLL_precision)(x[, ...]) - 消耗少几倍的内存。 - 计算精度高出几倍。 -- 通常具有略低的性能。 在某些情况下, `uniqCombined` 可以表现得比 `uniq`,例如,使用通过网络传输大量聚合状态的分布式查询。 +- 通常具有略低的性能。 在某些情况下, `uniqCombined` 可以表现得比 `uniq` 好,例如,使用通过网络传输大量聚合状态的分布式查询。 **另请参阅** @@ -679,7 +677,7 @@ uniqHLL12(x[, ...]) - A [UInt64](../../sql-reference/data-types/int-uint.md)-键入号码。 -**实施细节** +**实现细节** 功能: @@ -707,9 +705,9 @@ uniqHLL12(x[, ...]) uniqExact(x[, ...]) ``` -使用 `uniqExact` 功能,如果你绝对需要一个确切的结果。 否则使用 [uniq](#agg_function-uniq) 功能。 +如果你绝对需要一个确切的结果,使用 `uniqExact` 功能。 否则使用 [uniq](#agg_function-uniq) 功能。 -该 `uniqExact` 功能使用更多的内存比 `uniq`,因为状态的大小随着不同值的数量的增加而无界增长。 +`uniqExact` 比 `uniq` 使用更多的内存,因为状态的大小随着不同值的数量的增加而无界增长。 **参数** @@ -721,7 +719,7 @@ uniqExact(x[, ...]) - [uniqCombined](#agg_function-uniqcombined) - [uniqHLL12](#agg_function-uniqhll12) -## 群交(x),群交(max\_size)(x) {#agg_function-grouparray} +## groupArray(x), groupArray(max\_size)(x) {#agg_function-grouparray} 创建参数值的数组。 值可以按任何(不确定)顺序添加到数组中。 @@ -748,10 +746,10 @@ groupArrayInsertAt(default_x, size)(x, pos); **参数** -- `x` — Value to be inserted. [表达式](../syntax.md#syntax-expressions) 导致的一个 [支持的数据类型](../../sql-reference/data-types/index.md). -- `pos` — Position at which the specified element `x` 将被插入。 数组中的索引编号从零开始。 [UInt32](../../sql-reference/data-types/int-uint.md#uint-ranges). -- `default_x`— Default value for substituting in empty positions. Optional parameter. [表达式](../syntax.md#syntax-expressions) 导致为配置的数据类型 `x` 参数。 如果 `default_x` 未定义,则 [默认值](../../sql-reference/statements/create.md#create-default-values) 被使用。 -- `size`— Length of the resulting array. Optional parameter. When using this parameter, the default value `default_x` 必须指定。 [UInt32](../../sql-reference/data-types/int-uint.md#uint-ranges). 
+- `x` — 被插入的值。[表达式](../syntax.md#syntax-expressions) 导致的一个 [支持的数据类型](../../sql-reference/data-types/index.md). +- `pos` — `x` 将被插入的位置。 数组中的索引编号从零开始。 [UInt32](../../sql-reference/data-types/int-uint.md#uint-ranges). +- `default_x`— 如果代入值为空,则使用默认值。可选参数。[表达式](../syntax.md#syntax-expressions) 为 `x` 数据类型的数据。 如果 `default_x` 未定义,则 [默认值](../../sql-reference/statements/create.md#create-default-values) 被使用。 +- `size`— 结果数组的长度。可选参数。如果使用该参数,`default_x` 必须指定。 [UInt32](../../sql-reference/data-types/int-uint.md#uint-ranges). **返回值** @@ -803,7 +801,7 @@ SELECT groupArrayInsertAt('-', 5)(toString(number), number * 2) FROM numbers(5); └───────────────────────────────────────────────────────────────────┘ ``` -元件的多线程插入到一个位置。 +在一个位置多线程插入数据。 查询: @@ -832,8 +830,8 @@ groupArrayMovingSum(window_size)(numbers_for_summing) **参数** -- `numbers_for_summing` — [表达式](../syntax.md#syntax-expressions) 生成数值数据类型值。 -- `window_size` — Size of the calculation window. +- `numbers_for_summing` — [表达式](../syntax.md#syntax-expressions) 为数值数据类型值。 +- `window_size` — 窗口大小。 **返回值** @@ -906,13 +904,13 @@ groupArrayMovingAvg(window_size)(numbers_for_summing) **参数** - `numbers_for_summing` — [表达式](../syntax.md#syntax-expressions) 生成数值数据类型值。 -- `window_size` — Size of the calculation window. +- `window_size` — 窗口大小。 **返回值** - 与输入数据大小和类型相同的数组。 -该函数使用 [四舍五入到零](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero). 它截断结果数据类型的小数位数。 +该函数使用 [四舍五入到零](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero). 它截断无意义的小数位来保证结果的数据类型。 **示例** @@ -967,20 +965,20 @@ FROM t └───────────┴──────────────────────────────────┴───────────────────────┘ ``` -## 禄,赂麓ta脌麓,):脡,,拢脢,group媒group)galaxy s8碌胫脢)禄煤)酶脱脩) {#groupuniqarrayx-groupuniqarraymax-sizex} +## groupUniqArray(x), groupUniqArray(max\_size)(x) {#groupuniqarrayx-groupuniqarraymax-sizex} 从不同的参数值创建一个数组。 内存消耗是一样的 `uniqExact` 功能。 -第二个版本(与 `max_size` 参数)将结果数组的大小限制为 `max_size` 元素。 +第二个版本(`max_size` 参数)将结果数组的大小限制为 `max_size` 元素。 例如, `groupUniqArray(1)(x)` 相当于 `[any(x)]`. -## 分位数 {#quantile} +## quantile {#quantile} -计算近似值 [分位数](https://en.wikipedia.org/wiki/Quantile) 的数字数据序列。 +计算数字序列的近似[分位数](https://en.wikipedia.org/wiki/Quantile)。 -此功能适用 [油藏采样](https://en.wikipedia.org/wiki/Reservoir_sampling) 随着储存器大小高达8192和随机数发生器进行采样。 结果是非确定性的。 要获得精确的分位数,请使用 [quantileExact](#quantileexact) 功能。 +此功能适用 [水塘抽样(](https://en.wikipedia.org/wiki/Reservoir_sampling),使用储存器最大到8192和随机数发生器进行采样。 结果是非确定性的。 要获得精确的分位数,请使用 [quantileExact](#quantileexact) 功能。 -当使用多个 `quantile*` 在查询中具有不同级别的函数,内部状态不会被组合(即查询的工作效率低于它可以)。 在这种情况下,使用 [分位数](#quantiles) 功能。 +当在一个查询中使用多个不同层次的 `quantile*` 时,内部状态不会被组合(即查询的工作效率低于组合情况)。在这种情况下,使用[分位数](#quantiles)功能。 **语法** @@ -992,12 +990,12 @@ quantile(level)(expr) **参数** -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` 值的范围 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median). -- `expr` — Expression over the column values resulting in numeric [数据类型](../../sql-reference/data-types/index.md#data_types), [日期](../../sql-reference/data-types/date.md) 或 [日期时间](../../sql-reference/data-types/datetime.md). +- `level` — 分位数层次。可选参数。 从0到1的一个float类型的常量。 我们推荐 `level` 值的范围为 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median). 
+- `expr` — 求职表达式,类型为:数值[数据类型](../../sql-reference/data-types/index.md#data_types),[日期](../../sql-reference/data-types/date.md)数据类型或[时间](../../sql-reference/data-types/datetime.md)数据类型。 **返回值** -- 指定电平的近似分位数。 +- 指定层次的近似分位数。 类型: @@ -1037,13 +1035,13 @@ SELECT quantile(val) FROM t - [中位数](#median) - [分位数](#quantiles) -## 量化确定 {#quantiledeterministic} +## quantileDeterministic {#quantiledeterministic} -计算近似值 [分位数](https://en.wikipedia.org/wiki/Quantile) 的数字数据序列。 +计算数字序列的近似[分位数](https://en.wikipedia.org/wiki/Quantile)。 -此功能适用 [油藏采样](https://en.wikipedia.org/wiki/Reservoir_sampling) 与储层大小高达8192和采样的确定性算法。 结果是确定性的。 要获得精确的分位数,请使用 [quantileExact](#quantileexact) 功能。 +此功能适用 [水塘抽样(](https://en.wikipedia.org/wiki/Reservoir_sampling),使用储存器最大到8192和随机数发生器进行采样。 结果是非确定性的。 要获得精确的分位数,请使用 [quantileExact](#quantileexact) 功能。 -当使用多个 `quantile*` 在查询中具有不同级别的函数,内部状态不会被组合(即查询的工作效率低于它可以)。 在这种情况下,使用 [分位数](#quantiles) 功能。 +当在一个查询中使用多个不同层次的 `quantile*` 时,内部状态不会被组合(即查询的工作效率低于组合情况)。在这种情况下,使用[分位数](#quantiles)功能。 **语法** @@ -1055,13 +1053,13 @@ quantileDeterministic(level)(expr, determinator) **参数** -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` 值的范围 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median). -- `expr` — Expression over the column values resulting in numeric [数据类型](../../sql-reference/data-types/index.md#data_types), [日期](../../sql-reference/data-types/date.md) 或 [日期时间](../../sql-reference/data-types/datetime.md). -- `determinator` — Number whose hash is used instead of a random number generator in the reservoir sampling algorithm to make the result of sampling deterministic. As a determinator you can use any deterministic positive number, for example, a user id or an event id. If the same determinator value occures too often, the function works incorrectly. +- `level` — 分位数层次。可选参数。 从0到1的一个float类型的常量。 我们推荐 `level` 值的范围为 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median). +- `expr` — 求职表达式,类型为:数值[数据类型](../../sql-reference/data-types/index.md#data_types),[日期](../../sql-reference/data-types/date.md)数据类型或[时间](../../sql-reference/data-types/datetime.md)数据类型。 +- `determinator` — 一个数字,其hash被用来代替在水塘抽样中随机生成的数字,这样可以保证取样的确定性。你可以使用用户ID或者事件ID等任何正数,但是如果相同的 `determinator` 出现多次,那结果很可能不正确。 **返回值** -- 指定电平的近似分位数。 +- 指定层次的近似分位数。 类型: @@ -1103,11 +1101,11 @@ SELECT quantileDeterministic(val, 1) FROM t ## quantileExact {#quantileexact} -正是计算 [分位数](https://en.wikipedia.org/wiki/Quantile) 的数字数据序列。 +准确计算数字序列的[分位数](https://en.wikipedia.org/wiki/Quantile)。 -To get exact value, all the passed values ​​are combined into an array, which is then partially sorted. Therefore, the function consumes `O(n)` 内存,其中 `n` 是传递的多个值。 然而,对于少量的值,该函数是非常有效的。 +为了准确计算,所有输入的数据被合并为一个数组,并且部分的排序。因此该函数需要 `O(n)` 的内存,n为输入数据的个数。但是对于少量数据来说,该函数还是非常有效的。 -当使用多个 `quantile*` 在查询中具有不同级别的函数,内部状态不会被组合(即查询的工作效率低于它可以)。 在这种情况下,使用 [分位数](#quantiles) 功能。 +当在一个查询中使用多个不同层次的 `quantile*` 时,内部状态不会被组合(即查询的工作效率低于组合情况)。在这种情况下,使用[分位数](#quantiles)功能。 **语法** @@ -1119,12 +1117,12 @@ quantileExact(level)(expr) **参数** -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` 值的范围 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median). 
-- `expr` — Expression over the column values resulting in numeric [数据类型](../../sql-reference/data-types/index.md#data_types), [日期](../../sql-reference/data-types/date.md) 或 [日期时间](../../sql-reference/data-types/datetime.md). +- `level` — 分位数层次。可选参数。 从0到1的一个float类型的常量。 我们推荐 `level` 值的范围为 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median). +- `expr` — 求职表达式,类型为:数值[数据类型](../../sql-reference/data-types/index.md#data_types),[日期](../../sql-reference/data-types/date.md)数据类型或[时间](../../sql-reference/data-types/datetime.md)数据类型。 **返回值** -- 指定电平的分位数。 +- 指定层次的分位数。 类型: @@ -1153,13 +1151,13 @@ SELECT quantileExact(number) FROM numbers(10) - [中位数](#median) - [分位数](#quantiles) -## 分位数加权 {#quantileexactweighted} +## quantileExactWeighted {#quantileexactweighted} -正是计算 [分位数](https://en.wikipedia.org/wiki/Quantile) 数值数据序列,考虑到每个元素的权重。 +考虑到每个元素的权重,然后准确计算数值序列的[分位数](https://en.wikipedia.org/wiki/Quantile)。 -To get exact value, all the passed values ​​are combined into an array, which is then partially sorted. Each value is counted with its weight, as if it is present `weight` times. A hash table is used in the algorithm. Because of this, if the passed values ​​are frequently repeated, the function consumes less RAM than [quantileExact](#quantileexact). 您可以使用此功能,而不是 `quantileExact` 并指定重量1。 +为了准确计算,所有输入的数据被合并为一个数组,并且部分的排序。每个输入值需要根据 `weight` 计算求和。该算法使用哈希表。正因为如此,在数据重复较多的时候使用的内存是少于[quantileExact](#quantileexact)的。 您可以使用此函数代替 `quantileExact` 并指定重量1。 -当使用多个 `quantile*` 在查询中具有不同级别的函数,内部状态不会被组合(即查询的工作效率低于它可以)。 在这种情况下,使用 [分位数](#quantiles) 功能。 +当在一个查询中使用多个不同层次的 `quantile*` 时,内部状态不会被组合(即查询的工作效率低于组合情况)。在这种情况下,使用[分位数](#quantiles)功能。 **语法** @@ -1171,13 +1169,13 @@ quantileExactWeighted(level)(expr, weight) **参数** -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` 值的范围 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median). -- `expr` — Expression over the column values resulting in numeric [数据类型](../../sql-reference/data-types/index.md#data_types), [日期](../../sql-reference/data-types/date.md) 或 [日期时间](../../sql-reference/data-types/datetime.md). -- `weight` — Column with weights of sequence members. Weight is a number of value occurrences. +- `level` — 分位数层次。可选参数。 从0到1的一个float类型的常量。 我们推荐 `level` 值的范围为 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median). +- `expr` — 求职表达式,类型为:数值[数据类型](../../sql-reference/data-types/index.md#data_types),[日期](../../sql-reference/data-types/date.md)数据类型或[时间](../../sql-reference/data-types/datetime.md)数据类型。 +- `weight` — 权重序列。 权重是一个数据出现的数值。 **返回值** -- 指定电平的分位数。 +- 指定层次的分位数。 类型: @@ -1217,13 +1215,13 @@ SELECT quantileExactWeighted(n, val) FROM t - [中位数](#median) - [分位数](#quantiles) -## 分位定时 {#quantiletiming} +## quantileTiming {#quantiletiming} -随着确定的精度计算 [分位数](https://en.wikipedia.org/wiki/Quantile) 的数字数据序列。 +使用确定的精度计算数字数据序列的[分位数](https://en.wikipedia.org/wiki/Quantile)。 结果是确定性的(它不依赖于查询处理顺序)。 该函数针对描述加载网页时间或后端响应时间等分布的序列进行了优化。 -当使用多个 `quantile*` 在查询中具有不同级别的函数,内部状态不会被组合(即查询的工作效率低于它可以)。 在这种情况下,使用 [分位数](#quantiles) 功能。 +当在一个查询中使用多个不同层次的 `quantile*` 时,内部状态不会被组合(即查询的工作效率低于组合情况)。在这种情况下,使用[分位数](#quantiles)功能。 **语法** @@ -1235,12 +1233,12 @@ quantileTiming(level)(expr) **参数** -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` 值的范围 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median). 
+- `level` — 分位数层次。可选参数。 从0到1的一个float类型的常量。 我们推荐 `level` 值的范围为 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median). -- `expr` — [表达式](../syntax.md#syntax-expressions) 在一个列值返回 [浮动\*](../../sql-reference/data-types/float.md)-键入号码。 +- `expr` — [表达式](../syntax.md#syntax-expressions),返回 [浮动\*](../../sql-reference/data-types/float.md)类型数据。 - - If negative values are passed to the function, the behavior is undefined. - - If the value is greater than 30,000 (a page loading time of more than 30 seconds), it is assumed to be 30,000. + - 如果输入负值,那结果是不可预期的。 + - 如果输入值大于30000(页面加载时间大于30s),那我们假设为30000。 **精度** @@ -1252,16 +1250,16 @@ quantileTiming(level)(expr) 否则,计算结果将四舍五入到16毫秒的最接近倍数。 !!! note "注" - 对于计算页面加载时间分位数,此函数比 [分位数](#quantile). + 对于计算页面加载时间分位数,此函数比 [分位数](#quantile)更有效和准确。 **返回值** -- 指定电平的分位数。 +- 指定层次的分位数。 类型: `Float32`. !!! note "注" - 如果没有值传递给函数(当使用 `quantileTimingIf`), [阿南](../../sql-reference/data-types/float.md#data_type-float-nan-inf) 被返回。 这样做的目的是将这些案例与导致零的案例区分开来。 看 [按条款订购](../statements/select/order-by.md#select-order-by) 对于排序注意事项 `NaN` 值。 + 如果没有值传递给函数(当使用 `quantileTimingIf`), [NaN](../../sql-reference/data-types/float.md#data_type-float-nan-inf) 被返回。 这样做的目的是将这些案例与导致零的案例区分开来。 看 [ORDER BY clause](../statements/select/order-by.md#select-order-by) 对于 `NaN` 值排序注意事项。 **示例** @@ -1300,13 +1298,13 @@ SELECT quantileTiming(response_time) FROM t - [中位数](#median) - [分位数](#quantiles) -## 分位时间加权 {#quantiletimingweighted} +## quantileTimingWeighted {#quantiletimingweighted} -随着确定的精度计算 [分位数](https://en.wikipedia.org/wiki/Quantile) 根据每个序列成员的权重对数字数据序列进行处理。 +根据每个序列成员的权重,使用确定的精度计算数字序列的[分位数](https://en.wikipedia.org/wiki/Quantile)。 结果是确定性的(它不依赖于查询处理顺序)。 该函数针对描述加载网页时间或后端响应时间等分布的序列进行了优化。 -当使用多个 `quantile*` 在查询中具有不同级别的函数,内部状态不会被组合(即查询的工作效率低于它可以)。 在这种情况下,使用 [分位数](#quantiles) 功能。 +当在一个查询中使用多个不同层次的 `quantile*` 时,内部状态不会被组合(即查询的工作效率低于组合情况)。在这种情况下,使用[分位数](#quantiles)功能。 **语法** @@ -1318,14 +1316,14 @@ quantileTimingWeighted(level)(expr, weight) **参数** -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` 值的范围 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median). +- `level` — 分位数层次。可选参数。 从0到1的一个float类型的常量。 我们推荐 `level` 值的范围为 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median). -- `expr` — [表达式](../syntax.md#syntax-expressions) 在一个列值返回 [浮动\*](../../sql-reference/data-types/float.md)-键入号码。 +- `expr` — [表达式](../syntax.md#syntax-expressions),返回 [浮动\*](../../sql-reference/data-types/float.md)类型数据。 - - If negative values are passed to the function, the behavior is undefined. - - If the value is greater than 30,000 (a page loading time of more than 30 seconds), it is assumed to be 30,000. + - 如果输入负值,那结果是不可预期的。 + - 如果输入值大于30000(页面加载时间大于30s),那我们假设为30000。 -- `weight` — Column with weights of sequence elements. Weight is a number of value occurrences. +- `weight` — 权重序列。 权重是一个数据出现的数值。 **精度** @@ -1337,16 +1335,16 @@ quantileTimingWeighted(level)(expr, weight) 否则,计算结果将四舍五入到16毫秒的最接近倍数。 !!! note "注" - 对于计算页面加载时间分位数,此函数比 [分位数](#quantile). + 对于计算页面加载时间分位数,此函数比 [分位数](#quantile)更高效和准确。 **返回值** -- 指定电平的分位数。 +- 指定层次的分位数。 类型: `Float32`. !!! 
note "注" - 如果没有值传递给函数(当使用 `quantileTimingIf`), [阿南](../../sql-reference/data-types/float.md#data_type-float-nan-inf) 被返回。 这样做的目的是将这些案例与导致零的案例区分开来。 看 [按条款订购](../statements/select/order-by.md#select-order-by) 对于排序注意事项 `NaN` 值。 + 如果没有值传递给函数(当使用 `quantileTimingIf`), [NaN](../../sql-reference/data-types/float.md#data_type-float-nan-inf) 被返回。 这样做的目的是将这些案例与导致零的案例区分开来。看 [ORDER BY clause](../statements/select/order-by.md#select-order-by) 对于 `NaN` 值排序注意事项。 **示例** @@ -1384,13 +1382,13 @@ SELECT quantileTimingWeighted(response_time, weight) FROM t ## quantileTDigest {#quantiletdigest} -计算近似值 [分位数](https://en.wikipedia.org/wiki/Quantile) 使用的数字数据序列 [t-digest](https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf) 算法。 +使用[t-digest](https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf) 算法计算近似[分位数](https://en.wikipedia.org/wiki/Quantile)。 -最大误差为1%。 内存消耗 `log(n)`,哪里 `n` 是多个值。 结果取决于运行查询的顺序,并且是不确定的。 +最大误差为1%。 内存消耗 `log(n)`,这里 `n` 是值的个数。 结果取决于运行查询的顺序,并且是不确定的。 -该功能的性能低于性能 [分位数](#quantile) 或 [分位定时](#quantiletiming). 在状态大小与精度的比率方面,这个函数比 `quantile`. +该功能的性能低于性能 [分位数](#quantile) 或 [时间分位](#quantiletiming). 在状态大小与精度的比率方面,这个函数比 `quantile`更优秀。 -当使用多个 `quantile*` 在查询中具有不同级别的函数,内部状态不会被组合(即查询的工作效率低于它可以)。 在这种情况下,使用 [分位数](#quantiles) 功能。 +当在一个查询中使用多个不同层次的 `quantile*` 时,内部状态不会被组合(即查询的工作效率低于组合情况)。在这种情况下,使用[分位数](#quantiles)功能。 **语法** @@ -1402,12 +1400,12 @@ quantileTDigest(level)(expr) **参数** -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` 值的范围 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median). -- `expr` — Expression over the column values resulting in numeric [数据类型](../../sql-reference/data-types/index.md#data_types), [日期](../../sql-reference/data-types/date.md) 或 [日期时间](../../sql-reference/data-types/datetime.md). +- `level` — 分位数层次。可选参数。 从0到1的一个float类型的常量。 我们推荐 `level` 值的范围为 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median). +- `expr` — 求职表达式,类型为:数值[数据类型](../../sql-reference/data-types/index.md#data_types),[日期](../../sql-reference/data-types/date.md)数据类型或[时间](../../sql-reference/data-types/datetime.md)数据类型。 **回值** -- 指定电平的近似分位数。 +- 指定层次的分位数。 类型: @@ -1438,13 +1436,13 @@ SELECT quantileTDigest(number) FROM numbers(10) ## quantileTDigestWeighted {#quantiletdigestweighted} -计算近似值 [分位数](https://en.wikipedia.org/wiki/Quantile) 使用的数字数据序列 [t-digest](https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf) 算法。 该函数考虑了每个序列成员的权重。 最大误差为1%。 内存消耗 `log(n)`,哪里 `n` 是多个值。 +使用[t-digest](https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf) 算法计算近似[分位数](https://en.wikipedia.org/wiki/Quantile)。 该函数考虑了每个序列成员的权重。最大误差为1%。 内存消耗 `log(n)`,这里 `n` 是值的个数。 -该功能的性能低于性能 [分位数](#quantile) 或 [分位定时](#quantiletiming). 在状态大小与精度的比率方面,这个函数比 `quantile`. +该功能的性能低于性能 [分位数](#quantile) 或 [时间分位](#quantiletiming). 在状态大小与精度的比率方面,这个函数比 `quantile`更优秀。 结果取决于运行查询的顺序,并且是不确定的。 -当使用多个 `quantile*` 在查询中具有不同级别的函数,内部状态不会被组合(即查询的工作效率低于它可以)。 在这种情况下,使用 [分位数](#quantiles) 功能。 +当在一个查询中使用多个不同层次的 `quantile*` 时,内部状态不会被组合(即查询的工作效率低于组合情况)。在这种情况下,使用[分位数](#quantiles)功能 **语法** @@ -1456,13 +1454,13 @@ quantileTDigest(level)(expr) **参数** -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` 值的范围 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median). 
-- `expr` — Expression over the column values resulting in numeric [数据类型](../../sql-reference/data-types/index.md#data_types), [日期](../../sql-reference/data-types/date.md) 或 [日期时间](../../sql-reference/data-types/datetime.md). -- `weight` — Column with weights of sequence elements. Weight is a number of value occurrences. +- `level` — 分位数层次。可选参数。 从0到1的一个float类型的常量。 我们推荐 `level` 值的范围为 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median). +- `expr` — 求职表达式,类型为:数值[数据类型](../../sql-reference/data-types/index.md#data_types),[日期](../../sql-reference/data-types/date.md)数据类型或[时间](../../sql-reference/data-types/datetime.md)数据类型。 +- `weight` — 权重序列。 权重是一个数据出现的数值。 **返回值** -- 指定电平的近似分位数。 +- 指定层次的分位数。 类型: @@ -1491,20 +1489,20 @@ SELECT quantileTDigestWeighted(number, 1) FROM numbers(10) - [中位数](#median) - [分位数](#quantiles) -## 中位数 {#median} +## median {#median} -该 `median*` 函数是相应的别名 `quantile*` 功能。 它们计算数字数据样本的中位数。 +`median*` 函数是 `quantile*` 函数的别名。 它们计算数字数据样本的中位数。 -功能: +函数: -- `median` — Alias for [分位数](#quantile). -- `medianDeterministic` — Alias for [量化确定](#quantiledeterministic). -- `medianExact` — Alias for [quantileExact](#quantileexact). -- `medianExactWeighted` — Alias for [分位数加权](#quantileexactweighted). -- `medianTiming` — Alias for [分位定时](#quantiletiming). -- `medianTimingWeighted` — Alias for [分位时间加权](#quantiletimingweighted). -- `medianTDigest` — Alias for [quantileTDigest](#quantiletdigest). -- `medianTDigestWeighted` — Alias for [quantileTDigestWeighted](#quantiletdigestweighted). +- `median` — [quantile](#quantile)别名。 +- `medianDeterministic` — [quantileDeterministic](#quantiledeterministic)别名。 +- `medianExact` — [quantileExact](#quantileexact)别名。 +- `medianExactWeighted` — [quantileExactWeighted](#quantileexactweighted)别名。 +- `medianTiming` — [quantileTiming](#quantiletiming)别名。 +- `medianTimingWeighted` — [quantileTimingWeighted](#quantiletimingweighted)别名。 +- `medianTDigest` — [quantileTDigest](#quantiletdigest)别名。 +- `medianTDigestWeighted` — [quantileTDigestWeighted](#quantiletdigestweighted)别名。 **示例** @@ -1535,11 +1533,11 @@ SELECT medianDeterministic(val, 1) FROM t ## quantiles(level1, level2, …)(x) {#quantiles} -所有分位数函数也具有相应的分位数函数: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesTDigest`. 这些函数在一遍中计算所列电平的所有分位数,并返回结果值的数组。 +所有分位数函数也有相应的函数: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesTDigest`。这些函数一次计算所列层次的所有分位数,并返回结果值的数组。 ## varSamp(x) {#varsampx} -计算金额 `Σ((x - x̅)^2) / (n - 1)`,哪里 `n` 是样本大小和 `x̅`是平均值 `x`. +计算 `Σ((x - x̅)^2) / (n - 1)`,这里 `n` 是样本大小, `x̅`是`x`的平均值。 它表示随机变量的方差的无偏估计,如果传递的值形成其样本。 @@ -1550,23 +1548,23 @@ SELECT medianDeterministic(val, 1) FROM t ## varPop(x) {#varpopx} -计算金额 `Σ((x - x̅)^2) / n`,哪里 `n` 是样本大小和 `x̅`是平均值 `x`. +计算 `Σ((x - x̅)^2) / n`,这里 `n` 是样本大小, `x̅`是`x`的平均值。 -换句话说,分散为一组值。 返回 `Float64`. +换句话说,计算一组数据的离差。 返回 `Float64`。 !!! note "注" 该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `varPopStable` 功能。 它的工作速度较慢,但提供较低的计算错误。 ## stddevSamp(x) {#stddevsampx} -结果等于平方根 `varSamp(x)`. +结果等于平方根 `varSamp(x)`。 !!! note "注" 该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `stddevSampStable` 功能。 它的工作速度较慢,但提供较低的计算错误。 ## stddevPop(x) {#stddevpopx} -结果等于平方根 `varPop(x)`. +结果等于平方根 `varPop(x)`。 !!! 
note "注" 该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `stddevPopStable` 功能。 它的工作速度较慢,但提供较低的计算错误。 @@ -1575,15 +1573,15 @@ SELECT medianDeterministic(val, 1) FROM t 返回指定列中近似最常见值的数组。 生成的数组按值的近似频率降序排序(而不是值本身)。 -实现了 [过滤节省空间](http://www.l2f.inesc-id.pt/~fmmb/wiki/uploads/Work/misnis.ref0a.pdf) 基于reduce-and-combine算法的TopK分析算法 [并行节省空间](https://arxiv.org/pdf/1401.0702.pdf). +实现了[过滤节省空间](http://www.l2f.inesc-id.pt/~fmmb/wiki/uploads/Work/misnis.ref0a.pdf)算法, 使用基于reduce-and-combine的算法,借鉴[并行节省空间](https://arxiv.org/pdf/1401.0702.pdf). ``` sql topK(N)(column) ``` -此函数不提供保证的结果。 在某些情况下,可能会发生错误,并且可能会返回不是最常见值的常见值。 +此函数不提供保证的结果。 在某些情况下,可能会发生错误,并且可能会返回不是最高频的值。 -我们建议使用 `N < 10` 值;性能降低了大 `N` 值。 的最大值 `N = 65536`. +我们建议使用 `N < 10` 值,`N` 值越大,性能越低。最大值 `N = 65536`。 **参数** @@ -1593,11 +1591,11 @@ topK(N)(column) **参数** -- ' x ' – The value to calculate frequency. +- ' x ' – 计算的频率值。 **示例** -就拿 [时间](../../getting-started/example-datasets/ontime.md) 数据集,并选择在三个最频繁出现的值 `AirlineID` 列。 +就拿 [OnTime](../../getting-started/example-datasets/ontime.md) 数据集来说,选择`AirlineID` 列中出现最频繁的三个。 ``` sql SELECT topK(3)(AirlineID) AS res @@ -1612,7 +1610,7 @@ FROM ontime ## topKWeighted {#topkweighted} -类似于 `topK` 但需要一个整数类型的附加参数 - `weight`. 每个价值都被记入 `weight` 次频率计算。 +类似于 `topK` 但需要一个整数类型的附加参数 - `weight`. 每个输入都被记入 `weight` 次频率计算。 **语法** @@ -1622,12 +1620,12 @@ topKWeighted(N)(x, weight) **参数** -- `N` — The number of elements to return. +- `N` — 返回值个数。 **参数** -- `x` – The value. -- `weight` — The weight. [UInt8](../../sql-reference/data-types/int-uint.md). +- `x` – 输入值。 +- `weight` — 权重。 [UInt8](../../sql-reference/data-types/int-uint.md)类型。 **返回值** @@ -1651,36 +1649,36 @@ SELECT topKWeighted(10)(number, number) FROM numbers(1000) ## covarSamp(x,y) {#covarsampx-y} -计算的值 `Σ((x - x̅)(y - y̅)) / (n - 1)`. +计算 `Σ((x - x̅)(y - y̅)) / (n - 1)`。 -返回Float64。 当 `n <= 1`, returns +∞. +返回Float64。 当 `n <= 1`, returns +∞。 !!! note "注" 该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `covarSampStable` 功能。 它的工作速度较慢,但提供较低的计算错误。 ## covarPop(x,y) {#covarpopx-y} -计算的值 `Σ((x - x̅)(y - y̅)) / n`. +计算 `Σ((x - x̅)(y - y̅)) / n`。 !!! note "注" 该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `covarPopStable` 功能。 它的工作速度较慢,但提供了较低的计算错误。 ## corr(x,y) {#corrx-y} -计算Pearson相关系数: `Σ((x - x̅)(y - y̅)) / sqrt(Σ((x - x̅)^2) * Σ((y - y̅)^2))`. +计算Pearson相关系数: `Σ((x - x̅)(y - y̅)) / sqrt(Σ((x - x̅)^2) * Σ((y - y̅)^2))`。 !!! note "注" 该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `corrStable` 功能。 它的工作速度较慢,但提供较低的计算错误。 ## categoricalInformationValue {#categoricalinformationvalue} -计算的值 `(P(tag = 1) - P(tag = 0))(log(P(tag = 1)) - log(P(tag = 0)))` 对于每个类别。 +对于每个类别计算 `(P(tag = 1) - P(tag = 0))(log(P(tag = 1)) - log(P(tag = 0)))` 。 ``` sql categoricalInformationValue(category1, category2, ..., tag) ``` -结果指示离散(分类)要素如何使用 `[category1, category2, ...]` 有助于预测的价值的学习模型 `tag`. +结果指示离散(分类)要素如何使用 `[category1, category2, ...]` 有助于使用学习模型预测`tag`的值。 ## simpleLinearRegression {#simplelinearregression} @@ -1692,12 +1690,12 @@ simpleLinearRegression(x, y) 参数: -- `x` — Column with dependent variable values. -- `y` — Column with explanatory variable values. +- `x` — x轴。 +- `y` — y轴。 返回值: -常量 `(a, b)` 结果行的 `y = a*x + b`. 
+符合`y = a*x + b`的常量 `(a, b)` 。 **例** @@ -1721,9 +1719,9 @@ SELECT arrayReduce('simpleLinearRegression', [0, 1, 2, 3], [3, 4, 5, 6]) └───────────────────────────────────────────────────────────────────┘ ``` -## 随机指标线上回归 {#agg_functions-stochasticlinearregression} +## stochasticLinearRegression {#agg_functions-stochasticlinearregression} -该函数实现随机线性回归。 它支持自定义参数的学习率,L2正则化系数,迷你批量大小,并具有更新权重的方法很少 ([亚当](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Adam) (默认使用), [简单SGD](https://en.wikipedia.org/wiki/Stochastic_gradient_descent), [动量](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Momentum), [Nesterov](https://mipt.ru/upload/medialibrary/d7e/41-91.pdf)). +该函数实现随机线性回归。 它支持自定义参数的学习率、L2正则化系数、微批,并且具有少量更新权重的方法([Adam](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Adam) (默认), [simple SGD](https://en.wikipedia.org/wiki/Stochastic_gradient_descent), [Momentum](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Momentum), [Nesterov](https://mipt.ru/upload/medialibrary/d7e/41-91.pdf))。 ### 参数 {#agg_functions-stochasticlinearregression-parameters} @@ -1738,14 +1736,14 @@ stochasticLinearRegression(1.0, 1.0, 10, 'SGD') 3. `mini-batch size` 设置元素的数量,这些元素将被计算和求和以执行梯度下降的一个步骤。 纯随机下降使用一个元素,但是具有小批量(约10个元素)使梯度步骤更稳定。 默认值为 `15`. 4. `method for updating weights` 他们是: `Adam` (默认情况下), `SGD`, `Momentum`, `Nesterov`. `Momentum` 和 `Nesterov` 需要更多的计算和内存,但是它们恰好在收敛速度和随机梯度方法的稳定性方面是有用的。 -### 用途 {#agg_functions-stochasticlinearregression-usage} +### 用法 {#agg_functions-stochasticlinearregression-usage} `stochasticLinearRegression` 用于两个步骤:拟合模型和预测新数据。 为了拟合模型并保存其状态以供以后使用,我们使用 `-State` combinator,它基本上保存了状态(模型权重等)。 为了预测我们使用函数 [evalMLMethod](../functions/machine-learning-functions.md#machine_learning_methods-evalmlmethod),这需要一个状态作为参数以及特征来预测。 -**1.** 适合 +**1.** 安装 可以使用这种查询。 @@ -1807,28 +1805,28 @@ evalMLMethod(model, param1, param2) FROM test_data stochasticLogisticRegression(1.0, 1.0, 10, 'SGD') ``` -1. 适合 +**1.** 安装 - See the `Fitting` section in the [stochasticLinearRegression](#stochasticlinearregression-usage-fitting) description. + 参考stochasticLinearRegression相关文档 - Predicted labels have to be in \[-1, 1\]. + 预测标签的取值范围为[-1, 1] -1. 预测 +**2.** 预测 - Using saved state we can predict probability of object having label `1`. + 使用已经保存的state我们可以预测标签为 `1` 的对象的概率。 ``` sql WITH (SELECT state FROM your_model) AS model SELECT evalMLMethod(model, param1, param2) FROM test_data ``` - The query will return a column of probabilities. Note that first argument of `evalMLMethod` is `AggregateFunctionState` object, next are columns of features. + 查询结果返回一个列的概率。注意 `evalMLMethod` 的第一个参数是 `AggregateFunctionState` 对象,接下来的参数是列的特性。 - We can also set a bound of probability, which assigns elements to different labels. + 我们也可以设置概率的范围, 这样需要给元素指定不同的标签。 ``` sql SELECT ans < 1.1 AND ans > 0.5 FROM @@ -1836,14 +1834,14 @@ stochasticLogisticRegression(1.0, 1.0, 10, 'SGD') evalMLMethod(model, param1, param2) AS ans FROM test_data) ``` - Then the result will be labels. + 结果是标签。 - `test_data` is a table like `train_data` but may not contain target value. 
+ `test_data` 是一个像 `train_data` 一样的表,但是不包含目标值。 **另请参阅** - [随机指标线上回归](#agg_functions-stochasticlinearregression) -- [线性回归和逻辑回归之间的差异。](https://stackoverflow.com/questions/12146914/what-is-the-difference-between-linear-regression-and-logistic-regression) +- [线性回归和逻辑回归之间的差异](https://stackoverflow.com/questions/12146914/what-is-the-difference-between-linear-regression-and-logistic-regression) ## groupBitmapAnd {#groupbitmapand} diff --git a/docs/zh/sql-reference/data-types/domains/ipv4.md b/docs/zh/sql-reference/data-types/domains/ipv4.md index 65c066fb487..9ce12025405 100644 --- a/docs/zh/sql-reference/data-types/domains/ipv4.md +++ b/docs/zh/sql-reference/data-types/domains/ipv4.md @@ -24,7 +24,7 @@ CREATE TABLE hits (url String, from IPv4) ENGINE = MergeTree() ORDER BY from; 在写入与查询时,`IPv4`类型能够识别可读性更加友好的输入输出格式: ``` sql -INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '116.253.40.133')('https://clickhouse.tech', '183.247.232.58')('https://clickhouse.yandex/docs/en/', '116.106.34.242'); +INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '116.253.40.133')('https://clickhouse.tech', '183.247.232.58')('https://clickhouse.tech/docs/en/', '116.106.34.242'); SELECT * FROM hits; ``` diff --git a/docs/zh/sql-reference/data-types/domains/ipv6.md b/docs/zh/sql-reference/data-types/domains/ipv6.md index bc0f95932aa..5b1afc2cd39 100644 --- a/docs/zh/sql-reference/data-types/domains/ipv6.md +++ b/docs/zh/sql-reference/data-types/domains/ipv6.md @@ -24,7 +24,7 @@ CREATE TABLE hits (url String, from IPv6) ENGINE = MergeTree() ORDER BY from; 在写入与查询时,`IPv6`类型能够识别可读性更加友好的输入输出格式: ``` sql -INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '2a02:aa08:e000:3100::2')('https://clickhouse.tech', '2001:44c8:129:2632:33:0:252:2')('https://clickhouse.yandex/docs/en/', '2a02:e980:1e::1'); +INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '2a02:aa08:e000:3100::2')('https://clickhouse.tech', '2001:44c8:129:2632:33:0:252:2')('https://clickhouse.tech/docs/en/', '2a02:e980:1e::1'); SELECT * FROM hits; ``` diff --git a/docs/zh/sql-reference/data-types/simpleaggregatefunction.md b/docs/zh/sql-reference/data-types/simpleaggregatefunction.md deleted file mode 120000 index 76a7ef3b802..00000000000 --- a/docs/zh/sql-reference/data-types/simpleaggregatefunction.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/sql-reference/data-types/simpleaggregatefunction.md \ No newline at end of file diff --git a/docs/zh/sql-reference/data-types/simpleaggregatefunction.md b/docs/zh/sql-reference/data-types/simpleaggregatefunction.md new file mode 100644 index 00000000000..e827adb817e --- /dev/null +++ b/docs/zh/sql-reference/data-types/simpleaggregatefunction.md @@ -0,0 +1,40 @@ +--- +machine_translated: true +machine_translated_rev: 71d72c1f237f4a553fe91ba6c6c633e81a49e35b +--- + +# SimpleAggregateFunction {#data-type-simpleaggregatefunction} + +`SimpleAggregateFunction(name, types_of_arguments…)` 数据类型存储聚合函数的当前值,而不将其完整状态存储为 [`AggregateFunction`](../../sql-reference/data-types/aggregatefunction.md) 有 此优化可应用于具有以下属性的函数:应用函数的结果 `f` 到行集 `S1 UNION ALL S2` 可以通过应用来获得 `f` 行的部分单独设置,然后再次应用 `f` 到结果: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`. 
此属性保证部分聚合结果足以计算组合结果,因此我们不必存储和处理任何额外的数据。 + +支持以下聚合函数: + +- [`any`](../../sql-reference/aggregate-functions/reference.md#agg_function-any) +- [`anyLast`](../../sql-reference/aggregate-functions/reference.md#anylastx) +- [`min`](../../sql-reference/aggregate-functions/reference.md#agg_function-min) +- [`max`](../../sql-reference/aggregate-functions/reference.md#agg_function-max) +- [`sum`](../../sql-reference/aggregate-functions/reference.md#agg_function-sum) +- [`groupBitAnd`](../../sql-reference/aggregate-functions/reference.md#groupbitand) +- [`groupBitOr`](../../sql-reference/aggregate-functions/reference.md#groupbitor) +- [`groupBitXor`](../../sql-reference/aggregate-functions/reference.md#groupbitxor) +- [`groupArrayArray`](../../sql-reference/aggregate-functions/reference.md#agg_function-grouparray) +- [`groupUniqArrayArray`](../../sql-reference/aggregate-functions/reference.md#groupuniqarrayx-groupuniqarraymax-sizex) + +的值 `SimpleAggregateFunction(func, Type)` 看起来和存储方式相同 `Type`,所以你不需要应用函数 `-Merge`/`-State` 后缀。 `SimpleAggregateFunction` 具有比更好的性能 `AggregateFunction` 具有相同的聚合功能。 + +**参数** + +- 聚合函数的名称。 +- 聚合函数参数的类型。 + +**示例** + +``` sql +CREATE TABLE t +( + column1 SimpleAggregateFunction(sum, UInt64), + column2 SimpleAggregateFunction(any, String) +) ENGINE = ... +``` + +[原始文章](https://clickhouse.tech/docs/en/data_types/simpleaggregatefunction/) diff --git a/docs/zh/sql-reference/dictionaries/external-dictionaries/external-dicts.md b/docs/zh/sql-reference/dictionaries/external-dictionaries/external-dicts.md index c67deb55401..756eee31026 100644 --- a/docs/zh/sql-reference/dictionaries/external-dictionaries/external-dicts.md +++ b/docs/zh/sql-reference/dictionaries/external-dictionaries/external-dicts.md @@ -19,7 +19,7 @@ ClickHouse: 字典可以在服务器启动或首次使用时加载,具体取决于 [dictionaries\_lazy\_load](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_lazy_load) 设置。 -该 [字典](../../../operations/system-tables.md#system_tables-dictionaries) 系统表包含有关在服务器上配置的字典的信息。 对于每个字典,你可以在那里找到: +该 [字典](../../../operations/system-tables/dictionaries.md#system_tables-dictionaries) 系统表包含有关在服务器上配置的字典的信息。 对于每个字典,你可以在那里找到: - 字典的状态。 - 配置参数。 diff --git a/docs/zh/sql-reference/functions/introspection.md b/docs/zh/sql-reference/functions/introspection.md index 43d8b596dfb..4b4367f3dc8 100644 --- a/docs/zh/sql-reference/functions/introspection.md +++ b/docs/zh/sql-reference/functions/introspection.md @@ -20,7 +20,7 @@ toc_title: "\u81EA\u7701" For security reasons introspection functions are disabled by default. -ClickHouse将探查器报告保存到 [trace\_log](../../operations/system-tables.md#system_tables-trace_log) 系统表. 确保正确配置了表和探查器。 +ClickHouse将探查器报告保存到 [trace\_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) 系统表. 
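这些函数出于安全原因默认被禁用,使用前通常需要先开启 `allow_introspection_functions` 设置,例如(以下仅为示意的草图):

``` sql
SET allow_introspection_functions = 1
```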
确保正确配置了表和探查器。 ## addressToLine {#addresstoline} diff --git a/docs/zh/sql-reference/index.md b/docs/zh/sql-reference/index.md index aed96c4b34f..c47c20b9cf9 100644 --- a/docs/zh/sql-reference/index.md +++ b/docs/zh/sql-reference/index.md @@ -1,15 +1,13 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd -toc_folder_title: "SQL\u53C2\u8003" +toc_folder_title: SQL参考 toc_hidden: true toc_priority: 28 -toc_title: "\u9690\u85CF" +toc_title: hidden --- # SQL参考 {#sql-reference} -ClickHouse支持以下类型的查询: +ClickHouse支持以下形式的查询: - [SELECT](statements/select/index.md) - [INSERT INTO](statements/insert-into.md) @@ -17,4 +15,4 @@ ClickHouse支持以下类型的查询: - [ALTER](statements/alter.md#query_language_queries_alter) - [其他类型的查询](statements/misc.md) -[原始文章](https://clickhouse.tech/docs/en/sql-reference/) +[原始文档](https://clickhouse.tech/docs/zh/sql-reference/) diff --git a/docs/zh/sql-reference/operators/in.md b/docs/zh/sql-reference/operators/in.md deleted file mode 120000 index 3a2feda2f61..00000000000 --- a/docs/zh/sql-reference/operators/in.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/sql-reference/operators/in.md \ No newline at end of file diff --git a/docs/zh/sql-reference/operators/in.md b/docs/zh/sql-reference/operators/in.md new file mode 100644 index 00000000000..eaaa477fbe1 --- /dev/null +++ b/docs/zh/sql-reference/operators/in.md @@ -0,0 +1,204 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +--- + +# 在运营商 {#select-in-operators} + +该 `IN`, `NOT IN`, `GLOBAL IN`,和 `GLOBAL NOT IN` 运算符是单独复盖的,因为它们的功能相当丰富。 + +运算符的左侧是单列或元组。 + +例: + +``` sql +SELECT UserID IN (123, 456) FROM ... +SELECT (CounterID, UserID) IN ((34, 123), (101500, 456)) FROM ... +``` + +如果左侧是索引中的单列,而右侧是一组常量,则系统将使用索引处理查询。 + +Don't list too many values explicitly (i.e. millions). If a data set is large, put it in a temporary table (for example, see the section “External data for query processing”),然后使用子查询。 + +运算符的右侧可以是一组常量表达式、一组带有常量表达式的元组(如上面的示例所示),或括号中的数据库表或SELECT子查询的名称。 + +如果运算符的右侧是表的名称(例如, `UserID IN users`),这相当于子查询 `UserID IN (SELECT * FROM users)`. 使用与查询一起发送的外部数据时,请使用此选项。 例如,查询可以与一组用户Id一起发送到 ‘users’ 应过滤的临时表。 + +如果运算符的右侧是具有Set引擎的表名(始终位于RAM中的准备好的数据集),则不会为每个查询重新创建数据集。 + +子查询可以指定多个用于筛选元组的列。 +示例: + +``` sql +SELECT (CounterID, UserID) IN (SELECT CounterID, UserID FROM ...) FROM ... +``` + +IN运算符左侧和右侧的列应具有相同的类型。 + +IN运算符和子查询可能出现在查询的任何部分,包括聚合函数和lambda函数。 +示例: + +``` sql +SELECT + EventDate, + avg(UserID IN + ( + SELECT UserID + FROM test.hits + WHERE EventDate = toDate('2014-03-17') + )) AS ratio +FROM test.hits +GROUP BY EventDate +ORDER BY EventDate ASC +``` + +``` text +┌──EventDate─┬────ratio─┐ +│ 2014-03-17 │ 1 │ +│ 2014-03-18 │ 0.807696 │ +│ 2014-03-19 │ 0.755406 │ +│ 2014-03-20 │ 0.723218 │ +│ 2014-03-21 │ 0.697021 │ +│ 2014-03-22 │ 0.647851 │ +│ 2014-03-23 │ 0.648416 │ +└────────────┴──────────┘ +``` + +对于3月17日后的每一天,计算3月17日访问该网站的用户所做的浏览量百分比。 +IN子句中的子查询始终只在单个服务器上运行一次。 没有依赖子查询。 + +## 空处理 {#in-null-processing} + +在请求处理过程中, `IN` 运算符假定运算的结果 [NULL](../../sql-reference/syntax.md#null-literal) 总是等于 `0`,无论是否 `NULL` 位于操作员的右侧或左侧。 `NULL` 值不包含在任何数据集中,彼此不对应,并且在以下情况下无法进行比较 [transform\_null\_in=0](../../operations/settings/settings.md#transform_null_in). 
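如果希望 `NULL` 能够与集合中的 `NULL` 相匹配,可以开启上文提到的 `transform_null_in` 设置。下面是一个简单的示意(默认设置下该查询返回 `0`,开启后应返回 `1`):

``` sql
SELECT NULL IN (NULL, 3) SETTINGS transform_null_in = 1
```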
+ +下面是一个例子 `t_null` 表: + +``` text +┌─x─┬────y─┐ +│ 1 │ ᴺᵁᴸᴸ │ +│ 2 │ 3 │ +└───┴──────┘ +``` + +运行查询 `SELECT x FROM t_null WHERE y IN (NULL,3)` 为您提供以下结果: + +``` text +┌─x─┐ +│ 2 │ +└───┘ +``` + +你可以看到,在其中的行 `y = NULL` 被抛出的查询结果。 这是因为ClickHouse无法决定是否 `NULL` 包含在 `(NULL,3)` 设置,返回 `0` 作为操作的结果,和 `SELECT` 从最终输出中排除此行。 + +``` sql +SELECT y IN (NULL, 3) +FROM t_null +``` + +``` text +┌─in(y, tuple(NULL, 3))─┐ +│ 0 │ +│ 1 │ +└───────────────────────┘ +``` + +## 分布式子查询 {#select-distributed-subqueries} + +带子查询的IN-s有两个选项(类似于连接):normal `IN` / `JOIN` 和 `GLOBAL IN` / `GLOBAL JOIN`. 它们在分布式查询处理的运行方式上有所不同。 + +!!! attention "注意" + 请记住,下面描述的算法可能会有不同的工作方式取决于 [设置](../../operations/settings/settings.md) `distributed_product_mode` 设置。 + +当使用常规IN时,查询被发送到远程服务器,并且它们中的每个服务器都在运行子查询 `IN` 或 `JOIN` 条款 + +使用时 `GLOBAL IN` / `GLOBAL JOINs`,首先所有的子查询都运行 `GLOBAL IN` / `GLOBAL JOINs`,并将结果收集在临时表中。 然后将临时表发送到每个远程服务器,其中使用此临时数据运行查询。 + +对于非分布式查询,请使用常规 `IN` / `JOIN`. + +在使用子查询时要小心 `IN` / `JOIN` 用于分布式查询处理的子句。 + +让我们来看看一些例子。 假设集群中的每个服务器都有一个正常的 **local\_table**. 每个服务器还具有 **distributed\_table** 表与 **分布** 类型,它查看群集中的所有服务器。 + +对于查询 **distributed\_table**,查询将被发送到所有远程服务器,并使用以下命令在其上运行 **local\_table**. + +例如,查询 + +``` sql +SELECT uniq(UserID) FROM distributed_table +``` + +将被发送到所有远程服务器 + +``` sql +SELECT uniq(UserID) FROM local_table +``` + +并且并行运行它们中的每一个,直到达到可以结合中间结果的阶段。 然后将中间结果返回给请求者服务器并在其上合并,并将最终结果发送给客户端。 + +现在让我们检查一个查询IN: + +``` sql +SELECT uniq(UserID) FROM distributed_table WHERE CounterID = 101500 AND UserID IN (SELECT UserID FROM local_table WHERE CounterID = 34) +``` + +- 计算两个网站的受众的交集。 + +此查询将以下列方式发送到所有远程服务器 + +``` sql +SELECT uniq(UserID) FROM local_table WHERE CounterID = 101500 AND UserID IN (SELECT UserID FROM local_table WHERE CounterID = 34) +``` + +换句话说,IN子句中的数据集将在每台服务器上独立收集,仅在每台服务器上本地存储的数据中收集。 + +如果您已经为此情况做好准备,并且已经将数据分散到群集服务器上,以便单个用户Id的数据完全驻留在单个服务器上,则这将正常和最佳地工作。 在这种情况下,所有必要的数据将在每台服务器上本地提供。 否则,结果将是不准确的。 我们将查询的这种变体称为 “local IN”. + +若要更正数据在群集服务器上随机传播时查询的工作方式,可以指定 **distributed\_table** 在子查询中。 查询如下所示: + +``` sql +SELECT uniq(UserID) FROM distributed_table WHERE CounterID = 101500 AND UserID IN (SELECT UserID FROM distributed_table WHERE CounterID = 34) +``` + +此查询将以下列方式发送到所有远程服务器 + +``` sql +SELECT uniq(UserID) FROM local_table WHERE CounterID = 101500 AND UserID IN (SELECT UserID FROM distributed_table WHERE CounterID = 34) +``` + +子查询将开始在每个远程服务器上运行。 由于子查询使用分布式表,因此每个远程服务器上的子查询将重新发送到每个远程服务器 + +``` sql +SELECT UserID FROM local_table WHERE CounterID = 34 +``` + +例如,如果您有100台服务器的集群,则执行整个查询将需要10,000个基本请求,这通常被认为是不可接受的。 + +在这种情况下,应始终使用GLOBAL IN而不是IN。 让我们来看看它是如何工作的查询 + +``` sql +SELECT uniq(UserID) FROM distributed_table WHERE CounterID = 101500 AND UserID GLOBAL IN (SELECT UserID FROM distributed_table WHERE CounterID = 34) +``` + +请求者服务器将运行子查询 + +``` sql +SELECT UserID FROM distributed_table WHERE CounterID = 34 +``` + +结果将被放在RAM中的临时表中。 然后请求将被发送到每个远程服务器 + +``` sql +SELECT uniq(UserID) FROM local_table WHERE CounterID = 101500 AND UserID GLOBAL IN _data1 +``` + +和临时表 `_data1` 将通过查询发送到每个远程服务器(临时表的名称是实现定义的)。 + +这比使用正常IN更优化。 但是,请记住以下几点: + +1. 创建临时表时,数据不是唯一的。 要减少通过网络传输的数据量,请在子查询中指定DISTINCT。 (你不需要为正常人做这个。) +2. 临时表将被发送到所有远程服务器。 传输不考虑网络拓扑。 例如,如果10个远程服务器驻留在与请求者服务器非常远程的数据中心中,则数据将通过通道发送10次到远程数据中心。 使用GLOBAL IN时尽量避免使用大型数据集。 +3. 将数据传输到远程服务器时,无法配置网络带宽限制。 您可能会使网络过载。 +4. 尝试跨服务器分发数据,以便您不需要定期使用GLOBAL IN。 +5. 
如果您需要经常使用GLOBAL IN,请规划ClickHouse集群的位置,以便单个副本组驻留在不超过一个数据中心中,并且它们之间具有快速网络,以便可以完全在单个数据中心内处理查询。 + +这也是有意义的,在指定一个本地表 `GLOBAL IN` 子句,以防此本地表仅在请求者服务器上可用,并且您希望在远程服务器上使用来自它的数据。 diff --git a/docs/zh/sql-reference/statements/alter.md b/docs/zh/sql-reference/statements/alter.md index 24ca1e47372..26b5e66cc8a 100644 --- a/docs/zh/sql-reference/statements/alter.md +++ b/docs/zh/sql-reference/statements/alter.md @@ -1,48 +1,47 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd toc_priority: 36 toc_title: ALTER --- ## ALTER {#query_language_queries_alter} -该 `ALTER` 查询仅支持 `*MergeTree` 表,以及 `Merge`和`Distributed`. 查询有几个变体。 + `ALTER` 仅支持 `*MergeTree` ,`Merge`以及`Distributed`等引擎表。 + 该操作有多种形式。 ### 列操作 {#column-manipulations} -更改表结构。 +改变表结构: ``` sql ALTER TABLE [db].name [ON CLUSTER cluster] ADD|DROP|CLEAR|COMMENT|MODIFY COLUMN ... ``` -在查询中,指定一个或多个逗号分隔操作的列表。 -每个操作都是对列的操作。 +在语句中,配置一个或多个用逗号分隔的动作。每个动作是对某个列实施的操作行为。 -支持以下操作: +支持下列动作: -- [ADD COLUMN](#alter_add-column) — Adds a new column to the table. -- [DROP COLUMN](#alter_drop-column) — Deletes the column. -- [CLEAR COLUMN](#alter_clear-column) — Resets column values. -- [COMMENT COLUMN](#alter_comment-column) — Adds a text comment to the column. -- [MODIFY COLUMN](#alter_modify-column) — Changes column's type, default expression and TTL. +- [ADD COLUMN](#alter_add-column) — 添加列 +- [DROP COLUMN](#alter_drop-column) — 删除列 +- [CLEAR COLUMN](#alter_clear-column) — 重置列的值 +- [COMMENT COLUMN](#alter_comment-column) — 给列增加注释说明 +- [MODIFY COLUMN](#alter_modify-column) — 改变列的值类型,默认表达式以及TTL -下面详细描述这些动作。 +这些动作将在下文中进行详述。 -#### ADD COLUMN {#alter_add-column} +#### 增加列 {#alter_add-column} ``` sql ADD COLUMN [IF NOT EXISTS] name [type] [default_expr] [codec] [AFTER name_after] ``` -将一个新列添加到表中,并指定 `name`, `type`, [`codec`](create.md#codecs) 和 `default_expr` (请参阅部分 [默认表达式](create.md#create-default-values)). +使用指定的`name`, `type`, [`codec`](../../sql-reference/statements/create.md#codecs) 以及 `default_expr` (请参见 [Default expressions](../../sql-reference/statements/create.md#create-default-values)),往表中增加新的列。 -如果 `IF NOT EXISTS` 如果列已经存在,则查询不会返回错误。 如果您指定 `AFTER name_after` (另一列的名称),该列被添加在表列表中指定的一列之后。 否则,该列将添加到表的末尾。 请注意,没有办法将列添加到表的开头。 为了一系列的行动, `name_after` 可以是在以前的操作之一中添加的列的名称。 -添加列只是更改表结构,而不对数据执行任何操作。 数据不会出现在磁盘上后 `ALTER`. 如果从表中读取某一列的数据缺失,则将使用默认值填充该列(如果存在默认表达式,则执行默认表达式,或使用零或空字符串)。 合并数据部分后,该列将出现在磁盘上(请参阅 [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md)). +如果sql中包含 `IF NOT EXISTS` ,执行语句时如果列已经存在,CH不会报错。如果指定`AFTER name_after`(表中另一个列的名称),则新的列会加在指定列的后面。否则,新的列将被添加到表的末尾。注意,不能讲新的列添加到表的开始位置, `name_after` 可以是执行该动作时已经在表中存在的任意列。 -这种方法使我们能够完成 `ALTER` 即时查询,不增加旧数据量。 +添加列仅仅是改变原有表的结构不会对已有数据产生影响。执行完 `ALTER`后磁盘中也不会出现新的数据。如果查询表时列的数据为空,那么CH会使用列的默认值来进行填充(如果有默认表达式,则使用这个;或者用0或空字符串)。当数据块完成合并(参见[MergeTree](../../engines/table-engines/mergetree-family/mergetree.md))后,磁盘中会出现该列的数据。 + +这种方式允许 `ALTER` 语句能马上执行。不需要增加原有数据的大小。 示例: @@ -50,15 +49,16 @@ ADD COLUMN [IF NOT EXISTS] name [type] [default_expr] [codec] [AFTER name_after] ALTER TABLE visits ADD COLUMN browser String AFTER user_id ``` -#### DROP COLUMN {#alter_drop-column} +#### 删除列 {#alter_drop-column} ``` sql DROP COLUMN [IF EXISTS] name ``` -删除具有名称的列 `name`. 
如果 `IF EXISTS` 如果指定了子句,如果该列不存在,则查询不会返回错误。 +通过指定 `name`删除列。如果语句包含 `IF EXISTS`,执行时遇到不存在的列也不会报错。 + +从文件系统中删除数据。由于是删除列的整个文件,该语句几乎是立即执行完成的。 -从文件系统中删除数据。 由于这将删除整个文件,查询几乎立即完成。 示例: @@ -66,15 +66,16 @@ DROP COLUMN [IF EXISTS] name ALTER TABLE visits DROP COLUMN browser ``` -#### CLEAR COLUMN {#alter_clear-column} +#### 清空列 {#alter_clear-column} ``` sql CLEAR COLUMN [IF EXISTS] name IN PARTITION partition_name ``` -重置指定分区的列中的所有数据。 了解有关设置分区名称的详细信息 [如何指定分区表达式](#alter-how-to-specify-part-expr). +重置指定分区中列的值。 分区名称 `partition_name` 请参见 [怎样设置分区表达式](#alter-how-to-specify-part-expr) + +如果语句中包含 `IF EXISTS` ,遇到不存在的列,sql执行不会报错。 -如果 `IF EXISTS` 如果指定了子句,如果该列不存在,则查询不会返回错误。 示例: @@ -82,17 +83,16 @@ CLEAR COLUMN [IF EXISTS] name IN PARTITION partition_name ALTER TABLE visits CLEAR COLUMN browser IN PARTITION tuple() ``` -#### COMMENT COLUMN {#alter_comment-column} +#### 增加注释 {#alter_comment-column} ``` sql COMMENT COLUMN [IF EXISTS] name 'comment' ``` -向列添加注释。 如果 `IF EXISTS` 如果指定了子句,如果该列不存在,则查询不会返回错误。 +给列增加注释说明。如果语句中包含 `IF EXISTS` ,遇到不存在的列,sql执行不会报错。 -每列可以有一个注释。 如果列的注释已存在,则新注释将复盖以前的注释。 - -注释存储在 `comment_expression` 由返回的列 [DESCRIBE TABLE](misc.md#misc-describe-table) 查询。 +每个列都可以包含注释。如果列的注释已经存在,新的注释会替换旧的。 +注释信息保存在 [DESCRIBE TABLE](../../sql-reference/statements/misc.md#misc-describe-table)查询的 `comment_expression` 字段中。 示例: @@ -100,25 +100,24 @@ COMMENT COLUMN [IF EXISTS] name 'comment' ALTER TABLE visits COMMENT COLUMN browser 'The table shows the browser used for accessing the site.' ``` -#### MODIFY COLUMN {#alter_modify-column} +#### 修改列 {#alter_modify-column} ``` sql MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [TTL] ``` +该语句可以改变 `name` 列的属性: -此查询更改 `name` 列属性: +- Type -- 类型 - -- 默认表达式 +- Default expression - TTL - For examples of columns TTL modifying, see [Column TTL](../engines/table_engines/mergetree_family/mergetree.md#mergetree-column-ttl). +有关修改列TTL的示例,请参见 [Column TTL](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-column-ttl). -如果 `IF EXISTS` 如果指定了子句,如果该列不存在,则查询不会返回错误。 +如果语句中包含 `IF EXISTS` ,遇到不存在的列,sql执行不会报错。 -更改类型时,值将被转换为 [toType](../../sql-reference/functions/type-conversion-functions.md) 函数被应用到它们。 如果仅更改默认表达式,则查询不会执行任何复杂的操作,并且几乎立即完成。 +当改变列的类型时,列的值也被转换了,如同对列使用 [toType](../../sql-reference/functions/type-conversion-functions.md)函数一样。如果只改变了默认表达式,该语句几乎不会做任何复杂操作,并且几乎是立即执行完成的。 示例: @@ -126,205 +125,198 @@ MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [TTL] ALTER TABLE visits MODIFY COLUMN browser Array(String) ``` -Changing the column type is the only complex action – it changes the contents of files with data. For large tables, this may take a long time. +改变列的类型是唯一的复杂型动作 - 它改变了数据文件的内容。对于大型表,执行起来要花费较长的时间。 +该操作分为如下处理步骤: -有几个处理阶段: +- 为修改的数据准备新的临时文件 +- 重命名原来的文件 +- 将新的临时文件改名为原来的数据文件名 +- 删除原来的文件 -- 准备具有修改数据的临时(新)文件。 -- 重命名旧文件。 -- 将临时(新)文件重命名为旧名称。 -- 删除旧文件。 +仅仅在第一步是耗费时间的。如果该阶段执行失败,那么数据没有变化。如果执行后续的步骤中失败了,数据可以手动恢复。例外的情形是,当原来的文件从文件系统中被删除了,但是新的数据没有写入到临时文件中并且丢失了。 -只有第一阶段需要时间。 如果在此阶段出现故障,则不会更改数据。 -如果在其中一个连续阶段中出现故障,可以手动恢复数据。 例外情况是,如果旧文件从文件系统中删除,但新文件的数据没有写入磁盘并丢失。 -该 `ALTER` 复制更改列的查询。 这些指令保存在ZooKeeper中,然后每个副本应用它们。 全部 `ALTER` 查询以相同的顺序运行。 查询等待对其他副本完成适当的操作。 但是,更改复制表中的列的查询可能会中断,并且所有操作都将异步执行。 +列操作的 `ALTER`行为是可以被复制的。这些指令会保存在ZooKeeper中,这样每个副本节点都能执行它们。所有的 `ALTER` 将按相同的顺序执行。 + The query waits for the appropriate actions to be completed on the other replicas. +然而,改变可复制表的列是可以被中断的,并且所有动作都以异步方式执行。 -#### 更改查询限制 {#alter-query-limitations} -该 `ALTER` query允许您在嵌套数据结构中创建和删除单独的元素(列),但不能创建整个嵌套数据结构。 要添加嵌套数据结构,可以添加名称如下的列 `name.nested_name` 和类型 `Array(T)`. 
嵌套数据结构等效于名称在点之前具有相同前缀的多个数组列。 +#### ALTER 操作限制 {#alter-query-limitations} -不支持删除主键或采样键中的列(在主键中使用的列 `ENGINE` 表达式)。 只有在此更改不会导致数据被修改时,才可以更改主键中包含的列的类型(例如,允许您向枚举添加值或更改类型 `DateTime` 到 `UInt32`). + `ALTER` 操作允许在嵌套的数据结构中创建和删除单独的元素(列),但是不是整个嵌套结构。添加一个嵌套数据结构的列时,你可以用类似这样的名称 `name.nested_name` 及类型 `Array(T)` 来操作。嵌套数据结构等同于 +列名前带有同样前缀的多个数组列。 -如果 `ALTER` 查询不足以使您需要的表更改,您可以创建一个新的表,使用 [INSERT SELECT](insert-into.md#insert_query_insert-select) 查询,然后使用切换表 [RENAME](misc.md#misc_operations-rename) 查询并删除旧表。 您可以使用 [ツ环板-ョツ嘉ッツ偲](../../operations/utilities/clickhouse-copier.md) 作为替代 `INSERT SELECT` 查询。 -该 `ALTER` 查询阻止对表的所有读取和写入。 换句话说,如果长 `SELECT` 正在运行的时间 `ALTER` 查询,该 `ALTER` 查询将等待它完成。 同时,对同一个表的所有新查询将等待 `ALTER` 正在运行。 +不支持对primary key或者sampling key中的列(在 `ENGINE` 表达式中用到的列)进行删除操作。改变包含在primary key中的列的类型时,如果操作不会导致数据的变化(例如,往Enum中添加一个值,或者将`DateTime` 类型改成 `UInt32`),那么这种操作是可行的。 -对于本身不存储数据的表(例如 `Merge` 和 `Distributed`), `ALTER` 只是改变了表结构,并且不改变从属表的结构。 例如,当运行ALTER时 `Distributed` 表,你还需要运行 `ALTER` 对于所有远程服务器上的表。 +如果 `ALTER` 操作不足以完成你想要的表变动操作,你可以创建一张新的表,通过 [INSERT SELECT](../../sql-reference/statements/insert-into.md#insert_query_insert-select)将数据拷贝进去,然后通过 [RENAME](../../sql-reference/statements/misc.md#misc_operations-rename)将新的表改成和原有表一样的名称,并删除原有的表。你可以使用 [clickhouse-copier](../../operations/utilities/clickhouse-copier.md) 代替 `INSERT SELECT`。 -### 使用键表达式进行操作 {#manipulations-with-key-expressions} + `ALTER` 操作会阻塞对表的所有读写操作。换句话说,当一个大的 `SELECT` 语句和 `ALTER`同时执行时,`ALTER`会等待,直到 `SELECT` 执行结束。与此同时,当 `ALTER` 运行时,新的 sql 语句将会等待。 -支持以下命令: + +对于不存储数据的表(例如 `Merge` 及 `Distributed` 表), `ALTER` 仅仅改变了自身的表结构,不会改变从属的表结构。例如,对 `Distributed` 表执行 ALTER 操作时,需要对其它包含该表的服务器执行该操作。 + +### key表达式的修改 {#manipulations-with-key-expressions} + +支持下列表达式: ``` sql MODIFY ORDER BY new_expression ``` -它只适用于在表 [`MergeTree`](../../engines/table-engines/mergetree-family/mergetree.md) 家庭(包括 -[复制](../../engines/table-engines/mergetree-family/replication.md) 表)。 该命令更改 -[排序键](../../engines/table-engines/mergetree-family/mergetree.md) 表 -到 `new_expression` (表达式或表达式元组)。 主键保持不变。 +该操作仅支持 [`MergeTree`](../../engines/table-engines/mergetree-family/mergetree.md) 系列表 (含 [replicated](../../engines/table-engines/mergetree-family/replication.md) 表)。它会将表的 [排序键](../../engines/table-engines/mergetree-family/mergetree.md)变成 `new_expression` (元组表达式)。主键仍保持不变。 -该命令是轻量级的,因为它只更改元数据。 要保持该数据部分的属性 -行按排序键表达式排序您不能添加包含现有列的表达式 -到排序键(仅由列添加 `ADD COLUMN` 命令在同一个 `ALTER` 查询)。 +该操作时轻量级的,仅会改变元数据。 -### 使用数据跳过索引进行操作 {#manipulations-with-data-skipping-indices} -它只适用于在表 [`*MergeTree`](../../engines/table-engines/mergetree-family/mergetree.md) 家庭(包括 -[复制](../../engines/table-engines/mergetree-family/replication.md) 表)。 以下操作 -可用: +### 跳过索引来更改数据 {#manipulations-with-data-skipping-indices} -- `ALTER TABLE [db].name ADD INDEX name expression TYPE type GRANULARITY value AFTER name [AFTER name2]` -将索引描述添加到表元数据。 +该操作仅支持 [`MergeTree`](../../engines/table-engines/mergetree-family/mergetree.md) 系列表 (含 [replicated](../../engines/table-engines/mergetree-family/replication.md) 表)。 +下列操作是允许的: -- `ALTER TABLE [db].name DROP INDEX name` -从表元数据中删除索引描述并从磁盘中删除索引文件。 +- `ALTER TABLE [db].name ADD INDEX name expression TYPE type GRANULARITY value AFTER name [AFTER name2]` - 在表的元数据中增加索引说明 -这些命令是轻量级的,因为它们只更改元数据或删除文件。 -此外,它们被复制(通过ZooKeeper同步索引元数据)。 +- `ALTER TABLE [db].name DROP INDEX name` - 从表的元数据中删除索引描述,并从磁盘上删除索引文件 -### 使用约束进行操作 {#manipulations-with-constraints} +由于只改变表的元数据或者删除文件,因此该操作是轻量级的,也可以被复制到其它节点(通过Zookeeper同步索引元数据) -查看更多 [制约因素](create.md#constraints) +### 更改约束 {#manipulations-with-constraints} -可以使用以下语法添加或删除约束: +参见 
[constraints](../../sql-reference/statements/create.md#constraints)查看更多信息。 + +通过下面的语法,可以添加或删除约束: ``` sql ALTER TABLE [db].name ADD CONSTRAINT constraint_name CHECK expression; ALTER TABLE [db].name DROP CONSTRAINT constraint_name; ``` -查询将从表中添加或删除有关约束的元数据,以便立即处理它们。 +上述语句会从表中增加或删除约束的元数据,因此会被立即处理。 +对已有数据的约束检查 *将不会执行* 。 -约束检查 *不会被执行* 在现有数据上,如果它被添加。 +对可复制表的操作可通过Zookeeper传播到其它副本节点。 -复制表上的所有更改都广播到ZooKeeper,因此将应用于其他副本。 +### 更改分区及文件块 {#alter_manipulations-with-partitions} -### 操作与分区和零件 {#alter_manipulations-with-partitions} +允许进行下列关于 [partitions](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) 的操作: -下面的操作与 [分区](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) 可用: - -- [DETACH PARTITION](#alter_detach-partition) – Moves a partition to the `detached` 目录和忘记它。 -- [DROP PARTITION](#alter_drop-partition) – Deletes a partition. -- [ATTACH PART\|PARTITION](#alter_attach-partition) – Adds a part or partition from the `detached` 目录到表。 -- [ATTACH PARTITION FROM](#alter_attach-partition-from) – Copies the data partition from one table to another and adds. -- [REPLACE PARTITION](#alter_replace-partition) -将数据分区从一个表复制到另一个表并替换。 -- [MOVE PARTITION TO TABLE](#alter_move_to_table-partition)(\#alter\_move\_to\_table-partition)-将数据分区从一个表移动到另一个表。 -- [CLEAR COLUMN IN PARTITION](#alter_clear-column-partition) -重置分区中指定列的值。 -- [CLEAR INDEX IN PARTITION](#alter_clear-index-partition) -重置分区中指定的二级索引。 -- [FREEZE PARTITION](#alter_freeze-partition) – Creates a backup of a partition. -- [FETCH PARTITION](#alter_fetch-partition) – Downloads a partition from another server. -- [MOVE PARTITION\|PART](#alter_move-partition) – Move partition/data part to another disk or volume. +- [DETACH PARTITION](#alter_detach-partition) — 将分区数据移动到 `detached` ,并且忘记它 +- [DROP PARTITION](#alter_drop-partition) — 删除一个partition. +- [ATTACH PART\|PARTITION](#alter_attach-partition) — 将`detached` 目录中的分区重新添加到表中. +- [ATTACH PARTITION FROM](#alter_attach-partition-from) — 从表中复制数据分区到另一张表,并添加分区 +- [REPLACE PARTITION](#alter_replace-partition) — 从表中复制数据分区到其它表及副本 +- [MOVE PARTITION TO TABLE](#alter_move_to_table-partition) — 从表中复制数据分区到其它表. +- [CLEAR COLUMN IN PARTITION](#alter_clear-column-partition) — 重置分区中某个列的值 +- [CLEAR INDEX IN PARTITION](#alter_clear-index-partition) — 重置分区中指定的二级索引 +- [FREEZE PARTITION](#alter_freeze-partition) — 创建分区的备份 +- [FETCH PARTITION](#alter_fetch-partition) — 从其它服务器上下载分 +- [MOVE PARTITION\|PART](#alter_move-partition) — 将分区/数据块移动到另外的磁盘/卷 -#### DETACH PARTITION {#alter_detach-partition} +#### 分区剥离 {#alter_detach-partition} ``` sql ALTER TABLE table_name DETACH PARTITION partition_expr ``` - -将指定分区的所有数据移动到 `detached` 目录。 服务器会忘记分离的数据分区,就好像它不存在一样。 服务器不会知道这个数据,直到你做 [ATTACH](#alter_attach-partition) 查询。 +将指定分区的数据移动到 `detached` 目录。服务器会忽略被分离的数据分区。只有当你使用 [ATTACH](#alter_attach-partition) 时,服务器才会知晓这部分数据。 示例: ``` sql ALTER TABLE visits DETACH PARTITION 201901 ``` +从 [如何设置分区表达式](#alter-how-to-specify-part-expr)章节中获取分区表达式的设置说明。 -阅读有关在一节中设置分区表达式的信息 [如何指定分区表达式](#alter-how-to-specify-part-expr). +当执行操作以后,可以对 `detached` 目录的数据进行任意操作,例如删除文件,或者放着不管。 -执行查询后,您可以对查询中的数据进行任何操作 `detached` directory — delete it from the file system, or just leave it. 
+该操作是可以复制的,它会将所有副本节点上的数据移动到 `detached` 目录。注意仅能在副本的leader节点上执行该操作。想了解副本是否是leader节点,需要在 [system.replicas](../../operations/system-tables/replicas.md#system_tables-replicas) 表执行 `SELECT` 操作。或者,可以很方便的在所有副本节点上执行 `DETACH`操作,但除leader外其它的副本节点会抛出异常。 -This query is replicated – it moves the data to the `detached` 所有副本上的目录。 请注意,您只能对领导副本执行此查询。 要确定副本是否为领导者,请执行 `SELECT` 查询到 [系统。副本](../../operations/system-tables.md#system_tables-replicas) 桌子 或者,它更容易使 `DETACH` 对所有副本进行查询-除了领导副本之外,所有副本都会引发异常。 -#### DROP PARTITION {#alter_drop-partition} +#### 删除分区 {#alter_drop-partition} ``` sql ALTER TABLE table_name DROP PARTITION partition_expr ``` -从表中删除指定的分区。 此查询将分区标记为非活动分区,并在大约10分钟内完全删除数据。 +从表中删除指定分区。该操作会将分区标记为不活跃的,然后在大约10分钟内删除全部数据。 -阅读有关在一节中设置分区表达式的信息 [如何指定分区表达式](#alter-how-to-specify-part-expr). +在 [如何设置分区表达式](#alter-how-to-specify-part-expr)中获取分区表达式的设置说明。 +该操作是可复制的,副本节点的数据也将被删除。 -The query is replicated – it deletes data on all replicas. -#### DROP DETACHED PARTITION\|PART {#alter_drop-detached} +#### 删除已剥离的分区\|数据块 {#alter_drop-detached} ``` sql ALTER TABLE table_name DROP DETACHED PARTITION|PART partition_expr ``` -从中删除指定分区的指定部分或所有部分 `detached`. -了解有关在一节中设置分区表达式的详细信息 [如何指定分区表达式](#alter-how-to-specify-part-expr). +从`detached`目录中删除指定分区的特定部分或所有数据。访问 [如何设置分区表达式](#alter-how-to-specify-part-expr)可获取设置分区表达式的详细信息。 -#### ATTACH PARTITION\|PART {#alter_attach-partition} +#### 关联分区\|数据块 {#alter_attach-partition} ``` sql ALTER TABLE table_name ATTACH PARTITION|PART partition_expr ``` - -将数据从 `detached` 目录。 可以为整个分区或单独的部分添加数据。 例: +从`detached`目录中添加数据到数据表。可以添加整个分区的数据,或者单独的数据块。例如: ``` sql ALTER TABLE visits ATTACH PARTITION 201901; ALTER TABLE visits ATTACH PART 201901_2_2_0; ``` -了解有关在一节中设置分区表达式的详细信息 [如何指定分区表达式](#alter-how-to-specify-part-expr). +访问 [如何设置分区表达式](#alter-how-to-specify-part-expr)可获取设置分区表达式的详细信息。 -此查询被复制。 副本发起程序检查是否有数据在 `detached` 目录。 如果数据存在,则查询将检查其完整性。 如果一切正确,则查询将数据添加到表中。 所有其他副本都从副本发起程序下载数据。 +该操作是可以复制的。副本启动器检查 `detached`目录是否有数据。如果有,该操作会检查数据的完整性。如果一切正常,该操作将数据添加到表中。其它副本节点通过副本启动器下载这些数据。 -所以你可以把数据到 `detached` 在一个副本上的目录,并使用 `ALTER ... ATTACH` 查询以将其添加到所有副本上的表中。 +因此可以在某个副本上将数据放到 `detached`目录,然后通过 `ALTER ... ATTACH` 操作将这部分数据添加到该表的所有副本。 -#### ATTACH PARTITION FROM {#alter_attach-partition-from} +#### 从...关联分区 {#alter_attach-partition-from} ``` sql ALTER TABLE table2 ATTACH PARTITION partition_expr FROM table1 ``` +该操作将 `table1` 表的数据分区复制到 `table2` 表的已有分区。注意`table1`表的数据不会被删除。 -此查询将数据分区从 `table1` 到 `table2` 将数据添加到存在 `table2`. 请注意,数据不会从中删除 `table1`. +为保证该操作能成功运行,下列条件必须满足: -要使查询成功运行,必须满足以下条件: +- 2张表必须有相同的结构 +- 2张表必须有相同的分区键 -- 两个表必须具有相同的结构。 -- 两个表必须具有相同的分区键。 - -#### REPLACE PARTITION {#alter_replace-partition} +#### 替换分区 {#alter_replace-partition} ``` sql ALTER TABLE table2 REPLACE PARTITION partition_expr FROM table1 ``` +该操作将 `table1` 表的数据分区复制到 `table2`表,并替换 `table2`表的已有分区。注意`table1`表的数据不会被删除。 -此查询将数据分区从 `table1` 到 `table2` 并替换在现有的分区 `table2`. 请注意,数据不会从中删除 `table1`. +为保证该操作能成功运行,下列条件必须满足: -要使查询成功运行,必须满足以下条件: +- 2张表必须有相同的结构 +- 2张表必须有相同的分区键 -- 两个表必须具有相同的结构。 -- 两个表必须具有相同的分区键。 - -#### MOVE PARTITION TO TABLE {#alter_move_to_table-partition} +#### 将分区移动到表 {#alter_move_to_table-partition} ``` sql ALTER TABLE table_source MOVE PARTITION partition_expr TO TABLE table_dest ``` -此查询将数据分区从 `table_source` 到 `table_dest` 删除数据 `table_source`. 
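下面给出一个简单的示意(其中 `visits_src` 与 `visits_dst` 为两张结构相同的假设表):

``` sql
ALTER TABLE visits_src MOVE PARTITION 201901 TO TABLE visits_dst
```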
+该操作将 `table_source`表的数据分区移动到 `table_dest`表,并删除`table_source`表的数据。 -要使查询成功运行,必须满足以下条件: +为保证该操作能成功运行,下列条件必须满足: -- 两个表必须具有相同的结构。 -- 两个表必须具有相同的分区键。 -- 两个表必须是相同的引擎系列。 (已复制或未复制) -- 两个表必须具有相同的存储策略。 +- 2张表必须有相同的结构 +- 2张表必须有相同的分区键 +- 2张表必须属于相同的引擎系列(可复制表或不可复制表) +- 2张表必须有相同的存储方式 -#### CLEAR COLUMN IN PARTITION {#alter_clear-column-partition} +#### 清空分区的列 {#alter_clear-column-partition} ``` sql ALTER TABLE table_name CLEAR COLUMN column_name IN PARTITION partition_expr ``` -重置分区中指定列中的所有值。 如果 `DEFAULT` 创建表时确定了子句,此查询将列值设置为指定的默认值。 +重置指定分区的特定列的值。如果建表时使用了 `DEFAULT` 语句,该操作会将列的值重置为该默认值。 示例: @@ -332,95 +324,93 @@ ALTER TABLE table_name CLEAR COLUMN column_name IN PARTITION partition_expr ALTER TABLE visits CLEAR COLUMN hour in PARTITION 201902 ``` -#### FREEZE PARTITION {#alter_freeze-partition} +#### 冻结分区 {#alter_freeze-partition} ``` sql ALTER TABLE table_name FREEZE [PARTITION partition_expr] ``` -此查询创建指定分区的本地备份。 如果 `PARTITION` 子句被省略,查询一次创建所有分区的备份。 +该操作为指定分区创建一个本地备份。如果 `PARTITION` 语句省略,该操作会一次性为所有分区创建备份。 -!!! note "注" - 在不停止服务器的情况下执行整个备份过程。 +!!! 注意 "Note" + 整个备份过程不需要停止服务 -请注意,对于旧式表,您可以指定分区名称的前缀(例如, ‘2019’)-然后查询为所有相应的分区创建备份。 阅读有关在一节中设置分区表达式的信息 [如何指定分区表达式](#alter-how-to-specify-part-expr). +注意对于老式的表,可以指定分区名前缀(例如,‘2019’),然后该操作会创建所有对应分区的备份。访问 [如何设置分区表达式](#alter-how-to-specify-part-expr)可获取设置分区表达式的详细信息。 -在执行时,对于数据快照,查询将创建指向表数据的硬链接。 硬链接被放置在目录中 `/var/lib/clickhouse/shadow/N/...`,哪里: +在执行操作的同时,对于数据快照,该操作会创建到表数据的硬链接。硬链接放置在 `/var/lib/clickhouse/shadow/N/...`,也就是: +- `/var/lib/clickhouse/` 服务器配置文件中指定的CH工作目录 +- `N` 备份的增长序号 -- `/var/lib/clickhouse/` 是配置中指定的工作ClickHouse目录。 -- `N` 是备份的增量编号。 -!!! note "注" - 如果您使用 [用于在表中存储数据的一组磁盘](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes),该 `shadow/N` 目录出现在每个磁盘上,存储由匹配的数据部分 `PARTITION` 表达。 +!!! 注意 "Note" + 如果你使用 [多个磁盘存储数据表](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes), + 那么每个磁盘上都有 `shadow/N`目录,用来保存`PARTITION` 表达式对应的数据块。 -在备份内部创建的目录结构与在备份内部创建的目录结构相同 `/var/lib/clickhouse/`. 查询执行 ‘chmod’ 对于所有文件,禁止写入它们。 +备份内部也会创建和 `/var/lib/clickhouse/` 内部一样的目录结构。该操作在所有文件上执行‘chmod’,禁止往里写入数据 -创建备份后,您可以从以下位置复制数据 `/var/lib/clickhouse/shadow/` 然后将其从本地服务器中删除。 请注意, `ALTER t FREEZE PARTITION` 不复制查询。 它仅在本地服务器上创建本地备份。 +当备份创建完毕,你可以从 `/var/lib/clickhouse/shadow/`复制数据到远端服务器,然后删除本地数据。注意 `ALTER t FREEZE PARTITION`操作是不能复制的,它仅在本地服务器上创建本地备份。 -查询几乎立即创建备份(但首先它会等待对相应表的当前查询完成运行)。 +该操作创建备份几乎是即时的(但是首先它会等待相关表的当前操作执行完成) -`ALTER TABLE t FREEZE PARTITION` 仅复制数据,而不复制表元数据。 若要备份表元数据,请复制该文件 `/var/lib/clickhouse/metadata/database/table.sql` -要从备份还原数据,请执行以下操作: +`ALTER TABLE t FREEZE PARTITION` 仅仅复制数据, 而不是元数据信息. 要复制表的元数据信息, 拷贝这个文件 `/var/lib/clickhouse/metadata/database/table.sql` -1. 如果表不存在,则创建该表。 要查看查询,请使用。sql文件(替换 `ATTACH` 在它与 `CREATE`). -2. 从复制数据 `data/database/table/` 目录内的备份到 `/var/lib/clickhouse/data/database/table/detached/` 目录。 -3. 快跑 `ALTER TABLE t ATTACH PARTITION` 将数据添加到表的查询。 +从备份中恢复数据,按如下步骤操作: +1. 如果表不存在,先创建。 查看.sql 文件获取执行语句 (将`ATTACH` 替换成 `CREATE`). +2. 从 备份的 `data/database/table/`目录中将数据复制到 `/var/lib/clickhouse/data/database/table/detached/`目录 +3. 
运行 `ALTER TABLE t ATTACH PARTITION`操作,将数据添加到表中 -从备份还原不需要停止服务器。 +恢复数据不需要停止服务进程。 +想了解备份及数据恢复的更多信息,请参见 [数据备份](../../operations/backup.md) 。 -有关备份和还原数据的详细信息,请参阅 [数据备份](../../operations/backup.md) 科。 - -#### CLEAR INDEX IN PARTITION {#alter_clear-index-partition} +#### 删除分区的索引 {#alter_clear-index-partition} ``` sql ALTER TABLE table_name CLEAR INDEX index_name IN PARTITION partition_expr ``` -查询的工作原理类似于 `CLEAR COLUMN`,但它重置索引而不是列数据。 +该操作和 `CLEAR COLUMN`类似,但是它重置的是索引而不是列的数据。 -#### FETCH PARTITION {#alter_fetch-partition} +#### 获取分区 {#alter_fetch-partition} ``` sql ALTER TABLE table_name FETCH PARTITION partition_expr FROM 'path-in-zookeeper' ``` -从另一台服务器下载分区。 此查询仅适用于复制的表。 +从另一服务器上下载分区数据。仅支持可复制引擎表。 +该操作做了如下步骤: +1. 从指定数据分片上下载分区。在 path-in-zookeeper 这一参数你必须设置Zookeeper中该分片的path值。 +2. 然后将已下载的数据放到 `table_name` 表的 `detached` 目录下。通过 [ATTACH PARTITION\|PART](#alter_attach-partition)将数据加载到表中。 -查询执行以下操作: - -1. 从指定的分片下载分区。 在 ‘path-in-zookeeper’ 您必须在ZooKeeper中指定分片的路径。 -2. 然后查询将下载的数据放到 `detached` 的目录 `table_name` 桌子 使用 [ATTACH PARTITION\|PART](#alter_attach-partition) 查询将数据添加到表中。 - -例如: +示例: ``` sql ALTER TABLE users FETCH PARTITION 201902 FROM '/clickhouse/tables/01-01/visits'; ALTER TABLE users ATTACH PARTITION 201902; ``` -请注意: +注意: -- 该 `ALTER ... FETCH PARTITION` 查询不被复制。 它将分区放置在 `detached` 仅在本地服务器上的目录。 -- 该 `ALTER TABLE ... ATTACH` 复制查询。 它将数据添加到所有副本。 数据被添加到从副本之一 `detached` 目录,以及其他-从相邻的副本。 +- `ALTER ... FETCH PARTITION` 操作不支持复制,它仅在本地服务器上将分区移动到 `detached`目录。 +- `ALTER TABLE ... ATTACH`操作是可复制的。它将数据添加到所有副本。数据从某个副本的`detached` 目录中添加进来,然后添加到邻近的副本 -在下载之前,系统会检查分区是否存在并且表结构匹配。 从正常副本中自动选择最合适的副本。 +在开始下载之前,系统检查分区是否存在以及和表结构是否匹配。然后从健康的副本集中自动选择最合适的副本。 -虽然查询被调用 `ALTER TABLE`,它不会更改表结构,并且不会立即更改表中可用的数据。 +虽然操作叫做 `ALTER TABLE`,但是它并不能改变表结构,也不会立即改变表中可用的数据。 -#### MOVE PARTITION\|PART {#alter_move-partition} +#### 移动分区\|数据块 {#alter_move-partition} -将分区或数据部分移动到另一个卷或磁盘 `MergeTree`-发动机表。 看 [使用多个块设备进行数据存储](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes). +将 `MergeTree`引擎表的分区或数据块移动到另外的卷/磁盘中。参见 [使用多个块设备存储数据](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) ``` sql ALTER TABLE table_name MOVE PARTITION|PART partition_expr TO DISK|VOLUME 'disk_name' ``` -该 `ALTER TABLE t MOVE` 查询: + `ALTER TABLE t MOVE` 操作: -- 不复制,因为不同的副本可能具有不同的存储策略。 -- 如果未配置指定的磁盘或卷,则返回错误。 如果无法应用存储策略中指定的数据移动条件,Query还会返回错误。 -- 可以在返回错误的情况下,当要移动的数据已经被后台进程移动时,并发 `ALTER TABLE t MOVE` 查询或作为后台数据合并的结果。 在这种情况下,用户不应该执行任何其他操作。 +- 不支持复制,因为不同副本可以有不同的存储方式 +- 如果指定的磁盘或卷没有配置,返回错误。如果存储方式中设定的数据移动条件不能满足,该操作同样报错。 +- 这种情况也会报错:即将移动的数据已经由后台进程在进行移动操作时,并行的 `ALTER TABLE t MOVE`操作或者作为后台数据合并的结果。这种情形下用户不能任何额外的动作。 示例: @@ -431,79 +421,75 @@ ALTER TABLE hits MOVE PARTITION '2019-09-01' TO DISK 'fast_ssd' #### 如何设置分区表达式 {#alter-how-to-specify-part-expr} -您可以在以下内容中指定分区表达式 `ALTER ... PARTITION` 以不同方式查询: +通过不同方式在 `ALTER ... PARTITION` 操作中设置分区表达式: -- 作为从值 `partition` 列 `system.parts` 桌子 例如, `ALTER TABLE visits DETACH PARTITION 201901`. -- 作为来自表列的表达式。 支持常量和常量表达式。 例如, `ALTER TABLE visits DETACH PARTITION toYYYYMM(toDate('2019-01-25'))`. -- 使用分区ID。 分区ID是用作文件系统和ZooKeeper中分区名称的分区的字符串标识符(如果可能的话,人类可读)。 分区ID必须在指定 `PARTITION ID` 子句,用单引号。 例如, `ALTER TABLE visits DETACH PARTITION ID '201901'`. -- 在 [ALTER ATTACH PART](#alter_attach-partition) 和 [DROP DETACHED PART](#alter_drop-detached) 查询时,要指定部件的名称,请将字符串文字与来自 `name` 列 [系统。detached\_parts](../../operations/system-tables.md#system_tables-detached_parts) 桌子 例如, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`. 
+- `system.parts`表 `partition`列的某个值,例如, `ALTER TABLE visits DETACH PARTITION 201901` +- 表的列表达式。支持常量及常量表达式。例如, `ALTER TABLE visits DETACH PARTITION toYYYYMM(toDate('2019-01-25'))` +- 使用分区ID。分区ID是字符串变量(可能的话有较好的可读性),在文件系统和ZooKeeper中作为分区名称。分区ID必须配置在 `PARTITION ID`中,用单引号包含,例如, `ALTER TABLE visits DETACH PARTITION ID '201901'` +- 在 [ALTER ATTACH PART](#alter_attach-partition) 和 [DROP DETACHED PART](#alter_drop-detached) 操作中,要配置块的名称,使用 [system.detached\_parts](../../operations/system-tables/detached_parts.md#system_tables-detached_parts)表中 `name`列的字符串值,例如: `ALTER TABLE visits ATTACH PART '201901_1_1_0'` -指定分区时引号的使用取决于分区表达式的类型。 例如,对于 `String` 类型,你必须在引号中指定其名称 (`'`). 为 `Date` 和 `Int*` 类型不需要引号。 -对于旧式表,您可以将分区指定为数字 `201901` 或者一个字符串 `'201901'`. 对于类型,新样式表的语法更严格(类似于值输入格式的解析器)。 +设置分区时,引号使用要看分区表达式的类型。例如,对于 `String`类型,需要设置用引号(`'`)包含的名称。对于 `Date` 和 `Int*`引号就不需要了。 +对于老式的表,可以用数值`201901` 或字符串 `'201901'`来设置分区。新式的表语法严格和类型一致(类似于VALUES输入的解析) -上述所有规则也适用于 [OPTIMIZE](misc.md#misc_operations-optimize) 查询。 如果在优化非分区表时需要指定唯一的分区,请设置表达式 `PARTITION tuple()`. 例如: +上述所有规则同样适用于 [OPTIMIZE](../../sql-reference/statements/misc.md#misc_operations-optimize) 操作。在对未分区的表进行 OPTIMIZE 操作时,如果需要指定唯一的分区,这样设置表达式`PARTITION tuple()`。例如: ``` sql OPTIMIZE TABLE table_not_partitioned PARTITION tuple() FINAL; ``` -的例子 `ALTER ... PARTITION` 查询在测试中演示 [`00502_custom_partitioning_local`](https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/00502_custom_partitioning_local.sql) 和 [`00502_custom_partitioning_replicated_zookeeper`](https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/00502_custom_partitioning_replicated_zookeeper.sql). + `ALTER ... PARTITION` 操作的示例在 [`00502_custom_partitioning_local`](https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/00502_custom_partitioning_local.sql) 和 [`00502_custom_partitioning_replicated_zookeeper`](https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/00502_custom_partitioning_replicated_zookeeper.sql) 提供了演示。 -### 使用表TTL进行操作 {#manipulations-with-table-ttl} +### 更改表的TTL {#manipulations-with-table-ttl} -你可以改变 [表TTL](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl) 请填写以下表格: +通过以下形式的请求可以修改 [table TTL](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl) ``` sql ALTER TABLE table-name MODIFY TTL ttl-expression ``` -### ALTER查询的同步性 {#synchronicity-of-alter-queries} +### ALTER操作的同步性 {#synchronicity-of-alter-queries} -对于不可复制的表,所有 `ALTER` 查询是同步执行的。 对于可复制的表,查询仅添加相应操作的说明 `ZooKeeper`,并尽快执行操作本身。 但是,查询可以等待在所有副本上完成这些操作。 +对于不可复制的表,所有 `ALTER`操作都是同步执行的。对于可复制的表,ALTER操作会将指令添加到ZooKeeper中,然后会尽快的执行它们。然而,该操作可以等待其它所有副本将指令执行完毕。 -为 `ALTER ... ATTACH|DETACH|DROP` 查询,您可以使用 `replication_alter_partitions_sync` 设置设置等待。 -可能的值: `0` – do not wait; `1` – only wait for own execution (default); `2` – wait for all. +对于 `ALTER ... 
ATTACH|DETACH|DROP`操作,可以通过设置 `replication_alter_partitions_sync` 来启用等待。可用参数值: `0` – 不需要等待; `1` – 仅等待自己执行(默认); `2` – 等待所有节点 -### 突变 {#alter-mutations} +### Mutations {#alter-mutations} -突变是允许更改或删除表中的行的ALTER查询变体。 与标准相比 `UPDATE` 和 `DELETE` 用于点数据更改的查询,mutations适用于更改表中大量行的繁重操作。 支持的 `MergeTree` 表引擎系列,包括具有复制支持的引擎。 +Mutations是一类允许对表的行记录进行删除或更新的ALTER操作。相较于标准的 `UPDATE` 和 `DELETE` 用于少量行操作而言,Mutations用来对表的很多行进行重量级的操作。该操作支持 `MergeTree`系列表,包含支持复制功能的表。 -现有表可以按原样进行突变(无需转换),但是在将第一次突变应用于表之后,其元数据格式将与以前的服务器版本不兼容,并且无法回退到以前的版本。 +已有的表已经支持mutations操作(不需要转换)。但是在首次对表进行mutation操作以后,它的元数据格式变得和和之前的版本不兼容,并且不能回退到之前版本。 -当前可用的命令: +目前可用的命令: ``` sql ALTER TABLE [db.]table DELETE WHERE filter_expr ``` - -该 `filter_expr` 必须是类型 `UInt8`. 查询删除表中此表达式采用非零值的行。 +`filter_expr`必须是 `UInt8`型。该操作将删除表中 `filter_expr`表达式值为非0的列 ``` sql ALTER TABLE [db.]table UPDATE column1 = expr1 [, ...] WHERE filter_expr ``` - -该 `filter_expr` 必须是类型 `UInt8`. 此查询将指定列的值更新为行中相应表达式的值。 `filter_expr` 取非零值。 使用以下命令将值转换为列类型 `CAST` 接线员 不支持更新用于计算主键或分区键的列。 +`filter_expr`必须是 `UInt8`型。该操作将更新表中各行 `filter_expr`表达式值为非0的指定列的值。通过 `CAST` 操作将值转换成对应列的类型。不支持对用于主键或分区键表达式的列进行更新操作。 ``` sql ALTER TABLE [db.]table MATERIALIZE INDEX name IN PARTITION partition_name ``` -查询将重新生成二级索引 `name` 在分区中 `partition_name`. +该操作更新 `partition_name`分区中的二级索引 `name`. +单次操作可以包含多个逗号分隔的命令。 -一个查询可以包含多个用逗号分隔的命令。 +对于 \*MergeTree引擎表,mutation操作通过重写整个数据块来实现。没有原子性保证 - 被mutation操作的数据会被替换,在mutation期间开始执行的`SELECT`查询能看到所有已经完成mutation的数据,以及还没有被mutation替换的数据。 -For\*MergeTree表的突变通过重写整个数据部分来执行。 没有原子性-部分被取代为突变的部分,只要他们准备好和 `SELECT` 在突变期间开始执行的查询将看到来自已经突变的部件的数据以及来自尚未突变的部件的数据。 +mutation总是按照它们的创建顺序来排序并以同样顺序在每个数据块中执行。mutation操作也会部分的和Insert操作一起排序 - 在mutation提交之前插入的数据会参与mutation操作,在mutation提交之后的插入的数据则不会参与mutation。注意mutation从来不会阻塞插入操作。 -突变完全按其创建顺序排序,并以该顺序应用于每个部分。 突变也使用插入进行部分排序-在提交突变之前插入到表中的数据将被突变,之后插入的数据将不会被突变。 请注意,突变不会以任何方式阻止插入。 +mutation操作在提交后(对于可复制表,添加到Zookeeper,对于不可复制表,添加到文件系统)立即返回。mutation操作本身是根据系统的配置参数异步执行的。要跟踪mutation的进度,可以使用系统表 [`system.mutations`](../../operations/system-tables/mutations.md#system_tables-mutations)。已经成功提交的mutation操作在服务重启后仍会继续执行。一旦mutation完成提交,就不能回退了,但是如果因为某种原因操作被卡住了,可以通过 [`KILL MUTATION`](../../sql-reference/statements/misc.md#kill-mutation)操作来取消它的执行。 -Mutation查询在添加mutation条目后立即返回(如果将复制的表复制到ZooKeeper,则将非复制的表复制到文件系统)。 突变本身使用系统配置文件设置异步执行。 要跟踪突变的进度,您可以使用 [`system.mutations`](../../operations/system-tables.md#system_tables-mutations) 桌子 即使重新启动ClickHouse服务器,成功提交的突变仍将继续执行。 一旦提交,没有办法回滚突变,但如果突变由于某种原因被卡住,可以使用 [`KILL MUTATION`](misc.md#kill-mutation) 查询。 +已完成的mutations记录不会立即删除(要保留的记录数量由 `finished_mutations_to_keep` 这一参数决定)。之前的mutation记录会被删除。 -已完成突变的条目不会立即删除(保留条目的数量由 `finished_mutations_to_keep` 存储引擎参数)。 旧的突变条目将被删除。 +## 修改用户 {#alter-user-statement} -## ALTER USER {#alter-user-statement} - -更改ClickHouse用户帐户. +修改CH的用户账号 ### 语法 {#alter-user-syntax} @@ -516,37 +502,37 @@ ALTER USER [IF EXISTS] name [ON CLUSTER cluster_name] [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] 
``` -### 产品描述 {#alter-user-dscr} +### 说明 {#alter-user-dscr} -使用 `ALTER USER` 你必须有 [ALTER USER](grant.md#grant-access-management) 特权 +要使用 `ALTER USER`,你必须拥有 [ALTER USER](../../sql-reference/statements/grant.md#grant-access-management) 操作的权限 -### 例 {#alter-user-examples} +### Examples {#alter-user-examples} -将授予的角色设置为默认值: +设置默认角色: ``` sql ALTER USER user DEFAULT ROLE role1, role2 ``` -如果以前未向用户授予角色,ClickHouse将引发异常。 +如果角色之前没分配给用户,CH会抛出异常。 -将所有授予的角色设置为默认值: +将所有分配的角色设为默认 ``` sql ALTER USER user DEFAULT ROLE ALL ``` -如果将来将某个角色授予某个用户,它将自动成为默认值。 +如果以后给用户分配了某个角色,它将自动成为默认角色 -将所有授予的角色设置为默认值,除非 `role1` 和 `role2`: +将除了 `role1` 和 `role2`之外的其它角色 设为默认 ``` sql ALTER USER user DEFAULT ROLE ALL EXCEPT role1, role2 ``` -## ALTER ROLE {#alter-role-statement} +## 修改角色 {#alter-role-statement} -更改角色。 +修改角色. ### 语法 {#alter-role-syntax} @@ -556,9 +542,10 @@ ALTER ROLE [IF EXISTS] name [ON CLUSTER cluster_name] [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] ``` -## ALTER ROW POLICY {#alter-row-policy-statement} +## 修改row policy {#alter-row-policy-statement} -更改行策略。 + +修改row policy. ### 语法 {#alter-row-policy-syntax} @@ -571,9 +558,9 @@ ALTER [ROW] POLICY [IF EXISTS] name [ON CLUSTER cluster_name] ON [database.]tabl [TO {role [,...] | ALL | ALL EXCEPT role [,...]}] ``` -## ALTER QUOTA {#alter-quota-statement} +## 修改配额quotas {#alter-quota-statement} -更改配额。 +修改配额quotas. ### 语法 {#alter-quota-syntax} @@ -587,9 +574,9 @@ ALTER QUOTA [IF EXISTS] name [ON CLUSTER cluster_name] [TO {role [,...] | ALL | ALL EXCEPT role [,...]}] ``` -## ALTER SETTINGS PROFILE {#alter-settings-profile-statement} +## 修改settings配置 {#alter-settings-profile-statement} -更改配额。 +修改settings配置. ### 语法 {#alter-settings-profile-syntax} @@ -599,4 +586,4 @@ ALTER SETTINGS PROFILE [IF EXISTS] name [ON CLUSTER cluster_name] [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | INHERIT 'profile_name'] [,...] ``` -[原始文章](https://clickhouse.tech/docs/en/query_language/alter/) +[Original article](https://clickhouse.tech/docs/en/query_language/alter/) diff --git a/docs/zh/sql-reference/statements/grant.md b/docs/zh/sql-reference/statements/grant.md deleted file mode 120000 index f2acbe125b4..00000000000 --- a/docs/zh/sql-reference/statements/grant.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/sql-reference/statements/grant.md \ No newline at end of file diff --git a/docs/zh/sql-reference/statements/grant.md b/docs/zh/sql-reference/statements/grant.md new file mode 100644 index 00000000000..f8d85679fa3 --- /dev/null +++ b/docs/zh/sql-reference/statements/grant.md @@ -0,0 +1,472 @@ +--- +toc_priority: 39 +toc_title: 授权操作 +--- + +# 授权 {#grant} +- 给ClickHouse的用户或角色赋予 [权限](#grant-privileges) +- 将角色分配给用户或其他角色 + +取消权限,使用 [REVOKE](../../sql-reference/statements/revoke.md)语句。查看已授权的权限请使用 [SHOW GRANTS](../../sql-reference/statements/show.md#show-grants-statement)。 + +## 授权操作语法 {#grant-privigele-syntax} + +``` sql +GRANT [ON CLUSTER cluster_name] privilege[(column_name [,...])] [,...] ON {db.table|db.*|*.*|table|*} TO {user | role | CURRENT_USER} [,...] [WITH GRANT OPTION] +``` + +- `privilege` — 权限类型 +- `role` — 用户角色 +- `user` — 用户账号 + +`WITH GRANT OPTION` 授予 `user` 或 `role`执行 `GRANT` 操作的权限。用户可将在自身权限范围内的权限进行授权 + +## 角色分配的语法 {#assign-role-syntax} + +``` sql +GRANT [ON CLUSTER cluster_name] role [,...] TO {user | another_role | CURRENT_USER} [,...] 
[WITH ADMIN OPTION] +``` + +- `role` — 角色 +- `user` — 用户 + + `WITH ADMIN OPTION` 授予 `user` 或 `role` 执行[ADMIN OPTION](#admin-option-privilege) 的权限 + +## 用法 {#grant-usage} + +使用 `GRANT`,你的账号必须有 `GRANT OPTION`的权限。用户只能将在自身权限范围内的权限进行授权 + +例如,管理员有权通过下面的语句给 `john`账号添加授权 + +``` sql +GRANT SELECT(x,y) ON db.table TO john WITH GRANT OPTION +``` + +这意味着 `john` 有权限执行以下操作: + +- `SELECT x,y FROM db.table`. +- `SELECT x FROM db.table`. +- `SELECT y FROM db.table`. + +`john` 不能执行`SELECT z FROM db.table`。同样的 `SELECT * FROMdb.table` 也是不允许的。执行这个查询时,CH不会返回任何数据,甚至 `x` 和 `y`列。唯一的例外是,当表仅包含 `x`和`y`列时。这种情况下,CH返回所有数据。 + +同样 `john` 有权执行 `GRANT OPTION`,因此他能给其它账号进行和自己账号权限范围相同的授权。 + +可以使用`*` 号代替表或库名进行授权操作。例如, `GRANT SELECT ONdb.* TO john` 操作运行 `john`对 `db`库的所有表执行 `SELECT`查询。同样,你可以忽略库名。在这种情形下,权限将指向当前的数据库。例如, `GRANT SELECT ON* to john` 对当前数据库的所有表指定授权, `GARNT SELECT ON mytable to john`对当前数据库的 `mytable`表进行授权。 + +访问 `systen`数据库总是被允许的(因为这个数据库用来处理sql操作) +可以一次给多个账号进行多种授权操作。 `GRANT SELECT,INSERT ON *.* TO john,robin` 允许 `john`和`robin` 账号对任意数据库的任意表执行 `INSERT`和 `SELECT`操作。 + +## 权限 {#grant-privileges} + +权限是指执行特定操作的许可 + +权限有层级结构。一组允许的操作依赖相应的权限范围。 + +权限的层级: + +- [SELECT](#grant-select) +- [INSERT](#grant-insert) +- [ALTER](#grant-alter) + - `ALTER TABLE` + - `ALTER UPDATE` + - `ALTER DELETE` + - `ALTER COLUMN` + - `ALTER ADD COLUMN` + - `ALTER DROP COLUMN` + - `ALTER MODIFY COLUMN` + - `ALTER COMMENT COLUMN` + - `ALTER CLEAR COLUMN` + - `ALTER RENAME COLUMN` + - `ALTER INDEX` + - `ALTER ORDER BY` + - `ALTER ADD INDEX` + - `ALTER DROP INDEX` + - `ALTER MATERIALIZE INDEX` + - `ALTER CLEAR INDEX` + - `ALTER CONSTRAINT` + - `ALTER ADD CONSTRAINT` + + - `ALTER DROP CONSTRAINT` + - `ALTER TTL` + - `ALTER MATERIALIZE TTL` + - `ALTER SETTINGS` + - `ALTER MOVE PARTITION` + - `ALTER FETCH PARTITION` + - `ALTER FREEZE PARTITION` + - `ALTER VIEW` + - `ALTER VIEW REFRESH` + - `ALTER VIEW MODIFY QUERY` +- [CREATE](#grant-create) + - `CREATE DATABASE` + - `CREATE TABLE` + - `CREATE VIEW` + - `CREATE DICTIONARY` + - `CREATE TEMPORARY TABLE` +- [DROP](#grant-drop) + - `DROP DATABASE` + - `DROP TABLE` + - `DROP VIEW` + - `DROP DICTIONARY` +- [TRUNCATE](#grant-truncate) +- [OPTIMIZE](#grant-optimize) +- [SHOW](#grant-show) + - `SHOW DATABASES` + - `SHOW TABLES` + - `SHOW COLUMNS` + - `SHOW DICTIONARIES` +- [KILL QUERY](#grant-kill-query) +- [ACCESS MANAGEMENT](#grant-access-management) + - `CREATE USER` + - `ALTER USER` + - `DROP USER` + - `CREATE ROLE` + - `ALTER ROLE` + - `DROP ROLE` + - `CREATE ROW POLICY` + - `ALTER ROW POLICY` + - `DROP ROW POLICY` + - `CREATE QUOTA` + - `ALTER QUOTA` + - `DROP QUOTA` + - `CREATE SETTINGS PROFILE` + - `ALTER SETTINGS PROFILE` + - `DROP SETTINGS PROFILE` + - `SHOW ACCESS` + - `SHOW_USERS` + - `SHOW_ROLES` + - `SHOW_ROW_POLICIES` + - `SHOW_QUOTAS` + - `SHOW_SETTINGS_PROFILES` + - `ROLE ADMIN` +- [SYSTEM](#grant-system) + - `SYSTEM SHUTDOWN` + - `SYSTEM DROP CACHE` + - `SYSTEM DROP DNS CACHE` + - `SYSTEM DROP MARK CACHE` + - `SYSTEM DROP UNCOMPRESSED CACHE` + - `SYSTEM RELOAD` + - `SYSTEM RELOAD CONFIG` + - `SYSTEM RELOAD DICTIONARY` + - `SYSTEM RELOAD EMBEDDED DICTIONARIES` + - `SYSTEM MERGES` + - `SYSTEM TTL MERGES` + - `SYSTEM FETCHES` + - `SYSTEM MOVES` + - `SYSTEM SENDS` + - `SYSTEM DISTRIBUTED SENDS` + - `SYSTEM REPLICATED SENDS` + - `SYSTEM REPLICATION QUEUES` + - `SYSTEM SYNC REPLICA` + - `SYSTEM RESTART REPLICA` + - `SYSTEM FLUSH` + - `SYSTEM FLUSH DISTRIBUTED` + - `SYSTEM FLUSH LOGS` +- [INTROSPECTION](#grant-introspection) + - `addressToLine` + - `addressToSymbol` + - `demangle` +- 
[SOURCES](#grant-sources) + - `FILE` + - `URL` + - `REMOTE` + - `YSQL` + - `ODBC` + - `JDBC` + - `HDFS` + - `S3` +- [dictGet](#grant-dictget) + +如何对待该层级的示例: +- `ALTER` 权限包含所有其它 `ALTER *` 的权限 +- `ALTER CONSTRAINT` 包含 `ALTER ADD CONSTRAINT` 和 `ALTER DROP CONSTRAINT`权限 + +权限被应用到不同级别。 Knowing of a level suggests syntax available for privilege. + +级别(由低到高): + +- `COLUMN` - 可以授权到列,表,库或者全局 +- `TABLE` - 可以授权到表,库,或全局 +- `VIEW` - 可以授权到视图,库,或全局 +- `DICTIONARY` - 可以授权到字典,库,或全局 +- `DATABASE` - 可以授权到数据库或全局 +- `GLABLE` - 可以授权到全局 +- `GROUP` - 不同级别的权限分组。当授予 `GROUP`级别的权限时, 根据所用的语法,只有对应分组中的权限才会被分配。 + +允许的语法示例: + +- `GRANT SELECT(x) ON db.table TO user` +- `GRANT SELECT ON db.* TO user` + +不允许的语法示例: + +- `GRANT CREATE USER(x) ON db.table TO user` +- `GRANT CREATE USER ON db.* TO user` + +特殊的权限 `ALL` 将所有权限授予给用户或角色 + +默认情况下,一个用户账号或角色没有可授予的权限 + +如果用户或角色没有任何权限,它将显示为 `NONE`权限 + +有些操作根据它们的实现需要一系列的权限。例如, [RENAME](../../sql-reference/statements/misc.md#misc_operations-rename)操作需要以下权限来执行:`SELECT`, `CREATE TABLE`, `INSERT` 和 `DROP TABLE`。 + +### SELECT {#grant-select} + +允许执行 [SELECT](../../sql-reference/statements/select/index.md) 查询 + +权限级别: `COLUMN`. + +**说明** + +有该权限的用户可以对指定的表和库的指定列进行 `SELECT`查询。如果用户查询包含了其它列则结果不返回数据。 + +考虑如下的授权语句: + +``` sql +GRANT SELECT(x,y) ON db.table TO john +``` + +该权限允许 `john` 对 `db.table`表的列`x`,`y`执行任意 `SELECT `查询,例如 `SELECT x FROM db.table`。 `john` 不能执行 `SELECT z FROM db.table`以及 `SELECT * FROM db.table`。执行这个查询时,CH不会返回任何数据,甚至 `x` 和 `y`列。唯一的例外是,当表仅包含 `x`和`y`列时。这种情况下,CH返回所有数据。 + +### INSERT {#grant-insert} + +允许执行 [INSERT](../../sql-reference/statements/insert-into.md) 操作. + +权限级别: `COLUMN`. + +**说明** + +有该权限的用户可以对指定的表和库的指定列进行 `INSERT`操作。如果用户查询包含了其它列则结果不返回数据。 + +**示例** + +``` sql +GRANT INSERT(x,y) ON db.table TO john +``` + +该权限允许 `john` 对 `db.table`表的列`x`,`y`执行数据插入操作 + +### ALTER {#grant-alter} + +允许根据下列权限层级执行 [ALTER](../../sql-reference/statements/alter.md)操作 + +- `ALTER`. 级别: `COLUMN`. + - `ALTER TABLE`. 级别: `GROUP` + - `ALTER UPDATE`. 级别: `COLUMN`. 别名: `UPDATE` + - `ALTER DELETE`. 级别: `COLUMN`. 别名: `DELETE` + - `ALTER COLUMN`. 级别: `GROUP` + - `ALTER ADD COLUMN`. 级别: `COLUMN`. 别名: `ADD COLUMN` + - `ALTER DROP COLUMN`. 级别: `COLUMN`. 别名: `DROP COLUMN` + - `ALTER MODIFY COLUMN`. 级别: `COLUMN`. 别名: `MODIFY COLUMN` + - `ALTER COMMENT COLUMN`. 级别: `COLUMN`. 别名: `COMMENT COLUMN` + - `ALTER CLEAR COLUMN`. 级别: `COLUMN`. 别名: `CLEAR COLUMN` + - `ALTER RENAME COLUMN`. 级别: `COLUMN`. 别名: `RENAME COLUMN` + - `ALTER INDEX`. 级别: `GROUP`. 别名: `INDEX` + - `ALTER ORDER BY`. 级别: `TABLE`. 别名: `ALTER MODIFY ORDER BY`, `MODIFY ORDER BY` + - `ALTER ADD INDEX`. 级别: `TABLE`. 别名: `ADD INDEX` + - `ALTER DROP INDEX`. 级别: `TABLE`. 别名: `DROP INDEX` + - `ALTER MATERIALIZE INDEX`. 级别: `TABLE`. 别名: `MATERIALIZE INDEX` + - `ALTER CLEAR INDEX`. 级别: `TABLE`. 别名: `CLEAR INDEX` + - `ALTER CONSTRAINT`. 级别: `GROUP`. 别名: `CONSTRAINT` + - `ALTER ADD CONSTRAINT`. 级别: `TABLE`. 别名: `ADD CONSTRAINT` + - `ALTER DROP CONSTRAINT`. 级别: `TABLE`. 别名: `DROP CONSTRAINT` + - `ALTER TTL`. 级别: `TABLE`. 别名: `ALTER MODIFY TTL`, `MODIFY TTL` + - `ALTER MATERIALIZE TTL`. 级别: `TABLE`. 别名: `MATERIALIZE TTL` + - `ALTER SETTINGS`. 级别: `TABLE`. 别名: `ALTER SETTING`, `ALTER MODIFY SETTING`, `MODIFY SETTING` + - `ALTER MOVE PARTITION`. 级别: `TABLE`. 别名: `ALTER MOVE PART`, `MOVE PARTITION`, `MOVE PART` + - `ALTER FETCH PARTITION`. 级别: `TABLE`. 别名: `FETCH PARTITION` + - `ALTER FREEZE PARTITION`. 级别: `TABLE`. 别名: `FREEZE PARTITION` + - `ALTER VIEW` 级别: `GROUP` + - `ALTER VIEW REFRESH`. 级别: `VIEW`. 
别名: `ALTER LIVE VIEW REFRESH`, `REFRESH VIEW` + - `ALTER VIEW MODIFY QUERY`. 级别: `VIEW`. 别名: `ALTER TABLE MODIFY QUERY` + +如何对待该层级的示例: +- `ALTER` 权限包含所有其它 `ALTER *` 的权限 +- `ALTER CONSTRAINT` 包含 `ALTER ADD CONSTRAINT` 和 `ALTER DROP CONSTRAINT`权限 + +**备注** + +- `MODIFY SETTING`权限允许修改表的引擎设置。它不会影响服务的配置参数 +- `ATTACH` 操作需要 [CREATE](#grant-create) 权限. +- `DETACH` 操作需要 [DROP](#grant-drop) 权限. +- 要通过 [KILL MUTATION](../../sql-reference/statements/misc.md#kill-mutation) 操作来终止mutation, 你需要有发起mutation操作的权限。例如,当你想终止 `ALTER UPDATE`操作时,需要有 `ALTER UPDATE`, `ALTER TABLE`, 或 `ALTER`权限 + +### CREATE {#grant-create} + +允许根据下面的权限层级来执行 [CREATE](../../sql-reference/statements/create.md) 和 [ATTACH](../../sql-reference/statements/misc.md#attach) DDL语句: + +- `CREATE`. 级别: `GROUP` + - `CREATE DATABASE`. 级别: `DATABASE` + - `CREATE TABLE`. 级别: `TABLE` + - `CREATE VIEW`. 级别: `VIEW` + - `CREATE DICTIONARY`. 级别: `DICTIONARY` + - `CREATE TEMPORARY TABLE`. 级别: `GLOBAL` + +**备注** + +- 删除已创建的表,用户需要 [DROP](#grant-drop)权限 + +### DROP {#grant-drop} + +允许根据下面的权限层级来执行 [DROP](../../sql-reference/statements/misc.md#drop) 和 [DETACH](../../sql-reference/statements/misc.md#detach) : + +- `DROP`. 级别: + - `DROP DATABASE`. 级别: `DATABASE` + - `DROP TABLE`. 级别: `TABLE` + - `DROP VIEW`. 级别: `VIEW` + - `DROP DICTIONARY`. 级别: `DICTIONARY` + +### TRUNCATE {#grant-truncate} + +允许执行 [TRUNCATE](../../sql-reference/statements/misc.md#truncate-statement) . + +权限级别: `TABLE`. + +### OPTIMIZE {#grant-optimize} + +允许执行 [OPTIMIZE TABLE](../../sql-reference/statements/misc.md#misc_operations-optimize) . + +权限级别: `TABLE`. + +### SHOW {#grant-show} + +允许根据下面的权限层级来执行 `SHOW`, `DESCRIBE`, `USE`, 和 `EXISTS` : + +- `SHOW`. 级别: `GROUP` + - `SHOW DATABASES`. 级别: `DATABASE`. 允许执行 `SHOW DATABASES`, `SHOW CREATE DATABASE`, `USE ` . + - `SHOW TABLES`. 级别: `TABLE`. 允许执行 `SHOW TABLES`, `EXISTS `, `CHECK
` . + - `SHOW COLUMNS`. 级别: `COLUMN`. 允许执行 `SHOW CREATE TABLE`, `DESCRIBE` . + - `SHOW DICTIONARIES`. 级别: `DICTIONARY`. 允许执行 `SHOW DICTIONARIES`, `SHOW CREATE DICTIONARY`, `EXISTS ` . + +**备注** + +用户同时拥有 `SHOW`权限,当用户对指定表,字典或数据库有其它的权限时。 + +### KILL QUERY {#grant-kill-query} + +允许根据下面的权限层级来执行 [KILL](../../sql-reference/statements/misc.md#kill-query-statement): + +权限级别: `GLOBAL`. + +**备注** + +`KILL QUERY` 权限允许用户终止其它用户提交的操作。 + +### 访问管理 {#grant-access-management} + +允许用户执行管理用户/角色和行规则的操作: + +- `ACCESS MANAGEMENT`. 级别: `GROUP` + - `CREATE USER`. 级别: `GLOBAL` + - `ALTER USER`. 级别: `GLOBAL` + - `DROP USER`. 级别: `GLOBAL` + - `CREATE ROLE`. 级别: `GLOBAL` + - `ALTER ROLE`. 级别: `GLOBAL` + - `DROP ROLE`. 级别: `GLOBAL` + - `ROLE ADMIN`. 级别: `GLOBAL` + - `CREATE ROW POLICY`. 级别: `GLOBAL`. 别名: `CREATE POLICY` + - `ALTER ROW POLICY`. 级别: `GLOBAL`. 别名: `ALTER POLICY` + - `DROP ROW POLICY`. 级别: `GLOBAL`. 别名: `DROP POLICY` + - `CREATE QUOTA`. 级别: `GLOBAL` + - `ALTER QUOTA`. 级别: `GLOBAL` + - `DROP QUOTA`. 级别: `GLOBAL` + - `CREATE SETTINGS PROFILE`. 级别: `GLOBAL`. 别名: `CREATE PROFILE` + - `ALTER SETTINGS PROFILE`. 级别: `GLOBAL`. 别名: `ALTER PROFILE` + - `DROP SETTINGS PROFILE`. 级别: `GLOBAL`. 别名: `DROP PROFILE` + - `SHOW ACCESS`. 级别: `GROUP` + - `SHOW_USERS`. 级别: `GLOBAL`. 别名: `SHOW CREATE USER` + - `SHOW_ROLES`. 级别: `GLOBAL`. 别名: `SHOW CREATE ROLE` + - `SHOW_ROW_POLICIES`. 级别: `GLOBAL`. 别名: `SHOW POLICIES`, `SHOW CREATE ROW POLICY`, `SHOW CREATE POLICY` + - `SHOW_QUOTAS`. 级别: `GLOBAL`. 别名: `SHOW CREATE QUOTA` + - `SHOW_SETTINGS_PROFILES`. 级别: `GLOBAL`. 别名: `SHOW PROFILES`, `SHOW CREATE SETTINGS PROFILE`, `SHOW CREATE PROFILE` + +`ROLE ADMIN` 权限允许用户对角色进行分配以及撤回,包括根据管理选项尚未分配的角色 + +### SYSTEM {#grant-system} + +允许根据下面的权限层级来执行 [SYSTEM](../../sql-reference/statements/system.md) : + +- `SYSTEM`. 级别: `GROUP` + - `SYSTEM SHUTDOWN`. 级别: `GLOBAL`. 别名: `SYSTEM KILL`, `SHUTDOWN` + - `SYSTEM DROP CACHE`. 别名: `DROP CACHE` + - `SYSTEM DROP DNS CACHE`. 级别: `GLOBAL`. 别名: `SYSTEM DROP DNS`, `DROP DNS CACHE`, `DROP DNS` + - `SYSTEM DROP MARK CACHE`. 级别: `GLOBAL`. 别名: `SYSTEM DROP MARK`, `DROP MARK CACHE`, `DROP MARKS` + - `SYSTEM DROP UNCOMPRESSED CACHE`. 级别: `GLOBAL`. 别名: `SYSTEM DROP UNCOMPRESSED`, `DROP UNCOMPRESSED CACHE`, `DROP UNCOMPRESSED` + - `SYSTEM RELOAD`. 级别: `GROUP` + - `SYSTEM RELOAD CONFIG`. 级别: `GLOBAL`. 别名: `RELOAD CONFIG` + - `SYSTEM RELOAD DICTIONARY`. 级别: `GLOBAL`. 别名: `SYSTEM RELOAD DICTIONARIES`, `RELOAD DICTIONARY`, `RELOAD DICTIONARIES` + - `SYSTEM RELOAD EMBEDDED DICTIONARIES`. 级别: `GLOBAL`. 别名: R`ELOAD EMBEDDED DICTIONARIES` + - `SYSTEM MERGES`. 级别: `TABLE`. 别名: `SYSTEM STOP MERGES`, `SYSTEM START MERGES`, `STOP MERGES`, `START MERGES` + - `SYSTEM TTL MERGES`. 级别: `TABLE`. 别名: `SYSTEM STOP TTL MERGES`, `SYSTEM START TTL MERGES`, `STOP TTL MERGES`, `START TTL MERGES` + - `SYSTEM FETCHES`. 级别: `TABLE`. 别名: `SYSTEM STOP FETCHES`, `SYSTEM START FETCHES`, `STOP FETCHES`, `START FETCHES` + - `SYSTEM MOVES`. 级别: `TABLE`. 别名: `SYSTEM STOP MOVES`, `SYSTEM START MOVES`, `STOP MOVES`, `START MOVES` + - `SYSTEM SENDS`. 级别: `GROUP`. 别名: `SYSTEM STOP SENDS`, `SYSTEM START SENDS`, `STOP SENDS`, `START SENDS` + - `SYSTEM DISTRIBUTED SENDS`. 级别: `TABLE`. 别名: `SYSTEM STOP DISTRIBUTED SENDS`, `SYSTEM START DISTRIBUTED SENDS`, `STOP DISTRIBUTED SENDS`, `START DISTRIBUTED SENDS` + - `SYSTEM REPLICATED SENDS`. 级别: `TABLE`. 别名: `SYSTEM STOP REPLICATED SENDS`, `SYSTEM START REPLICATED SENDS`, `STOP REPLICATED SENDS`, `START REPLICATED SENDS` + - `SYSTEM REPLICATION QUEUES`. 级别: `TABLE`. 
别名: `SYSTEM STOP REPLICATION QUEUES`, `SYSTEM START REPLICATION QUEUES`, `STOP REPLICATION QUEUES`, `START REPLICATION QUEUES` + - `SYSTEM SYNC REPLICA`. 级别: `TABLE`. 别名: `SYNC REPLICA` + - `SYSTEM RESTART REPLICA`. 级别: `TABLE`. 别名: `RESTART REPLICA` + - `SYSTEM FLUSH`. 级别: `GROUP` + - `SYSTEM FLUSH DISTRIBUTED`. 级别: `TABLE`. 别名: `FLUSH DISTRIBUTED` + - `SYSTEM FLUSH LOGS`. 级别: `GLOBAL`. 别名: `FLUSH LOGS` + + +`SYSTEM RELOAD EMBEDDED DICTIONARIES` 权限会由 `SYSTEM RELOAD DICTIONARY ON *.*` 权限隐式授予。 + +### 内省 {#grant-introspection} + +允许使用 [introspection](../../operations/optimizing-performance/sampling-query-profiler.md) 函数。 + +- `INTROSPECTION`. 级别: `GROUP`. 别名: `INTROSPECTION FUNCTIONS` + - `addressToLine`. 级别: `GLOBAL` + - `addressToSymbol`. 级别: `GLOBAL` + - `demangle`. 级别: `GLOBAL` + +### 数据源 {#grant-sources} + +允许在 [table engines](../../engines/table-engines/index.md) 和 [table functions](../../sql-reference/table-functions/index.md#table-functions) 中使用外部数据源。 + +- `SOURCES`. 级别: `GROUP` + - `FILE`. 级别: `GLOBAL` + - `URL`. 级别: `GLOBAL` + - `REMOTE`. 级别: `GLOBAL` + - `MYSQL`. 级别: `GLOBAL` + - `ODBC`. 级别: `GLOBAL` + - `JDBC`. 级别: `GLOBAL` + - `HDFS`. 级别: `GLOBAL` + - `S3`. 级别: `GLOBAL` + +`SOURCES` 权限允许使用所有数据源。当然也可以单独对每个数据源进行授权。要使用数据源时,还需要额外的权限。 + +示例: + +- 创建 [MySQL table engine](../../engines/table-engines/integrations/mysql.md) 的表, 需要 `CREATE TABLE (ON db.table_name)` 和 `MYSQL` 权限。 +- 要使用 [mysql table function](../../sql-reference/table-functions/mysql.md),需要 `CREATE TEMPORARY TABLE` 和 `MYSQL` 权限。 + +### dictGet {#grant-dictget} + +- `dictGet`. 别名: `dictHas`, `dictGetHierarchy`, `dictIsIn` + +允许用户执行 [dictGet](../../sql-reference/functions/ext-dict-functions.md#dictget), [dictHas](../../sql-reference/functions/ext-dict-functions.md#dicthas), [dictGetHierarchy](../../sql-reference/functions/ext-dict-functions.md#dictgethierarchy), [dictIsIn](../../sql-reference/functions/ext-dict-functions.md#dictisin) 等函数。 + +权限级别: `DICTIONARY`. 
+ +**示例** + +- `GRANT dictGet ON mydb.mydictionary TO john` +- `GRANT dictGet ON mydictionary TO john` + +### ALL {#grant-all} + +对规定的实体(列,表,库等)给用户或角色授予所有权限 + +### NONE {#grant-none} + +不授予任何权限 + +### ADMIN OPTION {#admin-option-privilege} + +`ADMIN OPTION` 权限允许用户将他们的角色分配给其它用户 + +[原始文档](https://clickhouse.tech/docs/en/query_language/grant/) diff --git a/docs/zh/sql-reference/statements/misc.md b/docs/zh/sql-reference/statements/misc.md index aa10350280f..5320fbd0869 100644 --- a/docs/zh/sql-reference/statements/misc.md +++ b/docs/zh/sql-reference/statements/misc.md @@ -223,7 +223,7 @@ KILL MUTATION [ON CLUSTER cluster] [FORMAT format] ``` -尝试取消和删除 [突变](alter.md#alter-mutations) 当前正在执行。 要取消的突变选自 [`system.mutations`](../../operations/system-tables.md#system_tables-mutations) 表使用由指定的过滤器 `WHERE` 《公约》条款 `KILL` 查询。 +尝试取消和删除 [突变](alter.md#alter-mutations) 当前正在执行。 要取消的突变选自 [`system.mutations`](../../operations/system-tables/mutations.md#system_tables-mutations) 表使用由指定的过滤器 `WHERE` 《公约》条款 `KILL` 查询。 测试查询 (`TEST`)仅检查用户的权限并显示要停止的查询列表。 diff --git a/docs/zh/sql-reference/statements/revoke.md b/docs/zh/sql-reference/statements/revoke.md deleted file mode 120000 index 4321fdb14a7..00000000000 --- a/docs/zh/sql-reference/statements/revoke.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/sql-reference/statements/revoke.md \ No newline at end of file diff --git a/docs/zh/sql-reference/statements/revoke.md b/docs/zh/sql-reference/statements/revoke.md new file mode 100644 index 00000000000..f784b134a3b --- /dev/null +++ b/docs/zh/sql-reference/statements/revoke.md @@ -0,0 +1,48 @@ +--- +toc_priority: 40 +toc_title: REVOKE +--- + +# 权限取消 {#revoke} + +取消用户或角色的权限 + +## 语法 {#revoke-语法} + +**取消用户的权限** + +``` sql +REVOKE [ON CLUSTER cluster_name] privilege[(column_name [,...])] [,...] ON {db.table|db.*|*.*|table|*} FROM {user | CURRENT_USER} [,...] | ALL | ALL EXCEPT {user | CURRENT_USER} [,...] +``` + +**取消用户的角色** + +``` sql +REVOKE [ON CLUSTER cluster_name] [ADMIN OPTION FOR] role [,...] FROM {user | role | CURRENT_USER} [,...] | ALL | ALL EXCEPT {user_name | role_name | CURRENT_USER} [,...] 
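+
+-- 下面是一条示意性的语句(假设已存在角色 accountant 和用户 mira, 仅作演示):
+REVOKE accountant FROM mira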
+``` + +## 说明 {#revoke-description} + +要取消某些权限,可使用比要撤回的权限更大范围的权限。例如,当用户有 `SELECT (x,y)`权限时,管理员可执行 `REVOKE SELECT(x,y) ...`, 或 `REVOKE SELECT * ...`, 甚至是 `REVOKE ALL PRIVILEGES ...`来取消原有权限。 + +### 取消部分权限 {#partial-revokes-dscr} + +可以取消部分权限。例如,当用户有 `SELECT *.*` 权限时,可以通过授予对部分库或表的读取权限来撤回原有权限。 + +## 示例 {#revoke-example} + +授权 `john`账号能查询所有库的所有表,除了 `account`库。 + +``` sql +GRANT SELECT ON *.* TO john; +REVOKE SELECT ON accounts.* FROM john; +``` + +授权 `mira`账号能查询 `accounts.staff`表的所有列,除了 `wage`这一列。 + +``` sql +GRANT SELECT ON accounts.staff TO mira; +REVOKE SELECT(wage) ON accounts.staff FROM mira; +``` + +{## [原始文档](https://clickhouse.tech/docs/en/operations/settings/settings/) ##} diff --git a/docs/zh/sql-reference/statements/select/array-join.md b/docs/zh/sql-reference/statements/select/array-join.md deleted file mode 120000 index c341801e419..00000000000 --- a/docs/zh/sql-reference/statements/select/array-join.md +++ /dev/null @@ -1 +0,0 @@ -../../../../en/sql-reference/statements/select/array-join.md \ No newline at end of file diff --git a/docs/zh/sql-reference/statements/select/array-join.md b/docs/zh/sql-reference/statements/select/array-join.md new file mode 100644 index 00000000000..e84682838f4 --- /dev/null +++ b/docs/zh/sql-reference/statements/select/array-join.md @@ -0,0 +1,283 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +toc_title: ARRAY JOIN +--- + +# ARRAY JOIN子句 {#select-array-join-clause} + +对于包含数组列的表来说,这是一种常见的操作,用于生成一个新表,该表具有包含该初始列的每个单独数组元素的列,而其他列的值将被重复。 这是什么基本情况 `ARRAY JOIN` 子句有 + +它的名字来自这样一个事实,即它可以被视为执行 `JOIN` 具有数组或嵌套数据结构。 意图类似于 [arrayJoin](../../../sql-reference/functions/array-join.md#functions_arrayjoin) 功能,但该子句功能更广泛。 + +语法: + +``` sql +SELECT +FROM +[LEFT] ARRAY JOIN +[WHERE|PREWHERE ] +... +``` + +您只能指定一个 `ARRAY JOIN` a中的条款 `SELECT` 查询。 + +支持的类型 `ARRAY JOIN` 下面列出: + +- `ARRAY JOIN` -在基本情况下,空数组不包括在结果中 `JOIN`. 
+- `LEFT ARRAY JOIN` -的结果 `JOIN` 包含具有空数组的行。 空数组的值设置为数组元素类型的默认值(通常为0、空字符串或NULL)。 + +## 基本数组连接示例 {#basic-array-join-examples} + +下面的例子演示的用法 `ARRAY JOIN` 和 `LEFT ARRAY JOIN` 条款 让我们创建一个表 [阵列](../../../sql-reference/data-types/array.md) 键入column并在其中插入值: + +``` sql +CREATE TABLE arrays_test +( + s String, + arr Array(UInt8) +) ENGINE = Memory; + +INSERT INTO arrays_test +VALUES ('Hello', [1,2]), ('World', [3,4,5]), ('Goodbye', []); +``` + +``` text +┌─s───────────┬─arr─────┐ +│ Hello │ [1,2] │ +│ World │ [3,4,5] │ +│ Goodbye │ [] │ +└─────────────┴─────────┘ +``` + +下面的例子使用 `ARRAY JOIN` 条款: + +``` sql +SELECT s, arr +FROM arrays_test +ARRAY JOIN arr; +``` + +``` text +┌─s─────┬─arr─┐ +│ Hello │ 1 │ +│ Hello │ 2 │ +│ World │ 3 │ +│ World │ 4 │ +│ World │ 5 │ +└───────┴─────┘ +``` + +下一个示例使用 `LEFT ARRAY JOIN` 条款: + +``` sql +SELECT s, arr +FROM arrays_test +LEFT ARRAY JOIN arr; +``` + +``` text +┌─s───────────┬─arr─┐ +│ Hello │ 1 │ +│ Hello │ 2 │ +│ World │ 3 │ +│ World │ 4 │ +│ World │ 5 │ +│ Goodbye │ 0 │ +└─────────────┴─────┘ +``` + +## 使用别名 {#using-aliases} + +可以为数组中的别名指定 `ARRAY JOIN` 条款 在这种情况下,数组项目可以通过此别名访问,但数组本身可以通过原始名称访问。 示例: + +``` sql +SELECT s, arr, a +FROM arrays_test +ARRAY JOIN arr AS a; +``` + +``` text +┌─s─────┬─arr─────┬─a─┐ +│ Hello │ [1,2] │ 1 │ +│ Hello │ [1,2] │ 2 │ +│ World │ [3,4,5] │ 3 │ +│ World │ [3,4,5] │ 4 │ +│ World │ [3,4,5] │ 5 │ +└───────┴─────────┴───┘ +``` + +使用别名,您可以执行 `ARRAY JOIN` 与外部阵列。 例如: + +``` sql +SELECT s, arr_external +FROM arrays_test +ARRAY JOIN [1, 2, 3] AS arr_external; +``` + +``` text +┌─s───────────┬─arr_external─┐ +│ Hello │ 1 │ +│ Hello │ 2 │ +│ Hello │ 3 │ +│ World │ 1 │ +│ World │ 2 │ +│ World │ 3 │ +│ Goodbye │ 1 │ +│ Goodbye │ 2 │ +│ Goodbye │ 3 │ +└─────────────┴──────────────┘ +``` + +多个数组可以在逗号分隔 `ARRAY JOIN` 条款 在这种情况下, `JOIN` 与它们同时执行(直接和,而不是笛卡尔积)。 请注意,所有数组必须具有相同的大小。 示例: + +``` sql +SELECT s, arr, a, num, mapped +FROM arrays_test +ARRAY JOIN arr AS a, arrayEnumerate(arr) AS num, arrayMap(x -> x + 1, arr) AS mapped; +``` + +``` text +┌─s─────┬─arr─────┬─a─┬─num─┬─mapped─┐ +│ Hello │ [1,2] │ 1 │ 1 │ 2 │ +│ Hello │ [1,2] │ 2 │ 2 │ 3 │ +│ World │ [3,4,5] │ 3 │ 1 │ 4 │ +│ World │ [3,4,5] │ 4 │ 2 │ 5 │ +│ World │ [3,4,5] │ 5 │ 3 │ 6 │ +└───────┴─────────┴───┴─────┴────────┘ +``` + +下面的例子使用 [arrayEnumerate](../../../sql-reference/functions/array-functions.md#array_functions-arrayenumerate) 功能: + +``` sql +SELECT s, arr, a, num, arrayEnumerate(arr) +FROM arrays_test +ARRAY JOIN arr AS a, arrayEnumerate(arr) AS num; +``` + +``` text +┌─s─────┬─arr─────┬─a─┬─num─┬─arrayEnumerate(arr)─┐ +│ Hello │ [1,2] │ 1 │ 1 │ [1,2] │ +│ Hello │ [1,2] │ 2 │ 2 │ [1,2] │ +│ World │ [3,4,5] │ 3 │ 1 │ [1,2,3] │ +│ World │ [3,4,5] │ 4 │ 2 │ [1,2,3] │ +│ World │ [3,4,5] │ 5 │ 3 │ [1,2,3] │ +└───────┴─────────┴───┴─────┴─────────────────────┘ +``` + +## 具有嵌套数据结构的数组连接 {#array-join-with-nested-data-structure} + +`ARRAY JOIN` 也适用于 [嵌套数据结构](../../../sql-reference/data-types/nested-data-structures/nested.md): + +``` sql +CREATE TABLE nested_test +( + s String, + nest Nested( + x UInt8, + y UInt32) +) ENGINE = Memory; + +INSERT INTO nested_test +VALUES ('Hello', [1,2], [10,20]), ('World', [3,4,5], [30,40,50]), ('Goodbye', [], []); +``` + +``` text +┌─s───────┬─nest.x──┬─nest.y─────┐ +│ Hello │ [1,2] │ [10,20] │ +│ World │ [3,4,5] │ [30,40,50] │ +│ Goodbye │ [] │ [] │ +└─────────┴─────────┴────────────┘ +``` + +``` sql +SELECT s, `nest.x`, `nest.y` +FROM nested_test +ARRAY JOIN nest; +``` + +``` text +┌─s─────┬─nest.x─┬─nest.y─┐ +│ Hello │ 1 │ 10 │ +│ Hello │ 2 │ 20 │ +│ World 
│ 3 │ 30 │ +│ World │ 4 │ 40 │ +│ World │ 5 │ 50 │ +└───────┴────────┴────────┘ +``` + +当指定嵌套数据结构的名称 `ARRAY JOIN`,意思是一样的 `ARRAY JOIN` 它包含的所有数组元素。 下面列出了示例: + +``` sql +SELECT s, `nest.x`, `nest.y` +FROM nested_test +ARRAY JOIN `nest.x`, `nest.y`; +``` + +``` text +┌─s─────┬─nest.x─┬─nest.y─┐ +│ Hello │ 1 │ 10 │ +│ Hello │ 2 │ 20 │ +│ World │ 3 │ 30 │ +│ World │ 4 │ 40 │ +│ World │ 5 │ 50 │ +└───────┴────────┴────────┘ +``` + +这种变化也是有道理的: + +``` sql +SELECT s, `nest.x`, `nest.y` +FROM nested_test +ARRAY JOIN `nest.x`; +``` + +``` text +┌─s─────┬─nest.x─┬─nest.y─────┐ +│ Hello │ 1 │ [10,20] │ +│ Hello │ 2 │ [10,20] │ +│ World │ 3 │ [30,40,50] │ +│ World │ 4 │ [30,40,50] │ +│ World │ 5 │ [30,40,50] │ +└───────┴────────┴────────────┘ +``` + +可以将别名用于嵌套数据结构,以便选择 `JOIN` 结果或源数组。 示例: + +``` sql +SELECT s, `n.x`, `n.y`, `nest.x`, `nest.y` +FROM nested_test +ARRAY JOIN nest AS n; +``` + +``` text +┌─s─────┬─n.x─┬─n.y─┬─nest.x──┬─nest.y─────┐ +│ Hello │ 1 │ 10 │ [1,2] │ [10,20] │ +│ Hello │ 2 │ 20 │ [1,2] │ [10,20] │ +│ World │ 3 │ 30 │ [3,4,5] │ [30,40,50] │ +│ World │ 4 │ 40 │ [3,4,5] │ [30,40,50] │ +│ World │ 5 │ 50 │ [3,4,5] │ [30,40,50] │ +└───────┴─────┴─────┴─────────┴────────────┘ +``` + +使用的例子 [arrayEnumerate](../../../sql-reference/functions/array-functions.md#array_functions-arrayenumerate) 功能: + +``` sql +SELECT s, `n.x`, `n.y`, `nest.x`, `nest.y`, num +FROM nested_test +ARRAY JOIN nest AS n, arrayEnumerate(`nest.x`) AS num; +``` + +``` text +┌─s─────┬─n.x─┬─n.y─┬─nest.x──┬─nest.y─────┬─num─┐ +│ Hello │ 1 │ 10 │ [1,2] │ [10,20] │ 1 │ +│ Hello │ 2 │ 20 │ [1,2] │ [10,20] │ 2 │ +│ World │ 3 │ 30 │ [3,4,5] │ [30,40,50] │ 1 │ +│ World │ 4 │ 40 │ [3,4,5] │ [30,40,50] │ 2 │ +│ World │ 5 │ 50 │ [3,4,5] │ [30,40,50] │ 3 │ +└───────┴─────┴─────┴─────────┴────────────┴─────┘ +``` + +## 实施细节 {#implementation-details} + +运行时优化查询执行顺序 `ARRAY JOIN`. 虽然 `ARRAY JOIN` 必须始终之前指定 [WHERE](../../../sql-reference/statements/select/where.md)/[PREWHERE](../../../sql-reference/statements/select/prewhere.md) 子句中的查询,从技术上讲,它们可以以任何顺序执行,除非结果 `ARRAY JOIN` 用于过滤。 处理顺序由查询优化器控制。 diff --git a/docs/zh/sql-reference/statements/select/distinct.md b/docs/zh/sql-reference/statements/select/distinct.md deleted file mode 120000 index 59319557dc1..00000000000 --- a/docs/zh/sql-reference/statements/select/distinct.md +++ /dev/null @@ -1 +0,0 @@ -../../../../en/sql-reference/statements/select/distinct.md \ No newline at end of file diff --git a/docs/zh/sql-reference/statements/select/distinct.md b/docs/zh/sql-reference/statements/select/distinct.md new file mode 100644 index 00000000000..ea430e8602f --- /dev/null +++ b/docs/zh/sql-reference/statements/select/distinct.md @@ -0,0 +1,64 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +toc_title: DISTINCT +--- + +# DISTINCT子句 {#select-distinct} + +如果 `SELECT DISTINCT` 如果指定,则查询结果中只保留唯一行。 因此,在结果中所有完全匹配的行集合中,只有一行将保留。 + +## 空处理 {#null-processing} + +`DISTINCT` 适用于 [NULL](../../../sql-reference/syntax.md#null-literal) 就好像 `NULL` 是一个特定的值,并且 `NULL==NULL`. 换句话说,在 `DISTINCT` 结果,不同的组合 `NULL` 仅发生一次。 它不同于 `NULL` 在大多数其他上下文中进行处理。 + +## 替代办法 {#alternatives} + +通过应用可以获得相同的结果 [GROUP BY](../../../sql-reference/statements/select/group-by.md) 在同一组值指定为 `SELECT` 子句,而不使用任何聚合函数。 但有几个区别 `GROUP BY` 方法: + +- `DISTINCT` 可以一起应用 `GROUP BY`. 
+- 当 [ORDER BY](../../../sql-reference/statements/select/order-by.md) 省略和 [LIMIT](../../../sql-reference/statements/select/limit.md) 定义时,查询在读取所需数量的不同行后立即停止运行。 +- 数据块在处理时输出,而无需等待整个查询完成运行。 + +## 限制 {#limitations} + +`DISTINCT` 如果不支持 `SELECT` 具有至少一个数组列。 + +## 例 {#examples} + +ClickHouse支持使用 `DISTINCT` 和 `ORDER BY` 一个查询中不同列的子句。 该 `DISTINCT` 子句之前执行 `ORDER BY` 条款 + +示例表: + +``` text +┌─a─┬─b─┐ +│ 2 │ 1 │ +│ 1 │ 2 │ +│ 3 │ 3 │ +│ 2 │ 4 │ +└───┴───┘ +``` + +当与选择数据 `SELECT DISTINCT a FROM t1 ORDER BY b ASC` 查询,我们得到以下结果: + +``` text +┌─a─┐ +│ 2 │ +│ 1 │ +│ 3 │ +└───┘ +``` + +如果我们改变排序方向 `SELECT DISTINCT a FROM t1 ORDER BY b DESC`,我们得到以下结果: + +``` text +┌─a─┐ +│ 3 │ +│ 1 │ +│ 2 │ +└───┘ +``` + +行 `2, 4` 分拣前被切割。 + +在编程查询时考虑这种实现特异性。 diff --git a/docs/zh/sql-reference/statements/select/format.md b/docs/zh/sql-reference/statements/select/format.md deleted file mode 120000 index 106b2d9ebbc..00000000000 --- a/docs/zh/sql-reference/statements/select/format.md +++ /dev/null @@ -1 +0,0 @@ -../../../../en/sql-reference/statements/select/format.md \ No newline at end of file diff --git a/docs/zh/sql-reference/statements/select/format.md b/docs/zh/sql-reference/statements/select/format.md new file mode 100644 index 00000000000..014aec3b72e --- /dev/null +++ b/docs/zh/sql-reference/statements/select/format.md @@ -0,0 +1,19 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +toc_title: FORMAT +--- + +# 格式子句 {#format-clause} + +ClickHouse支持广泛的 [序列化格式](../../../interfaces/formats.md) 可用于查询结果等。 有多种方法可以选择以下格式 `SELECT` 输出,其中之一是指定 `FORMAT format` 在查询结束时以任何特定格式获取结果数据。 + +特定的格式可以用于方便使用,与其他系统集成或性能增益。 + +## 默认格式 {#default-format} + +如果 `FORMAT` 省略子句,使用默认格式,这取决于用于访问ClickHouse服务器的设置和接口。 为 [HTTP接口](../../../interfaces/http.md) 和 [命令行客户端](../../../interfaces/cli.md) 在批处理模式下,默认格式为 `TabSeparated`. 对于交互模式下的命令行客户端,默认格式为 `PrettyCompact` (它生成紧凑的人类可读表)。 + +## 实施细节 {#implementation-details} + +使用命令行客户端时,数据始终以内部高效格式通过网络传递 (`Native`). 客户端独立解释 `FORMAT` 查询子句并格式化数据本身(从而减轻网络和服务器的额外负载)。 diff --git a/docs/zh/sql-reference/statements/select/from.md b/docs/zh/sql-reference/statements/select/from.md deleted file mode 120000 index f8ebfe655cc..00000000000 --- a/docs/zh/sql-reference/statements/select/from.md +++ /dev/null @@ -1 +0,0 @@ -../../../../en/sql-reference/statements/select/from.md \ No newline at end of file diff --git a/docs/zh/sql-reference/statements/select/from.md b/docs/zh/sql-reference/statements/select/from.md new file mode 100644 index 00000000000..86ba0959e16 --- /dev/null +++ b/docs/zh/sql-reference/statements/select/from.md @@ -0,0 +1,45 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +toc_title: FROM +--- + +# FROM条款 {#select-from} + +该 `FROM` 子句指定从中读取数据的源: + +- [表](../../../engines/table-engines/index.md) +- [子查询](../../../sql-reference/statements/select/index.md) {## TODO: better link ##} +- [表函数](../../../sql-reference/table-functions/index.md#table-functions) + +[JOIN](../../../sql-reference/statements/select/join.md) 和 [ARRAY JOIN](../../../sql-reference/statements/select/array-join.md) 子句也可以用来扩展的功能 `FROM` 条款 + +子查询是另一个 `SELECT` 可以在括号内指定的查询 `FROM` 条款 + +`FROM` 子句可以包含多个数据源,用逗号分隔,这相当于执行 [CROSS JOIN](../../../sql-reference/statements/select/join.md) 在他们身上 + +## 最终修饰符 {#select-from-final} + +当 `FINAL` 如果指定,ClickHouse会在返回结果之前完全合并数据,从而执行给定表引擎合并期间发生的所有数据转换。 + +它适用于从使用 [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md)-发动机系列(除了 `GraphiteMergeTree`). 
还支持: + +- [复制](../../../engines/table-engines/mergetree-family/replication.md) 版本 `MergeTree` 引擎 +- [查看](../../../engines/table-engines/special/view.md), [缓冲区](../../../engines/table-engines/special/buffer.md), [分布](../../../engines/table-engines/special/distributed.md),和 [MaterializedView](../../../engines/table-engines/special/materializedview.md) 在其他引擎上运行的引擎,只要它们是在创建 `MergeTree`-发动机表。 + +### 缺点 {#drawbacks} + +使用的查询 `FINAL` 执行速度不如类似的查询那么快,因为: + +- 查询在单个线程中执行,并在查询执行期间合并数据。 +- 查询与 `FINAL` 除了读取查询中指定的列之外,还读取主键列。 + +**在大多数情况下,避免使用 `FINAL`.** 常见的方法是使用假设后台进程的不同查询 `MergeTree` 引擎还没有发生,并通过应用聚合(例如,丢弃重复项)来处理它。 {## TODO: examples ##} + +## 实施细节 {#implementation-details} + +如果 `FROM` 子句被省略,数据将从读取 `system.one` 桌子 +该 `system.one` 表只包含一行(此表满足与其他Dbms中找到的双表相同的目的)。 + +若要执行查询,将从相应的表中提取查询中列出的所有列。 外部查询不需要的任何列都将从子查询中抛出。 +如果查询未列出任何列(例如, `SELECT count() FROM t`),无论如何都会从表中提取一些列(最小的列是首选),以便计算行数。 diff --git a/docs/zh/sql-reference/statements/select/group-by.md b/docs/zh/sql-reference/statements/select/group-by.md deleted file mode 120000 index cf519ad7781..00000000000 --- a/docs/zh/sql-reference/statements/select/group-by.md +++ /dev/null @@ -1 +0,0 @@ -../../../../en/sql-reference/statements/select/group-by.md \ No newline at end of file diff --git a/docs/zh/sql-reference/statements/select/group-by.md b/docs/zh/sql-reference/statements/select/group-by.md new file mode 100644 index 00000000000..082fec94498 --- /dev/null +++ b/docs/zh/sql-reference/statements/select/group-by.md @@ -0,0 +1,133 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +toc_title: GROUP BY +--- + +# GROUP BY子句 {#select-group-by-clause} + +`GROUP BY` 子句切换 `SELECT` 查询转换为聚合模式,其工作原理如下: + +- `GROUP BY` 子句包含表达式列表(或单个表达式,其被认为是长度为1的列表)。 这份名单充当 “grouping key”,而每个单独的表达式将被称为 “key expressions”. +- 在所有的表达式 [SELECT](../../../sql-reference/statements/select/index.md), [HAVING](../../../sql-reference/statements/select/having.md),和 [ORDER BY](../../../sql-reference/statements/select/order-by.md) 条款 **必须** 基于键表达式进行计算 **或** 上 [聚合函数](../../../sql-reference/aggregate-functions/index.md) 在非键表达式(包括纯列)上。 换句话说,从表中选择的每个列必须用于键表达式或聚合函数内,但不能同时使用。 +- 聚合结果 `SELECT` 查询将包含尽可能多的行,因为有唯一值 “grouping key” 在源表中。 通常这会显着减少行数,通常是数量级,但不一定:如果所有行数保持不变 “grouping key” 值是不同的。 + +!!! note "注" + 还有一种额外的方法可以在表上运行聚合。 如果查询仅在聚合函数中包含表列,则 `GROUP BY clause` 可以省略,并且通过一个空的键集合来假定聚合。 这样的查询总是只返回一行。 + +## 空处理 {#null-processing} + +对于分组,ClickHouse解释 [NULL](../../../sql-reference/syntax.md#null-literal) 作为一个值,并且 `NULL==NULL`. 它不同于 `NULL` 在大多数其他上下文中进行处理。 + +这里有一个例子来说明这意味着什么。 + +假设你有这张桌子: + +``` text +┌─x─┬────y─┐ +│ 1 │ 2 │ +│ 2 │ ᴺᵁᴸᴸ │ +│ 3 │ 2 │ +│ 3 │ 3 │ +│ 3 │ ᴺᵁᴸᴸ │ +└───┴──────┘ +``` + +查询 `SELECT sum(x), y FROM t_null_big GROUP BY y` 结果: + +``` text +┌─sum(x)─┬────y─┐ +│ 4 │ 2 │ +│ 3 │ 3 │ +│ 5 │ ᴺᵁᴸᴸ │ +└────────┴──────┘ +``` + +你可以看到 `GROUP BY` 为 `y = NULL` 总结 `x`,仿佛 `NULL` 是这个值。 + +如果你通过几个键 `GROUP BY`,结果会给你选择的所有组合,就好像 `NULL` 是一个特定的值。 + +## 使用总计修饰符 {#with-totals-modifier} + +如果 `WITH TOTALS` 指定修饰符,将计算另一行。 此行将具有包含默认值(零或空行)的关键列,以及包含跨所有行计算值的聚合函数列( “total” 值)。 + +这个额外的行仅产生于 `JSON*`, `TabSeparated*`,和 `Pretty*` 格式,与其他行分开: + +- 在 `JSON*` 格式,这一行是作为一个单独的输出 ‘totals’ 场。 +- 在 `TabSeparated*` 格式,该行位于主结果之后,前面有一个空行(在其他数据之后)。 +- 在 `Pretty*` 格式时,该行在主结果之后作为单独的表输出。 +- 在其他格式中,它不可用。 + +`WITH TOTALS` 可以以不同的方式运行时 [HAVING](../../../sql-reference/statements/select/having.md) 是存在的。 该行为取决于 `totals_mode` 设置。 + +### 配置合计处理 {#configuring-totals-processing} + +默认情况下, `totals_mode = 'before_having'`. 在这种情况下, ‘totals’ 是跨所有行计算,包括那些不通过具有和 `max_rows_to_group_by`. 
+ +其他替代方案仅包括通过具有在 ‘totals’,并与设置不同的行为 `max_rows_to_group_by` 和 `group_by_overflow_mode = 'any'`. + +`after_having_exclusive` – Don't include rows that didn't pass through `max_rows_to_group_by`. 换句话说, ‘totals’ 将有少于或相同数量的行,因为它会 `max_rows_to_group_by` 被省略。 + +`after_having_inclusive` – Include all the rows that didn't pass through ‘max\_rows\_to\_group\_by’ 在 ‘totals’. 换句话说, ‘totals’ 将有多个或相同数量的行,因为它会 `max_rows_to_group_by` 被省略。 + +`after_having_auto` – Count the number of rows that passed through HAVING. If it is more than a certain amount (by default, 50%), include all the rows that didn't pass through ‘max\_rows\_to\_group\_by’ 在 ‘totals’. 否则,不包括它们。 + +`totals_auto_threshold` – By default, 0.5. The coefficient for `after_having_auto`. + +如果 `max_rows_to_group_by` 和 `group_by_overflow_mode = 'any'` 不使用,所有的变化 `after_having` 是相同的,你可以使用它们中的任何一个(例如, `after_having_auto`). + +您可以使用 `WITH TOTALS` 在子查询中,包括在子查询 [JOIN](../../../sql-reference/statements/select/join.md) 子句(在这种情况下,将各自的总值合并)。 + +## 例 {#examples} + +示例: + +``` sql +SELECT + count(), + median(FetchTiming > 60 ? 60 : FetchTiming), + count() - sum(Refresh) +FROM hits +``` + +但是,与标准SQL相比,如果表没有任何行(根本没有任何行,或者在使用WHERE to filter之后没有任何行),则返回一个空结果,而不是来自包含聚合函数初始值的行之 + +相对于MySQL(并且符合标准SQL),您无法获取不在键或聚合函数(常量表达式除外)中的某些列的某些值。 要解决此问题,您可以使用 ‘any’ 聚合函数(获取第一个遇到的值)或 ‘min/max’. + +示例: + +``` sql +SELECT + domainWithoutWWW(URL) AS domain, + count(), + any(Title) AS title -- getting the first occurred page header for each domain. +FROM hits +GROUP BY domain +``` + +对于遇到的每个不同的键值, `GROUP BY` 计算一组聚合函数值。 + +`GROUP BY` 不支持数组列。 + +不能将常量指定为聚合函数的参数。 示例: `sum(1)`. 相反,你可以摆脱常数。 示例: `count()`. + +## 实施细节 {#implementation-details} + +聚合是面向列的DBMS最重要的功能之一,因此它的实现是ClickHouse中最优化的部分之一。 默认情况下,聚合使用哈希表在内存中完成。 它有40+的专业化是自动选择取决于 “grouping key” 数据类型。 + +### 在外部存储器中分组 {#select-group-by-in-external-memory} + +您可以启用将临时数据转储到磁盘以限制内存使用期间 `GROUP BY`. +该 [max\_bytes\_before\_external\_group\_by](../../../operations/settings/settings.md#settings-max_bytes_before_external_group_by) 设置确定倾销的阈值RAM消耗 `GROUP BY` 临时数据到文件系统。 如果设置为0(默认值),它将被禁用。 + +使用时 `max_bytes_before_external_group_by`,我们建议您设置 `max_memory_usage` 大约两倍高。 这是必要的,因为聚合有两个阶段:读取数据和形成中间数据(1)和合并中间数据(2)。 将数据转储到文件系统只能在阶段1中发生。 如果未转储临时数据,则阶段2可能需要与阶段1相同的内存量。 + +例如,如果 [max\_memory\_usage](../../../operations/settings/settings.md#settings_max_memory_usage) 设置为10000000000,你想使用外部聚合,这是有意义的设置 `max_bytes_before_external_group_by` 到10000000000,和 `max_memory_usage` 到200亿。 当触发外部聚合(如果至少有一个临时数据转储)时,RAM的最大消耗仅略高于 `max_bytes_before_external_group_by`. + +通过分布式查询处理,在远程服务器上执行外部聚合。 为了使请求者服务器只使用少量的RAM,设置 `distributed_aggregation_memory_efficient` 到1。 + +当合并数据刷新到磁盘时,以及当合并来自远程服务器的结果时, `distributed_aggregation_memory_efficient` 设置被启用,消耗高达 `1/256 * the_number_of_threads` 从RAM的总量。 + +当启用外部聚合时,如果有小于 `max_bytes_before_external_group_by` of data (i.e. data was not flushed), the query runs just as fast as without external aggregation. If any temporary data was flushed, the run time will be several times longer (approximately three times). + +如果你有一个 [ORDER BY](../../../sql-reference/statements/select/order-by.md) 用一个 [LIMIT](../../../sql-reference/statements/select/limit.md) 后 `GROUP BY`,然后使用的RAM的量取决于数据的量 `LIMIT`,不是在整个表。 但如果 `ORDER BY` 没有 `LIMIT`,不要忘记启用外部排序 (`max_bytes_before_external_sort`). 
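+
+下面给出一个简单的示意性示例(假设存在带 `UserID` 列的 `hits` 表, 表名和列名仅作演示), 按上文的建议同时设置这两个参数来启用外部聚合:
+
+``` sql
+-- GROUP BY 的临时数据达到约 10 GB 时开始溢写到磁盘
+SET max_bytes_before_external_group_by = 10000000000;
+-- 内存上限设置为前者的大约两倍
+SET max_memory_usage = 20000000000;
+
+SELECT UserID, count() FROM hits GROUP BY UserID;
+```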
diff --git a/docs/zh/sql-reference/statements/select/having.md b/docs/zh/sql-reference/statements/select/having.md deleted file mode 120000 index 4a038beb126..00000000000 --- a/docs/zh/sql-reference/statements/select/having.md +++ /dev/null @@ -1 +0,0 @@ -../../../../en/sql-reference/statements/select/having.md \ No newline at end of file diff --git a/docs/zh/sql-reference/statements/select/having.md b/docs/zh/sql-reference/statements/select/having.md new file mode 100644 index 00000000000..d5c5b96a280 --- /dev/null +++ b/docs/zh/sql-reference/statements/select/having.md @@ -0,0 +1,15 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +toc_title: HAVING +--- + +# 有条款 {#having-clause} + +允许过滤由以下方式生成的聚合结果 [GROUP BY](../../../sql-reference/statements/select/group-by.md). 它类似于 [WHERE](../../../sql-reference/statements/select/where.md) 条款,但不同的是 `WHERE` 在聚合之前执行,而 `HAVING` 之后进行。 + +可以从以下引用聚合结果 `SELECT` 中的条款 `HAVING` 子句由他们的化名。 或者, `HAVING` 子句可以筛选查询结果中未返回的其他聚合的结果。 + +## 限制 {#limitations} + +`HAVING` 如果不执行聚合,则无法使用。 使用 `WHERE` 相反。 diff --git a/docs/zh/sql-reference/statements/select/index.md b/docs/zh/sql-reference/statements/select/index.md deleted file mode 120000 index 9c649322c82..00000000000 --- a/docs/zh/sql-reference/statements/select/index.md +++ /dev/null @@ -1 +0,0 @@ -../../../../en/sql-reference/statements/select/index.md \ No newline at end of file diff --git a/docs/zh/sql-reference/statements/select/index.md b/docs/zh/sql-reference/statements/select/index.md new file mode 100644 index 00000000000..58850b91a02 --- /dev/null +++ b/docs/zh/sql-reference/statements/select/index.md @@ -0,0 +1,164 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +title: SELECT Query +toc_folder_title: SELECT +toc_priority: 33 +toc_title: "\u6982\u8FF0" +--- + +# 选择查询 {#select-queries-syntax} + +`SELECT` 查询执行数据检索。 默认情况下,请求的数据返回给客户端,同时与 [INSERT INTO](../../../sql-reference/statements/insert-into.md) 它可以被转发到不同的表。 + +## 语法 {#syntax} + +``` sql +[WITH expr_list|(subquery)] +SELECT [DISTINCT] expr_list +[FROM [db.]table | (subquery) | table_function] [FINAL] +[SAMPLE sample_coeff] +[ARRAY JOIN ...] +[GLOBAL] [ANY|ALL] [INNER|LEFT|RIGHT|FULL|CROSS] [OUTER] JOIN (subquery)|table USING columns_list +[PREWHERE expr] +[WHERE expr] +[GROUP BY expr_list] [WITH TOTALS] +[HAVING expr] +[ORDER BY expr_list] +[LIMIT [offset_value, ]n BY columns] +[LIMIT [n, ]m] +[UNION ALL ...] +[INTO OUTFILE filename] +[FORMAT format] +``` + +所有子句都是可选的,但紧接在后面的必需表达式列表除外 `SELECT` 这是更详细的复盖 [下面](#select-clause). 
+ +每个可选子句的具体内容在单独的部分中进行了介绍,这些部分按与执行顺序相同的顺序列出: + +- [WITH条款](../../../sql-reference/statements/select/with.md) +- [FROM条款](../../../sql-reference/statements/select/from.md) +- [示例子句](../../../sql-reference/statements/select/sample.md) +- [JOIN子句](../../../sql-reference/statements/select/join.md) +- [PREWHERE条款](../../../sql-reference/statements/select/prewhere.md) +- [WHERE条款](../../../sql-reference/statements/select/where.md) +- [GROUP BY子句](../../../sql-reference/statements/select/group-by.md) +- [限制条款](../../../sql-reference/statements/select/limit-by.md) +- [有条款](../../../sql-reference/statements/select/having.md) +- [SELECT子句](#select-clause) +- [DISTINCT子句](../../../sql-reference/statements/select/distinct.md) +- [限制条款](../../../sql-reference/statements/select/limit.md) +- [UNION ALL条款](../../../sql-reference/statements/select/union-all.md) +- [INTO OUTFILE条款](../../../sql-reference/statements/select/into-outfile.md) +- [格式子句](../../../sql-reference/statements/select/format.md) + +## SELECT子句 {#select-clause} + +[表达式](../../../sql-reference/syntax.md#syntax-expressions) 在指定 `SELECT` 子句是在上述子句中的所有操作完成后计算的。 这些表达式的工作方式就好像它们应用于结果中的单独行一样。 如果在表达式 `SELECT` 子句包含聚合函数,然后ClickHouse处理过程中用作其参数的聚合函数和表达式 [GROUP BY](../../../sql-reference/statements/select/group-by.md) 聚合。 + +如果要在结果中包含所有列,请使用星号 (`*`)符号。 例如, `SELECT * FROM ...`. + +将结果中的某些列与 [re2](https://en.wikipedia.org/wiki/RE2_(software)) 正则表达式,您可以使用 `COLUMNS` 表达。 + +``` sql +COLUMNS('regexp') +``` + +例如,考虑表: + +``` sql +CREATE TABLE default.col_names (aa Int8, ab Int8, bc Int8) ENGINE = TinyLog +``` + +以下查询从包含以下内容的所有列中选择数据 `a` 在他们的名字符号。 + +``` sql +SELECT COLUMNS('a') FROM col_names +``` + +``` text +┌─aa─┬─ab─┐ +│ 1 │ 1 │ +└────┴────┘ +``` + +所选列不按字母顺序返回。 + +您可以使用多个 `COLUMNS` 查询中的表达式并将函数应用于它们。 + +例如: + +``` sql +SELECT COLUMNS('a'), COLUMNS('c'), toTypeName(COLUMNS('c')) FROM col_names +``` + +``` text +┌─aa─┬─ab─┬─bc─┬─toTypeName(bc)─┐ +│ 1 │ 1 │ 1 │ Int8 │ +└────┴────┴────┴────────────────┘ +``` + +由返回的每一列 `COLUMNS` 表达式作为单独的参数传递给函数。 如果函数支持其他参数,您也可以将其他参数传递给函数。 使用函数时要小心。 如果函数不支持您传递给它的参数数,ClickHouse将引发异常。 + +例如: + +``` sql +SELECT COLUMNS('a') + COLUMNS('c') FROM col_names +``` + +``` text +Received exception from server (version 19.14.1): +Code: 42. DB::Exception: Received from localhost:9000. DB::Exception: Number of arguments for function plus doesn't match: passed 3, should be 2. +``` + +在这个例子中, `COLUMNS('a')` 返回两列: `aa` 和 `ab`. `COLUMNS('c')` 返回 `bc` 列。 该 `+` 运算符不能应用于3个参数,因此ClickHouse引发一个带有相关消息的异常。 + +匹配的列 `COLUMNS` 表达式可以具有不同的数据类型。 如果 `COLUMNS` 不匹配任何列,并且是唯一的表达式 `SELECT`,ClickHouse抛出异常。 + +### 星号 {#asterisk} + +您可以在查询的任何部分而不是表达式中添加星号。 分析查询时,星号将展开为所有表列的列表(不包括 `MATERIALIZED` 和 `ALIAS` 列)。 只有少数情况下使用星号是合理的: + +- 创建表转储时。 +- 对于只包含几列的表,例如系统表。 +- 获取有关表中哪些列的信息。 在这种情况下,设置 `LIMIT 1`. 但最好使用 `DESC TABLE` 查询。 +- 当对少量柱进行强过滤时,使用 `PREWHERE`. +- 在子查询中(因为外部查询不需要的列从子查询中排除)。 + +在所有其他情况下,我们不建议使用星号,因为它只给你一个列DBMS的缺点,而不是优点。 换句话说,不建议使用星号。 + +### 极端值 {#extreme-values} + +除了结果之外,还可以获取结果列的最小值和最大值。 要做到这一点,设置 **极端** 设置为1。 最小值和最大值是针对数字类型、日期和带有时间的日期计算的。 对于其他列,默认值为输出。 + +An extra two rows are calculated – the minimums and maximums, respectively. These extra two rows are output in `JSON*`, `TabSeparated*`,和 `Pretty*` [格式](../../../interfaces/formats.md),与其他行分开。 它们不是其他格式的输出。 + +在 `JSON*` 格式时,极端值在一个单独的输出 ‘extremes’ 场。 在 `TabSeparated*` 格式中,该行来的主要结果之后,和之后 ‘totals’ 如果存在。 它前面有一个空行(在其他数据之后)。 在 `Pretty*` 格式中,该行被输出为一个单独的表之后的主结果,和之后 `totals` 如果存在。 + +极值计算之前的行 `LIMIT`,但之后 `LIMIT BY`. 但是,使用时 `LIMIT offset, size`,之前的行 `offset` 都包含在 `extremes`. 在流请求中,结果还可能包括少量通过的行 `LIMIT`. 
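+
+下面是一个简单的示意性示例(假设存在带数值列 `FetchTiming` 的 `hits` 表, 仅作演示; `extremes` 即上文所说的设置项):
+
+``` sql
+SET extremes = 1; -- 启用极值行的计算
+
+SELECT FetchTiming FROM hits LIMIT 10;
+```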
+ +### 注 {#notes} + +您可以使用同义词 (`AS` 别名)在查询的任何部分。 + +该 `GROUP BY` 和 `ORDER BY` 子句不支持位置参数。 这与MySQL相矛盾,但符合标准SQL。 例如, `GROUP BY 1, 2` will be interpreted as grouping by constants (i.e. aggregation of all rows into one). + +## 实施细节 {#implementation-details} + +如果查询省略 `DISTINCT`, `GROUP BY` 和 `ORDER BY` 条款和 `IN` 和 `JOIN` 子查询,查询将被完全流处理,使用O(1)量的RAM。 否则,如果未指定适当的限制,则查询可能会消耗大量RAM: + +- `max_memory_usage` +- `max_rows_to_group_by` +- `max_rows_to_sort` +- `max_rows_in_distinct` +- `max_bytes_in_distinct` +- `max_rows_in_set` +- `max_bytes_in_set` +- `max_rows_in_join` +- `max_bytes_in_join` +- `max_bytes_before_external_sort` +- `max_bytes_before_external_group_by` + +有关详细信息,请参阅部分 “Settings”. 可以使用外部排序(将临时表保存到磁盘)和外部聚合。 + +{## [原始文章](https://clickhouse.tech/docs/en/sql-reference/statements/select/) ##} diff --git a/docs/zh/sql-reference/statements/select/into-outfile.md b/docs/zh/sql-reference/statements/select/into-outfile.md deleted file mode 120000 index 2c9c812b3d5..00000000000 --- a/docs/zh/sql-reference/statements/select/into-outfile.md +++ /dev/null @@ -1 +0,0 @@ -../../../../en/sql-reference/statements/select/into-outfile.md \ No newline at end of file diff --git a/docs/zh/sql-reference/statements/select/into-outfile.md b/docs/zh/sql-reference/statements/select/into-outfile.md new file mode 100644 index 00000000000..f1eb3e55b89 --- /dev/null +++ b/docs/zh/sql-reference/statements/select/into-outfile.md @@ -0,0 +1,15 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +toc_title: INTO OUTFILE +--- + +# INTO OUTFILE条款 {#into-outfile-clause} + +添加 `INTO OUTFILE filename` 子句(其中filename是字符串文字) `SELECT query` 将其输出重定向到客户端上的指定文件。 + +## 实施细节 {#implementation-details} + +- 此功能是在可用 [命令行客户端](../../../interfaces/cli.md) 和 [ツ环板-ョツ嘉ッツ偲](../../../operations/utilities/clickhouse-local.md). 因此,通过发送查询 [HTTP接口](../../../interfaces/http.md) 都会失败 +- 如果具有相同文件名的文件已经存在,则查询将失败。 +- 默认值 [输出格式](../../../interfaces/formats.md) 是 `TabSeparated` (就像在命令行客户端批处理模式中一样)。 diff --git a/docs/zh/sql-reference/statements/select/join.md b/docs/zh/sql-reference/statements/select/join.md deleted file mode 120000 index 5951a105137..00000000000 --- a/docs/zh/sql-reference/statements/select/join.md +++ /dev/null @@ -1 +0,0 @@ -../../../../en/sql-reference/statements/select/join.md \ No newline at end of file diff --git a/docs/zh/sql-reference/statements/select/join.md b/docs/zh/sql-reference/statements/select/join.md new file mode 100644 index 00000000000..47fd0137717 --- /dev/null +++ b/docs/zh/sql-reference/statements/select/join.md @@ -0,0 +1,198 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +toc_title: JOIN +--- + +# JOIN子句 {#select-join} + +Join通过使用一个或多个表的公共值合并来自一个或多个表的列来生成新表。 它是支持SQL的数据库中的常见操作,它对应于 [关系代数](https://en.wikipedia.org/wiki/Relational_algebra#Joins_and_join-like_operators) 加入。 一个表连接的特殊情况通常被称为 “self-join”. + +语法: + +``` sql +SELECT +FROM +[GLOBAL] [ANY|ALL|ASOF] [INNER|LEFT|RIGHT|FULL|CROSS] [OUTER|SEMI|ANTI] JOIN +(ON )|(USING ) ... +``` + +从表达式 `ON` 从子句和列 `USING` 子句被称为 “join keys”. 除非另有说明,加入产生一个 [笛卡尔积](https://en.wikipedia.org/wiki/Cartesian_product) 从具有匹配的行 “join keys”,这可能会产生比源表更多的行的结果。 + +## 支持的联接类型 {#select-join-types} + +所有标准 [SQL JOIN](https://en.wikipedia.org/wiki/Join_(SQL)) 支持类型: + +- `INNER JOIN`,只返回匹配的行。 +- `LEFT OUTER JOIN`,除了匹配的行之外,还返回左表中的非匹配行。 +- `RIGHT OUTER JOIN`,除了匹配的行之外,还返回右表中的非匹配行。 +- `FULL OUTER JOIN`,除了匹配的行之外,还会返回两个表中的非匹配行。 +- `CROSS JOIN`,产生整个表的笛卡尔积, “join keys” 是 **不** 指定。 + +`JOIN` 没有指定类型暗示 `INNER`. 
关键字 `OUTER` 可以安全地省略。 替代语法 `CROSS JOIN` 在指定多个表 [FROM条款](../../../sql-reference/statements/select/from.md) 用逗号分隔。 + +ClickHouse中提供的其他联接类型: + +- `LEFT SEMI JOIN` 和 `RIGHT SEMI JOIN`,白名单 “join keys”,而不产生笛卡尔积。 +- `LEFT ANTI JOIN` 和 `RIGHT ANTI JOIN`,黑名单 “join keys”,而不产生笛卡尔积。 + +## 严格 {#select-join-strictness} + +修改如何匹配 “join keys” 执行 + +- `ALL` — The standard `JOIN` sql中的行为如上所述。 默认值。 +- `ANY` — Partially (for opposite side of `LEFT` 和 `RIGHT`)或完全(为 `INNER` 和 `FULL`)禁用笛卡尔积为标准 `JOIN` 类型。 +- `ASOF` — For joining sequences with a non-exact match. `ASOF JOIN` 用法描述如下。 + +!!! note "注" + 可以使用以下方式复盖默认的严格性值 [join\_default\_strictness](../../../operations/settings/settings.md#settings-join_default_strictness) 设置。 + + Also the behavior of ClickHouse server for `ANY JOIN` operations depends on the [any_join_distinct_right_table_keys](../../../operations/settings/settings.md#any_join_distinct_right_table_keys) setting. + +### ASOF加入使用 {#asof-join-usage} + +`ASOF JOIN` 当您需要连接没有完全匹配的记录时非常有用。 + +算法需要表中的特殊列。 本专栏: + +- 必须包含有序序列。 +- 可以是以下类型之一: [Int*,UInt*](../../../sql-reference/data-types/int-uint.md), [浮动\*](../../../sql-reference/data-types/float.md), [日期](../../../sql-reference/data-types/date.md), [日期时间](../../../sql-reference/data-types/datetime.md), [十进制\*](../../../sql-reference/data-types/decimal.md). +- 不能是唯一的列 `JOIN` 条款 + +语法 `ASOF JOIN ... ON`: + +``` sql +SELECT expressions_list +FROM table_1 +ASOF LEFT JOIN table_2 +ON equi_cond AND closest_match_cond +``` + +您可以使用任意数量的相等条件和恰好一个最接近的匹配条件。 例如, `SELECT count() FROM table_1 ASOF LEFT JOIN table_2 ON table_1.a == table_2.b AND table_2.t <= table_1.t`. + +支持最接近匹配的条件: `>`, `>=`, `<`, `<=`. + +语法 `ASOF JOIN ... USING`: + +``` sql +SELECT expressions_list +FROM table_1 +ASOF JOIN table_2 +USING (equi_column1, ... equi_columnN, asof_column) +``` + +`ASOF JOIN` 用途 `equi_columnX` 对于加入平等和 `asof_column` 用于加入与最接近的比赛 `table_1.asof_column >= table_2.asof_column` 条件。 该 `asof_column` 列总是在最后一个 `USING` 条款 + +例如,请考虑下表: + + table_1 table_2 + event | ev_time | user_id event | ev_time | user_id + ----------|---------|---------- ----------|---------|---------- + ... ... + event_1_1 | 12:00 | 42 event_2_1 | 11:59 | 42 + ... event_2_2 | 12:30 | 42 + event_1_2 | 13:00 | 42 event_2_3 | 13:00 | 42 + ... ... + +`ASOF JOIN` 可以从用户事件的时间戳 `table_1` 并找到一个事件 `table_2` 其中时间戳最接近事件的时间戳 `table_1` 对应于最接近的匹配条件。 如果可用,则相等的时间戳值是最接近的值。 在这里,该 `user_id` 列可用于连接相等和 `ev_time` 列可用于在最接近的匹配加入。 在我们的例子中, `event_1_1` 可以加入 `event_2_1` 和 `event_1_2` 可以加入 `event_2_3`,但是 `event_2_2` 不能加入。 + +!!! note "注" + `ASOF` 加入是 **不** 支持在 [加入我们](../../../engines/table-engines/special/join.md) 表引擎。 + +## 分布式联接 {#global-join} + +有两种方法可以执行涉及分布式表的join: + +- 当使用正常 `JOIN`,将查询发送到远程服务器。 为了创建正确的表,在每个子查询上运行子查询,并使用此表执行联接。 换句话说,在每个服务器上单独形成右表。 +- 使用时 `GLOBAL ... JOIN`,首先请求者服务器运行一个子查询来计算正确的表。 此临时表将传递到每个远程服务器,并使用传输的临时数据对其运行查询。 + +使用时要小心 `GLOBAL`. 
有关详细信息,请参阅 [分布式子查询](../../../sql-reference/operators/in.md#select-distributed-subqueries) 科。 + +## 使用建议 {#usage-recommendations} + +### 处理空单元格或空单元格 {#processing-of-empty-or-null-cells} + +在连接表时,可能会出现空单元格。 设置 [join\_use\_nulls](../../../operations/settings/settings.md#join_use_nulls) 定义ClickHouse如何填充这些单元格。 + +如果 `JOIN` 键是 [可为空](../../../sql-reference/data-types/nullable.md) 字段,其中至少有一个键具有值的行 [NULL](../../../sql-reference/syntax.md#null-literal) 没有加入。 + +### 语法 {#syntax} + +在指定的列 `USING` 两个子查询中必须具有相同的名称,并且其他列必须以不同的方式命名。 您可以使用别名更改子查询中的列名。 + +该 `USING` 子句指定一个或多个要联接的列,这将建立这些列的相等性。 列的列表设置不带括号。 不支持更复杂的连接条件。 + +### 语法限制 {#syntax-limitations} + +对于多个 `JOIN` 单个子句 `SELECT` 查询: + +- 通过以所有列 `*` 仅在联接表时才可用,而不是子查询。 +- 该 `PREWHERE` 条款不可用。 + +为 `ON`, `WHERE`,和 `GROUP BY` 条款: + +- 任意表达式不能用于 `ON`, `WHERE`,和 `GROUP BY` 子句,但你可以定义一个表达式 `SELECT` 子句,然后通过别名在这些子句中使用它。 + +### 性能 {#performance} + +当运行 `JOIN`,与查询的其他阶段相关的执行顺序没有优化。 连接(在右表中搜索)在过滤之前运行 `WHERE` 和聚集之前。 + +每次使用相同的查询运行 `JOIN`,子查询再次运行,因为结果未缓存。 为了避免这种情况,使用特殊的 [加入我们](../../../engines/table-engines/special/join.md) 表引擎,它是一个用于连接的准备好的数组,总是在RAM中。 + +在某些情况下,使用效率更高 [IN](../../../sql-reference/operators/in.md) 而不是 `JOIN`. + +如果你需要一个 `JOIN` 对于连接维度表(这些是包含维度属性的相对较小的表,例如广告活动的名称), `JOIN` 由于每个查询都会重新访问正确的表,因此可能不太方便。 对于这种情况下,有一个 “external dictionaries” 您应该使用的功能 `JOIN`. 有关详细信息,请参阅 [外部字典](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) 科。 + +### 内存限制 {#memory-limitations} + +默认情况下,ClickHouse使用 [哈希联接](https://en.wikipedia.org/wiki/Hash_join) 算法。 ClickHouse采取 `` 并在RAM中为其创建哈希表。 在某个内存消耗阈值之后,ClickHouse回退到合并联接算法。 + +如果需要限制联接操作内存消耗,请使用以下设置: + +- [max\_rows\_in\_join](../../../operations/settings/query-complexity.md#settings-max_rows_in_join) — Limits number of rows in the hash table. +- [max\_bytes\_in\_join](../../../operations/settings/query-complexity.md#settings-max_bytes_in_join) — Limits size of the hash table. + +当任何这些限制达到,ClickHouse作为 [join\_overflow\_mode](../../../operations/settings/query-complexity.md#settings-join_overflow_mode) 设置指示。 + +## 例 {#examples} + +示例: + +``` sql +SELECT + CounterID, + hits, + visits +FROM +( + SELECT + CounterID, + count() AS hits + FROM test.hits + GROUP BY CounterID +) ANY LEFT JOIN +( + SELECT + CounterID, + sum(Sign) AS visits + FROM test.visits + GROUP BY CounterID +) USING CounterID +ORDER BY hits DESC +LIMIT 10 +``` + +``` text +┌─CounterID─┬───hits─┬─visits─┐ +│ 1143050 │ 523264 │ 13665 │ +│ 731962 │ 475698 │ 102716 │ +│ 722545 │ 337212 │ 108187 │ +│ 722889 │ 252197 │ 10547 │ +│ 2237260 │ 196036 │ 9522 │ +│ 23057320 │ 147211 │ 7689 │ +│ 722818 │ 90109 │ 17847 │ +│ 48221 │ 85379 │ 4652 │ +│ 19762435 │ 77807 │ 7026 │ +│ 722884 │ 77492 │ 11056 │ +└───────────┴────────┴────────┘ +``` diff --git a/docs/zh/sql-reference/statements/select/limit-by.md b/docs/zh/sql-reference/statements/select/limit-by.md deleted file mode 120000 index f3a63e9fe22..00000000000 --- a/docs/zh/sql-reference/statements/select/limit-by.md +++ /dev/null @@ -1 +0,0 @@ -../../../../en/sql-reference/statements/select/limit-by.md \ No newline at end of file diff --git a/docs/zh/sql-reference/statements/select/limit-by.md b/docs/zh/sql-reference/statements/select/limit-by.md new file mode 100644 index 00000000000..ae2bd491817 --- /dev/null +++ b/docs/zh/sql-reference/statements/select/limit-by.md @@ -0,0 +1,72 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +toc_title: LIMIT BY +--- + +# 限制条款 {#limit-by-clause} + +与查询 `LIMIT n BY expressions` 子句选择第一个 `n` 每个不同值的行 `expressions`. 
的关键 `LIMIT BY` 可以包含任意数量的 [表达式](../../../sql-reference/syntax.md#syntax-expressions). + +ClickHouse支持以下语法变体: + +- `LIMIT [offset_value, ]n BY expressions` +- `LIMIT n OFFSET offset_value BY expressions` + +在查询处理过程中,ClickHouse会选择按排序键排序的数据。 排序键使用以下命令显式设置 [ORDER BY](../../../sql-reference/statements/select/order-by.md) 子句或隐式作为表引擎的属性。 然后ClickHouse应用 `LIMIT n BY expressions` 并返回第一 `n` 每个不同组合的行 `expressions`. 如果 `OFFSET` 被指定,则对于每个数据块属于一个不同的组合 `expressions`,ClickHouse跳过 `offset_value` 从块开始的行数,并返回最大值 `n` 行的结果。 如果 `offset_value` 如果数据块中的行数大于数据块中的行数,ClickHouse将从该块返回零行。 + +!!! note "注" + `LIMIT BY` 是不相关的 [LIMIT](../../../sql-reference/statements/select/limit.md). 它们都可以在同一个查询中使用。 + +## 例 {#examples} + +样品表: + +``` sql +CREATE TABLE limit_by(id Int, val Int) ENGINE = Memory; +INSERT INTO limit_by VALUES (1, 10), (1, 11), (1, 12), (2, 20), (2, 21); +``` + +查询: + +``` sql +SELECT * FROM limit_by ORDER BY id, val LIMIT 2 BY id +``` + +``` text +┌─id─┬─val─┐ +│ 1 │ 10 │ +│ 1 │ 11 │ +│ 2 │ 20 │ +│ 2 │ 21 │ +└────┴─────┘ +``` + +``` sql +SELECT * FROM limit_by ORDER BY id, val LIMIT 1, 2 BY id +``` + +``` text +┌─id─┬─val─┐ +│ 1 │ 11 │ +│ 1 │ 12 │ +│ 2 │ 21 │ +└────┴─────┘ +``` + +该 `SELECT * FROM limit_by ORDER BY id, val LIMIT 2 OFFSET 1 BY id` 查询返回相同的结果。 + +以下查询返回每个引用的前5个引用 `domain, device_type` 最多可与100行配对 (`LIMIT n BY + LIMIT`). + +``` sql +SELECT + domainWithoutWWW(URL) AS domain, + domainWithoutWWW(REFERRER_URL) AS referrer, + device_type, + count() cnt +FROM hits +GROUP BY domain, referrer, device_type +ORDER BY cnt DESC +LIMIT 5 BY domain, device_type +LIMIT 100 +``` diff --git a/docs/zh/sql-reference/statements/select/limit.md b/docs/zh/sql-reference/statements/select/limit.md deleted file mode 120000 index e0a0c632dac..00000000000 --- a/docs/zh/sql-reference/statements/select/limit.md +++ /dev/null @@ -1 +0,0 @@ -../../../../en/sql-reference/statements/select/limit.md \ No newline at end of file diff --git a/docs/zh/sql-reference/statements/select/limit.md b/docs/zh/sql-reference/statements/select/limit.md new file mode 100644 index 00000000000..4d02df88600 --- /dev/null +++ b/docs/zh/sql-reference/statements/select/limit.md @@ -0,0 +1,15 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +toc_title: LIMIT +--- + +# 限制条款 {#limit-clause} + +`LIMIT m` 允许选择第一个 `m` 结果中的行。 + +`LIMIT n, m` 允许选择 `m` 跳过第一个结果后的行 `n` 行。 该 `LIMIT m OFFSET n` 语法是等效的。 + +`n` 和 `m` 必须是非负整数。 + +如果没有 [ORDER BY](../../../sql-reference/statements/select/order-by.md) 子句显式排序结果,结果的行选择可能是任意的和非确定性的。 diff --git a/docs/zh/sql-reference/statements/select/order-by.md b/docs/zh/sql-reference/statements/select/order-by.md deleted file mode 120000 index cc2567bce0b..00000000000 --- a/docs/zh/sql-reference/statements/select/order-by.md +++ /dev/null @@ -1 +0,0 @@ -../../../../en/sql-reference/statements/select/order-by.md \ No newline at end of file diff --git a/docs/zh/sql-reference/statements/select/order-by.md b/docs/zh/sql-reference/statements/select/order-by.md new file mode 100644 index 00000000000..e853a788075 --- /dev/null +++ b/docs/zh/sql-reference/statements/select/order-by.md @@ -0,0 +1,73 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +toc_title: ORDER BY +--- + +# 按条款订购 {#select-order-by} + +该 `ORDER BY` 子句包含一个表达式列表,每个表达式都可以用 `DESC` (降序)或 `ASC` (升序)修饰符确定排序方向。 如果未指定方向, `ASC` 假设,所以它通常被省略。 排序方向适用于单个表达式,而不适用于整个列表。 示例: `ORDER BY Visits DESC, SearchPhrase` + +对于排序表达式列表具有相同值的行以任意顺序输出,也可以是非确定性的(每次都不同)。 +如果省略ORDER 
BY子句,则行的顺序也是未定义的,并且可能也是非确定性的。 + +## 特殊值的排序 {#sorting-of-special-values} + +有两种方法 `NaN` 和 `NULL` 排序顺序: + +- 默认情况下或与 `NULLS LAST` 修饰符:首先是值,然后 `NaN`,然后 `NULL`. +- 与 `NULLS FIRST` 修饰符:第一 `NULL`,然后 `NaN`,然后其他值。 + +### 示例 {#example} + +对于表 + +``` text +┌─x─┬────y─┐ +│ 1 │ ᴺᵁᴸᴸ │ +│ 2 │ 2 │ +│ 1 │ nan │ +│ 2 │ 2 │ +│ 3 │ 4 │ +│ 5 │ 6 │ +│ 6 │ nan │ +│ 7 │ ᴺᵁᴸᴸ │ +│ 6 │ 7 │ +│ 8 │ 9 │ +└───┴──────┘ +``` + +运行查询 `SELECT * FROM t_null_nan ORDER BY y NULLS FIRST` 获得: + +``` text +┌─x─┬────y─┐ +│ 1 │ ᴺᵁᴸᴸ │ +│ 7 │ ᴺᵁᴸᴸ │ +│ 1 │ nan │ +│ 6 │ nan │ +│ 2 │ 2 │ +│ 2 │ 2 │ +│ 3 │ 4 │ +│ 5 │ 6 │ +│ 6 │ 7 │ +│ 8 │ 9 │ +└───┴──────┘ +``` + +当对浮点数进行排序时,Nan与其他值是分开的。 无论排序顺序如何,Nan都在最后。 换句话说,对于升序排序,它们被放置为好像它们比所有其他数字大,而对于降序排序,它们被放置为好像它们比其他数字小。 + +## 排序规则支持 {#collation-support} + +对于按字符串值排序,可以指定排序规则(比较)。 示例: `ORDER BY SearchPhrase COLLATE 'tr'` -对于按关键字升序排序,使用土耳其字母,不区分大小写,假设字符串是UTF-8编码。 `COLLATE` 可以按顺序独立地指定或不按每个表达式。 如果 `ASC` 或 `DESC` 被指定, `COLLATE` 在它之后指定。 使用时 `COLLATE`,排序始终不区分大小写。 + +我们只建议使用 `COLLATE` 对于少量行的最终排序,因为排序与 `COLLATE` 比正常的按字节排序效率低。 + +## 实施细节 {#implementation-details} + +更少的RAM使用,如果一个足够小 [LIMIT](../../../sql-reference/statements/select/limit.md) 除了指定 `ORDER BY`. 否则,所花费的内存量与用于排序的数据量成正比。 对于分布式查询处理,如果 [GROUP BY](../../../sql-reference/statements/select/group-by.md) 省略排序,在远程服务器上部分完成排序,并将结果合并到请求者服务器上。 这意味着对于分布式排序,要排序的数据量可以大于单个服务器上的内存量。 + +如果没有足够的RAM,则可以在外部存储器中执行排序(在磁盘上创建临时文件)。 使用设置 `max_bytes_before_external_sort` 为此目的。 如果将其设置为0(默认值),则禁用外部排序。 如果启用,则当要排序的数据量达到指定的字节数时,将对收集的数据进行排序并转储到临时文件中。 读取所有数据后,将合并所有已排序的文件并输出结果。 文件被写入到 `/var/lib/clickhouse/tmp/` 目录中的配置(默认情况下,但你可以使用 `tmp_path` 参数来更改此设置)。 + +运行查询可能占用的内存比 `max_bytes_before_external_sort`. 因此,此设置的值必须大大小于 `max_memory_usage`. 例如,如果您的服务器有128GB的RAM,并且您需要运行单个查询,请设置 `max_memory_usage` 到100GB,和 `max_bytes_before_external_sort` 至80GB。 + +外部排序的工作效率远远低于在RAM中进行排序。 diff --git a/docs/zh/sql-reference/statements/select/prewhere.md b/docs/zh/sql-reference/statements/select/prewhere.md deleted file mode 120000 index 567fc95356f..00000000000 --- a/docs/zh/sql-reference/statements/select/prewhere.md +++ /dev/null @@ -1 +0,0 @@ -../../../../en/sql-reference/statements/select/prewhere.md \ No newline at end of file diff --git a/docs/zh/sql-reference/statements/select/prewhere.md b/docs/zh/sql-reference/statements/select/prewhere.md new file mode 100644 index 00000000000..ec6607d4ecc --- /dev/null +++ b/docs/zh/sql-reference/statements/select/prewhere.md @@ -0,0 +1,23 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +toc_title: PREWHERE +--- + +# PREWHERE条款 {#prewhere-clause} + +Prewhere是更有效地应用过滤的优化。 默认情况下,即使在 `PREWHERE` 子句未显式指定。 它的工作原理是自动移动的一部分 [WHERE](../../../sql-reference/statements/select/where.md) 条件到prewhere阶段。 的作用 `PREWHERE` 子句只是控制这个优化,如果你认为你知道如何做得比默认情况下更好。 + +使用prewhere优化,首先只读取执行prewhere表达式所需的列。 然后读取运行其余查询所需的其他列,但只读取prewhere表达式所在的那些块 “true” 至少对于一些行。 如果有很多块,其中prewhere表达式是 “false” 对于所有行和prewhere需要比查询的其他部分更少的列,这通常允许从磁盘读取更少的数据以执行查询。 + +## 手动控制Prewhere {#controlling-prewhere-manually} + +该条款具有相同的含义 `WHERE` 条款 区别在于从表中读取数据。 当手动控制 `PREWHERE` 对于查询中的少数列使用的过滤条件,但这些过滤条件提供了强大的数据过滤。 这减少了要读取的数据量。 + +查询可以同时指定 `PREWHERE` 和 `WHERE`. 在这种情况下, `PREWHERE` 先于 `WHERE`. 
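+
+下面是一个简单的示意性查询(假设存在含 `CounterID` 和 `URL` 列的 `hits` 表, 且该表属于 `*MergeTree` 家族, 仅作演示):
+
+``` sql
+SELECT count()
+FROM hits
+PREWHERE CounterID = 34        -- 过滤性强的条件放在 prewhere 阶段, 减少从磁盘读取的数据
+WHERE URL LIKE '%clickhouse%'  -- 其余条件在 where 阶段执行
+```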
+ +如果 `optimize_move_to_prewhere` 设置为0,启发式自动移动部分表达式 `WHERE` 到 `PREWHERE` 被禁用。 + +## 限制 {#limitations} + +`PREWHERE` 只有从表支持 `*MergeTree` 家人 diff --git a/docs/zh/sql-reference/statements/select/sample.md b/docs/zh/sql-reference/statements/select/sample.md deleted file mode 120000 index 9df6e25d0f3..00000000000 --- a/docs/zh/sql-reference/statements/select/sample.md +++ /dev/null @@ -1 +0,0 @@ -../../../../en/sql-reference/statements/select/sample.md \ No newline at end of file diff --git a/docs/zh/sql-reference/statements/select/sample.md b/docs/zh/sql-reference/statements/select/sample.md new file mode 100644 index 00000000000..9b760601959 --- /dev/null +++ b/docs/zh/sql-reference/statements/select/sample.md @@ -0,0 +1,114 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +toc_title: SAMPLE +--- + +# 示例子句 {#select-sample-clause} + +该 `SAMPLE` 子句允许近似 `SELECT` 查询处理。 + +启用数据采样时,不会对所有数据执行查询,而只对特定部分数据(样本)执行查询。 例如,如果您需要计算所有访问的统计信息,只需对所有访问的1/10分数执行查询,然后将结果乘以10即可。 + +近似查询处理在以下情况下可能很有用: + +- 当你有严格的时间requirements(如\<100ms),但你不能证明额外的硬件资源来满足他们的成本。 +- 当您的原始数据不准确时,所以近似不会明显降低质量。 +- 业务需求的目标是近似结果(为了成本效益,或者向高级用户推销确切结果)。 + +!!! note "注" + 您只能使用采样中的表 [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) 家庭,并且只有在表创建过程中指定了采样表达式(请参阅 [MergeTree引擎](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table)). + +下面列出了数据采样的功能: + +- 数据采样是一种确定性机制。 同样的结果 `SELECT .. SAMPLE` 查询始终是相同的。 +- 对于不同的表,采样工作始终如一。 对于具有单个采样键的表,具有相同系数的采样总是选择相同的可能数据子集。 例如,用户Id的示例采用来自不同表的所有可能的用户Id的相同子集的行。 这意味着您可以在子查询中使用示例 [IN](../../../sql-reference/operators/in.md) 条款 此外,您可以使用 [JOIN](../../../sql-reference/statements/select/join.md) 条款 +- 采样允许从磁盘读取更少的数据。 请注意,您必须正确指定采样键。 有关详细信息,请参阅 [创建MergeTree表](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table). + +为 `SAMPLE` 子句支持以下语法: + +| SAMPLE Clause Syntax | 产品描述 | +|----------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `SAMPLE k` | 这里 `k` 是从0到1的数字。
查询在 `k` 比例的数据上执行。 例如, `SAMPLE 0.1` 对10%的数据运行查询。 [了解更多](#select-sample-k) | | `SAMPLE n` | 这里 `n` 是足够大的整数。
该查询会在至少包含 `n` 行的样本上执行(但不会明显多于这个数量)。 例如, `SAMPLE 10000000` 在至少10,000,000行上运行查询。 [了解更多](#select-sample-n) | | `SAMPLE k OFFSET m` | 这里 `k` 和 `m` 是从0到1的数字。
查询在以下示例上执行 `k` 数据的分数。 用于采样的数据由以下偏移 `m` 分数。 [碌莽禄more拢more](#select-sample-offset) | + +## SAMPLE K {#select-sample-k} + +这里 `k` 从0到1的数字(支持小数和小数表示法)。 例如, `SAMPLE 1/2` 或 `SAMPLE 0.5`. + +在一个 `SAMPLE k` 子句,样品是从 `k` 数据的分数。 示例如下所示: + +``` sql +SELECT + Title, + count() * 10 AS PageViews +FROM hits_distributed +SAMPLE 0.1 +WHERE + CounterID = 34 +GROUP BY Title +ORDER BY PageViews DESC LIMIT 1000 +``` + +在此示例中,对0.1(10%)数据的样本执行查询。 聚合函数的值不会自动修正,因此要获得近似结果,值 `count()` 手动乘以10。 + +## SAMPLE N {#select-sample-n} + +这里 `n` 是足够大的整数。 例如, `SAMPLE 10000000`. + +在这种情况下,查询在至少一个样本上执行 `n` 行(但不超过这个)。 例如, `SAMPLE 10000000` 在至少10,000,000行上运行查询。 + +由于数据读取的最小单位是一个颗粒(其大小由 `index_granularity` 设置),是有意义的设置一个样品,其大小远大于颗粒。 + +使用时 `SAMPLE n` 子句,你不知道处理了哪些数据的相对百分比。 所以你不知道聚合函数应该乘以的系数。 使用 `_sample_factor` 虚拟列得到近似结果。 + +该 `_sample_factor` 列包含动态计算的相对系数。 当您执行以下操作时,将自动创建此列 [创建](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) 具有指定采样键的表。 的使用示例 `_sample_factor` 列如下所示。 + +让我们考虑表 `visits`,其中包含有关网站访问的统计信息。 第一个示例演示如何计算页面浏览量: + +``` sql +SELECT sum(PageViews * _sample_factor) +FROM visits +SAMPLE 10000000 +``` + +下一个示例演示如何计算访问总数: + +``` sql +SELECT sum(_sample_factor) +FROM visits +SAMPLE 10000000 +``` + +下面的示例显示了如何计算平均会话持续时间。 请注意,您不需要使用相对系数来计算平均值。 + +``` sql +SELECT avg(Duration) +FROM visits +SAMPLE 10000000 +``` + +## SAMPLE K OFFSET M {#select-sample-offset} + +这里 `k` 和 `m` 是从0到1的数字。 示例如下所示。 + +**示例1** + +``` sql +SAMPLE 1/10 +``` + +在此示例中,示例是所有数据的十分之一: + +`[++------------]` + +**示例2** + +``` sql +SAMPLE 1/10 OFFSET 1/2 +``` + +这里,从数据的后半部分取出10%的样本。 + +`[------++------]` diff --git a/docs/zh/sql-reference/statements/select/union-all.md b/docs/zh/sql-reference/statements/select/union-all.md deleted file mode 120000 index 837caae2698..00000000000 --- a/docs/zh/sql-reference/statements/select/union-all.md +++ /dev/null @@ -1 +0,0 @@ -../../../../en/sql-reference/statements/select/union-all.md \ No newline at end of file diff --git a/docs/zh/sql-reference/statements/select/union-all.md b/docs/zh/sql-reference/statements/select/union-all.md new file mode 100644 index 00000000000..a04996bc0a6 --- /dev/null +++ b/docs/zh/sql-reference/statements/select/union-all.md @@ -0,0 +1,36 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +toc_title: UNION ALL +--- + +# UNION ALL条款 {#union-all-clause} + +您可以使用 `UNION ALL` 结合任意数量的 `SELECT` 通过扩展其结果进行查询。 示例: + +``` sql +SELECT CounterID, 1 AS table, toInt64(count()) AS c + FROM test.hits + GROUP BY CounterID + +UNION ALL + +SELECT CounterID, 2 AS table, sum(Sign) AS c + FROM test.visits + GROUP BY CounterID + HAVING c > 0 +``` + +结果列通过它们的索引进行匹配(在内部的顺序 `SELECT`). 如果列名称不匹配,则从第一个查询中获取最终结果的名称。 + +对联合执行类型转换。 例如,如果合并的两个查询具有相同的字段与非-`Nullable` 和 `Nullable` 从兼容类型的类型,由此产生的 `UNION ALL` 有一个 `Nullable` 类型字段。 + +属于以下部分的查询 `UNION ALL` 不能用圆括号括起来。 [ORDER BY](../../../sql-reference/statements/select/order-by.md) 和 [LIMIT](../../../sql-reference/statements/select/limit.md) 应用于单独的查询,而不是最终结果。 如果您需要将转换应用于最终结果,则可以将所有查询 `UNION ALL` 在子查询中 [FROM](../../../sql-reference/statements/select/from.md) 条款 + +## 限制 {#limitations} + +只有 `UNION ALL` 支持。 定期的 `UNION` (`UNION DISTINCT`)不支持。 如果你需要 `UNION DISTINCT`,你可以写 `SELECT DISTINCT` 从包含 `UNION ALL`. 
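+
+下面是一个简单的示意性写法(沿用上文示例中的 `test.hits` 和 `test.visits` 表), 把 `UNION ALL` 放进子查询, 再对其结果使用 `SELECT DISTINCT` 来得到去重后的合并结果:
+
+``` sql
+SELECT DISTINCT CounterID
+FROM
+(
+    SELECT CounterID FROM test.hits
+    UNION ALL
+    SELECT CounterID FROM test.visits
+)
+```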
+ +## 实施细节 {#implementation-details} + +属于以下部分的查询 `UNION ALL` 可以同时运行,并且它们的结果可以混合在一起。 diff --git a/docs/zh/sql-reference/statements/select/where.md b/docs/zh/sql-reference/statements/select/where.md deleted file mode 120000 index 8ba28926879..00000000000 --- a/docs/zh/sql-reference/statements/select/where.md +++ /dev/null @@ -1 +0,0 @@ -../../../../en/sql-reference/statements/select/where.md \ No newline at end of file diff --git a/docs/zh/sql-reference/statements/select/where.md b/docs/zh/sql-reference/statements/select/where.md new file mode 100644 index 00000000000..eb1da0d1027 --- /dev/null +++ b/docs/zh/sql-reference/statements/select/where.md @@ -0,0 +1,16 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +toc_title: WHERE +--- + +# WHERE条款 {#select-where} + +`WHERE` 子句允许过滤来自 [FROM](../../../sql-reference/statements/select/from.md) 的条款 `SELECT`. + +如果有一个 `WHERE` 子句,它必须包含一个表达式与 `UInt8` 类型。 这通常是一个带有比较和逻辑运算符的表达式。 此表达式计算结果为0的行将从进一步的转换或结果中解释出来。 + +`WHERE` 如果基础表引擎支持,则根据使用索引和分区修剪的能力评估expression。 + +!!! note "注" + 有一个叫做过滤优化 [去哪里](../../../sql-reference/statements/select/prewhere.md). diff --git a/docs/zh/sql-reference/statements/select/with.md b/docs/zh/sql-reference/statements/select/with.md deleted file mode 120000 index 8b7ea4db44c..00000000000 --- a/docs/zh/sql-reference/statements/select/with.md +++ /dev/null @@ -1 +0,0 @@ -../../../../en/sql-reference/statements/select/with.md \ No newline at end of file diff --git a/docs/zh/sql-reference/statements/select/with.md b/docs/zh/sql-reference/statements/select/with.md new file mode 100644 index 00000000000..224cd4790bb --- /dev/null +++ b/docs/zh/sql-reference/statements/select/with.md @@ -0,0 +1,81 @@ +--- +machine_translated: true +machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 +toc_title: WITH +--- + +# WITH条款 {#with-clause} + +本节提供对公共表表达式的支持 ([CTE](https://en.wikipedia.org/wiki/Hierarchical_and_recursive_queries_in_SQL)),所以结果 `WITH` 子句可以在其余部分中使用 `SELECT` 查询。 + +## 限制 {#limitations} + +1. 不支持递归查询。 +2. 当在section中使用子查询时,它的结果应该是只有一行的标量。 +3. 
Expression的结果在子查询中不可用。 + +## 例 {#examples} + +**示例1:** 使用常量表达式作为 “variable” + +``` sql +WITH '2019-08-01 15:23:00' as ts_upper_bound +SELECT * +FROM hits +WHERE + EventDate = toDate(ts_upper_bound) AND + EventTime <= ts_upper_bound +``` + +**示例2:** 从SELECT子句列表中逐出sum(bytes)表达式结果 + +``` sql +WITH sum(bytes) as s +SELECT + formatReadableSize(s), + table +FROM system.parts +GROUP BY table +ORDER BY s +``` + +**例3:** 使用标量子查询的结果 + +``` sql +/* this example would return TOP 10 of most huge tables */ +WITH + ( + SELECT sum(bytes) + FROM system.parts + WHERE active + ) AS total_disk_usage +SELECT + (sum(bytes) / total_disk_usage) * 100 AS table_disk_usage, + table +FROM system.parts +GROUP BY table +ORDER BY table_disk_usage DESC +LIMIT 10 +``` + +**例4:** 在子查询中重用表达式 + +作为子查询中表达式使用的当前限制的解决方法,您可以复制它。 + +``` sql +WITH ['hello'] AS hello +SELECT + hello, + * +FROM +( + WITH ['hello'] AS hello + SELECT hello +) +``` + +``` text +┌─hello─────┬─hello─────┐ +│ ['hello'] │ ['hello'] │ +└───────────┴───────────┘ +``` diff --git a/docs/zh/sql-reference/statements/show.md b/docs/zh/sql-reference/statements/show.md index 95404f3d416..d9f09855916 100644 --- a/docs/zh/sql-reference/statements/show.md +++ b/docs/zh/sql-reference/statements/show.md @@ -1,19 +1,17 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd toc_priority: 38 toc_title: SHOW --- -# 显示查询 {#show-queries} +# SHOW 查询 {#show-queries} ## SHOW CREATE TABLE {#show-create-table} ``` sql SHOW CREATE [TEMPORARY] [TABLE|DICTIONARY] [db.]table [INTO OUTFILE filename] [FORMAT format] ``` +返回单个字符串类型的 ‘statement’列,其中只包含了一个值 - 用来创建指定对象的 `CREATE` 语句。 -返回单 `String`-类型 ‘statement’ column, which contains a single value – the `CREATE` 用于创建指定对象的查询。 ## SHOW DATABASES {#show-databases} @@ -21,8 +19,7 @@ SHOW CREATE [TEMPORARY] [TABLE|DICTIONARY] [db.]table [INTO OUTFILE filename] [F SHOW DATABASES [INTO OUTFILE filename] [FORMAT format] ``` -打印所有数据库的列表。 -这个查询是相同的 `SELECT name FROM system.databases [INTO OUTFILE filename] [FORMAT format]`. +打印所有的数据库列表,该查询等同于 `SELECT name FROM system.databases [INTO OUTFILE filename] [FORMAT format]` ## SHOW PROCESSLIST {#show-processlist} @@ -30,11 +27,13 @@ SHOW DATABASES [INTO OUTFILE filename] [FORMAT format] SHOW PROCESSLIST [INTO OUTFILE filename] [FORMAT format] ``` -输出的内容 [系统。流程](../../operations/system-tables.md#system_tables-processes) 表,包含目前正在处理的查询列表,除了 `SHOW PROCESSLIST` 查询。 +输出 [system.processes](../../operations/system-tables/processes.md#system_tables-processes)表的内容,包含有当前正在处理的请求列表,除了 `SHOW PROCESSLIST`查询。 -该 `SELECT * FROM system.processes` 查询返回有关所有当前查询的数据。 -提示(在控制台中执行): + `SELECT * FROM system.processes` 查询返回和当前请求相关的所有数据 + + +提示 (在控制台执行): ``` bash $ watch -n1 "clickhouse-client --query='SHOW PROCESSLIST'" @@ -42,15 +41,15 @@ $ watch -n1 "clickhouse-client --query='SHOW PROCESSLIST'" ## SHOW TABLES {#show-tables} -显示表的列表。 +显示表的清单 ``` sql SHOW [TEMPORARY] TABLES [{FROM | IN} ] [LIKE '' | WHERE expr] [LIMIT ] [INTO OUTFILE ] [FORMAT ] ``` -如果 `FROM` 如果未指定子句,则查询返回当前数据库中的表列表。 +如果未使用 `FROM` 字句,该查询返回当前数据库的所有表清单 -你可以得到相同的结果 `SHOW TABLES` 通过以下方式进行查询: +可以用下面的方式获得和 `SHOW TABLES`一样的结果: ``` sql SELECT name FROM system.tables WHERE database = [AND name LIKE ] [LIMIT ] [INTO OUTFILE ] [FORMAT ] @@ -58,7 +57,7 @@ SELECT name FROM system.tables WHERE database = [AND name LIKE ] [ **示例** -下面的查询从表的列表中选择前两行 `system` 数据库,其名称包含 `co`. 
+下列查询获取最前面的2个位于`system`库中且表名包含 `co`的表。 ``` sql SHOW TABLES FROM system LIKE '%co%' LIMIT 2 @@ -73,15 +72,15 @@ SHOW TABLES FROM system LIKE '%co%' LIMIT 2 ## SHOW DICTIONARIES {#show-dictionaries} -显示列表 [外部字典](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md). +以列表形式显示 [外部字典](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md). ``` sql SHOW DICTIONARIES [FROM ] [LIKE ''] [LIMIT ] [INTO OUTFILE ] [FORMAT ] ``` -如果 `FROM` 如果未指定子句,则查询从当前数据库返回字典列表。 +如果 `FROM`字句没有指定,返回当前数据库的字典列表 -你可以得到相同的结果 `SHOW DICTIONARIES` 通过以下方式进行查询: +可以通过下面的查询获取和 `SHOW DICTIONARIES`相同的结果: ``` sql SELECT name FROM system.dictionaries WHERE database = [AND name LIKE ] [LIMIT ] [INTO OUTFILE ] [FORMAT ] @@ -89,7 +88,7 @@ SELECT name FROM system.dictionaries WHERE database = [AND name LIKE +[原始文档](https://clickhouse.tech/docs/en/query_language/show/) diff --git a/docs/zh/sql-reference/syntax.md b/docs/zh/sql-reference/syntax.md index b0aa9e7364f..a53de43d8f4 100644 --- a/docs/zh/sql-reference/syntax.md +++ b/docs/zh/sql-reference/syntax.md @@ -1,156 +1,162 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd toc_priority: 31 -toc_title: "\u8BED\u6CD5" +toc_title: SQL语法 --- -# 语法 {#syntax} - -系统中有两种类型的解析器:完整SQL解析器(递归下降解析器)和数据格式解析器(快速流解析器)。 -在所有情况下,除了 `INSERT` 查询时,只使用完整的SQL解析器。 -该 `INSERT` 查询使用两个解析器: +# SQL语法 {#syntax} +CH有2类解析器:完整SQL解析器(递归式解析器),以及数据格式解析器(快速流式解析器) +除了 `INSERT` 查询,其它情况下仅使用完整SQL解析器。 + `INSERT`查询会同时使用2种解析器: ``` sql INSERT INTO t VALUES (1, 'Hello, world'), (2, 'abc'), (3, 'def') ``` -该 `INSERT INTO t VALUES` 片段由完整的解析器解析,并且数据 `(1, 'Hello, world'), (2, 'abc'), (3, 'def')` 由快速流解析器解析。 您也可以通过使用 [input\_format\_values\_interpret\_expressions](../operations/settings/settings.md#settings-input_format_values_interpret_expressions) 设置。 当 `input_format_values_interpret_expressions = 1`,ClickHouse首先尝试使用fast stream解析器解析值。 如果失败,ClickHouse将尝试对数据使用完整的解析器,将其视为SQL [表达式](#syntax-expressions). 
+含`INSERT INTO t VALUES` 的部分由完整SQL解析器处理,包含数据的部分 `(1, 'Hello, world'), (2, 'abc'), (3, 'def')` 交给快速流式解析器解析。通过设置参数 [input\_format\_values\_interpret\_expressions](../operations/settings/settings.md#settings-input_format_values_interpret_expressions),你也可以对数据部分开启完整SQL解析器。当 `input_format_values_interpret_expressions = 1` 时,CH优先采用快速流式解析器来解析数据。如果失败,CH再尝试用完整SQL解析器来处理,就像处理SQL [expression](#syntax-expressions) 一样。 -数据可以有任何格式。 当接收到查询时,服务器计算不超过 [max\_query\_size](../operations/settings/settings.md#settings-max_query_size) RAM中请求的字节(默认为1MB),其余的是流解析。 -它允许避免与大的问题 `INSERT` 查询。 +数据可以采用任何格式。当CH接受到请求时,服务端先在内存中计算不超过 [max\_query\_size](../operations/settings/settings.md#settings-max_query_size) 字节的请求数据(默认1 mb),然后剩下部分交给快速流式解析器。 -使用时 `Values` 格式为 `INSERT` 查询,它可能看起来数据被解析相同的表达式 `SELECT` 查询,但事实并非如此。 该 `Values` 格式更为有限。 +这将避免在处理大型的 `INSERT`语句时出现问题。 -本文的其余部分将介绍完整的解析器。 有关格式解析器的详细信息,请参阅 [格式](../interfaces/formats.md) 科。 +当 `INSERT` 语句中使用 `Values` 形式时,看起来 数据部分的解析和解析`SELECT` 中的表达式相同,但并不是这样的。 `Values` 形式非常有限。 +该篇的剩余部分涵盖了完整SQL解析器。关于格式解析的更多信息,参见 [Formats](../interfaces/formats.md) 章节。 -## 空间 {#spaces} +## 空字符 {#spaces} -语法结构之间可能有任意数量的空格符号(包括查询的开始和结束)。 空格符号包括空格、制表符、换行符、CR和换页符。 +sql语句中(包含sql的起始和结束)可以有任意的空字符,这些空字符类型包括:空格字符,tab制表符,换行符,CR符,换页符等。 -## 评论 {#comments} +## 注释 {#comments} -ClickHouse支持SQL风格和C风格的注释。 -SQL风格的注释以下开头 `--` 并继续到线的末尾,一个空格后 `--` 可以省略。 -C型是从 `/*` 到 `*/`并且可以是多行,也不需要空格。 +CH支持SQL风格或C语言风格的注释: +- SQL风格的注释以 `--` 开始,直到行末,`--` 后紧跟的空格可以忽略 +- C语言风格的注释以 `/*` 开始,以 `*/` 结束,支持多行形式,同样可以省略 `/*` 后的空格 -## 关键词 {#syntax-keywords} +## 关键字 {#syntax-keywords} -当关键字对应于以下关键字时,不区分大小写: +以下场景的关键字是大小写不敏感的: +- 标准SQL。例如,`SELECT`, `select` 和 `SeLeCt` 都是允许的 +- 在某些流行的RDBMS中被实现的关键字,例如,`DateTime` 和 `datetime`是一样的 -- SQL标准。 例如, `SELECT`, `select` 和 `SeLeCt` 都是有效的。 -- 在一些流行的DBMS(MySQL或Postgres)中实现。 例如, `DateTime` 是一样的 `datetime`. -数据类型名称是否区分大小写可以在 `system.data_type_families` 桌子 +你可以在系统表 [system.data_type_families](../operations/system-tables/data_type_families.md#system_tables-data_type_families) 中检查某个数据类型的名称是否是大小写敏感型。 -与标准SQL相比,所有其他关键字(包括函数名称)都是 **区分大小写**. +和标准SQL相反,所有其它的关键字都是 **大小写敏感的**,包括函数名称。 +In contrast to standard SQL, all other keywords (including functions names) are **case-sensitive**. -不保留关键字;它们仅在相应的上下文中被视为保留关键字。 如果您使用 [标识符](#syntax-identifiers) 使用与关键字相同的名称,将它们括在双引号或反引号中。 例如,查询 `SELECT "FROM" FROM table_name` 是有效的,如果表 `table_name` 具有名称的列 `"FROM"`. +关键字不是保留的;它们仅在相应的上下文中才会被处理。如果你使用和关键字同名的 [变量名](#syntax-identifiers) ,需要使用双引号或转移符将它们包含起来。例如:如果表 `table_name` 包含列 `"FROM"`,那么 `SELECT "FROM" FROM table_name` 是合法的 -## 标识符 {#syntax-identifiers} +## 变量名 {#syntax-identifiers} -标识符是: +变量包括: +Identifiers are: -- 集群、数据库、表、分区和列名称。 -- 功能。 -- 数据类型。 -- [表达式别名](#syntax-expression_aliases). +- 集群,数据库,表,分区,列名称 +- 函数 +- 数据类型 +- 表达式别名 -标识符可以是引号或非引号。 后者是优选的。 +变量名可以使用反引号包含起来 -非引号标识符必须与正则表达式匹配 `^[a-zA-Z_][0-9a-zA-Z_]*$` 并且不能等于 [关键词](#syntax-keywords). 例: `x, _1, X_y__Z123_.` +没有使用反引号包含的变量名,必须匹配正则表达式 `^[a-zA-Z_][0-9a-zA-Z_]*$`,并且不能和 [关键字]相同 -如果要使用与关键字相同的标识符,或者要在标识符中使用其他符号,请使用双引号或反引号对其进行引用,例如, `"id"`, `` `id` ``. 
+如果想使用和关键字同名的变量名称,或者在变量名称中包含其它符号,你需要通过双引号或转义符号,例如: `"id"`, `` `id` `` -## 文字数 {#literals} +## 字符 {#literals} -有数字,字符串,复合和 `NULL` 文字。 +CH包含数字,字母,括号,NULL值等字符 ### 数字 {#numeric} -数值文字尝试进行分析: +数字类型字符会被做如下解析: +- 首先,当做64位的有符号整数,使用该函数 [strtoull](https://en.cppreference.com/w/cpp/string/byte/strtoul) +- 如果失败,解析成64位无符号整数,同样使用函数 [strtoull](https://en.cppreference.com/w/cpp/string/byte/strtoul) -- 首先,作为一个64位有符号的数字,使用 [strtoull](https://en.cppreference.com/w/cpp/string/byte/strtoul) 功能。 -- 如果不成功,作为64位无符号数,使用 [strtoll](https://en.cppreference.com/w/cpp/string/byte/strtol) 功能。 -- 如果不成功,作为一个浮点数使用 [strtod](https://en.cppreference.com/w/cpp/string/byte/strtof) 功能。 -- 否则,将返回错误。 +- 如果还失败了,试图解析成浮点型数值,使用函数 [strtod](https://en.cppreference.com/w/cpp/string/byte/strtof) +Numeric literal tries to be parsed: -文本值具有该值适合的最小类型。 -例如,1被解析为 `UInt8`,但256被解析为 `UInt16`. 有关详细信息,请参阅 [数据类型](../sql-reference/data-types/index.md). +- 最后,以上情形都不符合时,返回异常 -例: `1`, `18446744073709551615`, `0xDEADBEEF`, `01`, `0.1`, `1e100`, `-1e-100`, `inf`, `nan`. -### 字符串 {#syntax-string-literal} +数字类型的值类型为能容纳该值的最小数据类型。 +例如:1 解析成 `UInt8`型,256 则解析成 `UInt16`。更多信息,参见 [数据类型](../sql-reference/data-types/index.md) -仅支持单引号中的字符串文字。 封闭的字符可以反斜杠转义。 以下转义序列具有相应的特殊值: `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\a`, `\v`, `\xHH`. 在所有其他情况下,转义序列的格式为 `\c`,哪里 `c` 是任何字符,被转换为 `c`. 这意味着你可以使用序列 `\'`和`\\`. 该值将具有 [字符串](../sql-reference/data-types/string.md) 类型。 +例如: `1`, `18446744073709551615`, `0xDEADBEEF`, `01`, `0.1`, `1e100`, `-1e-100`, `inf`, `nan`. -在字符串文字中,你至少需要转义 `'` 和 `\`. 单引号可以用单引号,文字转义 `'It\'s'` 和 `'It''s'` 是平等的。 +### 字母 {#syntax-string-literal} +CH只支持用单引号包含的字母。特殊字符可通过反斜杠进行转义。下列转义字符都有相应的实际值: `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\a`, `\v`, `\xHH`。其它情况下,以 `\c`形式出现的转义字符,当`c`表示任意字符时,转义字符会转换成`c`。这意味着你可以使用 `\'`和`\\`。该值将拥有[String](../sql-reference/data-types/string.md)类型。 -### 化合物 {#compound} -数组使用方括号构造 `[1, 2, 3]`. Nuples用圆括号构造 `(1, 'Hello, world!', 2)`. -从技术上讲,这些不是文字,而是分别具有数组创建运算符和元组创建运算符的表达式。 -数组必须至少包含一个项目,元组必须至少包含两个项目。 -有一个单独的情况下,当元组出现在 `IN` a条款 `SELECT` 查询。 查询结果可以包含元组,但元组不能保存到数据库(除了具有以下内容的表 [记忆](../engines/table-engines/special/memory.md) 发动机)。 +在字符串中,你至少需要对 `'` 和 `\` 进行转义。单引号可以使用单引号转义,例如 `'It\'s'` 和 `'It''s'` 是相同的。 -### NULL {#null-literal} +### 括号 {#compound} +数组都是使用方括号进行构造 `[1, 2, 3]`,元组则使用圆括号 `(1, 'Hello, world!', 2)` -指示该值丢失。 +从技术上来讲,这些都不是字符串,而是包含创建数组和元组运算符的表达式。 -为了存储 `NULL` 在表字段中,它必须是 [可为空](../sql-reference/data-types/nullable.md) 类型。 +创建一个数组必须至少包含一个元素,创建一个元组至少包含2个元素 -根据数据格式(输入或输出), `NULL` 可能有不同的表示。 有关详细信息,请参阅以下文档 [数据格式](../interfaces/formats.md#formats). +当元组出现在 `SELECT` 查询的 `IN` 部分时,是一种例外情形。查询结果可以包含元组,但是元组类型不能保存到数据库中(除非表采用 [内存表](../engines/table-engines/special/memory.md)引擎) -处理有许多细微差别 `NULL`. 例如,如果比较操作的至少一个参数是 `NULL`,此操作的结果也是 `NULL`. 对于乘法,加法和其他操作也是如此。 有关详细信息,请阅读每个操作的文档。 -在查询中,您可以检查 `NULL` 使用 [IS NULL](operators/index.md#operator-is-null) 和 [IS NOT NULL](operators/index.md) 运算符及相关功能 `isNull` 和 `isNotNull`. +### NULL值 {#null-literal} -## 功能 {#functions} +代表不存在的值 -函数调用像一个标识符一样写入,并在圆括号中包含一个参数列表(可能是空的)。 与标准SQL相比,括号是必需的,即使是空的参数列表。 示例: `now()`. -有常规函数和聚合函数(请参阅部分 “Aggregate functions”). 某些聚合函数可以包含括号中的两个参数列表。 示例: `quantile (0.9) (x)`. 这些聚合函数被调用 “parametric” 函数,并在第一个列表中的参数被调用 “parameters”. 
不带参数的聚合函数的语法与常规函数的语法相同。 +为了能在表字段中存储NULL值,该字段必须声明为 [空值](../sql-reference/data-types/nullable.md) 类型 +根据数据的格式(输入或输出),NULL值有不同的表现形式。更多信息参见文档 [数据格式](../interfaces/formats.md#formats) -## 运营商 {#operators} +在处理 `NULL`时存在很多细微差别。例如,比较运算的至少一个参数为 `NULL` ,该结果也是 `NULL` 。与之类似的还有乘法运算, 加法运算,以及其它运算。更多信息,请参阅每种运算的文档部分。 -在查询解析过程中,运算符会转换为相应的函数,同时考虑它们的优先级和关联性。 -例如,表达式 `1 + 2 * 3 + 4` 转化为 `plus(plus(1, multiply(2, 3)), 4)`. +在语句中,可以通过 [是否为NULL](operators/index.md#operator-is-null) 以及 [是否不为NULL](operators/index.md) 运算符,以及 `isNull` 、 `isNotNull` 函数来检查 `NULL` 值 -## 数据类型和数据库表引擎 {#data_types-and-database-table-engines} +## 函数 {#functions} +函数调用的写法,类似于变量并带有被圆括号包含的参数列表(可能为空)。与标准SQL不同,圆括号是必须的,不管参数列表是否为空。例如: `now()`。 -数据类型和表引擎 `CREATE` 查询的编写方式与标识符或函数相同。 换句话说,它们可能包含也可能不包含括号中的参数列表。 有关详细信息,请参阅部分 “Data types,” “Table engines,” 和 “CREATE”. +函数分为常规函数和聚合函数(参见“Aggregate functions”一章)。有些聚合函数包含2个参数列表,第一个参数列表中的参数被称为“parameters”。不包含“parameters”的聚合函数语法和常规函数是一样的。 + + +## 运算符 {#operators} + +在查询解析阶段,运算符会被转换成对应的函数,使用时请注意它们的优先级。例如: +表达式 `1 + 2 * 3 + 4` 会被解析成 `plus(plus(1, multiply(2, 3)), 4)`. + + +## 数据类型及数据库/表引擎 {#data_types-and-database-table-engines} + +`CREATE` 语句中的数据类型和表引擎写法与变量或函数类似。 +换句话说,它们可以用括号包含参数列表。更多信息,参见“数据类型,” “数据表引擎” 和 “CREATE语句”等章节 ## 表达式别名 {#syntax-expression_aliases} -别名是查询中表达式的用户定义名称。 +别名是用户对表达式的自定义名称 ``` sql expr AS alias ``` -- `AS` — The keyword for defining aliases. You can define the alias for a table name or a column name in a `SELECT` 子句不使用 `AS` 关键字。 +- `AS` — 用于定义别名的关键字。可以对表或select语句中的列定义别名(`AS` 可以省略) + 例如, `SELECT table_name_alias.column_name FROM table_name table_name_alias`. - For example, `SELECT table_name_alias.column_name FROM table_name table_name_alias`. + 在 [CAST函数](sql_reference/functions/type_conversion_functions.md#type_conversion_function-cast) 中,`AS`有其它含义。请参见该函数的说明部分。 - In the [CAST](sql_reference/functions/type_conversion_functions.md#type_conversion_function-cast) function, the `AS` keyword has another meaning. See the description of the function. -- `expr` — Any expression supported by ClickHouse. +- `expr` — 任意CH支持的表达式. - For example, `SELECT column_name * 2 AS double FROM some_table`. + 例如, `SELECT column_name * 2 AS double FROM some_table`. -- `alias` — Name for `expr`. 别名应符合 [标识符](#syntax-identifiers) 语法 +- `alias` — `expr` 的名称。别名必须符合 [变量名]](#syntax-identifiers) 语法. - For example, `SELECT "table t".column_name FROM table_name AS "table t"`. + 例如, `SELECT "table t".column_name FROM table_name AS "table t"`. -### 使用注意事项 {#notes-on-usage} +### 用法注意 {#notes-on-usage} -别名对于查询或子查询是全局的,您可以在查询的任何部分中为任何表达式定义别名。 例如, `SELECT (1 AS n) + 2, n`. +别名在当前查询或子查询中是全局可见的,你可以在查询语句的任何位置对表达式定义别名 -别名在子查询和子查询之间不可见。 例如,在执行查询时 `SELECT (SELECT sum(b.a) + num FROM b) - a.a AS num FROM a` ClickHouse生成异常 `Unknown identifier: num`. +别名在当前查询的子查询及不同子查询中是不可见的。例如,执行如下查询SQL: `SELECT (SELECT sum(b.a) + num FROM b) - a.a AS num FROM a` ,CH会提示异常 `Unknown identifier: num`. -如果为结果列定义了别名 `SELECT` 子查询的子句,这些列在外部查询中可见。 例如, `SELECT n + m FROM (SELECT 1 AS n, 2 AS m)`. - -小心使用与列或表名相同的别名。 让我们考虑以下示例: +如果给select子查询语句的结果列定义其别名,那么在外层可以使用该别名。例如, `SELECT n + m FROM (SELECT 1 AS n, 2 AS m)`. +注意列的别名和表的别名相同时的情形,考虑如下示例: ``` sql CREATE TABLE t ( @@ -172,16 +178,18 @@ Received exception from server (version 18.14.17): Code: 184. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception: Aggregate function sum(b) is found inside another aggregate function in query. ``` -在这个例子中,我们声明表 `t` 带柱 `b`. 然后,在选择数据时,我们定义了 `sum(b) AS b` 别名 由于别名是全局的,ClickHouse替换了文字 `b` 在表达式中 `argMax(a, b)` 用表达式 `sum(b)`. 
这种替换导致异常。 +在这个示例中,先声明了表 `t` 以及列 `b`。然后,在查询数据时,又定义了别名 `sum(b) AS b`。由于别名是全局的,CH使用表达式 `sum(b)` 来替换表达式 `argMax(a, b)` 中的变量 `b`。这种替换导致出现异常。 ## 星号 {#asterisk} -在一个 `SELECT` 查询中,星号可以替换表达式。 有关详细信息,请参阅部分 “SELECT”. +select查询中,星号可以代替表达式使用。详情请参见“select”部分 + ## 表达式 {#syntax-expressions} -表达式是函数、标识符、文字、运算符的应用程序、括号中的表达式、子查询或星号。 它还可以包含别名。 -表达式列表是一个或多个用逗号分隔的表达式。 -函数和运算符,反过来,可以有表达式作为参数。 -[原始文章](https://clickhouse.tech/docs/en/sql_reference/syntax/) +An expression is a function, identifier, literal, application of an operator, expression in brackets, subquery, or asterisk. It can also contain an alias. +A list of expressions is one or more expressions separated by commas. +Functions and operators, in turn, can have expressions as arguments. + +[原始文档](https://clickhouse.tech/docs/en/sql_reference/syntax/) diff --git a/docs/zh/whats-new/changelog/2017.md b/docs/zh/whats-new/changelog/2017.md index de62730b093..35d839c50c9 100644 --- a/docs/zh/whats-new/changelog/2017.md +++ b/docs/zh/whats-new/changelog/2017.md @@ -26,7 +26,7 @@ toc_title: '2017' #### 新功能: {#new-features} - MergeTree表引擎系列的自定义分区键。 -- [卡夫卡](https://clickhouse.yandex/docs/en/operations/table_engines/kafka/) 表引擎。 +- [卡夫卡](https://clickhouse.tech/docs/en/operations/table_engines/kafka/) 表引擎。 - 增加了对加载的支持 [CatBoost](https://catboost.yandex/) 模型并将其应用到ClickHouse中存储的数据。 - 增加了对UTC非整数偏移的时区的支持。 - 增加了对具有时间间隔的算术运算的支持。 diff --git a/docs/zh/whats-new/roadmap.md b/docs/zh/whats-new/roadmap.md index 49532c046f5..377746efcb7 100644 --- a/docs/zh/whats-new/roadmap.md +++ b/docs/zh/whats-new/roadmap.md @@ -1,9 +1,17 @@ -# 规划 {#gui-hua} +--- +toc_priority: 74 +toc_title: 路线图 +--- -## Q1 2020 {#q1-2020} +# 路线图 {#roadmap} -- 更精确的用户资源池,可以在用户之间合理分配集群资源 -- 细粒度的授权管理 -- 与外部认证服务集成 +## Q2 2020 {#q2-2020} + +- 和外部认证服务集成 + +## Q3 2020 {#q3-2020} + +- 资源池,为用户提供更精准的集群资源分配 + +{## [原始文档](https://clickhouse.tech/docs/en/roadmap/) ##} -[来源文章](https://clickhouse.tech/docs/en/roadmap/) diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index 1b2867940ea..bb814f474e3 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -469,7 +470,7 @@ private: const auto & info = infos[i]; json_out << double_quote << connections[i]->getDescription() << ": {\n"; - json_out << double_quote << "statistics: {\n"; + json_out << double_quote << "statistics" << ": {\n"; print_key_value("QPS", info->queries / info->work_time); print_key_value("RPS", info->read_rows / info->work_time); @@ -479,7 +480,7 @@ private: print_key_value("num_queries", info->queries.load(), false); json_out << "},\n"; - json_out << double_quote << "query_time_percentiles: {\n"; + json_out << double_quote << "query_time_percentiles" << ": {\n"; for (int percent = 0; percent <= 90; percent += 10) print_percentile(*info, percent); @@ -539,7 +540,7 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv) ("password", value()->default_value(""), "") ("database", value()->default_value("default"), "") ("stacktrace", "print stack traces of exceptions") - ("confidence", value()->default_value(5), "set the level of confidence for T-test [0=80%, 1=90%, 2=95%, 3=98%, 4=99%, 5=99.5%(default)") + ("confidence", value()->default_value(5), "set the level of confidence for T-test [0=80%, 1=90%, 2=95%, 3=98%, 4=99%, 5=99.5%(default)") ("query_id", value()->default_value(""), "") ; @@ -550,6 +551,8 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv) 
boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options); boost::program_options::notify(options); + clearPasswordFromCommandLine(argc, argv); + if (options.count("help")) { std::cout << "Usage: " << argv[0] << " [options] < queries.txt\n"; diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 917acdc2a83..05fc1ba9141 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -122,7 +123,7 @@ private: }; bool is_interactive = true; /// Use either interactive line editing interface or batch mode. bool need_render_progress = true; /// Render query execution progress. - bool send_logs = false; /// send_logs_level passed, do not use previous cursor position, to avoid overlaps with logs + bool has_received_logs = false; /// We have received some logs, do not use previous cursor position, to avoid overlaps with logs bool echo_queries = false; /// Print queries before execution in batch mode. bool ignore_error = false; /// In case of errors, don't print error message, continue to next query. Only applicable for non-interactive mode. bool print_time_to_stderr = false; /// Output execution time to stderr in batch mode. @@ -397,6 +398,7 @@ private: { TokenType::GreaterOrEquals, Replxx::Color::INTENSE }, { TokenType::Concatenation, Replxx::Color::INTENSE }, { TokenType::At, Replxx::Color::INTENSE }, + { TokenType::DoubleAt, Replxx::Color::MAGENTA }, { TokenType::EndOfStream, Replxx::Color::DEFAULT }, @@ -906,8 +908,6 @@ private: connection->forceConnected(connection_parameters.timeouts); - send_logs = context.getSettingsRef().send_logs_level != LogsLevel::none; - ASTPtr input_function; if (insert && insert->select) insert->tryFindInputFunction(input_function); @@ -985,7 +985,10 @@ private: /// Process the query that doesn't require transferring data blocks to the server. void processOrdinaryQuery() { - /// We will always rewrite query (even if there are no query_parameters) because it will help to find errors in query formatter. + /// Rewrite query only when we have query parameters. + /// Note that if query is rewritten, comments in query are lost. + /// But the user often wants to see comments in server logs, query log, processlist, etc. + if (!query_parameters.empty()) { /// Replace ASTQueryParameter with ASTLiteral for prepared statements. ReplaceQueryParameterVisitor visitor(query_parameters); @@ -1513,6 +1516,7 @@ private: void onLogData(Block & block) { + has_received_logs = true; initLogsOutputStream(); logs_out_stream->write(block); logs_out_stream->flush(); @@ -1548,7 +1552,7 @@ private: void clearProgress() { written_progress_chars = 0; - if (!send_logs) + if (!has_received_logs) std::cerr << "\r" CLEAR_TO_END_OF_LINE; } @@ -1576,7 +1580,7 @@ private: const char * indicator = indicators[increment % 8]; - if (!send_logs && written_progress_chars) + if (!has_received_logs && written_progress_chars) message << '\r'; size_t prefix_size = message.count(); @@ -1630,7 +1634,7 @@ private: message << CLEAR_TO_END_OF_LINE; - if (send_logs) + if (has_received_logs) message << '\n'; ++increment; @@ -1920,7 +1924,11 @@ public: std::string text = e.displayText(); std::cerr << "Code: " << e.code() << ". " << text << std::endl; std::cerr << "Table №" << i << std::endl << std::endl; - exit(e.code()); + /// Avoid the case when error exit code can possibly overflow to normal (zero). 
+ auto exit_code = e.code() % 256; + if (exit_code == 0) + exit_code = 255; + exit(exit_code); } } @@ -2002,6 +2010,7 @@ public: argsToConfig(common_arguments, config(), 100); + clearPasswordFromCommandLine(argc, argv); } }; diff --git a/programs/copier/ClusterCopier.cpp b/programs/copier/ClusterCopier.cpp index 5254d2a97ac..7fa0f663295 100644 --- a/programs/copier/ClusterCopier.cpp +++ b/programs/copier/ClusterCopier.cpp @@ -25,7 +25,7 @@ void ClusterCopier::init() task_description_watch_callback = [this] (const Coordination::WatchResponse & response) { - if (response.error != Coordination::ZOK) + if (response.error != Coordination::Error::ZOK) return; UInt64 version = ++task_description_version; LOG_DEBUG(log, "Task description should be updated, local version {}", version); @@ -206,11 +206,11 @@ void ClusterCopier::uploadTaskDescription(const std::string & task_path, const s zookeeper->createAncestors(local_task_description_path); auto code = zookeeper->tryCreate(local_task_description_path, task_config_str, zkutil::CreateMode::Persistent); - if (code && force) + if (code != Coordination::Error::ZOK && force) zookeeper->createOrUpdate(local_task_description_path, task_config_str, zkutil::CreateMode::Persistent); LOG_DEBUG(log, "Task description {} uploaded to {} with result {} ({})", - ((code && !force) ? "not " : ""), local_task_description_path, code, zookeeper->error2string(code)); + ((code != Coordination::Error::ZOK && !force) ? "not " : ""), local_task_description_path, code, Coordination::errorMessage(code)); } void ClusterCopier::reloadTaskDescription() @@ -220,10 +220,10 @@ void ClusterCopier::reloadTaskDescription() String task_config_str; Coordination::Stat stat{}; - int code; + Coordination::Error code; zookeeper->tryGetWatch(task_description_path, task_config_str, &stat, task_description_watch_callback, &code); - if (code) + if (code != Coordination::Error::ZOK) throw Exception("Can't get description node " + task_description_path, ErrorCodes::BAD_ARGUMENTS); LOG_DEBUG(log, "Loading description, zxid={}", task_description_current_stat.czxid); @@ -376,10 +376,10 @@ zkutil::EphemeralNodeHolder::Ptr ClusterCopier::createTaskWorkerNodeAndWaitIfNee Coordination::Responses responses; auto code = zookeeper->tryMulti(ops, responses); - if (code == Coordination::ZOK || code == Coordination::ZNODEEXISTS) + if (code == Coordination::Error::ZOK || code == Coordination::Error::ZNODEEXISTS) return std::make_shared(current_worker_path, *zookeeper, false, false, description); - if (code == Coordination::ZBADVERSION) + if (code == Coordination::Error::ZBADVERSION) { ++num_bad_version_errors; @@ -545,7 +545,7 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t } catch (const Coordination::Exception & e) { - if (e.code == Coordination::ZNODEEXISTS) + if (e.code == Coordination::Error::ZNODEEXISTS) { LOG_DEBUG(log, "Someone is already moving pieces {}", current_partition_attach_is_active); return TaskStatus::Active; @@ -745,7 +745,7 @@ bool ClusterCopier::tryDropPartitionPiece( } catch (const Coordination::Exception & e) { - if (e.code == Coordination::ZNODEEXISTS) + if (e.code == Coordination::Error::ZNODEEXISTS) { LOG_DEBUG(log, "Partition {} piece {} is cleaning now by somebody, sleep", task_partition.name, toString(current_piece_number)); std::this_thread::sleep_for(default_sleep_time); @@ -778,7 +778,7 @@ bool ClusterCopier::tryDropPartitionPiece( } catch (const Coordination::Exception & e) { - if (e.code == Coordination::ZNODEEXISTS) + if (e.code == 
Coordination::Error::ZNODEEXISTS) { LOG_DEBUG(log, "Partition {} is being filled now by somebody, sleep", task_partition.name); return false; @@ -795,7 +795,7 @@ bool ClusterCopier::tryDropPartitionPiece( /// Remove all status nodes { Strings children; - if (zookeeper->tryGetChildren(current_shards_path, children) == Coordination::ZOK) + if (zookeeper->tryGetChildren(current_shards_path, children) == Coordination::Error::ZOK) for (const auto & child : children) { zookeeper->removeRecursive(current_shards_path + "/" + child); @@ -845,7 +845,7 @@ bool ClusterCopier::tryDropPartitionPiece( } LOG_INFO(log, "Partition {} piece {} was dropped on cluster {}", task_partition.name, toString(current_piece_number), task_table.cluster_push_name); - if (zookeeper->tryCreate(current_shards_path, host_id, zkutil::CreateMode::Persistent) == Coordination::ZNODEEXISTS) + if (zookeeper->tryCreate(current_shards_path, host_id, zkutil::CreateMode::Persistent) == Coordination::Error::ZNODEEXISTS) zookeeper->set(current_shards_path, host_id); } @@ -1233,7 +1233,7 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl( } catch (const Coordination::Exception & e) { - if (e.code == Coordination::ZNODEEXISTS) + if (e.code == Coordination::Error::ZNODEEXISTS) { LOG_DEBUG(log, "Someone is already processing {}", current_task_piece_is_active_path); return TaskStatus::Active; @@ -1271,9 +1271,9 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl( { String state_finished = TaskStateWithOwner::getData(TaskState::Finished, host_id); auto res = zookeeper->tryCreate(current_task_piece_status_path, state_finished, zkutil::CreateMode::Persistent); - if (res == Coordination::ZNODEEXISTS) + if (res == Coordination::Error::ZNODEEXISTS) LOG_DEBUG(log, "Partition {} piece {} is absent on current replica of a shard. But other replicas have already marked it as done.", task_partition.name, current_piece_number); - if (res == Coordination::ZOK) + if (res == Coordination::Error::ZOK) LOG_DEBUG(log, "Partition {} piece {} is absent on current replica of a shard. Will mark it as done. Other replicas will do the same.", task_partition.name, current_piece_number); return TaskStatus::Finished; } @@ -1429,7 +1429,7 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl( { Coordination::ExistsResponse status = future_is_dirty_checker.get(); - if (status.error != Coordination::ZNONODE) + if (status.error != Coordination::Error::ZNONODE) { LogicalClock dirt_discovery_epoch (status.stat.mzxid); if (dirt_discovery_epoch == clean_state_clock.discovery_zxid) diff --git a/programs/copier/Internals.cpp b/programs/copier/Internals.cpp index 0613381a763..518395e3b7d 100644 --- a/programs/copier/Internals.cpp +++ b/programs/copier/Internals.cpp @@ -1,5 +1,6 @@ #include "Internals.h" #include +#include namespace DB { @@ -184,9 +185,9 @@ Names extractPrimaryKeyColumnNames(const ASTPtr & storage_ast) const auto sorting_key_ast = extractOrderBy(storage_ast); const auto primary_key_ast = extractPrimaryKey(storage_ast); - const auto sorting_key_expr_list = MergeTreeData::extractKeyExpressionList(sorting_key_ast); + const auto sorting_key_expr_list = extractKeyExpressionList(sorting_key_ast); const auto primary_key_expr_list = primary_key_ast - ? MergeTreeData::extractKeyExpressionList(primary_key_ast) : sorting_key_expr_list->clone(); + ? extractKeyExpressionList(primary_key_ast) : sorting_key_expr_list->clone(); /// Maybe we have to handle VersionedCollapsing engine separately. But in our case in looks pointless. 
diff --git a/programs/copier/ZooKeeperStaff.h b/programs/copier/ZooKeeperStaff.h index edd0d9e43d2..66036ae2f27 100644 --- a/programs/copier/ZooKeeperStaff.h +++ b/programs/copier/ZooKeeperStaff.h @@ -178,7 +178,7 @@ public: [stale = stale] (const Coordination::WatchResponse & rsp) { auto logger = &Poco::Logger::get("ClusterCopier"); - if (rsp.error == Coordination::ZOK) + if (rsp.error == Coordination::Error::ZOK) { switch (rsp.type) { diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt index ab8d94f2a0c..628f9ee018a 100644 --- a/programs/odbc-bridge/CMakeLists.txt +++ b/programs/odbc-bridge/CMakeLists.txt @@ -14,6 +14,7 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES set (CLICKHOUSE_ODBC_BRIDGE_LINK PRIVATE clickhouse_parsers + clickhouse_aggregate_functions daemon dbms Poco::Data diff --git a/programs/odbc-bridge/validateODBCConnectionString.cpp b/programs/odbc-bridge/validateODBCConnectionString.cpp index e564cebdeee..a54021431ef 100644 --- a/programs/odbc-bridge/validateODBCConnectionString.cpp +++ b/programs/odbc-bridge/validateODBCConnectionString.cpp @@ -65,7 +65,11 @@ std::string validateODBCConnectionString(const std::string & connection_string) else throw Exception("ODBC connection string parameter name doesn't begin with valid identifier character", ErrorCodes::BAD_ODBC_CONNECTION_STRING); - while (pos < end && isWordCharASCII(*pos)) + /// Additionally allow dash and dot symbols in names. + /// Strictly speaking, the name with that characters should be escaped. + /// But some ODBC drivers (e.g.) Postgres don't like escaping. + + while (pos < end && (isWordCharASCII(*pos) || *pos == '-' || *pos == '.')) ++pos; return std::string(begin, pos); @@ -213,7 +217,11 @@ std::string validateODBCConnectionString(const std::string & connection_string) auto write_value = [&](const std::string & value) { - if (std::all_of(value.begin(), value.end(), isWordCharASCII)) + /// Additionally allow dash and dot symbols - for hostnames. + /// Strictly speaking, hostname with that characters should be escaped. + /// But some ODBC drivers (e.g.) Postgres don't like escaping. + + if (std::all_of(value.begin(), value.end(), [](char c) { return isWordCharASCII(c) || c == '.' || c == '-'; })) write_plain_value(value); else write_escaped_value(value); diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 9734bafe30e..9a5dc55ded2 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -61,6 +61,8 @@ #include #include #include +#include + #if !defined(ARCADIA_BUILD) # include "config_core.h" @@ -869,7 +871,7 @@ int Server::main(const std::vector & /*args*/) if (listen_try) { - LOG_ERROR(log, "{}. If it is an IPv6 or IPv4 address and your host has disabled IPv6 or IPv4, then consider to " + LOG_WARNING(log, "{}. If it is an IPv6 or IPv4 address and your host has disabled IPv6 or IPv4, then consider to " "specify not disabled IPv4 or IPv6 address to listen in element of configuration " "file. Example for disabled IPv6: 0.0.0.0 ." 
" Example for disabled IPv4: ::", @@ -998,6 +1000,21 @@ int Server::main(const std::vector & /*args*/) LOG_INFO(log, "Listening for MySQL compatibility protocol: {}", address.toString()); }); + create_server("postgresql_port", [&](UInt16 port) + { + Poco::Net::ServerSocket socket; + auto address = socket_bind_listen(socket, listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(Poco::Timespan()); + socket.setSendTimeout(settings.send_timeout); + servers.emplace_back(std::make_unique( + new PostgreSQLHandlerFactory(*this), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + + LOG_INFO(log, "Listening for PostgreSQL compatibility protocol: " + address.toString()); + }); + /// Prometheus (if defined and not setup yet with http_port) create_server("prometheus.port", [&](UInt16 port) { @@ -1013,7 +1030,8 @@ int Server::main(const std::vector & /*args*/) } if (servers.empty()) - throw Exception("No servers started (add valid listen_host and 'tcp_port' or 'http_port' to configuration file.)", ErrorCodes::NO_ELEMENTS_IN_CONFIG); + throw Exception("No servers started (add valid listen_host and 'tcp_port' or 'http_port' to configuration file.)", + ErrorCodes::NO_ELEMENTS_IN_CONFIG); global_context->enableNamedSessions(); diff --git a/programs/server/users.d/access_management.xml b/programs/server/users.d/access_management.xml new file mode 100644 index 00000000000..7e799cb7b10 --- /dev/null +++ b/programs/server/users.d/access_management.xml @@ -0,0 +1,7 @@ + + + + 1 + + + diff --git a/src/Access/DiskAccessStorage.cpp b/src/Access/DiskAccessStorage.cpp index 1195bcf842c..8b249813f7c 100644 --- a/src/Access/DiskAccessStorage.cpp +++ b/src/Access/DiskAccessStorage.cpp @@ -64,19 +64,23 @@ namespace bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override { - if (ParserCreateUserQuery{}.enableAttachMode(true).parse(pos, node, expected)) - return true; - if (ParserCreateRoleQuery{}.enableAttachMode(true).parse(pos, node, expected)) - return true; - if (ParserCreateRowPolicyQuery{}.enableAttachMode(true).parse(pos, node, expected)) - return true; - if (ParserCreateQuotaQuery{}.enableAttachMode(true).parse(pos, node, expected)) - return true; - if (ParserCreateSettingsProfileQuery{}.enableAttachMode(true).parse(pos, node, expected)) - return true; - if (ParserGrantQuery{}.enableAttachMode(true).parse(pos, node, expected)) - return true; - return false; + ParserCreateUserQuery create_user_p; + ParserCreateRoleQuery create_role_p; + ParserCreateRowPolicyQuery create_policy_p; + ParserCreateQuotaQuery create_quota_p; + ParserCreateSettingsProfileQuery create_profile_p; + ParserGrantQuery grant_p; + + create_user_p.useAttachMode(); + create_role_p.useAttachMode(); + create_policy_p.useAttachMode(); + create_quota_p.useAttachMode(); + create_profile_p.useAttachMode(); + grant_p.useAttachMode(); + + return create_user_p.parse(pos, node, expected) || create_role_p.parse(pos, node, expected) + || create_policy_p.parse(pos, node, expected) || create_quota_p.parse(pos, node, expected) + || create_profile_p.parse(pos, node, expected) || grant_p.parse(pos, node, expected); } }; @@ -261,7 +265,9 @@ namespace /// Calculates the path for storing a map of name of access entity to UUID for access entities of some type. 
std::filesystem::path getListFilePath(const String & directory_path, EntityType type) { - std::string_view file_name = EntityTypeInfo::get(type).list_filename; + String file_name = EntityTypeInfo::get(type).plural_raw_name; + boost::to_lower(file_name); + file_name += ".list"; return std::filesystem::path(directory_path).append(file_name); } diff --git a/src/Access/IAccessEntity.h b/src/Access/IAccessEntity.h index 39a5cefa7d7..68e14c99982 100644 --- a/src/Access/IAccessEntity.h +++ b/src/Access/IAccessEntity.h @@ -45,11 +45,13 @@ struct IAccessEntity struct TypeInfo { const char * const raw_name; + const char * const plural_raw_name; const String name; /// Uppercased with spaces instead of underscores, e.g. "SETTINGS PROFILE". const String alias; /// Alias of the keyword or empty string, e.g. "PROFILE". + const String plural_name; /// Uppercased with spaces plural name, e.g. "SETTINGS PROFILES". + const String plural_alias; /// Uppercased with spaces plural name alias, e.g. "PROFILES". const String name_for_output_with_entity_name; /// Lowercased with spaces instead of underscores, e.g. "settings profile". const char unique_char; /// Unique character for this type. E.g. 'P' for SETTINGS_PROFILE. - const String list_filename; /// Name of the file containing list of objects of this type, including the file extension ".list". const int not_found_error_code; static const TypeInfo & get(Type type_); @@ -69,6 +71,18 @@ struct IAccessEntity friend bool operator ==(const IAccessEntity & lhs, const IAccessEntity & rhs) { return lhs.equal(rhs); } friend bool operator !=(const IAccessEntity & lhs, const IAccessEntity & rhs) { return !(lhs == rhs); } + struct LessByName + { + bool operator()(const IAccessEntity & lhs, const IAccessEntity & rhs) const { return (lhs.getName() < rhs.getName()); } + bool operator()(const std::shared_ptr & lhs, const std::shared_ptr & rhs) const { return operator()(*lhs, *rhs); } + }; + + struct LessByTypeAndName + { + bool operator()(const IAccessEntity & lhs, const IAccessEntity & rhs) const { return (lhs.getType() < rhs.getType()) || ((lhs.getType() == rhs.getType()) && (lhs.getName() < rhs.getName())); } + bool operator()(const std::shared_ptr & lhs, const std::shared_ptr & rhs) const { return operator()(*lhs, *rhs); } + }; + protected: String name; @@ -87,44 +101,49 @@ using AccessEntityPtr = std::shared_ptr; inline const IAccessEntity::TypeInfo & IAccessEntity::TypeInfo::get(Type type_) { - static constexpr auto make_info = [](const char * raw_name_, char unique_char_, const char * list_filename_, int not_found_error_code_) + static constexpr auto make_info = [](const char * raw_name_, const char * plural_raw_name_, char unique_char_, int not_found_error_code_) { - String init_name = raw_name_; - boost::to_upper(init_name); - boost::replace_all(init_name, "_", " "); - String init_alias; - if (auto underscore_pos = init_name.find_first_of(" "); underscore_pos != String::npos) - init_alias = init_name.substr(underscore_pos + 1); - String init_name_for_output_with_entity_name = init_name; + String init_names[2] = {raw_name_, plural_raw_name_}; + String init_aliases[2]; + for (size_t i = 0; i != std::size(init_names); ++i) + { + String & init_name = init_names[i]; + String & init_alias = init_aliases[i]; + boost::to_upper(init_name); + boost::replace_all(init_name, "_", " "); + if (auto underscore_pos = init_name.find_first_of(" "); underscore_pos != String::npos) + init_alias = init_name.substr(underscore_pos + 1); + } + String init_name_for_output_with_entity_name = 
init_names[0]; boost::to_lower(init_name_for_output_with_entity_name); - return TypeInfo{raw_name_, std::move(init_name), std::move(init_alias), std::move(init_name_for_output_with_entity_name), unique_char_, list_filename_, not_found_error_code_}; + return TypeInfo{raw_name_, plural_raw_name_, std::move(init_names[0]), std::move(init_aliases[0]), std::move(init_names[1]), std::move(init_aliases[1]), std::move(init_name_for_output_with_entity_name), unique_char_, not_found_error_code_}; }; switch (type_) { case Type::USER: { - static const auto info = make_info("USER", 'U', "users.list", ErrorCodes::UNKNOWN_USER); + static const auto info = make_info("USER", "USERS", 'U', ErrorCodes::UNKNOWN_USER); return info; } case Type::ROLE: { - static const auto info = make_info("ROLE", 'R', "roles.list", ErrorCodes::UNKNOWN_ROLE); + static const auto info = make_info("ROLE", "ROLES", 'R', ErrorCodes::UNKNOWN_ROLE); return info; } case Type::SETTINGS_PROFILE: { - static const auto info = make_info("SETTINGS_PROFILE", 'S', "settings_profiles.list", ErrorCodes::THERE_IS_NO_PROFILE); + static const auto info = make_info("SETTINGS_PROFILE", "SETTINGS_PROFILES", 'S', ErrorCodes::THERE_IS_NO_PROFILE); return info; } case Type::ROW_POLICY: { - static const auto info = make_info("ROW_POLICY", 'P', "row_policies.list", ErrorCodes::UNKNOWN_ROW_POLICY); + static const auto info = make_info("ROW_POLICY", "ROW_POLICIES", 'P', ErrorCodes::UNKNOWN_ROW_POLICY); return info; } case Type::QUOTA: { - static const auto info = make_info("QUOTA", 'Q', "quotas.list", ErrorCodes::UNKNOWN_QUOTA); + static const auto info = make_info("QUOTA", "QUOTAS", 'Q', ErrorCodes::UNKNOWN_QUOTA); return info; } case Type::MAX: break; diff --git a/src/Access/IAccessStorage.cpp b/src/Access/IAccessStorage.cpp index a7af61c7712..6813b5eb558 100644 --- a/src/Access/IAccessStorage.cpp +++ b/src/Access/IAccessStorage.cpp @@ -24,16 +24,141 @@ namespace using EntityType = IAccessStorage::EntityType; using EntityTypeInfo = IAccessStorage::EntityTypeInfo; - bool isNotFoundErrorCode(int error_code) + + String outputID(const UUID & id) { - if (error_code == ErrorCodes::ACCESS_ENTITY_NOT_FOUND) - return true; + return "ID(" + toString(id) + ")"; + } - for (auto type : ext::range(EntityType::MAX)) - if (error_code == EntityTypeInfo::get(type).not_found_error_code) - return true; + String outputTypeAndNameOrID(const IAccessStorage & storage, const UUID & id) + { + auto entity = storage.tryRead(id); + if (entity) + return entity->outputTypeAndName(); + return outputID(id); + } - return false; + + template > + ResultType doTry(const Func & func) + { + try + { + return func(); + } + catch (Exception &) + { + return {}; + } + } + + + template , + typename ResultType = std::conditional_t, void, std::vector>> + ResultType applyToMultipleEntities( + const std::vector & multiple_entities, + const ApplyFunc & apply_function, + const char * error_message_format [[maybe_unused]] = nullptr, + const GetNameFunc & get_name_function [[maybe_unused]] = nullptr) + { + std::optional exception; + std::vector success; + + auto helper = [&](const auto & apply_and_store_result_function) + { + for (size_t i = 0; i != multiple_entities.size(); ++i) + { + try + { + apply_and_store_result_function(multiple_entities[i]); + if constexpr (!ignore_errors) + success[i] = true; + } + catch (Exception & e) + { + if (!ignore_errors && !exception) + exception.emplace(e); + } + catch (Poco::Exception & e) + { + if (!ignore_errors && !exception) + 
exception.emplace(Exception::CreateFromPocoTag{}, e); + } + catch (std::exception & e) + { + if (!ignore_errors && !exception) + exception.emplace(Exception::CreateFromSTDTag{}, e); + } + } + }; + + if constexpr (std::is_same_v) + { + if (multiple_entities.empty()) + return; + + if (multiple_entities.size() == 1) + { + apply_function(multiple_entities.front()); + return; + } + + if constexpr (!ignore_errors) + success.resize(multiple_entities.size(), false); + + helper(apply_function); + + if (ignore_errors || !exception) + return; + } + else + { + ResultType result; + if (multiple_entities.empty()) + return result; + + if (multiple_entities.size() == 1) + { + result.emplace_back(apply_function(multiple_entities.front())); + return result; + } + + result.reserve(multiple_entities.size()); + if constexpr (!ignore_errors) + success.resize(multiple_entities.size(), false); + + helper([&](const T & entity) { result.emplace_back(apply_function(entity)); }); + + if (ignore_errors || !exception) + return result; + } + + if constexpr (!ignore_errors) + { + Strings succeeded_names_list; + Strings failed_names_list; + for (size_t i = 0; i != multiple_entities.size(); ++i) + { + const auto & entity = multiple_entities[i]; + String name = get_name_function(entity); + if (success[i]) + succeeded_names_list.emplace_back(name); + else + failed_names_list.emplace_back(name); + } + String succeeded_names = boost::algorithm::join(succeeded_names_list, ", "); + String failed_names = boost::algorithm::join(failed_names_list, ", "); + if (succeeded_names.empty()) + succeeded_names = "none"; + + String error_message = error_message_format; + boost::replace_all(error_message, "{succeeded_names}", succeeded_names); + boost::replace_all(error_message, "{failed_names}", failed_names); + exception->addMessage(error_message); + exception->rethrow(); + } + __builtin_unreachable(); } } @@ -91,14 +216,7 @@ bool IAccessStorage::exists(const UUID & id) const AccessEntityPtr IAccessStorage::tryReadBase(const UUID & id) const { - try - { - return readImpl(id); - } - catch (Exception &) - { - return nullptr; - } + return doTry([&] { return readImpl(id); }); } @@ -110,14 +228,7 @@ String IAccessStorage::readName(const UUID & id) const std::optional IAccessStorage::tryReadName(const UUID & id) const { - try - { - return readNameImpl(id); - } - catch (Exception &) - { - return {}; - } + return doTry([&] { return std::optional{readNameImpl(id)}; }); } @@ -129,56 +240,25 @@ UUID IAccessStorage::insert(const AccessEntityPtr & entity) std::vector IAccessStorage::insert(const std::vector & multiple_entities) { - std::vector ids; - ids.reserve(multiple_entities.size()); - String error_message; - for (const auto & entity : multiple_entities) - { - try - { - ids.push_back(insertImpl(entity, false)); - } - catch (Exception & e) - { - if (e.code() != ErrorCodes::ACCESS_ENTITY_ALREADY_EXISTS) - throw; - error_message += (error_message.empty() ? "" : ". ") + e.message(); - } - } - if (!error_message.empty()) - throw Exception(error_message, ErrorCodes::ACCESS_ENTITY_ALREADY_EXISTS); - return ids; + return applyToMultipleEntities( + multiple_entities, + [this](const AccessEntityPtr & entity) { return insertImpl(entity, /* replace_if_exists = */ false); }, + "Couldn't insert {failed_names}. 
Successfully inserted: {succeeded_names}", + [](const AccessEntityPtr & entity) { return entity->outputTypeAndName(); }); } std::optional IAccessStorage::tryInsert(const AccessEntityPtr & entity) { - try - { - return insertImpl(entity, false); - } - catch (Exception &) - { - return {}; - } + return doTry([&] { return std::optional{insertImpl(entity, false)}; }); } std::vector IAccessStorage::tryInsert(const std::vector & multiple_entities) { - std::vector ids; - ids.reserve(multiple_entities.size()); - for (const auto & entity : multiple_entities) - { - try - { - ids.push_back(insertImpl(entity, false)); - } - catch (Exception &) - { - } - } - return ids; + return applyToMultipleEntities( + multiple_entities, + [this](const AccessEntityPtr & entity) { return insertImpl(entity, /* replace_if_exists = */ false); }); } @@ -190,11 +270,11 @@ UUID IAccessStorage::insertOrReplace(const AccessEntityPtr & entity) std::vector IAccessStorage::insertOrReplace(const std::vector & multiple_entities) { - std::vector ids; - ids.reserve(multiple_entities.size()); - for (const auto & entity : multiple_entities) - ids.push_back(insertImpl(entity, true)); - return ids; + return applyToMultipleEntities( + multiple_entities, + [this](const AccessEntityPtr & entity) { return insertImpl(entity, /* replace_if_exists = */ true); }, + "Couldn't insert {failed_names}. Successfully inserted: {succeeded_names}", + [](const AccessEntityPtr & entity) -> String { return entity->outputTypeAndName(); }); } @@ -206,60 +286,25 @@ void IAccessStorage::remove(const UUID & id) void IAccessStorage::remove(const std::vector & ids) { - String error_message; - std::optional error_code; - for (const auto & id : ids) - { - try - { - removeImpl(id); - } - catch (Exception & e) - { - if (!isNotFoundErrorCode(e.code())) - throw; - error_message += (error_message.empty() ? "" : ". ") + e.message(); - if (error_code && (*error_code != e.code())) - error_code = ErrorCodes::ACCESS_ENTITY_NOT_FOUND; - else - error_code = e.code(); - } - } - if (!error_message.empty()) - throw Exception(error_message, *error_code); + applyToMultipleEntities( + ids, + [this](const UUID & id) { removeImpl(id); }, + "Couldn't remove {failed_names}. Successfully removed: {succeeded_names}", + [this](const UUID & id) { return outputTypeAndNameOrID(*this, id); }); } bool IAccessStorage::tryRemove(const UUID & id) { - try - { - removeImpl(id); - return true; - } - catch (Exception &) - { - return false; - } + return doTry([&] { removeImpl(id); return true; }); } std::vector IAccessStorage::tryRemove(const std::vector & ids) { - std::vector removed; - removed.reserve(ids.size()); - for (const auto & id : ids) - { - try - { - removeImpl(id); - removed.push_back(id); - } - catch (Exception &) - { - } - } - return removed; + return applyToMultipleEntities( + ids, + [this](const UUID & id) { removeImpl(id); return id; }); } @@ -271,60 +316,25 @@ void IAccessStorage::update(const UUID & id, const UpdateFunc & update_func) void IAccessStorage::update(const std::vector & ids, const UpdateFunc & update_func) { - String error_message; - std::optional error_code; - for (const auto & id : ids) - { - try - { - updateImpl(id, update_func); - } - catch (Exception & e) - { - if (!isNotFoundErrorCode(e.code())) - throw; - error_message += (error_message.empty() ? "" : ". 
") + e.message(); - if (error_code && (*error_code != e.code())) - error_code = ErrorCodes::ACCESS_ENTITY_NOT_FOUND; - else - error_code = e.code(); - } - } - if (!error_message.empty()) - throw Exception(error_message, *error_code); + applyToMultipleEntities( + ids, + [this, &update_func](const UUID & id) { updateImpl(id, update_func); }, + "Couldn't update {failed_names}. Successfully updated: {succeeded_names}", + [this](const UUID & id) { return outputTypeAndNameOrID(*this, id); }); } bool IAccessStorage::tryUpdate(const UUID & id, const UpdateFunc & update_func) { - try - { - updateImpl(id, update_func); - return true; - } - catch (Exception &) - { - return false; - } + return doTry([&] { updateImpl(id, update_func); return true; }); } std::vector IAccessStorage::tryUpdate(const std::vector & ids, const UpdateFunc & update_func) { - std::vector updated; - updated.reserve(ids.size()); - for (const auto & id : ids) - { - try - { - updateImpl(id, update_func); - updated.push_back(id); - } - catch (Exception &) - { - } - } - return updated; + return applyToMultipleEntities( + ids, + [this, &update_func](const UUID & id) { updateImpl(id, update_func); return id; }); } @@ -388,7 +398,7 @@ Poco::Logger * IAccessStorage::getLogger() const void IAccessStorage::throwNotFound(const UUID & id) const { - throw Exception("ID {" + toString(id) + "} not found in [" + getStorageName() + "]", ErrorCodes::ACCESS_ENTITY_NOT_FOUND); + throw Exception(outputID(id) + " not found in [" + getStorageName() + "]", ErrorCodes::ACCESS_ENTITY_NOT_FOUND); } @@ -402,7 +412,7 @@ void IAccessStorage::throwNotFound(EntityType type, const String & name) const void IAccessStorage::throwBadCast(const UUID & id, EntityType type, const String & name, EntityType required_type) { throw Exception( - "ID {" + toString(id) + "}: " + outputEntityTypeAndName(type, name) + " expected to be of type " + toString(required_type), + outputID(id) + ": " + outputEntityTypeAndName(type, name) + " expected to be of type " + toString(required_type), ErrorCodes::LOGICAL_ERROR); } @@ -410,7 +420,7 @@ void IAccessStorage::throwBadCast(const UUID & id, EntityType type, const String void IAccessStorage::throwIDCollisionCannotInsert(const UUID & id, EntityType type, const String & name, EntityType existing_type, const String & existing_name) const { throw Exception( - outputEntityTypeAndName(type, name) + ": cannot insert because the ID {" + toString(id) + "} is already used by " + outputEntityTypeAndName(type, name) + ": cannot insert because the " + outputID(id) + " is already used by " + outputEntityTypeAndName(existing_type, existing_name) + " in [" + getStorageName() + "]", ErrorCodes::ACCESS_ENTITY_ALREADY_EXISTS); } diff --git a/src/Access/Quota.h b/src/Access/Quota.h index 25b56756dc1..5bbea36cfda 100644 --- a/src/Access/Quota.h +++ b/src/Access/Quota.h @@ -1,7 +1,9 @@ #pragma once #include -#include +#include +#include +#include #include #include @@ -84,14 +86,15 @@ struct Quota : public IAccessEntity struct KeyTypeInfo { const char * const raw_name; - const String name; /// Lowercased with spaces, e.g. "client key". + const String name; /// Lowercased with underscores, e.g. "client_key". + const std::vector base_types; /// For combined types keeps base types, e.g. for CLIENT_KEY_OR_USER_NAME it keeps [KeyType::CLIENT_KEY, KeyType::USER_NAME]. static const KeyTypeInfo & get(KeyType type); }; KeyType key_type = KeyType::NONE; /// Which roles or users should use this quota. 
- ExtendedRoleSet to_roles; + RolesOrUsersSet to_roles; bool equal(const IAccessEntity & other) const override; std::shared_ptr clone() const override { return cloneImpl(); } @@ -195,8 +198,21 @@ inline const Quota::KeyTypeInfo & Quota::KeyTypeInfo::get(KeyType type) { String init_name = raw_name_; boost::to_lower(init_name); - boost::replace_all(init_name, "_", " "); - return KeyTypeInfo{raw_name_, std::move(init_name)}; + std::vector init_base_types; + String replaced = boost::algorithm::replace_all_copy(init_name, "_or_", "|"); + Strings tokens; + boost::algorithm::split(tokens, replaced, boost::is_any_of("|")); + if (tokens.size() > 1) + { + for (const auto & token : tokens) + for (auto kt : ext::range(KeyType::MAX)) + if (KeyTypeInfo::get(kt).name == token) + { + init_base_types.push_back(kt); + break; + } + } + return KeyTypeInfo{raw_name_, std::move(init_name), std::move(init_base_types)}; }; switch (type) diff --git a/src/Access/QuotaCache.h b/src/Access/QuotaCache.h index 6e794f0bbd2..0bb5c11a82b 100644 --- a/src/Access/QuotaCache.h +++ b/src/Access/QuotaCache.h @@ -39,7 +39,7 @@ private: QuotaPtr quota; UUID quota_id; - const ExtendedRoleSet * roles = nullptr; + const RolesOrUsersSet * roles = nullptr; std::unordered_map> key_to_intervals; }; diff --git a/src/Access/ExtendedRoleSet.cpp b/src/Access/RolesOrUsersSet.cpp similarity index 76% rename from src/Access/ExtendedRoleSet.cpp rename to src/Access/RolesOrUsersSet.cpp index a8e674b3722..cb0beb42700 100644 --- a/src/Access/ExtendedRoleSet.cpp +++ b/src/Access/RolesOrUsersSet.cpp @@ -1,9 +1,8 @@ - -#include +#include #include #include #include -#include +#include #include #include #include @@ -20,51 +19,51 @@ namespace ErrorCodes } -ExtendedRoleSet::ExtendedRoleSet() = default; -ExtendedRoleSet::ExtendedRoleSet(const ExtendedRoleSet & src) = default; -ExtendedRoleSet & ExtendedRoleSet::operator =(const ExtendedRoleSet & src) = default; -ExtendedRoleSet::ExtendedRoleSet(ExtendedRoleSet && src) = default; -ExtendedRoleSet & ExtendedRoleSet::operator =(ExtendedRoleSet && src) = default; +RolesOrUsersSet::RolesOrUsersSet() = default; +RolesOrUsersSet::RolesOrUsersSet(const RolesOrUsersSet & src) = default; +RolesOrUsersSet & RolesOrUsersSet::operator =(const RolesOrUsersSet & src) = default; +RolesOrUsersSet::RolesOrUsersSet(RolesOrUsersSet && src) = default; +RolesOrUsersSet & RolesOrUsersSet::operator =(RolesOrUsersSet && src) = default; -ExtendedRoleSet::ExtendedRoleSet(AllTag) +RolesOrUsersSet::RolesOrUsersSet(AllTag) { all = true; } -ExtendedRoleSet::ExtendedRoleSet(const UUID & id) +RolesOrUsersSet::RolesOrUsersSet(const UUID & id) { add(id); } -ExtendedRoleSet::ExtendedRoleSet(const std::vector & ids_) +RolesOrUsersSet::RolesOrUsersSet(const std::vector & ids_) { add(ids_); } -ExtendedRoleSet::ExtendedRoleSet(const ASTExtendedRoleSet & ast) +RolesOrUsersSet::RolesOrUsersSet(const ASTRolesOrUsersSet & ast) { init(ast, nullptr); } -ExtendedRoleSet::ExtendedRoleSet(const ASTExtendedRoleSet & ast, const std::optional & current_user_id) +RolesOrUsersSet::RolesOrUsersSet(const ASTRolesOrUsersSet & ast, const std::optional & current_user_id) { init(ast, nullptr, current_user_id); } -ExtendedRoleSet::ExtendedRoleSet(const ASTExtendedRoleSet & ast, const AccessControlManager & manager) +RolesOrUsersSet::RolesOrUsersSet(const ASTRolesOrUsersSet & ast, const AccessControlManager & manager) { init(ast, &manager); } -ExtendedRoleSet::ExtendedRoleSet(const ASTExtendedRoleSet & ast, const AccessControlManager & manager, const std::optional 
& current_user_id) +RolesOrUsersSet::RolesOrUsersSet(const ASTRolesOrUsersSet & ast, const AccessControlManager & manager, const std::optional & current_user_id) { init(ast, &manager, current_user_id); } -void ExtendedRoleSet::init(const ASTExtendedRoleSet & ast, const AccessControlManager * manager, const std::optional & current_user_id) +void RolesOrUsersSet::init(const ASTRolesOrUsersSet & ast, const AccessControlManager * manager, const std::optional & current_user_id) { all = ast.all; @@ -73,20 +72,20 @@ void ExtendedRoleSet::init(const ASTExtendedRoleSet & ast, const AccessControlMa if (ast.id_mode) return parse(name); assert(manager); - if (ast.can_contain_users && ast.can_contain_roles) + if (ast.allow_user_names && ast.allow_role_names) { auto id = manager->find(name); if (id) return *id; return manager->getID(name); } - else if (ast.can_contain_users) + else if (ast.allow_user_names) { return manager->getID(name); } else { - assert(ast.can_contain_roles); + assert(ast.allow_role_names); return manager->getID(name); } }; @@ -122,9 +121,9 @@ void ExtendedRoleSet::init(const ASTExtendedRoleSet & ast, const AccessControlMa } -std::shared_ptr ExtendedRoleSet::toAST() const +std::shared_ptr RolesOrUsersSet::toAST() const { - auto ast = std::make_shared(); + auto ast = std::make_shared(); ast->id_mode = true; ast->all = all; @@ -148,9 +147,9 @@ std::shared_ptr ExtendedRoleSet::toAST() const } -std::shared_ptr ExtendedRoleSet::toASTWithNames(const AccessControlManager & manager) const +std::shared_ptr RolesOrUsersSet::toASTWithNames(const AccessControlManager & manager) const { - auto ast = std::make_shared(); + auto ast = std::make_shared(); ast->all = all; if (!ids.empty()) @@ -181,21 +180,21 @@ std::shared_ptr ExtendedRoleSet::toASTWithNames(const Access } -String ExtendedRoleSet::toString() const +String RolesOrUsersSet::toString() const { auto ast = toAST(); return serializeAST(*ast); } -String ExtendedRoleSet::toStringWithNames(const AccessControlManager & manager) const +String RolesOrUsersSet::toStringWithNames(const AccessControlManager & manager) const { auto ast = toASTWithNames(manager); return serializeAST(*ast); } -Strings ExtendedRoleSet::toStringsWithNames(const AccessControlManager & manager) const +Strings RolesOrUsersSet::toStringsWithNames(const AccessControlManager & manager) const { if (!all && ids.empty()) return {}; @@ -233,13 +232,13 @@ Strings ExtendedRoleSet::toStringsWithNames(const AccessControlManager & manager } -bool ExtendedRoleSet::empty() const +bool RolesOrUsersSet::empty() const { return ids.empty() && !all; } -void ExtendedRoleSet::clear() +void RolesOrUsersSet::clear() { ids.clear(); all = false; @@ -247,26 +246,26 @@ void ExtendedRoleSet::clear() } -void ExtendedRoleSet::add(const UUID & id) +void RolesOrUsersSet::add(const UUID & id) { ids.insert(id); } -void ExtendedRoleSet::add(const std::vector & ids_) +void RolesOrUsersSet::add(const std::vector & ids_) { for (const auto & id : ids_) add(id); } -bool ExtendedRoleSet::match(const UUID & id) const +bool RolesOrUsersSet::match(const UUID & id) const { return (all || ids.count(id)) && !except_ids.count(id); } -bool ExtendedRoleSet::match(const UUID & user_id, const boost::container::flat_set & enabled_roles) const +bool RolesOrUsersSet::match(const UUID & user_id, const boost::container::flat_set & enabled_roles) const { if (!all && !ids.count(user_id)) { @@ -285,7 +284,7 @@ bool ExtendedRoleSet::match(const UUID & user_id, const boost::container::flat_s } -std::vector 
ExtendedRoleSet::getMatchingIDs() const +std::vector RolesOrUsersSet::getMatchingIDs() const { if (all) throw Exception("getAllMatchingIDs() can't get ALL ids without manager", ErrorCodes::LOGICAL_ERROR); @@ -295,7 +294,7 @@ std::vector ExtendedRoleSet::getMatchingIDs() const } -std::vector ExtendedRoleSet::getMatchingIDs(const AccessControlManager & manager) const +std::vector RolesOrUsersSet::getMatchingIDs(const AccessControlManager & manager) const { if (!all) return getMatchingIDs(); @@ -316,7 +315,7 @@ std::vector ExtendedRoleSet::getMatchingIDs(const AccessControlManager & m } -bool operator ==(const ExtendedRoleSet & lhs, const ExtendedRoleSet & rhs) +bool operator ==(const RolesOrUsersSet & lhs, const RolesOrUsersSet & rhs) { return (lhs.all == rhs.all) && (lhs.ids == rhs.ids) && (lhs.except_ids == rhs.except_ids); } diff --git a/src/Access/ExtendedRoleSet.h b/src/Access/RolesOrUsersSet.h similarity index 57% rename from src/Access/ExtendedRoleSet.h rename to src/Access/RolesOrUsersSet.h index eeb4af84f78..bae7f52a574 100644 --- a/src/Access/ExtendedRoleSet.h +++ b/src/Access/RolesOrUsersSet.h @@ -8,35 +8,35 @@ namespace DB { -class ASTExtendedRoleSet; +class ASTRolesOrUsersSet; class AccessControlManager; /// Represents a set of users/roles like /// {user_name | role_name | CURRENT_USER} [,...] | NONE | ALL | ALL EXCEPT {user_name | role_name | CURRENT_USER} [,...] -/// Similar to ASTExtendedRoleSet, but with IDs instead of names. -struct ExtendedRoleSet +/// Similar to ASTRolesOrUsersSet, but with IDs instead of names. +struct RolesOrUsersSet { - ExtendedRoleSet(); - ExtendedRoleSet(const ExtendedRoleSet & src); - ExtendedRoleSet & operator =(const ExtendedRoleSet & src); - ExtendedRoleSet(ExtendedRoleSet && src); - ExtendedRoleSet & operator =(ExtendedRoleSet && src); + RolesOrUsersSet(); + RolesOrUsersSet(const RolesOrUsersSet & src); + RolesOrUsersSet & operator =(const RolesOrUsersSet & src); + RolesOrUsersSet(RolesOrUsersSet && src); + RolesOrUsersSet & operator =(RolesOrUsersSet && src); struct AllTag {}; - ExtendedRoleSet(AllTag); + RolesOrUsersSet(AllTag); - ExtendedRoleSet(const UUID & id); - ExtendedRoleSet(const std::vector & ids_); + RolesOrUsersSet(const UUID & id); + RolesOrUsersSet(const std::vector & ids_); /// The constructor from AST requires the AccessControlManager if `ast.id_mode == false`. 
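For reference, the membership rule implemented by `RolesOrUsersSet::match` above is `(all || ids.count(id)) && !except_ids.count(id)`. The following standalone sketch models the same `{names} | ALL | ALL EXCEPT {names}` semantics with plain standard-library types; the struct and the integer IDs are simplified stand-ins, not the ClickHouse classes:

```cpp
#include <cassert>
#include <unordered_set>

/// Simplified model of the "{names} | NONE | ALL | ALL EXCEPT {names}" rule.
struct SimpleRolesOrUsersSet
{
    std::unordered_set<int> ids;        /// explicitly listed users/roles
    bool all = false;                   /// ALL
    std::unordered_set<int> except_ids; /// ALL EXCEPT ...

    bool match(int id) const
    {
        /// Same shape as RolesOrUsersSet::match: listed (or ALL), and not excluded.
        return (all || ids.count(id)) && !except_ids.count(id);
    }
};

int main()
{
    SimpleRolesOrUsersSet set;
    set.all = true;
    set.except_ids = {42};              /// ALL EXCEPT 42
    assert(set.match(7));
    assert(!set.match(42));
    return 0;
}
```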
- ExtendedRoleSet(const ASTExtendedRoleSet & ast); - ExtendedRoleSet(const ASTExtendedRoleSet & ast, const std::optional & current_user_id); - ExtendedRoleSet(const ASTExtendedRoleSet & ast, const AccessControlManager & manager); - ExtendedRoleSet(const ASTExtendedRoleSet & ast, const AccessControlManager & manager, const std::optional & current_user_id); + RolesOrUsersSet(const ASTRolesOrUsersSet & ast); + RolesOrUsersSet(const ASTRolesOrUsersSet & ast, const std::optional & current_user_id); + RolesOrUsersSet(const ASTRolesOrUsersSet & ast, const AccessControlManager & manager); + RolesOrUsersSet(const ASTRolesOrUsersSet & ast, const AccessControlManager & manager, const std::optional & current_user_id); - std::shared_ptr toAST() const; - std::shared_ptr toASTWithNames(const AccessControlManager & manager) const; + std::shared_ptr toAST() const; + std::shared_ptr toASTWithNames(const AccessControlManager & manager) const; String toString() const; String toStringWithNames(const AccessControlManager & manager) const; @@ -47,7 +47,7 @@ struct ExtendedRoleSet void add(const UUID & id); void add(const std::vector & ids_); - /// Checks if a specified ID matches this ExtendedRoleSet. + /// Checks if a specified ID matches this RolesOrUsersSet. bool match(const UUID & id) const; bool match(const UUID & user_id, const boost::container::flat_set & enabled_roles) const; @@ -57,15 +57,15 @@ struct ExtendedRoleSet /// Returns a list of matching users and roles. std::vector getMatchingIDs(const AccessControlManager & manager) const; - friend bool operator ==(const ExtendedRoleSet & lhs, const ExtendedRoleSet & rhs); - friend bool operator !=(const ExtendedRoleSet & lhs, const ExtendedRoleSet & rhs) { return !(lhs == rhs); } + friend bool operator ==(const RolesOrUsersSet & lhs, const RolesOrUsersSet & rhs); + friend bool operator !=(const RolesOrUsersSet & lhs, const RolesOrUsersSet & rhs) { return !(lhs == rhs); } boost::container::flat_set ids; bool all = false; boost::container::flat_set except_ids; private: - void init(const ASTExtendedRoleSet & ast, const AccessControlManager * manager = nullptr, const std::optional & current_user_id = {}); + void init(const ASTRolesOrUsersSet & ast, const AccessControlManager * manager = nullptr, const std::optional & current_user_id = {}); }; } diff --git a/src/Access/RowPolicy.cpp b/src/Access/RowPolicy.cpp index 4249f351eae..7441f915a46 100644 --- a/src/Access/RowPolicy.cpp +++ b/src/Access/RowPolicy.cpp @@ -11,22 +11,6 @@ namespace ErrorCodes } -String RowPolicy::NameParts::getName() const -{ - String name; - name.reserve(database.length() + table_name.length() + short_name.length() + 6); - name += backQuoteIfNeed(short_name); - name += " ON "; - if (!name.empty()) - { - name += backQuoteIfNeed(database); - name += '.'; - } - name += backQuoteIfNeed(table_name); - return name; -} - - void RowPolicy::setDatabase(const String & database) { name_parts.database = database; diff --git a/src/Access/RowPolicy.h b/src/Access/RowPolicy.h index 7febf5991fb..c9b4d69152d 100644 --- a/src/Access/RowPolicy.h +++ b/src/Access/RowPolicy.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include @@ -23,7 +23,9 @@ struct RowPolicy : public IAccessEntity String database; String table_name; + bool empty() const { return short_name.empty(); } String getName() const; + String toString() const { return getName(); } auto toTuple() const { return std::tie(short_name, database, table_name); } friend bool operator ==(const NameParts & left, const NameParts & right) { return 
left.toTuple() == right.toTuple(); } friend bool operator !=(const NameParts & left, const NameParts & right) { return left.toTuple() != right.toTuple(); } @@ -89,7 +91,7 @@ struct RowPolicy : public IAccessEntity Type getType() const override { return TYPE; } /// Which roles or users should use this row policy. - ExtendedRoleSet to_roles; + RolesOrUsersSet to_roles; private: void setName(const String & name_) override; @@ -153,4 +155,20 @@ inline String toString(RowPolicy::ConditionType type) return RowPolicy::ConditionTypeInfo::get(type).raw_name; } + +inline String RowPolicy::NameParts::getName() const +{ + String name; + name.reserve(database.length() + table_name.length() + short_name.length() + 6); + name += backQuoteIfNeed(short_name); + name += " ON "; + if (!database.empty()) + { + name += backQuoteIfNeed(database); + name += '.'; + } + name += backQuoteIfNeed(table_name); + return name; +} + } diff --git a/src/Access/RowPolicyCache.h b/src/Access/RowPolicyCache.h index 139949ae815..f7270c6fce9 100644 --- a/src/Access/RowPolicyCache.h +++ b/src/Access/RowPolicyCache.h @@ -27,7 +27,7 @@ private: void setPolicy(const RowPolicyPtr & policy_); RowPolicyPtr policy; - const ExtendedRoleSet * roles = nullptr; + const RolesOrUsersSet * roles = nullptr; std::shared_ptr> database_and_table_name; ASTPtr parsed_conditions[RowPolicy::MAX_CONDITION_TYPE]; }; diff --git a/src/Access/SettingsProfile.h b/src/Access/SettingsProfile.h index 9589b5b3eb5..210aa47c358 100644 --- a/src/Access/SettingsProfile.h +++ b/src/Access/SettingsProfile.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include @@ -14,7 +14,7 @@ struct SettingsProfile : public IAccessEntity SettingsProfileElements elements; /// Which roles or users should use this settings profile. - ExtendedRoleSet to_roles; + RolesOrUsersSet to_roles; bool equal(const IAccessEntity & other) const override; std::shared_ptr clone() const override { return cloneImpl(); } diff --git a/src/Access/User.h b/src/Access/User.h index da2fb14e131..4852fce375d 100644 --- a/src/Access/User.h +++ b/src/Access/User.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include @@ -19,7 +19,7 @@ struct User : public IAccessEntity AllowedClientHosts allowed_client_hosts = AllowedClientHosts::AnyHostTag{}; GrantedAccess access; GrantedRoles granted_roles; - ExtendedRoleSet default_roles = ExtendedRoleSet::AllTag{}; + RolesOrUsersSet default_roles = RolesOrUsersSet::AllTag{}; SettingsProfileElements settings; bool equal(const IAccessEntity & other) const override; diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index f5f48a2390e..4d7d1b4cdfe 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -353,16 +353,17 @@ namespace for (const String & name : names) { SettingsProfileElement profile_element; - profile_element.setting_index = Settings::findIndexStrict(name); + size_t setting_index = Settings::findIndexStrict(name); + profile_element.setting_index = setting_index; Poco::Util::AbstractConfiguration::Keys constraint_types; String path_to_name = path_to_constraints + "." + name; config.keys(path_to_name, constraint_types); for (const String & constraint_type : constraint_types) { if (constraint_type == "min") - profile_element.min_value = config.getString(path_to_name + "." + constraint_type); + profile_element.min_value = Settings::valueToCorrespondingType(setting_index, config.getString(path_to_name + "." 
+                        constraint_type));
                 else if (constraint_type == "max")
-                    profile_element.max_value = config.getString(path_to_name + "." + constraint_type);
+                    profile_element.max_value = Settings::valueToCorrespondingType(setting_index, config.getString(path_to_name + "." + constraint_type));
                 else if (constraint_type == "readonly")
                     profile_element.readonly = true;
                 else
@@ -402,8 +403,9 @@ namespace
             }

             SettingsProfileElement profile_element;
-            profile_element.setting_index = Settings::findIndexStrict(key);
-            profile_element.value = config.getString(profile_config + "." + key);
+            size_t setting_index = Settings::findIndexStrict(key);
+            profile_element.setting_index = setting_index;
+            profile_element.value = Settings::valueToCorrespondingType(setting_index, config.getString(profile_config + "." + key));
             profile->elements.emplace_back(std::move(profile_element));
         }
diff --git a/src/Access/ya.make b/src/Access/ya.make
index 970c0714a93..bdd62ae2b7b 100644
--- a/src/Access/ya.make
+++ b/src/Access/ya.make
@@ -17,7 +17,6 @@ SRCS(
     EnabledRolesInfo.cpp
     EnabledRowPolicies.cpp
     EnabledSettings.cpp
-    ExtendedRoleSet.cpp
     GrantedAccess.cpp
     GrantedRoles.cpp
     IAccessEntity.cpp
@@ -29,6 +28,7 @@ SRCS(
    QuotaUsage.cpp
    Role.cpp
    RoleCache.cpp
+   RolesOrUsersSet.cpp
    RowPolicy.cpp
    RowPolicyCache.cpp
    SettingsConstraints.cpp
diff --git a/src/AggregateFunctions/AggregateFunctionArray.cpp b/src/AggregateFunctions/AggregateFunctionArray.cpp
index ced95185263..7fe4f1f448b 100644
--- a/src/AggregateFunctions/AggregateFunctionArray.cpp
+++ b/src/AggregateFunctions/AggregateFunctionArray.cpp
@@ -36,7 +36,10 @@ public:
     }

     AggregateFunctionPtr transformAggregateFunction(
-        const AggregateFunctionPtr & nested_function, const DataTypes & arguments, const Array &) const override
+        const AggregateFunctionPtr & nested_function,
+        const AggregateFunctionProperties &,
+        const DataTypes & arguments,
+        const Array &) const override
     {
         return std::make_shared(nested_function, arguments);
     }
diff --git a/src/AggregateFunctions/AggregateFunctionCount.cpp b/src/AggregateFunctions/AggregateFunctionCount.cpp
index 6c22fec87a2..b00adaa0f1a 100644
--- a/src/AggregateFunctions/AggregateFunctionCount.cpp
+++ b/src/AggregateFunctions/AggregateFunctionCount.cpp
@@ -7,6 +7,12 @@ namespace DB
 {

+AggregateFunctionPtr AggregateFunctionCount::getOwnNullAdapter(
+    const AggregateFunctionPtr &, const DataTypes & types, const Array & params) const
+{
+    return std::make_shared(types[0], params);
+}
+
 namespace
 {
@@ -22,7 +28,7 @@ AggregateFunctionPtr createAggregateFunctionCount(const std::string & name, cons

 void registerAggregateFunctionCount(AggregateFunctionFactory & factory)
 {
-    factory.registerFunction("count", createAggregateFunctionCount, AggregateFunctionFactory::CaseInsensitive);
+    factory.registerFunction("count", {createAggregateFunctionCount, {true}}, AggregateFunctionFactory::CaseInsensitive);
 }

 }
diff --git a/src/AggregateFunctions/AggregateFunctionCount.h b/src/AggregateFunctions/AggregateFunctionCount.h
index 092ffc6b6cf..feb5725d9f1 100644
--- a/src/AggregateFunctions/AggregateFunctionCount.h
+++ b/src/AggregateFunctions/AggregateFunctionCount.h
@@ -67,11 +67,15 @@ public:
     {
         data(place).count = new_count;
     }
+
+    AggregateFunctionPtr getOwnNullAdapter(
+        const AggregateFunctionPtr &, const DataTypes & types, const Array & params) const override;
 };


 /// Simply count number of not-NULL values.
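The `{createAggregateFunctionCount, {true}}` argument above is the new registration form introduced by this change: the factory now stores a creator together with `AggregateFunctionProperties`, and the inner `{true}` presumably aggregate-initializes the `returns_default_when_only_null` member referenced later in this diff. A minimal, self-contained model of that registry shape (the `Properties` and `Creator` types here are toy stand-ins, not the real ClickHouse declarations):

```cpp
#include <functional>
#include <iostream>
#include <string>
#include <unordered_map>

/// Toy model of the factory change: each registered name now carries a
/// creator plus per-function properties instead of a bare creator.
struct Properties { bool returns_default_when_only_null = false; };
using Creator = std::function<int()>;   /// stand-in for AggregateFunctionCreator

struct CreatorWithProperties
{
    Creator creator;
    Properties properties;
};

int main()
{
    std::unordered_map<std::string, CreatorWithProperties> registry;

    /// Mirrors registerFunction("count", {createAggregateFunctionCount, {true}}, ...).
    registry.emplace("count", CreatorWithProperties{[] { return 0; }, {true}});

    std::cout << registry.at("count").properties.returns_default_when_only_null << '\n';  /// prints 1
    return 0;
}
```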
-class AggregateFunctionCountNotNullUnary final : public IAggregateFunctionDataHelper +class AggregateFunctionCountNotNullUnary final + : public IAggregateFunctionDataHelper { public: AggregateFunctionCountNotNullUnary(const DataTypePtr & argument, const Array & params) diff --git a/src/AggregateFunctions/AggregateFunctionFactory.cpp b/src/AggregateFunctions/AggregateFunctionFactory.cpp index aeb4fb6db96..83221df784a 100644 --- a/src/AggregateFunctions/AggregateFunctionFactory.cpp +++ b/src/AggregateFunctions/AggregateFunctionFactory.cpp @@ -29,18 +29,18 @@ namespace ErrorCodes } -void AggregateFunctionFactory::registerFunction(const String & name, Creator creator, CaseSensitiveness case_sensitiveness) +void AggregateFunctionFactory::registerFunction(const String & name, Value creator_with_properties, CaseSensitiveness case_sensitiveness) { - if (creator == nullptr) + if (creator_with_properties.creator == nullptr) throw Exception("AggregateFunctionFactory: the aggregate function " + name + " has been provided " " a null constructor", ErrorCodes::LOGICAL_ERROR); - if (!aggregate_functions.emplace(name, creator).second) + if (!aggregate_functions.emplace(name, creator_with_properties).second) throw Exception("AggregateFunctionFactory: the aggregate function name '" + name + "' is not unique", ErrorCodes::LOGICAL_ERROR); if (case_sensitiveness == CaseInsensitive - && !case_insensitive_aggregate_functions.emplace(Poco::toLower(name), creator).second) + && !case_insensitive_aggregate_functions.emplace(Poco::toLower(name), creator_with_properties).second) throw Exception("AggregateFunctionFactory: the case insensitive aggregate function name '" + name + "' is not unique", ErrorCodes::LOGICAL_ERROR); } @@ -59,34 +59,33 @@ AggregateFunctionPtr AggregateFunctionFactory::get( const String & name, const DataTypes & argument_types, const Array & parameters, + AggregateFunctionProperties & out_properties, int recursion_level) const { auto type_without_low_cardinality = convertLowCardinalityTypesToNested(argument_types); - /// If one of types is Nullable, we apply aggregate function combinator "Null". + /// If one of the types is Nullable, we apply aggregate function combinator "Null". if (std::any_of(type_without_low_cardinality.begin(), type_without_low_cardinality.end(), [](const auto & type) { return type->isNullable(); })) { AggregateFunctionCombinatorPtr combinator = AggregateFunctionCombinatorFactory::instance().tryFindSuffix("Null"); if (!combinator) - throw Exception("Logical error: cannot find aggregate function combinator to apply a function to Nullable arguments.", ErrorCodes::LOGICAL_ERROR); + throw Exception("Logical error: cannot find aggregate function combinator to apply a function to Nullable arguments.", + ErrorCodes::LOGICAL_ERROR); DataTypes nested_types = combinator->transformArguments(type_without_low_cardinality); Array nested_parameters = combinator->transformParameters(parameters); - AggregateFunctionPtr nested_function; + bool has_null_arguments = std::any_of(type_without_low_cardinality.begin(), type_without_low_cardinality.end(), + [](const auto & type) { return type->onlyNull(); }); - /// A little hack - if we have NULL arguments, don't even create nested function. - /// Combinator will check if nested_function was created. 
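The "little hack" removed just above is replaced by a general rule: when some argument is the only-NULL type, the `Null` combinator consults `properties.returns_default_when_only_null` (see the `AggregateFunctionNull.cpp` hunk further down) instead of checking for the name "count". A condensed, self-contained sketch of that decision, with simplified types standing in for the real wrapper classes:

```cpp
#include <cassert>

/// Which wrapper the "Null" combinator picks; names roughly mirror the real classes.
enum class Wrapper { NothingReturningUInt64, NothingReturningNullable, NullAdapter };

struct Properties { bool returns_default_when_only_null = false; };

Wrapper wrapForNullableArgs(bool has_only_null_argument, Properties props)
{
    if (has_only_null_argument)
        /// e.g. count(NULL) should still return UInt64 0, while sum(NULL) returns NULL.
        return props.returns_default_when_only_null
            ? Wrapper::NothingReturningUInt64
            : Wrapper::NothingReturningNullable;

    /// Ordinary Nullable arguments: wrap the nested function (or its own null adapter).
    return Wrapper::NullAdapter;
}

int main()
{
    assert(wrapForNullableArgs(true, {true}) == Wrapper::NothingReturningUInt64);    /// count(NULL)
    assert(wrapForNullableArgs(true, {false}) == Wrapper::NothingReturningNullable); /// sum(NULL)
    assert(wrapForNullableArgs(false, {}) == Wrapper::NullAdapter);
    return 0;
}
```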
- if (name == "count" || std::none_of(type_without_low_cardinality.begin(), type_without_low_cardinality.end(), - [](const auto & type) { return type->onlyNull(); })) - nested_function = getImpl(name, nested_types, nested_parameters, recursion_level); - - return combinator->transformAggregateFunction(nested_function, type_without_low_cardinality, parameters); + AggregateFunctionPtr nested_function = getImpl( + name, nested_types, nested_parameters, out_properties, has_null_arguments, recursion_level); + return combinator->transformAggregateFunction(nested_function, out_properties, type_without_low_cardinality, parameters); } - auto res = getImpl(name, type_without_low_cardinality, parameters, recursion_level); + auto res = getImpl(name, type_without_low_cardinality, parameters, out_properties, false, recursion_level); if (!res) throw Exception("Logical error: AggregateFunctionFactory returned nullptr", ErrorCodes::LOGICAL_ERROR); return res; @@ -97,19 +96,35 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl( const String & name_param, const DataTypes & argument_types, const Array & parameters, + AggregateFunctionProperties & out_properties, + bool has_null_arguments, int recursion_level) const { String name = getAliasToOrName(name_param); + Value found; + /// Find by exact match. if (auto it = aggregate_functions.find(name); it != aggregate_functions.end()) - return it->second(name, argument_types, parameters); - + { + found = it->second; + } /// Find by case-insensitive name. /// Combinators cannot apply for case insensitive (SQL-style) aggregate function names. Only for native names. - if (recursion_level == 0) + else if (recursion_level == 0) { - if (auto it = case_insensitive_aggregate_functions.find(Poco::toLower(name)); it != case_insensitive_aggregate_functions.end()) - return it->second(name, argument_types, parameters); + if (auto jt = case_insensitive_aggregate_functions.find(Poco::toLower(name)); jt != case_insensitive_aggregate_functions.end()) + found = jt->second; + } + + if (found.creator) + { + out_properties = found.properties; + + /// The case when aggregate function should return NULL on NULL arguments. This case is handled in "get" method. + if (!out_properties.returns_default_when_only_null && has_null_arguments) + return nullptr; + + return found.creator(name, argument_types, parameters); } /// Combinators of aggregate functions. @@ -125,23 +140,24 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl( DataTypes nested_types = combinator->transformArguments(argument_types); Array nested_parameters = combinator->transformParameters(parameters); - AggregateFunctionPtr nested_function = get(nested_name, nested_types, nested_parameters, recursion_level + 1); - - return combinator->transformAggregateFunction(nested_function, argument_types, parameters); + AggregateFunctionPtr nested_function = get(nested_name, nested_types, nested_parameters, out_properties, recursion_level + 1); + return combinator->transformAggregateFunction(nested_function, out_properties, argument_types, parameters); } auto hints = this->getHints(name); if (!hints.empty()) - throw Exception("Unknown aggregate function " + name + ". Maybe you meant: " + toString(hints), ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION); + throw Exception(fmt::format("Unknown aggregate function {}. 
Maybe you meant: {}", name, toString(hints)), + ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION); else - throw Exception("Unknown aggregate function " + name, ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION); + throw Exception(fmt::format("Unknown aggregate function {}", name), ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION); } -AggregateFunctionPtr AggregateFunctionFactory::tryGet(const String & name, const DataTypes & argument_types, const Array & parameters) const +AggregateFunctionPtr AggregateFunctionFactory::tryGet( + const String & name, const DataTypes & argument_types, const Array & parameters, AggregateFunctionProperties & out_properties) const { return isAggregateFunctionName(name) - ? get(name, argument_types, parameters) + ? get(name, argument_types, parameters, out_properties) : nullptr; } diff --git a/src/AggregateFunctions/AggregateFunctionFactory.h b/src/AggregateFunctions/AggregateFunctionFactory.h index 6e755cc9e8c..90e44145f4b 100644 --- a/src/AggregateFunctions/AggregateFunctionFactory.h +++ b/src/AggregateFunctions/AggregateFunctionFactory.h @@ -26,34 +26,51 @@ using DataTypes = std::vector; */ using AggregateFunctionCreator = std::function; +struct AggregateFunctionWithProperties +{ + AggregateFunctionCreator creator; + AggregateFunctionProperties properties; + + AggregateFunctionWithProperties() = default; + AggregateFunctionWithProperties(const AggregateFunctionWithProperties &) = default; + AggregateFunctionWithProperties & operator = (const AggregateFunctionWithProperties &) = default; + + template > * = nullptr> + AggregateFunctionWithProperties(Creator creator_, AggregateFunctionProperties properties_ = {}) + : creator(std::forward(creator_)), properties(std::move(properties_)) + { + } +}; + /** Creates an aggregate function by name. */ -class AggregateFunctionFactory final : private boost::noncopyable, public IFactoryWithAliases +class AggregateFunctionFactory final : private boost::noncopyable, public IFactoryWithAliases { public: - static AggregateFunctionFactory & instance(); /// Register a function by its name. /// No locking, you must register all functions before usage of get. void registerFunction( const String & name, - Creator creator, + Value creator, CaseSensitiveness case_sensitiveness = CaseSensitive); /// Throws an exception if not found. AggregateFunctionPtr get( const String & name, const DataTypes & argument_types, - const Array & parameters = {}, + const Array & parameters, + AggregateFunctionProperties & out_properties, int recursion_level = 0) const; /// Returns nullptr if not found. AggregateFunctionPtr tryGet( const String & name, const DataTypes & argument_types, - const Array & parameters = {}) const; + const Array & parameters, + AggregateFunctionProperties & out_properties) const; bool isAggregateFunctionName(const String & name, int recursion_level = 0) const; @@ -62,19 +79,21 @@ private: const String & name, const DataTypes & argument_types, const Array & parameters, + AggregateFunctionProperties & out_properties, + bool has_null_arguments, int recursion_level) const; private: - using AggregateFunctions = std::unordered_map; + using AggregateFunctions = std::unordered_map; AggregateFunctions aggregate_functions; /// Case insensitive aggregate functions will be additionally added here with lowercased name. 
AggregateFunctions case_insensitive_aggregate_functions; - const AggregateFunctions & getCreatorMap() const override { return aggregate_functions; } + const AggregateFunctions & getMap() const override { return aggregate_functions; } - const AggregateFunctions & getCaseInsensitiveCreatorMap() const override { return case_insensitive_aggregate_functions; } + const AggregateFunctions & getCaseInsensitiveMap() const override { return case_insensitive_aggregate_functions; } String getFactoryName() const override { return "AggregateFunctionFactory"; } diff --git a/src/AggregateFunctions/AggregateFunctionForEach.cpp b/src/AggregateFunctions/AggregateFunctionForEach.cpp index 775dab2dcd9..693bc6839fa 100644 --- a/src/AggregateFunctions/AggregateFunctionForEach.cpp +++ b/src/AggregateFunctions/AggregateFunctionForEach.cpp @@ -33,7 +33,10 @@ public: } AggregateFunctionPtr transformAggregateFunction( - const AggregateFunctionPtr & nested_function, const DataTypes & arguments, const Array &) const override + const AggregateFunctionPtr & nested_function, + const AggregateFunctionProperties &, + const DataTypes & arguments, + const Array &) const override { return std::make_shared(nested_function, arguments); } diff --git a/src/AggregateFunctions/AggregateFunctionIf.cpp b/src/AggregateFunctions/AggregateFunctionIf.cpp index cb5f9f15b1c..19a175de911 100644 --- a/src/AggregateFunctions/AggregateFunctionIf.cpp +++ b/src/AggregateFunctions/AggregateFunctionIf.cpp @@ -31,7 +31,10 @@ public: } AggregateFunctionPtr transformAggregateFunction( - const AggregateFunctionPtr & nested_function, const DataTypes & arguments, const Array &) const override + const AggregateFunctionPtr & nested_function, + const AggregateFunctionProperties &, + const DataTypes & arguments, + const Array &) const override { return std::make_shared(nested_function, arguments); } diff --git a/src/AggregateFunctions/AggregateFunctionMerge.cpp b/src/AggregateFunctions/AggregateFunctionMerge.cpp index 05d941844d9..2ce3f0e11f6 100644 --- a/src/AggregateFunctions/AggregateFunctionMerge.cpp +++ b/src/AggregateFunctions/AggregateFunctionMerge.cpp @@ -34,7 +34,10 @@ public: } AggregateFunctionPtr transformAggregateFunction( - const AggregateFunctionPtr & nested_function, const DataTypes & arguments, const Array &) const override + const AggregateFunctionPtr & nested_function, + const AggregateFunctionProperties &, + const DataTypes & arguments, + const Array &) const override { const DataTypePtr & argument = arguments[0]; diff --git a/src/AggregateFunctions/AggregateFunctionNothing.h b/src/AggregateFunctions/AggregateFunctionNothing.h index 511dbbecd38..b3206f6db6e 100644 --- a/src/AggregateFunctions/AggregateFunctionNothing.h +++ b/src/AggregateFunctions/AggregateFunctionNothing.h @@ -25,7 +25,7 @@ public: DataTypePtr getReturnType() const override { - return std::make_shared(std::make_shared()); + return argument_types.front(); } void create(AggregateDataPtr) const override diff --git a/src/AggregateFunctions/AggregateFunctionNull.cpp b/src/AggregateFunctions/AggregateFunctionNull.cpp index 60712636562..b8fbad53350 100644 --- a/src/AggregateFunctions/AggregateFunctionNull.cpp +++ b/src/AggregateFunctions/AggregateFunctionNull.cpp @@ -31,7 +31,10 @@ public: } AggregateFunctionPtr transformAggregateFunction( - const AggregateFunctionPtr & nested_function, const DataTypes & arguments, const Array & params) const override + const AggregateFunctionPtr & nested_function, + const AggregateFunctionProperties & properties, + const DataTypes & arguments, 
+        const Array & params) const override
     {
         bool has_nullable_types = false;
         bool has_null_types = false;
@@ -49,35 +52,55 @@ public:
         }

         if (!has_nullable_types)
-            throw Exception("Aggregate function combinator 'Null' requires at least one argument to be Nullable", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
-
-        if (nested_function)
-            if (auto adapter = nested_function->getOwnNullAdapter(nested_function, arguments, params))
-                return adapter;
-
-        /// Special case for 'count' function. It could be called with Nullable arguments
-        /// - that means - count number of calls, when all arguments are not NULL.
-        if (nested_function && nested_function->getName() == "count")
-            return std::make_shared<AggregateFunctionCountNotNullUnary>(arguments[0], params);
+            throw Exception("Aggregate function combinator 'Null' requires at least one argument to be Nullable",
+                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

         if (has_null_types)
-            return std::make_shared<AggregateFunctionNothing>(arguments, params);
+        {
+            /// Currently the only functions that return not-NULL on all NULL arguments are count and uniq, and they return UInt64.
+            if (properties.returns_default_when_only_null)
+                return std::make_shared<AggregateFunctionNothing>(DataTypes{
+                    std::make_shared<DataTypeUInt64>()}, params);
+            else
+                return std::make_shared<AggregateFunctionNothing>(DataTypes{
+                    std::make_shared<DataTypeNullable>(std::make_shared<DataTypeNothing>())}, params);
+        }

-        bool return_type_is_nullable = nested_function->getReturnType()->canBeInsideNullable();
+        assert(nested_function);
+
+        if (auto adapter = nested_function->getOwnNullAdapter(nested_function, arguments, params))
+            return adapter;
+
+        bool return_type_is_nullable = !properties.returns_default_when_only_null && nested_function->getReturnType()->canBeInsideNullable();
+        bool serialize_flag = return_type_is_nullable || properties.returns_default_when_only_null;

         if (arguments.size() == 1)
         {
             if (return_type_is_nullable)
-                return std::make_shared<AggregateFunctionNullUnary<true>>(nested_function, arguments, params);
+            {
+                return std::make_shared<AggregateFunctionNullUnary<true, true>>(nested_function, arguments, params);
+            }
             else
-                return std::make_shared<AggregateFunctionNullUnary<false>>(nested_function, arguments, params);
+            {
+                if (serialize_flag)
+                    return std::make_shared<AggregateFunctionNullUnary<false, true>>(nested_function, arguments, params);
+                else
+                    return std::make_shared<AggregateFunctionNullUnary<false, false>>(nested_function, arguments, params);
+            }
         }
         else
         {
             if (return_type_is_nullable)
-                return std::make_shared<AggregateFunctionNullVariadic<true>>(nested_function, arguments, params);
+            {
+                return std::make_shared<AggregateFunctionNullVariadic<true, true>>(nested_function, arguments, params);
+            }
             else
-                return std::make_shared<AggregateFunctionNullVariadic<false>>(nested_function, arguments, params);
+            {
+                if (serialize_flag)
+                    return std::make_shared<AggregateFunctionNullVariadic<false, true>>(nested_function, arguments, params);
+                else
+                    return std::make_shared<AggregateFunctionNullVariadic<false, false>>(nested_function, arguments, params);
+            }
         }
     }
 };
diff --git a/src/AggregateFunctions/AggregateFunctionNull.h b/src/AggregateFunctions/AggregateFunctionNull.h
index 55d610207f1..d6f0079232c 100644
--- a/src/AggregateFunctions/AggregateFunctionNull.h
+++ b/src/AggregateFunctions/AggregateFunctionNull.h
@@ -28,7 +28,10 @@ namespace ErrorCodes
 /// If all rows had NULL, the behaviour is determined by "result_is_nullable" template parameter.
 /// true - return NULL; false - return value from empty aggregation state of nested function.
-template <bool result_is_nullable, typename Derived>
+/// When serialize_flag is set to true, the flag about presence of values is serialized
+/// regardless of "result_is_nullable" even if it's not needed - for protocol compatibility.
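The comment above is the key behavioral point of the new `serialize_flag` parameter: the presence flag is written whenever either the result is Nullable or the function returns a default on only-NULL input, so the on-wire state format stays compatible. A small sketch of the resulting layout, using a plain byte vector in place of the real `WriteBuffer` (an assumed simplification):

```cpp
#include <cstdint>
#include <vector>

/// Sketch of the layout produced by AggregateFunctionNullBase::serialize:
///   serialize_flag == true  ->  [uint8 has_value][nested state, only if has_value]
///   serialize_flag == false ->  [nested state]
std::vector<uint8_t> serializeState(bool serialize_flag, bool has_value,
                                    const std::vector<uint8_t> & nested_state)
{
    std::vector<uint8_t> out;
    if (serialize_flag)
        out.push_back(has_value ? 1 : 0);            /// presence flag written first
    if (has_value)
        out.insert(out.end(), nested_state.begin(), nested_state.end());
    return out;
}

int main()
{
    auto with_flag = serializeState(true, true, {0xAB});
    auto without_flag = serializeState(false, true, {0xAB});
    return (with_flag.size() == 2 && without_flag.size() == 1) ? 0 : 1;
}
```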
+ +template class AggregateFunctionNullBase : public IAggregateFunctionHelper { protected: @@ -129,7 +132,7 @@ public: void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override { bool flag = getFlag(place); - if constexpr (result_is_nullable) + if constexpr (serialize_flag) writeBinary(flag, buf); if (flag) nested_function->serialize(nestedPlace(place), buf); @@ -138,7 +141,7 @@ public: void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override { bool flag = 1; - if constexpr (result_is_nullable) + if constexpr (serialize_flag) readBinary(flag, buf); if (flag) { @@ -183,12 +186,15 @@ public: /** There are two cases: for single argument and variadic. * Code for single argument is much more efficient. */ -template -class AggregateFunctionNullUnary final : public AggregateFunctionNullBase> +template +class AggregateFunctionNullUnary final + : public AggregateFunctionNullBase> { public: AggregateFunctionNullUnary(AggregateFunctionPtr nested_function_, const DataTypes & arguments, const Array & params) - : AggregateFunctionNullBase>(std::move(nested_function_), arguments, params) + : AggregateFunctionNullBase>(std::move(nested_function_), arguments, params) { } @@ -218,12 +224,15 @@ public: }; -template -class AggregateFunctionNullVariadic final : public AggregateFunctionNullBase> +template +class AggregateFunctionNullVariadic final + : public AggregateFunctionNullBase> { public: AggregateFunctionNullVariadic(AggregateFunctionPtr nested_function_, const DataTypes & arguments, const Array & params) - : AggregateFunctionNullBase>(std::move(nested_function_), arguments, params), + : AggregateFunctionNullBase>(std::move(nested_function_), arguments, params), number_of_arguments(arguments.size()) { if (number_of_arguments == 1) @@ -263,11 +272,6 @@ public: this->nested_function->add(this->nestedPlace(place), nested_columns, row_num, arena); } - bool allocatesMemoryInArena() const override - { - return this->nested_function->allocatesMemoryInArena(); - } - private: enum { MAX_ARGS = 8 }; size_t number_of_arguments = 0; diff --git a/src/AggregateFunctions/AggregateFunctionOrFill.cpp b/src/AggregateFunctions/AggregateFunctionOrFill.cpp index b9cc2f9b8b7..ce8fc8d9ca5 100644 --- a/src/AggregateFunctions/AggregateFunctionOrFill.cpp +++ b/src/AggregateFunctions/AggregateFunctionOrFill.cpp @@ -21,6 +21,7 @@ public: AggregateFunctionPtr transformAggregateFunction( const AggregateFunctionPtr & nested_function, + const AggregateFunctionProperties &, const DataTypes & arguments, const Array & params) const override { diff --git a/src/AggregateFunctions/AggregateFunctionResample.cpp b/src/AggregateFunctions/AggregateFunctionResample.cpp index d8d13e22120..389c9048918 100644 --- a/src/AggregateFunctions/AggregateFunctionResample.cpp +++ b/src/AggregateFunctions/AggregateFunctionResample.cpp @@ -43,6 +43,7 @@ public: AggregateFunctionPtr transformAggregateFunction( const AggregateFunctionPtr & nested_function, + const AggregateFunctionProperties &, const DataTypes & arguments, const Array & params) const override { diff --git a/src/AggregateFunctions/AggregateFunctionState.cpp b/src/AggregateFunctions/AggregateFunctionState.cpp index fd92953d114..9d1c677c0ff 100644 --- a/src/AggregateFunctions/AggregateFunctionState.cpp +++ b/src/AggregateFunctions/AggregateFunctionState.cpp @@ -24,7 +24,10 @@ public: } AggregateFunctionPtr transformAggregateFunction( - const AggregateFunctionPtr & nested_function, const DataTypes & arguments, const Array & params) const override + 
const AggregateFunctionPtr & nested_function, + const AggregateFunctionProperties &, + const DataTypes & arguments, + const Array & params) const override { return std::make_shared(nested_function, arguments, params); } diff --git a/src/AggregateFunctions/AggregateFunctionSumMap.cpp b/src/AggregateFunctions/AggregateFunctionSumMap.cpp index f4e299fe7c9..9d553ebddb4 100644 --- a/src/AggregateFunctions/AggregateFunctionSumMap.cpp +++ b/src/AggregateFunctions/AggregateFunctionSumMap.cpp @@ -18,21 +18,6 @@ namespace ErrorCodes namespace { -template -struct SumMap -{ - template - using F = AggregateFunctionSumMap; -}; - -template -struct SumMapFiltered -{ - template - using F = AggregateFunctionSumMapFiltered; -}; - - auto parseArguments(const std::string & name, const DataTypes & arguments) { DataTypes args; @@ -85,30 +70,32 @@ auto parseArguments(const std::string & name, const DataTypes & arguments) tuple_argument}; } -template -AggregateFunctionPtr createAggregateFunctionSumMap(const std::string & name, const DataTypes & arguments, const Array & params) +// This function instantiates a particular overload of the sumMap family of +// functions. +// The template parameter MappedFunction is an aggregate +// function template that allows to choose the aggregate function variant that +// accepts either normal arguments or tuple argument. +template