diff --git a/CHANGELOG.md b/CHANGELOG.md index a0ea7f62b6c..d5301de8a23 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -641,4 +641,4 @@ #### Security Fix * Fixed the possibility of reading directories structure in tables with `File` table engine. This fixes [#8536](https://github.com/ClickHouse/ClickHouse/issues/8536). [#8537](https://github.com/ClickHouse/ClickHouse/pull/8537) ([alexey-milovidov](https://github.com/alexey-milovidov)) -## [Changelog for 2019](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/changelog/2019.md) +## [Changelog for 2019](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/whats_new/changelog/2019.md) diff --git a/README.md b/README.md index 84af1e30a6b..3db5e08d2a9 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,6 @@ ClickHouse is an open-source column-oriented database management system that all ## Upcoming Events -* [ClickHouse in Avito (online in Russian)](https://avitotech.timepad.ru/event/1290051/) on April 9, 2020. +* [ClickHouse Monitoring Round Table (online in English)](https://www.eventbrite.com/e/clickhouse-april-virtual-meetup-tickets-102272923066) on April 15, 2020. * [ClickHouse Workshop in Novosibirsk](https://2020.codefest.ru/lecture/1628) on TBD date. * [Yandex C++ Open-Source Sprints in Moscow](https://events.yandex.ru/events/otkrytyj-kod-v-yandek-28-03-2020) on TBD date. diff --git a/docker/packager/README.md b/docker/packager/README.md index e02a45fdaea..5d9751a0fbd 100644 --- a/docker/packager/README.md +++ b/docker/packager/README.md @@ -3,10 +3,10 @@ compilers and build settings. Correctly configured Docker daemon is single depen Usage: -Build deb package with `gcc-8` in `debug` mode: +Build deb package with `gcc-9` in `debug` mode: ``` $ mkdir deb/test_output -$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=gcc-8 --build-type=debug +$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=gcc-9 --build-type=debug $ ls -l deb/test_output -rw-r--r-- 1 root root 3730 clickhouse-client_18.14.2+debug_all.deb -rw-r--r-- 1 root root 84221888 clickhouse-common-static_18.14.2+debug_amd64.deb @@ -18,11 +18,11 @@ $ ls -l deb/test_output ``` -Build ClickHouse binary with `clang-6.0` and `address` sanitizer in `relwithdebuginfo` +Build ClickHouse binary with `clang-9.0` and `address` sanitizer in `relwithdebuginfo` mode: ``` $ mkdir $HOME/some_clickhouse -$ ./packager --output-dir=$HOME/some_clickhouse --package-type binary --compiler=clang-6.0 --sanitizer=address +$ ./packager --output-dir=$HOME/some_clickhouse --package-type binary --compiler=clang-9.0 --sanitizer=address $ ls -l $HOME/some_clickhouse -rwxr-xr-x 1 root root 787061952 clickhouse lrwxrwxrwx 1 root root 10 clickhouse-benchmark -> clickhouse diff --git a/docker/packager/freebsd/Vagrantfile b/docker/packager/freebsd/Vagrantfile deleted file mode 100644 index 765f46d5604..00000000000 --- a/docker/packager/freebsd/Vagrantfile +++ /dev/null @@ -1,4 +0,0 @@ -Vagrant.configure("2") do |config| - config.vm.box = "robot-clickhouse/clickhouse-freebsd" - config.vm.synced_folder ".", "/vagrant", disabled: true -end diff --git a/docker/packager/packager b/docker/packager/packager index 360a358c6e5..71380b92fac 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -11,48 +11,8 @@ SCRIPT_PATH = os.path.realpath(__file__) IMAGE_MAP = { "deb": "yandex/clickhouse-deb-builder", "binary": "yandex/clickhouse-binary-builder", - "freebsd": os.path.join(os.path.dirname(SCRIPT_PATH), "freebsd"), } -class Vagrant(object): - 
def __init__(self, path_to_vagrant_file): - self.prefix = "VAGRANT_CWD=" + path_to_vagrant_file - - def __enter__(self): - subprocess.check_call("{} vagrant up".format(self.prefix), shell=True) - self.ssh_path = "/tmp/vagrant-ssh" - subprocess.check_call("{} vagrant ssh-config > {}".format(self.prefix, self.ssh_path), shell=True) - return self - - def copy_to_image(self, local_path, remote_path): - cmd = "scp -F {ssh} -r {lpath} default:{rpath}".format(ssh=self.ssh_path, lpath=local_path, rpath=remote_path) - logging.info("Copying to image %s", cmd) - subprocess.check_call( - cmd, - shell=True - ) - - def copy_from_image(self, remote_path, local_path): - cmd = "scp -F {ssh} -r default:{rpath} {lpath}".format(ssh=self.ssh_path, rpath=remote_path, lpath=local_path) - logging.info("Copying from image %s", cmd) - subprocess.check_call( - cmd, - shell=True - ) - - def execute_cmd(self, cmd): - cmd = '{} vagrant ssh -c "{}"'.format(self.prefix, cmd) - logging.info("Executin cmd %s", cmd) - subprocess.check_call( - cmd, - shell=True - ) - - def __exit__(self, exc_type, exc_val, exc_tb): - logging.info("Destroying image") - subprocess.check_call("{} vagrant destroy --force".format(self.prefix), shell=True) - - def check_image_exists_locally(image_name): try: output = subprocess.check_output("docker images -q {} 2> /dev/null".format(image_name), shell=True) @@ -94,15 +54,6 @@ def run_docker_image_with_env(image_name, output, env_variables, ch_root, ccache subprocess.check_call(cmd, shell=True) -def run_vagrant_box_with_env(image_path, output_dir, ch_root): - with Vagrant(image_path) as vagrant: - logging.info("Copying folder to vagrant machine") - vagrant.copy_to_image(ch_root, "~/ClickHouse") - logging.info("Running build") - vagrant.execute_cmd("cd ~/ClickHouse && cmake . && ninja") - logging.info("Copying binary back") - vagrant.copy_from_image("~/ClickHouse/programs/clickhouse", output_dir) - def parse_env_variables(build_type, compiler, sanitizer, package_type, image_type, cache, distcc_hosts, unbundled, split_binary, clang_tidy, version, author, official, alien_pkgs, with_coverage): CLANG_PREFIX = "clang" DARWIN_SUFFIX = "-darwin" @@ -210,11 +161,11 @@ if __name__ == "__main__": logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') parser = argparse.ArgumentParser(description="ClickHouse building script using prebuilt Docker image") # 'performance' creates a combined .tgz with server and configs to be used for performance test. 
- parser.add_argument("--package-type", choices=['deb', 'binary', 'performance', 'freebsd'], required=True) + parser.add_argument("--package-type", choices=['deb', 'binary', 'performance'], required=True) parser.add_argument("--clickhouse-repo-path", default="../../") parser.add_argument("--output-dir", required=True) parser.add_argument("--build-type", choices=("debug", ""), default="") - parser.add_argument("--compiler", choices=("clang-8", "clang-8-darwin", "clang-8-aarch64", "gcc-8", "gcc-9", "clang-9"), default="gcc-8") + parser.add_argument("--compiler", choices=("clang-8", "clang-8-darwin", "clang-9-aarch64", "clang-9-freebsd", "gcc-8", "gcc-9", "clang-9"), default="gcc-8") parser.add_argument("--sanitizer", choices=("address", "thread", "memory", "undefined", ""), default="") parser.add_argument("--unbundled", action="store_true") parser.add_argument("--split-binary", action="store_true") @@ -252,9 +203,5 @@ if __name__ == "__main__": args.build_type, args.compiler, args.sanitizer, args.package_type, image_type, args.cache, args.distcc_hosts, args.unbundled, args.split_binary, args.clang_tidy, args.version, args.author, args.official, args.alien_pkgs, args.with_coverage) - if image_type != "freebsd": - run_docker_image_with_env(image_name, args.output_dir, env_prepared, ch_root, args.ccache_dir) - else: - logging.info("Running freebsd build, arguments will be ignored") - run_vagrant_box_with_env(image_name, args.output_dir, ch_root) + run_docker_image_with_env(image_name, args.output_dir, env_prepared, ch_root, args.ccache_dir) logging.info("Output placed into {}".format(args.output_dir)) diff --git a/docs/en/commercial/index.md b/docs/en/commercial/index.md index 3e0a0ac236a..f9065c7cd50 100644 --- a/docs/en/commercial/index.md +++ b/docs/en/commercial/index.md @@ -1,6 +1,7 @@ --- -toc_title: Commercial toc_folder_title: Commercial toc_priority: 70 +toc_title: Commercial --- + diff --git a/docs/en/development/architecture.md b/docs/en/development/architecture.md index 1932e3e31d8..cfd852637fe 100644 --- a/docs/en/development/architecture.md +++ b/docs/en/development/architecture.md @@ -41,7 +41,7 @@ Various functions on columns can be implemented in a generic, non-efficient way ## Block {#block} -A `Block` is a container that represents a subset (chunk) of a table in memory. It is just a set of triples: `(IColumn, IDataType, column name)`. During query execution, data is processed by `Block`s. If we have a `Block`, we have data (in the `IColumn` object), we have information about its type (in `IDataType`) that tells us how to deal with that column, and we have the column name. It could be either the original column name from the table or some artificial name assigned for getting temporary results of calculations. +A `Block` is a container that represents a subset (chunk) of a table in memory. It is just a set of triples: `(IColumn, IDataType, column name)`. During query execution, data is processed by `Block`s. If we have a `Block`, we have data (in the `IColumn` object), we have information about its type (in `IDataType`) that tells us how to deal with that column, and we have the column name. It could be either the original column name from the table or some artificial name assigned for getting temporary results of calculations. When we calculate some function over columns in a block, we add another column with its result to the block, and we don’t touch columns for arguments of the function because operations are immutable. 
Later, unneeded columns can be removed from the block, but not modified. It is convenient for the elimination of common subexpressions. @@ -77,7 +77,7 @@ For byte-oriented input/output, there are `ReadBuffer` and `WriteBuffer` abstrac Implementations of `ReadBuffer`/`WriteBuffer` are used for working with files and file descriptors and network sockets, for implementing compression (`CompressedWriteBuffer` is initialized with another WriteBuffer and performs compression before writing data to it), and for other purposes – the names `ConcatReadBuffer`, `LimitReadBuffer`, and `HashingWriteBuffer` speak for themselves. -Read/WriteBuffers only deal with bytes. There are functions from `ReadHelpers` and `WriteHelpers` header files to help with formatting input/output. For example, there are helpers to write a number in decimal format. +Read/WriteBuffers only deal with bytes. There are functions from `ReadHelpers` and `WriteHelpers` header files to help with formatting input/output. For example, there are helpers to write a number in decimal format. Let’s look at what happens when you want to write a result set in `JSON` format to stdout. You have a result set ready to be fetched from `IBlockInputStream`. You create `WriteBufferFromFileDescriptor(STDOUT_FILENO)` to write bytes to stdout. You create `JSONRowOutputStream`, initialized with that `WriteBuffer`, to write rows in `JSON` to stdout. You create `BlockOutputStreamFromRowOutputStream` on top of it, to represent it as `IBlockOutputStream`. Then you call `copyData` to transfer data from `IBlockInputStream` to `IBlockOutputStream`, and everything works. Internally, `JSONRowOutputStream` will write various JSON delimiters and call the `IDataType::serializeTextJSON` method with a reference to `IColumn` and the row number as arguments. Consequently, `IDataType::serializeTextJSON` will call a method from `WriteHelpers.h`: for example, `writeText` for numeric types and `writeJSONString` for `DataTypeString`. @@ -155,7 +155,7 @@ The server initializes the `Context` class with the necessary environment for qu We maintain full backward and forward compatibility for the server TCP protocol: old clients can talk to new servers, and new clients can talk to old servers. But we don’t want to maintain it eternally, and we are removing support for old versions after about one year. !!! note "Note" - For most external applications, we recommend using the HTTP interface because it is simple and easy to use. The TCP protocol is more tightly linked to internal data structures: it uses an internal format for passing blocks of data, and it uses custom framing for compressed data. We haven’t released a C library for that protocol because it requires linking most of the ClickHouse codebase, which is not practical. + For most external applications, we recommend using the HTTP interface because it is simple and easy to use. The TCP protocol is more tightly linked to internal data structures: it uses an internal format for passing blocks of data, and it uses custom framing for compressed data. We haven’t released a C library for that protocol because it requires linking most of the ClickHouse codebase, which is not practical. 
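Since the note above recommends the HTTP interface for most external applications, here is a minimal sketch of what that looks like in practice (assuming a server listening on the default HTTP port 8123; the query text is illustrative):

``` bash
# Send a query over the HTTP interface; the result comes back as plain text.
curl 'http://localhost:8123/' --data-binary 'SELECT version()'
```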
## Distributed Query Execution {#distributed-query-execution} diff --git a/docs/en/development/build_cross_arm.md b/docs/en/development/build_cross_arm.md index 24a7c8363f6..b2b5fa6b1e0 100644 --- a/docs/en/development/build_cross_arm.md +++ b/docs/en/development/build_cross_arm.md @@ -3,7 +3,7 @@ toc_priority: 67 toc_title: How to Build ClickHouse on Linux for AARCH64 (ARM64) --- -# How to Build ClickHouse on Linux for AARCH64 (ARM64) architecture {#how-to-build-clickhouse-on-linux-for-aarch64-arm64-architecture} +# How to Build ClickHouse on Linux for AARCH64 (ARM64) Architecture {#how-to-build-clickhouse-on-linux-for-aarch64-arm64-architecture} This is for the case when you have Linux machine and want to use it to build `clickhouse` binary that will run on another Linux machine with AARCH64 CPU architecture. This is intended for continuous integration checks that run on Linux servers. diff --git a/docs/en/development/contrib.md b/docs/en/development/contrib.md index 9c594d3e03f..22f84370b6c 100644 --- a/docs/en/development/contrib.md +++ b/docs/en/development/contrib.md @@ -6,7 +6,7 @@ toc_title: Third-Party Libraries Used # Third-Party Libraries Used {#third-party-libraries-used} | Library | License | -|-------------|--------------------------------------------------------------------------------------| +|---------------------|----------------------------------------------------------------------------------------------------------------------------------------------| | base64 | [BSD 2-Clause License](https://github.com/aklomp/base64/blob/a27c565d1b6c676beaf297fe503c4518185666f7/LICENSE) | | boost | [Boost Software License 1.0](https://github.com/ClickHouse-Extras/boost-extra/blob/6883b40449f378019aec792f9983ce3afc7ff16e/LICENSE_1_0.txt) | | brotli | [MIT](https://github.com/google/brotli/blob/master/LICENSE) | diff --git a/docs/en/development/developer_instruction.md b/docs/en/development/developer_instruction.md index 5cac7203d87..60a949893d7 100644 --- a/docs/en/development/developer_instruction.md +++ b/docs/en/development/developer_instruction.md @@ -5,15 +5,15 @@ toc_title: The Beginner ClickHouse Developer Instruction Building of ClickHouse is supported on Linux, FreeBSD and Mac OS X. -# If you use Windows {#if-you-use-windows} +# If You Use Windows {#if-you-use-windows} If you use Windows, you need to create a virtual machine with Ubuntu. To start working with a virtual machine please install VirtualBox. You can download Ubuntu from the website: https://www.ubuntu.com/\#download. Please create a virtual machine from the downloaded image (you should reserve at least 4GB of RAM for it). To run a command-line terminal in Ubuntu, please locate a program containing the word “terminal” in its name (gnome-terminal, konsole etc.) or just press Ctrl+Alt+T. -# If you use a 32-bit system {#if-you-use-a-32-bit-system} +# If You Use a 32-bit System {#if-you-use-a-32-bit-system} ClickHouse cannot work or build on a 32-bit system. You should acquire access to a 64-bit system and you can continue reading. -# Creating a repository on GitHub {#creating-a-repository-on-github} +# Creating a Repository on GitHub {#creating-a-repository-on-github} To start working with ClickHouse repository you will need a GitHub account. @@ -33,7 +33,7 @@ To do that in Ubuntu you would run in the command line terminal: A brief manual on using Git can be found here: https://services.github.com/on-demand/downloads/github-git-cheat-sheet.pdf. For a detailed manual on Git see https://git-scm.com/book/en/v2. 
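For instance, a minimal setup on Ubuntu could look like the following sketch (package names per the standard Ubuntu repositories):

``` bash
# Install git and confirm it is available before cloning the repository.
sudo apt update
sudo apt install -y git
git --version
```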
-# Cloning a repository to your development machine {#cloning-a-repository-to-your-development-machine}
+# Cloning a Repository to Your Development Machine {#cloning-a-repository-to-your-development-machine}

Next, you need to download the source files onto your working machine. This is called “to clone a repository” because it creates a local copy of the repository on your working machine.

@@ -77,7 +77,7 @@ You can also add original ClickHouse repo’s address to your local repository t

After successfully running this command you will be able to pull updates from the main ClickHouse repo by running `git pull upstream master`.

-## Working with submodules {#working-with-submodules}
+## Working with Submodules {#working-with-submodules}

Working with submodules in git could be painful. The following commands will help you manage them:

@@ -145,7 +145,7 @@ Mac OS X build is supported only for Clang. Just run `brew install llvm`

If you decide to use Clang, you can also install `libc++` and `lld`, if you know what it is. Using `ccache` is also recommended.

-# The Building process {#the-building-process}
+# The Building Process {#the-building-process}

Now that you are ready to build ClickHouse we recommend you to create a separate directory `build` inside `ClickHouse` that will contain all of the build artefacts:

@@ -202,7 +202,7 @@ Upon successful build you get an executable file `ClickHouse/<build_dir>/program

ls -l programs/clickhouse

-# Running the built executable of ClickHouse {#running-the-built-executable-of-clickhouse}
+# Running the Built Executable of ClickHouse {#running-the-built-executable-of-clickhouse}

To run the server under the current user you need to navigate to `ClickHouse/programs/server/` (located outside of `build`) and run:

diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md
index 2507940791d..02620b92367 100644
--- a/docs/en/development/tests.md
+++ b/docs/en/development/tests.md
@@ -11,7 +11,7 @@ Functional tests are the most simple and convenient to use. Most of ClickHouse f

Each functional test sends one or multiple queries to the running ClickHouse server and compares the result with reference.

-Tests are located in `testsies` directory. There are two subdirectories: `stateless` and `stateful`. Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from Yandex.Metrica and not available to general public. We tend to use only `stateless` tests and avoid adding new `stateful` tests.
+Tests are located in `queries` directory. There are two subdirectories: `stateless` and `stateful`. Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from Yandex.Metrica and are not available to the general public. We tend to use only `stateless` tests and avoid adding new `stateful` tests.

Each test can be one of two types: `.sql` and `.sh`. `.sql` test is a simple SQL script that is piped to `clickhouse-client --multiquery --testmode`. `.sh` test is a script that is run by itself.

@@ -19,7 +19,7 @@ To run all tests, use `clickhouse-test` tool. Look `--help` for the list of pos

The simplest way to invoke functional tests is to copy `clickhouse-client` to `/usr/bin/`, run `clickhouse-server` and then run `./clickhouse-test` from its own directory.
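As a hedged illustration of that workflow (assuming a locally running `clickhouse-server`, and using `00001_select_1` only as an example test name):

``` bash
# Make the freshly built client available system-wide, then run one stateless test.
sudo cp clickhouse-client /usr/bin/
./clickhouse-test 00001_select_1
```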
-To add new test, create a `.sql` or `.sh` file in `testsies/0_stateless` directory, check it manually and then generate `.reference` file in the following way: `clickhouse-client -n --testmode < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`.
+To add a new test, create a `.sql` or `.sh` file in the `queries/0_stateless` directory, check it manually and then generate the `.reference` file in the following way: `clickhouse-client -n --testmode < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`.

Tests should use (create, drop, etc.) only tables in the `test` database that is assumed to be created beforehand; also tests can use temporary tables.

@@ -32,9 +32,9 @@ meaning. `long` is for tests that run slightly longer than one second. You can

disable these groups of tests using `--no-zookeeper`, `--no-shard` and `--no-long` options, respectively.

-## Known bugs {#known-bugs}
+## Known Bugs {#known-bugs}

-If we know some bugs that can be easily reproduced by functional tests, we place prepared functional tests in `testsies/bugs` directory. These tests will be moved to `teststests_stateless` when bugs are fixed.
+If we know some bugs that can be easily reproduced by functional tests, we place prepared functional tests in the `queries/bugs` directory. These tests will be moved to `queries/0_stateless` when bugs are fixed.

## Integration Tests {#integration-tests}

@@ -58,7 +58,7 @@ Each test runs one or multiple queries (possibly with combinations of parameters)

If you want to improve performance of ClickHouse in some scenario, and if improvements can be observed on simple queries, it is highly recommended to write a performance test. It always makes sense to use `perf top` or other perf tools during your tests.

-## Test Tools And Scripts {#test-tools-and-scripts}
+## Test Tools and Scripts {#test-tools-and-scripts}

Some programs in the `tests` directory are not prepared tests, but test tools. For example, for `Lexer` there is a tool `src/Parsers/tests/lexer` that just does tokenization of stdin and writes the colorized result to stdout. You can use these kinds of tools as code examples and for exploration and manual testing.

@@ -163,11 +163,11 @@ For example, build with system packages is bad practice, because we cannot guara

Though we cannot run all tests on all variants of builds, we want to check at least that various build variants are not broken. For this purpose we use build tests.

-## Testing For Protocol Compatibility {#testing-for-protocol-compatibility}
+## Testing for Protocol Compatibility {#testing-for-protocol-compatibility}

When we extend the ClickHouse network protocol, we test manually that old clickhouse-client works with new clickhouse-server and new clickhouse-client works with old clickhouse-server (simply by running binaries from corresponding packages).

-## Help From The Compiler {#help-from-the-compiler}
+## Help from the Compiler {#help-from-the-compiler}

Main ClickHouse code (that is located in `dbms` directory) is built with `-Wall -Wextra -Werror` and with some additional enabled warnings. Although these options are not enabled for third-party libraries.
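As a rough illustration of the effect of those options (the file name is hypothetical; the real build drives these flags through CMake):

``` bash
# -Werror turns every enabled warning into a hard compilation error.
g++ -Wall -Wextra -Werror -c example.cpp -o example.o
```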
diff --git a/docs/en/engines/database_engines/mysql.md b/docs/en/engines/database_engines/mysql.md index 678c174e1fb..467a3aa032d 100644 --- a/docs/en/engines/database_engines/mysql.md +++ b/docs/en/engines/database_engines/mysql.md @@ -3,7 +3,7 @@ toc_priority: 30 toc_title: MySQL --- -# Mysql {#mysql} +# MySQL {#mysql} Allows to connect to databases on a remote MySQL server and perform `INSERT` and `SELECT` queries to exchange data between ClickHouse and MySQL. @@ -19,7 +19,7 @@ You cannot perform the following queries: ``` sql CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster] -ENGINE = MySQL('host:port', 'database', 'user', 'password') +ENGINE = MySQL('host:port', ['database' | database], 'user', 'password') ``` **Engine Parameters** diff --git a/docs/en/engines/index.md b/docs/en/engines/index.md index 37b3f5998c8..c4b0b299858 100644 --- a/docs/en/engines/index.md +++ b/docs/en/engines/index.md @@ -3,3 +3,4 @@ toc_folder_title: Engines toc_priority: 25 --- + diff --git a/docs/en/engines/table_engines/index.md b/docs/en/engines/table_engines/index.md index f5ef56382c5..00f53a6f9f0 100644 --- a/docs/en/engines/table_engines/index.md +++ b/docs/en/engines/table_engines/index.md @@ -17,7 +17,7 @@ The table engine (type of table) determines: ## Engine Families {#engine-families} -### Mergetree {#mergetree} +### MergeTree {#mergetree} The most universal and functional table engines for high-load tasks. The property shared by these engines is quick data insertion with subsequent background data processing. `MergeTree` family engines support data replication (with [Replicated\*](mergetree_family/replication.md) versions of engines), partitioning, and other features not supported in other engines. diff --git a/docs/en/engines/table_engines/mergetree_family/collapsingmergetree.md b/docs/en/engines/table_engines/mergetree_family/collapsingmergetree.md index 61e0d82d32f..973377c0b58 100644 --- a/docs/en/engines/table_engines/mergetree_family/collapsingmergetree.md +++ b/docs/en/engines/table_engines/mergetree_family/collapsingmergetree.md @@ -3,7 +3,7 @@ toc_priority: 36 toc_title: CollapsingMergeTree --- -# Collapsingmergetree {#table_engine-collapsingmergetree} +# CollapsingMergeTree {#table_engine-collapsingmergetree} The engine inherits from [MergeTree](mergetree.md) and adds the logic of rows collapsing to data parts merge algorithm. diff --git a/docs/en/engines/table_engines/mergetree_family/graphitemergetree.md b/docs/en/engines/table_engines/mergetree_family/graphitemergetree.md index 7785af9b6d3..ebfdfe4685a 100644 --- a/docs/en/engines/table_engines/mergetree_family/graphitemergetree.md +++ b/docs/en/engines/table_engines/mergetree_family/graphitemergetree.md @@ -3,7 +3,7 @@ toc_priority: 38 toc_title: GraphiteMergeTree --- -# Graphitemergetree {#graphitemergetree} +# GraphiteMergeTree {#graphitemergetree} This engine is designed for thinning and aggregating/averaging (rollup) [Graphite](http://graphite.readthedocs.io/en/latest/index.html) data. It may be helpful to developers who want to use ClickHouse as a data store for Graphite. 
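As a sketch of what using this engine can look like (not part of this patch; the table layout follows the engine's documented column requirements, and `graphite_rollup` must name a rollup configuration section in the server config):

``` bash
# Create a table for Graphite data; GraphiteMergeTree takes the name of the
# rollup configuration section as its argument.
clickhouse-client --query "
    CREATE TABLE graphite_data
    (
        Path String,
        Time DateTime,
        Value Float64,
        Version UInt32
    )
    ENGINE = GraphiteMergeTree('graphite_rollup')
    PARTITION BY toYYYYMM(Time)
    ORDER BY (Path, Time)"
```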
diff --git a/docs/en/engines/table_engines/mergetree_family/mergetree.md b/docs/en/engines/table_engines/mergetree_family/mergetree.md index aabc0b45487..fc0b6e63158 100644 --- a/docs/en/engines/table_engines/mergetree_family/mergetree.md +++ b/docs/en/engines/table_engines/mergetree_family/mergetree.md @@ -3,7 +3,7 @@ toc_priority: 30 toc_title: MergeTree --- -# Mergetree {#table_engines-mergetree} +# MergeTree {#table_engines-mergetree} The `MergeTree` engine and other engines of this family (`*MergeTree`) are the most robust ClickHouse table engines. diff --git a/docs/en/engines/table_engines/mergetree_family/replacingmergetree.md b/docs/en/engines/table_engines/mergetree_family/replacingmergetree.md index 5bdad0d4074..7fbd4edec68 100644 --- a/docs/en/engines/table_engines/mergetree_family/replacingmergetree.md +++ b/docs/en/engines/table_engines/mergetree_family/replacingmergetree.md @@ -3,7 +3,7 @@ toc_priority: 33 toc_title: ReplacingMergeTree --- -# Replacingmergetree {#replacingmergetree} +# ReplacingMergeTree {#replacingmergetree} The engine differs from [MergeTree](mergetree.md#table_engines-mergetree) in that it removes duplicate entries with the same primary key value (or more accurately, with the same [sorting key](mergetree.md) value). diff --git a/docs/en/engines/table_engines/mergetree_family/replication.md b/docs/en/engines/table_engines/mergetree_family/replication.md index 8a4edb7ac8e..9de68fdb9ed 100644 --- a/docs/en/engines/table_engines/mergetree_family/replication.md +++ b/docs/en/engines/table_engines/mergetree_family/replication.md @@ -186,7 +186,7 @@ An alternative recovery option is to delete information about the lost replica f There is no restriction on network bandwidth during recovery. Keep this in mind if you are restoring many replicas at once. -## Converting From Mergetree To Replicatedmergetree {#converting-from-mergetree-to-replicatedmergetree} +## Converting From MergeTree To ReplicatedMergeTree {#converting-from-mergetree-to-replicatedmergetree} We use the term `MergeTree` to refer to all table engines in the `MergeTree family`, the same as for `ReplicatedMergeTree`. @@ -198,7 +198,7 @@ Rename the existing MergeTree table, then create a `ReplicatedMergeTree` table w Move the data from the old table to the `detached` subdirectory inside the directory with the new table data (`/var/lib/clickhouse/data/db_name/table_name/`). Then run `ALTER TABLE ATTACH PARTITION` on one of the replicas to add these data parts to the working set. -## Converting From Replicatedmergetree To Mergetree {#converting-from-replicatedmergetree-to-mergetree} +## Converting From ReplicatedMergeTree To MergeTree {#converting-from-replicatedmergetree-to-mergetree} Create a MergeTree table with a different name. Move all the data from the directory with the `ReplicatedMergeTree` table data to the new table’s data directory. Then delete the `ReplicatedMergeTree` table and restart the server. diff --git a/docs/en/engines/table_engines/mergetree_family/summingmergetree.md b/docs/en/engines/table_engines/mergetree_family/summingmergetree.md index 5c007ee0b7a..c8f76b60c53 100644 --- a/docs/en/engines/table_engines/mergetree_family/summingmergetree.md +++ b/docs/en/engines/table_engines/mergetree_family/summingmergetree.md @@ -3,7 +3,7 @@ toc_priority: 34 toc_title: SummingMergeTree --- -# Summingmergetree {#summingmergetree} +# SummingMergeTree {#summingmergetree} The engine inherits from [MergeTree](mergetree.md#table_engines-mergetree). 
The difference is that when merging data parts for `SummingMergeTree` tables ClickHouse replaces all the rows with the same primary key (or more accurately, with the same [sorting key](mergetree.md)) with one row which contains summarized values for the columns with the numeric data type. If the sorting key is composed in a way that a single key value corresponds to a large number of rows, this significantly reduces storage volume and speeds up data selection.

@@ -94,7 +94,7 @@ SELECT key, sum(value) FROM summtt GROUP BY key

## Data Processing {#data-processing}

-When data are inserted into a table, they are saved as-is. Clickhouse merges the inserted parts of data periodically and this is when rows with the same primary key are summed and replaced with one for each resulting part of data.
+When data are inserted into a table, they are saved as-is. ClickHouse merges the inserted parts of data periodically and this is when rows with the same primary key are summed and replaced with one for each resulting part of data.

ClickHouse can merge the data parts so that different resulting parts of data can consist of rows with the same primary key, i.e. the summation will be incomplete. Therefore an aggregate function [sum()](../../../sql_reference/aggregate_functions/reference.md#agg_function-sum) and a `GROUP BY` clause should be used in a `SELECT` query as described in the example above.

diff --git a/docs/en/engines/table_engines/mergetree_family/versionedcollapsingmergetree.md b/docs/en/engines/table_engines/mergetree_family/versionedcollapsingmergetree.md
index 2c7a28263f8..f9a95dbd626 100644
--- a/docs/en/engines/table_engines/mergetree_family/versionedcollapsingmergetree.md
+++ b/docs/en/engines/table_engines/mergetree_family/versionedcollapsingmergetree.md
@@ -3,7 +3,7 @@ toc_priority: 37
 toc_title: VersionedCollapsingMergeTree
---

-# Versionedcollapsingmergetree {#versionedcollapsingmergetree}
+# VersionedCollapsingMergeTree {#versionedcollapsingmergetree}

This engine:

diff --git a/docs/en/engines/table_engines/special/file.md b/docs/en/engines/table_engines/special/file.md
index f0a9cd4e7e6..b9a79ed2e3e 100644
--- a/docs/en/engines/table_engines/special/file.md
+++ b/docs/en/engines/table_engines/special/file.md
@@ -14,7 +14,7 @@ Usage examples:

- Convert data from one format to another.
- Update data in ClickHouse by editing a file on a disk.

-## Usage In Clickhouse Server {#usage-in-clickhouse-server}
+## Usage In ClickHouse Server {#usage-in-clickhouse-server}

``` sql
File(Format)
```

@@ -65,7 +65,7 @@ SELECT * FROM file_engine_table

└──────┴───────┘
```

-## Usage In Clickhouse-local {#usage-in-clickhouse-local}
+## Usage In ClickHouse-local {#usage-in-clickhouse-local}

In [clickhouse-local](../../../operations/utilities/clickhouse-local.md) the File engine accepts a file path in addition to `Format`. Default input/output streams can be specified using numeric or human-readable names like `0` or `stdin`, `1` or `stdout`.

**Example:**

diff --git a/docs/en/engines/table_engines/special/generate.md b/docs/en/engines/table_engines/special/generate.md
index d4b573c6839..6e592674346 100644
--- a/docs/en/engines/table_engines/special/generate.md
+++ b/docs/en/engines/table_engines/special/generate.md
@@ -12,7 +12,7 @@ Usage examples:

- Use in tests to populate a reproducible large table.
- Generate random input for fuzzing tests.
-## Usage In Clickhouse Server {#usage-in-clickhouse-server} +## Usage In ClickHouse Server {#usage-in-clickhouse-server} ``` sql ENGINE = GenerateRandom(random_seed, max_string_length, max_array_length) diff --git a/docs/en/engines/table_engines/special/url.md b/docs/en/engines/table_engines/special/url.md index 3c3e8d45f4c..db679b97c54 100644 --- a/docs/en/engines/table_engines/special/url.md +++ b/docs/en/engines/table_engines/special/url.md @@ -8,7 +8,7 @@ toc_title: URL Manages data on a remote HTTP/HTTPS server. This engine is similar to the [File](file.md) engine. -## Using the Engine In the Clickhouse Server {#using-the-engine-in-the-clickhouse-server} +## Using the Engine In the ClickHouse Server {#using-the-engine-in-the-clickhouse-server} The `format` must be one that ClickHouse can use in `SELECT` queries and, if necessary, in `INSERTs`. For the full list of supported formats, see diff --git a/docs/en/faq/index.md b/docs/en/faq/index.md index 2f872ea45b0..2ee9d51e83b 100644 --- a/docs/en/faq/index.md +++ b/docs/en/faq/index.md @@ -3,3 +3,4 @@ toc_folder_title: F.A.Q. toc_priority: 76 --- + diff --git a/docs/en/getting_started/install.md b/docs/en/getting_started/install.md index 363fc928679..316cc5a47ef 100644 --- a/docs/en/getting_started/install.md +++ b/docs/en/getting_started/install.md @@ -69,7 +69,7 @@ sudo yum install clickhouse-server clickhouse-client You can also download and install packages manually from here: https://repo.clickhouse.tech/rpm/stable/x86\_64. -### From tgz archives {#from-tgz-archives} +### From Tgz Archives {#from-tgz-archives} It is recommended to use official pre-compiled `tgz` archives for all Linux distributions, where installation of `deb` or `rpm` packages is not possible. diff --git a/docs/en/getting_started/playground.md b/docs/en/getting_started/playground.md index d7453e51eaa..353724b6e3e 100644 --- a/docs/en/getting_started/playground.md +++ b/docs/en/getting_started/playground.md @@ -32,7 +32,7 @@ You can make queries to playground using any HTTP client, for example [curl](htt More information about software products that support ClickHouse is available [here](../interfaces/index.md). | Parameter | Value | -|:------|:------------------------| +|:----------|:--------------------------------------| | Endpoint | https://play-api.clickhouse.tech:8443 | | User | `playground` | | Password | `clickhouse` | diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index aadd229490f..51985ba89e1 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -11,7 +11,7 @@ results of a `SELECT`, and to perform `INSERT`s into a file-backed table. The supported formats are: | Format | Input | Output | -|---------------------------------------|-----|------| +|-----------------------------------------------------------------|-------|--------| | [TabSeparated](#tabseparated) | ✔ | ✔ | | [TabSeparatedRaw](#tabseparatedraw) | ✗ | ✔ | | [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ | @@ -993,21 +993,21 @@ ClickHouse Avro format supports reading and writing [Avro data files](http://avr The table below shows supported data types and how they match ClickHouse [data types](../sql_reference/data_types/index.md) in `INSERT` and `SELECT` queries. 
-| Avro data type `INSERT` | ClickHouse data type | Avro data type `SELECT` | -|---------------------------|-------------------------------------------------------|------------------| +| Avro data type `INSERT` | ClickHouse data type | Avro data type `SELECT` | +|---------------------------------------------|-----------------------------------------------------------------------------------------------------------------------|------------------------------| | `boolean`, `int`, `long`, `float`, `double` | [Int(8\|16\|32)](../sql_reference/data_types/int_uint.md), [UInt(8\|16\|32)](../sql_reference/data_types/int_uint.md) | `int` | | `boolean`, `int`, `long`, `float`, `double` | [Int64](../sql_reference/data_types/int_uint.md), [UInt64](../sql_reference/data_types/int_uint.md) | `long` | -| `boolean`, `int`, `long`, `float`, `double` | [Float32](../sql_reference/data_types/float.md) | `float` | -| `boolean`, `int`, `long`, `float`, `double` | [Float64](../sql_reference/data_types/float.md) | `double` | -| `bytes`, `string`, `fixed`, `enum` | [String](../sql_reference/data_types/string.md) | `bytes` | -| `bytes`, `string`, `fixed` | [FixedString(N)](../sql_reference/data_types/fixedstring.md) | `fixed(N)` | -| `enum` | [Enum(8\|16)](../sql_reference/data_types/enum.md) | `enum` | -| `array(T)` | [Array(T)](../sql_reference/data_types/array.md) | `array(T)` | -| `union(null, T)`, `union(T, null)` | [Nullable(T)](../sql_reference/data_types/date.md) | `union(null, T)` | -| `null` | [Nullable(Nothing)](../sql_reference/data_types/special_data_types/nothing.md) | `null` | -| `int (date)` \* | [Date](../sql_reference/data_types/date.md) | `int (date)` \* | -| `long (timestamp-millis)` \* | [DateTime64(3)](../sql_reference/data_types/datetime.md) | `long (timestamp-millis)` \* | -| `long (timestamp-micros)` \* | [DateTime64(6)](../sql_reference/data_types/datetime.md) | `long (timestamp-micros)` \* | +| `boolean`, `int`, `long`, `float`, `double` | [Float32](../sql_reference/data_types/float.md) | `float` | +| `boolean`, `int`, `long`, `float`, `double` | [Float64](../sql_reference/data_types/float.md) | `double` | +| `bytes`, `string`, `fixed`, `enum` | [String](../sql_reference/data_types/string.md) | `bytes` | +| `bytes`, `string`, `fixed` | [FixedString(N)](../sql_reference/data_types/fixedstring.md) | `fixed(N)` | +| `enum` | [Enum(8\|16)](../sql_reference/data_types/enum.md) | `enum` | +| `array(T)` | [Array(T)](../sql_reference/data_types/array.md) | `array(T)` | +| `union(null, T)`, `union(T, null)` | [Nullable(T)](../sql_reference/data_types/date.md) | `union(null, T)` | +| `null` | [Nullable(Nothing)](../sql_reference/data_types/special_data_types/nothing.md) | `null` | +| `int (date)` \* | [Date](../sql_reference/data_types/date.md) | `int (date)` \* | +| `long (timestamp-millis)` \* | [DateTime64(3)](../sql_reference/data_types/datetime.md) | `long (timestamp-millis)` \* | +| `long (timestamp-micros)` \* | [DateTime64(6)](../sql_reference/data_types/datetime.md) | `long (timestamp-micros)` \* | \* [Avro logical types](http://avro.apache.org/docs/current/spec.html#Logical+Types) @@ -1101,8 +1101,8 @@ SELECT * FROM topic1_stream; The table below shows supported data types and how they match ClickHouse [data types](../sql_reference/data_types/index.md) in `INSERT` and `SELECT` queries. 
-| Parquet data type (`INSERT`) | ClickHouse data type | Parquet data type (`SELECT`) | -|------------------|---------------------------|------------------| +| Parquet data type (`INSERT`) | ClickHouse data type | Parquet data type (`SELECT`) | +|------------------------------|-----------------------------------------------------------|------------------------------| | `UINT8`, `BOOL` | [UInt8](../sql_reference/data_types/int_uint.md) | `UINT8` | | `INT8` | [Int8](../sql_reference/data_types/int_uint.md) | `INT8` | | `UINT16` | [UInt16](../sql_reference/data_types/int_uint.md) | `UINT16` | @@ -1149,8 +1149,8 @@ To exchange data with Hadoop, you can use [HDFS table engine](../engines/table_e The table below shows supported data types and how they match ClickHouse [data types](../sql_reference/data_types/index.md) in `INSERT` queries. -| ORC data type (`INSERT`) | ClickHouse data type | -|----------------|-------------------------| +| ORC data type (`INSERT`) | ClickHouse data type | +|--------------------------|-----------------------------------------------------| | `UINT8`, `BOOL` | [UInt8](../sql_reference/data_types/int_uint.md) | | `INT8` | [Int8](../sql_reference/data_types/int_uint.md) | | `UINT16` | [UInt16](../sql_reference/data_types/int_uint.md) | diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md index df2ee5e19e5..0e18e00bb0d 100644 --- a/docs/en/interfaces/http.md +++ b/docs/en/interfaces/http.md @@ -286,31 +286,35 @@ $ curl -sS "
?param_id=2&param_phrase=test" -d "SELECT * FROM table WHER

## Predefined HTTP Interface {#predefined_http_interface}

ClickHouse supports specific queries through the HTTP interface. For example, you can write data to a table as follows:
-
-```bash
+
+``` bash
$ echo '(4),(5),(6)' | curl 'http://localhost:8123/?query=INSERT%20INTO%20t%20VALUES' --data-binary @-
```

-ClickHouse also supports Predefined HTTP Interface which can help you more easy integration with third party tools like [Prometheus exporter](https://github.com/percona-lab/clickhouse_exporter).
+ClickHouse also supports a Predefined HTTP Interface which can make integration with third-party tools like [Prometheus exporter](https://github.com/percona-lab/clickhouse_exporter) easier.

Example:
-
-* First of all, add this section to server configuration file:
-
+
+- First of all, add this section to the server configuration file:
+
+

``` xml
-<predefined_query_handler>
-    <url>/metrics</url>
-    <method>GET</method>
-    <queries>
-        <query>SELECT * FROM system.metrics LIMIT 5 FORMAT Template SETTINGS format_template_resultset = 'prometheus_template_output_format_resultset', format_template_row = 'prometheus_template_output_format_row', format_template_rows_between_delimiter = '\n'</query>
-    </queries>
-</predefined_query_handler>
+<predefined_query_handler>
+    <url>/metrics</url>
+    <method>GET</method>
+    <queries>
+        <query>SELECT * FROM system.metrics LIMIT 5 FORMAT Template SETTINGS format_template_resultset = 'prometheus_template_output_format_resultset', format_template_row = 'prometheus_template_output_format_row', format_template_rows_between_delimiter = '\n'</query>
+    </queries>
+</predefined_query_handler>
```
-
-* You can now request the url directly for data in the Prometheus format:
-
+
+- You can now request the url directly for data in the Prometheus format:
+
+

``` bash
curl -vvv 'http://localhost:8123/metrics'
*   Trying ::1...

@@ -319,7 +323,7 @@
> Host: localhost:8123
> User-Agent: curl/7.47.0
> Accept: */*
->
+>
< HTTP/1.1 200 OK
< Date: Wed, 27 Nov 2019 08:54:25 GMT
< Connection: Keep-Alive

@@ -329,7 +333,7 @@ curl -vvv 'http://localhost:8123/metrics'
< X-ClickHouse-Query-Id: f39235f6-6ed7-488c-ae07-c7ceafb960f6
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
-<
+<
# HELP "Query" "Number of executing queries"
# TYPE "Query" counter
"Query" 1

@@ -337,19 +341,19 @@ curl -vvv 'http://localhost:8123/metrics'
# HELP "Merge" "Number of executing background merges"
# TYPE "Merge" counter
"Merge" 0
-
+
# HELP "PartMutation" "Number of mutations (ALTER DELETE/UPDATE)"
# TYPE "PartMutation" counter
"PartMutation" 0
-
+
# HELP "ReplicatedFetch" "Number of data parts being fetched from replica"
# TYPE "ReplicatedFetch" counter
"ReplicatedFetch" 0
-
+
# HELP "ReplicatedSend" "Number of data parts being sent to replicas"
# TYPE "ReplicatedSend" counter
"ReplicatedSend" 0
-
+
* Connection #0 to host localhost left intact
```

@@ -357,26 +361,24 @@ As you can see from the example, if `<http_handlers>` is configured in the confi

Now `<http_handlers>` can configure `<root_handler>`, `<ping_handler>`, `<replicas_status_handler>`, `<predefined_query_handler>` and `<dynamic_query_handler>`.

-## root_handler
+## root\_handler {#root_handler}

- `<root_handler>` returns the specified content for the root path request. The specific return content is configured by `http_server_default_response` in config.xml. if not specified, return **Ok.**
+`<root_handler>` returns the specified content for the root path request. The specific return content is configured by `http_server_default_response` in config.xml. If it is not specified, **Ok.** is returned.

`http_server_default_response` is not defined and an HTTP request is sent to ClickHouse.
The result is as follows:

-```xml
+``` xml
<root_handler/>
```

-```
-$ curl 'http://localhost:8123'
-Ok.
-```
+    $ curl 'http://localhost:8123'
+    Ok.

`http_server_default_response` is defined and an HTTP request is sent to ClickHouse. The result is as follows:

-```xml
+``` xml
<http_server_default_response><![CDATA[<html ng-app="SMI2"><head><base href="http://ui.tabix.io/"></head><body><div ui-view="" class="content-ui"></div><script src="http://loader.tabix.io/master.js"></script></body></html>]]></http_server_default_response>

@@ -384,35 +386,33 @@
```

-```
-$ curl 'http://localhost:8123'
-<html ng-app="SMI2"><head><base href="http://ui.tabix.io/"></head><body><div ui-view="" class="content-ui"></div><script src="http://loader.tabix.io/master.js"></script></body></html>%
-```
+    $ curl 'http://localhost:8123'
+    <html ng-app="SMI2"><head><base href="http://ui.tabix.io/"></head><body><div ui-view="" class="content-ui"></div><script src="http://loader.tabix.io/master.js"></script></body></html>%
-## ping_handler
+## ping\_handler {#ping_handler}

`<ping_handler>` can be used to probe the health of the current ClickHouse Server. When the ClickHouse HTTP Server is normal, accessing ClickHouse through `<ping_handler>` will return **Ok.**.

Example:

-```xml
+``` xml
<ping_handler>/ping</ping_handler>
```

-```bash
+``` bash
$ curl 'http://localhost:8123/ping'
Ok.
```

-## replicas_status_handler
+## replicas\_status\_handler {#replicas_status_handler}

-`<replicas_status_handler>` is used to detect the state of the replica node and return **Ok.** if the replica node has no delay. If there is a delay, return the specific delay. The value of `<replicas_status_handler>` supports customization. If you do not specify `<replicas_status_handler>`, ClickHouse default setting `<replicas_status_handler>` is **/replicas_status**.
+`<replicas_status_handler>` is used to detect the state of the replica node and returns **Ok.** if the replica node has no delay. If there is a delay, it returns the specific delay. The value of `<replicas_status_handler>` supports customization. If you do not specify `<replicas_status_handler>`, the ClickHouse default setting for `<replicas_status_handler>` is **/replicas\_status**.

Example:

-```xml
+``` xml
<replicas_status_handler>/replicas_status</replicas_status_handler>
```

@@ -420,90 +420,90 @@ No delay case:

-```bash
+``` bash
$ curl 'http://localhost:8123/replicas_status'
Ok.
```

Delayed case:

-```bash
+``` bash
$ curl 'http://localhost:8123/replicas_status'
db.stats: Absolute delay: 22. Relative delay: 22.
```

-## predefined_query_handler
+## predefined\_query\_handler {#predefined_query_handler}

You can configure `<method>`, `<headers>`, `<url>` and `<query>` in `<predefined_query_handler>`.

`<method>` is responsible for matching the method part of the HTTP request. `<method>` fully conforms to the definition of [method](https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods) in the HTTP protocol. It is an optional configuration. If it is not defined in the configuration file, it does not match the method portion of the HTTP request

-`<url>` is responsible for matching the url part of the HTTP request. It is compatible with [RE2](https://github.com/google/re2)'s regular expressions. It is an optional configuration. If it is not defined in the configuration file, it does not match the url portion of the HTTP request
+`<url>` is responsible for matching the url part of the HTTP request. It is compatible with [RE2](https://github.com/google/re2)’s regular expressions. It is an optional configuration. If it is not defined in the configuration file, it does not match the url portion of the HTTP request

-`<headers>` is responsible for matching the header part of the HTTP request. It is compatible with RE2's regular expressions. It is an optional configuration. If it is not defined in the configuration file, it does not match the header portion of the HTTP request
+`<headers>` is responsible for matching the header part of the HTTP request. It is compatible with RE2’s regular expressions. It is an optional configuration. If it is not defined in the configuration file, it does not match the header portion of the HTTP request

`<query>` value is a predefined query of `<predefined_query_handler>`, which is executed by ClickHouse when an HTTP request is matched and the result of the query is returned. It is a mandatory configuration.

`<predefined_query_handler>` supports setting Settings and query\_params values.

The following example defines the values of `max_threads` and `max_alter_threads` settings, then queries the system table to check whether these settings were set successfully.
Example:

``` xml
-<predefined_query_handler>
-    <method>GET</method>
-    <headers>
-        <XXX>TEST_HEADER_VALUE</XXX>
-        <PARAMS_XXX><![CDATA[(?P<name_1>[^/]+)(/(?P<name_2>[^/]+))?]]></PARAMS_XXX>
-    </headers>
-    <url><![CDATA[/query_param_with_url/(?P<name_1>[^/]+)(/(?P<name_2>[^/]+))?]]></url>
-    <queries>
-        <query>SELECT value FROM system.settings WHERE name = {name_1:String}</query>
-        <query>SELECT name, value FROM system.settings WHERE name = {name_2:String}</query>
-    </queries>
-</predefined_query_handler>
+<predefined_query_handler>
+    <method>GET</method>
+    <headers>
+        <XXX>TEST_HEADER_VALUE</XXX>
+        <PARAMS_XXX><![CDATA[(?P<name_1>[^/]+)(/(?P<name_2>[^/]+))?]]></PARAMS_XXX>
+    </headers>
+    <url><![CDATA[/query_param_with_url/(?P<name_1>[^/]+)(/(?P<name_2>[^/]+))?]]></url>
+    <queries>
+        <query>SELECT value FROM system.settings WHERE name = {name_1:String}</query>
+        <query>SELECT name, value FROM system.settings WHERE name = {name_2:String}</query>
+    </queries>
+</predefined_query_handler>
```

``` bash
$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_alter_threads?max_threads=1&max_alter_threads=2'
1
-max_alter_threads 2
+max_alter_threads 2
```

!!! note "Note"
    In one `<predefined_query_handler>`, one `<queries>` only supports one `<query>` of an insert type.

-## dynamic_query_handler
+## dynamic\_query\_handler {#dynamic_query_handler}

-`<dynamic_query_handler>` than `<predefined_query_handler>` increased `<query_param_name>` .
+Compared with `<predefined_query_handler>`, `<dynamic_query_handler>` adds a `<query_param_name>` setting.

ClickHouse extracts and executes the value corresponding to the `<query_param_name>` value in the url of the HTTP request. The ClickHouse default setting for `<query_param_name>` is `/query`. It is an optional configuration. If there is no definition in the configuration file, the param is not passed in.

To experiment with this functionality, the example defines the values of max\_threads and max\_alter\_threads and queries whether the Settings were set successfully.

The difference is that in `<predefined_query_handler>`, the query is written in the configuration file. But in `<dynamic_query_handler>`, the query is written in the form of a param of the HTTP request.

Example:

``` xml
-<dynamic_query_handler>
-    <headers>
-        <XXX>TEST_HEADER_VALUE_DYNAMIC</XXX>
-        <PARAMS_XXX><![CDATA[(?P<param_name_1>[^/]+)(/(?P<param_name_2>[^/]+))?]]></PARAMS_XXX>
-    </headers>
-    <query_param_name>query_param</query_param_name>
-</dynamic_query_handler>
+<dynamic_query_handler>
+    <headers>
+        <XXX>TEST_HEADER_VALUE_DYNAMIC</XXX>
+        <PARAMS_XXX><![CDATA[(?P<param_name_1>[^/]+)(/(?P<param_name_2>[^/]+))?]]></PARAMS_XXX>
+    </headers>
+    <query_param_name>query_param</query_param_name>
+</dynamic_query_handler>
```

``` bash
$ curl -H 'XXX:TEST_HEADER_VALUE_DYNAMIC' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/?query_param=SELECT%20value%20FROM%20system.settings%20where%20name%20=%20%7Bname_1:String%7D%20OR%20name%20=%20%7Bname_2:String%7D&max_threads=1&max_alter_threads=2&param_name_2=max_alter_threads'
1
2
```

-[Original article](https://clickhouse.tech/docs/en/interfaces/http_interface/) \ No newline at end of file
+[Original article](https://clickhouse.tech/docs/en/interfaces/http_interface/)

diff --git a/docs/en/interfaces/mysql.md b/docs/en/interfaces/mysql.md
index 5f469079d54..9f9d7f89a87 100644
--- a/docs/en/interfaces/mysql.md
+++ b/docs/en/interfaces/mysql.md
@@ -3,7 +3,7 @@ toc_priority: 20
 toc_title: MySQL Interface
---

-# MySQL interface {#mysql-interface}
+# MySQL Interface {#mysql-interface}

ClickHouse supports the MySQL wire protocol.
It can be enabled by [mysql\_port](../operations/server_configuration_parameters/settings.md#server_configuration_parameters-mysql_port) setting in configuration file: diff --git a/docs/en/interfaces/third-party/client_libraries.md b/docs/en/interfaces/third-party/client_libraries.md index f408e4e0193..b049c37641c 100644 --- a/docs/en/interfaces/third-party/client_libraries.md +++ b/docs/en/interfaces/third-party/client_libraries.md @@ -37,7 +37,7 @@ toc_title: Client Libraries - [clickhouse-activerecord](https://github.com/PNixx/clickhouse-activerecord) - R - [clickhouse-r](https://github.com/hannesmuehleisen/clickhouse-r) - - [RClickhouse](https://github.com/IMSMWU/RClickhouse) + - [RClickHouse](https://github.com/IMSMWU/RClickHouse) - Java - [clickhouse-client-java](https://github.com/VirtusAI/clickhouse-client-java) - [clickhouse-client](https://github.com/Ecwid/clickhouse-client) diff --git a/docs/en/interfaces/third-party/integrations.md b/docs/en/interfaces/third-party/integrations.md index ab84b3f4c19..ff0b7fe2e19 100644 --- a/docs/en/interfaces/third-party/integrations.md +++ b/docs/en/interfaces/third-party/integrations.md @@ -74,7 +74,7 @@ toc_title: Integrations - [pandahouse](https://github.com/kszucs/pandahouse) - R - [dplyr](https://db.rstudio.com/dplyr/) - - [RClickhouse](https://github.com/IMSMWU/RClickhouse) (uses [clickhouse-cpp](https://github.com/artpaul/clickhouse-cpp)) + - [RClickHouse](https://github.com/IMSMWU/RClickHouse) (uses [clickhouse-cpp](https://github.com/artpaul/clickhouse-cpp)) - Java - [Hadoop](http://hadoop.apache.org) - [clickhouse-hdfs-loader](https://github.com/jaykelin/clickhouse-hdfs-loader) (uses [JDBC](../../sql_reference/table_functions/jdbc.md)) diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index 9a1103c19a5..f7006ae15c8 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -9,7 +9,7 @@ toc_title: Adopters The following list of companies using ClickHouse and their success stories is assembled from public sources, thus might differ from current reality. We’d appreciate it if you share the story of adopting ClickHouse in your company and [add it to the list](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/introduction/adopters.md), but please make sure you won’t have any NDA issues by doing so. Providing updates with publications from other companies is also useful. 
| Company | Industry | Usecase | Cluster Size | (Un)Compressed Data Size\* | Reference | -|-----------------------------------------------|---------------------|---------------|------------------------------------|------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------| +|-----------------------------------------------------------------------------|---------------------------------|-----------------------|------------------------------------------------------------|------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | [2gis](https://2gis.ru) | Maps | Monitoring | — | — | [Talk in Russian, July 2019](https://youtu.be/58sPkXfq6nw) | | [Aloha Browser](https://alohabrowser.com/) | Mobile App | Browser backend | — | — | [Slides in Russian, May 2019](https://github.com/yandex/clickhouse-presentations/blob/master/meetup22/aloha.pdf) | | [Amadeus](https://amadeus.com/) | Travel | Analytics | — | — | [Press Release, April 2018](https://www.altinity.com/blog/2018/4/5/amadeus-technologies-launches-investment-and-insights-tool-based-on-machine-learning-and-strategy-algorithms) | @@ -20,7 +20,7 @@ toc_title: Adopters | [Bloomberg](https://www.bloomberg.com/) | Finance, Media | Monitoring | 102 servers | — | [Slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) | | [Bloxy](https://bloxy.info) | Blockchain | Analytics | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/4_bloxy.pptx) | | `Dataliance/UltraPower` | Telecom | Analytics | — | — | [Slides in Chinese, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/telecom.pdf) | -| [CARTO](https://carto.com/) | Business Intelligence | Geo analytics | — | — | [Geospatial processing with Clickhouse](https://carto.com/blog/geospatial-processing-with-clickhouse/) | +| [CARTO](https://carto.com/) | Business Intelligence | Geo analytics | — | — | [Geospatial processing with ClickHouse](https://carto.com/blog/geospatial-processing-with-clickhouse/) | | [CERN](http://public.web.cern.ch/public/) | Research | Experiment | — | — | [Press release, April 2012](https://www.yandex.com/company/press_center/press_releases/2012/2012-04-10/) | | [Cisco](http://cisco.com/) | Networking | Traffic analysis | — | — | [Lightning talk, October 2019](https://youtu.be/-hI1vDR2oPY?t=5057) | | [Citadel Securities](https://www.citadelsecurities.com/) | Finance | — | — | — | [Contribution, March 2019](https://github.com/ClickHouse/ClickHouse/pull/4774) | @@ -64,7 +64,7 @@ toc_title: Adopters | [Splunk](https://www.splunk.com/) | Business Analytics | Main product | — | — | [Slides in English, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/splunk.pdf) | | [Spotify](https://www.spotify.com) | Music | Experimentation | — | — | [Slides, July 2018](https://www.slideshare.net/glebus/using-clickhouse-for-experimentation-104247173) | | [Tencent](https://www.tencent.com) | Big Data | Data processing | — | — | [Slides in Chinese, October 
2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) | -| [Uber](https://www.uber.com) | Taxi | Logging | — | — | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/uber.pdf) | +| [Uber](https://www.uber.com) | Taxi | Logging | — | — | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/uber.pdf) | | [VKontakte](https://vk.com) | Social Network | Statistics, Logging | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) | | [Wisebits](https://wisebits.com/) | IT Solutions | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | | [Xiaoxin Tech.](https://www.xiaoheiban.cn/) | Education | Common purpose | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/sync-clickhouse-with-mysql-mongodb.pptx) | diff --git a/docs/en/introduction/distinctive_features.md b/docs/en/introduction/distinctive_features.md index 9704c654723..31770b0bf79 100644 --- a/docs/en/introduction/distinctive_features.md +++ b/docs/en/introduction/distinctive_features.md @@ -60,7 +60,7 @@ ClickHouse provides various ways to trade accuracy for performance: 2. Running a query based on a part (sample) of data and getting an approximated result. In this case, proportionally less data is retrieved from the disk. 3. Running an aggregation for a limited number of random keys, instead of for all keys. Under certain conditions for key distribution in the data, this provides a reasonably accurate result while using fewer resources. -## Data replication and data integrity support {#data-replication-and-data-integrity-support} +## Data Replication and Data Integrity Support {#data-replication-and-data-integrity-support} ClickHouse uses asynchronous multi-master replication. After being written to any available replica, all the remaining replicas retrieve their copy in the background. The system maintains identical data on different replicas. Recovery after most failures is performed automatically, or semi-automatically in complex cases. diff --git a/docs/en/introduction/features_considered_disadvantages.md b/docs/en/introduction/features_considered_disadvantages.md index 597dad30663..e295b5570ab 100644 --- a/docs/en/introduction/features_considered_disadvantages.md +++ b/docs/en/introduction/features_considered_disadvantages.md @@ -3,7 +3,7 @@ toc_priority: 5 toc_title: ClickHouse Features that Can Be Considered Disadvantages --- -# ClickHouse Features that Can be Considered Disadvantages {#clickhouse-features-that-can-be-considered-disadvantages} +# ClickHouse Features that Can Be Considered Disadvantages {#clickhouse-features-that-can-be-considered-disadvantages} 1. No full-fledged transactions. 2. Lack of ability to modify or delete already inserted data with high rate and low latency. There are batch deletes and updates available to clean up or modify data, for example to comply with [GDPR](https://gdpr-info.eu). 
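A hedged sketch of the batch-delete mechanism mentioned in point 2 (table and column names are hypothetical; the mutation is applied asynchronously in the background):

``` bash
# Remove all rows for one user; ALTER ... DELETE is a mutation, not a transactional delete.
clickhouse-client --query "ALTER TABLE user_events DELETE WHERE user_id = 42"
```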
diff --git a/docs/en/introduction/index.md b/docs/en/introduction/index.md index 1a089551261..ba80f9c2640 100644 --- a/docs/en/introduction/index.md +++ b/docs/en/introduction/index.md @@ -3,3 +3,4 @@ toc_folder_title: Introduction toc_priority: 1 --- + diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md index 9510ef4b709..27418dff884 100644 --- a/docs/en/operations/backup.md +++ b/docs/en/operations/backup.md @@ -28,7 +28,7 @@ Some local filesystems provide snapshot functionality (for example, [ZFS](https: For smaller volumes of data, a simple `INSERT INTO ... SELECT ...` to remote tables might work as well. -## Manipulations With Parts {#manipulations-with-parts} +## Manipulations with Parts {#manipulations-with-parts} ClickHouse allows using the `ALTER TABLE ... FREEZE PARTITION ...` query to create a local copy of table partitions. This is implemented using hardlinks to the `/var/lib/clickhouse/shadow/` folder, so it usually does not consume extra disk space for old data. The created copies of files are not handled by ClickHouse server, so you can just leave them there: you will have a simple backup that doesn’t require any additional external system, but it will still be prone to hardware issues. For this reason, it’s better to remotely copy them to another location and then remove the local copies. Distributed filesystems and object stores are still good options for this, but normal attached file servers with a large enough capacity might work as well (in this case the transfer will occur via the network filesystem or maybe [rsync](https://en.wikipedia.org/wiki/Rsync)). diff --git a/docs/en/operations/monitoring.md b/docs/en/operations/monitoring.md index 2e848dbb313..363e9cc4bff 100644 --- a/docs/en/operations/monitoring.md +++ b/docs/en/operations/monitoring.md @@ -22,7 +22,7 @@ It is highly recommended to set up monitoring for: - Utilization of storage system, RAM and network. -## Clickhouse Server Metrics {#clickhouse-server-metrics} +## ClickHouse Server Metrics {#clickhouse-server-metrics} ClickHouse server has embedded instruments for self-state monitoring. diff --git a/docs/en/operations/performance_test.md b/docs/en/operations/performance_test.md index 2af28147dbe..d955b50fa02 100644 --- a/docs/en/operations/performance_test.md +++ b/docs/en/operations/performance_test.md @@ -3,7 +3,7 @@ toc_priority: 54 toc_title: Testing Hardware --- -# How To Test Your Hardware With ClickHouse {#how-to-test-your-hardware-with-clickhouse} +# How to Test Your Hardware with ClickHouse {#how-to-test-your-hardware-with-clickhouse} With this instruction you can run a basic ClickHouse performance test on any server without installing ClickHouse packages. @@ -24,7 +24,7 @@ With this instruction you can run basic ClickHouse performance test on any serve # Then do: chmod a+x clickhouse -5. Download configs: +1. Download configs: @@ -34,7 +34,7 @@ With this instruction you can run basic ClickHouse performance test on any serve wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/programs/server/config.d/path.xml -O config.d/path.xml wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/programs/server/config.d/log_to_console.xml -O config.d/log_to_console.xml -6. Download benchmark files: +1. Download benchmark files: @@ -42,7 +42,7 @@ With this instruction you can run basic ClickHouse performance test on any serve chmod a+x benchmark-new.sh wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/clickhouse/queries.sql -7. 
Download test data according to the [Yandex.Metrica dataset](../getting_started/example_datasets/metrica.md) instruction (“hits” table containing 100 million rows). +1. Download test data according to the [Yandex.Metrica dataset](../getting_started/example_datasets/metrica.md) instruction (“hits” table containing 100 million rows). @@ -50,31 +50,31 @@ With this instruction you can run basic ClickHouse performance test on any serve tar xvf hits_100m_obfuscated_v1.tar.xz -C . mv hits_100m_obfuscated_v1/* . -8. Run the server: +1. Run the server: ./clickhouse server -9. Check the data: ssh to the server in another terminal +1. Check the data: ssh to the server in another terminal ./clickhouse client --query "SELECT count() FROM hits_100m_obfuscated" 100000000 -10. Edit the benchmark-new.sh, change “clickhouse-client” to “./clickhouse client” and add “--max\_memory\_usage 100000000000” parameter. +1. Edit the benchmark-new.sh, change “clickhouse-client” to “./clickhouse client” and add “--max\_memory\_usage 100000000000” parameter. mcedit benchmark-new.sh -11. Run the benchmark: +1. Run the benchmark: ./benchmark-new.sh hits_100m_obfuscated -12. Send the numbers and the info about your hardware configuration to clickhouse-feedback@yandex-team.com -All the results are published here: https://clickhouse.tech/benchmark_hardware.html +All the results are published here: https://clickhouse.tech/benchmark\_hardware.html diff --git a/docs/en/operations/system_tables.md b/docs/en/operations/system_tables.md index 1c7d9546ff8..8905ca14569 100644 --- a/docs/en/operations/system_tables.md +++ b/docs/en/operations/system_tables.md @@ -147,27 +147,68 @@ This system table is used for implementing the `SHOW DATABASES` query. Contains information about detached parts of [MergeTree](../engines/table_engines/mergetree_family/mergetree.md) tables. The `reason` column specifies why the part was detached. For user-detached parts, the reason is empty. Such parts can be attached with [ALTER TABLE ATTACH PARTITION\|PART](../sql_reference/statements/alter.md#alter_attach-partition) command. For the description of other columns, see [system.parts](#system_tables-parts). If part name is invalid, values of some columns may be `NULL`. Such parts can be deleted with [ALTER TABLE DROP DETACHED PART](../sql_reference/statements/alter.md#alter_drop-detached). -## system.dictionaries {#system-dictionaries} +## system.dictionaries {#system_tables-dictionaries} -Contains information about external dictionaries. +Contains information about [external dictionaries](../sql_reference/dictionaries/external_dictionaries/external_dicts.md). Columns: -- `name` (String) — Dictionary name. -- `type` (String) — Dictionary type: Flat, Hashed, Cache. -- `origin` (String) — Path to the configuration file that describes the dictionary. -- `attribute.names` (Array(String)) — Array of attribute names provided by the dictionary. -- `attribute.types` (Array(String)) — Corresponding array of attribute types that are provided by the dictionary. -- `has_hierarchy` (UInt8) — Whether the dictionary is hierarchical. -- `bytes_allocated` (UInt64) — The amount of RAM the dictionary uses. -- `hit_rate` (Float64) — For cache dictionaries, the percentage of uses for which the value was in the cache. -- `element_count` (UInt64) — The number of items stored in the dictionary. 
-- `load_factor` (Float64) — The percentage filled in the dictionary (for a hashed dictionary, the percentage filled in the hash table). -- `creation_time` (DateTime) — The time when the dictionary was created or last successfully reloaded. -- `last_exception` (String) — Text of the error that occurs when creating or reloading the dictionary if the dictionary couldn’t be created. -- `source` (String) — Text describing the data source for the dictionary. +- `database` ([String](../sql_reference/data_types/string.md)) — Name of the database containing the dictionary created by a DDL query. Empty string for other dictionaries. +- `name` ([String](../sql_reference/data_types/string.md)) — [Dictionary name](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict.md). +- `status` ([Enum8](../sql_reference/data_types/enum.md)) — Dictionary status. Possible values: + - `NOT_LOADED` — Dictionary was not loaded because it was not used. + - `LOADED` — Dictionary loaded successfully. + - `FAILED` — Unable to load the dictionary as a result of an error. + - `LOADING` — Dictionary is loading now. + - `LOADED_AND_RELOADING` — Dictionary is loaded successfully, and is being reloaded right now (frequent reasons: [SYSTEM RELOAD DICTIONARY](../sql_reference/statements/system.md#query_language-system-reload-dictionary) query, timeout, dictionary config has changed). + - `FAILED_AND_RELOADING` — Dictionary could not be loaded as a result of an error and is being reloaded now. +- `origin` ([String](../sql_reference/data_types/string.md)) — Path to the configuration file that describes the dictionary. +- `type` ([String](../sql_reference/data_types/string.md)) — Type of dictionary allocation. See [Storing Dictionaries in Memory](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_layout.md). +- `key` — [Key type](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_structure.md#ext_dict_structure-key): Numeric key ([UInt64](../sql_reference/data_types/int_uint.md#uint-ranges)) or Composite key ([String](../sql_reference/data_types/string.md)) — in the form "(type 1, type 2, ..., type n)". +- `attribute.names` ([Array](../sql_reference/data_types/array.md)([String](../sql_reference/data_types/string.md))) — Array of [attribute names](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_structure.md#ext_dict_structure-attributes) provided by the dictionary. +- `attribute.types` ([Array](../sql_reference/data_types/array.md)([String](../sql_reference/data_types/string.md))) — Corresponding array of [attribute types](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_structure.md#ext_dict_structure-attributes) that are provided by the dictionary. +- `bytes_allocated` ([UInt64](../sql_reference/data_types/int_uint.md#uint-ranges)) — Amount of RAM allocated for the dictionary. +- `query_count` ([UInt64](../sql_reference/data_types/int_uint.md#uint-ranges)) — Number of queries since the dictionary was loaded or since the last successful reload. +- `hit_rate` ([Float64](../sql_reference/data_types/float.md)) — For cache dictionaries, the percentage of uses for which the value was in the cache. +- `element_count` ([UInt64](../sql_reference/data_types/int_uint.md#uint-ranges)) — Number of items stored in the dictionary. +- `load_factor` ([Float64](../sql_reference/data_types/float.md)) — Percentage filled in the dictionary (for a hashed dictionary, the percentage filled in the hash table). +- `source` ([String](../sql_reference/data_types/string.md)) — Text describing the [data source](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_sources.md) for the dictionary. +- `lifetime_min` ([UInt64](../sql_reference/data_types/int_uint.md#uint-ranges)) — Minimum [lifetime](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_lifetime.md) of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if `invalidate_query` is set, then only if it has changed). Set in seconds. +- `lifetime_max` ([UInt64](../sql_reference/data_types/int_uint.md#uint-ranges)) — Maximum [lifetime](../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_lifetime.md) of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if `invalidate_query` is set, then only if it has changed). Set in seconds. +- `loading_start_time` ([DateTime](../sql_reference/data_types/datetime.md)) — Start time for loading the dictionary. +- `last_successful_update_time` ([DateTime](../sql_reference/data_types/datetime.md)) — End time for loading or updating the dictionary. Helps to monitor problems with external sources and investigate their causes. +- `loading_duration` ([Float32](../sql_reference/data_types/float.md)) — Duration of dictionary loading. +- `last_exception` ([String](../sql_reference/data_types/string.md)) — Text of the error that occurs when creating or reloading the dictionary if the dictionary couldn't be created. -Note that the amount of memory used by the dictionary is not proportional to the number of items stored in it. So for flat and cached dictionaries, all the memory cells are pre-assigned, regardless of how full the dictionary actually is. + +**Example** + +Configure the dictionary. + +```sql +CREATE DICTIONARY dictdb.dict +( + `key` Int64 DEFAULT -1, + `value_default` String DEFAULT 'world', + `value_expression` String DEFAULT 'xxx' EXPRESSION 'toString(127 * 172)' +) +PRIMARY KEY key +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'dicttbl' DB 'dictdb')) +LIFETIME(MIN 0 MAX 1) +LAYOUT(FLAT()) +``` + +Make sure that the dictionary is loaded. 
+ +```sql +SELECT * FROM system.dictionaries +``` + +```text +┌─database─┬─name─┬─status─┬─origin──────┬─type─┬─key────┬─attribute.names──────────────────────┬─attribute.types─────┬─bytes_allocated─┬─query_count─┬─hit_rate─┬─element_count─┬───────────load_factor─┬─source─────────────────────┬─lifetime_min─┬─lifetime_max─┬──loading_start_time─┬──last_successful_update_time─┬──────loading_duration─┬─last_exception─┐ +│ dictdb │ dict │ LOADED │ dictdb.dict │ Flat │ UInt64 │ ['value_default','value_expression'] │ ['String','String'] │ 74032 │ 0 │ 1 │ 1 │ 0.0004887585532746823 │ ClickHouse: dictdb.dicttbl │ 0 │ 1 │ 2020-03-04 04:17:34 │ 2020-03-04 04:30:34 │ 0.002 │ │ +└──────────┴──────┴────────┴─────────────┴──────┴────────┴──────────────────────────────────────┴─────────────────────┴─────────────────┴─────────────┴──────────┴───────────────┴───────────────────────┴────────────────────────────┴──────────────┴──────────────┴─────────────────────┴──────────────────────────────┴───────────────────────┴────────────────┘ +``` ## system.events {#system_tables-events} diff --git a/docs/en/operations/troubleshooting.md b/docs/en/operations/troubleshooting.md index 3cfcee39a22..b6afedfca7e 100644 --- a/docs/en/operations/troubleshooting.md +++ b/docs/en/operations/troubleshooting.md @@ -12,12 +12,12 @@ toc_title: Troubleshooting ## Installation {#troubleshooting-installation-errors} -### You Cannot Get Deb Packages From Clickhouse Repository With Apt-get {#you-cannot-get-deb-packages-from-clickhouse-repository-with-apt-get} +### You Cannot Get Deb Packages from ClickHouse Repository with Apt-get {#you-cannot-get-deb-packages-from-clickhouse-repository-with-apt-get} - Check firewall settings. - If you cannot access the repository for any reason, download packages as described in the [Getting started](../getting_started/index.md) article and install them manually using the `sudo dpkg -i ` command. You will also need the `tzdata` package. -## Connecting To the Server {#troubleshooting-accepts-no-connections} +## Connecting to the Server {#troubleshooting-accepts-no-connections} Possible issues: @@ -137,7 +137,7 @@ If you start `clickhouse-client` with the `stack-trace` parameter, ClickHouse re You might see a message about a broken connection. In this case, you can repeat the query. If the connection breaks every time you perform the query, check the server logs for errors. -## Efficiency Of Query Processing {#troubleshooting-too-slow} +## Efficiency of Query Processing {#troubleshooting-too-slow} If you see that ClickHouse is working too slowly, you need to profile the load on the server resources and network for your queries. diff --git a/docs/en/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_lifetime.md b/docs/en/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_lifetime.md index 645c6347f66..97d5b6e4474 100644 --- a/docs/en/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_lifetime.md +++ b/docs/en/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_lifetime.md @@ -49,6 +49,11 @@ or LIFETIME(MIN 300 MAX 360) ``` +If the values are `0` and `0` (`LIFETIME(MIN 0 MAX 0)`), ClickHouse does not reload the dictionary by timeout. +In this case, ClickHouse can reload the dictionary earlier if the dictionary configuration file was changed or the `SYSTEM RELOAD DICTIONARY` command was executed. 
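To illustrate the note added above, a minimal sketch of a dictionary that is never reloaded by timeout (the dictionary name, source table, and layout here are hypothetical, following the DDL syntax shown elsewhere in these docs):

```sql
-- LIFETIME(MIN 0 MAX 0) disables reloading by timeout; the dictionary
-- can still be refreshed manually or when its configuration changes.
CREATE DICTIONARY default.static_dict
(
    `key` UInt64 DEFAULT 0,
    `value` String DEFAULT ''
)
PRIMARY KEY key
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'static_source' DB 'default'))
LAYOUT(FLAT())
LIFETIME(MIN 0 MAX 0);

-- Force an update when the source data is known to have changed:
SYSTEM RELOAD DICTIONARY default.static_dict;
```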
+ When upgrading the dictionaries, the ClickHouse server applies different logic depending on the type of [source](external_dicts_dict_sources.md): - For a text file, it checks the time of modification. If the time differs from the previously recorded time, the dictionary is updated. diff --git a/docs/en/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_sources.md b/docs/en/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_sources.md index 174341f697e..1d3b78635d0 100644 --- a/docs/en/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_sources.md +++ b/docs/en/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_sources.md @@ -488,7 +488,7 @@ SOURCE(MYSQL( )) ``` -### Clickhouse {#dicts-external_dicts_dict_sources-clickhouse} +### ClickHouse {#dicts-external_dicts_dict_sources-clickhouse} Example of settings: diff --git a/docs/en/sql_reference/operators.md b/docs/en/sql_reference/operators.md index 6414a1d6469..418a9e32771 100644 --- a/docs/en/sql_reference/operators.md +++ b/docs/en/sql_reference/operators.md @@ -58,7 +58,7 @@ Groups of operators are listed in order of priority (the higher it is in the lis `a NOT BETWEEN b AND c` – The same as `a < b OR a > c`. -## Operators For Working With Data Sets {#operators-for-working-with-data-sets} +## Operators for Working with Data Sets {#operators-for-working-with-data-sets} *See [IN operators](statements/select.md#select-in-operators).* @@ -70,7 +70,7 @@ Groups of operators are listed in order of priority (the higher it is in the lis `a GLOBAL NOT IN ...` – The `globalNotIn(a, b)` function. -## Operators For Working With Dates and Times {#operators-datetime} +## Operators for Working with Dates and Times {#operators-datetime} ### EXTRACT {#operator-extract} @@ -231,7 +231,7 @@ Sometimes this doesn’t work the way you expect. For example, `SELECT 4 > 2 > 3 For efficiency, the `and` and `or` functions accept any number of arguments. The corresponding chains of `AND` and `OR` operators are transformed to a single call of these functions. -## Checking For `NULL` {#checking-for-null} +## Checking for `NULL` {#checking-for-null} ClickHouse supports the `IS NULL` and `IS NOT NULL` operators. diff --git a/docs/en/whats_new/changelog/2017.md b/docs/en/whats_new/changelog/2017.md index 68744e5334a..df632d72d83 100644 --- a/docs/en/whats_new/changelog/2017.md +++ b/docs/en/whats_new/changelog/2017.md @@ -152,7 +152,7 @@ This release contains bug fixes for the previous release 1.1.54276: - Fixed parsing when inserting in RowBinary format if input data starts with ';'. - Errors during runtime compilation of certain aggregate functions (e.g. `groupArray()`). -### Clickhouse Release 1.1.54276, 2017-08-16 {#clickhouse-release-1-1-54276-2017-08-16} +### ClickHouse Release 1.1.54276, 2017-08-16 {#clickhouse-release-1-1-54276-2017-08-16} #### New features: {#new-features-4} diff --git a/docs/en/whats_new/changelog/2018.md b/docs/en/whats_new/changelog/2018.md index f8eef5a911d..108332ce56b 100644 --- a/docs/en/whats_new/changelog/2018.md +++ b/docs/en/whats_new/changelog/2018.md @@ -839,7 +839,7 @@ toc_title: '2018' - Restored the behavior for queries like `SELECT * FROM remote('server2', default.table) WHERE col IN (SELECT col2 FROM default.table)` when the right side of the `IN` should use a remote `default.table` instead of a local one. 
This behavior was broken in version 1.1.54358. - Removed extraneous error-level logging of `Not found column ... in block`. -### Clickhouse Release 1.1.54362, 2018-03-11 {#clickhouse-release-1-1-54362-2018-03-11} +### ClickHouse Release 1.1.54362, 2018-03-11 {#clickhouse-release-1-1-54362-2018-03-11} #### New features: {#new-features-16} @@ -929,13 +929,13 @@ toc_title: '2018' - Removed the `strict_insert_defaults` setting. If you were using this functionality, write to `clickhouse-feedback@yandex-team.com`. - Removed the `UnsortedMergeTree` engine. -### Clickhouse Release 1.1.54343, 2018-02-05 {#clickhouse-release-1-1-54343-2018-02-05} +### ClickHouse Release 1.1.54343, 2018-02-05 {#clickhouse-release-1-1-54343-2018-02-05} - Added macros support for defining cluster names in distributed DDL queries and constructors of Distributed tables: `CREATE TABLE distr ON CLUSTER '{cluster}' (...) ENGINE = Distributed('{cluster}', 'db', 'table')`. - Now queries like `SELECT ... FROM table WHERE expr IN (subquery)` are processed using the `table` index. - Improved processing of duplicates when inserting to Replicated tables, so they no longer slow down execution of the replication queue. -### Clickhouse Release 1.1.54342, 2018-01-22 {#clickhouse-release-1-1-54342-2018-01-22} +### ClickHouse Release 1.1.54342, 2018-01-22 {#clickhouse-release-1-1-54342-2018-01-22} This release contains bug fixes for the previous release 1.1.54337: @@ -947,7 +947,7 @@ This release contains bug fixes for the previous release 1.1.54337: - Buffer tables now work correctly when MATERIALIZED columns are present in the destination table (by zhang2014). - Fixed a bug in implementation of NULL. -### Clickhouse Release 1.1.54337, 2018-01-18 {#clickhouse-release-1-1-54337-2018-01-18} +### ClickHouse Release 1.1.54337, 2018-01-18 {#clickhouse-release-1-1-54337-2018-01-18} #### New features: {#new-features-17} diff --git a/docs/en/whats_new/changelog/2019.md b/docs/en/whats_new/changelog/2019.md index ec2c14055cb..5b0405f0b0a 100644 --- a/docs/en/whats_new/changelog/2019.md +++ b/docs/en/whats_new/changelog/2019.md @@ -128,11 +128,11 @@ toc_title: '2019' ## ClickHouse release v19.16 {#clickhouse-release-v19-16} -#### Clickhouse release v19.16.14.65, 2020-03-25 +#### ClickHouse release v19.16.14.65, 2020-03-25 * Fixed up a bug in batched calculations of ternary logical OPs on multiple arguments (more than 10). [#8718](https://github.com/ClickHouse/ClickHouse/pull/8718) ([Alexander Kazakov](https://github.com/Akazz)) This bugfix was backported to version 19.16 by a special request from Altinity. -#### Clickhouse release v19.16.14.65, 2020-03-05 {#clickhouse-release-v19-16-14-65-2020-03-05} +#### ClickHouse release v19.16.14.65, 2020-03-05 {#clickhouse-release-v19-16-14-65-2020-03-05} - Fix distributed subqueries incompatibility with older CH versions. Fixes [\#7851](https://github.com/ClickHouse/ClickHouse/issues/7851) [(tavplubix)](https://github.com/tavplubix) @@ -1865,7 +1865,7 @@ toc_title: '2019' - Added tool for converting an old month-partitioned part to the custom-partitioned format. [\#4195](https://github.com/ClickHouse/ClickHouse/pull/4195) ([Alex Zatelepin](https://github.com/ztlpn)) - Added docs about two datasets in s3. [\#4144](https://github.com/ClickHouse/ClickHouse/pull/4144) ([alesapin](https://github.com/alesapin)) - Added script which creates changelog from pull requests description. 
[\#4169](https://github.com/ClickHouse/ClickHouse/pull/4169) [\#4173](https://github.com/ClickHouse/ClickHouse/pull/4173) ([KochetovNicolai](https://github.com/KochetovNicolai)) ([KochetovNicolai](https://github.com/KochetovNicolai)) -- Added puppet module for Clickhouse. [\#4182](https://github.com/ClickHouse/ClickHouse/pull/4182) ([Maxim Fedotov](https://github.com/MaxFedotov)) +- Added puppet module for ClickHouse. [\#4182](https://github.com/ClickHouse/ClickHouse/pull/4182) ([Maxim Fedotov](https://github.com/MaxFedotov)) - Added docs for a group of undocumented functions. [\#4168](https://github.com/ClickHouse/ClickHouse/pull/4168) ([Winter Zhang](https://github.com/zhang2014)) - ARM build fixes. [\#4210](https://github.com/ClickHouse/ClickHouse/pull/4210)[\#4306](https://github.com/ClickHouse/ClickHouse/pull/4306) [\#4291](https://github.com/ClickHouse/ClickHouse/pull/4291) ([proller](https://github.com/proller)) ([proller](https://github.com/proller)) - Dictionary tests now able to run from `ctest`. [\#4189](https://github.com/ClickHouse/ClickHouse/pull/4189) ([proller](https://github.com/proller)) diff --git a/docs/en/whats_new/index.md b/docs/en/whats_new/index.md index 3b8886a9f0a..0901166b887 100644 --- a/docs/en/whats_new/index.md +++ b/docs/en/whats_new/index.md @@ -3,3 +3,4 @@ toc_folder_title: What's New toc_priority: 72 --- + diff --git a/docs/es/development/tests.md b/docs/es/development/tests.md index 2d1996e0535..388931e9436 100644 --- a/docs/es/development/tests.md +++ b/docs/es/development/tests.md @@ -13,7 +13,7 @@ Las pruebas funcionales son las más simples y cómodas de usar. La mayoría de Cada prueba funcional envía una o varias consultas al servidor ClickHouse en ejecución y compara el resultado con la referencia. -Las pruebas se encuentran en `testsies` directorio. Hay dos subdirectorios: `stateless` y `stateful`. Las pruebas sin estado ejecutan consultas sin datos de prueba precargados: a menudo crean pequeños conjuntos de datos sintéticos sobre la marcha, dentro de la prueba misma. Las pruebas estatales requieren datos de prueba precargados de Yandex.Métrica y no está disponible para el público en general. Tendemos a usar sólo `stateless` pruebas y evitar la adición de nuevos `stateful` prueba. +Las pruebas se encuentran en `queries` directorio. Hay dos subdirectorios: `stateless` y `stateful`. Las pruebas sin estado ejecutan consultas sin datos de prueba precargados: a menudo crean pequeños conjuntos de datos sintéticos sobre la marcha, dentro de la prueba misma. Las pruebas estatales requieren datos de prueba precargados de Yandex.Métrica y no está disponible para el público en general. Tendemos a usar sólo `stateless` pruebas y evitar la adición de nuevos `stateful` prueba. Cada prueba puede ser de dos tipos: `.sql` y `.sh`. `.sql` test es el script SQL simple que se canaliza a `clickhouse-client --multiquery --testmode`. `.sh` test es un script que se ejecuta por sí mismo. @@ -21,7 +21,7 @@ Para ejecutar todas las pruebas, use `testskhouse-test` herramienta. Mira `--hel La forma más sencilla de invocar pruebas funcionales es copiar `clickhouse-client` a `/usr/bin/`, ejecutar `clickhouse-server` y luego ejecutar `./clickhouse-test` de su propio directorio. -Para agregar una nueva prueba, cree un `.sql` o `.sh` archivo en `testsies/0_stateless` directorio, compruébelo manualmente y luego genere `.reference` archivo de la siguiente manera: `clickhouse-client -n --testmode < 00000_test.sql > 00000_test.reference` o `./00000_test.sh > ./00000_test.reference`. 
+Para agregar una nueva prueba, cree un `.sql` o `.sh` archivo en `queries/0_stateless` directorio, compruébelo manualmente y luego genere `.reference` archivo de la siguiente manera: `clickhouse-client -n --testmode < 00000_test.sql > 00000_test.reference` o `./00000_test.sh > ./00000_test.reference`. Las pruebas deben usar (crear, soltar, etc.) solo tablas en `test` base de datos que se supone que se crea de antemano; también las pruebas pueden usar tablas temporales. @@ -36,7 +36,7 @@ deshabilitar estos grupos de pruebas utilizando `--no-zookeeper`, `--no-shard` y ## Bugs conocidos {#known-bugs} -Si conocemos algunos errores que se pueden reproducir fácilmente mediante pruebas funcionales, colocamos pruebas funcionales preparadas en `testsies/bugs` directorio. Estas pruebas se moverán a `teststests_stateless` cuando se corrigen errores. +Si conocemos algunos errores que se pueden reproducir fácilmente mediante pruebas funcionales, colocamos pruebas funcionales preparadas en `queries/bugs` directorio. Estas pruebas se moverán a `teststests_stateless` cuando se corrigen errores. ## Pruebas de integración {#integration-tests} diff --git a/docs/fa/development/tests.md b/docs/fa/development/tests.md index 86bcd4d53c2..874ac3063b9 100644 --- a/docs/fa/development/tests.md +++ b/docs/fa/development/tests.md @@ -14,7 +14,7 @@ toc_title: "\u0646\u062D\u0648\u0647 \u0627\u062C\u0631\u0627\u06CC \u062A\u0633 هر تست عملکردی یک یا چند نمایش داده شد به سرور در حال اجرا تاتر می فرستد و نتیجه را با مرجع مقایسه می کند. -تست ها در واقع `testsies` فهرست راهنما. دو زیرشاخه وجود دارد: `stateless` و `stateful`. تست های بدون تابعیت بدون هیچ گونه داده های تست پیش بارگذاری شده نمایش داده می شوند-اغلب مجموعه داده های مصنوعی کوچک را در پرواز در داخل تست خود ایجاد می کنند. تست های نفرت انگیز نیاز به داده های تست از قبل نصب شده از یاندکس.متریکا و در دسترس عموم نیست. ما تمایل به استفاده از تنها `stateless` تست ها و جلوگیری از اضافه کردن جدید `stateful` تستها +تست ها در واقع `queries` فهرست راهنما. دو زیرشاخه وجود دارد: `stateless` و `stateful`. تست های بدون تابعیت بدون هیچ گونه داده های تست پیش بارگذاری شده نمایش داده می شوند-اغلب مجموعه داده های مصنوعی کوچک را در پرواز در داخل تست خود ایجاد می کنند. تست های نفرت انگیز نیاز به داده های تست از قبل نصب شده از یاندکس.متریکا و در دسترس عموم نیست. ما تمایل به استفاده از تنها `stateless` تست ها و جلوگیری از اضافه کردن جدید `stateful` تستها هر تست می تواند یکی از دو نوع باشد: `.sql` و `.sh`. `.sql` تست اسکریپت ساده مربع است که به لوله کشی است `clickhouse-client --multiquery --testmode`. `.sh` تست یک اسکریپت است که به خودی خود اجرا است. @@ -22,7 +22,7 @@ toc_title: "\u0646\u062D\u0648\u0647 \u0627\u062C\u0631\u0627\u06CC \u062A\u0633 ساده ترین راه برای فراخوانی تست های کاربردی کپی است `clickhouse-client` به `/usr/bin/` فرار کن `clickhouse-server` و سپس اجرا کنید `./clickhouse-test` از دایرکتوری خود را. -برای اضافه کردن تست جدید, ایجاد یک `.sql` یا `.sh` پرونده در `testsies/0_stateless` فهرست راهنما را به صورت دستی بررسی کنید و سپس تولید کنید `.reference` پرونده به روش زیر: `clickhouse-client -n --testmode < 00000_test.sql > 00000_test.reference` یا `./00000_test.sh > ./00000_test.reference`. +برای اضافه کردن تست جدید, ایجاد یک `.sql` یا `.sh` پرونده در `queries/0_stateless` فهرست راهنما را به صورت دستی بررسی کنید و سپس تولید کنید `.reference` پرونده به روش زیر: `clickhouse-client -n --testmode < 00000_test.sql > 00000_test.reference` یا `./00000_test.sh > ./00000_test.reference`. 
تست باید استفاده کنید (ساختن, قطره, و غیره) تنها جداول در `test` پایگاه داده است که فرض بر این است که از قبل ایجاد می شود; همچنین تست می توانید جداول موقت استفاده. @@ -37,7 +37,7 @@ toc_title: "\u0646\u062D\u0648\u0647 \u0627\u062C\u0631\u0627\u06CC \u062A\u0633 ## اشکالات شناخته شده {#known-bugs} -اگر ما می دانیم برخی از اشکالات است که می تواند به راحتی توسط تست های کاربردی تکثیر, ما تست های عملکردی تهیه شده در `testsies/bugs` فهرست راهنما. این تست خواهد شد به نقل مکان کرد `teststests_stateless` هنگامی که اشکالات ثابت هستند. +اگر ما می دانیم برخی از اشکالات است که می تواند به راحتی توسط تست های کاربردی تکثیر, ما تست های عملکردی تهیه شده در `queries/bugs` فهرست راهنما. این تست خواهد شد به نقل مکان کرد `teststests_stateless` هنگامی که اشکالات ثابت هستند. ## تست های ادغام {#integration-tests} diff --git a/docs/fr/development/tests.md b/docs/fr/development/tests.md index 9c79c65ba9d..e5c8a50fa31 100644 --- a/docs/fr/development/tests.md +++ b/docs/fr/development/tests.md @@ -13,7 +13,7 @@ Les tests fonctionnels sont les plus simples et pratiques à utiliser. La plupar Chaque test fonctionnel envoie une ou plusieurs requêtes au serveur clickhouse en cours d'exécution et compare le résultat avec la référence. -Les Tests sont situés dans `testsies` répertoire. Il y a deux sous-répertoires: `stateless` et `stateful`. Les tests sans état exécutent des requêtes sans données de test préchargées - ils créent souvent de petits ensembles de données synthétiques à la volée, dans le test lui-même. Les tests avec État nécessitent des données de test préchargées de Yandex.Metrica et non disponible pour le grand public. Nous avons tendance à utiliser uniquement `stateless` tests et éviter d'ajouter de nouveaux `stateful` test. +Les Tests sont situés dans `queries` répertoire. Il y a deux sous-répertoires: `stateless` et `stateful`. Les tests sans état exécutent des requêtes sans données de test préchargées - ils créent souvent de petits ensembles de données synthétiques à la volée, dans le test lui-même. Les tests avec État nécessitent des données de test préchargées de Yandex.Metrica et non disponible pour le grand public. Nous avons tendance à utiliser uniquement `stateless` tests et éviter d'ajouter de nouveaux `stateful` test. Chaque test peut être de deux types: `.sql` et `.sh`. `.sql` test est le script SQL simple qui est canalisé vers `clickhouse-client --multiquery --testmode`. `.sh` test est un script qui est exécuté par lui-même. @@ -21,7 +21,7 @@ Pour exécuter tous les tests, utilisez `testskhouse-test` outil. Regarder `--he Le moyen le plus simple d'invoquer des tests fonctionnels est de copier `clickhouse-client` de `/usr/bin/`, exécuter `clickhouse-server` et puis exécutez `./clickhouse-test` à partir de son propre répertoire. -Pour ajouter un nouveau test, créez un `.sql` ou `.sh` fichier dans `testsies/0_stateless` répertoire, vérifiez-le manuellement, puis générez `.reference` fichier de la façon suivante: `clickhouse-client -n --testmode < 00000_test.sql > 00000_test.reference` ou `./00000_test.sh > ./00000_test.reference`. +Pour ajouter un nouveau test, créez un `.sql` ou `.sh` fichier dans `queries/0_stateless` répertoire, vérifiez-le manuellement, puis générez `.reference` fichier de la façon suivante: `clickhouse-client -n --testmode < 00000_test.sql > 00000_test.reference` ou `./00000_test.sh > ./00000_test.reference`. 
Les Tests doivent utiliser (create, drop, etc) uniquement des tables dans `test` base de données supposée être créée au préalable; les tests peuvent également utiliser des tables temporaires. @@ -36,7 +36,7 @@ désactivez ces groupes de tests en utilisant `--no-zookeeper`, `--no-shard` et ## Bugs connus {#known-bugs} -Si nous connaissons des bugs qui peuvent être facilement reproduits par des tests fonctionnels, nous plaçons des tests fonctionnels préparés dans `testsies/bugs` répertoire. Ces tests seront déplacés à `teststests_stateless` quand les bugs sont corrigés. +Si nous connaissons des bugs qui peuvent être facilement reproduits par des tests fonctionnels, nous plaçons des tests fonctionnels préparés dans `queries/bugs` répertoire. Ces tests seront déplacés à `teststests_stateless` quand les bugs sont corrigés. ## Les Tests D'Intégration {#integration-tests} diff --git a/docs/ja/development/tests.md b/docs/ja/development/tests.md index 80901a859e7..27b8870461e 100644 --- a/docs/ja/development/tests.md +++ b/docs/ja/development/tests.md @@ -13,7 +13,7 @@ toc_title: "ClickHouse\u30C6\u30B9\u30C8\u3092\u5B9F\u884C\u3059\u308B\u65B9\u6C 各機能テストは、実行中のclickhouseサーバーに一つまたは複数のクエリを送信し、参照と結果を比較します。 -テストは `testsies` ディレクトリ。 つのサブディレクトリがあります: `stateless` と `stateful`. ステートレステストでは、プリロードされたテストデータを使用せずにクエリを実行します。 ステートフルテストでは、Yandexのテストデータが必要です。メトリカと一般市民には利用できません。 我々は唯一の使用する傾向があります `stateless` テストと新しい追加を避ける `stateful` テスト +テストは `queries` ディレクトリ。 つのサブディレクトリがあります: `stateless` と `stateful`. ステートレステストでは、プリロードされたテストデータを使用せずにクエリを実行します。 ステートフルテストでは、Yandexのテストデータが必要です。メトリカと一般市民には利用できません。 我々は唯一の使用する傾向があります `stateless` テストと新しい追加を避ける `stateful` テスト それぞれの試験できるの種類: `.sql` と `.sh`. `.sql` testは、パイプ処理される単純なSQLスクリプトです `clickhouse-client --multiquery --testmode`. `.sh` テストは、単独で実行されるスクリプトです。 @@ -21,7 +21,7 @@ toc_title: "ClickHouse\u30C6\u30B9\u30C8\u3092\u5B9F\u884C\u3059\u308B\u65B9\u6C 機能テストを呼び出す最も簡単な方法は、コピーすることです `clickhouse-client` に `/usr/bin/`、実行 `clickhouse-server` そして、実行 `./clickhouse-test` 独自のディレクトリから。 -新しいテストを追加するには、 `.sql` または `.sh` ファイル `testsies/0_stateless` ディレクトリは、手動でチェックしてから生成 `.reference` 次の方法でファイル: `clickhouse-client -n --testmode < 00000_test.sql > 00000_test.reference` または `./00000_test.sh > ./00000_test.reference`. +新しいテストを追加するには、 `.sql` または `.sh` ファイル `queries/0_stateless` ディレクトリは、手動でチェックしてから生成 `.reference` 次の方法でファイル: `clickhouse-client -n --testmode < 00000_test.sql > 00000_test.reference` または `./00000_test.sh > ./00000_test.reference`. 
テストでは、(create、dropなど)テーブルのみを使用する必要があります `test` テストでは一時テーブルを使用することもできます。 @@ -36,7 +36,7 @@ toc_title: "ClickHouse\u30C6\u30B9\u30C8\u3092\u5B9F\u884C\u3059\u308B\u65B9\u6C ## 既知のバグ {#known-bugs} -機能テストで簡単に再現できるいくつかのバグを知っていれば、準備された機能テストを `testsies/bugs` ディレクトリ。 これらのテストはに移動されます `teststests_stateless` バグが修正されたとき。 +機能テストで簡単に再現できるいくつかのバグを知っていれば、準備された機能テストを `queries/bugs` ディレクトリ。 これらのテストはに移動されます `teststests_stateless` バグが修正されたとき。 ## 統合テスト {#integration-tests} diff --git a/docs/ru/database_engines/mysql.md b/docs/ru/database_engines/mysql.md index 420ca370297..45547407be6 100644 --- a/docs/ru/database_engines/mysql.md +++ b/docs/ru/database_engines/mysql.md @@ -6,8 +6,6 @@ Не поддерживаемые виды запросов: -- `ATTACH`/`DETACH` -- `DROP` - `RENAME` - `CREATE TABLE` - `ALTER` @@ -16,7 +14,7 @@ ``` sql CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster] -ENGINE = MySQL('host:port', 'database', 'user', 'password') +ENGINE = MySQL('host:port', ['database' | database], 'user', 'password') ``` **Параметры движка** diff --git a/docs/ru/development/tests.md b/docs/ru/development/tests.md index c9181349a2b..c703d6cd5b3 100644 --- a/docs/ru/development/tests.md +++ b/docs/ru/development/tests.md @@ -10,7 +10,7 @@ Functional tests are the most simple and convenient to use. Most of ClickHouse f Each functional test sends one or multiple queries to the running ClickHouse server and compares the result with reference. -Tests are located in `testsies` directory. There are two subdirectories: `stateless` and `stateful`. Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from Yandex.Metrica and not available to general public. We tend to use only `stateless` tests and avoid adding new `stateful` tests. +Tests are located in `queries` directory. There are two subdirectories: `stateless` and `stateful`. Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from Yandex.Metrica and not available to general public. We tend to use only `stateless` tests and avoid adding new `stateful` tests. Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client --multiquery --testmode`. `.sh` test is a script that is run by itself. @@ -18,7 +18,7 @@ To run all tests, use `testskhouse-test` tool. Look `--help` for the list of pos The most simple way to invoke functional tests is to copy `clickhouse-client` to `/usr/bin/`, run `clickhouse-server` and then run `./clickhouse-test` from its own directory. -To add new test, create a `.sql` or `.sh` file in `testsies/0_stateless` directory, check it manually and then generate `.reference` file in the following way: `clickhouse-client -n --testmode < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`. +To add new test, create a `.sql` or `.sh` file in `queries/0_stateless` directory, check it manually and then generate `.reference` file in the following way: `clickhouse-client -n --testmode < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`. Tests should use (create, drop, etc) only tables in `test` database that is assumed to be created beforehand; also tests can use temporary tables. 
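To make the test-authoring workflow described above concrete, a minimal sketch of a stateless test (the file name and queries are hypothetical):

```sql
-- queries/0_stateless/01234_hypothetical_example.sql
-- Piped through `clickhouse-client --multiquery --testmode`; the captured
-- stdout is saved as queries/0_stateless/01234_hypothetical_example.reference.
SELECT 1 + 1;
SELECT toTypeName(toUInt8(42));
```

The matching `.reference` file would simply contain the expected output (`2` and `UInt8`), and the test fails on any difference.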
@@ -33,7 +33,7 @@ disable these groups of tests using `--no-zookeeper`, `--no-shard` and ## Known bugs {#known-bugs} -If we know some bugs that can be easily reproduced by functional tests, we place prepared functional tests in `testsies/bugs` directory. These tests will be moved to `teststests_stateless` when bugs are fixed. +If we know some bugs that can be easily reproduced by functional tests, we place prepared functional tests in `queries/bugs` directory. These tests will be moved to `teststests_stateless` when bugs are fixed. ## Integration Tests {#integration-tests} diff --git a/docs/ru/operations/system_tables.md b/docs/ru/operations/system_tables.md index a8ccd369562..fac1e63264b 100644 --- a/docs/ru/operations/system_tables.md +++ b/docs/ru/operations/system_tables.md @@ -135,27 +135,66 @@ SELECT * FROM system.contributors WHERE name='Olga Khvostikova' Такие куски могут быть присоединены с помощью [ALTER TABLE ATTACH PARTITION\|PART](../query_language/query_language/alter/#alter_attach-partition). Остальные столбцы описаны в [system.parts](#system_tables-parts). Если имя куска некорректно, значения некоторых столбцов могут быть `NULL`. Такие куски могут быть удалены с помощью [ALTER TABLE DROP DETACHED PART](../query_language/query_language/alter/#alter_drop-detached). -## system.dictionaries {#system-dictionaries} +## system.dictionaries {#system_tables-dictionaries} -Содержит информацию о внешних словарях. +Содержит информацию о [внешних словарях](../query_language/dicts/external_dicts.md). Столбцы: -- `name String` — Имя словаря. -- `type String` — Тип словаря: Flat, Hashed, Cache. -- `origin String` — Путь к конфигурационному файлу, в котором описан словарь. -- `attribute.names Array(String)` — Массив имён атрибутов, предоставляемых словарём. -- `attribute.types Array(String)` — Соответствующий массив типов атрибутов, предоставляемых словарём. -- `has_hierarchy UInt8` — Является ли словарь иерархическим. -- `bytes_allocated UInt64` — Количество оперативной памяти, которое использует словарь. -- `hit_rate Float64` — Для cache-словарей - доля использований, для которых значение было в кэше. -- `element_count UInt64` — Количество хранящихся в словаре элементов. -- `load_factor Float64` — Доля заполненности словаря (для hashed словаря - доля заполнения хэш-таблицы). -- `creation_time DateTime` — Время создания или последней успешной перезагрузки словаря. -- `last_exception String` — Текст ошибки, возникшей при создании или перезагрузке словаря, если словарь не удалось создать. -- `source String` - Текст, описывающий источник данных для словаря. +- `database` ([String](../data_types/string.md)) — Имя базы данных, в которой находится словарь, созданный с помощью DDL-запроса. Пустая строка для других словарей. +- `name` ([String](../data_types/string.md)) — [Имя словаря](../query_language/dicts/external_dicts_dict.md). +- `status` ([Enum8](../data_types/enum.md)) — Статус словаря. Возможные значения: + - `NOT_LOADED` — Словарь не загружен, потому что не использовался. + - `LOADED` — Словарь загружен успешно. + - `FAILED` — Словарь не загружен в результате ошибки. + - `LOADING` — Словарь в процессе загрузки. + - `LOADED_AND_RELOADING` — Словарь загружен успешно, сейчас перезагружается (частые причины: запрос [SYSTEM RELOAD DICTIONARY](../query_language/system.md#query_language-system-reload-dictionary), таймаут, изменение настроек словаря). + - `FAILED_AND_RELOADING` — Словарь не загружен в результате ошибки, сейчас перезагружается. 
+- `origin` ([String](../data_types/string.md)) — Путь к конфигурационному файлу, описывающему словарь. +- `type` ([String](../data_types/string.md)) — Тип размещения словаря. [Хранение словарей в памяти](../query_language/dicts/external_dicts_dict_layout.md). +- `key` — [Тип ключа](../query_language/dicts/external_dicts_dict_structure.md#ext_dict_structure-key): Числовой ключ ([UInt64](../data_types/int_uint.md#uint-ranges)) или Составной ключ ([String](../data_types/string.md)) — строка вида "(тип 1, тип 2, ..., тип n)". +- `attribute.names` ([Array](../data_types/array.md)([String](../data_types/string.md))) — Массив [имен атрибутов](../query_language/dicts/external_dicts_dict_structure.md#ext_dict_structure-attributes), предоставляемых справочником. +- `attribute.types` ([Array](../data_types/array.md)([String](../data_types/string.md))) — Соответствующий массив [типов атрибутов](../query_language/dicts/external_dicts_dict_structure.md#ext_dict_structure-attributes), предоставляемых справочником. +- `bytes_allocated` ([UInt64](../data_types/int_uint.md#uint-ranges)) — Объем оперативной памяти, используемый словарем. +- `query_count` ([UInt64](../data_types/int_uint.md#uint-ranges)) — Количество запросов с момента загрузки словаря или с момента последней успешной перезагрузки. +- `hit_rate` ([Float64](../data_types/float.md)) — Для cache-словарей — процент закэшированных значений. +- `element_count` ([UInt64](../data_types/int_uint.md#uint-ranges)) — Количество элементов, хранящихся в словаре. +- `load_factor` ([Float64](../data_types/float.md)) — Процент заполнения словаря (для хэшированного словаря — процент заполнения хэш-таблицы). +- `source` ([String](../data_types/string.md)) — Текст, описывающий [источник данных](../query_language/dicts/external_dicts_dict_sources.md) для словаря. +- `lifetime_min` ([UInt64](../data_types/int_uint.md#uint-ranges)) — Минимальное [время обновления](../query_language/dicts/external_dicts_dict_lifetime.md) словаря в памяти, по истечении которого ClickHouse попытается перезагрузить словарь (если задано `invalidate_query`, то только если он изменился). Задается в секундах. +- `lifetime_max` ([UInt64](../data_types/int_uint.md#uint-ranges)) — Максимальное [время обновления](../query_language/dicts/external_dicts_dict_lifetime.md) словаря в памяти, по истечении которого ClickHouse попытается перезагрузить словарь (если задано `invalidate_query`, то только если он изменился). Задается в секундах. +- `loading_start_time` ([DateTime](../data_types/datetime.md)) — Время начала загрузки словаря. +- `loading_duration` ([Float32](../data_types/float.md)) — Время, затраченное на загрузку словаря. +- `last_exception` ([String](../data_types/string.md)) — Текст ошибки, возникающей при создании или перезагрузке словаря, если словарь не удалось создать. -Заметим, что количество оперативной памяти, которое использует словарь, не является пропорциональным количеству элементов, хранящихся в словаре. Так, для flat и cached словарей, все ячейки памяти выделяются заранее, независимо от реальной заполненности словаря. +**Пример** + +Настройте словарь. + +```sql +CREATE DICTIONARY dictdb.dict +( + `key` Int64 DEFAULT -1, + `value_default` String DEFAULT 'world', + `value_expression` String DEFAULT 'xxx' EXPRESSION 'toString(127 * 172)' +) +PRIMARY KEY key +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'dicttbl' DB 'dictdb')) +LIFETIME(MIN 0 MAX 1) +LAYOUT(FLAT()) +``` + +Убедитесь, что словарь загружен. 
+ +```sql +SELECT * FROM system.dictionaries +``` + +```text +┌─database─┬─name─┬─status─┬─origin──────┬─type─┬─key────┬─attribute.names──────────────────────┬─attribute.types─────┬─bytes_allocated─┬─query_count─┬─hit_rate─┬─element_count─┬───────────load_factor─┬─source─────────────────────┬─lifetime_min─┬─lifetime_max─┬──loading_start_time─┬──last_successful_update_time─┬──────loading_duration─┬─last_exception─┐ +│ dictdb │ dict │ LOADED │ dictdb.dict │ Flat │ UInt64 │ ['value_default','value_expression'] │ ['String','String'] │ 74032 │ 0 │ 1 │ 1 │ 0.0004887585532746823 │ ClickHouse: dictdb.dicttbl │ 0 │ 1 │ 2020-03-04 04:17:34 │ 2020-03-04 04:30:34 │ 0.002 │ │ +└──────────┴──────┴────────┴─────────────┴──────┴────────┴──────────────────────────────────────┴─────────────────────┴─────────────────┴─────────────┴──────────┴───────────────┴───────────────────────┴────────────────────────────┴──────────────┴──────────────┴─────────────────────┴──────────────────────────────┴───────────────────────┴────────────────┘ +``` ## system.events {#system_tables-events} diff --git a/docs/ru/operations/table_engines/mergetree.md b/docs/ru/operations/table_engines/mergetree.md index 764472605ba..f8bbc983b74 100644 --- a/docs/ru/operations/table_engines/mergetree.md +++ b/docs/ru/operations/table_engines/mergetree.md @@ -494,14 +494,14 @@ ALTER TABLE example_table - /mnt/fast_ssd/clickhouse + /mnt/fast_ssd/clickhouse/ - /mnt/hdd1/clickhouse + /mnt/hdd1/clickhouse/ 10485760 - /mnt/hdd2/clickhouse + /mnt/hdd2/clickhouse/ 10485760 diff --git a/docs/toc_zh.yml b/docs/toc_zh.yml deleted file mode 100644 index f4aa73d91da..00000000000 --- a/docs/toc_zh.yml +++ /dev/null @@ -1,251 +0,0 @@ -nav: - -- '介绍': - - '概貌': 'index.md' - - 'ClickHouse的独特功能': 'introduction/distinctive_features.md' - - 'ClickHouse功能可被视为缺点': 'introduction/features_considered_disadvantages.md' - - '性能': 'introduction/performance.md' - - '历史': 'introduction/history.md' - - '使用者': 'introduction/adopters.md' - -- '入门指南': - - 'hidden': 'getting_started/index.md' - - '安装': 'getting_started/install.md' - - '教程': 'getting_started/tutorial.md' - - '示例数据集': - - '介绍': 'getting_started/example_datasets/index.md' - - '航班飞行数据': 'getting_started/example_datasets/ontime.md' - - '纽约市出租车数据': 'getting_started/example_datasets/nyc_taxi.md' - - 'AMPLab大数据基准测试': 'getting_started/example_datasets/amplab_benchmark.md' - - '维基访问数据': 'getting_started/example_datasets/wikistat.md' - - 'Criteo TB级别点击日志': 'getting_started/example_datasets/criteo.md' - - 'Star Schema基准测试': 'getting_started/example_datasets/star_schema.md' - - 'Yandex.Metrica': 'getting_started/example_datasets/metrica.md' - - 'Playground': 'getting_started/playground.md' - -- '客户端': - - '介绍': 'interfaces/index.md' - - '命令行客户端接口': 'interfaces/cli.md' - - '原生客户端接口 (TCP)': 'interfaces/tcp.md' - - 'HTTP 客户端接口': 'interfaces/http.md' - - 'MySQL 客户端接口': 'interfaces/mysql.md' - - '输入输出格式': 'interfaces/formats.md' - - 'JDBC 驱动': 'interfaces/jdbc.md' - - 'ODBC 驱动': 'interfaces/odbc.md' - - 'C ++客户端库': 'interfaces/cpp.md' - - '第三方': - - '客户端库': 'interfaces/third-party/client_libraries.md' - - '集成': 'interfaces/third-party/integrations.md' - - '可视界面': 'interfaces/third-party/gui.md' - - '代理': 'interfaces/third-party/proxy.md' - -- '数据类型': - - '介绍': 'data_types/index.md' - - 'UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64': 'data_types/int_uint.md' - - 'Float32, Float64': 'data_types/float.md' - - 'Decimal': 'data_types/decimal.md' - - 'Boolean values': 'data_types/boolean.md' - - 'String': 
'data_types/string.md' - - 'FixedString(N)': 'data_types/fixedstring.md' - - 'UUID': 'data_types/uuid.md' - - 'Date': 'data_types/date.md' - - 'DateTime64': 'data_types/datetime64.md' - - 'DateTime': 'data_types/datetime.md' - - 'Enum': 'data_types/enum.md' - - 'Array(T)': 'data_types/array.md' - - 'AggregateFunction(name, types_of_arguments...)': 'data_types/nested_data_structures/aggregatefunction.md' - - 'Tuple(T1, T2, ...)': 'data_types/tuple.md' - - 'Nullable': 'data_types/nullable.md' - - '嵌套数据结构': - - 'hidden': 'data_types/nested_data_structures/index.md' - - 'Nested(Name1 Type1, Name2 Type2, ...)': 'data_types/nested_data_structures/nested.md' - - '特殊数据类型': - - 'hidden': 'data_types/special_data_types/index.md' - - 'Expression': 'data_types/special_data_types/expression.md' - - 'Set': 'data_types/special_data_types/set.md' - - 'Nothing': 'data_types/special_data_types/nothing.md' - - 'Interval': 'data_types/special_data_types/interval.md' - - 'Domain类型': - - '介绍': 'data_types/domains/overview.md' - - 'IPv4': 'data_types/domains/ipv4.md' - - 'IPv6': 'data_types/domains/ipv6.md' - -- '数据库引擎': - - '介绍': 'database_engines/index.md' - - 'MySQL': 'database_engines/mysql.md' - - 'Lazy': 'database_engines/lazy.md' - -- '表引擎': - - '介绍': 'operations/table_engines/index.md' - - 'MergeTree': - - 'MergeTree': 'operations/table_engines/mergetree.md' - - 'Data Replication': 'operations/table_engines/replication.md' - - 'Custom Partitioning Key': 'operations/table_engines/custom_partitioning_key.md' - - 'ReplacingMergeTree': 'operations/table_engines/replacingmergetree.md' - - 'SummingMergeTree': 'operations/table_engines/summingmergetree.md' - - 'AggregatingMergeTree': 'operations/table_engines/aggregatingmergetree.md' - - 'CollapsingMergeTree': 'operations/table_engines/collapsingmergetree.md' - - 'VersionedCollapsingMergeTree': 'operations/table_engines/versionedcollapsingmergetree.md' - - 'GraphiteMergeTree': 'operations/table_engines/graphitemergetree.md' - - 'Log': - - '介绍': 'operations/table_engines/log_family.md' - - 'StripeLog': 'operations/table_engines/stripelog.md' - - 'Log': 'operations/table_engines/log.md' - - 'TinyLog': 'operations/table_engines/tinylog.md' - - '外部表引擎': - - 'Kafka': 'operations/table_engines/kafka.md' - - 'MySQL': 'operations/table_engines/mysql.md' - - 'JDBC': 'operations/table_engines/jdbc.md' - - 'ODBC': 'operations/table_engines/odbc.md' - - 'HDFS': 'operations/table_engines/hdfs.md' - - '其他表引擎': - - 'Distributed': 'operations/table_engines/distributed.md' - - 'External data': 'operations/table_engines/external_data.md' - - 'Dictionary': 'operations/table_engines/dictionary.md' - - 'Merge': 'operations/table_engines/merge.md' - - 'File': 'operations/table_engines/file.md' - - 'Null': 'operations/table_engines/null.md' - - 'Set': 'operations/table_engines/set.md' - - 'Join': 'operations/table_engines/join.md' - - 'URL': 'operations/table_engines/url.md' - - 'View': 'operations/table_engines/view.md' - - 'MaterializedView': 'operations/table_engines/materializedview.md' - - 'Memory': 'operations/table_engines/memory.md' - - 'Buffer': 'operations/table_engines/buffer.md' - - 'GenerateRandom': 'operations/table_engines/generate.md' - -- 'SQL语法': - - 'hidden': 'query_language/index.md' - - 'SELECT': 'query_language/select.md' - - 'INSERT INTO': 'query_language/insert_into.md' - - 'CREATE': 'query_language/create.md' - - 'ALTER': 'query_language/alter.md' - - 'SYSTEM': 'query_language/system.md' - - 'SHOW': 'query_language/show.md' - - '其他类型的查询': 
'query_language/misc.md' - - '函数': - - '介绍': 'query_language/functions/index.md' - - '算术函数': 'query_language/functions/arithmetic_functions.md' - - '比较函数': 'query_language/functions/comparison_functions.md' - - '逻辑函数': 'query_language/functions/logical_functions.md' - - '类型转换函数': 'query_language/functions/type_conversion_functions.md' - - '时间日期函数': 'query_language/functions/date_time_functions.md' - - '字符串函数': 'query_language/functions/string_functions.md' - - '字符串搜索函数': 'query_language/functions/string_search_functions.md' - - '字符串替换函数': 'query_language/functions/string_replace_functions.md' - - '条件函数 ': 'query_language/functions/conditional_functions.md' - - '数学函数': 'query_language/functions/math_functions.md' - - '取整函数': 'query_language/functions/rounding_functions.md' - - '数组函数': 'query_language/functions/array_functions.md' - - '字符串拆分合并函数': 'query_language/functions/splitting_merging_functions.md' - - '位操作函数': 'query_language/functions/bit_functions.md' - - '位图函数': 'query_language/functions/bitmap_functions.md' - - 'Hash函数': 'query_language/functions/hash_functions.md' - - '随机函数': 'query_language/functions/random_functions.md' - - '编码函数': 'query_language/functions/encoding_functions.md' - - 'UUID函数': 'query_language/functions/uuid_functions.md' - - 'URL函数': 'query_language/functions/url_functions.md' - - 'IP函数': 'query_language/functions/ip_address_functions.md' - - 'JSON函数': 'query_language/functions/json_functions.md' - - '高阶函数': 'query_language/functions/higher_order_functions.md' - - '字典函数': 'query_language/functions/ext_dict_functions.md' - - 'Yandex.Metrica字典函数': 'query_language/functions/ym_dict_functions.md' - - 'IN运算符相关函数': 'query_language/functions/in_functions.md' - - 'arrayJoin函数': 'query_language/functions/array_join.md' - - 'GEO函数': 'query_language/functions/geo.md' - - 'Nullable处理函数': 'query_language/functions/functions_for_nulls.md' - - '机器学习函数': 'query_language/functions/machine_learning_functions.md' - - 'Introspection': 'query_language/functions/introspection.md' - - '其他函数': 'query_language/functions/other_functions.md' - - '聚合函数': - - '介绍': 'query_language/agg_functions/index.md' - - '函数列表': 'query_language/agg_functions/reference.md' - - '聚合函数组合子': 'query_language/agg_functions/combinators.md' - - '参数化聚合函数': 'query_language/agg_functions/parametric_functions.md' - - '表引擎函数': - - '介绍': 'query_language/table_functions/index.md' - - 'file': 'query_language/table_functions/file.md' - - 'merge': 'query_language/table_functions/merge.md' - - 'numbers': 'query_language/table_functions/numbers.md' - - 'remote': 'query_language/table_functions/remote.md' - - 'url': 'query_language/table_functions/url.md' - - 'mysql': 'query_language/table_functions/mysql.md' - - 'jdbc': 'query_language/table_functions/jdbc.md' - - 'odbc': 'query_language/table_functions/odbc.md' - - 'hdfs': 'query_language/table_functions/hdfs.md' - - 'input': 'query_language/table_functions/input.md' - - 'generateRandom': 'query_language/table_functions/generate.md' - - '字典': - - '介绍': 'query_language/dicts/index.md' - - '外部字典': - - '介绍': 'query_language/dicts/external_dicts.md' - - '配置外部字典': 'query_language/dicts/external_dicts_dict.md' - - '字典的内存布局': 'query_language/dicts/external_dicts_dict_layout.md' - - '字典的刷新策略': 'query_language/dicts/external_dicts_dict_lifetime.md' - - '字典的外部数据源': 'query_language/dicts/external_dicts_dict_sources.md' - - '字典的键和字段值': 'query_language/dicts/external_dicts_dict_structure.md' - - 'Hierarchical dictionaries': 'query_language/dicts/external_dicts_dict_hierarchical.md' - 
- '内部字典': 'query_language/dicts/internal_dicts.md' - - '操作符': 'query_language/operators.md' - - '语法说明': 'query_language/syntax.md' - -- 'Guides': - - 'Overview': 'guides/index.md' - - 'Applying CatBoost Models': 'guides/apply_catboost_model.md' - -- '运维': - - '介绍': 'operations/index.md' - - '环境要求': 'operations/requirements.md' - - '监控': 'operations/monitoring.md' - - '故障排查': 'operations/troubleshooting.md' - - '使用建议': 'operations/tips.md' - - '版本升级': 'operations/update.md' - - '访问权限控制': 'operations/access_rights.md' - - '数据备份': 'operations/backup.md' - - '配置文件': 'operations/configuration_files.md' - - '配额': 'operations/quotas.md' - - '系统表': 'operations/system_tables.md' - - '优化性能': - - '查询分析': 'operations/performance/sampling_query_profiler.md' - - '测试硬件': 'operations/performance_test.md' - - 'Server参数配置': - - '介绍': 'operations/server_settings/index.md' - - 'Server参数说明': 'operations/server_settings/settings.md' - - 'Settings配置': - - '介绍': 'operations/settings/index.md' - - '查询权限管理': 'operations/settings/permissions_for_queries.md' - - '查询复杂性的限制': 'operations/settings/query_complexity.md' - - 'Setting列表': 'operations/settings/settings.md' - - 'Setting配置组': 'operations/settings/settings_profiles.md' - - '用户配置': 'operations/settings/settings_users.md' - - 'Settings的约束': 'operations/settings/constraints_on_settings.md' - - '常用工具': - - '介绍': 'operations/utils/index.md' - - 'clickhouse-copier': 'operations/utils/clickhouse-copier.md' - - 'clickhouse-local': 'operations/utils/clickhouse-local.md' - - 'clickhouse-benchmark': 'operations/utils/clickhouse-benchmark.md' - -- '常见问题': - - '一般的问题': 'faq/general.md' - -- '开发者指南': - - 'hidden': 'development/index.md' - - '开发者指南': 'development/developer_instruction.md' - - 'ClickHouse架构概述': 'development/architecture.md' - - 'ClickHouse Code Browser': 'development/browse_code.md' - - '如何在Linux中编译ClickHouse': 'development/build.md' - - '如何在Mac OS X中编译ClickHouse': 'development/build_osx.md' - - '如何在Linux中编译Mac OS X ClickHouse': 'development/build_cross_osx.md' - - '如何在Linux中编译AARCH64 (ARM64) ClickHouse': 'development/build_cross_arm.md' - - '如何编写C++代码': 'development/style.md' - - '如何运行ClickHouse测试': 'development/tests.md' - - '使用的第三方库': 'development/contrib.md' - -- '新功能特性': - - '路线图': 'roadmap.md' - - '更新日志': - - '2020': 'changelog/index.md' - - '2019': 'changelog/2019.md' - - '2018': 'changelog/2018.md' - - '2017': 'changelog/2017.md' - - '安全更改日志': 'security_changelog.md' diff --git a/docs/tools/convert_toc.py b/docs/tools/convert_toc.py index 18178e3be72..9bfc347d244 100755 --- a/docs/tools/convert_toc.py +++ b/docs/tools/convert_toc.py @@ -8,7 +8,7 @@ import yaml import util -lang = 'ru' +lang = 'zh' base_dir = os.path.join(os.path.dirname(__file__), '..') en_dir = os.path.join(base_dir, 'en') docs_dir = os.path.join(base_dir, lang) @@ -57,7 +57,7 @@ def process_md_file(title, idx, original_path, proper_path): if original_path != proper_md_path: subprocess.check_call(f'git add {proper_md_path}', shell=True) if os.path.exists(original_path): - subprocess.check_call(f'git rm {original_path}', shell=True) + subprocess.check_call(f'rm {original_path}', shell=True) def process_toc_entry(entry, path, idx): @@ -131,7 +131,7 @@ def sync_translation(): util.write_md_file(lang_dst, en_meta, lang_content) subprocess.check_call(f'git add {lang_dst}', shell=True) - subprocess.check_call(f'git rm {lang_src}', shell=True) + subprocess.check_call(f'rm {lang_src}', shell=True) if __name__ == '__main__': diff --git a/docs/tools/nav.py b/docs/tools/nav.py index 
2d99d4df3fe..56d47d58d07 100644 --- a/docs/tools/nav.py +++ b/docs/tools/nav.py @@ -16,8 +16,8 @@ def build_nav_entry(root): if root.endswith('images'): return None, None, None result_items = [] - index_meta, _ = util.read_md_file(os.path.join(root, 'index.md')) - current_title = index_meta.get('toc_folder_title', index_meta.get('toc_title', 'hidden')) + index_meta, index_content = util.read_md_file(os.path.join(root, 'index.md')) + current_title = index_meta.get('toc_folder_title', index_meta.get('toc_title', find_first_header(index_content))) for filename in os.listdir(root): path = os.path.join(root, filename) if os.path.isdir(path): @@ -46,5 +46,7 @@ def build_nav(lang, args): _, _, nav = build_nav_entry(docs_dir) result = [] for key, value in nav.items(): - result.append({key: value}) + if key and value: + result.append({key: value}) + print('result', result) return result diff --git a/docs/tools/translate/filter.py b/docs/tools/translate/filter.py index 4376e7dc7f9..b5424f20921 100755 --- a/docs/tools/translate/filter.py +++ b/docs/tools/translate/filter.py @@ -36,7 +36,7 @@ def process_buffer(buffer, new_value, item=None, is_header=False): debug(f'Translate: "{text}" -> "{translated_text}"') if text and text[0].isupper() and not translated_text[0].isupper(): - translated_text = translated_text.capitalize() + translated_text = translated_text[0].upper() + translated_text[1:] if text.startswith(' ') and not translated_text.startswith(' '): translated_text = ' ' + translated_text @@ -47,12 +47,19 @@ def process_buffer(buffer, new_value, item=None, is_header=False): if is_header and translated_text.endswith('.'): translated_text = translated_text.rstrip('.') - title_case = False # is_header and translate.default_target_language == 'en' and text[0].isupper() - title_case_whitelist = {'a', 'an', 'the', 'and', 'or'} + title_case = is_header and translate.default_target_language == 'en' and text[0].isupper() + title_case_whitelist = { + 'a', 'an', 'the', 'and', 'or', 'that', + 'of', 'on', 'for', 'from', 'with', 'to', 'in' + } + is_first_iteration = True for token in translated_text.split(' '): - if title_case and not token.isupper(): - if token not in title_case_whitelist: - token = token.capitalize() + if title_case and token.isascii() and not token.isupper(): + if len(token) > 1 and token.lower() not in title_case_whitelist: + token = token[0].upper() + token[1:] + elif not is_first_iteration: + token = token.lower() + is_first_iteration = False new_value.append(pandocfilters.Str(token)) new_value.append(pandocfilters.Space()) diff --git a/docs/tools/translate/remove_machine_translated_meta.py b/docs/tools/translate/remove_machine_translated_meta.py new file mode 100755 index 00000000000..26cfde97f1e --- /dev/null +++ b/docs/tools/translate/remove_machine_translated_meta.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python3 +import os +import sys +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) +import convert_toc +import util + + +if __name__ == '__main__': + path = sys.argv[1][2:] + convert_toc.init_redirects() + try: + path = convert_toc.redirects[path] + except KeyError: + pass + meta, content = util.read_md_file(path) + if 'machine_translated' in meta: + del meta['machine_translated'] + if 'machine_translated_rev' in meta: + del meta['machine_translated_rev'] + util.write_md_file(path, meta, content) diff --git a/docs/zh/changelog/2017.md b/docs/zh/changelog/2017.md deleted file mode 100644 index 95156754100..00000000000 --- a/docs/zh/changelog/2017.md +++ /dev/null @@ -1,265 
+0,0 @@ ---- -en_copy: true ---- - -### ClickHouse release 1.1.54327, 2017-12-21 {#clickhouse-release-1-1-54327-2017-12-21} - -This release contains bug fixes for the previous release 1.1.54318: - -- Fixed a bug with a possible race condition in replication that could lead to data loss. This issue affects versions 1.1.54310 and 1.1.54318. If you use one of these versions with Replicated tables, the update is strongly recommended. This issue shows up in the logs as Warning messages like `Part ... from own log doesn't exist.` The issue is relevant even if you don’t see these messages in the logs. - -### ClickHouse release 1.1.54318, 2017-11-30 {#clickhouse-release-1-1-54318-2017-11-30} - -This release contains bug fixes for the previous release 1.1.54310: - -- Fixed incorrect row deletions during merges in the SummingMergeTree engine. -- Fixed a memory leak in unreplicated MergeTree engines. -- Fixed performance degradation with frequent inserts in MergeTree engines. -- Fixed an issue that was causing the replication queue to stop running. -- Fixed rotation and archiving of server logs. - -### ClickHouse release 1.1.54310, 2017-11-01 {#clickhouse-release-1-1-54310-2017-11-01} - -#### New features: {#new-features} - -- Custom partitioning key for the MergeTree family of table engines. -- [Kafka](https://clickhouse.yandex/docs/en/operations/table_engines/kafka/) table engine. -- Added support for loading [CatBoost](https://catboost.yandex/) models and applying them to data stored in ClickHouse. -- Added support for time zones with non-integer offsets from UTC. -- Added support for arithmetic operations with time intervals. -- The range of values for the Date and DateTime types is extended to the year 2105. -- Added the `CREATE MATERIALIZED VIEW x TO y` query (specifies an existing table for storing the data of a materialized view; a usage sketch follows this release's bug-fix list below). -- Added the `ATTACH TABLE` query without arguments. -- The processing logic for Nested columns with names ending in -Map in a SummingMergeTree table was extracted to the sumMap aggregate function. You can now specify such columns explicitly. -- Max size of the IP trie dictionary is increased to 128M entries. -- Added the getSizeOfEnumType function. -- Added the sumWithOverflow aggregate function. -- Added support for the Cap’n Proto input format. -- You can now customize the compression level when using the zstd algorithm. - -#### Backward incompatible changes: {#backward-incompatible-changes} - -- Creation of temporary tables with an engine other than Memory is not allowed. -- Explicit creation of tables with the View or MaterializedView engine is not allowed. -- During table creation, a new check verifies that the sampling key expression is included in the primary key. - -#### Bug fixes: {#bug-fixes} - -- Fixed hangups when synchronously inserting into a Distributed table. -- Fixed nonatomic adding and removing of parts in Replicated tables. -- Data inserted into a materialized view is not subjected to unnecessary deduplication. -- Executing a query to a Distributed table for which the local replica is lagging and remote replicas are unavailable does not result in an error anymore. -- Users don’t need access permissions to the `default` database to create temporary tables anymore. -- Fixed crashing when specifying the Array type without arguments. -- Fixed hangups when the disk volume containing server logs is full. -- Fixed an overflow in the toRelativeWeekNum function for the first week of the Unix epoch.
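The `CREATE MATERIALIZED VIEW x TO y` form added in 1.1.54310 above is easiest to see end to end. A minimal sketch, assuming a local server and the third-party `clickhouse-driver` Python package (not part of this repository); the table and view names are made up, and modern `MergeTree` DDL syntax is used instead of the 1.1.x-era syntax:

```python
from clickhouse_driver import Client  # third-party package, assumed installed

client = Client('localhost')

# A source table, an explicit target table, and a materialized view that
# writes its results into the target instead of an implicit inner table.
client.execute('CREATE TABLE hits (d Date, uid UInt64) ENGINE = MergeTree ORDER BY (d, uid)')
client.execute('CREATE TABLE hits_daily (d Date, c UInt64) ENGINE = SummingMergeTree ORDER BY d')
client.execute(
    'CREATE MATERIALIZED VIEW hits_mv TO hits_daily '
    'AS SELECT d, count() AS c FROM hits GROUP BY d'
)

client.execute("INSERT INTO hits VALUES ('2017-11-01', 1), ('2017-11-01', 2)")
print(client.execute('SELECT * FROM hits_daily'))  # expected: [(date(2017, 11, 1), 2)]
```

Because the view forwards into `hits_daily`, the target table can be backed up, altered, or read by other views independently of `hits_mv`.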
- -#### Build improvements: {#build-improvements} - -- Several third-party libraries (notably Poco) were updated and converted to git submodules. - -### ClickHouse release 1.1.54304, 2017-10-19 {#clickhouse-release-1-1-54304-2017-10-19} - -#### New features: {#new-features-1} - -- TLS support in the native protocol (to enable, set `tcp_ssl_port` in `config.xml`). - -#### Bug fixes: {#bug-fixes-1} - -- `ALTER` for replicated tables now tries to start running as soon as possible. -- Fixed crashing when reading data with the setting `preferred_block_size_bytes=0`. -- Fixed crashes of `clickhouse-client` when pressing `Page Down`. -- Correct interpretation of certain complex queries with `GLOBAL IN` and `UNION ALL`. -- `FREEZE PARTITION` always works atomically now. -- Empty POST requests now return a response with code 411. -- Fixed interpretation errors for expressions like `CAST(1 AS Nullable(UInt8))`. -- Fixed an error when reading `Array(Nullable(String))` columns from `MergeTree` tables. -- Fixed crashing when parsing queries like `SELECT dummy AS dummy, dummy AS b`. -- Users are now updated correctly when `users.xml` is invalid. -- Correct handling when an executable dictionary returns a non-zero response code. - -### ClickHouse release 1.1.54292, 2017-09-20 {#clickhouse-release-1-1-54292-2017-09-20} - -#### New features: {#new-features-2} - -- Added the `pointInPolygon` function for working with coordinates on a coordinate plane. -- Added the `sumMap` aggregate function for calculating the sum of arrays, similar to `SummingMergeTree` (a usage sketch follows the 1.1.54289 feature list below). -- Added the `trunc` function. Improved performance of the rounding functions (`round`, `floor`, `ceil`, `roundToExp2`) and corrected the logic of how they work. Changed the logic of the `roundToExp2` function for fractions and negative numbers. -- The ClickHouse executable file is now less dependent on the libc version. The same ClickHouse executable file can run on a wide variety of Linux systems. There is still a dependency when using compiled queries (with the setting `compile = 1`, which is not used by default). -- Reduced the time needed for dynamic compilation of queries. - -#### Bug fixes: {#bug-fixes-2} - -- Fixed an error that sometimes produced `part ... intersects previous part` messages and weakened replica consistency. -- Fixed an error that caused the server to lock up if ZooKeeper was unavailable during shutdown. -- Removed excessive logging when restoring replicas. -- Fixed an error in the UNION ALL implementation. -- Fixed an error in the concat function that occurred if the first column in a block has the Array type. -- Progress is now displayed correctly in the system.merges table. - -### ClickHouse release 1.1.54289, 2017-09-13 {#clickhouse-release-1-1-54289-2017-09-13} - -#### New features: {#new-features-3} - -- `SYSTEM` queries for server administration: `SYSTEM RELOAD DICTIONARY`, `SYSTEM RELOAD DICTIONARIES`, `SYSTEM DROP DNS CACHE`, `SYSTEM SHUTDOWN`, `SYSTEM KILL`. -- Added functions for working with arrays: `concat`, `arraySlice`, `arrayPushBack`, `arrayPushFront`, `arrayPopBack`, `arrayPopFront`. -- Added `root` and `identity` parameters for the ZooKeeper configuration. This allows you to isolate individual users on the same ZooKeeper cluster. -- Added aggregate functions `groupBitAnd`, `groupBitOr`, and `groupBitXor` (for compatibility, they are also available under the names `BIT_AND`, `BIT_OR`, and `BIT_XOR`). -- External dictionaries can be loaded from MySQL by specifying a socket in the filesystem. -- External dictionaries can be loaded from MySQL over SSL (`ssl_cert`, `ssl_key`, `ssl_ca` parameters). -- Added the `max_network_bandwidth_for_user` setting to restrict the overall bandwidth use for queries per user. -- Support for `DROP TABLE` for temporary tables. -- Support for reading `DateTime` values in Unix timestamp format from the `CSV` and `JSONEachRow` formats. -- Lagging replicas in distributed queries are now excluded by default (the default threshold is 5 minutes). -- FIFO locking is used during ALTER: an ALTER query isn’t blocked indefinitely by continuously running queries. -- Option to set `umask` in the config file. -- Improved performance for queries with `DISTINCT`.
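As referenced under 1.1.54292 above, `sumMap` takes parallel key and value arrays per row and returns the per-key sums as two sorted parallel arrays. A small illustration under the same assumptions (local server, third-party `clickhouse-driver` package):

```python
from clickhouse_driver import Client  # third-party package, assumed installed

client = Client('localhost')

# Key 2 appears in both rows, so its values are summed; keys 1 and 3 pass through.
result = client.execute(
    'SELECT sumMap(k, v) FROM ('
    'SELECT [1, 2] AS k, [10, 10] AS v '
    'UNION ALL '
    'SELECT [2, 3] AS k, [10, 10] AS v'
    ')'
)
print(result)  # expected: [(([1, 2, 3], [10, 20, 10]),)]
```

This is the same per-key summation that `SummingMergeTree` applies to Nested `-Map` columns during merges, which is why the 1.1.54310 notes above describe the aggregate as an extraction of that logic.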
- -#### Bug fixes: {#bug-fixes-3} - -- Improved the process for deleting old nodes in ZooKeeper. Previously, old nodes sometimes didn’t get deleted if there were very frequent inserts, which caused the server to be slow to shut down, among other things. -- Fixed randomization when choosing hosts for the connection to ZooKeeper. -- Fixed the exclusion of lagging replicas in distributed queries if the replica is localhost. -- Fixed an error where a data part in a `ReplicatedMergeTree` table could be broken after running `ALTER MODIFY` on an element in a `Nested` structure. -- Fixed an error that could cause SELECT queries to “hang”. -- Improvements to distributed DDL queries. -- Fixed the query `CREATE TABLE ... AS`. -- Resolved the deadlock in the `ALTER ... CLEAR COLUMN IN PARTITION` query for `Buffer` tables. -- Fixed the invalid default value for `Enum`s (0 instead of the minimum) when using the `JSONEachRow` and `TSKV` formats. -- Resolved the appearance of zombie processes when using a dictionary with an `executable` source. -- Fixed segfault for the HEAD query. - -#### Improved workflow for developing and assembling ClickHouse: {#improved-workflow-for-developing-and-assembling-clickhouse} - -- You can use `pbuilder` to build ClickHouse. -- You can use `libc++` instead of `libstdc++` for builds on Linux. -- Added instructions for using static code analysis tools: `Coverage`, `clang-tidy`, `cppcheck`. - -#### Please note when upgrading: {#please-note-when-upgrading} - -- There is now a higher default value for the MergeTree setting `max_bytes_to_merge_at_max_space_in_pool` (the maximum total size of data parts to merge, in bytes): it has increased from 100 GiB to 150 GiB. This might result in large merges running after the server upgrade, which could cause an increased load on the disk subsystem. If the free space available on the server is less than twice the total amount of the merges that are running, this will cause all other merges to stop running, including merges of small data parts. As a result, INSERT queries will fail with the message “Merges are processing significantly slower than inserts.” Use the `SELECT * FROM system.merges` query to monitor the situation (see the monitoring sketch below). You can also check the `DiskSpaceReservedForMerge` metric in the `system.metrics` table, or in Graphite. You don’t need to do anything to fix this, since the issue will resolve itself once the large merges finish. If you find this unacceptable, you can restore the previous value for the `max_bytes_to_merge_at_max_space_in_pool` setting. To do this, go to the `<merge_tree>` section in config.xml, set `<max_bytes_to_merge_at_max_space_in_pool>107374182400</max_bytes_to_merge_at_max_space_in_pool>` and restart the server.
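For the upgrade note above, a monitoring sketch under the same assumptions (local server, third-party `clickhouse-driver` package); it only reads the two system tables the note points to:

```python
from clickhouse_driver import Client  # third-party package, assumed installed

client = Client('localhost')

# Disk space currently reserved for running merges, per the note above.
reserved = client.execute(
    "SELECT value FROM system.metrics WHERE metric = 'DiskSpaceReservedForMerge'"
)

# The running merges themselves, largest first.
merges = client.execute(
    'SELECT database, table, elapsed, total_size_bytes_compressed '
    'FROM system.merges ORDER BY total_size_bytes_compressed DESC'
)
print(reserved, merges)
```

Once the post-upgrade wave of large merges drains, both queries should return to their usual baseline.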
- -### ClickHouse release 1.1.54284, 2017-08-29 {#clickhouse-release-1-1-54284-2017-08-29} - -- This is a bugfix release for the previous 1.1.54282 release. It fixes leaks in the parts directory in ZooKeeper. - -### ClickHouse release 1.1.54282, 2017-08-23 {#clickhouse-release-1-1-54282-2017-08-23} - -This release contains bug fixes for the previous release 1.1.54276: - -- Fixed `DB::Exception: Assertion violation: !_path.empty()` when inserting into a Distributed table. -- Fixed parsing when inserting in RowBinary format if input data starts with ';'. -- Fixed errors during runtime compilation of certain aggregate functions (e.g. `groupArray()`). - -### ClickHouse release 1.1.54276, 2017-08-16 {#clickhouse-release-1-1-54276-2017-08-16} - -#### New features: {#new-features-4} - -- Added an optional WITH section for a SELECT query. Example query: `WITH 1+1 AS a SELECT a, a*a`. -- INSERT can be performed synchronously in a Distributed table: OK is returned only after all the data is saved on all the shards. This is activated by the setting insert\_distributed\_sync=1. -- Added the UUID data type for working with 16-byte identifiers. -- Added aliases of CHAR, FLOAT and other types for compatibility with Tableau. -- Added the functions toYYYYMM, toYYYYMMDD, and toYYYYMMDDhhmmss for converting time into numbers. -- You can use IP addresses (together with the hostname) to identify servers for clustered DDL queries. -- Added support for non-constant arguments and negative offsets in the function `substring(str, pos, len)`. -- Added the max\_size parameter for the `groupArray(max_size)(column)` aggregate function, and optimized its performance. - -#### Main changes: {#main-changes} - -- Security improvements: all server files are created with 0640 permissions (can be changed via a config parameter). -- Improved error messages for queries with invalid syntax. -- Significantly reduced memory consumption and improved performance when merging large sections of MergeTree data. -- Significantly increased the performance of data merges for the ReplacingMergeTree engine. -- Improved performance for asynchronous inserts from a Distributed table by combining multiple source inserts. To enable this functionality, use the setting distributed\_directory\_monitor\_batch\_inserts=1. - -#### Backward incompatible changes: {#backward-incompatible-changes-1} - -- Changed the binary format of aggregate states of `groupArray(array_column)` functions for arrays. - -#### Complete list of changes: {#complete-list-of-changes} - -- Added the `output_format_json_quote_denormals` setting, which enables outputting nan and inf values in JSON format (see the sketch after this list). -- Optimized stream allocation when reading from a Distributed table. -- Settings can be configured in readonly mode if the value doesn’t change. -- Added the ability to retrieve non-integer granules of the MergeTree engine in order to meet restrictions on the block size specified in the preferred\_block\_size\_bytes setting. The purpose is to reduce the consumption of RAM and increase cache locality when processing queries from tables with large columns. -- Efficient use of indexes that contain expressions like `toStartOfHour(x)` for conditions like `toStartOfHour(x) op constexpr`. -- Added new settings for MergeTree engines (the merge\_tree section in config.xml): - - replicated\_deduplication\_window\_seconds sets the number of seconds allowed for deduplicating inserts in Replicated tables. - - cleanup\_delay\_period sets how often to start cleanup to remove outdated data. - - replicated\_can\_become\_leader can prevent a replica from becoming the leader (and assigning merges). -- Accelerated cleanup to remove outdated data from ZooKeeper. -- Multiple improvements and fixes for clustered DDL queries. Of particular interest is the new setting distributed\_ddl\_task\_timeout, which limits the time to wait for a response from the servers in the cluster. If a DDL request has not been performed on all hosts, the response will contain a timeout error, and the request will then be executed in async mode. -- Improved display of stack traces in the server logs. -- Added the “none” value for the compression method. -- You can use multiple dictionaries\_config sections in config.xml. -- It is possible to connect to MySQL through a socket in the file system. -- The system.parts table has a new column with information about the size of marks, in bytes.
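As promised in the list above, a sketch of the `output_format_json_quote_denormals` setting. It goes through the HTTP interface (default port 8123), where settings can be passed as URL parameters; a local server and the third-party `requests` package are assumed:

```python
import requests  # third-party package, assumed installed

for quote_denormals in (0, 1):
    resp = requests.get(
        'http://localhost:8123/',
        params={
            'query': 'SELECT sqrt(-1) AS x FORMAT JSON',
            'output_format_json_quote_denormals': quote_denormals,
        },
    )
    # With 0, the nan is emitted as null; with 1, it is emitted as "nan".
    print(quote_denormals, resp.text)
```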
- -#### Bug fixes: {#bug-fixes-4} - -- Distributed tables using a Merge table now work correctly for a SELECT query with a condition on the `_table` field. -- Fixed a rare race condition in ReplicatedMergeTree when checking data parts. -- Fixed possible freezing on “leader election” when starting a server. -- The max\_replica\_delay\_for\_distributed\_queries setting was ignored when using a local replica of the data source. This has been fixed. -- Fixed incorrect behavior of `ALTER TABLE CLEAR COLUMN IN PARTITION` when attempting to clean a non-existing column. -- Fixed an exception in the multiIf function when using empty arrays or strings. -- Fixed excessive memory allocations when deserializing Native format. -- Fixed incorrect auto-update of Trie dictionaries. -- Fixed an exception when running queries with a GROUP BY clause from a Merge table when using SAMPLE. -- Fixed a crash of GROUP BY when using distributed\_aggregation\_memory\_efficient=1. -- Now you can specify the database.table in the right side of IN and JOIN. -- Too many threads were used for parallel aggregation. This has been fixed. -- Fixed how the “if” function works with FixedString arguments. -- SELECT worked incorrectly from a Distributed table for shards with a weight of 0. This has been fixed. -- Running `CREATE VIEW IF EXISTS` no longer causes crashes. -- Fixed incorrect behavior when input\_format\_skip\_unknown\_fields=1 is set and there are negative numbers. -- Fixed an infinite loop in the `dictGetHierarchy()` function if there is some invalid data in the dictionary. -- Fixed `Syntax error: unexpected (...)` errors when running distributed queries with subqueries in an IN or JOIN clause and Merge tables. -- Fixed an incorrect interpretation of a SELECT query from Dictionary tables. -- Fixed the “Cannot mremap” error when using arrays in IN and JOIN clauses with more than 2 billion elements. -- Fixed the failover for dictionaries with MySQL as the source. - -#### Improved workflow for developing and assembling ClickHouse: {#improved-workflow-for-developing-and-assembling-clickhouse-1} - -- Builds can be assembled in Arcadia. -- You can use gcc 7 to compile ClickHouse. -- Parallel builds using ccache+distcc are faster now. - -### ClickHouse release 1.1.54245, 2017-07-04 {#clickhouse-release-1-1-54245-2017-07-04} - -#### New features: {#new-features-5} - -- Distributed DDL (for example, `CREATE TABLE ON CLUSTER`; see the sketch after this list). -- The replicated query `ALTER TABLE CLEAR COLUMN IN PARTITION`. -- The engine for Dictionary tables (access to dictionary data in the form of a table). -- Dictionary database engine (this type of database automatically has Dictionary tables available for all the connected external dictionaries). -- You can check for updates to the dictionary by sending a request to the source. -- Qualified column names. -- Quoting identifiers using double quotation marks. -- Sessions in the HTTP interface. -- The OPTIMIZE query for a Replicated table can run not only on the leader.
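A sketch of the distributed DDL feature referenced in the list above: one statement is executed on every host of the named cluster. It assumes a local server whose configuration defines a cluster named `my_cluster` (a made-up name), the third-party `clickhouse-driver` package, and the `distributed_ddl_task_timeout` setting described under 1.1.54276 above:

```python
from clickhouse_driver import Client  # third-party package, assumed installed

client = Client('localhost')

# One statement, run on every host of 'my_cluster'. Hosts that do not
# answer within the timeout are reported in the response and finish the
# request asynchronously.
client.execute(
    'CREATE TABLE default.events ON CLUSTER my_cluster '
    '(d Date, msg String) ENGINE = MergeTree ORDER BY d',
    settings={'distributed_ddl_task_timeout': 60},
)
```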
- -#### Backward incompatible changes: {#backward-incompatible-changes-2} - -- Removed SET GLOBAL. - -#### Minor changes: {#minor-changes} - -- Now after an alert is triggered, the log prints the full stack trace. -- Relaxed the verification of the number of damaged/extra data parts at startup (there were too many false positives). - -#### Bug fixes: {#bug-fixes-5} - -- Fixed a bad connection “sticking” when inserting into a Distributed table. -- GLOBAL IN now works for a query from a Merge table that looks at a Distributed table. -- The incorrect number of cores was detected on a Google Compute Engine virtual machine. This has been fixed. -- Changes in how an executable source of cached external dictionaries works. -- Fixed the comparison of strings containing null characters. -- Fixed the comparison of Float32 primary key fields with constants. -- Previously, an incorrect estimate of the size of a field could lead to overly large allocations. -- Fixed a crash when querying a Nullable column added to a table using ALTER. -- Fixed a crash when sorting by a Nullable column, if the number of rows is less than LIMIT. -- Fixed an ORDER BY subquery consisting of only constant values. -- Previously, a Replicated table could remain in an invalid state after a failed DROP TABLE. -- Aliases for scalar subqueries with empty results are no longer lost. -- Now a query that used compilation does not fail with an error if the .so file gets damaged. diff --git a/docs/zh/changelog/2018.md b/docs/zh/changelog/2018.md deleted file mode 100644 index 49bef18cbf3..00000000000 --- a/docs/zh/changelog/2018.md +++ /dev/null @@ -1,1060 +0,0 @@ ---- -en_copy: true ---- - -## ClickHouse release 18.16 {#clickhouse-release-18-16} - -### ClickHouse release 18.16.1, 2018-12-21 {#clickhouse-release-18-16-1-2018-12-21} - -#### Bug fixes: {#bug-fixes} - -- Fixed an error that led to problems with updating dictionaries with the ODBC source. [\#3825](https://github.com/ClickHouse/ClickHouse/issues/3825), [\#3829](https://github.com/ClickHouse/ClickHouse/issues/3829) -- JIT compilation of aggregate functions now works with LowCardinality columns. [\#3838](https://github.com/ClickHouse/ClickHouse/issues/3838) - -#### Improvements: {#improvements} - -- Added the `low_cardinality_allow_in_native_format` setting (enabled by default). When disabled, LowCardinality columns will be converted to ordinary columns for SELECT queries and ordinary columns will be expected for INSERT queries. [\#3879](https://github.com/ClickHouse/ClickHouse/pull/3879) - -#### Build improvements: {#build-improvements} - -- Fixes for builds on macOS and ARM. - -### ClickHouse release 18.16.0, 2018-12-14 {#clickhouse-release-18-16-0-2018-12-14} - -#### New features: {#new-features} - -- `DEFAULT` expressions are evaluated for missing fields when loading data in semi-structured input formats (`JSONEachRow`, `TSKV`). The feature is enabled with the `insert_sample_with_metadata` setting. [\#3555](https://github.com/ClickHouse/ClickHouse/pull/3555) -- The `ALTER TABLE` query now has the `MODIFY ORDER BY` action for changing the sorting key when adding or removing a table column.
This is useful for tables in the `MergeTree` family that perform additional tasks when merging based on this sorting key, such as `SummingMergeTree`, `AggregatingMergeTree`, and so on. [\#3581](https://github.com/ClickHouse/ClickHouse/pull/3581) [\#3755](https://github.com/ClickHouse/ClickHouse/pull/3755) -- For tables in the `MergeTree` family, now you can specify a different sorting key (`ORDER BY`) and index (`PRIMARY KEY`). The sorting key can be longer than the index. [\#3581](https://github.com/ClickHouse/ClickHouse/pull/3581) -- Added the `hdfs` table function and the `HDFS` table engine for importing and exporting data to HDFS. [chenxing-xc](https://github.com/ClickHouse/ClickHouse/pull/3617) -- Added functions for working with base64: `base64Encode`, `base64Decode`, `tryBase64Decode`. [Alexander Krasheninnikov](https://github.com/ClickHouse/ClickHouse/pull/3350) -- Now you can use a parameter to configure the precision of the `uniqCombined` aggregate function (select the number of HyperLogLog cells). [\#3406](https://github.com/ClickHouse/ClickHouse/pull/3406) -- Added the `system.contributors` table that contains the names of everyone who made commits in ClickHouse. [\#3452](https://github.com/ClickHouse/ClickHouse/pull/3452) -- Added the ability to omit the partition for the `ALTER TABLE ... FREEZE` query in order to back up all partitions at once. [\#3514](https://github.com/ClickHouse/ClickHouse/pull/3514) -- Added `dictGet` and `dictGetOrDefault` functions that don’t require specifying the type of return value. The type is determined automatically from the dictionary description. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3564) -- Now you can specify comments for a column in the table description and change it using `ALTER`. [\#3377](https://github.com/ClickHouse/ClickHouse/pull/3377) -- Reading is supported for `Join` type tables with simple keys. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3728) -- Now you can specify the options `join_use_nulls`, `max_rows_in_join`, `max_bytes_in_join`, and `join_overflow_mode` when creating a `Join` type table. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3728) -- Added the `joinGet` function that allows you to use a `Join` type table like a dictionary. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3728) -- Added the `partition_key`, `sorting_key`, `primary_key`, and `sampling_key` columns to the `system.tables` table in order to provide information about table keys. [\#3609](https://github.com/ClickHouse/ClickHouse/pull/3609) -- Added the `is_in_partition_key`, `is_in_sorting_key`, `is_in_primary_key`, and `is_in_sampling_key` columns to the `system.columns` table. [\#3609](https://github.com/ClickHouse/ClickHouse/pull/3609) -- Added the `min_time` and `max_time` columns to the `system.parts` table. These columns are populated when the partitioning key is an expression consisting of `DateTime` columns. [Emmanuel Donin de Rosière](https://github.com/ClickHouse/ClickHouse/pull/3800) - -#### Bug fixes: {#bug-fixes-1} - -- Fixes and performance improvements for the `LowCardinality` data type. `GROUP BY` using `LowCardinality(Nullable(...))`. Getting the values of `extremes`. Processing high-order functions. `LEFT ARRAY JOIN`. Distributed `GROUP BY`. Functions that return `Array`. Execution of `ORDER BY`. Writing to `Distributed` tables (nicelulu). Backward compatibility for `INSERT` queries from old clients that implement the `Native` protocol. Support for `LowCardinality` for `JOIN`. 
Improved performance when working in a single stream. [\#3823](https://github.com/ClickHouse/ClickHouse/pull/3823) [\#3803](https://github.com/ClickHouse/ClickHouse/pull/3803) [\#3799](https://github.com/ClickHouse/ClickHouse/pull/3799) [\#3769](https://github.com/ClickHouse/ClickHouse/pull/3769) [\#3744](https://github.com/ClickHouse/ClickHouse/pull/3744) [\#3681](https://github.com/ClickHouse/ClickHouse/pull/3681) [\#3651](https://github.com/ClickHouse/ClickHouse/pull/3651) [\#3649](https://github.com/ClickHouse/ClickHouse/pull/3649) [\#3641](https://github.com/ClickHouse/ClickHouse/pull/3641) [\#3632](https://github.com/ClickHouse/ClickHouse/pull/3632) [\#3568](https://github.com/ClickHouse/ClickHouse/pull/3568) [\#3523](https://github.com/ClickHouse/ClickHouse/pull/3523) [\#3518](https://github.com/ClickHouse/ClickHouse/pull/3518) -- Fixed how the `select_sequential_consistency` option works. Previously, when this setting was enabled, an incomplete result was sometimes returned after beginning to write to a new partition. [\#2863](https://github.com/ClickHouse/ClickHouse/pull/2863) -- Databases are correctly specified when executing DDL `ON CLUSTER` queries and `ALTER UPDATE/DELETE`. [\#3772](https://github.com/ClickHouse/ClickHouse/pull/3772) [\#3460](https://github.com/ClickHouse/ClickHouse/pull/3460) -- Databases are correctly specified for subqueries inside a VIEW. [\#3521](https://github.com/ClickHouse/ClickHouse/pull/3521) -- Fixed a bug in `PREWHERE` with `FINAL` for `VersionedCollapsingMergeTree`. [7167bfd7](https://github.com/ClickHouse/ClickHouse/commit/7167bfd7b365538f7a91c4307ad77e552ab4e8c1) -- Now you can use `KILL QUERY` to cancel queries that have not started yet because they are waiting for the table to be locked. [\#3517](https://github.com/ClickHouse/ClickHouse/pull/3517) -- Corrected date and time calculations if the clocks were moved back at midnight (this happens in Iran, and happened in Moscow from 1981 to 1983). Previously, this led to the time being reset a day earlier than necessary, and also caused incorrect formatting of the date and time in text format. [\#3819](https://github.com/ClickHouse/ClickHouse/pull/3819) -- Fixed bugs in some cases of `VIEW` and subqueries that omit the database. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3521) -- Fixed a race condition when simultaneously reading from a `MATERIALIZED VIEW` and deleting a `MATERIALIZED VIEW` due to not locking the internal `MATERIALIZED VIEW`. [\#3404](https://github.com/ClickHouse/ClickHouse/pull/3404) [\#3694](https://github.com/ClickHouse/ClickHouse/pull/3694) -- Fixed the error `Lock handler cannot be nullptr.` [\#3689](https://github.com/ClickHouse/ClickHouse/pull/3689) -- Fixed query processing when the `compile_expressions` option is enabled (it’s enabled by default). Nondeterministic constant expressions like the `now` function are no longer unfolded. [\#3457](https://github.com/ClickHouse/ClickHouse/pull/3457) -- Fixed a crash when specifying a non-constant scale argument in `toDecimal32/64/128` functions. -- Fixed an error when trying to insert an array with `NULL` elements in the `Values` format into a column of type `Array` without `Nullable` (if `input_format_values_interpret_expressions` = 1). [\#3487](https://github.com/ClickHouse/ClickHouse/pull/3487) [\#3503](https://github.com/ClickHouse/ClickHouse/pull/3503) -- Fixed continuous error logging in `DDLWorker` if ZooKeeper is not available. 
[8f50c620](https://github.com/ClickHouse/ClickHouse/commit/8f50c620334988b28018213ec0092fe6423847e2) -- Fixed the return type for `quantile*` functions from `Date` and `DateTime` types of arguments. [\#3580](https://github.com/ClickHouse/ClickHouse/pull/3580) -- Fixed the `WITH` clause if it specifies a simple alias without expressions. [\#3570](https://github.com/ClickHouse/ClickHouse/pull/3570) -- Fixed processing of queries with named sub-queries and qualified column names when `enable_optimize_predicate_expression` is enabled. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3588) -- Fixed the error `Attempt to attach to nullptr thread group` when working with materialized views. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3623) -- Fixed a crash when passing certain incorrect arguments to the `arrayReverse` function. [73e3a7b6](https://github.com/ClickHouse/ClickHouse/commit/73e3a7b662161d6005e7727d8a711b930386b871) -- Fixed the buffer overflow in the `extractURLParameter` function. Improved performance. Added correct processing of strings containing zero bytes. [141e9799](https://github.com/ClickHouse/ClickHouse/commit/141e9799e49201d84ea8e951d1bed4fb6d3dacb5) -- Fixed buffer overflow in the `lowerUTF8` and `upperUTF8` functions. Removed the ability to execute these functions over `FixedString` type arguments. [\#3662](https://github.com/ClickHouse/ClickHouse/pull/3662) -- Fixed a rare race condition when deleting `MergeTree` tables. [\#3680](https://github.com/ClickHouse/ClickHouse/pull/3680) -- Fixed a race condition when reading from `Buffer` tables and simultaneously performing `ALTER` or `DROP` on the target tables. [\#3719](https://github.com/ClickHouse/ClickHouse/pull/3719) -- Fixed a segfault if the `max_temporary_non_const_columns` limit was exceeded. [\#3788](https://github.com/ClickHouse/ClickHouse/pull/3788) - -#### Improvements: {#improvements-1} - -- The server does not write the processed configuration files to the `/etc/clickhouse-server/` directory. Instead, it saves them in the `preprocessed_configs` directory inside `path`. This means that the `/etc/clickhouse-server/` directory doesn’t have write access for the `clickhouse` user, which improves security. [\#2443](https://github.com/ClickHouse/ClickHouse/pull/2443) -- The `min_merge_bytes_to_use_direct_io` option is set to 10 GiB by default. A merge that forms large parts of tables from the MergeTree family will be performed in `O_DIRECT` mode, which prevents excessive page cache eviction. [\#3504](https://github.com/ClickHouse/ClickHouse/pull/3504) -- Accelerated server start when there is a very large number of tables. [\#3398](https://github.com/ClickHouse/ClickHouse/pull/3398) -- Added a connection pool and HTTP `Keep-Alive` for connections between replicas. [\#3594](https://github.com/ClickHouse/ClickHouse/pull/3594) -- If the query syntax is invalid, the `400 Bad Request` code is returned in the `HTTP` interface (500 was returned previously). [31bc680a](https://github.com/ClickHouse/ClickHouse/commit/31bc680ac5f4bb1d0360a8ba4696fa84bb47d6ab) -- The `join_default_strictness` option is set to `ALL` by default for compatibility. [120e2cbe](https://github.com/ClickHouse/ClickHouse/commit/120e2cbe2ff4fbad626c28042d9b28781c805afe) -- Removed logging to `stderr` from the `re2` library for invalid or complex regular expressions. 
[\#3723](https://github.com/ClickHouse/ClickHouse/pull/3723) -- Added for the `Kafka` table engine: checks for subscriptions before beginning to read from Kafka; the kafka\_max\_block\_size setting for the table. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3396) -- The `cityHash64`, `farmHash64`, `metroHash64`, `sipHash64`, `halfMD5`, `murmurHash2_32`, `murmurHash2_64`, `murmurHash3_32`, and `murmurHash3_64` functions now work for any number of arguments and for arguments in the form of tuples. [\#3451](https://github.com/ClickHouse/ClickHouse/pull/3451) [\#3519](https://github.com/ClickHouse/ClickHouse/pull/3519) -- The `arrayReverse` function now works with any types of arrays. [73e3a7b6](https://github.com/ClickHouse/ClickHouse/commit/73e3a7b662161d6005e7727d8a711b930386b871) -- Added an optional parameter: the slot size for the `timeSlots` function. [Kirill Shvakov](https://github.com/ClickHouse/ClickHouse/pull/3724) -- For `FULL` and `RIGHT JOIN`, the `max_block_size` setting is used for a stream of non-joined data from the right table. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3699) -- Added the `--secure` command line parameter in `clickhouse-benchmark` and `clickhouse-performance-test` to enable TLS. [\#3688](https://github.com/ClickHouse/ClickHouse/pull/3688) [\#3690](https://github.com/ClickHouse/ClickHouse/pull/3690) -- Type conversion when the structure of a `Buffer` type table does not match the structure of the destination table. [Vitaly Baranov](https://github.com/ClickHouse/ClickHouse/pull/3603) -- Added the `tcp_keep_alive_timeout` option to enable keep-alive packets after inactivity for the specified time interval. [\#3441](https://github.com/ClickHouse/ClickHouse/pull/3441) -- Removed unnecessary quoting of values for the partition key in the `system.parts` table if it consists of a single column. [\#3652](https://github.com/ClickHouse/ClickHouse/pull/3652) -- The modulo function works for `Date` and `DateTime` data types. [\#3385](https://github.com/ClickHouse/ClickHouse/pull/3385) -- Added synonyms for the `POWER`, `LN`, `LCASE`, `UCASE`, `REPLACE`, `LOCATE`, `SUBSTR`, and `MID` functions. [\#3774](https://github.com/ClickHouse/ClickHouse/pull/3774) [\#3763](https://github.com/ClickHouse/ClickHouse/pull/3763) Some function names are case-insensitive for compatibility with the SQL standard. Added syntactic sugar `SUBSTRING(expr FROM start FOR length)` for compatibility with SQL. [\#3804](https://github.com/ClickHouse/ClickHouse/pull/3804) -- Added the ability to `mlock` memory pages corresponding to `clickhouse-server` executable code to prevent it from being forced out of memory. This feature is disabled by default. [\#3553](https://github.com/ClickHouse/ClickHouse/pull/3553) -- Improved performance when reading from `O_DIRECT` (with the `min_bytes_to_use_direct_io` option enabled). [\#3405](https://github.com/ClickHouse/ClickHouse/pull/3405) -- Improved performance of the `dictGet...OrDefault` function for a constant key argument and a non-constant default argument. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3563) -- The `firstSignificantSubdomain` function now processes the domains `gov`, `mil`, and `edu`. [Igor Hatarist](https://github.com/ClickHouse/ClickHouse/pull/3601) Improved performance. 
[\#3628](https://github.com/ClickHouse/ClickHouse/pull/3628) -- Ability to specify custom environment variables for starting `clickhouse-server` using the `SYS-V init.d` script by defining `CLICKHOUSE_PROGRAM_ENV` in `/etc/default/clickhouse`. - [Pavlo Bashynskyi](https://github.com/ClickHouse/ClickHouse/pull/3612) -- Correct return code for the clickhouse-server init script. [\#3516](https://github.com/ClickHouse/ClickHouse/pull/3516) -- The `system.metrics` table now has the `VersionInteger` metric, and `system.build_options` has the added line `VERSION_INTEGER`, which contains the numeric form of the ClickHouse version, such as `18016000`. [\#3644](https://github.com/ClickHouse/ClickHouse/pull/3644) -- Removed the ability to compare the `Date` type with a number to avoid potential errors like `date = 2018-12-17`, where quotes around the date are omitted by mistake. [\#3687](https://github.com/ClickHouse/ClickHouse/pull/3687) -- Fixed the behavior of stateful functions like `rowNumberInAllBlocks`. They previously output a result that was one number larger due to starting during query analysis. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3729) -- If the `force_restore_data` file can’t be deleted, an error message is displayed. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3794) - -#### Build improvements: {#build-improvements-1} - -- Updated the `jemalloc` library, which fixes a potential memory leak. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3557) -- Profiling with `jemalloc` is enabled by default in debug builds. [2cc82f5c](https://github.com/ClickHouse/ClickHouse/commit/2cc82f5cbe266421cd4c1165286c2c47e5ffcb15) -- Added the ability to run integration tests when only `Docker` is installed on the system. [\#3650](https://github.com/ClickHouse/ClickHouse/pull/3650) -- Added the fuzz expression test in SELECT queries. [\#3442](https://github.com/ClickHouse/ClickHouse/pull/3442) -- Added a stress test for commits, which performs functional tests in parallel and in random order to detect more race conditions. [\#3438](https://github.com/ClickHouse/ClickHouse/pull/3438) -- Improved the method for starting clickhouse-server in a Docker image. [Elghazal Ahmed](https://github.com/ClickHouse/ClickHouse/pull/3663) -- For a Docker image, added support for initializing databases using files in the `/docker-entrypoint-initdb.d` directory. [Konstantin Lebedev](https://github.com/ClickHouse/ClickHouse/pull/3695) -- Fixes for builds on ARM. [\#3709](https://github.com/ClickHouse/ClickHouse/pull/3709) - -#### Backward incompatible changes: {#backward-incompatible-changes} - -- Removed the ability to compare the `Date` type with a number. Instead of `toDate('2018-12-18') = 17883`, you must use explicit type conversion `= toDate(17883)`. [\#3687](https://github.com/ClickHouse/ClickHouse/pull/3687) - -## ClickHouse release 18.14 {#clickhouse-release-18-14} - -### ClickHouse release 18.14.19, 2018-12-19 {#clickhouse-release-18-14-19-2018-12-19} - -#### Bug fixes: {#bug-fixes-2} - -- Fixed an error that led to problems with updating dictionaries with the ODBC source. [\#3825](https://github.com/ClickHouse/ClickHouse/issues/3825), [\#3829](https://github.com/ClickHouse/ClickHouse/issues/3829) -- Databases are correctly specified when executing DDL `ON CLUSTER` queries. [\#3460](https://github.com/ClickHouse/ClickHouse/pull/3460) -- Fixed a segfault if the `max_temporary_non_const_columns` limit was exceeded.
[\#3788](https://github.com/ClickHouse/ClickHouse/pull/3788) - -#### Build improvements: {#build-improvements-2} - -- Fixes for builds on ARM. - -### ClickHouse release 18.14.18, 2018-12-04 {#clickhouse-release-18-14-18-2018-12-04} - -#### Bug fixes: {#bug-fixes-3} - -- Fixed error in `dictGet...` function for dictionaries of type `range`, if one of the arguments is constant and the other is not. [\#3751](https://github.com/ClickHouse/ClickHouse/pull/3751) -- Fixed an error that caused `netlink: '...': attribute type 1 has an invalid length` messages to be printed to the Linux kernel log; this happened only on sufficiently recent Linux kernel versions. [\#3749](https://github.com/ClickHouse/ClickHouse/pull/3749) -- Fixed segfault in function `empty` for argument of `FixedString` type. [Daniel, Dao Quang Minh](https://github.com/ClickHouse/ClickHouse/pull/3703) -- Fixed excessive memory allocation when using a large value of the `max_query_size` setting (a memory chunk of `max_query_size` bytes was preallocated at once). [\#3720](https://github.com/ClickHouse/ClickHouse/pull/3720) - -#### Build changes: {#build-changes} - -- Fixed build with LLVM/Clang libraries of version 7 from the OS packages (these libraries are used for runtime query compilation). [\#3582](https://github.com/ClickHouse/ClickHouse/pull/3582) - -### ClickHouse release 18.14.17, 2018-11-30 {#clickhouse-release-18-14-17-2018-11-30} - -#### Bug fixes: {#bug-fixes-4} - -- Fixed cases when the ODBC bridge process did not terminate with the main server process. [\#3642](https://github.com/ClickHouse/ClickHouse/pull/3642) -- Fixed synchronous insertion into the `Distributed` table with a column list that differs from the column list of the remote table. [\#3673](https://github.com/ClickHouse/ClickHouse/pull/3673) -- Fixed a rare race condition that can lead to a crash when dropping a MergeTree table. [\#3643](https://github.com/ClickHouse/ClickHouse/pull/3643) -- Fixed a query deadlock in the case when query thread creation fails with the `Resource temporarily unavailable` error. [\#3643](https://github.com/ClickHouse/ClickHouse/pull/3643) -- Fixed parsing of the `ENGINE` clause when the `CREATE AS table` syntax was used and the `ENGINE` clause was specified before the `AS table` (the error resulted in ignoring the specified engine). [\#3692](https://github.com/ClickHouse/ClickHouse/pull/3692) - -### ClickHouse release 18.14.15, 2018-11-21 {#clickhouse-release-18-14-15-2018-11-21} - -#### Bug fixes: {#bug-fixes-5} - -- The size of a memory chunk was overestimated while deserializing columns of type `Array(String)`, which led to “Memory limit exceeded” errors. The issue appeared in version 18.12.13. [\#3589](https://github.com/ClickHouse/ClickHouse/issues/3589) - -### ClickHouse release 18.14.14, 2018-11-20 {#clickhouse-release-18-14-14-2018-11-20} - -#### Bug fixes: {#bug-fixes-6} - -- Fixed `ON CLUSTER` queries when the cluster is configured as secure (the `<secure>` flag). [\#3599](https://github.com/ClickHouse/ClickHouse/pull/3599) - -#### Build changes: {#build-changes-1} - -- Fixed build problems (llvm-7 from the system, macOS) [\#3582](https://github.com/ClickHouse/ClickHouse/pull/3582) - -### ClickHouse release 18.14.13, 2018-11-08 {#clickhouse-release-18-14-13-2018-11-08} - -#### Bug fixes: {#bug-fixes-7} - -- Fixed the `Block structure mismatch in MergingSorted stream` error. [\#3162](https://github.com/ClickHouse/ClickHouse/issues/3162) -- Fixed `ON CLUSTER` queries in the case when secure connections were turned on in the cluster config (the `<secure>` flag).
[\#3465](https://github.com/ClickHouse/ClickHouse/pull/3465) -- Fixed an error in queries that used `SAMPLE`, `PREWHERE` and alias columns. [\#3543](https://github.com/ClickHouse/ClickHouse/pull/3543) -- Fixed a rare `unknown compression method` error when the `min_bytes_to_use_direct_io` setting was enabled. [3544](https://github.com/ClickHouse/ClickHouse/pull/3544) - -#### Performance improvements: {#performance-improvements} - -- Fixed performance regression of queries with `GROUP BY` of columns of UInt16 or Date type when executing on AMD EPYC processors. [Igor Lapko](https://github.com/ClickHouse/ClickHouse/pull/3512) -- Fixed performance regression of queries that process long strings. [\#3530](https://github.com/ClickHouse/ClickHouse/pull/3530) - -#### Build improvements: {#build-improvements-3} - -- Improvements for simplifying the Arcadia build. [\#3475](https://github.com/ClickHouse/ClickHouse/pull/3475), [\#3535](https://github.com/ClickHouse/ClickHouse/pull/3535) - -### ClickHouse release 18.14.12, 2018-11-02 {#clickhouse-release-18-14-12-2018-11-02} - -#### Bug fixes: {#bug-fixes-8} - -- Fixed a crash on joining two unnamed subqueries. [\#3505](https://github.com/ClickHouse/ClickHouse/pull/3505) -- Fixed generating incorrect queries (with an empty `WHERE` clause) when querying external databases. [hotid](https://github.com/ClickHouse/ClickHouse/pull/3477) -- Fixed using an incorrect timeout value in ODBC dictionaries. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3511) - -### ClickHouse release 18.14.11, 2018-10-29 {#clickhouse-release-18-14-11-2018-10-29} - -#### Bug fixes: {#bug-fixes-9} - -- Fixed the error `Block structure mismatch in UNION stream: different number of columns` in LIMIT queries. [\#2156](https://github.com/ClickHouse/ClickHouse/issues/2156) -- Fixed errors when merging data in tables containing arrays inside Nested structures. [\#3397](https://github.com/ClickHouse/ClickHouse/pull/3397) -- Fixed incorrect query results if the `merge_tree_uniform_read_distribution` setting is disabled (it is enabled by default). [\#3429](https://github.com/ClickHouse/ClickHouse/pull/3429) -- Fixed an error on inserts to a Distributed table in Native format. [\#3411](https://github.com/ClickHouse/ClickHouse/issues/3411) - -### ClickHouse release 18.14.10, 2018-10-23 {#clickhouse-release-18-14-10-2018-10-23} - -- The `compile_expressions` setting (JIT compilation of expressions) is disabled by default. [\#3410](https://github.com/ClickHouse/ClickHouse/pull/3410) -- The `enable_optimize_predicate_expression` setting is disabled by default. - -### ClickHouse release 18.14.9, 2018-10-16 {#clickhouse-release-18-14-9-2018-10-16} - -#### New features: {#new-features-1} - -- The `WITH CUBE` modifier for `GROUP BY` (the alternative syntax `GROUP BY CUBE(...)` is also available). [\#3172](https://github.com/ClickHouse/ClickHouse/pull/3172) -- Added the `formatDateTime` function. [Alexandr Krasheninnikov](https://github.com/ClickHouse/ClickHouse/pull/2770) -- Added the `JDBC` table engine and `jdbc` table function (requires installing clickhouse-jdbc-bridge). [Alexandr Krasheninnikov](https://github.com/ClickHouse/ClickHouse/pull/3210) -- Added functions for working with the ISO week number: `toISOWeek`, `toISOYear`, `toStartOfISOYear`, and `toDayOfYear`. [\#3146](https://github.com/ClickHouse/ClickHouse/pull/3146) -- Now you can use `Nullable` columns for `MySQL` and `ODBC` tables. 
[\#3362](https://github.com/ClickHouse/ClickHouse/pull/3362) -- Nested data structures can be read as nested objects in `JSONEachRow` format. Added the `input_format_import_nested_json` setting. [Veloman Yunkan](https://github.com/ClickHouse/ClickHouse/pull/3144) -- Parallel processing is available for many `MATERIALIZED VIEW`s when inserting data. See the `parallel_view_processing` setting. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3208) -- Added the `SYSTEM FLUSH LOGS` query (forced log flushes to system tables such as `query_log`). [\#3321](https://github.com/ClickHouse/ClickHouse/pull/3321) -- Now you can use pre-defined `database` and `table` macros when declaring `Replicated` tables. [\#3251](https://github.com/ClickHouse/ClickHouse/pull/3251) -- Added the ability to read `Decimal` type values in engineering notation (indicating powers of ten). [\#3153](https://github.com/ClickHouse/ClickHouse/pull/3153) - -#### Experimental features: {#experimental-features} - -- Optimization of the GROUP BY clause for `LowCardinality` data types. [\#3138](https://github.com/ClickHouse/ClickHouse/pull/3138) -- Optimized calculation of expressions for `LowCardinality` data types. [\#3200](https://github.com/ClickHouse/ClickHouse/pull/3200) - -#### Improvements: {#improvements-2} - -- Significantly reduced memory consumption for queries with `ORDER BY` and `LIMIT`. See the `max_bytes_before_remerge_sort` setting. [\#3205](https://github.com/ClickHouse/ClickHouse/pull/3205) -- In the absence of `JOIN` (`LEFT`, `INNER`, …), `INNER JOIN` is assumed. [\#3147](https://github.com/ClickHouse/ClickHouse/pull/3147) -- Qualified asterisks work correctly in queries with `JOIN`. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3202) -- The `ODBC` table engine correctly chooses the method for quoting identifiers in the SQL dialect of a remote database. [Alexandr Krasheninnikov](https://github.com/ClickHouse/ClickHouse/pull/3210) -- The `compile_expressions` setting (JIT compilation of expressions) is enabled by default. -- Fixed behavior for simultaneous DROP DATABASE/TABLE IF EXISTS and CREATE DATABASE/TABLE IF NOT EXISTS. Previously, a `CREATE DATABASE ... IF NOT EXISTS` query could return the error message “File … already exists”, and the `CREATE TABLE ... IF NOT EXISTS` and `DROP TABLE IF EXISTS` queries could return `Table ... is creating or attaching right now`. [\#3101](https://github.com/ClickHouse/ClickHouse/pull/3101) -- LIKE and IN expressions with a constant right half are passed to the remote server when querying from MySQL or ODBC tables. [\#3182](https://github.com/ClickHouse/ClickHouse/pull/3182) -- Comparisons with constant expressions in a WHERE clause are passed to the remote server when querying from MySQL and ODBC tables. Previously, only comparisons with constants were passed. [\#3182](https://github.com/ClickHouse/ClickHouse/pull/3182) -- Correct calculation of row width in the terminal for `Pretty` formats, including strings with hieroglyphs. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/3257). -- `ON CLUSTER` can be specified for `ALTER UPDATE` queries. -- Improved performance for reading data in `JSONEachRow` format. [\#3332](https://github.com/ClickHouse/ClickHouse/pull/3332) -- Added synonyms for the `LENGTH` and `CHARACTER_LENGTH` functions for compatibility. The `CONCAT` function is no longer case-sensitive. [\#3306](https://github.com/ClickHouse/ClickHouse/pull/3306) -- Added the `TIMESTAMP` synonym for the `DateTime` type.
[\#3390](https://github.com/ClickHouse/ClickHouse/pull/3390) -- There is always space reserved for query\_id in the server logs, even if the log line is not related to a query. This makes it easier to parse server text logs with third-party tools. -- Memory consumption by a query is logged when it exceeds the next level of an integer number of gigabytes. [\#3205](https://github.com/ClickHouse/ClickHouse/pull/3205) -- Added compatibility mode for the case when the client library that uses the Native protocol by mistake sends fewer columns than the server expects for the INSERT query. This scenario was possible when using the clickhouse-cpp library. Previously, this scenario caused the server to crash. [\#3171](https://github.com/ClickHouse/ClickHouse/pull/3171) -- In a user-defined WHERE expression in `clickhouse-copier`, you can now use a `partition_key` alias (for additional filtering by source table partition). This is useful if the partitioning scheme changes during copying, but only changes slightly. [\#3166](https://github.com/ClickHouse/ClickHouse/pull/3166) -- The workflow of the `Kafka` engine has been moved to a background thread pool in order to automatically reduce the speed of data reading at high loads. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3215). -- Support for reading `Tuple` and `Nested` values of structures like `struct` in the `Cap'n'Proto` format. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3216) -- The list of top-level domains for the `firstSignificantSubdomain` function now includes the domain `biz`. [decaseal](https://github.com/ClickHouse/ClickHouse/pull/3219) -- In the configuration of external dictionaries, `null_value` is interpreted as the value of the default data type. [\#3330](https://github.com/ClickHouse/ClickHouse/pull/3330) -- Support for the `intDiv` and `intDivOrZero` functions for `Decimal`. [b48402e8](https://github.com/ClickHouse/ClickHouse/commit/b48402e8712e2b9b151e0eef8193811d433a1264) -- Support for the `Date`, `DateTime`, `UUID`, and `Decimal` types as a key for the `sumMap` aggregate function. [\#3281](https://github.com/ClickHouse/ClickHouse/pull/3281) -- Support for the `Decimal` data type in external dictionaries. [\#3324](https://github.com/ClickHouse/ClickHouse/pull/3324) -- Support for the `Decimal` data type in `SummingMergeTree` tables. [\#3348](https://github.com/ClickHouse/ClickHouse/pull/3348) -- Added specializations for `UUID` in `if`. [\#3366](https://github.com/ClickHouse/ClickHouse/pull/3366) -- Reduced the number of `open` and `close` system calls when reading from a `MergeTree` table. [\#3283](https://github.com/ClickHouse/ClickHouse/pull/3283) -- A `TRUNCATE TABLE` query can be executed on any replica (the query is passed to the leader replica). [Kirill Shvakov](https://github.com/ClickHouse/ClickHouse/pull/3375) - -#### Bug fixes: {#bug-fixes-10} - -- Fixed an issue with `Dictionary` tables for `range_hashed` dictionaries. This error occurred in version 18.12.17. [\#1702](https://github.com/ClickHouse/ClickHouse/pull/1702) -- Fixed an error when loading `range_hashed` dictionaries (the message `Unsupported type Nullable (...)`). This error occurred in version 18.12.17. [\#3362](https://github.com/ClickHouse/ClickHouse/pull/3362) -- Fixed errors in the `pointInPolygon` function due to the accumulation of inaccurate calculations for polygons with a large number of vertices located close to each other.
#### Bug fixes: {#bug-fixes-10}

- Fixed an issue with `Dictionary` tables for `range_hashed` dictionaries. This error occurred in version 18.12.17. [\#1702](https://github.com/ClickHouse/ClickHouse/pull/1702)
- Fixed an error when loading `range_hashed` dictionaries (the message `Unsupported type Nullable (...)`). This error occurred in version 18.12.17. [\#3362](https://github.com/ClickHouse/ClickHouse/pull/3362)
- Fixed errors in the `pointInPolygon` function due to the accumulation of inaccurate calculations for polygons with a large number of vertices located close to each other. [\#3331](https://github.com/ClickHouse/ClickHouse/pull/3331) [\#3341](https://github.com/ClickHouse/ClickHouse/pull/3341)
- If, after merging data parts, the checksum for the resulting part differs from the result of the same merge on another replica, the result of the merge is deleted and the data part is downloaded from the other replica (this is the correct behavior). But after downloading the data part, it couldn’t be added to the working set because of an error stating that the part already exists (because the data part was deleted with some delay after the merge). This led to cyclical attempts to download the same data. [\#3194](https://github.com/ClickHouse/ClickHouse/pull/3194)
- Fixed incorrect calculation of total memory consumption by queries (because of incorrect calculation, the `max_memory_usage_for_all_queries` setting worked incorrectly and the `MemoryTracking` metric had an incorrect value). This error occurred in version 18.12.13. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3344)
- Fixed the functionality of `CREATE TABLE ... ON CLUSTER ... AS SELECT ...`. This error occurred in version 18.12.13. [\#3247](https://github.com/ClickHouse/ClickHouse/pull/3247)
- Fixed unnecessary preparation of data structures for `JOIN`s on the server that initiates the query if the `JOIN` is only performed on remote servers. [\#3340](https://github.com/ClickHouse/ClickHouse/pull/3340)
- Fixed bugs in the `Kafka` engine: deadlocks after exceptions when starting to read data, and locks upon completion. [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3215).
- For `Kafka` tables, the optional `schema` parameter was not passed (the schema of the `Cap'n'Proto` format). [Vojtech Splichal](https://github.com/ClickHouse/ClickHouse/pull/3150)
- If the ensemble of ZooKeeper servers has servers that accept the connection but then immediately close it instead of responding to the handshake, ClickHouse chooses another server to connect to. Previously, this produced the error `Cannot read all data. Bytes read: 0. Bytes expected: 4.` and the server couldn’t start. [8218cf3a](https://github.com/ClickHouse/ClickHouse/commit/8218cf3a5f39a43401953769d6d12a0bb8d29da9)
- If the ensemble of ZooKeeper servers contains servers for which the DNS query returns an error, these servers are ignored. [17b8e209](https://github.com/ClickHouse/ClickHouse/commit/17b8e209221061325ad7ba0539f03c6e65f87f29)
- Fixed type conversion between `Date` and `DateTime` when inserting data in the `VALUES` format (if `input_format_values_interpret_expressions = 1`). Previously, the conversion was performed between the numerical value of the number of days in Unix Epoch time and the Unix timestamp, which led to unexpected results. [\#3229](https://github.com/ClickHouse/ClickHouse/pull/3229)
- Corrected type conversion between `Decimal` and integer numbers. [\#3211](https://github.com/ClickHouse/ClickHouse/pull/3211)
- Fixed errors in the `enable_optimize_predicate_expression` setting. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3231)
- Fixed a parsing error in CSV format with floating-point numbers if a non-default CSV separator is used, such as `;`. [\#3155](https://github.com/ClickHouse/ClickHouse/pull/3155)
- Fixed the `arrayCumSumNonNegative` function (it does not accumulate negative values if the accumulator is less than zero). [Aleksey Studnev](https://github.com/ClickHouse/ClickHouse/pull/3163)
- Fixed how `Merge` tables work on top of `Distributed` tables when using `PREWHERE`. [\#3165](https://github.com/ClickHouse/ClickHouse/pull/3165)
- Bug fixes in the `ALTER UPDATE` query.
- Fixed bugs in the `odbc` table function that appeared in version 18.12. [\#3197](https://github.com/ClickHouse/ClickHouse/pull/3197)
- Fixed the operation of aggregate functions with `StateArray` combinators. [\#3188](https://github.com/ClickHouse/ClickHouse/pull/3188)
- Fixed a crash when dividing a `Decimal` value by zero. [69dd6609](https://github.com/ClickHouse/ClickHouse/commit/69dd6609193beb4e7acd3e6ad216eca0ccfb8179)
- Fixed output of types for operations using `Decimal` and integer arguments. [\#3224](https://github.com/ClickHouse/ClickHouse/pull/3224)
- Fixed the segfault during `GROUP BY` on `Decimal128`. [3359ba06](https://github.com/ClickHouse/ClickHouse/commit/3359ba06c39fcd05bfdb87d6c64154819621e13a)
- The `log_query_threads` setting (logging information about each thread of query execution) now takes effect only if the `log_queries` option (logging information about queries) is set to 1. Since the `log_query_threads` option is enabled by default, information about threads was previously logged even if query logging was disabled. [\#3241](https://github.com/ClickHouse/ClickHouse/pull/3241)
- Fixed an error in the distributed operation of the quantiles aggregate function (the error message `Not found column quantile...`). [292a8855](https://github.com/ClickHouse/ClickHouse/commit/292a885533b8e3b41ce8993867069d14cbd5a664)
- Fixed the compatibility problem when working on a cluster of version 18.12.17 servers and older servers at the same time. For distributed queries with GROUP BY keys of both fixed and non-fixed length, if there was a large amount of data to aggregate, the returned data was not always fully aggregated (two different rows contained the same aggregation keys). [\#3254](https://github.com/ClickHouse/ClickHouse/pull/3254)
- Fixed handling of substitutions in `clickhouse-performance-test`, if the query contains only part of the substitutions declared in the test. [\#3263](https://github.com/ClickHouse/ClickHouse/pull/3263)
- Fixed an error when using `FINAL` with `PREWHERE`. [\#3298](https://github.com/ClickHouse/ClickHouse/pull/3298)
- Fixed an error when using `PREWHERE` over columns that were added during `ALTER`. [\#3298](https://github.com/ClickHouse/ClickHouse/pull/3298)
- Added a check for the absence of `arrayJoin` for `DEFAULT` and `MATERIALIZED` expressions. Previously, `arrayJoin` led to an error when inserting data. [\#3337](https://github.com/ClickHouse/ClickHouse/pull/3337)
- Added a check for the absence of `arrayJoin` in a `PREWHERE` clause. Previously, this led to messages like `Size ... doesn't match` or `Unknown compression method` when executing queries. [\#3357](https://github.com/ClickHouse/ClickHouse/pull/3357)
- Fixed segfault that could occur in rare cases after optimization that replaced AND chains from equality evaluations with the corresponding IN expression. [liuyimin-bytedance](https://github.com/ClickHouse/ClickHouse/pull/3339)
- Minor corrections to `clickhouse-benchmark`: previously, client information was not sent to the server; now the number of queries executed is calculated more accurately when shutting down and for limiting the number of iterations. [\#3351](https://github.com/ClickHouse/ClickHouse/pull/3351) [\#3352](https://github.com/ClickHouse/ClickHouse/pull/3352)

#### Backward incompatible changes: {#backward-incompatible-changes-1}

- Removed the `allow_experimental_decimal_type` option. The `Decimal` data type is available for use by default. [\#3329](https://github.com/ClickHouse/ClickHouse/pull/3329)

## ClickHouse release 18.12 {#clickhouse-release-18-12}

### ClickHouse release 18.12.17, 2018-09-16 {#clickhouse-release-18-12-17-2018-09-16}

#### New features: {#new-features-2}

- `invalidate_query` (the ability to specify a query to check whether an external dictionary needs to be updated) is implemented for the `clickhouse` source. [\#3126](https://github.com/ClickHouse/ClickHouse/pull/3126)
- Added the ability to use `UInt*`, `Int*`, and `DateTime` data types (along with the `Date` type) as a `range_hashed` external dictionary key that defines the boundaries of ranges. Now `NULL` can be used to designate an open range. [Vasily Nemkov](https://github.com/ClickHouse/ClickHouse/pull/3123)
- The `Decimal` type now supports `var*` and `stddev*` aggregate functions. [\#3129](https://github.com/ClickHouse/ClickHouse/pull/3129)
- The `Decimal` type now supports mathematical functions (`exp`, `sin`, and so on). [\#3129](https://github.com/ClickHouse/ClickHouse/pull/3129)
- The `system.part_log` table now has the `partition_id` column. [\#3089](https://github.com/ClickHouse/ClickHouse/pull/3089)

#### Bug fixes: {#bug-fixes-11}

- `Merge` now works correctly on `Distributed` tables. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3159)
- Fixed an incompatibility (an unnecessary dependency on the `glibc` version) that made it impossible to run ClickHouse on `Ubuntu Precise` and older versions. The incompatibility arose in version 18.12.13. [\#3130](https://github.com/ClickHouse/ClickHouse/pull/3130)
- Fixed errors in the `enable_optimize_predicate_expression` setting. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3107)
- Fixed a minor issue with backward compatibility that appeared when working with a cluster of replicas on versions earlier than 18.12.13 and simultaneously creating a new replica of a table on a server with a newer version (shown in the message `Can not clone replica, because the ... updated to new ClickHouse version`, which is logical, but shouldn’t happen). [\#3122](https://github.com/ClickHouse/ClickHouse/pull/3122)

#### Backward incompatible changes: {#backward-incompatible-changes-2}

- The `enable_optimize_predicate_expression` option is enabled by default (which is rather optimistic). If query analysis errors occur that are related to searching for the column names, set `enable_optimize_predicate_expression` to 0. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3107)

### ClickHouse release 18.12.14, 2018-09-13 {#clickhouse-release-18-12-14-2018-09-13}

#### New features: {#new-features-3}

- Added support for `ALTER UPDATE` queries (see the sketch after this list). [\#3035](https://github.com/ClickHouse/ClickHouse/pull/3035)
- Added the `allow_ddl` option, which restricts the user’s access to DDL queries. [\#3104](https://github.com/ClickHouse/ClickHouse/pull/3104)
- Added the `min_merge_bytes_to_use_direct_io` option for `MergeTree` engines, which allows you to set a threshold for the total size of the merge (when above the threshold, data part files will be handled using `O_DIRECT`). [\#3117](https://github.com/ClickHouse/ClickHouse/pull/3117)
- The `system.merges` system table now contains the `partition_id` column. [\#3099](https://github.com/ClickHouse/ClickHouse/pull/3099)
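
A minimal sketch of the new mutation syntax (the table `hits` and its columns are hypothetical):

```
-- Rewrite matching rows asynchronously, as a mutation.
ALTER TABLE hits
    UPDATE Visits = Visits + 1
    WHERE EventDate = today();

-- Mutations run in the background; progress can be checked here.
SELECT * FROM system.mutations WHERE is_done = 0;
```
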

#### Improvements {#improvements-3}

- If a data part remains unchanged during mutation, it isn’t downloaded by replicas. [\#3103](https://github.com/ClickHouse/ClickHouse/pull/3103)
- Autocomplete is available for names of settings when working with `clickhouse-client`. [\#3106](https://github.com/ClickHouse/ClickHouse/pull/3106)

#### Bug fixes: {#bug-fixes-12}

- Added a check for the sizes of arrays that are elements of `Nested` type fields when inserting. [\#3118](https://github.com/ClickHouse/ClickHouse/pull/3118)
- Fixed an error updating external dictionaries with the `ODBC` source and `hashed` storage. This error occurred in version 18.12.13.
- Fixed a crash when creating a temporary table from a query with an `IN` condition. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3098)
- Fixed an error in aggregate functions for arrays that can have `NULL` elements. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/3097)

### ClickHouse release 18.12.13, 2018-09-10 {#clickhouse-release-18-12-13-2018-09-10}

#### New features: {#new-features-4}

- Added the `DECIMAL(digits, scale)` data type (`Decimal32(scale)`, `Decimal64(scale)`, `Decimal128(scale)`). To enable it, use the setting `allow_experimental_decimal_type`. [\#2846](https://github.com/ClickHouse/ClickHouse/pull/2846) [\#2970](https://github.com/ClickHouse/ClickHouse/pull/2970) [\#3008](https://github.com/ClickHouse/ClickHouse/pull/3008) [\#3047](https://github.com/ClickHouse/ClickHouse/pull/3047)
- New `WITH ROLLUP` modifier for `GROUP BY` (alternative syntax: `GROUP BY ROLLUP(...)`); see the sketch after this list. [\#2948](https://github.com/ClickHouse/ClickHouse/pull/2948)
- In queries with JOIN, the star character expands to a list of columns in all tables, in compliance with the SQL standard. You can restore the old behavior by setting `asterisk_left_columns_only` to 1 on the user configuration level. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2787)
- Added support for JOIN with table functions. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2907)
- Autocomplete by pressing Tab in clickhouse-client. [Sergey Shcherbin](https://github.com/ClickHouse/ClickHouse/pull/2447)
- Ctrl+C in clickhouse-client clears a query that was entered. [\#2877](https://github.com/ClickHouse/ClickHouse/pull/2877)
- Added the `join_default_strictness` setting (values: `''`, `'any'`, `'all'`). This allows you to not specify `ANY` or `ALL` for `JOIN`. [\#2982](https://github.com/ClickHouse/ClickHouse/pull/2982)
- Each line of the server log related to query processing shows the query ID. [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482)
- Now you can get query execution logs in clickhouse-client (use the `send_logs_level` setting). With distributed query processing, logs are cascaded from all the servers. [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482)
- The `system.query_log` and `system.processes` (`SHOW PROCESSLIST`) tables now have information about all changed settings when you run a query (the nested structure of the `Settings` data). Added the `log_query_settings` setting. [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482)
- The `system.query_log` and `system.processes` tables now show information about the number of threads that are participating in query execution (see the `thread_numbers` column). [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482)
- Added `ProfileEvents` counters that measure the time spent on reading and writing over the network and reading and writing to disk, the number of network errors, and the time spent waiting when network bandwidth is limited.
  [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482)
- Added `ProfileEvents` counters that contain the system metrics from rusage (you can use them to get information about CPU usage in userspace and the kernel, page faults, and context switches), as well as taskstats metrics (use these to obtain information about I/O wait time, CPU wait time, and the amount of data read and recorded, both with and without page cache). [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482)
- The `ProfileEvents` counters are applied globally and for each query, as well as for each query execution thread, which allows you to profile resource consumption by query in detail. [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482)
- Added the `system.query_thread_log` table, which contains information about each query execution thread. Added the `log_query_threads` setting. [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482)
- The `system.metrics` and `system.events` tables now have built-in documentation. [\#3016](https://github.com/ClickHouse/ClickHouse/pull/3016)
- Added the `arrayEnumerateDense` function. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2975)
- Added the `arrayCumSumNonNegative` and `arrayDifference` functions. [Aleksey Studnev](https://github.com/ClickHouse/ClickHouse/pull/2942)
- Added the `retention` aggregate function. [Sundy Li](https://github.com/ClickHouse/ClickHouse/pull/2887)
- Now you can add (merge) states of aggregate functions by using the plus operator, and multiply the states of aggregate functions by a nonnegative constant. [\#3062](https://github.com/ClickHouse/ClickHouse/pull/3062) [\#3034](https://github.com/ClickHouse/ClickHouse/pull/3034)
- Tables in the MergeTree family now have the virtual column `_partition_id`. [\#3089](https://github.com/ClickHouse/ClickHouse/pull/3089)
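
A minimal sketch of `WITH ROLLUP` together with the experimental `Decimal` type (the table `sales` and its columns are hypothetical):

```
SET allow_experimental_decimal_type = 1;

CREATE TABLE sales
(
    region String,
    city String,
    amount Decimal64(2)
) ENGINE = Memory;

-- ROLLUP adds subtotal rows for each prefix of the grouping keys,
-- plus a grand-total row.
SELECT region, city, sum(amount) AS total
FROM sales
GROUP BY region, city WITH ROLLUP;
```
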
#### Experimental features: {#experimental-features-1}

- Added the `LowCardinality(T)` data type. This data type automatically creates a local dictionary of values and allows data processing without unpacking the dictionary. [\#2830](https://github.com/ClickHouse/ClickHouse/pull/2830)
- Added a cache of JIT-compiled functions and a counter for the number of uses before compiling. To JIT compile expressions, enable the `compile_expressions` setting. [\#2990](https://github.com/ClickHouse/ClickHouse/pull/2990) [\#3077](https://github.com/ClickHouse/ClickHouse/pull/3077)

#### Improvements: {#improvements-4}

- Fixed the problem with unlimited accumulation of the replication log when there are abandoned replicas. Added an effective recovery mode for replicas with a long lag.
- Improved performance of `GROUP BY` with multiple aggregation fields when one of them is string and the others are fixed length.
- Improved performance when using `PREWHERE` and with implicit transfer of expressions in `PREWHERE`.
- Improved parsing performance for text formats (`CSV`, `TSV`). [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2977) [\#2980](https://github.com/ClickHouse/ClickHouse/pull/2980)
- Improved performance of reading strings and arrays in binary formats. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2955)
- Increased performance and reduced memory consumption for queries to `system.tables` and `system.columns` when there is a very large number of tables on a single server. [\#2953](https://github.com/ClickHouse/ClickHouse/pull/2953)
- Fixed a performance problem in the case of a large stream of queries that result in an error (the `_dl_addr` function is visible in `perf top`, but the server isn’t using much CPU). [\#2938](https://github.com/ClickHouse/ClickHouse/pull/2938)
- Conditions are pushed down into the View (when `enable_optimize_predicate_expression` is enabled). [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2907)
- Improvements to the functionality for the `UUID` data type. [\#3074](https://github.com/ClickHouse/ClickHouse/pull/3074) [\#2985](https://github.com/ClickHouse/ClickHouse/pull/2985)
- The `UUID` data type is supported in external dictionaries. [\#2822](https://github.com/ClickHouse/ClickHouse/pull/2822)
- The `visitParamExtractRaw` function works correctly with nested structures. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2974)
- When the `input_format_skip_unknown_fields` setting is enabled, object fields in `JSONEachRow` format are skipped correctly. [BlahGeek](https://github.com/ClickHouse/ClickHouse/pull/2958)
- For a `CASE` expression with conditions, you can now omit `ELSE`, which is equivalent to `ELSE NULL`. [\#2920](https://github.com/ClickHouse/ClickHouse/pull/2920)
- The operation timeout can now be configured when working with ZooKeeper. [urykhy](https://github.com/ClickHouse/ClickHouse/pull/2971)
- You can specify an offset for `LIMIT n, m` as `LIMIT n OFFSET m` (see the sketch after this list). [\#2840](https://github.com/ClickHouse/ClickHouse/pull/2840)
- You can use the `SELECT TOP n` syntax as an alternative for `LIMIT`. [\#2840](https://github.com/ClickHouse/ClickHouse/pull/2840)
- Increased the size of the queue to write to system tables, so the `SystemLog queue is full` error doesn’t happen as often.
- The `windowFunnel` aggregate function now supports events that meet multiple conditions. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2801)
- Duplicate columns can be used in a `USING` clause for `JOIN`. [\#3006](https://github.com/ClickHouse/ClickHouse/pull/3006)
- `Pretty` formats now have a limit on column alignment by width. Use the `output_format_pretty_max_column_pad_width` setting. If a value is wider, it will still be displayed in its entirety, but the other cells in the table will not be too wide. [\#3003](https://github.com/ClickHouse/ClickHouse/pull/3003)
- The `odbc` table function now allows you to specify the database/schema name. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2885)
- Added the ability to use a username specified in the `clickhouse-client` config file. [Vladimir Kozbin](https://github.com/ClickHouse/ClickHouse/pull/2909)
- The `ZooKeeperExceptions` counter has been split into three counters: `ZooKeeperUserExceptions`, `ZooKeeperHardwareExceptions`, and `ZooKeeperOtherExceptions`.
- `ALTER DELETE` queries work for materialized views.
- Added randomization when running the cleanup thread periodically for `ReplicatedMergeTree` tables in order to avoid periodic load spikes when there is a very large number of `ReplicatedMergeTree` tables.
- Support for `ATTACH TABLE ... ON CLUSTER` queries. [\#3025](https://github.com/ClickHouse/ClickHouse/pull/3025)
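
A minimal sketch of the alternative `LIMIT` spellings, using the built-in `system.numbers` table:

```
-- These two queries are equivalent: skip 10 rows, return 5.
SELECT number FROM system.numbers LIMIT 10, 5;
SELECT number FROM system.numbers LIMIT 5 OFFSET 10;

-- TOP is an alternative way to write LIMIT.
SELECT TOP 5 number FROM system.numbers;
```
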
#### Bug fixes: {#bug-fixes-13}

- Fixed an issue with `Dictionary` tables (throws the `Size of offsets doesn't match size of column` or `Unknown compression method` exception). This bug appeared in version 18.10.3. [\#2913](https://github.com/ClickHouse/ClickHouse/issues/2913)
- Fixed a bug when merging `CollapsingMergeTree` tables if one of the data parts is empty (these parts are formed during merge or `ALTER DELETE` if all data was deleted), and the `vertical` algorithm was used for the merge. [\#3049](https://github.com/ClickHouse/ClickHouse/pull/3049)
- Fixed a race condition during `DROP` or `TRUNCATE` for `Memory` tables with a simultaneous `SELECT`, which could lead to server crashes. This bug appeared in version 1.1.54388. [\#3038](https://github.com/ClickHouse/ClickHouse/pull/3038)
- Fixed the possibility of data loss when inserting in `Replicated` tables if the `Session is expired` error is returned (data loss can be detected by the `ReplicatedDataLoss` metric). This error occurred in version 1.1.54378. [\#2939](https://github.com/ClickHouse/ClickHouse/pull/2939) [\#2949](https://github.com/ClickHouse/ClickHouse/pull/2949) [\#2964](https://github.com/ClickHouse/ClickHouse/pull/2964)
- Fixed a segfault during `JOIN ... ON`. [\#3000](https://github.com/ClickHouse/ClickHouse/pull/3000)
- Fixed an error when searching column names when the `WHERE` expression consists entirely of a qualified column name, such as `WHERE table.column`. [\#2994](https://github.com/ClickHouse/ClickHouse/pull/2994)
- Fixed the “Not found column” error that occurred when executing distributed queries if a single column consisting of an IN expression with a subquery is requested from a remote server. [\#3087](https://github.com/ClickHouse/ClickHouse/pull/3087)
- Fixed the `Block structure mismatch in UNION stream: different number of columns` error that occurred for distributed queries if one of the shards is local and the other is not, and optimization of the move to `PREWHERE` is triggered. [\#2226](https://github.com/ClickHouse/ClickHouse/pull/2226) [\#3037](https://github.com/ClickHouse/ClickHouse/pull/3037) [\#3055](https://github.com/ClickHouse/ClickHouse/pull/3055) [\#3065](https://github.com/ClickHouse/ClickHouse/pull/3065) [\#3073](https://github.com/ClickHouse/ClickHouse/pull/3073) [\#3090](https://github.com/ClickHouse/ClickHouse/pull/3090) [\#3093](https://github.com/ClickHouse/ClickHouse/pull/3093)
- Fixed the `pointInPolygon` function for certain cases of non-convex polygons. [\#2910](https://github.com/ClickHouse/ClickHouse/pull/2910)
- Fixed the incorrect result when comparing `nan` with integers. [\#3024](https://github.com/ClickHouse/ClickHouse/pull/3024)
- Fixed an error in the `zlib-ng` library that could lead to a segfault in rare cases. [\#2854](https://github.com/ClickHouse/ClickHouse/pull/2854)
- Fixed a memory leak when inserting into a table with `AggregateFunction` columns, if the state of the aggregate function is not simple (allocates memory separately), and if a single insertion request results in multiple small blocks. [\#3084](https://github.com/ClickHouse/ClickHouse/pull/3084)
- Fixed a race condition when creating and deleting the same `Buffer` or `MergeTree` table simultaneously.
- Fixed the possibility of a segfault when comparing tuples made up of certain non-trivial types, such as tuples. [\#2989](https://github.com/ClickHouse/ClickHouse/pull/2989)
- Fixed the possibility of a segfault when running certain `ON CLUSTER` queries. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2960)
- Fixed an error in the `arrayDistinct` function for `Nullable` array elements.
  [\#2845](https://github.com/ClickHouse/ClickHouse/pull/2845) [\#2937](https://github.com/ClickHouse/ClickHouse/pull/2937)
- The `enable_optimize_predicate_expression` option now correctly supports cases with `SELECT *`. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2929)
- Fixed a segfault when re-initializing the ZooKeeper session. [\#2917](https://github.com/ClickHouse/ClickHouse/pull/2917)
- Fixed potential blocking when working with ZooKeeper.
- Fixed incorrect code for adding nested data structures in a `SummingMergeTree`.
- When allocating memory for states of aggregate functions, alignment is correctly taken into account, which makes it possible to use operations that require alignment when implementing states of aggregate functions. [chenxing-xc](https://github.com/ClickHouse/ClickHouse/pull/2808)

#### Security fix: {#security-fix}

- Safe use of ODBC data sources. Interaction with ODBC drivers uses a separate `clickhouse-odbc-bridge` process. Errors in third-party ODBC drivers no longer cause problems with server stability or vulnerabilities. [\#2828](https://github.com/ClickHouse/ClickHouse/pull/2828) [\#2879](https://github.com/ClickHouse/ClickHouse/pull/2879) [\#2886](https://github.com/ClickHouse/ClickHouse/pull/2886) [\#2893](https://github.com/ClickHouse/ClickHouse/pull/2893) [\#2921](https://github.com/ClickHouse/ClickHouse/pull/2921)
- Fixed incorrect validation of the file path in the `catBoostPool` table function. [\#2894](https://github.com/ClickHouse/ClickHouse/pull/2894)
- The contents of system tables (`tables`, `databases`, `parts`, `columns`, `parts_columns`, `merges`, `mutations`, `replicas`, and `replication_queue`) are filtered according to the user’s configured access to databases (`allow_databases`). [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2856)

#### Backward incompatible changes: {#backward-incompatible-changes-3}

- In queries with JOIN, the star character expands to a list of columns in all tables, in compliance with the SQL standard. You can restore the old behavior by setting `asterisk_left_columns_only` to 1 on the user configuration level.

#### Build changes: {#build-changes-2}

- Most integration tests can now be run by commit.
- Code style checks can also be run by commit.
- The `memcpy` implementation is chosen correctly when building on CentOS7/Fedora. [Etienne Champetier](https://github.com/ClickHouse/ClickHouse/pull/2912)
- When using clang to build, some warnings from `-Weverything` have been added, in addition to the regular `-Wall -Wextra -Werror`. [\#2957](https://github.com/ClickHouse/ClickHouse/pull/2957)
- Debug builds use the `jemalloc` debug option.
- The interface of the library for interacting with ZooKeeper is declared abstract. [\#2950](https://github.com/ClickHouse/ClickHouse/pull/2950)

## ClickHouse release 18.10 {#clickhouse-release-18-10}

### ClickHouse release 18.10.3, 2018-08-13 {#clickhouse-release-18-10-3-2018-08-13}

#### New features: {#new-features-5}

- HTTPS can be used for replication. [\#2760](https://github.com/ClickHouse/ClickHouse/pull/2760)
- Added the functions `murmurHash2_64`, `murmurHash3_32`, `murmurHash3_64`, and `murmurHash3_128` in addition to the existing `murmurHash2_32` (see the sketch after this list). [\#2791](https://github.com/ClickHouse/ClickHouse/pull/2791)
- Support for Nullable types in the ClickHouse ODBC driver (`ODBCDriver2` output format). [\#2834](https://github.com/ClickHouse/ClickHouse/pull/2834)
- Support for `UUID` in the key columns.
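
A minimal sketch of the extended murmur hash family:

```
-- 32-, 64-, and 128-bit murmur variants over the same input.
SELECT
    murmurHash2_32('ClickHouse'),
    murmurHash2_64('ClickHouse'),
    murmurHash3_32('ClickHouse'),
    murmurHash3_64('ClickHouse'),
    murmurHash3_128('ClickHouse');
```
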

#### Improvements: {#improvements-5}

- Clusters can be removed without restarting the server when they are deleted from the config files. [\#2777](https://github.com/ClickHouse/ClickHouse/pull/2777)
- External dictionaries can be removed without restarting the server when they are removed from config files. [\#2779](https://github.com/ClickHouse/ClickHouse/pull/2779)
- Added `SETTINGS` support for the `Kafka` table engine (see the sketch after this list). [Alexander Marshalov](https://github.com/ClickHouse/ClickHouse/pull/2781)
- Improvements for the `UUID` data type (not yet complete). [\#2618](https://github.com/ClickHouse/ClickHouse/pull/2618)
- Support for empty parts after merges in the `SummingMergeTree`, `CollapsingMergeTree` and `VersionedCollapsingMergeTree` engines. [\#2815](https://github.com/ClickHouse/ClickHouse/pull/2815)
- Old records of completed mutations are deleted (`ALTER DELETE`). [\#2784](https://github.com/ClickHouse/ClickHouse/pull/2784)
- Added the `system.merge_tree_settings` table. [Kirill Shvakov](https://github.com/ClickHouse/ClickHouse/pull/2841)
- The `system.tables` table now has dependency columns: `dependencies_database` and `dependencies_table`. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2851)
- Added the `max_partition_size_to_drop` config option. [\#2782](https://github.com/ClickHouse/ClickHouse/pull/2782)
- Added the `output_format_json_escape_forward_slashes` option. [Alexander Bocharov](https://github.com/ClickHouse/ClickHouse/pull/2812)
- Added the `max_fetch_partition_retries_count` setting. [\#2831](https://github.com/ClickHouse/ClickHouse/pull/2831)
- Added the `prefer_localhost_replica` setting for disabling the preference for a local replica and going to a local replica without inter-process interaction. [\#2832](https://github.com/ClickHouse/ClickHouse/pull/2832)
- The `quantileExact` aggregate function returns `nan` in the case of aggregation on an empty `Float32` or `Float64` set. [Sundy Li](https://github.com/ClickHouse/ClickHouse/pull/2855)
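
A minimal sketch of the new `SETTINGS` syntax for `Kafka` tables (the broker address, topic, and table name are hypothetical):

```
CREATE TABLE kafka_queue
(
    message String
) ENGINE = Kafka
SETTINGS
    kafka_broker_list = 'localhost:9092',
    kafka_topic_list = 'events',
    kafka_group_name = 'clickhouse-consumer',
    kafka_format = 'JSONEachRow';
```
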
#### Bug fixes: {#bug-fixes-14}

- Removed unnecessary escaping of the connection string parameters for ODBC, which made it impossible to establish a connection. This error occurred in version 18.6.0.
- Fixed the logic for processing `REPLACE PARTITION` commands in the replication queue. If there are two `REPLACE` commands for the same partition, the incorrect logic could cause one of them to remain in the replication queue and not be executed. [\#2814](https://github.com/ClickHouse/ClickHouse/pull/2814)
- Fixed a merge bug when all data parts were empty (parts that were formed from a merge or from `ALTER DELETE` if all data was deleted). This bug appeared in version 18.1.0. [\#2930](https://github.com/ClickHouse/ClickHouse/pull/2930)
- Fixed an error for concurrent `Set` or `Join`. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2823)
- Fixed the `Block structure mismatch in UNION stream: different number of columns` error that occurred for `UNION ALL` queries inside a sub-query if one of the `SELECT` queries contains duplicate column names. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2094)
- Fixed a memory leak if an exception occurred when connecting to a MySQL server.
- Fixed an incorrect clickhouse-client response code in case of a query error.
- Fixed incorrect behavior of materialized views containing DISTINCT. [\#2795](https://github.com/ClickHouse/ClickHouse/issues/2795)

#### Backward incompatible changes {#backward-incompatible-changes-4}

- Removed support for CHECK TABLE queries for Distributed tables.

#### Build changes: {#build-changes-3}

- The allocator has been replaced: `jemalloc` is now used instead of `tcmalloc`. In some scenarios, this increases speed by up to 20%. However, there are queries that have slowed by up to 20%. Memory consumption has been reduced by approximately 10% in some scenarios, with improved stability. With highly competitive loads, CPU usage in userspace and in system shows just a slight increase. [\#2773](https://github.com/ClickHouse/ClickHouse/pull/2773)
- Use of libressl from a submodule. [\#1983](https://github.com/ClickHouse/ClickHouse/pull/1983) [\#2807](https://github.com/ClickHouse/ClickHouse/pull/2807)
- Use of unixodbc from a submodule. [\#2789](https://github.com/ClickHouse/ClickHouse/pull/2789)
- Use of mariadb-connector-c from a submodule. [\#2785](https://github.com/ClickHouse/ClickHouse/pull/2785)
- Added functional test files to the repository that depend on the availability of test data (for the time being, without the test data itself).

## ClickHouse release 18.6 {#clickhouse-release-18-6}

### ClickHouse release 18.6.0, 2018-08-02 {#clickhouse-release-18-6-0-2018-08-02}

#### New features: {#new-features-6}

- Added support for ON expressions for the JOIN ON syntax:
  `JOIN ON Expr([table.]column ...) = Expr([table.]column, ...) [AND Expr([table.]column, ...) = Expr([table.]column, ...) ...]`
  The expression must be a chain of equalities joined by the AND operator. Each side of the equality can be an arbitrary expression over the columns of one of the tables. The use of fully qualified column names is supported (`table.name`, `database.table.name`, `table_alias.name`, `subquery_alias.name`) for the right table. [\#2742](https://github.com/ClickHouse/ClickHouse/pull/2742)
- HTTPS can be enabled for replication. [\#2760](https://github.com/ClickHouse/ClickHouse/pull/2760)

#### Improvements: {#improvements-6}

- The server passes the patch component of its version to the client. Data about the patch version component is in `system.processes` and `query_log`. [\#2646](https://github.com/ClickHouse/ClickHouse/pull/2646)

## ClickHouse release 18.5 {#clickhouse-release-18-5}

### ClickHouse release 18.5.1, 2018-07-31 {#clickhouse-release-18-5-1-2018-07-31}

#### New features: {#new-features-7}

- Added the hash function `murmurHash2_32`. [\#2756](https://github.com/ClickHouse/ClickHouse/pull/2756)

#### Improvements: {#improvements-7}

- Now you can use the `from_env` attribute to set values in config files from environment variables. [\#2741](https://github.com/ClickHouse/ClickHouse/pull/2741)
- Added case-insensitive versions of the `coalesce`, `ifNull`, and `nullIf` functions (see the sketch after this list). [\#2752](https://github.com/ClickHouse/ClickHouse/pull/2752)
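
A minimal sketch of the case-insensitive spellings:

```
-- These standard-SQL spellings now resolve to the existing functions.
SELECT
    COALESCE(NULL, NULL, 42),
    IFNULL(NULL, 'fallback'),
    NULLIF(1, 1);
```
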
#### Bug fixes: {#bug-fixes-15}

- Fixed a possible bug when starting a replica. [\#2759](https://github.com/ClickHouse/ClickHouse/pull/2759)

## ClickHouse release 18.4 {#clickhouse-release-18-4}

### ClickHouse release 18.4.0, 2018-07-28 {#clickhouse-release-18-4-0-2018-07-28}

#### New features: {#new-features-8}

- Added system tables: `formats`, `data_type_families`, `aggregate_function_combinators`, `table_functions`, `table_engines`, `collations`. [\#2721](https://github.com/ClickHouse/ClickHouse/pull/2721)
- Added the ability to use a table function instead of a table as an argument of a `remote` or `cluster` table function. [\#2708](https://github.com/ClickHouse/ClickHouse/pull/2708)
- Support for `HTTP Basic` authentication in the replication protocol. [\#2727](https://github.com/ClickHouse/ClickHouse/pull/2727)
- The `has` function now allows searching for a numeric value in an array of `Enum` values. [Maxim Khrisanfov](https://github.com/ClickHouse/ClickHouse/pull/2699)
- Support for adding arbitrary message separators when reading from `Kafka`. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2701)

#### Improvements: {#improvements-8}

- The `ALTER TABLE t DELETE WHERE` query does not rewrite data parts that were not affected by the WHERE condition. [\#2694](https://github.com/ClickHouse/ClickHouse/pull/2694)
- The `use_minimalistic_checksums_in_zookeeper` option for `ReplicatedMergeTree` tables is enabled by default. This setting was added in version 1.1.54378, 2018-04-16. Versions that are older than 1.1.54378 can no longer be installed.
- Support for running `KILL` and `OPTIMIZE` queries that specify `ON CLUSTER`. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2689)

#### Bug fixes: {#bug-fixes-16}

- Fixed the error `Column ... is not under an aggregate function and not in GROUP BY` for aggregation with an IN expression. This bug appeared in version 18.1.0. ([bbdd780b](https://github.com/ClickHouse/ClickHouse/commit/bbdd780be0be06a0f336775941cdd536878dd2c2))
- Fixed a bug in the `windowFunnel` aggregate function. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2735)
- Fixed a bug in the `anyHeavy` aggregate function. ([a2101df2](https://github.com/ClickHouse/ClickHouse/commit/a2101df25a6a0fba99aa71f8793d762af2b801ee))
- Fixed a server crash when using the `countArray()` aggregate function.

#### Backward incompatible changes: {#backward-incompatible-changes-5}

- The parameters for the `Kafka` engine were changed from `Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format[, kafka_schema, kafka_num_consumers])` to `Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format[, kafka_row_delimiter, kafka_schema, kafka_num_consumers])`. If your tables use the `kafka_schema` or `kafka_num_consumers` parameters, you have to manually edit the metadata files `path/metadata/database/table.sql` and add the `kafka_row_delimiter` parameter with the `''` value.

## ClickHouse release 18.1 {#clickhouse-release-18-1}

### ClickHouse release 18.1.0, 2018-07-23 {#clickhouse-release-18-1-0-2018-07-23}

#### New features: {#new-features-9}

- Support for the `ALTER TABLE t DELETE WHERE` query for non-replicated MergeTree tables. [\#2634](https://github.com/ClickHouse/ClickHouse/pull/2634)
- Support for arbitrary types for the `uniq*` family of aggregate functions. [\#2010](https://github.com/ClickHouse/ClickHouse/issues/2010)
- Support for arbitrary types in comparison operators. [\#2026](https://github.com/ClickHouse/ClickHouse/issues/2026)
- The `users.xml` file allows setting a subnet mask in the format `10.0.0.1/255.255.255.0`. This is necessary for using masks for IPv6 networks with zeros in the middle. [\#2637](https://github.com/ClickHouse/ClickHouse/pull/2637)
- Added the `arrayDistinct` function (see the sketch after this list). [\#2670](https://github.com/ClickHouse/ClickHouse/pull/2670)
- The SummingMergeTree engine can now work with AggregateFunction type columns. [Constantin S. Pan](https://github.com/ClickHouse/ClickHouse/pull/2566)
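
A minimal sketch of the new function:

```
-- arrayDistinct keeps one copy of each distinct element.
SELECT arrayDistinct([1, 2, 2, 3, 1]);  -- [1, 2, 3]
```
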

#### Improvements: {#improvements-9}

- Changed the numbering scheme for release versions. Now the first part contains the year of release (A.D., Moscow timezone, minus 2000), the second part contains the number for major changes (increases for most releases), and the third part is the patch version. Releases are still backward compatible, unless otherwise stated in the changelog.
- Faster conversions of floating-point numbers to a string. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2664)
- If some rows were skipped during an insert due to parsing errors (this is possible with the `input_allow_errors_num` and `input_allow_errors_ratio` settings enabled), the number of skipped rows is now written to the server log. [Leonardo Cecchi](https://github.com/ClickHouse/ClickHouse/pull/2669)

#### Bug fixes: {#bug-fixes-17}

- Fixed the TRUNCATE command for temporary tables. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2624)
- Fixed a rare deadlock in the ZooKeeper client library that occurred when there was a network error while reading the response. [c315200](https://github.com/ClickHouse/ClickHouse/commit/c315200e64b87e44bdf740707fc857d1fdf7e947)
- Fixed an error during a CAST to Nullable types. [\#1322](https://github.com/ClickHouse/ClickHouse/issues/1322)
- Fixed the incorrect result of the `maxIntersection()` function when the boundaries of intervals coincided. [Michael Furmur](https://github.com/ClickHouse/ClickHouse/pull/2657)
- Fixed incorrect transformation of the OR expression chain in a function argument. [chenxing-xc](https://github.com/ClickHouse/ClickHouse/pull/2663)
- Fixed performance degradation for queries containing `IN (subquery)` expressions inside another subquery. [\#2571](https://github.com/ClickHouse/ClickHouse/issues/2571)
- Fixed incompatibility between servers with different versions in distributed queries that use a `CAST` function that isn’t in uppercase letters. [fe8c4d6](https://github.com/ClickHouse/ClickHouse/commit/fe8c4d64e434cacd4ceef34faa9005129f2190a5)
- Added missing quoting of identifiers for queries to an external DBMS. [\#2635](https://github.com/ClickHouse/ClickHouse/issues/2635)

#### Backward incompatible changes: {#backward-incompatible-changes-6}

- Converting a string containing the number zero to DateTime does not work. Example: `SELECT toDateTime('0')`. This is also the reason that `DateTime DEFAULT '0'` does not work in tables, as well as `0` in dictionaries. Solution: replace `0` with `0000-00-00 00:00:00`.

## ClickHouse release 1.1 {#clickhouse-release-1-1}

### ClickHouse release 1.1.54394, 2018-07-12 {#clickhouse-release-1-1-54394-2018-07-12}

#### New features: {#new-features-10}

- Added the `histogram` aggregate function. [Mikhail Surin](https://github.com/ClickHouse/ClickHouse/pull/2521)
- Now `OPTIMIZE TABLE ... FINAL` can be used without specifying partitions for `ReplicatedMergeTree`. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2600)

#### Bug fixes: {#bug-fixes-18}

- Fixed a problem with a very small timeout for sockets (one second) for reading and writing when sending and downloading replicated data, which made it impossible to download larger parts if there is a load on the network or disk (it resulted in cyclical attempts to download parts). This error occurred in version 1.1.54388.
- Fixed issues when using chroot in ZooKeeper if you inserted duplicate data blocks in the table.
- The `has` function now works correctly for an array with Nullable elements. [\#2115](https://github.com/ClickHouse/ClickHouse/issues/2115)
- The `system.tables` table now works correctly when used in distributed queries. The `metadata_modification_time` and `engine_full` columns are now non-virtual. Fixed an error that occurred if only these columns were queried from the table.
- Fixed how an empty `TinyLog` table works after inserting an empty data block. [\#2563](https://github.com/ClickHouse/ClickHouse/issues/2563)
- The `system.zookeeper` table works if the value of the node in ZooKeeper is NULL.

### ClickHouse release 1.1.54390, 2018-07-06 {#clickhouse-release-1-1-54390-2018-07-06}

#### New features: {#new-features-11}

- Queries can be sent in `multipart/form-data` format (in the `query` field), which is useful if external data is also sent for query processing. [Olga Hvostikova](https://github.com/ClickHouse/ClickHouse/pull/2490)
- Added the ability to enable or disable processing single or double quotes when reading data in CSV format. You can configure this in the `format_csv_allow_single_quotes` and `format_csv_allow_double_quotes` settings. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2574)
- Now `OPTIMIZE TABLE ... FINAL` can be used without specifying the partition for non-replicated variants of `MergeTree`. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2599)

#### Improvements: {#improvements-10}

- Improved performance, reduced memory consumption, and correct memory consumption tracking with use of the IN operator when a table index could be used. [\#2584](https://github.com/ClickHouse/ClickHouse/pull/2584)
- Removed redundant checking of checksums when adding a data part. This is important when there are a large number of replicas, because in these cases the total number of checks was equal to N^2.
- Added support for `Array(Tuple(...))` arguments for the `arrayEnumerateUniq` function. [\#2573](https://github.com/ClickHouse/ClickHouse/pull/2573)
- Added `Nullable` support for the `runningDifference` function. [\#2594](https://github.com/ClickHouse/ClickHouse/pull/2594)
- Improved query analysis performance when there is a very large number of expressions. [\#2572](https://github.com/ClickHouse/ClickHouse/pull/2572)
- Faster selection of data parts for merging in `ReplicatedMergeTree` tables. Faster recovery of the ZooKeeper session. [\#2597](https://github.com/ClickHouse/ClickHouse/pull/2597)
- The `format_version.txt` file for `MergeTree` tables is re-created if it is missing, which makes sense if ClickHouse is launched after copying the directory structure without files. [Ciprian Hacman](https://github.com/ClickHouse/ClickHouse/pull/2593)

#### Bug fixes: {#bug-fixes-19}

- Fixed a bug when working with ZooKeeper that could make it impossible to recover the session and readonly states of tables before restarting the server.
- Fixed a bug when working with ZooKeeper that could result in old nodes not being deleted if the session is interrupted.
- Fixed an error in the `quantileTDigest` function for Float arguments (this bug was introduced in version 1.1.54388). [Mikhail Surin](https://github.com/ClickHouse/ClickHouse/pull/2553)
- Fixed a bug in the index for MergeTree tables if the primary key column is located inside the function for converting types between signed and unsigned integers of the same size. [\#2603](https://github.com/ClickHouse/ClickHouse/pull/2603)
- Fixed a segfault if `macros` are used but they aren’t in the config file. [\#2570](https://github.com/ClickHouse/ClickHouse/pull/2570)
- Fixed switching to the default database when reconnecting the client. [\#2583](https://github.com/ClickHouse/ClickHouse/pull/2583)
- Fixed a bug that occurred when the `use_index_for_in_with_subqueries` setting was disabled.

#### Security fix: {#security-fix-1}

- Sending files is no longer possible when connected to MySQL (`LOAD DATA LOCAL INFILE`).

### ClickHouse release 1.1.54388, 2018-06-28 {#clickhouse-release-1-1-54388-2018-06-28}

#### New features: {#new-features-12}

- Support for the `ALTER TABLE t DELETE WHERE` query for replicated tables. Added the `system.mutations` table to track the progress of this type of queries.
- Support for the `ALTER TABLE t [REPLACE|ATTACH] PARTITION` query for \*MergeTree tables.
- Support for the `TRUNCATE TABLE` query. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2260)
- Several new `SYSTEM` queries for replicated tables (`RESTART REPLICAS`, `SYNC REPLICA`, `[STOP|START] [MERGES|FETCHES|SENDS REPLICATED|REPLICATION QUEUES]`).
- Added the ability to write to a table with the MySQL engine and the corresponding table function. [sundy-li](https://github.com/ClickHouse/ClickHouse/pull/2294)
- Added the `url()` table function and the `URL` table engine. [Alexander Sapin](https://github.com/ClickHouse/ClickHouse/pull/2501)
- Added the `windowFunnel` aggregate function. [sundy-li](https://github.com/ClickHouse/ClickHouse/pull/2352)
- New `startsWith` and `endsWith` functions for strings (see the sketch after this list). [Vadim Plakhtinsky](https://github.com/ClickHouse/ClickHouse/pull/2429)
- The `numbers()` table function now allows you to specify the offset. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2535)
- The password for `clickhouse-client` can be entered interactively.
- Server logs can now be sent to syslog. [Alexander Krasheninnikov](https://github.com/ClickHouse/ClickHouse/pull/2459)
- Support for logging in dictionaries with a shared library source. [Alexander Sapin](https://github.com/ClickHouse/ClickHouse/pull/2472)
- Support for custom CSV delimiters. [Ivan Zhukov](https://github.com/ClickHouse/ClickHouse/pull/2263)
- Added the `date_time_input_format` setting. If you switch this setting to `'best_effort'`, DateTime values will be read in a wide range of formats.
- Added the `clickhouse-obfuscator` utility for data obfuscation. Usage example: publishing data used in performance tests.
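
A minimal sketch of the new string functions and the `numbers()` offset (the values in comments are the expected results):

```
-- New string predicates.
SELECT
    startsWith('ClickHouse', 'Click'),  -- 1
    endsWith('ClickHouse', 'House');    -- 1

-- numbers() with an offset: 5 rows starting at 10.
SELECT number FROM numbers(10, 5);
```
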
#### Experimental features: {#experimental-features-2}

- Added the ability to calculate `and` arguments only where they are needed. [Anastasia Tsarkova](https://github.com/ClickHouse/ClickHouse/pull/2272)
- JIT compilation to native code is now available for some expressions. [pyos](https://github.com/ClickHouse/ClickHouse/pull/2277)

#### Bug fixes: {#bug-fixes-20}

- Duplicates no longer appear for a query with `DISTINCT` and `ORDER BY`.
- Queries with `ARRAY JOIN` and `arrayFilter` no longer return an incorrect result.
- Fixed an error when reading an array column from a Nested structure. [\#2066](https://github.com/ClickHouse/ClickHouse/issues/2066)
- Fixed an error when analyzing queries with a HAVING clause like `HAVING tuple IN (...)`.
- Fixed an error when analyzing queries with recursive aliases.
- Fixed an error when reading from ReplacingMergeTree with a condition in PREWHERE that filters all rows. [\#2525](https://github.com/ClickHouse/ClickHouse/issues/2525)
- User profile settings were not applied when using sessions in the HTTP interface.
- Fixed how settings are applied from the command line parameters in clickhouse-local.
- The ZooKeeper client library now uses the session timeout received from the server.
- Fixed a bug in the ZooKeeper client library when the client waited for the server response longer than the timeout.
- Fixed pruning of parts for queries with conditions on partition key columns. [\#2342](https://github.com/ClickHouse/ClickHouse/issues/2342)
- Merges are now possible after `CLEAR COLUMN IN PARTITION`. [\#2315](https://github.com/ClickHouse/ClickHouse/issues/2315)
- Type mapping in the ODBC table function has been fixed. [sundy-li](https://github.com/ClickHouse/ClickHouse/pull/2268)
- Type comparisons have been fixed for `DateTime` with and without the time zone. [Alexander Bocharov](https://github.com/ClickHouse/ClickHouse/pull/2400)
- Fixed syntactic parsing and formatting of the `CAST` operator.
- Fixed insertion into a materialized view for the Distributed table engine. [Babacar Diassé](https://github.com/ClickHouse/ClickHouse/pull/2411)
- Fixed a race condition when writing data from the `Kafka` engine to materialized views. [Yangkuan Liu](https://github.com/ClickHouse/ClickHouse/pull/2448)
- Fixed SSRF in the `remote()` table function.
- Fixed exit behavior of `clickhouse-client` in multiline mode. [\#2510](https://github.com/ClickHouse/ClickHouse/issues/2510)

#### Improvements: {#improvements-11}

- Background tasks in replicated tables are now performed in a thread pool instead of in separate threads. [Silviu Caragea](https://github.com/ClickHouse/ClickHouse/pull/1722)
- Improved LZ4 compression performance.
- Faster analysis for queries with a large number of JOINs and sub-queries.
- The DNS cache is now updated automatically when there are too many network errors.
- Table inserts no longer occur if the insert into one of the materialized views is not possible because it has too many parts.
- Corrected the discrepancy in the event counters `Query`, `SelectQuery`, and `InsertQuery`.
- Expressions like `tuple IN (SELECT tuple)` are allowed if the tuple types match (see the sketch after this list).
- A server with replicated tables can start even if you haven’t configured ZooKeeper.
- When calculating the number of available CPU cores, limits on cgroups are now taken into account. [Atri Sharma](https://github.com/ClickHouse/ClickHouse/pull/2325)
- Added chown for config directories in the systemd config file. [Mikhail Shiryaev](https://github.com/ClickHouse/ClickHouse/pull/2421)
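
A minimal sketch of the relaxed tuple matching (assuming the tuple types on both sides line up):

```
-- A tuple on the left can be matched against tuples
-- produced by a subquery.
SELECT (1, 'a') IN (SELECT 1, 'a');
```
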
#### Build changes: {#build-changes-4}

- The gcc8 compiler can be used for builds.
- Added the ability to build llvm from a submodule.
- The version of the librdkafka library has been updated to v0.11.4.
- Added the ability to use the system libcpuid library. The library version has been updated to 0.4.0.
- Fixed the build using the vectorclass library. [Babacar Diassé](https://github.com/ClickHouse/ClickHouse/pull/2274)
- CMake now generates files for ninja by default (like when using `-G Ninja`).
- Added the ability to use the libtinfo library instead of libtermcap. [Georgy Kondratiev](https://github.com/ClickHouse/ClickHouse/pull/2519)
- Fixed a header file conflict in Fedora Rawhide. [\#2520](https://github.com/ClickHouse/ClickHouse/issues/2520)

#### Backward incompatible changes: {#backward-incompatible-changes-7}

- Removed escaping in `Vertical` and `Pretty*` formats and deleted the `VerticalRaw` format.
- If servers with version 1.1.54388 (or newer) and servers with an older version are used simultaneously in a distributed query and the query has the `cast(x, 'Type')` expression without the `AS` keyword and doesn’t have the word `cast` in uppercase, an exception will be thrown with a message like `Not found column cast(0, 'UInt8') in block`. Solution: Update the server on the entire cluster.

### ClickHouse release 1.1.54385, 2018-06-01 {#clickhouse-release-1-1-54385-2018-06-01}

#### Bug fixes: {#bug-fixes-21}

- Fixed an error that in some cases caused ZooKeeper operations to block.

### ClickHouse release 1.1.54383, 2018-05-22 {#clickhouse-release-1-1-54383-2018-05-22}

#### Bug fixes: {#bug-fixes-22}

- Fixed a slowdown of the replication queue if a table has many replicas.

### ClickHouse release 1.1.54381, 2018-05-14 {#clickhouse-release-1-1-54381-2018-05-14}

#### Bug fixes: {#bug-fixes-23}

- Fixed a nodes leak in ZooKeeper when ClickHouse loses the connection to the ZooKeeper server.

### ClickHouse release 1.1.54380, 2018-04-21 {#clickhouse-release-1-1-54380-2018-04-21}

#### New features: {#new-features-13}

- Added the table function `file(path, format, structure)`. An example reading bytes from `/dev/urandom`: `ln -s /dev/urandom /var/lib/clickhouse/user_files/random`, then `clickhouse-client -q "SELECT * FROM file('random', 'RowBinary', 'd UInt8') LIMIT 10"`.

#### Improvements: {#improvements-12}

- Subqueries can be wrapped in `()` brackets to enhance query readability. For example: `(SELECT 1) UNION ALL (SELECT 1)`.
- Simple `SELECT` queries from the `system.processes` table are not included in the `max_concurrent_queries` limit.

#### Bug fixes: {#bug-fixes-24}

- Fixed incorrect behavior of the `IN` operator when selecting from a `MATERIALIZED VIEW`.
- Fixed incorrect filtering by partition index in expressions like `partition_key_column IN (...)`.
- Fixed the inability to execute an `OPTIMIZE` query on a non-leader replica if `RENAME` was performed on the table.
- Fixed the authorization error when executing `OPTIMIZE` or `ALTER` queries on a non-leader replica.
- Fixed freezing of `KILL QUERY`.
- Fixed an error in the ZooKeeper client library which led to loss of watches, freezing of the distributed DDL queue, and slowdowns in the replication queue if a non-empty `chroot` prefix is used in the ZooKeeper configuration.

#### Backward incompatible changes: {#backward-incompatible-changes-8}

- Removed support for expressions like `(a, b) IN (SELECT (a, b))` (you can use the equivalent expression `(a, b) IN (SELECT a, b)`). In previous releases, these expressions led to undetermined `WHERE` filtering or caused errors.

### ClickHouse release 1.1.54378, 2018-04-16 {#clickhouse-release-1-1-54378-2018-04-16}

#### New features: {#new-features-14}

- The logging level can be changed without restarting the server.
- Added the `SHOW CREATE DATABASE` query.
- The `query_id` can be passed to `clickhouse-client` (elBroom).
- New setting: `max_network_bandwidth_for_all_users`.
- Added support for `ALTER TABLE ... PARTITION ...` for `MATERIALIZED VIEW`.
- Added information about the size of data parts in uncompressed form in the system table.
- Server-to-server encryption support for distributed tables (`<secure>1</secure>` in the replica config in `<remote_servers>`).
- Configuration at the table level for the `ReplicatedMergeTree` family in order to minimize the amount of data stored in ZooKeeper: `use_minimalistic_checksums_in_zookeeper = 1`.
- Configuration of the `clickhouse-client` prompt. By default, server names are now output to the prompt. The server’s display name can be changed. It’s also sent in the `X-ClickHouse-Display-Name` HTTP header (Kirill Shvakov).
- Multiple comma-separated `topics` can be specified for the `Kafka` engine (Tobias Adamson).
- When a query is stopped by `KILL QUERY` or `replace_running_query`, the client receives the `Query was canceled` exception instead of an incomplete result.

#### Improvements: {#improvements-13}

- `ALTER TABLE ... DROP/DETACH PARTITION` queries are run at the front of the replication queue.
- `SELECT ... FINAL` and `OPTIMIZE ... FINAL` can be used even when the table has a single data part.
- A `query_log` table is recreated on the fly if it was deleted manually (Kirill Shvakov).
- The `lengthUTF8` function runs faster (zhang2014).
- Improved performance of synchronous inserts in `Distributed` tables (`insert_distributed_sync = 1`) when there is a very large number of shards.
- The server accepts the `send_timeout` and `receive_timeout` settings from the client and applies them when connecting to the client (they are applied in reverse order: the server socket’s `send_timeout` is set to the `receive_timeout` value received from the client, and vice versa).
- More robust crash recovery for asynchronous insertion into `Distributed` tables.
- The return type of the `countEqual` function changed from `UInt32` to `UInt64` (谢磊).

#### Bug fixes: {#bug-fixes-25}

- Fixed an error with `IN` when the left side of the expression is `Nullable`.
- Correct results are now returned when using tuples with `IN` when some of the tuple components are in the table index.
- The `max_execution_time` limit now works correctly with distributed queries.
- Fixed errors when calculating the size of composite columns in the `system.columns` table.
- Fixed an error when creating a temporary table with `CREATE TEMPORARY TABLE IF NOT EXISTS`.
- Fixed errors in `StorageKafka` (\#2075).
- Fixed server crashes from invalid arguments of certain aggregate functions.
- The `Too many parts` state is less likely to happen when inserting into aggregated materialized views (\#2084).
- Corrected recursive handling of substitutions in the config if a substitution must be followed by another substitution on the same level.
- Corrected the syntax in the metadata file when creating a `VIEW` that uses a query with `UNION ALL`.
- `SummingMergeTree` now works correctly for summation of nested data structures with a composite key.
- Fixed the possibility of a race condition when choosing the leader for `ReplicatedMergeTree` tables.

#### Build changes: {#build-changes-5}

- The build supports `ninja` instead of `make` and uses `ninja` by default for building releases.
- Renamed packages: `clickhouse-server-base` to `clickhouse-common-static`; `clickhouse-server-common` to `clickhouse-server`; `clickhouse-common-dbg` to `clickhouse-common-static-dbg`. To install, use `clickhouse-server clickhouse-client`. Packages with the old names will still load in the repositories for backward compatibility.
-
-#### Backward incompatible changes: {#backward-incompatible-changes-9}
-
-- Removed the special interpretation of an IN expression if an array is specified on the left side. Previously, the expression `arr IN (set)` was interpreted as "at least one `arr` element belongs to the `set`". To get the same behavior in the new version, write `arrayExists(x -> x IN (set), arr)`.
-- Disabled the incorrect use of the socket option `SO_REUSEPORT`, which was incorrectly enabled by default in the Poco library. Note that on Linux there is no longer any reason to simultaneously specify the addresses `::` and `0.0.0.0` for listen – use just `::`, which allows listening to the connection both over IPv4 and IPv6 (with the default kernel config settings). You can also revert to the behavior from previous versions by specifying `<listen_reuse_port>1</listen_reuse_port>` in the config.
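A minimal illustration of the `arrayExists` rewrite described above:

```sql
-- The old interpretation of `arr IN (set)`, now written explicitly:
SELECT arrayExists(x -> x IN (2, 5), [1, 2, 3]);  -- returns 1
```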
-
-### ClickHouse release 1.1.54370, 2018-03-16 {#clickhouse-release-1-1-54370-2018-03-16}
-
-#### New features: {#new-features-15}
-
-- Added the `system.macros` table and auto updating of macros when the config file is changed.
-- Added the `SYSTEM RELOAD CONFIG` query.
-- Added the `maxIntersections(left_col, right_col)` aggregate function, which returns the maximum number of simultaneously intersecting intervals `[left; right]`. The `maxIntersectionsPosition(left, right)` function returns the beginning of the "maximum" interval. ([Michael Furmur](https://github.com/ClickHouse/ClickHouse/pull/2012)).
-
-#### Improvements: {#improvements-14}
-
-- When inserting data in a `Replicated` table, fewer requests are made to `ZooKeeper` (and most of the user-level errors have disappeared from the `ZooKeeper` log).
-- Added the ability to create aliases for data sets. Example: `WITH (1, 2, 3) AS set SELECT number IN set FROM system.numbers LIMIT 10`.
-
-#### Bug fixes: {#bug-fixes-26}
-
-- Fixed the `Illegal PREWHERE` error when reading from Merge tables for `Distributed` tables.
-- Added fixes that allow you to start clickhouse-server in IPv4-only Docker containers.
-- Fixed a race condition when reading from the `system.parts_columns` table.
-- Removed double buffering during a synchronous insert to a `Distributed` table, which could have caused the connection to time out.
-- Fixed a bug that caused excessively long waits for an unavailable replica before beginning a `SELECT` query.
-- Fixed incorrect dates in the `system.parts` table.
-- Fixed a bug that made it impossible to insert data in a `Replicated` table if `chroot` was non-empty in the configuration of the `ZooKeeper` cluster.
-- Fixed the vertical merging algorithm for tables with an empty `ORDER BY` key.
-- Restored the ability to use dictionaries in queries to remote tables, even if these dictionaries are not present on the requestor server. This functionality was lost in release 1.1.54362.
-- Restored the behavior for queries like `SELECT * FROM remote('server2', default.table) WHERE col IN (SELECT col2 FROM default.table)` when the right side of the `IN` should use a remote `default.table` instead of a local one. This behavior was broken in version 1.1.54358.
-- Removed extraneous error-level logging of `Not found column ... in block`.
-
-### ClickHouse release 1.1.54362, 2018-03-11 {#clickhouse-release-1-1-54362-2018-03-11}
-
-#### New features: {#new-features-16}
-
-- Aggregation without `GROUP BY` for an empty set (such as `SELECT count(*) FROM table WHERE 0`) now returns a result with one row with null values for aggregate functions, in compliance with the SQL standard. To restore the old behavior (return an empty result), set `empty_result_for_aggregation_by_empty_set` to 1.
-- Added type conversion for `UNION ALL`. Different alias names are allowed in `SELECT` positions in `UNION ALL`, in compliance with the SQL standard.
-- Arbitrary expressions are supported in `LIMIT BY` clauses. Previously, it was only possible to use columns resulting from `SELECT`.
-- An index of `MergeTree` tables is used when `IN` is applied to a tuple of expressions from the columns of the primary key. Example: `WHERE (UserID, EventDate) IN ((123, '2000-01-01'), ...)` (Anastasiya Tsarkova; see the sketch below).
-- Added the `clickhouse-copier` tool for copying between clusters and resharding data (beta).
-- Added consistent hashing functions: `yandexConsistentHash`, `jumpConsistentHash`, `sumburConsistentHash`. They can be used as a sharding key in order to reduce the amount of network traffic during subsequent reshardings.
-- Added functions: `arrayAny`, `arrayAll`, `hasAny`, `hasAll`, `arrayIntersect`, `arrayResize`.
-- Added the `arrayCumSum` function (Javi Santana).
-- Added the `parseDateTimeBestEffort`, `parseDateTimeBestEffortOrZero`, and `parseDateTimeBestEffortOrNull` functions to read a DateTime from a string containing text in a wide variety of possible formats.
-- Data can be partially reloaded from external dictionaries during updating (load just the records in which the value of the specified field is greater than in the previous download) (Arsen Hakobyan).
-- Added the `cluster` table function. Example: `cluster(cluster_name, db, table)`. The `remote` table function can accept the cluster name as the first argument, if it is specified as an identifier.
-- The `remote` and `cluster` table functions can be used in `INSERT` queries.
-- Added the `create_table_query` and `engine_full` virtual columns to the `system.tables` table. The `metadata_modification_time` column is virtual.
-- Added the `data_path` and `metadata_path` columns to the `system.tables` and `system.databases` tables, and added the `path` column to the `system.parts` and `system.parts_columns` tables.
-- Added additional information about merges in the `system.part_log` table.
-- An arbitrary partitioning key can be used for the `system.query_log` table (Kirill Shvakov).
-- The `SHOW TABLES` query now also shows temporary tables. Added temporary tables and the `is_temporary` column to `system.tables` (zhang2014).
-- Added `DROP TEMPORARY TABLE` and `EXISTS TEMPORARY TABLE` queries (zhang2014).
-- Support for `SHOW CREATE TABLE` for temporary tables (zhang2014).
-- Added the `system_profile` configuration parameter for the settings used by internal processes.
-- Support for loading `object_id` as an attribute in `MongoDB` dictionaries (Pavel Litvinenko).
-- Reading `null` as the default value when loading data for an external dictionary with the `MongoDB` source (Pavel Litvinenko).
-- Reading `DateTime` values in the `Values` format from a Unix timestamp without single quotes.
-- Failover is supported in `remote` table functions for cases when some of the replicas are missing the requested table.
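An aside on the primary-key tuple `IN` feature above: a minimal sketch, assuming a `hits` table whose primary key starts with `(UserID, EventDate)`:

```sql
-- The primary-key index is used to prune granules for this condition:
SELECT count()
FROM hits
WHERE (UserID, EventDate) IN ((123, '2000-01-01'), (456, '2000-01-02'));
```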
-- Configuration settings can be overridden in the command line when you run `clickhouse-server`. Example: `clickhouse-server -- --logger.level=information`.
-- Implemented the `empty` function for a `FixedString` argument: the function returns 1 if the string consists entirely of null bytes (zhang2014).
-- Added the `listen_try` configuration parameter for listening to at least one of the listen addresses without quitting, if some of the addresses can't be listened to (useful for systems with disabled support for IPv4 or IPv6).
-- Added the `VersionedCollapsingMergeTree` table engine.
-- Support for rows and arbitrary numeric types for the `library` dictionary source.
-- `MergeTree` tables can be used without a primary key (you need to specify `ORDER BY tuple()`; a sketch follows below).
-- A `Nullable` type can be `CAST` to a non-`Nullable` type if the argument is not `NULL`.
-- `RENAME TABLE` can be performed for `VIEW`.
-- Added the `throwIf` function.
-- Added the `odbc_default_field_size` option, which allows you to extend the maximum size of the value loaded from an ODBC source (by default, it is 1024).
-- The `system.processes` table and `SHOW PROCESSLIST` now have the `is_cancelled` and `peak_memory_usage` columns.
-
-#### Improvements: {#improvements-15}
-
-- Limits and quotas on the result are no longer applied to intermediate data for `INSERT SELECT` queries or for `SELECT` subqueries.
-- Fewer false triggers of `force_restore_data` when checking the status of `Replicated` tables when the server starts.
-- Added the `allow_distributed_ddl` option.
-- Nondeterministic functions are not allowed in expressions for `MergeTree` table keys.
-- Files with substitutions from `config.d` directories are loaded in alphabetical order.
-- Improved performance of the `arrayElement` function in the case of a constant multidimensional array with an empty array as one of the elements. Example: `[[1], []][x]`.
-- The server starts faster now when using configuration files with very large substitutions (for instance, very large lists of IP networks).
-- When running a query, table-valued functions run once. Previously, the `remote` and `mysql` table-valued functions performed the same query twice to retrieve the table structure from a remote server.
-- The `MkDocs` documentation generator is used.
-- When you try to delete a table column that `DEFAULT`/`MATERIALIZED` expressions of other columns depend on, an exception is thrown (zhang2014).
-- Added the ability to parse an empty line in text formats as the number 0 for `Float` data types. This feature was previously available but was lost in release 1.1.54342.
-- `Enum` values can be used in `min`, `max`, `sum` and some other functions. In these cases, the corresponding numeric values are used. This feature was previously available but was lost in release 1.1.54337.
-- Added `max_expanded_ast_elements` to restrict the size of the AST after recursively expanding aliases.
-
-#### Bug fixes: {#bug-fixes-27}
-
-- Fixed cases when unnecessary columns were removed from subqueries in error, or not removed from subqueries containing `UNION ALL`.
-- Fixed a bug in merges for `ReplacingMergeTree` tables.
-- Fixed synchronous insertions in `Distributed` tables (`insert_distributed_sync = 1`).
-- Fixed a segfault for certain uses of `FULL` and `RIGHT JOIN` with duplicate columns in subqueries.
-- Fixed a segfault for certain uses of `replace_running_query` and `KILL QUERY`.
-- Fixed the order of the `source` and `last_exception` columns in the `system.dictionaries` table.
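The sketch promised above for a `MergeTree` table without a primary key; all names are hypothetical:

```sql
CREATE TABLE unsorted
(
    d Date,
    payload String
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(d)
ORDER BY tuple();  -- empty sorting key, i.e. no primary key
```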
-- Fixed a bug when the `DROP DATABASE` query did not delete the file with metadata.
-- Fixed the `DROP DATABASE` query for `Dictionary` databases.
-- Fixed the low precision of the `uniqHLL12` and `uniqCombined` functions for cardinalities greater than 100 million items (Alex Bocharov).
-- Fixed the calculation of implicit default values when it is necessary to simultaneously calculate explicit default expressions in `INSERT` queries (zhang2014).
-- Fixed a rare case when a query to a `MergeTree` table couldn't finish (chenxing-xc).
-- Fixed a crash that occurred when running a `CHECK` query for `Distributed` tables if all shards are local (chenxing.xc).
-- Fixed a slight performance regression with functions that use regular expressions.
-- Fixed a performance regression when creating multidimensional arrays from complex expressions.
-- Fixed a bug that could cause an extra `FORMAT` section to appear in an `.sql` file with metadata.
-- Fixed a bug that caused the `max_table_size_to_drop` limit to apply when trying to delete a `MATERIALIZED VIEW` looking at an explicitly specified table.
-- Fixed incompatibility with old clients (old clients were sometimes sent data with the `DateTime('timezone')` type, which they do not understand).
-- Fixed a bug when reading `Nested` column elements of structures that were added using `ALTER` but that are empty for the old partitions, when the conditions for these columns moved to `PREWHERE`.
-- Fixed a bug when filtering tables by the virtual `_table` column in queries to `Merge` tables.
-- Fixed a bug when using `ALIAS` columns in `Distributed` tables.
-- Fixed a bug that made dynamic compilation impossible for queries with aggregate functions from the `quantile` family.
-- Fixed a race condition in the query execution pipeline that occurred in very rare cases when using `Merge` tables with a large number of tables, and when using `GLOBAL` subqueries.
-- Fixed a crash when passing arrays of different sizes to an `arrayReduce` function using aggregate functions of multiple arguments.
-- Prohibited the use of queries with `UNION ALL` in a `MATERIALIZED VIEW`.
-- Fixed an error during initialization of the `part_log` system table when the server starts (by default, `part_log` is disabled).
-
-#### Backward incompatible changes: {#backward-incompatible-changes-10}
-
-- Removed the `distributed_ddl_allow_replicated_alter` option. This behavior is enabled by default.
-- Removed the `strict_insert_defaults` setting. If you were using this functionality, write to `clickhouse-feedback@yandex-team.com`.
-- Removed the `UnsortedMergeTree` engine.
-
-### ClickHouse release 1.1.54343, 2018-02-05 {#clickhouse-release-1-1-54343-2018-02-05}
-
-- Added macros support for defining cluster names in distributed DDL queries and constructors of Distributed tables: `CREATE TABLE distr ON CLUSTER '{cluster}' (...) ENGINE = Distributed('{cluster}', 'db', 'table')`.
-- Now queries like `SELECT ... FROM table WHERE expr IN (subquery)` are processed using the `table` index.
-- Improved processing of duplicates when inserting to Replicated tables, so they no longer slow down execution of the replication queue.
-
-### ClickHouse release 1.1.54342, 2018-01-22 {#clickhouse-release-1-1-54342-2018-01-22}
-
-This release contains bug fixes for the previous release 1.1.54337:
-
-- Fixed a regression in 1.1.54337: if the default user has readonly access, then the server refuses to start up with the message `Cannot create database in readonly mode`.
-- Fixed a regression in 1.1.54337: on systems with systemd, logs are always written to syslog regardless of the configuration; the watchdog script still uses init.d.
-- Fixed a regression in 1.1.54337: wrong default configuration in the Docker image.
-- Fixed nondeterministic behavior of GraphiteMergeTree (you can see it in the log messages `Data after merge is not byte-identical to the data on another replicas`).
-- Fixed a bug that may lead to inconsistent merges after an OPTIMIZE query to Replicated tables (you may see it in the log messages `Part ... intersects the previous part`).
-- Buffer tables now work correctly when MATERIALIZED columns are present in the destination table (by zhang2014).
-- Fixed a bug in the implementation of NULL.
-
-### ClickHouse release 1.1.54337, 2018-01-18 {#clickhouse-release-1-1-54337-2018-01-18}
-
-#### New features: {#new-features-17}
-
-- Added support for storage of multi-dimensional arrays and tuples (`Tuple` data type) in tables.
-- Support for table functions in `DESCRIBE` and `INSERT` queries. Added support for subqueries in `DESCRIBE`. Examples: `DESC TABLE remote('host', default.hits)`; `DESC TABLE (SELECT 1)`; `INSERT INTO TABLE FUNCTION remote('host', default.hits)`. Support for `INSERT INTO TABLE` in addition to `INSERT INTO`.
-- Improved support for time zones. The `DateTime` data type can be annotated with the time zone that is used for parsing and formatting in text formats. Example: `DateTime('Europe/Moscow')`. When time zones are specified in functions for `DateTime` arguments, the return type will track the time zone, and the value will be displayed as expected (see the sketch below).
-- Added the functions `toTimeZone`, `timeDiff`, `toQuarter`, `toRelativeQuarterNum`. The `toRelativeHour`/`Minute`/`Second` functions can take a value of type `Date` as an argument. The `now` function name is case-sensitive.
-- Added the `toStartOfFifteenMinutes` function (Kirill Shvakov).
-- Added the `clickhouse format` tool for formatting queries.
-- Added the `format_schema_path` configuration parameter (Marek Vavruşa). It is used for specifying a schema in `Cap'n Proto` format. Schema files can be located only in the specified directory.
-- Added support for config substitutions (`incl` and `conf.d`) for the configuration of external dictionaries and models (Pavel Yakunin).
-- Added a column with documentation for the `system.settings` table (Kirill Shvakov).
-- Added the `system.parts_columns` table with information about column sizes in each data part of `MergeTree` tables.
-- Added the `system.models` table with information about loaded `CatBoost` machine learning models.
-- Added the `mysql` and `odbc` table functions and the corresponding `MySQL` and `ODBC` table engines for accessing remote databases. This functionality is in the beta stage.
-- Added the possibility to pass an argument of type `AggregateFunction` to the `groupArray` aggregate function (so you can create an array of states of some aggregate function).
-- Removed restrictions on various combinations of aggregate function combinators. For example, you can use `avgForEachIf` as well as `avgIfForEach` aggregate functions, which have different behaviors.
-- The `-ForEach` aggregate function combinator is extended for the case of aggregate functions of multiple arguments.
-- Added support for aggregate functions of `Nullable` arguments even for cases when the function returns a non-`Nullable` result (added with the contribution of Silviu Caragea). Examples: `groupArray`, `groupUniqArray`, `topK`.
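An aside sketching the time-zone-annotated `DateTime` described above; the table name is hypothetical:

```sql
CREATE TABLE events
(
    t DateTime('Europe/Moscow')  -- parsed and formatted in this time zone
)
ENGINE = Memory;

-- The return type tracks the time zone passed in the argument:
SELECT toTimeZone(now(), 'UTC');
```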
-- Added the `max_client_network_bandwidth` setting for `clickhouse-client` (Kirill Shvakov).
-- Users with the `readonly = 2` setting are allowed to work with TEMPORARY tables (CREATE, DROP, INSERT…) (Kirill Shvakov).
-- Added support for using multiple consumers with the `Kafka` engine. Extended configuration options for `Kafka` (Marek Vavruša).
-- Added the `intExp3` and `intExp4` functions.
-- Added the `sumKahan` aggregate function.
-- Added the `to*Number*OrNull` functions (for example, `toInt32OrNull`), where `*Number*` is a numeric type.
-- Added support for `WITH` clauses in an `INSERT SELECT` query (author: zhang2014).
-- Added settings: `http_connection_timeout`, `http_send_timeout`, `http_receive_timeout`. In particular, these settings are used for downloading data parts for replication. Changing these settings allows for faster failover if the network is overloaded.
-- Added support for `ALTER` for tables of type `Null` (Anastasiya Tsarkova).
-- The `reinterpretAsString` function is extended for all data types that are stored contiguously in memory.
-- Added the `--silent` option for the `clickhouse-local` tool. It suppresses printing query execution info to stderr.
-- Added support for reading values of type `Date` from text in a format where the month and/or day of the month is specified using a single digit instead of two digits (Amos Bird).
-
-#### Performance optimizations: {#performance-optimizations}
-
-- Improved performance of the aggregate functions `min`, `max`, `any`, `anyLast`, `anyHeavy`, `argMin`, `argMax` for string arguments.
-- Improved performance of the functions `isInfinite`, `isFinite`, `isNaN`, `roundToExp2`.
-- Improved performance of parsing and formatting `Date` and `DateTime` type values in text format.
-- Improved performance and precision of parsing floating point numbers.
-- Lowered memory usage for `JOIN` in the case when the left and right parts have columns with identical names that are not contained in `USING`.
-- Improved performance of the aggregate functions `varSamp`, `varPop`, `stddevSamp`, `stddevPop`, `covarSamp`, `covarPop`, and `corr` at the cost of reduced numerical stability. The old functions are available under the names `varSampStable`, `varPopStable`, `stddevSampStable`, `stddevPopStable`, `covarSampStable`, `covarPopStable`, `corrStable` (see the sketch below).
-
-#### Bug fixes: {#bug-fixes-28}
-
-- Fixed data deduplication after running a `DROP` or `DETACH PARTITION` query. In the previous version, dropping a partition and inserting the same data again did not work because inserted blocks were considered duplicates.
-- Fixed a bug that could lead to incorrect interpretation of the `WHERE` clause for `CREATE MATERIALIZED VIEW` queries with `POPULATE`.
-- Fixed a bug in using the `root_path` parameter in the `zookeeper_servers` configuration.
-- Fixed unexpected results of passing the `Date` argument to `toStartOfDay`.
-- Fixed the `addMonths` and `subtractMonths` functions and the arithmetic for `INTERVAL n MONTH` in cases when the result has the previous year.
-- Added missing support for the `UUID` data type for `DISTINCT`, `JOIN`, and `uniq` aggregate functions and external dictionaries (Evgeniy Ivanov). Support for `UUID` is still incomplete.
-- Fixed `SummingMergeTree` behavior in cases when the rows summed to zero.
-- Various fixes for the `Kafka` engine (Marek Vavruša).
-- Fixed incorrect behavior of the `Join` table engine (Amos Bird).
-- Fixed incorrect allocator behavior under FreeBSD and OS X.
-- The `extractAll` function now supports empty matches.
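A quick illustration of the fast versus stable variance functions above; the table and column names are hypothetical:

```sql
SELECT
    varSamp(duration_ms)       AS fast_variance,   -- faster, less numerically stable
    varSampStable(duration_ms) AS stable_variance  -- the old, stable implementation
FROM query_metrics;
```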
-- Fixed an error that blocked usage of `libressl` instead of `openssl`.
-- Fixed the `CREATE TABLE AS SELECT` query from temporary tables.
-- Fixed non-atomicity of updating the replication queue. This could lead to replicas being out of sync until the server restarts.
-- Fixed possible overflow in `gcd`, `lcm` and `modulo` (`%` operator) (Maks Skorokhod).
-- `-preprocessed` files are now created after changing `umask` (`umask` can be changed in the config).
-- Fixed a bug in the background check of parts (`MergeTreePartChecker`) when using a custom partition key.
-- Fixed parsing of tuples (values of the `Tuple` data type) in text formats.
-- Improved error messages about incompatible types passed to `multiIf`, `array` and some other functions.
-- Redesigned support for `Nullable` types. Fixed bugs that may lead to a server crash. Fixed almost all other bugs related to `NULL` support: incorrect type conversions in INSERT SELECT, insufficient support for Nullable in HAVING and PREWHERE, `join_use_nulls` mode, Nullable types as arguments of the `OR` operator, etc.
-- Fixed various bugs related to internal semantics of data types. Examples: unnecessary summing of `Enum` type fields in `SummingMergeTree`; alignment of `Enum` types in `Pretty` formats, etc.
-- Stricter checks for allowed combinations of composite columns.
-- Fixed the overflow when specifying a very large parameter for the `FixedString` data type.
-- Fixed a bug in the `topK` aggregate function in a generic case.
-- Added the missing check for equality of array sizes in arguments of n-ary variants of aggregate functions with an `-Array` combinator.
-- Fixed a bug in `--pager` for `clickhouse-client` (author: ks1322).
-- Fixed the precision of the `exp10` function.
-- Fixed the behavior of the `visitParamExtract` function for better compliance with documentation.
-- Fixed the crash when incorrect data types are specified.
-- Fixed the behavior of `DISTINCT` in the case when all columns are constants.
-- Fixed query formatting in the case of using the `tupleElement` function with a complex constant expression as the tuple element index.
-- Fixed a bug in `Dictionary` tables for `range_hashed` dictionaries.
-- Fixed a bug that led to excessive rows in the result of `FULL` and `RIGHT JOIN` (Amos Bird).
-- Fixed a server crash when creating and removing temporary files in `config.d` directories during config reload.
-- Fixed the `SYSTEM DROP DNS CACHE` query: the cache was flushed but addresses of cluster nodes were not updated.
-- Fixed the behavior of `MATERIALIZED VIEW` after executing `DETACH TABLE` for the table under the view (Marek Vavruša).
-
-#### Build improvements: {#build-improvements-4}
-
-- The `pbuilder` tool is used for builds. The build process is almost completely independent of the build host environment.
-- A single build is used for different OS versions. Packages and binaries have been made compatible with a wide range of Linux systems.
-- Added the `clickhouse-test` package. It can be used to run functional tests.
-- The source tarball can now be published to the repository. It can be used to reproduce the build without using GitHub.
-- Added limited integration with Travis CI. Due to limits on build time in Travis, only the debug build is tested and a limited subset of tests are run.
-- Added support for `Cap'n'Proto` in the default build.
-- Changed the format of documentation sources from `Restricted Text` to `Markdown`.
-- Added support for `systemd` (Vladimir Smirnov). It is disabled by default due to incompatibility with some OS images and can be enabled manually.
-- For dynamic code generation, `clang` and `lld` are embedded into the `clickhouse` binary. They can also be invoked as `clickhouse clang` and `clickhouse lld`.
-- Removed usage of GNU extensions from the code. Enabled the `-Wextra` option. When building with `clang`, the default is `libc++` instead of `libstdc++`.
-- Extracted the `clickhouse_parsers` and `clickhouse_common_io` libraries to speed up builds of various tools.
-
-#### Backward incompatible changes: {#backward-incompatible-changes-11}
-
-- The format for marks in `Log` type tables that contain `Nullable` columns was changed in a backward incompatible way. If you have these tables, you should convert them to the `TinyLog` type before starting up the new server version. To do this, replace `ENGINE = Log` with `ENGINE = TinyLog` in the corresponding `.sql` file in the `metadata` directory (a sketch of this edit follows at the end of these notes). If your table doesn't have `Nullable` columns or if the type of your table is not `Log`, then you don't need to do anything.
-- Removed the `experimental_allow_extended_storage_definition_syntax` setting. Now this feature is enabled by default.
-- The `runningIncome` function was renamed to `runningDifferenceStartingWithFirstvalue` to avoid confusion.
-- Removed the `FROM ARRAY JOIN arr` syntax when ARRAY JOIN is specified directly after FROM with no table (Amos Bird).
-- Removed the `BlockTabSeparated` format that was used solely for demonstration purposes.
-- Changed the state format for the aggregate functions `varSamp`, `varPop`, `stddevSamp`, `stddevPop`, `covarSamp`, `covarPop`, `corr`. If you have stored states of these aggregate functions in tables (using the `AggregateFunction` data type or materialized views with corresponding states), please write to clickhouse-feedback@yandex-team.com.
-- In previous server versions there was an undocumented feature: if an aggregate function depends on parameters, you could still specify it without parameters in the AggregateFunction data type. Example: `AggregateFunction(quantiles, UInt64)` instead of `AggregateFunction(quantiles(0.5, 0.9), UInt64)`. This feature was lost. Although it was undocumented, we plan to support it again in future releases.
-- Enum data types cannot be used in min/max aggregate functions. This ability will be returned in the next release.
-
-#### Please note when upgrading: {#please-note-when-upgrading}
-
-- When doing a rolling update on a cluster, at the point when some of the replicas are running the old version of ClickHouse and some are running the new version, replication is temporarily stopped and the message `unknown parameter 'shard'` appears in the log. Replication will continue after all replicas of the cluster are updated.
-- If different versions of ClickHouse are running on the cluster servers, it is possible that distributed queries using the following functions will have incorrect results: `varSamp`, `varPop`, `stddevSamp`, `stddevPop`, `covarSamp`, `covarPop`, `corr`. You should update all cluster nodes.
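The `Log`-to-`TinyLog` sketch referenced above, assuming a default install layout; the path and table definition are hypothetical:

```sql
-- /var/lib/clickhouse/metadata/default/t.sql, before:
ATTACH TABLE t (s Nullable(String)) ENGINE = Log
-- after (needed only for Log tables that contain Nullable columns):
ATTACH TABLE t (s Nullable(String)) ENGINE = TinyLog
```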
-
-## [Changelog for 2017](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/changelog/2017.md)
diff --git a/docs/zh/changelog/2019.md b/docs/zh/changelog/2019.md
deleted file mode 100644
index 01a0756af14..00000000000
--- a/docs/zh/changelog/2019.md
+++ /dev/null
@@ -1,2071 +0,0 @@
----
-en_copy: true
----
-
-## ClickHouse release v19.17 {#clickhouse-release-v19-17}
-
-### ClickHouse release v19.17.6.36, 2019-12-27 {#clickhouse-release-v19-17-6-36-2019-12-27}
-
-#### Bug Fix {#bug-fix}
-
-- Fixed a potential buffer overflow in decompression. A malicious user could pass fabricated compressed data that causes a read past the end of the buffer. This issue was found by Eldar Zaitov from the Yandex information security team. [\#8404](https://github.com/ClickHouse/ClickHouse/pull/8404) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a possible server crash (`std::terminate`) when the server cannot send or write data in JSON or XML format with values of the String data type (which require UTF-8 validation), or when compressing result data with the Brotli algorithm, or in some other rare cases. [\#8384](https://github.com/ClickHouse/ClickHouse/pull/8384) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed dictionaries with a source from a ClickHouse `VIEW`; now reading such dictionaries doesn't cause the error `There is no query`. [\#8351](https://github.com/ClickHouse/ClickHouse/pull/8351) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
-- Fixed checking whether a client host is allowed by the host\_regexp specified in users.xml. [\#8241](https://github.com/ClickHouse/ClickHouse/pull/8241), [\#8342](https://github.com/ClickHouse/ClickHouse/pull/8342) ([Vitaly Baranov](https://github.com/vitlibar))
-- `RENAME TABLE` for a distributed table now renames the folder containing inserted data before sending it to the shards. This fixes an issue with successive renames `tableA->tableB`, `tableC->tableA`. [\#8306](https://github.com/ClickHouse/ClickHouse/pull/8306) ([tavplubix](https://github.com/tavplubix))
-- `range_hashed` external dictionaries created by DDL queries now allow ranges of arbitrary numeric types. [\#8275](https://github.com/ClickHouse/ClickHouse/pull/8275) ([alesapin](https://github.com/alesapin))
-- Fixed the `INSERT INTO table SELECT ... FROM mysql(...)` table function. [\#8234](https://github.com/ClickHouse/ClickHouse/pull/8234) ([tavplubix](https://github.com/tavplubix))
-- Fixed a segfault in `INSERT INTO TABLE FUNCTION file()` while inserting into a file which doesn't exist. Now in this case the file is created and the insert is processed. [\#8177](https://github.com/ClickHouse/ClickHouse/pull/8177) ([Olga Khvostikova](https://github.com/stavrolia))
-- Fixed a bitmapAnd error when intersecting an aggregated bitmap and a scalar bitmap. [\#8082](https://github.com/ClickHouse/ClickHouse/pull/8082) ([Yue Huang](https://github.com/moon03432))
-- Fixed a segfault when the `EXISTS` query was used without a `TABLE` or `DICTIONARY` qualifier, as in `EXISTS t`. [\#8213](https://github.com/ClickHouse/ClickHouse/pull/8213) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed the return type for the functions `rand` and `randConstant` in case of a nullable argument. The functions now always return `UInt32` and never `Nullable(UInt32)`. [\#8204](https://github.com/ClickHouse/ClickHouse/pull/8204) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
-- Fixed `DROP DICTIONARY IF EXISTS db.dict`; now it doesn't throw an exception if `db` doesn't exist (see the example below). [\#8185](https://github.com/ClickHouse/ClickHouse/pull/8185) ([Vitaly Baranov](https://github.com/vitlibar))
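Illustrating the `DROP DICTIONARY IF EXISTS` fix above; the database and dictionary names are hypothetical:

```sql
-- Since v19.17.6.36 this no longer throws, even if missing_db does not exist:
DROP DICTIONARY IF EXISTS missing_db.some_dict;
```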
-- If a table wasn't completely dropped because of a server crash, the server will try to restore and load it. [\#8176](https://github.com/ClickHouse/ClickHouse/pull/8176) ([tavplubix](https://github.com/tavplubix))
-- Fixed a trivial count query for a distributed table if there are more than two shards with a local table. [\#8164](https://github.com/ClickHouse/ClickHouse/pull/8164) ([小路](https://github.com/nicelulu))
-- Fixed a bug that led to a data race in `DB::BlockStreamProfileInfo::calculateRowsBeforeLimit()`. [\#8143](https://github.com/ClickHouse/ClickHouse/pull/8143) ([Alexander Kazakov](https://github.com/Akazz))
-- Fixed `ALTER TABLE ... MOVE PART` executed immediately after merging the specified part, which could cause moving a part which the specified part merged into. Now it correctly moves the specified part (see the sketch below). [\#8104](https://github.com/ClickHouse/ClickHouse/pull/8104) ([Vladimir Chebotarev](https://github.com/excitoon))
-- Expressions for dictionaries can be specified as strings now. This is useful for the calculation of attributes while extracting data from non-ClickHouse sources, because it allows using non-ClickHouse syntax for those expressions. [\#8098](https://github.com/ClickHouse/ClickHouse/pull/8098) ([alesapin](https://github.com/alesapin))
-- Fixed a very rare race in `clickhouse-copier` because of an overflow in ZXid. [\#8088](https://github.com/ClickHouse/ClickHouse/pull/8088) ([Ding Xiang Fei](https://github.com/dingxiangfei2009))
-- Fixed the bug when, after a query failed (due to "Too many simultaneous queries", for example), it would not read the external tables info, and the next request would interpret this info as the beginning of the next query, causing an error like `Unknown packet from client`. [\#8084](https://github.com/ClickHouse/ClickHouse/pull/8084) ([Azat Khuzhin](https://github.com/azat))
-- Avoid a null dereference after "Unknown packet X from server". [\#8071](https://github.com/ClickHouse/ClickHouse/pull/8071) ([Azat Khuzhin](https://github.com/azat))
-- Restore support of all ICU locales, add the ability to apply collations for constant expressions, and add the language name to the system.collations table. [\#8051](https://github.com/ClickHouse/ClickHouse/pull/8051) ([alesapin](https://github.com/alesapin))
-- The number of streams for reading from `StorageFile` and `StorageHDFS` is now limited, to avoid exceeding the memory limit. [\#7981](https://github.com/ClickHouse/ClickHouse/pull/7981) ([alesapin](https://github.com/alesapin))
-- Fixed the `CHECK TABLE` query for `*MergeTree` tables without a key. [\#7979](https://github.com/ClickHouse/ClickHouse/pull/7979) ([alesapin](https://github.com/alesapin))
-- Removed the mutation number from a part name in case there were no mutations. This removal improved compatibility with older versions. [\#8250](https://github.com/ClickHouse/ClickHouse/pull/8250) ([alesapin](https://github.com/alesapin))
-- Fixed the bug that mutations are skipped for some attached parts because their data\_version is larger than the table mutation version. [\#7812](https://github.com/ClickHouse/ClickHouse/pull/7812) ([Zhichang Yu](https://github.com/yuzhichang))
-- Allow starting the server with redundant copies of parts after moving them to another device. [\#7810](https://github.com/ClickHouse/ClickHouse/pull/7810) ([Vladimir Chebotarev](https://github.com/excitoon))
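The `MOVE PART` sketch referenced above; the table, part, and disk names are hypothetical and assume a configured multi-disk storage policy:

```sql
-- Moves a single data part to another disk:
ALTER TABLE hits MOVE PART 'all_1_1_0' TO DISK 'external';
```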
-- Fixed the error "Sizes of columns doesn't match" that might appear when using aggregate function columns. [\#7790](https://github.com/ClickHouse/ClickHouse/pull/7790) ([Boris Granveaud](https://github.com/bgranvea))
-- Now an exception is thrown when WITH TIES is used alongside LIMIT BY. It is now also possible to use TOP with LIMIT BY. [\#7637](https://github.com/ClickHouse/ClickHouse/pull/7637) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
-- Fixed dictionary reload for a dictionary with an `invalidate_query` after updates had stopped due to an exception on previous update attempts. [\#8029](https://github.com/ClickHouse/ClickHouse/pull/8029) ([alesapin](https://github.com/alesapin))
-
-### ClickHouse release v19.17.4.11, 2019-11-22 {#clickhouse-release-v19-17-4-11-2019-11-22}
-
-#### Backward Incompatible Change {#backward-incompatible-change}
-
-- Using a column instead of an AST to store scalar subquery results for better performance. The setting `enable_scalar_subquery_optimization` was added in 19.17 and was enabled by default. It leads to errors like [this](https://github.com/ClickHouse/ClickHouse/issues/7851) during an upgrade to 19.17.2 or 19.17.3 from previous versions. This setting was disabled by default in 19.17.4 to make it possible to upgrade from 19.16 and older versions without errors. [\#7392](https://github.com/ClickHouse/ClickHouse/pull/7392) ([Amos Bird](https://github.com/amosbird))
-
-#### New Feature {#new-feature}
-
-- Add the ability to create dictionaries with DDL queries. [\#7360](https://github.com/ClickHouse/ClickHouse/pull/7360) ([alesapin](https://github.com/alesapin))
-- Make the `bloom_filter` index type support `LowCardinality` and `Nullable`. [\#7363](https://github.com/ClickHouse/ClickHouse/issues/7363) [\#7561](https://github.com/ClickHouse/ClickHouse/pull/7561) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
-- Add the function `isValidJSON` to check that the passed string is valid JSON (see the example below). [\#5910](https://github.com/ClickHouse/ClickHouse/issues/5910) [\#7293](https://github.com/ClickHouse/ClickHouse/pull/7293) ([Vdimir](https://github.com/Vdimir))
-- Implement the `arrayCompact` function. [\#7328](https://github.com/ClickHouse/ClickHouse/pull/7328) ([Memo](https://github.com/Joeywzr))
-- Created the function `hex` for Decimal numbers. It works like `hex(reinterpretAsString())`, but doesn't delete the last zero bytes. [\#7355](https://github.com/ClickHouse/ClickHouse/pull/7355) ([Mikhail Korotov](https://github.com/millb))
-- Add the `arrayFill` and `arrayReverseFill` functions, which replace elements by other elements in front of/behind them in the array. [\#7380](https://github.com/ClickHouse/ClickHouse/pull/7380) ([hcz](https://github.com/hczhcz))
-- Add `CRC32IEEE()`/`CRC64()` support. [\#7480](https://github.com/ClickHouse/ClickHouse/pull/7480) ([Azat Khuzhin](https://github.com/azat))
-- Implement the `char` function similar to the one in [mysql](https://dev.mysql.com/doc/refman/8.0/en/string-functions.html#function_char). [\#7486](https://github.com/ClickHouse/ClickHouse/pull/7486) ([sundyli](https://github.com/sundy-li))
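A short example of `isValidJSON` from the list above:

```sql
SELECT
    isValidJSON('{"a": 1}') AS valid,    -- 1
    isValidJSON('not json') AS invalid;  -- 0
```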
-- Add the `bitmapTransform` function. It transforms an array of values in a bitmap to another array of values; the result is a new bitmap. [\#7598](https://github.com/ClickHouse/ClickHouse/pull/7598) ([Zhichang Yu](https://github.com/yuzhichang))
-- Implemented the `javaHashUTF16LE()` function. [\#7651](https://github.com/ClickHouse/ClickHouse/pull/7651) ([achimbab](https://github.com/achimbab))
-- Add the `_shard_num` virtual column for the Distributed engine. [\#7624](https://github.com/ClickHouse/ClickHouse/pull/7624) ([Azat Khuzhin](https://github.com/azat))
-
-#### Experimental Feature {#experimental-feature}
-
-- Support for processors (the new query execution pipeline) in `MergeTree`. [\#7181](https://github.com/ClickHouse/ClickHouse/pull/7181) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
-
-#### Bug Fix {#bug-fix-1}
-
-- Fix incorrect float parsing in `Values`. [\#7817](https://github.com/ClickHouse/ClickHouse/issues/7817) [\#7870](https://github.com/ClickHouse/ClickHouse/pull/7870) ([tavplubix](https://github.com/tavplubix))
-- Fix a rare deadlock which can happen when trace\_log is enabled. [\#7838](https://github.com/ClickHouse/ClickHouse/pull/7838) ([filimonov](https://github.com/filimonov))
-- Prevent message duplication when a table producing to Kafka has any MVs selecting from it. [\#7265](https://github.com/ClickHouse/ClickHouse/pull/7265) ([Ivan](https://github.com/abyss7))
-- Support for `Array(LowCardinality(Nullable(String)))` in `IN`. Resolves [\#7364](https://github.com/ClickHouse/ClickHouse/issues/7364). [\#7366](https://github.com/ClickHouse/ClickHouse/pull/7366) ([achimbab](https://github.com/achimbab))
-- Add handling of `SQL_TINYINT` and `SQL_BIGINT`, and fix handling of `SQL_FLOAT` data source types in ODBC Bridge. [\#7491](https://github.com/ClickHouse/ClickHouse/pull/7491) ([Denis Glazachev](https://github.com/traceon))
-- Fix aggregation (`avg` and quantiles) over empty decimal columns. [\#7431](https://github.com/ClickHouse/ClickHouse/pull/7431) ([Andrey Konyaev](https://github.com/akonyaev90))
-- Fix `INSERT` into Distributed with `MATERIALIZED` columns. [\#7377](https://github.com/ClickHouse/ClickHouse/pull/7377) ([Azat Khuzhin](https://github.com/azat))
-- Make `MOVE PARTITION` work if some parts of the partition are already on the destination disk or volume. [\#7434](https://github.com/ClickHouse/ClickHouse/pull/7434) ([Vladimir Chebotarev](https://github.com/excitoon))
-- Fixed a bug with hardlinks failing to be created during mutations in `ReplicatedMergeTree` in multi-disk configurations. [\#7558](https://github.com/ClickHouse/ClickHouse/pull/7558) ([Vladimir Chebotarev](https://github.com/excitoon))
-- Fixed a bug with a mutation on a MergeTree when the whole part remains unchanged and the best space is found on another disk. [\#7602](https://github.com/ClickHouse/ClickHouse/pull/7602) ([Vladimir Chebotarev](https://github.com/excitoon))
-- Fixed a bug with `keep_free_space_ratio` not being read from the disks configuration. [\#7645](https://github.com/ClickHouse/ClickHouse/pull/7645) ([Vladimir Chebotarev](https://github.com/excitoon))
-- Fix a bug with tables that contain only `Tuple` columns or columns with complex paths. Fixes [7541](https://github.com/ClickHouse/ClickHouse/issues/7541). [\#7545](https://github.com/ClickHouse/ClickHouse/pull/7545) ([alesapin](https://github.com/alesapin))
-- Do not account memory for the Buffer engine in the max\_memory\_usage limit. [\#7552](https://github.com/ClickHouse/ClickHouse/pull/7552) ([Azat Khuzhin](https://github.com/azat))
-- Fix final mark usage in `MergeTree` tables ordered by `tuple()`. In rare cases it could lead to a `Can't adjust last granule` error while selecting. [\#7639](https://github.com/ClickHouse/ClickHouse/pull/7639) ([Anton Popov](https://github.com/CurtizJ))
-- Fix a bug in mutations that have a predicate with actions that require context (for example, functions for JSON), which may lead to crashes or strange exceptions. [\#7664](https://github.com/ClickHouse/ClickHouse/pull/7664) ([alesapin](https://github.com/alesapin))
-- Fix a mismatch of database and table names escaping in `data/` and `shadow/` directories. [\#7575](https://github.com/ClickHouse/ClickHouse/pull/7575) ([Alexander Burmak](https://github.com/Alex-Burmak))
-- Support duplicated keys in RIGHT\|FULL JOINs, e.g. `ON t.x = u.x AND t.x = u.y`. Fix a crash in this case. [\#7586](https://github.com/ClickHouse/ClickHouse/pull/7586) ([Artem Zuikov](https://github.com/4ertus2))
-- Fix `Not found column in block` when joining on an expression with RIGHT or FULL JOIN. [\#7641](https://github.com/ClickHouse/ClickHouse/pull/7641) ([Artem Zuikov](https://github.com/4ertus2))
-- One more attempt to fix an infinite loop in the `PrettySpace` format. [\#7591](https://github.com/ClickHouse/ClickHouse/pull/7591) ([Olga Khvostikova](https://github.com/stavrolia))
-- Fix a bug in the `concat` function when all arguments were `FixedString` of the same size. [\#7635](https://github.com/ClickHouse/ClickHouse/pull/7635) ([alesapin](https://github.com/alesapin))
-- Fixed an exception in case of using 1 argument while defining S3, URL and HDFS storages. [\#7618](https://github.com/ClickHouse/ClickHouse/pull/7618) ([Vladimir Chebotarev](https://github.com/excitoon))
-- Fix the scope of the InterpreterSelectQuery for views with a query. [\#7601](https://github.com/ClickHouse/ClickHouse/pull/7601) ([Azat Khuzhin](https://github.com/azat))
-
-#### Improvement {#improvement}
-
-- `Nullable` columns are recognized and NULL values are handled correctly by the ODBC bridge. [\#7402](https://github.com/ClickHouse/ClickHouse/pull/7402) ([Vasily Nemkov](https://github.com/Enmk))
-- Write the current batch for distributed send atomically. [\#7600](https://github.com/ClickHouse/ClickHouse/pull/7600) ([Azat Khuzhin](https://github.com/azat))
-- Throw an exception if we cannot detect the table for a column name in a query. [\#7358](https://github.com/ClickHouse/ClickHouse/pull/7358) ([Artem Zuikov](https://github.com/4ertus2))
-- Add the `merge_max_block_size` setting to `MergeTreeSettings`. [\#7412](https://github.com/ClickHouse/ClickHouse/pull/7412) ([Artem Zuikov](https://github.com/4ertus2))
-- Queries with `HAVING` and without `GROUP BY` assume grouping by constant. So, `SELECT 1 HAVING 1` now returns a result. [\#7496](https://github.com/ClickHouse/ClickHouse/pull/7496) ([Amos Bird](https://github.com/amosbird))
-- Support parsing `(X,)` as a tuple, similar to Python. [\#7501](https://github.com/ClickHouse/ClickHouse/pull/7501), [\#7562](https://github.com/ClickHouse/ClickHouse/pull/7562) ([Amos Bird](https://github.com/amosbird))
-- Make the `range` function behave almost like the Python one. [\#7518](https://github.com/ClickHouse/ClickHouse/pull/7518) ([sundyli](https://github.com/sundy-li))
-- Add `constraints` columns to the table `system.settings`. [\#7553](https://github.com/ClickHouse/ClickHouse/pull/7553) ([Vitaly Baranov](https://github.com/vitlibar))
-- Better Null format for the TCP handler, so that it's possible to use `select ignore() from table format Null` for performance measurement via clickhouse-client. [\#7606](https://github.com/ClickHouse/ClickHouse/pull/7606) ([Amos Bird](https://github.com/amosbird))
-- Queries like `CREATE TABLE ... AS (SELECT (1, 2))` are parsed correctly. [\#7542](https://github.com/ClickHouse/ClickHouse/pull/7542) ([hcz](https://github.com/hczhcz))
-
-#### Performance Improvement {#performance-improvement}
-
-- The performance of aggregation over short string keys is improved. [\#6243](https://github.com/ClickHouse/ClickHouse/pull/6243) ([Alexander Kuzmenkov](https://github.com/akuzm), [Amos Bird](https://github.com/amosbird))
-- Run another pass of syntax/expression analysis to get potential optimizations after constant predicates are folded. [\#7497](https://github.com/ClickHouse/ClickHouse/pull/7497) ([Amos Bird](https://github.com/amosbird))
-- Use storage meta info to evaluate trivial `SELECT count() FROM table;`. [\#7510](https://github.com/ClickHouse/ClickHouse/pull/7510) ([Amos Bird](https://github.com/amosbird), [alexey-milovidov](https://github.com/alexey-milovidov))
-- Vectorize processing of `arrayReduce` similar to Aggregator `addBatch`. [\#7608](https://github.com/ClickHouse/ClickHouse/pull/7608) ([Amos Bird](https://github.com/amosbird))
-- Minor improvements in the performance of `Kafka` consumption. [\#7475](https://github.com/ClickHouse/ClickHouse/pull/7475) ([Ivan](https://github.com/abyss7))
-
-#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement}
-
-- Add support for cross-compiling to the CPU architecture AARCH64. Refactor the packager script. [\#7370](https://github.com/ClickHouse/ClickHouse/pull/7370) [\#7539](https://github.com/ClickHouse/ClickHouse/pull/7539) ([Ivan](https://github.com/abyss7))
-- Unpack darwin-x86\_64 and linux-aarch64 toolchains into a mounted Docker volume when building packages. [\#7534](https://github.com/ClickHouse/ClickHouse/pull/7534) ([Ivan](https://github.com/abyss7))
-- Update the Docker image for the binary packager. [\#7474](https://github.com/ClickHouse/ClickHouse/pull/7474) ([Ivan](https://github.com/abyss7))
-- Fixed compile errors on MacOS Catalina. [\#7585](https://github.com/ClickHouse/ClickHouse/pull/7585) ([Ernest Poletaev](https://github.com/ernestp))
-- Some refactoring in query analysis logic: split a complex class into several simpler ones. [\#7454](https://github.com/ClickHouse/ClickHouse/pull/7454) ([Artem Zuikov](https://github.com/4ertus2))
-- Fix the build without submodules. [\#7295](https://github.com/ClickHouse/ClickHouse/pull/7295) ([proller](https://github.com/proller))
-- Better `add_globs` in CMake files. [\#7418](https://github.com/ClickHouse/ClickHouse/pull/7418) ([Amos Bird](https://github.com/amosbird))
-- Remove hardcoded paths in the `unwind` target. [\#7460](https://github.com/ClickHouse/ClickHouse/pull/7460) ([Konstantin Podshumok](https://github.com/podshumok))
-- Allow using the MySQL format without SSL. [\#7524](https://github.com/ClickHouse/ClickHouse/pull/7524) ([proller](https://github.com/proller))
-
-#### Other {#other}
-
-- Added an ANTLR4 grammar for the ClickHouse SQL dialect. [\#7595](https://github.com/ClickHouse/ClickHouse/issues/7595) [\#7596](https://github.com/ClickHouse/ClickHouse/pull/7596) ([alexey-milovidov](https://github.com/alexey-milovidov))
-
-## ClickHouse release v19.16 {#clickhouse-release-v19-16}
-
-#### ClickHouse release v19.16.14.65, 2020-03-25
-
-* Fixed a bug in batched calculations of ternary logical operations on multiple arguments (more than 10). [#8718](https://github.com/ClickHouse/ClickHouse/pull/8718) ([Alexander Kazakov](https://github.com/Akazz)) This bugfix was backported to version 19.16 by a special request from Altinity.
-
-#### ClickHouse release v19.16.14.65, 2020-03-05 {#clickhouse-release-v19-16-14-65-2020-03-05}
-
-- Fix incompatibility of distributed subqueries with older CH versions. Fixes [\#7851](https://github.com/ClickHouse/ClickHouse/issues/7851) [(tavplubix)](https://github.com/tavplubix)
-- When executing a `CREATE` query, fold constant expressions in storage engine arguments. Replace an empty database name with the current database. Fixes [\#6508](https://github.com/ClickHouse/ClickHouse/issues/6508), [\#3492](https://github.com/ClickHouse/ClickHouse/issues/3492). Also fix the check for a local address in `ClickHouseDictionarySource`. [\#9262](https://github.com/ClickHouse/ClickHouse/pull/9262) [(tavplubix)](https://github.com/tavplubix)
-- Now background merges in the `*MergeTree` table engines family preserve the storage policy volume order more accurately. [\#8549](https://github.com/ClickHouse/ClickHouse/pull/8549) ([Vladimir Chebotarev](https://github.com/excitoon))
-- Prevent losing data in `Kafka` in rare cases when an exception happens after reading the suffix but before the commit. Fixes [\#9378](https://github.com/ClickHouse/ClickHouse/issues/9378). Related: [\#7175](https://github.com/ClickHouse/ClickHouse/issues/7175) [\#9507](https://github.com/ClickHouse/ClickHouse/pull/9507) [(filimonov)](https://github.com/filimonov)
-- Fix a bug leading to server termination when trying to use / drop a `Kafka` table created with wrong parameters. Fixes [\#9494](https://github.com/ClickHouse/ClickHouse/issues/9494). Incorporates [\#9507](https://github.com/ClickHouse/ClickHouse/issues/9507). [\#9513](https://github.com/ClickHouse/ClickHouse/pull/9513) [(filimonov)](https://github.com/filimonov)
-- Allow using `MaterializedView` with subqueries above `Kafka` tables. [\#8197](https://github.com/ClickHouse/ClickHouse/pull/8197) ([filimonov](https://github.com/filimonov))
-
-#### New Feature {#new-feature-1}
-
-- Add the `deduplicate_blocks_in_dependent_materialized_views` option to control the behaviour of idempotent inserts into tables with materialized views. This new feature was added to the bugfix release by a special request from Altinity. [\#9070](https://github.com/ClickHouse/ClickHouse/pull/9070) [(urykhy)](https://github.com/urykhy)
- [\#9070](https://github.com/ClickHouse/ClickHouse/pull/9070) [(urykhy)](https://github.com/urykhy) - -### ClickHouse release v19.16.2.2, 2019-10-30 {#clickhouse-release-v19-16-2-2-2019-10-30} - -#### Backward Incompatible Change {#backward-incompatible-change-1} - -- Add missing arity validation for count/counIf. - [\#7095](https://github.com/ClickHouse/ClickHouse/issues/7095) - [\#7298](https://github.com/ClickHouse/ClickHouse/pull/7298) ([Vdimir](https://github.com/Vdimir)) -- Remove legacy `asterisk_left_columns_only` setting (it was disabled by default). - [\#7335](https://github.com/ClickHouse/ClickHouse/pull/7335) ([Artem - Zuikov](https://github.com/4ertus2)) -- Format strings for Template data format are now specified in files. - [\#7118](https://github.com/ClickHouse/ClickHouse/pull/7118) - ([tavplubix](https://github.com/tavplubix)) - -#### New Feature {#new-feature-2} - -- Introduce uniqCombined64() to calculate cardinality greater than UINT\_MAX. - [\#7213](https://github.com/ClickHouse/ClickHouse/pull/7213), - [\#7222](https://github.com/ClickHouse/ClickHouse/pull/7222) ([Azat - Khuzhin](https://github.com/azat)) -- Support Bloom filter indexes on Array columns. - [\#6984](https://github.com/ClickHouse/ClickHouse/pull/6984) - ([achimbab](https://github.com/achimbab)) -- Add a function `getMacro(name)` that returns String with the value of corresponding `` - from server configuration. [\#7240](https://github.com/ClickHouse/ClickHouse/pull/7240) - ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Set two configuration options for a dictionary based on an HTTP source: `credentials` and - `http-headers`. [\#7092](https://github.com/ClickHouse/ClickHouse/pull/7092) ([Guillaume - Tassery](https://github.com/YiuRULE)) -- Add a new ProfileEvent `Merge` that counts the number of launched background merges. - [\#7093](https://github.com/ClickHouse/ClickHouse/pull/7093) ([Mikhail - Korotov](https://github.com/millb)) -- Add fullHostName function that returns a fully qualified domain name. - [\#7263](https://github.com/ClickHouse/ClickHouse/issues/7263) - [\#7291](https://github.com/ClickHouse/ClickHouse/pull/7291) ([sundyli](https://github.com/sundy-li)) -- Add function `arraySplit` and `arrayReverseSplit` which split an array by “cut off” - conditions. They are useful in time sequence handling. - [\#7294](https://github.com/ClickHouse/ClickHouse/pull/7294) ([hcz](https://github.com/hczhcz)) -- Add new functions that return the Array of all matched indices in multiMatch family of functions. - [\#7299](https://github.com/ClickHouse/ClickHouse/pull/7299) ([Danila - Kutenin](https://github.com/danlark1)) -- Add a new database engine `Lazy` that is optimized for storing a large number of small -Log - tables. [\#7171](https://github.com/ClickHouse/ClickHouse/pull/7171) ([Nikita - Vasilev](https://github.com/nikvas0)) -- Add aggregate functions groupBitmapAnd, -Or, -Xor for bitmap columns. [\#7109](https://github.com/ClickHouse/ClickHouse/pull/7109) ([Zhichang - Yu](https://github.com/yuzhichang)) -- Add aggregate function combinators -OrNull and -OrDefault, which return null - or default values when there is nothing to aggregate. - [\#7331](https://github.com/ClickHouse/ClickHouse/pull/7331) - ([hcz](https://github.com/hczhcz)) -- Introduce CustomSeparated data format that supports custom escaping and - delimiter rules. [\#7118](https://github.com/ClickHouse/ClickHouse/pull/7118) - ([tavplubix](https://github.com/tavplubix)) -- Support Redis as source of external dictionary. 
[\#4361](https://github.com/ClickHouse/ClickHouse/pull/4361) [\#6962](https://github.com/ClickHouse/ClickHouse/pull/6962) ([comunodi](https://github.com/comunodi), [Anton - Popov](https://github.com/CurtizJ)) - -#### Bug Fix {#bug-fix-2} - -- Fix wrong query result if it has `WHERE IN (SELECT ...)` section and `optimize_read_in_order` is - used. [\#7371](https://github.com/ClickHouse/ClickHouse/pull/7371) ([Anton - Popov](https://github.com/CurtizJ)) -- Disabled MariaDB authentication plugin, which depends on files outside of project. - [\#7140](https://github.com/ClickHouse/ClickHouse/pull/7140) ([Yuriy - Baranov](https://github.com/yurriy)) -- Fix exception `Cannot convert column ... because it is constant but values of constants are different in source and result` which could rarely happen when functions `now()`, `today()`, - `yesterday()`, `randConstant()` are used. - [\#7156](https://github.com/ClickHouse/ClickHouse/pull/7156) ([Nikolai - Kochetov](https://github.com/KochetovNicolai)) -- Fixed issue of using HTTP keep alive timeout instead of TCP keep alive timeout. - [\#7351](https://github.com/ClickHouse/ClickHouse/pull/7351) ([Vasily - Nemkov](https://github.com/Enmk)) -- Fixed a segmentation fault in groupBitmapOr (issue [\#7109](https://github.com/ClickHouse/ClickHouse/issues/7109)). - [\#7289](https://github.com/ClickHouse/ClickHouse/pull/7289) ([Zhichang - Yu](https://github.com/yuzhichang)) -- For materialized views the commit for Kafka is called after all data were written. - [\#7175](https://github.com/ClickHouse/ClickHouse/pull/7175) ([Ivan](https://github.com/abyss7)) -- Fixed wrong `duration_ms` value in `system.part_log` table. It was ten times off. - [\#7172](https://github.com/ClickHouse/ClickHouse/pull/7172) ([Vladimir - Chebotarev](https://github.com/excitoon)) -- A quick fix to resolve crash in LIVE VIEW table and re-enabling all LIVE VIEW tests. - [\#7201](https://github.com/ClickHouse/ClickHouse/pull/7201) - ([vzakaznikov](https://github.com/vzakaznikov)) -- Serialize NULL values correctly in min/max indexes of MergeTree parts. - [\#7234](https://github.com/ClickHouse/ClickHouse/pull/7234) ([Alexander - Kuzmenkov](https://github.com/akuzm)) -- Don’t put virtual columns to .sql metadata when table is created as `CREATE TABLE AS`. - [\#7183](https://github.com/ClickHouse/ClickHouse/pull/7183) ([Ivan](https://github.com/abyss7)) -- Fix segmentation fault in `ATTACH PART` query. - [\#7185](https://github.com/ClickHouse/ClickHouse/pull/7185) - ([alesapin](https://github.com/alesapin)) -- Fix wrong result for some queries given by the optimization of empty IN subqueries and empty - INNER/RIGHT JOIN. [\#7284](https://github.com/ClickHouse/ClickHouse/pull/7284) ([Nikolai - Kochetov](https://github.com/KochetovNicolai)) -- Fixing AddressSanitizer error in the LIVE VIEW getHeader() method. - [\#7271](https://github.com/ClickHouse/ClickHouse/pull/7271) - ([vzakaznikov](https://github.com/vzakaznikov)) - -#### Improvement {#improvement-1} - -- Add a message in case of queue\_wait\_max\_ms wait takes place. - [\#7390](https://github.com/ClickHouse/ClickHouse/pull/7390) ([Azat - Khuzhin](https://github.com/azat)) -- Made setting `s3_min_upload_part_size` table-level. - [\#7059](https://github.com/ClickHouse/ClickHouse/pull/7059) ([Vladimir - Chebotarev](https://github.com/excitoon)) -- Check TTL in StorageFactory. 
  [\#7304](https://github.com/ClickHouse/ClickHouse/pull/7304) ([sundyli](https://github.com/sundy-li))
- Squash left-hand blocks in partial merge join (optimization). [\#7122](https://github.com/ClickHouse/ClickHouse/pull/7122) ([Artem Zuikov](https://github.com/4ertus2))
- Do not allow non-deterministic functions in mutations of Replicated table engines, because this can introduce inconsistencies between replicas. [\#7247](https://github.com/ClickHouse/ClickHouse/pull/7247) ([Alexander Kazakov](https://github.com/Akazz))
- Disable memory tracker while converting exception stack trace to string. It can prevent the loss of error messages of type `Memory limit exceeded` on the server, which caused the `Attempt to read after eof` exception on the client. [\#7264](https://github.com/ClickHouse/ClickHouse/pull/7264) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Miscellaneous format improvements. Resolves [\#6033](https://github.com/ClickHouse/ClickHouse/issues/6033), [\#2633](https://github.com/ClickHouse/ClickHouse/issues/2633), [\#6611](https://github.com/ClickHouse/ClickHouse/issues/6611), [\#6742](https://github.com/ClickHouse/ClickHouse/issues/6742) [\#7215](https://github.com/ClickHouse/ClickHouse/pull/7215) ([tavplubix](https://github.com/tavplubix))
- ClickHouse ignores values on the right side of IN operator that are not convertible to the left side type. Make it work properly for compound types – Array and Tuple. [\#7283](https://github.com/ClickHouse/ClickHouse/pull/7283) ([Alexander Kuzmenkov](https://github.com/akuzm))
- Support missing inequalities for ASOF JOIN. It’s now possible to join the less-or-equal variant and the strict greater and less variants for the ASOF column in ON syntax (see the example after this list). [\#7282](https://github.com/ClickHouse/ClickHouse/pull/7282) ([Artem Zuikov](https://github.com/4ertus2))
- Optimize partial merge join. [\#7070](https://github.com/ClickHouse/ClickHouse/pull/7070) ([Artem Zuikov](https://github.com/4ertus2))
- Do not use more than 98K of memory in uniqCombined functions. [\#7236](https://github.com/ClickHouse/ClickHouse/pull/7236), [\#7270](https://github.com/ClickHouse/ClickHouse/pull/7270) ([Azat Khuzhin](https://github.com/azat))
- Flush parts of the right-hand joining table to disk in PartialMergeJoin (if there is not enough memory). Load data back when needed. [\#7186](https://github.com/ClickHouse/ClickHouse/pull/7186) ([Artem Zuikov](https://github.com/4ertus2))
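A sketch of the extended ASOF JOIN inequalities described above; the table and column names are hypothetical:

```sql
-- For every event, pick the latest quote at or before the event time:
SELECT e.id, e.ts, q.price
FROM events AS e
ASOF JOIN quotes AS q ON e.id = q.id AND e.ts >= q.ts;
```

The last condition on the ASOF column may now also use `<=`, `>` or `<`.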
#### Performance Improvement {#performance-improvement-1}

- Speed up joinGet with const arguments by avoiding data duplication. [\#7359](https://github.com/ClickHouse/ClickHouse/pull/7359) ([Amos Bird](https://github.com/amosbird))
- Return early if the subquery is empty. [\#7007](https://github.com/ClickHouse/ClickHouse/pull/7007) ([小路](https://github.com/nicelulu))
- Optimize parsing of SQL expression in Values. [\#6781](https://github.com/ClickHouse/ClickHouse/pull/6781) ([tavplubix](https://github.com/tavplubix))

#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-1}

- Disable some contribs for cross-compilation to Mac OS. [\#7101](https://github.com/ClickHouse/ClickHouse/pull/7101) ([Ivan](https://github.com/abyss7))
- Add missing linking with PocoXML for clickhouse\_common\_io. [\#7200](https://github.com/ClickHouse/ClickHouse/pull/7200) ([Azat Khuzhin](https://github.com/azat))
- Accept multiple test filter arguments in clickhouse-test. [\#7226](https://github.com/ClickHouse/ClickHouse/pull/7226) ([Alexander Kuzmenkov](https://github.com/akuzm))
- Enable musl and jemalloc for ARM. [\#7300](https://github.com/ClickHouse/ClickHouse/pull/7300) ([Amos Bird](https://github.com/amosbird))
- Added `--client-option` parameter to `clickhouse-test` to pass additional parameters to the client. [\#7277](https://github.com/ClickHouse/ClickHouse/pull/7277) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Preserve existing configs on rpm package upgrade. [\#7103](https://github.com/ClickHouse/ClickHouse/pull/7103) ([filimonov](https://github.com/filimonov))
- Fix errors detected by PVS. [\#7153](https://github.com/ClickHouse/ClickHouse/pull/7153) ([Artem Zuikov](https://github.com/4ertus2))
- Fix build for Darwin. [\#7149](https://github.com/ClickHouse/ClickHouse/pull/7149) ([Ivan](https://github.com/abyss7))
- glibc 2.29 compatibility. [\#7142](https://github.com/ClickHouse/ClickHouse/pull/7142) ([Amos Bird](https://github.com/amosbird))
- Make sure dh\_clean does not touch potential source files. [\#7205](https://github.com/ClickHouse/ClickHouse/pull/7205) ([Amos Bird](https://github.com/amosbird))
- Attempt to avoid conflict when updating from altinity rpm - it has a config file packaged separately in clickhouse-server-common. [\#7073](https://github.com/ClickHouse/ClickHouse/pull/7073) ([filimonov](https://github.com/filimonov))
- Optimize some header files for faster rebuilds. [\#7212](https://github.com/ClickHouse/ClickHouse/pull/7212), [\#7231](https://github.com/ClickHouse/ClickHouse/pull/7231) ([Alexander Kuzmenkov](https://github.com/akuzm))
- Add performance tests for Date and DateTime. [\#7332](https://github.com/ClickHouse/ClickHouse/pull/7332) ([Vasily Nemkov](https://github.com/Enmk))
- Fix some tests that contained non-deterministic mutations. [\#7132](https://github.com/ClickHouse/ClickHouse/pull/7132) ([Alexander Kazakov](https://github.com/Akazz))
- Add build with MemorySanitizer to CI. [\#7066](https://github.com/ClickHouse/ClickHouse/pull/7066) ([Alexander Kuzmenkov](https://github.com/akuzm))
- Avoid use of uninitialized values in MetricsTransmitter. [\#7158](https://github.com/ClickHouse/ClickHouse/pull/7158) ([Azat Khuzhin](https://github.com/azat))
- Fix some issues in Fields found by MemorySanitizer. [\#7135](https://github.com/ClickHouse/ClickHouse/pull/7135), [\#7179](https://github.com/ClickHouse/ClickHouse/pull/7179) ([Alexander Kuzmenkov](https://github.com/akuzm)), [\#7376](https://github.com/ClickHouse/ClickHouse/pull/7376) ([Amos Bird](https://github.com/amosbird))
- Fix undefined behavior in murmurhash32. [\#7388](https://github.com/ClickHouse/ClickHouse/pull/7388) ([Amos Bird](https://github.com/amosbird))
- Fix undefined behavior in StoragesInfoStream. [\#7384](https://github.com/ClickHouse/ClickHouse/pull/7384) ([tavplubix](https://github.com/tavplubix))
- Fixed constant expressions folding for external database engines (MySQL, ODBC, JDBC). In previous versions it wasn’t working for multiple constant expressions and was not working at all for Date, DateTime and UUID. This fixes [\#7245](https://github.com/ClickHouse/ClickHouse/issues/7245) [\#7252](https://github.com/ClickHouse/ClickHouse/pull/7252) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixing ThreadSanitizer data race error in the LIVE VIEW when accessing no\_users\_thread variable.
  [\#7353](https://github.com/ClickHouse/ClickHouse/pull/7353) ([vzakaznikov](https://github.com/vzakaznikov))
- Get rid of malloc symbols in libcommon [\#7134](https://github.com/ClickHouse/ClickHouse/pull/7134), [\#7065](https://github.com/ClickHouse/ClickHouse/pull/7065) ([Amos Bird](https://github.com/amosbird))
- Add global flag ENABLE\_LIBRARIES for disabling all libraries. [\#7063](https://github.com/ClickHouse/ClickHouse/pull/7063) ([proller](https://github.com/proller))

#### Code cleanup {#code-cleanup}

- Generalize configuration repository to prepare for DDL for Dictionaries. [\#7155](https://github.com/ClickHouse/ClickHouse/pull/7155) ([alesapin](https://github.com/alesapin))
- Parser for dictionaries DDL without any semantics. [\#7209](https://github.com/ClickHouse/ClickHouse/pull/7209) ([alesapin](https://github.com/alesapin))
- Split ParserCreateQuery into different smaller parsers. [\#7253](https://github.com/ClickHouse/ClickHouse/pull/7253) ([alesapin](https://github.com/alesapin))
- Small refactoring and renaming near external dictionaries. [\#7111](https://github.com/ClickHouse/ClickHouse/pull/7111) ([alesapin](https://github.com/alesapin))
- Refactor some code to prepare for role-based access control. [\#7235](https://github.com/ClickHouse/ClickHouse/pull/7235) ([Vitaly Baranov](https://github.com/vitlibar))
- Some improvements in DatabaseOrdinary code. [\#7086](https://github.com/ClickHouse/ClickHouse/pull/7086) ([Nikita Vasilev](https://github.com/nikvas0))
- Do not use iterators in find() and emplace() methods of hash tables. [\#7026](https://github.com/ClickHouse/ClickHouse/pull/7026) ([Alexander Kuzmenkov](https://github.com/akuzm))
- Fix getMultipleValuesFromConfig in the case when the parameter root is not empty. [\#7374](https://github.com/ClickHouse/ClickHouse/pull/7374) ([Mikhail Korotov](https://github.com/millb))
- Remove some copy-paste (TemporaryFile and TemporaryFileStream) [\#7166](https://github.com/ClickHouse/ClickHouse/pull/7166) ([Artem Zuikov](https://github.com/4ertus2))
- Improved code readability a little bit (`MergeTreeData::getActiveContainingPart`). [\#7361](https://github.com/ClickHouse/ClickHouse/pull/7361) ([Vladimir Chebotarev](https://github.com/excitoon))
- Wait for all scheduled jobs, which are using local objects, if `ThreadPool::schedule(...)` throws an exception. Rename `ThreadPool::schedule(...)` to `ThreadPool::scheduleOrThrowOnError(...)` and fix comments to make it obvious that it may throw. [\#7350](https://github.com/ClickHouse/ClickHouse/pull/7350) ([tavplubix](https://github.com/tavplubix))

## ClickHouse release 19.15 {#clickhouse-release-19-15}

### ClickHouse release 19.15.4.10, 2019-10-31 {#clickhouse-release-19-15-4-10-2019-10-31}

#### Bug Fix {#bug-fix-3}

- Added handling of SQL\_TINYINT and SQL\_BIGINT, and fixed handling of SQL\_FLOAT data source types in ODBC Bridge. [\#7491](https://github.com/ClickHouse/ClickHouse/pull/7491) ([Denis Glazachev](https://github.com/traceon))
- Allowed to have some parts on destination disk or volume in MOVE PARTITION. [\#7434](https://github.com/ClickHouse/ClickHouse/pull/7434) ([Vladimir Chebotarev](https://github.com/excitoon))
- Fixed NULL-values in nullable columns through ODBC-bridge. [\#7402](https://github.com/ClickHouse/ClickHouse/pull/7402) ([Vasily Nemkov](https://github.com/Enmk))
- Fixed INSERT into Distributed non-local node with MATERIALIZED columns.
  [\#7377](https://github.com/ClickHouse/ClickHouse/pull/7377) ([Azat Khuzhin](https://github.com/azat))
- Fixed function getMultipleValuesFromConfig. [\#7374](https://github.com/ClickHouse/ClickHouse/pull/7374) ([Mikhail Korotov](https://github.com/millb))
- Fixed usage of HTTP keep-alive timeout instead of TCP keep-alive timeout. [\#7351](https://github.com/ClickHouse/ClickHouse/pull/7351) ([Vasily Nemkov](https://github.com/Enmk))
- Wait for all jobs to finish on exception (fixes rare segfaults). [\#7350](https://github.com/ClickHouse/ClickHouse/pull/7350) ([tavplubix](https://github.com/tavplubix))
- Don’t push to MVs when inserting into Kafka table. [\#7265](https://github.com/ClickHouse/ClickHouse/pull/7265) ([Ivan](https://github.com/abyss7))
- Disable memory tracker for exception stack. [\#7264](https://github.com/ClickHouse/ClickHouse/pull/7264) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fixed bad code in transforming query for external database. [\#7252](https://github.com/ClickHouse/ClickHouse/pull/7252) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Avoid use of uninitialized values in MetricsTransmitter. [\#7158](https://github.com/ClickHouse/ClickHouse/pull/7158) ([Azat Khuzhin](https://github.com/azat))
- Added an example config with macros for tests ([alexey-milovidov](https://github.com/alexey-milovidov))

### ClickHouse release 19.15.3.6, 2019-10-09 {#clickhouse-release-19-15-3-6-2019-10-09}

#### Bug Fix {#bug-fix-4}

- Fixed bad\_variant in hashed dictionary. ([alesapin](https://github.com/alesapin))
- Fixed a bug with segmentation fault in ATTACH PART query. ([alesapin](https://github.com/alesapin))
- Fixed time calculation in `MergeTreeData`. ([Vladimir Chebotarev](https://github.com/excitoon))
- Commit to Kafka explicitly after the writing is finalized. [\#7175](https://github.com/ClickHouse/ClickHouse/pull/7175) ([Ivan](https://github.com/abyss7))
- Serialize NULL values correctly in min/max indexes of MergeTree parts. [\#7234](https://github.com/ClickHouse/ClickHouse/pull/7234) ([Alexander Kuzmenkov](https://github.com/akuzm))

### ClickHouse release 19.15.2.2, 2019-10-01 {#clickhouse-release-19-15-2-2-2019-10-01}

#### New Feature {#new-feature-3}

- Tiered storage: support to use multiple storage volumes for tables with MergeTree engine. It’s possible to store fresh data on SSD and automatically move old data to HDD. ([example](https://clickhouse.github.io/clickhouse-presentations/meetup30/new_features/#12)). [\#4918](https://github.com/ClickHouse/ClickHouse/pull/4918) ([Igr](https://github.com/ObjatieGroba)) [\#6489](https://github.com/ClickHouse/ClickHouse/pull/6489) ([alesapin](https://github.com/alesapin))
- Add table function `input` for reading incoming data in `INSERT SELECT` query (see the example after this list). [\#5450](https://github.com/ClickHouse/ClickHouse/pull/5450) ([palasonic1](https://github.com/palasonic1)) [\#6832](https://github.com/ClickHouse/ClickHouse/pull/6832) ([Anton Popov](https://github.com/CurtizJ))
- Add a `sparse_hashed` dictionary layout, that is functionally equivalent to the `hashed` layout, but is more memory efficient. It uses about half as much memory at the cost of slower value retrieval. [\#6894](https://github.com/ClickHouse/ClickHouse/pull/6894) ([Azat Khuzhin](https://github.com/azat))
- Implement ability to define a list of users for access to dictionaries. Only the currently connected database is used. [\#6907](https://github.com/ClickHouse/ClickHouse/pull/6907) ([Guillaume Tassery](https://github.com/YiuRULE))
- Add `LIMIT` option to `SHOW` query. [\#6944](https://github.com/ClickHouse/ClickHouse/pull/6944) ([Philipp Malkovsky](https://github.com/malkfilipp))
- Add `bitmapSubsetLimit(bitmap, range_start, limit)` function, that returns a subset of at most `limit` of the smallest values in the set that are no smaller than `range_start` (example below). [\#6957](https://github.com/ClickHouse/ClickHouse/pull/6957) ([Zhichang Yu](https://github.com/yuzhichang))
- Add `bitmapMin` and `bitmapMax` functions. [\#6970](https://github.com/ClickHouse/ClickHouse/pull/6970) ([Zhichang Yu](https://github.com/yuzhichang))
- Add function `repeat` related to [issue-6648](https://github.com/ClickHouse/ClickHouse/issues/6648) [\#6999](https://github.com/ClickHouse/ClickHouse/pull/6999) ([flynn](https://github.com/ucasFL))
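Two quick sketches of the features above. The `input` table function transforms data on the fly while inserting; the table `test` and its columns are hypothetical, and the actual rows are sent by the client after the query:

```sql
INSERT INTO test
SELECT lower(col1), col2 * col2
FROM input('col1 String, col2 UInt32')
FORMAT CSV
```

And `bitmapSubsetLimit` under the stated semantics:

```sql
-- At most 2 of the smallest values that are >= 6: expect [7, 9].
SELECT bitmapToArray(bitmapSubsetLimit(bitmapBuild([1, 5, 7, 9, 20]), 6, 2));
```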
#### Experimental Feature {#experimental-feature-1}

- Implement (in memory) Merge Join variant that does not change current pipeline. Result is partially sorted by merge key. Set `partial_merge_join = 1` to use this feature. The Merge Join is still in development. [\#6940](https://github.com/ClickHouse/ClickHouse/pull/6940) ([Artem Zuikov](https://github.com/4ertus2))
- Add `S3` engine and table function. It is still in development (no authentication support yet). [\#5596](https://github.com/ClickHouse/ClickHouse/pull/5596) ([Vladimir Chebotarev](https://github.com/excitoon))

#### Improvement {#improvement-2}

- Every message read from Kafka is inserted atomically. This resolves almost all known issues with the Kafka engine. [\#6950](https://github.com/ClickHouse/ClickHouse/pull/6950) ([Ivan](https://github.com/abyss7))
- Improvements for failover of Distributed queries. Recovery time is shortened; it is also now configurable and can be seen in `system.clusters`. [\#6399](https://github.com/ClickHouse/ClickHouse/pull/6399) ([Vasily Nemkov](https://github.com/Enmk))
- Support numeric values for Enums directly in `IN` section. \#6766 [\#6941](https://github.com/ClickHouse/ClickHouse/pull/6941) ([dimarub2000](https://github.com/dimarub2000))
- Support (optional, disabled by default) redirects on URL storage. [\#6914](https://github.com/ClickHouse/ClickHouse/pull/6914) ([maqroll](https://github.com/maqroll))
- Add an information message when a client with an older version connects to a server. [\#6893](https://github.com/ClickHouse/ClickHouse/pull/6893) ([Philipp Malkovsky](https://github.com/malkfilipp))
- Remove maximum backoff sleep time limit for sending data in Distributed tables. [\#6895](https://github.com/ClickHouse/ClickHouse/pull/6895) ([Azat Khuzhin](https://github.com/azat))
- Add ability to send profile events (counters) with cumulative values to graphite. It can be enabled under `<graphite>` in server `config.xml`. [\#6969](https://github.com/ClickHouse/ClickHouse/pull/6969) ([Azat Khuzhin](https://github.com/azat))
- Automatically cast type `T` to `LowCardinality(T)` while inserting data into a column of type `LowCardinality(T)` in Native format via HTTP. [\#6891](https://github.com/ClickHouse/ClickHouse/pull/6891) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Add ability to use function `hex` without using `reinterpretAsString` for `Float32`, `Float64` (see the example below). [\#7024](https://github.com/ClickHouse/ClickHouse/pull/7024) ([Mikhail Korotov](https://github.com/millb))
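With the last improvement above, `hex` can be applied to floats directly (previously one typically went through `reinterpretAsString`); the output is the hex of the value’s in-memory, little-endian representation:

```sql
SELECT hex(toFloat32(1.5)), hex(toFloat64(1.5));
```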
#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-2}

- Add gdb-index to clickhouse binary with debug info. It will speed up startup time of `gdb`. [\#6947](https://github.com/ClickHouse/ClickHouse/pull/6947) ([alesapin](https://github.com/alesapin))
- Speed up deb packaging with patched dpkg-deb which uses `pigz`. [\#6960](https://github.com/ClickHouse/ClickHouse/pull/6960) ([alesapin](https://github.com/alesapin))
- Set `enable_fuzzing = 1` to enable libfuzzer instrumentation of all the project code. [\#7042](https://github.com/ClickHouse/ClickHouse/pull/7042) ([kyprizel](https://github.com/kyprizel))
- Add split build smoke test in CI. [\#7061](https://github.com/ClickHouse/ClickHouse/pull/7061) ([alesapin](https://github.com/alesapin))
- Add build with MemorySanitizer to CI. [\#7066](https://github.com/ClickHouse/ClickHouse/pull/7066) ([Alexander Kuzmenkov](https://github.com/akuzm))
- Replace `libsparsehash` with `sparsehash-c11`. [\#6965](https://github.com/ClickHouse/ClickHouse/pull/6965) ([Azat Khuzhin](https://github.com/azat))

#### Bug Fix {#bug-fix-5}

- Fixed performance degradation of index analysis on complex keys on large tables. This fixes \#6924. [\#7075](https://github.com/ClickHouse/ClickHouse/pull/7075) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix logical error causing segfaults when selecting from an empty Kafka topic. [\#6909](https://github.com/ClickHouse/ClickHouse/pull/6909) ([Ivan](https://github.com/abyss7))
- Fix too early MySQL connection close in `MySQLBlockInputStream.cpp`. [\#6882](https://github.com/ClickHouse/ClickHouse/pull/6882) ([Clément Rodriguez](https://github.com/clemrodriguez))
- Returned support for very old Linux kernels (fix [\#6841](https://github.com/ClickHouse/ClickHouse/issues/6841)) [\#6853](https://github.com/ClickHouse/ClickHouse/pull/6853) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix possible data loss in `insert select` query in case of empty block in input stream. \#6834 \#6862 [\#6911](https://github.com/ClickHouse/ClickHouse/pull/6911) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fix for function `arrayEnumerateUniqRanked` with empty arrays in params. [\#6928](https://github.com/ClickHouse/ClickHouse/pull/6928) ([proller](https://github.com/proller))
- Fix complex queries with array joins and global subqueries. [\#6934](https://github.com/ClickHouse/ClickHouse/pull/6934) ([Ivan](https://github.com/abyss7))
- Fix `Unknown identifier` error in ORDER BY and GROUP BY with multiple JOINs. [\#7022](https://github.com/ClickHouse/ClickHouse/pull/7022) ([Artem Zuikov](https://github.com/4ertus2))
- Fixed `MSan` warning while executing function with `LowCardinality` argument. [\#7062](https://github.com/ClickHouse/ClickHouse/pull/7062) ([Nikolai Kochetov](https://github.com/KochetovNicolai))

#### Backward Incompatible Change {#backward-incompatible-change-2}

- Changed serialization format of bitmap\* aggregate function states to improve performance. Serialized states of bitmap\* from previous versions cannot be read. [\#6908](https://github.com/ClickHouse/ClickHouse/pull/6908) ([Zhichang Yu](https://github.com/yuzhichang))

## ClickHouse release 19.14 {#clickhouse-release-19-14}

### ClickHouse release 19.14.7.15, 2019-10-02 {#clickhouse-release-19-14-7-15-2019-10-02}

#### Bug Fix {#bug-fix-6}

- This release also contains all bug fixes from 19.11.12.69.
- Fixed compatibility for distributed queries between 19.14 and earlier versions. This fixes [\#7068](https://github.com/ClickHouse/ClickHouse/issues/7068).
  [\#7069](https://github.com/ClickHouse/ClickHouse/pull/7069) ([alexey-milovidov](https://github.com/alexey-milovidov))

### ClickHouse release 19.14.6.12, 2019-09-19 {#clickhouse-release-19-14-6-12-2019-09-19}

#### Bug Fix {#bug-fix-7}

- Fix for function `arrayEnumerateUniqRanked` with empty arrays in params. [\#6928](https://github.com/ClickHouse/ClickHouse/pull/6928) ([proller](https://github.com/proller))
- Fixed subquery name in queries with `ARRAY JOIN` and `GLOBAL IN subquery` with alias. Use subquery alias for external table name if it is specified. [\#6934](https://github.com/ClickHouse/ClickHouse/pull/6934) ([Ivan](https://github.com/abyss7))

#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-3}

- Fix [flapping](https://clickhouse-test-reports.s3.yandex.net/6944/aab95fd5175a513413c7395a73a82044bdafb906/functional_stateless_tests_(debug).html) test `00715_fetch_merged_or_mutated_part_zookeeper` by rewriting it to a shell script because it needs to wait for mutations to apply. [\#6977](https://github.com/ClickHouse/ClickHouse/pull/6977) ([Alexander Kazakov](https://github.com/Akazz))
- Fixed UBSan and MemSan failure in function `groupUniqArray` with empty array argument. It was caused by placing an empty `PaddedPODArray` into the hash table zero cell because the constructor for the zero cell value was not called. [\#6937](https://github.com/ClickHouse/ClickHouse/pull/6937) ([Amos Bird](https://github.com/amosbird))

### ClickHouse release 19.14.3.3, 2019-09-10 {#clickhouse-release-19-14-3-3-2019-09-10}

#### New Feature {#new-feature-4}

- `WITH FILL` modifier for `ORDER BY` (continuation of [\#5069](https://github.com/ClickHouse/ClickHouse/issues/5069); see the example after this list). [\#6610](https://github.com/ClickHouse/ClickHouse/pull/6610) ([Anton Popov](https://github.com/CurtizJ))
- `WITH TIES` modifier for `LIMIT`. (continuation of [\#5069](https://github.com/ClickHouse/ClickHouse/issues/5069)) [\#6610](https://github.com/ClickHouse/ClickHouse/pull/6610) ([Anton Popov](https://github.com/CurtizJ))
- Parse unquoted `NULL` literal as NULL (if setting `format_csv_unquoted_null_literal_as_null=1`). Initialize null fields with default values if the data type of this field is not nullable (if setting `input_format_null_as_default=1`). [\#5990](https://github.com/ClickHouse/ClickHouse/issues/5990) [\#6055](https://github.com/ClickHouse/ClickHouse/pull/6055) ([tavplubix](https://github.com/tavplubix))
- Support for wildcards in paths of table functions `file` and `hdfs`. If the path contains wildcards, the table will be readonly. Example of usage: `select * from hdfs('hdfs://hdfs1:9000/some_dir/another_dir/*/file{0..9}{0..9}')` and `select * from file('some_dir/{some_file,another_file,yet_another}.tsv', 'TSV', 'value UInt32')`. [\#6092](https://github.com/ClickHouse/ClickHouse/pull/6092) ([Olga Khvostikova](https://github.com/stavrolia))
- New `system.metric_log` table which stores values of `system.events` and `system.metrics` with a specified time interval. [\#6363](https://github.com/ClickHouse/ClickHouse/issues/6363) [\#6467](https://github.com/ClickHouse/ClickHouse/pull/6467) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)) [\#6530](https://github.com/ClickHouse/ClickHouse/pull/6530) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Allow to write ClickHouse text logs to `system.text_log` table.
  [\#6037](https://github.com/ClickHouse/ClickHouse/issues/6037) [\#6103](https://github.com/ClickHouse/ClickHouse/pull/6103) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)) [\#6164](https://github.com/ClickHouse/ClickHouse/pull/6164) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Show private symbols in stack traces (this is done via parsing symbol tables of ELF files). Added information about file and line number in stack traces if debug info is present. Sped up symbol name lookup by indexing symbols present in the program. Added new SQL functions for introspection: `demangle` and `addressToLine`. Renamed function `symbolizeAddress` to `addressToSymbol` for consistency. Function `addressToSymbol` will return mangled name for performance reasons and you have to apply `demangle`. Added setting `allow_introspection_functions` which is turned off by default. [\#6201](https://github.com/ClickHouse/ClickHouse/pull/6201) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Table function `values` (the name is case-insensitive). It allows reading from the `VALUES` list proposed in [\#5984](https://github.com/ClickHouse/ClickHouse/issues/5984). Example: `SELECT * FROM VALUES('a UInt64, s String', (1, 'one'), (2, 'two'), (3, 'three'))`. [\#6217](https://github.com/ClickHouse/ClickHouse/issues/6217). [\#6209](https://github.com/ClickHouse/ClickHouse/pull/6209) ([dimarub2000](https://github.com/dimarub2000))
- Added the ability to alter storage settings. Syntax: `ALTER TABLE <table> MODIFY SETTING <setting> = <value>`. [\#6366](https://github.com/ClickHouse/ClickHouse/pull/6366) [\#6669](https://github.com/ClickHouse/ClickHouse/pull/6669) [\#6685](https://github.com/ClickHouse/ClickHouse/pull/6685) ([alesapin](https://github.com/alesapin))
- Support for removing detached parts. Syntax: `ALTER TABLE <table_name> DROP DETACHED PART '<part_id>'`. [\#6158](https://github.com/ClickHouse/ClickHouse/pull/6158) ([tavplubix](https://github.com/tavplubix))
- Table constraints. Allows to add a constraint to the table definition which will be checked at insert. [\#5273](https://github.com/ClickHouse/ClickHouse/pull/5273) ([Gleb Novikov](https://github.com/NanoBjorn)) [\#6652](https://github.com/ClickHouse/ClickHouse/pull/6652) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Support for cascaded materialized views. [\#6324](https://github.com/ClickHouse/ClickHouse/pull/6324) ([Amos Bird](https://github.com/amosbird))
- Turn on query profiler by default to sample every query execution thread once a second. [\#6283](https://github.com/ClickHouse/ClickHouse/pull/6283) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Input format `ORC`. [\#6454](https://github.com/ClickHouse/ClickHouse/pull/6454) [\#6703](https://github.com/ClickHouse/ClickHouse/pull/6703) ([akonyaev90](https://github.com/akonyaev90))
- Added two new functions: `sigmoid` and `tanh` (that are useful for machine learning applications). [\#6254](https://github.com/ClickHouse/ClickHouse/pull/6254) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Function `hasToken(haystack, token)`, `hasTokenCaseInsensitive(haystack, token)` to check if given token is in haystack. Token is a maximal-length substring between two non-alphanumeric ASCII characters (or boundaries of haystack). Token must be a constant string. Supported by tokenbf\_v1 index specialization.
  [\#6596](https://github.com/ClickHouse/ClickHouse/pull/6596), [\#6662](https://github.com/ClickHouse/ClickHouse/pull/6662) ([Vasily Nemkov](https://github.com/Enmk))
- New function `neighbor(value, offset[, default_value])`. Allows reaching the previous/next value within a column in a block of data (see the example after this list). [\#5925](https://github.com/ClickHouse/ClickHouse/pull/5925) ([Alex Krash](https://github.com/alex-krash)) [6685365ab8c5b74f9650492c88a012596eb1b0c6](https://github.com/ClickHouse/ClickHouse/commit/6685365ab8c5b74f9650492c88a012596eb1b0c6) [341e2e4587a18065c2da1ca888c73389f48ce36c](https://github.com/ClickHouse/ClickHouse/commit/341e2e4587a18065c2da1ca888c73389f48ce36c) [Alexey Milovidov](https://github.com/alexey-milovidov)
- Created a function `currentUser()`, returning the login of the authorized user. Added alias `user()` for compatibility with MySQL. [\#6470](https://github.com/ClickHouse/ClickHouse/pull/6470) ([Alex Krash](https://github.com/alex-krash))
- New aggregate functions `quantilesExactInclusive` and `quantilesExactExclusive` which were proposed in [\#5885](https://github.com/ClickHouse/ClickHouse/issues/5885). [\#6477](https://github.com/ClickHouse/ClickHouse/pull/6477) ([dimarub2000](https://github.com/dimarub2000))
- Function `bitmapRange(bitmap, range_begin, range_end)` which returns a new set with the specified range (not including the `range_end`). [\#6314](https://github.com/ClickHouse/ClickHouse/pull/6314) ([Zhichang Yu](https://github.com/yuzhichang))
- Function `geohashesInBox(longitude_min, latitude_min, longitude_max, latitude_max, precision)` which creates an array of precision-long strings of geohash boxes covering the provided area. [\#6127](https://github.com/ClickHouse/ClickHouse/pull/6127) ([Vasily Nemkov](https://github.com/Enmk))
- Implement support for INSERT query with `Kafka` tables. [\#6012](https://github.com/ClickHouse/ClickHouse/pull/6012) ([Ivan](https://github.com/abyss7))
- Added support for `_partition` and `_timestamp` virtual columns to Kafka engine. [\#6400](https://github.com/ClickHouse/ClickHouse/pull/6400) ([Ivan](https://github.com/abyss7))
- Possibility to remove sensitive data from `query_log`, server logs, process list with regexp-based rules. [\#5710](https://github.com/ClickHouse/ClickHouse/pull/5710) ([filimonov](https://github.com/filimonov))
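A minimal sketch of two of the new features above, `WITH FILL` and `neighbor`; the numbers are illustrative:

```sql
-- Insert missing rows into an ordered result, here for n = 1, 3, 5, 7, 9:
SELECT number * 2 AS n
FROM numbers(5)
ORDER BY n WITH FILL FROM 0 TO 10;

-- Peek at adjacent rows within a block; the third argument is the default
-- used when the neighbor falls outside the block:
SELECT number, neighbor(number, -1) AS prev, neighbor(number, 1, -1) AS next
FROM numbers(4);
```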
#### Experimental Feature {#experimental-feature-2}

- Input and output data format `Template`. It allows specifying a custom format string for input and output. [\#4354](https://github.com/ClickHouse/ClickHouse/issues/4354) [\#6727](https://github.com/ClickHouse/ClickHouse/pull/6727) ([tavplubix](https://github.com/tavplubix))
- Implementation of `LIVE VIEW` tables that were originally proposed in [\#2898](https://github.com/ClickHouse/ClickHouse/pull/2898), prepared in [\#3925](https://github.com/ClickHouse/ClickHouse/issues/3925), and then updated in [\#5541](https://github.com/ClickHouse/ClickHouse/issues/5541). See [\#5541](https://github.com/ClickHouse/ClickHouse/issues/5541) for detailed description. [\#5541](https://github.com/ClickHouse/ClickHouse/issues/5541) ([vzakaznikov](https://github.com/vzakaznikov)) [\#6425](https://github.com/ClickHouse/ClickHouse/pull/6425) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) [\#6656](https://github.com/ClickHouse/ClickHouse/pull/6656) ([vzakaznikov](https://github.com/vzakaznikov)) Note that the `LIVE VIEW` feature may be removed in future versions.

#### Bug Fix {#bug-fix-8}

- This release also contains all bug fixes from 19.13 and 19.11.
- Fix segmentation fault when the table has skip indices and vertical merge happens. [\#6723](https://github.com/ClickHouse/ClickHouse/pull/6723) ([alesapin](https://github.com/alesapin))
- Fix per-column TTL with non-trivial column defaults. Previously, in case of force TTL merge with `OPTIMIZE ... FINAL` query, expired values were replaced by type defaults instead of user-specified column defaults. [\#6796](https://github.com/ClickHouse/ClickHouse/pull/6796) ([Anton Popov](https://github.com/CurtizJ))
- Fix Kafka messages duplication problem on normal server restart. [\#6597](https://github.com/ClickHouse/ClickHouse/pull/6597) ([Ivan](https://github.com/abyss7))
- Fixed infinite loop when reading Kafka messages. Do not pause/resume consumer on subscription at all - otherwise it may get paused indefinitely in some scenarios. [\#6354](https://github.com/ClickHouse/ClickHouse/pull/6354) ([Ivan](https://github.com/abyss7))
- Fix `Key expression contains comparison between inconvertible types` exception in `bitmapContains` function. [\#6136](https://github.com/ClickHouse/ClickHouse/issues/6136) [\#6146](https://github.com/ClickHouse/ClickHouse/issues/6146) [\#6156](https://github.com/ClickHouse/ClickHouse/pull/6156) ([dimarub2000](https://github.com/dimarub2000))
- Fix segfault with enabled `optimize_skip_unused_shards` and missing sharding key. [\#6384](https://github.com/ClickHouse/ClickHouse/pull/6384) ([Anton Popov](https://github.com/CurtizJ))
- Fixed wrong code in mutations that may lead to memory corruption. Fixed segfault with read of address `0x14c0` that may happen due to concurrent `DROP TABLE` and `SELECT` from `system.parts` or `system.parts_columns`. Fixed race condition in preparation of mutation queries. Fixed deadlock caused by `OPTIMIZE` of Replicated tables and concurrent modification operations like ALTERs. [\#6514](https://github.com/ClickHouse/ClickHouse/pull/6514) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Removed extra verbose logging in MySQL interface. [\#6389](https://github.com/ClickHouse/ClickHouse/pull/6389) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Return the ability to parse boolean settings from ‘true’ and ‘false’ in the configuration file. [\#6278](https://github.com/ClickHouse/ClickHouse/pull/6278) ([alesapin](https://github.com/alesapin))
- Fix crash in `quantile` and `median` function over `Nullable(Decimal128)`. [\#6378](https://github.com/ClickHouse/ClickHouse/pull/6378) ([Artem Zuikov](https://github.com/4ertus2))
- Fixed possible incomplete result returned by `SELECT` query with `WHERE` condition on primary key that contained a conversion to Float type. It was caused by incorrect checking of monotonicity in the `toFloat` function. [\#6248](https://github.com/ClickHouse/ClickHouse/issues/6248) [\#6374](https://github.com/ClickHouse/ClickHouse/pull/6374) ([dimarub2000](https://github.com/dimarub2000))
- Check `max_expanded_ast_elements` setting for mutations. Clear mutations after `TRUNCATE TABLE`. [\#6205](https://github.com/ClickHouse/ClickHouse/pull/6205) ([Winter Zhang](https://github.com/zhang2014))
- Fix JOIN results for key columns when used with `join_use_nulls`. Attach Nulls instead of column defaults. [\#6249](https://github.com/ClickHouse/ClickHouse/pull/6249) ([Artem Zuikov](https://github.com/4ertus2))
- Fix for skip indices with vertical merge and alter. Fix for `Bad size of marks file` exception.
  [\#6594](https://github.com/ClickHouse/ClickHouse/issues/6594) [\#6713](https://github.com/ClickHouse/ClickHouse/pull/6713) ([alesapin](https://github.com/alesapin))
- Fix rare crash in `ALTER MODIFY COLUMN` and vertical merge when one of merged/altered parts is empty (0 rows). [\#6746](https://github.com/ClickHouse/ClickHouse/issues/6746) [\#6780](https://github.com/ClickHouse/ClickHouse/pull/6780) ([alesapin](https://github.com/alesapin))
- Fixed bug in conversion of `LowCardinality` types in `AggregateFunctionFactory`. This fixes [\#6257](https://github.com/ClickHouse/ClickHouse/issues/6257). [\#6281](https://github.com/ClickHouse/ClickHouse/pull/6281) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fix wrong behavior and possible segfaults in `topK` and `topKWeighted` aggregate functions. [\#6404](https://github.com/ClickHouse/ClickHouse/pull/6404) ([Anton Popov](https://github.com/CurtizJ))
- Fixed unsafe code around `getIdentifier` function. [\#6401](https://github.com/ClickHouse/ClickHouse/issues/6401) [\#6409](https://github.com/ClickHouse/ClickHouse/pull/6409) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed bug in MySQL wire protocol (used while connecting to ClickHouse from a MySQL client). Caused by heap buffer overflow in `PacketPayloadWriteBuffer`. [\#6212](https://github.com/ClickHouse/ClickHouse/pull/6212) ([Yuriy Baranov](https://github.com/yurriy))
- Fixed memory leak in `bitmapSubsetInRange` function. [\#6819](https://github.com/ClickHouse/ClickHouse/pull/6819) ([Zhichang Yu](https://github.com/yuzhichang))
- Fix rare bug when a mutation is executed after a granularity change. [\#6816](https://github.com/ClickHouse/ClickHouse/pull/6816) ([alesapin](https://github.com/alesapin))
- Allow protobuf message with all fields by default. [\#6132](https://github.com/ClickHouse/ClickHouse/pull/6132) ([Vitaly Baranov](https://github.com/vitlibar))
- Resolve a bug with `nullIf` function when we send a `NULL` argument as the second argument. [\#6446](https://github.com/ClickHouse/ClickHouse/pull/6446) ([Guillaume Tassery](https://github.com/YiuRULE))
- Fix rare bug with wrong memory allocation/deallocation in complex key cache dictionaries with string fields which leads to infinite memory consumption (looks like a memory leak). The bug reproduces when the string size is a power of two starting from eight (8, 16, 32, etc). [\#6447](https://github.com/ClickHouse/ClickHouse/pull/6447) ([alesapin](https://github.com/alesapin))
- Fixed Gorilla encoding on small sequences which caused exception `Cannot write after end of buffer`. [\#6398](https://github.com/ClickHouse/ClickHouse/issues/6398) [\#6444](https://github.com/ClickHouse/ClickHouse/pull/6444) ([Vasily Nemkov](https://github.com/Enmk))
- Allow to use non-nullable types in JOINs with `join_use_nulls` enabled. [\#6705](https://github.com/ClickHouse/ClickHouse/pull/6705) ([Artem Zuikov](https://github.com/4ertus2))
- Disable `Poco::AbstractConfiguration` substitutions in query in `clickhouse-client`. [\#6706](https://github.com/ClickHouse/ClickHouse/pull/6706) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Avoid deadlock in `REPLACE PARTITION`. [\#6677](https://github.com/ClickHouse/ClickHouse/pull/6677) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Using `arrayReduce` for constant arguments may lead to segfault.
  [\#6242](https://github.com/ClickHouse/ClickHouse/issues/6242) [\#6326](https://github.com/ClickHouse/ClickHouse/pull/6326) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix inconsistent parts which can appear if replica was restored after `DROP PARTITION`. [\#6522](https://github.com/ClickHouse/ClickHouse/issues/6522) [\#6523](https://github.com/ClickHouse/ClickHouse/pull/6523) ([tavplubix](https://github.com/tavplubix))
- Fixed hang in `JSONExtractRaw` function. [\#6195](https://github.com/ClickHouse/ClickHouse/issues/6195) [\#6198](https://github.com/ClickHouse/ClickHouse/pull/6198) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix bug with incorrect skip indices serialization and aggregation with adaptive granularity. [\#6594](https://github.com/ClickHouse/ClickHouse/issues/6594). [\#6748](https://github.com/ClickHouse/ClickHouse/pull/6748) ([alesapin](https://github.com/alesapin))
- Fix `WITH ROLLUP` and `WITH CUBE` modifiers of `GROUP BY` with two-level aggregation. [\#6225](https://github.com/ClickHouse/ClickHouse/pull/6225) ([Anton Popov](https://github.com/CurtizJ))
- Fix bug with writing secondary indices marks with adaptive granularity. [\#6126](https://github.com/ClickHouse/ClickHouse/pull/6126) ([alesapin](https://github.com/alesapin))
- Fix initialization order during server startup. Since `StorageMergeTree::background_task_handle` is initialized in `startup()`, the `MergeTreeBlockOutputStream::write()` may try to use it before initialization. Just check if it is initialized. [\#6080](https://github.com/ClickHouse/ClickHouse/pull/6080) ([Ivan](https://github.com/abyss7))
- Clear the data buffer from the previous read operation that was completed with an error. [\#6026](https://github.com/ClickHouse/ClickHouse/pull/6026) ([Nikolay](https://github.com/bopohaa))
- Fix bug with enabling adaptive granularity when creating a new replica for Replicated\*MergeTree table. [\#6394](https://github.com/ClickHouse/ClickHouse/issues/6394) [\#6452](https://github.com/ClickHouse/ClickHouse/pull/6452) ([alesapin](https://github.com/alesapin))
- Fixed possible crash during server startup in case an exception happened in `libunwind` during an exception at access to the uninitialized `ThreadStatus` structure. [\#6456](https://github.com/ClickHouse/ClickHouse/pull/6456) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
- Fix crash in `yandexConsistentHash` function. Found by fuzz test. [\#6304](https://github.com/ClickHouse/ClickHouse/issues/6304) [\#6305](https://github.com/ClickHouse/ClickHouse/pull/6305) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed the possibility of hanging queries when server is overloaded and the global thread pool becomes near full. This has a higher chance of happening on clusters with a large number of shards (hundreds), because distributed queries allocate a thread per connection to each shard. For example, this issue may reproduce if a cluster of 330 shards is processing 30 concurrent distributed queries. This issue affects all versions starting from 19.2. [\#6301](https://github.com/ClickHouse/ClickHouse/pull/6301) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed logic of `arrayEnumerateUniqRanked` function. [\#6423](https://github.com/ClickHouse/ClickHouse/pull/6423) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix segfault when decoding symbol table.
  [\#6603](https://github.com/ClickHouse/ClickHouse/pull/6603) ([Amos Bird](https://github.com/amosbird))
- Fixed irrelevant exception in cast of `LowCardinality(Nullable)` to not-Nullable column in case it doesn’t contain Nulls (e.g. in a query like `SELECT CAST(CAST('Hello' AS LowCardinality(Nullable(String))) AS String)`). [\#6094](https://github.com/ClickHouse/ClickHouse/issues/6094) [\#6119](https://github.com/ClickHouse/ClickHouse/pull/6119) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Removed extra quoting of description in `system.settings` table. [\#6696](https://github.com/ClickHouse/ClickHouse/issues/6696) [\#6699](https://github.com/ClickHouse/ClickHouse/pull/6699) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Avoid possible deadlock in `TRUNCATE` of Replicated table. [\#6695](https://github.com/ClickHouse/ClickHouse/pull/6695) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix reading in order of sorting key. [\#6189](https://github.com/ClickHouse/ClickHouse/pull/6189) ([Anton Popov](https://github.com/CurtizJ))
- Fix `ALTER TABLE ... UPDATE` query for tables with `enable_mixed_granularity_parts=1`. [\#6543](https://github.com/ClickHouse/ClickHouse/pull/6543) ([alesapin](https://github.com/alesapin))
- Fix bug opened by [\#4405](https://github.com/ClickHouse/ClickHouse/pull/4405) (since 19.4.0). Reproduces in queries to Distributed tables over MergeTree tables when we don’t query any columns (`SELECT 1`). [\#6236](https://github.com/ClickHouse/ClickHouse/pull/6236) ([alesapin](https://github.com/alesapin))
- Fixed overflow in integer division of signed type to unsigned type. The behaviour was exactly as in C or C++ language (integer promotion rules) that may be surprising. Please note that the overflow is still possible when dividing a large signed number by a large unsigned number or vice versa (but that case is less usual). The issue existed in all server versions. [\#6214](https://github.com/ClickHouse/ClickHouse/issues/6214) [\#6233](https://github.com/ClickHouse/ClickHouse/pull/6233) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Limit maximum sleep time for throttling when `max_execution_speed` or `max_execution_speed_bytes` is set. Fixed false errors like `Estimated query execution time (inf seconds) is too long`. [\#5547](https://github.com/ClickHouse/ClickHouse/issues/5547) [\#6232](https://github.com/ClickHouse/ClickHouse/pull/6232) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed issues about using `MATERIALIZED` columns and aliases in `MaterializedView`. [\#448](https://github.com/ClickHouse/ClickHouse/issues/448) [\#3484](https://github.com/ClickHouse/ClickHouse/issues/3484) [\#3450](https://github.com/ClickHouse/ClickHouse/issues/3450) [\#2878](https://github.com/ClickHouse/ClickHouse/issues/2878) [\#2285](https://github.com/ClickHouse/ClickHouse/issues/2285) [\#3796](https://github.com/ClickHouse/ClickHouse/pull/3796) ([Amos Bird](https://github.com/amosbird)) [\#6316](https://github.com/ClickHouse/ClickHouse/pull/6316) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix `FormatFactory` behaviour for input streams which are not implemented as processors. [\#6495](https://github.com/ClickHouse/ClickHouse/pull/6495) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Fixed typo. [\#6631](https://github.com/ClickHouse/ClickHouse/pull/6631) ([Alex Ryndin](https://github.com/alexryndin))
- Fixed typo in the error message (is -\> are).
[\#6839](https://github.com/ClickHouse/ClickHouse/pull/6839) ([Denis Zhuravlev](https://github.com/den-crane)) -- Fixed error while parsing of columns list from string if type contained a comma (this issue was relevant for `File`, `URL`, `HDFS` storages) [\#6217](https://github.com/ClickHouse/ClickHouse/issues/6217). [\#6209](https://github.com/ClickHouse/ClickHouse/pull/6209) ([dimarub2000](https://github.com/dimarub2000)) - -#### Security Fix {#security-fix} - -- This release also contains all bug security fixes from 19.13 and 19.11. -- Fixed the possibility of a fabricated query to cause server crash due to stack overflow in SQL parser. Fixed the possibility of stack overflow in Merge and Distributed tables, materialized views and conditions for row-level security that involve subqueries. [\#6433](https://github.com/ClickHouse/ClickHouse/pull/6433) ([alexey-milovidov](https://github.com/alexey-milovidov)) - -#### Improvement {#improvement-3} - -- Correct implementation of ternary logic for `AND/OR`. [\#6048](https://github.com/ClickHouse/ClickHouse/pull/6048) ([Alexander Kazakov](https://github.com/Akazz)) -- Now values and rows with expired TTL will be removed after `OPTIMIZE ... FINAL` query from old parts without TTL infos or with outdated TTL infos, e.g. after `ALTER ... MODIFY TTL` query. Added queries `SYSTEM STOP/START TTL MERGES` to disallow/allow assign merges with TTL and filter expired values in all merges. [\#6274](https://github.com/ClickHouse/ClickHouse/pull/6274) ([Anton Popov](https://github.com/CurtizJ)) -- Possibility to change the location of ClickHouse history file for client using `CLICKHOUSE_HISTORY_FILE` env. [\#6840](https://github.com/ClickHouse/ClickHouse/pull/6840) ([filimonov](https://github.com/filimonov)) -- Remove `dry_run` flag from `InterpreterSelectQuery`. … [\#6375](https://github.com/ClickHouse/ClickHouse/pull/6375) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -- Support `ASOF JOIN` with `ON` section. [\#6211](https://github.com/ClickHouse/ClickHouse/pull/6211) ([Artem Zuikov](https://github.com/4ertus2)) -- Better support of skip indexes for mutations and replication. Support for `MATERIALIZE/CLEAR INDEX ... IN PARTITION` query. `UPDATE x = x` recalculates all indices that use column `x`. [\#5053](https://github.com/ClickHouse/ClickHouse/pull/5053) ([Nikita Vasilev](https://github.com/nikvas0)) -- Allow to `ATTACH` live views (for example, at the server startup) regardless to `allow_experimental_live_view` setting. [\#6754](https://github.com/ClickHouse/ClickHouse/pull/6754) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- For stack traces gathered by query profiler, do not include stack frames generated by the query profiler itself. [\#6250](https://github.com/ClickHouse/ClickHouse/pull/6250) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Now table functions `values`, `file`, `url`, `hdfs` have support for ALIAS columns. [\#6255](https://github.com/ClickHouse/ClickHouse/pull/6255) ([alexey-milovidov](https://github.com/alexey-milovidov)) -- Throw an exception if `config.d` file doesn’t have the corresponding root element as the config file. [\#6123](https://github.com/ClickHouse/ClickHouse/pull/6123) ([dimarub2000](https://github.com/dimarub2000)) -- Print extra info in exception message for `no space left on device`. 
  [\#6182](https://github.com/ClickHouse/ClickHouse/issues/6182), [\#6252](https://github.com/ClickHouse/ClickHouse/issues/6252) [\#6352](https://github.com/ClickHouse/ClickHouse/pull/6352) ([tavplubix](https://github.com/tavplubix))
- When determining shards of a `Distributed` table to be covered by a read query (for `optimize_skip_unused_shards` = 1) ClickHouse now checks conditions from both `prewhere` and `where` clauses of the select statement. [\#6521](https://github.com/ClickHouse/ClickHouse/pull/6521) ([Alexander Kazakov](https://github.com/Akazz))
- Enabled `SIMDJSON` for machines without AVX2 but with SSE 4.2 and PCLMUL instruction set. [\#6285](https://github.com/ClickHouse/ClickHouse/issues/6285) [\#6320](https://github.com/ClickHouse/ClickHouse/pull/6320) ([alexey-milovidov](https://github.com/alexey-milovidov))
- ClickHouse can work on filesystems without `O_DIRECT` support (such as ZFS and BtrFS) without additional tuning. [\#4449](https://github.com/ClickHouse/ClickHouse/issues/4449) [\#6730](https://github.com/ClickHouse/ClickHouse/pull/6730) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Support push-down predicate for the final subquery. [\#6120](https://github.com/ClickHouse/ClickHouse/pull/6120) ([TCeason](https://github.com/TCeason)) [\#6162](https://github.com/ClickHouse/ClickHouse/pull/6162) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Better `JOIN ON` keys extraction. [\#6131](https://github.com/ClickHouse/ClickHouse/pull/6131) ([Artem Zuikov](https://github.com/4ertus2))
- Updated `SIMDJSON`. [\#6285](https://github.com/ClickHouse/ClickHouse/issues/6285). [\#6306](https://github.com/ClickHouse/ClickHouse/pull/6306) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Optimize selection of the smallest column for `SELECT count()` query. [\#6344](https://github.com/ClickHouse/ClickHouse/pull/6344) ([Amos Bird](https://github.com/amosbird))
- Added `strict` parameter in `windowFunnel()`. When `strict` is set, `windowFunnel()` applies conditions only to unique values. [\#6548](https://github.com/ClickHouse/ClickHouse/pull/6548) ([achimbab](https://github.com/achimbab))
- Safer interface of `mysqlxx::Pool`. [\#6150](https://github.com/ClickHouse/ClickHouse/pull/6150) ([avasiliev](https://github.com/avasiliev))
- The width of the options list printed with the `--help` option now corresponds to the terminal size. [\#6590](https://github.com/ClickHouse/ClickHouse/pull/6590) ([dimarub2000](https://github.com/dimarub2000))
- Disable “read in order” optimization for aggregation without keys. [\#6599](https://github.com/ClickHouse/ClickHouse/pull/6599) ([Anton Popov](https://github.com/CurtizJ))
- HTTP status code for `INCORRECT_DATA` and `TYPE_MISMATCH` error codes was changed from default `500 Internal Server Error` to `400 Bad Request`. [\#6271](https://github.com/ClickHouse/ClickHouse/pull/6271) ([Alexander Rodin](https://github.com/a-rodin))
- Move Join object from `ExpressionAction` into `AnalyzedJoin`. `ExpressionAnalyzer` and `ExpressionAction` do not know about the `Join` class anymore. Its logic is hidden by the `AnalyzedJoin` interface. [\#6801](https://github.com/ClickHouse/ClickHouse/pull/6801) ([Artem Zuikov](https://github.com/4ertus2))
- Fixed possible deadlock of distributed queries when one of the shards is localhost but the query is sent via a network connection.
  [\#6759](https://github.com/ClickHouse/ClickHouse/pull/6759) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Changed semantics of multiple tables `RENAME` to avoid possible deadlocks. [\#6757](https://github.com/ClickHouse/ClickHouse/issues/6757). [\#6756](https://github.com/ClickHouse/ClickHouse/pull/6756) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Rewritten MySQL compatibility server to prevent loading full packet payload in memory. Decreased memory consumption for each connection to approximately `2 * DBMS_DEFAULT_BUFFER_SIZE` (read/write buffers). [\#5811](https://github.com/ClickHouse/ClickHouse/pull/5811) ([Yuriy Baranov](https://github.com/yurriy))
- Move AST alias interpreting logic out of the parser, which doesn’t have to know anything about query semantics. [\#6108](https://github.com/ClickHouse/ClickHouse/pull/6108) ([Artem Zuikov](https://github.com/4ertus2))
- Slightly safer parsing of `NamesAndTypesList`. [\#6408](https://github.com/ClickHouse/ClickHouse/issues/6408). [\#6410](https://github.com/ClickHouse/ClickHouse/pull/6410) ([alexey-milovidov](https://github.com/alexey-milovidov))
- `clickhouse-copier`: Allow using `where_condition` from config with `partition_key` alias in the query for checking partition existence (earlier it was used only in queries reading data). [\#6577](https://github.com/ClickHouse/ClickHouse/pull/6577) ([proller](https://github.com/proller))
- Added optional message argument in `throwIf`. ([\#5772](https://github.com/ClickHouse/ClickHouse/issues/5772)) [\#6329](https://github.com/ClickHouse/ClickHouse/pull/6329) ([Vdimir](https://github.com/Vdimir))
- A server exception raised while sending insertion data is now being processed in the client as well. [\#5891](https://github.com/ClickHouse/ClickHouse/issues/5891) [\#6711](https://github.com/ClickHouse/ClickHouse/pull/6711) ([dimarub2000](https://github.com/dimarub2000))
- Added a metric `DistributedFilesToInsert` that shows the total number of files in filesystem that are selected to send to remote servers by Distributed tables. The number is summed across all shards. [\#6600](https://github.com/ClickHouse/ClickHouse/pull/6600) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Move most of JOINs prepare logic from `ExpressionAction/ExpressionAnalyzer` to `AnalyzedJoin`. [\#6785](https://github.com/ClickHouse/ClickHouse/pull/6785) ([Artem Zuikov](https://github.com/4ertus2))
- Fix TSan [warning](https://clickhouse-test-reports.s3.yandex.net/6399/c1c1d1daa98e199e620766f1bd06a5921050a00d/functional_stateful_tests_(thread).html) ‘lock-order-inversion’. [\#6740](https://github.com/ClickHouse/ClickHouse/pull/6740) ([Vasily Nemkov](https://github.com/Enmk))
- Better information messages about lack of Linux capabilities. Logging fatal errors with “fatal” level, which will make it easier to find them in `system.text_log`. [\#6441](https://github.com/ClickHouse/ClickHouse/pull/6441) ([alexey-milovidov](https://github.com/alexey-milovidov))
- When dumping temporary data to disk is enabled to restrict memory usage during `GROUP BY` or `ORDER BY`, the free disk space was not checked. The fix adds a new setting `min_free_disk_space`: when the free disk space is smaller than the threshold, the query will stop and throw `ErrorCodes::NOT_ENOUGH_SPACE`.
  [\#6678](https://github.com/ClickHouse/ClickHouse/pull/6678) ([Weiqing Xu](https://github.com/weiqxu)) [\#6691](https://github.com/ClickHouse/ClickHouse/pull/6691) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Removed recursive rwlock by thread. It made no sense, because threads are reused between queries. A `SELECT` query may acquire a lock in one thread, hold a lock from another thread and exit from the first thread. At the same time, the first thread can be reused by a `DROP` query. This could lead to false “Attempt to acquire exclusive lock recursively” messages. [\#6771](https://github.com/ClickHouse/ClickHouse/pull/6771) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Split `ExpressionAnalyzer.appendJoin()`. Prepare a place in `ExpressionAnalyzer` for `MergeJoin`. [\#6524](https://github.com/ClickHouse/ClickHouse/pull/6524) ([Artem Zuikov](https://github.com/4ertus2))
- Added `mysql_native_password` authentication plugin to MySQL compatibility server. [\#6194](https://github.com/ClickHouse/ClickHouse/pull/6194) ([Yuriy Baranov](https://github.com/yurriy))
- Fewer `clock_gettime` calls; fixed ABI compatibility between debug/release in `Allocator` (insignificant issue). [\#6197](https://github.com/ClickHouse/ClickHouse/pull/6197) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Move `collectUsedColumns` from `ExpressionAnalyzer` to `SyntaxAnalyzer`. `SyntaxAnalyzer` makes `required_source_columns` itself now. [\#6416](https://github.com/ClickHouse/ClickHouse/pull/6416) ([Artem Zuikov](https://github.com/4ertus2))
- Add setting `joined_subquery_requires_alias` to require aliases for subselects and table functions in `FROM` when more than one table is present (i.e. queries with JOINs). [\#6733](https://github.com/ClickHouse/ClickHouse/pull/6733) ([Artem Zuikov](https://github.com/4ertus2))
- Extract `GetAggregatesVisitor` class from `ExpressionAnalyzer`. [\#6458](https://github.com/ClickHouse/ClickHouse/pull/6458) ([Artem Zuikov](https://github.com/4ertus2))
- `system.query_log`: change data type of `type` column to `Enum`. [\#6265](https://github.com/ClickHouse/ClickHouse/pull/6265) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
- Static linking of `sha256_password` authentication plugin. [\#6512](https://github.com/ClickHouse/ClickHouse/pull/6512) ([Yuriy Baranov](https://github.com/yurriy))
- Avoid extra dependency for the setting `compile` to work. In previous versions, the user may get errors like `cannot open crti.o`, `unable to find library -lc` etc. [\#6309](https://github.com/ClickHouse/ClickHouse/pull/6309) ([alexey-milovidov](https://github.com/alexey-milovidov))
- More validation of the input that may come from a malicious replica. [\#6303](https://github.com/ClickHouse/ClickHouse/pull/6303) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Now `clickhouse-obfuscator` file is available in `clickhouse-client` package. In previous versions it was available as `clickhouse obfuscator` (with whitespace). [\#5816](https://github.com/ClickHouse/ClickHouse/issues/5816) [\#6609](https://github.com/ClickHouse/ClickHouse/pull/6609) ([dimarub2000](https://github.com/dimarub2000))
- Fixed deadlock when we have at least two queries that read at least two tables in different order and another query that performs a DDL operation on one of the tables. Fixed another very rare deadlock.
-- Added an `os_thread_ids` column to `system.processes` and `system.query_log` for better debugging possibilities. [\#6763](https://github.com/ClickHouse/ClickHouse/pull/6763) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- A workaround for PHP mysqlnd extension bugs which occur when `sha256_password` is used as a default authentication plugin (described in [\#6031](https://github.com/ClickHouse/ClickHouse/issues/6031)). [\#6113](https://github.com/ClickHouse/ClickHouse/pull/6113) ([Yuriy Baranov](https://github.com/yurriy))
-- Removed an unneeded place with changed nullability columns. [\#6693](https://github.com/ClickHouse/ClickHouse/pull/6693) ([Artem Zuikov](https://github.com/4ertus2))
-- Set the default value of `queue_max_wait_ms` to zero, because the current value (five seconds) makes no sense. There are rare circumstances when this setting has any use. Added the settings `replace_running_query_max_wait_ms`, `kafka_max_wait_ms` and `connection_pool_max_wait_ms` for disambiguation. [\#6692](https://github.com/ClickHouse/ClickHouse/pull/6692) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Extracted `SelectQueryExpressionAnalyzer` from `ExpressionAnalyzer`. Kept the latter for non-select queries. [\#6499](https://github.com/ClickHouse/ClickHouse/pull/6499) ([Artem Zuikov](https://github.com/4ertus2))
-- Removed duplicate input and output formats. [\#6239](https://github.com/ClickHouse/ClickHouse/pull/6239) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
-- Allow the user to override the `poll_interval` and `idle_connection_timeout` settings on connection. [\#6230](https://github.com/ClickHouse/ClickHouse/pull/6230) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- `MergeTree` now has an additional option `ttl_only_drop_parts` (disabled by default) to avoid partial pruning of parts, so that parts are dropped completely when all rows in them are expired. [\#6191](https://github.com/ClickHouse/ClickHouse/pull/6191) ([Sergi Vladykin](https://github.com/svladykin))
-- Type checks for set index functions. Throw an exception if a function gets a wrong type. This fixes the fuzz test with UBSan. [\#6511](https://github.com/ClickHouse/ClickHouse/pull/6511) ([Nikita Vasilev](https://github.com/nikvas0))
-
-#### Performance Improvement {#performance-improvement-2}
-
-- Optimized queries with an `ORDER BY expressions` clause, where `expressions` have a prefix coinciding with the sorting key in `MergeTree` tables. This optimization is controlled by the `optimize_read_in_order` setting; see the example after this group of entries. [\#6054](https://github.com/ClickHouse/ClickHouse/pull/6054) [\#6629](https://github.com/ClickHouse/ClickHouse/pull/6629) ([Anton Popov](https://github.com/CurtizJ))
-- Allow using multiple threads during parts loading and removal. [\#6372](https://github.com/ClickHouse/ClickHouse/issues/6372) [\#6074](https://github.com/ClickHouse/ClickHouse/issues/6074) [\#6438](https://github.com/ClickHouse/ClickHouse/pull/6438) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Implemented a batch variant of updating aggregate function states. It may lead to performance benefits. [\#6435](https://github.com/ClickHouse/ClickHouse/pull/6435) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Using the `FastOps` library for the functions `exp`, `log`, `sigmoid`, `tanh`. FastOps is a fast vector math library from Michael Parakhin (Yandex CTO). Improved performance of the `exp` and `log` functions more than 6 times. The functions `exp` and `log` with a `Float32` argument will return `Float32` (in previous versions they always returned `Float64`). Now `exp(nan)` may return `inf`. The result of the `exp` and `log` functions may not be the nearest machine-representable number to the true answer. [\#6254](https://github.com/ClickHouse/ClickHouse/pull/6254) ([alexey-milovidov](https://github.com/alexey-milovidov)) Using Danila Kutenin’s variant to make FastOps work [\#6317](https://github.com/ClickHouse/ClickHouse/pull/6317) ([alexey-milovidov](https://github.com/alexey-milovidov))
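A small sketch of the `optimize_read_in_order` optimization mentioned above; the table and columns are hypothetical:

```
CREATE TABLE hits (CounterID UInt32, EventDate Date, URL String)
ENGINE = MergeTree ORDER BY (CounterID, EventDate);

SET optimize_read_in_order = 1;
-- The ORDER BY prefix coincides with the sorting key, so data can be read
-- in order instead of being fully sorted.
SELECT URL FROM hits ORDER BY CounterID, EventDate LIMIT 10;
```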
-- Disabled consecutive key optimization for `UInt8/16`. [\#6298](https://github.com/ClickHouse/ClickHouse/pull/6298) [\#6701](https://github.com/ClickHouse/ClickHouse/pull/6701) ([akuzm](https://github.com/akuzm))
-- Improved performance of the `simdjson` library by getting rid of dynamic allocation in `ParsedJson::Iterator`. [\#6479](https://github.com/ClickHouse/ClickHouse/pull/6479) ([Vitaly Baranov](https://github.com/vitlibar))
-- Pre-fault pages when allocating memory with `mmap()`. [\#6667](https://github.com/ClickHouse/ClickHouse/pull/6667) ([akuzm](https://github.com/akuzm))
-- Fixed a performance bug in `Decimal` comparison. [\#6380](https://github.com/ClickHouse/ClickHouse/pull/6380) ([Artem Zuikov](https://github.com/4ertus2))
-
-#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-4}
-
-- Removed Compiler (runtime template instantiation) because we’ve won over its performance. [\#6646](https://github.com/ClickHouse/ClickHouse/pull/6646) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Added a performance test to show the degradation of performance in gcc-9 in a more isolated way. [\#6302](https://github.com/ClickHouse/ClickHouse/pull/6302) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Added the table function `numbers_mt`, which is a multithreaded version of `numbers`; see the example after this group of entries. Updated performance tests with hash functions. [\#6554](https://github.com/ClickHouse/ClickHouse/pull/6554) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
-- Comparison mode in `clickhouse-benchmark`. [\#6220](https://github.com/ClickHouse/ClickHouse/issues/6220) [\#6343](https://github.com/ClickHouse/ClickHouse/pull/6343) ([dimarub2000](https://github.com/dimarub2000))
-- Best effort for printing stack traces. Also added `SIGPROF` as a debugging signal to print the stack trace of a running thread. [\#6529](https://github.com/ClickHouse/ClickHouse/pull/6529) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Every function in its own file, part 10. [\#6321](https://github.com/ClickHouse/ClickHouse/pull/6321) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Removed the doubled const `TABLE_IS_READ_ONLY`. [\#6566](https://github.com/ClickHouse/ClickHouse/pull/6566) ([filimonov](https://github.com/filimonov))
-- Formatting changes for the `StringHashMap` PR [\#5417](https://github.com/ClickHouse/ClickHouse/issues/5417). [\#6700](https://github.com/ClickHouse/ClickHouse/pull/6700) ([akuzm](https://github.com/akuzm))
-- Better subquery creation for joins in `ExpressionAnalyzer`. [\#6824](https://github.com/ClickHouse/ClickHouse/pull/6824) ([Artem Zuikov](https://github.com/4ertus2))
-- Removed a redundant condition (found by PVS-Studio). [\#6775](https://github.com/ClickHouse/ClickHouse/pull/6775) ([akuzm](https://github.com/akuzm))
-- Separated the hash table interface for `ReverseIndex`. [\#6672](https://github.com/ClickHouse/ClickHouse/pull/6672) ([akuzm](https://github.com/akuzm))
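For example, the multithreaded `numbers_mt` mentioned above can be compared against the single-threaded `numbers` (an illustrative query, not from the PR):

```
-- Single-threaded generation of one billion rows:
SELECT count() FROM numbers(1000000000);
-- The same range generated by multiple threads:
SELECT count() FROM numbers_mt(1000000000);
```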
-- Refactoring of settings. [\#6689](https://github.com/ClickHouse/ClickHouse/pull/6689) ([alesapin](https://github.com/alesapin))
-- Added comments for `set` index functions. [\#6319](https://github.com/ClickHouse/ClickHouse/pull/6319) ([Nikita Vasilev](https://github.com/nikvas0))
-- Increased the OOM score in the debug version on Linux. [\#6152](https://github.com/ClickHouse/ClickHouse/pull/6152) ([akuzm](https://github.com/akuzm))
-- HDFS HA now works in the debug build. [\#6650](https://github.com/ClickHouse/ClickHouse/pull/6650) ([Weiqing Xu](https://github.com/weiqxu))
-- Added a test to `transform_query_for_external_database`. [\#6388](https://github.com/ClickHouse/ClickHouse/pull/6388) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Added a test for multiple materialized views for a Kafka table. [\#6509](https://github.com/ClickHouse/ClickHouse/pull/6509) ([Ivan](https://github.com/abyss7))
-- Make a better build scheme. [\#6500](https://github.com/ClickHouse/ClickHouse/pull/6500) ([Ivan](https://github.com/abyss7))
-- Fixed the `test_external_dictionaries` integration test in case it was executed under a non-root user. [\#6507](https://github.com/ClickHouse/ClickHouse/pull/6507) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
-- The bug reproduces when the total size of written packets exceeds `DBMS_DEFAULT_BUFFER_SIZE`. [\#6204](https://github.com/ClickHouse/ClickHouse/pull/6204) ([Yuriy Baranov](https://github.com/yurriy))
-- Added a test for the `RENAME` table race condition. [\#6752](https://github.com/ClickHouse/ClickHouse/pull/6752) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Avoid a data race on Settings in `KILL QUERY`. [\#6753](https://github.com/ClickHouse/ClickHouse/pull/6753) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Added an integration test for handling errors by a cache dictionary. [\#6755](https://github.com/ClickHouse/ClickHouse/pull/6755) ([Vitaly Baranov](https://github.com/vitlibar))
-- Disabled parsing of ELF object files on Mac OS, because it makes no sense. [\#6578](https://github.com/ClickHouse/ClickHouse/pull/6578) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Attempt to make the changelog generator better. [\#6327](https://github.com/ClickHouse/ClickHouse/pull/6327) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Added the `-Wshadow` switch to GCC. [\#6325](https://github.com/ClickHouse/ClickHouse/pull/6325) ([kreuzerkrieg](https://github.com/kreuzerkrieg))
-- Removed obsolete code for `mimalloc` support. [\#6715](https://github.com/ClickHouse/ClickHouse/pull/6715) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- `zlib-ng` determines x86 capabilities and saves this info to global variables. This is done in the `deflateInit` call, which may be made by different threads simultaneously. To avoid multithreaded writes, do it on library startup. [\#6141](https://github.com/ClickHouse/ClickHouse/pull/6141) ([akuzm](https://github.com/akuzm))
-- Regression test for a bug in JOIN which was fixed in [\#5192](https://github.com/ClickHouse/ClickHouse/issues/5192). [\#6147](https://github.com/ClickHouse/ClickHouse/pull/6147) ([Bakhtiyor Ruziev](https://github.com/theruziev))
-- Fixed the MSan report. [\#6144](https://github.com/ClickHouse/ClickHouse/pull/6144) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed the flapping TTL test. [\#6782](https://github.com/ClickHouse/ClickHouse/pull/6782) ([Anton Popov](https://github.com/CurtizJ))
-- Fixed a false data race in the `MergeTreeDataPart::is_frozen` field. [\#6583](https://github.com/ClickHouse/ClickHouse/pull/6583) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed timeouts in the fuzz test. In the previous version, it managed to find a false hangup in the query `SELECT * FROM numbers_mt(gccMurmurHash(''))`. [\#6582](https://github.com/ClickHouse/ClickHouse/pull/6582) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Added debug checks to `static_cast` of columns. [\#6581](https://github.com/ClickHouse/ClickHouse/pull/6581) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Support for Oracle Linux in official RPM packages. [\#6356](https://github.com/ClickHouse/ClickHouse/issues/6356) [\#6585](https://github.com/ClickHouse/ClickHouse/pull/6585) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Changed JSON perftests from `once` to `loop` type. [\#6536](https://github.com/ClickHouse/ClickHouse/pull/6536) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
-- `odbc-bridge.cpp` defines `main()`, so it should not be included in `clickhouse-lib`. [\#6538](https://github.com/ClickHouse/ClickHouse/pull/6538) ([Orivej Desh](https://github.com/orivej))
-- Test for a crash in `FULL|RIGHT JOIN` with NULLs in the right table’s keys. [\#6362](https://github.com/ClickHouse/ClickHouse/pull/6362) ([Artem Zuikov](https://github.com/4ertus2))
-- Added a test for the limit on expansion of aliases, just in case. [\#6442](https://github.com/ClickHouse/ClickHouse/pull/6442) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Switched from `boost::filesystem` to `std::filesystem` where appropriate. [\#6253](https://github.com/ClickHouse/ClickHouse/pull/6253) [\#6385](https://github.com/ClickHouse/ClickHouse/pull/6385) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Added RPM packages to the website. [\#6251](https://github.com/ClickHouse/ClickHouse/pull/6251) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Added a test for the fixed `Unknown identifier` exception in the `IN` section. [\#6708](https://github.com/ClickHouse/ClickHouse/pull/6708) ([Artem Zuikov](https://github.com/4ertus2))
-- Simplified `shared_ptr_helper` because people face difficulties understanding it. [\#6675](https://github.com/ClickHouse/ClickHouse/pull/6675) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Added performance tests for the fixed Gorilla and DoubleDelta codecs. [\#6179](https://github.com/ClickHouse/ClickHouse/pull/6179) ([Vasily Nemkov](https://github.com/Enmk))
-- Split the integration test `test_dictionaries` into 4 separate tests. [\#6776](https://github.com/ClickHouse/ClickHouse/pull/6776) ([Vitaly Baranov](https://github.com/vitlibar))
-- Fixed a PVS-Studio warning in `PipelineExecutor`. [\#6777](https://github.com/ClickHouse/ClickHouse/pull/6777) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
-- Allow using the `library` dictionary source with ASan. [\#6482](https://github.com/ClickHouse/ClickHouse/pull/6482) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Added an option to generate a changelog from a list of PRs. [\#6350](https://github.com/ClickHouse/ClickHouse/pull/6350) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Lock the `TinyLog` storage when reading. [\#6226](https://github.com/ClickHouse/ClickHouse/pull/6226) ([akuzm](https://github.com/akuzm))
-- Check for broken symlinks in CI. [\#6634](https://github.com/ClickHouse/ClickHouse/pull/6634) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Increased the timeout for the “stack overflow” test because it may take a long time in a debug build. [\#6637](https://github.com/ClickHouse/ClickHouse/pull/6637) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Added a check for double whitespaces. [\#6643](https://github.com/ClickHouse/ClickHouse/pull/6643) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed `new/delete` memory tracking when built with sanitizers. Tracking is not clear. It only prevents memory limit exceptions in tests. [\#6450](https://github.com/ClickHouse/ClickHouse/pull/6450) ([Artem Zuikov](https://github.com/4ertus2))
-- Enabled back the check of undefined symbols while linking. [\#6453](https://github.com/ClickHouse/ClickHouse/pull/6453) ([Ivan](https://github.com/abyss7))
-- Avoid rebuilding `hyperscan` every day. [\#6307](https://github.com/ClickHouse/ClickHouse/pull/6307) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a UBSan report in `ProtobufWriter`. [\#6163](https://github.com/ClickHouse/ClickHouse/pull/6163) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Don’t allow using the query profiler with sanitizers because it is not compatible. [\#6769](https://github.com/ClickHouse/ClickHouse/pull/6769) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Added a test for reloading a dictionary by timer after a failure. [\#6114](https://github.com/ClickHouse/ClickHouse/pull/6114) ([Vitaly Baranov](https://github.com/vitlibar))
-- Fixed an inconsistency in the `PipelineExecutor::prepareProcessor` argument type. [\#6494](https://github.com/ClickHouse/ClickHouse/pull/6494) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
-- Added a test for bad URIs. [\#6493](https://github.com/ClickHouse/ClickHouse/pull/6493) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Added more checks to the `CAST` function. This should get more information about the segmentation fault in the fuzz test. [\#6346](https://github.com/ClickHouse/ClickHouse/pull/6346) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
-- Added `gcc-9` support to the `docker/builder` container that builds the image locally. [\#6333](https://github.com/ClickHouse/ClickHouse/pull/6333) ([Gleb Novikov](https://github.com/NanoBjorn))
-- Test for a primary key with `LowCardinality(String)`. [\#5044](https://github.com/ClickHouse/ClickHouse/issues/5044) [\#6219](https://github.com/ClickHouse/ClickHouse/pull/6219) ([dimarub2000](https://github.com/dimarub2000))
-- Fixed tests affected by slow stack trace printing. [\#6315](https://github.com/ClickHouse/ClickHouse/pull/6315) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Added a test case for the crash in `groupUniqArray` fixed in [\#6029](https://github.com/ClickHouse/ClickHouse/pull/6029). [\#4402](https://github.com/ClickHouse/ClickHouse/issues/4402) [\#6129](https://github.com/ClickHouse/ClickHouse/pull/6129) ([akuzm](https://github.com/akuzm))
-- Fixed indices mutation tests. [\#6645](https://github.com/ClickHouse/ClickHouse/pull/6645) ([Nikita Vasilev](https://github.com/nikvas0))
-- In performance tests, do not read the query log for queries we didn’t run. [\#6427](https://github.com/ClickHouse/ClickHouse/pull/6427) ([akuzm](https://github.com/akuzm))
-- A materialized view now can be created with any LowCardinality types, regardless of the setting about suspicious LowCardinality types. [\#6428](https://github.com/ClickHouse/ClickHouse/pull/6428) ([Olga Khvostikova](https://github.com/stavrolia))
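A hedged sketch of the materialized-view entry above (the table and view names are made up; `allow_suspicious_low_cardinality_types` is presumably the setting being referred to):

```
CREATE TABLE src (id UInt64, code UInt8) ENGINE = MergeTree ORDER BY id;

-- LowCardinality over a small fixed-size type is normally rejected as suspicious,
-- but a materialized view may now use it regardless of that setting.
CREATE MATERIALIZED VIEW mv ENGINE = MergeTree ORDER BY id
AS SELECT id, CAST(code AS LowCardinality(UInt8)) AS code FROM src;
```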
-- Updated tests for the `send_logs_level` setting. [\#6207](https://github.com/ClickHouse/ClickHouse/pull/6207) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
-- Fixed the build under gcc-8.2. [\#6196](https://github.com/ClickHouse/ClickHouse/pull/6196) ([Max Akhmedov](https://github.com/zlobober))
-- Fixed the build with internal libc++. [\#6724](https://github.com/ClickHouse/ClickHouse/pull/6724) ([Ivan](https://github.com/abyss7))
-- Fixed the shared build with the `rdkafka` library. [\#6101](https://github.com/ClickHouse/ClickHouse/pull/6101) ([Ivan](https://github.com/abyss7))
-- Fixes for the Mac OS build (incomplete). [\#6390](https://github.com/ClickHouse/ClickHouse/pull/6390) ([alexey-milovidov](https://github.com/alexey-milovidov)) [\#6429](https://github.com/ClickHouse/ClickHouse/pull/6429) ([alex-zaitsev](https://github.com/alex-zaitsev))
-- Fixed the “splitted” build. [\#6618](https://github.com/ClickHouse/ClickHouse/pull/6618) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Other build fixes: [\#6186](https://github.com/ClickHouse/ClickHouse/pull/6186) ([Amos Bird](https://github.com/amosbird)) [\#6486](https://github.com/ClickHouse/ClickHouse/pull/6486) [\#6348](https://github.com/ClickHouse/ClickHouse/pull/6348) ([vxider](https://github.com/Vxider)) [\#6744](https://github.com/ClickHouse/ClickHouse/pull/6744) ([Ivan](https://github.com/abyss7)) [\#6016](https://github.com/ClickHouse/ClickHouse/pull/6016) [\#6421](https://github.com/ClickHouse/ClickHouse/pull/6421) [\#6491](https://github.com/ClickHouse/ClickHouse/pull/6491) ([proller](https://github.com/proller))
-
-#### Backward Incompatible Change {#backward-incompatible-change-3}
-
-- Removed the rarely used table function `catBoostPool` and the storage `CatBoostPool`. If you have used this table function, please write an email to `clickhouse-feedback@yandex-team.com`. Note that CatBoost integration remains and will be supported. [\#6279](https://github.com/ClickHouse/ClickHouse/pull/6279) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Disabled `ANY RIGHT JOIN` and `ANY FULL JOIN` by default. Set the `any_join_distinct_right_table_keys` setting to enable them; see the example below. [\#5126](https://github.com/ClickHouse/ClickHouse/issues/5126) [\#6351](https://github.com/ClickHouse/ClickHouse/pull/6351) ([Artem Zuikov](https://github.com/4ertus2))
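A short sketch of re-enabling the legacy behavior described in the last entry above (illustrative only; `t1` and `t2` are hypothetical tables):

```
-- ANY RIGHT JOIN and ANY FULL JOIN are rejected by default from this release on;
-- the old semantics can be brought back explicitly:
SET any_join_distinct_right_table_keys = 1;
SELECT * FROM t1 ANY RIGHT JOIN t2 USING (k);
```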
-
-## ClickHouse release 19.13 {#clickhouse-release-19-13}
-
-### ClickHouse release 19.13.6.51, 2019-10-02 {#clickhouse-release-19-13-6-51-2019-10-02}
-
-#### Bug Fix {#bug-fix-9}
-
-- This release also contains all bug fixes from 19.11.12.69.
-
-### ClickHouse release 19.13.5.44, 2019-09-20 {#clickhouse-release-19-13-5-44-2019-09-20}
-
-#### Bug Fix {#bug-fix-10}
-
-- This release also contains all bug fixes from 19.14.6.12.
-- Fixed a possible inconsistent state of a table while executing a `DROP` query for a replicated table while ZooKeeper is not accessible. [\#6045](https://github.com/ClickHouse/ClickHouse/issues/6045) [\#6413](https://github.com/ClickHouse/ClickHouse/pull/6413) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
-- Fix for a data race in StorageMerge. [\#6717](https://github.com/ClickHouse/ClickHouse/pull/6717) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a bug introduced in the query profiler which leads to endless `recv` from a socket. [\#6386](https://github.com/ClickHouse/ClickHouse/pull/6386) ([alesapin](https://github.com/alesapin))
-- Fixed excessive CPU usage while executing the `JSONExtractRaw` function over a boolean value. [\#6208](https://github.com/ClickHouse/ClickHouse/pull/6208) ([Vitaly Baranov](https://github.com/vitlibar))
-- Fixed a regression while pushing to a materialized view. [\#6415](https://github.com/ClickHouse/ClickHouse/pull/6415) ([Ivan](https://github.com/abyss7))
-- The table function `url` had a vulnerability that allowed an attacker to inject arbitrary HTTP headers in the request. This issue was found by [Nikita Tikhomirov](https://github.com/NSTikhomirov). [\#6466](https://github.com/ClickHouse/ClickHouse/pull/6466) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a useless `AST` check in the Set index. [\#6510](https://github.com/ClickHouse/ClickHouse/issues/6510) [\#6651](https://github.com/ClickHouse/ClickHouse/pull/6651) ([Nikita Vasilev](https://github.com/nikvas0))
-- Fixed parsing of `AggregateFunction` values embedded in a query. [\#6575](https://github.com/ClickHouse/ClickHouse/issues/6575) [\#6773](https://github.com/ClickHouse/ClickHouse/pull/6773) ([Zhichang Yu](https://github.com/yuzhichang))
-- Fixed wrong behaviour of the `trim` family of functions. [\#6647](https://github.com/ClickHouse/ClickHouse/pull/6647) ([alexey-milovidov](https://github.com/alexey-milovidov))
-
-### ClickHouse release 19.13.4.32, 2019-09-10 {#clickhouse-release-19-13-4-32-2019-09-10}
-
-#### Bug Fix {#bug-fix-11}
-
-- This release also contains all bug and security fixes from 19.11.9.52 and 19.11.10.54.
-- Fixed a data race in the `system.parts` table and the `ALTER` query. [\#6245](https://github.com/ClickHouse/ClickHouse/issues/6245) [\#6513](https://github.com/ClickHouse/ClickHouse/pull/6513) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed mismatched headers in streams that happened when reading from an empty distributed table with sample and prewhere. [\#6167](https://github.com/ClickHouse/ClickHouse/issues/6167) ([Lixiang Qian](https://github.com/fancyqlx)) [\#6823](https://github.com/ClickHouse/ClickHouse/pull/6823) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
-- Fixed a crash when using the `IN` clause with a subquery with a tuple. [\#6125](https://github.com/ClickHouse/ClickHouse/issues/6125) [\#6550](https://github.com/ClickHouse/ClickHouse/pull/6550) ([tavplubix](https://github.com/tavplubix))
-- Fixed the case with same column names in the `GLOBAL JOIN ON` section. [\#6181](https://github.com/ClickHouse/ClickHouse/pull/6181) ([Artem Zuikov](https://github.com/4ertus2))
-- Fixed a crash when casting types to `Decimal` that do not support it. Throw an exception instead. [\#6297](https://github.com/ClickHouse/ClickHouse/pull/6297) ([Artem Zuikov](https://github.com/4ertus2))
-- Fixed a crash in the `extractAll()` function. [\#6644](https://github.com/ClickHouse/ClickHouse/pull/6644) ([Artem Zuikov](https://github.com/4ertus2))
-- Query transformation for the `MySQL`, `ODBC`, `JDBC` table functions now works properly for `SELECT WHERE` queries with multiple `AND` expressions. [\#6381](https://github.com/ClickHouse/ClickHouse/issues/6381) [\#6676](https://github.com/ClickHouse/ClickHouse/pull/6676) ([dimarub2000](https://github.com/dimarub2000))
-- Added previous declaration checks for MySQL 8 integration. [\#6569](https://github.com/ClickHouse/ClickHouse/pull/6569) ([Rafael David Tinoco](https://github.com/rafaeldtinoco))
-
-#### Security Fix {#security-fix-1}
-
-- Fixed two vulnerabilities in codecs in the decompression phase (a malicious user can fabricate compressed data that will lead to a buffer overflow in decompression). [\#6670](https://github.com/ClickHouse/ClickHouse/pull/6670) ([Artem Zuikov](https://github.com/4ertus2))
-
-### ClickHouse release 19.13.3.26, 2019-08-22 {#clickhouse-release-19-13-3-26-2019-08-22}
-
-#### Bug Fix {#bug-fix-12}
-
-- Fixed the `ALTER TABLE ... UPDATE` query for tables with `enable_mixed_granularity_parts=1`. [\#6543](https://github.com/ClickHouse/ClickHouse/pull/6543) ([alesapin](https://github.com/alesapin))
-- Fixed an NPE when using the IN clause with a subquery with a tuple. [\#6125](https://github.com/ClickHouse/ClickHouse/issues/6125) [\#6550](https://github.com/ClickHouse/ClickHouse/pull/6550) ([tavplubix](https://github.com/tavplubix))
-- Fixed an issue that if a stale replica becomes alive, it may still have data parts that were removed by DROP PARTITION. [\#6522](https://github.com/ClickHouse/ClickHouse/issues/6522) [\#6523](https://github.com/ClickHouse/ClickHouse/pull/6523) ([tavplubix](https://github.com/tavplubix))
-- Fixed an issue with parsing CSV. [\#6426](https://github.com/ClickHouse/ClickHouse/issues/6426) [\#6559](https://github.com/ClickHouse/ClickHouse/pull/6559) ([tavplubix](https://github.com/tavplubix))
-- Fixed a data race in the system.parts table and the ALTER query. This fixes [\#6245](https://github.com/ClickHouse/ClickHouse/issues/6245). [\#6513](https://github.com/ClickHouse/ClickHouse/pull/6513) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed wrong code in mutations that may lead to memory corruption. Fixed a segfault with a read of address `0x14c0` that may happen due to concurrent `DROP TABLE` and `SELECT` from `system.parts` or `system.parts_columns`. Fixed a race condition in the preparation of mutation queries. Fixed a deadlock caused by `OPTIMIZE` of Replicated tables and concurrent modification operations like ALTERs. [\#6514](https://github.com/ClickHouse/ClickHouse/pull/6514) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed possible data loss after an `ALTER DELETE` query on a table with a skipping index. [\#6224](https://github.com/ClickHouse/ClickHouse/issues/6224) [\#6282](https://github.com/ClickHouse/ClickHouse/pull/6282) ([Nikita Vasilev](https://github.com/nikvas0))
-
-#### Security Fix {#security-fix-2}
-
-- If an attacker has write access to ZooKeeper and is able to run a custom server available from the network where ClickHouse runs, they can create a custom-built malicious server that will act as a ClickHouse replica and register it in ZooKeeper. When another replica fetches a data part from the malicious replica, it can force clickhouse-server to write to an arbitrary path on the filesystem. Found by Eldar Zaitov, information security team at Yandex. [\#6247](https://github.com/ClickHouse/ClickHouse/pull/6247) ([alexey-milovidov](https://github.com/alexey-milovidov))
-
-### ClickHouse release 19.13.2.19, 2019-08-14 {#clickhouse-release-19-13-2-19-2019-08-14}
-
-#### New Feature {#new-feature-5}
-
-- Sampling profiler on query level. [Example](https://gist.github.com/alexey-milovidov/92758583dd41c24c360fdb8d6a4da194). [\#4247](https://github.com/ClickHouse/ClickHouse/issues/4247) ([laplab](https://github.com/laplab)) [\#6124](https://github.com/ClickHouse/ClickHouse/pull/6124) ([alexey-milovidov](https://github.com/alexey-milovidov)) [\#6250](https://github.com/ClickHouse/ClickHouse/pull/6250) [\#6283](https://github.com/ClickHouse/ClickHouse/pull/6283) [\#6386](https://github.com/ClickHouse/ClickHouse/pull/6386)
-- Allow specifying a list of columns with the `COLUMNS('regexp')` expression that works like a more sophisticated variant of the `*` asterisk; see the example after this section. [\#5951](https://github.com/ClickHouse/ClickHouse/pull/5951) ([mfridental](https://github.com/mfridental)), ([alexey-milovidov](https://github.com/alexey-milovidov))
-- `CREATE TABLE AS table_function()` is now possible. [\#6057](https://github.com/ClickHouse/ClickHouse/pull/6057) ([dimarub2000](https://github.com/dimarub2000))
-- The Adam optimizer for stochastic gradient descent is used by default in the `stochasticLinearRegression()` and `stochasticLogisticRegression()` aggregate functions, because it shows good quality with almost no tuning. [\#6000](https://github.com/ClickHouse/ClickHouse/pull/6000) ([Quid37](https://github.com/Quid37))
-- Added functions for working with the custom week number. [\#5212](https://github.com/ClickHouse/ClickHouse/pull/5212) ([Andy Yang](https://github.com/andyyzh))
-- `RENAME` queries now work with all storages. [\#5953](https://github.com/ClickHouse/ClickHouse/pull/5953) ([Ivan](https://github.com/abyss7))
-- Now the client receives logs from the server at any desired level by setting `send_logs_level`, regardless of the log level specified in the server settings. [\#5964](https://github.com/ClickHouse/ClickHouse/pull/5964) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
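A brief sketch of the `COLUMNS('regexp')` expression from the feature list above (the table is hypothetical):

```
CREATE TABLE metrics (dt Date, value_cpu Float64, value_mem Float64)
ENGINE = MergeTree ORDER BY dt;

-- Selects value_cpu and value_mem: every column whose name matches the regexp.
SELECT COLUMNS('^value_') FROM metrics;
```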
-
-#### Backward Incompatible Change {#backward-incompatible-change-4}
-
-- The setting `input_format_defaults_for_omitted_fields` is enabled by default. Inserts into Distributed tables need this setting to be the same across the cluster (you need to set it before the rolling update). It enables calculation of complex default expressions for omitted fields in `JSONEachRow` and `CSV*` formats. It should be the expected behavior but may lead to a negligible performance difference. [\#6043](https://github.com/ClickHouse/ClickHouse/pull/6043) ([Artem Zuikov](https://github.com/4ertus2)), [\#5625](https://github.com/ClickHouse/ClickHouse/pull/5625) ([akuzm](https://github.com/akuzm))
-
-#### Experimental features {#experimental-features}
-
-- New query processing pipeline. Use the `experimental_use_processors=1` option to enable it. Use it at your own risk. [\#4914](https://github.com/ClickHouse/ClickHouse/pull/4914) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
-
-#### Bug Fix {#bug-fix-13}
-
-- Kafka integration has been fixed in this version.
-- Fixed `DoubleDelta` encoding of `Int64` for large `DoubleDelta` values, improved `DoubleDelta` encoding for random data for `Int32`. [\#5998](https://github.com/ClickHouse/ClickHouse/pull/5998) ([Vasily Nemkov](https://github.com/Enmk))
-- Fixed overestimation of `max_rows_to_read` if the setting `merge_tree_uniform_read_distribution` is set to 0. [\#6019](https://github.com/ClickHouse/ClickHouse/pull/6019) ([alexey-milovidov](https://github.com/alexey-milovidov))
-
-#### Improvement {#improvement-4}
-
-- Throws an exception if a `config.d` file doesn’t have the same root element as the config file. [\#6123](https://github.com/ClickHouse/ClickHouse/pull/6123) ([dimarub2000](https://github.com/dimarub2000))
-
-#### Performance Improvement {#performance-improvement-3}
-
-- Optimized `count()`. Now it uses the smallest column (if possible). [\#6028](https://github.com/ClickHouse/ClickHouse/pull/6028) ([Amos Bird](https://github.com/amosbird))
-
-#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-5}
-
-- Report memory usage in performance tests. [\#5899](https://github.com/ClickHouse/ClickHouse/pull/5899) ([akuzm](https://github.com/akuzm))
-- Fixed the build with external `libcxx`. [\#6010](https://github.com/ClickHouse/ClickHouse/pull/6010) ([Ivan](https://github.com/abyss7))
-- Fixed the shared build with the `rdkafka` library. [\#6101](https://github.com/ClickHouse/ClickHouse/pull/6101) ([Ivan](https://github.com/abyss7))
-
-## ClickHouse release 19.11 {#clickhouse-release-19-11}
-
-### ClickHouse release 19.11.13.74, 2019-11-01 {#clickhouse-release-19-11-13-74-2019-11-01}
-
-#### Bug Fix {#bug-fix-14}
-
-- Fixed a rare crash in `ALTER MODIFY COLUMN` and vertical merge when one of the merged/altered parts is empty (0 rows). [\#6780](https://github.com/ClickHouse/ClickHouse/pull/6780) ([alesapin](https://github.com/alesapin))
-- Manual update of `SIMDJSON`. This fixes possible flooding of stderr files with bogus json diagnostic messages. [\#7548](https://github.com/ClickHouse/ClickHouse/pull/7548) ([Alexander Kazakov](https://github.com/Akazz))
-- Fixed a bug with the `mrk` file extension for mutations ([alesapin](https://github.com/alesapin))
-
-### ClickHouse release 19.11.12.69, 2019-10-02 {#clickhouse-release-19-11-12-69-2019-10-02}
-
-#### Bug Fix {#bug-fix-15}
-
-- Fixed performance degradation of index analysis on complex keys on large tables. This fixes [\#6924](https://github.com/ClickHouse/ClickHouse/issues/6924). [\#7075](https://github.com/ClickHouse/ClickHouse/pull/7075) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Avoid a rare SIGSEGV while sending data in tables with the Distributed engine (`Failed to send batch: file with index XXXXX is absent`). [\#7032](https://github.com/ClickHouse/ClickHouse/pull/7032) ([Azat Khuzhin](https://github.com/azat))
-- Fixed `Unknown identifier` with multiple joins. This fixes [\#5254](https://github.com/ClickHouse/ClickHouse/issues/5254). [\#7022](https://github.com/ClickHouse/ClickHouse/pull/7022) ([Artem Zuikov](https://github.com/4ertus2))
-
-### ClickHouse release 19.11.11.57, 2019-09-13 {#clickhouse-release-19-11-11-57-2019-09-13}
-
-- Fixed a logical error causing segfaults when selecting from an empty Kafka topic. [\#6902](https://github.com/ClickHouse/ClickHouse/issues/6902) [\#6909](https://github.com/ClickHouse/ClickHouse/pull/6909) ([Ivan](https://github.com/abyss7))
-- Fix for the function `arrayEnumerateUniqRanked` with empty arrays in params. [\#6928](https://github.com/ClickHouse/ClickHouse/pull/6928) ([proller](https://github.com/proller))
-
-### ClickHouse release 19.11.10.54, 2019-09-10 {#clickhouse-release-19-11-10-54-2019-09-10}
-
-#### Bug Fix {#bug-fix-16}
-
-- Store offsets for Kafka messages manually to be able to commit them all at once for all partitions. Fixes potential duplication in the “one consumer - many partitions” scenario. [\#6872](https://github.com/ClickHouse/ClickHouse/pull/6872) ([Ivan](https://github.com/abyss7))
-
-### ClickHouse release 19.11.9.52, 2019-09-6 {#clickhouse-release-19-11-9-52-2019-09-6}
-
-- Improved error handling in cache dictionaries. [\#6737](https://github.com/ClickHouse/ClickHouse/pull/6737) ([Vitaly Baranov](https://github.com/vitlibar))
-- Fixed a bug in the function `arrayEnumerateUniqRanked`. [\#6779](https://github.com/ClickHouse/ClickHouse/pull/6779) ([proller](https://github.com/proller))
-- Fixed the `JSONExtract` function while extracting a `Tuple` from JSON; see the example after this group of entries. [\#6718](https://github.com/ClickHouse/ClickHouse/pull/6718) ([Vitaly Baranov](https://github.com/vitlibar))
-- Fixed possible data loss after an `ALTER DELETE` query on a table with a skipping index. [\#6224](https://github.com/ClickHouse/ClickHouse/issues/6224) [\#6282](https://github.com/ClickHouse/ClickHouse/pull/6282) ([Nikita Vasilev](https://github.com/nikvas0))
-- Fixed a performance test. [\#6392](https://github.com/ClickHouse/ClickHouse/pull/6392) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Parquet: Fixed reading boolean columns. [\#6579](https://github.com/ClickHouse/ClickHouse/pull/6579) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed wrong behaviour of the `nullIf` function for constant arguments. [\#6518](https://github.com/ClickHouse/ClickHouse/pull/6518) ([Guillaume Tassery](https://github.com/YiuRULE)) [\#6580](https://github.com/ClickHouse/ClickHouse/pull/6580) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed the Kafka message duplication problem on normal server restart. [\#6597](https://github.com/ClickHouse/ClickHouse/pull/6597) ([Ivan](https://github.com/abyss7))
-- Fixed an issue when a long `ALTER UPDATE` or `ALTER DELETE` may prevent regular merges from running. Prevent mutations from executing if there are not enough free threads available. [\#6502](https://github.com/ClickHouse/ClickHouse/issues/6502) [\#6617](https://github.com/ClickHouse/ClickHouse/pull/6617) ([tavplubix](https://github.com/tavplubix))
-- Fixed an error with processing “timezone” in the server configuration file. [\#6709](https://github.com/ClickHouse/ClickHouse/pull/6709) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed kafka tests. [\#6805](https://github.com/ClickHouse/ClickHouse/pull/6805) ([Ivan](https://github.com/abyss7))
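For the `JSONExtract` fix mentioned above, extracting a `Tuple` looks roughly like this (a sketch based on the function's documented signature):

```
-- Parse a whole JSON object into a Tuple in one call:
SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}',
                   'Tuple(String, Array(Float64))');
```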
-
-#### Security Fix {#security-fix-3}
-
-- If an attacker has write access to ZooKeeper and is able to run a custom server available from the network where ClickHouse runs, they can create a custom-built malicious server that will act as a ClickHouse replica and register it in ZooKeeper. When another replica fetches a data part from the malicious replica, it can force clickhouse-server to write to an arbitrary path on the filesystem. Found by Eldar Zaitov, information security team at Yandex. [\#6247](https://github.com/ClickHouse/ClickHouse/pull/6247) ([alexey-milovidov](https://github.com/alexey-milovidov))
-
-### ClickHouse release 19.11.8.46, 2019-08-22 {#clickhouse-release-19-11-8-46-2019-08-22}
-
-#### Bug Fix {#bug-fix-17}
-
-- Fixed the `ALTER TABLE ... UPDATE` query for tables with `enable_mixed_granularity_parts=1`. [\#6543](https://github.com/ClickHouse/ClickHouse/pull/6543) ([alesapin](https://github.com/alesapin))
-- Fixed an NPE when using the IN clause with a subquery with a tuple. [\#6125](https://github.com/ClickHouse/ClickHouse/issues/6125) [\#6550](https://github.com/ClickHouse/ClickHouse/pull/6550) ([tavplubix](https://github.com/tavplubix))
-- Fixed an issue that if a stale replica becomes alive, it may still have data parts that were removed by DROP PARTITION. [\#6522](https://github.com/ClickHouse/ClickHouse/issues/6522) [\#6523](https://github.com/ClickHouse/ClickHouse/pull/6523) ([tavplubix](https://github.com/tavplubix))
-- Fixed an issue with parsing CSV. [\#6426](https://github.com/ClickHouse/ClickHouse/issues/6426) [\#6559](https://github.com/ClickHouse/ClickHouse/pull/6559) ([tavplubix](https://github.com/tavplubix))
-- Fixed a data race in the system.parts table and the ALTER query. This fixes [\#6245](https://github.com/ClickHouse/ClickHouse/issues/6245). [\#6513](https://github.com/ClickHouse/ClickHouse/pull/6513) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed wrong code in mutations that may lead to memory corruption. Fixed a segfault with a read of address `0x14c0` that may happen due to concurrent `DROP TABLE` and `SELECT` from `system.parts` or `system.parts_columns`. Fixed a race condition in the preparation of mutation queries. Fixed a deadlock caused by `OPTIMIZE` of Replicated tables and concurrent modification operations like ALTERs. [\#6514](https://github.com/ClickHouse/ClickHouse/pull/6514) ([alexey-milovidov](https://github.com/alexey-milovidov))
-
-### ClickHouse release 19.11.7.40, 2019-08-14 {#clickhouse-release-19-11-7-40-2019-08-14}
-
-#### Bug fix {#bug-fix-18}
-
-- Kafka integration has been fixed in this version.
-- Fixed a segfault when using `arrayReduce` for constant arguments. [\#6326](https://github.com/ClickHouse/ClickHouse/pull/6326) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed `toFloat()` monotonicity. [\#6374](https://github.com/ClickHouse/ClickHouse/pull/6374) ([dimarub2000](https://github.com/dimarub2000))
-- Fixed a segfault with enabled `optimize_skip_unused_shards` and a missing sharding key. [\#6384](https://github.com/ClickHouse/ClickHouse/pull/6384) ([CurtizJ](https://github.com/CurtizJ))
-- Fixed the logic of the `arrayEnumerateUniqRanked` function. [\#6423](https://github.com/ClickHouse/ClickHouse/pull/6423) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Removed extra verbose logging from the MySQL handler. [\#6389](https://github.com/ClickHouse/ClickHouse/pull/6389) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed wrong behavior and possible segfaults in the `topK` and `topKWeighted` aggregate functions. [\#6404](https://github.com/ClickHouse/ClickHouse/pull/6404) ([CurtizJ](https://github.com/CurtizJ))
-- Do not expose virtual columns in the `system.columns` table. This is required for backward compatibility. [\#6406](https://github.com/ClickHouse/ClickHouse/pull/6406) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a bug with memory allocation for string fields in a complex key cache dictionary. [\#6447](https://github.com/ClickHouse/ClickHouse/pull/6447) ([alesapin](https://github.com/alesapin))
-- Fixed a bug with enabling adaptive granularity when creating a new replica for a `Replicated*MergeTree` table. [\#6452](https://github.com/ClickHouse/ClickHouse/pull/6452) ([alesapin](https://github.com/alesapin))
-- Fixed an infinite loop when reading Kafka messages. [\#6354](https://github.com/ClickHouse/ClickHouse/pull/6354) ([abyss7](https://github.com/abyss7))
-- Fixed the possibility of a fabricated query causing a server crash due to stack overflow in the SQL parser, and the possibility of stack overflow in `Merge` and `Distributed` tables. [\#6433](https://github.com/ClickHouse/ClickHouse/pull/6433) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a Gorilla encoding error on small sequences. [\#6444](https://github.com/ClickHouse/ClickHouse/pull/6444) ([Enmk](https://github.com/Enmk))
-
-#### Improvement {#improvement-5}
-
-- Allow the user to override the `poll_interval` and `idle_connection_timeout` settings on connection. [\#6230](https://github.com/ClickHouse/ClickHouse/pull/6230) ([alexey-milovidov](https://github.com/alexey-milovidov))
-
-### ClickHouse release 19.11.5.28, 2019-08-05 {#clickhouse-release-19-11-5-28-2019-08-05}
-
-#### Bug fix {#bug-fix-19}
-
-- Fixed the possibility of hanging queries when the server is overloaded. [\#6301](https://github.com/ClickHouse/ClickHouse/pull/6301) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed an FPE in the yandexConsistentHash function. This fixes [\#6304](https://github.com/ClickHouse/ClickHouse/issues/6304). [\#6126](https://github.com/ClickHouse/ClickHouse/pull/6126) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a bug in the conversion of `LowCardinality` types in `AggregateFunctionFactory`. This fixes [\#6257](https://github.com/ClickHouse/ClickHouse/issues/6257). [\#6281](https://github.com/ClickHouse/ClickHouse/pull/6281) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
-- Fixed parsing of `bool` settings from `true` and `false` strings in configuration files. [\#6278](https://github.com/ClickHouse/ClickHouse/pull/6278) ([alesapin](https://github.com/alesapin))
-- Fixed a rare bug with incompatible stream headers in queries to a `Distributed` table over a `MergeTree` table when part of the `WHERE` moves to `PREWHERE`. [\#6236](https://github.com/ClickHouse/ClickHouse/pull/6236) ([alesapin](https://github.com/alesapin))
-- Fixed an overflow in integer division of a signed type by an unsigned type. This fixes [\#6214](https://github.com/ClickHouse/ClickHouse/issues/6214). [\#6233](https://github.com/ClickHouse/ClickHouse/pull/6233) ([alexey-milovidov](https://github.com/alexey-milovidov))
-
-#### Backward Incompatible Change {#backward-incompatible-change-5}
-
-- `Kafka` is still broken.
-
-### ClickHouse release 19.11.4.24, 2019-08-01 {#clickhouse-release-19-11-4-24-2019-08-01}
-
-#### Bug Fix {#bug-fix-20}
-
-- Fixed a bug with writing secondary index marks with adaptive granularity. [\#6126](https://github.com/ClickHouse/ClickHouse/pull/6126) ([alesapin](https://github.com/alesapin))
-- Fixed the `WITH ROLLUP` and `WITH CUBE` modifiers of `GROUP BY` with two-level aggregation. [\#6225](https://github.com/ClickHouse/ClickHouse/pull/6225) ([Anton Popov](https://github.com/CurtizJ))
-- Fixed a hang in the `JSONExtractRaw` function. Fixed [\#6195](https://github.com/ClickHouse/ClickHouse/issues/6195) [\#6198](https://github.com/ClickHouse/ClickHouse/pull/6198) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a segfault in ExternalLoader::reloadOutdated(). [\#6082](https://github.com/ClickHouse/ClickHouse/pull/6082) ([Vitaly Baranov](https://github.com/vitlibar))
-- Fixed the case when the server may close listening sockets but not shut down, and continue serving remaining queries. You may end up with two running clickhouse-server processes. Sometimes, the server may return an error `bad_function_call` for remaining queries. [\#6231](https://github.com/ClickHouse/ClickHouse/pull/6231) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a useless and incorrect condition on the update field for initial loading of external dictionaries via ODBC, MySQL, ClickHouse and HTTP. This fixes [\#6069](https://github.com/ClickHouse/ClickHouse/issues/6069) [\#6083](https://github.com/ClickHouse/ClickHouse/pull/6083) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed an irrelevant exception in the cast of `LowCardinality(Nullable)` to a not-Nullable column in case it doesn’t contain NULLs (e.g. in a query like `SELECT CAST(CAST('Hello' AS LowCardinality(Nullable(String))) AS String)`). [\#6094](https://github.com/ClickHouse/ClickHouse/issues/6094) [\#6119](https://github.com/ClickHouse/ClickHouse/pull/6119) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
-- Fixed a non-deterministic result of the “uniq” aggregate function in extremely rare cases. The bug was present in all ClickHouse versions. [\#6058](https://github.com/ClickHouse/ClickHouse/pull/6058) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a segfault when a slightly too high CIDR is set on the function `IPv6CIDRToRange`. [\#6068](https://github.com/ClickHouse/ClickHouse/pull/6068) ([Guillaume Tassery](https://github.com/YiuRULE))
-- Fixed a small memory leak when the server throws many exceptions from many different contexts. [\#6144](https://github.com/ClickHouse/ClickHouse/pull/6144) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed the situation when a consumer got paused before subscription and was not resumed afterwards. [\#6075](https://github.com/ClickHouse/ClickHouse/pull/6075) ([Ivan](https://github.com/abyss7)) Note that Kafka is broken in this version.
-- Clearing the Kafka data buffer from the previous read operation that was completed with an error. [\#6026](https://github.com/ClickHouse/ClickHouse/pull/6026) ([Nikolay](https://github.com/bopohaa)) Note that Kafka is broken in this version.
-- Since `StorageMergeTree::background_task_handle` is initialized in `startup()`, `MergeTreeBlockOutputStream::write()` may try to use it before initialization. Just check if it is initialized. [\#6080](https://github.com/ClickHouse/ClickHouse/pull/6080) ([Ivan](https://github.com/abyss7))
-
-#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-6}
-
-- Added official `rpm` packages. [\#5740](https://github.com/ClickHouse/ClickHouse/pull/5740) ([proller](https://github.com/proller)) ([alesapin](https://github.com/alesapin))
-- Added an ability to build `.rpm` and `.tgz` packages with the `packager` script. [\#5769](https://github.com/ClickHouse/ClickHouse/pull/5769) ([alesapin](https://github.com/alesapin))
-- Fixes for the “Arcadia” build system. [\#6223](https://github.com/ClickHouse/ClickHouse/pull/6223) ([proller](https://github.com/proller))
-
-#### Backward Incompatible Change {#backward-incompatible-change-6}
-
-- `Kafka` is broken in this version.
-
-### ClickHouse release 19.11.3.11, 2019-07-18 {#clickhouse-release-19-11-3-11-2019-07-18}
-
-#### New Feature {#new-feature-6}
-
-- Added support for prepared statements. [\#5331](https://github.com/ClickHouse/ClickHouse/pull/5331/) ([Alexander](https://github.com/sanych73)) [\#5630](https://github.com/ClickHouse/ClickHouse/pull/5630) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- `DoubleDelta` and `Gorilla` column codecs; see the example after this section. [\#5600](https://github.com/ClickHouse/ClickHouse/pull/5600) ([Vasily Nemkov](https://github.com/Enmk))
-- Added the `os_thread_priority` setting that allows controlling the “nice” value of query processing threads that is used by the OS to adjust dynamic scheduling priority. It requires `CAP_SYS_NICE` capabilities to work. This implements [\#5858](https://github.com/ClickHouse/ClickHouse/issues/5858) [\#5909](https://github.com/ClickHouse/ClickHouse/pull/5909) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Implemented the `_topic`, `_offset`, `_key` columns for the Kafka engine. [\#5382](https://github.com/ClickHouse/ClickHouse/pull/5382) ([Ivan](https://github.com/abyss7)) Note that Kafka is broken in this version.
-- Added the aggregate function combinator `-Resample`. [\#5590](https://github.com/ClickHouse/ClickHouse/pull/5590) ([hcz](https://github.com/hczhcz))
-- Aggregate functions `groupArrayMovingSum(win_size)(x)` and `groupArrayMovingAvg(win_size)(x)`, which calculate a moving sum/avg with or without window-size limitation. [\#5595](https://github.com/ClickHouse/ClickHouse/pull/5595) ([inv2004](https://github.com/inv2004))
-- Added the synonym `arrayFlatten` \<-\> `flatten`. [\#5764](https://github.com/ClickHouse/ClickHouse/pull/5764) ([hcz](https://github.com/hczhcz))
-- Integrated the H3 function `geoToH3` from Uber. [\#4724](https://github.com/ClickHouse/ClickHouse/pull/4724) ([Remen Ivan](https://github.com/BHYCHIK)) [\#5805](https://github.com/ClickHouse/ClickHouse/pull/5805) ([alexey-milovidov](https://github.com/alexey-milovidov))
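A hedged sketch of the new column codecs from the feature list above (the table is made up; `DoubleDelta` suits slowly changing integers and timestamps, `Gorilla` floating-point gauges):

```
CREATE TABLE sensor_data
(
    ts DateTime CODEC(DoubleDelta),
    value Float64 CODEC(Gorilla)
)
ENGINE = MergeTree ORDER BY ts;
```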
-
-#### Bug Fix {#bug-fix-21}
-
-- Implemented a DNS cache with asynchronous update. A separate thread resolves all hosts and updates the DNS cache periodically (setting `dns_cache_update_period`). It should help when the IPs of hosts change frequently. [\#5857](https://github.com/ClickHouse/ClickHouse/pull/5857) ([Anton Popov](https://github.com/CurtizJ))
-- Fixed a segfault in the `Delta` codec which affects columns with values smaller than 32 bits in size. The bug led to random memory corruption. [\#5786](https://github.com/ClickHouse/ClickHouse/pull/5786) ([alesapin](https://github.com/alesapin))
-- Fixed a segfault in TTL merge with non-physical columns in the block. [\#5819](https://github.com/ClickHouse/ClickHouse/pull/5819) ([Anton Popov](https://github.com/CurtizJ))
-- Fixed a rare bug in checking of a part with a `LowCardinality` column. Previously `checkDataPart` always failed for a part with a `LowCardinality` column. [\#5832](https://github.com/ClickHouse/ClickHouse/pull/5832) ([alesapin](https://github.com/alesapin))
-- Avoid hanging connections when the server thread pool is full. It is important for connections from the `remote` table function or connections to a shard without replicas when there is a long connection timeout. This fixes [\#5878](https://github.com/ClickHouse/ClickHouse/issues/5878) [\#5881](https://github.com/ClickHouse/ClickHouse/pull/5881) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Support for constant arguments to the `evalMLModel` function. This fixes [\#5817](https://github.com/ClickHouse/ClickHouse/issues/5817) [\#5820](https://github.com/ClickHouse/ClickHouse/pull/5820) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed the issue when ClickHouse determines the default time zone as `UCT` instead of `UTC`. This fixes [\#5804](https://github.com/ClickHouse/ClickHouse/issues/5804). [\#5828](https://github.com/ClickHouse/ClickHouse/pull/5828) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a buffer underflow in `visitParamExtractRaw`. This fixes [\#5901](https://github.com/ClickHouse/ClickHouse/issues/5901) [\#5902](https://github.com/ClickHouse/ClickHouse/pull/5902) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Now distributed `DROP/ALTER/TRUNCATE/OPTIMIZE ON CLUSTER` queries will be executed directly on the leader replica. [\#5757](https://github.com/ClickHouse/ClickHouse/pull/5757) ([alesapin](https://github.com/alesapin))
-- Fixed `coalesce` for `ColumnConst` with `ColumnNullable` + related changes. [\#5755](https://github.com/ClickHouse/ClickHouse/pull/5755) ([Artem Zuikov](https://github.com/4ertus2))
-- Fixed the `ReadBufferFromKafkaConsumer` so that it keeps reading new messages after `commit()` even if it was stalled before. [\#5852](https://github.com/ClickHouse/ClickHouse/pull/5852) ([Ivan](https://github.com/abyss7))
-- Fixed `FULL` and `RIGHT` JOIN results when joining on `Nullable` keys in the right table. [\#5859](https://github.com/ClickHouse/ClickHouse/pull/5859) ([Artem Zuikov](https://github.com/4ertus2))
-- Possible fix of infinite sleeping of low-priority queries. [\#5842](https://github.com/ClickHouse/ClickHouse/pull/5842) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a race condition which causes some queries to not appear in query\_log after a `SYSTEM FLUSH LOGS` query. [\#5456](https://github.com/ClickHouse/ClickHouse/issues/5456) [\#5685](https://github.com/ClickHouse/ClickHouse/pull/5685) ([Anton Popov](https://github.com/CurtizJ))
-- Fixed a `heap-use-after-free` ASan warning in ClusterCopier caused by a watch which tries to use an already removed copier object. [\#5871](https://github.com/ClickHouse/ClickHouse/pull/5871) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
-- Fixed a wrong `StringRef` pointer returned by some implementations of `IColumn::deserializeAndInsertFromArena`. This bug affected only unit tests. [\#5973](https://github.com/ClickHouse/ClickHouse/pull/5973) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
-- Prevent source and intermediate array join columns from masking same-name columns. [\#5941](https://github.com/ClickHouse/ClickHouse/pull/5941) ([Artem Zuikov](https://github.com/4ertus2))
-- Fixed insert and select queries to the MySQL engine with MySQL-style identifier quoting. [\#5704](https://github.com/ClickHouse/ClickHouse/pull/5704) ([Winter Zhang](https://github.com/zhang2014))
-- Now the `CHECK TABLE` query can work with the MergeTree engine family. It returns the check status and a message, if any, for each part (or file in the case of simpler engines). Also, fixed a bug in the fetch of a broken part. [\#5865](https://github.com/ClickHouse/ClickHouse/pull/5865) ([alesapin](https://github.com/alesapin))
-- Fixed SPLIT\_SHARED\_LIBRARIES runtime. [\#5793](https://github.com/ClickHouse/ClickHouse/pull/5793) ([Danila Kutenin](https://github.com/danlark1))
-- Fixed time zone initialization when `/etc/localtime` is a relative symlink like `../usr/share/zoneinfo/Europe/Moscow`. [\#5922](https://github.com/ClickHouse/ClickHouse/pull/5922) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- clickhouse-copier: Fixed a use-after-free on shutdown. [\#5752](https://github.com/ClickHouse/ClickHouse/pull/5752) ([proller](https://github.com/proller))
-- Updated `simdjson`. Fixed the issue that some invalid JSONs with zero bytes were parsed successfully. [\#5938](https://github.com/ClickHouse/ClickHouse/pull/5938) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed shutdown of SystemLogs. [\#5802](https://github.com/ClickHouse/ClickHouse/pull/5802) ([Anton Popov](https://github.com/CurtizJ))
-- Fixed hanging when the condition in invalidate\_query depends on a dictionary. [\#6011](https://github.com/ClickHouse/ClickHouse/pull/6011) ([Vitaly Baranov](https://github.com/vitlibar))
-
-#### Improvement {#improvement-6}
-
-- Allow unresolvable addresses in the cluster configuration. They will be considered unavailable, and resolution will be retried at every connection attempt. This is especially useful for Kubernetes. This fixes [\#5714](https://github.com/ClickHouse/ClickHouse/issues/5714) [\#5924](https://github.com/ClickHouse/ClickHouse/pull/5924) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Close idle TCP connections (with a one-hour timeout by default). This is especially important for large clusters with multiple distributed tables on every server, because every server can possibly keep a connection pool to every other server, and after peak query concurrency, connections will stall. This fixes [\#5879](https://github.com/ClickHouse/ClickHouse/issues/5879) [\#5880](https://github.com/ClickHouse/ClickHouse/pull/5880) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Better quality of the `topK` function. Changed the SpaceSaving set behavior to remove the last element if the new element has a bigger weight. [\#5833](https://github.com/ClickHouse/ClickHouse/issues/5833) [\#5850](https://github.com/ClickHouse/ClickHouse/pull/5850) ([Guillaume Tassery](https://github.com/YiuRULE))
-- URL functions for working with domains now can work for incomplete URLs without a scheme. [\#5725](https://github.com/ClickHouse/ClickHouse/pull/5725) ([alesapin](https://github.com/alesapin))
-- Checksums added to the `system.parts_columns` table. [\#5874](https://github.com/ClickHouse/ClickHouse/pull/5874) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
-- Added the `Enum` data type as a synonym for `Enum8` or `Enum16`. [\#5886](https://github.com/ClickHouse/ClickHouse/pull/5886) ([dimarub2000](https://github.com/dimarub2000))
-- Full bit transpose variant for the `T64` codec. Could lead to better compression with `zstd`. [\#5742](https://github.com/ClickHouse/ClickHouse/pull/5742) ([Artem Zuikov](https://github.com/4ertus2))
-- A condition on the `startsWith` function now can use the primary key. This fixes [\#5310](https://github.com/ClickHouse/ClickHouse/issues/5310) and [\#5882](https://github.com/ClickHouse/ClickHouse/issues/5882) [\#5919](https://github.com/ClickHouse/ClickHouse/pull/5919) ([dimarub2000](https://github.com/dimarub2000))
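A small sketch of the `startsWith` primary-key entry above (a hypothetical table):

```
CREATE TABLE urls (url String) ENGINE = MergeTree ORDER BY url;

-- The condition can now be evaluated using the primary key on `url`
-- instead of scanning the whole table.
SELECT count() FROM urls WHERE startsWith(url, 'https://');
```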
#### Performance Improvement {#performance-improvement-4}

- Increase the number of streams to SELECT from a Merge table for a more uniform distribution of threads. Added the setting `max_streams_multiplier_for_merge_tables`. This fixes [\#5797](https://github.com/ClickHouse/ClickHouse/issues/5797) [\#5915](https://github.com/ClickHouse/ClickHouse/pull/5915) ([alexey-milovidov](https://github.com/alexey-milovidov))

#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-7}

- Add a backward compatibility test for client-server interaction with different versions of ClickHouse. [\#5868](https://github.com/ClickHouse/ClickHouse/pull/5868) ([alesapin](https://github.com/alesapin))
- Test coverage information in every commit and pull request. [\#5896](https://github.com/ClickHouse/ClickHouse/pull/5896) ([alesapin](https://github.com/alesapin))
- Cooperate with address sanitizer to support our custom allocators (`Arena` and `ArenaWithFreeLists`) for better debugging of “use-after-free” errors. [\#5728](https://github.com/ClickHouse/ClickHouse/pull/5728) ([akuzm](https://github.com/akuzm))
- Switch to the [LLVM libunwind implementation](https://github.com/llvm-mirror/libunwind) for C++ exception handling and for printing stack traces. [\#4828](https://github.com/ClickHouse/ClickHouse/pull/4828) ([Nikita Lapkov](https://github.com/laplab))
- Add two more warnings from -Weverything. [\#5923](https://github.com/ClickHouse/ClickHouse/pull/5923) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Allow building ClickHouse with Memory Sanitizer. [\#3949](https://github.com/ClickHouse/ClickHouse/pull/3949) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a ubsan report about the `bitTest` function in a fuzz test. [\#5943](https://github.com/ClickHouse/ClickHouse/pull/5943) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Docker: added the possibility to init a ClickHouse instance that requires authentication. [\#5727](https://github.com/ClickHouse/ClickHouse/pull/5727) ([Korviakov Andrey](https://github.com/shurshun))
- Update librdkafka to version 1.1.0. [\#5872](https://github.com/ClickHouse/ClickHouse/pull/5872) ([Ivan](https://github.com/abyss7))
- Add a global timeout for integration tests and disable some of them in the test code. [\#5741](https://github.com/ClickHouse/ClickHouse/pull/5741) ([alesapin](https://github.com/alesapin))
- Fix some ThreadSanitizer failures. [\#5854](https://github.com/ClickHouse/ClickHouse/pull/5854) ([akuzm](https://github.com/akuzm))
- The `--no-undefined` option forces the linker to check all external names for existence while linking. It’s very useful to track real dependencies between libraries in the split build mode. [\#5855](https://github.com/ClickHouse/ClickHouse/pull/5855) ([Ivan](https://github.com/abyss7))
- Added a performance test for [\#5797](https://github.com/ClickHouse/ClickHouse/issues/5797) [\#5914](https://github.com/ClickHouse/ClickHouse/pull/5914) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed compatibility with gcc-7. [\#5840](https://github.com/ClickHouse/ClickHouse/pull/5840) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Added support for gcc-9. This fixes [\#5717](https://github.com/ClickHouse/ClickHouse/issues/5717) [\#5774](https://github.com/ClickHouse/ClickHouse/pull/5774) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed an error when libunwind could be linked incorrectly. [\#5948](https://github.com/ClickHouse/ClickHouse/pull/5948) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a few warnings found by PVS-Studio. [\#5921](https://github.com/ClickHouse/ClickHouse/pull/5921) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Added initial support for the `clang-tidy` static analyzer. [\#5806](https://github.com/ClickHouse/ClickHouse/pull/5806) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Convert BSD/Linux endian macros (`be64toh` and `htobe64`) to the Mac OS X equivalents. [\#5785](https://github.com/ClickHouse/ClickHouse/pull/5785) ([Fu Chen](https://github.com/fredchenbj))
- Improved the integration tests guide. [\#5796](https://github.com/ClickHouse/ClickHouse/pull/5796) ([Vladimir Chebotarev](https://github.com/excitoon))
- Fix the build on macOS with gcc-9. [\#5822](https://github.com/ClickHouse/ClickHouse/pull/5822) ([filimonov](https://github.com/filimonov))
- Fix a hard-to-spot typo: aggreAGte -\> aggregate. [\#5753](https://github.com/ClickHouse/ClickHouse/pull/5753) ([akuzm](https://github.com/akuzm))
- Fix the FreeBSD build. [\#5760](https://github.com/ClickHouse/ClickHouse/pull/5760) ([proller](https://github.com/proller))
- Add a link to the experimental YouTube channel to the website. [\#5845](https://github.com/ClickHouse/ClickHouse/pull/5845) ([Ivan Blinkov](https://github.com/blinkov))
- CMake: add an option for coverage flags: WITH\_COVERAGE. [\#5776](https://github.com/ClickHouse/ClickHouse/pull/5776) ([proller](https://github.com/proller))
- Fix the initial size of some inline PODArrays. [\#5787](https://github.com/ClickHouse/ClickHouse/pull/5787) ([akuzm](https://github.com/akuzm))
- clickhouse-server.postinst: fix OS detection for CentOS 6. [\#5788](https://github.com/ClickHouse/ClickHouse/pull/5788) ([proller](https://github.com/proller))
- Added Arch Linux package generation. [\#5719](https://github.com/ClickHouse/ClickHouse/pull/5719) ([Vladimir Chebotarev](https://github.com/excitoon))
- Split Common/config.h by libs (dbms). [\#5715](https://github.com/ClickHouse/ClickHouse/pull/5715) ([proller](https://github.com/proller))
- Fixes for the “Arcadia” build platform. [\#5795](https://github.com/ClickHouse/ClickHouse/pull/5795) ([proller](https://github.com/proller))
- Fixes for unconventional builds (gcc9, no submodules). [\#5792](https://github.com/ClickHouse/ClickHouse/pull/5792) ([proller](https://github.com/proller))
- Require an explicit type in unalignedStore because it was proven to be bug-prone. [\#5791](https://github.com/ClickHouse/ClickHouse/pull/5791) ([akuzm](https://github.com/akuzm))
- Fixes the macOS build. [\#5830](https://github.com/ClickHouse/ClickHouse/pull/5830) ([filimonov](https://github.com/filimonov))
- Performance test concerning the new JIT feature with a bigger dataset, as requested here [\#5263](https://github.com/ClickHouse/ClickHouse/issues/5263) [\#5887](https://github.com/ClickHouse/ClickHouse/pull/5887) ([Guillaume Tassery](https://github.com/YiuRULE))
- Run stateful tests in the stress test. [12693e568722f11e19859742f56428455501fd2a](https://github.com/ClickHouse/ClickHouse/commit/12693e568722f11e19859742f56428455501fd2a) ([alesapin](https://github.com/alesapin))

#### Backward Incompatible Change {#backward-incompatible-change-7}

- `Kafka` is broken in this version.
- Enable `adaptive_index_granularity` = 10MB by default for new `MergeTree` tables. If you created new MergeTree tables on version 19.11+, downgrading to versions prior to 19.6 will be impossible. [\#5628](https://github.com/ClickHouse/ClickHouse/pull/5628) ([alesapin](https://github.com/alesapin))
- Removed obsolete undocumented embedded dictionaries that were used by Yandex.Metrica. The functions `OSIn`, `SEIn`, `OSToRoot`, `SEToRoot`, `OSHierarchy`, `SEHierarchy` are no longer available. If you are using these functions, write an email to clickhouse-feedback@yandex-team.com. Note: at the last moment we decided to keep these functions for a while. [\#5780](https://github.com/ClickHouse/ClickHouse/pull/5780) ([alexey-milovidov](https://github.com/alexey-milovidov))

## ClickHouse release 19.10 {#clickhouse-release-19-10}

### ClickHouse release 19.10.1.5, 2019-07-12 {#clickhouse-release-19-10-1-5-2019-07-12}

#### New Feature {#new-feature-7}

- Add a new column codec: `T64`. Made for (U)IntX/EnumX/Date(Time)/DecimalX columns. It should be good for columns with constant or small range values. The codec itself allows enlarging or shrinking the data type without re-compression (see the sketch after this list). [\#5557](https://github.com/ClickHouse/ClickHouse/pull/5557) ([Artem Zuikov](https://github.com/4ertus2))
- Add the database engine `MySQL` that allows viewing all the tables on a remote MySQL server, as shown below. [\#5599](https://github.com/ClickHouse/ClickHouse/pull/5599) ([Winter Zhang](https://github.com/zhang2014))
- `bitmapContains` implementation. It’s 2x faster than `bitmapHasAny` if the second bitmap contains one element. [\#5535](https://github.com/ClickHouse/ClickHouse/pull/5535) ([Zhichang Yu](https://github.com/yuzhichang))
- Support for the `crc32` function (with behaviour exactly as in MySQL or PHP). Do not use it if you need a hash function. [\#5661](https://github.com/ClickHouse/ClickHouse/pull/5661) ([Remen Ivan](https://github.com/BHYCHIK))
- Implemented `SYSTEM START/STOP DISTRIBUTED SENDS` queries to control asynchronous inserts into `Distributed` tables. [\#4935](https://github.com/ClickHouse/ClickHouse/pull/4935) ([Winter Zhang](https://github.com/zhang2014))
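A short sketch of the first two features (the table, database, and connection parameters below are placeholders):

```
-- `T64` codec on an integer column:
CREATE TABLE metrics (d Date, value UInt32 CODEC(T64, LZ4)) ENGINE = MergeTree() ORDER BY d;

-- Database engine that exposes the tables of a remote MySQL server:
CREATE DATABASE mysql_db ENGINE = MySQL('localhost:3306', 'database_name', 'user', 'password');
SHOW TABLES FROM mysql_db;
```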
#### Bug Fix {#bug-fix-22}

- Ignore query execution limits and the max parts size for merge limits while executing mutations. [\#5659](https://github.com/ClickHouse/ClickHouse/pull/5659) ([Anton Popov](https://github.com/CurtizJ))
- Fix a bug which may lead to deduplication of normal blocks (extremely rare) and insertion of duplicate blocks (more often). [\#5549](https://github.com/ClickHouse/ClickHouse/pull/5549) ([alesapin](https://github.com/alesapin))
- Fix of the function `arrayEnumerateUniqRanked` for arguments with empty arrays. [\#5559](https://github.com/ClickHouse/ClickHouse/pull/5559) ([proller](https://github.com/proller))
- Don’t subscribe to Kafka topics without the intent to poll any messages. [\#5698](https://github.com/ClickHouse/ClickHouse/pull/5698) ([Ivan](https://github.com/abyss7))
- Make the setting `join_use_nulls` have no effect on types that cannot be inside Nullable. [\#5700](https://github.com/ClickHouse/ClickHouse/pull/5700) ([Olga Khvostikova](https://github.com/stavrolia))
- Fixed `Incorrect size of index granularity` errors. [\#5720](https://github.com/ClickHouse/ClickHouse/pull/5720) ([coraxster](https://github.com/coraxster))
- Fix Float to Decimal conversion overflow. [\#5607](https://github.com/ClickHouse/ClickHouse/pull/5607) ([coraxster](https://github.com/coraxster))
- Flush the buffer when the `WriteBufferFromHDFS` destructor is called. This fixes writing into `HDFS`. [\#5684](https://github.com/ClickHouse/ClickHouse/pull/5684) ([Xindong Peng](https://github.com/eejoin))

#### Improvement {#improvement-7}

- Treat empty cells in `CSV` as default values when the setting `input_format_defaults_for_omitted_fields` is enabled. [\#5625](https://github.com/ClickHouse/ClickHouse/pull/5625) ([akuzm](https://github.com/akuzm))
- Non-blocking loading of external dictionaries. [\#5567](https://github.com/ClickHouse/ClickHouse/pull/5567) ([Vitaly Baranov](https://github.com/vitlibar))
- Network timeouts can be dynamically changed for already established connections according to the settings. [\#4558](https://github.com/ClickHouse/ClickHouse/pull/4558) ([Konstantin Podshumok](https://github.com/podshumok))
- Using “public\_suffix\_list” for the functions `firstSignificantSubdomain` and `cutToFirstSignificantSubdomain` (see the example after this list). It uses a perfect hash table generated by `gperf` with a list generated from the file https://publicsuffix.org/list/public\_suffix\_list.dat (for example, now we recognize the domain `ac.uk` as non-significant). [\#5030](https://github.com/ClickHouse/ClickHouse/pull/5030) ([Guillaume Tassery](https://github.com/YiuRULE))
- Adopted the `IPv6` data type in system tables; unified client info columns in `system.processes` and `system.query_log`. [\#5640](https://github.com/ClickHouse/ClickHouse/pull/5640) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Using sessions for connections with the MySQL compatibility protocol. \#5476 [\#5646](https://github.com/ClickHouse/ClickHouse/pull/5646) ([Yuriy Baranov](https://github.com/yurriy))
- Support more `ALTER` queries `ON CLUSTER`. [\#5593](https://github.com/ClickHouse/ClickHouse/pull/5593) [\#5613](https://github.com/ClickHouse/ClickHouse/pull/5613) ([sundyli](https://github.com/sundy-li))
- Support the `` section in the `clickhouse-local` config file. [\#5540](https://github.com/ClickHouse/ClickHouse/pull/5540) ([proller](https://github.com/proller))
- Allow running queries with the `remote` table function in `clickhouse-local`. [\#5627](https://github.com/ClickHouse/ClickHouse/pull/5627) ([proller](https://github.com/proller))
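Assuming `ac.uk` is on the public suffix list (per the entry above), the domain functions now behave roughly like this:

```
SELECT firstSignificantSubdomain('http://www.example.ac.uk/');      -- 'example'
SELECT cutToFirstSignificantSubdomain('http://www.example.ac.uk/'); -- 'example.ac.uk'
```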
#### Performance Improvement {#performance-improvement-5}

- Add the possibility to write the final mark at the end of MergeTree columns. It allows avoiding useless reads for keys that are out of the table data range. It is enabled only if adaptive index granularity is in use. [\#5624](https://github.com/ClickHouse/ClickHouse/pull/5624) ([alesapin](https://github.com/alesapin))
- Improved performance of MergeTree tables on very slow filesystems by reducing the number of `stat` syscalls. [\#5648](https://github.com/ClickHouse/ClickHouse/pull/5648) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a performance degradation in reading from MergeTree tables that was introduced in version 19.6. Fixes \#5631. [\#5633](https://github.com/ClickHouse/ClickHouse/pull/5633) ([alexey-milovidov](https://github.com/alexey-milovidov))

#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-8}

- Implemented `TestKeeper` as an implementation of the ZooKeeper interface used for testing. [\#5643](https://github.com/ClickHouse/ClickHouse/pull/5643) ([alexey-milovidov](https://github.com/alexey-milovidov)) ([levushkin aleksej](https://github.com/alexey-milovidov))
- From now on `.sql` tests can be run isolated by server, in parallel, with a random database. This allows running them faster, adding new tests with custom server configurations, and being sure that different tests don’t affect each other. [\#5554](https://github.com/ClickHouse/ClickHouse/pull/5554) ([Ivan](https://github.com/abyss7))
- Remove `` and `` from performance tests. [\#5672](https://github.com/ClickHouse/ClickHouse/pull/5672) ([Olga Khvostikova](https://github.com/stavrolia))
- Fixed the “select\_format” performance test for `Pretty` formats. [\#5642](https://github.com/ClickHouse/ClickHouse/pull/5642) ([alexey-milovidov](https://github.com/alexey-milovidov))

## ClickHouse release 19.9 {#clickhouse-release-19-9}

### ClickHouse release 19.9.3.31, 2019-07-05 {#clickhouse-release-19-9-3-31-2019-07-05}

#### Bug Fix {#bug-fix-23}

- Fix a segfault in the Delta codec which affects columns with values smaller than 32 bits in size. The bug led to random memory corruption. [\#5786](https://github.com/ClickHouse/ClickHouse/pull/5786) ([alesapin](https://github.com/alesapin))
- Fix a rare bug in the checking of parts with a LowCardinality column. [\#5832](https://github.com/ClickHouse/ClickHouse/pull/5832) ([alesapin](https://github.com/alesapin))
- Fix a segfault in TTL merges with non-physical columns in the block. [\#5819](https://github.com/ClickHouse/ClickHouse/pull/5819) ([Anton Popov](https://github.com/CurtizJ))
- Fix potential infinite sleeping of low-priority queries. [\#5842](https://github.com/ClickHouse/ClickHouse/pull/5842) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed ClickHouse determining the default time zone as `UCT` instead of `UTC`. [\#5828](https://github.com/ClickHouse/ClickHouse/pull/5828) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed a bug where distributed DROP/ALTER/TRUNCATE/OPTIMIZE ON CLUSTER queries were executed on a follower replica before the leader replica. Now they will be executed directly on the leader replica. [\#5757](https://github.com/ClickHouse/ClickHouse/pull/5757) ([alesapin](https://github.com/alesapin))
- Fix a race condition which caused some queries to be missing from query\_log immediately after a SYSTEM FLUSH LOGS query. [\#5685](https://github.com/ClickHouse/ClickHouse/pull/5685) ([Anton Popov](https://github.com/CurtizJ))
- Added missing support for constant arguments to the `evalMLModel` function. [\#5820](https://github.com/ClickHouse/ClickHouse/pull/5820) ([alexey-milovidov](https://github.com/alexey-milovidov))

### ClickHouse release 19.9.2.4, 2019-06-24 {#clickhouse-release-19-9-2-4-2019-06-24}

#### New Feature {#new-feature-8}

- Print information about frozen parts in the `system.parts` table. [\#5471](https://github.com/ClickHouse/ClickHouse/pull/5471) ([proller](https://github.com/proller))
- Ask for the client password on clickhouse-client start on a TTY if it is not set in the arguments. [\#5092](https://github.com/ClickHouse/ClickHouse/pull/5092) ([proller](https://github.com/proller))
- Implement the `dictGet` and `dictGetOrDefault` functions for Decimal types. [\#5394](https://github.com/ClickHouse/ClickHouse/pull/5394) ([Artem Zuikov](https://github.com/4ertus2))

#### Improvement {#improvement-8}

- Debian init: Add a service stop timeout. [\#5522](https://github.com/ClickHouse/ClickHouse/pull/5522) ([proller](https://github.com/proller))
- Added a setting that forbids, by default, creating tables with suspicious types for LowCardinality. [\#5448](https://github.com/ClickHouse/ClickHouse/pull/5448) ([Olga Khvostikova](https://github.com/stavrolia))
- Regression functions return model weights when not used as State in the function `evalMLMethod`. [\#5411](https://github.com/ClickHouse/ClickHouse/pull/5411) ([Quid37](https://github.com/Quid37))
- Renamed and improved regression methods. [\#5492](https://github.com/ClickHouse/ClickHouse/pull/5492) ([Quid37](https://github.com/Quid37))
- Clearer interfaces of string searchers. [\#5586](https://github.com/ClickHouse/ClickHouse/pull/5586) ([Danila Kutenin](https://github.com/danlark1))

#### Bug Fix {#bug-fix-24}

- Fix potential data loss in Kafka. [\#5445](https://github.com/ClickHouse/ClickHouse/pull/5445) ([Ivan](https://github.com/abyss7))
- Fix a potential infinite loop in the `PrettySpace` format when called with zero columns. [\#5560](https://github.com/ClickHouse/ClickHouse/pull/5560) ([Olga Khvostikova](https://github.com/stavrolia))
- Fixed a UInt32 overflow bug in linear models. Allow evaluating an ML model for a non-constant model argument. [\#5516](https://github.com/ClickHouse/ClickHouse/pull/5516) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- `ALTER TABLE ... DROP INDEX IF EXISTS ...` should not raise an exception if the provided index does not exist. [\#5524](https://github.com/ClickHouse/ClickHouse/pull/5524) ([Gleb Novikov](https://github.com/NanoBjorn))
- Fix a segfault with `bitmapHasAny` in a scalar subquery. [\#5528](https://github.com/ClickHouse/ClickHouse/pull/5528) ([Zhichang Yu](https://github.com/yuzhichang))
- Fixed an error where the replication connection pool didn’t retry resolving the host, even after the DNS cache was dropped. [\#5534](https://github.com/ClickHouse/ClickHouse/pull/5534) ([alesapin](https://github.com/alesapin))
- Fixed `ALTER ... MODIFY TTL` on ReplicatedMergeTree. [\#5539](https://github.com/ClickHouse/ClickHouse/pull/5539) ([Anton Popov](https://github.com/CurtizJ))
- Fix INSERT into a Distributed table with a MATERIALIZED column. [\#5429](https://github.com/ClickHouse/ClickHouse/pull/5429) ([Azat Khuzhin](https://github.com/azat))
- Fix a bad alloc when truncating Join storage. [\#5437](https://github.com/ClickHouse/ClickHouse/pull/5437) ([TCeason](https://github.com/TCeason))
- In recent versions of the tzdata package some files are symlinks now. The current mechanism for detecting the default timezone got broken and gave wrong names for some timezones. Now at least we force the timezone name to the contents of TZ if provided. [\#5443](https://github.com/ClickHouse/ClickHouse/pull/5443) ([Ivan](https://github.com/abyss7))
- Fix some extremely rare cases with the MultiVolnitsky searcher when the constant needles are at least 16KB long in total. The algorithm missed or overwrote previous results, which could lead to an incorrect result of `multiSearchAny`. [\#5588](https://github.com/ClickHouse/ClickHouse/pull/5588) ([Danila Kutenin](https://github.com/danlark1))
- Fix the issue where settings for ExternalData requests couldn’t use ClickHouse settings. Also, for now, the settings `date_time_input_format` and `low_cardinality_allow_in_native_format` cannot be used because of the ambiguity of names (in external data they can be interpreted as a table format and in the query they can be a setting). [\#5455](https://github.com/ClickHouse/ClickHouse/pull/5455) ([Danila Kutenin](https://github.com/danlark1))
- Fix a bug where parts were removed only from the filesystem without dropping them from ZooKeeper. [\#5520](https://github.com/ClickHouse/ClickHouse/pull/5520) ([alesapin](https://github.com/alesapin))
- Remove debug logging from the MySQL protocol. [\#5478](https://github.com/ClickHouse/ClickHouse/pull/5478) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Skip ZNONODE during DDL query processing. [\#5489](https://github.com/ClickHouse/ClickHouse/pull/5489) ([Azat Khuzhin](https://github.com/azat))
- Fix mixing of `UNION ALL` result column types. There were cases with inconsistent data and column types of resulting columns. [\#5503](https://github.com/ClickHouse/ClickHouse/pull/5503) ([Artem Zuikov](https://github.com/4ertus2))
- Throw an exception on wrong integers in `dictGetT` functions instead of crashing. [\#5446](https://github.com/ClickHouse/ClickHouse/pull/5446) ([Artem Zuikov](https://github.com/4ertus2))
- Fix wrong element\_count and load\_factor for hashed dictionaries in the `system.dictionaries` table. [\#5440](https://github.com/ClickHouse/ClickHouse/pull/5440) ([Azat Khuzhin](https://github.com/azat))

#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-9}

- Fixed the build without `Brotli` HTTP compression support (`ENABLE_BROTLI=OFF` cmake variable). [\#5521](https://github.com/ClickHouse/ClickHouse/pull/5521) ([Anton Yuzhaninov](https://github.com/citrin))
- Include roaring.h as roaring/roaring.h. [\#5523](https://github.com/ClickHouse/ClickHouse/pull/5523) ([Orivej Desh](https://github.com/orivej))
- Fix gcc9 warnings in hyperscan (the \#line directive is evil!). [\#5546](https://github.com/ClickHouse/ClickHouse/pull/5546) ([Danila Kutenin](https://github.com/danlark1))
- Fix all warnings when compiling with gcc-9. Fix some contrib issues. Fix a gcc9 ICE and submit it to bugzilla. [\#5498](https://github.com/ClickHouse/ClickHouse/pull/5498) ([Danila Kutenin](https://github.com/danlark1))
- Fixed linking with lld. [\#5477](https://github.com/ClickHouse/ClickHouse/pull/5477) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Remove unused specializations in dictionaries. [\#5452](https://github.com/ClickHouse/ClickHouse/pull/5452) ([Artem Zuikov](https://github.com/4ertus2))
- Improved performance tests for formatting and parsing tables for different types of files. [\#5497](https://github.com/ClickHouse/ClickHouse/pull/5497) ([Olga Khvostikova](https://github.com/stavrolia))
- Fixes for parallel test runs. [\#5506](https://github.com/ClickHouse/ClickHouse/pull/5506) ([proller](https://github.com/proller))
- Docker: use configs from clickhouse-test. [\#5531](https://github.com/ClickHouse/ClickHouse/pull/5531) ([proller](https://github.com/proller))
- Fix compilation for FreeBSD. [\#5447](https://github.com/ClickHouse/ClickHouse/pull/5447) ([proller](https://github.com/proller))
- Upgrade boost to 1.70. [\#5570](https://github.com/ClickHouse/ClickHouse/pull/5570) ([proller](https://github.com/proller))
- Fix building ClickHouse as a submodule. [\#5574](https://github.com/ClickHouse/ClickHouse/pull/5574) ([proller](https://github.com/proller))
- Improve JSONExtract performance tests. [\#5444](https://github.com/ClickHouse/ClickHouse/pull/5444) ([Vitaly Baranov](https://github.com/vitlibar))

## ClickHouse release 19.8 {#clickhouse-release-19-8}

### ClickHouse release 19.8.3.8, 2019-06-11 {#clickhouse-release-19-8-3-8-2019-06-11}

#### New Features {#new-features}

- Added functions to work with JSON. [\#4686](https://github.com/ClickHouse/ClickHouse/pull/4686) ([hcz](https://github.com/hczhcz)) [\#5124](https://github.com/ClickHouse/ClickHouse/pull/5124). ([Vitaly Baranov](https://github.com/vitlibar))
- Added the function `basename`, with a behaviour similar to the basename function that exists in a lot of languages (`os.path.basename` in Python, `basename` in PHP, etc…). It works with both UNIX-like paths and Windows paths. [\#5136](https://github.com/ClickHouse/ClickHouse/pull/5136) ([Guillaume Tassery](https://github.com/YiuRULE))
- Added `LIMIT n, m BY` or `LIMIT m OFFSET n BY` syntax to set an offset of n for the LIMIT BY clause. [\#5138](https://github.com/ClickHouse/ClickHouse/pull/5138) ([Anton Popov](https://github.com/CurtizJ))
- Added the new data type `SimpleAggregateFunction`, which allows having columns with light aggregation in an `AggregatingMergeTree`. It can only be used with simple functions like `any`, `anyLast`, `sum`, `min`, `max`. [\#4629](https://github.com/ClickHouse/ClickHouse/pull/4629) ([Boris Granveaud](https://github.com/bgranvea))
- Added support for non-constant arguments in the function `ngramDistance`. [\#5198](https://github.com/ClickHouse/ClickHouse/pull/5198) ([Danila Kutenin](https://github.com/danlark1))
- Added the functions `skewPop`, `skewSamp`, `kurtPop` and `kurtSamp` to compute sequence skewness, sample skewness, kurtosis and sample kurtosis, respectively. [\#5200](https://github.com/ClickHouse/ClickHouse/pull/5200) ([hcz](https://github.com/hczhcz))
- Support the rename operation for `MaterializeView` storage. [\#5209](https://github.com/ClickHouse/ClickHouse/pull/5209) ([Guillaume Tassery](https://github.com/YiuRULE))
- Added a server which allows connecting to ClickHouse using a MySQL client. [\#4715](https://github.com/ClickHouse/ClickHouse/pull/4715) ([Yuriy Baranov](https://github.com/yurriy))
- Add `toDecimal*OrZero` and `toDecimal*OrNull` functions. [\#5291](https://github.com/ClickHouse/ClickHouse/pull/5291) ([Artem Zuikov](https://github.com/4ertus2))
- Support Decimal types in the functions `quantile`, `quantiles`, `median`, `quantileExactWeighted`, `quantilesExactWeighted`, `medianExactWeighted`. [\#5304](https://github.com/ClickHouse/ClickHouse/pull/5304) ([Artem Zuikov](https://github.com/4ertus2))
- Added the `toValidUTF8` function, which replaces all invalid UTF-8 characters by the replacement character � (U+FFFD). [\#5322](https://github.com/ClickHouse/ClickHouse/pull/5322) ([Danila Kutenin](https://github.com/danlark1))
- Added the `format` function. It formats a constant pattern (a simplified Python format pattern) with the strings listed in the arguments (see the examples after this list). [\#5330](https://github.com/ClickHouse/ClickHouse/pull/5330) ([Danila Kutenin](https://github.com/danlark1))
- Added the `system.detached_parts` table containing information about detached parts of `MergeTree` tables. [\#5353](https://github.com/ClickHouse/ClickHouse/pull/5353) ([akuzm](https://github.com/akuzm))
- Added the `ngramSearch` function to calculate the non-symmetric difference between needle and haystack. [\#5418](https://github.com/ClickHouse/ClickHouse/pull/5418) [\#5422](https://github.com/ClickHouse/ClickHouse/pull/5422) ([Danila Kutenin](https://github.com/danlark1))
- Implementation of basic machine learning methods (stochastic linear regression and logistic regression) using the aggregate functions interface. Has different strategies for updating model weights (simple gradient descent, momentum method, Nesterov method). Also supports mini-batches of custom size. [\#4943](https://github.com/ClickHouse/ClickHouse/pull/4943) ([Quid37](https://github.com/Quid37))
- Implementation of the `geohashEncode` and `geohashDecode` functions. [\#5003](https://github.com/ClickHouse/ClickHouse/pull/5003) ([Vasily Nemkov](https://github.com/Enmk))
- Added the aggregate function `timeSeriesGroupSum`, which can aggregate different time series whose sample timestamps are not aligned. It uses linear interpolation between two sample timestamps and then sums the time series together. Added the aggregate function `timeSeriesGroupRateSum`, which calculates the rate of time series and then sums the rates together. [\#4542](https://github.com/ClickHouse/ClickHouse/pull/4542) ([Yangkuan Liu](https://github.com/LiuYangkuan))
- Added the functions `IPv4CIDRtoIPv4Range` and `IPv6CIDRtoIPv6Range` to calculate the lower and higher bounds for an IP in a subnet using a CIDR. [\#5095](https://github.com/ClickHouse/ClickHouse/pull/5095) ([Guillaume Tassery](https://github.com/YiuRULE))
- Add an X-ClickHouse-Summary header when we send a query using HTTP with the setting `send_progress_in_http_headers` enabled. It returns the usual information of X-ClickHouse-Progress, with additional information like how many rows and bytes were inserted by the query. [\#5116](https://github.com/ClickHouse/ClickHouse/pull/5116) ([Guillaume Tassery](https://github.com/YiuRULE))
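A few of the new functions in action (a non-exhaustive sketch; expected outputs are shown in comments):

```
SELECT format('{1} {0} {1}', 'World', 'Hello');   -- 'Hello World Hello'
SELECT basename('/home/user/some_file.txt');      -- 'some_file.txt'
SELECT toDecimal32OrZero('41.8', 2), toDecimal32OrZero('not a number', 2);  -- 41.8 and 0
SELECT toValidUTF8('a\xF0b');                     -- 'a�b'
SELECT number % 3 AS k, number FROM numbers(9) LIMIT 1, 2 BY k;  -- skip 1 row, keep 2 rows per k
```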
#### Improvements {#improvements}

- Added the `max_parts_in_total` setting for the MergeTree family of tables (default: 100 000) that prevents unsafe specification of the partition key \#5166. [\#5171](https://github.com/ClickHouse/ClickHouse/pull/5171) ([alexey-milovidov](https://github.com/alexey-milovidov))
- `clickhouse-obfuscator`: derive the seed for individual columns by combining the initial seed with the column name, not the column position. This is intended to transform datasets with multiple related tables, so that tables will remain JOINable after the transformation. [\#5178](https://github.com/ClickHouse/ClickHouse/pull/5178) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Added the functions `JSONExtractRaw`, `JSONExtractKeyAndValues`. Renamed the function `jsonExtract` to `JSONExtract`. When something goes wrong these functions return the corresponding values, not `NULL`. Modified the function `JSONExtract`; now it gets the return type from its last parameter and doesn’t inject nullables. Implemented a fallback to RapidJSON in case AVX2 instructions are not available. The simdjson library was updated to a new version. [\#5235](https://github.com/ClickHouse/ClickHouse/pull/5235) ([Vitaly Baranov](https://github.com/vitlibar))
- Now the `if` and `multiIf` functions don’t rely on the condition’s `Nullable`, but rely on the branches for SQL compatibility. [\#5238](https://github.com/ClickHouse/ClickHouse/pull/5238) ([Jian Wu](https://github.com/janplus))
- The `In` predicate now generates a `Null` result from `Null` input, like the `Equal` function (see the sketch after this list). [\#5152](https://github.com/ClickHouse/ClickHouse/pull/5152) ([Jian Wu](https://github.com/janplus))
- Check the time limit every (flush\_interval / poll\_timeout) number of rows from Kafka. This allows breaking the reading from the Kafka consumer more frequently and checking the time limits for the top-level streams. [\#5249](https://github.com/ClickHouse/ClickHouse/pull/5249) ([Ivan](https://github.com/abyss7))
- Link rdkafka with bundled SASL. It should allow using SASL SCRAM authentication. [\#5253](https://github.com/ClickHouse/ClickHouse/pull/5253) ([Ivan](https://github.com/abyss7))
- Batched version of RowRefList for ALL JOINs. [\#5267](https://github.com/ClickHouse/ClickHouse/pull/5267) ([Artem Zuikov](https://github.com/4ertus2))
- clickhouse-server: more informative listen error messages. [\#5268](https://github.com/ClickHouse/ClickHouse/pull/5268) ([proller](https://github.com/proller))
- Support dictionaries in clickhouse-copier for functions in ``. [\#5270](https://github.com/ClickHouse/ClickHouse/pull/5270) ([proller](https://github.com/proller))
- Add the new setting `kafka_commit_every_batch` to regulate the Kafka committing policy. It allows setting the commit mode: after every batch of messages is handled, or after the whole block is written to the storage. It’s a trade-off between losing some messages or reading them twice in some extreme situations. [\#5308](https://github.com/ClickHouse/ClickHouse/pull/5308) ([Ivan](https://github.com/abyss7))
- Make `windowFunnel` support other unsigned integer types. [\#5320](https://github.com/ClickHouse/ClickHouse/pull/5320) ([sundyli](https://github.com/sundy-li))
- Allow shadowing the virtual column `_table` in the Merge engine. [\#5325](https://github.com/ClickHouse/ClickHouse/pull/5325) ([Ivan](https://github.com/abyss7))
- Make `sequenceMatch` aggregate functions support other unsigned integer types. [\#5339](https://github.com/ClickHouse/ClickHouse/pull/5339) ([sundyli](https://github.com/sundy-li))
- Better error messages if a checksum mismatch is most likely caused by hardware failures. [\#5355](https://github.com/ClickHouse/ClickHouse/pull/5355) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Check that underlying tables support sampling for `StorageMerge`. [\#5366](https://github.com/ClickHouse/ClickHouse/pull/5366) ([Ivan](https://github.com/abyss7))
- Close MySQL connections after their usage in external dictionaries. It is related to issue \#893. [\#5395](https://github.com/ClickHouse/ClickHouse/pull/5395) ([Clément Rodriguez](https://github.com/clemrodriguez))
- Improvements of the MySQL Wire Protocol. Changed the name of the format to MySQLWire. Using RAII for calling RSA\_free. Disabling SSL if the context cannot be created. [\#5419](https://github.com/ClickHouse/ClickHouse/pull/5419) ([Yuriy Baranov](https://github.com/yurriy))
- clickhouse-client: allow running with an inaccessible history file (read-only, no disk space, file is a directory, …). [\#5431](https://github.com/ClickHouse/ClickHouse/pull/5431) ([proller](https://github.com/proller))
- Respect query settings in asynchronous INSERTs into Distributed tables. [\#4936](https://github.com/ClickHouse/ClickHouse/pull/4936) ([TCeason](https://github.com/TCeason))
- Renamed the functions `leastSqr` to `simpleLinearRegression`, `LinearRegression` to `linearRegression`, `LogisticRegression` to `logisticRegression`. [\#5391](https://github.com/ClickHouse/ClickHouse/pull/5391) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
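A minimal sketch of the new NULL semantics of `IN` described above:

```
SELECT NULL IN (1, 2);   -- now NULL, consistent with `equals`
SELECT 1 IN (1, NULL);   -- 1
```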
#### Performance Improvements {#performance-improvements}

- Parallelize processing of parts of non-replicated MergeTree tables in ALTER MODIFY query. [\#4639](https://github.com/ClickHouse/ClickHouse/pull/4639) ([Ivan Kush](https://github.com/IvanKush))
- Optimizations in regular expressions extraction. [\#5193](https://github.com/ClickHouse/ClickHouse/pull/5193) [\#5191](https://github.com/ClickHouse/ClickHouse/pull/5191) ([Danila Kutenin](https://github.com/danlark1))
- Do not add right join key column to join result if it’s used only in join on section. [\#5260](https://github.com/ClickHouse/ClickHouse/pull/5260) ([Artem Zuikov](https://github.com/4ertus2))
- Freeze the Kafka buffer after the first empty response. It avoids multiple invocations of `ReadBuffer::next()` for an empty result in some row-parsing streams. [\#5283](https://github.com/ClickHouse/ClickHouse/pull/5283) ([Ivan](https://github.com/abyss7))
- `concat` function optimization for multiple arguments. [\#5357](https://github.com/ClickHouse/ClickHouse/pull/5357) ([Danila Kutenin](https://github.com/danlark1))
- Query optimisation. Allow pushing down IN statements while rewriting comma/cross joins into inner ones. [\#5396](https://github.com/ClickHouse/ClickHouse/pull/5396) ([Artem Zuikov](https://github.com/4ertus2))
- Upgrade our LZ4 implementation with the reference one to have faster decompression. [\#5070](https://github.com/ClickHouse/ClickHouse/pull/5070) ([Danila Kutenin](https://github.com/danlark1))
- Implemented MSD radix sort (based on kxsort), and partial sorting. [\#5129](https://github.com/ClickHouse/ClickHouse/pull/5129) ([Evgenii Pravda](https://github.com/kvinty))

#### Bug Fixes {#bug-fixes}

- Fix push require columns with join. [\#5192](https://github.com/ClickHouse/ClickHouse/pull/5192) ([Winter Zhang](https://github.com/zhang2014))
- Fixed a bug where, when ClickHouse is run by systemd, the command `sudo service clickhouse-server forcerestart` did not work as expected. [\#5204](https://github.com/ClickHouse/ClickHouse/pull/5204) ([proller](https://github.com/proller))
- Fix HTTP error codes in DataPartsExchange (the interserver HTTP server on port 9009 always returned code 200, even on errors). [\#5216](https://github.com/ClickHouse/ClickHouse/pull/5216) ([proller](https://github.com/proller))
- Fix SimpleAggregateFunction for Strings longer than MAX\_SMALL\_STRING\_SIZE. [\#5311](https://github.com/ClickHouse/ClickHouse/pull/5311) ([Azat Khuzhin](https://github.com/azat))
- Fix an error for `Decimal` to `Nullable(Decimal)` conversion in IN. Support other Decimal to Decimal conversions (including different scales). [\#5350](https://github.com/ClickHouse/ClickHouse/pull/5350) ([Artem Zuikov](https://github.com/4ertus2))
- Fixed FPU clobbering in the simdjson library that led to wrong calculation of the `uniqHLL` and `uniqCombined` aggregate functions and math functions such as `log`. [\#5354](https://github.com/ClickHouse/ClickHouse/pull/5354) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed handling of mixed const/nonconst cases in JSON functions. [\#5435](https://github.com/ClickHouse/ClickHouse/pull/5435) ([Vitaly Baranov](https://github.com/vitlibar))
- Fix the `retention` function. Now all conditions that are satisfied in a row of data are added to the data state. [\#5119](https://github.com/ClickHouse/ClickHouse/pull/5119) ([小路](https://github.com/nicelulu))
- Fix the result type for `quantileExact` with Decimals. [\#5304](https://github.com/ClickHouse/ClickHouse/pull/5304) ([Artem Zuikov](https://github.com/4ertus2))

#### Documentation {#documentation}

- Translate the documentation for `CollapsingMergeTree` to Chinese. [\#5168](https://github.com/ClickHouse/ClickHouse/pull/5168) ([张风啸](https://github.com/AlexZFX))
- Translate some documentation about table engines to Chinese. [\#5134](https://github.com/ClickHouse/ClickHouse/pull/5134) [\#5328](https://github.com/ClickHouse/ClickHouse/pull/5328) ([never lee](https://github.com/neverlee))
#### Build/Testing/Packaging Improvements {#buildtestingpackaging-improvements}

- Fix some sanitizer reports that show probable use-after-free. [\#5139](https://github.com/ClickHouse/ClickHouse/pull/5139) [\#5143](https://github.com/ClickHouse/ClickHouse/pull/5143) [\#5393](https://github.com/ClickHouse/ClickHouse/pull/5393) ([Ivan](https://github.com/abyss7))
- Move performance tests out of separate directories for convenience. [\#5158](https://github.com/ClickHouse/ClickHouse/pull/5158) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix incorrect performance tests. [\#5255](https://github.com/ClickHouse/ClickHouse/pull/5255) ([alesapin](https://github.com/alesapin))
- Added a tool to calculate checksums caused by bit flips to debug hardware issues. [\#5334](https://github.com/ClickHouse/ClickHouse/pull/5334) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Make the runner script more usable. [\#5340](https://github.com/ClickHouse/ClickHouse/pull/5340) [\#5360](https://github.com/ClickHouse/ClickHouse/pull/5360) ([filimonov](https://github.com/filimonov))
- Add a small instruction on how to write performance tests. [\#5408](https://github.com/ClickHouse/ClickHouse/pull/5408) ([alesapin](https://github.com/alesapin))
- Add the ability to make substitutions in the create, fill and drop queries in performance tests. [\#5367](https://github.com/ClickHouse/ClickHouse/pull/5367) ([Olga Khvostikova](https://github.com/stavrolia))

## ClickHouse release 19.7 {#clickhouse-release-19-7}

### ClickHouse release 19.7.5.29, 2019-07-05 {#clickhouse-release-19-7-5-29-2019-07-05}

#### Bug Fix {#bug-fix-25}

- Fix a performance regression in some queries with JOIN. [\#5192](https://github.com/ClickHouse/ClickHouse/pull/5192) ([Winter Zhang](https://github.com/zhang2014))

### ClickHouse release 19.7.5.27, 2019-06-09 {#clickhouse-release-19-7-5-27-2019-06-09}

#### New features {#new-features-1}

- Added the bitmap related functions `bitmapHasAny` and `bitmapHasAll`, analogous to the `hasAny` and `hasAll` functions for arrays (see the sketch below). [\#5279](https://github.com/ClickHouse/ClickHouse/pull/5279) ([Sergi Vladykin](https://github.com/svladykin))
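Roughly, assuming bitmaps built with `bitmapBuild`:

```
SELECT bitmapHasAny(bitmapBuild([1, 2, 3]), bitmapBuild([3, 4, 5]));  -- 1: at least one shared element
SELECT bitmapHasAll(bitmapBuild([1, 2, 3]), bitmapBuild([3, 4, 5]));  -- 0: not all elements are shared
```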
#### Bug Fixes {#bug-fixes-1}

- Fix a segfault on a `minmax` INDEX with a Null value. [\#5246](https://github.com/ClickHouse/ClickHouse/pull/5246) ([Nikita Vasilev](https://github.com/nikvas0))
- Mark all input columns in LIMIT BY as required output. It fixes the ‘Not found column’ error in some distributed queries. [\#5407](https://github.com/ClickHouse/ClickHouse/pull/5407) ([Constantin S. Pan](https://github.com/kvap))
- Fix the “Column ‘0’ already exists” error in `SELECT .. PREWHERE` on a column with DEFAULT. [\#5397](https://github.com/ClickHouse/ClickHouse/pull/5397) ([proller](https://github.com/proller))
- Fix the `ALTER MODIFY TTL` query on `ReplicatedMergeTree`. [\#5539](https://github.com/ClickHouse/ClickHouse/pull/5539/commits) ([Anton Popov](https://github.com/CurtizJ))
- Don’t crash the server when Kafka consumers have failed to start. [\#5285](https://github.com/ClickHouse/ClickHouse/pull/5285) ([Ivan](https://github.com/abyss7))
- Fixed bitmap functions producing wrong results. [\#5359](https://github.com/ClickHouse/ClickHouse/pull/5359) ([Andy Yang](https://github.com/andyyzh))
- Fix element\_count for hashed dictionaries (do not include duplicates). [\#5440](https://github.com/ClickHouse/ClickHouse/pull/5440) ([Azat Khuzhin](https://github.com/azat))
- Use the contents of the environment variable TZ as the name for the timezone. It helps to correctly detect the default timezone in some cases. [\#5443](https://github.com/ClickHouse/ClickHouse/pull/5443) ([Ivan](https://github.com/abyss7))
- Do not try to convert integers in `dictGetT` functions, because it doesn’t work correctly. Throw an exception instead. [\#5446](https://github.com/ClickHouse/ClickHouse/pull/5446) ([Artem Zuikov](https://github.com/4ertus2))
- Fix settings in ExternalData HTTP requests. [\#5455](https://github.com/ClickHouse/ClickHouse/pull/5455) ([Danila Kutenin](https://github.com/danlark1))
- Fix a bug where parts were removed only from the filesystem without dropping them from ZooKeeper. [\#5520](https://github.com/ClickHouse/ClickHouse/pull/5520) ([alesapin](https://github.com/alesapin))
- Fix a segmentation fault in the `bitmapHasAny` function. [\#5528](https://github.com/ClickHouse/ClickHouse/pull/5528) ([Zhichang Yu](https://github.com/yuzhichang))
- Fixed an error where the replication connection pool didn’t retry resolving the host, even after the DNS cache was dropped. [\#5534](https://github.com/ClickHouse/ClickHouse/pull/5534) ([alesapin](https://github.com/alesapin))
- Fixed the `DROP INDEX IF EXISTS` query. Now the `ALTER TABLE ... DROP INDEX IF EXISTS ...` query doesn’t raise an exception if the provided index does not exist. [\#5524](https://github.com/ClickHouse/ClickHouse/pull/5524) ([Gleb Novikov](https://github.com/NanoBjorn))
- Fix the union all supertype column. There were cases with inconsistent data and column types of resulting columns. [\#5503](https://github.com/ClickHouse/ClickHouse/pull/5503) ([Artem Zuikov](https://github.com/4ertus2))
- Skip ZNONODE during DDL query processing. Before, if another node removed the znode in the task queue, the one that did not process it but had already gotten the list of children would terminate the DDLWorker thread. [\#5489](https://github.com/ClickHouse/ClickHouse/pull/5489) ([Azat Khuzhin](https://github.com/azat))
- Fix INSERT into a Distributed() table with a MATERIALIZED column. [\#5429](https://github.com/ClickHouse/ClickHouse/pull/5429) ([Azat Khuzhin](https://github.com/azat))

### ClickHouse release 19.7.3.9, 2019-05-30 {#clickhouse-release-19-7-3-9-2019-05-30}

#### New Features {#new-features-2}

- Allow limiting the range of a setting that can be specified by the user. These constraints can be set up in the user settings profile. [\#4931](https://github.com/ClickHouse/ClickHouse/pull/4931) ([Vitaly Baranov](https://github.com/vitlibar))
- Add a second version of the function `groupUniqArray` with an optional `max_size` parameter that limits the size of the resulting array. This behavior is similar to the `groupArray(max_size)(x)` function; see the sketch after this list. [\#5026](https://github.com/ClickHouse/ClickHouse/pull/5026) ([Guillaume Tassery](https://github.com/YiuRULE))
- For TSVWithNames/CSVWithNames input file formats, the column order can now be determined from the file header. This is controlled by the `input_format_with_names_use_header` parameter. [\#5081](https://github.com/ClickHouse/ClickHouse/pull/5081) ([Alexander](https://github.com/Akazz))
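For example, a hedged sketch of the capped `groupUniqArray` and the header-driven column order:

```
SELECT groupUniqArray(3)(number % 10) FROM numbers(100);  -- at most 3 distinct values in the result array
SET input_format_with_names_use_header = 1;               -- take column order from the TSV/CSV header
```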
#### Bug Fixes {#bug-fixes-2}

- Crash with uncompressed\_cache + JOIN during merge (\#5197). [\#5133](https://github.com/ClickHouse/ClickHouse/pull/5133) ([Danila Kutenin](https://github.com/danlark1))
- Segmentation fault on a clickhouse-client query to system tables. \#5066 [\#5127](https://github.com/ClickHouse/ClickHouse/pull/5127) ([Ivan](https://github.com/abyss7))
- Data loss on heavy load via KafkaEngine (\#4736). [\#5080](https://github.com/ClickHouse/ClickHouse/pull/5080) ([Ivan](https://github.com/abyss7))
- Fixed very rare data race condition that could happen when executing a query with UNION ALL involving at least two SELECTs from system.columns, system.tables, system.parts, system.parts\_tables or tables of Merge family and performing ALTER of columns of the related tables concurrently. [\#5189](https://github.com/ClickHouse/ClickHouse/pull/5189) ([alexey-milovidov](https://github.com/alexey-milovidov))

#### Performance Improvements {#performance-improvements-1}

- Use radix sort for sorting by a single numeric column in `ORDER BY` without `LIMIT`. [\#5106](https://github.com/ClickHouse/ClickHouse/pull/5106), [\#4439](https://github.com/ClickHouse/ClickHouse/pull/4439) ([Evgenii Pravda](https://github.com/kvinty), [alexey-milovidov](https://github.com/alexey-milovidov))

#### Documentation {#documentation-1}

- Translate documentation for some table engines to Chinese. [\#5107](https://github.com/ClickHouse/ClickHouse/pull/5107), [\#5094](https://github.com/ClickHouse/ClickHouse/pull/5094), [\#5087](https://github.com/ClickHouse/ClickHouse/pull/5087) ([张风啸](https://github.com/AlexZFX)), [\#5068](https://github.com/ClickHouse/ClickHouse/pull/5068) ([never lee](https://github.com/neverlee))

#### Build/Testing/Packaging Improvements {#buildtestingpackaging-improvements-1}

- Print UTF-8 characters properly in `clickhouse-test`. [\#5084](https://github.com/ClickHouse/ClickHouse/pull/5084) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Add a command line parameter for clickhouse-client to always load suggestion data. [\#5102](https://github.com/ClickHouse/ClickHouse/pull/5102) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Resolve some of the PVS-Studio warnings. [\#5082](https://github.com/ClickHouse/ClickHouse/pull/5082) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Update LZ4. [\#5040](https://github.com/ClickHouse/ClickHouse/pull/5040) ([Danila Kutenin](https://github.com/danlark1))
- Add gperf to build requirements for upcoming pull request \#5030. [\#5110](https://github.com/ClickHouse/ClickHouse/pull/5110) ([proller](https://github.com/proller))

## ClickHouse release 19.6 {#clickhouse-release-19-6}

### ClickHouse release 19.6.3.18, 2019-06-13 {#clickhouse-release-19-6-3-18-2019-06-13}

#### Bug Fixes {#bug-fixes-3}

- Fixed IN condition pushdown for queries from the table functions `mysql` and `odbc` and the corresponding table engines. This fixes \#3540 and \#2384. [\#5313](https://github.com/ClickHouse/ClickHouse/pull/5313) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fix deadlock in ZooKeeper. [\#5297](https://github.com/ClickHouse/ClickHouse/pull/5297) ([github1youlc](https://github.com/github1youlc))
- Allow quoted decimals in CSV. [\#5284](https://github.com/ClickHouse/ClickHouse/pull/5284) ([Artem Zuikov](https://github.com/4ertus2))
- Disallow conversion from float Inf/NaN into Decimals (throw an exception). [\#5282](https://github.com/ClickHouse/ClickHouse/pull/5282) ([Artem Zuikov](https://github.com/4ertus2))
- Fix a data race in the rename query. [\#5247](https://github.com/ClickHouse/ClickHouse/pull/5247) ([Winter Zhang](https://github.com/zhang2014))
- Temporarily disable LFAlloc. Usage of LFAlloc might lead to a lot of MAP\_FAILED in allocating UncompressedCache and, as a result, to crashes of queries on highly loaded servers. [cfdba93](https://github.com/ClickHouse/ClickHouse/commit/cfdba938ce22f16efeec504f7f90206a515b1280) ([Danila Kutenin](https://github.com/danlark1))

### ClickHouse release 19.6.2.11, 2019-05-13 {#clickhouse-release-19-6-2-11-2019-05-13}

#### New Features {#new-features-3}

- TTL expressions for columns and tables (see the sketch after this list). [\#4212](https://github.com/ClickHouse/ClickHouse/pull/4212) ([Anton Popov](https://github.com/CurtizJ))
- Added support for `brotli` compression for HTTP responses (Accept-Encoding: br). [\#4388](https://github.com/ClickHouse/ClickHouse/pull/4388) ([Mikhail](https://github.com/fandyushin))
- Added the new function `isValidUTF8` for checking whether a set of bytes is correctly UTF-8 encoded. [\#4934](https://github.com/ClickHouse/ClickHouse/pull/4934) ([Danila Kutenin](https://github.com/danlark1))
- Add the new load balancing policy `first_or_random`, which sends queries to the first specified host and, if it’s inaccessible, sends queries to random hosts of the shard. Useful for cross-replication topology setups. [\#5012](https://github.com/ClickHouse/ClickHouse/pull/5012) ([nvartolomei](https://github.com/nvartolomei))
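A hedged sketch of column and table TTLs, plus `isValidUTF8` (the table and column names are hypothetical):

```
CREATE TABLE events
(
    d Date,
    payload String TTL d + INTERVAL 1 MONTH  -- column TTL: value is reset after a month
)
ENGINE = MergeTree() ORDER BY d
TTL d + INTERVAL 1 YEAR;                     -- table TTL: rows are deleted after a year

SELECT isValidUTF8('abc'), isValidUTF8('\xF0\x28');  -- 1, 0
```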
#### Experimental Features {#experimental-features-1}

- Add the setting `index_granularity_bytes` (adaptive index granularity) for the MergeTree\* tables family. [\#4826](https://github.com/ClickHouse/ClickHouse/pull/4826) ([alesapin](https://github.com/alesapin))

#### Improvements {#improvements-1}

- Added support for non-constant and negative size and length arguments for the function `substringUTF8`. [\#4989](https://github.com/ClickHouse/ClickHouse/pull/4989) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Disable push-down to the right table in a left join, the left table in a right join, and both tables in a full join. This fixes wrong JOIN results in some cases. [\#4846](https://github.com/ClickHouse/ClickHouse/pull/4846) ([Ivan](https://github.com/abyss7))
- `clickhouse-copier`: auto upload the task configuration from the `--task-file` option. [\#4876](https://github.com/ClickHouse/ClickHouse/pull/4876) ([proller](https://github.com/proller))
- Added a typos handler for the storage factory and table functions factory. [\#4891](https://github.com/ClickHouse/ClickHouse/pull/4891) ([Danila Kutenin](https://github.com/danlark1))
- Support asterisks and qualified asterisks for multiple joins without subqueries. [\#4898](https://github.com/ClickHouse/ClickHouse/pull/4898) ([Artem Zuikov](https://github.com/4ertus2))
- Make the missing column error message more user friendly. [\#4915](https://github.com/ClickHouse/ClickHouse/pull/4915) ([Artem Zuikov](https://github.com/4ertus2))

#### Performance Improvements {#performance-improvements-2}

- Significant speedup of ASOF JOIN. [\#4924](https://github.com/ClickHouse/ClickHouse/pull/4924) ([Martijn Bakker](https://github.com/Gladdy))

#### Backward Incompatible Changes {#backward-incompatible-changes}

- The HTTP header `Query-Id` was renamed to `X-ClickHouse-Query-Id` for consistency. [\#4972](https://github.com/ClickHouse/ClickHouse/pull/4972) ([Mikhail](https://github.com/fandyushin))

#### Bug Fixes {#bug-fixes-4}

- Fixed a potential null pointer dereference in `clickhouse-copier`. [\#4900](https://github.com/ClickHouse/ClickHouse/pull/4900) ([proller](https://github.com/proller))
- Fixed an error on queries with JOIN + ARRAY JOIN. [\#4938](https://github.com/ClickHouse/ClickHouse/pull/4938) ([Artem Zuikov](https://github.com/4ertus2))
- Fixed hanging on start of the server when a dictionary depends on another dictionary via a database with engine=Dictionary. [\#4962](https://github.com/ClickHouse/ClickHouse/pull/4962) ([Vitaly Baranov](https://github.com/vitlibar))
- Partially fix distributed\_product\_mode = local. It’s now possible to allow columns of local tables in where/having/order by/… via table aliases. An exception is thrown if the table does not have an alias. It is not yet possible to access the columns without table aliases. [\#4986](https://github.com/ClickHouse/ClickHouse/pull/4986) ([Artem Zuikov](https://github.com/4ertus2))
- Fix a potentially wrong result for `SELECT DISTINCT` with `JOIN`. [\#5001](https://github.com/ClickHouse/ClickHouse/pull/5001) ([Artem Zuikov](https://github.com/4ertus2))
- Fixed very rare data race condition that could happen when executing a query with UNION ALL involving at least two SELECTs from system.columns, system.tables, system.parts, system.parts\_tables or tables of Merge family and performing ALTER of columns of the related tables concurrently. [\#5189](https://github.com/ClickHouse/ClickHouse/pull/5189) ([alexey-milovidov](https://github.com/alexey-milovidov))

#### Build/Testing/Packaging Improvements {#buildtestingpackaging-improvements-2}

- Fixed test failures when running clickhouse-server on a different host. [\#4713](https://github.com/ClickHouse/ClickHouse/pull/4713) ([Vasily Nemkov](https://github.com/Enmk))
- clickhouse-test: Disable color control sequences in non-TTY environments. [\#4937](https://github.com/ClickHouse/ClickHouse/pull/4937) ([alesapin](https://github.com/alesapin))
- clickhouse-test: Allow using any test database (remove the `test.` qualification where possible). [\#5008](https://github.com/ClickHouse/ClickHouse/pull/5008) ([proller](https://github.com/proller))
- Fix ubsan errors. [\#5037](https://github.com/ClickHouse/ClickHouse/pull/5037) ([Vitaly Baranov](https://github.com/vitlibar))
- Yandex LFAlloc was added to ClickHouse to allocate MarkCache and UncompressedCache data in different ways to catch segfaults more reliably. [\#4995](https://github.com/ClickHouse/ClickHouse/pull/4995) ([Danila Kutenin](https://github.com/danlark1))
- A Python util to help with backports and changelogs. [\#4949](https://github.com/ClickHouse/ClickHouse/pull/4949) ([Ivan](https://github.com/abyss7))
## ClickHouse release 19.5 {#clickhouse-release-19-5}

### ClickHouse release 19.5.4.22, 2019-05-13 {#clickhouse-release-19-5-4-22-2019-05-13}

#### Bug fixes {#bug-fixes-5}

- Fixed a possible crash in bitmap\* functions. [\#5220](https://github.com/ClickHouse/ClickHouse/pull/5220) [\#5228](https://github.com/ClickHouse/ClickHouse/pull/5228) ([Andy Yang](https://github.com/andyyzh))
- Fixed very rare data race condition that could happen when executing a query with UNION ALL involving at least two SELECTs from system.columns, system.tables, system.parts, system.parts\_tables or tables of Merge family and performing ALTER of columns of the related tables concurrently. [\#5189](https://github.com/ClickHouse/ClickHouse/pull/5189) ([alexey-milovidov](https://github.com/alexey-milovidov))
- Fixed the error `Set for IN is not created yet in case of using single LowCardinality column in the left part of IN`. This error happened if a LowCardinality column was part of the primary key. \#5031 [\#5154](https://github.com/ClickHouse/ClickHouse/pull/5154) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
- Modification of the retention function: previously, if a row satisfied both the first and the Nth condition, only the first satisfied condition was added to the data state. Now all conditions that are satisfied in a row of data are added to the data state. [\#5119](https://github.com/ClickHouse/ClickHouse/pull/5119) ([小路](https://github.com/nicelulu))

### ClickHouse release 19.5.3.8, 2019-04-18 {#clickhouse-release-19-5-3-8-2019-04-18}

#### Bug fixes {#bug-fixes-6}

- Fixed the type of the setting `max_partitions_per_insert_block` from boolean to UInt64. [\#5028](https://github.com/ClickHouse/ClickHouse/pull/5028) ([Mohammad Hossein Sekhavat](https://github.com/mhsekhavat))

### ClickHouse release 19.5.2.6, 2019-04-15 {#clickhouse-release-19-5-2-6-2019-04-15}

#### New Features {#new-features-4}

- [Hyperscan](https://github.com/intel/hyperscan) multiple regular expression matching was added (functions `multiMatchAny`, `multiMatchAnyIndex`, `multiFuzzyMatchAny`, `multiFuzzyMatchAnyIndex`); see the sketch after this list. [\#4780](https://github.com/ClickHouse/ClickHouse/pull/4780), [\#4841](https://github.com/ClickHouse/ClickHouse/pull/4841) ([Danila Kutenin](https://github.com/danlark1))
- The `multiSearchFirstPosition` function was added. [\#4780](https://github.com/ClickHouse/ClickHouse/pull/4780) ([Danila Kutenin](https://github.com/danlark1))
- Implement the predefined expression filter per row for tables. [\#4792](https://github.com/ClickHouse/ClickHouse/pull/4792) ([Ivan](https://github.com/abyss7))
- A new type of data skipping indices based on bloom filters (can be used for the `equal`, `in` and `like` functions). [\#4499](https://github.com/ClickHouse/ClickHouse/pull/4499) ([Nikita Vasilev](https://github.com/nikvas0))
- Added `ASOF JOIN` which allows running queries that join to the most recent value known. [\#4774](https://github.com/ClickHouse/ClickHouse/pull/4774) [\#4867](https://github.com/ClickHouse/ClickHouse/pull/4867) [\#4863](https://github.com/ClickHouse/ClickHouse/pull/4863) [\#4875](https://github.com/ClickHouse/ClickHouse/pull/4875) ([Martijn Bakker](https://github.com/Gladdy), [Artem Zuikov](https://github.com/4ertus2))
- Rewrite multiple `COMMA JOIN` to `CROSS JOIN`. Then rewrite them to `INNER JOIN` if possible. [\#4661](https://github.com/ClickHouse/ClickHouse/pull/4661) ([Artem Zuikov](https://github.com/4ertus2))
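A small sketch of the Hyperscan-backed multi-match functions (the regular expressions are illustrative):

```
SELECT multiMatchAny('Hello, World!', ['W.rld', '^Goodbye']);       -- 1: at least one pattern matches
SELECT multiMatchAnyIndex('Hello, World!', ['^Goodbye', 'W.rld']);  -- 2: index of a matching pattern
```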
-
-#### Improvement {#improvement-9}
-
-- `topK` and `topKWeighted` now support a custom `loadFactor` (fixes issue [\#4252](https://github.com/ClickHouse/ClickHouse/issues/4252)). [\#4634](https://github.com/ClickHouse/ClickHouse/pull/4634) ([Kirill Danshin](https://github.com/kirillDanshin))
-- Allow using `parallel_replicas_count > 1` even for tables without sampling (the setting is simply ignored for them). In previous versions it led to an exception. [\#4637](https://github.com/ClickHouse/ClickHouse/pull/4637) ([Alexey Elymanov](https://github.com/digitalist))
-- Support for `CREATE OR REPLACE VIEW`. Allows creating a view or setting a new definition in a single statement; see the example after this list. [\#4654](https://github.com/ClickHouse/ClickHouse/pull/4654) ([Boris Granveaud](https://github.com/bgranvea))
-- `Buffer` table engine now supports `PREWHERE`. [\#4671](https://github.com/ClickHouse/ClickHouse/pull/4671) ([Yangkuan Liu](https://github.com/LiuYangkuan))
-- Added the ability to start a replicated table without metadata in ZooKeeper in `readonly` mode. [\#4691](https://github.com/ClickHouse/ClickHouse/pull/4691) ([alesapin](https://github.com/alesapin))
-- Fixed flicker of the progress bar in clickhouse-client. The issue was most noticeable when using `FORMAT Null` with streaming queries. [\#4811](https://github.com/ClickHouse/ClickHouse/pull/4811) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Allow disabling functions with the `hyperscan` library on a per-user basis to limit potentially excessive and uncontrolled resource usage. [\#4816](https://github.com/ClickHouse/ClickHouse/pull/4816) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Add version number logging in all errors. [\#4824](https://github.com/ClickHouse/ClickHouse/pull/4824) ([proller](https://github.com/proller))
-- Added a restriction to the `multiMatch` functions which requires the string size to fit into `unsigned int`. Also added a limit on the number of arguments to the `multiSearch` functions. [\#4834](https://github.com/ClickHouse/ClickHouse/pull/4834) ([Danila Kutenin](https://github.com/danlark1))
-- Improved usage of scratch space and error handling in Hyperscan. [\#4866](https://github.com/ClickHouse/ClickHouse/pull/4866) ([Danila Kutenin](https://github.com/danlark1))
-- Fill `system.graphite_retentions` from a table config of `*GraphiteMergeTree` engine tables. [\#4584](https://github.com/ClickHouse/ClickHouse/pull/4584) ([Mikhail f. Shiryaev](https://github.com/Felixoid))
-- Renamed the `trigramDistance` function to `ngramDistance` and added more functions with `CaseInsensitive` and `UTF` variants. [\#4602](https://github.com/ClickHouse/ClickHouse/pull/4602) ([Danila Kutenin](https://github.com/danlark1))
-- Improved data skipping indices calculation. [\#4640](https://github.com/ClickHouse/ClickHouse/pull/4640) ([Nikita Vasilev](https://github.com/nikvas0))
-- Keep ordinary, `DEFAULT`, `MATERIALIZED` and `ALIAS` columns in a single list (fixes issue [\#2867](https://github.com/ClickHouse/ClickHouse/issues/2867)). [\#4707](https://github.com/ClickHouse/ClickHouse/pull/4707) ([Alex Zatelepin](https://github.com/ztlpn))
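-
-A sketch of the new `CREATE OR REPLACE VIEW` statement (the table and view names below are hypothetical): it creates the view on the first run and replaces its definition on subsequent runs, with no separate `DROP` needed.
-
-```sql
--- Redefining the view later only requires re-running this same statement.
-CREATE OR REPLACE VIEW hits_daily AS
-SELECT toDate(event_time) AS day, count() AS hits
-FROM hits
-GROUP BY day
-```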
-
-#### Bug Fix {#bug-fix-26}
-
-- Avoid `std::terminate` in case of memory allocation failure. Now the `std::bad_alloc` exception is thrown as expected. [\#4665](https://github.com/ClickHouse/ClickHouse/pull/4665) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed capnproto reading from buffer. Sometimes files weren’t loaded successfully over HTTP. [\#4674](https://github.com/ClickHouse/ClickHouse/pull/4674) ([Vladislav](https://github.com/smirnov-vs))
-- Fix error `Unknown log entry type: 0` after an `OPTIMIZE TABLE FINAL` query. [\#4683](https://github.com/ClickHouse/ClickHouse/pull/4683) ([Amos Bird](https://github.com/amosbird))
-- Wrong arguments to the `hasAny` or `hasAll` functions could lead to a segfault. [\#4698](https://github.com/ClickHouse/ClickHouse/pull/4698) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- A deadlock could happen while executing a `DROP DATABASE dictionary` query. [\#4701](https://github.com/ClickHouse/ClickHouse/pull/4701) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fix undefined behavior in the `median` and `quantile` functions. [\#4702](https://github.com/ClickHouse/ClickHouse/pull/4702) ([hcz](https://github.com/hczhcz))
-- Fix compression level detection when `network_compression_method` is in lowercase. Broken in v19.1. [\#4706](https://github.com/ClickHouse/ClickHouse/pull/4706) ([proller](https://github.com/proller))
-- Fixed the `UTC` setting being ignored (fixes issue [\#4658](https://github.com/ClickHouse/ClickHouse/issues/4658)). [\#4718](https://github.com/ClickHouse/ClickHouse/pull/4718) ([proller](https://github.com/proller))
-- Fix `histogram` function behaviour with `Distributed` tables. [\#4741](https://github.com/ClickHouse/ClickHouse/pull/4741) ([olegkv](https://github.com/olegkv))
-- Fixed TSan report `destroy of a locked mutex`. [\#4742](https://github.com/ClickHouse/ClickHouse/pull/4742) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed TSan report on shutdown due to a race condition in system logs usage. Fixed potential use-after-free on shutdown when part\_log is enabled. [\#4758](https://github.com/ClickHouse/ClickHouse/pull/4758) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fix rechecking of parts in `ReplicatedMergeTreeAlterThread` in case of error. [\#4772](https://github.com/ClickHouse/ClickHouse/pull/4772) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
-- Arithmetic operations on intermediate aggregate function states were not working for constant arguments (such as subquery results). [\#4776](https://github.com/ClickHouse/ClickHouse/pull/4776) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Always backquote column names in metadata. Otherwise it’s impossible to create a table with a column named `index` (the server won’t restart due to a malformed `ATTACH` query in metadata). [\#4782](https://github.com/ClickHouse/ClickHouse/pull/4782) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fix crash in `ALTER ... MODIFY ORDER BY` on a `Distributed` table. [\#4790](https://github.com/ClickHouse/ClickHouse/pull/4790) ([TCeason](https://github.com/TCeason))
-- Fix segfault in `JOIN ON` with enabled `enable_optimize_predicate_expression`. [\#4794](https://github.com/ClickHouse/ClickHouse/pull/4794) ([Winter Zhang](https://github.com/zhang2014))
-- Fix bug with adding an extraneous row after consuming a protobuf message from Kafka. [\#4808](https://github.com/ClickHouse/ClickHouse/pull/4808) ([Vitaly Baranov](https://github.com/vitlibar))
-- Fix crash of `JOIN` on a not-nullable vs nullable column. Fix `NULLs` in right keys in `ANY JOIN` + `join_use_nulls`. [\#4815](https://github.com/ClickHouse/ClickHouse/pull/4815) ([Artem Zuikov](https://github.com/4ertus2))
-- Fix segmentation fault in `clickhouse-copier`. [\#4835](https://github.com/ClickHouse/ClickHouse/pull/4835) ([proller](https://github.com/proller))
-- Fixed race condition in `SELECT` from `system.tables` if the table is renamed or altered concurrently. [\#4836](https://github.com/ClickHouse/ClickHouse/pull/4836) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a data race when fetching a data part that is already obsolete. [\#4839](https://github.com/ClickHouse/ClickHouse/pull/4839) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a rare data race that can happen during `RENAME` of a MergeTree-family table. [\#4844](https://github.com/ClickHouse/ClickHouse/pull/4844) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed segmentation fault in the function `arrayIntersect`. A segmentation fault could happen if the function was called with mixed constant and ordinary arguments. [\#4847](https://github.com/ClickHouse/ClickHouse/pull/4847) ([Lixiang Qian](https://github.com/fancyqlx))
-- Fixed reading from an `Array(LowCardinality)` column in the rare case when the column contained a long sequence of empty arrays. [\#4850](https://github.com/ClickHouse/ClickHouse/pull/4850) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
-- Fix crash in `FULL/RIGHT JOIN` when joining on a nullable vs a not-nullable column. [\#4855](https://github.com/ClickHouse/ClickHouse/pull/4855) ([Artem Zuikov](https://github.com/4ertus2))
-- Fix `No message received` exception while fetching parts between replicas. [\#4856](https://github.com/ClickHouse/ClickHouse/pull/4856) ([alesapin](https://github.com/alesapin))
-- Fixed wrong `arrayIntersect` result in the case of several repeated values in a single array. [\#4871](https://github.com/ClickHouse/ClickHouse/pull/4871) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
-- Fix a race condition during concurrent `ALTER COLUMN` queries that could lead to a server crash (fixes issue [\#3421](https://github.com/ClickHouse/ClickHouse/issues/3421)). [\#4592](https://github.com/ClickHouse/ClickHouse/pull/4592) ([Alex Zatelepin](https://github.com/ztlpn))
-- Fix incorrect result in `FULL/RIGHT JOIN` with a const column. [\#4723](https://github.com/ClickHouse/ClickHouse/pull/4723) ([Artem Zuikov](https://github.com/4ertus2))
-- Fix duplicates in `GLOBAL JOIN` with asterisk. [\#4705](https://github.com/ClickHouse/ClickHouse/pull/4705) ([Artem Zuikov](https://github.com/4ertus2))
-- Fix parameter deduction in `ALTER MODIFY` of column `CODEC` when the column type is not specified. [\#4883](https://github.com/ClickHouse/ClickHouse/pull/4883) ([alesapin](https://github.com/alesapin))
-- The functions `cutQueryStringAndFragment()` and `queryStringAndFragment()` now work correctly when `URL` contains a fragment and no query. [\#4894](https://github.com/ClickHouse/ClickHouse/pull/4894) ([Vitaly Baranov](https://github.com/vitlibar))
-- Fix a rare bug when the setting `min_bytes_to_use_direct_io` is greater than zero, which occurs when a thread has to seek backward in a column file. [\#4897](https://github.com/ClickHouse/ClickHouse/pull/4897) ([alesapin](https://github.com/alesapin))
-- Fix wrong argument types for aggregate functions with `LowCardinality` arguments (fixes issue [\#4919](https://github.com/ClickHouse/ClickHouse/issues/4919)). [\#4922](https://github.com/ClickHouse/ClickHouse/pull/4922) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
-- Fix wrong name qualification in `GLOBAL JOIN`. [\#4969](https://github.com/ClickHouse/ClickHouse/pull/4969) ([Artem Zuikov](https://github.com/4ertus2))
-- Fix `toISOWeek` function result for year 1970. [\#4988](https://github.com/ClickHouse/ClickHouse/pull/4988) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fix `DROP`, `TRUNCATE` and `OPTIMIZE` query duplication when executed `ON CLUSTER` for the `ReplicatedMergeTree*` table family. [\#4991](https://github.com/ClickHouse/ClickHouse/pull/4991) ([alesapin](https://github.com/alesapin))
-
-#### Backward Incompatible Change {#backward-incompatible-change-8}
-
-- Renamed the setting `insert_sample_with_metadata` to `input_format_defaults_for_omitted_fields`. [\#4771](https://github.com/ClickHouse/ClickHouse/pull/4771) ([Artem Zuikov](https://github.com/4ertus2))
-- Added setting `max_partitions_per_insert_block` (with value 100 by default). If an inserted block contains a larger number of partitions, an exception is thrown. Set it to 0 if you want to remove the limit (not recommended). [\#4845](https://github.com/ClickHouse/ClickHouse/pull/4845) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Multi-search functions were renamed (`multiPosition` to `multiSearchAllPositions`, `multiSearch` to `multiSearchAny`, `firstMatch` to `multiSearchFirstIndex`). [\#4780](https://github.com/ClickHouse/ClickHouse/pull/4780) ([Danila Kutenin](https://github.com/danlark1))
-
-#### Performance Improvement {#performance-improvement-6}
-
-- Optimize the Volnitsky searcher by inlining, giving about a 5-10% search improvement for queries with many needles or many similar bigrams. [\#4862](https://github.com/ClickHouse/ClickHouse/pull/4862) ([Danila Kutenin](https://github.com/danlark1))
-- Fix a performance issue when the setting `use_uncompressed_cache` is greater than zero, which appeared when all read data was contained in the cache. [\#4913](https://github.com/ClickHouse/ClickHouse/pull/4913) ([alesapin](https://github.com/alesapin))
-
-#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-10}
-
-- Hardening of the debug build: more granular memory mappings and ASLR; added memory protection for the mark cache and index. This makes it possible to find more memory-stomping bugs in cases when ASan and MSan cannot do it. [\#4632](https://github.com/ClickHouse/ClickHouse/pull/4632) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Add support for the cmake variables `ENABLE_PROTOBUF`, `ENABLE_PARQUET` and `ENABLE_BROTLI` which allow enabling/disabling the above features (same as we can do for librdkafka, mysql, etc). [\#4669](https://github.com/ClickHouse/ClickHouse/pull/4669) ([Silviu Caragea](https://github.com/silviucpp))
-- Added the ability to print the process list and stacktraces of all threads if some queries are hung after a test run. [\#4675](https://github.com/ClickHouse/ClickHouse/pull/4675) ([alesapin](https://github.com/alesapin))
-- Add retries on `Connection loss` error in `clickhouse-test`. [\#4682](https://github.com/ClickHouse/ClickHouse/pull/4682) ([alesapin](https://github.com/alesapin))
-- Add a FreeBSD build with Vagrant and a build with thread sanitizer to the packager script. [\#4712](https://github.com/ClickHouse/ClickHouse/pull/4712) [\#4748](https://github.com/ClickHouse/ClickHouse/pull/4748) ([alesapin](https://github.com/alesapin))
-- Now the user is asked for a password for user `'default'` during installation. [\#4725](https://github.com/ClickHouse/ClickHouse/pull/4725) ([proller](https://github.com/proller))
-- Suppress a warning in the `rdkafka` library. [\#4740](https://github.com/ClickHouse/ClickHouse/pull/4740) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Allow building without SSL. [\#4750](https://github.com/ClickHouse/ClickHouse/pull/4750) ([proller](https://github.com/proller))
-- Add a way to launch the clickhouse-server image from a custom user. [\#4753](https://github.com/ClickHouse/ClickHouse/pull/4753) ([Mikhail f. Shiryaev](https://github.com/Felixoid))
-- Upgrade contrib boost to 1.69. [\#4793](https://github.com/ClickHouse/ClickHouse/pull/4793) ([proller](https://github.com/proller))
-- Disable usage of `mremap` when compiled with Thread Sanitizer. Surprisingly enough, TSan does not intercept `mremap` (though it does intercept `mmap`, `munmap`), which leads to false positives. Fixed TSan report in stateful tests. [\#4859](https://github.com/ClickHouse/ClickHouse/pull/4859) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Add a test checking the use of a format schema via the HTTP interface. [\#4864](https://github.com/ClickHouse/ClickHouse/pull/4864) ([Vitaly Baranov](https://github.com/vitlibar))
-
-## ClickHouse release 19.4 {#clickhouse-release-19-4}
-
-### ClickHouse release 19.4.4.33, 2019-04-17 {#clickhouse-release-19-4-4-33-2019-04-17}
-
-#### Bug Fixes {#bug-fixes-7}
-
-- Avoid `std::terminate` in case of memory allocation failure. Now the `std::bad_alloc` exception is thrown as expected. [\#4665](https://github.com/ClickHouse/ClickHouse/pull/4665) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed capnproto reading from buffer. Sometimes files weren’t loaded successfully over HTTP. [\#4674](https://github.com/ClickHouse/ClickHouse/pull/4674) ([Vladislav](https://github.com/smirnov-vs))
-- Fix error `Unknown log entry type: 0` after an `OPTIMIZE TABLE FINAL` query. [\#4683](https://github.com/ClickHouse/ClickHouse/pull/4683) ([Amos Bird](https://github.com/amosbird))
-- Wrong arguments to the `hasAny` or `hasAll` functions could lead to a segfault. [\#4698](https://github.com/ClickHouse/ClickHouse/pull/4698) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- A deadlock could happen while executing a `DROP DATABASE dictionary` query. [\#4701](https://github.com/ClickHouse/ClickHouse/pull/4701) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fix undefined behavior in the `median` and `quantile` functions. [\#4702](https://github.com/ClickHouse/ClickHouse/pull/4702) ([hcz](https://github.com/hczhcz))
-- Fix compression level detection when `network_compression_method` is in lowercase. Broken in v19.1. [\#4706](https://github.com/ClickHouse/ClickHouse/pull/4706) ([proller](https://github.com/proller))
-- Fixed the `UTC` setting being ignored (fixes issue [\#4658](https://github.com/ClickHouse/ClickHouse/issues/4658)). [\#4718](https://github.com/ClickHouse/ClickHouse/pull/4718) ([proller](https://github.com/proller))
-- Fix `histogram` function behaviour with `Distributed` tables. [\#4741](https://github.com/ClickHouse/ClickHouse/pull/4741) ([olegkv](https://github.com/olegkv))
-- Fixed TSan report `destroy of a locked mutex`. [\#4742](https://github.com/ClickHouse/ClickHouse/pull/4742) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed TSan report on shutdown due to a race condition in system logs usage. Fixed potential use-after-free on shutdown when part\_log is enabled. [\#4758](https://github.com/ClickHouse/ClickHouse/pull/4758) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fix rechecking of parts in `ReplicatedMergeTreeAlterThread` in case of error. [\#4772](https://github.com/ClickHouse/ClickHouse/pull/4772) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
-- Arithmetic operations on intermediate aggregate function states were not working for constant arguments (such as subquery results). [\#4776](https://github.com/ClickHouse/ClickHouse/pull/4776) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Always backquote column names in metadata. Otherwise it’s impossible to create a table with a column named `index` (the server won’t restart due to a malformed `ATTACH` query in metadata). [\#4782](https://github.com/ClickHouse/ClickHouse/pull/4782) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fix crash in `ALTER ... MODIFY ORDER BY` on a `Distributed` table. [\#4790](https://github.com/ClickHouse/ClickHouse/pull/4790) ([TCeason](https://github.com/TCeason))
-- Fix segfault in `JOIN ON` with enabled `enable_optimize_predicate_expression`. [\#4794](https://github.com/ClickHouse/ClickHouse/pull/4794) ([Winter Zhang](https://github.com/zhang2014))
-- Fix bug with adding an extraneous row after consuming a protobuf message from Kafka. [\#4808](https://github.com/ClickHouse/ClickHouse/pull/4808) ([Vitaly Baranov](https://github.com/vitlibar))
-- Fix segmentation fault in `clickhouse-copier`. [\#4835](https://github.com/ClickHouse/ClickHouse/pull/4835) ([proller](https://github.com/proller))
-- Fixed race condition in `SELECT` from `system.tables` if the table is renamed or altered concurrently. [\#4836](https://github.com/ClickHouse/ClickHouse/pull/4836) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a data race when fetching a data part that is already obsolete. [\#4839](https://github.com/ClickHouse/ClickHouse/pull/4839) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a rare data race that can happen during `RENAME` of a MergeTree-family table. [\#4844](https://github.com/ClickHouse/ClickHouse/pull/4844) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed segmentation fault in the function `arrayIntersect`. A segmentation fault could happen if the function was called with mixed constant and ordinary arguments. [\#4847](https://github.com/ClickHouse/ClickHouse/pull/4847) ([Lixiang Qian](https://github.com/fancyqlx))
-- Fixed reading from an `Array(LowCardinality)` column in the rare case when the column contained a long sequence of empty arrays. [\#4850](https://github.com/ClickHouse/ClickHouse/pull/4850) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
-- Fix `No message received` exception while fetching parts between replicas. [\#4856](https://github.com/ClickHouse/ClickHouse/pull/4856) ([alesapin](https://github.com/alesapin))
-- Fixed wrong `arrayIntersect` result in the case of several repeated values in a single array. [\#4871](https://github.com/ClickHouse/ClickHouse/pull/4871) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
-- Fix a race condition during concurrent `ALTER COLUMN` queries that could lead to a server crash (fixes issue [\#3421](https://github.com/ClickHouse/ClickHouse/issues/3421)). [\#4592](https://github.com/ClickHouse/ClickHouse/pull/4592) ([Alex Zatelepin](https://github.com/ztlpn))
-- Fix parameter deduction in `ALTER MODIFY` of column `CODEC` when the column type is not specified. [\#4883](https://github.com/ClickHouse/ClickHouse/pull/4883) ([alesapin](https://github.com/alesapin))
-- The functions `cutQueryStringAndFragment()` and `queryStringAndFragment()` now work correctly when `URL` contains a fragment and no query. [\#4894](https://github.com/ClickHouse/ClickHouse/pull/4894) ([Vitaly Baranov](https://github.com/vitlibar))
-- Fix a rare bug when the setting `min_bytes_to_use_direct_io` is greater than zero, which occurs when a thread has to seek backward in a column file. [\#4897](https://github.com/ClickHouse/ClickHouse/pull/4897) ([alesapin](https://github.com/alesapin))
-- Fix wrong argument types for aggregate functions with `LowCardinality` arguments (fixes issue [\#4919](https://github.com/ClickHouse/ClickHouse/issues/4919)). [\#4922](https://github.com/ClickHouse/ClickHouse/pull/4922) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
-- Fix `toISOWeek` function result for year 1970. [\#4988](https://github.com/ClickHouse/ClickHouse/pull/4988) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fix `DROP`, `TRUNCATE` and `OPTIMIZE` query duplication when executed `ON CLUSTER` for the `ReplicatedMergeTree*` table family. [\#4991](https://github.com/ClickHouse/ClickHouse/pull/4991) ([alesapin](https://github.com/alesapin))
-
-#### Improvements {#improvements-2}
-
-- Keep ordinary, `DEFAULT`, `MATERIALIZED` and `ALIAS` columns in a single list (fixes issue [\#2867](https://github.com/ClickHouse/ClickHouse/issues/2867)). [\#4707](https://github.com/ClickHouse/ClickHouse/pull/4707) ([Alex Zatelepin](https://github.com/ztlpn))
-
-### ClickHouse release 19.4.3.11, 2019-04-02 {#clickhouse-release-19-4-3-11-2019-04-02}
-
-#### Bug Fixes {#bug-fixes-8}
-
-- Fix crash in `FULL/RIGHT JOIN` when joining on a nullable vs a not-nullable column. [\#4855](https://github.com/ClickHouse/ClickHouse/pull/4855) ([Artem Zuikov](https://github.com/4ertus2))
-- Fix segmentation fault in `clickhouse-copier`. [\#4835](https://github.com/ClickHouse/ClickHouse/pull/4835) ([proller](https://github.com/proller))
-
-#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-11}
-
-- Add a way to launch the clickhouse-server image from a custom user. [\#4753](https://github.com/ClickHouse/ClickHouse/pull/4753) ([Mikhail f. Shiryaev](https://github.com/Felixoid))
-
-### ClickHouse release 19.4.2.7, 2019-03-30 {#clickhouse-release-19-4-2-7-2019-03-30}
-
-#### Bug Fixes {#bug-fixes-9}
-
-- Fixed reading from an `Array(LowCardinality)` column in the rare case when the column contained a long sequence of empty arrays. [\#4850](https://github.com/ClickHouse/ClickHouse/pull/4850) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
-
-### ClickHouse release 19.4.1.3, 2019-03-19 {#clickhouse-release-19-4-1-3-2019-03-19}
-
-#### Bug Fixes {#bug-fixes-10}
-
-- Fixed remote queries which contain both `LIMIT BY` and `LIMIT`. Previously, if `LIMIT BY` and `LIMIT` were used in a remote query, `LIMIT` could happen before `LIMIT BY`, which led to an over-filtered result. [\#4708](https://github.com/ClickHouse/ClickHouse/pull/4708) ([Constantin S. Pan](https://github.com/kvap))
-
-### ClickHouse release 19.4.0.49, 2019-03-09 {#clickhouse-release-19-4-0-49-2019-03-09}
-
-#### New Features {#new-features-5}
-
-- Added full support for the `Protobuf` format (input and output, nested data structures). [\#4174](https://github.com/ClickHouse/ClickHouse/pull/4174) [\#4493](https://github.com/ClickHouse/ClickHouse/pull/4493) ([Vitaly Baranov](https://github.com/vitlibar))
-- Added bitmap functions with Roaring Bitmaps. [\#4207](https://github.com/ClickHouse/ClickHouse/pull/4207) ([Andy Yang](https://github.com/andyyzh)) [\#4568](https://github.com/ClickHouse/ClickHouse/pull/4568) ([Vitaly Baranov](https://github.com/vitlibar))
-- Parquet format support. [\#4448](https://github.com/ClickHouse/ClickHouse/pull/4448) ([proller](https://github.com/proller))
-- N-gram distance was added for fuzzy string comparison. It is similar to q-gram metrics in the R language. [\#4466](https://github.com/ClickHouse/ClickHouse/pull/4466) ([Danila Kutenin](https://github.com/danlark1))
-- Combine rules for graphite rollup from dedicated aggregation and retention patterns. [\#4426](https://github.com/ClickHouse/ClickHouse/pull/4426) ([Mikhail f. Shiryaev](https://github.com/Felixoid))
-- Added `max_execution_speed` and `max_execution_speed_bytes` to limit resource usage. Added the `min_execution_speed_bytes` setting to complement `min_execution_speed`. [\#4430](https://github.com/ClickHouse/ClickHouse/pull/4430) ([Winter Zhang](https://github.com/zhang2014))
-- Implemented the function `flatten`; see the example after this list. [\#4555](https://github.com/ClickHouse/ClickHouse/pull/4555) [\#4409](https://github.com/ClickHouse/ClickHouse/pull/4409) ([alexey-milovidov](https://github.com/alexey-milovidov), [kzon](https://github.com/kzon))
-- Added functions `arrayEnumerateDenseRanked` and `arrayEnumerateUniqRanked` (like `arrayEnumerateUniq`, but they allow fine-tuning the array depth to look inside multidimensional arrays). [\#4475](https://github.com/ClickHouse/ClickHouse/pull/4475) ([proller](https://github.com/proller)) [\#4601](https://github.com/ClickHouse/ClickHouse/pull/4601) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Multiple JOINs with some restrictions: no asterisks, no complex aliases in ON/WHERE/GROUP BY/… [\#4462](https://github.com/ClickHouse/ClickHouse/pull/4462) ([Artem Zuikov](https://github.com/4ertus2))
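-
-A quick illustration of the new `flatten` function (the literal array below is just an arbitrary example): it converts an array of arrays into a flat array.
-
-```sql
-SELECT flatten([[[1, 2]], [[3, 4], [5]]]) AS flat
--- Result: [1, 2, 3, 4, 5]
-```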
-
-#### Bug Fixes {#bug-fixes-11}
-
-- This release also contains all bug fixes from 19.3 and 19.1.
-- Fixed a bug in data skipping indices: the order of granules after INSERT was incorrect. [\#4407](https://github.com/ClickHouse/ClickHouse/pull/4407) ([Nikita Vasilev](https://github.com/nikvas0))
-- Fixed the `set` index for `Nullable` and `LowCardinality` columns. Before this fix, a `set` index with a `Nullable` or `LowCardinality` column led to the error `Data type must be deserialized with multiple streams` while selecting. [\#4594](https://github.com/ClickHouse/ClickHouse/pull/4594) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
-- Correctly set update\_time on a full `executable` dictionary update. [\#4551](https://github.com/ClickHouse/ClickHouse/pull/4551) ([Tema Novikov](https://github.com/temoon))
-- Fix the broken progress bar in 19.3. [\#4627](https://github.com/ClickHouse/ClickHouse/pull/4627) ([filimonov](https://github.com/filimonov))
-- Fixed inconsistent values of MemoryTracker when a memory region was shrunk, in certain cases. [\#4619](https://github.com/ClickHouse/ClickHouse/pull/4619) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed undefined behaviour in ThreadPool. [\#4612](https://github.com/ClickHouse/ClickHouse/pull/4612) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a very rare crash with the message `mutex lock failed: Invalid argument` that could happen when a MergeTree table was dropped concurrently with a SELECT. [\#4608](https://github.com/ClickHouse/ClickHouse/pull/4608) ([Alex Zatelepin](https://github.com/ztlpn))
-- ODBC driver compatibility with the `LowCardinality` data type. [\#4381](https://github.com/ClickHouse/ClickHouse/pull/4381) ([proller](https://github.com/proller))
-- FreeBSD: Fixup for the `AIOcontextPool: Found io_event with unknown id 0` error. [\#4438](https://github.com/ClickHouse/ClickHouse/pull/4438) ([urgordeadbeef](https://github.com/urgordeadbeef))
-- The `system.part_log` table was created regardless of configuration. [\#4483](https://github.com/ClickHouse/ClickHouse/pull/4483) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fix undefined behaviour in the `dictIsIn` function for cache dictionaries. [\#4515](https://github.com/ClickHouse/ClickHouse/pull/4515) ([alesapin](https://github.com/alesapin))
-- Fixed a deadlock when a SELECT query locks the same table multiple times (e.g. from different threads or when executing multiple subqueries) and there is a concurrent DDL query. [\#4535](https://github.com/ClickHouse/ClickHouse/pull/4535) ([Alex Zatelepin](https://github.com/ztlpn))
-- Disable compile\_expressions by default until we get our own `llvm` contrib and can test it with `clang` and `asan`. [\#4579](https://github.com/ClickHouse/ClickHouse/pull/4579) ([alesapin](https://github.com/alesapin))
-- Prevent `std::terminate` when `invalidate_query` for a `clickhouse` external dictionary source has returned a wrong resultset (empty, or more than one row, or more than one column). Fixed an issue where the `invalidate_query` was performed every five seconds regardless of the `lifetime`. [\#4583](https://github.com/ClickHouse/ClickHouse/pull/4583) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Avoid deadlock when the `invalidate_query` for a dictionary with `clickhouse` source involved the `system.dictionaries` table or the `Dictionaries` database (rare case). [\#4599](https://github.com/ClickHouse/ClickHouse/pull/4599) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixes for CROSS JOIN with empty WHERE. [\#4598](https://github.com/ClickHouse/ClickHouse/pull/4598) ([Artem Zuikov](https://github.com/4ertus2))
-- Fixed segfault in the function `replicate` when a constant argument is passed. [\#4603](https://github.com/ClickHouse/ClickHouse/pull/4603) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fix lambda functions with the predicate optimizer. [\#4408](https://github.com/ClickHouse/ClickHouse/pull/4408) ([Winter Zhang](https://github.com/zhang2014))
-- Multiple fixes for multiple JOINs. [\#4595](https://github.com/ClickHouse/ClickHouse/pull/4595) ([Artem Zuikov](https://github.com/4ertus2))
-
-#### Improvements {#improvements-3}
-
-- Support aliases in the JOIN ON section for right table columns. [\#4412](https://github.com/ClickHouse/ClickHouse/pull/4412) ([Artem Zuikov](https://github.com/4ertus2))
-- The result of multiple JOINs needs correct result names to be used in subselects. Replace flat aliases with source names in the result. [\#4474](https://github.com/ClickHouse/ClickHouse/pull/4474) ([Artem Zuikov](https://github.com/4ertus2))
-- Improve push-down logic for joined statements. [\#4387](https://github.com/ClickHouse/ClickHouse/pull/4387) ([Ivan](https://github.com/abyss7))
-
-#### Performance Improvements {#performance-improvements-3}
-
-- Improved heuristics of the “move to PREWHERE” optimization. [\#4405](https://github.com/ClickHouse/ClickHouse/pull/4405) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Use proper lookup tables that use HashTable’s API for 8-bit and 16-bit keys. [\#4536](https://github.com/ClickHouse/ClickHouse/pull/4536) ([Amos Bird](https://github.com/amosbird))
-- Improved performance of string comparison. [\#4564](https://github.com/ClickHouse/ClickHouse/pull/4564) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Clean up the distributed DDL queue in a separate thread so that it doesn’t slow down the main loop that processes distributed DDL tasks. [\#4502](https://github.com/ClickHouse/ClickHouse/pull/4502) ([Alex Zatelepin](https://github.com/ztlpn))
-- When `min_bytes_to_use_direct_io` is set to 1, not every file was opened in O\_DIRECT mode because the data size to read was sometimes underestimated by the size of one compressed block. [\#4526](https://github.com/ClickHouse/ClickHouse/pull/4526) ([alexey-milovidov](https://github.com/alexey-milovidov))
-
-#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-12}
-
-- Added support for clang-9. [\#4604](https://github.com/ClickHouse/ClickHouse/pull/4604) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fix wrong `__asm__` instructions (again). [\#4621](https://github.com/ClickHouse/ClickHouse/pull/4621) ([Konstantin Podshumok](https://github.com/podshumok))
-- Add the ability to specify settings for `clickhouse-performance-test` from the command line. [\#4437](https://github.com/ClickHouse/ClickHouse/pull/4437) ([alesapin](https://github.com/alesapin))
-- Add dictionary tests to integration tests. [\#4477](https://github.com/ClickHouse/ClickHouse/pull/4477) ([alesapin](https://github.com/alesapin))
-- Added queries from the benchmark on the website to automated performance tests. [\#4496](https://github.com/ClickHouse/ClickHouse/pull/4496) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- `xxhash.h` does not exist in external lz4 because it is an implementation detail and its symbols are namespaced with the `XXH_NAMESPACE` macro. When lz4 is external, xxHash has to be external too, and the dependents have to link to it. [\#4495](https://github.com/ClickHouse/ClickHouse/pull/4495) ([Orivej Desh](https://github.com/orivej))
-- Fixed a case when the `quantileTiming` aggregate function could be called with a negative or floating-point argument (this fixes a fuzz test with the undefined behaviour sanitizer). [\#4506](https://github.com/ClickHouse/ClickHouse/pull/4506) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Spelling error correction. [\#4531](https://github.com/ClickHouse/ClickHouse/pull/4531) ([sdk2](https://github.com/sdk2))
-- Fix compilation on Mac. [\#4371](https://github.com/ClickHouse/ClickHouse/pull/4371) ([Vitaly Baranov](https://github.com/vitlibar))
-- Build fixes for FreeBSD and various unusual build configurations. [\#4444](https://github.com/ClickHouse/ClickHouse/pull/4444) ([proller](https://github.com/proller))
-
-## ClickHouse release 19.3 {#clickhouse-release-19-3}
-
-### ClickHouse release 19.3.9.1, 2019-04-02 {#clickhouse-release-19-3-9-1-2019-04-02}
-
-#### Bug Fixes {#bug-fixes-12}
-
-- Fix crash in `FULL/RIGHT JOIN` when joining on a nullable vs a not-nullable column. [\#4855](https://github.com/ClickHouse/ClickHouse/pull/4855) ([Artem Zuikov](https://github.com/4ertus2))
-- Fix segmentation fault in `clickhouse-copier`. [\#4835](https://github.com/ClickHouse/ClickHouse/pull/4835) ([proller](https://github.com/proller))
-- Fixed reading from an `Array(LowCardinality)` column in the rare case when the column contained a long sequence of empty arrays. [\#4850](https://github.com/ClickHouse/ClickHouse/pull/4850) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
-
-#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-13}
-
-- Add a way to launch the clickhouse-server image from a custom user. [\#4753](https://github.com/ClickHouse/ClickHouse/pull/4753) ([Mikhail f. Shiryaev](https://github.com/Felixoid))
-
-### ClickHouse release 19.3.7, 2019-03-12 {#clickhouse-release-19-3-7-2019-03-12}
-
-#### Bug fixes {#bug-fixes-13}
-
-- Fixed error in \#3920. This error manifests itself as random cache corruption (messages `Unknown codec family code`, `Cannot seek through file`) and segfaults. This bug first appeared in version 19.1 and is present in versions up to 19.1.10 and 19.3.6. [\#4623](https://github.com/ClickHouse/ClickHouse/pull/4623) ([alexey-milovidov](https://github.com/alexey-milovidov))
-
-### ClickHouse release 19.3.6, 2019-03-02 {#clickhouse-release-19-3-6-2019-03-02}
-
-#### Bug fixes {#bug-fixes-14}
-
-- When there are more than 1000 threads in a thread pool, `std::terminate` could happen on thread exit. [Azat Khuzhin](https://github.com/azat) [\#4485](https://github.com/ClickHouse/ClickHouse/pull/4485) [\#4505](https://github.com/ClickHouse/ClickHouse/pull/4505) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Now it’s possible to create `ReplicatedMergeTree*` tables with comments on columns without defaults, and tables with column codecs without comments and defaults. Also fixed comparison of codecs. [\#4523](https://github.com/ClickHouse/ClickHouse/pull/4523) ([alesapin](https://github.com/alesapin))
-- Fixed crash on JOIN with an array or tuple. [\#4552](https://github.com/ClickHouse/ClickHouse/pull/4552) ([Artem Zuikov](https://github.com/4ertus2))
-- Fixed crash in clickhouse-copier with the message `ThreadStatus not created`. [\#4540](https://github.com/ClickHouse/ClickHouse/pull/4540) ([Artem Zuikov](https://github.com/4ertus2))
-- Fixed hangup on server shutdown if distributed DDLs were used. [\#4472](https://github.com/ClickHouse/ClickHouse/pull/4472) ([Alex Zatelepin](https://github.com/ztlpn))
-- Incorrect column numbers were printed in the error message about text format parsing for columns with numbers greater than 10. [\#4484](https://github.com/ClickHouse/ClickHouse/pull/4484) ([alexey-milovidov](https://github.com/alexey-milovidov))
-
-#### Build/Testing/Packaging Improvements {#buildtestingpackaging-improvements-3}
-
-- Fixed build with AVX enabled. [\#4527](https://github.com/ClickHouse/ClickHouse/pull/4527) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Enable extended accounting and IO accounting based on a known-good version instead of the kernel under which it is compiled. [\#4541](https://github.com/ClickHouse/ClickHouse/pull/4541) ([nvartolomei](https://github.com/nvartolomei))
-- Allow skipping the setting of core\_dump.size\_limit, with a warning instead of a throw if setting the limit fails. [\#4473](https://github.com/ClickHouse/ClickHouse/pull/4473) ([proller](https://github.com/proller))
-- Removed the `inline` tags of `void readBinary(...)` in `Field.cpp`. Also merged redundant `namespace DB` blocks. [\#4530](https://github.com/ClickHouse/ClickHouse/pull/4530) ([hcz](https://github.com/hczhcz))
-
-### ClickHouse release 19.3.5, 2019-02-21 {#clickhouse-release-19-3-5-2019-02-21}
-
-#### Bug fixes {#bug-fixes-15}
-
-- Fixed a bug with processing of large HTTP insert queries. [\#4454](https://github.com/ClickHouse/ClickHouse/pull/4454) ([alesapin](https://github.com/alesapin))
-- Fixed backward incompatibility with old versions due to a wrong implementation of the `send_logs_level` setting. [\#4445](https://github.com/ClickHouse/ClickHouse/pull/4445) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed backward incompatibility of the table function `remote` introduced with column comments. [\#4446](https://github.com/ClickHouse/ClickHouse/pull/4446) ([alexey-milovidov](https://github.com/alexey-milovidov))
-
-### ClickHouse release 19.3.4, 2019-02-16 {#clickhouse-release-19-3-4-2019-02-16}
-
-#### Improvements {#improvements-4}
-
-- Table index size is not accounted against memory limits when doing an `ATTACH TABLE` query. This avoids the possibility that a table cannot be attached after being detached. [\#4396](https://github.com/ClickHouse/ClickHouse/pull/4396) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Slightly raised the limit on the max string and array size received from ZooKeeper. This makes it possible to keep working with an increased `CLIENT_JVMFLAGS=-Djute.maxbuffer=...` on ZooKeeper. [\#4398](https://github.com/ClickHouse/ClickHouse/pull/4398) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Allow repairing an abandoned replica even if it already has a huge number of nodes in its queue. [\#4399](https://github.com/ClickHouse/ClickHouse/pull/4399) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Add one required argument to the `SET` index (the maximum number of stored rows). [\#4386](https://github.com/ClickHouse/ClickHouse/pull/4386) ([Nikita Vasilev](https://github.com/nikvas0))
-
-#### Bug Fixes {#bug-fixes-16}
-
-- Fixed `WITH ROLLUP` result for GROUP BY with a single `LowCardinality` key. [\#4384](https://github.com/ClickHouse/ClickHouse/pull/4384) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
-- Fixed a bug in the set index (dropping a granule if it contains more than `max_rows` rows). [\#4386](https://github.com/ClickHouse/ClickHouse/pull/4386) ([Nikita Vasilev](https://github.com/nikvas0))
-- A lot of FreeBSD build fixes. [\#4397](https://github.com/ClickHouse/ClickHouse/pull/4397) ([proller](https://github.com/proller))
-- Fixed alias substitution in queries with a subquery containing the same alias (issue [\#4110](https://github.com/ClickHouse/ClickHouse/issues/4110)). [\#4351](https://github.com/ClickHouse/ClickHouse/pull/4351) ([Artem Zuikov](https://github.com/4ertus2))
-
-#### Build/Testing/Packaging Improvements {#buildtestingpackaging-improvements-4}
-
-- Add the ability to run `clickhouse-server` for stateless tests in a docker image. [\#4347](https://github.com/ClickHouse/ClickHouse/pull/4347) ([Vasily Nemkov](https://github.com/Enmk))
-
-### ClickHouse release 19.3.3, 2019-02-13 {#clickhouse-release-19-3-3-2019-02-13}
-
-#### New Features {#new-features-6}
-
-- Added the `KILL MUTATION` statement that allows removing mutations that are for some reason stuck. Added `latest_failed_part`, `latest_fail_time`, `latest_fail_reason` fields to the `system.mutations` table for easier troubleshooting. [\#4287](https://github.com/ClickHouse/ClickHouse/pull/4287) ([Alex Zatelepin](https://github.com/ztlpn))
-- Added aggregate function `entropy` which computes Shannon entropy. [\#4238](https://github.com/ClickHouse/ClickHouse/pull/4238) ([Quid37](https://github.com/Quid37))
-- Added the ability to send queries `INSERT INTO tbl VALUES (....` to the server without splitting into `query` and `data` parts. [\#4301](https://github.com/ClickHouse/ClickHouse/pull/4301) ([alesapin](https://github.com/alesapin))
-- A generic implementation of the `arrayWithConstant` function was added. [\#4322](https://github.com/ClickHouse/ClickHouse/pull/4322) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Implemented the `NOT BETWEEN` comparison operator. [\#4228](https://github.com/ClickHouse/ClickHouse/pull/4228) ([Dmitry Naumov](https://github.com/nezed))
-- Implemented `sumMapFiltered` in order to be able to limit the number of keys for which values will be summed by `sumMap`. [\#4129](https://github.com/ClickHouse/ClickHouse/pull/4129) ([Léo Ercolanelli](https://github.com/ercolanelli-leo))
-- Added support for `Nullable` types in the `mysql` table function. [\#4198](https://github.com/ClickHouse/ClickHouse/pull/4198) ([Emmanuel Donin de Rosière](https://github.com/edonin))
-- Support for arbitrary constant expressions in the `LIMIT` clause. [\#4246](https://github.com/ClickHouse/ClickHouse/pull/4246) ([k3box](https://github.com/k3box))
-- Added the `topKWeighted` aggregate function that takes an additional argument with an (unsigned integer) weight. [\#4245](https://github.com/ClickHouse/ClickHouse/pull/4245) ([Andrew Golman](https://github.com/andrewgolman))
-- `StorageJoin` now supports the `join_any_take_last_row` setting that allows overwriting existing values of the same key. [\#3973](https://github.com/ClickHouse/ClickHouse/pull/3973) ([Amos Bird](https://github.com/amosbird))
-- Added the function `toStartOfInterval`; see the example after this list. [\#4304](https://github.com/ClickHouse/ClickHouse/pull/4304) ([Vitaly Baranov](https://github.com/vitlibar))
-- Added the `RowBinaryWithNamesAndTypes` format. [\#4200](https://github.com/ClickHouse/ClickHouse/pull/4200) ([Oleg V. Kozlyuk](https://github.com/DarkWanderer))
-- Added `IPv4` and `IPv6` data types. More efficient implementations of `IPv*` functions. [\#3669](https://github.com/ClickHouse/ClickHouse/pull/3669) ([Vasily Nemkov](https://github.com/Enmk))
-- Added the function `toStartOfTenMinutes()`. [\#4298](https://github.com/ClickHouse/ClickHouse/pull/4298) ([Vitaly Baranov](https://github.com/vitlibar))
-- Added `Protobuf` output format. [\#4005](https://github.com/ClickHouse/ClickHouse/pull/4005) [\#4158](https://github.com/ClickHouse/ClickHouse/pull/4158) ([Vitaly Baranov](https://github.com/vitlibar))
-- Added brotli support in the HTTP interface for data import (INSERTs). [\#4235](https://github.com/ClickHouse/ClickHouse/pull/4235) ([Mikhail](https://github.com/fandyushin))
-- Added hints when a user makes a typo in a function name or type in the command-line client. [\#4239](https://github.com/ClickHouse/ClickHouse/pull/4239) ([Danila Kutenin](https://github.com/danlark1))
-- Added `Query-Id` to the server’s HTTP response headers. [\#4231](https://github.com/ClickHouse/ClickHouse/pull/4231) ([Mikhail](https://github.com/fandyushin))
-
-#### Experimental features {#experimental-features-2}
-
-- Added `minmax` and `set` data skipping indices for the MergeTree table engine family. [\#4143](https://github.com/ClickHouse/ClickHouse/pull/4143) ([Nikita Vasilev](https://github.com/nikvas0))
-- Added conversion of `CROSS JOIN` to `INNER JOIN` if possible. [\#4221](https://github.com/ClickHouse/ClickHouse/pull/4221) [\#4266](https://github.com/ClickHouse/ClickHouse/pull/4266) ([Artem Zuikov](https://github.com/4ertus2))
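-
-A small illustration of `toStartOfInterval` (the timestamp below is arbitrary): it rounds a date/time down to the start of the given interval.
-
-```sql
-SELECT toStartOfInterval(toDateTime('2019-02-13 14:37:25'), INTERVAL 15 MINUTE)
--- Result: 2019-02-13 14:30:00
-```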
-
-#### Bug Fixes {#bug-fixes-17}
-
-- Fixed `Not found column` for duplicate columns in the `JOIN ON` section. [\#4279](https://github.com/ClickHouse/ClickHouse/pull/4279) ([Artem Zuikov](https://github.com/4ertus2))
-- Make the `START REPLICATED SENDS` command start replicated sends. [\#4229](https://github.com/ClickHouse/ClickHouse/pull/4229) ([nvartolomei](https://github.com/nvartolomei))
-- Fixed aggregate function execution with `Array(LowCardinality)` arguments. [\#4055](https://github.com/ClickHouse/ClickHouse/pull/4055) ([KochetovNicolai](https://github.com/KochetovNicolai))
-- Fixed wrong behaviour when doing an `INSERT ... SELECT ... FROM file(...)` query where the file has `CSVWithNames` or `TSVWithNames` format and the first data row is missing. [\#4297](https://github.com/ClickHouse/ClickHouse/pull/4297) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed crash on dictionary reload if the dictionary is not available. This bug first appeared in 19.1.6. [\#4188](https://github.com/ClickHouse/ClickHouse/pull/4188) ([proller](https://github.com/proller))
-- Fixed `ALL JOIN` with duplicates in the right table. [\#4184](https://github.com/ClickHouse/ClickHouse/pull/4184) ([Artem Zuikov](https://github.com/4ertus2))
-- Fixed segmentation fault with `use_uncompressed_cache=1` and an exception with wrong uncompressed size. This bug first appeared in 19.1.6. [\#4186](https://github.com/ClickHouse/ClickHouse/pull/4186) ([alesapin](https://github.com/alesapin))
-- Fixed a `compile_expressions` bug with comparison of big (more than int16) dates. [\#4341](https://github.com/ClickHouse/ClickHouse/pull/4341) ([alesapin](https://github.com/alesapin))
-- Fixed infinite loop when selecting from the table function `numbers(0)`. [\#4280](https://github.com/ClickHouse/ClickHouse/pull/4280) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Temporarily disable predicate optimization for `ORDER BY`. [\#3890](https://github.com/ClickHouse/ClickHouse/pull/3890) ([Winter Zhang](https://github.com/zhang2014))
-- Fixed `Illegal instruction` error when using base64 functions on old CPUs. This error was reproduced only when ClickHouse was compiled with gcc-8. [\#4275](https://github.com/ClickHouse/ClickHouse/pull/4275) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed `No message received` error when interacting with the PostgreSQL ODBC Driver through a TLS connection. Also fixes a segfault when using the MySQL ODBC Driver. [\#4170](https://github.com/ClickHouse/ClickHouse/pull/4170) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed incorrect result when `Date` and `DateTime` arguments are used in branches of the conditional operator (function `if`). Added a generic case for function `if`. [\#4243](https://github.com/ClickHouse/ClickHouse/pull/4243) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- ClickHouse dictionaries now load within the `clickhouse` process. [\#4166](https://github.com/ClickHouse/ClickHouse/pull/4166) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed deadlock when a `SELECT` from a table with `File` engine was retried after a `No such file or directory` error. [\#4161](https://github.com/ClickHouse/ClickHouse/pull/4161) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a race condition where selecting from `system.tables` could give a `table doesn't exist` error. [\#4313](https://github.com/ClickHouse/ClickHouse/pull/4313) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- `clickhouse-client` could segfault on exit while loading data for command-line suggestions if it was run in interactive mode. [\#4317](https://github.com/ClickHouse/ClickHouse/pull/4317) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a bug where the execution of mutations containing `IN` operators was producing incorrect results. [\#4099](https://github.com/ClickHouse/ClickHouse/pull/4099) ([Alex Zatelepin](https://github.com/ztlpn))
-- Fixed error: if there is a database with `Dictionary` engine, all dictionaries are forced to load at server startup, and if there is a dictionary with a ClickHouse source from localhost, that dictionary cannot load. [\#4255](https://github.com/ClickHouse/ClickHouse/pull/4255) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed an error where creation of system logs was attempted again at server shutdown. [\#4254](https://github.com/ClickHouse/ClickHouse/pull/4254) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Correctly return the right type and properly handle locks in the `joinGet` function. [\#4153](https://github.com/ClickHouse/ClickHouse/pull/4153) ([Amos Bird](https://github.com/amosbird))
-- Added `sumMapWithOverflow` function. [\#4151](https://github.com/ClickHouse/ClickHouse/pull/4151) ([Léo Ercolanelli](https://github.com/ercolanelli-leo))
-- Fixed segfault with `allow_experimental_multiple_joins_emulation`. [52de2c](https://github.com/ClickHouse/ClickHouse/commit/52de2cd927f7b5257dd67e175f0a5560a48840d0) ([Artem Zuikov](https://github.com/4ertus2))
-- Fixed bug with incorrect `Date` and `DateTime` comparison. [\#4237](https://github.com/ClickHouse/ClickHouse/pull/4237) ([valexey](https://github.com/valexey))
-- Fixed fuzz test under undefined behavior sanitizer: added parameter type check for the `quantile*Weighted` family of functions. [\#4145](https://github.com/ClickHouse/ClickHouse/pull/4145) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a rare race condition where removal of old data parts could fail with a `File not found` error. [\#4378](https://github.com/ClickHouse/ClickHouse/pull/4378) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fix install package with missing /etc/clickhouse-server/config.xml. [\#4343](https://github.com/ClickHouse/ClickHouse/pull/4343) ([proller](https://github.com/proller))
-
-#### Build/Testing/Packaging Improvements {#buildtestingpackaging-improvements-5}
-
-- Debian package: correct the /etc/clickhouse-server/preprocessed link according to config. [\#4205](https://github.com/ClickHouse/ClickHouse/pull/4205) ([proller](https://github.com/proller))
-- Various build fixes for FreeBSD. [\#4225](https://github.com/ClickHouse/ClickHouse/pull/4225) ([proller](https://github.com/proller))
-- Added the ability to create, fill and drop tables in perftest. [\#4220](https://github.com/ClickHouse/ClickHouse/pull/4220) ([alesapin](https://github.com/alesapin))
-- Added a script to check for duplicate includes. [\#4326](https://github.com/ClickHouse/ClickHouse/pull/4326) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Added the ability to run queries by index in performance tests. [\#4264](https://github.com/ClickHouse/ClickHouse/pull/4264) ([alesapin](https://github.com/alesapin))
-- The package with debug symbols is now suggested to be installed. [\#4274](https://github.com/ClickHouse/ClickHouse/pull/4274) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Refactoring of performance-test. Better logging and signal handling. [\#4171](https://github.com/ClickHouse/ClickHouse/pull/4171) ([alesapin](https://github.com/alesapin))
-- Added docs for the anonymized Yandex.Metrika datasets. [\#4164](https://github.com/ClickHouse/ClickHouse/pull/4164) ([alesapin](https://github.com/alesapin))
-- Added a tool for converting an old month-partitioned part to the custom-partitioned format. [\#4195](https://github.com/ClickHouse/ClickHouse/pull/4195) ([Alex Zatelepin](https://github.com/ztlpn))
-- Added docs about two datasets in S3. [\#4144](https://github.com/ClickHouse/ClickHouse/pull/4144) ([alesapin](https://github.com/alesapin))
-- Added a script which creates a changelog from pull request descriptions. [\#4169](https://github.com/ClickHouse/ClickHouse/pull/4169) [\#4173](https://github.com/ClickHouse/ClickHouse/pull/4173) ([KochetovNicolai](https://github.com/KochetovNicolai))
-- Added a puppet module for ClickHouse. [\#4182](https://github.com/ClickHouse/ClickHouse/pull/4182) ([Maxim Fedotov](https://github.com/MaxFedotov))
-- Added docs for a group of undocumented functions. [\#4168](https://github.com/ClickHouse/ClickHouse/pull/4168) ([Winter Zhang](https://github.com/zhang2014))
-- ARM build fixes. [\#4210](https://github.com/ClickHouse/ClickHouse/pull/4210) [\#4306](https://github.com/ClickHouse/ClickHouse/pull/4306) [\#4291](https://github.com/ClickHouse/ClickHouse/pull/4291) ([proller](https://github.com/proller))
-- Dictionary tests are now able to run from `ctest`. [\#4189](https://github.com/ClickHouse/ClickHouse/pull/4189) ([proller](https://github.com/proller))
-- Now `/etc/ssl` is used as the default directory with SSL certificates. [\#4167](https://github.com/ClickHouse/ClickHouse/pull/4167) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Added a check for SSE and AVX instructions at start. [\#4234](https://github.com/ClickHouse/ClickHouse/pull/4234) ([Igr](https://github.com/igron99))
-- The init script now waits for the server to start. [\#4281](https://github.com/ClickHouse/ClickHouse/pull/4281) ([proller](https://github.com/proller))
-
-#### Backward Incompatible Changes {#backward-incompatible-changes-1}
-
-- Removed the `allow_experimental_low_cardinality_type` setting. `LowCardinality` data types are production ready. [\#4323](https://github.com/ClickHouse/ClickHouse/pull/4323) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Reduce mark cache size and uncompressed cache size according to the available memory amount. [\#4240](https://github.com/ClickHouse/ClickHouse/pull/4240) ([Lopatin Konstantin](https://github.com/k-lopatin))
-- Added the keyword `INDEX` in the `CREATE TABLE` query. A column with the name `index` must be quoted with backticks or double quotes: `` `index` ``. See the sketch after this list. [\#4143](https://github.com/ClickHouse/ClickHouse/pull/4143) ([Nikita Vasilev](https://github.com/nikvas0))
-- `sumMap` now promotes the result type instead of overflowing. The old `sumMap` behavior can be obtained by using the `sumMapWithOverflow` function. [\#4151](https://github.com/ClickHouse/ClickHouse/pull/4151) ([Léo Ercolanelli](https://github.com/ercolanelli-leo))
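-
-A sketch of the quoting now required for a column named `index` (the table definition below is hypothetical):
-
-```sql
-CREATE TABLE t
-(
-    `index` UInt32,  -- must be quoted, since INDEX is now a keyword
-    value String
-) ENGINE = MergeTree() ORDER BY `index`
-```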
-
-#### Performance Improvements {#performance-improvements-4}
-
-- `std::sort` replaced by `pdqsort` for queries without `LIMIT`. [\#4236](https://github.com/ClickHouse/ClickHouse/pull/4236) ([Evgenii Pravda](https://github.com/kvinty))
-- Now the server reuses threads from the global thread pool. This affects performance in some corner cases. [\#4150](https://github.com/ClickHouse/ClickHouse/pull/4150) ([alexey-milovidov](https://github.com/alexey-milovidov))
-
-#### Improvements {#improvements-5}
-
-- Implemented AIO support for FreeBSD. [\#4305](https://github.com/ClickHouse/ClickHouse/pull/4305) ([urgordeadbeef](https://github.com/urgordeadbeef))
-- `SELECT * FROM a JOIN b USING a, b` now returns the `a` and `b` columns only from the left table; see the sketch after this list. [\#4141](https://github.com/ClickHouse/ClickHouse/pull/4141) ([Artem Zuikov](https://github.com/4ertus2))
-- Allow the `-C` option of the client to work the same as the `-c` option. [\#4232](https://github.com/ClickHouse/ClickHouse/pull/4232) ([syominsergey](https://github.com/syominsergey))
-- Now the `--password` option used without a value requires the password from stdin. [\#4230](https://github.com/ClickHouse/ClickHouse/pull/4230) ([BSD\_Conqueror](https://github.com/bsd-conqueror))
-- Added highlighting of unescaped metacharacters in string literals that contain `LIKE` expressions or regexps. [\#4327](https://github.com/ClickHouse/ClickHouse/pull/4327) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Added cancelling of HTTP read-only queries if the client socket goes away. [\#4213](https://github.com/ClickHouse/ClickHouse/pull/4213) ([nvartolomei](https://github.com/nvartolomei))
-- Now the server reports progress to keep client connections alive. [\#4215](https://github.com/ClickHouse/ClickHouse/pull/4215) ([Ivan](https://github.com/abyss7))
-- Slightly better message with the reason for an OPTIMIZE query with the `optimize_throw_if_noop` setting enabled. [\#4294](https://github.com/ClickHouse/ClickHouse/pull/4294) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Added support for the `--version` option for the clickhouse server. [\#4251](https://github.com/ClickHouse/ClickHouse/pull/4251) ([Lopatin Konstantin](https://github.com/k-lopatin))
-- Added the `--help/-h` option to `clickhouse-server`. [\#4233](https://github.com/ClickHouse/ClickHouse/pull/4233) ([Yuriy Baranov](https://github.com/yurriy))
-- Added support for scalar subqueries with an aggregate function state result. [\#4348](https://github.com/ClickHouse/ClickHouse/pull/4348) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
-- Improved server shutdown time and ALTER waiting time. [\#4372](https://github.com/ClickHouse/ClickHouse/pull/4372) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Added info about the replicated\_can\_become\_leader setting to system.replicas and added logging if the replica won’t try to become leader. [\#4379](https://github.com/ClickHouse/ClickHouse/pull/4379) ([Alex Zatelepin](https://github.com/ztlpn))
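-
-A sketch of the new `USING` deduplication behaviour (the tables below are hypothetical): the shared key columns appear once, taken from the left table, instead of once per side.
-
-```sql
-CREATE TABLE t1 (a UInt8, b UInt8, x String) ENGINE = Memory;
-CREATE TABLE t2 (a UInt8, b UInt8, y String) ENGINE = Memory;
-SELECT * FROM t1 JOIN t2 USING a, b;  -- result columns: a, b (from t1), x, y
-```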
-
-## ClickHouse release 19.1 {#clickhouse-release-19-1}
-
-### ClickHouse release 19.1.14, 2019-03-14 {#clickhouse-release-19-1-14-2019-03-14}
-
-- Fixed the error `Column ... queried more than once` that may happen if the setting `asterisk_left_columns_only` is set to 1 when using `GLOBAL JOIN` with `SELECT *` (a rare case). The issue does not exist in 19.3 and newer. [6bac7d8d](https://github.com/ClickHouse/ClickHouse/pull/4692/commits/6bac7d8d11a9b0d6de0b32b53c47eb2f6f8e7062) ([Artem Zuikov](https://github.com/4ertus2))
-
-### ClickHouse release 19.1.13, 2019-03-12 {#clickhouse-release-19-1-13-2019-03-12}
-
-This release contains exactly the same set of patches as 19.3.7.
-
-### ClickHouse release 19.1.10, 2019-03-03 {#clickhouse-release-19-1-10-2019-03-03}
-
-This release contains exactly the same set of patches as 19.3.6.
-
-## ClickHouse release 19.1 {#clickhouse-release-19-1-1}
-
-### ClickHouse release 19.1.9, 2019-02-21 {#clickhouse-release-19-1-9-2019-02-21}
-
-#### Bug fixes {#bug-fixes-18}
-
-- Fixed backward incompatibility with old versions due to a wrong implementation of the `send_logs_level` setting. [\#4445](https://github.com/ClickHouse/ClickHouse/pull/4445) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed backward incompatibility of the table function `remote` introduced with column comments. [\#4446](https://github.com/ClickHouse/ClickHouse/pull/4446) ([alexey-milovidov](https://github.com/alexey-milovidov))
-
-### ClickHouse release 19.1.8, 2019-02-16 {#clickhouse-release-19-1-8-2019-02-16}
-
-#### Bug Fixes {#bug-fixes-19}
-
-- Fixed the install package with missing /etc/clickhouse-server/config.xml. [\#4343](https://github.com/ClickHouse/ClickHouse/pull/4343) ([proller](https://github.com/proller))
-
-## ClickHouse release 19.1 {#clickhouse-release-19-1-2}
-
-### ClickHouse release 19.1.7, 2019-02-15 {#clickhouse-release-19-1-7-2019-02-15}
-
-#### Bug Fixes {#bug-fixes-20}
-
-- Correctly return the right type and properly handle locks in the `joinGet` function. [\#4153](https://github.com/ClickHouse/ClickHouse/pull/4153) ([Amos Bird](https://github.com/amosbird))
-- Fixed an error that occurred when system logs were re-created at server shutdown. [\#4254](https://github.com/ClickHouse/ClickHouse/pull/4254) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed an error: if there is a database with the `Dictionary` engine, all dictionaries are forced to load at server startup, and if there is a dictionary with a ClickHouse source from localhost, that dictionary cannot load. [\#4255](https://github.com/ClickHouse/ClickHouse/pull/4255) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a bug where the execution of mutations containing `IN` operators was producing incorrect results. [\#4099](https://github.com/ClickHouse/ClickHouse/pull/4099) ([Alex Zatelepin](https://github.com/ztlpn))
-- `clickhouse-client` could segfault on exit while loading data for command-line suggestions if run in interactive mode. [\#4317](https://github.com/ClickHouse/ClickHouse/pull/4317) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a race condition where selecting from `system.tables` may give a `table doesn't exist` error. [\#4313](https://github.com/ClickHouse/ClickHouse/pull/4313) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a deadlock when a `SELECT` from a table with the `File` engine was retried after a `No such file or directory` error. [\#4161](https://github.com/ClickHouse/ClickHouse/pull/4161) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed an issue: local ClickHouse dictionaries were loaded via TCP, but should load within the process. [\#4166](https://github.com/ClickHouse/ClickHouse/pull/4166) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed the `No message received` error when interacting with the PostgreSQL ODBC driver through a TLS connection. Also fixes a segfault when using the MySQL ODBC driver. [\#4170](https://github.com/ClickHouse/ClickHouse/pull/4170) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Temporarily disabled predicate optimization for `ORDER BY`. [\#3890](https://github.com/ClickHouse/ClickHouse/pull/3890) ([Winter Zhang](https://github.com/zhang2014))
-- Fixed an infinite loop when selecting from the table function `numbers(0)`. [\#4280](https://github.com/ClickHouse/ClickHouse/pull/4280) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a `compile_expressions` bug with comparison of big (more than int16) dates. [\#4341](https://github.com/ClickHouse/ClickHouse/pull/4341) ([alesapin](https://github.com/alesapin))
-- Fixed a segmentation fault with `uncompressed_cache=1` and an exception with wrong uncompressed size. [\#4186](https://github.com/ClickHouse/ClickHouse/pull/4186) ([alesapin](https://github.com/alesapin))
-- Fixed `ALL JOIN` with duplicates in the right table. [\#4184](https://github.com/ClickHouse/ClickHouse/pull/4184) ([Artem Zuikov](https://github.com/4ertus2))
-- Fixed wrong behaviour when doing an `INSERT ... SELECT ... FROM file(...)` query where the file has `CSVWithNames` or `TSVWithNames` format and the first data row is missing. [\#4297](https://github.com/ClickHouse/ClickHouse/pull/4297) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed aggregate function execution with `Array(LowCardinality)` arguments. [\#4055](https://github.com/ClickHouse/ClickHouse/pull/4055) ([KochetovNicolai](https://github.com/KochetovNicolai))
-- Debian package: correct the /etc/clickhouse-server/preprocessed link according to the config. [\#4205](https://github.com/ClickHouse/ClickHouse/pull/4205) ([proller](https://github.com/proller))
-- Fixed a fuzz test under undefined behavior sanitizer: added a parameter type check for the `quantile*Weighted` family of functions. [\#4145](https://github.com/ClickHouse/ClickHouse/pull/4145) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Made the `START REPLICATED SENDS` command start replicated sends. [\#4229](https://github.com/ClickHouse/ClickHouse/pull/4229) ([nvartolomei](https://github.com/nvartolomei))
-- Fixed `Not found column` for duplicate columns in the JOIN ON section. [\#4279](https://github.com/ClickHouse/ClickHouse/pull/4279) ([Artem Zuikov](https://github.com/4ertus2))
-- Now `/etc/ssl` is used as the default directory with SSL certificates. [\#4167](https://github.com/ClickHouse/ClickHouse/pull/4167) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a crash on dictionary reload if the dictionary is not available. [\#4188](https://github.com/ClickHouse/ClickHouse/pull/4188) ([proller](https://github.com/proller))
-- Fixed a bug with incorrect `Date` and `DateTime` comparison. [\#4237](https://github.com/ClickHouse/ClickHouse/pull/4237) ([valexey](https://github.com/valexey))
-- Fixed an incorrect result when `Date` and `DateTime` arguments are used in branches of the conditional operator (function `if`). Added a generic case for the function `if`. [\#4243](https://github.com/ClickHouse/ClickHouse/pull/4243) ([alexey-milovidov](https://github.com/alexey-milovidov))
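As a quick illustration of the last fix, a sketch (not taken from the PR) of `Date` and `DateTime` in the two branches of `if`:

```sql
-- Both branches are now brought to a common type (DateTime) correctly.
SELECT if(rand() % 2 = 0, toDate('2019-02-15'), toDateTime('2019-02-15 12:00:00'));
```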
-
-### ClickHouse release 19.1.6, 2019-01-24 {#clickhouse-release-19-1-6-2019-01-24}
-
-#### New Features {#new-features-7}
-
-- Custom per-column compression codecs for tables (see the sketch after this list). [\#3899](https://github.com/ClickHouse/ClickHouse/pull/3899) [\#4111](https://github.com/ClickHouse/ClickHouse/pull/4111) ([alesapin](https://github.com/alesapin), [Winter Zhang](https://github.com/zhang2014), [Anatoly](https://github.com/Sindbag))
-- Added the compression codec `Delta`. [\#4052](https://github.com/ClickHouse/ClickHouse/pull/4052) ([alesapin](https://github.com/alesapin))
-- Allowed `ALTER`ing compression codecs. [\#4054](https://github.com/ClickHouse/ClickHouse/pull/4054) ([alesapin](https://github.com/alesapin))
-- Added the functions `left`, `right`, `trim`, `ltrim`, `rtrim`, `timestampadd`, `timestampsub` for SQL standard compatibility. [\#3826](https://github.com/ClickHouse/ClickHouse/pull/3826) ([Ivan Blinkov](https://github.com/blinkov))
-- Support for writing to `HDFS` tables and the `hdfs` table function. [\#4084](https://github.com/ClickHouse/ClickHouse/pull/4084) ([alesapin](https://github.com/alesapin))
-- Added functions to search for multiple constant strings in a big haystack: `multiPosition`, `multiSearch`, `firstMatch`, also with `-UTF8`, `-CaseInsensitive`, and `-CaseInsensitiveUTF8` variants. [\#4053](https://github.com/ClickHouse/ClickHouse/pull/4053) ([Danila Kutenin](https://github.com/danlark1))
-- Pruning of unused shards if the `SELECT` query filters by the sharding key (setting `optimize_skip_unused_shards`). [\#3851](https://github.com/ClickHouse/ClickHouse/pull/3851) ([Gleb Kanterov](https://github.com/kanterov), [Ivan](https://github.com/abyss7))
-- Allowed the `Kafka` engine to ignore some number of parsing errors per block. [\#4094](https://github.com/ClickHouse/ClickHouse/pull/4094) ([Ivan](https://github.com/abyss7))
-- Added support for `CatBoost` multiclass model evaluation. The function `modelEvaluate` returns a tuple with per-class raw predictions for multiclass models. `libcatboostmodel.so` should be built with [\#607](https://github.com/catboost/catboost/pull/607). [\#3959](https://github.com/ClickHouse/ClickHouse/pull/3959) ([KochetovNicolai](https://github.com/KochetovNicolai))
-- Added the functions `filesystemAvailable`, `filesystemFree`, `filesystemCapacity`. [\#4097](https://github.com/ClickHouse/ClickHouse/pull/4097) ([Boris Granveaud](https://github.com/bgranvea))
-- Added the hashing functions `xxHash64` and `xxHash32`. [\#3905](https://github.com/ClickHouse/ClickHouse/pull/3905) ([filimonov](https://github.com/filimonov))
-- Added the `gccMurmurHash` hashing function (GCC-flavoured Murmur hash) which uses the same hash seed as [gcc](https://github.com/gcc-mirror/gcc/blob/41d6b10e96a1de98e90a7c0378437c3255814b16/libstdc%2B%2B-v3/include/bits/functional_hash.h#L191). [\#4000](https://github.com/ClickHouse/ClickHouse/pull/4000) ([sundyli](https://github.com/sundy-li))
-- Added the hashing functions `javaHash`, `hiveHash`. [\#3811](https://github.com/ClickHouse/ClickHouse/pull/3811) ([shangshujie365](https://github.com/shangshujie365))
-- Added the table function `remoteSecure`. It works like `remote`, but uses a secure connection. [\#4088](https://github.com/ClickHouse/ClickHouse/pull/4088) ([proller](https://github.com/proller))
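A minimal sketch of the codec syntax from the first three entries in this list (hypothetical table and column names):

```sql
CREATE TABLE codec_example
(
    ts DateTime CODEC(Delta, LZ4),  -- per-column codec chain using the new Delta codec
    value Float64 CODEC(ZSTD)
)
ENGINE = MergeTree()
ORDER BY ts;

-- Codecs can now also be changed afterwards:
ALTER TABLE codec_example MODIFY COLUMN value Float64 CODEC(LZ4);
```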
-
-#### Experimental features {#experimental-features-3}
-
-- Added multiple JOINs emulation (the `allow_experimental_multiple_joins_emulation` setting). [\#3946](https://github.com/ClickHouse/ClickHouse/pull/3946) ([Artem Zuikov](https://github.com/4ertus2))
-
-#### Bug Fixes {#bug-fixes-21}
-
-- Made the `compiled_expression_cache_size` setting limited by default to lower memory consumption. [\#4041](https://github.com/ClickHouse/ClickHouse/pull/4041) ([alesapin](https://github.com/alesapin))
-- Fixed a bug that led to hangups in threads that perform ALTERs of Replicated tables and in the thread that updates the configuration from ZooKeeper. [\#2947](https://github.com/ClickHouse/ClickHouse/issues/2947) [\#3891](https://github.com/ClickHouse/ClickHouse/issues/3891) [\#3934](https://github.com/ClickHouse/ClickHouse/pull/3934) ([Alex Zatelepin](https://github.com/ztlpn))
-- Fixed a race condition when executing a distributed ALTER task. The race condition led to more than one replica trying to execute the task, with all replicas except one failing with a ZooKeeper error. [\#3904](https://github.com/ClickHouse/ClickHouse/pull/3904) ([Alex Zatelepin](https://github.com/ztlpn))
-- Fixed a bug where `from_zk` config elements weren't refreshed after a request to ZooKeeper timed out. [\#2947](https://github.com/ClickHouse/ClickHouse/issues/2947) [\#3947](https://github.com/ClickHouse/ClickHouse/pull/3947) ([Alex Zatelepin](https://github.com/ztlpn))
-- Fixed a bug with a wrong prefix for IPv4 subnet masks. [\#3945](https://github.com/ClickHouse/ClickHouse/pull/3945) ([alesapin](https://github.com/alesapin))
-- Fixed a crash (`std::terminate`) in rare cases when a new thread cannot be created due to exhausted resources. [\#3956](https://github.com/ClickHouse/ClickHouse/pull/3956) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a bug in `remote` table function execution when wrong restrictions were used in `getStructureOfRemoteTable`. [\#4009](https://github.com/ClickHouse/ClickHouse/pull/4009) ([alesapin](https://github.com/alesapin))
-- Fixed a leak of netlink sockets. They were placed in a pool where they were never deleted, and new sockets were created at the start of a new thread when all current sockets were in use. [\#4017](https://github.com/ClickHouse/ClickHouse/pull/4017) ([Alex Zatelepin](https://github.com/ztlpn))
-- Fixed a bug with closing the `/proc/self/fd` directory earlier than all fds were read from `/proc` after forking the `odbc-bridge` subprocess. [\#4120](https://github.com/ClickHouse/ClickHouse/pull/4120) ([alesapin](https://github.com/alesapin))
-- Fixed monotonic String-to-UInt conversion in case of usage of String in the primary key. [\#3870](https://github.com/ClickHouse/ClickHouse/pull/3870) ([Winter Zhang](https://github.com/zhang2014))
-- Fixed an error in the calculation of integer conversion function monotonicity. [\#3921](https://github.com/ClickHouse/ClickHouse/pull/3921) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a segfault in the `arrayEnumerateUniq`, `arrayEnumerateDense` functions in case of some invalid arguments. [\#3909](https://github.com/ClickHouse/ClickHouse/pull/3909) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed UB in StorageMerge. [\#3910](https://github.com/ClickHouse/ClickHouse/pull/3910) ([Amos Bird](https://github.com/amosbird))
-- Fixed a segfault in the functions `addDays`, `subtractDays`. [\#3913](https://github.com/ClickHouse/ClickHouse/pull/3913) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed an error: the functions `round`, `floor`, `trunc`, `ceil` could return a bogus result when executed on an integer argument with a large negative scale. [\#3914](https://github.com/ClickHouse/ClickHouse/pull/3914) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed a bug induced by 'kill query sync' which led to a core dump. [\#3916](https://github.com/ClickHouse/ClickHouse/pull/3916) ([muVulDeePecker](https://github.com/fancyqlx))
-- Fixed a bug with a long delay after an empty replication queue. [\#3928](https://github.com/ClickHouse/ClickHouse/pull/3928) [\#3932](https://github.com/ClickHouse/ClickHouse/pull/3932) ([alesapin](https://github.com/alesapin))
-- Fixed excessive memory usage when inserting into a table with a `LowCardinality` primary key. [\#3955](https://github.com/ClickHouse/ClickHouse/pull/3955) ([KochetovNicolai](https://github.com/KochetovNicolai))
-- Fixed `LowCardinality` serialization for the `Native` format in case of empty arrays. [\#3907](https://github.com/ClickHouse/ClickHouse/issues/3907) [\#4011](https://github.com/ClickHouse/ClickHouse/pull/4011) ([KochetovNicolai](https://github.com/KochetovNicolai))
-- Fixed an incorrect result while using distinct by a single LowCardinality numeric column. [\#3895](https://github.com/ClickHouse/ClickHouse/issues/3895) [\#4012](https://github.com/ClickHouse/ClickHouse/pull/4012) ([KochetovNicolai](https://github.com/KochetovNicolai))
-- Fixed specialized aggregation with a LowCardinality key (in case the `compile` setting is enabled). [\#3886](https://github.com/ClickHouse/ClickHouse/pull/3886) ([KochetovNicolai](https://github.com/KochetovNicolai))
-- Fixed user and password forwarding for replicated table queries. [\#3957](https://github.com/ClickHouse/ClickHouse/pull/3957) ([alesapin](https://github.com/alesapin)) ([小路](https://github.com/nicelulu))
-- Fixed a very rare race condition that can happen when listing tables in a Dictionary database while reloading dictionaries. [\#3970](https://github.com/ClickHouse/ClickHouse/pull/3970) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed an incorrect result when HAVING was used with ROLLUP or CUBE (see the sketch after this list). [\#3756](https://github.com/ClickHouse/ClickHouse/issues/3756) [\#3837](https://github.com/ClickHouse/ClickHouse/pull/3837) ([Sam Chou](https://github.com/reflection))
-- Fixed column aliases for queries with `JOIN ON` syntax and distributed tables. [\#3980](https://github.com/ClickHouse/ClickHouse/pull/3980) ([Winter Zhang](https://github.com/zhang2014))
-- Fixed an error in the internal implementation of `quantileTDigest` (found by Artem Vakhrushev). This error never happens in ClickHouse and was relevant only for those who use the ClickHouse codebase as a library directly. [\#3935](https://github.com/ClickHouse/ClickHouse/pull/3935) ([alexey-milovidov](https://github.com/alexey-milovidov))
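A sketch of the kind of query affected by the ROLLUP/CUBE fix (hypothetical table `t` with columns `k` and `v`):

```sql
-- HAVING is now applied correctly to the subtotal rows produced by WITH ROLLUP.
SELECT k, sum(v) AS total
FROM t
GROUP BY k WITH ROLLUP
HAVING total > 10;
```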
-
-#### Improvements {#improvements-6}
-
-- Support for `IF NOT EXISTS` in `ALTER TABLE ADD COLUMN` statements along with `IF EXISTS` in `DROP/MODIFY/CLEAR/COMMENT COLUMN` (see the sketch after this list). [\#3900](https://github.com/ClickHouse/ClickHouse/pull/3900) ([Boris Granveaud](https://github.com/bgranvea))
-- Function `parseDateTimeBestEffort`: support for the formats `DD.MM.YYYY`, `DD.MM.YY`, `DD-MM-YYYY`, `DD-Mon-YYYY`, `DD/Month/YYYY` and similar. [\#3922](https://github.com/ClickHouse/ClickHouse/pull/3922) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- `CapnProtoInputStream` now supports jagged structures. [\#4063](https://github.com/ClickHouse/ClickHouse/pull/4063) ([Odin Hultgren Van Der Horst](https://github.com/Miniwoffer))
-- Usability improvement: added a check that the server process is started by the owner of the data directory. Do not allow starting the server as root if the data belongs to a non-root user. [\#3785](https://github.com/ClickHouse/ClickHouse/pull/3785) ([sergey-v-galtsev](https://github.com/sergey-v-galtsev))
-- Better logic of checking required columns during analysis of queries with JOINs. [\#3930](https://github.com/ClickHouse/ClickHouse/pull/3930) ([Artem Zuikov](https://github.com/4ertus2))
-- Decreased the number of connections in case of a large number of Distributed tables on a single server. [\#3726](https://github.com/ClickHouse/ClickHouse/pull/3726) ([Winter Zhang](https://github.com/zhang2014))
-- Supported totals row for a `WITH TOTALS` query for the ODBC driver. [\#3836](https://github.com/ClickHouse/ClickHouse/pull/3836) ([Maksim Koritckiy](https://github.com/nightweb))
-- Allowed using `Enum`s as integers inside the if function. [\#3875](https://github.com/ClickHouse/ClickHouse/pull/3875) ([Ivan](https://github.com/abyss7))
-- Added the `low_cardinality_allow_in_native_format` setting. If disabled, do not use the `LowCardinality` type in the `Native` format. [\#3879](https://github.com/ClickHouse/ClickHouse/pull/3879) ([KochetovNicolai](https://github.com/KochetovNicolai))
-- Removed some redundant objects from the compiled expressions cache to lower memory usage. [\#4042](https://github.com/ClickHouse/ClickHouse/pull/4042) ([alesapin](https://github.com/alesapin))
-- Added a check that the `SET send_logs_level = 'value'` query accepts an appropriate value. [\#3873](https://github.com/ClickHouse/ClickHouse/pull/3873) ([Sabyanin Maxim](https://github.com/s-mx))
-- Fixed the data type check in type conversion functions. [\#3896](https://github.com/ClickHouse/ClickHouse/pull/3896) ([Winter Zhang](https://github.com/zhang2014))
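A minimal sketch of the new idempotent `ALTER` forms (hypothetical table and column names):

```sql
ALTER TABLE t ADD COLUMN IF NOT EXISTS new_col UInt32;  -- no error if the column already exists
ALTER TABLE t DROP COLUMN IF EXISTS new_col;            -- no error if the column is already gone
```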
-
-#### Performance Improvements {#performance-improvements-5}
-
-- Added a MergeTree setting `use_minimalistic_part_header_in_zookeeper`. If enabled, Replicated tables will store compact part metadata in a single part znode. This can dramatically reduce ZooKeeper snapshot size (especially if the tables have a lot of columns). Note that after enabling this setting you will not be able to downgrade to a version that doesn't support it (see the sketch after this list). [\#3960](https://github.com/ClickHouse/ClickHouse/pull/3960) ([Alex Zatelepin](https://github.com/ztlpn))
-- Added a DFA-based implementation for the functions `sequenceMatch` and `sequenceCount` in case the pattern doesn't contain time. [\#4004](https://github.com/ClickHouse/ClickHouse/pull/4004) ([Léo Ercolanelli](https://github.com/ercolanelli-leo))
-- Performance improvement for integer number serialization. [\#3968](https://github.com/ClickHouse/ClickHouse/pull/3968) ([Amos Bird](https://github.com/amosbird))
-- Zero left padding of PODArray so that the -1 element is always valid and zeroed. It's used for branchless calculation of offsets. [\#3920](https://github.com/ClickHouse/ClickHouse/pull/3920) ([Amos Bird](https://github.com/amosbird))
-- Reverted the `jemalloc` version which led to performance degradation. [\#4018](https://github.com/ClickHouse/ClickHouse/pull/4018) ([alexey-milovidov](https://github.com/alexey-milovidov))
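A sketch of enabling the compact part headers on a replicated table — hypothetical names, and keep the downgrade caveat above in mind:

```sql
CREATE TABLE t_compact_headers
(
    k UInt64,
    v String
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/01/t_compact_headers', 'replica_1')
ORDER BY k
SETTINGS use_minimalistic_part_header_in_zookeeper = 1;
```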
-
-#### Backward Incompatible Changes {#backward-incompatible-changes-2}
-
-- Removed the undocumented feature `ALTER MODIFY PRIMARY KEY` because it was superseded by the `ALTER MODIFY ORDER BY` command. [\#3887](https://github.com/ClickHouse/ClickHouse/pull/3887) ([Alex Zatelepin](https://github.com/ztlpn))
-- Removed the function `shardByHash`. [\#3833](https://github.com/ClickHouse/ClickHouse/pull/3833) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Forbade using scalar subqueries with a result of type `AggregateFunction`. [\#3865](https://github.com/ClickHouse/ClickHouse/pull/3865) ([Ivan](https://github.com/abyss7))
-
-#### Build/Testing/Packaging Improvements {#buildtestingpackaging-improvements-6}
-
-- Added support for the PowerPC (`ppc64le`) build. [\#4132](https://github.com/ClickHouse/ClickHouse/pull/4132) ([Danila Kutenin](https://github.com/danlark1))
-- Stateful functional tests are run on a publicly available dataset. [\#3969](https://github.com/ClickHouse/ClickHouse/pull/3969) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed an error where the server cannot start with the `bash: /usr/bin/clickhouse-extract-from-config: Operation not permitted` message within Docker or systemd-nspawn. [\#4136](https://github.com/ClickHouse/ClickHouse/pull/4136) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Updated the `rdkafka` library to v1.0.0-RC5. Used cppkafka instead of the raw C interface. [\#4025](https://github.com/ClickHouse/ClickHouse/pull/4025) ([Ivan](https://github.com/abyss7))
-- Updated the `mariadb-client` library. Fixed one of the issues found by UBSan. [\#3924](https://github.com/ClickHouse/ClickHouse/pull/3924) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Some fixes for UBSan builds. [\#3926](https://github.com/ClickHouse/ClickHouse/pull/3926) [\#3021](https://github.com/ClickHouse/ClickHouse/pull/3021) [\#3948](https://github.com/ClickHouse/ClickHouse/pull/3948) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Added per-commit runs of tests with the UBSan build.
-- Added per-commit runs of the PVS-Studio static analyzer.
-- Fixed bugs found by PVS-Studio. [\#4013](https://github.com/ClickHouse/ClickHouse/pull/4013) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed glibc compatibility issues. [\#4100](https://github.com/ClickHouse/ClickHouse/pull/4100) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Moved Docker images to 18.10 and added a compatibility file for glibc \>= 2.28. [\#3965](https://github.com/ClickHouse/ClickHouse/pull/3965) ([alesapin](https://github.com/alesapin))
-- Added an env variable for users who don't want to chown directories in the server Docker image. [\#3967](https://github.com/ClickHouse/ClickHouse/pull/3967) ([alesapin](https://github.com/alesapin))
-- Enabled most of the warnings from `-Weverything` in clang. Enabled `-Wpedantic`. [\#3986](https://github.com/ClickHouse/ClickHouse/pull/3986) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Added a few more warnings that are available only in clang 8. [\#3993](https://github.com/ClickHouse/ClickHouse/pull/3993) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Link to `libLLVM` rather than to individual LLVM libs when using shared linking. [\#3989](https://github.com/ClickHouse/ClickHouse/pull/3989) ([Orivej Desh](https://github.com/orivej))
-- Added sanitizer variables for test images. [\#4072](https://github.com/ClickHouse/ClickHouse/pull/4072) ([alesapin](https://github.com/alesapin))
-- The `clickhouse-server` debian package now recommends the `libcap2-bin` package for using the `setcap` tool to set capabilities. This is optional. [\#4093](https://github.com/ClickHouse/ClickHouse/pull/4093) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Improved compilation time, fixed includes. [\#3898](https://github.com/ClickHouse/ClickHouse/pull/3898) ([proller](https://github.com/proller))
-- Added performance tests for hash functions. [\#3918](https://github.com/ClickHouse/ClickHouse/pull/3918) ([filimonov](https://github.com/filimonov))
-- Fixed cyclic library dependencies. [\#3958](https://github.com/ClickHouse/ClickHouse/pull/3958) ([proller](https://github.com/proller))
-- Improved compilation with low available memory. [\#4030](https://github.com/ClickHouse/ClickHouse/pull/4030) ([proller](https://github.com/proller))
-- Added a test script to reproduce the performance degradation in `jemalloc`. [\#4036](https://github.com/ClickHouse/ClickHouse/pull/4036) ([alexey-milovidov](https://github.com/alexey-milovidov))
-- Fixed misspellings in comments and string literals under `dbms`. [\#4122](https://github.com/ClickHouse/ClickHouse/pull/4122) ([maiha](https://github.com/maiha))
-- Fixed typos in comments. [\#4089](https://github.com/ClickHouse/ClickHouse/pull/4089) ([Evgenii Pravda](https://github.com/kvinty))
-
-## [Changelog for 2018](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/changelog/2018.md)
diff --git a/docs/zh/changelog/index.md b/docs/zh/changelog/index.md
deleted file mode 120000
index 79b747aee1b..00000000000
--- a/docs/zh/changelog/index.md
+++ /dev/null
@@ -1 +0,0 @@
-../../../CHANGELOG.md
\ No newline at end of file
diff --git a/docs/zh/changelog/index.md b/docs/zh/changelog/index.md
new file mode 100644
index 00000000000..90bb7abe0b0
--- /dev/null
+++ b/docs/zh/changelog/index.md
@@ -0,0 +1,665 @@
+---
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+---
+
+## ClickHouse release v20.3 {#clickhouse-release-v20-3}
+
+### ClickHouse release v20.3.4.10, 2020-03-20 {#clickhouse-release-v20-3-4-10-2020-03-20}
+
+#### Bug Fix {#bug-fix}
+
+- This release also contains all bug fixes from 20.1.8.41.
+- Fixed missing `rows_before_limit_at_least` for queries over HTTP (with the processors pipeline). This fixes [\#9730](https://github.com/ClickHouse/ClickHouse/issues/9730). [\#9757](https://github.com/ClickHouse/ClickHouse/pull/9757) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+
+### ClickHouse release v20.3.3.6, 2020-03-17 {#clickhouse-release-v20-3-3-6-2020-03-17}
+
+#### Bug Fix {#bug-fix-1}
+
+- This release also contains all bug fixes from 20.1.7.38.
+- Fixed a bug in replication that didn't allow replication to work if the user had executed mutations on a previous version. This fixes [\#9645](https://github.com/ClickHouse/ClickHouse/issues/9645). [\#9652](https://github.com/ClickHouse/ClickHouse/pull/9652) ([alesapin](https://github.com/alesapin)). It makes version 20.3 backward compatible again.
+- Added the setting `use_compact_format_in_distributed_parts_names`, which allows writing files for `INSERT` queries into `Distributed` tables in a more compact format. This fixes [\#9647](https://github.com/ClickHouse/ClickHouse/issues/9647). [\#9653](https://github.com/ClickHouse/ClickHouse/pull/9653) ([alesapin](https://github.com/alesapin)). It makes version 20.3 backward compatible again.
+
+### ClickHouse release v20.3.2.1, 2020-03-12 {#clickhouse-release-v20-3-2-1-2020-03-12}
+
+#### Backward Incompatible Change {#backward-incompatible-change}
+
+- Fixed the issue `file name too long` when sending data for `Distributed` tables with a large number of replicas. Fixed the issue of replica credentials being exposed in the server log. The format of the directory name on disk was changed to `[shard{shard_index}[_replica{replica_index}]]`. [\#8911](https://github.com/ClickHouse/ClickHouse/pull/8911) ([Mikhail Korotov](https://github.com/millb)) After you upgrade to the new version, you will not be able to downgrade without manual intervention, because the old server version does not recognize the new directory format. If you want to downgrade, you have to manually rename the corresponding directories to the old format. This change is relevant only if you have used asynchronous `INSERT`s into `Distributed` tables. In version 20.3.3 we will introduce a setting that allows you to enable the new format gradually.
+- Changed the format of replication log entries for mutation commands. You have to wait for old mutations to be processed before installing the new version.
+- Implemented a simple memory profiler that dumps stack traces to `system.trace_log` every N bytes over the soft allocation limit [\#8765](https://github.com/ClickHouse/ClickHouse/pull/8765) ([Ivan](https://github.com/abyss7)) [\#9472](https://github.com/ClickHouse/ClickHouse/pull/9472) ([alexey-milovidov](https://github.com/alexey-milovidov)) The column of `system.trace_log` was renamed from `timer_type` to `trace_type`. This will require changes in third-party performance analysis and flamegraph processing tools.
+- Use the OS thread id everywhere instead of the internal thread number. This fixes [\#7477](https://github.com/ClickHouse/ClickHouse/issues/7477). Old `clickhouse-client` cannot receive logs sent from the server when the setting `send_logs_level` is enabled, because the names and types of the structured log messages were changed. On the other hand, different server versions can send logs of different types to each other. When you don't use the `send_logs_level` setting, you should not care. [\#8954](https://github.com/ClickHouse/ClickHouse/pull/8954) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Removed the `indexHint` function. [\#9542](https://github.com/ClickHouse/ClickHouse/pull/9542) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Removed the `findClusterIndex`, `findClusterValue` functions. This fixes [\#8641](https://github.com/ClickHouse/ClickHouse/issues/8641). If you were using these functions, send an email to `clickhouse-feedback@yandex-team.com`. [\#9543](https://github.com/ClickHouse/ClickHouse/pull/9543) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Now it's not allowed to create columns or add columns with a `SELECT` subquery as the default expression. [\#9481](https://github.com/ClickHouse/ClickHouse/pull/9481) ([alesapin](https://github.com/alesapin))
+- Require aliases for subqueries in JOIN. [\#9274](https://github.com/ClickHouse/ClickHouse/pull/9274) ([Artem Zuikov](https://github.com/4ertus2))
+- Improved the `ALTER MODIFY/ADD` query logic. Now you cannot `ADD` a column without a type, `MODIFY` of a default expression doesn't change the type of the column, and `MODIFY` of a type doesn't lose the default expression value. Fixes [\#8669](https://github.com/ClickHouse/ClickHouse/issues/8669). [\#9227](https://github.com/ClickHouse/ClickHouse/pull/9227) ([alesapin](https://github.com/alesapin))
+- A server restart is now required to apply changes in the logging configuration. This is a temporary workaround to avoid a bug where the server logs to a deleted log file (see [\#8696](https://github.com/ClickHouse/ClickHouse/issues/8696)). [\#8707](https://github.com/ClickHouse/ClickHouse/pull/8707) ([Alexander Kuzmenkov](https://github.com/akuzm))
+- The setting `experimental_use_processors` is enabled by default. This setting enables usage of the new query pipeline. This is internal refactoring and we expect no visible changes. If you see any issues, set it back to zero. [\#8768](https://github.com/ClickHouse/ClickHouse/pull/8768) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+#### New Feature {#new-feature}
+
+- Added `Avro` and `AvroConfluent` input/output formats. [\#8571](https://github.com/ClickHouse/ClickHouse/pull/8571) ([Andrew Onyshchuk](https://github.com/oandrew)) [\#8957](https://github.com/ClickHouse/ClickHouse/pull/8957) ([Andrew Onyshchuk](https://github.com/oandrew)) [\#8717](https://github.com/ClickHouse/ClickHouse/pull/8717) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Multithreaded and non-blocking updates of expired keys in `cache` dictionaries (with optional permission to read the old ones). [\#8303](https://github.com/ClickHouse/ClickHouse/pull/8303) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
+- Added the query `ALTER ... MATERIALIZE TTL`. It runs a mutation that forces removal of expired data by TTL and recalculates meta-information about TTL in all parts. [\#8775](https://github.com/ClickHouse/ClickHouse/pull/8775) ([Anton Popov](https://github.com/CurtizJ))
+- Switch from HashJoin to MergeJoin (on disk) if needed. [\#9082](https://github.com/ClickHouse/ClickHouse/pull/9082) ([Artem Zuikov](https://github.com/4ertus2))
+- Added the `MOVE PARTITION` command for `ALTER TABLE`. [\#4729](https://github.com/ClickHouse/ClickHouse/issues/4729) [\#6168](https://github.com/ClickHouse/ClickHouse/pull/6168) ([Guillaume Tassery](https://github.com/YiuRULE))
+- Reload the storage configuration from the configuration file on the fly. [\#8594](https://github.com/ClickHouse/ClickHouse/pull/8594) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Allowed changing `storage_policy` to a not less rich one. [\#8107](https://github.com/ClickHouse/ClickHouse/pull/8107) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Added support for globs/wildcards for the S3 storage and table function. [\#8851](https://github.com/ClickHouse/ClickHouse/pull/8851) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Implemented `bitAnd`, `bitOr`, `bitXor`, `bitNot` for the `FixedString(N)` data type. [\#9091](https://github.com/ClickHouse/ClickHouse/pull/9091) ([Guillaume Tassery](https://github.com/YiuRULE))
+- Added the function `bitCount`. This fixes [\#8702](https://github.com/ClickHouse/ClickHouse/issues/8702). [\#8708](https://github.com/ClickHouse/ClickHouse/pull/8708) ([alexey-milovidov](https://github.com/alexey-milovidov)) [\#8749](https://github.com/ClickHouse/ClickHouse/pull/8749) ([ikopylov](https://github.com/ikopylov))
+- Added the `generateRandom` table function to generate random rows with a given schema. It allows populating an arbitrary test table with data. [\#8994](https://github.com/ClickHouse/ClickHouse/pull/8994) ([Ilya Yatsishin](https://github.com/qoega))
+- `JSONEachRowFormat`: support the special case when objects are enclosed in a top-level array. [\#8860](https://github.com/ClickHouse/ClickHouse/pull/8860) ([Kruglov Pavel](https://github.com/Avogar))
+- Now it's possible to create a column with a `DEFAULT` expression that depends on a column with a default `ALIAS` expression. [\#9489](https://github.com/ClickHouse/ClickHouse/pull/9489) ([alesapin](https://github.com/alesapin))
+- Allow specifying a `--limit` larger than the source data size in `clickhouse-obfuscator`. The data will repeat itself with a different random seed. [\#9155](https://github.com/ClickHouse/ClickHouse/pull/9155) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added the `groupArraySample` function (similar to `groupArray`) with a reservoir sampling algorithm. [\#8286](https://github.com/ClickHouse/ClickHouse/pull/8286) ([Amos Bird](https://github.com/amosbird))
+- Now you can monitor the size of the update queue in `cache`/`complex_key_cache` dictionaries via system metrics. [\#9413](https://github.com/ClickHouse/ClickHouse/pull/9413) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
+- Allow using CRLF as the line separator in the CSV output format with the setting `output_format_csv_crlf_end_of_line` set to 1. [\#8934](https://github.com/ClickHouse/ClickHouse/pull/8934) [\#8935](https://github.com/ClickHouse/ClickHouse/pull/8935) [\#8963](https://github.com/ClickHouse/ClickHouse/pull/8963) ([Mikhail Korotov](https://github.com/millb))
+- Implemented more functions of the [H3](https://github.com/uber/h3) API: `h3GetBaseCell`, `h3HexAreaM2`, `h3IndexesAreNeighbors`, `h3ToChildren`, `h3ToString` and `stringToH3`. [\#8938](https://github.com/ClickHouse/ClickHouse/pull/8938) ([Nico Mandery](https://github.com/nmandery))
+- Introduced a new setting: `max_parser_depth` to control the maximum stack size and allow large complex queries. This fixes [\#6681](https://github.com/ClickHouse/ClickHouse/issues/6681) and [\#7668](https://github.com/ClickHouse/ClickHouse/issues/7668). [\#8647](https://github.com/ClickHouse/ClickHouse/pull/8647) ([Maxim Smirnov](https://github.com/qMBQx8GH))
+- Added the setting `force_optimize_skip_unused_shards` to throw if skipping of unused shards is not possible. [\#8805](https://github.com/ClickHouse/ClickHouse/pull/8805) ([Azat Khuzhin](https://github.com/azat))
+- Allow configuring multiple disks/volumes for storing data to send through the `Distributed` engine. [\#8756](https://github.com/ClickHouse/ClickHouse/pull/8756) ([Azat Khuzhin](https://github.com/azat))
+- Support a storage policy (``) for storing temporary data. [\#8750](https://github.com/ClickHouse/ClickHouse/pull/8750) ([Azat Khuzhin](https://github.com/azat))
+- Added the `X-ClickHouse-Exception-Code` HTTP header that is set if an exception was thrown before sending data. This implements [\#4971](https://github.com/ClickHouse/ClickHouse/issues/4971). [\#8786](https://github.com/ClickHouse/ClickHouse/pull/8786) ([Mikhail Korotov](https://github.com/millb))
+- Added the function `ifNotFinite` (see the sketch after this list). It is just syntactic sugar: `ifNotFinite(x, y) = isFinite(x) ? x : y`. [\#8710](https://github.com/ClickHouse/ClickHouse/pull/8710) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added the `last_successful_update_time` column to the `system.dictionaries` table. [\#9394](https://github.com/ClickHouse/ClickHouse/pull/9394) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
+- Added the `blockSerializedSize` function (size on disk without compression). [\#8952](https://github.com/ClickHouse/ClickHouse/pull/8952) ([Azat Khuzhin](https://github.com/azat))
+- Added the function `moduloOrZero`. [\#9358](https://github.com/ClickHouse/ClickHouse/pull/9358) ([hcz](https://github.com/hczhcz))
+- Added the system tables `system.zeros` and `system.zeros_mt` as well as the table functions `zeros()` and `zeros_mt()`. The tables (and table functions) contain a single column with the name `zero` and the type `UInt8`. This column contains zeros. It is needed for testing purposes as the fastest method to generate many rows. This fixes [\#6604](https://github.com/ClickHouse/ClickHouse/issues/6604) [\#9593](https://github.com/ClickHouse/ClickHouse/pull/9593) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
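Two quick sketches of additions from the list above (`ifNotFinite` and the `zeros` table functions):

```sql
SELECT ifNotFinite(1 / 0, -1);           -- 1/0 is +inf, so the fallback -1 is returned
SELECT count() FROM zeros_mt(1000000);   -- a fast way to generate a million rows
```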
+
+#### Experimental Feature {#experimental-feature}
+
+- Added a new compact format of parts in `MergeTree`-family tables in which all columns are stored in one file. It helps to increase the performance of small and frequent inserts. The old format (one file per column) is now called wide. The data storage format is controlled by the settings `min_bytes_for_wide_part` and `min_rows_for_wide_part` (see the sketch after this list). [\#8290](https://github.com/ClickHouse/ClickHouse/pull/8290) ([Anton Popov](https://github.com/CurtizJ))
+- Support for S3 storage for `Log`, `TinyLog` and `StripeLog` tables. [\#8862](https://github.com/ClickHouse/ClickHouse/pull/8862) ([Pavel Kovalenko](https://github.com/Jokser))
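A sketch of controlling the new compact part format via the settings named above (hypothetical table; the threshold value is illustrative only):

```sql
CREATE TABLE t_compact_parts
(
    k UInt64,
    v String
)
ENGINE = MergeTree()
ORDER BY k
SETTINGS min_bytes_for_wide_part = 10485760;  -- parts below ~10 MiB are written in compact format
```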
+
+#### Bug Fix {#bug-fix-2}
+
+- Fixed inconsistent whitespace in log messages. [\#9322](https://github.com/ClickHouse/ClickHouse/pull/9322) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a bug in which arrays of unnamed tuples were flattened as Nested structures on table creation. [\#8866](https://github.com/ClickHouse/ClickHouse/pull/8866) ([achulkov2](https://github.com/achulkov2))
+- Fixed the issue where a "Too many open files" error could happen if there are too many files matching the glob pattern in a `File` table or `file` table function. Now files are opened lazily. This fixes [\#8857](https://github.com/ClickHouse/ClickHouse/issues/8857). [\#8861](https://github.com/ClickHouse/ClickHouse/pull/8861) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Dropping a temporary table now drops only the temporary table. [\#8907](https://github.com/ClickHouse/ClickHouse/pull/8907) ([Vitaly Baranov](https://github.com/vitlibar))
+- Remove an outdated partition when we shut down the server or detach/attach a table. [\#8602](https://github.com/ClickHouse/ClickHouse/pull/8602) ([Guillaume Tassery](https://github.com/YiuRULE))
+- Changed how the default disk calculates free space from the `data` subdirectory. Fixed the issue where the amount of free space was calculated incorrectly if the `data` directory is mounted to a separate device (a rare case). This fixes [\#7441](https://github.com/ClickHouse/ClickHouse/issues/7441). [\#9257](https://github.com/ClickHouse/ClickHouse/pull/9257) ([Mikhail Korotov](https://github.com/millb))
+- Allow comma (CROSS) JOIN with IN() inside. [\#9251](https://github.com/ClickHouse/ClickHouse/pull/9251) ([Artem Zuikov](https://github.com/4ertus2))
+- Allow rewriting CROSS to INNER JOIN if there is a \[NOT\] LIKE operator in the WHERE section. [\#9229](https://github.com/ClickHouse/ClickHouse/pull/9229) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed a possibly incorrect result after `GROUP BY` with the setting `distributed_aggregation_memory_efficient` enabled. Fixes [\#9134](https://github.com/ClickHouse/ClickHouse/issues/9134). [\#9289](https://github.com/ClickHouse/ClickHouse/pull/9289) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Found keys were counted as missed in the metrics of cache dictionaries. [\#9411](https://github.com/ClickHouse/ClickHouse/pull/9411) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
+- Fixed the replication protocol incompatibility introduced in [\#8598](https://github.com/ClickHouse/ClickHouse/issues/8598). [\#9412](https://github.com/ClickHouse/ClickHouse/pull/9412) ([alesapin](https://github.com/alesapin))
+- Fixed a race condition on `queue_task_handle` at the startup of `ReplicatedMergeTree` tables. [\#9552](https://github.com/ClickHouse/ClickHouse/pull/9552) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- The token `NOT` didn't work in the `SHOW TABLES NOT LIKE` query. [\#8727](https://github.com/ClickHouse/ClickHouse/issues/8727) [\#8940](https://github.com/ClickHouse/ClickHouse/pull/8940) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added a range check to the function `h3EdgeLengthM`. Without this check, a buffer overflow was possible. [\#8945](https://github.com/ClickHouse/ClickHouse/pull/8945) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a bug in batched calculations of ternary logical OPs on multiple arguments (more than 10). [\#8718](https://github.com/ClickHouse/ClickHouse/pull/8718) ([Alexander Kazakov](https://github.com/Akazz))
+- Fixed a bug of the PREWHERE optimization which could lead to segfaults or the `Inconsistent number of columns got from MergeTreeRangeReader` exception. [\#9024](https://github.com/ClickHouse/ClickHouse/pull/9024) ([Anton Popov](https://github.com/CurtizJ))
+- Fixed an unexpected `Timeout exceeded while reading from socket` exception which randomly happened on secure connections before the timeout was actually exceeded and when the query profiler is enabled. Also added the `connect_timeout_with_failover_secure_ms` setting (default 100 ms), which is similar to `connect_timeout_with_failover_ms` but is used for secure connections (because the SSL handshake is slower than an ordinary TCP connection). [\#9026](https://github.com/ClickHouse/ClickHouse/pull/9026) ([tavplubix](https://github.com/tavplubix))
+- Fixed a bug with mutation finalization, where a mutation could hang in a state with `parts_to_do=0` and `is_done=0`. [\#9022](https://github.com/ClickHouse/ClickHouse/pull/9022) ([alesapin](https://github.com/alesapin))
+- Use the new ANY JOIN logic with the `partial_merge_join` setting. It's possible to make `ANY|ALL|SEMI LEFT` and `ALL INNER` joins with `partial_merge_join=1` now. [\#8932](https://github.com/ClickHouse/ClickHouse/pull/8932) ([Artem Zuikov](https://github.com/4ertus2))
+- A shard now clamps the settings received from the initiator to the shard's constraints instead of throwing an exception. This fix allows sending queries to a shard with other constraints. [\#9447](https://github.com/ClickHouse/ClickHouse/pull/9447) ([Vitaly Baranov](https://github.com/vitlibar))
+- Fixed a memory management problem in `MergeTreeReadPool`. [\#8791](https://github.com/ClickHouse/ClickHouse/pull/8791) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Fixed the `toDecimal*OrNull()` family of functions when called with the string `e`. Fixes [\#8312](https://github.com/ClickHouse/ClickHouse/issues/8312). [\#8764](https://github.com/ClickHouse/ClickHouse/pull/8764) ([Artem Zuikov](https://github.com/4ertus2))
+- Make sure that `FORMAT Null` sends no data to the client. [\#8767](https://github.com/ClickHouse/ClickHouse/pull/8767) ([Alexander Kuzmenkov](https://github.com/akuzm))
+- Fixed a bug where the timestamp in `LiveViewBlockInputStream` was not updated. `LIVE VIEW` is an experimental feature. [\#8644](https://github.com/ClickHouse/ClickHouse/pull/8644) ([vxider](https://github.com/Vxider)) [\#8625](https://github.com/ClickHouse/ClickHouse/pull/8625) ([vxider](https://github.com/Vxider))
+- Fixed the wrong behavior of `ALTER MODIFY TTL` that didn't allow deleting old TTL expressions. [\#8422](https://github.com/ClickHouse/ClickHouse/pull/8422) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Fixed a UBSan report in MergeTreeIndexSet. This fixes [\#9250](https://github.com/ClickHouse/ClickHouse/issues/9250). [\#9365](https://github.com/ClickHouse/ClickHouse/pull/9365) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed the behavior of the `match` and `extract` functions when the haystack has zero bytes. The behavior was wrong when the haystack was constant. This fixes [\#9160](https://github.com/ClickHouse/ClickHouse/issues/9160). [\#9163](https://github.com/ClickHouse/ClickHouse/pull/9163) ([alexey-milovidov](https://github.com/alexey-milovidov)) [\#9345](https://github.com/ClickHouse/ClickHouse/pull/9345) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Avoid throwing from destructors in the Apache Avro 3rd-party library. [\#9066](https://github.com/ClickHouse/ClickHouse/pull/9066) ([Andrew Onyshchuk](https://github.com/oandrew))
+- Don't commit a batch polled from `Kafka` partially, as it can lead to holes in the data. [\#8876](https://github.com/ClickHouse/ClickHouse/pull/8876) ([filimonov](https://github.com/filimonov))
+- Fixed `joinGet` with nullable return types. https://github.com/ClickHouse/ClickHouse/issues/8919 [\#9014](https://github.com/ClickHouse/ClickHouse/pull/9014) ([Amos Bird](https://github.com/amosbird))
+- Fixed data incompatibility when compressed with the `T64` codec. [\#9016](https://github.com/ClickHouse/ClickHouse/pull/9016) ([Artem Zuikov](https://github.com/4ertus2)) Fixed the data type ids in the `T64` compression codec that led to wrong (de)compression in affected versions. [\#9033](https://github.com/ClickHouse/ClickHouse/pull/9033) ([Artem Zuikov](https://github.com/4ertus2))
+- Added the setting `enable_early_constant_folding` and disabled it in some cases that led to errors. [\#9010](https://github.com/ClickHouse/ClickHouse/pull/9010) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed the pushdown predicate optimizer with VIEW and enabled the test. [\#9011](https://github.com/ClickHouse/ClickHouse/pull/9011) ([Winter Zhang](https://github.com/zhang2014))
+- Fixed a segfault in `Merge` tables that can happen when reading from `File` storages. [\#9387](https://github.com/ClickHouse/ClickHouse/pull/9387) ([tavplubix](https://github.com/tavplubix))
+- Added a check for the storage policy in `ATTACH PARTITION FROM`, `REPLACE PARTITION`, `MOVE TO TABLE`. Otherwise it could make the data of a part inaccessible after restart and prevent ClickHouse from starting. [\#9383](https://github.com/ClickHouse/ClickHouse/pull/9383) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Fixed alters if there is a TTL set for the table. [\#8800](https://github.com/ClickHouse/ClickHouse/pull/8800) ([Anton Popov](https://github.com/CurtizJ))
+- Fixed a race condition that can happen when `SYSTEM RELOAD ALL DICTIONARIES` is executed while some dictionary is being modified/added/removed. [\#8801](https://github.com/ClickHouse/ClickHouse/pull/8801) ([Vitaly Baranov](https://github.com/vitlibar))
+- In previous versions the `Memory` database engine used an empty data path, so tables were created in the `path` directory (e.g. `/var/lib/clickhouse/`), not in the data directory of the database (e.g. `/var/lib/clickhouse/db_name`). [\#8753](https://github.com/ClickHouse/ClickHouse/pull/8753) ([tavplubix](https://github.com/tavplubix))
+- Fixed wrong log messages about a missing default disk or policy. [\#9530](https://github.com/ClickHouse/ClickHouse/pull/9530) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Fixed not(has()) for the bloom\_filter index of array types. [\#9407](https://github.com/ClickHouse/ClickHouse/pull/9407) ([achimbab](https://github.com/achimbab))
+- Allow the first column(s) in a table with the `Log` engine to be an alias. [\#9231](https://github.com/ClickHouse/ClickHouse/pull/9231) ([Ivan](https://github.com/abyss7))
+- Fixed the order of ranges while reading from a `MergeTree` table in one thread. It could lead to exceptions from `MergeTreeRangeReader` or wrong query results. [\#9050](https://github.com/ClickHouse/ClickHouse/pull/9050) ([Anton Popov](https://github.com/CurtizJ))
+- Made `reinterpretAsFixedString` return `FixedString` instead of `String`. [\#9052](https://github.com/ClickHouse/ClickHouse/pull/9052) ([Andrew Onyshchuk](https://github.com/oandrew))
+- Avoid extremely rare cases where the user could get a wrong error message (`Success` instead of a detailed error description). [\#9457](https://github.com/ClickHouse/ClickHouse/pull/9457) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Do not crash when using the `Template` format with an empty row template. [\#8785](https://github.com/ClickHouse/ClickHouse/pull/8785) ([Alexander Kuzmenkov](https://github.com/akuzm))
+- Metadata files for system tables could be created in the wrong place. [\#8653](https://github.com/ClickHouse/ClickHouse/pull/8653) ([tavplubix](https://github.com/tavplubix)) Fixes [\#8581](https://github.com/ClickHouse/ClickHouse/issues/8581).
+- Fixed a data race on exception\_ptr in the cache dictionary [\#8303](https://github.com/ClickHouse/ClickHouse/issues/8303). [\#9379](https://github.com/ClickHouse/ClickHouse/pull/9379) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
+- Do not throw an exception for the query `ATTACH TABLE IF NOT EXISTS`. Previously it was thrown if the table already exists, despite the `IF NOT EXISTS` clause. [\#8967](https://github.com/ClickHouse/ClickHouse/pull/8967) ([Anton Popov](https://github.com/CurtizJ))
+- Fixed a missing closing paren in an exception message. [\#8811](https://github.com/ClickHouse/ClickHouse/pull/8811) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Avoid the message `Possible deadlock avoided` at the startup of clickhouse-client in interactive mode. [\#9455](https://github.com/ClickHouse/ClickHouse/pull/9455) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed the issue where padding at the end of a base64-encoded value could be malformed. Updated the base64 library. This fixes [\#9491](https://github.com/ClickHouse/ClickHouse/issues/9491), closes [\#9492](https://github.com/ClickHouse/ClickHouse/issues/9492). [\#9500](https://github.com/ClickHouse/ClickHouse/pull/9500) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Prevent losing data in `Kafka` in rare cases where an exception happens after reading the suffix but before commit. Fixes [\#9378](https://github.com/ClickHouse/ClickHouse/issues/9378). [\#9507](https://github.com/ClickHouse/ClickHouse/pull/9507) ([filimonov](https://github.com/filimonov))
+- Fixed an exception in `DROP TABLE IF EXISTS`. [\#8663](https://github.com/ClickHouse/ClickHouse/pull/8663) ([Nikita Vasilev](https://github.com/nikvas0))
+- Fixed a crash when a user tries `ALTER MODIFY SETTING` for the old-format `MergeTree` table engine family. [\#9435](https://github.com/ClickHouse/ClickHouse/pull/9435) ([alesapin](https://github.com/alesapin))
+- Support UInt64 numbers that don't fit in Int64 in JSON-related functions. Updated SIMDJSON to master. This fixes [\#9209](https://github.com/ClickHouse/ClickHouse/issues/9209). [\#9344](https://github.com/ClickHouse/ClickHouse/pull/9344) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed the execution of inverted predicates when a non-strictly monotonic functional index is used. [\#9223](https://github.com/ClickHouse/ClickHouse/pull/9223) ([Alexander Kazakov](https://github.com/Akazz))
+- Don't try to fold `IN` constants in `GROUP BY`. [\#8868](https://github.com/ClickHouse/ClickHouse/pull/8868) ([Amos Bird](https://github.com/amosbird))
+- Fixed a bug in `ALTER DELETE` mutations which led to index corruption. This fixes [\#9019](https://github.com/ClickHouse/ClickHouse/issues/9019) and [\#8982](https://github.com/ClickHouse/ClickHouse/issues/8982). Additionally fixed extremely rare race conditions in `ReplicatedMergeTree` `ALTER` queries. [\#9048](https://github.com/ClickHouse/ClickHouse/pull/9048) ([alesapin](https://github.com/alesapin))
+- When the setting `compile_expressions` is enabled, you could get `unexpected column` in `LLVMExecutableFunction` when we use the `Nullable` type. [\#8910](https://github.com/ClickHouse/ClickHouse/pull/8910) ([Guillaume Tassery](https://github.com/YiuRULE))
+- Multiple fixes for the `Kafka` engine: 1) fixed duplicates that appeared during consumer group rebalance. 2) Fixed rare 'holes' that appeared when data was polled from several partitions with one poll and committed partially (now we always process/commit the whole polled block of messages). 3) Fixed flushes by block size (before that, only flushing by timeout was working properly). 4) Better subscription procedure (with assignment feedback). 5) Made tests work faster (with default intervals and timeouts). Because the data was not flushed by block size before (as it should be according to the documentation), this PR may lead to some performance degradation with default settings (due to more frequent and smaller flushes, which are less optimal). If you encounter a performance issue after this change, please increase `kafka_max_block_size` in the table to a bigger value (for example `CREATE TABLE ...Engine=Kafka ... SETTINGS ... kafka_max_block_size=524288`). Fixes [\#7259](https://github.com/ClickHouse/ClickHouse/issues/7259). [\#8917](https://github.com/ClickHouse/ClickHouse/pull/8917) ([filimonov](https://github.com/filimonov))
+- Fixed the `Parameter out of bound` exception in some queries after PREWHERE optimizations. [\#8914](https://github.com/ClickHouse/ClickHouse/pull/8914) ([Baudouin Giard](https://github.com/bgiard))
+- Fixed the case of mixed constness of arguments of the function `arrayZip`. [\#8705](https://github.com/ClickHouse/ClickHouse/pull/8705) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- When executing a `CREATE` query, fold constant expressions in storage engine arguments. Replace an empty database name with the current database. Fixes [\#6508](https://github.com/ClickHouse/ClickHouse/issues/6508), [\#3492](https://github.com/ClickHouse/ClickHouse/issues/3492). [\#9262](https://github.com/ClickHouse/ClickHouse/pull/9262) ([tavplubix](https://github.com/tavplubix))
+- Now it's not possible to create or add columns with simple cyclic aliases like `a DEFAULT b, b DEFAULT a`. [\#9603](https://github.com/ClickHouse/ClickHouse/pull/9603) ([alesapin](https://github.com/alesapin))
+- Fixed a bug where a double move could corrupt the original part. This is relevant if you use `ALTER TABLE MOVE`. [\#8680](https://github.com/ClickHouse/ClickHouse/pull/8680) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Allow the `interval` identifier to be parsed correctly without backticks. Fixed the issue where a query could not be executed even if the `interval` identifier is enclosed in backticks or double quotes. This fixes [\#9124](https://github.com/ClickHouse/ClickHouse/issues/9124). [\#9142](https://github.com/ClickHouse/ClickHouse/pull/9142) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a fuzz test and incorrect behavior of the `bitTestAll`/`bitTestAny` functions. [\#9143](https://github.com/ClickHouse/ClickHouse/pull/9143) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a possible crash/wrong number of rows in `LIMIT n WITH TIES` when there are a lot of rows equal to the n-th row (see the sketch after this list). [\#9464](https://github.com/ClickHouse/ClickHouse/pull/9464) ([tavplubix](https://github.com/tavplubix))
+- Fixed mutations with parts written with `insert_quorum` enabled. [\#9463](https://github.com/ClickHouse/ClickHouse/pull/9463) ([alesapin](https://github.com/alesapin))
+- Fixed a data race at the destruction of `Poco::HTTPServer`. It could happen when the server is started and immediately shut down. [\#9468](https://github.com/ClickHouse/ClickHouse/pull/9468) ([Anton Popov](https://github.com/CurtizJ))
+- Fixed a bug where a misleading error message was shown when running `SHOW CREATE TABLE a_table_that_does_not_exist`. [\#8899](https://github.com/ClickHouse/ClickHouse/pull/8899) ([achulkov2](https://github.com/achulkov2))
+- Fixed the `Parameters are out of bound` exception in some rare cases where we have a constant in the `SELECT` clause together with an `ORDER BY` and a `LIMIT` clause. [\#8892](https://github.com/ClickHouse/ClickHouse/pull/8892) ([Guillaume Tassery](https://github.com/YiuRULE))
+- Fixed mutation finalization, where an already-done mutation could have the status `is_done=0`. [\#9217](https://github.com/ClickHouse/ClickHouse/pull/9217) ([alesapin](https://github.com/alesapin))
+- Prevent executing `ALTER ADD INDEX` for MergeTree tables with the old syntax, because it doesn't work. [\#8822](https://github.com/ClickHouse/ClickHouse/pull/8822) ([Mikhail Korotov](https://github.com/millb))
+- During server startup, do not access the tables that `LIVE VIEW` depends on, so the server will be able to start. Also remove `LIVE VIEW` dependencies when detaching a `LIVE VIEW`. `LIVE VIEW` is an experimental feature. [\#8824](https://github.com/ClickHouse/ClickHouse/pull/8824) ([tavplubix](https://github.com/tavplubix))
+- Fixed a possible segfault in `MergeTreeRangeReader` while executing `PREWHERE`. [\#9106](https://github.com/ClickHouse/ClickHouse/pull/9106) ([Anton Popov](https://github.com/CurtizJ))
+- Fixed possible mismatched checksums with column TTLs. [\#9451](https://github.com/ClickHouse/ClickHouse/pull/9451) ([Anton Popov](https://github.com/CurtizJ))
+- Fixed a bug where parts were not moved in the background by TTL rules in the case when there is only one volume. [\#8672](https://github.com/ClickHouse/ClickHouse/pull/8672) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Fixed the issue `Method createColumn() is not implemented for data type Set`. This fixes [\#7799](https://github.com/ClickHouse/ClickHouse/issues/7799). [\#8674](https://github.com/ClickHouse/ClickHouse/pull/8674) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Now we try to finalize mutations more frequently. [\#9427](https://github.com/ClickHouse/ClickHouse/pull/9427) ([alesapin](https://github.com/alesapin))
+- Fixed `intDiv` by a minus one constant. [\#9351](https://github.com/ClickHouse/ClickHouse/pull/9351) ([hcz](https://github.com/hczhcz))
+- Fixed a possible race condition in `BlockIO`. [\#9356](https://github.com/ClickHouse/ClickHouse/pull/9356) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fixed a bug leading to server termination when trying to use / drop a `Kafka` table created with wrong parameters. [\#9513](https://github.com/ClickHouse/ClickHouse/pull/9513) ([filimonov](https://github.com/filimonov))
+- Added a workaround for the case when the OS returns a wrong result for the `timer_create` function. [\#8837](https://github.com/ClickHouse/ClickHouse/pull/8837) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed an error in the usage of the `min_marks_for_seek` parameter. Fixed the error message for the case when there is no sharding key in the Distributed table and we try to skip unused shards. [\#8908](https://github.com/ClickHouse/ClickHouse/pull/8908) ([Azat Khuzhin](https://github.com/azat))
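A minimal sketch of the `LIMIT n WITH TIES` construct referenced in the fix above:

```sql
-- Rows tied with the 3rd row on the ORDER BY key are also returned.
SELECT number % 3 AS k
FROM numbers(10)
ORDER BY k
LIMIT 3 WITH TIES;
```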
+
+#### Improvement {#improvement}
+
+- Implemented `ALTER MODIFY/DROP` queries on top of mutations for the `ReplicatedMergeTree*` engine family. Now `ALTERS` block only at the metadata-update stage, and don't block after that. [\#8701](https://github.com/ClickHouse/ClickHouse/pull/8701) ([alesapin](https://github.com/alesapin))
+- Added the ability to rewrite CROSS to INNER JOINs with a `WHERE` section containing unqualified names. [\#9512](https://github.com/ClickHouse/ClickHouse/pull/9512) ([Artem Zuikov](https://github.com/4ertus2))
+- Made `SHOW TABLES` and `SHOW DATABASES` queries support `WHERE` expressions and `FROM`/`IN`. [\#9076](https://github.com/ClickHouse/ClickHouse/pull/9076) ([sundyli](https://github.com/sundy-li))
+- Added a setting `deduplicate_blocks_in_dependent_materialized_views`. [\#9070](https://github.com/ClickHouse/ClickHouse/pull/9070) ([urykhy](https://github.com/urykhy))
+- After recent changes, the MySQL client started to print binary strings in hex, thereby making them unreadable ([\#9032](https://github.com/ClickHouse/ClickHouse/issues/9032)). The workaround in ClickHouse is to mark string columns as UTF-8, which is not always true but usually the case. [\#9079](https://github.com/ClickHouse/ClickHouse/pull/9079) ([Yuriy Baranov](https://github.com/yurriy))
+- Added support for String and FixedString keys in `sumMap`. [\#8903](https://github.com/ClickHouse/ClickHouse/pull/8903) ([Baudouin Giard](https://github.com/bgiard))
+- Support string keys in SummingMergeTree maps. [\#8933](https://github.com/ClickHouse/ClickHouse/pull/8933) ([Baudouin Giard](https://github.com/bgiard))
+- Send a signal to terminate a thread to the thread pool even if the thread has thrown an exception. [\#8736](https://github.com/ClickHouse/ClickHouse/pull/8736) ([Ding Xiang Fei](https://github.com/dingxiangfei2009))
+- Allow setting `query_id` in `clickhouse-benchmark`. [\#9416](https://github.com/ClickHouse/ClickHouse/pull/9416) ([Anton Popov](https://github.com/CurtizJ))
+- Don't allow strange expressions in `ALTER TABLE ... PARTITION partition` queries. This addresses [\#7192](https://github.com/ClickHouse/ClickHouse/issues/7192). [\#8835](https://github.com/ClickHouse/ClickHouse/pull/8835) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- The table `system.table_engines` now provides information about feature support (like `supports_ttl` or `supports_sort_order`). [\#8830](https://github.com/ClickHouse/ClickHouse/pull/8830) ([Max Akhmedov](https://github.com/zlobober))
+- Enabled `system.metric_log` by default. It will contain rows with values of ProfileEvents and CurrentMetrics collected with the "collect\_interval\_milliseconds" interval (one second by default). The table is very small (usually in the order of megabytes) and collecting this data by default is reasonable. [\#9225](https://github.com/ClickHouse/ClickHouse/pull/9225) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Initialize query profiler for all threads in a group, e.g. it allows to fully profile insert-queries. Fixes [\#6964](https://github.com/ClickHouse/ClickHouse/issues/6964). [\#8874](https://github.com/ClickHouse/ClickHouse/pull/8874) ([Ivan](https://github.com/abyss7))
+- Now temporary `LIVE VIEW`s are created by `CREATE LIVE VIEW name WITH TIMEOUT [42] ...` instead of `CREATE TEMPORARY LIVE VIEW ...`, because the previous syntax was not consistent with `CREATE TEMPORARY TABLE ...`. [\#9131](https://github.com/ClickHouse/ClickHouse/pull/9131) ([tavplubix](https://github.com/tavplubix))
+- Added the text\_log.level configuration parameter to limit entries that go to the `system.text_log` table. [\#8809](https://github.com/ClickHouse/ClickHouse/pull/8809) ([Azat Khuzhin](https://github.com/azat))
+- Allow putting downloaded parts onto disks/volumes according to TTL rules. [\#8598](https://github.com/ClickHouse/ClickHouse/pull/8598) ([Vladimir Chebotarev](https://github.com/excitoon))
+- For external MySQL dictionaries, allow mutualizing the MySQL connection pool to "share" it among dictionaries. This option significantly reduces the number of connections to MySQL servers. [\#9409](https://github.com/ClickHouse/ClickHouse/pull/9409) ([Clément Rodriguez](https://github.com/clemrodriguez))
+- Show the nearest query execution time for quantiles in `clickhouse-benchmark` output instead of interpolated values. It's better to show values that correspond to the execution time of some queries. [\#8712](https://github.com/ClickHouse/ClickHouse/pull/8712) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Possibility to add a key and a timestamp for the message when inserting data into Kafka. Fixes [\#7198](https://github.com/ClickHouse/ClickHouse/issues/7198). [\#8969](https://github.com/ClickHouse/ClickHouse/pull/8969) ([filimonov](https://github.com/filimonov))
+- If the server is run from a terminal, highlight the thread number, query id and log priority by color. This improves the readability of correlated log messages for developers. [\#8961](https://github.com/ClickHouse/ClickHouse/pull/8961) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Better exception message while loading tables for `Ordinary` databases. [\#9527](https://github.com/ClickHouse/ClickHouse/pull/9527) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Implemented `arraySlice` for arrays with aggregate function states. This fixes [\#9388](https://github.com/ClickHouse/ClickHouse/issues/9388). [\#9391](https://github.com/ClickHouse/ClickHouse/pull/9391) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Allow constant functions and constant arrays to be used on the right side of the IN operator. [\#8813](https://github.com/ClickHouse/ClickHouse/pull/8813) ([Anton Popov](https://github.com/CurtizJ))
+- If a ZooKeeper exception happened while fetching data for system.replicas, display it in a separate column. This implements [\#9137](https://github.com/ClickHouse/ClickHouse/issues/9137). [\#9138](https://github.com/ClickHouse/ClickHouse/pull/9138) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Atomically remove MergeTree data parts on destroy. [\#8402](https://github.com/ClickHouse/ClickHouse/pull/8402) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Support row-level security for Distributed tables. [\#8926](https://github.com/ClickHouse/ClickHouse/pull/8926) ([Ivan](https://github.com/abyss7))
+- Now we recognize suffixes (like KB, KiB…) in settings values. [\#8072](https://github.com/ClickHouse/ClickHouse/pull/8072) ([Mikhail Korotov](https://github.com/millb))
+- Prevent out of memory while constructing the result of a large JOIN. [\#8637](https://github.com/ClickHouse/ClickHouse/pull/8637) ([Artem Zuikov](https://github.com/4ertus2))
+- Added names of clusters to suggestions in interactive mode in `clickhouse-client`. [\#8709](https://github.com/ClickHouse/ClickHouse/pull/8709) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Initialize query profiler for all threads in a group, e.g. it allows to fully profile insert-queries. [\#8820](https://github.com/ClickHouse/ClickHouse/pull/8820) ([Ivan](https://github.com/abyss7))
+- Added the `exception_code` column to the `system.query_log` table. [\#8770](https://github.com/ClickHouse/ClickHouse/pull/8770) ([Mikhail Korotov](https://github.com/millb))
+- Enabled the MySQL compatibility server on port `9004` in the default server configuration file. Fixed the password generation command in the example in the configuration. [\#8771](https://github.com/ClickHouse/ClickHouse/pull/8771) ([Yuriy Baranov](https://github.com/yurriy))
+- Prevent abort on shutdown if the filesystem is read-only. This fixes [\#9094](https://github.com/ClickHouse/ClickHouse/issues/9094). [\#9100](https://github.com/ClickHouse/ClickHouse/pull/9100) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Better exception message when length is required in an HTTP POST query. [\#9453](https://github.com/ClickHouse/ClickHouse/pull/9453) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added the `_path` and `_file` virtual columns to the `HDFS` and `File` engines and the `hdfs` and `file` table functions (see the sketch after this list). [\#8489](https://github.com/ClickHouse/ClickHouse/pull/8489) ([Olga Khvostikova](https://github.com/stavrolia))
+- Fixed the error `Cannot find column` while inserting into a `MATERIALIZED VIEW` in the case when a new column was added to the view's internal table. [\#8766](https://github.com/ClickHouse/ClickHouse/pull/8766) [\#8788](https://github.com/ClickHouse/ClickHouse/pull/8788) ([vzakaznikov](https://github.com/vzakaznikov)) [\#8788](https://github.com/ClickHouse/ClickHouse/issues/8788) [\#8806](https://github.com/ClickHouse/ClickHouse/pull/8806) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) [\#8803](https://github.com/ClickHouse/ClickHouse/pull/8803) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fixed progress over the native client-server protocol by sending progress after the final update (like logs). This may be relevant only to some third-party tools that use the native protocol. [\#9495](https://github.com/ClickHouse/ClickHouse/pull/9495) ([Azat Khuzhin](https://github.com/azat))
+- Added a system metric tracking the number of client connections using the MySQL protocol ([\#9013](https://github.com/ClickHouse/ClickHouse/issues/9013)). [\#9015](https://github.com/ClickHouse/ClickHouse/pull/9015) ([Eugene Klimov](https://github.com/Slach))
+- From now on, HTTP responses will have the `X-ClickHouse-Timezone` header set to the same timezone value that `SELECT timezone()` would report. [\#9493](https://github.com/ClickHouse/ClickHouse/pull/9493) ([Denis Glazachev](https://github.com/traceon))
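A sketch of the new virtual columns with the `file` table function (hypothetical path and schema):

```sql
SELECT _path, _file, count()
FROM file('data/*.csv', 'CSV', 'id UInt32, s String')
GROUP BY _path, _file;
```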
+- Do not send blocks to the client for the `Null` format in the processors pipeline. [\#8797](https://github.com/ClickHouse/ClickHouse/pull/8797) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) [\#8767](https://github.com/ClickHouse/ClickHouse/pull/8767) ([Alexander Kuzmenkov](https://github.com/akuzm))
+
+#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement}
+
+- Exception handling now works correctly on Windows Subsystem for Linux. See https://github.com/ClickHouse-Extras/libunwind/pull/3 This fixes [\#6480](https://github.com/ClickHouse/ClickHouse/issues/6480) [\#9564](https://github.com/ClickHouse/ClickHouse/pull/9564) ([sobolevsv](https://github.com/sobolevsv))
+- Replace `readline` with `replxx` for interactive line editing in `clickhouse-client`. [\#8416](https://github.com/ClickHouse/ClickHouse/pull/8416) ([Ivan](https://github.com/abyss7))
+- Better build time and fewer template instantiations in FunctionsComparison. [\#9324](https://github.com/ClickHouse/ClickHouse/pull/9324) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added integration with `clang-tidy` in CI. See also [\#6044](https://github.com/ClickHouse/ClickHouse/issues/6044) [\#9566](https://github.com/ClickHouse/ClickHouse/pull/9566) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Now we link ClickHouse in CI using `lld` even for `gcc`. [\#9049](https://github.com/ClickHouse/ClickHouse/pull/9049) ([alesapin](https://github.com/alesapin))
+- Allow randomizing thread scheduling and inserting glitches when the `THREAD_FUZZER_*` environment variables are set. This helps testing. [\#9459](https://github.com/ClickHouse/ClickHouse/pull/9459) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Enable secure sockets in stateless tests. [\#9288](https://github.com/ClickHouse/ClickHouse/pull/9288) ([tavplubix](https://github.com/tavplubix))
+- Make SPLIT\_SHARED\_LIBRARIES=OFF more robust. [\#9156](https://github.com/ClickHouse/ClickHouse/pull/9156) ([Azat Khuzhin](https://github.com/azat))
+- Make the “performance\_introspection\_and\_logging” test reliable against random server hangs. This may happen in a CI environment. See also [\#9515](https://github.com/ClickHouse/ClickHouse/issues/9515) [\#9528](https://github.com/ClickHouse/ClickHouse/pull/9528) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Validate XML in the style check. [\#9550](https://github.com/ClickHouse/ClickHouse/pull/9550) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a race condition in the test `00738_lock_for_inner_table`. This test relied on sleep. [\#9555](https://github.com/ClickHouse/ClickHouse/pull/9555) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Remove performance tests of type `once`. This is needed to run all performance tests in statistical comparison mode (which is more reliable). [\#9557](https://github.com/ClickHouse/ClickHouse/pull/9557) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added performance tests for arithmetic functions. [\#9326](https://github.com/ClickHouse/ClickHouse/pull/9326) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added performance tests for the `sumMap` and `sumMapWithOverflow` aggregate functions. A follow-up to [\#8933](https://github.com/ClickHouse/ClickHouse/issues/8933) [\#8947](https://github.com/ClickHouse/ClickHouse/pull/8947) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Ensure the style of error codes via the style check. [\#9370](https://github.com/ClickHouse/ClickHouse/pull/9370) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Add a script for tests history. [\#8796](https://github.com/ClickHouse/ClickHouse/pull/8796) ([alesapin](https://github.com/alesapin))
+- Add the GCC warning `-Wsuggest-override` to locate and fix all places where the `override` keyword must be used. [\#8760](https://github.com/ClickHouse/ClickHouse/pull/8760) ([kreuzerkrieg](https://github.com/kreuzerkrieg))
+- Ignore a weak symbol under Mac OS X because it must be defined. [\#9538](https://github.com/ClickHouse/ClickHouse/pull/9538) ([Deleted user](https://github.com/ghost))
+- Normalize the run time of some queries in performance tests. This is done in preparation for running all the performance tests in comparison mode. [\#9565](https://github.com/ClickHouse/ClickHouse/pull/9565) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix some tests to support pytest with query tests. [\#9062](https://github.com/ClickHouse/ClickHouse/pull/9062) ([Ivan](https://github.com/abyss7))
+- Enable SSL in builds with MSan, so the server won't fail at startup when running stateless tests. [\#9531](https://github.com/ClickHouse/ClickHouse/pull/9531) ([tavplubix](https://github.com/tavplubix))
+- Fix database substitution in test results. [\#9384](https://github.com/ClickHouse/ClickHouse/pull/9384) ([Ilya Yatsishin](https://github.com/qoega))
+- Build fixes for miscellaneous platforms. [\#9381](https://github.com/ClickHouse/ClickHouse/pull/9381) ([proller](https://github.com/proller)) [\#8755](https://github.com/ClickHouse/ClickHouse/pull/8755) ([proller](https://github.com/proller)) [\#8631](https://github.com/ClickHouse/ClickHouse/pull/8631) ([proller](https://github.com/proller))
+- Added a disks section to the stateless-with-coverage test docker image. [\#9213](https://github.com/ClickHouse/ClickHouse/pull/9213) ([Pavel Kovalenko](https://github.com/Jokser))
+- Get rid of in-source-tree files when building with GRPC. [\#9588](https://github.com/ClickHouse/ClickHouse/pull/9588) ([Amos Bird](https://github.com/amosbird))
+- Slightly faster build time by removing SessionCleaner from Context. Make the code of SessionCleaner simpler. [\#9232](https://github.com/ClickHouse/ClickHouse/pull/9232) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Updated the checking for hung queries in the clickhouse-test script. [\#8858](https://github.com/ClickHouse/ClickHouse/pull/8858) ([Alexander Kazakov](https://github.com/Akazz))
+- Removed some useless files from the repository. [\#8843](https://github.com/ClickHouse/ClickHouse/pull/8843) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Changed the type of math perftests from `once` to `loop`. [\#8783](https://github.com/ClickHouse/ClickHouse/pull/8783) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Added a docker image which allows building an interactive code browser HTML report for our codebase. [\#8781](https://github.com/ClickHouse/ClickHouse/pull/8781) ([alesapin](https://github.com/alesapin)) See the [Woboq Code Browser](https://clickhouse-test-reports.s3.yandex.net/codebrowser/html_report///ClickHouse/dbms/index.html)
+- Suppress some test failures under MSan. [\#8780](https://github.com/ClickHouse/ClickHouse/pull/8780) ([Alexander Kuzmenkov](https://github.com/akuzm))
+- Speed up the “exception while insert” test. This test often timed out in debug-with-coverage builds. [\#8711](https://github.com/ClickHouse/ClickHouse/pull/8711) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Updated `libcxx` and `libcxxabi` to master. In preparation for [\#9304](https://github.com/ClickHouse/ClickHouse/issues/9304) [\#9308](https://github.com/ClickHouse/ClickHouse/pull/9308) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix the flaky test `00910_zookeeper_test_alter_compression_codecs`. [\#9525](https://github.com/ClickHouse/ClickHouse/pull/9525) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Clean up duplicated linker flags. Make sure the linker won't look up unexpected symbols. [\#9433](https://github.com/ClickHouse/ClickHouse/pull/9433) ([Amos Bird](https://github.com/amosbird))
+- Add the `clickhouse-odbc` driver into test images. This allows testing the interaction of ClickHouse with ClickHouse via its own ODBC driver. [\#9348](https://github.com/ClickHouse/ClickHouse/pull/9348) ([filimonov](https://github.com/filimonov))
+- Fix several bugs in unit tests. [\#9047](https://github.com/ClickHouse/ClickHouse/pull/9047) ([alesapin](https://github.com/alesapin))
+- Enable the `-Wmissing-include-dirs` GCC warning to eliminate all non-existing includes — mostly a result of CMake scripting errors. [\#8704](https://github.com/ClickHouse/ClickHouse/pull/8704) ([kreuzerkrieg](https://github.com/kreuzerkrieg))
+- Describe the reasons if the query profiler cannot work. This is intended for [\#9049](https://github.com/ClickHouse/ClickHouse/issues/9049) [\#9144](https://github.com/ClickHouse/ClickHouse/pull/9144) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Update OpenSSL to upstream master. Fixed an issue where TLS connections could fail with the messages `OpenSSL SSL_read: error:14094438:SSL routines:ssl3_read_bytes:tlsv1 alert internal error` and `SSL Exception: error:2400006E:random number generator::error retrieving entropy`. The issue was present in version 20.1. [\#8956](https://github.com/ClickHouse/ClickHouse/pull/8956) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Update the Dockerfile for the server. [\#8893](https://github.com/ClickHouse/ClickHouse/pull/8893) ([Ilya Mazaev](https://github.com/ne-ray))
+- Minor fixes in the build-gcc-from-sources script. [\#8774](https://github.com/ClickHouse/ClickHouse/pull/8774) ([Michael Nacharov](https://github.com/mnach))
+- Replace `numbers` with `zeros` in perftests where the `number` column is not used. This will lead to cleaner test results. [\#9600](https://github.com/ClickHouse/ClickHouse/pull/9600) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fix a stack overflow issue when using initializer\_list in column constructors. [\#9367](https://github.com/ClickHouse/ClickHouse/pull/9367) ([Deleted user](https://github.com/ghost))
+- Upgrade librdkafka to v1.3.0. Enable the bundled `rdkafka` and `gsasl` libraries on Mac OS X. [\#9000](https://github.com/ClickHouse/ClickHouse/pull/9000) ([Andrew Onyshchuk](https://github.com/oandrew))
+- Build fix on GCC 9.2.0 [\#9306](https://github.com/ClickHouse/ClickHouse/pull/9306) ([vxider](https://github.com/Vxider))
+
+## ClickHouse release v20.1 {#clickhouse-release-v20-1}
+
+### ClickHouse release v20.1.8.41, 2020-03-20 {#clickhouse-release-v20-1-8-41-2020-03-20}
+
+#### Bug Fix {#bug-fix-3}
+
+- Fix a possible permanent `Cannot schedule a task` error (due to an unhandled exception in `ParallelAggregatingBlockInputStream::Handler::onFinish/onFinishThread`). This fixes [\#6833](https://github.com/ClickHouse/ClickHouse/issues/6833). [\#9154](https://github.com/ClickHouse/ClickHouse/pull/9154) ([Azat Khuzhin](https://github.com/azat))
+- Fix excessive memory consumption in `ALTER` queries (mutations). This fixes [\#9533](https://github.com/ClickHouse/ClickHouse/issues/9533) and [\#9670](https://github.com/ClickHouse/ClickHouse/issues/9670). [\#9754](https://github.com/ClickHouse/ClickHouse/pull/9754) ([alesapin](https://github.com/alesapin))
+- Fix a bug with backquoting in external dictionary DDL. This fixes [\#9619](https://github.com/ClickHouse/ClickHouse/issues/9619). [\#9734](https://github.com/ClickHouse/ClickHouse/pull/9734) ([alesapin](https://github.com/alesapin))
+
+### ClickHouse release v20.1.7.38, 2020-03-18 {#clickhouse-release-v20-1-7-38-2020-03-18}
+
+#### Bug Fix {#bug-fix-4}
+
+- Fixed incorrect internal function names for `sumKahan` and `sumWithOverflow`. It led to an exception when these functions were used in remote queries. [\#9636](https://github.com/ClickHouse/ClickHouse/pull/9636) ([Azat Khuzhin](https://github.com/azat)). This issue was in all ClickHouse releases.
+- Allow `ALTER ON CLUSTER` of `Distributed` tables with internal replication. This fixes [\#3268](https://github.com/ClickHouse/ClickHouse/issues/3268). [\#9617](https://github.com/ClickHouse/ClickHouse/pull/9617) ([shinoi2](https://github.com/shinoi2)). This issue was in all ClickHouse releases.
+- Fix the possible exceptions `Size of filter doesn't match size of column` and `Invalid number of rows in Chunk` in `MergeTreeRangeReader`. They could appear while executing `PREWHERE` in some cases. Fixes [\#9132](https://github.com/ClickHouse/ClickHouse/issues/9132). [\#9612](https://github.com/ClickHouse/ClickHouse/pull/9612) ([Anton Popov](https://github.com/CurtizJ))
+- Fixed the issue that the timezone was not preserved if you write a simple arithmetic expression like `time + 1` (in contrast to an expression like `time + INTERVAL 1 SECOND`). This fixes [\#5743](https://github.com/ClickHouse/ClickHouse/issues/5743). [\#9323](https://github.com/ClickHouse/ClickHouse/pull/9323) ([alexey-milovidov](https://github.com/alexey-milovidov)). This issue was in all ClickHouse releases.
+- Now it's not possible to create or add columns with simple cyclic aliases like `a DEFAULT b, b DEFAULT a`. [\#9603](https://github.com/ClickHouse/ClickHouse/pull/9603) ([alesapin](https://github.com/alesapin))
+- Fixed an issue where the padding at the end of a base64-encoded value could be malformed. Updated the base64 library. This fixes [\#9491](https://github.com/ClickHouse/ClickHouse/issues/9491), closes [\#9492](https://github.com/ClickHouse/ClickHouse/issues/9492) [\#9500](https://github.com/ClickHouse/ClickHouse/pull/9500) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix a data race at the destruction of `Poco::HTTPServer`. It could happen when the server was started and immediately shut down. [\#9468](https://github.com/ClickHouse/ClickHouse/pull/9468) ([Anton Popov](https://github.com/CurtizJ))
+- Fix a possible crash/wrong number of rows in `LIMIT n WITH TIES` when there are many rows equal to the n-th row. [\#9464](https://github.com/ClickHouse/ClickHouse/pull/9464) ([tavplubix](https://github.com/tavplubix))
+- Fix checksum mismatches possibly caused by column TTLs. [\#9451](https://github.com/ClickHouse/ClickHouse/pull/9451) ([Anton Popov](https://github.com/CurtizJ))
+- Fix a crash when a user tries to `ALTER MODIFY SETTING` for the old-format `MergeTree` table engine family. [\#9435](https://github.com/ClickHouse/ClickHouse/pull/9435) ([alesapin](https://github.com/alesapin))
+- Now we will try to finalize mutations more frequently. [\#9427](https://github.com/ClickHouse/ClickHouse/pull/9427) ([alesapin](https://github.com/alesapin))
+- Fix the replication protocol incompatibility introduced in [\#8598](https://github.com/ClickHouse/ClickHouse/issues/8598). [\#9412](https://github.com/ClickHouse/ClickHouse/pull/9412) ([alesapin](https://github.com/alesapin))
+- Fix `not(has())` for the bloom\_filter index of array types. [\#9407](https://github.com/ClickHouse/ClickHouse/pull/9407) ([achimbab](https://github.com/achimbab))
+- Fixed the behaviour of the `match` and `extract` functions when the haystack has zero bytes. The behaviour was wrong when the haystack was constant. This fixes [\#9160](https://github.com/ClickHouse/ClickHouse/issues/9160) [\#9163](https://github.com/ClickHouse/ClickHouse/pull/9163) ([alexey-milovidov](https://github.com/alexey-milovidov)) [\#9345](https://github.com/ClickHouse/ClickHouse/pull/9345) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-1}
+
+- Exception handling now works correctly on Windows Subsystem for Linux. See https://github.com/ClickHouse-Extras/libunwind/pull/3 This fixes [\#6480](https://github.com/ClickHouse/ClickHouse/issues/6480) [\#9564](https://github.com/ClickHouse/ClickHouse/pull/9564) ([sobolevsv](https://github.com/sobolevsv))
+
+### ClickHouse release v20.1.6.30, 2020-03-05 {#clickhouse-release-v20-1-6-30-2020-03-05}
+
+#### Bug Fix {#bug-fix-5}
+
+- Fix data incompatibility when compressed with the `T64` codec. [\#9039](https://github.com/ClickHouse/ClickHouse/pull/9039) [(abyss7)](https://github.com/abyss7)
+- Fix the order of ranges while reading from a MergeTree table in one thread. Fixes [\#8964](https://github.com/ClickHouse/ClickHouse/issues/8964). [\#9050](https://github.com/ClickHouse/ClickHouse/pull/9050) [(CurtizJ)](https://github.com/CurtizJ)
+- Fix a possible segfault in `MergeTreeRangeReader` while executing `PREWHERE`. Fixes [\#9064](https://github.com/ClickHouse/ClickHouse/issues/9064). [\#9106](https://github.com/ClickHouse/ClickHouse/pull/9106) [(CurtizJ)](https://github.com/CurtizJ)
+- Fix `reinterpretAsFixedString` to return `FixedString` instead of `String`. [\#9052](https://github.com/ClickHouse/ClickHouse/pull/9052) [(oandrew)](https://github.com/oandrew)
+- Fix `joinGet` with nullable return types. Fixes [\#8919](https://github.com/ClickHouse/ClickHouse/issues/8919) [\#9014](https://github.com/ClickHouse/ClickHouse/pull/9014) [(amosbird)](https://github.com/amosbird)
+- Fix fuzz testing and the incorrect behaviour of the bitTestAll/bitTestAny functions. [\#9143](https://github.com/ClickHouse/ClickHouse/pull/9143) [(alexey-milovidov)](https://github.com/alexey-milovidov)
+- Fix the behaviour of the match and extract functions when the haystack has zero bytes. The behaviour was wrong when the haystack was constant. Fixes [\#9160](https://github.com/ClickHouse/ClickHouse/issues/9160) [\#9163](https://github.com/ClickHouse/ClickHouse/pull/9163) [(alexey-milovidov)](https://github.com/alexey-milovidov)
+- Fixed the execution of inverted predicates when a non-strictly-monotonic functional index is used. Fixes [\#9034](https://github.com/ClickHouse/ClickHouse/issues/9034) [\#9223](https://github.com/ClickHouse/ClickHouse/pull/9223) [(Akazz)](https://github.com/Akazz)
+- Allow rewriting `CROSS` to `INNER JOIN` if there's a `[NOT] LIKE` operator in the `WHERE` section. Fixes [\#9191](https://github.com/ClickHouse/ClickHouse/issues/9191) [\#9229](https://github.com/ClickHouse/ClickHouse/pull/9229) [(4ertus2)](https://github.com/4ertus2)
+- Allow the first column(s) in a table with the Log engine to be an alias. [\#9231](https://github.com/ClickHouse/ClickHouse/pull/9231) [(abyss7)](https://github.com/abyss7)
+- Allow comma joins with `IN()` inside. Fixes [\#7314](https://github.com/ClickHouse/ClickHouse/issues/7314). [\#9251](https://github.com/ClickHouse/ClickHouse/pull/9251) [(4ertus2)](https://github.com/4ertus2)
+- Improve the `ALTER MODIFY/ADD` query logic. Now you cannot `ADD` a column without a type, `MODIFY` of a default expression doesn't change the type of the column, and `MODIFY` of the type doesn't lose the default expression value. Fixes [\#8669](https://github.com/ClickHouse/ClickHouse/issues/8669). [\#9227](https://github.com/ClickHouse/ClickHouse/pull/9227) [(alesapin)](https://github.com/alesapin)
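+
+    A minimal sketch of the new semantics (table and column names are hypothetical):
+
+    ```sql
+    ALTER TABLE t MODIFY COLUMN c DEFAULT 42;  -- keeps the current type of c
+    ALTER TABLE t MODIFY COLUMN c UInt64;      -- keeps the DEFAULT expression of c
+    ```
+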
+- Fix mutation finalization, when an already-done mutation could have the status is\_done=0. [\#9217](https://github.com/ClickHouse/ClickHouse/pull/9217) [(alesapin)](https://github.com/alesapin)
+- Support the “Processors” pipeline for system.numbers and system.numbers\_mt. This also fixes a bug where `max_execution_time` was not respected. [\#7796](https://github.com/ClickHouse/ClickHouse/pull/7796) [(KochetovNicolai)](https://github.com/KochetovNicolai)
+- Fix wrong counting of the `DictCacheKeysRequestedFound` metric. [\#9411](https://github.com/ClickHouse/ClickHouse/pull/9411) [(nikitamikhaylov)](https://github.com/nikitamikhaylov)
+- Added a check of the storage policy in `ATTACH PARTITION FROM`, `REPLACE PARTITION`, `MOVE TO TABLE` which otherwise could make the data of a part inaccessible after restart and prevent ClickHouse from starting. [\#9383](https://github.com/ClickHouse/ClickHouse/pull/9383) [(excitoon)](https://github.com/excitoon)
+- Fixed a UBSan report in `MergeTreeIndexSet`. This fixes [\#9250](https://github.com/ClickHouse/ClickHouse/issues/9250) [\#9365](https://github.com/ClickHouse/ClickHouse/pull/9365) [(alexey-milovidov)](https://github.com/alexey-milovidov)
+- Fix a possible data race in BlockIO. [\#9356](https://github.com/ClickHouse/ClickHouse/pull/9356) [(KochetovNicolai)](https://github.com/KochetovNicolai)
+- Support for `UInt64` numbers that don't fit in Int64 in JSON-related functions. Update `SIMDJSON` to master. This fixes [\#9209](https://github.com/ClickHouse/ClickHouse/issues/9209) [\#9344](https://github.com/ClickHouse/ClickHouse/pull/9344) [(alexey-milovidov)](https://github.com/alexey-milovidov)
+- Fix the issue where the amount of free space was not calculated correctly if the data directory was mounted on a separate device. For the default disk, calculate the free space from the data subdirectory. This fixes [\#7441](https://github.com/ClickHouse/ClickHouse/issues/7441) [\#9257](https://github.com/ClickHouse/ClickHouse/pull/9257) [(millb)](https://github.com/millb)
+- Fix the issue where TLS connections could fail with the message `OpenSSL SSL_read: error:14094438:SSL routines:ssl3_read_bytes:tlsv1 alert internal error and SSL Exception: error:2400006E:random number generator::error retrieving entropy.` Update OpenSSL to upstream master. [\#8956](https://github.com/ClickHouse/ClickHouse/pull/8956) [(alexey-milovidov)](https://github.com/alexey-milovidov)
+- When executing a `CREATE` query, fold constant expressions in storage engine arguments. Replace an empty database name with the current database. Fixes [\#6508](https://github.com/ClickHouse/ClickHouse/issues/6508), [\#3492](https://github.com/ClickHouse/ClickHouse/issues/3492). Also fix the check for local addresses in ClickHouseDictionarySource. [\#9262](https://github.com/ClickHouse/ClickHouse/pull/9262) [(tavplubix)](https://github.com/tavplubix)
+- Fix a segfault in `StorageMerge` that could happen when reading from StorageFile. [\#9387](https://github.com/ClickHouse/ClickHouse/pull/9387) [(tavplubix)](https://github.com/tavplubix)
+- Prevent losing data in `Kafka` in the rare case when an exception happens after reading the suffix but before the commit. Fixes [\#9378](https://github.com/ClickHouse/ClickHouse/issues/9378). Related: [\#7175](https://github.com/ClickHouse/ClickHouse/issues/7175) [\#9507](https://github.com/ClickHouse/ClickHouse/pull/9507) [(filimonov)](https://github.com/filimonov)
+- Fix a bug leading to server termination when trying to use/drop a `Kafka` table created with wrong parameters. Fixes [\#9494](https://github.com/ClickHouse/ClickHouse/issues/9494). Incorporates [\#9507](https://github.com/ClickHouse/ClickHouse/issues/9507). [\#9513](https://github.com/ClickHouse/ClickHouse/pull/9513) [(filimonov)](https://github.com/filimonov)
+
+#### New Feature {#new-feature-1}
+
+- Add the `deduplicate_blocks_in_dependent_materialized_views` option to control the behaviour of idempotent inserts into tables with materialized views. This new feature was added to the bugfix release by a special request from Altinity. [\#9070](https://github.com/ClickHouse/ClickHouse/pull/9070) [(urykhy)](https://github.com/urykhy)
+
+### ClickHouse release v20.1.2.4, 2020-01-22 {#clickhouse-release-v20-1-2-4-2020-01-22}
+
+#### Backward Incompatible Change {#backward-incompatible-change-1}
+
+- Make the setting `merge_tree_uniform_read_distribution` obsolete. The server still recognizes this setting, but it has no effect. [\#8308](https://github.com/ClickHouse/ClickHouse/pull/8308) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Changed the return type of the function `greatCircleDistance` to `Float32` because the result of the calculation is now `Float32`. [\#7993](https://github.com/ClickHouse/ClickHouse/pull/7993) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Now query parameters are expected to be represented in “escaped” format. For example, to pass the string `a<TAB>b` you have to write `a\tb` or `a\b` and, respectively, `a%5Ctb` or `a%5C%09b` in a URL. This is needed to add the possibility of passing NULL as `\N`. This fixes [\#7488](https://github.com/ClickHouse/ClickHouse/issues/7488). [\#8517](https://github.com/ClickHouse/ClickHouse/pull/8517) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Enable the `use_minimalistic_part_header_in_zookeeper` setting for `ReplicatedMergeTree` by default. This will significantly reduce the amount of data stored in ZooKeeper. This setting has been supported since version 19.1 and we have already used it in production at multiple services without any issues for more than half a year. Disable this setting if you have a chance of downgrading to versions older than 19.1. [\#6850](https://github.com/ClickHouse/ClickHouse/pull/6850) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Data skipping indices are production ready and enabled by default. The settings `allow_experimental_data_skipping_indices`, `allow_experimental_cross_to_join_conversion` and `allow_experimental_multiple_joins_emulation` are now obsolete and do nothing. [\#7974](https://github.com/ClickHouse/ClickHouse/pull/7974) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Add new `ANY JOIN` logic for `StorageJoin` consistent with the `JOIN` operation. To upgrade without a change in behaviour you need to add `SETTINGS any_join_distinct_right_table_keys = 1` to the Engine Join tables metadata, or recreate these tables after the upgrade. [\#8400](https://github.com/ClickHouse/ClickHouse/pull/8400) ([Artem Zuikov](https://github.com/4ertus2))
+- Require a server restart to apply changes in the logging configuration. This is a temporary workaround to avoid a bug where the server logs to a deleted log file (see [\#8696](https://github.com/ClickHouse/ClickHouse/issues/8696)). [\#8707](https://github.com/ClickHouse/ClickHouse/pull/8707) ([Alexander Kuzmenkov](https://github.com/akuzm))
+
+#### New Feature {#new-feature-2}
+
+- Added information about part paths to `system.merges`. [\#8043](https://github.com/ClickHouse/ClickHouse/pull/8043) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Add the ability to execute the `SYSTEM RELOAD DICTIONARY` query in `ON CLUSTER` mode. [\#8288](https://github.com/ClickHouse/ClickHouse/pull/8288) ([Guillaume Tassery](https://github.com/YiuRULE))
+- Add the ability to execute `CREATE DICTIONARY` queries in `ON CLUSTER` mode. [\#8163](https://github.com/ClickHouse/ClickHouse/pull/8163) ([alesapin](https://github.com/alesapin))
+- Now a user's profile in `users.xml` can inherit multiple profiles. [\#8343](https://github.com/ClickHouse/ClickHouse/pull/8343) ([Mikhail f. Shiryaev](https://github.com/Felixoid))
+- Added the `system.stack_trace` table that allows looking at the stack traces of all server threads. This is useful for developers to introspect the server state. This fixes [\#7576](https://github.com/ClickHouse/ClickHouse/issues/7576). [\#8344](https://github.com/ClickHouse/ClickHouse/pull/8344) ([alexey-milovidov](https://github.com/alexey-milovidov))
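+
+    A minimal sketch of such introspection (assuming the introspection functions are allowed on the server):
+
+    ```sql
+    SET allow_introspection_functions = 1;
+    -- The trace column holds raw addresses that can be symbolized.
+    SELECT query_id, arrayMap(x -> demangle(addressToSymbol(x)), trace) AS stack
+    FROM system.stack_trace LIMIT 1;
+    ```
+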
+- Add the `DateTime64` data type with configurable sub-second precision. [\#7170](https://github.com/ClickHouse/ClickHouse/pull/7170) ([Vasily Nemkov](https://github.com/Enmk))
+- Add the table function `clusterAllReplicas` which allows querying all the nodes in the cluster. [\#8493](https://github.com/ClickHouse/ClickHouse/pull/8493) ([kiran sunkari](https://github.com/kiransunkari))
+- Add the aggregate function `categoricalInformationValue` which calculates the information value of a discrete feature. [\#8117](https://github.com/ClickHouse/ClickHouse/pull/8117) ([hcz](https://github.com/hczhcz))
+- Speed up the parsing of data files in `CSV`, `TSV` and `JSONEachRow` format by doing it in parallel. [\#7780](https://github.com/ClickHouse/ClickHouse/pull/7780) ([Alexander Kuzmenkov](https://github.com/akuzm))
+- Add the function `bankerRound` which performs banker's rounding. [\#8112](https://github.com/ClickHouse/ClickHouse/pull/8112) ([hcz](https://github.com/hczhcz))
+- Support more languages in the embedded dictionary for region names: ‘ru’, ‘en’, ‘ua’, ‘uk’, ‘by’, ‘kz’, ‘tr’, ‘de’, ‘uz’, ‘lv’, ‘lt’, ‘et’, ‘pt’, ‘he’, ‘vi’. [\#8189](https://github.com/ClickHouse/ClickHouse/pull/8189) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Improved consistency of the `ANY JOIN` logic. Now `t1 ANY LEFT JOIN t2` equals `t2 ANY RIGHT JOIN t1`. [\#7665](https://github.com/ClickHouse/ClickHouse/pull/7665) ([Artem Zuikov](https://github.com/4ertus2))
+- Add the setting `any_join_distinct_right_table_keys` which enables the old behaviour for `ANY INNER JOIN`. [\#7665](https://github.com/ClickHouse/ClickHouse/pull/7665) ([Artem Zuikov](https://github.com/4ertus2))
+- Add new `SEMI` and `ANTI JOIN`. The old `ANY INNER JOIN` behaviour is now available as `SEMI LEFT JOIN`. [\#7665](https://github.com/ClickHouse/ClickHouse/pull/7665) ([Artem Zuikov](https://github.com/4ertus2))
+- Added the `Distributed` format for the `File` engine and the `file` table function which allows reading from `.bin` files generated by asynchronous inserts into `Distributed` tables. [\#8535](https://github.com/ClickHouse/ClickHouse/pull/8535) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Add an optional reset-column argument for `runningAccumulate` which allows resetting aggregation results for each new key value. [\#8326](https://github.com/ClickHouse/ClickHouse/pull/8326) ([Sergey Kononenko](https://github.com/kononencheg))
+- Add the ability to use ClickHouse as a Prometheus endpoint. [\#7900](https://github.com/ClickHouse/ClickHouse/pull/7900) ([vdimir](https://github.com/Vdimir))
+- Add a section `remote_url_allow_hosts` in `config.xml` which restricts the allowed hosts for remote table engines and the table functions `URL`, `S3`, `HDFS`. [\#7154](https://github.com/ClickHouse/ClickHouse/pull/7154) ([Mikhail Korotov](https://github.com/millb))
+- Add the function `greatCircleAngle` which calculates the distance on a sphere in degrees. [\#8105](https://github.com/ClickHouse/ClickHouse/pull/8105) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Changed the Earth radius to be consistent with the H3 library. [\#8105](https://github.com/ClickHouse/ClickHouse/pull/8105) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added `JSONCompactEachRow` and `JSONCompactEachRowWithNamesAndTypes` input and output formats. [\#7841](https://github.com/ClickHouse/ClickHouse/pull/7841) ([Mikhail Korotov](https://github.com/millb))
+- Added a feature for file-related table engines and table functions (`File`, `S3`, `URL`, `HDFS`) which allows reading and writing `gzip` files, based on an additional engine parameter or the file extension. [\#7840](https://github.com/ClickHouse/ClickHouse/pull/7840) ([Andrey Bodrov](https://github.com/apbodrov))
+- Added the `randomASCII(length)` function, which generates a string with a random set of [ASCII](https://en.wikipedia.org/wiki/ASCII#Printable_characters) printable characters. [\#8401](https://github.com/ClickHouse/ClickHouse/pull/8401) ([BayoNet](https://github.com/BayoNet))
+- Added the function `JSONExtractArrayRaw` which returns an array of unparsed json array elements from a `JSON` string. [\#8081](https://github.com/ClickHouse/ClickHouse/pull/8081) ([Oleg Matrokhin](https://github.com/errx))
+- Add the `arrayZip` function which allows combining multiple arrays of equal lengths into one array of tuples. [\#8149](https://github.com/ClickHouse/ClickHouse/pull/8149) ([Winter Zhang](https://github.com/zhang2014))
+- Add the ability to move data between disks according to configured `TTL`-expressions for the `*MergeTree` table engine family. [\#8140](https://github.com/ClickHouse/ClickHouse/pull/8140) ([Vladimir Chebotarev](https://github.com/excitoon))
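+
+    A minimal sketch of such a TTL move rule (table, column, policy and volume names are hypothetical; a `tiered` storage policy with a `cold` volume is assumed to be configured):
+
+    ```sql
+    CREATE TABLE events (d Date, x UInt64)
+    ENGINE = MergeTree ORDER BY x
+    TTL d + INTERVAL 30 DAY TO VOLUME 'cold'
+    SETTINGS storage_policy = 'tiered';
+    ```
+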
+- Added a new aggregate function `avgWeighted` which allows calculating a weighted average. [\#7898](https://github.com/ClickHouse/ClickHouse/pull/7898) ([Andrey Bodrov](https://github.com/apbodrov))
+- Now parallel parsing is enabled by default for the `TSV`, `TSKV`, `CSV` and `JSONEachRow` formats. [\#7894](https://github.com/ClickHouse/ClickHouse/pull/7894) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
+- Add several geo functions from the `H3` library: `h3GetResolution`, `h3EdgeAngle`, `h3EdgeLength`, `h3IsValid` and `h3kRing`. [\#8034](https://github.com/ClickHouse/ClickHouse/pull/8034) ([Konstantin Malanchev](https://github.com/hombit))
+- Added support for brotli (`br`) compression in file-related storages and table functions. This fixes [\#8156](https://github.com/ClickHouse/ClickHouse/issues/8156). [\#8526](https://github.com/ClickHouse/ClickHouse/pull/8526) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Add `groupBit*` functions for the `SimpleAggregationFunction` type. [\#8485](https://github.com/ClickHouse/ClickHouse/pull/8485) ([Guillaume Tassery](https://github.com/YiuRULE))
+
+#### Bug Fix {#bug-fix-6}
+
+- Fix renaming of tables with the `Distributed` engine. Fixes issue [\#7868](https://github.com/ClickHouse/ClickHouse/issues/7868). [\#8306](https://github.com/ClickHouse/ClickHouse/pull/8306) ([tavplubix](https://github.com/tavplubix))
+- Now dictionaries support `EXPRESSION` for attributes as an arbitrary string in a non-ClickHouse SQL dialect. [\#8098](https://github.com/ClickHouse/ClickHouse/pull/8098) ([alesapin](https://github.com/alesapin))
+- Fix broken `INSERT SELECT FROM mysql(...)` queries. This fixes [\#8070](https://github.com/ClickHouse/ClickHouse/issues/8070) and [\#7960](https://github.com/ClickHouse/ClickHouse/issues/7960). [\#8234](https://github.com/ClickHouse/ClickHouse/pull/8234) ([tavplubix](https://github.com/tavplubix))
+- Fix the error “Mismatch column sizes” when inserting a default `Tuple` from `JSONEachRow`. This fixes [\#5653](https://github.com/ClickHouse/ClickHouse/issues/5653). [\#8606](https://github.com/ClickHouse/ClickHouse/pull/8606) ([tavplubix](https://github.com/tavplubix))
+- Now an exception will be thrown in case of using `WITH TIES` alongside `LIMIT BY`. Also added the ability to use `TOP` with `LIMIT BY`. This fixes [\#7472](https://github.com/ClickHouse/ClickHouse/issues/7472). [\#7637](https://github.com/ClickHouse/ClickHouse/pull/7637) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
+- Fix an unintended dependency on a fresh glibc version in the `clickhouse-odbc-bridge` binary. [\#8046](https://github.com/ClickHouse/ClickHouse/pull/8046) ([Amos Bird](https://github.com/amosbird))
+- Fix a bug in the check function of the `*MergeTree` engine family. Now it doesn't fail when we have an equal amount of rows in the last granule and the last mark (non-final). [\#8047](https://github.com/ClickHouse/ClickHouse/pull/8047) ([alesapin](https://github.com/alesapin))
+- Fix inserts into `Enum*` columns after an `ALTER` query, when the underlying numeric type is equal to the table-specified type. This fixes [\#7836](https://github.com/ClickHouse/ClickHouse/issues/7836). [\#7908](https://github.com/ClickHouse/ClickHouse/pull/7908) ([Anton Popov](https://github.com/CurtizJ))
+- Allowed a non-constant negative “size” argument for the function `substring`. It was not allowed by mistake. This fixes [\#4832](https://github.com/ClickHouse/ClickHouse/issues/4832). [\#7703](https://github.com/ClickHouse/ClickHouse/pull/7703) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix a parsing bug when a wrong number of arguments is passed to the `(O|J)DBC` table engine. [\#7709](https://github.com/ClickHouse/ClickHouse/pull/7709) ([alesapin](https://github.com/alesapin))
+- Use the command name of the running clickhouse process when sending logs to syslog. In previous versions, an empty string was used instead of the command name. [\#8460](https://github.com/ClickHouse/ClickHouse/pull/8460) ([Michael Nacharov](https://github.com/mnach))
+- Fix the check of allowed hosts for `localhost`. This PR fixes the solution provided in [\#8241](https://github.com/ClickHouse/ClickHouse/pull/8241). [\#8342](https://github.com/ClickHouse/ClickHouse/pull/8342) ([Vitaly Baranov](https://github.com/vitlibar))
+- Fix a rare crash in the `argMin` and `argMax` functions for long string arguments, when the result is used in the `runningAccumulate` function. This fixes [\#8325](https://github.com/ClickHouse/ClickHouse/issues/8325) [\#8341](https://github.com/ClickHouse/ClickHouse/pull/8341) ([恐龙](https://github.com/769344359))
+- Fix memory overcommit for tables with the `Buffer` engine. [\#8345](https://github.com/ClickHouse/ClickHouse/pull/8345) ([Azat Khuzhin](https://github.com/azat))
+- Fixed a potential bug in functions that can take `NULL` as one of the arguments and return non-NULL. [\#8196](https://github.com/ClickHouse/ClickHouse/pull/8196) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Better metrics calculations in the thread pool for background processes of the `MergeTree` table engines. [\#8194](https://github.com/ClickHouse/ClickHouse/pull/8194) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Fix the function `IN` inside a `WHERE` statement when a row-level table filter is present. Fixes [\#6687](https://github.com/ClickHouse/ClickHouse/issues/6687) [\#8357](https://github.com/ClickHouse/ClickHouse/pull/8357) ([Ivan](https://github.com/abyss7))
+- Now an exception is thrown if an integral value is not parsed completely for settings values. [\#7678](https://github.com/ClickHouse/ClickHouse/pull/7678) ([Mikhail Korotov](https://github.com/millb))
+- Fix an exception when an aggregate function is used in a query to a distributed table with more than two local shards. [\#8164](https://github.com/ClickHouse/ClickHouse/pull/8164) ([小路](https://github.com/nicelulu))
+- Now the bloom filter can handle zero-length arrays and doesn't perform redundant calculations. [\#8242](https://github.com/ClickHouse/ClickHouse/pull/8242) ([achimbab](https://github.com/achimbab))
+- Fixed checking whether a client host is allowed by matching the client host to the `host_regexp` specified in `users.xml`. [\#8241](https://github.com/ClickHouse/ClickHouse/pull/8241) ([Vitaly Baranov](https://github.com/vitlibar))
+- Relax the ambiguous column check that led to false positives in multiple `JOIN ON` sections. [\#8385](https://github.com/ClickHouse/ClickHouse/pull/8385) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed a possible server crash (`std::terminate`) when the server cannot send or write data in the `JSON` or `XML` format with values of the `String` data type (that require `UTF-8` validation), or when compressing result data with the Brotli algorithm, and in some other rare cases. This fixes [\#7603](https://github.com/ClickHouse/ClickHouse/issues/7603) [\#8384](https://github.com/ClickHouse/ClickHouse/pull/8384) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix a race condition in `StorageDistributedDirectoryMonitor` found by CI. This fixes [\#8364](https://github.com/ClickHouse/ClickHouse/issues/8364). [\#8383](https://github.com/ClickHouse/ClickHouse/pull/8383) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Now background merges in the `*MergeTree` table engine family preserve the storage policy volume order more accurately. [\#8549](https://github.com/ClickHouse/ClickHouse/pull/8549) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Now the table engine `Kafka` works properly with the `Native` format. This fixes [\#6731](https://github.com/ClickHouse/ClickHouse/issues/6731) [\#7337](https://github.com/ClickHouse/ClickHouse/issues/7337) [\#8003](https://github.com/ClickHouse/ClickHouse/issues/8003). [\#8016](https://github.com/ClickHouse/ClickHouse/pull/8016) ([filimonov](https://github.com/filimonov))
+- Fixed formats with headers (like `CSVWithNames`) which were throwing an exception about EOF for the table engine `Kafka`. [\#8016](https://github.com/ClickHouse/ClickHouse/pull/8016) ([filimonov](https://github.com/filimonov))
+- Fixed a bug with making a set from a subquery in the right part of an `IN` section. This fixes [\#5767](https://github.com/ClickHouse/ClickHouse/issues/5767) and [\#2542](https://github.com/ClickHouse/ClickHouse/issues/2542). [\#7755](https://github.com/ClickHouse/ClickHouse/pull/7755) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
+- Fix a possible crash while reading from the storage `File`. [\#7756](https://github.com/ClickHouse/ClickHouse/pull/7756) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fixed reading of files in the `Parquet` format containing columns of type `list`. [\#8334](https://github.com/ClickHouse/ClickHouse/pull/8334) ([maxulan](https://github.com/maxulan))
+- Fix the error `Cannot find column` for distributed queries with a `PREWHERE` condition dependent on the sampling key if `max_parallel_replicas > 1`. [\#7913](https://github.com/ClickHouse/ClickHouse/pull/7913) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fix the error `Not found column` if a query used `PREWHERE` dependent on a table's alias and the result set was empty because of a primary key condition. [\#7911](https://github.com/ClickHouse/ClickHouse/pull/7911) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fixed the return type for the functions `rand` and `randConstant` in the case of a `Nullable` argument. Now the functions always return `UInt32` and never `Nullable(UInt32)`. [\#8204](https://github.com/ClickHouse/ClickHouse/pull/8204) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Disabled predicate push-down for `WITH FILL` expressions. This fixes [\#7784](https://github.com/ClickHouse/ClickHouse/issues/7784). [\#7789](https://github.com/ClickHouse/ClickHouse/pull/7789) ([Winter Zhang](https://github.com/zhang2014))
+- Fixed an incorrect `count()` result for `SummingMergeTree` when the `FINAL` section is used. [\#3280](https://github.com/ClickHouse/ClickHouse/issues/3280) [\#7786](https://github.com/ClickHouse/ClickHouse/pull/7786) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
+- Fix possibly incorrect results for constant functions from remote servers. It happened with queries using functions like `version()`, `uptime()`, etc. which return different constant values for different servers. This fixes [\#7666](https://github.com/ClickHouse/ClickHouse/issues/7666). [\#7689](https://github.com/ClickHouse/ClickHouse/pull/7689) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fix a complicated bug in the push-down predicate optimization which led to wrong results. This fixes a lot of issues with push-down predicate optimization. [\#8503](https://github.com/ClickHouse/ClickHouse/pull/8503) ([Winter Zhang](https://github.com/zhang2014))
+- Fix a crash in `CREATE TABLE .. AS dictionary` queries. [\#8508](https://github.com/ClickHouse/ClickHouse/pull/8508) ([Azat Khuzhin](https://github.com/azat))
+- Several improvements to the ClickHouse grammar in the `.g4` file. [\#8294](https://github.com/ClickHouse/ClickHouse/pull/8294) ([taiyang-li](https://github.com/taiyang-li))
+- Fix a bug that led to crashes in `JOIN`s with tables with the engine `Join`. This fixes [\#7556](https://github.com/ClickHouse/ClickHouse/issues/7556) [\#8254](https://github.com/ClickHouse/ClickHouse/issues/8254) [\#7915](https://github.com/ClickHouse/ClickHouse/issues/7915) [\#8100](https://github.com/ClickHouse/ClickHouse/issues/8100). [\#8298](https://github.com/ClickHouse/ClickHouse/pull/8298) ([Artem Zuikov](https://github.com/4ertus2))
+- Fix redundant dictionary reloads on `CREATE DATABASE`. [\#7916](https://github.com/ClickHouse/ClickHouse/pull/7916) ([Azat Khuzhin](https://github.com/azat))
+- Limit the maximum number of streams for reads from `StorageFile` and `StorageHDFS`. Fixes https://github.com/ClickHouse/ClickHouse/issues/7650. [\#7981](https://github.com/ClickHouse/ClickHouse/pull/7981) ([alesapin](https://github.com/alesapin))
+- Fix a bug in `ALTER ... MODIFY ... CODEC` queries, when the user specifies both a default expression and a codec. Fixes [8593](https://github.com/ClickHouse/ClickHouse/issues/8593). [\#8614](https://github.com/ClickHouse/ClickHouse/pull/8614) ([alesapin](https://github.com/alesapin))
+- Fix an error in the background merge of columns with the `SimpleAggregateFunction(LowCardinality)` type. [\#8613](https://github.com/ClickHouse/ClickHouse/pull/8613) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fixed the type check in the function `toDateTime64`. [\#8375](https://github.com/ClickHouse/ClickHouse/pull/8375) ([Vasily Nemkov](https://github.com/Enmk))
+- Now the server does not crash on `LEFT` or `FULL JOIN` with the Join engine and an unsupported `join_use_nulls` setting. [\#8479](https://github.com/ClickHouse/ClickHouse/pull/8479) ([Artem Zuikov](https://github.com/4ertus2))
+- Now a `DROP DICTIONARY IF EXISTS db.dict` query doesn't throw an exception if `db` doesn't exist. [\#8185](https://github.com/ClickHouse/ClickHouse/pull/8185) ([Vitaly Baranov](https://github.com/vitlibar))
+- Fix possible crashes in table functions (`file`, `mysql`, `remote`) caused by the usage of a reference to a removed `IStorage` object. Fix incorrect parsing of columns specified at insertion into a table function. [\#7762](https://github.com/ClickHouse/ClickHouse/pull/7762) ([tavplubix](https://github.com/tavplubix))
+- Ensure the network is up before starting `clickhouse-server`. This fixes [\#7507](https://github.com/ClickHouse/ClickHouse/issues/7507). [\#8570](https://github.com/ClickHouse/ClickHouse/pull/8570) ([余志昌](https://github.com/yuzhichang))
+- Fix timeout handling for secure connections, so queries don't hang indefinitely. This fixes [\#8126](https://github.com/ClickHouse/ClickHouse/issues/8126). [\#8128](https://github.com/ClickHouse/ClickHouse/pull/8128) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix `clickhouse-copier`'s redundant contention between concurrent workers. [\#7816](https://github.com/ClickHouse/ClickHouse/pull/7816) ([丁香飞](https://github.com/dingxiangfei2009))
+- Now mutations don't skip attached parts, even if their mutation version is larger than the current mutation version. [\#7812](https://github.com/ClickHouse/ClickHouse/pull/7812) ([余志昌](https://github.com/yuzhichang)) [\#8250](https://github.com/ClickHouse/ClickHouse/pull/8250) ([alesapin](https://github.com/alesapin))
+- Ignore redundant copies of `*MergeTree` data parts after a move to another disk and a server restart. [\#7810](https://github.com/ClickHouse/ClickHouse/pull/7810) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Fix a crash in `FULL JOIN` with `LowCardinality` in the `JOIN` key. [\#8252](https://github.com/ClickHouse/ClickHouse/pull/8252) ([Artem Zuikov](https://github.com/4ertus2))
+- Forbid using a column name more than once in an insert query, like `INSERT INTO tbl (x, y, x)`. This fixes [\#5465](https://github.com/ClickHouse/ClickHouse/issues/5465), [\#7681](https://github.com/ClickHouse/ClickHouse/issues/7681). [\#7685](https://github.com/ClickHouse/ClickHouse/pull/7685) ([alesapin](https://github.com/alesapin))
+- Added a fallback for detecting the number of physical CPU cores on unknown CPUs (using the number of logical CPU cores). This fixes [\#5239](https://github.com/ClickHouse/ClickHouse/issues/5239). [\#7726](https://github.com/ClickHouse/ClickHouse/pull/7726) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix the `There's no column` error for materialized and alias columns. [\#8210](https://github.com/ClickHouse/ClickHouse/pull/8210) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed a severe crash when an `EXISTS` query was used without the `TABLE` or `DICTIONARY` qualifier, just like `EXISTS t`. This fixes [\#8172](https://github.com/ClickHouse/ClickHouse/issues/8172). This bug was introduced in version 19.17. [\#8213](https://github.com/ClickHouse/ClickHouse/pull/8213) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix the rare error `"Sizes of columns doesn't match"` which might appear when using a `SimpleAggregateFunction` column. [\#7790](https://github.com/ClickHouse/ClickHouse/pull/7790) ([Boris Granveaud](https://github.com/bgranvea))
+- Fixed the bug where users with an empty `allow_databases` got access to all databases (same for `allow_dictionaries`). [\#7793](https://github.com/ClickHouse/ClickHouse/pull/7793) ([DeifyTheGod](https://github.com/DeifyTheGod))
+- Fix a client crash when the server has already disconnected from the client. [\#8071](https://github.com/ClickHouse/ClickHouse/pull/8071) ([Azat Khuzhin](https://github.com/azat))
+- Fix `ORDER BY` behaviour in case of sorting by a primary key prefix and a non-primary-key suffix. [\#7759](https://github.com/ClickHouse/ClickHouse/pull/7759) ([Anton Popov](https://github.com/CurtizJ))
+- Check whether a qualified column is present in the table. This fixes [\#6836](https://github.com/ClickHouse/ClickHouse/issues/6836). [\#7758](https://github.com/ClickHouse/ClickHouse/pull/7758) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed the behaviour of `ALTER MOVE` run immediately after a merge finishes: it moved the superpart of the specified part. Fixes [\#8103](https://github.com/ClickHouse/ClickHouse/issues/8103). [\#8104](https://github.com/ClickHouse/ClickHouse/pull/8104) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Fix a possible server crash while using `UNION` with a different number of columns. Fixes [\#7279](https://github.com/ClickHouse/ClickHouse/issues/7279). [\#7929](https://github.com/ClickHouse/ClickHouse/pull/7929) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fix the size of the result substring for the function `substr` with a negative size. [\#8589](https://github.com/ClickHouse/ClickHouse/pull/8589) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Now the server does not execute part mutations in `MergeTree` if there are not enough free threads in the background pool. [\#8588](https://github.com/ClickHouse/ClickHouse/pull/8588) ([tavplubix](https://github.com/tavplubix))
+- Fix a minor typo in the formatting of `UNION ALL` AST. [\#7999](https://github.com/ClickHouse/ClickHouse/pull/7999) ([litao91](https://github.com/litao91))
+- Fixed incorrect bloom filter results for negative numbers. This fixes [\#8317](https://github.com/ClickHouse/ClickHouse/issues/8317). [\#8566](https://github.com/ClickHouse/ClickHouse/pull/8566) ([Winter Zhang](https://github.com/zhang2014))
+- Fixed a potential buffer overflow in decompression. A malicious user could pass fabricated compressed data that would cause a read after the buffer. This issue was found by Eldar Zaitov of the Yandex information security team. [\#8404](https://github.com/ClickHouse/ClickHouse/pull/8404) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix an incorrect result due to integer overflow in `arrayIntersect`. [\#7777](https://github.com/ClickHouse/ClickHouse/pull/7777) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Now the `OPTIMIZE TABLE` query will not wait for offline replicas to perform the operation. [\#8314](https://github.com/ClickHouse/ClickHouse/pull/8314) ([javi santana](https://github.com/javisantana))
+- Fixed the `ALTER TTL` parser for `Replicated*MergeTree` tables. [\#8318](https://github.com/ClickHouse/ClickHouse/pull/8318) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Fix the communication between server and client, so the server reads temporary table info after a query failure. [\#8084](https://github.com/ClickHouse/ClickHouse/pull/8084) ([Azat Khuzhin](https://github.com/azat))
+- Fix a `bitmapAnd` function error when intersecting an aggregated bitmap and a scalar bitmap. [\#8082](https://github.com/ClickHouse/ClickHouse/pull/8082) ([黄月](https://github.com/moon03432))
+- Refine the definition of `ZXid` according to the ZooKeeper Programmer's Guide, which fixes a bug in `clickhouse-cluster-copier`. [\#8088](https://github.com/ClickHouse/ClickHouse/pull/8088) ([丁香飞](https://github.com/dingxiangfei2009))
+- The `odbc` table function now respects the `external_table_functions_use_nulls` setting. [\#7506](https://github.com/ClickHouse/ClickHouse/pull/7506) ([Vasily Nemkov](https://github.com/Enmk))
+- Fixed a bug that led to a rare data race. [\#8143](https://github.com/ClickHouse/ClickHouse/pull/8143) ([Alexander Kazakov](https://github.com/Akazz))
+- Now `SYSTEM RELOAD DICTIONARY` reloads a dictionary completely, ignoring `update_field`. This fixes [\#7440](https://github.com/ClickHouse/ClickHouse/issues/7440). [\#8037](https://github.com/ClickHouse/ClickHouse/pull/8037) ([Vitaly Baranov](https://github.com/vitlibar))
+- Add the ability to check whether a dictionary exists in a create query. [\#8032](https://github.com/ClickHouse/ClickHouse/pull/8032) ([alesapin](https://github.com/alesapin))
+- Fix `Float*` parsing in the `Values` format. This fixes [\#7817](https://github.com/ClickHouse/ClickHouse/issues/7817). [\#7870](https://github.com/ClickHouse/ClickHouse/pull/7870) ([tavplubix](https://github.com/tavplubix))
+- Fix a crash when we cannot reserve space in some background operations of the `*MergeTree` table engine family. [\#7873](https://github.com/ClickHouse/ClickHouse/pull/7873) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Fix a crash of the merge operation when the table contains a `SimpleAggregateFunction(LowCardinality)` column. This fixes [\#8515](https://github.com/ClickHouse/ClickHouse/issues/8515). [\#8522](https://github.com/ClickHouse/ClickHouse/pull/8522) ([Azat Khuzhin](https://github.com/azat))
+- Restore support of all ICU locales and add the ability to apply collations to constant expressions. Also add the language name to the `system.collations` table. [\#8051](https://github.com/ClickHouse/ClickHouse/pull/8051) ([alesapin](https://github.com/alesapin))
+- Fix a bug where external dictionaries with a zero minimal lifetime (`LIFETIME(MIN 0 MAX N)`, `LIFETIME(N)`) didn't update in the background. [\#7983](https://github.com/ClickHouse/ClickHouse/pull/7983) ([alesapin](https://github.com/alesapin))
+- Fix a crash when an external dictionary with a ClickHouse source has a subquery in the query. [\#8351](https://github.com/ClickHouse/ClickHouse/pull/8351) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fix incorrect parsing of the file extension in tables with the engine `URL`. This fixes [\#8157](https://github.com/ClickHouse/ClickHouse/issues/8157). [\#8419](https://github.com/ClickHouse/ClickHouse/pull/8419) ([Andrey Bodrov](https://github.com/apbodrov))
+- Fix the `CHECK TABLE` query for `*MergeTree` tables without a key. Fixes [\#7543](https://github.com/ClickHouse/ClickHouse/issues/7543). [\#7979](https://github.com/ClickHouse/ClickHouse/pull/7979) ([alesapin](https://github.com/alesapin))
+- Fixed the conversion of `Float64` to the MySQL type. [\#8079](https://github.com/ClickHouse/ClickHouse/pull/8079) ([Yuriy Baranov](https://github.com/yurriy))
+- Now if a table was not completely dropped because of a server crash, the server will try to restore and load it. [\#8176](https://github.com/ClickHouse/ClickHouse/pull/8176) ([tavplubix](https://github.com/tavplubix))
+- Fixed a crash in the table function `file` while inserting into a file that doesn't exist. Now in this case the file will be created and the insert will then be processed. [\#8177](https://github.com/ClickHouse/ClickHouse/pull/8177) ([Olga Khvostikova](https://github.com/stavrolia))
+- Fix a rare deadlock which can happen when `trace_log` is enabled. [\#7838](https://github.com/ClickHouse/ClickHouse/pull/7838) ([filimonov](https://github.com/filimonov))
+- Add the ability to work with different types besides `Date` in `RangeHashed` external dictionaries created from DDL queries. Fixes [7899](https://github.com/ClickHouse/ClickHouse/issues/7899). [\#8275](https://github.com/ClickHouse/ClickHouse/pull/8275) ([alesapin](https://github.com/alesapin))
+- Fix a crash when `now64()` is called with the result of another function. [\#8270](https://github.com/ClickHouse/ClickHouse/pull/8270) ([Vasily Nemkov](https://github.com/Enmk))
+- Fixed a bug with detecting the client IP for connections through the mysql wire protocol. [\#7743](https://github.com/ClickHouse/ClickHouse/pull/7743) ([Dmitry Muzyka](https://github.com/dmitriy-myz))
+- Fix empty array handling in the `arraySplit` function. This fixes [\#7708](https://github.com/ClickHouse/ClickHouse/issues/7708). [\#7747](https://github.com/ClickHouse/ClickHouse/pull/7747) ([hcz](https://github.com/hczhcz))
+- Fixed the issue where the `pid-file` of another running `clickhouse-server` could be deleted. [\#8487](https://github.com/ClickHouse/ClickHouse/pull/8487) ([徐伟清](https://github.com/weiqxu))
+- Fix dictionary reloading if the dictionary has an `invalidate_query`, which stopped updates after some exception on previous update attempts. [\#8029](https://github.com/ClickHouse/ClickHouse/pull/8029) ([alesapin](https://github.com/alesapin))
+- Fixed an error in the function `arrayReduce` that may lead to a “double free”, and an error in the aggregate function combinator `Resample` that may lead to a memory leak. Added the aggregate function `aggThrow`. This function can be used for testing purposes. [\#8446](https://github.com/ClickHouse/ClickHouse/pull/8446) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+#### Improvement {#improvement-1}
+
+- Improved logging when working with the `S3` table engine. [\#8251](https://github.com/ClickHouse/ClickHouse/pull/8251) ([Grigory Pervakov](https://github.com/GrigoryPervakov))
+- Print a help message when no arguments are passed when calling `clickhouse-local`. This fixes [\#5335](https://github.com/ClickHouse/ClickHouse/issues/5335). [\#8230](https://github.com/ClickHouse/ClickHouse/pull/8230) ([Andrey Nagorny](https://github.com/Melancholic))
+- Add the setting `mutations_sync` which allows waiting for `ALTER UPDATE/DELETE` queries synchronously. [\#8237](https://github.com/ClickHouse/ClickHouse/pull/8237) ([alesapin](https://github.com/alesapin))
+- Allow setting a relative `user_files_path` in `config.xml` (in a way similar to `format_schema_path`). [\#7632](https://github.com/ClickHouse/ClickHouse/pull/7632) ([hcz](https://github.com/hczhcz))
+- Add an exception for illegal types for conversion functions with the `-OrZero` postfix. [\#7880](https://github.com/ClickHouse/ClickHouse/pull/7880) ([Andrey Konyaev](https://github.com/akonyaev90))
+- Simplify the format of the header of data sent to a shard in a distributed query. [\#8044](https://github.com/ClickHouse/ClickHouse/pull/8044) ([Vitaly Baranov](https://github.com/vitlibar))
+- `Live View` table engine refactoring. [\#8519](https://github.com/ClickHouse/ClickHouse/pull/8519) ([vzakaznikov](https://github.com/vzakaznikov))
+- Add additional checks for external dictionaries created from DDL queries. [\#8127](https://github.com/ClickHouse/ClickHouse/pull/8127) ([alesapin](https://github.com/alesapin))
+- Fix the error `Column ... already exists` while using `FINAL` and `SAMPLE` together, e.g. `select count() from table final sample 1/2`. Fixes [\#5186](https://github.com/ClickHouse/ClickHouse/issues/5186). [\#7907](https://github.com/ClickHouse/ClickHouse/pull/7907) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Now the first argument of the `joinGet` function can be a table identifier. [\#7707](https://github.com/ClickHouse/ClickHouse/pull/7707) ([Amos Bird](https://github.com/amosbird))
+- Allow using `MaterializedView` with subqueries above `Kafka` tables. [\#8197](https://github.com/ClickHouse/ClickHouse/pull/8197) ([filimonov](https://github.com/filimonov))
+- Now background moves between disks run in a separate thread pool. [\#7670](https://github.com/ClickHouse/ClickHouse/pull/7670) ([Vladimir Chebotarev](https://github.com/excitoon))
+- `SYSTEM RELOAD DICTIONARY` now executes synchronously. [\#8240](https://github.com/ClickHouse/ClickHouse/pull/8240) ([Vitaly Baranov](https://github.com/vitlibar))
+- Stack traces now display physical addresses (offsets in the object file) instead of virtual memory addresses (where the object file was loaded). That allows the use of `addr2line` when the binary is position-independent and ASLR is active. This fixes [\#8360](https://github.com/ClickHouse/ClickHouse/issues/8360). [\#8387](https://github.com/ClickHouse/ClickHouse/pull/8387) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Support new syntax for row-level security filters: `<table name='table_name'>…</table>`. Fixes [\#5779](https://github.com/ClickHouse/ClickHouse/issues/5779). [\#8381](https://github.com/ClickHouse/ClickHouse/pull/8381) ([Ivan](https://github.com/abyss7))
+- Now the `cityHash` function can work with the `Decimal` and `UUID` types. Fixes [\#5184](https://github.com/ClickHouse/ClickHouse/issues/5184). [\#7693](https://github.com/ClickHouse/ClickHouse/pull/7693) ([Mikhail Korotov](https://github.com/millb))
+- Removed the fixed index granularity (it was 1024) from system logs because it's obsolete after the implementation of adaptive granularity. [\#7698](https://github.com/ClickHouse/ClickHouse/pull/7698) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Enabled the MySQL compatibility server when ClickHouse is compiled without SSL. [\#7852](https://github.com/ClickHouse/ClickHouse/pull/7852) ([Yuriy Baranov](https://github.com/yurriy))
+- Now the server checksums distributed batches, which gives more verbose errors in case of corrupted data in a batch. [\#7914](https://github.com/ClickHouse/ClickHouse/pull/7914) ([Azat Khuzhin](https://github.com/azat))
+- Support `DROP DATABASE`, `DETACH TABLE`, `DROP TABLE` and `ATTACH TABLE` for the `MySQL` database engine. [\#8202](https://github.com/ClickHouse/ClickHouse/pull/8202) ([Winter Zhang](https://github.com/zhang2014))
+- Add authentication in the S3 table function and table engine. [\#7623](https://github.com/ClickHouse/ClickHouse/pull/7623) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Added a check for extra parts of `MergeTree` on different disks, in order not to allow missing data parts on undefined disks. [\#8118](https://github.com/ClickHouse/ClickHouse/pull/8118) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Enable SSL support for the Mac client and server. [\#8297](https://github.com/ClickHouse/ClickHouse/pull/8297) ([Ivan](https://github.com/abyss7))
+- Now ClickHouse can work as a MySQL federated server (see https://dev.mysql.com/doc/refman/5.7/en/federated-create-server.html). [\#7717](https://github.com/ClickHouse/ClickHouse/pull/7717) ([Maxim Fedotov](https://github.com/MaxFedotov))
+- `clickhouse-client` now only enables `bracketed-paste` when multiquery is on and multiline is off. This fixes [\#7757](https://github.com/ClickHouse/ClickHouse/issues/7757). [\#7761](https://github.com/ClickHouse/ClickHouse/pull/7761) ([Amos Bird](https://github.com/amosbird))
+- Support `Array(Decimal)` in the `if` function. [\#7721](https://github.com/ClickHouse/ClickHouse/pull/7721) ([Artem Zuikov](https://github.com/4ertus2))
+- Support Decimals in the `arrayDifference`, `arrayCumSum` and `arrayCumSumNegative` functions. [\#7724](https://github.com/ClickHouse/ClickHouse/pull/7724) ([Artem Zuikov](https://github.com/4ertus2))
+- Added a `lifetime` column to the `system.dictionaries` table. [\#6820](https://github.com/ClickHouse/ClickHouse/issues/6820) [\#7727](https://github.com/ClickHouse/ClickHouse/pull/7727) ([kekekekule](https://github.com/kekekekule))
+- Improved the check for existing parts on different disks for the `*MergeTree` table engines. Addresses [\#7660](https://github.com/ClickHouse/ClickHouse/issues/7660). [\#8440](https://github.com/ClickHouse/ClickHouse/pull/8440) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Integration with the `AWS SDK` for `S3` interactions which allows using all S3 features out of the box. [\#8011](https://github.com/ClickHouse/ClickHouse/pull/8011) ([Pavel Kovalenko](https://github.com/Jokser))
+- Added support for subqueries in `Live View` tables. [\#7792](https://github.com/ClickHouse/ClickHouse/pull/7792) ([vzakaznikov](https://github.com/vzakaznikov))
+- The check for using `Date` or `DateTime` columns from `TTL` expressions was removed. [\#7920](https://github.com/ClickHouse/ClickHouse/pull/7920) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Information about the disk was added to the `system.detached_parts` table. [\#7833](https://github.com/ClickHouse/ClickHouse/pull/7833) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Now the settings `max_(table|partition)_size_to_drop` can be changed without a restart. [\#7779](https://github.com/ClickHouse/ClickHouse/pull/7779) ([Grigory Pervakov](https://github.com/GrigoryPervakov))
+- Slightly better usability of error messages. Ask the user not to remove the lines below `Stack trace:`. [\#7897](https://github.com/ClickHouse/ClickHouse/pull/7897) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Better reading of messages from the `Kafka` engine in various formats after [\#7935](https://github.com/ClickHouse/ClickHouse/issues/7935). [\#8035](https://github.com/ClickHouse/ClickHouse/pull/8035) ([Ivan](https://github.com/abyss7))
+- Better compatibility with MySQL clients which don't support the `sha2_password` auth plugin. [\#8036](https://github.com/ClickHouse/ClickHouse/pull/8036) ([Yuriy Baranov](https://github.com/yurriy))
+- Support more column types in the MySQL compatibility server. [\#7975](https://github.com/ClickHouse/ClickHouse/pull/7975) ([Yuriy Baranov](https://github.com/yurriy))
+- Implement the `ORDER BY` optimization for `Merge`, `Buffer` and `Materialized View` storages with underlying `MergeTree` tables. [\#8130](https://github.com/ClickHouse/ClickHouse/pull/8130) ([Anton Popov](https://github.com/CurtizJ))
+- Now we always use the POSIX implementation of `getrandom` for better compatibility with old kernels (\< 3.17). [\#7940](https://github.com/ClickHouse/ClickHouse/pull/7940) ([Amos Bird](https://github.com/amosbird))
+- Better check for a valid destination in a move TTL rule. [\#8410](https://github.com/ClickHouse/ClickHouse/pull/8410) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Better checks for broken insert batches for the `Distributed` table engine. [\#7933](https://github.com/ClickHouse/ClickHouse/pull/7933) ([Azat Khuzhin](https://github.com/azat))
+- Add a column with an array of the names of the parts which mutations must process in the future to the `system.mutations` table. [\#8179](https://github.com/ClickHouse/ClickHouse/pull/8179) ([alesapin](https://github.com/alesapin))
+- Parallel merge sort optimization for processors. [\#8552](https://github.com/ClickHouse/ClickHouse/pull/8552) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- The setting `mark_cache_min_lifetime` is now obsolete and does nothing. In previous versions, the mark cache could grow in memory to larger than `mark_cache_size` to accommodate data within `mark_cache_min_lifetime` seconds. That was leading to confusion and higher memory usage than expected, which is especially bad on memory-constrained systems. If you see performance degradation after installing this release, you should increase `mark_cache_size`. [\#8484](https://github.com/ClickHouse/ClickHouse/pull/8484) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Preparation to use `tid` everywhere. This is needed for [\#7477](https://github.com/ClickHouse/ClickHouse/issues/7477). [\#8276](https://github.com/ClickHouse/ClickHouse/pull/8276) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+#### Performance Improvement {#performance-improvement-1}
+
+- Performance optimizations in the processors pipeline. [\#7988](https://github.com/ClickHouse/ClickHouse/pull/7988) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Non-blocking updates of expired keys in cache dictionaries (with permission to read the old ones). [\#8303](https://github.com/ClickHouse/ClickHouse/pull/8303) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
+- Compile ClickHouse without `-fno-omit-frame-pointer` globally to spare one more register. [\#8097](https://github.com/ClickHouse/ClickHouse/pull/8097) ([Amos Bird](https://github.com/amosbird))
+- Speed up the `greatCircleDistance` function and add performance tests for it. [\#7307](https://github.com/ClickHouse/ClickHouse/pull/7307) ([Olga Khvostikova](https://github.com/stavrolia))
+- Improved performance of the function `roundDown`. [\#8465](https://github.com/ClickHouse/ClickHouse/pull/8465) ([alexey-milovidov](https://github.com/alexey-milovidov))
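+
+    A minimal sketch of what `roundDown` computes (it rounds the first argument down to the nearest element of the array):
+
+    ```sql
+    SELECT roundDown(4.7, [0, 1, 2, 5, 10]);  -- returns 2
+    ```
+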
+- Improved performance of `max`, `min`, `argMin`, `argMax` for the `DateTime64` data type. [\#8199](https://github.com/ClickHouse/ClickHouse/pull/8199) ([Vasily Nemkov](https://github.com/Enmk))
+- Improved performance of sorting without a limit or with a big limit, and of external sorting. [\#8545](https://github.com/ClickHouse/ClickHouse/pull/8545) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Improved performance of formatting floating point numbers by up to 6 times. [\#8542](https://github.com/ClickHouse/ClickHouse/pull/8542) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Improved performance of the `modulo` function. [\#7750](https://github.com/ClickHouse/ClickHouse/pull/7750) ([Amos Bird](https://github.com/amosbird))
+- Optimized `ORDER BY` and merging with a single column key. [\#8335](https://github.com/ClickHouse/ClickHouse/pull/8335) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Better implementation of `arrayReduce` and the `-Array` and `-State` combinators. [\#7710](https://github.com/ClickHouse/ClickHouse/pull/7710) ([Amos Bird](https://github.com/amosbird))
+- Now `PREWHERE` should be optimized to be at least as efficient as `WHERE`. [\#7769](https://github.com/ClickHouse/ClickHouse/pull/7769) ([Amos Bird](https://github.com/amosbird))
+- Improve the way `round` and `roundBankers` handle negative numbers. [\#8229](https://github.com/ClickHouse/ClickHouse/pull/8229) ([hcz](https://github.com/hczhcz))
+- Improved decoding performance of the `DoubleDelta` and `Gorilla` codecs by roughly 30-40%. This fixes [\#7082](https://github.com/ClickHouse/ClickHouse/issues/7082). [\#8019](https://github.com/ClickHouse/ClickHouse/pull/8019) ([Vasily Nemkov](https://github.com/Enmk))
+- Improved performance of `base64`-related functions. [\#8444](https://github.com/ClickHouse/ClickHouse/pull/8444) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added a function `geoDistance`. It is similar to `greatCircleDistance` but uses an approximation to the WGS-84 ellipsoid model. The performance of both functions is nearly the same. [\#8086](https://github.com/ClickHouse/ClickHouse/pull/8086) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Faster `min` and `max` aggregation functions for the `Decimal` data type. [\#8144](https://github.com/ClickHouse/ClickHouse/pull/8144) ([Artem Zuikov](https://github.com/4ertus2))
+- Vectorized processing of `arrayReduce`. [\#7608](https://github.com/ClickHouse/ClickHouse/pull/7608) ([Amos Bird](https://github.com/amosbird))
+- `if` chains are now optimized as `multiIf`. [\#8355](https://github.com/ClickHouse/ClickHouse/pull/8355) ([kamalov-ruslan](https://github.com/kamalov-ruslan))
+- Fix the performance regression of the `Kafka` table engine introduced in 19.15. This fixes [\#7261](https://github.com/ClickHouse/ClickHouse/issues/7261). [\#7935](https://github.com/ClickHouse/ClickHouse/pull/7935) ([filimonov](https://github.com/filimonov))
+- Removed “pie” code generation that `gcc` from Debian packages occasionally brings in by default. [\#8483](https://github.com/ClickHouse/ClickHouse/pull/8483) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Parallel parsing of data formats. [\#6553](https://github.com/ClickHouse/ClickHouse/pull/6553) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
+- Enable the optimized parser of `Values` with expressions by default (`input_format_values_deduce_templates_of_expressions=1`). [\#8231](https://github.com/ClickHouse/ClickHouse/pull/8231) ([tavplubix](https://github.com/tavplubix))
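+
+    A minimal sketch of an insert that benefits from template deduction (the table name is hypothetical):
+
+    ```sql
+    SET input_format_values_deduce_templates_of_expressions = 1;
+    INSERT INTO t VALUES (lower('Hello'), 1 + 2), (lower('World'), 3 + 4);
+    ```
+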
+
+#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-2}
+
+- Build fixes for `ARM` and in minimal mode. [\#8304](https://github.com/ClickHouse/ClickHouse/pull/8304) ([proller](https://github.com/proller))
+- Add a coverage file flush for `clickhouse-server` when std::atexit is not called. Also slightly improved logging in stateless tests with coverage. [\#8267](https://github.com/ClickHouse/ClickHouse/pull/8267) ([alesapin](https://github.com/alesapin))
+- Update the LLVM library in contrib. Avoid using LLVM from OS packages. [\#8258](https://github.com/ClickHouse/ClickHouse/pull/8258) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Make the bundled `curl` build fully quiet. [\#8232](https://github.com/ClickHouse/ClickHouse/pull/8232) [\#8203](https://github.com/ClickHouse/ClickHouse/pull/8203) ([Pavel Kovalenko](https://github.com/Jokser))
+- Fix some `MemorySanitizer` warnings. [\#8235](https://github.com/ClickHouse/ClickHouse/pull/8235) ([Alexander Kuzmenkov](https://github.com/akuzm))
+- Use the `add_warning` and `no_warning` macros in `CMakeLists.txt`. [\#8604](https://github.com/ClickHouse/ClickHouse/pull/8604) ([Ivan](https://github.com/abyss7))
+- Add support for Minio S3-compatible objects (https://min.io/) for better integration tests. [\#7863](https://github.com/ClickHouse/ClickHouse/pull/7863) [\#7875](https://github.com/ClickHouse/ClickHouse/pull/7875) ([Pavel Kovalenko](https://github.com/Jokser))
+- Imported `libc` headers to contrib. This allows making builds more consistent across various systems (only for `x86_64-linux-gnu`). [\#5773](https://github.com/ClickHouse/ClickHouse/pull/5773) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Remove `-fPIC` from some libraries. [\#8464](https://github.com/ClickHouse/ClickHouse/pull/8464) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Clean up `CMakeLists.txt` for curl. See https://github.com/ClickHouse/ClickHouse/pull/8011\#issuecomment-569478910 [\#8459](https://github.com/ClickHouse/ClickHouse/pull/8459) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Silence warnings in the `CapNProto` library. [\#8220](https://github.com/ClickHouse/ClickHouse/pull/8220) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Add performance tests for short-string-optimized hash tables. [\#7679](https://github.com/ClickHouse/ClickHouse/pull/7679) ([Amos Bird](https://github.com/amosbird))
+- Now ClickHouse will build on `AArch64` even if `MADV_FREE` is not available. This fixes [\#8027](https://github.com/ClickHouse/ClickHouse/issues/8027). [\#8243](https://github.com/ClickHouse/ClickHouse/pull/8243) ([Amos Bird](https://github.com/amosbird))
+- Update `zlib-ng` to fix memory sanitizer problems. [\#7182](https://github.com/ClickHouse/ClickHouse/pull/7182) [\#8206](https://github.com/ClickHouse/ClickHouse/pull/8206) ([Alexander Kuzmenkov](https://github.com/akuzm))
+- Enable the internal MySQL library on non-Linux systems, because the usage of OS packages is very fragile and usually doesn't work at all. This fixes [\#5765](https://github.com/ClickHouse/ClickHouse/issues/5765). [\#8426](https://github.com/ClickHouse/ClickHouse/pull/8426) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed the build on some systems after enabling `libc++`. This supersedes [\#8374](https://github.com/ClickHouse/ClickHouse/issues/8374). [\#8380](https://github.com/ClickHouse/ClickHouse/pull/8380) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Make `Field` methods more type-safe to find more errors. [\#7386](https://github.com/ClickHouse/ClickHouse/pull/7386) [\#8209](https://github.com/ClickHouse/ClickHouse/pull/8209) ([Alexander Kuzmenkov](https://github.com/akuzm))
+- Added missing files to the `libc-headers` submodule. [\#8507](https://github.com/ClickHouse/ClickHouse/pull/8507) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix wrong `JSON` quoting in performance test output. [\#8497](https://github.com/ClickHouse/ClickHouse/pull/8497) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Now the stack trace is displayed for `std::exception` and `Poco::Exception`. In previous versions it was available only for `DB::Exception`. This improves diagnostics. [\#8501](https://github.com/ClickHouse/ClickHouse/pull/8501) ([alexey-milovidov](https://github.com/alexey-milovidov))
这改进了诊断。 [\#8501](https://github.com/ClickHouse/ClickHouse/pull/8501) ([alexey-milovidov](https://github.com/alexey-milovidov)) +- 为较新的 glibc 版本移植了 `clock_gettime` 和 `clock_nanosleep`。 [\#8054](https://github.com/ClickHouse/ClickHouse/pull/8054) ([amosbird](https://github.com/amosbird)) +- 在面向开发者的示例配置中启用了 `part_log`。 [\#8609](https://github.com/ClickHouse/ClickHouse/pull/8609) ([alexey-milovidov](https://github.com/alexey-milovidov)) +- 修复了 `01036_no_superfluous_dict_reload_on_create_database*` 中重新加载的异步性问题。 [\#8111](https://github.com/ClickHouse/ClickHouse/pull/8111) ([Azat Khuzhin](https://github.com/azat)) +- 修复了编解码器性能测试。 [\#8615](https://github.com/ClickHouse/ClickHouse/pull/8615) ([Vasily Nemkov](https://github.com/Enmk)) +- 为 `.tgz` 构建添加了安装脚本及其文档。 [\#8612](https://github.com/ClickHouse/ClickHouse/pull/8612) [\#8591](https://github.com/ClickHouse/ClickHouse/pull/8591) ([alesapin](https://github.com/alesapin)) +- 删除了旧的 `ZSTD` 测试(它创建于2016年,用于重现 zstd 1.0 之前版本的错误)。这修复了 [\#8618](https://github.com/ClickHouse/ClickHouse/issues/8618)。 [\#8619](https://github.com/ClickHouse/ClickHouse/pull/8619) ([alexey-milovidov](https://github.com/alexey-milovidov)) +- 修复了 Mac OS Catalina 上的构建。 [\#8600](https://github.com/ClickHouse/ClickHouse/pull/8600) ([meob](https://github.com/meob)) +- 增加了编解码器性能测试中的行数,以使结果更显著。 [\#8574](https://github.com/ClickHouse/ClickHouse/pull/8574) ([Vasily Nemkov](https://github.com/Enmk)) +- 在调试版本中,将 `LOGICAL_ERROR` 异常作为断言失败处理,使其更容易被注意到。 [\#8475](https://github.com/ClickHouse/ClickHouse/pull/8475) ([Alexander Kuzmenkov](https://github.com/akuzm)) +- 使与格式相关的性能测试更具确定性。 [\#8477](https://github.com/ClickHouse/ClickHouse/pull/8477) ([alexey-milovidov](https://github.com/alexey-milovidov)) +- 更新了 `lz4` 以修复 MemorySanitizer 的故障。 [\#8181](https://github.com/ClickHouse/ClickHouse/pull/8181) ([Alexander Kuzmenkov](https://github.com/akuzm)) +- 抑制了异常处理中已知的 MemorySanitizer 误报。 [\#8182](https://github.com/ClickHouse/ClickHouse/pull/8182) ([Alexander Kuzmenkov](https://github.com/akuzm)) +- 将 `build/docker/build.sh` 中的 `gcc` 和 `g++` 更新到版本 9。 [\#7766](https://github.com/ClickHouse/ClickHouse/pull/7766) ([tlightsky](https://github.com/tlightsky)) +- 添加了性能测试用例,用于测试 `PREWHERE` 比 `WHERE` 更差的情况。 [\#7768](https://github.com/ClickHouse/ClickHouse/pull/7768) ([amosbird](https://github.com/amosbird)) +- 在修复一个不稳定的测试方面取得了进展。 [\#8621](https://github.com/ClickHouse/ClickHouse/pull/8621) ([alexey-milovidov](https://github.com/alexey-milovidov)) +- 避免 MemorySanitizer 报告来自 `libunwind` 的数据。 [\#8539](https://github.com/ClickHouse/ClickHouse/pull/8539) ([alexey-milovidov](https://github.com/alexey-milovidov)) +- 将 `libc++` 更新到最新版本。 [\#8324](https://github.com/ClickHouse/ClickHouse/pull/8324) ([alexey-milovidov](https://github.com/alexey-milovidov)) +- 从源码构建 ICU 库。这修复了 [\#6460](https://github.com/ClickHouse/ClickHouse/issues/6460)。 [\#8219](https://github.com/ClickHouse/ClickHouse/pull/8219) ([alexey-milovidov](https://github.com/alexey-milovidov)) +- 从 `libressl` 切换到 `openssl`。此更改后 ClickHouse 应支持 TLS 1.3 和 SNI。这修复了 [\#8171](https://github.com/ClickHouse/ClickHouse/issues/8171)。 [\#8218](https://github.com/ClickHouse/ClickHouse/pull/8218) ([alexey-milovidov](https://github.com/alexey-milovidov)) +- 修复了使用 SSL 中的 `chacha20_poly1305` 时的 UBSan 报告(在连接 https://yandex.ru/ 时发生)。 [\#8214](https://github.com/ClickHouse/ClickHouse/pull/8214) ([alexey-milovidov](https://github.com/alexey-milovidov)) +- 修复了 `.deb` Linux 发行版中默认密码文件的权限模式。 [\#8075](https://github.com/ClickHouse/ClickHouse/pull/8075) ([proller](https://github.com/proller)) +- 改进了 `clickhouse-test` 中获取 `clickhouse-server` PID 的表达式。
[\#8063](https://github.com/ClickHouse/ClickHouse/pull/8063) ([Alexander Kazakov](https://github.com/Akazz)) +- 将 contrib/googletest 更新到 v1.10.0。 [\#8587](https://github.com/ClickHouse/ClickHouse/pull/8587) ([Alexander Burmak](https://github.com/Alex-Burmak)) +- 修复了 `base64` 库中的 ThreadSanitizer 报告。还将该库更新到了最新版本,但这无关紧要。这修复了 [\#8397](https://github.com/ClickHouse/ClickHouse/issues/8397)。 [\#8403](https://github.com/ClickHouse/ClickHouse/pull/8403) ([alexey-milovidov](https://github.com/alexey-milovidov)) +- 修复了处理器(processors)下的 `00600_replace_running_query`。 [\#8272](https://github.com/ClickHouse/ClickHouse/pull/8272) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) +- 删除了对 `tcmalloc` 的支持,使 `CMakeLists.txt` 更简单。 [\#8310](https://github.com/ClickHouse/ClickHouse/pull/8310) ([alexey-milovidov](https://github.com/alexey-milovidov)) +- 发布版的 gcc 构建现在使用 `libc++` 而不是 `libstdc++`。此前 `libc++` 只与 clang 一起使用过。这将提高构建配置的一致性和可移植性。 [\#8311](https://github.com/ClickHouse/ClickHouse/pull/8311) ([alexey-milovidov](https://github.com/alexey-milovidov)) +- 为使用 MemorySanitizer 的构建启用了 ICU 库。 [\#8222](https://github.com/ClickHouse/ClickHouse/pull/8222) ([alexey-milovidov](https://github.com/alexey-milovidov)) +- 抑制了来自 `CapNProto` 库的警告。 [\#8224](https://github.com/ClickHouse/ClickHouse/pull/8224) ([alexey-milovidov](https://github.com/alexey-milovidov)) +- 删除了针对 `tcmalloc` 的特殊代码,因为它不再受支持。 [\#8225](https://github.com/ClickHouse/ClickHouse/pull/8225) ([alexey-milovidov](https://github.com/alexey-milovidov)) +- 在 CI 覆盖率任务中,优雅地终止服务器以允许其保存覆盖率报告。这修复了我们最近看到的不完整的覆盖率报告。 [\#8142](https://github.com/ClickHouse/ClickHouse/pull/8142) ([alesapin](https://github.com/alesapin)) +- 针对 `Float64` 和 `UInt64` 值对所有编解码器进行性能测试。 [\#8349](https://github.com/ClickHouse/ClickHouse/pull/8349) ([Vasily Nemkov](https://github.com/Enmk)) +- `termcap` 已严重过时,并会导致各种问题(例如缺少 “up” 能力,以及回显 `^J` 而不是多行)。请改用 `terminfo` 或捆绑的 `ncurses`。
[\#7737](https://github.com/ClickHouse/ClickHouse/pull/7737) ([amosbird](https://github.com/amosbird)) +- 修复了 `test_storage_s3` 集成测试。 [\#7734](https://github.com/ClickHouse/ClickHouse/pull/7734) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) +- 支持 `StorageFile(<format>, null)`,将数据块以给定格式插入文件而不实际写入磁盘。这是性能测试所必需的。 [\#8455](https://github.com/ClickHouse/ClickHouse/pull/8455) ([amosbird](https://github.com/amosbird)) +- 为功能测试添加了 `--print-time` 参数,用于打印每个测试的执行时间。 [\#8001](https://github.com/ClickHouse/ClickHouse/pull/8001) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) +- 在 `KeyCondition` 评估 RPN 时添加了断言。这将修复来自 gcc-9 的警告。 [\#8279](https://github.com/ClickHouse/ClickHouse/pull/8279) ([alexey-milovidov](https://github.com/alexey-milovidov)) +- 在 CI 构建中转储 cmake 选项。 [\#8273](https://github.com/ClickHouse/ClickHouse/pull/8273) ([Alexander Kuzmenkov](https://github.com/akuzm)) +- 不为某些 fat 库生成调试信息。 [\#8271](https://github.com/ClickHouse/ClickHouse/pull/8271) ([alexey-milovidov](https://github.com/alexey-milovidov)) +- 使 `log_to_console.xml` 始终记录到 stderr,无论是否处于交互模式。 [\#8395](https://github.com/ClickHouse/ClickHouse/pull/8395) ([Alexander Kuzmenkov](https://github.com/akuzm)) +- 从 `clickhouse-performance-test` 工具中删除了一些未使用的功能。 [\#8555](https://github.com/ClickHouse/ClickHouse/pull/8555) ([alexey-milovidov](https://github.com/alexey-milovidov)) +- 现在我们还会搜索与相应 `clang-X` 版本匹配的 `lld-X`。 [\#8092](https://github.com/ClickHouse/ClickHouse/pull/8092) ([alesapin](https://github.com/alesapin)) +- Parquet 构建改进。 [\#8421](https://github.com/ClickHouse/ClickHouse/pull/8421) ([maxulan](https://github.com/maxulan)) +- 更多 GCC 警告 [\#8221](https://github.com/ClickHouse/ClickHouse/pull/8221) ([kreuzerkrieg](https://github.com/kreuzerkrieg)) +- Arch Linux 的软件包现在允许运行 ClickHouse 服务器,而不仅仅是客户端。 [\#8534](https://github.com/ClickHouse/ClickHouse/pull/8534) ([Vladimir Chebotarev](https://github.com/excitoon)) +- 修复了处理器相关的测试。微小的性能修复。 [\#7672](https://github.com/ClickHouse/ClickHouse/pull/7672) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) +- 更新 contrib/protobuf。 [\#8256](https://github.com/ClickHouse/ClickHouse/pull/8256) ([Matwey V. Kornilov](https://github.com/matwey)) +- 为切换到 C++20 做准备,作为新年庆祝。“May the C++ force be with ClickHouse.” [\#8447](https://github.com/ClickHouse/ClickHouse/pull/8447) ([amosbird](https://github.com/amosbird)) + +#### 实验特性 {#experimental-feature-1} + +- 增加了实验性设置 `min_bytes_to_use_mmap_io`。它允许在读取大文件时无需将数据从内核复制到用户空间。该设置默认禁用。建议的阈值大约是 64 MB,因为 mmap/munmap 很慢。 [\#8520](https://github.com/ClickHouse/ClickHouse/pull/8520) ([alexey-milovidov](https://github.com/alexey-milovidov)) +- 将配额重构为访问控制系统的一部分。增加了新表 `system.quotas`、新函数 `currentQuota`、`currentQuotaKey`,以及新的 SQL 语法 `CREATE QUOTA`、`ALTER QUOTA`、`DROP QUOTA`、`SHOW QUOTA`。 [\#7257](https://github.com/ClickHouse/ClickHouse/pull/7257) ([Vitaly Baranov](https://github.com/vitlibar)) +- 允许跳过未知设置并给出警告,而不是抛出异常。 [\#7653](https://github.com/ClickHouse/ClickHouse/pull/7653) ([Vitaly Baranov](https://github.com/vitlibar)) +- 将行策略重新设计为访问控制系统的一部分。增加了新表 `system.row_policies`、新函数 `currentRowPolicies()`,以及新的 SQL 语法 `CREATE POLICY`、`ALTER POLICY`、`DROP POLICY`、`SHOW CREATE POLICY`、`SHOW POLICIES`。 [\#7808](https://github.com/ClickHouse/ClickHouse/pull/7808) ([Vitaly Baranov](https://github.com/vitlibar)) + +#### 安全修复 {#security-fix} + +- 修复了通过 `File` 表引擎读取目录结构的可能性。这修复了 [\#8536](https://github.com/ClickHouse/ClickHouse/issues/8536)。
[\#8537](https://github.com/ClickHouse/ClickHouse/pull/8537) ([alexey-milovidov](https://github.com/alexey-milovidov)) + +## [2019年更新日志](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/changelog/2019.md) {#changelog-for-2019} diff --git a/docs/zh/commercial/cloud.md b/docs/zh/commercial/cloud.md index f096bdb92cf..765c352d098 100644 --- a/docs/zh/commercial/cloud.md +++ b/docs/zh/commercial/cloud.md @@ -1,20 +1,21 @@ --- -en_copy: true +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 --- -# ClickHouse Cloud Service Providers {#clickhouse-cloud-service-providers} +# ClickHouse 云服务提供商 {#clickhouse-cloud-service-providers} -!!! info "Info" - If you have launched a public cloud with managed ClickHouse service, feel free to [open a pull-request](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/commercial/cloud.md) adding it to the following list. +!!! info "信息" + 如果您已经启动了带有托管ClickHouse服务的公共云,请随时 [打开拉取请求](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/commercial/cloud.md) 将其添加到以下列表。 -## Yandex Cloud {#yandex-cloud} +## Yandex云 {#yandex-cloud} -[Yandex Managed Service for ClickHouse](https://cloud.yandex.com/services/managed-clickhouse?utm_source=referrals&utm_medium=clickhouseofficialsite&utm_campaign=link3) provides the following key features: +[Yandex的ClickHouse托管服务](https://cloud.yandex.com/services/managed-clickhouse?utm_source=referrals&utm_medium=clickhouseofficialsite&utm_campaign=link3) 提供以下主要功能: -- Fully managed ZooKeeper service for [ClickHouse replication](../operations/table_engines/replication.md) -- Multiple storage type choices -- Replicas in different availability zones -- Encryption and isolation -- Automated maintenance +- 为 [ClickHouse复制](../engines/table_engines/mergetree_family/replication.md) 提供全托管的 ZooKeeper 服务 +- 多种存储类型选择 +- 不同可用区中的副本 +- 加密和隔离 +- 自动化维护 -{## [Original article](https://clickhouse.tech/docs/en/commercial/cloud/) ##} +{## [原始文章](https://clickhouse.tech/docs/en/commercial/cloud/) ##} diff --git a/docs/zh/commercial/index.md b/docs/zh/commercial/index.md new file mode 100644 index 00000000000..ec704207201 --- /dev/null +++ b/docs/zh/commercial/index.md @@ -0,0 +1,9 @@ +--- +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_folder_title: "\u5546\u4E1A" +toc_priority: 70 +toc_title: "\u5546\u4E1A" +--- + + diff --git a/docs/zh/data_types/datetime64.md b/docs/zh/data_types/datetime64.md deleted file mode 100644 index e28390bbdd4..00000000000 --- a/docs/zh/data_types/datetime64.md +++ /dev/null @@ -1,101 +0,0 @@ ---- -en_copy: true ---- - -# DateTime64 {#data_type-datetime64} - -Allows to store an instant in time, that can be expressed as a calendar date and a time of a day, with defined sub-second precision - -Tick size (precision): 10^(-precision) seconds - -Syntax: - -``` sql -DateTime64(precision, [timezone]) -``` - -Internally, stores data as a number of ‘ticks’ since epoch start (1970-01-01 00:00:00 UTC) as Int64. The tick resolution is determined by the precision parameter. Additionally, the `DateTime64` type can store time zone that is the same for the entire column, that affects how the values of the `DateTime64` type values are displayed in text format and how the values specified as strings are parsed (‘2020-01-01 05:00:01.000’). The time zone is not stored in the rows of the table (or in resultset), but is stored in the column metadata. See details in [DateTime](datetime.md).
- -## Examples {#examples} - -**1.** Creating a table with `DateTime64`-type column and inserting data into it: - -``` sql -CREATE TABLE dt -( - `timestamp` DateTime64(3, 'Europe/Moscow'), - `event_id` UInt8 -) -ENGINE = TinyLog -``` - -``` sql -INSERT INTO dt Values (1546300800000, 1), ('2019-01-01 00:00:00', 2) -``` - -``` sql -SELECT * FROM dt -``` - -``` text -┌───────────────timestamp─┬─event_id─┐ -│ 2019-01-01 03:00:00.000 │ 1 │ -│ 2019-01-01 00:00:00.000 │ 2 │ -└─────────────────────────┴──────────┘ -``` - -- When inserting datetime as an integer, it is treated as an appropriately scaled Unix Timestamp (UTC). `1546300800000` (with precision 3) represents `'2019-01-01 00:00:00'` UTC. However, as `timestamp` column has `Europe/Moscow` (UTC+3) timezone specified, when outputting as a string the value will be shown as `'2019-01-01 03:00:00'` -- When inserting string value as datetime, it is treated as being in column timezone. `'2019-01-01 00:00:00'` will be treated as being in `Europe/Moscow` timezone and stored as `1546290000000`. - -**2.** Filtering on `DateTime64` values - -``` sql -SELECT * FROM dt WHERE timestamp = toDateTime64('2019-01-01 00:00:00', 3, 'Europe/Moscow') -``` - -``` text -┌───────────────timestamp─┬─event_id─┐ -│ 2019-01-01 00:00:00.000 │ 2 │ -└─────────────────────────┴──────────┘ -``` - -Unlike `DateTime`, `DateTime64` values are not converted from `String` automatically - -**3.** Getting a time zone for a `DateTime64`-type value: - -``` sql -SELECT toDateTime64(now(), 3, 'Europe/Moscow') AS column, toTypeName(column) AS x -``` - -``` text -┌──────────────────column─┬─x──────────────────────────────┐ -│ 2019-10-16 04:12:04.000 │ DateTime64(3, 'Europe/Moscow') │ -└─────────────────────────┴────────────────────────────────┘ -``` - -**4.** Timezone conversion - -``` sql -SELECT -toDateTime64(timestamp, 3, 'Europe/London') as lon_time, -toDateTime64(timestamp, 3, 'Europe/Moscow') as mos_time -FROM dt -``` - -``` text -┌───────────────lon_time──┬────────────────mos_time─┐ -│ 2019-01-01 00:00:00.000 │ 2019-01-01 03:00:00.000 │ -│ 2018-12-31 21:00:00.000 │ 2019-01-01 00:00:00.000 │ -└─────────────────────────┴─────────────────────────┘ -``` - -## See Also {#see-also} - -- [Type conversion functions](../query_language/functions/type_conversion_functions.md) -- [Functions for working with dates and times](../query_language/functions/date_time_functions.md) -- [Functions for working with arrays](../query_language/functions/array_functions.md) -- [The `date_time_input_format` setting](../operations/settings/settings.md#settings-date_time_input_format) -- [The `timezone` server configuration parameter](../operations/server_settings/settings.md#server_settings-timezone) -- [Operators for working with dates and times](../query_language/operators.md#operators-datetime) -- [`Date` data type](date.md) -- [`DateTime` data type](datetime.md) diff --git a/docs/zh/data_types/int_uint.md b/docs/zh/data_types/int_uint.md deleted file mode 100644 index 4e01ad017ca..00000000000 --- a/docs/zh/data_types/int_uint.md +++ /dev/null @@ -1,17 +0,0 @@ -# UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64 {#uint8-uint16-uint32-uint64-int8-int16-int32-int64} - -固定长度的整型,包括有符号整型或无符号整型。 - -## 整型范围 {#zheng-xing-fan-wei} - -- Int8 - \[-128 : 127\] -- Int16 - \[-32768 : 32767\] -- Int32 - \[-2147483648 : 2147483647\] -- Int64 - \[-9223372036854775808 : 9223372036854775807\] - -## 无符号整型范围 {#wu-fu-hao-zheng-xing-fan-wei} - -- UInt8 - \[0 : 255\] -- UInt16 - \[0 : 65535\] -- UInt32 - \[0 : 
4294967295\] -- UInt64 - \[0 : 18446744073709551615\] diff --git a/docs/zh/data_types/uuid.md b/docs/zh/data_types/uuid.md deleted file mode 100644 index 4546be19371..00000000000 --- a/docs/zh/data_types/uuid.md +++ /dev/null @@ -1,74 +0,0 @@ ---- -en_copy: true ---- - -# UUID {#uuid-data-type} - -A universally unique identifier (UUID) is a 16-byte number used to identify records. For detailed information about the UUID, see [Wikipedia](https://en.wikipedia.org/wiki/Universally_unique_identifier). - -The example of UUID type value is represented below: - -``` text -61f0c404-5cb3-11e7-907b-a6006ad3dba0 -``` - -If you do not specify the UUID column value when inserting a new record, the UUID value is filled with zero: - -``` text -00000000-0000-0000-0000-000000000000 -``` - -## How to generate {#how-to-generate} - -To generate the UUID value, ClickHouse provides the [generateUUIDv4](../query_language/functions/uuid_functions.md) function. - -## Usage example {#usage-example} - -**Example 1** - -This example demonstrates creating a table with the UUID type column and inserting a value into the table. - -``` sql -CREATE TABLE t_uuid (x UUID, y String) ENGINE=TinyLog -``` - -``` sql -INSERT INTO t_uuid SELECT generateUUIDv4(), 'Example 1' -``` - -``` sql -SELECT * FROM t_uuid -``` - -``` text -┌────────────────────────────────────x─┬─y─────────┐ -│ 417ddc5d-e556-4d27-95dd-a34d84e46a50 │ Example 1 │ -└──────────────────────────────────────┴───────────┘ -``` - -**Example 2** - -In this example, the UUID column value is not specified when inserting a new record. - -``` sql -INSERT INTO t_uuid (y) VALUES ('Example 2') -``` - -``` sql -SELECT * FROM t_uuid -``` - -``` text -┌────────────────────────────────────x─┬─y─────────┐ -│ 417ddc5d-e556-4d27-95dd-a34d84e46a50 │ Example 1 │ -│ 00000000-0000-0000-0000-000000000000 │ Example 2 │ -└──────────────────────────────────────┴───────────┘ -``` - -## Restrictions {#restrictions} - -The UUID data type only supports functions which [String](string.md) data type also supports (for example, [min](../query_language/agg_functions/reference.md#agg_function-min), [max](../query_language/agg_functions/reference.md#agg_function-max), and [count](../query_language/agg_functions/reference.md#agg_function-count)). - -The UUID data type is not supported by arithmetic operations (for example, [abs](../query_language/functions/arithmetic_functions.md#arithm_func-abs)) or aggregate functions, such as [sum](../query_language/agg_functions/reference.md#agg_function-sum) and [avg](../query_language/agg_functions/reference.md#agg_function-avg). - -[Original article](https://clickhouse.tech/docs/en/data_types/uuid/) diff --git a/docs/zh/database_engines/lazy.md b/docs/zh/database_engines/lazy.md deleted file mode 100644 index 45c5fd602d7..00000000000 --- a/docs/zh/database_engines/lazy.md +++ /dev/null @@ -1,15 +0,0 @@ ---- -en_copy: true ---- - -# Lazy {#lazy} - -Keeps tables in RAM only `expiration_time_in_seconds` seconds after last access. Can be used only with \*Log tables. - -It’s optimized for storing many small \*Log tables, for which there is a long time interval between accesses. 
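For illustration, a minimal sketch (not part of the original page) of how the engine is typically used: the numeric expiration value and the table definition below are made up for the example, and only `*Log` engines may be used inside such a database.

``` sql
-- Tables of this database are unloaded from RAM 300 seconds after the last access
CREATE DATABASE testlazy ENGINE = Lazy(300);

-- Only *Log table engines (TinyLog, StripeLog, Log) can live in a Lazy database
CREATE TABLE testlazy.events (ts DateTime, message String) ENGINE = TinyLog;
```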
- -## Creating a Database {#creating-a-database} - - CREATE DATABASE testlazy ENGINE = Lazy(expiration_time_in_seconds); - -[Original article](https://clickhouse.tech/docs/en/database_engines/lazy/) diff --git a/docs/zh/development/architecture.md b/docs/zh/development/architecture.md index 22eaaf583d6..66d5cac13b5 100644 --- a/docs/zh/development/architecture.md +++ b/docs/zh/development/architecture.md @@ -1,3 +1,4 @@ + # ClickHouse 架构概述 {#clickhouse-jia-gou-gai-shu} ClickHouse 是一个真正的列式数据库管理系统(DBMS)。在 ClickHouse 中,数据始终是按列存储的,包括矢量(向量或列块)执行的过程。只要有可能,操作都是基于矢量进行分派的,而不是单个的值,这被称为«矢量化查询执行»,它有利于降低实际的数据处理开销。 @@ -12,7 +13,7 @@ ClickHouse 是一个真正的列式数据库管理系统(DBMS)。在 ClickHous 不同的 `IColumn` 实现(`ColumnUInt8`、`ColumnString` 等)负责不同的列内存布局。内存布局通常是一个连续的数组。对于数据类型为整型的列,只是一个连续的数组,比如 `std::vector`。对于 `String` 列和 `Array` 列,则由两个向量组成:其中一个向量连续存储所有的 `String` 或数组元素,另一个存储每一个 `String` 或 `Array` 的起始元素在第一个向量中的偏移。而 `ColumnConst` 则仅在内存中存储一个值,但是看起来像一个列。 -## Field {#field} +## 字段 {#field} 尽管如此,有时候也可能需要处理单个值。表示单个值,可以使用 `Field`。`Field` 是 `UInt64`、`Int64`、`Float64`、`String` 和 `Array` 组成的联合。`IColumn` 拥有 `operator[]` 方法来获取第 `n` 个值成为一个 `Field`,同时也拥有 `insert` 方法将一个 `Field` 追加到一个列的末尾。这些方法并不高效,因为它们需要处理表示单一值的临时 `Field` 对象,但是有更高效的方法比如 `insertFrom` 和 `insertRangeFrom` 等。 @@ -115,7 +116,7 @@ ClickHouse 是一个真正的列式数据库管理系统(DBMS)。在 ClickHous 普通函数不会改变行数 - 它们的执行看起来就像是独立地处理每一行数据。实际上,函数不会作用于一个单独的行上,而是作用在以 `Block` 为单位的数据上,以实现向量查询执行。 -还有一些杂项函数,比如 [blockSize](../query_language/functions/other_functions.md#function-blocksize)、[rowNumberInBlock](../query_language/functions/other_functions.md#function-rownumberinblock),以及 [runningAccumulate](../query_language/functions/other_functions.md#function-runningaccumulate),它们对块进行处理,并且不遵从行的独立性。 +还有一些杂项函数,比如 [blockSize](../sql_reference/functions/other_functions.md#function-blocksize)、[rowNumberInBlock](../sql_reference/functions/other_functions.md#function-rownumberinblock),以及 [runningAccumulate](../sql_reference/functions/other_functions.md#function-runningaccumulate),它们对块进行处理,并且不遵从行的独立性。 ClickHouse 具有强类型,因此隐式类型转换不会发生。如果函数不支持某个特定的类型组合,则会抛出异常。但函数可以通过重载以支持许多不同的类型组合。比如,`plus` 函数(用于实现 `+` 运算符)支持任意数字类型的组合:`UInt8` + `Float32`,`UInt16` + `Int8` 等。同时,一些可变参数的函数能够接收任意数目的参数,比如 `concat` 函数。 @@ -159,7 +160,7 @@ ClickHouse 具有强类型,因此隐式类型转换不会发生。如果函数 分布式查询执行没有全局查询计划。每个节点都有针对自己的工作部分的本地查询计划。我们仅有简单的一次性分布式查询执行:将查询发送给远程节点,然后合并结果。但是对于具有高基数的 `GROUP BY` 或具有大量临时数据的 `JOIN` 这样困难的查询来说,这是不可行的:在这种情况下,我们需要在服务器之间«改组»数据,这需要额外的协调。ClickHouse 不支持这类查询执行,我们需要在这方面进行努力。 -## Merge Tree {#merge-tree} +## MergeTree {#merge-tree} `MergeTree` 是一系列支持按主键索引的存储引擎。主键可以是一个任意的列或表达式的元组。`MergeTree` 表中的数据存储于«分块»中。每一个分块以主键序存储数据(数据按主键元组的字典序排序)。表的所有列都存储在这些«分块»中分离的 `column.bin` 文件中。`column.bin` 文件由压缩块组成,每一个块通常是 64 KB 到 1 MB 大小的未压缩数据,具体取决于平均值大小。这些块由一个接一个连续放置的列值组成。每一列的列值顺序相同(顺序由主键定义),因此当你按多列进行迭代时,你能够得到相应列的值。 diff --git a/docs/zh/development/browse_code.md b/docs/zh/development/browse_code.md index c3016d5e1dc..10d3ffecd15 100644 --- a/docs/zh/development/browse_code.md +++ b/docs/zh/development/browse_code.md @@ -1,11 +1,14 @@ --- -en_copy: true +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_priority: 63 +toc_title: "\u6D4F\u89C8ClickHouse\u6E90\u4EE3\u7801" --- -# Browse ClickHouse Source Code {#browse-clickhouse-source-code} +# 浏览ClickHouse源代码 {#browse-clickhouse-source-code} -You can use **Woboq** online code browser available [here](https://clickhouse-test-reports.s3.yandex.net/codebrowser/html_report///ClickHouse/dbms/index.html). It provides code navigation and semantic highlighting, search and indexing. The code snapshot is updated daily.
+您可以使用 [这里](https://clickhouse-test-reports.s3.yandex.net/codebrowser/html_report///ClickHouse/src/index.html) 提供的 **Woboq** 在线代码浏览器。它提供了代码导航和语义高亮、搜索和索引。代码快照每天更新。 -Also, you can browse sources on [GitHub](https://github.com/ClickHouse/ClickHouse) as usual. +此外,您还可以像往常一样在 [GitHub](https://github.com/ClickHouse/ClickHouse) 上浏览源代码。 -If you’re interested what IDE to use, we recommend CLion, QT Creator, VS Code and KDevelop (with caveats). You can use any favourite IDE. Vim and Emacs also count. +至于使用哪种 IDE,我们推荐 CLion、QT Creator、VS Code 和 KDevelop(有注意事项)。您可以使用任何喜欢的 IDE,Vim 和 Emacs 也算。 diff --git a/docs/zh/development/build.md b/docs/zh/development/build.md index 6a46d6f2cc7..05581985a35 100644 --- a/docs/zh/development/build.md +++ b/docs/zh/development/build.md @@ -1,3 +1,4 @@ + # 如何构建 ClickHouse 发布包 {#ru-he-gou-jian-clickhouse-fa-bu-bao} ## 安装 Git 和 Pbuilder {#an-zhuang-git-he-pbuilder} @@ -32,12 +33,12 @@ cd ClickHouse sudo apt-get install git cmake ninja-build ``` -Or cmake3 instead of cmake on older systems. +或者在旧系统上用 cmake3 代替 cmake。 或者在早期版本的系统中用 cmake3 替代 cmake ## 安装 GCC 9 {#an-zhuang-gcc-9} -There are several ways to do this. +有几种方法可以做到这一点。 ### 安装 PPA 包 {#an-zhuang-ppa-bao} @@ -79,6 +80,6 @@ cd .. ``` 若要创建一个执行文件, 执行 `ninja clickhouse`。 -这个命令会使得 `programs/clickhouse` 文件可执行,您可以使用 `client` or `server` 参数运行。 +这个命令会使得 `programs/clickhouse` 文件可执行,您可以使用 `client` 或 `server` 参数运行。 [来源文章](https://clickhouse.tech/docs/en/development/build/) diff --git a/docs/zh/development/build_cross_arm.md b/docs/zh/development/build_cross_arm.md index 0936a3133b2..1061fddfacd 100644 --- a/docs/zh/development/build_cross_arm.md +++ b/docs/zh/development/build_cross_arm.md @@ -1,17 +1,21 @@ --- -en_copy: true +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_priority: 67 +toc_title: "\u5982\u4F55\u5728Linux\u4E0A\u6784\u5EFAClickHouse for AARCH64\uFF08\ + ARM64)" --- -# How to Build ClickHouse on Linux for AARCH64 (ARM64) architecture {#how-to-build-clickhouse-on-linux-for-aarch64-arm64-architecture} +# 如何在Linux上为AARCH64(ARM64)架构构建ClickHouse {#how-to-build-clickhouse-on-linux-for-aarch64-arm64-architecture} -This is for the case when you have Linux machine and want to use it to build `clickhouse` binary that will run on another Linux machine with AARCH64 CPU architecture. This is intended for continuous integration checks that run on Linux servers. +本指南适用于以下场景:您有一台 Linux 机器,并希望用它构建将在另一台 AARCH64 CPU 架构的 Linux 机器上运行的 `clickhouse` 二进制文件。这适用于在 Linux 服务器上运行的持续集成检查。 -The cross-build for AARCH64 is based on the [Build instructions](build.md), follow them first. +AARCH64 的交叉构建基于 [构建说明](build.md),请先按照其说明操作。 -# Install Clang-8 {#install-clang-8} +# 安装Clang-8 {#install-clang-8} -Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup. -For example, in Ubuntu Bionic you can use the following commands: +请按照 https://apt.llvm.org/ 上针对您的 Ubuntu 或 Debian 的说明进行设置。
+例如,在Ubuntu Bionic中,您可以使用以下命令: ``` bash echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-8 main" | sudo tee /etc/apt/sources.list.d/llvm.list sudo apt-get update sudo apt-get install clang-8 ``` -# Install Cross-Compilation Toolset {#install-cross-compilation-toolset} +# 安装交叉编译工具集 {#install-cross-compilation-toolset} ``` bash cd ClickHouse mkdir -p build-aarch64/cmake/toolchain/linux-aarch64 wget 'https://developer.arm.com/-/media/Files/downloads/gnu-a/8.3-2019.03/binrel tar xJf gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz -C build-aarch64/cmake/toolchain/linux-aarch64 --strip-components=1 ``` -# Build ClickHouse {#build-clickhouse} +# 构建 ClickHouse {#build-clickhouse} ``` bash cd ClickHouse mkdir build-arm64 CC=clang-8 CXX=clang++-8 cmake . -Bbuild-arm64 -DCMAKE_TOOLCHAIN_FILE=cmake/linu ninja -C build-arm64 ``` -The resulting binary will run only on Linux with the AARCH64 CPU architecture. +生成的二进制文件只能在 AARCH64 CPU 架构的 Linux 上运行。 diff --git a/docs/zh/development/build_cross_osx.md b/docs/zh/development/build_cross_osx.md index 20577d1213a..c74ff934c0d 100644 --- a/docs/zh/development/build_cross_osx.md +++ b/docs/zh/development/build_cross_osx.md @@ -1,10 +1,11 @@ + # 如何在Linux中编译Mac OS X ClickHouse {#ru-he-zai-linuxzhong-bian-yi-mac-os-x-clickhouse} Linux机器也可以编译运行在OS X系统的`clickhouse`二进制包,这可以用于在Linux上跑持续集成测试。如果要在Mac OS X上直接构建ClickHouse,请参考另外一篇指南: https://clickhouse.tech/docs/zh/development/build\_osx/ Mac OS X的交叉编译基于以下构建说明,请首先遵循它们。 -# Install Clang-8 {#install-clang-8} +# 安装Clang-8 {#install-clang-8} 按照https://apt.llvm.org/中的说明进行Ubuntu或Debian安装。 例如,安装Bionic的命令如下: diff --git a/docs/zh/development/build_osx.md b/docs/zh/development/build_osx.md index e471b716a33..0c1c840912e 100644 --- a/docs/zh/development/build_osx.md +++ b/docs/zh/development/build_osx.md @@ -1,3 +1,4 @@ + # 在 Mac OS X 中编译 ClickHouse {#zai-mac-os-x-zhong-bian-yi-clickhouse} ClickHouse 支持在 Mac OS X 10.12 版本中编译。若您在用更早的操作系统版本,可以尝试在指令中使用 `Gentoo Prefix` 和 `clang sl`. @@ -43,7 +44,7 @@ cd ..
为此,请创建以下文件: -/Library/LaunchDaemons/limit.maxfiles.plist: +/Library/LaunchDaemons/limit.maxfiles.plist: ``` xml diff --git a/docs/zh/development/contrib.md b/docs/zh/development/contrib.md index 5491cc76f6f..e282856c0e8 100644 --- a/docs/zh/development/contrib.md +++ b/docs/zh/development/contrib.md @@ -1,34 +1,35 @@ + # 使用的三方库 {#shi-yong-de-san-fang-ku} -| Library | License | -|---------------------|----------------------------------------------------------------------------------------------------------------------------------------------| -| base64 | [BSD 2-Clause License](https://github.com/aklomp/base64/blob/a27c565d1b6c676beaf297fe503c4518185666f7/LICENSE) | -| boost | [Boost Software License 1.0](https://github.com/ClickHouse-Extras/boost-extra/blob/6883b40449f378019aec792f9983ce3afc7ff16e/LICENSE_1_0.txt) | -| brotli | [MIT](https://github.com/google/brotli/blob/master/LICENSE) | -| capnproto | [MIT](https://github.com/capnproto/capnproto/blob/master/LICENSE) | -| cctz | [Apache License 2.0](https://github.com/google/cctz/blob/4f9776a310f4952454636363def82c2bf6641d5f/LICENSE.txt) | -| double-conversion | [BSD 3-Clause License](https://github.com/google/double-conversion/blob/cf2f0f3d547dc73b4612028a155b80536902ba02/LICENSE) | -| FastMemcpy | [MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libmemcpy/impl/LICENSE) | -| googletest | [BSD 3-Clause License](https://github.com/google/googletest/blob/master/LICENSE) | -| hyperscan | [BSD 3-Clause License](https://github.com/intel/hyperscan/blob/master/LICENSE) | -| libbtrie | [BSD 2-Clause License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) | -| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) | -| libdivide | [Zlib License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) | -| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) | -| libhdfs3 | [Apache License 2.0](https://github.com/ClickHouse-Extras/libhdfs3/blob/bd6505cbb0c130b0db695305b9a38546fa880e5a/LICENSE.txt) | -| libmetrohash | [Apache License 2.0](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libmetrohash/LICENSE) | -| libpcg-random | [Apache License 2.0](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libpcg-random/LICENSE-APACHE.txt) | -| libressl | [OpenSSL License](https://github.com/ClickHouse-Extras/ssl/blob/master/COPYING) | -| librdkafka | [BSD 2-Clause License](https://github.com/edenhill/librdkafka/blob/363dcad5a23dc29381cc626620e68ae418b3af19/LICENSE) | -| libwidechar\_width | [CC0 1.0 Universal](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libwidechar_width/LICENSE) | -| llvm | [BSD 3-Clause License](https://github.com/ClickHouse-Extras/llvm/blob/163def217817c90fb982a6daf384744d8472b92b/llvm/LICENSE.TXT) | -| lz4 | [BSD 2-Clause License](https://github.com/lz4/lz4/blob/c10863b98e1503af90616ae99725ecd120265dfb/LICENSE) | -| mariadb-connector-c | [LGPL v2.1](https://github.com/ClickHouse-Extras/mariadb-connector-c/blob/3.1/COPYING.LIB) | -| murmurhash | [Public Domain](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/murmurhash/LICENSE) | -| pdqsort | [Zlib License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/pdqsort/license.txt) | -| poco | [Boost Software License - Version
1.0](https://github.com/ClickHouse-Extras/poco/blob/fe5505e56c27b6ecb0dcbc40c49dc2caf4e9637f/LICENSE) | -| protobuf | [BSD 3-Clause License](https://github.com/ClickHouse-Extras/protobuf/blob/12735370922a35f03999afff478e1c6d7aa917a4/LICENSE) | -| re2 | [BSD 3-Clause License](https://github.com/google/re2/blob/7cf8b88e8f70f97fd4926b56aa87e7f53b2717e0/LICENSE) | -| UnixODBC | [LGPL v2.1](https://github.com/ClickHouse-Extras/UnixODBC/tree/b0ad30f7f6289c12b76f04bfb9d466374bb32168) | -| zlib-ng | [Zlib License](https://github.com/ClickHouse-Extras/zlib-ng/blob/develop/LICENSE.md) | -| zstd | [BSD 3-Clause License](https://github.com/facebook/zstd/blob/dev/LICENSE) | +| 库 | 许可证 | +|--------------------|-------------------------------------------------------------------------------------------------------------------------------------| +| base64 | [BSD 2-Clause License](https://github.com/aklomp/base64/blob/a27c565d1b6c676beaf297fe503c4518185666f7/LICENSE) | +| boost | [Boost Software License 1.0](https://github.com/ClickHouse-Extras/boost-extra/blob/6883b40449f378019aec792f9983ce3afc7ff16e/LICENSE_1_0.txt) | +| brotli | [MIT](https://github.com/google/brotli/blob/master/LICENSE) | +| capnproto | [MIT](https://github.com/capnproto/capnproto/blob/master/LICENSE) | +| cctz | [Apache License 2.0](https://github.com/google/cctz/blob/4f9776a310f4952454636363def82c2bf6641d5f/LICENSE.txt) | +| double-conversion | [BSD 3-Clause License](https://github.com/google/double-conversion/blob/cf2f0f3d547dc73b4612028a155b80536902ba02/LICENSE) | +| FastMemcpy | [MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libmemcpy/impl/LICENSE) | +| googletest | [BSD 3-Clause License](https://github.com/google/googletest/blob/master/LICENSE) | +| hyperscan | [BSD 3-Clause License](https://github.com/intel/hyperscan/blob/master/LICENSE) | +| libbtrie | [BSD 2-Clause License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libbtrie/LICENSE) | +| libcxxabi | [BSD + MIT](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) | +| libdivide | [Zlib License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) | +| libgsasl | [LGPL v2.1](https://github.com/ClickHouse-Extras/libgsasl/blob/3b8948a4042e34fb00b4fb987535dc9e02e39040/LICENSE) | +| libhdfs3 | [Apache License 2.0](https://github.com/ClickHouse-Extras/libhdfs3/blob/bd6505cbb0c130b0db695305b9a38546fa880e5a/LICENSE.txt) | +| libmetrohash | [Apache License 2.0](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libmetrohash/LICENSE) | +| libpcg-random | [Apache License 2.0](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/libpcg-random/LICENSE-APACHE.txt) | +| libressl | [OpenSSL License](https://github.com/ClickHouse-Extras/ssl/blob/master/COPYING) | +| librdkafka | [BSD 2-Clause License](https://github.com/edenhill/librdkafka/blob/363dcad5a23dc29381cc626620e68ae418b3af19/LICENSE) | +| libwidechar\_width | [CC0 1.0 Universal](https://github.com/ClickHouse/ClickHouse/blob/master/libs/libwidechar_width/LICENSE) | +| llvm | [BSD 3-Clause License](https://github.com/ClickHouse-Extras/llvm/blob/163def217817c90fb982a6daf384744d8472b92b/llvm/LICENSE.TXT) | +| lz4 | [BSD 2-Clause License](https://github.com/lz4/lz4/blob/c10863b98e1503af90616ae99725ecd120265dfb/LICENSE) | +| mariadb-connector-c | [LGPL v2.1](https://github.com/ClickHouse-Extras/mariadb-connector-c/blob/3.1/COPYING.LIB) | +| murmurhash | [Public Domain](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/murmurhash/LICENSE) | +| pdqsort | [Zlib License](https://github.com/ClickHouse/ClickHouse/blob/master/contrib/pdqsort/license.txt) | +| poco |
[Boost Software License 1.0](https://github.com/ClickHouse-Extras/poco/blob/fe5505e56c27b6ecb0dcbc40c49dc2caf4e9637f/LICENSE) | +| protobuf | [BSD 3-Clause License](https://github.com/ClickHouse-Extras/protobuf/blob/12735370922a35f03999afff478e1c6d7aa917a4/LICENSE) | +| re2 | [BSD 3-Clause License](https://github.com/google/re2/blob/7cf8b88e8f70f97fd4926b56aa87e7f53b2717e0/LICENSE) | +| UnixODBC | [LGPL v2.1](https://github.com/ClickHouse-Extras/UnixODBC/tree/b0ad30f7f6289c12b76f04bfb9d466374bb32168) | +| zlib-ng | [Zlib License](https://github.com/ClickHouse-Extras/zlib-ng/blob/develop/LICENSE.md) | +| zstd | [BSD 3-Clause License](https://github.com/facebook/zstd/blob/dev/LICENSE) | diff --git a/docs/zh/development/developer_instruction.md b/docs/zh/development/developer_instruction.md index 6d865afb2c4..f39ab665ee6 100644 --- a/docs/zh/development/developer_instruction.md +++ b/docs/zh/development/developer_instruction.md @@ -1,3 +1,4 @@ + ClickHouse支持Linux,FreeBSD 及 Mac OS X 系统。 # Windows使用指引 {#windowsshi-yong-zhi-yin} @@ -67,9 +68,9 @@ ClickHouse支持Linux,FreeBSD 及 Mac OS X 系统。 命令执行成功后,可以通过执行`git pull upstream master`,从ClickHouse的主分支中拉取更新。 -## Working with submodules {#working-with-submodules} +## 使用子模块 {#working-with-submodules} -Working with submodules in git could be painful. Next commands will help to manage it: +在 git 中使用子模块可能会很痛苦。接下来的命令将有助于管理它们: # ! each command accepts --recursive # Update remote URLs for submodules. Barely rare case git submodule sync --recursive # Add new submodules # Two last commands could be merged together git submodule update --init -The next commands would help you to reset all submodules to the initial state (!WARING! - any chenges inside will be deleted): +接下来的命令将帮助您将所有子模块重置为初始状态(警告!其中的任何更改都将被删除): # Synchronizes submodules' remote URL with .gitmodules git submodule sync --recursive diff --git a/docs/zh/development/index.md b/docs/zh/development/index.md index 187ee1b3e25..cf3b2fae1d9 100644 --- a/docs/zh/development/index.md +++ b/docs/zh/development/index.md @@ -1,3 +1,4 @@ + # ClickHouse 开发 {#clickhouse-kai-fa} [来源文章](https://clickhouse.tech/docs/en/development/) diff --git a/docs/zh/development/style.md b/docs/zh/development/style.md index 4d374f9b2e8..10c036fef3b 100644 --- a/docs/zh/development/style.md +++ b/docs/zh/development/style.md @@ -1,3 +1,4 @@ + # 如何编写 C++ 代码 {#ru-he-bian-xie-c-dai-ma} ## 一般建议 {#yi-ban-jian-yi} @@ -200,7 +201,7 @@ std::cerr << static_cast(c) << std::endl; for (Names::const_iterator it = column_names.begin(); it != column_names.end(); ++it) ``` -## Comments {#comments} +## 注释 {#comments} **1.** 请务必为所有非常重要的代码部分添加注释。 @@ -297,7 +298,7 @@ void executeQuery( /// for ``` -## Names {#names} +## 命名 {#names} **1.** 在变量和类成员的名称中使用带下划线的小写字母。 @@ -623,7 +624,7 @@ Loader() {} **18.** 编码。 -在所有情况下使用 UTF-8 编码。使用 `std::string` and `char *`。不要使用 `std::wstring` 和 `wchar_t`。 +在所有情况下使用 UTF-8 编码。使用 `std::string` 和 `char *`。不要使用 `std::wstring` 和 `wchar_t`。 **19.** 日志。 diff --git a/docs/zh/development/tests.md b/docs/zh/development/tests.md index b3a3468e31c..f54e273a77a 100644 --- a/docs/zh/development/tests.md +++ b/docs/zh/development/tests.md @@ -1,3 +1,4 @@ + # ClickHouse 测试 {#clickhouse-ce-shi} ## 功能性测试 {#gong-neng-xing-ce-shi} @@ -14,7 +15,7 @@ 调用功能测试最简单的方法是将 `clickhouse-client` 复制到`/usr/bin/`,运行`clickhouse-server`,然后从自己的目录运行`./ clickhouse-test`。 -要添加新测试,请在 `tests/queries/0_stateless` 目录内添加新的 `.sql` 或 `.sh` 文件,手动检查,然后按以下方式生成 `.reference` 文件: `clickhouse-client -n --testmode < 00000_test.sql > 00000_test.reference` or `./00000_test.sh >
./00000_test.reference`。 +要添加新测试,请在 `tests/queries/0_stateless` 目录内添加新的 `.sql` 或 `.sh` 文件,手动检查,然后按以下方式生成 `.reference` 文件: `clickhouse-client -n --testmode < 00000_test.sql > 00000_test.reference` 或 `./00000_test.sh > ./00000_test.reference`。 测试应该只使用(创建,删除等)`test` 数据库中的表,这些表假定是事先创建的; 测试也可以使用临时表。 @@ -152,24 +153,24 @@ Clang 有更多有用的警告 - 您可以使用 `-Weverything` 查找它们并 对于生产构建,使用 gcc(它仍然生成比 clang 稍高效的代码)。对于开发来说,clang 通常更方便使用。您可以使用调试模式在自己的机器上构建(以节省笔记本电脑的电量),但请注意,由于更好的控制流程和过程分析,编译器使用 `-O3` 会生成更多警告。 当使用 clang 构建时,使用 `libc++` 而不是 `libstdc++`,并且在使用调试模式构建时,使用调试版本的 `libc++`,它允许在运行时捕获更多错误。 -## Sanitizers {#sanitizers} +## Sanitizers {#sanitizers} -**Address sanitizer**. +**Address sanitizer**. 我们在每个提交的基础上在 ASan 下运行功能和集成测试。 -**Valgrind (Memcheck)**. +**Valgrind (Memcheck)**. 我们在 Valgrind 过夜进行功能测试。 这需要几个小时。 目前在 `re2` 库中有一个已知的误报,请参阅 [文章](https://research.swtch.com/sparse)。 -**Thread sanitizer**. +**Thread sanitizer**. 我们在 TSan 下进行功能测试。ClickHouse 必须通过所有测试。在 TSan 下运行不是自动化的,只是偶尔执行。 -**Memory sanitizer**. +**Memory sanitizer**. 目前我们不使用 MSan。 -**Undefined behaviour sanitizer.** +**Undefined behaviour sanitizer.** 我们仍然不会在每次提交的基础上使用 UBSan。 有一些地方需要解决。 -**Debug allocator.** +**调试分配器(Debug allocator)。** 您可以使用 `DEBUG_TCMALLOC` CMake 选项启用 `tcmalloc` 的调试版本。我们在每次提交的基础上使用调试分配器运行测试。 更多请参阅 `tests/instructions/sanitizers.txt`。 diff --git a/docs/zh/database_engines/index.md b/docs/zh/engines/database_engines/index.md similarity index 69% rename from docs/zh/database_engines/index.md rename to docs/zh/engines/database_engines/index.md index 95c7ea2c319..2431b96a43d 100644 --- a/docs/zh/database_engines/index.md +++ b/docs/zh/engines/database_engines/index.md @@ -1,8 +1,9 @@ + # 数据库引擎 {#shu-ju-ku-yin-qing} 您使用的所有表都是由数据库引擎所提供的 -默认情况下,ClickHouse使用自己的数据库引擎,该引擎提供可配置的[表引擎](../operations/table_engines/index.md)和[所有支持的SQL语法](../query_language/syntax.md). +默认情况下,ClickHouse使用自己的数据库引擎,该引擎提供可配置的[表引擎](../../engines/table_engines/index.md)和[所有支持的SQL语法](../../sql_reference/syntax.md)。
除此之外,您还可以选择使用以下的数据库引擎: diff --git a/docs/zh/engines/database_engines/lazy.md b/docs/zh/engines/database_engines/lazy.md new file mode 100644 index 00000000000..6b094c8793d --- /dev/null +++ b/docs/zh/engines/database_engines/lazy.md @@ -0,0 +1,18 @@ +--- +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_priority: 31 +toc_title: "\u61D2\u60F0" +--- + +# Lazy {#lazy} + +仅在最后一次访问之后的 `expiration_time_in_seconds` 秒内将表保留在 RAM 中。只能与 \*Log 表一起使用。 + +它针对存储许多访问间隔较长的小型 \*Log 表进行了优化。 + +## 创建数据库 {#creating-a-database} + + CREATE DATABASE testlazy ENGINE = Lazy(expiration_time_in_seconds); + +[原始文章](https://clickhouse.tech/docs/en/database_engines/lazy/) diff --git a/docs/zh/database_engines/mysql.md b/docs/zh/engines/database_engines/mysql.md similarity index 61% rename from docs/zh/database_engines/mysql.md rename to docs/zh/engines/database_engines/mysql.md index 9467269a2cc..80ff82ec2d3 100644 --- a/docs/zh/database_engines/mysql.md +++ b/docs/zh/engines/database_engines/mysql.md @@ -1,3 +1,4 @@ + # MySQL {#mysql} MySQL引擎用于将远程的MySQL服务器中的表映射到ClickHouse中,并允许您对表进行`INSERT`和`SELECT`查询,以方便您在ClickHouse与MySQL之间进行数据交换。 @@ -6,8 +7,6 @@ MySQL引擎用于将远程的MySQL服务器中的表映射到ClickHouse中,并 但您无法对其执行以下操作: -- `ATTACH`/`DETACH` -- `DROP` - `RENAME` - `CREATE TABLE` - `ALTER` @@ -16,7 +15,7 @@ ``` sql CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster] -ENGINE = MySQL('host:port', 'database', 'user', 'password') +ENGINE = MySQL('host:port', ['database' | database], 'user', 'password') ``` **MySQL数据库引擎参数** @@ -28,25 +27,25 @@ ENGINE = MySQL('host:port', 'database', 'user', 'password') ## 支持的类型对应 {#zhi-chi-de-lei-xing-dui-ying} -| MySQL | ClickHouse | -|----------------------------------|---------------------------------------------| -| UNSIGNED TINYINT | [UInt8](../data_types/int_uint.md) | -| TINYINT | [Int8](../data_types/int_uint.md) | -| UNSIGNED SMALLINT | [UInt16](../data_types/int_uint.md) | -| SMALLINT | [Int16](../data_types/int_uint.md) | -| UNSIGNED INT, UNSIGNED MEDIUMINT | [UInt32](../data_types/int_uint.md) | -| INT, MEDIUMINT | [Int32](../data_types/int_uint.md) | -| UNSIGNED BIGINT | [UInt64](../data_types/int_uint.md) | -| BIGINT | [Int64](../data_types/int_uint.md) | -| FLOAT | [Float32](../data_types/float.md) | -| DOUBLE | [Float64](../data_types/float.md) | -| DATE | [Date](../data_types/date.md) | -| DATETIME, TIMESTAMP | [DateTime](../data_types/datetime.md) | -| BINARY | [FixedString](../data_types/fixedstring.md) | +| MySQL | ClickHouse | +|----------------------------------|-------------------------------------------------------------| +| UNSIGNED TINYINT | [UInt8](../../sql_reference/data_types/int_uint.md) | +| TINYINT | [Int8](../../sql_reference/data_types/int_uint.md) | +| UNSIGNED SMALLINT | [UInt16](../../sql_reference/data_types/int_uint.md) | +| SMALLINT | [Int16](../../sql_reference/data_types/int_uint.md) | +| UNSIGNED INT, UNSIGNED MEDIUMINT | [UInt32](../../sql_reference/data_types/int_uint.md) | +| INT, MEDIUMINT | [Int32](../../sql_reference/data_types/int_uint.md) | +| UNSIGNED BIGINT | [UInt64](../../sql_reference/data_types/int_uint.md) | +| BIGINT | [Int64](../../sql_reference/data_types/int_uint.md) | +| FLOAT | [Float32](../../sql_reference/data_types/float.md) | +| DOUBLE | [Float64](../../sql_reference/data_types/float.md) | +| DATE | [Date](../../sql_reference/data_types/date.md) | +| DATETIME, TIMESTAMP | [DateTime](../../sql_reference/data_types/datetime.md) | +| BINARY |
[FixedString](../../sql_reference/data_types/fixedstring.md) | -其他的MySQL数据类型将全部都转换为[String](../data_types/string.md)。 +其他的MySQL数据类型将全部都转换为[String](../../sql_reference/data_types/string.md)。 -同时以上的所有类型都支持[Nullable](../data_types/nullable.md)。 +同时以上的所有类型都支持[Nullable](../../sql_reference/data_types/nullable.md)。 ## 使用示例 {#shi-yong-shi-li} diff --git a/docs/zh/engines/index.md b/docs/zh/engines/index.md new file mode 100644 index 00000000000..41d2a7e3d8d --- /dev/null +++ b/docs/zh/engines/index.md @@ -0,0 +1,8 @@ +--- +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_folder_title: "\u5F15\u64CE" +toc_priority: 25 +--- + + diff --git a/docs/zh/operations/table_engines/index.md b/docs/zh/engines/table_engines/index.md similarity index 50% rename from docs/zh/operations/table_engines/index.md rename to docs/zh/engines/table_engines/index.md index 6a3a752561c..9603ebe78c8 100644 --- a/docs/zh/operations/table_engines/index.md +++ b/docs/zh/engines/table_engines/index.md @@ -1,3 +1,4 @@ + # 表引擎 {#biao-yin-qing} 表引擎(即表的类型)决定了: @@ -13,54 +14,54 @@ ## MergeTree {#mergetree} -适用于高负载任务的最通用和功能最强大的表引擎。这些引擎的共同特点是可以快速插入数据并进行后续的后台数据处理。 MergeTree系列引擎支持数据复制(使用[Replicated\*](replication.md) 的引擎版本),分区和一些其他引擎不支持的其他功能。 +适用于高负载任务的最通用和功能最强大的表引擎。这些引擎的共同特点是可以快速插入数据并进行后续的后台数据处理。 MergeTree系列引擎支持数据复制(使用[Replicated\*](mergetree_family/replication.md) 的引擎版本),分区和一些其他引擎不支持的其他功能。 该类型的引擎: -\* [MergeTree](mergetree.md) -\* [ReplacingMergeTree](replacingmergetree.md) -\* [SummingMergeTree](summingmergetree.md) -\* [AggregatingMergeTree](aggregatingmergetree.md) -\* [CollapsingMergeTree](collapsingmergetree.md) -\* [VersionedCollapsingMergeTree](versionedcollapsingmergetree.md) -\* [GraphiteMergeTree](graphitemergetree.md) +\* [MergeTree](mergetree_family/mergetree.md) +\* [ReplacingMergeTree](mergetree_family/replacingmergetree.md) +\* [SummingMergeTree](mergetree_family/summingmergetree.md) +\* [AggregatingMergeTree](mergetree_family/aggregatingmergetree.md) +\* [CollapsingMergeTree](mergetree_family/collapsingmergetree.md) +\* [VersionedCollapsingMergeTree](mergetree_family/versionedcollapsingmergetree.md) +\* [GraphiteMergeTree](mergetree_family/graphitemergetree.md) -## Log {#log} +## 日志 {#log} -具有最小功能的[轻量级引擎](log_family.md)。当您需要快速写入许多小表(最多约100万行)并在以后整体读取它们时,该类型的引擎是最有效的。 +具有最小功能的[轻量级引擎](log_family/index.md)。当您需要快速写入许多小表(最多约100万行)并在以后整体读取它们时,该类型的引擎是最有效的。 该类型的引擎: -- \[TinyLog\](tinylog/) -- \[StripeLog\](stripelog/) -- [Log](#log)(log/) +- [TinyLog](log_family/tinylog.md) +- [StripeLog](log_family/stripelog.md) +- [日志](log_family/log.md) -## Integration engines {#integration-engines} +## 集成引擎 {#integration-engines} 用于与其他的数据存储与处理系统集成的引擎。 该类型的引擎: -- [Kafka](kafka.md) -- [MySQL](mysql.md) -- [ODBC](odbc.md) -- [JDBC](jdbc.md) -- [HDFS](hdfs.md) +- [Kafka](integrations/kafka.md) +- [MySQL](integrations/mysql.md) +- [ODBC](integrations/odbc.md) +- [JDBC](integrations/jdbc.md) +- [HDFS](integrations/hdfs.md) ## 用于其他特定功能的引擎 {#yong-yu-qi-ta-te-ding-gong-neng-de-yin-qing} 该类型的引擎: -- [Distributed](distributed.md) -- [MaterializedView](materializedview.md) -- [Dictionary](dictionary.md) -- [Merge](merge.md) -- [File](file.md) -- [Null](null.md) -- [Set](set.md) -- [Join](join.md) -- [URL](url.md) -- [View](view.md) -- [Memory](memory.md) -- [Buffer](buffer.md) +- [Distributed](special/distributed.md) +- [MaterializedView](special/materializedview.md) +- [Dictionary](special/dictionary.md) +- [Merge](special/merge.md) +- [File](special/file.md) +- [Null](special/null.md) +- [Set](special/set.md) +- [Join](special/join.md) +- [URL](special/url.md) +- [View](special/view.md) -
[Memory](special/memory.md) +- [Buffer](special/buffer.md) + +# 虚拟列 {#xu-ni-lie} diff --git a/docs/zh/engines/table_engines/integrations/hdfs.md b/docs/zh/engines/table_engines/integrations/hdfs.md new file mode 100644 index 00000000000..5cd60a855bc --- /dev/null +++ b/docs/zh/engines/table_engines/integrations/hdfs.md @@ -0,0 +1,123 @@ +--- +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_priority: 36 +toc_title: HDFS +--- + +# HDFS {#table_engines-hdfs} + +该引擎通过允许 ClickHouse 管理 [HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html) 上的数据,提供了与 [Apache Hadoop](https://en.wikipedia.org/wiki/Apache_Hadoop) 生态系统的集成。这个引擎类似 +于 [File](../special/file.md) 和 [URL](../special/url.md) 引擎,但提供 Hadoop 特有的功能。 + +## 用途 {#usage} + +``` sql +ENGINE = HDFS(URI, format) +``` + +`URI` 参数是 HDFS 中完整的文件 URI。 +`format` 参数指定一种可用的文件格式。执行 +`SELECT` 查询时,格式必须支持输入;执行 +`INSERT` 查询时,格式必须支持输出。可用的格式列在 +[格式](../../../interfaces/formats.md#formats) 一节中。 +`URI` 的路径部分可能包含通配符。在这种情况下,表将是只读的。 + +**示例:** + +**1.** 设置 `hdfs_engine_table` 表: + +``` sql +CREATE TABLE hdfs_engine_table (name String, value UInt32) ENGINE=HDFS('hdfs://hdfs1:9000/other_storage', 'TSV') +``` + +**2.** 填充文件: + +``` sql +INSERT INTO hdfs_engine_table VALUES ('one', 1), ('two', 2), ('three', 3) +``` + +**3.** 查询数据: + +``` sql +SELECT * FROM hdfs_engine_table LIMIT 2 +``` + +``` text +┌─name─┬─value─┐ +│ one │ 1 │ +│ two │ 2 │ +└──────┴───────┘ +``` + +## 实施细节 {#implementation-details} + +- 读取和写入可以并行 +- 不支持: + - `ALTER` 和 `SELECT...SAMPLE` 操作。 + - 索引。 + - 复制。 + +**路径中的通配符** + +多个路径组成部分都可以包含通配符。被处理的文件必须存在并匹配整个路径模式。文件列表在 `SELECT` 时确定(而不是在 `CREATE` 时)。 + +- `*` — Substitutes any number of any characters except `/` 包括空字符串。 +- `?` — Substitutes any single character. +- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. +- `{N..M}` — Substitutes any number in range from N to M including both borders. + +带 `{}` 的结构类似于 [remote](../../../sql_reference/table_functions/remote.md) 表函数。 + +**示例** + +1. 假设我们在HDFS上有几个TSV格式的文件,其中包含以下Uri: + +- ‘hdfs://hdfs1:9000/some\_dir/some\_file\_1’ +- ‘hdfs://hdfs1:9000/some\_dir/some\_file\_2’ +- ‘hdfs://hdfs1:9000/some\_dir/some\_file\_3’ +- ‘hdfs://hdfs1:9000/another\_dir/some\_file\_1’ +- ‘hdfs://hdfs1:9000/another\_dir/some\_file\_2’ +- ‘hdfs://hdfs1:9000/another\_dir/some\_file\_3’ + +1. 有几种方法可以创建由所有六个文件组成的表: + + + +``` sql +CREATE TABLE table_with_range (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9000/{some,another}_dir/some_file_{1..3}', 'TSV') +``` + +另一种方式: + +``` sql +CREATE TABLE table_with_question_mark (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9000/{some,another}_dir/some_file_?', 'TSV') +``` + +表由两个目录中的所有文件组成(所有文件都应满足query中描述的格式和模式): + +``` sql +CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9000/{some,another}_dir/*', 'TSV') +``` + +!!! warning "警告" + 如果文件列表包含带有前导零的数字范围,请单独使用带有大括号的构造或使用 `?`. + +**示例** + +创建具有名为文件的表 `file000`, `file001`, … , `file999`: + +``` sql +CREATE TABLE big_table (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV') +``` + +## 虚拟列 {#virtual-columns} + +- `_path` — Path to the file. +- `_file` — Name of the file.
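下面是一个最小示例(原文档中没有,仅作说明),假设沿用前面创建的 `hdfs_engine_table` 表,演示如何在查询中读取这两个虚拟列:

``` sql
SELECT name, value, _path, _file
FROM hdfs_engine_table
LIMIT 2
```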
+ +**另请参阅** + +- [虚拟列](../index.md#table_engines-virtual_columns) + +[原始文章](https://clickhouse.tech/docs/en/operations/table_engines/hdfs/) diff --git a/docs/zh/engines/table_engines/integrations/index.md b/docs/zh/engines/table_engines/integrations/index.md new file mode 100644 index 00000000000..b488c83d1bd --- /dev/null +++ b/docs/zh/engines/table_engines/integrations/index.md @@ -0,0 +1,8 @@ +--- +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_folder_title: "\u96C6\u6210" +toc_priority: 30 +--- + + diff --git a/docs/zh/operations/table_engines/jdbc.md b/docs/zh/engines/table_engines/integrations/jdbc.md similarity index 56% rename from docs/zh/operations/table_engines/jdbc.md rename to docs/zh/engines/table_engines/integrations/jdbc.md index 576c7182907..00363bb988a 100644 --- a/docs/zh/operations/table_engines/jdbc.md +++ b/docs/zh/engines/table_engines/integrations/jdbc.md @@ -1,16 +1,19 @@ --- -en_copy: true +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_priority: 34 +toc_title: JDBC --- # JDBC {#table-engine-jdbc} -Allows ClickHouse to connect to external databases via [JDBC](https://en.wikipedia.org/wiki/Java_Database_Connectivity). +允许 ClickHouse 通过 [JDBC](https://en.wikipedia.org/wiki/Java_Database_Connectivity) 连接到外部数据库。 -To implement the JDBC connection, ClickHouse uses the separate program [clickhouse-jdbc-bridge](https://github.com/alex-krash/clickhouse-jdbc-bridge) that should run as a daemon. +要实现 JDBC 连接,ClickHouse 使用单独的程序 [clickhouse-jdbc-bridge](https://github.com/alex-krash/clickhouse-jdbc-bridge),它应该作为守护进程运行。 -This engine supports the [Nullable](../../data_types/nullable.md) data type. +该引擎支持 [Nullable](../../../sql_reference/data_types/nullable.md) 数据类型。 -## Creating a Table {#creating-a-table} +## 创建表 {#creating-a-table} ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name ( ... ) ENGINE = JDBC(dbms_uri, external_database, external_table) ``` -**Engine Parameters** +**引擎参数** - `dbms_uri` — URI of an external DBMS. - Format: `jdbc:://:/?user=&password=`. Example for MySQL: `jdbc:mysql://localhost:3306/?user=root&password=root`. + 格式: `jdbc:<driver_name>://<host_name>:<port>/?user=<username>&password=<password>`. + MySQL 示例: `jdbc:mysql://localhost:3306/?user=root&password=root`. - `external_database` — Database in an external DBMS. - `external_table` — Name of the table in `external_database`. -## Usage Example {#usage-example} +## 用法示例 {#usage-example} -Creating a table in MySQL server by connecting directly with it’s console client: +通过控制台客户端直接连接 MySQL 服务器,在其中创建一个表: ``` text mysql> CREATE TABLE `test`.`test` ( -> `int_id` INT NOT NULL AUTO_INCREMENT, -> `int_nullable` INT NULL DEFAULT NULL, -> `float` FLOAT NOT NULL, -> `float_nullable` FLOAT NULL DEFAULT NULL, -> PRIMARY KEY (`int_id`)); Query OK, 0 rows affected (0,09 sec) mysql> insert into test (`int_id`, `float`) VALUES (1,2); Query OK, 1 row affected (0,00 sec) mysql> select * from test; -+--------+--------------+-------+----------------+ ++--------+--------------+-------+----------------+ | int_id | int_nullable | float | float_nullable | -+--------+--------------+-------+----------------+ ++--------+--------------+-------+----------------+ | 1 | NULL | 2 | NULL | -+--------+--------------+-------+----------------+ ++--------+--------------+-------+----------------+ 1 row in set (0,00 sec) ``` -Creating a table in ClickHouse server and selecting data from it: +在 ClickHouse 服务器中创建表并从中查询数据: ``` sql CREATE TABLE jdbc_table ( ... ) ``` ``` sql ... FROM jdbc_table └────────┴──────────────┴───────┴────────────────┘ ``` -## See Also {#see-also} +## 另请参阅 {#see-also} -- [JDBC table function](../../query_language/table_functions/jdbc.md).
+- [JDBC表函数](../../../sql_reference/table_functions/jdbc.md). -[Original article](https://clickhouse.tech/docs/en/operations/table_engines/jdbc/) +[原始文章](https://clickhouse.tech/docs/en/operations/table_engines/jdbc/) diff --git a/docs/zh/operations/table_engines/kafka.md b/docs/zh/engines/table_engines/integrations/kafka.md similarity index 85% rename from docs/zh/operations/table_engines/kafka.md rename to docs/zh/engines/table_engines/integrations/kafka.md index e992a76519e..53bde650dfc 100644 --- a/docs/zh/operations/table_engines/kafka.md +++ b/docs/zh/engines/table_engines/integrations/kafka.md @@ -1,4 +1,5 @@ -# Kafka {#kafka} + +# Kafka {#kafka} 此引擎与 [Apache Kafka](http://kafka.apache.org/) 结合使用。 Kafka 特性: 可选参数: - `kafka_row_delimiter` - 每个消息体(记录)之间的分隔符。 -- `kafka_schema` – 如果解析格式需要一个 schema 时,此参数必填。例如,[Cap’n Proto](https://capnproto.org/) 需要 schema 文件路径以及根对象 `schema.capnp:Message` 的名字。 +- `kafka_schema` – 如果解析格式需要一个 schema 时,此参数必填。例如,[Cap’n Proto](https://capnproto.org/) 需要 schema 文件路径以及根对象 `schema.capnp:Message` 的名字。 - `kafka_num_consumers` – 单个表的消费者数量。默认值是:`1`,如果一个消费者的吞吐量不足,则指定更多的消费者。消费者的总数不应该超过 topic 中分区的数量,因为每个分区只能分配一个消费者。 示例: @@ -103,7 +104,7 @@ Kafka 特性: SELECT level, sum(total) FROM daily GROUP BY level; ``` -为了提高性能,接受的消息被分组为 [max\_insert\_block\_size](../settings/settings.md#settings-max_insert_block_size) 大小的块。如果未在 [stream\_flush\_interval\_ms](../settings/settings.md) 毫秒内形成块,则不关心块的完整性,都会将数据刷新到表中。 +为了提高性能,接受的消息被分组为 [max\_insert\_block\_size](../../../operations/settings/settings.md#settings-max_insert_block_size) 大小的块。如果未在 [stream\_flush\_interval\_ms](../../../operations/settings/settings.md) 毫秒内形成块,则不关心块的完整性,都会将数据刷新到表中。 停止接收主题数据或更改转换逻辑,请 detach 物化视图: 

 ``` -有关详细配置选项列表,请参阅 [librdkafka configuration reference](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md)。在 ClickHouse 配置中使用下划线 (`_`) ,并不是使用点 (`.`)。例如,`check.crcs=true` 将是 `true`。 +有关详细配置选项列表,请参阅 [librdkafka配置参考](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md)。在 ClickHouse 配置中使用下划线 (`_`) ,并不是使用点 (`.`)。例如,`check.crcs=true` 将是 `true`。 -[Original article](https://clickhouse.tech/docs/zh/operations/table_engines/kafka/) +[原始文章](https://clickhouse.tech/docs/zh/operations/table_engines/kafka/) diff --git a/docs/zh/operations/table_engines/mysql.md b/docs/zh/engines/table_engines/integrations/mysql.md similarity index 77% rename from docs/zh/operations/table_engines/mysql.md rename to docs/zh/engines/table_engines/integrations/mysql.md index e97f5f12106..bfd3e6445a5 100644 --- a/docs/zh/operations/table_engines/mysql.md +++ b/docs/zh/engines/table_engines/integrations/mysql.md @@ -1,3 +1,4 @@ + # MySQL {#mysql} MySQL 引擎可以对存储在远程 MySQL 服务器上的数据执行 `SELECT` 查询。 @@ -20,6 +21,6 @@ MySQL 引擎可以对存储在远程 MySQL 服务器上的数据执行 `SELECT` 其余条件以及 `LIMIT` 采样约束语句仅在对MySQL的查询完成后才在ClickHouse中执行。 -`MySQL` 引擎不支持 [Nullable](../../data_types/nullable.md) 数据类型,因此,当从MySQL表中读取数据时,`NULL` 将转换为指定列类型的默认值(通常为0或空字符串)。 +`MySQL` 引擎不支持 [Nullable](../../../sql_reference/data_types/nullable.md) 数据类型,因此,当从MySQL表中读取数据时,`NULL` 将转换为指定列类型的默认值(通常为0或空字符串)。 -[Original article](https://clickhouse.tech/docs/zh/operations/table_engines/mysql/) +[原始文章](https://clickhouse.tech/docs/zh/operations/table_engines/mysql/) diff --git a/docs/zh/engines/table_engines/integrations/odbc.md b/docs/zh/engines/table_engines/integrations/odbc.md new file mode 100644 index 00000000000..1488ab0d856 --- /dev/null +++ b/docs/zh/engines/table_engines/integrations/odbc.md @@ -0,0 +1,132 @@ +--- +machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_priority: 35
+toc_title: ODBC
+---
+
+# ODBC {#table-engine-odbc}
+
+允许ClickHouse通过 [ODBC](https://en.wikipedia.org/wiki/Open_Database_Connectivity) 连接到外部数据库。
+
+为了安全地实现ODBC连接,ClickHouse使用单独的程序 `clickhouse-odbc-bridge`。如果ODBC驱动程序直接从 `clickhouse-server` 中加载,驱动程序的问题可能会导致ClickHouse服务器崩溃。ClickHouse会在需要时自动启动 `clickhouse-odbc-bridge`。ODBC桥程序与 `clickhouse-server` 从同一软件包安装。
+
+该引擎支持 [Nullable](../../../sql_reference/data_types/nullable.md) 数据类型。
+
+## 创建表 {#creating-a-table}
+
+``` sql
+CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
+(
+    name1 [type1],
+    name2 [type2],
+    ...
+)
+ENGINE = ODBC(connection_settings, external_database, external_table)
+```
+
+详细说明请参阅 [CREATE TABLE](../../../sql_reference/statements/create.md#create-table-query) 查询。
+
+表结构可以与源表结构不同:
+
+- 列名应与源表中的列名相同,但您可以按任何顺序使用其中的一些列。
+- 列类型可能与源表中的列类型不同。ClickHouse会尝试将值 [转换](../../../sql_reference/functions/type_conversion_functions.md#type_conversion_function-cast) 为ClickHouse的数据类型。
+
+**引擎参数**
+
+- `connection_settings` — Name of the section with connection settings in the `odbc.ini` 文件
+- `external_database` — Name of a database in an external DBMS.
+- `external_table` — Name of a table in the `external_database`.
+
+## 用法示例 {#usage-example}
+
+**通过ODBC从本地MySQL安装中检索数据**
+
+此示例在Ubuntu Linux 18.04和MySQL server 5.7上验证。
+
+确保安装了unixODBC和MySQL连接器。
+
+默认情况下(如果从软件包安装),ClickHouse以 `clickhouse` 用户身份启动。因此,您需要在MySQL服务器中创建并配置此用户。
+
+``` bash
+$ sudo mysql
+```
+
+``` sql
+mysql> CREATE USER 'clickhouse'@'localhost' IDENTIFIED BY 'clickhouse';
+mysql> GRANT ALL PRIVILEGES ON *.* TO 'clickhouse'@'clickhouse' WITH GRANT OPTION;
+```
+
+然后在 `/etc/odbc.ini` 中配置连接。
+
+``` bash
+$ cat /etc/odbc.ini
+[mysqlconn]
+DRIVER = /usr/local/lib/libmyodbc5w.so
+SERVER = 127.0.0.1
+PORT = 3306
+DATABASE = test
+USERNAME = clickhouse
+PASSWORD = clickhouse
+```
+
+您可以使用unixODBC安装中的 `isql` 实用程序检查连接。
+
+``` bash
+$ isql -v mysqlconn
++-------------------------+
+| Connected!              |
+|                         |
+...
+```
+
+MySQL中的表:
+
+``` text
+mysql> CREATE TABLE `test`.`test` (
+    ->   `int_id` INT NOT NULL AUTO_INCREMENT,
+    ->   `int_nullable` INT NULL DEFAULT NULL,
+    ->   `float` FLOAT NOT NULL,
+    ->   `float_nullable` FLOAT NULL DEFAULT NULL,
+    ->   PRIMARY KEY (`int_id`));
+Query OK, 0 rows affected (0,09 sec)
+
+mysql> insert into test (`int_id`, `float`) VALUES (1,2);
+Query OK, 1 row affected (0,00 sec)
+
+mysql> select * from test;
++--------+--------------+-------+----------------+
+| int_id | int_nullable | float | float_nullable |
++--------+--------------+-------+----------------+
+|      1 |         NULL |     2 |           NULL |
++--------+--------------+-------+----------------+
+1 row in set (0,00 sec)
+```
+
+ClickHouse中的表,从MySQL表中检索数据:
+
+``` sql
+CREATE TABLE odbc_t
+(
+    `int_id` Int32,
+    `float_nullable` Nullable(Float32)
+)
+ENGINE = ODBC('DSN=mysqlconn', 'test', 'test')
+```
+
+``` sql
+SELECT * FROM odbc_t
+```
+
+``` text
+┌─int_id─┬─float_nullable─┐
+│      1 │           ᴺᵁᴸᴸ │
+└────────┴────────────────┘
+```
+
+## 另请参阅 {#see-also}
+
+- [ODBC外部字典](../../../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_sources.md#dicts-external_dicts_dict_sources-odbc)
+- [ODBC表函数](../../../sql_reference/table_functions/odbc.md)
+
+[原始文章](https://clickhouse.tech/docs/en/operations/table_engines/odbc/)
diff --git a/docs/zh/engines/table_engines/log_family/index.md b/docs/zh/engines/table_engines/log_family/index.md
new file mode 100644
index 00000000000..78557921c09
--- /dev/null
+++ b/docs/zh/engines/table_engines/log_family/index.md
@@ -0,0 +1,8 @@
+---
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_folder_title: "\u65E5\u5FD7\u7CFB\u5217"
+toc_priority: 29
+---
+
+
diff --git a/docs/zh/operations/table_engines/log.md b/docs/zh/engines/table_engines/log_family/log.md
similarity index 84%
rename from docs/zh/operations/table_engines/log.md
rename to docs/zh/engines/table_engines/log_family/log.md
index 852575181cd..90f892615c9 100644
--- a/docs/zh/operations/table_engines/log.md
+++ b/docs/zh/engines/table_engines/log_family/log.md
@@ -1,5 +1,6 @@
-# Log {#log}
+
+# Log {#log}
 
 日志与 TinyLog 的不同之处在于,«标记» 的小文件与列文件存在一起。这些标记写在每个数据块上,并且包含偏移量,这些偏移量指示从哪里开始读取文件以便跳过指定的行数。这使得可以在多个线程中读取表数据。对于并发数据访问,可以同时执行读取操作,而写入操作则阻塞读取和其它写入。Log 引擎不支持索引。同样,如果写入表失败,则该表将被破坏,并且从该表读取将返回错误。Log 引擎适用于临时数据,write-once 表以及测试或演示目的。
 
-[Original article](https://clickhouse.tech/docs/zh/operations/table_engines/log/)
+[原始文章](https://clickhouse.tech/docs/zh/operations/table_engines/log/)
diff --git a/docs/zh/operations/table_engines/log_family.md b/docs/zh/engines/table_engines/log_family/log_family.md
similarity index 93%
rename from docs/zh/operations/table_engines/log_family.md
rename to docs/zh/engines/table_engines/log_family/log_family.md
index 9ce3ab95b55..60cecab5faf 100644
--- a/docs/zh/operations/table_engines/log_family.md
+++ b/docs/zh/engines/table_engines/log_family/log_family.md
@@ -1,3 +1,4 @@
+
 # 日志引擎系列 {#table_engines-log-engine-family}
 
 这些引擎是为了需要写入许多小数据量(少于一百万行)的表的场景而开发的。
@@ -5,7 +6,7 @@
 这系列的引擎有:
 
 - [StripeLog](stripelog.md)
 - [Log](log.md)
 - [TinyLog](tinylog.md)
 
 ## 共同属性 {#table_engines-log-engine-family-common-properties}
@@ -16,7 +17,7 @@
 
 - 写入时将数据追加在文件末尾。
 
-- 不支持[突变](../../query_language/alter.md#alter-mutations)操作。
+- 不支持[突变](../../../sql_reference/statements/alter.md#alter-mutations)操作。
 
 - 不支持索引。
 
diff --git a/docs/zh/operations/table_engines/stripelog.md b/docs/zh/engines/table_engines/log_family/stripelog.md
similarity index 97%
rename from docs/zh/operations/table_engines/stripelog.md
rename to
docs/zh/engines/table_engines/log_family/stripelog.md
index 1a5edfd23bf..ab4deb67ebb 100644
--- a/docs/zh/operations/table_engines/stripelog.md
+++ b/docs/zh/engines/table_engines/log_family/stripelog.md
@@ -1,3 +1,4 @@
+
 # StripeLog {#table_engines-stripelog}
 
 该引擎属于日志引擎系列。请在[日志引擎系列](log_family.md)文章中查看引擎的共同属性和差异。
@@ -13,7 +14,7 @@
     ...
 ) ENGINE = StripeLog
 
-查看[建表](../../query_language/create.md#create-table-query)请求的详细说明。
+查看[建表](../../../sql_reference/statements/create.md#create-table-query)请求的详细说明。
 
 ## 写数据 {#table_engines-stripelog-writing-the-data}
 
diff --git a/docs/zh/operations/table_engines/tinylog.md b/docs/zh/engines/table_engines/log_family/tinylog.md
similarity index 91%
rename from docs/zh/operations/table_engines/tinylog.md
rename to docs/zh/engines/table_engines/log_family/tinylog.md
index 7c9d524d5e6..9a1b27fd418 100644
--- a/docs/zh/operations/table_engines/tinylog.md
+++ b/docs/zh/engines/table_engines/log_family/tinylog.md
@@ -1,3 +1,4 @@
+
 # TinyLog {#tinylog}
 
 最简单的表引擎,用于将数据存储在磁盘上。每列都存储在单独的压缩文件中。写入时,数据将附加到文件末尾。
@@ -10,4 +11,4 @@
 
 在 Yandex.Metrica 中,TinyLog 表用于小批量处理的中间数据。
 
-[Original article](https://clickhouse.tech/docs/zh/operations/table_engines/tinylog/)
+[原始文章](https://clickhouse.tech/docs/zh/operations/table_engines/tinylog/)
diff --git a/docs/zh/operations/table_engines/aggregatingmergetree.md b/docs/zh/engines/table_engines/mergetree_family/aggregatingmergetree.md
similarity index 89%
rename from docs/zh/operations/table_engines/aggregatingmergetree.md
rename to docs/zh/engines/table_engines/mergetree_family/aggregatingmergetree.md
index 2b18b2fbe48..2d898a5d168 100644
--- a/docs/zh/operations/table_engines/aggregatingmergetree.md
+++ b/docs/zh/engines/table_engines/mergetree_family/aggregatingmergetree.md
@@ -1,10 +1,11 @@
+
 # AggregatingMergeTree {#aggregatingmergetree}
 
 该引擎继承自 [MergeTree](mergetree.md),并改变了数据片段的合并逻辑。 ClickHouse 会将相同主键的所有行(在一个数据片段内)替换为单个存储一系列聚合函数状态的行。
 
 可以使用 `AggregatingMergeTree` 表来做增量数据统计聚合,包括物化视图的数据聚合。
 
-引擎需使用 [AggregateFunction](../../data_types/nested_data_structures/aggregatefunction.md) 类型来处理所有列。
+引擎需使用 [AggregateFunction](../../../sql_reference/data_types/aggregatefunction.md) 类型来处理所有列。
 
 如果要按一组规则来合并减少行数,则使用 `AggregatingMergeTree` 是合适的。
 
@@ -23,7 +24,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
 [SETTINGS name=value, ...]
```
 
-语句参数的说明,请参阅 [语句描述](../../query_language/create.md)。
+语句参数的说明,请参阅 [语句描述](../../../sql_reference/statements/create.md)。
 
 **子句**
 
@@ -50,7 +51,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
 
 ## SELECT 和 INSERT {#select-he-insert}
 
-插入数据,需使用带有聚合 -State- 函数的 [INSERT SELECT](../../query_language/insert_into.md) 语句。
+插入数据,需使用带有聚合 -State- 函数的 [INSERT SELECT](../../../sql_reference/statements/insert_into.md) 语句。
 
 从 `AggregatingMergeTree` 表中查询数据时,需使用 `GROUP BY` 子句并且要使用与插入时相同的聚合函数,但后缀要改为 `-Merge` 。
 
 在 `SELECT` 查询的结果中,对于 ClickHouse 的所有输出格式 `AggregateFunction` 类型的值都实现了特定的二进制表示法。如果直接用 `SELECT` 导出这些数据,例如用 `TabSeparated` 格式,那么这些导出数据也能直接用 `INSERT` 语句加载导入。
 
diff --git a/docs/zh/operations/table_engines/collapsingmergetree.md b/docs/zh/engines/table_engines/mergetree_family/collapsingmergetree.md
similarity index 98%
rename from docs/zh/operations/table_engines/collapsingmergetree.md
rename to docs/zh/engines/table_engines/mergetree_family/collapsingmergetree.md
index dd48fdd58ab..85b5ce076e8 100644
--- a/docs/zh/operations/table_engines/collapsingmergetree.md
+++ b/docs/zh/engines/table_engines/mergetree_family/collapsingmergetree.md
@@ -1,4 +1,5 @@
-# CollapsingMergeTree {#table_engine-collapsingmergetree}
+
+# CollapsingMergeTree {#table_engine-collapsingmergetree}
 
 该引擎继承于 [MergeTree](mergetree.md),并在数据块合并算法中添加了折叠行的逻辑。
 
@@ -21,7 +22,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
 [SETTINGS name=value, ...]
 ```
 
-请求参数的描述,参考[请求参数](../../query_language/create.md)。
+请求参数的描述,参考[请求参数](../../../sql_reference/statements/create.md)。
 
 **CollapsingMergeTree 参数**
 
diff --git a/docs/zh/operations/table_engines/custom_partitioning_key.md b/docs/zh/engines/table_engines/mergetree_family/custom_partitioning_key.md
similarity index 87%
rename from docs/zh/operations/table_engines/custom_partitioning_key.md
rename to docs/zh/engines/table_engines/mergetree_family/custom_partitioning_key.md
index 1a8cea2d0e2..3844506c782 100644
--- a/docs/zh/operations/table_engines/custom_partitioning_key.md
+++ b/docs/zh/engines/table_engines/mergetree_family/custom_partitioning_key.md
@@ -1,6 +1,7 @@
+
 # 自定义分区键 {#zi-ding-yi-fen-qu-jian}
 
-[MergeTree](mergetree.md) 系列的表(包括 [可复制表](replication.md) )可以使用分区。基于 MergeTree 表的 [物化视图](materializedview.md) 也支持分区。
+[MergeTree](mergetree.md) 系列的表(包括 [可复制表](replication.md) )可以使用分区。基于 MergeTree 表的 [物化视图](../special/materializedview.md) 也支持分区。
 
 一个分区是指按指定规则逻辑组合一起的表的记录集。可以按任意标准进行分区,如按月,按日或按事件类型。为了减少需要操作的数据,每个分区都是分开存储的。访问数据时,ClickHouse 尽量使用这些分区的最小子集。
 
@@ -33,7 +34,7 @@ ORDER BY (CounterID, StartDate, intHash32(UserID));
 !!! attention "注意"
attention "注意" 那些有相同分区表达式值的数据片段才会合并。这意味着 **你不应该用太精细的分区方案**(超过一千个分区)。否则,会因为文件系统中的文件数量和需要找开的文件描述符过多,导致 `SELECT` 查询效率不佳。 -可以通过 [system.parts](../system_tables.md#system_tables-parts) 表查看表片段和分区信息。例如,假设我们有一个 `visits` 表,按月分区。对 `system.parts` 表执行 `SELECT`: +可以通过 [系统。零件](../../../engines/table_engines/mergetree_family/custom_partitioning_key.md#system_tables-parts) 表查看表片段和分区信息。例如,假设我们有一个 `visits` 表,按月分区。对 `system.parts` 表执行 `SELECT`: ``` sql SELECT @@ -70,7 +71,7 @@ WHERE table = 'visits' `active` 列为片段状态。`1` 激活状态;`0` 非激活状态。非激活片段是那些在合并到较大片段之后剩余的源数据片段。损坏的数据片段也表示为非活动状态。 -正如在示例中所看到的,同一分区中有几个独立的片段(例如,`201901_1_3_1`和`201901_1_9_2`)。这意味着这些片段尚未合并。ClickHouse 大约在插入后15分钟定期报告合并操作,合并插入的数据片段。此外,你也可以使用 [OPTIMIZE](../../query_language/misc.md#misc_operations-optimize) 语句直接执行合并。例: +正如在示例中所看到的,同一分区中有几个独立的片段(例如,`201901_1_3_1`和`201901_1_9_2`)。这意味着这些片段尚未合并。ClickHouse 大约在插入后15分钟定期报告合并操作,合并插入的数据片段。此外,你也可以使用 [OPTIMIZE](../../../engines/table_engines/mergetree_family/custom_partitioning_key.md#misc_operations-optimize) 语句直接执行合并。例: ``` sql OPTIMIZE TABLE visits PARTITION 201902; @@ -107,10 +108,10 @@ drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 1 16:48 detached 文件夹 ‘201901\_1\_1\_0’,‘201901\_1\_7\_1’ 等是片段的目录。每个片段都与一个对应的分区相关,并且只包含这个月的数据(本例中的表按月分区)。 -`detached` 目录存放着使用 [DETACH](#alter_detach-partition) 语句从表中分离的片段。损坏的片段也会移到该目录,而不是删除。服务器不使用`detached`目录中的片段。可以随时添加,删除或修改此目录中的数据 – 在运行 [ATTACH](../../query_language/alter.md#alter_attach-partition) 语句前,服务器不会感知到。 +`detached` 目录存放着使用 [DETACH](#alter_detach-partition) 语句从表中分离的片段。损坏的片段也会移到该目录,而不是删除。服务器不使用`detached`目录中的片段。可以随时添加,删除或修改此目录中的数据 – 在运行 [ATTACH](../../../engines/table_engines/mergetree_family/custom_partitioning_key.md#alter_attach-partition) 语句前,服务器不会感知到。 注意,在操作服务器时,你不能手动更改文件系统上的片段集或其数据,因为服务器不会感知到这些修改。对于非复制表,可以在服务器停止时执行这些操作,但不建议这样做。对于复制表,在任何情况下都不要更改片段文件。 -ClickHouse 支持对分区执行这些操作:删除分区,从一个表复制到另一个表,或创建备份。了解分区的所有操作,请参阅 [分区和片段的操作](../../query_language/alter.md#alter_manipulations-with-partitions) 一节。 +ClickHouse 支持对分区执行这些操作:删除分区,从一个表复制到另一个表,或创建备份。了解分区的所有操作,请参阅 [分区和片段的操作](../../../engines/table_engines/mergetree_family/custom_partitioning_key.md#alter_manipulations-with-partitions) 一节。 [来源文章](https://clickhouse.tech/docs/en/operations/table_engines/custom_partitioning_key/) diff --git a/docs/zh/operations/table_engines/graphitemergetree.md b/docs/zh/engines/table_engines/mergetree_family/graphitemergetree.md similarity index 50% rename from docs/zh/operations/table_engines/graphitemergetree.md rename to docs/zh/engines/table_engines/mergetree_family/graphitemergetree.md index 6916441acd0..b578414a203 100644 --- a/docs/zh/operations/table_engines/graphitemergetree.md +++ b/docs/zh/engines/table_engines/mergetree_family/graphitemergetree.md @@ -1,16 +1,19 @@ --- -en_copy: true +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_priority: 38 +toc_title: GraphiteMergeTree --- # GraphiteMergeTree {#graphitemergetree} -This engine is designed for thinning and aggregating/averaging (rollup) [Graphite](http://graphite.readthedocs.io/en/latest/index.html) data. It may be helpful to developers who want to use ClickHouse as a data store for Graphite. +此引擎专为细化和聚合/平均(rollup) [石墨](http://graphite.readthedocs.io/en/latest/index.html) 戴达 对于想要使用ClickHouse作为Graphite的数据存储的开发人员来说,这可能会有所帮助。 -You can use any ClickHouse table engine to store the Graphite data if you don’t need rollup, but if you need a rollup use `GraphiteMergeTree`. The engine reduces the volume of storage and increases the efficiency of queries from Graphite. 
+如果不需要汇总(rollup),您可以使用任何ClickHouse表引擎来存储Graphite数据;但如果需要汇总,请使用 `GraphiteMergeTree`。该引擎可以减少存储量,并提高Graphite查询的效率。
 
-The engine inherits properties from [MergeTree](mergetree.md).
+该引擎继承自 [MergeTree](mergetree.md) 的属性。
 
-## Creating a Table {#creating-table}
+## 创建表 {#creating-table}
 
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
@@ -27,36 +30,36 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
[SETTINGS name=value, ...]
```
 
-See a detailed description of the [CREATE TABLE](../../query_language/create.md#create-table-query) query.
+请参阅 [CREATE TABLE](../../../sql_reference/statements/create.md#create-table-query) 查询的详细说明。
 
-A table for the Graphite data should have the following columns for the following data:
+用于存储Graphite数据的表应具有包含以下数据的列:
 
-- Metric name (Graphite sensor). Data type: `String`.
+- 指标名称(Graphite传感器)。数据类型:`String`。
 
-- Time of measuring the metric. Data type: `DateTime`.
+- 指标的测量时间。数据类型:`DateTime`。
 
-- Value of the metric. Data type: any numeric.
+- 指标的值。数据类型:任意数值类型。
 
-- Version of the metric. Data type: any numeric.
+- 指标的版本。数据类型:任意数值类型。
 
-    ClickHouse saves the rows with the highest version or the last written if versions are the same. Other rows are deleted during the merge of data parts.
+    ClickHouse会保存版本最高的行;如果版本相同,则保存最后写入的行。其他行会在数据片段合并期间被删除。
 
-The names of these columns should be set in the rollup configuration.
+应在汇总配置中设置这些列的名称。
 
-**GraphiteMergeTree parameters**
+**GraphiteMergeTree参数**
 
- `config_section` — Name of the section in the configuration file, where are the rules of rollup set.
 
-**Query clauses**
+**查询子句**
 
-When creating a `GraphiteMergeTree` table, the same [clauses](mergetree.md#table_engine-mergetree-creating-a-table) are required, as when creating a `MergeTree` table.
+创建 `GraphiteMergeTree` 表时,需要与创建 `MergeTree` 表时相同的 [子句](mergetree.md#table_engine-mergetree-creating-a-table)。
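+
+例如,一个最小的建表示意(表名为假设;汇总配置节名假定为 `graphite_rollup`,列名使用汇总配置的默认值 `Path`、`Time`、`Value`、`Timestamp`):
+
+``` sql
+CREATE TABLE graphite_data
+(
+    Path String,         -- 指标名称(Graphite传感器)
+    Time DateTime,       -- 指标的测量时间
+    Value Float64,       -- 指标的值
+    Timestamp UInt32     -- 指标状态的版本
+) ENGINE = GraphiteMergeTree('graphite_rollup')
+PARTITION BY toYYYYMM(Time)
+ORDER BY (Path, Time)
+```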
-Deprecated Method for Creating a Table
+不推荐使用的创建表的方法
 
-!!! attention "Attention"
-    Do not use this method in new projects and, if possible, switch the old projects to the method described above.
+!!! attention "注意"
+    不要在新项目中使用此方法,如果可能的话,请将旧项目切换到上述方法。
 
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
@@ -70,31 +73,31 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
) ENGINE [=] GraphiteMergeTree(date-column [, sampling_expression], (primary, key), index_granularity, config_section)
```
 
-All of the parameters excepting `config_section` have the same meaning as in `MergeTree`.
+除 `config_section` 外,所有参数的含义都与 `MergeTree` 中相同。
 
- `config_section` — Name of the section in the configuration file, where are the rules of rollup set.
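+
+仅作说明,上述过时语法的一个具体实例(表名与列结构均为假设)可能如下:
+
+``` sql
+CREATE TABLE graphite_data_old
+(
+    EventDate Date,
+    Path String,
+    Time DateTime,
+    Value Float64,
+    Timestamp UInt32
+) ENGINE = GraphiteMergeTree(EventDate, (Path, Time), 8192, 'graphite_rollup')
+```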
-## Rollup configuration {#rollup-configuration}
+## 汇总配置 {#rollup-configuration}
 
-The settings for rollup are defined by the [graphite\_rollup](../server_settings/settings.md#server_settings-graphite_rollup) parameter in the server configuration. The name of the parameter could be any. You can create several configurations and use them for different tables.
+汇总的设置由服务器配置中的 [graphite\_rollup](../../../operations/server_configuration_parameters/settings.md#server_configuration_parameters-graphite_rollup) 参数定义。参数的名称可以是任意的。您可以创建多个配置,并将它们用于不同的表。
 
-Rollup configuration structure:
+汇总配置结构:
 
      required-columns
     patterns
 
-### Required Columns {#required-columns}
+### 必填列 {#required-columns}
 
- `path_column_name` — The name of the column storing the metric name (Graphite sensor). Default value: `Path`.
- `time_column_name` — The name of the column storing the time of measuring the metric. Default value: `Time`.
-- `value_column_name` — The name of the column storing the value of the metric at the time set in `time_column_name`. Default value: `Value`.
+- `value_column_name` — The name of the column storing the value of the metric at the time set in `time_column_name`. 默认值: `Value`.
- `version_column_name` — The name of the column storing the version of the metric. Default value: `Timestamp`.
 
-### Patterns {#patterns}
+### 模式 {#patterns}
 
-Structure of the `patterns` section:
+`patterns` 部分的结构:
 
``` text
pattern
@@ -117,23 +120,23 @@ default
    ...
```
 
-!!! warning "Attention"
-    Patterns must be strictly ordered:
+!!! warning "注意"
+    模式必须严格排序:
 
      1. Patterns without `function` or `retention`.
      1. Patterns with both `function` and `retention`.
      1. Pattern `default`.
 
-When processing a row, ClickHouse checks the rules in the `pattern` sections. Each of `pattern` (including `default`) sections can contain `function` parameter for aggregation, `retention` parameters or both. If the metric name matches the `regexp`, the rules from the `pattern` section (or sections) are applied; otherwise, the rules from the `default` section are used.
+在处理行时,ClickHouse会检查 `pattern` 部分中的规则。每个 `pattern`(包括 `default`)部分都可以包含用于聚合的 `function` 参数、`retention` 参数,或两者兼有。如果指标名称与某个 `regexp` 匹配,则应用对应 `pattern` 部分的规则;否则,使用 `default` 部分的规则。
 
-Fields for `pattern` and `default` sections:
+`pattern` 和 `default` 部分的字段:
 
- `regexp`– A pattern for the metric name.
- `age` – The minimum age of the data in seconds.
- `precision`– How precisely to define the age of the data in seconds. Should be a divisor for 86400 (seconds in a day).
- `function` – The name of the aggregating function to apply to data whose age falls within the range `[age, age + precision]`.
-### Configuration Example {#configuration-example}
+### 配置示例 {#configuration-example}
 
``` xml
 
@@ -168,4 +171,4 @@ Fields for `pattern` and `default` sections:
 
```
 
-[Original article](https://clickhouse.tech/docs/en/operations/table_engines/graphitemergetree/)
+[原始文章](https://clickhouse.tech/docs/en/operations/table_engines/graphitemergetree/)
diff --git a/docs/zh/engines/table_engines/mergetree_family/index.md b/docs/zh/engines/table_engines/mergetree_family/index.md
new file mode 100644
index 00000000000..1cbf6104dc3
--- /dev/null
+++ b/docs/zh/engines/table_engines/mergetree_family/index.md
@@ -0,0 +1,8 @@
+---
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_folder_title: "MergeTree\u5BB6\u65CF"
+toc_priority: 28
+---
+
+
diff --git a/docs/zh/operations/table_engines/mergetree.md b/docs/zh/engines/table_engines/mergetree_family/mergetree.md
similarity index 88%
rename from docs/zh/operations/table_engines/mergetree.md
rename to docs/zh/engines/table_engines/mergetree_family/mergetree.md
index 61d36fea9fa..0778ab2487d 100644
--- a/docs/zh/operations/table_engines/mergetree.md
+++ b/docs/zh/engines/table_engines/mergetree_family/mergetree.md
@@ -1,3 +1,4 @@
+
 # MergeTree {#table_engines-mergetree}
 
 Clickhouse 中最强大的表引擎当属 `MergeTree` (合并树)引擎及该系列(`*MergeTree`)中的其他引擎。
@@ -23,7 +24,7 @@ Clickhouse 中最强大的表引擎当属 `MergeTree` (合并树)引擎及
 需要的话,你可以给表设置一个采样方法。
 
 !!! 注意 "注意"
-    [Merge](merge.md) 引擎并不属于 `*MergeTree` 系列。
+    [Merge](../special/merge.md) 引擎并不属于 `*MergeTree` 系列。
 
 ## 建表 {#table_engine-mergetree-creating-a-table}
 
@@ -41,7 +42,7 @@ Clickhouse 中最强大的表引擎当属 `MergeTree` (合并树)引擎及
    [SAMPLE BY expr]
    [SETTINGS name=value, ...]
 
-请求参数的描述,参考 [请求描述](../../query_language/create.md) 。
+请求参数的描述,参考 [请求描述](../../../sql_reference/statements/create.md) 。
 
 
 
@@ -51,7 +52,7 @@ Clickhouse 中最强大的表引擎当属 `MergeTree` (合并树)引擎及
 
 - `PARTITION BY` — [分区键](custom_partitioning_key.md) 。
 
-    要按月分区,可以使用表达式 `toYYYYMM(date_column)` ,这里的 `date_column` 是一个 [Date](../../data_types/date.md) 类型的列。这里该分区名格式会是 `"YYYYMM"` 这样。
+    要按月分区,可以使用表达式 `toYYYYMM(date_column)` ,这里的 `date_column` 是一个 [Date](../../../sql_reference/data_types/date.md) 类型的列。这里该分区名格式会是 `"YYYYMM"` 这样。
 
 - `ORDER BY` — 表的排序键。
 
@@ -72,7 +73,7 @@ Clickhouse 中最强大的表引擎当属 `MergeTree` (合并树)引擎及
    - `index_granularity` — 索引粒度。即索引中相邻『标记』间的数据行数。默认值,8192 。该列表中所有可用的参数可以从这里查看 [MergeTreeSettings.h](https://github.com/ClickHouse/ClickHouse/blob/master/src/Storages/MergeTree/MergeTreeSettings.h) 。
    - `index_granularity_bytes` — 索引粒度,以字节为单位,默认值: 10Mb。如果仅按数据行数限制索引粒度, 请设置为0(不建议)。
    - `enable_mixed_granularity_parts` — 启用或禁用通过 `index_granularity_bytes` 控制索引粒度的大小。在19.11版本之前, 只有 `index_granularity` 配置能够用于限制索引粒度的大小。当从大表(数十或数百兆)中查询数据时候,`index_granularity_bytes` 配置能够提升ClickHouse的性能。如果你的表内数据量很大,可以开启这项配置用以提升`SELECT` 查询的性能。
-    - `use_minimalistic_part_header_in_zookeeper` — 数据片段头在 ZooKeeper 中的存储方式。如果设置了 `use_minimalistic_part_header_in_zookeeper=1` ,ZooKeeper 会存储更少的数据。更多信息参考『服务配置参数』这章中的 [设置描述](../server_settings/settings.md#server-settings-use_minimalistic_part_header_in_zookeeper) 。
+    - `use_minimalistic_part_header_in_zookeeper` — 数据片段头在 ZooKeeper 中的存储方式。如果设置了 `use_minimalistic_part_header_in_zookeeper=1` ,ZooKeeper 会存储更少的数据。更多信息参考『服务配置参数』这章中的 [设置描述](../../../operations/server_configuration_parameters/settings.md#server-settings-use_minimalistic_part_header_in_zookeeper) 。
    - `min_merge_bytes_to_use_direct_io` — 使用直接 I/O 来操作磁盘的合并操作时要求的最小数据量。合并数据片段时,ClickHouse 会计算要被合并的所有数据的总存储空间。如果大小超过了 `min_merge_bytes_to_use_direct_io` 设置的字节数,则 ClickHouse 将使用直接 I/O 接口(`O_DIRECT` 选项)对磁盘读写。如果设置 `min_merge_bytes_to_use_direct_io = 0` ,则会禁用直接 I/O。默认值:`10 * 1024 * 1024 * 1024` 字节。
    - `merge_with_ttl_timeout` — TTL合并频率的最小间隔时间。默认值: 86400 (1 天)。
@@ -85,7 +86,7 @@ Clickhouse 中最强大的表引擎当属 `MergeTree` (合并树)引擎及
 
 示例中,我们设为按月分区。
 
-同时我们设置了一个按用户ID哈希的抽样表达式。这让你可以有该表中每个 `CounterID` 和 `EventDate` 下面的数据的伪随机分布。如果你在查询时指定了 [SAMPLE](../../query_language/select.md#select-sample-clause) 子句。 ClickHouse会返回对于用户子集的一个均匀的伪随机数据采样。
+同时我们设置了一个按用户ID哈希的抽样表达式。这让你可以有该表中每个 `CounterID` 和 `EventDate` 下面的数据的伪随机分布。如果你在查询时指定了 [SAMPLE](../../../sql_reference/statements/select.md#select-sample-clause) 子句, ClickHouse会返回对于用户子集的一个均匀的伪随机数据采样。
 
 `index_granularity` 可省略,默认值为 8192 。
 
@@ -105,9 +106,9 @@ Clickhouse 中最强大的表引擎当属 `MergeTree` (合并树)引擎及
 
 **MergeTree() 参数**
 
-- `date-column` — 类型为 [Date](../../data_types/date.md) 的列名。ClickHouse 会自动依据这个列按月创建分区。分区名格式为 `"YYYYMM"` 。
+- `date-column` — 类型为 [Date](../../../sql_reference/data_types/date.md) 的列名。ClickHouse 会自动依据这个列按月创建分区。分区名格式为 `"YYYYMM"` 。
 - `sampling_expression` — 采样表达式。
-- `(primary, key)` — 主键。类型 — [Tuple()](../../data_types/tuple.md)
+- `(primary, key)` — 主键。类型 — [Tuple()](../../../sql_reference/data_types/tuple.md)
 - `index_granularity` — 索引粒度。即索引中相邻『标记』间的数据行数。设为 8192 可以适用大部分场景。
 
 **示例**
@@ -168,7 +169,7 @@ ClickHouse 不要求主键惟一。所以,你可以插入多条具有相同主
 
 ClickHouse 以主键排序片段数据,所以,数据的一致性越高,压缩越好。
 
 - [CollapsingMergeTree](collapsingmergetree.md#table_engine-collapsingmergetree) 和 [SummingMergeTree](summingmergetree.md) 引擎里,数据合并时,会有额外的处理逻辑。
 
     在这种情况下,指定一个跟主键不同的 *排序键* 也是有意义的。
 
@@ -191,7 +192,7 @@ ClickHouse 不要求主键惟一。所以,你可以插入多条具有相同主
 
 这种情况下,主键中仅预留少量列保证高效范围扫描, 剩下的维度列放到排序键元组里。这样是合理的。
 
-[排序键的修改](../../query_language/alter.md) 是轻量级的操作,因为一个新列同时被加入到表里和排序键后时,已存在的数据片段并不需要修改。由于旧的排序键是新排序键的前缀,并且刚刚添加的列中没有数据,因此在表修改时的数据对于新旧的排序键来说都是有序的。
+[排序键的修改](../../../sql_reference/statements/alter.md) 是轻量级的操作,因为一个新列同时被加入到表里和排序键后时,已存在的数据片段并不需要修改。由于旧的排序键是新排序键的前缀,并且刚刚添加的列中没有数据,因此在表修改时的数据对于新旧的排序键来说都是有序的。
 
 ### 索引和分区在查询中的应用 {#suo-yin-he-fen-qu-zai-cha-xun-zhong-de-ying-yong}
 
@@ -221,7 +222,7 @@ ClickHouse 会依据主键索引剪掉不符合的数据,依据按月分区的
SELECT count() FROM table WHERE CounterID = 34 OR URL LIKE '%upyachka%'
```
 
-要检查 ClickHouse 执行一个查询时能否使用索引,可设置 [force\_index\_by\_date](../settings/settings.md#settings-force_index_by_date) 和 [force\_primary\_key](../settings/settings.md) 。
+要检查 ClickHouse 执行一个查询时能否使用索引,可设置 [force\_index\_by\_date](../../../operations/settings/settings.md#settings-force_index_by_date) 和 [force\_primary\_key](../../../operations/settings/settings.md) 。
 
 按月分区的分区键是只能读取包含适当范围日期的数据块。这种情况下,数据块会包含很多天(最多整月)的数据。在块中,数据按主键排序,主键第一列可能不包含日期。因此,仅使用日期而没有带主键前缀条件的查询将会导致读取超过这个日期范围。
 
@@ -299,14 +300,14 @@ INDEX sample_index3 (lower(str), str) TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY
 
 TTL可以设置值的生命周期,它既可以为整张表设置,也可以为每个列字段单独设置。如果`TTL`同时作用于表和字段,ClickHouse会使用先到期的那个。
 
-被设置TTL的表,必须拥有[Date](../../data_types/date.md) 或 [DateTime](../../data_types/datetime.md) 类型的字段。要定义数据的生命周期,需要在这个日期字段上使用操作符,例如:
+被设置TTL的表,必须拥有[Date](../../../sql_reference/data_types/date.md) 或 [DateTime](../../../sql_reference/data_types/datetime.md) 类型的字段。要定义数据的生命周期,需要在这个日期字段上使用操作符,例如:
 
``` sql
TTL time_column
TTL time_column + interval
```
 
-要定义`interval`, 需要使用 [time interval](../../query_language/operators.md#operators-datetime) 操作符。
+要定义`interval`, 需要使用 [时间间隔](../../../sql_reference/operators.md#operators-datetime) 操作符。
 
``` sql
TTL date_time + INTERVAL 1 MONTH
@@ -385,10 +386,10 @@ ALTER TABLE example_table
 
 当ClickHouse发现数据过期时, 它将会执行一个计划外的合并。要控制这类合并的频率, 你可以设置 [merge\_with\_ttl\_timeout](#mergetree_setting-merge_with_ttl_timeout)。如果该值被设置的太低, 它将导致执行许多的计划外合并,这可能会消耗大量资源。
 
-如果在合并的时候执行`SELECT` 查询, 则可能会得到过期的数据。为了避免这种情况,可以在`SELECT`之前使用 [OPTIMIZE](../../query_language/misc.md#misc_operations-optimize) 查询。
+如果在合并的时候执行`SELECT` 查询, 则可能会得到过期的数据。为了避免这种情况,可以在`SELECT`之前使用 [OPTIMIZE](../../../sql_reference/statements/misc.md#misc_operations-optimize) 查询。
 
-## Using Multiple Block Devices for Data Storage {#table_engine-mergetree-multiple-volumes}
+## 使用多个块设备进行数据存储 {#table_engine-mergetree-multiple-volumes}
 
-### Configuration {#table_engine-mergetree-multiple-volumes-configure}
+### 配置 {#table_engine-mergetree-multiple-volumes-configure}
 
 [来源文章](https://clickhouse.tech/docs/en/operations/table_engines/mergetree/)
diff --git a/docs/zh/operations/table_engines/replacingmergetree.md b/docs/zh/engines/table_engines/mergetree_family/replacingmergetree.md
similarity index 92%
rename from docs/zh/operations/table_engines/replacingmergetree.md
rename to docs/zh/engines/table_engines/mergetree_family/replacingmergetree.md
index 66c3246f272..720560bf1a4 100644
--- a/docs/zh/operations/table_engines/replacingmergetree.md
+++ b/docs/zh/engines/table_engines/mergetree_family/replacingmergetree.md
@@ -1,4 +1,5 @@
-# ReplacingMergeTree {#replacingmergetree}
+
+# ReplacingMergeTree {#replacingmergetree}
 
 该引擎和[MergeTree](mergetree.md)的不同之处在于它会删除具有相同主键的重复项。
 
@@ -21,9 +22,9 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
[SETTINGS name=value, ...]
```
 
-请求参数的描述,参考[请求参数](../../query_language/create.md)。
+请求参数的描述,参考[请求参数](../../../sql_reference/statements/create.md)。
 
-**ReplacingMergeTree Parameters**
+**ReplacingMergeTree 参数**
 
 - `ver` — 版本列。类型为 `UInt*`, `Date` 或 `DateTime`。可选参数。
 
diff --git a/docs/zh/operations/table_engines/replication.md b/docs/zh/engines/table_engines/mergetree_family/replication.md
similarity index 94%
rename from docs/zh/operations/table_engines/replication.md
rename to docs/zh/engines/table_engines/mergetree_family/replication.md
index c2f4d3eb849..e518eb805c4 100644
--- a/docs/zh/operations/table_engines/replication.md
+++ b/docs/zh/engines/table_engines/mergetree_family/replication.md
@@ -1,3 +1,4 @@
+
 # 数据副本 {#table_engines-replication}
 
 只有 MergeTree 系列里的表可支持副本:
@@ -7,14 +8,14 @@
 - ReplicatedReplacingMergeTree
 - ReplicatedAggregatingMergeTree
 - ReplicatedCollapsingMergeTree
 - ReplicatedVersionedCollapsingMergeTree
 - ReplicatedGraphiteMergeTree
 
 副本是表级别的,不是整个服务器级的。所以,服务器里可以同时有复制表和非复制表。
 
 副本不依赖分片。每个分片有它自己的独立副本。
 
-对于 `INSERT` 和 `ALTER` 语句操作数据的会在压缩的情况下被复制(更多信息,看 [ALTER](../../query_language/alter.md#query_language_queries_alter) )。
+对于 `INSERT` 和 `ALTER` 语句操作数据的会在压缩的情况下被复制(更多信息,看 [ALTER](../../../sql_reference/statements/alter.md#query_language_queries_alter) )。
 
 而 `CREATE`,`DROP`,`ATTACH`,`DETACH` 和 `RENAME` 语句只会在单个服务器上执行,不会被复制。
 
@@ -47,7 +48,7 @@
 
 如果配置文件中没有设置 ZooKeeper ,则无法创建复制表,并且任何现有的复制表都将变为只读。
 
-`SELECT` 查询并不需要借助 ZooKeeper ,复本并不影响 `SELECT` 的性能,查询复制表与非复制表速度是一样的。查询分布式表时,ClickHouse的处理方式可通过设置 [max\_replica\_delay\_for\_distributed\_queries](../settings/settings.md#settings-max_replica_delay_for_distributed_queries) 和 [fallback\_to\_stale\_replicas\_for\_distributed\_queries](../settings/settings.md) 修改。
+`SELECT` 查询并不需要借助 ZooKeeper ,复本并不影响 `SELECT` 的性能,查询复制表与非复制表速度是一样的。查询分布式表时,ClickHouse的处理方式可通过设置
[max\_replica\_delay\_for\_distributed\_queries](../../../operations/settings/settings.md#settings-max_replica_delay_for_distributed_queries) 和 [fallback\_to\_stale\_replicas\_for\_distributed\_queries](../../../operations/settings/settings.md) 修改。
 
 对于每个 `INSERT` 语句,会通过几个事务将十来个记录添加到 ZooKeeper。(确切地说,这是针对每个插入的数据块; 每个 INSERT 语句的每 `max_insert_block_size = 1048576` 行和最后剩余的都各算作一个块。)相比非复制表,写 zk 会导致 `INSERT` 的延迟略长一些。但只要你按照建议每秒不超过一个 `INSERT` 地批量插入数据,不会有任何问题。一个 ZooKeeper 集群能给整个 ClickHouse 集群支撑协调每秒几百个 `INSERT`。数据插入的吞吐量(每秒的行数)可以跟不用复制的数据一样高。
@@ -59,7 +60,7 @@
 
 单个数据块写入是原子的。 INSERT 的数据按每块最多 `max_insert_block_size = 1048576` 行进行分块,换句话说,如果 `INSERT` 插入的行少于 1048576,则该 INSERT 是原子的。
 
-数据块会去重。对于被多次写的相同数据块(大小相同且具有相同顺序的相同行的数据块),该块仅会写入一次。这样设计的原因是万一在网络故障时客户端应用程序不知道数据是否成功写入DB,此时可以简单地重复 `INSERT` 。把相同的数据发送给多个副本 INSERT 并不会有问题。因为这些 `INSERT` 是完全相同的(会被去重)。去重参数参看服务器设置 [merge\_tree](../server_settings/settings.md) 。(注意:Replicated\*MergeTree 才会去重,不需要 zookeeper 的不带 MergeTree 不会去重)
+数据块会去重。对于被多次写的相同数据块(大小相同且具有相同顺序的相同行的数据块),该块仅会写入一次。这样设计的原因是万一在网络故障时客户端应用程序不知道数据是否成功写入DB,此时可以简单地重复 `INSERT` 。把相同的数据发送给多个副本 INSERT 并不会有问题。因为这些 `INSERT` 是完全相同的(会被去重)。去重参数参看服务器设置 [merge\_tree](../../../operations/server_configuration_parameters/settings.md) 。(注意:Replicated\*MergeTree 才会去重,不需要 zookeeper 的不带 MergeTree 不会去重)
 
 在复制期间,只有要插入的源数据通过网络传输。进一步的数据转换(合并)会在所有副本上以相同的方式进行处理执行。这样可以最大限度地减少网络使用,这意味着即使副本在不同的数据中心,数据同步也能工作良好。(能在不同数据中心中的同步数据是副本机制的主要目标。)
 
@@ -119,7 +120,7 @@ CREATE TABLE table_name
 `{layer}-{shard}` 是分片标识部分。在此示例中,由于 Yandex.Metrica 集群使用了两级分片,所以它是由两部分组成的。但对于大多数情况来说,你只需保留 {shard} 占位符即可,它会替换展开为分片标识。
 
 `table_name` 是该表在 ZooKeeper 中的名称。使其与 ClickHouse 中的表名相同比较好。 这里它被明确定义,跟 ClickHouse 表名不一样,它并不会被 RENAME 语句修改。
-*HINT*: you could add a database name in front of `table_name` as well. E.g. `db_name.table_name`
+*提示*:你也可以在 `table_name` 前面加上数据库名称,例如 `db_name.table_name`。
 
 副本名称用于标识同一个表分片的不同副本。你可以使用服务器名称,如上例所示。同个分片中不同副本的副本名称要唯一。
 
diff --git a/docs/zh/operations/table_engines/summingmergetree.md b/docs/zh/engines/table_engines/mergetree_family/summingmergetree.md
similarity index 86%
rename from docs/zh/operations/table_engines/summingmergetree.md
rename to docs/zh/engines/table_engines/mergetree_family/summingmergetree.md
index 326ccb7118e..73576b00346 100644
--- a/docs/zh/operations/table_engines/summingmergetree.md
+++ b/docs/zh/engines/table_engines/mergetree_family/summingmergetree.md
@@ -1,3 +1,4 @@
+
 # SummingMergeTree {#summingmergetree}
 
 该引擎继承自 [MergeTree](mergetree.md)。区别在于,当合并 `SummingMergeTree` 表的数据片段时,ClickHouse 会把所有具有相同主键的行合并为一行,该行包含了被合并的行中具有数值数据类型的列的汇总值。如果主键的组合方式使得单个键值对应于大量的行,则可以显著的减少存储空间并加快数据查询的速度。
@@ -17,7 +18,7 @@
    [SAMPLE BY expr]
    [SETTINGS name=value, ...]
-请求参数的描述,参考 [请求描述](../../query_language/create.md)。
+请求参数的描述,参考 [请求描述](../../../sql_reference/statements/create.md)。
 
 **SummingMergeTree 的参数**
 
@@ -83,7 +84,7 @@ SELECT key, sum(value) FROM summtt GROUP BY key
 
 当数据被插入到表中时,他们将被原样保存。ClickHouse 定期合并插入的数据片段,并在这个时候对所有具有相同主键的行中的列进行汇总,将这些行替换为包含汇总数据的一行记录。
 
-ClickHouse 会按片段合并数据,以至于不同的数据片段中会包含具有相同主键的行,即单个汇总片段将会是不完整的。因此,聚合函数 [sum()](../../query_language/agg_functions/reference.md#agg_function-sum) 和 `GROUP BY` 子句应该在(`SELECT`)查询语句中被使用,如上文中的例子所述。
+ClickHouse 会按片段合并数据,以至于不同的数据片段中会包含具有相同主键的行,即单个汇总片段将会是不完整的。因此,聚合函数 [sum()](../../../sql_reference/aggregate_functions/reference.md#agg_function-sum) 和 `GROUP BY` 子句应该在(`SELECT`)查询语句中被使用,如上文中的例子所述。
 
 ### 汇总的通用规则 {#hui-zong-de-tong-yong-gui-ze}
 
@@ -97,7 +98,7 @@ ClickHouse 会按片段合并数据,以至于不同的数据片段中会包含
 
 ### AggregateFunction 列中的汇总 {#aggregatefunction-lie-zhong-de-hui-zong}
 
-对于 [AggregateFunction 类型](../../data_types/nested_data_structures/aggregatefunction.md)的列,ClickHouse 根据对应函数表现为 [AggregatingMergeTree](aggregatingmergetree.md) 引擎的聚合。
+对于 [AggregateFunction 类型](../../../sql_reference/data_types/aggregatefunction.md)的列,ClickHouse 根据对应函数表现为 [AggregatingMergeTree](aggregatingmergetree.md) 引擎的聚合。
 
 ### 嵌套结构 {#qian-tao-jie-gou}
 
@@ -117,7 +118,7 @@ ClickHouse 会按片段合并数据,以至于不同的数据片段中会包含
    [(1, 100)] + [(1, 150), (2, 150)] -> [(1, 250), (2, 150)]
    [(1, 100), (2, 150)] + [(1, -100)] -> [(2, 150)]
 
-请求数据时,使用 [sumMap(key, value)](../../query_language/agg_functions/reference.md) 函数来对 `Map` 进行聚合。
+请求数据时,使用 [sumMap(key, value)](../../../sql_reference/aggregate_functions/reference.md) 函数来对 `Map` 进行聚合。
 
 对于嵌套数据结构,你无需在列的元组中指定列以进行汇总。
 
diff --git a/docs/zh/engines/table_engines/mergetree_family/versionedcollapsingmergetree.md b/docs/zh/engines/table_engines/mergetree_family/versionedcollapsingmergetree.md
new file mode 100644
index 00000000000..37f11bc21ad
--- /dev/null
+++ b/docs/zh/engines/table_engines/mergetree_family/versionedcollapsingmergetree.md
@@ -0,0 +1,238 @@
+---
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_priority: 37
+toc_title: VersionedCollapsingMergeTree
+---
+
+# VersionedCollapsingMergeTree {#versionedcollapsingmergetree}
+
+这个引擎:
+
+- 允许快速写入不断变化的对象状态。
+- 在后台删除旧的对象状态。这显著降低了存储体积。
+
+有关详细信息,请参阅 [折叠](#table_engines-versionedcollapsingmergetree) 一节。
+
+引擎继承自 [MergeTree](mergetree.md#table_engines-mergetree),并将折叠行的逻辑添加到合并数据片段的算法中。`VersionedCollapsingMergeTree` 与 [CollapsingMergeTree](collapsingmergetree.md) 用途相同,但使用不同的折叠算法,允许以多个线程按任意顺序插入数据。特别是,`Version` 列有助于正确折叠行,即使它们以错误的顺序插入。相比之下,`CollapsingMergeTree` 只允许严格连续的插入。
+
+## 创建表 {#creating-a-table}
+
+``` sql
+CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
+(
+    name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
+    name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
+    ...
+) ENGINE = VersionedCollapsingMergeTree(sign, version)
+[PARTITION BY expr]
+[ORDER BY expr]
+[SAMPLE BY expr]
+[SETTINGS name=value, ...]
+```
+
+有关查询参数的说明,请参阅 [查询说明](../../../sql_reference/statements/create.md)。
+
+**引擎参数**
+
+``` sql
+VersionedCollapsingMergeTree(sign, version)
+```
+
+- `sign` — Name of the column with the type of row: `1` 是一个 "state" 行,`-1` 是一个 "cancel" 行。
+
+    列数据类型应为 `Int8`。
+
+- `version` — Name of the column with the version of the object state.
+
+    列数据类型应为 `UInt*`。
+
+**查询子句**
+
+创建 `VersionedCollapsingMergeTree` 表时,需要与创建 `MergeTree` 表时相同的 [子句](mergetree.md)。
+
+
+
+不推荐使用的创建表的方法
+
+!!! attention "注意"
+    不要在新项目中使用此方法。如果可能,请将旧项目切换到上述方法。
+
+``` sql
+CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
+(
+    name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
+    name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
+    ...
+) ENGINE [=] VersionedCollapsingMergeTree(date-column [, sampling_expression], (primary, key), index_granularity, sign, version)
+```
+
+除 `sign` 和 `version` 外,所有参数的含义都与 `MergeTree` 中相同。
+
+- `sign` — Name of the column with the type of row: `1` 是一个 "state" 行,`-1` 是一个 "cancel" 行。
+
+    Column Data Type — `Int8`.
+
+- `version` — Name of the column with the version of the object state.
+
+    列数据类型应为 `UInt*`。
+
+
+
+## 折叠 {#table_engines-versionedcollapsingmergetree}
+
+### 数据 {#data}
+
+考虑一种情况,您需要为某个对象保存不断变化的数据。对于一个对象保留一行,并在发生更改时更新该行是合理的。但是,对于数据库管理系统来说,更新操作非常昂贵且缓慢,因为它需要重写存储中的数据。如果需要快速写入数据,更新是不可接受的,但可以按如下方式把对象的变更顺序地写入。
+
+写入行时使用 `Sign` 列。如果 `Sign = 1`,表示该行是一个对象的状态(我们称之为 "state" 行)。如果 `Sign = -1`,表示具有相同属性的对象状态被取消(我们称之为 "cancel" 行)。同时还要使用 `Version` 列,它应该用单独的数字标识对象的每个状态。
+
+例如,我们要计算用户在某个网站上访问了多少页面以及他们在那里停留了多长时间。在某个时间点,我们写入下面的行来记录用户活动的状态:
+
+``` text
+┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┬─Version─┐
+│ 4324182021466249494 │         5 │      146 │    1 │       1 |
+└─────────────────────┴───────────┴──────────┴──────┴─────────┘
+```
+
+在稍后的某个时候,我们注册了用户活动的变化,并写入以下两行。
+
+``` text
+┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┬─Version─┐
+│ 4324182021466249494 │         5 │      146 │   -1 │       1 |
+│ 4324182021466249494 │         6 │      185 │    1 │       2 |
+└─────────────────────┴───────────┴──────────┴──────┴─────────┘
+```
+
+第一行取消了对象(用户)先前的状态。它应该复制被取消状态的所有字段,除了 `Sign`。
+
+第二行包含当前状态。
+
+因为我们只需要用户活动的最后一个状态,下面这些行
+
+``` text
+┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┬─Version─┐
+│ 4324182021466249494 │         5 │      146 │    1 │       1 |
+│ 4324182021466249494 │         5 │      146 │   -1 │       1 |
+└─────────────────────┴───────────┴──────────┴──────┴─────────┘
+```
+
+可以被删除,从而折叠掉对象的无效(旧)状态。`VersionedCollapsingMergeTree` 会在合并数据片段时执行此操作。
+
+要了解为什么每次更改都需要两行,请参阅 [算法](#table_engines-versionedcollapsingmergetree-algorithm)。
+
+**使用注意事项**
+
+1. 写入数据的程序应该记住对象的状态,以便能够取消它。"cancel" 行应该复制 "state" 行的所有字段,只有 `Sign` 相反。这增加了存储的初始大小,但允许快速写入数据。
+2. 列中持续增长的长数组会由于写入负载而降低引擎的效率。数据越简单,效率就越高。
+3. `SELECT` 结果在很大程度上取决于对象变化历史的一致性。准备插入数据时要仔细。不一致的数据会导致不可预测的结果,例如会话深度等非负指标出现负值。
+
+### 算法 {#table_engines-versionedcollapsingmergetree-algorithm}
+
+当ClickHouse合并数据片段时,它会删除具有相同主键和版本但 `Sign` 不同的每一对行。行的顺序并不重要。
+
+当ClickHouse插入数据时,它会按主键对行进行排序。如果 `Version` 列不在主键中,ClickHouse会将其作为最后一个字段隐式添加到主键中,并用它进行排序。
+
+## 选择数据 {#selecting-data}
+
+ClickHouse不保证具有相同主键的所有行都位于同一个结果数据片段中,甚至不保证位于同一台物理服务器上。对于写入数据和随后的数据片段合并都是如此。此外,ClickHouse使用多个线程处理 `SELECT` 查询,无法预测结果中行的顺序。这意味着,如果需要从 `VersionedCollapsingMergeTree` 表中获得完全"折叠"的数据,就必须进行聚合。
+
+要完成折叠,请使用 `GROUP BY` 子句以及考虑符号的聚合函数。例如,要计算数量,请使用 `sum(Sign)` 而不是 `count()`;要计算某列的总和,请使用 `sum(Sign * x)` 而不是 `sum(x)`,并添加 `HAVING sum(Sign) > 0`。
+
+聚合 `count`、`sum` 和 `avg` 可以用这种方式计算。如果对象至少有一个未折叠的状态,则可以计算聚合 `uniq`。聚合 `min` 和 `max` 无法计算,因为 `VersionedCollapsingMergeTree` 不保存已折叠状态的值的历史记录。
+
+如果您需要提取"折叠"后的数据而不进行聚合(例如,检查是否存在最新值满足某些条件的行),可以在 `FROM` 子句中使用 `FINAL` 修饰符。这种方法效率低下,不应该用于大表。
+
+## 使用示例 {#example-of-use}
+
+示例数据:
+
+``` text
+┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┬─Version─┐
+│ 4324182021466249494 │         5 │      146 │    1 │       1 |
+│ 4324182021466249494 │         5 │      146 │   -1 │       1 |
+│ 4324182021466249494 │         6 │      185 │    1 │       2 |
+└─────────────────────┴───────────┴──────────┴──────┴─────────┘
+```
+
+创建表:
+
+``` sql
+CREATE TABLE UAct
+(
+    UserID UInt64,
+    PageViews UInt8,
+    Duration UInt8,
+    Sign Int8,
+    Version UInt8
+)
+ENGINE = VersionedCollapsingMergeTree(Sign, Version)
+ORDER BY UserID
+```
+
+插入数据:
+
+``` sql
+INSERT INTO UAct VALUES (4324182021466249494, 5, 146, 1, 1)
+```
+
+``` sql
+INSERT INTO UAct VALUES (4324182021466249494, 5, 146, -1, 1),(4324182021466249494, 6, 185, 1, 2)
+```
+
+我们使用两个 `INSERT` 查询来创建两个不同的数据片段。如果使用单个查询插入数据,ClickHouse只会创建一个数据片段,也就永远不会执行任何合并。
+
+获取数据:
+
+``` sql
+SELECT * FROM UAct
+```
+
+``` text
+┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┬─Version─┐
+│ 4324182021466249494 │         5 │      146 │    1 │       1 │
+└─────────────────────┴───────────┴──────────┴──────┴─────────┘
+┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┬─Version─┐
+│ 4324182021466249494 │         5 │      146 │   -1 │       1 │
+│ 4324182021466249494 │         6 │      185 │    1 │       2 │
+└─────────────────────┴───────────┴──────────┴──────┴─────────┘
+```
+
+我们在这里看到了什么?折叠掉的部分在哪里?
+我们用两个 `INSERT` 查询创建了两个数据片段。`SELECT` 查询在两个线程中执行,因此结果中行的顺序是随机的。
+由于数据片段尚未合并,因此还没有发生折叠。ClickHouse会在我们无法预测的时间点合并数据片段。
+
+这就是为什么我们需要聚合:
+
+``` sql
+SELECT
+    UserID,
+    sum(PageViews * Sign) AS PageViews,
+    sum(Duration * Sign) AS Duration,
+    Version
+FROM UAct
+GROUP BY UserID, Version
+HAVING sum(Sign) > 0
+```
+
+``` text
+┌──────────────UserID─┬─PageViews─┬─Duration─┬─Version─┐
+│ 4324182021466249494 │         6 │      185 │       2 │
+└─────────────────────┴───────────┴──────────┴─────────┘
+```
+
+如果我们不需要聚合,而是希望强制折叠,可以在 `FROM` 子句中使用 `FINAL` 修饰符。
+
+``` sql
+SELECT * FROM UAct FINAL
+```
+
+``` text
+┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┬─Version─┐
+│ 4324182021466249494 │         6 │      185 │    1 │       2 │
+└─────────────────────┴───────────┴──────────┴──────┴─────────┘
+```
+
+这种选择数据的方式效率非常低,不要对大表使用它。
+
+[原始文章](https://clickhouse.tech/docs/en/operations/table_engines/versionedcollapsingmergetree/)
diff --git a/docs/zh/operations/table_engines/buffer.md b/docs/zh/engines/table_engines/special/buffer.md
similarity index 97%
rename from docs/zh/operations/table_engines/buffer.md
rename to docs/zh/engines/table_engines/special/buffer.md
index 70fc8c6cbb2..6b53883be7b 100644
--- a/docs/zh/operations/table_engines/buffer.md
+++ b/docs/zh/engines/table_engines/special/buffer.md
@@ -1,4 +1,5 @@
-# Buffer {#buffer}
+
+# Buffer {#buffer}
 
 缓冲数据写入 RAM 中,周期性地将数据刷新到另一个表。在读取操作时,同时从缓冲区和另一个表读取数据。
 
@@ -50,4 +51,4 @@ PREWHERE,FINAL 和 SAMPLE 对缓冲表不起作用。这些条件将传递到
 
 请注意,一次插入一行数据是没有意义的,即使对于 Buffer 表也是如此。这将只产生每秒几千行的速度,而插入更大的数据块每秒可以产生超过一百万行(参见 «性能» 部分)。
 
-[Original article](https://clickhouse.tech/docs/zh/operations/table_engines/buffer/)
+[原始文章](https://clickhouse.tech/docs/zh/operations/table_engines/buffer/)
diff --git a/docs/zh/operations/table_engines/dictionary.md b/docs/zh/engines/table_engines/special/dictionary.md
similarity index 95%
rename from docs/zh/operations/table_engines/dictionary.md
rename to docs/zh/engines/table_engines/special/dictionary.md
index 3bd6b9d78b6..27da9b40e52 100644
--- a/docs/zh/operations/table_engines/dictionary.md
+++ b/docs/zh/engines/table_engines/special/dictionary.md
@@ -1,4 +1,5 @@
-# Dictionary {#dictionary}
+
+# 字典 {#dictionary}
 
 `Dictionary` 引擎将字典数据展示为一个ClickHouse的表。
 
@@ -57,7 +58,7 @@ WHERE name = 'products'
│ products │ Flat │ UInt64 │ ['title'] │ ['String'] │    23065376 │ 175032 │ ODBC: .products │
└──────────┴──────┴────────┴─────────────────┴─────────────────┴─────────────────┴───────────────┴─────────────────┘
 
-你可以使用 [dictGet\*](../../query_language/functions/ext_dict_functions.md) 函数来获取这种格式的字典数据。
+你可以使用 [dictGet\*](../../../sql_reference/functions/ext_dict_functions.md) 函数来获取这种格式的字典数据。
 
 当你需要获取原始数据,或者是想要使用 `JOIN` 操作的时候,这种视图并没有什么帮助。对于这些情况,你可以使用 `Dictionary` 引擎,它可以将字典数据展示在表中。
 
diff --git a/docs/zh/operations/table_engines/distributed.md b/docs/zh/engines/table_engines/special/distributed.md
similarity index 93%
rename from docs/zh/operations/table_engines/distributed.md
rename to docs/zh/engines/table_engines/special/distributed.md
index b81e52348e6..f31dae7c1ef 100644
--- a/docs/zh/operations/table_engines/distributed.md
+++ b/docs/zh/engines/table_engines/special/distributed.md
@@ -1,4 +1,5 @@
-# Distributed {#distributed}
+
+# Distributed {#distributed}
 
 **分布式引擎本身不存储数据**, 但可以在多个服务器上进行分布式查询。
 读是自动并行的。读取时,远程服务器表的索引(如果有的话)会被使用。
@@ -51,7 +52,7 @@ logs – 服务器配置文件中的集群名称。
 
    ```
 
-这里定义了一个名为‘logs’的集群,它由两个分片组成,每个分片包含两个副本。
+这里定义了一个名为'logs'的集群,它由两个分片组成,每个分片包含两个副本。
 
 分片是指包含数据不同部分的服务器(要读取所有数据,必须访问所有分片)。
 
 副本是存储复制数据的服务器(要读取所有数据,访问任一副本上的数据即可)。
@@ -98,7 +99,7 @@ logs – 服务器配置文件中的集群名称。
 
 若此参数设置为«false»(默认值),写操作会将数据写入所有副本。实质上,这意味着要分布式表本身来复制数据。这种方式不如使用复制表的好,因为不会检查副本的一致性,并且随着时间的推移,副本数据可能会有些不一样。
 
-选择将一行数据发送到哪个分片的方法是,首先计算分片表达式,然后将这个计算结果除以所有分片的权重总和得到余数。该行会发送到那个包含该余数的从’prev\_weight’到’prev\_weights + weight’的半闭半开区间对应的分片上,其中 ‘prev\_weights’ 是该分片前面的所有分片的权重和,‘weight’ 是该分片的权重。例如,如果有两个分片,第一个分片权重为9,而第二个分片权重为10,则余数在 \[0,9) 中的行发给第一个分片,余数在 \[9,19) 中的行发给第二个分片。
+选择将一行数据发送到哪个分片的方法是,首先计算分片表达式,然后将这个计算结果除以所有分片的权重总和得到余数。该行会发送到那个包含该余数的从'prev\_weight'到'prev\_weights + weight'的半闭半开区间对应的分片上,其中 'prev\_weights' 是该分片前面的所有分片的权重和,'weight' 是该分片的权重。例如,如果有两个分片,第一个分片权重为9,而第二个分片权重为10,则余数在 \[0,9) 中的行发给第一个分片,余数在 \[9,19) 中的行发给第二个分片。
 
 分片表达式可以是由常量和表列组成的任何返回整数表达式。例如,您可以使用表达式 'rand()' 来随机分配数据,或者使用 'UserID' 来按用户 ID 的余数分布(相同用户的数据将分配到单个分片上,这可降低带有用户信息的 IN 和 JOIN 的语句运行的复杂度)。如果该列数据分布不够均匀,可以将其包装在散列函数中:intHash64(UserID)。
 
@@ -117,4 +118,4 @@ SELECT 查询会被发送到所有分片,并且无论数据在分片中如何
 
 启用 max\_parallel\_replicas 选项后,会在分表的所有副本上并行查询处理。更多信息,请参阅«设置,max\_parallel\_replicas»部分。
 
-[Original article](https://clickhouse.tech/docs/en/operations/table_engines/distributed/)
+[原始文章](https://clickhouse.tech/docs/en/operations/table_engines/distributed/)
diff --git a/docs/zh/operations/table_engines/external_data.md b/docs/zh/engines/table_engines/special/external_data.md
similarity index 94%
rename from docs/zh/operations/table_engines/external_data.md
rename to docs/zh/engines/table_engines/special/external_data.md
index d993a796e52..399ffd8c0f3 100644
--- a/docs/zh/operations/table_engines/external_data.md
+++ b/docs/zh/engines/table_engines/special/external_data.md
@@ -1,4 +1,5 @@
-# External Data for Query Processing {#external-data-for-query-processing}
+
+# 用于查询处理的外部数据 {#external-data-for-query-processing}
 
 ClickHouse 允许向服务器发送处理查询所需的数据以及 SELECT 查询。这些数据放在一个临时表中(请参阅 «临时表» 一节),可以在查询中使用(例如,在 IN 操作符中)。
 
@@ -58,4 +59,4 @@ curl -F 'passwd=@passwd.tsv;' 'http://localhost:8123/?query=SELECT+shell,+count(
 
 对于分布式查询,将临时表发送到所有远程服务器。
 
-[Original article](https://clickhouse.tech/docs/zh/operations/table_engines/external_data/)
+[原始文章](https://clickhouse.tech/docs/zh/operations/table_engines/external_data/)
diff --git a/docs/zh/operations/table_engines/file.md b/docs/zh/engines/table_engines/special/file.md
similarity index 74%
rename from docs/zh/operations/table_engines/file.md
rename to docs/zh/engines/table_engines/special/file.md
index 10293130088..71c96f8ab43 100644
--- a/docs/zh/operations/table_engines/file.md
+++ b/docs/zh/engines/table_engines/special/file.md
@@ -1,4 +1,5 @@
-# File(InputFormat) {#table_engines-file}
+
+# File(InputFormat) {#table_engines-file}
 
 数据源是以 Clickhouse 支持的一种输入格式(TabSeparated,Native等)存储数据的文件。
 
@@ -12,13 +13,13 @@
 
    File(Format)
 
-选用的 `Format` 需要支持 `INSERT` 或 `SELECT` 。有关支持格式的完整列表,请参阅 [格式](../../interfaces/formats.md#formats)。
+选用的 `Format` 需要支持 `INSERT` 或 `SELECT` 。有关支持格式的完整列表,请参阅 [格式](../../../interfaces/formats.md#formats)。
 
-ClickHouse 不支持给 `File` 指定文件系统路径。它使用服务器配置中 [path](../server_settings/settings.md) 设定的文件夹。
+ClickHouse 不支持给 `File` 指定文件系统路径。它使用服务器配置中 [路径](../../../operations/server_configuration_parameters/settings.md) 设定的文件夹。
 
 使用 `File(Format)` 创建表时,它会在该文件夹中创建空的子目录。当数据写入该表时,它会写到该子目录中的 `data.Format` 文件中。
 
-你也可以在服务器文件系统中手动创建这些子文件夹和文件,然后通过 [ATTACH](../../query_language/misc.md) 将其创建为具有对应名称的表,这样你就可以从该文件中查询数据了。
+你也可以在服务器文件系统中手动创建这些子文件夹和文件,然后通过 [ATTACH](../../../sql_reference/statements/misc.md) 将其创建为具有对应名称的表,这样你就可以从该文件中查询数据了。
 
 !!! 注意 "注意"
注意 "注意" 注意这个功能,因为 ClickHouse 不会跟踪这些文件在外部的更改。在 ClickHouse 中和 ClickHouse 外部同时写入会造成结果是不确定的。 @@ -54,7 +55,7 @@ SELECT * FROM file_engine_table ## 在 Clickhouse-local 中的使用 {#zai-clickhouse-local-zhong-de-shi-yong} -使用 [clickhouse-local](../utils/clickhouse-local.md) 时,File 引擎除了 `Format` 之外,还可以接受文件路径参数。可以使用数字或人类可读的名称来指定标准输入/输出流,例如 `0` 或 `stdin`,`1` 或 `stdout`。 +使用 [ツ环板-ョツ嘉ッツ偲](../../../engines/table_engines/special/file.md) 时,File 引擎除了 `Format` 之外,还可以接受文件路径参数。可以使用数字或人类可读的名称来指定标准输入/输出流,例如 `0` 或 `stdin`,`1` 或 `stdout`。 **例如:** ``` bash diff --git a/docs/zh/engines/table_engines/special/generate.md b/docs/zh/engines/table_engines/special/generate.md new file mode 100644 index 00000000000..6a31e270066 --- /dev/null +++ b/docs/zh/engines/table_engines/special/generate.md @@ -0,0 +1,61 @@ +--- +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_priority: 46 +toc_title: GenerateRandom +--- + +# Generaterandom {#table_engines-generate} + +GenerateRandom表引擎为给定的表架构生成随机数据。 + +使用示例: + +- 在测试中使用填充可重复的大表。 +- 为模糊测试生成随机输入。 + +## 在ClickHouse服务器中的使用 {#usage-in-clickhouse-server} + +``` sql +ENGINE = GenerateRandom(random_seed, max_string_length, max_array_length) +``` + +该 `max_array_length` 和 `max_string_length` 参数指定所有的最大长度 +数组列和字符串相应地在生成的数据中。 + +生成表引擎仅支持 `SELECT` 查询。 + +它支持所有 [数据类型](../../../sql_reference/data_types/index.md) 可以存储在一个表中,除了 `LowCardinality` 和 `AggregateFunction`. + +**示例:** + +**1.** 设置 `generate_engine_table` 表: + +``` sql +CREATE TABLE generate_engine_table (name String, value UInt32) ENGINE = GenerateRandom(1, 5, 3) +``` + +**2.** 查询数据: + +``` sql +SELECT * FROM generate_engine_table LIMIT 3 +``` + +``` text +┌─name─┬──────value─┐ +│ c4xJ │ 1412771199 │ +│ r │ 1791099446 │ +│ 7#$ │ 124312908 │ +└──────┴────────────┘ +``` + +## 实施细节 {#details-of-implementation} + +- 不支持: + - `ALTER` + - `SELECT ... 
+    - `INSERT`
+    - 索引
+    - 复制
+
+[原始文章](https://clickhouse.tech/docs/en/operations/table_engines/generate/)
diff --git a/docs/zh/engines/table_engines/special/index.md b/docs/zh/engines/table_engines/special/index.md
new file mode 100644
index 00000000000..7be40b75fb5
--- /dev/null
+++ b/docs/zh/engines/table_engines/special/index.md
@@ -0,0 +1,8 @@
+---
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_folder_title: "\u7279\u522B"
+toc_priority: 31
+---
+
+
diff --git a/docs/zh/operations/table_engines/join.md b/docs/zh/engines/table_engines/special/join.md
similarity index 75%
rename from docs/zh/operations/table_engines/join.md
rename to docs/zh/engines/table_engines/special/join.md
index 024530cc0b7..33cc0685a52 100644
--- a/docs/zh/operations/table_engines/join.md
+++ b/docs/zh/engines/table_engines/special/join.md
@@ -1,10 +1,11 @@
-# Join {#join}
+
+# Join {#join}
 
 加载好的 JOIN 表数据会常驻内存中。
 
    Join(ANY|ALL, LEFT|INNER, k1[, k2, ...])
 
-引擎参数:`ANY|ALL` – 连接修饰;`LEFT|INNER` – 连接类型。更多信息可参考 [JOIN子句](../../query_language/select.md#select-join)。
+引擎参数:`ANY|ALL` – 连接修饰;`LEFT|INNER` – 连接类型。更多信息可参考 [JOIN子句](../../../sql_reference/statements/select.md#select-join)。
 
 这些参数设置不用带引号,但必须与要 JOIN 表匹配。 k1,k2,……是 USING 子句中要用于连接的关键列。
 
 此引擎表不能用于 GLOBAL JOIN 。
@@ -13,9 +14,9 @@
 
 跟 Set 引擎类似,Join 引擎把数据存储在磁盘中。
 
-### Limitations and Settings {#join-limitations-and-settings}
+### 限制和设置 {#join-limitations-and-settings}
 
-When creating a table, the following settings are applied:
+创建表时,将应用以下设置:
 
 - join\_use\_nulls
 - max\_rows\_in\_join
 - join\_overflow\_mode
 - join\_any\_take\_last\_row
 
-The `Join`-engine tables can't be used in `GLOBAL JOIN` operations.
+`Join` 引擎表不能用于 `GLOBAL JOIN` 操作。
 
 [来源文章](https://clickhouse.tech/docs/en/operations/table_engines/join/)
diff --git a/docs/zh/engines/table_engines/special/materializedview.md b/docs/zh/engines/table_engines/special/materializedview.md
new file mode 100644
index 00000000000..5dc4e261fbd
--- /dev/null
+++ b/docs/zh/engines/table_engines/special/materializedview.md
@@ -0,0 +1,6 @@
+
+# 物化视图 {#wu-hua-shi-tu}
+
+物化视图的使用(更多信息请参阅 [CREATE TABLE](../../../sql_reference/statements/create.md) )。它需要使用一个不同的引擎来存储数据,这个引擎要在创建物化视图时指定。当从表中读取时,它就会使用该引擎。
+
+[来源文章](https://clickhouse.tech/docs/en/operations/table_engines/materializedview/)
diff --git a/docs/zh/operations/table_engines/memory.md b/docs/zh/engines/table_engines/special/memory.md
similarity index 89%
rename from docs/zh/operations/table_engines/memory.md
rename to docs/zh/engines/table_engines/special/memory.md
index a48308f7b17..3fd29813d00 100644
--- a/docs/zh/operations/table_engines/memory.md
+++ b/docs/zh/engines/table_engines/special/memory.md
@@ -1,7 +1,8 @@
-# Memory {#memory}
+
+# Memory {#memory}
 
 Memory 引擎以未压缩的形式将数据存储在 RAM 中。数据完全以读取时获得的形式存储。换句话说,从这张表中读取是很轻松的。并发数据访问是同步的。锁范围小:读写操作不会相互阻塞。不支持索引。阅读是并行化的。在简单查询上达到最大生产率(超过10 GB /秒),因为没有磁盘读取,不需要解压缩或反序列化数据。(值得注意的是,在许多情况下,与 MergeTree 引擎的性能几乎一样高)。重新启动服务器时,表中的数据消失,表将变为空。通常,使用此表引擎是不合理的。但是,它可用于测试,以及在相对较少的行(最多约100,000,000)上需要最高性能的查询。
 
 Memory 引擎是由系统用于临时表进行外部数据的查询(请参阅 «外部数据用于请求处理» 部分),以及用于实现 `GLOBAL IN`(请参见 «IN 运算符» 部分)。
 
-[Original article](https://clickhouse.tech/docs/zh/operations/table_engines/memory/)
+[原始文章](https://clickhouse.tech/docs/zh/operations/table_engines/memory/)
diff --git a/docs/zh/operations/table_engines/merge.md b/docs/zh/engines/table_engines/special/merge.md
similarity index 99%
rename from docs/zh/operations/table_engines/merge.md
rename to docs/zh/engines/table_engines/special/merge.md
index bbcbf8772b4..e4ee3fe92a5 100644
--- a/docs/zh/operations/table_engines/merge.md
+++ b/docs/zh/engines/table_engines/special/merge.md
@@ -1,4 +1,5 @@
-# Merge {#merge}
+
+# Merge {#merge}
 
 `Merge` 引擎 (不要跟 `MergeTree` 引擎混淆) 本身不存储数据,但可用于同时从任意多个其他的表中读取数据。
 读是自动并行的,不支持写入。读取时,那些被真正读取到数据的表的索引(如果有的话)会被使用。
 
diff --git a/docs/zh/operations/table_engines/null.md b/docs/zh/engines/table_engines/special/null.md
similarity index 69%
rename from docs/zh/operations/table_engines/null.md
rename to docs/zh/engines/table_engines/special/null.md
index 94f731f756d..3fd891db393 100644
--- a/docs/zh/operations/table_engines/null.md
+++ b/docs/zh/engines/table_engines/special/null.md
@@ -1,7 +1,8 @@
+
 # Null {#null}
 
 当写入 Null 类型的表时,将忽略数据。从 Null 类型的表中读取时,返回空。
 
 但是,可以在 Null 类型的表上创建物化视图。写入表的数据将转发到视图中。
 
-[Original article](https://clickhouse.tech/docs/en/operations/table_engines/null/)
+[原始文章](https://clickhouse.tech/docs/en/operations/table_engines/null/)
diff --git a/docs/zh/operations/table_engines/set.md b/docs/zh/engines/table_engines/special/set.md
similarity index 87%
rename from docs/zh/operations/table_engines/set.md
rename to docs/zh/engines/table_engines/special/set.md
index e9be9ab7e56..b6ef859b85a 100644
--- a/docs/zh/operations/table_engines/set.md
+++ b/docs/zh/engines/table_engines/special/set.md
@@ -1,4 +1,5 @@
-# Set {#set}
+
+# Set {#set}
 
 始终存在于 RAM 中的数据集。它适用于IN运算符的右侧(请参见 «IN运算符» 部分)。
 
@@ -8,4 +9,4 @@
 
 对于强制服务器重启,磁盘上的数据块可能会丢失或损坏。在数据块损坏的情况下,可能需要手动删除包含损坏数据的文件。
 
-[Original article](https://clickhouse.tech/docs/en/operations/table_engines/set/)
+[原始文章](https://clickhouse.tech/docs/en/operations/table_engines/set/)
diff --git a/docs/zh/operations/table_engines/url.md b/docs/zh/engines/table_engines/special/url.md
similarity index 94%
rename from docs/zh/operations/table_engines/url.md
rename to docs/zh/engines/table_engines/special/url.md
index c2ce37adf21..0e02693aea0 100644
--- a/docs/zh/operations/table_engines/url.md
+++ b/docs/zh/engines/table_engines/special/url.md
@@ -1,13 +1,14 @@
-# URL(URL, Format) {#table_engines-url}
+
+# URL(URL, Format) {#table_engines-url}
 
 用于管理远程 HTTP/HTTPS 服务器上的数据。该引擎类似
 [File](file.md) 引擎。
 
 ## 在 ClickHouse 服务器中使用引擎 {#zai-clickhouse-fu-wu-qi-zhong-shi-yong-yin-qing}
 
 `Format` 必须是 ClickHouse 可以用于 `SELECT` 查询的一种格式,若有必要,还要可用于 `INSERT` 。有关支持格式的完整列表,请查看
-[Formats](../../interfaces/formats.md#formats)。
+[格式](../../../interfaces/formats.md#formats)。
 
 `URL` 必须符合统一资源定位符的结构。指定的URL必须指向一个 HTTP 或 HTTPS 服务器。对于服务端响应,
 
diff --git a/docs/zh/operations/table_engines/view.md b/docs/zh/engines/table_engines/special/view.md
similarity index 68%
rename from docs/zh/operations/table_engines/view.md
rename to docs/zh/engines/table_engines/special/view.md
index 5d15fc74218..a17dab21ce2 100644
--- a/docs/zh/operations/table_engines/view.md
+++ b/docs/zh/engines/table_engines/special/view.md
@@ -1,5 +1,6 @@
-# View {#view}
+
+# View {#view}
 
 用于构建视图(有关更多信息,请参阅 `CREATE VIEW 查询`)。 它不存储数据,仅存储指定的 `SELECT` 查询。 从表中读取时,它会运行此查询(并从查询中删除所有不必要的列)。
 
-[Original article](https://clickhouse.tech/docs/en/operations/table_engines/view/)
+[原始文章](https://clickhouse.tech/docs/en/operations/table_engines/view/)
diff --git a/docs/zh/faq/general.md b/docs/zh/faq/general.md
index 17f4fe9b11b..b81d521fa80 100644
--- a/docs/zh/faq/general.md
+++ b/docs/zh/faq/general.md
@@ -1,3 +1,4 @@
+
 # 常见问题 {#chang-jian-wen-ti}
 
 ## 为什么不使用MapReduce之类的产品呢? {#wei-shi-yao-bu-shi-yong-mapreducezhi-lei-de-chan-pin-ni}
@@ -8,11 +9,11 @@
 
 大多数MapReduce系统允许您在集群上执行任意代码。但是,声明性查询语言更适合OLAP,以便快速运行实验。例如,Hadoop包含Hive和Pig,Cloudera Impala或Shark(过时)for Spark,以及Spark SQL、Presto和Apache Drill。与专业系统相比,运行此类任务时的性能非常不理想,所以将这些系统用作Web接口的后端服务是不现实的,因为延迟相对较高。
 
-## What to do if I have a problem with encodings when using Oracle through ODBC? {#oracle-odbc-encodings}
+## 如果我在通过ODBC使用Oracle时遇到编码问题,该怎么办? {#oracle-odbc-encodings}
 
-If you use Oracle through ODBC driver as a source of external dictionaries, you need to set up correctly value for the `NLS_LANG` variable in the `/etc/default/clickhouse`. For more details see the [Oracle NLS\_LANG FAQ](https://www.oracle.com/technetwork/products/globalization/nls-lang-099431.html).
+如果您通过ODBC驱动程序使用Oracle作为外部字典的数据源,则需要在 `/etc/default/clickhouse` 中为 `NLS_LANG` 变量设置正确的值。欲了解更多详情,请参阅 [Oracle NLS\_LANG 常见问题](https://www.oracle.com/technetwork/products/globalization/nls-lang-099431.html)。
 
-**Example**
+**示例**
 
    NLS_LANG=CHINESE_CHINA.ZHS16GBK
 
diff --git a/docs/zh/faq/index.md b/docs/zh/faq/index.md
new file mode 100644
index 00000000000..7c0b25dbec0
--- /dev/null
+++ b/docs/zh/faq/index.md
@@ -0,0 +1,8 @@
+---
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_folder_title: F.A.Q.
+toc_priority: 76
+---
+
+
diff --git a/docs/zh/getting_started/example_datasets/amplab_benchmark.md b/docs/zh/getting_started/example_datasets/amplab_benchmark.md
index fc78daa6a46..4c3b26819b1 100644
--- a/docs/zh/getting_started/example_datasets/amplab_benchmark.md
+++ b/docs/zh/getting_started/example_datasets/amplab_benchmark.md
@@ -1,3 +1,4 @@
+
 # AMPLab 大数据基准测试 {#amplab-da-shu-ju-ji-zhun-ce-shi}
 
 参考 https://amplab.cs.berkeley.edu/benchmark/
@@ -119,4 +120,4 @@ ORDER BY totalRevenue DESC
LIMIT 1
```
 
-[Original article](https://clickhouse.tech/docs/en/getting_started/example_datasets/amplab_benchmark/)
+[原始文章](https://clickhouse.tech/docs/en/getting_started/example_datasets/amplab_benchmark/)
diff --git a/docs/zh/getting_started/example_datasets/criteo.md b/docs/zh/getting_started/example_datasets/criteo.md
index 6083566113a..0ae2650b390 100644
--- a/docs/zh/getting_started/example_datasets/criteo.md
+++ b/docs/zh/getting_started/example_datasets/criteo.md
@@ -1,3 +1,4 @@
+
 # Criteo TB级别点击日志 {#criteo-tbji-bie-dian-ji-ri-zhi}
 
 可以从http://labs.criteo.com/downloads/download-terabyte-click-logs/上下载数据
@@ -71,4 +72,4 @@ INSERT INTO criteo SELECT date, clicked, int1, int2, int3, int4, int5, int6, int
DROP TABLE criteo_log;
```
 
-[Original article](https://clickhouse.tech/docs/en/getting_started/example_datasets/criteo/)
+[原始文章](https://clickhouse.tech/docs/en/getting_started/example_datasets/criteo/)
diff --git a/docs/zh/getting_started/example_datasets/index.md b/docs/zh/getting_started/example_datasets/index.md
index a07ff8b0010..4faf3b0ecfc 100644
--- a/docs/zh/getting_started/example_datasets/index.md
+++ b/docs/zh/getting_started/example_datasets/index.md
@@ -1,18 +1,22 @@
 ---
-en_copy: true
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_folder_title: "\u793A\u4F8B\u6570\u636E\u96C6"
+toc_priority: 12
+toc_title: "\u5BFC\u8A00"
 ---
 
-# Example Datasets
+# 示例数据集 {#example-datasets}
 
-This section describes how to obtain example datasets and import them into ClickHouse.
-For some datasets example queries are also available.
+本节介绍如何获取示例数据集并将其导入ClickHouse。
+对于某些数据集,还提供了示例查询。

-* [Anonymized Yandex.Metrica Dataset](metrica.md)
-* [Star Schema Benchmark](star_schema.md)
-* [WikiStat](wikistat.md)
-* [Terabyte of Click Logs from Criteo](criteo.md)
-* [AMPLab Big Data Benchmark](amplab_benchmark.md)
-* [New York Taxi Data](nyc_taxi.md)
-* [OnTime](ontime.md)
+- [匿名的Yandex.Metrica数据集](metrica.md)
+- [星型架构基准测试](star_schema.md)
+- [WikiStat](wikistat.md)
+- [来自Criteo的TB级点击日志](criteo.md)
+- [AMPLab大数据基准测试](amplab_benchmark.md)
+- [纽约出租车数据](nyc_taxi.md)
+- [OnTime](ontime.md)

-[Original article](https://clickhouse.tech/docs/en/getting_started/example_datasets) 
+[原始文章](https://clickhouse.tech/docs/en/getting_started/example_datasets) 
diff --git a/docs/zh/getting_started/example_datasets/metrica.md b/docs/zh/getting_started/example_datasets/metrica.md
index d2a91a02a89..f7e0c86d324 100644
--- a/docs/zh/getting_started/example_datasets/metrica.md
+++ b/docs/zh/getting_started/example_datasets/metrica.md
@@ -1,16 +1,19 @@
 ---
-en_copy: true
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_priority: 21
+toc_title: "Yandex.Metrica\u6570\u636E"
 ---

-# Anonymized Yandex.Metrica Data {#anonymized-yandex-metrica-data}
+# 匿名的Yandex.Metrica数据 {#anonymized-yandex-metrica-data}

-Dataset consists of two tables containing anonymized data about hits (`hits_v1`) and visits (`visits_v1`) of Yandex.Metrica. You can read more about Yandex.Metrica in [ClickHouse history](../../introduction/history.md) section.
+数据集由两个表组成,包含Yandex.Metrica的匿名点击 (`hits_v1`) 和访问 (`visits_v1`) 数据。有关Yandex.Metrica的更多信息,请参阅 [ClickHouse历史](../../introduction/history.md) 部分。

-The dataset consists of two tables, either of them can be downloaded as a compressed `tsv.xz` file or as prepared partitions. In addition to that, an extended version of the `hits` table containing 100 million rows is available as TSV at https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_100m_obfuscated_v1.tsv.xz and as prepared partitions at https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_100m_obfuscated_v1.tar.xz.
+数据集由两个表组成,每个表都可以下载为压缩的 `tsv.xz` 文件或预处理好的分区。除此之外,包含1亿行的 `hits` 扩展版本可以从 https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits\_100m\_obfuscated\_v1.tsv.xz 下载TSV格式,并可以从 https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits\_100m\_obfuscated\_v1.tar.xz 下载预处理好的分区。
-## Obtaining Tables from Prepared Partitions {#obtaining-tables-from-prepared-partitions}
+## 从准备好的分区获取表 {#obtaining-tables-from-prepared-partitions}

-Download and import hits table:
+下载并导入hits表:

 ``` bash
 curl -O https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_v1.tar
@@ -20,7 +23,7 @@ sudo service clickhouse-server restart
 clickhouse-client --query "SELECT COUNT(*) FROM datasets.hits_v1"
 ```

-Download and import visits:
+下载并导入visits表:

 ``` bash
 curl -O https://clickhouse-datasets.s3.yandex.net/visits/partitions/visits_v1.tar
@@ -30,9 +33,9 @@ sudo service clickhouse-server restart
 clickhouse-client --query "SELECT COUNT(*) FROM datasets.visits_v1"
 ```

-## Obtaining Tables from Compressed TSV File {#obtaining-tables-from-compressed-tsv-file}
+## 从压缩TSV文件获取表 {#obtaining-tables-from-compressed-tsv-file}

-Download and import hits from compressed TSV file:
+从压缩的TSV文件下载并导入hits表:

 ``` bash
 curl https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_v1.tsv.xz | unxz --threads=`nproc` > hits_v1.tsv
@@ -46,7 +49,7 @@ clickhouse-client --query "OPTIMIZE TABLE datasets.hits_v1 FINAL"
 clickhouse-client --query "SELECT COUNT(*) FROM datasets.hits_v1"
 ```

-Download and import visits from compressed tsv-file:
+从压缩的tsv文件下载并导入visits表:

 ``` bash
 curl https://clickhouse-datasets.s3.yandex.net/visits/tsv/visits_v1.tsv.xz | unxz --threads=`nproc` > visits_v1.tsv
@@ -60,8 +63,8 @@ clickhouse-client --query "OPTIMIZE TABLE datasets.visits_v1 FINAL"
 clickhouse-client --query "SELECT COUNT(*) FROM datasets.visits_v1"
 ```

-## Example Queries {#example-queries}
+## 查询示例 {#example-queries}

-[ClickHouse tutorial](../../getting_started/tutorial.md) is based on Yandex.Metrica dataset and the recommended way to get started with this dataset is to just go through tutorial.
+[ClickHouse教程](../../getting_started/tutorial.md) 基于Yandex.Metrica数据集,学习使用此数据集的推荐方式就是完成该教程。

-Additional examples of queries to these tables can be found among [stateful tests](https://github.com/ClickHouse/ClickHouse/tree/master/tests/queries/1_stateful) of ClickHouse (they are named `test.hists` and `test.visits` there).
+查询这些表的更多示例可以在ClickHouse的 [有状态测试](https://github.com/ClickHouse/ClickHouse/tree/master/tests/queries/1_stateful) 中找到(它们在那里被命名为 `test.hits` 和 `test.visits`)。
diff --git a/docs/zh/getting_started/example_datasets/nyc_taxi.md b/docs/zh/getting_started/example_datasets/nyc_taxi.md
index 50dcbed0988..e486dbef9a7 100644
--- a/docs/zh/getting_started/example_datasets/nyc_taxi.md
+++ b/docs/zh/getting_started/example_datasets/nyc_taxi.md
@@ -1,3 +1,4 @@
+
 # 纽约市出租车数据 {#niu-yue-shi-chu-zu-che-shu-ju}

 纽约市出租车数据有以下两个方式获取:

@@ -259,7 +260,7 @@ FROM trips
 ```
 这需要3030秒,速度约为每秒428,000行。
-要加快速度,可以使用`Log`引擎替换’MergeTree\`引擎来创建表。 在这种情况下,下载速度超过200秒。
+要加快速度,可以使用 `Log` 引擎替换 `MergeTree` 引擎来创建表。在这种情况下,下载耗时超过200秒。

 这个表需要使用126GB的磁盘空间。

@@ -285,7 +286,7 @@ $ sudo service clickhouse-server restart
 $ clickhouse-client --query "select count(*) from datasets.trips_mergetree"
 ```

-!!! info "Info"
+!!! info "信息"

     如果要运行下面的SQL查询,必须使用完整的表名, `datasets.trips_mergetree`。

@@ -297,7 +298,7 @@ Q1:
 SELECT cab_type, count(*) FROM trips_mergetree GROUP BY cab_type
 ```

-0.490 seconds.
+0.490秒。

 Q2:

@@ -305,7 +306,7 @@ Q2:
 SELECT passenger_count, avg(total_amount) FROM trips_mergetree GROUP BY passenger_count
 ```

-1.224 seconds.
+1.224秒。

 Q3:

@@ -313,7 +314,7 @@ Q3:
 SELECT passenger_count, toYear(pickup_date) AS year, count(*) FROM trips_mergetree GROUP BY passenger_count, year
 ```

-2.104 seconds.
+2.104秒。

 Q4:

@@ -324,11 +325,11 @@ GROUP BY passenger_count, year, distance
 ORDER BY year, count(*) DESC
 ```

-3.593 seconds.
+3.593秒。

 我们使用的是如下配置的服务器:

-Two Intel(R) Xeon(R) CPU E5-2650 v2 @ 2.60GHz, 16 physical kernels total,128 GiB RAM,8x6 TB HD on hardware RAID-5
+两个 Intel(R) Xeon(R) CPU E5-2650 v2 @ 2.60GHz,共16个物理核心,128GiB RAM,硬件RAID-5上的8×6TB HD

 执行时间是取三次运行中最好的值,但是从第二次查询开始,查询就将从文件系统的缓存中读取数据。同时在每次读取和处理后不再进行缓存。

@@ -356,29 +357,29 @@ INSERT INTO trips_mergetree_x3 SELECT * FROM trips_mergetree

 在三台服务器集群中运行的结果:

-Q1: 0.212 seconds.
-Q2: 0.438 seconds.
-Q3: 0.733 seconds.
-Q4: 1.241 seconds.
+Q1:0.212秒。
+Q2:0.438秒。
+Q3:0.733秒。
+Q4:1.241秒。

 不出意料,查询是线性扩展的。

 我们同时在140台服务器的集群中运行的结果:

-Q1: 0.028 sec.
-Q2: 0.043 sec.
-Q3: 0.051 sec.
-Q4: 0.072 sec.
+Q1:0.028秒。
+Q2:0.043秒。
+Q3:0.051秒。
+Q4:0.072秒。

 在这种情况下,查询处理时间首先由网络延迟确定。 我们使用位于芬兰的Yandex数据中心中的客户端去位于俄罗斯的集群上运行查询,这增加了大约20毫秒的延迟。

 ## 总结 {#zong-jie}

-| servers | Q1 | Q2 | Q3 | Q4 |
-|---------|-------|-------|-------|-------|
-| 1 | 0.490 | 1.224 | 2.104 | 3.593 |
-| 3 | 0.212 | 0.438 | 0.733 | 1.241 |
-| 140 | 0.028 | 0.043 | 0.051 | 0.072 |
+| 服务器 | Q1 | Q2 | Q3 | Q4 |
+|--------|-------|-------|-------|-------|
+| 1 | 0.490 | 1.224 | 2.104 | 3.593 |
+| 3 | 0.212 | 0.438 | 0.733 | 1.241 |
+| 140 | 0.028 | 0.043 | 0.051 | 0.072 |

-[Original article](https://clickhouse.tech/docs/en/getting_started/example_datasets/nyc_taxi/) 
+[原始文章](https://clickhouse.tech/docs/en/getting_started/example_datasets/nyc_taxi/) 
diff --git a/docs/zh/getting_started/example_datasets/ontime.md b/docs/zh/getting_started/example_datasets/ontime.md
index 6db294b12fb..31684129bb7 100644
--- a/docs/zh/getting_started/example_datasets/ontime.md
+++ b/docs/zh/getting_started/example_datasets/ontime.md
@@ -1,3 +1,4 @@
+
 # 航班飞行数据 {#hang-ban-fei-xing-shu-ju}

 航班飞行数据有以下两个方式获取:

@@ -156,7 +157,7 @@ $ sudo service clickhouse-server restart
 $ clickhouse-client --query "select count(*) from datasets.ontime"
 ```

-!!! info "Info"
info "信息" 如果要运行下面的SQL查询,必须使用完整的表名, `datasets.ontime`。 @@ -356,7 +357,7 @@ ORDER by rate DESC LIMIT 1000; ``` -Bonus: +奖金: ``` sql SELECT avg(cnt) diff --git a/docs/zh/getting_started/example_datasets/star_schema.md b/docs/zh/getting_started/example_datasets/star_schema.md index 4680fe652b2..4fed13923ff 100644 --- a/docs/zh/getting_started/example_datasets/star_schema.md +++ b/docs/zh/getting_started/example_datasets/star_schema.md @@ -1,4 +1,5 @@ -# Star Schema Benchmark {#star-schema-benchmark} + +# 星型架构基准测试 {#star-schema-benchmark} 编译 dbgen: @@ -110,7 +111,7 @@ FROM lineorder l ALTER TABLE lineorder_flat DROP COLUMN C_CUSTKEY, DROP COLUMN S_SUPPKEY, DROP COLUMN P_PARTKEY; ``` -Running the queries: +运行查询: Q1.1 @@ -190,4 +191,4 @@ Q4.3 SELECT toYear(LO_ORDERDATE) AS year, S_CITY, P_BRAND, sum(LO_REVENUE - LO_SUPPLYCOST) AS profit FROM lineorder_flat WHERE S_NATION = 'UNITED STATES' AND (year = 1997 OR year = 1998) AND P_CATEGORY = 'MFGR#14' GROUP BY year, S_CITY, P_BRAND ORDER BY year, S_CITY, P_BRAND; ``` -[Original article](https://clickhouse.tech/docs/en/getting_started/example_datasets/star_schema/) +[原始文章](https://clickhouse.tech/docs/en/getting_started/example_datasets/star_schema/) diff --git a/docs/zh/getting_started/example_datasets/wikistat.md b/docs/zh/getting_started/example_datasets/wikistat.md index aacbdbf37f0..c2681a912e2 100644 --- a/docs/zh/getting_started/example_datasets/wikistat.md +++ b/docs/zh/getting_started/example_datasets/wikistat.md @@ -1,3 +1,4 @@ + # 维基访问数据 {#wei-ji-fang-wen-shu-ju} 参考: http://dumps.wikimedia.org/other/pagecounts-raw/ @@ -25,4 +26,4 @@ $ cat links.txt | while read link; do wget http://dumps.wikimedia.org/other/page $ ls -1 /opt/wikistat/ | grep gz | while read i; do echo $i; gzip -cd /opt/wikistat/$i | ./wikistat-loader --time="$(echo -n $i | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})([0-9]{2})-([0-9]{2})([0-9]{2})([0-9]{2})\.gz/\1-\2-\3 \4-00-00/')" | clickhouse-client --query="INSERT INTO wikistat FORMAT TabSeparated"; done ``` -[Original article](https://clickhouse.tech/docs/en/getting_started/example_datasets/wikistat/) +[原始文章](https://clickhouse.tech/docs/en/getting_started/example_datasets/wikistat/) diff --git a/docs/zh/getting_started/index.md b/docs/zh/getting_started/index.md index d6830aa6c84..35ae08bde7b 100644 --- a/docs/zh/getting_started/index.md +++ b/docs/zh/getting_started/index.md @@ -1,3 +1,4 @@ + # 入门 {#ru-men} 如果您是ClickHouse的新手,并希望亲身体验它的性能,首先您需要通过 [安装过程](install.md). 
diff --git a/docs/zh/getting_started/install.md b/docs/zh/getting_started/install.md
index 55aeff892a2..6a9aae286ad 100644
--- a/docs/zh/getting_started/install.md
+++ b/docs/zh/getting_started/install.md
@@ -1,3 +1,4 @@
+
 ## 系统要求 {#xi-tong-yao-qiu}

 ClickHouse可以在任何具有x86\_64,AArch64或PowerPC64LE CPU架构的Linux,FreeBSD或Mac OS X上运行。

@@ -21,7 +22,7 @@ $ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not
 deb http://repo.yandex.ru/clickhouse/deb/stable/ main/
 ```

-如果你想使用最新的测试版本,请使用’testing’替换’stable’。
+如果你想使用最新的测试版本,请使用'testing'替换'stable'。

 然后运行:

@@ -34,8 +35,8 @@ sudo apt-get install clickhouse-client clickhouse-server

 你也可以从这里手动下载安装包:https://repo.yandex.ru/clickhouse/deb/stable/main/。

-ClickHouse包含访问控制配置,它们位于`users.xml`文件中(与’config.xml’同目录)。
-默认情况下,允许从任何地方使用默认的‘default’用户无密码的访问ClickHouse。参考‘user/default/networks’。
+ClickHouse包含访问控制配置,它们位于`users.xml`文件中(与'config.xml'同目录)。
+默认情况下,允许从任何地方使用默认的'default'用户无密码的访问ClickHouse。参考'user/default/networks'。
 有关更多信息,请参考«Configuration files»部分。

 ### 来自RPM包 {#from-rpm-packages}

@@ -62,7 +63,7 @@ sudo yum install clickhouse-server clickhouse-client

 ### 来自Docker {#from-docker-image}

-要在Docker中运行ClickHouse,请遵循[Docker Hub](https://hub.docker.com/r/yandex/clickhouse-server/)上的指南。那些图像使用官方的`deb`包。
+要在Docker中运行ClickHouse,请遵循[Docker Hub](https://hub.docker.com/r/yandex/clickhouse-server/)上的指南。这些镜像使用官方的`deb`包。

 ### 使用源码安装 {#from-sources}

@@ -84,7 +85,7 @@ Server: programs/clickhouse-server
 ```

 (它们可以在server config中配置。)
-为需要的用户运行‘chown’
+为需要的用户运行'chown'

 日志的路径可以在server config (src/programs/server/config.xml)中配置。

@@ -107,7 +108,7 @@ clickhouse-server --config-file=/etc/clickhouse-server/config.xml
 ```

 在这种情况下,日志将被打印到控制台中,这在开发过程中很方便。
-如果配置文件在当前目录中,你可以不指定‘–config-file’参数。它默认使用‘./config.xml’。
+如果配置文件在当前目录中,你可以不指定'--config-file'参数。它默认使用'./config.xml'。

 你可以使用命令行客户端连接到服务:

 ``` bash
 clickhouse-client
 ```

-默认情况下它使用‘default’用户无密码的与localhost:9000服务建立连接。
+默认情况下它使用'default'用户无密码的与localhost:9000服务建立连接。
 客户端也可以用于连接远程服务,例如:

 ``` bash
@@ -149,4 +150,4 @@ SELECT 1

 为了继续进行实验,你可以尝试下载测试数据集。

-[Original article](https://clickhouse.tech/docs/en/getting_started/install/) 
+[原始文章](https://clickhouse.tech/docs/en/getting_started/install/) 
diff --git a/docs/zh/getting_started/playground.md b/docs/zh/getting_started/playground.md
index 186cb9030c2..a09d615ba21 100644
--- a/docs/zh/getting_started/playground.md
+++ b/docs/zh/getting_started/playground.md
@@ -1,44 +1,47 @@
 ---
-en_copy: true
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_priority: 14
+toc_title: "ClickHouse Playground"
 ---

-# ClickHouse Playground {#clickhouse-playground}
+# ClickHouse Playground {#clickhouse-playground}

-[ClickHouse Playground](https://play.clickhouse.tech?file=welcome) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster.
-Several example datasets are available in the Playground as well as sample queries that show ClickHouse features.
+[ClickHouse Playground](https://play.clickhouse.tech?file=welcome) 允许人们无需搭建自己的服务器或集群,通过即时运行查询来体验ClickHouse。
+Playground中提供了几个示例数据集,以及展示ClickHouse特性的示例查询。

-The queries are executed as a read-only user.
 It implies some limitations:
+查询以只读用户身份执行。这意味着一些限制:

-- DDL queries are not allowed
-- INSERT queries are not allowed
+- 不允许DDL查询
+- 不允许INSERT查询

-The following settings are also enforced:
+还强制执行以下设置:

 - [`max_result_bytes=10485760`](../operations/settings/query_complexity/#max-result-bytes)
 - [`max_result_rows=2000`](../operations/settings/query_complexity/#setting-max_result_rows)
 - [`result_overflow_mode=break`](../operations/settings/query_complexity/#result-overflow-mode)
 - [`max_execution_time=60000`](../operations/settings/query_complexity/#max-execution-time)

-ClickHouse Playground gives the experience of m2.small
-[Managed Service for ClickHouse](https://cloud.yandex.com/services/managed-clickhouse)
-instance hosted in [Yandex.Cloud](https://cloud.yandex.com/).
-More information about [cloud providers](../commercial/cloud.md).
+ClickHouse Playground提供的体验相当于一个m2.small规格的
+[ClickHouse托管服务](https://cloud.yandex.com/services/managed-clickhouse)
+实例,托管在 [Yandex.Cloud](https://cloud.yandex.com/) 中。
+更多关于 [云服务商](../commercial/cloud.md) 的信息。

-ClickHouse Playground web interface makes requests via ClickHouse [HTTP API](../interfaces/http.md).
-The Playground backend is just a ClickHouse cluster without any additional server-side application.
-ClickHouse HTTPS endpoint is also available as a part of the Playground.
+ClickHouse Playground的网页界面通过ClickHouse [HTTP API](../interfaces/http.md) 发出请求。
+Playground后端只是一个ClickHouse集群,没有任何额外的服务器端应用程序。
+作为Playground的一部分,还提供了ClickHouse的HTTPS端点。

-You can make queries to playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers.
-More information about software products that support ClickHouse is available [here](../interfaces/index.md).
+您可以使用任何HTTP客户端向Playground发起查询,例如 [curl](https://curl.haxx.se) 或 [wget](https://www.gnu.org/software/wget/),也可以使用 [JDBC](../interfaces/jdbc.md) 或 [ODBC](../interfaces/odbc.md) 驱动建立连接。
+有关支持ClickHouse的软件产品的更多信息,请访问 [这里](../interfaces/index.md)。

-| Parameter | Value |
-|:----------|:--------------------------------------|
-| Endpoint | https://play-api.clickhouse.tech:8443 |
-| User | `playground` |
-| Password | `clickhouse` |
+| 参数 | 值 |
+|:-----|:--------------------------------------|
+| 端点 | https://play-api.clickhouse.tech:8443 |
+| 用户 | `playground` |
+| 密码 | `clickhouse` |

-Note that this endpoint requires a secure connection.
+请注意,此端点需要安全连接。

-Example:
+示例:

 ``` bash
 curl "https://play-api.clickhouse.tech:8443/?query=SELECT+'Play+ClickHouse!';&user=playground&password=clickhouse&database=datasets"
diff --git a/docs/zh/getting_started/tutorial.md b/docs/zh/getting_started/tutorial.md
index 4f23dbe756d..4fc2fbdf290 100644
--- a/docs/zh/getting_started/tutorial.md
+++ b/docs/zh/getting_started/tutorial.md
@@ -1,18 +1,21 @@
 ---
-en_copy: true
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_priority: 12
+toc_title: "\u6559\u7A0B"
 ---

-# ClickHouse Tutorial {#clickhouse-tutorial}
+# ClickHouse教程 {#clickhouse-tutorial}

-## What to Expect from This Tutorial? {#what-to-expect-from-this-tutorial}
+## 从本教程中可以期待什么? {#what-to-expect-from-this-tutorial}

-By going through this tutorial, you’ll learn how to set up a simple ClickHouse cluster. It’ll be small, but fault-tolerant and scalable. Then we will use one of the example datasets to fill it with data and execute some demo queries.
+通过本教程,您将学习如何搭建一个简单的ClickHouse集群。它的规模不大,但具有容错性和可扩展性。然后,我们将使用其中一个示例数据集填充数据,并执行一些演示查询。

-## Single Node Setup {#single-node-setup}
+## 单节点设置 {#single-node-setup}

-To postpone the complexities of a distributed environment, we’ll start with deploying ClickHouse on a single server or virtual machine. ClickHouse is usually installed from [deb](index.md#install-from-deb-packages) or [rpm](index.md#from-rpm-packages) packages, but there are [alternatives](index.md#from-docker-image) for the operating systems that do no support them.
+为了暂时避开分布式环境的复杂性,我们将首先在单个服务器或虚拟机上部署ClickHouse。ClickHouse通常从 [deb](index.md#install-from-deb-packages) 或 [rpm](index.md#from-rpm-packages) 包安装,对于不支持它们的操作系统,也有 [替代方法](index.md#from-docker-image)。

-For example, you have chosen `deb` packages and executed:
+例如,假设您选择了 `deb` 包并执行了:

 ``` bash
 sudo apt-get install dirmngr
@@ -24,48 +27,48 @@ sudo apt-get update
 sudo apt-get install -y clickhouse-server clickhouse-client
 ```

-What do we have in the packages that got installed:
+安装的软件包中包含什么:

-- `clickhouse-client` package contains [clickhouse-client](../interfaces/cli.md) application, interactive ClickHouse console client.
-- `clickhouse-common` package contains a ClickHouse executable file.
-- `clickhouse-server` package contains configuration files to run ClickHouse as a server.
+- `clickhouse-client` 包包含 [clickhouse-client](../interfaces/cli.md) 应用程序,即交互式ClickHouse控制台客户端。
+- `clickhouse-common` 包包含一个ClickHouse可执行文件。
+- `clickhouse-server` 包包含将ClickHouse作为服务器运行所需的配置文件。

-Server config files are located in `/etc/clickhouse-server/`. Before going further, please notice the `<path>` element in `config.xml`. Path determines the location for data storage, so it should be located on volume with large disk capacity; the default value is `/var/lib/clickhouse/`. If you want to adjust the configuration, it’s not handy to directly edit `config.xml` file, considering it might get rewritten on future package updates. The recommended way to override the config elements is to create [files in config.d directory](../operations/configuration_files.md) which serve as “patches” to config.xml.
+服务器配置文件位于 `/etc/clickhouse-server/`。在进一步讨论之前,请注意 `config.xml` 中的 `<path>` 元素。Path决定数据存储的位置,因此应位于磁盘容量较大的卷上;默认值为 `/var/lib/clickhouse/`。如果想调整配置,考虑到 `config.xml` 文件可能会在未来的软件包更新中被重写,直接编辑它并不方便。覆盖配置元素的推荐方法是在 [config.d目录中创建文件](../operations/configuration_files.md),它们作为config.xml的“补丁”。

-As you might have noticed, `clickhouse-server` is not launched automatically after package installation. It won’t be automatically restarted after updates, either. The way you start the server depends on your init system, usually, it is:
+你可能已经注意到,`clickhouse-server` 在安装软件包后不会自动启动,它也不会在更新后自动重新启动。启动服务器的方式取决于您的init系统,通常是:

 ``` bash
 sudo service clickhouse-server start
 ```

-or
+或

 ``` bash
 sudo /etc/init.d/clickhouse-server start
 ```

-The default location for server logs is `/var/log/clickhouse-server/`. The server is ready to handle client connections once it logs the `Ready for connections` message.
+服务器日志的默认位置是 `/var/log/clickhouse-server/`。一旦服务器记录了 `Ready for connections` 消息,它就已准备好处理客户端连接。

-Once the `clickhouse-server` is up and running, we can use `clickhouse-client` to connect to the server and run some test queries like `SELECT "Hello, world!";`.
+一旦 `clickhouse-server` 启动并运行,我们就可以使用 `clickhouse-client` 连接到服务器,并运行一些测试查询,如 `SELECT "Hello, world!";`。
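To make that smoke test concrete, here is a minimal sketch, assuming the default host and port and the passwordless `default` user:

``` bash
# Run one-off queries without entering interactive mode.
clickhouse-client --query "SELECT 'Hello, world!'"
# Confirm which server version answered.
clickhouse-client --query "SELECT version()"
```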
-Quick tips for clickhouse-client
-Interactive mode:
+clickhouse-client的快速提示
+交互模式:

 ``` bash
 clickhouse-client
 clickhouse-client --host=... --port=... --user=... --password=...
 ```

-Enable multiline queries:
+启用多行查询:

 ``` bash
 clickhouse-client -m
 clickhouse-client --multiline
 ```

-Run queries in batch-mode:
+以批处理模式运行查询:

 ``` bash
 clickhouse-client --query='SELECT 1'
@@ -73,7 +76,7 @@ echo 'SELECT 1' | clickhouse-client
 clickhouse-client <<< 'SELECT 1'
 ```

-Insert data from a file in specified format:
+从指定格式的文件中插入数据:

 ``` bash
 clickhouse-client --query='INSERT INTO table VALUES' < data.txt
@@ -82,39 +85,39 @@ clickhouse-client --query='INSERT INTO table FORMAT TabSeparated' < data.tsv
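# The same pattern extends to other input formats -- a hedged sketch,
# assuming a table whose columns match the file layout (names illustrative):
clickhouse-client --query='INSERT INTO table FORMAT CSV' < data.csv
clickhouse-client --query='INSERT INTO table FORMAT CSVWithNames' < data_with_header.csv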
-## Import Sample Dataset {#import-sample-dataset}
+## 导入示例数据集 {#import-sample-dataset}

-Now it’s time to fill our ClickHouse server with some sample data. In this tutorial, we’ll use the anonymized data of Yandex.Metrica, the first service that runs ClickHouse in production way before it became open-source (more on that in [history section](../introduction/history.md)). There are [multiple ways to import Yandex.Metrica dataset](example_datasets/metrica.md), and for the sake of the tutorial, we’ll go with the most realistic one.
+现在是时候用一些示例数据填充我们的ClickHouse服务器了。在本教程中,我们将使用Yandex.Metrica的匿名数据,它是第一个在开源之前就以生产方式运行ClickHouse的服务(更多信息请参阅 [历史部分](../introduction/history.md))。有 [多种导入Yandex.Metrica数据集的方式](example_datasets/metrica.md),在本教程中,我们将使用最符合实际的一种。

-### Download and Extract Table Data {#download-and-extract-table-data}
+### 下载并提取表数据 {#download-and-extract-table-data}

 ``` bash
 curl https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_v1.tsv.xz | unxz --threads=`nproc` > hits_v1.tsv
 curl https://clickhouse-datasets.s3.yandex.net/visits/tsv/visits_v1.tsv.xz | unxz --threads=`nproc` > visits_v1.tsv
 ```

-The extracted files are about 10GB in size.
+提取的文件大小约为10GB。

-### Create Tables {#create-tables}
+### 创建表 {#create-tables}

-As in most databases management systems, ClickHouse logically groups tables into “databases”. There’s a `default` database, but we’ll create a new one named `tutorial`:
+与大多数数据库管理系统一样,ClickHouse在逻辑上将表分组为“数据库”。已经有一个 `default` 数据库,但我们将创建一个名为 `tutorial` 的新数据库:

 ``` bash
 clickhouse-client --query "CREATE DATABASE IF NOT EXISTS tutorial"
 ```

-Syntax for creating tables is way more complicated compared to databases (see [reference](../query_language/create.md). In general `CREATE TABLE` statement has to specify three key things:
+与创建数据库相比,创建表的语法要复杂得多(请参阅 [参考资料](../sql_reference/statements/create.md))。一般来说,`CREATE TABLE` 语句必须指定三个关键内容:

-1. Name of table to create.
-2. Table schema, i.e. list of columns and their [data types](../data_types/index.md).
-3. [Table engine](../operations/table_engines/index.md) and it’s settings, which determines all the details on how queries to this table will be physically executed.
+1. 要创建的表的名称。
+2. 表结构,即列的列表及其 [数据类型](../sql_reference/data_types/index.md)。
+3. [表引擎](../engines/table_engines/index.md) 及其设置,它决定了对此表的查询在物理上如何执行的所有细节。

-Yandex.Metrica is a web analytics service, and sample dataset doesn’t cover its full functionality, so there are only two tables to create:
+Yandex.Metrica是一个网络分析服务,示例数据集没有覆盖其全部功能,因此只需要创建两个表:

-- `hits` is a table with each action done by all users on all websites covered by the service.
-- `visits` is a table that contains pre-built sessions instead of individual actions.
+- `hits` 表包含所有用户在该服务覆盖的所有网站上完成的每个操作。
+- `visits` 表包含预先构建的会话,而不是单个操作。

-Let’s see and execute the real create table queries for these tables:
+让我们看看并执行这些表的实际建表查询:

 ``` sql
 CREATE TABLE tutorial.hits_v1
@@ -457,22 +460,22 @@ SAMPLE BY intHash32(UserID)
 SETTINGS index_granularity = 8192
 ```

-You can execute those queries using the interactive mode of `clickhouse-client` (just launch it in a terminal without specifying a query in advance) or try some [alternative interface](../interfaces/index.md) if you want.
+您可以在 `clickhouse-client` 的交互模式下执行这些查询(只需在终端中启动它,而无需提前指定查询),也可以根据需要尝试一些 [替代接口](../interfaces/index.md)。

-As we can see, `hits_v1` uses the [basic MergeTree engine](../operations/table_engines/mergetree.md), while the `visits_v1` uses the [Collapsing](../operations/table_engines/collapsingmergetree.md) variant.
+正如我们所看到的,`hits_v1` 使用 [基本的MergeTree引擎](../engines/table_engines/mergetree_family/mergetree.md),而 `visits_v1` 使用 [Collapsing](../engines/table_engines/mergetree_family/collapsingmergetree.md) 变体。
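As a minimal sketch of the three parts named above -- table name, column list with types, and engine -- consider this toy table (`tutorial.toy_hits` is illustrative and not part of the dataset):

``` bash
clickhouse-client --query "
CREATE TABLE IF NOT EXISTS tutorial.toy_hits
(
    UserID UInt64,      -- column name plus data type
    URL String,
    EventTime DateTime
)
ENGINE = MergeTree()    -- table engine and its settings
ORDER BY (UserID, EventTime)"
```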
+正如我们所看到的, `hits_v1` 使用 [基本MergeTree引擎](../engines/table_engines/mergetree_family/mergetree.md),而 `visits_v1` 使用 [崩溃](../engines/table_engines/mergetree_family/collapsingmergetree.md) 变体。 -### Import Data {#import-data} +### 导入数据 {#import-data} -Data import to ClickHouse is done via [INSERT INTO](../query_language/insert_into.md) query like in many other SQL databases. However, data is usually provided in one of the [supported serialization formats](../interfaces/formats.md) instead of `VALUES` clause (which is also supported). +数据导入到ClickHouse是通过以下方式完成的 [INSERT INTO](../sql_reference/statements/insert_into.md) 查询像许多其他SQL数据库。 然而,数据通常是在一个提供 [支持的序列化格式](../interfaces/formats.md) 而不是 `VALUES` 子句(也支持)。 -The files we downloaded earlier are in tab-separated format, so here’s how to import them via console client: +我们之前下载的文件是以制表符分隔的格式,所以这里是如何通过控制台客户端导入它们: ``` bash clickhouse-client --query "INSERT INTO tutorial.hits_v1 FORMAT TSV" --max_insert_block_size=100000 < hits_v1.tsv clickhouse-client --query "INSERT INTO tutorial.visits_v1 FORMAT TSV" --max_insert_block_size=100000 < visits_v1.tsv ``` -ClickHouse has a lot of [settings to tune](../operations/settings/index.md) and one way to specify them in console client is via arguments, as we can see with `--max_insert_block_size`. The easiest way to figure out what settings are available, what do they mean and what the defaults are is to query the `system.settings` table: +ClickHouse有很多 [要调整的设置](../operations/settings/index.md) 在控制台客户端中指定它们的一种方法是通过参数,我们可以看到 `--max_insert_block_size`. 找出可用的设置,它们意味着什么以及默认值的最简单方法是查询 `system.settings` 表: ``` sql SELECT name, value, changed, description @@ -483,23 +486,23 @@ FORMAT TSV max_insert_block_size 1048576 0 "The maximum block size for insertion, if we control the creation of blocks for insertion." ``` -Optionally you can [OPTIMIZE](../query_language/misc/#misc_operations-optimize) the tables after import. Tables that are configured with an engine from MergeTree-family always do merges of data parts in the background to optimize data storage (or at least check if it makes sense). These queries force the table engine to do storage optimization right now instead of some time later: +您也可以 [OPTIMIZE](../sql_reference/statements/misc.md#misc_operations-optimize) 导入后的表。 使用MergeTree-family引擎配置的表总是在后台合并数据部分以优化数据存储(或至少检查是否有意义)。 这些查询强制表引擎立即进行存储优化,而不是稍后进行一段时间: ``` bash clickhouse-client --query "OPTIMIZE TABLE tutorial.hits_v1 FINAL" clickhouse-client --query "OPTIMIZE TABLE tutorial.visits_v1 FINAL" ``` -These queries start an I/O and CPU intensive operation, so if the table consistently receives new data, it’s better to leave it alone and let merges run in the background. +这些查询开始一个I/O和CPU密集型操作,所以如果表一直接收到新数据,最好不要管它,让合并在后台运行。 -Now we can check if the table import was successful: +现在我们可以检查表导入是否成功: ``` bash clickhouse-client --query "SELECT COUNT(*) FROM tutorial.hits_v1" clickhouse-client --query "SELECT COUNT(*) FROM tutorial.visits_v1" ``` -## Example Queries {#example-queries} +## 查询示例 {#example-queries} ``` sql SELECT @@ -521,18 +524,18 @@ FROM tutorial.visits_v1 WHERE (CounterID = 912887) AND (toYYYYMM(StartDate) = 201403) AND (domain(StartURL) = 'yandex.ru') ``` -## Cluster Deployment {#cluster-deployment} +## 集群部署 {#cluster-deployment} -ClickHouse cluster is a homogenous cluster. Steps to set up: +ClickHouse集群是一个同质集群。 设置步骤: -1. Install ClickHouse server on all machines of the cluster -2. Set up cluster configs in configuration files -3. Create local tables on each instance -4. 
+1. 在集群的所有计算机上安装ClickHouse服务器
+2. 在配置文件中设置集群配置
+3. 在每个实例上创建本地表
+4. 创建一个 [分布式表](../engines/table_engines/special/distributed.md)

-[Distributed table](../operations/table_engines/distributed.md) is actually a kind of “view” to local tables of ClickHouse cluster. SELECT query from a distributed table executes using resources of all cluster’s shards. You may specify configs for multiple clusters and create multiple distributed tables providing views to different clusters.
+[分布式表](../engines/table_engines/special/distributed.md) 实际上是ClickHouse集群本地表上的一种“视图”。对分布式表的SELECT查询会使用集群所有分片的资源来执行。您可以为多个集群指定配置,并创建多个分布式表,为不同的集群提供视图。

-Example config for a cluster with three shards, one replica each:
+具有三个分片的集群的示例配置,每个分片一个副本:

 ``` xml

@@ -559,37 +562,37 @@ Example config for a cluster with three shards, one replica each:

 ```

-For further demonstration, let’s create a new local table with the same `CREATE TABLE` query that we used for `hits_v1`, but different table name:
+为了进一步演示,让我们使用与 `hits_v1` 相同的 `CREATE TABLE` 查询创建一个新的本地表,但使用不同的表名:

 ``` sql
 CREATE TABLE tutorial.hits_local (...) ENGINE = MergeTree() ...
 ```

-Creating a distributed table providing a view into local tables of the cluster:
+创建提供集群本地表视图的分布式表:

 ``` sql
 CREATE TABLE tutorial.hits_all AS tutorial.hits_local
 ENGINE = Distributed(perftest_3shards_1replicas, tutorial, hits_local, rand());
 ```

-A common practice is to create similar Distributed tables on all machines of the cluster. It allows running distributed queries on any machine of the cluster. Also there’s an alternative option to create temporary distributed table for a given SELECT query using [remote](../query_language/table_functions/remote.md) table function.
+常见的做法是在集群的所有计算机上创建类似的分布式表。这样就可以在集群的任何计算机上运行分布式查询。还有一个替代方案:使用 [remote](../sql_reference/table_functions/remote.md) 表函数为给定的SELECT查询创建临时分布式表。

-Let’s run [INSERT SELECT](../query_language/insert_into.md) into the Distributed table to spread the table to multiple servers.
+让我们对分布式表运行 [INSERT SELECT](../sql_reference/statements/insert_into.md),将数据传播到多个服务器。

 ``` sql
 INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1;
 ```

-!!! warning "Notice"
-    This approach is not suitable for the sharding of large tables. There’s a separate tool [clickhouse-copier](../operations/utils/clickhouse-copier.md) that can re-shard arbitrary large tables.
+!!! warning "注意"
+    这种方法不适合大型表的分片。有一个单独的工具 [clickhouse-copier](../operations/utilities/clickhouse-copier.md),它可以对任意大的表重新分片。

-As you could expect, computationally heavy queries run N times faster if they utilize 3 servers instead of one.
+正如您所期望的那样,如果计算量大的查询使用3台服务器而不是1台,则运行速度会快N倍。

-In this case, we have used a cluster with 3 shards, and each contains a single replica.
+在这种情况下,我们使用了具有3个分片的集群,每个分片包含一个副本。

-To provide resilience in a production environment, we recommend that each shard should contain 2-3 replicas spread between multiple availability zones or datacenters (or at least racks). Note that ClickHouse supports an unlimited number of replicas.
+为了在生产环境中提供弹性,我们建议每个分片包含2-3个副本,分布在多个可用区或数据中心(或至少机架)之间。请注意,ClickHouse支持无限数量的副本。

-Example config for a cluster of one shard containing three replicas:
+包含三个副本的单分片集群的示例配置:

 ``` xml

@@ -613,12 +616,12 @@ Example config for a cluster of one shard containing three replicas:

 ```

-To enable native replication [ZooKeeper](http://zookeeper.apache.org/) is required. ClickHouse takes care of data consistency on all replicas and runs restore procedure after failure automatically.
 It’s recommended to deploy the ZooKeeper cluster on separate servers (where no other processes including ClickHouse are running).
+要启用本机复制,需要 [ZooKeeper](http://zookeeper.apache.org/)。ClickHouse负责所有副本的数据一致性,并在故障后自动运行恢复过程。建议将ZooKeeper集群部署在单独的服务器上(这些服务器上不运行包括ClickHouse在内的其他进程)。

-!!! note "Note"
-    ZooKeeper is not a strict requirement: in some simple cases, you can duplicate the data by writing it into all the replicas from your application code. This approach is **not** recommended, in this case, ClickHouse won’t be able to guarantee data consistency on all replicas. Thus it becomes the responsibility of your application.
+!!! note "注"
+    ZooKeeper并不是严格必需的:在某些简单情况下,您可以通过从应用程序代码将数据写入所有副本来实现复制。**不**推荐这种方法,因为在这种情况下,ClickHouse无法保证所有副本上的数据一致性,这将成为您的应用程序的责任。

-ZooKeeper locations are specified in the configuration file:
+ZooKeeper的位置在配置文件中指定:

 ``` xml

@@ -637,7 +640,7 @@ ZooKeeper locations are specified in the configuration file:

 ```

-Also, we need to set macros for identifying each shard and replica which are used on table creation:
+此外,我们需要设置宏来标识每个分片和副本,它们会在创建表时用到:

 ``` xml

@@ -646,7 +649,7 @@ Also, we need to set macros for identifying each shard and replica which are use

 ```

-If there are no replicas at the moment on replicated table creation, a new first replica is instantiated. If there are already live replicas, the new replica clones data from existing ones. You have an option to create all replicated tables first, and then insert data to it. Another option is to create some replicas and add the others after or during data insertion.
+如果在创建复制表时还没有副本,则会实例化新的第一个副本。如果已有存活的副本,则新副本将克隆现有副本中的数据。您可以先创建所有复制表,然后再向其中插入数据;也可以先创建部分副本,并在数据插入之后或期间添加其他副本。

 ``` sql
 CREATE TABLE tutorial.hits_replica (...)
@@ -657,12 +660,12 @@ ENGINE = ReplicatedMergeTree(
 ...
 ```

-Here we use [ReplicatedMergeTree](../operations/table_engines/replication.md) table engine. In parameters we specify ZooKeeper path containing shard and replica identifiers.
+在这里,我们使用 [ReplicatedMergeTree](../engines/table_engines/mergetree_family/replication.md) 表引擎。在参数中,我们指定包含分片和副本标识符的ZooKeeper路径。

 ``` sql
 INSERT INTO tutorial.hits_replica SELECT * FROM tutorial.hits_local;
 ```

-Replication operates in multi-master mode. Data can be loaded into any replica, and the system then syncs it with other instances automatically. Replication is asynchronous so at a given moment, not all replicas may contain recently inserted data. At least one replica should be up to allow data ingestion. Others will sync up data and repair consistency once they will become active again. Note that this approach allows for the low possibility of a loss of recently inserted data.
+复制以多主模式运行。数据可以加载到任何副本中,系统随后会自动将其与其他实例同步。复制是异步的,因此在给定时刻,并非所有副本都包含最近插入的数据。至少要有一个副本在线才能进行数据摄取,其他副本会在重新活跃后同步数据并修复一致性。请注意,这种方法存在丢失最近插入数据的较小可能性。

-[Original article](https://clickhouse.tech/docs/en/getting_started/tutorial/) 
+[原始文章](https://clickhouse.tech/docs/en/getting_started/tutorial/) 
diff --git a/docs/zh/guides/apply_catboost_model.md b/docs/zh/guides/apply_catboost_model.md
index 62eb386147f..4ac7d926961 100644
--- a/docs/zh/guides/apply_catboost_model.md
+++ b/docs/zh/guides/apply_catboost_model.md
@@ -1,40 +1,43 @@
 ---
-en_copy: true
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_priority: 41
+toc_title: "\u5E94\u7528CatBoost\u6A21\u578B"
 ---

-# Applying a Catboost Model in ClickHouse {#applying-catboost-model-in-clickhouse}
+# 在ClickHouse中应用Catboost模型 {#applying-catboost-model-in-clickhouse}

-[CatBoost](https://catboost.ai) is a free and open-source gradient boosting library developed at [Yandex](https://yandex.com/company/) for machine learning.
+[CatBoost](https://catboost.ai) 是一个由 [Yandex](https://yandex.com/company/) 开发的免费开源梯度提升机器学习库。

-With this instruction, you will learn to apply pre-trained models in ClickHouse by running model inference from SQL.
+通过本说明,您将学习如何通过从SQL运行模型推理,在ClickHouse中应用预训练模型。

-To apply a CatBoost model in ClickHouse:
+在ClickHouse中应用CatBoost模型:

-1. [Create a Table](#create-table).
-2. [Insert the Data to the Table](#insert-data-to-table).
-3. [Integrate CatBoost into ClickHouse](#integrate-catboost-into-clickhouse) (Optional step).
-4. [Run the Model Inference from SQL](#run-model-inference).
+1. [创建表](#create-table)。
+2. [将数据插入到表中](#insert-data-to-table)。
+3. [将CatBoost集成到ClickHouse中](#integrate-catboost-into-clickhouse)(可选步骤)。
+4. [从SQL运行模型推理](#run-model-inference)。

-For more information about training CatBoost models, see [Training and applying models](https://catboost.ai/docs/features/training.html#training).
+有关训练CatBoost模型的详细信息,请参阅 [训练和应用模型](https://catboost.ai/docs/features/training.html#training)。

-## Prerequisites {#prerequisites}
+## 先决条件 {#prerequisites}

-If you don’t have the [Docker](https://docs.docker.com/install/) yet, install it.
+如果您还没有安装 [Docker](https://docs.docker.com/install/),请先安装它。

-!!! note "Note"
-    [Docker](https://www.docker.com) is a software platform that allows you to create containers that isolate a CatBoost and ClickHouse installation from the rest of the system.
+!!! note "注"
+    [Docker](https://www.docker.com) 是一个软件平台,允许您创建容器,将CatBoost和ClickHouse的安装与系统的其余部分隔离。

-Before applying a CatBoost model:
+在应用CatBoost模型之前:

-**1.** Pull the [Docker image](https://hub.docker.com/r/yandex/tutorial-catboost-clickhouse) from the registry:
+**1.** 从注册表拉取 [Docker镜像](https://hub.docker.com/r/yandex/tutorial-catboost-clickhouse):

 ``` bash
 $ docker pull yandex/tutorial-catboost-clickhouse
 ```

-This Docker image contains everything you need to run CatBoost and ClickHouse: code, runtime, libraries, environment variables, and configuration files.
+此Docker镜像包含运行CatBoost和ClickHouse所需的所有内容:代码、运行时、库、环境变量和配置文件。

-**2.** Make sure the Docker image has been successfully pulled:
+**2.** 确保已成功拉取Docker镜像:

 ``` bash
 $ docker image ls
 REPOSITORY                            TAG                 IMAGE ID            CR
 yandex/tutorial-catboost-clickhouse   latest              622e4d17945b        22 hours ago        1.37GB
 ```

-**3.** Start a Docker container based on this image:
+**3.** 基于此镜像启动一个Docker容器:

 ``` bash
 $ docker run -it -p 8888:8888 yandex/tutorial-catboost-clickhouse
 ```

-## 1. Create a Table {#create-table}
+## 1. 创建表 {#create-table}
-To create a ClickHouse table for the training sample:
+为训练样本创建ClickHouse表:

-**1.** Start ClickHouse console client in the interactive mode:
+**1.** 在交互模式下启动ClickHouse控制台客户端:

 ``` bash
 $ clickhouse client
 ```

-!!! note "Note"
-    The ClickHouse server is already running inside the Docker container.
+!!! note "注"
+    ClickHouse服务器已经在Docker容器内运行。

-**2.** Create the table using the command:
+**2.** 使用以下命令创建表:

 ``` sql
 :) CREATE TABLE amazon_train
@@ -81,29 +84,29 @@ $ clickhouse client
 ENGINE = MergeTree ORDER BY date
 ```

-**3.** Exit from ClickHouse console client:
+**3.** 从ClickHouse控制台客户端退出:

 ``` sql
 :) exit
 ```

-## 2. Insert the Data to the Table {#insert-data-to-table}
+## 2. 将数据插入到表中 {#insert-data-to-table}

-To insert the data:
+插入数据:

-**1.** Run the following command:
+**1.** 运行以下命令:

 ``` bash
 $ clickhouse client --host 127.0.0.1 --query 'INSERT INTO amazon_train FORMAT CSVWithNames' < ~/amazon/train.csv
 ```

-**2.** Start ClickHouse console client in the interactive mode:
+**2.** 在交互模式下启动ClickHouse控制台客户端:

 ``` bash
 $ clickhouse client
 ```

-**3.** Make sure the data has been uploaded:
+**3.** 确保数据已上传:

 ``` sql
 :) SELECT count() FROM amazon_train

 SELECT count()
 FROM amazon_train

 +-count()-+
 |   65538 |
-+---------+
++---------+
 ```

-## 3. Integrate CatBoost into ClickHouse {#integrate-catboost-into-clickhouse}
+## 3. 将CatBoost集成到ClickHouse中 {#integrate-catboost-into-clickhouse}

-!!! note "Note"
-    **Optional step.** The Docker image contains everything you need to run CatBoost and ClickHouse.
+!!! note "注"
+    **可选步骤。** Docker镜像包含运行CatBoost和ClickHouse所需的所有内容。

-To integrate CatBoost into ClickHouse:
+将CatBoost集成到ClickHouse中:

-**1.** Build the evaluation library.
+**1.** 构建评估库。

-The fastest way to evaluate a CatBoost model is compile `libcatboostmodel.` library. For more information about how to build the library, see [CatBoost documentation](https://catboost.ai/docs/concepts/c-plus-plus-api_dynamic-c-pluplus-wrapper.html).
+评估CatBoost模型的最快方法是编译 `libcatboostmodel.` 库。有关如何构建该库的详细信息,请参阅 [CatBoost文档](https://catboost.ai/docs/concepts/c-plus-plus-api_dynamic-c-pluplus-wrapper.html)。

-**2.** Create a new directory anywhere and with any name, for example, `data` and put the created library in it. The Docker image already contains the library `data/libcatboostmodel.so`.
+**2.** 在任意位置创建一个任意名称的新目录(例如 `data`),并将构建好的库放入其中。Docker镜像中已经包含了库 `data/libcatboostmodel.so`。

-**3.** Create a new directory for config model anywhere and with any name, for example, `models`.
+**3.** 在任意位置创建一个任意名称的新目录用于存放模型配置(例如 `models`)。

-**4.** Create a model configuration file with any name, for example, `models/amazon_model.xml`.
+**4.** 创建一个任意名称的模型配置文件,例如 `models/amazon_model.xml`。

-**5.** Describe the model configuration:
+**5.** 描述模型配置:

 ``` xml

@@ -150,7 +153,7 @@ The fastest way to evaluate a CatBoost model is compile `libcatboostmodel.

 ```

-**6.** Add the path to CatBoost and the model configuration to the ClickHouse configuration:
+**6.** 将CatBoost的路径和模型配置添加到ClickHouse配置:

 ``` xml

@@ -158,11 +161,11 @@ The fastest way to evaluate a CatBoost model is compile `libcatboostmodel./home/catboost/models/*_model.xml
 ```

-## 4. Run the Model Inference from SQL {#run-model-inference}
+## 4. 从SQL运行模型推理 {#run-model-inference}

-For test model run the ClickHouse client `$ clickhouse client`.
+要测试模型,请运行ClickHouse客户端 `$ clickhouse client`。

-Let’s make sure that the model is working:
+让我们确保模型正常工作:

 ``` sql
 :) SELECT
@@ -181,10 +184,10 @@ FROM amazon_train
 LIMIT 10
 ```

-!!! note "Note"
note "Note" - Function [modelEvaluate](../query_language/functions/other_functions.md#function-modelevaluate) returns tuple with per-class raw predictions for multiclass models. +!!! note "注" + 功能 [模型值](../sql_reference/functions/other_functions.md#function-modelevaluate) 返回带有多类模型的每类原始预测的元组。 -Let’s predict the probability: +让我们预测一下: ``` sql :) SELECT @@ -204,10 +207,10 @@ FROM amazon_train LIMIT 10 ``` -!!! note "Note" - More info about [exp()](../query_language/functions/math_functions.md) function. +!!! note "注" + 更多信息 [exp()](../sql_reference/functions/math_functions.md) 功能。 -Let’s calculate LogLoss on the sample: +让我们计算样本的LogLoss: ``` sql :) SELECT -avg(tg * log(prob) + (1 - tg) * log(1 - prob)) AS logloss @@ -230,7 +233,7 @@ FROM ) ``` -!!! note "Note" - More info about [avg()](../query_language/agg_functions/reference.md#agg_function-avg) and [log()](../query_language/functions/math_functions.md) functions. +!!! note "注" + 更多信息 [avg()](../sql_reference/aggregate_functions/reference.md#agg_function-avg) 和 [日志()](../sql_reference/functions/math_functions.md) 功能。 -[Original article](https://clickhouse.tech/docs/en/guides/apply_catboost_model/) +[原始文章](https://clickhouse.tech/docs/en/guides/apply_catboost_model/) diff --git a/docs/zh/guides/index.md b/docs/zh/guides/index.md index c1968730961..00fe071434d 100644 --- a/docs/zh/guides/index.md +++ b/docs/zh/guides/index.md @@ -1,12 +1,16 @@ --- -en_copy: true +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_folder_title: "\u6307\u5357" +toc_priority: 38 +toc_title: "\u6982\u8FF0" --- -# ClickHouse Guides {#clickhouse-guides} +# ClickHouse指南 {#clickhouse-guides} -List of detailed step-by-step instructions that help to solve various tasks using ClickHouse: +详细的一步一步的说明,帮助解决使用ClickHouse的各种任务列表: -- [Tutorial on simple cluster set-up](../getting_started/tutorial.md) -- [Applying a CatBoost model in ClickHouse](apply_catboost_model.md) +- [简单集群设置教程](../getting_started/tutorial.md) +- [在ClickHouse中应用CatBoost模型](apply_catboost_model.md) -[Original article](https://clickhouse.tech/docs/en/guides/) +[原始文章](https://clickhouse.tech/docs/en/guides/) diff --git a/docs/zh/index.md b/docs/zh/index.md index b10fafebe93..cb9ccf0420a 100644 --- a/docs/zh/index.md +++ b/docs/zh/index.md @@ -1,36 +1,41 @@ +--- +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +--- + # 什么是ClickHouse? 
 ClickHouse是一个用于联机分析(OLAP)的列式数据库管理系统(DBMS)。

 在传统的行式数据库系统中,数据按如下顺序存储:

-| Row | WatchID | JavaEnable | Title | GoodEvent | EventTime |
-|-----|-------------|------------|--------------------|-----------|---------------------|
-| \#0 | 89354350662 | 1 | Investor Relations | 1 | 2016-05-18 05:19:20 |
-| \#1 | 90329509958 | 0 | Contact us | 1 | 2016-05-18 08:10:20 |
-| \#2 | 89953706054 | 1 | Mission | 1 | 2016-05-18 07:38:00 |
-| \#N | … | … | … | … | … |
+| 行 | WatchID | JavaEnable | Title | GoodEvent | EventTime |
+|-----|-------------|------------|------------|-----------|---------------------|
+| \#0 | 89354350662 | 1 | 投资者关系 | 1 | 2016-05-18 05:19:20 |
+| \#1 | 90329509958 | 0 | 联系我们 | 1 | 2016-05-18 08:10:20 |
+| \#2 | 89953706054 | 1 | 任务 | 1 | 2016-05-18 07:38:00 |
+| \#N | … | … | … | … | … |

 处于同一行中的数据总是被物理的存储在一起。

 常见的行式数据库系统有: MySQL、Postgres和MS SQL Server。
-{: .grey }
+{: .grey }

 在列式数据库系统中,数据按如下的顺序存储:

 | Row: | \#0 | \#1 | \#2 | \#N |
 |-------------|---------------------|---------------------|---------------------|-----|
-| WatchID: | 89354350662 | 90329509958 | 89953706054 | … |
+| WatchID: | 89354350662 | 90329509958 | 89953706054 | … |
 | JavaEnable: | 1 | 0 | 1 | … |
-| Title: | Investor Relations | Contact us | Mission | … |
+| Title: | 投资者关系 | 联系我们 | 任务 | … |
 | GoodEvent: | 1 | 1 | 1 | … |
-| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | … |
+| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | … |

 该示例中只展示了数据在列式数据库中数据的排列顺序。
 对于存储而言,列式数据库总是将同一列的数据存储在一起,不同列的数据也总是分开存储。

 常见的列式数据库有: Vertica、 Paraccel (Actian Matrix,Amazon Redshift)、 Sybase IQ、 Exasol、 Infobright、 InfiniDB、 MonetDB (VectorWise, Actian Vector)、 LucidDB、 SAP HANA、 Google Dremel、 Google PowerDrill、 Druid、 kdb+。
-{: .grey }
+{: .grey }

 不同的存储方式适合不同的场景,这里的查询场景包括: 进行了哪些查询,多久查询一次以及各类查询的比例; 每种查询读取多少数据————行、列和字节;读取数据和写入数据之间的关系;使用的数据集大小以及如何使用本地的数据集;是否使用事务,以及它们是如何进行隔离的;数据的复制机制与数据的完整性要求;每种类型的查询要求的延迟与吞吐量等等。

@@ -68,7 +73,7 @@ ClickHouse是一个用于联机分析(OLAP)的列式数据库管理系统(DBMS)

 看到差别了么?下面将详细介绍为什么会发生这种情况。

-### Input/output {#inputoutput}
+### 输入/输出 {#inputoutput}

 1. 针对分析类查询,通常只需要读取表的一小部分列。在列式数据库中你可以只读取你需要的数据。例如,如果只需要读取100列中的5列,这将帮助你最少减少20倍的I/O消耗。
 2. 由于数据总是打包成批量读取的,所以压缩是非常容易的。同时数据按列分别存储这也更容易压缩。这进一步降低了I/O的体积。
diff --git a/docs/zh/interfaces/cli.md b/docs/zh/interfaces/cli.md
index 7e858ce458b..fef8e404aef 100644
--- a/docs/zh/interfaces/cli.md
+++ b/docs/zh/interfaces/cli.md
@@ -1,3 +1,4 @@
+
 # 命令行客户端 {#ming-ling-xing-ke-hu-duan}

 通过命令行来访问 ClickHouse,您可以使用 `clickhouse-client`

@@ -48,7 +49,7 @@ cat file.csv | clickhouse-client --database=test --query="INSERT INTO test FORMA
 命令行客户端是基于 `replxx`。换句话说,它可以使用我们熟悉的快捷键方式来操作以及保留历史命令。 历史命令会写入在 `~/.clickhouse-client-history` 中。

-默认情况下,输出的格式是 `PrettyCompact`。您可以通过 FORMAT 设置根据不同查询来修改格式,或者通过在查询末尾指定 `\G` 字符,或通过在命令行中使用 `--format` or `--vertical` 参数,或使用客户端的配置文件。
+默认情况下,输出的格式是 `PrettyCompact`。您可以通过 FORMAT 设置根据不同查询来修改格式,或者通过在查询末尾指定 `\G` 字符,或通过在命令行中使用 `--format` 或 `--vertical` 参数,或使用客户端的配置文件。

 若要退出客户端,使用 Ctrl+D (或 Ctrl+C),或者输入以下其中一个命令:`exit`, `quit`, `logout`, `учше`, `йгше`, `дщпщге`, `exit;`, `quit;`, `logout;`, `учшеж`, `йгшеж`, `дщпщгеж`, `q`, `й`, `q`, `Q`, `:q`, `й`, `Й`, `Жй`

@@ -61,7 +62,7 @@ cat file.csv | clickhouse-client --database=test --query="INSERT INTO test FORMA

 您可以通过 Ctrl+C 来取消一个长时间的查询。然而,您依然需要等待服务端来中止请求。在某个阶段去取消查询是不可能的。如果您不等待并再次按下 Ctrl + C,客户端将会退出。

-命令行客户端允许通过外部数据 (外部临时表) 来查询。更多相关信息,请参考 «[外部数据查询处理](../operations/table_engines/external_data.md)».
+命令行客户端允许通过外部数据 (外部临时表) 来查询。更多相关信息,请参考 «[外部数据查询处理](../engines/table_engines/special/external_data.md)».

 ## 配置 {#interfaces_cli_configuration}

diff --git a/docs/zh/interfaces/cpp.md b/docs/zh/interfaces/cpp.md
index 6f162036e01..4aa4f15a456 100644
--- a/docs/zh/interfaces/cpp.md
+++ b/docs/zh/interfaces/cpp.md
@@ -1,5 +1,6 @@
+
 # C ++客户端库 {#c-ke-hu-duan-ku}

-请参阅以下网站的自述文件[clickhouse-cpp](https://github.com/ClickHouse/clickhouse-cpp)资料库。
+请参阅 [clickhouse-cpp](https://github.com/ClickHouse/clickhouse-cpp) 资料库的README文件。

-[Original article](https://clickhouse.tech/docs/zh/interfaces/cpp/) 
+[原始文章](https://clickhouse.tech/docs/zh/interfaces/cpp/) 
diff --git a/docs/zh/interfaces/formats.md b/docs/zh/interfaces/formats.md
index 80ca5fdf221..64c1940df86 100644
--- a/docs/zh/interfaces/formats.md
+++ b/docs/zh/interfaces/formats.md
@@ -1,3 +1,4 @@
+
 # 输入输出格式 {#formats}

 ClickHouse 可以接受多种数据格式,可以在 (`INSERT`) 以及 (`SELECT`) 请求中使用。

@@ -10,19 +11,19 @@ ClickHouse 可以接受多种数据格式,可以在 (`INSERT`) 以及 (`SELECT
 | [TabSeparatedRaw](#tabseparatedraw) | ✗ | ✔ |
 | [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ |
 | [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔ | ✔ |
-| [Template](#format-template) | ✔ | ✔ |
+| [Template](#format-template) | ✔ | ✔ |
 | [TemplateIgnoreSpaces](#templateignorespaces) | ✔ | ✗ |
 | [CSV](#csv) | ✔ | ✔ |
 | [CSVWithNames](#csvwithnames) | ✔ | ✔ |
-| [CustomSeparated](#format-customseparated) | ✔ | ✔ |
-| [Values](#data-format-values) | ✔ | ✔ |
-| [Vertical](#vertical) | ✗ | ✔ |
+| [CustomSeparated](#format-customseparated) | ✔ | ✔ |
+| [Values](#data-format-values) | ✔ | ✔ |
+| [Vertical](#vertical) | ✗ | ✔ |
 | VerticalRaw | ✗ | ✔ |
 | [JSON](#json) | ✗ | ✔ |
 | [JSONCompact](#jsoncompact) | ✗ | ✔ |
 | [JSONEachRow](#jsoneachrow) | ✔ | ✔ |
 | [TSKV](#tskv) | ✔ | ✔ |
-| [Pretty](#pretty) | ✗ | ✔ |
+| [Pretty](#pretty) | ✗ | ✔ |
 | [PrettyCompact](#prettycompact) | ✗ | ✔ |
 | [PrettyCompactMonoBlock](#prettycompactmonoblock) | ✗ | ✔ |
 | [PrettyNoEscapes](#prettynoescapes) | ✗ | ✔ |
@@ -30,11 +31,11 @@
 | [Protobuf](#protobuf) | ✔ | ✔ |
 | [Avro](#data-format-avro) | ✔ | ✔ |
 | [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ |
-| [Parquet](#data-format-parquet) | ✔ | ✔ |
+| [Parquet](#data-format-parquet) | ✔ | ✔ |
 | [ORC](#data-format-orc) | ✔ | ✗ |
 | [RowBinary](#rowbinary) | ✔ | ✔ |
 | [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ |
-| [Native](#native) | ✔ | ✔ |
+| [Native](#native) | ✔ | ✔ |
 | [Null](#null) | ✗ | ✔ |
 | [XML](#xml) | ✗ | ✔ |
 | [CapnProto](#capnproto) | ✔ | ✔ |

@@ -70,7 +71,7 @@ SELECT EventDate, count() AS c FROM test.hits GROUP BY EventDate WITH TOTALS ORD

 整数以十进制形式写入。数字在开头可以包含额外的 `+` 字符(解析时忽略,格式化时不记录)。非负数不能包含负号。 读取时,允许将空字符串解析为零,或者(对于带符号的类型)将仅包含负号的字符串解析为零。 不符合相应数据类型的数字可能会被解析为不同的数字,而不会显示错误消息。

-浮点数以十进制形式写入。点号用作小数点分隔符。支持指数等符号,如’inf’,‘+ inf’,‘-inf’和’nan’。 浮点数的输入可以以小数点开始或结束。
+浮点数以十进制形式写入。点号用作小数点分隔符。支持指数等符号,如'inf'、'+inf'、'-inf'和'nan'。浮点数的输入可以以小数点开始或结束。
 格式化的时候,浮点数的精确度可能会丢失。
 解析的时候,没有严格需要去读取与机器可以表示的最接近的数值。

@@ -96,7 +97,7 @@ SELECT EventDate, count() AS c FROM test.hits GROUP BY EventDate WITH TOTALS ORD

 数组写在方括号内的逗号分隔值列表中。 通常情况下,数组中的数字项目会被拼凑,但日期,带时间的日期以及字符串将使用与上面相同的转义规则用单引号引起来。

-[NULL](../query_language/syntax.md) 将输出为 `\N`。
+[NULL](../sql_reference/syntax.md) 将输出为 `\N`。

 ## TabSeparatedRaw {#tabseparatedraw}

@@ -120,13 +121,13 @@

 这种格式也可以使用名称 `TSVWithNamesAndTypes` 来表示。

-## Template {#format-template}
+## Template {#format-template}

-This format allows to specify a custom format string with placeholders for values with specified escaping rule.
+此格式允许为具有指定转义规则的值指定带有占位符的自定义格式字符串。

-It uses settings `format_schema`, `format_schema_rows`, `format_schema_rows_between_delimiter` and some settings of other formats (e.g. `output_format_json_quote_64bit_integers` when using `JSON` escaping, see further)
+它使用设置 `format_schema`、`format_schema_rows`、`format_schema_rows_between_delimiter`,以及其他格式的一些设置(例如使用 `JSON` 转义时的 `output_format_json_quote_64bit_integers`,详见下文)。

-Format string `format_schema_rows` specifies rows format with the following syntax:
+格式字符串 `format_schema_rows` 使用以下语法指定行格式:

 `delimiter_1${column_1:serializeAs_1}delimiter_2${column_2:serializeAs_2} ... delimiter_N`,

 `Search phrase: 'bathroom interior design', count: 2166, ad price: $3;`

-The `format_schema_rows_between_delimiter` setting specifies delimiter between rows, which is printed (or expected) after every row except the last one (`\n` by default)
+`format_schema_rows_between_delimiter` 设置指定行之间的分隔符,该分隔符在除最后一行之外的每一行之后打印(或预期出现)(默认为 `\n`)。

-Format string `format_schema` has the same syntax as `format_schema_rows` and allows to specify a prefix, a suffix and a way to print some additional information. It contains the following placeholders instead of column names:
+格式字符串 `format_schema` 与 `format_schema_rows` 语法相同,并允许指定前缀、后缀以及打印附加信息的方式。它包含以下占位符而不是列名:

-- `data` is the rows with data in `format_schema_rows` format, separated by `format_schema_rows_between_delimiter`. This placeholder must be the first placeholder in the format string.
-- `totals` is the row with total values in `format_schema_rows` format (when using WITH TOTALS)
-- `min` is the row with minimum values in `format_schema_rows` format (when extremes is set to 1)
-- `max` is the row with maximum values in `format_schema_rows` format (when extremes is set to 1)
-- `rows` is the total number of output rows
-- `rows_before_limit` is the minimal number of rows there would have been without LIMIT. Output only if the query contains LIMIT. If the query contains GROUP BY, rows\_before\_limit\_at\_least is the exact number of rows there would have been without a LIMIT.
-- `time` is the request execution time in seconds
-- `rows_read` is the number of rows have been read
-- `bytes_read` is the number of bytes (uncompressed) have been read
+- `data` 是 `format_schema_rows` 格式的数据行,由 `format_schema_rows_between_delimiter` 分隔。此占位符必须是格式字符串中的第一个占位符。
+- `totals` 是 `format_schema_rows` 格式的总计值行(使用 WITH TOTALS 时)
+- `min` 是 `format_schema_rows` 格式的最小值行(当极值设置为1时)
+- `max` 是 `format_schema_rows` 格式的最大值行(当极值设置为1时)
+- `rows` 是输出行的总数
+- `rows_before_limit` 是在没有LIMIT的情况下至少会有的行数。仅当查询包含LIMIT时输出。如果查询包含GROUP BY,则 rows\_before\_limit\_at\_least 是没有LIMIT时的确切行数。
+- `time` 是请求执行时间,以秒为单位
+- `rows_read` 是已读取的行数
+- `bytes_read` 是已读取的字节数(未压缩)

-The placeholders `data`, `totals`, `min` and `max` must not have escaping rule specified (or `None` must be specified explicitly). The remaining placeholders may have any escaping rule specified.
-If the `format_schema` setting is an empty string, `${data}` is used as default value.
-For insert queries format allows to skip some columns or some fields if prefix or suffix (see example).
+占位符 `data`、`totals`、`min` 和 `max` 不能指定转义规则(或必须显式指定 `None`)。其余占位符可以指定任何转义规则。
+如果 `format_schema` 设置为空字符串,则使用 `${data}` 作为默认值。
+对于插入查询,如果使用前缀或后缀,该格式允许跳过某些列或字段(见示例)。

-`Select` example:
+`Select` 示例:

 ``` sql
 SELECT SearchPhrase, count() AS c FROM test.hits GROUP BY SearchPhrase ORDER BY c DESC LIMIT 5

 format_schema_rows_between_delimiter = '\n    '
 ```

-`Insert` example:
+`Insert` 示例:

     Some header
     Page views: 5, User id: 4324182021466249494, Useless field: hello, Duration: 146, Sign: -1

 format_schema = 'Some header\n${data}\nTotal rows: ${:CSV}\n',
 format_schema_rows = 'Page views: ${PageViews:CSV}, User id: ${UserID:CSV}, Useless field: ${:CSV}, Duration: ${Duration:CSV}, Sign: ${Sign:CSV}'
 ```

-`PageViews`, `UserID`, `Duration` and `Sign` inside placeholders are names of columns in the table. Values after `Useless field` in rows and after `\nTotal rows:` in suffix will be ignored.
-All delimiters in the input data must be strictly equal to delimiters in specified format strings.
+占位符内的 `PageViews`、`UserID`、`Duration` 和 `Sign` 是表中列的名称。行中 `Useless field` 之后的值以及后缀中 `\nTotal rows:` 之后的值将被忽略。
+输入数据中的所有分隔符必须严格等于指定格式字符串中的分隔符。

 ## TemplateIgnoreSpaces {#templateignorespaces}

-This format is suitable only for input.
-Similar to `Template`, but skips whitespace characters between delimiters and values in the input stream. However, if format strings contain whitespace characters, these characters will be expected in the input stream. Also allows to specify empty placeholders (`${}` or `${:None}`) to split some delimiter into separate parts to ignore spaces between them. Such placeholders are used only for skipping whitespace characters.
-It’s possible to read `JSON` using this format, if values of columns have the same order in all rows. For example, the following request can be used for inserting data from output example of format [JSON](#json):
+此格式仅适用于输入。
+类似于 `Template`,但跳过输入流中分隔符和值之间的空白字符。不过,如果格式字符串包含空白字符,则输入流中必须出现这些字符。还允许指定空占位符(`${}` 或 `${:None}`)将某个分隔符拆分为单独的部分,以忽略它们之间的空格。此类占位符仅用于跳过空白字符。
+如果列的值在所有行中具有相同的顺序,则可以使用此格式读取 `JSON`。例如,以下请求可用于插入格式 [JSON](#json) 的输出示例中的数据:

 ``` sql
 INSERT INTO table_name FORMAT TemplateIgnoreSpaces SETTINGS
@@ -254,7 +255,7 @@ format_schema_rows_between_delimiter = ','

     SearchPhrase=curtain designs count()=1064
     SearchPhrase=baku count()=1000

-[NULL](../query_language/syntax.md) 输出为 `\N`。
+[NULL](../sql_reference/syntax.md) 输出为 `\N`。

 ``` sql
 SELECT * FROM t_null FORMAT TSKV
 ```

@@ -288,10 +289,10 @@
 CSV 格式是和 TabSeparated 一样的方式输出总数和极值。

 会输出带头部行,和 `TabSeparatedWithNames` 一样。

-## CustomSeparated {#format-customseparated}
+## CustomSeparated {#format-customseparated}

-Similar to [Template](#format-template), but it prints or reads all columns and uses escaping rule from setting `format_custom_escaping_rule` and delimiters from settings `format_custom_field_delimiter`, `format_custom_row_before_delimiter`, `format_custom_row_after_delimiter`, `format_custom_row_between_delimiter`, `format_custom_result_before_delimiter` and `format_custom_result_after_delimiter`, not from format strings.
-There is also `CustomSeparatedIgnoreSpaces` format, which is similar to `TemplateIgnoreSpaces`.
+类似于 [Template](#format-template),但它打印或读取所有列,并使用设置 `format_custom_escaping_rule` 中的转义规则,以及设置 `format_custom_field_delimiter`、`format_custom_row_before_delimiter`、`format_custom_row_after_delimiter`、`format_custom_row_between_delimiter`、`format_custom_result_before_delimiter` 和 `format_custom_result_after_delimiter` 中的分隔符,而不是来自格式字符串。
+还有一种 `CustomSeparatedIgnoreSpaces` 格式,类似于 `TemplateIgnoreSpaces`。

## JSON {#json}

@@ -378,7 +379,7 @@ JSON 与 JavaScript 兼容。为了确保这一点,一些字符被另外转义

该格式仅适用于输出查询结果,但不适用于解析输入(将数据插入到表中)。

-ClickHouse 支持 [NULL](../query_language/syntax.md), 在 JSON 格式中以 `null` 输出来表示.
+ClickHouse 支持 [NULL](../sql_reference/syntax.md), 在 JSON 格式中以 `null` 输出来表示.

参考 JSONEachRow 格式。

@@ -449,23 +450,23 @@ ClickHouse 支持 [NULL](../query_language/syntax.md), 在 JSON 格式中以 `nu

对于解析,任何顺序都支持不同列的值。可以省略某些值 - 它们被视为等于它们的默认值。在这种情况下,零和空行被用作默认值。 作为默认值,不支持表中指定的复杂值。元素之间的空白字符被忽略。如果在对象之后放置逗号,它将被忽略。对象不一定必须用新行分隔。

-### Usage of Nested Structures {#jsoneachrow-nested}
+### 嵌套结构的使用 {#jsoneachrow-nested}

-If you have a table with the [Nested](../data_types/nested_data_structures/nested.md) data type columns, you can insert JSON data having the same structure. Enable this functionality with the [input\_format\_import\_nested\_json](../operations/settings/settings.md#settings-input_format_import_nested_json) setting.
+如果你的表包含 [Nested](../sql_reference/data_types/nested_data_structures/nested.md) 数据类型的列,则可以插入具有相同结构的 JSON 数据。通过 [input\_format\_import\_nested\_json](../operations/settings/settings.md#settings-input_format_import_nested_json) 设置启用此功能。

-For example, consider the following table:
+例如,请考虑下表:

``` sql
CREATE TABLE json_each_row_nested (n Nested (s String, i Int32) ) ENGINE = Memory
```

-As you can find in the `Nested` data type description, ClickHouse treats each component of the nested structure as a separate column, `n.s` and `n.i` for our table. So you can insert the data the following way:
+正如 `Nested` 数据类型说明中所述,ClickHouse 将嵌套结构的每个组件视为单独的列,对于我们的表即 `n.s` 和 `n.i`。因此可以通过以下方式插入数据:

``` sql
INSERT INTO json_each_row_nested FORMAT JSONEachRow {"n.s": ["abc", "def"], "n.i": [1, 23]}
```

-To insert data as hierarchical JSON object set [input\_format\_import\_nested\_json=1](../operations/settings/settings.md#settings-input_format_import_nested_json).
+要将数据作为分层的 JSON 对象插入,请设置 [input\_format\_import\_nested\_json=1](../operations/settings/settings.md#settings-input_format_import_nested_json)。

``` json
{
@@ -476,7 +477,7 @@ To insert data as hierarchical JSON object set [input\_format\_import\_nested\_j
}
```

-Without this setting ClickHouse throws the exception.
+如果没有此设置,ClickHouse 将抛出异常。

``` sql
SELECT name, value FROM system.settings WHERE name = 'input_format_import_nested_json'
@@ -508,7 +509,7 @@ SELECT * FROM json_each_row_nested
└───────────────┴────────┘
```

-## Native {#native}
+## Native {#native}

最高性能的格式。 据通过二进制格式的块进行写入和读取。对于每个块,该块中的行数,列数,列名称和类型以及列的部分将被相继记录。 换句话说,这种格式是 «列式»的 - 它不会将列转换为行。 这是用于在服务器之间进行交互的本地界面中使用的格式,用于使用命令行客户端和 C++ 客户端。

@@ -519,13 +520,13 @@

没有输出。但是,查询已处理完毕,并且在使用命令行客户端时,数据将传输到客户端。这仅用于测试,包括生产力测试。 显然,这种格式只适用于输出,不适用于解析。

-## Pretty {#pretty}
+## Pretty {#pretty}

将数据以表格形式输出,也可以使用 ANSI 转义字符在终端中设置颜色。 它会绘制一个完整的表格,每行数据在终端中占用两行。 每一个结果块都会以单独的表格输出。这是很有必要的,以便结果块不用缓冲结果输出(缓冲在可以预见结果集宽度的时候是很有必要的)。

-[NULL](../query_language/syntax.md) 输出为 `ᴺᵁᴸᴸ`。
+[NULL](../sql_reference/syntax.md) 输出为 `ᴺᵁᴸᴸ`。

``` sql
SELECT * FROM t_null
@@ -610,29 +611,29 @@

FixedString 被简单地表示为一个字节序列。

数组表示为 varint 长度(无符号 [LEB128](https://en.wikipedia.org/wiki/LEB128)),后跟有序的数组元素。

-对于 [NULL](../query_language/syntax.md#null-literal) 的支持, 一个为 1 或 0 的字节会加在每个 [Nullable](../data_types/nullable.md) 值前面。如果为 1, 那么该值就是 `NULL`。 如果为 0,则不为 `NULL`。
+对于 [NULL](../sql_reference/syntax.md#null-literal) 的支持, 一个为 1 或 0 的字节会加在每个 [Nullable](../sql_reference/data_types/nullable.md) 值前面。如果为 1, 那么该值就是 `NULL`。 如果为 0,则不为 `NULL`。

## RowBinaryWithNamesAndTypes {#rowbinarywithnamesandtypes}

-Similar to [RowBinary](#rowbinary), but with added header:
+类似于 [RowBinary](#rowbinary),但添加了头部:

-- [LEB128](https://en.wikipedia.org/wiki/LEB128)-encoded number of columns (N)
-- N `String`s specifying column names
-- N `String`s specifying column types
+- [LEB128](https://en.wikipedia.org/wiki/LEB128) 编码的列数(N)
+- N 个指定列名的 `String`
+- N 个指定列类型的 `String`

-## Values {#data-format-values}
+## Values {#data-format-values}

-在括号中打印每一行。行由逗号分隔。最后一行之后没有逗号。括号内的值也用逗号分隔。数字以十进制格式输出,不含引号。 数组以方括号输出。带有时间的字符串,日期和时间用引号包围输出。转义字符的解析规则与 [TabSeparated](#tabseparated) 格式类似。 在格式化过程中,不插入额外的空格,但在解析过程中,空格是被允许并跳过的(除了数组值之外的空格,这是不允许的)。[NULL](../query_language/syntax.md) 为 `NULL`。
+在括号中打印每一行。行由逗号分隔。最后一行之后没有逗号。括号内的值也用逗号分隔。数字以十进制格式输出,不含引号。 数组以方括号输出。带有时间的字符串,日期和时间用引号包围输出。转义字符的解析规则与 [TabSeparated](#tabseparated) 格式类似。 在格式化过程中,不插入额外的空格,但在解析过程中,空格是被允许并跳过的(除了数组值之外的空格,这是不允许的)。[NULL](../sql_reference/syntax.md) 为 `NULL`。

以 Values 格式传递数据时需要转义的最小字符集是:单引号和反斜线。

这是 `INSERT INTO t VALUES ...` 中可以使用的格式,但您也可以将其用于查询结果。

-## Vertical {#vertical}
+## Vertical {#vertical}

使用指定的列名在单独的行上打印每个值。如果每行都包含大量列,则此格式便于打印一行或几行。

-[NULL](../query_language/syntax.md) 输出为 `ᴺᵁᴸᴸ`。
+[NULL](../sql_reference/syntax.md) 输出为 `ᴺᵁᴸᴸ`。

示例:

@@ -747,9 +748,9 @@ SELECT * FROM t_null FORMAT Vertical

## CapnProto {#capnproto}

-Cap’n Proto 是一种二进制消息格式,类似 Protocol Buffers 和 Thriftis,但与 JSON 或 MessagePack 格式不一样。
+Cap'n Proto 是一种二进制消息格式,类似 Protocol Buffers 和 Thrift,但与 JSON 或 MessagePack 格式不一样。

-Cap’n Proto 消息格式是严格类型的,而不是自我描述,这意味着它们不需要外部的描述。这种格式可以实时地应用,并针对每个查询进行缓存。
+Cap'n Proto 消息格式是严格类型的,而不是自我描述,这意味着它们需要外部的模式描述。这种格式可以实时地应用,并针对每个查询进行缓存。

``` sql
SELECT SearchPhrase, count() AS c FROM test.hits
@@ -763,18 +764,18 @@ SELECT SearchPhrase, count() AS c FROM test.hits
   c @1 :Uint64;
}

-格式文件存储的目录可以在服务配置中的 [format\_schema\_path](../operations/server_settings/settings.md) 指定。
+格式文件存储的目录可以在服务配置中的 [format\_schema\_path](../operations/server_configuration_parameters/settings.md) 指定。

-Cap’n Proto 反序列化是很高效的,通常不会增加系统的负载。
+Cap'n Proto 反序列化是很高效的,通常不会增加系统的负载。

## Protobuf {#protobuf}

-Protobuf - is a [Protocol Buffers](https://developers.google.com/protocol-buffers/) format.
+Protobuf 是一种 [Protocol Buffers](https://developers.google.com/protocol-buffers/) 格式。

-This format requires an external format schema.
The schema is cached between queries.
-ClickHouse supports both `proto2` and `proto3` syntaxes. Repeated/optional/required fields are supported.
+此格式需要外部的格式模式。模式在查询之间缓存。
+ClickHouse 支持 `proto2` 和 `proto3` 语法,支持 repeated/optional/required 字段。

-Usage examples:
+使用示例:

``` sql
SELECT * FROM test.table FORMAT Protobuf SETTINGS format_schema = 'schemafile:MessageType'
@@ -784,7 +785,7 @@ SELECT * FROM test.table FORMAT Protobuf SETTINGS format_schema = 'schemafile:Me
cat protobuf_messages.bin | clickhouse-client --query "INSERT INTO test.table FORMAT Protobuf SETTINGS format_schema='schemafile:MessageType'"
```

-where the file `schemafile.proto` looks like this:
+其中 `schemafile.proto` 文件内容如下:

``` capnp
syntax = "proto3";
@@ -797,11 +798,11 @@ message MessageType {
};
```

-To find the correspondence between table columns and fields of Protocol Buffers’ message type ClickHouse compares their names.
-This comparison is case-insensitive and the characters `_` (underscore) and `.` (dot) are considered as equal.
-If types of a column and a field of Protocol Buffers’ message are different the necessary conversion is applied.
+为了查找表列与 Protocol Buffers 消息类型字段之间的对应关系,ClickHouse 会比较它们的名称。
+这种比较不区分大小写,并且字符 `_`(下划线)和 `.`(点)被视为相等。
+如果列与 Protocol Buffers 消息字段的类型不同,则会应用必要的转换。

-Nested messages are supported. For example, for the field `z` in the following message type
+支持嵌套消息。例如,对于下面消息类型中的字段 `z`:

``` capnp
message MessageType {
@@ -815,10 +816,10 @@ message MessageType {
};
```

-ClickHouse tries to find a column named `x.y.z` (or `x_y_z` or `X.y_Z` and so on).
-Nested messages are suitable to input or output a [nested data structures](../data_types/nested_data_structures/nested.md).
+ClickHouse 会尝试查找名为 `x.y.z`(或 `x_y_z`、`X.y_Z` 等)的列。
+嵌套消息适合用来输入或输出 [嵌套数据结构](../sql_reference/data_types/nested_data_structures/nested.md)。

-Default values defined in a protobuf schema like this
+在 protobuf 模式中像下面这样定义的默认值

``` capnp
syntax = "proto2";
@@ -828,91 +829,91 @@ message MessageType {
}
```

-are not applied; the [table defaults](../query_language/create.md#create-default-values) are used instead of them.
+不会被应用;而是使用 [表默认值](../sql_reference/statements/create.md#create-default-values) 来代替它们。

-ClickHouse inputs and outputs protobuf messages in the `length-delimited` format.
-It means before every message should be written its length as a [varint](https://developers.google.com/protocol-buffers/docs/encoding#varints).
-See also [how to read/write length-delimited protobuf messages in popular languages](https://cwiki.apache.org/confluence/display/GEODE/Delimiting+Protobuf+Messages).
+ClickHouse 以 `length-delimited` 格式输入和输出 protobuf 消息。
+这意味着在每条消息之前,应以 [varint](https://developers.google.com/protocol-buffers/docs/encoding#varints) 形式写入其长度。
+另请参阅 [如何在流行语言中读取/写入长度分隔的 protobuf 消息](https://cwiki.apache.org/confluence/display/GEODE/Delimiting+Protobuf+Messages)。

## Avro {#data-format-avro}

-[Apache Avro](http://avro.apache.org/) is a row-oriented data serialization framework developed within Apache’s Hadoop project.
+[Apache Avro](http://avro.apache.org/) 是在 Apache Hadoop 项目中开发的面向行的数据序列化框架。

-ClickHouse Avro format supports reading and writing [Avro data files](http://avro.apache.org/docs/current/spec.html#Object+Container+Files).
+ClickHouse 的 Avro 格式支持读取和写入 [Avro 数据文件](http://avro.apache.org/docs/current/spec.html#Object+Container+Files)。
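+下面是一个最小的往返示意(仅为演示;其中表名 `avro_demo` 与文件名 `data.avro`、`out.avro` 均为假设,详细说明见后文各小节):
+
+``` bash
+# 从 Avro 文件导入:假设文件的根模式为 record 类型,且字段名与表列名一致
+$ cat data.avro | clickhouse-client --query="INSERT INTO avro_demo FORMAT Avro"
+# 再将表数据导出为 Avro 文件
+$ clickhouse-client --query="SELECT * FROM avro_demo FORMAT Avro" > out.avro
+```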
-### Data Types Matching {#data_types-matching}
+### 数据类型匹配 {#data_types-matching}

-The table below shows supported data types and how they match ClickHouse [data types](../data_types/index.md) in `INSERT` and `SELECT` queries.
+下表显示了支持的数据类型,以及它们在 `INSERT` 和 `SELECT` 查询中如何与 ClickHouse [数据类型](../sql_reference/data_types/index.md) 对应。

-| Avro data type `INSERT`                     | ClickHouse data type                                                                       | Avro data type `SELECT`      |
-|---------------------------------------------|--------------------------------------------------------------------------------------------|------------------------------|
-| `boolean`, `int`, `long`, `float`, `double` | [Int(8\|16\|32)](../data_types/int_uint.md), [UInt(8\|16\|32)](../data_types/int_uint.md) | `int` |
-| `boolean`, `int`, `long`, `float`, `double` | [Int64](../data_types/int_uint.md), [UInt64](../data_types/int_uint.md) | `long` |
-| `boolean`, `int`, `long`, `float`, `double` | [Float32](../data_types/float.md) | `float` |
-| `boolean`, `int`, `long`, `float`, `double` | [Float64](../data_types/float.md) | `double` |
-| `bytes`, `string`, `fixed`, `enum` | [String](../data_types/string.md) | `bytes` |
-| `bytes`, `string`, `fixed` | [FixedString(N)](../data_types/fixedstring.md) | `fixed(N)` |
-| `enum` | [Enum(8\|16)](../data_types/enum.md) | `enum` |
-| `array(T)` | [Array(T)](../data_types/array.md) | `array(T)` |
-| `union(null, T)`, `union(T, null)` | [Nullable(T)](../data_types/date.md) | `union(null, T)` |
-| `null` | [Nullable(Nothing)](../data_types/special_data_types/nothing.md) | `null` |
-| `int (date)` \* | [Date](../data_types/date.md) | `int (date)` \* |
-| `long (timestamp-millis)` \* | [DateTime64(3)](../data_types/datetime.md) | `long (timestamp-millis)` \* |
-| `long (timestamp-micros)` \* | [DateTime64(6)](../data_types/datetime.md) | `long (timestamp-micros)` \* |
+| Avro 数据类型 `INSERT`                      | ClickHouse 数据类型                                                                                                    | Avro 数据类型 `SELECT`       |
+|---------------------------------------------|------------------------------------------------------------------------------------------------------------------------|------------------------------|
+| `boolean`, `int`, `long`, `float`, `double` | [Int(8\|16\|32)](../sql_reference/data_types/int_uint.md), [UInt(8\|16\|32)](../sql_reference/data_types/int_uint.md) | `int` |
+| `boolean`, `int`, `long`, `float`, `double` | [Int64](../sql_reference/data_types/int_uint.md), [UInt64](../sql_reference/data_types/int_uint.md) | `long` |
+| `boolean`, `int`, `long`, `float`, `double` | [Float32](../sql_reference/data_types/float.md) | `float` |
+| `boolean`, `int`, `long`, `float`, `double` | [Float64](../sql_reference/data_types/float.md) | `double` |
+| `bytes`, `string`, `fixed`, `enum` | [String](../sql_reference/data_types/string.md) | `bytes` |
+| `bytes`, `string`, `fixed` | [FixedString(N)](../sql_reference/data_types/fixedstring.md) | `fixed(N)` |
+| `enum` | [Enum(8\|16)](../sql_reference/data_types/enum.md) | `enum` |
+| `array(T)` | [Array(T)](../sql_reference/data_types/array.md) | `array(T)` |
+| `union(null, T)`, `union(T, null)` | [Nullable(T)](../sql_reference/data_types/nullable.md) | `union(null, T)` |
+| `null` | [Nullable(Nothing)](../sql_reference/data_types/special_data_types/nothing.md) | `null` |
+| `int (date)` \* | [Date](../sql_reference/data_types/date.md) | `int (date)` \* |
+| `long (timestamp-millis)` \* | [DateTime64(3)](../sql_reference/data_types/datetime.md) | `long (timestamp-millis)` \* |
+| `long (timestamp-micros)` \* | 
[DateTime64(6)](../sql_reference/data_types/datetime.md) | `long (timestamp-micros)` \* |

-\* [Avro logical types](http://avro.apache.org/docs/current/spec.html#Logical+Types)
+\* [Avro 逻辑类型](http://avro.apache.org/docs/current/spec.html#Logical+Types)

-Unsupported Avro data types: `record` (non-root), `map`
+不支持的 Avro 数据类型:`record`(非根)、`map`

-Unsupported Avro logical data types: `uuid`, `time-millis`, `time-micros`, `duration`
+不支持的 Avro 逻辑数据类型:`uuid`、`time-millis`、`time-micros`、`duration`

-### Inserting Data {#inserting-data}
+### 插入数据 {#inserting-data}

-To insert data from an Avro file into ClickHouse table:
+要将 Avro 文件中的数据插入 ClickHouse 表:

``` bash
$ cat file.avro | clickhouse-client --query="INSERT INTO {some_table} FORMAT Avro"
```

-The root schema of input Avro file must be of `record` type.
+输入 Avro 文件的根模式必须是 `record` 类型。

-To find the correspondence between table columns and fields of Avro schema ClickHouse compares their names. This comparison is case-sensitive.
-Unused fields are skipped.
+为了查找表列与 Avro 模式字段之间的对应关系,ClickHouse 会比较它们的名称。此比较区分大小写。
+未使用的字段会被跳过。

-Data types of a ClickHouse table columns can differ from the corresponding fields of the Avro data inserted. When inserting data, ClickHouse interprets data types according to the table above and then [casts](../query_language/functions/type_conversion_functions/#type_conversion_function-cast) the data to corresponding column type.
+ClickHouse 表列的数据类型可能与插入的 Avro 数据的相应字段不同。插入数据时,ClickHouse 根据上表解释数据类型,然后将数据[转换](../query_language/functions/type_conversion_functions/#type_conversion_function-cast)为相应的列类型。

-### Selecting Data {#selecting-data}
+### 选择数据 {#selecting-data}

-To select data from ClickHouse table into an Avro file:
+要从 ClickHouse 表中选择数据并保存为 Avro 文件:

``` bash
$ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Avro" > file.avro
```

-Column names must:
+列名必须:

-- start with `[A-Za-z_]`
-- subsequently contain only `[A-Za-z0-9_]`
+- 以 `[A-Za-z_]` 开头
+- 随后仅包含 `[A-Za-z0-9_]`

-Output Avro file compression and sync interval can be configured with [output\_format\_avro\_codec](../operations/settings/settings.md#settings-output_format_avro_codec) and [output\_format\_avro\_sync\_interval](../operations/settings/settings.md#settings-output_format_avro_sync_interval) respectively.
+输出 Avro 文件的压缩方式和同步间隔可以分别通过 [output\_format\_avro\_codec](../operations/settings/settings.md#settings-output_format_avro_codec) 和 [output\_format\_avro\_sync\_interval](../operations/settings/settings.md#settings-output_format_avro_sync_interval) 配置。

## AvroConfluent {#data-format-avro-confluent}

-AvroConfluent supports decoding single-object Avro messages commonly used with [Kafka](https://kafka.apache.org/) and [Confluent Schema Registry](https://docs.confluent.io/current/schema-registry/index.html).
+AvroConfluent 支持解码常与 [Kafka](https://kafka.apache.org/) 和 [Confluent Schema Registry](https://docs.confluent.io/current/schema-registry/index.html) 一起使用的单对象 Avro 消息。

-Each Avro message embeds a schema id that can be resolved to the actual schema with help of the Schema Registry.
+每条 Avro 消息都嵌入了一个模式 id,借助 Schema Registry 可以将其解析为实际的模式。

-Schemas are cached once resolved.
+模式解析后会进行缓存。

-Schema Registry URL is configured with [format\_avro\_schema\_registry\_url](../operations/settings/settings.md#settings-format_avro_schema_registry_url)
+Schema Registry 的 URL 通过 [format\_avro\_schema\_registry\_url](../operations/settings/settings.md#settings-format_avro_schema_registry_url) 配置

-### Data Types Matching {#data_types-matching-1}
+### 数据类型匹配 {#data_types-matching-1}

-Same as [Avro](#data-format-avro)
+与 [Avro](#data-format-avro) 相同

-### Usage {#usage}
+### 用法 {#usage}

-To quickly verify schema resolution you can use [kafkacat](https://github.com/edenhill/kafkacat) with [clickhouse-local](../operations/utils/clickhouse-local.md):
+要快速验证模式解析,您可以将 [kafkacat](https://github.com/edenhill/kafkacat) 与 [clickhouse-local](../operations/utilities/clickhouse-local.md) 一起使用:

``` bash
$ kafkacat -b kafka-broker -C -t topic1 -o beginning -f '%s' -c 3 | clickhouse-local --input-format AvroConfluent --format_avro_schema_registry_url 'http://schema-registry' -S "field1 Int64, field2 String" -q 'select * from table'
@@ -921,7 +922,7 @@ $ kafkacat -b kafka-broker -C -t topic1 -o beginning -f '%s' -c 3 | clickhouse-
3 c
```

-To use `AvroConfluent` with [Kafka](../operations/table_engines/kafka.md):
+要将 `AvroConfluent` 与 [Kafka](../engines/table_engines/integrations/kafka.md) 一起使用:

``` sql
CREATE TABLE topic1_stream
@@ -941,123 +942,123 @@ SET format_avro_schema_registry_url = 'http://schema-registry';
SELECT * FROM topic1_stream;
```

-!!! note "Warning"
-    Setting `format_avro_schema_registry_url` needs to be configured in `users.xml` to maintain it’s value after a restart.
+!!! note "警告"
+    设置 `format_avro_schema_registry_url` 需要在 `users.xml` 中配置,以便在重启后保留其值。

-## Parquet {#data-format-parquet}
+## Parquet {#data-format-parquet}

-[Apache Parquet](http://parquet.apache.org/) is a columnar storage format widespread in the Hadoop ecosystem. ClickHouse supports read and write operations for this format.
+[Apache Parquet](http://parquet.apache.org/) 是 Hadoop 生态系统中普遍使用的列式存储格式。ClickHouse 支持对此格式的读写操作。

-### Data Types Matching {#data_types-matching-2}
+### 数据类型匹配 {#data_types-matching-2}

-The table below shows supported data types and how they match ClickHouse [data types](../data_types/index.md) in `INSERT` and `SELECT` queries.
+下表显示了支持的数据类型,以及它们在 `INSERT` 和 `SELECT` 查询中如何与 ClickHouse [数据类型](../sql_reference/data_types/index.md) 对应。

-| Parquet data type (`INSERT`) | ClickHouse data type                        | Parquet data type (`SELECT`) |
-|------------------------------|---------------------------------------------|------------------------------|
-| `UINT8`, `BOOL` | [UInt8](../data_types/int_uint.md) | `UINT8` |
-| `INT8` | [Int8](../data_types/int_uint.md) | `INT8` |
-| `UINT16` | [UInt16](../data_types/int_uint.md) | `UINT16` |
-| `INT16` | [Int16](../data_types/int_uint.md) | `INT16` |
-| `UINT32` | [UInt32](../data_types/int_uint.md) | `UINT32` |
-| `INT32` | [Int32](../data_types/int_uint.md) | `INT32` |
-| `UINT64` | [UInt64](../data_types/int_uint.md) | `UINT64` |
-| `INT64` | [Int64](../data_types/int_uint.md) | `INT64` |
-| `FLOAT`, `HALF_FLOAT` | [Float32](../data_types/float.md) | `FLOAT` |
-| `DOUBLE` | [Float64](../data_types/float.md) | `DOUBLE` |
-| `DATE32` | [Date](../data_types/date.md) | `UINT16` |
-| `DATE64`, `TIMESTAMP` | [DateTime](../data_types/datetime.md) | `UINT32` |
-| `STRING`, `BINARY` | [String](../data_types/string.md) | `STRING` |
-| — | [FixedString](../data_types/fixedstring.md) | `STRING` |
-| `DECIMAL` | [Decimal](../data_types/decimal.md) | `DECIMAL` |
+| Parquet 数据类型 (`INSERT`) | ClickHouse 数据类型                                        | Parquet 数据类型 (`SELECT`) |
+|----------------------------|------------------------------------------------------------|----------------------------|
+| `UINT8`, `BOOL` | [UInt8](../sql_reference/data_types/int_uint.md) | `UINT8` |
+| `INT8` | [Int8](../sql_reference/data_types/int_uint.md) | `INT8` |
+| `UINT16` | [UInt16](../sql_reference/data_types/int_uint.md) | `UINT16` |
+| `INT16` | [Int16](../sql_reference/data_types/int_uint.md) | `INT16` |
+| `UINT32` | [UInt32](../sql_reference/data_types/int_uint.md) | `UINT32` |
+| `INT32` | [Int32](../sql_reference/data_types/int_uint.md) | `INT32` |
+| `UINT64` | [UInt64](../sql_reference/data_types/int_uint.md) | `UINT64` |
+| `INT64` | [Int64](../sql_reference/data_types/int_uint.md) | `INT64` |
+| `FLOAT`, `HALF_FLOAT` | [Float32](../sql_reference/data_types/float.md) | `FLOAT` |
+| `DOUBLE` | [Float64](../sql_reference/data_types/float.md) | `DOUBLE` |
+| `DATE32` | [Date](../sql_reference/data_types/date.md) | `UINT16` |
+| `DATE64`, `TIMESTAMP` | [DateTime](../sql_reference/data_types/datetime.md) | `UINT32` |
+| `STRING`, `BINARY` | [String](../sql_reference/data_types/string.md) | `STRING` |
+| — | [FixedString](../sql_reference/data_types/fixedstring.md) | `STRING` |
+| `DECIMAL` | [Decimal](../sql_reference/data_types/decimal.md) | `DECIMAL` |

-ClickHouse supports configurable precision of `Decimal` type. The `INSERT` query treats the Parquet `DECIMAL` type as the ClickHouse `Decimal128` type.
+ClickHouse 支持精度可配置的 `Decimal` 类型。`INSERT` 查询将 Parquet 的 `DECIMAL` 类型视为 ClickHouse 的 `Decimal128` 类型。

-Unsupported Parquet data types: `DATE32`, `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`.
+不支持的 Parquet 数据类型:`DATE32`、`TIME32`、`FIXED_SIZE_BINARY`、`JSON`、`UUID`、`ENUM`。

-Data types of a ClickHouse table columns can differ from the corresponding fields of the Parquet data inserted. When inserting data, ClickHouse interprets data types according to the table above and then [cast](../query_language/functions/type_conversion_functions/#type_conversion_function-cast) the data to that data type which is set for the ClickHouse table column.
+ClickHouse 表列的数据类型可能与插入的 Parquet 数据的相应字段不同。插入数据时,ClickHouse 根据上表解释数据类型,然后将数据[转换](../query_language/functions/type_conversion_functions/#type_conversion_function-cast)为 ClickHouse 表列所设置的数据类型。

-### Inserting and Selecting Data {#inserting-and-selecting-data}
+### 插入和选择数据 {#inserting-and-selecting-data}

-You can insert Parquet data from a file into ClickHouse table by the following command:
+您可以通过以下命令将文件中的 Parquet 数据插入到 ClickHouse 表中:

``` bash
$ cat {filename} | clickhouse-client --query="INSERT INTO {some_table} FORMAT Parquet"
```

-You can select data from a ClickHouse table and save them into some file in the Parquet format by the following command:
+您可以从 ClickHouse 表中选择数据,并通过以下命令将其保存为 Parquet 格式的文件:

``` bash
$ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_file.pq}
```

-To exchange data with Hadoop, you can use [HDFS table engine](../operations/table_engines/hdfs.md).
+要与 Hadoop 交换数据,您可以使用 [HDFS 表引擎](../engines/table_engines/integrations/hdfs.md)。

## ORC {#data-format-orc}

-[Apache ORC](https://orc.apache.org/) is a columnar storage format widespread in the Hadoop ecosystem. You can only insert data in this format to ClickHouse.
+[Apache ORC](https://orc.apache.org/) 是 Hadoop 生态系统中普遍使用的列式存储格式。您只能将此格式的数据插入 ClickHouse。

-### Data Types Matching {#data_types-matching-3}
+### 数据类型匹配 {#data_types-matching-3}

-The table below shows supported data types and how they match ClickHouse [data types](../data_types/index.md) in `INSERT` queries.
+下表显示了支持的数据类型,以及它们在 `INSERT` 查询中如何与 ClickHouse [数据类型](../sql_reference/data_types/index.md) 对应。

-| ORC data type (`INSERT`) | ClickHouse data type                  |
-|--------------------------|---------------------------------------|
-| `UINT8`, `BOOL` | [UInt8](../data_types/int_uint.md) |
-| `INT8` | [Int8](../data_types/int_uint.md) |
-| `UINT16` | [UInt16](../data_types/int_uint.md) |
-| `INT16` | [Int16](../data_types/int_uint.md) |
-| `UINT32` | [UInt32](../data_types/int_uint.md) |
-| `INT32` | [Int32](../data_types/int_uint.md) |
-| `UINT64` | [UInt64](../data_types/int_uint.md) |
-| `INT64` | [Int64](../data_types/int_uint.md) |
-| `FLOAT`, `HALF_FLOAT` | [Float32](../data_types/float.md) |
-| `DOUBLE` | [Float64](../data_types/float.md) |
-| `DATE32` | [Date](../data_types/date.md) |
-| `DATE64`, `TIMESTAMP` | [DateTime](../data_types/datetime.md) |
-| `STRING`, `BINARY` | [String](../data_types/string.md) |
-| `DECIMAL` | [Decimal](../data_types/decimal.md) |
+| ORC 数据类型 (`INSERT`) | ClickHouse 数据类型                                  |
+|------------------------|------------------------------------------------------|
+| `UINT8`, `BOOL` | [UInt8](../sql_reference/data_types/int_uint.md) |
+| `INT8` | [Int8](../sql_reference/data_types/int_uint.md) |
+| `UINT16` | [UInt16](../sql_reference/data_types/int_uint.md) |
+| `INT16` | [Int16](../sql_reference/data_types/int_uint.md) |
+| `UINT32` | [UInt32](../sql_reference/data_types/int_uint.md) |
+| `INT32` | [Int32](../sql_reference/data_types/int_uint.md) |
+| `UINT64` | [UInt64](../sql_reference/data_types/int_uint.md) |
+| `INT64` | [Int64](../sql_reference/data_types/int_uint.md) |
+| `FLOAT`, `HALF_FLOAT` | [Float32](../sql_reference/data_types/float.md) |
+| `DOUBLE` | [Float64](../sql_reference/data_types/float.md) |
+| `DATE32` | [Date](../sql_reference/data_types/date.md) |
+| `DATE64`, `TIMESTAMP` | [DateTime](../sql_reference/data_types/datetime.md) |
+| `STRING`, `BINARY` | [String](../sql_reference/data_types/string.md) |
+| `DECIMAL` | 
[Decimal](../sql_reference/data_types/decimal.md) |

-ClickHouse supports configurable precision of the `Decimal` type. The `INSERT` query treats the ORC `DECIMAL` type as the ClickHouse `Decimal128` type.
+ClickHouse 支持精度可配置的 `Decimal` 类型。`INSERT` 查询将 ORC 的 `DECIMAL` 类型视为 ClickHouse 的 `Decimal128` 类型。

-Unsupported ORC data types: `DATE32`, `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`.
+不支持的 ORC 数据类型:`DATE32`、`TIME32`、`FIXED_SIZE_BINARY`、`JSON`、`UUID`、`ENUM`。

-The data types of ClickHouse table columns don’t have to match the corresponding ORC data fields. When inserting data, ClickHouse interprets data types according to the table above and then [casts](../query_language/functions/type_conversion_functions/#type_conversion_function-cast) the data to the data type set for the ClickHouse table column.
+ClickHouse 表列的数据类型不必与相应的 ORC 数据字段匹配。插入数据时,ClickHouse 根据上表解释数据类型,然后将数据[转换](../query_language/functions/type_conversion_functions/#type_conversion_function-cast)为 ClickHouse 表列所设置的数据类型。

-### Inserting Data {#inserting-data-1}
+### 插入数据 {#inserting-data-1}

-You can insert ORC data from a file into ClickHouse table by the following command:
+您可以通过以下命令将文件中的 ORC 数据插入到 ClickHouse 表中:

``` bash
$ cat filename.orc | clickhouse-client --query="INSERT INTO some_table FORMAT ORC"
```

-To exchange data with Hadoop, you can use [HDFS table engine](../operations/table_engines/hdfs.md).
+要与 Hadoop 交换数据,您可以使用 [HDFS 表引擎](../engines/table_engines/integrations/hdfs.md)。

-## Format Schema {#formatschema}
+## 格式模式 {#formatschema}

-The file name containing the format schema is set by the setting `format_schema`.
-It’s required to set this setting when it is used one of the formats `Cap'n Proto` and `Protobuf`.
-The format schema is a combination of a file name and the name of a message type in this file, delimited by colon,
+包含格式模式的文件名由设置 `format_schema` 指定。
+当使用 `Cap'n Proto` 或 `Protobuf` 格式时,必须设置此设置。
+格式模式是文件名与该文件中消息类型名称的组合,以冒号分隔,
e.g. `schemafile.proto:MessageType`.
-If the file has the standard extension for the format (for example, `.proto` for `Protobuf`),
-it can be omitted and in this case the format schema looks like `schemafile:MessageType`.
+如果文件具有该格式的标准扩展名(例如,`Protobuf` 对应 `.proto`),
+则扩展名可以省略,此时格式模式形如 `schemafile:MessageType`。

-If you input or output data via the [client](../interfaces/cli.md) in the interactive mode, the file name specified in the format schema
-can contain an absolute path or a path relative to the current directory on the client.
-If you use the client in the batch mode, the path to the schema must be relative due to security reasons.
+如果您在交互模式下通过[客户端](../interfaces/cli.md)输入或输出数据,格式模式中指定的文件名
+可以包含绝对路径,或相对于客户端当前目录的路径。
+如果在批处理模式下使用客户端,出于安全原因,模式的路径必须是相对路径。

-If you input or output data via the [HTTP interface](../interfaces/http.md) the file name specified in the format schema
-should be located in the directory specified in [format\_schema\_path](../operations/server_settings/settings.md#server_settings-format_schema_path)
-in the server configuration.
+如果您通过 [HTTP 接口](../interfaces/http.md) 输入或输出数据,格式模式中指定的文件名
+应位于服务器配置中 [format\_schema\_path](../operations/server_configuration_parameters/settings.md#server_configuration_parameters-format_schema_path)
+指定的目录中。

-[Original article](https://clickhouse.tech/docs/en/interfaces/formats/)
+[原始文章](https://clickhouse.tech/docs/en/interfaces/formats/)

-## Skipping Errors {#skippingerrors}
+## 跳过错误 {#skippingerrors}

-Some formats such as `CSV`, `TabSeparated`, `TSKV`, `JSONEachRow`, `Template`, `CustomSeparated` and `Protobuf` can skip broken row if parsing error occurred and continue parsing from the beginning of next row. See [input\_format\_allow\_errors\_num](../operations/settings/settings.md#settings-input_format_allow_errors_num) and
-[input\_format\_allow\_errors\_ratio](../operations/settings/settings.md#settings-input_format_allow_errors_ratio) settings.
-Limitations:
-- In case of parsing error `JSONEachRow` skips all data until the new line (or EOF), so rows must be delimited by `\n` to count errors correctly.
-- `Template` and `CustomSeparated` use delimiter after the last column and delimiter between rows to find the beginning of next row, so skipping errors works only if at least one of them is not empty.
+`CSV`、`TabSeparated`、`TSKV`、`JSONEachRow`、`Template`、`CustomSeparated` 和 `Protobuf` 等格式可以在发生解析错误时跳过损坏的行,并从下一行的开头继续解析。参见 [input\_format\_allow\_errors\_num](../operations/settings/settings.md#settings-input_format_allow_errors_num) 和
+[input\_format\_allow\_errors\_ratio](../operations/settings/settings.md#settings-input_format_allow_errors_ratio) 设置。
+限制:
+- 发生解析错误时,`JSONEachRow` 会跳过直到换行符(或 EOF)的所有数据,因此行必须以 `\n` 分隔,才能正确统计错误数。
+- `Template` 和 `CustomSeparated` 使用最后一列之后的分隔符以及行之间的分隔符来查找下一行的开头,因此只有当其中至少一个不为空时,跳过错误才有效。

[来源文章](https://clickhouse.tech/docs/zh/interfaces/formats/)

diff --git a/docs/zh/interfaces/http.md b/docs/zh/interfaces/http.md
index 1dfbe87b7e0..ca8a9076fba 100644
--- a/docs/zh/interfaces/http.md
+++ b/docs/zh/interfaces/http.md
@@ -1,3 +1,4 @@
+
 # HTTP 客户端 {#http-ke-hu-duan}

HTTP 接口可以让你通过任何平台和编程语言来使用 ClickHouse。我们用 Java 和 Perl 以及 shell 脚本来访问它。在其他的部门中,HTTP 接口会用在 Perl,Python 以及 Go 中。HTTP 接口比 TCP 原生接口更为局限,但是却有更好的兼容性。

@@ -17,7 +18,7 @@
Ok.
当使用 GET 方法请求时,`readonly` 会被设置。换句话说,若要作修改数据的查询,只能发送 POST 方法的请求。可以将查询通过 POST 主体发送,也可以通过 URL 参数发送。

-Examples:
+示例:

``` bash
$ curl 'http://localhost:8123/?query=SELECT%201'
@@ -200,7 +201,7 @@ $ echo 'SELECT number FROM system.numbers LIMIT 10' | curl 'http://localhost:812

可选的 `quota_key` 参数可能当做 quota key 传入(或者任何字符串)。更多信息,参见 «[配额](../operations/quotas.md#quotas)» 部分。

-HTTP 接口允许传入额外的数据(外部临时表)来查询。更多信息,参见 «[外部数据查询处理](../operations/table_engines/external_data.md)» 部分。
+HTTP 接口允许传入额外的数据(外部临时表)来查询。更多信息,参见 «[外部数据查询处理](../engines/table_engines/special/external_data.md)» 部分。

## 响应缓冲 {#xiang-ying-huan-chong}

diff --git a/docs/zh/interfaces/index.md b/docs/zh/interfaces/index.md
index df0313cc3d2..a4131e833e7 100644
--- a/docs/zh/interfaces/index.md
+++ b/docs/zh/interfaces/index.md
@@ -1,3 +1,4 @@
+
 # 客户端 {#interfaces}

ClickHouse提供了两个网络接口(两者都可以选择包装在TLS中以提高安全性):

diff --git a/docs/zh/interfaces/jdbc.md b/docs/zh/interfaces/jdbc.md
index a2aac229cca..932ab53b9af 100644
--- a/docs/zh/interfaces/jdbc.md
+++ b/docs/zh/interfaces/jdbc.md
@@ -1,8 +1,9 @@
+
 # JDBC 驱动 {#jdbc-qu-dong}

-   **[官方JDBC 的驱动](https://github.com/ClickHouse/clickhouse-jdbc)**
-   三方提供的 JDBC 驱动:
-    -   [ClickHouse-Native-JDBC](https://github.com/housepower/ClickHouse-Native-JDBC)
+    -   [ClickHouse-Native-JDBC](https://github.com/housepower/ClickHouse-Native-JDBC)
    -   [clickhouse4j](https://github.com/blynkkk/clickhouse4j)

[来源文章](https://clickhouse.tech/docs/zh/interfaces/jdbc/)

diff --git a/docs/zh/interfaces/mysql.md b/docs/zh/interfaces/mysql.md
index 668c0b7b9c3..8996ad6ae6f 100644
--- a/docs/zh/interfaces/mysql.md
+++ b/docs/zh/interfaces/mysql.md
@@ -1,22 +1,25 @@
---
-en_copy: true
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_priority: 20
+toc_title: "MySQL\u63A5\u53E3"
---

-# MySQL interface {#mysql-interface}
+# MySQL 接口 {#mysql-interface}

-ClickHouse supports MySQL wire protocol. It can be enabled by [mysql\_port](../operations/server_settings/settings.md#server_settings-mysql_port) setting in configuration file:
+ClickHouse 支持 MySQL wire 协议。可以通过在配置文件中设置 [mysql\_port](../operations/server_configuration_parameters/settings.md#server_configuration_parameters-mysql_port) 来启用:

``` xml
<mysql_port>9004</mysql_port>
```

-Example of connecting using command-line tool `mysql`:
+使用命令行工具 `mysql` 连接的示例:

``` bash
$ mysql --protocol tcp -u default -P 9004
```

-Output if a connection succeeded:
+如果连接成功,则输出:

``` text
Welcome to the MySQL monitor.  Commands end with ; or \g.
@@ -34,13 +37,13 @@ Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.
mysql>
```

-For compatibility with all MySQL clients, it is recommended to specify user password with [double SHA1](../operations/settings/settings_users.md#password_double_sha1_hex) in configuration file.
-If user password is specified using [SHA256](../operations/settings/settings_users.md#password_sha256_hex), some clients won’t be able to authenticate (mysqljs and old versions of command-line tool mysql).
+为了与所有 MySQL 客户端兼容,建议在配置文件中使用 [double SHA1](../operations/settings/settings_users.md#password_double_sha1_hex) 指定用户密码。
+如果使用 [SHA256](../operations/settings/settings_users.md#password_sha256_hex) 指定用户密码,部分客户端将无法进行身份验证(mysqljs 和旧版本的命令行工具 mysql)。

-Restrictions:
+限制:

-- prepared queries are not supported
+- 不支持预处理查询(prepared statements)

-- some data types are sent as strings
+- 某些数据类型以字符串形式发送

-[Original article](https://clickhouse.tech/docs/en/interfaces/mysql/)
+[原始文章](https://clickhouse.tech/docs/en/interfaces/mysql/)

diff --git a/docs/zh/interfaces/odbc.md b/docs/zh/interfaces/odbc.md
index b45c54f8507..5cba3a499f1 100644
--- a/docs/zh/interfaces/odbc.md
+++ b/docs/zh/interfaces/odbc.md
@@ -1,3 +1,4 @@
+
 # ODBC 驱动 {#odbc-qu-dong}

-   ClickHouse官方有 ODBC 的驱动。 见 [这里](https://github.com/ClickHouse/clickhouse-odbc)。

diff --git a/docs/zh/interfaces/tcp.md b/docs/zh/interfaces/tcp.md
index b783a8c3959..b926a63c476 100644
--- a/docs/zh/interfaces/tcp.md
+++ b/docs/zh/interfaces/tcp.md
@@ -1,3 +1,4 @@
+
 # 原生客户端接口(TCP) {#yuan-sheng-ke-hu-duan-jie-kou-tcp}

本机协议用于 [命令行客户端](cli.md),用于分布式查询处理期间的服务器间通信,以及其他C ++程序。 不幸的是,本机ClickHouse协议还没有正式的规范,但它可以从ClickHouse源代码进行逆向工程 [从这里开始](https://github.com/ClickHouse/ClickHouse/tree/master/src/Client))和/或拦截和分析TCP流量。

diff --git a/docs/zh/interfaces/third-party/client_libraries.md b/docs/zh/interfaces/third-party/client_libraries.md
index 4814ca5cf9a..8e48bb8735e 100644
--- a/docs/zh/interfaces/third-party/client_libraries.md
+++ b/docs/zh/interfaces/third-party/client_libraries.md
@@ -1,3 +1,4 @@
+
 # 第三方开发的库 {#di-san-fang-kai-fa-de-ku}

!!! warning "放弃"
@@ -5,46 +6,46 @@

-   Python
    -   [infi.clickhouse\_orm](https://github.com/Infinidat/infi.clickhouse_orm)
-    -   [clickhouse-driver](https://github.com/mymarilyn/clickhouse-driver)
-    -   [clickhouse-client](https://github.com/yurial/clickhouse-client)
+    -   [clickhouse-driver](https://github.com/mymarilyn/clickhouse-driver)
+    -   [clickhouse-client](https://github.com/yurial/clickhouse-client)
-   PHP
    -   [smi2/phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse)
-    -   [8bitov/clickhouse-php-client](https://packagist.org/packages/8bitov/clickhouse-php-client)
-    -   [bozerkins/clickhouse-client](https://packagist.org/packages/bozerkins/clickhouse-client)
-    -   [simpod/clickhouse-client](https://packagist.org/packages/simpod/clickhouse-client)
+    -   [8bitov/clickhouse-php-client](https://packagist.org/packages/8bitov/clickhouse-php-client)
+    -   [bozerkins/clickhouse-client](https://packagist.org/packages/bozerkins/clickhouse-client)
+    -   [simpod/clickhouse-client](https://packagist.org/packages/simpod/clickhouse-client)
    -   [seva-code/php-click-house-client](https://packagist.org/packages/seva-code/php-click-house-client)
-    -   [SeasClick C++ client](https://github.com/SeasX/SeasClick)
-- Go
+    -   [SeasClick C++ 客户端](https://github.com/SeasX/SeasClick)
+- Go
    -   [clickhouse](https://github.com/kshvakov/clickhouse/)
-    -   [go-clickhouse](https://github.com/roistat/go-clickhouse)
-    -   [mailrugo-clickhouse](https://github.com/mailru/go-clickhouse)
+    -   [go-clickhouse](https://github.com/roistat/go-clickhouse)
+    -   [mailru/go-clickhouse](https://github.com/mailru/go-clickhouse)
    -   [golang-clickhouse](https://github.com/leprosus/golang-clickhouse)
-   NodeJs
-    -   [clickhouse (NodeJs)](https://github.com/TimonKK/clickhouse)
-    -   [node-clickhouse](https://github.com/apla/node-clickhouse)
+    -   [clickhouse (NodeJs)](https://github.com/TimonKK/clickhouse)
+    -   [node-clickhouse](https://github.com/apla/node-clickhouse)
-   Perl
    -   [perl-DBD-ClickHouse](https://github.com/elcamlost/perl-DBD-ClickHouse)
    -   
[HTTP-ClickHouse](https://metacpan.org/release/HTTP-ClickHouse)
-    -   [AnyEvent-ClickHouse](https://metacpan.org/release/AnyEvent-ClickHouse)
+    -   [AnyEvent-ClickHouse](https://metacpan.org/release/AnyEvent-ClickHouse)
-   Ruby
-    -   [ClickHouse (Ruby)](https://github.com/shlima/click_house)
-    -   [clickhouse-activerecord](https://github.com/PNixx/clickhouse-activerecord)
+    -   [ClickHouse (Ruby)](https://github.com/shlima/click_house)
+    -   [clickhouse-activerecord](https://github.com/PNixx/clickhouse-activerecord)
-   R
    -   [clickhouse-r](https://github.com/hannesmuehleisen/clickhouse-r)
    -   [RClickhouse](https://github.com/IMSMWU/RClickhouse)
-   Java
    -   [clickhouse-client-java](https://github.com/VirtusAI/clickhouse-client-java)
-- Scala
-    -   [clickhouse-scala-client](https://github.com/crobox/clickhouse-scala-client)
+- Scala
+    -   [clickhouse-scala-client](https://github.com/crobox/clickhouse-scala-client)
-   Kotlin
    -   [AORM](https://github.com/TanVD/AORM)
-   C\#
-    -   [ClickHouse.Ado](https://github.com/killwort/ClickHouse-Net)
+    -   [ClickHouse.Ado](https://github.com/killwort/ClickHouse-Net)
    -   [ClickHouse.Net](https://github.com/ilyabreev/ClickHouse.Net)
-    -   [ClickHouse.Client](https://github.com/DarkWanderer/ClickHouse.Client)
-- Elixir
+    -   [ClickHouse.Client](https://github.com/DarkWanderer/ClickHouse.Client)
+- Elixir
    -   [clickhousex](https://github.com/appodeal/clickhousex/)
-- Nim
+- Nim
    -   [nim-clickhouse](https://github.com/leonardoce/nim-clickhouse)

[来源文章](https://clickhouse.tech/docs/zh/interfaces/third-party/client_libraries/)

diff --git a/docs/zh/interfaces/third-party/gui.md b/docs/zh/interfaces/third-party/gui.md
index 83656d18858..bbbd78f650e 100644
--- a/docs/zh/interfaces/third-party/gui.md
+++ b/docs/zh/interfaces/third-party/gui.md
@@ -1,3 +1,4 @@
+
 # 第三方开发的可视化界面 {#di-san-fang-kai-fa-de-ke-shi-hua-jie-mian}

## 开源 {#kai-yuan}

@@ -37,9 +38,9 @@ ClickHouse Web 界面 [Tabix](https://github.com/tabixio/tabix).
-   集群管理
-   监控副本情况以及 Kafka 引擎表

-### LightHouse {#lighthouse}
+### LightHouse {#lighthouse}

-[LightHouse](https://github.com/VKCOM/lighthouse) 是ClickHouse的轻量级Web界面。
+[LightHouse](https://github.com/VKCOM/lighthouse) 是 ClickHouse 的轻量级 Web 界面。

特征:

@@ -57,9 +58,9 @@ ClickHouse Web 界面 [Tabix](https://github.com/tabixio/tabix).
-   表格预览。
-   自动完成。

-### clickhouse-cli {#clickhouse-cli}
+### clickhouse-cli {#clickhouse-cli}

-[clickhouse-cli](https://github.com/hatarist/clickhouse-cli) 是ClickHouse的替代命令行客户端,用Python 3编写。
+[clickhouse-cli](https://github.com/hatarist/clickhouse-cli) 是 ClickHouse 的替代命令行客户端,用 Python 3 编写。

特征:

@@ -68,15 +69,15 @@

-   寻呼机支持数据输出。
-   自定义PostgreSQL类命令。

-### clickhouse-flamegraph {#clickhouse-flamegraph}
+### clickhouse-flamegraph {#clickhouse-flamegraph}

[clickhouse-flamegraph](https://github.com/Slach/clickhouse-flamegraph) 是一个可视化的专业工具`system.trace_log`如[flamegraph](http://www.brendangregg.com/flamegraphs.html).
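+这类工具都建立在 `system.trace_log` 系统表之上。下面是一个最小的示意查询(仅作演示,假设服务器已启用 query profiler 并已向 `system.trace_log` 写入采样数据):
+
+``` sql
+-- 统计今天采样到的调用栈数量
+SELECT count() FROM system.trace_log WHERE event_date = today()
+```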
## 商业 {#shang-ye}

-### Holistics Software {#holistics-software}
+### Holistics Software {#holistics-software}

-[Holistics](https://www.holistics.io/) 在2019年被Gartner FrontRunners列为可用性最高排名第二的商业智能工具之一。 Holistics是一个基于SQL的全栈数据平台和商业智能工具,用于设置您的分析流程。
+[Holistics](https://www.holistics.io/) 在 2019 年被 Gartner FrontRunners 列为可用性最高、排名第二的商业智能工具之一。Holistics 是一个基于 SQL 的全栈数据平台和商业智能工具,用于搭建您的分析流程。

特征:

diff --git a/docs/zh/interfaces/third-party/index.md b/docs/zh/interfaces/third-party/index.md
new file mode 100644
index 00000000000..fab8cb364e8
--- /dev/null
+++ b/docs/zh/interfaces/third-party/index.md
@@ -0,0 +1,8 @@
+---
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_folder_title: "\u7B2C\u4E09\u65B9"
+toc_priority: 24
+---
+
+

diff --git a/docs/zh/interfaces/third-party/integrations.md b/docs/zh/interfaces/third-party/integrations.md
index 4bfe367e1f1..aac3d7a1b11 100644
--- a/docs/zh/interfaces/third-party/integrations.md
+++ b/docs/zh/interfaces/third-party/integrations.md
@@ -1,3 +1,4 @@
+
 # 第三方集成库 {#di-san-fang-ji-cheng-ku}

!!! warning "声明"
@@ -9,7 +10,7 @@
-   [MySQL](https://www.mysql.com)
    -   [ProxySQL](https://github.com/sysown/proxysql/wiki/ClickHouse-Support)
    -   [clickhouse-mysql-data-reader](https://github.com/Altinity/clickhouse-mysql-data-reader)
-    -   [horgh-replicator](https://github.com/larsnovikov/horgh-replicator)
+    -   [horgh-replicator](https://github.com/larsnovikov/horgh-replicator)
-   [PostgreSQL](https://www.postgresql.org)
    -   [clickhousedb\_fdw](https://github.com/Percona-Lab/clickhousedb_fdw)
    -   [infi.clickhouse\_fdw](https://github.com/Infinidat/infi.clickhouse_fdw) (使用 [infi.clickhouse\_orm](https://github.com/Infinidat/infi.clickhouse_orm))
    -   [pg2ch](https://github.com/mkabilov/pg2ch)
-   [MSSQL](https://en.wikipedia.org/wiki/Microsoft_SQL_Server)
    -   [ClickHouseMightrator](https://github.com/zlzforever/ClickHouseMigrator)
-   消息队列
-    -   [Kafka](https://kafka.apache.org)
-    -   [clickhouse\_sinker](https://github.com/housepower/clickhouse_sinker) (使用 [Go client](https://github.com/kshvakov/clickhouse/))
+    -   [Kafka](https://kafka.apache.org)
+    -   [clickhouse\_sinker](https://github.com/housepower/clickhouse_sinker) (使用 [Go 客户端](https://github.com/kshvakov/clickhouse/))
-   对象存储
    -   [S3](https://en.wikipedia.org/wiki/Amazon_S3)
-    -   [clickhouse-backup](https://github.com/AlexAkulov/clickhouse-backup)
+    -   [clickhouse-backup](https://github.com/AlexAkulov/clickhouse-backup)
-   容器编排
    -   [Kubernetes](https://kubernetes.io)
-    -   [clickhouse-operator](https://github.com/Altinity/clickhouse-operator)
+    -   [clickhouse-operator](https://github.com/Altinity/clickhouse-operator)
-   配置管理
-    -   [puppet](https://puppet.com)
-    -   [innogames/clickhouse](https://forge.puppet.com/innogames/clickhouse)
+    -   [puppet](https://puppet.com)
+    -   [innogames/clickhouse](https://forge.puppet.com/innogames/clickhouse)
    -   [mfedotov/clickhouse](https://forge.puppet.com/mfedotov/clickhouse)
-   监控
-    -   [Graphite](https://graphiteapp.org)
+    -   [Graphite](https://graphiteapp.org)
    -   [graphouse](https://github.com/yandex/graphouse)
-    -   [carbon-clickhouse](https://github.com/lomik/carbon-clickhouse) +
-    -   [graphite-clickhouse](https://github.com/lomik/graphite-clickhouse)
-    -   [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer) - optimizes staled partitions in [\*GraphiteMergeTree](../../operations/table_engines/graphitemergetree.md#graphitemergetree) if rules from [rollup configuration](../../operations/table_engines/graphitemergetree.md#rollup-configuration) could be applied
+    -   [carbon-clickhouse](https://github.com/lomik/carbon-clickhouse) +
+    -   
[graphite-clickhouse](https://github.com/lomik/graphite-clickhouse)
+    -   [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer) - 如果可以应用 [rollup 配置](../../engines/table_engines/mergetree_family/graphitemergetree.md#rollup-configuration) 中的规则,则优化 [\*GraphiteMergeTree](../../engines/table_engines/mergetree_family/graphitemergetree.md#graphitemergetree) 中的过时分区
-   [Grafana](https://grafana.com/)
    -   [clickhouse-grafana](https://github.com/Vertamedia/clickhouse-grafana)
-    -   [Prometheus](https://prometheus.io/)
+    -   [Prometheus](https://prometheus.io/)
    -   [clickhouse\_exporter](https://github.com/f1yegor/clickhouse_exporter)
    -   [PromHouse](https://github.com/Percona-Lab/PromHouse)
-    -   [clickhouse\_exporter](https://github.com/hot-wifi/clickhouse_exporter) (uses [Go client](https://github.com/kshvakov/clickhouse/))
+    -   [clickhouse\_exporter](https://github.com/hot-wifi/clickhouse_exporter) (使用 [Go 客户端](https://github.com/kshvakov/clickhouse/))
-   [Nagios](https://www.nagios.org/)
    -   [check\_clickhouse](https://github.com/exogroup/check_clickhouse/)
    -   [check\_clickhouse.py](https://github.com/innogames/igmonplugins/blob/master/src/check_clickhouse.py)
-   [Zabbix](https://www.zabbix.com)
-    -   [clickhouse-zabbix-template](https://github.com/Altinity/clickhouse-zabbix-template)
+    -   [clickhouse-zabbix-template](https://github.com/Altinity/clickhouse-zabbix-template)
-   [Sematext](https://sematext.com/)
    -   [clickhouse积分](https://github.com/sematext/sematext-agent-integrations/tree/master/clickhouse)
-   记录
    -   [rsyslog](https://www.rsyslog.com/)
-    -   [omclickhouse](https://www.rsyslog.com/doc/master/configuration/modules/omclickhouse.html)
+    -   [omclickhouse](https://www.rsyslog.com/doc/master/configuration/modules/omclickhouse.html)
    -   [fluentd](https://www.fluentd.org)
    -   [loghouse](https://github.com/flant/loghouse) (对于 [Kubernetes](https://kubernetes.io))
    -   [logagent](https://www.sematext.com/logagent)
-    -   [logagent output-plugin-clickhouse](https://sematext.com/docs/logagent/output-plugin-clickhouse/)
+    -   [logagent output-plugin-clickhouse](https://sematext.com/docs/logagent/output-plugin-clickhouse/)
-   地理
    -   [MaxMind](https://dev.maxmind.com/geoip/)
-    -   [clickhouse-maxmind-geoip](https://github.com/AlexeyKupershtokh/clickhouse-maxmind-geoip)
+    -   [clickhouse-maxmind-geoip](https://github.com/AlexeyKupershtokh/clickhouse-maxmind-geoip)

## 编程语言生态系统 {#bian-cheng-yu-yan-sheng-tai-xi-tong}

-   Python
    -   [SQLAlchemy](https://www.sqlalchemy.org)
-    -   [sqlalchemy-clickhouse](https://github.com/cloudflare/sqlalchemy-clickhouse) (使用 [infi.clickhouse\_orm](https://github.com/Infinidat/infi.clickhouse_orm))
-    -   [pandas](https://pandas.pydata.org)
+    -   [sqlalchemy-clickhouse](https://github.com/cloudflare/sqlalchemy-clickhouse) (使用 [infi.clickhouse\_orm](https://github.com/Infinidat/infi.clickhouse_orm))
+    -   [pandas](https://pandas.pydata.org)
    -   [pandahouse](https://github.com/kszucs/pandahouse)
-   R
    -   [dplyr](https://db.rstudio.com/dplyr/)
-    -   [RClickhouse](https://github.com/IMSMWU/RClickhouse) (使用 [clickhouse-cpp](https://github.com/artpaul/clickhouse-cpp))
+    -   [RClickhouse](https://github.com/IMSMWU/RClickhouse) (使用 [clickhouse-cpp](https://github.com/artpaul/clickhouse-cpp))
-   Java
    -   [Hadoop](http://hadoop.apache.org)
-    -   [clickhouse-hdfs-loader](https://github.com/jaykelin/clickhouse-hdfs-loader) (使用 [JDBC](../../query_language/table_functions/jdbc.md))
-- Scala
+    -   [clickhouse-hdfs-loader](https://github.com/jaykelin/clickhouse-hdfs-loader) (使用 [JDBC](../../sql_reference/table_functions/jdbc.md))
+- Scala
    -   [Akka](https://akka.io)
-    -   
[clickhouse-scala-client](https://github.com/crobox/clickhouse-scala-client)
+    -   [clickhouse-scala-client](https://github.com/crobox/clickhouse-scala-client)
-   C\#
    -   [ADO.NET](https://docs.microsoft.com/en-us/dotnet/framework/data/adonet/ado-net-overview)
-    -   [ClickHouse.Ado](https://github.com/killwort/ClickHouse-Net)
+    -   [ClickHouse.Ado](https://github.com/killwort/ClickHouse-Net)
    -   [ClickHouse.Net](https://github.com/ilyabreev/ClickHouse.Net)
    -   [ClickHouse.Net.Migrations](https://github.com/ilyabreev/ClickHouse.Net.Migrations)
-- Elixir
+- Elixir
    -   [Ecto](https://github.com/elixir-ecto/ecto)
    -   [clickhouse\_ecto](https://github.com/appodeal/clickhouse_ecto)

diff --git a/docs/zh/interfaces/third-party/proxy.md b/docs/zh/interfaces/third-party/proxy.md
index 727bff00cbb..e954444c46f 100644
--- a/docs/zh/interfaces/third-party/proxy.md
+++ b/docs/zh/interfaces/third-party/proxy.md
@@ -1,3 +1,4 @@
+
 # 来自第三方开发人员的代理服务器 {#lai-zi-di-san-fang-kai-fa-ren-yuan-de-dai-li-fu-wu-qi}

[chproxy](https://github.com/Vertamedia/chproxy) 是ClickHouse数据库的http代理和负载均衡器。

@@ -22,9 +23,9 @@

在Go中实现。

-## ClickHouse-Bulk {#clickhouse-bulk}
+## ClickHouse-Bulk {#clickhouse-bulk}

-[ClickHouse-Bulk](https://github.com/nikepan/clickhouse-bulk) 是一个简单的ClickHouse插入收集器。
+[ClickHouse-Bulk](https://github.com/nikepan/clickhouse-bulk) 是一个简单的 ClickHouse 插入收集器。

特征:

diff --git a/docs/zh/introduction/adopters.md b/docs/zh/introduction/adopters.md
index ef841b2fa05..8a69e67264e 100644
--- a/docs/zh/introduction/adopters.md
+++ b/docs/zh/introduction/adopters.md
@@ -1,79 +1,82 @@
---
-en_copy: true
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_priority: 8
+toc_title: "\u91C7\u7528\u8005"
---

-# ClickHouse Adopters {#clickhouse-adopters}
+# ClickHouse 采用者 {#clickhouse-adopters}

-!!! warning "Disclaimer"
-    The following list of companies using ClickHouse and their success stories is assembled from public sources, thus might differ from current reality. We’d appreciate it if you share the story of adopting ClickHouse in your company and [add it to the list](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/introduction/adopters.md), but please make sure you won’t have any NDA issues by doing so. Providing updates with publications from other companies is also useful.
+!!! warning "免责声明"
+    下面这份使用 ClickHouse 的公司及其成功案例的名单来自公开来源,因此可能与当前实际情况有所不同。
如果您愿意分享贵公司采用 ClickHouse 的故事并 [将其添加到列表](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/introduction/adopters.md),我们将不胜感激,但请确保这样做不会有任何保密协议方面的问题。提供来自其他公司的出版物更新也很有用。

-| Company | Industry | Usecase | Cluster Size | (Un)Compressed Data Size\* | Reference |
-|-----------------------------------------------------------------------------|---------------------------------|-----------------------|------------------------------------------------------------|------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| [2gis](https://2gis.ru) | Maps | Monitoring | — | — | [Talk in Russian, July 2019](https://youtu.be/58sPkXfq6nw) |
-| [Aloha Browser](https://alohabrowser.com/) | Mobile App | Browser backend | — | — | [Slides in Russian, May 2019](https://github.com/yandex/clickhouse-presentations/blob/master/meetup22/aloha.pdf) |
-| [Amadeus](https://amadeus.com/) | Travel | Analytics | — | — | [Press Release, April 2018](https://www.altinity.com/blog/2018/4/5/amadeus-technologies-launches-investment-and-insights-tool-based-on-machine-learning-and-strategy-algorithms) |
-| [Appsflyer](https://www.appsflyer.com) | Mobile analytics | Main product | — | — | [Talk in Russian, July 2019](https://www.youtube.com/watch?v=M3wbRlcpBbY) |
-| [ArenaData](https://arenadata.tech/) | Data Platform | Main product | — | — | [Slides in Russian, December 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup38/indexes.pdf) |
-| [Badoo](https://badoo.com) | Dating | Timeseries | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/forecast.pdf) |
-| [Benocs](https://www.benocs.com/) | Network Telemetry and Analytics | Main Product | — | — | [Slides in English, October 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) |
-| [Bloomberg](https://www.bloomberg.com/) | Finance, Media | Monitoring | 102 servers | — | [Slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) |
-| [Bloxy](https://bloxy.info) | Blockchain | Analytics | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/4_bloxy.pptx) |
-| `Dataliance/UltraPower` | Telecom | Analytics | — | — | [Slides in Chinese, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/telecom.pdf) |
-| [CARTO](https://carto.com/) | Business Intelligence | Geo analytics | — | — | [Geospatial processing with Clickhouse](https://carto.com/blog/geospatial-processing-with-clickhouse/) |
-| [CERN](http://public.web.cern.ch/public/) | Research | Experiment | — | — | [Press release, April 2012](https://www.yandex.com/company/press_center/press_releases/2012/2012-04-10/) |
-| [Cisco](http://cisco.com/) | Networking | Traffic analysis | — | — | [Lightning talk, October 2019](https://youtu.be/-hI1vDR2oPY?t=5057) |
-| [Citadel Securities](https://www.citadelsecurities.com/) | Finance | — | — | — | [Contribution, March 2019](https://github.com/ClickHouse/ClickHouse/pull/4774) |
-| [Citymobil](https://city-mobil.ru) | Taxi | Analytics | — | — | [Blog Post in Russian, March 2020](https://habr.com/en/company/citymobil/blog/490660/) |
-| 
[ContentSquare](https://contentsquare.com) | Web analytics | Main product | — | — | [Blog post in French, November 2018](http://souslecapot.net/2018/11/21/patrick-chatain-vp-engineering-chez-contentsquare-penser-davantage-amelioration-continue-que-revolution-constante/) |
-| [Cloudflare](https://cloudflare.com) | CDN | Traffic analysis | 36 servers | — | [Blog post, May 2017](https://blog.cloudflare.com/how-cloudflare-analyzes-1m-dns-queries-per-second/), [Blog post, March 2018](https://blog.cloudflare.com/http-analytics-for-6m-requests-per-second-using-clickhouse/) |
-| [Corunet](https://coru.net/) | Analytics | Main product | — | — | [Slides in English, April 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup21/predictive_models.pdf) |
-| [CraiditX 氪信](https://creditx.com) | Finance AI | Analysis | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/udf.pptx) |
-| [Criteo/Storetail](https://www.criteo.com/) | Retail | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/3_storetail.pptx) |
-| [Deutsche Bank](https://db.com) | Finance | BI Analytics | — | — | [Slides in English, October 2019](https://bigdatadays.ru/wp-content/uploads/2019/10/D2-H3-3_Yakunin-Goihburg.pdf) |
-| [Diva-e](https://www.diva-e.com) | Digital consulting | Main Product | — | — | [Slides in English, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup29/ClickHouse-MeetUp-Unusual-Applications-sd-2019-09-17.pdf) |
-| [Exness](https://www.exness.com) | Trading | Metrics, Logging | — | — | [Talk in Russian, May 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) |
-| [Geniee](https://geniee.co.jp) | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) |
-| [HUYA](https://www.huya.com/) | Video Streaming | Analytics | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) |
-| [Idealista](https://www.idealista.com) | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.yandex/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) |
-| [Infovista](https://www.infovista.com/) | Networks | Analytics | — | — | [Slides in English, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) |
-| [InnoGames](https://www.innogames.com) | Games | Metrics, Logging | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) |
-| [Integros](https://integros.com) | Platform for video services | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) |
-| [Kodiak Data](https://www.kodiakdata.com/) | Clouds | Main product | — | — | [Slides in Engish, April 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup13/kodiak_data.pdf) |
-| [Kontur](https://kontur.ru) | Software Development | Metrics | — | — | [Talk in Russian, November 2018](https://www.youtube.com/watch?v=U4u4Bd0FtrY) |
-| [LifeStreet](https://lifestreet.com/) | Ad network | Main product | 75 servers (3 replicas) | 5.27 PiB | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/) |
-| [Mail.ru Cloud Solutions](https://mcs.mail.ru/) | Cloud services | Main product | — | — | [Running ClickHouse Instance, in Russian](https://mcs.mail.ru/help/db-create/clickhouse#) |
-| [MessageBird](https://www.messagebird.com) | Telecommunications | Statistics | — | — | [Slides in English, November 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup20/messagebird.pdf) |
-| [MGID](https://www.mgid.com/) | Ad network | Web-analytics | — | — | [Our experience in implementing analytical DBMS ClickHouse, in Russian](http://gs-studio.com/news-about-it/32777----clickhouse---c) |
-| [OneAPM](https://www.oneapm.com/) | Monitorings and Data Analysis | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) |
-| [Pragma Innovation](http://www.pragma-innovation.fr/) | Telemetry and Big Data Analysis | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/4_pragma_innovation.pdf) |
-| [QINGCLOUD](https://www.qingcloud.com/) | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) |
-| [Qrator](https://qrator.net) | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) |
-| [Beijing PERCENT Information Technology Co., Ltd.](https://www.percent.cn/) | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) |
-| [Rambler](https://rambler.ru) | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) |
-| [Tencent](https://www.tencent.com) | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) |
-| [Traffic Stars](https://trafficstars.com/) | AD network | — | — | — | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) |
-| [S7 Airlines](https://www.s7.ru) | Airlines | Metrics, Logging | — | — | [Talk in Russian, March 2019](https://www.youtube.com/watch?v=nwG68klRpPg&t=15s) |
-| [SEMrush](https://www.semrush.com/) | Marketing | Main product | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/5_semrush.pdf) |
-| [scireum GmbH](https://www.scireum.de/) | e-Commerce | Main product | — | — | [Talk in German, February 2020](https://www.youtube.com/watch?v=7QWAn5RbyR4) |
-| [Sentry](https://sentry.io/) | Software developer | Backend for product | — | — | [Blog Post in English, May 2019](https://blog.sentry.io/2019/05/16/introducing-snuba-sentrys-new-search-infrastructure) |
-| [SGK](http://www.sgk.gov.tr/wps/portal/sgk/tr) | Goverment Social Security | Analytics | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/ClickHouse%20Meetup-Ramazan%20POLAT.pdf) |
-| [seo.do](https://seo.do/) | Analytics | Main product | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/CH%20Presentation-%20Metehan%20Çetinkaya.pdf) |
-| [Sina](http://english.sina.com/index.html) | News | — | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/6.%20ClickHouse最佳实践%20高鹏_新浪.pdf) |
-| [SMI2](https://smi2.ru/) | News | Analytics | — | — | [Blog Post in Russian, November 2017](https://habr.com/ru/company/smi2/blog/314558/) |
-| [Splunk](https://www.splunk.com/) | Business Analytics | Main product | — | — | [Slides in English, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/splunk.pdf) |
-| [Spotify](https://www.spotify.com) | Music | Experimentation | — | — | [Slides, July 2018](https://www.slideshare.net/glebus/using-clickhouse-for-experimentation-104247173) |
-| [Tencent](https://www.tencent.com) | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) |
-| [Uber](https://www.uber.com) | Taxi | Logging | — | — | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/uber.pdf) |
-| [VKontakte](https://vk.com) | Social Network | Statistics, Logging | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) |
-| [Wisebits](https://wisebits.com/) | IT Solutions | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) |
-| [Xiaoxin Tech.](https://www.xiaoheiban.cn/) | Education | Common purpose | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/sync-clickhouse-with-mysql-mongodb.pptx) |
-| [Ximalaya](https://www.ximalaya.com/) | Audio sharing | OLAP | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/ximalaya.pdf) |
-| [Yandex Cloud](https://cloud.yandex.ru/services/managed-clickhouse) | Public Cloud | Main product | — | — | [Talk in Russian, December 2019](https://www.youtube.com/watch?v=pgnak9e_E0o) |
-| [Yandex DataLens](https://cloud.yandex.ru/services/datalens) | Business Intelligence | Main product | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/datalens.pdf) |
-| [Yandex Market](https://market.yandex.ru/) | e-Commerce | Metrics, Logging | — | — | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) |
-| [Yandex Metrica](https://metrica.yandex.com) | Web analytics | Main product | 360 servers in one cluster, 1862 servers in one department | 66.41 PiB / 5.68 PiB | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/introduction/#13) |
-| [ЦВТ](https://htc-cs.ru/) | Software Development | Metrics, Logging | — | — | [Blog Post, March 2019, in Russian](https://vc.ru/dev/62715-kak-my-stroili-monitoring-na-prometheus-clickhouse-i-elk) |
-| [МКБ](https://mkb.ru/) | Bank | Web-system monitoring | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf) |
-| [金数据](https://jinshuju.net) | BI Analytics | Main product | — | — | [Slides in Chinese, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/3.%20金数据数据架构调整方案Public.pdf) |
+| 公司 | 行业 | 用例 | 集群大小 | 未压缩/压缩数据大小\* | 参考资料 |
+|------|------|------|----------|------------------------|----------|
+| [2gis](https://2gis.ru) | 地图 | 监测 | — | — | [俄语演讲,2019年7月](https://youtu.be/58sPkXfq6nw) |
+| [阿罗哈浏览器](https://alohabrowser.com/) | 移动应用程序 | 浏览器后端 | — | — | [俄文幻灯片,2019年5月](https://github.com/yandex/clickhouse-presentations/blob/master/meetup22/aloha.pdf) |
+| [阿玛迪斯](https://amadeus.com/) | 旅游 | 分析 | — | — | [新闻稿,2018年4月](https://www.altinity.com/blog/2018/4/5/amadeus-technologies-launches-investment-and-insights-tool-based-on-machine-learning-and-strategy-algorithms) |
+| [Appsflyer](https://www.appsflyer.com) | 移动分析 | 主要产品 | — | — | [俄语演讲,2019年7月](https://www.youtube.com/watch?v=M3wbRlcpBbY) |
+| [ArenaData](https://arenadata.tech/) | 数据平台 | 主要产品 | — | — | [俄文幻灯片,2019年12月](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup38/indexes.pdf) |
+| [Badoo](https://badoo.com) | 约会 | 时间序列 | — | — | [俄文幻灯片,2019年12月](https://presentations.clickhouse.tech/meetup38/forecast.pdf) |
+| [Benocs](https://www.benocs.com/) | 网络遥测和分析 | 主要产品 | — | — | [英文幻灯片,2017年10月](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) |
+| [彭博](https://www.bloomberg.com/) | 金融、媒体 | 监测 | 102台服务器 | — | [幻灯片,2018年5月](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) |
+| [Bloxy](https://bloxy.info) | 区块链 | 分析 | — | — | [俄文幻灯片,2018年8月](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/4_bloxy.pptx) |
+| `Dataliance/UltraPower` | 电信 | 分析 | — | — | [中文幻灯片,2018年1月](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/telecom.pdf) |
+| [CARTO](https://carto.com/) | 商业智能 | 地理分析 | — | — | [地理空间处理与ClickHouse](https://carto.com/blog/geospatial-processing-with-clickhouse/) |
+| [CERN](http://public.web.cern.ch/public/) | 研究 | 实验 | — | — | [新闻稿,2012年4月](https://www.yandex.com/company/press_center/press_releases/2012/2012-04-10/) |
+| [Cisco](http://cisco.com/) | 网络 | 流量分析 | — | — | [闪电演讲,2019年10月](https://youtu.be/-hI1vDR2oPY?t=5057) |
+| [城堡证券](https://www.citadelsecurities.com/) | 金融 | — | — | — | [贡献,2019年3月](https://github.com/ClickHouse/ClickHouse/pull/4774) |
+| [Citymobil](https://city-mobil.ru) | 出租车 | 分析 | — | — | [俄文博客文章,2020年3月](https://habr.com/en/company/citymobil/blog/490660/) |
+| [ContentSquare](https://contentsquare.com) | 网站分析 | 主要产品 | — | — | [法文博客文章,2018年11月](http://souslecapot.net/2018/11/21/patrick-chatain-vp-engineering-chez-contentsquare-penser-davantage-amelioration-continue-que-revolution-constante/) |
+| [Cloudflare](https://cloudflare.com) | CDN | 流量分析 | 36台服务器 | — | [博客文章,2017年5月](https://blog.cloudflare.com/how-cloudflare-analyzes-1m-dns-queries-per-second/), [博客文章,2018年3月](https://blog.cloudflare.com/http-analytics-for-6m-requests-per-second-using-clickhouse/) |
+| [Corunet](https://coru.net/) | 分析 | 主要产品 | — | — | [英文幻灯片,2019年4月](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup21/predictive_models.pdf) |
+| [CraiditX 氪信](https://creditx.com) | 金融AI | 分析 | — | — | [英文幻灯片,2019年11月](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/udf.pptx) |
+| [Criteo/Storetail](https://www.criteo.com/) | 零售 | 主要产品 | — | — | [英文幻灯片,2018年10月](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/3_storetail.pptx) |
+| [德意志银行](https://db.com) | 金融 | 商业智能分析 | — | — | [英文幻灯片,2019年10月](https://bigdatadays.ru/wp-content/uploads/2019/10/D2-H3-3_Yakunin-Goihburg.pdf) |
+| [Diva-e](https://www.diva-e.com) | 数字咨询 | 主要产品 | — | — | [英文幻灯片,2019年9月](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup29/ClickHouse-MeetUp-Unusual-Applications-sd-2019-09-17.pdf) |
+| [Exness](https://www.exness.com) | 交易 | 指标,日志记录 | — | — | [俄语演讲,2019年5月](https://youtu.be/_rpU-TvSfZ8?t=3215) |
+| [Geniee](https://geniee.co.jp) | 广告网络 | 主要产品 | — | — | [日文博客,2017年7月](https://tech.geniee.co.jp/entry/2017/07/20/160100) |
+| [HUYA](https://www.huya.com/) | 视频流 | 分析 | — | — | [中文幻灯片,2018年10月](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) |
+| [Idealista](https://www.idealista.com) | 房地产 | 分析 | — | — | [英文博客文章,2019年4月](https://clickhouse.yandex/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) |
+| [Infovista](https://www.infovista.com/) | 网络 | 分析 | — | — | [英文幻灯片,2019年10月](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) |
+| [InnoGames](https://www.innogames.com) | 游戏 | 指标,日志记录 | — | — | [俄文幻灯片,2019年9月](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) |
+| [Integros](https://integros.com) | 视频服务平台 | 分析 | — | — | [俄文幻灯片,2019年5月](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) |
+| [科迪亚克数据](https://www.kodiakdata.com/) | 云 | 主要产品 | — | — | [英文幻灯片,2018年4月](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup13/kodiak_data.pdf) |
+| [Kontur](https://kontur.ru) | 软件开发 | 指标 | — | — | [俄语演讲,2018年11月](https://www.youtube.com/watch?v=U4u4Bd0FtrY) |
+| [LifeStreet](https://lifestreet.com/) | 广告网络 | 主要产品 | 75台服务器(3个副本) | 5.27PiB | [俄文博客文章,2017年2月](https://habr.com/en/post/322620/) |
+| [Mail.ru 云解决方案](https://mcs.mail.ru/) | 云服务 | 主要产品 | — | — | [运行ClickHouse实例(俄文)](https://mcs.mail.ru/help/db-create/clickhouse#) |
+| [MessageBird](https://www.messagebird.com) | 电信 | 统计 | — | — | [英文幻灯片,2018年11月](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup20/messagebird.pdf) |
+| [MGID](https://www.mgid.com/) | 广告网络 | 网络分析 | — | — | [我们实施分析型DBMS ClickHouse的经验(俄文)](http://gs-studio.com/news-about-it/32777----clickhouse---c) |
+| [OneAPM](https://www.oneapm.com/) | 监测和数据分析 | 主要产品 | — | — | [中文幻灯片,2018年10月](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) |
+| [Pragma Innovation](http://www.pragma-innovation.fr/) | 遥测和大数据分析 | 主要产品 | — | — | [英文幻灯片,2018年10月](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/4_pragma_innovation.pdf) |
+| [QINGCLOUD](https://www.qingcloud.com/) | 云服务 | 主要产品 | — | — | [中文幻灯片,2018年10月](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) |
+| [Qrator](https://qrator.net) | DDoS保护 | 主要产品 | — | — | [博客文章,2019年3月](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) |
+| [北京百分点信息技术有限公司](https://www.percent.cn/) | 分析 | 主要产品 | — | — | [中文幻灯片,2019年6月](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) |
+| [Rambler](https://rambler.ru) | 互联网服务 | 分析 | — | — | [俄语演讲,2018年4月](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) |
+| [腾讯](https://www.tencent.com) | 消息传递 | 日志记录 | — | — | [中文演讲,2019年11月](https://youtu.be/T-iVQRuw-QY?t=5050) |
+| [Traffic Stars](https://trafficstars.com/) | 广告网络 | — | — | — | [俄文幻灯片,2018年5月](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) |
+| [S7航空公司](https://www.s7.ru) | 航空公司 | 指标,日志记录 | — | — | [俄语演讲,2019年3月](https://www.youtube.com/watch?v=nwG68klRpPg&t=15s) |
+| [SEMrush](https://www.semrush.com/) | 营销 | 主要产品 | — | — | [俄文幻灯片,2018年8月](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/5_semrush.pdf) |
+| [scireum GmbH](https://www.scireum.de/) | 电子商务 | 主要产品 | — | — | [德语演讲,2020年2月](https://www.youtube.com/watch?v=7QWAn5RbyR4) |
+| [Sentry](https://sentry.io/) | 软件开发 | 产品后端 | — | — | [英文博客文章,2019年5月](https://blog.sentry.io/2019/05/16/introducing-snuba-sentrys-new-search-infrastructure) |
+| [SGK](http://www.sgk.gov.tr/wps/portal/sgk/tr) | 政府社会保障 | 分析 | — | — | [英文幻灯片,2019年11月](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/ClickHouse%20Meetup-Ramazan%20POLAT.pdf) |
+| [seo.do](https://seo.do/) | 分析 | 主要产品 | — | — | [英文幻灯片,2019年11月](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/CH%20Presentation-%20Metehan%20Çetinkaya.pdf) |
+| [新浪](http://english.sina.com/index.html) | 新闻 | — | — | — | [中文幻灯片,2018年10月](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/6.%20ClickHouse最佳实践%20高鹏_新浪.pdf) |
+| [SMI2](https://smi2.ru/) | 新闻 | 分析 | — | — | [俄文博客文章,2017年11月](https://habr.com/ru/company/smi2/blog/314558/) |
+| [Splunk](https://www.splunk.com/) | 业务分析 | 主要产品 | — | — | [英文幻灯片,2018年1月](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/splunk.pdf) |
+| [Spotify](https://www.spotify.com) | 音乐 | 实验 | — | — | [幻灯片,2018年7月](https://www.slideshare.net/glebus/using-clickhouse-for-experimentation-104247173) |
+| [腾讯](https://www.tencent.com) | 大数据 | 数据处理 | — | — | [中文幻灯片,2018年10月](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) |
+| [优步](https://www.uber.com) | 出租车 | 日志记录 | — | — | [幻灯片,2020年2月](https://presentations.clickhouse.tech/meetup40/uber.pdf) |
+| [VKontakte](https://vk.com) | 社交网络 | 统计,日志记录 | — | — | [俄文幻灯片,2018年8月](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) |
+| [Wisebits](https://wisebits.com/) | IT解决方案 | 分析 | — | — | [俄文幻灯片,2019年5月](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) |
+| [小新科技](https://www.xiaoheiban.cn/) | 教育 | 通用 | — | — | [英文幻灯片,2019年11月](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/sync-clickhouse-with-mysql-mongodb.pptx) |
+| [喜马拉雅](https://www.ximalaya.com/) | 音频共享 | OLAP | — | — | [英文幻灯片,2019年11月](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/ximalaya.pdf) |
+| [Yandex云](https://cloud.yandex.ru/services/managed-clickhouse) | 公有云 | 主要产品 | — | — | [俄语演讲,2019年12月](https://www.youtube.com/watch?v=pgnak9e_E0o) |
+| [Yandex DataLens](https://cloud.yandex.ru/services/datalens) | 商业智能 | 主要产品 | — | — | [俄文幻灯片,2019年12月](https://presentations.clickhouse.tech/meetup38/datalens.pdf) |
+| [Yandex市场](https://market.yandex.ru/) | 电子商务 | 指标,日志记录 | — | — | [俄语演讲,2019年1月](https://youtu.be/_l1qP0DyBcA?t=478) |
+| [Yandex Metrica](https://metrica.yandex.com) | 网站分析 | 主要产品 | 一个集群中的360台服务器,一个部门中的1862台服务器 | 66.41PiB/5.68PiB | [幻灯片,2020年2月](https://presentations.clickhouse.tech/meetup40/introduction/#13) |
+| [ЦВТ](https://htc-cs.ru/) | 软件开发 | 指标,日志记录 | — | — | [俄文博客文章,2019年3月](https://vc.ru/dev/62715-kak-my-stroili-monitoring-na-prometheus-clickhouse-i-elk) |
+| [МКБ](https://mkb.ru/) | 银行 | 网络系统监控 | — | — | [俄文幻灯片,2019年9月](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf) |
+| [金数据](https://jinshuju.net) | 商业智能分析 | 主要产品 | — | — | [中文幻灯片,2019年10月](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/3.%20金数据数据架构调整方案Public.pdf) |

-[Original article](https://clickhouse.tech/docs/en/introduction/adopters/)
+[原始文章](https://clickhouse.tech/docs/en/introduction/adopters/)
diff --git a/docs/zh/introduction/distinctive_features.md b/docs/zh/introduction/distinctive_features.md
index 250a1a20e87..3b1e7a8c716 100644
--- a/docs/zh/introduction/distinctive_features.md
+++ b/docs/zh/introduction/distinctive_features.md
@@ -1,3 +1,4 @@
+
 # ClickHouse的独特功能 {#clickhousede-du-te-gong-neng}

 ## 真正的列式数据库管理系统 {#zhen-zheng-de-lie-shi-shu-ju-ku-guan-li-xi-tong}
@@ -59,6 +60,6 @@ ClickHouse提供各种各样在允许牺牲数据精度的情况下对查询进

 ClickHouse使用异步的多主复制技术。当数据被写入任何一个可用副本后,系统会在后台将数据分发给其他副本,以保证系统在不同副本上保持相同的数据。在大多数情况下ClickHouse能在故障后自动恢复,在一些少数的复杂情况下需要手动恢复。

-更多信息,参见 [数据复制](../operations/table_engines/replication.md)。
+更多信息,参见 [数据复制](../engines/table_engines/mergetree_family/replication.md)。

 [来源文章](https://clickhouse.tech/docs/en/introduction/distinctive_features/)
diff --git a/docs/zh/introduction/features_considered_disadvantages.md b/docs/zh/introduction/features_considered_disadvantages.md
index 04cd34c6ffc..efc967e90ac 100644
--- a/docs/zh/introduction/features_considered_disadvantages.md
+++ b/docs/zh/introduction/features_considered_disadvantages.md
@@ -1,3 +1,4 @@
+
 # ClickHouse的限制 {#clickhouseke-yi-ren-wei-shi-que-dian-de-gong-neng}

 1. 没有完整的事务支持。
diff --git a/docs/zh/introduction/history.md b/docs/zh/introduction/history.md
index 7c1a058ea76..673e070addb 100644
--- a/docs/zh/introduction/history.md
+++ b/docs/zh/introduction/history.md
@@ -1,6 +1,7 @@
+
 # ClickHouse历史 {#clickhouseli-shi}

-ClickHouse最初是为 [Yandex.Metrica](https://metrica.yandex.com/) [世界第二大Web分析平台](http://w3techs.com/technologies/overview/traffic_analysis/all) 而开发的。多年来一直作为该系统的核心组件被该系统持续使用着。目前为止,该系统在ClickHouse中有超过13万亿条记录,并且每天超过200多亿个事件被处理。它允许直接从原始数据中动态查询并生成报告。本文简要介绍了ClickHouse在其早期发展阶段的目标。
+ClickHouse最初是为 [YandexMetrica](https://metrica.yandex.com/) [世界第二大Web分析平台](http://w3techs.com/technologies/overview/traffic_analysis/all) 而开发的。多年来一直作为该系统的核心组件被该系统持续使用着。目前为止,该系统在ClickHouse中有超过13万亿条记录,并且每天超过200多亿个事件被处理。它允许直接从原始数据中动态查询并生成报告。本文简要介绍了ClickHouse在其早期发展阶段的目标。

 Yandex.Metrica基于用户定义的字段,对实时访问、连接会话,生成实时的统计报表。这种需求往往需要复杂聚合方式,比如对访问用户进行去重。构建报表的数据,是实时接收存储的新数据。
diff --git a/docs/zh/introduction/index.md b/docs/zh/introduction/index.md
new file mode 100644
index 00000000000..4bc6a76857a
--- /dev/null
+++ b/docs/zh/introduction/index.md
@@ -0,0 +1,8 @@
+---
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_folder_title: "\u5BFC\u8A00"
+toc_priority: 1
+---
+
+
diff --git a/docs/zh/introduction/performance.md b/docs/zh/introduction/performance.md
index ed44ec760bb..9c5ce29df6f 100644
--- a/docs/zh/introduction/performance.md
+++ b/docs/zh/introduction/performance.md
@@ -1,4 +1,5 @@
-# Performance {#performance}
+
+# 性能 {#performance}

 根据Yandex的内部测试结果,ClickHouse表现出了比同类可比较产品更优的性能。你可以在 [这里](https://clickhouse.tech/benchmark.html) 查看具体的测试结果。
diff --git a/docs/zh/operations/access_rights.md b/docs/zh/operations/access_rights.md
index 1c648a29f26..0178001e74f 100644
--- a/docs/zh/operations/access_rights.md
+++ b/docs/zh/operations/access_rights.md
@@ -1,8 +1,9 @@
-# Access Rights {#access-rights}
+# 访问权限 {#access-rights}

-Users and access rights are set up in the user config. This is usually `users.xml`.
+用户和访问权限在用户配置文件中设置,通常是 `users.xml`。

-Users are recorded in the `users` section. Here is a fragment of the `users.xml` file:
+用户记录在 `users` 部分中。下面是 `users.xml` 文件的一个片段:

 ``` xml
@@ -59,15 +60,15 @@ Users are recorded in the `users` section. Here is a fragment of the `users.xml`
 ```

-You can see a declaration from two users: `default`and`web`. We added the `web` user separately.
+您可以看到两个用户的声明: `default` 和 `web`。`web` 用户是我们单独添加的。

-The `default` user is chosen in cases when the username is not passed. The `default` user is also used for distributed query processing, if the configuration of the server or cluster doesn’t specify the `user` and `password` (see the section on the [Distributed](../operations/table_engines/distributed.md) engine).
+当未传递用户名时,将选择 `default` 用户。如果服务器或集群的配置没有为分布式查询处理指定 `user` 和 `password`,则 `default` 用户也会被用于分布式查询处理(参见 [分布式](../engines/table_engines/special/distributed.md) 引擎一节)。

 The user that is used for exchanging information between servers combined in a cluster must not have substantial restrictions or quotas – otherwise, distributed queries will fail.

-The password is specified in clear text (not recommended) or in SHA-256. The hash isn’t salted. In this regard, you should not consider these passwords as providing security against potential malicious attacks. Rather, they are necessary for protection from employees.
+密码以明文(不推荐)或SHA-256形式指定。哈希未加盐。因此,不应认为这些密码能够防御潜在的恶意攻击;它们主要是为了防范员工的误操作。

-A list of networks is specified that access is allowed from. In this example, the list of networks for both users is loaded from a separate file (`/etc/metrika.xml`) containing the `networks` substitution. Here is a fragment of it:
+其中指定了允许访问的网络列表。在此示例中,两个用户的网络列表均从包含 `networks` 替换的单独文件(`/etc/metrika.xml`)中加载。下面是它的一个片段:

 ``` xml
@@ -81,21 +82,21 @@ A list of networks is specified that access is allowed from. In this example, th
 ```

-You could define this list of networks directly in `users.xml`, or in a file in the `users.d` directory (for more information, see the section «[Configuration files](configuration_files.md#configuration_files)»).
+您可以直接在 `users.xml` 中定义此网络列表,也可以在 `users.d` 目录下的文件中定义(更多信息,参见«[配置文件](configuration_files.md#configuration_files)»一节)。

-The config includes comments explaining how to open access from everywhere.
+配置中包含说明如何开放任意来源访问的注释。

-For use in production, only specify `ip` elements (IP addresses and their masks), since using `host` and `hoost_regexp` might cause extra latency.
+在生产环境中,请仅指定 `ip` 元素(IP地址及其掩码),因为使用 `host` 和 `host_regexp` 可能会导致额外的延迟。

-Next the user settings profile is specified (see the section «[Settings profiles](settings/settings_profiles.md)»). You can specify the default profile, `default'`. The profile can have any name. You can specify the same profile for different users. The most important thing you can write in the settings profile is `readonly=1`, which ensures read-only access.
+接下来指定用户的设置配置文件(参见«[设置配置文件](settings/settings_profiles.md)»一节)。您可以指定默认配置文件 `default`。配置文件可以使用任意名称,也可以为不同的用户指定相同的配置文件。设置配置文件中最重要的配置是 `readonly=1`,它可以确保只读访问。

-Then specify the quota to be used (see the section «[Quotas](quotas.md#quotas)»). You can specify the default quota: `default`. It is set in the config by default to only count resource usage, without restricting it. The quota can have any name. You can specify the same quota for different users – in this case, resource usage is calculated for each user individually.
+然后指定要使用的配额(参见«[配额](quotas.md#quotas)»一节)。您可以指定默认配额 `default`。它在默认配置中仅统计资源使用情况而不加以限制。配额可以使用任意名称,也可以为不同的用户指定相同的配额——这种情况下会分别为每个用户计算资源使用量。

-In the optional `` section, you can also specify a list of databases that the user can access. By default, all databases are available to the user. You can specify the `default` database. In this case, the user will receive access to the database by default.
+在可选的 `allow_databases` 部分中,您还可以指定用户可以访问的数据库列表。默认情况下,用户可以访问所有数据库。您可以指定 `default` 数据库,这种情况下用户默认获得对该数据库的访问权限。
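+
+下面是一个示意性的片段(假设场景:数据库名 `analytics` 为虚构示例),展示如何在 `users.xml` 中为某个用户限定可访问的数据库:
+
+``` xml
+<users>
+    <web>
+        <!-- 此处省略密码、网络、配置文件、配额等其他设置 -->
+        <allow_databases>
+            <database>analytics</database>
+        </allow_databases>
+    </web>
+</users>
+```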
-Access to the `system` database is always allowed (since this database is used for processing queries).
+始终允许访问 `system` 数据库(因为此数据库用于处理查询)。

-The user can get a list of all databases and tables in them by using `SHOW` queries or system tables, even if access to individual databases isn’t allowed.
+即使未被允许访问某些单独的数据库,用户也可以通过 `SHOW` 查询或系统表获取所有数据库及其中表的列表。

-Database access is not related to the [readonly](settings/permissions_for_queries.md#settings_readonly) setting. You can’t grant full access to one database and `readonly` access to another one.
+数据库访问权限与 [readonly](settings/permissions_for_queries.md#settings_readonly) 设置无关。您无法在授予对一个数据库完全访问权限的同时,只授予对另一个数据库的 `readonly` 访问权限。

-[Original article](https://clickhouse.tech/docs/en/operations/access_rights/)
+[原始文章](https://clickhouse.tech/docs/en/operations/access_rights/)
diff --git a/docs/zh/operations/backup.md b/docs/zh/operations/backup.md
index 90efb613098..256ddddd2c2 100644
--- a/docs/zh/operations/backup.md
+++ b/docs/zh/operations/backup.md
@@ -1,38 +1,41 @@
 ---
-en_copy: true
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_priority: 49
+toc_title: "\u6570\u636E\u5907\u4EFD"
 ---

-# Data Backup {#data-backup}
+# 数据备份 {#data-backup}

-While [replication](table_engines/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default you can’t just drop tables with a MergeTree-like engine containing more than 50 Gb of data. However, these safeguards don’t cover all possible cases and can be circumvented.
+虽然 [复制](../engines/table_engines/mergetree_family/replication.md) 可以防范硬件故障,但它并不能防范人为错误:误删数据、删错了表或删错了集群上的表,以及导致数据处理错误或数据损坏的软件缺陷。在许多情况下,这类错误会影响所有副本。ClickHouse内置了防止某些类型错误的保护措施——例如,默认情况下 [您不能使用类似MergeTree的引擎删除包含超过50Gb数据的表](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330)。但是,这些保护措施并不能涵盖所有可能的情况,而且可以被规避。

-In order to effectively mitigate possible human errors, you should carefully prepare a strategy for backing up and restoring your data **in advance**.
+为了有效降低可能的人为错误带来的影响,您应该**提前**仔细准备数据备份和恢复策略。

-Each company has different resources available and business requirements, so there’s no universal solution for ClickHouse backups and restores that will fit every situation. What works for one gigabyte of data likely won’t work for tens of petabytes. There are a variety of possible approaches with their own pros and cons, which will be discussed below. It is a good idea to use several approaches instead of just one in order to compensate for their various shortcomings.
+每家公司可用的资源和业务需求各不相同,因此不存在适合所有情况的ClickHouse备份和恢复通用方案。适用于1GB数据的方法未必适用于几十PB的数据。下面将讨论几种各有利弊的可行方法。建议同时使用几种方法而不是只用一种,以弥补各自的缺点。

-!!! note "Note"
-    Keep in mind that if you backed something up and never tried to restore it, chances are that restore will not work properly when you actually need it (or at least it will take longer than business can tolerate). So whatever backup approach you choose, make sure to automate the restore process as well, and practice it on a spare ClickHouse cluster regularly.
note "注" + 请记住,如果您备份了某些内容并且从未尝试过还原它,那么当您实际需要它时(或者至少需要比业务能够容忍的时间更长),恢复可能无法正常工作。 因此,无论您选择哪种备份方法,请确保自动还原过程,并定期在备用ClickHouse群集上练习。 -## Duplicating Source Data Somewhere Else {#duplicating-source-data-somewhere-else} +## 将源数据复制到其他地方 {#duplicating-source-data-somewhere-else} -Often data that is ingested into ClickHouse is delivered through some sort of persistent queue, such as [Apache Kafka](https://kafka.apache.org). In this case it is possible to configure an additional set of subscribers that will read the same data stream while it is being written to ClickHouse and store it in cold storage somewhere. Most companies already have some default recommended cold storage, which could be an object store or a distributed filesystem like [HDFS](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html). +通常被摄入到ClickHouse的数据是通过某种持久队列传递的,例如 [Apache Kafka](https://kafka.apache.org). 在这种情况下,可以配置一组额外的订阅服务器,这些订阅服务器将在写入ClickHouse时读取相同的数据流,并将其存储在冷存储中。 大多数公司已经有一些默认的推荐冷存储,可能是对象存储或分布式文件系统,如 [HDFS](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html). -## Filesystem Snapshots {#filesystem-snapshots} +## 文件系统快照 {#filesystem-snapshots} -Some local filesystems provide snapshot functionality (for example, [ZFS](https://en.wikipedia.org/wiki/ZFS)), but they might not be the best choice for serving live queries. A possible solution is to create additional replicas with this kind of filesystem and exclude them from the [Distributed](table_engines/distributed.md) tables that are used for `SELECT` queries. Snapshots on such replicas will be out of reach of any queries that modify data. As a bonus, these replicas might have special hardware configurations with more disks attached per server, which would be cost-effective. +某些本地文件系统提供快照功能(例如, [ZFS](https://en.wikipedia.org/wiki/ZFS)),但它们可能不是提供实时查询的最佳选择。 一个可能的解决方案是使用这种文件系统创建额外的副本,并将它们从 [分布](../engines/table_engines/special/distributed.md) 用于以下目的的表 `SELECT` 查询。 任何修改数据的查询都无法访问此类副本上的快照。 作为奖励,这些副本可能具有特殊的硬件配置,每个服务器附加更多的磁盘,这将是经济高效的。 -## clickhouse-copier {#clickhouse-copier} +## ツ环板-ョツ嘉ッツ偲 {#clickhouse-copier} -[clickhouse-copier](utils/clickhouse-copier.md) is a versatile tool that was initially created to re-shard petabyte-sized tables. It can also be used for backup and restore purposes because it reliably copies data between ClickHouse tables and clusters. +[ツ环板-ョツ嘉ッツ偲](utilities/clickhouse-copier.md) 是一个多功能工具,最初创建用于重新分片pb大小的表。 它还可用于备份和还原目的,因为它可以在ClickHouse表和集群之间可靠地复制数据。 -For smaller volumes of data, a simple `INSERT INTO ... SELECT ...` to remote tables might work as well. +对于较小的数据量,一个简单的 `INSERT INTO ... SELECT ...` 到远程表也可以工作。 -## Manipulations with Parts {#manipulations-with-parts} +## 部件操作 {#manipulations-with-parts} -ClickHouse allows using the `ALTER TABLE ... FREEZE PARTITION ...` query to create a local copy of table partitions. This is implemented using hardlinks to the `/var/lib/clickhouse/shadow/` folder, so it usually does not consume extra disk space for old data. The created copies of files are not handled by ClickHouse server, so you can just leave them there: you will have a simple backup that doesn’t require any additional external system, but it will still be prone to hardware issues. For this reason, it’s better to remotely copy them to another location and then remove the local copies. 
-## Manipulations with Parts {#manipulations-with-parts}
+## 部件操作 {#manipulations-with-parts}

-ClickHouse allows using the `ALTER TABLE ... FREEZE PARTITION ...` query to create a local copy of table partitions. This is implemented using hardlinks to the `/var/lib/clickhouse/shadow/` folder, so it usually does not consume extra disk space for old data. The created copies of files are not handled by ClickHouse server, so you can just leave them there: you will have a simple backup that doesn’t require any additional external system, but it will still be prone to hardware issues. For this reason, it’s better to remotely copy them to another location and then remove the local copies. Distributed filesystems and object stores are still a good options for this, but normal attached file servers with a large enough capacity might work as well (in this case the transfer will occur via the network filesystem or maybe [rsync](https://en.wikipedia.org/wiki/Rsync)).
+ClickHouse允许使用 `ALTER TABLE ... FREEZE PARTITION ...` 查询来创建表分区的本地副本。该副本通过指向 `/var/lib/clickhouse/shadow/` 文件夹的硬链接实现,因此通常不会为旧数据占用额外的磁盘空间。创建的文件副本不由ClickHouse服务器处理,您可以直接把它们留在那里:这样就有了一份不需要任何额外外部系统的简单备份,但它仍然容易受到硬件问题的影响。因此,最好将它们远程复制到另一个位置,然后删除本地副本。分布式文件系统和对象存储仍然是不错的选择,但容量足够大的普通文件服务器同样可行(这种情况下,传输将通过网络文件系统或 [rsync](https://en.wikipedia.org/wiki/Rsync) 进行)。
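+
+一个最小的示意(假设表按 `toYYYYMM(EventDate)` 分区,库表名为虚构示例):
+
+``` sql
+-- 为指定分区创建本地快照;快照会出现在 /var/lib/clickhouse/shadow/ 下
+ALTER TABLE db.hits FREEZE PARTITION 202003;
+```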
-For more information about queries related to partition manipulations, see the [ALTER documentation](../query_language/alter.md#alter_manipulations-with-partitions).
+有关分区操作相关查询的详细信息,请参阅 [ALTER 文档](../sql_reference/statements/alter.md#alter_manipulations-with-partitions)。

-A third-party tool is available to automate this approach: [clickhouse-backup](https://github.com/AlexAkulov/clickhouse-backup).
+有第三方工具可以将这种方法自动化: [clickhouse-backup](https://github.com/AlexAkulov/clickhouse-backup)。

-[Original article](https://clickhouse.tech/docs/en/operations/backup/)
+[原始文章](https://clickhouse.tech/docs/en/operations/backup/)
diff --git a/docs/zh/operations/configuration_files.md b/docs/zh/operations/configuration_files.md
index b0c3d22fdaf..6505cfb1fb9 100644
--- a/docs/zh/operations/configuration_files.md
+++ b/docs/zh/operations/configuration_files.md
@@ -1,24 +1,25 @@
-# Configuration Files {#configuration_files}
+# 配置文件 {#configuration_files}

-The main server config file is `config.xml`. It resides in the `/etc/clickhouse-server/` directory.
+主服务器配置文件是 `config.xml`,位于 `/etc/clickhouse-server/` 目录中。

-Individual settings can be overridden in the `*.xml` and `*.conf` files in the `conf.d` and `config.d` directories next to the config file.
+单个设置可以在配置文件旁边的 `conf.d` 和 `config.d` 目录中的 `*.xml` 和 `*.conf` 文件中被覆盖。

-The `replace` or `remove` attributes can be specified for the elements of these config files.
+可以为这些配置文件中的元素指定 `replace` 或 `remove` 属性。

-If neither is specified, it combines the contents of elements recursively, replacing values of duplicate children.
+如果二者均未指定,则递归合并元素的内容,并替换重复子元素的值。

-If `replace` is specified, it replaces the entire element with the specified one.
+如果指定了 `replace`,则用指定的元素替换整个元素。

-If `remove` is specified, it deletes the element.
+如果指定了 `remove`,则删除该元素。

-The config can also define «substitutions». If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include\_from](server_settings/settings.md#server_settings-include_from) element in the server config. The substitution values are specified in `/yandex/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](#macros) server\_settings/settings.md)).
+配置文件还可以定义«替换»(substitutions)。如果元素具有 `incl` 属性,则会使用替换文件中对应的替换值作为该元素的值。默认情况下,替换文件的路径为 `/etc/metrika.xml`,可以通过服务器配置中的 [include\_from](server_configuration_parameters/settings.md#server_configuration_parameters-include_from) 元素修改。替换值在该文件的 `/yandex/substitution_name` 元素中指定。如果 `incl` 指定的替换不存在,则会记录到日志中。要防止ClickHouse记录缺失的替换,请指定 `optional="true"` 属性(例如 [宏](#macros) 的设置)。

-Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node and it will be fully inserted into the source element.
+替换也可以从ZooKeeper执行。为此,请指定 `from_zk = "/path/to/node"` 属性。元素值会被替换为ZooKeeper中 `/path/to/node` 节点的内容。您还可以将整个XML子树放在ZooKeeper节点上,它会被完整插入到源元素中。
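+
+一个示意性的片段(ZooKeeper 路径仅为虚构示例,展示上述属性的一种可能写法):
+
+``` xml
+<!-- remote_servers 元素的内容将取自 ZooKeeper 节点 /clickhouse/remote_servers -->
+<remote_servers from_zk="/clickhouse/remote_servers" />
+```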
-The `config.xml` file can specify a separate config with user settings, profiles, and quotas. The relative path to this config is set in the ‘users\_config’ element. By default, it is `users.xml`. If `users_config` is omitted, the user settings, profiles, and quotas are specified directly in `config.xml`.
+`config.xml` 文件可以指定包含用户设置、配置文件和配额的单独配置。该配置的相对路径在 `users_config` 元素中设置,默认为 `users.xml`。如果省略 `users_config`,则用户设置、配置文件和配额直接在 `config.xml` 中指定。

-In addition, `users_config` may have overrides in files from the `users_config.d` directory (for example, `users.d`) and substitutions. For example, you can have separate config file for each user like this:
+此外,`users_config` 还可以被 `users_config.d` 目录(例如 `users.d`)中的文件覆盖,并可以使用替换。例如,您可以像下面这样为每个用户提供单独的配置文件:

 ``` xml
 $ cat /etc/clickhouse-server/users.d/alice.xml
@@ -36,8 +37,8 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
 ```

-For each config file, the server also generates `file-preprocessed.xml` files when starting. These files contain all the completed substitutions and overrides, and they are intended for informational use. If ZooKeeper substitutions were used in the config files but ZooKeeper is not available on the server start, the server loads the configuration from the preprocessed file.
+对于每个配置文件,服务器在启动时还会生成 `file-preprocessed.xml` 文件。这些文件包含所有已完成的替换和覆盖,仅供参考。如果配置文件中使用了ZooKeeper替换,而服务器启动时ZooKeeper不可用,则服务器会从预处理文件中加载配置。

-The server tracks changes in config files, as well as files and ZooKeeper nodes that were used when performing substitutions and overrides, and reloads the settings for users and clusters on the fly. This means that you can modify the cluster, users, and their settings without restarting the server.
+服务器会跟踪配置文件的更改,以及执行替换和覆盖时用到的文件与ZooKeeper节点,并动态重新加载用户和集群的设置。这意味着您无需重启服务器即可修改集群、用户及其设置。

-[Original article](https://clickhouse.tech/docs/en/operations/configuration_files/)
+[原始文章](https://clickhouse.tech/docs/en/operations/configuration_files/)
diff --git a/docs/zh/operations/index.md b/docs/zh/operations/index.md
index 596ec065f40..4d31fce45af 100644
--- a/docs/zh/operations/index.md
+++ b/docs/zh/operations/index.md
@@ -1,3 +1,4 @@
-# Operations {#operations}

-[Original article](https://clickhouse.tech/docs/en/operations/)
+# 操作 {#operations}
+
+[原始文章](https://clickhouse.tech/docs/en/operations/)
diff --git a/docs/zh/operations/monitoring.md b/docs/zh/operations/monitoring.md
index 97cb8329b2b..6683903f531 100644
--- a/docs/zh/operations/monitoring.md
+++ b/docs/zh/operations/monitoring.md
@@ -1,3 +1,4 @@
+
 # 监控 {#jian-kong}

 可以监控到:
@@ -28,9 +29,9 @@ ClickHouse 收集的指标项:
 - 服务用于计算的资源占用的各种指标。
 - 关于查询处理的常见统计信息。

-可以在 [system.metrics](system_tables.md#system_tables-metrics) ,[system.events](system_tables.md#system_tables-events) 以及[system.asynchronous\_metrics](system_tables.md#system_tables-asynchronous_metrics) 等系统表查看所有的指标项。
+可以在 [system.metrics](system_tables.md#system_tables-metrics)、[system.events](system_tables.md#system_tables-events) 以及 [system.asynchronous\_metrics](system_tables.md#system_tables-asynchronous_metrics) 等系统表查看所有的指标项。

-可以配置ClickHouse 往 [Graphite](https://github.com/graphite-project)导入指标。 参考 [Graphite section](server_settings/settings.md#server_settings-graphite) 配置文件。在配置指标导出之前,需要参考Graphite[官方教程](https://graphite.readthedocs.io/en/latest/install.html)搭建服务。
+可以配置ClickHouse向 [Graphite](https://github.com/graphite-project) 导出指标。参考 [Graphite 部分](server_configuration_parameters/settings.md#server_configuration_parameters-graphite) 配置文件。在配置指标导出之前,需要参考Graphite [官方教程](https://graphite.readthedocs.io/en/latest/install.html) 搭建服务。

 此外,您可以通过HTTP API监视服务器可用性。 将HTTP GET请求发送到 `/ping`。 如果服务器可用,它将以 `200 OK` 响应。
diff --git a/docs/zh/operations/optimizing_performance/index.md b/docs/zh/operations/optimizing_performance/index.md
new file mode 100644
index 00000000000..786a7200b28
--- /dev/null
+++ b/docs/zh/operations/optimizing_performance/index.md
@@ -0,0 +1,8 @@
+---
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_folder_title: "\u4F18\u5316\u6027\u80FD"
+toc_priority: 52
+---
+
+
diff --git a/docs/zh/operations/optimizing_performance/sampling_query_profiler.md b/docs/zh/operations/optimizing_performance/sampling_query_profiler.md
new file mode 100644
index 00000000000..6f0eef0a1ed
--- /dev/null
+++ b/docs/zh/operations/optimizing_performance/sampling_query_profiler.md
@@ -0,0 +1,64 @@
+---
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_priority: 54
+toc_title: "\u67E5\u8BE2\u5206\u6790"
+---
+
+# 采样查询探查器 {#sampling-query-profiler}
+
+ClickHouse自带可以分析查询执行情况的采样探查器。使用探查器,您可以找到查询执行期间使用最频繁的源代码例程,并可以跟踪CPU时间和包括空闲时间在内的挂钟时间。
+
+使用探查器:
+
+- 设置服务器配置中的 [trace\_log](../server_configuration_parameters/settings.md#server_configuration_parameters-trace_log) 部分。
+
+    本节配置包含探查器运行结果的 [trace\_log](../../operations/system_tables.md#system_tables-trace_log) 系统表。它是默认配置的。请记住,此表中的数据仅对正在运行的服务器有效:服务器重启后,ClickHouse不会清理该表,所有已存储的虚拟内存地址都可能失效。

+- 设置 [query\_profiler\_cpu\_time\_period\_ns](../settings/settings.md#query_profiler_cpu_time_period_ns) 或 [query\_profiler\_real\_time\_period\_ns](../settings/settings.md#query_profiler_real_time_period_ns) 设置。这两种设置可以同时使用。
+
+    这些设置允许您配置探查器的计时器。由于它们是会话设置,您可以为整个服务器、单个用户或用户配置文件、交互式会话以及每个单独的查询使用不同的采样频率(参见下方的示意)。
+
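+例如,一个示意性的会话级设置(周期值仅供参考,单位为纳秒):
+
+``` sql
+-- 将实时与CPU采样周期均设为10毫秒(每10毫秒采样一次)
+SET query_profiler_real_time_period_ns = 10000000;
+SET query_profiler_cpu_time_period_ns = 10000000;
+```
+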
+默认采样频率为每秒一次,CPU和实时计时器均已启用。该频率足以收集有关ClickHouse集群的充分信息;同时,在此频率下,探查器不会影响ClickHouse服务器的性能。如果需要分析每个单独的查询,请尝试使用更高的采样频率。
+
+分析 `trace_log` 系统表:
+
+- 安装 `clickhouse-common-static-dbg` 包。参见 [从DEB软件包安装](../../getting_started/install.md#install-from-deb-packages)。
+
+- 通过 [allow\_introspection\_functions](../settings/settings.md#settings-allow_introspection_functions) 设置启用内省函数。
+
+    出于安全原因,内省函数默认被禁用。
+
+- 使用 `addressToLine`、`addressToSymbol` 和 `demangle` [内省函数](../../sql_reference/functions/introspection.md) 获取函数名称及其在ClickHouse代码中的位置。要获取某个查询的性能剖析,您需要聚合 `trace_log` 表中的数据。您可以按单个函数或按整个堆栈跟踪来聚合数据。
+
+如果需要可视化 `trace_log` 信息,可以尝试 [flamegraph](../../interfaces/third-party/gui/#clickhouse-flamegraph) 和 [speedscope](https://github.com/laplab/clickhouse-speedscope)。
+
+## 示例 {#example}
+
+在这个例子中,我们:
+
+- 按查询标识符和当前日期过滤 `trace_log` 数据。
+
+- 按堆栈跟踪聚合。
+
+- 使用内省函数,得到如下报告:
+
+    - 符号名称和相应的源代码函数。
+    - 这些函数的源代码位置。
+
+``` sql
+SELECT
+    count(),
+    arrayStringConcat(arrayMap(x -> concat(demangle(addressToSymbol(x)), '\n    ', addressToLine(x)), trace), '\n') AS sym
+FROM system.trace_log
+WHERE (query_id = 'ebca3574-ad0a-400a-9cbc-dca382f5998c') AND (event_date = today())
+GROUP BY trace
+ORDER BY count() DESC
+LIMIT 10
+```
+
+``` text
+{% include "operations/performance/sampling_query_profiler_example_result.txt" %}
+```
diff --git a/docs/zh/operations/performance/sampling_query_profiler.md b/docs/zh/operations/performance/sampling_query_profiler.md
deleted file mode 100644
index 25368fcd883..00000000000
--- a/docs/zh/operations/performance/sampling_query_profiler.md
+++ /dev/null
@@ -1,61 +0,0 @@
----
-en_copy: true
----
-
-# Sampling Query Profiler {#sampling-query-profiler}
-
-ClickHouse runs sampling profiler that allows analyzing query execution. Using profiler you can find source code routines that used the most frequently during query execution. You can trace CPU time and wall-clock time spent including idle time.
-
-To use profiler:
-
-- Setup the [trace\_log](../server_settings/settings.md#server_settings-trace_log) section of the server configuration.
-
-    This section configures the [trace\_log](../system_tables.md#system_tables-trace_log) system table containing the results of the profiler functioning. It is configured by default. Remember that data in this table is valid only for a running server. After the server restart, ClickHouse doesn’t clean up the table and all the stored virtual memory address may become invalid.
-
-- Setup the [query\_profiler\_cpu\_time\_period\_ns](../settings/settings.md#query_profiler_cpu_time_period_ns) or [query\_profiler\_real\_time\_period\_ns](../settings/settings.md#query_profiler_real_time_period_ns) settings. Both settings can be used simultaneously.
-
-    These settings allow you to configure profiler timers. As these are the session settings, you can get different sampling frequency for the whole server, individual users or user profiles, for your interactive session, and for each individual query.
-
-The default sampling frequency is one sample per second and both CPU and real timers are enabled. This frequency allows collecting enough information about ClickHouse cluster. At the same time, working with this frequency, profiler doesn’t affect ClickHouse server’s performance. If you need to profile each individual query try to use higher sampling frequency.
-
-To analyze the `trace_log` system table:
-
-- Install the `clickhouse-common-static-dbg` package. See [Install from DEB Packages](../../getting_started/install.md#install-from-deb-packages).
-
-- Allow introspection functions by the [allow\_introspection\_functions](../settings/settings.md#settings-allow_introspection_functions) setting.
-
-    For security reasons, introspection functions are disabled by default.
-
-- Use the `addressToLine`, `addressToSymbol` and `demangle` [introspection functions](../../query_language/functions/introspection.md) to get function names and their positions in ClickHouse code. To get a profile for some query, you need to aggregate data from the `trace_log` table. You can aggregate data by individual functions or by the whole stack traces.
-
-If you need to visualize `trace_log` info, try [flamegraph](../../interfaces/third-party/gui/#clickhouse-flamegraph) and [speedscope](https://github.com/laplab/clickhouse-speedscope).
-
-## Example {#example}
-
-In this example we:
-
-- Filtering `trace_log` data by a query identifier and the current date.
-
-- Aggregating by stack trace.
-
-- Using introspection functions, we will get a report of:
-
-    - Names of symbols and corresponding source code functions.
-    - Source code locations of these functions.
-
-``` sql
-SELECT
-    count(),
-    arrayStringConcat(arrayMap(x -> concat(demangle(addressToSymbol(x)), '\n    ', addressToLine(x)), trace), '\n') AS sym
-FROM system.trace_log
-WHERE (query_id = 'ebca3574-ad0a-400a-9cbc-dca382f5998c') AND (event_date = today())
-GROUP BY trace
-ORDER BY count() DESC
-LIMIT 10
-```
-
-``` text
-{% include "operations/performance/sampling_query_profiler_example_result.txt" %}
-```
diff --git a/docs/zh/operations/performance/sampling_query_profiler_example_result.txt b/docs/zh/operations/performance/sampling_query_profiler_example_result.txt
index a5f6d71ca95..56c2fdf9c65 100644
--- a/docs/zh/operations/performance/sampling_query_profiler_example_result.txt
+++ b/docs/zh/operations/performance/sampling_query_profiler_example_result.txt
@@ -1,7 +1,3 @@
----
-en_copy: true
----
-
 Row 1:
 ──────
 count(): 6344
diff --git a/docs/zh/operations/performance_test.md b/docs/zh/operations/performance_test.md
index ae4c5752703..f567a9528a0 100644
--- a/docs/zh/operations/performance_test.md
+++ b/docs/zh/operations/performance_test.md
@@ -1,18 +1,21 @@
 ---
-en_copy: true
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_priority: 54
+toc_title: "\u6D4B\u8BD5\u786C\u4EF6"
 ---

-# How To Test Your Hardware With ClickHouse {#how-to-test-your-hardware-with-clickhouse}
+# 如何使用ClickHouse测试您的硬件 {#how-to-test-your-hardware-with-clickhouse}

-With this instruction you can run basic ClickHouse performance test on any server without installation of ClickHouse packages.
+使用此指南,您无需安装ClickHouse软件包即可在任何服务器上运行基本的ClickHouse性能测试。

-1. Go to “commits” page: https://github.com/ClickHouse/ClickHouse/commits/master
+1. 转到 “commits” 页面:https://github.com/ClickHouse/ClickHouse/commits/master

-2. Click on the first green check mark or red cross with green “ClickHouse Build Check” and click on the “Details” link near “ClickHouse Build Check”.
+2. 点击第一个带有绿色 “ClickHouse Build Check” 标记的绿色对勾或红色叉号,然后点击 “ClickHouse Build Check” 旁边的 “Details” 链接。有些提交(例如文档相关的提交)没有这个链接,这种情况下请选择最近一个带有此链接的提交。

-3. Copy the link to “clickhouse” binary for amd64 or aarch64.
+3. 复制amd64或aarch64对应的 “clickhouse” 二进制文件的链接。

-4. ssh to the server and download it with wget:
+4. ssh到服务器并使用wget下载它:

      # Then do:
      chmod a+x clickhouse

-1. Download configs:
+1. 下载配置:

      wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/programs/server/config.d/path.xml -O config.d/path.xml
      wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/programs/server/config.d/log_to_console.xml -O config.d/log_to_console.xml

-1. Download benchmark files:
+1. 下载基准测试文件:

      wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/clickhouse/benchmark-new.sh
      chmod a+x benchmark-new.sh
      wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/clickhouse/queries.sql

-1. Download test data according to the [Yandex.Metrica dataset](../getting_started/example_datasets/metrica.md) instruction (“hits” table containing 100 million rows).
+1. 根据 [Yandex.Metrica 数据集](../getting_started/example_datasets/metrica.md) 的说明下载测试数据(“hits” 表包含1亿行)。

      wget https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_100m_obfuscated_v1.tar.xz
      tar xvf hits_100m_obfuscated_v1.tar.xz -C .
      mv hits_100m_obfuscated_v1/* .

-1. Run the server:
+1. 运行服务器:

      ./clickhouse server

-1. Check the data: ssh to the server in another terminal
+1. 检查数据:在另一个终端中ssh到服务器

      ./clickhouse client --query "SELECT count() FROM hits_100m_obfuscated"
      100000000

-1. Edit the benchmark-new.sh, change “clickhouse-client” to “./clickhouse client” and add “–max\_memory\_usage 100000000000” parameter.
+1. 编辑benchmark-new.sh,将 “clickhouse-client” 改为 “./clickhouse client”,并添加 “–max\_memory\_usage 100000000000” 参数。

      mcedit benchmark-new.sh

-1. Run the benchmark:
+1. 运行基准测试:

      ./benchmark-new.sh hits_100m_obfuscated

-1. Send the numbers and the info about your hardware configuration to clickhouse-feedback@yandex-team.com
+1. 将测试结果数据和硬件配置信息发送到clickhouse-feedback@yandex-team.com

-All the results are published here: https://clickhouse.tech/benchmark\_hardware.html
+所有结果都公布在这里:https://clickhouse.tech/benchmark\_hardware.html
diff --git a/docs/zh/operations/quotas.md b/docs/zh/operations/quotas.md
index 06f25f57016..3838da00e56 100644
--- a/docs/zh/operations/quotas.md
+++ b/docs/zh/operations/quotas.md
@@ -1,16 +1,17 @@
-# Quotas {#quotas}
+# 配额 {#quotas}

-Quotas allow you to limit resource usage over a period of time, or simply track the use of resources.
+配额允许您在一段时间内限制资源使用情况,或者只是跟踪资源的使用。

-Quotas are set up in the user config. This is usually ‘users.xml’.
+配额在用户配置中设置,通常是 ‘users.xml’。

 The system also has a feature for limiting the complexity of a single query. See the section «Restrictions on query complexity»).

-In contrast to query complexity restrictions, quotas:
+与查询复杂性限制相比,配额:

-- Place restrictions on a set of queries that can be run over a period of time, instead of limiting a single query.
+- 对可以在一段时间内运行的一组查询设置限制,而不是限制单个查询。

-- Account for resources spent on all remote servers for distributed query processing.
+- 会将分布式查询处理中所有远程服务器上消耗的资源计算在内。

-Let’s look at the section of the ‘users.xml’ file that defines quotas.
+让我们看看 ‘users.xml’ 文件中定义配额的部分。

 ``` xml
@@ -32,8 +33,8 @@ Let’s look at the section of the ‘users.xml’ file that defines quotas.
 ```

-By default, the quota just tracks resource consumption for each hour, without limiting usage.
-The resource consumption calculated for each interval is output to the server log after each request.
+默认情况下,配额只跟踪每小时的资源消耗,而不限制使用。
+每次请求后,每个时间间隔计算出的资源消耗会输出到服务器日志中。

 ``` xml
@@ -61,11 +62,11 @@
 ```

-For the ‘statbox’ quota, restrictions are set for every hour and for every 24 hours (86,400 seconds). The time interval is counted starting from an implementation-defined fixed moment in time. In other words, the 24-hour interval doesn’t necessarily begin at midnight.
+对于 ‘statbox’ 配额,限制分别针对每小时和每24小时(86,400秒)设置。时间间隔从一个由实现定义的固定时刻开始计数。换句话说,24小时间隔不一定从午夜开始。

-When the interval ends, all collected values are cleared. For the next hour, the quota calculation starts over.
+间隔结束时,将清除所有已收集的值。在下一个小时内,配额计算重新开始。

-Here are the amounts that can be restricted:
+以下是可以限制的量:

 `queries` – The total number of requests.
@@ -77,7 +78,7 @@ Here are the amounts that can be restricted:
 `execution_time` – The total query execution time, in seconds (wall time).

-If the limit is exceeded for at least one time interval, an exception is thrown with a text about which restriction was exceeded, for which interval, and when the new interval begins (when queries can be sent again).
+如果在至少一个时间间隔内超出限制,则会抛出异常,其文本说明超出了哪个限制、针对哪个时间间隔,以及新的时间间隔何时开始(何时可以再次发送查询)。

 Quotas can use the «quota key» feature in order to report on resources for multiple keys independently. Here is an example of this:

 ``` xml
@@ -96,10 +97,10 @@ Quotas can use the «quota key» feature in order to report on resources for mul
 ```

-The quota is assigned to users in the ‘users’ section of the config. See the section «Access rights».
+配额在配置的 ‘users’ 部分中分配给用户。参见«访问权限»一节。

 For distributed query processing, the accumulated amounts are stored on the requestor server. So if the user goes to another server, the quota there will «start over».

-When the server is restarted, quotas are reset.
+服务器重新启动时,配额会被重置。

-[Original article](https://clickhouse.tech/docs/en/operations/quotas/)
+[原始文章](https://clickhouse.tech/docs/en/operations/quotas/)
diff --git a/docs/zh/operations/requirements.md b/docs/zh/operations/requirements.md
index 9dd5553a241..d48de98f85c 100644
--- a/docs/zh/operations/requirements.md
+++ b/docs/zh/operations/requirements.md
@@ -1,58 +1,61 @@
 ---
-en_copy: true
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_priority: 44
+toc_title: "\u8981\u6C42"
 ---

-# Requirements {#requirements}
+# 要求 {#requirements}

 ## CPU {#cpu}

-For installation from prebuilt deb packages, use a CPU with x86\_64 architecture and support for SSE 4.2 instructions. To run ClickHouse with processors that do not support SSE 4.2 or have AArch64 or PowerPC64LE architecture, you should build ClickHouse from sources.
+对于从预构建的deb包进行安装,请使用具有x86\_64架构并支持SSE4.2指令的CPU。要使用不支持SSE4.2或具有AArch64、PowerPC64LE体系结构的处理器运行ClickHouse,您应该从源代码构建ClickHouse。

-ClickHouse implements parallel data processing and uses all the hardware resources available. When choosing a processor, take into account that ClickHouse works more efficiently at configurations with a large number of cores but a lower clock rate than at configurations with fewer cores and a higher clock rate. For example, 16 cores with 2600 MHz is preferable to 8 cores with 3600 MHz.
+ClickHouse实现并行数据处理,并会使用所有可用的硬件资源。在选择处理器时请注意:ClickHouse在核心数多但主频较低的配置下,比核心数少而主频较高的配置工作效率更高。例如,16核2600MHz优于8核3600MHz。

-Use of **Turbo Boost** and **hyper-threading** technologies is recommended. It significantly improves performance with a typical load.
+建议使用 **Turbo Boost** 和 **超线程** 技术。它们可以显著提高典型工作负载的性能。

 ## RAM {#ram}

-We recommend to use a minimum of 4GB of RAM in order to perform non-trivial queries. The ClickHouse server can run with a much smaller amount of RAM, but it requires memory for processing queries.
+我们建议至少使用4GB的RAM来执行有一定规模的查询。ClickHouse服务器可以在少得多的RAM下运行,但处理查询仍需要内存。

-The required volume of RAM depends on:
+所需的RAM容量取决于:

-- The complexity of queries.
+- 查询的复杂性。

-- The amount of data that is processed in queries.
+- 查询中处理的数据量。

-To calculate the required volume of RAM, you should estimate the size of temporary data for [GROUP BY](../query_language/select.md#select-group-by-clause), [DISTINCT](../query_language/select.md#select-distinct), [JOIN](../query_language/select.md#select-join) and other operations you use.
+要计算所需的RAM容量,您应该估计 [GROUP BY](../sql_reference/statements/select.md#select-group-by-clause)、[DISTINCT](../sql_reference/statements/select.md#select-distinct)、[JOIN](../sql_reference/statements/select.md#select-join) 以及您使用的其他操作所产生的临时数据的大小。

-ClickHouse can use external memory for temporary data. See [GROUP BY in External Memory](../query_language/select.md#select-group-by-in-external-memory) for details.
+ClickHouse可以使用外部存储来存放临时数据。详细信息参见 [外部存储中的GROUP BY](../sql_reference/statements/select.md#select-group-by-in-external-memory)。

-## Swap File {#swap-file}
+## 交换文件 {#swap-file}

-Disable the swap file for production environments.
+在生产环境中请禁用交换文件。

-## Storage Subsystem {#storage-subsystem}
+## 存储子系统 {#storage-subsystem}

-You need to have 2GB of free disk space to install ClickHouse.
+您需要有2GB的可用磁盘空间来安装ClickHouse。

-The volume of storage required for your data should be calculated separately. Assessment should include:
+数据所需的存储量应单独计算。评估应包括:

-- Estimation of the data volume.
+- 估计的数据量。

-    You can take a sample of the data and get the average size of a row from it. Then multiply the value by the number of rows you plan to store.
+    您可以取一份数据样本,得到每行的平均大小,再将该值乘以计划存储的行数。

-- The data compression coefficient.
+- 数据压缩系数。

-    To estimate the data compression coefficient, load a sample of your data into ClickHouse and compare the actual size of the data with the size of the table stored. For example, clickstream data is usually compressed by 6-10 times.
+    要估计数据压缩系数,请将数据样本加载到ClickHouse中,并将数据的实际大小与存储后表的大小进行比较。例如,点击流数据通常可以压缩6-10倍。

-To calculate the final volume of data to be stored, apply the compression coefficient to the estimated data volume. If you plan to store data in several replicas, then multiply the estimated volume by the number of replicas.
+要计算最终需要存储的数据量,请将压缩系数应用于估计的数据量。如果计划将数据存储在多个副本中,再将该估计值乘以副本数。

-## Network {#network}
+## 网络 {#network}

-If possible, use networks of 10G or higher class.
+如果可能的话,请使用10G或更高级别的网络。

-The network bandwidth is critical for processing distributed queries with a large amount of intermediate data. In addition, network speed affects replication processes.
+网络带宽对于处理具有大量中间数据的分布式查询至关重要。此外,网络速度还会影响复制过程。

-## Software {#software}
+## 软件 {#software}

-ClickHouse is developed for the Linux family of operating systems. The recommended Linux distribution is Ubuntu. The `tzdata` package should be installed in the system.
+ClickHouse主要是为Linux系列操作系统开发的。推荐的Linux发行版是Ubuntu。系统中应安装 `tzdata` 软件包。

-ClickHouse can also work in other operating system families. See details in the [Getting started](../getting_started/index.md) section of the documentation.
+ClickHouse也可以在其他操作系统家族中运行。详细信息参见文档的 [入门](../getting_started/index.md) 部分。
diff --git a/docs/zh/operations/server_configuration_parameters/index.md b/docs/zh/operations/server_configuration_parameters/index.md
new file mode 100644
index 00000000000..cf3f158b37c
--- /dev/null
+++ b/docs/zh/operations/server_configuration_parameters/index.md
@@ -0,0 +1,12 @@
+
+# 服务器配置参数 {#server-settings}
+
+本节包含无法在会话或查询级别更改的服务器设置的说明。
+
+这些设置存储在ClickHouse服务器的 `config.xml` 文件中。
+
+其他设置在«[设置](../settings/index.md#settings)»一节中描述。
+
+在研究这些设置之前,请先阅读 [配置文件](../configuration_files.md#configuration_files) 一节,并注意替换的用法(`incl` 和 `optional` 属性)。
+
+[原始文章](https://clickhouse.tech/docs/en/operations/server_configuration_parameters/)
diff --git a/docs/zh/operations/server_configuration_parameters/settings.md b/docs/zh/operations/server_configuration_parameters/settings.md
new file mode 100644
index 00000000000..b78f8173741
--- /dev/null
+++ b/docs/zh/operations/server_configuration_parameters/settings.md
@@ -0,0 +1,872 @@
+---
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_priority: 57
+toc_title: "\u670D\u52A1\u5668\u8BBE\u7F6E"
+---
+
+# 服务器设置 {#server-settings}
+
+## builtin\_dictionaries\_reload\_interval {#builtin-dictionaries-reload-interval}
+
+重新加载内置字典的时间间隔(以秒为单位)。
+
+ClickHouse每x秒重新加载一次内置字典。这样无需重启服务器即可“即时”编辑字典。
+
+默认值:3600。
+
+**示例**
+
+``` xml
+<builtin_dictionaries_reload_interval>3600</builtin_dictionaries_reload_interval>
+```
+
+## 压缩 {#server-settings-compression}
+
+[MergeTree](../../engines/table_engines/mergetree_family/mergetree.md) 引擎表的数据压缩设置。
+
+!!! warning "警告"
+    如果您刚开始使用ClickHouse,请不要使用它。
+
+配置模板:
+
+``` xml
+<compression incl="clickhouse_compression">
+    <case>
+        <min_part_size>...</min_part_size>
+        <min_part_size_ratio>...</min_part_size_ratio>
+        <method>...</method>
+    </case>
+    ...
+</compression>
+```
+
+`<case>` 字段:
+
+- `min_part_size` – The minimum size of a data part.
+- `min_part_size_ratio` – The ratio of the data part size to the table size.
+- `method` – Compression method. Acceptable values: `lz4` 或 `zstd`.
+
+您可以配置多个 `<case>` 部分。
+
+满足条件时的操作:
+
+- 如果数据部分与某个条件集匹配,ClickHouse将使用指定的压缩方法。
+- 如果数据部分匹配多个条件集,ClickHouse将使用第一个匹配的条件集。
+
+如果数据部分不满足任何条件,ClickHouse使用 `lz4` 压缩。
+
+**示例**
+
+``` xml
+<compression incl="clickhouse_compression">
+    <case>
+        <min_part_size>10000000000</min_part_size>
+        <min_part_size_ratio>0.01</min_part_size_ratio>
+        <method>zstd</method>
+    </case>
+</compression>
+```
+
+## default\_database {#default-database}
+
+默认数据库。
+
+要获取数据库列表,请使用 [SHOW DATABASES](../../sql_reference/statements/show.md#show-databases) 查询。
+
+**示例**
+
+``` xml
+<default_database>default</default_database>
+```
+
+## default\_profile {#default-profile}
+
+默认设置配置文件。
+
+设置配置文件位于 `user_config` 参数指定的文件中。
+
+**示例**
+
+``` xml
+<default_profile>default</default_profile>
+```
+
+## dictionaries\_config {#server_configuration_parameters-dictionaries_config}
+
+外部字典配置文件的路径。
+
+路径:
+
+- 绝对路径,或相对于服务器配置文件的路径。
+- 路径可以包含通配符\*和?。
+
+另请参阅 “[外部字典](../../sql_reference/dictionaries/external_dictionaries/external_dicts.md)”。
+
+**示例**
+
+``` xml
+<dictionaries_config>*_dictionary.xml</dictionaries_config>
+```
+
+## dictionaries\_lazy\_load {#server_configuration_parameters-dictionaries_lazy_load}
+
+延迟加载字典。
+
+如果为 `true`,则每个字典在第一次使用时创建。如果字典创建失败,则使用该字典的函数将引发异常。
+
+如果为 `false`,服务器启动时即创建所有字典,如果出现错误,服务器将关闭。
+
+默认值为 `true`。
+
+**示例**
+
+``` xml
+<dictionaries_lazy_load>true</dictionaries_lazy_load>
+```
+
+## format\_schema\_path {#server_configuration_parameters-format_schema_path}
+
+包含输入数据模式的目录路径,例如 [CapnProto](../../interfaces/formats.md#capnproto) 格式输入数据的模式。
+
+**示例**
+
+``` xml
+<format_schema_path>format_schemas/</format_schema_path>
+```
+
+## Graphite {#server_configuration_parameters-graphite}
+
+将数据发送到 [Graphite](https://github.com/graphite-project)。
+
+设置:
+
+- host – The Graphite server.
+- port – The port on the Graphite server.
+- interval – The interval for sending, in seconds.
+- timeout – The timeout for sending data, in seconds.
+- root\_path – Prefix for keys.
+## Graphite {#server_configuration_parameters-graphite}
+
+将数据发送到 [Graphite](https://github.com/graphite-project)。
+
+设置:
+
+- host – The Graphite server.
+- port – The port on the Graphite server.
+- interval – The interval for sending, in seconds.
+- timeout – The timeout for sending data, in seconds.
+- root\_path – Prefix for keys.
+- metrics – Sending data from the [system.metrics](../../operations/system_tables.md#system_tables-metrics) 表。
+- events – Sending deltas data accumulated for the time period from the [system.events](../../operations/system_tables.md#system_tables-events) 表。
+- events\_cumulative – Sending cumulative data from the [system.events](../../operations/system_tables.md#system_tables-events) 表。
+- asynchronous\_metrics – Sending data from the [system.asynchronous\_metrics](../../operations/system_tables.md#system_tables-asynchronous_metrics) 表。
+
+您可以配置多个 `<graphite>` 部分。例如,您可以使用它以不同的时间间隔发送不同的数据。
+
+**示例**
+
+``` xml
+<graphite>
+    <host>localhost</host>
+    <port>42000</port>
+    <timeout>0.1</timeout>
+    <interval>60</interval>
+    <root_path>one_min</root_path>
+    <metrics>true</metrics>
+    <events>true</events>
+    <events_cumulative>false</events_cumulative>
+    <asynchronous_metrics>true</asynchronous_metrics>
+</graphite>
+```
+
+## graphite\_rollup {#server_configuration_parameters-graphite-rollup}
+
+用于Graphite数据细化(rollup)的设置。
+
+有关详细信息,请参阅 [GraphiteMergeTree](../../engines/table_engines/mergetree_family/graphitemergetree.md).
+
+**示例**
+
+``` xml
+<graphite_rollup_example>
+    <default>
+        <function>max</function>
+        <retention>
+            <age>0</age>
+            <precision>60</precision>
+        </retention>
+        <retention>
+            <age>3600</age>
+            <precision>300</precision>
+        </retention>
+        <retention>
+            <age>86400</age>
+            <precision>3600</precision>
+        </retention>
+    </default>
+</graphite_rollup_example>
+```
+
+## http\_port/https\_port {#http-porthttps-port}
+
+通过HTTP(s)连接到服务器的端口。
+
+如果指定了 `https_port`,则必须配置 [openSSL](#server_configuration_parameters-openssl)。
+
+如果指定了 `http_port`,则即使设置了OpenSSL配置,也会被忽略。
+
+**示例**
+
+``` xml
+<https_port>0000</https_port>
+```
+
+## http\_server\_default\_response {#server_configuration_parameters-http_server_default_response}
+
+访问ClickHouse HTTP(s)服务器时默认显示的页面。
+默认值为 “Ok.”(末尾有换行符)。
+
+**示例**
+
+访问 `http://localhost: http_port` 时打开 `https://tabix.io/`。
+
+``` xml
+<http_server_default_response>
+  <![CDATA[<html ng-app="SMI2"><head><base href="http://ui.tabix.co/"></head><body><div ui-view="" class="content-ui"></div><script src="http://loader.tabix.co/master.js"></script></body></html>
]]>
+</http_server_default_response>
+``` + +## 包括\_从 {#server_configuration_parameters-include_from} + +带替换的文件的路径。 + +有关详细信息,请参阅部分 “[配置文件](../configuration_files.md#configuration_files)”. + +**示例** + +``` xml +/etc/metrica.xml +``` + +## interserver\_http\_port {#interserver-http-port} + +用于在ClickHouse服务器之间交换数据的端口。 + +**示例** + +``` xml +9009 +``` + +## interserver\_http\_host {#interserver-http-host} + +其他服务器可用于访问此服务器的主机名。 + +如果省略,它以相同的方式作为定义 `hostname-f` 指挥部 + +用于脱离特定的网络接口。 + +**示例** + +``` xml +example.yandex.ru +``` + +## interserver\_http\_credentials {#server-settings-interserver-http-credentials} + +用户名和密码用于在以下期间进行身份验证 [复制](../../engines/table_engines/mergetree_family/replication.md) 与复制\*引擎。 这些凭据仅用于副本之间的通信,与ClickHouse客户端的凭据无关。 服务器正在检查这些凭据以连接副本,并在连接到其他副本时使用相同的凭据。 因此,这些凭据应该为集群中的所有副本设置相同。 +默认情况下,不使用身份验证。 + +本节包含以下参数: + +- `user` — username. +- `password` — password. + +**示例** + +``` xml + + admin + 222 + +``` + +## keep\_alive\_timeout {#keep-alive-timeout} + +ClickHouse在关闭连接之前等待传入请求的秒数。 默认为3秒。 + +**示例** + +``` xml +3 +``` + +## listen\_host {#server_configuration_parameters-listen_host} + +对请求可能来自的主机的限制。 如果您希望服务器回答所有这些问题,请指定 `::`. + +例: + +``` xml +::1 +127.0.0.1 +``` + +## 记录器 {#server_configuration_parameters-logger} + +日志记录设置。 + +键: + +- level – Logging level. Acceptable values: `trace`, `debug`, `information`, `warning`, `error`. +- log – The log file. Contains all the entries according to `level`. +- errorlog – Error log file. +- size – Size of the file. Applies to `log`和`errorlog`. 一旦文件到达 `size`,ClickHouse存档并重命名它,并在其位置创建一个新的日志文件。 +- count – The number of archived log files that ClickHouse stores. + +**示例** + +``` xml + + trace + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + 1000M + 10 + +``` + +还支持写入系统日志。 配置示例: + +``` xml + + 1 + +
syslog.remote:10514
+ myhost.local + LOG_LOCAL6 + syslog +
+
+``` + +键: + +- use\_syslog — Required setting if you want to write to the syslog. +- address — The host\[:port\] of syslogd. If omitted, the local daemon is used. +- hostname — Optional. The name of the host that logs are sent from. +- facility — [系统日志工具关键字](https://en.wikipedia.org/wiki/Syslog#Facility) 在大写字母与 “LOG\_” 前缀: (`LOG_USER`, `LOG_DAEMON`, `LOG_LOCAL3`,等等)。 + 默认值: `LOG_USER` 如果 `address` 被指定, `LOG_DAEMON otherwise.` +- format – Message format. Possible values: `bsd` 和 `syslog.` + +## 宏 {#macros} + +复制表的参数替换。 + +如果不使用复制的表,则可以省略。 + +有关详细信息,请参阅部分 “[创建复制的表](../../engines/table_engines/mergetree_family/replication.md)”. + +**示例** + +``` xml + +``` + +## mark\_cache\_size {#server-mark-cache-size} + +表引擎使用的标记缓存的近似大小(以字节为单位) [MergeTree](../../engines/table_engines/mergetree_family/mergetree.md) 家人 + +缓存为服务器共享,并根据需要分配内存。 缓存大小必须至少为5368709120。 + +**示例** + +``` xml +5368709120 +``` + +## max\_concurrent\_queries {#max-concurrent-queries} + +同时处理的请求的最大数量。 + +**示例** + +``` xml +100 +``` + +## max\_connections {#max-connections} + +入站连接的最大数量。 + +**示例** + +``` xml +4096 +``` + +## max\_open\_files {#max-open-files} + +打开文件的最大数量。 + +默认情况下: `maximum`. + +我们建议在Mac OS X中使用此选项,因为 `getrlimit()` 函数返回一个不正确的值。 + +**示例** + +``` xml +262144 +``` + +## max\_table\_size\_to\_drop {#max-table-size-to-drop} + +限制删除表。 + +如果一个大小 [MergeTree](../../engines/table_engines/mergetree_family/mergetree.md) 表超过 `max_table_size_to_drop` (以字节为单位),您无法使用删除查询将其删除。 + +如果仍然需要在不重新启动ClickHouse服务器的情况下删除表,请创建 `/flags/force_drop_table` 文件并运行DROP查询。 + +默认值:50GB。 + +值0表示您可以删除所有表而不受任何限制。 + +**示例** + +``` xml +0 +``` + +## merge\_tree {#server_configuration_parameters-merge_tree} + +微调中的表 [MergeTree](../../engines/table_engines/mergetree_family/mergetree.md). + +有关详细信息,请参阅MergeTreeSettings。h头文件。 + +**示例** + +``` xml + + 5 + +``` + +## openSSL {#server_configuration_parameters-openssl} + +SSL客户端/服务器配置。 + +对SSL的支持由 `libpoco` 图书馆. 该接口在文件中描述 [SSLManager.h](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h) + +服务器/客户端设置的密钥: + +- privateKeyFile – The path to the file with the secret key of the PEM certificate. The file may contain a key and certificate at the same time. +- certificateFile – The path to the client/server certificate file in PEM format. You can omit it if `privateKeyFile` 包含证书。 +- caConfig – The path to the file or directory that contains trusted root certificates. +- verificationMode – The method for checking the node's certificates. Details are in the description of the [A.背景](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/Context.h) 同学们 可能的值: `none`, `relaxed`, `strict`, `once`. +- verificationDepth – The maximum length of the verification chain. Verification will fail if the certificate chain length exceeds the set value. +- loadDefaultCAFile – Indicates that built-in CA certificates for OpenSSL will be used. Acceptable values: `true`, `false`. \| +- cipherList – Supported OpenSSL encryptions. For example: `ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH`. +- cacheSessions – Enables or disables caching sessions. Must be used in combination with `sessionIdContext`. 可接受的值: `true`, `false`. +- sessionIdContext – A unique set of random characters that the server appends to each generated identifier. The length of the string must not exceed `SSL_MAX_SSL_SESSION_ID_LENGTH`. 始终建议使用此参数,因为如果服务器缓存会话,以及客户端请求缓存,它有助于避免出现问题。 默认值: `${application.name}`. +- sessionCacheSize – The maximum number of sessions that the server caches. Default value: 1024\*20. 
0 – Unlimited sessions. +- sessionTimeout – Time for caching the session on the server. +- extendedVerification – Automatically extended verification of certificates after the session ends. Acceptable values: `true`, `false`. +- requireTLSv1 – Require a TLSv1 connection. Acceptable values: `true`, `false`. +- requireTLSv1\_1 – Require a TLSv1.1 connection. Acceptable values: `true`, `false`. +- requireTLSv1 – Require a TLSv1.2 connection. Acceptable values: `true`, `false`. +- fips – Activates OpenSSL FIPS mode. Supported if the library's OpenSSL version supports FIPS. +- privateKeyPassphraseHandler – Class (PrivateKeyPassphraseHandler subclass) that requests the passphrase for accessing the private key. For example: ``, `KeyFileHandler`, `test`, ``. +- invalidCertificateHandler – Class (a subclass of CertificateHandler) for verifying invalid certificates. For example: ` ConsoleCertificateHandler ` . +- disableProtocols – Protocols that are not allowed to use. +- preferServerCiphers – Preferred server ciphers on the client. + +**设置示例:** + +``` xml + + + + /etc/clickhouse-server/server.crt + /etc/clickhouse-server/server.key + + /etc/clickhouse-server/dhparam.pem + none + true + true + sslv2,sslv3 + true + + + true + true + sslv2,sslv3 + true + + + + RejectCertificateHandler + + + +``` + +## part\_log {#server_configuration_parameters-part-log} + +记录与之关联的事件 [MergeTree](../../engines/table_engines/mergetree_family/mergetree.md). 例如,添加或合并数据。 您可以使用日志来模拟合并算法并比较它们的特征。 您可以可视化合并过程。 + +查询记录在 [系统。part\_log](../../operations/system_tables.md#system_tables-part-log) 表,而不是在一个单独的文件。 您可以在以下命令中配置此表的名称 `table` 参数(见下文)。 + +使用以下参数配置日志记录: + +- `database` – Name of the database. +- `table` – Name of the system table. +- `partition_by` – Sets a [自定义分区键](../../engines/table_engines/mergetree_family/custom_partitioning_key.md). +- `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table. + +**示例** + +``` xml + + system + part_log
+ toMonday(event_date) + 7500 +
+```
+
+## 路径 {#server_configuration_parameters-path}
+
+包含数据的目录的路径。
+
+!!! note "注"
+    尾部斜杠是必需的。
+
+**示例**
+
+``` xml
+<path>/var/lib/clickhouse/</path>
+```
+
+## query\_log {#server_configuration_parameters-query-log}
+
+用于记录通过 [log\_queries=1](../settings/settings.md) 设置接收到的查询的设置。
+
+查询记录在 [system.query\_log](../../operations/system_tables.md#system_tables-query_log) 表中,而不是在单独的文件中。您可以通过 `table` 参数更改表的名称(见下文)。
+
+使用以下参数配置日志记录:
+
+- `database` – Name of the database.
+- `table` – Name of the system table the queries will be logged in.
+- `partition_by` – Sets a [自定义分区键](../../engines/table_engines/mergetree_family/custom_partitioning_key.md)(用于表)。
+- `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table.
+
+如果该表不存在,ClickHouse将创建它。如果在ClickHouse服务器更新时查询日志的结构发生了更改,则会重命名具有旧结构的表,并自动创建新表。
+
+**示例**
+
+``` xml
+<query_log>
+    <database>system</database>
+    <table>query_log</table>
+    <partition_by>toMonday(event_date)</partition_by>
+    <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+</query_log>
+``` + +## query\_thread\_log {#server_configuration_parameters-query-thread-log} + +设置用于记录接收到的查询的线程 [log\_query\_threads=1](../settings/settings.md#settings-log-query-threads) 设置。 + +查询记录在 [系统。query\_thread\_log](../../operations/system_tables.md#system_tables-query-thread-log) 表,而不是在一个单独的文件。 您可以更改表的名称 `table` 参数(见下文)。 + +使用以下参数配置日志记录: + +- `database` – Name of the database. +- `table` – Name of the system table the queries will be logged in. +- `partition_by` – Sets a [自定义分区键](../../engines/table_engines/mergetree_family/custom_partitioning_key.md) 对于一个系统表。 +- `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table. + +如果该表不存在,ClickHouse将创建它。 如果更新ClickHouse服务器时查询线程日志的结构发生了更改,则会重命名具有旧结构的表,并自动创建新表。 + +**示例** + +``` xml + + system + query_thread_log
+ toMonday(event_date) + 7500 +
+``` + +## trace\_log {#server_configuration_parameters-trace_log} + +设置为 [trace\_log](../../operations/system_tables.md#system_tables-trace_log) 系统表操作。 + +参数: + +- `database` — Database for storing a table. +- `table` — Table name. +- `partition_by` — [自定义分区键](../../engines/table_engines/mergetree_family/custom_partitioning_key.md) 对于一个系统表。 +- `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table. + +默认服务器配置文件 `config.xml` 包含以下设置部分: + +``` xml + + system + trace_log
+ toYYYYMM(event_date) + 7500 +
+``` + +## query\_masking\_rules {#query-masking-rules} + +基于正则表达式的规则,在将查询以及所有日志消息存储在服务器日志中之前,这些规则将应用于查询以及所有日志消息, +`system.query_log`, `system.text_log`, `system.processes` 表,并在日志中发送给客户端。 这允许防止 +从SQL查询敏感数据泄漏(如姓名,电子邮件,个人 +标识符或信用卡号码)记录。 + +**示例** + +``` xml + + + hide SSN + (^|\D)\d{3}-\d{2}-\d{4}($|\D) + 000-00-0000 + + +``` + +配置字段: +- `name` -规则的名称(可选) +- `regexp` -RE2兼容正则表达式(强制性) +- `replace` -敏感数据的替换字符串(可选,默认情况下-六个星号) + +屏蔽规则应用于整个查询(以防止敏感数据从格式错误/不可解析的查询泄漏)。 + +`system.events` 表有计数器 `QueryMaskingRulesMatch` 其中具有匹配的查询屏蔽规则的总数。 + +对于分布式查询,每个服务器必须单独配置,否则,子查询传递给其他 +节点将被存储而不屏蔽。 + +## remote\_servers {#server-settings-remote-servers} + +所使用的集群的配置 [分布](../../engines/table_engines/special/distributed.md) 表引擎和由 `cluster` 表功能。 + +**示例** + +``` xml + +``` + +对于该值 `incl` 属性,请参阅部分 “[配置文件](../configuration_files.md#configuration_files)”. + +**另请参阅** + +- [skip\_unavailable\_shards](../settings/settings.md#settings-skip_unavailable_shards) + +## 时区 {#server_configuration_parameters-timezone} + +服务器的时区。 + +指定为UTC时区或地理位置(例如,非洲/阿比让)的IANA标识符。 + +当DateTime字段输出为文本格式(打印在屏幕上或文件中)时,以及从字符串获取DateTime时,时区对于字符串和DateTime格式之间的转换是必需的。 此外,如果在输入参数中没有收到时区,则时区用于处理时间和日期的函数。 + +**示例** + +``` xml +Europe/Moscow +``` + +## tcp\_port {#server_configuration_parameters-tcp_port} + +通过TCP协议与客户端通信的端口。 + +**示例** + +``` xml +9000 +``` + +## tcp\_port\_secure {#server_configuration_parameters-tcp_port-secure} + +TCP端口,用于与客户端进行安全通信。 使用它与 [OpenSSL](#server_configuration_parameters-openssl) 设置。 + +**可能的值** + +整数。 + +**默认值** + +``` xml +9440 +``` + +## mysql\_port {#server_configuration_parameters-mysql_port} + +通过MySQL协议与客户端通信的端口。 + +**可能的值** + +整数。 + +示例 + +``` xml +9004 +``` + +## tmp\_path {#server-settings-tmp_path} + +用于处理大型查询的临时数据的路径。 + +!!! note "注" + 尾部斜杠是强制性的。 + +**示例** + +``` xml +/var/lib/clickhouse/tmp/ +``` + +## tmp\_policy {#server-settings-tmp-policy} + +从政策 [`storage_configuration`](../../engines/table_engines/mergetree_family/mergetree.md#table_engine-mergetree-multiple-volumes) 存储临时文件。 +如果没有设置 [`tmp_path`](#server-settings-tmp_path) 被使用,否则被忽略。 + +!!! note "注" + - `move_factor` 被忽略 +- `keep_free_space_bytes` 被忽略 +- `max_data_part_size_bytes` 被忽略 +-您必须在该政策中只有一个卷 + +## uncompressed\_cache\_size {#server-settings-uncompressed_cache_size} + +表引擎使用的未压缩数据的缓存大小(以字节为单位) [MergeTree](../../engines/table_engines/mergetree_family/mergetree.md). + +服务器有一个共享缓存。 内存按需分配。 如果选项使用缓存 [use\_uncompressed\_cache](../settings/settings.md#setting-use_uncompressed_cache) 被启用。 + +在个别情况下,未压缩的缓存对于非常短的查询是有利的。 + +**示例** + +``` xml +8589934592 +``` + +## user\_files\_path {#server_configuration_parameters-user_files_path} + +包含用户文件的目录。 在表函数中使用 [文件()](../../sql_reference/table_functions/file.md). + +**示例** + +``` xml +/var/lib/clickhouse/user_files/ +``` + +## users\_config {#users-config} + +包含文件的路径: + +- 用户配置。 +- 访问权限。 +- 设置配置文件。 +- 配额设置。 + +**示例** + +``` xml +users.xml +``` + +## 动物园管理员 {#server-settings_zookeeper} + +包含允许ClickHouse与 [动物园管理员](http://zookeeper.apache.org/) 集群。 + +ClickHouse使用ZooKeeper在使用复制表时存储副本的元数据。 如果未使用复制的表,则可以省略此部分参数。 + +本节包含以下参数: + +- `node` — ZooKeeper endpoint. You can set multiple endpoints. + + 例如: + + + +``` xml + + example_host + 2181 + +``` + + The `index` attribute specifies the node order when trying to connect to the ZooKeeper cluster. + +- `session_timeout` — Maximum timeout for the client session in milliseconds. 
+- `root` — 用作ClickHouse服务器所用znode的根的 [znode](http://zookeeper.apache.org/doc/r3.5.5/zookeeperOver.html#Nodes+and+ephemeral+nodes)。可选。
+- `identity` — User and password, that can be required by ZooKeeper to give access to requested znodes. Optional.
+
+**配置示例**
+
+``` xml
+<zookeeper>
+    <node>
+        <host>example1</host>
+        <port>2181</port>
+    </node>
+    <node>
+        <host>example2</host>
+        <port>2181</port>
+    </node>
+    <session_timeout_ms>30000</session_timeout_ms>
+    <operation_timeout_ms>10000</operation_timeout_ms>
+    <!-- 可选。chroot后缀。该节点应当已存在。 -->
+    <root>/path/to/zookeeper/node</root>
+    <!-- 可选。ZooKeeper digest ACL字符串。 -->
+    <identity>user:password</identity>
+</zookeeper>
+```
+
+**另请参阅**
+
+- [复制](../../engines/table_engines/mergetree_family/replication.md)
+- [ZooKeeper程序员指南](http://zookeeper.apache.org/doc/current/zookeeperProgrammers.html)
+
+## use\_minimalistic\_part\_header\_in\_zookeeper {#server-settings-use_minimalistic_part_header_in_zookeeper}
+
+ZooKeeper中数据部分头的存储方法。
+
+此设置仅适用于 `MergeTree` 家族。它可以指定:
+
+- 全局地,在 `config.xml` 文件的 [merge\_tree](#server_configuration_parameters-merge_tree) 一节中。
+
+    ClickHouse对服务器上的所有表使用该设置。您可以随时更改设置。设置更改时,现有表的行为也会随之改变。
+
+- 对于每个表。
+
+    创建表时,指定相应的 [引擎设置](../../engines/table_engines/mergetree_family/mergetree.md#table_engine-mergetree-creating-a-table)。即使全局设置发生更改,具有此设置的现有表的行为也不会改变。
+
+**可能的值**
+
+- 0 — Functionality is turned off.
+- 1 — Functionality is turned on.
+
+如果 `use_minimalistic_part_header_in_zookeeper = 1`,则 [复制](../../engines/table_engines/mergetree_family/replication.md) 表使用单个 `znode` 紧凑地存储数据部分的头。如果表包含许多列,这种存储方法可显著减少ZooKeeper中存储的数据量。
+
+!!! attention "注意"
+    应用 `use_minimalistic_part_header_in_zookeeper = 1` 之后,您将无法把ClickHouse服务器降级到不支持此设置的版本。在集群中的服务器上升级ClickHouse时要小心,不要一次升级所有服务器。在测试环境中或仅在集群的几台服务器上测试ClickHouse的新版本会更安全。
+
+    Data part headers already stored with this setting can't be restored to their previous (non-compact) representation.
+
+**默认值:** 0.
+
+## disable\_internal\_dns\_cache {#server-settings-disable-internal-dns-cache}
+
+禁用内部DNS缓存。建议在基础设施频繁变化的系统(如Kubernetes)中运行ClickHouse时使用。
+
+**默认值:** 0.
+
+## dns\_cache\_update\_period {#server-settings-dns-cache-update-period}
+
+更新存储在ClickHouse内部DNS缓存中的IP地址的周期(以秒为单位)。
+更新是在一个单独的系统线程中异步执行的。
+
+**默认值**: 15.
+
+[原始文章](https://clickhouse.tech/docs/en/operations/server_configuration_parameters/settings/)
diff --git a/docs/zh/operations/server_settings/index.md b/docs/zh/operations/server_settings/index.md
deleted file mode 100644
index 4a1276a2ce1..00000000000
--- a/docs/zh/operations/server_settings/index.md
+++ /dev/null
@@ -1,11 +0,0 @@
-# Server configuration parameters {#server-settings}
-
-This section contains descriptions of server settings that cannot be changed at the session or query level.
-
-These settings are stored in the `config.xml` file on the ClickHouse server.
-
-Other settings are described in the «[Settings](../settings/index.md#settings)» section.
-
-Before studying the settings, read the [Configuration files](../configuration_files.md#configuration_files) section and note the use of substitutions (the `incl` and `optional` attributes).
-
-[Original article](https://clickhouse.tech/docs/en/operations/server_settings/)
diff --git a/docs/zh/operations/server_settings/settings.md b/docs/zh/operations/server_settings/settings.md
deleted file mode 100644
index bfc1aca7217..00000000000
--- a/docs/zh/operations/server_settings/settings.md
+++ /dev/null
@@ -1,869 +0,0 @@
----
-en_copy: true
----
-
-# Server Settings {#server-settings}
-
-## builtin\_dictionaries\_reload\_interval {#builtin-dictionaries-reload-interval}
-
-The interval in seconds before reloading built-in dictionaries.
-
-ClickHouse reloads built-in dictionaries every x seconds. This makes it possible to edit dictionaries “on the fly” without restarting the server.
-
-Default value: 3600.
- -**Example** - -``` xml -3600 -``` - -## compression {#server-settings-compression} - -Data compression settings for [MergeTree](../table_engines/mergetree.md)-engine tables. - -!!! warning "Warning" - Don’t use it if you have just started using ClickHouse. - -Configuration template: - -``` xml - - - ... - ... - ... - - ... - -``` - -`` fields: - -- `min_part_size` – The minimum size of a data part. -- `min_part_size_ratio` – The ratio of the data part size to the table size. -- `method` – Compression method. Acceptable values: `lz4` or `zstd`. - -You can configure multiple `` sections. - -Actions when conditions are met: - -- If a data part matches a condition set, ClickHouse uses the specified compression method. -- If a data part matches multiple condition sets, ClickHouse uses the first matched condition set. - -If no conditions met for a data part, ClickHouse uses the `lz4` compression. - -**Example** - -``` xml - - - 10000000000 - 0.01 - zstd - - -``` - -## default\_database {#default-database} - -The default database. - -To get a list of databases, use the [SHOW DATABASES](../../query_language/show.md#show-databases) query. - -**Example** - -``` xml -default -``` - -## default\_profile {#default-profile} - -Default settings profile. - -Settings profiles are located in the file specified in the parameter `user_config`. - -**Example** - -``` xml -default -``` - -## dictionaries\_config {#server_settings-dictionaries_config} - -The path to the config file for external dictionaries. - -Path: - -- Specify the absolute path or the path relative to the server config file. -- The path can contain wildcards \* and ?. - -See also “[External dictionaries](../../query_language/dicts/external_dicts.md)”. - -**Example** - -``` xml -*_dictionary.xml -``` - -## dictionaries\_lazy\_load {#server_settings-dictionaries_lazy_load} - -Lazy loading of dictionaries. - -If `true`, then each dictionary is created on first use. If dictionary creation failed, the function that was using the dictionary throws an exception. - -If `false`, all dictionaries are created when the server starts, and if there is an error, the server shuts down. - -The default is `true`. - -**Example** - -``` xml -true -``` - -## format\_schema\_path {#server_settings-format_schema_path} - -The path to the directory with the schemes for the input data, such as schemas for the [CapnProto](../../interfaces/formats.md#capnproto) format. - -**Example** - -``` xml - - format_schemas/ -``` - -## graphite {#server_settings-graphite} - -Sending data to [Graphite](https://github.com/graphite-project). - -Settings: - -- host – The Graphite server. -- port – The port on the Graphite server. -- interval – The interval for sending, in seconds. -- timeout – The timeout for sending data, in seconds. -- root\_path – Prefix for keys. -- metrics – Sending data from the [system.metrics](../system_tables.md#system_tables-metrics) table. -- events – Sending deltas data accumulated for the time period from the [system.events](../system_tables.md#system_tables-events) table. -- events\_cumulative – Sending cumulative data from the [system.events](../system_tables.md#system_tables-events) table. -- asynchronous\_metrics – Sending data from the [system.asynchronous\_metrics](../system_tables.md#system_tables-asynchronous_metrics) table. - -You can configure multiple `` clauses. For instance, you can use this for sending different data at different intervals. 
- -**Example** - -``` xml - - localhost - 42000 - 0.1 - 60 - one_min - true - true - false - true - -``` - -## graphite\_rollup {#server_settings-graphite-rollup} - -Settings for thinning data for Graphite. - -For more details, see [GraphiteMergeTree](../table_engines/graphitemergetree.md). - -**Example** - -``` xml - - - max - - 0 - 60 - - - 3600 - 300 - - - 86400 - 3600 - - - -``` - -## http\_port/https\_port {#http-porthttps-port} - -The port for connecting to the server over HTTP(s). - -If `https_port` is specified, [openSSL](#server_settings-openssl) must be configured. - -If `http_port` is specified, the OpenSSL configuration is ignored even if it is set. - -**Example** - -``` xml -0000 -``` - -## http\_server\_default\_response {#server_settings-http_server_default_response} - -The page that is shown by default when you access the ClickHouse HTTP(s) server. -The default value is “Ok.” (with a line feed at the end) - -**Example** - -Opens `https://tabix.io/` when accessing `http://localhost: http_port`. - -``` xml - -
]]> -
-``` - -## include\_from {#server_settings-include_from} - -The path to the file with substitutions. - -For more information, see the section “[Configuration files](../configuration_files.md#configuration_files)”. - -**Example** - -``` xml -/etc/metrica.xml -``` - -## interserver\_http\_port {#interserver-http-port} - -Port for exchanging data between ClickHouse servers. - -**Example** - -``` xml -9009 -``` - -## interserver\_http\_host {#interserver-http-host} - -The hostname that can be used by other servers to access this server. - -If omitted, it is defined in the same way as the `hostname-f` command. - -Useful for breaking away from a specific network interface. - -**Example** - -``` xml -example.yandex.ru -``` - -## interserver\_http\_credentials {#server-settings-interserver-http-credentials} - -The username and password used to authenticate during [replication](../table_engines/replication.md) with the Replicated\* engines. These credentials are used only for communication between replicas and are unrelated to credentials for ClickHouse clients. The server is checking these credentials for connecting replicas and use the same credentials when connecting to other replicas. So, these credentials should be set the same for all replicas in a cluster. -By default, the authentication is not used. - -This section contains the following parameters: - -- `user` — username. -- `password` — password. - -**Example** - -``` xml - - admin - 222 - -``` - -## keep\_alive\_timeout {#keep-alive-timeout} - -The number of seconds that ClickHouse waits for incoming requests before closing the connection. Defaults to 3 seconds. - -**Example** - -``` xml -3 -``` - -## listen\_host {#server_settings-listen_host} - -Restriction on hosts that requests can come from. If you want the server to answer all of them, specify `::`. - -Examples: - -``` xml -::1 -127.0.0.1 -``` - -## logger {#server_settings-logger} - -Logging settings. - -Keys: - -- level – Logging level. Acceptable values: `trace`, `debug`, `information`, `warning`, `error`. -- log – The log file. Contains all the entries according to `level`. -- errorlog – Error log file. -- size – Size of the file. Applies to `log`and`errorlog`. Once the file reaches `size`, ClickHouse archives and renames it, and creates a new log file in its place. -- count – The number of archived log files that ClickHouse stores. - -**Example** - -``` xml - - trace - /var/log/clickhouse-server/clickhouse-server.log - /var/log/clickhouse-server/clickhouse-server.err.log - 1000M - 10 - -``` - -Writing to the syslog is also supported. Config example: - -``` xml - - 1 - -
syslog.remote:10514
- myhost.local - LOG_LOCAL6 - syslog -
-
-``` - -Keys: - -- use\_syslog — Required setting if you want to write to the syslog. -- address — The host\[:port\] of syslogd. If omitted, the local daemon is used. -- hostname — Optional. The name of the host that logs are sent from. -- facility — [The syslog facility keyword](https://en.wikipedia.org/wiki/Syslog#Facility) in uppercase letters with the “LOG\_” prefix: (`LOG_USER`, `LOG_DAEMON`, `LOG_LOCAL3`, and so on). - Default value: `LOG_USER` if `address` is specified, `LOG_DAEMON otherwise.` -- format – Message format. Possible values: `bsd` and `syslog.` - -## macros {#macros} - -Parameter substitutions for replicated tables. - -Can be omitted if replicated tables are not used. - -For more information, see the section “[Creating replicated tables](../../operations/table_engines/replication.md)”. - -**Example** - -``` xml - -``` - -## mark\_cache\_size {#server-mark-cache-size} - -Approximate size (in bytes) of the cache of marks used by table engines of the [MergeTree](../table_engines/mergetree.md) family. - -The cache is shared for the server and memory is allocated as needed. The cache size must be at least 5368709120. - -**Example** - -``` xml -5368709120 -``` - -## max\_concurrent\_queries {#max-concurrent-queries} - -The maximum number of simultaneously processed requests. - -**Example** - -``` xml -100 -``` - -## max\_connections {#max-connections} - -The maximum number of inbound connections. - -**Example** - -``` xml -4096 -``` - -## max\_open\_files {#max-open-files} - -The maximum number of open files. - -By default: `maximum`. - -We recommend using this option in Mac OS X since the `getrlimit()` function returns an incorrect value. - -**Example** - -``` xml -262144 -``` - -## max\_table\_size\_to\_drop {#max-table-size-to-drop} - -Restriction on deleting tables. - -If the size of a [MergeTree](../table_engines/mergetree.md) table exceeds `max_table_size_to_drop` (in bytes), you can’t delete it using a DROP query. - -If you still need to delete the table without restarting the ClickHouse server, create the `/flags/force_drop_table` file and run the DROP query. - -Default value: 50 GB. - -The value 0 means that you can delete all tables without any restrictions. - -**Example** - -``` xml -0 -``` - -## merge\_tree {#server_settings-merge_tree} - -Fine tuning for tables in the [MergeTree](../table_engines/mergetree.md). - -For more information, see the MergeTreeSettings.h header file. - -**Example** - -``` xml - - 5 - -``` - -## openSSL {#server_settings-openssl} - -SSL client/server configuration. - -Support for SSL is provided by the `libpoco` library. The interface is described in the file [SSLManager.h](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h) - -Keys for server/client settings: - -- privateKeyFile – The path to the file with the secret key of the PEM certificate. The file may contain a key and certificate at the same time. -- certificateFile – The path to the client/server certificate file in PEM format. You can omit it if `privateKeyFile` contains the certificate. -- caConfig – The path to the file or directory that contains trusted root certificates. -- verificationMode – The method for checking the node’s certificates. Details are in the description of the [Context](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/Context.h) class. Possible values: `none`, `relaxed`, `strict`, `once`. -- verificationDepth – The maximum length of the verification chain. 
Verification will fail if the certificate chain length exceeds the set value. -- loadDefaultCAFile – Indicates that built-in CA certificates for OpenSSL will be used. Acceptable values: `true`, `false`. \| -- cipherList – Supported OpenSSL encryptions. For example: `ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH`. -- cacheSessions – Enables or disables caching sessions. Must be used in combination with `sessionIdContext`. Acceptable values: `true`, `false`. -- sessionIdContext – A unique set of random characters that the server appends to each generated identifier. The length of the string must not exceed `SSL_MAX_SSL_SESSION_ID_LENGTH`. This parameter is always recommended since it helps avoid problems both if the server caches the session and if the client requested caching. Default value: `${application.name}`. -- sessionCacheSize – The maximum number of sessions that the server caches. Default value: 1024\*20. 0 – Unlimited sessions. -- sessionTimeout – Time for caching the session on the server. -- extendedVerification – Automatically extended verification of certificates after the session ends. Acceptable values: `true`, `false`. -- requireTLSv1 – Require a TLSv1 connection. Acceptable values: `true`, `false`. -- requireTLSv1\_1 – Require a TLSv1.1 connection. Acceptable values: `true`, `false`. -- requireTLSv1 – Require a TLSv1.2 connection. Acceptable values: `true`, `false`. -- fips – Activates OpenSSL FIPS mode. Supported if the library’s OpenSSL version supports FIPS. -- privateKeyPassphraseHandler – Class (PrivateKeyPassphraseHandler subclass) that requests the passphrase for accessing the private key. For example: ``, `KeyFileHandler`, `test`, ``. -- invalidCertificateHandler – Class (a subclass of CertificateHandler) for verifying invalid certificates. For example: ` ConsoleCertificateHandler ` . -- disableProtocols – Protocols that are not allowed to use. -- preferServerCiphers – Preferred server ciphers on the client. - -**Example of settings:** - -``` xml - - - - /etc/clickhouse-server/server.crt - /etc/clickhouse-server/server.key - - /etc/clickhouse-server/dhparam.pem - none - true - true - sslv2,sslv3 - true - - - true - true - sslv2,sslv3 - true - - - - RejectCertificateHandler - - - -``` - -## part\_log {#server_settings-part-log} - -Logging events that are associated with [MergeTree](../table_engines/mergetree.md). For instance, adding or merging data. You can use the log to simulate merge algorithms and compare their characteristics. You can visualize the merge process. - -Queries are logged in the [system.part\_log](../system_tables.md#system_tables-part-log) table, not in a separate file. You can configure the name of this table in the `table` parameter (see below). - -Use the following parameters to configure logging: - -- `database` – Name of the database. -- `table` – Name of the system table. -- `partition_by` – Sets a [custom partitioning key](../../operations/table_engines/custom_partitioning_key.md). -- `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table. - -**Example** - -``` xml - - system - part_log
- toMonday(event_date) - 7500 -
-``` - -## path {#server_settings-path} - -The path to the directory containing data. - -!!! note "Note" - The trailing slash is mandatory. - -**Example** - -``` xml -/var/lib/clickhouse/ -``` - -## query\_log {#server_settings-query-log} - -Setting for logging queries received with the [log\_queries=1](../settings/settings.md) setting. - -Queries are logged in the [system.query\_log](../system_tables.md#system_tables-query_log) table, not in a separate file. You can change the name of the table in the `table` parameter (see below). - -Use the following parameters to configure logging: - -- `database` – Name of the database. -- `table` – Name of the system table the queries will be logged in. -- `partition_by` – Sets a [custom partitioning key](../../operations/table_engines/custom_partitioning_key.md) for a table. -- `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table. - -If the table doesn’t exist, ClickHouse will create it. If the structure of the query log changed when the ClickHouse server was updated, the table with the old structure is renamed, and a new table is created automatically. - -**Example** - -``` xml - - system - query_log
- toMonday(event_date) - 7500 -
-``` - -## query\_thread\_log {#server_settings-query-thread-log} - -Setting for logging threads of queries received with the [log\_query\_threads=1](../settings/settings.md#settings-log-query-threads) setting. - -Queries are logged in the [system.query\_thread\_log](../system_tables.md#system_tables-query-thread-log) table, not in a separate file. You can change the name of the table in the `table` parameter (see below). - -Use the following parameters to configure logging: - -- `database` – Name of the database. -- `table` – Name of the system table the queries will be logged in. -- `partition_by` – Sets a [custom partitioning key](../../operations/table_engines/custom_partitioning_key.md) for a system table. -- `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table. - -If the table doesn’t exist, ClickHouse will create it. If the structure of the query thread log changed when the ClickHouse server was updated, the table with the old structure is renamed, and a new table is created automatically. - -**Example** - -``` xml - - system - query_thread_log
- toMonday(event_date) - 7500 -
-``` - -## trace\_log {#server_settings-trace_log} - -Settings for the [trace\_log](../system_tables.md#system_tables-trace_log) system table operation. - -Parameters: - -- `database` — Database for storing a table. -- `table` — Table name. -- `partition_by` — [Custom partitioning key](../../operations/table_engines/custom_partitioning_key.md) for a system table. -- `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table. - -The default server configuration file `config.xml` contains the following settings section: - -``` xml - - system - trace_log
- toYYYYMM(event_date) - 7500 -
-``` - -## query\_masking\_rules {#query-masking-rules} - -Regexp-based rules, which will be applied to queries as well as all log messages before storing them in server logs, -`system.query_log`, `system.text_log`, `system.processes` table, and in logs sent to the client. That allows preventing -sensitive data leakage from SQL queries (like names, emails, personal -identifiers or credit card numbers) to logs. - -**Example** - -``` xml - - - hide SSN - (^|\D)\d{3}-\d{2}-\d{4}($|\D) - 000-00-0000 - - -``` - -Config fields: -- `name` - name for the rule (optional) -- `regexp` - RE2 compatible regular expression (mandatory) -- `replace` - substitution string for sensitive data (optional, by default - six asterisks) - -The masking rules are applied to the whole query (to prevent leaks of sensitive data from malformed / non-parsable queries). - -`system.events` table have counter `QueryMaskingRulesMatch` which have an overall number of query masking rules matches. - -For distributed queries each server have to be configured separately, otherwise, subqueries passed to other -nodes will be stored without masking. - -## remote\_servers {#server-settings-remote-servers} - -Configuration of clusters used by the [Distributed](../../operations/table_engines/distributed.md) table engine and by the `cluster` table function. - -**Example** - -``` xml - -``` - -For the value of the `incl` attribute, see the section “[Configuration files](../configuration_files.md#configuration_files)”. - -**See Also** - -- [skip\_unavailable\_shards](../settings/settings.md#settings-skip_unavailable_shards) - -## timezone {#server_settings-timezone} - -The server’s time zone. - -Specified as an IANA identifier for the UTC timezone or geographic location (for example, Africa/Abidjan). - -The time zone is necessary for conversions between String and DateTime formats when DateTime fields are output to text format (printed on the screen or in a file), and when getting DateTime from a string. Besides, the time zone is used in functions that work with the time and date if they didn’t receive the time zone in the input parameters. - -**Example** - -``` xml -Europe/Moscow -``` - -## tcp\_port {#server_settings-tcp_port} - -Port for communicating with clients over the TCP protocol. - -**Example** - -``` xml -9000 -``` - -## tcp\_port\_secure {#server_settings-tcp_port-secure} - -TCP port for secure communication with clients. Use it with [OpenSSL](#server_settings-openssl) settings. - -**Possible values** - -Positive integer. - -**Default value** - -``` xml -9440 -``` - -## mysql\_port {#server_settings-mysql_port} - -Port for communicating with clients over MySQL protocol. - -**Possible values** - -Positive integer. - -Example - -``` xml -9004 -``` - -## tmp\_path {#server-settings-tmp_path} - -Path to temporary data for processing large queries. - -!!! note "Note" - The trailing slash is mandatory. - -**Example** - -``` xml -/var/lib/clickhouse/tmp/ -``` - -## tmp\_policy {#server-settings-tmp-policy} - -Policy from [`storage_configuration`](../table_engines/mergetree.md#table_engine-mergetree-multiple-volumes) to store temporary files. -If not set [`tmp_path`](#server-settings-tmp_path) is used, otherwise it is ignored. - -!!! 
note "Note" - - `move_factor` is ignored -- `keep_free_space_bytes` is ignored -- `max_data_part_size_bytes` is ignored -- you must have exactly one volume in that policy - -## uncompressed\_cache\_size {#server-settings-uncompressed_cache_size} - -Cache size (in bytes) for uncompressed data used by table engines from the [MergeTree](../table_engines/mergetree.md). - -There is one shared cache for the server. Memory is allocated on demand. The cache is used if the option [use\_uncompressed\_cache](../settings/settings.md#setting-use_uncompressed_cache) is enabled. - -The uncompressed cache is advantageous for very short queries in individual cases. - -**Example** - -``` xml -8589934592 -``` - -## user\_files\_path {#server_settings-user_files_path} - -The directory with user files. Used in the table function [file()](../../query_language/table_functions/file.md). - -**Example** - -``` xml -/var/lib/clickhouse/user_files/ -``` - -## users\_config {#users-config} - -Path to the file that contains: - -- User configurations. -- Access rights. -- Settings profiles. -- Quota settings. - -**Example** - -``` xml -users.xml -``` - -## zookeeper {#server-settings_zookeeper} - -Contains settings that allow ClickHouse to interact with a [ZooKeeper](http://zookeeper.apache.org/) cluster. - -ClickHouse uses ZooKeeper for storing metadata of replicas when using replicated tables. If replicated tables are not used, this section of parameters can be omitted. - -This section contains the following parameters: - -- `node` — ZooKeeper endpoint. You can set multiple endpoints. - - For example: - - - -``` xml - - example_host - 2181 - -``` - - The `index` attribute specifies the node order when trying to connect to the ZooKeeper cluster. - -- `session_timeout` — Maximum timeout for the client session in milliseconds. -- `root` — The [znode](http://zookeeper.apache.org/doc/r3.5.5/zookeeperOver.html#Nodes+and+ephemeral+nodes) that is used as the root for znodes used by the ClickHouse server. Optional. -- `identity` — User and password, that can be required by ZooKeeper to give access to requested znodes. Optional. - -**Example configuration** - -``` xml - - - example1 - 2181 - - - example2 - 2181 - - 30000 - 10000 - - /path/to/zookeeper/node - - user:password - -``` - -**See Also** - -- [Replication](../../operations/table_engines/replication.md) -- [ZooKeeper Programmer’s Guide](http://zookeeper.apache.org/doc/current/zookeeperProgrammers.html) - -## use\_minimalistic\_part\_header\_in\_zookeeper {#server-settings-use_minimalistic_part_header_in_zookeeper} - -Storage method for data part headers in ZooKeeper. - -This setting only applies to the `MergeTree` family. It can be specified: - -- Globally in the [merge\_tree](#server_settings-merge_tree) section of the `config.xml` file. - - ClickHouse uses the setting for all the tables on the server. You can change the setting at any time. Existing tables change their behaviour when the setting changes. - -- For each table. - - When creating a table, specify the corresponding [engine setting](../table_engines/mergetree.md#table_engine-mergetree-creating-a-table). The behaviour of an existing table with this setting does not change, even if the global setting changes. - -**Possible values** - -- 0 — Functionality is turned off. -- 1 — Functionality is turned on. - -If `use_minimalistic_part_header_in_zookeeper = 1`, then [replicated](../table_engines/replication.md) tables store the headers of the data parts compactly using a single `znode`. 
If the table contains many columns, this storage method significantly reduces the volume of the data stored in Zookeeper. - -!!! attention "Attention" - After applying `use_minimalistic_part_header_in_zookeeper = 1`, you can’t downgrade the ClickHouse server to a version that doesn’t support this setting. Be careful when upgrading ClickHouse on servers in a cluster. Don’t upgrade all the servers at once. It is safer to test new versions of ClickHouse in a test environment, or on just a few servers of a cluster. - - Data part headers already stored with this setting can't be restored to their previous (non-compact) representation. - -**Default value:** 0. - -## disable\_internal\_dns\_cache {#server-settings-disable-internal-dns-cache} - -Disables the internal DNS cache. Recommended for operating ClickHouse in systems -with frequently changing infrastructure such as Kubernetes. - -**Default value:** 0. - -## dns\_cache\_update\_period {#server-settings-dns-cache-update-period} - -The period of updating IP addresses stored in the ClickHouse internal DNS cache (in seconds). -The update is performed asynchronously, in a separate system thread. - -**Default value**: 15. - -[Original article](https://clickhouse.tech/docs/en/operations/server_settings/settings/) diff --git a/docs/zh/operations/settings/constraints_on_settings.md b/docs/zh/operations/settings/constraints_on_settings.md index b0037813199..c9e572dd907 100644 --- a/docs/zh/operations/settings/constraints_on_settings.md +++ b/docs/zh/operations/settings/constraints_on_settings.md @@ -1,11 +1,14 @@ --- -en_copy: true +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_priority: 62 +toc_title: "\u5BF9\u8BBE\u7F6E\u7684\u9650\u5236" --- -# Constraints on Settings {#constraints-on-settings} +# 对设置的限制 {#constraints-on-settings} -The constraints on settings can be defined in the `profiles` section of the `user.xml` configuration file and prohibit users from changing some of the settings with the `SET` query. -The constraints are defined as the following: +在设置的约束可以在定义 `profiles` 一节 `user.xml` 配置文件,并禁止用户更改一些设置与 `SET` 查询。 +约束定义如下: ``` xml @@ -29,10 +32,10 @@ The constraints are defined as the following: ``` -If the user tries to violate the constraints an exception is thrown and the setting isn’t changed. -There are supported three types of constraints: `min`, `max`, `readonly`. The `min` and `max` constraints specify upper and lower boundaries for a numeric setting and can be used in combination. The `readonly` constraint specifies that the user cannot change the corresponding setting at all. +如果用户试图违反约束,将引发异常,并且设置不会更改。 +支持三种类型的约束: `min`, `max`, `readonly`. 该 `min` 和 `max` 约束指定数值设置的上边界和下边界,并且可以组合使用。 该 `readonly` constraint指定用户根本无法更改相应的设置。 -**Example:** Let `users.xml` includes lines: +**示例:** 让 `users.xml` 包括行: ``` xml @@ -53,7 +56,7 @@ There are supported three types of constraints: `min`, `max`, `readonly`. The `m ``` -The following queries all throw exceptions: +以下查询都会引发异常: ``` sql SET max_memory_usage=20000000001; @@ -67,6 +70,6 @@ Code: 452, e.displayText() = DB::Exception: Setting max_memory_usage should not Code: 452, e.displayText() = DB::Exception: Setting force_index_by_date should not be changed. ``` -**Note:** the `default` profile has special handling: all the constraints defined for the `default` profile become the default constraints, so they restrict all the users until they’re overridden explicitly for these users. 
+**注:** 该 `default` 配置文件具有特殊的处理:所有定义的约束 `default` 配置文件成为默认约束,因此它们限制所有用户,直到为这些用户显式复盖它们。 -[Original article](https://clickhouse.tech/docs/en/operations/settings/constraints_on_settings/) +[原始文章](https://clickhouse.tech/docs/en/operations/settings/constraints_on_settings/) diff --git a/docs/zh/operations/settings/index.md b/docs/zh/operations/settings/index.md index 6d3d96dfbf3..9c598d241d1 100644 --- a/docs/zh/operations/settings/index.md +++ b/docs/zh/operations/settings/index.md @@ -1,25 +1,28 @@ -# Settings {#settings} +--- +toc_folder_title: "\u8bbe\u7f6e" +--- +# 设置 {#settings} -There are multiple ways to make all the settings described below. -Settings are configured in layers, so each subsequent layer redefines the previous settings. +有多种方法可以进行以下所述的所有设置。 +设置是在图层中配置的,因此每个后续图层都会重新定义以前的设置。 -Ways to configure settings, in order of priority: +按优先级顺序配置设置的方法: -- Settings in the `users.xml` server configuration file. +- 在设置 `users.xml` 服务器配置文件。 Set in the element ``. -- Session settings. +- 会话设置。 Send ` SET setting=value` from the ClickHouse console client in interactive mode. - Similarly, you can use ClickHouse sessions in the HTTP protocol. To do this, you need to specify the `session_id` HTTP parameter. + 同样,您可以在HTTP协议中使用ClickHouse会话。 要做到这一点,你需要指定 `session_id` HTTP参数。 -- Query settings. +- 查询设置。 - - When starting the ClickHouse console client in non-interactive mode, set the startup parameter `--setting=value`. - - When using the HTTP API, pass CGI parameters (`URL?setting_1=value&setting_2=value...`). + - 在非交互模式下启动ClickHouse控制台客户端时,设置startup参数 `--setting=value`. + - 使用HTTP API时,请传递CGI参数 (`URL?setting_1=value&setting_2=value...`). -Settings that can only be made in the server config file are not covered in this section. +本节不介绍只能在服务器配置文件中进行的设置。 -[Original article](https://clickhouse.tech/docs/en/operations/settings/) +[原始文章](https://clickhouse.tech/docs/en/operations/settings/) diff --git a/docs/zh/operations/settings/permissions_for_queries.md b/docs/zh/operations/settings/permissions_for_queries.md index 60942e1926b..a7d2e843b66 100644 --- a/docs/zh/operations/settings/permissions_for_queries.md +++ b/docs/zh/operations/settings/permissions_for_queries.md @@ -1,58 +1,61 @@ --- -en_copy: true +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_priority: 58 +toc_title: "\u67E5\u8BE2\u6743\u9650" --- -# Permissions for Queries {#permissions_for_queries} +# 查询权限 {#permissions_for_queries} -Queries in ClickHouse can be divided into several types: +ClickHouse中的查询可以分为几种类型: -1. Read data queries: `SELECT`, `SHOW`, `DESCRIBE`, `EXISTS`. -2. Write data queries: `INSERT`, `OPTIMIZE`. -3. Change settings query: `SET`, `USE`. -4. [DDL](https://en.wikipedia.org/wiki/Data_definition_language) queries: `CREATE`, `ALTER`, `RENAME`, `ATTACH`, `DETACH`, `DROP` `TRUNCATE`. +1. 读取数据查询: `SELECT`, `SHOW`, `DESCRIBE`, `EXISTS`. +2. 写入数据查询: `INSERT`, `OPTIMIZE`. +3. 更改设置查询: `SET`, `USE`. +4. [DDL](https://en.wikipedia.org/wiki/Data_definition_language) 查询: `CREATE`, `ALTER`, `RENAME`, `ATTACH`, `DETACH`, `DROP` `TRUNCATE`. 5. `KILL QUERY`. -The following settings regulate user permissions by the type of query: +以下设置按查询类型规范用户权限: -- [readonly](#settings_readonly) — Restricts permissions for all types of queries except DDL queries. +- [只读](#settings_readonly) — Restricts permissions for all types of queries except DDL queries. - [allow\_ddl](#settings_allow_ddl) — Restricts permissions for DDL queries. -`KILL QUERY` can be performed with any settings. 
+`KILL QUERY` 可以在任何设置下执行。

-## readonly {#settings_readonly}
+## 只读 {#settings_readonly}

-Restricts permissions for reading data, write data and change settings queries.
+限制读取数据、写入数据和更改设置查询的权限。

-See how the queries are divided into types [above](#permissions_for_queries).
+请参阅 [上文](#permissions_for_queries) 中查询类型的划分。

-Possible values:
+可能的值:

- 0 — All queries are allowed.
- 1 — Only read data queries are allowed.
- 2 — Read data and change settings queries are allowed.

-After setting `readonly = 1`, the user can’t change `readonly` and `allow_ddl` settings in the current session.
+设置 `readonly = 1` 后,用户无法在当前会话中更改 `readonly` 和 `allow_ddl` 设置。

-When using the `GET` method in the [HTTP interface](../../interfaces/http.md), `readonly = 1` is set automatically. To modify data, use the `POST` method.
+在 [HTTP接口](../../interfaces/http.md) 中使用 `GET` 方法时,会自动设置 `readonly = 1`。要修改数据,请使用 `POST` 方法。

-Setting `readonly = 1` prohibit the user from changing all the settings. There is a way to prohibit the user
-from changing only specific settings, for details see [constraints on settings](constraints_on_settings.md).
+设置 `readonly = 1` 会禁止用户更改任何设置。也有办法只禁止用户
+更改特定的设置,详情请参阅 [对设置的限制](constraints_on_settings.md)。

-Default value: 0
+默认值:0

## allow\_ddl {#settings_allow_ddl}

-Allows or denies [DDL](https://en.wikipedia.org/wiki/Data_definition_language) queries.
+允许或拒绝 [DDL](https://en.wikipedia.org/wiki/Data_definition_language) 查询。

-See how the queries are divided into types [above](#permissions_for_queries).
+请参阅 [上文](#permissions_for_queries) 中查询类型的划分。

-Possible values:
+可能的值:

- 0 — DDL queries are not allowed.
- 1 — DDL queries are allowed.

-You can’t execute `SET allow_ddl = 1` if `allow_ddl = 0` for the current session.
+如果当前会话的 `allow_ddl = 0`,则无法执行 `SET allow_ddl = 1`。

-Default value: 1
+默认值:1

-[Original article](https://clickhouse.tech/docs/en/operations/settings/permissions_for_queries/)
+[原始文章](https://clickhouse.tech/docs/en/operations/settings/permissions_for_queries/)
diff --git a/docs/zh/operations/settings/query_complexity.md b/docs/zh/operations/settings/query_complexity.md
index ccb8acd3da5..b17f5b7aa70 100644
--- a/docs/zh/operations/settings/query_complexity.md
+++ b/docs/zh/operations/settings/query_complexity.md
@@ -1,193 +1,194 @@
-# Restrictions on query complexity {#restrictions-on-query-complexity}
-Restrictions on query complexity are part of the settings.
-They are used in order to provide safer execution from the user interface.
-Almost all the restrictions only apply to SELECTs.For distributed query processing, restrictions are applied on each server separately.
+# 查询复杂性的限制 {#restrictions-on-query-complexity}
+
+对查询复杂性的限制是设置的一部分。
+它们用于从用户界面提供更安全的执行。
+几乎所有限制仅适用于 `SELECT`。对于分布式查询处理,限制在每个服务器上分别应用。

Restrictions on the «maximum amount of something» can take the value 0, which means «unrestricted».
-Most restrictions also have an ‘overflow\_mode’ setting, meaning what to do when the limit is exceeded.
-It can take one of two values: `throw` or `break`. Restrictions on aggregation (group\_by\_overflow\_mode) also have the value `any`.
+大多数限制还有一个 ‘overflow\_mode’ 设置,表示超出限制时的行为。
+它可以取以下两个值之一: `throw` 或 `break`。对聚合的限制(group\_by\_overflow\_mode)还可以取值 `any`。

`throw` – Throw an exception (default).

`break` – Stop executing the query and return the partial result, as if the source data ran out.

-`any (only for group_by_overflow_mode)` – Continuing aggregation for the keys that got into the set, but don’t add new keys to the set.
+`any (only for group_by_overflow_mode)` – Continuing aggregation for the keys that got into the set, but don't add new keys to the set.

-## readonly {#query-complexity-readonly}
+## 只读 {#query-complexity-readonly}

-With a value of 0, you can execute any queries.
-With a value of 1, you can only execute read requests (such as SELECT and SHOW). Requests for writing and changing settings (INSERT, SET) are prohibited.
-With a value of 2, you can process read queries (SELECT, SHOW) and change settings (SET).
+值为0时,可以执行任何查询。
+值为1时,只能执行读取请求(如SELECT和SHOW),禁止写入和更改设置的请求(INSERT、SET)。
+值为2时,可以处理读取查询(SELECT、SHOW)并更改设置(SET)。

-After enabling readonly mode, you can’t disable it in the current session.
+启用只读模式后,您无法在当前会话中禁用它。

-When using the GET method in the HTTP interface, ‘readonly = 1’ is set automatically. In other words, for queries that modify data, you can only use the POST method. You can send the query itself either in the POST body, or in the URL parameter.
+在HTTP接口中使用GET方法时,会自动设置 ‘readonly = 1’。换句话说,对于修改数据的查询,您只能使用POST方法。您可以在POST正文或URL参数中发送查询本身。

## max\_memory\_usage {#settings_max_memory_usage}

-The maximum amount of RAM to use for running a query on a single server.
+用于在单个服务器上运行查询的最大RAM量。

-In the default configuration file, the maximum is 10 GB.
+在默认配置文件中,最大值为10GB。

-The setting doesn’t consider the volume of available memory or the total volume of memory on the machine.
-The restriction applies to a single query within a single server.
-You can use `SHOW PROCESSLIST` to see the current memory consumption for each query.
-In addition, the peak memory consumption is tracked for each query and written to the log.
+该设置不考虑计算机上的可用内存量或内存总量。
+该限制适用于单个服务器中的单个查询。
+您可以使用 `SHOW PROCESSLIST` 查看每个查询的当前内存消耗。
+此外,还会跟踪每个查询的内存消耗峰值并将其写入日志。(参见下文的示例。)

-Memory usage is not monitored for the states of certain aggregate functions.
+某些聚合函数状态的内存使用情况不受监视。

-Memory usage is not fully tracked for states of the aggregate functions `min`, `max`, `any`, `anyLast`, `argMin`, `argMax` from `String` and `Array` arguments.
+对于带有 `String` 和 `Array` 参数的聚合函数 `min`、`max`、`any`、`anyLast`、`argMin`、`argMax`,其状态的内存使用情况未被完全跟踪。

-Memory consumption is also restricted by the parameters `max_memory_usage_for_user` and `max_memory_usage_for_all_queries`.
+内存消耗还受 `max_memory_usage_for_user` 和 `max_memory_usage_for_all_queries` 参数的限制。

## max\_memory\_usage\_for\_user {#max-memory-usage-for-user}

-The maximum amount of RAM to use for running a user’s queries on a single server.
+用于在单个服务器上运行用户查询的最大RAM量。

-Default values are defined in [Settings.h](https://github.com/ClickHouse/ClickHouse/blob/master/src/Interpreters/Settings.h#L244). By default, the amount is not restricted (`max_memory_usage_for_user = 0`).
+默认值定义在 [Settings.h](https://github.com/ClickHouse/ClickHouse/blob/master/src/Interpreters/Settings.h#L244) 中。默认情况下,该用量不受限制 (`max_memory_usage_for_user = 0`)。

-See also the description of [max\_memory\_usage](#settings_max_memory_usage).
+另请参阅 [max\_memory\_usage](#settings_max_memory_usage) 的说明。

## max\_memory\_usage\_for\_all\_queries {#max-memory-usage-for-all-queries}

-The maximum amount of RAM to use for running all queries on a single server.
+用于在单个服务器上运行所有查询的最大RAM量。

-Default values are defined in [Settings.h](https://github.com/ClickHouse/ClickHouse/blob/master/src/Interpreters/Settings.h#L245). By default, the amount is not restricted (`max_memory_usage_for_all_queries = 0`).
+默认值定义在 [Settings.h](https://github.com/ClickHouse/ClickHouse/blob/master/src/Interpreters/Settings.h#L245) 中。默认情况下,该用量不受限制 (`max_memory_usage_for_all_queries = 0`)。
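+下面是一个在会话级别试验内存限制的示意性示例(数值仅供演示):
+
+``` sql
+-- 将当前会话的查询内存上限设为约100 MB
+SET max_memory_usage = 100000000;
+
+-- 需要保存大量唯一键的聚合可能超出该限制,
+-- 并抛出 MEMORY_LIMIT_EXCEEDED 异常
+SELECT uniqExact(number) FROM numbers(100000000);
+```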
-See also the description of [max\_memory\_usage](#settings_max_memory_usage).
+另请参阅 [max\_memory\_usage](#settings_max_memory_usage) 的说明。

## max\_rows\_to\_read {#max-rows-to-read}

-The following restrictions can be checked on each block (instead of on each row). That is, the restrictions can be broken a little.
-When running a query in multiple threads, the following restrictions apply to each thread separately.
+以下限制可以在每个块(而不是每行)上检查。也就是说,限制可能会被略微突破。
+在多个线程中运行查询时,以下限制分别应用于每个线程。

-Maximum number of rows that can be read from a table when running a query.
+运行查询时可从表中读取的最大行数。

## max\_bytes\_to\_read {#max-bytes-to-read}

-Maximum number of bytes (uncompressed data) that can be read from a table when running a query.
+运行查询时可以从表中读取的最大字节数(未压缩数据)。

## read\_overflow\_mode {#read-overflow-mode}

-What to do when the volume of data read exceeds one of the limits: ‘throw’ or ‘break’. By default, throw.
+读取的数据量超过其中一个限制时的行为: ‘throw’ 或 ‘break’。默认为 ‘throw’。

## max\_rows\_to\_group\_by {#max-rows-to-group-by}

-Maximum number of unique keys received from aggregation. This setting lets you limit memory consumption when aggregating.
+聚合产生的唯一键的最大数量。此设置允许您在聚合时限制内存消耗。

## group\_by\_overflow\_mode {#group-by-overflow-mode}

-What to do when the number of unique keys for aggregation exceeds the limit: ‘throw’, ‘break’, or ‘any’. By default, throw.
-Using the ‘any’ value lets you run an approximation of GROUP BY. The quality of this approximation depends on the statistical nature of the data.
+当聚合的唯一键数超过限制时的行为: ‘throw’、‘break’ 或 ‘any’。默认为 ‘throw’。
+使用 ‘any’ 值可以运行近似的GROUP BY。这种近似的质量取决于数据的统计性质。

## max\_rows\_to\_sort {#max-rows-to-sort}

-Maximum number of rows before sorting. This allows you to limit memory consumption when sorting.
+排序前的最大行数。这允许您在排序时限制内存消耗。

## max\_bytes\_to\_sort {#max-bytes-to-sort}

-Maximum number of bytes before sorting.
+排序前的最大字节数。

## sort\_overflow\_mode {#sort-overflow-mode}

-What to do if the number of rows received before sorting exceeds one of the limits: ‘throw’ or ‘break’. By default, throw.
+如果排序前收到的行数超过其中一个限制的行为: ‘throw’ 或 ‘break’。默认为 ‘throw’。

## max\_result\_rows {#max-result-rows}

-Limit on the number of rows in the result. Also checked for subqueries, and on remote servers when running parts of a distributed query.
+限制结果中的行数。子查询也会被检查;运行分布式查询的各个部分时,远程服务器上同样会检查。

## max\_result\_bytes {#max-result-bytes}

-Limit on the number of bytes in the result. The same as the previous setting.
+限制结果中的字节数。与之前的设置相同。

## result\_overflow\_mode {#result-overflow-mode}

-What to do if the volume of the result exceeds one of the limits: ‘throw’ or ‘break’. By default, throw.
-Using ‘break’ is similar to using LIMIT.
+如果结果的数据量超过其中一个限制的行为: ‘throw’ 或 ‘break’。默认为 ‘throw’。
+使用 ‘break’ 的效果类似于使用LIMIT。

## max\_execution\_time {#max-execution-time}

-Maximum query execution time in seconds.
-At this time, it is not checked for one of the sorting stages, or when merging and finalizing aggregate functions.
+最大查询执行时间(以秒为单位)。(参见下文的示例。)
+目前,排序阶段之一不检查此限制,合并和最终确定聚合函数时也不检查。

## timeout\_overflow\_mode {#timeout-overflow-mode}

-What to do if the query is run longer than ‘max\_execution\_time’: ‘throw’ or ‘break’. By default, throw.
+如果查询的运行时间长于 ‘max\_execution\_time’ 的行为: ‘throw’ 或 ‘break’。默认为 ‘throw’。
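+下面是一个演示 `max_execution_time` 的示意性查询(阈值仅供演示):
+
+``` sql
+-- 将查询执行时间限制为1秒
+SET max_execution_time = 1;
+
+-- 对无限数据流的计数会在约1秒后抛出 TIMEOUT_EXCEEDED 异常
+SELECT count() FROM system.numbers;
+```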
## min\_execution\_speed {#min-execution-speed}

-Minimal execution speed in rows per second. Checked on every data block when ‘timeout\_before\_checking\_execution\_speed’ expires. If the execution speed is lower, an exception is thrown.
+以每秒行数为单位的最小执行速度。在 ‘timeout\_before\_checking\_execution\_speed’ 到期后,对每个数据块进行检查。如果执行速度较低,则会引发异常。

## timeout\_before\_checking\_execution\_speed {#timeout-before-checking-execution-speed}

-Checks that execution speed is not too slow (no less than ‘min\_execution\_speed’), after the specified time in seconds has expired.
+在指定的时间(以秒为单位)过去之后,检查执行速度是否过慢(不低于 ‘min\_execution\_speed’)。

## max\_columns\_to\_read {#max-columns-to-read}

-Maximum number of columns that can be read from a table in a single query. If a query requires reading a greater number of columns, it throws an exception.
+单个查询中可从表中读取的最大列数。如果查询需要读取更多列,则会引发异常。

## max\_temporary\_columns {#max-temporary-columns}

-Maximum number of temporary columns that must be kept in RAM at the same time when running a query, including constant columns. If there are more temporary columns than this, it throws an exception.
+运行查询时必须同时保留在RAM中的最大临时列数,包括常量列。如果临时列数超过此值,则会引发异常。

## max\_temporary\_non\_const\_columns {#max-temporary-non-const-columns}

-The same thing as ‘max\_temporary\_columns’, but without counting constant columns.
-Note that constant columns are formed fairly often when running a query, but they require approximately zero computing resources.
+与 ‘max\_temporary\_columns’ 相同,但不计算常量列。
+请注意,运行查询时经常会形成常量列,但它们几乎不需要计算资源。

## max\_subquery\_depth {#max-subquery-depth}

-Maximum nesting depth of subqueries. If subqueries are deeper, an exception is thrown. By default, 100.
+子查询的最大嵌套深度。如果子查询更深,则会引发异常。默认为100。

## max\_pipeline\_depth {#max-pipeline-depth}

-Maximum pipeline depth. Corresponds to the number of transformations that each data block goes through during query processing. Counted within the limits of a single server. If the pipeline depth is greater, an exception is thrown. By default, 1000.
+最大管道深度。对应于查询处理期间每个数据块经历的转换数。在单个服务器的范围内计数。如果管道深度更大,则会引发异常。默认为1000。

## max\_ast\_depth {#max-ast-depth}

-Maximum nesting depth of a query syntactic tree. If exceeded, an exception is thrown.
-At this time, it isn’t checked during parsing, but only after parsing the query. That is, a syntactic tree that is too deep can be created during parsing, but the query will fail. By default, 1000.
+查询语法树的最大嵌套深度。如果超出,将引发异常。
+目前,此限制不在解析过程中检查,而是仅在解析查询之后检查。也就是说,在解析过程中可以创建一个过深的语法树,但查询将失败。默认为1000。

## max\_ast\_elements {#max-ast-elements}

-Maximum number of elements in a query syntactic tree. If exceeded, an exception is thrown.
-In the same way as the previous setting, it is checked only after parsing the query. By default, 50,000.
+查询语法树中的最大元素数。如果超出,将引发异常。
+与上一个设置相同,仅在解析查询后检查。默认为50,000。

## max\_rows\_in\_set {#max-rows-in-set}

-Maximum number of rows for a data set in the IN clause created from a subquery.
+由子查询创建的IN子句中数据集的最大行数。

## max\_bytes\_in\_set {#max-bytes-in-set}

-Maximum number of bytes (uncompressed data) used by a set in the IN clause created from a subquery.
+由子查询创建的IN子句中的集合所使用的最大字节数(未压缩数据)。

## set\_overflow\_mode {#set-overflow-mode}

-What to do when the amount of data exceeds one of the limits: ‘throw’ or ‘break’. By default, throw.
+数据量超过其中一个限制时的行为: ‘throw’ 或 ‘break’。默认为 throw。

## max\_rows\_in\_distinct {#max-rows-in-distinct}

-Maximum number of different rows when using DISTINCT.
+使用DISTINCT时的最大不同行数。

## max\_bytes\_in\_distinct {#max-bytes-in-distinct}

-Maximum number of bytes used by a hash table when using DISTINCT.
+使用DISTINCT时哈希表使用的最大字节数。

## distinct\_overflow\_mode {#distinct-overflow-mode}

-What to do when the amount of data exceeds one of the limits: ‘throw’ or ‘break’. By default, throw.
+数据量超过其中一个限制时的行为: ‘throw’ 或 ‘break’。默认为 throw。
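A brief illustration of the IN-set limits above; the threshold and the `events`/`big_table` names are examples only:

``` sql
-- Bound the hash set built for the IN clause (example values).
SET max_rows_in_set = 1000000;
SET set_overflow_mode = 'break';

-- If the subquery yields more than a million ids, ClickHouse stops
-- filling the set instead of failing the whole query.
SELECT count() FROM events WHERE user_id IN (SELECT user_id FROM big_table);
```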
## max\_rows\_to\_transfer {#max-rows-to-transfer}

-Maximum number of rows that can be passed to a remote server or saved in a temporary table when using GLOBAL IN.
+使用GLOBAL IN时,可以传递到远程服务器或保存在临时表中的最大行数。

## max\_bytes\_to\_transfer {#max-bytes-to-transfer}

-Maximum number of bytes (uncompressed data) that can be passed to a remote server or saved in a temporary table when using GLOBAL IN.
+使用GLOBAL IN时,可以传递到远程服务器或保存在临时表中的最大字节数(未压缩数据)。

## transfer\_overflow\_mode {#transfer-overflow-mode}

-What to do when the amount of data exceeds one of the limits: ‘throw’ or ‘break’. By default, throw.
+数据量超过其中一个限制时的行为: ‘throw’ 或 ‘break’。默认为 throw。

-[Original article](https://clickhouse.tech/docs/en/operations/settings/query_complexity/)
+[原始文章](https://clickhouse.tech/docs/en/operations/settings/query_complexity/)

diff --git a/docs/zh/operations/settings/settings.md b/docs/zh/operations/settings/settings.md
index 0475642124a..8e0a60d4f03 100644
--- a/docs/zh/operations/settings/settings.md
+++ b/docs/zh/operations/settings/settings.md
@@ -1,191 +1,194 @@
---
-en_copy: true
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_priority: 60
+toc_title: "\u8BBE\u7F6E"
---

-# Settings {#settings}
+# 设置 {#settings}

-## distributed\_product\_mode {#distributed-product-mode}
+## distributed\_product\_mode {#distributed-product-mode}

-Changes the behavior of [distributed subqueries](../../query_language/select.md).
+改变 [分布式子查询](../../sql_reference/statements/select.md) 的行为。

ClickHouse applies this setting when the query contains the product of distributed tables, i.e. when the query for a distributed table contains a non-GLOBAL subquery for the distributed table.

-Restrictions:
+限制:

-- Only applied for IN and JOIN subqueries.
-- Only if the FROM section uses a distributed table containing more than one shard.
-- If the subquery concerns a distributed table containing more than one shard.
-- Not used for a table-valued [remote](../../query_language/table_functions/remote.md) function.
+- 仅适用于IN和JOIN子查询。
+- 仅当FROM部分使用包含多个分片的分布式表时。
+- 如果子查询涉及包含多个分片的分布式表。
+- 不用于表值 [remote](../../sql_reference/table_functions/remote.md) 函数。

-Possible values:
+可能的值:

-- `deny` — Default value. Prohibits using these types of subqueries (returns the “Double-distributed in/JOIN subqueries is denied” exception).
+- `deny` — Default value. Prohibits using these types of subqueries (returns the “Double-distributed in/JOIN subqueries is denied” 异常)。
- `local` — Replaces the database and table in the subquery with local ones for the destination server (shard), leaving the normal `IN`/`JOIN.`
-- `global` — Replaces the `IN`/`JOIN` query with `GLOBAL IN`/`GLOBAL JOIN.`
+- `global` — 将 `IN`/`JOIN` 查询替换为 `GLOBAL IN`/`GLOBAL JOIN.`
- `allow` — Allows the use of these types of subqueries.

## enable\_optimize\_predicate\_expression {#enable-optimize-predicate-expression}

-Turns on predicate pushdown in `SELECT` queries.
+在 `SELECT` 查询中打开谓词下推。

-Predicate pushdown may significantly reduce network traffic for distributed queries.
+谓词下推可以显著减少分布式查询的网络流量。

-Possible values:
+可能的值:

- 0 — Disabled.
- 1 — Enabled.

-Default value: 1.
+默认值:1。

-Usage
+用法

-Consider the following queries:
+请考虑以下查询:

1. `SELECT count() FROM test_table WHERE date = '2018-10-10'`
2. `SELECT count() FROM (SELECT * FROM test_table) WHERE date = '2018-10-10'`

-If `enable_optimize_predicate_expression = 1`, then the execution time of these queries is equal because ClickHouse applies `WHERE` to the subquery when processing it.
+如果 `enable_optimize_predicate_expression = 1`,则这些查询的执行时间相等,因为ClickHouse在处理子查询时会将 `WHERE` 应用于子查询。

-If `enable_optimize_predicate_expression = 0`, then the execution time of the second query is much longer, because the `WHERE` clause applies to all the data after the subquery finishes.
+如果 `enable_optimize_predicate_expression = 0`,那么第二个查询的执行时间要长得多,因为 `WHERE` 子句在子查询完成后才应用于所有数据。
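To make the predicate-pushdown behaviour above concrete, here is an illustrative session reusing the documentation's own `test_table` example; the table is a placeholder:

``` sql
SET enable_optimize_predicate_expression = 1;

-- With pushdown enabled, the filter is applied inside the subquery,
-- so this reads roughly the same data as filtering test_table directly.
SELECT count() FROM (SELECT * FROM test_table) WHERE date = '2018-10-10';
```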
## fallback\_to\_stale\_replicas\_for\_distributed\_queries {#settings-fallback_to_stale_replicas_for_distributed_queries}

-Forces a query to an out-of-date replica if updated data is not available. See [Replication](../table_engines/replication.md).
+如果更新的数据不可用,则强制查询过期的副本。参见 [复制](../../engines/table_engines/mergetree_family/replication.md)。

-ClickHouse selects the most relevant from the outdated replicas of the table.
+ClickHouse会从表的过期副本中选择最相关的副本。

-Used when performing `SELECT` from a distributed table that points to replicated tables.
+在从指向复制表的分布式表执行 `SELECT` 时使用。

-By default, 1 (enabled).
+默认情况下,1(已启用)。

## force\_index\_by\_date {#settings-force_index_by_date}

-Disables query execution if the index can’t be used by date.
+如果索引不能按日期使用,则禁用查询执行。

-Works with tables in the MergeTree family.
+适用于MergeTree系列中的表。

-If `force_index_by_date=1`, ClickHouse checks whether the query has a date key condition that can be used for restricting data ranges. If there is no suitable condition, it throws an exception. However, it does not check whether the condition reduces the amount of data to read. For example, the condition `Date != ' 2000-01-01 '` is acceptable even when it matches all the data in the table (i.e., running the query requires a full scan). For more information about ranges of data in MergeTree tables, see [MergeTree](../table_engines/mergetree.md).
+如果 `force_index_by_date=1`,ClickHouse会检查查询是否具有可用于限制数据范围的日期键条件。如果没有合适的条件,则会引发异常。但是,它不检查条件是否减少了要读取的数据量。例如,条件 `Date != ' 2000-01-01 '` 即使匹配表中的所有数据(即运行查询需要全表扫描),也是可以接受的。有关MergeTree表中数据范围的详细信息,请参阅 [MergeTree](../../engines/table_engines/mergetree_family/mergetree.md)。

## force\_primary\_key {#force-primary-key}

-Disables query execution if indexing by the primary key is not possible.
+如果无法按主键建立索引,则禁用查询执行。

-Works with tables in the MergeTree family.
+适用于MergeTree系列中的表。

-If `force_primary_key=1`, ClickHouse checks to see if the query has a primary key condition that can be used for restricting data ranges. If there is no suitable condition, it throws an exception. However, it does not check whether the condition reduces the amount of data to read. For more information about data ranges in MergeTree tables, see [MergeTree](../table_engines/mergetree.md).
+如果 `force_primary_key=1`,ClickHouse会检查查询是否具有可用于限制数据范围的主键条件。如果没有合适的条件,则会引发异常。但是,它不检查条件是否减少了要读取的数据量。有关MergeTree表中数据范围的详细信息,请参阅 [MergeTree](../../engines/table_engines/mergetree_family/mergetree.md)。
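A hedged sketch of how `force_index_by_date` guards against accidental full scans; it assumes a MergeTree table keyed by `Date`, and the `hits` name is illustrative:

``` sql
SET force_index_by_date = 1;

-- OK: the Date condition can restrict the range of data to read.
SELECT count() FROM hits WHERE Date = '2018-10-10';

-- Throws an exception: no usable date key condition in the query.
SELECT count() FROM hits;
```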
## format\_schema {#format-schema}

-This parameter is useful when you are using formats that require a schema definition, such as [Cap’n Proto](https://capnproto.org/) or [Protobuf](https://developers.google.com/protocol-buffers/). The value depends on the format.
+当您使用需要架构定义的格式时,此参数非常有用,例如 [Cap’n Proto](https://capnproto.org/) 或 [Protobuf](https://developers.google.com/protocol-buffers/)。该值取决于格式。

## fsync\_metadata {#fsync-metadata}

-Enables or disables [fsync](http://pubs.opengroup.org/onlinepubs/9699919799/functions/fsync.html) when writing `.sql` files. Enabled by default.
+启用或禁用写入 `.sql` 文件时的 [fsync](http://pubs.opengroup.org/onlinepubs/9699919799/functions/fsync.html)。默认情况下启用。

-It makes sense to disable it if the server has millions of tiny tables that are constantly being created and destroyed.
+如果服务器有数百万个不断创建和销毁的小表,那么禁用它是有意义的。

## enable\_http\_compression {#settings-enable_http_compression}

-Enables or disables data compression in the response to an HTTP request.
+在对HTTP请求的响应中启用或禁用数据压缩。

-For more information, read the [HTTP interface description](../../interfaces/http.md).
+欲了解更多信息,请阅读 [HTTP接口描述](../../interfaces/http.md)。

-Possible values:
+可能的值:

- 0 — Disabled.
- 1 — Enabled.

-Default value: 0.
+默认值:0。

## http\_zlib\_compression\_level {#settings-http_zlib_compression_level}

-Sets the level of data compression in the response to an HTTP request if [enable\_http\_compression = 1](#settings-enable_http_compression).
+在 [enable\_http\_compression = 1](#settings-enable_http_compression) 的情况下,设置对HTTP请求的响应中的数据压缩级别。

-Possible values: Numbers from 1 to 9.
+可能的值:1到9之间的数字。

-Default value: 3.
+默认值:3。

## http\_native\_compression\_disable\_checksumming\_on\_decompress {#settings-http_native_compression_disable_checksumming_on_decompress}

-Enables or disables checksum verification when decompressing the HTTP POST data from the client. Used only for ClickHouse native compression format (not used with `gzip` or `deflate`).
+在解压缩来自客户端的HTTP POST数据时启用或禁用校验和验证。仅用于ClickHouse原生压缩格式(不用于 `gzip` 或 `deflate`)。

-For more information, read the [HTTP interface description](../../interfaces/http.md).
+欲了解更多信息,请阅读 [HTTP接口描述](../../interfaces/http.md)。

-Possible values:
+可能的值:

- 0 — Disabled.
- 1 — Enabled.

-Default value: 0.
+默认值:0。

## send\_progress\_in\_http\_headers {#settings-send_progress_in_http_headers}

-Enables or disables `X-ClickHouse-Progress` HTTP response headers in `clickhouse-server` responses.
+在 `clickhouse-server` 的响应中启用或禁用 `X-ClickHouse-Progress` HTTP响应头。

-For more information, read the [HTTP interface description](../../interfaces/http.md).
+欲了解更多信息,请阅读 [HTTP接口描述](../../interfaces/http.md)。

-Possible values:
+可能的值:

- 0 — Disabled.
- 1 — Enabled.

-Default value: 0.
+默认值:0。

## max\_http\_get\_redirects {#setting-max_http_get_redirects}

-Limits the maximum number of HTTP GET redirect hops for [URL](../table_engines/url.md)-engine tables. The setting applies to both types of tables: those created by the [CREATE TABLE](../../query_language/create/#create-table-query) query and by the [url](../../query_language/table_functions/url.md) table function.
+限制 [URL](../../engines/table_engines/special/url.md) 引擎表的HTTP GET重定向跳数的最大数量。该设置适用于两种类型的表:由 [CREATE TABLE](../../sql_reference/statements/create.md#create-table-query) 查询创建的表,以及由 [url](../../sql_reference/table_functions/url.md) 表函数创建的表。

-Possible values:
+可能的值:

-- Any positive integer number of hops.
+- 跳数的任何正整数。
- 0 — No hops allowed.

-Default value: 0.
+默认值:0。

## input\_format\_allow\_errors\_num {#settings-input_format_allow_errors_num}

-Sets the maximum number of acceptable errors when reading from text formats (CSV, TSV, etc.).
+设置从文本格式(CSV、TSV等)读取时可接受的最大错误数。

-The default value is 0.
+默认值为0。

-Always pair it with `input_format_allow_errors_ratio`.
+总是将其与 `input_format_allow_errors_ratio` 配对使用。

-If an error occurred while reading rows but the error counter is still less than `input_format_allow_errors_num`, ClickHouse ignores the row and moves on to the next one.
+如果在读取行时发生错误,但错误计数器仍小于 `input_format_allow_errors_num`,ClickHouse将忽略该行并移动到下一行。

-If both `input_format_allow_errors_num` and `input_format_allow_errors_ratio` are exceeded, ClickHouse throws an exception.
+如果 `input_format_allow_errors_num` 和 `input_format_allow_errors_ratio` 都超出,ClickHouse将引发异常。
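The two error-tolerance settings work together, as described above; a small illustrative session (the thresholds are example values, not recommendations):

``` sql
-- Tolerate up to 10 bad rows, but no more than 1% of the input.
SET input_format_allow_errors_num = 10;
SET input_format_allow_errors_ratio = 0.01;

-- A subsequent CSV INSERT will now skip occasional malformed rows
-- instead of aborting, until either threshold is exceeded.
```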
## input\_format\_allow\_errors\_ratio {#settings-input_format_allow_errors_ratio}

-Sets the maximum percentage of errors allowed when reading from text formats (CSV, TSV, etc.).
-The percentage of errors is set as a floating-point number between 0 and 1.
+设置从文本格式(CSV、TSV等)读取时允许的最大错误百分比。
+错误百分比设置为0到1之间的浮点数。

-The default value is 0.
+默认值为0。

-Always pair it with `input_format_allow_errors_num`.
+总是将其与 `input_format_allow_errors_num` 配对使用。

-If an error occurred while reading rows but the error counter is still less than `input_format_allow_errors_ratio`, ClickHouse ignores the row and moves on to the next one.
+如果在读取行时发生错误,但错误计数器仍小于 `input_format_allow_errors_ratio`,ClickHouse将忽略该行并移动到下一行。

-If both `input_format_allow_errors_num` and `input_format_allow_errors_ratio` are exceeded, ClickHouse throws an exception.
+如果 `input_format_allow_errors_num` 和 `input_format_allow_errors_ratio` 都超出,ClickHouse将引发异常。

## input\_format\_values\_interpret\_expressions {#settings-input_format_values_interpret_expressions}

-Enables or disables the full SQL parser if the fast stream parser can’t parse the data. This setting is used only for the [Values](../../interfaces/formats.md#data-format-values) format at the data insertion. For more information about syntax parsing, see the [Syntax](../../query_language/syntax.md) section.
+如果快速流解析器无法解析数据,则启用或禁用完整SQL解析器。此设置仅用于数据插入时的 [Values](../../interfaces/formats.md#data-format-values) 格式。有关语法分析的详细信息,请参阅 [语法](../../sql_reference/syntax.md) 部分。

-Possible values:
+可能的值:

- 0 — Disabled.

-    In this case, you must provide formatted data. See the [Formats](../../interfaces/formats.md) section.
+    在这种情况下,您必须提供格式化的数据。请参阅 [格式](../../interfaces/formats.md) 部分。

- 1 — Enabled.

-    In this case, you can use an SQL expression as a value, but data insertion is much slower this way. If you insert only formatted data, then ClickHouse behaves as if the setting value is 0.
+    在这种情况下,您可以使用SQL表达式作为值,但这样数据插入速度要慢得多。如果仅插入格式化的数据,则ClickHouse的行为就好像设置值为0。

-Default value: 1.
+默认值:1。

-Example of Use
+使用示例

-Insert the [DateTime](../../data_types/datetime.md) type value with the different settings.
+使用不同的设置插入 [DateTime](../../sql_reference/data_types/datetime.md) 类型的值。

``` sql
SET input_format_values_interpret_expressions = 0;
@@ -206,7 +209,7 @@
INSERT INTO datetime_t VALUES (now())

Ok.
```

-The last query is equivalent to the following:
+最后一个查询等效于以下内容:

``` sql
SET input_format_values_interpret_expressions = 0;
@@ -219,21 +222,21 @@
Ok.
```
## input\_format\_values\_deduce\_templates\_of\_expressions {#settings-input_format_values_deduce_templates_of_expressions}

-Enables or disables template deduction for an SQL expressions in [Values](../../interfaces/formats.md#data-format-values) format. It allows to parse and interpret expressions in `Values` much faster if expressions in consecutive rows have the same structure. ClickHouse will try to deduce template of an expression, parse the following rows using this template and evaluate the expression on a batch of successfully parsed rows. For the following query:
+启用或禁用 [Values](../../interfaces/formats.md#data-format-values) 格式中SQL表达式的模板推导。如果连续行中的表达式具有相同的结构,它可以更快地解析和解释 `Values` 中的表达式。ClickHouse将尝试推导表达式的模板,使用此模板解析后续行,并在一批成功解析的行上计算表达式。对于以下查询:

``` sql
INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), (upper('Values')), ...
```

-- if `input_format_values_interpret_expressions=1` and `format_values_deduce_templates_of_expressions=0` expressions will be interpreted separately for each row (this is very slow for large number of rows)
-- if `input_format_values_interpret_expressions=0` and `format_values_deduce_templates_of_expressions=1` expressions in the first, second and third rows will be parsed using template `lower(String)` and interpreted together, expression is the forth row will be parsed with another template (`upper(String)`)
-- if `input_format_values_interpret_expressions=1` and `format_values_deduce_templates_of_expressions=1` - the same as in previous case, but also allows fallback to interpreting expressions separately if it’s not possible to deduce template.
+- 如果 `input_format_values_interpret_expressions=1` 且 `format_values_deduce_templates_of_expressions=0`,表达式将针对每一行单独解释(对于大量行来说,这非常慢)
+- 如果 `input_format_values_interpret_expressions=0` 且 `format_values_deduce_templates_of_expressions=1`,第一行、第二行和第三行中的表达式将使用模板 `lower(String)` 解析并一起解释,第四行中的表达式将使用另一个模板 (`upper(String)`) 解析
+- 如果 `input_format_values_interpret_expressions=1` 且 `format_values_deduce_templates_of_expressions=1`,与前一种情况相同,但如果无法推导模板,也允许回退到单独解释表达式。

-Enabled by default.
+默认情况下启用。

## input\_format\_values\_accurate\_types\_of\_literals {#settings-input-format-values-accurate-types-of-literals}

-This setting is used only when `input_format_values_deduce_templates_of_expressions = 1`. It can happen, that expressions for some column have the same structure, but contain numeric literals of different types, e.g
+此设置仅在 `input_format_values_deduce_templates_of_expressions = 1` 时使用。可能会发生这样的情况:某些列的表达式具有相同的结构,但包含不同类型的数字文字,例如

``` sql
(..., abs(0), ...),             -- UInt64 literal
@@ -241,266 +244,266 @@ This setting is used only when `input_format_values_deduce_templates_of_expressi
(..., abs(-1), ...),    -- Int64 literal
```

-When this setting is enabled, ClickHouse will check the actual type of literal and will use an expression template of the corresponding type. In some cases, it may significantly slow down expression evaluation in `Values`.
-When disabled, ClickHouse may use more general type for some literals (e.g. `Float64` or `Int64` instead of `UInt64` for `42`), but it may cause overflow and precision issues.
-Enabled by default.
+启用此设置后,ClickHouse将检查文字的实际类型,并使用相应类型的表达式模板。在某些情况下,这可能会显著减慢 `Values` 中的表达式求值。
+禁用时,ClickHouse可能会对某些文字使用更通用的类型(例如,对 `42` 使用 `Float64` 或 `Int64` 而不是 `UInt64`),但这可能会导致溢出和精度问题。
+默认情况下启用。

## input\_format\_defaults\_for\_omitted\_fields {#session_settings-input_format_defaults_for_omitted_fields}

-When performing `INSERT` queries, replace omitted input column values with default values of the respective columns. This option only applies to [JSONEachRow](../../interfaces/formats.md#jsoneachrow), [CSV](../../interfaces/formats.md#csv) and [TabSeparated](../../interfaces/formats.md#tabseparated) formats.
+执行 `INSERT` 查询时,将省略的输入列值替换为相应列的默认值。此选项仅适用于 [JSONEachRow](../../interfaces/formats.md#jsoneachrow)、[CSV](../../interfaces/formats.md#csv) 和 [TabSeparated](../../interfaces/formats.md#tabseparated) 格式。
note "Note" - When this option is enabled, extended table metadata are sent from server to client. It consumes additional computing resources on the server and can reduce performance. +!!! note "注" + 启用此选项后,扩展表元数据将从服务器发送到客户端。 它会消耗服务器上的额外计算资源,并可能降低性能。 -Possible values: +可能的值: - 0 — Disabled. - 1 — Enabled. -Default value: 1. +默认值:1。 ## input\_format\_tsv\_empty\_as\_default {#settings-input-format-tsv-empty-as-default} -When enabled, replace empty input fields in TSV with default values. For complex default expressions `input_format_defaults_for_omitted_fields` must be enabled too. +启用后,将TSV中的空输入字段替换为默认值。 对于复杂的默认表达式 `input_format_defaults_for_omitted_fields` 必须启用了。 -Disabled by default. +默认情况下禁用。 ## input\_format\_null\_as\_default {#settings-input-format-null-as-default} -Enables or disables using default values if input data contain `NULL`, but data type of the corresponding column in not `Nullable(T)` (for text input formats). +如果输入数据包含 `NULL`,但相应列的数据类型不 `Nullable(T)` (对于文本输入格式)。 ## input\_format\_skip\_unknown\_fields {#settings-input-format-skip-unknown-fields} -Enables or disables skipping insertion of extra data. +启用或禁用跳过额外数据的插入。 -When writing data, ClickHouse throws an exception if input data contain columns that do not exist in the target table. If skipping is enabled, ClickHouse doesn’t insert extra data and doesn’t throw an exception. +写入数据时,如果输入数据包含目标表中不存在的列,ClickHouse将引发异常。 如果启用了跳过,ClickHouse不会插入额外的数据,也不会引发异常。 -Supported formats: +支持的格式: - [JSONEachRow](../../interfaces/formats.md#jsoneachrow) - [CSVWithNames](../../interfaces/formats.md#csvwithnames) - [TabSeparatedWithNames](../../interfaces/formats.md#tabseparatedwithnames) - [TSKV](../../interfaces/formats.md#tskv) -Possible values: +可能的值: - 0 — Disabled. - 1 — Enabled. -Default value: 0. +默认值:0。 ## input\_format\_import\_nested\_json {#settings-input_format_import_nested_json} -Enables or disables the insertion of JSON data with nested objects. +启用或禁用具有嵌套对象的JSON数据的插入。 -Supported formats: +支持的格式: - [JSONEachRow](../../interfaces/formats.md#jsoneachrow) -Possible values: +可能的值: - 0 — Disabled. - 1 — Enabled. -Default value: 0. +默认值:0。 -See also: +另请参阅: -- [Usage of Nested Structures](../../interfaces/formats.md#jsoneachrow-nested) with the `JSONEachRow` format. +- [嵌套结构的使用](../../interfaces/formats.md#jsoneachrow-nested) 与 `JSONEachRow` 格式。 ## input\_format\_with\_names\_use\_header {#settings-input-format-with-names-use-header} -Enables or disables checking the column order when inserting data. +启用或禁用插入数据时检查列顺序。 -To improve insert performance, we recommend disabling this check if you are sure that the column order of the input data is the same as in the target table. +为了提高插入性能,如果您确定输入数据的列顺序与目标表中的列顺序相同,建议禁用此检查。 -Supported formats: +支持的格式: - [CSVWithNames](../../interfaces/formats.md#csvwithnames) - [TabSeparatedWithNames](../../interfaces/formats.md#tabseparatedwithnames) -Possible values: +可能的值: - 0 — Disabled. - 1 — Enabled. -Default value: 1. +默认值:1。 ## date\_time\_input\_format {#settings-date_time_input_format} -Allows choosing a parser of the text representation of date and time. +允许选择日期和时间的文本表示的解析器。 -The setting doesn’t apply to [date and time functions](../../query_language/functions/date_time_functions.md). +该设置不适用于 [日期和时间功能](../../sql_reference/functions/date_time_functions.md). -Possible values: +可能的值: - `'best_effort'` — Enables extended parsing. - ClickHouse can parse the basic `YYYY-MM-DD HH:MM:SS` format and all [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) date and time formats. 
## date\_time\_input\_format {#settings-date_time_input_format}

-Allows choosing a parser of the text representation of date and time.
+允许选择日期和时间的文本表示形式的解析器。

-The setting doesn’t apply to [date and time functions](../../query_language/functions/date_time_functions.md).
+该设置不适用于 [日期和时间函数](../../sql_reference/functions/date_time_functions.md)。

-Possible values:
+可能的值:

- `'best_effort'` — Enables extended parsing.

-    ClickHouse can parse the basic `YYYY-MM-DD HH:MM:SS` format and all [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) date and time formats. For example, `'2018-06-08T01:02:03.000Z'`.
+    ClickHouse可以解析基本的 `YYYY-MM-DD HH:MM:SS` 格式和所有 [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) 日期和时间格式。例如 `'2018-06-08T01:02:03.000Z'`。

- `'basic'` — Use basic parser.

-    ClickHouse can parse only the basic `YYYY-MM-DD HH:MM:SS` format. For example, `'2019-08-20 10:18:56'`.
+    ClickHouse只能解析基本的 `YYYY-MM-DD HH:MM:SS` 格式。例如 `'2019-08-20 10:18:56'`。

-Default value: `'basic'`.
+默认值: `'basic'`。

-See also:
+另请参阅:

-- [DateTime data type.](../../data_types/datetime.md)
-- [Functions for working with dates and times.](../../query_language/functions/date_time_functions.md)
+- [DateTime 数据类型。](../../sql_reference/data_types/datetime.md)
+- [用于处理日期和时间的函数。](../../sql_reference/functions/date_time_functions.md)

## join\_default\_strictness {#settings-join_default_strictness}

-Sets default strictness for [JOIN clauses](../../query_language/select.md#select-join).
+设置 [JOIN子句](../../sql_reference/statements/select.md#select-join) 的默认严格性。

-Possible values:
+可能的值:

-- `ALL` — If the right table has several matching rows, ClickHouse creates a [Cartesian product](https://en.wikipedia.org/wiki/Cartesian_product) from matching rows. This is the normal `JOIN` behaviour from standard SQL.
-- `ANY` — If the right table has several matching rows, only the first one found is joined. If the right table has only one matching row, the results of `ANY` and `ALL` are the same.
+- `ALL` — 如果右表有多个匹配行,ClickHouse将根据匹配行创建 [笛卡尔积](https://en.wikipedia.org/wiki/Cartesian_product)。这是标准SQL中正常的 `JOIN` 行为。
+- `ANY` — 如果右表有多个匹配行,则仅联接找到的第一个匹配行。如果右表只有一个匹配行,则 `ANY` 和 `ALL` 的结果相同。
- `ASOF` — For joining sequences with an uncertain match.
-- `Empty string` — If `ALL` or `ANY` is not specified in the query, ClickHouse throws an exception.
+- `Empty string` — 如果查询中未指定 `ALL` 或 `ANY`,ClickHouse将引发异常。

-Default value: `ALL`.
+默认值: `ALL`。

## join\_any\_take\_last\_row {#settings-join_any_take_last_row}

-Changes behaviour of join operations with `ANY` strictness.
+更改具有 `ANY` 严格性的联接操作的行为。

-!!! warning "Attention"
-    This setting applies only for `JOIN` operations with [Join](../table_engines/join.md) engine tables.
+!!! warning "注意"
+    此设置仅适用于使用 [Join](../../engines/table_engines/special/join.md) 引擎表的 `JOIN` 操作。

-Possible values:
+可能的值:

- 0 — If the right table has more than one matching row, only the first one found is joined.
- 1 — If the right table has more than one matching row, only the last one found is joined.

-Default value: 0.
+默认值:0。

-See also:
+另请参阅:

-- [JOIN clause](../../query_language/select.md#select-join)
-- [Join table engine](../table_engines/join.md)
+- [JOIN子句](../../sql_reference/statements/select.md#select-join)
+- [Join表引擎](../../engines/table_engines/special/join.md)
- [join\_default\_strictness](#settings-join_default_strictness)
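A brief sketch of how the default strictness changes a bare `JOIN`; the tables `t` and `dict` are hypothetical:

``` sql
-- Make a bare JOIN behave like ANY JOIN for this session.
SET join_default_strictness = 'ANY';

-- If `dict` has several rows per id, only the first match is joined.
SELECT t.id, d.name FROM t JOIN dict AS d ON t.id = d.id;
```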
## join\_use\_nulls {#join_use_nulls}

-Sets the type of [JOIN](../../query_language/select.md) behavior. When merging tables, empty cells may appear. ClickHouse fills them differently based on this setting.
+设置 [JOIN](../../sql_reference/statements/select.md) 行为的类型。合并表时,可能会出现空单元格。ClickHouse根据此设置以不同的方式填充它们。

-Possible values:
+可能的值:

- 0 — The empty cells are filled with the default value of the corresponding field type.
-- 1 — `JOIN` behaves the same way as in standard SQL. The type of the corresponding field is converted to [Nullable](../../data_types/nullable.md#data_type-nullable), and empty cells are filled with [NULL](../../query_language/syntax.md).
+- 1 — `JOIN` 的行为方式与标准SQL中相同。相应字段的类型将转换为 [Nullable](../../sql_reference/data_types/nullable.md#data_type-nullable),空单元格以 [NULL](../../sql_reference/syntax.md) 填充。

-Default value: 0.
+默认值:0。

## max\_block\_size {#setting-max_block_size}

-In ClickHouse, data is processed by blocks (sets of column parts). The internal processing cycles for a single block are efficient enough, but there are noticeable expenditures on each block. The `max_block_size` setting is a recommendation for what size of the block (in a count of rows) to load from tables. The block size shouldn’t be too small, so that the expenditures on each block are still noticeable, but not too large so that the query with LIMIT that is completed after the first block is processed quickly. The goal is to avoid consuming too much memory when extracting a large number of columns in multiple threads and to preserve at least some cache locality.
+在ClickHouse中,数据按块(列部分的集合)处理。单个块的内部处理周期足够高效,但每个块都有明显的开销。`max_block_size` 设置是对从表中加载的块大小(以行数计)的建议。块大小不应太小,以免每个块的开销变得显著;也不应太大,以便带LIMIT的查询在处理完第一个块后就能快速完成。目标是避免在多个线程中提取大量列时消耗过多内存,并至少保留一些缓存局部性。

-Default value: 65,536.
+默认值:65,536。

-Blocks the size of `max_block_size` are not always loaded from the table. If it is obvious that less data needs to be retrieved, a smaller block is processed.
+并不总是从表中加载大小为 `max_block_size` 的块。如果显然需要检索的数据较少,则处理较小的块。

## preferred\_block\_size\_bytes {#preferred-block-size-bytes}

-Used for the same purpose as `max_block_size`, but it sets the recommended block size in bytes by adapting it to the number of rows in the block.
-However, the block size cannot be more than `max_block_size` rows.
-By default: 1,000,000. It only works when reading from MergeTree engines.
+用途与 `max_block_size` 相同,但它通过适应块中的行数,以字节为单位设置推荐的块大小。
+但是,块大小不能超过 `max_block_size` 行。
+默认情况下:1,000,000。它只有在从MergeTree引擎读取时才有效。

## merge\_tree\_min\_rows\_for\_concurrent\_read {#setting-merge-tree-min-rows-for-concurrent-read}

-If the number of rows to be read from a file of a [MergeTree](../table_engines/mergetree.md) table exceeds `merge_tree_min_rows_for_concurrent_read` then ClickHouse tries to perform a concurrent reading from this file on several threads.
+如果从 [MergeTree](../../engines/table_engines/mergetree_family/mergetree.md) 表的一个文件中读取的行数超过 `merge_tree_min_rows_for_concurrent_read`,ClickHouse将尝试在多个线程上对该文件执行并发读取。

-Possible values:
+可能的值:

-- Any positive integer.
+- 任何正整数。

-Default value: 163840.
+默认值:163840。

## merge\_tree\_min\_bytes\_for\_concurrent\_read {#setting-merge-tree-min-bytes-for-concurrent-read}

-If the number of bytes to read from one file of a [MergeTree](../table_engines/mergetree.md)-engine table exceeds `merge_tree_min_bytes_for_concurrent_read`, then ClickHouse tries to concurrently read from this file in several threads.
+如果从 [MergeTree](../../engines/table_engines/mergetree_family/mergetree.md) 引擎表的一个文件中读取的字节数超过 `merge_tree_min_bytes_for_concurrent_read`,ClickHouse将尝试在多个线程中并发读取此文件。

-Possible value:
+可能的值:

-- Any positive integer.
+- 任何正整数。

-Default value: 251658240.
+默认值:251658240。
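A hypothetical tuning example for the concurrent-read threshold above; the value and the `hits` table are illustrative only, not recommendations:

``` sql
-- Lower the row threshold so smaller reads are also parallelized
-- (example value; the default is 163840).
SET merge_tree_min_rows_for_concurrent_read = 20480;

SELECT count() FROM hits WHERE Date >= '2020-01-01';
```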
## merge\_tree\_min\_rows\_for\_seek {#setting-merge-tree-min-rows-for-seek}

-If the distance between two data blocks to be read in one file is less than `merge_tree_min_rows_for_seek` rows, then ClickHouse does not seek through the file but reads the data sequentially.
+如果要在一个文件中读取的两个数据块之间的距离小于 `merge_tree_min_rows_for_seek` 行,ClickHouse不会在文件中寻道,而是按顺序读取数据。

-Possible values:
+可能的值:

-- Any positive integer.
+- 任何正整数。

-Default value: 0.
+默认值:0。

## merge\_tree\_min\_bytes\_for\_seek {#setting-merge-tree-min-bytes-for-seek}

-If the distance between two data blocks to be read in one file is less than `merge_tree_min_bytes_for_seek` bytes, then ClickHouse sequentially reads a range of file that contains both blocks, thus avoiding extra seek.
+如果要在一个文件中读取的两个数据块之间的距离小于 `merge_tree_min_bytes_for_seek` 字节,ClickHouse将按顺序读取包含这两个块的文件范围,从而避免额外的寻道。

-Possible values:
+可能的值:

-- Any positive integer.
+- 任何正整数。

-Default value: 0.
+默认值:0。

## merge\_tree\_coarse\_index\_granularity {#setting-merge-tree-coarse-index-granularity}

-When searching for data, ClickHouse checks the data marks in the index file. If ClickHouse finds that required keys are in some range, it divides this range into `merge_tree_coarse_index_granularity` subranges and searches the required keys there recursively.
+搜索数据时,ClickHouse会检查索引文件中的数据标记。如果ClickHouse发现所需的键在某个范围内,它会将此范围划分为 `merge_tree_coarse_index_granularity` 个子范围,并在其中递归地搜索所需的键。

-Possible values:
+可能的值:

-- Any positive even integer.
+- 任何正偶数。

-Default value: 8.
+默认值:8。

## merge\_tree\_max\_rows\_to\_use\_cache {#setting-merge-tree-max-rows-to-use-cache}

-If ClickHouse should read more than `merge_tree_max_rows_to_use_cache` rows in one query, it doesn’t use the cache of uncompressed blocks.
+如果ClickHouse在一个查询中需要读取超过 `merge_tree_max_rows_to_use_cache` 行,它将不使用未压缩块的缓存。

-The cache of uncompressed blocks stores data extracted for queries. ClickHouse uses this cache to speed up responses to repeated small queries. This setting protects the cache from trashing by queries that read a large amount of data. The [uncompressed\_cache\_size](../server_settings/settings.md#server-settings-uncompressed_cache_size) server setting defines the size of the cache of uncompressed blocks.
+未压缩块的缓存存储为查询提取的数据。ClickHouse使用此缓存来加快对重复的小查询的响应。此设置可保护缓存免受读取大量数据的查询的破坏。[uncompressed\_cache\_size](../server_configuration_parameters/settings.md#server-settings-uncompressed_cache_size) 服务器设置定义未压缩块缓存的大小。

-Possible values:
+可能的值:

-- Any positive integer.
+- 任何正整数。

Default value: 128 ✕ 8192.

## merge\_tree\_max\_bytes\_to\_use\_cache {#setting-merge-tree-max-bytes-to-use-cache}

-If ClickHouse should read more than `merge_tree_max_bytes_to_use_cache` bytes in one query, it doesn’t use the cache of uncompressed blocks.
+如果ClickHouse在一个查询中需要读取超过 `merge_tree_max_bytes_to_use_cache` 字节,它将不使用未压缩块的缓存。

-The cache of uncompressed blocks stores data extracted for queries. ClickHouse uses this cache to speed up responses to repeated small queries. This setting protects the cache from trashing by queries that read a large amount of data. The [uncompressed\_cache\_size](../server_settings/settings.md#server-settings-uncompressed_cache_size) server setting defines the size of the cache of uncompressed blocks.
+未压缩块的缓存存储为查询提取的数据。ClickHouse使用此缓存来加快对重复的小查询的响应。此设置可保护缓存免受读取大量数据的查询的破坏。[uncompressed\_cache\_size](../server_configuration_parameters/settings.md#server-settings-uncompressed_cache_size) 服务器设置定义未压缩块缓存的大小。

-Possible value:
+可能的值:

-- Any positive integer.
+- 任何正整数。

-Default value: 2013265920.
+默认值:2013265920。
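To inspect the effective values of these MergeTree read settings on a running server, the standard `system.settings` table can be queried; the exact set of rows returned depends on the server version:

``` sql
SELECT name, value, changed
FROM system.settings
WHERE name LIKE 'merge_tree%';
```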
## min\_bytes\_to\_use\_direct\_io {#settings-min-bytes-to-use-direct-io}

-The minimum data volume required for using direct I/O access to the storage disk.
+使用直接I/O访问存储磁盘所需的最小数据量。

-ClickHouse uses this setting when reading data from tables. If the total storage volume of all the data to be read exceeds `min_bytes_to_use_direct_io` bytes, then ClickHouse reads the data from the storage disk with the `O_DIRECT` option.
+ClickHouse在从表中读取数据时使用此设置。如果要读取的所有数据的总存储量超过 `min_bytes_to_use_direct_io` 字节,ClickHouse将使用 `O_DIRECT` 选项从存储磁盘读取数据。

-Possible values:
+可能的值:

- 0 — Direct I/O is disabled.
-- Positive integer.
+- 正整数。

-Default value: 0.
+默认值:0。

## log\_queries {#settings-log-queries}

-Setting up query logging.
+设置查询日志记录。

-Queries sent to ClickHouse with this setup are logged according to the rules in the [query\_log](../server_settings/settings.md#server_settings-query-log) server configuration parameter.
+使用此设置发送到ClickHouse的查询将根据 [query\_log](../server_configuration_parameters/settings.md#server_configuration_parameters-query-log) 服务器配置参数中的规则记录。

-Example:
+示例:

``` text
log_queries=1
```

@@ -508,11 +511,11 @@

## log\_query\_threads {#settings-log-query-threads}

-Setting up query threads logging.
+设置查询线程日志记录。

-Queries’ threads runned by ClickHouse with this setup are logged according to the rules in the [query\_thread\_log](../server_settings/settings.md#server_settings-query-thread-log) server configuration parameter.
+ClickHouse使用此设置运行的查询线程将根据 [query\_thread\_log](../server_configuration_parameters/settings.md#server_configuration_parameters-query-thread-log) 服务器配置参数中的规则记录。

-Example:
+示例:

``` text
log_query_threads=1
```

@@ -520,680 +523,680 @@

## max\_insert\_block\_size {#settings-max_insert_block_size}

-The size of blocks to form for insertion into a table.
-This setting only applies in cases when the server forms the blocks.
-For example, for an INSERT via the HTTP interface, the server parses the data format and forms blocks of the specified size.
-But when using clickhouse-client, the client parses the data itself, and the ‘max\_insert\_block\_size’ setting on the server doesn’t affect the size of the inserted blocks.
-The setting also doesn’t have a purpose when using INSERT SELECT, since data is inserted using the same blocks that are formed after SELECT.
+要插入到表中的块的大小。
+此设置仅适用于服务器形成块的情况。
+例如,对于通过HTTP接口进行的INSERT,服务器会解析数据格式并形成指定大小的块。
+但是当使用clickhouse-client时,客户端自行解析数据,服务器上的 ‘max\_insert\_block\_size’ 设置不会影响插入块的大小。
+使用INSERT SELECT时,该设置也没有意义,因为数据是使用SELECT之后形成的相同块插入的。

-Default value: 1,048,576.
+默认值:1,048,576。

-The default is slightly more than `max_block_size`. The reason for this is because certain table engines (`*MergeTree`) form a data part on the disk for each inserted block, which is a fairly large entity. Similarly, `*MergeTree` tables sort data during insertion and a large enough block size allow sorting more data in RAM.
+默认值略高于 `max_block_size`。这是因为某些表引擎 (`*MergeTree`) 会在磁盘上为每个插入的块形成一个数据部分,这是一个相当大的实体。同样,`*MergeTree` 表在插入期间对数据进行排序,足够大的块大小允许在RAM中对更多数据进行排序。
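Once `log_queries` is enabled for a session, executed queries can later be inspected in the `system.query_log` table, provided the server actually has the `query_log` section configured; a minimal check might look like this:

``` sql
SET log_queries = 1;
SELECT 1;

-- Later, on the same server (availability and contents depend on configuration):
SELECT query, query_duration_ms
FROM system.query_log
ORDER BY event_time DESC
LIMIT 5;
```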
## max\_replica\_delay\_for\_distributed\_queries {#settings-max_replica_delay_for_distributed_queries}

-Disables lagging replicas for distributed queries. See [Replication](../../operations/table_engines/replication.md).
+禁用分布式查询的滞后副本。参见 [复制](../../engines/table_engines/mergetree_family/replication.md)。

-Sets the time in seconds. If a replica lags more than the set value, this replica is not used.
+以秒为单位设置时间。如果副本滞后超过设定值,则不使用此副本。

-Default value: 300.
+默认值:300。

-Used when performing `SELECT` from a distributed table that points to replicated tables.
+在从指向复制表的分布式表执行 `SELECT` 时使用。

## max\_threads {#settings-max_threads}

-The maximum number of query processing threads, excluding threads for retrieving data from remote servers (see the ‘max\_distributed\_connections’ parameter).
+查询处理线程的最大数量,不包括用于从远程服务器检索数据的线程(请参阅 ‘max\_distributed\_connections’ 参数)。

-This parameter applies to threads that perform the same stages of the query processing pipeline in parallel.
-For example, when reading from a table, if it is possible to evaluate expressions with functions, filter with WHERE and pre-aggregate for GROUP BY in parallel using at least ‘max\_threads’ number of threads, then ‘max\_threads’ are used.
+此参数适用于并行执行查询处理管道同一阶段的线程。
+例如,从表中读取时,如果可以使用至少 ‘max\_threads’ 个线程并行地计算带函数的表达式、用WHERE过滤并为GROUP BY预聚合,则会使用 ‘max\_threads’ 个线程。

-Default value: the number of physical CPU cores.
+默认值:物理CPU内核数。

-If less than one SELECT query is normally run on a server at a time, set this parameter to a value slightly less than the actual number of processor cores.
+如果服务器上通常同时运行的SELECT查询少于一个,请将此参数设置为略小于实际处理器内核数的值。

-For queries that are completed quickly because of a LIMIT, you can set a lower ‘max\_threads’. For example, if the necessary number of entries are located in every block and max\_threads = 8, then 8 blocks are retrieved, although it would have been enough to read just one.
+对于因LIMIT而快速完成的查询,可以设置较低的 ‘max\_threads’。例如,如果每个块中都有所需数量的条目,并且max\_threads = 8,则会检索8个块,尽管只读取一个块就足够了。

-The smaller the `max_threads` value, the less memory is consumed.
+`max_threads` 值越小,消耗的内存越少。

## max\_insert\_threads {#settings-max-insert-threads}

-The maximum number of threads to execute the `INSERT SELECT` query.
+执行 `INSERT SELECT` 查询的最大线程数。

-Possible values:
+可能的值:

-- 0 (or 1) — `INSERT SELECT` no parallel execution.
-- Positive integer. Bigger than 1.
+- 0(或1)— `INSERT SELECT` 不并行执行。
+- 正整数,大于1。

-Default value: 0.
+默认值:0。

-Parallel `INSERT SELECT` has effect only if the `SELECT` part is executed in parallel, see [max\_threads](#settings-max_threads) setting.
-Higher values will lead to higher memory usage.
+并行 `INSERT SELECT` 仅在 `SELECT` 部分并行执行时才有效,请参阅 [max\_threads](#settings-max_threads) 设置。
+更高的值将导致更高的内存使用率。

## max\_compress\_block\_size {#max-compress-block-size}

-The maximum size of blocks of uncompressed data before compressing for writing to a table. By default, 1,048,576 (1 MiB). If the size is reduced, the compression rate is significantly reduced, the compression and decompression speed increases slightly due to cache locality, and memory consumption is reduced. There usually isn’t any reason to change this setting.
+写入表之前压缩的未压缩数据块的最大大小。默认情况下,1,048,576(1MiB)。如果减小该大小,压缩率会显著降低,由于缓存局部性,压缩和解压缩速度会略微提高,并且内存消耗会减少。通常没有任何理由更改此设置。

-Don’t confuse blocks for compression (a chunk of memory consisting of bytes) with blocks for query processing (a set of rows from a table).
+不要将用于压缩的块(由字节组成的内存块)与用于查询处理的块(表中的一组行)混淆。

## min\_compress\_block\_size {#min-compress-block-size}

-For [MergeTree](../table_engines/mergetree.md)" tables. In order to reduce latency when processing queries, a block is compressed when writing the next mark if its size is at least ‘min\_compress\_block\_size’. By default, 65,536.
+适用于 [MergeTree](../../engines/table_engines/mergetree_family/mergetree.md) 表。为了减少处理查询时的延迟,如果块的大小至少为 ‘min\_compress\_block\_size’,则在写入下一个标记时压缩该块。默认情况下,65,536。

-The actual size of the block, if the uncompressed data is less than ‘max\_compress\_block\_size’, is no less than this value and no less than the volume of data for one mark.
+块的实际大小,如果未压缩的数据小于 ‘max\_compress\_block\_size’,是不小于该值且不小于一个标记的数据量。 -Let’s look at an example. Assume that ‘index\_granularity’ was set to 8192 during table creation. +让我们来看看一个例子。 假设 ‘index\_granularity’ 在表创建期间设置为8192。 -We are writing a UInt32-type column (4 bytes per value). When writing 8192 rows, the total will be 32 KB of data. Since min\_compress\_block\_size = 65,536, a compressed block will be formed for every two marks. +我们正在编写一个UInt32类型的列(每个值4个字节)。 当写入8192行时,总数将是32KB的数据。 由于min\_compress\_block\_size=65,536,将为每两个标记形成一个压缩块。 -We are writing a URL column with the String type (average size of 60 bytes per value). When writing 8192 rows, the average will be slightly less than 500 KB of data. Since this is more than 65,536, a compressed block will be formed for each mark. In this case, when reading data from the disk in the range of a single mark, extra data won’t be decompressed. +我们正在编写一个字符串类型的URL列(每个值的平均大小60字节)。 当写入8192行时,平均数据将略少于500KB。 由于这超过65,536,将为每个标记形成一个压缩块。 在这种情况下,当从单个标记范围内的磁盘读取数据时,额外的数据不会被解压缩。 -There usually isn’t any reason to change this setting. +通常没有任何理由更改此设置。 ## max\_query\_size {#settings-max_query_size} -The maximum part of a query that can be taken to RAM for parsing with the SQL parser. -The INSERT query also contains data for INSERT that is processed by a separate stream parser (that consumes O(1) RAM), which is not included in this restriction. +查询的最大部分,可以被带到RAM用于使用SQL解析器进行解析。 +插入查询还包含由单独的流解析器(消耗O(1)RAM)处理的插入数据,这些数据不包含在此限制中。 -Default value: 256 KiB. +默认值:256KiB。 ## interactive\_delay {#interactive-delay} -The interval in microseconds for checking whether request execution has been cancelled and sending the progress. +以微秒为单位的间隔,用于检查请求执行是否已被取消并发送进度。 -Default value: 100,000 (checks for cancelling and sends the progress ten times per second). +默认值:100,000(检查取消并每秒发送十次进度)。 -## connect\_timeout, receive\_timeout, send\_timeout {#connect-timeout-receive-timeout-send-timeout} +## connect\_timeout,receive\_timeout,send\_timeout {#connect-timeout-receive-timeout-send-timeout} -Timeouts in seconds on the socket used for communicating with the client. +用于与客户端通信的套接字上的超时以秒为单位。 -Default value: 10, 300, 300. +默认值:10,300,300。 ## cancel\_http\_readonly\_queries\_on\_client\_close {#cancel-http-readonly-queries-on-client-close} Cancels HTTP read-only queries (e.g. SELECT) when a client closes the connection without waiting for the response. -Default value: 0 +默认值:0 ## poll\_interval {#poll-interval} -Lock in a wait loop for the specified number of seconds. +锁定在指定秒数的等待循环。 -Default value: 10. +默认值:10。 ## max\_distributed\_connections {#max-distributed-connections} -The maximum number of simultaneous connections with remote servers for distributed processing of a single query to a single Distributed table. We recommend setting a value no less than the number of servers in the cluster. +与远程服务器同时连接的最大数量,用于分布式处理对单个分布式表的单个查询。 我们建议设置不小于群集中服务器数量的值。 -Default value: 1024. +默认值:1024。 -The following parameters are only used when creating Distributed tables (and when launching a server), so there is no reason to change them at runtime. +以下参数仅在创建分布式表(以及启动服务器时)时使用,因此没有理由在运行时更改它们。 ## distributed\_connections\_pool\_size {#distributed-connections-pool-size} -The maximum number of simultaneous connections with remote servers for distributed processing of all queries to a single Distributed table. We recommend setting a value no less than the number of servers in the cluster. +与远程服务器同时连接的最大数量,用于分布式处理对单个分布式表的所有查询。 我们建议设置不小于群集中服务器数量的值。 -Default value: 1024. 
+默认值:1024。 ## connect\_timeout\_with\_failover\_ms {#connect-timeout-with-failover-ms} -The timeout in milliseconds for connecting to a remote server for a Distributed table engine, if the ‘shard’ and ‘replica’ sections are used in the cluster definition. -If unsuccessful, several attempts are made to connect to various replicas. +以毫秒为单位连接到分布式表引擎的远程服务器的超时,如果 ‘shard’ 和 ‘replica’ 部分用于群集定义。 +如果不成功,将尝试多次连接到各种副本。 -Default value: 50. +默认值:50。 ## connections\_with\_failover\_max\_tries {#connections-with-failover-max-tries} -The maximum number of connection attempts with each replica for the Distributed table engine. +分布式表引擎的每个副本的最大连接尝试次数。 -Default value: 3. +默认值:3。 -## extremes {#extremes} +## 极端 {#extremes} -Whether to count extreme values (the minimums and maximums in columns of a query result). Accepts 0 or 1. By default, 0 (disabled). -For more information, see the section “Extreme values”. +是否计算极值(查询结果列中的最小值和最大值)。 接受0或1。 默认情况下,0(禁用)。 +有关详细信息,请参阅部分 “Extreme values”. ## use\_uncompressed\_cache {#setting-use_uncompressed_cache} -Whether to use a cache of uncompressed blocks. Accepts 0 or 1. By default, 0 (disabled). -Using the uncompressed cache (only for tables in the MergeTree family) can significantly reduce latency and increase throughput when working with a large number of short queries. Enable this setting for users who send frequent short requests. Also pay attention to the [uncompressed\_cache\_size](../server_settings/settings.md#server-settings-uncompressed_cache_size) configuration parameter (only set in the config file) – the size of uncompressed cache blocks. By default, it is 8 GiB. The uncompressed cache is filled in as needed and the least-used data is automatically deleted. +是否使用未压缩块的缓存。 接受0或1。 默认情况下,0(禁用)。 +使用未压缩缓存(仅适用于MergeTree系列中的表)可以在处理大量短查询时显着减少延迟并提高吞吐量。 为频繁发送短请求的用户启用此设置。 还要注意 [uncompressed\_cache\_size](../server_configuration_parameters/settings.md#server-settings-uncompressed_cache_size) configuration parameter (only set in the config file) – the size of uncompressed cache blocks. By default, it is 8 GiB. The uncompressed cache is filled in as needed and the least-used data is automatically deleted. -For queries that read at least a somewhat large volume of data (one million rows or more), the uncompressed cache is disabled automatically to save space for truly small queries. This means that you can keep the ‘use\_uncompressed\_cache’ setting always set to 1. +对于至少读取大量数据(一百万行或更多行)的查询,将自动禁用未压缩缓存,以节省真正小型查询的空间。 这意味着你可以保持 ‘use\_uncompressed\_cache’ 设置始终设置为1。 ## replace\_running\_query {#replace-running-query} -When using the HTTP interface, the ‘query\_id’ parameter can be passed. This is any string that serves as the query identifier. -If a query from the same user with the same ‘query\_id’ already exists at this time, the behaviour depends on the ‘replace\_running\_query’ parameter. +当使用HTTP接口时, ‘query\_id’ 参数可以传递。 这是用作查询标识符的任何字符串。 +如果来自同一用户的查询具有相同的 ‘query\_id’ 已经存在在这个时候,行为取决于 ‘replace\_running\_query’ 参数。 -`0` (default) – Throw an exception (don’t allow the query to run if a query with the same ‘query\_id’ is already running). +`0` (default) – Throw an exception (don't allow the query to run if a query with the same ‘query\_id’ 已经运行)。 `1` – Cancel the old query and start running the new one. -Yandex.Metrica uses this parameter set to 1 for implementing suggestions for segmentation conditions. After entering the next character, if the old query hasn’t finished yet, it should be cancelled. 
+Yandex.Metrica使用此参数设置为1来实现分段条件的建议。输入下一个字符后,如果旧的查询尚未完成,则应取消它。

## stream\_flush\_interval\_ms {#stream-flush-interval-ms}

-Works for tables with streaming in the case of a timeout, or when a thread generates [max\_insert\_block\_size](#settings-max_insert_block_size) rows.
+适用于具有流式传输的表:在超时的情况下,或当一个线程生成 [max\_insert\_block\_size](#settings-max_insert_block_size) 行时。

-The default value is 7500.
+默认值为7500。

-The smaller the value, the more often data is flushed into the table. Setting the value too low leads to poor performance.
+值越小,数据被刷新到表中的频率就越高。将该值设置得太低会导致性能较差。

## load\_balancing {#settings-load_balancing}

-Specifies the algorithm of replicas selection that is used for distributed query processing.
+指定用于分布式查询处理的副本选择算法。

-ClickHouse supports the following algorithms of choosing replicas:
+ClickHouse支持以下选择副本的算法:

-- [Random](#load_balancing-random) (by default)
-- [Nearest hostname](#load_balancing-nearest_hostname)
-- [In order](#load_balancing-in_order)
-- [First or random](#load_balancing-first_or_random)
+- [随机](#load_balancing-random)(默认情况下)
+- [最近的主机名](#load_balancing-nearest_hostname)
+- [按顺序](#load_balancing-in_order)
+- [第一个或随机](#load_balancing-first_or_random)

-### Random (by default) {#load_balancing-random}
+### 随机(默认情况下) {#load_balancing-random}

``` sql
load_balancing = random
```

-The number of errors is counted for each replica. The query is sent to the replica with the fewest errors, and if there are several of these, to anyone of them.
-Disadvantages: Server proximity is not accounted for; if the replicas have different data, you will also get different data.
+对每个副本计算错误数。查询被发送到错误最少的副本;如果有多个这样的副本,则发送给其中任意一个。
+缺点:不考虑服务器邻近度;如果副本具有不同的数据,您也会获得不同的数据。

-### Nearest Hostname {#load_balancing-nearest_hostname}
+### 最近的主机名 {#load_balancing-nearest_hostname}

``` sql
load_balancing = nearest_hostname
```

-The number of errors is counted for each replica. Every 5 minutes, the number of errors is integrally divided by 2. Thus, the number of errors is calculated for a recent time with exponential smoothing. If there is one replica with a minimal number of errors (i.e. errors occurred recently on the other replicas), the query is sent to it. If there are multiple replicas with the same minimal number of errors, the query is sent to the replica with a hostname that is most similar to the server’s hostname in the config file (for the number of different characters in identical positions, up to the minimum length of both hostnames).
+对每个副本计算错误数。每5分钟,错误数会被除以2并取整。因此,近期的错误数以指数平滑的方式计算。如果有一个副本的错误数最少(即其他副本最近发生过错误),则将查询发送给它。如果有多个副本的最小错误数相同,则将查询发送到主机名与配置文件中服务器主机名最相似的副本(按相同位置上不同字符的数量,最多比较到两个主机名的最小长度)。

-For instance, example01-01-1 and example01-01-2.yandex.ru are different in one position, while example01-01-1 and example01-02-2 differ in two places.
-This method might seem primitive, but it doesn’t require external data about network topology, and it doesn’t compare IP addresses, which would be complicated for our IPv6 addresses.
+例如,example01-01-1 和 example01-01-2.yandex.ru 在一个位置上不同,而 example01-01-1 和 example01-02-2 在两个位置上不同。
+这种方法可能看起来很原始,但它不需要有关网络拓扑的外部数据,也不比较IP地址(对于我们的IPv6地址来说,比较IP地址会很复杂)。

-Thus, if there are equivalent replicas, the closest one by name is preferred.
-We can also assume that when sending a query to the same server, in the absence of failures, a distributed query will also go to the same servers. So even if different data is placed on the replicas, the query will return mostly the same results.
+因此,如果存在等效副本,则首选按名称最接近的副本。
+我们还可以假设,在向同一台服务器发送查询时,在没有失败的情况下,分布式查询也将转到同一批服务器。因此,即使副本上放置了不同的数据,查询也会返回大多相同的结果。

-### In Order {#load_balancing-in_order}
+### 按顺序 {#load_balancing-in_order}

``` sql
load_balancing = in_order
```

-Replicas with the same number of errors are accessed in the same order as they are specified in the configuration.
-This method is appropriate when you know exactly which replica is preferable.
+具有相同错误数的副本按照它们在配置中指定的顺序访问。
+当您确切知道哪个副本更可取时,此方法是合适的。

-### First or Random {#load_balancing-first_or_random}
+### 第一个或随机 {#load_balancing-first_or_random}

``` sql
load_balancing = first_or_random
```

-This algorithm chooses the first replica in the set or a random replica if the first is unavailable. It’s effective in cross-replication topology setups, but useless in other configurations.
+此算法选择集合中的第一个副本;如果第一个副本不可用,则选择随机副本。它在交叉复制拓扑设置中有效,但在其他配置中无用。

-The `first_or_random` algorithm solves the problem of the `in_order` algorithm. With `in_order`, if one replica goes down, the next one gets a double load while the remaining replicas handle the usual amount of traffic. When using the `first_or_random` algorithm, the load is evenly distributed among replicas that are still available.
+`first_or_random` 算法解决了 `in_order` 算法的问题。使用 `in_order` 时,如果一个副本宕机,下一个副本将承受双倍负载,而其余副本则处理通常的流量。使用 `first_or_random` 算法时,负载会均匀分布在仍然可用的副本之间。

## prefer\_localhost\_replica {#settings-prefer-localhost-replica}

-Enables/disables preferable using the localhost replica when processing distributed queries.
+在处理分布式查询时,启用/禁用优先使用localhost副本。

-Possible values:
+可能的值:

- 1 — ClickHouse always sends a query to the localhost replica if it exists.
-- 0 — ClickHouse uses the balancing strategy specified by the [load\_balancing](#settings-load_balancing) setting.
+- 0 — ClickHouse使用 [load\_balancing](#settings-load_balancing) 设置指定的均衡策略。

-Default value: 1.
+默认值:1。

-!!! warning "Warning"
-    Disable this setting if you use [max\_parallel\_replicas](#settings-max_parallel_replicas).
+!!! warning "警告"
+    如果您使用 [max\_parallel\_replicas](#settings-max_parallel_replicas),请禁用此设置。

## totals\_mode {#totals-mode}

-How to calculate TOTALS when HAVING is present, as well as when max\_rows\_to\_group\_by and group\_by\_overflow\_mode = ‘any’ are present.
-See the section “WITH TOTALS modifier”.
+当存在HAVING,以及存在max\_rows\_to\_group\_by和group\_by\_overflow\_mode = ‘any’ 时,如何计算TOTALS。
+请参阅 “WITH TOTALS modifier” 部分。

## totals\_auto\_threshold {#totals-auto-threshold}

-The threshold for `totals_mode = 'auto'`.
-See the section “WITH TOTALS modifier”.
+`totals_mode = 'auto'` 的阈值。
+请参阅 “WITH TOTALS modifier” 部分。

## max\_parallel\_replicas {#settings-max_parallel_replicas}

-The maximum number of replicas for each shard when executing a query.
-For consistency (to get different parts of the same data split), this option only works when the sampling key is set.
-Replica lag is not controlled.
+执行查询时每个分片的最大副本数。
+为了保持一致性(获取同一数据拆分的不同部分),此选项仅在设置了采样键时有效。
+副本滞后不受控制。

-## compile {#compile}
+## 编译 {#compile}

-Enable compilation of queries.
By default, 0 (disabled). +启用查询的编译。 默认情况下,0(禁用)。 -The compilation is only used for part of the query-processing pipeline: for the first stage of aggregation (GROUP BY). -If this portion of the pipeline was compiled, the query may run faster due to deployment of short cycles and inlining aggregate function calls. The maximum performance improvement (up to four times faster in rare cases) is seen for queries with multiple simple aggregate functions. Typically, the performance gain is insignificant. In very rare cases, it may slow down query execution. +编译仅用于查询处理管道的一部分:用于聚合的第一阶段(GROUP BY)。 +如果编译了管道的这一部分,则由于部署周期较短和内联聚合函数调用,查询可能运行得更快。 对于具有多个简单聚合函数的查询,可以看到最大的性能改进(在极少数情况下可快四倍)。 通常,性能增益是微不足道的。 在极少数情况下,它可能会减慢查询执行速度。 ## min\_count\_to\_compile {#min-count-to-compile} -How many times to potentially use a compiled chunk of code before running compilation. By default, 3. +在运行编译之前可能使用已编译代码块的次数。 默认情况下,3。 For testing, the value can be set to 0: compilation runs synchronously and the query waits for the end of the compilation process before continuing execution. For all other cases, use values ​​starting with 1. Compilation normally takes about 5-10 seconds. -If the value is 1 or more, compilation occurs asynchronously in a separate thread. The result will be used as soon as it is ready, including queries that are currently running. +如果该值为1或更大,则编译在单独的线程中异步进行。 结果将在准备就绪后立即使用,包括当前正在运行的查询。 -Compiled code is required for each different combination of aggregate functions used in the query and the type of keys in the GROUP BY clause. -The results of the compilation are saved in the build directory in the form of .so files. There is no restriction on the number of compilation results since they don’t use very much space. Old results will be used after server restarts, except in the case of a server upgrade – in this case, the old results are deleted. +对于查询中使用的聚合函数的每个不同组合以及GROUP BY子句中的键类型,都需要编译代码。 +The results of the compilation are saved in the build directory in the form of .so files. There is no restriction on the number of compilation results since they don't use very much space. Old results will be used after server restarts, except in the case of a server upgrade – in this case, the old results are deleted. ## output\_format\_json\_quote\_64bit\_integers {#session_settings-output_format_json_quote_64bit_integers} -If the value is true, integers appear in quotes when using JSON\* Int64 and UInt64 formats (for compatibility with most JavaScript implementations); otherwise, integers are output without the quotes. +如果该值为true,则在使用JSON\*Int64和UInt64格式时,整数将显示在引号中(为了与大多数JavaScript实现兼容);否则,整数将不带引号输出。 ## format\_csv\_delimiter {#settings-format_csv_delimiter} -The character interpreted as a delimiter in the CSV data. By default, the delimiter is `,`. +将字符解释为CSV数据中的分隔符。 默认情况下,分隔符为 `,`. ## input\_format\_csv\_unquoted\_null\_literal\_as\_null {#settings-input_format_csv_unquoted_null_literal_as_null} -For CSV input format enables or disables parsing of unquoted `NULL` as literal (synonym for `\N`). +对于CSV输入格式,启用或禁用未引用的解析 `NULL` 作为文字(同义词 `\N`). ## output\_format\_csv\_crlf\_end\_of\_line {#settings-output-format-csv-crlf-end-of-line} -Use DOS/Windows-style line separator (CRLF) in CSV instead of Unix style (LF). +在CSV中使用DOS/Windows样式的行分隔符(CRLF)而不是Unix样式(LF)。 ## output\_format\_tsv\_crlf\_end\_of\_line {#settings-output-format-tsv-crlf-end-of-line} -Use DOC/Windows-style line separator (CRLF) in TSV instead of Unix style (LF). 
+在TSV中使用DOS/Windows样式的行分隔符(CRLF)而不是Unix样式(LF)。

## insert\_quorum {#settings-insert_quorum}

-Enables the quorum writes.
+启用仲裁写入。

-- If `insert_quorum < 2`, the quorum writes are disabled.
-- If `insert_quorum >= 2`, the quorum writes are enabled.
+- 如果 `insert_quorum < 2`,仲裁写入被禁用。
+- 如果 `insert_quorum >= 2`,仲裁写入被启用。

-Default value: 0.
+默认值:0。

-Quorum writes
+仲裁写入

-`INSERT` succeeds only when ClickHouse manages to correctly write data to the `insert_quorum` of replicas during the `insert_quorum_timeout`. If for any reason the number of replicas with successful writes does not reach the `insert_quorum`, the write is considered failed and ClickHouse will delete the inserted block from all the replicas where data has already been written.
+只有当ClickHouse在 `insert_quorum_timeout` 期间成功地将数据正确写入 `insert_quorum` 个副本时,`INSERT` 才算成功。 如果由于任何原因,成功写入的副本数量没有达到 `insert_quorum`,则写入被视为失败,ClickHouse将从所有已经写入数据的副本中删除插入的块。

-All the replicas in the quorum are consistent, i.e., they contain data from all previous `INSERT` queries. The `INSERT` sequence is linearized.
+仲裁中的所有副本都是一致的,即它们包含所有先前 `INSERT` 查询的数据。 `INSERT` 序列是线性化的。

-When reading the data written from the `insert_quorum`, you can use the [select\_sequential\_consistency](#settings-select_sequential_consistency) option.
+读取使用 `insert_quorum` 写入的数据时,您可以使用 [select\_sequential\_consistency](#settings-select_sequential_consistency) 选项。

-ClickHouse generates an exception
+在以下情况下,ClickHouse会生成异常

-- If the number of available replicas at the time of the query is less than the `insert_quorum`.
-- At an attempt to write data when the previous block has not yet been inserted in the `insert_quorum` of replicas. This situation may occur if the user tries to perform an `INSERT` before the previous one with the `insert_quorum` is completed.
+- 如果查询时可用副本的数量小于 `insert_quorum`。
+- 当尝试在前一个块尚未插入到 `insert_quorum` 个副本时写入数据。 如果用户在前一个使用 `insert_quorum` 的 `INSERT` 完成之前尝试执行新的 `INSERT`,就可能出现这种情况。

-See also:
+另请参阅:

- [insert\_quorum\_timeout](#settings-insert_quorum_timeout)
- [select\_sequential\_consistency](#settings-select_sequential_consistency)

## insert\_quorum\_timeout {#settings-insert_quorum-timeout}

-Write to quorum timeout in seconds. If the timeout has passed and no write has taken place yet, ClickHouse will generate an exception and the client must repeat the query to write the same block to the same or any other replica.
+以秒为单位的仲裁写入超时。 如果超时已过而写入尚未发生,ClickHouse将生成异常,客户端必须重复查询,以将同一块写入同一副本或任何其他副本。

-Default value: 60 seconds.
+默认值:60秒。

-See also:
+另请参阅:

- [insert\_quorum](#settings-insert_quorum)
- [select\_sequential\_consistency](#settings-select_sequential_consistency)
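+下面是一个说明性草图(表 `test.hits_replica` 仅为假设的Replicated\*表),演示仲裁写入的用法:
+
+``` sql
+-- 只有至少两个副本在超时内确认写入,INSERT才算成功
+SET insert_quorum = 2;
+INSERT INTO test.hits_replica VALUES (1, 'hello');
+```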
## select\_sequential\_consistency {#settings-select_sequential_consistency}

-Enables or disables sequential consistency for `SELECT` queries:
+为 `SELECT` 查询启用或禁用顺序一致性:

-Possible values:
+可能的值:

- 0 — Disabled.
- 1 — Enabled.

-Default value: 0.
+默认值:0。

-Usage
+用途

-When sequential consistency is enabled, ClickHouse allows the client to execute the `SELECT` query only for those replicas that contain data from all previous `INSERT` queries executed with `insert_quorum`. If the client refers to a partial replica, ClickHouse will generate an exception. The SELECT query will not include data that has not yet been written to the quorum of replicas.
+当启用顺序一致性时,ClickHouse只允许客户端对那些包含使用 `insert_quorum` 执行的所有先前 `INSERT` 查询数据的副本执行 `SELECT` 查询。 如果客户端引用了部分副本,ClickHouse将生成异常。 `SELECT` 查询将不包括尚未写入副本仲裁的数据。

-See also:
+另请参阅:

- [insert\_quorum](#settings-insert_quorum)
- [insert\_quorum\_timeout](#settings-insert_quorum_timeout)

## insert\_deduplicate {#settings-insert-deduplicate}

-Enables or disables block deduplication of `INSERT` (for Replicated\* tables).
+启用或禁用 `INSERT` 的块重复数据删除(针对Replicated\*表)。

-Possible values:
+可能的值:

- 0 — Disabled.
- 1 — Enabled.

-Default value: 1.
+默认值:1。

-By default, blocks inserted into replicated tables by the `INSERT` statement are deduplicated (see \[Data Replication\] (../ table\_engines/replication.md).
+默认情况下,通过 `INSERT` 语句插入到复制表中的块会进行重复数据删除(请参阅\[数据复制\](../engines/table\_engines/mergetree\_family/replication.md))。

## deduplicate\_blocks\_in\_dependent\_materialized\_views {#settings-deduplicate-blocks-in-dependent-materialized-views}

-Enables or disables the deduplication check for materialized views that receive data from Replicated\* tables.
+启用或禁用从Replicated\*表接收数据的实例化视图的重复数据删除检查。

-Possible values:
+可能的值:

0 — Disabled.
1 — Enabled.

-Default value: 0.
+默认值:0。

-Usage
+用途

-By default, deduplication is not performed for materialized views but is done upstream, in the source table.
-If an INSERTed block is skipped due to deduplication in the source table, there will be no insertion into attached materialized views. This behaviour exists to enable insertion of highly aggregated data into materialized views, for cases where inserted blocks are the same after materialized view aggregation but derived from different INSERTs into the source table.
-At the same time, this behaviour “breaks” `INSERT` idempotency. If an `INSERT` into the main table was successful and `INSERT` into a materialized view failed (e.g. because of communication failure with Zookeeper) a client will get an error and can retry the operation. However, the materialized view won’t receive the second insert because it will be discarded by deduplication in the main (source) table. The setting `deduplicate_blocks_in_dependent_materialized_views` allows for changing this behaviour. On retry, a materialized view will receive the repeat insert and will perform deduplication check by itself,
-ignoring check result for the source table, and will insert rows lost because of the first failure.
+默认情况下,重复数据删除不在实例化视图上执行,而是在上游的源表中执行。
+如果插入的块由于源表中的重复数据删除而被跳过,则不会向附加的实例化视图插入数据。 这种行为的存在,是为了在插入的块经过实例化视图聚合后相同、但源自对源表的不同INSERT的情况下,仍然能够将高度聚合的数据插入实例化视图。
+与此同时,这种行为 “破坏” 了 `INSERT` 的幂等性。 如果对主表的 `INSERT` 成功,而对实例化视图的 `INSERT` 失败(例如由于与Zookeeper的通信失败),客户端将收到错误并可以重试该操作。 但是,实例化视图不会收到第二次插入,因为它会被主(源)表中的重复数据删除丢弃。 设置 `deduplicate_blocks_in_dependent_materialized_views` 允许改变这种行为。 重试时,实例化视图将收到重复插入,并自行执行重复数据删除检查,
+忽略源表的检查结果,插入由于第一次失败而丢失的行。

## max\_network\_bytes {#settings-max-network-bytes}

-Limits the data volume (in bytes) that is received or transmitted over the network when executing a query. This setting applies to every individual query.
+限制执行查询时通过网络接收或传输的数据量(以字节为单位)。 此设置适用于每个单独的查询。

-Possible values:
+可能的值:

-- Positive integer.
+- 正整数。
- 0 — Data volume control is disabled.

-Default value: 0.
+默认值:0。

## max\_network\_bandwidth {#settings-max-network-bandwidth}

-Limits the speed of the data exchange over the network in bytes per second. This setting applies to every query.
+限制通过网络进行数据交换的速度,以每秒字节数为单位。 此设置适用于每个查询。

-Possible values:
+可能的值:

-- Positive integer.
+- 正整数。
- 0 — Bandwidth control is disabled.
-Default value: 0.
+默认值:0。

## max\_network\_bandwidth\_for\_user {#settings-max-network-bandwidth-for-user}

-Limits the speed of the data exchange over the network in bytes per second. This setting applies to all concurrently running queries performed by a single user.
+限制通过网络进行数据交换的速度,以每秒字节数为单位。 此设置适用于单个用户执行的所有并发运行的查询。

-Possible values:
+可能的值:

-- Positive integer.
+- 正整数。
- 0 — Control of the data speed is disabled.

-Default value: 0.
+默认值:0。

## max\_network\_bandwidth\_for\_all\_users {#settings-max-network-bandwidth-for-all-users}

-Limits the speed that data is exchanged at over the network in bytes per second. This setting applies to all concurrently running queries on the server.
+限制通过网络交换数据的速度,以每秒字节数为单位。 此设置适用于服务器上所有并发运行的查询。

-Possible values:
+可能的值:

-- Positive integer.
+- 正整数。
- 0 — Control of the data speed is disabled.

-Default value: 0.
+默认值:0。

## count\_distinct\_implementation {#settings-count_distinct_implementation}

-Specifies which of the `uniq*` functions should be used to perform the [COUNT(DISTINCT …)](../../query_language/agg_functions/reference.md#agg_function-count) construction.
+指定应使用哪个 `uniq*` 函数来执行 [COUNT(DISTINCT …)](../../sql_reference/aggregate_functions/reference.md#agg_function-count) 结构。

-Possible values:
+可能的值:

-- [uniq](../../query_language/agg_functions/reference.md#agg_function-uniq)
-- [uniqCombined](../../query_language/agg_functions/reference.md#agg_function-uniqcombined)
-- [uniqCombined64](../../query_language/agg_functions/reference.md#agg_function-uniqcombined64)
-- [uniqHLL12](../../query_language/agg_functions/reference.md#agg_function-uniqhll12)
-- [uniqExact](../../query_language/agg_functions/reference.md#agg_function-uniqexact)
+- [uniq](../../sql_reference/aggregate_functions/reference.md#agg_function-uniq)
+- [uniqCombined](../../sql_reference/aggregate_functions/reference.md#agg_function-uniqcombined)
+- [uniqCombined64](../../sql_reference/aggregate_functions/reference.md#agg_function-uniqcombined64)
+- [uniqHLL12](../../sql_reference/aggregate_functions/reference.md#agg_function-uniqhll12)
+- [uniqExact](../../sql_reference/aggregate_functions/reference.md#agg_function-uniqexact)

-Default value: `uniqExact`.
+默认值: `uniqExact`。
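+例如,下面的草图(假设存在带有 `UserID` 列的表 `hits`,名称仅为示意)让近似函数代替默认的 `uniqExact`:
+
+``` sql
+SET count_distinct_implementation = 'uniqCombined';
+-- 该查询现在由uniqCombined执行
+SELECT COUNT(DISTINCT UserID) FROM hits;
+```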
## skip\_unavailable\_shards {#settings-skip_unavailable_shards}

-Enables or disables silently skipping of unavailable shards.
+启用或禁用静默跳过不可用分片。

-Shard is considered unavailable if all its replicas are unavailable. A replica is unavailable in the following cases:
+如果分片的所有副本都不可用,则认为该分片不可用。 副本在以下情况下不可用:

-- ClickHouse can’t connect to replica for any reason.
+- ClickHouse出于任何原因无法连接到副本。

- When connecting to a replica, ClickHouse performs several attempts. If all these attempts fail, the replica is considered unavailable.
+ 连接到副本时,ClickHouse会执行多次尝试。 如果所有这些尝试都失败,则认为副本不可用。

-- Replica can’t be resolved through DNS.
+- 副本无法通过DNS解析。

- If replica’s hostname can’t be resolved through DNS, it can indicate the following situations:
+ 如果无法通过DNS解析副本的主机名,则可能是以下情况:

- - Replica’s host has no DNS record. It can occur in systems with dynamic DNS, for example, [Kubernetes](https://kubernetes.io), where nodes can be unresolvable during downtime, and this is not an error.
+ - 副本的主机没有DNS记录。 这可能发生在具有动态DNS的系统中,例如 [Kubernetes](https://kubernetes.io),其中节点在停机期间可能无法解析,这并不是错误。

- - Configuration error. ClickHouse configuration file contains a wrong hostname.
+ - 配置错误。 ClickHouse配置文件包含错误的主机名。

-Possible values:
+可能的值:

- 1 — skipping enabled.

- If a shard is unavailable, ClickHouse returns a result based on partial data and doesn’t report node availability issues.
+ 如果分片不可用,ClickHouse将基于部分数据返回结果,并且不报告节点可用性问题。

- 0 — skipping disabled.

- If a shard is unavailable, ClickHouse throws an exception.
+ 如果分片不可用,ClickHouse将引发异常。

-Default value: 0.
+默认值:0。

## optimize\_skip\_unused\_shards {#settings-optimize_skip_unused_shards}

-Enables or disables skipping of unused shards for SELECT queries that have sharding key condition in PREWHERE/WHERE (assumes that the data is distributed by sharding key, otherwise do nothing).
+对于在PREWHERE/WHERE中具有分片键条件的SELECT查询,启用或禁用跳过未使用的分片(假定数据是按分片键分布的,否则不执行任何操作)。

-Default value: 0
+默认值:0

## force\_optimize\_skip\_unused\_shards {#settings-force_optimize_skip_unused_shards}

-Enables or disables query execution if [`optimize_skip_unused_shards`](#settings-optimize_skip_unused_shards) enabled and skipping of unused shards is not possible. If the skipping is not possible and the setting is enabled exception will be thrown.
+当启用了 [`optimize_skip_unused_shards`](#settings-optimize_skip_unused_shards) 而无法跳过未使用的分片时,启用或禁用查询执行。 如果无法跳过且该设置已启用,则会抛出异常。

-Possible values:
+可能的值:

-- 0 - Disabled (do not throws)
-- 1 - Disable query execution only if the table has sharding key
-- 2 - Disable query execution regardless sharding key is defined for the table
+- 0 - 禁用(不抛出)
+- 1 - 仅当表具有分片键时才禁用查询执行
+- 2 - 无论是否为表定义了分片键,都禁用查询执行

-Default value: 0
+默认值:0

## force\_optimize\_skip\_unused\_shards\_no\_nested {#settings-force_optimize_skip_unused_shards_no_nested}

-Reset [`optimize_skip_unused_shards`](#settings-force_optimize_skip_unused_shards) for nested `Distributed` table
+针对嵌套的 `Distributed` 表重置 [`optimize_skip_unused_shards`](#settings-optimize_skip_unused_shards)

-Possible values:
+可能的值:

- 1 — Enabled.
- 0 — Disabled.

-Default value: 0.
+默认值:0。

## optimize\_throw\_if\_noop {#setting-optimize_throw_if_noop}

-Enables or disables throwing an exception if an [OPTIMIZE](../../query_language/misc.md#misc_operations-optimize) query didn’t perform a merge.
+如果 [OPTIMIZE](../../sql_reference/statements/misc.md#misc_operations-optimize) 查询未执行合并,则启用或禁用抛出异常。

-By default, `OPTIMIZE` returns successfully even if it didn’t do anything. This setting lets you differentiate these situations and get the reason in an exception message.
+默认情况下,即使没有做任何事情,`OPTIMIZE` 也会成功返回。 此设置允许您区分这些情况,并在异常消息中获知原因。

-Possible values:
+可能的值:

- 1 — Throwing an exception is enabled.
- 0 — Throwing an exception is disabled.

-Default value: 0.
+默认值:0。
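+一个简单的示例(假设存在MergeTree表 `test.visits`,名称仅为示意):
+
+``` sql
+SET optimize_throw_if_noop = 1;
+-- 如果没有任何可以合并的部分,此查询将抛出异常而不是静默成功
+OPTIMIZE TABLE test.visits;
+```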
## distributed\_replica\_error\_half\_life {#settings-distributed_replica_error_half_life}

-- Type: seconds
-- Default value: 60 seconds
+- 类型:秒
+- 默认值:60秒

-Controls how fast errors in distributed tables are zeroed. If a replica is unavailable for some time, accumulates 5 errors, and distributed\_replica\_error\_half\_life is set to 1 second, then the replica is considered normal 3 seconds after last error.
+控制分布式表中错误归零的速度。 如果某个副本在一段时间内不可用,累计出现5个错误,并且distributed\_replica\_error\_half\_life设置为1秒,则该副本在上一个错误发生3秒后被视为正常。

-See also:
+另请参阅:

-- [Table engine Distributed](../../operations/table_engines/distributed.md)
+- [分布式表引擎](../../engines/table_engines/special/distributed.md)
- [distributed\_replica\_error\_cap](#settings-distributed_replica_error_cap)

## distributed\_replica\_error\_cap {#settings-distributed_replica_error_cap}

-- Type: unsigned int
-- Default value: 1000
+- 类型:无符号int
+- 默认值:1000

-Error count of each replica is capped at this value, preventing a single replica from accumulating too many errors.
+每个副本的错误计数上限为此值,防止单个副本累积过多错误。

-See also:
+另请参阅:

-- [Table engine Distributed](../../operations/table_engines/distributed.md)
+- [分布式表引擎](../../engines/table_engines/special/distributed.md)
- [distributed\_replica\_error\_half\_life](#settings-distributed_replica_error_half_life)

## distributed\_directory\_monitor\_sleep\_time\_ms {#distributed_directory_monitor_sleep_time_ms}

-Base interval for the [Distributed](../table_engines/distributed.md) table engine to send data. The actual interval grows exponentially in the event of errors.
+[分布式](../../engines/table_engines/special/distributed.md) 表引擎发送数据的基本间隔。 发生错误时,实际间隔呈指数增长。

-Possible values:
+可能的值:

-- A positive integer number of milliseconds.
+- 正整数毫秒数。

-Default value: 100 milliseconds.
+默认值:100毫秒。

## distributed\_directory\_monitor\_max\_sleep\_time\_ms {#distributed_directory_monitor_max_sleep_time_ms}

-Maximum interval for the [Distributed](../table_engines/distributed.md) table engine to send data. Limits exponential growth of the interval set in the [distributed\_directory\_monitor\_sleep\_time\_ms](#distributed_directory_monitor_sleep_time_ms) setting.
+[分布式](../../engines/table_engines/special/distributed.md) 表引擎发送数据的最大间隔。 限制 [distributed\_directory\_monitor\_sleep\_time\_ms](#distributed_directory_monitor_sleep_time_ms) 设置中所设间隔的指数增长。

-Possible values:
+可能的值:

-- A positive integer number of milliseconds.
+- 正整数毫秒数。

-Default value: 30000 milliseconds (30 seconds).
+默认值:30000毫秒(30秒)。

## distributed\_directory\_monitor\_batch\_inserts {#distributed_directory_monitor_batch_inserts}

-Enables/disables sending of inserted data in batches.
+启用/禁用批量发送插入的数据。

-When batch sending is enabled, the [Distributed](../table_engines/distributed.md) table engine tries to send multiple files of inserted data in one operation instead of sending them separately. Batch sending improves cluster performance by better-utilizing server and network resources.
+启用批量发送时,[分布式](../../engines/table_engines/special/distributed.md) 表引擎会尝试在一次操作中发送插入数据的多个文件,而不是单独发送它们。 批量发送通过更好地利用服务器和网络资源来提高集群性能。

-Possible values:
+可能的值:

- 1 — Enabled.
- 0 — Disabled.

-Default value: 0.
+默认值:0。

## os\_thread\_priority {#setting-os-thread-priority}

-Sets the priority ([nice](https://en.wikipedia.org/wiki/Nice_(Unix))) for threads that execute queries. The OS scheduler considers this priority when choosing the next thread to run on each available CPU core.
+为执行查询的线程设置优先级([nice](https://en.wikipedia.org/wiki/Nice_(Unix)))。 当选择要在每个可用CPU内核上运行的下一个线程时,操作系统调度程序会考虑此优先级。

-!!! warning "Warning"
-    To use this setting, you need to set the `CAP_SYS_NICE` capability. The `clickhouse-server` package sets it up during installation. Some virtual environments don’t allow you to set the `CAP_SYS_NICE` capability. In this case, `clickhouse-server` shows a message about it at the start.
+!!! warning "警告"
+    要使用此设置,您需要具有 `CAP_SYS_NICE` 能力。 `clickhouse-server` 软件包在安装过程中会设置它。 某些虚拟环境不允许您设置 `CAP_SYS_NICE` 能力。 在这种情况下,`clickhouse-server` 会在启动时显示相关消息。

-Possible values:
+可能的值:

-- You can set values in the range `[-20, 19]`.
+- 您可以在 `[-20, 19]` 范围内设置值。

-Lower values mean higher priority. Threads with low `nice` priority values are executed more frequently than threads with high values. High values are preferable for long-running non-interactive queries because it allows them to quickly give up resources in favour of short interactive queries when they arrive.
+值越低意味着优先级越高。 具有较低 `nice` 优先级值的线程比具有较高值的线程执行得更频繁。 高值更适合长时间运行的非交互式查询,因为这样它们就可以在短交互式查询到达时迅速让出资源。

-Default value: 0.
+默认值:0。
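+一个会话级的示例(假设运行环境允许设置 `CAP_SYS_NICE`):
+
+``` sql
+-- 降低长时间运行的非交互式查询的线程优先级
+SET os_thread_priority = 10;
+```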
## query\_profiler\_real\_time\_period\_ns {#query_profiler_real_time_period_ns}

-Sets the period for a real clock timer of the [query profiler](../../operations/performance/sampling_query_profiler.md). Real clock timer counts wall-clock time.
+设置 [查询分析器](../../operations/optimizing_performance/sampling_query_profiler.md) 的实时时钟定时器周期。 实时时钟定时器计算挂钟时间。

-Possible values:
+可能的值:

-- Positive integer number, in nanoseconds.
+- 正整数,以纳秒为单位。

- Recommended values:
+ 推荐值:

- 10000000 (100 times a second) nanoseconds and less for single queries.
- 1000000000 (once a second) for cluster-wide profiling.

-- 0 for turning off the timer.
+- 0表示关闭定时器。

-Type: [UInt64](../../data_types/int_uint.md).
+类型: [UInt64](../../sql_reference/data_types/int_uint.md)。

-Default value: 1000000000 nanoseconds (once a second).
+默认值:1000000000纳秒(每秒一次)。

-See also:
+另请参阅:

-- System table [trace\_log](../system_tables.md#system_tables-trace_log)
+- 系统表 [trace\_log](../../operations/system_tables.md#system_tables-trace_log)

## query\_profiler\_cpu\_time\_period\_ns {#query_profiler_cpu_time_period_ns}

-Sets the period for a CPU clock timer of the [query profiler](../../operations/performance/sampling_query_profiler.md). This timer counts only CPU time.
+设置 [查询分析器](../../operations/optimizing_performance/sampling_query_profiler.md) 的CPU时钟定时器周期。 此定时器仅计算CPU时间。

-Possible values:
+可能的值:

-- A positive integer number of nanoseconds.
+- 正整数纳秒数。

- Recommended values:
+ 推荐值:

- 10000000 (100 times a second) nanoseconds and more for single queries.
- 1000000000 (once a second) for cluster-wide profiling.

-- 0 for turning off the timer.
+- 0表示关闭定时器。

-Type: [UInt64](../../data_types/int_uint.md).
+类型: [UInt64](../../sql_reference/data_types/int_uint.md)。

-Default value: 1000000000 nanoseconds.
+默认值:1000000000纳秒。

-See also:
+另请参阅:

-- System table [trace\_log](../system_tables.md#system_tables-trace_log)
+- 系统表 [trace\_log](../../operations/system_tables.md#system_tables-trace_log)

## allow\_introspection\_functions {#settings-allow_introspection_functions}

-Enables of disables [introspections functions](../../query_language/functions/introspection.md) for query profiling.
+启用或禁用用于查询分析的 [内省函数](../../sql_reference/functions/introspection.md)。

-Possible values:
+可能的值:

- 1 — Introspection functions enabled.
- 0 — Introspection functions disabled.

-Default value: 0.
+默认值:0。

-**See Also**
+**另请参阅**

-- [Sampling Query Profiler](../performance/sampling_query_profiler.md)
-- System table [trace\_log](../system_tables.md#system_tables-trace_log)
+- [采样查询分析器](../optimizing_performance/sampling_query_profiler.md)
+- 系统表 [trace\_log](../../operations/system_tables.md#system_tables-trace_log)

## input\_format\_parallel\_parsing {#input-format-parallel-parsing}

-- Type: bool
-- Default value: True
+- 类型:布尔
+- 默认值:True

-Enable order-preserving parallel parsing of data formats. Supported only for TSV, TKSV, CSV and JSONEachRow formats.
+启用数据格式的保序并行解析。 仅支持TSV、TSKV、CSV和JSONEachRow格式。

## min\_chunk\_bytes\_for\_parallel\_parsing {#min-chunk-bytes-for-parallel-parsing}

-- Type: unsigned int
-- Default value: 1 MiB
+- 类型:无符号int
+- 默认值:1MiB

-The minimum chunk size in bytes, which each thread will parse in parallel.
+每个线程将并行解析的最小块大小(以字节为单位)。
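+一个说明性示例(假设服务器已启用 `trace_log`),演示如何为单个会话打开采样分析:
+
+``` sql
+SET allow_introspection_functions = 1;
+SET query_profiler_real_time_period_ns = 10000000; -- 每秒约100次采样
+SELECT count() FROM numbers(100000000) WHERE number % 3 = 1;
+```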
## output\_format\_avro\_codec {#settings-output_format_avro_codec}

-Sets the compression codec used for output Avro file.
+设置用于输出Avro文件的压缩编解码器。

-Type: string
+类型:字符串

-Possible values:
+可能的值:

- `null` — No compression
- `deflate` — Compress with Deflate (zlib)
-- `snappy` — Compress with [Snappy](https://google.github.io/snappy/)
+- `snappy` — Compress with [Snappy](https://google.github.io/snappy/)

-Default value: `snappy` (if available) or `deflate`.
+默认值: `snappy`(如果可用)或 `deflate`。

## output\_format\_avro\_sync\_interval {#settings-output_format_avro_sync_interval}

-Sets minimum data size (in bytes) between synchronization markers for output Avro file.
+设置输出Avro文件的同步标记之间的最小数据大小(以字节为单位)。

-Type: unsigned int
+类型:无符号int

-Possible values: 32 (32 bytes) - 1073741824 (1 GiB)
+可能的值:32(32字节)- 1073741824(1GiB)

-Default value: 32768 (32 KiB)
+默认值:32768(32KiB)

## format\_avro\_schema\_registry\_url {#settings-format_avro_schema_registry_url}

-Sets Confluent Schema Registry URL to use with [AvroConfluent](../../interfaces/formats.md#data-format-avro-confluent) format
+设置与 [AvroConfluent](../../interfaces/formats.md#data-format-avro-confluent) 格式一起使用的Confluent Schema Registry URL

-Type: URL
+类型:URL

-Default value: Empty
+默认值:空

-[Original article](https://clickhouse.tech/docs/en/operations/settings/settings/)
+[原始文章](https://clickhouse.tech/docs/en/operations/settings/settings/)
diff --git a/docs/zh/operations/settings/settings_profiles.md b/docs/zh/operations/settings/settings_profiles.md
index 3c694c0889e..21379a65ebc 100644
--- a/docs/zh/operations/settings/settings_profiles.md
+++ b/docs/zh/operations/settings/settings_profiles.md
@@ -1,23 +1,26 @@
---
-en_copy: true
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_priority: 61
+toc_title: "\u8BBE\u7F6E\u914D\u7F6E\u6587\u4EF6"
---

-# Settings Profiles {#settings-profiles}
+# 设置配置文件 {#settings-profiles}

-A settings profile is a collection of settings grouped under the same name. Each ClickHouse user has a profile.
-To apply all the settings in a profile, set the `profile` setting.
+设置配置文件是以相同名称分组的设置的集合。 每个ClickHouse用户都有一个配置文件。
+要应用配置文件中的所有设置,请设置 `profile` 设置。

-Example:
+示例:

-Install the `web` profile.
+安装 `web` 配置文件。

``` sql
SET profile = 'web'
```

-Settings profiles are declared in the user config file. This is usually `users.xml`.
+设置配置文件在用户配置文件中声明,通常是 `users.xml`。

-Example:
+示例:

``` xml

@@ -61,8 +64,8 @@ Example:

```

-The example specifies two profiles: `default` and `web`. The `default` profile has a special purpose: it must always be present and is applied when starting the server. In other words, the `default` profile contains default settings. The `web` profile is a regular profile that can be set using the `SET` query or using a URL parameter in an HTTP query.
+该示例指定了两个配置文件: `default` 和 `web`。 `default` 配置文件有特殊用途:它必须始终存在,并在启动服务器时应用。 换句话说,`default` 配置文件包含默认设置。 `web` 配置文件是一个常规配置文件,可以使用 `SET` 查询或HTTP查询中的URL参数来设置。

-Settings profiles can inherit from each other. To use inheritance, indicate one or multiple `profile` settings before the other settings that are listed in the profile. In case when one setting is defined in different profiles, the latest defined is used.
+设置配置文件可以相互继承。 要使用继承,请在配置文件所列其他设置之前,指定一个或多个 `profile` 设置。 如果同一设置在不同的配置文件中都有定义,则使用最后定义的那个。

-[Original article](https://clickhouse.tech/docs/en/operations/settings/settings_profiles/)
+[原始文章](https://clickhouse.tech/docs/en/operations/settings/settings_profiles/)
diff --git a/docs/zh/operations/settings/settings_users.md b/docs/zh/operations/settings/settings_users.md
index 8b852380f5b..2dba689d08f 100644
--- a/docs/zh/operations/settings/settings_users.md
+++ b/docs/zh/operations/settings/settings_users.md
@@ -1,12 +1,15 @@
---
-en_copy: true
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_priority: 63
+toc_title: "\u7528\u6237\u8BBE\u7F6E"
---

-# User Settings {#user-settings}
+# 用户设置 {#user-settings}

-The `users` section of the `user.xml` configuration file contains user settings.
+`user.xml` 配置文件的 `users` 部分包含用户设置。

-Structure of the `users` section:
+`users` 部分的结构:

``` xml

@@ -35,74 +38,74 @@ Structure of the `users` section:

```

-### user\_name/password {#user-namepassword}
+### 用户名称/密码 {#user-namepassword}

-Password can be specified in plaintext or in SHA256 (hex format).
+密码可以以明文或SHA256(十六进制格式)指定。

-- To assign a password in plaintext (**not recommended**), place it in a `password` element.
+- 要以明文形式分配密码 (**不推荐**),请将其放在 `password` 元素中。

- For example, `qwerty`. The password can be left blank.
+ 例如,`qwerty`。 密码可以留空。

-- To assign a password using its SHA256 hash, place it in a `password_sha256_hex` element.
+- 要使用其SHA256哈希分配密码,请将其放在 `password_sha256_hex` 元素中。

- For example, `65e84be33532fb784c48129675f9eff3a682b27168c0ea744b2cf58ee02337c5`.
+ 例如,`65e84be33532fb784c48129675f9eff3a682b27168c0ea744b2cf58ee02337c5`。

- Example of how to generate a password from shell:
+ 如何从shell生成密码的示例:

PASSWORD=$(base64 < /dev/urandom | head -c8); echo "$PASSWORD"; echo -n "$PASSWORD" | sha256sum | tr -d '-'

- The first line of the result is the password. The second line is the corresponding SHA256 hash.
+ 结果的第一行是密码。 第二行是相应的SHA256哈希。

-- For compatibility with MySQL clients, password can be specified in double SHA1 hash. Place it in `password_double_sha1_hex` element.
+- 为了与MySQL客户端兼容,密码可以用双SHA1哈希指定。 请将其放在 `password_double_sha1_hex` 元素中。

- For example, `08b4a0f1de6ad37da17359e592c8d74788a83eb0`.
+ 例如,`08b4a0f1de6ad37da17359e592c8d74788a83eb0`。

- Example of how to generate a password from shell:
+ 如何从shell生成密码的示例:

PASSWORD=$(base64 < /dev/urandom | head -c8); echo "$PASSWORD"; echo -n "$PASSWORD" | sha1sum | tr -d '-' | xxd -r -p | sha1sum | tr -d '-'

- The first line of the result is the password. The second line is the corresponding double SHA1 hash.
+ 结果的第一行是密码。 第二行是相应的双SHA1哈希。
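+下面是一个说明性的配置片段(用户名 `alice` 仅为示意,哈希值取自上面的示例):
+
+``` xml
+<users>
+    <alice>
+        <!-- 对应明文密码 "qwerty" 的SHA256哈希 -->
+        <password_sha256_hex>65e84be33532fb784c48129675f9eff3a682b27168c0ea744b2cf58ee02337c5</password_sha256_hex>
+    </alice>
+</users>
+```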
-### user\_name/networks {#user-namenetworks}
+### 用户名称/网络 {#user-namenetworks}

-List of networks from which the user can connect to the ClickHouse server.
+用户可以从中连接到ClickHouse服务器的网络列表。

-Each element of the list can have one of the following forms:
+列表中的每个元素都可以具有以下形式之一:

- `` — IP address or network mask.

- Examples: `213.180.204.3`, `10.0.0.1/8`, `10.0.0.1/255.255.255.0`, `2a02:6b8::3`, `2a02:6b8::3/64`, `2a02:6b8::3/ffff:ffff:ffff:ffff::`.
+ 例: `213.180.204.3`, `10.0.0.1/8`, `10.0.0.1/255.255.255.0`, `2a02:6b8::3`, `2a02:6b8::3/64`, `2a02:6b8::3/ffff:ffff:ffff:ffff::`.

- `` — Hostname.

- Example: `example01.host.ru`.
+ 示例: `example01.host.ru`.

- To check access, a DNS query is performed, and all returned IP addresses are compared to the peer address.
+ 要检查访问,将执行DNS查询,并将所有返回的IP地址与对等地址进行比较。

- `` — Regular expression for hostnames.

- Example, `^example\d\d-\d\d-\d\.host\.ru$`
+ 示例, `^example\d\d-\d\d-\d\.host\.ru$`

- To check access, a [DNS PTR query](https://en.wikipedia.org/wiki/Reverse_DNS_lookup) is performed for the peer address and then the specified regexp is applied. Then, another DNS query is performed for the results of the PTR query and all the received addresses are compared to the peer address. We strongly recommend that regexp ends with $.
+ 要检查访问,将对对等地址执行 [DNS PTR查询](https://en.wikipedia.org/wiki/Reverse_DNS_lookup),然后应用指定的正则表达式。 接着对PTR查询的结果执行另一个DNS查询,并将所有接收到的地址与对等地址进行比较。 我们强烈建议正则表达式以$结尾。

-All results of DNS requests are cached until the server restarts.
+DNS请求的所有结果都会被缓存,直到服务器重新启动。

-**Examples**
+**例**

-To open access for user from any network, specify:
+要为用户开放来自任何网络的访问,请指定:

``` xml
::/0
```

-!!! warning "Warning"
-    It’s insecure to open access from any network unless you have a firewall properly configured or the server is not directly connected to Internet.
+!!! warning "警告"
+    除非正确配置了防火墙,或者服务器没有直接连接到互联网,否则从任何网络开放访问都是不安全的。

-To open access only from localhost, specify:
+若要仅允许从本地主机访问,请指定:

``` xml
::1
@@ -111,22 +114,22 @@ To open access only from localhost, specify:

### user\_name/profile {#user-nameprofile}

-You can assign a settings profile for the user. Settings profiles are configured in a separate section of the `users.xml` file. For more information, see [Profiles of Settings](settings_profiles.md).
+您可以为用户分配设置配置文件。 设置配置文件在 `users.xml` 文件的单独部分中配置。 有关详细信息,请参阅 [设置配置文件](settings_profiles.md)。

-### user\_name/quota {#user-namequota}
+### 用户名称/配额 {#user-namequota}

-Quotas allow you to track or limit resource usage over a period of time. Quotas are configured in the `quotas`
-section of the `users.xml` configuration file.
+配额允许您在一段时间内跟踪或限制资源使用情况。 配额在 `users.xml` 配置文件的
+`quotas` 部分中配置。

-You can assign a quotas set for the user. For a detailed description of quotas configuration, see [Quotas](../quotas.md#quotas).
+您可以为用户分配配额集。 有关配额配置的详细说明,请参阅 [配额](../quotas.md#quotas)。

-### user\_name/databases {#user-namedatabases}
+### 用户名/数据库 {#user-namedatabases}

-In this section, you can you can limit rows that are returned by ClickHouse for `SELECT` queries made by the current user, thus implementing basic row-level security.
+在本节中,您可以限制ClickHouse为当前用户的 `SELECT` 查询返回的行,从而实现基本的行级安全性。

-**Example**
+**示例**

-The following configuration forces that user `user1` can only see the rows of `table1` as the result of `SELECT` queries, where the value of the `id` field is 1000.
+以下配置强制用户 `user1` 在 `SELECT` 查询的结果中只能看到 `table1` 中 `id` 字段值为1000的行。

``` xml

@@ -140,6 +143,6 @@ The following configuration forces that user `user1` can only see the rows of `t

```

-The `filter` can be any expression resulting in a [UInt8](../../data_types/int_uint.md)-type value. It usually contains comparisons and logical operators. Rows from `database_name.table1` where filter results to 0 are not returned for this user. The filtering is incompatible with `PREWHERE` operations and disables `WHERE→PREWHERE` optimization.
+`filter` 可以是任何产生 [UInt8](../../sql_reference/data_types/int_uint.md) 类型值的表达式。 它通常包含比较和逻辑运算符。 对于该用户,`database_name.table1` 中筛选结果为0的行不会返回。 过滤与 `PREWHERE` 操作不兼容,并会禁用 `WHERE→PREWHERE` 优化。

-[Original article](https://clickhouse.tech/docs/en/operations/settings/settings_users/)
+[原始文章](https://clickhouse.tech/docs/en/operations/settings/settings_users/)
diff --git a/docs/zh/operations/system_tables.md b/docs/zh/operations/system_tables.md
index af47b99222a..ba762ddb562 100644
--- a/docs/zh/operations/system_tables.md
+++ b/docs/zh/operations/system_tables.md
@@ -1,25 +1,28 @@
---
-en_copy: true
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_priority: 52
+toc_title: "\u7CFB\u7EDF\u8868"
---

-# System tables {#system-tables}
+# 系统表 {#system-tables}

-System tables are used for implementing part of the system’s functionality, and for providing access to information about how the system is working.
-You can’t delete a system table (but you can perform DETACH).
-System tables don’t have files with data on the disk or files with metadata. The server creates all the system tables when it starts.
-System tables are read-only.
-They are located in the ‘system’ database.
+系统表用于实现系统的部分功能,并提供对系统工作方式信息的访问。
+您无法删除系统表(但可以执行DETACH)。
+系统表在磁盘上没有数据文件或元数据文件。 服务器在启动时创建所有系统表。
+系统表是只读的。
+它们位于 ‘system’ 数据库中。

-## system.asynchronous\_metrics {#system_tables-asynchronous_metrics}
+## system.asynchronous\_metrics {#system_tables-asynchronous_metrics}

-Contains metrics that are calculated periodically in the background. For example, the amount of RAM in use.
+包含在后台定期计算的指标。 例如,正在使用的RAM量。

-Columns:
+列:

-- `metric` ([String](../data_types/string.md)) — Metric name.
-- `value` ([Float64](../data_types/float.md)) — Metric value.
+- `metric` ([字符串](../sql_reference/data_types/string.md)) — Metric name.
+- `value` ([Float64](../sql_reference/data_types/float.md)) — Metric value.

-**Example**
+**示例**

``` sql
SELECT * FROM system.asynchronous_metrics LIMIT 10
@@ -40,18 +43,18 @@ SELECT * FROM system.asynchronous_metrics LIMIT 10
└─────────────────────────────────────────┴────────────┘
```

-**See Also**
+**另请参阅**

-- [Monitoring](monitoring.md) — Base concepts of ClickHouse monitoring.
-- [system.metrics](#system_tables-metrics) — Contains instantly calculated metrics.
-- [system.events](#system_tables-events) — Contains a number of events that have occurred.
-- [system.metric\_log](#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`.
+- [监测](monitoring.md) — Base concepts of ClickHouse monitoring.
+- [system.metrics](#system_tables-metrics) — Contains instantly calculated metrics.
+- [system.events](#system_tables-events) — Contains a number of events that have occurred.
+- [system.metric\_log](#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`.

-## system.clusters {#system-clusters}
+## system.clusters {#system-clusters}

-Contains information about clusters available in the config file and the servers in them.
+包含有关配置文件中可用的集群及其中的服务器的信息。

-Columns:
+列:

- `cluster` (String) — The cluster name.
- `shard_num` (UInt32) — The shard number in the cluster, starting from 1.
@@ -61,30 +64,30 @@ Columns:
- `host_address` (String) — The host IP address obtained from DNS.
- `port` (UInt16) — The port to use for connecting to the server.
- `user` (String) — The name of the user for connecting to the server.
-- `errors_count` (UInt32) - number of times this host failed to reach replica.
-- `estimated_recovery_time` (UInt32) - seconds left until replica error count is zeroed and it is considered to be back to normal.
+- `errors_count` (UInt32) - 此主机无法到达副本的次数。
+- `estimated_recovery_time` (UInt32) - 距副本错误计数归零并被视为恢复正常还剩的秒数。

-Please note that `errors_count` is updated once per query to the cluster, but `estimated_recovery_time` is recalculated on-demand. So there could be a case of non-zero `errors_count` and zero `estimated_recovery_time`, that next query will zero `errors_count` and try to use replica as if it has no errors.
+请注意,`errors_count` 在对集群的每次查询时更新一次,而 `estimated_recovery_time` 按需重新计算。 因此可能出现 `errors_count` 非零而 `estimated_recovery_time` 为零的情况,此时下一个查询会将 `errors_count` 归零,并像没有错误一样尝试使用该副本。

-**See also**
+**另请参阅**

-- [Table engine Distributed](table_engines/distributed.md)
-- [distributed\_replica\_error\_cap setting](settings/settings.md#settings-distributed_replica_error_cap)
-- [distributed\_replica\_error\_half\_life setting](settings/settings.md#settings-distributed_replica_error_half_life)
+- [分布式表引擎](../engines/table_engines/special/distributed.md)
+- [distributed\_replica\_error\_cap设置](settings/settings.md#settings-distributed_replica_error_cap)
+- [distributed\_replica\_error\_half\_life设置](settings/settings.md#settings-distributed_replica_error_half_life)
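+例如,检查每个副本累积的错误计数:
+
+``` sql
+SELECT cluster, shard_num, host_name, errors_count, estimated_recovery_time
+FROM system.clusters;
+```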
-## system.columns {#system-columns}
+## system.columns {#system-columns}

-Contains information about columns in all the tables.
+包含所有表中列的信息。

-You can use this table to get information similar to the [DESCRIBE TABLE](../query_language/misc.md#misc-describe-table) query, but for multiple tables at once.
+您可以使用此表获取类似于 [DESCRIBE TABLE](../sql_reference/statements/misc.md#misc-describe-table) 查询的信息,但可以一次针对多个表。

-The `system.columns` table contains the following columns (the column type is shown in brackets):
+`system.columns` 表包含以下列(括号中显示列类型):

- `database` (String) — Database name.
- `table` (String) — Table name.
- `name` (String) — Column name.
- `type` (String) — Column type.
-- `default_kind` (String) — Expression type (`DEFAULT`, `MATERIALIZED`, `ALIAS`) for the default value, or an empty string if it is not defined.
+- `default_kind` (String) — Expression type (`DEFAULT`, `MATERIALIZED`, `ALIAS`),指默认值的表达式类型;如果未定义,则为空字符串。
- `default_expression` (String) — Expression for the default value, or an empty string if it is not defined.
- `data_compressed_bytes` (UInt64) — The size of compressed data, in bytes.
- `data_uncompressed_bytes` (UInt64) — The size of decompressed data, in bytes.
@@ -95,15 +98,15 @@ The `system.columns` table contains the following columns (the column type is sh
- `is_in_primary_key` (UInt8) — Flag that indicates whether the column is in the primary key expression.
- `is_in_sampling_key` (UInt8) — Flag that indicates whether the column is in the sampling key expression.

-## system.contributors {#system-contributors}
+## system.contributors {#system-contributors}

-Contains information about contributors. All constributors in random order. The order is random at query execution time.
+包含有关贡献者的信息。 所有贡献者按随机顺序排列。 该顺序在查询执行时是随机的。

-Columns:
+列:

- `name` (String) — Contributor (author) name from git log.
-**Example**
+**示例**

``` sql
SELECT * FROM system.contributors LIMIT 10
@@ -124,7 +127,7 @@ SELECT * FROM system.contributors LIMIT 10
└──────────────────┘
```

-To find out yourself in the table, use a query:
+要在表中找到您自己,请使用查询:

``` sql
SELECT * FROM system.contributors WHERE name='Olga Khvostikova'
@@ -136,21 +139,21 @@ SELECT * FROM system.contributors WHERE name='Olga Khvostikova'
└──────────────────┘
```

-## system.databases {#system-databases}
+## system.databases {#system-databases}

-This table contains a single String column called ‘name’ – the name of a database.
-Each database that the server knows about has a corresponding entry in the table.
-This system table is used for implementing the `SHOW DATABASES` query.
+此表包含一个名为 ‘name’ 的String列,即数据库的名称。
+服务器知道的每个数据库在表中都有相应的条目。
+该系统表用于实现 `SHOW DATABASES` 查询。

-## system.detached\_parts {#system_tables-detached_parts}
+## system.detached\_parts {#system_tables-detached_parts}

-Contains information about detached parts of [MergeTree](table_engines/mergetree.md) tables. The `reason` column specifies why the part was detached. For user-detached parts, the reason is empty. Such parts can be attached with [ALTER TABLE ATTACH PARTITION\|PART](../query_language/query_language/alter/#alter_attach-partition) command. For the description of other columns, see [system.parts](#system_tables-parts). If part name is invalid, values of some columns may be `NULL`. Such parts can be deleted with [ALTER TABLE DROP DETACHED PART](../query_language/query_language/alter/#alter_drop-detached).
+包含有关 [MergeTree](../engines/table_engines/mergetree_family/mergetree.md) 表的分离部件的信息。 `reason` 列指定部件被分离的原因。 对于用户分离的部件,原因为空。 可以使用 [ALTER TABLE ATTACH PARTITION\|PART](../sql_reference/statements/alter.md#alter_attach-partition) 命令附加这些部件。 有关其他列的说明,请参阅 [system.parts](#system_tables-parts)。 如果部件名称无效,某些列的值可能为 `NULL`。 可以使用 [ALTER TABLE DROP DETACHED PART](../sql_reference/statements/alter.md#alter_drop-detached) 删除这些部件。

-## system.dictionaries {#system-dictionaries}
+## system.dictionaries {#system-dictionaries}

-Contains information about external dictionaries.
+包含有关外部字典的信息。

-Columns:
+列:

- `name` (String) — Dictionary name.
- `type` (String) — Dictionary type: Flat, Hashed, Cache.
@@ -163,22 +166,22 @@ Columns:
- `element_count` (UInt64) — The number of items stored in the dictionary.
- `load_factor` (Float64) — The percentage filled in the dictionary (for a hashed dictionary, the percentage filled in the hash table).
- `creation_time` (DateTime) — The time when the dictionary was created or last successfully reloaded.
-- `last_exception` (String) — Text of the error that occurs when creating or reloading the dictionary if the dictionary couldn’t be created.
+- `last_exception` (String) — Text of the error that occurs when creating or reloading the dictionary if the dictionary couldn't be created.
- `source` (String) — Text describing the data source for the dictionary.

-Note that the amount of memory used by the dictionary is not proportional to the number of items stored in it. So for flat and cached dictionaries, all the memory cells are pre-assigned, regardless of how full the dictionary actually is.
+请注意,字典使用的内存量与其中存储的项目数量不成正比。 因此,对于flat和cache字典,所有内存单元都是预先分配的,而不管字典实际有多满。
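+例如,检查已加载外部字典的填充情况:
+
+``` sql
+SELECT name, type, element_count, load_factor
+FROM system.dictionaries;
+```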
-## system.events {#system_tables-events}
+## system.events {#system_tables-events}

-Contains information about the number of events that have occurred in the system. For example, in the table, you can find how many `SELECT` queries were processed since the ClickHouse server started.
+包含有关系统中发生的事件数的信息。 例如,在该表中,您可以找到自ClickHouse服务器启动以来已处理的 `SELECT` 查询数量。

-Columns:
+列:

-- `event` ([String](../data_types/string.md)) — Event name.
-- `value` ([UInt64](../data_types/int_uint.md)) — Number of events occurred.
-- `description` ([String](../data_types/string.md)) — Event description.
+- `event` ([字符串](../sql_reference/data_types/string.md)) — Event name.
+- `value` ([UInt64](../sql_reference/data_types/int_uint.md)) — Number of events occurred.
+- `description` ([字符串](../sql_reference/data_types/string.md)) — Event description.

-**Example**
+**示例**

``` sql
SELECT * FROM system.events LIMIT 5
@@ -194,43 +197,43 @@ SELECT * FROM system.events LIMIT 5
└───────────────────────────────────────┴───────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```

-**See Also**
+**另请参阅**

-- [system.asynchronous\_metrics](#system_tables-asynchronous_metrics) — Contains periodically calculated metrics.
-- [system.metrics](#system_tables-metrics) — Contains instantly calculated metrics.
-- [system.metric\_log](#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`.
-- [Monitoring](monitoring.md) — Base concepts of ClickHouse monitoring.
+- [system.asynchronous\_metrics](#system_tables-asynchronous_metrics) — Contains periodically calculated metrics.
+- [system.metrics](#system_tables-metrics) — Contains instantly calculated metrics.
+- [system.metric\_log](#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`.
+- [监测](monitoring.md) — Base concepts of ClickHouse monitoring.

-## system.functions {#system-functions}
+## system.functions {#system-functions}

-Contains information about normal and aggregate functions.
+包含有关常规函数和聚合函数的信息。

-Columns:
+列:

- `name`(`String`) – The name of the function.
- `is_aggregate`(`UInt8`) — Whether the function is aggregate.

-## system.graphite\_retentions {#system-graphite-retentions}
+## system.graphite\_retentions {#system-graphite-retentions}

-Contains information about parameters [graphite\_rollup](server_settings/settings.md#server_settings-graphite_rollup) which are used in tables with [\*GraphiteMergeTree](table_engines/graphitemergetree.md) engines.
+包含有关 [graphite\_rollup](server_configuration_parameters/settings.md#server_configuration_parameters-graphite_rollup) 参数的信息,这些参数用于带有 [\*GraphiteMergeTree](../engines/table_engines/mergetree_family/graphitemergetree.md) 引擎的表。

-Columns:
+列:

-- `config_name` (String) - `graphite_rollup` parameter name.
-- `regexp` (String) - A pattern for the metric name.
-- `function` (String) - The name of the aggregating function.
-- `age` (UInt64) - The minimum age of the data in seconds.
-- `precision` (UInt64) - How precisely to define the age of the data in seconds.
-- `priority` (UInt16) - Pattern priority.
-- `is_default` (UInt8) - Whether the pattern is the default.
-- `Tables.database` (Array(String)) - Array of names of database tables that use the `config_name` parameter.
-- `Tables.table` (Array(String)) - Array of table names that use the `config_name` parameter.
+- `config_name` (字符串) - `graphite_rollup` 参数名称。
+- `regexp` (String) - 指标名称的模式。
+- `function` (String) - 聚合函数的名称。
+- `age` (UInt64) - 数据的最小期限(以秒为单位)。
+- `precision` (UInt64) - 定义数据期限的精确程度(以秒为单位)。
+- `priority` (UInt16) - 模式优先级。
+- `is_default` (UInt8) - 模式是否为默认值。
+- `Tables.database` (Array(String)) - 使用 `config_name` 参数的数据库表名称数组。
+- `Tables.table` (Array(String)) - 使用 `config_name` 参数的表名称数组。

-## system.merges {#system-merges}
+## system.merges {#system-merges}

-Contains information about merges and part mutations currently in process for tables in the MergeTree family.
+包含有关MergeTree系列表当前正在进行的合并和部件突变的信息。

-Columns:
+列:

- `database` (String) — The name of the database the table is in.
- `table` (String) — Table name.
@@ -238,7 +241,7 @@ Columns:
- `progress` (Float64) — The percentage of completed work from 0 to 1.
- `num_parts` (UInt64) — The number of pieces to be merged.
- `result_part_name` (String) — The name of the part that will be formed as the result of merging.
-- `is_mutation` (UInt8) - 1 if this process is a part mutation.
+- `is_mutation` (UInt8) - 如果该过程是部件突变,则为1。
- `total_size_bytes_compressed` (UInt64) — The total size of the compressed data in the merged chunks.
- `total_size_marks` (UInt64) — The total number of marks in the merged parts.
- `bytes_read_uncompressed` (UInt64) — Number of bytes read, uncompressed.
@@ -246,19 +249,19 @@ Columns:
- `bytes_written_uncompressed` (UInt64) — Number of bytes written, uncompressed.
- `rows_written` (UInt64) — Number of rows written.

-## system.metrics {#system_tables-metrics}
+## system.metrics {#system_tables-metrics}

-Contains metrics which can be calculated instantly, or have a current value. For example, the number of simultaneously processed queries or the current replica delay. This table is always up to date.
+包含可以立即计算或具有当前值的指标。 例如,同时处理的查询数量或当前副本延迟。 此表始终是最新的。

-Columns:
+列:

-- `metric` ([String](../data_types/string.md)) — Metric name.
-- `value` ([Int64](../data_types/int_uint.md)) — Metric value.
-- `description` ([String](../data_types/string.md)) — Metric description.
+- `metric` ([字符串](../sql_reference/data_types/string.md)) — Metric name.
+- `value` ([Int64](../sql_reference/data_types/int_uint.md)) — Metric value.
+- `description` ([字符串](../sql_reference/data_types/string.md)) — Metric description.

-The list of supported metrics you can find in the [dbms/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/dbms/Common/CurrentMetrics.cpp) source file of ClickHouse.
+您可以在ClickHouse源文件 [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp) 中找到支持的指标列表。

-**Example**
+**示例**

``` sql
SELECT * FROM system.metrics LIMIT 10
@@ -279,17 +282,17 @@ SELECT * FROM system.metrics LIMIT 10
└────────────────────────────┴───────┴──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```

-**See Also**
+**另请参阅**

-- [system.asynchronous\_metrics](#system_tables-asynchronous_metrics) — Contains periodically calculated metrics.
+- [system.asynchronous\_metrics](#system_tables-asynchronous_metrics) — Contains periodically calculated metrics.
+- [system.events](#system_tables-events) — Contains a number of events that occurred.
+- [system.metric\_log](#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`.
+- [监测](monitoring.md) — Base concepts of ClickHouse monitoring.

-## system.metric\_log {#system_tables-metric_log}
+## system.metric\_log {#system_tables-metric_log}

-Contains history of metrics values from tables `system.metrics` and `system.events`, periodically flushed to disk.
-To turn on metrics history collection on `system.metric_log`, create `/etc/clickhouse-server/config.d/metric_log.xml` with following content:
+包含 `system.metrics` 和 `system.events` 表中指标值的历史记录,并定期刷新到磁盘。
+要在 `system.metric_log` 上开启指标历史记录收集,请创建具有以下内容的 `/etc/clickhouse-server/config.d/metric_log.xml`:

``` xml

@@ -302,7 +305,7 @@ To turn on metrics history collection on `system.metric_log`, create `/etc/click

```

-**Example**
+**示例**

``` sql
SELECT * FROM system.metric_log LIMIT 1 FORMAT Vertical;
@@ -335,50 +338,50 @@ CurrentMetric_ReplicatedChecks: 0
...
```

-**See also**
+**另请参阅**

-- [system.asynchronous\_metrics](#system_tables-asynchronous_metrics) — Contains periodically calculated metrics.
-- [system.events](#system_tables-events) — Contains a number of events that occurred.
-- [system.metrics](#system_tables-metrics) — Contains instantly calculated metrics.
-- [Monitoring](monitoring.md) — Base concepts of ClickHouse monitoring.
+- [system.asynchronous\_metrics](#system_tables-asynchronous_metrics) — Contains periodically calculated metrics.
+- [system.events](#system_tables-events) — Contains a number of events that occurred.
+- [system.metrics](#system_tables-metrics) — Contains instantly calculated metrics.
+- [监测](monitoring.md) — Base concepts of ClickHouse monitoring.

-## system.numbers {#system-numbers}
+## system.numbers {#system-numbers}

-This table contains a single UInt64 column named ‘number’ that contains almost all the natural numbers starting from zero.
-You can use this table for tests, or if you need to do a brute force search.
-Reads from this table are not parallelized.
+此表包含一个名为 ‘number’ 的UInt64列,其中包含几乎所有从零开始的自然数。
+您可以使用此表进行测试,或者在需要进行暴力搜索时使用。
+从此表的读取不是并行化的。

-## system.numbers\_mt {#system-numbers-mt}
+## system.numbers\_mt {#system-numbers-mt}

-The same as ‘system.numbers’ but reads are parallelized. The numbers can be returned in any order.
-Used for tests.
+与 ‘system.numbers’ 相同,但读取是并行化的。 数字可以按任意顺序返回。
+用于测试。

-## system.one {#system-one}
+## system.one {#system-one}

-This table contains a single row with a single ‘dummy’ UInt8 column containing the value 0.
-This table is used if a SELECT query doesn’t specify the FROM clause.
-This is similar to the DUAL table found in other DBMSs.
+此表包含单独的一行,其中只有一个值为0的 ‘dummy’ UInt8列。
+如果SELECT查询未指定FROM子句,则使用此表。
+这与其他DBMS中的DUAL表类似。
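+例如,生成前三个自然数:
+
+``` sql
+SELECT number FROM system.numbers LIMIT 3;
+```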
-## system.parts {#system_tables-parts}
+## system.parts {#system_tables-parts}

-Contains information about parts of [MergeTree](table_engines/mergetree.md) tables.
+包含有关 [MergeTree](../engines/table_engines/mergetree_family/mergetree.md) 表的部件的信息。

-Each row describes one data part.
+每行描述一个数据部分。

-Columns:
+列:

-- `partition` (String) – The partition name. To learn what a partition is, see the description of the [ALTER](../query_language/alter.md#query_language_queries_alter) query.
+- `partition` (String) – The partition name. To learn what a partition is, see the description of the [ALTER](../sql_reference/statements/alter.md#query_language_queries_alter) 查询。

- Formats:
+ 格式:

- - `YYYYMM` for automatic partitioning by month.
- - `any_string` when partitioning manually.
+ - `YYYYMM` 用于按月自动分区。
+ - `any_string` 用于手动分区。

- `name` (`String`) – Name of the data part.

-- `active` (`UInt8`) – Flag that indicates whether the data part is active. If a data part is active, it’s used in a table. Otherwise, it’s deleted. Inactive data parts remain after merging.
+- `active` (`UInt8`) – Flag that indicates whether the data part is active. If a data part is active, it's used in a table. Otherwise, it's deleted. Inactive data parts remain after merging.

-- `marks` (`UInt64`) – The number of marks. To get the approximate number of rows in a data part, multiply `marks` by the index granularity (usually 8192) (this hint doesn’t work for adaptive granularity).
+- `marks` (`UInt64`) – The number of marks. To get the approximate number of rows in a data part, multiply `marks` 乘以索引粒度(通常为8192)(此提示不适用于自适应粒度)。

- `rows` (`UInt64`) – The number of rows.

@@ -418,7 +421,7 @@ Columns:

- `primary_key_bytes_in_memory_allocated` (`UInt64`) – The amount of memory (in bytes) reserved for primary key values.

-- `is_frozen` (`UInt8`) – Flag that shows that a partition data backup exists. 1, the backup exists. 0, the backup doesn’t exist. For more details, see [FREEZE PARTITION](../query_language/alter.md#alter_freeze-partition)
+- `is_frozen` (`UInt8`) – Flag that shows that a partition data backup exists. 1, the backup exists. 0, the backup doesn't exist. For more details, see [FREEZE PARTITION](../sql_reference/statements/alter.md#alter_freeze-partition)

- `database` (`String`) – Name of the database.

@@ -430,29 +433,29 @@ Columns:

- `disk` (`String`) – Name of a disk that stores the data part.

-- `hash_of_all_files` (`String`) – [sipHash128](../query_language/functions/hash_functions.md#hash_functions-siphash128) of compressed files.
+- `hash_of_all_files` (`String`) – 压缩文件的 [sipHash128](../sql_reference/functions/hash_functions.md#hash_functions-siphash128)。

-- `hash_of_uncompressed_files` (`String`) – [sipHash128](../query_language/functions/hash_functions.md#hash_functions-siphash128) of uncompressed files (files with marks, index file etc.).
+- `hash_of_uncompressed_files` (`String`) – 未压缩文件(带标记的文件、索引文件等)的 [sipHash128](../sql_reference/functions/hash_functions.md#hash_functions-siphash128)。

-- `uncompressed_hash_of_compressed_files` (`String`) – [sipHash128](../query_language/functions/hash_functions.md#hash_functions-siphash128) of data in the compressed files as if they were uncompressed.
+- `uncompressed_hash_of_compressed_files` (`String`) – 压缩文件中数据(视作未压缩时)的 [sipHash128](../sql_reference/functions/hash_functions.md#hash_functions-siphash128)。

- `bytes` (`UInt64`) – Alias for `bytes_on_disk`.

- `marks_size` (`UInt64`) – Alias for `marks_bytes`.
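+例如,列出某个表的活动数据部分(表名 `visits` 仅为示意):
+
+``` sql
+SELECT partition, name, rows, bytes_on_disk
+FROM system.parts
+WHERE table = 'visits' AND active;
+```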
-## system.part\_log {#system_tables-part-log}
+## system.part\_log {#system_tables-part-log}

-The `system.part_log` table is created only if the [part\_log](server_settings/settings.md#server_settings-part-log) server setting is specified.
+仅当指定了 [part\_log](server_configuration_parameters/settings.md#server_configuration_parameters-part-log) 服务器设置时,才会创建 `system.part_log` 表。

-This table contains information about events that occurred with [data parts](table_engines/custom_partitioning_key.md) in the [MergeTree](table_engines/mergetree.md) family tables, such as adding or merging data.
+此表包含 [MergeTree](../engines/table_engines/mergetree_family/mergetree.md) 系列表中与 [数据部分](../engines/table_engines/mergetree_family/custom_partitioning_key.md) 相关的事件信息,例如添加或合并数据。

-The `system.part_log` table contains the following columns:
+`system.part_log` 表包含以下列:

- `event_type` (Enum) — Type of the event that occurred with the data part. Can have one of the following values:
- `NEW_PART` — Inserting of a new data part.
- `MERGE_PARTS` — Merging of data parts.
- `DOWNLOAD_PART` — Downloading a data part.
- - `REMOVE_PART` — Removing or detaching a data part using [DETACH PARTITION](../query_language/alter.md#alter_detach-partition).
+ - `REMOVE_PART` — Removing or detaching a data part using [DETACH PARTITION](../sql_reference/statements/alter.md#alter_detach-partition).
- `MUTATE_PART` — Mutating of a data part.
- `MOVE_PART` — Moving the data part from the one disk to another one.
- `event_date` (Date) — Event date.
@@ -461,7 +464,7 @@ The `system.part_log` table contains the following columns:
- `database` (String) — Name of the database the data part is in.
- `table` (String) — Name of the table the data part is in.
- `part_name` (String) — Name of the data part.
-- `partition_id` (String) — ID of the partition that the data part was inserted to. The column takes the ‘all’ value if the partitioning is by `tuple()`.
+- `partition_id` (String) — ID of the partition that the data part was inserted to. 如果分区是按 `tuple()` 进行的,该列取 ‘all’ 值。
- `rows` (UInt64) — The number of rows in the data part.
- `size_in_bytes` (UInt64) — Size of the data part in bytes.
- `merged_from` (Array(String)) — An array of names of the parts which the current part was made up from (after the merge).
@@ -471,36 +474,36 @@ The `system.part_log` table contains the following columns:
- `error` (UInt16) — The code number of the occurred error.
- `exception` (String) — Text message of the occurred error.

-The `system.part_log` table is created after the first inserting data to the `MergeTree` table.
+在第一次向 `MergeTree` 表插入数据之后,才会创建 `system.part_log` 表。
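+一个说明性的查询草图(假设已通过上述服务器设置启用了该表):
+
+``` sql
+SELECT event_type, table, part_name, rows
+FROM system.part_log
+ORDER BY event_time DESC
+LIMIT 10;
+```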
-## system.text\_log {#system-tables-text-log}
+## system.text\_log {#system-tables-text-log}
-Contains logging entries. Logging level which goes to this table can be limited with `text_log.level` server setting.
+Contains logging entries. The logging level that goes to this table can be limited with the `text_log.level` server setting.
-Columns:
+Columns:
-- `event_date` (`Date`) - Date of the entry.
-- `event_time` (`DateTime`) - Time of the entry.
-- `microseconds` (`UInt32`) - Microseconds of the entry.
+- `event_date` (`Date`) - Date of the entry.
+- `event_time` (`DateTime`) - Time of the entry.
+- `microseconds` (`UInt32`) - Microseconds of the entry.
- `thread_name` (String) — Name of the thread from which the logging was done.
- `thread_id` (UInt64) — OS thread ID.
-- `level` (`Enum8`) - Entry level.
+- `level` (`Enum8`) - Entry level.
    - `'Fatal' = 1`
    - `'Critical' = 2`
    - `'Error' = 3`
@@ -509,30 +512,30 @@ Columns:
    - `'Information' = 6`
    - `'Debug' = 7`
    - `'Trace' = 8`
-- `query_id` (`String`) - ID of the query.
+- `query_id` (`String`) - ID of the query.
- `logger_name` (`LowCardinality(String)`) - Name of the logger (i.e. `DDLWorker`)
-- `message` (`String`) - The message itself.
-- `revision` (`UInt32`) - ClickHouse revision.
-- `source_file` (`LowCardinality(String)`) - Source file from which the logging was done.
-- `source_line` (`UInt64`) - Source line from which the logging was done.
+- `message` (`String`) - The message itself.
+- `revision` (`UInt32`) - ClickHouse revision.
+- `source_file` (`LowCardinality(String)`) - Source file from which the logging was done.
+- `source_line` (`UInt64`) - Source line from which the logging was done.
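For instance, to pull the most recent error-level entries, a minimal sketch built from the columns listed above:

``` sql
SELECT event_time, level, logger_name, message
FROM system.text_log
WHERE level = 'Error'
ORDER BY event_time DESC
LIMIT 10
```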
-## system.query\_log {#system_tables-query_log}
+## system.query\_log {#system_tables-query_log}
-Contains information about execution of queries. For each query, you can see processing start time, duration of processing, error messages and other information.
+Contains information about execution of queries. For each query, you can see the processing start time, duration of processing, error messages, and other information.
-!!! note "Note"
-    The table doesn’t contain input data for `INSERT` queries.
+!!! note "Note"
+    The table doesn’t contain input data for `INSERT` queries.
-ClickHouse creates this table only if the [query\_log](server_settings/settings.md#server_settings-query-log) server parameter is specified. This parameter sets the logging rules, such as the logging interval or the name of the table the queries will be logged in.
+ClickHouse creates this table only if the [query\_log](server_configuration_parameters/settings.md#server_configuration_parameters-query-log) server parameter is specified. This parameter sets the logging rules, such as the logging interval or the name of the table the queries will be logged in.
-To enable query logging, set the [log\_queries](settings/settings.md#settings-log-queries) parameter to 1. For details, see the [Settings](settings/settings.md) section.
+To enable query logging, set the [log\_queries](settings/settings.md#settings-log-queries) parameter to 1. For details, see the [Settings](settings/settings.md) section.
-The `system.query_log` table registers two kinds of queries:
+The `system.query_log` table registers two kinds of queries:
-1. Initial queries that were run directly by the client.
-2. Child queries that were initiated by other queries (for distributed query execution). For these types of queries, information about the parent queries is shown in the `initial_*` columns.
+1. Initial queries that were run directly by the client.
+2. Child queries that were initiated by other queries (for distributed query execution). For these types of queries, information about the parent queries is shown in the `initial_*` columns.
-Columns:
+Columns:
- `type` (`Enum8`) — Type of event that occurred when executing the query. Values:
    - `'QueryStart' = 1` — Successful start of query execution.
@@ -545,8 +548,8 @@ Columns:
- `query_duration_ms` (UInt64) — Duration of query execution.
- `read_rows` (UInt64) — Number of read rows.
- `read_bytes` (UInt64) — Number of read bytes.
-- `written_rows` (UInt64) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0.
-- `written_bytes` (UInt64) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0.
+- `written_rows` (UInt64) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0.
+- `written_bytes` (UInt64) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0.
- `result_rows` (UInt64) — Number of rows in the result.
- `result_bytes` (UInt64) — Number of bytes in the result.
- `memory_usage` (UInt64) — Memory consumption by the query.
@@ -567,50 +570,50 @@ Columns:
- `interface` (UInt8) — Interface that the query was initiated from. Possible values:
    - 1 — TCP.
    - 2 — HTTP.
-- `os_user` (String) — OS’s username who runs [clickhouse-client](../interfaces/cli.md).
-- `client_hostname` (String) — Hostname of the client machine where the [clickhouse-client](../interfaces/cli.md) or another TCP client is run.
-- `client_name` (String) — The [clickhouse-client](../interfaces/cli.md) or another TCP client name.
-- `client_revision` (UInt32) — Revision of the [clickhouse-client](../interfaces/cli.md) or another TCP client.
-- `client_version_major` (UInt32) — Major version of the [clickhouse-client](../interfaces/cli.md) or another TCP client.
-- `client_version_minor` (UInt32) — Minor version of the [clickhouse-client](../interfaces/cli.md) or another TCP client.
-- `client_version_patch` (UInt32) — Patch component of the [clickhouse-client](../interfaces/cli.md) or another TCP client version.
+- `os_user` (String) — OS’s username who runs [clickhouse-client](../interfaces/cli.md).
+- `client_hostname` (String) — Hostname of the client machine where the [clickhouse-client](../interfaces/cli.md) or another TCP client is run.
+- `client_name` (String) — The [clickhouse-client](../interfaces/cli.md) or another TCP client name.
+- `client_revision` (UInt32) — Revision of the [clickhouse-client](../interfaces/cli.md) or another TCP client.
+- `client_version_major` (UInt32) — Major version of the [clickhouse-client](../interfaces/cli.md) or another TCP client.
+- `client_version_minor` (UInt32) — Minor version of the [clickhouse-client](../interfaces/cli.md) or another TCP client.
+- `client_version_patch` (UInt32) — Patch component of the [clickhouse-client](../interfaces/cli.md) or another TCP client version.
- `http_method` (UInt8) — HTTP method that initiated the query. Possible values:
    - 0 — The query was launched from the TCP interface.
-    - 1 — `GET` method was used.
-    - 2 — `POST` method was used.
-- `http_user_agent` (String) — The `UserAgent` header passed in the HTTP request.
-- `quota_key` (String) — The “quota key” specified in the [quotas](quotas.md) setting (see `keyed`).
+    - 1 — `GET` method was used.
+    - 2 — `POST` method was used.
+- `http_user_agent` (String) — The `UserAgent` header passed in the HTTP request.
+- `quota_key` (String) — The “quota key” specified in the [quotas](quotas.md) setting (see `keyed`).
- `revision` (UInt32) — ClickHouse revision.
- `thread_numbers` (Array(UInt32)) — Number of threads that are participating in query execution.
-- `ProfileEvents.Names` (Array(String)) — Counters that measure different metrics. The description of them could be found in the table [system.events](#system_tables-events)
-- `ProfileEvents.Values` (Array(UInt64)) — Values of metrics that are listed in the `ProfileEvents.Names` column.
-- `Settings.Names` (Array(String)) — Names of settings that were changed when the client ran the query. To enable logging changes to settings, set the `log_query_settings` parameter to 1.
-- `Settings.Values` (Array(String)) — Values of settings that are listed in the `Settings.Names` column.
+- `ProfileEvents.Names` (Array(String)) — Counters that measure different metrics. The description of them could be found in the table [system.events](#system_tables-events)
+- `ProfileEvents.Values` (Array(UInt64)) — Values of metrics that are listed in the `ProfileEvents.Names` column.
+- `Settings.Names` (Array(String)) — Names of settings that were changed when the client ran the query. To enable logging changes to settings, set the `log_query_settings` parameter to 1.
+- `Settings.Values` (Array(String)) — Values of settings that are listed in the `Settings.Names` column.
-Each query creates one or two rows in the `query_log` table, depending on the status of the query:
+Each query creates one or two rows in the `query_log` table, depending on the status of the query:
-1. If the query execution is successful, two events with types 1 and 2 are created (see the `type` column).
-2. If an error occurred during query processing, two events with types 1 and 4 are created.
-3. If an error occurred before launching the query, a single event with type 3 is created.
+1. If the query execution is successful, two events with types 1 and 2 are created (see the `type` column).
+2. If an error occurred during query processing, two events with types 1 and 4 are created.
+3. If an error occurred before launching the query, a single event with type 3 is created.
-By default, logs are added to the table at intervals of 7.5 seconds. You can set this interval in the [query\_log](server_settings/settings.md#server_settings-query-log) server setting (see the `flush_interval_milliseconds` parameter). To flush the logs forcibly from the memory buffer into the table, use the `SYSTEM FLUSH LOGS` query.
+By default, logs are added to the table at intervals of 7.5 seconds. You can set this interval in the [query\_log](server_configuration_parameters/settings.md#server_configuration_parameters-query-log) server setting (see the `flush_interval_milliseconds` parameter). To flush the logs forcibly from the memory buffer into the table, use the `SYSTEM FLUSH LOGS` query.
-When the table is deleted manually, it will be automatically created on the fly. Note that all the previous logs will be deleted.
+When the table is deleted manually, it will be automatically created on the fly. Note that all the previous logs will be deleted.
-!!! note "Note"
-    The storage period for logs is unlimited. Logs aren’t automatically deleted from the table. You need to organize the removal of outdated logs yourself.
+!!! note "Note"
+    The storage period for logs is unlimited. Logs aren’t automatically deleted from the table. You need to organize the removal of outdated logs yourself.
-You can specify an arbitrary partitioning key for the `system.query_log` table in the [query\_log](server_settings/settings.md#server_settings-query-log) server setting (see the `partition_by` parameter).
+You can specify an arbitrary partitioning key for the `system.query_log` table in the [query\_log](server_configuration_parameters/settings.md#server_configuration_parameters-query-log) server setting (see the `partition_by` parameter).
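As an illustration, a minimal sketch that lists the slowest finished queries; per the numbering above, event type 2 (`'QueryFinish'`) marks successful completion, which is assumed here:

``` sql
SELECT event_time, query_duration_ms, read_rows, query
FROM system.query_log
WHERE type = 'QueryFinish'
ORDER BY query_duration_ms DESC
LIMIT 10
```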
-## system.query\_thread\_log {#system_tables-query-thread-log}
+## system.query\_thread\_log {#system_tables-query-thread-log}
-The table contains information about each query execution thread.
+The table contains information about each query execution thread.
-ClickHouse creates this table only if the [query\_thread\_log](server_settings/settings.md#server_settings-query-thread-log) server parameter is specified. This parameter sets the logging rules, such as the logging interval or the name of the table the queries will be logged in.
+ClickHouse creates this table only if the [query\_thread\_log](server_configuration_parameters/settings.md#server_configuration_parameters-query-thread-log) server parameter is specified. This parameter sets the logging rules, such as the logging interval or the name of the table the queries will be logged in.
-To enable query logging, set the [log\_query\_threads](settings/settings.md#settings-log-query-threads) parameter to 1. For details, see the [Settings](settings/settings.md) section.
+To enable query logging, set the [log\_query\_threads](settings/settings.md#settings-log-query-threads) parameter to 1. For details, see the [Settings](settings/settings.md) section.
-Columns:
+Columns:
- `event_date` (Date) — the date when the thread has finished execution of the query.
- `event_time` (DateTime) — the date and time when the thread has finished execution of the query.
@@ -618,8 +621,8 @@ Columns:
- `query_duration_ms` (UInt64) — Duration of query execution.
- `read_rows` (UInt64) — Number of read rows.
- `read_bytes` (UInt64) — Number of read bytes.
-- `written_rows` (UInt64) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0.
-- `written_bytes` (UInt64) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0.
+- `written_rows` (UInt64) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0.
+- `written_bytes` (UInt64) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0.
- `memory_usage` (Int64) — The difference between the amount of allocated and freed memory in context of this thread.
- `peak_memory_usage` (Int64) — The maximum difference between the amount of allocated and freed memory in context of this thread.
- `thread_name` (String) — Name of the thread.
@@ -641,62 +644,62 @@ Columns:
- `interface` (UInt8) — Interface that the query was initiated from. Possible values:
    - 1 — TCP.
    - 2 — HTTP.
-- `os_user` (String) — OS’s username who runs [clickhouse-client](../interfaces/cli.md).
-- `client_hostname` (String) — Hostname of the client machine where the [clickhouse-client](../interfaces/cli.md) or another TCP client is run.
-- `client_name` (String) — The [clickhouse-client](../interfaces/cli.md) or another TCP client name.
-- `client_revision` (UInt32) — Revision of the [clickhouse-client](../interfaces/cli.md) or another TCP client.
-- `client_version_major` (UInt32) — Major version of the [clickhouse-client](../interfaces/cli.md) or another TCP client.
-- `client_version_minor` (UInt32) — Minor version of the [clickhouse-client](../interfaces/cli.md) or another TCP client.
-- `client_version_patch` (UInt32) — Patch component of the [clickhouse-client](../interfaces/cli.md) or another TCP client version.
+- `os_user` (String) — OS’s username who runs [clickhouse-client](../interfaces/cli.md).
+- `client_hostname` (String) — Hostname of the client machine where the [clickhouse-client](../interfaces/cli.md) or another TCP client is run.
+- `client_name` (String) — The [clickhouse-client](../interfaces/cli.md) or another TCP client name.
+- `client_revision` (UInt32) — Revision of the [clickhouse-client](../interfaces/cli.md) or another TCP client.
+- `client_version_major` (UInt32) — Major version of the [clickhouse-client](../interfaces/cli.md) or another TCP client.
+- `client_version_minor` (UInt32) — Minor version of the [clickhouse-client](../interfaces/cli.md) or another TCP client.
+- `client_version_patch` (UInt32) — Patch component of the [clickhouse-client](../interfaces/cli.md) or another TCP client version.
- `http_method` (UInt8) — HTTP method that initiated the query. Possible values:
    - 0 — The query was launched from the TCP interface.
-    - 1 — `GET` method was used.
-    - 2 — `POST` method was used.
-- `http_user_agent` (String) — The `UserAgent` header passed in the HTTP request.
-- `quota_key` (String) — The “quota key” specified in the [quotas](quotas.md) setting (see `keyed`).
+    - 1 — `GET` method was used.
+    - 2 — `POST` method was used.
+- `http_user_agent` (String) — The `UserAgent` header passed in the HTTP request.
+- `quota_key` (String) — The “quota key” specified in the [quotas](quotas.md) setting (see `keyed`).
- `revision` (UInt32) — ClickHouse revision.
-- `ProfileEvents.Names` (Array(String)) — Counters that measure different metrics for this thread. The description of them could be found in the table [system.events](#system_tables-events)
-- `ProfileEvents.Values` (Array(UInt64)) — Values of metrics for this thread that are listed in the `ProfileEvents.Names` column.
+- `ProfileEvents.Names` (Array(String)) — Counters that measure different metrics for this thread. The description of them could be found in the table [system.events](#system_tables-events)
+- `ProfileEvents.Values` (Array(UInt64)) — Values of metrics for this thread that are listed in the `ProfileEvents.Names` column.
-By default, logs are added to the table at intervals of 7.5 seconds. You can set this interval in the [query\_thread\_log](server_settings/settings.md#server_settings-query-thread-log) server setting (see the `flush_interval_milliseconds` parameter). To flush the logs forcibly from the memory buffer into the table, use the `SYSTEM FLUSH LOGS` query.
+By default, logs are added to the table at intervals of 7.5 seconds. You can set this interval in the [query\_thread\_log](server_configuration_parameters/settings.md#server_configuration_parameters-query-thread-log) server setting (see the `flush_interval_milliseconds` parameter). To flush the logs forcibly from the memory buffer into the table, use the `SYSTEM FLUSH LOGS` query.
-When the table is deleted manually, it will be automatically created on the fly. Note that all the previous logs will be deleted.
+When the table is deleted manually, it will be automatically created on the fly. Note that all the previous logs will be deleted.
-!!! note "Note"
-    The storage period for logs is unlimited. Logs aren’t automatically deleted from the table. You need to organize the removal of outdated logs yourself.
+!!! note "Note"
+    The storage period for logs is unlimited. Logs aren’t automatically deleted from the table. You need to organize the removal of outdated logs yourself.
-You can specify an arbitrary partitioning key for the `system.query_thread_log` table in the [query\_thread\_log](server_settings/settings.md#server_settings-query-thread-log) server setting (see the `partition_by` parameter).
+You can specify an arbitrary partitioning key for the `system.query_thread_log` table in the [query\_thread\_log](server_configuration_parameters/settings.md#server_configuration_parameters-query-thread-log) server setting (see the `partition_by` parameter).
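For example, a minimal sketch that shows per-thread peak memory for the most recent entries, using only the columns listed above:

``` sql
SELECT event_time, thread_name, read_rows, peak_memory_usage
FROM system.query_thread_log
ORDER BY event_time DESC
LIMIT 10
```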
-## system.trace\_log {#system_tables-trace_log}
+## system.trace\_log {#system_tables-trace_log}
-Contains stack traces collected by the sampling query profiler.
+Contains stack traces collected by the sampling query profiler.
-ClickHouse creates this table when the [trace\_log](server_settings/settings.md#server_settings-trace_log) server configuration section is set. Also the [query\_profiler\_real\_time\_period\_ns](settings/settings.md#query_profiler_real_time_period_ns) and [query\_profiler\_cpu\_time\_period\_ns](settings/settings.md#query_profiler_cpu_time_period_ns) settings should be set.
+ClickHouse creates this table when the [trace\_log](server_configuration_parameters/settings.md#server_configuration_parameters-trace_log) server configuration section is set. Also the [query\_profiler\_real\_time\_period\_ns](settings/settings.md#query_profiler_real_time_period_ns) and [query\_profiler\_cpu\_time\_period\_ns](settings/settings.md#query_profiler_cpu_time_period_ns) settings should be set.
-To analyze logs, use the `addressToLine`, `addressToSymbol` and `demangle` introspection functions.
+To analyze logs, use the `addressToLine`, `addressToSymbol` and `demangle` introspection functions.
-Columns:
+Columns:
-- `event_date`([Date](../data_types/date.md)) — Date of sampling moment.
+- `event_date`([Date](../sql_reference/data_types/date.md)) — Date of sampling moment.
-- `event_time`([DateTime](../data_types/datetime.md)) — Timestamp of sampling moment.
+- `event_time`([DateTime](../sql_reference/data_types/datetime.md)) — Timestamp of sampling moment.
-- `revision`([UInt32](../data_types/int_uint.md)) — ClickHouse server build revision.
+- `revision`([UInt32](../sql_reference/data_types/int_uint.md)) — ClickHouse server build revision.
-    When connecting to server by `clickhouse-client`, you see the string similar to `Connected to ClickHouse server version 19.18.1 revision 54429.`. This field contains the `revision`, but not the `version` of a server.
+    When connecting to the server by `clickhouse-client`, you see a string similar to `Connected to ClickHouse server version 19.18.1 revision 54429.`. This field contains the `revision`, but not the `version` of a server.
-- `timer_type`([Enum8](../data_types/enum.md)) — Timer type:
+- `timer_type`([Enum8](../sql_reference/data_types/enum.md)) — Timer type:
-    - `Real` represents wall-clock time.
-    - `CPU` represents CPU time.
+    - `Real` represents wall-clock time.
+    - `CPU` represents CPU time.
-- `thread_number`([UInt32](../data_types/int_uint.md)) — Thread identifier.
+- `thread_number`([UInt32](../sql_reference/data_types/int_uint.md)) — Thread identifier.
-- `query_id`([String](../data_types/string.md)) — Query identifier that can be used to get details about a query that was running from the [query\_log](#system_tables-query_log) system table.
+- `query_id`([String](../sql_reference/data_types/string.md)) — Query identifier that can be used to get details about a query that was running from the [query\_log](#system_tables-query_log) system table.
-- `trace`([Array(UInt64)](../data_types/array.md)) — Stack trace at the moment of sampling. Each element is a virtual memory address inside ClickHouse server process.
+- `trace`([Array(UInt64)](../sql_reference/data_types/array.md)) — Stack trace at the moment of sampling. Each element is a virtual memory address inside ClickHouse server process.
-**Example**
+**Example**

``` sql
SELECT * FROM system.trace_log LIMIT 1 \G
@@ -714,12 +717,12 @@ query_id: acc4d61f-5bd1-4a3e-bc91-2180be37c915
trace: [94222141367858,94222152240175,94222152325351,94222152329944,94222152330796,94222151449980,94222144088167,94222151682763,94222144088167,94222151682763,94222144088167,94222144058283,94222144059248,94222091840750,94222091842302,94222091831228,94222189631488,140509950166747,140509942945935]
```

-## system.replicas {#system_tables-replicas}
+## system.replicas {#system_tables-replicas}
-Contains information and status for replicated tables residing on the local server.
-This table can be used for monitoring. The table contains a row for every Replicated\* table.
+Contains information and status for replicated tables residing on the local server.
+This table can be used for monitoring. The table contains a row for every Replicated\* table.
-Example:
+Example:

``` sql
SELECT *
@@ -763,46 +766,46 @@ total_replicas: 2
active_replicas: 2
```

-Columns:
+Columns:
-- `database` (`String`) - Database name
-- `table` (`String`) - Table name
-- `engine` (`String`) - Table engine name
-- `is_leader` (`UInt8`) - Whether the replica is the leader.
-    Only one replica at a time can be the leader. The leader is responsible for selecting background merges to perform.
-    Note that writes can be performed to any replica that is available and has a session in ZK, regardless of whether it is a leader.
-- `can_become_leader` (`UInt8`) - Whether the replica can be elected as a leader.
-- `is_readonly` (`UInt8`) - Whether the replica is in read-only mode.
-    This mode is turned on if the config doesn’t have sections with ZooKeeper, if an unknown error occurred when reinitializing sessions in ZooKeeper, and during session reinitialization in ZooKeeper.
-- `is_session_expired` (`UInt8`) - the session with ZooKeeper has expired. Basically the same as `is_readonly`.
-- `future_parts` (`UInt32`) - The number of data parts that will appear as the result of INSERTs or merges that haven’t been done yet.
-- `parts_to_check` (`UInt32`) - The number of data parts in the queue for verification. A part is put in the verification queue if there is suspicion that it might be damaged.
-- `zookeeper_path` (`String`) - Path to table data in ZooKeeper.
-- `replica_name` (`String`) - Replica name in ZooKeeper. Different replicas of the same table have different names.
-- `replica_path` (`String`) - Path to replica data in ZooKeeper. The same as concatenating ‘zookeeper\_path/replicas/replica\_path’.
-- `columns_version` (`Int32`) - Version number of the table structure. Indicates how many times ALTER was performed. If replicas have different versions, it means some replicas haven’t made all of the ALTERs yet.
-- `queue_size` (`UInt32`) - Size of the queue for operations waiting to be performed. Operations include inserting blocks of data, merges, and certain other actions. It usually coincides with `future_parts`.
-- `inserts_in_queue` (`UInt32`) - Number of inserts of blocks of data that need to be made. Insertions are usually replicated fairly quickly. If this number is large, it means something is wrong.
-- `merges_in_queue` (`UInt32`) - The number of merges waiting to be made. Sometimes merges are lengthy, so this value may be greater than zero for a long time.
-- `part_mutations_in_queue` (`UInt32`) - The number of mutations waiting to be made.
-- `queue_oldest_time` (`DateTime`) - If `queue_size` greater than 0, shows when the oldest operation was added to the queue.
-- `inserts_oldest_time` (`DateTime`) - See `queue_oldest_time`
-- `merges_oldest_time` (`DateTime`) - See `queue_oldest_time`
-- `part_mutations_oldest_time` (`DateTime`) - See `queue_oldest_time`
+- `database` (`String`) - Database name
+- `table` (`String`) - Table name
+- `engine` (`String`) - Table engine name
+- `is_leader` (`UInt8`) - Whether the replica is the leader.
+    Only one replica at a time can be the leader. The leader is responsible for selecting background merges to perform.
+    Note that writes can be performed to any replica that is available and has a session in ZK, regardless of whether it is a leader.
+- `can_become_leader` (`UInt8`) - Whether the replica can be elected as a leader.
+- `is_readonly` (`UInt8`) - Whether the replica is in read-only mode.
+    This mode is turned on if the config doesn’t have sections with ZooKeeper, if an unknown error occurred when reinitializing sessions in ZooKeeper, and during session reinitialization in ZooKeeper.
+- `is_session_expired` (`UInt8`) - The session with ZooKeeper has expired. Basically the same as `is_readonly`.
+- `future_parts` (`UInt32`) - The number of data parts that will appear as the result of INSERTs or merges that haven’t been done yet.
+- `parts_to_check` (`UInt32`) - The number of data parts in the queue for verification. A part is put in the verification queue if there is suspicion that it might be damaged.
+- `zookeeper_path` (`String`) - Path to table data in ZooKeeper.
+- `replica_name` (`String`) - Replica name in ZooKeeper. Different replicas of the same table have different names.
+- `replica_path` (`String`) - Path to replica data in ZooKeeper. The same as concatenating ‘zookeeper\_path/replicas/replica\_path’.
+- `columns_version` (`Int32`) - Version number of the table structure. Indicates how many times ALTER was performed. If replicas have different versions, it means some replicas haven’t made all of the ALTERs yet.
+- `queue_size` (`UInt32`) - Size of the queue for operations waiting to be performed. Operations include inserting blocks of data, merges, and certain other actions. It usually coincides with `future_parts`.
+- `inserts_in_queue` (`UInt32`) - Number of inserts of blocks of data that need to be made. Insertions are usually replicated fairly quickly. If this number is large, it means something is wrong.
+- `merges_in_queue` (`UInt32`) - The number of merges waiting to be made. Sometimes merges are lengthy, so this value may be greater than zero for a long time.
+- `part_mutations_in_queue` (`UInt32`) - The number of mutations waiting to be made.
+- `queue_oldest_time` (`DateTime`) - If `queue_size` is greater than 0, shows when the oldest operation was added to the queue.
+- `inserts_oldest_time` (`DateTime`) - See `queue_oldest_time`
+- `merges_oldest_time` (`DateTime`) - See `queue_oldest_time`
+- `part_mutations_oldest_time` (`DateTime`) - See `queue_oldest_time`
-The next 4 columns have a non-zero value only where there is an active session with ZK.
+The next 4 columns have a non-zero value only where there is an active session with ZK.
-- `log_max_index` (`UInt64`) - Maximum entry number in the log of general activity.
-- `log_pointer` (`UInt64`) - Maximum entry number in the log of general activity that the replica copied to its execution queue, plus one. If `log_pointer` is much smaller than `log_max_index`, something is wrong.
-- `last_queue_update` (`DateTime`) - When the queue was updated last time.
-- `absolute_delay` (`UInt64`) - How big lag in seconds the current replica has.
-- `total_replicas` (`UInt8`) - The total number of known replicas of this table.
-- `active_replicas` (`UInt8`) - The number of replicas of this table that have a session in ZooKeeper (i.e., the number of functioning replicas).
+- `log_max_index` (`UInt64`) - Maximum entry number in the log of general activity.
+- `log_pointer` (`UInt64`) - Maximum entry number in the log of general activity that the replica copied to its execution queue, plus one. If `log_pointer` is much smaller than `log_max_index`, something is wrong.
+- `last_queue_update` (`DateTime`) - When the queue was updated last time.
+- `absolute_delay` (`UInt64`) - How big a lag in seconds the current replica has.
+- `total_replicas` (`UInt8`) - The total number of known replicas of this table.
+- `active_replicas` (`UInt8`) - The number of replicas of this table that have a session in ZooKeeper (i.e., the number of functioning replicas).
-If you request all the columns, the table may work a bit slowly, since several reads from ZooKeeper are made for each row.
-If you don’t request the last 4 columns (log\_max\_index, log\_pointer, total\_replicas, active\_replicas), the table works quickly.
+If you request all the columns, the table may work a bit slowly, since several reads from ZooKeeper are made for each row.
+If you don’t request the last 4 columns (log\_max\_index, log\_pointer, total\_replicas, active\_replicas), the table works quickly.
-For example, you can check that everything is working correctly like this:
+For example, you can check that everything is working correctly like this:

``` sql
SELECT
@@ -834,32 +837,61 @@ WHERE
    OR active_replicas < total_replicas
```

-If this query doesn’t return anything, it means that everything is fine.
+If this query doesn’t return anything, it means that everything is fine.

-## system.settings {#system-settings}
+## system.settings {#system-tables-system-settings}
-Contains information about settings that are currently in use.
-I.e. used for executing the query you are using to read from the system.settings table.
+Contains information about session settings for the current user.
-Columns:
+Columns:
-- `name` (String) — Setting name.
-- `value` (String) — Setting value.
-- `description` (String) — Setting description.
-- `type` (String) — Setting type (implementation specific string value).
-- `changed` (UInt8) — Whether the setting was explicitly defined in the config or explicitly changed.
-- `min` (Nullable(String)) — Get minimum allowed value (if any is set via [constraints](settings/constraints_on_settings.md#constraints-on-settings)).
-- `max` (Nullable(String)) — Get maximum allowed value (if any is set via [constraints](settings/constraints_on_settings.md#constraints-on-settings)).
-- `readonly` (UInt8) — Can user change this setting (for more info, look into [constraints](settings/constraints_on_settings.md#constraints-on-settings)).
+- `name` ([String](../sql_reference/data_types/string.md)) — Setting name.
+- `value` ([String](../sql_reference/data_types/string.md)) — Setting value.
+- `changed` ([UInt8](../sql_reference/data_types/int_uint.md#uint-ranges)) — Shows whether a setting is changed from its default value.
+- `description` ([String](../sql_reference/data_types/string.md)) — Short setting description.
+- `min` ([Nullable](../sql_reference/data_types/nullable.md)([String](../sql_reference/data_types/string.md))) — Minimum value of the setting, if any is set via [constraints](settings/constraints_on_settings.md#constraints-on-settings). If the setting has no minimum value, contains [NULL](../sql_reference/syntax.md#null-literal).
+- `max` ([Nullable](../sql_reference/data_types/nullable.md)([String](../sql_reference/data_types/string.md))) — Maximum value of the setting, if any is set via [constraints](settings/constraints_on_settings.md#constraints-on-settings). If the setting has no maximum value, contains [NULL](../sql_reference/syntax.md#null-literal).
+- `readonly` ([UInt8](../sql_reference/data_types/int_uint.md#uint-ranges)) — Shows whether the current user can change the setting:
+    - `0` — Current user can change the setting.
+    - `1` — Current user can't change the setting.
-Example:
+**Example**
+
+The following example shows how to get information about settings whose name contains `min_i`.

``` sql
-SELECT name, value
+SELECT *
 FROM system.settings
-WHERE changed
+WHERE name LIKE '%min_i%'
```

+``` text
+┌─name────────────────────────────────────────┬─value─────┬─changed─┬─description───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─min──┬─max──┬─readonly─┐
+│ min_insert_block_size_rows │ 1048576 │ 0 │ Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │
+│ min_insert_block_size_bytes │ 268435456 │ 0 │ Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │
+│ read_backoff_min_interval_between_events_ms │ 1000 │ 0 │ Settings to reduce the number of threads in case of slow reads. Do not pay attention to the event, if the previous one has passed less than a certain amount of time. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │
+└─────────────────────────────────────────────┴───────────┴─────────┴───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴──────┴──────┴──────────┘
+```
+
+Using `WHERE changed` can be useful, for example, when you want to check:
+
+- Whether settings in configuration files are loaded correctly and are in use.
+- Settings that changed in the current session.
+
+
+
+``` sql
+SELECT * FROM system.settings WHERE changed AND name='load_balancing'
+```
+
+**See also**
+
+- [Settings](settings/index.md#settings)
+- [Permissions for Queries](settings/permissions_for_queries.md#settings_readonly)
+- [Constraints on Settings](settings/constraints_on_settings.md)
+
+## system.table\_engines {#system.table_engines}
+
``` text
┌─name───────────────────┬─value───────┐
│ max_threads │ 8 │
@@ -869,11 +901,11 @@ WHERE changed
└────────────────────────┴─────────────┘
```

-## system.merge\_tree\_settings {#system-merge_tree_settings}
+## system.merge\_tree\_settings {#system-merge_tree_settings}
-Contains information about settings for `MergeTree` tables.
+Contains information about settings for `MergeTree` tables.
-Columns:
+Columns:
- `name` (String) — Setting name.
- `value` (String) — Setting value.
@@ -881,21 +913,21 @@ Columns:
- `type` (String) — Setting type (implementation specific string value).
- `changed` (UInt8) — Whether the setting was explicitly defined in the config or explicitly changed.
-## system.table\_engines {#system-table-engines}
+## system.table\_engines {#system-table-engines}
-Contains description of table engines supported by server and their feature support information.
+Contains a description of table engines supported by the server and their feature support information.
-This table contains the following columns (the column type is shown in brackets):
+This table contains the following columns (the column type is shown in brackets):
- `name` (String) — The name of table engine.
-- `supports_settings` (UInt8) — Flag that indicates if table engine supports `SETTINGS` clause.
-- `supports_skipping_indices` (UInt8) — Flag that indicates if table engine supports [skipping indices](table_engines/mergetree/#table_engine-mergetree-data_skipping-indexes).
-- `supports_ttl` (UInt8) — Flag that indicates if table engine supports [TTL](table_engines/mergetree/#table_engine-mergetree-ttl).
-- `supports_sort_order` (UInt8) — Flag that indicates if table engine supports clauses `PARTITION_BY`, `PRIMARY_KEY`, `ORDER_BY` and `SAMPLE_BY`.
-- `supports_replication` (UInt8) — Flag that indicates if table engine supports [data replication](table_engines/replication/).
+- `supports_settings` (UInt8) — Flag that indicates if table engine supports the `SETTINGS` clause.
+- `supports_skipping_indices` (UInt8) — Flag that indicates if table engine supports [skipping indices](../engines/table_engines/mergetree_family/mergetree.md#table_engine-mergetree-data_skipping-indexes).
+- `supports_ttl` (UInt8) — Flag that indicates if table engine supports [TTL](../engines/table_engines/mergetree_family/mergetree.md#table_engine-mergetree-ttl).
+- `supports_sort_order` (UInt8) — Flag that indicates if table engine supports clauses `PARTITION_BY`, `PRIMARY_KEY`, `ORDER_BY` and `SAMPLE_BY`.
+- `supports_replication` (UInt8) — Flag that indicates if table engine supports [data replication](../engines/table_engines/mergetree_family/replication.md).
- `supports_duduplication` (UInt8) — Flag that indicates if table engine supports data deduplication.
-Example:
+Example:

``` sql
SELECT *
@@ -911,56 +943,72 @@ WHERE name in ('Kafka', 'MergeTree', 'ReplicatedCollapsingMergeTree')
└───────────────────────────────┴───────────────────┴───────────────────────────┴─────────────────────┴──────────────┴──────────────────────┴────────────────────────┘
```

-**See also**
+**See also**
-- MergeTree family [query clauses](table_engines/mergetree.md#mergetree-query-clauses)
-- Kafka [settings](table_engines/kafka.md#table_engine-kafka-creating-a-table)
-- Join [settings](table_engines/join.md#join-limitations-and-settings)
+- MergeTree family [query clauses](../engines/table_engines/mergetree_family/mergetree.md#mergetree-query-clauses)
+- Kafka [settings](../engines/table_engines/integrations/kafka.md#table_engine-kafka-creating-a-table)
+- Join [settings](../engines/table_engines/special/join.md#join-limitations-and-settings)
-## system.tables {#system-tables}
+## system.tables {#system-tables}
-Contains metadata of each table that the server knows about. Detached tables are not shown in `system.tables`.
+Contains metadata of each table that the server knows about. Detached tables are not shown in `system.tables`.
-This table contains the following columns (the column type is shown in brackets):
+This table contains the following columns (the column type is shown in brackets):
- `database` (String) — The name of the database the table is in.
+
- `name` (String) — Table name.
+
- `engine` (String) — Table engine name (without parameters).
-- `is_temporary` (UInt8) - Flag that indicates whether the table is temporary.
-- `data_path` (String) - Path to the table data in the file system.
-- `metadata_path` (String) - Path to the table metadata in the file system.
-- `metadata_modification_time` (DateTime) - Time of latest modification of the table metadata.
-- `dependencies_database` (Array(String)) - Database dependencies.
-- `dependencies_table` (Array(String)) - Table dependencies ([MaterializedView](table_engines/materializedview.md) tables based on the current table).
-- `create_table_query` (String) - The query that was used to create the table.
-- `engine_full` (String) - Parameters of the table engine.
-- `partition_key` (String) - The partition key expression specified in the table.
-- `sorting_key` (String) - The sorting key expression specified in the table.
-- `primary_key` (String) - The primary key expression specified in the table.
-- `sampling_key` (String) - The sampling key expression specified in the table.
-- `storage_policy` (String) - The storage policy:
-    - [MergeTree](table_engines/mergetree.md#table_engine-mergetree-multiple-volumes)
-    - [Distributed](table_engines/distributed.md#distributed)
+- `is_temporary` (UInt8) - Flag that indicates whether the table is temporary.
-- `total_rows` (Nullable(UInt64)) - Total number of rows, if it is possible to quickly determine exact number of rows in the table, otherwise `Null` (including underying `Buffer` table).
-- `total_bytes` (Nullable(UInt64)) - Total number of bytes, if it is possible to quickly determine exact number of bytes for the table on storage, otherwise `Null` (**does not** includes any underlying storage).
+- `data_path` (String) - Path to the table data in the file system.
-    - If the table stores data on disk, returns used space on disk (i.e. compressed).
-    - If the table stores data in memory, returns approximated number of used bytes in memory.
+- `metadata_path` (String) - Path to the table metadata in the file system.
-The `system.tables` table is used in `SHOW TABLES` query implementation.
+- `metadata_modification_time` (DateTime) - Time of latest modification of the table metadata.
-## system.zookeeper {#system-zookeeper}
+- `dependencies_database` (Array(String)) - Database dependencies.
-The table does not exist if ZooKeeper is not configured. Allows reading data from the ZooKeeper cluster defined in the config.
-The query must have a ‘path’ equality condition in the WHERE clause. This is the path in ZooKeeper for the children that you want to get data for.
+- `dependencies_table` (Array(String)) - Table dependencies ([MaterializedView](../engines/table_engines/special/materializedview.md) tables based on the current table).
-The query `SELECT * FROM system.zookeeper WHERE path = '/clickhouse'` outputs data for all children on the `/clickhouse` node.
-To output data for all root nodes, write path = ‘/’.
-If the path specified in ‘path’ doesn’t exist, an exception will be thrown.
+- `create_table_query` (String) - The query that was used to create the table.
-Columns:
+- `engine_full` (String) - Parameters of the table engine.
+
+- `partition_key` (String) - The partition key expression specified in the table.
+
+- `sorting_key` (String) - The sorting key expression specified in the table.
+
+- `primary_key` (String) - The primary key expression specified in the table.
+
+- `sampling_key` (String) - The sampling key expression specified in the table.
+
+- `storage_policy` (String) - The storage policy:
+
+    - [MergeTree](../engines/table_engines/mergetree_family/mergetree.md#table_engine-mergetree-multiple-volumes)
+    - [Distributed](../engines/table_engines/special/distributed.md#distributed)
+
+- `total_rows` (Nullable(UInt64)) - Total number of rows, if it is possible to quickly determine the exact number of rows in the table, otherwise `Null` (including the underlying `Buffer` table).
+- `total_bytes` (Nullable(UInt64)) - Total number of bytes, if it is possible to quickly determine the exact number of bytes for the table on storage, otherwise `Null` (**does not** include any underlying storage).
+
+    - If the table stores data on disk, returns used space on disk (i.e. compressed).
+    - If the table stores data in memory, returns approximated number of used bytes in memory.
+
+The `system.tables` table is used in `SHOW TABLES` query implementation.
+
+## system.zookeeper {#system-zookeeper}
+
+The table does not exist if ZooKeeper is not configured. Allows reading data from the ZooKeeper cluster defined in the config.
+The query must have a ‘path’ equality condition in the WHERE clause. This is the path in ZooKeeper for the children that you want to get data for.
+
+The query `SELECT * FROM system.zookeeper WHERE path = '/clickhouse'` outputs data for all children on the `/clickhouse` node.
+To output data for all root nodes, write path = ‘/’.
+If the path specified in ‘path’ doesn’t exist, an exception will be thrown.
+
+Columns:
- `name` (String) — The name of the node.
- `path` (String) — The path to the node.
@@ -977,7 +1025,7 @@ Columns:
- `aversion` (Int32) — Number of changes to the ACL.
- `ephemeralOwner` (Int64) — For ephemeral nodes, the ID of the session that owns this node.
-Example:
+Example:

``` sql
SELECT *
@@ -1022,57 +1070,57 @@ pzxid: 987021252247
path: /clickhouse/tables/01-08/visits/replicas
```

-## system.mutations {#system_tables-mutations}
+## system.mutations {#system_tables-mutations}
-The table contains information about [mutations](../query_language/alter.md#alter-mutations) of MergeTree tables and their progress. Each mutation command is represented by a single row. The table has the following columns:
+The table contains information about [mutations](../sql_reference/statements/alter.md#alter-mutations) of MergeTree tables and their progress. Each mutation command is represented by a single row. The table has the following columns:
-**database**, **table** - The name of the database and table to which the mutation was applied.
+**database**, **table** - The name of the database and table to which the mutation was applied.
-**mutation\_id** - The ID of the mutation. For replicated tables these IDs correspond to znode names in the `/mutations/` directory in ZooKeeper. For unreplicated tables the IDs correspond to file names in the data directory of the table.
+**mutation\_id** - The ID of the mutation. For replicated tables these IDs correspond to znode names in the `/mutations/` directory in ZooKeeper. For unreplicated tables the IDs correspond to file names in the data directory of the table.
-**command** - The mutation command string (the part of the query after `ALTER TABLE [db.]table`).
+**command** - The mutation command string (the part of the query after `ALTER TABLE [db.]table`).
-**create\_time** - When this mutation command was submitted for execution.
+**create\_time** - When this mutation command was submitted for execution.
-**block\_numbers.partition\_id**, **block\_numbers.number** - A nested column. For mutations of replicated tables, it contains one record for each partition: the partition ID and the block number that was acquired by the mutation (in each partition, only parts that contain blocks with numbers less than the block number acquired by the mutation in that partition will be mutated). In non-replicated tables, block numbers in all partitions form a single sequence. This means that for mutations of non-replicated tables, the column will contain one record with a single block number acquired by the mutation.
+**block\_numbers.partition\_id**, **block\_numbers.number** - A nested column. For mutations of replicated tables, it contains one record for each partition: the partition ID and the block number that was acquired by the mutation (in each partition, only parts that contain blocks with numbers less than the block number acquired by the mutation in that partition will be mutated). In non-replicated tables, block numbers in all partitions form a single sequence. This means that for mutations of non-replicated tables, the column will contain one record with a single block number acquired by the mutation.
-**parts\_to\_do** - The number of data parts that need to be mutated for the mutation to finish.
+**parts\_to\_do** - The number of data parts that need to be mutated for the mutation to finish.
-**is\_done** - Is the mutation done? Note that even if `parts_to_do = 0` it is possible that a mutation of a replicated table is not done yet because of a long-running INSERT that will create a new data part that will need to be mutated.
+**is\_done** - Is the mutation done? Note that even if `parts_to_do = 0` it is possible that a mutation of a replicated table is not done yet because of a long-running INSERT that will create a new data part that will need to be mutated.
-If there were problems with mutating some parts, the following columns contain additional information:
+If there were problems with mutating some parts, the following columns contain additional information:
-**latest\_failed\_part** - The name of the most recent part that could not be mutated.
+**latest\_failed\_part** - The name of the most recent part that could not be mutated.
-**latest\_fail\_time** - The time of the most recent part mutation failure.
+**latest\_fail\_time** - The time of the most recent part mutation failure.
-**latest\_fail\_reason** - The exception message that caused the most recent part mutation failure.
+**latest\_fail\_reason** - The exception message that caused the most recent part mutation failure.
-## system.disks {#system_tables-disks}
+## system.disks {#system_tables-disks}
-Contains information about disks defined in the [server configuration](table_engines/mergetree.md#table_engine-mergetree-multiple-volumes_configure).
+Contains information about disks defined in the [server configuration](../engines/table_engines/mergetree_family/mergetree.md#table_engine-mergetree-multiple-volumes_configure).
-Columns:
+Columns:
-- `name` ([String](../data_types/string.md)) — Name of a disk in the server configuration.
-- `path` ([String](../data_types/string.md)) — Path to the mount point in the file system.
-- `free_space` ([UInt64](../data_types/int_uint.md)) — Free space on disk in bytes.
-- `total_space` ([UInt64](../data_types/int_uint.md)) — Disk volume in bytes.
-- `keep_free_space` ([UInt64](../data_types/int_uint.md)) — Amount of disk space that should stay free on disk in bytes. Defined in the `keep_free_space_bytes` parameter of disk configuration.
+- `name` ([String](../sql_reference/data_types/string.md)) — Name of a disk in the server configuration.
+- `path` ([String](../sql_reference/data_types/string.md)) — Path to the mount point in the file system.
+- `free_space` ([UInt64](../sql_reference/data_types/int_uint.md)) — Free space on disk in bytes.
+- `total_space` ([UInt64](../sql_reference/data_types/int_uint.md)) — Disk volume in bytes.
+- `keep_free_space` ([UInt64](../sql_reference/data_types/int_uint.md)) — Amount of disk space that should stay free on disk in bytes. Defined in the `keep_free_space_bytes` parameter of the disk configuration.
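For example, a minimal sketch that reports per-disk capacity; `formatReadableSize` is a standard ClickHouse function and is used here only for readability:

``` sql
SELECT
    name,
    path,
    formatReadableSize(free_space) AS free,
    formatReadableSize(total_space) AS total
FROM system.disks
```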
-## system.storage\_policies {#system_tables-storage_policies}
+## system.storage\_policies {#system_tables-storage_policies}
-Contains information about storage policies and volumes defined in the [server configuration](table_engines/mergetree.md#table_engine-mergetree-multiple-volumes_configure).
+Contains information about storage policies and volumes defined in the [server configuration](../engines/table_engines/mergetree_family/mergetree.md#table_engine-mergetree-multiple-volumes_configure).
-Columns:
+Columns:
-- `policy_name` ([String](../data_types/string.md)) — Name of the storage policy.
-- `volume_name` ([String](../data_types/string.md)) — Volume name defined in the storage policy.
-- `volume_priority` ([UInt64](../data_types/int_uint.md)) — Volume order number in the configuration.
-- `disks` ([Array(String)](../data_types/array.md)) — Disk names, defined in the storage policy.
-- `max_data_part_size` ([UInt64](../data_types/int_uint.md)) — Maximum size of a data part that can be stored on volume disks (0 — no limit).
-- `move_factor` ([Float64](../data_types/float.md)) — Ratio of free disk space. When the ratio exceeds the value of configuration parameter, ClickHouse start to move data to the next volume in order.
+- `policy_name` ([String](../sql_reference/data_types/string.md)) — Name of the storage policy.
+- `volume_name` ([String](../sql_reference/data_types/string.md)) — Volume name defined in the storage policy.
+- `volume_priority` ([UInt64](../sql_reference/data_types/int_uint.md)) — Volume order number in the configuration.
+- `disks` ([Array(String)](../sql_reference/data_types/array.md)) — Disk names, defined in the storage policy.
+- `max_data_part_size` ([UInt64](../sql_reference/data_types/int_uint.md)) — Maximum size of a data part that can be stored on volume disks (0 — no limit).
+- `move_factor` ([Float64](../sql_reference/data_types/float.md)) — Ratio of free disk space. When the ratio exceeds the value of the configuration parameter, ClickHouse starts to move data to the next volume in order.
-If the storage policy contains more then one volume, then information for each volume is stored in the individual row of the table.
+If the storage policy contains more than one volume, then information for each volume is stored in an individual row of the table.
-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/)
+[Original article](https://clickhouse.tech/docs/en/operations/system_tables/)
diff --git a/docs/zh/operations/table_engines/generate.md b/docs/zh/operations/table_engines/generate.md
deleted file mode 100644
index 051369d2e1c..00000000000
--- a/docs/zh/operations/table_engines/generate.md
+++ /dev/null
@@ -1,58 +0,0 @@
----
-en_copy: true
----
-
-# GenerateRandom {#table_engines-generate}
-
-The GenerateRandom table engine produces random data for given table schema.
-
-Usage examples:
-
-- Use in test to populate reproducible large table.
-- Generate random input for fuzzing tests.
-
-## Usage in ClickHouse Server {#usage-in-clickhouse-server}
-
-``` sql
-ENGINE = GenerateRandom(random_seed, max_string_length, max_array_length)
-```
-
-The `max_array_length` and `max_string_length` parameters specify maximum length of all
-array columns and strings correspondingly in generated data.
-
-Generate table engine supports only `SELECT` queries.
-
-It supports all [DataTypes](../../data_types/index.md) that can be stored in a table except `LowCardinality` and `AggregateFunction`.
-
-**Example:**
-
-**1.** Set up the `generate_engine_table` table:
-
-``` sql
-CREATE TABLE generate_engine_table (name String, value UInt32) ENGINE = GenerateRandom(1, 5, 3)
-```
-
-**2.** Query the data:
-
-``` sql
-SELECT * FROM generate_engine_table LIMIT 3
-```
-
-``` text
-┌─name─┬──────value─┐
-│ c4xJ │ 1412771199 │
-│ r │ 1791099446 │
-│ 7#$ │ 124312908 │
-└──────┴────────────┘
-```
-
-## Details of Implementation {#details-of-implementation}
-
-- Not supported:
-    - `ALTER`
-    - `SELECT ...
SAMPLE` - - `INSERT` - - Indices - - Replication - -[Original article](https://clickhouse.tech/docs/en/operations/table_engines/generate/) diff --git a/docs/zh/operations/table_engines/hdfs.md b/docs/zh/operations/table_engines/hdfs.md deleted file mode 100644 index 576bbc49d72..00000000000 --- a/docs/zh/operations/table_engines/hdfs.md +++ /dev/null @@ -1,120 +0,0 @@ ---- -en_copy: true ---- - -# HDFS {#table_engines-hdfs} - -This engine provides integration with [Apache Hadoop](https://en.wikipedia.org/wiki/Apache_Hadoop) ecosystem by allowing to manage data on [HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)via ClickHouse. This engine is similar -to the [File](file.md) and [URL](url.md) engines, but provides Hadoop-specific features. - -## Usage {#usage} - -``` sql -ENGINE = HDFS(URI, format) -``` - -The `URI` parameter is the whole file URI in HDFS. -The `format` parameter specifies one of the available file formats. To perform -`SELECT` queries, the format must be supported for input, and to perform -`INSERT` queries – for output. The available formats are listed in the -[Formats](../../interfaces/formats.md#formats) section. -The path part of `URI` may contain globs. In this case the table would be readonly. - -**Example:** - -**1.** Set up the `hdfs_engine_table` table: - -``` sql -CREATE TABLE hdfs_engine_table (name String, value UInt32) ENGINE=HDFS('hdfs://hdfs1:9000/other_storage', 'TSV') -``` - -**2.** Fill file: - -``` sql -INSERT INTO hdfs_engine_table VALUES ('one', 1), ('two', 2), ('three', 3) -``` - -**3.** Query the data: - -``` sql -SELECT * FROM hdfs_engine_table LIMIT 2 -``` - -``` text -┌─name─┬─value─┐ -│ one │ 1 │ -│ two │ 2 │ -└──────┴───────┘ -``` - -## Implementation Details {#implementation-details} - -- Reads and writes can be parallel -- Not supported: - - `ALTER` and `SELECT...SAMPLE` operations. - - Indexes. - - Replication. - -**Globs in path** - -Multiple path components can have globs. For being processed file should exists and matches to the whole path pattern. Listing of files determines during `SELECT` (not at `CREATE` moment). - -- `*` — Substitutes any number of any characters except `/` including empty string. -- `?` — Substitutes any single character. -- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. -- `{N..M}` — Substitutes any number in range from N to M including both borders (could include leading zeros). - -Constructions with `{}` are similar to the [remote](../../query_language/table_functions/remote.md) table function. - -**Example** - -1. Suppose we have several files in TSV format with the following URIs on HDFS: - -- ‘hdfs://hdfs1:9000/some\_dir/some\_file\_1’ -- ‘hdfs://hdfs1:9000/some\_dir/some\_file\_2’ -- ‘hdfs://hdfs1:9000/some\_dir/some\_file\_3’ -- ‘hdfs://hdfs1:9000/another\_dir/some\_file\_1’ -- ‘hdfs://hdfs1:9000/another\_dir/some\_file\_2’ -- ‘hdfs://hdfs1:9000/another\_dir/some\_file\_3’ - -1. 
There are several ways to make a table consisting of all six files: - - - -``` sql -CREATE TABLE table_with_range (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9000/{some,another}_dir/some_file_{1..3}', 'TSV') -``` - -Another way: - -``` sql -CREATE TABLE table_with_question_mark (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9000/{some,another}_dir/some_file_?', 'TSV') -``` - -Table consists of all the files in both directories (all files should satisfy format and schema described in query): - -``` sql -CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9000/{some,another}_dir/*', 'TSV') -``` - -!!! warning "Warning" - If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. - -**Example** - -Create table with files named `file000`, `file001`, … , `file999`: - -``` sql -CREARE TABLE big_table (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV') -``` - -## Virtual Columns {#virtual-columns} - -- `_path` — Path to the file. -- `_file` — Name of the file. - -**See Also** - -- [Virtual columns](https://clickhouse.tech/docs/en/operations/table_engines/#table_engines-virtual_columns) - -[Original article](https://clickhouse.tech/docs/en/operations/table_engines/hdfs/) diff --git a/docs/zh/operations/table_engines/materializedview.md b/docs/zh/operations/table_engines/materializedview.md deleted file mode 100644 index b22700fe3c6..00000000000 --- a/docs/zh/operations/table_engines/materializedview.md +++ /dev/null @@ -1,5 +0,0 @@ -# 物化视图 {#wu-hua-shi-tu} - -物化视图的使用(更多信息请参阅 [CREATE TABLE](../../query_language/create.md) )。它需要使用一个不同的引擎来存储数据,这个引擎要在创建物化视图时指定。当从表中读取时,它就会使用该引擎。 - -[来源文章](https://clickhouse.tech/docs/en/operations/table_engines/materializedview/) diff --git a/docs/zh/operations/table_engines/odbc.md b/docs/zh/operations/table_engines/odbc.md deleted file mode 100644 index 69003623e0a..00000000000 --- a/docs/zh/operations/table_engines/odbc.md +++ /dev/null @@ -1,129 +0,0 @@ ---- -en_copy: true ---- - -# ODBC {#table-engine-odbc} - -Allows ClickHouse to connect to external databases via [ODBC](https://en.wikipedia.org/wiki/Open_Database_Connectivity). - -To safely implement ODBC connections, ClickHouse uses a separate program `clickhouse-odbc-bridge`. If the ODBC driver is loaded directly from `clickhouse-server`, driver problems can crash the ClickHouse server. ClickHouse automatically starts `clickhouse-odbc-bridge` when it is required. The ODBC bridge program is installed from the same package as the `clickhouse-server`. - -This engine supports the [Nullable](../../data_types/nullable.md) data type. - -## Creating a Table {#creating-a-table} - -``` sql -CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] -( - name1 [type1], - name2 [type2], - ... -) -ENGINE = ODBC(connection_settings, external_database, external_table) -``` - -See a detailed description of the [CREATE TABLE](../../query_language/create.md#create-table-query) query. - -The table structure can differ from the source table structure: - -- Column names should be the same as in the source table, but you can use just some of these columns and in any order. -- Column types may differ from those in the source table. ClickHouse tries to [cast](../../query_language/functions/type_conversion_functions.md#type_conversion_function-cast) values to the ClickHouse data types. 
- -**Engine Parameters** - -- `connection_settings` — Name of the section with connection settings in the `odbc.ini` file. -- `external_database` — Name of a database in an external DBMS. -- `external_table` — Name of a table in the `external_database`. - -## Usage Example {#usage-example} - -**Retrieving data from the local MySQL installation via ODBC** - -This example is checked for Ubuntu Linux 18.04 and MySQL server 5.7. - -Ensure that unixODBC and MySQL Connector are installed. - -By default (if installed from packages), ClickHouse starts as user `clickhouse`. Thus, you need to create and configure this user in the MySQL server. - -``` bash -$ sudo mysql -``` - -``` sql -mysql> CREATE USER 'clickhouse'@'localhost' IDENTIFIED BY 'clickhouse'; -mysql> GRANT ALL PRIVILEGES ON *.* TO 'clickhouse'@'clickhouse' WITH GRANT OPTION; -``` - -Then configure the connection in `/etc/odbc.ini`. - -``` bash -$ cat /etc/odbc.ini -[mysqlconn] -DRIVER = /usr/local/lib/libmyodbc5w.so -SERVER = 127.0.0.1 -PORT = 3306 -DATABASE = test -USERNAME = clickhouse -PASSWORD = clickhouse -``` - -You can check the connection using the `isql` utility from the unixODBC installation. - -``` bash -$ isql -v mysqlconn -+---------------------------------------+ -| Connected! | -| | -... -``` - -Table in MySQL: - -``` text -mysql> CREATE TABLE `test`.`test` ( - -> `int_id` INT NOT NULL AUTO_INCREMENT, - -> `int_nullable` INT NULL DEFAULT NULL, - -> `float` FLOAT NOT NULL, - -> `float_nullable` FLOAT NULL DEFAULT NULL, - -> PRIMARY KEY (`int_id`)); -Query OK, 0 rows affected (0,09 sec) - -mysql> insert into test (`int_id`, `float`) VALUES (1,2); -Query OK, 1 row affected (0,00 sec) - -mysql> select * from test; -+--------+--------------+-------+----------------+ -| int_id | int_nullable | float | float_nullable | -+--------+--------------+-------+----------------+ -| 1 | NULL | 2 | NULL | -+--------+--------------+-------+----------------+ -1 row in set (0,00 sec) -``` - -Table in ClickHouse, retrieving data from the MySQL table: - -``` sql -CREATE TABLE odbc_t -( - `int_id` Int32, - `float_nullable` Nullable(Float32) -) -ENGINE = ODBC('DSN=mysqlconn', 'test', 'test') -``` - -``` sql -SELECT * FROM odbc_t -``` - -``` text -┌─int_id─┬─float_nullable─┐ -│ 1 │ ᴺᵁᴸᴸ │ -└────────┴────────────────┘ -``` - -## See Also {#see-also} - -- [ODBC external dictionaries](../../query_language/dicts/external_dicts_dict_sources.md#dicts-external_dicts_dict_sources-odbc) -- [ODBC table function](../../query_language/table_functions/odbc.md) - -[Original article](https://clickhouse.tech/docs/en/operations/table_engines/odbc/) diff --git a/docs/zh/operations/table_engines/versionedcollapsingmergetree.md b/docs/zh/operations/table_engines/versionedcollapsingmergetree.md deleted file mode 100644 index 29f6d44d748..00000000000 --- a/docs/zh/operations/table_engines/versionedcollapsingmergetree.md +++ /dev/null @@ -1,235 +0,0 @@ ---- -en_copy: true ---- - -# VersionedCollapsingMergeTree {#versionedcollapsingmergetree} - -This engine: - -- Allows quick writing of object states that are continually changing. -- Deletes old object states in the background. This significantly reduces the volume of storage. - -See the section [Collapsing](#table_engines_versionedcollapsingmergetree) for details. - -The engine inherits from [MergeTree](mergetree.md#table_engines-mergetree) and adds the logic for collapsing rows to the algorithm for merging data parts. 
`VersionedCollapsingMergeTree` serves the same purpose as [CollapsingMergeTree](collapsingmergetree.md) but uses a different collapsing algorithm that allows inserting the data in any order with multiple threads. In particular, the `Version` column helps to collapse the rows properly even if they are inserted in the wrong order. In contrast, `CollapsingMergeTree` allows only strictly consecutive insertion. - -## Creating a Table {#creating-a-table} - -``` sql -CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] -( - name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1], - name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2], - ... -) ENGINE = VersionedCollapsingMergeTree(sign, version) -[PARTITION BY expr] -[ORDER BY expr] -[SAMPLE BY expr] -[SETTINGS name=value, ...] -``` - -For a description of query parameters, see the [query description](../../query_language/create.md). - -**Engine Parameters** - -``` sql -VersionedCollapsingMergeTree(sign, version) -``` - -- `sign` — Name of the column with the type of row: `1` is a “state” row, `-1` is a “cancel” row. - - The column data type should be `Int8`. - -- `version` — Name of the column with the version of the object state. - - The column data type should be `UInt*`. - -**Query Clauses** - -When creating a `VersionedCollapsingMergeTree` table, the same [clauses](mergetree.md) are required as when creating a `MergeTree` table. - -
-
-**Deprecated Method for Creating a Table**
-
-!!! attention "Attention"
-    Do not use this method in new projects. If possible, switch old projects to the method described above.
-
-``` sql
-CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
-(
-    name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
-    name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
-    ...
-) ENGINE [=] VersionedCollapsingMergeTree(date-column [, sampling_expression], (primary, key), index_granularity, sign, version)
-```
-
-All of the parameters except `sign` and `version` have the same meaning as in `MergeTree`.
-
-- `sign` — Name of the column with the type of row: `1` is a “state” row, `-1` is a “cancel” row.
-
-    The column data type should be `Int8`.
-
-- `version` — Name of the column with the version of the object state.
-
-    The column data type should be `UInt*`.
-
- -## Collapsing {#table_engines-versionedcollapsingmergetree} - -### Data {#data} - -Consider a situation where you need to save continually changing data for some object. It is reasonable to have one row for an object and update the row whenever there are changes. However, the update operation is expensive and slow for a DBMS because it requires rewriting the data in the storage. Update is not acceptable if you need to write data quickly, but you can write the changes to an object sequentially as follows. - -Use the `Sign` column when writing the row. If `Sign = 1` it means that the row is a state of an object (let’s call it the “state” row). If `Sign = -1` it indicates the cancellation of the state of an object with the same attributes (let’s call it the “cancel” row). Also use the `Version` column, which should identify each state of an object with a separate number. - -For example, we want to calculate how many pages users visited on some site and how long they were there. At some point in time we write the following row with the state of user activity: - -``` text -┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┬─Version─┐ -│ 4324182021466249494 │ 5 │ 146 │ 1 │ 1 | -└─────────────────────┴───────────┴──────────┴──────┴─────────┘ -``` - -At some point later we register the change of user activity and write it with the following two rows. - -``` text -┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┬─Version─┐ -│ 4324182021466249494 │ 5 │ 146 │ -1 │ 1 | -│ 4324182021466249494 │ 6 │ 185 │ 1 │ 2 | -└─────────────────────┴───────────┴──────────┴──────┴─────────┘ -``` - -The first row cancels the previous state of the object (user). It should copy all of the fields of the canceled state except `Sign`. - -The second row contains the current state. - -Because we need only the last state of user activity, the rows - -``` text -┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┬─Version─┐ -│ 4324182021466249494 │ 5 │ 146 │ 1 │ 1 | -│ 4324182021466249494 │ 5 │ 146 │ -1 │ 1 | -└─────────────────────┴───────────┴──────────┴──────┴─────────┘ -``` - -can be deleted, collapsing the invalid (old) state of the object. `VersionedCollapsingMergeTree` does this while merging the data parts. - -To find out why we need two rows for each change, see [Algorithm](#table_engines-versionedcollapsingmergetree-algorithm). - -**Notes on Usage** - -1. The program that writes the data should remember the state of an object in order to cancel it. The “cancel” string should be a copy of the “state” string with the opposite `Sign`. This increases the initial size of storage but allows to write the data quickly. -2. Long growing arrays in columns reduce the efficiency of the engine due to the load for writing. The more straightforward the data, the better the efficiency. -3. `SELECT` results depend strongly on the consistency of the history of object changes. Be accurate when preparing data for inserting. You can get unpredictable results with inconsistent data, such as negative values for non-negative metrics like session depth. - -### Algorithm {#table_engines-versionedcollapsingmergetree-algorithm} - -When ClickHouse merges data parts, it deletes each pair of rows that have the same primary key and version and different `Sign`. The order of rows does not matter. - -When ClickHouse inserts data, it orders rows by the primary key. If the `Version` column is not in the primary key, ClickHouse adds it to the primary key implicitly as the last field and uses it for ordering. 
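
Because merges are scheduled in the background at a time ClickHouse chooses, collapsing cannot be observed immediately after inserting data. For a small experiment you can force an unscheduled merge; a minimal sketch, using the `UAct` table from the usage example later in this section (an unscheduled merge is expensive, so avoid this on large production tables):

``` sql
-- Force an unscheduled merge of all parts of the table so that
-- row pairs with equal primary key and version and opposite Sign collapse.
OPTIMIZE TABLE UAct FINAL
```
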
- -## Selecting Data {#selecting-data} - -ClickHouse doesn’t guarantee that all of the rows with the same primary key will be in the same resulting data part or even on the same physical server. This is true both for writing the data and for subsequent merging of the data parts. In addition, ClickHouse processes `SELECT` queries with multiple threads, and it cannot predict the order of rows in the result. This means that aggregation is required if there is a need to get completely “collapsed” data from a `VersionedCollapsingMergeTree` table. - -To finalize collapsing, write a query with a `GROUP BY` clause and aggregate functions that account for the sign. For example, to calculate quantity, use `sum(Sign)` instead of `count()`. To calculate the sum of something, use `sum(Sign * x)` instead of `sum(x)`, and add `HAVING sum(Sign) > 0`. - -The aggregates `count`, `sum` and `avg` can be calculated this way. The aggregate `uniq` can be calculated if an object has at least one non-collapsed state. The aggregates `min` and `max` can’t be calculated because `VersionedCollapsingMergeTree` does not save the history of values of collapsed states. - -If you need to extract the data with “collapsing” but without aggregation (for example, to check whether rows are present whose newest values match certain conditions), you can use the `FINAL` modifier for the `FROM` clause. This approach is inefficient and should not be used with large tables. - -## Example of Use {#example-of-use} - -Example data: - -``` text -┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┬─Version─┐ -│ 4324182021466249494 │ 5 │ 146 │ 1 │ 1 | -│ 4324182021466249494 │ 5 │ 146 │ -1 │ 1 | -│ 4324182021466249494 │ 6 │ 185 │ 1 │ 2 | -└─────────────────────┴───────────┴──────────┴──────┴─────────┘ -``` - -Creating the table: - -``` sql -CREATE TABLE UAct -( - UserID UInt64, - PageViews UInt8, - Duration UInt8, - Sign Int8, - Version UInt8 -) -ENGINE = VersionedCollapsingMergeTree(Sign, Version) -ORDER BY UserID -``` - -Inserting the data: - -``` sql -INSERT INTO UAct VALUES (4324182021466249494, 5, 146, 1, 1) -``` - -``` sql -INSERT INTO UAct VALUES (4324182021466249494, 5, 146, -1, 1),(4324182021466249494, 6, 185, 1, 2) -``` - -We use two `INSERT` queries to create two different data parts. If we insert the data with a single query, ClickHouse creates one data part and will never perform any merge. - -Getting the data: - -``` sql -SELECT * FROM UAct -``` - -``` text -┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┬─Version─┐ -│ 4324182021466249494 │ 5 │ 146 │ 1 │ 1 │ -└─────────────────────┴───────────┴──────────┴──────┴─────────┘ -┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┬─Version─┐ -│ 4324182021466249494 │ 5 │ 146 │ -1 │ 1 │ -│ 4324182021466249494 │ 6 │ 185 │ 1 │ 2 │ -└─────────────────────┴───────────┴──────────┴──────┴─────────┘ -``` - -What do we see here and where are the collapsed parts? -We created two data parts using two `INSERT` queries. The `SELECT` query was performed in two threads, and the result is a random order of rows. -Collapsing did not occur because the data parts have not been merged yet. ClickHouse merges data parts at an unknown point in time which we cannot predict. 
- -This is why we need aggregation: - -``` sql -SELECT - UserID, - sum(PageViews * Sign) AS PageViews, - sum(Duration * Sign) AS Duration, - Version -FROM UAct -GROUP BY UserID, Version -HAVING sum(Sign) > 0 -``` - -``` text -┌──────────────UserID─┬─PageViews─┬─Duration─┬─Version─┐ -│ 4324182021466249494 │ 6 │ 185 │ 2 │ -└─────────────────────┴───────────┴──────────┴─────────┘ -``` - -If we don’t need aggregation and want to force collapsing, we can use the `FINAL` modifier for the `FROM` clause. - -``` sql -SELECT * FROM UAct FINAL -``` - -``` text -┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┬─Version─┐ -│ 4324182021466249494 │ 6 │ 185 │ 1 │ 2 │ -└─────────────────────┴───────────┴──────────┴──────┴─────────┘ -``` - -This is a very inefficient way to select data. Don’t use it for large tables. - -[Original article](https://clickhouse.tech/docs/en/operations/table_engines/versionedcollapsingmergetree/) diff --git a/docs/zh/operations/tips.md b/docs/zh/operations/tips.md index 8980d74e8b2..05509399d85 100644 --- a/docs/zh/operations/tips.md +++ b/docs/zh/operations/tips.md @@ -1,126 +1,127 @@ -# Usage Recommendations {#usage-recommendations} + +# 使用建议 {#usage-recommendations} ## CPU {#cpu} -The SSE 4.2 instruction set must be supported. Modern processors (since 2008) support it. +必须支持SSE4.2指令集。 现代处理器(自2008年以来)支持它。 -When choosing a processor, prefer a large number of cores and slightly slower clock rate over fewer cores and a higher clock rate. -For example, 16 cores with 2600 MHz is better than 8 cores with 3600 MHz. +选择处理器时,与较少的内核和较高的时钟速率相比,更喜欢大量内核和稍慢的时钟速率。 +例如,具有2600MHz的16核心比具有3600MHz的8核心更好。 -## Hyper-threading {#hyper-threading} +## 超线程 {#hyper-threading} -Don’t disable hyper-threading. It helps for some queries, but not for others. +不要禁用超线程。 它有助于某些查询,但不适用于其他查询。 -## Turbo Boost {#turbo-boost} +## 涡轮增压 {#turbo-boost} -Turbo Boost is highly recommended. It significantly improves performance with a typical load. -You can use `turbostat` to view the CPU’s actual clock rate under a load. +强烈推荐涡轮增压。 它显着提高了典型负载的性能。 +您可以使用 `turbostat` 要查看负载下的CPU的实际时钟速率。 -## CPU Scaling Governor {#cpu-scaling-governor} +## CPU缩放调控器 {#cpu-scaling-governor} -Always use the `performance` scaling governor. The `on-demand` scaling governor works much worse with constantly high demand. +始终使用 `performance` 缩放调控器。 该 `on-demand` 随着需求的不断增加,缩放调节器的工作要糟糕得多。 ``` bash echo 'performance' | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor ``` -## CPU Limitations {#cpu-limitations} +## CPU限制 {#cpu-limitations} -Processors can overheat. Use `dmesg` to see if the CPU’s clock rate was limited due to overheating. -The restriction can also be set externally at the datacenter level. You can use `turbostat` to monitor it under a load. +处理器可能会过热。 使用 `dmesg` 看看CPU的时钟速率是否由于过热而受到限制。 +此限制也可以在数据中心级别的外部设置。 您可以使用 `turbostat` 在负载下监视它。 ## RAM {#ram} -For small amounts of data (up to ~200 GB compressed), it is best to use as much memory as the volume of data. -For large amounts of data and when processing interactive (online) queries, you should use a reasonable amount of RAM (128 GB or more) so the hot data subset will fit in the cache of pages. -Even for data volumes of ~50 TB per server, using 128 GB of RAM significantly improves query performance compared to 64 GB. +对于少量数据(高达-200GB压缩),最好使用与数据量一样多的内存。 +对于大量数据和处理交互式(在线)查询时,应使用合理数量的RAM(128GB或更多),以便热数据子集适合页面缓存。 +即使对于每台服务器约50TB的数据量,使用128GB的RAM与64GB相比显着提高了查询性能。 -## Swap File {#swap-file} +## 交换文件 {#swap-file} -Always disable the swap file. 
The only reason for not doing this is if you are using ClickHouse on your personal laptop. +始终禁用交换文件。 不这样做的唯一原因是,如果您使用的ClickHouse在您的个人笔记本电脑。 -## Huge Pages {#huge-pages} +## 巨大的页面 {#huge-pages} -Always disable transparent huge pages. It interferes with memory allocators, which leads to significant performance degradation. +始终禁用透明巨大的页面。 它会干扰内存分alloc,从而导致显着的性能下降。 ``` bash echo 'never' | sudo tee /sys/kernel/mm/transparent_hugepage/enabled ``` -Use `perf top` to watch the time spent in the kernel for memory management. -Permanent huge pages also do not need to be allocated. +使用 `perf top` 观看内核中用于内存管理的时间。 +永久巨大的页面也不需要被分配。 -## Storage Subsystem {#storage-subsystem} +## 存储子系统 {#storage-subsystem} -If your budget allows you to use SSD, use SSD. -If not, use HDD. SATA HDDs 7200 RPM will do. +如果您的预算允许您使用SSD,请使用SSD。 +如果没有,请使用硬盘。 SATA硬盘7200转就行了。 -Give preference to a lot of servers with local hard drives over a smaller number of servers with attached disk shelves. -But for storing archives with rare queries, shelves will work. +优先选择带有本地硬盘驱动器的大量服务器,而不是带有附加磁盘架的小量服务器。 +但是对于存储具有罕见查询的档案,货架将起作用。 ## RAID {#raid} -When using HDD, you can combine their RAID-10, RAID-5, RAID-6 or RAID-50. -For Linux, software RAID is better (with `mdadm`). We don’t recommend using LVM. -When creating RAID-10, select the `far` layout. -If your budget allows, choose RAID-10. +当使用硬盘,你可以结合他们的RAID-10,RAID-5,RAID-6或RAID-50。 +对于Linux,软件RAID更好(与 `mdadm`). 我们不建议使用LVM。 +当创建RAID-10,选择 `far` 布局。 +如果您的预算允许,请选择RAID-10。 -If you have more than 4 disks, use RAID-6 (preferred) or RAID-50, instead of RAID-5. -When using RAID-5, RAID-6 or RAID-50, always increase stripe\_cache\_size, since the default value is usually not the best choice. +如果您有超过4个磁盘,请使用RAID-6(首选)或RAID-50,而不是RAID-5。 +当使用RAID-5、RAID-6或RAID-50时,始终增加stripe\_cache\_size,因为默认值通常不是最佳选择。 ``` bash echo 4096 | sudo tee /sys/block/md2/md/stripe_cache_size ``` -Calculate the exact number from the number of devices and the block size, using the formula: `2 * num_devices * chunk_size_in_bytes / 4096`. +使用以下公式,从设备数量和块大小计算确切数量: `2 * num_devices * chunk_size_in_bytes / 4096`. -A block size of 1025 KB is sufficient for all RAID configurations. -Never set the block size too small or too large. +1025KB的块大小足以满足所有RAID配置。 +切勿将块大小设置得太小或太大。 -You can use RAID-0 on SSD. -Regardless of RAID use, always use replication for data security. +您可以在SSD上使用RAID-0。 +无论使用何种RAID,始终使用复制来保证数据安全。 -Enable NCQ with a long queue. For HDD, choose the CFQ scheduler, and for SSD, choose noop. Don’t reduce the ‘readahead’ setting. -For HDD, enable the write cache. +使用长队列启用NCQ。 对于HDD,选择CFQ调度程序,对于SSD,选择noop。 不要减少 ‘readahead’ 设置。 +对于HDD,启用写入缓存。 -## File System {#file-system} +## 文件系统 {#file-system} -Ext4 is the most reliable option. Set the mount options `noatime, nobarrier`. -XFS is also suitable, but it hasn’t been as thoroughly tested with ClickHouse. -Most other file systems should also work fine. File systems with delayed allocation work better. +Ext4是最可靠的选择。 设置挂载选项 `noatime, nobarrier`. +XFS也是合适的,但它还没有经过ClickHouse的彻底测试。 +大多数其他文件系统也应该正常工作。 具有延迟分配的文件系统工作得更好。 -## Linux Kernel {#linux-kernel} +## Linux内核 {#linux-kernel} -Don’t use an outdated Linux kernel. +不要使用过时的Linux内核。 -## Network {#network} +## 网络 {#network} -If you are using IPv6, increase the size of the route cache. -The Linux kernel prior to 3.2 had a multitude of problems with IPv6 implementation. +如果您使用的是IPv6,请增加路由缓存的大小。 +3.2之前的Linux内核在IPv6实现方面遇到了许多问题。 -Use at least a 10 GB network, if possible. 
1 Gb will also work, but it will be much worse for patching replicas with tens of terabytes of data, or for processing distributed queries with a large amount of intermediate data.
+如果可能的话,至少使用10GB的网络。 1Gb也可以工作,但对于修补具有数十TB数据的副本,或处理具有大量中间数据的分布式查询,情况会差得多。

-## ZooKeeper {#zookeeper}
+## ZooKeeper {#zookeeper}

-You are probably already using ZooKeeper for other purposes. You can use the same installation of ZooKeeper, if it isn’t already overloaded.
+您可能已经将ZooKeeper用于其他目的。 如果ZooKeeper尚未过载,您可以使用同一个ZooKeeper安装。

-It’s best to use a fresh version of ZooKeeper – 3.4.9 or later. The version in stable Linux distributions may be outdated.
+It's best to use a fresh version of ZooKeeper – 3.4.9 or later. The version in stable Linux distributions may be outdated.

You should never use manually written scripts to transfer data between different ZooKeeper clusters, because the result will be incorrect for sequential nodes. Never use the «zkcopy» utility for the same reason: https://github.com/ksprojects/zkcopy/issues/15

-If you want to divide an existing ZooKeeper cluster into two, the correct way is to increase the number of its replicas and then reconfigure it as two independent clusters.
+如果要将现有ZooKeeper集群一分为二,正确的方法是先增加其副本数量,然后将其重新配置为两个独立的集群。

-Do not run ZooKeeper on the same servers as ClickHouse. Because ZooKeeper is very sensitive for latency and ClickHouse may utilize all available system resources.
+不要在与ClickHouse相同的服务器上运行ZooKeeper,因为ZooKeeper对延迟非常敏感,而ClickHouse可能会占用所有可用的系统资源。

-With the default settings, ZooKeeper is a time bomb:
+使用默认设置时,ZooKeeper是一个定时炸弹:

-> The ZooKeeper server won’t delete files from old snapshots and logs when using the default configuration (see autopurge), and this is the responsibility of the operator.
+> 使用默认配置时,ZooKeeper服务器不会删除旧快照和日志中的文件(请参阅autopurge),这是操作员的责任。

-This bomb must be defused.
+必须拆除炸弹 -The ZooKeeper (3.5.1) configuration below is used in the Yandex.Metrica production environment as of May 20, 2017: +下面的ZooKeeper(3.5.1)配置在Yandex中使用。梅地卡生产环境截至2017年5月20日: -zoo.cfg: +动物园cfg: ``` bash # http://hadoop.apache.org/zookeeper/docs/current/zookeeperAdmin.html @@ -176,12 +177,12 @@ standaloneEnabled=false dynamicConfigFile=/etc/zookeeper-{{ '{{' }} cluster['name'] {{ '}}' }}/conf/zoo.cfg.dynamic ``` -Java version: +Java版本: Java(TM) SE Runtime Environment (build 1.8.0_25-b17) Java HotSpot(TM) 64-Bit Server VM (build 25.25-b02, mixed mode) -JVM parameters: +JVM参数: ``` bash NAME=zookeeper-{{ '{{' }} cluster['name'] {{ '}}' }} @@ -222,7 +223,7 @@ JAVA_OPTS="-Xms{{ '{{' }} cluster.get('xms','128M') {{ '}}' }} \ -XX:+CMSParallelRemarkEnabled" ``` -Salt init: +盐初始化: description "zookeeper-{{ '{{' }} cluster['name'] {{ '}}' }} centralized coordination service" @@ -251,4 +252,4 @@ Salt init: -Dzookeeper.root.logger=${ZOO_LOG4J_PROP} $ZOOMAIN $ZOOCFG end script -[Original article](https://clickhouse.tech/docs/en/operations/tips/) +[原始文章](https://clickhouse.tech/docs/en/operations/tips/) diff --git a/docs/zh/operations/troubleshooting.md b/docs/zh/operations/troubleshooting.md index d48e2b4b7f6..db7bf6c6bb9 100644 --- a/docs/zh/operations/troubleshooting.md +++ b/docs/zh/operations/troubleshooting.md @@ -1,66 +1,69 @@ --- -en_copy: true +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_priority: 46 +toc_title: "\u7591\u96BE\u89E3\u7B54" --- -# Troubleshooting {#troubleshooting} +# 疑难解答 {#troubleshooting} -- [Installation](#troubleshooting-installation-errors) -- [Connecting to the server](#troubleshooting-accepts-no-connections) -- [Query processing](#troubleshooting-does-not-process-queries) -- [Efficiency of query processing](#troubleshooting-too-slow) +- [安装方式](#troubleshooting-installation-errors) +- [连接到服务器](#troubleshooting-accepts-no-connections) +- [查询处理](#troubleshooting-does-not-process-queries) +- [查询处理效率](#troubleshooting-too-slow) -## Installation {#troubleshooting-installation-errors} +## 安装方式 {#troubleshooting-installation-errors} -### You Cannot Get Deb Packages from ClickHouse Repository With apt-get {#you-cannot-get-deb-packages-from-clickhouse-repository-with-apt-get} +### 您无法使用Apt-get从ClickHouse存储库获取Deb软件包 {#you-cannot-get-deb-packages-from-clickhouse-repository-with-apt-get} -- Check firewall settings. -- If you cannot access the repository for any reason, download packages as described in the [Getting started](../getting_started/index.md) article and install them manually using the `sudo dpkg -i ` command. You will also need the `tzdata` package. +- 检查防火墙设置。 +- 如果出于任何原因无法访问存储库,请按照以下文件中的描述下载软件包 [开始](../getting_started/index.md) 文章并使用手动安装它们 `sudo dpkg -i ` 指挥部 您还需要 `tzdata` 包。 -## Connecting to the Server {#troubleshooting-accepts-no-connections} +## 连接到服务器 {#troubleshooting-accepts-no-connections} -Possible issues: +可能出现的问题: -- The server is not running. -- Unexpected or wrong configuration parameters. +- 服务器未运行。 +- 意外或错误的配置参数。 -### Server Is Not Running {#server-is-not-running} +### 服务器未运行 {#server-is-not-running} -**Check if server is runnnig** +**检查服务器是否运行nnig** -Command: +命令: ``` bash $ sudo service clickhouse-server status ``` -If the server is not running, start it with the command: +如果服务器没有运行,请使用以下命令启动它: ``` bash $ sudo service clickhouse-server start ``` -**Check logs** +**检查日志** -The main log of `clickhouse-server` is in `/var/log/clickhouse-server/clickhouse-server.log` by default. 
+主日志 `clickhouse-server` 是在 `/var/log/clickhouse-server/clickhouse-server.log` 默认情况下。 -If the server started successfully, you should see the strings: +如果服务器成功启动,您应该看到字符串: - ` Application: starting up.` — Server started. - ` Application: Ready for connections.` — Server is running and ready for connections. -If `clickhouse-server` start failed with a configuration error, you should see the `` string with an error description. For example: +如果 `clickhouse-server` 启动失败与配置错误,你应该看到 `` 具有错误描述的字符串。 例如: ``` text 2019.01.11 15:23:25.549505 [ 45 ] {} ExternalDictionaries: Failed reloading 'event2id' external dictionary: Poco::Exception. Code: 1000, e.code() = 111, e.displayText() = Connection refused, e.what() = Connection refused ``` -If you don’t see an error at the end of the file, look through the entire file starting from the string: +如果在文件末尾没有看到错误,请从字符串开始查看整个文件: ``` text Application: starting up. ``` -If you try to start a second instance of `clickhouse-server` on the server, you see the following log: +如果您尝试启动第二个实例 `clickhouse-server` 在服务器上,您将看到以下日志: ``` text 2019.01.11 15:25:11.151730 [ 1 ] {} : Starting ClickHouse 19.1.0 with revision 54413 @@ -76,68 +79,68 @@ Revision: 54413 2019.01.11 15:25:11.156716 [ 2 ] {} BaseDaemon: Stop SignalListener thread ``` -**See system.d logs** +**请参阅系统。d日志** -If you don’t find any useful information in `clickhouse-server` logs or there aren’t any logs, you can view `system.d` logs using the command: +如果你没有找到任何有用的信息 `clickhouse-server` 日志或没有任何日志,您可以查看 `system.d` 使用命令记录: ``` bash $ sudo journalctl -u clickhouse-server ``` -**Start clickhouse-server in interactive mode** +**在交互模式下启动clickhouse服务器** ``` bash $ sudo -u clickhouse /usr/bin/clickhouse-server --config-file /etc/clickhouse-server/config.xml ``` -This command starts the server as an interactive app with standard parameters of the autostart script. In this mode `clickhouse-server` prints all the event messages in the console. +此命令将服务器作为带有自动启动脚本标准参数的交互式应用程序启动。 在这种模式下 `clickhouse-server` 打印控制台中的所有事件消息。 -### Configuration Parameters {#configuration-parameters} +### 配置参数 {#configuration-parameters} -Check: +检查: -- Docker settings. +- 码头工人设置。 - If you run ClickHouse in Docker in an IPv6 network, make sure that `network=host` is set. + 如果您在IPv6网络中的Docker中运行ClickHouse,请确保 `network=host` 已设置。 -- Endpoint settings. +- 端点设置。 - Check [listen\_host](server_settings/settings.md#server_settings-listen_host) and [tcp\_port](server_settings/settings.md#server_settings-tcp_port) settings. + 检查 [listen\_host](server_configuration_parameters/settings.md#server_configuration_parameters-listen_host) 和 [tcp\_port](server_configuration_parameters/settings.md#server_configuration_parameters-tcp_port) 设置。 - ClickHouse server accepts localhost connections only by default. + ClickHouse服务器默认情况下仅接受本地主机连接。 -- HTTP protocol settings. +- HTTP协议设置。 - Check protocol settings for the HTTP API. + 检查HTTP API的协议设置。 -- Secure connection settings. +- 安全连接设置。 - Check: + 检查: - - The [tcp\_port\_secure](server_settings/settings.md#server_settings-tcp_port_secure) setting. - - Settings for [SSL sertificates](server_settings/settings.md#server_settings-openssl). + - 该 [tcp\_port\_secure](server_configuration_parameters/settings.md#server_configuration_parameters-tcp_port_secure) 设置。 + - 设置 [SSL序列](server_configuration_parameters/settings.md#server_configuration_parameters-openssl). - Use proper parameters while connecting. For example, use the `port_secure` parameter with `clickhouse_client`. 
+ 连接时使用正确的参数。 例如,使用 `port_secure` 参数 `clickhouse_client`. -- User settings. +- 用户设置。 - You might be using the wrong user name or password. + 您可能使用了错误的用户名或密码。 -## Query Processing {#troubleshooting-does-not-process-queries} +## 查询处理 {#troubleshooting-does-not-process-queries} -If ClickHouse is not able to process the query, it sends an error description to the client. In the `clickhouse-client` you get a description of the error in the console. If you are using the HTTP interface, ClickHouse sends the error description in the response body. For example: +如果ClickHouse无法处理查询,它会向客户端发送错误描述。 在 `clickhouse-client` 您可以在控制台中获得错误的描述。 如果您使用的是HTTP接口,ClickHouse会在响应正文中发送错误描述。 例如: ``` bash $ curl 'http://localhost:8123/' --data-binary "SELECT a" Code: 47, e.displayText() = DB::Exception: Unknown identifier: a. Note that there are no tables (FROM clause) in your query, context: required_names: 'a' source_tables: table_aliases: private_aliases: column_aliases: public_columns: 'a' masked_columns: array_join_columns: source_columns: , e.what() = DB::Exception ``` -If you start `clickhouse-client` with the `stack-trace` parameter, ClickHouse returns the server stack trace with the description of an error. +如果你开始 `clickhouse-client` 与 `stack-trace` 参数,ClickHouse返回包含错误描述的服务器堆栈跟踪。 -You might see a message about a broken connection. In this case, you can repeat the query. If the connection breaks every time you perform the query, check the server logs for errors. +您可能会看到一条关于连接中断的消息。 在这种情况下,可以重复查询。 如果每次执行查询时连接中断,请检查服务器日志中是否存在错误。 -## Efficiency of Query Processing {#troubleshooting-too-slow} +## 查询处理效率 {#troubleshooting-too-slow} -If you see that ClickHouse is working too slowly, you need to profile the load on the server resources and network for your queries. +如果您发现ClickHouse工作速度太慢,则需要为查询分析服务器资源和网络的负载。 -You can use the clickhouse-benchmark utility to profile queries. It shows the number of queries processed per second, the number of rows processed per second, and percentiles of query processing times. +您可以使用clickhouse-benchmark实用程序来分析查询。 它显示每秒处理的查询数、每秒处理的行数以及查询处理时间的百分位数。 diff --git a/docs/zh/operations/update.md b/docs/zh/operations/update.md index b09eb707e77..a465a8110eb 100644 --- a/docs/zh/operations/update.md +++ b/docs/zh/operations/update.md @@ -1,10 +1,13 @@ --- -en_copy: true +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_priority: 47 +toc_title: "\u70B9\u51FB\u66F4\u65B0" --- -# ClickHouse Update {#clickhouse-update} +# 点击更新 {#clickhouse-update} -If ClickHouse was installed from deb packages, execute the following commands on the server: +如果从deb包安装ClickHouse,请在服务器上执行以下命令: ``` bash $ sudo apt-get update @@ -12,6 +15,6 @@ $ sudo apt-get install clickhouse-client clickhouse-server $ sudo service clickhouse-server restart ``` -If you installed ClickHouse using something other than the recommended deb packages, use the appropriate update method. +如果您使用除推荐的deb包之外的其他内容安装ClickHouse,请使用适当的更新方法。 -ClickHouse does not support a distributed update. The operation should be performed consecutively on each separate server. Do not update all the servers on a cluster simultaneously, or the cluster will be unavailable for some time. 
+ClickHouse不支持分布式更新。 该操作应在每个单独的服务器上依次执行。 不要同时更新集群上的所有服务器,否则集群将在一段时间内不可用。
diff --git a/docs/zh/operations/utils/clickhouse-benchmark.md b/docs/zh/operations/utilities/clickhouse-benchmark.md
similarity index 50%
rename from docs/zh/operations/utils/clickhouse-benchmark.md
rename to docs/zh/operations/utilities/clickhouse-benchmark.md
index 1d8ac3dec46..809f4ebe2a1 100644
--- a/docs/zh/operations/utils/clickhouse-benchmark.md
+++ b/docs/zh/operations/utilities/clickhouse-benchmark.md
@@ -1,63 +1,66 @@
 ---
-en_copy: true
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_priority: 61
+toc_title: "clickhouse-benchmark"
 ---

-# clickhouse-benchmark {#clickhouse-benchmark}
+# clickhouse-benchmark {#clickhouse-benchmark}

-Connects to a ClickHouse server and repeatedly sends specified queries.
+连接到ClickHouse服务器并重复发送指定的查询。

-Syntax:
+语法:

``` bash
$ echo "single query" | clickhouse-benchmark [keys]
```

-or
+或

``` bash
$ clickhouse-benchmark [keys] <<< "single query"
```

-If you want to send a set of queries, create a text file and place each query on the individual string in this file. For example:
+如果要发送一组查询,请创建一个文本文件,并将每个查询单独放在此文件中的一行上。 例如:

``` sql
SELECT * FROM system.numbers LIMIT 10000000
SELECT 1
```

-Then pass this file to a standard input of `clickhouse-benchmark`.
+然后将此文件传递给 `clickhouse-benchmark` 的标准输入。

``` bash
clickhouse-benchmark [keys] < queries_file
```

-## Keys {#clickhouse-benchmark-keys}
+## 键 {#clickhouse-benchmark-keys}

-- `-c N`, `--concurrency=N` — Number of queries that `clickhouse-benchmark` sends simultaneously. Default value: 1.
+- `-c N`, `--concurrency=N` — `clickhouse-benchmark` 同时发送的查询数。 默认值:1。
- `-d N`, `--delay=N` — Interval in seconds between intermediate reports (set 0 to disable reports). Default value: 1.
-- `-h WORD`, `--host=WORD` — Server host. Default value: `localhost`. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-h` keys.
-- `-p N`, `--port=N` — Server port. Default value: 9000. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-p` keys.
+- `-h WORD`, `--host=WORD` — Server host. Default value: `localhost`. 对于 [比较模式](#clickhouse-benchmark-comparison-mode),可以使用多个 `-h` 键。
+- `-p N`, `--port=N` — Server port. Default value: 9000. 对于 [比较模式](#clickhouse-benchmark-comparison-mode),可以使用多个 `-p` 键。
- `-i N`, `--iterations=N` — Total number of queries. Default value: 0.
- `-r`, `--randomize` — Random order of queries execution if there is more than one input query.
- `-s`, `--secure` — Using TLS connection.
-- `-t N`, `--timelimit=N` — Time limit in seconds. `clickhouse-benchmark` stops sending queries when the specified time limit is reached. Default value: 0 (time limit disabled).
-- `--confidence=N` — Level of confidence for T-test. Possible values: 0 (80%), 1 (90%), 2 (95%), 3 (98%), 4 (99%), 5 (99.5%). Default value: 5. In the [comparison mode](#clickhouse-benchmark-comparison-mode) `clickhouse-benchmark` performs the [Independent two-sample Student’s t-test](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test) test to determine whether the two distributions aren’t different with the selected level of confidence.
+- `-t N`, `--timelimit=N` — Time limit in seconds. `clickhouse-benchmark` 达到指定的时间限制时停止发送查询。 默认值:0(禁用时间限制)。
+- `--confidence=N` — Level of confidence for T-test. Possible values: 0 (80%), 1 (90%), 2 (95%), 3 (98%), 4 (99%), 5 (99.5%). Default value: 5.
In the [比较模式](#clickhouse-benchmark-comparison-mode) `clickhouse-benchmark` 执行 [独立双样本学生的t测试](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test) 测试以确定两个分布是否与所选置信水平没有不同。 - `--cumulative` — Printing cumulative data instead of data per interval. - `--database=DATABASE_NAME` — ClickHouse database name. Default value: `default`. -- `--json=FILEPATH` — JSON output. When the key is set, `clickhouse-benchmark` outputs a report to the specified JSON-file. +- `--json=FILEPATH` — JSON output. When the key is set, `clickhouse-benchmark` 将报告输出到指定的JSON文件。 - `--user=USERNAME` — ClickHouse user name. Default value: `default`. - `--password=PSWD` — ClickHouse user password. Default value: empty string. -- `--stacktrace` — Stack traces output. When the key is set, `clickhouse-bencmark` outputs stack traces of exceptions. -- `--stage=WORD` — Query processing stage at server. ClickHouse stops query processing and returns answer to `clickhouse-benchmark` at the specified stage. Possible values: `complete`, `fetch_columns`, `with_mergeable_state`. Default value: `complete`. +- `--stacktrace` — Stack traces output. When the key is set, `clickhouse-bencmark` 输出异常的堆栈跟踪。 +- `--stage=WORD` — Query processing stage at server. ClickHouse stops query processing and returns answer to `clickhouse-benchmark` 在指定的阶段。 可能的值: `complete`, `fetch_columns`, `with_mergeable_state`. 默认值: `complete`. - `--help` — Shows the help message. -If you want to apply some [settings](../../operations/settings/index.md) for queries, pass them as a key `--= SETTING_VALUE`. For example, `--max_memory_usage=1048576`. +如果你想申请一些 [设置](../../operations/settings/index.md) 对于查询,请将它们作为键传递 `--= SETTING_VALUE`. 例如, `--max_memory_usage=1048576`. -## Output {#clickhouse-benchmark-output} +## 输出 {#clickhouse-benchmark-output} -By default, `clickhouse-benchmark` reports for each `--delay` interval. +默认情况下, `clickhouse-benchmark` 每个报表 `--delay` 间隔。 -Example of the report: +报告示例: ``` text Queries executed: 10. @@ -80,29 +83,29 @@ localhost:9000, queries 10, QPS: 6.772, RPS: 67904487.440, MiB/s: 518.070, resul 99.990% 0.150 sec. ``` -In the report you can find: +在报告中,您可以找到: -- Number of queries in the `Queries executed:` field. +- 在查询的数量 `Queries executed:` 场。 -- Status string containing (in order): +- 状态字符串包含(按顺序): - - Endpoint of ClickHouse server. - - Number of processed queries. - - QPS: QPS: How many queries server performed per second during a period specified in the `--delay` argument. - - RPS: How many rows server read per second during a period specified in the `--delay` argument. - - MiB/s: How many mebibytes server read per second during a period specified in the `--delay` argument. - - result RPS: How many rows placed by server to the result of a query per second during a period specified in the `--delay` argument. - - result MiB/s. How many mebibytes placed by server to the result of a query per second during a period specified in the `--delay` argument. + - ClickHouse服务器的端点。 + - 已处理的查询数。 + - QPS:QPS:在指定的时间段内每秒执行多少个查询服务器 `--delay` 争论。 + - RPS:在指定的时间段内,服务器每秒读取多少行 `--delay` 争论。 + - MiB/s:在指定的时间段内每秒读取多少mebibytes服务器 `--delay` 争论。 + - 结果RPS:在指定的时间段内,服务器每秒放置到查询结果的行数 `--delay` 争论。 + - 结果MiB/s.在指定的时间段内,服务器每秒将多少mebibytes放置到查询结果中 `--delay` 争论。 -- Percentiles of queries execution time. +- 查询执行时间的百分位数。 -## Comparison mode {#clickhouse-benchmark-comparison-mode} +## 比较模式 {#clickhouse-benchmark-comparison-mode} -`clickhouse-benchmark` can compare performances for two running ClickHouse servers. 
+`clickhouse-benchmark` 可以比较两个正在运行的ClickHouse服务器的性能。

-To use the comparison mode, specify endpoints of both servers by two pairs of `--host`, `--port` keys. Keys matched together by position in arguments list, the first `--host` is matched with the first `--port` and so on. `clickhouse-benchmark` establishes connections to both servers, then sends queries. Each query addressed to a randomly selected server. The results are shown for each server separately.
+要使用比较模式,请通过两对 `--host`、`--port` 键指定两个服务器的端点。 键按其在参数列表中的位置配对,第一个 `--host` 与第一个 `--port` 匹配,依此类推。 `clickhouse-benchmark` 建立到两个服务器的连接,然后发送查询。 每个查询被发送到随机选择的服务器。 每个服务器的结果分别显示。

-## Example {#clickhouse-benchmark-example}
+## 示例 {#clickhouse-benchmark-example}

``` bash
$ echo "SELECT * FROM system.numbers LIMIT 10000000 OFFSET 10000000" | clickhouse-benchmark -i 10
diff --git a/docs/zh/operations/utils/clickhouse-copier.md b/docs/zh/operations/utilities/clickhouse-copier.md
similarity index 76%
rename from docs/zh/operations/utils/clickhouse-copier.md
rename to docs/zh/operations/utilities/clickhouse-copier.md
index 1a1b8599dba..9e982188499 100644
--- a/docs/zh/operations/utils/clickhouse-copier.md
+++ b/docs/zh/operations/utilities/clickhouse-copier.md
@@ -1,40 +1,41 @@
-# clickhouse-copier {#clickhouse-copier}
-Copies data from the tables in one cluster to tables in another (or the same) cluster.
+# clickhouse-copier {#clickhouse-copier}

-You can run multiple `clickhouse-copier` instances on different servers to perform the same job. ZooKeeper is used for syncing the processes.
+将数据从一个集群中的表复制到另一个(或相同)集群中的表。

-After starting, `clickhouse-copier`:
+您可以在不同服务器上运行多个 `clickhouse-copier` 实例来执行同一个作业。 ZooKeeper用于同步这些进程。

-- Connects to ZooKeeper and receives:
+启动后,`clickhouse-copier`:

-    - Copying jobs.
-    - The state of the copying jobs.
+- 连接到ZooKeeper并接收:

-- It performs the jobs.
+    - 复制作业。
+    - 复制作业的状态。
+
+- 它执行这些作业。

 Each running process chooses the "closest" shard of the source cluster and copies the data into the destination cluster, resharding the data if necessary.

-`clickhouse-copier` tracks the changes in ZooKeeper and applies them on the fly.
+`clickhouse-copier` 跟踪ZooKeeper中的更改,并实时应用它们。

-To reduce network traffic, we recommend running `clickhouse-copier` on the same server where the source data is located.
+为了减少网络流量,我们建议在源数据所在的同一服务器上运行 `clickhouse-copier`。

-## Running clickhouse-copier {#running-clickhouse-copier}
+## 运行 clickhouse-copier {#running-clickhouse-copier}

-The utility should be run manually:
+该实用程序应手动运行:

``` bash
clickhouse-copier copier --daemon --config zookeeper.xml --task-path /task/path --base-dir /path/to/dir
```

-Parameters:
+参数:

-- `daemon` — Starts `clickhouse-copier` in daemon mode.
-- `config` — The path to the `zookeeper.xml` file with the parameters for the connection to ZooKeeper.
-- `task-path` — The path to the ZooKeeper node. This node is used for syncing `clickhouse-copier` processes and storing tasks. Tasks are stored in `$task-path/description`.
-- `base-dir` — The path to logs and auxiliary files. When it starts, `clickhouse-copier` creates `clickhouse-copier_YYYYMMHHSS_` subdirectories in `$base-dir`. If this parameter is omitted, the directories are created in the directory where `clickhouse-copier` was launched.
+- `daemon` — 以守护进程模式启动 `clickhouse-copier`。
+- `config` — `zookeeper.xml` 文件的路径,其中包含连接ZooKeeper的参数。
+- `task-path` — ZooKeeper节点的路径。 该节点用于同步 `clickhouse-copier` 进程和存储任务。 任务存储在 `$task-path/description` 中。
+- `base-dir` — 日志和辅助文件的路径。 启动时,`clickhouse-copier` 会在 `$base-dir` 中创建 `clickhouse-copier_YYYYMMHHSS_` 子目录。 如果省略此参数,则在启动 `clickhouse-copier` 的目录中创建这些目录。

-## Format of zookeeper.xml {#format-of-zookeeper-xml}
+## zookeeper.xml 的格式 {#format-of-zookeeper-xml}

``` xml
@@ -53,7 +54,7 @@ Parameters:
```

-## Configuration of copying tasks {#configuration-of-copying-tasks}
+## 复制任务的配置 {#configuration-of-copying-tasks}

``` xml
@@ -162,6 +163,6 @@ Parameters:
```

-`clickhouse-copier` tracks the changes in `/task/path/description` and applies them on the fly. For instance, if you change the value of `max_workers`, the number of processes running tasks will also change.
+`clickhouse-copier` 跟踪 `/task/path/description` 中的更改并实时应用它们。 例如,如果你更改 `max_workers` 的值,运行任务的进程数也会随之变化。

-[Original article](https://clickhouse.tech/docs/en/operations/utils/clickhouse-copier/)
+[原始文章](https://clickhouse.tech/docs/en/operations/utils/clickhouse-copier/)
diff --git a/docs/zh/operations/utils/clickhouse-local.md b/docs/zh/operations/utilities/clickhouse-local.md
similarity index 55%
rename from docs/zh/operations/utils/clickhouse-local.md
rename to docs/zh/operations/utilities/clickhouse-local.md
index 159e914f446..e29d8f6c4ac 100644
--- a/docs/zh/operations/utils/clickhouse-local.md
+++ b/docs/zh/operations/utilities/clickhouse-local.md
@@ -1,41 +1,41 @@
-# clickhouse-local {#clickhouse-local}
+# clickhouse-local {#clickhouse-local}

-The `clickhouse-local` program enables you to perform fast processing on local files, without having to deploy and configure the ClickHouse server.
+该 `clickhouse-local` 程序使您能够对本地文件执行快速处理,而无需部署和配置ClickHouse服务器。

-Accepts data that represent tables and queries them using [ClickHouse SQL dialect](../../query_language/index.md).
+接受表示表的数据,并使用 [ClickHouse SQL方言](../../query_language/index.md) 查询它们。

-`clickhouse-local` uses the same core as ClickHouse server, so it supports most of the features and the same set of formats and table engines.
+`clickhouse-local` 使用与ClickHouse server相同的核心,因此它支持大多数功能以及相同的格式和表引擎。

-By default `clickhouse-local` does not have access to data on the same host, but it supports loading server configuration using `--config-file` argument.
+默认情况下 `clickhouse-local` 不能访问同一主机上的数据,但它支持使用 `--config-file` 参数加载服务器配置。

-!!! warning "Warning"
-    It is not recommended to load production server configuration into `clickhouse-local` because data can be damaged in case of human error.
+!!! warning "警告"
+    不建议将生产服务器配置加载到 `clickhouse-local`,因为在人为错误的情况下数据可能被损坏。

-## Usage {#usage}
+## 用法 {#usage}

-Basic usage:
+基本用法:

``` bash
clickhouse-local --structure "table_structure" --input-format "format_of_incoming_data" -q "query"
```

-Arguments:
+参数:

- `-S`, `--structure` — table structure for input data.
-- `-if`, `--input-format` — input format, `TSV` by default.
-- `-f`, `--file` — path to data, `stdin` by default.
-- `-q` `--query` — queries to execute with `;` as delimeter.
-- `-N`, `--table` — table name where to put output data, `table` by default.
-- `-of`, `--format`, `--output-format` — output format, `TSV` by default.
+- `-if`, `--input-format` — input format, `TSV` 默认情况下。
+- `-f`, `--file` — path to data, `stdin` 默认情况下。
+- `-q` `--query` — queries to execute with `;` 作为分隔符。
+- `-N`, `--table` — table name where to put output data, `table` 默认情况下。
+- `-of`, `--format`, `--output-format` — output format, `TSV` 默认情况下。
- `--stacktrace` — whether to dump debug output in case of exception.
- `--verbose` — more details on query execution.
-- `-s` — disables `stderr` logging.
+- `-s` — disables `stderr` 日志记录。
- `--config-file` — path to configuration file in same format as for ClickHouse server, by default the configuration is empty.
- `--help` — arguments references for `clickhouse-local`.

-Also there are arguments for each ClickHouse configuration variable which are more commonly used instead of `--config-file`.
+每个ClickHouse配置变量也有对应的参数,它们比 `--config-file` 更常用。

-## Examples {#examples}
+## 示例 {#examples}

``` bash
echo -e "1,2\n3,4" | clickhouse-local -S "a Int64, b Int64" -if "CSV" -q "SELECT * FROM table"
@@ -44,7 +44,7 @@ Read 2 rows, 32.00 B in 0.000 sec., 5182 rows/sec., 80.97 KiB/sec.
3 4
```

-Previous example is the same as:
+前面的示例等同于:

``` bash
$ echo -e "1,2\n3,4" | clickhouse-local -q "CREATE TABLE table (a Int64, b Int64) ENGINE = File(CSV, stdin); SELECT a, b FROM table; DROP TABLE table"
@@ -53,7 +53,7 @@ Read 2 rows, 32.00 B in 0.000 sec., 4987 rows/sec., 77.93 KiB/sec.
3 4
```

-Now let’s output memory usage for each Unix user:
+现在让我们输出每个Unix用户的内存占用:

``` bash
$ ps aux | tail -n +2 | awk '{ printf("%s\t%s\n", $1, $4) }' | clickhouse-local -S "user String, mem Float64" -q "SELECT user, round(sum(mem), 2) as memTotal FROM table GROUP BY user ORDER BY memTotal DESC FORMAT Pretty"
@@ -68,4 +68,4 @@ Read 186 rows, 4.15 KiB in 0.035 sec., 5302 rows/sec., 118.34 KiB/sec.
...
```

-[Original article](https://clickhouse.tech/docs/en/operations/utils/clickhouse-local/)
+[原始文章](https://clickhouse.tech/docs/en/operations/utils/clickhouse-local/)
diff --git a/docs/zh/operations/utilities/index.md b/docs/zh/operations/utilities/index.md
new file mode 100644
index 00000000000..8d70ef4a6bb
--- /dev/null
+++ b/docs/zh/operations/utilities/index.md
@@ -0,0 +1,8 @@
+
+# ClickHouse实用工具 {#clickhouse-utility}
+
+- [clickhouse-local](clickhouse-local.md) — Allows running SQL queries on data without stopping the ClickHouse server, similar to how `awk` 所做的那样。
+- [clickhouse-copier](clickhouse-copier.md) — Copies (and reshards) data from one cluster to another cluster.
+- [clickhouse-benchmark](clickhouse-benchmark.md) — Loads server with the custom queries and settings.
+
+[原始文章](https://clickhouse.tech/docs/en/operations/utils/)
diff --git a/docs/zh/query_language/agg_functions/combinators.md b/docs/zh/query_language/agg_functions/combinators.md
deleted file mode 100644
index a173e56fbea..00000000000
--- a/docs/zh/query_language/agg_functions/combinators.md
+++ /dev/null
@@ -1,163 +0,0 @@
----
-en_copy: true
----
-
-# Aggregate function combinators {#aggregate_functions_combinators}
-
-The name of an aggregate function can have a suffix appended to it. This changes the way the aggregate function works.
-
-## -If {#agg-functions-combinator-if}
-
-The suffix -If can be appended to the name of any aggregate function. In this case, the aggregate function accepts an extra argument – a condition (UInt8 type).
The aggregate function processes only the rows that trigger the condition. If the condition was not triggered even once, it returns a default value (usually zeros or empty strings). - -Examples: `sumIf(column, cond)`, `countIf(cond)`, `avgIf(x, cond)`, `quantilesTimingIf(level1, level2)(x, cond)`, `argMinIf(arg, val, cond)` and so on. - -With conditional aggregate functions, you can calculate aggregates for several conditions at once, without using subqueries and `JOIN`s. For example, in Yandex.Metrica, conditional aggregate functions are used to implement the segment comparison functionality. - -## -Array {#agg-functions-combinator-array} - -The -Array suffix can be appended to any aggregate function. In this case, the aggregate function takes arguments of the ‘Array(T)’ type (arrays) instead of ‘T’ type arguments. If the aggregate function accepts multiple arguments, this must be arrays of equal lengths. When processing arrays, the aggregate function works like the original aggregate function across all array elements. - -Example 1: `sumArray(arr)` - Totals all the elements of all ‘arr’ arrays. In this example, it could have been written more simply: `sum(arraySum(arr))`. - -Example 2: `uniqArray(arr)` – Counts the number of unique elements in all ‘arr’ arrays. This could be done an easier way: `uniq(arrayJoin(arr))`, but it’s not always possible to add ‘arrayJoin’ to a query. - --If and -Array can be combined. However, ‘Array’ must come first, then ‘If’. Examples: `uniqArrayIf(arr, cond)`, `quantilesTimingArrayIf(level1, level2)(arr, cond)`. Due to this order, the ‘cond’ argument won’t be an array. - -## -State {#agg-functions-combinator-state} - -If you apply this combinator, the aggregate function doesn’t return the resulting value (such as the number of unique values for the [uniq](reference.md#agg_function-uniq) function), but an intermediate state of the aggregation (for `uniq`, this is the hash table for calculating the number of unique values). This is an `AggregateFunction(...)` that can be used for further processing or stored in a table to finish aggregating later. - -To work with these states, use: - -- [AggregatingMergeTree](../../operations/table_engines/aggregatingmergetree.md) table engine. -- [finalizeAggregation](../functions/other_functions.md#function-finalizeaggregation) function. -- [runningAccumulate](../functions/other_functions.md#function-runningaccumulate) function. -- [-Merge](#aggregate_functions_combinators_merge) combinator. -- [-MergeState](#aggregate_functions_combinators_mergestate) combinator. - -## -Merge {#aggregate_functions_combinators-merge} - -If you apply this combinator, the aggregate function takes the intermediate aggregation state as an argument, combines the states to finish aggregation, and returns the resulting value. - -## -MergeState {#aggregate_functions_combinators-mergestate} - -Merges the intermediate aggregation states in the same way as the -Merge combinator. However, it doesn’t return the resulting value, but an intermediate aggregation state, similar to the -State combinator. - -## -ForEach {#agg-functions-combinator-foreach} - -Converts an aggregate function for tables into an aggregate function for arrays that aggregates the corresponding array items and returns an array of results. For example, `sumForEach` for the arrays `[1, 2]`, `[3, 4, 5]`and`[6, 7]`returns the result `[10, 13, 5]` after adding together the corresponding array items. 
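
The `[10, 13, 5]` example above can be reproduced with a short self-contained query; a sketch that uses `arrayJoin` to turn the three array literals into rows before aggregating them element-wise:

``` sql
SELECT sumForEach(arr)
FROM
(
    -- Three rows, one array per row.
    SELECT arrayJoin([[1, 2], [3, 4, 5], [6, 7]]) AS arr
)
```

``` text
┌─sumForEach(arr)─┐
│ [10,13,5]       │
└─────────────────┘
```
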
- -## -OrDefault {#agg-functions-combinator-ordefault} - -Fills the default value of the aggregate function’s return type if there is nothing to aggregate. - -``` sql -SELECT avg(number), avgOrDefault(number) FROM numbers(0) -``` - -``` text -┌─avg(number)─┬─avgOrDefault(number)─┐ -│ nan │ 0 │ -└─────────────┴──────────────────────┘ -``` - -## -OrNull {#agg-functions-combinator-ornull} - -Fills `null` if there is nothing to aggregate. The return column will be nullable. - -``` sql -SELECT avg(number), avgOrNull(number) FROM numbers(0) -``` - -``` text -┌─avg(number)─┬─avgOrNull(number)─┐ -│ nan │ ᴺᵁᴸᴸ │ -└─────────────┴───────────────────┘ -``` - --OrDefault and -OrNull can be combined with other combinators. It is useful when the aggregate function does not accept the empty input. - -``` sql -SELECT avgOrNullIf(x, x > 10) -FROM -( - SELECT toDecimal32(1.23, 2) AS x -) -``` - -``` text -┌─avgOrNullIf(x, greater(x, 10))─┐ -│ ᴺᵁᴸᴸ │ -└────────────────────────────────┘ -``` - -## -Resample {#agg-functions-combinator-resample} - -Lets you divide data into groups, and then separately aggregates the data in those groups. Groups are created by splitting the values from one column into intervals. - -``` sql -Resample(start, end, step)(, resampling_key) -``` - -**Parameters** - -- `start` — Starting value of the whole required interval for `resampling_key` values. -- `stop` — Ending value of the whole required interval for `resampling_key` values. The whole interval doesn’t include the `stop` value `[start, stop)`. -- `step` — Step for separating the whole interval into subintervals. The `aggFunction` is executed over each of those subintervals independently. -- `resampling_key` — Column whose values are used for separating data into intervals. -- `aggFunction_params` — `aggFunction` parameters. - -**Returned values** - -- Array of `aggFunction` results for each subinterval. - -**Example** - -Consider the `people` table with the following data: - -``` text -┌─name───┬─age─┬─wage─┐ -│ John │ 16 │ 10 │ -│ Alice │ 30 │ 15 │ -│ Mary │ 35 │ 8 │ -│ Evelyn │ 48 │ 11.5 │ -│ David │ 62 │ 9.9 │ -│ Brian │ 60 │ 16 │ -└────────┴─────┴──────┘ -``` - -Let’s get the names of the people whose age lies in the intervals of `[30,60)` and `[60,75)`. Since we use integer representation for age, we get ages in the `[30, 59]` and `[60,74]` intervals. - -To aggregate names in an array, we use the [groupArray](reference.md#agg_function-grouparray) aggregate function. It takes one argument. In our case, it’s the `name` column. The `groupArrayResample` function should use the `age` column to aggregate names by age. To define the required intervals, we pass the `30, 75, 30` arguments into the `groupArrayResample` function. - -``` sql -SELECT groupArrayResample(30, 75, 30)(name, age) FROM people -``` - -``` text -┌─groupArrayResample(30, 75, 30)(name, age)─────┐ -│ [['Alice','Mary','Evelyn'],['David','Brian']] │ -└───────────────────────────────────────────────┘ -``` - -Consider the results. - -`Jonh` is out of the sample because he’s too young. Other people are distributed according to the specified age intervals. - -Now let’s count the total number of people and their average wage in the specified age intervals. 
- -``` sql -SELECT - countResample(30, 75, 30)(name, age) AS amount, - avgResample(30, 75, 30)(wage, age) AS avg_wage -FROM people -``` - -``` text -┌─amount─┬─avg_wage──────────────────┐ -│ [3,2] │ [11.5,12.949999809265137] │ -└────────┴───────────────────────────┘ -``` - -[Original article](https://clickhouse.tech/docs/en/query_language/agg_functions/combinators/) diff --git a/docs/zh/query_language/agg_functions/index.md b/docs/zh/query_language/agg_functions/index.md deleted file mode 100644 index c439ddb1e6a..00000000000 --- a/docs/zh/query_language/agg_functions/index.md +++ /dev/null @@ -1,58 +0,0 @@ ---- -en_copy: true ---- - -# Aggregate functions {#aggregate-functions} - -Aggregate functions work in the [normal](http://www.sql-tutorial.com/sql-aggregate-functions-sql-tutorial) way as expected by database experts. - -ClickHouse also supports: - -- [Parametric aggregate functions](parametric_functions.md#aggregate_functions_parametric), which accept other parameters in addition to columns. -- [Combinators](combinators.md#aggregate_functions_combinators), which change the behavior of aggregate functions. - -## NULL processing {#null-processing} - -During aggregation, all `NULL`s are skipped. - -**Examples:** - -Consider this table: - -``` text -┌─x─┬────y─┐ -│ 1 │ 2 │ -│ 2 │ ᴺᵁᴸᴸ │ -│ 3 │ 2 │ -│ 3 │ 3 │ -│ 3 │ ᴺᵁᴸᴸ │ -└───┴──────┘ -``` - -Let’s say you need to total the values in the `y` column: - -``` sql -SELECT sum(y) FROM t_null_big -``` - - ┌─sum(y)─┐ - │ 7 │ - └────────┘ - -The `sum` function interprets `NULL` as `0`. In particular, this means that if the function receives input of a selection where all the values are `NULL`, then the result will be `0`, not `NULL`. - -Now you can use the `groupArray` function to create an array from the `y` column: - -``` sql -SELECT groupArray(y) FROM t_null_big -``` - -``` text -┌─groupArray(y)─┐ -│ [2,2,3] │ -└───────────────┘ -``` - -`groupArray` does not include `NULL` in the resulting array. - -[Original article](https://clickhouse.tech/docs/en/query_language/agg_functions/) diff --git a/docs/zh/query_language/agg_functions/reference.md b/docs/zh/query_language/agg_functions/reference.md deleted file mode 100644 index 31de8bf1226..00000000000 --- a/docs/zh/query_language/agg_functions/reference.md +++ /dev/null @@ -1,1834 +0,0 @@ ---- -en_copy: true ---- - -# Function Reference {#function-reference} - -## count {#agg_function-count} - -Counts the number of rows or not-NULL values. - -ClickHouse supports the following syntaxes for `count`: -- `count(expr)` or `COUNT(DISTINCT expr)`. -- `count()` or `COUNT(*)`. The `count()` syntax is ClickHouse-specific. - -**Parameters** - -The function can take: - -- Zero parameters. -- One [expression](../syntax.md#syntax-expressions). - -**Returned value** - -- If the function is called without parameters it counts the number of rows. -- If the [expression](../syntax.md#syntax-expressions) is passed, then the function counts how many times this expression returned not null. If the expression returns a [Nullable](../../data_types/nullable.md)-type value, then the result of `count` stays not `Nullable`. The function returns 0 if the expression returned `NULL` for all the rows. - -In both cases the type of the returned value is [UInt64](../../data_types/int_uint.md). - -**Details** - -ClickHouse supports the `COUNT(DISTINCT ...)` syntax. 
The behavior of this construction depends on the [count\_distinct\_implementation](../../operations/settings/settings.md#settings-count_distinct_implementation) setting. It defines which of the [uniq\*](#agg_function-uniq) functions is used to perform the operation. The default is the [uniqExact](#agg_function-uniqexact) function. - -The `SELECT count() FROM table` query is not optimized, because the number of entries in the table is not stored separately. It chooses a small column from the table and counts the number of values in it. - -**Examples** - -Example 1: - -``` sql -SELECT count() FROM t -``` - -``` text -┌─count()─┐ -│ 5 │ -└─────────┘ -``` - -Example 2: - -``` sql -SELECT name, value FROM system.settings WHERE name = 'count_distinct_implementation' -``` - -``` text -┌─name──────────────────────────┬─value─────┐ -│ count_distinct_implementation │ uniqExact │ -└───────────────────────────────┴───────────┘ -``` - -``` sql -SELECT count(DISTINCT num) FROM t -``` - -``` text -┌─uniqExact(num)─┐ -│ 3 │ -└────────────────┘ -``` - -This example shows that `count(DISTINCT num)` is performed by the `uniqExact` function according to the `count_distinct_implementation` setting value. - -## any(x) {#agg_function-any} - -Selects the first encountered value. -The query can be executed in any order and even in a different order each time, so the result of this function is indeterminate. -To get a determinate result, you can use the ‘min’ or ‘max’ function instead of ‘any’. - -In some cases, you can rely on the order of execution. This applies to cases when SELECT comes from a subquery that uses ORDER BY. - -When a `SELECT` query has the `GROUP BY` clause or at least one aggregate function, ClickHouse (in contrast to MySQL) requires that all expressions in the `SELECT`, `HAVING`, and `ORDER BY` clauses be calculated from keys or from aggregate functions. In other words, each column selected from the table must be used either in keys or inside aggregate functions. To get behavior like in MySQL, you can put the other columns in the `any` aggregate function. - -## anyHeavy(x) {#anyheavyx} - -Selects a frequently occurring value using the [heavy hitters](http://www.cs.umd.edu/~samir/498/karp.pdf) algorithm. If there is a value that occurs more than in half the cases in each of the query’s execution threads, this value is returned. Normally, the result is nondeterministic. - -``` sql -anyHeavy(column) -``` - -**Arguments** - -- `column` – The column name. - -**Example** - -Take the [OnTime](../../getting_started/example_datasets/ontime.md) data set and select any frequently occurring value in the `AirlineID` column. - -``` sql -SELECT anyHeavy(AirlineID) AS res -FROM ontime -``` - -``` text -┌───res─┐ -│ 19690 │ -└───────┘ -``` - -## anyLast(x) {#anylastx} - -Selects the last value encountered. -The result is just as indeterminate as for the `any` function. - -## groupBitAnd {#groupbitand} - -Applies bitwise `AND` for series of numbers. - -``` sql -groupBitAnd(expr) -``` - -**Parameters** - -`expr` – An expression that results in `UInt*` type. - -**Return value** - -Value of the `UInt*` type. - -**Example** - -Test data: - -``` text -binary decimal -00101100 = 44 -00011100 = 28 -00001101 = 13 -01010101 = 85 -``` - -Query: - -``` sql -SELECT groupBitAnd(num) FROM t -``` - -Where `num` is the column with the test data. - -Result: - -``` text -binary decimal -00000100 = 4 -``` - -## groupBitOr {#groupbitor} - -Applies bitwise `OR` for series of numbers. 
- -``` sql -groupBitOr(expr) -``` - -**Parameters** - -`expr` – An expression that results in `UInt*` type. - -**Return value** - -Value of the `UInt*` type. - -**Example** - -Test data: - -``` text -binary decimal -00101100 = 44 -00011100 = 28 -00001101 = 13 -01010101 = 85 -``` - -Query: - -``` sql -SELECT groupBitOr(num) FROM t -``` - -Where `num` is the column with the test data. - -Result: - -``` text -binary decimal -01111101 = 125 -``` - -## groupBitXor {#groupbitxor} - -Applies bitwise `XOR` for series of numbers. - -``` sql -groupBitXor(expr) -``` - -**Parameters** - -`expr` – An expression that results in `UInt*` type. - -**Return value** - -Value of the `UInt*` type. - -**Example** - -Test data: - -``` text -binary decimal -00101100 = 44 -00011100 = 28 -00001101 = 13 -01010101 = 85 -``` - -Query: - -``` sql -SELECT groupBitXor(num) FROM t -``` - -Where `num` is the column with the test data. - -Result: - -``` text -binary decimal -01101000 = 104 -``` - -## groupBitmap {#groupbitmap} - -Bitmap or Aggregate calculations from a unsigned integer column, return cardinality of type UInt64, if add suffix -State, then return [bitmap object](../functions/bitmap_functions.md). - -``` sql -groupBitmap(expr) -``` - -**Parameters** - -`expr` – An expression that results in `UInt*` type. - -**Return value** - -Value of the `UInt64` type. - -**Example** - -Test data: - -``` text -UserID -1 -1 -2 -3 -``` - -Query: - -``` sql -SELECT groupBitmap(UserID) as num FROM t -``` - -Result: - -``` text -num -3 -``` - -## min(x) {#agg_function-min} - -Calculates the minimum. - -## max(x) {#agg_function-max} - -Calculates the maximum. - -## argMin(arg, val) {#agg-function-argmin} - -Calculates the ‘arg’ value for a minimal ‘val’ value. If there are several different values of ‘arg’ for minimal values of ‘val’, the first of these values encountered is output. - -**Example:** - -``` text -┌─user─────┬─salary─┐ -│ director │ 5000 │ -│ manager │ 3000 │ -│ worker │ 1000 │ -└──────────┴────────┘ -``` - -``` sql -SELECT argMin(user, salary) FROM salary -``` - -``` text -┌─argMin(user, salary)─┐ -│ worker │ -└──────────────────────┘ -``` - -## argMax(arg, val) {#agg-function-argmax} - -Calculates the ‘arg’ value for a maximum ‘val’ value. If there are several different values of ‘arg’ for maximum values of ‘val’, the first of these values encountered is output. - -## sum(x) {#agg_function-sum} - -Calculates the sum. -Only works for numbers. - -## sumWithOverflow(x) {#sumwithoverflowx} - -Computes the sum of the numbers, using the same data type for the result as for the input parameters. If the sum exceeds the maximum value for this data type, the function returns an error. - -Only works for numbers. - -## sumMap(key, value) {#agg_functions-summap} - -Totals the ‘value’ array according to the keys specified in the ‘key’ array. -The number of elements in ‘key’ and ‘value’ must be the same for each row that is totaled. -Returns a tuple of two arrays: keys in sorted order, and values ​​summed for the corresponding keys. 
-
-Example:
-
-``` sql
-CREATE TABLE sum_map(
-    date Date,
-    timeslot DateTime,
-    statusMap Nested(
-        status UInt16,
-        requests UInt64
-    )
-) ENGINE = Log;
-INSERT INTO sum_map VALUES
-    ('2000-01-01', '2000-01-01 00:00:00', [1, 2, 3], [10, 10, 10]),
-    ('2000-01-01', '2000-01-01 00:00:00', [3, 4, 5], [10, 10, 10]),
-    ('2000-01-01', '2000-01-01 00:01:00', [4, 5, 6], [10, 10, 10]),
-    ('2000-01-01', '2000-01-01 00:01:00', [6, 7, 8], [10, 10, 10]);
-SELECT
-    timeslot,
-    sumMap(statusMap.status, statusMap.requests)
-FROM sum_map
-GROUP BY timeslot
-```
-
-``` text
-┌────────────timeslot─┬─sumMap(statusMap.status, statusMap.requests)─┐
-│ 2000-01-01 00:00:00 │ ([1,2,3,4,5],[10,10,20,10,10])               │
-│ 2000-01-01 00:01:00 │ ([4,5,6,7,8],[10,10,20,10,10])               │
-└─────────────────────┴──────────────────────────────────────────────┘
-```
-
-## skewPop {#skewpop}
-
-Computes the [skewness](https://en.wikipedia.org/wiki/Skewness) of a sequence.
-
-``` sql
-skewPop(expr)
-```
-
-**Parameters**
-
-`expr` — [Expression](../syntax.md#syntax-expressions) returning a number.
-
-**Returned value**
-
-The skewness of the given distribution. Type — [Float64](../../data_types/float.md).
-
-**Example**
-
-``` sql
-SELECT skewPop(value) FROM series_with_value_column
-```
-
-## skewSamp {#skewsamp}
-
-Computes the [sample skewness](https://en.wikipedia.org/wiki/Skewness) of a sequence.
-
-It represents an unbiased estimate of the skewness of a random variable if the passed values form its sample.
-
-``` sql
-skewSamp(expr)
-```
-
-**Parameters**
-
-`expr` — [Expression](../syntax.md#syntax-expressions) returning a number.
-
-**Returned value**
-
-The skewness of the given distribution. Type — [Float64](../../data_types/float.md). If `n <= 1` (`n` is the size of the sample), then the function returns `nan`.
-
-**Example**
-
-``` sql
-SELECT skewSamp(value) FROM series_with_value_column
-```
-
-## kurtPop {#kurtpop}
-
-Computes the [kurtosis](https://en.wikipedia.org/wiki/Kurtosis) of a sequence.
-
-``` sql
-kurtPop(expr)
-```
-
-**Parameters**
-
-`expr` — [Expression](../syntax.md#syntax-expressions) returning a number.
-
-**Returned value**
-
-The kurtosis of the given distribution. Type — [Float64](../../data_types/float.md).
-
-**Example**
-
-``` sql
-SELECT kurtPop(value) FROM series_with_value_column
-```
-
-## kurtSamp {#kurtsamp}
-
-Computes the [sample kurtosis](https://en.wikipedia.org/wiki/Kurtosis) of a sequence.
-
-It represents an unbiased estimate of the kurtosis of a random variable if the passed values form its sample.
-
-``` sql
-kurtSamp(expr)
-```
-
-**Parameters**
-
-`expr` — [Expression](../syntax.md#syntax-expressions) returning a number.
-
-**Returned value**
-
-The kurtosis of the given distribution. Type — [Float64](../../data_types/float.md). If `n <= 1` (`n` is the size of the sample), then the function returns `nan`.
-
-**Example**
-
-``` sql
-SELECT kurtSamp(value) FROM series_with_value_column
-```
-
-## timeSeriesGroupSum(uid, timestamp, value) {#agg-function-timeseriesgroupsum}
-
-`timeSeriesGroupSum` can aggregate multiple time series whose sample timestamps are not aligned.
-It uses linear interpolation between two neighboring sample timestamps and then sums the time series together.
-
-- `uid` — The unique id of the time series, `UInt64`.
-- `timestamp` — `Int64`, so that millisecond or microsecond resolutions are supported.
-- `value` — The metric.
-
-The function returns an array of tuples with `(timestamp, aggregated_value)` pairs.
-
-Before using this function, make sure `timestamp` is in ascending order.
-
-Example:
-
-``` text
-┌─uid─┬─timestamp─┬─value─┐
-│ 1   │ 2         │ 0.2   │
-│ 1   │ 7         │ 0.7   │
-│ 1   │ 12        │ 1.2   │
-│ 1   │ 17        │ 1.7   │
-│ 1   │ 25        │ 2.5   │
-│ 2   │ 3         │ 0.6   │
-│ 2   │ 8         │ 1.6   │
-│ 2   │ 12        │ 2.4   │
-│ 2   │ 18        │ 3.6   │
-│ 2   │ 24        │ 4.8   │
-└─────┴───────────┴───────┘
-```
-
-``` sql
-CREATE TABLE time_series(
-    uid       UInt64,
-    timestamp Int64,
-    value     Float64
-) ENGINE = Memory;
-INSERT INTO time_series VALUES
-    (1,2,0.2),(1,7,0.7),(1,12,1.2),(1,17,1.7),(1,25,2.5),
-    (2,3,0.6),(2,8,1.6),(2,12,2.4),(2,18,3.6),(2,24,4.8);
-
-SELECT timeSeriesGroupSum(uid, timestamp, value)
-FROM (
-    SELECT * FROM time_series ORDER BY timestamp ASC
-);
-```
-
-The result is:
-
-``` text
-[(2,0.2),(3,0.9),(7,2.1),(8,2.4),(12,3.6),(17,5.1),(18,5.4),(24,7.2),(25,2.5)]
-```
-
-## timeSeriesGroupRateSum(uid, ts, val) {#agg-function-timeseriesgroupratesum}
-
-Similarly to `timeSeriesGroupSum`, `timeSeriesGroupRateSum` calculates the rate of change of each time series and then sums the rates together.
-As with `timeSeriesGroupSum`, the timestamps should be in ascending order before this function is used.
-
-Applied to the data from the `timeSeriesGroupSum` example, this function produces the following result:
-
-``` text
-[(2,0),(3,0.1),(7,0.3),(8,0.3),(12,0.3),(17,0.3),(18,0.3),(24,0.3),(25,0.1)]
-```
-
-## avg(x) {#agg_function-avg}
-
-Calculates the average.
-Only works for numbers.
-The result is always Float64.
-
-## uniq {#agg_function-uniq}
-
-Calculates the approximate number of different values of the argument.
-
-``` sql
-uniq(x[, ...])
-```
-
-**Parameters**
-
-The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types.
-
-**Returned value**
-
-- A [UInt64](../../data_types/int_uint.md)-type number.
-
-**Implementation details**
-
-Function:
-
-- Calculates a hash for all parameters in the aggregate, then uses it in calculations.
-
-- Uses an adaptive sampling algorithm. For the calculation state, the function uses a sample of element hash values up to 65536.
-
-    This algorithm is very accurate and very efficient on the CPU. When the query contains several of these functions, using `uniq` is almost as fast as using other aggregate functions.
-
-- Provides the result deterministically (it doesn’t depend on the query processing order).
-
-We recommend using this function in almost all scenarios.
-
-**See Also**
-
-- [uniqCombined](#agg_function-uniqcombined)
-- [uniqCombined64](#agg_function-uniqcombined64)
-- [uniqHLL12](#agg_function-uniqhll12)
-- [uniqExact](#agg_function-uniqexact)
-
-## uniqCombined {#agg_function-uniqcombined}
-
-Calculates the approximate number of different argument values.
-
-``` sql
-uniqCombined(HLL_precision)(x[, ...])
-```
-
-The `uniqCombined` function is a good choice for calculating the number of different values.
-
-**Parameters**
-
-The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types.
-
-`HLL_precision` is the base-2 logarithm of the number of cells in [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog). Optional, you can use the function as `uniqCombined(x[, ...])`. The default value for `HLL_precision` is 17, which is effectively 96 KiB of space (2^17 cells, 6 bits each).
-
-**Returned value**
-
-- A [UInt64](../../data_types/int_uint.md)-type number.
-
-**Implementation details**
-
-Function:
-
-- Calculates a hash (a 64-bit hash for `String` and 32-bit otherwise) for all parameters in the aggregate, then uses it in calculations.
-
-- Uses a combination of three algorithms: array, hash table, and HyperLogLog with an error correction table.
-
-    For a small number of distinct elements, an array is used. When the set size is larger, a hash table is used. For a larger number of elements, HyperLogLog is used, which occupies a fixed amount of memory.
-
-- Provides the result deterministically (it doesn’t depend on the query processing order).
-
-!!! note "Note"
-    Since it uses a 32-bit hash for non-`String` types, the result has a very high error for cardinalities significantly larger than `UINT_MAX` (the error rises quickly after a few tens of billions of distinct values), so in this case you should use [uniqCombined64](#agg_function-uniqcombined64).
-
-Compared to the [uniq](#agg_function-uniq) function, `uniqCombined`:
-
-- Consumes several times less memory.
-- Calculates with several times higher accuracy.
-- Usually has slightly lower performance. In some scenarios, `uniqCombined` can perform better than `uniq`, for example, with distributed queries that transmit a large number of aggregation states over the network.
-
-**See Also**
-
-- [uniq](#agg_function-uniq)
-- [uniqCombined64](#agg_function-uniqcombined64)
-- [uniqHLL12](#agg_function-uniqhll12)
-- [uniqExact](#agg_function-uniqexact)
-
-## uniqCombined64 {#agg_function-uniqcombined64}
-
-Same as [uniqCombined](#agg_function-uniqcombined), but uses a 64-bit hash for all data types.
-
-## uniqHLL12 {#agg_function-uniqhll12}
-
-Calculates the approximate number of different argument values, using the [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) algorithm.
-
-``` sql
-uniqHLL12(x[, ...])
-```
-
-**Parameters**
-
-The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types.
-
-**Returned value**
-
-- A [UInt64](../../data_types/int_uint.md)-type number.
-
-**Implementation details**
-
-Function:
-
-- Calculates a hash for all parameters in the aggregate, then uses it in calculations.
-
-- Uses the HyperLogLog algorithm to approximate the number of different argument values.
-
-    2^12 5-bit cells are used. The size of the state is slightly more than 2.5 KB. The result is not very accurate (up to ~10% error) for small data sets (<10K elements). However, the result is fairly accurate for high-cardinality data sets (10K-100M), with a maximum error of ~1.6%. Starting from 100M, the estimation error increases, and the function returns very inaccurate results for data sets with extremely high cardinality (1B+ elements).
-
-- Provides the result deterministically (it doesn’t depend on the query processing order).
-
-We don’t recommend using this function. In most cases, use the [uniq](#agg_function-uniq) or [uniqCombined](#agg_function-uniqcombined) function.
-
-**See Also**
-
-- [uniq](#agg_function-uniq)
-- [uniqCombined](#agg_function-uniqcombined)
-- [uniqExact](#agg_function-uniqexact)
-
-## uniqExact {#agg_function-uniqexact}
-
-Calculates the exact number of different argument values.
-
-``` sql
-uniqExact(x[, ...])
-```
-
-Use the `uniqExact` function if you absolutely need an exact result. Otherwise, use the [uniq](#agg_function-uniq) function.
-
-The `uniqExact` function uses more memory than `uniq`, because the size of the state grows without bound as the number of different values increases.
-
-**Parameters**
-
-The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types.
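-
-**Example**
-
-A minimal sketch comparing the exact and approximate counters (the table name `visits` and the column name `UserID` are assumptions chosen for illustration):
-
-``` sql
--- uniqExact stores every distinct hash, so its memory use grows with cardinality,
--- while uniq keeps a bounded sampling state; the two counters are shown side by side.
-SELECT
-    uniqExact(UserID) AS exact_count,
-    uniq(UserID) AS approximate_count
-FROM visits
-```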
- -**See Also** - -- [uniq](#agg_function-uniq) -- [uniqCombined](#agg_function-uniqcombined) -- [uniqHLL12](#agg_function-uniqhll12) - -## groupArray(x), groupArray(max\_size)(x) {#agg_function-grouparray} - -Creates an array of argument values. -Values can be added to the array in any (indeterminate) order. - -The second version (with the `max_size` parameter) limits the size of the resulting array to `max_size` elements. -For example, `groupArray (1) (x)` is equivalent to `[any (x)]`. - -In some cases, you can still rely on the order of execution. This applies to cases when `SELECT` comes from a subquery that uses `ORDER BY`. - -## groupArrayInsertAt(value, position) {#grouparrayinsertatvalue-position} - -Inserts a value into the array in the specified position. - -!!! note "Note" - This function uses zero-based positions, contrary to the conventional one-based positions for SQL arrays. - -Accepts the value and position as input. If several values ​​are inserted into the same position, any of them might end up in the resulting array (the first one will be used in the case of single-threaded execution). If no value is inserted into a position, the position is assigned the default value. - -Optional parameters: - -- The default value for substituting in empty positions. -- The length of the resulting array. This allows you to receive arrays of the same size for all the aggregate keys. When using this parameter, the default value must be specified. - -## groupArrayMovingSum {#agg_function-grouparraymovingsum} - -Calculates the moving sum of input values. - -``` sql -groupArrayMovingSum(numbers_for_summing) -groupArrayMovingSum(window_size)(numbers_for_summing) -``` - -The function can take the window size as a parameter. If left unspecified, the function takes the window size equal to the number of rows in the column. - -**Parameters** - -- `numbers_for_summing` — [Expression](../syntax.md#syntax-expressions) resulting in a numeric data type value. -- `window_size` — Size of the calculation window. - -**Returned values** - -- Array of the same size and type as the input data. - -**Example** - -The sample table: - -``` sql -CREATE TABLE t -( - `int` UInt8, - `float` Float32, - `dec` Decimal32(2) -) -ENGINE = TinyLog -``` - -``` text -┌─int─┬─float─┬──dec─┐ -│ 1 │ 1.1 │ 1.10 │ -│ 2 │ 2.2 │ 2.20 │ -│ 4 │ 4.4 │ 4.40 │ -│ 7 │ 7.77 │ 7.77 │ -└─────┴───────┴──────┘ -``` - -The queries: - -``` sql -SELECT - groupArrayMovingSum(int) AS I, - groupArrayMovingSum(float) AS F, - groupArrayMovingSum(dec) AS D -FROM t -``` - -``` text -┌─I──────────┬─F───────────────────────────────┬─D──────────────────────┐ -│ [1,3,7,14] │ [1.1,3.3000002,7.7000003,15.47] │ [1.10,3.30,7.70,15.47] │ -└────────────┴─────────────────────────────────┴────────────────────────┘ -``` - -``` sql -SELECT - groupArrayMovingSum(2)(int) AS I, - groupArrayMovingSum(2)(float) AS F, - groupArrayMovingSum(2)(dec) AS D -FROM t -``` - -``` text -┌─I──────────┬─F───────────────────────────────┬─D──────────────────────┐ -│ [1,3,6,11] │ [1.1,3.3000002,6.6000004,12.17] │ [1.10,3.30,6.60,12.17] │ -└────────────┴─────────────────────────────────┴────────────────────────┘ -``` - -## groupArrayMovingAvg {#agg_function-grouparraymovingavg} - -Calculates the moving average of input values. - -``` sql -groupArrayMovingAvg(numbers_for_summing) -groupArrayMovingAvg(window_size)(numbers_for_summing) -``` - -The function can take the window size as a parameter. 
If left unspecified, the function takes the window size equal to the number of rows in the column. - -**Parameters** - -- `numbers_for_summing` — [Expression](../syntax.md#syntax-expressions) resulting in a numeric data type value. -- `window_size` — Size of the calculation window. - -**Returned values** - -- Array of the same size and type as the input data. - -The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero). It truncates the decimal places insignificant for the resulting data type. - -**Example** - -The sample table `b`: - -``` sql -CREATE TABLE t -( - `int` UInt8, - `float` Float32, - `dec` Decimal32(2) -) -ENGINE = TinyLog -``` - -``` text -┌─int─┬─float─┬──dec─┐ -│ 1 │ 1.1 │ 1.10 │ -│ 2 │ 2.2 │ 2.20 │ -│ 4 │ 4.4 │ 4.40 │ -│ 7 │ 7.77 │ 7.77 │ -└─────┴───────┴──────┘ -``` - -The queries: - -``` sql -SELECT - groupArrayMovingAvg(int) AS I, - groupArrayMovingAvg(float) AS F, - groupArrayMovingAvg(dec) AS D -FROM t -``` - -``` text -┌─I─────────┬─F───────────────────────────────────┬─D─────────────────────┐ -│ [0,0,1,3] │ [0.275,0.82500005,1.9250001,3.8675] │ [0.27,0.82,1.92,3.86] │ -└───────────┴─────────────────────────────────────┴───────────────────────┘ -``` - -``` sql -SELECT - groupArrayMovingAvg(2)(int) AS I, - groupArrayMovingAvg(2)(float) AS F, - groupArrayMovingAvg(2)(dec) AS D -FROM t -``` - -``` text -┌─I─────────┬─F────────────────────────────────┬─D─────────────────────┐ -│ [0,1,3,5] │ [0.55,1.6500001,3.3000002,6.085] │ [0.55,1.65,3.30,6.08] │ -└───────────┴──────────────────────────────────┴───────────────────────┘ -``` - -## groupUniqArray(x), groupUniqArray(max\_size)(x) {#groupuniqarrayx-groupuniqarraymax-sizex} - -Creates an array from different argument values. Memory consumption is the same as for the `uniqExact` function. - -The second version (with the `max_size` parameter) limits the size of the resulting array to `max_size` elements. -For example, `groupUniqArray(1)(x)` is equivalent to `[any(x)]`. - -## quantile {#quantile} - -Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence. - -This function applies [reservoir sampling](https://en.wikipedia.org/wiki/Reservoir_sampling) with a reservoir size up to 8192 and a random number generator for sampling. The result is non-deterministic. To get an exact quantile, use the [quantileExact](#quantileexact) function. - -When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](#quantiles) function. - -**Syntax** - -``` sql -quantile(level)(expr) -``` - -Alias: `median`. - -**Parameters** - -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). -- `expr` — Expression over the column values resulting in numeric [data types](../../data_types/index.md#data_types), [Date](../../data_types/date.md) or [DateTime](../../data_types/datetime.md). - -**Returned value** - -- Approximate quantile of the specified level. - -Type: - -- [Float64](../../data_types/float.md) for numeric data type input. -- [Date](../../data_types/date.md) if input values have the `Date` type. -- [DateTime](../../data_types/datetime.md) if input values have the `DateTime` type. 
- -**Example** - -Input table: - -``` text -┌─val─┐ -│ 1 │ -│ 1 │ -│ 2 │ -│ 3 │ -└─────┘ -``` - -Query: - -``` sql -SELECT quantile(val) FROM t -``` - -Result: - -``` text -┌─quantile(val)─┐ -│ 1.5 │ -└───────────────┘ -``` - -**See Also** - -- [median](#median) -- [quantiles](#quantiles) - -## quantileDeterministic {#quantiledeterministic} - -Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence. - -This function applies [reservoir sampling](https://en.wikipedia.org/wiki/Reservoir_sampling) with a reservoir size up to 8192 and deterministic algorithm of sampling. The result is deterministic. To get an exact quantile, use the [quantileExact](#quantileexact) function. - -When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](#quantiles) function. - -**Syntax** - -``` sql -quantileDeterministic(level)(expr, determinator) -``` - -Alias: `medianDeterministic`. - -**Parameters** - -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). -- `expr` — Expression over the column values resulting in numeric [data types](../../data_types/index.md#data_types), [Date](../../data_types/date.md) or [DateTime](../../data_types/datetime.md). -- `determinator` — Number whose hash is used instead of a random number generator in the reservoir sampling algorithm to make the result of sampling deterministic. As a determinator you can use any deterministic positive number, for example, a user id or an event id. If the same determinator value occures too often, the function works incorrectly. - -**Returned value** - -- Approximate quantile of the specified level. - -Type: - -- [Float64](../../data_types/float.md) for numeric data type input. -- [Date](../../data_types/date.md) if input values have the `Date` type. -- [DateTime](../../data_types/datetime.md) if input values have the `DateTime` type. - -**Example** - -Input table: - -``` text -┌─val─┐ -│ 1 │ -│ 1 │ -│ 2 │ -│ 3 │ -└─────┘ -``` - -Query: - -``` sql -SELECT quantileDeterministic(val, 1) FROM t -``` - -Result: - -``` text -┌─quantileDeterministic(val, 1)─┐ -│ 1.5 │ -└───────────────────────────────┘ -``` - -**See Also** - -- [median](#median) -- [quantiles](#quantiles) - -## quantileExact {#quantileexact} - -Exactly computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence. - -To get exact value, all the passed values ​​are combined into an array, which is then partially sorted. Therefore, the function consumes `O(n)` memory, where `n` is a number of values that were passed. However, for a small number of values, the function is very effective. - -When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](#quantiles) function. - -**Syntax** - -``` sql -quantileExact(level)(expr) -``` - -Alias: `medianExact`. - -**Parameters** - -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. 
At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). -- `expr` — Expression over the column values resulting in numeric [data types](../../data_types/index.md#data_types), [Date](../../data_types/date.md) or [DateTime](../../data_types/datetime.md). - -**Returned value** - -- Quantile of the specified level. - -Type: - -- [Float64](../../data_types/float.md) for numeric data type input. -- [Date](../../data_types/date.md) if input values have the `Date` type. -- [DateTime](../../data_types/datetime.md) if input values have the `DateTime` type. - -**Example** - -Query: - -``` sql -SELECT quantileExact(number) FROM numbers(10) -``` - -Result: - -``` text -┌─quantileExact(number)─┐ -│ 5 │ -└───────────────────────┘ -``` - -**See Also** - -- [median](#median) -- [quantiles](#quantiles) - -## quantileExactWeighted {#quantileexactweighted} - -Exactly computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence, taking into account the weight of each element. - -To get exact value, all the passed values ​​are combined into an array, which is then partially sorted. Each value is counted with its weight, as if it is present `weight` times. A hash table is used in the algorithm. Because of this, if the passed values ​​are frequently repeated, the function consumes less RAM than [quantileExact](#quantileexact). You can use this function instead of `quantileExact` and specify the weight 1. - -When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](#quantiles) function. - -**Syntax** - -``` sql -quantileExactWeighted(level)(expr, weight) -``` - -Alias: `medianExactWeighted`. - -**Parameters** - -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). -- `expr` — Expression over the column values resulting in numeric [data types](../../data_types/index.md#data_types), [Date](../../data_types/date.md) or [DateTime](../../data_types/datetime.md). -- `weight` — Column with weights of sequence members. Weight is a number of value occurrences. - -**Returned value** - -- Quantile of the specified level. - -Type: - -- [Float64](../../data_types/float.md) for numeric data type input. -- [Date](../../data_types/date.md) if input values have the `Date` type. -- [DateTime](../../data_types/datetime.md) if input values have the `DateTime` type. - -**Example** - -Input table: - -``` text -┌─n─┬─val─┐ -│ 0 │ 3 │ -│ 1 │ 2 │ -│ 2 │ 1 │ -│ 5 │ 4 │ -└───┴─────┘ -``` - -Query: - -``` sql -SELECT quantileExactWeighted(n, val) FROM t -``` - -Result: - -``` text -┌─quantileExactWeighted(n, val)─┐ -│ 1 │ -└───────────────────────────────┘ -``` - -**See Also** - -- [median](#median) -- [quantiles](#quantiles) - -## quantileTiming {#quantiletiming} - -With the determined precision computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence. - -The result is deterministic (it doesn’t depend on the query processing order). The function is optimized for working with sequences which describe distributions like loading web pages times or backend response times. 
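-
-For instance, a typical call over a column of response times in milliseconds might look like the following sketch (the table name `requests` and the column name `response_time` are assumptions chosen for illustration):
-
-``` sql
--- 99th-percentile response time; quantileTiming is tuned for exactly this
--- kind of non-negative, millisecond-scale timing distribution.
-SELECT quantileTiming(0.99)(response_time) FROM requests
-```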
- -When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](#quantiles) function. - -**Syntax** - -``` sql -quantileTiming(level)(expr) -``` - -Alias: `medianTiming`. - -**Parameters** - -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). - -- `expr` — [Expression](../syntax.md#syntax-expressions) over a column values returning a [Float\*](../../data_types/float.md)-type number. - - - If negative values are passed to the function, the behavior is undefined. - - If the value is greater than 30,000 (a page loading time of more than 30 seconds), it is assumed to be 30,000. - -**Accuracy** - -The calculation is accurate if: - -- Total number of values doesn’t exceed 5670. -- Total number of values exceeds 5670, but the page loading time is less than 1024ms. - -Otherwise, the result of the calculation is rounded to the nearest multiple of 16 ms. - -!!! note "Note" - For calculating page loading time quantiles, this function is more effective and accurate than [quantile](#quantile). - -**Returned value** - -- Quantile of the specified level. - -Type: `Float32`. - -!!! note "Note" - If no values are passed to the function (when using `quantileTimingIf`), [NaN](../../data_types/float.md#data_type-float-nan-inf) is returned. The purpose of this is to differentiate these cases from cases that result in zero. See [ORDER BY clause](../select.md#select-order-by) for notes on sorting `NaN` values. - -**Example** - -Input table: - -``` text -┌─response_time─┐ -│ 72 │ -│ 112 │ -│ 126 │ -│ 145 │ -│ 104 │ -│ 242 │ -│ 313 │ -│ 168 │ -│ 108 │ -└───────────────┘ -``` - -Query: - -``` sql -SELECT quantileTiming(response_time) FROM t -``` - -Result: - -``` text -┌─quantileTiming(response_time)─┐ -│ 126 │ -└───────────────────────────────┘ -``` - -**See Also** - -- [median](#median) -- [quantiles](#quantiles) - -## quantileTimingWeighted {#quantiletimingweighted} - -With the determined precision computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence according to the weight of each sequence member. - -The result is deterministic (it doesn’t depend on the query processing order). The function is optimized for working with sequences which describe distributions like loading web pages times or backend response times. - -When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](#quantiles) function. - -**Syntax** - -``` sql -quantileTimingWeighted(level)(expr, weight) -``` - -Alias: `medianTimingWeighted`. - -**Parameters** - -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). - -- `expr` — [Expression](../syntax.md#syntax-expressions) over a column values returning a [Float\*](../../data_types/float.md)-type number. - - - If negative values are passed to the function, the behavior is undefined. 
- - If the value is greater than 30,000 (a page loading time of more than 30 seconds), it is assumed to be 30,000. - -- `weight` — Column with weights of sequence elements. Weight is a number of value occurrences. - -**Accuracy** - -The calculation is accurate if: - -- Total number of values doesn’t exceed 5670. -- Total number of values exceeds 5670, but the page loading time is less than 1024ms. - -Otherwise, the result of the calculation is rounded to the nearest multiple of 16 ms. - -!!! note "Note" - For calculating page loading time quantiles, this function is more effective and accurate than [quantile](#quantile). - -**Returned value** - -- Quantile of the specified level. - -Type: `Float32`. - -!!! note "Note" - If no values are passed to the function (when using `quantileTimingIf`), [NaN](../../data_types/float.md#data_type-float-nan-inf) is returned. The purpose of this is to differentiate these cases from cases that result in zero. See [ORDER BY clause](../select.md#select-order-by) for notes on sorting `NaN` values. - -**Example** - -Input table: - -``` text -┌─response_time─┬─weight─┐ -│ 68 │ 1 │ -│ 104 │ 2 │ -│ 112 │ 3 │ -│ 126 │ 2 │ -│ 138 │ 1 │ -│ 162 │ 1 │ -└───────────────┴────────┘ -``` - -Query: - -``` sql -SELECT quantileTimingWeighted(response_time, weight) FROM t -``` - -Result: - -``` text -┌─quantileTimingWeighted(response_time, weight)─┐ -│ 112 │ -└───────────────────────────────────────────────┘ -``` - -**See Also** - -- [median](#median) -- [quantiles](#quantiles) - -## quantileTDigest {#quantiletdigest} - -Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence using the [t-digest](https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf) algorithm. - -The maximum error is 1%. Memory consumption is `log(n)`, where `n` is a number of values. The result depends on the order of running the query, and is nondeterministic. - -The performance of the function is lower than performance of [quantile](#quantile) or [quantileTiming](#quantiletiming). In terms of the ratio of State size to precision, this function is much better than `quantile`. - -When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](#quantiles) function. - -**Syntax** - -``` sql -quantileTDigest(level)(expr) -``` - -Alias: `medianTDigest`. - -**Parameters** - -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). -- `expr` — Expression over the column values resulting in numeric [data types](../../data_types/index.md#data_types), [Date](../../data_types/date.md) or [DateTime](../../data_types/datetime.md). - -**Returned value** - -- Approximate quantile of the specified level. - -Type: - -- [Float64](../../data_types/float.md) for numeric data type input. -- [Date](../../data_types/date.md) if input values have the `Date` type. -- [DateTime](../../data_types/datetime.md) if input values have the `DateTime` type. 
-
-**Example**
-
-Query:
-
-``` sql
-SELECT quantileTDigest(number) FROM numbers(10)
-```
-
-Result:
-
-``` text
-┌─quantileTDigest(number)─┐
-│                     4.5 │
-└─────────────────────────┘
-```
-
-**See Also**
-
-- [median](#median)
-- [quantiles](#quantiles)
-
-## quantileTDigestWeighted {#quantiletdigestweighted}
-
-Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence using the [t-digest](https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf) algorithm. The function takes into account the weight of each sequence member. The maximum error is 1%. Memory consumption is `log(n)`, where `n` is the number of values.
-
-The performance of the function is lower than the performance of [quantile](#quantile) or [quantileTiming](#quantiletiming). In terms of the ratio of state size to precision, this function is much better than `quantile`.
-
-The result depends on the order of running the query, and is nondeterministic.
-
-When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](#quantiles) function.
-
-**Syntax**
-
-``` sql
-quantileTDigestWeighted(level)(expr, weight)
-```
-
-Alias: `medianTDigestWeighted`.
-
-**Parameters**
-
-- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median).
-- `expr` — Expression over the column values resulting in numeric [data types](../../data_types/index.md#data_types), [Date](../../data_types/date.md) or [DateTime](../../data_types/datetime.md).
-- `weight` — Column with weights of sequence elements. Weight is a number of value occurrences.
-
-**Returned value**
-
-- Approximate quantile of the specified level.
-
-Type:
-
-- [Float64](../../data_types/float.md) for numeric data type input.
-- [Date](../../data_types/date.md) if input values have the `Date` type.
-- [DateTime](../../data_types/datetime.md) if input values have the `DateTime` type.
-
-**Example**
-
-Query:
-
-``` sql
-SELECT quantileTDigestWeighted(number, 1) FROM numbers(10)
-```
-
-Result:
-
-``` text
-┌─quantileTDigestWeighted(number, 1)─┐
-│                                4.5 │
-└────────────────────────────────────┘
-```
-
-**See Also**
-
-- [median](#median)
-- [quantiles](#quantiles)
-
-## median {#median}
-
-The `median*` functions are the aliases for the corresponding `quantile*` functions. They calculate the median of a numeric data sample.
-
-Functions:
-
-- `median` — Alias for [quantile](#quantile).
-- `medianDeterministic` — Alias for [quantileDeterministic](#quantiledeterministic).
-- `medianExact` — Alias for [quantileExact](#quantileexact).
-- `medianExactWeighted` — Alias for [quantileExactWeighted](#quantileexactweighted).
-- `medianTiming` — Alias for [quantileTiming](#quantiletiming).
-- `medianTimingWeighted` — Alias for [quantileTimingWeighted](#quantiletimingweighted).
-- `medianTDigest` — Alias for [quantileTDigest](#quantiletdigest).
-- `medianTDigestWeighted` — Alias for [quantileTDigestWeighted](#quantiletdigestweighted).
- -**Example** - -Input table: - -``` text -┌─val─┐ -│ 1 │ -│ 1 │ -│ 2 │ -│ 3 │ -└─────┘ -``` - -Query: - -``` sql -SELECT medianDeterministic(val, 1) FROM t -``` - -Result: - -``` text -┌─medianDeterministic(val, 1)─┐ -│ 1.5 │ -└─────────────────────────────┘ -``` - -## quantiles(level1, level2, …)(x) {#quantiles} - -All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesTDigest`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values. - -## varSamp(x) {#varsampx} - -Calculates the amount `Σ((x - x̅)^2) / (n - 1)`, where `n` is the sample size and `x̅`is the average value of `x`. - -It represents an unbiased estimate of the variance of a random variable if passed values form its sample. - -Returns `Float64`. When `n <= 1`, returns `+∞`. - -## varPop(x) {#varpopx} - -Calculates the amount `Σ((x - x̅)^2) / n`, where `n` is the sample size and `x̅`is the average value of `x`. - -In other words, dispersion for a set of values. Returns `Float64`. - -## stddevSamp(x) {#stddevsampx} - -The result is equal to the square root of `varSamp(x)`. - -## stddevPop(x) {#stddevpopx} - -The result is equal to the square root of `varPop(x)`. - -## topK(N)(x) {#topknx} - -Returns an array of the approximately most frequent values in the specified column. The resulting array is sorted in descending order of approximate frequency of values (not by the values themselves). - -Implements the [Filtered Space-Saving](http://www.l2f.inesc-id.pt/~fmmb/wiki/uploads/Work/misnis.ref0a.pdf) algorithm for analyzing TopK, based on the reduce-and-combine algorithm from [Parallel Space Saving](https://arxiv.org/pdf/1401.0702.pdf). - -``` sql -topK(N)(column) -``` - -This function doesn’t provide a guaranteed result. In certain situations, errors might occur and it might return frequent values that aren’t the most frequent values. - -We recommend using the `N < 10` value; performance is reduced with large `N` values. Maximum value of `N = 65536`. - -**Parameters** - -- ‘N’ is the number of elements to return. - -If the parameter is omitted, default value 10 is used. - -**Arguments** - -- ’ x ’ – The value to calculate frequency. - -**Example** - -Take the [OnTime](../../getting_started/example_datasets/ontime.md) data set and select the three most frequently occurring values in the `AirlineID` column. - -``` sql -SELECT topK(3)(AirlineID) AS res -FROM ontime -``` - -``` text -┌─res─────────────────┐ -│ [19393,19790,19805] │ -└─────────────────────┘ -``` - -## topKWeighted {#topkweighted} - -Similar to `topK` but takes one additional argument of integer type - `weight`. Every value is accounted `weight` times for frequency calculation. - -**Syntax** - -``` sql -topKWeighted(N)(x, weight) -``` - -**Parameters** - -- `N` — The number of elements to return. - -**Arguments** - -- `x` – The value. -- `weight` — The weight. [UInt8](../../data_types/int_uint.md). - -**Returned value** - -Returns an array of the values with maximum approximate sum of weights. 
-
-**Example**
-
-Query:
-
-``` sql
-SELECT topKWeighted(10)(number, number) FROM numbers(1000)
-```
-
-Result:
-
-``` text
-┌─topKWeighted(10)(number, number)──────────┐
-│ [999,998,997,996,995,994,993,992,991,990] │
-└───────────────────────────────────────────┘
-```
-
-## covarSamp(x, y) {#covarsampx-y}
-
-Calculates the value of `Σ((x - x̅)(y - y̅)) / (n - 1)`.
-
-Returns Float64. When `n <= 1`, returns +∞.
-
-## covarPop(x, y) {#covarpopx-y}
-
-Calculates the value of `Σ((x - x̅)(y - y̅)) / n`.
-
-## corr(x, y) {#corrx-y}
-
-Calculates the Pearson correlation coefficient: `Σ((x - x̅)(y - y̅)) / sqrt(Σ((x - x̅)^2) * Σ((y - y̅)^2))`.
-
-## categoricalInformationValue {#categoricalinformationvalue}
-
-Calculates the value of `(P(tag = 1) - P(tag = 0))(log(P(tag = 1)) - log(P(tag = 0)))` for each category.
-
-``` sql
-categoricalInformationValue(category1, category2, ..., tag)
-```
-
-The result indicates how a discrete (categorical) feature `[category1, category2, ...]` contributes to a learning model that predicts the value of `tag`.
-
-## simpleLinearRegression {#simplelinearregression}
-
-Performs simple (unidimensional) linear regression.
-
-``` sql
-simpleLinearRegression(x, y)
-```
-
-Parameters:
-
-- `x` — Column with explanatory (independent) variable values.
-- `y` — Column with dependent variable values.
-
-Returned values:
-
-Constants `(a, b)` of the resulting line `y = a*x + b`.
-
-**Examples**
-
-``` sql
-SELECT arrayReduce('simpleLinearRegression', [0, 1, 2, 3], [0, 1, 2, 3])
-```
-
-``` text
-┌─arrayReduce('simpleLinearRegression', [0, 1, 2, 3], [0, 1, 2, 3])─┐
-│ (1,0)                                                             │
-└───────────────────────────────────────────────────────────────────┘
-```
-
-``` sql
-SELECT arrayReduce('simpleLinearRegression', [0, 1, 2, 3], [3, 4, 5, 6])
-```
-
-``` text
-┌─arrayReduce('simpleLinearRegression', [0, 1, 2, 3], [3, 4, 5, 6])─┐
-│ (1,3)                                                             │
-└───────────────────────────────────────────────────────────────────┘
-```
-
-## stochasticLinearRegression {#agg_functions-stochasticlinearregression}
-
-This function implements stochastic linear regression. It supports custom parameters for the learning rate, the L2 regularization coefficient and the mini-batch size, and it has several methods for updating the weights ([Adam](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Adam) (used by default), [simple SGD](https://en.wikipedia.org/wiki/Stochastic_gradient_descent), [Momentum](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Momentum), [Nesterov](https://mipt.ru/upload/medialibrary/d7e/41-91.pdf)).
-
-### Parameters {#agg_functions-stochasticlinearregression-parameters}
-
-There are 4 customizable parameters. They are passed to the function sequentially, but there is no need to pass all four: default values are used for any omitted parameters. However, a good model usually requires some parameter tuning.
-
-``` text
-stochasticLinearRegression(1.0, 1.0, 10, 'SGD')
-```
-
-1. `learning rate` — The coefficient on the step length when a gradient descent step is performed. A learning rate that is too big may cause the weights of the model to diverge. Default is `0.00001`.
-2. `l2 regularization coefficient` — May help to prevent overfitting. Default is `0.1`.
-3. `mini-batch size` — The number of elements whose gradients are computed and summed to perform one step of gradient descent. Pure stochastic descent uses one element; however, small batches (about 10 elements) make the gradient steps more stable. Default is `15`.
-4. `method for updating weights` — One of: `Adam` (by default), `SGD`, `Momentum`, `Nesterov`. `Momentum` and `Nesterov` require somewhat more computation and memory, but they tend to improve the speed of convergence and the stability of stochastic gradient methods.
-
-### Usage {#agg_functions-stochasticlinearregression-usage}
-
-`stochasticLinearRegression` is used in two steps: fitting the model and predicting on new data. To fit the model and save its state for later use, apply the `-State` combinator, which saves the state (the model weights, etc.).
-To predict, use the [evalMLMethod](../functions/machine_learning_functions.md#machine_learning_methods-evalmlmethod) function, which takes a state as an argument, as well as the features to predict on.
-
-**1.** Fitting
-
-The following query can be used:
-
-``` sql
-CREATE TABLE IF NOT EXISTS train_data
-(
-    param1 Float64,
-    param2 Float64,
-    target Float64
-) ENGINE = Memory;
-
-CREATE TABLE your_model ENGINE = Memory AS SELECT
-stochasticLinearRegressionState(0.1, 0.0, 5, 'SGD')(target, param1, param2)
-AS state FROM train_data;
-```
-
-Here we also need to insert data into the `train_data` table. The number of parameters is not fixed; it depends only on the number of arguments passed into `stochasticLinearRegressionState`. They all must be numeric values.
-Note that the column with the target value (which we would like to learn to predict) is passed as the first argument.
-
-**2.** Predicting
-
-After saving a state into the table, we can use it multiple times for prediction, or even merge it with other states to create new, possibly better models.
-
-``` sql
-WITH (SELECT state FROM your_model) AS model SELECT
-evalMLMethod(model, param1, param2) FROM test_data
-```
-
-The query returns a column of predicted values. Note that the first argument of `evalMLMethod` is an `AggregateFunctionState` object; the following arguments are columns of features.
-
-`test_data` is a table like `train_data`, but it may not contain the target value.
-
-### Notes {#agg_functions-stochasticlinearregression-notes}
-
-1. To merge two models, the user can run a query like:
-   `sql SELECT state1 + state2 FROM your_models`
-   where the `your_models` table contains both models. This query returns a new `AggregateFunctionState` object.
-
-2. The user can fetch the weights of the created model for their own purposes without saving the model, if no `-State` combinator is used.
-   `sql SELECT stochasticLinearRegression(0.01)(target, param1, param2) FROM train_data`
-   Such a query fits the model and returns its weights: first come the weights that correspond to the parameters of the model, and the last value is the bias. So in the example above, the query returns a column with 3 values.
-
-**See Also**
-
-- [stochasticLogisticRegression](#agg_functions-stochasticlogisticregression)
-- [Difference between linear and logistic regressions](https://stackoverflow.com/questions/12146914/what-is-the-difference-between-linear-regression-and-logistic-regression)
-
-## stochasticLogisticRegression {#agg_functions-stochasticlogisticregression}
-
-This function implements stochastic logistic regression. It can be used for binary classification problems, supports the same custom parameters as `stochasticLinearRegression` and works in the same way.
-
-### Parameters {#agg_functions-stochasticlogisticregression-parameters}
-
-Parameters are exactly the same as in `stochasticLinearRegression`:
-`learning rate`, `l2 regularization coefficient`, `mini-batch size`, `method for updating weights`.
-For more information see [parameters](#agg_functions-stochasticlinearregression-parameters).
-
-``` text
-stochasticLogisticRegression(1.0, 1.0, 10, 'SGD')
-```
-
-1. Fitting
-
-    See the `Fitting` section in the [stochasticLinearRegression](#stochasticlinearregression-usage-fitting) description.
-
-    Predicted labels have to be in \[-1, 1\].
-
-2. Predicting
-
-    Using the saved state, we can predict the probability of an object having the label `1`.
-
-    ``` sql
-    WITH (SELECT state FROM your_model) AS model SELECT
-    evalMLMethod(model, param1, param2) FROM test_data
-    ```
-
-    The query returns a column of probabilities. Note that the first argument of `evalMLMethod` is an `AggregateFunctionState` object; the following arguments are columns of features.
-
-    We can also set a probability bound that assigns elements to different labels.
-
-    ``` sql
-    SELECT ans < 1.1 AND ans > 0.5 FROM
-    (WITH (SELECT state FROM your_model) AS model SELECT
-    evalMLMethod(model, param1, param2) AS ans FROM test_data)
-    ```
-
-    The result will then be labels.
-
-    `test_data` is a table like `train_data`, but it may not contain the target value.
-
-**See Also**
-
-- [stochasticLinearRegression](#agg_functions-stochasticlinearregression)
-- [Difference between linear and logistic regressions.](https://stackoverflow.com/questions/12146914/what-is-the-difference-between-linear-regression-and-logistic-regression)
-
-## groupBitmapAnd {#groupbitmapand}
-
-Calculates the AND of a bitmap column and returns the cardinality as a value of type UInt64. With the `-State` suffix, it returns a [bitmap object](../functions/bitmap_functions.md) instead.
-
-``` sql
-groupBitmapAnd(expr)
-```
-
-**Parameters**
-
-`expr` – An expression that results in `AggregateFunction(groupBitmap, UInt*)` type.
-
-**Return value**
-
-Value of the `UInt64` type.
-
-**Example**
-
-``` sql
-DROP TABLE IF EXISTS bitmap_column_expr_test2;
-CREATE TABLE bitmap_column_expr_test2
-(
-    tag_id String,
-    z AggregateFunction(groupBitmap, UInt32)
-)
-ENGINE = MergeTree
-ORDER BY tag_id;
-
-INSERT INTO bitmap_column_expr_test2 VALUES ('tag1', bitmapBuild(cast([1,2,3,4,5,6,7,8,9,10] as Array(UInt32))));
-INSERT INTO bitmap_column_expr_test2 VALUES ('tag2', bitmapBuild(cast([6,7,8,9,10,11,12,13,14,15] as Array(UInt32))));
-INSERT INTO bitmap_column_expr_test2 VALUES ('tag3', bitmapBuild(cast([2,4,6,8,10,12] as Array(UInt32))));
-
-SELECT groupBitmapAnd(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%');
-┌─groupBitmapAnd(z)─┐
-│                 3 │
-└───────────────────┘
-
-SELECT arraySort(bitmapToArray(groupBitmapAndState(z))) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%');
-┌─arraySort(bitmapToArray(groupBitmapAndState(z)))─┐
-│ [6,8,10]                                         │
-└──────────────────────────────────────────────────┘
-```
-
-## groupBitmapOr {#groupbitmapor}
-
-Calculates the OR of a bitmap column and returns the cardinality as a value of type UInt64. With the `-State` suffix, it returns a [bitmap object](../functions/bitmap_functions.md) instead. This is equivalent to `groupBitmapMerge`.
-
-``` sql
-groupBitmapOr(expr)
-```
-
-**Parameters**
-
-`expr` – An expression that results in `AggregateFunction(groupBitmap, UInt*)` type.
-
-**Return value**
-
-Value of the `UInt64` type.
-
-**Example**
-
-``` sql
-DROP TABLE IF EXISTS bitmap_column_expr_test2;
-CREATE TABLE bitmap_column_expr_test2
-(
-    tag_id String,
-    z AggregateFunction(groupBitmap, UInt32)
-)
-ENGINE = MergeTree
-ORDER BY tag_id;
-
-INSERT INTO bitmap_column_expr_test2 VALUES ('tag1', bitmapBuild(cast([1,2,3,4,5,6,7,8,9,10] as Array(UInt32))));
-INSERT INTO bitmap_column_expr_test2 VALUES ('tag2', bitmapBuild(cast([6,7,8,9,10,11,12,13,14,15] as Array(UInt32))));
-INSERT INTO bitmap_column_expr_test2 VALUES ('tag3', bitmapBuild(cast([2,4,6,8,10,12] as Array(UInt32))));
-
-SELECT groupBitmapOr(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%');
-┌─groupBitmapOr(z)─┐
-│               15 │
-└──────────────────┘
-
-SELECT arraySort(bitmapToArray(groupBitmapOrState(z))) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%');
-┌─arraySort(bitmapToArray(groupBitmapOrState(z)))─┐
-│ [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]           │
-└─────────────────────────────────────────────────┘
-```
-
-## groupBitmapXor {#groupbitmapxor}
-
-Calculates the XOR of a bitmap column and returns the cardinality as a value of type UInt64. With the `-State` suffix, it returns a [bitmap object](../functions/bitmap_functions.md) instead.
-
-``` sql
-groupBitmapXor(expr)
-```
-
-**Parameters**
-
-`expr` – An expression that results in `AggregateFunction(groupBitmap, UInt*)` type.
-
-**Return value**
-
-Value of the `UInt64` type.
-
-**Example**
-
-``` sql
-DROP TABLE IF EXISTS bitmap_column_expr_test2;
-CREATE TABLE bitmap_column_expr_test2
-(
-    tag_id String,
-    z AggregateFunction(groupBitmap, UInt32)
-)
-ENGINE = MergeTree
-ORDER BY tag_id;
-
-INSERT INTO bitmap_column_expr_test2 VALUES ('tag1', bitmapBuild(cast([1,2,3,4,5,6,7,8,9,10] as Array(UInt32))));
-INSERT INTO bitmap_column_expr_test2 VALUES ('tag2', bitmapBuild(cast([6,7,8,9,10,11,12,13,14,15] as Array(UInt32))));
-INSERT INTO bitmap_column_expr_test2 VALUES ('tag3', bitmapBuild(cast([2,4,6,8,10,12] as Array(UInt32))));
-
-SELECT groupBitmapXor(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%');
-┌─groupBitmapXor(z)─┐
-│                10 │
-└───────────────────┘
-
-SELECT arraySort(bitmapToArray(groupBitmapXorState(z))) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%');
-┌─arraySort(bitmapToArray(groupBitmapXorState(z)))─┐
-│ [1,3,5,6,8,10,11,13,14,15]                       │
-└──────────────────────────────────────────────────┘
-```
-
-[Original article](https://clickhouse.tech/docs/en/query_language/agg_functions/reference/)
diff --git a/docs/zh/query_language/alter.md b/docs/zh/query_language/alter.md
deleted file mode 100644
index a2b05037315..00000000000
--- a/docs/zh/query_language/alter.md
+++ /dev/null
@@ -1,502 +0,0 @@
----
-en_copy: true
----
-
-## ALTER {#query_language_queries_alter}
-
-The `ALTER` query is only supported for `*MergeTree` tables, as well as `Merge` and `Distributed` tables. The query has several variations.
-
-### Column Manipulations {#column-manipulations}
-
-Changing the table structure:
-
-``` sql
-ALTER TABLE [db].name [ON CLUSTER cluster] ADD|DROP|CLEAR|COMMENT|MODIFY COLUMN ...
-```
-
-In the query, specify a list of one or more comma-separated actions.
-Each action is an operation on a column.
-
-The following actions are supported:
-
-- [ADD COLUMN](#alter_add-column) — Adds a new column to the table.
-- [DROP COLUMN](#alter_drop-column) — Deletes the column.
-- [CLEAR COLUMN](#alter_clear-column) — Resets column values.
-- [COMMENT COLUMN](#alter_comment-column) — Adds a text comment to the column.
-- [MODIFY COLUMN](#alter_modify-column) — Changes the column’s type, default expression and TTL.
- -These actions are described in detail below. - -#### ADD COLUMN {#alter_add-column} - -``` sql -ADD COLUMN [IF NOT EXISTS] name [type] [default_expr] [codec] [AFTER name_after] -``` - -Adds a new column to the table with the specified `name`, `type`, [`codec`](create.md#codecs) and `default_expr` (see the section [Default expressions](create.md#create-default-values)). - -If the `IF NOT EXISTS` clause is included, the query won’t return an error if the column already exists. If you specify `AFTER name_after` (the name of another column), the column is added after the specified one in the list of table columns. Otherwise, the column is added to the end of the table. Note that there is no way to add a column to the beginning of a table. For a chain of actions, `name_after` can be the name of a column that is added in one of the previous actions. - -Adding a column just changes the table structure, without performing any actions with data. The data doesn’t appear on the disk after `ALTER`. If the data is missing for a column when reading from the table, it is filled in with default values (by performing the default expression if there is one, or using zeros or empty strings). The column appears on the disk after merging data parts (see [MergeTree](../operations/table_engines/mergetree.md)). - -This approach allows us to complete the `ALTER` query instantly, without increasing the volume of old data. - -Example: - -``` sql -ALTER TABLE visits ADD COLUMN browser String AFTER user_id -``` - -#### DROP COLUMN {#alter_drop-column} - -``` sql -DROP COLUMN [IF EXISTS] name -``` - -Deletes the column with the name `name`. If the `IF EXISTS` clause is specified, the query won’t return an error if the column doesn’t exist. - -Deletes data from the file system. Since this deletes entire files, the query is completed almost instantly. - -Example: - -``` sql -ALTER TABLE visits DROP COLUMN browser -``` - -#### CLEAR COLUMN {#alter_clear-column} - -``` sql -CLEAR COLUMN [IF EXISTS] name IN PARTITION partition_name -``` - -Resets all data in a column for a specified partition. Read more about setting the partition name in the section [How to specify the partition expression](#alter-how-to-specify-part-expr). - -If the `IF EXISTS` clause is specified, the query won’t return an error if the column doesn’t exist. - -Example: - -``` sql -ALTER TABLE visits CLEAR COLUMN browser IN PARTITION tuple() -``` - -#### COMMENT COLUMN {#alter_comment-column} - -``` sql -COMMENT COLUMN [IF EXISTS] name 'comment' -``` - -Adds a comment to the column. If the `IF EXISTS` clause is specified, the query won’t return an error if the column doesn’t exist. - -Each column can have one comment. If a comment already exists for the column, a new comment overwrites the previous comment. - -Comments are stored in the `comment_expression` column returned by the [DESCRIBE TABLE](misc.md#misc-describe-table) query. - -Example: - -``` sql -ALTER TABLE visits COMMENT COLUMN browser 'The table shows the browser used for accessing the site.' -``` - -#### MODIFY COLUMN {#alter_modify-column} - -``` sql -MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [TTL] -``` - -This query changes the `name` column properties: - -- Type - -- Default expression - -- TTL - - For examples of columns TTL modifying, see [Column TTL](../operations/table_engines/mergetree.md#mergetree-column-ttl). - -If the `IF EXISTS` clause is specified, the query won’t return an error if the column doesn’t exist. 
-
-When changing the type, values are converted as if the [toType](functions/type_conversion_functions.md) functions were applied to them. If only the default expression is changed, the query doesn’t do anything complex, and is completed almost instantly.
-
-Example:
-
-``` sql
-ALTER TABLE visits MODIFY COLUMN browser Array(String)
-```
-
-Changing the column type is the only complex action – it changes the contents of files with data. For large tables, this may take a long time.
-
-There are several processing stages:
-
-- Preparing temporary (new) files with modified data.
-- Renaming old files.
-- Renaming the temporary (new) files to the old names.
-- Deleting the old files.
-
-Only the first stage takes time. If there is a failure at this stage, the data is not changed.
-If there is a failure during one of the subsequent stages, data can be restored manually. The exception is if the old files were deleted from the file system but the data for the new files did not get written to the disk and was lost.
-
-The `ALTER` query for changing columns is replicated. The instructions are saved in ZooKeeper, then each replica applies them. All `ALTER` queries are run in the same order. The query waits for the appropriate actions to be completed on the other replicas. However, a query to change columns in a replicated table can be interrupted, and all actions will be performed asynchronously.
-
-#### ALTER Query Limitations {#alter-query-limitations}
-
-The `ALTER` query lets you create and delete separate elements (columns) in nested data structures, but not whole nested data structures. To add a nested data structure, you can add columns with a name like `name.nested_name` and the type `Array(T)`. A nested data structure is equivalent to multiple array columns with a name that has the same prefix before the dot.
-
-There is no support for deleting columns in the primary key or the sampling key (columns that are used in the `ENGINE` expression). Changing the type for columns that are included in the primary key is only possible if this change does not cause the data to be modified (for example, you are allowed to add values to an Enum or to change a type from `DateTime` to `UInt32`).
-
-If the `ALTER` query is not sufficient to make the table changes you need, you can create a new table, copy the data to it using the [INSERT SELECT](insert_into.md#insert_query_insert-select) query, then switch the tables using the [RENAME](misc.md#misc_operations-rename) query and delete the old table, as sketched at the end of this section. You can use [clickhouse-copier](../operations/utils/clickhouse-copier.md) as an alternative to the `INSERT SELECT` query.
-
-The `ALTER` query blocks all reads and writes for the table. In other words, if a long `SELECT` is running at the time of the `ALTER` query, the `ALTER` query will wait for it to complete. At the same time, all new queries to the same table will wait while this `ALTER` is running.
-
-For tables that don’t store data themselves (such as `Merge` and `Distributed`), `ALTER` just changes the table structure, and does not change the structure of subordinate tables. For example, when running `ALTER` for a `Distributed` table, you will also need to run `ALTER` for the tables on all remote servers.
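-
-A minimal sketch of this workaround, assuming a hypothetical `visits` table with `user_id` and `browser` columns (the names and schema are illustrative only):
-
-``` sql
--- Create the replacement table with the desired new structure.
-CREATE TABLE visits_new
-(
-    user_id UInt64,
-    browser String
-)
-ENGINE = MergeTree
-ORDER BY user_id;
-
--- Copy the data into the new table.
-INSERT INTO visits_new SELECT user_id, browser FROM visits;
-
--- Swap the tables with a single RENAME query, then drop the old one.
-RENAME TABLE visits TO visits_old, visits_new TO visits;
-DROP TABLE visits_old;
-```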
-
-### Manipulations With Key Expressions {#manipulations-with-key-expressions}
-
-The following command is supported:
-
-``` sql
-MODIFY ORDER BY new_expression
-```
-
-It only works for tables in the [`MergeTree`](../operations/table_engines/mergetree.md) family (including
-[replicated](../operations/table_engines/replication.md) tables). The command changes the
-[sorting key](../operations/table_engines/mergetree.md) of the table
-to `new_expression` (an expression or a tuple of expressions). The primary key remains the same.
-
-The command is lightweight in the sense that it only changes metadata. To keep the property that data part
-rows are ordered by the sorting key expression, you cannot add expressions containing existing columns
-to the sorting key (only columns added by the `ADD COLUMN` command in the same `ALTER` query).
-
-### Manipulations With Data Skipping Indices {#manipulations-with-data-skipping-indices}
-
-It only works for tables in the [`*MergeTree`](../operations/table_engines/mergetree.md) family (including
-[replicated](../operations/table_engines/replication.md) tables). The following operations
-are available:
-
-- `ALTER TABLE [db].name ADD INDEX name expression TYPE type GRANULARITY value AFTER name [AFTER name2]` - Adds the index description to the table’s metadata.
-
-- `ALTER TABLE [db].name DROP INDEX name` - Removes the index description from the table’s metadata and deletes the index files from disk.
-
-These commands are lightweight in the sense that they only change metadata or remove files.
-They are also replicated (syncing indices metadata through ZooKeeper).
-
-### Manipulations with constraints {#manipulations-with-constraints}
-
-See more on [constraints](create.md#constraints).
-
-Constraints can be added or deleted using the following syntax:
-
-``` sql
-ALTER TABLE [db].name ADD CONSTRAINT constraint_name CHECK expression;
-ALTER TABLE [db].name DROP CONSTRAINT constraint_name;
-```
-
-These queries add or remove metadata about constraints from the table, so they are processed immediately.
-
-The constraint check *will not be executed* on existing data when a constraint is added.
-
-All changes on replicated tables are broadcast to ZooKeeper, so they will be applied on the other replicas.
-
-### Manipulations With Partitions and Parts {#alter_manipulations-with-partitions}
-
-The following operations with [partitions](../operations/table_engines/custom_partitioning_key.md) are available:
-
-- [DETACH PARTITION](#alter_detach-partition) – Moves a partition to the `detached` directory and forgets it.
-- [DROP PARTITION](#alter_drop-partition) – Deletes a partition.
-- [ATTACH PART\|PARTITION](#alter_attach-partition) – Adds a part or partition from the `detached` directory to the table.
-- [ATTACH PARTITION FROM](#alter_attach-partition-from) – Copies the data partition from one table to another and adds it.
-- [REPLACE PARTITION](#alter_replace-partition) - Copies the data partition from one table to another and replaces it.
-- [MOVE PARTITION TO TABLE](#alter_move_to_table-partition) - Moves the data partition from one table to another.
-- [CLEAR COLUMN IN PARTITION](#alter_clear-column-partition) - Resets the value of a specified column in a partition.
-- [CLEAR INDEX IN PARTITION](#alter_clear-index-partition) - Resets the specified secondary index in a partition.
-- [FREEZE PARTITION](#alter_freeze-partition) – Creates a backup of a partition.
-- [FETCH PARTITION](#alter_fetch-partition) – Downloads a partition from another server.
-- [MOVE PARTITION\|PART](#alter_move-partition) – Moves a partition or data part to another disk or volume.
-
-
-
-#### DETACH PARTITION {#alter_detach-partition}
-
-``` sql
-ALTER TABLE table_name DETACH PARTITION partition_expr
-```
-
-Moves all data for the specified partition to the `detached` directory. The server forgets about the detached data partition as if it does not exist. The server will not know about this data until you make the [ATTACH](#alter_attach-partition) query.
-
-Example:
-
-``` sql
-ALTER TABLE visits DETACH PARTITION 201901
-```
-
-Read about setting the partition expression in the section [How to specify the partition expression](#alter-how-to-specify-part-expr).
-
-After the query is executed, you can do whatever you want with the data in the `detached` directory — delete it from the file system, or just leave it.
-
-This query is replicated – it moves the data to the `detached` directory on all replicas. Note that you can execute this query only on a leader replica. To find out if a replica is a leader, perform the `SELECT` query to the [system.replicas](../operations/system_tables.md#system_tables-replicas) table. Alternatively, it is easier to make a `DETACH` query on all replicas - all the replicas throw an exception, except the leader replica.
-
-#### DROP PARTITION {#alter_drop-partition}
-
-``` sql
-ALTER TABLE table_name DROP PARTITION partition_expr
-```
-
-Deletes the specified partition from the table. This query tags the partition as inactive and deletes the data completely, in approximately 10 minutes.
-
-Read about setting the partition expression in the section [How to specify the partition expression](#alter-how-to-specify-part-expr).
-
-The query is replicated – it deletes data on all replicas.
-
-#### DROP DETACHED PARTITION\|PART {#alter_drop-detached}
-
-``` sql
-ALTER TABLE table_name DROP DETACHED PARTITION|PART partition_expr
-```
-
-Removes the specified part or all parts of the specified partition from `detached`.
-Read more about setting the partition expression in the section [How to specify the partition expression](#alter-how-to-specify-part-expr).
-
-#### ATTACH PARTITION\|PART {#alter_attach-partition}
-
-``` sql
-ALTER TABLE table_name ATTACH PARTITION|PART partition_expr
-```
-
-Adds data to the table from the `detached` directory. It is possible to add data for an entire partition or for a separate part. Examples:
-
-``` sql
-ALTER TABLE visits ATTACH PARTITION 201901;
-ALTER TABLE visits ATTACH PART 201901_2_2_0;
-```
-
-Read more about setting the partition expression in the section [How to specify the partition expression](#alter-how-to-specify-part-expr).
-
-This query is replicated. The replica-initiator checks whether there is data in the `detached` directory. If data exists, the query checks its integrity. If everything is correct, the query adds the data to the table. All the other replicas download the data from the replica-initiator.
-
-So you can put data in the `detached` directory on one replica, and use the `ALTER ... ATTACH` query to add it to the table on all replicas.
-
-#### ATTACH PARTITION FROM {#alter_attach-partition-from}
-
-``` sql
-ALTER TABLE table2 ATTACH PARTITION partition_expr FROM table1
-```
-
-This query copies the data partition from `table1` to `table2` and adds it to the existing data in `table2`. Note that data won’t be deleted from `table1`.
-
-For the query to run successfully, the following conditions must be met:
-
-- Both tables must have the same structure.
-- Both tables must have the same partition key.
-
-#### REPLACE PARTITION {#alter_replace-partition}
-
-``` sql
-ALTER TABLE table2 REPLACE PARTITION partition_expr FROM table1
-```
-
-This query copies the data partition from `table1` to `table2` and replaces the existing partition in `table2`. Note that data won’t be deleted from `table1`.
-
-For the query to run successfully, the following conditions must be met:
-
-- Both tables must have the same structure.
-- Both tables must have the same partition key.
-
-#### MOVE PARTITION TO TABLE {#alter_move_to_table-partition}
-
-``` sql
-ALTER TABLE table_source MOVE PARTITION partition_expr TO TABLE table_dest
-```
-
-This query moves the data partition from `table_source` to `table_dest`, deleting the data from `table_source`.
-
-For the query to run successfully, the following conditions must be met:
-
-- Both tables must have the same structure.
-- Both tables must have the same partition key.
-- Both tables must be from the same engine family (replicated or non-replicated).
-- Both tables must have the same storage policy.
-
-#### CLEAR COLUMN IN PARTITION {#alter_clear-column-partition}
-
-``` sql
-ALTER TABLE table_name CLEAR COLUMN column_name IN PARTITION partition_expr
-```
-
-Resets all values in the specified column in a partition. If the `DEFAULT` clause was specified when creating the table, this query sets the column value to the specified default value.
-
-Example:
-
-``` sql
-ALTER TABLE visits CLEAR COLUMN hour in PARTITION 201902
-```
-
-#### FREEZE PARTITION {#alter_freeze-partition}
-
-``` sql
-ALTER TABLE table_name FREEZE [PARTITION partition_expr]
-```
-
-This query creates a local backup of a specified partition. If the `PARTITION` clause is omitted, the query creates the backup of all partitions at once.
-
-!!! note "Note"
-    The entire backup process is performed without stopping the server.
-
-Note that for old-style tables you can specify the prefix of the partition name (for example, ‘2019’) - then the query creates the backup for all the corresponding partitions. Read about setting the partition expression in the section [How to specify the partition expression](#alter-how-to-specify-part-expr).
-
-At the time of execution, for a data snapshot, the query creates hardlinks to the table data. Hardlinks are placed in the directory `/var/lib/clickhouse/shadow/N/...`, where:
-
-- `/var/lib/clickhouse/` is the working ClickHouse directory specified in the config.
-- `N` is the incremental number of the backup.
-
-!!! note "Note"
-    If you use [a set of disks for data storage in a table](../operations/table_engines/mergetree.md#table_engine-mergetree-multiple-volumes), the `shadow/N` directory appears on every disk, storing the data parts that are matched by the `PARTITION` expression.
-
-The same structure of directories is created inside the backup as inside `/var/lib/clickhouse/`. The query performs ‘chmod’ for all files, forbidding writing into them.
-
-After creating the backup, you can copy the data from `/var/lib/clickhouse/shadow/` to the remote server and then delete it from the local server. Note that the `ALTER t FREEZE PARTITION` query is not replicated. It creates a local backup only on the local server.
-
-The query creates the backup almost instantly (but first it waits for the current queries to the corresponding table to finish running).
-
-`ALTER TABLE t FREEZE PARTITION` copies only the data, not the table metadata. To make a backup of the table metadata, copy the file `/var/lib/clickhouse/metadata/database/table.sql`.
-
-To restore data from a backup, do the following:
-
-1. Create the table if it does not exist. To view the query, use the .sql file (replace `ATTACH` in it with `CREATE`).
-2. Copy the data from the `data/database/table/` directory inside the backup to the `/var/lib/clickhouse/data/database/table/detached/` directory.
-3. Run `ALTER TABLE t ATTACH PARTITION` queries to add the data to the table.
-
-Restoring from a backup doesn’t require stopping the server.
-
-For more information about backups and restoring data, see the [Data Backup](../operations/backup.md) section.
-
-#### CLEAR INDEX IN PARTITION {#alter_clear-index-partition}
-
-``` sql
-ALTER TABLE table_name CLEAR INDEX index_name IN PARTITION partition_expr
-```
-
-The query works similarly to `CLEAR COLUMN`, but it resets an index instead of column data.
-
-#### FETCH PARTITION {#alter_fetch-partition}
-
-``` sql
-ALTER TABLE table_name FETCH PARTITION partition_expr FROM 'path-in-zookeeper'
-```
-
-Downloads a partition from another server. This query only works for replicated tables.
-
-The query does the following:
-
-1. Downloads the partition from the specified shard. In ‘path-in-zookeeper’ you must specify a path to the shard in ZooKeeper.
-2. Then the query puts the downloaded data to the `detached` directory of the `table_name` table. Use the [ATTACH PARTITION\|PART](#alter_attach-partition) query to add the data to the table.
-
-For example:
-
-``` sql
-ALTER TABLE users FETCH PARTITION 201902 FROM '/clickhouse/tables/01-01/visits';
-ALTER TABLE users ATTACH PARTITION 201902;
-```
-
-Note that:
-
-- The `ALTER ... FETCH PARTITION` query isn’t replicated. It places the partition to the `detached` directory only on the local server.
-- The `ALTER TABLE ... ATTACH` query is replicated. It adds the data to all replicas. The data is added to one of the replicas from the `detached` directory, and to the others - from neighboring replicas.
-
-Before downloading, the system checks if the partition exists and the table structure matches. The most appropriate replica is selected automatically from the healthy replicas.
-
-Although the query is called `ALTER TABLE`, it does not change the table structure and does not immediately change the data available in the table.
-
-#### MOVE PARTITION\|PART {#alter_move-partition}
-
-Moves partitions or data parts to another volume or disk for `MergeTree`-engine tables. See [Using Multiple Block Devices for Data Storage](../operations/table_engines/mergetree.md#table_engine-mergetree-multiple-volumes).
-
-``` sql
-ALTER TABLE table_name MOVE PARTITION|PART partition_expr TO DISK|VOLUME 'disk_name'
-```
-
-The `ALTER TABLE t MOVE` query:
-
-- Is not replicated, because different replicas can have different storage policies.
-- Returns an error if the specified disk or volume is not configured. The query also returns an error if the data-moving conditions specified in the storage policy can’t be applied.
-- Can return an error when the data to be moved has already been moved by a background process, a concurrent `ALTER TABLE t MOVE` query, or as a result of background data merging. A user shouldn’t perform any additional actions in this case.
-
-Example:
-
-``` sql
-ALTER TABLE hits MOVE PART '20190301_14343_16206_438' TO VOLUME 'slow'
-ALTER TABLE hits MOVE PARTITION '2019-09-01' TO DISK 'fast_ssd'
-```
-
-#### How To Set Partition Expression {#alter-how-to-specify-part-expr}
-
-You can specify the partition expression in `ALTER ... PARTITION` queries in different ways:
-
-- As a value from the `partition` column of the `system.parts` table. For example, `ALTER TABLE visits DETACH PARTITION 201901`.
-- As an expression from the table column. Constants and constant expressions are supported. For example, `ALTER TABLE visits DETACH PARTITION toYYYYMM(toDate('2019-01-25'))`.
-- Using the partition ID. The partition ID is a string identifier of the partition (human-readable, if possible) that is used as the name of the partition in the file system and in ZooKeeper. The partition ID must be specified in the `PARTITION ID` clause, in single quotes. For example, `ALTER TABLE visits DETACH PARTITION ID '201901'`.
-- In the [ALTER ATTACH PART](#alter_attach-partition) and [DROP DETACHED PART](#alter_drop-detached) queries, to specify the name of a part, use a string literal with a value from the `name` column of the [system.detached\_parts](../operations/system_tables.md#system_tables-detached_parts) table. For example, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`.
-
-Usage of quotes when specifying the partition depends on the type of the partition expression. For example, for the `String` type, you have to specify its name in quotes (`'`). For the `Date` and `Int*` types no quotes are needed.
-
-For old-style tables, you can specify the partition either as a number `201901` or a string `'201901'`. The syntax for the new-style tables is stricter with types (similar to the parser for the VALUES input format).
-
-All the rules above are also true for the [OPTIMIZE](misc.md#misc_operations-optimize) query. If you need to specify the only partition when optimizing a non-partitioned table, use the expression `PARTITION tuple()`. For example:
-
-``` sql
-OPTIMIZE TABLE table_not_partitioned PARTITION tuple() FINAL;
-```
-
-Examples of `ALTER ... PARTITION` queries are demonstrated in the tests [`00502_custom_partitioning_local`](https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/00502_custom_partitioning_local.sql) and [`00502_custom_partitioning_replicated_zookeeper`](https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/00502_custom_partitioning_replicated_zookeeper.sql).
-
-### Manipulations with Table TTL {#manipulations-with-table-ttl}
-
-You can change the [table TTL](../operations/table_engines/mergetree.md#mergetree-table-ttl) with a query of the following form:
-
-``` sql
-ALTER TABLE table-name MODIFY TTL ttl-expression
-```
-
-### Synchronicity of ALTER Queries {#synchronicity-of-alter-queries}
-
-For non-replicated tables, all `ALTER` queries are performed synchronously. For replicated tables, the query just adds instructions for the appropriate actions to `ZooKeeper`, and the actions themselves are performed as soon as possible. However, the query can wait for these actions to be completed on all the replicas.
-
-For `ALTER ... ATTACH|DETACH|DROP` queries, you can use the `replication_alter_partitions_sync` setting to set up waiting.
-Possible values: `0` – do not wait; `1` – only wait for own execution (default); `2` – wait for all.
-
-### Mutations {#alter-mutations}
-
-Mutations are an `ALTER` query variant that allows changing or deleting rows in a table.
-In contrast to standard `UPDATE` and `DELETE` queries that are intended for point data changes, mutations are intended for heavy operations that change a lot of rows in a table. They are supported for the `MergeTree` family of table engines, including the engines with replication support.
-
-Existing tables are ready for mutations as-is (no conversion necessary), but after the first mutation is applied to a table, its metadata format becomes incompatible with previous server versions and falling back to a previous version becomes impossible.
-
-Currently available commands:
-
-``` sql
-ALTER TABLE [db.]table DELETE WHERE filter_expr
-```
-
-The `filter_expr` must be of type `UInt8`. The query deletes rows in the table for which this expression takes a non-zero value.
-
-``` sql
-ALTER TABLE [db.]table UPDATE column1 = expr1 [, ...] WHERE filter_expr
-```
-
-The `filter_expr` must be of type `UInt8`. This query updates values of the specified columns to the values of the corresponding expressions in rows for which the `filter_expr` takes a non-zero value. Values are cast to the column type using the `CAST` operator. Updating columns that are used in the calculation of the primary or the partition key is not supported.
-
-``` sql
-ALTER TABLE [db.]table MATERIALIZE INDEX name IN PARTITION partition_name
-```
-
-The query rebuilds the secondary index `name` in the partition `partition_name`.
-
-One query can contain several commands separated by commas.
-
-For \*MergeTree tables, mutations are executed by rewriting whole data parts. There is no atomicity - parts are substituted for mutated parts as soon as they are ready, and a `SELECT` query that started executing during a mutation will see data from parts that have already been mutated along with data from parts that have not been mutated yet.
-
-Mutations are totally ordered by their creation order and are applied to each part in that order. Mutations are also partially ordered with INSERTs - data that was inserted into the table before the mutation was submitted will be mutated, and data that was inserted after that will not be mutated. Note that mutations do not block INSERTs in any way.
-
-A mutation query returns immediately after the mutation entry is added (in the case of replicated tables to ZooKeeper, for non-replicated tables - to the filesystem). The mutation itself executes asynchronously using the system profile settings. To track the progress of mutations you can use the [`system.mutations`](../operations/system_tables.md#system_tables-mutations) table. A mutation that was successfully submitted will continue to execute even if ClickHouse servers are restarted. There is no way to roll back the mutation once it is submitted, but if the mutation is stuck for some reason it can be cancelled with the [`KILL MUTATION`](misc.md#kill-mutation) query.
-
-Entries for finished mutations are not deleted right away (the number of preserved entries is determined by the `finished_mutations_to_keep` storage engine parameter). Older mutation entries are deleted.
-
-[Original article](https://clickhouse.tech/docs/en/query_language/alter/)
diff --git a/docs/zh/query_language/dicts/external_dicts.md b/docs/zh/query_language/dicts/external_dicts.md
deleted file mode 100644
index ef41a48f95f..00000000000
--- a/docs/zh/query_language/dicts/external_dicts.md
+++ /dev/null
@@ -1,53 +0,0 @@
----
-en_copy: true
----
-
-# External Dictionaries {#dicts-external-dicts}
-
-You can add your own dictionaries from various data sources.
-The data source for a dictionary can be a local text or executable file, an HTTP(s) resource, or another DBMS. For more information, see “[Sources for external dictionaries](external_dicts_dict_sources.md)”.
-
-ClickHouse:
-
-- Fully or partially stores dictionaries in RAM.
-- Periodically updates dictionaries and dynamically loads missing values. In other words, dictionaries can be loaded dynamically.
-- Allows creating external dictionaries with XML files or [DDL queries](../create.md#create-dictionary-query).
-
-The configuration of external dictionaries can be located in one or more XML files. The path to the configuration is specified in the [dictionaries\_config](../../operations/server_settings/settings.md#server_settings-dictionaries_config) parameter.
-
-Dictionaries can be loaded at server startup or at first use, depending on the [dictionaries\_lazy\_load](../../operations/server_settings/settings.md#server_settings-dictionaries_lazy_load) setting.
-
-The dictionary configuration file has the following format:
-
-``` xml
-<yandex>
-    <comment>An optional element with any content. Ignored by the ClickHouse server.</comment>
-
-    <!--Optional element. File name with substitutions-->
-    <include_from>/etc/metrika.xml</include_from>
-
-    <dictionary>
-        <!-- Dictionary configuration. -->
-        <!-- There can be any number of <dictionary> sections in a configuration file. -->
-    </dictionary>
-
-</yandex>
-```
-
-You can [configure](external_dicts_dict.md) any number of dictionaries in the same file.
-
-[DDL queries for dictionaries](../create.md#create-dictionary-query) don’t require any additional records in the server configuration. They allow working with dictionaries as first-class entities, like tables or views.
-
-!!! attention "Attention"
-    You can convert values for a small dictionary by describing it in a `SELECT` query (see the [transform](../functions/other_functions.md) function). This functionality is not related to external dictionaries.
-
-## See also {#ext-dicts-see-also}
-
-- [Configuring an External Dictionary](external_dicts_dict.md)
-- [Storing Dictionaries in Memory](external_dicts_dict_layout.md)
-- [Dictionary Updates](external_dicts_dict_lifetime.md)
-- [Sources of External Dictionaries](external_dicts_dict_sources.md)
-- [Dictionary Key and Fields](external_dicts_dict_structure.md)
-- [Functions for Working with External Dictionaries](../functions/ext_dict_functions.md)
-
-[Original article](https://clickhouse.tech/docs/en/query_language/dicts/external_dicts/)
diff --git a/docs/zh/query_language/dicts/external_dicts_dict.md b/docs/zh/query_language/dicts/external_dicts_dict.md
deleted file mode 100644
index 0519cd381f4..00000000000
--- a/docs/zh/query_language/dicts/external_dicts_dict.md
+++ /dev/null
@@ -1,50 +0,0 @@
----
-en_copy: true
----
-
-# Configuring an External Dictionary {#dicts-external-dicts-dict}
-
-If a dictionary is configured using an XML file, then the dictionary configuration has the following structure:
-
-``` xml
-<dictionary>
-    <name>dict_name</name>
-
-    <structure>
-      <!-- Complex key configuration -->
-    </structure>
-
-    <source>
-      <!-- Source configuration -->
-    </source>
-
-    <layout>
-      <!-- Memory layout configuration -->
-    </layout>
-
-    <lifetime>
-      <!-- Lifetime of dictionary in memory -->
-    </lifetime>
-</dictionary>
-```
-
-The corresponding [DDL query](../create.md#create-dictionary-query) has the following structure:
-
-``` sql
-CREATE DICTIONARY dict_name
-(
-    ... -- attributes
-)
-PRIMARY KEY ... -- complex or single key configuration
-SOURCE(...) -- Source configuration
-LAYOUT(...) -- Memory layout configuration
-LIFETIME(...) -- Lifetime of dictionary in memory
-```
-
-- `name` – The identifier that can be used to access the dictionary. Use the characters `[a-zA-Z0-9_\-]`.
-- [source](external_dicts_dict_sources.md) — Source of the dictionary.
-- [layout](external_dicts_dict_layout.md) — Dictionary layout in memory.
-- [structure](external_dicts_dict_structure.md) — Structure of the dictionary. A key and attributes that can be retrieved by this key.
-- [lifetime](external_dicts_dict_lifetime.md) — Frequency of dictionary updates.
-
-[Original article](https://clickhouse.tech/docs/en/query_language/dicts/external_dicts_dict/)
diff --git a/docs/zh/query_language/dicts/external_dicts_dict_hierarchical.md b/docs/zh/query_language/dicts/external_dicts_dict_hierarchical.md
deleted file mode 100644
index 1a1232f95cd..00000000000
--- a/docs/zh/query_language/dicts/external_dicts_dict_hierarchical.md
+++ /dev/null
@@ -1,67 +0,0 @@
----
-en_copy: true
----
-
-# Hierarchical Dictionaries {#hierarchical-dictionaries}
-
-ClickHouse supports hierarchical dictionaries with a [numeric key](external_dicts_dict_structure.md#ext_dict-numeric-key).
-
-Look at the following hierarchical structure:
-
-``` text
-0 (Common parent)
-│
-├── 1 (Russia)
-│   │
-│   └── 2 (Moscow)
-│       │
-│       └── 3 (Center)
-│
-└── 4 (Great Britain)
-    │
-    └── 5 (London)
-```
-
-This hierarchy can be expressed as the following dictionary table.
-
-| region\_id | parent\_region | region\_name  |
-|------------|----------------|---------------|
-| 1          | 0              | Russia        |
-| 2          | 1              | Moscow        |
-| 3          | 2              | Center        |
-| 4          | 0              | Great Britain |
-| 5          | 4              | London        |
-
-This table contains a column `parent_region` that contains the key of the nearest parent for the element.
-
-ClickHouse supports the [hierarchical](external_dicts_dict_structure.md#hierarchical-dict-attr) property for [external dictionary](index.md) attributes. This property allows you to configure a hierarchical dictionary similar to the one described above.
-
-The [dictGetHierarchy](../functions/ext_dict_functions.md#dictgethierarchy) function allows you to get the parent chain of an element.
-
-For our example, the structure of the dictionary can be the following:
-
-``` xml
-<dictionary>
-    <structure>
-        <id>
-            <name>region_id</name>
-        </id>
-
-        <attribute>
-            <name>parent_region</name>
-            <type>UInt64</type>
-            <null_value>0</null_value>
-            <hierarchical>true</hierarchical>
-        </attribute>
-
-        <attribute>
-            <name>region_name</name>
-            <type>String</type>
-            <null_value></null_value>
-        </attribute>
-
-    </structure>
-</dictionary>
-```
-
-[Original article](https://clickhouse.tech/docs/en/query_language/dicts/external_dicts_dict_hierarchical/)
diff --git a/docs/zh/query_language/dicts/external_dicts_dict_layout.md b/docs/zh/query_language/dicts/external_dicts_dict_layout.md
deleted file mode 100644
index c6aa101da46..00000000000
--- a/docs/zh/query_language/dicts/external_dicts_dict_layout.md
+++ /dev/null
@@ -1,370 +0,0 @@
----
-en_copy: true
----
-
-# Storing Dictionaries in Memory {#dicts-external-dicts-dict-layout}
-
-There are a variety of ways to store dictionaries in memory.
-
-We recommend [flat](#flat), [hashed](#dicts-external_dicts_dict_layout-hashed) and [complex\_key\_hashed](#complex-key-hashed), which provide optimal processing speed.
-
-Caching is not recommended because of potentially poor performance and difficulties in selecting optimal parameters. Read more in the section “[cache](#cache)”.
-
-There are several ways to improve dictionary performance:
-
-- Call the function for working with the dictionary after `GROUP BY`.
-- Mark attributes to extract as injective. An attribute is called injective if different attribute values correspond to different keys. So when `GROUP BY` uses a function that fetches an attribute value by the key, this function is automatically taken out of `GROUP BY`.
-
-ClickHouse generates an exception for errors with dictionaries. Examples of errors:
-
-- The dictionary being accessed could not be loaded.
-- Error querying a `cached` dictionary.
-
-You can view the list of external dictionaries and their statuses in the `system.dictionaries` table.
-
-The configuration looks like this:
-
-``` xml
-<yandex>
-    <dictionary>
-        ...
-        <layout>
-            <layout_type>
-                <!-- layout settings -->
-            </layout_type>
-        </layout>
-        ...
-    </dictionary>
-</yandex>
-```
-
-The corresponding [DDL query](../create.md#create-dictionary-query):
-
-``` sql
-CREATE DICTIONARY (...)
-...
-LAYOUT(LAYOUT_TYPE(param value)) -- layout settings
-...
-```
-
-## Ways to Store Dictionaries in Memory {#ways-to-store-dictionaries-in-memory}
-
-- [flat](#flat)
-- [hashed](#dicts-external_dicts_dict_layout-hashed)
-- [sparse\_hashed](#dicts-external_dicts_dict_layout-sparse_hashed)
-- [cache](#cache)
-- [range\_hashed](#range-hashed)
-- [complex\_key\_hashed](#complex-key-hashed)
-- [complex\_key\_cache](#complex-key-cache)
-- [ip\_trie](#ip-trie)
-
-### flat {#flat}
-
-The dictionary is completely stored in memory in the form of flat arrays. How much memory does the dictionary use? The amount is proportional to the size of the largest key (in space used).
-
-The dictionary key has the `UInt64` type and the value is limited to 500,000. If a larger key is discovered when creating the dictionary, ClickHouse throws an exception and does not create the dictionary.
-
-All types of sources are supported. When updating, data (from a file or from a table) is read in its entirety.
-
-This method provides the best performance among all available methods of storing the dictionary.
-
-Configuration example:
-
-``` xml
-<layout>
-  <flat />
-</layout>
-```
-
-or
-
-``` sql
-LAYOUT(FLAT())
-```
-
-### hashed {#dicts-external_dicts_dict_layout-hashed}
-
-The dictionary is completely stored in memory in the form of a hash table. The dictionary can contain any number of elements with any identifiers. In practice, the number of keys can reach tens of millions of items.
-
-All types of sources are supported. When updating, data (from a file or from a table) is read in its entirety.
-
-Configuration example:
-
-``` xml
-<layout>
-  <hashed />
-</layout>
-```
-
-or
-
-``` sql
-LAYOUT(HASHED())
-```
-
-### sparse\_hashed {#dicts-external_dicts_dict_layout-sparse_hashed}
-
-Similar to `hashed`, but uses less memory in favor of more CPU usage.
-
-Configuration example:
-
-``` xml
-<layout>
-  <sparse_hashed />
-</layout>
-```
-
-``` sql
-LAYOUT(SPARSE_HASHED())
-```
-
-### complex\_key\_hashed {#complex-key-hashed}
-
-This type of storage is for use with composite [keys](external_dicts_dict_structure.md). Similar to `hashed`.
-
-Configuration example:
-
-``` xml
-<layout>
-  <complex_key_hashed />
-</layout>
-```
-
-``` sql
-LAYOUT(COMPLEX_KEY_HASHED())
-```
-
-### range\_hashed {#range-hashed}
-
-The dictionary is stored in memory in the form of a hash table with an ordered array of ranges and their corresponding values.
-
-This storage method works the same way as hashed and allows using date/time (arbitrary numeric type) ranges in addition to the key.
-
-Example: The table contains discounts for each advertiser in the format:
-
-``` text
-| advertiser id | discount start date | discount end date | amount |
-|---------------|---------------------|-------------------|--------|
-| 123           | 2015-01-01          | 2015-01-15        | 0.15   |
-| 123           | 2015-01-16          | 2015-01-31        | 0.25   |
-| 456           | 2015-01-01          | 2015-01-15        | 0.05   |
-```
-
-To use a sample for date ranges, define the `range_min` and `range_max` elements in the [structure](external_dicts_dict_structure.md). These elements must contain the elements `name` and `type` (if `type` is not specified, the default type will be used - Date). `type` can be any numeric type (Date / DateTime / UInt64 / Int32 / others).
-
-Example:
-
-``` xml
-<structure>
-    <id>
-        <name>Id</name>
-    </id>
-    <range_min>
-        <name>first</name>
-        <type>Date</type>
-    </range_min>
-    <range_max>
-        <name>last</name>
-        <type>Date</type>
-    </range_max>
-    ...
-```
-
-or
-
-``` sql
-CREATE DICTIONARY somedict (
-    id UInt64,
-    first Date,
-    last Date
-)
-PRIMARY KEY id
-LAYOUT(RANGE_HASHED())
-RANGE(MIN first MAX last)
-```
-
-To work with these dictionaries, you need to pass an additional argument to the `dictGetT` function, for which a range is selected:
-
-``` sql
-dictGetT('dict_name', 'attr_name', id, date)
-```
-
-This function returns the value for the specified `id`s and the date range that includes the passed date.
-
-Details of the algorithm:
-
-- If the `id` is not found or a range is not found for the `id`, it returns the default value for the dictionary.
-- If there are overlapping ranges, you can use any of them.
-- If the range delimiter is `NULL` or an invalid date (such as 1900-01-01 or 2039-01-01), the range is left open. The range can be open on both sides.
-
-Configuration example:
-
-``` xml
-<yandex>
-    <dictionary>
-        ...
-
-        <layout>
-            <range_hashed />
-        </layout>
-
-        <structure>
-            <id>
-                <name>Abcdef</name>
-            </id>
-            <range_min>
-                <name>StartTimeStamp</name>
-                <type>UInt64</type>
-            </range_min>
-            <range_max>
-                <name>EndTimeStamp</name>
-                <type>UInt64</type>
-            </range_max>
-            <attribute>
-                <name>XXXType</name>
-                <type>String</type>
-                <null_value />
-            </attribute>
-        </structure>
-
-    </dictionary>
-</yandex>
-```
-
-or
-
-``` sql
-CREATE DICTIONARY somedict(
-    Abcdef UInt64,
-    StartTimeStamp UInt64,
-    EndTimeStamp UInt64,
-    XXXType String DEFAULT ''
-)
-PRIMARY KEY Abcdef
-RANGE(MIN StartTimeStamp MAX EndTimeStamp)
-```
-
-### cache {#cache}
-
-The dictionary is stored in a cache that has a fixed number of cells. These cells contain frequently used elements.
-
-When searching for a dictionary, the cache is searched first. For each block of data, all keys that are not found in the cache or are outdated are requested from the source using `SELECT attrs... FROM db.table WHERE id IN (k1, k2, ...)`. The received data is then written to the cache.
-
-For cache dictionaries, the expiration [lifetime](external_dicts_dict_lifetime.md) of data in the cache can be set. If more time than `lifetime` has passed since loading the data in a cell, the cell’s value is not used, and it is re-requested the next time it needs to be used.
-This is the least effective of all the ways to store dictionaries. The speed of the cache depends strongly on correct settings and the usage scenario. A cache type dictionary performs well only when the hit rates are high enough (recommended 99% and higher). You can view the average hit rate in the `system.dictionaries` table.
-
-To improve cache performance, use a subquery with `LIMIT`, and call the function with the dictionary externally.
-
-Supported [sources](external_dicts_dict_sources.md): MySQL, ClickHouse, executable, HTTP.
-
-Example of settings:
-
-``` xml
-<layout>
-    <cache>
-        <!-- The size of the cache, in number of cells. Rounded up to a power of two. -->
-        <size_in_cells>1000000000</size_in_cells>
-    </cache>
-</layout>
-```
-
-or
-
-``` sql
-LAYOUT(CACHE(SIZE_IN_CELLS 1000000000))
-```
-
-Set a large enough cache size. You need to experiment to select the number of cells:
-
-1. Set some value.
-2. Run queries until the cache is completely full.
-3. Assess memory consumption using the `system.dictionaries` table.
-4. Increase or decrease the number of cells until the required memory consumption is reached.
-
-!!! warning "Warning"
-    Do not use ClickHouse as a source, because it is slow to process queries with random reads.
-
-### complex\_key\_cache {#complex-key-cache}
-
-This type of storage is for use with composite [keys](external_dicts_dict_structure.md). Similar to `cache`.
-
-### ip\_trie {#ip-trie}
-
-This type of storage is for mapping network prefixes (IP addresses) to metadata such as ASN.
-
-Example: The table contains network prefixes and their corresponding AS number and country code:
-
-``` text
-| prefix          | asn   | cca2 |
-|-----------------|-------|------|
-| 202.79.32.0/20  | 17501 | NP   |
-| 2620:0:870::/48 | 3856  | US   |
-| 2a02:6b8:1::/48 | 13238 | RU   |
-| 2001:db8::/32   | 65536 | ZZ   |
-```
-
-When using this type of layout, the structure must have a composite key.
-
-Example:
-
-``` xml
-<structure>
-    <key>
-        <attribute>
-            <name>prefix</name>
-            <type>String</type>
-        </attribute>
-    </key>
-    <attribute>
-        <name>asn</name>
-        <type>UInt32</type>
-        <null_value />
-    </attribute>
-    <attribute>
-        <name>cca2</name>
-        <type>String</type>
-        <null_value>??</null_value>
-    </attribute>
-    ...
-```
-
-or
-
-``` sql
-CREATE DICTIONARY somedict (
-    prefix String,
-    asn UInt32,
-    cca2 String DEFAULT '??'
-)
-PRIMARY KEY prefix
-```
-
-The key must have only one `String` type attribute that contains an allowed IP prefix. Other types are not supported yet.
-
-For queries, you must use the same functions (`dictGetT` with a tuple) as for dictionaries with composite keys:
-
-``` sql
-dictGetT('dict_name', 'attr_name', tuple(ip))
-```
-
-The function takes either `UInt32` for IPv4, or `FixedString(16)` for IPv6:
-
-``` sql
-dictGetString('prefix', 'asn', tuple(IPv6StringToNum('2001:db8::1')))
-```
-
-Other types are not supported yet. The function returns the attribute for the prefix that corresponds to this IP address. If there are overlapping prefixes, the most specific one is returned.
-
-Data is stored in a `trie`. It must completely fit into RAM.
-
-[Original article](https://clickhouse.tech/docs/en/query_language/dicts/external_dicts_dict_layout/)
diff --git a/docs/zh/query_language/dicts/external_dicts_dict_lifetime.md b/docs/zh/query_language/dicts/external_dicts_dict_lifetime.md
deleted file mode 100644
index 3a90e437681..00000000000
--- a/docs/zh/query_language/dicts/external_dicts_dict_lifetime.md
+++ /dev/null
@@ -1,83 +0,0 @@
----
-en_copy: true
----
-
-# Dictionary Updates {#dictionary-updates}
-
-ClickHouse periodically updates the dictionaries. The update interval for fully downloaded dictionaries and the invalidation interval for cached dictionaries are defined in the `<lifetime>` tag in seconds.
-
-Dictionary updates (other than loading for first use) do not block queries. During updates, the old version of a dictionary is used. If an error occurs during an update, the error is written to the server log, and queries continue using the old version of dictionaries.
-
-Example of settings:
-
-``` xml
-<dictionary>
-    ...
-    <lifetime>300</lifetime>
-    ...
-</dictionary>
-```
-
-``` sql
-CREATE DICTIONARY (...)
-...
-LIFETIME(300)
-...
-```
-
-Setting `<lifetime>0</lifetime>` (`LIFETIME(0)`) prevents dictionaries from updating.
-
-You can set a time interval for upgrades, and ClickHouse will choose a uniformly random time within this range. This is necessary in order to distribute the load on the dictionary source when upgrading on a large number of servers.
-
-Example of settings:
-
-``` xml
-<dictionary>
-    ...
-    <lifetime>
-        <min>300</min>
-        <max>360</max>
-    </lifetime>
-    ...
-</dictionary>
-```
-
-or
-
-``` sql
-LIFETIME(MIN 300 MAX 360)
-```
-
-When upgrading the dictionaries, the ClickHouse server applies different logic depending on the type of [source](external_dicts_dict_sources.md):
-
-- For a text file, it checks the time of modification. If the time differs from the previously recorded time, the dictionary is updated.
-- For MyISAM tables, the time of modification is checked using a `SHOW TABLE STATUS` query.
-- Dictionaries from other sources are updated every time by default.
-
-For MySQL (InnoDB), ODBC and ClickHouse sources, you can set up a query that will update the dictionaries only if they really changed, rather than each time. To do this, follow these steps:
-
-- The dictionary table must have a field that always changes when the source data is updated.
-- The settings of the source must specify a query that retrieves the changing field. The ClickHouse server interprets the query result as a row, and if this row has changed relative to its previous state, the dictionary is updated. Specify the query in the `<invalidate_query>` field in the settings for the [source](external_dicts_dict_sources.md).
-
-Example of settings:
-
-``` xml
-<dictionary>
-    ...
-    <odbc>
-      ...
-      <invalidate_query>SELECT update_time FROM dictionary_source where id = 1</invalidate_query>
-    </odbc>
-    ...
-</dictionary>
-```
-
-or
-
-``` sql
-...
-SOURCE(ODBC(... invalidate_query 'SELECT update_time FROM dictionary_source where id = 1'))
-...
-```
-
-[Original article](https://clickhouse.tech/docs/en/query_language/dicts/external_dicts_dict_lifetime/)
diff --git a/docs/zh/query_language/dicts/external_dicts_dict_structure.md b/docs/zh/query_language/dicts/external_dicts_dict_structure.md
deleted file mode 100644
index acb0ce36875..00000000000
--- a/docs/zh/query_language/dicts/external_dicts_dict_structure.md
+++ /dev/null
@@ -1,172 +0,0 @@
----
-en_copy: true
----
-
-# Dictionary Key and Fields {#dictionary-key-and-fields}
-
-The `<structure>` clause describes the dictionary key and fields available for queries.
-
-XML description:
-
-``` xml
-<dictionary>
-    <structure>
-
-        <id>
-            <name>Id</name>
-        </id>
-
-        <attribute>
-            <!-- Attribute parameters -->
-        </attribute>
-
-        ...
-
-    </structure>
-</dictionary>
-```
-
-Attributes are described in the elements:
-
-- `<id>` — [Key column](external_dicts_dict_structure.md#ext_dict_structure-key).
-- `<attribute>` — [Data column](external_dicts_dict_structure.md#ext_dict_structure-attributes). There can be multiple attributes.
-
-DDL query:
-
-``` sql
-CREATE DICTIONARY dict_name (
-    Id UInt64,
-    -- attributes
-)
-PRIMARY KEY Id
-...
-```
-
-Attributes are described in the query body:
-
-- `PRIMARY KEY` — [Key column](external_dicts_dict_structure.md#ext_dict_structure-key)
-- `AttrName AttrType` — [Data column](external_dicts_dict_structure.md#ext_dict_structure-attributes). There can be multiple attributes.
-
-## Key {#ext_dict_structure-key}
-
-ClickHouse supports the following types of keys:
-
-- Numeric key. `UInt64`. Defined in the `<id>` tag or using the `PRIMARY KEY` keyword.
-- Composite key. Set of values of different types. Defined in the `<key>` tag or using the `PRIMARY KEY` keyword.
-
-An XML structure can contain either `<id>` or `<key>`. The DDL query must contain a single `PRIMARY KEY`.
-
-!!! warning "Warning"
-    You must not describe the key as an attribute.
-
-### Numeric Key {#ext_dict-numeric-key}
-
-Type: `UInt64`.
-
-Configuration example:
-
-``` xml
-<id>
-    <name>Id</name>
-</id>
-```
-
-Configuration fields:
-
-- `name` – The name of the column with keys.
-
-For the DDL query:
-
-``` sql
-CREATE DICTIONARY (
-    Id UInt64,
-    ...
-)
-PRIMARY KEY Id
-...
-```
-
-- `PRIMARY KEY` – The name of the column with keys.
-
-### Composite Key {#composite-key}
-
-The key can be a `tuple` from any types of fields. The [layout](external_dicts_dict_layout.md) in this case must be `complex_key_hashed` or `complex_key_cache`.
-
-!!! tip "Tip"
-    A composite key can consist of a single element. This makes it possible to use a string as the key, for instance.
-
-The key structure is set in the element `<key>`. Key fields are specified in the same format as the dictionary [attributes](external_dicts_dict_structure.md). Example:
-
-``` xml
-<structure>
-    <key>
-        <attribute>
-            <name>field1</name>
-            <type>String</type>
-        </attribute>
-        <attribute>
-            <name>field2</name>
-            <type>UInt32</type>
-        </attribute>
-        ...
-    </key>
-...
-```
-
-or
-
-``` sql
-CREATE DICTIONARY (
-    field1 String,
-    field2 UInt32
-    ...
-)
-PRIMARY KEY field1, field2
-...
-```
-
-For a query to the `dictGet*` function, a tuple is passed as the key. Example: `dictGetString('dict_name', 'attr_name', tuple('string for field1', num_for_field2))`.
-
-## Attributes {#ext_dict_structure-attributes}
-
-Configuration example:
-
-``` xml
-<structure>
-    ...
-    <attribute>
-        <name>Name</name>
-        <type>ClickHouseDataType</type>
-        <null_value></null_value>
-        <expression>rand64()</expression>
-        <hierarchical>true</hierarchical>
-        <injective>true</injective>
-        <is_object_id>true</is_object_id>
-    </attribute>
-</structure>
-```
-
-or
-
-``` sql
-CREATE DICTIONARY somename (
-    Name ClickHouseDataType DEFAULT '' EXPRESSION rand64() HIERARCHICAL INJECTIVE IS_OBJECT_ID
-)
-```
-
-Configuration fields:
-
-| Tag | Description | Required |
-|------|-------------|----------|
-| `name` | Column name. | Yes |
-| `type` | ClickHouse data type.<br/>ClickHouse tries to cast the value from the dictionary to the specified data type. For example, for MySQL, the field might be `TEXT`, `VARCHAR`, or `BLOB` in the MySQL source table, but it can be uploaded as `String` in ClickHouse.<br/>[Nullable](../../data_types/nullable.md) is not supported. | Yes |
-| `null_value` | Default value for a non-existing element.<br/>In the example, it is an empty string. You cannot use `NULL` in this field. | Yes |
-| `expression` | [Expression](../syntax.md#syntax-expressions) that ClickHouse executes on the value.<br/>The expression can be a column name in the remote SQL database. Thus, you can use it to create an alias for the remote column.<br/><br/>Default value: no expression. | No |
-| `hierarchical` | If `true`, the attribute contains the value of a parent key for the current key. See [Hierarchical Dictionaries](external_dicts_dict_hierarchical.md).<br/><br/>Default value: `false`. | No |
-| `injective` | Flag that shows whether the `id -> attribute` mapping is [injective](https://en.wikipedia.org/wiki/Injective_function).<br/>If `true`, ClickHouse can automatically apply requests to injective dictionaries after the `GROUP BY` clause rather than inside it. Usually this significantly reduces the number of such requests.<br/><br/>Default value: `false`. | No |
-| `is_object_id` | Flag that shows whether the query is executed for a MongoDB document by `ObjectID`.<br/><br/>Default value: `false`. | No |
-
-## See Also {#see-also}
-
-- [Functions for working with external dictionaries](../functions/ext_dict_functions.md).
-
-[Original article](https://clickhouse.tech/docs/en/query_language/dicts/external_dicts_dict_structure/)
diff --git a/docs/zh/query_language/dicts/index.md b/docs/zh/query_language/dicts/index.md
deleted file mode 100644
index 9c7883cf7a1..00000000000
--- a/docs/zh/query_language/dicts/index.md
+++ /dev/null
@@ -1,18 +0,0 @@
----
-en_copy: true
----
-
-# Dictionaries {#dictionaries}
-
-A dictionary is a mapping (`key -> attributes`) that is convenient for various types of reference lists.
-
-ClickHouse supports special functions for working with dictionaries that can be used in queries. It is easier and more efficient to use dictionaries with functions than a `JOIN` with reference tables.
-
-[NULL](../syntax.md#null) values can’t be stored in a dictionary.
-
-ClickHouse supports:
-
-- [Built-in dictionaries](internal_dicts.md#internal_dicts) with a specific [set of functions](../functions/ym_dict_functions.md).
-- [Plug-in (external) dictionaries](external_dicts.md) with a [set of functions](../functions/ext_dict_functions.md).
-
-[Original article](https://clickhouse.tech/docs/en/query_language/dicts/)
diff --git a/docs/zh/query_language/dicts/internal_dicts.md b/docs/zh/query_language/dicts/internal_dicts.md
deleted file mode 100644
index a7ac9fe7d8c..00000000000
--- a/docs/zh/query_language/dicts/internal_dicts.md
+++ /dev/null
@@ -1,52 +0,0 @@
----
-en_copy: true
----
-
-# Internal dictionaries {#internal_dicts}
-
-ClickHouse contains a built-in feature for working with a geobase.
-
-This allows you to:
-
-- Use a region’s ID to get its name in the desired language.
-- Use a region’s ID to get the ID of a city, area, federal district, country, or continent.
-- Check whether a region is part of another region.
-- Get a chain of parent regions.
-
-All the functions support “translocality,” the ability to simultaneously use different perspectives on region ownership. For more information, see the section “Functions for working with Yandex.Metrica dictionaries”.
-
-The internal dictionaries are disabled in the default package.
-To enable them, uncomment the parameters `path_to_regions_hierarchy_file` and `path_to_regions_names_files` in the server configuration file.
-
-The geobase is loaded from text files.
-
-Place the `regions_hierarchy*.txt` files into the `path_to_regions_hierarchy_file` directory. This configuration parameter must contain the path to the `regions_hierarchy.txt` file (the default regional hierarchy), and the other files (`regions_hierarchy_ua.txt`) must be located in the same directory.
-
-Put the `regions_names_*.txt` files in the `path_to_regions_names_files` directory.
-
-You can also create these files yourself. The file format is as follows:
-
-`regions_hierarchy*.txt`: TabSeparated (no header), columns:
-
-- region ID (`UInt32`)
-- parent region ID (`UInt32`)
-- region type (`UInt8`): 1 - continent, 3 - country, 4 - federal district, 5 - region, 6 - city; other types don’t have values
-- population (`UInt32`) — optional column
-
-`regions_names_*.txt`: TabSeparated (no header), columns:
-
-- region ID (`UInt32`)
-- region name (`String`) — Can’t contain tabs or line feeds, even escaped ones.
-
-A flat array is used for storing in RAM. For this reason, IDs shouldn’t be more than a million.
-
-Dictionaries can be updated without restarting the server. However, the set of available dictionaries is not updated.
-For updates, the file modification times are checked. If a file has changed, the dictionary is updated.
-The interval for checking for changes is configured in the `builtin_dictionaries_reload_interval` parameter.
-Dictionary updates (other than loading at first use) do not block queries. During updates, queries use the old versions of dictionaries. If an error occurs during an update, the error is written to the server log, and queries continue using the old version of dictionaries.
-
-We recommend periodically updating the dictionaries with the geobase. During an update, generate new files and write them to a separate location. When everything is ready, rename them to the files used by the server.
-
-There are also functions for working with OS identifiers and Yandex.Metrica search engines, but they shouldn’t be used.
-
-[Original article](https://clickhouse.tech/docs/en/query_language/dicts/internal_dicts/)
diff --git a/docs/zh/query_language/functions/ext_dict_functions.md b/docs/zh/query_language/functions/ext_dict_functions.md
deleted file mode 100644
index c1d5d9b60ba..00000000000
--- a/docs/zh/query_language/functions/ext_dict_functions.md
+++ /dev/null
@@ -1,46 +0,0 @@
-# Functions for Working with External Dictionaries {#zi-dian-han-shu}
-
-For information on connecting and configuring external dictionaries, see [External dictionaries](../dicts/external_dicts.md).
-
-## dictGetUInt8, dictGetUInt16, dictGetUInt32, dictGetUInt64 {#dictgetuint8-dictgetuint16-dictgetuint32-dictgetuint64}
-
-## dictGetInt8, dictGetInt16, dictGetInt32, dictGetInt64 {#dictgetint8-dictgetint16-dictgetint32-dictgetint64}
-
-## dictGetFloat32, dictGetFloat64 {#dictgetfloat32-dictgetfloat64}
-
-## dictGetDate, dictGetDateTime {#dictgetdate-dictgetdatetime}
-
-## dictGetUUID {#dictgetuuid}
-
-## dictGetString {#dictgetstring}
-
-`dictGetT('dict_name', 'attr_name', id)`
-
-- Gets the value of the `attr_name` attribute from the `dict_name` dictionary using the `id` key. `dict_name` and `attr_name` are constant strings. `id` must be `UInt64`.
-    If the `id` key is not in the dictionary, it returns the default value specified in the dictionary description.
-
-## dictGetTOrDefault {#ext_dict_functions-dictgettordefault}
-
-`dictGetTOrDefault('dict_name', 'attr_name', id, default)`
-
-The same as the `dictGetT` functions, but the default value is taken from the function’s last argument.
-
-## dictIsIn {#dictisin}
-
-`dictIsIn ('dict_name', child_id, ancestor_id)`
-
-- For the `dict_name` hierarchical dictionary, finds out whether the `child_id` key is located inside `ancestor_id` (or matches `ancestor_id`). Returns `UInt8`.
-
-## dictGetHierarchy {#dictgethierarchy}
-
-`dictGetHierarchy('dict_name', id)`
-
-- For the `dict_name` hierarchical dictionary, returns an array of dictionary keys starting from `id` and continuing along the chain of parent elements. Returns `Array(UInt64)`.
-
-## dictHas {#dicthas}
-
-`dictHas('dict_name', id)`
-
-- Checks whether the dictionary has the specified `id`. Returns 0 if it does not exist, or 1 if it does.
-
-[Original article](https://clickhouse.tech/docs/en/query_language/functions/ext_dict_functions/)
diff --git a/docs/zh/query_language/functions/machine_learning_functions.md b/docs/zh/query_language/functions/machine_learning_functions.md
deleted file mode 100644
index e9fe2622a57..00000000000
--- a/docs/zh/query_language/functions/machine_learning_functions.md
+++ /dev/null
@@ -1,15 +0,0 @@
-# Machine Learning Functions {#ji-qi-xue-xi-han-shu}
-
-## evalMLMethod (prediction) {#machine_learning_methods-evalmlmethod}
-
-To get predictions from fitted regression models, use the `evalMLMethod` function. See the link in `linearRegression`.
-
-## Stochastic Linear Regression {#stochastic-linear-regression}
-
-The `stochasticLinearRegression` aggregate function implements the stochastic gradient descent method using a linear model and the MSE loss function. Use `evalMLMethod` to predict on new data.
-See the examples and notes [here](../agg_functions/reference.md#agg_functions-stochasticlinearregression).
-
-## Stochastic Logistic Regression {#stochastic-logistic-regression}
-
-The `stochasticLogisticRegression` aggregate function implements the stochastic gradient descent method for binary classification problems. Use `evalMLMethod` to predict on new data.
-See the examples and notes [here](../agg_functions/reference.md#agg_functions-stochasticlogisticregression).
diff --git a/docs/zh/query_language/index.md b/docs/zh/query_language/index.md
deleted file mode 100644
index 07950fb56a6..00000000000
--- a/docs/zh/query_language/index.md
+++ /dev/null
@@ -1,13 +0,0 @@
----
-en_copy: true
----
-
-# SQL Reference {#sql-reference}
-
-- [SELECT](select.md)
-- [INSERT INTO](insert_into.md)
-- [CREATE](create.md)
-- [ALTER](alter.md#query_language_queries_alter)
-- [Other types of queries](misc.md)
-
-[Original article](https://clickhouse.tech/docs/en/query_language/)
diff --git a/docs/zh/query_language/misc.md b/docs/zh/query_language/misc.md
deleted file mode 100644
index 152dc0dd3b4..00000000000
--- a/docs/zh/query_language/misc.md
+++ /dev/null
@@ -1,249 +0,0 @@
----
-en_copy: true
----
-
-# Miscellaneous Queries {#miscellaneous-queries}
-
-## ATTACH {#attach}
-
-This query is exactly the same as `CREATE`, but
-
-- Instead of the word `CREATE` it uses the word `ATTACH`.
-- The query does not create the data on the disk, but assumes that the data is already in the appropriate places, and just adds information about the table to the server.
-    After executing an ATTACH query, the server will know about the existence of the table.
-
-If the table was previously detached (`DETACH`), meaning that its structure is known, you can use shorthand without defining the structure.
-
-``` sql
-ATTACH TABLE [IF NOT EXISTS] [db.]name [ON CLUSTER cluster]
-```
-
-This query is used when starting the server. The server stores table metadata as files with `ATTACH` queries, which it simply runs at launch (with the exception of system tables, which are explicitly created on the server).
-
-## CHECK TABLE {#check-table}
-
-Checks if the data in the table is corrupted.
-
-``` sql
-CHECK TABLE [db.]name
-```
-
-The `CHECK TABLE` query compares actual file sizes with the expected values which are stored on the server. If the file sizes do not match the stored values, it means the data is corrupted. This can be caused, for example, by a system crash during query execution.
-
-The query response contains the `result` column with a single row. The row has a value of
-[Boolean](../data_types/boolean.md) type:
-
-- 0 - The data in the table is corrupted.
-- 1 - The data maintains integrity.
-
-The `CHECK TABLE` query supports the following table engines:
-
-- [Log](../operations/table_engines/log.md)
-- [TinyLog](../operations/table_engines/tinylog.md)
-- [StripeLog](../operations/table_engines/stripelog.md)
-- [MergeTree family](../operations/table_engines/mergetree.md)
-
-Running the query over tables with other table engines causes an exception.
-
-Engines from the `*Log` family don’t provide automatic data recovery on failure. Use the `CHECK TABLE` query to track data loss in a timely manner.
-
-For `MergeTree` family engines, the `CHECK TABLE` query shows a check status for every individual data part of a table on the local server.
-
-**If the data is corrupted**
-
-If the table is corrupted, you can copy the non-corrupted data to another table. To do this:
-
-1. Create a new table with the same structure as the damaged table. To do this, execute the query `CREATE TABLE <new_table_name> AS <damaged_table_name>`.
-2. Set the [max\_threads](../operations/settings/settings.md#settings-max_threads) value to 1 to process the next query in a single thread. To do this, run the query `SET max_threads = 1`.
-3. Execute the query `INSERT INTO <new_table_name> SELECT * FROM <damaged_table_name>`. This query copies the non-corrupted data from the damaged table to another table. Only the data before the corrupted part will be copied.
-4. Restart the `clickhouse-client` to reset the `max_threads` value.
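-
-A minimal sketch of these steps, assuming a hypothetical damaged table named `events` (the table name is illustrative only):
-
-``` sql
--- 1. Create an empty table with the same structure as the damaged one.
-CREATE TABLE events_recovered AS events;
-
--- 2. Process the copy in a single thread.
-SET max_threads = 1;
-
--- 3. Copy the readable data; rows after the corrupted part are not copied.
-INSERT INTO events_recovered SELECT * FROM events;
-```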
- -## DESCRIBE TABLE {#misc-describe-table} - -``` sql -DESC|DESCRIBE TABLE [db.]table [INTO OUTFILE filename] [FORMAT format] -``` - -Returns the following `String` type columns: - -- `name` — Column name. -- `type` — Column type. -- `default_type` — Clause that is used in [default expression](create.md#create-default-values) (`DEFAULT`, `MATERIALIZED` or `ALIAS`). The column contains an empty string if the default expression isn’t specified. -- `default_expression` — Value specified in the `DEFAULT` clause. -- `comment_expression` — Comment text. - -Nested data structures are output in “expanded” format. Each column is shown separately, with the name after a dot. - -## DETACH {#detach} - -Deletes information about the ‘name’ table from the server, so the server no longer knows about the table’s existence. - -``` sql -DETACH TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster] -``` - -This does not delete the table’s data or metadata. On the next server launch, the server will read the metadata and find out about the table again. -Similarly, a “detached” table can be re-attached using the `ATTACH` query (with the exception of system tables, which do not have metadata stored for them). - -There is no `DETACH DATABASE` query. - -## DROP {#drop} - -This query has two types: `DROP DATABASE` and `DROP TABLE`. - -``` sql -DROP DATABASE [IF EXISTS] db [ON CLUSTER cluster] -``` - -Deletes all tables inside the ‘db’ database, then deletes the ‘db’ database itself. -If `IF EXISTS` is specified, it doesn’t return an error if the database doesn’t exist. - -``` sql -DROP [TEMPORARY] TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster] -``` - -Deletes the table. -If `IF EXISTS` is specified, it doesn’t return an error if the table doesn’t exist or the database doesn’t exist. - - DROP DICTIONARY [IF EXISTS] [db.]name - -Deletes the dictionary. -If `IF EXISTS` is specified, it doesn’t return an error if the dictionary doesn’t exist or the database doesn’t exist. - -## EXISTS {#exists} - -``` sql -EXISTS [TEMPORARY] [TABLE|DICTIONARY] [db.]name [INTO OUTFILE filename] [FORMAT format] -``` - -Returns a single `UInt8`-type column, which contains the single value `0` if the table or dictionary doesn’t exist, or `1` if it exists in the specified database. - -## KILL QUERY {#kill-query} - -``` sql -KILL QUERY [ON CLUSTER cluster] - WHERE <where expression to SELECT FROM system.processes query> - [SYNC|ASYNC|TEST] - [FORMAT format] -``` - -Attempts to forcibly terminate the currently running queries. -The queries to terminate are selected from the system.processes table using the criteria defined in the `WHERE` clause of the `KILL` query. - -Examples: - -``` sql --- Forcibly terminates all queries with the specified query_id: -KILL QUERY WHERE query_id='2-857d-4a57-9ee0-327da5d60a90' - --- Synchronously terminates all queries run by 'username': -KILL QUERY WHERE user='username' SYNC -``` - -Read-only users can only stop their own queries. - -By default, the asynchronous version of queries is used (`ASYNC`), which doesn’t wait for confirmation that queries have stopped. - -The synchronous version (`SYNC`) waits for all queries to stop and displays information about each process as it stops. -The response contains the `kill_status` column, which can take the following values: - -1. ‘finished’ – The query was terminated successfully. -2. ‘waiting’ – Waiting for the query to end after sending it a signal to terminate. -3. The other values explain why the query can’t be stopped. - -A test query (`TEST`) only checks the user’s rights and displays a list of queries to stop.
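In practice, one would typically look up the target query in `system.processes` first and then kill it by its identifier. A minimal sketch (the `query_id` value shown is a hypothetical placeholder):

``` sql
-- Find long-running queries and their identifiers.
SELECT query_id, user, elapsed, query
FROM system.processes
ORDER BY elapsed DESC;

-- Terminate one of them synchronously ('abc-123' is a placeholder id).
KILL QUERY WHERE query_id = 'abc-123' SYNC;
```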
- -## KILL MUTATION {#kill-mutation} - -``` sql -KILL MUTATION [ON CLUSTER cluster] - WHERE <where expression to SELECT FROM system.mutations query> - [TEST] - [FORMAT format] -``` - -Tries to cancel and remove [mutations](alter.md#alter-mutations) that are currently executing. Mutations to cancel are selected from the [`system.mutations`](../operations/system_tables.md#system_tables-mutations) table using the filter specified by the `WHERE` clause of the `KILL` query. - -A test query (`TEST`) only checks the user’s rights and displays a list of mutations to stop. - -Examples: - -``` sql --- Cancel and remove all mutations of the single table: -KILL MUTATION WHERE database = 'default' AND table = 'table' - --- Cancel the specific mutation: -KILL MUTATION WHERE database = 'default' AND table = 'table' AND mutation_id = 'mutation_3.txt' -``` - -The query is useful when a mutation is stuck and cannot finish (e.g. if some function in the mutation query throws an exception when applied to the data contained in the table). - -Changes already made by the mutation are not rolled back. - -## OPTIMIZE {#misc_operations-optimize} - -``` sql -OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE] -``` - -This query tries to initiate an unscheduled merge of data parts for tables with a table engine from the [MergeTree](../operations/table_engines/mergetree.md) family. - -The `OPTIMIZE` query is also supported for the [MaterializedView](../operations/table_engines/materializedview.md) and the [Buffer](../operations/table_engines/buffer.md) engines. Other table engines aren’t supported. - -When `OPTIMIZE` is used with the [ReplicatedMergeTree](../operations/table_engines/replication.md) family of table engines, ClickHouse creates a task for merging and waits for execution on all nodes (if the `replication_alter_partitions_sync` setting is enabled). - -- If `OPTIMIZE` doesn’t perform a merge for any reason, it doesn’t notify the client. To enable notifications, use the [optimize\_throw\_if\_noop](../operations/settings/settings.md#setting-optimize_throw_if_noop) setting. -- If you specify a `PARTITION`, only the specified partition is optimized. [How to set partition expression](alter.md#alter-how-to-specify-part-expr). -- If you specify `FINAL`, optimization is performed even when all the data is already in one part. -- If you specify `DEDUPLICATE`, completely identical rows are deduplicated (all columns are compared); this makes sense only for the MergeTree engine. - -!!! warning "Warning" - `OPTIMIZE` can’t fix the “Too many parts” error. - -## RENAME {#misc_operations-rename} - -Renames one or more tables. - -``` sql -RENAME TABLE [db11.]name11 TO [db12.]name12, [db21.]name21 TO [db22.]name22, ... [ON CLUSTER cluster] -``` - -All tables are renamed under global locking. Renaming tables is a lightweight operation. If you indicate another database after `TO`, the table is moved to that database. However, the directories with databases must reside in the same file system (otherwise, an error is returned). - -## SET {#query-set} - -``` sql -SET param = value -``` - -Assigns `value` to the `param` [setting](../operations/settings/index.md) for the current session. You cannot change [server settings](../operations/server_settings/index.md) this way. - -You can also set all the values from the specified settings profile in a single query. - -``` sql -SET profile = 'profile-name-from-the-settings-file' -``` - -For more information, see [Settings](../operations/settings/settings.md).
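A session setting changed with `SET` can be verified through the `system.settings` table, whose `changed` column marks values that differ from the defaults. A minimal sketch:

``` sql
SET max_threads = 8;

-- Confirm the new session value; `changed` is 1 for non-default settings.
SELECT name, value, changed
FROM system.settings
WHERE name = 'max_threads';
```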
- -## TRUNCATE {#truncate} - -``` sql -TRUNCATE TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster] -``` - -Removes all data from a table. When the clause `IF EXISTS` is omitted, the query returns an error if the table does not exist. - -The `TRUNCATE` query is not supported for [View](../operations/table_engines/view.md), [File](../operations/table_engines/file.md), [URL](../operations/table_engines/url.md) and [Null](../operations/table_engines/null.md) table engines. - -## USE {#use} - -``` sql -USE db -``` - -Lets you set the current database for the session. -The current database is used for searching for tables if the database is not explicitly defined in the query with a dot before the table name. -This query can’t be made when using the HTTP protocol, since there is no concept of a session. - -[Original article](https://clickhouse.tech/docs/en/query_language/misc/) diff --git a/docs/zh/query_language/syntax.md b/docs/zh/query_language/syntax.md deleted file mode 100644 index fb86f56e7bd..00000000000 --- a/docs/zh/query_language/syntax.md +++ /dev/null @@ -1,184 +0,0 @@ ---- -en_copy: true ---- - -# Syntax {#syntax} - -There are two types of parsers in the system: the full SQL parser (a recursive descent parser), and the data format parser (a fast stream parser). -In all cases except the `INSERT` query, only the full SQL parser is used. -The `INSERT` query uses both parsers: - -``` sql -INSERT INTO t VALUES (1, 'Hello, world'), (2, 'abc'), (3, 'def') -``` - -The `INSERT INTO t VALUES` fragment is parsed by the full parser, and the data `(1, 'Hello, world'), (2, 'abc'), (3, 'def')` is parsed by the fast stream parser. You can also turn on the full parser for the data by using the [input\_format\_values\_interpret\_expressions](../operations/settings/settings.md#settings-input_format_values_interpret_expressions) setting. When `input_format_values_interpret_expressions = 1`, ClickHouse first tries to parse values with the fast stream parser. If it fails, ClickHouse tries to use the full parser for the data, treating it like an SQL [expression](#syntax-expressions). - -Data can have any format. When a query is received, the server calculates no more than [max\_query\_size](../operations/settings/settings.md#settings-max_query_size) bytes of the request in RAM (by default, 1 MB), and the rest is stream parsed. -This means the system doesn’t have problems with large `INSERT` queries, like MySQL does. - -When using the `Values` format in an `INSERT` query, it may seem that data is parsed the same as expressions in a `SELECT` query, but this is not true. The `Values` format is much more limited. - -Next we will cover the full parser. For more information about format parsers, see the [Formats](../interfaces/formats.md) section. - -## Spaces {#spaces} - -There may be any number of space symbols between syntactical constructions (including the beginning and end of a query). Space symbols include the space, tab, line feed, CR, and form feed. - -## Comments {#comments} - -SQL-style and C-style comments are supported. -SQL-style comments: from `--` to the end of the line. The space after `--` can be omitted. -Comments in C-style: from `/*` to `*/`. These comments can be multiline. Spaces are not required here, either. - -## Keywords {#syntax-keywords} - -Keywords are case-insensitive when they correspond to: - -- SQL standard. For example, `SELECT`, `select` and `SeLeCt` are all valid. -- Implementation in some popular DBMS (MySQL or Postgres). For example, `DateTime` is same as `datetime`. 
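For example, the following statements are all equivalent, because `SELECT` comes from the SQL standard and is therefore matched case-insensitively:

``` sql
SELECT 1;
select 1;
SeLeCt 1;
```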
- -Whether a data type name is case-sensitive can be checked in the `system.data_type_families` table. - -In contrast to standard SQL, all other keywords (including function names) are **case-sensitive**. - -Keywords are not reserved (they are just parsed as keywords in the corresponding context). If you use [identifiers](#syntax-identifiers) that are the same as keywords, enclose them in quotes. For example, the query `SELECT "FROM" FROM table_name` is valid if the table `table_name` has a column with the name `"FROM"`. - -## Identifiers {#syntax-identifiers} - -Identifiers are: - -- Cluster, database, table, partition and column names. -- Functions. -- Data types. -- [Expression aliases](#syntax-expression_aliases). - -Identifiers can be quoted or non-quoted. It is recommended to use non-quoted identifiers. - -Non-quoted identifiers must match the regex `^[a-zA-Z_][0-9a-zA-Z_]*$` and cannot be equal to [keywords](#syntax-keywords). Examples: `x`, `_1`, `X_y__Z123_`. - -If you want to use identifiers that are the same as keywords, or to use other symbols in identifiers, quote them using double quotes or backticks, for example, `"id"`, `` `id` ``. - -## Literals {#literals} - -There are numeric, string, compound and `NULL` literals. - -### Numeric {#numeric} - -A numeric literal is parsed as follows: - -- First, as a 64-bit signed number, using the [strtoll](https://en.cppreference.com/w/cpp/string/byte/strtol) function. -- If unsuccessful, as a 64-bit unsigned number, using the [strtoull](https://en.cppreference.com/w/cpp/string/byte/strtoul) function. -- If unsuccessful, as a floating-point number, using the [strtod](https://en.cppreference.com/w/cpp/string/byte/strtof) function. -- Otherwise, an error is returned. - -The corresponding value will have the smallest type that the value fits in. -For example, 1 is parsed as `UInt8`, but 256 is parsed as `UInt16`. For more information, see [Data types](../data_types/index.md). - -Examples: `1`, `18446744073709551615`, `0xDEADBEEF`, `01`, `0.1`, `1e100`, `-1e-100`, `inf`, `nan`. - -### String {#syntax-string-literal} - -Only string literals in single quotes are supported. The enclosed characters can be backslash-escaped. The following escape sequences have a corresponding special value: `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\a`, `\v`, `\xHH`. In all other cases, escape sequences in the format `\c`, where `c` is any character, are converted to `c`. This means that you can use the sequences `\'` and `\\`. The value will have the [String](../data_types/string.md) type. - -The minimum set of characters that you need to escape in string literals is `'` and `\`. A single quote can also be escaped with another single quote; the literals `'It\'s'` and `'It''s'` are equal. - -### Compound {#compound} - -Constructions are supported for arrays: `[1, 2, 3]` and tuples: `(1, 'Hello, world!', 2)`. -Actually, these are not literals, but expressions with the array creation operator and the tuple creation operator, respectively. -An array must consist of at least one item, and a tuple must have at least two items. -Tuples have a special purpose for use in the `IN` clause of a `SELECT` query. Tuples can be obtained as the result of a query, but they can’t be saved to a database (with the exception of [Memory](../operations/table_engines/memory.md) tables). - -### NULL {#null-literal} - -Indicates that the value is missing. - -In order to store `NULL` in a table field, it must be of the [Nullable](../data_types/nullable.md) type.
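A minimal sketch of the last point, using a hypothetical `t_nullable` table; `NULL` can be inserted only into the `Nullable` column:

``` sql
-- `y` can hold NULL, `x` cannot; the table name is a placeholder.
CREATE TABLE t_nullable (x Int32, y Nullable(Int32)) ENGINE = TinyLog;

INSERT INTO t_nullable VALUES (1, NULL), (2, 3);

SELECT x, y, isNull(y) FROM t_nullable;
```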
- -Depending on the data format (input or output), `NULL` may have a different representation. For more information, see the documentation for [data formats](../interfaces/formats.md#formats). - -There are many nuances to processing `NULL`. For example, if at least one of the arguments of a comparison operation is `NULL`, the result of this operation will also be `NULL`. The same is true for multiplication, addition, and other operations. For more information, read the documentation for each operation. - -In queries, you can check `NULL` using the [IS NULL](operators.md#operator-is-null) and [IS NOT NULL](operators.md) operators and the related functions `isNull` and `isNotNull`. - -## Functions {#functions} - -Functions are written like an identifier with a list of arguments (possibly empty) in brackets. In contrast to standard SQL, the brackets are required, even for an empty arguments list. Example: `now()`. -There are regular and aggregate functions (see the section “Aggregate functions”). Some aggregate functions can contain two lists of arguments in brackets. Example: `quantile (0.9) (x)`. These aggregate functions are called “parametric” functions, and the arguments in the first list are called “parameters”. The syntax of aggregate functions without parameters is the same as for regular functions. - -## Operators {#operators} - -Operators are converted to their corresponding functions during query parsing, taking their priority and associativity into account. -For example, the expression `1 + 2 * 3 + 4` is transformed to `plus(plus(1, multiply(2, 3)), 4)`. - -## Data Types and Database Table Engines {#data_types-and-database-table-engines} - -Data types and table engines in the `CREATE` query are written the same way as identifiers or functions. In other words, they may or may not contain an arguments list in brackets. For more information, see the sections “Data types,” “Table engines,” and “CREATE”. - -## Expression Aliases {#syntax-expression_aliases} - -An alias is a user-defined name for an expression in a query. - -``` sql -expr AS alias -``` - -- `AS` — The keyword for defining aliases. You can define the alias for a table name or a column name in a `SELECT` clause without using the `AS` keyword. - - For example, `SELECT table_name_alias.column_name FROM table_name table_name_alias`. - - In the [CAST](functions/type_conversion_functions.md#type_conversion_function-cast) function, the `AS` keyword has another meaning. See the description of the function. - -- `expr` — Any expression supported by ClickHouse. - - For example, `SELECT column_name * 2 AS double FROM some_table`. - -- `alias` — Name for `expr`. Aliases should comply with the [identifiers](#syntax-identifiers) syntax. - - For example, `SELECT "table t".column_name FROM table_name AS "table t"`. - -### Notes on Usage {#notes-on-usage} - -Aliases are global for a query or subquery and you can define an alias in any part of a query for any expression. For example, `SELECT (1 AS n) + 2, n`. - -Aliases are not visible in subqueries and between subqueries. For example, while executing the query `SELECT (SELECT sum(b.a) + num FROM b) - a.a AS num FROM a` ClickHouse generates the exception `Unknown identifier: num`. - -If an alias is defined for the result columns in the `SELECT` clause of a subquery, these columns are visible in the outer query. For example, `SELECT n + m FROM (SELECT 1 AS n, 2 AS m)`. - -Be careful with aliases that are the same as column or table names. 
Let’s consider the following example: - -``` sql -CREATE TABLE t -( - a Int, - b Int -) -ENGINE = TinyLog() -``` - -``` sql -SELECT - argMax(a, b), - sum(b) AS b -FROM t -``` - -``` text -Received exception from server (version 18.14.17): -Code: 184. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception: Aggregate function sum(b) is found inside another aggregate function in query. -``` - -In this example, we declared table `t` with column `b`. Then, when selecting data, we defined the `sum(b) AS b` alias. As aliases are global, ClickHouse substituted the literal `b` in the expression `argMax(a, b)` with the expression `sum(b)`. This substitution caused the exception. - -## Asterisk {#asterisk} - -In a `SELECT` query, an asterisk can replace the expression. For more information, see the section “SELECT”. - -## Expressions {#syntax-expressions} - -An expression is a function, identifier, literal, application of an operator, expression in brackets, subquery, or asterisk. It can also contain an alias. -A list of expressions is one or more expressions separated by commas. -Functions and operators, in turn, can have expressions as arguments. - -[Original article](https://clickhouse.tech/docs/en/query_language/syntax/) diff --git a/docs/zh/query_language/system.md b/docs/zh/query_language/system.md deleted file mode 100644 index a6b72d63ead..00000000000 --- a/docs/zh/query_language/system.md +++ /dev/null @@ -1,110 +0,0 @@ ---- -en_copy: true ---- - -# SYSTEM Queries {#query-language-system} - -- [RELOAD DICTIONARIES](#query_language-system-reload-dictionaries) -- [RELOAD DICTIONARY](#query_language-system-reload-dictionary) -- [DROP DNS CACHE](#query_language-system-drop-dns-cache) -- [DROP MARK CACHE](#query_language-system-drop-mark-cache) -- [FLUSH LOGS](#query_language-system-flush_logs) -- [RELOAD CONFIG](#query_language-system-reload-config) -- [SHUTDOWN](#query_language-system-shutdown) -- [KILL](#query_language-system-kill) -- [STOP DISTRIBUTED SENDS](#query_language-system-stop-distributed-sends) -- [FLUSH DISTRIBUTED](#query_language-system-flush-distributed) -- [START DISTRIBUTED SENDS](#query_language-system-start-distributed-sends) -- [STOP MERGES](#query_language-system-stop-merges) -- [START MERGES](#query_language-system-start-merges) - -## RELOAD DICTIONARIES {#query_language-system-reload-dictionaries} - -Reloads all dictionaries that have been successfully loaded before. -By default, dictionaries are loaded lazily (see [dictionaries\_lazy\_load](../operations/server_settings/settings.md#server_settings-dictionaries_lazy_load)), so instead of being loaded automatically at startup, they are initialized on first access through dictGet function or SELECT from tables with ENGINE = Dictionary. The `SYSTEM RELOAD DICTIONARIES` query reloads such dictionaries (LOADED). -Always returns `Ok.` regardless of the result of the dictionary update. - -## RELOAD DICTIONARY dictionary\_name {#query_language-system-reload-dictionary} - -Completely reloads a dictionary `dictionary_name`, regardless of the state of the dictionary (LOADED / NOT\_LOADED / FAILED). -Always returns `Ok.` regardless of the result of updating the dictionary. -The status of the dictionary can be checked by querying the `system.dictionaries` table. - -``` sql -SELECT name, status FROM system.dictionaries; -``` - -## DROP DNS CACHE {#query_language-system-drop-dns-cache} - -Resets ClickHouse’s internal DNS cache. 
Sometimes (for old ClickHouse versions) it is necessary to use this command when changing the infrastructure (changing the IP address of another ClickHouse server or the server used by dictionaries). - -For more convenient (automatic) cache management, see disable\_internal\_dns\_cache, dns\_cache\_update\_period parameters. - -## DROP MARK CACHE {#query_language-system-drop-mark-cache} - -Resets the mark cache. Used in development of ClickHouse and performance tests. - -## FLUSH LOGS {#query_language-system-flush_logs} - -Flushes buffers of log messages to system tables (e.g. system.query\_log). Allows you to not wait 7.5 seconds when debugging. - -## RELOAD CONFIG {#query_language-system-reload-config} - -Reloads ClickHouse configuration. Used when configuration is stored in ZooKeeeper. - -## SHUTDOWN {#query_language-system-shutdown} - -Normally shuts down ClickHouse (like `service clickhouse-server stop` / `kill {$pid_clickhouse-server}`) - -## KILL {#query_language-system-kill} - -Aborts ClickHouse process (like `kill -9 {$ pid_clickhouse-server}`) - -## Managing Distributed Tables {#query-language-system-distributed} - -ClickHouse can manage [distributed](../operations/table_engines/distributed.md) tables. When a user inserts data into these tables, ClickHouse first creates a queue of the data that should be sent to cluster nodes, then asynchronously sends it. You can manage queue processing with the [STOP DISTRIBUTED SENDS](#query_language-system-stop-distributed-sends), [FLUSH DISTRIBUTED](#query_language-system-flush-distributed), and [START DISTRIBUTED SENDS](#query_language-system-start-distributed-sends) queries. You can also synchronously insert distributed data with the `insert_distributed_sync` setting. - -### STOP DISTRIBUTED SENDS {#query_language-system-stop-distributed-sends} - -Disables background data distribution when inserting data into distributed tables. - -``` sql -SYSTEM STOP DISTRIBUTED SENDS [db.] -``` - -### FLUSH DISTRIBUTED {#query_language-system-flush-distributed} - -Forces ClickHouse to send data to cluster nodes synchronously. If any nodes are unavailable, ClickHouse throws an exception and stops query execution. You can retry the query until it succeeds, which will happen when all nodes are back online. - -``` sql -SYSTEM FLUSH DISTRIBUTED [db.] -``` - -### START DISTRIBUTED SENDS {#query_language-system-start-distributed-sends} - -Enables background data distribution when inserting data into distributed tables. - -``` sql -SYSTEM START DISTRIBUTED SENDS [db.] -``` - -### STOP MERGES {#query_language-system-stop-merges} - -Provides possibility to stop background merges for tables in the MergeTree family: - -``` sql -SYSTEM STOP MERGES [[db.]merge_tree_family_table_name] -``` - -!!! note "Note" - `DETACH / ATTACH` table will start background merges for the table even in case when merges have been stopped for all MergeTree tables before. 
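A hypothetical maintenance sketch combining this statement with its `START MERGES` counterpart described next (`db.events` is a placeholder table name):

``` sql
-- Pause background merges for one MergeTree table...
SYSTEM STOP MERGES db.events;

-- ...perform maintenance here, then resume them.
SYSTEM START MERGES db.events;
```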
- -### START MERGES {#query_language-system-start-merges} - -Provides possibility to start background merges for tables in the MergeTree family: - -``` sql -SYSTEM START MERGES [[db.]merge_tree_family_table_name] -``` - -[Original article](https://clickhouse.tech/docs/en/query_language/system/) diff --git a/docs/zh/query_language/table_functions/index.md b/docs/zh/query_language/table_functions/index.md deleted file mode 100644 index ba231a6eeea..00000000000 --- a/docs/zh/query_language/table_functions/index.md +++ /dev/null @@ -1,34 +0,0 @@ ---- -en_copy: true ---- - -# Table Functions {#table-functions} - -Table functions are methods for constructing tables. - -You can use table functions in: - -- [FROM](../select.md#select-from) clause of the `SELECT` query. - - The method for creating a temporary table that is available only in the current query. The table is deleted when the query finishes. - -- [CREATE TABLE AS \](../create.md#create-table-query) query. - - It's one of the methods of creating a table. - -!!! warning "Warning" - You can’t use table functions if the [allow\_ddl](../../operations/settings/permissions_for_queries.md#settings_allow_ddl) setting is disabled. - -| Function | Description | -|-----------------------|-----------------------------------------------------------------------------------------------------------------------------------| -| [file](file.md) | Creates a [File](../../operations/table_engines/file.md)-engine table. | -| [merge](merge.md) | Creates a [Merge](../../operations/table_engines/merge.md)-engine table. | -| [numbers](numbers.md) | Creates a table with a single column filled with integer numbers. | -| [remote](remote.md) | Allows you to access remote servers without creating a [Distributed](../../operations/table_engines/distributed.md)-engine table. | -| [url](url.md) | Creates a [Url](../../operations/table_engines/url.md)-engine table. | -| [mysql](mysql.md) | Creates a [MySQL](../../operations/table_engines/mysql.md)-engine table. | -| [jdbc](jdbc.md) | Creates a [JDBC](../../operations/table_engines/jdbc.md)-engine table. | -| [odbc](odbc.md) | Creates a [ODBC](../../operations/table_engines/odbc.md)-engine table. | -| [hdfs](hdfs.md) | Creates a [HDFS](../../operations/table_engines/hdfs.md)-engine table. | - -[Original article](https://clickhouse.tech/docs/en/query_language/table_functions/) diff --git a/docs/zh/query_language/table_functions/input.md b/docs/zh/query_language/table_functions/input.md deleted file mode 100644 index 7536a9bffc2..00000000000 --- a/docs/zh/query_language/table_functions/input.md +++ /dev/null @@ -1,44 +0,0 @@ ---- -en_copy: true ---- - -# input {#input} - -`input(structure)` - table function that allows effectively convert and insert data sent to the -server with given structure to the table with another structure. - -`structure` - structure of data sent to the server in following format `'column1_name column1_type, column2_name column2_type, ...'`. -For example, `'id UInt32, name String'`. - -This function can be used only in `INSERT SELECT` query and only once but otherwise behaves like ordinary table function -(for example, it can be used in subquery, etc.). - -Data can be sent in any way like for ordinary `INSERT` query and passed in any available [format](../../interfaces/formats.md#formats) -that must be specified in the end of query (unlike ordinary `INSERT SELECT`). 
- -The main feature of this function is that when server receives data from client it simultaneously converts it -according to the list of expressions in the `SELECT` clause and inserts into the target table. Temporary table -with all transferred data is not created. - -**Examples** - -- Let the `test` table has the following structure `(a String, b String)` - and data in `data.csv` has a different structure `(col1 String, col2 Date, col3 Int32)`. Query for insert - data from the `data.csv` into the `test` table with simultaneous conversion looks like this: - - - -``` bash -$ cat data.csv | clickhouse-client --query="INSERT INTO test SELECT lower(col1), col3 * col3 FROM input('col1 String, col2 Date, col3 Int32') FORMAT CSV"; -``` - -- If `data.csv` contains data of the same structure `test_structure` as the table `test` then these two queries are equal: - - - -``` bash -$ cat data.csv | clickhouse-client --query="INSERT INTO test FORMAT CSV" -$ cat data.csv | clickhouse-client --query="INSERT INTO test SELECT * FROM input('test_structure') FORMAT CSV" -``` - -[Original article](https://clickhouse.tech/docs/en/query_language/table_functions/input/) diff --git a/docs/zh/query_language/table_functions/jdbc.md b/docs/zh/query_language/table_functions/jdbc.md deleted file mode 100644 index e1ba7b362bd..00000000000 --- a/docs/zh/query_language/table_functions/jdbc.md +++ /dev/null @@ -1,26 +0,0 @@ ---- -en_copy: true ---- - -# jdbc {#table-function-jdbc} - -`jdbc(jdbc_connection_uri, schema, table)` - returns table that is connected via JDBC driver. - -This table function requires separate `clickhouse-jdbc-bridge` program to be running. -It supports Nullable types (based on DDL of remote table that is queried). - -**Examples** - -``` sql -SELECT * FROM jdbc('jdbc:mysql://localhost:3306/?user=root&password=root', 'schema', 'table') -``` - -``` sql -SELECT * FROM jdbc('mysql://localhost:3306/?user=root&password=root', 'schema', 'table') -``` - -``` sql -SELECT * FROM jdbc('datasource://mysql-local', 'schema', 'table') -``` - -[Original article](https://clickhouse.tech/docs/en/query_language/table_functions/jdbc/) diff --git a/docs/zh/query_language/table_functions/merge.md b/docs/zh/query_language/table_functions/merge.md deleted file mode 100644 index 3638fad418d..00000000000 --- a/docs/zh/query_language/table_functions/merge.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -en_copy: true ---- - -# merge {#merge} - -`merge(db_name, 'tables_regexp')` – Creates a temporary Merge table. For more information, see the section “Table engines, Merge”. - -The table structure is taken from the first table encountered that matches the regular expression. - -[Original article](https://clickhouse.tech/docs/en/query_language/table_functions/merge/) diff --git a/docs/zh/query_language/table_functions/numbers.md b/docs/zh/query_language/table_functions/numbers.md deleted file mode 100644 index 5aec0b3c96b..00000000000 --- a/docs/zh/query_language/table_functions/numbers.md +++ /dev/null @@ -1,27 +0,0 @@ ---- -en_copy: true ---- - -# numbers {#numbers} - -`numbers(N)` – Returns a table with the single ‘number’ column (UInt64) that contains integers from 0 to N-1. -`numbers(N, M)` - Returns a table with the single ‘number’ column (UInt64) that contains integers from N to (N + M - 1). - -Similar to the `system.numbers` table, it can be used for testing and generating successive values, `numbers(N, M)` more efficient than `system.numbers`. 
- -The following queries are equivalent: - -``` sql -SELECT * FROM numbers(10); -SELECT * FROM numbers(0, 10); -SELECT * FROM system.numbers LIMIT 10; -``` - -Examples: - -``` sql --- Generate a sequence of dates from 2010-01-01 to 2010-12-31 -select toDate('2010-01-01') + number as d FROM numbers(365); -``` - -[Original article](https://clickhouse.tech/docs/en/query_language/table_functions/numbers/) diff --git a/docs/zh/query_language/table_functions/odbc.md b/docs/zh/query_language/table_functions/odbc.md deleted file mode 100644 index 8c972b1f93a..00000000000 --- a/docs/zh/query_language/table_functions/odbc.md +++ /dev/null @@ -1,105 +0,0 @@ ---- -en_copy: true ---- - -# odbc {#table-functions-odbc} - -Returns table that is connected via [ODBC](https://en.wikipedia.org/wiki/Open_Database_Connectivity). - -``` sql -odbc(connection_settings, external_database, external_table) -``` - -Parameters: - -- `connection_settings` — Name of the section with connection settings in the `odbc.ini` file. -- `external_database` — Name of a database in an external DBMS. -- `external_table` — Name of a table in the `external_database`. - -To safely implement ODBC connections, ClickHouse uses a separate program `clickhouse-odbc-bridge`. If the ODBC driver is loaded directly from `clickhouse-server`, driver problems can crash the ClickHouse server. ClickHouse automatically starts `clickhouse-odbc-bridge` when it is required. The ODBC bridge program is installed from the same package as the `clickhouse-server`. - -The fields with the `NULL` values from the external table are converted into the default values for the base data type. For example, if a remote MySQL table field has the `INT NULL` type it is converted to 0 (the default value for ClickHouse `Int32` data type). - -## Usage example {#usage-example} - -**Getting data from the local MySQL installation via ODBC** - -This example is checked for Ubuntu Linux 18.04 and MySQL server 5.7. - -Ensure that unixODBC and MySQL Connector are installed. - -By default (if installed from packages), ClickHouse starts as user `clickhouse`. Thus you need to create and configure this user in the MySQL server. - -``` bash -$ sudo mysql -``` - -``` sql -mysql> CREATE USER 'clickhouse'@'localhost' IDENTIFIED BY 'clickhouse'; -mysql> GRANT ALL PRIVILEGES ON *.* TO 'clickhouse'@'clickhouse' WITH GRANT OPTION; -``` - -Then configure the connection in `/etc/odbc.ini`. - -``` bash -$ cat /etc/odbc.ini -[mysqlconn] -DRIVER = /usr/local/lib/libmyodbc5w.so -SERVER = 127.0.0.1 -PORT = 3306 -DATABASE = test -USERNAME = clickhouse -PASSWORD = clickhouse -``` - -You can check the connection using the `isql` utility from the unixODBC installation. - -``` bash -$ isql -v mysqlconn -+---------------------------------------+ -| Connected! | -| | -... 
-``` - -Table in MySQL: - -``` text -mysql> CREATE TABLE `test`.`test` ( - -> `int_id` INT NOT NULL AUTO_INCREMENT, - -> `int_nullable` INT NULL DEFAULT NULL, - -> `float` FLOAT NOT NULL, - -> `float_nullable` FLOAT NULL DEFAULT NULL, - -> PRIMARY KEY (`int_id`)); -Query OK, 0 rows affected (0,09 sec) - -mysql> insert into test (`int_id`, `float`) VALUES (1,2); -Query OK, 1 row affected (0,00 sec) - -mysql> select * from test; -+--------+--------------+-------+----------------+ -| int_id | int_nullable | float | float_nullable | -+--------+--------------+-------+----------------+ -| 1 | NULL | 2 | NULL | -+--------+--------------+-------+----------------+ -1 row in set (0,00 sec) -``` - -Retrieving data from the MySQL table in ClickHouse: - -``` sql -SELECT * FROM odbc('DSN=mysqlconn', 'test', 'test') -``` - -``` text -┌─int_id─┬─int_nullable─┬─float─┬─float_nullable─┐ -│ 1 │ 0 │ 2 │ 0 │ -└────────┴──────────────┴───────┴────────────────┘ -``` - -## See Also {#see-also} - -- [ODBC external dictionaries](../../query_language/dicts/external_dicts_dict_sources.md#dicts-external_dicts_dict_sources-odbc) -- [ODBC table engine](../../operations/table_engines/odbc.md). - -[Original article](https://clickhouse.tech/docs/en/query_language/table_functions/jdbc/) diff --git a/docs/zh/query_language/table_functions/remote.md b/docs/zh/query_language/table_functions/remote.md deleted file mode 100644 index e8c751af7e2..00000000000 --- a/docs/zh/query_language/table_functions/remote.md +++ /dev/null @@ -1,80 +0,0 @@ ---- -en_copy: true ---- - -# remote, remoteSecure {#remote-remotesecure} - -Allows you to access remote servers without creating a `Distributed` table. - -Signatures: - -``` sql -remote('addresses_expr', db, table[, 'user'[, 'password']]) -remote('addresses_expr', db.table[, 'user'[, 'password']]) -``` - -`addresses_expr` – An expression that generates addresses of remote servers. This may be just one server address. The server address is `host:port`, or just `host`. The host can be specified as the server name, or as the IPv4 or IPv6 address. An IPv6 address is specified in square brackets. The port is the TCP port on the remote server. If the port is omitted, it uses `tcp_port` from the server’s config file (by default, 9000). - -!!! important "Important" - The port is required for an IPv6 address. - -Examples: - -``` text -example01-01-1 -example01-01-1:9000 -localhost -127.0.0.1 -[::]:9000 -[2a02:6b8:0:1111::11]:9000 -``` - -Multiple addresses can be comma-separated. In this case, ClickHouse will use distributed processing, so it will send the query to all specified addresses (like to shards with different data). - -Example: - -``` text -example01-01-1,example01-02-1 -``` - -Part of the expression can be specified in curly brackets. The previous example can be written as follows: - -``` text -example01-0{1,2}-1 -``` - -Curly brackets can contain a range of numbers separated by two dots (non-negative integers). In this case, the range is expanded to a set of values that generate shard addresses. If the first number starts with zero, the values are formed with the same zero alignment. The previous example can be written as follows: - -``` text -example01-{01..02}-1 -``` - -If you have multiple pairs of curly brackets, it generates the direct product of the corresponding sets. - -Addresses and parts of addresses in curly brackets can be separated by the pipe symbol (\|). 
In this case, the corresponding sets of addresses are interpreted as replicas, and the query will be sent to the first healthy replica. However, the replicas are iterated in the order currently set in the [load\_balancing](../../operations/settings/settings.md) setting. - -Example: - -``` text -example01-{01..02}-{1|2} -``` - -This example specifies two shards that each have two replicas. - -The number of addresses generated is limited by a constant. Right now this is 1000 addresses. - -Using the `remote` table function is less optimal than creating a `Distributed` table, because in this case, the server connection is re-established for every request. In addition, if host names are set, the names are resolved, and errors are not counted when working with various replicas. When processing a large number of queries, always create the `Distributed` table ahead of time, and don’t use the `remote` table function. - -The `remote` table function can be useful in the following cases: - -- Accessing a specific server for data comparison, debugging, and testing. -- Queries between various ClickHouse clusters for research purposes. -- Infrequent distributed requests that are made manually. -- Distributed requests where the set of servers is re-defined each time. - -If the user is not specified, `default` is used. -If the password is not specified, an empty password is used. - -`remoteSecure` - same as `remote` but with secured connection. Default port — [tcp\_port\_secure](../../operations/server_settings/settings.md#server_settings-tcp_port_secure) from config or 9440. - -[Original article](https://clickhouse.tech/docs/en/query_language/table_functions/remote/) diff --git a/docs/zh/query_language/table_functions/url.md b/docs/zh/query_language/table_functions/url.md deleted file mode 100644 index e1250b438ab..00000000000 --- a/docs/zh/query_language/table_functions/url.md +++ /dev/null @@ -1,23 +0,0 @@ ---- -en_copy: true ---- - -# url {#url} - -`url(URL, format, structure)` - returns a table created from the `URL` with given -`format` and `structure`. - -URL - HTTP or HTTPS server address, which can accept `GET` and/or `POST` requests. - -format - [format](../../interfaces/formats.md#formats) of the data. - -structure - table structure in `'UserID UInt64, Name String'` format. Determines column names and types. - -**Example** - -``` sql --- getting the first 3 lines of a table that contains columns of String and UInt32 type from HTTP-server which answers in CSV format. -SELECT * FROM url('http://127.0.0.1:12345/', CSV, 'column1 String, column2 UInt32') LIMIT 3 -``` - -[Original article](https://clickhouse.tech/docs/en/query_language/table_functions/url/) diff --git a/docs/zh/sql_reference/aggregate_functions/combinators.md b/docs/zh/sql_reference/aggregate_functions/combinators.md new file mode 100644 index 00000000000..a8be457ab23 --- /dev/null +++ b/docs/zh/sql_reference/aggregate_functions/combinators.md @@ -0,0 +1,166 @@ +--- +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_priority: 37 +toc_title: "\u805A\u5408\u51FD\u6570\u7EC4\u5408\u5668" +--- + +# 聚合函数组合器 {#aggregate_functions_combinators} + +聚合函数的名称可以附加一个后缀。 这改变了聚合函数的工作方式。 + +## -如果 {#agg-functions-combinator-if} + +The suffix -If can be appended to the name of any aggregate function. In this case, the aggregate function accepts an extra argument – a condition (Uint8 type). The aggregate function processes only the rows that trigger the condition. 
If the condition was not triggered even once, it returns a default value (usually zeros or empty strings). + +例: `sumIf(column, cond)`, `countIf(cond)`, `avgIf(x, cond)`, `quantilesTimingIf(level1, level2)(x, cond)`, `argMinIf(arg, val, cond)` 等等。 + +使用条件聚合函数,您可以一次计算多个条件的聚合,而无需使用子查询和 `JOIN`例如,在Yandex的。Metrica,条件聚合函数用于实现段比较功能。 + +## -阵列 {#agg-functions-combinator-array} + +-Array后缀可以附加到任何聚合函数。 在这种情况下,聚合函数采用的参数 ‘Array(T)’ 类型(数组)而不是 ‘T’ 类型参数。 如果聚合函数接受多个参数,则它必须是长度相等的数组。 在处理数组时,聚合函数的工作方式与所有数组元素的原始聚合函数类似。 + +示例1: `sumArray(arr)` -总计所有的所有元素 ‘arr’ 阵列。 在这个例子中,它可以更简单地编写: `sum(arraySum(arr))`. + +示例2: `uniqArray(arr)` – Counts the number of unique elements in all ‘arr’ 阵列。 这可以做一个更简单的方法: `uniq(arrayJoin(arr))`,但它并不总是可以添加 ‘arrayJoin’ 到查询。 + +-如果和-阵列可以组合。 然而, ‘Array’ 必须先来,然后 ‘If’. 例: `uniqArrayIf(arr, cond)`, `quantilesTimingArrayIf(level1, level2)(arr, cond)`. 由于这个顺序,该 ‘cond’ 参数不会是数组。 + +## -州 {#agg-functions-combinator-state} + +如果应用此combinator,则聚合函数不会返回结果值(例如唯一值的数量 [uniq](reference.md#agg_function-uniq) 函数),但聚合的中间状态(用于 `uniq`,这是用于计算唯一值的数量的散列表)。 这是一个 `AggregateFunction(...)` 可用于进一步处理或存储在表中以完成聚合。 + +要使用这些状态,请使用: + +- [AggregatingMergeTree](../../engines/table_engines/mergetree_family/aggregatingmergetree.md) 表引擎。 +- [最后聚会](../../sql_reference/functions/other_functions.md#function-finalizeaggregation) 功能。 +- [跑累积](../../sql_reference/functions/other_functions.md#function-runningaccumulate) 功能。 +- [-合并](#aggregate_functions_combinators_merge) combinator +- [-MergeState](#aggregate_functions_combinators_mergestate) combinator + +## -合并 {#aggregate_functions_combinators-merge} + +如果应用此组合器,则聚合函数将中间聚合状态作为参数,组合状态以完成聚合,并返回结果值。 + +## -MergeState {#aggregate_functions_combinators-mergestate} + +以与-Merge combinator相同的方式合并中间聚合状态。 但是,它不会返回结果值,而是返回中间聚合状态,类似于-State combinator。 + +## -ForEach {#agg-functions-combinator-foreach} + +将表的聚合函数转换为聚合相应数组项并返回结果数组的数组的聚合函数。 例如, `sumForEach` 对于数组 `[1, 2]`, `[3, 4, 5]`和`[6, 7]`返回结果 `[10, 13, 5]` 之后将相应的数组项添加在一起。 + +## -OrDefault {#agg-functions-combinator-ordefault} + +如果没有要聚合的内容,则填充聚合函数的返回类型的默认值。 + +``` sql +SELECT avg(number), avgOrDefault(number) FROM numbers(0) +``` + +``` text +┌─avg(number)─┬─avgOrDefault(number)─┐ +│ nan │ 0 │ +└─────────────┴──────────────────────┘ +``` + +## -OrNull {#agg-functions-combinator-ornull} + +填充 `null` 如果没有什么聚合。 返回列将为空。 + +``` sql +SELECT avg(number), avgOrNull(number) FROM numbers(0) +``` + +``` text +┌─avg(number)─┬─avgOrNull(number)─┐ +│ nan │ ᴺᵁᴸᴸ │ +└─────────────┴───────────────────┘ +``` + +-OrDefault和-OrNull可以与其他组合器相结合。 当聚合函数不接受空输入时,它很有用。 + +``` sql +SELECT avgOrNullIf(x, x > 10) +FROM +( + SELECT toDecimal32(1.23, 2) AS x +) +``` + +``` text +┌─avgOrNullIf(x, greater(x, 10))─┐ +│ ᴺᵁᴸᴸ │ +└────────────────────────────────┘ +``` + +## -重新采样 {#agg-functions-combinator-resample} + +允许您将数据划分为组,然后单独聚合这些组中的数据。 通过将一列中的值拆分为间隔来创建组。 + +``` sql +Resample(start, end, step)(, resampling_key) +``` + +**参数** + +- `start` — Starting value of the whole required interval for `resampling_key` 值。 +- `stop` — Ending value of the whole required interval for `resampling_key` 值。 整个时间间隔不包括 `stop` 价值 `[start, stop)`. +- `step` — Step for separating the whole interval into subintervals. The `aggFunction` 在每个子区间上独立执行。 +- `resampling_key` — Column whose values are used for separating data into intervals. 
+- `aggFunction_params` — `aggFunction` 参数。 + +**返回值** + +- 阵列 `aggFunction` 每个子区间的结果。 + +**示例** + +考虑一下 `people` 具有以下数据的表: + +``` text +┌─name───┬─age─┬─wage─┐ +│ John │ 16 │ 10 │ +│ Alice │ 30 │ 15 │ +│ Mary │ 35 │ 8 │ +│ Evelyn │ 48 │ 11.5 │ +│ David │ 62 │ 9.9 │ +│ Brian │ 60 │ 16 │ +└────────┴─────┴──────┘ +``` + +让我们得到的人的名字,他们的年龄在于的时间间隔 `[30,60)` 和 `[60,75)`. 由于我们使用整数表示的年龄,我们得到的年龄 `[30, 59]` 和 `[60,74]` 间隔。 + +要在数组中聚合名称,我们使用 [groupArray](reference.md#agg_function-grouparray) 聚合函数。 这需要一个参数。 在我们的例子中,它是 `name` 列。 该 `groupArrayResample` 函数应该使用 `age` 按年龄聚合名称的列。 要定义所需的时间间隔,我们通过 `30, 75, 30` 参数到 `groupArrayResample` 功能。 + +``` sql +SELECT groupArrayResample(30, 75, 30)(name, age) FROM people +``` + +``` text +┌─groupArrayResample(30, 75, 30)(name, age)─────┐ +│ [['Alice','Mary','Evelyn'],['David','Brian']] │ +└───────────────────────────────────────────────┘ +``` + +考虑结果。 + +`Jonh` 是因为他太年轻了 其他人按照指定的年龄间隔进行分配。 + +现在让我们计算指定年龄间隔内的总人数和平均工资。 + +``` sql +SELECT + countResample(30, 75, 30)(name, age) AS amount, + avgResample(30, 75, 30)(wage, age) AS avg_wage +FROM people +``` + +``` text +┌─amount─┬─avg_wage──────────────────┐ +│ [3,2] │ [11.5,12.949999809265137] │ +└────────┴───────────────────────────┘ +``` + +[原始文章](https://clickhouse.tech/docs/en/query_language/agg_functions/combinators/) diff --git a/docs/zh/sql_reference/aggregate_functions/index.md b/docs/zh/sql_reference/aggregate_functions/index.md new file mode 100644 index 00000000000..7e53c8c8c53 --- /dev/null +++ b/docs/zh/sql_reference/aggregate_functions/index.md @@ -0,0 +1,62 @@ +--- +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_folder_title: "\u805A\u5408\u51FD\u6570" +toc_priority: 33 +toc_title: "\u5BFC\u8A00" +--- + +# 聚合函数 {#aggregate-functions} + +聚合函数在 [正常](http://www.sql-tutorial.com/sql-aggregate-functions-sql-tutorial) 方式如预期的数据库专家。 + +ClickHouse还支持: + +- [参数聚合函数](parametric_functions.md#aggregate_functions_parametric),它接受除列之外的其他参数。 +- [组合器](combinators.md#aggregate_functions_combinators),这改变了聚合函数的行为。 + +## 空处理 {#null-processing} + +在聚合过程中,所有 `NULL`s被跳过。 + +**例:** + +考虑这个表: + +``` text +┌─x─┬────y─┐ +│ 1 │ 2 │ +│ 2 │ ᴺᵁᴸᴸ │ +│ 3 │ 2 │ +│ 3 │ 3 │ +│ 3 │ ᴺᵁᴸᴸ │ +└───┴──────┘ +``` + +比方说,你需要在总的值 `y` 列: + +``` sql +SELECT sum(y) FROM t_null_big +``` + + ┌─sum(y)─┐ + │ 7 │ + └────────┘ + +该 `sum` 函数解释 `NULL` 作为 `0`. 特别是,这意味着,如果函数接收输入的选择,其中所有的值 `NULL`,那么结果将是 `0`,不 `NULL`. 
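A minimal self-contained sketch of this behavior: even when every input value is `NULL`, `sum` returns `0` rather than `NULL`.

``` sql
SELECT sum(y)
FROM
(
    -- A single row whose value is NULL.
    SELECT CAST(NULL, 'Nullable(Int32)') AS y
);
-- Returns 0, not NULL.
```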
+ +现在你可以使用 `groupArray` 函数从创建一个数组 `y` 列: + +``` sql +SELECT groupArray(y) FROM t_null_big +``` + +``` text +┌─groupArray(y)─┐ +│ [2,2,3] │ +└───────────────┘ +``` + +`groupArray` 不包括 `NULL` 在生成的数组中。 + +[原始文章](https://clickhouse.tech/docs/en/query_language/agg_functions/) diff --git a/docs/zh/query_language/agg_functions/parametric_functions.md b/docs/zh/sql_reference/aggregate_functions/parametric_functions.md similarity index 57% rename from docs/zh/query_language/agg_functions/parametric_functions.md rename to docs/zh/sql_reference/aggregate_functions/parametric_functions.md index d4e29feff0e..18adcd93487 100644 --- a/docs/zh/query_language/agg_functions/parametric_functions.md +++ b/docs/zh/sql_reference/aggregate_functions/parametric_functions.md @@ -1,29 +1,32 @@ --- -en_copy: true +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_priority: 38 +toc_title: "\u53C2\u6570\u805A\u5408\u51FD\u6570" --- -# Parametric aggregate functions {#aggregate_functions_parametric} +# 参数聚合函数 {#aggregate_functions_parametric} Some aggregate functions can accept not only argument columns (used for compression), but a set of parameters – constants for initialization. The syntax is two pairs of brackets instead of one. The first is for parameters, and the second is for arguments. -## histogram {#histogram} +## 直方图 {#histogram} -Calculates an adaptive histogram. It doesn’t guarantee precise results. +计算自适应直方图。 它不能保证精确的结果。 ``` sql histogram(number_of_bins)(values) ``` -The functions uses [A Streaming Parallel Decision Tree Algorithm](http://jmlr.org/papers/volume11/ben-haim10a/ben-haim10a.pdf). The borders of histogram bins are adjusted as new data enters a function. In common case, the widths of bins are not equal. +该函数使用 [流式并行决策树算法](http://jmlr.org/papers/volume11/ben-haim10a/ben-haim10a.pdf). 当新数据输入函数时,hist图分区的边界将被调整。 在通常情况下,箱的宽度不相等。 -**Parameters** +**参数** `number_of_bins` — Upper limit for the number of bins in the histogram. The function automatically calculates the number of bins. It tries to reach the specified number of bins, but if it fails, it uses fewer bins. -`values` — [Expression](../syntax.md#syntax-expressions) resulting in input values. +`values` — [表达式](../syntax.md#syntax-expressions) 导致输入值。 -**Returned values** +**返回值** -- [Array](../../data_types/array.md) of [Tuples](../../data_types/tuple.md) of the following format: +- [阵列](../../sql_reference/data_types/array.md) 的 [元组](../../sql_reference/data_types/tuple.md) 下面的格式: ``` [(lower_1, upper_1, height_1), ... (lower_N, upper_N, height_N)] @@ -33,7 +36,7 @@ The functions uses [A Streaming Parallel Decision Tree Algorithm](http://jmlr.or - `upper` — Upper bound of the bin. - `height` — Calculated height of the bin. -**Example** +**示例** ``` sql SELECT histogram(5)(number + 1) @@ -50,7 +53,7 @@ FROM ( └─────────────────────────────────────────────────────────────────────────┘ ``` -You can visualize a histogram with the [bar](../functions/other_functions.md#function-bar) function, for example: +您可以使用 [酒吧](../../sql_reference/functions/other_functions.md#function-bar) 功能,例如: ``` sql WITH histogram(5)(rand() % 100) AS hist @@ -75,46 +78,46 @@ FROM └────────┴───────┘ ``` -In this case, you should remember that you don’t know the histogram bin borders. +在这种情况下,您应该记住您不知道直方图bin边界。 ## sequenceMatch(pattern)(timestamp, cond1, cond2, …) {#function-sequencematch} -Checks whether the sequence contains an event chain that matches the pattern. 
+检查序列是否包含与模式匹配的事件链。 ``` sql sequenceMatch(pattern)(timestamp, cond1, cond2, ...) ``` -!!! warning "Warning" - Events that occur at the same second may lay in the sequence in an undefined order affecting the result. +!!! warning "警告" + 在同一秒钟发生的事件可能以未定义的顺序排列在序列中,影响结果。 -**Parameters** +**参数** -- `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax). +- `pattern` — Pattern string. See [模式语法](#sequence-function-pattern-syntax). -- `timestamp` — Column considered to contain time data. Typical data types are `Date` and `DateTime`. You can also use any of the supported [UInt](../../data_types/int_uint.md) data types. +- `timestamp` — Column considered to contain time data. Typical data types are `Date` 和 `DateTime`. 您还可以使用任何支持的 [UInt](../../sql_reference/data_types/int_uint.md) 数据类型。 -- `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function takes only the events described in these conditions into account. If the sequence contains data that isn’t described in a condition, the function skips them. +- `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. 最多可以传递32个条件参数。 该函数只考虑这些条件中描述的事件。 如果序列包含未在条件中描述的数据,则函数将跳过这些数据。 -**Returned values** +**返回值** -- 1, if the pattern is matched. -- 0, if the pattern isn’t matched. +- 1,如果模式匹配。 +- 0,如果模式不匹配。 -Type: `UInt8`. +类型: `UInt8`. -**Pattern syntax** +**模式语法** -- `(?N)` — Matches the condition argument at position `N`. Conditions are numbered in the `[1, 32]` range. For example, `(?1)` matches the argument passed to the `cond1` parameter. +- `(?N)` — Matches the condition argument at position `N`. 条件在编号 `[1, 32]` 范围。 例如, `(?1)` 匹配传递给 `cond1` 参数。 -- `.*` — Matches any number of events. You don’t need conditional arguments to match this element of the pattern. +- `.*` — Matches any number of events. You don't need conditional arguments to match this element of the pattern. -- `(?t operator value)` — Sets the time in seconds that should separate two events. For example, pattern `(?1)(?t>1800)(?2)` matches events that occur more than 1800 seconds from each other. An arbitrary number of any events can lay between these events. You can use the `>=`, `>`, `<`, `<=` operators. +- `(?t operator value)` — Sets the time in seconds that should separate two events. For example, pattern `(?1)(?t>1800)(?2)` 匹配彼此发生超过1800秒的事件。 这些事件之间可以存在任意数量的任何事件。 您可以使用 `>=`, `>`, `<`, `<=` 运营商。 -**Examples** +**例** -Consider data in the `t` table: +考虑在数据 `t` 表: ``` text ┌─time─┬─number─┐ @@ -124,7 +127,7 @@ Consider data in the `t` table: └──────┴────────┘ ``` -Perform the query: +执行查询: ``` sql SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2) FROM t @@ -136,7 +139,7 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2) FROM t └───────────────────────────────────────────────────────────────────────┘ ``` -The function found the event chain where number 2 follows number 1. It skipped number 3 between them, because the number is not described as an event. If we want to take this number into account when searching for the event chain given in the example, we should make a condition for it. 
+该函数找到了数字2跟随数字1的事件链。 它跳过了它们之间的数字3,因为该数字没有被描述为事件。 如果我们想在搜索示例中给出的事件链时考虑这个数字,我们应该为它创建一个条件。 ``` sql SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 3) FROM t @@ -148,7 +151,7 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 3) FROM └──────────────────────────────────────────────────────────────────────────────────────────┘ ``` -In this case, the function couldn’t find the event chain matching the pattern, because the event for number 3 occured between 1 and 2. If in the same case we checked the condition for number 4, the sequence would match the pattern. +在这种情况下,函数找不到与模式匹配的事件链,因为数字3的事件发生在1和2之间。 如果在相同的情况下,我们检查了数字4的条件,则序列将与模式匹配。 ``` sql SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM t @@ -160,38 +163,38 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM └──────────────────────────────────────────────────────────────────────────────────────────┘ ``` -**See Also** +**另请参阅** - [sequenceCount](#function-sequencecount) ## sequenceCount(pattern)(time, cond1, cond2, …) {#function-sequencecount} -Counts the number of event chains that matched the pattern. The function searches event chains that don’t overlap. It starts to search for the next chain after the current chain is matched. +计数与模式匹配的事件链的数量。 该函数搜索不重叠的事件链。 当前链匹配后,它开始搜索下一个链。 -!!! warning "Warning" - Events that occur at the same second may lay in the sequence in an undefined order affecting the result. +!!! warning "警告" + 在同一秒钟发生的事件可能以未定义的顺序排列在序列中,影响结果。 ``` sql sequenceCount(pattern)(timestamp, cond1, cond2, ...) ``` -**Parameters** +**参数** -- `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax). +- `pattern` — Pattern string. See [模式语法](#sequence-function-pattern-syntax). -- `timestamp` — Column considered to contain time data. Typical data types are `Date` and `DateTime`. You can also use any of the supported [UInt](../../data_types/int_uint.md) data types. +- `timestamp` — Column considered to contain time data. Typical data types are `Date` 和 `DateTime`. 您还可以使用任何支持的 [UInt](../../sql_reference/data_types/int_uint.md) 数据类型。 -- `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function takes only the events described in these conditions into account. If the sequence contains data that isn’t described in a condition, the function skips them. +- `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. 最多可以传递32个条件参数。 该函数只考虑这些条件中描述的事件。 如果序列包含未在条件中描述的数据,则函数将跳过这些数据。 -**Returned values** +**返回值** -- Number of non-overlapping event chains that are matched. +- 匹配的非重叠事件链数。 -Type: `UInt64`. +类型: `UInt64`. -**Example** +**示例** -Consider data in the `t` table: +考虑在数据 `t` 表: ``` text ┌─time─┬─number─┐ @@ -204,7 +207,7 @@ Consider data in the `t` table: └──────┴────────┘ ``` -Count how many times the number 2 occurs after the number 1 with any amount of other numbers between them: +计算数字2在数字1之后出现的次数以及它们之间的任何其他数字: ``` sql SELECT sequenceCount('(?1).*(?2)')(time, number = 1, number = 2) FROM t @@ -216,55 +219,55 @@ SELECT sequenceCount('(?1).*(?2)')(time, number = 1, number = 2) FROM t └─────────────────────────────────────────────────────────────────────────┘ ``` -**See Also** +**另请参阅** - [sequenceMatch](#function-sequencematch) ## windowFunnel {#windowfunnel} -Searches for event chains in a sliding time window and calculates the maximum number of events that occurred from the chain. 
+搜索滑动时间窗中的事件链,并计算从链中发生的最大事件数。 -The function works according to the algorithm: +该函数根据算法工作: -- The function searches for data that triggers the first condition in the chain and sets the event counter to 1. This is the moment when the sliding window starts. +- 该函数搜索触发链中的第一个条件并将事件计数器设置为1的数据。 这是滑动窗口启动的时刻。 -- If events from the chain occur sequentially within the window, the counter is incremented. If the sequence of events is disrupted, the counter isn’t incremented. +- 如果来自链的事件在窗口内顺序发生,则计数器将递增。 如果事件序列中断,则计数器不会增加。 -- If the data has multiple event chains at varying points of completion, the function will only output the size of the longest chain. +- 如果数据在不同的完成点具有多个事件链,则该函数将仅输出最长链的大小。 -**Syntax** +**语法** ``` sql windowFunnel(window, [mode])(timestamp, cond1, cond2, ..., condN) ``` -**Parameters** +**参数** - `window` — Length of the sliding window in seconds. -- `mode` - It is an optional argument. - - `'strict'` - When the `'strict'` is set, the windowFunnel() applies conditions only for the unique values. -- `timestamp` — Name of the column containing the timestamp. Data types supported: [Date](../../data_types/date.md), [DateTime](../../data_types/datetime.md#data_type-datetime) and other unsigned integer types (note that even though timestamp supports the `UInt64` type, it’s value can’t exceed the Int64 maximum, which is 2^63 - 1). -- `cond` — Conditions or data describing the chain of events. [UInt8](../../data_types/int_uint.md). +- `mode` -这是一个可选的参数。 + - `'strict'` -当 `'strict'` 设置时,windowFunnel()仅对唯一值应用条件。 +- `timestamp` — Name of the column containing the timestamp. Data types supported: [日期](../../sql_reference/data_types/date.md), [日期时间](../../sql_reference/data_types/datetime.md#data_type-datetime) 和其他无符号整数类型(请注意,即使时间戳支持 `UInt64` 类型,它的值不能超过Int64最大值,即2^63-1)。 +- `cond` — Conditions or data describing the chain of events. [UInt8](../../sql_reference/data_types/int_uint.md). -**Returned value** +**返回值** -The maximum number of consecutive triggered conditions from the chain within the sliding time window. -All the chains in the selection are analyzed. +滑动时间窗口内连续触发条件链的最大数目。 +对选择中的所有链进行了分析。 -Type: `Integer`. +类型: `Integer`. -**Example** +**示例** -Determine if a set period of time is enough for the user to select a phone and purchase it twice in the online store. +确定设定的时间段是否足以让用户选择手机并在在线商店中购买两次。 -Set the following chain of events: +设置以下事件链: -1. The user logged in to their account on the store (`eventID = 1003`). -2. The user searches for a phone (`eventID = 1007, product = 'phone'`). -3. The user placed an order (`eventID = 1009`). -4. The user made the order again (`eventID = 1010`). +1. 用户登录到其在应用商店中的帐户 (`eventID = 1003`). +2. 用户搜索手机 (`eventID = 1007, product = 'phone'`). +3. 用户下了订单 (`eventID = 1009`). +4. 用户再次下订单 (`eventID = 1010`). -Input table: +输入表: ``` text ┌─event_date─┬─user_id─┬───────────timestamp─┬─eventID─┬─product─┐ @@ -281,9 +284,9 @@ Input table: └────────────┴─────────┴─────────────────────┴─────────┴─────────┘ ``` -Find out how far the user `user_id` could get through the chain in a period in January-February of 2019. +了解用户有多远 `user_id` 可以在2019的1-2月期间通过链条。 -Query: +查询: ``` sql SELECT @@ -302,7 +305,7 @@ GROUP BY level ORDER BY level ASC ``` -Result: +结果: ``` text ┌─level─┬─c─┐ @@ -310,35 +313,35 @@ Result: └───────┴───┘ ``` -## retention {#retention} +## 保留 {#retention} -The function takes as arguments a set of conditions from 1 to 32 arguments of type `UInt8` that indicate whether a certain condition was met for the event. 
-Any condition can be specified as an argument (as in [WHERE](../../query_language/select.md#select-where)). +该函数将一组条件作为参数,类型为1到32个参数 `UInt8` 表示事件是否满足特定条件。 +任何条件都可以指定为参数(如 [WHERE](../../sql_reference/statements/select.md#select-where)). -The conditions, except the first, apply in pairs: the result of the second will be true if the first and second are true, of the third if the first and fird are true, etc. +除了第一个以外,条件成对适用:如果第一个和第二个是真的,第二个结果将是真的,如果第一个和fird是真的,第三个结果将是真的,等等。 -**Syntax** +**语法** ``` sql retention(cond1, cond2, ..., cond32); ``` -**Parameters** +**参数** -- `cond` — an expression that returns a `UInt8` result (1 or 0). +- `cond` — an expression that returns a `UInt8` 结果(1或0)。 -**Returned value** +**返回值** -The array of 1 or 0. +数组为1或0。 - 1 — condition was met for the event. -- 0 — condition wasn’t met for the event. +- 0 — condition wasn't met for the event. -Type: `UInt8`. +类型: `UInt8`. -**Example** +**示例** -Let’s consider an example of calculating the `retention` function to determine site traffic. +让我们考虑计算的一个例子 `retention` 功能,以确定网站流量。 **1.** Сreate a table to illustrate an example. @@ -350,15 +353,15 @@ INSERT INTO retention_test SELECT '2020-01-02', number FROM numbers(10); INSERT INTO retention_test SELECT '2020-01-03', number FROM numbers(15); ``` -Input table: +输入表: -Query: +查询: ``` sql SELECT * FROM retention_test ``` -Result: +结果: ``` text ┌───────date─┬─uid─┐ @@ -399,9 +402,9 @@ Result: └────────────┴─────┘ ``` -**2.** Group users by unique ID `uid` using the `retention` function. +**2.** 按唯一ID对用户进行分组 `uid` 使用 `retention` 功能。 -Query: +查询: ``` sql SELECT @@ -413,7 +416,7 @@ GROUP BY uid ORDER BY uid ASC ``` -Result: +结果: ``` text ┌─uid─┬─r───────┐ @@ -435,9 +438,9 @@ Result: └─────┴─────────┘ ``` -**3.** Calculate the total number of site visits per day. +**3.** 计算每天的现场访问总数。 -Query: +查询: ``` sql SELECT @@ -455,7 +458,7 @@ FROM ) ``` -Result: +结果: ``` text ┌─r1─┬─r2─┬─r3─┐ @@ -463,34 +466,34 @@ Result: └────┴────┴────┘ ``` -Where: +哪里: -- `r1`- the number of unique visitors who visited the site during 2020-01-01 (the `cond1` condition). -- `r2`- the number of unique visitors who visited the site during a specific time period between 2020-01-01 and 2020-01-02 (`cond1` and `cond2` conditions). -- `r3`- the number of unique visitors who visited the site during a specific time period between 2020-01-01 and 2020-01-03 (`cond1` and `cond3` conditions). +- `r1`-2020-01-01期间访问该网站的独立访问者数量( `cond1` 条件)。 +- `r2`-在2020-01-01和2020-01-02之间的特定时间段内访问该网站的唯一访问者的数量 (`cond1` 和 `cond2` 条件)。 +- `r3`-在2020-01-01和2020-01-03之间的特定时间段内访问该网站的唯一访问者的数量 (`cond1` 和 `cond3` 条件)。 ## uniqUpTo(N)(x) {#uniquptonx} Calculates the number of different argument values ​​if it is less than or equal to N. If the number of different argument values is greater than N, it returns N + 1. -Recommended for use with small Ns, up to 10. The maximum value of N is 100. +建议使用小Ns,高达10。 N的最大值为100。 -For the state of an aggregate function, it uses the amount of memory equal to 1 + N \* the size of one value of bytes. -For strings, it stores a non-cryptographic hash of 8 bytes. That is, the calculation is approximated for strings. +对于聚合函数的状态,它使用的内存量等于1+N\*一个字节值的大小。 +对于字符串,它存储8个字节的非加密哈希。 也就是说,计算是近似的字符串。 -The function also works for several arguments. +该函数也适用于多个参数。 -It works as fast as possible, except for cases when a large N value is used and the number of unique values is slightly less than N. 
+它的工作速度尽可能快,除了使用较大的N值并且唯一值的数量略小于N的情况。 -Usage example: +用法示例: ``` text Problem: Generate a report that shows only keywords that produced at least 5 unique users. Solution: Write in the GROUP BY query SearchPhrase HAVING uniqUpTo(4)(UserID) >= 5 ``` -[Original article](https://clickhouse.tech/docs/en/query_language/agg_functions/parametric_functions/) +[原始文章](https://clickhouse.tech/docs/en/query_language/agg_functions/parametric_functions/) -## sumMapFiltered(keys\_to\_keep)(keys, values) {#summapfilteredkeys-to-keepkeys-values} +## sumMapFiltered(keys\_to\_keep)(键值) {#summapfilteredkeys-to-keepkeys-values} -Same behavior as [sumMap](reference.md#agg_functions-summap) except that an array of keys is passed as a parameter. This can be especially useful when working with a high cardinality of keys. +同样的行为 [sumMap](reference.md#agg_functions-summap) 除了一个键数组作为参数传递。 这在使用高基数密钥时尤其有用。 diff --git a/docs/zh/sql_reference/aggregate_functions/reference.md b/docs/zh/sql_reference/aggregate_functions/reference.md new file mode 100644 index 00000000000..b8071860d41 --- /dev/null +++ b/docs/zh/sql_reference/aggregate_functions/reference.md @@ -0,0 +1,1878 @@ +--- +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_priority: 36 +toc_title: "\u53C2\u8003\u8D44\u6599" +--- + +# 函数参考 {#function-reference} + +## 计数 {#agg_function-count} + +计数行数或非空值。 + +ClickHouse支持以下语法 `count`: +- `count(expr)` 或 `COUNT(DISTINCT expr)`. +- `count()` 或 `COUNT(*)`. 该 `count()` 语法是ClickHouse特定的。 + +**参数** + +该功能可以采取: + +- 零参数。 +- 一 [表达式](../syntax.md#syntax-expressions). + +**返回值** + +- 如果没有参数调用函数,它会计算行数。 +- 如果 [表达式](../syntax.md#syntax-expressions) 被传递,则该函数计数此表达式返回的次数非null。 如果表达式返回 [可为空](../../sql_reference/data_types/nullable.md)-键入值,然后结果 `count` 保持不 `Nullable`. 如果返回表达式,则该函数返回0 `NULL` 对于所有的行。 + +在这两种情况下,返回值的类型为 [UInt64](../../sql_reference/data_types/int_uint.md). + +**详细信息** + +ClickHouse支持 `COUNT(DISTINCT ...)` 语法 这种结构的行为取决于 [count\_distinct\_implementation](../../operations/settings/settings.md#settings-count_distinct_implementation) 设置。 它定义了其中的 [uniq\*](#agg_function-uniq) 函数用于执行操作。 默认值为 [uniqExact](#agg_function-uniqexact) 功能。 + +该 `SELECT count() FROM table` 查询未被优化,因为表中的条目数没有单独存储。 它从表中选择一个小列并计算其中的值数。 + +**例** + +示例1: + +``` sql +SELECT count() FROM t +``` + +``` text +┌─count()─┐ +│ 5 │ +└─────────┘ +``` + +示例2: + +``` sql +SELECT name, value FROM system.settings WHERE name = 'count_distinct_implementation' +``` + +``` text +┌─name──────────────────────────┬─value─────┐ +│ count_distinct_implementation │ uniqExact │ +└───────────────────────────────┴───────────┘ +``` + +``` sql +SELECT count(DISTINCT num) FROM t +``` + +``` text +┌─uniqExact(num)─┐ +│ 3 │ +└────────────────┘ +``` + +这个例子表明 `count(DISTINCT num)` 由执行 `uniqExact` 根据功能 `count_distinct_implementation` 设定值。 + +## 任何(x) {#agg_function-any} + +选择第一个遇到的值。 +查询可以以任何顺序执行,甚至每次都以不同的顺序执行,因此此函数的结果是不确定的。 +要获得确定的结果,您可以使用 ‘min’ 或 ‘max’ 功能,而不是 ‘any’. + +在某些情况下,可以依靠执行的顺序。 这适用于SELECT来自使用ORDER BY的子查询的情况。 + +当一个 `SELECT` 查询具有 `GROUP BY` 子句或至少一个聚合函数,ClickHouse(相对于MySQL)要求在所有表达式 `SELECT`, `HAVING`,和 `ORDER BY` 子句可以从键或聚合函数计算。 换句话说,从表中选择的每个列必须在键或聚合函数内使用。 要获得像MySQL这样的行为,您可以将其他列放在 `any` 聚合函数。 + +## anyHeavy(x) {#anyheavyx} + +使用选择一个频繁出现的值 [重打者](http://www.cs.umd.edu/~samir/498/karp.pdf) 算法。 如果某个值在查询的每个执行线程中出现的情况超过一半,则返回此值。 通常情况下,结果是不确定的。 + +``` sql +anyHeavy(column) +``` + +**参数** + +- `column` – The column name. 
+ +**示例** + +就拿 [时间](../../getting_started/example_datasets/ontime.md) 数据集,并选择在任何频繁出现的值 `AirlineID` 列。 + +``` sql +SELECT anyHeavy(AirlineID) AS res +FROM ontime +``` + +``` text +┌───res─┐ +│ 19690 │ +└───────┘ +``` + +## anyLast(x) {#anylastx} + +选择遇到的最后一个值。 +其结果是一样不确定的 `any` 功能。 + +## 集团比特 {#groupbitand} + +按位应用 `AND` 对于一系列的数字。 + +``` sql +groupBitAnd(expr) +``` + +**参数** + +`expr` – An expression that results in `UInt*` 类型。 + +**返回值** + +的价值 `UInt*` 类型。 + +**示例** + +测试数据: + +``` text +binary decimal +00101100 = 44 +00011100 = 28 +00001101 = 13 +01010101 = 85 +``` + +查询: + +``` sql +SELECT groupBitAnd(num) FROM t +``` + +哪里 `num` 是包含测试数据的列。 + +结果: + +``` text +binary decimal +00000100 = 4 +``` + +## groupBitOr {#groupbitor} + +按位应用 `OR` 对于一系列的数字。 + +``` sql +groupBitOr(expr) +``` + +**参数** + +`expr` – An expression that results in `UInt*` 类型。 + +**返回值** + +的价值 `UInt*` 类型。 + +**示例** + +测试数据: + +``` text +binary decimal +00101100 = 44 +00011100 = 28 +00001101 = 13 +01010101 = 85 +``` + +查询: + +``` sql +SELECT groupBitOr(num) FROM t +``` + +哪里 `num` 是包含测试数据的列。 + +结果: + +``` text +binary decimal +01111101 = 125 +``` + +## groupBitXor {#groupbitxor} + +按位应用 `XOR` 对于一系列的数字。 + +``` sql +groupBitXor(expr) +``` + +**参数** + +`expr` – An expression that results in `UInt*` 类型。 + +**返回值** + +的价值 `UInt*` 类型。 + +**示例** + +测试数据: + +``` text +binary decimal +00101100 = 44 +00011100 = 28 +00001101 = 13 +01010101 = 85 +``` + +查询: + +``` sql +SELECT groupBitXor(num) FROM t +``` + +哪里 `num` 是包含测试数据的列。 + +结果: + +``` text +binary decimal +01101000 = 104 +``` + +## groupBitmap {#groupbitmap} + +从无符号整数列的位图或聚合计算,返回UInt64类型的基数,如果添加后缀状态,则返回 [位图对象](../../sql_reference/functions/bitmap_functions.md). + +``` sql +groupBitmap(expr) +``` + +**参数** + +`expr` – An expression that results in `UInt*` 类型。 + +**返回值** + +的价值 `UInt64` 类型。 + +**示例** + +测试数据: + +``` text +UserID +1 +1 +2 +3 +``` + +查询: + +``` sql +SELECT groupBitmap(UserID) as num FROM t +``` + +结果: + +``` text +num +3 +``` + +## min(x) {#agg_function-min} + +计算最小值。 + +## max(x) {#agg_function-max} + +计算最大值。 + +## argMin(arg,val) {#agg-function-argmin} + +计算 ‘arg’ 最小值的值 ‘val’ 价值。 如果有几个不同的值 ‘arg’ 对于最小值 ‘val’,遇到的第一个值是输出。 + +**示例:** + +``` text +┌─user─────┬─salary─┐ +│ director │ 5000 │ +│ manager │ 3000 │ +│ worker │ 1000 │ +└──────────┴────────┘ +``` + +``` sql +SELECT argMin(user, salary) FROM salary +``` + +``` text +┌─argMin(user, salary)─┐ +│ worker │ +└──────────────────────┘ +``` + +## argMax(arg,val) {#agg-function-argmax} + +计算 ‘arg’ 最大值 ‘val’ 价值。 如果有几个不同的值 ‘arg’ 对于最大值 ‘val’,遇到的第一个值是输出。 + +## sum(x) {#agg_function-sum} + +计算总和。 +只适用于数字。 + +## sumWithOverflow(x) {#sumwithoverflowx} + +使用与输入参数相同的数据类型计算数字的总和。 如果总和超过此数据类型的最大值,则函数返回错误。 + +只适用于数字。 + +## sumMap(key,value) {#agg_functions-summap} + +总计 ‘value’ 数组根据在指定的键 ‘key’ 阵列。 +元素的数量 ‘key’ 和 ‘value’ 总计的每一行必须相同。 +Returns a tuple of two arrays: keys in sorted order, and values ​​summed for the corresponding keys. 
+ +示例: + +``` sql +CREATE TABLE sum_map( + date Date, + timeslot DateTime, + statusMap Nested( + status UInt16, + requests UInt64 + ) +) ENGINE = Log; +INSERT INTO sum_map VALUES + ('2000-01-01', '2000-01-01 00:00:00', [1, 2, 3], [10, 10, 10]), + ('2000-01-01', '2000-01-01 00:00:00', [3, 4, 5], [10, 10, 10]), + ('2000-01-01', '2000-01-01 00:01:00', [4, 5, 6], [10, 10, 10]), + ('2000-01-01', '2000-01-01 00:01:00', [6, 7, 8], [10, 10, 10]); +SELECT + timeslot, + sumMap(statusMap.status, statusMap.requests) +FROM sum_map +GROUP BY timeslot +``` + +``` text +┌────────────timeslot─┬─sumMap(statusMap.status, statusMap.requests)─┐ +│ 2000-01-01 00:00:00 │ ([1,2,3,4,5],[10,10,20,10,10]) │ +│ 2000-01-01 00:01:00 │ ([4,5,6,7,8],[10,10,20,10,10]) │ +└─────────────────────┴──────────────────────────────────────────────┘ +``` + +## skewPop {#skewpop} + +计算 [歪斜](https://en.wikipedia.org/wiki/Skewness) 的序列。 + +``` sql +skewPop(expr) +``` + +**参数** + +`expr` — [表达式](../syntax.md#syntax-expressions) 返回一个数字。 + +**返回值** + +The skewness of the given distribution. Type — [Float64](../../sql_reference/data_types/float.md) + +**示例** + +``` sql +SELECT skewPop(value) FROM series_with_value_column +``` + +## skewSamp {#skewsamp} + +计算 [样品偏度](https://en.wikipedia.org/wiki/Skewness) 的序列。 + +它表示随机变量的偏度的无偏估计,如果传递的值形成其样本。 + +``` sql +skewSamp(expr) +``` + +**参数** + +`expr` — [表达式](../syntax.md#syntax-expressions) 返回一个数字。 + +**返回值** + +The skewness of the given distribution. Type — [Float64](../../sql_reference/data_types/float.md). 如果 `n <= 1` (`n` 是样本的大小),则该函数返回 `nan`. + +**示例** + +``` sql +SELECT skewSamp(value) FROM series_with_value_column +``` + +## kurtPop {#kurtpop} + +计算 [峰度](https://en.wikipedia.org/wiki/Kurtosis) 的序列。 + +``` sql +kurtPop(expr) +``` + +**参数** + +`expr` — [表达式](../syntax.md#syntax-expressions) 返回一个数字。 + +**返回值** + +The kurtosis of the given distribution. Type — [Float64](../../sql_reference/data_types/float.md) + +**示例** + +``` sql +SELECT kurtPop(value) FROM series_with_value_column +``` + +## kurtSamp {#kurtsamp} + +计算 [峰度样本](https://en.wikipedia.org/wiki/Kurtosis) 的序列。 + +它表示随机变量峰度的无偏估计,如果传递的值形成其样本。 + +``` sql +kurtSamp(expr) +``` + +**参数** + +`expr` — [表达式](../syntax.md#syntax-expressions) 返回一个数字。 + +**返回值** + +The kurtosis of the given distribution. Type — [Float64](../../sql_reference/data_types/float.md). 如果 `n <= 1` (`n` 是样本的大小),则该函数返回 `nan`. + +**示例** + +``` sql +SELECT kurtSamp(value) FROM series_with_value_column +``` + +## timeSeriesGroupSum(uid,timestamp,value) {#agg-function-timeseriesgroupsum} + +`timeSeriesGroupSum` 可以聚合不同的时间序列,即采样时间戳不对齐。 +它将在两个采样时间戳之间使用线性插值,然后将时间序列和在一起。 + +- `uid` 是时间序列唯一id, `UInt64`. 
+- `timestamp` 是Int64型,以支持毫秒或微秒。 +- `value` 是指标。 + +函数返回元组数组 `(timestamp, aggregated_value)` 对。 + +在使用此功能之前,请确保 `timestamp` 按升序排列 + +示例: + +``` text +┌─uid─┬─timestamp─┬─value─┐ +│ 1 │ 2 │ 0.2 │ +│ 1 │ 7 │ 0.7 │ +│ 1 │ 12 │ 1.2 │ +│ 1 │ 17 │ 1.7 │ +│ 1 │ 25 │ 2.5 │ +│ 2 │ 3 │ 0.6 │ +│ 2 │ 8 │ 1.6 │ +│ 2 │ 12 │ 2.4 │ +│ 2 │ 18 │ 3.6 │ +│ 2 │ 24 │ 4.8 │ +└─────┴───────────┴───────┘ +``` + +``` sql +CREATE TABLE time_series( + uid UInt64, + timestamp Int64, + value Float64 +) ENGINE = Memory; +INSERT INTO time_series VALUES + (1,2,0.2),(1,7,0.7),(1,12,1.2),(1,17,1.7),(1,25,2.5), + (2,3,0.6),(2,8,1.6),(2,12,2.4),(2,18,3.6),(2,24,4.8); + +SELECT timeSeriesGroupSum(uid, timestamp, value) +FROM ( + SELECT * FROM time_series order by timestamp ASC +); +``` + +其结果将是: + +``` text +[(2,0.2),(3,0.9),(7,2.1),(8,2.4),(12,3.6),(17,5.1),(18,5.4),(24,7.2),(25,2.5)] +``` + +## timeSeriesGroupRateSum(uid,ts,val) {#agg-function-timeseriesgroupratesum} + +同样,timeSeriesGroupRateSum,timeSeriesGroupRateSum将计算时间序列的速率,然后将速率总和在一起。 +此外,使用此函数之前,时间戳应该是上升顺序。 + +使用此函数,上述情况下的结果将是: + +``` text +[(2,0),(3,0.1),(7,0.3),(8,0.3),(12,0.3),(17,0.3),(18,0.3),(24,0.3),(25,0.1)] +``` + +## avg(x) {#agg_function-avg} + +计算平均值。 +只适用于数字。 +结果总是Float64。 + +## 平均加权 {#avgweighted} + +计算 [加权算术平均值](https://en.wikipedia.org/wiki/Weighted_arithmetic_mean). + +**语法** + +``` sql +avgWeighted(x, weight) +``` + +**参数** + +- `x` — Values. [整数](../data_types/int_uint.md) 或 [浮点](../data_types/float.md). +- `weight` — Weights of the values. [整数](../data_types/int_uint.md) 或 [浮点](../data_types/float.md). + +类型 `x` 和 `weight` 一定是一样的 + +**返回值** + +- 加权平均值。 +- `NaN`. 如果所有的权重都等于0。 + +类型: [Float64](../data_types/float.md). + +**示例** + +查询: + +``` sql +SELECT avgWeighted(x, w) +FROM values('x Int8, w Int8', (4, 1), (1, 0), (10, 2)) +``` + +结果: + +``` text +┌─avgWeighted(x, weight)─┐ +│ 8 │ +└────────────────────────┘ +``` + +## uniq {#agg_function-uniq} + +计算参数的不同值的近似数量。 + +``` sql +uniq(x[, ...]) +``` + +**参数** + +该函数采用可变数量的参数。 参数可以是 `Tuple`, `Array`, `Date`, `DateTime`, `String`,或数字类型。 + +**返回值** + +- A [UInt64](../../sql_reference/data_types/int_uint.md)-键入号码。 + +**实施细节** + +功能: + +- 计算聚合中所有参数的哈希值,然后在计算中使用它。 + +- 使用自适应采样算法。 对于计算状态,该函数使用最多65536个元素哈希值的样本。 + + This algorithm is very accurate and very efficient on the CPU. When the query contains several of these functions, using `uniq` is almost as fast as using other aggregate functions. + +- 确定性地提供结果(它不依赖于查询处理顺序)。 + +我们建议在几乎所有情况下使用此功能。 + +**另请参阅** + +- [uniqCombined](#agg_function-uniqcombined) +- [uniqCombined64](#agg_function-uniqcombined64) +- [uniqHLL12](#agg_function-uniqhll12) +- [uniqExact](#agg_function-uniqexact) + +## uniqCombined {#agg_function-uniqcombined} + +计算不同参数值的近似数量。 + +``` sql +uniqCombined(HLL_precision)(x[, ...]) +``` + +该 `uniqCombined` 函数是计算不同数值数量的不错选择。 + +**参数** + +该函数采用可变数量的参数。 参数可以是 `Tuple`, `Array`, `Date`, `DateTime`, `String`,或数字类型。 + +`HLL_precision` 是以2为底的单元格数的对数 [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog). 可选,您可以将该函数用作 `uniqCombined(x[, ...])`. 默认值 `HLL_precision` 是17,这是有效的96KiB的空间(2^17个单元,每个6比特)。 + +**返回值** + +- 一个数字 [UInt64](../../sql_reference/data_types/int_uint.md)-键入号码。 + +**实施细节** + +功能: + +- 计算散列(64位散列 `String` 否则32位)对于聚合中的所有参数,然后在计算中使用它。 + +- 使用三种算法的组合:数组、哈希表和HyperLogLog与error错表。 + + For a small number of distinct elements, an array is used. When the set size is larger, a hash table is used. For a larger number of elements, HyperLogLog is used, which will occupy a fixed amount of memory. + +- 确定性地提供结果(它不依赖于查询处理顺序)。 + +!!! 
note "注"
+    因为它对非 `String` 类型使用32位散列,当基数显著大于 `UINT_MAX` 时,结果将有非常高的误差(误差会在几百亿不同值之后迅速升高),因此在这种情况下,您应该使用 [uniqCombined64](#agg_function-uniqcombined64)
+
+与 [uniq](#agg_function-uniq) 函数相比,`uniqCombined`:
+
+- 消耗少几倍的内存。
+- 计算精度高出几倍。
+- 通常具有略低的性能。 在某些情况下, `uniqCombined` 可以表现得比 `uniq` 更好,例如,在通过网络传输大量聚合状态的分布式查询中。
+
+**另请参阅**
+
+- [uniq](#agg_function-uniq)
+- [uniqCombined64](#agg_function-uniqcombined64)
+- [uniqHLL12](#agg_function-uniqhll12)
+- [uniqExact](#agg_function-uniqexact)
+
+## uniqCombined64 {#agg_function-uniqcombined64}
+
+与 [uniqCombined](#agg_function-uniqcombined) 相同,但对所有数据类型使用64位哈希。
+
+## uniqHLL12 {#agg_function-uniqhll12}
+
+计算不同参数值的近似数量,使用 [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) 算法。
+
+``` sql
+uniqHLL12(x[, ...])
+```
+
+**参数**
+
+该函数采用可变数量的参数。 参数可以是 `Tuple`, `Array`, `Date`, `DateTime`, `String`,或数字类型。
+
+**返回值**
+
+- 一个 [UInt64](../../sql_reference/data_types/int_uint.md) 类型的数字。
+
+**实施细节**
+
+功能:
+
+- 计算聚合中所有参数的哈希值,然后在计算中使用它。
+
+- 使用HyperLogLog算法来近似不同参数值的数量。
+
+    2^12 5-bit cells are used. The size of the state is slightly more than 2.5 KB. The result is not very accurate (up to ~10% error) for small data sets (<10K elements). However, the result is fairly accurate for high-cardinality data sets (10K-100M), with a maximum error of ~1.6%. Starting from 100M, the estimation error increases, and the function will return very inaccurate results for data sets with extremely high cardinality (1B+ elements).
+
+- 提供确定结果(它不依赖于查询处理顺序)。
+
+我们不建议使用此功能。 在大多数情况下,使用 [uniq](#agg_function-uniq) 或 [uniqCombined](#agg_function-uniqcombined) 功能。
+
+**另请参阅**
+
+- [uniq](#agg_function-uniq)
+- [uniqCombined](#agg_function-uniqcombined)
+- [uniqExact](#agg_function-uniqexact)
+
+## uniqExact {#agg_function-uniqexact}
+
+计算不同参数值的准确数目。
+
+``` sql
+uniqExact(x[, ...])
+```
+
+如果你绝对需要精确的结果,请使用 `uniqExact` 函数。 否则请使用 [uniq](#agg_function-uniq) 函数。
+
+`uniqExact` 函数比 `uniq` 消耗更多的内存,因为状态的大小随着不同值的数量的增加而无界增长。
+
+**参数**
+
+该函数采用可变数量的参数。 参数可以是 `Tuple`, `Array`, `Date`, `DateTime`, `String`,或数字类型。
+
+**另请参阅**
+
+- [uniq](#agg_function-uniq)
+- [uniqCombined](#agg_function-uniqcombined)
+- [uniqHLL12](#agg_function-uniqhll12)
+
+## groupArray(x), groupArray(max\_size)(x) {#agg_function-grouparray}
+
+创建参数值的数组。
+值可以按任何(不确定)顺序添加到数组中。
+
+第二个版本(带 `max_size` 参数)将结果数组的大小限制为 `max_size` 个元素。
+例如, `groupArray (1) (x)` 相当于 `[any (x)]`.
+
+在某些情况下,您仍然可以依靠执行的顺序。 这适用于 `SELECT` 来自使用了 `ORDER BY` 的子查询的情况。
+
+## groupArrayInsertAt(value, position) {#grouparrayinsertatvalue-position}
+
+将值插入数组中的指定位置。
+
+!!! note "注"
+    此函数使用从零开始的位置,与传统SQL数组的从一开始的位置相反。
+
+Accepts the value and position as input. If several values are inserted into the same position, any of them might end up in the resulting array (the first one will be used in the case of single-threaded execution). If no value is inserted into a position, the position is assigned the default value.
+
+可选参数:
+
+- 在空位置替换的默认值。
+- 生成数组的长度。 这允许您接收所有聚合键的相同大小的数组。 使用此参数时,必须指定默认值。
+
+## groupArrayMovingSum {#agg_function-grouparraymovingsum}
+
+计算输入值的移动和。
+
+``` sql
+groupArrayMovingSum(numbers_for_summing)
+groupArrayMovingSum(window_size)(numbers_for_summing)
+```
+
+该函数可以将窗口大小作为参数。 如果未指定,则该函数的窗口大小等于列中的行数。
+
+**参数**
+
+- `numbers_for_summing` — [表达式](../syntax.md#syntax-expressions) 生成数值数据类型值。
+- `window_size` — Size of the calculation window.
+
+**返回值**
+
+- 与输入数据大小和类型相同的数组。
+
+**示例**
+
+示例表:
+
+``` sql
+CREATE TABLE t
+(
+    `int` UInt8,
+    `float` Float32,
+    `dec` Decimal32(2)
+)
+ENGINE = TinyLog
+```
+
+``` text
+┌─int─┬─float─┬──dec─┐
+│   1 │   1.1 │ 1.10 │
+│   2 │   2.2 │ 2.20 │
+│   4 │   4.4 │ 4.40 │
+│   7 │  7.77 │ 7.77 │
+└─────┴───────┴──────┘
+```
+
+查询:
+
+``` sql
+SELECT
+    groupArrayMovingSum(int) AS I,
+    groupArrayMovingSum(float) AS F,
+    groupArrayMovingSum(dec) AS D
+FROM t
+```
+
+``` text
+┌─I──────────┬─F───────────────────────────────┬─D──────────────────────┐
+│ [1,3,7,14] │ [1.1,3.3000002,7.7000003,15.47] │ [1.10,3.30,7.70,15.47] │
+└────────────┴─────────────────────────────────┴────────────────────────┘
+```
+
+``` sql
+SELECT
+    groupArrayMovingSum(2)(int) AS I,
+    groupArrayMovingSum(2)(float) AS F,
+    groupArrayMovingSum(2)(dec) AS D
+FROM t
+```
+
+``` text
+┌─I──────────┬─F───────────────────────────────┬─D──────────────────────┐
+│ [1,3,6,11] │ [1.1,3.3000002,6.6000004,12.17] │ [1.10,3.30,6.60,12.17] │
+└────────────┴─────────────────────────────────┴────────────────────────┘
+```
+
+## groupArrayMovingAvg {#agg_function-grouparraymovingavg}
+
+计算输入值的移动平均值。
+
+``` sql
+groupArrayMovingAvg(numbers_for_summing)
+groupArrayMovingAvg(window_size)(numbers_for_summing)
+```
+
+该函数可以将窗口大小作为参数。 如果未指定,则该函数的窗口大小等于列中的行数。
+
+**参数**
+
+- `numbers_for_summing` — [表达式](../syntax.md#syntax-expressions) 生成数值数据类型值。
+- `window_size` — Size of the calculation window.
+
+**返回值**
+
+- 与输入数据大小和类型相同的数组。
+
+该函数使用 [向零舍入](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero)。 它截断结果数据类型的小数位数。
+
+**示例**
+
+示例表 `t`:
+
+``` sql
+CREATE TABLE t
+(
+    `int` UInt8,
+    `float` Float32,
+    `dec` Decimal32(2)
+)
+ENGINE = TinyLog
+```
+
+``` text
+┌─int─┬─float─┬──dec─┐
+│   1 │   1.1 │ 1.10 │
+│   2 │   2.2 │ 2.20 │
+│   4 │   4.4 │ 4.40 │
+│   7 │  7.77 │ 7.77 │
+└─────┴───────┴──────┘
+```
+
+查询:
+
+``` sql
+SELECT
+    groupArrayMovingAvg(int) AS I,
+    groupArrayMovingAvg(float) AS F,
+    groupArrayMovingAvg(dec) AS D
+FROM t
+```
+
+``` text
+┌─I─────────┬─F───────────────────────────────────┬─D─────────────────────┐
+│ [0,0,1,3] │ [0.275,0.82500005,1.9250001,3.8675] │ [0.27,0.82,1.92,3.86] │
+└───────────┴─────────────────────────────────────┴───────────────────────┘
+```
+
+``` sql
+SELECT
+    groupArrayMovingAvg(2)(int) AS I,
+    groupArrayMovingAvg(2)(float) AS F,
+    groupArrayMovingAvg(2)(dec) AS D
+FROM t
+```
+
+``` text
+┌─I─────────┬─F────────────────────────────────┬─D─────────────────────┐
+│ [0,1,3,5] │ [0.55,1.6500001,3.3000002,6.085] │ [0.55,1.65,3.30,6.08] │
+└───────────┴──────────────────────────────────┴───────────────────────┘
+```
+
+## groupUniqArray(x), groupUniqArray(max\_size)(x) {#groupuniqarrayx-groupuniqarraymax-sizex}
+
+从不同的参数值创建一个数组。 内存消耗与 `uniqExact` 函数相同。
+
+第二个版本(带 `max_size` 参数)将结果数组的大小限制为 `max_size` 个元素。
+例如, `groupUniqArray(1)(x)` 相当于 `[any(x)]`.
+
+## 分位数 {#quantile}
+
+计算近似值 [分位数](https://en.wikipedia.org/wiki/Quantile) 的数字数据序列。
+
+此功能适用 [油藏采样](https://en.wikipedia.org/wiki/Reservoir_sampling) 随着储存器大小高达8192和随机数发生器进行采样。 结果是非确定性的。 要获得精确的分位数,请使用 [quantileExact](#quantileexact) 功能。
+
+当使用多个 `quantile*` 在查询中具有不同级别的函数,内部状态不会被组合(即查询的工作效率低于它可以)。 在这种情况下,使用 [分位数](#quantiles) 功能。
+
+**语法**
+
+``` sql
+quantile(level)(expr)
+```
+
+别名: `median`.
+
+**参数**
+
+- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` 值的范围 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median).
+- `expr` — Expression over the column values resulting in numeric [数据类型](../../sql_reference/data_types/index.md#data_types), [日期](../../sql_reference/data_types/date.md) 或 [日期时间](../../sql_reference/data_types/datetime.md). + +**返回值** + +- 指定电平的近似分位数。 + +类型: + +- [Float64](../../sql_reference/data_types/float.md) 对于数字数据类型输入。 +- [日期](../../sql_reference/data_types/date.md) 如果输入值具有 `Date` 类型。 +- [日期时间](../../sql_reference/data_types/datetime.md) 如果输入值具有 `DateTime` 类型。 + +**示例** + +输入表: + +``` text +┌─val─┐ +│ 1 │ +│ 1 │ +│ 2 │ +│ 3 │ +└─────┘ +``` + +查询: + +``` sql +SELECT quantile(val) FROM t +``` + +结果: + +``` text +┌─quantile(val)─┐ +│ 1.5 │ +└───────────────┘ +``` + +**另请参阅** + +- [中位数](#median) +- [分位数](#quantiles) + +## 量化确定 {#quantiledeterministic} + +计算近似值 [分位数](https://en.wikipedia.org/wiki/Quantile) 的数字数据序列。 + +此功能适用 [油藏采样](https://en.wikipedia.org/wiki/Reservoir_sampling) 与储层大小高达8192和采样的确定性算法。 结果是确定性的。 要获得精确的分位数,请使用 [quantileExact](#quantileexact) 功能。 + +当使用多个 `quantile*` 在查询中具有不同级别的函数,内部状态不会被组合(即查询的工作效率低于它可以)。 在这种情况下,使用 [分位数](#quantiles) 功能。 + +**语法** + +``` sql +quantileDeterministic(level)(expr, determinator) +``` + +别名: `medianDeterministic`. + +**参数** + +- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` 值的范围 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median). +- `expr` — Expression over the column values resulting in numeric [数据类型](../../sql_reference/data_types/index.md#data_types), [日期](../../sql_reference/data_types/date.md) 或 [日期时间](../../sql_reference/data_types/datetime.md). +- `determinator` — Number whose hash is used instead of a random number generator in the reservoir sampling algorithm to make the result of sampling deterministic. As a determinator you can use any deterministic positive number, for example, a user id or an event id. If the same determinator value occures too often, the function works incorrectly. + +**返回值** + +- 指定电平的近似分位数。 + +类型: + +- [Float64](../../sql_reference/data_types/float.md) 对于数字数据类型输入。 +- [日期](../../sql_reference/data_types/date.md) 如果输入值具有 `Date` 类型。 +- [日期时间](../../sql_reference/data_types/datetime.md) 如果输入值具有 `DateTime` 类型。 + +**示例** + +输入表: + +``` text +┌─val─┐ +│ 1 │ +│ 1 │ +│ 2 │ +│ 3 │ +└─────┘ +``` + +查询: + +``` sql +SELECT quantileDeterministic(val, 1) FROM t +``` + +结果: + +``` text +┌─quantileDeterministic(val, 1)─┐ +│ 1.5 │ +└───────────────────────────────┘ +``` + +**另请参阅** + +- [中位数](#median) +- [分位数](#quantiles) + +## quantileExact {#quantileexact} + +正是计算 [分位数](https://en.wikipedia.org/wiki/Quantile) 的数字数据序列。 + +To get exact value, all the passed values ​​are combined into an array, which is then partially sorted. Therefore, the function consumes `O(n)` 内存,其中 `n` 是传递的多个值。 然而,对于少量的值,该函数是非常有效的。 + +当使用多个 `quantile*` 在查询中具有不同级别的函数,内部状态不会被组合(即查询的工作效率低于它可以)。 在这种情况下,使用 [分位数](#quantiles) 功能。 + +**语法** + +``` sql +quantileExact(level)(expr) +``` + +别名: `medianExact`. + +**参数** + +- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` 值的范围 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median). +- `expr` — Expression over the column values resulting in numeric [数据类型](../../sql_reference/data_types/index.md#data_types), [日期](../../sql_reference/data_types/date.md) 或 [日期时间](../../sql_reference/data_types/datetime.md). 
+ +**返回值** + +- 指定电平的分位数。 + +类型: + +- [Float64](../../sql_reference/data_types/float.md) 对于数字数据类型输入。 +- [日期](../../sql_reference/data_types/date.md) 如果输入值具有 `Date` 类型。 +- [日期时间](../../sql_reference/data_types/datetime.md) 如果输入值具有 `DateTime` 类型。 + +**示例** + +查询: + +``` sql +SELECT quantileExact(number) FROM numbers(10) +``` + +结果: + +``` text +┌─quantileExact(number)─┐ +│ 5 │ +└───────────────────────┘ +``` + +**另请参阅** + +- [中位数](#median) +- [分位数](#quantiles) + +## 分位数加权 {#quantileexactweighted} + +正是计算 [分位数](https://en.wikipedia.org/wiki/Quantile) 数值数据序列,考虑到每个元素的权重。 + +To get exact value, all the passed values ​​are combined into an array, which is then partially sorted. Each value is counted with its weight, as if it is present `weight` times. A hash table is used in the algorithm. Because of this, if the passed values ​​are frequently repeated, the function consumes less RAM than [quantileExact](#quantileexact). 您可以使用此功能,而不是 `quantileExact` 并指定重量1。 + +当使用多个 `quantile*` 在查询中具有不同级别的函数,内部状态不会被组合(即查询的工作效率低于它可以)。 在这种情况下,使用 [分位数](#quantiles) 功能。 + +**语法** + +``` sql +quantileExactWeighted(level)(expr, weight) +``` + +别名: `medianExactWeighted`. + +**参数** + +- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` 值的范围 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median). +- `expr` — Expression over the column values resulting in numeric [数据类型](../../sql_reference/data_types/index.md#data_types), [日期](../../sql_reference/data_types/date.md) 或 [日期时间](../../sql_reference/data_types/datetime.md). +- `weight` — Column with weights of sequence members. Weight is a number of value occurrences. + +**返回值** + +- 指定电平的分位数。 + +类型: + +- [Float64](../../sql_reference/data_types/float.md) 对于数字数据类型输入。 +- [日期](../../sql_reference/data_types/date.md) 如果输入值具有 `Date` 类型。 +- [日期时间](../../sql_reference/data_types/datetime.md) 如果输入值具有 `DateTime` 类型。 + +**示例** + +输入表: + +``` text +┌─n─┬─val─┐ +│ 0 │ 3 │ +│ 1 │ 2 │ +│ 2 │ 1 │ +│ 5 │ 4 │ +└───┴─────┘ +``` + +查询: + +``` sql +SELECT quantileExactWeighted(n, val) FROM t +``` + +结果: + +``` text +┌─quantileExactWeighted(n, val)─┐ +│ 1 │ +└───────────────────────────────┘ +``` + +**另请参阅** + +- [中位数](#median) +- [分位数](#quantiles) + +## 分位定时 {#quantiletiming} + +随着确定的精度计算 [分位数](https://en.wikipedia.org/wiki/Quantile) 的数字数据序列。 + +结果是确定性的(它不依赖于查询处理顺序)。 该函数针对描述加载网页时间或后端响应时间等分布的序列进行了优化。 + +当使用多个 `quantile*` 在查询中具有不同级别的函数,内部状态不会被组合(即查询的工作效率低于它可以)。 在这种情况下,使用 [分位数](#quantiles) 功能。 + +**语法** + +``` sql +quantileTiming(level)(expr) +``` + +别名: `medianTiming`. + +**参数** + +- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` 值的范围 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median). + +- `expr` — [表达式](../syntax.md#syntax-expressions) 在一个列值返回 [浮动\*](../../sql_reference/data_types/float.md)-键入号码。 + + - If negative values are passed to the function, the behavior is undefined. + - If the value is greater than 30,000 (a page loading time of more than 30 seconds), it is assumed to be 30,000. + +**精度** + +计算是准确的,如果: + +- 值的总数不超过5670。 +- 总数值超过5670,但页面加载时间小于1024ms。 + +否则,计算结果将四舍五入到16毫秒的最接近倍数。 + +!!! note "注" + 对于计算页面加载时间分位数,此函数比 [分位数](#quantile). + +**返回值** + +- 指定电平的分位数。 + +类型: `Float32`. + +!!! 
note "注" + 如果没有值传递给函数(当使用 `quantileTimingIf`), [阿南](../../sql_reference/data_types/float.md#data_type-float-nan-inf) 被返回。 这样做的目的是将这些案例与导致零的案例区分开来。 看 [按条款订购](../statements/select.md#select-order-by) 对于排序注意事项 `NaN` 值。 + +**示例** + +输入表: + +``` text +┌─response_time─┐ +│ 72 │ +│ 112 │ +│ 126 │ +│ 145 │ +│ 104 │ +│ 242 │ +│ 313 │ +│ 168 │ +│ 108 │ +└───────────────┘ +``` + +查询: + +``` sql +SELECT quantileTiming(response_time) FROM t +``` + +结果: + +``` text +┌─quantileTiming(response_time)─┐ +│ 126 │ +└───────────────────────────────┘ +``` + +**另请参阅** + +- [中位数](#median) +- [分位数](#quantiles) + +## 分位时间加权 {#quantiletimingweighted} + +随着确定的精度计算 [分位数](https://en.wikipedia.org/wiki/Quantile) 根据每个序列成员的权重对数字数据序列进行处理。 + +结果是确定性的(它不依赖于查询处理顺序)。 该函数针对描述加载网页时间或后端响应时间等分布的序列进行了优化。 + +当使用多个 `quantile*` 在查询中具有不同级别的函数,内部状态不会被组合(即查询的工作效率低于它可以)。 在这种情况下,使用 [分位数](#quantiles) 功能。 + +**语法** + +``` sql +quantileTimingWeighted(level)(expr, weight) +``` + +别名: `medianTimingWeighted`. + +**参数** + +- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` 值的范围 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median). + +- `expr` — [表达式](../syntax.md#syntax-expressions) 在一个列值返回 [浮动\*](../../sql_reference/data_types/float.md)-键入号码。 + + - If negative values are passed to the function, the behavior is undefined. + - If the value is greater than 30,000 (a page loading time of more than 30 seconds), it is assumed to be 30,000. + +- `weight` — Column with weights of sequence elements. Weight is a number of value occurrences. + +**精度** + +计算是准确的,如果: + +- 值的总数不超过5670。 +- 总数值超过5670,但页面加载时间小于1024ms。 + +否则,计算结果将四舍五入到16毫秒的最接近倍数。 + +!!! note "注" + 对于计算页面加载时间分位数,此函数比 [分位数](#quantile). + +**返回值** + +- 指定电平的分位数。 + +类型: `Float32`. + +!!! note "注" + 如果没有值传递给函数(当使用 `quantileTimingIf`), [阿南](../../sql_reference/data_types/float.md#data_type-float-nan-inf) 被返回。 这样做的目的是将这些案例与导致零的案例区分开来。 看 [按条款订购](../statements/select.md#select-order-by) 对于排序注意事项 `NaN` 值。 + +**示例** + +输入表: + +``` text +┌─response_time─┬─weight─┐ +│ 68 │ 1 │ +│ 104 │ 2 │ +│ 112 │ 3 │ +│ 126 │ 2 │ +│ 138 │ 1 │ +│ 162 │ 1 │ +└───────────────┴────────┘ +``` + +查询: + +``` sql +SELECT quantileTimingWeighted(response_time, weight) FROM t +``` + +结果: + +``` text +┌─quantileTimingWeighted(response_time, weight)─┐ +│ 112 │ +└───────────────────────────────────────────────┘ +``` + +**另请参阅** + +- [中位数](#median) +- [分位数](#quantiles) + +## quantileTDigest {#quantiletdigest} + +计算近似值 [分位数](https://en.wikipedia.org/wiki/Quantile) 使用的数字数据序列 [t-digest](https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf) 算法。 + +最大误差为1%。 内存消耗 `log(n)`,哪里 `n` 是多个值。 结果取决于运行查询的顺序,并且是不确定的。 + +该功能的性能低于性能 [分位数](#quantile) 或 [分位定时](#quantiletiming). 在状态大小与精度的比率方面,这个函数比 `quantile`. + +当使用多个 `quantile*` 在查询中具有不同级别的函数,内部状态不会被组合(即查询的工作效率低于它可以)。 在这种情况下,使用 [分位数](#quantiles) 功能。 + +**语法** + +``` sql +quantileTDigest(level)(expr) +``` + +别名: `medianTDigest`. + +**参数** + +- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` 值的范围 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median). +- `expr` — Expression over the column values resulting in numeric [数据类型](../../sql_reference/data_types/index.md#data_types), [日期](../../sql_reference/data_types/date.md) 或 [日期时间](../../sql_reference/data_types/datetime.md). 
+
+**返回值**
+
+- 指定级别的近似分位数。
+
+类型:
+
+- [Float64](../../sql_reference/data_types/float.md) 对于数字数据类型输入。
+- [日期](../../sql_reference/data_types/date.md) 如果输入值具有 `Date` 类型。
+- [日期时间](../../sql_reference/data_types/datetime.md) 如果输入值具有 `DateTime` 类型。
+
+**示例**
+
+查询:
+
+``` sql
+SELECT quantileTDigest(number) FROM numbers(10)
+```
+
+结果:
+
+``` text
+┌─quantileTDigest(number)─┐
+│                     4.5 │
+└─────────────────────────┘
+```
+
+**另请参阅**
+
+- [中位数](#median)
+- [分位数](#quantiles)
+
+## quantileTDigestWeighted {#quantiletdigestweighted}
+
+计算近似值 [分位数](https://en.wikipedia.org/wiki/Quantile) 使用的数字数据序列 [t-digest](https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf) 算法。 该函数考虑了每个序列成员的权重。 最大误差为1%。 内存消耗为 `log(n)`,其中 `n` 是值的数量。
+
+该函数的性能低于 [分位数](#quantile) 或 [分位定时](#quantiletiming)。 但在状态大小与精度的比率方面,这个函数远优于 `quantile`。
+
+结果取决于运行查询的顺序,并且是不确定的。
+
+当使用多个 `quantile*` 在查询中具有不同级别的函数,内部状态不会被组合(即查询的工作效率低于它可以)。 在这种情况下,使用 [分位数](#quantiles) 功能。
+
+**语法**
+
+``` sql
+quantileTDigestWeighted(level)(expr, weight)
+```
+
+别名: `medianTDigestWeighted`.
+
+**参数**
+
+- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` 值的范围 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median).
+- `expr` — Expression over the column values resulting in numeric [数据类型](../../sql_reference/data_types/index.md#data_types), [日期](../../sql_reference/data_types/date.md) 或 [日期时间](../../sql_reference/data_types/datetime.md).
+- `weight` — Column with weights of sequence elements. Weight is a number of value occurrences.
+
+**返回值**
+
+- 指定级别的近似分位数。
+
+类型:
+
+- [Float64](../../sql_reference/data_types/float.md) 对于数字数据类型输入。
+- [日期](../../sql_reference/data_types/date.md) 如果输入值具有 `Date` 类型。
+- [日期时间](../../sql_reference/data_types/datetime.md) 如果输入值具有 `DateTime` 类型。
+
+**示例**
+
+查询:
+
+``` sql
+SELECT quantileTDigestWeighted(number, 1) FROM numbers(10)
+```
+
+结果:
+
+``` text
+┌─quantileTDigestWeighted(number, 1)─┐
+│                                4.5 │
+└────────────────────────────────────┘
+```
+
+**另请参阅**
+
+- [中位数](#median)
+- [分位数](#quantiles)
+
+## 中位数 {#median}
+
+`median*` 函数是相应 `quantile*` 函数的别名。 它们计算数字数据样本的中位数。
+
+功能:
+
+- `median` — Alias for [分位数](#quantile).
+- `medianDeterministic` — Alias for [量化确定](#quantiledeterministic).
+- `medianExact` — Alias for [quantileExact](#quantileexact).
+- `medianExactWeighted` — Alias for [分位数加权](#quantileexactweighted).
+- `medianTiming` — Alias for [分位定时](#quantiletiming).
+- `medianTimingWeighted` — Alias for [分位时间加权](#quantiletimingweighted).
+- `medianTDigest` — Alias for [quantileTDigest](#quantiletdigest).
+- `medianTDigestWeighted` — Alias for [quantileTDigestWeighted](#quantiletdigestweighted).
+
+**示例**
+
+输入表:
+
+``` text
+┌─val─┐
+│   1 │
+│   1 │
+│   2 │
+│   3 │
+└─────┘
+```
+
+查询:
+
+``` sql
+SELECT medianDeterministic(val, 1) FROM t
+```
+
+结果:
+
+``` text
+┌─medianDeterministic(val, 1)─┐
+│                         1.5 │
+└─────────────────────────────┘
+```
+
+## quantiles(level1, level2, …)(x) {#quantiles}
+
+所有 `quantile*` 函数都有相应的 `quantiles*` 函数: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesTDigest`. 这些函数在一次遍历中计算所列级别的所有分位数,并返回结果值的数组。
+
+## varSamp(x) {#varsampx}
+
+计算 `Σ((x - x̅)^2) / (n - 1)` 的值,其中 `n` 是样本大小,`x̅` 是 `x` 的平均值。
+
+如果传递的值构成随机变量的一个样本,它表示该随机变量方差的无偏估计。
+
+返回 `Float64`。 当 `n <= 1` 时,返回 `+∞`。
+
+## varPop(x) {#varpopx}
+
+计算 `Σ((x - x̅)^2) / n` 的值,其中 `n` 是样本大小,`x̅` 是 `x` 的平均值。
+
+换句话说,即一组值的离散程度。 返回 `Float64`。
+
+## stddevSamp(x) {#stddevsampx}
+
+结果等于 `varSamp(x)` 的平方根。
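+
+**示例**
+
+一个简单的示意性查询(基于 `numbers` 表函数;输出为浮点数,具体显示格式取决于客户端,下面仅在注释中给出近似值):
+
+``` sql
+-- 对 0..9 共10个数:varSamp ≈ 9.17,stddevSamp ≈ 3.03
+-- (作为对照:varPop = 8.25,stddevPop ≈ 2.87)
+SELECT varSamp(number) AS vs, stddevSamp(number) AS ss
+FROM numbers(10)
+```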
+ +## stddevPop(x) {#stddevpopx} + +结果等于平方根 `varPop(x)`. + +## topK(N)(x) {#topknx} + +返回指定列中近似最常见值的数组。 生成的数组按值的近似频率降序排序(而不是值本身)。 + +实现了 [过滤节省空间](http://www.l2f.inesc-id.pt/~fmmb/wiki/uploads/Work/misnis.ref0a.pdf) 基于reduce-and-combine算法的TopK分析算法 [并行节省空间](https://arxiv.org/pdf/1401.0702.pdf). + +``` sql +topK(N)(column) +``` + +此函数不提供保证的结果。 在某些情况下,可能会发生错误,并且可能会返回不是最常见值的常见值。 + +我们建议使用 `N < 10` 值;性能降低了大 `N` 值。 的最大值 `N = 65536`. + +**参数** + +- ‘N’ 是要返回的元素数。 + +如果省略该参数,则使用默认值10。 + +**参数** + +- ' x ' – The value to calculate frequency. + +**示例** + +就拿 [时间](../../getting_started/example_datasets/ontime.md) 数据集,并选择在三个最频繁出现的值 `AirlineID` 列。 + +``` sql +SELECT topK(3)(AirlineID) AS res +FROM ontime +``` + +``` text +┌─res─────────────────┐ +│ [19393,19790,19805] │ +└─────────────────────┘ +``` + +## topKWeighted {#topkweighted} + +类似于 `topK` 但需要一个整数类型的附加参数 - `weight`. 每一价值是占 `weight` 次频率计算。 + +**语法** + +``` sql +topKWeighted(N)(x, weight) +``` + +**参数** + +- `N` — The number of elements to return. + +**参数** + +- `x` – The value. +- `weight` — The weight. [UInt8](../../sql_reference/data_types/int_uint.md). + +**返回值** + +返回具有最大近似权重总和的值数组。 + +**示例** + +查询: + +``` sql +SELECT topKWeighted(10)(number, number) FROM numbers(1000) +``` + +结果: + +``` text +┌─topKWeighted(10)(number, number)──────────┐ +│ [999,998,997,996,995,994,993,992,991,990] │ +└───────────────────────────────────────────┘ +``` + +## covarSamp(x,y) {#covarsampx-y} + +计算的值 `Σ((x - x̅)(y - y̅)) / (n - 1)`. + +返回Float64。 当 `n <= 1`, returns +∞. + +## covarPop(x,y) {#covarpopx-y} + +计算的值 `Σ((x - x̅)(y - y̅)) / n`. + +## corr(x,y) {#corrx-y} + +计算Pearson相关系数: `Σ((x - x̅)(y - y̅)) / sqrt(Σ((x - x̅)^2) * Σ((y - y̅)^2))`. + +## categoricalInformationValue {#categoricalinformationvalue} + +计算的值 `(P(tag = 1) - P(tag = 0))(log(P(tag = 1)) - log(P(tag = 0)))` 对于每个类别。 + +``` sql +categoricalInformationValue(category1, category2, ..., tag) +``` + +结果指示离散(分类)要素如何使用 `[category1, category2, ...]` 有助于预测的价值的学习模型 `tag`. + +## simpleLinearRegression {#simplelinearregression} + +执行简单(一维)线性回归。 + +``` sql +simpleLinearRegression(x, y) +``` + +参数: + +- `x` — Column with dependent variable values. +- `y` — Column with explanatory variable values. + +返回值: + +常量 `(a, b)` 结果行的 `y = a*x + b`. + +**例** + +``` sql +SELECT arrayReduce('simpleLinearRegression', [0, 1, 2, 3], [0, 1, 2, 3]) +``` + +``` text +┌─arrayReduce('simpleLinearRegression', [0, 1, 2, 3], [0, 1, 2, 3])─┐ +│ (1,0) │ +└───────────────────────────────────────────────────────────────────┘ +``` + +``` sql +SELECT arrayReduce('simpleLinearRegression', [0, 1, 2, 3], [3, 4, 5, 6]) +``` + +``` text +┌─arrayReduce('simpleLinearRegression', [0, 1, 2, 3], [3, 4, 5, 6])─┐ +│ (1,3) │ +└───────────────────────────────────────────────────────────────────┘ +``` + +## 随机指标线上回归 {#agg_functions-stochasticlinearregression} + +该函数实现随机线性回归。 它支持自定义参数的学习率,L2正则化系数,迷你批量大小,并具有更新权重的方法很少 ([亚当](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Adam) (默认使用), [简单SGD](https://en.wikipedia.org/wiki/Stochastic_gradient_descent), [动量](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Momentum), [Nesterov](https://mipt.ru/upload/medialibrary/d7e/41-91.pdf)). + +### 参数 {#agg_functions-stochasticlinearregression-parameters} + +有4个可自定义的参数。 它们按顺序传递给函数,但是没有必要传递所有四个默认值将被使用,但是好的模型需要一些参数调整。 + +``` text +stochasticLinearRegression(1.0, 1.0, 10, 'SGD') +``` + +1. `learning rate` 当执行梯度下降步骤时,步长上的系数。 过大的学习率可能会导致模型的权重无限大。 默认值为 `0.00001`. +2. `l2 regularization coefficient` 这可能有助于防止过度拟合。 默认值为 `0.1`. +3. 
`mini-batch size` 设置元素的数量,这些元素将被计算和求和以执行梯度下降的一个步骤。 纯随机下降使用一个元素,但是具有小批量(约10个元素)使梯度步骤更稳定。 默认值为 `15`. +4. `method for updating weights` 他们是: `Adam` (默认情况下), `SGD`, `Momentum`, `Nesterov`. `Momentum` 和 `Nesterov` 需要更多的计算和内存,但是它们恰好在收敛速度和随机梯度方法的稳定性方面是有用的。 + +### 用途 {#agg_functions-stochasticlinearregression-usage} + +`stochasticLinearRegression` 用于两个步骤:拟合模型和预测新数据。 为了拟合模型并保存其状态以供以后使用,我们使用 `-State` combinator,它基本上保存了状态(模型权重等)。 +为了预测我们使用函数 [evalMLMethod](../functions/machine_learning_functions.md#machine_learning_methods-evalmlmethod),这需要一个状态作为参数以及特征来预测。 + + + +**1.** 适合 + +可以使用这种查询。 + +``` sql +CREATE TABLE IF NOT EXISTS train_data +( + param1 Float64, + param2 Float64, + target Float64 +) ENGINE = Memory; + +CREATE TABLE your_model ENGINE = Memory AS SELECT +stochasticLinearRegressionState(0.1, 0.0, 5, 'SGD')(target, param1, param2) +AS state FROM train_data; +``` + +在这里,我们还需要将数据插入到 `train_data` 桌子 参数的数量不是固定的,它只取决于参数的数量,传递到 `linearRegressionState`. 它们都必须是数值。 +请注意,带有目标值的列(我们想要学习预测)被插入作为第一个参数。 + +**2.** 预测 + +在将状态保存到表中之后,我们可以多次使用它进行预测,甚至与其他状态合并并创建新的更好的模型。 + +``` sql +WITH (SELECT state FROM your_model) AS model SELECT +evalMLMethod(model, param1, param2) FROM test_data +``` + +查询将返回一列预测值。 请注意,第一个参数 `evalMLMethod` 是 `AggregateFunctionState` 对象,接下来是要素列。 + +`test_data` 是一个像表 `train_data` 但可能不包含目标值。 + +### 注 {#agg_functions-stochasticlinearregression-notes} + +1. 要合并两个模型,用户可以创建这样的查询: + `sql SELECT state1 + state2 FROM your_models` + 哪里 `your_models` 表包含这两个模型。 此查询将返回new `AggregateFunctionState` 对象。 + +2. 如果没有,用户可以获取创建的模型的权重用于自己的目的,而不保存模型 `-State` 使用combinator。 + `sql SELECT stochasticLinearRegression(0.01)(target, param1, param2) FROM train_data` + 这种查询将拟合模型并返回其权重-首先是权重,它对应于模型的参数,最后一个是偏差。 所以在上面的例子中,查询将返回一个具有3个值的列。 + +**另请参阅** + +- [stochasticLogisticRegression](#agg_functions-stochasticlogisticregression) +- [线性回归和逻辑回归之间的区别](https://stackoverflow.com/questions/12146914/what-is-the-difference-between-linear-regression-and-logistic-regression) + +## stochasticLogisticRegression {#agg_functions-stochasticlogisticregression} + +该函数实现随机逻辑回归。 它可以用于二进制分类问题,支持与stochasticLinearRegression相同的自定义参数,并以相同的方式工作。 + +### 参数 {#agg_functions-stochasticlogisticregression-parameters} + +参数与stochasticLinearRegression中的参数完全相同: +`learning rate`, `l2 regularization coefficient`, `mini-batch size`, `method for updating weights`. +欲了解更多信息,请参阅 [参数](#agg_functions-stochasticlinearregression-parameters). + +``` text +stochasticLogisticRegression(1.0, 1.0, 10, 'SGD') +``` + +1. 适合 + + + + See the `Fitting` section in the [stochasticLinearRegression](#stochasticlinearregression-usage-fitting) description. + + Predicted labels have to be in \[-1, 1\]. + +1. 预测 + + + + Using saved state we can predict probability of object having label `1`. + + ``` sql + WITH (SELECT state FROM your_model) AS model SELECT + evalMLMethod(model, param1, param2) FROM test_data + ``` + + The query will return a column of probabilities. Note that first argument of `evalMLMethod` is `AggregateFunctionState` object, next are columns of features. + + We can also set a bound of probability, which assigns elements to different labels. + + ``` sql + SELECT ans < 1.1 AND ans > 0.5 FROM + (WITH (SELECT state FROM your_model) AS model SELECT + evalMLMethod(model, param1, param2) AS ans FROM test_data) + ``` + + Then the result will be labels. + + `test_data` is a table like `train_data` but may not contain target value. 
+ +**另请参阅** + +- [随机指标线上回归](#agg_functions-stochasticlinearregression) +- [线性回归和逻辑回归之间的差异。](https://stackoverflow.com/questions/12146914/what-is-the-difference-between-linear-regression-and-logistic-regression) + +## groupBitmapAnd {#groupbitmapand} + +计算位图列的AND,返回UInt64类型的基数,如果添加后缀状态,则返回 [位图对象](../../sql_reference/functions/bitmap_functions.md). + +``` sql +groupBitmapAnd(expr) +``` + +**参数** + +`expr` – An expression that results in `AggregateFunction(groupBitmap, UInt*)` 类型。 + +**返回值** + +的价值 `UInt64` 类型。 + +**示例** + +``` sql +DROP TABLE IF EXISTS bitmap_column_expr_test2; +CREATE TABLE bitmap_column_expr_test2 +( + tag_id String, + z AggregateFunction(groupBitmap, UInt32) +) +ENGINE = MergeTree +ORDER BY tag_id; + +INSERT INTO bitmap_column_expr_test2 VALUES ('tag1', bitmapBuild(cast([1,2,3,4,5,6,7,8,9,10] as Array(UInt32)))); +INSERT INTO bitmap_column_expr_test2 VALUES ('tag2', bitmapBuild(cast([6,7,8,9,10,11,12,13,14,15] as Array(UInt32)))); +INSERT INTO bitmap_column_expr_test2 VALUES ('tag3', bitmapBuild(cast([2,4,6,8,10,12] as Array(UInt32)))); + +SELECT groupBitmapAnd(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%'); +┌─groupBitmapAnd(z)─┐ +│ 3 │ +└───────────────────┘ + +SELECT arraySort(bitmapToArray(groupBitmapAndState(z))) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%'); +┌─arraySort(bitmapToArray(groupBitmapAndState(z)))─┐ +│ [6,8,10] │ +└──────────────────────────────────────────────────┘ +``` + +## groupBitmapOr {#groupbitmapor} + +计算位图列的OR,返回UInt64类型的基数,如果添加后缀状态,则返回 [位图对象](../../sql_reference/functions/bitmap_functions.md). 这相当于 `groupBitmapMerge`. + +``` sql +groupBitmapOr(expr) +``` + +**参数** + +`expr` – An expression that results in `AggregateFunction(groupBitmap, UInt*)` 类型。 + +**返回值** + +的价值 `UInt64` 类型。 + +**示例** + +``` sql +DROP TABLE IF EXISTS bitmap_column_expr_test2; +CREATE TABLE bitmap_column_expr_test2 +( + tag_id String, + z AggregateFunction(groupBitmap, UInt32) +) +ENGINE = MergeTree +ORDER BY tag_id; + +INSERT INTO bitmap_column_expr_test2 VALUES ('tag1', bitmapBuild(cast([1,2,3,4,5,6,7,8,9,10] as Array(UInt32)))); +INSERT INTO bitmap_column_expr_test2 VALUES ('tag2', bitmapBuild(cast([6,7,8,9,10,11,12,13,14,15] as Array(UInt32)))); +INSERT INTO bitmap_column_expr_test2 VALUES ('tag3', bitmapBuild(cast([2,4,6,8,10,12] as Array(UInt32)))); + +SELECT groupBitmapOr(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%'); +┌─groupBitmapOr(z)─┐ +│ 15 │ +└──────────────────┘ + +SELECT arraySort(bitmapToArray(groupBitmapOrState(z))) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%'); +┌─arraySort(bitmapToArray(groupBitmapOrState(z)))─┐ +│ [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] │ +└─────────────────────────────────────────────────┘ +``` + +## groupBitmapXor {#groupbitmapxor} + +计算位图列的XOR,返回UInt64类型的基数,如果添加后缀状态,则返回 [位图对象](../../sql_reference/functions/bitmap_functions.md). 
+
+``` sql
+groupBitmapXor(expr)
+```
+
+**参数**
+
+`expr` – An expression that results in `AggregateFunction(groupBitmap, UInt*)` 类型。
+
+**返回值**
+
+`UInt64` 类型的值。
+
+**示例**
+
+``` sql
+DROP TABLE IF EXISTS bitmap_column_expr_test2;
+CREATE TABLE bitmap_column_expr_test2
+(
+    tag_id String,
+    z AggregateFunction(groupBitmap, UInt32)
+)
+ENGINE = MergeTree
+ORDER BY tag_id;
+
+INSERT INTO bitmap_column_expr_test2 VALUES ('tag1', bitmapBuild(cast([1,2,3,4,5,6,7,8,9,10] as Array(UInt32))));
+INSERT INTO bitmap_column_expr_test2 VALUES ('tag2', bitmapBuild(cast([6,7,8,9,10,11,12,13,14,15] as Array(UInt32))));
+INSERT INTO bitmap_column_expr_test2 VALUES ('tag3', bitmapBuild(cast([2,4,6,8,10,12] as Array(UInt32))));
+
+SELECT groupBitmapXor(z) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%');
+┌─groupBitmapXor(z)─┐
+│                10 │
+└───────────────────┘
+
+SELECT arraySort(bitmapToArray(groupBitmapXorState(z))) FROM bitmap_column_expr_test2 WHERE like(tag_id, 'tag%');
+┌─arraySort(bitmapToArray(groupBitmapXorState(z)))─┐
+│ [1,3,5,6,8,10,11,13,14,15]                       │
+└──────────────────────────────────────────────────┘
+```
+
+[原始文章](https://clickhouse.tech/docs/en/query_language/agg_functions/reference/)
diff --git a/docs/zh/data_types/nested_data_structures/aggregatefunction.md b/docs/zh/sql_reference/data_types/aggregatefunction.md
similarity index 82%
rename from docs/zh/data_types/nested_data_structures/aggregatefunction.md
rename to docs/zh/sql_reference/data_types/aggregatefunction.md
index 3153150d2bd..e1fb7b1d133 100644
--- a/docs/zh/data_types/nested_data_structures/aggregatefunction.md
+++ b/docs/zh/sql_reference/data_types/aggregatefunction.md
@@ -1,3 +1,4 @@
+
 # AggregateFunction(name, types\_of\_arguments…) {#data-type-aggregatefunction}
 
 聚合函数的中间状态,可以通过聚合函数名称加`-State`后缀的形式得到它。与此同时,当您需要访问该类型的最终状态数据时,您需要以相同的聚合函数名加`-Merge`后缀的形式来得到最终状态数据。
@@ -23,7 +24,7 @@ CREATE TABLE t
 ) ENGINE = ...
``` -上述中的[uniq](../../query_language/agg_functions/reference.md#agg_function-uniq), anyIf ([any](../../query_language/agg_functions/reference.md#agg_function-any)+[If](../../query_language/agg_functions/combinators.md#agg-functions-combinator-if)) 以及 [quantiles](../../query_language/agg_functions/reference.md) 都为ClickHouse中支持的聚合函数。 +上述中的[uniq](../../sql_reference/data_types/aggregatefunction.md#agg_function-uniq), anyIf ([任何](../../sql_reference/data_types/aggregatefunction.md#agg_function-any)+[如果](../../sql_reference/data_types/aggregatefunction.md#agg-functions-combinator-if)) 以及 [分位数](../../sql_reference/data_types/aggregatefunction.md) 都为ClickHouse中支持的聚合函数。 ## 使用指南 {#shi-yong-zhi-nan} @@ -58,6 +59,6 @@ SELECT uniqMerge(state) FROM (SELECT uniqState(UserID) AS state FROM table GROUP ## 使用示例 {#shi-yong-shi-li} -请参阅 [AggregatingMergeTree](../../operations/table_engines/aggregatingmergetree.md) 的说明 +请参阅 [AggregatingMergeTree](../../sql_reference/data_types/aggregatefunction.md) 的说明 [来源文章](https://clickhouse.tech/docs/en/data_types/nested_data_structures/aggregatefunction/) diff --git a/docs/zh/data_types/array.md b/docs/zh/sql_reference/data_types/array.md similarity index 91% rename from docs/zh/data_types/array.md rename to docs/zh/sql_reference/data_types/array.md index 774210b0d29..7a35647d20e 100644 --- a/docs/zh/data_types/array.md +++ b/docs/zh/sql_reference/data_types/array.md @@ -1,4 +1,5 @@ -# Array(T) {#data-type-array} + +# 阵列(T) {#data-type-array} 由 `T` 类型元素组成的数组。 @@ -42,7 +43,7 @@ ## 使用数据类型 {#shi-yong-shu-ju-lei-xing} -ClickHouse会自动检测数组元素,并根据元素计算出存储这些元素最小的数据类型。如果在元素中存在 [NULL](../query_language/syntax.md#null-literal) 或存在 [Nullable](nullable.md#data_type-nullable) 类型元素,那么数组的元素类型将会变成 [Nullable](nullable.md)。 +ClickHouse会自动检测数组元素,并根据元素计算出存储这些元素最小的数据类型。如果在元素中存在 [NULL](../../sql_reference/data_types/array.md#null-literal) 或存在 [可为空](nullable.md#data_type-nullable) 类型元素,那么数组的元素类型将会变成 [可为空](nullable.md)。 如果 ClickHouse 无法确定数据类型,它将产生异常。当尝试同时创建一个包含字符串和数字的数组时会发生这种情况 (`SELECT array(1, 'a')`)。 diff --git a/docs/zh/data_types/boolean.md b/docs/zh/sql_reference/data_types/boolean.md similarity index 73% rename from docs/zh/data_types/boolean.md rename to docs/zh/sql_reference/data_types/boolean.md index 1918bb1c56b..26c8ac5cdd5 100644 --- a/docs/zh/data_types/boolean.md +++ b/docs/zh/sql_reference/data_types/boolean.md @@ -1,3 +1,4 @@ -# Boolean Values {#boolean-values} + +# 布尔值 {#boolean-values} 没有单独的类型来存储布尔值。可以使用 UInt8 类型,取值限制为 0 或 1。 diff --git a/docs/zh/data_types/date.md b/docs/zh/sql_reference/data_types/date.md similarity index 94% rename from docs/zh/data_types/date.md rename to docs/zh/sql_reference/data_types/date.md index 96ee60d53a8..18bdb507f37 100644 --- a/docs/zh/data_types/date.md +++ b/docs/zh/sql_reference/data_types/date.md @@ -1,4 +1,5 @@ -# Date {#date} + +# 日期 {#date} 日期类型,用两个字节存储,表示从 1970-01-01 (无符号) 到当前的日期值。允许存储从 Unix 纪元开始到编译阶段定义的上限阈值常量(目前上限是2106年,但最终完全支持的年份为2105)。最小值输出为0000-00-00。 diff --git a/docs/zh/data_types/datetime.md b/docs/zh/sql_reference/data_types/datetime.md similarity index 95% rename from docs/zh/data_types/datetime.md rename to docs/zh/sql_reference/data_types/datetime.md index 50c5964360c..1122131614b 100644 --- a/docs/zh/data_types/datetime.md +++ b/docs/zh/sql_reference/data_types/datetime.md @@ -1,4 +1,5 @@ -# DateTime {#data_type-datetime} + +# 日期时间 {#data_type-datetime} 时间戳类型。用四个字节(无符号的)存储 Unix 时间戳)。允许存储与日期类型相同的范围内的值。最小值为 0000-00-00 00:00:00。时间戳类型值精确到秒(没有闰秒)。 diff --git a/docs/zh/sql_reference/data_types/datetime64.md 
b/docs/zh/sql_reference/data_types/datetime64.md new file mode 100644 index 00000000000..dd87486cee5 --- /dev/null +++ b/docs/zh/sql_reference/data_types/datetime64.md @@ -0,0 +1,104 @@ +--- +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_priority: 49 +toc_title: DateTime64 +--- + +# Datetime64 {#data_type-datetime64} + +允许存储时间instant间,可以表示为日历日期和一天中的时间,具有定义的亚秒精度 + +刻度尺寸(精度):10-精度 秒 + +语法: + +``` sql +DateTime64(precision, [timezone]) +``` + +在内部,存储数据作为一些 ‘ticks’ 自纪元开始(1970-01-01 00:00:00UTC)作为Int64. 刻度分辨率由precision参数确定。 此外,该 `DateTime64` 类型可以存储时区是相同的整个列,影响如何的值 `DateTime64` 类型值以文本格式显示,以及如何解析指定为字符串的值 (‘2020-01-01 05:00:01.000’). 时区不存储在表的行中(或resultset中),而是存储在列元数据中。 查看详细信息 [日期时间](datetime.md). + +## 例 {#examples} + +**1.** 创建一个表 `DateTime64`-输入列并将数据插入其中: + +``` sql +CREATE TABLE dt +( + `timestamp` DateTime64(3, 'Europe/Moscow'), + `event_id` UInt8 +) +ENGINE = TinyLog +``` + +``` sql +INSERT INTO dt Values (1546300800000, 1), ('2019-01-01 00:00:00', 2) +``` + +``` sql +SELECT * FROM dt +``` + +``` text +┌───────────────timestamp─┬─event_id─┐ +│ 2019-01-01 03:00:00.000 │ 1 │ +│ 2019-01-01 00:00:00.000 │ 2 │ +└─────────────────────────┴──────────┘ +``` + +- 将日期时间作为整数插入时,将其视为适当缩放的Unix时间戳(UTC)。 `1546300800000` (精度为3)表示 `'2019-01-01 00:00:00'` UTC. 然而,作为 `timestamp` 列有 `Europe/Moscow` (UTC+3)指定的时区,当输出为字符串时,该值将显示为 `'2019-01-01 03:00:00'` +- 当插入字符串值作为日期时间时,它被视为处于列时区。 `'2019-01-01 00:00:00'` 将被视为 `Europe/Moscow` 时区并存储为 `1546290000000`. + +**2.** 过滤 `DateTime64` 值 + +``` sql +SELECT * FROM dt WHERE timestamp = toDateTime64('2019-01-01 00:00:00', 3, 'Europe/Moscow') +``` + +``` text +┌───────────────timestamp─┬─event_id─┐ +│ 2019-01-01 00:00:00.000 │ 2 │ +└─────────────────────────┴──────────┘ +``` + +不像 `DateTime`, `DateTime64` 值不转换为 `String` 自动 + +**3.** 获取一个时区 `DateTime64`-类型值: + +``` sql +SELECT toDateTime64(now(), 3, 'Europe/Moscow') AS column, toTypeName(column) AS x +``` + +``` text +┌──────────────────column─┬─x──────────────────────────────┐ +│ 2019-10-16 04:12:04.000 │ DateTime64(3, 'Europe/Moscow') │ +└─────────────────────────┴────────────────────────────────┘ +``` + +**4.** 时区转换 + +``` sql +SELECT +toDateTime64(timestamp, 3, 'Europe/London') as lon_time, +toDateTime64(timestamp, 3, 'Europe/Moscow') as mos_time +FROM dt +``` + +``` text +┌───────────────lon_time──┬────────────────mos_time─┐ +│ 2019-01-01 00:00:00.000 │ 2019-01-01 03:00:00.000 │ +│ 2018-12-31 21:00:00.000 │ 2019-01-01 00:00:00.000 │ +└─────────────────────────┴─────────────────────────┘ +``` + +## 另请参阅 {#see-also} + +- [类型转换函数](../../sql_reference/functions/type_conversion_functions.md) +- [用于处理日期和时间的函数](../../sql_reference/functions/date_time_functions.md) +- [用于处理数组的函数](../../sql_reference/functions/array_functions.md) +- [该 `date_time_input_format` 设置](../../operations/settings/settings.md#settings-date_time_input_format) +- [该 `timezone` 服务器配置参数](../../operations/server_configuration_parameters/settings.md#server_configuration_parameters-timezone) +- [使用日期和时间的操作员](../../sql_reference/operators.md#operators-datetime) +- [`Date` 数据类型](date.md) +- [`DateTime` 数据类型](datetime.md) diff --git a/docs/zh/data_types/decimal.md b/docs/zh/sql_reference/data_types/decimal.md similarity index 85% rename from docs/zh/data_types/decimal.md rename to docs/zh/sql_reference/data_types/decimal.md index 1503da90d89..24bc1f70415 100644 --- a/docs/zh/data_types/decimal.md +++ b/docs/zh/sql_reference/data_types/decimal.md @@ -1,4 +1,5 @@ -# Decimal(P, S), Decimal32(S), Decimal64(S), Decimal128(S) 
{#decimalp-s-decimal32s-decimal64s-decimal128s} + +# Decimal(P,S),Decimal32(S),Decimal64(S),Decimal128(S) {#decimalp-s-decimal32s-decimal64s-decimal128s} 有符号的定点数,可在加、减和乘法运算过程中保持精度。对于除法,最低有效数字会被丢弃(不舍入)。 @@ -8,15 +9,15 @@ - S - 规模。有效范围:\[0:P\],决定数字的小数部分中包含的小数位数。 对于不同的 P 参数值 Decimal 表示,以下例子都是同义的: -- P from \[ 1 : 9 \] - for Decimal32(S) -- P from \[ 10 : 18 \] - for Decimal64(S) -- P from \[ 19 : 38 \] - for Decimal128(S) +-P从\[1:9\]-对于Decimal32(S) +-P从\[10:18\]-对于Decimal64(小号) +-P从\[19:38\]-对于Decimal128(S) ## 十进制值范围 {#shi-jin-zhi-zhi-fan-wei} -- Decimal32(S) - ( -1 \* 10^(9 - S), 1 \* 10^(9 - S) ) -- Decimal64(S) - ( -1 \* 10^(18 - S), 1 \* 10^(18 - S) ) -- Decimal128(S) - ( -1 \* 10^(38 - S), 1 \* 10^(38 - S) ) +- Decimal32(S) - ( -1 \* 10^(9 - S),1\*10^(9-S) ) +- Decimal64(S) - ( -1 \* 10^(18 - S),1\*10^(18-S) ) +- Decimal128(S) - ( -1 \* 10^(38 - S),1\*10^(38-S) ) 例如,Decimal32(4) 可以表示 -99999.9999 至 99999.9999 的数值,步长为0.0001。 @@ -30,9 +31,9 @@ 对Decimal的二进制运算导致更宽的结果类型(无论参数的顺序如何)。 -- Decimal64(S1) Decimal32(S2) -\> Decimal64(S) -- Decimal128(S1) Decimal32(S2) -\> Decimal128(S) -- Decimal128(S1) Decimal64(S2) -\> Decimal128(S) +- Decimal64(S1) Decimal32(S2)-\>Decimal64(S) +- Decimal128(S1) Decimal32(S2)-\>Decimal128(S) +- Decimal128(S1) Decimal64(S2)-\>Decimal128(S) 精度变化的规则: diff --git a/docs/zh/sql_reference/data_types/domains/index.md b/docs/zh/sql_reference/data_types/domains/index.md new file mode 100644 index 00000000000..7df13d51e54 --- /dev/null +++ b/docs/zh/sql_reference/data_types/domains/index.md @@ -0,0 +1,8 @@ +--- +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_folder_title: "\u57DF" +toc_priority: 56 +--- + + diff --git a/docs/zh/data_types/domains/ipv4.md b/docs/zh/sql_reference/data_types/domains/ipv4.md similarity index 99% rename from docs/zh/data_types/domains/ipv4.md rename to docs/zh/sql_reference/data_types/domains/ipv4.md index 65c066fb487..26ed4d84922 100644 --- a/docs/zh/data_types/domains/ipv4.md +++ b/docs/zh/sql_reference/data_types/domains/ipv4.md @@ -1,3 +1,4 @@ + ## IPv4 {#ipv4} `IPv4`是与`UInt32`类型保持二进制兼容的Domain类型,其用于存储IPv4地址的值。它提供了更为紧凑的二进制存储的同时支持识别可读性更加友好的输入输出格式。 diff --git a/docs/zh/data_types/domains/ipv6.md b/docs/zh/sql_reference/data_types/domains/ipv6.md similarity index 99% rename from docs/zh/data_types/domains/ipv6.md rename to docs/zh/sql_reference/data_types/domains/ipv6.md index bc0f95932aa..b147fb6db84 100644 --- a/docs/zh/data_types/domains/ipv6.md +++ b/docs/zh/sql_reference/data_types/domains/ipv6.md @@ -1,3 +1,4 @@ + ## IPv6 {#ipv6} `IPv6`是与`FixedString(16)`类型保持二进制兼容的Domain类型,其用于存储IPv6地址的值。它提供了更为紧凑的二进制存储的同时支持识别可读性更加友好的输入输出格式。 diff --git a/docs/zh/data_types/domains/overview.md b/docs/zh/sql_reference/data_types/domains/overview.md similarity index 98% rename from docs/zh/data_types/domains/overview.md rename to docs/zh/sql_reference/data_types/domains/overview.md index 6c59860132e..b330bad18c0 100644 --- a/docs/zh/data_types/domains/overview.md +++ b/docs/zh/sql_reference/data_types/domains/overview.md @@ -1,4 +1,5 @@ -# Domains {#domains} + +# 域 {#domains} Domain类型是特定实现的类型,它总是与某个现存的基础类型保持二进制兼容的同时添加一些额外的特性,以能够在维持磁盘数据不变的情况下使用这些额外的特性。目前ClickHouse暂不支持自定义domain类型。 diff --git a/docs/zh/data_types/enum.md b/docs/zh/sql_reference/data_types/enum.md similarity index 95% rename from docs/zh/data_types/enum.md rename to docs/zh/sql_reference/data_types/enum.md index 034406a303b..87ada143638 100644 --- a/docs/zh/data_types/enum.md +++ b/docs/zh/sql_reference/data_types/enum.md @@ -1,4 +1,5 @@ -# Enum8, 
Enum16 {#enum8-enum16} + +# Enum8,Enum16 {#enum8-enum16} 包括 `Enum8` 和 `Enum16` 类型。`Enum` 保存 `'string'= integer` 的对应关系。在 ClickHouse 中,尽管用户使用的是字符串常量,但所有含有 `Enum` 数据类型的操作都是按照包含整数的值来执行。这在性能方面比使用 `String` 数据类型更有效。 @@ -65,9 +66,9 @@ `Enum8` 类型的每个值范围是 `-128 ... 127`,`Enum16` 类型的每个值范围是 `-32768 ... 32767`。所有的字符串或者数字都必须是不一样的。允许存在空字符串。如果某个 Enum 类型被指定了(在表定义的时候),数字可以是任意顺序。然而,顺序并不重要。 -`Enum` 中的字符串和数值都不能是 [NULL](../query_language/syntax.md)。 +`Enum` 中的字符串和数值都不能是 [NULL](../../sql_reference/data_types/enum.md)。 -`Enum` 包含在 [Nullable](nullable.md) 类型中。因此,如果您使用此查询创建一个表 +`Enum` 包含在 [可为空](nullable.md) 类型中。因此,如果您使用此查询创建一个表 CREATE TABLE t_enum_nullable ( diff --git a/docs/zh/data_types/fixedstring.md b/docs/zh/sql_reference/data_types/fixedstring.md similarity index 82% rename from docs/zh/data_types/fixedstring.md rename to docs/zh/sql_reference/data_types/fixedstring.md index 27945b74fc8..c8e71e69303 100644 --- a/docs/zh/data_types/fixedstring.md +++ b/docs/zh/sql_reference/data_types/fixedstring.md @@ -1,4 +1,5 @@ -# FixedString {#fixedstring} + +# 固定字符串 {#fixedstring} 固定长度 N 的字符串(N 必须是严格的正自然数)。 @@ -23,7 +24,7 @@ 当向ClickHouse中插入数据时, -- 如果字符串包含的字节数少于\`N’,将对字符串末尾进行空字节填充。 +- 如果字符串包含的字节数少于\`N',将对字符串末尾进行空字节填充。 - 如果字符串包含的字节数大于`N`,将抛出`Too large value for FixedString(N)`异常。 当做数据查询时,ClickHouse不会删除字符串末尾的空字节。 如果使用`WHERE`子句,则须要手动添加空字节以匹配`FixedString`的值。 以下示例阐明了如何将`WHERE`子句与`FixedString`一起使用。 @@ -51,6 +52,6 @@ WHERE a = 'b\0' 这种方式与MySQL的`CHAR`类型的方式不同(MySQL中使用空格填充字符串,并在输出时删除空格)。 -请注意,`FixedString(N)`的长度是个常量。仅由空字符组成的字符串,函数[length](../query_language/functions/array_functions.md#array_functions-length)返回值为`N`,而函数[empty](../query_language/functions/string_functions.md#string_functions-empty)的返回值为`1`。 +请注意,`FixedString(N)`的长度是个常量。仅由空字符组成的字符串,函数[长度](../../sql_reference/data_types/fixedstring.md#array_functions-length)返回值为`N`,而函数[空](../../sql_reference/data_types/fixedstring.md#string_functions-empty)的返回值为`1`。 [来源文章](https://clickhouse.tech/docs/en/data_types/fixedstring/) diff --git a/docs/zh/data_types/float.md b/docs/zh/sql_reference/data_types/float.md similarity index 89% rename from docs/zh/data_types/float.md rename to docs/zh/sql_reference/data_types/float.md index f43000ffa35..bdc8093a9a9 100644 --- a/docs/zh/data_types/float.md +++ b/docs/zh/sql_reference/data_types/float.md @@ -1,4 +1,5 @@ -# Float32, Float64 {#float32-float64} + +# Float32,Float64 {#float32-float64} [浮点数](https://en.wikipedia.org/wiki/IEEE_754)。 @@ -29,7 +30,7 @@ SELECT 1 - 0.9 - 当一行行阅读浮点数的时候,浮点数的结果可能不是机器最近显示的数值。 -## NaN and Inf {#data_type-float-nan-inf} +## 南和Inf {#data_type-float-nan-inf} 与标准SQL相比,ClickHouse 支持以下类别的浮点数: @@ -67,4 +68,4 @@ SELECT -0.5 / 0 │ nan │ └──────────────┘ -可以在 [ORDER BY 子句](../query_language/select.md) 查看更多关于 `NaN` 排序的规则。 +可以在 [ORDER BY 子句](../../sql_reference/data_types/float.md) 查看更多关于 `NaN` 排序的规则。 diff --git a/docs/zh/data_types/index.md b/docs/zh/sql_reference/data_types/index.md similarity index 99% rename from docs/zh/data_types/index.md rename to docs/zh/sql_reference/data_types/index.md index 70aa976cb11..8df3911ab36 100644 --- a/docs/zh/data_types/index.md +++ b/docs/zh/sql_reference/data_types/index.md @@ -1,3 +1,4 @@ + # 数据类型 {#data_types} ClickHouse 可以在数据表中存储多种数据类型。 diff --git a/docs/zh/sql_reference/data_types/int_uint.md b/docs/zh/sql_reference/data_types/int_uint.md new file mode 100644 index 00000000000..b74bbcf178f --- /dev/null +++ b/docs/zh/sql_reference/data_types/int_uint.md @@ -0,0 +1,18 @@ + +# UInt8,UInt16,UInt32,UInt64,Int8,Int16,Int32,Int64 {#uint8-uint16-uint32-uint64-int8-int16-int32-int64} + 
+固定长度的整型,包括有符号整型或无符号整型。 + +## 整型范围 {#zheng-xing-fan-wei} + +- Int8-\[-128:127\] +- Int16-\[-32768:32767\] +- Int32-\[-2147483648:2147483647\] +- Int64-\[-9223372036854775808:9223372036854775807\] + +## 无符号整型范围 {#wu-fu-hao-zheng-xing-fan-wei} + +- UInt8-\[0:255\] +- UInt16-\[0:65535\] +- UInt32-\[0:4294967295\] +- UInt64-\[0:18446744073709551615\] diff --git a/docs/zh/data_types/nested_data_structures/index.md b/docs/zh/sql_reference/data_types/nested_data_structures/index.md similarity index 97% rename from docs/zh/data_types/nested_data_structures/index.md rename to docs/zh/sql_reference/data_types/nested_data_structures/index.md index 3914064674e..fdeb9fe6ac5 100644 --- a/docs/zh/data_types/nested_data_structures/index.md +++ b/docs/zh/sql_reference/data_types/nested_data_structures/index.md @@ -1 +1,2 @@ + # 嵌套数据结构 {#qian-tao-shu-ju-jie-gou} diff --git a/docs/zh/data_types/nested_data_structures/nested.md b/docs/zh/sql_reference/data_types/nested_data_structures/nested.md similarity index 99% rename from docs/zh/data_types/nested_data_structures/nested.md rename to docs/zh/sql_reference/data_types/nested_data_structures/nested.md index d2fd1e3a630..6ac26c0eeba 100644 --- a/docs/zh/data_types/nested_data_structures/nested.md +++ b/docs/zh/sql_reference/data_types/nested_data_structures/nested.md @@ -1,3 +1,4 @@ + # Nested(Name1 Type1, Name2 Type2, …) {#nestedname1-type1-name2-type2} 嵌套数据结构类似于嵌套表。嵌套数据结构的参数(列名和类型)与 CREATE 查询类似。每个表可以包含任意多行嵌套数据结构。 diff --git a/docs/zh/data_types/nullable.md b/docs/zh/sql_reference/data_types/nullable.md similarity index 72% rename from docs/zh/data_types/nullable.md rename to docs/zh/sql_reference/data_types/nullable.md index ae4a2066fd7..6ece5f3c178 100644 --- a/docs/zh/data_types/nullable.md +++ b/docs/zh/sql_reference/data_types/nullable.md @@ -1,8 +1,9 @@ -# Nullable(TypeName) {#data_type-nullable} -允许用特殊标记 ([NULL](../query_language/syntax.md)) 表示«缺失值»,可以与 `TypeName` 的正常值存放一起。例如,`Nullable(Int8)` 类型的列可以存储 `Int8` 类型值,而没有值的行将存储 `NULL`。 +# 可为空(类型名称) {#data_type-nullable} -对于 `TypeName`,不能使用复合数据类型 [Array](array.md) 和 [Tuple](tuple.md)。复合数据类型可以包含 `Nullable` 类型值,例如`Array(Nullable(Int8))`。 +允许用特殊标记 ([NULL](../../sql_reference/data_types/nullable.md)) 表示«缺失值»,可以与 `TypeName` 的正常值存放一起。例如,`Nullable(Int8)` 类型的列可以存储 `Int8` 类型值,而没有值的行将存储 `NULL`。 + +对于 `TypeName`,不能使用复合数据类型 [阵列](array.md) 和 [元组](tuple.md)。复合数据类型可以包含 `Nullable` 类型值,例如`Array(Nullable(Int8))`。 `Nullable` 类型字段不能包含在表索引中。 diff --git a/docs/zh/data_types/special_data_types/expression.md b/docs/zh/sql_reference/data_types/special_data_types/expression.md similarity index 64% rename from docs/zh/data_types/special_data_types/expression.md rename to docs/zh/sql_reference/data_types/special_data_types/expression.md index 86b4d5591c7..d4fb3257f60 100644 --- a/docs/zh/data_types/special_data_types/expression.md +++ b/docs/zh/sql_reference/data_types/special_data_types/expression.md @@ -1,3 +1,4 @@ -# Expression {#expression} + +# 表达式 {#expression} 用于表示高阶函数中的Lambd表达式。 diff --git a/docs/zh/data_types/special_data_types/index.md b/docs/zh/sql_reference/data_types/special_data_types/index.md similarity index 71% rename from docs/zh/data_types/special_data_types/index.md rename to docs/zh/sql_reference/data_types/special_data_types/index.md index 5963c377f01..64d93783cb9 100644 --- a/docs/zh/data_types/special_data_types/index.md +++ b/docs/zh/sql_reference/data_types/special_data_types/index.md @@ -1,3 +1,4 @@ -# Special Data Types {#special-data-types} + +# 特殊数据类型 {#special-data-types} 
特殊数据类型的值既不能存在表中也不能在结果中输出,但可用于查询的中间结果。 diff --git a/docs/zh/data_types/special_data_types/interval.md b/docs/zh/sql_reference/data_types/special_data_types/interval.md similarity index 51% rename from docs/zh/data_types/special_data_types/interval.md rename to docs/zh/sql_reference/data_types/special_data_types/interval.md index 8a37476579c..7a7ac888775 100644 --- a/docs/zh/data_types/special_data_types/interval.md +++ b/docs/zh/sql_reference/data_types/special_data_types/interval.md @@ -1,20 +1,23 @@ --- -en_copy: true +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_priority: 61 +toc_title: "\u95F4\u9694" --- -# Interval {#data-type-interval} +# 间隔 {#data-type-interval} -The family of data types representing time and date intervals. The resulting types of the [INTERVAL](../../query_language/operators.md#operator-interval) operator. +表示时间和日期间隔的数据类型族。 由此产生的类型 [INTERVAL](../../../sql_reference/operators.md#operator-interval) 接线员 -!!! warning "Warning" - `Interval` data type values can’t be stored in tables. +!!! warning "警告" + `Interval` 数据类型值不能存储在表中。 -Structure: +结构: -- Time interval as an unsigned integer value. -- Type of an interval. +- 时间间隔作为无符号整数值。 +- 间隔的类型。 -Supported interval types: +支持的时间间隔类型: - `SECOND` - `MINUTE` @@ -25,7 +28,7 @@ Supported interval types: - `QUARTER` - `YEAR` -For each interval type, there is a separate data type. For example, the `DAY` interval corresponds to the `IntervalDay` data type: +对于每个间隔类型,都有一个单独的数据类型。 例如, `DAY` 间隔对应于 `IntervalDay` 数据类型: ``` sql SELECT toTypeName(INTERVAL 4 DAY) @@ -37,9 +40,9 @@ SELECT toTypeName(INTERVAL 4 DAY) └──────────────────────────────┘ ``` -## Usage Remarks {#data-type-interval-usage-remarks} +## 使用说明 {#data-type-interval-usage-remarks} -You can use `Interval`-type values in arithmetical operations with [Date](../../data_types/date.md) and [DateTime](../../data_types/datetime.md)-type values. For example, you can add 4 days to the current time: +您可以使用 `Interval`-在算术运算类型值 [日期](../../../sql_reference/data_types/date.md) 和 [日期时间](../../../sql_reference/data_types/datetime.md)-类型值。 例如,您可以将4天添加到当前时间: ``` sql SELECT now() as current_date_time, current_date_time + INTERVAL 4 DAY @@ -51,9 +54,9 @@ SELECT now() as current_date_time, current_date_time + INTERVAL 4 DAY └─────────────────────┴───────────────────────────────┘ ``` -Intervals with different types can’t be combined. You can’t use intervals like `4 DAY 1 HOUR`. Specify intervals in units that are smaller or equal to the smallest unit of the interval, for example, the interval `1 day and an hour` interval can be expressed as `25 HOUR` or `90000 SECOND`. +不同类型的间隔不能合并。 你不能使用间隔,如 `4 DAY 1 HOUR`. 以小于或等于间隔的最小单位的单位指定间隔,例如,间隔 `1 day and an hour` 间隔可以表示为 `25 HOUR` 或 `90000 SECOND`. -You can’t perform arithmetical operations with `Interval`-type values, but you can add intervals of different types consequently to values in `Date` or `DateTime` data types. 
For example: +你不能执行算术运算 `Interval`-类型值,但你可以添加不同类型的时间间隔,因此值 `Date` 或 `DateTime` 数据类型。 例如: ``` sql SELECT now() AS current_date_time, current_date_time + INTERVAL 4 DAY + INTERVAL 3 HOUR @@ -65,7 +68,7 @@ SELECT now() AS current_date_time, current_date_time + INTERVAL 4 DAY + INTERVAL └─────────────────────┴────────────────────────────────────────────────────────┘ ``` -The following query causes an exception: +以下查询将导致异常: ``` sql select now() AS current_date_time, current_date_time + (INTERVAL 4 DAY + INTERVAL 3 HOUR) @@ -76,7 +79,7 @@ Received exception from server (version 19.14.1): Code: 43. DB::Exception: Received from localhost:9000. DB::Exception: Wrong argument types for function plus: if one argument is Interval, then another must be Date or DateTime.. ``` -## See Also {#see-also} +## 另请参阅 {#see-also} -- [INTERVAL](../../query_language/operators.md#operator-interval) operator -- [toInterval](../../query_language/functions/type_conversion_functions.md#function-tointerval) type convertion functions +- [INTERVAL](../../../sql_reference/operators.md#operator-interval) 接线员 +- [toInterval](../../../sql_reference/functions/type_conversion_functions.md#function-tointerval) 类型转换函数 diff --git a/docs/zh/data_types/special_data_types/nothing.md b/docs/zh/sql_reference/data_types/special_data_types/nothing.md similarity index 61% rename from docs/zh/data_types/special_data_types/nothing.md rename to docs/zh/sql_reference/data_types/special_data_types/nothing.md index 7a6bf0e035b..ebc2b572983 100644 --- a/docs/zh/data_types/special_data_types/nothing.md +++ b/docs/zh/sql_reference/data_types/special_data_types/nothing.md @@ -1,8 +1,9 @@ -# Nothing {#nothing} + +# 没什么 {#nothing} 此数据类型的唯一目的是表示不是期望值的情况。 所以不能创建一个 `Nothing` 类型的值。 -例如,文本 [NULL](../../query_language/syntax.md#null-literal) 的类型为 `Nullable(Nothing)`。详情请见 [Nullable](../../data_types/nullable.md)。 +例如,文本 [NULL](../../../sql_reference/data_types/special_data_types/nothing.md#null-literal) 的类型为 `Nullable(Nothing)`。详情请见 [可为空](../../../sql_reference/data_types/special_data_types/nothing.md)。 `Nothing` 类型也可以用来表示空数组: diff --git a/docs/zh/data_types/special_data_types/set.md b/docs/zh/sql_reference/data_types/special_data_types/set.md similarity index 72% rename from docs/zh/data_types/special_data_types/set.md rename to docs/zh/sql_reference/data_types/special_data_types/set.md index d1f2ad368ee..0e1f9c6cc35 100644 --- a/docs/zh/data_types/special_data_types/set.md +++ b/docs/zh/sql_reference/data_types/special_data_types/set.md @@ -1,3 +1,4 @@ -# Set {#set} + +# 设置 {#set} 可以用在 IN 表达式的右半部分。 diff --git a/docs/zh/data_types/string.md b/docs/zh/sql_reference/data_types/string.md similarity index 97% rename from docs/zh/data_types/string.md rename to docs/zh/sql_reference/data_types/string.md index 742452ee0bf..3c9226787a2 100644 --- a/docs/zh/data_types/string.md +++ b/docs/zh/sql_reference/data_types/string.md @@ -1,4 +1,5 @@ -# String {#string} + +# 字符串 {#string} 字符串可以任意长度的。它可以包含任意的字节集,包含空字节。因此,字符串类型可以代替其他 DBMSs 中的 VARCHAR、BLOB、CLOB 等类型。 diff --git a/docs/zh/data_types/tuple.md b/docs/zh/sql_reference/data_types/tuple.md similarity index 86% rename from docs/zh/data_types/tuple.md rename to docs/zh/sql_reference/data_types/tuple.md index 4efeb651e76..e3520722c97 100644 --- a/docs/zh/data_types/tuple.md +++ b/docs/zh/sql_reference/data_types/tuple.md @@ -1,8 +1,9 @@ + # Tuple(T1, T2, …) {#tuplet1-t2} 元组,其中每个元素都有单独的 [类型](index.md#data_types)。 -不能在表中存储元组(除了内存表)。它们可以用于临时列分组。在查询中,IN 表达式和带特定参数的 lambda 函数可以来对临时列进行分组。更多信息,请参阅 [IN 
操作符](../query_language/select.md) and [Higher order functions](../query_language/functions/higher_order_functions.md)。 +不能在表中存储元组(除了内存表)。它们可以用于临时列分组。在查询中,IN 表达式和带特定参数的 lambda 函数可以来对临时列进行分组。更多信息,请参阅 [IN 操作符](../../sql_reference/data_types/tuple.md) 和 [高阶函数](../../sql_reference/data_types/tuple.md)。 元组可以是查询的结果。在这种情况下,对于JSON以外的文本格式,括号中的值是逗号分隔的。在JSON格式中,元组作为数组输出(在方括号中)。 @@ -28,7 +29,7 @@ ## 元组中的数据类型 {#yuan-zu-zhong-de-shu-ju-lei-xing} -在动态创建元组时,ClickHouse 会自动为元组的每一个参数赋予最小可表达的类型。如果参数为 [NULL](../query_language/syntax.md#null-literal),那这个元组对应元素是 [Nullable](nullable.md)。 +在动态创建元组时,ClickHouse 会自动为元组的每一个参数赋予最小可表达的类型。如果参数为 [NULL](../../sql_reference/data_types/tuple.md#null-literal),那这个元组对应元素是 [可为空](nullable.md)。 自动数据类型检测示例: diff --git a/docs/zh/sql_reference/data_types/uuid.md b/docs/zh/sql_reference/data_types/uuid.md new file mode 100644 index 00000000000..4c35fcf2d9c --- /dev/null +++ b/docs/zh/sql_reference/data_types/uuid.md @@ -0,0 +1,77 @@ +--- +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_priority: 46 +toc_title: UUID +--- + +# UUID {#uuid-data-type} + +通用唯一标识符(UUID)是用于标识记录的16字节数。 有关UUID的详细信息,请参阅 [维基百科](https://en.wikipedia.org/wiki/Universally_unique_identifier). + +UUID类型值的示例如下所示: + +``` text +61f0c404-5cb3-11e7-907b-a6006ad3dba0 +``` + +如果在插入新记录时未指定UUID列值,则UUID值将用零填充: + +``` text +00000000-0000-0000-0000-000000000000 +``` + +## 如何生成 {#how-to-generate} + +要生成UUID值,ClickHouse提供了 [generateuidv4](../../sql_reference/functions/uuid_functions.md) 功能。 + +## 用法示例 {#usage-example} + +**示例1** + +此示例演示如何创建具有UUID类型列的表并将值插入到表中。 + +``` sql +CREATE TABLE t_uuid (x UUID, y String) ENGINE=TinyLog +``` + +``` sql +INSERT INTO t_uuid SELECT generateUUIDv4(), 'Example 1' +``` + +``` sql +SELECT * FROM t_uuid +``` + +``` text +┌────────────────────────────────────x─┬─y─────────┐ +│ 417ddc5d-e556-4d27-95dd-a34d84e46a50 │ Example 1 │ +└──────────────────────────────────────┴───────────┘ +``` + +**示例2** + +在此示例中,插入新记录时未指定UUID列值。 + +``` sql +INSERT INTO t_uuid (y) VALUES ('Example 2') +``` + +``` sql +SELECT * FROM t_uuid +``` + +``` text +┌────────────────────────────────────x─┬─y─────────┐ +│ 417ddc5d-e556-4d27-95dd-a34d84e46a50 │ Example 1 │ +│ 00000000-0000-0000-0000-000000000000 │ Example 2 │ +└──────────────────────────────────────┴───────────┘ +``` + +## 限制 {#restrictions} + +UUID数据类型仅支持以下功能 [字符串](string.md) 数据类型也支持(例如, [min](../../sql_reference/aggregate_functions/reference.md#agg_function-min), [max](../../sql_reference/aggregate_functions/reference.md#agg_function-max),和 [计数](../../sql_reference/aggregate_functions/reference.md#agg_function-count)). + +算术运算不支持UUID数据类型(例如, [abs](../../sql_reference/functions/arithmetic_functions.md#arithm_func-abs))或聚合函数,例如 [sum](../../sql_reference/aggregate_functions/reference.md#agg_function-sum) 和 [avg](../../sql_reference/aggregate_functions/reference.md#agg_function-avg). 
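+
+下面给出一个简短的草图(沿用上文示例中的 `t_uuid` 表),演示本节所述受支持的聚合函数;这仅是示意性用法:
+
+``` sql
+-- min、max 和 count 是上文列出的受 UUID 支持的聚合函数
+SELECT count(), min(x), max(x) FROM t_uuid
+```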
+ +[原始文章](https://clickhouse.tech/docs/en/data_types/uuid/) diff --git a/docs/zh/sql_reference/dictionaries/external_dictionaries/external_dicts.md b/docs/zh/sql_reference/dictionaries/external_dictionaries/external_dicts.md new file mode 100644 index 00000000000..afbdd082576 --- /dev/null +++ b/docs/zh/sql_reference/dictionaries/external_dictionaries/external_dicts.md @@ -0,0 +1,56 @@ +--- +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_priority: 39 +toc_title: "\u6982\u8FF0" +--- + +# 外部字典 {#dicts-external-dicts} + +您可以从各种数据源添加自己的字典。 字典的数据源可以是本地文本或可执行文件、HTTP(s)资源或其他DBMS。 有关详细信息,请参阅 “[外部字典的来源](external_dicts_dict_sources.md)”. + +ClickHouse: + +- 完全或部分存储在RAM中的字典。 +- 定期更新字典并动态加载缺失的值。 换句话说,字典可以动态加载。 +- 允许创建外部字典与xml文件或 [DDL查询](../../statements/create.md#create-dictionary-query). + +外部字典的配置可以位于一个或多个xml文件中。 配置的路径在指定 [dictionaries\_config](../../../operations/server_configuration_parameters/settings.md#server_configuration_parameters-dictionaries_config) 参数。 + +字典可以在服务器启动或首次使用时加载,具体取决于 [dictionaries\_lazy\_load](../../../operations/server_configuration_parameters/settings.md#server_configuration_parameters-dictionaries_lazy_load) 设置。 + +字典配置文件具有以下格式: + +``` xml + + An optional element with any content. Ignored by the ClickHouse server. + + + /etc/metrika.xml + + + + + + + + +``` + +你可以 [配置](external_dicts_dict.md) 同一文件中的任意数量的字典。 + +[字典的DDL查询](../../statements/create.md#create-dictionary-query) 在服务器配置中不需要任何其他记录。 它们允许使用字典作为一流的实体,如表或视图。 + +!!! attention "注意" + 您可以通过在一个小字典中描述它来转换小字典的值 `SELECT` 查询(见 [变换](../../../sql_reference/functions/other_functions.md) 功能)。 此功能与外部字典无关。 + +## 另请参阅 {#ext-dicts-see-also} + +- [配置外部字典](external_dicts_dict.md) +- [在内存中存储字典](external_dicts_dict_layout.md) +- [字典更新](external_dicts_dict_lifetime.md) +- [外部字典的来源](external_dicts_dict_sources.md) +- [字典键和字段](external_dicts_dict_structure.md) +- [使用外部字典的函数](../../../sql_reference/functions/ext_dict_functions.md) + +[原始文章](https://clickhouse.tech/docs/en/query_language/dicts/external_dicts/) diff --git a/docs/zh/sql_reference/dictionaries/external_dictionaries/external_dicts_dict.md b/docs/zh/sql_reference/dictionaries/external_dictionaries/external_dicts_dict.md new file mode 100644 index 00000000000..df64d31d2a9 --- /dev/null +++ b/docs/zh/sql_reference/dictionaries/external_dictionaries/external_dicts_dict.md @@ -0,0 +1,53 @@ +--- +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_priority: 40 +toc_title: "\u914D\u7F6E\u5916\u90E8\u5B57\u5178" +--- + +# 配置外部字典 {#dicts-external-dicts-dict} + +如果使用xml文件配置字典,则比字典配置具有以下结构: + +``` xml + + dict_name + + + + + + + + + + + + + + + + + +``` + +相应的 [DDL-查询](../../statements/create.md#create-dictionary-query) 具有以下结构: + +``` sql +CREATE DICTIONARY dict_name +( + ... -- attributes +) +PRIMARY KEY ... -- complex or single key configuration +SOURCE(...) -- Source configuration +LAYOUT(...) -- Memory layout configuration +LIFETIME(...) -- Lifetime of dictionary in memory +``` + +- `name` – The identifier that can be used to access the dictionary. Use the characters `[a-zA-Z0-9_\-]`. +- [来源](external_dicts_dict_sources.md) — Source of the dictionary. +- [布局](external_dicts_dict_layout.md) — Dictionary layout in memory. +- [结构](external_dicts_dict_structure.md) — Structure of the dictionary . A key and attributes that can be retrieved by this key. +- [使用寿命](external_dicts_dict_lifetime.md) — Frequency of dictionary updates. 
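+
+作为示意,下面是一个将上述各部分组合在一起的假设性 DDL 草图(其中的字典名、字段和源表均为虚构,仅用于说明各子句的位置):
+
+``` sql
+CREATE DICTIONARY user_names
+(
+    id UInt64,              -- 键列
+    name String DEFAULT ''  -- 数据属性
+)
+PRIMARY KEY id  -- 键配置
+SOURCE(CLICKHOUSE(host 'localhost' port 9000 user 'default' password '' db 'default' table 'users'))  -- 源配置
+LAYOUT(FLAT())             -- 内存布局配置
+LIFETIME(MIN 300 MAX 360)  -- 字典在内存中的生存期
+```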
+ +[原始文章](https://clickhouse.tech/docs/en/query_language/dicts/external_dicts_dict/) diff --git a/docs/zh/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_hierarchical.md b/docs/zh/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_hierarchical.md new file mode 100644 index 00000000000..925e5f6c8f4 --- /dev/null +++ b/docs/zh/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_hierarchical.md @@ -0,0 +1,70 @@ +--- +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_priority: 45 +toc_title: "\u5206\u5C42\u5B57\u5178" +--- + +# 分层字典 {#hierarchical-dictionaries} + +ClickHouse支持分层字典与 [数字键](external_dicts_dict_structure.md#ext_dict-numeric-key). + +看看下面的层次结构: + +``` text +0 (Common parent) +│ +├── 1 (Russia) +│ │ +│ └── 2 (Moscow) +│ │ +│ └── 3 (Center) +│ +└── 4 (Great Britain) + │ + └── 5 (London) +``` + +这种层次结构可以表示为下面的字典表。 + +| region\_id | parent\_region | region\_name | +|------------|----------------|--------------| +| 1 | 0 | 俄罗斯 | +| 2 | 1 | 莫斯科 | +| 3 | 2 | 中心 | +| 4 | 0 | 英国 | +| 5 | 4 | 伦敦 | + +此表包含一列 `parent_region` 包含该元素的最近父项的键。 + +ClickHouse支持 [等级](external_dicts_dict_structure.md#hierarchical-dict-attr) 属性为 [外部字典](index.md) 属性。 此属性允许您配置类似于上述的分层字典。 + +该 [独裁主义](../../../sql_reference/functions/ext_dict_functions.md#dictgethierarchy) 函数允许您获取元素的父链。 + +对于我们的例子,dictionary的结构可以是以下内容: + +``` xml + + + + region_id + + + + parent_region + UInt64 + 0 + true + + + + region_name + String + + + + + +``` + +[原始文章](https://clickhouse.tech/docs/en/query_language/dicts/external_dicts_dict_hierarchical/) diff --git a/docs/zh/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_layout.md b/docs/zh/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_layout.md new file mode 100644 index 00000000000..4dcf5f4c1b0 --- /dev/null +++ b/docs/zh/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_layout.md @@ -0,0 +1,373 @@ +--- +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_priority: 41 +toc_title: "\u5728\u5185\u5B58\u4E2D\u5B58\u50A8\u5B57\u5178" +--- + +# 在内存中存储字典 {#dicts-external-dicts-dict-layout} + +有多种方法可以将字典存储在内存中。 + +我们建议 [平](#flat), [散列](#dicts-external_dicts_dict_layout-hashed) 和 [complex\_key\_hashed](#complex-key-hashed). 其提供最佳的处理速度。 + +不建议使用缓存,因为性能可能较差,并且难以选择最佳参数。 阅读更多的部分 “[缓存](#cache)”. + +有几种方法可以提高字典性能: + +- 调用该函数以使用后的字典 `GROUP BY`. +- 将要提取的属性标记为"注射"。 如果不同的属性值对应于不同的键,则称为注射属性。 所以当 `GROUP BY` 使用由键获取属性值的函数,此函数会自动取出 `GROUP BY`. + +ClickHouse为字典中的错误生成异常。 错误示例: + +- 无法加载正在访问的字典。 +- 查询错误 `cached` 字典 + +您可以查看外部字典的列表及其状态 `system.dictionaries` 桌子 + +配置如下所示: + +``` xml + + + ... + + + + + + ... + + +``` + +相应的 [DDL-查询](../../statements/create.md#create-dictionary-query): + +``` sql +CREATE DICTIONARY (...) +... +LAYOUT(LAYOUT_TYPE(param value)) -- layout settings +... +``` + +## 在内存中存储字典的方法 {#ways-to-store-dictionaries-in-memory} + +- [平](#flat) +- [散列](#dicts-external_dicts_dict_layout-hashed) +- [sparse\_hashed](#dicts-external_dicts_dict_layout-sparse_hashed) +- [缓存](#cache) +- [range\_hashed](#range-hashed) +- [complex\_key\_hashed](#complex-key-hashed) +- [complex\_key\_cache](#complex-key-cache) +- [ip\_trie](#ip-trie) + +### 平 {#flat} + +字典以平面数组的形式完全存储在内存中。 字典使用多少内存? 
量与最大键的大小(在使用的空间中)成正比。 + +字典键具有 `UInt64` 类型和值限制为500,000。 如果在创建字典时发现较大的键,ClickHouse将引发异常,不会创建字典。 + +支持所有类型的来源。 更新时,数据(来自文件或表)将完整读取。 + +此方法在存储字典的所有可用方法中提供了最佳性能。 + +配置示例: + +``` xml + + + +``` + +或 + +``` sql +LAYOUT(FLAT()) +``` + +### 散列 {#dicts-external_dicts_dict_layout-hashed} + +该字典以哈希表的形式完全存储在内存中。 字典中可以包含任意数量的带有任意标识符的元素,在实践中,键的数量可以达到数千万项。 + +支持所有类型的来源。 更新时,数据(来自文件或表)将完整读取。 + +配置示例: + +``` xml + + + +``` + +或 + +``` sql +LAYOUT(HASHED()) +``` + +### sparse\_hashed {#dicts-external_dicts_dict_layout-sparse_hashed} + +类似于 `hashed`,但使用更少的内存,有利于更多的CPU使用率。 + +配置示例: + +``` xml + + + +``` + +``` sql +LAYOUT(SPARSE_HASHED()) +``` + +### complex\_key\_hashed {#complex-key-hashed} + +这种类型的存储是用于复合 [键](external_dicts_dict_structure.md). 类似于 `hashed`. + +配置示例: + +``` xml + + + +``` + +``` sql +LAYOUT(COMPLEX_KEY_HASHED()) +``` + +### range\_hashed {#range-hashed} + +字典以哈希表的形式存储在内存中,其中包含有序范围及其相应值的数组。 + +此存储方法的工作方式与散列方式相同,除了键之外,还允许使用日期/时间(任意数字类型)范围。 + +示例:该表格包含每个广告客户的折扣,格式为: + +``` text ++---------|-------------|-------------|------+ +| advertiser id | discount start date | discount end date | amount | ++===============+=====================+===================+========+ +| 123 | 2015-01-01 | 2015-01-15 | 0.15 | ++---------|-------------|-------------|------+ +| 123 | 2015-01-16 | 2015-01-31 | 0.25 | ++---------|-------------|-------------|------+ +| 456 | 2015-01-01 | 2015-01-15 | 0.05 | ++---------|-------------|-------------|------+ +``` + +要对日期范围使用示例,请定义 `range_min` 和 `range_max` 中的元素 [结构](external_dicts_dict_structure.md). 这些元素必须包含元素 `name` 和`type` (如果 `type` 如果没有指定,则默认类型将使用-Date)。 `type` 可以是任何数字类型(Date/DateTime/UInt64/Int32/others)。 + +示例: + +``` xml + + + Id + + + first + Date + + + last + Date + + ... +``` + +或 + +``` sql +CREATE DICTIONARY somedict ( + id UInt64, + first Date, + last Date +) +PRIMARY KEY id +LAYOUT(RANGE_HASHED()) +RANGE(MIN first MAX last) +``` + +要使用这些字典,您需要将附加参数传递给 `dictGetT` 函数,为其选择一个范围: + +``` sql +dictGetT('dict_name', 'attr_name', id, date) +``` + +此函数返回指定的值 `id`s和包含传递日期的日期范围。 + +算法的详细信息: + +- 如果 `id` 未找到或范围未找到 `id`,它返回字典的默认值。 +- 如果存在重叠范围,则可以使用任意范围。 +- 如果范围分隔符是 `NULL` 或无效日期(如1900-01-01或2039-01-01),范围保持打开状态。 范围可以在两侧打开。 + +配置示例: + +``` xml + + + + ... + + + + + + + + Abcdef + + + StartTimeStamp + UInt64 + + + EndTimeStamp + UInt64 + + + XXXType + String + + + + + + +``` + +或 + +``` sql +CREATE DICTIONARY somedict( + Abcdef UInt64, + StartTimeStamp UInt64, + EndTimeStamp UInt64, + XXXType String DEFAULT '' +) +PRIMARY KEY Abcdef +RANGE(MIN StartTimeStamp MAX EndTimeStamp) +``` + +### 缓存 {#cache} + +字典存储在具有固定数量的单元格的缓存中。 这些单元格包含经常使用的元素。 + +搜索字典时,首先搜索缓存。 对于每个数据块,所有在缓存中找不到或过期的密钥都从源请求,使用 `SELECT attrs... FROM db.table WHERE id IN (k1, k2, ...)`. 然后将接收到的数据写入高速缓存。 + +对于缓存字典,过期 [使用寿命](external_dicts_dict_lifetime.md) 可以设置高速缓存中的数据。 如果更多的时间比 `lifetime` 自从在单元格中加载数据以来,单元格的值不被使用,并且在下次需要使用时重新请求它。 +这是存储字典的所有方法中最不有效的。 缓存的速度在很大程度上取决于正确的设置和使用场景。 缓存类型字典只有在命中率足够高(推荐99%或更高)时才能表现良好。 您可以查看平均命中率 `system.dictionaries` 桌子 + +要提高缓存性能,请使用以下子查询 `LIMIT`,并从外部调用字典函数。 + +支持 [来源](external_dicts_dict_sources.md):MySQL的,ClickHouse的,可执行文件,HTTP. + +设置示例: + +``` xml + + + + 1000000000 + + +``` + +或 + +``` sql +LAYOUT(CACHE(SIZE_IN_CELLS 1000000000)) +``` + +设置足够大的缓存大小。 你需要尝试选择细胞的数量: + +1. 设置一些值。 +2. 运行查询,直到缓存完全满。 +3. 使用评估内存消耗 `system.dictionaries` 桌子 +4. 增加或减少单元数,直到达到所需的内存消耗。 + +!!! warning "警告" + 不要使用ClickHouse作为源,因为处理随机读取的查询速度很慢。 + +### complex\_key\_cache {#complex-key-cache} + +这种类型的存储是用于复合 [键](external_dicts_dict_structure.md). 类似于 `cache`. 
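+
+例如(一个示意性草图,单元格数量沿用上文 `cache` 一节的示例值,并非推荐配置):
+
+``` sql
+LAYOUT(COMPLEX_KEY_CACHE(SIZE_IN_CELLS 1000000000))
+```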
+ +### ip\_trie {#ip-trie} + +这种类型的存储用于将网络前缀(IP地址)映射到ASN等元数据。 + +示例:该表包含网络前缀及其对应的AS号码和国家代码: + +``` text + +-----------|-----|------+ + | prefix | asn | cca2 | + +=================+=======+========+ + | 202.79.32.0/20 | 17501 | NP | + +-----------|-----|------+ + | 2620:0:870::/48 | 3856 | US | + +-----------|-----|------+ + | 2a02:6b8:1::/48 | 13238 | RU | + +-----------|-----|------+ + | 2001:db8::/32 | 65536 | ZZ | + +-----------|-----|------+ +``` + +使用此类布局时,结构必须具有复合键。 + +示例: + +``` xml + + + + prefix + String + + + + asn + UInt32 + + + + cca2 + String + ?? + + ... +``` + +或 + +``` sql +CREATE DICTIONARY somedict ( + prefix String, + asn UInt32, + cca2 String DEFAULT '??' +) +PRIMARY KEY prefix +``` + +该键必须只有一个包含允许的IP前缀的字符串类型属性。 还不支持其他类型。 + +对于查询,必须使用相同的函数 (`dictGetT` 与元组)至于具有复合键的字典: + +``` sql +dictGetT('dict_name', 'attr_name', tuple(ip)) +``` + +该函数采用任一 `UInt32` 对于IPv4,或 `FixedString(16)` 碌莽禄Ipv6拢IPv6: + +``` sql +dictGetString('prefix', 'asn', tuple(IPv6StringToNum('2001:db8::1'))) +``` + +还不支持其他类型。 该函数返回与此IP地址对应的前缀的属性。 如果有重叠的前缀,则返回最具体的前缀。 + +数据存储在一个 `trie`. 它必须完全适合RAM。 + +[原始文章](https://clickhouse.tech/docs/en/query_language/dicts/external_dicts_dict_layout/) diff --git a/docs/zh/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_lifetime.md b/docs/zh/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_lifetime.md new file mode 100644 index 00000000000..66ff7124ba1 --- /dev/null +++ b/docs/zh/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_lifetime.md @@ -0,0 +1,86 @@ +--- +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_priority: 42 +toc_title: "\u5B57\u5178\u66F4\u65B0" +--- + +# 字典更新 {#dictionary-updates} + +ClickHouse定期更新字典。 完全下载字典的更新间隔和缓存字典的无效间隔在 `` 在几秒钟内标记。 + +字典更新(除首次使用的加载之外)不会阻止查询。 在更新期间,将使用旧版本的字典。 如果在更新过程中发生错误,则将错误写入服务器日志,并使用旧版本的字典继续查询。 + +设置示例: + +``` xml + + ... + 300 + ... + +``` + +``` sql +CREATE DICTIONARY (...) +... +LIFETIME(300) +... +``` + +设置 `0` (`LIFETIME(0)`)防止字典更新。 + +您可以设置升级的时间间隔,ClickHouse将在此范围内选择一个统一的随机时间。 为了在大量服务器上升级时分配字典源上的负载,这是必要的。 + +设置示例: + +``` xml + + ... + + 300 + 360 + + ... + +``` + +或 + +``` sql +LIFETIME(MIN 300 MAX 360) +``` + +升级字典时,ClickHouse服务器根据字典的类型应用不同的逻辑 [来源](external_dicts_dict_sources.md): + +- 对于文本文件,它检查修改的时间。 如果时间与先前记录的时间不同,则更新字典。 +- 对于MyISAM表,修改的时间使用检查 `SHOW TABLE STATUS` 查询。 +- 默认情况下,每次都会更新来自其他来源的字典。 + +对于MySQL(InnoDB),ODBC和ClickHouse源代码,您可以设置一个查询,只有在字典真正改变时才会更新字典,而不是每次都更新。 为此,请按照下列步骤操作: + +- 字典表必须具有在源数据更新时始终更改的字段。 +- 源的设置必须指定检索更改字段的查询。 ClickHouse服务器将查询结果解释为一行,如果此行相对于其以前的状态发生了更改,则更新字典。 指定查询 `` 字段中的设置 [来源](external_dicts_dict_sources.md). + +设置示例: + +``` xml + + ... + + ... + SELECT update_time FROM dictionary_source where id = 1 + + ... + +``` + +或 + +``` sql +... +SOURCE(ODBC(... invalidate_query 'SELECT update_time FROM dictionary_source where id = 1')) +... 
+``` + +[原始文章](https://clickhouse.tech/docs/en/query_language/dicts/external_dicts_dict_lifetime/) diff --git a/docs/zh/query_language/dicts/external_dicts_dict_sources.md b/docs/zh/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_sources.md similarity index 70% rename from docs/zh/query_language/dicts/external_dicts_dict_sources.md rename to docs/zh/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_sources.md index 37d050a8e72..c8173749b33 100644 --- a/docs/zh/query_language/dicts/external_dicts_dict_sources.md +++ b/docs/zh/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_sources.md @@ -1,12 +1,15 @@ --- -en_copy: true +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_priority: 43 +toc_title: "\u5916\u90E8\u5B57\u5178\u7684\u6765\u6E90" --- -# Sources of External Dictionaries {#dicts-external-dicts-dict-sources} +# 外部字典的来源 {#dicts-external-dicts-dict-sources} -An external dictionary can be connected from many different sources. +外部字典可以从许多不同的来源连接。 -If dictionary is configured using xml-file, the configuration looks like this: +如果使用xml-file配置字典,则配置如下所示: ``` xml @@ -23,7 +26,7 @@ If dictionary is configured using xml-file, the configuration looks like this: ``` -In case of [DDL-query](../create.md#create-dictionary-query), equal configuration will looks like: +在情况下 [DDL-查询](../../statements/create.md#create-dictionary-query),相等的配置将看起来像: ``` sql CREATE DICTIONARY dict_name (...) @@ -32,12 +35,12 @@ SOURCE(SOURCE_TYPE(param1 val1 ... paramN valN)) -- Source configuration ... ``` -The source is configured in the `source` section. +源配置在 `source` 科。 -Types of sources (`source_type`): +来源类型 (`source_type`): -- [Local file](#dicts-external_dicts_dict_sources-local_file) -- [Executable file](#dicts-external_dicts_dict_sources-executable) +- [本地文件](#dicts-external_dicts_dict_sources-local_file) +- [可执行文件](#dicts-external_dicts_dict_sources-executable) - [HTTP(s)](#dicts-external_dicts_dict_sources-http) - DBMS - [ODBC](#dicts-external_dicts_dict_sources-odbc) @@ -46,9 +49,9 @@ Types of sources (`source_type`): - [MongoDB](#dicts-external_dicts_dict_sources-mongodb) - [Redis](#dicts-external_dicts_dict_sources-redis) -## Local File {#dicts-external_dicts_dict_sources-local_file} +## 本地文件 {#dicts-external_dicts_dict_sources-local_file} -Example of settings: +设置示例: ``` xml @@ -59,22 +62,22 @@ Example of settings: ``` -or +或 ``` sql SOURCE(FILE(path '/opt/dictionaries/os.tsv' format 'TabSeparated')) ``` -Setting fields: +设置字段: - `path` – The absolute path to the file. -- `format` – The file format. All the formats described in “[Formats](../../interfaces/formats.md#formats)” are supported. +- `format` – The file format. All the formats described in “[格式](../../../interfaces/formats.md#formats)” 支持。 -## Executable File {#dicts-external_dicts_dict_sources-executable} +## 可执行文件 {#dicts-external_dicts_dict_sources-executable} -Working with executable files depends on [how the dictionary is stored in memory](external_dicts_dict_layout.md). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request to the executable file’s STDIN. Otherwise, ClickHouse starts executable file and treats its output as dictionary data. +使用可执行文件取决于 [字典如何存储在内存中](external_dicts_dict_layout.md). 
如果字典存储使用 `cache` 和 `complex_key_cache`,ClickHouse通过向可执行文件的STDIN发送请求来请求必要的密钥。 否则,ClickHouse将启动可执行文件并将其输出视为字典数据。 -Example of settings: +设置示例: ``` xml @@ -85,22 +88,22 @@ Example of settings: ``` -or +或 ``` sql SOURCE(EXECUTABLE(command 'cat /opt/dictionaries/os.tsv' format 'TabSeparated')) ``` -Setting fields: +设置字段: - `command` – The absolute path to the executable file, or the file name (if the program directory is written to `PATH`). -- `format` – The file format. All the formats described in “[Formats](../../interfaces/formats.md#formats)” are supported. +- `format` – The file format. All the formats described in “[格式](../../../interfaces/formats.md#formats)” 支持。 -## HTTP(s) {#dicts-external_dicts_dict_sources-http} +## Http(s) {#dicts-external_dicts_dict_sources-http} -Working with an HTTP(s) server depends on [how the dictionary is stored in memory](external_dicts_dict_layout.md). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request via the `POST` method. +使用HTTP(s)服务器取决于 [字典如何存储在内存中](external_dicts_dict_layout.md). 如果字典存储使用 `cache` 和 `complex_key_cache`,ClickHouse通过通过发送请求请求必要的密钥 `POST` 方法。 -Example of settings: +设置示例: ``` xml @@ -121,7 +124,7 @@ Example of settings: ``` -or +或 ``` sql SOURCE(HTTP( @@ -132,12 +135,12 @@ SOURCE(HTTP( )) ``` -In order for ClickHouse to access an HTTPS resource, you must [configure openSSL](../../operations/server_settings/settings.md#server_settings-openssl) in the server configuration. +为了让ClickHouse访问HTTPS资源,您必须 [配置openSSL](../../../operations/server_configuration_parameters/settings.md#server_configuration_parameters-openssl) 在服务器配置中。 -Setting fields: +设置字段: - `url` – The source URL. -- `format` – The file format. All the formats described in “[Formats](../../interfaces/formats.md#formats)” are supported. +- `format` – The file format. All the formats described in “[格式](../../../interfaces/formats.md#formats)” 支持。 - `credentials` – Basic HTTP authentication. Optional parameter. - `user` – Username required for the authentication. - `password` – Password required for the authentication. @@ -148,9 +151,9 @@ Setting fields: ## ODBC {#dicts-external_dicts_dict_sources-odbc} -You can use this method to connect any database that has an ODBC driver. +您可以使用此方法连接具有ODBC驱动程序的任何数据库。 -Example of settings: +设置示例: ``` xml @@ -163,7 +166,7 @@ Example of settings: ``` -or +或 ``` sql SOURCE(ODBC( @@ -174,25 +177,25 @@ SOURCE(ODBC( )) ``` -Setting fields: +设置字段: -- `db` – Name of the database. Omit it if the database name is set in the `` parameters. +- `db` – Name of the database. Omit it if the database name is set in the `` 参数。 - `table` – Name of the table and schema if exists. - `connection_string` – Connection string. -- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](external_dicts_dict_lifetime.md). +- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [更新字典](external_dicts_dict_lifetime.md). -ClickHouse receives quoting symbols from ODBC-driver and quote all settings in queries to driver, so it’s necessary to set table name accordingly to table name case in database. +ClickHouse接收来自ODBC-driver的引用符号,并将查询中的所有设置引用到driver,因此有必要根据数据库中的表名大小写设置表名。 -If you have a problems with encodings when using Oracle, see the corresponding [FAQ](../../faq/general.md#oracle-odbc-encodings) article. 
+如果您在使用Oracle时遇到编码问题,请参阅相应的 [FAQ](../../../faq/general.md#oracle-odbc-encodings) 文章. -### Known vulnerability of the ODBC dictionary functionality {#known-vulnerability-of-the-odbc-dictionary-functionality} +### ODBC字典功能的已知漏洞 {#known-vulnerability-of-the-odbc-dictionary-functionality} -!!! attention "Attention" - When connecting to the database through the ODBC driver connection parameter `Servername` can be substituted. In this case values of `USERNAME` and `PASSWORD` from `odbc.ini` are sent to the remote server and can be compromised. +!!! attention "注意" + 通过ODBC驱动程序连接参数连接到数据库时 `Servername` 可以取代。 在这种情况下,值 `USERNAME` 和 `PASSWORD` 从 `odbc.ini` 被发送到远程服务器,并且可能会受到损害。 -**Example of insecure use** +**不安全使用示例** -Let’s configure unixODBC for PostgreSQL. Content of `/etc/odbc.ini`: +让我们为PostgreSQL配置unixODBC。 的内容 `/etc/odbc.ini`: ``` text [gregtest] @@ -205,25 +208,25 @@ USERNAME = test PASSWORD = test ``` -If you then make a query such as +如果然后进行查询,例如 ``` sql SELECT * FROM odbc('DSN=gregtest;Servername=some-server.com', 'test_db'); ``` -ODBC driver will send values of `USERNAME` and `PASSWORD` from `odbc.ini` to `some-server.com`. +ODBC驱动程序将发送的值 `USERNAME` 和 `PASSWORD` 从 `odbc.ini` 到 `some-server.com`. -### Example of Connecting PostgreSQL {#example-of-connecting-postgresql} +### 连接Postgresql的示例 {#example-of-connecting-postgresql} -Ubuntu OS. +Ubuntu操作系统。 -Installing unixODBC and the ODBC driver for PostgreSQL: +为PostgreSQL安装unixODBC和ODBC驱动程序: ``` bash $ sudo apt-get install -y unixodbc odbcinst odbc-postgresql ``` -Configuring `/etc/odbc.ini` (or `~/.odbc.ini`): +配置 `/etc/odbc.ini` (或 `~/.odbc.ini`): ``` text [DEFAULT] @@ -244,7 +247,7 @@ Configuring `/etc/odbc.ini` (or `~/.odbc.ini`): ConnSettings = ``` -The dictionary configuration in ClickHouse: +ClickHouse中的字典配置: ``` xml @@ -279,7 +282,7 @@ The dictionary configuration in ClickHouse: ``` -or +或 ``` sql CREATE DICTIONARY table_name ( @@ -292,19 +295,19 @@ LAYOUT(HASHED()) LIFETIME(MIN 300 MAX 360) ``` -You may need to edit `odbc.ini` to specify the full path to the library with the driver `DRIVER=/usr/local/lib/psqlodbcw.so`. +您可能需要编辑 `odbc.ini` 使用驱动程序指定库的完整路径 `DRIVER=/usr/local/lib/psqlodbcw.so`. -### Example of Connecting MS SQL Server {#example-of-connecting-ms-sql-server} +### 连接MS SQL Server的示例 {#example-of-connecting-ms-sql-server} -Ubuntu OS. +Ubuntu操作系统。 -Installing the driver: : +安装驱动程序: : ``` bash $ sudo apt-get install tdsodbc freetds-bin sqsh ``` -Configuring the driver: +配置驱动程序: ``` bash $ cat /etc/freetds/freetds.conf @@ -339,7 +342,7 @@ Configuring the driver: Port = 1433 ``` -Configuring the dictionary in ClickHouse: +在ClickHouse中配置字典: ``` xml @@ -375,7 +378,7 @@ Configuring the dictionary in ClickHouse: ``` -or +或 ``` sql CREATE DICTIONARY test ( @@ -390,9 +393,9 @@ LIFETIME(MIN 300 MAX 360) ## DBMS {#dbms} -### MySQL {#dicts-external_dicts_dict_sources-mysql} +### Mysql {#dicts-external_dicts_dict_sources-mysql} -Example of settings: +设置示例: ``` xml @@ -416,7 +419,7 @@ Example of settings: ``` -or +或 ``` sql SOURCE(MYSQL( @@ -432,7 +435,7 @@ SOURCE(MYSQL( )) ``` -Setting fields: +设置字段: - `port` – The port on the MySQL server. You can specify it for all replicas, or for each one individually (inside ``). @@ -449,13 +452,13 @@ Setting fields: - `table` – Name of the table. -- `where` – The selection criteria. The syntax for conditions is the same as for `WHERE` clause in MySQL, for example, `id > 10 AND id < 20`. Optional parameter. +- `where` – The selection criteria. 
The syntax for conditions is the same as for `WHERE` 例如,mysql中的子句, `id > 10 AND id < 20`. 可选参数。 -- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](external_dicts_dict_lifetime.md). +- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [更新字典](external_dicts_dict_lifetime.md). -MySQL can be connected on a local host via sockets. To do this, set `host` and `socket`. +MySQL可以通过套接字在本地主机上连接。 要做到这一点,设置 `host` 和 `socket`. -Example of settings: +设置示例: ``` xml @@ -472,7 +475,7 @@ Example of settings: ``` -or +或 ``` sql SOURCE(MYSQL( @@ -489,7 +492,7 @@ SOURCE(MYSQL( ### ClickHouse {#dicts-external_dicts_dict_sources-clickhouse} -Example of settings: +设置示例: ``` xml @@ -505,7 +508,7 @@ Example of settings: ``` -or +或 ``` sql SOURCE(CLICKHOUSE( @@ -519,20 +522,20 @@ SOURCE(CLICKHOUSE( )) ``` -Setting fields: +设置字段: -- `host` – The ClickHouse host. If it is a local host, the query is processed without any network activity. To improve fault tolerance, you can create a [Distributed](../../operations/table_engines/distributed.md) table and enter it in subsequent configurations. +- `host` – The ClickHouse host. If it is a local host, the query is processed without any network activity. To improve fault tolerance, you can create a [分布](../../../engines/table_engines/special/distributed.md) 表并在后续配置中输入它。 - `port` – The port on the ClickHouse server. - `user` – Name of the ClickHouse user. - `password` – Password of the ClickHouse user. - `db` – Name of the database. - `table` – Name of the table. - `where` – The selection criteria. May be omitted. -- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](external_dicts_dict_lifetime.md). +- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [更新字典](external_dicts_dict_lifetime.md). -### MongoDB {#dicts-external_dicts_dict_sources-mongodb} +### Mongodb {#dicts-external_dicts_dict_sources-mongodb} -Example of settings: +设置示例: ``` xml @@ -547,7 +550,7 @@ Example of settings: ``` -or +或 ``` sql SOURCE(MONGO( @@ -560,7 +563,7 @@ SOURCE(MONGO( )) ``` -Setting fields: +设置字段: - `host` – The MongoDB host. - `port` – The port on the MongoDB server. @@ -571,7 +574,7 @@ Setting fields: ### Redis {#dicts-external_dicts_dict_sources-redis} -Example of settings: +设置示例: ``` xml @@ -584,7 +587,7 @@ Example of settings: ``` -or +或 ``` sql SOURCE(REDIS( @@ -595,11 +598,11 @@ SOURCE(REDIS( )) ``` -Setting fields: +设置字段: - `host` – The Redis host. - `port` – The port on the Redis server. -- `storage_type` – The structure of internal Redis storage using for work with keys. `simple` is for simple sources and for hashed single key sources, `hash_map` is for hashed sources with two keys. Ranged sources and cache sources with complex key are unsupported. May be omitted, default value is `simple`. +- `storage_type` – The structure of internal Redis storage using for work with keys. `simple` 适用于简单源和散列单键源, `hash_map` 用于具有两个键的散列源。 不支持具有复杂键的范围源和缓存源。 可以省略,默认值为 `simple`. - `db_index` – The specific numeric index of Redis logical database. May be omitted, default value is 0. 
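+
+无论使用哪种源,配置好的字典都可以通过字典函数来查询。下面是一个示意性示例(其中的字典名 `redis_dict` 和属性名 `value` 均为假设):
+
+``` sql
+-- 按 UInt64 键从字典中取出字符串属性
+SELECT dictGetString('redis_dict', 'value', toUInt64(1))
+```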
-[Original article](https://clickhouse.tech/docs/en/query_language/dicts/external_dicts_dict_sources/) +[原始文章](https://clickhouse.tech/docs/en/query_language/dicts/external_dicts_dict_sources/) diff --git a/docs/zh/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_structure.md b/docs/zh/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_structure.md new file mode 100644 index 00000000000..0ac0226aa50 --- /dev/null +++ b/docs/zh/sql_reference/dictionaries/external_dictionaries/external_dicts_dict_structure.md @@ -0,0 +1,175 @@ +--- +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_priority: 44 +toc_title: "\u5B57\u5178\u952E\u548C\u5B57\u6BB5" +--- + +# 字典键和字段 {#dictionary-key-and-fields} + +该 `` 子句描述可用于查询的字典键和字段。 + +XML描述: + +``` xml + + + + Id + + + + + + + ... + + + +``` + +属性在元素中描述: + +- `` — [键列](external_dicts_dict_structure.md#ext_dict_structure-key). +- `` — [数据列](external_dicts_dict_structure.md#ext_dict_structure-attributes). 可以有多个属性。 + +DDL查询: + +``` sql +CREATE DICTIONARY dict_name ( + Id UInt64, + -- attributes +) +PRIMARY KEY Id +... +``` + +查询正文中描述了属性: + +- `PRIMARY KEY` — [键列](external_dicts_dict_structure.md#ext_dict_structure-key) +- `AttrName AttrType` — [数据列](external_dicts_dict_structure.md#ext_dict_structure-attributes). 可以有多个属性。 + +## 键 {#ext_dict_structure-key} + +ClickHouse支持以下类型的键: + +- 数字键。 `UInt64`. 在定义 `` 标记或使用 `PRIMARY KEY` 关键字。 +- 复合密钥。 组不同类型的值。 在标签中定义 `` 或 `PRIMARY KEY` 关键字。 + +Xml结构可以包含 `` 或 ``. DDL-查询必须包含单个 `PRIMARY KEY`. + +!!! warning "警告" + 不能将键描述为属性。 + +### 数字键 {#ext_dict-numeric-key} + +类型: `UInt64`. + +配置示例: + +``` xml + + Id + +``` + +配置字段: + +- `name` – The name of the column with keys. + +对于DDL-查询: + +``` sql +CREATE DICTIONARY ( + Id UInt64, + ... +) +PRIMARY KEY Id +... +``` + +- `PRIMARY KEY` – The name of the column with keys. + +### 复合密钥 {#composite-key} + +关键可以是一个 `tuple` 从任何类型的字段。 该 [布局](external_dicts_dict_layout.md) 在这种情况下,必须是 `complex_key_hashed` 或 `complex_key_cache`. + +!!! tip "提示" + 复合键可以由单个元素组成。 例如,这使得可以使用字符串作为键。 + +键结构在元素中设置 ``. 键字段的格式与字典的格式相同 [属性](external_dicts_dict_structure.md). 示例: + +``` xml + + + + field1 + String + + + field2 + UInt32 + + ... + +... +``` + +或 + +``` sql +CREATE DICTIONARY ( + field1 String, + field2 String + ... +) +PRIMARY KEY field1, field2 +... +``` + +对于查询 `dictGet*` 函数中,一个元组作为键传递。 示例: `dictGetString('dict_name', 'attr_name', tuple('string for field1', num_for_field2))`. + +## 属性 {#ext_dict_structure-attributes} + +配置示例: + +``` xml + + ... + + Name + ClickHouseDataType + + rand64() + true + true + true + + +``` + +或 + +``` sql +CREATE DICTIONARY somename ( + Name ClickHouseDataType DEFAULT '' EXPRESSION rand64() HIERARCHICAL INJECTIVE IS_OBJECT_ID +) +``` + +配置字段: + +| 标签 | 产品描述 | 必填项 | +|------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------| +| `name` | 列名称。 | 是 | +| `type` | ClickHouse数据类型。
ClickHouse尝试将字典中的值转换为指定的数据类型。 例如,对于MySQL源表,该字段可能是 `TEXT`、`VARCHAR` 或 `BLOB`,但在 ClickHouse 中可以作为 `String` 上传。 [可为空](../../../sql_reference/data_types/nullable.md) 类型不受支持。 | 是 |
+| `null_value` | 不存在元素的默认值。 在示例中,它是一个空字符串。 不能在此字段中使用 `NULL`。 | 是 |
+| `expression` | ClickHouse 对该值执行的[表达式](../../syntax.md#syntax-expressions)。 表达式可以是远程SQL数据库中的列名,因此可以用它为远程列创建别名。 默认值:无表达式。 | 否 |
+| `hierarchical` | 如果为 `true`,该属性包含当前键的父键的值。 参见 [分层字典](external_dicts_dict_hierarchical.md)。 默认值:`false`。 | 否 |
+| `injective` | 显示 `id -> attribute` 映射是否为[单射](https://en.wikipedia.org/wiki/Injective_function)的标志。 如果为 `true`,ClickHouse 可以在 `GROUP BY` 子句之后自动改写对字典的请求,这通常会显著减少此类请求的数量。 默认值:`false`。 | 否 |
+| `is_object_id` | 显示是否通过 `ObjectID` 对 MongoDB 文档执行查询的标志。
默认值: `false`. | 非也。 | + +## 另请参阅 {#see-also} + +- [使用外部字典的函数](../../../sql_reference/functions/ext_dict_functions.md). + +[原始文章](https://clickhouse.tech/docs/en/query_language/dicts/external_dicts_dict_structure/) diff --git a/docs/zh/sql_reference/dictionaries/external_dictionaries/index.md b/docs/zh/sql_reference/dictionaries/external_dictionaries/index.md new file mode 100644 index 00000000000..25d86ecda96 --- /dev/null +++ b/docs/zh/sql_reference/dictionaries/external_dictionaries/index.md @@ -0,0 +1,8 @@ +--- +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_folder_title: "\u5916\u90E8\u5B57\u5178" +toc_priority: 37 +--- + + diff --git a/docs/zh/sql_reference/dictionaries/index.md b/docs/zh/sql_reference/dictionaries/index.md new file mode 100644 index 00000000000..9c9817ad0ad --- /dev/null +++ b/docs/zh/sql_reference/dictionaries/index.md @@ -0,0 +1,22 @@ +--- +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_folder_title: "\u5B57\u5178" +toc_priority: 35 +toc_title: "\u5BFC\u8A00" +--- + +# 字典 {#dictionaries} + +字典是一个映射 (`key -> attributes`)这是方便各种类型的参考清单。 + +ClickHouse支持使用可用于查询的字典的特殊功能。 这是更容易和更有效地使用字典与功能比 `JOIN` 与参考表。 + +[NULL](../syntax.md#null) 值不能存储在字典中。 + +ClickHouse支持: + +- [内置字典](internal_dicts.md#internal_dicts) 具有特定的 [功能集](../../sql_reference/functions/ym_dict_functions.md). +- [插件(外部)字典](external_dictionaries/external_dicts.md) 用一个 [职能净额](../../sql_reference/functions/ext_dict_functions.md). + +[原始文章](https://clickhouse.tech/docs/en/query_language/dicts/) diff --git a/docs/zh/sql_reference/dictionaries/internal_dicts.md b/docs/zh/sql_reference/dictionaries/internal_dicts.md new file mode 100644 index 00000000000..bcede3c14ad --- /dev/null +++ b/docs/zh/sql_reference/dictionaries/internal_dicts.md @@ -0,0 +1,55 @@ +--- +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_priority: 39 +toc_title: "\u5185\u90E8\u5B57\u5178" +--- + +# 内部字典 {#internal_dicts} + +ClickHouse包含用于处理地理数据库的内置功能。 + +这使您可以: + +- 使用区域的ID以所需语言获取其名称。 +- 使用区域ID获取城市、地区、联邦区、国家或大陆的ID。 +- 检查一个区域是否属于另一个区域。 +- 获取父区域链。 + +所有功能支持 “translocality,” 能够同时使用不同的角度对区域所有权。 有关详细信息,请参阅部分 “Functions for working with Yandex.Metrica dictionaries”. + +在默认包中禁用内部字典。 +要启用它们,请取消注释参数 `path_to_regions_hierarchy_file` 和 `path_to_regions_names_files` 在服务器配置文件中。 + +Geobase从文本文件加载。 + +将 `regions_hierarchy*.txt` 文件到 `path_to_regions_hierarchy_file` 目录。 此配置参数必须包含指向 `regions_hierarchy.txt` 文件(默认区域层次结构)和其他文件 (`regions_hierarchy_ua.txt`)必须位于同一目录中。 + +把 `regions_names_*.txt` 在文件 `path_to_regions_names_files` 目录。 + +您也可以自己创建这些文件。 文件格式如下: + +`regions_hierarchy*.txt`:TabSeparated(无标题),列: + +- 地区ID (`UInt32`) +- 父区域ID (`UInt32`) +- 区域类型 (`UInt8`):1-大陆,3-国家,4-联邦区,5-地区,6-城市;其他类型没有价值 +- 人口 (`UInt32`) — optional column + +`regions_names_*.txt`:TabSeparated(无标题),列: + +- 地区ID (`UInt32`) +- 地区名称 (`String`) — Can't contain tabs or line feeds, even escaped ones. 
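+
+作为示意,加载geobase后即可通过上文提到的区域函数使用这些字典,例如(假设所加载的层次结构中存在区域ID 213):
+
+``` sql
+-- regionToCountry 返回区域所属国家的ID,regionToName 返回其名称
+SELECT regionToName(regionToCountry(toUInt32(213)))
+```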
+ +平面阵列用于存储在RAM中。 出于这个原因,Id不应该超过一百万。 + +字典可以在不重新启动服务器的情况下更新。 但是,不会更新可用字典集。 +对于更新,将检查文件修改时间。 如果文件已更改,则更新字典。 +检查更改的时间间隔在 `builtin_dictionaries_reload_interval` 参数。 +字典更新(首次使用时加载除外)不会阻止查询。 在更新期间,查询使用旧版本的字典。 如果在更新过程中发生错误,则将错误写入服务器日志,并使用旧版本的字典继续查询。 + +我们建议定期使用geobase更新字典。 在更新期间,生成新文件并将其写入单独的位置。 一切准备就绪后,将其重命名为服务器使用的文件。 + +还有与操作系统标识符和Yandex的工作功能。Metrica搜索引擎,但他们不应该被使用。 + +[原始文章](https://clickhouse.tech/docs/en/query_language/dicts/internal_dicts/) diff --git a/docs/zh/query_language/functions/arithmetic_functions.md b/docs/zh/sql_reference/functions/arithmetic_functions.md similarity index 79% rename from docs/zh/query_language/functions/arithmetic_functions.md rename to docs/zh/sql_reference/functions/arithmetic_functions.md index 08d13b15af1..66bd42ec63a 100644 --- a/docs/zh/query_language/functions/arithmetic_functions.md +++ b/docs/zh/sql_reference/functions/arithmetic_functions.md @@ -1,3 +1,4 @@ + # 算术函数 {#suan-zhu-han-shu} 对于所有算术函数,结果类型为结果适合的最小数字类型(如果存在这样的类型)。最小数字类型是根据数字的位数,是否有符号以及是否是浮点类型而同时进行的。如果没有足够的位,则采用最高位类型。 @@ -16,59 +17,59 @@ SELECT toTypeName(0), toTypeName(0 + 0), toTypeName(0 + 0 + 0), toTypeName(0 + 0 溢出的产生方式与C++相同。 -## plus(a, b), a + b {#plusa-b-a-b} +## 加(a,b),a+b {#plusa-b-a-b} 计算数字的总和。 您还可以将Date或DateTime与整数进行相加。在Date的情况下,添加的整数意味着添加相应的天数。对于DateTime,这意味这添加相应的描述。 -## minus(a, b), a - b {#minusa-b-a-b} +## 减(a,b),a-b {#minusa-b-a-b} 计算数字之间的差,结果总是有符号的。 -您还可以将Date或DateTime与整数进行相减。见上面的’plus’。 +您还可以将Date或DateTime与整数进行相减。见上面的'plus'。 -## multiply(a, b), a \* b {#multiplya-b-a-b} +## 乘(a,b),a\*b {#multiplya-b-a-b} 计算数字的乘积。 -## divide(a, b), a / b {#dividea-b-a-b} +## 除以(a,b),a/b {#dividea-b-a-b} 计算数字的商。结果类型始终是浮点类型。 -它不是整数除法。对于整数除法,请使用’intDiv’函数。 -当除以零时,你得到’inf’,‘- inf’或’nan’。 +它不是整数除法。对于整数除法,请使用'intDiv'函数。 +当除以零时,你得到'inf',‘- inf’或’nan’。 -## intDiv(a, b) {#intdiva-b} +## intDiv(a,b) {#intdiva-b} 计算整数数字的商,向下舍入(按绝对值)。 除以零或将最小负数除以-1时抛出异常。 -## intDivOrZero(a, b) {#intdivorzeroa-b} +## intDivOrZero(a,b) {#intdivorzeroa-b} -与’intDiv’的不同之处在于它在除以零或将最小负数除以-1时返回零。 +与'intDiv'的不同之处在于它在除以零或将最小负数除以-1时返回零。 -## modulo(a, b), a % b {#moduloa-b-a-b} +## 模(a,b),a%b {#moduloa-b-a-b} 计算除法后的余数。 如果参数是浮点数,则通过删除小数部分将它们预转换为整数。 其余部分与C++中的含义相同。截断除法用于负数。 除以零或将最小负数除以-1时抛出异常。 -## negate(a), -a {#negatea-a} +## 否定(a),-a {#negatea-a} 计算一个数字的 用反转符号计算一个数字。结果始终是签名的。 -Calculates a number with the reverse sign. The result is always signed. 
+计算具有反向符号的数字。 结果始终签名。 ## abs(a) {#arithm_func-abs} 计算数字(a)的绝对值。也就是说,如果a &lt; 0,它返回-a。对于无符号类型,它不执行任何操作。对于有符号整数类型,它返回无符号数。 -## gcd(a, b) {#gcda-b} +## gcd(a,b) {#gcda-b} 返回数字的最大公约数。 除以零或将最小负数除以-1时抛出异常。 -## lcm(a, b) {#lcma-b} +## lcm(a,b) {#lcma-b} 返回数字的最小公倍数。 除以零或将最小负数除以-1时抛出异常。 diff --git a/docs/zh/query_language/functions/array_functions.md b/docs/zh/sql_reference/functions/array_functions.md similarity index 88% rename from docs/zh/query_language/functions/array_functions.md rename to docs/zh/sql_reference/functions/array_functions.md index 7f0d734a7c9..cb8f7347b72 100644 --- a/docs/zh/query_language/functions/array_functions.md +++ b/docs/zh/sql_reference/functions/array_functions.md @@ -1,6 +1,7 @@ + # 数组函数 {#shu-zu-han-shu} -## empty {#empty} +## 空 {#empty} 对于空数组返回1,对于非空数组返回0。 结果类型是UInt8。 @@ -12,21 +13,21 @@ 结果类型是UInt8。 该函数也适用于字符串。 -## length {#array_functions-length} +## 长度 {#array_functions-length} 返回数组中的元素个数。 结果类型是UInt64。 该函数也适用于字符串。 -## emptyArrayUInt8, emptyArrayUInt16, emptyArrayUInt32, emptyArrayUInt64 {#emptyarrayuint8-emptyarrayuint16-emptyarrayuint32-emptyarrayuint64} +## emptyArrayUInt8,emptyArrayUInt16,emptyArrayUInt32,emptyArrayUInt64 {#emptyarrayuint8-emptyarrayuint16-emptyarrayuint32-emptyarrayuint64} -## emptyArrayInt8, emptyArrayInt16, emptyArrayInt32, emptyArrayInt64 {#emptyarrayint8-emptyarrayint16-emptyarrayint32-emptyarrayint64} +## emptyArrayInt8,emptyArrayInt16,emptyArrayInt32,emptyArrayInt64 {#emptyarrayint8-emptyarrayint16-emptyarrayint32-emptyarrayint64} -## emptyArrayFloat32, emptyArrayFloat64 {#emptyarrayfloat32-emptyarrayfloat64} +## emptyArrayFloat32,emptyArrayFloat64 {#emptyarrayfloat32-emptyarrayfloat64} -## emptyArrayDate, emptyArrayDateTime {#emptyarraydate-emptyarraydatetime} +## 空空漫步,空空漫步时间 {#emptyarraydate-emptyarraydatetime} -## emptyArrayString {#emptyarraystring} +## 空字符串 {#emptyarraystring} 不接受任何参数并返回适当类型的空数组。 @@ -34,7 +35,7 @@ 接受一个空数组并返回一个仅包含一个默认值元素的数组。 -## range(N) {#rangen} +## 范围(N) {#rangen} 返回从0到N-1的数字数组。 以防万一,如果在数据块中创建总长度超过100,000,000个元素的数组,则抛出异常。 @@ -42,8 +43,8 @@ ## array(x1, …), operator \[x1, …\] {#arrayx1-operator-x1} 使用函数的参数作为数组元素创建一个数组。 -参数必须是常量,并且具有最小公共类型的类型。必须至少传递一个参数,否则将不清楚要创建哪种类型的数组。也就是说,你不能使用这个函数来创建一个空数组(为此,使用上面描述的’emptyArray  \*’函数)。 -返回’Array(T)’类型的结果,其中’T’是传递的参数中最小的公共类型。 +参数必须是常量,并且具有最小公共类型的类型。必须至少传递一个参数,否则将不清楚要创建哪种类型的数组。也就是说,你不能使用这个函数来创建一个空数组(为此,使用上面描述的'emptyArray  \*'函数)。 +返回'Array(T)'类型的结果,其中'T'是传递的参数中最小的公共类型。 ## arrayConcat {#arrayconcat} @@ -53,7 +54,7 @@ **参数** -- `arrays` – 任意数量的[Array](../../data_types/array.md)类型的参数. +- `arrays` – 任意数量的[阵列](../../sql_reference/functions/array_functions.md)类型的参数. **示例** @@ -66,7 +67,7 @@ SELECT arrayConcat([1, 2], [3, 4], [5, 6]) AS res │ [1,2,3,4,5,6] │ └───────────────┘ -## arrayElement(arr, n), operator arr\[n\] {#arrayelementarr-n-operator-arrn} +## arrayElement(arr,n),运算符arr\[n\] {#arrayelementarr-n-operator-arrn} 从数组`arr`中获取索引为«n»的元素。 `n`必须是任何整数类型。 数组中的索引从一开始。 @@ -74,9 +75,9 @@ SELECT arrayConcat([1, 2], [3, 4], [5, 6]) AS res 如果索引超出数组的边界,则返回默认值(数字为0,字符串为空字符串等)。 -## has(arr, elem) {#hasarr-elem} +## 有(arr,elem) {#hasarr-elem} -检查’arr’数组是否具有’elem’元素。 +检查'arr'数组是否具有'elem'元素。 如果元素不在数组中,则返回0;如果在,则返回1。 `NULL` 值的处理。 @@ -111,17 +112,17 @@ SELECT arrayConcat([1, 2], [3, 4], [5, 6]) AS res **示例** -`SELECT hasAll([], [])` returns 1. +`SELECT hasAll([], [])` 返回1。 -`SELECT hasAll([1, Null], [Null])` returns 1. +`SELECT hasAll([1, Null], [Null])` 返回1。 -`SELECT hasAll([1.0, 2, 3, 4], [1, 3])` returns 1. 
+`SELECT hasAll([1.0, 2, 3, 4], [1, 3])` 返回1。 -`SELECT hasAll(['a', 'b'], ['a'])` returns 1. +`SELECT hasAll(['a', 'b'], ['a'])` 返回1。 -`SELECT hasAll([1], ['a'])` returns 0. +`SELECT hasAll([1], ['a'])` 返回0。 -`SELECT hasAll([[1, 2], [3, 4]], [[1, 2], [3, 5]])` returns 0. +`SELECT hasAll([[1, 2], [3, 4]], [[1, 2], [3, 5]])` 返回0。 ## hasAny {#hasany} @@ -146,19 +147,19 @@ SELECT arrayConcat([1, 2], [3, 4], [5, 6]) AS res **示例** -`SELECT hasAny([1], [])` returns `0`. +`SELECT hasAny([1], [])` 返回 `0`. -`SELECT hasAny([Null], [Null, 1])` returns `1`. +`SELECT hasAny([Null], [Null, 1])` 返回 `1`. -`SELECT hasAny([-128, 1., 512], [1])` returns `1`. +`SELECT hasAny([-128, 1., 512], [1])` 返回 `1`. -`SELECT hasAny([[1, 2], [3, 4]], ['a', 'c'])` returns `0`. +`SELECT hasAny([[1, 2], [3, 4]], ['a', 'c'])` 返回 `0`. -`SELECT hasAll([[1, 2], [3, 4]], [[1, 2], [1, 2]])` returns `1`. +`SELECT hasAll([[1, 2], [3, 4]], [[1, 2], [1, 2]])` 返回 `1`. -## indexOf(arr, x) {#indexofarr-x} +## indexOf(arr,x) {#indexofarr-x} -返回数组中第一个‘x’元素的索引(从1开始),如果‘x’元素不存在在数组中,则返回0。 +返回数组中第一个'x'元素的索引(从1开始),如果'x'元素不存在在数组中,则返回0。 示例: @@ -172,7 +173,7 @@ SELECT arrayConcat([1, 2], [3, 4], [5, 6]) AS res 设置为«NULL»的元素将作为普通的元素值处理。 -## countEqual(arr, x) {#countequalarr-x} +## countEqual(arr,x) {#countequalarr-x} 返回数组中等于x的元素的个数。相当于arrayCount(elem - \> elem = x,arr)。 @@ -186,7 +187,7 @@ SELECT arrayConcat([1, 2], [3, 4], [5, 6]) AS res │ 2 │ └──────────────────────────────────────┘ -## arrayEnumerate(arr) {#array_functions-arrayenumerate} +## ツ暗ェツ氾环催ツ団ツ法ツ人) {#array_functions-arrayenumerate} 返回 Array \[1, 2, 3, …, length (arr) \] @@ -324,7 +325,7 @@ SELECT arrayPopFront([1, 2, 3]) AS res **参数** - `array` – 数组。 -- `single_value` – 单个值。只能将数字添加到带数字的数组中,并且只能将字符串添加到字符串数组中。添加数字时,ClickHouse会自动为数组的数据类型设置`single_value`类型。有关ClickHouse中数据类型的更多信息,请参阅«[数据类型](../../data_types/index.md#data_types)»。可以是’NULL`。该函数向数组添加一个«NULL»元素,数组元素的类型转换为`Nullable\`。 +- `single_value` – 单个值。只能将数字添加到带数字的数组中,并且只能将字符串添加到字符串数组中。添加数字时,ClickHouse会自动为数组的数据类型设置`single_value`类型。有关ClickHouse中数据类型的更多信息,请参阅«[数据类型](../../sql_reference/functions/array_functions.md#data_types)»。可以是'NULL`。该函数向数组添加一个«NULL»元素,数组元素的类型转换为`Nullable\`。 **示例** @@ -345,7 +346,7 @@ SELECT arrayPushBack(['a'], 'b') AS res **参数** - `array` – 数组。 -- `single_value` – 单个值。只能将数字添加到带数字的数组中,并且只能将字符串添加到字符串数组中。添加数字时,ClickHouse会自动为数组的数据类型设置`single_value`类型。有关ClickHouse中数据类型的更多信息,请参阅«[数据类型](../../data_types/index.md#data_types)»。可以是’NULL`。该函数向数组添加一个«NULL»元素,数组元素的类型转换为`Nullable\`。 +- `single_value` – 单个值。只能将数字添加到带数字的数组中,并且只能将字符串添加到字符串数组中。添加数字时,ClickHouse会自动为数组的数据类型设置`single_value`类型。有关ClickHouse中数据类型的更多信息,请参阅«[数据类型](../../sql_reference/functions/array_functions.md#data_types)»。可以是'NULL`。该函数向数组添加一个«NULL»元素,数组元素的类型转换为`Nullable\`。 **示例** @@ -369,7 +370,7 @@ SELECT arrayPushFront(['b'], 'a') AS res - `size` — 数组所需的长度。 - 如果`size`小于数组的原始大小,则数组将从右侧截断。 - 如果`size`大于数组的初始大小,则使用`extender`值或数组项的数据类型的默认值将数组扩展到右侧。 -- `extender` — 扩展数组的值。可以是’NULL\`。 +- `extender` — 扩展数组的值。可以是'NULL\`。 **返回值:** @@ -476,7 +477,7 @@ SELECT arraySort((x, y) -> y, ['hello', 'world'], [2, 1]) as res; │ ['world', 'hello'] │ └────────────────────┘ -这里,在第二个数组(\[2, 1\])中定义了第一个数组(\[‘hello’,‘world’\])的相应元素的排序键,即\[‘hello’ -\> 2,‘world’ -\> 1\]。 由于lambda函数中没有使用`x`,因此源数组中的实际值不会影响结果的顺序。所以,‘world’将是结果中的第一个元素,‘hello’将是结果中的第二个元素。 +这里,在第二个数组(\[2, 1\])中定义了第一个数组(\[‘hello’,‘world’\])的相应元素的排序键,即\[‘hello’ -\> 2,‘world’ -\> 1\]。 由于lambda函数中没有使用`x`,因此源数组中的实际值不会影响结果的顺序。所以,'world'将是结果中的第一个元素,'hello'将是结果中的第二个元素。 其他示例如下所示。 @@ -501,7 +502,7 @@ SELECT arraySort((x, y) -> -y, [0, 1, 2], [1, 2, 3]) 
@@ -324,7 +325,7 @@ SELECT arrayPopFront([1, 2, 3]) AS res
 **参数**
 - `array` – 数组。
-- `single_value` – 单个值。只能将数字添加到带数字的数组中,并且只能将字符串添加到字符串数组中。添加数字时,ClickHouse会自动为数组的数据类型设置`single_value`类型。有关ClickHouse中数据类型的更多信息,请参阅«[数据类型](../../data_types/index.md#data_types)»。可以是’NULL`。该函数向数组添加一个«NULL»元素,数组元素的类型转换为`Nullable\`。
+- `single_value` – 单个值。只能将数字添加到带数字的数组中,并且只能将字符串添加到字符串数组中。添加数字时,ClickHouse会自动为数组的数据类型设置`single_value`类型。有关ClickHouse中数据类型的更多信息,请参阅«[数据类型](../../sql_reference/data_types/index.md#data_types)»。可以是`NULL`。该函数向数组添加一个«NULL»元素,数组元素的类型转换为`Nullable`。
 **示例**
@@ -345,7 +346,7 @@ SELECT arrayPushBack(['a'], 'b') AS res
-- `single_value` – 单个值。只能将数字添加到带数字的数组中,并且只能将字符串添加到字符串数组中。添加数字时,ClickHouse会自动为数组的数据类型设置`single_value`类型。有关ClickHouse中数据类型的更多信息,请参阅«[数据类型](../../data_types/index.md#data_types)»。可以是’NULL`。该函数向数组添加一个«NULL»元素,数组元素的类型转换为`Nullable\`。
+- `single_value` – 单个值。只能将数字添加到带数字的数组中,并且只能将字符串添加到字符串数组中。添加数字时,ClickHouse会自动为数组的数据类型设置`single_value`类型。有关ClickHouse中数据类型的更多信息,请参阅«[数据类型](../../sql_reference/data_types/index.md#data_types)»。可以是`NULL`。该函数向数组添加一个«NULL»元素,数组元素的类型转换为`Nullable`。
 **示例**
@@ -369,7 +370,7 @@ SELECT arrayPushFront(['b'], 'a') AS res
 - `size` — 数组所需的长度。
 - 如果`size`小于数组的原始大小,则数组将从右侧截断。
 - 如果`size`大于数组的初始大小,则使用`extender`值或数组项的数据类型的默认值将数组扩展到右侧。
-- `extender` — 扩展数组的值。可以是’NULL\`。
+- `extender` — 扩展数组的值。可以是`NULL`。
 **返回值:**
@@ -476,7 +477,7 @@ SELECT arraySort((x, y) -> y, ['hello', 'world'], [2, 1]) as res;
 │ ['world', 'hello'] │
 └────────────────────┘
-这里,在第二个数组(\[2, 1\])中定义了第一个数组(\[‘hello’,‘world’\])的相应元素的排序键,即\[‘hello’ -\> 2,‘world’ -\> 1\]。 由于lambda函数中没有使用`x`,因此源数组中的实际值不会影响结果的顺序。所以,‘world’将是结果中的第一个元素,‘hello’将是结果中的第二个元素。
+这里,在第二个数组(\[2, 1\])中定义了第一个数组(\[‘hello’,‘world’\])的相应元素的排序键,即\[‘hello’ -\> 2,‘world’ -\> 1\]。 由于lambda函数中没有使用`x`,因此源数组中的实际值不会影响结果的顺序。所以,'world'将是结果中的第一个元素,'hello'将是结果中的第二个元素。
 其他示例如下所示。
@@ -501,7 +502,7 @@ SELECT arraySort((x, y) -> -y, [0, 1, 2], [1, 2, 3]) as res;
 ```
 !!! 注意 "注意"
-    为了提高排序效率, 使用了[Schwartzian transform](https://en.wikipedia.org/wiki/Schwartzian_transform)。
+    为了提高排序效率, 使用了[施瓦茨变换](https://en.wikipedia.org/wiki/Schwartzian_transform)。
 ## arrayReverseSort(\[func,\] arr, …) {#array_functions-reverse-sort}
@@ -555,7 +556,6 @@ SELECT arrayReverseSort((x) -> -x, [1, 2, 3]) as res;
 └─────────┘
 数组按以下方式排序:
-The array is sorted in the following way:
 1. 首先,根据lambda函数的调用结果对源数组(\[1, 2, 3\])进行排序。 结果是\[3, 2, 1\]。
 2. 反转上一步获得的数组。 所以,最终的结果是\[1, 2, 3\]。
@@ -606,7 +607,7 @@ SELECT arrayReverseSort((x, y) -> -y, [4, 3, 5], [1, 2, 3]) AS res;
 如果要获取数组中唯一项的列表,可以使用arrayReduce(‘groupUniqArray’,arr)。
-## arrayJoin(arr) {#array-functions-join}
+## arrayJoin(arr) {#array-functions-join}
 一个特殊的功能。请参见[«ArrayJoin函数»](array_join.md#functions_arrayjoin)部分。
@@ -658,7 +659,7 @@ SELECT arrayReduce(‘agg\_func’,arr1,…)
 - 将聚合函数`agg_func`应用于数组`arr1 ...`。如果传递了多个数组,则相应位置上的元素将作为多个参数传递给聚合函数。例如:SELECT arrayReduce(‘max’,\[1,2,3\])= 3
-## arrayReverse(arr) {#arrayreversearr}
+## arrayReverse(arr) {#arrayreversearr}
 返回与源数组大小相同的数组,包含反转源数组的所有元素的结果。
diff --git a/docs/zh/query_language/functions/array_join.md b/docs/zh/sql_reference/functions/array_join.md
similarity index 93%
rename from docs/zh/query_language/functions/array_join.md
rename to docs/zh/sql_reference/functions/array_join.md
index b7a4855efa5..1788b44f3e5 100644
--- a/docs/zh/query_language/functions/array_join.md
+++ b/docs/zh/sql_reference/functions/array_join.md
@@ -1,10 +1,11 @@
+
 # arrayJoin函数 {#functions_arrayjoin}
 这是一个非常有用的函数。
 普通函数不会更改结果集的行数,而只是计算每行中的值(map)。
 聚合函数将多行压缩到一行中(fold或reduce)。
-’arrayJoin’函数获取每一行并将他们展开到多行(unfold)。
+'arrayJoin'函数获取每一行并将他们展开到多行(unfold)。
 此函数将数组作为参数,并将该行在结果集中复制数组元素个数。
 除了应用此函数的列中的值之外,简单地复制列中的所有值;它被替换为相应的数组值。
diff --git a/docs/zh/query_language/functions/bit_functions.md b/docs/zh/sql_reference/functions/bit_functions.md
similarity index 58%
rename from docs/zh/query_language/functions/bit_functions.md
rename to docs/zh/sql_reference/functions/bit_functions.md
index a9ded6b0930..1b280c8babd 100644
--- a/docs/zh/query_language/functions/bit_functions.md
+++ b/docs/zh/sql_reference/functions/bit_functions.md
@@ -1,29 +1,30 @@
+
 # 位操作函数 {#wei-cao-zuo-han-shu}
 位操作函数适用于UInt8,UInt16,UInt32,UInt64,Int8,Int16,Int32,Int64,Float32或Float64中的任何类型。
 结果类型是一个整数,其位数等于其参数的最大位。如果至少有一个参数为有符数字,则结果为有符数字。如果参数是浮点数,则将其强制转换为Int64。
-## bitAnd(a, b) {#bitanda-b}
+## bitAnd(a,b) {#bitanda-b}
-## bitOr(a, b) {#bitora-b}
+## bitOr(a,b) {#bitora-b}
-## bitXor(a, b) {#bitxora-b}
+## bitXor(a,b) {#bitxora-b}
 ## bitNot(a) {#bitnota}
-## bitShiftLeft(a, b) {#bitshiftlefta-b}
+## bitShiftLeft(a,b) {#bitshiftlefta-b}
-## bitShiftRight(a, b) {#bitshiftrighta-b}
+## bitShiftRight(a,b) {#bitshiftrighta-b}
-## bitRotateLeft(a, b) {#bitrotatelefta-b}
+## bitRotateLeft(a,b) {#bitrotatelefta-b}
-## bitRotateRight(a, b) {#bitrotaterighta-b}
+## bitRotateRight(a,b) {#bitrotaterighta-b}
-## bitTest(a, b) {#bittesta-b}
+## bitTest(a,b) {#bittesta-b}
-## bitTestAll(a, b) {#bittestalla-b}
+## bitTestAll(a,b) {#bittestalla-b}
-## bitTestAny(a, b) {#bittestanya-b}
+## bitTestAny(a,b) {#bittestanya-b}
 [来源文章](https://clickhouse.tech/docs/en/query_language/functions/bit_functions/)
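+一个示意性示例,展示上述几个位操作函数(结果仅供参考):
+
+``` sql
+SELECT bitAnd(5, 3) AS a, bitOr(5, 3) AS o, bitXor(5, 3) AS x, bitShiftLeft(1, 4) AS s
+```
+
+预期返回 a = 1,o = 7,x = 6,s = 16。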
diff --git a/docs/zh/query_language/functions/bitmap_functions.md b/docs/zh/sql_reference/functions/bitmap_functions.md
similarity index 96%
rename from docs/zh/query_language/functions/bitmap_functions.md
rename to docs/zh/sql_reference/functions/bitmap_functions.md
index 498212bc1fe..3415b590644 100644
--- a/docs/zh/query_language/functions/bitmap_functions.md
+++ b/docs/zh/sql_reference/functions/bitmap_functions.md
@@ -1,3 +1,4 @@
+
 # 位图函数 {#wei-tu-han-shu}
 位图函数用于对两个位图对象进行计算,对于任何一个位图函数,它都将返回一个位图对象,例如and,or,xor,not等等。
@@ -6,7 +7,7 @@
 我们使用RoaringBitmap实际存储位图对象,当基数小于或等于32时,它使用Set保存。当基数大于32时,它使用RoaringBitmap保存。这也是为什么低基数集的存储更快的原因。
-有关RoaringBitmap的更多信息,请参阅:[CRoaring](https://github.com/RoaringBitmap/CRoaring)。
+有关RoaringBitmap的更多信息,请参阅:[CRoaring](https://github.com/RoaringBitmap/CRoaring)。
 ## bitmapBuild {#bitmapbuild}
@@ -153,7 +154,7 @@ SELECT bitmapHasAll(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])) AS res
 │ 0 │
 └─────┘
-## bitmapAnd {#bitmapand}
+## bitmapAnd {#bitmapand}
 为两个位图对象进行与操作,返回一个新的位图对象。
@@ -174,13 +175,13 @@ SELECT bitmapToArray(bitmapAnd(bitmapBuild([1,2,3]),bitmapBuild([3,4,5]))) AS re
 │ [3] │
 └─────┘
-## bitmapOr {#bitmapor}
+## bitmapOr {#bitmapor}
 为两个位图对象进行或操作,返回一个新的位图对象。
 bitmapOr(bitmap1,bitmap2)
-**Parameters**
+**参数**
 - `bitmap1` – 位图对象。
 - `bitmap2` – 位图对象。
@@ -243,7 +244,7 @@ SELECT bitmapToArray(bitmapAndnot(bitmapBuild([1,2,3]),bitmapBuild([3,4,5]))) AS
 bitmapCardinality(bitmap)
-**Parameters**
+**参数**
 - `bitmap` – 位图对象。
@@ -263,7 +264,7 @@ SELECT bitmapCardinality(bitmapBuild([1, 2, 3, 4, 5])) AS res
 bitmapMin(bitmap)
-**Parameters**
+**参数**
 - `bitmap` – 位图对象。
@@ -283,7 +284,7 @@ SELECT bitmapMin(bitmapBuild([1, 2, 3, 4, 5])) AS res
 bitmapMax(bitmap)
-**Parameters**
+**参数**
 - `bitmap` – 位图对象。
@@ -297,7 +298,7 @@ SELECT bitmapMax(bitmapBuild([1, 2, 3, 4, 5])) AS res
 │ 5 │
 └─────┘
-## bitmapAndCardinality {#bitmapandcardinality}
+## bitmapAndCardinality {#bitmapandcardinality}
 为两个位图对象进行与操作,返回结果位图的基数。
@@ -360,7 +361,7 @@ SELECT bitmapXorCardinality(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])) AS res;
 │ 4 │
 └─────┘
-## bitmapAndnotCardinality {#bitmapandnotcardinality}
+## bitmapAndnotCardinality {#bitmapandnotcardinality}
 计算两个位图的差异,返回结果位图的基数。
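+一个示意性示例(结果仅供参考):
+
+``` sql
+SELECT bitmapAndnotCardinality(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])) AS res
+```
+
+差集为 {1,2},因此 res 应为 2。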
diff --git a/docs/zh/query_language/functions/comparison_functions.md b/docs/zh/sql_reference/functions/comparison_functions.md
similarity index 69%
rename from docs/zh/query_language/functions/comparison_functions.md
rename to docs/zh/sql_reference/functions/comparison_functions.md
index ce8c3728b5b..a73d983f386 100644
--- a/docs/zh/query_language/functions/comparison_functions.md
+++ b/docs/zh/sql_reference/functions/comparison_functions.md
@@ -1,13 +1,14 @@
+
 # 比较函数 {#bi-jiao-han-shu}
 比较函数始终返回0或1(UInt8)。
 可以比较以下类型:
-- Numbers
+- 数字
 - String 和 FixedString
-- Date
+- 日期
-- DateTime
+- 日期时间
 以上每个组内的类型均可互相比较,但是对于不同组的类型间不能够进行比较。
@@ -17,16 +18,16 @@
 注意。直到1.1.54134版本,有符号和无符号数字的比较方式与C++相同。换句话说,在SELECT 9223372036854775807 &gt; -1 等情况下,您可能会得到错误的结果。 此行为在版本1.1.54134中已更改,现在在数学上是正确的。
-## equals, a = b and a == b operator {#equals-a-b-and-a-b-operator}
+## equals,a = b 和 a == b 运算符 {#equals-a-b-and-a-b-operator}
-## notEquals, a ! operator= b and a `<>` b {#notequals-a-operator-b-and-a-b}
+## notEquals,a != b 和 a `<>` b 运算符 {#notequals-a-operator-b-and-a-b}
-## less, `< operator` {#less-operator}
+## less,`<` 运算符 {#less-operator}
-## greater, `> operator` {#greater-operator}
+## greater,`>` 运算符 {#greater-operator}
-## lessOrEquals, `<= operator` {#lessorequals-operator}
+## lessOrEquals,`<=` 运算符 {#lessorequals-operator}
-## greaterOrEquals, `>= operator` {#greaterorequals-operator}
+## greaterOrEquals,`>=` 运算符 {#greaterorequals-operator}
 [来源文章](https://clickhouse.tech/docs/en/query_language/functions/comparison_functions/)
diff --git a/docs/zh/query_language/functions/conditional_functions.md b/docs/zh/sql_reference/functions/conditional_functions.md
similarity index 91%
rename from docs/zh/query_language/functions/conditional_functions.md
rename to docs/zh/sql_reference/functions/conditional_functions.md
index 175656b8374..d9721fa3f60 100644
--- a/docs/zh/query_language/functions/conditional_functions.md
+++ b/docs/zh/sql_reference/functions/conditional_functions.md
@@ -1,13 +1,14 @@
+
 # 条件函数 {#tiao-jian-han-shu}
-## if(cond, then, else), cond ? operator then : else {#ifcond-then-else-cond-operator-then-else}
+## if(cond, then, else),cond ? then : else 运算符 {#ifcond-then-else-cond-operator-then-else}
 如果`cond != 0`则返回`then`,如果`cond = 0`则返回`else`。
 `cond`必须是`UInt8`类型,`then`和`else`必须存在最低的共同类型。
 `then`和`else`可以是`NULL`
-## multiIf {#multiif}
+## multiIf {#multiif}
 允许您在查询中更紧凑地编写[CASE](../operators.md#operator_case)运算符。
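+一个示意性示例(使用 `system.numbers` 表,结果仅供参考):
+
+``` sql
+SELECT number, multiIf(number < 3, 'small', number < 7, 'medium', 'large') AS size
+FROM system.numbers LIMIT 10
+```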
diff --git a/docs/zh/query_language/functions/date_time_functions.md b/docs/zh/sql_reference/functions/date_time_functions.md
similarity index 66%
rename from docs/zh/query_language/functions/date_time_functions.md
rename to docs/zh/sql_reference/functions/date_time_functions.md
index fe9961d7658..ca974f563db 100644
--- a/docs/zh/query_language/functions/date_time_functions.md
+++ b/docs/zh/sql_reference/functions/date_time_functions.md
@@ -1,3 +1,4 @@
+
 # 时间日期函数 {#shi-jian-ri-qi-han-shu}
 支持时区。
@@ -22,11 +23,11 @@ SELECT
 将Date或DateTime转换为指定的时区。
-## toYear {#toyear}
+## toYear {#toyear}
 将Date或DateTime转换为包含年份编号(AD)的UInt16类型的数字。
-## toQuarter {#toquarter}
+## toQuarter {#toquarter}
 将Date或DateTime转换为包含季度编号的UInt8类型的数字。
@@ -34,15 +35,15 @@ SELECT
 将Date或DateTime转换为包含月份编号(1-12)的UInt8类型的数字。
-## toDayOfYear {#todayofyear}
+## toDayOfYear {#todayofyear}
 将Date或DateTime转换为包含一年中的某一天的编号的UInt16(1-366)类型的数字。
-## toDayOfMonth {#todayofmonth}
+## toDayOfMonth {#todayofmonth}
 将Date或DateTime转换为包含一月中的某一天的编号的UInt8(1-31)类型的数字。
-## toDayOfWeek {#todayofweek}
+## toDayOfWeek {#todayofweek}
 将Date或DateTime转换为包含一周中的某一天的编号的UInt8(周一是1, 周日是7)类型的数字。
@@ -55,7 +56,7 @@ SELECT
 将DateTime转换为包含一小时中分钟数(0-59)的UInt8数字。
-## toSecond {#tosecond}
+## toSecond {#tosecond}
 将DateTime转换为包含一分钟中秒数(0-59)的UInt8数字。
 闰秒不计算在内。
@@ -64,22 +65,22 @@ SELECT
 将DateTime转换为unix时间戳。
-## toStartOfYear {#tostartofyear}
+## toStartOfYear {#tostartofyear}
 将Date或DateTime向前取整到本年的第一天。
 返回Date类型。
-## toStartOfISOYear {#tostartofisoyear}
+## toStartOfISOYear {#tostartofisoyear}
 将Date或DateTime向前取整到ISO本年的第一天。
 返回Date类型。
-## toStartOfQuarter {#tostartofquarter}
+## toStartOfQuarter {#tostartofquarter}
 将Date或DateTime向前取整到本季度的第一天。
 返回Date类型。
-## toStartOfMonth {#tostartofmonth}
+## toStartOfMonth {#tostartofmonth}
 将Date或DateTime向前取整到本月的第一天。
 返回Date类型。
@@ -92,31 +93,31 @@ SELECT
 将Date或DateTime向前取整到本周的星期一。
 返回Date类型。
-## toStartOfDay {#tostartofday}
+## toStartOfDay {#tostartofday}
 将DateTime向前取整到当日的开始。
-## toStartOfHour {#tostartofhour}
+## toStartOfHour {#tostartofhour}
 将DateTime向前取整到当前小时的开始。
-## toStartOfMinute {#tostartofminute}
+## toStartOfMinute {#tostartofminute}
 将DateTime向前取整到当前分钟的开始。
-## toStartOfFiveMinute {#tostartoffiveminute}
+## toStartOfFiveMinute {#tostartoffiveminute}
 将DateTime以五分钟为单位向前取整到最接近的时间点。
-## toStartOfTenMinutes {#tostartoftenminutes}
+## toStartOfTenMinutes {#tostartoftenminutes}
 将DateTime以十分钟为单位向前取整到最接近的时间点。
-## toStartOfFifteenMinutes {#tostartoffifteenminutes}
+## toStartOfFifteenMinutes {#tostartoffifteenminutes}
 将DateTime以十五分钟为单位向前取整到最接近的时间点。
-## toStartOfInterval(time\_or\_data, INTERVAL x unit \[, time\_zone\]) {#tostartofintervaltime-or-data-interval-x-unit-time-zone}
+## toStartOfInterval(time\_or\_data, INTERVAL x unit \[, time\_zone\]) {#tostartofintervaltime-or-data-interval-x-unit-time-zone}
 这是名为`toStartOf*`的所有函数的通用函数。例如,
 `toStartOfInterval(t,INTERVAL 1 year)`返回与`toStartOfYear(t)`相同的结果,
@@ -168,31 +169,31 @@ SELECT
 将Date或DateTime转换为包含ISO周数的UInt8类型的编号。
-## now {#now}
+## now {#now}
 不接受任何参数并在请求执行时的某一刻返回当前时间(DateTime)。
 此函数返回一个常量,即使请求需要很长时间才能完成。
-## today {#today}
+## today {#today}
 不接受任何参数并在请求执行时的某一刻返回当前日期(Date)。
-其功能与’toDate(now())’相同。
+其功能与'toDate(now())'相同。
-## yesterday {#yesterday}
+## yesterday {#yesterday}
 不接受任何参数并在请求执行时的某一刻返回昨天的日期(Date)。
-其功能与’today() - 1’相同。
+其功能与'today() - 1'相同。
-## timeSlot {#timeslot}
+## timeSlot {#timeslot}
 将时间向前取整半小时。
 此功能用于Yandex.Metrica,因为如果跟踪标记显示单个用户的连续综合浏览量在时间上严格超过此数量,则半小时是将会话分成两个会话的最短时间。这意味着(tag id,user id,time slot)可用于搜索相应会话中包含的综合浏览量。
-## toYYYYMM {#toyyyymm}
+## toYYYYMM {#toyyyymm}
 将Date或DateTime转换为包含年份和月份编号的UInt32类型的数字(YYYY \* 100 + MM)。
-## toYYYYMMDD {#toyyyymmdd}
+## toYYYYMMDD {#toyyyymmdd}
 将Date或DateTime转换为包含年份和月份编号的UInt32类型的数字(YYYY \* 10000 + MM \* 100 + DD)。
@@ -200,7 +201,7 @@ SELECT
 将Date或DateTime转换为包含年份和月份编号的UInt64类型的数字(YYYY \* 10000000000 + MM \* 100000000 + DD \* 1000000 + hh \* 10000 + mm \* 100 + ss)。
-## addYears, addMonths, addWeeks, addDays, addHours, addMinutes, addSeconds, addQuarters {#addyears-addmonths-addweeks-adddays-addhours-addminutes-addseconds-addquarters}
+## addYears,addMonths,addWeeks,addDays,addHours,addMinutes,addSeconds,addQuarters {#addyears-addmonths-addweeks-adddays-addhours-addminutes-addseconds-addquarters}
 函数将一段时间间隔添加到Date/DateTime,然后返回Date/DateTime。例如:
@@ -217,7 +218,7 @@ SELECT
 │ 2019-01-01 │ 2019-01-01 00:00:00 │
 └─────────────────────┴──────────────────────────┘
-## subtractYears, subtractMonths, subtractWeeks, subtractDays, subtractHours, subtractMinutes, subtractSeconds, subtractQuarters {#subtractyears-subtractmonths-subtractweeks-subtractdays-subtracthours-subtractminutes-subtractseconds-subtractquarters}
+## subtractYears,subtractMonths,subtractWeeks,subtractDays,subtractHours,subtractMinutes,subtractSeconds,subtractQuarters {#subtractyears-subtractmonths-subtractweeks-subtractdays-subtracthours-subtractminutes-subtractseconds-subtractquarters}
 函数将Date/DateTime减去一段时间间隔,然后返回Date/DateTime。例如:
@@ -234,59 +235,59 @@ SELECT
 │ 2018-01-01 │ 2018-01-01 00:00:00 │
 └──────────────────────────┴───────────────────────────────┘
-## dateDiff(‘unit’, t1, t2, \[timezone\]) {#datediffunit-t1-t2-timezone}
+## dateDiff(‘unit’,t1,t2,\[timezone\]) {#datediffunit-t1-t2-timezone}
-返回以’unit’为单位表示的两个时间之间的差异,例如`'hours'`。 ‘t1’和’t2’可以是Date或DateTime,如果指定’timezone’,它将应用于两个参数。如果不是,则使用来自数据类型’t1’和’t2’的时区。如果时区不相同,则结果将是未定义的。
+返回以'unit'为单位表示的两个时间之间的差异,例如`'hours'`。 't1'和't2'可以是Date或DateTime,如果指定'timezone',它将应用于两个参数。如果不是,则使用来自数据类型't1'和't2'的时区。如果时区不相同,则结果将是未定义的。
-Supported unit values:
+支持的单位值:
-| unit |
-|---------|
-| second |
-| minute |
-| hour |
-| day |
-| week |
-| month |
-| quarter |
-| year |
+| 单位 |
+|---------|
+| second |
+| minute |
+| hour |
+| day |
+| week |
+| month |
+| quarter |
+| year |
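+一个示意性示例(结果仅供参考):
+
+``` sql
+SELECT dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00')) AS h
+```
+
+h 应为 25。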
-## timeSlots(StartTime, Duration,\[, Size\]) {#timeslotsstarttime-duration-size}
+## timeSlots(StartTime, Duration,\[, Size\]) {#timeslotsstarttime-duration-size}
 它返回一个时间数组,其中包括从«StartTime»开始到«StartTime + Duration 秒»内的所有符合«size»(以秒为单位)步长的时间点。其中«size»是一个可选参数,默认为1800。
 例如,`timeSlots(toDateTime('2012-01-01 12:20:00'),600) = [toDateTime('2012-01-01 12:00:00'),toDateTime('2012-01-01 12:30:00' )]`。
 这对于搜索在相应会话中综合浏览量是非常有用的。
-## formatDateTime(Time, Format\[, Timezone\]) {#formatdatetimetime-format-timezone}
+## formatDateTime(Time, Format\[, Timezone\]) {#formatdatetimetime-format-timezone}
 函数根据给定的格式字符串来格式化时间。请注意:格式字符串必须是常量表达式,例如:单个结果列不能有多种格式字符串。
 支持的格式修饰符:
 («Example» 列是对`2018-01-02 22:33:44`的格式化结果)
-| Modifier | Description | Example |
-|----------|---------------------------------------------------------|------------|
-| %C | year divided by 100 and truncated to integer (00-99) | 20 |
-| %d | day of the month, zero-padded (01-31) | 02 |
-| %D | Short MM/DD/YY date, equivalent to %m/%d/%y | 01/02/2018 |
-| %e | day of the month, space-padded ( 1-31) | 2 |
-| %F | short YYYY-MM-DD date, equivalent to %Y-%m-%d | 2018-01-02 |
-| %H | hour in 24h format (00-23) | 22 |
-| %I | hour in 12h format (01-12) | 10 |
-| %j | day of the year (001-366) | 002 |
-| %m | month as a decimal number (01-12) | 01 |
-| %M | minute (00-59) | 33 |
-| %n | new-line character (‘’) | |
-| %p | AM or PM designation | PM |
-| %R | 24-hour HH:MM time, equivalent to %H:%M | 22:33 |
-| %S | second (00-59) | 44 |
-| %t | horizontal-tab character (’) | |
-| %T | ISO 8601 time format (HH:MM:SS), equivalent to %H:%M:%S | 22:33:44 |
-| %u | ISO 8601 weekday as number with Monday as 1 (1-7) | 2 |
-| %V | ISO 8601 week number (01-53) | 01 |
-| %w | weekday as a decimal number with Sunday as 0 (0-6) | 2 |
-| %y | Year, last two digits (00-99) | 18 |
-| %Y | Year | 2018 |
-| %% | a % sign | % |
+| 修饰符 | 描述 | 示例 |
+|--------|-------------------------------------------|------------|
+| %C | 年除以100并截断为整数(00-99) | 20 |
+| %d | 月中的一天,零填充(01-31) | 02 |
+| %D | 短MM/DD/YY日期,相当于%m/%d/%y | 01/02/2018 |
+| %e | 月中的一天,空格填充(1-31) | 2 |
+| %F | 短YYYY-MM-DD日期,相当于%Y-%m-%d | 2018-01-02 |
+| %H | 小时,24小时格式(00-23) | 22 |
+| %I | 小时,12小时格式(01-12) | 10 |
+| %j | 一年中的一天(001-366) | 002 |
+| %m | 月份,十进制数(01-12) | 01 |
+| %M | 分钟(00-59) | 33 |
+| %n | 换行符('') | |
+| %p | AM或PM标识 | PM |
+| %R | 24小时HH:MM时间,相当于%H:%M | 22:33 |
+| %S | 秒(00-59) | 44 |
+| %t | 水平制表符(') | |
+| %T | ISO8601时间格式(HH:MM:SS),相当于%H:%M:%S | 22:33:44 |
+| %u | ISO8601格式的星期几,星期一为1(1-7) | 2 |
+| %V | ISO8601周编号(01-53) | 01 |
+| %w | 星期几,十进制数,星期日为0(0-6) | 2 |
+| %y | 年份,最后两位数字(00-99) | 18 |
+| %Y | 年 | 2018 |
+| %% | %符号 | % |
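+基于上表的一个示意性示例(结果仅供参考):
+
+``` sql
+SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%F %T') AS res
+```
+
+res 应为 '2018-01-02 22:33:44'。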
 [来源文章](https://clickhouse.tech/docs/en/query_language/functions/date_time_functions/)
diff --git a/docs/zh/query_language/functions/encoding_functions.md b/docs/zh/sql_reference/functions/encoding_functions.md
similarity index 95%
rename from docs/zh/query_language/functions/encoding_functions.md
rename to docs/zh/sql_reference/functions/encoding_functions.md
index 589edd75450..42d10c4408f 100644
--- a/docs/zh/query_language/functions/encoding_functions.md
+++ b/docs/zh/sql_reference/functions/encoding_functions.md
@@ -1,3 +1,4 @@
+
 # 编码函数 {#bian-ma-han-shu}
 ## hex {#hex}
@@ -17,11 +18,11 @@
 接受FixedString(16)值。返回包含36个字符的文本格式的字符串。
-## bitmaskToList(num) {#bitmasktolistnum}
+## bitmaskToList(num) {#bitmasktolistnum}
 接受一个整数。返回一个字符串,其中包含一组2的幂列表,其列表中的所有值相加等于这个整数。列表使用逗号分割,按升序排列。
-## bitmaskToArray(num) {#bitmasktoarraynum}
+## bitmaskToArray(num) {#bitmasktoarraynum}
 接受一个整数。返回一个UInt64类型数组,其中包含一组2的幂列表,其列表中的所有值相加等于这个整数。数组中的数字按升序排列。
diff --git a/docs/zh/sql_reference/functions/ext_dict_functions.md b/docs/zh/sql_reference/functions/ext_dict_functions.md
new file mode 100644
index 00000000000..23077618722
--- /dev/null
+++ b/docs/zh/sql_reference/functions/ext_dict_functions.md
@@ -0,0 +1,47 @@
+
+# 字典函数 {#zi-dian-han-shu}
+
+有关连接和配置外部词典的信息,请参阅[外部词典](../../sql_reference/dictionaries/external_dictionaries/external_dicts.md)。
+
+## dictGetUInt8,dictGetUInt16,dictGetUInt32,dictGetUInt64 {#dictgetuint8-dictgetuint16-dictgetuint32-dictgetuint64}
+
+## dictGetInt8,dictGetInt16,dictGetInt32,dictGetInt64 {#dictgetint8-dictgetint16-dictgetint32-dictgetint64}
+
+## dictGetFloat32,dictGetFloat64 {#dictgetfloat32-dictgetfloat64}
+
+## dictGetDate,dictGetDateTime {#dictgetdate-dictgetdatetime}
+
+## dictGetUUID {#dictgetuuid}
+
+## dictGetString {#dictgetstring}
+
+`dictGetT('dict_name', 'attr_name', id)`
+
+- 使用'id'键获取dict\_name字典中attr\_name属性的值。`dict_name`和`attr_name`是常量字符串。`id`必须是UInt64。
+  如果字典中没有`id`键,则返回字典描述中指定的默认值。
+
+## dictGetTOrDefault {#ext_dict_functions-dictgettordefault}
+
+`dictGetTOrDefault('dict_name', 'attr_name', id, default)`
+
+与`dictGetT`函数相同,但默认值取自函数的最后一个参数。
+
+## dictIsIn {#dictisin}
+
+`dictIsIn ('dict_name', child_id, ancestor_id)`
+
+- 对于'dict\_name'分层字典,查找'child\_id'键是否位于'ancestor\_id'内(或匹配'ancestor\_id')。返回UInt8。
+
+## dictGetHierarchy {#dictgethierarchy}
+
+`dictGetHierarchy('dict_name', id)`
+
+- 对于'dict\_name'分层字典,返回从'id'开始并沿父元素链继续的字典键数组。返回Array(UInt64)
+
+## dictHas {#dicthas}
+
+`dictHas('dict_name', id)`
+
+- 检查字典是否存在指定的`id`。如果不存在,则返回0;如果存在,则返回1。
+
+[来源文章](https://clickhouse.tech/docs/en/query_language/functions/ext_dict_functions/)
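+一个调用形式的示意(其中 `my_dict` 与 `attr_name` 为虚构的字典名和属性名,仅用于说明调用方式):
+
+``` sql
+SELECT dictGetString('my_dict', 'attr_name', toUInt64(1)) -- 'my_dict' 为虚构示例,需替换为已配置的字典
+```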
diff --git a/docs/zh/query_language/functions/functions_for_nulls.md b/docs/zh/sql_reference/functions/functions_for_nulls.md
similarity index 93%
rename from docs/zh/query_language/functions/functions_for_nulls.md
rename to docs/zh/sql_reference/functions/functions_for_nulls.md
index d6db2906e92..9252d8bfeb0 100644
--- a/docs/zh/query_language/functions/functions_for_nulls.md
+++ b/docs/zh/sql_reference/functions/functions_for_nulls.md
@@ -1,3 +1,4 @@
+
 # Nullable处理函数 {#nullablechu-li-han-shu}
 ## isNull {#isnull}
@@ -76,7 +77,7 @@
 1 rows in set. Elapsed: 0.010 sec.
-## coalesce {#coalesce}
+## coalesce {#coalesce}
 检查从左到右是否传递了«NULL»参数并返回第一个非`NULL`参数。
@@ -88,8 +89,8 @@
 **返回值**
-- 第一个非’NULL\`参数。
-- `NULL`,如果所有参数都是’NULL\`。
+- 第一个非`NULL`参数。
+- `NULL`,如果所有参数都是`NULL`。
 **示例**
@@ -125,12 +126,12 @@
 **参数:**
 - `x` — 要检查«NULL»的值。
-- `alt` — 如果`x`为’NULL\`,函数返回的值。
+- `alt` — 如果`x`为`NULL`,函数返回的值。
 **返回值**
-- The value `x`, if `x` is not `NULL`.
-- The value `alt`, if `x` is `NULL`.
+- `x` 的值,如果 `x` 不是 `NULL`。
+- `alt` 的值,如果 `x` 是 `NULL`。
 **示例**
@@ -177,7 +178,7 @@
 ## assumeNotNull {#assumenotnull}
-将[Nullable](../../data_types/nullable.md)类型的值转换为非`Nullable`类型的值。
+将[可为空](../../sql_reference/data_types/nullable.md)类型的值转换为非`Nullable`类型的值。
 assumeNotNull(x)
@@ -221,7 +222,7 @@
 │ Int8 │
 └──────────────────────────────┘
-## toNullable {#tonullable}
+## toNullable {#tonullable}
 将参数的类型转换为`Nullable`。
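+一个示意性示例(结果仅供参考):
+
+``` sql
+SELECT toTypeName(toNullable(10)) AS t
+```
+
+t 应为 'Nullable(UInt8)'。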
diff --git a/docs/zh/query_language/functions/geo.md b/docs/zh/sql_reference/functions/geo.md
similarity index 89%
rename from docs/zh/query_language/functions/geo.md
rename to docs/zh/sql_reference/functions/geo.md
index 3e6e6aa6b64..3f6e6a3bb10 100644
--- a/docs/zh/query_language/functions/geo.md
+++ b/docs/zh/sql_reference/functions/geo.md
@@ -1,6 +1,7 @@
+
 # GEO函数 {#geohan-shu}
-## greatCircleDistance {#greatcircledistance}
+## greatCircleDistance {#greatcircledistance}
 使用[great-circle distance公式](https://en.wikipedia.org/wiki/Great-circle_distance)计算地球表面两点之间的距离。
@@ -35,7 +36,7 @@ SELECT greatCircleDistance(55.755831, 37.617673, -55.755831, -37.617673)
 └───────────────────────────────────────────────────────────────────┘
 ```
-## pointInEllipses {#pointinellipses}
+## pointInEllipses {#pointinellipses}
 检查指定的点是否至少包含在指定的一个椭圆中。
 下述中的坐标是几何图形在笛卡尔坐标系中的位置。
@@ -78,8 +79,8 @@ pointInPolygon((x, y), [(a, b), (c, d) ...], ...)
 **输入参数**
-- `(x, y)` — 平面上某个点的坐标。[Tuple](../../data_types/tuple.md)类型,包含坐标的两个数字。
-- `[(a, b), (c, d) ...]` — 多边形的顶点。[Array](../../data_types/array.md)类型。每个顶点由一对坐标`(a, b)`表示。顶点可以按顺时针或逆时针指定。顶点的个数应该大于等于3。同时只能是常量的。
+- `(x, y)` — 平面上某个点的坐标。[元组](../../sql_reference/data_types/tuple.md)类型,包含坐标的两个数字。
+- `[(a, b), (c, d) ...]` — 多边形的顶点。[数组](../../sql_reference/data_types/array.md)类型。每个顶点由一对坐标`(a, b)`表示。顶点可以按顺时针或逆时针指定。顶点的个数应该大于等于3。同时只能是常量的。
 - 该函数还支持镂空的多边形(切除部分)。如果需要,可以使用函数的其他参数定义需要切除部分的多边形。(该函数不支持非单连通多边形。)
 **返回值**
@@ -163,16 +164,16 @@ geoToH3(lon, lat, resolution)
 **输入值**
-- `lon` — 经度。 [Float64](../../data_types/float.md)类型。
-- `lat` — 纬度。 [Float64](../../data_types/float.md)类型。
-- `resolution` — 索引的分辨率。 取值范围为: `[0, 15]`。 [UInt8](../../data_types/int_uint.md)类型。
+- `lon` — 经度。 [Float64](../../sql_reference/data_types/float.md)类型。
+- `lat` — 纬度。 [Float64](../../sql_reference/data_types/float.md)类型。
+- `resolution` — 索引的分辨率。 取值范围为: `[0, 15]`。 [UInt8](../../sql_reference/data_types/int_uint.md)类型。
 **返回值**
 - H3中六边形的索引值。
 - 发生异常时返回0。
-[UInt64](../../data_types/int_uint.md)类型。
+[UInt64](../../sql_reference/data_types/int_uint.md)类型。
 **示例**
diff --git a/docs/zh/query_language/functions/hash_functions.md b/docs/zh/sql_reference/functions/hash_functions.md
similarity index 92%
rename from docs/zh/query_language/functions/hash_functions.md
rename to docs/zh/sql_reference/functions/hash_functions.md
index 835da4a9204..9dc4aa9b794 100644
--- a/docs/zh/query_language/functions/hash_functions.md
+++ b/docs/zh/sql_reference/functions/hash_functions.md
@@ -1,3 +1,4 @@
+
 # Hash函数 {#hashhan-shu}
 Hash函数可以用于将元素不可逆的伪随机打乱。
@@ -6,12 +7,12 @@ Hash函数可以用于将元素不可逆的伪随机打乱。
 计算字符串的MD5。然后获取结果的前8个字节并将它们作为UInt64(大端)返回。
 此函数相当低效(500万个短字符串/秒/核心)。
-如果您不需要一定使用MD5,请使用‘sipHash64’函数。
+如果您不需要一定使用MD5,请使用'sipHash64'函数。
 ## MD5 {#md5}
 计算字符串的MD5并将结果放入FixedString(16)中返回。
-如果您只是需要一个128位的hash,同时不需要一定使用MD5,请使用‘sipHash128’函数。
+如果您只是需要一个128位的hash,同时不需要一定使用MD5,请使用'sipHash128'函数。
 如果您要获得与md5sum程序相同的输出结果,请使用lower(hex(MD5(s)))。
 ## sipHash64 {#siphash64}
@@ -56,7 +57,7 @@ SipHash是一种加密哈希函数。它的处理性能至少比MD5快三倍。
 我们建议仅在必须使用这些Hash函数且无法更改的情况下使用这些函数。
 即使在这些情况下,我们仍建议将函数采用在写入数据时使用预计算的方式将其计算完毕。而不是在SELECT中计算它们。
-## URLHash(url\[, N\]) {#urlhashurl-n}
+## URLHash(url\[,N\]) {#urlhashurl-n}
 一种快速的非加密哈希函数,用于规范化的从URL获得的字符串。
 `URLHash(s)` - 从一个字符串计算一个哈希,如果结尾存在尾随符号`/`,`?`或`#`则忽略。
@@ -93,19 +94,19 @@ URL的层级与URLHierarchy中的层级相同。 此函数被用于Yandex.Metric
 接受UInt64类型的参数。返回Int32。
 有关更多信息,请参见链接:[JumpConsistentHash](https://arxiv.org/pdf/1406.2294.pdf)
-## murmurHash2\_32, murmurHash2\_64 {#murmurhash2-32-murmurhash2-64}
+## murmurHash2\_32,murmurHash2\_64 {#murmurhash2-32-murmurhash2-64}
 计算字符串的MurmurHash2。
 接受一个String类型的参数。返回UInt64或UInt32。
 有关更多信息,请参阅链接:[MurmurHash2](https://github.com/aappleby/smhasher)
-## murmurHash3\_32, murmurHash3\_64, murmurHash3\_128 {#murmurhash3-32-murmurhash3-64-murmurhash3-128}
+## murmurHash3\_32,murmurHash3\_64,murmurHash3\_128 {#murmurhash3-32-murmurhash3-64-murmurhash3-128}
 计算字符串的MurmurHash3。
 接受一个String类型的参数。返回UInt64或UInt32或FixedString(16)。
 有关更多信息,请参阅链接:[MurmurHash3](https://github.com/aappleby/smhasher)
-## xxHash32, xxHash64 {#xxhash32-xxhash64}
+## xxHash32,xxHash64 {#xxhash32-xxhash64}
 计算字符串的xxHash。
 接受一个String类型的参数。返回UInt64或UInt32。
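+一个调用形式的示意(具体哈希值取决于实现,此处不列出结果):
+
+``` sql
+SELECT sipHash64('ClickHouse') AS h1, xxHash32('ClickHouse') AS h2
+```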
diff --git a/docs/zh/query_language/functions/higher_order_functions.md b/docs/zh/sql_reference/functions/higher_order_functions.md
similarity index 76%
rename from docs/zh/query_language/functions/higher_order_functions.md
rename to docs/zh/sql_reference/functions/higher_order_functions.md
index 9e84a73f0b8..6d090e7330d 100644
--- a/docs/zh/query_language/functions/higher_order_functions.md
+++ b/docs/zh/sql_reference/functions/higher_order_functions.md
@@ -1,3 +1,4 @@
+
 # 高阶函数 {#gao-jie-han-shu}
 ## `->` 运算符, lambda(params, expr) 函数 {#yun-suan-fu-lambdaparams-expr-han-shu}
@@ -10,17 +11,16 @@
 高阶函数可以接受多个参数的lambda函数作为其参数,在这种情况下,高阶函数需要同时传递几个长度相等的数组,这些数组将被传递给lambda参数。
-除了’arrayMap’和’arrayFilter’以外的所有其他函数,都可以省略第一个参数(lambda函数)。在这种情况下,默认返回数组元素本身。
+除了'arrayMap'和'arrayFilter'以外的所有其他函数,都可以省略第一个参数(lambda函数)。在这种情况下,默认返回数组元素本身。
 ### arrayMap(func, arr1, …) {#higher_order_functions-array-map}
-将从’func’函数的原始应用程序获得的数组返回到’arr’数组中的每个元素。
-Returns an array obtained from the original application of the ‘func’ function to each element in the ‘arr’ array.
+将 'func' 函数应用于 'arr' 数组中的每个元素,并返回由其结果组成的数组。
 ### arrayFilter(func, arr1, …) {#arrayfilterfunc-arr1}
-Returns an array containing only the elements in ‘arr1’ for which ‘func’ returns something other than 0.
+返回一个数组,其中仅包含 'arr1' 中使 'func' 返回非0值的元素。
 示例:
@@ -47,27 +48,27 @@ SELECT
 ### arrayCount(\[func,\] arr1, …) {#arraycountfunc-arr1}
-返回数组arr中非零元素的数量,如果指定了‘func’,则通过‘func’的返回值确定元素是否为非零元素。
+返回数组arr中非零元素的数量,如果指定了'func',则通过'func'的返回值确定元素是否为非零元素。
 ### arrayExists(\[func,\] arr1, …) {#arrayexistsfunc-arr1}
-返回数组‘arr’中是否存在非零元素,如果指定了‘func’,则使用‘func’的返回值确定元素是否为非零元素。
+返回数组'arr'中是否存在非零元素,如果指定了'func',则使用'func'的返回值确定元素是否为非零元素。
 ### arrayAll(\[func,\] arr1, …) {#arrayallfunc-arr1}
-返回数组‘arr’中是否存在为零的元素,如果指定了‘func’,则使用‘func’的返回值确定元素是否为零元素。
+返回数组'arr'中是否存在为零的元素,如果指定了'func',则使用'func'的返回值确定元素是否为零元素。
 ### arraySum(\[func,\] arr1, …) {#arraysumfunc-arr1}
-计算arr数组的总和,如果指定了‘func’,则通过‘func’的返回值计算数组的总和。
+计算arr数组的总和,如果指定了'func',则通过'func'的返回值计算数组的总和。
 ### arrayFirst(func, arr1, …) {#arrayfirstfunc-arr1}
-返回数组中第一个匹配的元素,函数使用‘func’匹配所有元素,直到找到第一个匹配的元素。
+返回数组中第一个匹配的元素,函数使用'func'匹配所有元素,直到找到第一个匹配的元素。
 ### arrayFirstIndex(func, arr1, …) {#arrayfirstindexfunc-arr1}
-返回数组中第一个匹配的元素的下标索引,函数使用‘func’匹配所有元素,直到找到第一个匹配的元素。
+返回数组中第一个匹配的元素的下标索引,函数使用'func'匹配所有元素,直到找到第一个匹配的元素。
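+一个示意性示例(结果仅供参考):
+
+``` sql
+SELECT arrayFirst(x -> x > 2, [1, 2, 3, 4]) AS v, arrayFirstIndex(x -> x > 2, [1, 2, 3, 4]) AS i
+```
+
+v 应为 3,i 应为 3(下标从1开始)。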
 ### arrayCumSum(\[func,\] arr1, …) {#arraycumsumfunc-arr1}
diff --git a/docs/zh/query_language/functions/in_functions.md b/docs/zh/sql_reference/functions/in_functions.md
similarity index 65%
rename from docs/zh/query_language/functions/in_functions.md
rename to docs/zh/sql_reference/functions/in_functions.md
index 60df3e25db1..f4f358bad9a 100644
--- a/docs/zh/query_language/functions/in_functions.md
+++ b/docs/zh/sql_reference/functions/in_functions.md
@@ -1,8 +1,9 @@
+
 # IN运算符相关函数 {#inyun-suan-fu-xiang-guan-han-shu}
-## in, notIn, globalIn, globalNotIn {#in-notin-globalin-globalnotin}
+## in,notIn,globalIn,globalNotIn {#in-notin-globalin-globalnotin}
-请参阅[IN 运算符](../select.md#select-in-operators)部分。
+请参阅[IN 运算符](../statements/select.md#select-in-operators)部分。
 ## tuple(x, y, …), operator (x, y, …) {#tuplex-y-operator-x-y}
@@ -10,10 +11,10 @@
 对于具有类型T1,T2,…的列,它返回包含这些列的元组(T1,T2,…)。 执行该函数没有任何成本。
 元组通常用作IN运算符的中间参数值,或用于创建lambda函数的形参列表。 元组不能写入表。
-## tupleElement(tuple, n), operator x.N {#tupleelementtuple-n-operator-x-n}
+## tupleElement(tuple, n),运算符 x.N {#tupleelementtuple-n-operator-x-n}
 函数用于从元组中获取列。
-’N’是列索引,从1开始。N必须是常量正整数常数,并且不大于元组的大小。
+'N'是列索引,从1开始。'N'必须是正整数常量,并且不大于元组的大小。
 执行该函数没有任何成本。
 [来源文章](https://clickhouse.tech/docs/en/query_language/functions/in_functions/)
diff --git a/docs/zh/query_language/functions/index.md b/docs/zh/sql_reference/functions/index.md
similarity index 91%
rename from docs/zh/query_language/functions/index.md
rename to docs/zh/sql_reference/functions/index.md
index 8d178592e92..1f61a1f2919 100644
--- a/docs/zh/query_language/functions/index.md
+++ b/docs/zh/sql_reference/functions/index.md
@@ -1,10 +1,11 @@
+
 # 函数 {#han-shu}
 ClickHouse中至少存在两种类型的函数 - 常规函数(它们称之为«函数»)和聚合函数。 常规函数的工作就像分别为每一行执行一次函数计算一样(对于每一行,函数的结果不依赖于其他行)。 聚合函数则从各行累积一组值(即函数的结果依赖于整个结果集)。
 在本节中,我们将讨论常规函数。 有关聚合函数,请参阅«聚合函数»一节。
- \* - ’arrayJoin’函数与表函数均属于第三种类型的函数。 \*
+ \* - 'arrayJoin'函数与表函数均属于第三种类型的函数。 \*
 ## 强类型 {#qiang-lei-xing}
@@ -22,7 +23,7 @@ ClickHouse中至少存在两种类型的函数 - 常规函数(它们称之为
 为了简单起见,某些函数的某些参数只能是常量。 例如,LIKE运算符的右参数必须是常量。
 几乎所有函数都为常量参数返回常量。 除了用于生成随机数的函数。
-’now’函数为在不同时间运行的查询返回不同的值,但结果被视为常量,因为常量在单个查询中很重要。
+'now'函数为在不同时间运行的查询返回不同的值,但结果被视为常量,因为常量在单个查询中很重要。
 常量表达式也被视为常量(例如,LIKE运算符的右半部分可以由多个常量构造)。
 对于常量和非常量参数,可以以不同方式实现函数(执行不同的代码)。 但是,对于包含相同数据的常量和非常量参数它们的结果应该是一致的。
@@ -54,8 +55,8 @@ ClickHouse中至少存在两种类型的函数 - 常规函数(它们称之为
 这意味着可以在不同的服务器上执行功能。
 例如,在查询`SELECT f(sum(g(x)))FROM distributed_table GROUP BY h(y)中,`
-- 如果`distributed_table`至少有两个分片,则在远程服务器上执行函数’g’和’h’,并在请求服务器上执行函数’f’。
-- 如果`distributed_table`只有一个分片,则在该分片的服务器上执行所有’f’,’g’和’h’功能。
+- 如果`distributed_table`至少有两个分片,则在远程服务器上执行函数'g'和'h',并在请求服务器上执行函数'f'。
+- 如果`distributed_table`只有一个分片,则在该分片的服务器上执行所有'f','g'和'h'函数。
 函数的结果通常不依赖于它在哪个服务器上执行。但是,有时这很重要。
 例如,使用字典的函数时将使用运行它们的服务器上存在的字典。
diff --git a/docs/zh/query_language/functions/introspection.md b/docs/zh/sql_reference/functions/introspection.md
similarity index 63%
rename from docs/zh/query_language/functions/introspection.md
rename to docs/zh/sql_reference/functions/introspection.md
index bb1d884d15b..f0c907b3e67 100644
--- a/docs/zh/query_language/functions/introspection.md
+++ b/docs/zh/sql_reference/functions/introspection.md
@@ -1,61 +1,64 @@
 ---
-en_copy: true
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_priority: 65
+toc_title: "\u81EA\u7701"
---
-# Introspection Functions {#introspection-functions}
+# 内省函数 {#introspection-functions}
-You can use functions described in this chapter to introspect [ELF](https://en.wikipedia.org/wiki/Executable_and_Linkable_Format) and [DWARF](https://en.wikipedia.org/wiki/DWARF) for query profiling.
+您可以使用本章中描述的函数来检查 [ELF](https://en.wikipedia.org/wiki/Executable_and_Linkable_Format) 和 [DWARF](https://en.wikipedia.org/wiki/DWARF) 调试信息,用于查询性能分析。
-!!! warning "Warning"
-    These functions are slow and may impose security considerations.
+!!! warning "警告"
+    这些函数运行缓慢,并且可能带来安全隐患。
-For proper operation of introspection functions:
+为了使内省函数正常工作:
-- Install the `clickhouse-common-static-dbg` package.
+- 安装 `clickhouse-common-static-dbg` 包。
-- Set the [allow\_introspection\_functions](../../operations/settings/settings.md#settings-allow_introspection_functions) setting to 1.
+- 将 [allow\_introspection\_functions](../../operations/settings/settings.md#settings-allow_introspection_functions) 设置为1。
 For security reasons introspection functions are disabled by default.
-ClickHouse saves profiler reports to the [trace\_log](../../operations/system_tables.md#system_tables-trace_log) system table. Make sure the table and profiler are configured properly.
+ClickHouse将探查器报告保存到 [trace\_log](../../operations/system_tables.md#system_tables-trace_log) 系统表。确保正确配置了表和探查器。
 ## addressToLine {#addresstoline}
-Converts virtual memory address inside ClickHouse server process to the filename and the line number in ClickHouse source code.
+将ClickHouse服务器进程内的虚拟内存地址转换为ClickHouse源代码中的文件名和行号。
-If you use official ClickHouse packages, you need to install the `clickhouse-common-static-dbg` package.
+如果您使用官方的ClickHouse软件包,您需要安装 `clickhouse-common-static-dbg` 包。
-**Syntax**
+**语法**
 ``` sql
 addressToLine(address_of_binary_instruction)
 ```
-**Parameters**
+**参数**
-- `address_of_binary_instruction` ([UInt64](../../data_types/int_uint.md)) — Address of instruction in a running process.
+- `address_of_binary_instruction` ([UInt64](../../sql_reference/data_types/int_uint.md)) — 正在运行的进程中指令的地址。
-**Returned value**
+**返回值**
-- Source code filename and the line number in this file delimited by colon.
+- 源代码文件名和此文件中用冒号分隔的行号。
 For example, `/build/obj-x86_64-linux-gnu/../src/Common/ThreadPool.cpp:199`, where `199` is a line number.
-- Name of a binary, if the function couldn’t find the debug information.
+- 二进制文件的名称,如果函数找不到调试信息。
-- Empty string, if the address is not valid.
+- 空字符串,如果地址无效。
-Type: [String](../../data_types/string.md).
+类型: [字符串](../../sql_reference/data_types/string.md)。
-**Example**
+**示例**
-Enabling introspection functions:
+启用内省函数:
 ``` sql
 SET allow_introspection_functions=1
 ```
-Selecting the first string from the `trace_log` system table:
+从 `trace_log` 系统表中选择第一条记录:
 ``` sql
 SELECT * FROM system.trace_log LIMIT 1 \G
 ```
 query_id: 421b6855-1858-45a5-8f37-f383409d6d72
 trace: [140658411141617,94784174532828,94784076370703,94784076372094,94784076361020,94784175007680,140658411116251,140658403895439]
 ```
-The `trace` field contains the stack trace at the moment of sampling.
+该 `trace` 字段包含采样时的堆栈跟踪。
-Getting the source code filename and the line number for a single address:
+获取单个地址的源代码文件名和行号:
 ``` sql
 SELECT addressToLine(94784076370703) \G
 ```
 ``` text
 Row 1:
 ──────
-addressToLine(94784076370703): /build/obj-x86_64-linux-gnu/../dbms/Common/ThreadPool.cpp:199
+addressToLine(94784076370703): /build/obj-x86_64-linux-gnu/../src/Common/ThreadPool.cpp:199
 ```
-Applying the function to the whole stack trace:
+将函数应用于整个堆栈跟踪:
 ``` sql
 SELECT
@@ -97,15 +100,15 @@
 LIMIT 1
 \G
 ```
-The [arrayMap](higher_order_functions.md#higher_order_functions-array-map) function allows to process each individual element of the `trace` array by the `addressToLine` function. The result of this processing you see in the `trace_source_code_lines` column of output.
+[arrayMap](higher_order_functions.md#higher_order_functions-array-map) 函数允许用 `addressToLine` 函数处理 `trace` 数组中的每个元素。处理结果显示在输出的 `trace_source_code_lines` 列中。
 ``` text
 Row 1:
 ──────
 trace_source_code_lines: /lib/x86_64-linux-gnu/libpthread-2.27.so
 /usr/lib/debug/usr/bin/clickhouse
-/build/obj-x86_64-linux-gnu/../dbms/Common/ThreadPool.cpp:199
-/build/obj-x86_64-linux-gnu/../dbms/Common/ThreadPool.h:155
+/build/obj-x86_64-linux-gnu/../src/Common/ThreadPool.cpp:199
+/build/obj-x86_64-linux-gnu/../src/Common/ThreadPool.h:155
 /usr/include/c++/9/bits/atomic_base.h:551
 /usr/lib/debug/usr/bin/clickhouse
 /lib/x86_64-linux-gnu/libpthread-2.27.so
 ## addressToSymbol {#addresstosymbol}
-Converts virtual memory address inside ClickHouse server process to the symbol from ClickHouse object files.
+将ClickHouse服务器进程内的虚拟内存地址转换为ClickHouse对象文件中的符号。
-**Syntax**
+**语法**
 ``` sql
 addressToSymbol(address_of_binary_instruction)
 ```
-**Parameters**
+**参数**
-- `address_of_binary_instruction` ([UInt64](../../data_types/int_uint.md)) — Address of instruction in a running process.
+- `address_of_binary_instruction` ([UInt64](../../sql_reference/data_types/int_uint.md)) — 正在运行的进程中指令的地址。
-**Returned value**
+**返回值**
-- Symbol from ClickHouse object files.
-- Empty string, if the address is not valid.
+- 来自ClickHouse对象文件的符号。
+- 空字符串,如果地址无效。
-Type: [String](../../data_types/string.md).
+类型: [字符串](../../sql_reference/data_types/string.md)。
-**Example**
+**示例**
-Enabling introspection functions:
+启用内省函数:
 ``` sql
 SET allow_introspection_functions=1
 ```
-Selecting the first string from the `trace_log` system table:
+从 `trace_log` 系统表中选择第一条记录:
 ``` sql
 SELECT * FROM system.trace_log LIMIT 1 \G
 ```
 query_id: 724028bf-f550-45aa-910d-2af6212b94ac
 trace: [94138803686098,94138815010911,94138815096522,94138815101224,94138815102091,94138814222988,94138806823642,94138814457211,94138806823642,94138814457211,94138806823642,94138806795179,94138806796144,94138753770094,94138753771646,94138753760572,94138852407232,140399185266395,140399178045583]
 ```
-The `trace` field contains the stack trace at the moment of sampling.
+该 `trace` 字段包含采样时的堆栈跟踪。
-Getting a symbol for a single address:
+获取单个地址的符号:
 ``` sql
 SELECT addressToSymbol(94138803686098) \G
 ```
 Row 1:
 addressToSymbol(94138803686098): _ZNK2DB24IAggregateFunctionHelperINS_20AggregateFunctionSumImmNS_24AggregateFunctionSumDataImEEEEE19addBatchSinglePlaceEmPcPPKNS_7IColumnEPNS_5ArenaE
 ```
-Applying the function to the whole stack trace:
+将函数应用于整个堆栈跟踪:
 ``` sql
 SELECT
@@ -183,7 +186,7 @@
 LIMIT 1
 \G
 ```
-The [arrayMap](higher_order_functions.md#higher_order_functions-array-map) function allows to process each individual element of the `trace` array by the `addressToSymbols` function. The result of this processing you see in the `trace_symbols` column of output.
+[arrayMap](higher_order_functions.md#higher_order_functions-array-map) 函数允许用 `addressToSymbols` 函数处理 `trace` 数组中的每个元素。处理结果显示在输出的 `trace_symbols` 列中。
 ``` text
 Row 1:
@@ -211,34 +214,34 @@
 clone
 ## demangle {#demangle}
-Converts a symbol that you can get using the [addressToSymbol](#addresstosymbol) function to the C++ function name.
+将通过 [addressToSymbol](#addresstosymbol) 函数获取的符号转换为C++函数名。
-**Syntax**
+**语法**
 ``` sql
 demangle(symbol)
 ```
-**Parameters**
+**参数**
-- `symbol` ([String](../../data_types/string.md)) — Symbol from an object file.
+- `symbol` ([字符串](../../sql_reference/data_types/string.md)) — 对象文件中的符号。
-**Returned value**
+**返回值**
-- Name of the C++ function.
-- Empty string if a symbol is not valid.
+- C++函数的名称。
+- 如果符号无效,则为空字符串。
-Type: [String](../../data_types/string.md).
+类型: [字符串](../../sql_reference/data_types/string.md)。
-**Example**
+**示例**
-Enabling introspection functions:
+启用内省函数:
 ``` sql
 SET allow_introspection_functions=1
 ```
-Selecting the first string from the `trace_log` system table:
+从 `trace_log` 系统表中选择第一条记录:
 ``` sql
 SELECT * FROM system.trace_log LIMIT 1 \G
 ```
 query_id: 724028bf-f550-45aa-910d-2af6212b94ac
 trace: [94138803686098,94138815010911,94138815096522,94138815101224,94138815102091,94138814222988,94138806823642,94138814457211,94138806823642,94138814457211,94138806823642,94138806795179,94138806796144,94138753770094,94138753771646,94138753760572,94138852407232,140399185266395,140399178045583]
 ```
-The `trace` field contains the stack trace at the moment of sampling.
+该 `trace` 字段包含采样时的堆栈跟踪。
-Getting a function name for a single address:
+获取单个地址的函数名称:
 ``` sql
 SELECT demangle(addressToSymbol(94138803686098)) \G
 ```
 Row 1:
 demangle(addressToSymbol(94138803686098)): DB::IAggregateFunctionHelper > >::addBatchSinglePlace(unsigned long, char*, DB::IColumn const**, DB::Arena*) const
 ```
-Applying the function to the whole stack trace:
+将函数应用于整个堆栈跟踪:
 ``` sql
 SELECT
@@ -280,7 +283,7 @@
 LIMIT 1
 \G
 ```
-The [arrayMap](higher_order_functions.md#higher_order_functions-array-map) function allows to process each individual element of the `trace` array by the `demangle` function. The result of this processing you see in the `trace_functions` column of output.
+[arrayMap](higher_order_functions.md#higher_order_functions-array-map) 函数允许用 `demangle` 函数处理 `trace` 数组中的每个元素。处理结果显示在输出的 `trace_functions` 列中。
 ``` text
 Row 1:
diff --git a/docs/zh/query_language/functions/ip_address_functions.md b/docs/zh/sql_reference/functions/ip_address_functions.md
similarity index 92%
rename from docs/zh/query_language/functions/ip_address_functions.md
rename to docs/zh/sql_reference/functions/ip_address_functions.md
index 0e012f90f84..17f4c4a5991 100644
--- a/docs/zh/query_language/functions/ip_address_functions.md
+++ b/docs/zh/sql_reference/functions/ip_address_functions.md
@@ -1,3 +1,4 @@
+
 # IP函数 {#iphan-shu}
 ## IPv4NumToString(num) {#ipv4numtostringnum}
@@ -37,7 +38,7 @@ LIMIT 10
 │ 83.149.48.xxx │ 17406 │
 └────────────────┴───────┘
-由于使用’xxx’是不规范的,因此将来可能会更改。我们建议您不要依赖此格式。
+由于使用'xxx'是不规范的,因此将来可能会更改。我们建议您不要依赖此格式。
 ### IPv6NumToString(x) {#ipv6numtostringx}
@@ -117,7 +118,7 @@ SELECT IPv6NumToString(IPv4ToIPv6(IPv4StringToNum('192.168.0.1'))) AS addr
 │ ::ffff:192.168.0.1 │
 └────────────────────┘
-## cutIPv6(x, bitsToCutForIPv6, bitsToCutForIPv4) {#cutipv6x-bitstocutforipv6-bitstocutforipv4}
+## cutIPv6(x,bitsToCutForIPv6,bitsToCutForIPv4) {#cutipv6x-bitstocutforipv6-bitstocutforipv4}
 接受一个FixedString(16)类型的IPv6地址,返回一个String,这个String中包含了删除指定位之后的地址的文本格式。例如:
@@ -134,7 +135,7 @@ SELECT
 │ 2001:db8:ac10:fe01:feed:babe:cafe:0 │ ::ffff:192.168.0.0 │
 └─────────────────────────────────────┴─────────────────────┘
-## IPv4CIDRToRange(ipv4, cidr), {#ipv4cidrtorangeipv4-cidr}
+## IPv4CIDRToRange(ipv4, cidr) {#ipv4cidrtorangeipv4-cidr}
 接受一个IPv4地址以及一个UInt8类型的CIDR。返回包含子网最低范围以及最高范围的元组。
@@ -146,7 +147,7 @@ SELECT IPv4CIDRToRange(toIPv4('192.168.5.2'), 16)
 │ ('192.168.0.0','192.168.255.255') │
 └────────────────────────────────────────────┘
-## IPv6CIDRToRange(ipv6, cidr), {#ipv6cidrtorangeipv6-cidr}
+## IPv6CIDRToRange(ipv6, cidr) {#ipv6cidrtorangeipv6-cidr}
 接受一个IPv6地址以及一个UInt8类型的CIDR。返回包含子网最低范围以及最高范围的元组。
@@ -158,9 +159,9 @@ SELECT IPv6CIDRToRange(toIPv6('2001:0db8:0000:85a3:0000:0000:ac1f:8001'), 32);
 │ ('2001:db8::','2001:db8:ffff:ffff:ffff:ffff:ffff:ffff') │
 └────────────────────────────────────────────────────────────────────────┘
-## toIPv4(string) {#toipv4string}
+## toIPv4(string) {#toipv4string}
-`IPv4StringToNum()`的别名,它采用字符串形式的IPv4地址并返回[IPv4](../../data_types/domains/ipv4.md)类型的值,该二进制值等于`IPv4StringToNum()`返回的值。
+`IPv4StringToNum()`的别名,它采用字符串形式的IPv4地址并返回[IPv4](../../sql_reference/data_types/domains/ipv4.md)类型的值,该二进制值等于`IPv4StringToNum()`返回的值。
 ``` sql
 WITH
@@ -186,9 +187,9 @@ SELECT
 │ ABE1822D │ ABE1822D │
 └───────────────────────────────────┴──────────────────────────┘
-## toIPv6(string) {#toipv6string}
+## toIPv6(string) {#toipv6string}
-`IPv6StringToNum()`的别名,它采用字符串形式的IPv6地址并返回[IPv6](../../data_types/domains/ipv6.md)类型的值,该二进制值等于`IPv6StringToNum()`返回的值。
+`IPv6StringToNum()`的别名,它采用字符串形式的IPv6地址并返回[IPv6](../../sql_reference/data_types/domains/ipv6.md)类型的值,该二进制值等于`IPv6StringToNum()`返回的值。
 ``` sql
 WITH
diff --git a/docs/zh/query_language/functions/json_functions.md b/docs/zh/sql_reference/functions/json_functions.md
similarity index 92%
rename from docs/zh/query_language/functions/json_functions.md
rename to docs/zh/sql_reference/functions/json_functions.md
index 5203ae91291..ca76edde09c 100644
--- a/docs/zh/query_language/functions/json_functions.md
+++ b/docs/zh/sql_reference/functions/json_functions.md
@@ -1,3 +1,4 @@
+
 # JSON函数 {#jsonhan-shu}
 在Yandex.Metrica中,用户使用JSON作为访问参数。为了处理这些JSON,实现了一些函数。(尽管在大多数情况下,JSON是预先进行额外处理的,并将结果值放在单独的列中。)所有的这些函数都进行了尽可能的假设。以使函数能够尽快的完成工作。
@@ -9,27 +10,27 @@
 3. 函数可以随意的在多层嵌套结构下查找字段。如果存在多个匹配字段,则返回第一个匹配字段。
 4. JSON除字符串文本外不存在空格字符。
-## visitParamHas(params, name) {#visitparamhasparams-name}
+## visitParamHas(params, name) {#visitparamhasparams-name}
 检查是否存在«name»名称的字段
-## visitParamExtractUInt(params, name) {#visitparamextractuintparams-name}
+## visitParamExtractUInt(params, name) {#visitparamextractuintparams-name}
 将名为«name»的字段的值解析成UInt64。如果这是一个字符串字段,函数将尝试从字符串的开头解析一个数字。如果该字段不存在,或无法从它中解析到数字,则返回0。
-## visitParamExtractInt(params, name) {#visitparamextractintparams-name}
+## visitParamExtractInt(params, name) {#visitparamextractintparams-name}
 与visitParamExtractUInt相同,但返回Int64。
-## visitParamExtractFloat(params, name) {#visitparamextractfloatparams-name}
+## visitParamExtractFloat(params, name) {#visitparamextractfloatparams-name}
 与visitParamExtractUInt相同,但返回Float64。
-## visitParamExtractBool(params, name) {#visitparamextractboolparams-name}
+## visitParamExtractBool(params, name) {#visitparamextractboolparams-name}
 解析true/false值。其结果是UInt8类型的。
-## visitParamExtractRaw(params, name) {#visitparamextractrawparams-name}
+## visitParamExtractRaw(params, name) {#visitparamextractrawparams-name}
 返回字段的值,包含空格符。
 visitParamExtractRaw('{"abc":"\\n\\u0000"}', 'abc') = '"\\n\\u0000"'
 visitParamExtractRaw('{"abc":{"def":[1,2,3]}}', 'abc') = '{"def":[1,2,3]}'
-## visitParamExtractString(params, name) {#visitparamextractstringparams-name}
+## visitParamExtractString(params, name) {#visitparamextractstringparams-name}
 使用双引号解析字符串。这个值没有进行转义。如果转义失败,它将返回一个空白字符串。
diff --git a/docs/zh/query_language/functions/logical_functions.md b/docs/zh/sql_reference/functions/logical_functions.md
similarity index 70%
rename from docs/zh/query_language/functions/logical_functions.md
rename to docs/zh/sql_reference/functions/logical_functions.md
index 2f2a61f57a6..18a383edbdb 100644
--- a/docs/zh/query_language/functions/logical_functions.md
+++ b/docs/zh/sql_reference/functions/logical_functions.md
@@ -1,15 +1,16 @@
+
 # 逻辑函数 {#luo-ji-han-shu}
 逻辑函数可以接受任何数字类型的参数,并返回UInt8类型的0或1。
 当向函数传递零时,函数将判定为«false»,否则,任何其他非零的值都将被判定为«true»。
-## and, AND operator {#and-and-operator}
+## and,AND 运算符 {#and-and-operator}
-## or, OR operator {#or-or-operator}
+## or,OR 运算符 {#or-or-operator}
-## not, NOT operator {#not-not-operator}
+## not,NOT 运算符 {#not-not-operator}
-## xor {#xor}
+## xor {#xor}
 [来源文章](https://clickhouse.tech/docs/en/query_language/functions/logical_functions/)
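+一个示意性示例(结果仅供参考):
+
+``` sql
+SELECT and(1, 1) AS a, or(0, 0) AS o, not(1) AS n, xor(1, 0) AS x
+```
+
+预期返回 a = 1,o = 0,n = 0,x = 1。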
diff --git a/docs/zh/sql_reference/functions/machine_learning_functions.md b/docs/zh/sql_reference/functions/machine_learning_functions.md
new file mode 100644
index 00000000000..0bdea52c59f
--- /dev/null
+++ b/docs/zh/sql_reference/functions/machine_learning_functions.md
@@ -0,0 +1,16 @@
+
+# 机器学习函数 {#ji-qi-xue-xi-han-shu}
+
+## evalMLMethod(预测) {#machine_learning_methods-evalmlmethod}
+
+要使用拟合好的回归模型进行预测,请使用`evalMLMethod`函数。
+请参阅`linearRegression`中的链接。
+
+## stochasticLinearRegression {#stochastic-linear-regression}
+
+`stochasticLinearRegression`聚合函数使用线性模型和MSE损失函数实现随机梯度下降法。 使用`evalMLMethod`来预测新数据。
+请参阅示例和注释[此处](../../sql_reference/aggregate_functions/reference.md#agg_functions-stochasticlinearregression)。
+
+## stochasticLogisticRegression {#stochastic-logistic-regression}
+
+`stochasticLogisticRegression`聚合函数实现了二元分类问题的随机梯度下降法。 使用`evalMLMethod`来预测新数据。
+请参阅示例和注释[此处](../../sql_reference/aggregate_functions/reference.md#agg_functions-stochasticlogisticregression)。
diff --git a/docs/zh/query_language/functions/math_functions.md b/docs/zh/sql_reference/functions/math_functions.md
similarity index 88%
rename from docs/zh/query_language/functions/math_functions.md
rename to docs/zh/sql_reference/functions/math_functions.md
index 38b3115e396..fef88389b86 100644
--- a/docs/zh/query_language/functions/math_functions.md
+++ b/docs/zh/sql_reference/functions/math_functions.md
@@ -1,3 +1,4 @@
+
 # 数学函数 {#shu-xue-han-shu}
 以下所有的函数都返回一个Float64类型的数值。返回结果总是以尽可能最大精度返回,但还是可能与机器中可表示最接近该值的数字不同。
@@ -14,7 +15,7 @@
 接受一个数值类型的参数并返回它的指数。
-## log(x), ln(x) {#logx-lnx}
+## log(x),ln(x) {#logx-lnx}
 接受一个数值类型的参数并返回它的自然对数。
@@ -44,7 +45,7 @@
 ## erf(x) {#erfx}
-如果’x’是非负数,那么erf(x / σ√2)是具有正态分布且标准偏差为«σ»的随机变量的值与预期值之间的距离大于«x»。
+如果'x'是非负数,那么 erf(x / σ√2) 是具有正态分布且标准偏差为«σ»的随机变量的取值与其期望值之间的距离大于«x»的概率。
 示例 (三西格玛准则):
@@ -76,7 +77,7 @@ SELECT erf(3 / sqrt(2))
 返回x的三角余弦值。
-## tan(x) {#tanx}
+## tan(x) {#tanx}
 返回x的三角正切值。
@@ -88,11 +89,11 @@ SELECT erf(3 / sqrt(2))
 返回x的反三角余弦值。
-## atan(x) {#atanx}
+## atan(x) {#atanx}
 返回x的反三角正切值。
-## pow(x, y), power(x, y) {#powx-y-powerx-y}
+## pow(x,y),power(x,y) {#powx-y-powerx-y}
 接受x和y两个参数。返回x的y次方。
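+一个示意性示例(结果仅供参考):
+
+``` sql
+SELECT pow(2, 10) AS p, sqrt(16) AS s, round(e(), 3) AS e3
+```
+
+预期返回 p = 1024,s = 4,e3 = 2.718。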
diff --git a/docs/zh/query_language/functions/other_functions.md b/docs/zh/sql_reference/functions/other_functions.md
similarity index 85%
rename from docs/zh/query_language/functions/other_functions.md
rename to docs/zh/sql_reference/functions/other_functions.md
index 8383c57150c..e0c7e47be58 100644
--- a/docs/zh/query_language/functions/other_functions.md
+++ b/docs/zh/sql_reference/functions/other_functions.md
@@ -1,6 +1,7 @@
+
 # 其他函数 {#qi-ta-han-shu}
-## hostName() {#hostname}
+## hostName() {#hostname}
 返回一个字符串,其中包含执行此函数的主机的名称。 对于分布式处理,如果在远程服务器上执行此函数,则将返回远程服务器主机的名称。
@@ -12,7 +13,7 @@
 **参数**
-- `expr` — 任何一个返回[String](../../data_types/string.md)结果的表达式。[String](../../data_types/string.md)
+- `expr` — 任何一个返回[字符串](../../sql_reference/data_types/string.md)结果的表达式。[字符串](../../sql_reference/data_types/string.md)
 **返回值**
@@ -60,10 +61,8 @@ SELECT 'some-file-name' AS a, basename(a)
 以文本格式(以制表符分隔)向控制台输出值时,计算近似宽度。
 系统使用此函数实现Pretty格式。
-Calculates the approximate width when outputting values to the console in text format (tab-separated).
-This function is used by the system for implementing Pretty formats.
-`NULL` is represented as a string corresponding to `NULL` in `Pretty` formats.
+在 `Pretty` 格式中,`NULL` 表示为字符串 `NULL`。
 SELECT visibleWidth(NULL)
@@ -77,12 +78,12 @@
 如果将`NULL`作为参数传递给函数,那么它返回`Nullable(Nothing)`类型,它对应于ClickHouse中的内部`NULL`。
-## blockSize() {#function-blocksize}
+## blockSize() {#function-blocksize}
 获取Block的大小。
 在ClickHouse中,查询始终工作在Block(包含列的部分的集合)上。此函数允许您获取调用其的块的大小。
-## materialize(x) {#materializex}
+## materialize(x) {#materializex}
 将一个常量列变为一个非常量列。
 在ClickHouse中,非常量列和常量列在内存中的表示方式不同。尽管函数对于常量列和非常量总是返回相同的结果,但它们的工作方式可能完全不同(执行不同的代码)。此函数用于调试这种行为。
 接受任何参数,包括`NULL`。始终返回0。
 但是,函数的参数总是被计算的。该函数可以用于基准测试。
-## sleep(seconds) {#sleepseconds}
+## sleep(seconds) {#sleepseconds}
-在每个Block上休眠’seconds’秒。可以是整数或浮点数。
+在每个Block上休眠'seconds'秒。可以是整数或浮点数。
-## sleepEachRow(seconds) {#sleepeachrowseconds}
+## sleepEachRow(seconds) {#sleepeachrowseconds}
-在每行上休眠’seconds’秒。可以是整数或浮点数。
+在每行上休眠'seconds'秒。可以是整数或浮点数。
-## currentDatabase() {#currentdatabase}
+## currentDatabase() {#currentdatabase}
 返回当前数据库的名称。
 当您需要在CREATE TABLE中的表引擎参数中指定数据库,您可以使用此函数。
@@ -119,11 +120,11 @@
 ## hasColumnInTable(\[‘hostname’\[, ‘username’\[, ‘password’\]\],\] ‘database’, ‘table’, ‘column’) {#hascolumnintablehostname-username-password-database-table-column}
-Accepts constant strings: database name, table name, and column name. Returns a UInt8 constant expression equal to 1 if there is a column, otherwise 0. If the hostname parameter is set, the test will run on a remote server.
-The function throws an exception if the table does not exist.
-For elements in a nested data structure, the function checks for the existence of a column. For the nested data structure itself, the function returns 0.
+接受常量字符串:数据库名称、表名称和列名称。 如果存在列,则返回等于1的UInt8常量表达式,否则返回0。 如果设置了hostname参数,则测试将在远程服务器上运行。
+如果表不存在,该函数将引发异常。
+对于嵌套数据结构中的元素,该函数检查是否存在列。 对于嵌套数据结构本身,函数返回0。
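+一个示意性示例(使用系统表,结果仅供参考):
+
+``` sql
+SELECT hasColumnInTable('system', 'numbers', 'number') AS res
+```
+
+res 应为 1。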
-## bar {#function-bar}
+## bar {#function-bar}
 使用unicode构建图表。
@@ -176,7 +177,7 @@ ORDER BY h ASC
 │ 23 │ 400397 │ █████████████▎ │
 └────┴────────┴────────────────────┘
-## transform {#transform}
+## transform {#transform}
 根据定义,将某些元素转换为其他元素。
 此函数有两种使用方式:
@@ -187,9 +188,9 @@ ORDER BY h ASC
 `array_from` – 用于转换的常量数组。
-`array_to` – 将‘from’中的值转换为的常量数组。
+`array_to` – 将'from'中的值转换为的常量数组。
-`default` – 如果‘x’不等于‘from’中的任何值,则默认转换的值。
+`default` – 如果'x'不等于'from'中的任何值,则默认转换的值。
 `array_from` 和 `array_to` – 拥有相同大小的数组。
@@ -201,7 +202,7 @@ ORDER BY h ASC
 对于相同的字母(T或U),如果数值类型,那么它们不可不完全匹配的,只需要具备共同的类型即可。
 例如,第一个参数是Int64类型,第二个参数是Array(UInt16)类型。
-如果’x’值等于’array\_from’数组中的一个元素,它将从’array\_to’数组返回一个对应的元素(下标相同)。否则,它返回’default’。如果’array\_from’匹配到了多个元素,则返回第一个匹配的元素。
+如果'x'值等于'array\_from'数组中的一个元素,它将从'array\_to'数组返回一个对应的元素(下标相同)。否则,它返回'default'。如果'array\_from'匹配到了多个元素,则返回第一个匹配的元素。
 示例:
@@ -223,8 +224,8 @@ ORDER BY c DESC
 1.
 `transform(x, array_from, array_to)`
-与第一种不同在于省略了’default’参数。
-如果’x’值等于’array\_from’数组中的一个元素,它将从’array\_to’数组返回相应的元素(下标相同)。 否则,它返回’x’。
+与第一种不同在于省略了'default'参数。
+如果'x'值等于'array\_from'数组中的一个元素,它将从'array\_to'数组返回相应的元素(下标相同)。 否则,它返回'x'。
 类型约束:
@@ -273,23 +274,23 @@ SELECT
 │ 192851925 │ 183.92 MiB │
 └────────────────┴────────────┘
-## least(a, b) {#leasta-b}
+## least(a,b) {#leasta-b}
 返回a和b中的最小值。
-## greatest(a, b) {#greatesta-b}
+## greatest(a,b) {#greatesta-b}
 返回a和b的最大值。
-## uptime() {#uptime}
+## uptime() {#uptime}
 返回服务正常运行的秒数。
-## version() {#version}
+## version() {#version}
 以字符串形式返回服务器的版本。
-## timezone() {#timezone}
+## timezone() {#timezone}
 返回服务器的时区。
@@ -305,7 +306,7 @@ SELECT
 返回行所在结果集中的序列号。此函数仅考虑受影响的Block。
-## runningDifference(x) {#other_functions-runningdifference}
+## runningDifference(x) {#other_functions-runningdifference}
 计算数据块中相邻行的值之间的差异。
 对于第一行返回0,并为每个后续行返回与前一行的差异。
@@ -340,9 +341,9 @@
 │ 1110 │ 2016-11-24 00:00:10 │ 1 │
 └─────────┴─────────────────────┴───────┘
-## runningDifferenceStartingWithFirstValue {#runningdifferencestartingwithfirstvalue}
+## runningDifferenceStartingWithFirstValue {#runningdifferencestartingwithfirstvalue}
-与[runningDifference](./other_functions.md#other_functions-runningdifference)相同,区别在于第一行返回第一行的值,后续每个后续行返回与上一行的差值。
+与[runningDifference](./other_functions.md#other_functions-runningdifference)相同,区别在于第一行返回第一行的值,后续每行返回与上一行的差值。
 ## MACNumToString(num) {#macnumtostringnum}
@@ -358,7 +359,7 @@
 ## getSizeOfEnumType {#getsizeofenumtype}
-返回[Enum](../../data_types/enum.md)中的枚举数量。
+返回[枚举](../../sql_reference/data_types/enum.md)中的枚举数量。
 getSizeOfEnumType(value)
@@ -453,7 +454,7 @@ FROM
 - 数值类型返回`0`。
 - 字符串类型返回空的字符串。
-- [Nullable](../../data_types/nullable.md)类型返回`ᴺᵁᴸᴸ`。
+- [可为空](../../sql_reference/data_types/nullable.md)类型返回`ᴺᵁᴸᴸ`。
 **示例**
 1 rows in set. Elapsed: 0.002 sec.
-## replicate {#replicate}
+## replicate {#replicate}
 使用单个值填充一个数组。
@@ -502,26 +503,26 @@ FROM
 │ [1,1,1] │
 └───────────────────────────────┘
-## filesystemAvailable {#filesystemavailable}
+## filesystemAvailable {#filesystemavailable}
 返回磁盘的剩余空间信息(以字节为单位)。使用配置文件中的path配置评估此信息。
-## filesystemCapacity {#filesystemcapacity}
+## filesystemCapacity {#filesystemcapacity}
 返回磁盘的容量信息,以字节为单位。使用配置文件中的path配置评估此信息。
-## finalizeAggregation {#function-finalizeaggregation}
+## finalizeAggregation {#function-finalizeaggregation}
 获取聚合函数的状态。返回聚合结果(最终状态)。
-## runningAccumulate {#function-runningaccumulate}
+## runningAccumulate {#function-runningaccumulate}
 获取聚合函数的状态并返回其具体的值。这是从第一行到当前行的所有行累计的结果。
 例如,获取聚合函数的状态(示例runningAccumulate(uniqState(UserID))),对于数据块的每一行,返回所有先前行和当前行的状态合并后的聚合函数的结果。
 因此,函数的结果取决于分区中数据块的顺序以及数据块中行的顺序。
-## joinGet(‘join\_storage\_table\_name’, ‘get\_column’, join\_key) {#joingetjoin-storage-table-name-get-column-join-key}
+## joinGet(‘join\_storage\_table\_name’, ‘get\_column’,join\_key) {#joingetjoin-storage-table-name-get-column-join-key}
 使用指定的连接键从Join类型引擎的表中获取数据。
diff --git a/docs/zh/query_language/functions/random_functions.md b/docs/zh/sql_reference/functions/random_functions.md
similarity index 98%
rename from docs/zh/query_language/functions/random_functions.md
rename to docs/zh/sql_reference/functions/random_functions.md
index 31283cce08a..1db2f4a8438 100644
--- a/docs/zh/query_language/functions/random_functions.md
+++ b/docs/zh/sql_reference/functions/random_functions.md
@@ -1,3 +1,4 @@
+
 # 随机函数 {#sui-ji-han-shu}
 随机函数使用非加密方式生成伪随机数字。
@@ -6,7 +7,7 @@
 您可以向它传递任何类型的参数,但传递的参数将不会使用在任何随机数生成过程中。
 此参数的唯一目的是防止公共子表达式消除,以便在相同的查询中使用相同的随机函数生成不同的随机数。
-## rand {#rand}
+## rand {#rand}
 返回一个UInt32类型的随机数字,所有UInt32类型的数字被生成的概率均相等。此函数以线性同余的方式生成随机数。
diff --git a/docs/zh/query_language/functions/rounding_functions.md b/docs/zh/sql_reference/functions/rounding_functions.md
similarity index 79%
rename from docs/zh/query_language/functions/rounding_functions.md
rename to docs/zh/sql_reference/functions/rounding_functions.md
index fb421be3b28..773f969090d 100644
--- a/docs/zh/query_language/functions/rounding_functions.md
+++ b/docs/zh/sql_reference/functions/rounding_functions.md
@@ -1,22 +1,23 @@
+
 # 取整函数 {#qu-zheng-han-shu}
-## floor(x\[, N\]) {#floorx-n}
+## floor(x\[, N\]) {#floorx-n}
 返回小于或等于x的最大舍入数。该函数使用参数乘1/10N,如果1/10N不精确,则选择最接近的精确的适当数据类型的数。
-‘N’是一个整数常量,可选参数。默认为0,这意味着不对其进行舍入。
-‘N’可以是负数。
+'N'是一个整数常量,可选参数。默认为0,这意味着不对其进行舍入。
+'N'可以是负数。
 示例: `floor(123.45, 1) = 123.4, floor(123.45, -1) = 120.`
 `x`是任何数字类型。结果与其为相同类型。
-对于整数参数,使用负‘N’值进行舍入是有意义的(对于非负«N»,该函数不执行任何操作)。
+对于整数参数,使用负'N'值进行舍入是有意义的(对于非负«N»,该函数不执行任何操作)。
 如果取整导致溢出(例如,floor(-128,-1)),则返回特定于实现的结果。
-## ceil(x\[, N\]), ceiling(x\[, N\]) {#ceilx-n-ceilingx-n}
+## ceil(x\[, N\]),ceiling(x\[, N\]) {#ceilx-n-ceilingx-n}
-返回大于或等于’x’的最小舍入数。在其他方面,它与’floor’功能相同(见上文)。
+返回大于或等于'x'的最小舍入数。在其他方面,它与'floor'功能相同(见上文)。
-## round(x\[, N\]) {#rounding_functions-round}
+## round(x\[, N\]) {#rounding_functions-round}
 将值取整到指定的小数位数。
@@ -26,7 +27,7 @@
 **参数:**
-- `expression` — 要进行取整的数字。可以是任何返回数字[类型](../../data_types/index.md#data_types)的[表达式](../syntax.md#syntax-expressions)。
+- `expression` — 要进行取整的数字。可以是任何返回数字[类型](../../sql_reference/data_types/index.md#data_types)的[表达式](../syntax.md#syntax-expressions)。
 - `decimal-places` — 整数类型。
     - 如果`decimal-places > 0`,则该函数将值舍入小数点右侧。
     - 如果`decimal-places < 0`,则该函数将小数点左侧的值四舍五入。
@@ -71,15 +72,15 @@ SELECT number / 2 AS x, round(x) FROM system.numbers LIMIT 3
 接受一个数字。如果数字小于1,则返回0。否则,它将数字向下舍入到最接近的(整个非负)2的x次幂。
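+根据上述定义的一个示意性示例(结果仅供参考):
+
+``` sql
+SELECT roundToExp2(20) AS a, roundToExp2(0.7) AS b
+```
+
+a 应为 16;b 小于1,因此应为 0。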
-## roundDuration(num) {#rounddurationnum}
+## roundDuration(num) {#rounddurationnum}
 接受一个数字。如果数字小于1,则返回0。否则,它将数字向下舍入为集合中的数字:1,10,30,60,120,180,240,300,600,1200,1800,3600,7200,18000,36000。此函数用于Yandex.Metrica报表中计算会话的持续时长。
-## roundAge(num) {#roundagenum}
+## roundAge(num) {#roundagenum}
 接受一个数字。如果数字小于18,则返回0。否则,它将数字向下舍入为集合中的数字:18,25,35,45,55。此函数用于Yandex.Metrica报表中用户年龄的计算。
-## roundDown(num, arr) {#rounddownnum-arr}
+## roundDown(num,arr) {#rounddownnum-arr}
 接受一个数字,将其向下舍入到指定数组中的元素。如果该值小于数组中的最低边界,则返回最低边界。
diff --git a/docs/zh/query_language/functions/splitting_merging_functions.md b/docs/zh/sql_reference/functions/splitting_merging_functions.md
similarity index 63%
rename from docs/zh/query_language/functions/splitting_merging_functions.md
rename to docs/zh/sql_reference/functions/splitting_merging_functions.md
index 7477e89441e..d217ea19f0d 100644
--- a/docs/zh/query_language/functions/splitting_merging_functions.md
+++ b/docs/zh/sql_reference/functions/splitting_merging_functions.md
@@ -1,17 +1,18 @@
+
 # 字符串拆分合并函数 {#zi-fu-chuan-chai-fen-he-bing-han-shu}
-## splitByChar(separator, s) {#splitbycharseparator-s}
+## splitByChar(separator, s) {#splitbycharseparator-s}
-将字符串以‘separator’拆分成多个子串。‘separator’必须为仅包含一个字符的字符串常量。
+将字符串以'separator'拆分成多个子串。'separator'必须为仅包含一个字符的字符串常量。
 返回拆分后的子串的数组。 如果分隔符出现在字符串的开头或结尾,或者如果有多个连续的分隔符,则将在对应位置填充空的子串。
-## splitByString(separator, s) {#splitbystringseparator-s}
+## splitByString(separator, s) {#splitbystringseparator-s}
 与上面相同,但它使用多个字符的字符串作为分隔符。 该字符串必须为非空。
-## arrayStringConcat(arr\[, separator\]) {#arraystringconcatarr-separator}
+## arrayStringConcat(arr\[, separator\]) {#arraystringconcatarr-separator}
-使用separator将数组中列出的字符串拼接起来。‘separator’是一个可选参数:一个常量字符串,默认情况下设置为空字符串。
+使用separator将数组中列出的字符串拼接起来。'separator'是一个可选参数:一个常量字符串,默认情况下设置为空字符串。
 返回拼接后的字符串。
 ## alphaTokens(s) {#alphatokenss}
diff --git a/docs/zh/query_language/functions/string_functions.md b/docs/zh/sql_reference/functions/string_functions.md
similarity index 78%
rename from docs/zh/query_language/functions/string_functions.md
rename to docs/zh/sql_reference/functions/string_functions.md
index a2b5355ae8c..c04305b9d67 100644
--- a/docs/zh/query_language/functions/string_functions.md
+++ b/docs/zh/sql_reference/functions/string_functions.md
@@ -1,6 +1,7 @@
+
 # 字符串函数 {#zi-fu-chuan-han-shu}
-## empty {#string-functions-empty}
+## empty {#string-functions-empty}
 对于空字符串返回1,对于非空字符串返回0。
 结果类型是UInt8。
@@ -13,32 +14,32 @@
 结果类型是UInt8。
 该函数也适用于数组。
-## length {#length}
+## length {#length}
 返回字符串的字节长度。
 结果类型是UInt64。
 该函数也适用于数组。
-## lengthUTF8 {#lengthutf8}
+## lengthUTF8 {#lengthutf8}
 假定字符串以UTF-8编码组成的文本,返回此字符串的Unicode字符长度。如果传入的字符串不是UTF-8编码,则函数可能返回一个预期外的值(不会抛出异常)。
 结果类型是UInt64。
-## char\_length, CHAR\_LENGTH {#char-length-char-length}
+## char\_length,CHAR\_LENGTH {#char-length-char-length}
 假定字符串以UTF-8编码组成的文本,返回此字符串的Unicode字符长度。如果传入的字符串不是UTF-8编码,则函数可能返回一个预期外的值(不会抛出异常)。
 结果类型是UInt64。
-## character\_length, CHARACTER\_LENGTH {#character-length-character-length}
+## character\_length,CHARACTER\_LENGTH {#character-length-character-length}
 假定字符串以UTF-8编码组成的文本,返回此字符串的Unicode字符长度。如果传入的字符串不是UTF-8编码,则函数可能返回一个预期外的值(不会抛出异常)。
 结果类型是UInt64。
-## lower, lcase {#lower-lcase}
+## lower,lcase {#lower-lcase}
 将字符串中的ASCII转换为小写。
-## upper, ucase {#upper-ucase}
+## upper,ucase {#upper-ucase}
 将字符串中的ASCII转换为大写。
@@ -68,7 +69,7 @@
 参数:
-- input\_string — 任何一个[String](../../data_types/string.md)类型的对象。
+- input\_string — 任何一个[字符串](../../sql_reference/data_types/string.md)类型的对象。
 返回值: 有效的UTF-8字符串。
 **示例**
 ``` sql
 SELECT toValidUTF8('\x61\xF0\x80\x80\x80b')
 └───────────────────────┘
 ```
-## reverse {#reverse}
+## reverse {#reverse}
 反转字符串。
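+一个示意性示例(结果仅供参考):
+
+``` sql
+SELECT reverse('abcd') AS res
+```
+
+res 应为 'dcba'。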
与[concat](./string_functions.md#concat-s1-s2)相同,区别在于,你需要保证concat(s1, s2, s3) -\> s4是单射的,它将用于GROUP BY的优化。

-## substring(s, offset, length), mid(s, offset, length), substr(s, offset, length) {#substrings-offset-length-mids-offset-length-substrs-offset-length}
+## substring(s,offset,length),mid(s,offset,length),substr(s,offset,length) {#substrings-offset-length-mids-offset-length-substrs-offset-length}

-以字节为单位截取指定位置字符串,返回以‘offset’位置为开头,长度为‘length’的子串。‘offset’从1开始(与标准SQL相同)。‘offset’和‘length’参数必须是常量。
+以字节为单位截取指定位置字符串,返回以'offset'位置为开头,长度为'length'的子串。'offset'从1开始(与标准SQL相同)。'offset'和'length'参数必须是常量。

-## substringUTF8(s, offset, length) {#substringutf8s-offset-length}
+## substringUTF8(s,offset,length) {#substringutf8s-offset-length}

-与‘substring’相同,但其操作单位为Unicode字符,函数假设字符串是以UTF-8进行编码的文本。如果不是则可能返回一个预期外的结果(不会抛出异常)。
+与'substring'相同,但其操作单位为Unicode字符,函数假设字符串是以UTF-8进行编码的文本。如果不是则可能返回一个预期外的结果(不会抛出异常)。

-## appendTrailingCharIfAbsent(s, c) {#appendtrailingcharifabsents-c}
+## appendTrailingCharIfAbsent(s,c) {#appendtrailingcharifabsents-c}

-如果‘s’字符串非空并且末尾不包含‘c’字符,则将‘c’字符附加到末尾。
+如果's'字符串非空并且末尾不包含'c'字符,则将'c'字符附加到末尾。

-## convertCharset(s, from, to) {#convertcharsets-from-to}
+## convertCharset(s,from,to) {#convertcharsets-from-to}

-返回从‘from’中的编码转换为‘to’中的编码的字符串‘s’。
+返回从'from'中的编码转换为'to'中的编码的字符串's'。

## base64Encode(s) {#base64encodes}

-将字符串‘s’编码成base64
+将字符串's'编码成base64

## base64Decode(s) {#base64decodes}

@@ -146,11 +147,11 @@ SELECT format('{} {}', 'Hello', 'World')

使用base64将字符串解码成原始字符串。但如果出现错误,将返回空字符串。

-## endsWith(s, suffix) {#endswiths-suffix}
+## endsWith(s,suffix) {#endswiths-suffix}

返回是否以指定的后缀结尾。如果字符串以指定的后缀结束,则返回1,否则返回0。

-## startsWith(s, prefix) {#startswiths-prefix}
+## startsWith(s,prefix) {#startswiths-prefix}

返回是否以指定的前缀开头。如果字符串以指定的前缀开头,则返回1,否则返回0。

diff --git a/docs/zh/query_language/functions/string_replace_functions.md b/docs/zh/sql_reference/functions/string_replace_functions.md
similarity index 75%
rename from docs/zh/query_language/functions/string_replace_functions.md
rename to docs/zh/sql_reference/functions/string_replace_functions.md
index e70dcade3a0..04b110a2cef 100644
--- a/docs/zh/query_language/functions/string_replace_functions.md
+++ b/docs/zh/sql_reference/functions/string_replace_functions.md
@@ -1,20 +1,21 @@
+
 # 字符串替换函数 {#zi-fu-chuan-ti-huan-han-shu}

-## replaceOne(haystack, pattern, replacement) {#replaceonehaystack-pattern-replacement}
+## replaceOne(haystack,pattern,replacement) {#replaceonehaystack-pattern-replacement}

-用‘replacement’子串替换‘haystack’中与‘pattern’子串第一个匹配的匹配项(如果存在)。
-‘pattern’和‘replacement’必须是常量。
+用'replacement'子串替换'haystack'中与'pattern'子串第一个匹配的匹配项(如果存在)。
+'pattern'和'replacement'必须是常量。

-## replaceAll(haystack, pattern, replacement), replace(haystack, pattern, replacement) {#replaceallhaystack-pattern-replacement-replacehaystack-pattern-replacement}
+## replaceAll(haystack,pattern,replacement),replace(haystack,pattern,replacement) {#replaceallhaystack-pattern-replacement-replacehaystack-pattern-replacement}

-用‘replacement’子串替换‘haystack’中出现的所有‘pattern’子串。
+用'replacement'子串替换'haystack'中出现的所有'pattern'子串。

-## replaceRegexpOne(haystack, pattern, replacement) {#replaceregexponehaystack-pattern-replacement}
+## replaceRegexpOne(haystack,pattern,replacement) {#replaceregexponehaystack-pattern-replacement}

-使用‘pattern’正则表达式替换。 ‘pattern’可以是任意一个有效的re2正则表达式。
+使用'pattern'正则表达式替换。'pattern'可以是任意一个有效的re2正则表达式。
 如果存在与正则表达式匹配的匹配项,仅替换第一个匹配项。
 同时‘replacement’可以指定为正则表达式中的捕获组。可以包含`\0-\9`。
-在这种情况下,函数将使用正则表达式的整个匹配项替换‘\\0’。使用其他与之对应的子模式替换对应的‘\\1-\\9’。要在模版中使用‘’字符,请使用‘’将其转义。
+在这种情况下,函数将使用正则表达式的整个匹配项替换'\\0'。使用其他与之对应的子模式替换对应的'\\1-\\9'。要在模板中使用'\\'字符,请使用'\\\\'将其转义。
 另外还请记住,字符串文字需要额外的转义。

示例1.将日期转换为美国格式:

@@
-46,7 +47,7 @@ SELECT replaceRegexpOne('Hello, World!', '.*', '\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0')
│ Hello, World!Hello, World!Hello, World!Hello, World!Hello, World!Hello, World!Hello, World!Hello, World!Hello, World!Hello, World! │
└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘

-## replaceRegexpAll(haystack, pattern, replacement) {#replaceregexpallhaystack-pattern-replacement}
+## replaceRegexpAll(haystack,pattern,replacement) {#replaceregexpallhaystack-pattern-replacement}

与replaceRegexpOne相同,但会替换所有出现的匹配项。例如:

@@ -72,7 +73,7 @@ SELECT replaceRegexpAll('Hello, World!', '^', 'here: ') AS res

## regexpQuoteMeta(s) {#regexpquotemetas}

该函数用于在字符串中的某些预定义字符之前添加反斜杠。
-预定义字符:‘0’,‘\\’,‘\|’,‘(’,‘)’,‘^’,‘$’,‘。’,‘\[’,’\]’,‘?’,‘\*’,‘+’,‘{’,‘:’,’ - ’。
+预定义字符:'0','\\','\|','(',')','^','$','.','\[','\]','?','\*','+','{',':','-'。
这个实现与re2 :: RE2 :: QuoteMeta略有不同。它以\\0而不是00转义零字节,它只转义所需的字符。
有关详细信息,请参阅链接:\[RE2\](https://github.com/google/re2/blob/master/re2/re2.cc\#L473)

diff --git a/docs/zh/query_language/functions/string_search_functions.md b/docs/zh/sql_reference/functions/string_search_functions.md
similarity index 63%
rename from docs/zh/query_language/functions/string_search_functions.md
rename to docs/zh/sql_reference/functions/string_search_functions.md
index 8a27c460966..e0f5e06a357 100644
--- a/docs/zh/query_language/functions/string_search_functions.md
+++ b/docs/zh/sql_reference/functions/string_search_functions.md
@@ -1,48 +1,49 @@
+
 # 字符串搜索函数 {#zi-fu-chuan-sou-suo-han-shu}

下列所有函数在默认的情况下区分大小写。对于不区分大小写的搜索,存在单独的变体。

-## position(haystack, needle), locate(haystack, needle) {#positionhaystack-needle-locatehaystack-needle}
+## position(haystack,needle),locate(haystack,needle) {#positionhaystack-needle-locatehaystack-needle}

在字符串`haystack`中搜索子串`needle`。
返回子串的位置(以字节为单位),从1开始,如果未找到子串,则返回0。

对于不区分大小写的搜索,请使用函数`positionCaseInsensitive`。

-## positionUTF8(haystack, needle) {#positionutf8haystack-needle}
+## positionUTF8(haystack,needle) {#positionutf8haystack-needle}

与`position`相同,但位置以Unicode字符返回。此函数工作在UTF-8编码的文本字符集中。如非此编码的字符集,则返回一些非预期结果(他不会抛出异常)。

对于不区分大小写的搜索,请使用函数`positionCaseInsensitiveUTF8`。

-## multiSearchAllPositions(haystack, \[needle1, needle2, …, needlen\]) {#multisearchallpositionshaystack-needle1-needle2-needlen}
+## multiSearchAllPositions(haystack,\[needle1,needle2, …, needlen\]) {#multisearchallpositionshaystack-needle1-needle2-needlen}

-与`position`相同,但函数返回一个数组,其中包含所有匹配needlei的位置。
+与`position`相同,但函数返回一个数组,其中包含所有匹配needlei的位置。

对于不区分大小写的搜索或/和UTF-8格式,使用函数`multiSearchAllPositionsCaseInsensitive,multiSearchAllPositionsUTF8,multiSearchAllPositionsCaseInsensitiveUTF8`。

-## multiSearchFirstPosition(haystack, \[needle1, needle2, …, needlen\]) {#multisearchfirstpositionhaystack-needle1-needle2-needlen}
+## multiSearchFirstPosition(haystack,\[needle1,needle2, …, needlen\]) {#multisearchfirstpositionhaystack-needle1-needle2-needlen}

与`position`相同,但返回在`haystack`中与needles字符串匹配的最左偏移。

对于不区分大小写的搜索或/和UTF-8格式,使用函数`multiSearchFirstPositionCaseInsensitive,multiSearchFirstPositionUTF8,multiSearchFirstPositionCaseInsensitiveUTF8`。

-## multiSearchFirstIndex(haystack, \[needle1, needle2, …, needlen\]) {#multisearchfirstindexhaystack-needle1-needle2-needlen}
+## multiSearchFirstIndex(haystack,\[needle1,needle2, …, needlen\]) {#multisearchfirstindexhaystack-needle1-needle2-needlen}

-返回在字符串`haystack`中最先查找到的needlei的索引`i`(从1开始),没有找到任何匹配项则返回0。
+返回在字符串`haystack`中最先查找到的needlei的索引`i`(从1开始),没有找到任何匹配项则返回0。

对于不区分大小写的搜索或/和UTF-8格式,使用函数`multiSearchFirstIndexCaseInsensitive,multiSearchFirstIndexUTF8,multiSearchFirstIndexCaseInsensitiveUTF8`。

-## multiSearchAny(haystack, 
\[needle1, needle2, …, needlen\]) {#multisearchanyhaystack-needle1-needle2-needlen}
+## multiSearchAny(haystack,\[needle1,needle2, …, needlen\]) {#multisearchanyhaystack-needle1-needle2-needlen}

-如果`haystack`中至少存在一个needlei匹配则返回1,否则返回0。
+如果`haystack`中至少存在一个needlei匹配则返回1,否则返回0。

对于不区分大小写的搜索或/和UTF-8格式,使用函数`multiSearchAnyCaseInsensitive,multiSearchAnyUTF8,multiSearchAnyCaseInsensitiveUTF8`。

!!! note "注意"
    在所有`multiSearch*`函数中,由于实现规范,needles的数量应小于28。

-## match(haystack, pattern) {#matchhaystack-pattern}
+## match(haystack,pattern) {#matchhaystack-pattern}

检查字符串是否与`pattern`正则表达式匹配。`pattern`可以是一个任意的`re2`正则表达式。 `re2`正则表达式的[语法](https://github.com/google/re2/wiki/Syntax)比Perl正则表达式的语法存在更多限制。

@@ -53,22 +54,22 @@

正则表达式与字符串一起使用,就像它是一组字节一样。正则表达式中不能包含空字节。
对于在字符串中搜索子字符串的模式,最好使用LIKE或«position»,因为它们更加高效。

-## multiMatchAny(haystack, \[pattern1, pattern2, …, patternn\]) {#multimatchanyhaystack-pattern1-pattern2-patternn}
+## multiMatchAny(haystack,\[pattern1,pattern2, …, patternn\]) {#multimatchanyhaystack-pattern1-pattern2-patternn}

-与`match`相同,但如果所有正则表达式都不匹配,则返回0;如果任何模式匹配,则返回1。它使用[hyperscan](https://github.com/intel/hyperscan)库。对于在字符串中搜索子字符串的模式,最好使用«multisearchany»,因为它更高效。
+与`match`相同,但如果所有正则表达式都不匹配,则返回0;如果任何模式匹配,则返回1。它使用[hyperscan](https://github.com/intel/hyperscan)库。对于在字符串中搜索子字符串的模式,最好使用«multisearchany»,因为它更高效。

!!! note "注意"
    任何`haystack`字符串的长度必须小于232\字节,否则抛出异常。这种限制是因为hyperscan API而产生的。

-## multiMatchAnyIndex(haystack, \[pattern1, pattern2, …, patternn\]) {#multimatchanyindexhaystack-pattern1-pattern2-patternn}
+## multiMatchAnyIndex(haystack,\[pattern1,pattern2, …, patternn\]) {#multimatchanyindexhaystack-pattern1-pattern2-patternn}

与`multiMatchAny`相同,但返回与haystack匹配的任何内容的索引位置。

-## multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern2, …, patternn\]) {#multifuzzymatchanyhaystack-distance-pattern1-pattern2-patternn}
+## multiFuzzyMatchAny(haystack,distance,\[pattern1,pattern2, …, patternn\]) {#multifuzzymatchanyhaystack-distance-pattern1-pattern2-patternn}

与`multiMatchAny`相同,但如果在haystack能够查找到任何模式匹配能够在指定的[编辑距离](https://en.wikipedia.org/wiki/Edit_distance)内进行匹配,则返回1。此功能也处于实验模式,可能非常慢。有关更多信息,请参阅[hyperscan文档](https://intel.github.io/hyperscan/dev-reference/compilation.html#approximate-matching)。

-## multiFuzzyMatchAnyIndex(haystack, distance, \[pattern1, pattern2, …, patternn\]) {#multifuzzymatchanyindexhaystack-distance-pattern1-pattern2-patternn}
+## multiFuzzyMatchAnyIndex(haystack,distance,\[pattern1,pattern2, …, patternn\]) {#multifuzzymatchanyindexhaystack-distance-pattern1-pattern2-patternn}

与`multiFuzzyMatchAny`相同,但返回匹配项的匹配能容的索引位置。

@@ -78,15 +79,15 @@

!!! 
note "注意" 如要关闭所有hyperscan函数的使用,请设置`SET allow_hyperscan = 0;`。 -## extract(haystack, pattern) {#extracthaystack-pattern} +## 提取(大海捞针,图案) {#extracthaystack-pattern} -使用正则表达式截取字符串。如果‘haystack’与‘pattern’不匹配,则返回空字符串。如果正则表达式中不包含子模式,它将获取与整个正则表达式匹配的子串。否则,它将获取与第一个子模式匹配的子串。 +使用正则表达式截取字符串。如果'haystack'与'pattern'不匹配,则返回空字符串。如果正则表达式中不包含子模式,它将获取与整个正则表达式匹配的子串。否则,它将获取与第一个子模式匹配的子串。 -## extractAll(haystack, pattern) {#extractallhaystack-pattern} +## extractAll(大海捞针,图案) {#extractallhaystack-pattern} -使用正则表达式提取字符串的所有片段。如果‘haystack’与‘pattern’正则表达式不匹配,则返回一个空字符串。否则返回所有与正则表达式匹配的字符串数组。通常,行为与‘extract’函数相同(它采用第一个子模式,如果没有子模式,则采用整个表达式)。 +使用正则表达式提取字符串的所有片段。如果'haystack'与'pattern'正则表达式不匹配,则返回一个空字符串。否则返回所有与正则表达式匹配的字符串数组。通常,行为与'extract'函数相同(它采用第一个子模式,如果没有子模式,则采用整个表达式)。 -## like(haystack, pattern), haystack LIKE pattern operator {#likehaystack-pattern-haystack-like-pattern-operator} +## 像(干草堆,模式),干草堆像模式运算符 {#likehaystack-pattern-haystack-like-pattern-operator} 检查字符串是否与简单正则表达式匹配。 正则表达式可以包含的元符号有`%`和`_`。 @@ -98,19 +99,19 @@ 可以使用反斜杠(`\`)来对元符号进行转义。请参阅«match»函数说明中有关转义的说明。 对于像`%needle%`这样的正则表达式,改函数与`position`函数一样快。 -对于其他正则表达式,函数与‘match’函数相同。 +对于其他正则表达式,函数与'match'函数相同。 -## notLike(haystack, pattern), haystack NOT LIKE pattern operator {#notlikehaystack-pattern-haystack-not-like-pattern-operator} +## 不喜欢(干草堆,模式),干草堆不喜欢模式运算符 {#notlikehaystack-pattern-haystack-not-like-pattern-operator} -与‘like’函数返回相反的结果。 +与'like'函数返回相反的结果。 -## ngramDistance(haystack, needle) {#ngramdistancehaystack-needle} +## 大海捞针) {#ngramdistancehaystack-needle} 基于4-gram计算`haystack`和`needle`之间的距离:计算两个4-gram集合之间的对称差异,并用它们的基数和对其进行归一化。返回0到1之间的任何浮点数 – 越接近0则表示越多的字符串彼此相似。如果常量的`needle`或`haystack`超过32KB,函数将抛出异常。如果非常量的`haystack`或`needle`字符串超过32Kb,则距离始终为1。 对于不区分大小写的搜索或/和UTF-8格式,使用函数`ngramDistanceCaseInsensitive,ngramDistanceUTF8,ngramDistanceCaseInsensitiveUTF8`。 -## ngramSearch(haystack, needle) {#ngramsearchhaystack-needle} +## ツ暗ェツ氾环催ツ団ツ法ツ人) {#ngramsearchhaystack-needle} 与`ngramDistance`相同,但计算`needle`和`haystack`之间的非对称差异——`needle`的n-gram减去`needle`归一化n-gram。可用于模糊字符串搜索。 diff --git a/docs/zh/query_language/functions/type_conversion_functions.md b/docs/zh/sql_reference/functions/type_conversion_functions.md similarity index 67% rename from docs/zh/query_language/functions/type_conversion_functions.md rename to docs/zh/sql_reference/functions/type_conversion_functions.md index a9c97589c9f..56375eabc07 100644 --- a/docs/zh/query_language/functions/type_conversion_functions.md +++ b/docs/zh/sql_reference/functions/type_conversion_functions.md @@ -1,16 +1,17 @@ + # 类型转换函数 {#lei-xing-zhuan-huan-han-shu} -## toUInt8, toUInt16, toUInt32, toUInt64 {#touint8-touint16-touint32-touint64} +## toUInt8,toUInt16,toUInt32,toUInt64 {#touint8-touint16-touint32-touint64} -## toInt8, toInt16, toInt32, toInt64 {#toint8-toint16-toint32-toint64} +## toInt8,toInt16,toInt32,toInt64 {#toint8-toint16-toint32-toint64} -## toFloat32, toFloat64 {#tofloat32-tofloat64} +## toFloat32,toFloat64 {#tofloat32-tofloat64} -## toDate, toDateTime {#todate-todatetime} +## 今天,今天 {#todate-todatetime} -## toUInt8OrZero, toUInt16OrZero, toUInt32OrZero, toUInt64OrZero, toInt8OrZero, toInt16OrZero, toInt32OrZero, toInt64OrZero, toFloat32OrZero, toFloat64OrZero, toDateOrZero, toDateTimeOrZero {#touint8orzero-touint16orzero-touint32orzero-touint64orzero-toint8orzero-toint16orzero-toint32orzero-toint64orzero-tofloat32orzero-tofloat64orzero-todateorzero-todatetimeorzero} +## 
toUInt8OrZero,toUInt16OrZero,toUInt32OrZero,toUInt64OrZero,toInt8OrZero,toInt16OrZero,toInt32OrZero,toInt64OrZero,toFloat32OrZero,toFloat64OrZero,toDateOrZero,toDateTimeOrZero {#touint8orzero-touint16orzero-touint32orzero-touint64orzero-toint8orzero-toint16orzero-toint32orzero-toint64orzero-tofloat32orzero-tofloat64orzero-todateorzero-todatetimeorzero}

-## toUInt8OrNull, toUInt16OrNull, toUInt32OrNull, toUInt64OrNull, toInt8OrNull, toInt16OrNull, toInt32OrNull, toInt64OrNull, toFloat32OrNull, toFloat64OrNull, toDateOrNull, toDateTimeOrNull {#touint8ornull-touint16ornull-touint32ornull-touint64ornull-toint8ornull-toint16ornull-toint32ornull-toint64ornull-tofloat32ornull-tofloat64ornull-todateornull-todatetimeornull}
+## toUInt8OrNull,toUInt16OrNull,toUInt32OrNull,toUInt64OrNull,toInt8OrNull,toInt16OrNull,toInt32OrNull,toInt64OrNull,toFloat32OrNull,toFloat64OrNull,toDateOrNull,toDateTimeOrNull {#touint8ornull-touint16ornull-touint32ornull-touint64ornull-toint8ornull-toint16ornull-toint32ornull-toint64ornull-tofloat32ornull-tofloat64ornull-todateornull-todatetimeornull}

## toString {#tostring}

@@ -27,7 +28,7 @@ toDate/toDateTime函数的日期和日期时间格式定义如下:

 YYYY-MM-DD
 YYYY-MM-DD hh:mm:ss

-例外的是,如果将UInt32、Int32、UInt64或Int64类型的数值转换为Date类型,并且其对应的值大于等于65536,则该数值将被解析成unix时间戳(而不是对应的天数)。这意味着允许写入‘toDate(unix\_timestamp)’这种常见情况,否则这将是错误的,并且需要便携更加繁琐的‘toDate(toDateTime(unix\_timestamp))’。
+例外的是,如果将UInt32、Int32、UInt64或Int64类型的数值转换为Date类型,并且其对应的值大于等于65536,则该数值将被解析成unix时间戳(而不是对应的天数)。这意味着允许写入'toDate(unix\_timestamp)'这种常见情况,否则这将是错误的,并且需要编写更加繁琐的'toDate(toDateTime(unix\_timestamp))'。

Date与DateTime之间的转换以更为自然的方式进行:通过添加空的time或删除time。

@@ -47,11 +48,11 @@ SELECT

另请参阅`toUnixTimestamp`函数。

-## toDecimal32(value, S), toDecimal64(value, S), toDecimal128(value, S) {#todecimal32value-s-todecimal64value-s-todecimal128value-s}
+## toDecimal32(value,S),toDecimal64(value,S),toDecimal128(value,S) {#todecimal32value-s-todecimal64value-s-todecimal128value-s}

-将`value`转换为精度为`S`的[Decimal](../../data_types/decimal.md)。`value`可以是数字或字符串。`S`参数为指定的小数位数。
+将`value`转换为精度为`S`的[Decimal](../../sql_reference/data_types/decimal.md)。`value`可以是数字或字符串。`S`参数为指定的小数位数。

-## toFixedString(s, N) {#tofixedstrings-n}
+## toFixedString(s,N) {#tofixedstrings-n}

将String类型的参数转换为FixedString(N)类型的值(具有固定长度N的字符串)。N必须是一个常量。
如果字符串的字节数少于N,则向右填充空字节。如果字符串的字节数多于N,则抛出异常。

@@ -78,17 +79,17 @@ SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut
 │ foo\0bar\0 │ foo │
 └────────────┴───────┘

-## reinterpretAsUInt8, reinterpretAsUInt16, reinterpretAsUInt32, reinterpretAsUInt64 {#reinterpretasuint8-reinterpretasuint16-reinterpretasuint32-reinterpretasuint64}
+## reinterpretAsUInt8,reinterpretAsUInt16,reinterpretAsUInt32,reinterpretAsUInt64 {#reinterpretasuint8-reinterpretasuint16-reinterpretasuint32-reinterpretasuint64}

-## reinterpretAsInt8, reinterpretAsInt16, reinterpretAsInt32, reinterpretAsInt64 {#reinterpretasint8-reinterpretasint16-reinterpretasint32-reinterpretasint64}
+## reinterpretAsInt8,reinterpretAsInt16,reinterpretAsInt32,reinterpretAsInt64 {#reinterpretasint8-reinterpretasint16-reinterpretasint32-reinterpretasint64}

-## reinterpretAsFloat32, reinterpretAsFloat64 {#reinterpretasfloat32-reinterpretasfloat64}
+## reinterpretAsFloat32,reinterpretAsFloat64 {#reinterpretasfloat32-reinterpretasfloat64}

-## reinterpretAsDate, reinterpretAsDateTime {#reinterpretasdate-reinterpretasdatetime}
+## reinterpretAsDate,reinterpretAsDateTime {#reinterpretasdate-reinterpretasdatetime}

这些函数接受一个字符串,并将放在字符串开头的字节解释为主机顺序中的数字(little 
endian)。如果字符串不够长,则函数就像使用必要数量的空字节填充字符串一样。如果字符串比需要的长,则忽略额外的字节。Date被解释为Unix时间戳的天数,DateTime被解释为Unix时间戳。

-## reinterpretAsString {#reinterpretasstring}
+## reinterpretAsString {#reinterpretasstring}

此函数接受数字、Date或DateTime,并返回一个字符串,其中包含表示主机顺序(小端)的相应值的字节。从末尾删除空字节。例如,UInt32类型值255是一个字节长的字符串。

@@ -96,9 +97,9 @@ SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut

此函数接受数字、Date或DateTime,并返回包含表示主机顺序(小端)的相应值的字节的FixedString。从末尾删除空字节。例如,UInt32类型值255是一个长度为一个字节的FixedString。

-## CAST(x, t) {#type_conversion_function-cast}
+## CAST(x,t) {#type_conversion_function-cast}

-将‘x’转换为‘t’数据类型。还支持语法CAST(x AS t)
+将'x'转换为't'数据类型。还支持语法CAST(x AS t)

示例:

@@ -117,7 +118,7 @@ SELECT

将参数转换为FixedString(N),仅适用于String或FixedString(N)类型的参数。

-支持将数据转换为[Nullable](../../data_types/nullable.md)。例如:
+支持将数据转换为[Nullable](../../sql_reference/data_types/nullable.md)。例如:

    SELECT toTypeName(x) FROM t_null

@@ -133,7 +134,7 @@ SELECT
 │ Nullable(UInt16) │
 └─────────────────────────────────────────┘

-## toIntervalYear, toIntervalQuarter, toIntervalMonth, toIntervalWeek, toIntervalDay, toIntervalHour, toIntervalMinute, toIntervalSecond {#function-tointerval}
+## toIntervalYear,toIntervalQuarter,toIntervalMonth,toIntervalWeek,toIntervalDay,toIntervalHour,toIntervalMinute,toIntervalSecond {#function-tointerval}

将数字类型参数转换为Interval类型(时间区间)。
Interval类型实际上是非常有用的,您可以使用此类型的数据直接与Date或DateTime执行算术运算。同时,ClickHouse为Interval类型数据的声明提供了更方便的语法。例如:

@@ -152,18 +153,18 @@ SELECT
 │ 2019-01-08 │ 2019-01-08 │
 └───────────────────────────┴──────────────────────────────┘

-## parseDateTimeBestEffort {#type_conversion_functions-parsedatetimebesteffort}
+## parseDateTimeBestEffort {#type_conversion_functions-parsedatetimebesteffort}

将数字类型参数解析为Date或DateTime类型。
与toDate和toDateTime不同,parseDateTimeBestEffort可以进行更复杂的日期格式。
有关详细信息,请参阅链接:[复杂日期格式](https://xkcd.com/1179/)。

-## parseDateTimeBestEffortOrNull {#parsedatetimebesteffortornull}
+## parseDateTimeBestEffortOrNull {#parsedatetimebesteffortornull}

-与[parseDateTimeBestEffort](#type_conversion_functions-parsedatetimebesteffort)相同,但它遇到无法处理的日期格式时返回null。
+与[parseDateTimeBestEffort](#type_conversion_functions-parsedatetimebesteffort)相同,但它遇到无法处理的日期格式时返回null。

-## parseDateTimeBestEffortOrZero {#parsedatetimebesteffortorzero}
+## parseDateTimeBestEffortOrZero {#parsedatetimebesteffortorzero}

-与[parseDateTimeBestEffort](#type_conversion_functions-parsedatetimebesteffort)相同,但它遇到无法处理的日期格式时返回零Date或零DateTime。
+与[parseDateTimeBestEffort](#type_conversion_functions-parsedatetimebesteffort)相同,但它遇到无法处理的日期格式时返回零Date或零DateTime。

[来源文章](https://clickhouse.tech/docs/en/query_language/functions/type_conversion_functions/)

diff --git a/docs/zh/query_language/functions/url_functions.md b/docs/zh/sql_reference/functions/url_functions.md
similarity index 69%
rename from docs/zh/query_language/functions/url_functions.md
rename to docs/zh/sql_reference/functions/url_functions.md
index df8b1cb69c4..53295221e51 100644
--- a/docs/zh/query_language/functions/url_functions.md
+++ b/docs/zh/sql_reference/functions/url_functions.md
@@ -1,3 +1,4 @@
+
 # URL函数 {#urlhan-shu}

所有这些功能都不遵循RFC。它们被最大程度简化以提高性能。

@@ -6,25 +7,25 @@

如果URL中没有要截取的内容则返回空字符串。

-### protocol {#protocol}
+### protocol {#protocol}

返回URL的协议。例如: http、ftp、mailto、magnet…

-### domain {#domain}
+### domain {#domain}

获取域名。

-### domainWithoutWWW {#domainwithoutwww}
+### domainWithoutWWW {#domainwithoutwww}

-返回域名并删除第一个‘www.’。
+返回域名并删除第一个'www.'。

### topLevelDomain {#topleveldomain}

返回顶级域名。例如:.ru。

-### firstSignificantSubdomain {#firstsignificantsubdomain}
+### firstSignificantSubdomain {#firstsignificantsubdomain}
-返回«第一个有效子域名»。这并不是一个标准概念,仅用于Yandex.Metrica。如果顶级域名为‘com’,‘net’,‘org’或者‘co’则第一个有效子域名为二级域名。否则则返回三级域名。例如,irstSignificantSubdomain (’https://news.yandex.ru/‘) = ’yandex’, firstSignificantSubdomain (‘https://news.yandex.com.tr/’) = ‘yandex’。一些实现细节在未来可能会进行改变。
+返回«第一个有效子域名»。这并不是一个标准概念,仅用于Yandex.Metrica。如果顶级域名为'com','net','org'或者'co'则第一个有效子域名为二级域名。否则则返回三级域名。例如,firstSignificantSubdomain('https://news.yandex.ru/') = 'yandex',firstSignificantSubdomain('https://news.yandex.com.tr/') = 'yandex'。一些实现细节在未来可能会进行改变。

### cutToFirstSignificantSubdomain {#cuttofirstsignificantsubdomain}

@@ -32,7 +33,7 @@

例如, `cutToFirstSignificantSubdomain('https://news.yandex.com.tr/') = 'yandex.com.tr'`.

-### path {#path}
+### path {#path}

返回URL路径。例如:`/top/news.html`,不包含请求参数。

@@ -40,21 +41,21 @@

与上面相同,但包括请求参数和fragment。例如:/top/news.html?page=2\#comments

-### queryString {#querystring}
+### queryString {#querystring}

返回请求参数。例如:page=1&lr=213。请求参数不包含问号已经\# 以及\# 之后所有的内容。

-### fragment {#fragment}
+### fragment {#fragment}

返回URL的fragment标识。fragment不包含\#。

-### queryStringAndFragment {#querystringandfragment}
+### queryStringAndFragment {#querystringandfragment}

返回请求参数和fragment标识。例如:page=1\#29390。

-### extractURLParameter(URL, name) {#extracturlparameterurl-name}
+### extractURLParameter(URL,name) {#extracturlparameterurl-name}

-返回URL请求参数中名称为‘name’的参数。如果不存在则返回一个空字符串。如果存在多个匹配项则返回第一个相匹配的。此函数假设参数名称与参数值在url中的编码方式相同。
+返回URL请求参数中名称为'name'的参数。如果不存在则返回一个空字符串。如果存在多个匹配项则返回第一个相匹配的。此函数假设参数名称与参数值在url中的编码方式相同。

### extractURLParameters(URL) {#extracturlparametersurl}

@@ -68,7 +69,7 @@

返回一个数组,其中包含以/切割的URL的所有内容。?将被包含在URL路径以及请求参数中。连续的分割符号被记为一个。

-### URLPathHierarchy(URL) {#urlpathhierarchyurl}
+### URLPathHierarchy(URL) {#urlpathhierarchyurl}

与上面相同,但结果不包含协议和host部分。 /element(root)不包括在内。该函数用于在Yandex.Metric中实现导出URL的树形结构。

@@ -97,7 +98,7 @@ SELECT decodeURLComponent('http://127.0.0.1:8123/?query=SELECT%201%3B') AS Decod

### cutWWW {#cutwww}

-删除开始的第一个’www.’。
+删除开始的第一个'www.'。

### cutQueryString {#cutquerystring}

@@ -107,12 +108,12 @@ SELECT decodeURLComponent('http://127.0.0.1:8123/?query=SELECT%201%3B') AS Decod

删除fragment标识。\#同样也会被删除。

-### cutQueryStringAndFragment {#cutquerystringandfragment}
+### cutQueryStringAndFragment {#cutquerystringandfragment}

删除请求参数以及fragment标识。问号以及\#也会被删除。

-### cutURLParameter(URL, name) {#cuturlparameterurl-name}
+### cutURLParameter(URL,name) {#cuturlparameterurl-name}

-删除URL中名称为‘name’的参数。改函数假设参数名称以及参数值经过URL相同的编码。
+删除URL中名称为'name'的参数。该函数假设参数名称以及参数值经过URL相同的编码。

[来源文章](https://clickhouse.tech/docs/en/query_language/functions/url_functions/)

diff --git a/docs/zh/query_language/functions/uuid_functions.md b/docs/zh/sql_reference/functions/uuid_functions.md
similarity index 87%
rename from docs/zh/query_language/functions/uuid_functions.md
rename to docs/zh/sql_reference/functions/uuid_functions.md
index 2cb2ff30872..306a55f08a0 100644
--- a/docs/zh/query_language/functions/uuid_functions.md
+++ b/docs/zh/sql_reference/functions/uuid_functions.md
@@ -1,8 +1,9 @@
+
 # UUID函数 {#uuidhan-shu}

下面列出了所有UUID的相关函数

-## generateUUIDv4 {#uuid-function-generate}
+## generateUUIDv4 {#uuid-function-generate}

生成一个UUID([版本4](https://tools.ietf.org/html/rfc4122#section-4.4))。

@@ -30,7 +31,7 @@ UUID类型的值。
 └──────────────────────────────────────┘
```

-## toUUID (x) {#touuid-x}
+## toUUID(x) {#touuid-x}

将String类型的值转换为UUID类型的值。

@@ -54,7 +55,7 @@ UUID类型的值

## UUIDStringToNum {#uuidstringtonum}

-接受一个String类型的值,其中包含36个字符且格式为`xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`,将其转换为UUID的数值并以[FixedString(16)](../../data_types/fixedstring.md)将其返回。
+接受一个String类型的值,其中包含36个字符且格式为`xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`,将其转换为UUID的数值并以[FixedString(16)](../../sql_reference/data_types/fixedstring.md)将其返回。

``` sql
UUIDStringToNum(String)
```

**返回值**

-FixedString(16)
+FixedString(16)

**使用示例**

@@ -78,7 +79,7 @@ FixedString(16)

## UUIDNumToString {#uuidnumtostring}

-接受一个[FixedString(16)](../../data_types/fixedstring.md)类型的值,返回其对应的String表现形式。
+接受一个[FixedString(16)](../../sql_reference/data_types/fixedstring.md)类型的值,返回其对应的String表现形式。

``` sql
UUIDNumToString(FixedString(16))
```

**返回值**

-String.
+String。

**使用示例**

@@ -102,6 +103,6 @@ SELECT

## 另请参阅 {#ling-qing-can-yue}

-- [dictGetUUID](ext_dict_functions.md)
+- [dictGetUUID](ext_dict_functions.md)

[来源文章](https://clickhouse.tech/docs/en/query_language/functions/uuid_function/)

diff --git a/docs/zh/query_language/functions/ym_dict_functions.md b/docs/zh/sql_reference/functions/ym_dict_functions.md
similarity index 56%
rename from docs/zh/query_language/functions/ym_dict_functions.md
rename to docs/zh/sql_reference/functions/ym_dict_functions.md
index 6d03ae228e8..87492ec9d12 100644
--- a/docs/zh/query_language/functions/ym_dict_functions.md
+++ b/docs/zh/sql_reference/functions/ym_dict_functions.md
@@ -1,36 +1,37 @@
-# Functions for working with Yandex.Metrica dictionaries {#functions-for-working-with-yandex-metrica-dictionaries}

-In order for the functions below to work, the server config must specify the paths and addresses for getting all the Yandex.Metrica dictionaries. The dictionaries are loaded at the first call of any of these functions. If the reference lists can’t be loaded, an exception is thrown.
+# Yandex.Metrica字典函数 {#functions-for-working-with-yandex-metrica-dictionaries}
+
+为了使下面的函数正常工作,服务器配置必须指定获取所有Yandex.Metrica字典的路径和地址。这些字典在其中任何一个函数第一次被调用时加载。如果无法加载引用列表,则会抛出异常。

 For information about creating reference lists, see the section «Dictionaries».

-## Multiple geobases {#multiple-geobases}
+## 多个地理数据库 {#multiple-geobases}

-ClickHouse supports working with multiple alternative geobases (regional hierarchies) simultaneously, in order to support various perspectives on which countries certain regions belong to.
+ClickHouse支持同时使用多个备选地理数据库(区域层次结构),以支持对某些区域所属国家的不同视角。

-The ‘clickhouse-server’ config specifies the file with the regional hierarchy::`/opt/geo/regions_hierarchy.txt`
+'clickhouse-server'配置文件指定了包含区域层次结构的文件:`/opt/geo/regions_hierarchy.txt`

-Besides this file, it also searches for files nearby that have the \_ symbol and any suffix appended to the name (before the file extension).
-For example, it will also find the file `/opt/geo/regions_hierarchy_ua.txt`, if present.
+除了这个文件,ClickHouse还会搜索相邻的、在文件名(文件扩展名之前)后追加了\_符号和任意后缀的文件。
+例如,如果存在,它还会找到文件 `/opt/geo/regions_hierarchy_ua.txt`。

-`ua` is called the dictionary key. For a dictionary without a suffix, the key is an empty string.
+`ua` 被称为字典键。对于没有后缀的字典,键是空字符串。

-All the dictionaries are re-loaded in runtime (once every certain number of seconds, as defined in the builtin\_dictionaries\_reload\_interval config parameter, or once an hour by default). However, the list of available dictionaries is defined one time, when the server starts.
+所有字典都会在运行时重新加载(按builtin\_dictionaries\_reload\_interval配置参数定义的秒数周期性地重新加载,默认每小时一次)。但是,可用字典的列表只在服务器启动时确定一次。

 All functions for working with regions have an optional argument at the end – the dictionary key. It is referred to as the geobase.
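例如,可以在一个查询中对比默认地理数据库和带后缀的地理数据库的解析结果(示意查询,假设上文的两个字典文件均已配置并成功加载):

``` sql
-- 同一个区域ID在两个地理数据库下分别解析为国家
SELECT
    regionToCountry(toUInt32(213)) AS country_default,
    regionToCountry(toUInt32(213), 'ua') AS country_ua;
```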
-Example:
+示例:

    regionToCountry(RegionID) – Uses the default dictionary: /opt/geo/regions_hierarchy.txt
    regionToCountry(RegionID, '') – Uses the default dictionary: /opt/geo/regions_hierarchy.txt
    regionToCountry(RegionID, 'ua') – Uses the dictionary for the 'ua' key: /opt/geo/regions_hierarchy_ua.txt

-### regionToCity(id\[, geobase\]) {#regiontocityid-geobase}
+### regionToCity(id\[,geobase\]) {#regiontocityid-geobase}

 Accepts a UInt32 number – the region ID from the Yandex geobase. If this region is a city or part of a city, it returns the region ID for the appropriate city. Otherwise, returns 0.

-### regionToArea(id\[, geobase\]) {#regiontoareaid-geobase}
+### regionToArea(id\[,geobase\]) {#regiontoareaid-geobase}

-Converts a region to an area (type 5 in the geobase). In every other way, this function is the same as ‘regionToCity’.
+将区域转换为其所属的大区(地理数据库中的类型5)。除此之外,此函数与'regionToCity'相同。

``` sql
SELECT DISTINCT regionToName(regionToArea(toUInt32(number), 'ua'))

@@ -56,9 +57,9 @@ LIMIT 15
│ Tula region │
└──────────────────────────────────────────────────────┘

-### regionToDistrict(id\[, geobase\]) {#regiontodistrictid-geobase}
+### regionToDistrict(id\[,geobase\]) {#regiontodistrictid-geobase}

-Converts a region to a federal district (type 4 in the geobase). In every other way, this function is the same as ‘regionToCity’.
+将区域转换为联邦区(地理数据库中的类型4)。除此之外,此函数与'regionToCity'相同。

``` sql
SELECT DISTINCT regionToName(regionToDistrict(toUInt32(number), 'ua'))

@@ -84,37 +85,37 @@ LIMIT 15
│ Federation of Bosnia and Herzegovina │
└──────────────────────────────────────────────────────────┘

-### regionToCountry(id\[, geobase\]) {#regiontocountryid-geobase}
+### regionToCountry(id\[,geobase\]) {#regiontocountryid-geobase}

-Converts a region to a country. In every other way, this function is the same as ‘regionToCity’.
-Example: `regionToCountry(toUInt32(213)) = 225` converts Moscow (213) to Russia (225).
+将区域转换为国家。除此之外,此函数与'regionToCity'相同。
+示例: `regionToCountry(toUInt32(213)) = 225` 将莫斯科(213)转换为俄罗斯(225)。

-### regionToContinent(id\[, geobase\]) {#regiontocontinentid-geobase}
+### regionToContinent(id\[,geobase\]) {#regiontocontinentid-geobase}

-Converts a region to a continent. In every other way, this function is the same as ‘regionToCity’.
-Example: `regionToContinent(toUInt32(213)) = 10001` converts Moscow (213) to Eurasia (10001).
+将区域转换为大陆。除此之外,此函数与'regionToCity'相同。
+示例: `regionToContinent(toUInt32(213)) = 10001` 将莫斯科(213)转换为欧亚大陆(10001)。

-### regionToPopulation(id\[, geobase\]) {#regiontopopulationid-geobase}
+### regionToPopulation(id\[,geobase\]) {#regiontopopulationid-geobase}

-Gets the population for a region.
+获取区域的人口。

 The population can be recorded in files with the geobase. See the section «External dictionaries».
-If the population is not recorded for the region, it returns 0.
-In the Yandex geobase, the population might be recorded for child regions, but not for parent regions.
+如果没有为该区域记录人口,则返回0。
+在Yandex地理数据库中,可能会为子区域记录人口,但不会为父区域记录人口。

-### regionIn(lhs, rhs\[, geobase\]) {#regioninlhs-rhs-geobase}
+### regionIn(lhs,rhs\[,geobase\]) {#regioninlhs-rhs-geobase}

-Checks whether a ‘lhs’ region belongs to a ‘rhs’ region. Returns a UInt8 number equal to 1 if it belongs, or 0 if it doesn’t belong.
+检查'lhs'区域是否属于'rhs'区域。如果属于,返回等于1的UInt8数字;如果不属于,返回0。

 The relationship is reflexive – any region also belongs to itself.
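该关系的自反性可以直接验证(示意查询,基于上文的莫斯科/俄罗斯示例,假设默认地理数据库已加载):

``` sql
-- 莫斯科(213)属于俄罗斯(225),也属于它自身
SELECT
    regionIn(toUInt32(213), toUInt32(225)) AS in_russia,  -- 返回 1
    regionIn(toUInt32(213), toUInt32(213)) AS in_itself;  -- 返回 1
```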
-### regionHierarchy(id\[, geobase\]) {#regionhierarchyid-geobase}
+### regionHierarchy(id\[,geobase\]) {#regionhierarchyid-geobase}

 Accepts a UInt32 number – the region ID from the Yandex geobase. Returns an array of region IDs consisting of the passed region and all parents along the chain.
-Example: `regionHierarchy(toUInt32(213)) = [213,1,3,225,10001,10000]`.
+示例: `regionHierarchy(toUInt32(213)) = [213,1,3,225,10001,10000]`.

-### regionToName(id\[, lang\]) {#regiontonameid-lang}
+### regionToName(id\[,lang\]) {#regiontonameid-lang}

-Accepts a UInt32 number – the region ID from the Yandex geobase. A string with the name of the language can be passed as a second argument. Supported languages are: ru, en, ua, uk, by, kz, tr. If the second argument is omitted, the language ‘ru’ is used. If the language is not supported, an exception is thrown. Returns a string – the name of the region in the corresponding language. If the region with the specified ID doesn’t exist, an empty string is returned.
+Accepts a UInt32 number – the region ID from the Yandex geobase. A string with the name of the language can be passed as a second argument. Supported languages are: ru, en, ua, uk, by, kz, tr. If the second argument is omitted, the language ‘ru’ is used. If the language is not supported, an exception is thrown. Returns a string – the name of the region in the corresponding language. If the region with the specified ID doesn't exist, an empty string is returned.

-`ua` and `uk` both mean Ukrainian.
+`ua` 和 `uk` 都表示乌克兰语。

-[Original article](https://clickhouse.tech/docs/en/query_language/functions/ym_dict_functions/)
+[原始文章](https://clickhouse.tech/docs/en/query_language/functions/ym_dict_functions/)

diff --git a/docs/zh/sql_reference/index.md b/docs/zh/sql_reference/index.md
new file mode 100644
index 00000000000..aed704442ab
--- /dev/null
+++ b/docs/zh/sql_reference/index.md
@@ -0,0 +1,18 @@
+---
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_folder_title: "SQL\u53C2\u8003"
+toc_hidden: true
+toc_priority: 28
+toc_title: "\u9690\u85CF"
+---
+
+# SQL参考 {#sql-reference}
+
+- [SELECT](statements/select.md)
+- [INSERT INTO](statements/insert_into.md)
+- [CREATE](statements/create.md)
+- [ALTER](statements/alter.md#query_language_queries_alter)
+- [其他类型的查询](statements/misc.md)
+
+[原始文章](https://clickhouse.tech/docs/en/query_language/)

diff --git a/docs/zh/query_language/operators.md b/docs/zh/sql_reference/operators.md
similarity index 80%
rename from docs/zh/query_language/operators.md
rename to docs/zh/sql_reference/operators.md
index 2e73f85d5ec..fb5c7d0ee38 100644
--- a/docs/zh/query_language/operators.md
+++ b/docs/zh/sql_reference/operators.md
@@ -1,3 +1,4 @@
+
 # 操作符 {#cao-zuo-fu}

 所有的操作符(运算符)都会在查询时依据他们的优先级及其结合顺序在被解析时转换为对应的函数。下面按优先级从高到低列出各组运算符及其对应的函数:

@@ -52,7 +53,7 @@

## 集合关系运算符 {#ji-he-guan-xi-yun-suan-fu}

-*详见此节 [IN 相关操作符](select.md#select-in-operators) 。*
+*详见此节 [IN 相关操作符](statements/select.md#select-in-operators) 。*

`a IN ...` – 对应函数 `in(a, b)`

@@ -80,9 +81,9 @@

注意:

-条件运算符会先计算表达式b和表达式c的值,再根据表达式a的真假,返回相应的值。如果表达式b和表达式c是 [arrayJoin()](functions/array_join.md#functions_arrayjoin) 函数,则不管表达式a是真是假,每行都会被复制展开。
+条件运算符会先计算表达式b和表达式c的值,再根据表达式a的真假,返回相应的值。如果表达式b和表达式c是 [arrayJoin()](../sql_reference/functions/array_join.md#functions_arrayjoin) 函数,则不管表达式a是真是假,每行都会被复制展开。

-## Operators for Working with Dates and Times {#operators-datetime}
+## 日期和时间运算符 {#operators-datetime}

### EXTRACT {#operator-extract}

``` sql
EXTRACT(part FROM date);
```

-Extracts a part from a given date. 
For example, you can retrieve a month from a given date, or a second from a time. +从给定日期中提取部件。 例如,您可以从给定日期检索一个月,或从时间检索一秒钟。 -The `part` parameter specifies which part of the date to retrieve. The following values are available: +该 `part` 参数指定要检索的日期部分。 以下值可用: - `DAY` — The day of the month. Possible values: 1–31. - `MONTH` — The number of a month. Possible values: 1–12. @@ -101,11 +102,11 @@ The `part` parameter specifies which part of the date to retrieve. The following - `MINUTE` — The minute. Possible values: 0–59. - `HOUR` — The hour. Possible values: 0–23. -The `part` parameter is case-insensitive. +该 `part` 参数不区分大小写。 -The `date` parameter specifies the date or the time to process. Either [Date](../data_types/date.md) or [DateTime](../data_types/datetime.md) type is supported. +该 `date` 参数指定要处理的日期或时间。 无论是 [日期](../sql_reference/data_types/date.md) 或 [日期时间](../sql_reference/data_types/datetime.md) 支持类型。 -Examples: +例: ``` sql SELECT EXTRACT(DAY FROM toDate('2017-06-15')); @@ -113,7 +114,7 @@ SELECT EXTRACT(MONTH FROM toDate('2017-06-15')); SELECT EXTRACT(YEAR FROM toDate('2017-06-15')); ``` -In the following example we create a table and insert into it a value with the `DateTime` type. +在下面的例子中,我们创建一个表,并在其中插入一个值 `DateTime` 类型。 ``` sql CREATE TABLE test.Orders @@ -146,13 +147,13 @@ FROM test.Orders; └───────────┴────────────┴──────────┴───────────┴─────────────┴─────────────┘ ``` -You can see more examples in [tests](https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/00619_extract.sql). +你可以看到更多的例子 [测试](https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/00619_extract.sql). ### INTERVAL {#operator-interval} -Creates an [Interval](../data_types/special_data_types/interval.md)-type value that should be used in arithmetical operations with [Date](../data_types/date.md) and [DateTime](../data_types/datetime.md)-type values. 
+创建一个 [间隔](../sql_reference/operators.md)-应在算术运算中使用的类型值 [日期](../sql_reference/data_types/date.md) 和 [日期时间](../sql_reference/data_types/datetime.md)-类型值。 -Example: +示例: ``` sql SELECT now() AS current_date_time, current_date_time + INTERVAL 4 DAY + INTERVAL 3 HOUR @@ -164,10 +165,10 @@ SELECT now() AS current_date_time, current_date_time + INTERVAL 4 DAY + INTERVAL └─────────────────────┴────────────────────────────────────────────────────────┘ ``` -**See Also** +**另请参阅** -- [Interval](../data_types/special_data_types/interval.md) data type -- [toInterval](functions/type_conversion_functions.md#function-tointerval) type convertion functions +- [间隔](../sql_reference/operators.md) 数据类型 +- [toInterval](../sql_reference/operators.md#function-tointerval) 类型转换函数 ## CASE条件表达式 {#operator_case} @@ -216,7 +217,7 @@ ClickHouse 支持 `IS NULL` 和 `IS NOT NULL` 。 ### IS NULL {#operator-is-null} -- 对于 [Nullable](../data_types/nullable.md) 类型的值, `IS NULL` 会返回: +- 对于 [可为空](../sql_reference/operators.md) 类型的值, `IS NULL` 会返回: - `1` 值为 `NULL` - `0` 否则 - 对于其他类型的值, `IS NULL` 总会返回 `0` @@ -239,7 +240,7 @@ WHERE isNull(y) ### IS NOT NULL {#is-not-null} -- 对于 [Nullable](../data_types/nullable.md) 类型的值, `IS NOT NULL` 会返回: +- 对于 [可为空](../sql_reference/operators.md) 类型的值, `IS NOT NULL` 会返回: - `0` 值为 `NULL` - `1` 否则 - 对于其他类型的值,`IS NOT NULL` 总会返回 `1` diff --git a/docs/zh/sql_reference/statements/alter.md b/docs/zh/sql_reference/statements/alter.md new file mode 100644 index 00000000000..ee8911edea2 --- /dev/null +++ b/docs/zh/sql_reference/statements/alter.md @@ -0,0 +1,505 @@ +--- +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_priority: 36 +toc_title: ALTER +--- + +## ALTER {#query_language_queries_alter} + +该 `ALTER` 查询仅支持 `*MergeTree` 表,以及 `Merge`和`Distributed`. 查询有几个变体。 + +### 列操作 {#column-manipulations} + +更改表结构。 + +``` sql +ALTER TABLE [db].name [ON CLUSTER cluster] ADD|DROP|CLEAR|COMMENT|MODIFY COLUMN ... +``` + +在查询中,指定一个或多个逗号分隔操作的列表。 +每个操作都是对列的操作。 + +支持以下操作: + +- [ADD COLUMN](#alter_add-column) — Adds a new column to the table. +- [DROP COLUMN](#alter_drop-column) — Deletes the column. +- [CLEAR COLUMN](#alter_clear-column) — Resets column values. +- [COMMENT COLUMN](#alter_comment-column) — Adds a text comment to the column. +- [MODIFY COLUMN](#alter_modify-column) — Changes column's type, default expression and TTL. + +下面详细描述这些动作。 + +#### ADD COLUMN {#alter_add-column} + +``` sql +ADD COLUMN [IF NOT EXISTS] name [type] [default_expr] [codec] [AFTER name_after] +``` + +将一个新列添加到表中,并指定 `name`, `type`, [`codec`](create.md#codecs) 和 `default_expr` (请参阅部分 [默认表达式](create.md#create-default-values)). + +如果 `IF NOT EXISTS` 如果列已经存在,则查询不会返回错误。 如果您指定 `AFTER name_after` (另一列的名称),该列被添加在表列表中指定的一列之后。 否则,该列将添加到表的末尾。 请注意,没有办法将列添加到表的开头。 为了一系列的行动, `name_after` 可将该名称一栏,加入一个以前的行动。 + +添加列只是更改表结构,而不对数据执行任何操作。 数据不会出现在磁盘上后 `ALTER`. 如果从表中读取某一列的数据缺失,则将使用默认值填充该列(如果存在默认表达式,则执行默认表达式,或使用零或空字符串)。 合并数据部分后,该列将出现在磁盘上(请参阅 [MergeTree](../../engines/table_engines/mergetree_family/mergetree.md)). + +这种方法使我们能够完成 `ALTER` 即时查询,不增加旧数据量。 + +示例: + +``` sql +ALTER TABLE visits ADD COLUMN browser String AFTER user_id +``` + +#### DROP COLUMN {#alter_drop-column} + +``` sql +DROP COLUMN [IF EXISTS] name +``` + +删除具有名称的列 `name`. 
如果 `IF EXISTS` 如果指定了子句,如果该列不存在,则查询不会返回错误。 + +从文件系统中删除数据。 由于这将删除整个文件,查询几乎立即完成。 + +示例: + +``` sql +ALTER TABLE visits DROP COLUMN browser +``` + +#### CLEAR COLUMN {#alter_clear-column} + +``` sql +CLEAR COLUMN [IF EXISTS] name IN PARTITION partition_name +``` + +重置指定分区的列中的所有数据。 了解有关设置分区名称的详细信息 [如何指定分区表达式](#alter-how-to-specify-part-expr). + +如果 `IF EXISTS` 如果指定了子句,如果该列不存在,则查询不会返回错误。 + +示例: + +``` sql +ALTER TABLE visits CLEAR COLUMN browser IN PARTITION tuple() +``` + +#### COMMENT COLUMN {#alter_comment-column} + +``` sql +COMMENT COLUMN [IF EXISTS] name 'comment' +``` + +向列添加注释。 如果 `IF EXISTS` 如果指定了子句,如果该列不存在,则查询不会返回错误。 + +每列可以有一个注释。 如果列的注释已存在,则新注释将复盖以前的注释。 + +注释存储在 `comment_expression` 由返回的列 [DESCRIBE TABLE](misc.md#misc-describe-table) 查询。 + +示例: + +``` sql +ALTER TABLE visits COMMENT COLUMN browser 'The table shows the browser used for accessing the site.' +``` + +#### MODIFY COLUMN {#alter_modify-column} + +``` sql +MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [TTL] +``` + +此查询更改 `name` 列属性: + +- 类型 + +- 默认表达式 + +- TTL + + For examples of columns TTL modifying, see [Column TTL](../engines/table_engines/mergetree_family/mergetree.md#mergetree-column-ttl). + +如果 `IF EXISTS` 如果指定了子句,如果该列不存在,则查询不会返回错误。 + +更改类型时,值将被转换为 [toType](../../sql_reference/functions/type_conversion_functions.md) 函数被应用到它们。 如果仅更改默认表达式,则查询不会执行任何复杂的操作,并且几乎立即完成。 + +示例: + +``` sql +ALTER TABLE visits MODIFY COLUMN browser Array(String) +``` + +Changing the column type is the only complex action – it changes the contents of files with data. For large tables, this may take a long time. + +有几个处理阶段: + +- 准备具有修改数据的临时(新)文件。 +- 重命名旧文件。 +- 将临时(新)文件重命名为旧名称。 +- 删除旧文件。 + +只有第一阶段需要时间。 如果在此阶段出现故障,则不会更改数据。 +如果在其中一个连续阶段中出现故障,可以手动恢复数据。 例外情况是,如果旧文件从文件系统中删除,但新文件的数据没有写入磁盘并丢失。 + +该 `ALTER` 复制更改列的查询。 这些指令保存在ZooKeeper中,然后每个副本应用它们。 全部 `ALTER` 查询以相同的顺序运行。 查询等待对其他副本完成适当的操作。 但是,更改复制表中的列的查询可能会中断,并且所有操作都将异步执行。 + +#### 更改查询限制 {#alter-query-limitations} + +该 `ALTER` query允许您在嵌套数据结构中创建和删除单独的元素(列),但不能创建整个嵌套数据结构。 要添加嵌套数据结构,可以添加名称如下的列 `name.nested_name` 和类型 `Array(T)`. 嵌套数据结构等效于名称在点之前具有相同前缀的多个数组列。 + +不支持删除主键或采样键中的列(在主键中使用的列 `ENGINE` 表达式)。 只有在此更改不会导致数据被修改时,才可以更改主键中包含的列的类型(例如,允许您向枚举添加值或更改类型 `DateTime` 到 `UInt32`). 
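例如,按上述规则,下面这种对排序键列的类型修改通常是允许的,因为 `DateTime` 到 `UInt32` 的转换不会修改已有数据(示意语句;表名与列名均为假设):

``` sql
-- 假设 EventTime 包含在排序键中,且底层数据无需改写
ALTER TABLE hits_example MODIFY COLUMN EventTime UInt32;
```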
如果 `ALTER` 查询不足以完成您需要的表更改,您可以创建一个新表,使用 [INSERT SELECT](insert_into.md#insert_query_insert-select) 查询把数据复制过去,然后使用 [RENAME](misc.md#misc_operations-rename) 查询切换两个表并删除旧表。 您也可以使用 [clickhouse-copier](../../operations/utilities/clickhouse-copier.md) 作为 `INSERT SELECT` 查询的替代方案。

`ALTER` 查询会阻止对表的所有读取和写入。 换句话说,如果在执行 `ALTER` 查询时有一个长时间运行的 `SELECT`,那么 `ALTER` 查询将等待它完成。 同时,对同一个表的所有新查询都将等待该 `ALTER` 执行完毕。

对于本身不存储数据的表(例如 `Merge` 和 `Distributed`),`ALTER` 只更改表结构,而不更改从属表的结构。 例如,对 `Distributed` 表运行 ALTER 时,您还需要对所有远程服务器上的表运行 `ALTER`。

### 使用键表达式进行操作 {#manipulations-with-key-expressions}

支持以下命令:

``` sql
MODIFY ORDER BY new_expression
```

该命令只适用于 [`MergeTree`](../../engines/table_engines/mergetree_family/mergetree.md) 系列的表(包括[复制表](../../engines/table_engines/mergetree_family/replication.md))。 该命令将表的[排序键](../../engines/table_engines/mergetree_family/mergetree.md)更改为 `new_expression`(表达式或表达式元组)。 主键保持不变。

该命令是轻量级的,因为它只更改元数据。 为了保持数据部分中的行按排序键表达式排序这一属性,您不能向排序键中添加包含已有列的表达式(只能添加同一个 `ALTER` 查询中由 `ADD COLUMN` 命令新增的列)。

### 使用数据跳过索引进行操作 {#manipulations-with-data-skipping-indices}

该操作只适用于 [`*MergeTree`](../../engines/table_engines/mergetree_family/mergetree.md) 系列的表(包括[复制表](../../engines/table_engines/mergetree_family/replication.md))。 以下操作可用:

- `ALTER TABLE [db].name ADD INDEX name expression TYPE type GRANULARITY value AFTER name [AFTER name2]` - 将索引描述添加到表元数据。

- `ALTER TABLE [db].name DROP INDEX name` - 从表元数据中删除索引描述,并从磁盘中删除索引文件。

这些命令是轻量级的,因为它们只更改元数据或删除文件。
此外,它们是可复制的(通过ZooKeeper同步索引元数据)。

### 使用约束进行操作 {#manipulations-with-constraints}

查看更多 [约束](create.md#constraints)

可以使用以下语法添加或删除约束:

``` sql
ALTER TABLE [db].name ADD CONSTRAINT constraint_name CHECK expression;
ALTER TABLE [db].name DROP CONSTRAINT constraint_name;
```

查询会在表中添加或删除有关约束的元数据,因此它们会被立即处理。

添加约束时,约束检查 *不会* 在已有数据上执行。

复制表上的所有更改都会广播到ZooKeeper,因此会应用于其他副本。

### 分区和数据部分操作 {#alter_manipulations-with-partitions}

以下对[分区](../../engines/table_engines/mergetree_family/custom_partitioning_key.md)的操作可用:

- [DETACH PARTITION](#alter_detach-partition) – 将分区移动到 `detached` 目录并忘记它。
- [DROP PARTITION](#alter_drop-partition) – 删除分区。
- [ATTACH PART\|PARTITION](#alter_attach-partition) – 将数据部分或分区从 `detached` 目录添加到表中。
- [REPLACE PARTITION](#alter_replace-partition) - 将数据分区从一个表复制到另一个表。
- [ATTACH PARTITION FROM](#alter_attach-partition-from) – 将数据分区从一个表复制到另一个表并添加。
- [REPLACE PARTITION](#alter_replace-partition) - 将数据分区从一个表复制到另一个表并替换。
- [MOVE PARTITION TO TABLE](#alter_move_to_table-partition) - 将数据分区从一个表移动到另一个表。
- [CLEAR COLUMN IN PARTITION](#alter_clear-column-partition) - 重置分区中指定列的值。
- [CLEAR INDEX IN PARTITION](#alter_clear-index-partition) - 重置分区中指定的二级索引。
- [FREEZE PARTITION](#alter_freeze-partition) – 创建分区的备份。
- [FETCH PARTITION](#alter_fetch-partition) – 从另一台服务器下载分区。
- [MOVE PARTITION\|PART](#alter_move-partition) – 将分区/数据部分移动到另一个磁盘或卷。

#### DETACH PARTITION {#alter_detach-partition}

``` sql
ALTER TABLE table_name DETACH PARTITION partition_expr
```

将指定分区的所有数据移动到 `detached` 目录。 服务器会忘记分离出去的数据分区,就好像它不存在一样。 在您执行 [ATTACH](#alter_attach-partition) 查询之前,服务器不会知道这些数据。

示例:

``` sql
ALTER TABLE visits DETACH PARTITION 201901
```

有关设置分区表达式的信息,请参阅[如何指定分区表达式](#alter-how-to-specify-part-expr)一节。
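分离后,可以通过 `system.detached_parts` 系统表确认这些数据部分(示意查询;列名以该系统表的实际定义为准):

``` sql
SELECT name, partition_id
FROM system.detached_parts
WHERE database = 'default' AND table = 'visits';
```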
+ +执行查询后,您可以对查询中的数据进行任何操作 `detached` directory — delete it from the file system, or just leave it. + +This query is replicated – it moves the data to the `detached` 所有副本上的目录。 请注意,您只能对领导副本执行此查询。 要确定副本是否为领导者,请执行 `SELECT` 查询到 [系统。副本](../../operations/system_tables.md#system_tables-replicas) 桌子 或者,它更容易使 `DETACH` 对所有副本进行查询-除了领导副本之外,所有副本都会引发异常。 + +#### DROP PARTITION {#alter_drop-partition} + +``` sql +ALTER TABLE table_name DROP PARTITION partition_expr +``` + +从表中删除指定的分区。 此查询将分区标记为非活动分区,并在大约10分钟内完全删除数据。 + +阅读有关在一节中设置分区表达式的信息 [如何指定分区表达式](#alter-how-to-specify-part-expr). + +The query is replicated – it deletes data on all replicas. + +#### DROP DETACHED PARTITION\|PART {#alter_drop-detached} + +``` sql +ALTER TABLE table_name DROP DETACHED PARTITION|PART partition_expr +``` + +从中删除指定分区的指定部分或所有部分 `detached`. +了解有关在一节中设置分区表达式的详细信息 [如何指定分区表达式](#alter-how-to-specify-part-expr). + +#### ATTACH PARTITION\|PART {#alter_attach-partition} + +``` sql +ALTER TABLE table_name ATTACH PARTITION|PART partition_expr +``` + +将数据从 `detached` 目录。 可以为整个分区或单独的部分添加数据。 例: + +``` sql +ALTER TABLE visits ATTACH PARTITION 201901; +ALTER TABLE visits ATTACH PART 201901_2_2_0; +``` + +了解有关在一节中设置分区表达式的详细信息 [如何指定分区表达式](#alter-how-to-specify-part-expr). + +此查询被复制。 副本发起程序检查是否有数据在 `detached` 目录。 如果数据存在,则查询将检查其完整性。 如果一切正确,则查询将数据添加到表中。 所有其他副本都从副本发起程序下载数据。 + +所以你可以把数据到 `detached` 在一个副本上的目录,并使用 `ALTER ... ATTACH` 查询以将其添加到所有副本上的表中。 + +#### ATTACH PARTITION FROM {#alter_attach-partition-from} + +``` sql +ALTER TABLE table2 ATTACH PARTITION partition_expr FROM table1 +``` + +此查询将数据分区从 `table1` 到 `table2` 将数据添加到存在 `table2`. 请注意,数据不会从中删除 `table1`. + +要使查询成功运行,必须满足以下条件: + +- 两个表必须具有相同的结构。 +- 两个表必须具有相同的分区键。 + +#### REPLACE PARTITION {#alter_replace-partition} + +``` sql +ALTER TABLE table2 REPLACE PARTITION partition_expr FROM table1 +``` + +此查询将数据分区从 `table1` 到 `table2` 并替换在现有的分区 `table2`. 请注意,数据不会从中删除 `table1`. + +要使查询成功运行,必须满足以下条件: + +- 两个表必须具有相同的结构。 +- 两个表必须具有相同的分区键。 + +#### MOVE PARTITION TO TABLE {#alter_move_to_table-partition} + +``` sql +ALTER TABLE table_source MOVE PARTITION partition_expr TO TABLE table_dest +``` + +此查询将数据分区从 `table_source` 到 `table_dest` 删除数据 `table_source`. + +要使查询成功运行,必须满足以下条件: + +- 两个表必须具有相同的结构。 +- 两个表必须具有相同的分区键。 +- 两个表必须是相同的引擎系列。 (已复制或未复制) +- 两个表必须具有相同的存储策略。 + +#### CLEAR COLUMN IN PARTITION {#alter_clear-column-partition} + +``` sql +ALTER TABLE table_name CLEAR COLUMN column_name IN PARTITION partition_expr +``` + +重置分区中指定列中的所有值。 如果 `DEFAULT` 创建表时确定了子句,此查询将列值设置为指定的默认值。 + +示例: + +``` sql +ALTER TABLE visits CLEAR COLUMN hour in PARTITION 201902 +``` + +#### FREEZE PARTITION {#alter_freeze-partition} + +``` sql +ALTER TABLE table_name FREEZE [PARTITION partition_expr] +``` + +此查询创建指定分区的本地备份。 如果 `PARTITION` 子句被省略,查询一次创建所有分区的备份。 + +!!! note "注" + 在不停止服务器的情况下执行整个备份过程。 + +请注意,对于旧式表,您可以指定分区名称的前缀(例如, ‘2019’)-然后查询为所有相应的分区创建备份。 阅读有关在一节中设置分区表达式的信息 [如何指定分区表达式](#alter-how-to-specify-part-expr). + +在执行时,对于数据快照,查询将创建指向表数据的硬链接。 硬链接被放置在目录中 `/var/lib/clickhouse/shadow/N/...`,哪里: + +- `/var/lib/clickhouse/` 是配置中指定的工作ClickHouse目录。 +- `N` 是备份的增量编号。 + +!!! note "注" + 如果您使用 [用于在表中存储数据的一组磁盘](../../engines/table_engines/mergetree_family/mergetree.md#table_engine-mergetree-multiple-volumes),该 `shadow/N` 目录出现在每个磁盘上,存储由匹配的数据部分 `PARTITION` 表达。 + +在备份内部创建的目录结构与在备份内部创建的目录结构相同 `/var/lib/clickhouse/`. 
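一个具体的调用示例(示意;表名与分区为假设):

``` sql
-- 为 2019 年 1 月的分区创建本地备份(硬链接快照)
ALTER TABLE visits FREEZE PARTITION 201901
```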
查询执行 ‘chmod’ 对于所有文件,禁止写入它们。 + +创建备份后,您可以从以下位置复制数据 `/var/lib/clickhouse/shadow/` 然后将其从本地服务器中删除。 请注意, `ALTER t FREEZE PARTITION` 不复制查询。 它仅在本地服务器上创建本地备份。 + +查询几乎立即创建备份(但首先它会等待对相应表的当前查询完成运行)。 + +`ALTER TABLE t FREEZE PARTITION` 仅复制数据,而不复制表元数据。 若要备份表元数据,请复制该文件 `/var/lib/clickhouse/metadata/database/table.sql` + +要从备份还原数据,请执行以下操作: + +1. 如果表不存在,则创建该表。 要查看查询,请使用。sql文件(替换 `ATTACH` 在它与 `CREATE`). +2. 从复制数据 `data/database/table/` 目录内的备份到 `/var/lib/clickhouse/data/database/table/detached/` 目录。 +3. 快跑 `ALTER TABLE t ATTACH PARTITION` 将数据添加到表的查询。 + +从备份还原不需要停止服务器。 + +有关备份和还原数据的详细信息,请参阅 [数据备份](../../operations/backup.md) 科。 + +#### CLEAR INDEX IN PARTITION {#alter_clear-index-partition} + +``` sql +ALTER TABLE table_name CLEAR INDEX index_name IN PARTITION partition_expr +``` + +查询的工作原理类似于 `CLEAR COLUMN`,但它重置索引而不是列数据。 + +#### FETCH PARTITION {#alter_fetch-partition} + +``` sql +ALTER TABLE table_name FETCH PARTITION partition_expr FROM 'path-in-zookeeper' +``` + +从另一台服务器下载分区。 此查询仅适用于复制的表。 + +查询执行以下操作: + +1. 从指定的分片下载分区。 在 ‘path-in-zookeeper’ 您必须在ZooKeeper中指定分片的路径。 +2. 然后查询将下载的数据放到 `detached` 的目录 `table_name` 桌子 使用 [ATTACH PARTITION\|PART](#alter_attach-partition) 查询将数据添加到表中。 + +例如: + +``` sql +ALTER TABLE users FETCH PARTITION 201902 FROM '/clickhouse/tables/01-01/visits'; +ALTER TABLE users ATTACH PARTITION 201902; +``` + +请注意: + +- 该 `ALTER ... FETCH PARTITION` 查询不被复制。 它将分区放置在 `detached` 仅在本地服务器上的目录。 +- 该 `ALTER TABLE ... ATTACH` 复制查询。 它将数据添加到所有副本。 数据被添加到从副本之一 `detached` 目录,以及其他-从相邻的副本。 + +在下载之前,系统会检查分区是否存在并且表结构匹配。 从正常副本中自动选择最合适的副本。 + +虽然查询被调用 `ALTER TABLE`,它不会更改表结构,并且不会立即更改表中可用的数据。 + +#### MOVE PARTITION\|PART {#alter_move-partition} + +将分区或数据部分移动到另一个卷或磁盘 `MergeTree`-发动机表。 看 [使用多个块设备进行数据存储](../../engines/table_engines/mergetree_family/mergetree.md#table_engine-mergetree-multiple-volumes). + +``` sql +ALTER TABLE table_name MOVE PARTITION|PART partition_expr TO DISK|VOLUME 'disk_name' +``` + +该 `ALTER TABLE t MOVE` 查询: + +- 不复制,因为不同的副本可能具有不同的存储策略。 +- 如果未配置指定的磁盘或卷,则返回错误。 如果无法应用存储策略中指定的数据移动条件,Query还会返回错误。 +- 可以在返回错误的情况下,当要移动的数据已经被后台进程移动时,并发 `ALTER TABLE t MOVE` 查询或作为后台数据合并的结果。 在这种情况下,用户不应该执行任何其他操作。 + +示例: + +``` sql +ALTER TABLE hits MOVE PART '20190301_14343_16206_438' TO VOLUME 'slow' +ALTER TABLE hits MOVE PARTITION '2019-09-01' TO DISK 'fast_ssd' +``` + +#### 如何设置分区表达式 {#alter-how-to-specify-part-expr} + +您可以在以下内容中指定分区表达式 `ALTER ... PARTITION` 以不同方式查询: + +- 作为从值 `partition` 列 `system.parts` 桌子 例如, `ALTER TABLE visits DETACH PARTITION 201901`. +- 作为来自表列的表达式。 支持常量和常量表达式。 例如, `ALTER TABLE visits DETACH PARTITION toYYYYMM(toDate('2019-01-25'))`. +- 使用分区ID。 分区ID是用作文件系统和ZooKeeper中分区名称的分区的字符串标识符(如果可能的话,人类可读)。 分区ID必须在指定 `PARTITION ID` 子句,用单引号。 例如, `ALTER TABLE visits DETACH PARTITION ID '201901'`. +- 在 [ALTER ATTACH PART](#alter_attach-partition) 和 [DROP DETACHED PART](#alter_drop-detached) 查询时,要指定部件的名称,请将字符串文字与来自 `name` 列 [系统。detached\_parts](../../operations/system_tables.md#system_tables-detached_parts) 桌子 例如, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`. + +指定分区时引号的使用取决于分区表达式的类型。 例如,对于 `String` 类型,你必须在引号中指定其名称 (`'`). 为 `Date` 和 `Int*` 类型不需要引号。 + +对于旧式表,您可以将分区指定为数字 `201901` 或者一个字符串 `'201901'`. 对于类型,新样式表的语法更严格(类似于值输入格式的解析器)。 + +上述所有规则也适用于 [OPTIMIZE](misc.md#misc_operations-optimize) 查询。 如果在优化非分区表时需要指定唯一的分区,请设置表达式 `PARTITION tuple()`. 例如: + +``` sql +OPTIMIZE TABLE table_not_partitioned PARTITION tuple() FINAL; +``` + +的例子 `ALTER ... 
PARTITION` 查询在测试中演示 [`00502_custom_partitioning_local`](https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/00502_custom_partitioning_local.sql) 和 [`00502_custom_partitioning_replicated_zookeeper`](https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/00502_custom_partitioning_replicated_zookeeper.sql). + +### 使用表TTL进行操作 {#manipulations-with-table-ttl} + +你可以改变 [表TTL](../../engines/table_engines/mergetree_family/mergetree.md#mergetree-table-ttl) 请填写以下表格: + +``` sql +ALTER TABLE table-name MODIFY TTL ttl-expression +``` + +### ALTER查询的同步性 {#synchronicity-of-alter-queries} + +对于不可复制的表,所有 `ALTER` 查询是同步执行的。 对于可复制的表,查询仅添加相应操作的说明 `ZooKeeper`,并尽快执行操作本身。 但是,查询可以等待在所有副本上完成这些操作。 + +为 `ALTER ... ATTACH|DETACH|DROP` 查询,您可以使用 `replication_alter_partitions_sync` 设置设置等待。 +可能的值: `0` – do not wait; `1` – only wait for own execution (default); `2` – wait for all. + +### 突变 {#alter-mutations} + +突变是允许更改或删除表中的行的ALTER查询变体。 与标准相比 `UPDATE` 和 `DELETE` 用于点数据更改的查询,mutations适用于更改表中大量行的繁重操作。 支持的 `MergeTree` 表引擎系列,包括具有复制支持的引擎。 + +现有表可以按原样进行突变(无需转换),但是在将第一次突变应用于表之后,其元数据格式将与以前的服务器版本不兼容,并且无法回退到以前的版本。 + +当前可用的命令: + +``` sql +ALTER TABLE [db.]table DELETE WHERE filter_expr +``` + +该 `filter_expr` 必须是类型 `UInt8`. 查询删除表中此表达式采用非零值的行。 + +``` sql +ALTER TABLE [db.]table UPDATE column1 = expr1 [, ...] WHERE filter_expr +``` + +该 `filter_expr` 必须是类型 `UInt8`. 此查询将指定列的值更新为行中相应表达式的值。 `filter_expr` 取非零值。 使用以下命令将值转换为列类型 `CAST` 接线员 不支持更新用于计算主键或分区键的列。 + +``` sql +ALTER TABLE [db.]table MATERIALIZE INDEX name IN PARTITION partition_name +``` + +查询将重新生成二级索引 `name` 在分区中 `partition_name`. + +一个查询可以包含多个用逗号分隔的命令。 + +For\*MergeTree表的突变通过重写整个数据部分来执行。 没有原子性-部分被取代为突变的部分,只要他们准备好和 `SELECT` 在突变期间开始执行的查询将看到来自已经突变的部件的数据以及来自尚未突变的部件的数据。 + +突变完全按其创建顺序排序,并以该顺序应用于每个部分。 突变也使用插入进行部分排序-在提交突变之前插入到表中的数据将被突变,之后插入的数据将不会被突变。 请注意,突变不会以任何方式阻止插入。 + +Mutation查询在添加mutation条目后立即返回(如果将复制的表复制到ZooKeeper,则将非复制的表复制到文件系统)。 突变本身使用系统配置文件设置异步执行。 要跟踪突变的进度,您可以使用 [`system.mutations`](../../operations/system_tables.md#system_tables-mutations) 桌子 即使重新启动ClickHouse服务器,成功提交的突变仍将继续执行。 一旦提交,没有办法回滚突变,但如果突变由于某种原因被卡住,可以使用 [`KILL MUTATION`](misc.md#kill-mutation) 查询。 + +已完成突变的条目不会立即删除(保留条目的数量由 `finished_mutations_to_keep` 存储引擎参数)。 旧的突变条目将被删除。 + +[原始文章](https://clickhouse.tech/docs/en/query_language/alter/) diff --git a/docs/zh/query_language/create.md b/docs/zh/sql_reference/statements/create.md similarity index 68% rename from docs/zh/query_language/create.md rename to docs/zh/sql_reference/statements/create.md index 94c4ea3669c..1697df692b5 100644 --- a/docs/zh/query_language/create.md +++ b/docs/zh/sql_reference/statements/create.md @@ -1,3 +1,4 @@ + ## CREATE DATABASE {#create-database} 该查询用于根据指定名称创建数据库。 @@ -22,7 +23,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ) ENGINE = engine ``` -在指定的‘db’数据库中创建一个名为‘name’的表,如果查询中没有包含‘db’,则默认使用当前选择的数据库作为‘db’。后面的是包含在括号中的表结构以及表引擎的声明。 +在指定的'db'数据库中创建一个名为'name'的表,如果查询中没有包含'db',则默认使用当前选择的数据库作为'db'。后面的是包含在括号中的表结构以及表引擎的声明。 其中表结构声明是一个包含一组列描述声明的组合。如果表引擎是支持索引的,那么可以在表引擎的参数中对其进行说明。 在最简单的情况下,列描述是指`名称 类型`这样的子句。例如: `RegionID UInt32`。 @@ -42,16 +43,16 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name ENGINE = engine AS SELECT ... 
以上所有情况,如果指定了`IF NOT EXISTS`,那么在该表已经存在的情况下,查询不会返回任何错误。在这种情况下,查询几乎不会做任何事情。 -在`ENGINE`子句后还可能存在一些其他的子句,更详细的信息可以参考 [表引擎](../operations/table_engines/index.md) 中关于建表的描述。 +在`ENGINE`子句后还可能存在一些其他的子句,更详细的信息可以参考 [表引擎](../../sql_reference/statements/create.md) 中关于建表的描述。 ### 默认值 {#create-default-values} 在列描述中你可以通过以下方式之一为列指定默认表达式:`DEFAULT expr`,`MATERIALIZED expr`,`ALIAS expr`。 示例:`URLDomain String DEFAULT domain(URL)`。 -如果在列描述中未定义任何默认表达式,那么系统将会根据类型设置对应的默认值,如:数值类型为零、字符串类型为空字符串、数组类型为空数组、日期类型为‘0000-00-00’以及时间类型为‘0000-00-00 00:00:00’。不支持使用NULL作为普通类型的默认值。 +如果在列描述中未定义任何默认表达式,那么系统将会根据类型设置对应的默认值,如:数值类型为零、字符串类型为空字符串、数组类型为空数组、日期类型为'0000-00-00'以及时间类型为'0000-00-00 00:00:00'。不支持使用NULL作为普通类型的默认值。 -如果定义了默认表达式,则可以不定义列的类型。如果没有明确的定义类的类型,则使用默认表达式的类型。例如:`EventDate DEFAULT toDate(EventTime)` - 最终‘EventDate’将使用‘Date’作为类型。 +如果定义了默认表达式,则可以不定义列的类型。如果没有明确的定义类的类型,则使用默认表达式的类型。例如:`EventDate DEFAULT toDate(EventTime)` - 最终'EventDate'将使用'Date'作为类型。 如果同时指定了默认表达式与列的类型,则将使用类型转换函数将默认表达式转换为指定的类型。例如:`Hits UInt32 DEFAULT 0`与`Hits UInt32 DEFAULT toUInt32(0)`意思相同。 @@ -65,7 +66,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name ENGINE = engine AS SELECT ... 物化表达式,被该表达式指定的列不能包含在INSERT的列表中,因为它总是被计算出来的。 对于INSERT而言,不需要考虑这些列。 -另外,在SELECT查询中如果包含星号,此列不会被用来替换星号,这是因为考虑到数据转储,在使用`SELECT *`查询出的结果总能够被’INSERT’回表。 +另外,在SELECT查询中如果包含星号,此列不会被用来替换星号,这是因为考虑到数据转储,在使用`SELECT *`查询出的结果总能够被'INSERT'回表。 `ALIAS expr` @@ -79,9 +80,9 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name ENGINE = engine AS SELECT ... 不能够为nested类型的列设置默认值。 -### Constraints {#constraints} +### 制约因素 {#constraints} -Along with columns descriptions constraints could be defined: +随着列描述约束可以定义: ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] @@ -93,17 +94,17 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ) ENGINE = engine ``` -`boolean_expr_1` could by any boolean expression. If constraints are defined for the table, each of them will be checked for every row in `INSERT` query. If any constraint is not satisfied — server will raise an exception with constraint name and checking expression. +`boolean_expr_1` 可以通过任何布尔表达式。 如果为表定义了约束,则将为表中的每一行检查它们中的每一行 `INSERT` query. If any constraint is not satisfied — server will raise an exception with constraint name and checking expression. -Adding large amount of constraints can negatively affect performance of big `INSERT` queries. +添加大量的约束会对big的性能产生负面影响 `INSERT` 查询。 -### TTL Expression {#ttl-expression} +### Ttl表达式 {#ttl-expression} -Defines storage time for values. Can be specified only for MergeTree-family tables. For the detailed description, see [TTL for columns and tables](../operations/table_engines/mergetree.md#table_engine-mergetree-ttl). +定义值的存储时间。 只能为MergeTree系列表指定。 有关详细说明,请参阅 [列和表的TTL](../../sql_reference/statements/create.md#table_engine-mergetree-ttl). -### Column Compression Codecs {#codecs} +### 列压缩编解ecs {#codecs} -By default, ClickHouse applies the compression method, defined in [server settings](../operations/server_settings/settings.md#server-settings-compression), to columns. You can also define the compression method for each individual column in the `CREATE TABLE` query. +默认情况下,ClickHouse应用以下定义的压缩方法 [服务器设置](../../sql_reference/statements/create.md#server-settings-compression),列。 您还可以定义在每个单独的列的压缩方法 `CREATE TABLE` 查询。 ``` sql CREATE TABLE codec_example @@ -118,32 +119,32 @@ ENGINE = ... ``` -If a codec is specified, the default codec doesn’t apply. Codecs can be combined in a pipeline, for example, `CODEC(Delta, ZSTD)`. 
+如果指定了编解码器,则默认编解码器不再适用。编解码器可以组合成一个流水线,例如 `CODEC(Delta, ZSTD)`。要为您的项目选择最佳的编解码器组合,请参考 Altinity 的文章 [New Encodings to Improve ClickHouse Efficiency](https://www.altinity.com/blog/2019/7/new-encodings-to-improve-clickhouse) 中描述的方法进行基准测试。

-!!! warning "Warning"
-    You can’t decompress ClickHouse database files with external utilities like `lz4`. Instead, use the special [clickhouse-compressor](https://github.com/ClickHouse/ClickHouse/tree/master/programs/compressor) utility.
+!!! warning "警告"
+    您无法使用 `lz4` 之类的外部实用程序解压缩ClickHouse数据库文件。请改用专门的 [clickhouse-compressor](https://github.com/ClickHouse/ClickHouse/tree/master/programs/compressor) 实用程序。

-Compression is supported for the following table engines:
+以下表引擎支持压缩:

-- [MergeTree](../operations/table_engines/mergetree.md) family
-- [Log](../operations/table_engines/log_family.md) family
-- [Set](../operations/table_engines/set.md)
-- [Join](../operations/table_engines/join.md)
+- [MergeTree](../../engines/table_engines/mergetree_family/mergetree.md) 系列
+- [Log](../../engines/table_engines/log_family/log_family.md) 系列
+- [Set](../../engines/table_engines/special/set.md)
+- [Join](../../engines/table_engines/special/join.md)

-ClickHouse supports common purpose codecs and specialized codecs.
+ClickHouse支持通用编解码器和专用编解码器。

-#### Specialized Codecs {#create-query-specialized-codecs}
+#### 专用编解码器 {#create-query-specialized-codecs}

-These codecs are designed to make compression more effective by using specific features of data. Some of these codecs don’t compress data themself. Instead, they prepare the data for a common purpose codec, which compresses it better than without this preparation.
+这些编解码器旨在利用数据的特定特征使压缩更有效。其中一些编解码器本身并不压缩数据,而是对数据进行预处理,使通用编解码器能获得比未经预处理时更好的压缩率。

-Specialized codecs:
+专用编解码器:

-- `Delta(delta_bytes)` — Compression approach in which raw values are replaced by the difference of two neighboring values, except for the first value that stays unchanged. Up to `delta_bytes` are used for storing delta values, so `delta_bytes` is the maximum size of raw values. Possible `delta_bytes` values: 1, 2, 4, 8. The default value for `delta_bytes` is `sizeof(type)` if equal to 1, 2, 4, or 8. In all other cases, it’s 1.
-- `DoubleDelta` — Calculates delta of deltas and writes it in compact binary form. Optimal compression rates are achieved for monotonic sequences with a constant stride, such as time series data. Can be used with any fixed-width type. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. Uses 1 extra bit for 32-byte deltas: 5-bit prefixes instead of 4-bit prefixes. For additional information, see Compressing Time Stamps in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf).
-- `Gorilla` — Calculates XOR between current and previous value and writes it in compact binary form. Efficient when storing a series of floating point values that change slowly, because the best compression rate is achieved when neighboring values are binary equal. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. For additional information, see Compressing Values in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf).
-- `T64` — Compression approach that crops unused high bits of values in integer data types (including `Enum`, `Date` and `DateTime`). At each step of its algorithm, codec takes a block of 64 values, puts them into 64x64 bit matrix, transposes it, crops the unused bits of values and returns the rest as a sequence. Unused bits are the bits, that don’t differ between maximum and minimum values in the whole data part for which the compression is used.
+- `Delta(delta_bytes)` — 一种压缩方式:除保持不变的第一个值外,原始值被替换为相邻两个值之差。最多使用 `delta_bytes` 个字节存储增量值,因此 `delta_bytes` 是原始值的最大大小。可能的 `delta_bytes` 值:1、2、4、8。当 `sizeof(type)` 等于1、2、4或8时,`delta_bytes` 的默认值为 `sizeof(type)`;在所有其他情况下为1。
+- `DoubleDelta` — 计算增量的增量,并以紧凑的二进制形式写入。对具有恒定步长的单调序列(如时间序列数据)可以达到最佳压缩率。可用于任何固定宽度的类型。实现了Gorilla TSDB中使用的算法,并扩展为支持64位类型。对32字节的增量多使用1个额外的位:使用5位前缀而不是4位前缀。有关更多信息,请参阅 [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf) 中的 Compressing Time Stamps 一节。
+- `Gorilla` — 计算当前值与前一个值之间的XOR,并以紧凑的二进制形式写入。当存储一系列缓慢变化的浮点值时很高效,因为相邻值二进制相等时可以获得最佳压缩率。实现了Gorilla TSDB中使用的算法,并扩展为支持64位类型。有关更多信息,请参阅 [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf) 中的 Compressing Values 一节。
+- `T64` — 一种裁剪整数数据类型(包括 `Enum`、`Date` 和 `DateTime`)中未使用的高位的压缩方式。在算法的每个步骤中,编解码器取一个包含64个值的块,将它们放入64x64位矩阵,转置矩阵,裁剪值中未使用的位,然后将其余部分作为序列返回。未使用的位是指在使用该压缩的整个数据部分中,最大值与最小值之间没有差异的位。

-`DoubleDelta` and `Gorilla` codecs are used in Gorilla TSDB as the components of its compressing algorithm. Gorilla approach is effective in scenarios when there is a sequence of slowly changing values with their timestamps. Timestamps are effectively compressed by the `DoubleDelta` codec, and values are effectively compressed by the `Gorilla` codec. For example, to get an effectively stored table, you can create it in the following configuration:
+`DoubleDelta` 和 `Gorilla` 编解码器在Gorilla TSDB中用作其压缩算法的组件。当存在一系列缓慢变化的值及其时间戳时,Gorilla方式非常有效:时间戳由 `DoubleDelta` 编解码器有效压缩,而值由 `Gorilla` 编解码器有效压缩。例如,要获得有效存储的表,可以按以下配置创建它:

``` sql
CREATE TABLE codec_example
(
@@ -154,16 +155,16 @@ CREATE TABLE codec_example
ENGINE = MergeTree()
```

-#### Common purpose codecs {#create-query-common-purpose-codecs}
+#### 通用编解码器 {#create-query-common-purpose-codecs}

-Codecs:
+编解码器:

- `NONE` — No compression.
-- `LZ4` — Lossless [data compression algorithm](https://github.com/lz4/lz4) used by default. Applies LZ4 fast compression.
-- `LZ4HC[(level)]` — LZ4 HC (high compression) algorithm with configurable level. Default level: 9. Setting `level <= 0` applies the default level. Possible levels: \[1, 12\]. Recommended level range: \[4, 9\].
-- `ZSTD[(level)]` — [ZSTD compression algorithm](https://en.wikipedia.org/wiki/Zstandard) with configurable `level`. Possible levels: \[1, 22\]. Default value: 1.
+- `LZ4` — 默认使用的无损 [数据压缩算法](https://github.com/lz4/lz4)。应用LZ4快速压缩。
+- `LZ4HC[(level)]` — 级别可配置的LZ4 HC(高压缩)算法。默认级别:9。设置 `level <= 0` 时应用默认级别。可能的级别:\[1, 12\]。推荐级别范围:\[4, 9\]。
+- `ZSTD[(level)]` — `level` 可配置的 [ZSTD压缩算法](https://en.wikipedia.org/wiki/Zstandard)。可能的级别:\[1, 22\]。默认值:1。

-High compression levels are useful for asymmetric scenarios, like compress once, decompress repeatedly. Higher levels mean better compression and higher CPU usage.
+高压缩级别对于非对称场景非常有用,例如压缩一次,重复解压缩。更高的级别意味着更好的压缩和更高的CPU使用率。

## 临时表 {#lin-shi-biao}
diff --git a/docs/zh/sql_reference/statements/index.md b/docs/zh/sql_reference/statements/index.md
new file mode 100644
index 00000000000..bb04551dea1
--- /dev/null
+++ b/docs/zh/sql_reference/statements/index.md
@@ -0,0 +1,8 @@
+---
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_folder_title: "\u8BED\u53E5"
+toc_priority: 31
+---
+
+
diff --git a/docs/zh/query_language/insert_into.md b/docs/zh/sql_reference/statements/insert_into.md
similarity index 89%
rename from docs/zh/query_language/insert_into.md
rename to docs/zh/sql_reference/statements/insert_into.md
index b271f62bb03..a59730f5750 100644
--- a/docs/zh/query_language/insert_into.md
+++ b/docs/zh/sql_reference/statements/insert_into.md
@@ -1,3 +1,4 @@
+
## INSERT {#insert}

INSERT查询主要用于向系统中添加数据.
@@ -13,9 +14,9 @@ INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), .
- 如果存在`DEFAULT`表达式,根据`DEFAULT`表达式计算被填充的值。
- 如果没有定义`DEFAULT`表达式,则填充零或空字符串。

-如果 [strict\_insert\_defaults=1](../operations/settings/settings.md),你必须在查询中列出所有没有定义`DEFAULT`表达式的列。
+如果 [strict\_insert\_defaults=1](../../operations/settings/settings.md),你必须在查询中列出所有没有定义`DEFAULT`表达式的列。

-数据可以以ClickHouse支持的任何 [输入输出格式](../interfaces/formats.md#formats) 传递给INSERT。格式的名称必须显示的指定在查询中:
+数据可以以ClickHouse支持的任何 [输入输出格式](../../interfaces/formats.md#formats) 传递给INSERT。格式的名称必须显式地指定在查询中:

``` sql
INSERT INTO [db.]table [(c1, c2, c3)] FORMAT format_name data_set
```
@@ -37,7 +38,7 @@ INSERT INTO t FORMAT TabSeparated
22 Qwerty
```

-在使用命令行客户端或HTTP客户端时,你可以将具体的查询语句与数据分开发送。更多具体信息,请参考«[客户端](../interfaces/index.md#interfaces)»部分。
+在使用命令行客户端或HTTP客户端时,你可以将具体的查询语句与数据分开发送。更多具体信息,请参考«[客户端](../../interfaces/index.md#interfaces)»部分。

### 使用`SELECT`的结果写入 {#insert_query_insert-select}
diff --git a/docs/zh/sql_reference/statements/misc.md b/docs/zh/sql_reference/statements/misc.md
new file mode 100644
index 00000000000..e50f08464b7
--- /dev/null
+++ b/docs/zh/sql_reference/statements/misc.md
@@ -0,0 +1,252 @@
+---
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_priority: 39
+toc_title: "\u5176\u4ED6"
+---
+
+# 杂项查询 {#miscellaneous-queries}
+
+## ATTACH {#attach}
+
+这个查询与 `CREATE` 完全相同,但是
+
+- 它使用 `ATTACH` 关键字,而不是 `CREATE`。
+- 它不会在磁盘上创建数据,而是假定数据已经在适当的位置,只是将有关表的信息添加到服务器。
+  执行 `ATTACH` 查询后,服务器将知道该表的存在。
+
+如果表之前已分离 (`DETACH`),意味着其结构是已知的,可以使用简写形式而无需再定义结构。
+
+``` sql
+ATTACH TABLE [IF NOT EXISTS] [db.]name [ON CLUSTER cluster]
+```
+
+服务器启动时会使用此查询。服务器将表的元数据存储为包含 `ATTACH` 查询的文件,并在启动时直接运行这些查询(服务器上显式创建的系统表除外)。
+
+## CHECK TABLE {#check-table}
+
+检查表中的数据是否已损坏。
+
+``` sql
+CHECK TABLE [db.]name
+```
+
+`CHECK TABLE` 查询将实际文件大小与存储在服务器上的预期值进行比较。如果文件大小与存储的值不匹配,则表示数据已损坏。例如,这可能是由查询执行期间的系统崩溃引起的。
+
+查询响应包含单行的 `result` 列。该行的值为
+[布尔值](../../sql_reference/data_types/boolean.md) 类型:
+
+- 0 - 表中的数据已损坏。
+- 1 - 数据保持完整性。
+
+`CHECK TABLE` 查询支持以下表引擎:
+
+- [Log](../../engines/table_engines/log_family/log.md)
+- [TinyLog](../../engines/table_engines/log_family/tinylog.md)
+- [StripeLog](../../engines/table_engines/log_family/stripelog.md)
+- [MergeTree 系列](../../engines/table_engines/mergetree_family/mergetree.md)
+
+对使用其他表引擎的表执行该查询会导致异常。
+
+`*Log` 系列引擎不提供故障时的自动数据恢复。请使用 `CHECK TABLE` 查询及时跟踪数据丢失。
+
+对于 `MergeTree` 系列引擎,`CHECK TABLE` 查询显示本地服务器上表的每个单独数据部分的检查状态。
+
+**如果数据已损坏**
+
+如果表已损坏,则可以将未损坏的数据复制到另一个表。要做到这一点:
+
+1. 创建与损坏的表具有相同结构的新表。要执行此操作,请执行查询 `CREATE TABLE <new_table_name> AS <damaged_table_name>`。
+2. 将 [max\_threads](../../operations/settings/settings.md#settings-max_threads) 的值设置为1,以在单个线程中处理下一个查询。要执行此操作,请运行查询 `SET max_threads = 1`。
+3. 执行查询 `INSERT INTO <new_table_name> SELECT * FROM <damaged_table_name>`。此请求将未损坏的数据从损坏的表复制到另一个表。只有损坏部分之前的数据才会被复制。
+4. 重新启动 `clickhouse-client` 以重置 `max_threads` 的值。
+
+## DESCRIBE TABLE {#misc-describe-table}
+
+``` sql
+DESC|DESCRIBE TABLE [db.]table [INTO OUTFILE filename] [FORMAT format]
+```
+
+返回以下 `String` 类型的列:
+
+- `name` — 列名。
+- `type` — 列类型。
+- `default_type` — [默认表达式](create.md#create-default-values) 中使用的子句(`DEFAULT`、`MATERIALIZED` 或 `ALIAS`)。如果未指定默认表达式,则该列为空字符串。
+- `default_expression` — `DEFAULT` 子句中指定的值。
+- `comment_expression` — 注释文本。
+
+嵌套数据结构以 “expanded” 格式输出。每列单独显示,名称后带一个点。
+
+## DETACH {#detach}
+
+从服务器删除有关 ‘name’ 表的信息。服务器不再知道该表的存在。
+
+``` sql
+DETACH TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster]
+```
+
+这不会删除表的数据或元数据。在下一次服务器启动时,服务器将读取元数据并再次查找有关表的信息。
+同样,可以使用 `ATTACH` 查询重新连接一个已 “分离” 的表(没有存储元数据的系统表除外)。
+
+没有 `DETACH DATABASE` 查询。
+
+## DROP {#drop}
+
+此查询有两种类型: `DROP DATABASE` 和 `DROP TABLE`。
+
+``` sql
+DROP DATABASE [IF EXISTS] db [ON CLUSTER cluster]
+```
+
+删除 ‘db’ 数据库中的所有表,然后删除 ‘db’ 数据库本身。
+如果指定了 `IF EXISTS`,则当数据库不存在时不会返回错误。
+
+``` sql
+DROP [TEMPORARY] TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster]
+```
+
+删除表。
+如果指定了 `IF EXISTS`,则当表不存在或数据库不存在时不会返回错误。
+
+    DROP DICTIONARY [IF EXISTS] [db.]name
+
+删除字典。
+如果指定了 `IF EXISTS`,则当字典不存在或数据库不存在时不会返回错误。
+
+## EXISTS {#exists}
+
+``` sql
+EXISTS [TEMPORARY] [TABLE|DICTIONARY] [db.]name [INTO OUTFILE filename] [FORMAT format]
+```
+
+返回一个 `UInt8` 类型的单列,其中包含单个值:如果表或数据库不存在为 `0`,如果表存在于指定的数据库中为 `1`。
+
+## KILL QUERY {#kill-query}
+
+``` sql
+KILL QUERY [ON CLUSTER cluster]
+  WHERE <where expression to SELECT FROM system.processes query>
+  [SYNC|ASYNC|TEST]
+  [FORMAT format]
+```
+
+尝试强制终止当前正在运行的查询。
+要终止的查询根据 `KILL` 查询的 `WHERE` 子句中定义的条件,从 system.processes 表中选择。
+
+例:
+
+``` sql
+-- Forcibly terminates all queries with the specified query_id:
+KILL QUERY WHERE query_id='2-857d-4a57-9ee0-327da5d60a90'
+
+-- Synchronously terminates all queries run by 'username':
+KILL QUERY WHERE user='username' SYNC
+```

+只读用户只能停止自己的查询。
+
+默认情况下,使用异步版本的查询 (`ASYNC`),不等待确认查询已停止。
+
+同步版本 (`SYNC`)会等待所有查询停止,并在每个进程停止时显示其信息。
+响应包含 `kill_status` 列,它可以采用以下值:
+
+1. ‘finished’ – 查询已成功终止。
+2. ‘waiting’ – 已向查询发送终止信号,正在等待其结束。
+3. 其他值说明查询无法停止的原因。
+
+测试查询 (`TEST`)仅检查用户的权限,并显示要停止的查询列表。
+
+## KILL MUTATION {#kill-mutation}
+
+``` sql
+KILL MUTATION [ON CLUSTER cluster]
+  WHERE <where expression to SELECT FROM system.mutations query>
+  [TEST]
+  [FORMAT format]
+```
+
+尝试取消和删除当前正在执行的 [突变](alter.md#alter-mutations)。要取消的突变根据 `KILL` 查询的 `WHERE` 子句指定的过滤器,从 [`system.mutations`](../../operations/system_tables.md#system_tables-mutations) 表中选择。
+
+测试查询 (`TEST`)仅检查用户的权限,并显示要停止的突变列表。
+
+例:
+
+``` sql
+-- Cancel and remove all mutations of the single table:
+KILL MUTATION WHERE database = 'default' AND table = 'table'
+
+-- Cancel the specific mutation:
+KILL MUTATION WHERE database = 'default' AND table = 'table' AND mutation_id = 'mutation_3.txt'
+```
+
+当突变卡住无法完成时(例如,突变查询中的某个函数应用于表中的数据时抛出异常),此查询非常有用。
+
+突变已做出的更改不会被回滚。
+
+## OPTIMIZE {#misc_operations-optimize}
+
+``` sql
+OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE]
+```
+
+此查询尝试为 [MergeTree](../../engines/table_engines/mergetree_family/mergetree.md) 系列表引擎的表初始化一次非计划的数据部分合并。
+
+`OPTIMIZE` 查询也支持 [MaterializedView](../../engines/table_engines/special/materializedview.md) 和 [Buffer](../../engines/table_engines/special/buffer.md) 引擎。不支持其他表引擎。
+
+当对 [ReplicatedMergeTree](../../engines/table_engines/mergetree_family/replication.md) 系列表引擎使用 `OPTIMIZE` 时,ClickHouse会创建一个合并任务,并等待其在所有节点上执行(如果启用了 `replication_alter_partitions_sync` 设置)。
+
+- 如果 `OPTIMIZE` 出于任何原因没有执行合并,它不会通知客户端。要启用通知,请使用 [optimize\_throw\_if\_noop](../../operations/settings/settings.md#setting-optimize_throw_if_noop) 设置。
+- 如果您指定 `PARTITION`,则仅优化指定的分区。 [如何设置分区表达式](alter.md#alter-how-to-specify-part-expr)。
+- 如果您指定 `FINAL`,即使所有数据已经在一个部分中,也会执行优化。
+- 如果您指定 `DEDUPLICATE`,则完全相同的行将被去重(比较所有列),这仅适用于MergeTree引擎。
+
+!!! warning "警告"
+    `OPTIMIZE` 无法修复 “Too many parts” 错误。
+
+## RENAME {#misc_operations-rename}
+
+重命名一个或多个表。
+
+``` sql
+RENAME TABLE [db11.]name11 TO [db12.]name12, [db21.]name21 TO [db22.]name22, ... [ON CLUSTER cluster]
+```
+
+所有表都在全局锁定下重命名。重命名表是一个轻量级操作。如果您在TO之后指定了另一个数据库,则表将被移动到此数据库。但是,包含数据库的目录必须位于同一文件系统中(否则,将返回错误)。
+
+## SET {#query-set}
+
+``` sql
+SET param = value
+```
+
+为当前会话将 `value` 赋给 `param` [设置](../../operations/settings/index.md)。您不能通过这种方式更改 [服务器设置](../../operations/server_configuration_parameters/index.md)。
+
+您还可以在单个查询中设置指定设置配置文件中的所有值。
+
+``` sql
+SET profile = 'profile-name-from-the-settings-file'
+```
+
+有关详细信息,请参阅 [设置](../../operations/settings/settings.md)。
+
+## TRUNCATE {#truncate}
+
+``` sql
+TRUNCATE TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster]
+```
+
+从表中删除所有数据。省略 `IF EXISTS` 子句时,如果表不存在,查询会返回错误。
+
+[View](../../engines/table_engines/special/view.md)、[File](../../engines/table_engines/special/file.md)、[URL](../../engines/table_engines/special/url.md) 和 [Null](../../engines/table_engines/special/null.md) 表引擎不支持 `TRUNCATE` 查询。
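+
+例如,下面是一个示例草图(表名 `db.hits` 仅为假设的示例名称):由于使用了 `IF EXISTS`,即使该表不存在,查询也不会报错:
+
+``` sql
+TRUNCATE TABLE IF EXISTS db.hits
+```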
+
+## USE {#use}
+
+``` sql
+USE db
+```
+
+用于设置会话的当前数据库。
+除非在查询中通过表名前的点显式指定了数据库,否则将使用当前数据库来搜索表。
+使用HTTP协议时无法进行此查询,因为没有会话的概念。
+
+[原始文章](https://clickhouse.tech/docs/en/query_language/misc/)
diff --git a/docs/zh/query_language/select.md b/docs/zh/sql_reference/statements/select.md
similarity index 91%
rename from docs/zh/query_language/select.md
rename to docs/zh/sql_reference/statements/select.md
index 8400e963f3c..9f4e71c3343 100644
--- a/docs/zh/query_language/select.md
+++ b/docs/zh/sql_reference/statements/select.md
@@ -1,3 +1,4 @@
+
# SELECT 查询语法 {#select-cha-xun-yu-fa}

`SELECT` 语句用于执行数据的检索。
@@ -35,7 +36,7 @@ FROM子句规定了将从哪个表、或子查询、或表函数中读取数据

可以使用包含在括号里的子查询来替代表。
在这种情况下,子查询的处理将会构建在外部的查询内部。
-不同于SQL标准,子查询后无需指定别名。为了兼容,你可以在子查询后添加‘AS 别名’,但是指定的名字不能被使用在任何地方。
+不同于SQL标准,子查询后无需指定别名。为了兼容,你可以在子查询后添加'AS 别名',但是指定的名字不能被使用在任何地方。

也可以使用表函数来代替表,有关信息,参见«表函数»。

@@ -50,10 +51,10 @@ FROM子句规定了将从哪个表、或子查询、或表函数中读取数据

`SAMPLE`子句可以使用`SAMPLE k`来表示,其中k可以是0到1的小数值,或者是一个足够大的正整数值。

-当k为0到1的小数时,查询将使用’k’作为百分比选取数据。例如,`SAMPLE 0.1`查询只会检索数据总量的10%。
-当k为一个足够大的正整数时,查询将使用’k’作为最大样本数。例如, `SAMPLE 10000000`查询只会检索最多10,000,000行数据。
+当k为0到1的小数时,查询将使用'k'作为百分比选取数据。例如,`SAMPLE 0.1`查询只会检索数据总量的10%。
+当k为一个足够大的正整数时,查询将使用'k'作为最大样本数。例如, `SAMPLE 10000000`查询只会检索最多10,000,000行数据。

-Example:
+示例:

``` sql
SELECT
@@ -346,15 +347,15 @@ FROM

在使用`ALL`修饰符对JOIN进行修饰时,如果右表中存在多个与左表关联的数据,那么系统则将右表中所有可以与左表关联的数据全部返回在结果中。这与SQL标准的JOIN行为相同。
在使用`ANY`修饰符对JOIN进行修饰时,如果右表中存在多个与左表关联的数据,那么系统仅返回第一个与左表匹配的结果。如果左表与右表一一对应,不存在多余的行时,`ANY`与`ALL`的结果相同。

-你可以在会话中通过设置 [join\_default\_strictness](../operations/settings/settings.md) 来指定默认的JOIN修饰符。
+你可以在会话中通过设置 [join\_default\_strictness](../../operations/settings/settings.md) 来指定默认的JOIN修饰符。

-**`GLOBAL` distribution**
+**`GLOBAL` 分布**

当使用普通的`JOIN`时,查询将被发送给远程的服务器。并在这些远程服务器上生成右表并与它们关联。换句话说,右表来自于各个服务器本身。

当使用`GLOBAL ... JOIN`时,首先会在请求服务器上计算右表并以临时表的方式将其发送到所有服务器。这时每台服务器将直接使用它进行计算。

-使用`GLOBAL`时需要小心。更多信息,参阅 [Distributed subqueries](#select-distributed-subqueries) 部分。
+使用`GLOBAL`时需要小心。更多信息,参阅 [分布式子查询](#select-distributed-subqueries) 部分。

**使用建议**

@@ -402,7 +403,7 @@ LIMIT 10
└───────────┴────────┴────────┘

子查询不允许您设置别名或在其他地方引用它们。
-`USING`中指定的列必须在两个子查询中具有相同的名称,而其他列必须具有不同的名称。您可以通过使用别名的方式来更改子查询中的列名(示例中就分别使用了’hits’与’visits’别名)。
+`USING`中指定的列必须在两个子查询中具有相同的名称,而其他列必须具有不同的名称。您可以通过使用别名的方式来更改子查询中的列名(示例中就分别使用了'hits'与'visits'别名)。

`USING`子句用于指定要进行链接的一个或多个列,系统会将这些列在两张表中相等的值连接起来。如果列是一个列表,不需要使用括号包裹。同时JOIN不支持其他更复杂的Join方式。

@@ -410,17 +411,17 @@ LIMIT 10

只能在查询中指定一个`JOIN`。若要运行多个`JOIN`,你可以将它们放入子查询中。

-每次运行相同的`JOIN`查询,总是会再次计算 - 没有缓存结果。 为了避免这种情况,可以使用‘Join’引擎,它是一个预处理的Join数据结构,总是保存在内存中。更多信息,参见«Join引擎»部分。
+每次运行相同的`JOIN`查询,总是会再次计算 - 没有缓存结果。 为了避免这种情况,可以使用'Join'引擎,它是一个预处理的Join数据结构,总是保存在内存中。更多信息,参见«Join引擎»部分。

在一些场景下,使用`IN`代替`JOIN`将会得到更高的效率。在各种类型的JOIN中,最高效的是`ANY LEFT JOIN`,然后是`ANY INNER JOIN`,效率最差的是`ALL LEFT JOIN`以及`ALL INNER JOIN`。

-如果你需要使用`JOIN`来关联一些纬度表(包含纬度属性的一些相对比较小的表,例如广告活动的名称),那么`JOIN`可能不是好的选择,因为语法负责,并且每次查询都将重新访问这些表。对于这种情况,您应该使用«外部字典»的功能来替换`JOIN`。更多信息,参见 [外部字典](dicts/external_dicts.md) 部分。
+如果你需要使用`JOIN`来关联一些维度表(包含维度属性的一些相对比较小的表,例如广告活动的名称),那么`JOIN`可能不是好的选择,因为语法复杂,并且每次查询都将重新访问这些表。对于这种情况,您应该使用«外部字典»的功能来替换`JOIN`。更多信息,参见 [外部字典](../../sql_reference/dictionaries/external_dictionaries/external_dicts.md) 部分。

#### Null的处理 {#nullde-chu-li}

-JOIN的行为受 [join\_use\_nulls](../operations/settings/settings.md) 的影响。当`join_use_nulls=1`时,`JOIN`的工作与SQL标准相同。
+JOIN的行为受 [join\_use\_nulls](../../operations/settings/settings.md) 的影响。当`join_use_nulls=1`时,`JOIN`的工作与SQL标准相同。

-如果JOIN的key是 [Nullable](../data_types/nullable.md) 类型的字段,则其中至少一个存在 [NULL](syntax.md) 值的key不会被关联。
+如果JOIN的key是 [可为空](../../sql_reference/data_types/nullable.md) 类型的字段,则其中至少一个存在 [NULL](../syntax.md) 值的key不会被关联。

### WHERE 子句 {#select-where}

@@ -444,7 +445,7 @@ PREWHERE 仅支持`*MergeTree`系列引擎。

值得注意的是,PREWHERE不适合用于已经存在于索引中的列,因为当列已经存在于索引中的情况下,只有满足索引的数据块才会被读取。

-如果将’optimize\_move\_to\_prewhere’设置为1,并且在查询中不包含PREWHERE,则系统将自动的把适合PREWHERE表达式的部分从WHERE中抽离到PREWHERE中。
+如果将'optimize\_move\_to\_prewhere'设置为1,并且在查询中不包含PREWHERE,则系统将自动的把适合PREWHERE表达式的部分从WHERE中抽离到PREWHERE中。

### GROUP BY 子句 {#select-group-by-clause}

@@ -455,7 +456,7 @@ SELECT,HAVING,ORDER BY子句中的表达式列表必须来自于这些«key

如果查询表达式列表中仅包含聚合函数,则可以省略GROUP BY子句,这时会假定将所有数据聚合成一组空«key»。

-Example:
+示例:

``` sql
SELECT
    count(),
@@ -467,9 +468,9 @@ FROM hits

与SQL标准不同的是,如果表中不存在任何数据(可能表本身中就不存在任何数据,或者由于被WHERE条件过滤掉了),将返回一个空结果,而不是一个包含聚合函数初始值的结果。

-与MySQL不同的是(实际上这是符合SQL标准的),你不能够获得一个不在key中的非聚合函数列(除了常量表达式)。但是你可以使用‘any’(返回遇到的第一个值)、max、min等聚合函数使它工作。
+与MySQL不同的是(实际上这是符合SQL标准的),你不能够获得一个不在key中的非聚合函数列(除了常量表达式)。但是你可以使用'any'(返回遇到的第一个值)、max、min等聚合函数使它工作。

-Example:
+示例:

``` sql
SELECT
@@ -488,7 +489,7 @@ GROUP BY子句会为遇到的每一个不同的key计算一组聚合函数的值

#### NULL 处理 {#null-chu-li}

-对于GROUP BY子句,ClickHouse将 [NULL](syntax.md) 解释为一个值,并且支持`NULL=NULL`。
+对于GROUP BY子句,ClickHouse将 [NULL](../syntax.md) 解释为一个值,并且支持`NULL=NULL`。

下面这个例子将说明这将意味着什么。

@@ -520,9 +521,9 @@ GROUP BY子句会为遇到的每一个不同的key计算一组聚合函数的值

该行仅在JSON\*, TabSeparated\*, Pretty\*输出格式中与其他行分开输出。

-在JSON\*输出格式中,这行将出现在Json的‘totals’字段中。在TabSeparated\*输出格式中,这行将位于其他结果之后,同时与其他结果使用空白行分隔。在Pretty\*输出格式中,这行将作为单独的表在所有结果之后输出。
+在JSON\*输出格式中,这行将出现在Json的'totals'字段中。在TabSeparated\*输出格式中,这行将位于其他结果之后,同时与其他结果使用空白行分隔。在Pretty\*输出格式中,这行将作为单独的表在所有结果之后输出。

-当`WITH TOTALS`与HAVING子句同时存在时,它的行为受‘totals\_mode’配置的影响。
+当`WITH TOTALS`与HAVING子句同时存在时,它的行为受'totals\_mode'配置的影响。

默认情况下,`totals_mode = 'before_having'`,这时`WITH TOTALS`将会在HAVING前计算最多不超过`max_rows_to_group_by`行的数据。

在`group_by_overflow_mode = 'any'`并指定了`max_rows_to_group_by`的情况下,`WITH TOTALS`的行为受`totals_mode`的影响。
@@ -531,7 +532,7 @@ GROUP BY子句会为遇到的每一个不同的key计算一组聚合函数的值

`after_having_inclusive` - 在HAVING后进行计算,计算不少于`max_rows_to_group_by`行的数据。

-`after_having_auto` - 在HAVING后进行计算,采用统计通过HAVING的行数,在超过不超过‘max\_rows\_to\_group\_by’指定值(默认为50%)的情况下,包含所有行的结果。否则排除这些结果。
+`after_having_auto` - 在HAVING后进行计算,统计通过HAVING的行数,在其超过'max\_rows\_to\_group\_by'指定比例(默认为50%)的情况下,结果中包含所有行;否则排除这些行。

`totals_auto_threshold` - 默认 0.5,是`after_having_auto`的参数。

@@ -637,9 +638,9 @@ WHERE于HAVING不同之处在于WHERE在聚合前(GROUP BY)执行,HAVING在聚

如果你在ORDER BY子句后面存在LIMIT并给定了较小的数值,则将会使用较少的内存。否则,内存的使用量将与需要排序的数据成正比。对于分布式查询,如果省略了GROUP BY,则在远程服务器上执行部分排序,最后在请求服务器上合并排序结果。这意味这对于分布式查询而言,要排序的数据量可以大于单台服务器的内存。

-如果没有足够的内存,可以使用外部排序(在磁盘中创建一些临时文件)。可以使用`max_bytes_before_external_sort`来设置外部排序,如果你讲它设置为0(默认),则表示禁用外部排序功能。如果启用该功能。当要排序的数据量达到所指定的字节数时,当前排序的结果会被转存到一个临时文件中去。当全部数据读取完毕后,所有的临时文件将会合并成最终输出结果。这些临时文件将会写到config文件配置的/var/lib/clickhouse/tmp/目录中(默认值,你可以通过修改’tmp\_path’配置调整该目录的位置)。
+如果没有足够的内存,可以使用外部排序(在磁盘中创建一些临时文件)。可以使用`max_bytes_before_external_sort`来设置外部排序,如果你将它设置为0(默认),则表示禁用外部排序功能。如果启用该功能,当要排序的数据量达到所指定的字节数时,当前排序的结果会被转存到一个临时文件中去。当全部数据读取完毕后,所有的临时文件将会合并成最终输出结果。这些临时文件将会写到config文件配置的/var/lib/clickhouse/tmp/目录中(默认值,你可以通过修改'tmp\_path'配置调整该目录的位置)。

-查询运行使用的内存要高于‘max\_bytes\_before\_external\_sort’,为此,这个配置必须要远远小于‘max\_memory\_usage’配置的值。例如,如果你的服务器有128GB的内存去运行一个查询,那么推荐你将‘max\_memory\_usage’设置为100GB,‘max\_bytes\_before\_external\_sort’设置为80GB。
+查询运行时使用的内存可能会高于'max\_bytes\_before\_external\_sort',为此,这个配置必须要远远小于'max\_memory\_usage'配置的值。例如,如果你的服务器有128GB的内存去运行一个查询,那么推荐你将'max\_memory\_usage'设置为100GB,'max\_bytes\_before\_external\_sort'设置为80GB。

外部排序效率要远低于在内存中排序。

@@ -661,14 +662,14 @@ WHERE于HAVING不同之处在于WHERE在聚合前(GROUP BY)执行,HAVING在聚

在SELECT表达式中存在Array类型的列时,不能使用DISTINCT。

-`DISTINCT`可以与 [NULL](syntax.md)一起工作,就好像`NULL`仅是一个特殊的值一样,并且`NULL=NULL`。换而言之,在`DISTINCT`的结果中,与`NULL`不同的组合仅能出现一次。
+`DISTINCT`可以与 [NULL](../syntax.md)一起工作,就好像`NULL`仅是一个特殊的值一样,并且`NULL=NULL`。换而言之,在`DISTINCT`的结果中,与`NULL`不同的组合仅能出现一次。

### LIMIT 子句 {#limit-zi-ju}

LIMIT m 用于在查询结果中选择前m行数据。
LIMIT n, m 用于在查询结果中选择从n行开始的m行数据。

-‘n’与‘m’必须是正整数。
+'n'与'm'必须是正整数。

如果没有指定ORDER BY子句,则结果可能是任意的顺序,并且是不确定的。

@@ -730,11 +731,11 @@ SELECT (CounterID, UserID) IN ((34, 123), (101500, 456)) FROM ...

如果左侧是单个列并且是一个索引,并且右侧是一组常量时,系统将使用索引来处理查询。

不要在列表中列出太多的值(百万)。如果数据集很大,将它们放入临时表中(可以参考«查询处理的外部数据»部分), 然后使用子查询。
-Don’t list too many values explicitly (i.e. millions). If a data set is large, put it in a temporary table (for example, see the section «External data for query processing»), then use a subquery.
+Don't list too many values explicitly (i.e. millions). If a data set is large, put it in a temporary table (for example, see the section «External data for query processing»), then use a subquery.

右侧可以是一个由常量表达式组成的元组列表(像上面的例子一样),或者是一个数据库中的表的名称,或是一个包含在括号中的子查询。

-如果右侧是一个表的名字(例如,`UserID IN users`),这相当于`UserID IN (SELECT * FROM users)`。在查询与外部数据表组合使用时可以使用该方法。例如,查询与包含user IDS的‘users’临时表一起被发送的同时需要对结果进行过滤时。
+如果右侧是一个表的名字(例如,`UserID IN users`),这相当于`UserID IN (SELECT * FROM users)`。在查询与外部数据表组合使用时可以使用该方法。例如,查询与包含user IDS的'users'临时表一起被发送的同时需要对结果进行过滤时。

如果操作符的右侧是一个Set引擎的表时(数据总是在内存中准备好),则不会每次都为查询创建新的数据集。

@@ -779,7 +780,7 @@ IN子句中的子查询仅在单个服务器上运行一次。不能够是相关

#### NULL 处理 {#null-chu-li-1}

-在处理中,IN操作符总是假定 [NULL](syntax.md) 值的操作结果总是等于`0`,而不管`NULL`位于左侧还是右侧。`NULL`值不应该包含在任何数据集中,它们彼此不能够对应,并且不能够比较。
+在处理中,IN操作符总是假定 [NULL](../syntax.md) 值的操作结果总是等于`0`,而不管`NULL`位于左侧还是右侧。`NULL`值不应该包含在任何数据集中,它们彼此不能够对应,并且不能够比较。

下面的示例中有一个`t_null`表:

@@ -809,7 +810,7 @@ IN子句中的子查询仅在单个服务器上运行一次。不能够是相关

对于带有子查询的(类似与JOINs)IN中,有两种选择:普通的`IN`/`JOIN`与`GLOBAL IN` / `GLOBAL JOIN`。它们对于分布式查询的处理运行方式是不同的。

!!! note "注意"
注意 "注意" - 请记住,下面描述的算法可能因为根据 [settings](../operations/settings/settings.md) 配置的不同而不同。 + 请记住,下面描述的算法可能因为根据 [设置](../../operations/settings/settings.md) 配置的不同而不同。 当使用普通的IN时,查询总是被发送到远程的服务器,并且在每个服务器中运行«IN»或«JOIN»子句中的子查询。 @@ -905,13 +906,13 @@ SELECT uniq(UserID) FROM local_table WHERE CounterID = 101500 AND UserID GLOBAL 另外,在`GLOBAL IN`子句中使用本地表也是有用的,比如,本地表仅在请求服务器上可用,并且您希望在远程服务器上使用来自本地表的数据。 -### Extreme Values {#extreme-values} +### 极端值 {#extreme-values} -除了结果外,你还可以获得结果列的最大值与最小值,可以将**extremes**配置设置成1来做到这一点。最大值最小值的计算是针对于数字类型,日期类型进行计算的,对于其他列,将会输出默认值。 +除了结果外,你还可以获得结果列的最大值与最小值,可以将**极端**配置设置成1来做到这一点。最大值最小值的计算是针对于数字类型,日期类型进行计算的,对于其他列,将会输出默认值。 额外计算的两行结果 - 最大值与最小值,这两行额外的结果仅在JSON\*, TabSeparated\*, and Pretty\* 格式与其他行分开的输出方式输出,不支持其他输出格式。 -在JSON\*格式中,Extreme值在单独的’extremes’字段中。在TabSeparated\*格式中,在其他结果与’totals’之后输出,并使用空行与其分隔。在Pretty\* 格式中,将在其他结果与’totals’后以单独的表格输出。 +在JSON\*格式中,Extreme值在单独的'extremes'字段中。在TabSeparated\*格式中,在其他结果与'totals'之后输出,并使用空行与其分隔。在Pretty\* 格式中,将在其他结果与'totals'后以单独的表格输出。 如果在计算Extreme值的同时包含LIMIT。extremes的计算结果将包含offset跳过的行。在流式的请求中,它可能还包含多余LIMIT的少量行的值。 diff --git a/docs/zh/query_language/show.md b/docs/zh/sql_reference/statements/show.md similarity index 52% rename from docs/zh/query_language/show.md rename to docs/zh/sql_reference/statements/show.md index 840a2fc9766..f60452f97a3 100644 --- a/docs/zh/query_language/show.md +++ b/docs/zh/sql_reference/statements/show.md @@ -1,8 +1,11 @@ --- -en_copy: true +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_priority: 38 +toc_title: SHOW --- -# SHOW Queries {#show-queries} +# 显示查询 {#show-queries} ## SHOW CREATE TABLE {#show-create-table} @@ -10,7 +13,7 @@ en_copy: true SHOW CREATE [TEMPORARY] [TABLE|DICTIONARY] [db.]table [INTO OUTFILE filename] [FORMAT format] ``` -Returns a single `String`-type ‘statement’ column, which contains a single value – the `CREATE` query used for creating the specified object. +返回单 `String`-类型 ‘statement’ column, which contains a single value – the `CREATE` 用于创建指定对象的查询。 ## SHOW DATABASES {#show-databases} @@ -18,8 +21,8 @@ Returns a single `String`-type ‘statement’ column, which contains a single v SHOW DATABASES [INTO OUTFILE filename] [FORMAT format] ``` -Prints a list of all databases. -This query is identical to `SELECT name FROM system.databases [INTO OUTFILE filename] [FORMAT format]`. +打印所有数据库的列表。 +这个查询是相同的 `SELECT name FROM system.databases [INTO OUTFILE filename] [FORMAT format]`. ## SHOW PROCESSLIST {#show-processlist} @@ -27,11 +30,11 @@ This query is identical to `SELECT name FROM system.databases [INTO OUTFILE file SHOW PROCESSLIST [INTO OUTFILE filename] [FORMAT format] ``` -Outputs the content of the [system.processes](../operations/system_tables.md#system_tables-processes) table, that contains a list of queries that is being processed at the moment, excepting `SHOW PROCESSLIST` queries. +输出的内容 [系统。流程](../../operations/system_tables.md#system_tables-processes) 表,包含目前正在处理的查询列表,除了 `SHOW PROCESSLIST` 查询。 -The `SELECT * FROM system.processes` query returns data about all the current queries. +该 `SELECT * FROM system.processes` 查询返回有关所有当前查询的数据。 -Tip (execute in the console): +提示(在控制台中执行): ``` bash $ watch -n1 "clickhouse-client --query='SHOW PROCESSLIST'" @@ -39,23 +42,23 @@ $ watch -n1 "clickhouse-client --query='SHOW PROCESSLIST'" ## SHOW TABLES {#show-tables} -Displays a list of tables. 
+显示表的列表。

``` sql
SHOW [TEMPORARY] TABLES [{FROM | IN} <db>] [LIKE '<pattern>' | WHERE expr] [LIMIT <N>] [INTO OUTFILE <filename>] [FORMAT <format>]
```

-If the `FROM` clause is not specified, the query returns the list of tables from the current database.
+如果未指定 `FROM` 子句,则查询返回当前数据库中的表列表。

-You can get the same results as the `SHOW TABLES` query in the following way:
+你可以通过以下方式获得与 `SHOW TABLES` 查询相同的结果:

``` sql
SELECT name FROM system.tables WHERE database = <db> [AND name LIKE <pattern>] [LIMIT <N>] [INTO OUTFILE <filename>] [FORMAT <format>]
```

-**Example**
+**示例**

-The following query selects the first two rows from the list of tables in the `system` database, whose names contain `co`.
+下面的查询从 `system` 数据库的表列表中选择前两行,这些表的名称包含 `co`。

``` sql
SHOW TABLES FROM system LIKE '%co%' LIMIT 2
```
@@ -70,23 +73,23 @@ SHOW TABLES FROM system LIKE '%co%' LIMIT 2

## SHOW DICTIONARIES {#show-dictionaries}

-Displays a list of [external dictionaries](dicts/external_dicts.md).
+显示 [外部字典](../../sql_reference/dictionaries/external_dictionaries/external_dicts.md) 的列表。

``` sql
SHOW DICTIONARIES [FROM <db>] [LIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE <filename>] [FORMAT <format>]
```

-If the `FROM` clause is not specified, the query returns the list of dictionaries from the current database.
+如果未指定 `FROM` 子句,则查询从当前数据库返回字典列表。

-You can get the same results as the `SHOW DICTIONARIES` query in the following way:
+你可以通过以下方式获得与 `SHOW DICTIONARIES` 查询相同的结果:

``` sql
SELECT name FROM system.dictionaries WHERE database = <db> [AND name LIKE <pattern>] [LIMIT <N>] [INTO OUTFILE <filename>] [FORMAT <format>]
```

-**Example**
+**示例**

-The following query selects the first two rows from the list of tables in the `system` database, whose names contain `reg`.
+下面的查询从 `system` 数据库的表列表中选择前两行,这些表的名称包含 `reg`。

``` sql
SHOW DICTIONARIES FROM db LIKE '%reg%' LIMIT 2
```

└──────────────┘
```

-[Original article](https://clickhouse.tech/docs/en/query_language/show/)
+[原始文章](https://clickhouse.tech/docs/en/query_language/show/)
diff --git a/docs/zh/sql_reference/statements/system.md b/docs/zh/sql_reference/statements/system.md
new file mode 100644
index 00000000000..06d4f6dc1cb
--- /dev/null
+++ b/docs/zh/sql_reference/statements/system.md
@@ -0,0 +1,113 @@
+---
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_priority: 37
+toc_title: SYSTEM
+---
+
+# 系统查询 {#query-language-system}
+
+- [RELOAD DICTIONARIES](#query_language-system-reload-dictionaries)
+- [RELOAD DICTIONARY](#query_language-system-reload-dictionary)
+- [DROP DNS CACHE](#query_language-system-drop-dns-cache)
+- [DROP MARK CACHE](#query_language-system-drop-mark-cache)
+- [FLUSH LOGS](#query_language-system-flush_logs)
+- [RELOAD CONFIG](#query_language-system-reload-config)
+- [SHUTDOWN](#query_language-system-shutdown)
+- [KILL](#query_language-system-kill)
+- [STOP DISTRIBUTED SENDS](#query_language-system-stop-distributed-sends)
+- [FLUSH DISTRIBUTED](#query_language-system-flush-distributed)
+- [START DISTRIBUTED SENDS](#query_language-system-start-distributed-sends)
+- [STOP MERGES](#query_language-system-stop-merges)
+- [START MERGES](#query_language-system-start-merges)
+
+## RELOAD DICTIONARIES {#query_language-system-reload-dictionaries}
+
+重新加载之前已成功加载的所有字典。
+默认情况下,字典是延迟加载的(请参阅 [dictionaries\_lazy\_load](../../operations/server_configuration_parameters/settings.md#server_configuration_parameters-dictionaries_lazy_load)),所以不是在启动时自动加载,而是在通过dictGet函数第一次访问时,或从ENGINE=Dictionary的表中SELECT时初始化。 `SYSTEM RELOAD DICTIONARIES` 查询会重新加载这类(已加载的)字典。
+无论字典更新的结果如何,总是返回 `Ok.`。
+
+## RELOAD DICTIONARY Dictionary\_name {#query_language-system-reload-dictionary}
+
+完全重新加载字典 `dictionary_name`,与字典的状态无关(LOADED/NOT\_LOADED/FAILED)。
+无论更新字典的结果如何,总是返回 `Ok.`。
+字典的状态可以通过查询 `system.dictionaries` 表来检查。
+
+``` sql
+SELECT name, status FROM system.dictionaries;
+```
+
+## DROP DNS CACHE {#query_language-system-drop-dns-cache}
+
+重置ClickHouse的内部DNS缓存。 有时(对于旧的ClickHouse版本)在更改基础架构(更改另一个ClickHouse服务器或字典使用的服务器的IP地址)时需要使用此命令。
+
+有关更方便(自动)的缓存管理,请参阅disable\_internal\_dns\_cache、dns\_cache\_update\_period参数。
+
+## DROP MARK CACHE {#query_language-system-drop-mark-cache}
+
+重置标记缓存。 用于ClickHouse开发和性能测试。
+
+## FLUSH LOGS {#query_language-system-flush_logs}
+
+将日志消息的缓冲区刷新到系统表(例如 system.query\_log)。调试时无需等待7.5秒。
+
+## RELOAD CONFIG {#query_language-system-reload-config}
+
+重新加载ClickHouse配置。 当配置存储在ZooKeeeper中时使用。
+
+## SHUTDOWN {#query_language-system-shutdown}
+
+正常关闭ClickHouse(如 `service clickhouse-server stop` / `kill {$pid_clickhouse-server}`)
+
+## KILL {#query_language-system-kill}
+
+中止ClickHouse进程(如 `kill -9 {$ pid_clickhouse-server}`)
+
+## 管理分布式表 {#query-language-system-distributed}
+
+ClickHouse可以管理 [分布式](../../engines/table_engines/special/distributed.md) 表。 当用户将数据插入到这些表中时,ClickHouse首先创建应发送到集群节点的数据队列,然后异步发送它。 您可以使用 [STOP DISTRIBUTED SENDS](#query_language-system-stop-distributed-sends)、[FLUSH DISTRIBUTED](#query_language-system-flush-distributed) 和 [START DISTRIBUTED SENDS](#query_language-system-start-distributed-sends) 查询来管理队列的处理。 您也可以通过 `insert_distributed_sync` 设置同步插入分布式数据。
+
+### STOP DISTRIBUTED SENDS {#query_language-system-stop-distributed-sends}
+
+将数据插入分布式表时禁用后台数据分发。
+
+``` sql
+SYSTEM STOP DISTRIBUTED SENDS [db.]<distributed_table_name>
+```
+
+### FLUSH DISTRIBUTED {#query_language-system-flush-distributed}
+
+强制ClickHouse将数据同步发送到集群节点。 如果任何节点不可用,ClickHouse将引发异常并停止查询执行。 您可以重试查询,直到查询成功,这将在所有节点恢复联机时发生。
+
+``` sql
+SYSTEM FLUSH DISTRIBUTED [db.]<distributed_table_name>
+```
+
+### START DISTRIBUTED SENDS {#query_language-system-start-distributed-sends}
+
+将数据插入分布式表时启用后台数据分发。
+
+``` sql
+SYSTEM START DISTRIBUTED SENDS [db.]<distributed_table_name>
+```
+
+### STOP MERGES {#query_language-system-stop-merges}
+
+提供停止MergeTree系列中表的后台合并的可能性:
+
+``` sql
+SYSTEM STOP MERGES [[db.]merge_tree_family_table_name]
+```
+
+!!! note "注"
+    即使之前已停止所有MergeTree表的合并,`DETACH / ATTACH` 表也会为该表启动后台合并。
+
+### START MERGES {#query_language-system-start-merges}
+
+为MergeTree系列中的表提供启动后台合并的可能性:
+
+``` sql
+SYSTEM START MERGES [[db.]merge_tree_family_table_name]
+```
+
+[原始文章](https://clickhouse.tech/docs/en/query_language/system/)
diff --git a/docs/zh/sql_reference/syntax.md b/docs/zh/sql_reference/syntax.md
new file mode 100644
index 00000000000..ab9009def47
--- /dev/null
+++ b/docs/zh/sql_reference/syntax.md
@@ -0,0 +1,187 @@
+---
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_priority: 31
+toc_title: "\u8BED\u6CD5"
+---
+
+# 语法 {#syntax}
+
+系统中有两种类型的解析器:完整SQL解析器(递归下降解析器)和数据格式解析器(快速流解析器)。
+除 `INSERT` 查询外,所有情况下都只使用完整的SQL解析器。
+`INSERT` 查询会同时使用这两种解析器:
+
+``` sql
+INSERT INTO t VALUES (1, 'Hello, world'), (2, 'abc'), (3, 'def')
+```
+
+其中 `INSERT INTO t VALUES` 片段由完整的解析器解析,而数据 `(1, 'Hello, world'), (2, 'abc'), (3, 'def')` 由快速流解析器解析。 您也可以通过 [input\_format\_values\_interpret\_expressions](../operations/settings/settings.md#settings-input_format_values_interpret_expressions) 设置为数据启用完整解析器。 当 `input_format_values_interpret_expressions = 1` 时,ClickHouse首先尝试使用快速流解析器解析值。 如果失败,ClickHouse将尝试对数据使用完整的解析器,将其视为SQL [表达式](#syntax-expressions)。
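+
+例如,下面是一个最小的示例草图(沿用上文的表 `t`,其第二列为字符串):启用该设置后,`VALUES` 中可以包含 `lower('Hello, world')` 这样无法由快速流解析器处理的SQL表达式:
+
+``` sql
+SET input_format_values_interpret_expressions = 1;
+INSERT INTO t VALUES (4, lower('Hello, world'))
+```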
+
+数据可以有任何格式。 当接收到查询时,服务器在RAM中计算不超过 [max\_query\_size](../operations/settings/settings.md#settings-max_query_size) 字节的请求内容(默认为1MB),其余部分进行流式解析。
+这意味着系统不会像MySQL那样在处理大型 `INSERT` 查询时出现问题。
+
+在 `INSERT` 查询中使用 `Values` 格式时,看起来数据的解析方式与 `SELECT` 查询中的表达式相同,但事实并非如此。 `Values` 格式要受限得多。
+
+接下来我们将介绍完整的解析器。 有关格式解析器的详细信息,请参阅 [格式](../interfaces/formats.md) 一节。
+
+## 空格 {#spaces}
+
+语法结构之间(包括查询的开始和结束)可以有任意数量的空白符号。 空白符号包括空格、制表符、换行符、CR和换页符。
+
+## 注释 {#comments}
+
+支持SQL风格和C风格的注释。
+SQL风格的注释:从 `--` 到行尾。 `--` 后的空格可以省略。
+C风格的注释:从 `/*` 到 `*/`。 这类注释可以是多行的。 这里也不需要空格。
+
+## 关键字 {#syntax-keywords}
+
+关键字在对应于以下情况时不区分大小写:
+
+- SQL标准。 例如, `SELECT`、`select` 和 `SeLeCt` 都是有效的。
+- 一些流行的DBMS(MySQL或Postgres)中的实现。 例如, `DateTime` 与 `datetime` 相同。
+
+数据类型名称是否区分大小写可以在 `system.data_type_families` 表中检查。
+
+与标准SQL相比,所有其他关键字(包括函数名称)都是 **区分大小写** 的。
+
+关键字不是保留字(它们只是在相应的上下文中被解析为关键字)。 如果您使用与关键字相同的 [标识符](#syntax-identifiers),请将它们括在引号中。 例如,如果表 `table_name` 具有名为 `"FROM"` 的列,则查询 `SELECT "FROM" FROM table_name` 是有效的。
+
+## 标识符 {#syntax-identifiers}
+
+标识符是:
+
+- 集群、数据库、表、分区和列的名称。
+- 函数。
+- 数据类型。
+- [表达式别名](#syntax-expression_aliases)。
+
+标识符可以是带引号的或不带引号的。 建议使用不带引号的标识符。
+
+不带引号的标识符必须与正则表达式 `^[a-zA-Z_][0-9a-zA-Z_]*$` 匹配,并且不能等于 [关键字](#syntax-keywords)。 例: `x, _1, X_y__Z123_.`
+
+如果要使用与关键字相同的标识符,或者要在标识符中使用其他符号,请使用双引号或反引号对其进行引用,例如, `"id"`, `` `id` ``。
+
+## 字面量 {#literals}
+
+有数字、字符串、复合和 `NULL` 字面量。
+
+### 数字 {#numeric}
+
+数值字面量按以下方式尝试解析:
+
+- 首先作为一个64位有符号数,使用 [strtoll](https://en.cppreference.com/w/cpp/string/byte/strtol) 函数。
+- 如果不成功,作为64位无符号数,使用 [strtoull](https://en.cppreference.com/w/cpp/string/byte/strtoul) 函数。
+- 如果不成功,作为浮点数,使用 [strtod](https://en.cppreference.com/w/cpp/string/byte/strtof) 函数。
+- 否则,将返回错误。
+
+相应的值将具有该值适合的最小类型。
+例如,1被解析为 `UInt8`,但256被解析为 `UInt16`。 有关详细信息,请参阅 [数据类型](../sql_reference/data_types/index.md)。
+
+例: `1`, `18446744073709551615`, `0xDEADBEEF`, `01`, `0.1`, `1e100`, `-1e-100`, `inf`, `nan`.
+
+### 字符串 {#syntax-string-literal}
+
+仅支持单引号中的字符串字面量。 被包裹的字符可以用反斜杠转义。 以下转义序列具有相应的特殊值: `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\a`, `\v`, `\xHH`. 在所有其他情况下,形如 `\c`(其中 `c` 是任意字符)的转义序列会被转换为 `c`。 这意味着您可以使用序列 `\'` 和 `\\`。 该值将具有 [字符串](../sql_reference/data_types/string.md) 类型。
+
+在字符串字面量中需要转义的最小字符集: `'` 和 `\`。 单引号可以用单引号转义,字面量 `'It\'s'` 和 `'It''s'` 是等价的。
+
+### 复合 {#compound}
+
+支持数组构造: `[1, 2, 3]` 和元组构造: `(1, 'Hello, world!', 2)`。
+实际上,这些不是字面量,而是分别带有数组创建运算符和元组创建运算符的表达式。
+数组必须至少包含一个项目,元组必须至少包含两个项目。
+元组有一个特殊用途:用在 `SELECT` 查询的 `IN` 子句中。 元组可以作为查询的结果获得,但它们不能保存到数据库中([Memory](../engines/table_engines/special/memory.md) 表除外)。
+
+### NULL {#null-literal}
+
+表示值缺失。
+
+为了在表字段中存储 `NULL`,该字段必须是 [可为空](../sql_reference/data_types/nullable.md) 类型。
+
+根据数据格式(输入或输出), `NULL` 可能有不同的表示。 有关详细信息,请参阅 [数据格式](../interfaces/formats.md#formats) 文档。
+
+处理 `NULL` 有许多细微差别。 例如,如果比较操作的参数中至少有一个是 `NULL`,则此操作的结果也将是 `NULL`。 乘法、加法和其他操作也是如此。 有关详细信息,请阅读每个操作的文档。
+
+在查询中,您可以使用 [IS NULL](operators.md#operator-is-null) 和 [IS NOT NULL](operators.md) 运算符以及相关函数 `isNull` 和 `isNotNull` 来检查 `NULL`。
+
+## 函数 {#functions}
+
+函数像标识符一样写入,并带有括号中的参数列表(可能为空)。 与标准SQL不同,即使参数列表为空,括号也是必需的。 示例: `now()`。
+有常规函数和聚合函数(请参阅 “Aggregate functions” 部分)。 某些聚合函数可以包含括号中的两个参数列表。 示例: `quantile (0.9) (x)`。 这些聚合函数称为 “参数化(parametric)” 函数,其第一个列表中的参数称为 “参数(parameters)”。 不带参数的聚合函数的语法与常规函数相同。
+
+## 运算符 {#operators}
+
+在查询解析过程中,运算符会转换为相应的函数,同时考虑它们的优先级和结合性。
+例如,表达式 `1 + 2 * 3 + 4` 会转换为 `plus(plus(1, multiply(2, 3)), 4)`。
+
+## 数据类型和数据库表引擎 {#data_types-and-database-table-engines}
+
+`CREATE` 查询中的数据类型和表引擎的编写方式与标识符或函数相同。 换句话说,它们可能包含也可能不包含括在括号中的参数列表。 有关详细信息,请参阅 “Data types”、“Table engines” 和 “CREATE” 部分。
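+
+例如,下面的示例草图(表名 `example_table` 为假设名称)同时展示了带参数的数据类型 `FixedString(16)`、不带参数的数据类型 `Date`,以及带空参数列表的表引擎 `MergeTree()`:
+
+``` sql
+CREATE TABLE example_table
+(
+    id FixedString(16),
+    d Date
+) ENGINE = MergeTree()
+ORDER BY d
+```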
+
+## 表达式别名 {#syntax-expression_aliases}
+
+别名是查询中表达式的用户定义名称。
+
+``` sql
+expr AS alias
+```
+
+- `AS` — 定义别名的关键字。您可以在 `SELECT` 子句中不使用 `AS` 关键字为表名或列名定义别名。
+
+    For example, `SELECT table_name_alias.column_name FROM table_name table_name_alias`.
+
+    In the [CAST](sql_reference/functions/type_conversion_functions.md#type_conversion_function-cast) function, the `AS` keyword has another meaning. See the description of the function.
+
+- `expr` — Any expression supported by ClickHouse.
+
+    For example, `SELECT column_name * 2 AS double FROM some_table`.
+
+- `alias` — `expr` 的名称。别名应符合 [标识符](#syntax-identifiers) 语法。
+
+    For example, `SELECT "table t".column_name FROM table_name AS "table t"`.
+
+### 使用注意事项 {#notes-on-usage}
+
+别名对于查询或子查询是全局的,您可以在查询的任何部分中为任何表达式定义别名。 例如, `SELECT (1 AS n) + 2, n`。
+
+别名在子查询中以及子查询之间不可见。 例如,在执行查询 `SELECT (SELECT sum(b.a) + num FROM b) - a.a AS num FROM a` 时,ClickHouse会生成异常 `Unknown identifier: num`。
+
+如果为子查询的 `SELECT` 子句中的结果列定义了别名,这些列在外部查询中可见。 例如, `SELECT n + m FROM (SELECT 1 AS n, 2 AS m)`。
+
+小心使用与列名或表名相同的别名。 让我们考虑以下示例:
+
+``` sql
+CREATE TABLE t
+(
+    a Int,
+    b Int
+)
+ENGINE = TinyLog()
+```
+
+``` sql
+SELECT
+    argMax(a, b),
+    sum(b) AS b
+FROM t
+```
+
+``` text
+Received exception from server (version 18.14.17):
+Code: 184. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception: Aggregate function sum(b) is found inside another aggregate function in query.
+```
+
+在这个例子中,我们声明了包含列 `b` 的表 `t`。 然后,在选择数据时,我们定义了 `sum(b) AS b` 别名。 由于别名是全局的,ClickHouse将表达式 `argMax(a, b)` 中的字面量 `b` 替换为表达式 `sum(b)`。 这种替换导致了异常。
+
+## 星号 {#asterisk}
+
+在 `SELECT` 查询中,星号可以替换表达式。 有关详细信息,请参阅 “SELECT” 部分。
+
+## 表达式 {#syntax-expressions}
+
+表达式是函数、标识符、字面量、运算符的应用、括号中的表达式、子查询或星号,它还可以包含别名。
+表达式列表是一个或多个用逗号分隔的表达式。
+反过来,函数和运算符可以将表达式作为参数。
+
+[原始文章](https://clickhouse.tech/docs/en/query_language/syntax/)
diff --git a/docs/zh/query_language/table_functions/file.md b/docs/zh/sql_reference/table_functions/file.md
similarity index 52%
rename from docs/zh/query_language/table_functions/file.md
rename to docs/zh/sql_reference/table_functions/file.md
index 88bbc2a3453..b3c93f7f1fd 100644
--- a/docs/zh/query_language/table_functions/file.md
+++ b/docs/zh/sql_reference/table_functions/file.md
@@ -1,28 +1,31 @@
---
-en_copy: true
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_priority: 37
+toc_title: "\u6587\u4EF6"
---

-# file {#file}
+# 文件 {#file}

-Creates a table from a file. This table function is similar to [url](url.md) and [hdfs](hdfs.md) ones.
+从文件创建表。 此表函数类似于 [url](url.md) 和 [hdfs](hdfs.md) 表函数。

``` sql
file(path, format, structure)
```

-**Input parameters**
+**输入参数**

-- `path` — The relative path to the file from [user\_files\_path](../../operations/server_settings/settings.md#server_settings-user_files_path). Path to file support following globs in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, \``'abc', 'def'` — strings.
-- `format` — The [format](../../interfaces/formats.md#formats) of the file.
+- `path` — 文件相对于 [user\_files\_path](../../operations/server_configuration_parameters/settings.md#server_configuration_parameters-user_files_path) 的路径。只读模式下,文件路径支持以下通配符: `*`、`?`、`{abc,def}` 和 `{N..M}`,其中 `N`、`M` 是数字,`'abc'`、`'def'` 是字符串。
+- `format` — 文件的[格式](../../interfaces/formats.md#formats)。
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.

-**Returned value**
+**返回值**

-A table with the specified structure for reading or writing data in the specified file.
+具有指定结构的表,用于读取或写入指定文件中的数据。

-**Example**
+**示例**

-Setting `user_files_path` and the contents of the file `test.csv`:
+设置 `user_files_path` 以及文件 `test.csv` 的内容:

``` bash
$ grep user_files_path /etc/clickhouse-server/config.xml
@@ -34,7 +37,7 @@ $ cat /var/lib/clickhouse/user_files/test.csv
78,43,45
```

-Table from`test.csv` and selection of the first two rows from it:
+从 `test.csv` 创建表并从中选择前两行:

``` sql
SELECT *
@@ -54,20 +57,20 @@ LIMIT 2
SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') LIMIT 10
```

-**Globs in path**
+**路径中的通配符**

-Multiple path components can have globs. For being processed file should exists and matches to the whole path pattern (not only suffix or prefix).
+多个路径组件都可以包含通配符。被处理的文件必须存在并匹配整个路径模式(而不仅是后缀或前缀)。

-- `*` — Substitutes any number of any characters except `/` including empty string.
-- `?` — Substitutes any single character.
-- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`.
-- `{N..M}` — Substitutes any number in range from N to M including both borders (could include leading zeros).
+- `*` — 替换除 `/` 以外任意数量的任意字符,包括空字符串。
+- `?` — 替换任意单个字符。
+- `{some_string,another_string,yet_another_one}` — 替换字符串 `'some_string', 'another_string', 'yet_another_one'` 中的任意一个。
+- `{N..M}` — 替换从 N 到 M(含两端)范围内的任意数字。

-Constructions with `{}` are similar to the [remote table function](../../query_language/table_functions/remote.md)).
+使用 `{}` 的构造类似于 [remote 表函数](../../sql_reference/table_functions/remote.md)。

-**Example**
+**示例**

-1. Suppose we have several files with the following relative paths:
+1. 假设我们有几个具有以下相对路径的文件:

- ‘some\_dir/some\_file\_1’
- ‘some\_dir/some\_file\_2’
- ‘some\_dir/some\_file\_3’
- ‘another\_dir/some\_file\_1’
- ‘another\_dir/some\_file\_2’
- ‘another\_dir/some\_file\_3’

-1. Query the amount of rows in these files:
+1. 查询这些文件中的行数:

``` sql
SELECT count(*)
FROM file('{some,another}_dir/some_file_{1..3}', 'TSV', 'name String, value UInt32')
```

-1. Query the amount of rows in all files of these two directories:
+1. 查询这两个目录的所有文件中的行数:

``` sql
SELECT count(*)
FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32')
```

-!!! warning "Warning"
-    If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
+!!! warning "警告"
+    如果您的文件列表包含带前导零的数字范围,请对每个数字单独使用带大括号的构造,或使用 `?`。

-**Example**
+**示例**

-Query the data from files named `file000`, `file001`, … , `file999`:
+查询名为 `file000`, `file001`, … , `file999` 的文件中的数据:

``` sql
SELECT count(*)
FROM file('big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name String, value UInt32')
```

-## Virtual Columns {#virtual-columns}
+## 虚拟列 {#virtual-columns}

- `_path` — Path to the file.
- `_file` — Name of the file.
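+
+例如,下面的示例草图(沿用上文假设的目录与表结构)演示了如何使用 `_file` 虚拟列统计每个文件中的行数:
+
+``` sql
+SELECT _file, count() AS rows
+FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32')
+GROUP BY _file
+```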
-**See Also**
+**另请参阅**

-- [Virtual columns](https://clickhouse.tech/docs/en/operations/table_engines/#table_engines-virtual_columns)
+- [虚拟列](https://clickhouse.tech/docs/en/operations/table_engines/#table_engines-virtual_columns)

-[Original article](https://clickhouse.tech/docs/en/query_language/table_functions/file/)
+[原始文章](https://clickhouse.tech/docs/en/query_language/table_functions/file/)
diff --git a/docs/zh/query_language/table_functions/generate.md b/docs/zh/sql_reference/table_functions/generate.md
similarity index 76%
rename from docs/zh/query_language/table_functions/generate.md
rename to docs/zh/sql_reference/table_functions/generate.md
index 273b5bd7e23..84c711711d5 100644
--- a/docs/zh/query_language/table_functions/generate.md
+++ b/docs/zh/sql_reference/table_functions/generate.md
@@ -1,18 +1,21 @@
---
-en_copy: true
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_priority: 47
+toc_title: generateRandom
---

# generateRandom {#generaterandom}

-Generates random data with given schema.
-Allows to populate test tables with data.
-Supports all data types that can be stored in table except `LowCardinality` and `AggregateFunction`.
+使用给定的模式生成随机数据。
+允许用数据填充测试表。
+支持可以存储在表中的所有数据类型,但 `LowCardinality` 和 `AggregateFunction` 除外。

``` sql
generateRandom('name TypeName[, name TypeName]...', [, 'random_seed'[, 'max_string_length'[, 'max_array_length']]]);
```

-**Parameters**
+**参数**

- `name` — Name of corresponding column.
- `TypeName` — Type of corresponding column.
- `max_array_length` — Maximum array length for all generated arrays. Defaults to `10`.
- `max_string_length` — Maximum string length for all generated strings. Defaults to `10`.
- `random_seed` — Specify random seed manually to produce stable results. If NULL — seed is randomly generated.

-**Returned Value**
+**返回值**

-A table object with requested schema.
+具有所请求模式的表对象。

-## Usage Example {#usage-example}
+## 用法示例 {#usage-example}

``` sql
SELECT * FROM generateRandom('a Array(Int8), d Decimal32(4), c Tuple(DateTime64(3), UUID)', 1, 10, 2);
└──────────┴──────────────┴────────────────────────────────────────────────────────────────────┘
```

-[Original article](https://clickhouse.tech/docs/en/query_language/table_functions/generate/)
+[原始文章](https://clickhouse.tech/docs/en/query_language/table_functions/generate/)
diff --git a/docs/zh/query_language/table_functions/hdfs.md b/docs/zh/sql_reference/table_functions/hdfs.md
similarity index 53%
rename from docs/zh/query_language/table_functions/hdfs.md
rename to docs/zh/sql_reference/table_functions/hdfs.md
index 22e64665179..2cf79c31c83 100644
--- a/docs/zh/query_language/table_functions/hdfs.md
+++ b/docs/zh/sql_reference/table_functions/hdfs.md
@@ -1,28 +1,31 @@
---
-en_copy: true
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_priority: 45
+toc_title: hdfs
---

# hdfs {#hdfs}

-Creates a table from files in HDFS. This table function is similar to [url](url.md) and [file](file.md) ones.
+从HDFS中的文件创建表。 此表函数类似于 [url](url.md) 和 [file](file.md) 表函数。

``` sql
hdfs(URI, format, structure)
```

-**Input parameters**
+**输入参数**

-- `URI` — The relative URI to the file in HDFS. Path to file support following globs in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, \``'abc', 'def'` — strings.
-- `format` — The [format](../../interfaces/formats.md#formats) of the file.
+- `URI` — HDFS中文件的相对URI。只读模式下,文件路径支持以下通配符: `*`、`?`、`{abc,def}` 和 `{N..M}`,其中 `N`、`M` 是数字,`'abc'`、`'def'` 是字符串。
+- `format` — 文件的[格式](../../interfaces/formats.md#formats)。
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.

-**Returned value**
+**返回值**

-A table with the specified structure for reading or writing data in the specified file.
+具有指定结构的表,用于读取或写入指定文件中的数据。

-**Example**
+**示例**

-Table from `hdfs://hdfs1:9000/test` and selection of the first two rows from it:
+从 `hdfs://hdfs1:9000/test` 创建表并从中选择前两行:

``` sql
SELECT *
@@ -37,20 +40,20 @@ LIMIT 2
└─────────┴─────────┴─────────┘
```

-**Globs in path**
+**路径中的通配符**

-Multiple path components can have globs. For being processed file should exists and matches to the whole path pattern (not only suffix or prefix).
+多个路径组件都可以包含通配符。被处理的文件必须存在并匹配整个路径模式(而不仅是后缀或前缀)。

-- `*` — Substitutes any number of any characters except `/` including empty string.
-- `?` — Substitutes any single character.
-- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`.
-- `{N..M}` — Substitutes any number in range from N to M including both borders (could include leading zeros).
+- `*` — 替换除 `/` 以外任意数量的任意字符,包括空字符串。
+- `?` — 替换任意单个字符。
+- `{some_string,another_string,yet_another_one}` — 替换字符串 `'some_string', 'another_string', 'yet_another_one'` 中的任意一个。
+- `{N..M}` — 替换从 N 到 M(含两端)范围内的任意数字。

-Constructions with `{}` are similar to the [remote table function](../../query_language/table_functions/remote.md)).
+使用 `{}` 的构造类似于 [remote 表函数](../../sql_reference/table_functions/remote.md)。

-**Example**
+**示例**

-1. Suppose that we have several files with following URIs on HDFS:
+1. 假设我们在HDFS上有几个具有以下URI的文件:

- ‘hdfs://hdfs1:9000/some\_dir/some\_file\_1’
- ‘hdfs://hdfs1:9000/some\_dir/some\_file\_2’
- ‘hdfs://hdfs1:9000/some\_dir/some\_file\_3’
- ‘hdfs://hdfs1:9000/another\_dir/some\_file\_1’
- ‘hdfs://hdfs1:9000/another\_dir/some\_file\_2’
- ‘hdfs://hdfs1:9000/another\_dir/some\_file\_3’

-1. Query the amount of rows in these files:
+1. 查询这些文件中的行数:

``` sql
SELECT count(*)
FROM hdfs('hdfs://hdfs1:9000/{some,another}_dir/some_file_{1..3}', 'TSV', 'name String, value UInt32')
```

-1. Query the amount of rows in all files of these two directories:
+1. 查询这两个目录的所有文件中的行数:

``` sql
SELECT count(*)
FROM hdfs('hdfs://hdfs1:9000/{some,another}_dir/*', 'TSV', 'name String, value UInt32')
```

-!!! warning "Warning"
-    If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
+!!! warning "警告"
+    如果您的文件列表包含带前导零的数字范围,请对每个数字单独使用带大括号的构造,或使用 `?`。

-**Example**
+**示例**

-Query the data from files named `file000`, `file001`, … , `file999`:
+查询名为 `file000`, `file001`, … , `file999` 的文件中的数据:

``` sql
SELECT count(*)
FROM hdfs('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name String, value UInt32')
```

-## Virtual Columns {#virtual-columns}
+## 虚拟列 {#virtual-columns}

- `_path` — Path to the file.
- `_file` — Name of the file.
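+
+例如,下面的示例草图(沿用上文假设的URI)演示了如何在结果中一并返回每行数据所在文件的 `_path`:
+
+``` sql
+SELECT _path, name, value
+FROM hdfs('hdfs://hdfs1:9000/some_dir/*', 'TSV', 'name String, value UInt32')
+LIMIT 5
+```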
-**See Also**
+**另请参阅**

-- [Virtual columns](https://clickhouse.tech/docs/en/operations/table_engines/#table_engines-virtual_columns)
+- [虚拟列](https://clickhouse.tech/docs/en/operations/table_engines/#table_engines-virtual_columns)

-[Original article](https://clickhouse.tech/docs/en/query_language/table_functions/hdfs/)
+[原始文章](https://clickhouse.tech/docs/en/query_language/table_functions/hdfs/)
diff --git a/docs/zh/sql_reference/table_functions/index.md b/docs/zh/sql_reference/table_functions/index.md
new file mode 100644
index 00000000000..38ef9bf1f4b
--- /dev/null
+++ b/docs/zh/sql_reference/table_functions/index.md
@@ -0,0 +1,38 @@
+---
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_folder_title: "\u8868\u51FD\u6570"
+toc_priority: 34
+toc_title: "\u5BFC\u8A00"
+---
+
+# 表函数 {#table-functions}
+
+表函数是构造表的方法。
+
+您可以在以下位置使用表函数:
+
+- `SELECT` 查询的 [FROM](../statements/select.md#select-from) 子句。
+
+    The method for creating a temporary table that is available only in the current query. The table is deleted when the query finishes.
+
+- [CREATE TABLE AS \](../statements/create.md#create-table-query) 查询。
+
+    It's one of the methods of creating a table.
+
+!!! warning "警告"
+    如果 [allow\_ddl](../../operations/settings/permissions_for_queries.md#settings_allow_ddl) 设置被禁用,则无法使用表函数。
+
+| 函数                  | 描述                                                                                                      |
+|-----------------------|-----------------------------------------------------------------------------------------------------------|
+| [file](file.md)       | 创建一个 [File](../../engines/table_engines/special/file.md) 引擎的表。                                    |
+| [merge](merge.md)     | 创建一个 [Merge](../../engines/table_engines/special/merge.md) 引擎的表。                                  |
+| [numbers](numbers.md) | 创建一个包含整数填充的单列的表。                                                                           |
+| [remote](remote.md)   | 允许您访问远程服务器,而无需创建 [分布式](../../engines/table_engines/special/distributed.md) 引擎的表。   |
+| [url](url.md)         | 创建一个 [URL](../../engines/table_engines/special/url.md) 引擎的表。                                      |
+| [mysql](mysql.md)     | 创建一个 [MySQL](../../engines/table_engines/integrations/mysql.md) 引擎的表。                             |
+| [jdbc](jdbc.md)       | 创建一个 [JDBC](../../engines/table_engines/integrations/jdbc.md) 引擎的表。                               |
+| [odbc](odbc.md)       | 创建一个 [ODBC](../../engines/table_engines/integrations/odbc.md) 引擎的表。                               |
+| [hdfs](hdfs.md)       | 创建一个 [HDFS](../../engines/table_engines/integrations/hdfs.md) 引擎的表。                               |
+
+[原始文章](https://clickhouse.tech/docs/en/query_language/table_functions/)
diff --git a/docs/zh/sql_reference/table_functions/input.md b/docs/zh/sql_reference/table_functions/input.md
new file mode 100644
index 00000000000..72f71576729
--- /dev/null
+++ b/docs/zh/sql_reference/table_functions/input.md
@@ -0,0 +1,47 @@
+---
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_priority: 46
+toc_title: "\u8F93\u5165"
+---
+
+# 输入 {#input}
+
+`input(structure)` - 一个表函数,可以将发送到服务器的具有给定结构的数据高效地转换并插入到具有另一种结构的表中。
+
+`structure` - 发送到服务器的数据的结构,格式为 `'column1_name column1_type, column2_name column2_type, ...'`。
+例如, `'id UInt32, name String'`。
+
+此函数只能在 `INSERT SELECT` 查询中使用,且只能使用一次,但在其他方面表现得像普通的表函数
+(例如,它可以用于子查询等)。
+
+数据可以像普通 `INSERT` 查询一样以任何方式发送,并可以采用任何可用的[格式](../../interfaces/formats.md#formats),
+但格式必须在查询末尾指定(与普通的 `INSERT SELECT` 不同)。
+
+这个函数的主要特点是,当服务器从客户端接收数据时,它会同时根据 `SELECT` 子句中的表达式列表对数据进行转换,
+并插入到目标表中。不会创建包含所有传输数据的临时表。
+
+**例**
+
+- 让 `test` 表具有结构 `(a String, b String)`,而 `data.csv` 中的数据具有不同的结构
+  `(col1 String, col2 Date, col3 Int32)`。将数据从 `data.csv` 插入表 `test` 并同时进行转换的查询如下所示:
+
+``` bash
+$ cat data.csv | clickhouse-client --query="INSERT INTO test SELECT lower(col1), col3 * col3 FROM input('col1 String, col2 Date, col3 Int32') FORMAT CSV";
+```
+
+- 如果 `data.csv` 包含与表 `test` 相同结构 `test_structure` 的数据,那么下面这两个查询是等价的:
+
+``` bash
+$ cat data.csv | clickhouse-client --query="INSERT INTO test FORMAT CSV"
+$ cat data.csv | clickhouse-client --query="INSERT INTO test SELECT * FROM input('test_structure') FORMAT CSV"
+```
+
+[原始文章](https://clickhouse.tech/docs/en/query_language/table_functions/input/)
diff --git a/docs/zh/sql_reference/table_functions/jdbc.md b/docs/zh/sql_reference/table_functions/jdbc.md
new file mode 100644
index 00000000000..e2268b42e28
--- /dev/null
+++ b/docs/zh/sql_reference/table_functions/jdbc.md
@@ -0,0 +1,29 @@
+---
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_priority: 43
+toc_title: jdbc
+---
+
+# jdbc {#table-function-jdbc}
+
+`jdbc(jdbc_connection_uri, schema, table)` - 返回通过JDBC驱动程序连接的表。
+
+此表函数需要单独运行 `clickhouse-jdbc-bridge` 程序。
+它支持可空类型(基于所查询远程表的DDL)。
+
+**例**
+
+``` sql
+SELECT * FROM jdbc('jdbc:mysql://localhost:3306/?user=root&password=root', 'schema', 'table')
+```
+
+``` sql
+SELECT * FROM jdbc('mysql://localhost:3306/?user=root&password=root', 'schema', 'table')
+```
+
+``` sql
+SELECT * FROM jdbc('datasource://mysql-local', 'schema', 'table')
+```
+
+[原始文章](https://clickhouse.tech/docs/en/query_language/table_functions/jdbc/)
diff --git a/docs/zh/sql_reference/table_functions/merge.md b/docs/zh/sql_reference/table_functions/merge.md
new file mode 100644
index 00000000000..7304c447b1f
--- /dev/null
+++ b/docs/zh/sql_reference/table_functions/merge.md
@@ -0,0 +1,14 @@
+---
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_priority: 38
+toc_title: "\u5408\u5E76"
+---
+
+# 合并 {#merge}
+
+`merge(db_name, 'tables_regexp')` – 创建一个临时的Merge表。 有关更多信息,请参阅 “Table engines, Merge” 一节。
+
+表结构取自与正则表达式匹配的第一个表。
+
+[原始文章](https://clickhouse.tech/docs/en/query_language/table_functions/merge/)
diff --git a/docs/zh/query_language/table_functions/mysql.md b/docs/zh/sql_reference/table_functions/mysql.md
similarity index 59%
rename from docs/zh/query_language/table_functions/mysql.md
rename to docs/zh/sql_reference/table_functions/mysql.md
index 5a8e8d4fd96..3cdf3047aac 100644
--- a/docs/zh/query_language/table_functions/mysql.md
+++ b/docs/zh/sql_reference/table_functions/mysql.md
@@ -1,16 +1,19 @@
---
-en_copy: true
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_priority: 42
+toc_title: mysql
---

# mysql {#mysql}

-Allows `SELECT` queries to be performed on data that is stored on a remote MySQL server.
+允许对存储在远程MySQL服务器上的数据执行 `SELECT` 查询。

``` sql
mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_duplicate_clause']);
```

-**Parameters**
+**参数**

- `host:port` — MySQL server address.

- `database` — Remote database name.

- `table` — Remote table name.

- `user` — MySQL user.

- `password` — User password.

-- `replace_query` — Flag that converts `INSERT INTO` queries to `REPLACE INTO`. If `replace_query=1`, the query is replaced.
+- `replace_query` — 将 `INSERT INTO` 查询转换为 `REPLACE INTO` 的标志。如果 `replace_query=1`,则查询被替换。

-- `on_duplicate_clause` — The `ON DUPLICATE KEY on_duplicate_clause` expression that is added to the `INSERT` query.
+- `on_duplicate_clause` — The `ON DUPLICATE KEY on_duplicate_clause` 表达式被添加到 `INSERT` 查询。 Example: `INSERT INTO t (c1,c2) VALUES ('a', 2) ON DUPLICATE KEY UPDATE c2 = c2 + 1`, where `on_duplicate_clause` is `UPDATE c2 = c2 + 1`. See the MySQL documentation to find which `on_duplicate_clause` you can use with the `ON DUPLICATE KEY` clause. To specify `on_duplicate_clause` you need to pass `0` to the `replace_query` parameter. If you simultaneously pass `replace_query = 1` and `on_duplicate_clause`, ClickHouse generates an exception. -Simple `WHERE` clauses such as `=, !=, >, >=, <, <=` are currently executed on the MySQL server. +简单 `WHERE` 条款如 `=, !=, >, >=, <, <=` 当前在MySQL服务器上执行。 -The rest of the conditions and the `LIMIT` sampling constraint are executed in ClickHouse only after the query to MySQL finishes. +其余的条件和 `LIMIT` 只有在对MySQL的查询完成后,才会在ClickHouse中执行采样约束。 -**Returned Value** +**返回值** -A table object with the same columns as the original MySQL table. +与原始MySQL表具有相同列的table对象。 -## Usage Example {#usage-example} +## 用法示例 {#usage-example} -Table in MySQL: +MySQL中的表: ``` text mysql> CREATE TABLE `test`.`test` ( @@ -55,15 +58,15 @@ mysql> insert into test (`int_id`, `float`) VALUES (1,2); Query OK, 1 row affected (0,00 sec) mysql> select * from test; -+--------+--------------+-------+----------------+ ++------+----------+-----+----------+ | int_id | int_nullable | float | float_nullable | -+--------+--------------+-------+----------------+ ++------+----------+-----+----------+ | 1 | NULL | 2 | NULL | -+--------+--------------+-------+----------------+ ++------+----------+-----+----------+ 1 row in set (0,00 sec) ``` -Selecting data from ClickHouse: +从ClickHouse中选择数据: ``` sql SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123') @@ -75,9 +78,9 @@ SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123') └────────┴──────────────┴───────┴────────────────┘ ``` -## See Also {#see-also} +## 另请参阅 {#see-also} -- [The ‘MySQL’ table engine](../../operations/table_engines/mysql.md) -- [Using MySQL as a source of external dictionary](../dicts/external_dicts_dict_sources.md#dicts-external_dicts_dict_sources-mysql) +- [该 ‘MySQL’ 表引擎](../../engines/table_engines/integrations/mysql.md) +- [使用MySQL作为外部字典的来源](../../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_sources.md#dicts-external_dicts_dict_sources-mysql) -[Original article](https://clickhouse.tech/docs/en/query_language/table_functions/mysql/) +[原始文章](https://clickhouse.tech/docs/en/query_language/table_functions/mysql/) diff --git a/docs/zh/sql_reference/table_functions/numbers.md b/docs/zh/sql_reference/table_functions/numbers.md new file mode 100644 index 00000000000..aaee632d5dc --- /dev/null +++ b/docs/zh/sql_reference/table_functions/numbers.md @@ -0,0 +1,30 @@ +--- +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_priority: 39 +toc_title: "\u6570\u5B57" +--- + +# 数字 {#numbers} + +`numbers(N)` – Returns a table with the single ‘number’ 包含从0到N-1的整数的列(UInt64)。 +`numbers(N, M)` -返回一个表与单 ‘number’ 包含从N到(N+M-1)的整数的列(UInt64)。 + +类似于 `system.numbers` 表,它可以用于测试和生成连续的值, `numbers(N, M)` 比 `system.numbers`. 
+ +以下查询是等效的: + +``` sql +SELECT * FROM numbers(10); +SELECT * FROM numbers(0, 10); +SELECT * FROM system.numbers LIMIT 10; +``` + +例: + +``` sql +-- Generate a sequence of dates from 2010-01-01 to 2010-12-31 +select toDate('2010-01-01') + number as d FROM numbers(365); +``` + +[原始文章](https://clickhouse.tech/docs/en/query_language/table_functions/numbers/) diff --git a/docs/zh/sql_reference/table_functions/odbc.md b/docs/zh/sql_reference/table_functions/odbc.md new file mode 100644 index 00000000000..ad7503fd551 --- /dev/null +++ b/docs/zh/sql_reference/table_functions/odbc.md @@ -0,0 +1,108 @@ +--- +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_priority: 44 +toc_title: odbc +--- + +# odbc {#table-functions-odbc} + +返回通过连接的表 [ODBC](https://en.wikipedia.org/wiki/Open_Database_Connectivity). + +``` sql +odbc(connection_settings, external_database, external_table) +``` + +参数: + +- `connection_settings` — Name of the section with connection settings in the `odbc.ini` 文件 +- `external_database` — Name of a database in an external DBMS. +- `external_table` — Name of a table in the `external_database`. + +为了安全地实现ODBC连接,ClickHouse使用单独的程序 `clickhouse-odbc-bridge`. 如果直接从ODBC驱动程序加载 `clickhouse-server`,驱动程序问题可能会导致ClickHouse服务器崩溃。 ClickHouse自动启动 `clickhouse-odbc-bridge` 当它是必需的。 ODBC桥程序是从相同的软件包作为安装 `clickhouse-server`. + +与字段 `NULL` 外部表中的值将转换为基数据类型的默认值。 例如,如果远程MySQL表字段具有 `INT NULL` 键入它将转换为0(ClickHouse的默认值 `Int32` 数据类型)。 + +## 用法示例 {#usage-example} + +**通过ODBC从本地MySQL安装获取数据** + +此示例检查Ubuntu Linux18.04和MySQL服务器5.7。 + +确保安装了unixODBC和MySQL连接器。 + +默认情况下(如果从软件包安装),ClickHouse以用户身份启动 `clickhouse`. 因此,您需要在MySQL服务器中创建和配置此用户。 + +``` bash +$ sudo mysql +``` + +``` sql +mysql> CREATE USER 'clickhouse'@'localhost' IDENTIFIED BY 'clickhouse'; +mysql> GRANT ALL PRIVILEGES ON *.* TO 'clickhouse'@'clickhouse' WITH GRANT OPTION; +``` + +然后配置连接 `/etc/odbc.ini`. + +``` bash +$ cat /etc/odbc.ini +[mysqlconn] +DRIVER = /usr/local/lib/libmyodbc5w.so +SERVER = 127.0.0.1 +PORT = 3306 +DATABASE = test +USERNAME = clickhouse +PASSWORD = clickhouse +``` + +您可以使用 `isql` unixodbc安装中的实用程序。 + +``` bash +$ isql -v mysqlconn ++-------------------------+ +| Connected! | +| | +... +``` + +MySQL中的表: + +``` text +mysql> CREATE TABLE `test`.`test` ( + -> `int_id` INT NOT NULL AUTO_INCREMENT, + -> `int_nullable` INT NULL DEFAULT NULL, + -> `float` FLOAT NOT NULL, + -> `float_nullable` FLOAT NULL DEFAULT NULL, + -> PRIMARY KEY (`int_id`)); +Query OK, 0 rows affected (0,09 sec) + +mysql> insert into test (`int_id`, `float`) VALUES (1,2); +Query OK, 1 row affected (0,00 sec) + +mysql> select * from test; ++------+----------+-----+----------+ +| int_id | int_nullable | float | float_nullable | ++------+----------+-----+----------+ +| 1 | NULL | 2 | NULL | ++------+----------+-----+----------+ +1 row in set (0,00 sec) +``` + +从ClickHouse中的MySQL表中检索数据: + +``` sql +SELECT * FROM odbc('DSN=mysqlconn', 'test', 'test') +``` + +``` text +┌─int_id─┬─int_nullable─┬─float─┬─float_nullable─┐ +│ 1 │ 0 │ 2 │ 0 │ +└────────┴──────────────┴───────┴────────────────┘ +``` + +## 另请参阅 {#see-also} + +- [ODBC外部字典](../../sql_reference/dictionaries/external_dictionaries/external_dicts_dict_sources.md#dicts-external_dicts_dict_sources-odbc) +- [ODBC表引擎](../../engines/table_engines/integrations/odbc.md). 
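+
+如果需要长期复用该连接,也可以通过上面提到的 ODBC 表引擎把同一个连接固化为一张表。下面是一个简要示意(假设沿用上文配置的 `mysqlconn` DSN;表名 `odbc_test` 只是示例用的假设名称):
+
+``` sql
+-- 示意:基于 ODBC 表引擎创建一张映射到上文 MySQL test.test 表的表
+CREATE TABLE odbc_test
+(
+    int_id         Int32,
+    int_nullable   Nullable(Int32),
+    float          Float32,
+    float_nullable Nullable(Float32)
+)
+ENGINE = ODBC('DSN=mysqlconn', 'test', 'test')
+```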
+ +[原始文章](https://clickhouse.tech/docs/en/query_language/table_functions/jdbc/) diff --git a/docs/zh/sql_reference/table_functions/remote.md b/docs/zh/sql_reference/table_functions/remote.md new file mode 100644 index 00000000000..be6e9138fb4 --- /dev/null +++ b/docs/zh/sql_reference/table_functions/remote.md @@ -0,0 +1,83 @@ +--- +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_priority: 40 +toc_title: "\u8FDC\u7A0B" +--- + +# 远程,远程安全 {#remote-remotesecure} + +允许您访问远程服务器,而无需创建 `Distributed` 桌子 + +签名: + +``` sql +remote('addresses_expr', db, table[, 'user'[, 'password']]) +remote('addresses_expr', db.table[, 'user'[, 'password']]) +``` + +`addresses_expr` – An expression that generates addresses of remote servers. This may be just one server address. The server address is `host:port`,或者只是 `host`. 主机可以指定为服务器名称,也可以指定为IPv4或IPv6地址。 IPv6地址在方括号中指定。 端口是远程服务器上的TCP端口。 如果省略端口,它使用 `tcp_port` 从服务器的配置文件(默认情况下,9000)。 + +!!! important "重要事项" + IPv6地址需要该端口。 + +例: + +``` text +example01-01-1 +example01-01-1:9000 +localhost +127.0.0.1 +[::]:9000 +[2a02:6b8:0:1111::11]:9000 +``` + +多个地址可以用逗号分隔。 在这种情况下,ClickHouse将使用分布式处理,因此它将将查询发送到所有指定的地址(如具有不同数据的分片)。 + +示例: + +``` text +example01-01-1,example01-02-1 +``` + +表达式的一部分可以用大括号指定。 前面的示例可以写成如下: + +``` text +example01-0{1,2}-1 +``` + +大括号可以包含由两个点(非负整数)分隔的数字范围。 在这种情况下,范围将扩展为生成分片地址的一组值。 如果第一个数字以零开头,则使用相同的零对齐形成值。 前面的示例可以写成如下: + +``` text +example01-{01..02}-1 +``` + +如果您有多对大括号,它会生成相应集合的直接乘积。 + +大括号中的地址和部分地址可以用管道符号(\|)分隔。 在这种情况下,相应的地址集被解释为副本,并且查询将被发送到第一个正常副本。 但是,副本将按照当前设置的顺序进行迭代 [load\_balancing](../../operations/settings/settings.md) 设置。 + +示例: + +``` text +example01-{01..02}-{1|2} +``` + +此示例指定两个分片,每个分片都有两个副本。 + +生成的地址数由常量限制。 现在这是1000个地址。 + +使用 `remote` 表函数比创建一个不太优化 `Distributed` 表,因为在这种情况下,服务器连接被重新建立为每个请求。 此外,如果设置了主机名,则会解析这些名称,并且在使用各种副本时不会计算错误。 在处理大量查询时,始终创建 `Distributed` 表的时间提前,不要使用 `remote` 表功能。 + +该 `remote` 表函数可以在以下情况下是有用的: + +- 访问特定服务器进行数据比较、调试和测试。 +- 查询之间的各种ClickHouse群集用于研究目的。 +- 手动发出的罕见分布式请求。 +- 每次重新定义服务器集的分布式请求。 + +如果未指定用户, `default` 被使用。 +如果未指定密码,则使用空密码。 + +`remoteSecure` -相同 `remote` but with secured connection. Default port — [tcp\_port\_secure](../../operations/server_configuration_parameters/settings.md#server_configuration_parameters-tcp_port_secure) 从配置或9440. + +[原始文章](https://clickhouse.tech/docs/en/query_language/table_functions/remote/) diff --git a/docs/zh/sql_reference/table_functions/url.md b/docs/zh/sql_reference/table_functions/url.md new file mode 100644 index 00000000000..d220bb05c2c --- /dev/null +++ b/docs/zh/sql_reference/table_functions/url.md @@ -0,0 +1,26 @@ +--- +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_priority: 41 +toc_title: url +--- + +# url {#url} + +`url(URL, format, structure)` -返回从创建的表 `URL` 与给定 +`format` 和 `structure`. + +URL-HTTP或HTTPS服务器地址,它可以接受 `GET` 和/或 `POST` 请求。 + +格式 - [格式](../../interfaces/formats.md#formats) 的数据。 + +结构-表结构 `'UserID UInt64, Name String'` 格式。 确定列名称和类型。 + +**示例** + +``` sql +-- getting the first 3 lines of a table that contains columns of String and UInt32 type from HTTP-server which answers in CSV format. 
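+-- 即:从一个以 CSV 格式应答的 HTTP 服务器读取包含 String 和 UInt32 两列的表的前 3 行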
+SELECT * FROM url('http://127.0.0.1:12345/', CSV, 'column1 String, column2 UInt32') LIMIT 3 +``` + +[原始文章](https://clickhouse.tech/docs/en/query_language/table_functions/url/) diff --git a/docs/zh/whats_new/changelog/2017.md b/docs/zh/whats_new/changelog/2017.md new file mode 100644 index 00000000000..ed77ead9023 --- /dev/null +++ b/docs/zh/whats_new/changelog/2017.md @@ -0,0 +1,268 @@ +--- +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_priority: 79 +toc_title: '2017' +--- + +### ClickHouse释放1.1.54327,2017-12-21 {#clickhouse-release-1-1-54327-2017-12-21} + +此版本包含以前版本1.1.54318的错误修复: + +- 修复了可能导致数据丢失的复制中可能存在的争用条件的错误。 此问题影响版本1.1.54310和1.1.54318。 如果将其中一个版本用于复制的表,则强烈建议进行更新。 此问题显示在日志中的警告消息,如 `Part ... from own log doesn't exist.` 即使您在日志中没有看到这些消息,问题也是相关的。 + +### 碌莽禄,拢,010-68520682\ {#clickhouse-release-1-1-54318-2017-11-30} + +此版本包含以前版本1.1.54310的错误修复: + +- 修复了SummingMergeTree引擎中合并过程中错误的行删除 +- 修复了未复制的MergeTree引擎中的内存泄漏 +- 修复了MergeTree引擎中频繁插入的性能下降 +- 修复了导致复制队列停止运行的问题 +- 固定服务器日志的轮换和归档 + +### ClickHouse释放1.1.54310,2017-11-01 {#clickhouse-release-1-1-54310-2017-11-01} + +#### 新功能: {#new-features} + +- MergeTree表引擎系列的自定义分区键。 +- [卡夫卡](https://clickhouse.yandex/docs/en/operations/table_engines/kafka/) 表引擎。 +- 增加了对加载的支持 [CatBoost](https://catboost.yandex/) 模型并将其应用到ClickHouse中存储的数据。 +- 增加了对UTC非整数偏移的时区的支持。 +- 增加了对具有时间间隔的算术运算的支持。 +- 日期和日期时间类型的值范围扩展到2105年。 +- 添加了 `CREATE MATERIALIZED VIEW x TO y` 查询(指定用于存储实例化视图数据的现有表)。 +- 添加了 `ATTACH TABLE` 不带参数的查询。 +- 将SummingMergeTree表中名称以-Map结尾的嵌套列的处理逻辑提取到sumMap聚合函数中。 现在,您可以显式指定此类列。 +- IP trie字典的最大大小增加到128M条目。 +- 添加了getSizeOfEnumType函数。 +- 添加了sumWithOverflow聚合函数。 +- 增加了对Cap'n Proto输入格式的支持。 +- 使用zstd算法时,您现在可以自定义压缩级别。 + +#### 向后不兼容的更改: {#backward-incompatible-changes} + +- 不允许使用内存以外的引擎创建临时表。 +- 不允许使用View或MaterializedView引擎显式创建表。 +- 在创建表期间,新检查将验证采样键表达式是否包含在主键中。 + +#### 错误修复: {#bug-fixes} + +- 修复了同步插入到分布式表中时的挂断问题。 +- 修复了复制表中部分的非原子添加和删除。 +- 插入到实例化视图中的数据不会遭受不必要的重复数据删除。 +- 对本地副本滞后且远程副本不可用的分布式表执行查询不会再导致错误。 +- 用户不需要访问权限 `default` 数据库创建临时表了。 +- 修复了在指定数组类型时不带参数的崩溃。 +- 修复了包含服务器日志的磁盘卷已满时的挂机问题。 +- 修复了unix时代的第一周toRelativeWeekNum函数的溢出。 + +#### 构建改进: {#build-improvements} + +- 几个第三方库(特别是Poco)被更新并转换为git子模块。 + +### ClickHouse释放1.1.54304,2017-10-19 {#clickhouse-release-1-1-54304-2017-10-19} + +#### 新功能: {#new-features-1} + +- 本机协议中的TLS支持(要启用,请设置 `tcp_ssl_port` 在 `config.xml` ). + +#### 错误修复: {#bug-fixes-1} + +- `ALTER` 对于复制的表现在尝试尽快开始运行。 +- 使用设置读取数据时修复崩溃 `preferred_block_size_bytes=0.` +- 固定的崩溃 `clickhouse-client` 按下时 `Page Down` +- 正确解释某些复杂的查询 `GLOBAL IN` 和 `UNION ALL` +- `FREEZE PARTITION` 现在总是以原子方式工作。 +- 空POST请求现在返回代码411的响应。 +- 修正了像表达式的解释错误 `CAST(1 AS Nullable(UInt8)).` +- 修正了读取时的错误 `Array(Nullable(String))` 从列 `MergeTree` 桌子 +- 修正了解析查询时崩溃,如 `SELECT dummy AS dummy, dummy AS b` +- 用户正确更新无效 `users.xml` +- 可执行字典返回非零响应代码时的正确处理。 + +### ClickHouse释放1.1.54292,2017-09-20 {#clickhouse-release-1-1-54292-2017-09-20} + +#### 新功能: {#new-features-2} + +- 添加了 `pointInPolygon` 用于处理坐标平面上的坐标的函数。 +- 添加了 `sumMap` 用于计算数组总和的聚合函数,类似于 `SummingMergeTree`. +- 添加了 `trunc` 功能。 改进舍入函数的性能 (`round`, `floor`, `ceil`, `roundToExp2`)并corrected正了他们如何工作的逻辑。 改变的逻辑 `roundToExp2` 分数和负数的功能。 +- ClickHouse可执行文件现在对libc版本的依赖性较低。 同样的ClickHouse可执行文件可以在各种各样的Linux系统上运行。 使用编译的查询(使用设置)时仍然存在依赖关系 `compile = 1` ,默认情况下不使用)。 +- 减少了动态编译查询所需的时间。 + +#### 错误修复: {#bug-fixes-2} + +- 修正了有时产生的错误 `part ... 
intersects previous part` 消息和副本的一致性减弱。 +- 修正了一个错误,导致服务器锁定,如果ZooKeeper在关闭过程中不可用。 +- 恢复副本时删除了过多的日志记录。 +- 修复了UNION ALL实现中的错误。 +- 修复了在块中的第一列具有数组类型时发生的concat函数中的错误。 +- 进度现在在系统中正确显示。合并表。 + +### ClickHouse释放1.1.54289,2017-09-13 {#clickhouse-release-1-1-54289-2017-09-13} + +#### 新功能: {#new-features-3} + +- `SYSTEM` 服务器管理查询: `SYSTEM RELOAD DICTIONARY`, `SYSTEM RELOAD DICTIONARIES`, `SYSTEM DROP DNS CACHE`, `SYSTEM SHUTDOWN`, `SYSTEM KILL`. +- 添加了用于处理数组的函数: `concat`, `arraySlice`, `arrayPushBack`, `arrayPushFront`, `arrayPopBack`, `arrayPopFront`. +- 已添加 `root` 和 `identity` ZooKeeper配置的参数。 这允许您隔离同一个ZooKeeper集群上的单个用户。 +- 添加聚合函数 `groupBitAnd`, `groupBitOr`,和 `groupBitXor` (为了兼容,它们也可以在名称下使用 `BIT_AND`, `BIT_OR`,和 `BIT_XOR`). +- 通过在文件系统中指定套接字,可以从MySQL加载外部字典。 +- 外部字典可以通过SSL从MySQL加载 (`ssl_cert`, `ssl_key`, `ssl_ca` 参数)。 +- 添加了 `max_network_bandwidth_for_user` 设置为限制每个用户查询的总带宽使用。 +- 支持 `DROP TABLE` 对于临时表。 +- 支持阅读 `DateTime` 从Unix时间戳格式的值 `CSV` 和 `JSONEachRow` 格式。 +- 分布式查询中的滞后副本现在默认排除(默认阈值为5分钟)。 +- 在ALTER期间使用FIFO锁定:对于连续运行的查询,ALTER查询不会无限期地阻止。 +- 选项设置 `umask` 在配置文件中。 +- 改进了查询的性能 `DISTINCT` . + +#### 错误修复: {#bug-fixes-3} + +- 改进了在ZooKeeper中删除旧节点的过程。 以前,如果插入非常频繁,旧节点有时不会被删除,这导致服务器关闭速度缓慢等等。 +- 修正了选择主机连接到ZooKeeper时的随机化。 +- 修复了在分布式查询中排除滞后副本,如果副本是localhost。 +- 修正了一个错误,其中在一个数据部分 `ReplicatedMergeTree` 运行后表可能会被打破 `ALTER MODIFY` 在一个元素 `Nested` 结构。 +- 修复了可能导致SELECT查询执行以下操作的错误 “hang”. +- 对分布式DDL查询的改进。 +- 修正了查询 `CREATE TABLE ... AS `. +- 解决了在僵局 `ALTER ... CLEAR COLUMN IN PARTITION` 查询为 `Buffer` 桌子 +- 修正了无效的默认值 `Enum` s(0,而不是最小)使用时 `JSONEachRow` 和 `TSKV` 格式。 +- 解决了使用字典时僵尸进程的外观 `executable` 资料来源。 +- 修正了HEAD查询的段错误。 + +#### 改进开发和组装ClickHouse的工作流程: {#improved-workflow-for-developing-and-assembling-clickhouse} + +- 您可以使用 `pbuilder` 建造克里克豪斯 +- 您可以使用 `libc++` 而不是 `libstdc++` 对于构建在Linux上。 +- 添加了使用静态代码分析工具的说明: `Coverage`, `clang-tidy`, `cppcheck`. + +#### 升级时请注意: {#please-note-when-upgrading} + +- MergeTree设置现在有一个更高的默认值 `max_bytes_to_merge_at_max_space_in_pool` (要合并的数据部分的最大总大小,以字节为单位):它已从100GiB增加到150GiB。 这可能会导致服务器升级后运行大型合并,这可能会导致磁盘子系统的负载增加。 如果服务器上的可用空间小于正在运行的合并总量的两倍,这将导致所有其他合并停止运行,包括小数据部分的合并。 因此,插入查询将失败,并显示消息 “Merges are processing significantly slower than inserts.” 使用 `SELECT * FROM system.merges` 查询监控情况。 您还可以检查 `DiskSpaceReservedForMerge` 度量在 `system.metrics` 表,或石墨。 你不需要做任何事情来解决这个问题,因为一旦大合并完成,问题就会自行解决。 如果您发现这是不可接受的,则可以恢复以前的值 `max_bytes_to_merge_at_max_space_in_pool` 设置。 要做到这一点,请转到 在配置部分。xml,设置 ``` ``107374182400 ``` 并重新启动服务器。 + +### 碌莽禄,拢,010-68520682\ {#clickhouse-release-1-1-54284-2017-08-29} + +- 这是一个错误修正版本,以前的1.1.54282版本。 它修复了ZooKeeper中部件目录中的泄漏。 + +### 碌莽禄,拢,010-68520682\ {#clickhouse-release-1-1-54282-2017-08-23} + +此版本包含以前版本1.1.54276的错误修复: + +- 固定 `DB::Exception: Assertion violation: !_path.empty()` 当插入到分布式表中。 +- 如果输入数据以";"开头,则以RowBinary格式插入时修复了解析。 +- Errors during runtime compilation of certain aggregate functions (e.g. `groupArray()`). 
+ +### 碌莽禄,拢,010-68520682\ {#clickhouse-release-1-1-54276-2017-08-16} + +#### 新功能: {#new-features-4} + +- 为选择查询添加了一个可选的WITH部分。 查询示例: `WITH 1+1 AS a SELECT a, a*a` +- INSERT可以在分布式表中同步执行:仅在所有分片上保存所有数据后才返回OK。 这是由设置insert\_distributed\_sync=1激活的。 +- 添加了用于处理16字节标识符的UUID数据类型。 +- 添加了CHAR,FLOAT和其他类型的别名,以便与Tableau兼容。 +- 添加了toyyyymm,toYYYYMMDD和toyyyyymmddhhmmss将时间转换为数字的功能。 +- 您可以使用IP地址(与主机名一起使用)来标识群集DDL查询的服务器。 +- 增加了对函数中非常量参数和负偏移的支持 `substring(str, pos, len).` +- 添加了max\_size参数 `groupArray(max_size)(column)` 聚合函数,并优化了其性能。 + +#### 主要变化: {#main-changes} + +- 安全性改进:所有服务器文件都使用0640权限创建(可以通过更改 配置参数)。 +- 改进了语法无效的查询的错误消息。 +- 在合并mergetree大部分数据时,显着降低了内存消耗并提高了性能。 +- 显着提高了ReplacingMergeTree引擎的数据合并性能。 +- 通过组合多个源插入来改进来自分布式表的异步插入的性能。 要启用此功能,请使用设置distributed\_directory\_monitor\_batch\_inserts=1。 + +#### 向后不兼容的更改: {#backward-incompatible-changes-1} + +- 改变聚合状态的二进制格式 `groupArray(array_column)` 数组的函数。 + +#### 更改的完整列表: {#complete-list-of-changes} + +- 添加了 `output_format_json_quote_denormals` 设置,允许以JSON格式输出nan和inf值。 +- 从分布式表读取时优化流分配。 +- 如果值没有更改,可以在只读模式下配置设置。 +- 添加了检索MergeTree引擎的非整数颗粒的功能,以满足preferred\_block\_size\_bytes设置中指定的块大小的限制。 其目的是在处理来自具有大列的表的查询时减少RAM消耗并增加缓存局部性。 +- 高效使用包含如下表达式的索引 `toStartOfHour(x)` 对于像条件 `toStartOfHour(x) op сonstexpr.` +- 添加了MergeTree引擎的新设置(配置中的merge\_tree部分。xml): + - replicated\_deduplication\_window\_seconds设置复制表中重复数据删除插入所允许的秒数。 + - cleanup\_delay\_period设置启动清理以删除过时数据的频率。 + - replicated\_can\_become\_leader可以防止副本成为领导者(并分配合并)。 +- 加速清理,从ZooKeeper中删除过时的数据。 +- 针对群集DDL查询的多个改进和修复。 特别令人感兴趣的是新设置distributed\_ddl\_task\_timeout,它限制了等待群集中服务器响应的时间。 如果未在所有主机上执行ddl请求,则响应将包含超时错误,并且请求将以异步模式执行。 +- 改进了服务器日志中堆栈跟踪的显示。 +- 添加了 “none” 压缩方法的值。 +- 您可以在config中使用多个dictionaries\_config部分。xml +- 可以通过文件系统中的套接字连接到MySQL。 +- 系统。部件表有一个新的列,其中包含有关标记大小的信息,以字节为单位。 + +#### 错误修复: {#bug-fixes-4} + +- 使用合并表的分布式表现在可以正确地用于具有条件的SELECT查询 `_table` 场。 +- 修复了检查数据部分时ReplicatedMergeTree中罕见的争用条件。 +- 固定可能冻结 “leader election” 启动服务器时。 +- 使用数据源的本地副本时,将忽略max\_replica\_delay\_for\_distributed\_queries设置。 这已被修复。 +- 修正了不正确的行为 `ALTER TABLE CLEAR COLUMN IN PARTITION` 尝试清除不存在的列时。 +- 修复了multif函数中使用空数组或字符串时的异常。 +- 修正了反序列化本机格式时过多的内存分配。 +- 修正了Trie字典的不正确的自动更新。 +- 修复了使用SAMPLE从合并表中使用GROUP BY子句运行查询时的异常。 +- 修复了使用distributed\_aggregation\_memory\_efficient=1时组的崩溃。 +- 现在,您可以指定数据库。表在右侧的IN和JOIN。 +- 用于并行聚合的线程太多。 这已被修复。 +- 固定如何 “if” 函数与FixedString参数一起使用。 +- 为权重为0的分片从分布式表中选择工作不正确。 这已被修复。 +- 运行 `CREATE VIEW IF EXISTS no longer causes crashes.` +- 修正了input\_format\_skip\_unknown\_fields=1设置并且有负数时的不正确行为。 +- 修正了一个无限循环 `dictGetHierarchy()` 如果字典中有一些无效的数据,则函数。 +- 固定 `Syntax error: unexpected (...)` 在IN或JOIN子句和合并表中使用子查询运行分布式查询时出错。 +- 修复了从字典表中选择查询的不正确解释。 +- 修正了 “Cannot mremap” 在IN和JOIN子句中使用包含超过20亿个元素的数组时出错。 +- 修复了以MySQL为源的字典的故障转移。 + +#### 改进开发和组装ClickHouse的工作流程: {#improved-workflow-for-developing-and-assembling-clickhouse-1} + +- 构建可以在阿卡迪亚组装。 +- 您可以使用gcc7来编译ClickHouse。 +- 现在使用ccache+distcc的并行构建速度更快。 + +### 碌莽禄,拢,010-68520682\戮卤篓拢,010-68520682\ {#clickhouse-release-1-1-54245-2017-07-04} + +#### 新功能: {#new-features-5} + +- 分布式的DDL(例如, `CREATE TABLE ON CLUSTER`) +- 复制的查询 `ALTER TABLE CLEAR COLUMN IN PARTITION.` +- 字典表的引擎(以表格形式访问字典数据)。 +- 字典数据库引擎(这种类型的数据库会自动为所有连接的外部字典提供字典表)。 +- 您可以通过向源发送请求来检查字典的更新。 +- 限定列名称 +- 使用双引号引用标识符。 +- Http接口中的会话。 +- 复制表的优化查询不仅可以在leader上运行。 + +#### 向后不兼容的更改: {#backward-incompatible-changes-2} + +- 删除设置全局。 + +#### 小的变化: {#minor-changes} + +- 现在,在触发警报之后,日志将打印完整的堆栈跟踪。 +- 在启动时放宽对损坏/额外数据部件数量的验证(有太多误报)。 + +#### 错误修复: {#bug-fixes-5} + +- 修复了连接错误 “sticking” 当插入到分布式表中。 +- GLOBAL IN现在适用于查看分布式表的合并表中的查询。 +- 在Google Compute Engine虚拟机上检测到不正确的内核数。 这已被修复。 +- 缓存外部字典的可执行源如何工作的更改。 
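+
+下面给出一个简要的示意查询(仅用于说明上文提到的 `toYYYYMM`、带负偏移量的 `substring` 以及 `groupArray(max_size)` 的用法,函数行为以上面的说明为准):
+
+``` sql
+SELECT
+    toYYYYMM(toDate('2017-08-16')) AS ym,    -- 将日期转换为数字:201708
+    substring('ClickHouse', -5, 5) AS tail,  -- 负偏移量:从末尾取 5 个字符,结果为 'House'
+    groupArray(3)(number)          AS arr    -- 最多累积 3 个元素:[0, 1, 2]
+FROM (SELECT number FROM system.numbers LIMIT 10)
+```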
+- 修复了包含空字符的字符串的比较。 +- 修正了Float32主键字段与常量的比较。 +- 以前,对字段大小的不正确估计可能导致分配过大。 +- 修复了使用ALTER查询添加到表中的可空列时的崩溃。 +- 修复了按可空列排序时的崩溃,如果行数小于限制。 +- 修复了仅由常量值组成的子查询的顺序。 +- 以前,复制的表在丢弃表失败后可能仍处于无效状态。 +- 具有空结果的标量子查询的别名不再丢失。 +- 现在如果.so文件被损坏,使用编译的查询不会失败并出现错误。 diff --git a/docs/zh/whats_new/changelog/2018.md b/docs/zh/whats_new/changelog/2018.md new file mode 100644 index 00000000000..b62d8372d1a --- /dev/null +++ b/docs/zh/whats_new/changelog/2018.md @@ -0,0 +1,1063 @@ +--- +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_priority: 78 +toc_title: '2018' +--- + +## ClickHouse释放18.16 {#clickhouse-release-18-16} + +### 碌莽禄,拢,010-68520682\ {#clickhouse-release-18-16-1-2018-12-21} + +#### 错误修复: {#bug-fixes} + +- 修复了导致使用ODBC源更新字典时出现问题的错误。 [\#3825](https://github.com/ClickHouse/ClickHouse/issues/3825), [\#3829](https://github.com/ClickHouse/ClickHouse/issues/3829) +- 聚集函数的JIT编译现在适用于低心率列。 [\#3838](https://github.com/ClickHouse/ClickHouse/issues/3838) + +#### 改进: {#improvements} + +- 添加了 `low_cardinality_allow_in_native_format` 设置(默认情况下启用)。 如果禁用,则选择查询的LowCardinality列将转换为普通列,插入查询将需要普通列。 [\#3879](https://github.com/ClickHouse/ClickHouse/pull/3879) + +#### 构建改进: {#build-improvements} + +- 修复了基于macOS和ARM的构建。 + +### 碌莽禄,拢,010-68520682\ {#clickhouse-release-18-16-0-2018-12-14} + +#### 新功能: {#new-features} + +- `DEFAULT` 在以半结构化输入格式加载数据时,会计算表达式是否缺少字段 (`JSONEachRow`, `TSKV`). 该功能与启用 `insert_sample_with_metadata` 设置。 [\#3555](https://github.com/ClickHouse/ClickHouse/pull/3555) +- 该 `ALTER TABLE` 查询现在有 `MODIFY ORDER BY` 用于在添加或删除表列时更改排序键的操作。 这是在表有用 `MergeTree` 基于此排序键合并时执行其他任务的系列,例如 `SummingMergeTree`, `AggregatingMergeTree`,等等。 [\#3581](https://github.com/ClickHouse/ClickHouse/pull/3581) [\#3755](https://github.com/ClickHouse/ClickHouse/pull/3755) +- 对于在表 `MergeTree` 家庭,现在你可以指定一个不同的排序键 (`ORDER BY`)和索引 (`PRIMARY KEY`). 排序键可以长于索引。 [\#3581](https://github.com/ClickHouse/ClickHouse/pull/3581) +- 添加了 `hdfs` 表功能和 `HDFS` 用于将数据导入和导出到HDFS的表引擎。 [晨兴-xc](https://github.com/ClickHouse/ClickHouse/pull/3617) +- 增加了使用base64的功能: `base64Encode`, `base64Decode`, `tryBase64Decode`. [Alexander Krasheninnikov](https://github.com/ClickHouse/ClickHouse/pull/3350) +- 现在,您可以使用一个参数来配置的精度 `uniqCombined` 聚合函数(选择HyperLogLog单元格的数量)。 [\#3406](https://github.com/ClickHouse/ClickHouse/pull/3406) +- 添加了 `system.contributors` 包含在ClickHouse中进行提交的所有人的名称的表。 [\#3452](https://github.com/ClickHouse/ClickHouse/pull/3452) +- 增加了省略分区的能力 `ALTER TABLE ... FREEZE` 查询以便一次备份所有分区。 [\#3514](https://github.com/ClickHouse/ClickHouse/pull/3514) +- 已添加 `dictGet` 和 `dictGetOrDefault` 不需要指定返回值类型的函数。 该类型是从字典描述自动确定的。 [阿莫斯鸟](https://github.com/ClickHouse/ClickHouse/pull/3564) +- 现在,您可以在表描述中为列指定注释,并使用以下方式对其进行更改 `ALTER`. 
[\#3377](https://github.com/ClickHouse/ClickHouse/pull/3377) +- 阅读支持 `Join` 使用简单键键入表格。 [阿莫斯鸟](https://github.com/ClickHouse/ClickHouse/pull/3728) +- 现在,您可以指定选项 `join_use_nulls`, `max_rows_in_join`, `max_bytes_in_join`,和 `join_overflow_mode` 当创建一个 `Join` 键入表。 [阿莫斯鸟](https://github.com/ClickHouse/ClickHouse/pull/3728) +- 添加了 `joinGet` 功能,允许您使用 `Join` 像字典一样键入表格。 [阿莫斯鸟](https://github.com/ClickHouse/ClickHouse/pull/3728) +- 添加了 `partition_key`, `sorting_key`, `primary_key`,和 `sampling_key` 列到 `system.tables` 表以便提供关于表键的信息。 [\#3609](https://github.com/ClickHouse/ClickHouse/pull/3609) +- 添加了 `is_in_partition_key`, `is_in_sorting_key`, `is_in_primary_key`,和 `is_in_sampling_key` 列到 `system.columns` 桌子 [\#3609](https://github.com/ClickHouse/ClickHouse/pull/3609) +- 添加了 `min_time` 和 `max_time` 列到 `system.parts` 桌子 当分区键是由以下表达式组成的表达式时,将填充这些列 `DateTime` 列。 [Emmanuel Donin de Rosière](https://github.com/ClickHouse/ClickHouse/pull/3800) + +#### 错误修复: {#bug-fixes-1} + +- 修复和性能改进 `LowCardinality` 数据类型。 `GROUP BY` 使用 `LowCardinality(Nullable(...))`. 获取的值 `extremes`. 处理高阶函数。 `LEFT ARRAY JOIN`. 分布 `GROUP BY`. 返回的函数 `Array`. 执行 `ORDER BY`. 写入 `Distributed` 表(nicelulu)。 向后兼容 `INSERT` 从实现旧客户端的查询 `Native` 协议 支持 `LowCardinality` 为 `JOIN`. 在单个流中工作时提高性能。 [\#3823](https://github.com/ClickHouse/ClickHouse/pull/3823) [\#3803](https://github.com/ClickHouse/ClickHouse/pull/3803) [\#3799](https://github.com/ClickHouse/ClickHouse/pull/3799) [\#3769](https://github.com/ClickHouse/ClickHouse/pull/3769) [\#3744](https://github.com/ClickHouse/ClickHouse/pull/3744) [\#3681](https://github.com/ClickHouse/ClickHouse/pull/3681) [\#3651](https://github.com/ClickHouse/ClickHouse/pull/3651) [\#3649](https://github.com/ClickHouse/ClickHouse/pull/3649) [\#3641](https://github.com/ClickHouse/ClickHouse/pull/3641) [\#3632](https://github.com/ClickHouse/ClickHouse/pull/3632) [\#3568](https://github.com/ClickHouse/ClickHouse/pull/3568) [\#3523](https://github.com/ClickHouse/ClickHouse/pull/3523) [\#3518](https://github.com/ClickHouse/ClickHouse/pull/3518) +- 固定如何 `select_sequential_consistency` 选项工作。 以前,启用此设置时,在开始写入新分区后,有时会返回不完整的结果。 [\#2863](https://github.com/ClickHouse/ClickHouse/pull/2863) +- 执行DDL时正确指定数据库 `ON CLUSTER` 查询和 `ALTER UPDATE/DELETE`. [\#3772](https://github.com/ClickHouse/ClickHouse/pull/3772) [\#3460](https://github.com/ClickHouse/ClickHouse/pull/3460) +- 为视图中的子查询正确指定了数据库。 [\#3521](https://github.com/ClickHouse/ClickHouse/pull/3521) +- 修正了一个错误 `PREWHERE` 与 `FINAL` 为 `VersionedCollapsingMergeTree`. [7167bfd7](https://github.com/ClickHouse/ClickHouse/commit/7167bfd7b365538f7a91c4307ad77e552ab4e8c1) +- 现在你可以使用 `KILL QUERY` 取消尚未启动的查询,因为它们正在等待锁定表。 [\#3517](https://github.com/ClickHouse/ClickHouse/pull/3517) +- 更正日期和时间计算,如果时钟被移回午夜(这发生在伊朗,并发生在莫斯科1981年至1983年)。 以前,这导致时间比必要的时间早一天重置,并且还导致文本格式的日期和时间格式不正确。 [\#3819](https://github.com/ClickHouse/ClickHouse/pull/3819) +- 修正了某些情况下的错误 `VIEW` 和省略数据库的子查询。 [张冬](https://github.com/ClickHouse/ClickHouse/pull/3521) +- 修正了一个争用条件时,同时从读取 `MATERIALIZED VIEW` 和删除 `MATERIALIZED VIEW` 由于不锁定内部 `MATERIALIZED VIEW`. 
[\#3404](https://github.com/ClickHouse/ClickHouse/pull/3404) [\#3694](https://github.com/ClickHouse/ClickHouse/pull/3694) +- 修正了错误 `Lock handler cannot be nullptr.` [\#3689](https://github.com/ClickHouse/ClickHouse/pull/3689) +- 固定查询处理时 `compile_expressions` 选项已启用(默认情况下启用)。 非确定性常量表达式,如 `now` 功能不再展开。 [\#3457](https://github.com/ClickHouse/ClickHouse/pull/3457) +- 修复了在指定非常量比例参数时发生的崩溃 `toDecimal32/64/128` 功能。 +- 修复了尝试插入数组时的错误 `NULL` 中的元素 `Values` 格式化为类型的列 `Array` 没有 `Nullable` (如果 `input_format_values_interpret_expressions` = 1). [\#3487](https://github.com/ClickHouse/ClickHouse/pull/3487) [\#3503](https://github.com/ClickHouse/ClickHouse/pull/3503) +- 固定连续错误登录 `DDLWorker` 如果动物园管理员不可用。 [8f50c620](https://github.com/ClickHouse/ClickHouse/commit/8f50c620334988b28018213ec0092fe6423847e2) +- 修正了返回类型 `quantile*` 从功能 `Date` 和 `DateTime` 参数的类型。 [\#3580](https://github.com/ClickHouse/ClickHouse/pull/3580) +- 修正了 `WITH` 子句,如果它指定了一个没有表达式的简单别名。 [\#3570](https://github.com/ClickHouse/ClickHouse/pull/3570) +- 固定处理具有命名子查询和限定列名的查询时 `enable_optimize_predicate_expression` 被启用。 [张冬](https://github.com/ClickHouse/ClickHouse/pull/3588) +- 修正了错误 `Attempt to attach to nullptr thread group` 使用实例化视图时。 [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3623) +- 修正了传递某些不正确的参数时崩溃 `arrayReverse` 功能。 [73e3a7b6](https://github.com/ClickHouse/ClickHouse/commit/73e3a7b662161d6005e7727d8a711b930386b871) +- 修正了缓冲区溢出 `extractURLParameter` 功能。 改进的性能。 添加了包含零字节的字符串的正确处理。 [141e9799](https://github.com/ClickHouse/ClickHouse/commit/141e9799e49201d84ea8e951d1bed4fb6d3dacb5) +- 在固定缓冲区溢出 `lowerUTF8` 和 `upperUTF8` 功能。 删除了执行这些功能的能力 `FixedString` 类型参数。 [\#3662](https://github.com/ClickHouse/ClickHouse/pull/3662) +- 修复了删除时罕见的竞争条件 `MergeTree` 桌子 [\#3680](https://github.com/ClickHouse/ClickHouse/pull/3680) +- 修正了从读取时的争用条件 `Buffer` 表和同时执行 `ALTER` 或 `DROP` 在目标桌上。 [\#3719](https://github.com/ClickHouse/ClickHouse/pull/3719) +- 修正了一个段错误,如果 `max_temporary_non_const_columns` 超过限制。 [\#3788](https://github.com/ClickHouse/ClickHouse/pull/3788) + +#### 改进: {#improvements-1} + +- 服务器不会将处理后的配置文件写入 `/etc/clickhouse-server/` 目录。 相反,它将它们保存在 `preprocessed_configs` 里面的目录 `path`. 
这意味着 `/etc/clickhouse-server/` 目录没有写访问权限 `clickhouse` 用户,从而提高了安全性。 [\#2443](https://github.com/ClickHouse/ClickHouse/pull/2443) +- 该 `min_merge_bytes_to_use_direct_io` 默认情况下,选项设置为10GiB。 将在MergeTree系列中执行形成大部分表的合并 `O_DIRECT` 模式,这可以防止过多的页高速缓存逐出。 [\#3504](https://github.com/ClickHouse/ClickHouse/pull/3504) +- 当表数量非常多时,加速服务器启动。 [\#3398](https://github.com/ClickHouse/ClickHouse/pull/3398) +- 添加了连接池和HTTP `Keep-Alive` 用于副本之间的连接。 [\#3594](https://github.com/ClickHouse/ClickHouse/pull/3594) +- 如果查询语法无效,则 `400 Bad Request` 代码在返回 `HTTP` 接口(500以前返回)。 [31bc680a](https://github.com/ClickHouse/ClickHouse/commit/31bc680ac5f4bb1d0360a8ba4696fa84bb47d6ab) +- 该 `join_default_strictness` 选项设置为 `ALL` 默认情况下为兼容性。 [120e2cbe](https://github.com/ClickHouse/ClickHouse/commit/120e2cbe2ff4fbad626c28042d9b28781c805afe) +- 删除日志记录 `stderr` 从 `re2` 无效或复杂正则表达式的库。 [\#3723](https://github.com/ClickHouse/ClickHouse/pull/3723) +- 添加的 `Kafka` 表引擎:在开始从Kafka读取之前检查订阅;表的kafka\_max\_block\_size设置。 [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3396) +- 该 `cityHash64`, `farmHash64`, `metroHash64`, `sipHash64`, `halfMD5`, `murmurHash2_32`, `murmurHash2_64`, `murmurHash3_32`,和 `murmurHash3_64` 函数现在适用于任意数量的参数和元组形式的参数。 [\#3451](https://github.com/ClickHouse/ClickHouse/pull/3451) [\#3519](https://github.com/ClickHouse/ClickHouse/pull/3519) +- 该 `arrayReverse` 函数现在适用于任何类型的数组。 [73e3a7b6](https://github.com/ClickHouse/ClickHouse/commit/73e3a7b662161d6005e7727d8a711b930386b871) +- 增加了一个可选参数:插槽大小的 `timeSlots` 功能。 [基里尔\*什瓦科夫](https://github.com/ClickHouse/ClickHouse/pull/3724) +- 为 `FULL` 和 `RIGHT JOIN`,该 `max_block_size` 设置用于右表中未连接的数据流。 [阿莫斯鸟](https://github.com/ClickHouse/ClickHouse/pull/3699) +- 添加了 `--secure` 命令行参数 `clickhouse-benchmark` 和 `clickhouse-performance-test` 启用TLS。 [\#3688](https://github.com/ClickHouse/ClickHouse/pull/3688) [\#3690](https://github.com/ClickHouse/ClickHouse/pull/3690) +- 类型转换时的结构 `Buffer` 表的类型与目标表的结构不匹配。 [维塔利\*巴拉诺夫](https://github.com/ClickHouse/ClickHouse/pull/3603) +- 添加了 `tcp_keep_alive_timeout` 在指定的时间间隔内不活动后启用保持活动数据包的选项。 [\#3441](https://github.com/ClickHouse/ClickHouse/pull/3441) +- 删除不必要的引用值的分区键中 `system.parts` 表,如果它由单列组成。 [\#3652](https://github.com/ClickHouse/ClickHouse/pull/3652) +- 模函数适用于 `Date` 和 `DateTime` 数据类型。 [\#3385](https://github.com/ClickHouse/ClickHouse/pull/3385) +- 添加同义词的 `POWER`, `LN`, `LCASE`, `UCASE`, `REPLACE`, `LOCATE`, `SUBSTR`,和 `MID` 功能。 [\#3774](https://github.com/ClickHouse/ClickHouse/pull/3774) [\#3763](https://github.com/ClickHouse/ClickHouse/pull/3763) 为了与SQL标准兼容,某些函数名称不区分大小写。 添加语法糖 `SUBSTRING(expr FROM start FOR length)` 对于与SQL的兼容性。 [\#3804](https://github.com/ClickHouse/ClickHouse/pull/3804) +- 增加了以下能力 `mlock` 对应于存储器页 `clickhouse-server` 可执行代码,以防止它被强制出内存。 默认情况下禁用此功能。 [\#3553](https://github.com/ClickHouse/ClickHouse/pull/3553) +- 从读取时改进的性能 `O_DIRECT` (与 `min_bytes_to_use_direct_io` 选项启用)。 [\#3405](https://github.com/ClickHouse/ClickHouse/pull/3405) +- 的改进的性能 `dictGet...OrDefault` 常量键参数和非常量默认参数的函数。 [阿莫斯鸟](https://github.com/ClickHouse/ClickHouse/pull/3563) +- 该 `firstSignificantSubdomain` 功能现在处理域 `gov`, `mil`,和 `edu`. [Igor Hatarist](https://github.com/ClickHouse/ClickHouse/pull/3601) 改进的性能。 [\#3628](https://github.com/ClickHouse/ClickHouse/pull/3628) +- 能够指定用于启动的自定义环境变量 `clickhouse-server` 使用 `SYS-V init.d` 通过定义脚本 `CLICKHOUSE_PROGRAM_ENV` 在 `/etc/default/clickhouse`. 
+ [Pavlo Bashynskyi](https://github.com/ClickHouse/ClickHouse/pull/3612) +- Clickhouse-server init脚本的正确返回代码。 [\#3516](https://github.com/ClickHouse/ClickHouse/pull/3516) +- 该 `system.metrics` 表现在有 `VersionInteger` 公制和 `system.build_options` 有添加的行 `VERSION_INTEGER`,其中包含ClickHouse版本的数字形式,例如 `18016000`. [\#3644](https://github.com/ClickHouse/ClickHouse/pull/3644) +- 删除比较的能力 `Date` 输入一个数字,以避免潜在的错误,如 `date = 2018-12-17`,其中日期周围的引号被错误省略。 [\#3687](https://github.com/ClickHouse/ClickHouse/pull/3687) +- 修正了有状态函数的行为,如 `rowNumberInAllBlocks`. 他们之前输出的结果是由于在查询分析期间启动而大一个数字。 [阿莫斯鸟](https://github.com/ClickHouse/ClickHouse/pull/3729) +- 如果 `force_restore_data` 文件无法删除,将显示错误消息。 [阿莫斯鸟](https://github.com/ClickHouse/ClickHouse/pull/3794) + +#### 构建改进: {#build-improvements-1} + +- 更新了 `jemalloc` 库,它修复了潜在的内存泄漏。 [阿莫斯鸟](https://github.com/ClickHouse/ClickHouse/pull/3557) +- 分析与 `jemalloc` 默认情况下为了调试生成启用。 [2cc82f5c](https://github.com/ClickHouse/ClickHouse/commit/2cc82f5cbe266421cd4c1165286c2c47e5ffcb15) +- 增加了运行集成测试的能力,当只 `Docker` 安装在系统上。 [\#3650](https://github.com/ClickHouse/ClickHouse/pull/3650) +- 在SELECT查询中添加了模糊表达式测试。 [\#3442](https://github.com/ClickHouse/ClickHouse/pull/3442) +- 为提交添加了一个压力测试,它以并行和随机顺序执行功能测试,以检测更多的竞争条件。 [\#3438](https://github.com/ClickHouse/ClickHouse/pull/3438) +- 改进了在Docker映像中启动clickhouse-server的方法。 [Elghazal Ahmed](https://github.com/ClickHouse/ClickHouse/pull/3663) +- 对于Docker映像,增加了对使用数据库中的文件初始化数据库的支持 `/docker-entrypoint-initdb.d` 目录。 [康斯坦丁\*列别杰夫](https://github.com/ClickHouse/ClickHouse/pull/3695) +- 修复了基于ARM的构建。 [\#3709](https://github.com/ClickHouse/ClickHouse/pull/3709) + +#### 向后不兼容的更改: {#backward-incompatible-changes} + +- 删除比较的能力 `Date` 用数字键入。 而不是 `toDate('2018-12-18') = 17883`,必须使用显式类型转换 `= toDate(17883)` [\#3687](https://github.com/ClickHouse/ClickHouse/pull/3687) + +## ClickHouse释放18.14 {#clickhouse-release-18-14} + +### 碌莽禄,拢,010-68520682\ {#clickhouse-release-18-14-19-2018-12-19} + +#### 错误修复: {#bug-fixes-2} + +- 修复了导致使用ODBC源更新字典时出现问题的错误。 [\#3825](https://github.com/ClickHouse/ClickHouse/issues/3825), [\#3829](https://github.com/ClickHouse/ClickHouse/issues/3829) +- 执行DDL时正确指定数据库 `ON CLUSTER` 查询。 [\#3460](https://github.com/ClickHouse/ClickHouse/pull/3460) +- 修正了一个段错误,如果 `max_temporary_non_const_columns` 超过限制。 [\#3788](https://github.com/ClickHouse/ClickHouse/pull/3788) + +#### 构建改进: {#build-improvements-2} + +- 修复了基于ARM的构建。 + +### 碌莽禄,拢,010-68520682\戮卤篓拢,010-68520682\ {#clickhouse-release-18-14-18-2018-12-04} + +#### 错误修复: {#bug-fixes-3} + +- 修正错误 `dictGet...` 类型字典的函数 `range`,如果其中一个参数是恒定的,而另一个则不是。 [\#3751](https://github.com/ClickHouse/ClickHouse/pull/3751) +- 修复了导致消息的错误 `netlink: '...': attribute type 1 has an invalid length` 要打印在Linux内核日志中,这只发生在足够新鲜的Linux内核版本上。 [\#3749](https://github.com/ClickHouse/ClickHouse/pull/3749) +- 在功能固定段错误 `empty` 对于争论 `FixedString` 类型。 [丹尼尔,道广明](https://github.com/ClickHouse/ClickHouse/pull/3703) +- 修正了使用大值时过多的内存分配 `max_query_size` 设置(内存块 `max_query_size` 字节被预先分配一次)。 [\#3720](https://github.com/ClickHouse/ClickHouse/pull/3720) + +#### 构建更改: {#build-changes} + +- 使用操作系统包中的版本7的LLVM/Clang库修复构建(这些库用于运行时查询编译)。 [\#3582](https://github.com/ClickHouse/ClickHouse/pull/3582) + +### 碌莽禄,拢,010-68520682\戮卤篓拢,010-68520682\ {#clickhouse-release-18-14-17-2018-11-30} + +#### 错误修复: {#bug-fixes-4} + +- 修复了ODBC桥进程未与主服务器进程终止的情况。 [\#3642](https://github.com/ClickHouse/ClickHouse/pull/3642) +- 固定同步插入 `Distributed` 具有不同于远程表的列列表的列列表的表。 [\#3673](https://github.com/ClickHouse/ClickHouse/pull/3673) +- 修复了丢弃MergeTree表时可能导致崩溃的罕见竞争条件。 
[\#3643](https://github.com/ClickHouse/ClickHouse/pull/3643) +- 修复了查询线程创建失败时的查询死锁 `Resource temporarily unavailable` 错误 [\#3643](https://github.com/ClickHouse/ClickHouse/pull/3643) +- 修正了解析 `ENGINE` 条款时 `CREATE AS table` 语法被使用和 `ENGINE` 子句之前指定 `AS table` (错误导致忽略指定的引擎)。 [\#3692](https://github.com/ClickHouse/ClickHouse/pull/3692) + +### 碌莽禄,拢,010-68520682\戮卤篓拢,010-68520682\ {#clickhouse-release-18-14-15-2018-11-21} + +#### 错误修复: {#bug-fixes-5} + +- 反序列化类型的列时,高估了内存块的大小 `Array(String)` 这导致 “Memory limit exceeded” 错误。 该问题出现在版本18.12.13中。 [\#3589](https://github.com/ClickHouse/ClickHouse/issues/3589) + +### 碌莽禄,拢,010-68520682\戮卤篓拢,010-68520682\ {#clickhouse-release-18-14-14-2018-11-20} + +#### 错误修复: {#bug-fixes-6} + +- 固定 `ON CLUSTER` 当群集配置为安全时进行查询(标志 ``). [\#3599](https://github.com/ClickHouse/ClickHouse/pull/3599) + +#### 构建更改: {#build-changes-1} + +- 固定的问题(llvm-7从系统,macos) [\#3582](https://github.com/ClickHouse/ClickHouse/pull/3582) + +### 碌莽禄,拢,010-68520682\戮卤篓拢,010-68520682\ {#clickhouse-release-18-14-13-2018-11-08} + +#### 错误修复: {#bug-fixes-7} + +- 修正了 `Block structure mismatch in MergingSorted stream` 错误 [\#3162](https://github.com/ClickHouse/ClickHouse/issues/3162) +- 固定 `ON CLUSTER` 查询的情况下,当安全连接被打开的群集配置( `` 标志)。 [\#3465](https://github.com/ClickHouse/ClickHouse/pull/3465) +- 修复了查询中使用的错误 `SAMPLE`, `PREWHERE` 和别名列。 [\#3543](https://github.com/ClickHouse/ClickHouse/pull/3543) +- 修正了一个罕见的 `unknown compression method` 错误时 `min_bytes_to_use_direct_io` 设置已启用。 [3544](https://github.com/ClickHouse/ClickHouse/pull/3544) + +#### 性能改进: {#performance-improvements} + +- 查询的固定性能回归 `GROUP BY` 在AMD EPYC处理器上执行时,uint16或Date类型的列。 [Igor Lapko](https://github.com/ClickHouse/ClickHouse/pull/3512) +- 修正了处理长字符串的查询的性能回归。 [\#3530](https://github.com/ClickHouse/ClickHouse/pull/3530) + +#### 构建改进: {#build-improvements-3} + +- 简化阿卡迪亚构建的改进。 [\#3475](https://github.com/ClickHouse/ClickHouse/pull/3475), [\#3535](https://github.com/ClickHouse/ClickHouse/pull/3535) + +### 碌莽禄,拢,010-68520682\戮卤篓拢,010-68520682\ {#clickhouse-release-18-14-12-2018-11-02} + +#### 错误修复: {#bug-fixes-8} + +- 修复了加入两个未命名的子查询时的崩溃。 [\#3505](https://github.com/ClickHouse/ClickHouse/pull/3505) +- 修正了生成不正确的查询(用空 `WHERE` 子句)查询外部数据库时。 [hotid](https://github.com/ClickHouse/ClickHouse/pull/3477) +- 修正了在ODBC字典中使用不正确的超时值。 [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3511) + +### 碌莽禄,拢,010-68520682\戮卤篓拢,010-68520682\ {#clickhouse-release-18-14-11-2018-10-29} + +#### 错误修复: {#bug-fixes-9} + +- 修正了错误 `Block structure mismatch in UNION stream: different number of columns` 在限制查询。 [\#2156](https://github.com/ClickHouse/ClickHouse/issues/2156) +- 修复了在嵌套结构中包含数组的表中合并数据时出现的错误。 [\#3397](https://github.com/ClickHouse/ClickHouse/pull/3397) +- 修正了不正确的查询结果,如果 `merge_tree_uniform_read_distribution` 设置被禁用(默认情况下启用)。 [\#3429](https://github.com/ClickHouse/ClickHouse/pull/3429) +- 修复了在本机格式的分布式表中插入错误。 [\#3411](https://github.com/ClickHouse/ClickHouse/issues/3411) + +### 碌莽禄,拢,010-68520682\戮卤篓拢,010-68520682\ {#clickhouse-release-18-14-10-2018-10-23} + +- 该 `compile_expressions` 默认情况下禁用设置(表达式的JIT编译)。 [\#3410](https://github.com/ClickHouse/ClickHouse/pull/3410) +- 该 `enable_optimize_predicate_expression` 默认情况下禁用设置。 + +### 碌莽禄,拢,010-68520682\戮卤篓拢,010-68520682\ {#clickhouse-release-18-14-9-2018-10-16} + +#### 新功能: {#new-features-1} + +- 该 `WITH CUBE` 修饰符 `GROUP BY` (替代语法 `GROUP BY CUBE(...)` 也可用)。 [\#3172](https://github.com/ClickHouse/ClickHouse/pull/3172) +- 添加了 `formatDateTime` 功能。 [Alexandr Krasheninnikov](https://github.com/ClickHouse/ClickHouse/pull/2770) +- 添加了 
`JDBC` 表引擎和 `jdbc` 表功能(需要安装clickhouse-jdbc桥)。 [Alexandr Krasheninnikov](https://github.com/ClickHouse/ClickHouse/pull/3210) +- 增加了使用ISO周编号的功能: `toISOWeek`, `toISOYear`, `toStartOfISOYear`,和 `toDayOfYear`. [\#3146](https://github.com/ClickHouse/ClickHouse/pull/3146) +- 现在你可以使用 `Nullable` 列 `MySQL` 和 `ODBC` 桌子 [\#3362](https://github.com/ClickHouse/ClickHouse/pull/3362) +- 嵌套的数据结构可以被读取为嵌套的对象 `JSONEachRow` 格式。 添加了 `input_format_import_nested_json` 设置。 [维罗曼\*云坎](https://github.com/ClickHouse/ClickHouse/pull/3144) +- 并行处理可用于许多 `MATERIALIZED VIEW`s插入数据时。 见 `parallel_view_processing` 设置。 [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3208) +- 添加了 `SYSTEM FLUSH LOGS` 查询(强制日志刷新到系统表,如 `query_log`) [\#3321](https://github.com/ClickHouse/ClickHouse/pull/3321) +- 现在,您可以使用预定义 `database` 和 `table` 声明时的宏 `Replicated` 桌子 [\#3251](https://github.com/ClickHouse/ClickHouse/pull/3251) +- 增加了阅读的能力 `Decimal` 工程表示法中的类型值(表示十的幂)。 [\#3153](https://github.com/ClickHouse/ClickHouse/pull/3153) + +#### 实验特点: {#experimental-features} + +- 对GROUP BY子句进行优化 `LowCardinality data types.` [\#3138](https://github.com/ClickHouse/ClickHouse/pull/3138) +- 表达式的优化计算 `LowCardinality data types.` [\#3200](https://github.com/ClickHouse/ClickHouse/pull/3200) + +#### 改进: {#improvements-2} + +- 显着减少查询的内存消耗 `ORDER BY` 和 `LIMIT`. 见 `max_bytes_before_remerge_sort` 设置。 [\#3205](https://github.com/ClickHouse/ClickHouse/pull/3205) +- 在没有 `JOIN` (`LEFT`, `INNER`, …), `INNER JOIN` 是假定的。 [\#3147](https://github.com/ClickHouse/ClickHouse/pull/3147) +- 限定星号在以下查询中正常工作 `JOIN`. [张冬](https://github.com/ClickHouse/ClickHouse/pull/3202) +- 该 `ODBC` 表引擎正确地选择用于引用远程数据库的SQL方言中的标识符的方法。 [Alexandr Krasheninnikov](https://github.com/ClickHouse/ClickHouse/pull/3210) +- 该 `compile_expressions` 默认情况下启用设置(表达式的JIT编译)。 +- 修复了同时删除数据库/表(如果存在)和创建数据库/表(如果不存在)的行为。 前情提要 `CREATE DATABASE ... IF NOT EXISTS` 查询可能会返回错误消息 “File … already exists” 和 `CREATE TABLE ... IF NOT EXISTS` 和 `DROP TABLE IF EXISTS` 查询可能会返回 `Table ... is creating or attaching right now`. [\#3101](https://github.com/ClickHouse/ClickHouse/pull/3101) +- 当从MySQL或ODBC表中查询时,LIKE和IN表达式具有常量右半部分被传递到远程服务器。 [\#3182](https://github.com/ClickHouse/ClickHouse/pull/3182) +- 当从MySQL和ODBC表查询时,与WHERE子句中常量表达式的比较会传递给远程服务器。 以前,只通过与常量的比较。 [\#3182](https://github.com/ClickHouse/ClickHouse/pull/3182) +- 正确计算终端中的行宽 `Pretty` 格式,包括带有象形文字的字符串。 [阿莫斯鸟](https://github.com/ClickHouse/ClickHouse/pull/3257). +- `ON CLUSTER` 可以指定 `ALTER UPDATE` 查询。 +- 提高了读取数据的性能 `JSONEachRow` 格式。 [\#3332](https://github.com/ClickHouse/ClickHouse/pull/3332) +- 添加同义词的 `LENGTH` 和 `CHARACTER_LENGTH` 功能的兼容性。 该 `CONCAT` 函数不再区分大小写。 [\#3306](https://github.com/ClickHouse/ClickHouse/pull/3306) +- 添加了 `TIMESTAMP` 的同义词 `DateTime` 类型。 [\#3390](https://github.com/ClickHouse/ClickHouse/pull/3390) +- 服务器日志中始终为query\_id保留空间,即使日志行与查询无关。 这使得使用第三方工具更容易分析服务器文本日志。 +- 当查询超过整数千兆字节的下一级别时,会记录查询的内存消耗。 [\#3205](https://github.com/ClickHouse/ClickHouse/pull/3205) +- 为使用本机协议的客户端库错误发送的列少于服务器预期的插入查询时的情况添加了兼容模式。 使用clickhouse-cpp库时,这种情况是可能的。 以前,此方案会导致服务器崩溃。 [\#3171](https://github.com/ClickHouse/ClickHouse/pull/3171) +- 在用户定义的WHERE表达式中 `clickhouse-copier`,您现在可以使用 `partition_key` 别名(用于按源表分区进行其他过滤)。 如果分区方案在复制过程中发生更改,但仅稍有更改,这很有用。 [\#3166](https://github.com/ClickHouse/ClickHouse/pull/3166) +- 的工作流程 `Kafka` 引擎已被移动到后台线程池中,以便在高负载下自动降低数据读取速度。 [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3215). +- 支持阅读 `Tuple` 和 `Nested` 结构的值,如 `struct` 在 `Cap'n'Proto format`. 
[Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3216) +- 顶级域名列表 `firstSignificantSubdomain` 功能现在包括域 `biz`. [decaseal](https://github.com/ClickHouse/ClickHouse/pull/3219) +- 在外部字典的配置, `null_value` 被解释为默认数据类型的值。 [\#3330](https://github.com/ClickHouse/ClickHouse/pull/3330) +- 支持 `intDiv` 和 `intDivOrZero` 功能 `Decimal`. [b48402e8](https://github.com/ClickHouse/ClickHouse/commit/b48402e8712e2b9b151e0eef8193811d433a1264) +- 支持 `Date`, `DateTime`, `UUID`,和 `Decimal` 类型作为键 `sumMap` 聚合函数。 [\#3281](https://github.com/ClickHouse/ClickHouse/pull/3281) +- 支持 `Decimal` 外部字典中的数据类型。 [\#3324](https://github.com/ClickHouse/ClickHouse/pull/3324) +- 支持 `Decimal` 数据类型in `SummingMergeTree` 桌子 [\#3348](https://github.com/ClickHouse/ClickHouse/pull/3348) +- 增加了专业化 `UUID` 在 `if`. [\#3366](https://github.com/ClickHouse/ClickHouse/pull/3366) +- 减少的数量 `open` 和 `close` 从读取时系统调用 `MergeTree table`. [\#3283](https://github.com/ClickHouse/ClickHouse/pull/3283) +- A `TRUNCATE TABLE` 查询可以在任何副本上执行(将查询传递给领导副本)。 [基里尔\*什瓦科夫](https://github.com/ClickHouse/ClickHouse/pull/3375) + +#### 错误修复: {#bug-fixes-10} + +- 修正了一个问题 `Dictionary` 表 `range_hashed` 字典 此错误发生在版本18.12.17中。 [\#1702](https://github.com/ClickHouse/ClickHouse/pull/1702) +- 修正了加载时的错误 `range_hashed` 字典(消息 `Unsupported type Nullable (...)`). 此错误发生在版本18.12.17中。 [\#3362](https://github.com/ClickHouse/ClickHouse/pull/3362) +- 在固定的错误 `pointInPolygon` 函数由于不准确的计算的多边形与大量的顶点位于彼此靠近的积累。 [\#3331](https://github.com/ClickHouse/ClickHouse/pull/3331) [\#3341](https://github.com/ClickHouse/ClickHouse/pull/3341) +- 如果在合并数据部分之后,结果部分的校验和与另一个副本中相同合并的结果不同,则删除合并的结果并从另一个副本下载数据部分(这是正确的行为)。 但是在下载数据部分之后,由于该部分已经存在的错误(因为合并后数据部分被删除了一些延迟),因此无法将其添加到工作集中。 这导致周期性尝试下载相同的数据。 [\#3194](https://github.com/ClickHouse/ClickHouse/pull/3194) +- 修正了查询总内存消耗的不正确计算(由于计算不正确, `max_memory_usage_for_all_queries` 设置工作不正确, `MemoryTracking` 度量值不正确)。 此错误发生在版本18.12.13中。 [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3344) +- 修正的功能 `CREATE TABLE ... ON CLUSTER ... AS SELECT ...` 此错误发生在版本18.12.13中。 [\#3247](https://github.com/ClickHouse/ClickHouse/pull/3247) +- 修正了数据结构的不必要的准备 `JOIN`如果发起查询的服务器上 `JOIN` 仅在远程服务器上执行。 [\#3340](https://github.com/ClickHouse/ClickHouse/pull/3340) +- 在固定的错误 `Kafka` 引擎:开始读取数据时异常后的死锁,并在完成时锁定 [Marek Vavruša](https://github.com/ClickHouse/ClickHouse/pull/3215). +- 为 `Kafka` 表,可选 `schema` 参数未被传递(的架构 `Cap'n'Proto` 格式)。 [Vojtech Splichal](https://github.com/ClickHouse/ClickHouse/pull/3150) +- 如果ZooKeeper服务器的整体服务器接受连接,但随后立即关闭它,而不是响应握手,ClickHouse选择连接另一台服务器。 以前,这会产生错误 `Cannot read all data. Bytes read: 0. Bytes expected: 4.` 服务器无法启动。 [8218cf3a](https://github.com/ClickHouse/ClickHouse/commit/8218cf3a5f39a43401953769d6d12a0bb8d29da9) +- 如果ZooKeeper服务器的整体包含DNS查询返回错误的服务器,则忽略这些服务器。 [17b8e209](https://github.com/ClickHouse/ClickHouse/commit/17b8e209221061325ad7ba0539f03c6e65f87f29) +- 固定类型之间的转换 `Date` 和 `DateTime` 当在插入数据 `VALUES` 格式(如果 `input_format_values_interpret_expressions = 1`). 以前,转换是在Unix Epoch时间中的天数和Unix时间戳的数值之间进行的,这会导致意外的结果。 [\#3229](https://github.com/ClickHouse/ClickHouse/pull/3229) +- 修正类型之间的转换 `Decimal` 和整数。 [\#3211](https://github.com/ClickHouse/ClickHouse/pull/3211) +- 在固定的错误 `enable_optimize_predicate_expression` 设置。 [张冬](https://github.com/ClickHouse/ClickHouse/pull/3231) +- 如果使用非默认的CSV分隔符,则修复了CSV格式的浮点数解析错误,例如 `;` [\#3155](https://github.com/ClickHouse/ClickHouse/pull/3155) +- 修正了 `arrayCumSumNonNegative` 函数(它不累加负值,如果累加器小于零)。 [Aleksey Studnev](https://github.com/ClickHouse/ClickHouse/pull/3163) +- 固定如何 `Merge` 表工作的顶部 `Distributed` 使用时的表 `PREWHERE`. 
[\#3165](https://github.com/ClickHouse/ClickHouse/pull/3165) +- 在错误修复 `ALTER UPDATE` 查询。 +- 在固定的错误 `odbc` 表功能,出现在版本18.12。 [\#3197](https://github.com/ClickHouse/ClickHouse/pull/3197) +- 修正了聚合函数的操作 `StateArray` 组合子 [\#3188](https://github.com/ClickHouse/ClickHouse/pull/3188) +- 修正了划分时崩溃 `Decimal` 值为零。 [69dd6609](https://github.com/ClickHouse/ClickHouse/commit/69dd6609193beb4e7acd3e6ad216eca0ccfb8179) +- 使用固定输出类型的操作 `Decimal` 和整数参数。 [\#3224](https://github.com/ClickHouse/ClickHouse/pull/3224) +- 修正了在段错误 `GROUP BY` 上 `Decimal128`. [3359ba06](https://github.com/ClickHouse/ClickHouse/commit/3359ba06c39fcd05bfdb87d6c64154819621e13a) +- 该 `log_query_threads` 设置(关于查询执行的每个线程的日志记录信息)现在生效,只有当 `log_queries` 选项(有关查询的日志记录信息)设置为1。 由于 `log_query_threads` 默认情况下,即使禁用了查询日志记录,也会先前记录有关线程的信息。 [\#3241](https://github.com/ClickHouse/ClickHouse/pull/3241) +- 修正了分位数聚合函数的分布式操作中的错误(错误消息 `Not found column quantile...`). [292a8855](https://github.com/ClickHouse/ClickHouse/commit/292a885533b8e3b41ce8993867069d14cbd5a664) +- 修复了同时在18.12.17版服务器和旧服务器的集群上工作时的兼容性问题。 对于具有固定和非固定长度的GROUP BY键的分布式查询,如果要聚合大量数据,则返回的数据并不总是完全聚合(两个不同的行包含相同的聚合键)。 [\#3254](https://github.com/ClickHouse/ClickHouse/pull/3254) +- 固定处理替换 `clickhouse-performance-test`,如果查询只包含测试中声明的替换的一部分。 [\#3263](https://github.com/ClickHouse/ClickHouse/pull/3263) +- 修复了使用时的错误 `FINAL` 与 `PREWHERE`. [\#3298](https://github.com/ClickHouse/ClickHouse/pull/3298) +- 修复了使用时的错误 `PREWHERE` 在过程中添加的列 `ALTER`. [\#3298](https://github.com/ClickHouse/ClickHouse/pull/3298) +- 增加了一个检查没有 `arrayJoin` 为 `DEFAULT` 和 `MATERIALIZED` 表达式。 前情提要, `arrayJoin` 插入数据时导致错误。 [\#3337](https://github.com/ClickHouse/ClickHouse/pull/3337) +- 增加了一个检查没有 `arrayJoin` 在一个 `PREWHERE` 条款 以前,这导致了类似的消息 `Size ... doesn't match` 或 `Unknown compression method` 执行查询时。 [\#3357](https://github.com/ClickHouse/ClickHouse/pull/3357) +- 修复了优化后可能发生的极少数情况下的段错误,并将相等性评估与相应的IN表达式链接起来。 [刘一民-字节舞](https://github.com/ClickHouse/ClickHouse/pull/3339) +- 小幅更正 `clickhouse-benchmark`:以前,客户端信息没有发送到服务器;现在关闭时更准确地计算执行的查询数量,并限制迭代次数。 [\#3351](https://github.com/ClickHouse/ClickHouse/pull/3351) [\#3352](https://github.com/ClickHouse/ClickHouse/pull/3352) + +#### 向后不兼容的更改: {#backward-incompatible-changes-1} + +- 删除了 `allow_experimental_decimal_type` 选项。 该 `Decimal` 数据类型可供默认使用。 [\#3329](https://github.com/ClickHouse/ClickHouse/pull/3329) + +## ClickHouse释放18.12 {#clickhouse-release-18-12} + +### 碌莽禄,拢,010-68520682\ {#clickhouse-release-18-12-17-2018-09-16} + +#### 新功能: {#new-features-2} + +- `invalidate_query` (指定查询来检查是否需要更新外部字典的能力)实现了 `clickhouse` 资料来源。 [\#3126](https://github.com/ClickHouse/ClickHouse/pull/3126) +- 增加了使用的能力 `UInt*`, `Int*`,和 `DateTime` 数据类型(与 `Date` 类型)作为 `range_hashed` 定义范围边界的外部字典键。 现在 `NULL` 可用于指定开放范围。 [瓦西里\*内姆科夫](https://github.com/ClickHouse/ClickHouse/pull/3123) +- 该 `Decimal` 类型现在支持 `var*` 和 `stddev*` 聚合函数。 [\#3129](https://github.com/ClickHouse/ClickHouse/pull/3129) +- 该 `Decimal` 类型现在支持数学函数 (`exp`, `sin` 等等。) [\#3129](https://github.com/ClickHouse/ClickHouse/pull/3129) +- 该 `system.part_log` 表现在有 `partition_id` 列。 [\#3089](https://github.com/ClickHouse/ClickHouse/pull/3089) + +#### 错误修复: {#bug-fixes-11} + +- `Merge` 现在正常工作 `Distributed` 桌子 [张冬](https://github.com/ClickHouse/ClickHouse/pull/3159) +- 修复了不兼容(不必要的依赖 `glibc` 版本),这使得它不可能运行ClickHouse的 `Ubuntu Precise` 和旧版本。 在版本18.12.13中出现了不兼容。 [\#3130](https://github.com/ClickHouse/ClickHouse/pull/3130) +- 在固定的错误 `enable_optimize_predicate_expression` 设置。 [张冬](https://github.com/ClickHouse/ClickHouse/pull/3107) +- 
修复了在早于18.12.13的版本上使用副本集群并同时在具有较新版本的服务器上创建表的新副本时出现的向后兼容性的一个小问题(如消息中所示 `Can not clone replica, because the ... updated to new ClickHouse version`,这是合乎逻辑的,但不应该发生)。 [\#3122](https://github.com/ClickHouse/ClickHouse/pull/3122) + +#### 向后不兼容的更改: {#backward-incompatible-changes-2} + +- 该 `enable_optimize_predicate_expression` 默认情况下启用选项(这是相当乐观的)。 如果发生与搜索列名相关的查询分析错误,请设置 `enable_optimize_predicate_expression` 为0。 [张冬](https://github.com/ClickHouse/ClickHouse/pull/3107) + +### 碌莽禄,拢,0755-88888888 {#clickhouse-release-18-12-14-2018-09-13} + +#### 新功能: {#new-features-3} + +- 增加了对 `ALTER UPDATE` 查询。 [\#3035](https://github.com/ClickHouse/ClickHouse/pull/3035) +- 添加了 `allow_ddl` 选项,它限制用户对DDL查询的访问。 [\#3104](https://github.com/ClickHouse/ClickHouse/pull/3104) +- 添加了 `min_merge_bytes_to_use_direct_io` 备选案文 `MergeTree` 引擎允许您为合并的总大小设置阈值(当超过阈值时,将使用O\_DIRECT处理数据部分文件)。 [\#3117](https://github.com/ClickHouse/ClickHouse/pull/3117) +- 该 `system.merges` 系统表现在包含 `partition_id` 列。 [\#3099](https://github.com/ClickHouse/ClickHouse/pull/3099) + +#### 改进 {#improvements-3} + +- 如果数据部分在变异期间保持不变,则副本不会下载该数据部分。 [\#3103](https://github.com/ClickHouse/ClickHouse/pull/3103) +- 使用时,自动完成可用于设置名称 `clickhouse-client`. [\#3106](https://github.com/ClickHouse/ClickHouse/pull/3106) + +#### 错误修复: {#bug-fixes-12} + +- 添加了一个检查是元素的数组的大小 `Nested` 插入时的类型字段。 [\#3118](https://github.com/ClickHouse/ClickHouse/pull/3118) +- 修正了一个错误更新外部字典与 `ODBC` 来源和 `hashed` 存储。 此错误发生在版本18.12.13中。 +- 修复了使用以下命令从查询创建临时表时出现的崩溃 `IN` 条件。 [张冬](https://github.com/ClickHouse/ClickHouse/pull/3098) +- 修复了聚合函数中可能具有的数组的错误 `NULL` 元素。 [张冬](https://github.com/ClickHouse/ClickHouse/pull/3097) + +### 碌莽禄,拢,010-68520682\ {#clickhouse-release-18-12-13-2018-09-10} + +#### 新功能: {#new-features-4} + +- 添加了 `DECIMAL(digits, scale)` 数据类型 (`Decimal32(scale)`, `Decimal64(scale)`, `Decimal128(scale)`). 要启用它,请使用以下设置 `allow_experimental_decimal_type`. [\#2846](https://github.com/ClickHouse/ClickHouse/pull/2846) [\#2970](https://github.com/ClickHouse/ClickHouse/pull/2970) [\#3008](https://github.com/ClickHouse/ClickHouse/pull/3008) [\#3047](https://github.com/ClickHouse/ClickHouse/pull/3047) +- 新 `WITH ROLLUP` 修饰符 `GROUP BY` (替代语法: `GROUP BY ROLLUP(...)`). [\#2948](https://github.com/ClickHouse/ClickHouse/pull/2948) +- 在具有JOIN的查询中,星形字符将扩展为符合SQL标准的所有表中的列列表。 您可以通过设置恢复旧行为 `asterisk_left_columns_only` 在用户配置级别上为1。 [张冬](https://github.com/ClickHouse/ClickHouse/pull/2787) +- 增加了对连接表函数的支持。 [张冬](https://github.com/ClickHouse/ClickHouse/pull/2907) +- 在clickhouse-client中按Tab键进行自动完成。 [谢尔盖\*谢尔宾](https://github.com/ClickHouse/ClickHouse/pull/2447) +- Clickhouse-client中的Ctrl+C清除输入的查询。 [\#2877](https://github.com/ClickHouse/ClickHouse/pull/2877) +- 添加了 `join_default_strictness` 设置(值: `"`, `'any'`, `'all'`). 这允许您不指定 `ANY` 或 `ALL` 为 `JOIN`. 
[\#2982](https://github.com/ClickHouse/ClickHouse/pull/2982) +- 与查询处理相关的服务器日志的每一行都显示了查询ID。 [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482) +- 现在,您可以在clickhouse-client中获取查询执行日志(使用 `send_logs_level` 设置)。 通过分布式查询处理,日志从所有服务器级联。 [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482) +- 该 `system.query_log` 和 `system.processes` (`SHOW PROCESSLIST`)表现在有关所有更改的设置信息,当你运行一个查询(的嵌套结构 `Settings` 数据)。 添加了 `log_query_settings` 设置。 [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482) +- 该 `system.query_log` 和 `system.processes` 表现在显示有关参与查询执行的线程数的信息(请参阅 `thread_numbers` 列)。 [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482) +- 已添加 `ProfileEvents` 用于度量通过网络读取和写入磁盘以及读取和写入磁盘所花费的时间、网络错误的数量以及在网络带宽受限时所花费的等待时间。 [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482) +- 已添加 `ProfileEvents`包含来自rusage的系统指标的计数器(您可以使用它们获取有关用户空间和内核、页面错误和上下文切换的CPU使用率的信息),以及taskstats指标(使用它们获取有关I/O等待时间、CPU等待时间以及读取和记录的数据量的信息,无论是否包含页面缓存)。 [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482) +- 该 `ProfileEvents` 计数器应用于全局和每个查询,以及每个查询执行线程,它允许您按查询详细分析资源消耗情况。 [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482) +- 添加了 `system.query_thread_log` 表,其中包含有关每个查询执行线程的信息。 添加了 `log_query_threads` 设置。 [\#2482](https://github.com/ClickHouse/ClickHouse/pull/2482) +- 该 `system.metrics` 和 `system.events` 表现在有内置文档。 [\#3016](https://github.com/ClickHouse/ClickHouse/pull/3016) +- 添加了 `arrayEnumerateDense` 功能。 [阿莫斯鸟](https://github.com/ClickHouse/ClickHouse/pull/2975) +- 添加了 `arrayCumSumNonNegative` 和 `arrayDifference` 功能。 [Aleksey Studnev](https://github.com/ClickHouse/ClickHouse/pull/2942) +- 添加了 `retention` 聚合函数。 [李尚迪](https://github.com/ClickHouse/ClickHouse/pull/2887) +- 现在,您可以使用plus运算符添加(合并)聚合函数的状态,并将聚合函数的状态乘以非负常数。 [\#3062](https://github.com/ClickHouse/ClickHouse/pull/3062) [\#3034](https://github.com/ClickHouse/ClickHouse/pull/3034) +- MergeTree系列中的表现在具有虚拟列 `_partition_id`. [\#3089](https://github.com/ClickHouse/ClickHouse/pull/3089) + +#### 实验特点: {#experimental-features-1} + +- 添加了 `LowCardinality(T)` 数据类型。 此数据类型自动创建值的本地字典,并允许数据处理而无需解压字典。 [\#2830](https://github.com/ClickHouse/ClickHouse/pull/2830) +- 添加了JIT编译函数的缓存和编译前使用次数的计数器。 要JIT编译表达式,请启用 `compile_expressions` 设置。 [\#2990](https://github.com/ClickHouse/ClickHouse/pull/2990) [\#3077](https://github.com/ClickHouse/ClickHouse/pull/3077) + +#### 改进: {#improvements-4} + +- 修复了放弃副本时复制日志无限积累的问题。 为延迟较长的副本添加了有效的恢复模式。 +- 改进的性能 `GROUP BY` 当其中一个是string,其他是固定长度时,具有多个聚合字段。 +- 使用时提高性能 `PREWHERE` 并与表达式的隐式转移 `PREWHERE`. +- 改进文本格式的解析性能 (`CSV`, `TSV`). 
[阿莫斯鸟](https://github.com/ClickHouse/ClickHouse/pull/2977) [\#2980](https://github.com/ClickHouse/ClickHouse/pull/2980) +- 改进了读取二进制格式字符串和数组的性能。 [阿莫斯鸟](https://github.com/ClickHouse/ClickHouse/pull/2955) +- 提高性能和减少内存消耗的查询 `system.tables` 和 `system.columns` 当单个服务器上有非常大量的表时。 [\#2953](https://github.com/ClickHouse/ClickHouse/pull/2953) +- 修复了大量查询导致错误的情况下的性能问题( `_dl_addr` 功能是可见的 `perf top`,但服务器没有使用太多的CPU)。 [\#2938](https://github.com/ClickHouse/ClickHouse/pull/2938) +- 条件被转换到视图中(当 `enable_optimize_predicate_expression` 被启用)。 [张冬](https://github.com/ClickHouse/ClickHouse/pull/2907) +- 改进的功能 `UUID` 数据类型。 [\#3074](https://github.com/ClickHouse/ClickHouse/pull/3074) [\#2985](https://github.com/ClickHouse/ClickHouse/pull/2985) +- 该 `UUID` -Alchemist字典支持数据类型。 [\#2822](https://github.com/ClickHouse/ClickHouse/pull/2822) +- 该 `visitParamExtractRaw` 函数与嵌套结构正常工作。 [张冬](https://github.com/ClickHouse/ClickHouse/pull/2974) +- 当 `input_format_skip_unknown_fields` 启用设置,在对象字段 `JSONEachRow` 格式被正确跳过。 [BlahGeek](https://github.com/ClickHouse/ClickHouse/pull/2958) +- 对于一个 `CASE` 表达式与条件,你现在可以省略 `ELSE`,这相当于 `ELSE NULL`. [\#2920](https://github.com/ClickHouse/ClickHouse/pull/2920) +- 现在可以在使用ZooKeeper时配置操作超时。 [urykhy](https://github.com/ClickHouse/ClickHouse/pull/2971) +- 您可以指定偏移量 `LIMIT n, m` 作为 `LIMIT n OFFSET m`. [\#2840](https://github.com/ClickHouse/ClickHouse/pull/2840) +- 您可以使用 `SELECT TOP n` 语法作为替代 `LIMIT`. [\#2840](https://github.com/ClickHouse/ClickHouse/pull/2840) +- 增加了队列的大小写入系统表,因此 `SystemLog parameter queue is full` 错误不经常发生。 +- 该 `windowFunnel` aggregate函数现在支持满足多个条件的事件。 [阿莫斯鸟](https://github.com/ClickHouse/ClickHouse/pull/2801) +- 重复的列可以用于 `USING` 条款 `JOIN`. [\#3006](https://github.com/ClickHouse/ClickHouse/pull/3006) +- `Pretty` 格式现在对列对齐宽度有限制。 使用 `output_format_pretty_max_column_pad_width` 设置。 如果一个值较宽,它仍将完整显示,但表中的其他单元格不会太宽。 [\#3003](https://github.com/ClickHouse/ClickHouse/pull/3003) +- 该 `odbc` 表函数现在允许您指定数据库/模式名称。 [阿莫斯鸟](https://github.com/ClickHouse/ClickHouse/pull/2885) +- 增加了使用在指定的用户名的能力 `clickhouse-client` 配置文件。 [弗拉基米尔\*科兹宾](https://github.com/ClickHouse/ClickHouse/pull/2909) +- 该 `ZooKeeperExceptions` 计数器已被分成三个计数器: `ZooKeeperUserExceptions`, `ZooKeeperHardwareExceptions`,和 `ZooKeeperOtherExceptions`. +- `ALTER DELETE` 查询适用于实例化视图。 +- 在定期运行清理线程时添加了随机化 `ReplicatedMergeTree` 表,以避免周期性负载尖峰时有一个非常大的数量 `ReplicatedMergeTree` 桌子 +- 支持 `ATTACH TABLE ... ON CLUSTER` 查询。 [\#3025](https://github.com/ClickHouse/ClickHouse/pull/3025) + +#### 错误修复: {#bug-fixes-13} + +- 修正了一个问题 `Dictionary` 表(抛出 `Size of offsets doesn't match size of column` 或 `Unknown compression method` 例外)。 此错误出现在版本18.10.3中。 [\#2913](https://github.com/ClickHouse/ClickHouse/issues/2913) +- 修复了合并时的错误 `CollapsingMergeTree` 如果其中一个数据部分为空(这些部分在合并或合并期间形成 `ALTER DELETE` 如果所有数据被删除),和 `vertical` 算法被用于合并。 [\#3049](https://github.com/ClickHouse/ClickHouse/pull/3049) +- 在固定的竞争条件 `DROP` 或 `TRUNCATE` 为 `Memory` 表与同时 `SELECT`,这可能导致服务器崩溃。 此错误出现在版本1.1.54388中。 [\#3038](https://github.com/ClickHouse/ClickHouse/pull/3038) +- 修正了插入时数据丢失的可能性 `Replicated` 表如果 `Session is expired` 错误返回(数据丢失可以通过检测 `ReplicatedDataLoss` 公制)。 此错误发生在版本1.1.54378。 [\#2939](https://github.com/ClickHouse/ClickHouse/pull/2939) [\#2949](https://github.com/ClickHouse/ClickHouse/pull/2949) [\#2964](https://github.com/ClickHouse/ClickHouse/pull/2964) +- 在修复段错误 `JOIN ... ON`. [\#3000](https://github.com/ClickHouse/ClickHouse/pull/3000) +- 修正了错误搜索列名时 `WHERE` 表达式完全由限定列名组成,例如 `WHERE table.column`. 
[\#2994](https://github.com/ClickHouse/ClickHouse/pull/2994) +- 修正了 “Not found column” 如果从远程服务器请求由IN表达式和子查询组成的单个列,则在执行分布式查询时发生错误。 [\#3087](https://github.com/ClickHouse/ClickHouse/pull/3087) +- 修正了 `Block structure mismatch in UNION stream: different number of columns` 如果其中一个分片是本地的,而另一个分片不是,则发生分布式查询的错误,并优化移动到 `PREWHERE` 被触发。 [\#2226](https://github.com/ClickHouse/ClickHouse/pull/2226) [\#3037](https://github.com/ClickHouse/ClickHouse/pull/3037) [\#3055](https://github.com/ClickHouse/ClickHouse/pull/3055) [\#3065](https://github.com/ClickHouse/ClickHouse/pull/3065) [\#3073](https://github.com/ClickHouse/ClickHouse/pull/3073) [\#3090](https://github.com/ClickHouse/ClickHouse/pull/3090) [\#3093](https://github.com/ClickHouse/ClickHouse/pull/3093) +- 修正了 `pointInPolygon` 非凸多边形的某些情况下的函数。 [\#2910](https://github.com/ClickHouse/ClickHouse/pull/2910) +- 修正了比较时不正确的结果 `nan` 与整数。 [\#3024](https://github.com/ClickHouse/ClickHouse/pull/3024) +- 修正了一个错误 `zlib-ng` 在极少数情况下可能导致segfault的库。 [\#2854](https://github.com/ClickHouse/ClickHouse/pull/2854) +- 修复了插入到表中时的内存泄漏 `AggregateFunction` 列,如果聚合函数的状态不简单(分别分配内存),并且如果单个插入请求导致多个小块。 [\#3084](https://github.com/ClickHouse/ClickHouse/pull/3084) +- 修复了创建和删除相同的竞争条件 `Buffer` 或 `MergeTree` 同时表。 +- 修复了比较由某些非平凡类型(如元组)组成的元组时出现段错误的可能性。 [\#2989](https://github.com/ClickHouse/ClickHouse/pull/2989) +- 修正了运行某些时段错误的可能性 `ON CLUSTER` 查询。 [张冬](https://github.com/ClickHouse/ClickHouse/pull/2960) +- 修正了一个错误 `arrayDistinct` 功能 `Nullable` 数组元素。 [\#2845](https://github.com/ClickHouse/ClickHouse/pull/2845) [\#2937](https://github.com/ClickHouse/ClickHouse/pull/2937) +- 该 `enable_optimize_predicate_expression` 选项现在正确支持的情况下 `SELECT *`. [张冬](https://github.com/ClickHouse/ClickHouse/pull/2929) +- 修复了重新初始化ZooKeeper会话时的段错误。 [\#2917](https://github.com/ClickHouse/ClickHouse/pull/2917) +- 与ZooKeeper工作时固定的潜在阻塞。 +- 修正了不正确的代码添加嵌套的数据结构中 `SummingMergeTree`. +- 在为聚合函数的状态分配内存时,会正确考虑对齐,这使得在实现聚合函数的状态时可以使用需要对齐的操作。 [晨兴-xc](https://github.com/ClickHouse/ClickHouse/pull/2808) + +#### 安全修复: {#security-fix} + +- 安全使用ODBC数据源。 与ODBC驱动程序的交互使用单独的 `clickhouse-odbc-bridge` 过程。 第三方ODBC驱动程序中的错误不再导致服务器稳定性问题或漏洞。 [\#2828](https://github.com/ClickHouse/ClickHouse/pull/2828) [\#2879](https://github.com/ClickHouse/ClickHouse/pull/2879) [\#2886](https://github.com/ClickHouse/ClickHouse/pull/2886) [\#2893](https://github.com/ClickHouse/ClickHouse/pull/2893) [\#2921](https://github.com/ClickHouse/ClickHouse/pull/2921) +- 修正了在文件路径的不正确的验证 `catBoostPool` 表功能。 [\#2894](https://github.com/ClickHouse/ClickHouse/pull/2894) +- 系统表的内容 (`tables`, `databases`, `parts`, `columns`, `parts_columns`, `merges`, `mutations`, `replicas`,和 `replication_queue`)根据用户对数据库的配置访问权限进行过滤 (`allow_databases`). [张冬](https://github.com/ClickHouse/ClickHouse/pull/2856) + +#### 向后不兼容的更改: {#backward-incompatible-changes-3} + +- 在具有JOIN的查询中,星形字符将扩展为符合SQL标准的所有表中的列列表。 您可以通过设置恢复旧行为 `asterisk_left_columns_only` 在用户配置级别上为1。 + +#### 构建更改: {#build-changes-2} + +- 大多数集成测试现在可以通过commit运行。 +- 代码样式检查也可以通过提交运行。 +- 该 `memcpy` 在CentOS7/Fedora上构建时,正确选择实现。 [Etienne Champetier](https://github.com/ClickHouse/ClickHouse/pull/2912) +- 当使用clang来构建时,来自一些警告 `-Weverything` 已添加,除了常规 `-Wall-Wextra -Werror`. 
[\#2957](https://github.com/ClickHouse/ClickHouse/pull/2957)
+- Debug builds use the `jemalloc` debug option.
+- The interface of the library for interacting with ZooKeeper is declared abstract. [\#2950](https://github.com/ClickHouse/ClickHouse/pull/2950)
+
+## ClickHouse release 18.10 {#clickhouse-release-18-10}
+
+### ClickHouse release 18.10.3, 2018-08-13 {#clickhouse-release-18-10-3-2018-08-13}
+
+#### New features: {#new-features-5}
+
+- HTTPS can be used for replication. [\#2760](https://github.com/ClickHouse/ClickHouse/pull/2760)
+- Added the functions `murmurHash2_64`, `murmurHash3_32`, `murmurHash3_64`, and `murmurHash3_128` in addition to the existing `murmurHash2_32`. [\#2791](https://github.com/ClickHouse/ClickHouse/pull/2791)
+- Support for Nullable types in the ClickHouse ODBC driver (`ODBCDriver2` output format). [\#2834](https://github.com/ClickHouse/ClickHouse/pull/2834)
+- Support for `UUID` in the key columns.
+
+#### Improvements: {#improvements-5}
+
+- Clusters can be removed without restarting the server when they are deleted from the config files. [\#2777](https://github.com/ClickHouse/ClickHouse/pull/2777)
+- External dictionaries can be removed without restarting the server when they are removed from config files. [\#2779](https://github.com/ClickHouse/ClickHouse/pull/2779)
+- Added `SETTINGS` support for the `Kafka` table engine. [Alexander Marshalov](https://github.com/ClickHouse/ClickHouse/pull/2781)
+- Improvements for the `UUID` data type (not yet complete). [\#2618](https://github.com/ClickHouse/ClickHouse/pull/2618)
+- Support for empty parts after merges in the `SummingMergeTree`, `CollapsingMergeTree` and `VersionedCollapsingMergeTree` engines. [\#2815](https://github.com/ClickHouse/ClickHouse/pull/2815)
+- Old records of completed mutations are deleted (`ALTER DELETE`). [\#2784](https://github.com/ClickHouse/ClickHouse/pull/2784)
+- Added the `system.merge_tree_settings` table. [Kirill Shvakov](https://github.com/ClickHouse/ClickHouse/pull/2841)
+- The `system.tables` table now has dependency columns: `dependencies_database` and `dependencies_table`. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2851)
+- Added the `max_partition_size_to_drop` config option. [\#2782](https://github.com/ClickHouse/ClickHouse/pull/2782)
+- Added the `output_format_json_escape_forward_slashes` option. [Alexander Bocharov](https://github.com/ClickHouse/ClickHouse/pull/2812)
+- Added the `max_fetch_partition_retries_count` setting. [\#2831](https://github.com/ClickHouse/ClickHouse/pull/2831)
+- Added the `prefer_localhost_replica` setting for disabling the preference for a local replica and going to a local replica without inter-process interaction. [\#2832](https://github.com/ClickHouse/ClickHouse/pull/2832)
+- The `quantileExact` aggregate function returns `nan` in the case of aggregation on an empty `Float32` or `Float64` set. [Sundy Li](https://github.com/ClickHouse/ClickHouse/pull/2855)
+
+#### Bug fixes: {#bug-fixes-14}
+
+- Removed unnecessary escaping of the connection string parameters for ODBC, which made it impossible to establish a connection. This error occurred in version 18.6.0.
+- Fixed the logic for processing `REPLACE PARTITION` commands in the replication queue. If there are two `REPLACE` commands for the same partition, the incorrect logic could cause one of them to remain in the replication queue and not be executed. [\#2814](https://github.com/ClickHouse/ClickHouse/pull/2814)
+- Fixed a merge bug when all data parts were empty (parts that were formed from a merge or from `ALTER DELETE` if all data was deleted). This bug appeared in version 18.1.0. [\#2930](https://github.com/ClickHouse/ClickHouse/pull/2930)
+- Fixed an error for concurrent `Set` or `Join`. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2823)
+- Fixed the `Block structure mismatch in UNION stream: different number of columns` error that occurred for `UNION ALL` queries inside a subquery if one of the `SELECT` queries contains duplicate column names. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2094)
+- Fixed a memory leak if an exception occurred when connecting to a MySQL server.
+- Fixed incorrect clickhouse-client response code in case of a query error.
+- Fixed incorrect behavior of materialized views containing DISTINCT. [\#2795](https://github.com/ClickHouse/ClickHouse/issues/2795)
+
+#### Backward incompatible changes {#backward-incompatible-changes-4}
+
+- Removed support for CHECK TABLE queries for Distributed tables.
+
+#### Build changes: {#build-changes-3}
+
+- The allocator has been replaced: `jemalloc` is now used instead of `tcmalloc`. In some scenarios, this increases speed by up to 20%. However, some queries have slowed by up to 20%. Memory consumption has been reduced by approximately 10% in some scenarios, with improved stability. With highly competitive loads, CPU usage in userspace and in system shows just a slight increase. [\#2773](https://github.com/ClickHouse/ClickHouse/pull/2773)
+- Use of libressl from a submodule. [\#1983](https://github.com/ClickHouse/ClickHouse/pull/1983) [\#2807](https://github.com/ClickHouse/ClickHouse/pull/2807)
+- Use of unixodbc from a submodule. [\#2789](https://github.com/ClickHouse/ClickHouse/pull/2789)
+- Use of mariadb-connector-c from a submodule. [\#2785](https://github.com/ClickHouse/ClickHouse/pull/2785)
+- Added functional test files to the repository that depend on the availability of test data (for the time being, without the test data itself).
+
+## ClickHouse release 18.6 {#clickhouse-release-18-6}
+
+### ClickHouse release 18.6.0, 2018-08-02 {#clickhouse-release-18-6-0-2018-08-02}
+
+#### New features: {#new-features-6}
+
+- Added support for ON expressions for the JOIN ON syntax:
+ `JOIN ON Expr([table.]column ...) = Expr([table.]column, ...) [AND Expr([table.]column, ...) = Expr([table.]column, ...) ...]`
+ The expression must be a chain of equalities joined by the AND operator. Each side of the equality can be an arbitrary expression over the columns of one of the tables. The use of fully qualified column names is supported (`table.name`, `database.table.name`, `table_alias.name`, `subquery_alias.name`) for the right table. [\#2742](https://github.com/ClickHouse/ClickHouse/pull/2742)
+- HTTPS can be enabled for replication. [\#2760](https://github.com/ClickHouse/ClickHouse/pull/2760)
+
+#### Improvements: {#improvements-6}
+
+- The server passes the patch component of its version to the client. Data about the patch version component is in `system.processes` and `query_log`. [\#2646](https://github.com/ClickHouse/ClickHouse/pull/2646)
+
+## ClickHouse release 18.5 {#clickhouse-release-18-5}
+
+### ClickHouse release 18.5.1, 2018-07-31 {#clickhouse-release-18-5-1-2018-07-31}
+
+#### New features: {#new-features-7}
+
+- Added the hash function `murmurHash2_32`. [\#2756](https://github.com/ClickHouse/ClickHouse/pull/2756)
+
+#### Improvements: {#improvements-7}
+
+- Now you can use the `from_env` attribute to set values in config files from environment variables. [\#2741](https://github.com/ClickHouse/ClickHouse/pull/2741)
+- Added case-insensitive versions of the `coalesce`, `ifNull`, and `nullIf` functions. [\#2752](https://github.com/ClickHouse/ClickHouse/pull/2752)
+
+#### Bug fixes: {#bug-fixes-15}
+
+- Fixed a possible bug when starting a replica. [\#2759](https://github.com/ClickHouse/ClickHouse/pull/2759)
+
+## ClickHouse release 18.4 {#clickhouse-release-18-4}
+
+### ClickHouse release 18.4.0, 2018-07-28 {#clickhouse-release-18-4-0-2018-07-28}
+
+#### New features: {#new-features-8}
+
+- Added system tables: `formats`, `data_type_families`, `aggregate_function_combinators`, `table_functions`, `table_engines`, `collations`. [\#2721](https://github.com/ClickHouse/ClickHouse/pull/2721)
+- Added the ability to use a table function instead of a table as an argument of a `remote` or `cluster` table function. [\#2708](https://github.com/ClickHouse/ClickHouse/pull/2708)
+- Support for `HTTP Basic` authentication in the replication protocol. [\#2727](https://github.com/ClickHouse/ClickHouse/pull/2727)
+- The `has` function now allows searching for a numeric value in an array of `Enum` values. [Maxim Khrisanfov](https://github.com/ClickHouse/ClickHouse/pull/2699)
+- Support for adding arbitrary message separators when reading from `Kafka`. [Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2701)
+
+#### Improvements: {#improvements-8}
+
+- The `ALTER TABLE t DELETE WHERE` query does not rewrite data parts that were not affected by the WHERE condition. [\#2694](https://github.com/ClickHouse/ClickHouse/pull/2694)
+- The `use_minimalistic_checksums_in_zookeeper` option for `ReplicatedMergeTree` tables is enabled by default. This setting was added in version 1.1.54378, 2018-04-16. Versions that are older than 1.1.54378 can no longer be installed.
+- Support for running `KILL` and `OPTIMIZE` queries that specify `ON CLUSTER`. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2689)
+
+#### Bug fixes: {#bug-fixes-16}
+
+- Fixed the error `Column ... is not under an aggregate function and not in GROUP BY` for aggregation with an IN expression. This bug appeared in version 18.1.0. ([bbdd780b](https://github.com/ClickHouse/ClickHouse/commit/bbdd780be0be06a0f336775941cdd536878dd2c2))
+- Fixed a bug in the `windowFunnel` aggregate function. [Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2735)
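+For illustration, here is a minimal sketch of the ON-expression join syntax from the 18.6.0 notes above; the table and column names are hypothetical:
+
+```
+SELECT o.id, u.name
+FROM orders AS o
+JOIN users AS u ON o.user_id = u.id AND o.region_id = u.region_id
+```
+
+Each side of every equality may be an arbitrary expression over one table's columns, and multiple conditions must be chained with AND.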
+- Fixed a bug in the `anyHeavy` aggregate function. ([a2101df2](https://github.com/ClickHouse/ClickHouse/commit/a2101df25a6a0fba99aa71f8793d762af2b801ee))
+- Fixed server crash when using the `countArray()` aggregate function.
+
+#### Backward incompatible changes: {#backward-incompatible-changes-5}
+
+- Parameters for the `Kafka` engine were changed from `Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format[, kafka_schema, kafka_num_consumers])` to `Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format[, kafka_row_delimiter, kafka_schema, kafka_num_consumers])`. If your tables use the `kafka_schema` or `kafka_num_consumers` parameters, you have to manually edit the metadata files `path/metadata/database/table.sql` and add the `kafka_row_delimiter` parameter with the `''` value.
+
+## ClickHouse release 18.1 {#clickhouse-release-18-1}
+
+### ClickHouse release 18.1.0, 2018-07-23 {#clickhouse-release-18-1-0-2018-07-23}
+
+#### New features: {#new-features-9}
+
+- Support for the `ALTER TABLE t DELETE WHERE` query for non-replicated MergeTree tables ([\#2634](https://github.com/ClickHouse/ClickHouse/pull/2634)).
+- Support for arbitrary types for the `uniq*` family of aggregate functions ([\#2010](https://github.com/ClickHouse/ClickHouse/issues/2010)).
+- Support for arbitrary types in comparison operators ([\#2026](https://github.com/ClickHouse/ClickHouse/issues/2026)).
+- The `users.xml` file allows setting a subnet mask in the format `10.0.0.1/255.255.255.0`. This is necessary for using masks for IPv6 networks with zeros in the middle ([\#2637](https://github.com/ClickHouse/ClickHouse/pull/2637)).
+- Added the `arrayDistinct` function ([\#2670](https://github.com/ClickHouse/ClickHouse/pull/2670)).
+- The SummingMergeTree engine can now work with AggregateFunction type columns ([Constantin S. Pan](https://github.com/ClickHouse/ClickHouse/pull/2566)).
+
+#### Improvements: {#improvements-9}
+
+- Changed the numbering scheme for release versions. Now the first part contains the year of release (A.D., Moscow time zone, minus 2000), the second part contains the number of major changes (increases for most releases), and the third part is the patch version. Releases are still backward compatible, unless otherwise stated in the changelog.
+- Faster conversion of floating-point numbers to a string ([Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2664)).
+- If some rows were skipped during an insert due to parsing errors (this is possible with the `input_allow_errors_num` and `input_allow_errors_ratio` settings enabled), the number of skipped rows is now written to the server log ([Leonardo Cecchi](https://github.com/ClickHouse/ClickHouse/pull/2669)).
+
+#### Bug fixes: {#bug-fixes-17}
+
+- Fixed the TRUNCATE command for temporary tables ([Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2624)).
+- Fixed a rare deadlock in the ZooKeeper client library that occurred when there was a network error while reading the response ([c315200](https://github.com/ClickHouse/ClickHouse/commit/c315200e64b87e44bdf740707fc857d1fdf7e947)).
+- Fixed an error during a CAST to Nullable types ([\#1322](https://github.com/ClickHouse/ClickHouse/issues/1322)).
+- Fixed the incorrect result of the `maxIntersection()` function when the boundaries of intervals coincided ([Michael Furmur](https://github.com/ClickHouse/ClickHouse/pull/2657)).
+- Fixed incorrect transformation of the OR expression chain in a function argument ([chenxing-xc](https://github.com/ClickHouse/ClickHouse/pull/2663)).
+- Fixed handling of queries containing `IN (subquery)` expressions inside another subquery ([\#2571](https://github.com/ClickHouse/ClickHouse/issues/2571)).
+- Fixed incompatibility between servers with different versions in distributed queries that use a `CAST` function that isn't in uppercase letters ([fe8c4d6](https://github.com/ClickHouse/ClickHouse/commit/fe8c4d64e434cacd4ceef34faa9005129f2190a5)).
+- Added missing quoting of identifiers for queries to an external DBMS ([\#2635](https://github.com/ClickHouse/ClickHouse/issues/2635)).
+
+#### Backward incompatible changes: {#backward-incompatible-changes-6}
+
+- Converting a string containing the number zero to DateTime does not work. Example: `SELECT toDateTime('0')`. This is also the reason that `DateTime DEFAULT '0'` does not work in tables, as well as `0` in dictionaries. Solution: replace `0` with `0000-00-00 00:00:00`.
+
+## ClickHouse release 1.1 {#clickhouse-release-1-1}
+
+### ClickHouse release 1.1.54394, 2018-07-12 {#clickhouse-release-1-1-54394-2018-07-12}
+
+#### New features: {#new-features-10}
+
+- Added the `histogram` aggregate function ([Mikhail Surin](https://github.com/ClickHouse/ClickHouse/pull/2521)).
+- Now `OPTIMIZE TABLE ... FINAL` can be used without specifying partitions for `ReplicatedMergeTree` ([Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2600)).
+
+#### Bug fixes: {#bug-fixes-18}
+
+- Fixed a problem with a very small timeout for sockets (one second) for reading and writing when sending and downloading replicated data, which made it impossible to download larger parts if there is a load on the network or disk (it resulted in cyclical attempts to download parts). This error occurred in version 1.1.54388.
+- Fixed issues when using chroot in ZooKeeper if you inserted duplicate data blocks in the table.
+- The `has` function now works correctly for an array with Nullable elements ([\#2115](https://github.com/ClickHouse/ClickHouse/issues/2115)).
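+As a quick illustration of the `has` fix above, a hedged sketch using literal data:
+
+```
+SELECT has([1, NULL, 3], 1)
+```
+
+The array literal has Nullable elements; the query now returns 1 as expected.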
+- The `system.tables` table now works correctly when used in distributed queries. The `metadata_modification_time` and `engine_full` columns are now non-virtual. Fixed an error that occurred if only these columns were queried from the table.
+- Fixed how an empty `TinyLog` table works after inserting an empty data block ([\#2563](https://github.com/ClickHouse/ClickHouse/issues/2563)).
+- The `system.zookeeper` table works if the value of the node in ZooKeeper is NULL.
+
+### ClickHouse release 1.1.54390, 2018-07-06 {#clickhouse-release-1-1-54390-2018-07-06}
+
+#### New features: {#new-features-11}
+
+- Queries can be sent in `multipart/form-data` format (in the `query` field), which is useful if external data is also sent for query processing ([Olga Hvostikova](https://github.com/ClickHouse/ClickHouse/pull/2490)).
+- Added the ability to enable or disable processing single or double quotation marks when reading data in CSV format. You can configure this in the `format_csv_allow_single_quotes` and `format_csv_allow_double_quotes` settings ([Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2574)).
+- Now `OPTIMIZE TABLE ... FINAL` can be used without specifying the partition for non-replicated variants of `MergeTree` ([Amos Bird](https://github.com/ClickHouse/ClickHouse/pull/2599)).
+
+#### Improvements: {#improvements-10}
+
+- Improved performance, reduced memory consumption, and correct tracking of memory consumption with use of the IN operator when a table index could be used ([\#2584](https://github.com/ClickHouse/ClickHouse/pull/2584)).
+- Removed redundant checking of checksums when adding a data part. This is important when there are a large number of replicas, because in these cases the total number of checks was equal to N^2.
+- Added support for `Array(Tuple(...))` arguments for the `arrayEnumerateUniq` function ([\#2573](https://github.com/ClickHouse/ClickHouse/pull/2573)).
+- Added `Nullable` support for the `runningDifference` function ([\#2594](https://github.com/ClickHouse/ClickHouse/pull/2594)).
+- Improved query analysis performance when there is a very large number of expressions ([\#2572](https://github.com/ClickHouse/ClickHouse/pull/2572)).
+- Faster selection of data parts to merge in `ReplicatedMergeTree` tables. Faster recovery of the ZooKeeper session ([\#2597](https://github.com/ClickHouse/ClickHouse/pull/2597)).
+- The `format_version.txt` file for `MergeTree` tables is re-created if it is missing, which makes sense if ClickHouse is launched after copying the directory structure without files ([Ciprian Hacman](https://github.com/ClickHouse/ClickHouse/pull/2593)).
+
+#### Bug fixes: {#bug-fixes-19}
+
+- Fixed a bug when working with ZooKeeper that could make it impossible to recover the session and readonly states of tables before restarting the server.
+- Fixed a bug when working with ZooKeeper that could result in old nodes not being deleted if the session is interrupted.
+- Fixed an error in the `quantileTDigest` function for Float arguments (this bug was introduced in version 1.1.54388) ([Mikhail Surin](https://github.com/ClickHouse/ClickHouse/pull/2553)).
+- Fixed a bug in the index for MergeTree tables if the primary key column is located inside the function for converting types between signed and unsigned integers of the same size ([\#2603](https://github.com/ClickHouse/ClickHouse/pull/2603)).
+- Fixed segfault if `macros` are used but they aren't in the config file ([\#2570](https://github.com/ClickHouse/ClickHouse/pull/2570)).
+- Fixed switching to the default database when reconnecting the client ([\#2583](https://github.com/ClickHouse/ClickHouse/pull/2583)).
+- Fixed a bug that occurred when the `use_index_for_in_with_subqueries` setting was disabled.
+
+#### Security fix: {#security-fix-1}
+
+- Sending files is no longer possible when connected to MySQL (`LOAD DATA LOCAL INFILE`).
+
+### ClickHouse release 1.1.54388, 2018-06-28 {#clickhouse-release-1-1-54388-2018-06-28}
+
+#### New features: {#new-features-12}
+
+- Support for the `ALTER TABLE t DELETE WHERE` query for replicated tables. Added the `system.mutations` table to track progress of this type of queries.
+- Support for the `ALTER TABLE t [REPLACE|ATTACH] PARTITION` query for \*MergeTree tables.
+- Support for the `TRUNCATE TABLE` query ([Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2260))
+- Several new `SYSTEM` queries for replicated tables (`RESTART REPLICAS`, `SYNC REPLICA`, `[STOP|START] [MERGES|FETCHES|SENDS REPLICATED|REPLICATION QUEUES]`).
+- Added the ability to write to a table with the MySQL engine and the corresponding table function ([sundy-li](https://github.com/ClickHouse/ClickHouse/pull/2294)).
+- Added the `url()` table function and the `URL` table engine ([Alexander Sapin](https://github.com/ClickHouse/ClickHouse/pull/2501)).
+- Added the `windowFunnel` aggregate function ([sundy-li](https://github.com/ClickHouse/ClickHouse/pull/2352)).
+- New `startsWith` and `endsWith` functions for strings ([Vadim Plakhtinsky](https://github.com/ClickHouse/ClickHouse/pull/2429)).
+- The `numbers()` table function now allows you to specify the offset ([Winter Zhang](https://github.com/ClickHouse/ClickHouse/pull/2535)).
+- The password to `clickhouse-client` can be entered interactively.
+- Server logs can now be sent to syslog ([Alexander Krasheninnikov](https://github.com/ClickHouse/ClickHouse/pull/2459)).
+- Support for logging in dictionaries with a shared library source ([Alexander Sapin](https://github.com/ClickHouse/ClickHouse/pull/2472)).
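+As an illustration of the `windowFunnel` aggregate function added in this release, a hedged sketch; the `events` table and its columns are hypothetical:
+
+```
+SELECT user_id,
+       windowFunnel(3600)(event_time, event = 'view', event = 'cart', event = 'purchase') AS level
+FROM events
+GROUP BY user_id
+```
+
+`level` is the length of the longest prefix of conditions matched within the 3600-second window.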
+- Support for custom CSV delimiters ([Ivan Zhukov](https://github.com/ClickHouse/ClickHouse/pull/2263))
+- Added the `date_time_input_format` setting. If you switch this setting to `'best_effort'`, DateTime values will be read in a wide range of formats.
+- Added the `clickhouse-obfuscator` utility for data obfuscation. Usage example: publishing data used in performance tests.
+
+#### Experimental features: {#experimental-features-2}
+
+- Added the ability to calculate `and` arguments only where they are needed ([Anastasia Tsarkova](https://github.com/ClickHouse/ClickHouse/pull/2272))
+- JIT compilation to native code is now available for some expressions ([pyos](https://github.com/ClickHouse/ClickHouse/pull/2277)).
+
+#### Bug fixes: {#bug-fixes-20}
+
+- Duplicates no longer appear for a query with `DISTINCT` and `ORDER BY`.
+- Queries with `ARRAY JOIN` and `arrayFilter` no longer return an incorrect result.
+- Fixed an error when reading an array column from a Nested structure ([\#2066](https://github.com/ClickHouse/ClickHouse/issues/2066)).
+- Fixed an error when analyzing queries with a HAVING clause like `HAVING tuple IN (...)`.
+- Fixed an error when analyzing queries with recursive aliases.
+- Fixed an error when reading from ReplacingMergeTree with a condition in PREWHERE that filters all rows ([\#2525](https://github.com/ClickHouse/ClickHouse/issues/2525)).
+- User profile settings were not applied when using sessions in the HTTP interface.
+- Fixed how settings are applied from the command line parameters in clickhouse-local.
+- The ZooKeeper client library now uses the session timeout received from the server.
+- Fixed a bug in the ZooKeeper client library when the client waited for the server response longer than the timeout.
+- Fixed pruning of parts for queries with conditions on partition key columns ([\#2342](https://github.com/ClickHouse/ClickHouse/issues/2342)).
+- Merges are now possible after `CLEAR COLUMN IN PARTITION` ([\#2315](https://github.com/ClickHouse/ClickHouse/issues/2315)).
+- Type mapping in the ODBC table function has been fixed ([sundy-li](https://github.com/ClickHouse/ClickHouse/pull/2268)).
+- Type comparisons have been fixed for `DateTime` with and without a time zone ([Alexander Bocharov](https://github.com/ClickHouse/ClickHouse/pull/2400)).
+- Fixed syntactic parsing and formatting of the `CAST` operator.
+- Fixed insertion into a materialized view for the Distributed table engine ([Babacar Diassé](https://github.com/ClickHouse/ClickHouse/pull/2411)).
+- Fixed a race condition when writing data from the `Kafka` engine to materialized views ([Yangkuan Liu](https://github.com/ClickHouse/ClickHouse/pull/2448)).
+- Fixed SSRF in the remote() table function.
+- Fixed exit behavior of `clickhouse-client` in multiline mode ([\#2510](https://github.com/ClickHouse/ClickHouse/issues/2510)).
+
+#### Improvements: {#improvements-11}
+
+- Background tasks in replicated tables are now performed in a thread pool instead of in separate threads ([Silviu Caragea](https://github.com/ClickHouse/ClickHouse/pull/1722)).
+- Improved LZ4 compression performance.
+- Faster analysis for queries with a large number of JOINs and subqueries.
+- The DNS cache is now updated automatically when there are too many network errors.
+- Table inserts no longer occur if the insert into one of its materialized views is not possible because it has too many parts.
+- Corrected the discrepancy in the event counters `Query`, `SelectQuery`, and `InsertQuery`.
+- Expressions like `tuple IN (SELECT tuple)` are allowed if the tuple types match.
+- A server with replicated tables can start even if you haven't configured ZooKeeper.
+- When calculating the number of available CPU cores, limits on cgroups are now taken into account ([Atri Sharma](https://github.com/ClickHouse/ClickHouse/pull/2325)).
+- Added chown for config directories in the systemd config file ([Mikhail Shiryaev](https://github.com/ClickHouse/ClickHouse/pull/2421)).
+
+#### Build changes: {#build-changes-4}
+
+- The gcc8 compiler can be used for builds.
+- Added the ability to build llvm from a submodule.
+- The version of the librdkafka library has been updated to v0.11.4.
+- Added the ability to use the system libcpuid library. The library version has been updated to 0.4.0.
+- Fixed the build using the vectorclass library ([Babacar Diassé](https://github.com/ClickHouse/ClickHouse/pull/2274)).
+- CMake now generates files for ninja by default (like when using `-G Ninja`).
+- Added the ability to use the libtinfo library instead of libtermcap ([Georgy Kondratiev](https://github.com/ClickHouse/ClickHouse/pull/2519)).
+- Fixed a header file conflict in Fedora Rawhide ([\#2520](https://github.com/ClickHouse/ClickHouse/issues/2520)).
+
+#### Backward incompatible changes: {#backward-incompatible-changes-7}
+
+- Removed escaping in `Vertical` and `Pretty*` formats and deleted the `VerticalRaw` format.
+- If servers with version 1.1.54388 (or newer) and servers with an older version are used simultaneously in a distributed query and the query has the `cast(x, 'Type')` expression without the `AS` keyword and doesn't have the word `cast` in uppercase, an exception will be thrown with a message like `Not found column cast(0, 'UInt8') in block`. Solution: update the server on the entire cluster.
+
+### ClickHouse release 1.1.54385, 2018-06-01 {#clickhouse-release-1-1-54385-2018-06-01}
+
+#### Bug fixes: {#bug-fixes-21}
+
+- Fixed an error that in some cases caused ZooKeeper operations to block.
+
+### ClickHouse release 1.1.54383, 2018-05-22 {#clickhouse-release-1-1-54383-2018-05-22}
+
+#### Bug fixes: {#bug-fixes-22}
+
+- Fixed a slowdown of the replication queue if a table has many replicas.
+
+### ClickHouse release 1.1.54381, 2018-05-14 {#clickhouse-release-1-1-54381-2018-05-14}
+
+#### Bug fixes: {#bug-fixes-23}
+
+- Fixed a nodes leak in ZooKeeper when ClickHouse loses connection to the ZooKeeper server.
+
+### ClickHouse release 1.1.54380, 2018-04-21 {#clickhouse-release-1-1-54380-2018-04-21}
+
+#### New features: {#new-features-13}
+
+- Added the table function `file(path, format, structure)`. An example reading bytes from `/dev/urandom`:
+
+```
+ln -s /dev/urandom /var/lib/clickhouse/user_files/random
+clickhouse-client -q "SELECT * FROM file('random', 'RowBinary', 'd UInt8') LIMIT 10"
+```
+
+#### Improvements: {#improvements-12}
+
+- Subqueries can be wrapped in `()` brackets to enhance query readability. For example: `(SELECT 1) UNION ALL (SELECT 1)`.
+- Simple `SELECT` queries from the `system.processes` table are not included in the `max_concurrent_queries` limit.
+
+#### Bug fixes: {#bug-fixes-24}
+
+- Fixed incorrect behavior of the `IN` operator when selecting from `MATERIALIZED VIEW`.
+- Fixed incorrect filtering by partition index in expressions like `partition_key_column IN (...)`.
+- Fixed inability to execute `OPTIMIZE` queries on non-leader replicas if `RENAME` was performed on the table.
+- Fixed the authorization error when executing `OPTIMIZE` or `ALTER` queries on non-leader replicas.
+- Fixed freezing of `KILL QUERY`.
+- Fixed an error in the ZooKeeper client library which led to loss of watches, freezing of the distributed DDL queue, and slowdowns in the replication queue if a non-empty `chroot` prefix is used in the ZooKeeper configuration.
+
+#### Backward incompatible changes: {#backward-incompatible-changes-8}
+
+- Removed support for expressions like `(a, b) IN (SELECT (a, b))` (you can use the equivalent expression `(a, b) IN (SELECT a, b)`). In previous releases, these expressions led to undetermined `WHERE` filtering or caused errors.
+
+### ClickHouse release 1.1.54378, 2018-04-16 {#clickhouse-release-1-1-54378-2018-04-16}
+
+#### New features: {#new-features-14}
+
+- Logging level can be changed without restarting the server.
+- Added the `SHOW CREATE DATABASE` query.
+- The `query_id` can be passed to `clickhouse-client` (elBroom).
+- New setting: `max_network_bandwidth_for_all_users`.
+- Added support for `ALTER TABLE ... PARTITION ...` for `MATERIALIZED VIEW`.
+- Added information about the size of data parts in uncompressed form in the system tables.
+- Server-to-server encryption support for distributed tables (`<secure>1</secure>` in the replica config).
+- Configuration at the table level for the `ReplicatedMergeTree` family in order to minimize the amount of data stored in ZooKeeper: `use_minimalistic_checksums_in_zookeeper = 1`.
+- Configuration of the `clickhouse-client` prompt. By default, server names are now output to the prompt. The server's display name can be changed. It's also sent in the `X-ClickHouse-Display-Name` HTTP header (Kirill Shvakov).
+- Multiple comma-separated `topics` can be specified for the `Kafka` engine (Tobias Adamson)
+- When a query is stopped by `KILL QUERY` or `replace_running_query`, the client receives the `Query was canceled` exception instead of an incomplete result.
+
+#### Improvements: {#improvements-13}
+
+- `ALTER TABLE ... DROP/DETACH PARTITION` queries are run at the front of the replication queue.
+- `SELECT ... FINAL` and `OPTIMIZE ... FINAL` can be used even when the table has a single data part.
+- A `query_log` table is recreated on the fly if it was deleted manually (Kirill Shvakov).
+- The `lengthUTF8` function runs faster (zhang2014).
+- Improved performance of synchronous inserts in `Distributed` tables (`insert_distributed_sync = 1`) when there is a very large number of shards.
+- The server accepts the `send_timeout` and `receive_timeout` settings from the client and applies them when connecting to the client (they are applied in reverse order: the server socket's `send_timeout` is set to the `receive_timeout` value received from the client, and vice versa).
+- More robust crash recovery for asynchronous insertion into `Distributed` tables.
+- The return type of the `countEqual` function changed from `UInt32` to `UInt64` (谢磊).
+
+#### Bug fixes: {#bug-fixes-25}
+
+- Fixed an error with `IN` when the left side of the expression is `Nullable`.
+- Correct results are now returned when using tuples with `IN` when some of the tuple components are in the table index.
+- The `max_execution_time` limit now works correctly with distributed queries.
+- Fixed errors when calculating the size of composite columns in the `system.columns` table.
+- Fixed an error when creating a temporary table `CREATE TEMPORARY TABLE IF NOT EXISTS`.
+- Fixed errors in `StorageKafka` (\#\#2075)
+- Fixed server crashes from invalid arguments of certain aggregate functions.
+- Fixed the error that prevented the `DETACH DATABASE` query from stopping background tasks for `ReplicatedMergeTree` tables.
+- `Too many parts` state is less likely to happen when inserting into aggregating materialized views (\#\#2084).
+- Corrected recursive handling of substitutions in the config if a substitution must be followed by another substitution on the same level.
+- Corrected the syntax in the metadata file when creating a `VIEW` that uses a query with `UNION ALL`.
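+As a brief illustration of stopping a query, per the 1.1.54378 notes above; the `query_id` value here is hypothetical:
+
+```
+KILL QUERY WHERE query_id = '5ac0f108-0b2d-4747-8b3e-3b4b2f1c6f11'
+```
+
+The client that ran the target query then receives a `Query was canceled` exception rather than an incomplete result.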
+- `SummingMergeTree` 现在可以正常使用复合键对嵌套数据结构进行求和。 +- 修复了在选择领导者时出现竞争条件的可能性 `ReplicatedMergeTree` 桌子 + +#### 构建更改: {#build-changes-5} + +- 构建支持 `ninja` 而不是 `make` 和用途 `ninja` 默认情况下,构建版本。 +- 重命名的软件包: `clickhouse-server-base` 在 `clickhouse-common-static`; `clickhouse-server-common` 在 `clickhouse-server`; `clickhouse-common-dbg` 在 `clickhouse-common-static-dbg`. 要安装,请使用 `clickhouse-server clickhouse-client`. 具有旧名称的软件包仍将加载到存储库中,以便向后兼容。 + +#### 向后不兼容的更改: {#backward-incompatible-changes-9} + +- 如果在左侧指定了数组,则删除了IN表达式的特殊解释。 以前,表达式 `arr IN (set)` 被解释为 “at least one `arr` element belongs to the `set`”. 要在新版本中获得相同的行为,请编写 `arrayExists(x -> x IN (set), arr)`. +- 禁用套接字选项的不正确使用 `SO_REUSEPORT`,默认情况下,Poco库中未正确启用。 请注意,在Linux上,不再有任何理由同时指定地址 `::` 和 `0.0.0.0` for listen – use just `::`,它允许监听通过IPv4和IPv6的连接(使用默认的内核配置设置)。 您还可以通过指定以下命令恢复到以前版本中的行为 `1` 在配置。 + +### 碌莽禄,拢,010-68520682\戮卤篓拢,010-68520682\ {#clickhouse-release-1-1-54370-2018-03-16} + +#### 新功能: {#new-features-15} + +- 添加了 `system.macros` 更改配置文件时,宏的表和自动更新。 +- 添加了 `SYSTEM RELOAD CONFIG` 查询。 +- 添加了 `maxIntersections(left_col, right_col)` 聚合函数,它返回同时相交间隔的最大数目 `[left; right]`. 该 `maxIntersectionsPosition(left, right)` 函数返回的开始 “maximum” 间隔。 ([Michael Furmur](https://github.com/ClickHouse/ClickHouse/pull/2012)). + +#### 改进: {#improvements-14} + +- 当在一个插入数据 `Replicated` 表,较少的请求是由 `ZooKeeper` (和大多数用户级错误已经从消失 `ZooKeeper` 日志)。 +- 添加了为数据集创建别名的功能。 示例: `WITH (1, 2, 3) AS set SELECT number IN set FROM system.numbers LIMIT 10`. + +#### 错误修复: {#bug-fixes-26} + +- 修正了 `Illegal PREWHERE` 从合并表读取时出错 `Distributed`桌子 +- 添加了修复,允许您在仅支持IPv4的Docker容器中启动clickhouse-server。 +- 修正了从系统读取时的争用条件 `system.parts_columns tables.` +- 同步插入到一个过程中删除双缓冲 `Distributed` 表,这可能导致连接超时。 +- 修正了一个错误,导致过长的等待不可用的副本开始之前 `SELECT` 查询。 +- 在固定不正确的日期 `system.parts` 桌子 +- 修正了一个错误,使得它无法在插入数据 `Replicated` 表if `chroot` 是非空的配置 `ZooKeeper` 集群。 +- 修正了一个空的垂直合并算法 `ORDER BY` 桌子 +- 恢复了在对远程表的查询中使用字典的能力,即使这些字典不存在于请求者服务器上。 此功能在版本1.1.54362中丢失。 +- 恢复查询的行为,如 `SELECT * FROM remote('server2', default.table) WHERE col IN (SELECT col2 FROM default.table)` 当右侧的 `IN` 应该使用远程 `default.table` 而不是当地的 此行为在版本1.1.54358中被破坏。 +- 删除了无关的错误级别日志记录 `Not found column ... in block`. + +### 碌莽禄,拢,010-68520682\戮卤篓拢,010-68520682\ {#clickhouse-release-1-1-54362-2018-03-11} + +#### 新功能: {#new-features-16} + +- 聚合不 `GROUP BY` 对于一个空集(如 `SELECT count(*) FROM table WHERE 0`)现在返回一个结果,其中一行为聚合函数带有null值,符合SQL标准。 要恢复旧行为(返回一个空结果),请设置 `empty_result_for_aggregation_by_empty_set` 到1。 +- 增加了类型转换 `UNION ALL`. 不同的别名被允许 `SELECT` 在职位 `UNION ALL`,符合SQL标准。 +- 任意表达式支持 `LIMIT BY` 条款 以前,只能使用以下内容产生的列 `SELECT`. +- 的索引 `MergeTree` 表用于以下情况 `IN` 应用于来自主键列的表达式元组。 示例: `WHERE (UserID, EventDate) IN ((123, '2000-01-01'), ...)` (Anastasiya Tsarkova) +- 添加了 `clickhouse-copier` 用于在群集之间复制和重新分布数据的工具(测试版)。 +- 添加了一致的哈希函数: `yandexConsistentHash`, `jumpConsistentHash`, `sumburConsistentHash`. 它们可以用作分片密钥,以减少后续重新分片期间的网络流量。 +- 新增功能: `arrayAny`, `arrayAll`, `hasAny`, `hasAll`, `arrayIntersect`, `arrayResize`. +- 添加了 `arrayCumSum` 功能(哈维桑塔纳)。 +- 添加了 `parseDateTimeBestEffort`, `parseDateTimeBestEffortOrZero`,和 `parseDateTimeBestEffortOrNull` 用于从包含各种可能格式的文本的字符串中读取DateTime的函数。 +- 数据可以在更新期间从外部字典部分重新加载(加载只是记录,其中指定字段的值大于先前的下载)(Arsen Hakobyan)。 +- 添加了 `cluster` 表功能。 示例: `cluster(cluster_name, db, table)`. 
该 `remote` 表函数可以接受集群名称作为第一个参数,如果它被指定为标识符。 +- 该 `remote` 和 `cluster` 表函数可用于 `INSERT` 查询。 +- 添加了 `create_table_query` 和 `engine_full` 虚拟列到 `system.tables`桌子 该 `metadata_modification_time` 列是虚拟的。 +- 添加了 `data_path` 和 `metadata_path` 列 `system.tables`和`system.databases` 表,并添加了 `path` 列到 `system.parts` 和 `system.parts_columns` 桌子 +- 添加了关于合并的其他信息 `system.part_log` 桌子 +- 一个任意的分区键可以用于 `system.query_log` 表(基里尔Shvakov)。 +- 该 `SHOW TABLES` 查询现在还显示临时表。 添加临时表和 `is_temporary` 列到 `system.tables` (张2014)。 +- 已添加 `DROP TEMPORARY TABLE` 和 `EXISTS TEMPORARY TABLE` 查询(zhang2014)。 +- 支持 `SHOW CREATE TABLE` 对于临时表(zhang2014)。 +- 添加了 `system_profile` 内部进程使用的设置的配置参数。 +- 支持加载 `object_id` 作为一个属性 `MongoDB` 字典(帕维尔\*利特维年科)。 +- 阅读 `null` 作为加载数据的外部字典与时的默认值 `MongoDB` 资料来源(帕维尔\*利特维年科)。 +- 阅读 `DateTime` 在值 `Values` 从不带单引号的Unix时间戳格式化。 +- 故障转移支持 `remote` 当某些副本缺少请求的表时,表函数。 +- 运行时可以在命令行中复盖配置设置 `clickhouse-server`. 示例: `clickhouse-server -- --logger.level=information`. +- 实施了 `empty` 从功能 `FixedString` 参数:如果字符串完全由空字节组成,则函数返回1(zhang2014)。 +- 添加了 `listen_try`如果某些地址无法侦听,则在不退出的情况下侦听至少一个侦听地址的配置参数(对于禁用IPv4或IPv6支持的系统非常有用)。 +- 添加了 `VersionedCollapsingMergeTree` 表引擎。 +- 对于行和任意数字类型的支持 `library` 字典源. +- `MergeTree` 表可以在没有主键的情况下使用(您需要指定 `ORDER BY tuple()`). +- A `Nullable` 类型可以是 `CAST` 到非-`Nullable` 如果参数不是,则键入 `NULL`. +- `RENAME TABLE` 可以进行 `VIEW`. +- 添加了 `throwIf` 功能。 +- 添加了 `odbc_default_field_size` 选项,它允许您扩展从ODBC源加载的值的最大大小(默认情况下为1024)。 +- 该 `system.processes` 表和 `SHOW PROCESSLIST` 现在有 `is_cancelled` 和 `peak_memory_usage` 列。 + +#### 改进: {#improvements-15} + +- 结果的限制和配额不再应用于以下内容的中间数据 `INSERT SELECT` 查询或 `SELECT` 子查询。 +- 更少的虚假触发 `force_restore_data` 当检查的状态 `Replicated` 服务器启动时的表。 +- 添加了 `allow_distributed_ddl` 选项。 +- 表达式中不允许使用非确定性函数 `MergeTree` 表键。 +- 从替换文件 `config.d` 目录按字母顺序加载。 +- 的改进的性能 `arrayElement` 函数在常量多维数组的情况下,以空数组作为元素之一。 示例: `[[1], []][x]`. +- 当使用具有非常大的替换(例如,非常大的IP网络列表)的配置文件时,服务器现在启动速度更快。 +- 运行查询时,表值函数运行一次。 前情提要, `remote` 和 `mysql` 表值函数执行两次相同的查询以从远程服务器检索表结构。 +- 该 `MkDocs` 使用文档生成器。 +- 当您尝试删除表列时 `DEFAULT`/`MATERIALIZED` 取决于其他列的表达式,会抛出异常(zhang2014)。 +- 增加了解析文本格式的空行作为数字0的能力 `Float` 数据类型。 此功能以前可用,但在版本1.1.54342中丢失。 +- `Enum` 值可以用于 `min`, `max`, `sum` 和其他一些功能。 在这些情况下,它使用相应的数值。 此功能以前可用,但在版本1.1.54337中丢失。 +- 已添加 `max_expanded_ast_elements` 递归扩展别名后限制AST的大小。 + +#### 错误修复: {#bug-fixes-27} + +- 修复了错误地从子查询中删除不必要的列或未从包含以下内容的子查询中删除不必要列的情况 `UNION ALL`. +- 修正了合并的错误 `ReplacingMergeTree` 桌子 +- 在固定的同步插入 `Distributed` 表 (`insert_distributed_sync = 1`). +- 固定段错误的某些用途 `FULL` 和 `RIGHT JOIN` 在子查询中使用重复的列。 +- 固定段错误的某些用途 `replace_running_query` 和 `KILL QUERY`. +- 固定的顺序 `source` 和 `last_exception` 在列 `system.dictionaries` 桌子 +- 修正了一个错误,当 `DROP DATABASE` 查询没有删除带有元数据的文件。 +- 修正了 `DROP DATABASE` 查询为 `Dictionary` 数据库。 +- 固定的低精度 `uniqHLL12` 和 `uniqCombined` 功能基数大于100万个项目(Alex克斯Bocharov)。 +- 修复了在必要时计算隐式默认值,以便同时计算默认显式表达式 `INSERT` 查询(zhang2014)。 +- 修正了一个罕见的情况下,当一个查询 `MergeTree` 表不能完成(陈星-xc)。 +- 修正了运行时发生的崩溃 `CHECK` 查询为 `Distributed` 如果所有分片都是本地的(chenxing.xc)。 +- 修复了使用正则表达式的函数的轻微性能回归。 +- 修复了从复杂表达式创建多维数组时的性能回归。 +- 修正了一个错误,可能会导致一个额外的 `FORMAT` 部分出现在一个 `.sql` 具有元数据的文件。 +- 修复了导致 `max_table_size_to_drop` 尝试删除时应用的限制 `MATERIALIZED VIEW` 查看显式指定的表。 +- 修复了与旧客户端的不兼容性(旧客户端有时会发送数据 `DateTime('timezone')` 类型,他们不明白)。 +- 修复了阅读时的错误 `Nested` 使用以下方式添加的结构的列元素 `ALTER` 但是,这是空的旧分区,当这些列的条件移动到 `PREWHERE`. +- 修正了通过虚拟过滤表时的错误 `_table` 查询中的列 `Merge` 桌子 +- 修复了使用时的错误 `ALIAS` 列 `Distributed` 桌子 +- 修正了一个错误,使得动态编译不可能从聚合函数的查询 `quantile` 家人 +- 修复了查询执行管道中极少数情况下使用时发生的争用条件 `Merge` 具有大量表的表,并且当使用 `GLOBAL` 子查询。 +- 修复了将不同大小的数组传递给 `arrayReduce` 使用来自多个参数的聚合函数时的函数。 +- 禁止使用与查询 `UNION ALL` 在一个 `MATERIALIZED VIEW`. 
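+For example, a minimal sketch of a `MergeTree` table without a primary key, as enabled in the 1.1.54362 notes above; the table and column names are hypothetical:
+
+```
+CREATE TABLE unsorted_events (x UInt32, s String) ENGINE = MergeTree ORDER BY tuple()
+```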
+- Fixed an error during initialization of the `part_log` system table when the server starts (by default, `part_log` is disabled).
+
+#### Backward incompatible changes: {#backward-incompatible-changes-10}
+
+- Removed the `distributed_ddl_allow_replicated_alter` option. This behavior is enabled by default.
+- Removed the `strict_insert_defaults` setting. If you were using this functionality, write to `clickhouse-feedback@yandex-team.com`.
+- Removed the `UnsortedMergeTree` engine.
+
+### ClickHouse release 1.1.54343, 2018-02-05 {#clickhouse-release-1-1-54343-2018-02-05}
+
+- Added macros support for defining cluster names in distributed DDL queries and constructors of Distributed tables: `CREATE TABLE distr ON CLUSTER '{cluster}' (...) ENGINE = Distributed('{cluster}', 'db', 'table')`.
+- Now queries like `SELECT ... FROM table WHERE expr IN (subquery)` are processed using the `table` index.
+- Improved processing of duplicates when inserting to Replicated tables, so they no longer slow down execution of the replication queue.
+
+### ClickHouse release 1.1.54342, 2018-01-22 {#clickhouse-release-1-1-54342-2018-01-22}
+
+This release contains bug fixes for the previous release 1.1.54337:
+
+- Fixed a regression in 1.1.54337: if the default user has readonly access, the server refuses to start up with the message `Cannot create database in readonly mode`.
+- Fixed a regression in 1.1.54337: on systems with systemd, logs are always written to syslog regardless of the configuration; the watchdog script still uses init.d.
+- Fixed a regression in 1.1.54337: wrong default configuration in the Docker image.
+- Fixed nondeterministic behavior of GraphiteMergeTree (you can see it in log messages `Data after merge is not byte-identical to the data on another replicas`).
+- Fixed a bug that may lead to inconsistent merges after the OPTIMIZE query to Replicated tables (you may see it in log messages `Part ... intersects the previous part`).
+- Buffer tables now work correctly when MATERIALIZED columns are present in the destination table (by zhang2014).
+- Fixed a bug in the implementation of NULL.
+
+### ClickHouse release 1.1.54337, 2018-01-18 {#clickhouse-release-1-1-54337-2018-01-18}
+
+#### New features: {#new-features-17}
+
+- Added support for storage of multi-dimensional arrays and tuples (the `Tuple` data type) in tables.
+- Support for table functions for `DESCRIBE` and `INSERT` queries. Added support for subqueries in `DESCRIBE`. Examples: `DESC TABLE remote('host', default.hits)`; `DESC TABLE (SELECT 1)`; `INSERT INTO TABLE FUNCTION remote('host', default.hits)`. Support for `INSERT INTO TABLE` in addition to `INSERT INTO`.
+- Improved support for time zones. The `DateTime` data type can be annotated with the timezone that is used for parsing and formatting in text formats. Example: `DateTime('Europe/Moscow')`. When timezones are specified in functions for `DateTime` arguments, the return type will track the timezone, and the value will be displayed as expected.
+- Added the functions `toTimeZone`, `timeDiff`, `toQuarter`, `toRelativeQuarterNum`. The `toRelativeHour`/`Minute`/`Second` functions can take a value of type `Date` as an argument. The `now` function name is case-sensitive.
+- Added the `toStartOfFifteenMinutes` function (Kirill Shvakov).
+- Added the `clickhouse format` tool for formatting queries.
+- Added the `format_schema_path` configuration parameter (Marek Vavruša). It is used for specifying a schema in `Cap'n Proto` format. Schema files can be located only in the specified directory.
+- Added support for config substitutions (`incl` and `conf.d`) for configuration of external dictionaries and models (Pavel Yakunin).
+- Added a column with documentation for the `system.settings` table (Kirill Shvakov).
+- Added the `system.parts_columns` table with information about column sizes in each data part of `MergeTree` tables.
+- Added the `system.models` table with information about loaded `CatBoost` machine learning models.
+- Added the `mysql` and `odbc` table functions and the corresponding `MySQL` and `ODBC` table engines for accessing remote databases. This functionality is in the beta stage.
+- Added the possibility to pass an argument of type `AggregateFunction` for the `groupArray` aggregate function (so you can create an array of states of some aggregate function).
+- Removed restrictions on various combinations of aggregate function combinators. For example, you can use both `avgForEachIf` and `avgIfForEach` aggregate functions, which have different behaviors.
+- The `-ForEach` aggregate function combinator is extended for the case of aggregate functions of multiple arguments.
+- Added support for aggregate functions of `Nullable` arguments, even for cases when the function returns a non-`Nullable` result (added with the contribution of Silviu Caragea). Examples: `groupArray`, `groupUniqArray`, `topK`.
+- Added `max_client_network_bandwidth` for `clickhouse-client` (Kirill Shvakov).
+- Users with the `readonly = 2` setting are allowed to work with TEMPORARY tables (CREATE, DROP, INSERT…) (Kirill Shvakov).
+- Added support for using multiple consumers with the `Kafka` engine. Extended configuration options for `Kafka` (Marek Vavruša).
+- Added the `intExp3` and `intExp4` functions.
+- Added the `sumKahan` aggregate function.
+- Added the to\*Number\*OrNull functions, where \*Number\* is a numeric type.
+- Added support for the `WITH` clause for an `INSERT SELECT` query (author: zhang2014).
+- Added settings: `http_connection_timeout`, `http_send_timeout`, `http_receive_timeout`.
特别是,这些设置用于下载用于复制的数据部分。 如果网络过载,更改这些设置可以更快地进行故障转移。 +- 增加了对 `ALTER` 对于类型的表 `Null` (Anastasiya Tsarkova) +- 该 `reinterpretAsString` 函数扩展为连续存储在内存中的所有数据类型。 +- 添加了 `--silent` 选项的 `clickhouse-local` 工具 它禁止在stderr中打印查询执行信息。 +- 增加了对读取类型值的支持 `Date` 从使用单个数字而不是两个数字(Amos Bird)指定月份和/或月份日的格式的文本。 + +#### 性能优化: {#performance-optimizations} + +- 改进聚合函数的性能 `min`, `max`, `any`, `anyLast`, `anyHeavy`, `argMin`, `argMax` 从字符串参数。 +- 改进功能的性能 `isInfinite`, `isFinite`, `isNaN`, `roundToExp2`. +- 改进了解析和格式化的性能 `Date` 和 `DateTime` 以文本格式键入值。 +- 改进了解析浮点数的性能和精度。 +- 降低内存使用量 `JOIN` 在左部分和右部分具有不包含在相同名称的列的情况下 `USING` . +- 改进聚合函数的性能 `varSamp`, `varPop`, `stddevSamp`, `stddevPop`, `covarSamp`, `covarPop`, `corr` 通过降低计算稳定性。 旧函数的名称下可用 `varSampStable`, `varPopStable`, `stddevSampStable`, `stddevPopStable`, `covarSampStable`, `covarPopStable`, `corrStable`. + +#### 错误修复: {#bug-fixes-28} + +- 固定数据重复数据删除运行后 `DROP` 或 `DETACH PARTITION` 查询。 在以前的版本中,删除分区并再次插入相同的数据不起作用,因为插入的块被认为是重复的。 +- 修复了可能导致错误解释的错误 `WHERE` 条款 `CREATE MATERIALIZED VIEW` 查询与 `POPULATE` . +- 修正了在使用 `root_path` 在参数 `zookeeper_servers` 配置。 +- 通过固定意外的结果 `Date` 论据 `toStartOfDay` . +- 修正了 `addMonths` 和 `subtractMonths` 函数和算术 `INTERVAL n MONTH` 在情况下,当结果有前一年。 +- 增加了缺少的支持 `UUID` 数据类型 `DISTINCT` , `JOIN` ,和 `uniq` 聚合函数和外部字典(叶夫根尼伊万诺夫)。 支持 `UUID` 仍然是不完整的。 +- 固定 `SummingMergeTree` 行为的情况下,当行相加为零。 +- 各种修复 `Kafka` engine (Marek Vavruša). +- 修正了不正确的行为 `Join` 表引擎(阿莫斯鸟)。 +- 修复了FreeBSD和OS X下不正确的分配器行为。 +- 该 `extractAll` 函数现在支持空匹配。 +- 修复了阻止使用的错误 `libressl` 而不是 `openssl` . +- 修正了 `CREATE TABLE AS SELECT` 从临时表查询。 +- 修复了更新复制队列的非原子性。 这可能导致副本在服务器重新启动之前不同步。 +- 修正了可能的溢出 `gcd` , `lcm` 和 `modulo` (`%` 运营商)(Maks Skorokhod)。 +- `-preprocessed` 现在更改后创建文件 `umask` (`umask` 可以在配置中更改)。 +- 修正了部分的背景检查中的错误 (`MergeTreePartChecker` )使用自定义分区密钥时。 +- 元组的固定解析(的值 `Tuple` 数据类型)的文本格式。 +- 改进了有关传递到的不兼容类型的错误消息 `multiIf` , `array` 和其他一些功能。 +- 重新设计的支持 `Nullable` 类型。 修复了可能导致服务器崩溃的错误。 修正了与几乎所有其他错误 `NULL` 支持:insert SELECT中的类型转换不正确,HAVING和PREWHERE中对Nullable的支持不足, `join_use_nulls` 模式,可以为Null的类型作为参数 `OR` 操作员等。 +- 修正了与数据类型的内部语义相关的各种错误。 例子:不必要的总结 `Enum` 输入字段 `SummingMergeTree` ;对齐 `Enum` 类型 `Pretty` 格式等。 +- 对复合列的允许组合进行更严格的检查。 +- 修复了指定一个非常大的参数时的溢出 `FixedString` 数据类型。 +- 修正了一个错误 `topK` 一般情况下的聚合函数。 +- 在聚合函数的n元变体的参数中添加了对数组大小相等性的缺失检查。 `-Array` combinator +- 修正了一个错误 `--pager` 为 `clickhouse-client` (作者:ks1322)。 +- 固定的精度 `exp10` 功能。 +- 固定的行为 `visitParamExtract` 功能更好地符合文档。 +- 修复了指定不正确的数据类型时的崩溃。 +- 固定的行为 `DISTINCT` 在所有列都是常量的情况下。 +- 在使用的情况下固定的查询格式 `tupleElement` 使用复数常量表达式作为元组元素索引的函数。 +- 修正了一个错误 `Dictionary` 表 `range_hashed` 字典 +- 修正了导致结果中的过多行的错误 `FULL` 和 `RIGHT JOIN` (阿莫斯鸟)。 +- 修复了在创建和删除临时文件时的服务器崩溃 `config.d` 配置重新加载期间的目录。 +- 修正了 `SYSTEM DROP DNS CACHE` 查询:缓存已刷新,但群集节点的地址未更新。 +- 固定的行为 `MATERIALIZED VIEW` 执行后 `DETACH TABLE` for the table under the view (Marek Vavruša). + +#### 构建改进: {#build-improvements-4} + +- 该 `pbuilder` 工具用于构建。 构建过程几乎完全独立于构建主机环境。 +- 单个构建用于不同的操作系统版本。 软件包和二进制文件已经与各种Linux系统兼容。 +- 添加了 `clickhouse-test` 包。 它可用于运行功能测试。 +- 现在可以将源代码包发布到存储库。 它可以用来在不使用GitHub的情况下重现构建。 +- 增加了有限的集成与特拉维斯CI。 由于Travis中的构建时间限制,仅测试调试构建并运行有限的测试子集。 +- 增加了对 `Cap'n'Proto` 在默认构建中。 +- 更改文档来源的格式 `Restricted Text` 到 `Markdown`. +- 增加了对 `systemd` (弗拉基米尔\*斯米尔诺夫)。 默认情况下,由于与某些操作系统映像不兼容,它被禁用,并且可以手动启用。 +- 用于动态代码生成, `clang` 和 `lld` 嵌入到 `clickhouse` 二进制 它们也可以被调用为 `clickhouse clang` 和 `clickhouse lld` . +- 从代码中删除GNU扩展的使用。 启用 `-Wextra` 选项。 当与建设 `clang` 默认值为 `libc++` 而不是 `libstdc++`. 
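+A sketch contrasting the faster variants with the numerically stable ones named above:
+
+```
+SELECT varSamp(x) AS fast, varSampStable(x) AS stable
+FROM (SELECT number AS x FROM system.numbers LIMIT 1000)
+```
+
+The `*Stable` names keep the old, numerically stable implementations; the unsuffixed versions trade some stability for speed.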
+- 提取 `clickhouse_parsers` 和 `clickhouse_common_io` 库,以加快各种工具的构建。 + +#### 向后不兼容的更改: {#backward-incompatible-changes-11} + +- 标记的格式 `Log` 键入包含以下内容的表 `Nullable` 列以向后不兼容的方式进行了更改。 如果你有这些表,你应该将它们转换为 `TinyLog` 在启动新服务器版本之前键入。 要做到这一点,替换 `ENGINE = Log` 与 `ENGINE = TinyLog` 在相应的 `.sql` 文件中的 `metadata` 目录。 如果你的桌子没有 `Nullable` 列或表的类型不是 `Log`,那么你什么都不需要做。 +- 删除了 `experimental_allow_extended_storage_definition_syntax` 设置。 现在,此功能默认启用。 +- 该 `runningIncome` 函数重命名为 `runningDifferenceStartingWithFirstvalue` 为了避免混confusion。 +- 删除了 `FROM ARRAY JOIN arr` 在FROM with no table(Amos Bird)之后直接指定数组连接时的语法。 +- 删除了 `BlockTabSeparated` 仅用于演示目的的格式。 +- 更改聚合函数的状态格式 `varSamp`, `varPop`, `stddevSamp`, `stddevPop`, `covarSamp`, `covarPop`, `corr`. 如果您已将这些聚合函数的状态存储在表中(使用 `AggregateFunction` 数据类型或具体化视图与相应的状态),请写信给clickhouse-feedback@yandex-team.com. +- 在以前的服务器版本中,有一个未记录的功能:如果聚合函数依赖于参数,则仍然可以在AggregateFunction数据类型中指定它而不带参数。 示例: `AggregateFunction(quantiles, UInt64)` 而不是 `AggregateFunction(quantiles(0.5, 0.9), UInt64)`. 此功能已丢失。 虽然它没有记录,但我们计划在未来的版本中再次支持它。 +- 枚举数据类型不能用于最小/最大聚合函数。 这种能力将在下一个版本中返回。 + +#### 升级时请注意: {#please-note-when-upgrading} + +- 当在群集上执行滚动更新时,当某些副本运行旧版本的ClickHouse,而某些副本运行新版本时,复制会暂时停止,并且消息 `unknown parameter 'shard'` 出现在日志中。 更新集群的所有副本后,复制将继续。 +- 如果群集服务器上运行不同版本的ClickHouse,则使用以下函数的分布式查询可能会产生不正确的结果: `varSamp`, `varPop`, `stddevSamp`, `stddevPop`, `covarSamp`, `covarPop`, `corr`. 您应该更新所有群集节点。 + +## [更新日志2017](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/changelog/2017.md) {#changelog-for-2017} diff --git a/docs/zh/whats_new/changelog/2019.md b/docs/zh/whats_new/changelog/2019.md new file mode 100644 index 00000000000..f776141b14a --- /dev/null +++ b/docs/zh/whats_new/changelog/2019.md @@ -0,0 +1,2074 @@ +--- +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +toc_priority: 77 +toc_title: '2019' +--- + +## 碌莽禄.拢.0755-88888888 {#clickhouse-release-v19-17} + +### ClickHouse释放v19.17.6.36,2019-12-27 {#clickhouse-release-v19-17-6-36-2019-12-27} + +#### 错误修复 {#bug-fix} + +- 在解压缩固定潜在的缓冲区溢出。 恶意用户可以传递制造的压缩数据,可能导致缓冲区后读取。 这个问题是由Yandex信息安全团队的Eldar Zaitov发现的。 [\#8404](https://github.com/ClickHouse/ClickHouse/pull/8404) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 修正了可能的服务器崩溃 (`std::terminate`)当服务器不能发送或写入JSON或XML格式的数据与字符串数据类型的值(需要UTF-8验证),或者当压缩结果数据与Brotli算法或在其他一些罕见的情况下。 [\#8384](https://github.com/ClickHouse/ClickHouse/pull/8384) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 从clickhouse源固定字典 `VIEW`,现在阅读这样的字典不会导致错误 `There is no query`. [\#8351](https://github.com/ClickHouse/ClickHouse/pull/8351) ([尼古拉\*科切托夫](https://github.com/KochetovNicolai)) +- 修复了用户中指定的host\_regexp是否允许客户端主机的检查。xml [\#8241](https://github.com/ClickHouse/ClickHouse/pull/8241), [\#8342](https://github.com/ClickHouse/ClickHouse/pull/8342) ([维塔利\*巴拉诺夫](https://github.com/vitlibar)) +- `RENAME TABLE` 对于分布式表,现在在发送到分片之前重命名包含插入数据的文件夹。 这解决了连续重命名的问题 `tableA->tableB`, `tableC->tableA`. [\#8306](https://github.com/ClickHouse/ClickHouse/pull/8306) ([tavplubix](https://github.com/tavplubix)) +- `range_hashed` DDL查询创建的外部字典现在允许任意数字类型的范围。 [\#8275](https://github.com/ClickHouse/ClickHouse/pull/8275) ([阿利沙平](https://github.com/alesapin)) +- 固定 `INSERT INTO table SELECT ... 
FROM mysql(...)` 表功能。 [\#8234](https://github.com/ClickHouse/ClickHouse/pull/8234) ([tavplubix](https://github.com/tavplubix)) +- 修复段错误 `INSERT INTO TABLE FUNCTION file()` 同时插入到一个不存在的文件。 现在在这种情况下,文件将被创建,然后插入将被处理。 [\#8177](https://github.com/ClickHouse/ClickHouse/pull/8177) ([Olga Khvostikova](https://github.com/stavrolia)) +- 修正了聚合位图和标量位图相交时的位图和错误。 [\#8082](https://github.com/ClickHouse/ClickHouse/pull/8082) ([黄月](https://github.com/moon03432)) +- 修复段错误时 `EXISTS` 查询没有使用 `TABLE` 或 `DICTIONARY` 预选赛,就像 `EXISTS t`. [\#8213](https://github.com/ClickHouse/ClickHouse/pull/8213) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 函数的固定返回类型 `rand` 和 `randConstant` 在可为空的参数的情况下。 现在函数总是返回 `UInt32` 而且从来没有 `Nullable(UInt32)`. [\#8204](https://github.com/ClickHouse/ClickHouse/pull/8204) ([尼古拉\*科切托夫](https://github.com/KochetovNicolai)) +- 固定 `DROP DICTIONARY IF EXISTS db.dict`,现在它不会抛出异常,如果 `db` 根本不存在 [\#8185](https://github.com/ClickHouse/ClickHouse/pull/8185) ([维塔利\*巴拉诺夫](https://github.com/vitlibar)) +- 如果由于服务器崩溃而未完全删除表,服务器将尝试恢复并加载它 [\#8176](https://github.com/ClickHouse/ClickHouse/pull/8176) ([tavplubix](https://github.com/tavplubix)) +- 修正了一个简单的计数查询分布式表,如果有两个以上的分片本地表。 [\#8164](https://github.com/ClickHouse/ClickHouse/pull/8164) ([小路](https://github.com/nicelulu)) +- 修正了导致DB::BlockStreamProfileInfo::calculateRowsBeforeLimit数据竞赛的错误() [\#8143](https://github.com/ClickHouse/ClickHouse/pull/8143) ([亚历山大\*卡扎科夫](https://github.com/Akazz)) +- 固定 `ALTER table MOVE part` 在合并指定部件后立即执行,这可能导致移动指定部件合并到的部件。 现在它正确地移动指定的部分。 [\#8104](https://github.com/ClickHouse/ClickHouse/pull/8104) ([Vladimir Chebotarev](https://github.com/excitoon)) +- 字典的表达式现在可以指定为字符串。 这对于从非ClickHouse源中提取数据时计算属性非常有用,因为它允许对这些表达式使用非ClickHouse语法。 [\#8098](https://github.com/ClickHouse/ClickHouse/pull/8098) ([阿利沙平](https://github.com/alesapin)) +- 修正了一个非常罕见的比赛 `clickhouse-copier` 由于ZXid的溢出。 [\#8088](https://github.com/ClickHouse/ClickHouse/pull/8088) ([丁香飞](https://github.com/dingxiangfei2009)) +- 修复了查询失败后的错误(由于 “Too many simultaneous queries” 例如)它不会读取外部表信息,并且 + 下一个请求会将此信息解释为下一个查询的开始,导致如下错误 `Unknown packet from client`. [\#8084](https://github.com/ClickHouse/ClickHouse/pull/8084) ([Azat Khuzhin](https://github.com/azat)) +- 避免空取消引用后 “Unknown packet X from server” [\#8071](https://github.com/ClickHouse/ClickHouse/pull/8071) ([Azat Khuzhin](https://github.com/azat)) +- 恢复对所有ICU区域设置的支持,添加对常量表达式应用排序规则的能力,并将语言名称添加到系统。排序规则表。 [\#8051](https://github.com/ClickHouse/ClickHouse/pull/8051) ([阿利沙平](https://github.com/alesapin)) +- 用于读取的流数 `StorageFile` 和 `StorageHDFS` 现在是有限的,以避免超过内存限制。 [\#7981](https://github.com/ClickHouse/ClickHouse/pull/7981) ([阿利沙平](https://github.com/alesapin)) +- 固定 `CHECK TABLE` 查询为 `*MergeTree` 表没有关键. 
[\#7979](https://github.com/ClickHouse/ClickHouse/pull/7979) ([阿利沙平](https://github.com/alesapin)) +- 如果没有突变,则从部件名称中删除突变编号。 这种删除提高了与旧版本的兼容性。 [\#8250](https://github.com/ClickHouse/ClickHouse/pull/8250) ([阿利沙平](https://github.com/alesapin)) +- 修复了某些附加部分因data\_version大于表突变版本而跳过突变的问题。 [\#7812](https://github.com/ClickHouse/ClickHouse/pull/7812) ([余志昌](https://github.com/yuzhichang)) +- 允许在将部件移动到其他设备后使用冗余副本启动服务器。 [\#7810](https://github.com/ClickHouse/ClickHouse/pull/7810) ([Vladimir Chebotarev](https://github.com/excitoon)) +- 修正了错误 “Sizes of columns doesn’t match” 使用聚合函数列时可能会出现。 [\#7790](https://github.com/ClickHouse/ClickHouse/pull/7790) ([Boris Granveaud](https://github.com/bgranvea)) +- 现在在使用WITH TIES和LIMIT BY的情况下,将抛出一个异常。 现在可以使用TOP with LIMIT BY。 [\#7637](https://github.com/ClickHouse/ClickHouse/pull/7637) ([尼基塔\*米哈伊洛夫](https://github.com/nikitamikhaylov)) +- 修复字典重新加载,如果它有 `invalidate_query`,停止更新,并在以前的更新尝试一些异常。 [\#8029](https://github.com/ClickHouse/ClickHouse/pull/8029) ([阿利沙平](https://github.com/alesapin)) + +### ClickHouse释放v19.17.4.11时,2019-11-22 {#clickhouse-release-v19-17-4-11-2019-11-22} + +#### 向后不兼容的更改 {#backward-incompatible-change} + +- 使用列而不是AST来存储标量子查询结果以获得更好的性能。 设置 `enable_scalar_subquery_optimization` 在19.17中添加,默认情况下启用。 它会导致以下错误 [这](https://github.com/ClickHouse/ClickHouse/issues/7851) 在从以前的版本升级到19.17.2或19.17.3期间。 默认情况下,19.17.4中禁用此设置,以便可以从19.16及更早版本升级而不会出现错误。 [\#7392](https://github.com/ClickHouse/ClickHouse/pull/7392) ([阿莫斯鸟](https://github.com/amosbird)) + +#### 新功能 {#new-feature} + +- 添加使用DDL查询创建字典的功能。 [\#7360](https://github.com/ClickHouse/ClickHouse/pull/7360) ([阿利沙平](https://github.com/alesapin)) +- 赂眉露\>\> `bloom_filter` 支持的索引类型 `LowCardinality` 和 `Nullable` [\#7363](https://github.com/ClickHouse/ClickHouse/issues/7363) [\#7561](https://github.com/ClickHouse/ClickHouse/pull/7561) ([尼古拉\*科切托夫](https://github.com/KochetovNicolai)) +- 添加功能 `isValidJSON` 要检查传递的字符串是否是有效的json。 [\#5910](https://github.com/ClickHouse/ClickHouse/issues/5910) [\#7293](https://github.com/ClickHouse/ClickHouse/pull/7293) ([Vdimir](https://github.com/Vdimir)) +- 执行 `arrayCompact` 功能 [\#7328](https://github.com/ClickHouse/ClickHouse/pull/7328) ([备忘录](https://github.com/Joeywzr)) +- 创建函数 `hex` 对于十进制数。 它的工作原理如下 `hex(reinterpretAsString())`,但不会删除最后的零字节。 [\#7355](https://github.com/ClickHouse/ClickHouse/pull/7355) ([米哈伊尔\*科罗托夫](https://github.com/millb)) +- 添加 `arrayFill` 和 `arrayReverseFill` 函数,用数组中其他元素替换它们前面/后面的元素。 [\#7380](https://github.com/ClickHouse/ClickHouse/pull/7380) ([hcz](https://github.com/hczhcz)) +- 添加 `CRC32IEEE()`/`CRC64()` 碌莽禄support: [\#7480](https://github.com/ClickHouse/ClickHouse/pull/7480) ([Azat Khuzhin](https://github.com/azat)) +- 执行 `char` 功能类似于一个 [mysql](https://dev.mysql.com/doc/refman/8.0/en/string-functions.html#function_char) [\#7486](https://github.com/ClickHouse/ClickHouse/pull/7486) ([sundyli](https://github.com/sundy-li)) +- 添加 `bitmapTransform` 功能。 它将位图中的值数组转换为另一个值数组,结果是一个新的位图 [\#7598](https://github.com/ClickHouse/ClickHouse/pull/7598) ([余志昌](https://github.com/yuzhichang)) +- 已实施 `javaHashUTF16LE()` 功能 [\#7651](https://github.com/ClickHouse/ClickHouse/pull/7651) ([achimbab](https://github.com/achimbab)) +- 添加 `_shard_num` 分布式引擎的虚拟列 [\#7624](https://github.com/ClickHouse/ClickHouse/pull/7624) ([Azat Khuzhin](https://github.com/azat)) + +#### 实验特点 {#experimental-feature} + +- 支持处理器(新的查询执行管道) `MergeTree`. 
[\#7181](https://github.com/ClickHouse/ClickHouse/pull/7181) ([尼古拉\*科切托夫](https://github.com/KochetovNicolai)) + +#### 错误修复 {#bug-fix-1} + +- 修复不正确的浮点解析 `Values` [\#7817](https://github.com/ClickHouse/ClickHouse/issues/7817) [\#7870](https://github.com/ClickHouse/ClickHouse/pull/7870) ([tavplubix](https://github.com/tavplubix)) +- 修复启用trace\_log时可能发生的罕见死锁。 [\#7838](https://github.com/ClickHouse/ClickHouse/pull/7838) ([filimonov](https://github.com/filimonov)) +- 当生成Kafka表时有任何从中选择的Mv时,防止消息重复 [\#7265](https://github.com/ClickHouse/ClickHouse/pull/7265) ([伊万](https://github.com/abyss7)) +- 支持 `Array(LowCardinality(Nullable(String)))` 在 `IN`. 决定 [\#7364](https://github.com/ClickHouse/ClickHouse/issues/7364) [\#7366](https://github.com/ClickHouse/ClickHouse/pull/7366) ([achimbab](https://github.com/achimbab)) +- 添加处理 `SQL_TINYINT` 和 `SQL_BIGINT`,并修复处理 `SQL_FLOAT` ODBC桥中的数据源类型。 [\#7491](https://github.com/ClickHouse/ClickHouse/pull/7491) ([Denis Glazachev](https://github.com/traceon)) +- 修复聚合 (`avg` 和分位数)在空的十进制列 [\#7431](https://github.com/ClickHouse/ClickHouse/pull/7431) ([安德烈\*科尼亚耶夫](https://github.com/akonyaev90)) +- 修复 `INSERT` 变成分布式 `MATERIALIZED` 列 [\#7377](https://github.com/ClickHouse/ClickHouse/pull/7377) ([Azat Khuzhin](https://github.com/azat)) +- 赂眉露\>\> `MOVE PARTITION` 如果分区的某些部分已经在目标磁盘或卷上,则可以工作 [\#7434](https://github.com/ClickHouse/ClickHouse/pull/7434) ([Vladimir Chebotarev](https://github.com/excitoon)) +- 修正了在突变过程中无法创建硬链接的错误 `ReplicatedMergeTree` 在多磁盘配置。 [\#7558](https://github.com/ClickHouse/ClickHouse/pull/7558) ([Vladimir Chebotarev](https://github.com/excitoon)) +- 修复了当整个部分保持不变并且在另一个磁盘上找到最佳空间时,MergeTree上出现突变的错误 [\#7602](https://github.com/ClickHouse/ClickHouse/pull/7602) ([Vladimir Chebotarev](https://github.com/excitoon)) +- 修正错误 `keep_free_space_ratio` 未从磁盘读取配置 [\#7645](https://github.com/ClickHouse/ClickHouse/pull/7645) ([Vladimir Chebotarev](https://github.com/excitoon)) +- 修正错误与表只包含 `Tuple` 列或具有复杂路径的列。 修复 [7541](https://github.com/ClickHouse/ClickHouse/issues/7541). [\#7545](https://github.com/ClickHouse/ClickHouse/pull/7545) ([阿利沙平](https://github.com/alesapin)) +- 在max\_memory\_usage限制中不考虑缓冲区引擎的内存 [\#7552](https://github.com/ClickHouse/ClickHouse/pull/7552) ([Azat Khuzhin](https://github.com/azat)) +- 修复最终标记用法 `MergeTree` 表排序 `tuple()`. 在极少数情况下,它可能会导致 `Can't adjust last granule` 选择时出错。 [\#7639](https://github.com/ClickHouse/ClickHouse/pull/7639) ([安东\*波波夫](https://github.com/CurtizJ)) +- 修复了需要上下文操作(例如json函数)的谓词突变中的错误,这可能会导致崩溃或奇怪的异常。 [\#7664](https://github.com/ClickHouse/ClickHouse/pull/7664) ([阿利沙平](https://github.com/alesapin)) +- 修复转义的数据库和表名称不匹配 `data/` 和 `shadow/` 目录 [\#7575](https://github.com/ClickHouse/ClickHouse/pull/7575) ([Alexander Burmak](https://github.com/Alex-Burmak)) +- Support duplicated keys in RIGHT\|FULL JOINs, e.g. `ON t.x = u.x AND t.x = u.y`. 
在这种情况下修复崩溃。 [\#7586](https://github.com/ClickHouse/ClickHouse/pull/7586) ([Artem Zuikov](https://github.com/4ertus2)) +- 修复 `Not found column in block` 当加入表达式与权利或完全连接。 [\#7641](https://github.com/ClickHouse/ClickHouse/pull/7641) ([Artem Zuikov](https://github.com/4ertus2)) +- 再次尝试修复无限循环 `PrettySpace` 格式 [\#7591](https://github.com/ClickHouse/ClickHouse/pull/7591) ([Olga Khvostikova](https://github.com/stavrolia)) +- 修复bug `concat` 函数时,所有的参数 `FixedString` 同样大小的 [\#7635](https://github.com/ClickHouse/ClickHouse/pull/7635) ([阿利沙平](https://github.com/alesapin)) +- 在定义S3,URL和HDFS存储时使用1个参数的情况下修复了异常。 [\#7618](https://github.com/ClickHouse/ClickHouse/pull/7618) ([Vladimir Chebotarev](https://github.com/excitoon)) +- 修复查询视图的InterpreterSelectQuery的范围 [\#7601](https://github.com/ClickHouse/ClickHouse/pull/7601) ([Azat Khuzhin](https://github.com/azat)) + +#### 改进 {#improvement} + +- `Nullable` ODBC-bridge可识别的列和正确处理的NULL值 [\#7402](https://github.com/ClickHouse/ClickHouse/pull/7402) ([瓦西里\*内姆科夫](https://github.com/Enmk)) +- 以原子方式写入分布式发送的当前批次 [\#7600](https://github.com/ClickHouse/ClickHouse/pull/7600) ([Azat Khuzhin](https://github.com/azat)) +- 如果我们无法在查询中检测到列名称的表,则引发异常。 [\#7358](https://github.com/ClickHouse/ClickHouse/pull/7358) ([Artem Zuikov](https://github.com/4ertus2)) +- 添加 `merge_max_block_size` 设置为 `MergeTreeSettings` [\#7412](https://github.com/ClickHouse/ClickHouse/pull/7412) ([Artem Zuikov](https://github.com/4ertus2)) +- 查询与 `HAVING` 而没有 `GROUP BY` 假设按常量分组。 所以, `SELECT 1 HAVING 1` 现在返回一个结果。 [\#7496](https://github.com/ClickHouse/ClickHouse/pull/7496) ([阿莫斯鸟](https://github.com/amosbird)) +- 支持解析 `(X,)` 作为类似python的元组。 [\#7501](https://github.com/ClickHouse/ClickHouse/pull/7501), [\#7562](https://github.com/ClickHouse/ClickHouse/pull/7562) ([阿莫斯鸟](https://github.com/amosbird)) +- 赂眉露\>\> `range` 函数行为几乎像pythonic。 [\#7518](https://github.com/ClickHouse/ClickHouse/pull/7518) ([sundyli](https://github.com/sundy-li)) +- 添加 `constraints` 列到表 `system.settings` [\#7553](https://github.com/ClickHouse/ClickHouse/pull/7553) ([维塔利\*巴拉诺夫](https://github.com/vitlibar)) +- Tcp处理程序的更好的Null格式,以便可以使用 `select ignore() from table format Null` 通过clickhouse-client进行性能测量 [\#7606](https://github.com/ClickHouse/ClickHouse/pull/7606) ([阿莫斯鸟](https://github.com/amosbird)) +- 查询如 `CREATE TABLE ... AS (SELECT (1, 2))` 正确解析 [\#7542](https://github.com/ClickHouse/ClickHouse/pull/7542) ([hcz](https://github.com/hczhcz)) + +#### 性能改进 {#performance-improvement} + +- 改进了对短字符串键的聚合性能。 [\#6243](https://github.com/ClickHouse/ClickHouse/pull/6243) ([Alexander Kuzmenkov](https://github.com/akuzm), [阿莫斯鸟](https://github.com/amosbird)) +- 运行另一次语法/表达式分析以在常量谓词折叠后获得潜在的优化。 [\#7497](https://github.com/ClickHouse/ClickHouse/pull/7497) ([阿莫斯鸟](https://github.com/amosbird)) +- 使用存储元信息来评估琐碎 `SELECT count() FROM table;` [\#7510](https://github.com/ClickHouse/ClickHouse/pull/7510) ([阿莫斯鸟](https://github.com/amosbird), [阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 矢量化处理 `arrayReduce` 与聚合器类似 `addBatch`. 
[\#7608](https://github.com/ClickHouse/ClickHouse/pull/7608) ([阿莫斯鸟](https://github.com/amosbird)) +- 在性能的小改进 `Kafka` 消费 [\#7475](https://github.com/ClickHouse/ClickHouse/pull/7475) ([伊万](https://github.com/abyss7)) + +#### 构建/测试/包装改进 {#buildtestingpackaging-improvement} + +- 添加对交叉编译的支持到CPU架构AARCH64。 重构打包器脚本。 [\#7370](https://github.com/ClickHouse/ClickHouse/pull/7370) [\#7539](https://github.com/ClickHouse/ClickHouse/pull/7539) ([伊万](https://github.com/abyss7)) +- 在构建软件包时,将darwin-x86\_64和linux-aarch64工具链解压缩到已挂载的Docker卷中 [\#7534](https://github.com/ClickHouse/ClickHouse/pull/7534) ([伊万](https://github.com/abyss7)) +- 更新二进制打包器的Docker映像 [\#7474](https://github.com/ClickHouse/ClickHouse/pull/7474) ([伊万](https://github.com/abyss7)) +- 修复了MacOS Catalina上的编译错误 [\#7585](https://github.com/ClickHouse/ClickHouse/pull/7585) ([欧内斯特\*波列塔耶夫](https://github.com/ernestp)) +- 查询分析逻辑中的一些重构:将复杂的类拆分为几个简单的类。 [\#7454](https://github.com/ClickHouse/ClickHouse/pull/7454) ([Artem Zuikov](https://github.com/4ertus2)) +- 修复没有子模块的构建 [\#7295](https://github.com/ClickHouse/ClickHouse/pull/7295) ([proller](https://github.com/proller)) +- 更好 `add_globs` 在CMake文件中 [\#7418](https://github.com/ClickHouse/ClickHouse/pull/7418) ([阿莫斯鸟](https://github.com/amosbird)) +- 删除硬编码路径 `unwind` 目标 [\#7460](https://github.com/ClickHouse/ClickHouse/pull/7460) ([Konstantin Podshumok](https://github.com/podshumok)) +- 允许在没有ssl的情况下使用mysql格式 [\#7524](https://github.com/ClickHouse/ClickHouse/pull/7524) ([proller](https://github.com/proller)) + +#### 其他 {#other} + +- 为ClickHouse SQL方言添加了ANTLR4语法 [\#7595](https://github.com/ClickHouse/ClickHouse/issues/7595) [\#7596](https://github.com/ClickHouse/ClickHouse/pull/7596) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) + +## 碌莽禄.拢.0755-88888888 {#clickhouse-release-v19-16} + +#### ClickHouse版本v19.16.14.65,2020-03-25 {#clickhouse-release-v19-16-14-65-2020-03-25} + +- 修复了多个参数(超过10)的三元逻辑运算批量计算中的错误。 [\#8718](https://github.com/ClickHouse/ClickHouse/pull/8718) ([亚历山大\*卡扎科夫](https://github.com/Akazz))这个错误修正是由Altinity的特殊要求回移到版本19.16的。 + +#### ClickHouse释放v19.16.14.65,2020-03-05 {#clickhouse-release-v19-16-14-65-2020-03-05} + +- 修复分布式子查询与旧版本的CH不兼容。 修复 [\#7851](https://github.com/ClickHouse/ClickHouse/issues/7851) + [(tabplubix)](https://github.com/tavplubix) +- 执行时 `CREATE` 查询,在存储引擎参数中折叠常量表达式。 将空数据库名称替换为当前数据库。 修复 [\#6508](https://github.com/ClickHouse/ClickHouse/issues/6508), [\#3492](https://github.com/ClickHouse/ClickHouse/issues/3492). 还修复检查本地地址 `ClickHouseDictionarySource`. + [\#9262](https://github.com/ClickHouse/ClickHouse/pull/9262) [(tabplubix)](https://github.com/tavplubix) +- 现在背景合并 `*MergeTree` 表引擎家族更准确地保留存储策略卷顺序。 + [\#8549](https://github.com/ClickHouse/ClickHouse/pull/8549) ([Vladimir Chebotarev](https://github.com/excitoon)) +- 防止丢失数据 `Kafka` 在极少数情况下,在读取后缀之后但在提交之前发生异常。 修复 [\#9378](https://github.com/ClickHouse/ClickHouse/issues/9378). 相关: [\#7175](https://github.com/ClickHouse/ClickHouse/issues/7175) + [\#9507](https://github.com/ClickHouse/ClickHouse/pull/9507) [(菲利蒙诺夫)](https://github.com/filimonov) +- 修复尝试使用/删除时导致服务器终止的错误 `Kafka` 使用错误的参数创建的表。 修复 [\#9494](https://github.com/ClickHouse/ClickHouse/issues/9494). 结合 [\#9507](https://github.com/ClickHouse/ClickHouse/issues/9507). 
+ [\#9513](https://github.com/ClickHouse/ClickHouse/pull/9513) [(菲利蒙诺夫)](https://github.com/filimonov) +- 允许使用 `MaterializedView` 与上面的子查询 `Kafka` 桌子 + [\#8197](https://github.com/ClickHouse/ClickHouse/pull/8197) ([filimonov](https://github.com/filimonov)) + +#### 新功能 {#new-feature-1} + +- 添加 `deduplicate_blocks_in_dependent_materialized_views` 用于控制具有实例化视图的表中幂等插入的行为的选项。 这个新功能是由Altinity的特殊要求添加到错误修正版本中的。 + [\#9070](https://github.com/ClickHouse/ClickHouse/pull/9070) [(urykhy)](https://github.com/urykhy) + +### ClickHouse发布版本v19.16.2.2,2019-10-30 {#clickhouse-release-v19-16-2-2-2019-10-30} + +#### 向后不兼容的更改 {#backward-incompatible-change-1} + +- 为count/counIf添加缺失的验证。 + [\#7095](https://github.com/ClickHouse/ClickHouse/issues/7095) + [\#7298](https://github.com/ClickHouse/ClickHouse/pull/7298) ([Vdimir](https://github.com/Vdimir)) +- 删除旧版 `asterisk_left_columns_only` 设置(默认情况下禁用)。 + [\#7335](https://github.com/ClickHouse/ClickHouse/pull/7335) ([阿尔乔姆 + Zuikov](https://github.com/4ertus2)) +- 模板数据格式的格式字符串现在在文件中指定。 + [\#7118](https://github.com/ClickHouse/ClickHouse/pull/7118) + ([tavplubix](https://github.com/tavplubix)) + +#### 新功能 {#new-feature-2} + +- 引入uniqCombined64()来计算大于UINT\_MAX的基数。 + [\#7213](https://github.com/ClickHouse/ClickHouse/pull/7213), + [\#7222](https://github.com/ClickHouse/ClickHouse/pull/7222) ([Azat + Khuzhin](https://github.com/azat)) +- 支持数组列上的Bloom filter索引。 + [\#6984](https://github.com/ClickHouse/ClickHouse/pull/6984) + ([achimbab](https://github.com/achimbab)) +- 添加函数 `getMacro(name)` 返回与相应值的字符串 `` + 从服务器配置. [\#7240](https://github.com/ClickHouse/ClickHouse/pull/7240) + ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 为基于HTTP源的字典设置两个配置选项: `credentials` 和 + `http-headers`. [\#7092](https://github.com/ClickHouse/ClickHouse/pull/7092) ([纪尧姆 + Tassery](https://github.com/YiuRULE)) +- 添加新的ProfileEvent `Merge` 这计算启动的背景合并的数量。 + [\#7093](https://github.com/ClickHouse/ClickHouse/pull/7093) ([米哈伊尔 + 科罗托夫](https://github.com/millb)) +- 添加返回完全限定域名的fullHostName函数。 + [\#7263](https://github.com/ClickHouse/ClickHouse/issues/7263) + [\#7291](https://github.com/ClickHouse/ClickHouse/pull/7291) ([sundyli](https://github.com/sundy-li)) +- 添加功能 `arraySplit` 和 `arrayReverseSplit` 通过拆分数组 “cut off” + 条件。 它们在时间序列处理中非常有用。 + [\#7294](https://github.com/ClickHouse/ClickHouse/pull/7294) ([hcz](https://github.com/hczhcz)) +- 添加返回multiMatch函数系列中所有匹配索引的数组的新函数。 + [\#7299](https://github.com/ClickHouse/ClickHouse/pull/7299) ([Danila + 库特宁](https://github.com/danlark1)) +- 添加新的数据库引擎 `Lazy` 即针对存储大量小日志进行了优化 + 桌子 [\#7171](https://github.com/ClickHouse/ClickHouse/pull/7171) ([尼基塔 + Vasilev](https://github.com/nikvas0)) +- 为位图列添加聚合函数groupBitmapAnd,-或-Xor。 [\#7109](https://github.com/ClickHouse/ClickHouse/pull/7109) ([知昌 + 阿优](https://github.com/yuzhichang)) +- 添加聚合函数组合器-OrNull和-OrDefault,它们返回null + 或默认值时没有任何聚合。 + [\#7331](https://github.com/ClickHouse/ClickHouse/pull/7331) + ([hcz](https://github.com/hczhcz)) +- 引入支持自定义转义的CustomSeparated数据格式 + 分隔符规则。 [\#7118](https://github.com/ClickHouse/ClickHouse/pull/7118) + ([tavplubix](https://github.com/tavplubix)) +- 支持Redis作为外部字典的来源。 [\#4361](https://github.com/ClickHouse/ClickHouse/pull/4361) [\#6962](https://github.com/ClickHouse/ClickHouse/pull/6962) ([comunodi](https://github.com/comunodi), [安东 + 波波夫](https://github.com/CurtizJ)) + +#### 错误修复 {#bug-fix-2} + +- 修复错误的查询结果,如果它有 `WHERE IN (SELECT ...)` 部分和 `optimize_read_in_order` 是 + 使用。 [\#7371](https://github.com/ClickHouse/ClickHouse/pull/7371) ([安东 + 波波夫](https://github.com/CurtizJ)) +- 
+
+#### Bug Fix {#bug-fix-2}
+
+- Fix incorrect query result if it has a `WHERE IN (SELECT ...)` section and `optimize_read_in_order` is used. [\#7371](https://github.com/ClickHouse/ClickHouse/pull/7371) ([Anton Popov](https://github.com/CurtizJ))
+- Disabled the MariaDB authentication plugin, which depends on files outside of the project. [\#7140](https://github.com/ClickHouse/ClickHouse/pull/7140) ([Yuriy Baranov](https://github.com/yurriy))
+- Fix exception `Cannot convert column ... because it is constant but values of constants are different in source and result` which could rarely happen when the functions `now()`, `today()`, `yesterday()`, `randConstant()` are used. [\#7156](https://github.com/ClickHouse/ClickHouse/pull/7156) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fixed the issue of using the HTTP keep-alive timeout instead of the TCP keep-alive timeout. [\#7351](https://github.com/ClickHouse/ClickHouse/pull/7351) ([Vasily Nemkov](https://github.com/Enmk))
+- Fixed a segmentation fault in groupBitmapOr (issue [\#7109](https://github.com/ClickHouse/ClickHouse/issues/7109)). [\#7289](https://github.com/ClickHouse/ClickHouse/pull/7289) ([Zhichang Yu](https://github.com/yuzhichang))
+- For materialized views, the commit for Kafka is called after all data were written. [\#7175](https://github.com/ClickHouse/ClickHouse/pull/7175) ([Ivan](https://github.com/abyss7))
+- Fixed the wrong `duration_ms` value in the `system.part_log` table. It was off by ten times. [\#7172](https://github.com/ClickHouse/ClickHouse/pull/7172) ([Vladimir Chebotarev](https://github.com/excitoon))
+- A quick fix to resolve the crash in the LIVE VIEW table and re-enable all LIVE VIEW tests. [\#7201](https://github.com/ClickHouse/ClickHouse/pull/7201) ([vzakaznikov](https://github.com/vzakaznikov))
+- Serialize NULL values correctly in min/max indexes of MergeTree parts. [\#7234](https://github.com/ClickHouse/ClickHouse/pull/7234) ([Alexander Kuzmenkov](https://github.com/akuzm))
+- Don't put virtual columns into the .sql metadata when the table is created as `CREATE TABLE AS`. [\#7183](https://github.com/ClickHouse/ClickHouse/pull/7183) ([Ivan](https://github.com/abyss7))
+- Fix segmentation fault in the `ATTACH PART` query. [\#7185](https://github.com/ClickHouse/ClickHouse/pull/7185) ([alesapin](https://github.com/alesapin))
+- Fixed wrong results for some queries given by the optimization of empty IN subqueries and empty INNER/RIGHT JOIN. [\#7284](https://github.com/ClickHouse/ClickHouse/pull/7284) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fixing the AddressSanitizer error in the LIVE VIEW getHeader() method. [\#7271](https://github.com/ClickHouse/ClickHouse/pull/7271) ([vzakaznikov](https://github.com/vzakaznikov))
+
+#### Improvement {#improvement-1}
+
+- Add a message in case a queue\_wait\_max\_ms wait takes place. [\#7390](https://github.com/ClickHouse/ClickHouse/pull/7390) ([Azat Khuzhin](https://github.com/azat))
+- Made the setting `s3_min_upload_part_size` table-level. [\#7059](https://github.com/ClickHouse/ClickHouse/pull/7059) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Check TTL in StorageFactory. [\#7304](https://github.com/ClickHouse/ClickHouse/pull/7304) ([sundyli](https://github.com/sundy-li))
+- Squash left-hand blocks in partial merge join (optimization). [\#7122](https://github.com/ClickHouse/ClickHouse/pull/7122) ([Artem Zuikov](https://github.com/4ertus2))
+- Do not allow non-deterministic functions in mutations of replicated table engines, because this can introduce inconsistencies between replicas. [\#7247](https://github.com/ClickHouse/ClickHouse/pull/7247) ([Alexander Kazakov](https://github.com/Akazz))
+- Disable the memory tracker while converting an exception stack trace to a string. It can prevent the loss of error messages of type `Memory limit exceeded` on the server, which caused the `Attempt to read after eof` exception on the client. [\#7264](https://github.com/ClickHouse/ClickHouse/pull/7264) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Miscellaneous format improvements. Resolves [\#6033](https://github.com/ClickHouse/ClickHouse/issues/6033), [\#2633](https://github.com/ClickHouse/ClickHouse/issues/2633), [\#6611](https://github.com/ClickHouse/ClickHouse/issues/6611), [\#6742](https://github.com/ClickHouse/ClickHouse/issues/6742) [\#7215](https://github.com/ClickHouse/ClickHouse/pull/7215) ([tavplubix](https://github.com/tavplubix))
+- ClickHouse ignores values on the right side of the IN operator that are not convertible to the left side type. Make it work properly for compound types – Array and Tuple. [\#7283](https://github.com/ClickHouse/ClickHouse/pull/7283) ([Alexander Kuzmenkov](https://github.com/akuzm))
+- Support missing inequalities for ASOF JOIN. It's possible to join the less-or-equal variant and the strict greater and less variants for the ASOF column in ON syntax; see the sketch after this list. [\#7282](https://github.com/ClickHouse/ClickHouse/pull/7282) ([Artem Zuikov](https://github.com/4ertus2))
+- Optimize partial merge join. [\#7070](https://github.com/ClickHouse/ClickHouse/pull/7070) ([Artem Zuikov](https://github.com/4ertus2))
+- Do not use more than 98K of memory in uniqCombined functions. [\#7236](https://github.com/ClickHouse/ClickHouse/pull/7236), [\#7270](https://github.com/ClickHouse/ClickHouse/pull/7270) ([Azat Khuzhin](https://github.com/azat))
+- Flush parts of the right-hand joining table on disk in PartialMergeJoin (if there is not enough memory). Load data back when needed. [\#7186](https://github.com/ClickHouse/ClickHouse/pull/7186) ([Artem Zuikov](https://github.com/4ertus2))
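+
+A hedged sketch of the extended `ASOF JOIN` inequality syntax (table and column names are hypothetical):
+
+``` sql
+-- Join each trade with the latest quote at or before its timestamp:
+SELECT t.symbol, t.ts, q.price
+FROM trades AS t
+ASOF JOIN quotes AS q
+ON t.symbol = q.symbol AND t.ts >= q.ts;
+```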
+
+#### Performance Improvement {#performance-improvement-1}
+
+- Speed up joinGet with const arguments by avoiding data duplication. [\#7359](https://github.com/ClickHouse/ClickHouse/pull/7359) ([Amos Bird](https://github.com/amosbird))
+- Return early if the subquery is empty. [\#7007](https://github.com/ClickHouse/ClickHouse/pull/7007) ([小路](https://github.com/nicelulu))
+- Optimize parsing of SQL expressions in Values. [\#6781](https://github.com/ClickHouse/ClickHouse/pull/6781) ([tavplubix](https://github.com/tavplubix))
+
+#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-1}
+
+- Disable some contribs for cross-compilation to Mac OS. [\#7101](https://github.com/ClickHouse/ClickHouse/pull/7101) ([Ivan](https://github.com/abyss7))
+- Add missing linking with PocoXML for clickhouse\_common\_io. [\#7200](https://github.com/ClickHouse/ClickHouse/pull/7200) ([Azat Khuzhin](https://github.com/azat))
+- Accept multiple test filter arguments in clickhouse-test. [\#7226](https://github.com/ClickHouse/ClickHouse/pull/7226) ([Alexander Kuzmenkov](https://github.com/akuzm))
+- Enable musl and jemalloc for ARM. [\#7300](https://github.com/ClickHouse/ClickHouse/pull/7300) ([Amos Bird](https://github.com/amosbird))
+- Added a `--client-option` parameter to `clickhouse-test` to pass additional parameters to the client. [\#7277](https://github.com/ClickHouse/ClickHouse/pull/7277) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Preserve existing configs on rpm package upgrade. [\#7103](https://github.com/ClickHouse/ClickHouse/pull/7103) ([filimonov](https://github.com/filimonov))
+- Fix errors detected by PVS. [\#7153](https://github.com/ClickHouse/ClickHouse/pull/7153) ([Artem Zuikov](https://github.com/4ertus2))
+- Fix build for Darwin. [\#7149](https://github.com/ClickHouse/ClickHouse/pull/7149) ([Ivan](https://github.com/abyss7))
+- glibc 2.29 compatibility. [\#7142](https://github.com/ClickHouse/ClickHouse/pull/7142) ([Amos Bird](https://github.com/amosbird))
+- Make sure dh\_clean does not touch potential source files. [\#7205](https://github.com/ClickHouse/ClickHouse/pull/7205) ([Amos Bird](https://github.com/amosbird))
+- Attempt to avoid conflicts when updating from altinity rpm – it has the config file packaged separately in clickhouse-server-common. [\#7073](https://github.com/ClickHouse/ClickHouse/pull/7073) ([filimonov](https://github.com/filimonov))
+- Optimize some header files for faster rebuilds. [\#7212](https://github.com/ClickHouse/ClickHouse/pull/7212), [\#7231](https://github.com/ClickHouse/ClickHouse/pull/7231) ([Alexander Kuzmenkov](https://github.com/akuzm))
+- Add performance tests for Date and DateTime. [\#7332](https://github.com/ClickHouse/ClickHouse/pull/7332) ([Vasily Nemkov](https://github.com/Enmk))
+- Fix some tests that contained non-deterministic mutations. [\#7132](https://github.com/ClickHouse/ClickHouse/pull/7132) ([Alexander Kazakov](https://github.com/Akazz))
+- Add build with MemorySanitizer to CI. [\#7066](https://github.com/ClickHouse/ClickHouse/pull/7066) ([Alexander Kuzmenkov](https://github.com/akuzm))
+- Avoid use of uninitialized values in MetricsTransmitter. [\#7158](https://github.com/ClickHouse/ClickHouse/pull/7158) ([Azat Khuzhin](https://github.com/azat))
+- Fix some issues in Fields found by MemorySanitizer. [\#7135](https://github.com/ClickHouse/ClickHouse/pull/7135), [\#7179](https://github.com/ClickHouse/ClickHouse/pull/7179) ([Alexander Kuzmenkov](https://github.com/akuzm)), [\#7376](https://github.com/ClickHouse/ClickHouse/pull/7376) ([Amos Bird](https://github.com/amosbird))
+- Fix undefined behavior in murmurhash32. [\#7388](https://github.com/ClickHouse/ClickHouse/pull/7388) ([Amos Bird](https://github.com/amosbird))
+- Fix undefined behavior in StoragesInfoStream. [\#7384](https://github.com/ClickHouse/ClickHouse/pull/7384) ([tavplubix](https://github.com/tavplubix))
+- Fixed constant expressions folding for external database engines (MySQL, ODBC, JDBC). In previous versions it wasn't working for multiple constant expressions and wasn't working at all for Date, DateTime and UUID. This fixes [\#7245](https://github.com/ClickHouse/ClickHouse/issues/7245) [\#7252](https://github.com/ClickHouse/ClickHouse/pull/7252) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixing a ThreadSanitizer data race error in LIVE VIEW when accessing the no\_users\_thread variable. [\#7353](https://github.com/ClickHouse/ClickHouse/pull/7353) ([vzakaznikov](https://github.com/vzakaznikov))
+- Get rid of malloc symbols in libcommon. [\#7134](https://github.com/ClickHouse/ClickHouse/pull/7134), [\#7065](https://github.com/ClickHouse/ClickHouse/pull/7065) ([Amos Bird](https://github.com/amosbird))
+- Add global flag ENABLE\_LIBRARIES for disabling all libraries. [\#7063](https://github.com/ClickHouse/ClickHouse/pull/7063) ([proller](https://github.com/proller))
+
+#### Code Cleanup {#code-cleanup}
+
+- Generalize the configuration repository to prepare for DDL for Dictionaries. [\#7155](https://github.com/ClickHouse/ClickHouse/pull/7155) ([alesapin](https://github.com/alesapin))
+- Parser for dictionaries DDL without any semantics. [\#7209](https://github.com/ClickHouse/ClickHouse/pull/7209) ([alesapin](https://github.com/alesapin))
+- Split ParserCreateQuery into different smaller parsers. [\#7253](https://github.com/ClickHouse/ClickHouse/pull/7253) ([alesapin](https://github.com/alesapin))
+- Small refactoring and renaming near external dictionaries. [\#7111](https://github.com/ClickHouse/ClickHouse/pull/7111) ([alesapin](https://github.com/alesapin))
+- Refactor some code to prepare for role-based access control. [\#7235](https://github.com/ClickHouse/ClickHouse/pull/7235) ([Vitaly Baranov](https://github.com/vitlibar))
+- Some improvements in DatabaseOrdinary code. [\#7086](https://github.com/ClickHouse/ClickHouse/pull/7086) ([Nikita Vasilev](https://github.com/nikvas0))
+- Do not use iterators in the find() and emplace() methods of hash tables. [\#7026](https://github.com/ClickHouse/ClickHouse/pull/7026) ([Alexander Kuzmenkov](https://github.com/akuzm))
+- Fix getMultipleValuesFromConfig in case when the parameter root is not empty. [\#7374](https://github.com/ClickHouse/ClickHouse/pull/7374) ([Mikhail Korotov](https://github.com/millb))
+- Remove some copy-paste (TemporaryFile and TemporaryFileStream). [\#7166](https://github.com/ClickHouse/ClickHouse/pull/7166) ([Artem Zuikov](https://github.com/4ertus2))
+- Improved code readability a little bit (`MergeTreeData::getActiveContainingPart`). [\#7361](https://github.com/ClickHouse/ClickHouse/pull/7361) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Wait for all scheduled jobs which use local objects if `ThreadPool::schedule(...)` throws an exception. Rename `ThreadPool::schedule(...)` to `ThreadPool::scheduleOrThrowOnError(...)` and fix the comments to make it obvious that it may throw. [\#7350](https://github.com/ClickHouse/ClickHouse/pull/7350) ([tavplubix](https://github.com/tavplubix))
+
+## ClickHouse Release 19.15 {#clickhouse-release-19-15}
+
+### ClickHouse Release 19.15.4.10, 2019-10-31 {#clickhouse-release-19-15-4-10-2019-10-31}
+
+#### Bug Fix {#bug-fix-3}
+
+- Added handling of SQL\_TINYINT and SQL\_BIGINT, and fixed handling of the SQL\_FLOAT data source type in ODBC Bridge. [\#7491](https://github.com/ClickHouse/ClickHouse/pull/7491) ([Denis Glazachev](https://github.com/traceon))
+- Allowed to have some parts on the destination disk or volume in MOVE PARTITION. [\#7434](https://github.com/ClickHouse/ClickHouse/pull/7434) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Fixed NULL values in nullable columns through the ODBC bridge. [\#7402](https://github.com/ClickHouse/ClickHouse/pull/7402) ([Vasily Nemkov](https://github.com/Enmk))
+- Fixed INSERT into a Distributed non-local node with MATERIALIZED columns. [\#7377](https://github.com/ClickHouse/ClickHouse/pull/7377) ([Azat Khuzhin](https://github.com/azat))
+- Fixed the function getMultipleValuesFromConfig. [\#7374](https://github.com/ClickHouse/ClickHouse/pull/7374) ([Mikhail Korotov](https://github.com/millb))
+- Fixed the issue of using the HTTP keep-alive timeout instead of the TCP keep-alive timeout. [\#7351](https://github.com/ClickHouse/ClickHouse/pull/7351) ([Vasily Nemkov](https://github.com/Enmk))
+- Wait for all jobs to finish on exception (fixes rare segfaults). [\#7350](https://github.com/ClickHouse/ClickHouse/pull/7350) ([tavplubix](https://github.com/tavplubix))
+- Don't push to MVs when inserting into a Kafka table. [\#7265](https://github.com/ClickHouse/ClickHouse/pull/7265) ([Ivan](https://github.com/abyss7))
+- Disable the memory tracker for the exception stack. [\#7264](https://github.com/ClickHouse/ClickHouse/pull/7264) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fixed bad code in transforming queries for external databases. [\#7252](https://github.com/ClickHouse/ClickHouse/pull/7252) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Avoid use of uninitialized values in MetricsTransmitter. [\#7158](https://github.com/ClickHouse/ClickHouse/pull/7158) ([Azat Khuzhin](https://github.com/azat))
+- Added an example config with macros for tests. ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+### ClickHouse Release 19.15.3.6, 2019-10-09 {#clickhouse-release-19-15-3-6-2019-10-09}
+
+#### Bug Fix {#bug-fix-4}
+
+- Fixed bad\_variant in hashed dictionary. ([alesapin](https://github.com/alesapin))
+- Fixed a bug with segmentation fault in the ATTACH PART query. ([alesapin](https://github.com/alesapin))
+- Fixed time calculation in `MergeTreeData`. ([Vladimir Chebotarev](https://github.com/excitoon))
+- Commit to Kafka explicitly after the writing is finalized. [\#7175](https://github.com/ClickHouse/ClickHouse/pull/7175) ([Ivan](https://github.com/abyss7))
+- Serialize NULL values correctly in min/max indexes of MergeTree parts. [\#7234](https://github.com/ClickHouse/ClickHouse/pull/7234) ([Alexander Kuzmenkov](https://github.com/akuzm))
+
+### ClickHouse Release 19.15.2.2, 2019-10-01 {#clickhouse-release-19-15-2-2-2019-10-01}
+
+#### New Feature {#new-feature-3}
+
+- Tiered storage: support using multiple storage volumes for tables with the MergeTree engine. It's possible to store fresh data on SSD and automatically move old data to HDD. ([example](https://clickhouse.github.io/clickhouse-presentations/meetup30/new_features/#12)). [\#4918](https://github.com/ClickHouse/ClickHouse/pull/4918) ([Igr](https://github.com/ObjatieGroba)) [\#6489](https://github.com/ClickHouse/ClickHouse/pull/6489) ([alesapin](https://github.com/alesapin))
+- Add table function `input` for reading incoming data in an `INSERT SELECT` query; see the sketch after this list. [\#5450](https://github.com/ClickHouse/ClickHouse/pull/5450) ([palasonic1](https://github.com/palasonic1)) [\#6832](https://github.com/ClickHouse/ClickHouse/pull/6832) ([Anton Popov](https://github.com/CurtizJ))
+- Add a `sparse_hashed` dictionary layout that is functionally equivalent to the `hashed` layout but is more memory-efficient. It uses about half as much memory at the cost of slower value retrieval. [\#6894](https://github.com/ClickHouse/ClickHouse/pull/6894) ([Azat Khuzhin](https://github.com/azat))
+- Implement the ability to define a list of users for access to dictionaries. Only the currently connected database is used. [\#6907](https://github.com/ClickHouse/ClickHouse/pull/6907) ([Guillaume Tassery](https://github.com/YiuRULE))
+- Add a `LIMIT` option to the `SHOW` query. [\#6944](https://github.com/ClickHouse/ClickHouse/pull/6944) ([Philipp Malkovsky](https://github.com/malkfilipp))
+- Add a `bitmapSubsetLimit(bitmap, range_start, limit)` function that returns a subset of the smallest `limit` values in the set that are no smaller than `range_start`. [\#6957](https://github.com/ClickHouse/ClickHouse/pull/6957) ([Zhichang Yu](https://github.com/yuzhichang))
+- Add `bitmapMin` and `bitmapMax` functions. [\#6970](https://github.com/ClickHouse/ClickHouse/pull/6970) ([Zhichang Yu](https://github.com/yuzhichang))
+- Add function `repeat`, related to [issue-6648](https://github.com/ClickHouse/ClickHouse/issues/6648). [\#6999](https://github.com/ClickHouse/ClickHouse/pull/6999) ([flynn](https://github.com/ucasFL))
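+
+A hedged sketch of the `input` table function named above (the target table and structure are made up; the data itself is fed together with the query, e.g. via the client):
+
+``` sql
+-- Transform rows on the fly while inserting incoming data:
+INSERT INTO target_table
+SELECT lower(s), n * 2
+FROM input('s String, n UInt32')
+FORMAT CSV
+```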
+
+#### Experimental Feature {#experimental-feature-1}
+
+- Implement (in memory) a merge join variant that does not change the current pipeline. The result is partially sorted by the merge key. Set `partial_merge_join = 1` to use this feature. The Merge Join is still in development. [\#6940](https://github.com/ClickHouse/ClickHouse/pull/6940) ([Artem Zuikov](https://github.com/4ertus2))
+- Add `S3` engine and table function. It is still in development (no authentication support yet). [\#5596](https://github.com/ClickHouse/ClickHouse/pull/5596) ([Vladimir Chebotarev](https://github.com/excitoon))
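+
+Enabling the experimental merge join described above is a one-line setting change (taken directly from the entry):
+
+``` sql
+SET partial_merge_join = 1;
+```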
+
+#### Improvement {#improvement-2}
+
+- Every message read from Kafka is inserted atomically. This resolves almost all known issues with the Kafka engine. [\#6950](https://github.com/ClickHouse/ClickHouse/pull/6950) ([Ivan](https://github.com/abyss7))
+- Improvements for failover of distributed queries. Shorten recovery time; it is also now configurable and can be seen in `system.clusters`. [\#6399](https://github.com/ClickHouse/ClickHouse/pull/6399) ([Vasily Nemkov](https://github.com/Enmk))
+- Support numeric values for Enums directly in the `IN` section. \#6766 [\#6941](https://github.com/ClickHouse/ClickHouse/pull/6941) ([dimarub2000](https://github.com/dimarub2000))
+- Support (optional, disabled by default) redirects on URL storage. [\#6914](https://github.com/ClickHouse/ClickHouse/pull/6914) ([maqroll](https://github.com/maqroll))
+- Add an information message when a client with an older version connects to a server. [\#6893](https://github.com/ClickHouse/ClickHouse/pull/6893) ([Philipp Malkovsky](https://github.com/malkfilipp))
+- Remove the maximum backoff sleep time limit for sending data in Distributed tables. [\#6895](https://github.com/ClickHouse/ClickHouse/pull/6895) ([Azat Khuzhin](https://github.com/azat))
+- Add the ability to send profile events (counters) with cumulative values to graphite. It can be enabled under `<events_cumulative>` in the server `config.xml`. [\#6969](https://github.com/ClickHouse/ClickHouse/pull/6969) ([Azat Khuzhin](https://github.com/azat))
+- Automatically cast type `T` to `LowCardinality(T)` while inserting data into a column of type `LowCardinality(T)` in Native format via HTTP. [\#6891](https://github.com/ClickHouse/ClickHouse/pull/6891) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Add the ability to use the function `hex` without `reinterpretAsString` for `Float32`, `Float64`; see the example below. [\#7024](https://github.com/ClickHouse/ClickHouse/pull/7024) ([Mikhail Korotov](https://github.com/millb))
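+
+A small, hedged example of `hex` applied directly to a float, as described in the last entry (the literal is arbitrary, and the exact output is the hex of the value's binary representation):
+
+``` sql
+SELECT hex(toFloat32(1.5));
+```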
+
+#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-2}
+
+- Add gdb-index to the clickhouse binary with debug info. It will speed up the startup time of `gdb`. [\#6947](https://github.com/ClickHouse/ClickHouse/pull/6947) ([alesapin](https://github.com/alesapin))
+- Speed up deb packaging with a patched dpkg-deb which uses `pigz`. [\#6960](https://github.com/ClickHouse/ClickHouse/pull/6960) ([alesapin](https://github.com/alesapin))
+- Set `enable_fuzzing = 1` to enable libfuzzer instrumentation of all the project code. [\#7042](https://github.com/ClickHouse/ClickHouse/pull/7042) ([kyprizel](https://github.com/kyprizel))
+- Add a split build smoke test in CI. [\#7061](https://github.com/ClickHouse/ClickHouse/pull/7061) ([alesapin](https://github.com/alesapin))
+- Add build with MemorySanitizer to CI. [\#7066](https://github.com/ClickHouse/ClickHouse/pull/7066) ([Alexander Kuzmenkov](https://github.com/akuzm))
+- Replace `libsparsehash` with `sparsehash-c11`. [\#6965](https://github.com/ClickHouse/ClickHouse/pull/6965) ([Azat Khuzhin](https://github.com/azat))
+
+#### Bug Fix {#bug-fix-5}
+
+- Fixed a performance degradation of index analysis on complex keys on large tables. This fixes \#6924. [\#7075](https://github.com/ClickHouse/ClickHouse/pull/7075) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix a logical error causing segfaults when selecting from a Kafka empty topic. [\#6909](https://github.com/ClickHouse/ClickHouse/pull/6909) ([Ivan](https://github.com/abyss7))
+- Fix too early MySQL connection close in `MySQLBlockInputStream.cpp`. [\#6882](https://github.com/ClickHouse/ClickHouse/pull/6882) ([Clément Rodriguez](https://github.com/clemrodriguez))
+- Returned support for very old Linux kernels (fixes [\#6841](https://github.com/ClickHouse/ClickHouse/issues/6841)). [\#6853](https://github.com/ClickHouse/ClickHouse/pull/6853) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix possible data loss in an `insert select` query in case of an empty block in the input stream. \#6834 \#6862 [\#6911](https://github.com/ClickHouse/ClickHouse/pull/6911) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fix the function `АrrayEnumerateUniqRanked` with empty arrays in params. [\#6928](https://github.com/ClickHouse/ClickHouse/pull/6928) ([proller](https://github.com/proller))
+- Fix complex queries with array joins and global subqueries. [\#6934](https://github.com/ClickHouse/ClickHouse/pull/6934) ([Ivan](https://github.com/abyss7))
+- Fix the `Unknown identifier` error in ORDER BY and GROUP BY with multiple JOINs. [\#7022](https://github.com/ClickHouse/ClickHouse/pull/7022) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed an `MSan` warning while executing functions with a `LowCardinality` argument. [\#7062](https://github.com/ClickHouse/ClickHouse/pull/7062) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+
+#### Backward Incompatible Change {#backward-incompatible-change-2}
+
+- Changed the serialization format of bitmap\* aggregate function states to improve performance. Serialized states of bitmap\* from previous versions cannot be read. [\#6908](https://github.com/ClickHouse/ClickHouse/pull/6908) ([Zhichang Yu](https://github.com/yuzhichang))
+
+## ClickHouse Release 19.14 {#clickhouse-release-19-14}
+
+### ClickHouse Release 19.14.7.15, 2019-10-02 {#clickhouse-release-19-14-7-15-2019-10-02}
+
+#### Bug Fix {#bug-fix-6}
+
+- This release also contains all bug fixes from 19.11.12.69.
+- Fixed compatibility for distributed queries between 19.14 and earlier versions. This fixes [\#7068](https://github.com/ClickHouse/ClickHouse/issues/7068). [\#7069](https://github.com/ClickHouse/ClickHouse/pull/7069) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+### ClickHouse Release 19.14.6.12, 2019-09-19 {#clickhouse-release-19-14-6-12-2019-09-19}
+
+#### Bug Fix {#bug-fix-7}
+
+- Fix the function `АrrayEnumerateUniqRanked` with empty arrays in params. [\#6928](https://github.com/ClickHouse/ClickHouse/pull/6928) ([proller](https://github.com/proller))
+- Fixed the subquery name in queries with `ARRAY JOIN` and `GLOBAL IN subquery` with an alias. Use the subquery alias for the external table name if it is specified. [\#6934](https://github.com/ClickHouse/ClickHouse/pull/6934) ([Ivan](https://github.com/abyss7))
+
+#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-3}
+
+- Fix the [flapping](https://clickhouse-test-reports.s3.yandex.net/6944/aab95fd5175a513413c7395a73a82044bdafb906/functional_stateless_tests_(debug).html) test `00715_fetch_merged_or_mutated_part_zookeeper` by rewriting it to a shell script because it needs to wait for mutations to apply. [\#6977](https://github.com/ClickHouse/ClickHouse/pull/6977) ([Alexander Kazakov](https://github.com/Akazz))
+- Fixed UBSan and MemSan failure in the function `groupUniqArray` with an empty array argument. It was caused by placing an empty `PaddedPODArray` into a hash table zero cell because the constructor for the zero cell value was not called. [\#6937](https://github.com/ClickHouse/ClickHouse/pull/6937) ([Amos Bird](https://github.com/amosbird))
+
+### ClickHouse Release 19.14.3.3, 2019-09-10 {#clickhouse-release-19-14-3-3-2019-09-10}
+
+#### New Feature {#new-feature-4}
+
+- `WITH FILL` modifier for `ORDER BY`. (continuation of [\#5069](https://github.com/ClickHouse/ClickHouse/issues/5069)); see the sketch after this list. [\#6610](https://github.com/ClickHouse/ClickHouse/pull/6610) ([Anton Popov](https://github.com/CurtizJ))
+- `WITH TIES` modifier for `LIMIT`. (continuation of [\#5069](https://github.com/ClickHouse/ClickHouse/issues/5069)) [\#6610](https://github.com/ClickHouse/ClickHouse/pull/6610) ([Anton Popov](https://github.com/CurtizJ))
+- Parse an unquoted `NULL` literal as NULL (if setting `format_csv_unquoted_null_literal_as_null=1`). Initialize null fields with default values if the data type of the field is not nullable (if setting `input_format_null_as_default=1`). [\#5990](https://github.com/ClickHouse/ClickHouse/issues/5990) [\#6055](https://github.com/ClickHouse/ClickHouse/pull/6055) ([tavplubix](https://github.com/tavplubix))
+- Support for wildcards in paths of table functions `file` and `hdfs`. If the path contains wildcards, the table will be read-only. Example of usage: `select * from hdfs('hdfs://hdfs1:9000/some_dir/another_dir/*/file{0..9}{0..9}')` and `select * from file('some_dir/{some_file,another_file,yet_another}.tsv', 'TSV', 'value UInt32')`. [\#6092](https://github.com/ClickHouse/ClickHouse/pull/6092) ([Olga Khvostikova](https://github.com/stavrolia))
+- New `system.metric_log` table which stores values of `system.events` and `system.metrics` with a specified time interval. [\#6363](https://github.com/ClickHouse/ClickHouse/issues/6363) [\#6467](https://github.com/ClickHouse/ClickHouse/pull/6467) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)) [\#6530](https://github.com/ClickHouse/ClickHouse/pull/6530) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Allow writing ClickHouse text logs to the `system.text_log` table. [\#6037](https://github.com/ClickHouse/ClickHouse/issues/6037) [\#6103](https://github.com/ClickHouse/ClickHouse/pull/6103) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)) [\#6164](https://github.com/ClickHouse/ClickHouse/pull/6164) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Show private symbols in stack traces (this is done via parsing symbol tables of ELF files). Added information about file and line number in stack traces if debug info is present. Speed up symbol name lookup with indexing symbols present in the program. Added new SQL functions for introspection: `demangle` and `addressToLine`. Renamed function `symbolizeAddress` to `addressToSymbol` for consistency. Function `addressToSymbol` will return a mangled name for performance reasons and you have to apply `demangle`. Added setting `allow_introspection_functions` which is turned off by default. [\#6201](https://github.com/ClickHouse/ClickHouse/pull/6201) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Table function `values` (the name is case-insensitive). It allows reading from the `VALUES` list proposed in [\#5984](https://github.com/ClickHouse/ClickHouse/issues/5984).
Example: `SELECT * FROM VALUES('a UInt64, s String', (1, 'one'), (2, 'two'), (3, 'three'))`. [\#6217](https://github.com/ClickHouse/ClickHouse/issues/6217). [\#6209](https://github.com/ClickHouse/ClickHouse/pull/6209) ([dimarub2000](https://github.com/dimarub2000))
+- Added the ability to alter storage settings. Syntax: `ALTER TABLE <table> MODIFY SETTING <setting_name> = <value>`. [\#6366](https://github.com/ClickHouse/ClickHouse/pull/6366) [\#6669](https://github.com/ClickHouse/ClickHouse/pull/6669) [\#6685](https://github.com/ClickHouse/ClickHouse/pull/6685) ([alesapin](https://github.com/alesapin))
+- Support for removing detached parts. Syntax: `ALTER TABLE <table_name> DROP DETACHED PART '<part_id>'`. [\#6158](https://github.com/ClickHouse/ClickHouse/pull/6158) ([tavplubix](https://github.com/tavplubix))
+- Table constraints. Allows adding constraints to a table definition which will be checked at insert. [\#5273](https://github.com/ClickHouse/ClickHouse/pull/5273) ([Gleb Novikov](https://github.com/NanoBjorn)) [\#6652](https://github.com/ClickHouse/ClickHouse/pull/6652) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Support for cascaded materialized views. [\#6324](https://github.com/ClickHouse/ClickHouse/pull/6324) ([Amos Bird](https://github.com/amosbird))
+- Turn on the query profiler by default to sample every query execution thread once a second. [\#6283](https://github.com/ClickHouse/ClickHouse/pull/6283) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Input format `ORC`. [\#6454](https://github.com/ClickHouse/ClickHouse/pull/6454) [\#6703](https://github.com/ClickHouse/ClickHouse/pull/6703) ([akonyaev90](https://github.com/akonyaev90))
+- Added two new functions: `sigmoid` and `tanh` (useful for machine learning applications). [\#6254](https://github.com/ClickHouse/ClickHouse/pull/6254) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Functions `hasToken(haystack, token)`, `hasTokenCaseInsensitive(haystack, token)` to check if a given token is in the haystack. A token is a maximal-length substring between two non-alphanumeric ASCII characters (or boundaries of the haystack). The token must be a constant string. Supported by the tokenbf\_v1 index specialization. [\#6596](https://github.com/ClickHouse/ClickHouse/pull/6596), [\#6662](https://github.com/ClickHouse/ClickHouse/pull/6662) ([Vasily Nemkov](https://github.com/Enmk))
+- New function `neighbor(value, offset[, default_value])`. Allows reaching the prev/next value within a column in a block of data. [\#5925](https://github.com/ClickHouse/ClickHouse/pull/5925) ([Alex Krash](https://github.com/alex-krash)) [6685365ab8c5b74f9650492c88a012596eb1b0c6](https://github.com/ClickHouse/ClickHouse/commit/6685365ab8c5b74f9650492c88a012596eb1b0c6) [341e2e4587a18065c2da1ca888c73389f48ce36c](https://github.com/ClickHouse/ClickHouse/commit/341e2e4587a18065c2da1ca888c73389f48ce36c) [Alexey Milovidov](https://github.com/alexey-milovidov)
+- Created a function `currentUser()` returning the login of the authorized user. Added alias `user()` for compatibility with MySQL. [\#6470](https://github.com/ClickHouse/ClickHouse/pull/6470) ([Alex Krash](https://github.com/alex-krash))
+- New aggregate functions `quantilesExactInclusive` and `quantilesExactExclusive`, which were proposed in [\#5885](https://github.com/ClickHouse/ClickHouse/issues/5885). [\#6477](https://github.com/ClickHouse/ClickHouse/pull/6477) ([dimarub2000](https://github.com/dimarub2000))
+- Function `bitmapRange(bitmap, range_begin, range_end)` which returns a new set with the specified range (not including `range_end`). [\#6314](https://github.com/ClickHouse/ClickHouse/pull/6314) ([Zhichang Yu](https://github.com/yuzhichang))
+- Function `geohashesInBox(longitude_min, latitude_min, longitude_max, latitude_max, precision)` which creates an array of precision-long strings of geohash boxes covering the provided area. [\#6127](https://github.com/ClickHouse/ClickHouse/pull/6127) ([Vasily Nemkov](https://github.com/Enmk))
+- Implement support for INSERT queries into `Kafka` tables. [\#6012](https://github.com/ClickHouse/ClickHouse/pull/6012) ([Ivan](https://github.com/abyss7))
+- Added support for `_partition` and `_timestamp` virtual columns to the Kafka engine. [\#6400](https://github.com/ClickHouse/ClickHouse/pull/6400) ([Ivan](https://github.com/abyss7))
+- Possibility to remove sensitive data from `query_log`, server logs, and the process list with regexp-based rules. [\#5710](https://github.com/ClickHouse/ClickHouse/pull/5710) ([filimonov](https://github.com/filimonov))
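+
+A hedged sketch of the `WITH FILL` modifier mentioned at the start of this list (the query is purely illustrative):
+
+``` sql
+-- Rows for the missing odd values of n are generated ("filled") in the result:
+SELECT number AS n
+FROM numbers(5)
+WHERE n % 2 = 0
+ORDER BY n WITH FILL;
+```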
+
+#### Experimental Feature {#experimental-feature-2}
+
+- Input and output data format `Template`. It allows specifying a custom format string for input and output. [\#4354](https://github.com/ClickHouse/ClickHouse/issues/4354) [\#6727](https://github.com/ClickHouse/ClickHouse/pull/6727) ([tavplubix](https://github.com/tavplubix))
+- Implementation of `LIVE VIEW` tables that were originally proposed in [\#2898](https://github.com/ClickHouse/ClickHouse/pull/2898), prepared in [\#3925](https://github.com/ClickHouse/ClickHouse/issues/3925), and then updated in [\#5541](https://github.com/ClickHouse/ClickHouse/issues/5541). See [\#5541](https://github.com/ClickHouse/ClickHouse/issues/5541) for a detailed description. [\#5541](https://github.com/ClickHouse/ClickHouse/issues/5541) ([vzakaznikov](https://github.com/vzakaznikov)) [\#6425](https://github.com/ClickHouse/ClickHouse/pull/6425) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) [\#6656](https://github.com/ClickHouse/ClickHouse/pull/6656) ([vzakaznikov](https://github.com/vzakaznikov)) Note that the `LIVE VIEW` feature may be removed in future versions.
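+
+A hedged sketch of experimental `LIVE VIEW` usage (names are hypothetical; the setting gating the feature is mentioned in a later entry of this changelog):
+
+``` sql
+SET allow_experimental_live_view = 1;
+CREATE LIVE VIEW lv AS SELECT count() FROM source_table;
+WATCH lv; -- streams updated results as source_table changes
+```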
+
+#### Bug Fix {#bug-fix-8}
+
+- This release also contains all bug fixes from 19.13 and 19.11.
+- Fix segmentation fault when the table has skip indices and vertical merge happens. [\#6723](https://github.com/ClickHouse/ClickHouse/pull/6723) ([alesapin](https://github.com/alesapin))
+- Fix per-column TTL with non-trivial column defaults. Previously, in case of a forced TTL merge with an `OPTIMIZE ... FINAL` query, expired values were replaced by type defaults instead of the user-specified column defaults. [\#6796](https://github.com/ClickHouse/ClickHouse/pull/6796) ([Anton Popov](https://github.com/CurtizJ))
+- Fix the Kafka message duplication problem on normal server restart. [\#6597](https://github.com/ClickHouse/ClickHouse/pull/6597) ([Ivan](https://github.com/abyss7))
+- Fixed an infinite loop when reading Kafka messages. Do not pause/resume the consumer on subscription at all – otherwise it may get paused indefinitely in some scenarios. [\#6354](https://github.com/ClickHouse/ClickHouse/pull/6354) ([Ivan](https://github.com/abyss7))
+- Fix the `Key expression contains comparison between inconvertible types` exception in the `bitmapContains` function. [\#6136](https://github.com/ClickHouse/ClickHouse/issues/6136) [\#6146](https://github.com/ClickHouse/ClickHouse/issues/6146) [\#6156](https://github.com/ClickHouse/ClickHouse/pull/6156) ([dimarub2000](https://github.com/dimarub2000))
+- Fix segfault with enabled `optimize_skip_unused_shards` and a missing sharding key. [\#6384](https://github.com/ClickHouse/ClickHouse/pull/6384) ([Anton Popov](https://github.com/CurtizJ))
+- Fixed wrong code in mutations that may lead to memory corruption. Fixed segfault with read of address `0x14c0` that may have happened due to concurrent `DROP TABLE` and `SELECT` from `system.parts` or `system.parts_columns`. Fixed a race condition in the preparation of mutation queries. Fixed a deadlock caused by `OPTIMIZE` of replicated tables and concurrent modification operations like ALTERs. [\#6514](https://github.com/ClickHouse/ClickHouse/pull/6514) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Removed extra verbose logging in the MySQL interface. [\#6389](https://github.com/ClickHouse/ClickHouse/pull/6389) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Return the ability to parse boolean settings from ‘true’ and ‘false’ in the configuration file. [\#6278](https://github.com/ClickHouse/ClickHouse/pull/6278) ([alesapin](https://github.com/alesapin))
+- Fix crash in `quantile` and `median` functions over `Nullable(Decimal128)`. [\#6378](https://github.com/ClickHouse/ClickHouse/pull/6378) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed possible incomplete results returned by a `SELECT` query with a `WHERE` condition on the primary key that contained conversion to a Float type. It was caused by incorrect checking of monotonicity in the `toFloat` function. [\#6248](https://github.com/ClickHouse/ClickHouse/issues/6248) [\#6374](https://github.com/ClickHouse/ClickHouse/pull/6374) ([dimarub2000](https://github.com/dimarub2000))
+- Check the `max_expanded_ast_elements` setting for mutations. Clear mutations after `TRUNCATE TABLE`. [\#6205](https://github.com/ClickHouse/ClickHouse/pull/6205) ([Winter Zhang](https://github.com/zhang2014))
+- Fix JOIN results for key columns when used with `join_use_nulls`. Attach Nulls instead of column defaults. [\#6249](https://github.com/ClickHouse/ClickHouse/pull/6249) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed skip indices with vertical merge and alter. Fix for the `Bad size of marks file` exception. [\#6594](https://github.com/ClickHouse/ClickHouse/issues/6594) [\#6713](https://github.com/ClickHouse/ClickHouse/pull/6713) ([alesapin](https://github.com/alesapin))
+- Fix a rare crash in `ALTER MODIFY COLUMN` and vertical merge when one of the merged/altered parts is empty (0 rows). [\#6746](https://github.com/ClickHouse/ClickHouse/issues/6746) [\#6780](https://github.com/ClickHouse/ClickHouse/pull/6780) ([alesapin](https://github.com/alesapin))
+- Fixed a bug in the conversion of `LowCardinality` types in `AggregateFunctionFactory`. This fixes [\#6257](https://github.com/ClickHouse/ClickHouse/issues/6257). [\#6281](https://github.com/ClickHouse/ClickHouse/pull/6281) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fix wrong behavior and possible segfaults in the `topK` and `topKWeighted` aggregate functions. [\#6404](https://github.com/ClickHouse/ClickHouse/pull/6404) ([Anton Popov](https://github.com/CurtizJ))
+- Fixed unsafe code around the `getIdentifier` function. [\#6401](https://github.com/ClickHouse/ClickHouse/issues/6401) [\#6409](https://github.com/ClickHouse/ClickHouse/pull/6409) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a bug in the MySQL wire protocol (used while connecting to ClickHouse from a MySQL client). Caused by a heap buffer overflow in `PacketPayloadWriteBuffer`. [\#6212](https://github.com/ClickHouse/ClickHouse/pull/6212) ([Yuriy Baranov](https://github.com/yurriy))
+- Fixed a memory leak in the `bitmapSubsetInRange` function. [\#6819](https://github.com/ClickHouse/ClickHouse/pull/6819) ([Zhichang Yu](https://github.com/yuzhichang))
+- Fix a rare bug when a mutation is executed after a granularity change. [\#6816](https://github.com/ClickHouse/ClickHouse/pull/6816) ([alesapin](https://github.com/alesapin))
+- Allow protobuf messages with all fields by default. [\#6132](https://github.com/ClickHouse/ClickHouse/pull/6132) ([Vitaly Baranov](https://github.com/vitlibar))
+- Resolve a bug with the `nullIf` function when we send a `NULL` argument as the second argument. [\#6446](https://github.com/ClickHouse/ClickHouse/pull/6446) ([Guillaume Tassery](https://github.com/YiuRULE))
+- Fixed a rare bug with wrong memory allocation/deallocation in complex key cache dictionaries with string fields which led to infinite memory consumption (looks like a memory leak). The bug reproduces when the string size is a power of two starting from eight (8, 16, 32, etc). [\#6447](https://github.com/ClickHouse/ClickHouse/pull/6447) ([alesapin](https://github.com/alesapin))
+- Fixed Gorilla encoding on small sequences which caused the exception `Cannot write after end of buffer`. [\#6398](https://github.com/ClickHouse/ClickHouse/issues/6398) [\#6444](https://github.com/ClickHouse/ClickHouse/pull/6444) ([Vasily Nemkov](https://github.com/Enmk))
+- Allow using not-nullable types in JOINs with `join_use_nulls` enabled. [\#6705](https://github.com/ClickHouse/ClickHouse/pull/6705) ([Artem Zuikov](https://github.com/4ertus2))
+- Disable `Poco::AbstractConfiguration` substitutions in queries in `clickhouse-client`. [\#6706](https://github.com/ClickHouse/ClickHouse/pull/6706) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Avoid deadlock in `REPLACE PARTITION`. [\#6677](https://github.com/ClickHouse/ClickHouse/pull/6677) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Using `arrayReduce` for constant arguments may lead to a segfault. [\#6242](https://github.com/ClickHouse/ClickHouse/issues/6242) [\#6326](https://github.com/ClickHouse/ClickHouse/pull/6326) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix inconsistent parts which can appear if a replica was restored after `DROP PARTITION`. [\#6522](https://github.com/ClickHouse/ClickHouse/issues/6522) [\#6523](https://github.com/ClickHouse/ClickHouse/pull/6523) ([tavplubix](https://github.com/tavplubix))
+- Fixed hang in the `JSONExtractRaw` function. [\#6195](https://github.com/ClickHouse/ClickHouse/issues/6195) [\#6198](https://github.com/ClickHouse/ClickHouse/pull/6198) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix a bug with incorrect skip indices serialization and aggregation with adaptive granularity. [\#6594](https://github.com/ClickHouse/ClickHouse/issues/6594). [\#6748](https://github.com/ClickHouse/ClickHouse/pull/6748) ([alesapin](https://github.com/alesapin))
+- Fix the `WITH ROLLUP` and `WITH CUBE` modifiers of `GROUP BY` with two-level aggregation. [\#6225](https://github.com/ClickHouse/ClickHouse/pull/6225) ([Anton Popov](https://github.com/CurtizJ))
+- Fix a bug with writing secondary index marks with adaptive granularity. [\#6126](https://github.com/ClickHouse/ClickHouse/pull/6126) ([alesapin](https://github.com/alesapin))
+- Fix the initialization order during server startup. Since `StorageMergeTree::background_task_handle` is initialized in `startup()`, `MergeTreeBlockOutputStream::write()` may try to use it before initialization. Just check if it is initialized. [\#6080](https://github.com/ClickHouse/ClickHouse/pull/6080) ([Ivan](https://github.com/abyss7))
+- Clearing the data buffer from the previous read operation that completed with an error. [\#6026](https://github.com/ClickHouse/ClickHouse/pull/6026) ([Nikolay](https://github.com/bopohaa))
+- Fix a bug with enabling adaptive granularity when creating a new replica for a Replicated\*MergeTree table. [\#6394](https://github.com/ClickHouse/ClickHouse/issues/6394) [\#6452](https://github.com/ClickHouse/ClickHouse/pull/6452) ([alesapin](https://github.com/alesapin))
+- Fixed a possible crash during server startup in case an exception happened in `libunwind` during an exception at access to the uninitialized `ThreadStatus` structure. [\#6456](https://github.com/ClickHouse/ClickHouse/pull/6456) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
+- Fix crash in the `yandexConsistentHash` function. Found by fuzz test. [\#6304](https://github.com/ClickHouse/ClickHouse/issues/6304) [\#6305](https://github.com/ClickHouse/ClickHouse/pull/6305) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed the possibility of hanging queries when the server is overloaded and the global thread pool becomes nearly full. This has a higher chance of happening on clusters with a large number of shards (hundreds), because distributed queries allocate a thread per connection to each shard. For example, this issue may reproduce if a cluster of 330 shards is processing 30 concurrent distributed queries. This issue affects all versions starting from 19.2. [\#6301](https://github.com/ClickHouse/ClickHouse/pull/6301) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed the logic of the `arrayEnumerateUniqRanked` function. [\#6423](https://github.com/ClickHouse/ClickHouse/pull/6423) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix segfault when decoding the symbol table. [\#6603](https://github.com/ClickHouse/ClickHouse/pull/6603) ([Amos Bird](https://github.com/amosbird))
+- Fixed an irrelevant exception in the cast of `LowCardinality(Nullable)` to a not-Nullable column in case it doesn't contain Nulls (e.g. in a query like `SELECT CAST(CAST('Hello' AS LowCardinality(Nullable(String))) AS String)`). [\#6094](https://github.com/ClickHouse/ClickHouse/issues/6094) [\#6119](https://github.com/ClickHouse/ClickHouse/pull/6119) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Removed extra quoting of descriptions in the `system.settings` table. [\#6696](https://github.com/ClickHouse/ClickHouse/issues/6696) [\#6699](https://github.com/ClickHouse/ClickHouse/pull/6699) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Avoid a possible deadlock in `TRUNCATE` of a replicated table. [\#6695](https://github.com/ClickHouse/ClickHouse/pull/6695) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix reading in order of the sorting key. [\#6189](https://github.com/ClickHouse/ClickHouse/pull/6189) ([Anton Popov](https://github.com/CurtizJ))
+- Fix the `ALTER TABLE ... UPDATE` query for tables with `enable_mixed_granularity_parts=1`. [\#6543](https://github.com/ClickHouse/ClickHouse/pull/6543) ([alesapin](https://github.com/alesapin))
+- Fix a bug opened by [\#4405](https://github.com/ClickHouse/ClickHouse/pull/4405) (since 19.4.0). Reproduces in queries to Distributed tables over MergeTree tables when we don't query any columns (`SELECT 1`). [\#6236](https://github.com/ClickHouse/ClickHouse/pull/6236) ([alesapin](https://github.com/alesapin))
+- Fixed overflow in integer division of a signed type by an unsigned type. The behaviour was exactly as in the C or C++ language (integer promotion rules), which may be surprising. Please note that the overflow is still possible when dividing a large signed number by a large unsigned number or vice versa (but that case is less usual). The issue existed in all server versions. [\#6214](https://github.com/ClickHouse/ClickHouse/issues/6214) [\#6233](https://github.com/ClickHouse/ClickHouse/pull/6233) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Limit the maximum sleep time for throttling when `max_execution_speed` or `max_execution_speed_bytes` is set. Fixed false errors like `Estimated query execution time (inf seconds) is too long`. [\#5547](https://github.com/ClickHouse/ClickHouse/issues/5547) [\#6232](https://github.com/ClickHouse/ClickHouse/pull/6232) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed issues about using `MATERIALIZED` columns and aliases in `MaterializedView`. [\#448](https://github.com/ClickHouse/ClickHouse/issues/448) [\#3484](https://github.com/ClickHouse/ClickHouse/issues/3484) [\#3450](https://github.com/ClickHouse/ClickHouse/issues/3450) [\#2878](https://github.com/ClickHouse/ClickHouse/issues/2878) [\#2285](https://github.com/ClickHouse/ClickHouse/issues/2285) [\#3796](https://github.com/ClickHouse/ClickHouse/pull/3796) ([Amos Bird](https://github.com/amosbird)) [\#6316](https://github.com/ClickHouse/ClickHouse/pull/6316) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix `FormatFactory` behaviour for input streams which are not implemented as processors. [\#6495](https://github.com/ClickHouse/ClickHouse/pull/6495) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fixed a typo. [\#6631](https://github.com/ClickHouse/ClickHouse/pull/6631) ([Alex Ryndin](https://github.com/alexryndin))
+- Typo in the error message (is -\> are). [\#6839](https://github.com/ClickHouse/ClickHouse/pull/6839) ([Denis Zhuravlev](https://github.com/den-crane))
+- Fixed an error while parsing a column list from a string if the type contained a comma (this issue was relevant for `File`, `URL`, `HDFS` storages) [\#6217](https://github.com/ClickHouse/ClickHouse/issues/6217). [\#6209](https://github.com/ClickHouse/ClickHouse/pull/6209) ([dimarub2000](https://github.com/dimarub2000))
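+
+The LowCardinality cast fix above can be checked directly with the query quoted in that entry:
+
+``` sql
+SELECT CAST(CAST('Hello' AS LowCardinality(Nullable(String))) AS String);
+```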
+
+#### Security Fix {#security-fix}
+
+- This release also contains all bug security fixes from 19.13 and 19.11.
+- Fixed the possibility of a fabricated query causing a server crash due to stack overflow in the SQL parser. Fixed the possibility of stack overflow in Merge and Distributed tables, materialized views and conditions for row-level security that involve subqueries. [\#6433](https://github.com/ClickHouse/ClickHouse/pull/6433) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+#### Improvement {#improvement-3}
+
+- Correct implementation of ternary logic for `AND/OR`. [\#6048](https://github.com/ClickHouse/ClickHouse/pull/6048) ([Alexander Kazakov](https://github.com/Akazz))
+- Now values and rows with expired TTL will be removed after an `OPTIMIZE ... FINAL` query from old parts without TTL infos or with outdated TTL infos, e.g. after an `ALTER ... MODIFY TTL` query. Added queries `SYSTEM STOP/START TTL MERGES` to disallow/allow assigning merges with TTL and filtering expired values in all merges. [\#6274](https://github.com/ClickHouse/ClickHouse/pull/6274) ([Anton Popov](https://github.com/CurtizJ))
+- Possibility to change the location of the ClickHouse history file for the client using the `CLICKHOUSE_HISTORY_FILE` env. [\#6840](https://github.com/ClickHouse/ClickHouse/pull/6840) ([filimonov](https://github.com/filimonov))
+- Remove the `dry_run` flag from `InterpreterSelectQuery`. … [\#6375](https://github.com/ClickHouse/ClickHouse/pull/6375) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Support `ASOF JOIN` with the `ON` section. [\#6211](https://github.com/ClickHouse/ClickHouse/pull/6211) ([Artem Zuikov](https://github.com/4ertus2))
+- Better support of skip indexes for mutations and replication. Support for the `MATERIALIZE/CLEAR INDEX ... IN PARTITION` query. `UPDATE x = x` recalculates all indices that use column `x`. [\#5053](https://github.com/ClickHouse/ClickHouse/pull/5053) ([Nikita Vasilev](https://github.com/nikvas0))
+- Allow to `ATTACH` live views (for example, at server startup) regardless of the `allow_experimental_live_view` setting. [\#6754](https://github.com/ClickHouse/ClickHouse/pull/6754) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- For stack traces gathered by the query profiler, do not include stack frames generated by the query profiler itself. [\#6250](https://github.com/ClickHouse/ClickHouse/pull/6250) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Now table functions `values`, `file`, `url`, `hdfs` have support for ALIAS columns. [\#6255](https://github.com/ClickHouse/ClickHouse/pull/6255) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Throw an exception if a `config.d` file doesn't have the corresponding root element as the config file. [\#6123](https://github.com/ClickHouse/ClickHouse/pull/6123) ([dimarub2000](https://github.com/dimarub2000))
+- Print extra info in the exception message for `no space left on device`. [\#6182](https://github.com/ClickHouse/ClickHouse/issues/6182), [\#6252](https://github.com/ClickHouse/ClickHouse/issues/6252) [\#6352](https://github.com/ClickHouse/ClickHouse/pull/6352) ([tavplubix](https://github.com/tavplubix))
+- When determining shards of a `Distributed` table to be covered by a read query (for `optimize_skip_unused_shards` = 1), ClickHouse now checks conditions from both the `prewhere` and `where` clauses of the select statement. [\#6521](https://github.com/ClickHouse/ClickHouse/pull/6521) ([Alexander Kazakov](https://github.com/Akazz))
+- Enabled `SIMDJSON` for machines without AVX2 but with the SSE 4.2 and PCLMUL instruction set. [\#6285](https://github.com/ClickHouse/ClickHouse/issues/6285) [\#6320](https://github.com/ClickHouse/ClickHouse/pull/6320) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- ClickHouse can work on filesystems without `O_DIRECT` support (such as ZFS and BtrFS) without additional tuning. [\#4449](https://github.com/ClickHouse/ClickHouse/issues/4449) [\#6730](https://github.com/ClickHouse/ClickHouse/pull/6730) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Support push-down predicates for the final subquery. [\#6120](https://github.com/ClickHouse/ClickHouse/pull/6120) ([TCeason](https://github.com/TCeason)) [\#6162](https://github.com/ClickHouse/ClickHouse/pull/6162) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Better `JOIN ON` keys extraction. [\#6131](https://github.com/ClickHouse/ClickHouse/pull/6131) ([Artem Zuikov](https://github.com/4ertus2))
+- Updated `SIMDJSON`. [\#6285](https://github.com/ClickHouse/ClickHouse/issues/6285). [\#6306](https://github.com/ClickHouse/ClickHouse/pull/6306) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Optimize selecting of the smallest column for `SELECT count()` queries. [\#6344](https://github.com/ClickHouse/ClickHouse/pull/6344) ([Amos Bird](https://github.com/amosbird))
+- Added a `strict` parameter in `windowFunnel()`. When `strict` is set, `windowFunnel()` applies conditions only for unique values. [\#6548](https://github.com/ClickHouse/ClickHouse/pull/6548) ([achimbab](https://github.com/achimbab))
+- Safer interface of `mysqlxx::Pool`. [\#6150](https://github.com/ClickHouse/ClickHouse/pull/6150) ([avasiliev](https://github.com/avasiliev))
+- The options line size when executing with the `--help` option now corresponds with the terminal size. [\#6590](https://github.com/ClickHouse/ClickHouse/pull/6590) ([dimarub2000](https://github.com/dimarub2000))
+- Disable the “read in order” optimization for aggregation without keys. [\#6599](https://github.com/ClickHouse/ClickHouse/pull/6599) ([Anton Popov](https://github.com/CurtizJ))
+- The HTTP status code for `INCORRECT_DATA` and `TYPE_MISMATCH` error codes was changed from the default `500 Internal Server Error` to `400 Bad Request`. [\#6271](https://github.com/ClickHouse/ClickHouse/pull/6271) ([Alexander Rodin](https://github.com/a-rodin))
+- Move the Join object from `ExpressionAction` into `AnalyzedJoin`. `ExpressionAnalyzer` and `ExpressionAction` do not know about the `Join` class anymore. Its logic is hidden by the `AnalyzedJoin` iface. [\#6801](https://github.com/ClickHouse/ClickHouse/pull/6801) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed a possible deadlock of distributed queries when one of the shards is localhost but the query is sent via a network connection. [\#6759](https://github.com/ClickHouse/ClickHouse/pull/6759) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Changed the semantics of multiple-table `RENAME` to avoid possible deadlocks. [\#6757](https://github.com/ClickHouse/ClickHouse/issues/6757). [\#6756](https://github.com/ClickHouse/ClickHouse/pull/6756) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Rewrote the MySQL compatibility server to prevent loading the full packet payload in memory. Decreased memory consumption for each connection to approximately `2 * DBMS_DEFAULT_BUFFER_SIZE` (read/write buffers). [\#5811](https://github.com/ClickHouse/ClickHouse/pull/5811) ([Yuriy Baranov](https://github.com/yurriy))
+- Move the AST alias interpreting logic out of the parser, which doesn't have to know anything about query semantics. [\#6108](https://github.com/ClickHouse/ClickHouse/pull/6108) ([Artem Zuikov](https://github.com/4ertus2))
+- Slightly safer parsing of `NamesAndTypesList`. [\#6408](https://github.com/ClickHouse/ClickHouse/issues/6408). [\#6410](https://github.com/ClickHouse/ClickHouse/pull/6410) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- `clickhouse-copier`: Allow using `where_condition` from config with a `partition_key` alias in the query for checking partition existence (earlier it was used only in reading data queries). [\#6577](https://github.com/ClickHouse/ClickHouse/pull/6577) ([proller](https://github.com/proller))
+- Added an optional message argument in `throwIf`. ([\#5772](https://github.com/ClickHouse/ClickHouse/issues/5772)) [\#6329](https://github.com/ClickHouse/ClickHouse/pull/6329) ([Vdimir](https://github.com/Vdimir))
+- A server exception raised while sending insertion data is now also being processed in the client. [\#5891](https://github.com/ClickHouse/ClickHouse/issues/5891) [\#6711](https://github.com/ClickHouse/ClickHouse/pull/6711) ([dimarub2000](https://github.com/dimarub2000))
+- Added a metric `DistributedFilesToInsert` that shows the total number of files in the filesystem that are selected to be sent to remote servers by Distributed tables. The number is summed across all shards. [\#6600](https://github.com/ClickHouse/ClickHouse/pull/6600) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Move most of the JOINs prepare logic from `ExpressionAction/ExpressionAnalyzer` to `AnalyzedJoin`. [\#6785](https://github.com/ClickHouse/ClickHouse/pull/6785) ([Artem Zuikov](https://github.com/4ertus2))
+- Fix a TSan [warning](https://clickhouse-test-reports.s3.yandex.net/6399/c1c1d1daa98e199e620766f1bd06a5921050a00d/functional_stateful_tests_(thread).html) ‘lock-order-inversion’. [\#6740](https://github.com/ClickHouse/ClickHouse/pull/6740) ([Vasily Nemkov](https://github.com/Enmk))
+- Better information messages about the lack of Linux capabilities. Log fatal errors with “fatal” level, which will make them easier to find in `system.text_log`. [\#6441](https://github.com/ClickHouse/ClickHouse/pull/6441) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- When dumping temporary data to disk is enabled to restrict memory usage during `GROUP BY`, `ORDER BY`, it didn't check the free disk space. The fix adds a new setting `min_free_disk_space`; when the free disk space is smaller than the threshold, the query will stop and throw `ErrorCodes::NOT_ENOUGH_SPACE`. [\#6678](https://github.com/ClickHouse/ClickHouse/pull/6678) ([Weiqing Xu](https://github.com/weiqxu)) [\#6691](https://github.com/ClickHouse/ClickHouse/pull/6691) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Removed the recursive rwlock by thread. It makes no sense, because threads are reused between queries. A `SELECT` query may acquire a lock in one thread, hold the lock from another thread and exit from the first thread. At the same time, the first thread can be reused by a `DROP` query. This would lead to false “Attempt to acquire exclusive lock recursively” messages. [\#6771](https://github.com/ClickHouse/ClickHouse/pull/6771) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Split `ExpressionAnalyzer.appendJoin()`. Prepare a place in `ExpressionAnalyzer` for `MergeJoin`. [\#6524](https://github.com/ClickHouse/ClickHouse/pull/6524) ([Artem Zuikov](https://github.com/4ertus2))
+- Added the `mysql_native_password` authentication plugin to the MySQL compatibility server. [\#6194](https://github.com/ClickHouse/ClickHouse/pull/6194) ([Yuriy Baranov](https://github.com/yurriy))
+- Fewer `clock_gettime` calls; fixed ABI compatibility between debug/release in `Allocator` (an insignificant issue). [\#6197](https://github.com/ClickHouse/ClickHouse/pull/6197) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Move `collectUsedColumns` from `ExpressionAnalyzer` to `SyntaxAnalyzer`. `SyntaxAnalyzer` calculates `required_source_columns` itself now. [\#6416](https://github.com/ClickHouse/ClickHouse/pull/6416) ([Artem Zuikov](https://github.com/4ertus2))
+- Add setting `joined_subquery_requires_alias` to require aliases for subselects and table functions in `FROM` when more than one table is present (i.e. queries with JOINs). [\#6733](https://github.com/ClickHouse/ClickHouse/pull/6733) ([Artem Zuikov](https://github.com/4ertus2))
+- Extract the `GetAggregatesVisitor` class from `ExpressionAnalyzer`. [\#6458](https://github.com/ClickHouse/ClickHouse/pull/6458) ([Artem Zuikov](https://github.com/4ertus2))
+- `system.query_log`: change the data type of the `type` column to `Enum`. [\#6265](https://github.com/ClickHouse/ClickHouse/pull/6265) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
+- Static linking of the `sha256_password` authentication plugin. [\#6512](https://github.com/ClickHouse/ClickHouse/pull/6512) ([Yuriy Baranov](https://github.com/yurriy))
+- Avoid an extra dependency for the setting `compile` to work. In previous versions, the user may have gotten errors like `cannot open crti.o`, `unable to find library -lc` etc. [\#6309](https://github.com/ClickHouse/ClickHouse/pull/6309) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- More validation of input that may come from a malicious replica. [\#6303](https://github.com/ClickHouse/ClickHouse/pull/6303) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Now the `clickhouse-obfuscator` file is available in the `clickhouse-client` package. In previous versions it was available as `clickhouse obfuscator` (with whitespace). [\#5816](https://github.com/ClickHouse/ClickHouse/issues/5816) [\#6609](https://github.com/ClickHouse/ClickHouse/pull/6609) ([dimarub2000](https://github.com/dimarub2000))
+- Fixed a deadlock when we have at least two queries that read at least two tables in different order and another query that performs a DDL operation on one of the tables. Fixed another very rare deadlock. [\#6764](https://github.com/ClickHouse/ClickHouse/pull/6764) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added an `os_thread_ids` column to `system.processes` and `system.query_log` for better debugging possibilities. [\#6763](https://github.com/ClickHouse/ClickHouse/pull/6763) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- A workaround for PHP mysqlnd extension bugs which occur when `sha256_password` is used as the default authentication plugin (described in [\#6031](https://github.com/ClickHouse/ClickHouse/issues/6031)). [\#6113](https://github.com/ClickHouse/ClickHouse/pull/6113) ([Yuriy Baranov](https://github.com/yurriy))
+- Remove an unneeded place with changed nullability columns. [\#6693](https://github.com/ClickHouse/ClickHouse/pull/6693) ([Artem Zuikov](https://github.com/4ertus2))
+- Set the default value of `queue_max_wait_ms` to zero, because the current value (five seconds) makes no sense. There are rare circumstances when this setting has any use. Added settings `replace_running_query_max_wait_ms`, `kafka_max_wait_ms` and `connection_pool_max_wait_ms` for disambiguation. [\#6692](https://github.com/ClickHouse/ClickHouse/pull/6692) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Extract `SelectQueryExpressionAnalyzer` from `ExpressionAnalyzer`. Keep the last one for non-select queries. [\#6499](https://github.com/ClickHouse/ClickHouse/pull/6499) ([Artem Zuikov](https://github.com/4ertus2))
+- Removed duplicated input and output formats. [\#6239](https://github.com/ClickHouse/ClickHouse/pull/6239) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Allow the user to override `poll_interval` and `idle_connection_timeout` settings on connection. [\#6230](https://github.com/ClickHouse/ClickHouse/pull/6230) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- `MergeTree` now has an additional option `ttl_only_drop_parts` (disabled by default) to avoid partial pruning of parts, so that they are dropped completely when all the rows in a part are expired; see the sketch after this list. [\#6191](https://github.com/ClickHouse/ClickHouse/pull/6191) ([Sergi Vladykin](https://github.com/svladykin))
+- Type checks for set index functions. Throw an exception if a function got a wrong type. This fixes the fuzz test with UBSan. [\#6511](https://github.com/ClickHouse/ClickHouse/pull/6511) ([Nikita Vasilev](https://github.com/nikvas0))
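+
+A hedged sketch of the `ttl_only_drop_parts` option named above (the schema is made up for the example):
+
+``` sql
+CREATE TABLE events (d Date, msg String)
+ENGINE = MergeTree
+ORDER BY d
+TTL d + INTERVAL 1 MONTH
+SETTINGS ttl_only_drop_parts = 1; -- drop whole expired parts instead of trimming rows
+```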
+
+#### Performance Improvement {#performance-improvement-2}
+
+- Optimize queries with an `ORDER BY expressions` clause, where `expressions` have a coinciding prefix with the sorting key in `MergeTree` tables. This optimization is controlled by the `optimize_read_in_order` setting; see the sketch after this list. [\#6054](https://github.com/ClickHouse/ClickHouse/pull/6054) [\#6629](https://github.com/ClickHouse/ClickHouse/pull/6629) ([Anton Popov](https://github.com/CurtizJ))
+- Allow using multiple threads during parts loading and removal. [\#6372](https://github.com/ClickHouse/ClickHouse/issues/6372) [\#6074](https://github.com/ClickHouse/ClickHouse/issues/6074) [\#6438](https://github.com/ClickHouse/ClickHouse/pull/6438) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Implemented a batch variant of updating aggregate function states. It may lead to performance benefits. [\#6435](https://github.com/ClickHouse/ClickHouse/pull/6435) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Use the `FastOps` library for functions `exp`, `log`, `sigmoid`, `tanh`. FastOps is a fast vector math library from Michael Parakhin (Yandex CTO). Improved performance of `exp` and `log` functions more than 6 times. The functions `exp` and `log` from a `Float32` argument will return `Float32` (in previous versions they always returned `Float64`). Now `exp(nan)` may return `inf`. The result of `exp` and `log` functions may not be the nearest machine-representable number to the true answer. [\#6254](https://github.com/ClickHouse/ClickHouse/pull/6254) ([alexey-milovidov](https://github.com/alexey-milovidov)) Using the Danila Kutenin variant to make fastops work [\#6317](https://github.com/ClickHouse/ClickHouse/pull/6317) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Disable consecutive key optimization for `UInt8/16`. [\#6298](https://github.com/ClickHouse/ClickHouse/pull/6298) [\#6701](https://github.com/ClickHouse/ClickHouse/pull/6701) ([akuzm](https://github.com/akuzm))
+- Improved performance of the `simdjson` library by getting rid of dynamic allocation in `ParsedJson::Iterator`. [\#6479](https://github.com/ClickHouse/ClickHouse/pull/6479) ([Vitaly Baranov](https://github.com/vitlibar))
+- Pre-fault pages when allocating memory with `mmap()`. [\#6667](https://github.com/ClickHouse/ClickHouse/pull/6667) ([akuzm](https://github.com/akuzm))
+- Fix a performance bug in `Decimal` comparison. [\#6380](https://github.com/ClickHouse/ClickHouse/pull/6380) ([Artem Zuikov](https://github.com/4ertus2))
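+
+A hedged sketch of the read-in-order optimization named at the start of this list (the table and its ordering are hypothetical):
+
+``` sql
+-- For a MergeTree table defined with ORDER BY (d, id), with
+-- optimize_read_in_order enabled this can read data in key order
+-- instead of performing a full sort:
+SELECT * FROM events ORDER BY d DESC LIMIT 10;
+```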
+
+#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-4}
+
+- Remove the Compiler (runtime template instantiation) because we have won over its performance. [\#6646](https://github.com/ClickHouse/ClickHouse/pull/6646) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added a performance test to show the degradation of performance in gcc-9 in a more isolated way. [\#6302](https://github.com/ClickHouse/ClickHouse/pull/6302) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added table function `numbers_mt`, which is a multithreaded version of `numbers`. Updated performance tests with hash functions. [\#6554](https://github.com/ClickHouse/ClickHouse/pull/6554) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Comparison mode in `clickhouse-benchmark`. [\#6220](https://github.com/ClickHouse/ClickHouse/issues/6220) [\#6343](https://github.com/ClickHouse/ClickHouse/pull/6343) ([dimarub2000](https://github.com/dimarub2000))
+- Best effort for printing stack traces. Also added `SIGPROF` as a debugging signal to print the stack trace of a running thread. [\#6529](https://github.com/ClickHouse/ClickHouse/pull/6529) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Every function in its own file, part 10. [\#6321](https://github.com/ClickHouse/ClickHouse/pull/6321) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Remove the doubled const `TABLE_IS_READ_ONLY`. [\#6566](https://github.com/ClickHouse/ClickHouse/pull/6566) ([filimonov](https://github.com/filimonov))
+- Formatting changes for `StringHashMap` PR [\#5417](https://github.com/ClickHouse/ClickHouse/issues/5417). [\#6700](https://github.com/ClickHouse/ClickHouse/pull/6700) ([akuzm](https://github.com/akuzm))
+- Better subquery for join creation in `ExpressionAnalyzer`. [\#6824](https://github.com/ClickHouse/ClickHouse/pull/6824) ([Artem Zuikov](https://github.com/4ertus2))
+- Remove a redundant condition (found by PVS Studio). [\#6775](https://github.com/ClickHouse/ClickHouse/pull/6775) ([akuzm](https://github.com/akuzm))
+- Separate the hash table interface for `ReverseIndex`. [\#6672](https://github.com/ClickHouse/ClickHouse/pull/6672) ([akuzm](https://github.com/akuzm))
+- Refactoring of settings. [\#6689](https://github.com/ClickHouse/ClickHouse/pull/6689) ([alesapin](https://github.com/alesapin))
+- Add comments for `set` index functions. [\#6319](https://github.com/ClickHouse/ClickHouse/pull/6319) ([Nikita Vasilev](https://github.com/nikvas0))
+- Increase the OOM score in the debug version on Linux. [\#6152](https://github.com/ClickHouse/ClickHouse/pull/6152) ([akuzm](https://github.com/akuzm))
+- HDFS HA now works in debug build. [\#6650](https://github.com/ClickHouse/ClickHouse/pull/6650) ([Weiqing Xu](https://github.com/weiqxu))
+- Added a test to `transform_query_for_external_database`. [\#6388](https://github.com/ClickHouse/ClickHouse/pull/6388) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Add a test for multiple materialized views for a Kafka table. [\#6509](https://github.com/ClickHouse/ClickHouse/pull/6509) ([Ivan](https://github.com/abyss7))
+- Make a better build scheme. [\#6500](https://github.com/ClickHouse/ClickHouse/pull/6500) ([Ivan](https://github.com/abyss7))
+- Fixed the `test_external_dictionaries` integration test in case it was executed under a non-root user. [\#6507](https://github.com/ClickHouse/ClickHouse/pull/6507) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- The bug reproduces when the total size of written packets exceeds `DBMS_DEFAULT_BUFFER_SIZE`. [\#6204](https://github.com/ClickHouse/ClickHouse/pull/6204) ([Yuriy Baranov](https://github.com/yurriy))
+- Added a test for the `RENAME` table race condition. [\#6752](https://github.com/ClickHouse/ClickHouse/pull/6752) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Avoid a data race on Settings in `KILL QUERY`. [\#6753](https://github.com/ClickHouse/ClickHouse/pull/6753) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Add an integration test for handling errors by a cache dictionary. [\#6755](https://github.com/ClickHouse/ClickHouse/pull/6755) ([Vitaly Baranov](https://github.com/vitlibar))
+- Disable parsing of ELF object files on Mac OS, because it makes no sense. [\#6578](https://github.com/ClickHouse/ClickHouse/pull/6578) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Attempt to make the changelog generator better. [\#6327](https://github.com/ClickHouse/ClickHouse/pull/6327) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Adding the `-Wshadow` switch to GCC. [\#6325](https://github.com/ClickHouse/ClickHouse/pull/6325) ([kreuzerkrieg](https://github.com/kreuzerkrieg))
+- Removed obsolete code for `mimalloc` support. [\#6715](https://github.com/ClickHouse/ClickHouse/pull/6715) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- `zlib-ng` determines x86 capabilities and saves this info to global variables. This is done in the deflateInit call, which may be made by different threads simultaneously. To avoid multithreaded writes, do it on library startup. [\#6141](https://github.com/ClickHouse/ClickHouse/pull/6141) ([akuzm](https://github.com/akuzm))
+- Regression test for a bug in join which was fixed in [\#5192](https://github.com/ClickHouse/ClickHouse/issues/5192). [\#6147](https://github.com/ClickHouse/ClickHouse/pull/6147) ([Bakhtiyor Ruziev](https://github.com/theruziev))
+- Fixed MSan report. [\#6144](https://github.com/ClickHouse/ClickHouse/pull/6144) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix the flapping TTL test. [\#6782](https://github.com/ClickHouse/ClickHouse/pull/6782) ([Anton Popov](https://github.com/CurtizJ))
+- Fixed a false data race in the `MergeTreeDataPart::is_frozen` field. [\#6583](https://github.com/ClickHouse/ClickHouse/pull/6583) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed timeouts in the fuzz test. In the previous version, it managed to find a false hangup in the query `SELECT * FROM numbers_mt(gccMurmurHash(''))`. [\#6582](https://github.com/ClickHouse/ClickHouse/pull/6582) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added debug checks to `static_cast` of columns. [\#6581](https://github.com/ClickHouse/ClickHouse/pull/6581) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Support for Oracle Linux in official RPM packages. [\#6356](https://github.com/ClickHouse/ClickHouse/issues/6356) [\#6585](https://github.com/ClickHouse/ClickHouse/pull/6585) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Changed json perftests from `once` to `loop` type. [\#6536](https://github.com/ClickHouse/ClickHouse/pull/6536) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- `odbc-bridge.cpp` defines `main()`, so it should not be included in `clickhouse-lib`. [\#6538](https://github.com/ClickHouse/ClickHouse/pull/6538) ([Orivej Desh](https://github.com/orivej))
+- Test for a crash in `FULL|RIGHT JOIN` with nulls in the right table's keys. [\#6362](https://github.com/ClickHouse/ClickHouse/pull/6362) ([Artem Zuikov](https://github.com/4ertus2))
+- Added a test for the limit on expansion of aliases, just in case. [\#6442](https://github.com/ClickHouse/ClickHouse/pull/6442) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Switched from `boost::filesystem` to `std::filesystem` where appropriate. [\#6253](https://github.com/ClickHouse/ClickHouse/pull/6253) [\#6385](https://github.com/ClickHouse/ClickHouse/pull/6385) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added RPM packages to the website. [\#6251](https://github.com/ClickHouse/ClickHouse/pull/6251) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Add a test for the fixed `Unknown identifier` exception in the `IN` section. [\#6708](https://github.com/ClickHouse/ClickHouse/pull/6708) ([Artem Zuikov](https://github.com/4ertus2))
+- Simplify `shared_ptr_helper` because people face difficulties understanding it. [\#6675](https://github.com/ClickHouse/ClickHouse/pull/6675) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added performance tests for the fixed Gorilla and DoubleDelta codecs. [\#6179](https://github.com/ClickHouse/ClickHouse/pull/6179) ([Vasily Nemkov](https://github.com/Enmk))
+- Split the integration test `test_dictionaries` into four separate tests. [\#6776](https://github.com/ClickHouse/ClickHouse/pull/6776) ([Vitaly Baranov](https://github.com/vitlibar))
+- Fix a PVS-Studio warning in `PipelineExecutor`. [\#6777](https://github.com/ClickHouse/ClickHouse/pull/6777) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Allow using the `library` dictionary source with ASan. [\#6482](https://github.com/ClickHouse/ClickHouse/pull/6482) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added an option to generate the changelog from a list of PRs. [\#6350](https://github.com/ClickHouse/ClickHouse/pull/6350) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Lock the `TinyLog` storage when reading. [\#6226](https://github.com/ClickHouse/ClickHouse/pull/6226) ([akuzm](https://github.com/akuzm))
+- Check for broken symlinks in CI. [\#6634](https://github.com/ClickHouse/ClickHouse/pull/6634) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Increase the timeout for the “stack overflow” test because it may take a long time in a debug build. [\#6637](https://github.com/ClickHouse/ClickHouse/pull/6637) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added a check for double whitespaces. [\#6643](https://github.com/ClickHouse/ClickHouse/pull/6643) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix `new/delete` memory tracking when built with sanitizers. The tracking is not clear. It only prevents memory limit exceptions in tests. [\#6450](https://github.com/ClickHouse/ClickHouse/pull/6450) ([Artem Zuikov](https://github.com/4ertus2))
+- Enable back the check of undefined symbols while linking. [\#6453](https://github.com/ClickHouse/ClickHouse/pull/6453) ([Ivan](https://github.com/abyss7))
+- Avoid rebuilding `hyperscan` every day. [\#6307](https://github.com/ClickHouse/ClickHouse/pull/6307) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a UBSan report in `ProtobufWriter`. [\#6163](https://github.com/ClickHouse/ClickHouse/pull/6163) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Don't allow using the query profiler together with sanitizers because it is not compatible. [\#6769](https://github.com/ClickHouse/ClickHouse/pull/6769) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added a test for reloading a dictionary after failure by timer. [\#6114](https://github.com/ClickHouse/ClickHouse/pull/6114) ([Vitaly Baranov](https://github.com/vitlibar))
+- Fixed an inconsistency in the `PipelineExecutor::prepareProcessor` argument type. [\#6494](https://github.com/ClickHouse/ClickHouse/pull/6494) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Added a test for bad URIs. [\#6493](https://github.com/ClickHouse/ClickHouse/pull/6493) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added more checks to the `CAST` function. This should get more information about the segmentation fault in the fuzzy test. [\#6346](https://github.com/ClickHouse/ClickHouse/pull/6346) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Added `gcc-9` support to the `docker/builder` container that builds images locally. [\#6333](https://github.com/ClickHouse/ClickHouse/pull/6333) ([Gleb Novikov](https://github.com/NanoBjorn))
+- Test for a primary key with `LowCardinality(String)`. [\#5044](https://github.com/ClickHouse/ClickHouse/issues/5044) [\#6219](https://github.com/ClickHouse/ClickHouse/pull/6219) ([dimarub2000](https://github.com/dimarub2000))
+- Fixed tests affected by slow stack trace printing. [\#6315](https://github.com/ClickHouse/ClickHouse/pull/6315) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added a test case for the crash in `groupUniqArray` fixed in [\#6029](https://github.com/ClickHouse/ClickHouse/pull/6029). [\#4402](https://github.com/ClickHouse/ClickHouse/issues/4402) [\#6129](https://github.com/ClickHouse/ClickHouse/pull/6129) ([akuzm](https://github.com/akuzm))
+- Fixed the tests for index mutations. [\#6645](https://github.com/ClickHouse/ClickHouse/pull/6645) ([Nikita Vasilev](https://github.com/nikvas0))
+- In performance tests, do not read the query log for queries we didn't run. [\#6427](https://github.com/ClickHouse/ClickHouse/pull/6427) ([akuzm](https://github.com/akuzm))
+- A materialized view can now be created with any low-cardinality type, regardless of the setting about suspicious low-cardinality types. [\#6428](https://github.com/ClickHouse/ClickHouse/pull/6428) ([Olga Khvostikova](https://github.com/stavrolia))
+- Updated the tests for the `send_logs_level` setting. [\#6207](https://github.com/ClickHouse/ClickHouse/pull/6207) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fixed the build under gcc-8.2. [\#6196](https://github.com/ClickHouse/ClickHouse/pull/6196) ([Max Akhmedov](https://github.com/zlobober))
+- Fixed the build with the internal libc++. [\#6724](https://github.com/ClickHouse/ClickHouse/pull/6724) ([Ivan](https://github.com/abyss7))
+- Fixed the shared build with the `rdkafka` library. [\#6101](https://github.com/ClickHouse/ClickHouse/pull/6101) ([Ivan](https://github.com/abyss7))
+- Fixes for the Mac OS build (incomplete). [\#6390](https://github.com/ClickHouse/ClickHouse/pull/6390) ([alexey-milovidov](https://github.com/alexey-milovidov)) [\#6429](https://github.com/ClickHouse/ClickHouse/pull/6429) ([alex-zaitsev](https://github.com/alex-zaitsev))
+- Fixed the “splitted” build. [\#6618](https://github.com/ClickHouse/ClickHouse/pull/6618) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Other build fixes: [\#6186](https://github.com/ClickHouse/ClickHouse/pull/6186) ([Amos Bird](https://github.com/amosbird)) [\#6486](https://github.com/ClickHouse/ClickHouse/pull/6486) [\#6348](https://github.com/ClickHouse/ClickHouse/pull/6348) ([vxider](https://github.com/Vxider)) [\#6744](https://github.com/ClickHouse/ClickHouse/pull/6744) ([Ivan](https://github.com/abyss7)) [\#6016](https://github.com/ClickHouse/ClickHouse/pull/6016) [\#6421](https://github.com/ClickHouse/ClickHouse/pull/6421) [\#6491](https://github.com/ClickHouse/ClickHouse/pull/6491) ([proller](https://github.com/proller))
+
+#### Backward Incompatible Change {#backward-incompatible-change-3}
+
+- Removed the rarely used table function `catBoostPool` and the storage `CatBoostPool`. If you have used this table function, please write an email to `clickhouse-feedback@yandex-team.com`. Note that CatBoost integration remains and will be supported. [\#6279](https://github.com/ClickHouse/ClickHouse/pull/6279) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Disabled `ANY RIGHT JOIN` and `ANY FULL JOIN` by default. Set the `any_join_distinct_right_table_keys` setting to enable them. [\#5126](https://github.com/ClickHouse/ClickHouse/issues/5126) [\#6351](https://github.com/ClickHouse/ClickHouse/pull/6351) ([Artem Zuikov](https://github.com/4ertus2))
+
+## ClickHouse Release 19.13 {#clickhouse-release-19-13}
+
+### ClickHouse Release 19.13.6.51, 2019-10-02 {#clickhouse-release-19-13-6-51-2019-10-02}
+
+#### Bug Fix {#bug-fix-9}
+
+- This release also contains all bug fixes from 19.11.12.69.
+
+### ClickHouse Release 19.13.5.44, 2019-09-20 {#clickhouse-release-19-13-5-44-2019-09-20}
+
+#### Bug Fix {#bug-fix-10}
+
+- This release also contains all bug fixes from 19.14.6.12.
+- Fixed a possible inconsistent state of the table while executing a `DROP` query for a replicated table when ZooKeeper is not accessible. [\#6045](https://github.com/ClickHouse/ClickHouse/issues/6045) [\#6413](https://github.com/ClickHouse/ClickHouse/pull/6413) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
+- Fixed a data race in StorageMerge. [\#6717](https://github.com/ClickHouse/ClickHouse/pull/6717) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a bug introduced in the query profiler which led to an endless recv from a socket. [\#6386](https://github.com/ClickHouse/ClickHouse/pull/6386) ([alesapin](https://github.com/alesapin))
+- Fixed excessive CPU usage while executing the `JSONExtractRaw` function over a boolean value. [\#6208](https://github.com/ClickHouse/ClickHouse/pull/6208) ([Vitaly Baranov](https://github.com/vitlibar))
+- Fixed a regression while pushing to a materialized view. [\#6415](https://github.com/ClickHouse/ClickHouse/pull/6415) ([Ivan](https://github.com/abyss7))
+- The table function `url` had a vulnerability which allowed an attacker to inject arbitrary HTTP headers into the request. This issue was found by [Nikita Tikhomirov](https://github.com/NSTikhomirov). [\#6466](https://github.com/ClickHouse/ClickHouse/pull/6466) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a useless `AST` check in the set index. [\#6510](https://github.com/ClickHouse/ClickHouse/issues/6510) [\#6651](https://github.com/ClickHouse/ClickHouse/pull/6651) ([Nikita Vasilev](https://github.com/nikvas0))
+- Fixed parsing of `AggregateFunction` values embedded in a query. [\#6575](https://github.com/ClickHouse/ClickHouse/issues/6575) [\#6773](https://github.com/ClickHouse/ClickHouse/pull/6773) ([Zhichang Yu](https://github.com/yuzhichang))
+- Fixed the wrong behaviour of the `trim` family of functions. [\#6647](https://github.com/ClickHouse/ClickHouse/pull/6647) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+### ClickHouse Release 19.13.4.32, 2019-09-10 {#clickhouse-release-19-13-4-32-2019-09-10}
+
+#### Bug Fix {#bug-fix-11}
+
+- This release also contains all bug and security fixes from 19.11.9.52 and 19.11.10.54.
+- Fixed a data race between the `system.parts` table and `ALTER` queries. [\#6245](https://github.com/ClickHouse/ClickHouse/issues/6245) [\#6513](https://github.com/ClickHouse/ClickHouse/pull/6513) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed mismatched headers in streams that happened when reading from an empty distributed table with sample and prewhere. [\#6167](https://github.com/ClickHouse/ClickHouse/issues/6167) ([fancyqlx](https://github.com/fancyqlx)) [\#6823](https://github.com/ClickHouse/ClickHouse/pull/6823) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fixed a crash when using an `IN` clause with a subquery returning a tuple. [\#6125](https://github.com/ClickHouse/ClickHouse/issues/6125) [\#6550](https://github.com/ClickHouse/ClickHouse/pull/6550) ([tavplubix](https://github.com/tavplubix))
+- Fixed the case with the same column names in the `GLOBAL JOIN ON` section. [\#6181](https://github.com/ClickHouse/ClickHouse/pull/6181) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed a crash when casting types to `Decimal` that do not support it. An exception is thrown instead. [\#6297](https://github.com/ClickHouse/ClickHouse/pull/6297) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed a crash in the `extractAll()` function. [\#6644](https://github.com/ClickHouse/ClickHouse/pull/6644) ([Artem Zuikov](https://github.com/4ertus2))
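+  For reference, normal usage of the function involved looks like this (the crashing input itself is not described in the PR):
+
+  ``` sql
+  -- extractAll returns every match of the regular expression as an array of strings.
+  SELECT extractAll('2019-08-14 19:13', '\\d+') AS parts;
+  -- ['2019','08','14','19','13']
+  ```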
+- Query transformation for the `MySQL`, `ODBC` and `JDBC` table functions now works properly for `SELECT WHERE` queries with multiple `AND` expressions. [\#6381](https://github.com/ClickHouse/ClickHouse/issues/6381) [\#6676](https://github.com/ClickHouse/ClickHouse/pull/6676) ([dimarub2000](https://github.com/dimarub2000))
+- Added previous declaration checks for the MySQL 8 integration. [\#6569](https://github.com/ClickHouse/ClickHouse/pull/6569) ([Rafael David Tinoco](https://github.com/rafaeldtinoco))
+
+#### Security Fix {#security-fix-1}
+
+- Fixed two vulnerabilities in codecs in the decompression phase (a malicious user can fabricate compressed data that leads to a buffer overflow in decompression). [\#6670](https://github.com/ClickHouse/ClickHouse/pull/6670) ([Artem Zuikov](https://github.com/4ertus2))
+
+### ClickHouse Release 19.13.3.26, 2019-08-22 {#clickhouse-release-19-13-3-26-2019-08-22}
+
+#### Bug Fix {#bug-fix-12}
+
+- Fixed `ALTER TABLE ... UPDATE` queries for tables with `enable_mixed_granularity_parts=1`. [\#6543](https://github.com/ClickHouse/ClickHouse/pull/6543) ([alesapin](https://github.com/alesapin))
+- Fixed a subquery with a tuple when using the IN clause. [\#6125](https://github.com/ClickHouse/ClickHouse/issues/6125) [\#6550](https://github.com/ClickHouse/ClickHouse/pull/6550) ([tavplubix](https://github.com/tavplubix))
+- Fixed an issue where, if a stale replica becomes alive, it may still have data parts from a dropped partition. [\#6522](https://github.com/ClickHouse/ClickHouse/issues/6522) [\#6523](https://github.com/ClickHouse/ClickHouse/pull/6523) ([tavplubix](https://github.com/tavplubix))
+- Fixed an issue with parsing CSV. [\#6426](https://github.com/ClickHouse/ClickHouse/issues/6426) [\#6559](https://github.com/ClickHouse/ClickHouse/pull/6559) ([tavplubix](https://github.com/tavplubix))
+- Fixed a data race in the system.parts table and ALTER queries. This fixes [\#6245](https://github.com/ClickHouse/ClickHouse/issues/6245). [\#6513](https://github.com/ClickHouse/ClickHouse/pull/6513) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed wrong code in mutations that may lead to memory corruption. Fixed a segfault on reads of the address `0x14c0` that may have happened due to concurrent `DROP TABLE` and `SELECT` from `system.parts` or `system.parts_columns`. Fixed a race condition in the preparation of mutation queries. Fixed a deadlock caused by `OPTIMIZE` of replicated tables and concurrent modification operations like ALTERs. [\#6514](https://github.com/ClickHouse/ClickHouse/pull/6514) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed possible data loss after an `ALTER DELETE` query on a table with a skipping index. [\#6224](https://github.com/ClickHouse/ClickHouse/issues/6224) [\#6282](https://github.com/ClickHouse/ClickHouse/pull/6282) ([Nikita Vasilev](https://github.com/nikvas0))
+
+#### Security Fix {#security-fix-2}
+
+- If an attacker has write access to ZooKeeper and is able to run a custom server available from the network where ClickHouse runs, they can create a custom-built malicious server that will act as a ClickHouse replica and register it in ZooKeeper. When another replica fetches a data part from the malicious replica, it can force clickhouse-server to write to an arbitrary path on the filesystem. Found by Eldar Zaitov, information security team at Yandex. [\#6247](https://github.com/ClickHouse/ClickHouse/pull/6247) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+### ClickHouse Release 19.13.2.19, 2019-08-14 {#clickhouse-release-19-13-2-19-2019-08-14}
+
+#### New Feature {#new-feature-5}
+
+- Sampling profiler on the query level. [Example](https://gist.github.com/alexey-milovidov/92758583dd41c24c360fdb8d6a4da194). [\#4247](https://github.com/ClickHouse/ClickHouse/issues/4247) ([laplab](https://github.com/laplab)) [\#6124](https://github.com/ClickHouse/ClickHouse/pull/6124) ([alexey-milovidov](https://github.com/alexey-milovidov)) [\#6250](https://github.com/ClickHouse/ClickHouse/pull/6250) [\#6283](https://github.com/ClickHouse/ClickHouse/pull/6283) [\#6386](https://github.com/ClickHouse/ClickHouse/pull/6386)
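+  A minimal sketch of how the profiler is typically enabled, assuming the `query_profiler_real_time_period_ns` setting and the `system.trace_log` table that the linked example relies on:
+
+  ``` sql
+  -- Collect a stack trace every 10 ms of wall-clock time for queries in this session.
+  SET query_profiler_real_time_period_ns = 10000000;
+  SELECT sum(number) FROM numbers(100000000);
+  -- Samples are written to system.trace_log and can be aggregated afterwards.
+  SELECT count() FROM system.trace_log WHERE event_date = today();
+  ```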
+- Allow specifying a list of columns with the `COLUMNS('regexp')` expression that works like a more sophisticated variant of the `*` asterisk. [\#5951](https://github.com/ClickHouse/ClickHouse/pull/5951) ([mfridental](https://github.com/mfridental)), ([alexey-milovidov](https://github.com/alexey-milovidov))
+- `CREATE TABLE AS table_function()` is now possible. [\#6057](https://github.com/ClickHouse/ClickHouse/pull/6057) ([dimarub2000](https://github.com/dimarub2000))
+- The Adam optimizer for stochastic gradient descent is used by default in the `stochasticLinearRegression()` and `stochasticLogisticRegression()` aggregate functions, because it shows good quality with almost no tuning. [\#6000](https://github.com/ClickHouse/ClickHouse/pull/6000) ([Quid37](https://github.com/Quid37))
+- Added functions for working with custom week numbers. [\#5212](https://github.com/ClickHouse/ClickHouse/pull/5212) ([andyyzh](https://github.com/andyyzh))
+- `RENAME` queries now work with all storages. [\#5953](https://github.com/ClickHouse/ClickHouse/pull/5953) ([Ivan](https://github.com/abyss7))
+- Now the client receives logs of any desired level from the server by setting `send_logs_level`, regardless of the log level specified in the server settings. [\#5964](https://github.com/ClickHouse/ClickHouse/pull/5964) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
+
+#### Backward Incompatible Change {#backward-incompatible-change-4}
+
+- The setting `input_format_defaults_for_omitted_fields` is enabled by default. Inserts into Distributed tables need this setting to be the same across the cluster (you need to set it before a rolling update). It enables the calculation of complex default expressions for omitted fields in the `JSONEachRow` and `CSV*` formats. It should be the expected behavior, but may lead to a negligible performance difference. [\#6043](https://github.com/ClickHouse/ClickHouse/pull/6043) ([Artem Zuikov](https://github.com/4ertus2)), [\#5625](https://github.com/ClickHouse/ClickHouse/pull/5625) ([akuzm](https://github.com/akuzm))
+
+#### Experimental Features {#experimental-features}
+
+- New query processing pipeline. Use the `experimental_use_processors=1` option to enable it. Use at your own risk. [\#4914](https://github.com/ClickHouse/ClickHouse/pull/4914) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+
+#### Bug Fix {#bug-fix-13}
+
+- Kafka integration has been fixed in this version.
+- Fixed `DoubleDelta` encoding of `Int64` for large `DoubleDelta` values; improved `DoubleDelta` encoding of random data for `Int32`. [\#5998](https://github.com/ClickHouse/ClickHouse/pull/5998) ([Vasily Nemkov](https://github.com/Enmk))
+- Fixed overestimation of `max_rows_to_read` if the `merge_tree_uniform_read_distribution` setting is set to 0. [\#6019](https://github.com/ClickHouse/ClickHouse/pull/6019) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+#### Improvement {#improvement-4}
+
+- Throws an exception if a `config.d` file doesn't have the corresponding root element as the config file. [\#6123](https://github.com/ClickHouse/ClickHouse/pull/6123) ([dimarub2000](https://github.com/dimarub2000))
+
+#### Performance Improvement {#performance-improvement-3}
+
+- Optimized `count()`. It now uses the smallest column (if possible). [\#6028](https://github.com/ClickHouse/ClickHouse/pull/6028) ([Amos Bird](https://github.com/amosbird))
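+  To illustrate the effect (the table and column are hypothetical): a bare `count()` lets the server read whichever column is cheapest, while naming a column still forces reading it:
+
+  ``` sql
+  SELECT count() FROM hits;            -- may be satisfied from the smallest column on disk
+  SELECT count(user_agent) FROM hits;  -- still reads user_agent (and skips NULLs if Nullable)
+  ```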
+
+#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-5}
+
+- Report memory usage in performance tests. [\#5899](https://github.com/ClickHouse/ClickHouse/pull/5899) ([akuzm](https://github.com/akuzm))
+- Fixed the build with external `libcxx`. [\#6010](https://github.com/ClickHouse/ClickHouse/pull/6010) ([Ivan](https://github.com/abyss7))
+- Fixed the shared build with the `rdkafka` library. [\#6101](https://github.com/ClickHouse/ClickHouse/pull/6101) ([Ivan](https://github.com/abyss7))
+
+## ClickHouse Release 19.11 {#clickhouse-release-19-11}
+
+### ClickHouse Release 19.11.13.74, 2019-11-01 {#clickhouse-release-19-11-13-74-2019-11-01}
+
+#### Bug Fix {#bug-fix-14}
+
+- Fixed a rare crash in `ALTER MODIFY COLUMN` and vertical merge when one of the merged/altered parts is empty (0 rows). [\#6780](https://github.com/ClickHouse/ClickHouse/pull/6780) ([alesapin](https://github.com/alesapin))
+- Manual update of `SIMDJSON`. This fixes possible flooding of stderr files with bogus JSON diagnostic messages. [\#7548](https://github.com/ClickHouse/ClickHouse/pull/7548) ([Alexander Kazakov](https://github.com/Akazz))
+- Fixed a bug with the wrong `mrk` file extension for mutations. ([alesapin](https://github.com/alesapin))
+
+### ClickHouse Release 19.11.12.69, 2019-10-02 {#clickhouse-release-19-11-12-69-2019-10-02}
+
+#### Bug Fix {#bug-fix-15}
+
+- Fixed performance degradation of index analysis on complex keys on large tables. This fixes [\#6924](https://github.com/ClickHouse/ClickHouse/issues/6924). [\#7075](https://github.com/ClickHouse/ClickHouse/pull/7075) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Avoid a rare SIGSEGV while sending data in tables with the Distributed engine (`Failed to send batch: file with index XXXXX is absent`). [\#7032](https://github.com/ClickHouse/ClickHouse/pull/7032) ([Azat Khuzhin](https://github.com/azat))
+- Fixed `Unknown identifier` with multiple joins. This fixes [\#5254](https://github.com/ClickHouse/ClickHouse/issues/5254). [\#7022](https://github.com/ClickHouse/ClickHouse/pull/7022) ([Artem Zuikov](https://github.com/4ertus2))
+
+### ClickHouse Release 19.11.11.57, 2019-09-13 {#clickhouse-release-19-11-11-57-2019-09-13}
+
+- Fixed a logical error causing segfaults when selecting from an empty Kafka topic. [\#6902](https://github.com/ClickHouse/ClickHouse/issues/6902) [\#6909](https://github.com/ClickHouse/ClickHouse/pull/6909) ([Ivan](https://github.com/abyss7))
+- Fixed the function `arrayEnumerateUniqRanked` with empty arrays in params. [\#6928](https://github.com/ClickHouse/ClickHouse/pull/6928) ([proller](https://github.com/proller))
+
+### ClickHouse Release 19.11.10.54, 2019-09-10 {#clickhouse-release-19-11-10-54-2019-09-10}
+
+#### Bug Fix {#bug-fix-16}
+
+- Store offsets for Kafka messages manually to be able to commit them all at once for all partitions. Fixes potential duplication in the “one consumer - many partitions” scenario. [\#6872](https://github.com/ClickHouse/ClickHouse/pull/6872) ([Ivan](https://github.com/abyss7))
+
+### ClickHouse Release 19.11.9.52, 2019-09-06 {#clickhouse-release-19-11-9-52-2019-09-6}
+
+- Improved error handling in cache dictionaries. [\#6737](https://github.com/ClickHouse/ClickHouse/pull/6737) ([Vitaly Baranov](https://github.com/vitlibar))
+- Fixed a bug in the function `arrayEnumerateUniqRanked`. [\#6779](https://github.com/ClickHouse/ClickHouse/pull/6779) ([proller](https://github.com/proller))
+- Fixed the `JSONExtract` function while extracting a `Tuple` from JSON. [\#6718](https://github.com/ClickHouse/ClickHouse/pull/6718) ([Vitaly Baranov](https://github.com/vitlibar))
+- Fixed possible data loss after an `ALTER DELETE` query on a table with a skipping index. [\#6224](https://github.com/ClickHouse/ClickHouse/issues/6224) [\#6282](https://github.com/ClickHouse/ClickHouse/pull/6282) ([Nikita Vasilev](https://github.com/nikvas0))
+- Fixed performance tests. [\#6392](https://github.com/ClickHouse/ClickHouse/pull/6392) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Parquet: fixed reading boolean columns. [\#6579](https://github.com/ClickHouse/ClickHouse/pull/6579) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed the wrong behaviour of the `nullIf` function for constant arguments. [\#6518](https://github.com/ClickHouse/ClickHouse/pull/6518) ([Guillaume Tassery](https://github.com/YiuRULE)) [\#6580](https://github.com/ClickHouse/ClickHouse/pull/6580) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed the Kafka message duplication problem on normal server restart. [\#6597](https://github.com/ClickHouse/ClickHouse/pull/6597) ([Ivan](https://github.com/abyss7))
+- Fixed an issue when a long `ALTER UPDATE` or `ALTER DELETE` may prevent regular merges from running. Prevent mutations from executing if there are not enough free threads available. [\#6502](https://github.com/ClickHouse/ClickHouse/issues/6502) [\#6617](https://github.com/ClickHouse/ClickHouse/pull/6617) ([tavplubix](https://github.com/tavplubix))
+- Fixed an error in handling “timezone” in the server configuration file. [\#6709](https://github.com/ClickHouse/ClickHouse/pull/6709) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed the Kafka tests. [\#6805](https://github.com/ClickHouse/ClickHouse/pull/6805) ([Ivan](https://github.com/abyss7))
+
+#### Security Fix {#security-fix-3}
+
+- If an attacker has write access to ZooKeeper and is able to run a custom server available from the network where ClickHouse runs, they can create a custom-built malicious server that will act as a ClickHouse replica and register it in ZooKeeper. When another replica fetches a data part from the malicious replica, it can force clickhouse-server to write to an arbitrary path on the filesystem. Found by Eldar Zaitov, information security team at Yandex. [\#6247](https://github.com/ClickHouse/ClickHouse/pull/6247) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+### ClickHouse Release 19.11.8.46, 2019-08-22 {#clickhouse-release-19-11-8-46-2019-08-22}
+
+#### Bug Fix {#bug-fix-17}
+
+- Fixed `ALTER TABLE ... UPDATE` queries for tables with `enable_mixed_granularity_parts=1`. [\#6543](https://github.com/ClickHouse/ClickHouse/pull/6543) ([alesapin](https://github.com/alesapin))
+- Fixed a subquery with a tuple when using the IN clause. [\#6125](https://github.com/ClickHouse/ClickHouse/issues/6125) [\#6550](https://github.com/ClickHouse/ClickHouse/pull/6550) ([tavplubix](https://github.com/tavplubix))
+- Fixed an issue where, if a stale replica becomes alive, it may still have data parts from a dropped partition. [\#6522](https://github.com/ClickHouse/ClickHouse/issues/6522) [\#6523](https://github.com/ClickHouse/ClickHouse/pull/6523) ([tavplubix](https://github.com/tavplubix))
+- Fixed an issue with parsing CSV. [\#6426](https://github.com/ClickHouse/ClickHouse/issues/6426) [\#6559](https://github.com/ClickHouse/ClickHouse/pull/6559) ([tavplubix](https://github.com/tavplubix))
+- Fixed a data race in the system.parts table and ALTER queries. This fixes [\#6245](https://github.com/ClickHouse/ClickHouse/issues/6245). [\#6513](https://github.com/ClickHouse/ClickHouse/pull/6513) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed wrong code in mutations that may lead to memory corruption. Fixed a segfault on reads of the address `0x14c0` that may have happened due to concurrent `DROP TABLE` and `SELECT` from `system.parts` or `system.parts_columns`. Fixed a race condition in the preparation of mutation queries. Fixed a deadlock caused by `OPTIMIZE` of replicated tables and concurrent modification operations like ALTERs. [\#6514](https://github.com/ClickHouse/ClickHouse/pull/6514) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+### ClickHouse Release 19.11.7.40, 2019-08-14 {#clickhouse-release-19-11-7-40-2019-08-14}
+
+#### Bug Fix {#bug-fix-18}
+
+- Kafka integration has been fixed in this version.
+- Fixed a segfault when using `arrayReduce` with constant arguments. [\#6326](https://github.com/ClickHouse/ClickHouse/pull/6326) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed `toFloat()` monotonicity. [\#6374](https://github.com/ClickHouse/ClickHouse/pull/6374) ([dimarub2000](https://github.com/dimarub2000))
+- Fixed a segfault with enabled `optimize_skip_unused_shards` and a missing sharding key. [\#6384](https://github.com/ClickHouse/ClickHouse/pull/6384) ([CurtizJ](https://github.com/CurtizJ))
+- Fixed the logic of the `arrayEnumerateUniqRanked` function. [\#6423](https://github.com/ClickHouse/ClickHouse/pull/6423) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Removed extra verbose logging from the MySQL handler. [\#6389](https://github.com/ClickHouse/ClickHouse/pull/6389) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed wrong behavior and possible segfaults in the `topK` and `topKWeighted` aggregate functions. [\#6404](https://github.com/ClickHouse/ClickHouse/pull/6404) ([CurtizJ](https://github.com/CurtizJ))
+- Do not expose virtual columns in the `system.columns` table. This is required for backward compatibility. [\#6406](https://github.com/ClickHouse/ClickHouse/pull/6406) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a bug with memory allocation for string fields in a complex-key cache dictionary. [\#6447](https://github.com/ClickHouse/ClickHouse/pull/6447) ([alesapin](https://github.com/alesapin))
+- Fixed a bug with enabling adaptive granularity when creating a new replica for a `Replicated*MergeTree` table. [\#6452](https://github.com/ClickHouse/ClickHouse/pull/6452) ([alesapin](https://github.com/alesapin))
+- Fixed an infinite loop when reading Kafka messages. [\#6354](https://github.com/ClickHouse/ClickHouse/pull/6354) ([abyss7](https://github.com/abyss7))
+- Fixed the possibility of a fabricated query causing a server crash due to stack overflow in the SQL parser, and the possibility of stack overflow in `Merge` and `Distributed` tables. [\#6433](https://github.com/ClickHouse/ClickHouse/pull/6433) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a Gorilla encoding error on small sequences. [\#6444](https://github.com/ClickHouse/ClickHouse/pull/6444) ([Enmk](https://github.com/Enmk))
+
+#### Improvement {#improvement-5}
+
+- Allow users to override the `poll_interval` and `idle_connection_timeout` settings on connection. [\#6230](https://github.com/ClickHouse/ClickHouse/pull/6230) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+### ClickHouse Release 19.11.5.28, 2019-08-05 {#clickhouse-release-19-11-5-28-2019-08-05}
+
+#### Bug Fix {#bug-fix-19}
+
+- Fixed the possibility of hanging queries when the server is overloaded. [\#6301](https://github.com/ClickHouse/ClickHouse/pull/6301) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed FPE in the yandexConsistentHash function. This fixes [\#6304](https://github.com/ClickHouse/ClickHouse/issues/6304). [\#6126](https://github.com/ClickHouse/ClickHouse/pull/6126) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a bug in the conversion of `LowCardinality` types in `AggregateFunctionFactory`. This fixes [\#6257](https://github.com/ClickHouse/ClickHouse/issues/6257). [\#6281](https://github.com/ClickHouse/ClickHouse/pull/6281) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fixed parsing of `bool` settings from the `true` and `false` strings in configuration files. [\#6278](https://github.com/ClickHouse/ClickHouse/pull/6278) ([alesapin](https://github.com/alesapin))
+- Fixed a rare bug with incompatible stream headers in queries to a `Distributed` table over a `MergeTree` table when part of the `WHERE` moves to `PREWHERE`. [\#6236](https://github.com/ClickHouse/ClickHouse/pull/6236) ([alesapin](https://github.com/alesapin))
+- Fixed an overflow in the integer division of a signed type by an unsigned type. This fixes [\#6214](https://github.com/ClickHouse/ClickHouse/issues/6214). [\#6233](https://github.com/ClickHouse/ClickHouse/pull/6233) ([alexey-milovidov](https://github.com/alexey-milovidov))
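+  For context, the affected operation mixes signedness, e.g. (a sketch, not the exact overflowing case from the report):
+
+  ``` sql
+  -- Integer division of a signed value by an unsigned one; before the fix, some
+  -- such combinations could overflow instead of producing the expected quotient.
+  SELECT intDiv(toInt32(-100), toUInt8(3));
+  ```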
+
+#### Backward Incompatible Change {#backward-incompatible-change-5}
+
+- `Kafka` is still broken.
+
+### ClickHouse Release 19.11.4.24, 2019-08-01 {#clickhouse-release-19-11-4-24-2019-08-01}
+
+#### Bug Fix {#bug-fix-20}
+
+- Fixed a bug with writing secondary index marks with adaptive granularity. [\#6126](https://github.com/ClickHouse/ClickHouse/pull/6126) ([alesapin](https://github.com/alesapin))
+- Fixed the `WITH ROLLUP` and `WITH CUBE` modifiers of `GROUP BY` with two-level aggregation. [\#6225](https://github.com/ClickHouse/ClickHouse/pull/6225) ([Anton Popov](https://github.com/CurtizJ))
+- Fixed a hang in the `JSONExtractRaw` function. Fixed [\#6195](https://github.com/ClickHouse/ClickHouse/issues/6195) [\#6198](https://github.com/ClickHouse/ClickHouse/pull/6198) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a segfault in ExternalLoader::reloadOutdated(). [\#6082](https://github.com/ClickHouse/ClickHouse/pull/6082) ([Vitaly Baranov](https://github.com/vitlibar))
+- Fixed the case when the server may close listening sockets but not shut down, and continue serving remaining queries. You may end up with two running clickhouse-server processes. Sometimes the server may return a `bad_function_call` error for remaining queries. [\#6231](https://github.com/ClickHouse/ClickHouse/pull/6231) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a useless and incorrect condition on the update field for the initial loading of external dictionaries via ODBC, MySQL, ClickHouse and HTTP. This fixes [\#6069](https://github.com/ClickHouse/ClickHouse/issues/6069) [\#6083](https://github.com/ClickHouse/ClickHouse/pull/6083) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed an irrelevant exception in the cast of `LowCardinality(Nullable)` to a not-Nullable column in case it doesn't contain Nulls (e.g. in a query like `SELECT CAST(CAST('Hello' AS LowCardinality(Nullable(String))) AS String)`). [\#6094](https://github.com/ClickHouse/ClickHouse/issues/6094) [\#6119](https://github.com/ClickHouse/ClickHouse/pull/6119) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fixed the non-deterministic result of the “uniq” aggregate function in extremely rare cases. The bug was present in all ClickHouse versions. [\#6058](https://github.com/ClickHouse/ClickHouse/pull/6058) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a segfault when we set a slightly too high CIDR in the function `IPv6CIDRToRange`. [\#6068](https://github.com/ClickHouse/ClickHouse/pull/6068) ([Guillaume Tassery](https://github.com/YiuRULE))
+- Fixed a small memory leak when the server throws many exceptions from many different contexts. [\#6144](https://github.com/ClickHouse/ClickHouse/pull/6144) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed the situation when the consumer got paused before subscription and was not resumed afterwards. [\#6075](https://github.com/ClickHouse/ClickHouse/pull/6075) ([Ivan](https://github.com/abyss7)) Note that Kafka is broken in this version.
+- Clear the Kafka data buffer from a previous read operation that completed with an error. [\#6026](https://github.com/ClickHouse/ClickHouse/pull/6026) ([Nikolay](https://github.com/bopohaa)) Note that Kafka is broken in this version.
+- Since `StorageMergeTree::background_task_handle` is initialized in `startup()`, `MergeTreeBlockOutputStream::write()` may try to use it before initialization. Just check if it is initialized. [\#6080](https://github.com/ClickHouse/ClickHouse/pull/6080) ([Ivan](https://github.com/abyss7))
+
+#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-6}
+
+- Added official `rpm` packages. [\#5740](https://github.com/ClickHouse/ClickHouse/pull/5740) ([proller](https://github.com/proller)) ([alesapin](https://github.com/alesapin))
+- Added the ability to build `.rpm` and `.tgz` packages with the `packager` script. [\#5769](https://github.com/ClickHouse/ClickHouse/pull/5769) ([alesapin](https://github.com/alesapin))
+- Fixes for the “Arcadia” build system. [\#6223](https://github.com/ClickHouse/ClickHouse/pull/6223) ([proller](https://github.com/proller))
+
+#### Backward Incompatible Change {#backward-incompatible-change-6}
+
+- `Kafka` is broken in this version.
+
+### ClickHouse Release 19.11.3.11, 2019-07-18 {#clickhouse-release-19-11-3-11-2019-07-18}
+
+#### New Feature {#new-feature-6}
+
+- Added support for prepared statements. [\#5331](https://github.com/ClickHouse/ClickHouse/pull/5331/) ([Alexander](https://github.com/sanych73)) [\#5630](https://github.com/ClickHouse/ClickHouse/pull/5630) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- `DoubleDelta` and `Gorilla` column codecs. [\#5600](https://github.com/ClickHouse/ClickHouse/pull/5600) ([Vasily Nemkov](https://github.com/Enmk))
+- Added the `os_thread_priority` setting that allows controlling the “nice” value of query processing threads that is used by the OS to adjust dynamic scheduling priority. It requires the `CAP_SYS_NICE` capability to work. This implements [\#5858](https://github.com/ClickHouse/ClickHouse/issues/5858) [\#5909](https://github.com/ClickHouse/ClickHouse/pull/5909) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Implemented the `_topic`, `_offset` and `_key` columns for the Kafka engine. [\#5382](https://github.com/ClickHouse/ClickHouse/pull/5382) ([Ivan](https://github.com/abyss7)) Note that Kafka is broken in this version.
+- Added the aggregate function combinator `-Resample`. [\#5590](https://github.com/ClickHouse/ClickHouse/pull/5590) ([hcz](https://github.com/hczhcz))
+- Aggregate functions `groupArrayMovingSum(win_size)(x)` and `groupArrayMovingAvg(win_size)(x)`, which calculate a moving sum/average with or without a window-size limitation. [\#5595](https://github.com/ClickHouse/ClickHouse/pull/5595) ([inv2004](https://github.com/inv2004))
+- Added the synonym `arrayFlatten` \<-\> `flatten`. [\#5764](https://github.com/ClickHouse/ClickHouse/pull/5764) ([hcz](https://github.com/hczhcz))
+- Integrated the H3 function `geoToH3` from Uber. [\#4724](https://github.com/ClickHouse/ClickHouse/pull/4724) ([Remen Ivan](https://github.com/BHYCHIK)) [\#5805](https://github.com/ClickHouse/ClickHouse/pull/5805) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+#### Bug Fix {#bug-fix-21}
+
+- Implemented a DNS cache with asynchronous updates. A separate thread resolves all hosts and updates the DNS cache periodically (setting `dns_cache_update_period`). It should help when the IPs of hosts change frequently. [\#5857](https://github.com/ClickHouse/ClickHouse/pull/5857) ([Anton Popov](https://github.com/CurtizJ))
+- Fixed a segfault in the `Delta` codec which affects columns with values less than 32 bits in size. The bug led to random memory corruption. [\#5786](https://github.com/ClickHouse/ClickHouse/pull/5786) ([alesapin](https://github.com/alesapin))
+- Fixed a segfault in TTL merge with non-physical columns in the block. [\#5819](https://github.com/ClickHouse/ClickHouse/pull/5819) ([Anton Popov](https://github.com/CurtizJ))
+- Fixed a rare bug in checking parts with a `LowCardinality` column. Previously `checkDataPart` always failed for parts with a `LowCardinality` column. [\#5832](https://github.com/ClickHouse/ClickHouse/pull/5832) ([alesapin](https://github.com/alesapin))
+- Avoid hanging connections when the server thread pool is full. It is important for connections from the `remote` table function or connections to a shard without replicas when there is a long connection timeout. This fixes [\#5878](https://github.com/ClickHouse/ClickHouse/issues/5878) [\#5881](https://github.com/ClickHouse/ClickHouse/pull/5881) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Support for constant arguments to the `evalMLModel` function. This fixes [\#5817](https://github.com/ClickHouse/ClickHouse/issues/5817) [\#5820](https://github.com/ClickHouse/ClickHouse/pull/5820) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed the issue when ClickHouse determined the default timezone as `UCT` instead of `UTC`. This fixes [\#5804](https://github.com/ClickHouse/ClickHouse/issues/5804). [\#5828](https://github.com/ClickHouse/ClickHouse/pull/5828) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a buffer underflow in `visitParamExtractRaw`. This fixes [\#5901](https://github.com/ClickHouse/ClickHouse/issues/5901) [\#5902](https://github.com/ClickHouse/ClickHouse/pull/5902) ([alexey-milovidov](https://github.com/alexey-milovidov))
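+  For reference, `visitParamExtractRaw` returns the value of a field as unparsed JSON text, which is the code path where the underflow lived (the sample document is made up):
+
+  ``` sql
+  SELECT visitParamExtractRaw('{"a": {"b": [1, 2, 3]}, "c": "d"}', 'a');
+  -- {"b": [1, 2, 3]}
+  ```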
+- Now distributed `DROP/ALTER/TRUNCATE/OPTIMIZE ON CLUSTER` queries will be executed directly on the leader replica. [\#5757](https://github.com/ClickHouse/ClickHouse/pull/5757) ([alesapin](https://github.com/alesapin))
+- Fixed `coalesce` for `ColumnConst` with `ColumnNullable` and related changes. [\#5755](https://github.com/ClickHouse/ClickHouse/pull/5755) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed `ReadBufferFromKafkaConsumer` so that it keeps reading new messages after `commit()` even if it was stalled before. [\#5852](https://github.com/ClickHouse/ClickHouse/pull/5852) ([Ivan](https://github.com/abyss7))
+- Fixed `FULL` and `RIGHT` JOIN results when joining on `Nullable` keys in the right table. [\#5859](https://github.com/ClickHouse/ClickHouse/pull/5859) ([Artem Zuikov](https://github.com/4ertus2))
+- Possible fix for the infinite sleeping of low-priority queries. [\#5842](https://github.com/ClickHouse/ClickHouse/pull/5842) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a race condition which led to some queries possibly not appearing in query\_log after a `SYSTEM FLUSH LOGS` query. [\#5456](https://github.com/ClickHouse/ClickHouse/issues/5456) [\#5685](https://github.com/ClickHouse/ClickHouse/pull/5685) ([Anton Popov](https://github.com/CurtizJ))
+- Fixed a `heap-use-after-free` ASan warning in ClusterCopier caused by a watch trying to use an already removed copier object. [\#5871](https://github.com/ClickHouse/ClickHouse/pull/5871) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fixed a wrong `StringRef` pointer returned by some implementations of `IColumn::deserializeAndInsertFromArena`. This bug affected only unit tests. [\#5973](https://github.com/ClickHouse/ClickHouse/pull/5973) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Prevent source and intermediate array JOIN columns from masking same-name columns. [\#5941](https://github.com/ClickHouse/ClickHouse/pull/5941) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed INSERT and SELECT queries to the MySQL engine with MySQL-style identifier quoting. [\#5704](https://github.com/ClickHouse/ClickHouse/pull/5704) ([Winter Zhang](https://github.com/zhang2014))
+- Now `CHECK TABLE` queries can work with the MergeTree engine family. It returns the check status and a message, if any, for each part (or file, in the case of simpler engines). Also fixed a bug in fetching a broken part. [\#5865](https://github.com/ClickHouse/ClickHouse/pull/5865) ([alesapin](https://github.com/alesapin))
+- Fixed the SPLIT\_SHARED\_LIBRARY runtime. [\#5793](https://github.com/ClickHouse/ClickHouse/pull/5793) ([Danila Kutenin](https://github.com/danlark1))
+- Fixed timezone initialization when `/etc/localtime` is a relative symlink like `../usr/share/zoneinfo/Europe/Moscow`. [\#5922](https://github.com/ClickHouse/ClickHouse/pull/5922) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- clickhouse-copier: fixed a use-after-free on shutdown. [\#5752](https://github.com/ClickHouse/ClickHouse/pull/5752) ([proller](https://github.com/proller))
+- Updated `simdjson`. Fixed the issue where some invalid JSONs with zero bytes parsed successfully. [\#5938](https://github.com/ClickHouse/ClickHouse/pull/5938) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed the shutdown of SystemLogs. [\#5802](https://github.com/ClickHouse/ClickHouse/pull/5802) ([Anton Popov](https://github.com/CurtizJ))
+- Fixed hanging when a condition in invalidate\_query depends on a dictionary. [\#6011](https://github.com/ClickHouse/ClickHouse/pull/6011) ([Vitaly Baranov](https://github.com/vitlibar))
+
+#### Improvement {#improvement-6}
+
+- Allow unresolvable addresses in cluster configuration. They will be considered unavailable, and resolution will be attempted on every connection attempt. This is especially useful for Kubernetes. This fixes [\#5714](https://github.com/ClickHouse/ClickHouse/issues/5714) [\#5924](https://github.com/ClickHouse/ClickHouse/pull/5924) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Close idle TCP connections (with a one-hour timeout by default). This is especially important for large clusters with multiple distributed tables on every server, because every server may keep a connection pool to every other server, and after peak query concurrency the connections will stall. This fixes [\#5879](https://github.com/ClickHouse/ClickHouse/issues/5879) [\#5880](https://github.com/ClickHouse/ClickHouse/pull/5880) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Better quality of the `topK` function. Changed the SavingSpace set behavior to remove the last element if the new element has a bigger weight. [\#5833](https://github.com/ClickHouse/ClickHouse/issues/5833) [\#5850](https://github.com/ClickHouse/ClickHouse/pull/5850) ([Guillaume Tassery](https://github.com/YiuRULE))
+- URL functions that work with domains now also work for incomplete URLs without a scheme. [\#5725](https://github.com/ClickHouse/ClickHouse/pull/5725) ([alesapin](https://github.com/alesapin))
+- Checksums added to the `system.parts_columns` table. [\#5874](https://github.com/ClickHouse/ClickHouse/pull/5874) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
+- Added the `Enum` data type as a synonym for `Enum8` or `Enum16`. [\#5886](https://github.com/ClickHouse/ClickHouse/pull/5886) ([dimarub2000](https://github.com/dimarub2000))
+- Full bit-transpose variant for the `T64` codec. It could lead to better compression with `zstd`. [\#5742](https://github.com/ClickHouse/ClickHouse/pull/5742) ([Artem Zuikov](https://github.com/4ertus2))
+- A condition on the `startsWith` function can now use the primary key. This fixes [\#5310](https://github.com/ClickHouse/ClickHouse/issues/5310) and [\#5882](https://github.com/ClickHouse/ClickHouse/issues/5882) [\#5919](https://github.com/ClickHouse/ClickHouse/pull/5919) ([dimarub2000](https://github.com/dimarub2000))
+- Allow using `clickhouse-copier` with a cross-replication cluster topology by permitting an empty database name. [\#5745](https://github.com/ClickHouse/ClickHouse/pull/5745) ([nvartolomei](https://github.com/nvartolomei))
+- Use `UTC` as the default timezone on a system without `tzdata` (e.g. bare Docker container). Before this patch, the error message `Could not determine local time zone` was printed, and the server or client refused to start. [\#5827](https://github.com/ClickHouse/ClickHouse/pull/5827) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Returned support for floating-point arguments in the `quantileTiming` function for backward compatibility. [\#5911](https://github.com/ClickHouse/ClickHouse/pull/5911) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Show which table is missing a column in error messages. [\#5768](https://github.com/ClickHouse/ClickHouse/pull/5768) ([Ivan](https://github.com/abyss7))
+- Disallow running queries with the same query\_id by different users. [\#5430](https://github.com/ClickHouse/ClickHouse/pull/5430) ([proller](https://github.com/proller))
+- More robust code for sending metrics to Graphite. It will work even during long multiple `RENAME TABLE` operations. [\#5875](https://github.com/ClickHouse/ClickHouse/pull/5875) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- More informative error messages will be displayed when ThreadPool cannot schedule a task for execution. This fixes [\#5305](https://github.com/ClickHouse/ClickHouse/issues/5305) [\#5801](https://github.com/ClickHouse/ClickHouse/pull/5801) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Inverted ngramSearch to be more intuitive. [\#5807](https://github.com/ClickHouse/ClickHouse/pull/5807) ([Danila Kutenin](https://github.com/danlark1))
+- Added user parsing in the HDFS engine builder. [\#5946](https://github.com/ClickHouse/ClickHouse/pull/5946) ([akonyaev90](https://github.com/akonyaev90))
+- Updated the default value of the `max_ast_elements` parameter. [\#5933](https://github.com/ClickHouse/ClickHouse/pull/5933) ([Artem Konovalov](https://github.com/izebit))
+- Added the notion of obsolete settings. The obsolete setting `allow_experimental_low_cardinality_type` can be used with no effect. [0f15c01c6802f7ce1a1494c12c846be8c98944cd](https://github.com/ClickHouse/ClickHouse/commit/0f15c01c6802f7ce1a1494c12c846be8c98944cd) [Alexey Milovidov](https://github.com/alexey-milovidov)
+
+#### Performance Improvement {#performance-improvement-4}
+
+- Increased the number of streams to SELECT from a Merge table for a more uniform distribution of threads. Added the setting `max_streams_multiplier_for_merge_tables`. This fixes [\#5797](https://github.com/ClickHouse/ClickHouse/issues/5797) [\#5915](https://github.com/ClickHouse/ClickHouse/pull/5915) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-7}
+
+- Added a backward compatibility test for client-server interaction with different versions of ClickHouse. [\#5868](https://github.com/ClickHouse/ClickHouse/pull/5868) ([alesapin](https://github.com/alesapin))
+- Test coverage information in every commit and pull request. [\#5896](https://github.com/ClickHouse/ClickHouse/pull/5896) ([alesapin](https://github.com/alesapin))
+- Cooperate with the address sanitizer to support our custom allocators (`Arena` and `ArenaWithFreeLists`) for better debugging of “use-after-free” errors. [\#5728](https://github.com/ClickHouse/ClickHouse/pull/5728) ([akuzm](https://github.com/akuzm))
+- Switched to the [LLVM libunwind implementation](https://github.com/llvm-mirror/libunwind) for C++ exception handling and for stack trace printing. [\#4828](https://github.com/ClickHouse/ClickHouse/pull/4828) ([Nikita Lapkov](https://github.com/laplab))
+- Added two more warnings from -Weverything. [\#5923](https://github.com/ClickHouse/ClickHouse/pull/5923) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Allow building ClickHouse with Memory Sanitizer. [\#3949](https://github.com/ClickHouse/ClickHouse/pull/3949) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a UBSan report about the `bitTest` function in a fuzz test. [\#5943](https://github.com/ClickHouse/ClickHouse/pull/5943) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Docker: added the possibility to init a ClickHouse instance that requires authentication. [\#5727](https://github.com/ClickHouse/ClickHouse/pull/5727) ([Korviakov Andrey](https://github.com/shurshun))
+- Updated librdkafka to version 1.1.0. [\#5872](https://github.com/ClickHouse/ClickHouse/pull/5872) ([Ivan](https://github.com/abyss7))
+- Added a global timeout for integration tests and disabled some of them in the test code. [\#5741](https://github.com/ClickHouse/ClickHouse/pull/5741) ([alesapin](https://github.com/alesapin))
+- Fixed some ThreadSanitizer failures. [\#5854](https://github.com/ClickHouse/ClickHouse/pull/5854) ([akuzm](https://github.com/akuzm))
+- The `--no-undefined` option forces the linker to check that all external names exist while linking. It's very useful to track real dependencies between libraries in split-build mode. [\#5855](https://github.com/ClickHouse/ClickHouse/pull/5855) ([Ivan](https://github.com/abyss7))
+- Added a performance test for [\#5797](https://github.com/ClickHouse/ClickHouse/issues/5797) [\#5914](https://github.com/ClickHouse/ClickHouse/pull/5914) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed compatibility with gcc-7. [\#5840](https://github.com/ClickHouse/ClickHouse/pull/5840) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added support for gcc-9. This fixes [\#5717](https://github.com/ClickHouse/ClickHouse/issues/5717) [\#5774](https://github.com/ClickHouse/ClickHouse/pull/5774) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a bug when libunwind can be linked incorrectly. [\#5948](https://github.com/ClickHouse/ClickHouse/pull/5948) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a few warnings found by PVS-Studio. [\#5921](https://github.com/ClickHouse/ClickHouse/pull/5921) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added initial support for the `clang-tidy` static analyzer. [\#5806](https://github.com/ClickHouse/ClickHouse/pull/5806) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Convert BSD/Linux endian macros (‘be64toh’ and ‘htobe64’) to the Mac OS X equivalents. [\#5785](https://github.com/ClickHouse/ClickHouse/pull/5785) ([fredchenbj](https://github.com/fredchenbj))
+- Improved integration tests guide. [\#5796](https://github.com/ClickHouse/ClickHouse/pull/5796) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Fixed the build on macOS + gcc9. [\#5822](https://github.com/ClickHouse/ClickHouse/pull/5822) ([filimonov](https://github.com/filimonov))
+- Fixed a hard-to-spot typo: aggreAGte -\> aggregate. [\#5753](https://github.com/ClickHouse/ClickHouse/pull/5753) ([akuzm](https://github.com/akuzm))
+- Fixed the FreeBSD build. [\#5760](https://github.com/ClickHouse/ClickHouse/pull/5760) ([proller](https://github.com/proller))
+- Added a link to the experimental YouTube channel to the website. [\#5845](https://github.com/ClickHouse/ClickHouse/pull/5845) ([Ivan Blinkov](https://github.com/blinkov))
+- CMake: added an option for coverage flags: WITH\_COVERAGE. [\#5776](https://github.com/ClickHouse/ClickHouse/pull/5776) ([proller](https://github.com/proller))
+- Fixed the initial size of some inline PODArrays. [\#5787](https://github.com/ClickHouse/ClickHouse/pull/5787) ([akuzm](https://github.com/akuzm))
+- clickhouse-server.postinst: fixed OS detection for CentOS 6. [\#5788](https://github.com/ClickHouse/ClickHouse/pull/5788) ([proller](https://github.com/proller))
+- Added Arch Linux package generation. [\#5719](https://github.com/ClickHouse/ClickHouse/pull/5719) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Split Common/config.h by libs (dbms). [\#5715](https://github.com/ClickHouse/ClickHouse/pull/5715) ([proller](https://github.com/proller))
+- Fixes for the “Arcadia” build platform. [\#5795](https://github.com/ClickHouse/ClickHouse/pull/5795) ([proller](https://github.com/proller))
+- Fixes for unconventional builds (gcc9, no submodules). [\#5792](https://github.com/ClickHouse/ClickHouse/pull/5792) ([proller](https://github.com/proller))
+- Require an explicit type in unalignedStore because it was proven to be bug-prone. [\#5791](https://github.com/ClickHouse/ClickHouse/pull/5791) ([akuzm](https://github.com/akuzm))
+- Fixed the macOS build. [\#5830](https://github.com/ClickHouse/ClickHouse/pull/5830) ([filimonov](https://github.com/filimonov))
+- Performance tests concerning the new JIT feature with a bigger dataset, as requested here [\#5263](https://github.com/ClickHouse/ClickHouse/issues/5263) [\#5887](https://github.com/ClickHouse/ClickHouse/pull/5887) ([Guillaume Tassery](https://github.com/YiuRULE))
+- Run stateful tests in the stress test. [12693e568722f11e19859742f56428455501fd2a](https://github.com/ClickHouse/ClickHouse/commit/12693e568722f11e19859742f56428455501fd2a) ([alesapin](https://github.com/alesapin))
+
+#### Backward Incompatible Change {#backward-incompatible-change-7}
+
+- `Kafka` is broken in this version.
+- Enable `adaptive_index_granularity` = 10MB by default for new `MergeTree` tables. If you created new MergeTree tables on version 19.11+, a downgrade to versions prior to 19.6 will be impossible. [\#5628](https://github.com/ClickHouse/ClickHouse/pull/5628) ([alesapin](https://github.com/alesapin))
+- Removed the obsolete undocumented embedded dictionaries that were used by Yandex.Metrica. The functions `OSIn`, `SEIn`, `OSToRoot`, `SEToRoot`, `OSHierarchy`, `SEHierarchy` are no longer available. If you are using these functions, write an email to clickhouse-feedback@yandex-team.com. Note: at the last moment we decided to keep these functions for a while. [\#5780](https://github.com/ClickHouse/ClickHouse/pull/5780) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+## ClickHouse Release 19.10 {#clickhouse-release-19-10}
+
+### ClickHouse Release 19.10.1.5, 2019-07-12 {#clickhouse-release-19-10-1-5-2019-07-12}
+
+#### New Feature {#new-feature-7}
+
+- Added a new column codec: `T64`. Made for (U)IntX/EnumX/Date(Time)/DecimalX columns. It should be good for columns with constant or small range values. The codec itself allows enlarging or shrinking the data type without re-compression. [\#5557](https://github.com/ClickHouse/ClickHouse/pull/5557) ([Artem Zuikov](https://github.com/4ertus2))
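+  A minimal sketch of declaring the new codec (the table and column names are hypothetical); chaining it with a generic codec such as `ZSTD` is a common pattern:
+
+  ``` sql
+  CREATE TABLE metrics
+  (
+      ts DateTime,
+      value UInt32 CODEC(T64, ZSTD) -- bit-transposed first, then generically compressed
+  )
+  ENGINE = MergeTree()
+  ORDER BY ts;
+  ```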
+- Added the database engine `MySQL` that allows viewing all the tables on a remote MySQL server. [\#5599](https://github.com/ClickHouse/ClickHouse/pull/5599) ([Winter Zhang](https://github.com/zhang2014))
+- `bitmapContains` implementation. It's 2x faster than `bitmapHasAny` if the second bitmap contains one element. [\#5535](https://github.com/ClickHouse/ClickHouse/pull/5535) ([Zhichang Yu](https://github.com/yuzhichang))
+- Support for the `crc32` function (with behaviour exactly as in MySQL or PHP). Do not use it if you need a hash function. [\#5661](https://github.com/ClickHouse/ClickHouse/pull/5661) ([Remen Ivan](https://github.com/BHYCHIK))
+- Implemented `SYSTEM START/STOP DISTRIBUTED SENDS` queries to control asynchronous inserts into `Distributed` tables. [\#4935](https://github.com/ClickHouse/ClickHouse/pull/4935) ([Winter Zhang](https://github.com/zhang2014))
+
+#### Bug Fix {#bug-fix-22}
+
+- Ignore query execution limits and the max part size for merge limits while executing mutations. [\#5659](https://github.com/ClickHouse/ClickHouse/pull/5659) ([Anton Popov](https://github.com/CurtizJ))
+- Fixed a bug which may lead to deduplication of normal blocks (extremely rare) and insertion of duplicate blocks (more often). [\#5549](https://github.com/ClickHouse/ClickHouse/pull/5549) ([alesapin](https://github.com/alesapin))
+- Fixed the function `arrayEnumerateUniqRanked` for arguments with empty arrays. [\#5559](https://github.com/ClickHouse/ClickHouse/pull/5559) ([proller](https://github.com/proller))
+- Don't subscribe to Kafka topics without the intent to poll any messages. [\#5698](https://github.com/ClickHouse/ClickHouse/pull/5698) ([Ivan](https://github.com/abyss7))
+- Make the `join_use_nulls` setting have no effect for types that cannot be inside Nullable. [\#5700](https://github.com/ClickHouse/ClickHouse/pull/5700) ([Olga Khvostikova](https://github.com/stavrolia))
+- Fixed `Incorrect size of index granularity` errors. [\#5720](https://github.com/ClickHouse/ClickHouse/pull/5720) ([coraxster](https://github.com/coraxster))
+- Fixed the Float-to-Decimal conversion overflow. [\#5607](https://github.com/ClickHouse/ClickHouse/pull/5607) ([coraxster](https://github.com/coraxster))
+- Flush the buffer when the `WriteBufferFromHDFS` destructor is called. This fixes writing into `HDFS`. [\#5684](https://github.com/ClickHouse/ClickHouse/pull/5684) ([eejoin](https://github.com/eejoin))
+
+#### Improvement {#improvement-7}
+
+- Treat empty cells in `CSV` as default values when the setting `input_format_defaults_for_omitted_fields` is enabled. [\#5625](https://github.com/ClickHouse/ClickHouse/pull/5625) ([akuzm](https://github.com/akuzm))
+- Non-blocking loading of external dictionaries. [\#5567](https://github.com/ClickHouse/ClickHouse/pull/5567) ([Vitaly Baranov](https://github.com/vitlibar))
+- Network timeouts can be dynamically changed for already established connections according to the settings. [\#4558](https://github.com/ClickHouse/ClickHouse/pull/4558) ([Konstantin Podshumok](https://github.com/podshumok))
+- Using “public\_suffix\_list” for the functions `firstSignificantSubdomain` and `cutToFirstSignificantSubdomain`. It uses a perfect hash table generated by `gperf` from the list in the file https://publicsuffix.org/list/public\_suffix\_list.dat (for example, now we recognize the domain `ac.uk` as non-significant). [\#5030](https://github.com/ClickHouse/ClickHouse/pull/5030) ([Guillaume Tassery](https://github.com/YiuRULE))
+- Adopted the `IPv6` data type in system tables; unified client info columns in `system.processes` and `system.query_log`. [\#5640](https://github.com/ClickHouse/ClickHouse/pull/5640) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Using sessions for connections with the MySQL compatibility protocol. \#5476 [\#5646](https://github.com/ClickHouse/ClickHouse/pull/5646) ([Yuriy Baranov](https://github.com/yurriy))
+- Support more `ALTER` queries `ON CLUSTER`. [\#5593](https://github.com/ClickHouse/ClickHouse/pull/5593) [\#5613](https://github.com/ClickHouse/ClickHouse/pull/5613) ([sundyli](https://github.com/sundy-li))
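+  As an illustration of the kind of statement this broadens (the cluster, database and column names are hypothetical):
+
+  ``` sql
+  -- Runs on every shard and replica that the server config defines for 'my_cluster'.
+  ALTER TABLE db.events ON CLUSTER my_cluster ADD COLUMN user_agent String;
+  ```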
+- Support the `` section in the `clickhouse-local` config file. [\#5540](https://github.com/ClickHouse/ClickHouse/pull/5540) ([proller](https://github.com/proller))
+- Allow running queries with the `remote` table function in `clickhouse-local`. [\#5627](https://github.com/ClickHouse/ClickHouse/pull/5627) ([proller](https://github.com/proller))
+
+#### Performance Improvement {#performance-improvement-5}
+
+- Added the possibility to write the final mark at the end of MergeTree columns. It allows avoiding useless reads for keys that are out of the table's data range. It is enabled only if adaptive index granularity is in use. [\#5624](https://github.com/ClickHouse/ClickHouse/pull/5624) ([alesapin](https://github.com/alesapin))
+- Improved the performance of MergeTree tables on very slow filesystems by reducing the number of `stat` syscalls. [\#5648](https://github.com/ClickHouse/ClickHouse/pull/5648) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed the performance degradation in reading from MergeTree tables that was introduced in version 19.6. Fixes \#5631. [\#5633](https://github.com/ClickHouse/ClickHouse/pull/5633) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-8}
+
+- Implemented `TestKeeper` as an implementation of the ZooKeeper interface used for testing. [\#5643](https://github.com/ClickHouse/ClickHouse/pull/5643) ([alexey-milovidov](https://github.com/alexey-milovidov)) ([levushkin aleksej](https://github.com/alexey-milovidov))
+- From now on, `.sql` tests can be run isolated by server, in parallel, with a random database. It allows running them faster, adding new tests with custom server configurations, and being sure that different tests don't affect each other. [\#5554](https://github.com/ClickHouse/ClickHouse/pull/5554) ([Ivan](https://github.com/abyss7))
+- Remove `` and `` from performance tests. [\#5672](https://github.com/ClickHouse/ClickHouse/pull/5672) ([Olga Khvostikova](https://github.com/stavrolia))
+- Fixed the “select\_format” performance test for `Pretty` formats. [\#5642](https://github.com/ClickHouse/ClickHouse/pull/5642) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+## ClickHouse Release 19.9 {#clickhouse-release-19-9}
+
+### ClickHouse Release 19.9.3.31, 2019-07-05 {#clickhouse-release-19-9-3-31-2019-07-05}
+
+#### Bug Fix {#bug-fix-23}
+
+- Fixed a segfault in the Delta codec which affects columns with values less than 32 bits in size. The bug led to random memory corruption. [\#5786](https://github.com/ClickHouse/ClickHouse/pull/5786) ([alesapin](https://github.com/alesapin))
+- Fixed a rare bug in checking parts with a LowCardinality column. [\#5832](https://github.com/ClickHouse/ClickHouse/pull/5832) ([alesapin](https://github.com/alesapin))
+- Fixed a segfault in TTL merge with non-physical columns in the block. [\#5819](https://github.com/ClickHouse/ClickHouse/pull/5819) ([Anton Popov](https://github.com/CurtizJ))
+- Fixed the potential infinite sleeping of low-priority queries. [\#5842](https://github.com/ClickHouse/ClickHouse/pull/5842) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed how ClickHouse determined the default timezone as UCT instead of UTC. [\#5828](https://github.com/ClickHouse/ClickHouse/pull/5828) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a bug with executing distributed DROP/ALTER/TRUNCATE/OPTIMIZE ON CLUSTER queries on a follower replica before the leader replica. Now they will be executed directly on the leader replica. [\#5757](https://github.com/ClickHouse/ClickHouse/pull/5757) ([alesapin](https://github.com/alesapin))
+- Fixed a race condition which caused some queries to possibly not appear in query\_log instantly after a SYSTEM FLUSH LOGS query. [\#5685](https://github.com/ClickHouse/ClickHouse/pull/5685) ([Anton Popov](https://github.com/CurtizJ))
+- Added missing support for constant arguments to the `evalMLModel` function. [\#5820](https://github.com/ClickHouse/ClickHouse/pull/5820) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+### ClickHouse Release 19.9.2.4, 2019-06-24 {#clickhouse-release-19-9-2-4-2019-06-24}
+
+#### New Feature {#new-feature-8}
+
+- Print information about frozen parts in the `system.parts` table. [\#5471](https://github.com/ClickHouse/ClickHouse/pull/5471) ([proller](https://github.com/proller))
+- Ask for the client password on clickhouse-client start on a TTY if it is not set in the arguments. [\#5092](https://github.com/ClickHouse/ClickHouse/pull/5092) ([proller](https://github.com/proller))
+- Implemented the `dictGet` and `dictGetOrDefault` functions for Decimal types. [\#5394](https://github.com/ClickHouse/ClickHouse/pull/5394) ([Artem Zuikov](https://github.com/4ertus2))
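+  A sketch of the newly supported calls, assuming a dictionary named `prices` with a `Decimal64(2)` attribute `price` (both hypothetical):
+
+  ``` sql
+  SELECT dictGet('prices', 'price', toUInt64(42));
+  -- Falls back to the supplied Decimal default when the key is absent.
+  SELECT dictGetOrDefault('prices', 'price', toUInt64(42), toDecimal64(0, 2));
+  ```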
+
+#### Improvement {#improvement-8}
+
+- Debian init: added a service stop timeout. [\#5522](https://github.com/ClickHouse/ClickHouse/pull/5522) ([proller](https://github.com/proller))
+- Added a setting, forbidden by default, to create tables with suspicious types. [\#5448](https://github.com/ClickHouse/ClickHouse/pull/5448) ([Olga Khvostikova](https://github.com/stavrolia))
+- Regression functions return model weights when not used as state in the `evalMLMethod` function. [\#5411](https://github.com/ClickHouse/ClickHouse/pull/5411) ([Quid37](https://github.com/Quid37))
+- Renamed and improved regression methods. [\#5492](https://github.com/ClickHouse/ClickHouse/pull/5492) ([Quid37](https://github.com/Quid37))
+- Clearer interfaces for string searchers. [\#5586](https://github.com/ClickHouse/ClickHouse/pull/5586) ([Danila Kutenin](https://github.com/danlark1))
+
+#### Bug Fix {#bug-fix-24}
+
+- Fixed potential data loss in Kafka. [\#5445](https://github.com/ClickHouse/ClickHouse/pull/5445) ([Ivan](https://github.com/abyss7))
+- Fixed a potential infinite loop in the `PrettySpace` format when called with zero columns. [\#5560](https://github.com/ClickHouse/ClickHouse/pull/5560) ([Olga Khvostikova](https://github.com/stavrolia))
+- Fixed a UInt32 overflow bug in linear models. Allow the eval ML model for non-constant model arguments. [\#5516](https://github.com/ClickHouse/ClickHouse/pull/5516) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- `ALTER TABLE ... DROP INDEX IF EXISTS ...` should not raise an exception if the provided index does not exist. [\#5524](https://github.com/ClickHouse/ClickHouse/pull/5524) ([Gleb Novikov](https://github.com/NanoBjorn))
+- Fixed a segfault with `bitmapHasAny` in a scalar subquery. [\#5528](https://github.com/ClickHouse/ClickHouse/pull/5528) ([Zhichang Yu](https://github.com/yuzhichang))
+- Fixed a bug when the replication connection pool doesn't retry to resolve the host, even when the DNS cache was dropped. [\#5534](https://github.com/ClickHouse/ClickHouse/pull/5534) ([alesapin](https://github.com/alesapin))
+- Fixed `ALTER ... MODIFY TTL` on ReplicatedMergeTree. [\#5539](https://github.com/ClickHouse/ClickHouse/pull/5539) ([Anton Popov](https://github.com/CurtizJ))
+- Fixed INSERT into a Distributed table with a MATERIALIZED column. [\#5429](https://github.com/ClickHouse/ClickHouse/pull/5429) ([Azat Khuzhin](https://github.com/azat))
+- Fixed a bad alloc when truncating Join storage. [\#5437](https://github.com/ClickHouse/ClickHouse/pull/5437) ([TCeason](https://github.com/TCeason))
+- In recent versions of the package tzdata, some files are symlinks now. The current mechanism for detecting the default timezone gets broken and gives wrong names for some timezones. Now at least we force the timezone name to the contents of TZ if provided. [\#5443](https://github.com/ClickHouse/ClickHouse/pull/5443) ([Ivan](https://github.com/abyss7))
+- Fixed some extremely rare cases with the MultiVolnitsky searcher when the constant needles in sum are at least 16KB long. The algorithm missed or overwrote the previous results, which can lead to the incorrect result of `multiSearchAny`. [\#5588](https://github.com/ClickHouse/ClickHouse/pull/5588) ([Danila Kutenin](https://github.com/danlark1))
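+  For reference, `multiSearchAny` reports whether any of the needles occurs in the haystack; the bug above affected only very long constant needles (the strings here are illustrative):
+
+  ``` sql
+  -- Returns 1 because at least one needle ('House') is found.
+  SELECT multiSearchAny('Hello, ClickHouse!', ['World', 'House']);
+  ```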
+- Fixed the issue when settings for ExternalData requests couldn't use ClickHouse settings. Also, for now, the settings `date_time_input_format` and `low_cardinality_allow_in_native_format` cannot be used because of the ambiguity of names (in external data they can be interpreted as a table format, and in the query they can be a setting). [\#5455](https://github.com/ClickHouse/ClickHouse/pull/5455) ([Danila Kutenin](https://github.com/danlark1))
+- Fixed a bug when parts were removed only from FS without dropping them from ZooKeeper. [\#5520](https://github.com/ClickHouse/ClickHouse/pull/5520) ([alesapin](https://github.com/alesapin))
+- Removed debug logging from the MySQL protocol. [\#5478](https://github.com/ClickHouse/ClickHouse/pull/5478) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Skip ZNONODE during DDL query processing. [\#5489](https://github.com/ClickHouse/ClickHouse/pull/5489) ([Azat Khuzhin](https://github.com/azat))
+- Fixed mixing of `UNION ALL` result column types. There were cases with inconsistent data and column types of result columns. [\#5503](https://github.com/ClickHouse/ClickHouse/pull/5503) ([Artem Zuikov](https://github.com/4ertus2))
+- Throw an exception on wrong integers in `dictGetT` functions instead of crashing. [\#5446](https://github.com/ClickHouse/ClickHouse/pull/5446) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed the wrong element\_count and load\_factor for hashed dictionaries in the `system.dictionaries` table. [\#5440](https://github.com/ClickHouse/ClickHouse/pull/5440) ([Azat Khuzhin](https://github.com/azat))
+
+#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-9}
+
+- Fixed the build without `Brotli` HTTP compression support (`ENABLE_BROTLI=OFF` cmake variable). [\#5521](https://github.com/ClickHouse/ClickHouse/pull/5521) ([Anton Yuzhaninov](https://github.com/citrin))
+- Include roaring.h as roaring/roaring.h. [\#5523](https://github.com/ClickHouse/ClickHouse/pull/5523) ([Orivej Desh](https://github.com/orivej))
+- Fixed gcc9 warnings in hyperscan (the \#line directive is evil!). [\#5546](https://github.com/ClickHouse/ClickHouse/pull/5546) ([Danila Kutenin](https://github.com/danlark1))
+- Fixed all warnings when compiling with gcc-9. Fixed some contrib issues. Fixed a gcc9 ICE and submitted it to bugzilla. [\#5498](https://github.com/ClickHouse/ClickHouse/pull/5498) ([Danila Kutenin](https://github.com/danlark1))
+- Fixed linking with lld. [\#5477](https://github.com/ClickHouse/ClickHouse/pull/5477) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Removed unused specializations in dictionaries. [\#5452](https://github.com/ClickHouse/ClickHouse/pull/5452) ([Artem Zuikov](https://github.com/4ertus2))
+- Improved performance tests for formatting and parsing tables for different types of files. [\#5497](https://github.com/ClickHouse/ClickHouse/pull/5497) ([Olga Khvostikova](https://github.com/stavrolia))
+- Fixes for parallel test runs. [\#5506](https://github.com/ClickHouse/ClickHouse/pull/5506) ([proller](https://github.com/proller))
+- Docker: use configs from clickhouse-test. [\#5531](https://github.com/ClickHouse/ClickHouse/pull/5531) ([proller](https://github.com/proller))
+- Fixed compilation for FreeBSD. [\#5447](https://github.com/ClickHouse/ClickHouse/pull/5447) ([proller](https://github.com/proller))
+- Upgraded boost to 1.70. [\#5570](https://github.com/ClickHouse/ClickHouse/pull/5570) ([proller](https://github.com/proller))
+- Fixed building ClickHouse as a submodule. [\#5574](https://github.com/ClickHouse/ClickHouse/pull/5574) ([proller](https://github.com/proller))
+- Improved JSONExtract performance tests. [\#5444](https://github.com/ClickHouse/ClickHouse/pull/5444) ([Vitaly Baranov](https://github.com/vitlibar))
+
+## ClickHouse Release 19.8 {#clickhouse-release-19-8}
+
+### ClickHouse Release 19.8.3.8, 2019-06-11 {#clickhouse-release-19-8-3-8-2019-06-11}
+
+#### New Features {#new-features}
+
+- Added functions for working with JSON. [\#4686](https://github.com/ClickHouse/ClickHouse/pull/4686) ([hcz](https://github.com/hczhcz)) [\#5124](https://github.com/ClickHouse/ClickHouse/pull/5124). ([Vitaly Baranov](https://github.com/vitlibar))
+- Added the function basename, with behaviour similar to the basename function that exists in a lot of languages (`os.path.basename` in Python, `basename` in PHP, etc…). Works with both UNIX-like paths and Windows paths. [\#5136](https://github.com/ClickHouse/ClickHouse/pull/5136) ([Guillaume Tassery](https://github.com/YiuRULE))
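+  A quick illustration of the new function on both path styles (the paths are made up):
+
+  ``` sql
+  SELECT basename('/usr/share/clickhouse/config.xml'); -- 'config.xml'
+  SELECT basename('C:\\Users\\admin\\report.csv');     -- 'report.csv'
+  ```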
+- Added `LIMIT n, m BY` or `LIMIT m OFFSET n BY` syntax to set an offset of n for the LIMIT BY clause. [\#5138](https://github.com/ClickHouse/ClickHouse/pull/5138) ([Anton Popov](https://github.com/CurtizJ))
+- Added a new data type `SimpleAggregateFunction`, which allows having columns with light aggregation in an `AggregatingMergeTree`. It can only be used with simple functions like `any`, `anyLast`, `sum`, `min`, `max`. [\#4629](https://github.com/ClickHouse/ClickHouse/pull/4629) ([Boris Granveaud](https://github.com/bgranvea))
+- Added support for non-constant arguments in the `ngramDistance` function. [\#5198](https://github.com/ClickHouse/ClickHouse/pull/5198) ([Danila Kutenin](https://github.com/danlark1))
+- Added the functions `skewPop`, `skewSamp`, `kurtPop` and `kurtSamp` to compute sequence skewness, sample skewness, kurtosis and sample kurtosis respectively. [\#5200](https://github.com/ClickHouse/ClickHouse/pull/5200) ([hcz](https://github.com/hczhcz))
+- Support the rename operation for `MaterializeView` storage. [\#5209](https://github.com/ClickHouse/ClickHouse/pull/5209) ([Guillaume Tassery](https://github.com/YiuRULE))
+- Added a server that allows connecting to ClickHouse using a MySQL client. [\#4715](https://github.com/ClickHouse/ClickHouse/pull/4715) ([Yuriy Baranov](https://github.com/yurriy))
+- Added the `toDecimal*OrZero` and `toDecimal*OrNull` functions. [\#5291](https://github.com/ClickHouse/ClickHouse/pull/5291) ([Artem Zuikov](https://github.com/4ertus2))
+- Support Decimal types in functions: `quantile`, `quantiles`, `median`, `quantileExactWeighted`, `quantilesExactWeighted`, medianExactWeighted. [\#5304](https://github.com/ClickHouse/ClickHouse/pull/5304) ([Artem Zuikov](https://github.com/4ertus2))
+- Added the `toValidUTF8` function, which replaces all invalid UTF-8 characters by the replacement character � (U+FFFD). [\#5322](https://github.com/ClickHouse/ClickHouse/pull/5322) ([Danila Kutenin](https://github.com/danlark1))
+- Added the `format` function. Formatting a constant pattern (a simplified Python format pattern) with the strings listed in the arguments. [\#5330](https://github.com/ClickHouse/ClickHouse/pull/5330) ([Danila Kutenin](https://github.com/danlark1))
+- Added the `system.detached_parts` table containing information about detached parts of `MergeTree` tables. [\#5353](https://github.com/ClickHouse/ClickHouse/pull/5353) ([akuzm](https://github.com/akuzm))
+- Added the `ngramSearch` function to calculate the non-symmetric difference between a needle and a haystack. [\#5418](https://github.com/ClickHouse/ClickHouse/pull/5418)[\#5422](https://github.com/ClickHouse/ClickHouse/pull/5422) ([Danila Kutenin](https://github.com/danlark1))
+- Implementation of basic machine learning methods (stochastic linear regression and logistic regression) using the aggregate functions interface. There are different strategies for updating model weights (simple gradient descent, momentum method, Nesterov method). Mini-batches of custom size are also supported. [\#4943](https://github.com/ClickHouse/ClickHouse/pull/4943) ([Quid37](https://github.com/Quid37))
+- Implemented the `geohashEncode` and `geohashDecode` functions. [\#5003](https://github.com/ClickHouse/ClickHouse/pull/5003) ([Vasily Nemkov](https://github.com/Enmk))
+- Added the aggregate function `timeSeriesGroupSum`, which can aggregate different time series whose sample timestamps are not aligned. It will use linear interpolation between two sample timestamps and then sum the time series together. Added the aggregate function `timeSeriesGroupRateSum`, which calculates the rate of time series and then sums the rates together. [\#4542](https://github.com/ClickHouse/ClickHouse/pull/4542) ([LiuYangkuan](https://github.com/LiuYangkuan))
+- Added the functions `IPv4CIDRtoIPv4Range` and `IPv6CIDRtoIPv6Range` to calculate the lower and higher bounds of an IP in a subnet using a CIDR. [\#5095](https://github.com/ClickHouse/ClickHouse/pull/5095) ([Guillaume Tassery](https://github.com/YiuRULE))
+- Added an X-ClickHouse-Summary header when we send a query over HTTP with the setting `send_progress_in_http_headers` enabled. Returns the usual information of X-ClickHouse-Progress, with additional information like how many rows and bytes were inserted in the query. [\#5116](https://github.com/ClickHouse/ClickHouse/pull/5116) ([Guillaume Tassery](https://github.com/YiuRULE))
+
+#### Improvements {#improvements}
+
+- Added the `max_parts_in_total` setting for tables of the MergeTree family (default: 100 000) that prevents unsafe specification of the partition key \#5166. [\#5171](https://github.com/ClickHouse/ClickHouse/pull/5171) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- `clickhouse-obfuscator`: derive the seed for individual columns by combining the initial seed with the column name, not the column position. This is intended to transform datasets with multiple related tables, so that tables will remain JOINable after the transformation. [\#5178](https://github.com/ClickHouse/ClickHouse/pull/5178) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added the functions `JSONExtractRaw`, `JSONExtractKeyAndValues`. Renamed the function `jsonExtract` to `JSONExtract`. When something goes wrong, these functions return the corresponding values, not `NULL`. Modified the function `JSONExtract`: now it gets the return type from its last parameter and doesn't inject nullables. Implemented a fallback to RapidJSON in case AVX2 instructions are not available. The simdjson library was updated to a new version. [\#5235](https://github.com/ClickHouse/ClickHouse/pull/5235) ([Vitaly Baranov](https://github.com/vitlibar))
+- Now the `if` and `multiIf` functions don't rely on the condition's `Nullable`, but rely on the branches for SQL compatibility. [\#5238](https://github.com/ClickHouse/ClickHouse/pull/5238) ([Jian Wu](https://github.com/janplus))
+- The `In` predicate now generates a `Null` result from `Null` input, like the `Equal` functions. [\#5152](https://github.com/ClickHouse/ClickHouse/pull/5152) ([Jian Wu](https://github.com/janplus))
+- Check the time limit for every (flush\_interval / poll\_timeout) number of rows from Kafka. This allows breaking the reading from a Kafka consumer more frequently and checking the time limits for the top-level streams. [\#5249](https://github.com/ClickHouse/ClickHouse/pull/5249) ([Ivan](https://github.com/abyss7))
+- Link rdkafka with bundled SASL. It should allow using SASL SCRAM authentication. [\#5253](https://github.com/ClickHouse/ClickHouse/pull/5253) ([Ivan](https://github.com/abyss7))
+- A batched version of RowRefList for ALL JOINs. [\#5267](https://github.com/ClickHouse/ClickHouse/pull/5267) ([Artem Zuikov](https://github.com/4ertus2))
+- clickhouse-server: more informative listen error messages. [\#5268](https://github.com/ClickHouse/ClickHouse/pull/5268) ([proller](https://github.com/proller))
+- Support dictionaries in clickhouse-copier for functions in ``. [\#5270](https://github.com/ClickHouse/ClickHouse/pull/5270) ([proller](https://github.com/proller))
+- Added a new setting `kafka_commit_every_batch` to regulate Kafka committing policy. It allows setting the commit mode: after every batch of messages is handled, or after the whole block is written to the storage. It's a trade-off between losing some messages or reading them twice in some extreme situations. [\#5308](https://github.com/ClickHouse/ClickHouse/pull/5308) ([Ivan](https://github.com/abyss7))
+- Made `windowFunnel` support other unsigned integer types. [\#5320](https://github.com/ClickHouse/ClickHouse/pull/5320) ([sundyli](https://github.com/sundy-li))
+- Allow shadowing the virtual column `_table` in the Merge engine. [\#5325](https://github.com/ClickHouse/ClickHouse/pull/5325) ([Ivan](https://github.com/abyss7))
+- Made the `sequenceMatch` aggregate function support other unsigned integer types. [\#5339](https://github.com/ClickHouse/ClickHouse/pull/5339) ([sundyli](https://github.com/sundy-li))
+- Better error messages if a checksum mismatch is most likely caused by hardware failures. [\#5355](https://github.com/ClickHouse/ClickHouse/pull/5355) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Check that underlying tables support sampling for `StorageMerge`. [\#5366](https://github.com/ClickHouse/ClickHouse/pull/5366) ([Ivan](https://github.com/abyss7))
+- Close MySQL connections after their usage in external dictionaries. It is related to issue \#893. [\#5395](https://github.com/ClickHouse/ClickHouse/pull/5395) ([Clément Rodriguez](https://github.com/clemrodriguez))
+- Improvements of the MySQL wire protocol. Changed the name of the format to MySQLWire. Using RAII for calling RSA\_free. Disabling SSL if the context cannot be created. [\#5419](https://github.com/ClickHouse/ClickHouse/pull/5419) ([Yuriy Baranov](https://github.com/yurriy))
+- clickhouse-client: allow running with an inaccessible history file (read-only, no disk space, file is a directory, …). [\#5431](https://github.com/ClickHouse/ClickHouse/pull/5431) ([proller](https://github.com/proller))
+- Respect query settings in asynchronous INSERTs into Distributed tables. [\#4936](https://github.com/ClickHouse/ClickHouse/pull/4936) ([TCeason](https://github.com/TCeason))
+- Renamed the functions `leastSqr` to `simpleLinearRegression`, `LinearRegression` to `linearRegression`, `LogisticRegression` to `logisticRegression`. [\#5391](https://github.com/ClickHouse/ClickHouse/pull/5391) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+
+#### Performance Improvements {#performance-improvements}
+
+- Parallelize processing of parts of non-replicated MergeTree tables in the ALTER MODIFY query. [\#4639](https://github.com/ClickHouse/ClickHouse/pull/4639) ([Ivan Kush](https://github.com/IvanKush))
+- Optimizations in regular expressions extraction. [\#5193](https://github.com/ClickHouse/ClickHouse/pull/5193) [\#5191](https://github.com/ClickHouse/ClickHouse/pull/5191) ([Danila Kutenin](https://github.com/danlark1))
+- Do not add right join key columns to the join result if they are used only in the JOIN ON section. [\#5260](https://github.com/ClickHouse/ClickHouse/pull/5260) ([Artem Zuikov](https://github.com/4ertus2))
+- Freeze the Kafka buffer after the first empty response. It avoids multiple invocations of `ReadBuffer::next()` for an empty result in some row-parsing streams. [\#5283](https://github.com/ClickHouse/ClickHouse/pull/5283) ([Ivan](https://github.com/abyss7))
+- `concat` function optimization for multiple arguments. [\#5357](https://github.com/ClickHouse/ClickHouse/pull/5357) ([Danila Kutenin](https://github.com/danlark1))
+- Query optimization. Allow pushing down IN statements while rewriting comma/cross join into an inner one. [\#5396](https://github.com/ClickHouse/ClickHouse/pull/5396) ([Artem Zuikov](https://github.com/4ertus2))
+- Upgraded our LZ4 implementation with the reference one to get faster decompression. [\#5070](https://github.com/ClickHouse/ClickHouse/pull/5070) ([Danila Kutenin](https://github.com/danlark1))
+- Implemented MSD radix sort (based on kxsort), and partial sorting. [\#5129](https://github.com/ClickHouse/ClickHouse/pull/5129) ([Evgenii Pravda](https://github.com/kvinty))
+
+#### Bug Fixes {#bug-fixes}
+
+- Fix push of required columns with JOIN. [\#5192](https://github.com/ClickHouse/ClickHouse/pull/5192) ([Winter Zhang](https://github.com/zhang2014))
+- Fixed a bug: when ClickHouse is run by systemd, the command `sudo service clickhouse-server forcerestart` did not work as expected. [\#5204](https://github.com/ClickHouse/ClickHouse/pull/5204) ([proller](https://github.com/proller))
+- Fix HTTP error codes in DataPartsExchange (the interserver HTTP server on port 9009 always returned code 200, even on errors). [\#5216](https://github.com/ClickHouse/ClickHouse/pull/5216) ([proller](https://github.com/proller))
+- Fix SimpleAggregateFunction for strings longer than MAX\_SMALL\_STRING\_SIZE. [\#5311](https://github.com/ClickHouse/ClickHouse/pull/5311) ([Azat Khuzhin](https://github.com/azat))
+- Fix an error in the `Decimal` to `Nullable(Decimal)` conversion. Support other Decimal-to-Decimal conversions (including different scales). [\#5350](https://github.com/ClickHouse/ClickHouse/pull/5350) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed FPU clobbering in the simdjson library that led to wrong calculations in the `uniqHLL` and `uniqCombined` aggregate functions and math functions such as `log`. [\#5354](https://github.com/ClickHouse/ClickHouse/pull/5354) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed handling of mixed const/non-const cases in JSON functions. [\#5435](https://github.com/ClickHouse/ClickHouse/pull/5435) ([Vitaly Baranov](https://github.com/vitlibar))
+- Fix the `retention` function. Now all conditions that a row of data satisfies are added to the data state (a minimal sketch follows this list). [\#5119](https://github.com/ClickHouse/ClickHouse/pull/5119) ([小路](https://github.com/nicelulu))
+- Fix the result type for `quantileExact` with Decimals. [\#5304](https://github.com/ClickHouse/ClickHouse/pull/5304) ([Artem Zuikov](https://github.com/4ertus2))
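+A minimal sketch of the fixed `retention` function, assuming a hypothetical `events` table with `uid` and `date` columns:
+
+```
+-- Per user: [visited on day 1, visited on day 1 AND day 2, visited on day 1 AND day 3]
+SELECT
+    uid,
+    retention(date = '2019-01-01', date = '2019-01-02', date = '2019-01-03') AS r
+FROM events
+GROUP BY uid;
+```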
+
+#### Documentation {#documentation}
+
+- Translated the documentation for `CollapsingMergeTree` to Chinese. [\#5168](https://github.com/ClickHouse/ClickHouse/pull/5168) ([张风啸](https://github.com/AlexZFX))
+- Translated some documentation about table engines to Chinese. [\#5134](https://github.com/ClickHouse/ClickHouse/pull/5134) [\#5328](https://github.com/ClickHouse/ClickHouse/pull/5328) ([never lee](https://github.com/neverlee))
+
+#### Build/Testing/Packaging Improvements {#buildtestingpackaging-improvements}
+
+- Fix some sanitizer reports that show probable use-after-free. [\#5139](https://github.com/ClickHouse/ClickHouse/pull/5139) [\#5143](https://github.com/ClickHouse/ClickHouse/pull/5143) [\#5393](https://github.com/ClickHouse/ClickHouse/pull/5393) ([Ivan](https://github.com/abyss7))
+- Moved performance tests out of separate directories for convenience. [\#5158](https://github.com/ClickHouse/ClickHouse/pull/5158) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix incorrect performance tests. [\#5255](https://github.com/ClickHouse/ClickHouse/pull/5255) ([alesapin](https://github.com/alesapin))
+- Added a tool to calculate checksums caused by bit flips to debug hardware issues. [\#5334](https://github.com/ClickHouse/ClickHouse/pull/5334) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Make the runner script more usable. [\#5340](https://github.com/ClickHouse/ClickHouse/pull/5340)[\#5360](https://github.com/ClickHouse/ClickHouse/pull/5360) ([filimonov](https://github.com/filimonov))
+- Added small instructions on how to write performance tests. [\#5408](https://github.com/ClickHouse/ClickHouse/pull/5408) ([alesapin](https://github.com/alesapin))
+- Added the ability to make substitutions in create, fill and drop queries in performance tests. [\#5367](https://github.com/ClickHouse/ClickHouse/pull/5367) ([Olga Khvostikova](https://github.com/stavrolia))
+
+## ClickHouse Release 19.7 {#clickhouse-release-19-7}
+
+### ClickHouse Release 19.7.5.29, 2019-07-05 {#clickhouse-release-19-7-5-29-2019-07-05}
+
+#### Bug Fix {#bug-fix-25}
+
+- Fix a performance regression in some queries with JOIN. [\#5192](https://github.com/ClickHouse/ClickHouse/pull/5192) ([Winter Zhang](https://github.com/zhang2014))
+
+### ClickHouse Release 19.7.5.27, 2019-06-09 {#clickhouse-release-19-7-5-27-2019-06-09}
+
+#### New Features {#new-features-1}
+
+- Added bitmap-related functions `bitmapHasAny` and `bitmapHasAll`, analogous to the `hasAny` and `hasAll` functions for arrays (a small illustration follows). [\#5279](https://github.com/ClickHouse/ClickHouse/pull/5279) ([Sergi Vladykin](https://github.com/svladykin))
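+A small illustration of the two new bitmap functions:
+
+```
+SELECT
+    bitmapHasAny(bitmapBuild([1, 2, 3]), bitmapBuild([3, 4, 5])) AS any_shared, -- 1: the bitmaps share element 3
+    bitmapHasAll(bitmapBuild([1, 2, 3]), bitmapBuild([3, 4, 5])) AS all_shared; -- 0: not every element of the second bitmap is in the first
+```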
+
+#### Bug Fixes {#bug-fixes-1}
+
+- Fix segfault on a `minmax` index with Null values. [\#5246](https://github.com/ClickHouse/ClickHouse/pull/5246) ([Nikita Vasilev](https://github.com/nikvas0))
+- Mark all input columns in LIMIT BY as required output. It fixes the ‘Not found column’ error in some distributed queries. [\#5407](https://github.com/ClickHouse/ClickHouse/pull/5407) ([Constantin S. Pan](https://github.com/kvap))
+- Fix the “Column ‘0’ already exists” error in `SELECT .. PREWHERE` on a column with DEFAULT. [\#5397](https://github.com/ClickHouse/ClickHouse/pull/5397) ([proller](https://github.com/proller))
+- Fix the `ALTER MODIFY TTL` query on `ReplicatedMergeTree`. [\#5539](https://github.com/ClickHouse/ClickHouse/pull/5539/commits) ([Anton Popov](https://github.com/CurtizJ))
+- Don't crash the server when Kafka consumers have failed to start. [\#5285](https://github.com/ClickHouse/ClickHouse/pull/5285) ([Ivan](https://github.com/abyss7))
+- Fixed bitmap functions producing wrong results. [\#5359](https://github.com/ClickHouse/ClickHouse/pull/5359) ([Andy Yang](https://github.com/andyyzh))
+- Fix element\_count for hashed dictionaries (do not include duplicates). [\#5440](https://github.com/ClickHouse/ClickHouse/pull/5440) ([Azat Khuzhin](https://github.com/azat))
+- Use the contents of the TZ environment variable as the name of the timezone. It helps to correctly detect the default timezone in some cases. [\#5443](https://github.com/ClickHouse/ClickHouse/pull/5443) ([Ivan](https://github.com/abyss7))
+- Do not try to convert integers in `dictGetT` functions, because it doesn't work correctly. Throw an exception instead. [\#5446](https://github.com/ClickHouse/ClickHouse/pull/5446) ([Artem Zuikov](https://github.com/4ertus2))
+- Fix settings in the ExternalData HTTP request. [\#5455](https://github.com/ClickHouse/ClickHouse/pull/5455) ([Danila Kutenin](https://github.com/danlark1))
+- Fix a bug when parts were removed only from FS without dropping them from ZooKeeper. [\#5520](https://github.com/ClickHouse/ClickHouse/pull/5520) ([alesapin](https://github.com/alesapin))
+- Fix a segmentation fault in the `bitmapHasAny` function. [\#5528](https://github.com/ClickHouse/ClickHouse/pull/5528) ([Zhichang Yu](https://github.com/yuzhichang))
+- Fixed an error when the replication connection pool doesn't retry resolving the host, even when the DNS cache was dropped. [\#5534](https://github.com/ClickHouse/ClickHouse/pull/5534) ([alesapin](https://github.com/alesapin))
+- Fixed the `DROP INDEX IF EXISTS` query. Now the `ALTER TABLE ... DROP INDEX IF EXISTS ...` query doesn't raise an exception if the provided index does not exist. [\#5524](https://github.com/ClickHouse/ClickHouse/pull/5524) ([Gleb Novikov](https://github.com/NanoBjorn))
+- Fix UNION ALL supertype columns. There were cases with inconsistent data and column types of resulting columns. [\#5503](https://github.com/ClickHouse/ClickHouse/pull/5503) ([Artem Zuikov](https://github.com/4ertus2))
+- Skip ZNONODE during DDL query processing. Previously, if another node removed the znode in the task queue, the node that didn't process it but had already gotten the list of children would terminate the DDLWorker thread. [\#5489](https://github.com/ClickHouse/ClickHouse/pull/5489) ([Azat Khuzhin](https://github.com/azat))
+- Fix INSERT into a Distributed() table with a MATERIALIZED column. [\#5429](https://github.com/ClickHouse/ClickHouse/pull/5429) ([Azat Khuzhin](https://github.com/azat))
+
+### ClickHouse Release 19.7.3.9, 2019-05-30 {#clickhouse-release-19-7-3-9-2019-05-30}
+
+#### New Features {#new-features-2}
+
+- Allow limiting the range of a setting that can be specified by a user. These constraints can be set up in the user settings profile. [\#4931](https://github.com/ClickHouse/ClickHouse/pull/4931) ([Vitaly Baranov](https://github.com/vitlibar))
+- Added a second version of the `groupUniqArray` function with an optional `max_size` parameter that limits the size of the resulting array. This behavior is similar to the `groupArray(max_size)(x)` function (a minimal sketch follows this list). [\#5026](https://github.com/ClickHouse/ClickHouse/pull/5026) ([Guillaume Tassery](https://github.com/YiuRULE))
+- For the TSVWithNames/CSVWithNames input file formats, the column order can now be determined from the file header. This is controlled by the `input_format_with_names_use_header` parameter. [\#5081](https://github.com/ClickHouse/ClickHouse/pull/5081) ([Alexander](https://github.com/Akazz))
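+A quick illustration of the capped `groupUniqArray`:
+
+```
+-- Returns at most 3 distinct values of number % 10.
+SELECT groupUniqArray(3)(number % 10) AS sample_values
+FROM numbers(100);
+```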
+
+#### Bug Fixes {#bug-fixes-2}
+
+- Fixed a crash with uncompressed\_cache + JOIN during merge (\#5197). [\#5133](https://github.com/ClickHouse/ClickHouse/pull/5133) ([Danila Kutenin](https://github.com/danlark1))
+- Fixed a segmentation fault on a clickhouse-client query to system tables. \#5066. [\#5127](https://github.com/ClickHouse/ClickHouse/pull/5127) ([Ivan](https://github.com/abyss7))
+- Fixed data loss on heavy load via KafkaEngine (\#4736). [\#5080](https://github.com/ClickHouse/ClickHouse/pull/5080) ([Ivan](https://github.com/abyss7))
+- Fixed a very rare data race condition that could happen when executing a UNION query involving at least two SELECTs from system.columns, system.tables, system.parts, system.parts\_tables or tables of the Merge family, while concurrently performing ALTER of columns of the related tables. [\#5189](https://github.com/ClickHouse/ClickHouse/pull/5189) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+#### Performance Improvements {#performance-improvements-1}
+
+- Use radix sort for sorting by a single numeric column in `ORDER BY` without `LIMIT`. [\#5106](https://github.com/ClickHouse/ClickHouse/pull/5106), [\#4439](https://github.com/ClickHouse/ClickHouse/pull/4439) ([Evgenii Pravda](https://github.com/kvinty), [alexey-milovidov](https://github.com/alexey-milovidov))
+
+#### Documentation {#documentation-1}
+
+- Translated documentation for some table engines to Chinese. [\#5107](https://github.com/ClickHouse/ClickHouse/pull/5107), [\#5094](https://github.com/ClickHouse/ClickHouse/pull/5094), [\#5087](https://github.com/ClickHouse/ClickHouse/pull/5087) ([张风啸](https://github.com/AlexZFX)), [\#5068](https://github.com/ClickHouse/ClickHouse/pull/5068) ([never lee](https://github.com/neverlee))
+
+#### Build/Testing/Packaging Improvements {#buildtestingpackaging-improvements-1}
+
+- Print UTF-8 characters properly in `clickhouse-test`. [\#5084](https://github.com/ClickHouse/ClickHouse/pull/5084) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added a command line parameter for clickhouse-client to always load suggestion data. [\#5102](https://github.com/ClickHouse/ClickHouse/pull/5102) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Resolved some PVS-Studio warnings. [\#5082](https://github.com/ClickHouse/ClickHouse/pull/5082) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Updated LZ4. [\#5040](https://github.com/ClickHouse/ClickHouse/pull/5040) ([Danila Kutenin](https://github.com/danlark1))
+- Added gperf to the build requirements for the upcoming pull request \#5030. [\#5110](https://github.com/ClickHouse/ClickHouse/pull/5110) ([proller](https://github.com/proller))
+
+## ClickHouse Release 19.6 {#clickhouse-release-19-6}
+
+### ClickHouse Release 19.6.3.18, 2019-06-13 {#clickhouse-release-19-6-3-18-2019-06-13}
+
+#### Bug Fixes {#bug-fixes-3}
+
+- Fixed condition pushdown for queries from the table functions `mysql` and `odbc` and the corresponding table engines. This fixes \#3540 and \#2384. [\#5313](https://github.com/ClickHouse/ClickHouse/pull/5313) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix a deadlock in ZooKeeper. [\#5297](https://github.com/ClickHouse/ClickHouse/pull/5297) ([github1youlc](https://github.com/github1youlc))
+- Allow quoted decimals in CSV. [\#5284](https://github.com/ClickHouse/ClickHouse/pull/5284) ([Artem Zuikov](https://github.com/4ertus2))
+- Disallow conversion from float Inf/NaN into Decimals (throw an exception). [\#5282](https://github.com/ClickHouse/ClickHouse/pull/5282) ([Artem Zuikov](https://github.com/4ertus2))
+- Fix a data race in the RENAME query. [\#5247](https://github.com/ClickHouse/ClickHouse/pull/5247) ([Winter Zhang](https://github.com/zhang2014))
+- Temporarily disable LFAlloc. Usage of LFAlloc might lead to a lot of MAP\_FAILED errors when allocating the UncompressedCache, and as a result to crashes of queries on highly loaded servers. [cfdba93](https://github.com/ClickHouse/ClickHouse/commit/cfdba938ce22f16efeec504f7f90206a515b1280) ([Danila Kutenin](https://github.com/danlark1))
+
+### ClickHouse Release 19.6.2.11, 2019-05-13 {#clickhouse-release-19-6-2-11-2019-05-13}
+
+#### New Features {#new-features-3}
+
+- TTL expressions for columns and tables. [\#4212](https://github.com/ClickHouse/ClickHouse/pull/4212) ([Anton Popov](https://github.com/CurtizJ))
+- Added support for `brotli` compression of HTTP responses (Accept-Encoding: br). [\#4388](https://github.com/ClickHouse/ClickHouse/pull/4388) ([Mikhail](https://github.com/fandyushin))
+- Added new function `isValidUTF8` for checking whether a set of bytes is correctly UTF-8 encoded. [\#4934](https://github.com/ClickHouse/ClickHouse/pull/4934) ([Danila Kutenin](https://github.com/danlark1))
+- Added new load balancing policy `first_or_random`, which sends queries to the first specified host and, if it's inaccessible, to a random host of the shard. Useful for cross-replication topology setups. [\#5012](https://github.com/ClickHouse/ClickHouse/pull/5012) ([nvartolomei](https://github.com/nvartolomei))
+
+#### Experimental Features {#experimental-features-1}
+
+- Added the setting `index_granularity_bytes` (adaptive index granularity) for the MergeTree\* family of tables (a minimal sketch follows). [\#4826](https://github.com/ClickHouse/ClickHouse/pull/4826) ([alesapin](https://github.com/alesapin))
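+A minimal sketch of a table using adaptive index granularity; the table and column names are placeholders:
+
+```
+CREATE TABLE events (dt DateTime, payload String)
+ENGINE = MergeTree
+ORDER BY dt
+SETTINGS index_granularity_bytes = 10485760; -- cap a granule at ~10 MiB of data
+```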
+
+#### Improvements {#improvements-1}
+
+- Added support for non-constant and negative size and length arguments for the function `substringUTF8`. [\#4989](https://github.com/ClickHouse/ClickHouse/pull/4989) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Disable push-down to the right table in LEFT JOIN, to the left table in RIGHT JOIN, and to both tables in FULL JOIN. This fixes wrong JOIN results in some cases. [\#4846](https://github.com/ClickHouse/ClickHouse/pull/4846) ([Ivan](https://github.com/abyss7))
+- `clickhouse-copier`: auto upload the task configuration from the `--task-file` option. [\#4876](https://github.com/ClickHouse/ClickHouse/pull/4876) ([proller](https://github.com/proller))
+- Added a typos handler for the storage factory and the table functions factory. [\#4891](https://github.com/ClickHouse/ClickHouse/pull/4891) ([Danila Kutenin](https://github.com/danlark1))
+- Support asterisks and qualified asterisks for multiple joins without subqueries (a minimal sketch follows this list). [\#4898](https://github.com/ClickHouse/ClickHouse/pull/4898) ([Artem Zuikov](https://github.com/4ertus2))
+- Make the missing-column error message more user friendly. [\#4915](https://github.com/ClickHouse/ClickHouse/pull/4915) ([Artem Zuikov](https://github.com/4ertus2))
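+A small sketch of the now-supported asterisks across multiple joins, assuming hypothetical tables `a`, `b` and `c` that share an `id` column:
+
+```
+SELECT *          -- unqualified asterisk over all three tables
+FROM a
+JOIN b ON a.id = b.id
+JOIN c ON b.id = c.id;
+
+-- A qualified asterisk such as SELECT a.* works as well.
+```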
+
+#### Performance Improvements {#performance-improvements-2}
+
+- ASOF JOIN speedup. [\#4924](https://github.com/ClickHouse/ClickHouse/pull/4924) ([Martijn Bakker](https://github.com/Gladdy))
+
+#### Backward Incompatible Changes {#backward-incompatible-changes}
+
+- The HTTP header `Query-Id` was renamed to `X-ClickHouse-Query-Id` for consistency. [\#4972](https://github.com/ClickHouse/ClickHouse/pull/4972) ([Mikhail](https://github.com/fandyushin))
+
+#### Bug Fixes {#bug-fixes-4}
+
+- Fixed a potential null pointer dereference in `clickhouse-copier`. [\#4900](https://github.com/ClickHouse/ClickHouse/pull/4900) ([proller](https://github.com/proller))
+- Fixed an error in queries with JOIN + ARRAY JOIN. [\#4938](https://github.com/ClickHouse/ClickHouse/pull/4938) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed the server hanging on startup when a dictionary depends on another dictionary via a database with ENGINE = Dictionary. [\#4962](https://github.com/ClickHouse/ClickHouse/pull/4962) ([Vitaly Baranov](https://github.com/vitlibar))
+- Partially fix distributed\_product\_mode = local. It's possible to allow columns of local tables in where/having/order by/… via table aliases. Throw an exception if the table does not have an alias. It's not possible to access the columns without table aliases yet. [\#4986](https://github.com/ClickHouse/ClickHouse/pull/4986) ([Artem Zuikov](https://github.com/4ertus2))
+- Fix a potentially wrong result for `SELECT DISTINCT` with `JOIN`. [\#5001](https://github.com/ClickHouse/ClickHouse/pull/5001) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed a very rare data race condition that could happen when executing a UNION query involving at least two SELECTs from system.columns, system.tables, system.parts, system.parts\_tables or tables of the Merge family, while concurrently performing ALTER of columns of the related tables. [\#5189](https://github.com/ClickHouse/ClickHouse/pull/5189) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+#### Build/Testing/Packaging Improvements {#buildtestingpackaging-improvements-2}
+
+- Fixed test failures when running clickhouse-server on a different host. [\#4713](https://github.com/ClickHouse/ClickHouse/pull/4713) ([Vasily Nemkov](https://github.com/Enmk))
+- clickhouse-test: disable color control sequences in non-tty environments. [\#4937](https://github.com/ClickHouse/ClickHouse/pull/4937) ([alesapin](https://github.com/alesapin))
+- clickhouse-test: allow using any test database (remove the `test.` qualification where possible). [\#5008](https://github.com/ClickHouse/ClickHouse/pull/5008) ([proller](https://github.com/proller))
+- Fix ubsan errors. [\#5037](https://github.com/ClickHouse/ClickHouse/pull/5037) ([Vitaly Baranov](https://github.com/vitlibar))
+- Yandex LFAlloc was added to ClickHouse to allocate MarkCache and UncompressedCache data in different ways to catch segfaults more reliably. [\#4995](https://github.com/ClickHouse/ClickHouse/pull/4995) ([Danila Kutenin](https://github.com/danlark1))
+- A Python util to help with backports and changelogs. [\#4949](https://github.com/ClickHouse/ClickHouse/pull/4949) ([Ivan](https://github.com/abyss7))
+
+## ClickHouse Release 19.5 {#clickhouse-release-19-5}
+
+### ClickHouse Release 19.5.4.22, 2019-05-13 {#clickhouse-release-19-5-4-22-2019-05-13}
+
+#### Bug Fixes {#bug-fixes-5}
+
+- Fixed a possible crash in bitmap\* functions. [\#5220](https://github.com/ClickHouse/ClickHouse/pull/5220) [\#5228](https://github.com/ClickHouse/ClickHouse/pull/5228) ([Andy Yang](https://github.com/andyyzh))
+- Fixed a very rare data race condition that could happen when executing a UNION query involving at least two SELECTs from system.columns, system.tables, system.parts, system.parts\_tables or tables of the Merge family, while concurrently performing ALTER of columns of the related tables. [\#5189](https://github.com/ClickHouse/ClickHouse/pull/5189) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed the error `Set for IN is not created yet in case of using single LowCardinality column in the left part of IN`. This error happened if a LowCardinality column was part of the primary key. \#5031. [\#5154](https://github.com/ClickHouse/ClickHouse/pull/5154) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Modification of the retention function: if a row satisfies both the first and the N-th condition, only the first satisfied condition was added to the data state. Now all conditions that a row of data satisfies are added to the data state. [\#5119](https://github.com/ClickHouse/ClickHouse/pull/5119) ([小路](https://github.com/nicelulu))
+
+### ClickHouse Release 19.5.3.8, 2019-04-18 {#clickhouse-release-19-5-3-8-2019-04-18}
+
+#### Bug Fixes {#bug-fixes-6}
+
+- Fixed the type of the setting `max_partitions_per_insert_block` from boolean to UInt64. [\#5028](https://github.com/ClickHouse/ClickHouse/pull/5028) ([Mohammad Hossein Sekhavat](https://github.com/mhsekhavat))
+
+### ClickHouse Release 19.5.2.6, 2019-04-15 {#clickhouse-release-19-5-2-6-2019-04-15}
+
+#### New Features {#new-features-4}
+
+- [Hyperscan](https://github.com/intel/hyperscan) multiple regular expression matching was added (functions `multiMatchAny`, `multiMatchAnyIndex`, `multiFuzzyMatchAny`, `multiFuzzyMatchAnyIndex`). [\#4780](https://github.com/ClickHouse/ClickHouse/pull/4780), [\#4841](https://github.com/ClickHouse/ClickHouse/pull/4841) ([Danila Kutenin](https://github.com/danlark1))
+- `multiSearchFirstPosition` function was added. [\#4780](https://github.com/ClickHouse/ClickHouse/pull/4780) ([Danila Kutenin](https://github.com/danlark1))
+- Implement a predefined per-row expression filter for tables. [\#4792](https://github.com/ClickHouse/ClickHouse/pull/4792) ([Ivan](https://github.com/abyss7))
+- A new type of data skipping index based on bloom filters (can be used for the `equal`, `in` and `like` functions). [\#4499](https://github.com/ClickHouse/ClickHouse/pull/4499) ([Nikita Vasilev](https://github.com/nikvas0))
+- Added `ASOF JOIN`, which allows running queries that join to the most recent known value (a minimal sketch follows this list). [\#4774](https://github.com/ClickHouse/ClickHouse/pull/4774) [\#4867](https://github.com/ClickHouse/ClickHouse/pull/4867) [\#4863](https://github.com/ClickHouse/ClickHouse/pull/4863) [\#4875](https://github.com/ClickHouse/ClickHouse/pull/4875) ([Martijn Bakker](https://github.com/Gladdy), [Artem Zuikov](https://github.com/4ertus2))
+- Rewrite multiple `COMMA JOIN`s to `CROSS JOIN`, then rewrite them to `INNER JOIN` if possible. [\#4661](https://github.com/ClickHouse/ClickHouse/pull/4661) ([Artem Zuikov](https://github.com/4ertus2))
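+A minimal sketch of `ASOF JOIN` in its `USING` form, assuming hypothetical `trades` and `quotes` tables keyed by `symbol` with a `ts` timestamp column; the last column in `USING` is the inexact-match one:
+
+```
+-- For every trade, join the most recent quote whose ts does not exceed the trade ts.
+SELECT symbol, ts, price
+FROM trades
+ASOF JOIN quotes USING (symbol, ts);
+```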
+
+#### Improvement {#improvement-9}
+
+- `topK` and `topKWeighted` now support a custom `loadFactor` (fixes issue [\#4252](https://github.com/ClickHouse/ClickHouse/issues/4252)). [\#4634](https://github.com/ClickHouse/ClickHouse/pull/4634) ([Kirill Danshin](https://github.com/kirillDanshin))
+- Allow using `parallel_replicas_count > 1` even for tables without sampling (the setting is simply ignored for them). In previous versions it led to an exception. [\#4637](https://github.com/ClickHouse/ClickHouse/pull/4637) ([Alexey Elymanov](https://github.com/digitalist))
+- Support `CREATE OR REPLACE VIEW`. Allows creating a view or setting a new definition in a single statement. [\#4654](https://github.com/ClickHouse/ClickHouse/pull/4654) ([Boris Granveaud](https://github.com/bgranvea))
+- The `Buffer` table engine now supports `PREWHERE`. [\#4671](https://github.com/ClickHouse/ClickHouse/pull/4671) ([LiuYangkuan](https://github.com/LiuYangkuan))
+- Added the ability to start a replicated table without metadata in ZooKeeper in `readonly` mode. [\#4691](https://github.com/ClickHouse/ClickHouse/pull/4691) ([alesapin](https://github.com/alesapin))
+- Fixed flicker of the progress bar in clickhouse-client. The issue was most noticeable when using `FORMAT Null` with streaming queries. [\#4811](https://github.com/ClickHouse/ClickHouse/pull/4811) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Allow disabling functions based on the `hyperscan` library on a per-user basis to limit potentially excessive and uncontrolled resource usage. [\#4816](https://github.com/ClickHouse/ClickHouse/pull/4816) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Add the version number to the logging of all errors. [\#4824](https://github.com/ClickHouse/ClickHouse/pull/4824) ([proller](https://github.com/proller))
+- Added a restriction to the `multiMatch` functions which requires the string size to fit into `unsigned int`. Also added a number-of-arguments limit to the `multiSearch` functions. [\#4834](https://github.com/ClickHouse/ClickHouse/pull/4834) ([Danila Kutenin](https://github.com/danlark1))
+- Improved usage of scratch space and error handling in Hyperscan. [\#4866](https://github.com/ClickHouse/ClickHouse/pull/4866) ([Danila Kutenin](https://github.com/danlark1))
+- Fill `system.graphite_retentions` from the config of `*GraphiteMergeTree` engine tables. [\#4584](https://github.com/ClickHouse/ClickHouse/pull/4584) ([Mikhail f. Shiryaev](https://github.com/Felixoid))
+- Renamed the `trigramDistance` function to `ngramDistance` and added more functions with `CaseInsensitive` and `UTF` variants. [\#4602](https://github.com/ClickHouse/ClickHouse/pull/4602) ([Danila Kutenin](https://github.com/danlark1))
+- Improved data skipping indices calculation. [\#4640](https://github.com/ClickHouse/ClickHouse/pull/4640) ([Nikita Vasilev](https://github.com/nikvas0))
+- Keep ordinary, `DEFAULT`, `MATERIALIZED` and `ALIAS` columns in a single list (fixes issue [\#2867](https://github.com/ClickHouse/ClickHouse/issues/2867)). [\#4707](https://github.com/ClickHouse/ClickHouse/pull/4707) ([Alex Zatelepin](https://github.com/ztlpn))
+
+#### Bug Fix {#bug-fix-26}
+
+- Avoid `std::terminate` in case of memory allocation failure. Now a `std::bad_alloc` exception is thrown as expected. [\#4665](https://github.com/ClickHouse/ClickHouse/pull/4665) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed capnproto reading from a buffer. Sometimes files weren't loaded successfully over HTTP. [\#4674](https://github.com/ClickHouse/ClickHouse/pull/4674) ([Vladislav](https://github.com/smirnov-vs))
+- Fix the error `Unknown log entry type: 0` after an `OPTIMIZE TABLE FINAL` query. [\#4683](https://github.com/ClickHouse/ClickHouse/pull/4683) ([Amos Bird](https://github.com/amosbird))
+- Wrong arguments to the `hasAny` or `hasAll` functions could lead to a segfault. [\#4698](https://github.com/ClickHouse/ClickHouse/pull/4698) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- A deadlock could happen while executing a `DROP DATABASE dictionary` query. [\#4701](https://github.com/ClickHouse/ClickHouse/pull/4701) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix undefined behavior in the `median` and `quantile` functions. [\#4702](https://github.com/ClickHouse/ClickHouse/pull/4702) ([hcz](https://github.com/hczhcz))
+- Fix compression level detection when `network_compression_method` is in lowercase. Broken in v19.1. [\#4706](https://github.com/ClickHouse/ClickHouse/pull/4706) ([proller](https://github.com/proller))
+- Fixed the `UTC` setting being ignored (fixes issue [\#4658](https://github.com/ClickHouse/ClickHouse/issues/4658)). [\#4718](https://github.com/ClickHouse/ClickHouse/pull/4718) ([proller](https://github.com/proller))
+- Fix `histogram` function behavior with `Distributed` tables. [\#4741](https://github.com/ClickHouse/ClickHouse/pull/4741) ([olegkv](https://github.com/olegkv))
+- Fixed the TSan report `destroy of a locked mutex`. [\#4742](https://github.com/ClickHouse/ClickHouse/pull/4742) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed the TSan report on shutdown due to a race condition in system logs usage. Fixed potential use-after-free on shutdown when part\_log is enabled. [\#4758](https://github.com/ClickHouse/ClickHouse/pull/4758) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix recheck of parts in `ReplicatedMergeTreeAlterThread` in case of error. [\#4772](https://github.com/ClickHouse/ClickHouse/pull/4772) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Arithmetic operations on intermediate aggregate function states did not work for constant arguments (such as subquery results). [\#4776](https://github.com/ClickHouse/ClickHouse/pull/4776) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Always backquote column names in metadata. Otherwise it was impossible to create a table with a column named `index` (the server wouldn't restart due to a malformed `ATTACH` query in metadata). [\#4782](https://github.com/ClickHouse/ClickHouse/pull/4782) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix crash in `ALTER ... MODIFY ORDER BY` on a `Distributed` table. [\#4790](https://github.com/ClickHouse/ClickHouse/pull/4790) ([TCeason](https://github.com/TCeason))
+- Fix segfault in `JOIN ON` with `enable_optimize_predicate_expression` enabled. [\#4794](https://github.com/ClickHouse/ClickHouse/pull/4794) ([Winter Zhang](https://github.com/zhang2014))
+- Fix a bug with adding an extraneous row after consuming a protobuf message from Kafka. [\#4808](https://github.com/ClickHouse/ClickHouse/pull/4808) ([Vitaly Baranov](https://github.com/vitlibar))
+- Fix crash of `JOIN` on non-nullable vs nullable columns. Fix `NULLs` in right keys in `ANY JOIN` + `join_use_nulls`. [\#4815](https://github.com/ClickHouse/ClickHouse/pull/4815) ([Artem Zuikov](https://github.com/4ertus2))
+- Fix segmentation fault in `clickhouse-copier`. [\#4835](https://github.com/ClickHouse/ClickHouse/pull/4835) ([proller](https://github.com/proller))
+- Fixed a race condition in `SELECT` from `system.tables` if the table is renamed or altered concurrently. [\#4836](https://github.com/ClickHouse/ClickHouse/pull/4836) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a data race when fetching a data part that is already obsolete. [\#4839](https://github.com/ClickHouse/ClickHouse/pull/4839) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a rare data race that could happen during `RENAME` of MergeTree-family tables. [\#4844](https://github.com/ClickHouse/ClickHouse/pull/4844) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed segmentation fault in the `arrayIntersect` function. A segmentation fault could happen if the function was called with mixed constant and ordinary arguments. [\#4847](https://github.com/ClickHouse/ClickHouse/pull/4847) ([钱丽祥](https://github.com/fancyqlx))
+- Fixed reading from an `Array(LowCardinality)` column in the rare case when the column contains a long sequence of empty arrays. [\#4850](https://github.com/ClickHouse/ClickHouse/pull/4850) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fix crash in `FULL/RIGHT JOIN` when joining on nullable vs not nullable. [\#4855](https://github.com/ClickHouse/ClickHouse/pull/4855) ([Artem Zuikov](https://github.com/4ertus2))
+- Fix the `No message received` exception while fetching parts between replicas. [\#4856](https://github.com/ClickHouse/ClickHouse/pull/4856) ([alesapin](https://github.com/alesapin))
+- Fixed the `arrayIntersect` function producing a wrong result in the case of several repeated values in a single array. [\#4871](https://github.com/ClickHouse/ClickHouse/pull/4871) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fix a race condition during concurrent `ALTER COLUMN` queries that could lead to a server crash (fixes issue [\#3421](https://github.com/ClickHouse/ClickHouse/issues/3421)). [\#4592](https://github.com/ClickHouse/ClickHouse/pull/4592) ([Alex Zatelepin](https://github.com/ztlpn))
+- Fix incorrect result in `FULL/RIGHT JOIN` with a const column. [\#4723](https://github.com/ClickHouse/ClickHouse/pull/4723) ([Artem Zuikov](https://github.com/4ertus2))
+- Fix duplicates in `GLOBAL JOIN` with an asterisk. [\#4705](https://github.com/ClickHouse/ClickHouse/pull/4705) ([Artem Zuikov](https://github.com/4ertus2))
+- Fix parameter deduction in `ALTER MODIFY` of a column with `CODEC` when the column type is not specified. [\#4883](https://github.com/ClickHouse/ClickHouse/pull/4883) ([alesapin](https://github.com/alesapin))
+- The functions `cutQueryStringAndFragment()` and `queryStringAndFragment()` now work correctly when the `URL` contains a fragment and no query. [\#4894](https://github.com/ClickHouse/ClickHouse/pull/4894) ([Vitaly Baranov](https://github.com/vitlibar))
+- Fix a rare bug when setting `min_bytes_to_use_direct_io` greater than zero, which occurred when a thread had to seek backward in a column file. [\#4897](https://github.com/ClickHouse/ClickHouse/pull/4897) ([alesapin](https://github.com/alesapin))
+- Fix wrong argument types for aggregate functions with `LowCardinality` arguments (fixes issue [\#4919](https://github.com/ClickHouse/ClickHouse/issues/4919)). [\#4922](https://github.com/ClickHouse/ClickHouse/pull/4922) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fix wrong name qualification in `GLOBAL JOIN`. [\#4969](https://github.com/ClickHouse/ClickHouse/pull/4969) ([Artem Zuikov](https://github.com/4ertus2))
+- Fix the result of the `toISOWeek` function for the year 1970. [\#4988](https://github.com/ClickHouse/ClickHouse/pull/4988) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix `DROP`, `TRUNCATE` and `OPTIMIZE` query duplication when executed `ON CLUSTER` for the `ReplicatedMergeTree*` table family. [\#4991](https://github.com/ClickHouse/ClickHouse/pull/4991) ([alesapin](https://github.com/alesapin))
+
+#### Backward Incompatible Change {#backward-incompatible-change-8}
+
+- Renamed the setting `insert_sample_with_metadata` to `input_format_defaults_for_omitted_fields`. [\#4771](https://github.com/ClickHouse/ClickHouse/pull/4771) ([Artem Zuikov](https://github.com/4ertus2))
+- Added the setting `max_partitions_per_insert_block` (with the value 100 by default). If an inserted block contains a larger number of partitions, an exception is thrown. Set it to 0 if you want to remove the limit (not recommended; a minimal sketch follows this list). [\#4845](https://github.com/ClickHouse/ClickHouse/pull/4845) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Multi-search functions were renamed (`multiPosition` to `multiSearchAllPositions`, `multiSearch` to `multiSearchAny`, `firstMatch` to `multiSearchFirstIndex`). [\#4780](https://github.com/ClickHouse/ClickHouse/pull/4780) ([Danila Kutenin](https://github.com/danlark1))
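+A quick sketch of relaxing the new insert-time partition limit for a single session; the table names are placeholders:
+
+```
+-- Allow a one-off backfill INSERT to touch up to 365 partitions.
+SET max_partitions_per_insert_block = 365;
+INSERT INTO daily_stats SELECT * FROM staging_stats;
+```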
+
+#### Performance Improvement {#performance-improvement-6}
+
+- Optimized the Volnitsky searcher by inlining, giving about a 5–10% search improvement for queries with many needles or many similar bigrams. [\#4862](https://github.com/ClickHouse/ClickHouse/pull/4862) ([Danila Kutenin](https://github.com/danlark1))
+- Fixed a performance issue when setting `use_uncompressed_cache` greater than zero, which appeared when all read data was contained in the cache. [\#4913](https://github.com/ClickHouse/ClickHouse/pull/4913) ([alesapin](https://github.com/alesapin))
+
+#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-10}
+
+- Hardened debug build: more granular memory mappings and ASLR; added memory protection for the mark cache and the index. This allows finding more memory-stomping bugs in cases when ASan and MSan cannot do it. [\#4632](https://github.com/ClickHouse/ClickHouse/pull/4632) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added support for the cmake variables `ENABLE_PROTOBUF`, `ENABLE_PARQUET` and `ENABLE_BROTLI`, which allow enabling/disabling those features (same as we can do for librdkafka, mysql, etc.). [\#4669](https://github.com/ClickHouse/ClickHouse/pull/4669) ([Silviu Caragea](https://github.com/silviucpp))
+- Added the ability to print the process list and stack traces of all threads if some queries are hung after a test run. [\#4675](https://github.com/ClickHouse/ClickHouse/pull/4675) ([alesapin](https://github.com/alesapin))
+- Added retries on `Connection loss` errors in `clickhouse-test`. [\#4682](https://github.com/ClickHouse/ClickHouse/pull/4682) ([alesapin](https://github.com/alesapin))
+- Added a FreeBSD build using vagrant, and a build with the thread sanitizer, to the packager script. [\#4712](https://github.com/ClickHouse/ClickHouse/pull/4712) [\#4748](https://github.com/ClickHouse/ClickHouse/pull/4748) ([alesapin](https://github.com/alesapin))
+- The installer now asks for a password for the `'default'` user during installation. [\#4725](https://github.com/ClickHouse/ClickHouse/pull/4725) ([proller](https://github.com/proller))
+- Suppress warnings in the `rdkafka` library. [\#4740](https://github.com/ClickHouse/ClickHouse/pull/4740) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Allow building without SSL. [\#4750](https://github.com/ClickHouse/ClickHouse/pull/4750) ([proller](https://github.com/proller))
+- Added a way to launch the clickhouse-server image as a custom user. [\#4753](https://github.com/ClickHouse/ClickHouse/pull/4753) ([Mikhail f. Shiryaev](https://github.com/Felixoid))
+- Upgraded contrib boost to 1.69. [\#4793](https://github.com/ClickHouse/ClickHouse/pull/4793) ([proller](https://github.com/proller))
+- Disabled usage of `mremap` when compiled with the thread sanitizer. Surprisingly enough, TSan does not intercept `mremap` (though it does intercept `mmap` and `munmap`), which leads to false positives. Fixed TSan reports in stateful tests. [\#4859](https://github.com/ClickHouse/ClickHouse/pull/4859) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added test checking for using format schemas via the HTTP interface. [\#4864](https://github.com/ClickHouse/ClickHouse/pull/4864) ([Vitaly Baranov](https://github.com/vitlibar))
+
+## ClickHouse Release 19.4 {#clickhouse-release-19-4}
+
+### ClickHouse Release 19.4.4.33, 2019-04-17 {#clickhouse-release-19-4-4-33-2019-04-17}
+
+#### Bug Fixes {#bug-fixes-7}
+
+- Avoid `std::terminate` in case of memory allocation failure. Now a `std::bad_alloc` exception is thrown as expected. [\#4665](https://github.com/ClickHouse/ClickHouse/pull/4665) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed capnproto reading from a buffer. Sometimes files weren't loaded successfully over HTTP. [\#4674](https://github.com/ClickHouse/ClickHouse/pull/4674) ([Vladislav](https://github.com/smirnov-vs))
+- Fix the error `Unknown log entry type: 0` after an `OPTIMIZE TABLE FINAL` query. [\#4683](https://github.com/ClickHouse/ClickHouse/pull/4683) ([Amos Bird](https://github.com/amosbird))
+- Wrong arguments to the `hasAny` or `hasAll` functions could lead to a segfault. [\#4698](https://github.com/ClickHouse/ClickHouse/pull/4698) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- A deadlock could happen while executing a `DROP DATABASE dictionary` query. [\#4701](https://github.com/ClickHouse/ClickHouse/pull/4701) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix undefined behavior in the `median` and `quantile` functions. [\#4702](https://github.com/ClickHouse/ClickHouse/pull/4702) ([hcz](https://github.com/hczhcz))
+- Fix compression level detection when `network_compression_method` is in lowercase. Broken in v19.1. [\#4706](https://github.com/ClickHouse/ClickHouse/pull/4706) ([proller](https://github.com/proller))
+- Fixed the `UTC` setting being ignored (fixes issue [\#4658](https://github.com/ClickHouse/ClickHouse/issues/4658)). [\#4718](https://github.com/ClickHouse/ClickHouse/pull/4718) ([proller](https://github.com/proller))
+- Fix `histogram` function behavior with `Distributed` tables. [\#4741](https://github.com/ClickHouse/ClickHouse/pull/4741) ([olegkv](https://github.com/olegkv))
+- Fixed the TSan report `destroy of a locked mutex`. [\#4742](https://github.com/ClickHouse/ClickHouse/pull/4742) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed the TSan report on shutdown due to a race condition in system logs usage. Fixed potential use-after-free on shutdown when part\_log is enabled. [\#4758](https://github.com/ClickHouse/ClickHouse/pull/4758) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix recheck of parts in `ReplicatedMergeTreeAlterThread` in case of error. [\#4772](https://github.com/ClickHouse/ClickHouse/pull/4772) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Arithmetic operations on intermediate aggregate function states did not work for constant arguments (such as subquery results). [\#4776](https://github.com/ClickHouse/ClickHouse/pull/4776) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Always backquote column names in metadata. Otherwise it was impossible to create a table with a column named `index` (the server wouldn't restart due to a malformed `ATTACH` query in metadata). [\#4782](https://github.com/ClickHouse/ClickHouse/pull/4782) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix crash in `ALTER ... MODIFY ORDER BY` on a `Distributed` table. [\#4790](https://github.com/ClickHouse/ClickHouse/pull/4790) ([TCeason](https://github.com/TCeason))
+- Fix segfault in `JOIN ON` with `enable_optimize_predicate_expression` enabled. [\#4794](https://github.com/ClickHouse/ClickHouse/pull/4794) ([Winter Zhang](https://github.com/zhang2014))
+- Fix a bug with adding an extraneous row after consuming a protobuf message from Kafka. [\#4808](https://github.com/ClickHouse/ClickHouse/pull/4808) ([Vitaly Baranov](https://github.com/vitlibar))
+- Fix segmentation fault in `clickhouse-copier`. [\#4835](https://github.com/ClickHouse/ClickHouse/pull/4835) ([proller](https://github.com/proller))
+- Fixed a race condition in `SELECT` from `system.tables` if the table is renamed or altered concurrently. [\#4836](https://github.com/ClickHouse/ClickHouse/pull/4836) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a data race when fetching a data part that is already obsolete. [\#4839](https://github.com/ClickHouse/ClickHouse/pull/4839) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a rare data race that could happen during `RENAME` of MergeTree-family tables. [\#4844](https://github.com/ClickHouse/ClickHouse/pull/4844) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed segmentation fault in the `arrayIntersect` function. A segmentation fault could happen if the function was called with mixed constant and ordinary arguments. [\#4847](https://github.com/ClickHouse/ClickHouse/pull/4847) ([钱丽祥](https://github.com/fancyqlx))
+- Fixed reading from an `Array(LowCardinality)` column in the rare case when the column contains a long sequence of empty arrays. [\#4850](https://github.com/ClickHouse/ClickHouse/pull/4850) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fix the `No message received` exception while fetching parts between replicas. [\#4856](https://github.com/ClickHouse/ClickHouse/pull/4856) ([alesapin](https://github.com/alesapin))
+- Fixed the `arrayIntersect` function producing a wrong result in the case of several repeated values in a single array. [\#4871](https://github.com/ClickHouse/ClickHouse/pull/4871) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fix a race condition during concurrent `ALTER COLUMN` queries that could lead to a server crash (fixes issue [\#3421](https://github.com/ClickHouse/ClickHouse/issues/3421)). [\#4592](https://github.com/ClickHouse/ClickHouse/pull/4592) ([Alex Zatelepin](https://github.com/ztlpn))
+- Fix parameter deduction in `ALTER MODIFY` of a column with `CODEC` when the column type is not specified. [\#4883](https://github.com/ClickHouse/ClickHouse/pull/4883) ([alesapin](https://github.com/alesapin))
+- The functions `cutQueryStringAndFragment()` and `queryStringAndFragment()` now work correctly when the `URL` contains a fragment and no query. [\#4894](https://github.com/ClickHouse/ClickHouse/pull/4894) ([Vitaly Baranov](https://github.com/vitlibar))
+- Fix a rare bug when setting `min_bytes_to_use_direct_io` greater than zero, which occurred when a thread had to seek backward in a column file. [\#4897](https://github.com/ClickHouse/ClickHouse/pull/4897) ([alesapin](https://github.com/alesapin))
+- Fix wrong argument types for aggregate functions with `LowCardinality` arguments (fixes issue [\#4919](https://github.com/ClickHouse/ClickHouse/issues/4919)). [\#4922](https://github.com/ClickHouse/ClickHouse/pull/4922) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fix the result of the `toISOWeek` function for the year 1970. [\#4988](https://github.com/ClickHouse/ClickHouse/pull/4988) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix `DROP`, `TRUNCATE` and `OPTIMIZE` query duplication when executed `ON CLUSTER` for the `ReplicatedMergeTree*` table family. [\#4991](https://github.com/ClickHouse/ClickHouse/pull/4991) ([alesapin](https://github.com/alesapin))
+
+#### Improvements {#improvements-2}
+
+- Keep ordinary, `DEFAULT`, `MATERIALIZED` and `ALIAS` columns in a single list (fixes issue [\#2867](https://github.com/ClickHouse/ClickHouse/issues/2867)). [\#4707](https://github.com/ClickHouse/ClickHouse/pull/4707) ([Alex Zatelepin](https://github.com/ztlpn))
+
+### ClickHouse Release 19.4.3.11, 2019-04-02 {#clickhouse-release-19-4-3-11-2019-04-02}
+
+#### Bug Fixes {#bug-fixes-8}
+
+- Fix crash in `FULL/RIGHT JOIN` when joining on nullable vs not nullable. [\#4855](https://github.com/ClickHouse/ClickHouse/pull/4855) ([Artem Zuikov](https://github.com/4ertus2))
+- Fix segmentation fault in `clickhouse-copier`. [\#4835](https://github.com/ClickHouse/ClickHouse/pull/4835) ([proller](https://github.com/proller))
+
+#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-11}
+
+- Added a way to launch the clickhouse-server image as a custom user. [\#4753](https://github.com/ClickHouse/ClickHouse/pull/4753) ([Mikhail f. Shiryaev](https://github.com/Felixoid))
+
+### ClickHouse Release 19.4.2.7, 2019-03-30 {#clickhouse-release-19-4-2-7-2019-03-30}
+
+#### Bug Fixes {#bug-fixes-9}
+
+- Fixed reading from an `Array(LowCardinality)` column in the rare case when the column contains a long sequence of empty arrays. [\#4850](https://github.com/ClickHouse/ClickHouse/pull/4850) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+
+### ClickHouse Release 19.4.1.3, 2019-03-19 {#clickhouse-release-19-4-1-3-2019-03-19}
+
+#### Bug Fixes {#bug-fixes-10}
+
+- Fixed remote queries which contain both `LIMIT BY` and `LIMIT`. Previously, if `LIMIT BY` and `LIMIT` were used for a remote query, `LIMIT` could happen before `LIMIT BY`, which led to a too-filtered result. [\#4708](https://github.com/ClickHouse/ClickHouse/pull/4708) ([Constantin S. Pan](https://github.com/kvap))
+
+### ClickHouse Release 19.4.0.49, 2019-03-09 {#clickhouse-release-19-4-0-49-2019-03-09}
+
+#### New Features {#new-features-5}
+
+- Added full support for the `Protobuf` format (input and output, nested data structures). [\#4174](https://github.com/ClickHouse/ClickHouse/pull/4174) [\#4493](https://github.com/ClickHouse/ClickHouse/pull/4493) ([Vitaly Baranov](https://github.com/vitlibar))
+- Added bitmap functions with Roaring Bitmaps. [\#4207](https://github.com/ClickHouse/ClickHouse/pull/4207) ([Andy Yang](https://github.com/andyyzh)) [\#4568](https://github.com/ClickHouse/ClickHouse/pull/4568) ([Vitaly Baranov](https://github.com/vitlibar))
+- Parquet format support. [\#4448](https://github.com/ClickHouse/ClickHouse/pull/4448) ([proller](https://github.com/proller))
+- N-gram distance was added for fuzzy string comparison. It is similar to q-gram metrics in the R language. [\#4466](https://github.com/ClickHouse/ClickHouse/pull/4466) ([Danila Kutenin](https://github.com/danlark1))
+- Combine rules for Graphite rollup from dedicated aggregation and retention patterns. [\#4426](https://github.com/ClickHouse/ClickHouse/pull/4426) ([Mikhail f. Shiryaev](https://github.com/Felixoid))
+- Added `max_execution_speed` and `max_execution_speed_bytes` to limit resource usage. Added the `min_execution_speed_bytes` setting to complement `min_execution_speed`. [\#4430](https://github.com/ClickHouse/ClickHouse/pull/4430) ([Winter Zhang](https://github.com/zhang2014))
+- Implemented the `flatten` function (a minimal sketch follows this list). [\#4555](https://github.com/ClickHouse/ClickHouse/pull/4555) [\#4409](https://github.com/ClickHouse/ClickHouse/pull/4409) ([alexey-milovidov](https://github.com/alexey-milovidov), [kzon](https://github.com/kzon))
+- Added functions `arrayEnumerateDenseRanked` and `arrayEnumerateUniqRanked` (it's like `arrayEnumerateUniq` but allows fine-tuning the array depth to look inside multidimensional arrays). [\#4475](https://github.com/ClickHouse/ClickHouse/pull/4475) ([proller](https://github.com/proller)) [\#4601](https://github.com/ClickHouse/ClickHouse/pull/4601) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Multiple JOINs with some restrictions: no asterisks, no complex aliases in ON/WHERE/GROUP BY/… [\#4462](https://github.com/ClickHouse/ClickHouse/pull/4462) ([Artem Zuikov](https://github.com/4ertus2))
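+A quick illustration of the new `flatten` function, which converts nested arrays to a flat array:
+
+```
+SELECT flatten([[[1]], [[2], [3, 4]]]) AS flat; -- [1, 2, 3, 4]
+```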
+
+#### Bug Fixes {#bug-fixes-11}
+
+- This release also contains all bug fixes from 19.3 and 19.1.
+- Fixed a bug in data skipping indices: the order of granules after INSERT was incorrect. [\#4407](https://github.com/ClickHouse/ClickHouse/pull/4407) ([Nikita Vasilev](https://github.com/nikvas0))
+- Fixed the `set` index for `Nullable` and `LowCardinality` columns. Before it, a `set` index with a `Nullable` or `LowCardinality` column led to the error `Data type must be deserialized with multiple streams` while selecting. [\#4594](https://github.com/ClickHouse/ClickHouse/pull/4594) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Correctly set update\_time on full `executable` dictionary updates. [\#4551](https://github.com/ClickHouse/ClickHouse/pull/4551) ([Tema Novikov](https://github.com/temoon))
+- Fix the broken progress bar in 19.3. [\#4627](https://github.com/ClickHouse/ClickHouse/pull/4627) ([filimonov](https://github.com/filimonov))
+- Fixed inconsistent values of MemoryTracker when a memory region was shrunk, in certain cases. [\#4619](https://github.com/ClickHouse/ClickHouse/pull/4619) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed undefined behavior in ThreadPool. [\#4612](https://github.com/ClickHouse/ClickHouse/pull/4612) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a very rare crash with the message `mutex lock failed: Invalid argument` that could happen when a MergeTree table was dropped concurrently with a SELECT. [\#4608](https://github.com/ClickHouse/ClickHouse/pull/4608) ([Alex Zatelepin](https://github.com/ztlpn))
+- ODBC driver compatibility with the `LowCardinality` data type. [\#4381](https://github.com/ClickHouse/ClickHouse/pull/4381) ([proller](https://github.com/proller))
+- FreeBSD: fix for the `AIOcontextPool: Found io_event with unknown id 0` error. [\#4438](https://github.com/ClickHouse/ClickHouse/pull/4438) ([urgordeadbeef](https://github.com/urgordeadbeef))
+- The `system.part_log` table is now created regardless of the configuration. [\#4483](https://github.com/ClickHouse/ClickHouse/pull/4483) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix undefined behavior in the `dictIsIn` function for cache dictionaries. [\#4515](https://github.com/ClickHouse/ClickHouse/pull/4515) ([alesapin](https://github.com/alesapin))
+- Fixed a deadlock when a SELECT query locks the same table multiple times (e.g. from different threads or when executing multiple subqueries) and there is a concurrent DDL query. [\#4535](https://github.com/ClickHouse/ClickHouse/pull/4535) ([Alex Zatelepin](https://github.com/ztlpn))
+- Disable compile\_expressions by default until we get our own `llvm` contrib and can test it with `clang` and `asan`. [\#4579](https://github.com/ClickHouse/ClickHouse/pull/4579) ([alesapin](https://github.com/alesapin))
+- Prevent `std::terminate` when `invalidate_query` for a `clickhouse` external dictionary source returned a wrong result set (empty, or more than one row, or more than one column). Fixed the issue where `invalidate_query` was performed every five seconds regardless of `lifetime`. [\#4583](https://github.com/ClickHouse/ClickHouse/pull/4583) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Avoid a deadlock when `invalidate_query` for a dictionary with a `clickhouse` source involves the `system.dictionaries` table or a `Dictionaries` database (rare case). [\#4599](https://github.com/ClickHouse/ClickHouse/pull/4599) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed CROSS JOIN with an empty WHERE. [\#4598](https://github.com/ClickHouse/ClickHouse/pull/4598) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed segfault in the function “replicate” when a constant argument is passed. [\#4603](https://github.com/ClickHouse/ClickHouse/pull/4603) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix lambda functions with the predicate optimizer. [\#4408](https://github.com/ClickHouse/ClickHouse/pull/4408) ([Winter Zhang](https://github.com/zhang2014))
+- Multiple fixes for multiple JOINs. [\#4595](https://github.com/ClickHouse/ClickHouse/pull/4595) ([Artem Zuikov](https://github.com/4ertus2))
+
+#### Improvements {#improvements-3}
+
+- Partially support aliases for right table columns in JOIN ON. [\#4412](https://github.com/ClickHouse/ClickHouse/pull/4412) ([Artem Zuikov](https://github.com/4ertus2))
+- The result of multiple JOINs now uses correct result names in subselects. Flat aliases are replaced with source names in the result. [\#4474](https://github.com/ClickHouse/ClickHouse/pull/4474) ([Artem Zuikov](https://github.com/4ertus2))
+- Improve the push-down logic for joined statements. [\#4387](https://github.com/ClickHouse/ClickHouse/pull/4387) ([Ivan](https://github.com/abyss7))
+
+#### Performance Improvements {#performance-improvements-3}
+
+- Improved heuristics of the “move to PREWHERE” optimization. [\#4405](https://github.com/ClickHouse/ClickHouse/pull/4405) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Use proper lookup tables that use HashTable's API for 8-bit and 16-bit keys. [\#4536](https://github.com/ClickHouse/ClickHouse/pull/4536) ([Amos Bird](https://github.com/amosbird))
+- Improved performance of string comparison. [\#4564](https://github.com/ClickHouse/ClickHouse/pull/4564) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Clean up the distributed DDL queue in a separate thread so that it doesn't slow down the main loop that processes distributed DDL tasks. [\#4502](https://github.com/ClickHouse/ClickHouse/pull/4502) ([Alex Zatelepin](https://github.com/ztlpn))
+- When `min_bytes_to_use_direct_io` is set to 1, not every file was opened in O\_DIRECT mode, because the data size to read was sometimes underestimated by the size of one compressed block. [\#4526](https://github.com/ClickHouse/ClickHouse/pull/4526) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-12}
+
+- Added support for clang-9. [\#4604](https://github.com/ClickHouse/ClickHouse/pull/4604) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix wrong `__asm__` instructions (again). [\#4621](https://github.com/ClickHouse/ClickHouse/pull/4621) ([Konstantin Podshumok](https://github.com/podshumok))
+- Added the ability to specify settings for `clickhouse-performance-test` from the command line. [\#4437](https://github.com/ClickHouse/ClickHouse/pull/4437) ([alesapin](https://github.com/alesapin))
+- Added dictionary tests to the integration tests. [\#4477](https://github.com/ClickHouse/ClickHouse/pull/4477) ([alesapin](https://github.com/alesapin))
+- Added queries from the benchmark on the website to automated performance tests. [\#4496](https://github.com/ClickHouse/ClickHouse/pull/4496) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- `xxhash.h` does not exist in external lz4 because it is an implementation detail and its symbols are namespaced with the `XXH_NAMESPACE` macro. When lz4 is external, xxHash has to be external too, and dependents have to link to it. [\#4495](https://github.com/ClickHouse/ClickHouse/pull/4495) ([Orivej Desh](https://github.com/orivej))
+- Fixed a case when the `quantileTiming` aggregate function could be called with a negative or floating point argument (this fixes a fuzz test with the undefined behavior sanitizer). [\#4506](https://github.com/ClickHouse/ClickHouse/pull/4506) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Spelling error correction. [\#4531](https://github.com/ClickHouse/ClickHouse/pull/4531) ([sdk2](https://github.com/sdk2))
+- Fix compilation on Mac. [\#4371](https://github.com/ClickHouse/ClickHouse/pull/4371) ([Vitaly Baranov](https://github.com/vitlibar))
+- Build fixes for FreeBSD and various unusual build configurations. [\#4444](https://github.com/ClickHouse/ClickHouse/pull/4444) ([proller](https://github.com/proller))
+
+## ClickHouse Release 19.3 {#clickhouse-release-19-3}
+
+### ClickHouse Release 19.3.9.1, 2019-04-02 {#clickhouse-release-19-3-9-1-2019-04-02}
+
+#### Bug Fixes {#bug-fixes-12}
+
+- Fix crash in `FULL/RIGHT JOIN` when joining on nullable vs not nullable. [\#4855](https://github.com/ClickHouse/ClickHouse/pull/4855) ([Artem Zuikov](https://github.com/4ertus2))
+- Fix segmentation fault in `clickhouse-copier`. [\#4835](https://github.com/ClickHouse/ClickHouse/pull/4835) ([proller](https://github.com/proller))
+- Fixed reading from an `Array(LowCardinality)` column in the rare case when the column contains a long sequence of empty arrays. [\#4850](https://github.com/ClickHouse/ClickHouse/pull/4850) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+
+#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-13}
+
+- Added a way to launch the clickhouse-server image as a custom user. [\#4753](https://github.com/ClickHouse/ClickHouse/pull/4753) ([Mikhail f. Shiryaev](https://github.com/Felixoid))
+
+### ClickHouse Release 19.3.7, 2019-03-12 {#clickhouse-release-19-3-7-2019-03-12}
+
+#### Bug Fixes {#bug-fixes-13}
+
+- Fixed a bug in \#3920. This bug manifested itself as random cache corruption (messages `Unknown codec family code`, `Cannot seek through file`) and segfaults. It first appeared in version 19.1 and is present in versions up to 19.1.10 and 19.3.6. [\#4623](https://github.com/ClickHouse/ClickHouse/pull/4623) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+### ClickHouse Release 19.3.6, 2019-03-02 {#clickhouse-release-19-3-6-2019-03-02}
+
+#### Bug Fixes {#bug-fixes-14}
+
+- When there are more than 1000 threads in a thread pool, `std::terminate` might happen on thread exit. [Azat Khuzhin](https://github.com/azat) [\#4485](https://github.com/ClickHouse/ClickHouse/pull/4485) [\#4505](https://github.com/ClickHouse/ClickHouse/pull/4505) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Now it's possible to create `ReplicatedMergeTree*` tables with comments on columns without defaults, and tables with column codecs without comments and defaults. Also fixed the comparison of codecs. [\#4523](https://github.com/ClickHouse/ClickHouse/pull/4523) ([alesapin](https://github.com/alesapin))
+- Fixed a crash on JOIN with an array or tuple. [\#4552](https://github.com/ClickHouse/ClickHouse/pull/4552) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed a crash in clickhouse-copier with the message `ThreadStatus not created`. [\#4540](https://github.com/ClickHouse/ClickHouse/pull/4540) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed a hang on server shutdown if distributed DDLs were used. [\#4472](https://github.com/ClickHouse/ClickHouse/pull/4472) ([Alex Zatelepin](https://github.com/ztlpn))
+- Incorrect column numbers were printed in error messages about text format parsing for columns with numbers greater than 10. [\#4484](https://github.com/ClickHouse/ClickHouse/pull/4484) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+#### Build/Testing/Packaging Improvements {#buildtestingpackaging-improvements-3}
+
+- Fixed the build with AVX enabled. [\#4527](https://github.com/ClickHouse/ClickHouse/pull/4527) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Enable extended accounting and IO accounting based on a well-known kernel version instead of the kernel it is compiled under. [\#4541](https://github.com/ClickHouse/ClickHouse/pull/4541) ([nvartolomei](https://github.com/nvartolomei))
+- Allow skipping the setting of core\_dump.size\_limit, with a warning instead of a throw if setting the limit fails. [\#4473](https://github.com/ClickHouse/ClickHouse/pull/4473) ([proller](https://github.com/proller))
+- Removed the `inline` tags of `void readBinary(...)` in `Field.cpp`. Also merged redundant `namespace DB` blocks. [\#4530](https://github.com/ClickHouse/ClickHouse/pull/4530) ([hcz](https://github.com/hczhcz))
+
+### ClickHouse Release 19.3.5, 2019-02-21 {#clickhouse-release-19-3-5-2019-02-21}
+
+#### Bug Fixes {#bug-fixes-15}
+
+- Fixed a bug with large HTTP insert query processing. [\#4454](https://github.com/ClickHouse/ClickHouse/pull/4454) ([alesapin](https://github.com/alesapin))
+- Fixed backward incompatibility with old versions due to a wrong implementation of the `send_logs_level` setting. [\#4445](https://github.com/ClickHouse/ClickHouse/pull/4445) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed backward incompatibility of the table function `remote` introduced with column comments. [\#4446](https://github.com/ClickHouse/ClickHouse/pull/4446) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+### ClickHouse Release 19.3.4, 2019-02-16 {#clickhouse-release-19-3-4-2019-02-16}
+
+#### Improvements {#improvements-4}
+
+- The table index size is not accounted for memory limits when doing an `ATTACH TABLE` query. Avoids the possibility that a table cannot be attached after being detached. [\#4396](https://github.com/ClickHouse/ClickHouse/pull/4396) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Slightly raised the limit on the max string and array size received from ZooKeeper. It allows continuing to work with an increased size of `CLIENT_JVMFLAGS=-Djute.maxbuffer=...` on ZooKeeper. [\#4398](https://github.com/ClickHouse/ClickHouse/pull/4398) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Allow repairing an abandoned replica even if it already has a huge number of nodes in its queue. [\#4399](https://github.com/ClickHouse/ClickHouse/pull/4399) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added one required argument to the `SET` index (the max number of stored rows). [\#4386](https://github.com/ClickHouse/ClickHouse/pull/4386) ([Nikita Vasilev](https://github.com/nikvas0))
+
+#### Bug Fixes {#bug-fixes-16}
+
+- Fixed the `WITH ROLLUP` result for group by a single `LowCardinality` key. [\#4384](https://github.com/ClickHouse/ClickHouse/pull/4384) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fixed a bug in the set index (dropping a granule if it contains more than `max_rows` rows). [\#4386](https://github.com/ClickHouse/ClickHouse/pull/4386) ([Nikita Vasilev](https://github.com/nikvas0))
+- Lots of FreeBSD build fixes. [\#4397](https://github.com/ClickHouse/ClickHouse/pull/4397) ([proller](https://github.com/proller))
+- Fixed alias substitution in queries with a subquery containing the same alias (issue [\#4110](https://github.com/ClickHouse/ClickHouse/issues/4110)). [\#4351](https://github.com/ClickHouse/ClickHouse/pull/4351) ([Artem Zuikov](https://github.com/4ertus2))
+
+#### Build/Testing/Packaging Improvements {#buildtestingpackaging-improvements-4}
+
+- Added the ability to run `clickhouse-server` for stateless tests in a docker image. [\#4347](https://github.com/ClickHouse/ClickHouse/pull/4347) ([Vasily Nemkov](https://github.com/Enmk))
+
+### ClickHouse Release 19.3.3, 2019-02-13 {#clickhouse-release-19-3-3-2019-02-13}
+
+#### New Features {#new-features-6}
+
+- Added the `KILL MUTATION` statement, which allows removing mutations that are stuck for some reason. Added the `latest_failed_part`, `latest_fail_time`, `latest_fail_reason` fields to the `system.mutations` table for easier troubleshooting. [\#4287](https://github.com/ClickHouse/ClickHouse/pull/4287) ([Alex Zatelepin](https://github.com/ztlpn))
+- Added the aggregate function `entropy`, which computes Shannon entropy. [\#4238](https://github.com/ClickHouse/ClickHouse/pull/4238) ([Quid37](https://github.com/Quid37))
+- Added the ability to send queries `INSERT INTO tbl VALUES (....` to the server without splitting into `query` and `data` parts. [\#4301](https://github.com/ClickHouse/ClickHouse/pull/4301) ([alesapin](https://github.com/alesapin))
+- A generic implementation of the `arrayWithConstant` function was added. [\#4322](https://github.com/ClickHouse/ClickHouse/pull/4322) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Implemented the `NOT BETWEEN` comparison operator. [\#4228](https://github.com/ClickHouse/ClickHouse/pull/4228) ([Dmitry Naumov](https://github.com/nezed))
+- Implemented `sumMapFiltered` to be able to limit the number of keys whose values will be summed by `sumMap`. [\#4129](https://github.com/ClickHouse/ClickHouse/pull/4129) ([Léo Ercolanelli](https://github.com/ercolanelli-leo))
+- Added support of `Nullable` types in the `mysql` table function. [\#4198](https://github.com/ClickHouse/ClickHouse/pull/4198) ([Emmanuel Donin de Rosière](https://github.com/edonin))
+- Support for arbitrary constant expressions in the `LIMIT` clause. [\#4246](https://github.com/ClickHouse/ClickHouse/pull/4246) ([k3box](https://github.com/k3box))
+- Added the `topKWeighted` aggregate function that takes an additional argument with an (unsigned integer) weight. [\#4245](https://github.com/ClickHouse/ClickHouse/pull/4245) ([Andrew Golman](https://github.com/andrewgolman))
+- `StorageJoin` now supports the `join_any_take_last_row` setting that allows overwriting existing values of the same key. [\#3973](https://github.com/ClickHouse/ClickHouse/pull/3973) ([Amos Bird](https://github.com/amosbird))
+- Added the `toStartOfInterval` function (a minimal sketch follows this list). [\#4304](https://github.com/ClickHouse/ClickHouse/pull/4304) ([Vitaly Baranov](https://github.com/vitlibar))
+- Added the `RowBinaryWithNamesAndTypes` format. [\#4200](https://github.com/ClickHouse/ClickHouse/pull/4200) ([Oleg V. Kozlyuk](https://github.com/DarkWanderer))
+- Added the `IPv4` and `IPv6` data types. More effective implementations of `IPv*` functions. [\#3669](https://github.com/ClickHouse/ClickHouse/pull/3669) ([Vasily Nemkov](https://github.com/Enmk))
+- Added the `toStartOfTenMinutes()` function. [\#4298](https://github.com/ClickHouse/ClickHouse/pull/4298) ([Vitaly Baranov](https://github.com/vitlibar))
+- Added the `Protobuf` output format. [\#4005](https://github.com/ClickHouse/ClickHouse/pull/4005) [\#4158](https://github.com/ClickHouse/ClickHouse/pull/4158) ([Vitaly Baranov](https://github.com/vitlibar))
+- Added brotli support for the HTTP interface for data import (INSERTs). [\#4235](https://github.com/ClickHouse/ClickHouse/pull/4235) ([Mikhail](https://github.com/fandyushin))
+- Added hints for when a user makes a typo in a function name or in the command line client. [\#4239](https://github.com/ClickHouse/ClickHouse/pull/4239) ([Danila Kutenin](https://github.com/danlark1))
+- Added `Query-Id` to the server's HTTP response headers. [\#4231](https://github.com/ClickHouse/ClickHouse/pull/4231) ([Mikhail](https://github.com/fandyushin))
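+A quick illustration of the new `toStartOfInterval` function:
+
+```
+SELECT toStartOfInterval(toDateTime('2019-02-13 12:34:56'), INTERVAL 15 MINUTE);
+-- 2019-02-13 12:30:00
+```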
+
+#### Experimental Features {#experimental-features-2}
+
+- Added `minmax` and `set` data skipping indices for the MergeTree table engines family (a minimal sketch follows this list). [\#4143](https://github.com/ClickHouse/ClickHouse/pull/4143) ([Nikita Vasilev](https://github.com/nikvas0))
+- Added conversion of `CROSS JOIN` to `INNER JOIN` if possible. [\#4221](https://github.com/ClickHouse/ClickHouse/pull/4221) [\#4266](https://github.com/ClickHouse/ClickHouse/pull/4266) ([Artem Zuikov](https://github.com/4ertus2))
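+A minimal sketch of the experimental data skipping indices; the table and column names are placeholders, and in releases of this era the feature was gated behind the `allow_experimental_data_skipping_indices` setting:
+
+```
+CREATE TABLE metrics
+(
+    dt DateTime,
+    value Float64,
+    INDEX value_minmax value TYPE minmax GRANULARITY 4,  -- prune granules by min/max of value
+    INDEX value_set value TYPE set(100) GRANULARITY 4    -- prune granules by a bounded set of distinct values
+)
+ENGINE = MergeTree
+ORDER BY dt;
+```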
+
+#### Bug Fixes {#bug-fixes-17}
+
+- Fixed `Not found column` for duplicate columns in the `JOIN ON` section. [\#4279](https://github.com/ClickHouse/ClickHouse/pull/4279) ([Artem Zuikov](https://github.com/4ertus2))
+- Added the `START REPLICATED SENDS` command to start replicated sends. [\#4229](https://github.com/ClickHouse/ClickHouse/pull/4229) ([nvartolomei](https://github.com/nvartolomei))
+- Fixed aggregate function execution with `Array(LowCardinality)` arguments. [\#4055](https://github.com/ClickHouse/ClickHouse/pull/4055) ([KochetovNicolai](https://github.com/KochetovNicolai))
+- Fixed wrong behavior when doing an `INSERT ... SELECT ... FROM file(...)` query where the file has a `CSVWithNames` or `TSVWithNames` format and the first data row was missing. [\#4297](https://github.com/ClickHouse/ClickHouse/pull/4297) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a crash on dictionary reload if the dictionary is not available. This bug appeared in 19.1.6. [\#4188](https://github.com/ClickHouse/ClickHouse/pull/4188) ([proller](https://github.com/proller))
+- Fixed `ALL JOIN` with duplicates in the right table. [\#4184](https://github.com/ClickHouse/ClickHouse/pull/4184) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed a segmentation fault with `use_uncompressed_cache=1` and an exception with the wrong uncompressed size. This bug appeared in 19.1.6. [\#4186](https://github.com/ClickHouse/ClickHouse/pull/4186) ([alesapin](https://github.com/alesapin))
+- Fixed a `compile_expressions` bug with the comparison of big (more than int16) dates. [\#4341](https://github.com/ClickHouse/ClickHouse/pull/4341) ([alesapin](https://github.com/alesapin))
+- Fixed an infinite loop when selecting from the table function `numbers(0)`. [\#4280](https://github.com/ClickHouse/ClickHouse/pull/4280) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Temporarily disable predicate optimization for `ORDER BY`. [\#3890](https://github.com/ClickHouse/ClickHouse/pull/3890) ([Winter Zhang](https://github.com/zhang2014))
+- Fixed an `Illegal instruction` error when using base64 functions on old CPUs. This error has been reproduced only when ClickHouse was compiled with gcc-8. [\#4275](https://github.com/ClickHouse/ClickHouse/pull/4275) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a `No message received` error when interacting with the PostgreSQL ODBC driver through a TLS connection. Also fixed a segfault when using the MySQL ODBC driver. [\#4170](https://github.com/ClickHouse/ClickHouse/pull/4170) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed an incorrect result when `Date` and `DateTime` arguments are used in branches of the conditional operator (function `if`). Added a generic case for the function `if`. [\#4243](https://github.com/ClickHouse/ClickHouse/pull/4243) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- ClickHouse dictionaries now load within the `clickhouse` process. [\#4166](https://github.com/ClickHouse/ClickHouse/pull/4166) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a deadlock when a `SELECT` from a table with the `File` engine was retried after a `No such file or directory` error. [\#4161](https://github.com/ClickHouse/ClickHouse/pull/4161) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a race condition when selecting from `system.tables` could give a `table doesn't exist` error. [\#4313](https://github.com/ClickHouse/ClickHouse/pull/4313) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- `clickhouse-client` could segfault on exit while loading data for command line suggestions if it was run in interactive mode. [\#4317](https://github.com/ClickHouse/ClickHouse/pull/4317) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a bug when the execution of mutations containing an `IN` operator produced incorrect results. [\#4099](https://github.com/ClickHouse/ClickHouse/pull/4099) ([Alex Zatelepin](https://github.com/ztlpn))
+- Fixed an error: if there was a database with a `Dictionary` engine, all dictionaries were force-loaded at server startup, and if there was a dictionary with a ClickHouse source from localhost, the dictionary couldn't load. [\#4255](https://github.com/ClickHouse/ClickHouse/pull/4255) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed an error when the system logs were attempted to be created again at server shutdown. [\#4254](https://github.com/ClickHouse/ClickHouse/pull/4254) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Correctly return the right type and properly handle locks in the `joinGet` function. [\#4153](https://github.com/ClickHouse/ClickHouse/pull/4153) ([Amos Bird](https://github.com/amosbird))
+- Added the `sumMapWithOverflow` function. [\#4151](https://github.com/ClickHouse/ClickHouse/pull/4151) ([Léo Ercolanelli](https://github.com/ercolanelli-leo))
+- Fixed a segfault with `allow_experimental_multiple_joins_emulation`. [52de2c](https://github.com/ClickHouse/ClickHouse/commit/52de2cd927f7b5257dd67e175f0a5560a48840d0) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed a bug with incorrect `Date` and `DateTime` comparison. [\#4237](https://github.com/ClickHouse/ClickHouse/pull/4237) ([valexey](https://github.com/valexey))
+- Fixed a fuzz test under the undefined behavior sanitizer: added a parameter type check for the `quantile*Weighted` family of functions. [\#4145](https://github.com/ClickHouse/ClickHouse/pull/4145) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a rare race condition when removing old data parts that could fail with a `File not found` error. [\#4378](https://github.com/ClickHouse/ClickHouse/pull/4378) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed the install package with a missing /etc/clickhouse-server/config.xml. [\#4343](https://github.com/ClickHouse/ClickHouse/pull/4343) ([proller](https://github.com/proller))
+
+#### Build/Testing/Packaging Improvements {#buildtestingpackaging-improvements-5}
+
+- Debian package: correct the /etc/clickhouse-server/preprocessed link according to the config. [\#4205](https://github.com/ClickHouse/ClickHouse/pull/4205) ([proller](https://github.com/proller))
+- Various build fixes for FreeBSD. [\#4225](https://github.com/ClickHouse/ClickHouse/pull/4225) ([proller](https://github.com/proller))
+- Added the ability to create, fill and drop tables in perftest. [\#4220](https://github.com/ClickHouse/ClickHouse/pull/4220) ([alesapin](https://github.com/alesapin))
+- Added a script to check for duplicate includes. [\#4326](https://github.com/ClickHouse/ClickHouse/pull/4326) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added the ability to run queries by index in performance tests. [\#4264](https://github.com/ClickHouse/ClickHouse/pull/4264) ([alesapin](https://github.com/alesapin))
+- Packages with debug symbols are suggested to be installed. [\#4274](https://github.com/ClickHouse/ClickHouse/pull/4274) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Refactoring of performance tests. Better logging and signals handling. [\#4171](https://github.com/ClickHouse/ClickHouse/pull/4171) ([alesapin](https://github.com/alesapin))
+- Added docs for the anonymized Yandex.Metrika datasets. [\#4164](https://github.com/ClickHouse/ClickHouse/pull/4164) ([alesapin](https://github.com/alesapin))
[\#4164](https://github.com/ClickHouse/ClickHouse/pull/4164) ([阿利沙平](https://github.com/alesapin)) +- Аdded tool for converting an old month-partitioned part to the custom-partitioned format. [\#4195](https://github.com/ClickHouse/ClickHouse/pull/4195) ([Alex Zatelepin](https://github.com/ztlpn)) +- 添加了有关s3中两个数据集的文档。 [\#4144](https://github.com/ClickHouse/ClickHouse/pull/4144) ([阿利沙平](https://github.com/alesapin)) +- 增加了从拉请求描述创建更新日志的脚本。 [\#4169](https://github.com/ClickHouse/ClickHouse/pull/4169) [\#4173](https://github.com/ClickHouse/ClickHouse/pull/4173) ([KochetovNicolai](https://github.com/KochetovNicolai)) ([KochetovNicolai](https://github.com/KochetovNicolai)) +- 为ClickHouse添加了木偶模块。 [\#4182](https://github.com/ClickHouse/ClickHouse/pull/4182) ([Maxim Fedotov](https://github.com/MaxFedotov)) +- 添加了一组无证函数的文档。 [\#4168](https://github.com/ClickHouse/ClickHouse/pull/4168) ([张冬](https://github.com/zhang2014)) +- ARM构建修复。 [\#4210](https://github.com/ClickHouse/ClickHouse/pull/4210)[\#4306](https://github.com/ClickHouse/ClickHouse/pull/4306) [\#4291](https://github.com/ClickHouse/ClickHouse/pull/4291) ([proller](https://github.com/proller)) ([proller](https://github.com/proller)) +- 字典测试现在能够从运行 `ctest`. [\#4189](https://github.com/ClickHouse/ClickHouse/pull/4189) ([proller](https://github.com/proller)) +- 现在 `/etc/ssl` 用作带有SSL证书的默认目录。 [\#4167](https://github.com/ClickHouse/ClickHouse/pull/4167) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 在开始时添加了检查SSE和AVX指令。 [\#4234](https://github.com/ClickHouse/ClickHouse/pull/4234) ([Igr](https://github.com/igron99)) +- 初始化脚本将等待服务器,直到启动。 [\#4281](https://github.com/ClickHouse/ClickHouse/pull/4281) ([proller](https://github.com/proller)) + +#### 向后不兼容的更改 {#backward-incompatible-changes-1} + +- 已删除 `allow_experimental_low_cardinality_type` 设置。 `LowCardinality` 数据类型已准备就绪。 [\#4323](https://github.com/ClickHouse/ClickHouse/pull/4323) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 根据可用内存量减少标记高速缓存大小和未压缩高速缓存大小。 [\#4240](https://github.com/ClickHouse/ClickHouse/pull/4240) ([Lopatin Konstantin](https://github.com/k-lopatin) +- 添加关键字 `INDEX` 在 `CREATE TABLE` 查询。 具有名称的列 `index` 必须使用反引号或双引号引用: `` `index` ``. [\#4143](https://github.com/ClickHouse/ClickHouse/pull/4143) ([尼基塔\*瓦西列夫](https://github.com/nikvas0)) +- `sumMap` 现在提升结果类型而不是溢出。 老 `sumMap` 行为可以通过使用获得 `sumMapWithOverflow` 功能。 [\#4151](https://github.com/ClickHouse/ClickHouse/pull/4151) ([Léo Ercolanelli](https://github.com/ercolanelli-leo)) + +#### 性能改进 {#performance-improvements-4} + +- `std::sort` 改为 `pdqsort` 对于没有 `LIMIT`. 
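+A small sketch of the `sumMap` change listed above (illustrative input; the old wrap-around behaviour remains available as `sumMapWithOverflow`):
+
+```
+-- The UInt8 values 200 + 200 overflow: sumMap promotes to a wider type (400),
+-- while sumMapWithOverflow wraps around within UInt8 (144).
+SELECT sumMap(k, v) AS promoted, sumMapWithOverflow(k, v) AS wrapped
+FROM
+(
+    SELECT [1] AS k, [toUInt8(200)] AS v
+    UNION ALL
+    SELECT [1], [toUInt8(200)]
+);
+```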
[\#4236](https://github.com/ClickHouse/ClickHouse/pull/4236) ([Evgenii Pravda](https://github.com/kvinty))
+- Now the server reuses threads from the global thread pool. This affects performance in some corner cases. [\#4150](https://github.com/ClickHouse/ClickHouse/pull/4150) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+#### Improvements {#improvements-5}
+
+- Implemented AIO support for FreeBSD. [\#4305](https://github.com/ClickHouse/ClickHouse/pull/4305) ([urgordeadbeef](https://github.com/urgordeadbeef))
+- `SELECT * FROM a JOIN b USING a, b` now returns the `a` and `b` columns only from the left table. [\#4141](https://github.com/ClickHouse/ClickHouse/pull/4141) ([Artem Zuikov](https://github.com/4ertus2))
+- Allow the `-C` option of the client to work the same as the `-c` option. [\#4232](https://github.com/ClickHouse/ClickHouse/pull/4232) ([syominsergey](https://github.com/syominsergey))
+- Now the `--password` option used without a value requires the password from stdin. [\#4230](https://github.com/ClickHouse/ClickHouse/pull/4230) ([BSD\_Conqueror](https://github.com/bsd-conqueror))
+- Added highlighting of unescaped metacharacters in string literals that contain `LIKE` expressions or regexps. [\#4327](https://github.com/ClickHouse/ClickHouse/pull/4327) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added cancelling of HTTP read-only queries if the client socket goes away. [\#4213](https://github.com/ClickHouse/ClickHouse/pull/4213) ([nvartolomei](https://github.com/nvartolomei))
+- Now the server reports progress to keep client connections alive. [\#4215](https://github.com/ClickHouse/ClickHouse/pull/4215) ([Ivan](https://github.com/abyss7))
+- Slightly better message with the reason for an OPTIMIZE query when the `optimize_throw_if_noop` setting is enabled. [\#4294](https://github.com/ClickHouse/ClickHouse/pull/4294) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added support of the `--version` option for the clickhouse server. [\#4251](https://github.com/ClickHouse/ClickHouse/pull/4251) ([Lopatin Konstantin](https://github.com/k-lopatin))
+- Added the `--help/-h` option to `clickhouse-server`. [\#4233](https://github.com/ClickHouse/ClickHouse/pull/4233) ([Yuri Baranov](https://github.com/yurriy))
+- Added support for scalar subqueries with an aggregate function state result. [\#4348](https://github.com/ClickHouse/ClickHouse/pull/4348) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Improved server shutdown time and ALTERs waiting time. [\#4372](https://github.com/ClickHouse/ClickHouse/pull/4372) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added info about the replicated\_can\_become\_leader setting to system.replicas, and added logging if the replica won't try to become leader. [\#4379](https://github.com/ClickHouse/ClickHouse/pull/4379) ([Alex Zatelepin](https://github.com/ztlpn))
+
+## ClickHouse Release 19.1 {#clickhouse-release-19-1}
+
+### ClickHouse Release 19.1.14, 2019-03-14 {#clickhouse-release-19-1-14-2019-03-14}
+
+- Fixed the error `Column ... queried more than once` that may happen if the setting `asterisk_left_columns_only` is set to 1 in case of using `GLOBAL JOIN` with `SELECT *` (rare case). The issue does not exist in 19.3 and newer. [6bac7d8d](https://github.com/ClickHouse/ClickHouse/pull/4692/commits/6bac7d8d11a9b0d6de0b32b53c47eb2f6f8e7062) ([Artem Zuikov](https://github.com/4ertus2))
+
+### ClickHouse Release 19.1.13, 2019-03-12 {#clickhouse-release-19-1-13-2019-03-12}
+
+This release contains exactly the same set of patches as 19.3.7.
+
+### ClickHouse Release 19.1.10, 2019-03-03 {#clickhouse-release-19-1-10-2019-03-03}
+
+This release contains exactly the same set of patches as 19.3.6.
+
+## ClickHouse Release 19.1 {#clickhouse-release-19-1-1}
+
+### ClickHouse Release 19.1.9, 2019-02-21 {#clickhouse-release-19-1-9-2019-02-21}
+
+#### Bug Fixes {#bug-fixes-18}
+
+- Fixed backward incompatibility with older versions due to a wrong implementation of the `send_logs_level` setting. [\#4445](https://github.com/ClickHouse/ClickHouse/pull/4445) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed backward incompatibility of table function `remote` introduced with column comments.
[\#4446](https://github.com/ClickHouse/ClickHouse/pull/4446) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+### ClickHouse Release 19.1.8, 2019-02-16 {#clickhouse-release-19-1-8-2019-02-16}
+
+#### Bug Fixes {#bug-fixes-19}
+
+- Fixed the install package with missing /etc/clickhouse-server/config.xml. [\#4343](https://github.com/ClickHouse/ClickHouse/pull/4343) ([proller](https://github.com/proller))
+
+## ClickHouse Release 19.1 {#clickhouse-release-19-1-2}
+
+### ClickHouse Release 19.1.7, 2019-02-15 {#clickhouse-release-19-1-7-2019-02-15}
+
+#### Bug Fixes {#bug-fixes-20}
+
+- Correctly return the right type and properly handle locks in the `joinGet` function. [\#4153](https://github.com/ClickHouse/ClickHouse/pull/4153) ([Amos Bird](https://github.com/amosbird))
+- Fixed an error when system logs were tried to be created again at server shutdown. [\#4254](https://github.com/ClickHouse/ClickHouse/pull/4254) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed an error: if there is a database with the `Dictionary` engine, all dictionaries are forced to load at server startup, and if there is a dictionary with a ClickHouse source from localhost, that dictionary cannot load. [\#4255](https://github.com/ClickHouse/ClickHouse/pull/4255) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a bug when the execution of mutations containing `IN` operators was producing incorrect results. [\#4099](https://github.com/ClickHouse/ClickHouse/pull/4099) ([Alex Zatelepin](https://github.com/ztlpn))
+- `clickhouse-client` could segfault on exit while loading data for command line suggestions if it was run in interactive mode. [\#4317](https://github.com/ClickHouse/ClickHouse/pull/4317) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a race condition when selecting from `system.tables` may give a `table doesn't exist` error. [\#4313](https://github.com/ClickHouse/ClickHouse/pull/4313) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed deadlock when a `SELECT` from a table with `File` engine was retried after a `No such file or directory` error. [\#4161](https://github.com/ClickHouse/ClickHouse/pull/4161) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed an issue: local ClickHouse dictionaries were loaded via TCP, but should be loaded within the process. [\#4166](https://github.com/ClickHouse/ClickHouse/pull/4166) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed `No message received` error when interacting with the PostgreSQL ODBC driver through a TLS connection. Also fixed a segfault when using the MySQL ODBC driver. [\#4170](https://github.com/ClickHouse/ClickHouse/pull/4170) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Temporarily disabled predicate optimization for `ORDER BY`. [\#3890](https://github.com/ClickHouse/ClickHouse/pull/3890) ([Winter Zhang](https://github.com/zhang2014))
+- Fixed infinite loop when selecting from table function `numbers(0)`. [\#4280](https://github.com/ClickHouse/ClickHouse/pull/4280) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed `compile_expressions` bug with comparison of big (more than int16) dates. [\#4341](https://github.com/ClickHouse/ClickHouse/pull/4341) ([alesapin](https://github.com/alesapin))
+- Fixed segmentation fault with `uncompressed_cache=1` and exception with wrong uncompressed size. [\#4186](https://github.com/ClickHouse/ClickHouse/pull/4186) ([alesapin](https://github.com/alesapin))
+- Fixed `ALL JOIN` with duplicates in the right table. [\#4184](https://github.com/ClickHouse/ClickHouse/pull/4184) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed wrong behaviour when doing an `INSERT ... SELECT ...
FROM file(...)` query while the file has `CSVWithNames` or `TSVWithNames` format and the first data row is missing. [\#4297](https://github.com/ClickHouse/ClickHouse/pull/4297) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed aggregate functions execution with `Array(LowCardinality)` arguments. [\#4055](https://github.com/ClickHouse/ClickHouse/pull/4055) ([KochetovNicolai](https://github.com/KochetovNicolai))
+- Debian package: correct the /etc/clickhouse-server/preprocessed link according to the config. [\#4205](https://github.com/ClickHouse/ClickHouse/pull/4205) ([proller](https://github.com/proller))
+- Fixed fuzz test under undefined behavior sanitizer: added parameter type check for the `quantile*Weighted` family of functions. [\#4145](https://github.com/ClickHouse/ClickHouse/pull/4145) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Made the `START REPLICATED SENDS` command start replicated sends. [\#4229](https://github.com/ClickHouse/ClickHouse/pull/4229) ([nvartolomei](https://github.com/nvartolomei))
+- Fixed `Not found column` for duplicate columns in the `JOIN ON` section. [\#4279](https://github.com/ClickHouse/ClickHouse/pull/4279) ([Artem Zuikov](https://github.com/4ertus2))
+- Now `/etc/ssl` is used as the default directory with SSL certificates. [\#4167](https://github.com/ClickHouse/ClickHouse/pull/4167) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed crash on dictionary reload if the dictionary is not available. [\#4188](https://github.com/ClickHouse/ClickHouse/pull/4188) ([proller](https://github.com/proller))
+- Fixed a bug with incorrect `Date` and `DateTime` comparison. [\#4237](https://github.com/ClickHouse/ClickHouse/pull/4237) ([valexey](https://github.com/valexey))
+- Fixed incorrect result when `Date` and `DateTime` arguments are used in branches of the conditional operator (function `if`). Added a generic case for function `if`. [\#4243](https://github.com/ClickHouse/ClickHouse/pull/4243) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+### ClickHouse Release 19.1.6, 2019-01-24 {#clickhouse-release-19-1-6-2019-01-24}
+
+#### New Features {#new-features-7}
+
+- Custom per-column compression codecs for tables. [\#3899](https://github.com/ClickHouse/ClickHouse/pull/3899) [\#4111](https://github.com/ClickHouse/ClickHouse/pull/4111) ([alesapin](https://github.com/alesapin), [Winter Zhang](https://github.com/zhang2014), [Anatoly](https://github.com/Sindbag))
+- Added compression codec `Delta`. [\#4052](https://github.com/ClickHouse/ClickHouse/pull/4052) ([alesapin](https://github.com/alesapin))
+- Allow to `ALTER` compression codecs. [\#4054](https://github.com/ClickHouse/ClickHouse/pull/4054) ([alesapin](https://github.com/alesapin))
+- Added functions `left`, `right`, `trim`, `ltrim`, `rtrim`, `timestampadd`, `timestampsub` for SQL standard compatibility. [\#3826](https://github.com/ClickHouse/ClickHouse/pull/3826) ([Ivan Blinkov](https://github.com/blinkov))
+- Support for writing to `HDFS` tables and the `hdfs` table function. [\#4084](https://github.com/ClickHouse/ClickHouse/pull/4084) ([alesapin](https://github.com/alesapin))
+- Added functions to search for multiple constant strings in a big haystack: `multiPosition`, `multiSearch`, `firstMatch`, also with `-UTF8`, `-CaseInsensitive` and `-CaseInsensitiveUTF8` variants. [\#4053](https://github.com/ClickHouse/ClickHouse/pull/4053) ([Danila Kutenin](https://github.com/danlark1))
+- Pruning of unused shards if the `SELECT` query filters by the sharding key (setting `optimize_skip_unused_shards`). [\#3851](https://github.com/ClickHouse/ClickHouse/pull/3851) ([Gleb Kanterov](https://github.com/kanterov), [Ivan](https://github.com/abyss7))
+- Allow the `Kafka` engine to ignore some number of parsing errors per block. [\#4094](https://github.com/ClickHouse/ClickHouse/pull/4094) ([Ivan](https://github.com/abyss7))
+- Added support for `CatBoost` multiclass model evaluation. The function `modelEvaluate` returns a tuple with per-class raw predictions for multiclass models. `libcatboostmodel.so` should be built with [\#607](https://github.com/catboost/catboost/pull/607). [\#3959](https://github.com/ClickHouse/ClickHouse/pull/3959) ([KochetovNicolai](https://github.com/KochetovNicolai))
+- Added functions `filesystemAvailable`, `filesystemFree`, `filesystemCapacity`. [\#4097](https://github.com/ClickHouse/ClickHouse/pull/4097) ([Boris Granveaud](https://github.com/bgranvea))
+- Added hash functions `xxHash64` and `xxHash32`.
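+For instance (a tiny sketch; the numeric results are simply whatever the xxHash algorithms yield for this input):
+
+```
+-- Fast non-cryptographic hashes returning UInt64 and UInt32 respectively.
+SELECT xxHash64('ClickHouse') AS h64, xxHash32('ClickHouse') AS h32;
+```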
[\#3905](https://github.com/ClickHouse/ClickHouse/pull/3905) ([filimonov](https://github.com/filimonov)) +- 已添加 `gccMurmurHash` 散列函数(GCC风味杂音散列),它使用相同的散列种子 [海湾合作委员会](https://github.com/gcc-mirror/gcc/blob/41d6b10e96a1de98e90a7c0378437c3255814b16/libstdc%2B%2B-v3/include/bits/functional_hash.h#L191) [\#4000](https://github.com/ClickHouse/ClickHouse/pull/4000) ([sundyli](https://github.com/sundy-li)) +- 添加了哈希函数 `javaHash`, `hiveHash`. [\#3811](https://github.com/ClickHouse/ClickHouse/pull/3811) ([上书结365](https://github.com/shangshujie365)) +- 添加表功能 `remoteSecure`. 函数的工作原理为 `remote`,但使用安全连接。 [\#4088](https://github.com/ClickHouse/ClickHouse/pull/4088) ([proller](https://github.com/proller)) + +#### 实验特点 {#experimental-features-3} + +- 添加了多个联接仿真 (`allow_experimental_multiple_joins_emulation` 设置)。 [\#3946](https://github.com/ClickHouse/ClickHouse/pull/3946) ([Artem Zuikov](https://github.com/4ertus2)) + +#### 错误修复 {#bug-fixes-21} + +- 赂眉露\>\> `compiled_expression_cache_size` 默认情况下设置有限,以降低内存消耗。 [\#4041](https://github.com/ClickHouse/ClickHouse/pull/4041) ([阿利沙平](https://github.com/alesapin)) +- 修复导致执行更改复制表的线程和从ZooKeeper更新配置的线程中挂断的错误。 [\#2947](https://github.com/ClickHouse/ClickHouse/issues/2947) [\#3891](https://github.com/ClickHouse/ClickHouse/issues/3891) [\#3934](https://github.com/ClickHouse/ClickHouse/pull/3934) ([Alex Zatelepin](https://github.com/ztlpn)) +- 修复了执行分布式ALTER任务时的争用条件。 争用条件导致多个副本试图执行任务和所有副本,除了一个失败与ZooKeeper错误。 [\#3904](https://github.com/ClickHouse/ClickHouse/pull/3904) ([Alex Zatelepin](https://github.com/ztlpn)) +- 修复错误时 `from_zk` 在对ZooKeeper的请求超时后,配置元素没有刷新。 [\#2947](https://github.com/ClickHouse/ClickHouse/issues/2947) [\#3947](https://github.com/ClickHouse/ClickHouse/pull/3947) ([Alex Zatelepin](https://github.com/ztlpn)) +- 修复IPv4子网掩码错误前缀的错误。 [\#3945](https://github.com/ClickHouse/ClickHouse/pull/3945) ([阿利沙平](https://github.com/alesapin)) +- 固定崩溃 (`std::terminate`)在极少数情况下,由于资源耗尽而无法创建新线程。 [\#3956](https://github.com/ClickHouse/ClickHouse/pull/3956) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 修正错误时 `remote` 表函数执行时,错误的限制被用于 `getStructureOfRemoteTable`. [\#4009](https://github.com/ClickHouse/ClickHouse/pull/4009) ([阿利沙平](https://github.com/alesapin)) +- 修复netlink套接字的泄漏。 它们被放置在一个池中,在那里它们永远不会被删除,并且当所有当前套接字都在使用时,在新线程开始时创建了新的套接字。 [\#4017](https://github.com/ClickHouse/ClickHouse/pull/4017) ([Alex Zatelepin](https://github.com/ztlpn)) +- 修复关闭错误 `/proc/self/fd` 目录早于所有fds被读取 `/proc` 分叉后 `odbc-bridge` 子进程。 [\#4120](https://github.com/ClickHouse/ClickHouse/pull/4120) ([阿利沙平](https://github.com/alesapin)) +- 在主键中使用字符串的情况下,固定字符串到UInt单调转换。 [\#3870](https://github.com/ClickHouse/ClickHouse/pull/3870) ([张冬](https://github.com/zhang2014)) +- 整数转换函数单调性计算中的固定误差。 [\#3921](https://github.com/ClickHouse/ClickHouse/pull/3921) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 修复段错误 `arrayEnumerateUniq`, `arrayEnumerateDense` 函数在一些无效的参数的情况下。 [\#3909](https://github.com/ClickHouse/ClickHouse/pull/3909) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 在StorageMerge修复UB。 [\#3910](https://github.com/ClickHouse/ClickHouse/pull/3910) ([阿莫斯鸟](https://github.com/amosbird)) +- 修正函数中的段错误 `addDays`, `subtractDays`. 
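+A minimal usage sketch for the two functions from the fix above (the dates are arbitrary):
+
+```
+-- Date arithmetic that previously could crash with some invalid arguments.
+SELECT
+    addDays(toDate('2019-01-24'), 7)                   AS a_week_later,
+    subtractDays(toDateTime('2019-01-24 12:00:00'), 1) AS a_day_before;
+```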
[\#3913](https://github.com/ClickHouse/ClickHouse/pull/3913) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 修正错误:功能 `round`, `floor`, `trunc`, `ceil` 在整数参数和大负比例执行时可能会返回虚假结果。 [\#3914](https://github.com/ClickHouse/ClickHouse/pull/3914) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 修正了一个错误引起的 ‘kill query sync’ 从而导致核心转储。 [\#3916](https://github.com/ClickHouse/ClickHouse/pull/3916) ([muVulDeePecker](https://github.com/fancyqlx)) +- 修复空复制队列后延迟较长的bug。 [\#3928](https://github.com/ClickHouse/ClickHouse/pull/3928) [\#3932](https://github.com/ClickHouse/ClickHouse/pull/3932) ([阿利沙平](https://github.com/alesapin)) +- 修复了插入到表中的过多内存使用情况 `LowCardinality` 主键。 [\#3955](https://github.com/ClickHouse/ClickHouse/pull/3955) ([KochetovNicolai](https://github.com/KochetovNicolai)) +- 固定 `LowCardinality` 序列化 `Native` 在空数组的情况下格式化。 [\#3907](https://github.com/ClickHouse/ClickHouse/issues/3907) [\#4011](https://github.com/ClickHouse/ClickHouse/pull/4011) ([KochetovNicolai](https://github.com/KochetovNicolai)) +- 固定不正确的结果,而使用distinct通过单LowCardinality数字列。 [\#3895](https://github.com/ClickHouse/ClickHouse/issues/3895) [\#4012](https://github.com/ClickHouse/ClickHouse/pull/4012) ([KochetovNicolai](https://github.com/KochetovNicolai)) +- 固定专门的聚合与LowCardinality键(以防万一 `compile` 设置已启用)。 [\#3886](https://github.com/ClickHouse/ClickHouse/pull/3886) ([KochetovNicolai](https://github.com/KochetovNicolai)) +- 修复复制表查询的用户和密码转发。 [\#3957](https://github.com/ClickHouse/ClickHouse/pull/3957) ([阿利沙平](https://github.com/alesapin)) ([小路](https://github.com/nicelulu)) +- 修复了在重新加载字典时在字典数据库中列出表时可能发生的非常罕见的争用条件。 [\#3970](https://github.com/ClickHouse/ClickHouse/pull/3970) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 修正了与ROLLUP或CUBE一起使用时的错误结果。 [\#3756](https://github.com/ClickHouse/ClickHouse/issues/3756) [\#3837](https://github.com/ClickHouse/ClickHouse/pull/3837) ([周三](https://github.com/reflection)) +- 用于查询的固定列别名 `JOIN ON` 语法和分布式表。 [\#3980](https://github.com/ClickHouse/ClickHouse/pull/3980) ([张冬](https://github.com/zhang2014)) +- 在内部实现固定的错误 `quantileTDigest` (由阿尔乔姆Vakhrushev发现)。 这个错误从来没有发生在ClickHouse中,只有那些直接使用ClickHouse代码库作为库的人才有关。 [\#3935](https://github.com/ClickHouse/ClickHouse/pull/3935) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) + +#### 改进 {#improvements-6} + +- 支持 `IF NOT EXISTS` 在 `ALTER TABLE ADD COLUMN` 发言以及 `IF EXISTS` 在 `DROP/MODIFY/CLEAR/COMMENT COLUMN`. 
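+A sketch of these idempotent clauses (the table `t` and its columns are hypothetical):
+
+```
+-- Both statements succeed even when re-run.
+ALTER TABLE t ADD COLUMN IF NOT EXISTS new_flag UInt8 DEFAULT 0;
+ALTER TABLE t DROP COLUMN IF EXISTS obsolete_column;
+```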
[\#3900](https://github.com/ClickHouse/ClickHouse/pull/3900) ([Boris Granveaud](https://github.com/bgranvea)) +- 功能 `parseDateTimeBestEffort`:支持格式 `DD.MM.YYYY`, `DD.MM.YY`, `DD-MM-YYYY`, `DD-Mon-YYYY`, `DD/Month/YYYY` 和相似。 [\#3922](https://github.com/ClickHouse/ClickHouse/pull/3922) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- `CapnProtoInputStream` 现在支持锯齿结构。 [\#4063](https://github.com/ClickHouse/ClickHouse/pull/4063) ([Odin Hultgren Van Der Horst](https://github.com/Miniwoffer)) +- 可用性改进:增加了从数据目录的所有者启动服务器进程的检查。 如果数据属于非root用户,则不允许从root用户启动服务器。 [\#3785](https://github.com/ClickHouse/ClickHouse/pull/3785) ([谢尔盖-v-加尔采夫](https://github.com/sergey-v-galtsev)) +- 在分析具有联接的查询期间检查所需列的更好的逻辑。 [\#3930](https://github.com/ClickHouse/ClickHouse/pull/3930) ([Artem Zuikov](https://github.com/4ertus2)) +- 减少在单个服务器中有大量分布式表的情况下的连接数。 [\#3726](https://github.com/ClickHouse/ClickHouse/pull/3726) ([张冬](https://github.com/zhang2014)) +- 支持的总计行 `WITH TOTALS` 查询ODBC驱动程序。 [\#3836](https://github.com/ClickHouse/ClickHouse/pull/3836) ([Maksim Koritckiy](https://github.com/nightweb)) +- 允许使用 `Enum`s为if函数内的整数。 [\#3875](https://github.com/ClickHouse/ClickHouse/pull/3875) ([伊万](https://github.com/abyss7)) +- 已添加 `low_cardinality_allow_in_native_format` 设置。 如果禁用,请不要使用 `LowCadrinality` 输入 `Native` 格式。 [\#3879](https://github.com/ClickHouse/ClickHouse/pull/3879) ([KochetovNicolai](https://github.com/KochetovNicolai)) +- 从编译表达式缓存中删除了一些冗余对象以降低内存使用率。 [\#4042](https://github.com/ClickHouse/ClickHouse/pull/4042) ([阿利沙平](https://github.com/alesapin)) +- 添加检查 `SET send_logs_level = 'value'` 查询接受适当的值。 [\#3873](https://github.com/ClickHouse/ClickHouse/pull/3873) ([Sabyanin马克西姆](https://github.com/s-mx)) +- 固定数据类型检查类型转换功能。 [\#3896](https://github.com/ClickHouse/ClickHouse/pull/3896) ([张冬](https://github.com/zhang2014)) + +#### 性能改进 {#performance-improvements-5} + +- 添加MergeTree设置 `use_minimalistic_part_header_in_zookeeper`. 如果启用,复制的表将在单个零件znode中存储紧凑零件元数据。 这可以显着减少ZooKeeper快照大小(特别是如果表有很多列)。 请注意,启用此设置后,您将无法降级到不支持它的版本。 [\#3960](https://github.com/ClickHouse/ClickHouse/pull/3960) ([Alex Zatelepin](https://github.com/ztlpn)) +- 为函数添加基于DFA的实现 `sequenceMatch` 和 `sequenceCount` 以防模式不包含时间。 [\#4004](https://github.com/ClickHouse/ClickHouse/pull/4004) ([Léo Ercolanelli](https://github.com/ercolanelli-leo)) +- 整数序列化的性能改进。 [\#3968](https://github.com/ClickHouse/ClickHouse/pull/3968) ([阿莫斯鸟](https://github.com/amosbird)) +- 零左填充PODArray,使-1元素始终有效并归零。 它用于无分支计算偏移量。 [\#3920](https://github.com/ClickHouse/ClickHouse/pull/3920) ([阿莫斯鸟](https://github.com/amosbird)) +- 还原 `jemalloc` 版本导致性能下降。 [\#4018](https://github.com/ClickHouse/ClickHouse/pull/4018) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) + +#### 向后不兼容的更改 {#backward-incompatible-changes-2} + +- 删除无证功能 `ALTER MODIFY PRIMARY KEY` 因为它被 `ALTER MODIFY ORDER BY` 指挥部 [\#3887](https://github.com/ClickHouse/ClickHouse/pull/3887) ([Alex Zatelepin](https://github.com/ztlpn)) +- 删除功能 `shardByHash`. [\#3833](https://github.com/ClickHouse/ClickHouse/pull/3833) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 禁止使用具有结果类型的标量子查询 `AggregateFunction`. 
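+Roughly the pattern that this change started rejecting (a sketch; the exact error text may differ, and the 19.3 improvements above describe support for this being added back later):
+
+```
+-- uniqState() produces an AggregateFunction state; using it as a
+-- scalar subquery result now throws an exception.
+SELECT (SELECT uniqState(number) FROM numbers(10));
+```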
[\#3865](https://github.com/ClickHouse/ClickHouse/pull/3865) ([伊万](https://github.com/abyss7)) + +#### 构建/测试/打包改进 {#buildtestingpackaging-improvements-6} + +- 增加了对PowerPC的支持 (`ppc64le`)建设。 [\#4132](https://github.com/ClickHouse/ClickHouse/pull/4132) ([Danila Kutenin](https://github.com/danlark1)) +- 有状态功能测试在公共可用数据集上运行。 [\#3969](https://github.com/ClickHouse/ClickHouse/pull/3969) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 修复了服务器无法启动时的错误 `bash: /usr/bin/clickhouse-extract-from-config: Operation not permitted` Docker或systemd-nspawn中的消息。 [\#4136](https://github.com/ClickHouse/ClickHouse/pull/4136) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 更新 `rdkafka` 库v1.0.0-RC5。 使用cppkafka而不是原始的C接口。 [\#4025](https://github.com/ClickHouse/ClickHouse/pull/4025) ([伊万](https://github.com/abyss7)) +- 更新 `mariadb-client` 图书馆. 修复了UBSan发现的问题之一。 [\#3924](https://github.com/ClickHouse/ClickHouse/pull/3924) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- UBSan版本的一些修复。 [\#3926](https://github.com/ClickHouse/ClickHouse/pull/3926) [\#3021](https://github.com/ClickHouse/ClickHouse/pull/3021) [\#3948](https://github.com/ClickHouse/ClickHouse/pull/3948) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 增加了使用UBSan构建的每次提交运行的测试。 +- 增加了PVS-Studio静态分析器的每次提交运行。 +- 修复了PVS-Studio发现的错误。 [\#4013](https://github.com/ClickHouse/ClickHouse/pull/4013) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 修正了glibc兼容性问题。 [\#4100](https://github.com/ClickHouse/ClickHouse/pull/4100) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 将Docker映像移动到18.10并为glibc\>=2.28添加兼容性文件 [\#3965](https://github.com/ClickHouse/ClickHouse/pull/3965) ([阿利沙平](https://github.com/alesapin)) +- 如果用户不想在服务器码头镜像中播放目录,请添加env变量。 [\#3967](https://github.com/ClickHouse/ClickHouse/pull/3967) ([阿利沙平](https://github.com/alesapin)) +- 启用了大多数来自警告 `-Weverything` 在叮当声。 已启用 `-Wpedantic`. [\#3986](https://github.com/ClickHouse/ClickHouse/pull/3986) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 增加了一些只在clang8中可用的警告。 [\#3993](https://github.com/ClickHouse/ClickHouse/pull/3993) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 链接到 `libLLVM` 在使用共享链接时,而不是单独的LLVM库。 [\#3989](https://github.com/ClickHouse/ClickHouse/pull/3989) ([Origej Desh](https://github.com/orivej)) +- 为测试图像添加了消毒变量。 [\#4072](https://github.com/ClickHouse/ClickHouse/pull/4072) ([阿利沙平](https://github.com/alesapin)) +- `clickhouse-server` debian软件包会推荐 `libcap2-bin` 使用包 `setcap` 设置功能的工具。 这是可选的。 [\#4093](https://github.com/ClickHouse/ClickHouse/pull/4093) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 改进的编译时间,固定包括。 [\#3898](https://github.com/ClickHouse/ClickHouse/pull/3898) ([proller](https://github.com/proller)) +- 添加了哈希函数的性能测试。 [\#3918](https://github.com/ClickHouse/ClickHouse/pull/3918) ([filimonov](https://github.com/filimonov)) +- 固定循环库依赖。 [\#3958](https://github.com/ClickHouse/ClickHouse/pull/3958) ([proller](https://github.com/proller)) +- 改进的编译与低可用内存。 [\#4030](https://github.com/ClickHouse/ClickHouse/pull/4030) ([proller](https://github.com/proller)) +- 添加了测试脚本,以重现性能下降 `jemalloc`. [\#4036](https://github.com/ClickHouse/ClickHouse/pull/4036) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 修正了在下面的注释和字符串文字拼写错误 `dbms`. 
[\#4122](https://github.com/ClickHouse/ClickHouse/pull/4122) ([maiha](https://github.com/maiha))
+- Fixed typos in comments. [\#4089](https://github.com/ClickHouse/ClickHouse/pull/4089) ([Evgenii Pravda](https://github.com/kvinty))
+
+## [Changelog for 2018](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/changelog/2018.md) {#changelog-for-2018}
diff --git a/docs/zh/whats_new/changelog/index.md b/docs/zh/whats_new/changelog/index.md new file mode 100644 index 00000000000..90bb7abe0b0 --- /dev/null +++ b/docs/zh/whats_new/changelog/index.md @@ -0,0 +1,665 @@ +--- +machine_translated: true +machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1 +---
+
+## ClickHouse Release v20.3 {#clickhouse-release-v20-3}
+
+### ClickHouse Release v20.3.4.10, 2020-03-20 {#clickhouse-release-v20-3-4-10-2020-03-20}
+
+#### Bug Fix {#bug-fix}
+
+- This release also contains all bug fixes from 20.1.8.41.
+- Fixed missing `rows_before_limit_at_least` for queries over http (with the processors pipeline). This fixes [\#9730](https://github.com/ClickHouse/ClickHouse/issues/9730). [\#9757](https://github.com/ClickHouse/ClickHouse/pull/9757) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+
+### ClickHouse Release v20.3.3.6, 2020-03-17 {#clickhouse-release-v20-3-3-6-2020-03-17}
+
+#### Bug Fix {#bug-fix-1}
+
+- This release also contains all bug fixes from 20.1.7.38.
+- Fixed a bug in replication that does not allow replication to work if the user has executed mutations on a previous version. This fixes [\#9645](https://github.com/ClickHouse/ClickHouse/issues/9645). [\#9652](https://github.com/ClickHouse/ClickHouse/pull/9652) ([alesapin](https://github.com/alesapin)). It makes version 20.3 backward compatible again.
+- Added setting `use_compact_format_in_distributed_parts_names` which allows writing files for `INSERT` queries into `Distributed` tables in a more compact format. This fixes [\#9647](https://github.com/ClickHouse/ClickHouse/issues/9647). [\#9653](https://github.com/ClickHouse/ClickHouse/pull/9653) ([alesapin](https://github.com/alesapin)). It makes version 20.3 backward compatible again.
+
+### ClickHouse Release v20.3.2.1, 2020-03-12 {#clickhouse-release-v20-3-2-1-2020-03-12}
+
+#### Backward Incompatible Change {#backward-incompatible-change}
+
+- Fixed the issue `file name too long` when sending data for `Distributed` tables for a large number of replicas. Fixed the issue that replica credentials were exposed in the server log. The format of the directory name on disk was changed to `[shard{shard_index}[_replica{replica_index}]]`. [\#8911](https://github.com/ClickHouse/ClickHouse/pull/8911) ([Mikhail Korotov](https://github.com/millb)) After you upgrade to the new version, you will not be able to downgrade without manual intervention, because the old server version does not recognize the new directory format. If you want to downgrade, you have to manually rename the corresponding directories to the old format. This change is relevant only if you have used asynchronous `INSERT`s into `Distributed` tables. In version 20.3.3 we will introduce a setting that will allow you to enable the new format gradually.
+- Changed the format of replication log entries for mutation commands. You have to wait for old mutations to be processed before installing the new version.
+- Implemented a simple memory profiler that dumps stacktraces to `system.trace_log` every N bytes over the soft allocation limit. [\#8765](https://github.com/ClickHouse/ClickHouse/pull/8765) ([Ivan](https://github.com/abyss7)) [\#9472](https://github.com/ClickHouse/ClickHouse/pull/9472) ([alexey-milovidov](https://github.com/alexey-milovidov)) The column of `system.trace_log` was renamed from `timer_type` to `trace_type`. This will require changes in third-party performance analysis and flamegraph processing tools.
+- Use the OS thread id everywhere instead of the internal thread number. This fixes [\#7477](https://github.com/ClickHouse/ClickHouse/issues/7477). Old `clickhouse-client` cannot receive logs that are sent from the server when the setting `send_logs_level` is enabled, because the names and types of the structured log messages were changed. On the other hand, different server versions can send logs with different types to each other. When you don't use the `send_logs_level` setting, you should not care. [\#8954](https://github.com/ClickHouse/ClickHouse/pull/8954) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Removed the `indexHint` function. [\#9542](https://github.com/ClickHouse/ClickHouse/pull/9542) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Removed the `findClusterIndex`, `findClusterValue` functions. This fixes [\#8641](https://github.com/ClickHouse/ClickHouse/issues/8641).
如果您正在使用这些功能,请发送电子邮件至 `clickhouse-feedback@yandex-team.com` [\#9543](https://github.com/ClickHouse/ClickHouse/pull/9543) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 现在不允许创建列或添加列 `SELECT` 子查询作为默认表达式。 [\#9481](https://github.com/ClickHouse/ClickHouse/pull/9481) ([阿利沙平](https://github.com/alesapin)) +- 需要联接中的子查询的别名。 [\#9274](https://github.com/ClickHouse/ClickHouse/pull/9274) ([Artem Zuikov](https://github.com/4ertus2)) +- 改进 `ALTER MODIFY/ADD` 查询逻辑。 现在你不能 `ADD` 不带类型的列, `MODIFY` 默认表达式不改变列的类型和 `MODIFY` type不会丢失默认表达式值。 修复 [\#8669](https://github.com/ClickHouse/ClickHouse/issues/8669). [\#9227](https://github.com/ClickHouse/ClickHouse/pull/9227) ([阿利沙平](https://github.com/alesapin)) +- 要求重新启动服务器以应用日志记录配置中的更改。 这是一种临时解决方法,可以避免服务器将日志记录到已删除的日志文件中的错误(请参阅 [\#8696](https://github.com/ClickHouse/ClickHouse/issues/8696)). [\#8707](https://github.com/ClickHouse/ClickHouse/pull/8707) ([Alexander Kuzmenkov](https://github.com/akuzm)) +- 设置 `experimental_use_processors` 默认情况下启用。 此设置允许使用新的查询管道。 这是内部重构,我们期望没有明显的变化。 如果您将看到任何问题,请将其设置为返回零。 [\#8768](https://github.com/ClickHouse/ClickHouse/pull/8768) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) + +#### 新功能 {#new-feature} + +- 添加 `Avro` 和 `AvroConfluent` 输入/输出格式 [\#8571](https://github.com/ClickHouse/ClickHouse/pull/8571) ([安德鲁Onyshchuk](https://github.com/oandrew)) [\#8957](https://github.com/ClickHouse/ClickHouse/pull/8957) ([安德鲁Onyshchuk](https://github.com/oandrew)) [\#8717](https://github.com/ClickHouse/ClickHouse/pull/8717) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 过期密钥的多线程和非阻塞更新 `cache` 字典(可选的权限读取旧的)。 [\#8303](https://github.com/ClickHouse/ClickHouse/pull/8303) ([尼基塔\*米哈伊洛夫](https://github.com/nikitamikhaylov)) +- 添加查询 `ALTER ... MATERIALIZE TTL`. 它运行突变,强制通过TTL删除过期的数据,并重新计算所有部分有关ttl的元信息。 [\#8775](https://github.com/ClickHouse/ClickHouse/pull/8775) ([安东\*波波夫](https://github.com/CurtizJ)) +- 如果需要,从HashJoin切换到MergeJoin(在磁盘上 [\#9082](https://github.com/ClickHouse/ClickHouse/pull/9082) ([Artem Zuikov](https://github.com/4ertus2)) +- 已添加 `MOVE PARTITION` 命令 `ALTER TABLE` [\#4729](https://github.com/ClickHouse/ClickHouse/issues/4729) [\#6168](https://github.com/ClickHouse/ClickHouse/pull/6168) ([纪尧姆\*塔瑟里](https://github.com/YiuRULE)) +- 动态地从配置文件重新加载存储配置。 [\#8594](https://github.com/ClickHouse/ClickHouse/pull/8594) ([Vladimir Chebotarev](https://github.com/excitoon)) +- 允许更改 `storage_policy` 为了不那么富有的人。 [\#8107](https://github.com/ClickHouse/ClickHouse/pull/8107) ([Vladimir Chebotarev](https://github.com/excitoon)) +- 增加了对s3存储和表功能的globs/通配符的支持。 [\#8851](https://github.com/ClickHouse/ClickHouse/pull/8851) ([Vladimir Chebotarev](https://github.com/excitoon)) +- 执行 `bitAnd`, `bitOr`, `bitXor`, `bitNot` 为 `FixedString(N)` 数据类型。 [\#9091](https://github.com/ClickHouse/ClickHouse/pull/9091) ([纪尧姆\*塔瑟里](https://github.com/YiuRULE)) +- 添加功能 `bitCount`. 这修复 [\#8702](https://github.com/ClickHouse/ClickHouse/issues/8702). 
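+For instance (a tiny sketch):
+
+```
+-- Number of set bits in the binary representation.
+SELECT bitCount(255) AS eight_ones, bitCount(256) AS one_bit;
+```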
[\#8708](https://github.com/ClickHouse/ClickHouse/pull/8708) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) [\#8749](https://github.com/ClickHouse/ClickHouse/pull/8749) ([ikopylov](https://github.com/ikopylov)) +- 添加 `generateRandom` 表函数生成具有给定模式的随机行。 允许用数据填充任意测试表。 [\#8994](https://github.com/ClickHouse/ClickHouse/pull/8994) ([Ilya Yatsishin](https://github.com/qoega)) +- `JSONEachRowFormat`:当对象包含在顶层数组中时,支持特殊情况。 [\#8860](https://github.com/ClickHouse/ClickHouse/pull/8860) ([克鲁格洛夫\*帕维尔](https://github.com/Avogar)) +- 现在可以创建一个列 `DEFAULT` 取决于默认列的表达式 `ALIAS` 表达。 [\#9489](https://github.com/ClickHouse/ClickHouse/pull/9489) ([阿利沙平](https://github.com/alesapin)) +- 允许指定 `--limit` 超过源数据大小 `clickhouse-obfuscator`. 数据将以不同的随机种子重复。 [\#9155](https://github.com/ClickHouse/ClickHouse/pull/9155) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 已添加 `groupArraySample` 功能(类似于 `groupArray`)与reservior采样算法。 [\#8286](https://github.com/ClickHouse/ClickHouse/pull/8286) ([阿莫斯鸟](https://github.com/amosbird)) +- 现在,您可以监视更新队列的大小 `cache`/`complex_key_cache` 通过系统指标字典。 [\#9413](https://github.com/ClickHouse/ClickHouse/pull/9413) ([尼基塔\*米哈伊洛夫](https://github.com/nikitamikhaylov)) +- 允许使用CRLF作为CSV输出格式的行分隔符与设置 `output_format_csv_crlf_end_of_line` 设置为1 [\#8934](https://github.com/ClickHouse/ClickHouse/pull/8934) [\#8935](https://github.com/ClickHouse/ClickHouse/pull/8935) [\#8963](https://github.com/ClickHouse/ClickHouse/pull/8963) ([米哈伊尔\*科罗托夫](https://github.com/millb)) +- 实现的更多功能 [H3](https://github.com/uber/h3) API: `h3GetBaseCell`, `h3HexAreaM2`, `h3IndexesAreNeighbors`, `h3ToChildren`, `h3ToString` 和 `stringToH3` [\#8938](https://github.com/ClickHouse/ClickHouse/pull/8938) ([Nico Mandery](https://github.com/nmandery)) +- 引入新设置: `max_parser_depth` 控制最大堆栈大小并允许大型复杂查询。 这修复 [\#6681](https://github.com/ClickHouse/ClickHouse/issues/6681) 和 [\#7668](https://github.com/ClickHouse/ClickHouse/issues/7668). [\#8647](https://github.com/ClickHouse/ClickHouse/pull/8647) ([马克西姆\*斯米尔诺夫](https://github.com/qMBQx8GH)) +- 添加设置 `force_optimize_skip_unused_shards` 如果无法跳过未使用的分片,则设置为抛出 [\#8805](https://github.com/ClickHouse/ClickHouse/pull/8805) ([Azat Khuzhin](https://github.com/azat)) +- 允许配置多个磁盘/卷用于存储数据发送 `Distributed` 发动机 [\#8756](https://github.com/ClickHouse/ClickHouse/pull/8756) ([Azat Khuzhin](https://github.com/azat)) +- 支持存储策略 (``)用于存储临时数据。 [\#8750](https://github.com/ClickHouse/ClickHouse/pull/8750) ([Azat Khuzhin](https://github.com/azat)) +- 已添加 `X-ClickHouse-Exception-Code` 如果在发送数据之前引发异常,则设置的HTTP头。 这实现了 [\#4971](https://github.com/ClickHouse/ClickHouse/issues/4971). [\#8786](https://github.com/ClickHouse/ClickHouse/pull/8786) ([米哈伊尔\*科罗托夫](https://github.com/millb)) +- 添加功能 `ifNotFinite`. 这只是一个句法糖: `ifNotFinite(x, y) = isFinite(x) ? x : y`. [\#8710](https://github.com/ClickHouse/ClickHouse/pull/8710) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 已添加 `last_successful_update_time` 列中 `system.dictionaries` 表 [\#9394](https://github.com/ClickHouse/ClickHouse/pull/9394) ([尼基塔\*米哈伊洛夫](https://github.com/nikitamikhaylov)) +- 添加 `blockSerializedSize` 功能(磁盘大小不压缩) [\#8952](https://github.com/ClickHouse/ClickHouse/pull/8952) ([Azat Khuzhin](https://github.com/azat)) +- 添加功能 `moduloOrZero` [\#9358](https://github.com/ClickHouse/ClickHouse/pull/9358) ([hcz](https://github.com/hczhcz)) +- 添加系统表 `system.zeros` 和 `system.zeros_mt` 以及故事功能 `zeros()` 和 `zeros_mt()`. 表(和表函数)包含具有名称的单列 `zero` 和类型 `UInt8`. 
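+A usage sketch for these tables and table functions (the row counts are arbitrary):
+
+```
+-- A fast way to generate many rows; the single column `zero` is always 0.
+SELECT count() FROM zeros(1000000);
+SELECT zero FROM system.zeros LIMIT 3;
+```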
此列包含零。 为了测试目的,需要它作为生成许多行的最快方法。 这修复 [\#6604](https://github.com/ClickHouse/ClickHouse/issues/6604) [\#9593](https://github.com/ClickHouse/ClickHouse/pull/9593) ([尼古拉\*科切托夫](https://github.com/KochetovNicolai)) + +#### 实验特点 {#experimental-feature} + +- 添加新的紧凑格式的部件 `MergeTree`-家庭表中的所有列都存储在一个文件中。 它有助于提高小型和频繁插入的性能。 旧的格式(每列一个文件)现在被称为wide。 数据存储格式由设置控制 `min_bytes_for_wide_part` 和 `min_rows_for_wide_part`. [\#8290](https://github.com/ClickHouse/ClickHouse/pull/8290) ([安东\*波波夫](https://github.com/CurtizJ)) +- 支持S3存储 `Log`, `TinyLog` 和 `StripeLog` 桌子 [\#8862](https://github.com/ClickHouse/ClickHouse/pull/8862) ([帕维尔\*科瓦连科](https://github.com/Jokser)) + +#### 错误修复 {#bug-fix-2} + +- 修正了日志消息中不一致的空格。 [\#9322](https://github.com/ClickHouse/ClickHouse/pull/9322) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 修复在创建表时将未命名元组数组展平为嵌套结构的错误。 [\#8866](https://github.com/ClickHouse/ClickHouse/pull/8866) ([achulkov2](https://github.com/achulkov2)) +- 修复了以下问题 “Too many open files” 如果有太多的文件匹配glob模式可能会发生错误 `File` 表或 `file` 表功能。 现在文件懒洋洋地打开。 这修复 [\#8857](https://github.com/ClickHouse/ClickHouse/issues/8857) [\#8861](https://github.com/ClickHouse/ClickHouse/pull/8861) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 删除临时表现在只删除临时表。 [\#8907](https://github.com/ClickHouse/ClickHouse/pull/8907) ([维塔利\*巴拉诺夫](https://github.com/vitlibar)) +- 当我们关闭服务器或分离/附加表时删除过时的分区。 [\#8602](https://github.com/ClickHouse/ClickHouse/pull/8602) ([纪尧姆\*塔瑟里](https://github.com/YiuRULE)) +- 默认磁盘如何计算可用空间 `data` 子目录。 修复了可用空间量计算不正确的问题,如果 `data` 目录被安装到一个单独的设备(罕见的情况)。 这修复 [\#7441](https://github.com/ClickHouse/ClickHouse/issues/7441) [\#9257](https://github.com/ClickHouse/ClickHouse/pull/9257) ([米哈伊尔\*科罗托夫](https://github.com/millb)) +- 允许逗号(交叉)与IN()内部连接。 [\#9251](https://github.com/ClickHouse/ClickHouse/pull/9251) ([Artem Zuikov](https://github.com/4ertus2)) +- 如果在WHERE部分中有\[NOT\]LIKE运算符,则允许将CROSS重写为INNER JOIN。 [\#9229](https://github.com/ClickHouse/ClickHouse/pull/9229) ([Artem Zuikov](https://github.com/4ertus2)) +- 修复后可能不正确的结果 `GROUP BY` 启用设置 `distributed_aggregation_memory_efficient`. 修复 [\#9134](https://github.com/ClickHouse/ClickHouse/issues/9134). [\#9289](https://github.com/ClickHouse/ClickHouse/pull/9289) ([尼古拉\*科切托夫](https://github.com/KochetovNicolai)) +- 找到的键在缓存字典的指标中被计为错过。 [\#9411](https://github.com/ClickHouse/ClickHouse/pull/9411) ([尼基塔\*米哈伊洛夫](https://github.com/nikitamikhaylov)) +- 修复引入的复制协议不兼容 [\#8598](https://github.com/ClickHouse/ClickHouse/issues/8598). [\#9412](https://github.com/ClickHouse/ClickHouse/pull/9412) ([阿利沙平](https://github.com/alesapin)) +- 在固定的竞争条件 `queue_task_handle` 在启动 `ReplicatedMergeTree` 桌子 [\#9552](https://github.com/ClickHouse/ClickHouse/pull/9552) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 令牌 `NOT` 没有工作 `SHOW TABLES NOT LIKE` 查询 [\#8727](https://github.com/ClickHouse/ClickHouse/issues/8727) [\#8940](https://github.com/ClickHouse/ClickHouse/pull/8940) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 添加范围检查功能 `h3EdgeLengthM`. 
如果没有这个检查,缓冲区溢出是可能的。 [\#8945](https://github.com/ClickHouse/ClickHouse/pull/8945) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 修复了多个参数(超过10)的三元逻辑运算批量计算中的错误。 [\#8718](https://github.com/ClickHouse/ClickHouse/pull/8718) ([亚历山大\*卡扎科夫](https://github.com/Akazz)) +- 修复PREWHERE优化的错误,这可能导致段错误或 `Inconsistent number of columns got from MergeTreeRangeReader` 例外。 [\#9024](https://github.com/ClickHouse/ClickHouse/pull/9024) ([安东\*波波夫](https://github.com/CurtizJ)) +- 修复意外 `Timeout exceeded while reading from socket` 异常,在实际超时之前以及启用查询探查器时,在安全连接上随机发生。 还添加 `connect_timeout_with_failover_secure_ms` 设置(默认100ms),这是类似于 `connect_timeout_with_failover_ms`,但用于安全连接(因为SSL握手比普通TCP连接慢) [\#9026](https://github.com/ClickHouse/ClickHouse/pull/9026) ([tavplubix](https://github.com/tavplubix)) +- 修复突变最终确定的错误,当突变可能处于以下状态时 `parts_to_do=0` 和 `is_done=0`. [\#9022](https://github.com/ClickHouse/ClickHouse/pull/9022) ([阿利沙平](https://github.com/alesapin)) +- 使用新的任何连接逻辑 `partial_merge_join` 设置。 有可能使 `ANY|ALL|SEMI LEFT` 和 `ALL INNER` 加入与 `partial_merge_join=1` 现在 [\#8932](https://github.com/ClickHouse/ClickHouse/pull/8932) ([Artem Zuikov](https://github.com/4ertus2)) +- Shard现在将从发起者获得的设置夹到shard的constaints,而不是抛出异常。 此修补程序允许将查询发送到具有另一个约束的分片。 [\#9447](https://github.com/ClickHouse/ClickHouse/pull/9447) ([维塔利\*巴拉诺夫](https://github.com/vitlibar)) +- 修正了内存管理问题 `MergeTreeReadPool`. [\#8791](https://github.com/ClickHouse/ClickHouse/pull/8791) ([Vladimir Chebotarev](https://github.com/excitoon)) +- 修复 `toDecimal*OrNull()` 使用字符串调用时的函数系列 `e`. 修复 [\#8312](https://github.com/ClickHouse/ClickHouse/issues/8312) [\#8764](https://github.com/ClickHouse/ClickHouse/pull/8764) ([Artem Zuikov](https://github.com/4ertus2)) +- 请确保 `FORMAT Null` 不向客户端发送数据。 [\#8767](https://github.com/ClickHouse/ClickHouse/pull/8767) ([Alexander Kuzmenkov](https://github.com/akuzm)) +- 修复时间戳中的错误 `LiveViewBlockInputStream` 不会更新。 `LIVE VIEW` 是一个实验特征。 [\#8644](https://github.com/ClickHouse/ClickHouse/pull/8644) ([vxider](https://github.com/Vxider)) [\#8625](https://github.com/ClickHouse/ClickHouse/pull/8625) ([vxider](https://github.com/Vxider)) +- 固定 `ALTER MODIFY TTL` 不允许删除旧ttl表达式的错误行为。 [\#8422](https://github.com/ClickHouse/ClickHouse/pull/8422) ([Vladimir Chebotarev](https://github.com/excitoon)) +- 修复了MergeTreeIndexSet中的UBSan报告。 这修复 [\#9250](https://github.com/ClickHouse/ClickHouse/issues/9250) [\#9365](https://github.com/ClickHouse/ClickHouse/pull/9365) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 固定的行为 `match` 和 `extract` 当干草堆有零字节的函数。 当干草堆不变时,这种行为是错误的。 这修复 [\#9160](https://github.com/ClickHouse/ClickHouse/issues/9160) [\#9163](https://github.com/ClickHouse/ClickHouse/pull/9163) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) [\#9345](https://github.com/ClickHouse/ClickHouse/pull/9345) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 避免从apache Avro第三方库中的析构函数抛出。 [\#9066](https://github.com/ClickHouse/ClickHouse/pull/9066) ([安德鲁Onyshchuk](https://github.com/oandrew)) +- 不要提交从轮询的批次 `Kafka` 部分,因为它可能会导致数据漏洞。 [\#8876](https://github.com/ClickHouse/ClickHouse/pull/8876) ([filimonov](https://github.com/filimonov)) +- 修复 `joinGet` 使用可为空的返回类型。 https://github.com/ClickHouse/ClickHouse/issues/8919 [\#9014](https://github.com/ClickHouse/ClickHouse/pull/9014) ([阿莫斯鸟](https://github.com/amosbird)) +- 修复压缩时的数据不兼容 `T64` 编解ec [\#9016](https://github.com/ClickHouse/ClickHouse/pull/9016) ([Artem Zuikov](https://github.com/4ertus2))修复数据类型id `T64` 在受影响的版本中导致错误(de)压缩的压缩编解ec。 [\#9033](https://github.com/ClickHouse/ClickHouse/pull/9033) ([Artem 
Zuikov](https://github.com/4ertus2)) +- 添加设置 `enable_early_constant_folding` 并禁用它在某些情况下,导致错误。 [\#9010](https://github.com/ClickHouse/ClickHouse/pull/9010) ([Artem Zuikov](https://github.com/4ertus2)) +- 使用VIEW修复下推谓词优化器并启用测试 [\#9011](https://github.com/ClickHouse/ClickHouse/pull/9011) ([张冬](https://github.com/zhang2014)) +- 修复段错误 `Merge` 表,从读取时可能发生 `File` 储存 [\#9387](https://github.com/ClickHouse/ClickHouse/pull/9387) ([tavplubix](https://github.com/tavplubix)) +- 添加了对存储策略的检查 `ATTACH PARTITION FROM`, `REPLACE PARTITION`, `MOVE TO TABLE`. 否则,它可以使部分数据重新启动后无法访问,并阻止ClickHouse启动。 [\#9383](https://github.com/ClickHouse/ClickHouse/pull/9383) ([Vladimir Chebotarev](https://github.com/excitoon)) +- 修复改变,如果有TTL设置表。 [\#8800](https://github.com/ClickHouse/ClickHouse/pull/8800) ([安东\*波波夫](https://github.com/CurtizJ)) +- 修复在以下情况下可能发生的竞争条件 `SYSTEM RELOAD ALL DICTIONARIES` 在某些字典被修改/添加/删除时执行。 [\#8801](https://github.com/ClickHouse/ClickHouse/pull/8801) ([维塔利\*巴拉诺夫](https://github.com/vitlibar)) +- 在以前的版本 `Memory` 数据库引擎使用空数据路径,因此在以下位置创建表 `path` directory (e.g. `/var/lib/clickhouse/`), not in data directory of database (e.g. `/var/lib/clickhouse/db_name`). [\#8753](https://github.com/ClickHouse/ClickHouse/pull/8753) ([tavplubix](https://github.com/tavplubix)) +- 修复了关于缺少默认磁盘或策略的错误日志消息。 [\#9530](https://github.com/ClickHouse/ClickHouse/pull/9530) ([Vladimir Chebotarev](https://github.com/excitoon)) +- 修复数组类型的bloom\_filter索引的not(has())。 [\#9407](https://github.com/ClickHouse/ClickHouse/pull/9407) ([achimbab](https://github.com/achimbab)) +- 允许表中的第一列 `Log` 引擎是别名 [\#9231](https://github.com/ClickHouse/ClickHouse/pull/9231) ([伊万](https://github.com/abyss7)) +- 从读取时修复范围的顺序 `MergeTree` 表中的一个线程。 它可能会导致例外 `MergeTreeRangeReader` 或错误的查询结果。 [\#9050](https://github.com/ClickHouse/ClickHouse/pull/9050) ([安东\*波波夫](https://github.com/CurtizJ)) +- 赂眉露\>\> `reinterpretAsFixedString` 返回 `FixedString` 而不是 `String`. [\#9052](https://github.com/ClickHouse/ClickHouse/pull/9052) ([安德鲁Onyshchuk](https://github.com/oandrew)) +- 避免极少数情况下,当用户可以得到错误的错误消息 (`Success` 而不是详细的错误描述)。 [\#9457](https://github.com/ClickHouse/ClickHouse/pull/9457) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 使用时不要崩溃 `Template` 使用空行模板格式化。 [\#8785](https://github.com/ClickHouse/ClickHouse/pull/8785) ([Alexander Kuzmenkov](https://github.com/akuzm)) +- 系统表的元数据文件可能在错误的位置创建 [\#8653](https://github.com/ClickHouse/ClickHouse/pull/8653) ([tavplubix](https://github.com/tavplubix))修复 [\#8581](https://github.com/ClickHouse/ClickHouse/issues/8581). +- 修复缓存字典中exception\_ptr上的数据竞赛 [\#8303](https://github.com/ClickHouse/ClickHouse/issues/8303). [\#9379](https://github.com/ClickHouse/ClickHouse/pull/9379) ([尼基塔\*米哈伊洛夫](https://github.com/nikitamikhaylov)) +- 不要为查询引发异常 `ATTACH TABLE IF NOT EXISTS`. 
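+A sketch (the table name is hypothetical):
+
+```
+-- No exception even if `events` is already attached.
+ATTACH TABLE IF NOT EXISTS events;
+```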
以前它是抛出,如果表已经存在,尽管 `IF NOT EXISTS` 条款 [\#8967](https://github.com/ClickHouse/ClickHouse/pull/8967) ([安东\*波波夫](https://github.com/CurtizJ)) +- 修复了异常消息中丢失的关闭paren。 [\#8811](https://github.com/ClickHouse/ClickHouse/pull/8811) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 避免消息 `Possible deadlock avoided` 在clickhouse客户端在交互模式下启动。 [\#9455](https://github.com/ClickHouse/ClickHouse/pull/9455) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 修复了base64编码值末尾填充格式错误的问题。 更新base64库。 这修复 [\#9491](https://github.com/ClickHouse/ClickHouse/issues/9491),关闭 [\#9492](https://github.com/ClickHouse/ClickHouse/issues/9492) [\#9500](https://github.com/ClickHouse/ClickHouse/pull/9500) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 防止丢失数据 `Kafka` 在极少数情况下,在读取后缀之后但在提交之前发生异常。 修复 [\#9378](https://github.com/ClickHouse/ClickHouse/issues/9378) [\#9507](https://github.com/ClickHouse/ClickHouse/pull/9507) ([filimonov](https://github.com/filimonov)) +- 在固定的异常 `DROP TABLE IF EXISTS` [\#8663](https://github.com/ClickHouse/ClickHouse/pull/8663) ([尼基塔\*瓦西列夫](https://github.com/nikvas0)) +- 修复当用户尝试崩溃 `ALTER MODIFY SETTING` 对于老格式化 `MergeTree` 表引擎家族. [\#9435](https://github.com/ClickHouse/ClickHouse/pull/9435) ([阿利沙平](https://github.com/alesapin)) +- 支持在JSON相关函数中不适合Int64的UInt64号码。 更新SIMDJSON掌握。 这修复 [\#9209](https://github.com/ClickHouse/ClickHouse/issues/9209) [\#9344](https://github.com/ClickHouse/ClickHouse/pull/9344) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 当使用非严格单调函数索引时,固定执行反转谓词。 [\#9223](https://github.com/ClickHouse/ClickHouse/pull/9223) ([亚历山大\*卡扎科夫](https://github.com/Akazz)) +- 不要试图折叠 `IN` 常量在 `GROUP BY` [\#8868](https://github.com/ClickHouse/ClickHouse/pull/8868) ([阿莫斯鸟](https://github.com/amosbird)) +- 修复bug `ALTER DELETE` 突变导致索引损坏。 这修复 [\#9019](https://github.com/ClickHouse/ClickHouse/issues/9019) 和 [\#8982](https://github.com/ClickHouse/ClickHouse/issues/8982). 另外修复极其罕见的竞争条件 `ReplicatedMergeTree` `ALTER` 查询。 [\#9048](https://github.com/ClickHouse/ClickHouse/pull/9048) ([阿利沙平](https://github.com/alesapin)) +- 当设置 `compile_expressions` 被启用,你可以得到 `unexpected column` 在 `LLVMExecutableFunction` 当我们使用 `Nullable` 类型 [\#8910](https://github.com/ClickHouse/ClickHouse/pull/8910) ([纪尧姆\*塔瑟里](https://github.com/YiuRULE)) +- 多个修复 `Kafka` 引擎:1)修复在消费者组重新平衡期间出现的重复项。 2)修复罕见 ‘holes’ 当数据从一个轮询的几个分区轮询并部分提交时出现(现在我们总是处理/提交整个轮询的消息块)。 3)通过块大小修复刷新(在此之前,只有超时刷新才能正常工作)。 4)更好的订阅程序(与分配反馈)。 5)使测试工作得更快(默认时间间隔和超时)。 由于数据之前没有被块大小刷新(根据文档),pr可能会导致默认设置的性能下降(由于更频繁和更小的刷新不太理想)。 如果您在更改后遇到性能问题-请增加 `kafka_max_block_size` 在表中的更大的值(例如 `CREATE TABLE ...Engine=Kafka ... SETTINGS ... kafka_max_block_size=524288`). 修复 [\#7259](https://github.com/ClickHouse/ClickHouse/issues/7259) [\#8917](https://github.com/ClickHouse/ClickHouse/pull/8917) ([filimonov](https://github.com/filimonov)) +- 修复 `Parameter out of bound` 在PREWHERE优化之后的某些查询中出现异常。 [\#8914](https://github.com/ClickHouse/ClickHouse/pull/8914) ([Baudouin Giard](https://github.com/bgiard)) +- 修正了函数参数混合常量的情况 `arrayZip`. [\#8705](https://github.com/ClickHouse/ClickHouse/pull/8705) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 执行时 `CREATE` 查询,在存储引擎参数中折叠常量表达式。 将空数据库名称替换为当前数据库。 修复 [\#6508](https://github.com/ClickHouse/ClickHouse/issues/6508), [\#3492](https://github.com/ClickHouse/ClickHouse/issues/3492) [\#9262](https://github.com/ClickHouse/ClickHouse/pull/9262) ([tavplubix](https://github.com/tavplubix)) +- 现在不可能创建或添加具有简单循环别名的列,如 `a DEFAULT b, b DEFAULT a`. 
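+A sketch of a definition that is now rejected (the table and column names are made up):
+
+```
+-- Rejected: `a` and `b` form a trivial cycle of default expressions.
+CREATE TABLE bad_defaults
+(
+    a UInt8 DEFAULT b,
+    b UInt8 DEFAULT a
+) ENGINE = Memory;
+```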
[\#9603](https://github.com/ClickHouse/ClickHouse/pull/9603) ([阿利沙平](https://github.com/alesapin)) +- 修正了双重移动可能会损坏原始部分的错误。 这是相关的,如果你使用 `ALTER TABLE MOVE` [\#8680](https://github.com/ClickHouse/ClickHouse/pull/8680) ([Vladimir Chebotarev](https://github.com/excitoon)) +- 允许 `interval` 用于正确解析的标识符,而无需反引号。 当一个查询不能被执行,即使固定的问题 `interval` 标识符用反引号或双引号括起来。 这修复 [\#9124](https://github.com/ClickHouse/ClickHouse/issues/9124). [\#9142](https://github.com/ClickHouse/ClickHouse/pull/9142) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 修正了模糊测试和不正确的行为 `bitTestAll`/`bitTestAny` 功能。 [\#9143](https://github.com/ClickHouse/ClickHouse/pull/9143) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 修复可能的崩溃/错误的行数 `LIMIT n WITH TIES` 当有很多行等于第n行时。 [\#9464](https://github.com/ClickHouse/ClickHouse/pull/9464) ([tavplubix](https://github.com/tavplubix)) +- 使用enabled编写的部件修复突变 `insert_quorum`. [\#9463](https://github.com/ClickHouse/ClickHouse/pull/9463) ([阿利沙平](https://github.com/alesapin)) +- 修复数据竞赛破坏 `Poco::HTTPServer`. 当服务器启动并立即关闭时,可能会发生这种情况。 [\#9468](https://github.com/ClickHouse/ClickHouse/pull/9468) ([安东\*波波夫](https://github.com/CurtizJ)) +- 修复运行时显示误导性错误消息的错误 `SHOW CREATE TABLE a_table_that_does_not_exist`. [\#8899](https://github.com/ClickHouse/ClickHouse/pull/8899) ([achulkov2](https://github.com/achulkov2)) +- 固定 `Parameters are out of bound` 例外在一些罕见的情况下,当我们在一个常数 `SELECT` 条款时,我们有一个 `ORDER BY` 和一个 `LIMIT` 条款 [\#8892](https://github.com/ClickHouse/ClickHouse/pull/8892) ([纪尧姆\*塔瑟里](https://github.com/YiuRULE)) +- 修复突变定稿,当已经完成突变可以有状态 `is_done=0`. [\#9217](https://github.com/ClickHouse/ClickHouse/pull/9217) ([阿利沙平](https://github.com/alesapin)) +- 防止执行 `ALTER ADD INDEX` 对于旧语法的MergeTree表,因为它不起作用。 [\#8822](https://github.com/ClickHouse/ClickHouse/pull/8822) ([米哈伊尔\*科罗托夫](https://github.com/millb)) +- 在服务器启动时不要访问表,这 `LIVE VIEW` 取决于,所以服务器将能够启动。 也删除 `LIVE VIEW` 分离时的依赖关系 `LIVE VIEW`. `LIVE VIEW` 是一个实验特征。 [\#8824](https://github.com/ClickHouse/ClickHouse/pull/8824) ([tavplubix](https://github.com/tavplubix)) +- 修复可能的段错误 `MergeTreeRangeReader`,同时执行 `PREWHERE`. [\#9106](https://github.com/ClickHouse/ClickHouse/pull/9106) ([安东\*波波夫](https://github.com/CurtizJ)) +- 修复与列Ttl可能不匹配的校验和。 [\#9451](https://github.com/ClickHouse/ClickHouse/pull/9451) ([安东\*波波夫](https://github.com/CurtizJ)) +- 修正了一个错误,当部分没有被移动的情况下,只有一个卷的TTL规则在后台。 [\#8672](https://github.com/ClickHouse/ClickHouse/pull/8672) ([Vladimir Chebotarev](https://github.com/excitoon)) +- 修正了这个问题 `Method createColumn() is not implemented for data type Set`. 这修复 [\#7799](https://github.com/ClickHouse/ClickHouse/issues/7799). [\#8674](https://github.com/ClickHouse/ClickHouse/pull/8674) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 现在我们将尝试更频繁地完成突变。 [\#9427](https://github.com/ClickHouse/ClickHouse/pull/9427) ([阿利沙平](https://github.com/alesapin)) +- 修复 `intDiv` 减一个常数 [\#9351](https://github.com/ClickHouse/ClickHouse/pull/9351) ([hcz](https://github.com/hczhcz)) +- 修复可能的竞争条件 `BlockIO`. 
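+Illustrating the `intDiv` fix mentioned just above (a sketch; the expected result is shown in the comment):
+
+```
+-- Integer division by the constant -1 simply negates the dividend.
+SELECT intDiv(toInt64(5), -1) AS q; -- -5
+```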
[\#9356](https://github.com/ClickHouse/ClickHouse/pull/9356) ([尼古拉\*科切托夫](https://github.com/KochetovNicolai)) +- 修复尝试使用/删除时导致服务器终止的错误 `Kafka` 使用错误的参数创建的表。 [\#9513](https://github.com/ClickHouse/ClickHouse/pull/9513) ([filimonov](https://github.com/filimonov)) +- 增加了解决方法,如果操作系统返回错误的结果 `timer_create` 功能。 [\#8837](https://github.com/ClickHouse/ClickHouse/pull/8837) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 在使用固定错误 `min_marks_for_seek` 参数。 修复了分布式表中没有分片键时的错误消息,并且我们尝试跳过未使用的分片。 [\#8908](https://github.com/ClickHouse/ClickHouse/pull/8908) ([Azat Khuzhin](https://github.com/azat)) + +#### 改进 {#improvement} + +- 执行 `ALTER MODIFY/DROP` 对突变的顶部查询 `ReplicatedMergeTree*` 引擎家族. 现在 `ALTERS` 仅在元数据更新阶段阻止,之后不阻止。 [\#8701](https://github.com/ClickHouse/ClickHouse/pull/8701) ([阿利沙平](https://github.com/alesapin)) +- 添加重写交叉到内部连接的能力 `WHERE` 包含未编译名称的部分。 [\#9512](https://github.com/ClickHouse/ClickHouse/pull/9512) ([Artem Zuikov](https://github.com/4ertus2)) +- 赂眉露\>\> `SHOW TABLES` 和 `SHOW DATABASES` 查询支持 `WHERE` 表达式和 `FROM`/`IN` [\#9076](https://github.com/ClickHouse/ClickHouse/pull/9076) ([sundyli](https://github.com/sundy-li)) +- 添加了一个设置 `deduplicate_blocks_in_dependent_materialized_views`. [\#9070](https://github.com/ClickHouse/ClickHouse/pull/9070) ([urykhy](https://github.com/urykhy)) +- 在最近的变化之后,MySQL客户端开始以十六进制打印二进制字符串,从而使它们不可读 ([\#9032](https://github.com/ClickHouse/ClickHouse/issues/9032)). ClickHouse中的解决方法是将字符串列标记为UTF-8,这并不总是如此,但通常是这种情况。 [\#9079](https://github.com/ClickHouse/ClickHouse/pull/9079) ([尤里\*巴拉诺夫](https://github.com/yurriy)) +- 添加对字符串和FixedString键的支持 `sumMap` [\#8903](https://github.com/ClickHouse/ClickHouse/pull/8903) ([Baudouin Giard](https://github.com/bgiard)) +- 支持SummingMergeTree地图中的字符串键 [\#8933](https://github.com/ClickHouse/ClickHouse/pull/8933) ([Baudouin Giard](https://github.com/bgiard)) +- 即使线程已抛出异常,也向线程池发送线程终止信号 [\#8736](https://github.com/ClickHouse/ClickHouse/pull/8736) ([丁香飞](https://github.com/dingxiangfei2009)) +- 允许设置 `query_id` 在 `clickhouse-benchmark` [\#9416](https://github.com/ClickHouse/ClickHouse/pull/9416) ([安东\*波波夫](https://github.com/CurtizJ)) +- 不要让奇怪的表达 `ALTER TABLE ... PARTITION partition` 查询。 这个地址 [\#7192](https://github.com/ClickHouse/ClickHouse/issues/7192) [\#8835](https://github.com/ClickHouse/ClickHouse/pull/8835) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 表 `system.table_engines` 现在提供有关功能支持的信息(如 `supports_ttl` 或 `supports_sort_order`). [\#8830](https://github.com/ClickHouse/ClickHouse/pull/8830) ([Max Akhmedov](https://github.com/zlobober)) +- 启用 `system.metric_log` 默认情况下。 它将包含具有ProfileEvents值的行,CurrentMetrics收集与 “collect\_interval\_milliseconds” 间隔(默认情况下为一秒)。 该表非常小(通常以兆字节为单位),默认情况下收集此数据是合理的。 [\#9225](https://github.com/ClickHouse/ClickHouse/pull/9225) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- Initialize query profiler for all threads in a group, e.g. it allows to fully profile insert-queries. 
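+A sketch of the extended `SHOW` syntax from the improvement above (the database and filters are arbitrary):
+
+```
+SHOW TABLES FROM system WHERE name LIKE '%log%';
+SHOW DATABASES WHERE name != 'system';
+```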
Fixes [\#6964](https://github.com/ClickHouse/ClickHouse/issues/6964) [\#8874](https://github.com/ClickHouse/ClickHouse/pull/8874) ([伊万](https://github.com/abyss7)) +- 现在是暂时的 `LIVE VIEW` 创建者 `CREATE LIVE VIEW name WITH TIMEOUT [42] ...` 而不是 `CREATE TEMPORARY LIVE VIEW ...`,因为以前的语法不符合 `CREATE TEMPORARY TABLE ...` [\#9131](https://github.com/ClickHouse/ClickHouse/pull/9131) ([tavplubix](https://github.com/tavplubix)) +- 添加text\_log。级别配置参数,以限制进入 `system.text_log` 表 [\#8809](https://github.com/ClickHouse/ClickHouse/pull/8809) ([Azat Khuzhin](https://github.com/azat)) +- 允许根据TTL规则将下载的部分放入磁盘/卷 [\#8598](https://github.com/ClickHouse/ClickHouse/pull/8598) ([Vladimir Chebotarev](https://github.com/excitoon)) +- 对于外部MySQL字典,允许将MySQL连接池共同化为 “share” 他们在字典中。 此选项显着减少到MySQL服务器的连接数。 [\#9409](https://github.com/ClickHouse/ClickHouse/pull/9409) ([Clément Rodriguez](https://github.com/clemrodriguez)) +- 显示分位数的最近查询执行时间 `clickhouse-benchmark` 输出而不是插值值。 最好显示与某些查询的执行时间相对应的值。 [\#8712](https://github.com/ClickHouse/ClickHouse/pull/8712) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 可以在将数据插入到Kafka时为消息添加密钥和时间戳。 修复 [\#7198](https://github.com/ClickHouse/ClickHouse/issues/7198) [\#8969](https://github.com/ClickHouse/ClickHouse/pull/8969) ([filimonov](https://github.com/filimonov)) +- 如果服务器从终端运行,请按颜色突出显示线程号,查询id和日志优先级。 这是为了提高开发人员相关日志消息的可读性。 [\#8961](https://github.com/ClickHouse/ClickHouse/pull/8961) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 更好的异常消息,同时加载表 `Ordinary` 数据库。 [\#9527](https://github.com/ClickHouse/ClickHouse/pull/9527) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 执行 `arraySlice` 对于具有聚合函数状态的数组。 这修复 [\#9388](https://github.com/ClickHouse/ClickHouse/issues/9388) [\#9391](https://github.com/ClickHouse/ClickHouse/pull/9391) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 允许在in运算符的右侧使用常量函数和常量数组。 [\#8813](https://github.com/ClickHouse/ClickHouse/pull/8813) ([安东\*波波夫](https://github.com/CurtizJ)) +- 如果在获取系统数据时发生了zookeeper异常。副本,将其显示在单独的列中。 这实现了 [\#9137](https://github.com/ClickHouse/ClickHouse/issues/9137) [\#9138](https://github.com/ClickHouse/ClickHouse/pull/9138) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 原子删除destroy上的MergeTree数据部分。 [\#8402](https://github.com/ClickHouse/ClickHouse/pull/8402) ([Vladimir Chebotarev](https://github.com/excitoon)) +- 支持分布式表的行级安全性。 [\#8926](https://github.com/ClickHouse/ClickHouse/pull/8926) ([伊万](https://github.com/abyss7)) +- Now we recognize suffix (like KB, KiB…) in settings values. [\#8072](https://github.com/ClickHouse/ClickHouse/pull/8072) ([米哈伊尔\*科罗托夫](https://github.com/millb)) +- 在构建大型连接的结果时防止内存不足。 [\#8637](https://github.com/ClickHouse/ClickHouse/pull/8637) ([Artem Zuikov](https://github.com/4ertus2)) +- 在交互模式下为建议添加群集名称 `clickhouse-client`. [\#8709](https://github.com/ClickHouse/ClickHouse/pull/8709) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- Initialize query profiler for all threads in a group, e.g. 
it allows to fully profile insert-queries [\#8820](https://github.com/ClickHouse/ClickHouse/pull/8820) ([伊万](https://github.com/abyss7)) +- 添加列 `exception_code` 在 `system.query_log` 桌子 [\#8770](https://github.com/ClickHouse/ClickHouse/pull/8770) ([米哈伊尔\*科罗托夫](https://github.com/millb)) +- 在端口上启用MySQL兼容服务器 `9004` 在默认服务器配置文件中。 在配置的例子固定密码生成命令。 [\#8771](https://github.com/ClickHouse/ClickHouse/pull/8771) ([尤里\*巴拉诺夫](https://github.com/yurriy)) +- 如果文件系统是只读的,请防止在关闭时中止。 这修复 [\#9094](https://github.com/ClickHouse/ClickHouse/issues/9094) [\#9100](https://github.com/ClickHouse/ClickHouse/pull/9100) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 当HTTP POST查询中需要长度时,更好的异常消息。 [\#9453](https://github.com/ClickHouse/ClickHouse/pull/9453) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 添加 `_path` 和 `_file` 虚拟列 `HDFS` 和 `File` 发动机和 `hdfs` 和 `file` 表函数 [\#8489](https://github.com/ClickHouse/ClickHouse/pull/8489) ([Olga Khvostikova](https://github.com/stavrolia)) +- 修复错误 `Cannot find column` 同时插入到 `MATERIALIZED VIEW` 在情况下,如果新列被添加到视图的内部表。 [\#8766](https://github.com/ClickHouse/ClickHouse/pull/8766) [\#8788](https://github.com/ClickHouse/ClickHouse/pull/8788) ([vzakaznikov](https://github.com/vzakaznikov)) [\#8788](https://github.com/ClickHouse/ClickHouse/issues/8788) [\#8806](https://github.com/ClickHouse/ClickHouse/pull/8806) ([尼古拉\*科切托夫](https://github.com/KochetovNicolai)) [\#8803](https://github.com/ClickHouse/ClickHouse/pull/8803) ([尼古拉\*科切托夫](https://github.com/KochetovNicolai)) +- 通过最终更新后发送进度(如日志)修复本机客户端-服务器协议的进度。 这可能仅与使用本机协议的某些第三方工具相关。 [\#9495](https://github.com/ClickHouse/ClickHouse/pull/9495) ([Azat Khuzhin](https://github.com/azat)) +- 添加系统指标跟踪使用MySQL协议的客户端连接数 ([\#9013](https://github.com/ClickHouse/ClickHouse/issues/9013)). [\#9015](https://github.com/ClickHouse/ClickHouse/pull/9015) ([尤金\*克里莫夫](https://github.com/Slach)) +- 从现在开始,HTTP响应将有 `X-ClickHouse-Timezone` 标题设置为相同的时区值 `SELECT timezone()` 会报告。 [\#9493](https://github.com/ClickHouse/ClickHouse/pull/9493) ([Denis Glazachev](https://github.com/traceon)) + +#### 性能改进 {#performance-improvement} + +- 使用IN提高分析指标的性能 [\#9261](https://github.com/ClickHouse/ClickHouse/pull/9261) ([安东\*波波夫](https://github.com/CurtizJ)) +- 逻辑函数+代码清理更简单,更有效的代码。 跟进到 [\#8718](https://github.com/ClickHouse/ClickHouse/issues/8718) [\#8728](https://github.com/ClickHouse/ClickHouse/pull/8728) ([亚历山大\*卡扎科夫](https://github.com/Akazz)) +- 整体性能改善(范围为5%。.通过确保使用C++20功能进行更严格的别名处理,对于受影响的查询来说,这是200%)。 [\#9304](https://github.com/ClickHouse/ClickHouse/pull/9304) ([阿莫斯鸟](https://github.com/amosbird)) +- 比较函数的内部循环更严格的别名。 [\#9327](https://github.com/ClickHouse/ClickHouse/pull/9327) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 对于算术函数的内部循环更严格的别名。 [\#9325](https://github.com/ClickHouse/ClickHouse/pull/9325) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- ColumnVector::replicate()的实现速度快约3倍,通过该实现ColumnConst::convertToFullColumn()。 在实现常数时,也将在测试中有用。 [\#9293](https://github.com/ClickHouse/ClickHouse/pull/9293) ([亚历山大\*卡扎科夫](https://github.com/Akazz)) +- 另一个小的性能改进 `ColumnVector::replicate()` (这加快了 `materialize` 函数和高阶函数),甚至进一步改进 [\#9293](https://github.com/ClickHouse/ClickHouse/issues/9293) [\#9442](https://github.com/ClickHouse/ClickHouse/pull/9442) ([亚历山大\*卡扎科夫](https://github.com/Akazz)) +- 改进的性能 `stochasticLinearRegression` 聚合函数。 此补丁由英特尔贡献。 [\#8652](https://github.com/ClickHouse/ClickHouse/pull/8652) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) +- 提高性能 `reinterpretAsFixedString` 功能。 [\#9342](https://github.com/ClickHouse/ClickHouse/pull/9342) 
+- Fix the error `Cannot find column` while inserting into a `MATERIALIZED VIEW` in case a new column was added to the view's internal table. [\#8766](https://github.com/ClickHouse/ClickHouse/pull/8766) [\#8788](https://github.com/ClickHouse/ClickHouse/pull/8788) ([vzakaznikov](https://github.com/vzakaznikov)) [\#8788](https://github.com/ClickHouse/ClickHouse/issues/8788) [\#8806](https://github.com/ClickHouse/ClickHouse/pull/8806) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) [\#8803](https://github.com/ClickHouse/ClickHouse/pull/8803) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fix progress over the native client-server protocol by sending progress after the final update (like logs). This may be relevant only to some third-party tools that use the native protocol. [\#9495](https://github.com/ClickHouse/ClickHouse/pull/9495) ([Azat Khuzhin](https://github.com/azat))
+- Add a system metric tracking the number of client connections using the MySQL protocol ([\#9013](https://github.com/ClickHouse/ClickHouse/issues/9013)). [\#9015](https://github.com/ClickHouse/ClickHouse/pull/9015) ([Eugene Klimov](https://github.com/Slach))
+- From now on, HTTP responses will have the `X-ClickHouse-Timezone` header set to the same timezone value that `SELECT timezone()` would report. [\#9493](https://github.com/ClickHouse/ClickHouse/pull/9493) ([Denis Glazachev](https://github.com/traceon))
+
+#### Performance Improvement {#performance-improvement}
+
+- Improve the performance of analysing an index with IN [\#9261](https://github.com/ClickHouse/ClickHouse/pull/9261) ([Anton Popov](https://github.com/CurtizJ))
+- Simpler and more efficient code in logical functions, plus code cleanups. A follow-up to [\#8718](https://github.com/ClickHouse/ClickHouse/issues/8718) [\#8728](https://github.com/ClickHouse/ClickHouse/pull/8728) ([Alexander Kazakov](https://github.com/Akazz))
+- Overall performance improvement (in the range of 5%..200% for affected queries) by ensuring even stricter aliasing with C++20 features. [\#9304](https://github.com/ClickHouse/ClickHouse/pull/9304) ([Amos Bird](https://github.com/amosbird))
+- Stricter aliasing for the inner loops of comparison functions. [\#9327](https://github.com/ClickHouse/ClickHouse/pull/9327) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Stricter aliasing for the inner loops of arithmetic functions. [\#9325](https://github.com/ClickHouse/ClickHouse/pull/9325) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- A ~3 times faster implementation of ColumnVector::replicate(), via which ColumnConst::convertToFullColumn() is implemented. It will also be useful in tests when materializing constants. [\#9293](https://github.com/ClickHouse/ClickHouse/pull/9293) ([Alexander Kazakov](https://github.com/Akazz))
+- Another minor performance improvement to `ColumnVector::replicate()` (this speeds up the `materialize` function and higher-order functions), an even further improvement on [\#9293](https://github.com/ClickHouse/ClickHouse/issues/9293) [\#9442](https://github.com/ClickHouse/ClickHouse/pull/9442) ([Alexander Kazakov](https://github.com/Akazz))
+- Improved the performance of the `stochasticLinearRegression` aggregate function. This patch was contributed by Intel. [\#8652](https://github.com/ClickHouse/ClickHouse/pull/8652) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Improve the performance of the `reinterpretAsFixedString` function. [\#9342](https://github.com/ClickHouse/ClickHouse/pull/9342) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Do not send blocks to the client for the `Null` format in the processors pipeline. [\#8797](https://github.com/ClickHouse/ClickHouse/pull/8797) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) [\#8767](https://github.com/ClickHouse/ClickHouse/pull/8767) ([Alexander Kuzmenkov](https://github.com/akuzm))
+
+#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement}
+
+- Exception handling now works correctly on the Windows Subsystem for Linux. See https://github.com/ClickHouse-Extras/libunwind/pull/3 This fixes [\#6480](https://github.com/ClickHouse/ClickHouse/issues/6480) [\#9564](https://github.com/ClickHouse/ClickHouse/pull/9564) ([sobolevsv](https://github.com/sobolevsv))
+- Replace `readline` with `replxx` for interactive line editing in `clickhouse-client` [\#8416](https://github.com/ClickHouse/ClickHouse/pull/8416) ([Ivan](https://github.com/abyss7))
+- Better build time and fewer template instantiations in FunctionsComparison. [\#9324](https://github.com/ClickHouse/ClickHouse/pull/9324) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added integration with `clang-tidy` in CI. See also [\#6044](https://github.com/ClickHouse/ClickHouse/issues/6044) [\#9566](https://github.com/ClickHouse/ClickHouse/pull/9566) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Now we link ClickHouse in CI using `lld` even for `gcc`. [\#9049](https://github.com/ClickHouse/ClickHouse/pull/9049) ([alesapin](https://github.com/alesapin))
+- Allow randomizing thread scheduling and inserting glitches when the `THREAD_FUZZER_*` environment variables are set. This helps testing. [\#9459](https://github.com/ClickHouse/ClickHouse/pull/9459) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Enable secure sockets in stateless tests [\#9288](https://github.com/ClickHouse/ClickHouse/pull/9288) ([tavplubix](https://github.com/tavplubix))
+- Make SPLIT\_SHARED\_LIBRARIES=OFF more robust [\#9156](https://github.com/ClickHouse/ClickHouse/pull/9156) ([Azat Khuzhin](https://github.com/azat))
+- Make the "performance\_introspection\_and\_logging" test reliable against the server getting stuck at random. This may happen in a CI environment. See also [\#9515](https://github.com/ClickHouse/ClickHouse/issues/9515) [\#9528](https://github.com/ClickHouse/ClickHouse/pull/9528) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Validate XML in the style check. [\#9550](https://github.com/ClickHouse/ClickHouse/pull/9550) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a race condition in the test `00738_lock_for_inner_table`. This test relied on sleep. [\#9555](https://github.com/ClickHouse/ClickHouse/pull/9555) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Removed performance tests of type `once`. This is needed to run all performance tests in statistical comparison mode (more reliable). [\#9557](https://github.com/ClickHouse/ClickHouse/pull/9557) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added performance tests for arithmetic functions. [\#9326](https://github.com/ClickHouse/ClickHouse/pull/9326) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added performance tests for the `sumMap` and `sumMapWithOverflow` aggregate functions. A follow-up to [\#8933](https://github.com/ClickHouse/ClickHouse/issues/8933) [\#8947](https://github.com/ClickHouse/ClickHouse/pull/8947) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Ensure the style of ErrorCodes via the style check. [\#9370](https://github.com/ClickHouse/ClickHouse/pull/9370) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added a script for tests history. [\#8796](https://github.com/ClickHouse/ClickHouse/pull/8796) ([alesapin](https://github.com/alesapin))
+- Added the GCC warning `-Wsuggest-override` to locate and fix all places where the `override` keyword must be used. [\#8760](https://github.com/ClickHouse/ClickHouse/pull/8760) ([kreuzerkrieg](https://github.com/kreuzerkrieg))
+- Ignore a weak symbol under Mac OS X because it must be defined [\#9538](https://github.com/ClickHouse/ClickHouse/pull/9538) ([deleted user](https://github.com/ghost))
+- Normalized the running time of some queries in performance tests. This is done in preparation for running all performance tests in comparison mode. [\#9565](https://github.com/ClickHouse/ClickHouse/pull/9565) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix some tests to support pytest with query tests [\#9062](https://github.com/ClickHouse/ClickHouse/pull/9062) ([Ivan](https://github.com/abyss7))
+- Enable SSL in builds with MSan, so the server won't fail at startup when running stateless tests [\#9531](https://github.com/ClickHouse/ClickHouse/pull/9531) ([tavplubix](https://github.com/tavplubix))
+- Fix database substitution in test results [\#9384](https://github.com/ClickHouse/ClickHouse/pull/9384) ([Ilya Yatsishin](https://github.com/qoega))
+- Build fixes for miscellaneous platforms [\#9381](https://github.com/ClickHouse/ClickHouse/pull/9381) ([proller](https://github.com/proller)) [\#8755](https://github.com/ClickHouse/ClickHouse/pull/8755) ([proller](https://github.com/proller)) [\#8631](https://github.com/ClickHouse/ClickHouse/pull/8631) ([proller](https://github.com/proller))
+- Added a disks section to the stateless-with-coverage test Docker image [\#9213](https://github.com/ClickHouse/ClickHouse/pull/9213) ([Pavel Kovalenko](https://github.com/Jokser))
+- Get rid of in-source-tree files when building with GRPC [\#9588](https://github.com/ClickHouse/ClickHouse/pull/9588) ([Amos Bird](https://github.com/amosbird))
+- Slightly faster build time by removing SessionCleaner from Context. Made the code of SessionCleaner simpler. [\#9232](https://github.com/ClickHouse/ClickHouse/pull/9232) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Updated the check for hung queries in the clickhouse-test script [\#8858](https://github.com/ClickHouse/ClickHouse/pull/8858) ([Alexander Kazakov](https://github.com/Akazz))
+- Removed some useless files from the repository. [\#8843](https://github.com/ClickHouse/ClickHouse/pull/8843) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Changed the type of math perftests from `once` to `loop`. [\#8783](https://github.com/ClickHouse/ClickHouse/pull/8783) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Added a Docker image that allows building an interactive code-browser HTML report for our codebase. [\#8781](https://github.com/ClickHouse/ClickHouse/pull/8781) ([alesapin](https://github.com/alesapin)) See the [Woboq Code Browser](https://clickhouse-test-reports.s3.yandex.net/codebrowser/html_report///ClickHouse/dbms/index.html)
+- Suppress some test failures under MSan. [\#8780](https://github.com/ClickHouse/ClickHouse/pull/8780) ([Alexander Kuzmenkov](https://github.com/akuzm))
+- Speed up the "exception while insert" test. This test often timed out in debug-with-coverage builds. [\#8711](https://github.com/ClickHouse/ClickHouse/pull/8711) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Updated `libcxx` and `libcxxabi` to master. In preparation for [\#9304](https://github.com/ClickHouse/ClickHouse/issues/9304) [\#9308](https://github.com/ClickHouse/ClickHouse/pull/9308) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix the flaky test `00910_zookeeper_test_alter_compression_codecs`. [\#9525](https://github.com/ClickHouse/ClickHouse/pull/9525) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Clean up duplicated linker flags. Make sure the linker won't look up unexpected symbols. [\#9433](https://github.com/ClickHouse/ClickHouse/pull/9433) ([Amos Bird](https://github.com/amosbird))
+- Added the `clickhouse-odbc` driver to the test images. This allows testing the interaction of ClickHouse with ClickHouse via its own ODBC driver. [\#9348](https://github.com/ClickHouse/ClickHouse/pull/9348) ([filimonov](https://github.com/filimonov))
+- Fix several bugs in unit tests. [\#9047](https://github.com/ClickHouse/ClickHouse/pull/9047) ([alesapin](https://github.com/alesapin))
+- Enabled the `-Wmissing-include-dirs` GCC warning to eliminate all non-existing includes — mostly a result of CMake scripting errors [\#8704](https://github.com/ClickHouse/ClickHouse/pull/8704) ([kreuzerkrieg](https://github.com/kreuzerkrieg))
+- Describe the reasons if the query profiler cannot work. This is intended for [\#9049](https://github.com/ClickHouse/ClickHouse/issues/9049) [\#9144](https://github.com/ClickHouse/ClickHouse/pull/9144) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Update OpenSSL to upstream master. Fixed the issue when TLS connections may fail with the messages `OpenSSL SSL_read: error:14094438:SSL routines:ssl3_read_bytes:tlsv1 alert internal error` and `SSL Exception: error:2400006E:random number generator::error retrieving entropy`. The issue was present in version 20.1. [\#8956](https://github.com/ClickHouse/ClickHouse/pull/8956) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Update the Dockerfile for the server [\#8893](https://github.com/ClickHouse/ClickHouse/pull/8893) ([Ilya Mazaev](https://github.com/ne-ray))
+- Minor fixes in the build-gcc-from-sources script [\#8774](https://github.com/ClickHouse/ClickHouse/pull/8774) ([Michael Nacharov](https://github.com/mnach))
+- Replace `numbers` with `zeros` in perftests where the `number` column is not used. This will lead to cleaner test results. [\#9600](https://github.com/ClickHouse/ClickHouse/pull/9600) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fix a stack overflow issue when using initializer\_list in column constructors. [\#9367](https://github.com/ClickHouse/ClickHouse/pull/9367) ([deleted user](https://github.com/ghost))
+- Upgrade librdkafka to v1.3.0. Enable the bundled `rdkafka` and `gsasl` libraries on Mac OS X. [\#9000](https://github.com/ClickHouse/ClickHouse/pull/9000) ([Andrew Onyshchuk](https://github.com/oandrew))
+- Build fix on GCC 9.2.0 [\#9306](https://github.com/ClickHouse/ClickHouse/pull/9306) ([vxider](https://github.com/Vxider))
+
+## ClickHouse release v20.1 {#clickhouse-release-v20-1}
+
+### ClickHouse release v20.1.8.41, 2020-03-20 {#clickhouse-release-v20-1-8-41-2020-03-20}
+
+#### Bug Fix {#bug-fix-3}
+
+- Fix a possible permanent `Cannot schedule a task` error (due to an unhandled exception in `ParallelAggregatingBlockInputStream::Handler::onFinish/onFinishThread`). This fixes [\#6833](https://github.com/ClickHouse/ClickHouse/issues/6833). [\#9154](https://github.com/ClickHouse/ClickHouse/pull/9154) ([Azat Khuzhin](https://github.com/azat))
+- Fix excessive memory consumption in `ALTER` queries (mutations). This fixes [\#9533](https://github.com/ClickHouse/ClickHouse/issues/9533) and [\#9670](https://github.com/ClickHouse/ClickHouse/issues/9670). [\#9754](https://github.com/ClickHouse/ClickHouse/pull/9754) ([alesapin](https://github.com/alesapin))
+- Fix a bug with backquoting in DDL of external dictionaries. This fixes [\#9619](https://github.com/ClickHouse/ClickHouse/issues/9619). [\#9734](https://github.com/ClickHouse/ClickHouse/pull/9734) ([alesapin](https://github.com/alesapin))
+
+### ClickHouse release v20.1.7.38, 2020-03-18 {#clickhouse-release-v20-1-7-38-2020-03-18}
+
+#### Bug Fix {#bug-fix-4}
+
+- Fixed incorrect internal function names for `sumKahan` and `sumWithOverflow`. It led to an exception when these functions were used in remote queries. [\#9636](https://github.com/ClickHouse/ClickHouse/pull/9636) ([Azat Khuzhin](https://github.com/azat)). This issue was present in all ClickHouse releases.
+- Allow `ALTER ON CLUSTER` of `Distributed` tables with internal replication. This fixes [\#3268](https://github.com/ClickHouse/ClickHouse/issues/3268). [\#9617](https://github.com/ClickHouse/ClickHouse/pull/9617) ([shinoi2](https://github.com/shinoi2)). This issue was present in all ClickHouse releases.
+- Fix the possible exceptions `Size of filter doesn't match size of column` and `Invalid number of rows in Chunk` in `MergeTreeRangeReader`. They could appear while executing `PREWHERE` in some cases. Fixes [\#9132](https://github.com/ClickHouse/ClickHouse/issues/9132). [\#9612](https://github.com/ClickHouse/ClickHouse/pull/9612) ([Anton Popov](https://github.com/CurtizJ))
+- Fixed the issue that the timezone was not preserved if you write a simple arithmetic expression like `time + 1` (in contrast to an expression like `time + INTERVAL 1 SECOND`). This fixes [\#5743](https://github.com/ClickHouse/ClickHouse/issues/5743). [\#9323](https://github.com/ClickHouse/ClickHouse/pull/9323) ([alexey-milovidov](https://github.com/alexey-milovidov)). This issue was present in all ClickHouse releases.
+- Now it's not possible to create or add columns with simple cyclic aliases like `a DEFAULT b, b DEFAULT a`. [\#9603](https://github.com/ClickHouse/ClickHouse/pull/9603) ([alesapin](https://github.com/alesapin))
+- Fixed an issue where the padding at the end of a base64-encoded value could be malformed. Updated the base64 library. This fixes [\#9491](https://github.com/ClickHouse/ClickHouse/issues/9491), closes [\#9492](https://github.com/ClickHouse/ClickHouse/issues/9492) [\#9500](https://github.com/ClickHouse/ClickHouse/pull/9500) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix a data race at destruction of `Poco::HTTPServer`. It could happen when the server was started and immediately shut down. [\#9468](https://github.com/ClickHouse/ClickHouse/pull/9468) ([Anton Popov](https://github.com/CurtizJ))
+- Fix a possible crash / wrong number of rows in `LIMIT n WITH TIES` when there are many rows equal to the n-th row. [\#9464](https://github.com/ClickHouse/ClickHouse/pull/9464) ([tavplubix](https://github.com/tavplubix))
+- Fix possibly mismatched checksums with column TTLs. [\#9451](https://github.com/ClickHouse/ClickHouse/pull/9451) ([Anton Popov](https://github.com/CurtizJ))
+- Fix a crash when a user tries `ALTER MODIFY SETTING` for an old-formatted `MergeTree` table engine family. [\#9435](https://github.com/ClickHouse/ClickHouse/pull/9435) ([alesapin](https://github.com/alesapin))
+- Now we will try to finalize mutations more frequently. [\#9427](https://github.com/ClickHouse/ClickHouse/pull/9427) ([alesapin](https://github.com/alesapin))
+- Fix the replication protocol incompatibility introduced in [\#8598](https://github.com/ClickHouse/ClickHouse/issues/8598). [\#9412](https://github.com/ClickHouse/ClickHouse/pull/9412) ([alesapin](https://github.com/alesapin))
+- Fix not(has()) for the bloom\_filter index of array types. [\#9407](https://github.com/ClickHouse/ClickHouse/pull/9407) ([achimbab](https://github.com/achimbab))
+- Fixed the behaviour of the `match` and `extract` functions when the haystack has zero bytes. The behaviour was wrong when the haystack was constant. This fixes [\#9160](https://github.com/ClickHouse/ClickHouse/issues/9160) [\#9163](https://github.com/ClickHouse/ClickHouse/pull/9163) ([alexey-milovidov](https://github.com/alexey-milovidov)) [\#9345](https://github.com/ClickHouse/ClickHouse/pull/9345) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-1}
+
+- Exception handling now works correctly on the Windows Subsystem for Linux. See https://github.com/ClickHouse-Extras/libunwind/pull/3 This fixes [\#6480](https://github.com/ClickHouse/ClickHouse/issues/6480) [\#9564](https://github.com/ClickHouse/ClickHouse/pull/9564) ([sobolevsv](https://github.com/sobolevsv))
+
+### ClickHouse release v20.1.6.30, 2020-03-05 {#clickhouse-release-v20-1-6-30-2020-03-05}
+
+#### Bug Fix {#bug-fix-5}
+
+- Fix data incompatibility when compressed with the `T64` codec
+  [\#9039](https://github.com/ClickHouse/ClickHouse/pull/9039) [(abyss7)](https://github.com/abyss7)
+- Fix the order of ranges while reading from a MergeTree table in one thread. Fixes [\#8964](https://github.com/ClickHouse/ClickHouse/issues/8964).
+  [\#9050](https://github.com/ClickHouse/ClickHouse/pull/9050) [(CurtizJ)](https://github.com/CurtizJ)
+- Fix a possible segfault in `MergeTreeRangeReader` while executing `PREWHERE`. Fixes [\#9064](https://github.com/ClickHouse/ClickHouse/issues/9064).
+  [\#9106](https://github.com/ClickHouse/ClickHouse/pull/9106) [(CurtizJ)](https://github.com/CurtizJ)
+- Fix `reinterpretAsFixedString` to return `FixedString` instead of `String`.
+  [\#9052](https://github.com/ClickHouse/ClickHouse/pull/9052) [(oandrew)](https://github.com/oandrew)
+- Fix `joinGet` with nullable return types. Fixes [\#8919](https://github.com/ClickHouse/ClickHouse/issues/8919)
+  [\#9014](https://github.com/ClickHouse/ClickHouse/pull/9014) [(amosbird)](https://github.com/amosbird)
+- Fix the fuzz test and incorrect behaviour of the bitTestAll/bitTestAny functions.
+  [\#9143](https://github.com/ClickHouse/ClickHouse/pull/9143) [(alexey-milovidov)](https://github.com/alexey-milovidov)
+- Fix the behaviour of the match and extract functions when the haystack has zero bytes. The behaviour was wrong when the haystack was constant. Fixes [\#9160](https://github.com/ClickHouse/ClickHouse/issues/9160)
+  [\#9163](https://github.com/ClickHouse/ClickHouse/pull/9163) [(alexey-milovidov)](https://github.com/alexey-milovidov)
+- Fixed the execution of inverted predicates when a non-strictly-monotonic functional index is used. Fixes [\#9034](https://github.com/ClickHouse/ClickHouse/issues/9034)
+  [\#9223](https://github.com/ClickHouse/ClickHouse/pull/9223) [(Akazz)](https://github.com/Akazz)
+- Allow rewriting `CROSS` to `INNER JOIN` if there is a `[NOT] LIKE` operator in the `WHERE` section. Fixes [\#9191](https://github.com/ClickHouse/ClickHouse/issues/9191)
+  [\#9229](https://github.com/ClickHouse/ClickHouse/pull/9229) [(4ertus2)](https://github.com/4ertus2)
+- Allow the first column(s) in a table with a Log engine to be an alias.
+  [\#9231](https://github.com/ClickHouse/ClickHouse/pull/9231) [(abyss7)](https://github.com/abyss7)
+- Allow a comma join with `IN()` inside. Fixes [\#7314](https://github.com/ClickHouse/ClickHouse/issues/7314).
+  [\#9251](https://github.com/ClickHouse/ClickHouse/pull/9251) [(4ertus2)](https://github.com/4ertus2)
+- Improve the `ALTER MODIFY/ADD` queries logic. Now you cannot `ADD` a column without a type, `MODIFY` of a default expression doesn't change the type of the column, and `MODIFY` of the type doesn't lose the default expression value. Fixes [\#8669](https://github.com/ClickHouse/ClickHouse/issues/8669).
+  [\#9227](https://github.com/ClickHouse/ClickHouse/pull/9227) [(alesapin)](https://github.com/alesapin)
+- Fix mutation finalization, when an already-done mutation could have the status is\_done=0.
+  [\#9217](https://github.com/ClickHouse/ClickHouse/pull/9217) [(alesapin)](https://github.com/alesapin)
+- Support the "Processors" pipeline for system.numbers and system.numbers\_mt. This also fixes a bug where `max_execution_time` was not respected.
+  [\#7796](https://github.com/ClickHouse/ClickHouse/pull/7796) [(KochetovNicolai)](https://github.com/KochetovNicolai)
+- Fix wrong counting of the `DictCacheKeysRequestedFound` metric.
+  [\#9411](https://github.com/ClickHouse/ClickHouse/pull/9411) [(nikitamikhaylov)](https://github.com/nikitamikhaylov)
+- Added a check of the storage policy in `ATTACH PARTITION FROM`, `REPLACE PARTITION`, `MOVE TO TABLE`, which otherwise could make the data of a part inaccessible after restart and prevent ClickHouse from starting.
+  [\#9383](https://github.com/ClickHouse/ClickHouse/pull/9383) [(excitoon)](https://github.com/excitoon)
+- Fixed a UBSan report in `MergeTreeIndexSet`. This fixes [\#9250](https://github.com/ClickHouse/ClickHouse/issues/9250)
+  [\#9365](https://github.com/ClickHouse/ClickHouse/pull/9365) [(alexey-milovidov)](https://github.com/alexey-milovidov)
+- Fixed a possible data race in BlockIO.
+  [\#9356](https://github.com/ClickHouse/ClickHouse/pull/9356) [(KochetovNicolai)](https://github.com/KochetovNicolai)
+- Support `UInt64` numbers that don't fit in Int64 in JSON-related functions. Updated `SIMDJSON` to master. This fixes [\#9209](https://github.com/ClickHouse/ClickHouse/issues/9209)
+  [\#9344](https://github.com/ClickHouse/ClickHouse/pull/9344) [(alexey-milovidov)](https://github.com/alexey-milovidov)
+- Fixed an issue where the amount of free space was calculated incorrectly if the data directory is mounted on a separate device. For the default disk, calculate the free space from the data subdirectory. This fixes [\#7441](https://github.com/ClickHouse/ClickHouse/issues/7441)
+  [\#9257](https://github.com/ClickHouse/ClickHouse/pull/9257) [(millb)](https://github.com/millb)
+- Fixed an issue where TLS connections could fail with the message `OpenSSL SSL_read: error:14094438:SSL routines:ssl3_read_bytes:tlsv1 alert internal error and SSL Exception: error:2400006E:random number generator::error retrieving entropy.` Updated OpenSSL to upstream master.
+  [\#8956](https://github.com/ClickHouse/ClickHouse/pull/8956) [(alexey-milovidov)](https://github.com/alexey-milovidov)
+- When executing a `CREATE` query, fold constant expressions in storage engine arguments. Replace an empty database name with the current database. Fixes [\#6508](https://github.com/ClickHouse/ClickHouse/issues/6508), [\#3492](https://github.com/ClickHouse/ClickHouse/issues/3492). Also fixed the check for local addresses in ClickHouseDictionarySource.
+  [\#9262](https://github.com/ClickHouse/ClickHouse/pull/9262) [(tavplubix)](https://github.com/tavplubix)
+- Fix a segfault in `StorageMerge`, which could happen when reading from StorageFile.
+  [\#9387](https://github.com/ClickHouse/ClickHouse/pull/9387) [(tavplubix)](https://github.com/tavplubix)
+- Prevent losing data in `Kafka` in the rare case when an exception happens after reading the suffix but before commit. Fixes [\#9378](https://github.com/ClickHouse/ClickHouse/issues/9378). Related: [\#7175](https://github.com/ClickHouse/ClickHouse/issues/7175)
+  [\#9507](https://github.com/ClickHouse/ClickHouse/pull/9507) [(filimonov)](https://github.com/filimonov)
+- Fix a bug leading to server termination when trying to use/drop a `Kafka` table created with wrong parameters. Fixes [\#9494](https://github.com/ClickHouse/ClickHouse/issues/9494). Incorporates [\#9507](https://github.com/ClickHouse/ClickHouse/issues/9507).
+  [\#9513](https://github.com/ClickHouse/ClickHouse/pull/9513) [(filimonov)](https://github.com/filimonov)
+
+#### New Feature {#new-feature-1}
+
+- Added the `deduplicate_blocks_in_dependent_materialized_views` option to control the behaviour of idempotent inserts into tables with materialized views (see the sketch below). This new feature was added to the bugfix release by a special request from Altinity.
+  [\#9070](https://github.com/ClickHouse/ClickHouse/pull/9070) [(urykhy)](https://github.com/urykhy)
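+
+A hedged sketch of how the option is meant to be used (the table `src` and its materialized view are hypothetical; the comments describe the documented intent of the setting):
+
+``` sql
+SET deduplicate_blocks_in_dependent_materialized_views = 1;
+
+-- With the setting enabled, a retried INSERT whose block is deduplicated in the
+-- source Replicated* table is also deduplicated in tables attached to it via
+-- materialized views, keeping such inserts idempotent end to end.
+INSERT INTO src VALUES (1), (2);
+INSERT INTO src VALUES (1), (2); -- identical block: skipped in src and in the MV target
+```
+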
+### ClickHouse release v20.1.2.4, 2020-01-22 {#clickhouse-release-v20-1-2-4-2020-01-22}
+
+#### Backward Incompatible Change {#backward-incompatible-change-1}
+
+- Made the setting `merge_tree_uniform_read_distribution` obsolete. The server still recognizes this setting but it has no effect. [\#8308](https://github.com/ClickHouse/ClickHouse/pull/8308) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Changed the return type of the function `greatCircleDistance` to `Float32` because now the result of the calculation is `Float32`. [\#7993](https://github.com/ClickHouse/ClickHouse/pull/7993) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Now it's expected that query parameters are represented in "escaped" format. For example, to pass the string `ab` (with a tab character between `a` and `b`) you have to write `a\tb` or `a\b` and, respectively, `a%5Ctb` or `a%5C%09b` in a URL. This is needed to add the possibility to pass NULL as `\N`. This fixes [\#7488](https://github.com/ClickHouse/ClickHouse/issues/7488). [\#8517](https://github.com/ClickHouse/ClickHouse/pull/8517) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Enable the `use_minimalistic_part_header_in_zookeeper` setting for `ReplicatedMergeTree` by default. This will significantly reduce the amount of data stored in ZooKeeper. This setting has been supported since version 19.1, and we have already used it in production in multiple services without any issues for more than half a year. Disable this setting if you have a chance to downgrade to versions older than 19.1. [\#6850](https://github.com/ClickHouse/ClickHouse/pull/6850) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Data skipping indices are production-ready and enabled by default. The settings `allow_experimental_data_skipping_indices`, `allow_experimental_cross_to_join_conversion` and `allow_experimental_multiple_joins_emulation` are now obsolete and do nothing. [\#7974](https://github.com/ClickHouse/ClickHouse/pull/7974) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Add new `ANY JOIN` logic for `StorageJoin` consistent with the `JOIN` operation. To upgrade without changes in behaviour you need to add `SETTINGS any_join_distinct_right_table_keys = 1` to Engine Join tables metadata or recreate these tables after the upgrade. [\#8400](https://github.com/ClickHouse/ClickHouse/pull/8400) ([Artem Zuikov](https://github.com/4ertus2))
+- Require the server to be restarted to apply changes in the logging configuration. This is a temporary workaround to avoid a bug where the server logs to a deleted log file (see [\#8696](https://github.com/ClickHouse/ClickHouse/issues/8696)). [\#8707](https://github.com/ClickHouse/ClickHouse/pull/8707) ([Alexander Kuzmenkov](https://github.com/akuzm))
+
+#### New Feature {#new-feature-2}
+
+- Added information about part paths to `system.merges`. [\#8043](https://github.com/ClickHouse/ClickHouse/pull/8043) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Added the ability to execute `SYSTEM RELOAD DICTIONARY` queries in `ON CLUSTER` mode. [\#8288](https://github.com/ClickHouse/ClickHouse/pull/8288) ([Guillaume Tassery](https://github.com/YiuRULE))
+- Added the ability to execute `CREATE DICTIONARY` queries in `ON CLUSTER` mode. [\#8163](https://github.com/ClickHouse/ClickHouse/pull/8163) ([alesapin](https://github.com/alesapin))
+- Now a user's profile in `users.xml` can inherit multiple profiles. [\#8343](https://github.com/ClickHouse/ClickHouse/pull/8343) ([Mikhail f. Shiryaev](https://github.com/Felixoid))
+- Added the `system.stack_trace` table that allows looking at the stack traces of all server threads. This is useful for developers to introspect the server state. This fixes [\#7576](https://github.com/ClickHouse/ClickHouse/issues/7576). [\#8344](https://github.com/ClickHouse/ClickHouse/pull/8344) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added the `DateTime64` data type with configurable sub-second precision. [\#7170](https://github.com/ClickHouse/ClickHouse/pull/7170) ([Vasily Nemkov](https://github.com/Enmk))
+- Added the table function `clusterAllReplicas`, which allows querying all the nodes in the cluster. [\#8493](https://github.com/ClickHouse/ClickHouse/pull/8493) ([kiran sunkari](https://github.com/kiransunkari))
+- Added the aggregate function `categoricalInformationValue`, which calculates the information value of a discrete feature. [\#8117](https://github.com/ClickHouse/ClickHouse/pull/8117) ([hcz](https://github.com/hczhcz))
+- Speed up parsing of data files in `CSV`, `TSV` and `JSONEachRow` formats by doing it in parallel. [\#7780](https://github.com/ClickHouse/ClickHouse/pull/7780) ([Alexander Kuzmenkov](https://github.com/akuzm))
+- Added the function `bankerRound`, which performs banker's rounding. [\#8112](https://github.com/ClickHouse/ClickHouse/pull/8112) ([hcz](https://github.com/hczhcz))
+- Support more languages in the embedded dictionary for region names: 'ru', 'en', 'ua', 'uk', 'by', 'kz', 'tr', 'de', 'uz', 'lv', 'lt', 'et', 'pt', 'he', 'vi'. [\#8189](https://github.com/ClickHouse/ClickHouse/pull/8189) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Improved the consistency of the `ANY JOIN` logic. Now `t1 ANY LEFT JOIN t2` equals `t2 ANY RIGHT JOIN t1`. [\#7665](https://github.com/ClickHouse/ClickHouse/pull/7665) ([Artem Zuikov](https://github.com/4ertus2))
+- Added the setting `any_join_distinct_right_table_keys`, which enables the old behaviour of `ANY INNER JOIN`. [\#7665](https://github.com/ClickHouse/ClickHouse/pull/7665) ([Artem Zuikov](https://github.com/4ertus2))
+- Added new `SEMI` and `ANTI JOIN`. The old `ANY INNER JOIN` behaviour is now available as `SEMI LEFT JOIN`. [\#7665](https://github.com/ClickHouse/ClickHouse/pull/7665) ([Artem Zuikov](https://github.com/4ertus2))
+- Added a `Distributed` format for the `File` engine and the `file` table function, which allows reading from `.bin` files generated by asynchronous inserts into a `Distributed` table. [\#8535](https://github.com/ClickHouse/ClickHouse/pull/8535) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Added an optional reset-column argument to `runningAccumulate`, which allows resetting aggregation results for each new key value. [\#8326](https://github.com/ClickHouse/ClickHouse/pull/8326) ([Sergey Kononenko](https://github.com/kononencheg))
+- Added the ability to use ClickHouse as a Prometheus endpoint. [\#7900](https://github.com/ClickHouse/ClickHouse/pull/7900) ([vdimir](https://github.com/Vdimir))
+- Added a `` section in `config.xml` that restricts the allowed hosts for remote table engines and the table functions `URL`, `S3`, `HDFS`. [\#7154](https://github.com/ClickHouse/ClickHouse/pull/7154) ([Mikhail Korotov](https://github.com/millb))
+- Added the function `greatCircleAngle`, which calculates the distance on a sphere in degrees. [\#8105](https://github.com/ClickHouse/ClickHouse/pull/8105) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Changed the Earth radius to be consistent with the H3 library. [\#8105](https://github.com/ClickHouse/ClickHouse/pull/8105) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added the `JSONCompactEachRow` and `JSONCompactEachRowWithNamesAndTypes` input and output formats. [\#7841](https://github.com/ClickHouse/ClickHouse/pull/7841) ([Mikhail Korotov](https://github.com/millb))
+- Added a feature for file-related table engines and table functions (`File`, `S3`, `URL`, `HDFS`) that allows reading and writing `gzip` files, based on an additional engine parameter or the file extension. [\#7840](https://github.com/ClickHouse/ClickHouse/pull/7840) ([Andrey Bodrov](https://github.com/apbodrov))
+- Added the `randomASCII(length)` function, which generates a string with a random set of [ASCII](https://en.wikipedia.org/wiki/ASCII#Printable_characters) printable characters. [\#8401](https://github.com/ClickHouse/ClickHouse/pull/8401) ([BayoNet](https://github.com/BayoNet))
+- Added the function `JSONExtractArrayRaw`, which returns an array of unparsed JSON array elements from a `JSON` string. [\#8081](https://github.com/ClickHouse/ClickHouse/pull/8081) ([Oleg Matrokhin](https://github.com/errx))
+- Added the `arrayZip` function, which allows combining multiple arrays of equal length into one array of tuples (see the example below). [\#8149](https://github.com/ClickHouse/ClickHouse/pull/8149) ([Winter Zhang](https://github.com/zhang2014))
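+
+A quick illustration of `arrayZip` (behaviour as documented for the function):
+
+``` sql
+SELECT arrayZip(['a', 'b', 'c'], [1, 2, 3]);
+-- returns [('a',1),('b',2),('c',3)]
+```
+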
+- Added the ability to move data between disks according to configured `TTL` expressions for the `*MergeTree` table engines family. [\#8140](https://github.com/ClickHouse/ClickHouse/pull/8140) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Added the new aggregate function `avgWeighted`, which allows calculating the weighted average (see the example below). [\#7898](https://github.com/ClickHouse/ClickHouse/pull/7898) ([Andrey Bodrov](https://github.com/apbodrov))
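+
+A small worked example (avgWeighted(x, w) computes sum(x * w) / sum(w)):
+
+``` sql
+SELECT avgWeighted(x, w)
+FROM (SELECT 4 AS x, 1 AS w UNION ALL SELECT 10 AS x, 2 AS w);
+-- (4*1 + 10*2) / (1 + 2) = 8
+```
+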
+- Now parallel parsing is enabled by default for the `TSV`, `TSKV`, `CSV` and `JSONEachRow` formats. [\#7894](https://github.com/ClickHouse/ClickHouse/pull/7894) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
+- Added several geo functions from the `H3` library: `h3GetResolution`, `h3EdgeAngle`, `h3EdgeLength`, `h3IsValid` and `h3kRing`. [\#8034](https://github.com/ClickHouse/ClickHouse/pull/8034) ([Konstantin Malanchev](https://github.com/hombit))
+- Added support for brotli (`br`) compression in file-related storages and table functions. This fixes [\#8156](https://github.com/ClickHouse/ClickHouse/issues/8156). [\#8526](https://github.com/ClickHouse/ClickHouse/pull/8526) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added `groupBit*` functions for the `SimpleAggregationFunction` type. [\#8485](https://github.com/ClickHouse/ClickHouse/pull/8485) ([Guillaume Tassery](https://github.com/YiuRULE))
+
+#### Bug Fix {#bug-fix-6}
+
+- Fix the renaming of tables with the `Distributed` engine. Fixes issue [\#7868](https://github.com/ClickHouse/ClickHouse/issues/7868). [\#8306](https://github.com/ClickHouse/ClickHouse/pull/8306) ([tavplubix](https://github.com/tavplubix))
+- Now dictionaries support `EXPRESSION` for attributes in arbitrary strings in a non-ClickHouse SQL dialect. [\#8098](https://github.com/ClickHouse/ClickHouse/pull/8098) ([alesapin](https://github.com/alesapin))
+- Fix broken `INSERT SELECT FROM mysql(...)` queries. This fixes [\#8070](https://github.com/ClickHouse/ClickHouse/issues/8070) and [\#7960](https://github.com/ClickHouse/ClickHouse/issues/7960). [\#8234](https://github.com/ClickHouse/ClickHouse/pull/8234) ([tavplubix](https://github.com/tavplubix))
+- Fix the error "Mismatch column sizes" when inserting a default `Tuple` from `JSONEachRow`. This fixes [\#5653](https://github.com/ClickHouse/ClickHouse/issues/5653). [\#8606](https://github.com/ClickHouse/ClickHouse/pull/8606) ([tavplubix](https://github.com/tavplubix))
+- Now an exception will be thrown in case of using `WITH TIES` alongside `LIMIT BY`. Also added the ability to use `TOP` with `LIMIT BY`. This fixes [\#7472](https://github.com/ClickHouse/ClickHouse/issues/7472). [\#7637](https://github.com/ClickHouse/ClickHouse/pull/7637) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
+- Fix an unintended dependency of the `clickhouse-odbc-bridge` binary on a fresh glibc version. [\#8046](https://github.com/ClickHouse/ClickHouse/pull/8046) ([Amos Bird](https://github.com/amosbird))
+- Fix a bug in the check function of the `*MergeTree` engines family. Now it doesn't fail when we have an equal amount of rows in the last granule and the last mark (non-final). [\#8047](https://github.com/ClickHouse/ClickHouse/pull/8047) ([alesapin](https://github.com/alesapin))
+- Fix inserting into `Enum*` columns after an `ALTER` query, when the underlying numeric type is equal to the table-specified type. This fixes [\#7836](https://github.com/ClickHouse/ClickHouse/issues/7836). [\#7908](https://github.com/ClickHouse/ClickHouse/pull/7908) ([Anton Popov](https://github.com/CurtizJ))
+- Allowed a non-constant negative "size" argument for the function `substring`. It was disallowed by mistake. This fixes [\#4832](https://github.com/ClickHouse/ClickHouse/issues/4832). [\#7703](https://github.com/ClickHouse/ClickHouse/pull/7703) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix a parsing bug when the wrong number of arguments is passed to the `(O|J)DBC` table engine. [\#7709](https://github.com/ClickHouse/ClickHouse/pull/7709) ([alesapin](https://github.com/alesapin))
+- Use the command name of the running clickhouse process when sending logs to syslog. In previous versions, an empty string was used instead of the command name. [\#8460](https://github.com/ClickHouse/ClickHouse/pull/8460) ([Michael Nacharov](https://github.com/mnach))
+- Fix the check of allowed hosts for `localhost`. This PR fixes the solution provided in [\#8241](https://github.com/ClickHouse/ClickHouse/pull/8241). [\#8342](https://github.com/ClickHouse/ClickHouse/pull/8342) ([Vitaly Baranov](https://github.com/vitlibar))
+- Fix a rare crash in the `argMin` and `argMax` functions for long string arguments, when the result is used in the `runningAccumulate` function. This fixes [\#8325](https://github.com/ClickHouse/ClickHouse/issues/8325) [\#8341](https://github.com/ClickHouse/ClickHouse/pull/8341) ([dinosaur](https://github.com/769344359))
+- Fix memory overcommit for tables with the `Buffer` engine. [\#8345](https://github.com/ClickHouse/ClickHouse/pull/8345) ([Azat Khuzhin](https://github.com/azat))
+- Fixed a potential bug in functions that can take `NULL` as one of the arguments and return non-NULL. [\#8196](https://github.com/ClickHouse/ClickHouse/pull/8196) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Better metrics calculations in the thread pool for background processes of the `MergeTree` table engines. [\#8194](https://github.com/ClickHouse/ClickHouse/pull/8194) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Fix the function `IN` inside a `WHERE` statement when a row-level table filter is present. Fixes [\#6687](https://github.com/ClickHouse/ClickHouse/issues/6687) [\#8357](https://github.com/ClickHouse/ClickHouse/pull/8357) ([Ivan](https://github.com/abyss7))
+- Now an exception is thrown if an integral value is not parsed completely for setting values. [\#7678](https://github.com/ClickHouse/ClickHouse/pull/7678) ([Mikhail Korotov](https://github.com/millb))
+- Fix an exception when an aggregate function is used in a query to a distributed table with more than two local shards. [\#8164](https://github.com/ClickHouse/ClickHouse/pull/8164) ([小路](https://github.com/nicelulu))
+- Now bloom filters can handle zero-length arrays and don't perform redundant calculations. [\#8242](https://github.com/ClickHouse/ClickHouse/pull/8242) ([achimbab](https://github.com/achimbab))
+- Fixed checking whether a client host is allowed by matching the client host to `host_regexp` specified in `users.xml`. [\#8241](https://github.com/ClickHouse/ClickHouse/pull/8241) ([Vitaly Baranov](https://github.com/vitlibar))
+- Relax the ambiguous column check that led to false positives with multiple `JOIN ON` sections. [\#8385](https://github.com/ClickHouse/ClickHouse/pull/8385) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed a possible server crash (`std::terminate`) when the server cannot send or write data in `JSON` or `XML` format with values of the `String` data type (which require `UTF-8` validation), or when compressing result data with the Brotli algorithm, or in some other rare cases. This fixes [\#7603](https://github.com/ClickHouse/ClickHouse/issues/7603) [\#8384](https://github.com/ClickHouse/ClickHouse/pull/8384) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix a race condition in `StorageDistributedDirectoryMonitor` found by CI. This fixes [\#8364](https://github.com/ClickHouse/ClickHouse/issues/8364). [\#8383](https://github.com/ClickHouse/ClickHouse/pull/8383) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Now background merges in the `*MergeTree` table engines family preserve the storage policy volume order more accurately. [\#8549](https://github.com/ClickHouse/ClickHouse/pull/8549) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Now the `Kafka` table engine works properly with the `Native` format. This fixes [\#6731](https://github.com/ClickHouse/ClickHouse/issues/6731) [\#7337](https://github.com/ClickHouse/ClickHouse/issues/7337) [\#8003](https://github.com/ClickHouse/ClickHouse/issues/8003). [\#8016](https://github.com/ClickHouse/ClickHouse/pull/8016) ([filimonov](https://github.com/filimonov))
+- Fixed formats with headers (like `CSVWithNames`) that were throwing an exception about EOF for the `Kafka` table engine. [\#8016](https://github.com/ClickHouse/ClickHouse/pull/8016) ([filimonov](https://github.com/filimonov))
+- Fixed a bug with making a set from a subquery in the right part of an `IN` section. This fixes [\#5767](https://github.com/ClickHouse/ClickHouse/issues/5767) and [\#2542](https://github.com/ClickHouse/ClickHouse/issues/2542). [\#7755](https://github.com/ClickHouse/ClickHouse/pull/7755) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
+- Fix a possible crash while reading from the `File` storage. [\#7756](https://github.com/ClickHouse/ClickHouse/pull/7756) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fixed reading of files in `Parquet` format containing columns of type `list`. [\#8334](https://github.com/ClickHouse/ClickHouse/pull/8334) ([maxulan](https://github.com/maxulan))
+- Fix the error `Not found column` for distributed queries with a `PREWHERE` condition dependent on the sampling key if `max_parallel_replicas > 1`. [\#7913](https://github.com/ClickHouse/ClickHouse/pull/7913) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fix the error `Not found column` if a query used `PREWHERE` dependent on a table's alias and the result set was empty because of the primary key condition. [\#7911](https://github.com/ClickHouse/ClickHouse/pull/7911) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fixed the return type of the functions `rand` and `randConstant` in case of a `Nullable` argument. Now the functions always return `UInt32` and never `Nullable(UInt32)`. [\#8204](https://github.com/ClickHouse/ClickHouse/pull/8204) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Disabled predicate push-down for `WITH FILL` expressions. This fixes [\#7784](https://github.com/ClickHouse/ClickHouse/issues/7784). [\#7789](https://github.com/ClickHouse/ClickHouse/pull/7789) ([Winter Zhang](https://github.com/zhang2014))
+- Fixed the incorrect `count()` result for `SummingMergeTree` when the `FINAL` section is used. [\#3280](https://github.com/ClickHouse/ClickHouse/issues/3280) [\#7786](https://github.com/ClickHouse/ClickHouse/pull/7786) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
+- Fix a possible incorrect result for constant functions from remote servers. It happened in queries with functions like `version()`, `uptime()`, etc., which return different constant values for different servers. This fixes [\#7666](https://github.com/ClickHouse/ClickHouse/issues/7666). [\#7689](https://github.com/ClickHouse/ClickHouse/pull/7689) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fix a complicated bug in the push-down predicate optimization that led to wrong results. This solves a lot of issues with the push-down predicate optimization. [\#8503](https://github.com/ClickHouse/ClickHouse/pull/8503) ([Winter Zhang](https://github.com/zhang2014))
+- Fix a crash in `CREATE TABLE .. AS dictionary` queries. [\#8508](https://github.com/ClickHouse/ClickHouse/pull/8508) ([Azat Khuzhin](https://github.com/azat))
+- Several improvements to the ClickHouse grammar in the `.g4` file. [\#8294](https://github.com/ClickHouse/ClickHouse/pull/8294) ([taiyang-li](https://github.com/taiyang-li))
+- Fix a bug that led to crashes in `JOIN`s with tables with the engine `Join`. This fixes [\#7556](https://github.com/ClickHouse/ClickHouse/issues/7556) [\#8254](https://github.com/ClickHouse/ClickHouse/issues/8254) [\#7915](https://github.com/ClickHouse/ClickHouse/issues/7915) [\#8100](https://github.com/ClickHouse/ClickHouse/issues/8100). [\#8298](https://github.com/ClickHouse/ClickHouse/pull/8298) ([Artem Zuikov](https://github.com/4ertus2))
+- Fix redundant dictionary reloading on `CREATE DATABASE`. [\#7916](https://github.com/ClickHouse/ClickHouse/pull/7916) ([Azat Khuzhin](https://github.com/azat))
+- Limit the maximum number of streams for reading from `StorageFile` and `StorageHDFS`. Fixes https://github.com/ClickHouse/ClickHouse/issues/7650. [\#7981](https://github.com/ClickHouse/ClickHouse/pull/7981) ([alesapin](https://github.com/alesapin))
+- Fix a bug in `ALTER ... MODIFY ... CODEC` queries when a user specifies both a default expression and a codec. Fixes [8593](https://github.com/ClickHouse/ClickHouse/issues/8593). [\#8614](https://github.com/ClickHouse/ClickHouse/pull/8614) ([alesapin](https://github.com/alesapin))
+- Fix an error in background merges of columns of `SimpleAggregateFunction(LowCardinality)` type. [\#8613](https://github.com/ClickHouse/ClickHouse/pull/8613) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fixed the type check in the function `toDateTime64`. [\#8375](https://github.com/ClickHouse/ClickHouse/pull/8375) ([Vasily Nemkov](https://github.com/Enmk))
+- Now the server doesn't crash on `LEFT` or `FULL JOIN` with the Join engine and an unsupported `join_use_nulls` setting. [\#8479](https://github.com/ClickHouse/ClickHouse/pull/8479) ([Artem Zuikov](https://github.com/4ertus2))
+- Now the `DROP DICTIONARY IF EXISTS db.dict` query doesn't throw an exception if `db` doesn't exist. [\#8185](https://github.com/ClickHouse/ClickHouse/pull/8185) ([Vitaly Baranov](https://github.com/vitlibar))
+- Fix possible crashes in table functions (`file`, `mysql`, `remote`) caused by the usage of a reference to a removed `IStorage` object. Fix incorrect parsing of columns specified at insertion into a table function. [\#7762](https://github.com/ClickHouse/ClickHouse/pull/7762) ([tavplubix](https://github.com/tavplubix))
+- Ensure the network is up before starting `clickhouse-server`. This fixes [\#7507](https://github.com/ClickHouse/ClickHouse/issues/7507). [\#8570](https://github.com/ClickHouse/ClickHouse/pull/8570) ([Zhichang Yu](https://github.com/yuzhichang))
+- Fix timeout handling for secure connections, so queries don't hang indefinitely. This fixes [\#8126](https://github.com/ClickHouse/ClickHouse/issues/8126). [\#8128](https://github.com/ClickHouse/ClickHouse/pull/8128) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix redundant contention between concurrent workers in `clickhouse-copier`. [\#7816](https://github.com/ClickHouse/ClickHouse/pull/7816) ([Ding Xiang Fei](https://github.com/dingxiangfei2009))
+- Now mutations don't skip attached parts, even if their mutation version is larger than the current mutation version. [\#7812](https://github.com/ClickHouse/ClickHouse/pull/7812) ([Zhichang Yu](https://github.com/yuzhichang)) [\#8250](https://github.com/ClickHouse/ClickHouse/pull/8250) ([alesapin](https://github.com/alesapin))
+- Ignore redundant copies of `*MergeTree` data parts after a move to another disk and a server restart. [\#7810](https://github.com/ClickHouse/ClickHouse/pull/7810) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Fix a crash in `FULL JOIN` with `LowCardinality` in the `JOIN` key. [\#8252](https://github.com/ClickHouse/ClickHouse/pull/8252) ([Artem Zuikov](https://github.com/4ertus2))
+- Forbid using a column name more than once in an insert query, like `INSERT INTO tbl (x, y, x)`. This fixes [\#5465](https://github.com/ClickHouse/ClickHouse/issues/5465), [\#7681](https://github.com/ClickHouse/ClickHouse/issues/7681). [\#7685](https://github.com/ClickHouse/ClickHouse/pull/7685) ([alesapin](https://github.com/alesapin))
+- Added a fallback for detecting the number of physical CPU cores for unknown CPUs (using the number of logical CPU cores). This fixes [\#5239](https://github.com/ClickHouse/ClickHouse/issues/5239). [\#7726](https://github.com/ClickHouse/ClickHouse/pull/7726) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix the `There's no column` error for materialized and alias columns. [\#8210](https://github.com/ClickHouse/ClickHouse/pull/8210) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed a severe crash when an `EXISTS` query was used without a `TABLE` or `DICTIONARY` qualifier, just like `EXISTS t`. This fixes [\#8172](https://github.com/ClickHouse/ClickHouse/issues/8172). This bug was introduced in version 19.17. [\#8213](https://github.com/ClickHouse/ClickHouse/pull/8213) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix the rare error `"Sizes of columns doesn't match"` that could appear when using `SimpleAggregateFunction` columns. [\#7790](https://github.com/ClickHouse/ClickHouse/pull/7790) ([Boris Granveaud](https://github.com/bgranvea))
+- Fix a bug where a user with an empty `allow_databases` got access to all databases (the same for `allow_dictionaries`). [\#7793](https://github.com/ClickHouse/ClickHouse/pull/7793) ([DeifyTheGod](https://github.com/DeifyTheGod))
+- Fix a client crash when the server is already disconnected from the client. [\#8071](https://github.com/ClickHouse/ClickHouse/pull/8071) ([Azat Khuzhin](https://github.com/azat))
+- Fix `ORDER BY` behaviour in case of sorting by a primary key prefix and a non-primary-key suffix. [\#7759](https://github.com/ClickHouse/ClickHouse/pull/7759) ([Anton Popov](https://github.com/CurtizJ))
+- Check whether a qualified column is present in the table. This fixes [\#6836](https://github.com/ClickHouse/ClickHouse/issues/6836). [\#7758](https://github.com/ClickHouse/ClickHouse/pull/7758) ([Artem Zuikov](https://github.com/4ertus2))
+- Fixed the behaviour of `ALTER MOVE` run immediately after a merge finished, which moved the superpart of the specified part. Fixes [\#8103](https://github.com/ClickHouse/ClickHouse/issues/8103). [\#8104](https://github.com/ClickHouse/ClickHouse/pull/8104) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Fix a possible server crash while using `UNION` with a different number of columns. Fixes [\#7279](https://github.com/ClickHouse/ClickHouse/issues/7279). [\#7929](https://github.com/ClickHouse/ClickHouse/pull/7929) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fix the size of the result substring for the function `substr` with a negative size. [\#8589](https://github.com/ClickHouse/ClickHouse/pull/8589) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Now the server doesn't execute part mutations in `MergeTree` if there are not enough free threads in the background pool. [\#8588](https://github.com/ClickHouse/ClickHouse/pull/8588) ([tavplubix](https://github.com/tavplubix))
+- Fix a minor typo in formatting `UNION ALL` AST. [\#7999](https://github.com/ClickHouse/ClickHouse/pull/7999) ([litao91](https://github.com/litao91))
+- Fixed incorrect bloom filter results for negative numbers. This fixes [\#8317](https://github.com/ClickHouse/ClickHouse/issues/8317). [\#8566](https://github.com/ClickHouse/ClickHouse/pull/8566) ([Winter Zhang](https://github.com/zhang2014))
+- Fixed a potential buffer overflow in decompression. A malicious user could pass fabricated compressed data that would cause a read after the buffer. This issue was found by Eldar Zaitov from the Yandex information security team. [\#8404](https://github.com/ClickHouse/ClickHouse/pull/8404) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix an incorrect result caused by integer overflow in `arrayIntersect`. [\#7777](https://github.com/ClickHouse/ClickHouse/pull/7777) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Now the `OPTIMIZE TABLE` query won't wait for offline replicas to perform the operation. [\#8314](https://github.com/ClickHouse/ClickHouse/pull/8314) ([javi santana](https://github.com/javisantana))
+- Fixed the `ALTER TTL` parser for `Replicated*MergeTree` tables. [\#8318](https://github.com/ClickHouse/ClickHouse/pull/8318) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Fix the communication between server and client, so the server reads temporary table info after a query failure. [\#8084](https://github.com/ClickHouse/ClickHouse/pull/8084) ([Azat Khuzhin](https://github.com/azat))
+- Fix a `bitmapAnd` function error when intersecting an aggregated bitmap and a scalar bitmap. [\#8082](https://github.com/ClickHouse/ClickHouse/pull/8082) ([Yue Huang](https://github.com/moon03432))
+- Refine the definition of `ZXid` according to the ZooKeeper Programmer's Guide, which fixes a bug in `clickhouse-cluster-copier`. [\#8088](https://github.com/ClickHouse/ClickHouse/pull/8088) ([Ding Xiang Fei](https://github.com/dingxiangfei2009))
+- The `odbc` table function now respects the `external_table_functions_use_nulls` setting. [\#7506](https://github.com/ClickHouse/ClickHouse/pull/7506) ([Vasily Nemkov](https://github.com/Enmk))
+- Fixed a bug that led to a rare data race. [\#8143](https://github.com/ClickHouse/ClickHouse/pull/8143) ([Alexander Kazakov](https://github.com/Akazz))
+- Now `SYSTEM RELOAD DICTIONARY` reloads a dictionary completely, ignoring `update_field`. This fixes [\#7440](https://github.com/ClickHouse/ClickHouse/issues/7440). [\#8037](https://github.com/ClickHouse/ClickHouse/pull/8037) ([Vitaly Baranov](https://github.com/vitlibar))
+- Added the ability to check if a dictionary exists in a CREATE query. [\#8032](https://github.com/ClickHouse/ClickHouse/pull/8032) ([alesapin](https://github.com/alesapin))
+- Fix `Float*` parsing in the `Values` format. This fixes [\#7817](https://github.com/ClickHouse/ClickHouse/issues/7817). [\#7870](https://github.com/ClickHouse/ClickHouse/pull/7870) ([tavplubix](https://github.com/tavplubix))
+- Fix a crash when we cannot reserve space in some background operations of the `*MergeTree` table engines family. [\#7873](https://github.com/ClickHouse/ClickHouse/pull/7873) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Fix a crash during merge operations when the table contains a `SimpleAggregateFunction(LowCardinality)` column. This fixes [\#8515](https://github.com/ClickHouse/ClickHouse/issues/8515). [\#8522](https://github.com/ClickHouse/ClickHouse/pull/8522) ([Azat Khuzhin](https://github.com/azat))
+- Restore support for all ICU locales and add the ability to apply collations to constant expressions. Also add the language name to the `system.collations` table. [\#8051](https://github.com/ClickHouse/ClickHouse/pull/8051) ([alesapin](https://github.com/alesapin))
+- Fix a bug where external dictionaries with zero minimal lifetime (`LIFETIME(MIN 0 MAX N)`, `LIFETIME(N)`) didn't update in the background. [\#7983](https://github.com/ClickHouse/ClickHouse/pull/7983) ([alesapin](https://github.com/alesapin))
+- Fix a crash when an external dictionary with a ClickHouse source has a subquery in its query. [\#8351](https://github.com/ClickHouse/ClickHouse/pull/8351) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Fix incorrect parsing of the file extension in tables with the `URL` engine. This fixes [\#8157](https://github.com/ClickHouse/ClickHouse/issues/8157). [\#8419](https://github.com/ClickHouse/ClickHouse/pull/8419) ([Andrey Bodrov](https://github.com/apbodrov))
+- Fix `CHECK TABLE` queries for `*MergeTree` tables without a key. Fixes [\#7543](https://github.com/ClickHouse/ClickHouse/issues/7543). [\#7979](https://github.com/ClickHouse/ClickHouse/pull/7979) ([alesapin](https://github.com/alesapin))
+- Fixed conversion of `Float64` to the MySQL type. [\#8079](https://github.com/ClickHouse/ClickHouse/pull/8079) ([Yuriy Baranov](https://github.com/yurriy))
+- Now, if a table was not completely dropped because of a server crash, the server will try to restore and load it. [\#8176](https://github.com/ClickHouse/ClickHouse/pull/8176) ([tavplubix](https://github.com/tavplubix))
+- Fixed a crash in the table function `file` while inserting into a file that doesn't exist. Now in this case the file will be created and the insert will then be processed. [\#8177](https://github.com/ClickHouse/ClickHouse/pull/8177) ([Olga Khvostikova](https://github.com/stavrolia))
+- Fix a rare deadlock that can happen when `trace_log` is enabled. [\#7838](https://github.com/ClickHouse/ClickHouse/pull/7838) ([filimonov](https://github.com/filimonov))
+- Added the ability to work with different types besides `Date` in `RangeHashed` external dictionaries created from DDL queries. Fixes [7899](https://github.com/ClickHouse/ClickHouse/issues/7899). [\#8275](https://github.com/ClickHouse/ClickHouse/pull/8275) ([alesapin](https://github.com/alesapin))
+- Fix a crash when `now64()` is called with the result of another function. [\#8270](https://github.com/ClickHouse/ClickHouse/pull/8270) ([Vasily Nemkov](https://github.com/Enmk))
+- Fixed a bug with detecting the client IP for connections through the MySQL wire protocol. [\#7743](https://github.com/ClickHouse/ClickHouse/pull/7743) ([Dmitry Muzyka](https://github.com/dmitriy-myz))
+- Fix empty array handling in the `arraySplit` function. This fixes [\#7708](https://github.com/ClickHouse/ClickHouse/issues/7708). [\#7747](https://github.com/ClickHouse/ClickHouse/pull/7747) ([hcz](https://github.com/hczhcz))
+- Fixed the issue when the `pid-file` of another running `clickhouse-server` could be deleted. [\#8487](https://github.com/ClickHouse/ClickHouse/pull/8487) ([Weiqing Xu](https://github.com/weiqxu))
+- Fix dictionary reloading if the dictionary has `invalidate_query` and stopped updating after some exception on previous update attempts. [\#8029](https://github.com/ClickHouse/ClickHouse/pull/8029) ([alesapin](https://github.com/alesapin))
+- Fixed a bug in the function `arrayReduce` that could lead to a "double free", and a bug in the aggregate function combinator `Resample` that could lead to a memory leak. Added the aggregate function `aggThrow`. This function can be used for testing purposes. [\#8446](https://github.com/ClickHouse/ClickHouse/pull/8446) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+#### Improvement {#improvement-1}
+
+- Improved logging when working with the `S3` table engine. [\#8251](https://github.com/ClickHouse/ClickHouse/pull/8251) ([Grigory Pervakov](https://github.com/GrigoryPervakov))
+- Print a help message when no arguments are passed while calling `clickhouse-local`. This fixes [\#5335](https://github.com/ClickHouse/ClickHouse/issues/5335). [\#8230](https://github.com/ClickHouse/ClickHouse/pull/8230) ([Andrey Nagorny](https://github.com/Melancholic))
+- Added the setting `mutations_sync`, which allows waiting for `ALTER UPDATE/DELETE` queries synchronously (see the sketch below). [\#8237](https://github.com/ClickHouse/ClickHouse/pull/8237) ([alesapin](https://github.com/alesapin))
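+
+A brief sketch of the setting (the table name `t` is a placeholder; per the setting's description, 0 means asynchronous, 1 waits for the current server, 2 waits for all replicas):
+
+``` sql
+SET mutations_sync = 1;
+-- Returns only after the mutation has finished on the current server.
+ALTER TABLE t DELETE WHERE id = 42;
+```
+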
+- Allow setting a relative `user_files_path` in `config.xml` (in a way similar to `format_schema_path`). [\#7632](https://github.com/ClickHouse/ClickHouse/pull/7632) ([hcz](https://github.com/hczhcz))
+- Added an exception for illegal types for conversion functions with the `-OrZero` postfix. [\#7880](https://github.com/ClickHouse/ClickHouse/pull/7880) ([Andrey Konyaev](https://github.com/akonyaev90))
+- Simplify the format of the header of data sent to a shard in a distributed query. [\#8044](https://github.com/ClickHouse/ClickHouse/pull/8044) ([Vitaly Baranov](https://github.com/vitlibar))
+- `Live View` table engine refactoring. [\#8519](https://github.com/ClickHouse/ClickHouse/pull/8519) ([vzakaznikov](https://github.com/vzakaznikov))
+- Added additional checks for external dictionaries created from DDL queries. [\#8127](https://github.com/ClickHouse/ClickHouse/pull/8127) ([alesapin](https://github.com/alesapin))
+- Fix the error `Column ... already exists` while using `FINAL` and `SAMPLE` together, e.g. `select count() from table final sample 1/2`. Fixes [\#5186](https://github.com/ClickHouse/ClickHouse/issues/5186). [\#7907](https://github.com/ClickHouse/ClickHouse/pull/7907) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Now the first argument of the `joinGet` function can be a table identifier. [\#7707](https://github.com/ClickHouse/ClickHouse/pull/7707) ([Amos Bird](https://github.com/amosbird))
+- Allow using `MaterializedView` with subqueries above `Kafka` tables. [\#8197](https://github.com/ClickHouse/ClickHouse/pull/8197) ([filimonov](https://github.com/filimonov))
+- Now background moves between disks run in a separate thread pool. [\#7670](https://github.com/ClickHouse/ClickHouse/pull/7670) ([Vladimir Chebotarev](https://github.com/excitoon))
+- `SYSTEM RELOAD DICTIONARY` now executes synchronously. [\#8240](https://github.com/ClickHouse/ClickHouse/pull/8240) ([Vitaly Baranov](https://github.com/vitlibar))
+- Stack traces now display physical addresses (offsets in the object file) instead of virtual memory addresses (where the object file was loaded). That allows the use of `addr2line` when the binary is position-independent and ASLR is active. This fixes [\#8360](https://github.com/ClickHouse/ClickHouse/issues/8360). [\#8387](https://github.com/ClickHouse/ClickHouse/pull/8387) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Support a new syntax for row-level security filters: ``. Fixes [\#5779](https://github.com/ClickHouse/ClickHouse/issues/5779). [\#8381](https://github.com/ClickHouse/ClickHouse/pull/8381) ([Ivan](https://github.com/abyss7))
+- Now the `cityHash` function can work with `Decimal` and `UUID` types. Fixes [\#5184](https://github.com/ClickHouse/ClickHouse/issues/5184). [\#7693](https://github.com/ClickHouse/ClickHouse/pull/7693) ([Mikhail Korotov](https://github.com/millb))
+- Removed the fixed index granularity (it was 1024) from system logs because it's obsolete after the implementation of adaptive granularity. [\#7698](https://github.com/ClickHouse/ClickHouse/pull/7698) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Enabled the MySQL compatibility server when ClickHouse is compiled without SSL. [\#7852](https://github.com/ClickHouse/ClickHouse/pull/7852) ([Yuriy Baranov](https://github.com/yurriy))
+- Now the server checksums distributed batches, which gives more verbose errors in case of corrupted data in a batch. [\#7914](https://github.com/ClickHouse/ClickHouse/pull/7914) ([Azat Khuzhin](https://github.com/azat))
+- Support `DROP DATABASE`, `DETACH TABLE`, `DROP TABLE` and `ATTACH TABLE` for the `MySQL` database engine. [\#8202](https://github.com/ClickHouse/ClickHouse/pull/8202) ([Winter Zhang](https://github.com/zhang2014))
+- Added authentication in the S3 table function and table engine. [\#7623](https://github.com/ClickHouse/ClickHouse/pull/7623) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Added a check for extra parts of `MergeTree` on different disks, in order not to miss data parts on undefined disks. [\#8118](https://github.com/ClickHouse/ClickHouse/pull/8118) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Enable SSL support for the Mac client and server. [\#8297](https://github.com/ClickHouse/ClickHouse/pull/8297) ([Ivan](https://github.com/abyss7))
+- Now ClickHouse can work as a MySQL federated server (see https://dev.mysql.com/doc/refman/5.7/en/federated-create-server.html). [\#7717](https://github.com/ClickHouse/ClickHouse/pull/7717) ([Maxim Fedotov](https://github.com/MaxFedotov))
+- `clickhouse-client` now only enables `bracketed-paste` when multiquery is on and multiline is off. This fixes (#7757)[https://github.com/ClickHouse/ClickHouse/issues/7757]. [\#7761](https://github.com/ClickHouse/ClickHouse/pull/7761) ([Amos Bird](https://github.com/amosbird))
+- Support `Array(Decimal)` in the `if` function. [\#7721](https://github.com/ClickHouse/ClickHouse/pull/7721) ([Artem Zuikov](https://github.com/4ertus2))
+- Support Decimals in the `arrayDifference`, `arrayCumSum` and `arrayCumSumNegative` functions. [\#7724](https://github.com/ClickHouse/ClickHouse/pull/7724) ([Artem Zuikov](https://github.com/4ertus2))
+- Added a `lifetime` column to the `system.dictionaries` table. [\#6820](https://github.com/ClickHouse/ClickHouse/issues/6820) [\#7727](https://github.com/ClickHouse/ClickHouse/pull/7727) ([kekekekule](https://github.com/kekekekule))
+- Improved the check for existing parts on different disks for the `*MergeTree` table engines. Addresses [\#7660](https://github.com/ClickHouse/ClickHouse/issues/7660). [\#8440](https://github.com/ClickHouse/ClickHouse/pull/8440) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Integration with the `AWS SDK` for `S3` interactions, which allows using all S3 features out of the box. [\#8011](https://github.com/ClickHouse/ClickHouse/pull/8011) ([Pavel Kovalenko](https://github.com/Jokser))
+- Added support for subqueries in `Live View` tables. [\#7792](https://github.com/ClickHouse/ClickHouse/pull/7792) ([vzakaznikov](https://github.com/vzakaznikov))
+- The check for using `Date` or `DateTime` columns in `TTL` expressions was removed. [\#7920](https://github.com/ClickHouse/ClickHouse/pull/7920) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Information about disks was added to the `system.detached_parts` table. [\#7833](https://github.com/ClickHouse/ClickHouse/pull/7833) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Now the settings `max_(table|partition)_size_to_drop` can be changed without a restart. [\#7779](https://github.com/ClickHouse/ClickHouse/pull/7779) ([Grigory Pervakov](https://github.com/GrigoryPervakov))
+- Slightly better usability of error messages. Ask the user not to remove the lines below `Stack trace:`. [\#7897](https://github.com/ClickHouse/ClickHouse/pull/7897) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Better reading of messages in various formats for the `Kafka` engine, after [\#7935](https://github.com/ClickHouse/ClickHouse/issues/7935). [\#8035](https://github.com/ClickHouse/ClickHouse/pull/8035) ([Ivan](https://github.com/abyss7))
+- Better compatibility with MySQL clients that don't support the `sha2_password` auth plugin. [\#8036](https://github.com/ClickHouse/ClickHouse/pull/8036) ([Yuriy Baranov](https://github.com/yurriy))
+- Support more column types in the MySQL compatibility server. [\#7975](https://github.com/ClickHouse/ClickHouse/pull/7975) ([Yuriy Baranov](https://github.com/yurriy))
+- Implement the `ORDER BY` optimization for `Merge`, `Buffer` and `Materialized View` storages with underlying `MergeTree` tables. [\#8130](https://github.com/ClickHouse/ClickHouse/pull/8130) ([Anton Popov](https://github.com/CurtizJ))
+- Now we always use the POSIX implementation of `getrandom` for better compatibility with old kernels (\<3.17). [\#7940](https://github.com/ClickHouse/ClickHouse/pull/7940) ([Amos Bird](https://github.com/amosbird))
+- Better check for a valid destination in a move TTL rule. [\#8410](https://github.com/ClickHouse/ClickHouse/pull/8410) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Better checks for broken insert batches for the `Distributed` table engine. [\#7933](https://github.com/ClickHouse/ClickHouse/pull/7933) ([Azat Khuzhin](https://github.com/azat))
+- Added a column with an array of the names of parts that a mutation must process in the future to the `system.mutations` table. [\#8179](https://github.com/ClickHouse/ClickHouse/pull/8179) ([alesapin](https://github.com/alesapin))
+- Parallel merge sort optimization for processors. [\#8552](https://github.com/ClickHouse/ClickHouse/pull/8552) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- The setting `mark_cache_min_lifetime` is now obsolete and does nothing. In previous versions, the mark cache could grow in memory larger than `mark_cache_size` to accommodate data within `mark_cache_min_lifetime` seconds. That was leading to confusion and higher memory usage than expected, which is especially bad on memory-constrained systems. If you see performance degradation after installing this release, you should increase `mark_cache_size`. [\#8484](https://github.com/ClickHouse/ClickHouse/pull/8484) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Preparation to use `tid` everywhere. This is needed for [\#7477](https://github.com/ClickHouse/ClickHouse/issues/7477). [\#8276](https://github.com/ClickHouse/ClickHouse/pull/8276) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+#### Performance Improvement {#performance-improvement-1}
+
+- Performance optimizations in the processors pipeline. [\#7988](https://github.com/ClickHouse/ClickHouse/pull/7988) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Non-blocking updates of expired keys in cache dictionaries (with permission to read the old ones). [\#8303](https://github.com/ClickHouse/ClickHouse/pull/8303) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
+- Compile ClickHouse without `-fno-omit-frame-pointer` globally to spare one more register. [\#8097](https://github.com/ClickHouse/ClickHouse/pull/8097) ([Amos Bird](https://github.com/amosbird))
+- Speed up the `greatCircleDistance` function and add performance tests for it. [\#7307](https://github.com/ClickHouse/ClickHouse/pull/7307) ([Olga Khvostikova](https://github.com/stavrolia))
+- Improved performance of the function `roundDown`. [\#8465](https://github.com/ClickHouse/ClickHouse/pull/8465) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Improved performance of `max`, `min`, `argMin`, `argMax` for the `DateTime64` data type. [\#8199](https://github.com/ClickHouse/ClickHouse/pull/8199) ([Vasily Nemkov](https://github.com/Enmk))
+- Improved performance of sorting without a limit or with a big limit, and of external sorting. [\#8545](https://github.com/ClickHouse/ClickHouse/pull/8545) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Improved performance of formatting floating point numbers by up to 6 times. [\#8542](https://github.com/ClickHouse/ClickHouse/pull/8542) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Improved performance of the `modulo` function. [\#7750](https://github.com/ClickHouse/ClickHouse/pull/7750) ([Amos Bird](https://github.com/amosbird))
+- Optimized `ORDER BY` and merging with a single-column key. [\#8335](https://github.com/ClickHouse/ClickHouse/pull/8335) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Better implementation of `arrayReduce` and the `-Array` and `-State` combinators. [\#7710](https://github.com/ClickHouse/ClickHouse/pull/7710) ([Amos Bird](https://github.com/amosbird))
+- Now `PREWHERE` should be optimized to be at least as efficient as `WHERE`. [\#7769](https://github.com/ClickHouse/ClickHouse/pull/7769) ([Amos Bird](https://github.com/amosbird))
+- Improved the way `round` and `roundBankers` handle negative numbers. [\#8229](https://github.com/ClickHouse/ClickHouse/pull/8229) ([hcz](https://github.com/hczhcz))
+- Improved decoding performance of the `DoubleDelta` and `Gorilla` codecs by roughly 30-40%. This fixes [\#7082](https://github.com/ClickHouse/ClickHouse/issues/7082). [\#8019](https://github.com/ClickHouse/ClickHouse/pull/8019) ([Vasily Nemkov](https://github.com/Enmk))
+- Improved performance of `base64`-related functions. [\#8444](https://github.com/ClickHouse/ClickHouse/pull/8444) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Added a function `geoDistance`. It is similar to `greatCircleDistance` but uses an approximation to the WGS-84 ellipsoid model. The performance of both functions is nearly the same. [\#8086](https://github.com/ClickHouse/ClickHouse/pull/8086) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Faster `min` and `max` aggregation functions for the `Decimal` data type. [\#8144](https://github.com/ClickHouse/ClickHouse/pull/8144) ([Artem Zuikov](https://github.com/4ertus2))
+- Vectorized processing of `arrayReduce`. [\#7608](https://github.com/ClickHouse/ClickHouse/pull/7608) ([Amos Bird](https://github.com/amosbird))
+- `if` chains are now optimized as `multiIf` (see the example below). [\#8355](https://github.com/ClickHouse/ClickHouse/pull/8355) ([kamalov-ruslan](https://github.com/kamalov-ruslan))
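+
+An illustration of the equivalence (both queries compute the same result; the second form is what the optimization rewrites the first one into):
+
+``` sql
+-- A nested if() chain ...
+SELECT if(number < 3, 'low', if(number < 7, 'mid', 'high')) FROM system.numbers LIMIT 10;
+-- ... is now executed as a single multiIf:
+SELECT multiIf(number < 3, 'low', number < 7, 'mid', 'high') FROM system.numbers LIMIT 10;
+```
+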
+- Fix the performance regression of the `Kafka` table engine introduced in 19.15. This fixes [\#7261](https://github.com/ClickHouse/ClickHouse/issues/7261). [\#7935](https://github.com/ClickHouse/ClickHouse/pull/7935) ([filimonov](https://github.com/filimonov))
+- Removed "pie" code generation that `gcc` from Debian packages occasionally brings by default. [\#8483](https://github.com/ClickHouse/ClickHouse/pull/8483) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Parallel parsing of data formats [\#6553](https://github.com/ClickHouse/ClickHouse/pull/6553) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov))
+- Enable the optimized parser of `Values` with expressions by default (`input_format_values_deduce_templates_of_expressions=1`). [\#8231](https://github.com/ClickHouse/ClickHouse/pull/8231) ([tavplubix](https://github.com/tavplubix))
+
+#### Build/Testing/Packaging Improvement {#buildtestingpackaging-improvement-2}
+
+- Build fixes for `ARM` and in minimal mode. [\#8304](https://github.com/ClickHouse/ClickHouse/pull/8304) ([proller](https://github.com/proller))
+- Add coverage file flushing for `clickhouse-server` when std::atexit is not called. Also slightly improved logging in stateless tests with coverage. [\#8267](https://github.com/ClickHouse/ClickHouse/pull/8267) ([alesapin](https://github.com/alesapin))
+- Update the LLVM library in contrib. Avoid using LLVM from OS packages. [\#8258](https://github.com/ClickHouse/ClickHouse/pull/8258) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Make the bundled `curl` build fully quiet. [\#8232](https://github.com/ClickHouse/ClickHouse/pull/8232) [\#8203](https://github.com/ClickHouse/ClickHouse/pull/8203) ([Pavel Kovalenko](https://github.com/Jokser))
+- Fix some `MemorySanitizer` warnings. [\#8235](https://github.com/ClickHouse/ClickHouse/pull/8235) ([Alexander Kuzmenkov](https://github.com/akuzm))
+- Use the `add_warning` and `no_warning` macros in `CMakeLists.txt`. [\#8604](https://github.com/ClickHouse/ClickHouse/pull/8604) ([Ivan](https://github.com/abyss7))
+- Add support for the Minio S3-compatible object store (https://min.io/) for better integration tests. [\#7863](https://github.com/ClickHouse/ClickHouse/pull/7863) [\#7875](https://github.com/ClickHouse/ClickHouse/pull/7875) ([Pavel Kovalenko](https://github.com/Jokser))
+- Imported `libc` headers to contrib. It allows making builds more consistent across various systems (only for `x86_64-linux-gnu`). [\#5773](https://github.com/ClickHouse/ClickHouse/pull/5773) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Remove `-fPIC` from some libraries. [\#8464](https://github.com/ClickHouse/ClickHouse/pull/8464) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Clean `CMakeLists.txt` for curl. See https://github.com/ClickHouse/ClickHouse/pull/8011\#issuecomment-569478910 [\#8459](https://github.com/ClickHouse/ClickHouse/pull/8459) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Silence warnings in the `CapNProto` library. [\#8220](https://github.com/ClickHouse/ClickHouse/pull/8220) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Add performance tests for short-string-optimized hash tables. [\#7679](https://github.com/ClickHouse/ClickHouse/pull/7679) ([Amos Bird](https://github.com/amosbird))
+- Now ClickHouse will build on `AArch64` even if `MADV_FREE` is not available. This fixes [\#8027](https://github.com/ClickHouse/ClickHouse/issues/8027). [\#8243](https://github.com/ClickHouse/ClickHouse/pull/8243) ([Amos Bird](https://github.com/amosbird))
+- Update `zlib-ng` to fix MemorySanitizer problems. [\#7182](https://github.com/ClickHouse/ClickHouse/pull/7182) [\#8206](https://github.com/ClickHouse/ClickHouse/pull/8206) ([Alexander Kuzmenkov](https://github.com/akuzm))
+- Enable the internal MySQL library on non-Linux systems, because usage of OS packages is very fragile and usually doesn't work at all. This fixes [\#5765](https://github.com/ClickHouse/ClickHouse/issues/5765). [\#8426](https://github.com/ClickHouse/ClickHouse/pull/8426) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed the build on some systems after enabling `libc++`. This supersedes [\#8374](https://github.com/ClickHouse/ClickHouse/issues/8374). [\#8380](https://github.com/ClickHouse/ClickHouse/pull/8380) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Make `Field` methods more type-safe to find more errors. [\#7386](https://github.com/ClickHouse/ClickHouse/pull/7386) [\#8209](https://github.com/ClickHouse/ClickHouse/pull/8209) ([Alexander Kuzmenkov](https://github.com/akuzm))
+- Added missing files to the `libc-headers` submodule. [\#8507](https://github.com/ClickHouse/ClickHouse/pull/8507) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fix wrong `JSON` quoting in performance test output. [\#8497](https://github.com/ClickHouse/ClickHouse/pull/8497) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Ported `clock_gettime` and `clock_nanosleep` for fresh glibc versions. [\#8054](https://github.com/ClickHouse/ClickHouse/pull/8054) ([Amos Bird](https://github.com/amosbird))
+- Enabled `part_log` in the example config for developers. [\#8609](https://github.com/ClickHouse/ClickHouse/pull/8609) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed the async nature of reload in `01036_no_superfluous_dict_reload_on_create_database*`. [\#8111](https://github.com/ClickHouse/ClickHouse/pull/8111) ([Azat Khuzhin](https://github.com/azat))
+- Fixed codec performance tests. [\#8615](https://github.com/ClickHouse/ClickHouse/pull/8615) ([Vasily Nemkov](https://github.com/Enmk))
+- Added install scripts for the `.tgz` build and documentation for them. [\#8612](https://github.com/ClickHouse/ClickHouse/pull/8612) [\#8591](https://github.com/ClickHouse/ClickHouse/pull/8591) ([alesapin](https://github.com/alesapin))
+- Removed the old `ZSTD` test (it was created in 2016 to reproduce a bug that pre-1.0 versions of zstd had). This fixes [\#8618](https://github.com/ClickHouse/ClickHouse/issues/8618). [\#8619](https://github.com/ClickHouse/ClickHouse/pull/8619) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed the build on Mac OS Catalina. [\#8600](https://github.com/ClickHouse/ClickHouse/pull/8600) ([meo](https://github.com/meob))
+- Increased the number of rows in codec performance tests to make the results noticeable. [\#8574](https://github.com/ClickHouse/ClickHouse/pull/8574) ([Vasily Nemkov](https://github.com/Enmk))
+- In debug builds, `LOGICAL_ERROR` exceptions are treated as assertion failures, so that they are easier to notice. [\#8475](https://github.com/ClickHouse/ClickHouse/pull/8475) ([Alexander Kuzmenkov](https://github.com/akuzm))
+- Made format-related performance tests more deterministic. [\#8477](https://github.com/ClickHouse/ClickHouse/pull/8477) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Updated `lz4` to fix a MemorySanitizer failure. [\#8181](https://github.com/ClickHouse/ClickHouse/pull/8181) ([Alexander Kuzmenkov](https://github.com/akuzm))
+- Suppressed a known MemorySanitizer false positive in exception handling. [\#8182](https://github.com/ClickHouse/ClickHouse/pull/8182) ([Alexander Kuzmenkov](https://github.com/akuzm))
+- Updated `gcc` and `g++` to version 9 in `build/docker/build.sh` [\#7766](https://github.com/ClickHouse/ClickHouse/pull/7766) ([TLightSky](https://github.com/tlightsky))
+- Added a performance test case comparing `PREWHERE` with `WHERE` (see the sketch after this list). [\#7768](https://github.com/ClickHouse/ClickHouse/pull/7768) ([Amos Bird](https://github.com/amosbird))
+- Progress towards fixing one flaky test. [\#8621](https://github.com/ClickHouse/ClickHouse/pull/8621) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Avoided MemorySanitizer reports for data coming from `libunwind`. [\#8539](https://github.com/ClickHouse/ClickHouse/pull/8539) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Updated `libc++` to the latest version. [\#8324](https://github.com/ClickHouse/ClickHouse/pull/8324) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Build the ICU library from sources. This fixes [\#6460](https://github.com/ClickHouse/ClickHouse/issues/6460). [\#8219](https://github.com/ClickHouse/ClickHouse/pull/8219) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Switched from `libressl` to `openssl`. ClickHouse should support TLS 1.3 and SNI after this change. This fixes [\#8171](https://github.com/ClickHouse/ClickHouse/issues/8171). [\#8218](https://github.com/ClickHouse/ClickHouse/pull/8218) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed a UBSan report when using `chacha20_poly1305` from SSL (happens on connect to https://yandex.ru/). [\#8214](https://github.com/ClickHouse/ClickHouse/pull/8214) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed the mode of the default password file for `.deb` Linux distros. [\#8075](https://github.com/ClickHouse/ClickHouse/pull/8075) ([proller](https://github.com/proller))
+- Improved the expression for getting the `clickhouse-server` PID in `clickhouse-test`. [\#8063](https://github.com/ClickHouse/ClickHouse/pull/8063) ([Alexander Kazakov](https://github.com/Akazz))
+- Updated contrib/googletest to v1.10.0. [\#8587](https://github.com/ClickHouse/ClickHouse/pull/8587) ([Alexander Burmak](https://github.com/Alex-Burmak))
+- Fixed a ThreadSanitizer report in the `base64` library. Also updated this library to the latest version, but it doesn't matter. This fixes [\#8397](https://github.com/ClickHouse/ClickHouse/issues/8397). [\#8403](https://github.com/ClickHouse/ClickHouse/pull/8403) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Fixed `00600_replace_running_query` for processors. [\#8272](https://github.com/ClickHouse/ClickHouse/pull/8272) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Removed support for `tcmalloc` to make `CMakeLists.txt` simpler. [\#8310](https://github.com/ClickHouse/ClickHouse/pull/8310) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Release gcc builds now use `libc++` instead of `libstdc++`. Until recently `libc++` was used only with clang. This will improve the consistency and portability of build configurations. [\#8311](https://github.com/ClickHouse/ClickHouse/pull/8311) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Enabled the ICU library for builds with MemorySanitizer. [\#8222](https://github.com/ClickHouse/ClickHouse/pull/8222) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Suppressed warnings from the `CapNProto` library. [\#8224](https://github.com/ClickHouse/ClickHouse/pull/8224) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Removed special cases of code for `tcmalloc`, because it's no longer supported. [\#8225](https://github.com/ClickHouse/ClickHouse/pull/8225) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- In the CI coverage task, the server is now killed gracefully to let it save the coverage report. This fixes the incomplete coverage reports we've been seeing lately. [\#8142](https://github.com/ClickHouse/ClickHouse/pull/8142) ([alesapin](https://github.com/alesapin))
+- Performance tests for all codecs against `Float64` and `UInt64` values. [\#8349](https://github.com/ClickHouse/ClickHouse/pull/8349) ([Vasily Nemkov](https://github.com/Enmk))
+- `termcap` is heavily deprecated and leads to various problems (e.g. a missing “up” cap and echoing `^J` instead of multi-line). Favor `terminfo` or the bundled `ncurses`. [\#7737](https://github.com/ClickHouse/ClickHouse/pull/7737) ([Amos Bird](https://github.com/amosbird))
+- Fixed the `test_storage_s3` integration test. [\#7734](https://github.com/ClickHouse/ClickHouse/pull/7734) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Support `StorageFile(<format>, null)` to insert a block into a file of the given format without actually writing to disk. This is required for performance tests. [\#8455](https://github.com/ClickHouse/ClickHouse/pull/8455) ([Amos Bird](https://github.com/amosbird))
+- Added the `--print-time` argument to functional tests, which prints the execution time per test. [\#8001](https://github.com/ClickHouse/ClickHouse/pull/8001) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Added assertions to `KeyCondition` while evaluating RPN. This fixes a warning from gcc-9. [\#8279](https://github.com/ClickHouse/ClickHouse/pull/8279) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Dump cmake options in CI builds. [\#8273](https://github.com/ClickHouse/ClickHouse/pull/8273) ([Alexander Kuzmenkov](https://github.com/akuzm))
+- Don't generate debug info for some fat libraries. [\#8271](https://github.com/ClickHouse/ClickHouse/pull/8271) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Made `log_to_console.xml` always log to stderr, regardless of whether the session is interactive. [\#8395](https://github.com/ClickHouse/ClickHouse/pull/8395) ([Alexander Kuzmenkov](https://github.com/akuzm))
+- Removed some unused features from the `clickhouse-performance-test` tool. [\#8555](https://github.com/ClickHouse/ClickHouse/pull/8555) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Now we also search for `lld-X` matching the corresponding `clang-X` version. [\#8092](https://github.com/ClickHouse/ClickHouse/pull/8092) ([alesapin](https://github.com/alesapin))
+- Parquet build improvements. [\#8421](https://github.com/ClickHouse/ClickHouse/pull/8421) ([maxulan](https://github.com/maxulan))
+- More GCC warnings. [\#8221](https://github.com/ClickHouse/ClickHouse/pull/8221) ([kreuzerkrieg](https://github.com/kreuzerkrieg))
+- The package for Arch Linux now allows running the ClickHouse server, not only the client. [\#8534](https://github.com/ClickHouse/ClickHouse/pull/8534) ([Vladimir Chebotarev](https://github.com/excitoon))
+- Fixed tests with processors. Tiny performance fixes. [\#7672](https://github.com/ClickHouse/ClickHouse/pull/7672) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
+- Updated contrib/protobuf. [\#8256](https://github.com/ClickHouse/ClickHouse/pull/8256) ([Matwey V. Kornilov](https://github.com/matwey))
+- In preparation for the switch to C++20, as a New Year celebration. “May the C++ force be with ClickHouse.” [\#8447](https://github.com/ClickHouse/ClickHouse/pull/8447) ([Amos Bird](https://github.com/amosbird))
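Two of the testing-related entries above can be illustrated with concrete queries. The `hits` table, the MinIO endpoint, the bucket and the column schema below are all hypothetical:

```sql
-- PREWHERE vs. WHERE (per the performance entry and the test added in
-- #7768): both forms should now perform comparably, since PREWHERE reads
-- the filter column first and skips the remaining columns for rows that
-- do not match.
SELECT count() FROM hits WHERE    URL LIKE '%clickhouse%';
SELECT count() FROM hits PREWHERE URL LIKE '%clickhouse%';

-- Reading from a MinIO bucket through its S3-compatible API with the s3
-- table function, as the integration tests do; endpoint, bucket and
-- schema are illustrative only.
SELECT *
FROM s3('http://minio:9000/test-bucket/data.csv', 'CSV', 'id UInt64, name String')
LIMIT 10;
```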
+
+#### Experimental Feature {#experimental-feature-1}
+
+- Added the experimental setting `min_bytes_to_use_mmap_io`. It allows reading big files without copying data from kernel space to userspace. The setting is disabled by default. The recommended threshold is about 64 MB, because mmap/munmap is slow. [\#8520](https://github.com/ClickHouse/ClickHouse/pull/8520) ([alexey-milovidov](https://github.com/alexey-milovidov))
+- Reworked quotas as part of the access control system. Added the new table `system.quotas`, new functions `currentQuota` and `currentQuotaKey`, and new SQL syntax `CREATE QUOTA`, `ALTER QUOTA`, `DROP QUOTA`, `SHOW QUOTA` (see the sketch after this list). [\#7257](https://github.com/ClickHouse/ClickHouse/pull/7257) ([Vitaly Baranov](https://github.com/vitlibar))
+- Allow skipping unknown settings with a warning instead of throwing an exception. [\#7653](https://github.com/ClickHouse/ClickHouse/pull/7653) ([Vitaly Baranov](https://github.com/vitlibar))
+- Reworked row policies as part of the access control system. Added the new table `system.row_policies`, the new function `currentRowPolicies()`, and new SQL syntax `CREATE POLICY`, `ALTER POLICY`, `DROP POLICY`, `SHOW CREATE POLICY`, `SHOW POLICIES` (also sketched after this list). [\#7808](https://github.com/ClickHouse/ClickHouse/pull/7808) ([Vitaly Baranov](https://github.com/vitlibar))
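A sketch of the new access-control DDL from the last two entries, following the grammar comments in `ASTCreateQuotaQuery.h` and the parsers further down in this diff; the quota, policy, table and role names are hypothetical, and note that the updated parser no longer requires an `=` after `MAX <resource>`:

```sql
-- Quota: at most 100 queries and 10 errors per hour, keyed by user name.
CREATE QUOTA IF NOT EXISTS q_analysts
    KEYED BY 'user name'
    FOR INTERVAL 1 HOUR MAX QUERIES 100, MAX ERRORS 10
    TO analyst_role;
-- Per-interval alternatives to MAX limits: TRACKING ONLY (just count
-- resource usage) or NO LIMITS (drop the limits for that interval).
SHOW QUOTA;

-- Row policy: only SELECT filtering is parsed at this point
-- (INSERT/UPDATE/DELETE handling is compiled out below).
CREATE POLICY region_filter ON sales.orders
    FOR SELECT USING region = 'EU'
    TO analyst_role;
SHOW POLICIES;
```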
+
+#### Security Fix {#security-fix}
+
+- Fixed the possibility of reading the directory structure in tables with the `File` table engine. This fixes [\#8536](https://github.com/ClickHouse/ClickHouse/issues/8536). [\#8537](https://github.com/ClickHouse/ClickHouse/pull/8537) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+## [Changelog for 2019](https://github.com/ClickHouse/ClickHouse/blob/master/docs/en/changelog/2019.md) {#changelog-for-2019}
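The parser changes further down in this diff also adjust the related DDL: `HOST NAME REGEXP` becomes `HOST REGEXP` in `CREATE USER`, and a profile reference inside `CREATE SETTINGS PROFILE` is written with `INHERIT` instead of `PROFILE`. A sketch with hypothetical user, pattern and profile names:

```sql
-- New HOST REGEXP form (previously HOST NAME REGEXP), per ParserCreateUserQuery.cpp.
CREATE USER IF NOT EXISTS bob
    IDENTIFIED WITH sha256_password BY 'secret'
    HOST REGEXP 'bob-[0-9]+'
    SETTINGS max_memory_usage = 10000000000 READONLY;

-- INHERIT keyword inside a settings profile, per ParserCreateSettingsProfileQuery.cpp.
CREATE SETTINGS PROFILE IF NOT EXISTS restricted
    SETTINGS max_threads = 4 MIN 1 MAX 8, INHERIT 'default'
    TO bob;
```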
diff --git a/docs/zh/whats_new/index.md b/docs/zh/whats_new/index.md
new file mode 100644
index 00000000000..75a13a72bac
--- /dev/null
+++ b/docs/zh/whats_new/index.md
@@ -0,0 +1,8 @@
+---
+machine_translated: true
+machine_translated_rev: b111334d6614a02564cf32f379679e9ff970d9b1
+toc_folder_title: "\u65B0\u589E\u5185\u5BB9"
+toc_priority: 72
+---
+
+
diff --git a/docs/zh/roadmap.md b/docs/zh/whats_new/roadmap.md
similarity index 99%
rename from docs/zh/roadmap.md
rename to docs/zh/whats_new/roadmap.md
index 49532c046f5..47e09bc3c78 100644
--- a/docs/zh/roadmap.md
+++ b/docs/zh/whats_new/roadmap.md
@@ -1,3 +1,4 @@
+
 # 规划 {#gui-hua}
 
 ## Q1 2020 {#q1-2020}
diff --git a/docs/zh/security_changelog.md b/docs/zh/whats_new/security_changelog.md
similarity index 99%
rename from docs/zh/security_changelog.md
rename to docs/zh/whats_new/security_changelog.md
index e35d6a7c632..6315398371f 100644
--- a/docs/zh/security_changelog.md
+++ b/docs/zh/whats_new/security_changelog.md
@@ -1,3 +1,4 @@
+
 ## 修复于 ClickHouse Release 18.12.13, 2018-09-10 {#xiu-fu-yu-clickhouse-release-18-12-13-2018-09-10}
 
 ### CVE-2018-14672 {#cve-2018-14672}
diff --git a/programs/client/Suggest.cpp b/programs/client/Suggest.cpp
index f7141449f54..8fffbec4fab 100644
--- a/programs/client/Suggest.cpp
+++ b/programs/client/Suggest.cpp
@@ -67,16 +67,19 @@ void Suggest::load(const ConnectionParameters & connection_parameters, size_t su
 Suggest::Suggest()
 {
     /// Keywords may be not up to date with ClickHouse parser.
-    words = {"CREATE", "DATABASE", "IF", "NOT", "EXISTS", "TEMPORARY", "TABLE", "ON", "CLUSTER", "DEFAULT",
-             "MATERIALIZED", "ALIAS", "ENGINE", "AS", "VIEW", "POPULATE", "SETTINGS", "ATTACH", "DETACH", "DROP",
-             "RENAME", "TO", "ALTER", "ADD", "MODIFY", "CLEAR", "COLUMN", "AFTER", "COPY", "PROJECT",
-             "PRIMARY", "KEY", "CHECK", "PARTITION", "PART", "FREEZE", "FETCH", "FROM", "SHOW", "INTO",
-             "OUTFILE", "FORMAT", "TABLES", "DATABASES", "LIKE", "PROCESSLIST", "CASE", "WHEN", "THEN", "ELSE",
-             "END", "DESCRIBE", "DESC", "USE", "SET", "OPTIMIZE", "FINAL", "DEDUPLICATE", "INSERT", "VALUES",
-             "SELECT", "DISTINCT", "SAMPLE", "ARRAY", "JOIN", "GLOBAL", "LOCAL", "ANY", "ALL", "INNER",
-             "LEFT", "RIGHT", "FULL", "OUTER", "CROSS", "USING", "PREWHERE", "WHERE", "GROUP", "BY",
-             "WITH", "TOTALS", "HAVING", "ORDER", "COLLATE", "LIMIT", "UNION", "AND", "OR", "ASC",
-             "IN", "KILL", "QUERY", "SYNC", "ASYNC", "TEST", "BETWEEN", "TRUNCATE"};
+    words = {"CREATE", "DATABASE", "IF", "NOT", "EXISTS", "TEMPORARY", "TABLE", "ON", "CLUSTER", "DEFAULT",
+             "MATERIALIZED", "ALIAS", "ENGINE", "AS", "VIEW", "POPULATE", "SETTINGS", "ATTACH", "DETACH", "DROP",
+             "RENAME", "TO", "ALTER", "ADD", "MODIFY", "CLEAR", "COLUMN", "AFTER", "COPY", "PROJECT",
+             "PRIMARY", "KEY", "CHECK", "PARTITION", "PART", "FREEZE", "FETCH", "FROM", "SHOW", "INTO",
+             "OUTFILE", "FORMAT", "TABLES", "DATABASES", "LIKE", "PROCESSLIST", "CASE", "WHEN", "THEN", "ELSE",
+             "END", "DESCRIBE", "DESC", "USE", "SET", "OPTIMIZE", "FINAL", "DEDUPLICATE", "INSERT", "VALUES",
+             "SELECT", "DISTINCT", "SAMPLE", "ARRAY", "JOIN", "GLOBAL", "LOCAL", "ANY", "ALL", "INNER",
+             "LEFT", "RIGHT", "FULL", "OUTER", "CROSS", "USING", "PREWHERE", "WHERE", "GROUP", "BY",
+             "WITH", "TOTALS", "HAVING", "ORDER", "COLLATE", "LIMIT", "UNION", "AND", "OR", "ASC",
+             "IN", "KILL", "QUERY", "SYNC", "ASYNC", "TEST", "BETWEEN", "TRUNCATE", "USER", "ROLE",
"SYNC", "ASYNC", "TEST", "BETWEEN", "TRUNCATE", "USER", "ROLE", + "PROFILE", "QUOTA", "POLICY", "ROW", "GRANT", "REVOKE", "OPTION", "ADMIN", "EXCEPT", "REPLACE", + "IDENTIFIED", "HOST", "NAME", "READONLY", "WRITABLE", "PERMISSIVE", "FOR", "RESTRICTIVE", "FOR", "RANDOMIZED", + "INTERVAL", "LIMITS", "ONLY", "TRACKING", "IP", "REGEXP"}; } void Suggest::loadImpl(Connection & connection, const ConnectionTimeouts & timeouts, size_t suggestion_limit) diff --git a/programs/server/MySQLHandler.cpp b/programs/server/MySQLHandler.cpp index 3e1432dbfce..b72aa8104d3 100644 --- a/programs/server/MySQLHandler.cpp +++ b/programs/server/MySQLHandler.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #if USE_POCO_NETSSL #include @@ -268,7 +269,8 @@ void MySQLHandler::comPing() packet_sender->sendPacket(OK_Packet(0x0, client_capability_flags, 0, 0, 0), true); } -static bool isFederatedServerSetupCommand(const String & query); +static bool isFederatedServerSetupSetCommand(const String & query); +static bool isFederatedServerSetupSelectVarCommand(const String & query); void MySQLHandler::comQuery(ReadBuffer & payload) { @@ -276,7 +278,7 @@ void MySQLHandler::comQuery(ReadBuffer & payload) // This is a workaround in order to support adding ClickHouse to MySQL using federated server. // As Clickhouse doesn't support these statements, we just send OK packet in response. - if (isFederatedServerSetupCommand(query)) + if (isFederatedServerSetupSetCommand(query)) { packet_sender->sendPacket(OK_Packet(0x00, client_capability_flags, 0, 0, 0), true); } @@ -288,10 +290,11 @@ void MySQLHandler::comQuery(ReadBuffer & payload) // Translate query from MySQL to ClickHouse. // This is a temporary workaround until ClickHouse supports the syntax "@@var_name". - if (query == "select @@version_comment limit 1") // MariaDB client starts session with that query + if (isFederatedServerSetupSelectVarCommand(query)) { should_replace = true; } + // This is a workaround in order to support adding ClickHouse to MySQL using federated server. 
if (0 == strncasecmp("SHOW TABLE STATUS LIKE", query.c_str(), 22)) { @@ -358,11 +361,27 @@ void MySQLHandlerSSL::finishHandshakeSSL(size_t packet_size, char * buf, size_t #endif -static bool isFederatedServerSetupCommand(const String & query) +static bool isFederatedServerSetupSetCommand(const String & query) { - return 0 == strncasecmp("SET NAMES", query.c_str(), 9) || 0 == strncasecmp("SET character_set_results", query.c_str(), 25) - || 0 == strncasecmp("SET FOREIGN_KEY_CHECKS", query.c_str(), 22) || 0 == strncasecmp("SET AUTOCOMMIT", query.c_str(), 14) - || 0 == strncasecmp("SET SESSION TRANSACTION ISOLATION LEVEL", query.c_str(), 39); + static const std::regex expr{ + "(^(SET NAMES(.*)))" + "|(^(SET character_set_results(.*)))" + "|(^(SET FOREIGN_KEY_CHECKS(.*)))" + "|(^(SET AUTOCOMMIT(.*)))" + "|(^(SET sql_mode(.*)))" + "|(^(SET SESSION TRANSACTION ISOLATION LEVEL(.*)))" + , std::regex::icase}; + return 1 == std::regex_match(query, expr); +} + +static bool isFederatedServerSetupSelectVarCommand(const String & query) +{ + static const std::regex expr{ + "|(^(SELECT @@(.*)))" + "|(^((/\\*(.*)\\*/)([ \t]*)(SELECT([ \t]*)@@(.*))))" + "|(^((/\\*(.*)\\*/)([ \t]*)(SHOW VARIABLES(.*))))" + , std::regex::icase}; + return 1 == std::regex_match(query, expr); } const String MySQLHandler::show_table_status_replacement_query("SELECT" diff --git a/programs/server/config.d/path.xml b/programs/server/config.d/path.xml index 14b7deb9de0..8db1d18e8c7 100644 --- a/programs/server/config.d/path.xml +++ b/programs/server/config.d/path.xml @@ -3,4 +3,5 @@ ./tmp/ ./user_files/ ./format_schemas/ + ./access/ diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index 14775f7a4de..cf788a0a63e 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -408,9 +408,10 @@ boost::shared_ptr ContextAccess::calculateResultAccess(bool static const AccessFlags dictionary_ddl = AccessType::CREATE_DICTIONARY | AccessType::DROP_DICTIONARY; static const AccessFlags table_and_dictionary_ddl = table_ddl | dictionary_ddl; static const AccessFlags write_table_access = AccessType::INSERT | AccessType::OPTIMIZE; + static const AccessFlags write_dcl_access = AccessType::ACCESS_MANAGEMENT - AccessType::SHOW_ACCESS; if (readonly_) - merged_access->revoke(write_table_access | table_and_dictionary_ddl | AccessType::SYSTEM | AccessType::KILL_QUERY | AccessType::ACCESS_MANAGEMENT); + merged_access->revoke(write_table_access | table_and_dictionary_ddl | write_dcl_access | AccessType::SYSTEM | AccessType::KILL_QUERY); if (readonly_ == 1) { diff --git a/src/Access/EnabledRowPolicies.cpp b/src/Access/EnabledRowPolicies.cpp index a525fb65606..56c73aaf40d 100644 --- a/src/Access/EnabledRowPolicies.cpp +++ b/src/Access/EnabledRowPolicies.cpp @@ -1,7 +1,5 @@ #include -#include -#include -#include +#include #include #include @@ -35,19 +33,17 @@ ASTPtr EnabledRowPolicies::getCondition(const String & database, const String & ASTPtr EnabledRowPolicies::getCondition(const String & database, const String & table_name, ConditionType type, const ASTPtr & extra_condition) const { - ASTPtr main_condition = getCondition(database, table_name, type); - if (!main_condition) - return extra_condition; - if (!extra_condition) - return main_condition; - auto function = std::make_shared(); - auto exp_list = std::make_shared(); - function->name = "and"; - function->arguments = exp_list; - function->children.push_back(exp_list); - exp_list->children.push_back(main_condition); - exp_list->children.push_back(extra_condition); - 
return function; + ASTPtr condition = getCondition(database, table_name, type); + if (condition && extra_condition) + condition = makeASTForLogicalAnd({condition, extra_condition}); + else if (!condition) + condition = extra_condition; + + bool value; + if (tryGetLiteralBool(condition.get(), value) && value) + condition = nullptr; /// The condition is always true, no need to check it. + + return condition; } diff --git a/src/Access/RowPolicyCache.cpp b/src/Access/RowPolicyCache.cpp index 9509923adbf..44f2cd160d4 100644 --- a/src/Access/RowPolicyCache.cpp +++ b/src/Access/RowPolicyCache.cpp @@ -1,97 +1,19 @@ #include #include #include -#include -#include #include #include +#include #include #include #include #include -#include -#include namespace DB { namespace { - bool tryGetLiteralBool(const IAST & ast, bool & value) - { - try - { - if (const ASTLiteral * literal = ast.as()) - { - value = !literal->value.isNull() && applyVisitor(FieldVisitorConvertToNumber(), literal->value); - return true; - } - return false; - } - catch (...) - { - return false; - } - } - - ASTPtr applyFunctionAND(ASTs arguments) - { - bool const_arguments = true; - boost::range::remove_erase_if(arguments, [&](const ASTPtr & argument) -> bool - { - bool b; - if (!tryGetLiteralBool(*argument, b)) - return false; - const_arguments &= b; - return true; - }); - - if (!const_arguments) - return std::make_shared(Field{UInt8(0)}); - if (arguments.empty()) - return std::make_shared(Field{UInt8(1)}); - if (arguments.size() == 1) - return arguments[0]; - - auto function = std::make_shared(); - auto exp_list = std::make_shared(); - function->name = "and"; - function->arguments = exp_list; - function->children.push_back(exp_list); - exp_list->children = std::move(arguments); - return function; - } - - - ASTPtr applyFunctionOR(ASTs arguments) - { - bool const_arguments = false; - boost::range::remove_erase_if(arguments, [&](const ASTPtr & argument) -> bool - { - bool b; - if (!tryGetLiteralBool(*argument, b)) - return false; - const_arguments |= b; - return true; - }); - - if (const_arguments) - return std::make_shared(Field{UInt8(1)}); - if (arguments.empty()) - return std::make_shared(Field{UInt8(0)}); - if (arguments.size() == 1) - return arguments[0]; - - auto function = std::make_shared(); - auto exp_list = std::make_shared(); - function->name = "or"; - function->arguments = exp_list; - function->children.push_back(exp_list); - exp_list->children = std::move(arguments); - return function; - } - - using ConditionType = RowPolicy::ConditionType; constexpr size_t MAX_CONDITION_TYPE = RowPolicy::MAX_CONDITION_TYPE; @@ -111,10 +33,16 @@ namespace ASTPtr getResult() && { /// Process permissive conditions. - restrictions.push_back(applyFunctionOR(std::move(permissions))); + restrictions.push_back(makeASTForLogicalOr(std::move(permissions))); /// Process restrictive conditions. - return applyFunctionAND(std::move(restrictions)); + auto condition = makeASTForLogicalAnd(std::move(restrictions)); + + bool value; + if (tryGetLiteralBool(condition.get(), value) && value) + condition = nullptr; /// The condition is always true, no need to check it. + + return condition; } private: diff --git a/src/Common/ThreadFuzzer.cpp b/src/Common/ThreadFuzzer.cpp index 42e133b4561..60766e32361 100644 --- a/src/Common/ThreadFuzzer.cpp +++ b/src/Common/ThreadFuzzer.cpp @@ -8,6 +8,7 @@ #include +#include #include #include @@ -18,8 +19,15 @@ #include + /// We will also wrap some thread synchronization functions to inject sleep/migration before or after. 
-#if defined(OS_LINUX) +#if defined(OS_LINUX) && !defined(THREAD_SANITIZER) && !defined(MEMORY_SANITIZER) + #define THREAD_FUZZER_WRAP_PTHREAD 1 +#else + #define THREAD_FUZZER_WRAP_PTHREAD 0 +#endif + +#if THREAD_FUZZER_WRAP_PTHREAD # define FOR_EACH_WRAPPED_FUNCTION(M) \ M(int, pthread_mutex_lock, pthread_mutex_t * arg) \ M(int, pthread_mutex_unlock, pthread_mutex_t * arg) @@ -66,7 +74,7 @@ static void initFromEnv(std::atomic & what, const char * name) static std::atomic num_cpus = 0; -#if defined(OS_LINUX) +#if THREAD_FUZZER_WRAP_PTHREAD # define DEFINE_WRAPPER_PARAMS(RET, NAME, ...) \ static std::atomic NAME##_before_yield_probability = 0; \ static std::atomic NAME##_before_migrate_probability = 0; \ @@ -97,7 +105,7 @@ void ThreadFuzzer::initConfiguration() initFromEnv(sleep_probability, "THREAD_FUZZER_SLEEP_PROBABILITY"); initFromEnv(sleep_time_us, "THREAD_FUZZER_SLEEP_TIME_US"); -#if defined(OS_LINUX) +#if THREAD_FUZZER_WRAP_PTHREAD # define INIT_WRAPPER_PARAMS(RET, NAME, ...) \ initFromEnv(NAME##_before_yield_probability, "THREAD_FUZZER_" #NAME "_BEFORE_YIELD_PROBABILITY"); \ initFromEnv(NAME##_before_migrate_probability, "THREAD_FUZZER_" #NAME "_BEFORE_MIGRATE_PROBABILITY"); \ @@ -118,7 +126,7 @@ void ThreadFuzzer::initConfiguration() bool ThreadFuzzer::isEffective() const { -#if defined(OS_LINUX) +#if THREAD_FUZZER_WRAP_PTHREAD # define CHECK_WRAPPER_PARAMS(RET, NAME, ...) \ if (NAME##_before_yield_probability.load(std::memory_order_relaxed)) \ return true; \ @@ -236,7 +244,7 @@ void ThreadFuzzer::setup() /// We expect that for every function like pthread_mutex_lock there is the same function with two underscores prefix. /// NOTE We cannot use dlsym(... RTLD_NEXT), because it will call pthread_mutex_lock and it will lead to infinite recursion. -#if defined(OS_LINUX) +#if THREAD_FUZZER_WRAP_PTHREAD # define MAKE_WRAPPER(RET, NAME, ...) 
\ extern "C" RET __##NAME(__VA_ARGS__); /* NOLINT */ \ extern "C" RET NAME(__VA_ARGS__) /* NOLINT */ \ diff --git a/src/Common/parseGlobs.cpp b/src/Common/parseGlobs.cpp index 31e7c2eb612..71ddbbd92ea 100644 --- a/src/Common/parseGlobs.cpp +++ b/src/Common/parseGlobs.cpp @@ -46,7 +46,7 @@ std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_glob char point; std::istringstream iss_range(buffer); iss_range >> range_begin >> point >> point >> range_end; - assert(iss_range.good()); + assert(!iss_range.fail()); bool leading_zeros = buffer[0] == '0'; size_t num_len = std::to_string(range_end).size(); if (leading_zeros) diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp index 40e5682565d..f1cea04dc29 100644 --- a/src/Databases/DatabaseFactory.cpp +++ b/src/Databases/DatabaseFactory.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -15,6 +16,7 @@ #if USE_MYSQL #include +#include #endif @@ -49,6 +51,15 @@ DatabasePtr DatabaseFactory::get( } } +template +static inline ValueType safeGetLiteralValue(const ASTPtr &ast, const String &engine_name) +{ + if (!ast || !ast->as()) + throw Exception("Database engine " + engine_name + " requested literal argument.", ErrorCodes::BAD_ARGUMENTS); + + return ast->as()->value.safeGet(); +} + DatabasePtr DatabaseFactory::getImpl( const String & database_name, const String & metadata_path, const ASTStorage * engine_define, Context & context) { @@ -79,11 +90,14 @@ DatabasePtr DatabaseFactory::getImpl( throw Exception("MySQL Database require mysql_hostname, mysql_database_name, mysql_username, mysql_password arguments.", ErrorCodes::BAD_ARGUMENTS); - const auto & arguments = engine->arguments->children; - const auto & host_name_and_port = arguments[0]->as()->value.safeGet(); - const auto & database_name_in_mysql = arguments[1]->as()->value.safeGet(); - const auto & mysql_user_name = arguments[2]->as()->value.safeGet(); - const auto & mysql_user_password = arguments[3]->as()->value.safeGet(); + + ASTs & arguments = engine->arguments->children; + arguments[1] = evaluateConstantExpressionOrIdentifierAsLiteral(arguments[1], context); + + const auto & host_name_and_port = safeGetLiteralValue(arguments[0], "MySQL"); + const auto & database_name_in_mysql = safeGetLiteralValue(arguments[1], "MySQL"); + const auto & mysql_user_name = safeGetLiteralValue(arguments[2], "MySQL"); + const auto & mysql_user_password = safeGetLiteralValue(arguments[3], "MySQL"); try { @@ -114,7 +128,7 @@ DatabasePtr DatabaseFactory::getImpl( const auto & arguments = engine->arguments->children; - const auto cache_expiration_time_seconds = arguments[0]->as()->value.safeGet(); + const auto cache_expiration_time_seconds = safeGetLiteralValue(arguments[0], "Lazy"); return std::make_shared(database_name, metadata_path, cache_expiration_time_seconds, context); } diff --git a/src/Functions/FunctionHelpers.h b/src/Functions/FunctionHelpers.h index 00957935448..34aa0add6e1 100644 --- a/src/Functions/FunctionHelpers.h +++ b/src/Functions/FunctionHelpers.h @@ -24,6 +24,12 @@ const Type * checkAndGetDataType(const IDataType * data_type) return typeid_cast(data_type); } +template +bool checkDataTypes(const IDataType * data_type) +{ + return (... 
|| typeid_cast(data_type)); +} + template const ColumnConst * checkAndGetColumnConst(const IColumn * column) { diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 100737b43c7..d201b967fb1 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -1496,10 +1496,12 @@ struct ToStringMonotonicity IFunction::Monotonicity positive(true, true); IFunction::Monotonicity not_monotonic; - /// `toString` function is monotonous if the argument is Date or DateTime, or non-negative numbers with the same number of symbols. + auto type_ptr = &type; + if (auto * low_cardinality_type = checkAndGetDataType(type_ptr)) + type_ptr = low_cardinality_type->getDictionaryType().get(); - if (checkAndGetDataType(&type) - || typeid_cast(&type)) + /// `toString` function is monotonous if the argument is Date or DateTime or String, or non-negative numbers with the same number of symbols. + if (checkDataTypes(type_ptr)) return positive; if (left.isNull() || right.isNull()) diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index 19eb2b42360..3aca41a9c9a 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -108,7 +108,7 @@ namespace S3 /// Case when bucket name represented in domain name of S3 URL. /// E.g. (https://bucket-name.s3.Region.amazonaws.com/key) /// https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html#virtual-hosted-style-access - static const RE2 virtual_hosted_style_pattern("(.+\\.)?s3[.\\-][a-z0-9\\-.]+"); + static const RE2 virtual_hosted_style_pattern(R"((.+\.)?s3[.\-][a-z0-9\-.]+)"); /// Case when bucket name and key represented in path of S3 URL. /// E.g. (https://s3.Region.amazonaws.com/bucket-name/key) /// https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html#path-style-access diff --git a/dbms/src/IO/tests/gtest_s3_uri.cpp b/src/IO/tests/gtest_s3_uri.cpp similarity index 100% rename from dbms/src/IO/tests/gtest_s3_uri.cpp rename to src/IO/tests/gtest_s3_uri.cpp diff --git a/src/Interpreters/InterpreterCreateQuotaQuery.cpp b/src/Interpreters/InterpreterCreateQuotaQuery.cpp index 13e772965ff..80987993c96 100644 --- a/src/Interpreters/InterpreterCreateQuotaQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuotaQuery.cpp @@ -34,7 +34,7 @@ void updateQuotaFromQueryImpl(Quota & quota, const ASTCreateQuotaQuery & query, auto duration = query_limits.duration; auto it = boost::range::find_if(quota_all_limits, [&](const Quota::Limits & x) { return x.duration == duration; }); - if (query_limits.unset_tracking) + if (query_limits.drop) { if (it != quota_all_limits.end()) quota_all_limits.erase(it); @@ -59,6 +59,8 @@ void updateQuotaFromQueryImpl(Quota & quota, const ASTCreateQuotaQuery & query, { if (query_limits.max[resource_type]) quota_limits.max[resource_type] = *query_limits.max[resource_type]; + else + quota_limits.max[resource_type] = Quota::UNLIMITED; } } diff --git a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp b/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp index d2f435106a8..4c2dcc19a88 100644 --- a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp +++ b/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -101,6 +102,8 @@ namespace query->settings = profile.elements.toAST(); else query->settings = profile.elements.toASTWithNames(*manager); + if (query->settings) + query->settings->setUseInheritKeyword(true); } if (!profile.to_roles.empty()) @@ -133,7 +136,7 
@@ namespace create_query_limits.duration = limits.duration; create_query_limits.randomize_interval = limits.randomize_interval; for (auto resource_type : ext::range(Quota::MAX_RESOURCE_TYPE)) - if (limits.max[resource_type]) + if (limits.max[resource_type] != Quota::UNLIMITED) create_query_limits.max[resource_type] = limits.max[resource_type]; query->all_limits.push_back(create_query_limits); } diff --git a/src/Parsers/ASTCreateQuotaQuery.cpp b/src/Parsers/ASTCreateQuotaQuery.cpp index 8fa0dbb0d31..cd064756fb6 100644 --- a/src/Parsers/ASTCreateQuotaQuery.cpp +++ b/src/Parsers/ASTCreateQuotaQuery.cpp @@ -28,16 +28,17 @@ namespace } - void formatLimit(ResourceType resource_type, ResourceAmount max, const IAST::FormatSettings & settings) + void formatLimit(ResourceType resource_type, ResourceAmount max, bool first, const IAST::FormatSettings & settings) { - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " MAX " << Quota::resourceTypeToKeyword(resource_type) - << (settings.hilite ? IAST::hilite_none : ""); + if (first) + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " MAX" << (settings.hilite ? IAST::hilite_none : ""); + else + settings.ostr << ","; - settings.ostr << (settings.hilite ? IAST::hilite_operator : "") << " = " << (settings.hilite ? IAST::hilite_none : ""); + settings.ostr << " " << (settings.hilite ? IAST::hilite_keyword : "") << Quota::resourceTypeToKeyword(resource_type) + << (settings.hilite ? IAST::hilite_none : "") << " "; - if (max == Quota::UNLIMITED) - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "ANY" << (settings.hilite ? IAST::hilite_none : ""); - else if (resource_type == Quota::EXECUTION_TIME) + if (resource_type == Quota::EXECUTION_TIME) settings.ostr << Quota::executionTimeToSeconds(max); else settings.ostr << max; @@ -59,9 +60,9 @@ namespace << interval_kind.toKeyword() << (settings.hilite ? IAST::hilite_none : ""); - if (limits.unset_tracking) + if (limits.drop) { - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " UNSET TRACKING" << (settings.hilite ? IAST::hilite_none : ""); + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " NO LIMITS" << (settings.hilite ? IAST::hilite_none : ""); } else { @@ -70,14 +71,12 @@ namespace { if (limits.max[resource_type]) { - if (limit_found) - settings.ostr << ","; + formatLimit(resource_type, *limits.max[resource_type], !limit_found, settings); limit_found = true; - formatLimit(resource_type, *limits.max[resource_type], settings); } } if (!limit_found) - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " TRACKING" << (settings.hilite ? IAST::hilite_none : ""); + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " TRACKING ONLY" << (settings.hilite ? IAST::hilite_none : ""); } } diff --git a/src/Parsers/ASTCreateQuotaQuery.h b/src/Parsers/ASTCreateQuotaQuery.h index 09ceaea9825..70f8cba6de0 100644 --- a/src/Parsers/ASTCreateQuotaQuery.h +++ b/src/Parsers/ASTCreateQuotaQuery.h @@ -13,17 +13,16 @@ class ASTExtendedRoleSet; /** CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name * [KEYED BY {'none' | 'user name' | 'ip address' | 'client key' | 'client key or user name' | 'client key or ip address'}] * [FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY} - * {[SET] MAX {{QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = {number | ANY} } [,...] 
| - * [SET] TRACKING} [,...]] + * {MAX {{QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = number} [,...] | + * NO LIMITS | TRACKING ONLY} [,...]] * [TO {role [,...] | ALL | ALL EXCEPT role [,...]}] * * ALTER QUOTA [IF EXISTS] name * [RENAME TO new_name] * [KEYED BY {'none' | 'user name' | 'ip address' | 'client key' | 'client key or user name' | 'client key or ip address'}] * [FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY} - * {[SET] MAX {{QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = {number | ANY} } [,...] | - * [SET] TRACKING | - * UNSET TRACKING} [,...]] + * {MAX {{QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = number} [,...] | + * NO LIMITS | TRACKING ONLY} [,...]] * [TO {role [,...] | ALL | ALL EXCEPT role [,...]}] */ class ASTCreateQuotaQuery : public IAST, public ASTQueryWithOnCluster @@ -48,7 +47,7 @@ public: struct Limits { std::optional max[MAX_RESOURCE_TYPE]; - bool unset_tracking = false; + bool drop = false; std::chrono::seconds duration = std::chrono::seconds::zero(); bool randomize_interval = false; }; diff --git a/src/Parsers/ASTCreateSettingsProfileQuery.h b/src/Parsers/ASTCreateSettingsProfileQuery.h index cc133397db4..eabe1ba441b 100644 --- a/src/Parsers/ASTCreateSettingsProfileQuery.h +++ b/src/Parsers/ASTCreateSettingsProfileQuery.h @@ -12,10 +12,12 @@ class ASTExtendedRoleSet; /** CREATE SETTINGS PROFILE [IF NOT EXISTS | OR REPLACE] name * [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] + * [TO {role [,...] | ALL | ALL EXCEPT role [,...]}] * * ALTER SETTINGS PROFILE [IF EXISTS] name * [RENAME TO new_name] * [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] + * [TO {role [,...] | ALL | ALL EXCEPT role [,...]}] */ class ASTCreateSettingsProfileQuery : public IAST, public ASTQueryWithOnCluster { diff --git a/src/Parsers/ASTCreateUserQuery.cpp b/src/Parsers/ASTCreateUserQuery.cpp index d901ed8f5a1..c8e2a76dfa2 100644 --- a/src/Parsers/ASTCreateUserQuery.cpp +++ b/src/Parsers/ASTCreateUserQuery.cpp @@ -109,7 +109,7 @@ namespace { if (std::exchange(need_comma, true)) settings.ostr << ", "; - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "NAME REGEXP " << (settings.hilite ? IAST::hilite_none : ""); + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "REGEXP " << (settings.hilite ? IAST::hilite_none : ""); bool need_comma2 = false; for (const auto & host_regexp : name_regexps) { diff --git a/src/Parsers/ASTCreateUserQuery.h b/src/Parsers/ASTCreateUserQuery.h index 5a5cc0d9550..54dc51d783b 100644 --- a/src/Parsers/ASTCreateUserQuery.h +++ b/src/Parsers/ASTCreateUserQuery.h @@ -13,14 +13,14 @@ class ASTSettingsProfileElements; /** CREATE USER [IF NOT EXISTS | OR REPLACE] name * [IDENTIFIED [WITH {NO_PASSWORD|PLAINTEXT_PASSWORD|SHA256_PASSWORD|SHA256_HASH|DOUBLE_SHA1_PASSWORD|DOUBLE_SHA1_HASH}] BY {'password'|'hash'}] - * [HOST {LOCAL | NAME 'name' | NAME REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] + * [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] * [DEFAULT ROLE role [,...]] * [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] 
* * ALTER USER [IF EXISTS] name * [RENAME TO new_name] * [IDENTIFIED [WITH {PLAINTEXT_PASSWORD|SHA256_PASSWORD|DOUBLE_SHA1_PASSWORD}] BY {'password'|'hash'}] - * [[ADD|DROP] HOST {LOCAL | NAME 'name' | NAME REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] + * [[ADD|DROP] HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] * [DEFAULT ROLE role [,...] | ALL | ALL EXCEPT role [,...] ] * [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] */ diff --git a/src/Parsers/ASTSettingsProfileElement.cpp b/src/Parsers/ASTSettingsProfileElement.cpp index b3f4032d14c..24f1aa60813 100644 --- a/src/Parsers/ASTSettingsProfileElement.cpp +++ b/src/Parsers/ASTSettingsProfileElement.cpp @@ -25,7 +25,8 @@ void ASTSettingsProfileElement::formatImpl(const FormatSettings & settings, Form { if (!parent_profile.empty()) { - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "PROFILE " << (settings.hilite ? IAST::hilite_none : ""); + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << (use_inherit_keyword ? "INHERIT" : "PROFILE") << " " + << (settings.hilite ? IAST::hilite_none : ""); formatProfileNameOrID(parent_profile, id_mode, settings); return; } @@ -85,4 +86,11 @@ void ASTSettingsProfileElements::formatImpl(const FormatSettings & settings, For } } + +void ASTSettingsProfileElements::setUseInheritKeyword(bool use_inherit_keyword_) +{ + for (auto & element : elements) + element->use_inherit_keyword = use_inherit_keyword_; +} + } diff --git a/src/Parsers/ASTSettingsProfileElement.h b/src/Parsers/ASTSettingsProfileElement.h index 0470b51cf85..ee1ee28c383 100644 --- a/src/Parsers/ASTSettingsProfileElement.h +++ b/src/Parsers/ASTSettingsProfileElement.h @@ -19,6 +19,7 @@ public: Field max_value; std::optional readonly; bool id_mode = false; /// If true then `parent_profile` keeps UUID, not a name. + bool use_inherit_keyword = false; /// If true then this element is a part of ASTCreateSettingsProfileQuery. 
bool empty() const { return parent_profile.empty() && name.empty(); } @@ -41,5 +42,7 @@ public: String getID(char) const override { return "SettingsProfileElements"; } ASTPtr clone() const override { return std::make_shared(*this); } void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; + + void setUseInheritKeyword(bool use_inherit_keyword_); }; } diff --git a/src/Parsers/IParser.h b/src/Parsers/IParser.h index 925140bd25e..5bfbf1ed476 100644 --- a/src/Parsers/IParser.h +++ b/src/Parsers/IParser.h @@ -126,7 +126,7 @@ public: return parse(pos, node, expected); } - virtual ~IParser() {} + virtual ~IParser() = default; }; using ParserPtr = std::unique_ptr; diff --git a/src/Parsers/ParserCreateQuotaQuery.cpp b/src/Parsers/ParserCreateQuotaQuery.cpp index 66e72ee4968..6007d6206ec 100644 --- a/src/Parsers/ParserCreateQuotaQuery.cpp +++ b/src/Parsers/ParserCreateQuotaQuery.cpp @@ -63,12 +63,22 @@ namespace }); } - bool parseLimit(IParserBase::Pos & pos, Expected & expected, ResourceType & resource_type, ResourceAmount & max) + bool parseLimit(IParserBase::Pos & pos, Expected & expected, bool first, ResourceType & resource_type, ResourceAmount & max) { return IParserBase::wrapParseImpl(pos, [&] { - if (!ParserKeyword{"MAX"}.ignore(pos, expected)) - return false; + if (first) + { + if (!ParserKeyword{"MAX"}.ignore(pos, expected)) + return false; + } + else + { + if (!ParserToken{TokenType::Comma}.ignore(pos, expected)) + return false; + + ParserKeyword{"MAX"}.ignore(pos, expected); + } bool resource_type_set = false; for (auto rt : ext::range_with_static_cast(Quota::MAX_RESOURCE_TYPE)) @@ -83,9 +93,6 @@ namespace if (!resource_type_set) return false; - if (!ParserToken{TokenType::Equals}.ignore(pos, expected)) - return false; - ASTPtr max_ast; if (ParserNumber{}.parse(pos, max_ast, expected)) { @@ -95,10 +102,6 @@ namespace else max = applyVisitor(FieldVisitorConvertToNumber(), max_field); } - else if (ParserKeyword{"ANY"}.ignore(pos, expected)) - { - max = Quota::UNLIMITED; - } else return false; @@ -106,18 +109,7 @@ namespace }); } - bool parseCommaAndLimit(IParserBase::Pos & pos, Expected & expected, ResourceType & resource_type, ResourceAmount & max) - { - return IParserBase::wrapParseImpl(pos, [&] - { - if (!ParserToken{TokenType::Comma}.ignore(pos, expected)) - return false; - - return parseLimit(pos, expected, resource_type, max); - }); - } - - bool parseLimits(IParserBase::Pos & pos, Expected & expected, bool alter, ASTCreateQuotaQuery::Limits & limits) + bool parseLimits(IParserBase::Pos & pos, Expected & expected, ASTCreateQuotaQuery::Limits & limits) { return IParserBase::wrapParseImpl(pos, [&] { @@ -142,23 +134,22 @@ namespace new_limits.duration = std::chrono::seconds(static_cast(num_intervals * interval_kind.toAvgSeconds())); - if (alter && ParserKeyword{"UNSET TRACKING"}.ignore(pos, expected)) + if (ParserKeyword{"NO LIMITS"}.ignore(pos, expected)) { - new_limits.unset_tracking = true; + new_limits.drop = true; } - else if (ParserKeyword{"SET TRACKING"}.ignore(pos, expected) || ParserKeyword{"TRACKING"}.ignore(pos, expected)) + else if (ParserKeyword{"TRACKING ONLY"}.ignore(pos, expected)) { } else { - ParserKeyword{"SET"}.ignore(pos, expected); ResourceType resource_type; ResourceAmount max; - if (!parseLimit(pos, expected, resource_type, max)) + if (!parseLimit(pos, expected, true, resource_type, max)) return false; new_limits.max[resource_type] = max; - while (parseCommaAndLimit(pos, expected, resource_type, max)) + while 
(parseLimit(pos, expected, false, resource_type, max)) new_limits.max[resource_type] = max; } @@ -167,7 +158,7 @@ namespace }); } - bool parseAllLimits(IParserBase::Pos & pos, Expected & expected, bool alter, std::vector & all_limits) + bool parseAllLimits(IParserBase::Pos & pos, Expected & expected, std::vector & all_limits) { return IParserBase::wrapParseImpl(pos, [&] { @@ -175,7 +166,7 @@ namespace do { ASTCreateQuotaQuery::Limits limits; - if (!parseLimits(pos, expected, alter, limits)) + if (!parseLimits(pos, expected, limits)) { all_limits.resize(old_size); return false; @@ -199,6 +190,14 @@ namespace return true; }); } + + bool parseOnCluster(IParserBase::Pos & pos, Expected & expected, String & cluster) + { + return IParserBase::wrapParseImpl(pos, [&] + { + return ParserKeyword{"ON"}.ignore(pos, expected) && ASTQueryWithOnCluster::parse(pos, cluster, expected); + }); + } } @@ -238,16 +237,10 @@ bool ParserCreateQuotaQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe if (!parseIdentifierOrStringLiteral(pos, expected, name)) return false; - String cluster; - if (ParserKeyword{"ON"}.ignore(pos, expected)) - { - if (!ASTQueryWithOnCluster::parse(pos, cluster, expected)) - return false; - } - String new_name; std::optional key_type; std::vector all_limits; + String cluster; while (true) { @@ -257,7 +250,10 @@ bool ParserCreateQuotaQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe if (!key_type && parseKeyType(pos, expected, key_type)) continue; - if (parseAllLimits(pos, expected, alter, all_limits)) + if (parseAllLimits(pos, expected, all_limits)) + continue; + + if (cluster.empty() && parseOnCluster(pos, expected, cluster)) continue; break; @@ -266,6 +262,9 @@ bool ParserCreateQuotaQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe std::shared_ptr roles; parseToRoles(pos, expected, attach_mode, roles); + if (cluster.empty()) + parseOnCluster(pos, expected, cluster); + auto query = std::make_shared(); node = query; diff --git a/src/Parsers/ParserCreateQuotaQuery.h b/src/Parsers/ParserCreateQuotaQuery.h index 18e6ef6f9f7..786c8292b15 100644 --- a/src/Parsers/ParserCreateQuotaQuery.h +++ b/src/Parsers/ParserCreateQuotaQuery.h @@ -9,17 +9,16 @@ namespace DB * CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name * [KEYED BY {'none' | 'user name' | 'ip address' | 'client key' | 'client key or user name' | 'client key or ip address'}] * [FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY} - * {[SET] MAX {{QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = {number | ANY} } [,...] | - * [SET] TRACKING} [,...]] + * {MAX {{QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = number} [,...] | + * NO LIMITS | TRACKING ONLY} [,...]] * [TO {role [,...] | ALL | ALL EXCEPT role [,...]}] * * ALTER QUOTA [IF EXISTS] name * [RENAME TO new_name] * [KEYED BY {'none' | 'user name' | 'ip address' | 'client key' | 'client key or user name' | 'client key or ip address'}] * [FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY} - * {[SET] MAX {{QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = {number | ANY} } [,...] | - * [SET] TRACKING | - * UNSET TRACKING} [,...]] + * {MAX {{QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = number} } [,...] | + * NO LIMITS | TRACKING ONLY} [,...]] * [TO {role [,...] 
| ALL | ALL EXCEPT role [,...]}] */ class ParserCreateQuotaQuery : public IParserBase diff --git a/src/Parsers/ParserCreateRoleQuery.cpp b/src/Parsers/ParserCreateRoleQuery.cpp index 05143108480..2a6f2dd2c90 100644 --- a/src/Parsers/ParserCreateRoleQuery.cpp +++ b/src/Parsers/ParserCreateRoleQuery.cpp @@ -41,6 +41,14 @@ namespace return true; }); } + + bool parseOnCluster(IParserBase::Pos & pos, Expected & expected, String & cluster) + { + return IParserBase::wrapParseImpl(pos, [&] + { + return ParserKeyword{"ON"}.ignore(pos, expected) && ASTQueryWithOnCluster::parse(pos, cluster, expected); + }); + } } @@ -80,15 +88,10 @@ bool ParserCreateRoleQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (!parseRoleName(pos, expected, name)) return false; - String cluster; - if (ParserKeyword{"ON"}.ignore(pos, expected)) - { - if (!ASTQueryWithOnCluster::parse(pos, cluster, expected)) - return false; - } - String new_name; std::shared_ptr settings; + String cluster; + while (true) { if (alter && parseRenameTo(pos, expected, new_name)) @@ -97,6 +100,9 @@ bool ParserCreateRoleQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (parseSettings(pos, expected, attach_mode, settings)) continue; + if (cluster.empty() && parseOnCluster(pos, expected, cluster)) + continue; + break; } diff --git a/src/Parsers/ParserCreateRowPolicyQuery.cpp b/src/Parsers/ParserCreateRowPolicyQuery.cpp index 8bfe54b87b2..b6840f0ed6a 100644 --- a/src/Parsers/ParserCreateRowPolicyQuery.cpp +++ b/src/Parsers/ParserCreateRowPolicyQuery.cpp @@ -83,14 +83,13 @@ namespace static constexpr char delete_op[] = "DELETE"; std::vector ops; - bool keyword_for = false; if (ParserKeyword{"FOR"}.ignore(pos, expected)) { - keyword_for = true; do { if (ParserKeyword{"SELECT"}.ignore(pos, expected)) ops.push_back(select_op); +#if 0 /// INSERT, UPDATE, DELETE are not supported yet else if (ParserKeyword{"INSERT"}.ignore(pos, expected)) ops.push_back(insert_op); else if (ParserKeyword{"UPDATE"}.ignore(pos, expected)) @@ -100,6 +99,7 @@ namespace else if (ParserKeyword{"ALL"}.ignore(pos, expected)) { } +#endif else return false; } @@ -109,9 +109,11 @@ namespace if (ops.empty()) { ops.push_back(select_op); +#if 0 /// INSERT, UPDATE, DELETE are not supported yet ops.push_back(insert_op); ops.push_back(update_op); ops.push_back(delete_op); +#endif } std::optional filter; @@ -123,14 +125,15 @@ namespace if (!parseConditionalExpression(pos, expected, filter)) return false; } +#if 0 /// INSERT, UPDATE, DELETE are not supported yet if (ParserKeyword{"WITH CHECK"}.ignore(pos, expected)) { keyword_with_check = true; if (!parseConditionalExpression(pos, expected, check)) return false; } - - if (!keyword_for && !keyword_using && !keyword_with_check) +#endif + if (!keyword_using && !keyword_with_check) return false; if (filter && !check && !alter) @@ -200,6 +203,14 @@ namespace return true; }); } + + bool parseOnCluster(IParserBase::Pos & pos, Expected & expected, String & cluster) + { + return IParserBase::wrapParseImpl(pos, [&] + { + return ParserKeyword{"ON"}.ignore(pos, expected) && ASTQueryWithOnCluster::parse(pos, cluster, expected); + }); + } } @@ -243,16 +254,10 @@ bool ParserCreateRowPolicyQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & || !parseDatabaseAndTableName(pos, expected, database, table_name)) return false; - String cluster; - if (ParserKeyword{"ON"}.ignore(pos, expected)) - { - if (!ASTQueryWithOnCluster::parse(pos, cluster, expected)) - return false; - } - String new_policy_name; std::optional is_restrictive; 
std::vector> conditions; + String cluster; while (true) { @@ -265,12 +270,18 @@ bool ParserCreateRowPolicyQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & if (parseMultipleConditions(pos, expected, alter, conditions)) continue; + if (cluster.empty() && parseOnCluster(pos, expected, cluster)) + continue; + break; } std::shared_ptr roles; parseToRoles(pos, expected, attach_mode, roles); + if (cluster.empty()) + parseOnCluster(pos, expected, cluster); + auto query = std::make_shared(); node = query; diff --git a/src/Parsers/ParserCreateSettingsProfileQuery.cpp b/src/Parsers/ParserCreateSettingsProfileQuery.cpp index 4d3ed2f6e63..83d0f0c1d91 100644 --- a/src/Parsers/ParserCreateSettingsProfileQuery.cpp +++ b/src/Parsers/ParserCreateSettingsProfileQuery.cpp @@ -33,7 +33,7 @@ namespace return false; ASTPtr new_settings_ast; - if (!ParserSettingsProfileElements{}.useIDMode(id_mode).parse(pos, new_settings_ast, expected)) + if (!ParserSettingsProfileElements{}.useIDMode(id_mode).enableInheritKeyword(true).parse(pos, new_settings_ast, expected)) return false; if (!settings) @@ -57,6 +57,14 @@ namespace return true; }); } + + bool parseOnCluster(IParserBase::Pos & pos, Expected & expected, String & cluster) + { + return IParserBase::wrapParseImpl(pos, [&] + { + return ParserKeyword{"ON"}.ignore(pos, expected) && ASTQueryWithOnCluster::parse(pos, cluster, expected); + }); + } } @@ -96,15 +104,10 @@ bool ParserCreateSettingsProfileQuery::parseImpl(Pos & pos, ASTPtr & node, Expec if (!parseIdentifierOrStringLiteral(pos, expected, name)) return false; - String cluster; - if (ParserKeyword{"ON"}.ignore(pos, expected)) - { - if (!ASTQueryWithOnCluster::parse(pos, cluster, expected)) - return false; - } - String new_name; std::shared_ptr settings; + String cluster; + while (true) { if (alter && parseRenameTo(pos, expected, new_name)) @@ -113,12 +116,18 @@ bool ParserCreateSettingsProfileQuery::parseImpl(Pos & pos, ASTPtr & node, Expec if (parseSettings(pos, expected, attach_mode, settings)) continue; + if (cluster.empty() && parseOnCluster(pos, expected, cluster)) + continue; + break; } std::shared_ptr to_roles; parseToRoles(pos, expected, attach_mode, to_roles); + if (cluster.empty()) + parseOnCluster(pos, expected, cluster); + auto query = std::make_shared(); node = query; diff --git a/src/Parsers/ParserCreateSettingsProfileQuery.h b/src/Parsers/ParserCreateSettingsProfileQuery.h index 6797fc884fa..073a8ca75ae 100644 --- a/src/Parsers/ParserCreateSettingsProfileQuery.h +++ b/src/Parsers/ParserCreateSettingsProfileQuery.h @@ -7,11 +7,11 @@ namespace DB { /** Parses queries like * CREATE SETTINGS PROFILE [IF NOT EXISTS | OR REPLACE] name - * [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] + * [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | INHERIT 'profile_name'] [,...] * * ALTER SETTINGS PROFILE [IF EXISTS] name * [RENAME TO new_name] - * [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] + * [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | INHERIT 'profile_name'] [,...] 
*/ class ParserCreateSettingsProfileQuery : public IParserBase { diff --git a/src/Parsers/ParserCreateUserQuery.cpp b/src/Parsers/ParserCreateUserQuery.cpp index 3968c26d42e..76a06a0282f 100644 --- a/src/Parsers/ParserCreateUserQuery.cpp +++ b/src/Parsers/ParserCreateUserQuery.cpp @@ -166,7 +166,7 @@ namespace { new_hosts.addLocalHost(); } - else if (ParserKeyword{"NAME REGEXP"}.ignore(pos, expected)) + else if (ParserKeyword{"REGEXP"}.ignore(pos, expected)) { ASTPtr ast; if (!ParserList{std::make_unique(), std::make_unique(TokenType::Comma), false}.parse(pos, ast, expected)) @@ -250,6 +250,14 @@ namespace return true; }); } + + bool parseOnCluster(IParserBase::Pos & pos, Expected & expected, String & cluster) + { + return IParserBase::wrapParseImpl(pos, [&] + { + return ParserKeyword{"ON"}.ignore(pos, expected) && ASTQueryWithOnCluster::parse(pos, cluster, expected); + }); + } } @@ -290,13 +298,6 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (!parseUserName(pos, expected, name, host_pattern)) return false; - String cluster; - if (ParserKeyword{"ON"}.ignore(pos, expected)) - { - if (!ASTQueryWithOnCluster::parse(pos, cluster, expected)) - return false; - } - String new_name; std::optional new_host_pattern; std::optional authentication; @@ -305,6 +306,7 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec std::optional remove_hosts; std::shared_ptr default_roles; std::shared_ptr settings; + String cluster; while (true) { @@ -320,6 +322,9 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (!default_roles && parseDefaultRoles(pos, expected, attach_mode, default_roles)) continue; + if (cluster.empty() && parseOnCluster(pos, expected, cluster)) + continue; + if (alter) { if (new_name.empty() && parseRenameTo(pos, expected, new_name, new_host_pattern)) diff --git a/src/Parsers/ParserCreateUserQuery.h b/src/Parsers/ParserCreateUserQuery.h index 4b2af34c003..d609894a7ec 100644 --- a/src/Parsers/ParserCreateUserQuery.h +++ b/src/Parsers/ParserCreateUserQuery.h @@ -8,13 +8,13 @@ namespace DB /** Parses queries like * CREATE USER [IF NOT EXISTS | OR REPLACE] name * [IDENTIFIED [WITH {NO_PASSWORD|PLAINTEXT_PASSWORD|SHA256_PASSWORD|SHA256_HASH|DOUBLE_SHA1_PASSWORD|DOUBLE_SHA1_HASH}] BY {'password'|'hash'}] - * [HOST {LOCAL | NAME 'name' | NAME REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] + * [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] * [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] * * ALTER USER [IF EXISTS] name * [RENAME TO new_name] * [IDENTIFIED [WITH {PLAINTEXT_PASSWORD|SHA256_PASSWORD|DOUBLE_SHA1_PASSWORD}] BY {'password'|'hash'}] - * [[ADD|DROP] HOST {LOCAL | NAME 'name' | NAME REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] + * [[ADD|DROP] HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] * [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] 
*/ class ParserCreateUserQuery : public IParserBase diff --git a/src/Parsers/ParserGrantQuery.cpp b/src/Parsers/ParserGrantQuery.cpp index f6eecbe5dba..64dde8f6524 100644 --- a/src/Parsers/ParserGrantQuery.cpp +++ b/src/Parsers/ParserGrantQuery.cpp @@ -237,6 +237,14 @@ namespace return true; }); } + + bool parseOnCluster(IParserBase::Pos & pos, Expected & expected, String & cluster) + { + return IParserBase::wrapParseImpl(pos, [&] + { + return ParserKeyword{"ON"}.ignore(pos, expected) && ASTQueryWithOnCluster::parse(pos, cluster, expected); + }); + } } @@ -260,11 +268,8 @@ bool ParserGrantQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; String cluster; - if (ParserKeyword{"ON"}.ignore(pos, expected)) - { - if (!ASTQueryWithOnCluster::parse(pos, cluster, expected)) - return false; - } + if (cluster.empty()) + parseOnCluster(pos, expected, cluster); bool grant_option = false; bool admin_option = false; @@ -281,10 +286,16 @@ bool ParserGrantQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!parseAccessRightsElements(pos, expected, elements) && !parseRoles(pos, expected, attach, roles)) return false; + if (cluster.empty()) + parseOnCluster(pos, expected, cluster); + std::shared_ptr to_roles; if (!parseToRoles(pos, expected, kind, to_roles)) return false; + if (cluster.empty()) + parseOnCluster(pos, expected, cluster); + if (kind == Kind::GRANT) { if (ParserKeyword{"WITH GRANT OPTION"}.ignore(pos, expected)) @@ -293,6 +304,9 @@ bool ParserGrantQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) admin_option = true; } + if (cluster.empty()) + parseOnCluster(pos, expected, cluster); + if (grant_option && roles) throw Exception("GRANT OPTION should be specified for access types", ErrorCodes::SYNTAX_ERROR); if (admin_option && !elements.empty()) diff --git a/src/Parsers/ParserSettingsProfileElement.cpp b/src/Parsers/ParserSettingsProfileElement.cpp index 06fa58fde4e..31bc339f544 100644 --- a/src/Parsers/ParserSettingsProfileElement.cpp +++ b/src/Parsers/ParserSettingsProfileElement.cpp @@ -108,7 +108,8 @@ bool ParserSettingsProfileElement::parseImpl(Pos & pos, ASTPtr & node, Expected Field max_value; std::optional readonly; - if (ParserKeyword{"PROFILE"}.ignore(pos, expected)) + if (ParserKeyword{"PROFILE"}.ignore(pos, expected) || + (enable_inherit_keyword && ParserKeyword{"INHERIT"}.ignore(pos, expected))) { if (!parseProfileNameOrID(pos, expected, id_mode, parent_profile)) return false; @@ -120,9 +121,15 @@ bool ParserSettingsProfileElement::parseImpl(Pos & pos, ASTPtr & node, Expected return false; name = getIdentifierName(name_ast); + bool has_value_or_constraint = false; while (parseValue(pos, expected, value) || parseMinMaxValue(pos, expected, min_value, max_value) || parseReadonlyOrWritableKeyword(pos, expected, readonly)) - ; + { + has_value_or_constraint = true; + } + + if (!has_value_or_constraint) + return false; } auto result = std::make_shared(); @@ -133,6 +140,7 @@ bool ParserSettingsProfileElement::parseImpl(Pos & pos, ASTPtr & node, Expected result->max_value = std::move(max_value); result->readonly = readonly; result->id_mode = id_mode; + result->use_inherit_keyword = enable_inherit_keyword; node = result; return true; } @@ -142,12 +150,15 @@ bool ParserSettingsProfileElements::parseImpl(Pos & pos, ASTPtr & node, Expected { std::vector> elements; - if (!ParserKeyword{"NONE"}.ignore(pos, expected)) + if (ParserKeyword{"NONE"}.ignore(pos, expected)) + { + } + else { do { ASTPtr ast; - if 
diff --git a/src/Parsers/ParserSettingsProfileElement.cpp b/src/Parsers/ParserSettingsProfileElement.cpp
index 06fa58fde4e..31bc339f544 100644
--- a/src/Parsers/ParserSettingsProfileElement.cpp
+++ b/src/Parsers/ParserSettingsProfileElement.cpp
@@ -108,7 +108,8 @@ bool ParserSettingsProfileElement::parseImpl(Pos & pos, ASTPtr & node, Expected
     Field max_value;
     std::optional<bool> readonly;
-    if (ParserKeyword{"PROFILE"}.ignore(pos, expected))
+    if (ParserKeyword{"PROFILE"}.ignore(pos, expected) ||
+        (enable_inherit_keyword && ParserKeyword{"INHERIT"}.ignore(pos, expected)))
     {
         if (!parseProfileNameOrID(pos, expected, id_mode, parent_profile))
             return false;
@@ -120,9 +121,15 @@ bool ParserSettingsProfileElement::parseImpl(Pos & pos, ASTPtr & node, Expected
             return false;
         name = getIdentifierName(name_ast);
+        bool has_value_or_constraint = false;
         while (parseValue(pos, expected, value) || parseMinMaxValue(pos, expected, min_value, max_value)
                || parseReadonlyOrWritableKeyword(pos, expected, readonly))
-            ;
+        {
+            has_value_or_constraint = true;
+        }
+
+        if (!has_value_or_constraint)
+            return false;
     }
     auto result = std::make_shared<ASTSettingsProfileElement>();
@@ -133,6 +140,7 @@ bool ParserSettingsProfileElement::parseImpl(Pos & pos, ASTPtr & node, Expected
     result->max_value = std::move(max_value);
     result->readonly = readonly;
     result->id_mode = id_mode;
+    result->use_inherit_keyword = enable_inherit_keyword;
     node = result;
     return true;
 }
@@ -142,12 +150,15 @@ bool ParserSettingsProfileElements::parseImpl(Pos & pos, ASTPtr & node, Expected
 {
     std::vector<std::shared_ptr<ASTSettingsProfileElement>> elements;
-    if (!ParserKeyword{"NONE"}.ignore(pos, expected))
+    if (ParserKeyword{"NONE"}.ignore(pos, expected))
+    {
+    }
+    else
     {
         do
         {
             ASTPtr ast;
-            if (!ParserSettingsProfileElement{}.useIDMode(id_mode).parse(pos, ast, expected))
+            if (!ParserSettingsProfileElement{}.useIDMode(id_mode).enableInheritKeyword(enable_inherit_keyword).parse(pos, ast, expected))
                 return false;
             auto element = typeid_cast<std::shared_ptr<ASTSettingsProfileElement>>(ast);
             elements.push_back(std::move(element));
diff --git a/src/Parsers/ParserSettingsProfileElement.h b/src/Parsers/ParserSettingsProfileElement.h
index ec8e1abb5b5..309c797e645 100644
--- a/src/Parsers/ParserSettingsProfileElement.h
+++ b/src/Parsers/ParserSettingsProfileElement.h
@@ -12,6 +12,7 @@ class ParserSettingsProfileElement : public IParserBase
 {
 public:
     ParserSettingsProfileElement & useIDMode(bool enable_) { id_mode = enable_; return *this; }
+    ParserSettingsProfileElement & enableInheritKeyword(bool enable_) { enable_inherit_keyword = enable_; return *this; }
 protected:
     const char * getName() const override { return "SettingsProfileElement"; }
@@ -19,6 +20,7 @@ protected:
 private:
     bool id_mode = false;
+    bool enable_inherit_keyword = false;
 };
@@ -26,6 +28,7 @@ class ParserSettingsProfileElements : public IParserBase
 {
 public:
     ParserSettingsProfileElements & useIDMode(bool enable_) { id_mode = enable_; return *this; }
+    ParserSettingsProfileElements & enableInheritKeyword(bool enable_) { enable_inherit_keyword = enable_; return *this; }
 protected:
     const char * getName() const override { return "SettingsProfileElements"; }
@@ -33,4 +36,7 @@ protected:
 private:
     bool id_mode = false;
-};}
+    bool enable_inherit_keyword = false;
+};
+
+}
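In DCL terms (mirroring the test_settings_profile and test_disk_access_storage changes further below; profile and user names are illustrative), a profile can now reference its parent with INHERIT, and a bare setting name with neither a value nor a constraint becomes a parse error instead of being silently accepted:

```sql
CREATE SETTINGS PROFILE s1 SETTINGS max_memory_usage = 100000001 MIN 90000000 MAX 110000000;
CREATE SETTINGS PROFILE s2 SETTINGS INHERIT s1 TO u2;   -- INHERIT as an alias for PROFILE
-- CREATE SETTINGS PROFILE s3 SETTINGS max_memory_usage; -- now rejected: no value or constraint
```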
diff --git a/src/Parsers/makeASTForLogicalFunction.cpp b/src/Parsers/makeASTForLogicalFunction.cpp
new file mode 100644
index 00000000000..eaae38740aa
--- /dev/null
+++ b/src/Parsers/makeASTForLogicalFunction.cpp
@@ -0,0 +1,103 @@
+#include <Parsers/makeASTForLogicalFunction.h>
+#include <Parsers/ASTFunction.h>
+#include <Parsers/ASTLiteral.h>
+#include <Common/FieldVisitors.h>
+#include <boost/range/algorithm_ext/erase.hpp>
+
+
+namespace DB
+{
+ASTPtr makeASTForLogicalNot(ASTPtr argument)
+{
+    bool b;
+    if (tryGetLiteralBool(argument.get(), b))
+        return std::make_shared<ASTLiteral>(Field{UInt8(!b)});
+
+    auto function = std::make_shared<ASTFunction>();
+    auto exp_list = std::make_shared<ASTExpressionList>();
+    function->name = "not";
+    function->arguments = exp_list;
+    function->children.push_back(exp_list);
+    exp_list->children.push_back(argument);
+    return function;
+}
+
+
+ASTPtr makeASTForLogicalAnd(ASTs && arguments)
+{
+    bool partial_result = true;
+    boost::range::remove_erase_if(arguments, [&](const ASTPtr & argument) -> bool
+    {
+        bool b;
+        if (!tryGetLiteralBool(argument.get(), b))
+            return false;
+        partial_result &= b;
+        return true;
+    });
+
+    if (!partial_result)
+        return std::make_shared<ASTLiteral>(Field{UInt8(0)});
+    if (arguments.empty())
+        return std::make_shared<ASTLiteral>(Field{UInt8(1)});
+    if (arguments.size() == 1)
+        return arguments[0];
+
+    auto function = std::make_shared<ASTFunction>();
+    auto exp_list = std::make_shared<ASTExpressionList>();
+    function->name = "and";
+    function->arguments = exp_list;
+    function->children.push_back(exp_list);
+    exp_list->children = std::move(arguments);
+    return function;
+}
+
+
+ASTPtr makeASTForLogicalOr(ASTs && arguments)
+{
+    bool partial_result = false;
+    boost::range::remove_erase_if(arguments, [&](const ASTPtr & argument) -> bool
+    {
+        bool b;
+        if (!tryGetLiteralBool(argument.get(), b))
+            return false;
+        partial_result |= b;
+        return true;
+    });
+
+    if (partial_result)
+        return std::make_shared<ASTLiteral>(Field{UInt8(1)});
+    if (arguments.empty())
+        return std::make_shared<ASTLiteral>(Field{UInt8(0)});
+    if (arguments.size() == 1)
+        return arguments[0];
+
+    auto function = std::make_shared<ASTFunction>();
+    auto exp_list = std::make_shared<ASTExpressionList>();
+    function->name = "or";
+    function->arguments = exp_list;
+    function->children.push_back(exp_list);
+    exp_list->children = std::move(arguments);
+    return function;
+}
+
+
+bool tryGetLiteralBool(const IAST * ast, bool & value)
+{
+    if (!ast)
+        return false;
+
+    try
+    {
+        if (const ASTLiteral * literal = ast->as<ASTLiteral>())
+        {
+            value = !literal->value.isNull() && applyVisitor(FieldVisitorConvertToNumber<bool>(), literal->value);
+            return true;
+        }
+        return false;
+    }
+    catch (...)
+    {
+        return false;
+    }
+}
+}
diff --git a/src/Parsers/makeASTForLogicalFunction.h b/src/Parsers/makeASTForLogicalFunction.h
new file mode 100644
index 00000000000..5c1096cab6e
--- /dev/null
+++ b/src/Parsers/makeASTForLogicalFunction.h
@@ -0,0 +1,19 @@
+#pragma once
+
+#include <Parsers/IAST_fwd.h>
+
+
+namespace DB
+{
+/// Makes an AST calculating NOT argument.
+ASTPtr makeASTForLogicalNot(ASTPtr argument);
+
+/// Makes an AST calculating argument1 AND argument2 AND ... AND argumentN.
+ASTPtr makeASTForLogicalAnd(ASTs && arguments);
+
+/// Makes an AST calculating argument1 OR argument2 OR ... OR argumentN.
+ASTPtr makeASTForLogicalOr(ASTs && arguments);
+
+/// Tries to extract a literal bool from AST.
+bool tryGetLiteralBool(const IAST * ast, bool & value);
+}
diff --git a/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/src/Storages/Distributed/DistributedBlockOutputStream.cpp
index af17a026927..80b7d4c019e 100644
--- a/src/Storages/Distributed/DistributedBlockOutputStream.cpp
+++ b/src/Storages/Distributed/DistributedBlockOutputStream.cpp
@@ -12,6 +12,8 @@
 #include
 #include
 #include
+#include <DataStreams/ConvertingBlockInputStream.h>
+#include <DataStreams/OneBlockInputStream.h>
 #include
 #include
 #include
@@ -59,6 +61,26 @@ namespace ErrorCodes
     extern const int CANNOT_LINK;
 }
+static void writeBlockConvert(const Context & context, const BlockOutputStreamPtr & out, const Block & block, const size_t repeats)
+{
+    if (!blocksHaveEqualStructure(out->getHeader(), block))
+    {
+        ConvertingBlockInputStream convert(context,
+            std::make_shared<OneBlockInputStream>(block),
+            out->getHeader(),
+            ConvertingBlockInputStream::MatchColumnsMode::Name);
+        auto adopted_block = convert.read();
+
+        for (size_t i = 0; i < repeats; ++i)
+            out->write(adopted_block);
+    }
+    else
+    {
+        for (size_t i = 0; i < repeats; ++i)
+            out->write(block);
+    }
+}
+
 DistributedBlockOutputStream::DistributedBlockOutputStream(
     const Context & context_, StorageDistributed & storage_, const ASTPtr & query_ast_, const ClusterPtr & cluster_,
@@ -306,14 +328,12 @@ ThreadPool::Job DistributedBlockOutputStream::runWritingJob(DistributedBlockOutp
                 InterpreterInsertQuery interp(query_ast, *job.local_context);
                 auto block_io = interp.execute();
-                assertBlocksHaveEqualStructure(block_io.out->getHeader(), shard_block, "flushing shard block for " + storage.getStorageID().getNameForLogs());
+
                 job.stream = block_io.out;
                 job.stream->writePrefix();
             }
-            size_t num_repetitions = shard_info.getLocalNodeCount();
-            for (size_t i = 0; i < num_repetitions; ++i)
-                job.stream->write(shard_block);
+            writeBlockConvert(context, job.stream, shard_block, shard_info.getLocalNodeCount());
         }
         job.blocks_written += 1;
@@ -547,13 +567,8 @@ void DistributedBlockOutputStream::writeToLocal(const Block & block, const size_
     auto block_io = interp.execute();
-    assertBlocksHaveEqualStructure(block_io.out->getHeader(), block, "flushing " + storage.getStorageID().getNameForLogs());
-
     block_io.out->writePrefix();
-
-    for (size_t i = 0; i < repeats; ++i)
-        block_io.out->write(block);
-
+    writeBlockConvert(context, block_io.out, block, repeats);
     block_io.out->writeSuffix();
 }
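The new writeBlockConvert helper changes observable behavior for inserts into a Distributed table over a local shard: a block whose structure differs from the underlying table's header is now converted by column name instead of being rejected by the removed assertion. A SQL sketch mirroring the 00967 test updated further below:

```sql
CREATE TABLE underlying_00967 (key Nullable(UInt64)) ENGINE = TinyLog;
CREATE TABLE dist_00967 (key UInt64)
    ENGINE = Distributed('test_shard_localhost', currentDatabase(), underlying_00967);

-- Previously failed with serverError 171 (block structure mismatch);
-- the UInt64 column is now converted to Nullable(UInt64) on write:
INSERT INTO dist_00967 SELECT toUInt64(number) FROM system.numbers LIMIT 1;
```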
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp
index fda0a8eb5a8..72255081e6b 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp
@@ -147,11 +147,11 @@ void ReplicatedMergeTreeBlockOutputStream::write(const Block & block)
             /// That is, do not insert the same data to the same partition twice.
             block_id = part->info.partition_id + "_" + toString(hash_value.words[0]) + "_" + toString(hash_value.words[1]);
-            LOG_DEBUG(log, "Wrote block with ID '" << block_id << "', " << block.rows() << " rows");
+            LOG_DEBUG(log, "Wrote block with ID '" << block_id << "', " << current_block.block.rows() << " rows");
         }
         else
         {
-            LOG_DEBUG(log, "Wrote block with " << block.rows() << " rows");
+            LOG_DEBUG(log, "Wrote block with " << current_block.block.rows() << " rows");
         }
         try
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index 31456c8d1f1..1af86f7d5f1 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -246,6 +246,11 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree(
         createTableIfNotExists();
+        /// We have to check granularity on other replicas. If it's fixed, we
+        /// must create our new replica with fixed granularity and store this
+        /// information in /replica/metadata.
+        other_replicas_fixed_granularity = checkFixedGranualrityInZookeeper();
+
         checkTableStructure(zookeeper_path);
         Coordination::Stat metadata_stat;
@@ -256,11 +261,14 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree(
     }
     else
     {
+        /// In old tables this node may be missing or empty.
         String replica_metadata;
         bool replica_metadata_exists = current_zookeeper->tryGet(replica_path + "/metadata", replica_metadata);
         if (!replica_metadata_exists || replica_metadata.empty())
         {
+            /// We have to check the granularity of the shared node before we create ours.
+            other_replicas_fixed_granularity = checkFixedGranualrityInZookeeper();
             ReplicatedMergeTreeTableMetadata current_metadata(*this);
             current_zookeeper->createOrUpdate(replica_path + "/metadata", current_metadata.toString(), zkutil::CreateMode::Persistent);
         }
@@ -291,7 +299,6 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree(
     createNewZooKeeperNodes();
-    other_replicas_fixed_granularity = checkFixedGranualrityInZookeeper();
 }
diff --git a/tests/integration/test_access_control_on_cluster/configs/users.d/access_management.xml b/tests/integration/helpers/0_common_instance_users.xml
similarity index 100%
rename from tests/integration/test_access_control_on_cluster/configs/users.d/access_management.xml
rename to tests/integration/helpers/0_common_instance_users.xml
diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py
index 5dc93cb338a..69f8206b2c1 100644
--- a/tests/integration/helpers/cluster.py
+++ b/tests/integration/helpers/cluster.py
@@ -923,6 +923,7 @@ class ClickHouseInstance:
         # The file is named with 0_ prefix to be processed before other configuration overloads.
shutil.copy(p.join(HELPERS_DIR, '0_common_instance_config.xml'), self.config_d_dir) + shutil.copy(p.join(HELPERS_DIR, '0_common_instance_users.xml'), users_d_dir) # Generate and write macros file macros = self.macros.copy() diff --git a/tests/integration/test_adaptive_granularity_different_settings/__init__.py b/tests/integration/test_adaptive_granularity_different_settings/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_adaptive_granularity_different_settings/test.py b/tests/integration/test_adaptive_granularity_different_settings/test.py new file mode 100644 index 00000000000..b066c437e06 --- /dev/null +++ b/tests/integration/test_adaptive_granularity_different_settings/test.py @@ -0,0 +1,49 @@ +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node1 = cluster.add_instance('node1', with_zookeeper=True) +node2 = cluster.add_instance('node2', with_zookeeper=True) + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + + yield cluster + finally: + cluster.shutdown() + + +def test_attach_detach(start_cluster): + + node1.query(""" + CREATE TABLE test (key UInt64) + ENGINE = ReplicatedMergeTree('/clickhouse/test', '1') + ORDER BY tuple() + SETTINGS index_granularity_bytes = 0""") + + node1.query("INSERT INTO test VALUES (1), (2)") + + node2.query(""" + CREATE TABLE test (key UInt64) + ENGINE = ReplicatedMergeTree('/clickhouse/test', '2') + ORDER BY tuple()""") + + node2.query("INSERT INTO test VALUES (3), (4)") + + node1.query("SYSTEM SYNC REPLICA test") + node2.query("SYSTEM SYNC REPLICA test") + + assert node1.query("SELECT COUNT() FROM test") == "4\n" + assert node2.query("SELECT COUNT() FROM test") == "4\n" + + node1.query("DETACH TABLE test") + node2.query("DETACH TABLE test") + + node1.query("ATTACH TABLE test") + node2.query("ATTACH TABLE test") + + assert node1.query("SELECT COUNT() FROM test") == "4\n" + assert node2.query("SELECT COUNT() FROM test") == "4\n" diff --git a/tests/integration/test_allowed_client_hosts/configs/users.xml b/tests/integration/test_allowed_client_hosts/configs/users.xml deleted file mode 100644 index 3142ec5355a..00000000000 --- a/tests/integration/test_allowed_client_hosts/configs/users.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - default - - - - diff --git a/tests/integration/test_authentication/test.py b/tests/integration/test_authentication/test.py index b7ffd1ed35b..483b59813e5 100644 --- a/tests/integration/test_authentication/test.py +++ b/tests/integration/test_authentication/test.py @@ -2,7 +2,7 @@ import pytest from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -instance = cluster.add_instance('instance', config_dir="configs") +instance = cluster.add_instance('instance') @pytest.fixture(scope="module", autouse=True) diff --git a/tests/integration/test_disk_access_storage/configs/users.d/access_management.xml b/tests/integration/test_disk_access_storage/configs/users.d/access_management.xml deleted file mode 100644 index 7e799cb7b10..00000000000 --- a/tests/integration/test_disk_access_storage/configs/users.d/access_management.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - 1 - - - diff --git a/tests/integration/test_disk_access_storage/test.py b/tests/integration/test_disk_access_storage/test.py index 1f6577b9dd1..babceee7c76 100644 --- a/tests/integration/test_disk_access_storage/test.py +++ b/tests/integration/test_disk_access_storage/test.py @@ -2,7 +2,7 @@ import pytest from 
helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -instance = cluster.add_instance('instance', config_dir='configs', stay_alive=True) +instance = cluster.add_instance('instance', stay_alive=True) @pytest.fixture(scope="module", autouse=True) @@ -22,7 +22,7 @@ def create_entities(): instance.query("CREATE USER u2 IDENTIFIED BY 'qwerty' HOST LOCAL DEFAULT ROLE rx") instance.query("CREATE SETTINGS PROFILE s2 SETTINGS PROFILE s1 TO u2") instance.query("CREATE ROW POLICY p ON mydb.mytable FOR SELECT USING a<1000 TO u1, u2") - instance.query("CREATE QUOTA q FOR INTERVAL 1 HOUR SET MAX QUERIES = 100 TO ALL EXCEPT rx") + instance.query("CREATE QUOTA q FOR INTERVAL 1 HOUR MAX QUERIES 100 TO ALL EXCEPT rx") @pytest.fixture(autouse=True) @@ -41,13 +41,13 @@ def test_create(): assert instance.query("SHOW CREATE USER u1") == "CREATE USER u1 SETTINGS PROFILE s1\n" assert instance.query("SHOW CREATE USER u2") == "CREATE USER u2 HOST LOCAL DEFAULT ROLE rx\n" assert instance.query("SHOW CREATE ROW POLICY p ON mydb.mytable") == "CREATE ROW POLICY p ON mydb.mytable FOR SELECT USING a < 1000 TO u1, u2\n" - assert instance.query("SHOW CREATE QUOTA q") == "CREATE QUOTA q KEYED BY \\'none\\' FOR INTERVAL 1 HOUR MAX QUERIES = 100 TO ALL EXCEPT rx\n" + assert instance.query("SHOW CREATE QUOTA q") == "CREATE QUOTA q KEYED BY \\'none\\' FOR INTERVAL 1 HOUR MAX QUERIES 100 TO ALL EXCEPT rx\n" assert instance.query("SHOW GRANTS FOR u1") == "" assert instance.query("SHOW GRANTS FOR u2") == "GRANT rx TO u2\n" assert instance.query("SHOW CREATE ROLE rx") == "CREATE ROLE rx SETTINGS PROFILE s1\n" assert instance.query("SHOW GRANTS FOR rx") == "" assert instance.query("SHOW CREATE SETTINGS PROFILE s1") == "CREATE SETTINGS PROFILE s1 SETTINGS max_memory_usage = 123456789 MIN 100000000 MAX 200000000\n" - assert instance.query("SHOW CREATE SETTINGS PROFILE s2") == "CREATE SETTINGS PROFILE s2 SETTINGS PROFILE s1 TO u2\n" + assert instance.query("SHOW CREATE SETTINGS PROFILE s2") == "CREATE SETTINGS PROFILE s2 SETTINGS INHERIT s1 TO u2\n" check() instance.restart_clickhouse() # Check persistency @@ -77,7 +77,7 @@ def test_alter(): assert instance.query("SHOW GRANTS FOR rx") == "GRANT SELECT ON mydb.* TO rx WITH GRANT OPTION\n" assert instance.query("SHOW GRANTS FOR ry") == "GRANT rx TO ry WITH ADMIN OPTION\n" assert instance.query("SHOW CREATE SETTINGS PROFILE s1") == "CREATE SETTINGS PROFILE s1 SETTINGS max_memory_usage = 987654321 READONLY\n" - assert instance.query("SHOW CREATE SETTINGS PROFILE s2") == "CREATE SETTINGS PROFILE s2 SETTINGS PROFILE s1 TO u2\n" + assert instance.query("SHOW CREATE SETTINGS PROFILE s2") == "CREATE SETTINGS PROFILE s2 SETTINGS INHERIT s1 TO u2\n" check() instance.restart_clickhouse() # Check persistency diff --git a/tests/integration/test_enabling_access_management/__init__.py b/tests/integration/test_enabling_access_management/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_enabling_access_management/configs/users.d/extra_users.xml b/tests/integration/test_enabling_access_management/configs/users.d/extra_users.xml new file mode 100644 index 00000000000..7d87a29a915 --- /dev/null +++ b/tests/integration/test_enabling_access_management/configs/users.d/extra_users.xml @@ -0,0 +1,13 @@ + + + + + readonly + 1 + + + + default + + + diff --git a/tests/integration/test_enabling_access_management/test.py b/tests/integration/test_enabling_access_management/test.py new file mode 100644 index 00000000000..abb8cd6c07a --- 
/dev/null +++ b/tests/integration/test_enabling_access_management/test.py @@ -0,0 +1,24 @@ +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance('instance', config_dir="configs") + +@pytest.fixture(scope="module", autouse=True) +def started_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def test_enabling_access_management(): + instance.query("CREATE USER Alex", user='default') + assert instance.query("SHOW CREATE USER Alex", user='default') == "CREATE USER Alex\n" + assert instance.query("SHOW CREATE USER Alex", user='readonly') == "CREATE USER Alex\n" + assert "Not enough privileges" in instance.query_and_get_error("SHOW CREATE USER Alex", user='xyz') + + assert "Cannot execute query in readonly mode" in instance.query_and_get_error("CREATE USER Robin", user='readonly') + assert "Not enough privileges" in instance.query_and_get_error("CREATE USER Robin", user='xyz') diff --git a/tests/integration/test_grant_and_revoke/configs/users.d/access_management.xml b/tests/integration/test_grant_and_revoke/configs/users.d/access_management.xml deleted file mode 100644 index 7e799cb7b10..00000000000 --- a/tests/integration/test_grant_and_revoke/configs/users.d/access_management.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - 1 - - - diff --git a/tests/integration/test_grant_and_revoke/test.py b/tests/integration/test_grant_and_revoke/test.py index 25e0e9882de..6f4b0be5325 100644 --- a/tests/integration/test_grant_and_revoke/test.py +++ b/tests/integration/test_grant_and_revoke/test.py @@ -3,7 +3,7 @@ from helpers.cluster import ClickHouseCluster import re cluster = ClickHouseCluster(__file__) -instance = cluster.add_instance('instance', config_dir="configs") +instance = cluster.add_instance('instance') @pytest.fixture(scope="module", autouse=True) diff --git a/tests/integration/test_mysql_database_engine/test.py b/tests/integration/test_mysql_database_engine/test.py index 86e0b9df5fd..2791cc7b382 100644 --- a/tests/integration/test_mysql_database_engine/test.py +++ b/tests/integration/test_mysql_database_engine/test.py @@ -5,6 +5,7 @@ import pymysql.cursors import pytest from helpers.cluster import ClickHouseCluster +from helpers.client import QueryRuntimeException cluster = ClickHouseCluster(__file__) clickhouse_node = cluster.add_instance('node1', main_configs=['configs/remote_servers.xml'], with_mysql=True) @@ -92,7 +93,7 @@ def test_clickhouse_dml_for_mysql_database(started_cluster): with contextlib.closing(MySQLNodeInstance('root', 'clickhouse', '127.0.0.1', port=3308)) as mysql_node: mysql_node.query("CREATE DATABASE test_database DEFAULT CHARACTER SET 'utf8'") mysql_node.query('CREATE TABLE `test_database`.`test_table` ( `i``d` int(11) NOT NULL, PRIMARY KEY (`i``d`)) ENGINE=InnoDB;') - clickhouse_node.query("CREATE DATABASE test_database ENGINE = MySQL('mysql1:3306', 'test_database', 'root', 'clickhouse')") + clickhouse_node.query("CREATE DATABASE test_database ENGINE = MySQL('mysql1:3306', test_database, 'root', 'clickhouse')") assert clickhouse_node.query("SELECT count() FROM `test_database`.`test_table`").rstrip() == '0' clickhouse_node.query("INSERT INTO `test_database`.`test_table`(`i\`d`) select number from numbers(10000)") @@ -116,7 +117,17 @@ def test_clickhouse_join_for_mysql_database(started_cluster): clickhouse_node.query("CREATE TABLE default.t1_remote_mysql AS mysql('mysql1:3306','test','t1_mysql_local','root','clickhouse')") clickhouse_node.query("CREATE TABLE 
default.t2_remote_mysql AS mysql('mysql1:3306','test','t2_mysql_local','root','clickhouse')")
     assert clickhouse_node.query("SELECT s.pays "
-                          "FROM default.t1_remote_mysql AS s "
-                          "LEFT JOIN default.t1_remote_mysql AS s_ref "
-                          "ON (s_ref.opco = s.opco AND s_ref.service = s.service)") == ''
+                                 "FROM default.t1_remote_mysql AS s "
+                                 "LEFT JOIN default.t1_remote_mysql AS s_ref "
+                                 "ON (s_ref.opco = s.opco AND s_ref.service = s.service)") == ''
     mysql_node.query("DROP DATABASE test")
+
+
+def test_bad_arguments_for_mysql_database_engine(started_cluster):
+    with contextlib.closing(MySQLNodeInstance('root', 'clickhouse', '127.0.0.1', port=3308)) as mysql_node:
+        with pytest.raises(QueryRuntimeException) as exception:
+            mysql_node.query("CREATE DATABASE IF NOT EXISTS test_bad_arguments DEFAULT CHARACTER SET 'utf8'")
+            clickhouse_node.query("CREATE DATABASE test_database ENGINE = MySQL('mysql1:3306', test_bad_arguments, root, 'clickhouse')")
+
+        assert 'Database engine MySQL requested literal argument.' in str(exception.value)
+        mysql_node.query("DROP DATABASE test_bad_arguments")
diff --git a/tests/integration/test_mysql_protocol/clients/java/0.reference b/tests/integration/test_mysql_protocol/clients/java/0.reference
new file mode 100644
index 00000000000..bcf9e3dde94
--- /dev/null
+++ b/tests/integration/test_mysql_protocol/clients/java/0.reference
@@ -0,0 +1,15 @@
+33jdbc
+44ck
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
diff --git a/tests/integration/test_mysql_protocol/clients/java/Dockerfile b/tests/integration/test_mysql_protocol/clients/java/Dockerfile
new file mode 100644
index 00000000000..96713a68e66
--- /dev/null
+++ b/tests/integration/test_mysql_protocol/clients/java/Dockerfile
@@ -0,0 +1,18 @@
+FROM ubuntu:18.04
+
+RUN apt-get update && \
+    apt-get install -y software-properties-common build-essential openjdk-8-jdk libmysql-java curl
+
+RUN rm -rf \
+    /var/lib/apt/lists/* \
+    /var/cache/debconf \
+    /tmp/*
+RUN apt-get clean
+
+ARG ver=5.1.46
+RUN curl -L -o /mysql-connector-java-${ver}.jar https://repo1.maven.org/maven2/mysql/mysql-connector-java/${ver}/mysql-connector-java-${ver}.jar
+ENV CLASSPATH=$CLASSPATH:/mysql-connector-java-${ver}.jar
+
+WORKDIR /jdbc
+COPY Test.java Test.java
+RUN javac Test.java
diff --git a/tests/integration/test_mysql_protocol/clients/java/Test.java b/tests/integration/test_mysql_protocol/clients/java/Test.java
new file mode 100644
index 00000000000..50ce824f67c
--- /dev/null
+++ b/tests/integration/test_mysql_protocol/clients/java/Test.java
@@ -0,0 +1,76 @@
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Statement;
+
+class JavaConnectorTest {
+    private static final String CREATE_TABLE_SQL = "CREATE TABLE IF NOT EXISTS default.test1 (age Int32, name String) Engine = Memory";
+    private static final String INSERT_SQL = "INSERT INTO default.test1 VALUES(33, 'jdbc'),(44, 'ck')";
+    private static final String SELECT_SQL = "SELECT * FROM default.test1";
+    private static final String SELECT_NUMBER_SQL = "SELECT * FROM system.numbers LIMIT 13";
+    private static final String DROP_TABLE_SQL = "DROP TABLE default.test1";
+
+    public static void main(String[] args) {
+        int i = 0;
+        String host = "127.0.0.1";
+        String port = "9004";
+        String user = "default";
+        String password = "";
+        String database = "default";
+        while (i < args.length) {
+            switch (args[i]) {
+                case "--host":
+                    host = args[++i];
+                    break;
+                case "--port":
+                    port = args[++i];
+                    break;
+                case "--user":
+                    user =
args[++i]; + break; + case "--password": + password = args[++i]; + break; + case "--database": + database = args[++i]; + break; + default: + i++; + break; + } + } + + String jdbcUrl = String.format("jdbc:mysql://%s:%s/%s?maxAllowedPacket=67108864&useSSL=false", host, port, database); + + Connection conn = null; + Statement stmt = null; + try { + conn = DriverManager.getConnection(jdbcUrl, user, password); + stmt = conn.createStatement(); + stmt.executeUpdate(CREATE_TABLE_SQL); + stmt.executeUpdate(INSERT_SQL); + + ResultSet rs = stmt.executeQuery(SELECT_SQL); + while (rs.next()) { + System.out.print(rs.getString("age")); + System.out.print(rs.getString("name")); + System.out.println(); + } + + stmt.executeUpdate(DROP_TABLE_SQL); + + rs = stmt.executeQuery(SELECT_NUMBER_SQL); + while (rs.next()) { + System.out.print(rs.getString(1)); + System.out.println(); + } + + stmt.close(); + conn.close(); + } catch (SQLException e) { + e.printStackTrace(); + System.exit(1); + } + } +} diff --git a/tests/integration/test_mysql_protocol/clients/java/docker_compose.yml b/tests/integration/test_mysql_protocol/clients/java/docker_compose.yml new file mode 100644 index 00000000000..dbe404232a0 --- /dev/null +++ b/tests/integration/test_mysql_protocol/clients/java/docker_compose.yml @@ -0,0 +1,8 @@ +version: '2.2' +services: + java1: + build: + context: ./ + network: host + # to keep container running + command: sleep infinity diff --git a/tests/integration/test_mysql_protocol/test.py b/tests/integration/test_mysql_protocol/test.py index 7987076c29a..b5ee3cecec9 100644 --- a/tests/integration/test_mysql_protocol/test.py +++ b/tests/integration/test_mysql_protocol/test.py @@ -79,6 +79,13 @@ def nodejs_container(): yield docker.from_env().containers.get(cluster.project_name + '_mysqljs1_1') +@pytest.fixture(scope='module') +def java_container(): + docker_compose = os.path.join(SCRIPT_DIR, 'clients', 'java', 'docker_compose.yml') + subprocess.check_call(['docker-compose', '-p', cluster.project_name, '-f', docker_compose, 'up', '--no-recreate', '-d', '--build']) + yield docker.from_env().containers.get(cluster.project_name + '_java1_1') + + def test_mysql_client(mysql_client, server_address): # type: (Container, str) -> None code, (stdout, stderr) = mysql_client.exec_run(''' @@ -266,6 +273,21 @@ def test_mysqljs_client(server_address, nodejs_container): assert code == 1 +def test_java_client(server_address, java_container): + # type: (str, Container) -> None + with open(os.path.join(SCRIPT_DIR, 'clients', 'java', '0.reference')) as fp: + reference = fp.read() + + code, (stdout, stderr) = java_container.exec_run('java JavaConnectorTest --host {host} --port {port} --user user_with_empty_password --database ' + 'abc'.format(host=server_address, port=server_port), demux=True) + assert code == 1 + + code, (stdout, stderr) = java_container.exec_run('java JavaConnectorTest --host {host} --port {port} --user user_with_empty_password --database ' + 'default'.format(host=server_address, port=server_port), demux=True) + assert code == 0 + assert stdout == reference + + def test_types(server_address): client = pymysql.connections.Connection(host=server_address, user='default', password='123', database='default', port=server_port) diff --git a/tests/integration/test_quota/configs/users.d/access_management.xml b/tests/integration/test_quota/configs/users.d/access_management.xml deleted file mode 100644 index 7e799cb7b10..00000000000 --- a/tests/integration/test_quota/configs/users.d/access_management.xml +++ /dev/null @@ -1,7 
+0,0 @@ - - - - 1 - - - diff --git a/tests/integration/test_authentication/configs/users.d/access_management.xml b/tests/integration/test_quota/configs/users.d/assign_myquota.xml similarity index 60% rename from tests/integration/test_authentication/configs/users.d/access_management.xml rename to tests/integration/test_quota/configs/users.d/assign_myquota.xml index 7e799cb7b10..8b98ade8aeb 100644 --- a/tests/integration/test_authentication/configs/users.d/access_management.xml +++ b/tests/integration/test_quota/configs/users.d/assign_myquota.xml @@ -1,7 +1,7 @@ - 1 + myQuota diff --git a/tests/integration/test_quota/configs/users.d/drop_default_quota.xml b/tests/integration/test_quota/configs/users.d/drop_default_quota.xml new file mode 100644 index 00000000000..5f53ecf5f49 --- /dev/null +++ b/tests/integration/test_quota/configs/users.d/drop_default_quota.xml @@ -0,0 +1,5 @@ + + + + + diff --git a/tests/integration/test_quota/configs/users.xml b/tests/integration/test_quota/configs/users.xml deleted file mode 100644 index 4412345a731..00000000000 --- a/tests/integration/test_quota/configs/users.xml +++ /dev/null @@ -1,17 +0,0 @@ - - - - - - - - - - - ::/0 - - default - myQuota - - - diff --git a/tests/integration/test_quota/test.py b/tests/integration/test_quota/test.py index 85d2ded16c1..ae68a34a03e 100644 --- a/tests/integration/test_quota/test.py +++ b/tests/integration/test_quota/test.py @@ -180,7 +180,7 @@ def test_reload_users_xml_by_timer(): def test_dcl_introspection(): assert instance.query("SHOW QUOTAS") == "myQuota\n" - assert instance.query("SHOW CREATE QUOTA myQuota") == "CREATE QUOTA myQuota KEYED BY \\'user name\\' FOR INTERVAL 1 YEAR MAX QUERIES = 1000, MAX READ ROWS = 1000 TO default\n" + assert instance.query("SHOW CREATE QUOTA myQuota") == "CREATE QUOTA myQuota KEYED BY \\'user name\\' FOR INTERVAL 1 YEAR MAX QUERIES 1000, READ ROWS 1000 TO default\n" expected_usage = "myQuota key=\\\\'default\\\\' interval=\[.*\] queries=0/1000 errors=0 result_rows=0 result_bytes=0 read_rows=0/1000 read_bytes=0 execution_time=0" assert re.match(expected_usage, instance.query("SHOW QUOTA USAGE")) assert re.match(expected_usage, instance.query("SHOW QUOTA USAGE CURRENT")) @@ -193,7 +193,7 @@ def test_dcl_introspection(): # Add interval. copy_quota_xml('two_intervals.xml') assert instance.query("SHOW QUOTAS") == "myQuota\n" - assert instance.query("SHOW CREATE QUOTA myQuota") == "CREATE QUOTA myQuota KEYED BY \\'user name\\' FOR INTERVAL 1 YEAR MAX QUERIES = 1000, MAX READ ROWS = 1000, FOR RANDOMIZED INTERVAL 2 YEAR MAX RESULT BYTES = 30000, MAX READ BYTES = 20000, MAX EXECUTION TIME = 120 TO default\n" + assert instance.query("SHOW CREATE QUOTA myQuota") == "CREATE QUOTA myQuota KEYED BY \\'user name\\' FOR INTERVAL 1 YEAR MAX QUERIES 1000, READ ROWS 1000, FOR RANDOMIZED INTERVAL 2 YEAR MAX RESULT BYTES 30000, READ BYTES 20000, EXECUTION TIME 120 TO default\n" expected_usage = "myQuota key=\\\\'default\\\\' interval=\[.*\] queries=1/1000 errors=0 result_rows=50 result_bytes=200 read_rows=50/1000 read_bytes=200 execution_time=.*\n"\ "myQuota key=\\\\'default\\\\' interval=\[.*\] queries=0 errors=0 result_rows=0 result_bytes=0/30000 read_rows=0 read_bytes=0/20000 execution_time=0/120" assert re.match(expected_usage, instance.query("SHOW QUOTA USAGE")) @@ -201,8 +201,8 @@ def test_dcl_introspection(): # Drop interval, add quota. 
copy_quota_xml('two_quotas.xml') assert instance.query("SHOW QUOTAS") == "myQuota\nmyQuota2\n" - assert instance.query("SHOW CREATE QUOTA myQuota") == "CREATE QUOTA myQuota KEYED BY \\'user name\\' FOR INTERVAL 1 YEAR MAX QUERIES = 1000, MAX READ ROWS = 1000 TO default\n" - assert instance.query("SHOW CREATE QUOTA myQuota2") == "CREATE QUOTA myQuota2 KEYED BY \\'client key or user name\\' FOR RANDOMIZED INTERVAL 1 HOUR MAX RESULT ROWS = 4000, MAX RESULT BYTES = 400000, MAX READ ROWS = 4000, MAX READ BYTES = 400000, MAX EXECUTION TIME = 60, FOR INTERVAL 1 MONTH MAX EXECUTION TIME = 1800\n" + assert instance.query("SHOW CREATE QUOTA myQuota") == "CREATE QUOTA myQuota KEYED BY \\'user name\\' FOR INTERVAL 1 YEAR MAX QUERIES 1000, READ ROWS 1000 TO default\n" + assert instance.query("SHOW CREATE QUOTA myQuota2") == "CREATE QUOTA myQuota2 KEYED BY \\'client key or user name\\' FOR RANDOMIZED INTERVAL 1 HOUR MAX RESULT ROWS 4000, RESULT BYTES 400000, READ ROWS 4000, READ BYTES 400000, EXECUTION TIME 60, FOR INTERVAL 1 MONTH MAX EXECUTION TIME 1800\n" expected_usage = "myQuota key=\\\\'default\\\\' interval=\[.*\] queries=1/1000 errors=0 result_rows=50 result_bytes=200 read_rows=50/1000 read_bytes=200 execution_time=.*" assert re.match(expected_usage, instance.query("SHOW QUOTA USAGE")) @@ -212,9 +212,9 @@ def test_dcl_management(): assert instance.query("SHOW QUOTAS") == "" assert instance.query("SHOW QUOTA USAGE") == "" - instance.query("CREATE QUOTA qA FOR INTERVAL 15 MONTH SET MAX QUERIES = 123 TO CURRENT_USER") + instance.query("CREATE QUOTA qA FOR INTERVAL 15 MONTH MAX QUERIES 123 TO CURRENT_USER") assert instance.query("SHOW QUOTAS") == "qA\n" - assert instance.query("SHOW CREATE QUOTA qA") == "CREATE QUOTA qA KEYED BY \\'none\\' FOR INTERVAL 5 QUARTER MAX QUERIES = 123 TO default\n" + assert instance.query("SHOW CREATE QUOTA qA") == "CREATE QUOTA qA KEYED BY \\'none\\' FOR INTERVAL 5 QUARTER MAX QUERIES 123 TO default\n" expected_usage = "qA key=\\\\'\\\\' interval=\[.*\] queries=0/123 errors=0 result_rows=0 result_bytes=0 read_rows=0 read_bytes=0 execution_time=.*" assert re.match(expected_usage, instance.query("SHOW QUOTA USAGE")) @@ -222,14 +222,14 @@ def test_dcl_management(): expected_usage = "qA key=\\\\'\\\\' interval=\[.*\] queries=1/123 errors=0 result_rows=50 result_bytes=200 read_rows=50 read_bytes=200 execution_time=.*" assert re.match(expected_usage, instance.query("SHOW QUOTA USAGE")) - instance.query("ALTER QUOTA qA FOR INTERVAL 15 MONTH MAX QUERIES = 321, MAX ERRORS = 10, FOR INTERVAL 0.5 HOUR MAX EXECUTION TIME = 0.5") - assert instance.query("SHOW CREATE QUOTA qA") == "CREATE QUOTA qA KEYED BY \\'none\\' FOR INTERVAL 30 MINUTE MAX EXECUTION TIME = 0.5, FOR INTERVAL 5 QUARTER MAX QUERIES = 321, MAX ERRORS = 10 TO default\n" + instance.query("ALTER QUOTA qA FOR INTERVAL 15 MONTH MAX QUERIES 321, MAX ERRORS 10, FOR INTERVAL 0.5 HOUR MAX EXECUTION TIME 0.5") + assert instance.query("SHOW CREATE QUOTA qA") == "CREATE QUOTA qA KEYED BY \\'none\\' FOR INTERVAL 30 MINUTE MAX EXECUTION TIME 0.5, FOR INTERVAL 5 QUARTER MAX QUERIES 321, ERRORS 10 TO default\n" expected_usage = "qA key=\\\\'\\\\' interval=\[.*\] queries=0 errors=0 result_rows=0 result_bytes=0 read_rows=0 read_bytes=0 execution_time=.*/0.5\n"\ "qA key=\\\\'\\\\' interval=\[.*\] queries=1/321 errors=0/10 result_rows=50 result_bytes=200 read_rows=50 read_bytes=200 execution_time=.*" assert re.match(expected_usage, instance.query("SHOW QUOTA USAGE")) - instance.query("ALTER QUOTA qA FOR INTERVAL 15 MONTH UNSET TRACKING, 
FOR RANDOMIZED INTERVAL 16 MONTH SET TRACKING, FOR INTERVAL 1800 SECOND UNSET TRACKING") - assert instance.query("SHOW CREATE QUOTA qA") == "CREATE QUOTA qA KEYED BY \\'none\\' FOR RANDOMIZED INTERVAL 16 MONTH TRACKING TO default\n" + instance.query("ALTER QUOTA qA FOR INTERVAL 15 MONTH NO LIMITS, FOR RANDOMIZED INTERVAL 16 MONTH TRACKING ONLY, FOR INTERVAL 1800 SECOND NO LIMITS") + assert instance.query("SHOW CREATE QUOTA qA") == "CREATE QUOTA qA KEYED BY \\'none\\' FOR RANDOMIZED INTERVAL 16 MONTH TRACKING ONLY TO default\n" expected_usage = "qA key=\\\\'\\\\' interval=\[.*\] queries=0 errors=0 result_rows=0 result_bytes=0 read_rows=0 read_bytes=0 execution_time=.*" assert re.match(expected_usage, instance.query("SHOW QUOTA USAGE")) @@ -238,7 +238,7 @@ def test_dcl_management(): assert re.match(expected_usage, instance.query("SHOW QUOTA USAGE")) instance.query("ALTER QUOTA qA RENAME TO qB") - assert instance.query("SHOW CREATE QUOTA qB") == "CREATE QUOTA qB KEYED BY \\'none\\' FOR RANDOMIZED INTERVAL 16 MONTH TRACKING TO default\n" + assert instance.query("SHOW CREATE QUOTA qB") == "CREATE QUOTA qB KEYED BY \\'none\\' FOR RANDOMIZED INTERVAL 16 MONTH TRACKING ONLY TO default\n" expected_usage = "qB key=\\\\'\\\\' interval=\[.*\] queries=1 errors=0 result_rows=50 result_bytes=200 read_rows=50 read_bytes=200 execution_time=.*" assert re.match(expected_usage, instance.query("SHOW QUOTA USAGE")) diff --git a/tests/integration/test_row_policy/configs/users.d/access_management.xml b/tests/integration/test_row_policy/configs/users.d/access_management.xml deleted file mode 100644 index 7e799cb7b10..00000000000 --- a/tests/integration/test_row_policy/configs/users.d/access_management.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - 1 - - - diff --git a/tests/integration/test_row_policy/test.py b/tests/integration/test_row_policy/test.py index 7087e6aafae..3a5b7340528 100644 --- a/tests/integration/test_row_policy/test.py +++ b/tests/integration/test_row_policy/test.py @@ -113,6 +113,9 @@ def test_prewhere_not_supported(): assert expected_error in instance.query_and_get_error("SELECT * FROM mydb.filtered_table2 PREWHERE 1") assert expected_error in instance.query_and_get_error("SELECT * FROM mydb.filtered_table3 PREWHERE 1") + # However PREWHERE should still work for user without filtering. 
+ assert instance.query("SELECT * FROM mydb.filtered_table1 PREWHERE 1", user="another") == "0\t0\n0\t1\n1\t0\n1\t1\n" + def test_single_table_name(): copy_policy_xml('tag_with_table_name.xml') diff --git a/tests/integration/test_settings_constraints_distributed/configs/config.d/remote_servers.xml b/tests/integration/test_settings_constraints_distributed/configs/remote_servers.xml similarity index 100% rename from tests/integration/test_settings_constraints_distributed/configs/config.d/remote_servers.xml rename to tests/integration/test_settings_constraints_distributed/configs/remote_servers.xml diff --git a/tests/integration/test_settings_constraints_distributed/configs/users.d/access_management.xml b/tests/integration/test_settings_constraints_distributed/configs/users.d/access_management.xml deleted file mode 100644 index 7e799cb7b10..00000000000 --- a/tests/integration/test_settings_constraints_distributed/configs/users.d/access_management.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - 1 - - - diff --git a/tests/integration/test_settings_constraints_distributed/test.py b/tests/integration/test_settings_constraints_distributed/test.py index a58c037a2fc..51999902e7d 100644 --- a/tests/integration/test_settings_constraints_distributed/test.py +++ b/tests/integration/test_settings_constraints_distributed/test.py @@ -8,9 +8,9 @@ from helpers.test_tools import assert_eq_with_retry cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance('node1', config_dir="configs") -node2 = cluster.add_instance('node2', config_dir="configs") -distributed = cluster.add_instance('distributed', config_dir="configs") +node1 = cluster.add_instance('node1') +node2 = cluster.add_instance('node2') +distributed = cluster.add_instance('distributed', main_configs=["configs/remote_servers.xml"]) @pytest.fixture(scope="module") diff --git a/tests/integration/test_settings_profile/configs/users.d/access_management.xml b/tests/integration/test_settings_profile/configs/users.d/access_management.xml deleted file mode 100644 index 7e799cb7b10..00000000000 --- a/tests/integration/test_settings_profile/configs/users.d/access_management.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - 1 - - - diff --git a/tests/integration/test_settings_profile/test.py b/tests/integration/test_settings_profile/test.py index 6866c6b3901..8b9d023d56f 100644 --- a/tests/integration/test_settings_profile/test.py +++ b/tests/integration/test_settings_profile/test.py @@ -2,7 +2,7 @@ import pytest from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -instance = cluster.add_instance('instance', config_dir="configs") +instance = cluster.add_instance('instance') @pytest.fixture(scope="module", autouse=True) @@ -31,22 +31,26 @@ def reset_after_test(): def test_settings_profile(): # Set settings and constraints via CREATE SETTINGS PROFILE ... 
TO user instance.query("CREATE SETTINGS PROFILE xyz SETTINGS max_memory_usage = 100000001 MIN 90000000 MAX 110000000 TO robin") + assert instance.query("SHOW CREATE SETTINGS PROFILE xyz") == "CREATE SETTINGS PROFILE xyz SETTINGS max_memory_usage = 100000001 MIN 90000000 MAX 110000000 TO robin\n" assert instance.query("SELECT value FROM system.settings WHERE name = 'max_memory_usage'", user="robin") == "100000001\n" assert "Setting max_memory_usage shouldn't be less than 90000000" in instance.query_and_get_error("SET max_memory_usage = 80000000", user="robin") assert "Setting max_memory_usage shouldn't be greater than 110000000" in instance.query_and_get_error("SET max_memory_usage = 120000000", user="robin") instance.query("ALTER SETTINGS PROFILE xyz TO NONE") + assert instance.query("SHOW CREATE SETTINGS PROFILE xyz") == "CREATE SETTINGS PROFILE xyz SETTINGS max_memory_usage = 100000001 MIN 90000000 MAX 110000000\n" assert instance.query("SELECT value FROM system.settings WHERE name = 'max_memory_usage'", user="robin") == "10000000000\n" instance.query("SET max_memory_usage = 80000000", user="robin") instance.query("SET max_memory_usage = 120000000", user="robin") # Set settings and constraints via CREATE USER ... SETTINGS PROFILE instance.query("ALTER USER robin SETTINGS PROFILE xyz") + assert instance.query("SHOW CREATE USER robin") == "CREATE USER robin SETTINGS PROFILE xyz\n" assert instance.query("SELECT value FROM system.settings WHERE name = 'max_memory_usage'", user="robin") == "100000001\n" assert "Setting max_memory_usage shouldn't be less than 90000000" in instance.query_and_get_error("SET max_memory_usage = 80000000", user="robin") assert "Setting max_memory_usage shouldn't be greater than 110000000" in instance.query_and_get_error("SET max_memory_usage = 120000000", user="robin") instance.query("ALTER USER robin SETTINGS NONE") + assert instance.query("SHOW CREATE USER robin") == "CREATE USER robin\n" assert instance.query("SELECT value FROM system.settings WHERE name = 'max_memory_usage'", user="robin") == "10000000000\n" instance.query("SET max_memory_usage = 80000000", user="robin") instance.query("SET max_memory_usage = 120000000", user="robin") @@ -57,6 +61,8 @@ def test_settings_profile_from_granted_role(): instance.query("CREATE SETTINGS PROFILE xyz SETTINGS max_memory_usage = 100000001 MIN 90000000 MAX 110000000") instance.query("CREATE ROLE worker SETTINGS PROFILE xyz") instance.query("GRANT worker TO robin") + assert instance.query("SHOW CREATE SETTINGS PROFILE xyz") == "CREATE SETTINGS PROFILE xyz SETTINGS max_memory_usage = 100000001 MIN 90000000 MAX 110000000\n" + assert instance.query("SHOW CREATE ROLE worker") == "CREATE ROLE worker SETTINGS PROFILE xyz\n" assert instance.query("SELECT value FROM system.settings WHERE name = 'max_memory_usage'", user="robin") == "100000001\n" assert "Setting max_memory_usage shouldn't be less than 90000000" in instance.query_and_get_error("SET max_memory_usage = 80000000", user="robin") assert "Setting max_memory_usage shouldn't be greater than 110000000" in instance.query_and_get_error("SET max_memory_usage = 120000000", user="robin") @@ -68,17 +74,20 @@ def test_settings_profile_from_granted_role(): instance.query("ALTER ROLE worker SETTINGS NONE") instance.query("GRANT worker TO robin") + assert instance.query("SHOW CREATE ROLE worker") == "CREATE ROLE worker\n" assert instance.query("SELECT value FROM system.settings WHERE name = 'max_memory_usage'", user="robin") == "10000000000\n" instance.query("SET max_memory_usage = 
80000000", user="robin") instance.query("SET max_memory_usage = 120000000", user="robin") # Set settings and constraints via CREATE SETTINGS PROFILE ... TO granted role instance.query("ALTER SETTINGS PROFILE xyz TO worker") + assert instance.query("SHOW CREATE SETTINGS PROFILE xyz") == "CREATE SETTINGS PROFILE xyz SETTINGS max_memory_usage = 100000001 MIN 90000000 MAX 110000000 TO worker\n" assert instance.query("SELECT value FROM system.settings WHERE name = 'max_memory_usage'", user="robin") == "100000001\n" assert "Setting max_memory_usage shouldn't be less than 90000000" in instance.query_and_get_error("SET max_memory_usage = 80000000", user="robin") assert "Setting max_memory_usage shouldn't be greater than 110000000" in instance.query_and_get_error("SET max_memory_usage = 120000000", user="robin") instance.query("ALTER SETTINGS PROFILE xyz TO NONE") + assert instance.query("SHOW CREATE SETTINGS PROFILE xyz") == "CREATE SETTINGS PROFILE xyz SETTINGS max_memory_usage = 100000001 MIN 90000000 MAX 110000000\n" assert instance.query("SELECT value FROM system.settings WHERE name = 'max_memory_usage'", user="robin") == "10000000000\n" instance.query("SET max_memory_usage = 80000000", user="robin") instance.query("SET max_memory_usage = 120000000", user="robin") @@ -87,6 +96,8 @@ def test_settings_profile_from_granted_role(): def test_inheritance_of_settings_profile(): instance.query("CREATE SETTINGS PROFILE xyz SETTINGS max_memory_usage = 100000002 READONLY") instance.query("CREATE SETTINGS PROFILE alpha SETTINGS PROFILE xyz TO robin") + assert instance.query("SHOW CREATE SETTINGS PROFILE xyz") == "CREATE SETTINGS PROFILE xyz SETTINGS max_memory_usage = 100000002 READONLY\n" + assert instance.query("SHOW CREATE SETTINGS PROFILE alpha") == "CREATE SETTINGS PROFILE alpha SETTINGS INHERIT xyz TO robin\n" assert instance.query("SELECT value FROM system.settings WHERE name = 'max_memory_usage'", user="robin") == "100000002\n" assert "Setting max_memory_usage should not be changed" in instance.query_and_get_error("SET max_memory_usage = 80000000", user="robin") diff --git a/tests/queries/0_stateless/00967_insert_into_distributed_different_types.reference b/tests/queries/0_stateless/00967_insert_into_distributed_different_types.reference index e69de29bb2d..573541ac970 100644 --- a/tests/queries/0_stateless/00967_insert_into_distributed_different_types.reference +++ b/tests/queries/0_stateless/00967_insert_into_distributed_different_types.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/00967_insert_into_distributed_different_types.sql b/tests/queries/0_stateless/00967_insert_into_distributed_different_types.sql index 6b23c72981a..33f16eb241c 100644 --- a/tests/queries/0_stateless/00967_insert_into_distributed_different_types.sql +++ b/tests/queries/0_stateless/00967_insert_into_distributed_different_types.sql @@ -4,6 +4,6 @@ DROP TABLE IF EXISTS underlying_00967; CREATE TABLE dist_00967 (key UInt64) Engine=Distributed('test_shard_localhost', currentDatabase(), underlying_00967); -- fails for TinyLog()/MergeTree()/... 
but not for Memory() CREATE TABLE underlying_00967 (key Nullable(UInt64)) Engine=TinyLog(); -INSERT INTO dist_00967 SELECT toUInt64(number) FROM system.numbers LIMIT 1; -- { serverError 171; } +INSERT INTO dist_00967 SELECT toUInt64(number) FROM system.numbers LIMIT 1; SELECT * FROM dist_00967; diff --git a/tests/queries/0_stateless/01033_quota_dcl.reference b/tests/queries/0_stateless/01033_quota_dcl.reference index 7f92f992dd5..7bd2d2923d2 100644 --- a/tests/queries/0_stateless/01033_quota_dcl.reference +++ b/tests/queries/0_stateless/01033_quota_dcl.reference @@ -1,2 +1,2 @@ default -CREATE QUOTA default KEYED BY \'user name\' FOR INTERVAL 1 HOUR TRACKING TO default, readonly +CREATE QUOTA default KEYED BY \'user name\' FOR INTERVAL 1 HOUR TRACKING ONLY TO default, readonly diff --git a/tests/queries/0_stateless/01075_allowed_client_hosts.reference b/tests/queries/0_stateless/01075_allowed_client_hosts.reference index 0082653059c..73f54c6027a 100644 --- a/tests/queries/0_stateless/01075_allowed_client_hosts.reference +++ b/tests/queries/0_stateless/01075_allowed_client_hosts.reference @@ -8,10 +8,10 @@ CREATE USER test_user_01075 HOST LOCAL, IP \'2001:db8:11a3:9d7:1f34:8a2e:7a0:765 CREATE USER test_user_01075 HOST LOCAL CREATE USER test_user_01075 HOST NONE CREATE USER test_user_01075 HOST LIKE \'@.somesite.com\' -CREATE USER test_user_01075 HOST NAME REGEXP \'.*.anothersite.com\' -CREATE USER test_user_01075 HOST NAME REGEXP \'.*.anothersite.com\', \'.*.anothersite.org\' -CREATE USER test_user_01075 HOST NAME REGEXP \'.*.anothersite2.com\', \'.*.anothersite2.org\' -CREATE USER test_user_01075 HOST NAME REGEXP \'.*.anothersite3.com\', \'.*.anothersite3.org\' +CREATE USER test_user_01075 HOST REGEXP \'.*.anothersite.com\' +CREATE USER test_user_01075 HOST REGEXP \'.*.anothersite.com\', \'.*.anothersite.org\' +CREATE USER test_user_01075 HOST REGEXP \'.*.anothersite2.com\', \'.*.anothersite2.org\' +CREATE USER test_user_01075 HOST REGEXP \'.*.anothersite3.com\', \'.*.anothersite3.org\' CREATE USER `test_user_01075_x@localhost` HOST LOCAL CREATE USER test_user_01075_x CREATE USER `test_user_01075_x@192.168.23.15` HOST LIKE \'192.168.23.15\' diff --git a/tests/queries/0_stateless/01075_allowed_client_hosts.sql b/tests/queries/0_stateless/01075_allowed_client_hosts.sql index e0b1c0f9905..2960a93f0f2 100644 --- a/tests/queries/0_stateless/01075_allowed_client_hosts.sql +++ b/tests/queries/0_stateless/01075_allowed_client_hosts.sql @@ -30,16 +30,16 @@ SHOW CREATE USER test_user_01075; ALTER USER test_user_01075 HOST LIKE '@.somesite.com'; SHOW CREATE USER test_user_01075; -ALTER USER test_user_01075 HOST NAME REGEXP '.*\.anothersite\.com'; +ALTER USER test_user_01075 HOST REGEXP '.*\.anothersite\.com'; SHOW CREATE USER test_user_01075; -ALTER USER test_user_01075 HOST NAME REGEXP '.*\.anothersite\.com', '.*\.anothersite\.org'; +ALTER USER test_user_01075 HOST REGEXP '.*\.anothersite\.com', '.*\.anothersite\.org'; SHOW CREATE USER test_user_01075; -ALTER USER test_user_01075 HOST NAME REGEXP '.*\.anothersite2\.com', NAME REGEXP '.*\.anothersite2\.org'; +ALTER USER test_user_01075 HOST REGEXP '.*\.anothersite2\.com', REGEXP '.*\.anothersite2\.org'; SHOW CREATE USER test_user_01075; -ALTER USER test_user_01075 HOST NAME REGEXP '.*\.anothersite3\.com' HOST NAME REGEXP '.*\.anothersite3\.org'; +ALTER USER test_user_01075 HOST REGEXP '.*\.anothersite3\.com' HOST REGEXP '.*\.anothersite3\.org'; SHOW CREATE USER test_user_01075; DROP USER test_user_01075; diff --git 
a/tests/queries/0_stateless/01114_mysql_database_engine_segfault.reference b/tests/queries/0_stateless/01114_mysql_database_engine_segfault.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01114_mysql_database_engine_segfault.sql b/tests/queries/0_stateless/01114_mysql_database_engine_segfault.sql new file mode 100644 index 00000000000..af88c5af53a --- /dev/null +++ b/tests/queries/0_stateless/01114_mysql_database_engine_segfault.sql @@ -0,0 +1 @@ +CREATE DATABASE conv_main ENGINE = MySQL('127.0.0.1:3456', conv_main, 'metrika', 'password'); -- { serverError 501 } diff --git a/tests/queries/0_stateless/01232_preparing_sets_race_condition.reference b/tests/queries/0_stateless/01232_preparing_sets_race_condition.reference index 2fc36ed5c97..45a4fb75db8 100644 --- a/tests/queries/0_stateless/01232_preparing_sets_race_condition.reference +++ b/tests/queries/0_stateless/01232_preparing_sets_race_condition.reference @@ -1,10 +1 @@ 8 -8 -8 -8 -8 -8 -8 -8 -8 -8 diff --git a/tests/queries/0_stateless/01232_preparing_sets_race_condition.sh b/tests/queries/0_stateless/01232_preparing_sets_race_condition.sh index 5f7b76c0e99..25a8cdb12ea 100755 --- a/tests/queries/0_stateless/01232_preparing_sets_race_condition.sh +++ b/tests/queries/0_stateless/01232_preparing_sets_race_condition.sh @@ -18,7 +18,7 @@ echo " insert into tableB select number, number % 100000, addDays(toDate('2020-01-01'), number % 90) from numbers(50000000); " | $CLICKHOUSE_CLIENT -n -for i in {1..10}; do echo " +for i in {1..1}; do echo " SELECT tableName FROM ( diff --git a/tests/queries/0_stateless/01234_to_string_monotonic.reference b/tests/queries/0_stateless/01234_to_string_monotonic.reference new file mode 100644 index 00000000000..75404a347a4 --- /dev/null +++ b/tests/queries/0_stateless/01234_to_string_monotonic.reference @@ -0,0 +1,2 @@ +1234 +1234 diff --git a/tests/queries/0_stateless/01234_to_string_monotonic.sql b/tests/queries/0_stateless/01234_to_string_monotonic.sql new file mode 100644 index 00000000000..87324fdda27 --- /dev/null +++ b/tests/queries/0_stateless/01234_to_string_monotonic.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS test1; +DROP TABLE IF EXISTS test2; + +CREATE TABLE test1 (s String) ENGINE = MergeTree ORDER BY s SETTINGS index_granularity = 1; +CREATE TABLE test2 (s LowCardinality(String)) ENGINE = MergeTree ORDER BY s SETTINGS index_granularity = 1; + +INSERT INTO test1 SELECT toString(number) FROM numbers(10000); +INSERT INTO test2 SELECT toString(number) FROM numbers(10000); + +SELECT s FROM test1 WHERE toString(s) = '1234' SETTINGS max_rows_to_read = 2; +SELECT s FROM test2 WHERE toString(s) = '1234' SETTINGS max_rows_to_read = 2; + +DROP TABLE test1; +DROP TABLE test2; diff --git a/website/benchmark_hardware.html b/website/benchmark_hardware.html index ab75e7ca063..64eb576cc66 100644 --- a/website/benchmark_hardware.html +++ b/website/benchmark_hardware.html @@ -2427,6 +2427,57 @@ var results = [0.011, 0.007, 0.007] ] }, + + { + "system": "AMD EPYC 7702, 256 cores, 512 GiB, NVMe SSD, version 19.16", + "time": "2020-04-09 00:00:00", + "result": + [ +[0.103, 0.038, 0.043], +[0.072, 0.042, 0.044], +[0.118, 0.051, 0.057], +[0.222, 0.054, 0.051], +[0.339, 0.193, 0.215], +[0.376, 0.189, 0.175], +[0.114, 0.040, 0.052], +[0.085, 0.055, 0.049], +[0.354, 0.180, 0.168], +[0.372, 0.172, 0.161], +[0.276, 0.105, 0.100], +[0.259, 0.110, 0.115], +[0.399, 0.222, 0.207], +[0.586, 0.261, 0.262], +[0.394, 0.251, 0.228], +[0.350, 0.194, 0.189], +[0.705, 0.468, 0.462], 
+[0.653, 0.368, 0.381], +[1.285, 0.826, 0.922], +[0.223, 0.032, 0.036], +[1.690, 0.186, 0.178], +[1.916, 0.231, 0.189], +[3.551, 0.602, 0.595], +[3.198, 0.607, 0.478], +[0.530, 0.143, 0.138], +[0.311, 0.079, 0.090], +[0.554, 0.137, 0.134], +[1.775, 0.305, 0.293], +[1.480, 0.257, 0.276], +[0.864, 0.838, 0.795], +[0.529, 0.183, 0.177], +[1.051, 0.226, 0.230], +[1.719, 1.074, 1.075], +[2.134, 0.856, 0.873], +[2.123, 0.829, 0.846], +[0.380, 0.285, 0.280], +[0.193, 0.187, 0.183], +[0.080, 0.080, 0.080], +[0.077, 0.066, 0.068], +[0.432, 0.405, 0.444], +[0.050, 0.038, 0.037], +[0.032, 0.028, 0.025], +[0.010, 0.010, 0.008] + ] + }, ]; @@ -2862,6 +2913,7 @@ Results for Pinebook Pro are from Aleksey R. @kITerE.
Results for AMD Ryzen are from Alexey Milovidov. Firefox was running in background.<br />
Results for Azure E32s are from Piotr Maśko.<br />
Results for MacBook Pro are from Denis Glazachev. MacOS Catalina Version 10.15.4 (19E266). For "drop caches", the "Free Up RAM" feature in CleanMyMac is used.<br />
+Results for AMD EPYC 7702 are from Peng Gao at sina.com.<br />
Xeon Gold 6230 server is using 4 x SAMSUNG datacenter class SSD in RAID-10.<br />
Results for Yandex Managed ClickHouse for "cold cache" are biased and should not be compared, because the cache was not flushed before every next query.