diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp index f25bcdb91e1..39506186732 100644 --- a/base/daemon/BaseDaemon.cpp +++ b/base/daemon/BaseDaemon.cpp @@ -962,7 +962,7 @@ void BaseDaemon::setupWatchdog() if (WIFEXITED(status)) { logger().information(fmt::format("Child process exited normally with code {}.", WEXITSTATUS(status))); - _exit(status); + _exit(WEXITSTATUS(status)); } if (WIFSIGNALED(status)) @@ -980,7 +980,7 @@ void BaseDaemon::setupWatchdog() logger().fatal(fmt::format("Child process was terminated by signal {}.", sig)); if (sig == SIGINT || sig == SIGTERM || sig == SIGQUIT) - _exit(status); + _exit(128 + sig); } } else diff --git a/docker/test/coverage/run.sh b/docker/test/coverage/run.sh index e2369a28a9a..11b6ce13ea1 100755 --- a/docker/test/coverage/run.sh +++ b/docker/test/coverage/run.sh @@ -102,11 +102,11 @@ else echo "No failed tests" fi -mkdir -p $COVERAGE_DIR -mv /*.profraw $COVERAGE_DIR +mkdir -p "$COVERAGE_DIR" +mv /*.profraw "$COVERAGE_DIR" -mkdir -p $SOURCE_DIR/obj-x86_64-linux-gnu -cd $SOURCE_DIR/obj-x86_64-linux-gnu && CC=clang-11 CXX=clang++-11 cmake .. && cd / -llvm-profdata-11 merge -sparse ${COVERAGE_DIR}/* -o clickhouse.profdata -llvm-cov-11 export /usr/bin/clickhouse -instr-profile=clickhouse.profdata -j=16 -format=lcov -skip-functions -ignore-filename-regex $IGNORE > output.lcov -genhtml output.lcov --ignore-errors source --output-directory ${OUTPUT_DIR} +mkdir -p "$SOURCE_DIR"/obj-x86_64-linux-gnu +cd "$SOURCE_DIR"/obj-x86_64-linux-gnu && CC=clang-11 CXX=clang++-11 cmake .. 
&& cd / +llvm-profdata-11 merge -sparse "${COVERAGE_DIR}"/* -o clickhouse.profdata +llvm-cov-11 export /usr/bin/clickhouse -instr-profile=clickhouse.profdata -j=16 -format=lcov -skip-functions -ignore-filename-regex "$IGNORE" > output.lcov +genhtml output.lcov --ignore-errors source --output-directory "${OUTPUT_DIR}" diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index a918cc44420..c782ac49d27 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -65,7 +65,7 @@ function start_server { set -m # Spawn server in its own process groups local opts=( - --config-file="$FASTTEST_DATA/config.xml" + --config-file "$FASTTEST_DATA/config.xml" -- --path "$FASTTEST_DATA" --user_files_path "$FASTTEST_DATA/user_files" diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index e6e987e1d94..309328bc8e2 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -55,9 +55,9 @@ function run_tests() ADDITIONAL_OPTIONS+=('00000_no_tests_to_skip') fi - for i in $(seq 1 $NUM_TRIES); do + for _ in $(seq 1 "$NUM_TRIES"); do clickhouse-test --testname --shard --zookeeper --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a test_output/test_result.txt - if [ ${PIPESTATUS[0]} -ne "0" ]; then + if [ "${PIPESTATUS[0]}" -ne "0" ]; then break; fi done @@ -65,4 +65,4 @@ function run_tests() export -f run_tests -timeout $MAX_RUN_TIME bash -c run_tests ||: +timeout "$MAX_RUN_TIME" bash -c run_tests ||: diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index 239a074969c..f7555231ffb 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -8,4 +8,5 @@ CMD cd /ClickHouse/utils/check-style && \ ./check-style -n | tee /test_output/style_output.txt && \ ./check-typos | tee /test_output/typos_output.txt && \ ./check-whitespaces -n | tee /test_output/whitespaces_output.txt && \ - ./check-duplicate-includes.sh | tee 
/test_output/duplicate_output.txt + ./check-duplicate-includes.sh | tee /test_output/duplicate_output.txt && \ + ./shellcheck-run.sh | tee /test_output/shellcheck_output.txt diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 71788e9eff4..e5f836e3b5a 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -726,7 +726,7 @@ log_queries=1 ## log_queries_min_query_duration_ms {#settings-log-queries-min-query-duration-ms} -Minimal time for the query to run to get to the following tables: +If enabled (non-zero), queries faster than the value of this setting will not be logged (you can think of this as a `long_query_time` for [MySQL Slow Query Log](https://dev.mysql.com/doc/refman/5.7/en/slow-query-log.html)), and this basically means that you will not find them in the following tables: - `system.query_log` - `system.query_thread_log` @@ -2470,6 +2470,31 @@ Possible values: Default value: `0`. +## union_default_mode {#union-default-mode} + +Sets a mode for combining `SELECT` query results. The setting is only used with [UNION](../../sql-reference/statements/select/union.md) when neither `UNION ALL` nor `UNION DISTINCT` is explicitly specified. + +Possible values: + +- `'DISTINCT'` — ClickHouse outputs rows as a result of combining queries removing duplicate rows. +- `'ALL'` — ClickHouse outputs all rows as a result of combining queries including duplicate rows. +- `''` — ClickHouse generates an exception when used with `UNION`. + +Default value: `''`. + +See examples in [UNION](../../sql-reference/statements/select/union.md). + +## data_type_default_nullable {#data_type_default_nullable} + +Controls whether data types without explicit [NULL or NOT NULL](../../sql-reference/statements/create/table.md#null-modifiers) modifiers in column definitions are [Nullable](../../sql-reference/data-types/nullable.md#data_type-nullable). 
+ +Possible values: + +- 1 — The data types in column definitions are set to `Nullable` by default. +- 0 — The data types in column definitions are set to not `Nullable` by default. + +Default value: `0`. + ## execute_merges_on_single_replica_time_threshold {#execute-merges-on-single-replica-time-threshold} Enables special logic to perform merges on replicas. diff --git a/docs/en/operations/system-tables/errors.md b/docs/en/operations/system-tables/errors.md index 53e8a397217..ec874efd711 100644 --- a/docs/en/operations/system-tables/errors.md +++ b/docs/en/operations/system-tables/errors.md @@ -1,12 +1,12 @@ # system.errors {#system_tables-errors} -Contains error codes with number of times they have been triggered. +Contains error codes with the number of times they have been triggered. Columns: - `name` ([String](../../sql-reference/data-types/string.md)) — name of the error (`errorCodeToName`). - `code` ([Int32](../../sql-reference/data-types/int-uint.md)) — code number of the error. -- `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) - number of times this error has been happened. +- `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) — the number of times this error has happened. **Example** diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index 3594737c18a..9394426b20b 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -157,14 +157,14 @@ Levels are the same as in URLHierarchy. This function is specific to Yandex.Metr ## farmHash64 {#farmhash64} -Produces a 64-bit [FarmHash](https://github.com/google/farmhash) or Fingerprint value. Prefer `farmFingerprint64` for a stable and portable value. +Produces a 64-bit [FarmHash](https://github.com/google/farmhash) or Fingerprint value. `farmFingerprint64` is preferred for a stable and portable value. ``` sql farmFingerprint64(par1, ...) 
farmHash64(par1, ...) ``` -These functions use the `Fingerprint64` and `Hash64` method respectively from all [available methods](https://github.com/google/farmhash/blob/master/src/farmhash.h). +These functions use the `Fingerprint64` and `Hash64` methods respectively from all [available methods](https://github.com/google/farmhash/blob/master/src/farmhash.h). **Parameters** diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index a846a01f11f..83f2705693a 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -558,4 +558,46 @@ Result: └─────┘ ``` +## encodeXMLComponent {#encode-xml-component} + +Escapes characters so that a string can be placed into an XML text node or attribute. + +The following five XML predefined entities will be replaced: `<`, `&`, `>`, `"`, `'`. + +**Syntax** + +``` sql +encodeXMLComponent(x) +``` + +**Parameters** + +- `x` — The sequence of characters. [String](../../sql-reference/data-types/string.md). + +**Returned value(s)** + +- The sequence of characters with escape characters. + +Type: [String](../../sql-reference/data-types/string.md). + +**Example** + +Query: + +``` sql +SELECT encodeXMLComponent('Hello, "world"!'); +SELECT encodeXMLComponent('<123>'); +SELECT encodeXMLComponent('&clickhouse'); +SELECT encodeXMLComponent('\'foo\''); +``` + +Result: + +``` text +Hello, &quot;world&quot;! 
+&lt;123&gt; +&amp;clickhouse +&apos;foo&apos; +``` + [Original article](https://clickhouse.tech/docs/en/query_language/functions/string_functions/) diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 82ef5610868..4036974dd37 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -400,7 +400,8 @@ Result: └──────────────────────────────────────────────────────────────────────────────────────────┘ ``` -**See also** +**See Also** + - [extractAllGroupsVertical](#extractallgroups-vertical) ## extractAllGroupsVertical {#extractallgroups-vertical} @@ -440,7 +441,8 @@ Result: └────────────────────────────────────────────────────────────────────────────────────────┘ ``` -**See also** +**See Also** + - [extractAllGroupsHorizontal](#extractallgroups-horizontal) ## like(haystack, pattern), haystack LIKE pattern operator {#function-like} @@ -726,4 +728,51 @@ Result: Returns the number of regular expression matches for a `pattern` in a `haystack`. +**Syntax** + +``` sql +countMatches(haystack, pattern) +``` + +**Parameters** + +- `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `pattern` — The regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- The number of matches. + +Type: [UInt64](../../sql-reference/data-types/int-uint.md). 
+ +**Examples** + +Query: + +``` sql +SELECT countMatches('foobar.com', 'o+'); +``` + +Result: + +``` text +┌─countMatches('foobar.com', 'o+')─┐ +│ 2 │ +└──────────────────────────────────┘ +``` + +Query: + +``` sql +SELECT countMatches('aaaa', 'aa'); +``` + +Result: + +``` text +┌─countMatches('aaaa', 'aa')────┐ +│ 2 │ +└───────────────────────────────┘ +``` + [Original article](https://clickhouse.tech/docs/en/query_language/functions/string_search_functions/) diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index e9952fc76fd..b1a5fdb19b5 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -16,8 +16,8 @@ By default, tables are created only on the current server. Distributed DDL queri ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ( - name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [compression_codec] [TTL expr1], - name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [compression_codec] [TTL expr2], + name1 [type1] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|ALIAS expr1] [compression_codec] [TTL expr1], + name2 [type2] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|ALIAS expr2] [compression_codec] [TTL expr2], ... ) ENGINE = engine ``` @@ -57,6 +57,14 @@ In all cases, if `IF NOT EXISTS` is specified, the query won’t return an error There can be other clauses after the `ENGINE` clause in the query. See detailed documentation on how to create tables in the descriptions of [table engines](../../../engines/table-engines/index.md#table_engines). +## NULL Or NOT NULL Modifiers {#null-modifiers} + +`NULL` and `NOT NULL` modifiers after data type in column definition allow or do not allow it to be [Nullable](../../../sql-reference/data-types/nullable.md#data_type-nullable). + +If the type is not `Nullable` and if `NULL` is specified, it will be treated as `Nullable`; if `NOT NULL` is specified, then no. 
For example, `INT NULL` is the same as `Nullable(INT)`. If the type is `Nullable` and `NULL` or `NOT NULL` modifiers are specified, the exception will be thrown. + +See also [data_type_default_nullable](../../../operations/settings/settings.md#data_type_default_nullable) setting. + ## Default Values {#create-default-values} The column description can specify an expression for a default value, in one of the following ways: `DEFAULT expr`, `MATERIALIZED expr`, `ALIAS expr`. diff --git a/docs/en/sql-reference/statements/select/index.md b/docs/en/sql-reference/statements/select/index.md index 60c769c4660..ed69198ed4d 100644 --- a/docs/en/sql-reference/statements/select/index.md +++ b/docs/en/sql-reference/statements/select/index.md @@ -46,7 +46,7 @@ Specifics of each optional clause are covered in separate sections, which are li - [SELECT clause](#select-clause) - [DISTINCT clause](../../../sql-reference/statements/select/distinct.md) - [LIMIT clause](../../../sql-reference/statements/select/limit.md) -- [UNION clause](../../../sql-reference/statements/select/union-all.md) +- [UNION clause](../../../sql-reference/statements/select/union.md) - [INTO OUTFILE clause](../../../sql-reference/statements/select/into-outfile.md) - [FORMAT clause](../../../sql-reference/statements/select/format.md) diff --git a/docs/en/sql-reference/statements/select/union-all.md b/docs/en/sql-reference/statements/select/union-all.md deleted file mode 100644 index f150efbdc80..00000000000 --- a/docs/en/sql-reference/statements/select/union-all.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -toc_title: UNION ---- - -# UNION ALL Clause {#union-all-clause} - -You can use `UNION ALL` to combine any number of `SELECT` queries by extending their results. 
Example: - -``` sql -SELECT CounterID, 1 AS table, toInt64(count()) AS c - FROM test.hits - GROUP BY CounterID - -UNION ALL - -SELECT CounterID, 2 AS table, sum(Sign) AS c - FROM test.visits - GROUP BY CounterID - HAVING c > 0 -``` - -Result columns are matched by their index (order inside `SELECT`). If column names do not match, names for the final result are taken from the first query. - -Type casting is performed for unions. For example, if two queries being combined have the same field with non-`Nullable` and `Nullable` types from a compatible type, the resulting `UNION ALL` has a `Nullable` type field. - -Queries that are parts of `UNION ALL` can’t be enclosed in round brackets. [ORDER BY](../../../sql-reference/statements/select/order-by.md) and [LIMIT](../../../sql-reference/statements/select/limit.md) are applied to separate queries, not to the final result. If you need to apply a conversion to the final result, you can put all the queries with `UNION ALL` in a subquery in the [FROM](../../../sql-reference/statements/select/from.md) clause. - -# UNION DISTINCT Clause {#union-distinct-clause} -The difference between `UNION ALL` and `UNION DISTINCT` is that `UNION DISTINCT` will do a distinct transform for union result, it is equivalent to `SELECT DISTINCT` from a subquery containing `UNION ALL`. - -# UNION Clause {#union-clause} -By default, `UNION` has the same behavior as `UNION DISTINCT`, but you can specify union mode by setting `union_default_mode`, values can be 'ALL', 'DISTINCT' or empty string. However, if you use `UNION` with setting `union_default_mode` to empty string, it will throw an exception. - - -## Implementation Details {#implementation-details} - -Queries that are parts of `UNION/UNION ALL/UNION DISTINCT` can be run simultaneously, and their results can be mixed together. 
diff --git a/docs/en/sql-reference/statements/select/union.md b/docs/en/sql-reference/statements/select/union.md new file mode 100644 index 00000000000..cf18ff7a4a2 --- /dev/null +++ b/docs/en/sql-reference/statements/select/union.md @@ -0,0 +1,81 @@ +--- +toc_title: UNION +--- + +# UNION Clause {#union-clause} + +You can use `UNION` with explicitly specifying `UNION ALL` or `UNION DISTINCT`. + +If you don't specify `ALL` or `DISTINCT`, it will depend on the `union_default_mode` setting. The difference between `UNION ALL` and `UNION DISTINCT` is that `UNION DISTINCT` will do a distinct transform for union result, it is equivalent to `SELECT DISTINCT` from a subquery containing `UNION ALL`. + +You can use `UNION` to combine any number of `SELECT` queries by extending their results. Example: + +``` sql +SELECT CounterID, 1 AS table, toInt64(count()) AS c + FROM test.hits + GROUP BY CounterID + +UNION ALL + +SELECT CounterID, 2 AS table, sum(Sign) AS c + FROM test.visits + GROUP BY CounterID + HAVING c > 0 +``` + +Result columns are matched by their index (order inside `SELECT`). If column names do not match, names for the final result are taken from the first query. + +Type casting is performed for unions. For example, if two queries being combined have the same field with non-`Nullable` and `Nullable` types from a compatible type, the resulting `UNION` has a `Nullable` type field. + +Queries that are parts of `UNION` can be enclosed in round brackets. [ORDER BY](../../../sql-reference/statements/select/order-by.md) and [LIMIT](../../../sql-reference/statements/select/limit.md) are applied to separate queries, not to the final result. If you need to apply a conversion to the final result, you can put all the queries with `UNION` in a subquery in the [FROM](../../../sql-reference/statements/select/from.md) clause. 
+ +If you use `UNION` without explicitly specifying `UNION ALL` or `UNION DISTINCT`, you can specify the union mode using the [union_default_mode](../../../operations/settings/settings.md#union-default-mode) setting. The setting values can be `ALL`, `DISTINCT` or an empty string. However, if you use `UNION` with `union_default_mode` set to an empty string, an exception is thrown. The following examples demonstrate the results of queries with different values of the setting. + +Query: + +```sql +SET union_default_mode = 'DISTINCT'; +SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 2; +``` + +Result: + +```text +┌─1─┐ +│ 1 │ +└───┘ +┌─1─┐ +│ 2 │ +└───┘ +┌─1─┐ +│ 3 │ +└───┘ +``` + +Query: + +```sql +SET union_default_mode = 'ALL'; +SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 2; +``` + +Result: + +```text +┌─1─┐ +│ 1 │ +└───┘ +┌─1─┐ +│ 2 │ +└───┘ +┌─1─┐ +│ 2 │ +└───┘ +┌─1─┐ +│ 3 │ +└───┘ +``` + +Queries that are parts of `UNION/UNION ALL/UNION DISTINCT` can be run simultaneously, and their results can be mixed together. 
+ +[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/union/) diff --git a/docs/es/sql-reference/statements/select/index.md b/docs/es/sql-reference/statements/select/index.md index a5ff9820a2b..653f737b1d0 100644 --- a/docs/es/sql-reference/statements/select/index.md +++ b/docs/es/sql-reference/statements/select/index.md @@ -44,7 +44,7 @@ Los detalles de cada cláusula opcional se cubren en secciones separadas, que se - [Cláusula HAVING](having.md) - [Cláusula SELECT](#select-clause) - [Cláusula LIMIT](limit.md) -- [UNION ALL cláusula](union-all.md) +- [UNION ALL cláusula](union.md) ## SELECT Cláusula {#select-clause} diff --git a/docs/es/sql-reference/statements/select/union-all.md b/docs/es/sql-reference/statements/select/union.md similarity index 97% rename from docs/es/sql-reference/statements/select/union-all.md rename to docs/es/sql-reference/statements/select/union.md index b2b45ba770e..d3aec34ba4b 100644 --- a/docs/es/sql-reference/statements/select/union-all.md +++ b/docs/es/sql-reference/statements/select/union.md @@ -3,7 +3,7 @@ machine_translated: true machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd --- -# UNION ALL Cláusula {#union-all-clause} +# UNION Cláusula {#union-clause} Usted puede utilizar `UNION ALL` combinar cualquier número de `SELECT` consultas extendiendo sus resultados. 
Ejemplo: diff --git a/docs/fa/sql-reference/statements/select/index.md b/docs/fa/sql-reference/statements/select/index.md index 2ab3fea2ff1..90541b80636 100644 --- a/docs/fa/sql-reference/statements/select/index.md +++ b/docs/fa/sql-reference/statements/select/index.md @@ -44,7 +44,7 @@ SELECT [DISTINCT] expr_list - [داشتن بند](having.md) - [انتخاب بند](#select-clause) - [بند محدود](limit.md) -- [اتحادیه همه بند](union-all.md) +- [اتحادیه همه بند](union.md) ## انتخاب بند {#select-clause} diff --git a/docs/fa/sql-reference/statements/select/union-all.md b/docs/fa/sql-reference/statements/select/union.md similarity index 97% rename from docs/fa/sql-reference/statements/select/union-all.md rename to docs/fa/sql-reference/statements/select/union.md index 3c4fe5c1546..03d723e2338 100644 --- a/docs/fa/sql-reference/statements/select/union-all.md +++ b/docs/fa/sql-reference/statements/select/union.md @@ -3,7 +3,7 @@ machine_translated: true machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd --- -# اتحادیه همه بند {#union-all-clause} +# اتحادیه همه بند {#union-clause} شما می توانید استفاده کنید `UNION ALL` برای ترکیب هر تعداد از `SELECT` نمایش داده شد با گسترش نتایج خود را. 
مثال: diff --git a/docs/fr/sql-reference/statements/select/index.md b/docs/fr/sql-reference/statements/select/index.md index 5073469e651..1d53ae80eb4 100644 --- a/docs/fr/sql-reference/statements/select/index.md +++ b/docs/fr/sql-reference/statements/select/index.md @@ -44,7 +44,7 @@ Spécificités de chaque clause facultative, sont couverts dans des sections dis - [Clause HAVING](having.md) - [Clause SELECT](#select-clause) - [Clause LIMIT](limit.md) -- [Clause UNION ALL](union-all.md) +- [Clause UNION ALL](union.md) ## Clause SELECT {#select-clause} diff --git a/docs/fr/sql-reference/statements/select/union-all.md b/docs/fr/sql-reference/statements/select/union.md similarity index 97% rename from docs/fr/sql-reference/statements/select/union-all.md rename to docs/fr/sql-reference/statements/select/union.md index 63e9987965f..9ae65ebcf72 100644 --- a/docs/fr/sql-reference/statements/select/union-all.md +++ b/docs/fr/sql-reference/statements/select/union.md @@ -3,7 +3,7 @@ machine_translated: true machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd --- -# Clause UNION ALL {#union-all-clause} +# Clause UNION ALL {#union-clause} Vous pouvez utiliser `UNION ALL` à combiner `SELECT` requêtes en étendant leurs résultats. 
Exemple: diff --git a/docs/ja/sql-reference/statements/select/union-all.md b/docs/ja/sql-reference/statements/select/union-all.md deleted file mode 120000 index 837caae2698..00000000000 --- a/docs/ja/sql-reference/statements/select/union-all.md +++ /dev/null @@ -1 +0,0 @@ -../../../../en/sql-reference/statements/select/union-all.md \ No newline at end of file diff --git a/docs/ja/sql-reference/statements/select/union.md b/docs/ja/sql-reference/statements/select/union.md new file mode 100644 index 00000000000..0eb8db0be7a --- /dev/null +++ b/docs/ja/sql-reference/statements/select/union.md @@ -0,0 +1 @@ +../../../../en/sql-reference/statements/select/union.md \ No newline at end of file diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 0992e6ce82d..c4f5cdaf2ca 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -408,11 +408,11 @@ INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2; - `'best_effort'` — включает расширенный парсинг. - ClickHouse может парсить базовый формат `YYYY-MM-DD HH:MM:SS` и все форматы [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601). Например, `'2018-06-08T01:02:03.000Z'`. +ClickHouse может парсить базовый формат `YYYY-MM-DD HH:MM:SS` и все форматы [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601). Например, `'2018-06-08T01:02:03.000Z'`. - `'basic'` — используется базовый парсер. - ClickHouse может парсить только базовый формат `YYYY-MM-DD HH:MM:SS`. Например, `'2019-08-20 10:18:56'`. +ClickHouse может парсить только базовый формат `YYYY-MM-DD HH:MM:SS` или `YYYY-MM-DD`. Например, `'2019-08-20 10:18:56'` или `2019-08-20`. Значение по умолчанию: `'basic'`. 
@@ -691,6 +691,21 @@ ClickHouse использует этот параметр при чтении д log_queries=1 ``` +## log_queries_min_query_duration_ms {#settings-log-queries-min-query-duration-ms} + +Минимальное время выполнения запроса для логгирования в системные таблицы: + +- `system.query_log` +- `system.query_thread_log` + +В случае ненулевого порога `log_queries_min_query_duration_ms`, в лог будут записываться лишь события об окончании выполнения запроса: + +- `QUERY_FINISH` +- `EXCEPTION_WHILE_PROCESSING` + +- Тип: milliseconds +- Значение по умолчанию: 0 (логгировать все запросы) + ## log_queries_min_type {#settings-log-queries-min-type} Задаёт минимальный уровень логирования в `query_log`. @@ -2324,6 +2339,20 @@ SELECT number FROM numbers(3) FORMAT JSONEachRow; Значение по умолчанию: `0`. +## union_default_mode {#union-default-mode} + +Устанавливает режим объединения результатов `SELECT` запросов. Настройка используется только при совместном использовании с [UNION](../../sql-reference/statements/select/union.md) без явного указания `UNION ALL` или `UNION DISTINCT`. + +Возможные значения: + +- `'DISTINCT'` — ClickHouse выводит строки в результате объединения результатов запросов, удаляя повторяющиеся строки. +- `'ALL'` — ClickHouse выводит все строки в результате объединения результатов запросов, включая повторяющиеся строки. +- `''` — Clickhouse генерирует исключение при использовании с `UNION`. + +Значение по умолчанию: `''`. + +Смотрите примеры в разделе [UNION](../../sql-reference/statements/select/union.md). + ## execute_merges_on_single_replica_time_threshold {#execute-merges-on-single-replica-time-threshold} Включает особую логику выполнения слияний на репликах. 
diff --git a/docs/ru/operations/system-tables/errors.md b/docs/ru/operations/system-tables/errors.md new file mode 100644 index 00000000000..3a824c8c834 --- /dev/null +++ b/docs/ru/operations/system-tables/errors.md @@ -0,0 +1,23 @@ +# system.errors {#system_tables-errors} + +Содержит коды ошибок с указанием количества срабатываний. + +Столбцы: + +- `name` ([String](../../sql-reference/data-types/string.md)) — название ошибки (`errorCodeToName`). +- `code` ([Int32](../../sql-reference/data-types/int-uint.md)) — номер кода ошибки. +- `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) — количество ошибок. + +**Пример** + +``` sql +SELECT * +FROM system.errors +WHERE value > 0 +ORDER BY code ASC +LIMIT 1 + +┌─name─────────────┬─code─┬─value─┐ +│ CANNOT_OPEN_FILE │ 76 │ 1 │ +└──────────────────┴──────┴───────┘ +``` diff --git a/docs/ru/sql-reference/data-types/date.md b/docs/ru/sql-reference/data-types/date.md index 9bcae2c1d72..490bc5c28b4 100644 --- a/docs/ru/sql-reference/data-types/date.md +++ b/docs/ru/sql-reference/data-types/date.md @@ -9,4 +9,39 @@ toc_title: Date Дата хранится без учёта часового пояса. 
+## Примеры {#examples} + +**1.** Создание таблицы и добавление в неё данных: + +``` sql +CREATE TABLE dt +( + `timestamp` Date, + `event_id` UInt8 +) +ENGINE = TinyLog; +``` + +``` sql +INSERT INTO dt Values (1546300800, 1), ('2019-01-01', 2); +``` + +``` sql +SELECT * FROM dt; +``` + +``` text +┌──timestamp─┬─event_id─┐ +│ 2019-01-01 │ 1 │ +│ 2019-01-01 │ 2 │ +└────────────┴──────────┘ +``` + +## Смотрите также {#see-also} + +- [Функции для работы с датой и временем](../../sql-reference/functions/date-time-functions.md) +- [Операторы для работы с датой и временем](../../sql-reference/operators/index.md#operators-datetime) +- [Тип данных `DateTime`](../../sql-reference/data-types/datetime.md) + + [Оригинальная статья](https://clickhouse.tech/docs/ru/data_types/date/) diff --git a/docs/ru/sql-reference/data-types/datetime.md b/docs/ru/sql-reference/data-types/datetime.md index 87c5da68f35..9894fa2802b 100644 --- a/docs/ru/sql-reference/data-types/datetime.md +++ b/docs/ru/sql-reference/data-types/datetime.md @@ -116,12 +116,14 @@ FROM dt ## See Also {#see-also} -- [Функции преобразования типов](../../sql-reference/data-types/datetime.md) -- [Функции для работы с датой и временем](../../sql-reference/data-types/datetime.md) -- [Функции для работы с массивами](../../sql-reference/data-types/datetime.md) -- [Настройка `date_time_input_format`](../../operations/settings/settings.md#settings-date_time_input_format) -- [Конфигурационный параметр сервера `timezone`](../../sql-reference/data-types/datetime.md#server_configuration_parameters-timezone) -- [Операторы для работы с датой и временем](../../sql-reference/data-types/datetime.md#operators-datetime) +- [Функции преобразования типов](../../sql-reference/functions/type-conversion-functions.md) +- [Функции для работы с датой и временем](../../sql-reference/functions/date-time-functions.md) +- [Функции для работы с массивами](../../sql-reference/functions/array-functions.md) +- [Настройка 
`date_time_input_format`](../../operations/settings/settings/#settings-date_time_input_format) +- [Настройка `date_time_output_format`](../../operations/settings/settings/) +- [Конфигурационный параметр сервера `timezone`](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) +- [Операторы для работы с датой и временем](../../sql-reference/operators/index.md#operators-datetime) - [Тип данных `Date`](date.md) +- [Тип данных `DateTime64`](datetime64.md) [Оригинальная статья](https://clickhouse.tech/docs/ru/data_types/datetime/) diff --git a/docs/ru/sql-reference/data-types/datetime64.md b/docs/ru/sql-reference/data-types/datetime64.md index 0a602e44636..6576bf9dc0d 100644 --- a/docs/ru/sql-reference/data-types/datetime64.md +++ b/docs/ru/sql-reference/data-types/datetime64.md @@ -92,11 +92,12 @@ FROM dt ## See Also {#see-also} -- [Функции преобразования типов](../../sql-reference/data-types/datetime64.md) -- [Функции для работы с датой и временем](../../sql-reference/data-types/datetime64.md) -- [Функции для работы с массивами](../../sql-reference/data-types/datetime64.md) +- [Функции преобразования типов](../../sql-reference/functions/type-conversion-functions.md) +- [Функции для работы с датой и временем](../../sql-reference/functions/date-time-functions.md) +- [Функции для работы с массивами](../../sql-reference/functions/array-functions.md) - [Настройка `date_time_input_format`](../../operations/settings/settings.md#settings-date_time_input_format) -- [Конфигурационный параметр сервера `timezone`](../../sql-reference/data-types/datetime64.md#server_configuration_parameters-timezone) -- [Операторы для работы с датой и временем](../../sql-reference/data-types/datetime64.md#operators-datetime) +- [Настройка `date_time_output_format`](../../operations/settings/settings.md) +- [Конфигурационный параметр сервера 
`timezone`](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) +- [Операторы для работы с датой и временем](../../sql-reference/operators/index.md#operators-datetime) - [Тип данных `Date`](date.md) - [Тип данных `DateTime`](datetime.md) diff --git a/docs/ru/sql-reference/functions/hash-functions.md b/docs/ru/sql-reference/functions/hash-functions.md index 92fc69227f4..f7820889ea9 100644 --- a/docs/ru/sql-reference/functions/hash-functions.md +++ b/docs/ru/sql-reference/functions/hash-functions.md @@ -153,15 +153,18 @@ SELECT groupBitXor(cityHash64(*)) FROM table `URLHash(s, N)` - вычислить хэш от строки до N-го уровня в иерархии URL, без одного завершающего символа `/`, `?` или `#` на конце, если там такой есть. Уровни аналогичные URLHierarchy. Функция специфична для Яндекс.Метрики. +## farmFingerprint64 {#farmfingerprint64} + ## farmHash64 {#farmhash64} -Генерирует 64-х битное значение [FarmHash](https://github.com/google/farmhash). +Создает 64-битное значение [FarmHash](https://github.com/google/farmhash), независимое от платформы (архитектуры сервера), что важно, если значения сохраняются или используются для разбиения данных на группы. ``` sql +farmFingerprint64(par1, ...) farmHash64(par1, ...) ``` -Из всех [доступных методов](https://github.com/google/farmhash/blob/master/src/farmhash.h) функция использует `Hash64`. +Эти функции используют методы `Fingerprint64` и `Hash64` из всех [доступных методов](https://github.com/google/farmhash/blob/master/src/farmhash.h). 
**Параметры** diff --git a/docs/ru/sql-reference/functions/other-functions.md b/docs/ru/sql-reference/functions/other-functions.md index e2dd667fc04..68afb3e24ce 100644 --- a/docs/ru/sql-reference/functions/other-functions.md +++ b/docs/ru/sql-reference/functions/other-functions.md @@ -1686,6 +1686,26 @@ SELECT countDigits(toDecimal32(1, 9)), countDigits(toDecimal32(-1, 9)), 10 10 19 19 39 39 ``` +## errorCodeToName {#error-code-to-name} + +**Возвращаемое значение** + +- Название переменной для кода ошибки. + +Тип: [LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md). + +**Синтаксис** + +``` sql +errorCodeToName(1) +``` + +Результат: + +``` text +UNSUPPORTED_METHOD +``` + ## tcpPort {#tcpPort} Вовращает номер TCP порта, который использует сервер для [нативного протокола](../../interfaces/tcp.md). diff --git a/docs/ru/sql-reference/functions/string-search-functions.md b/docs/ru/sql-reference/functions/string-search-functions.md index e4d9316cbf3..e8cbb8deec4 100644 --- a/docs/ru/sql-reference/functions/string-search-functions.md +++ b/docs/ru/sql-reference/functions/string-search-functions.md @@ -521,6 +521,57 @@ SELECT * FROM Months WHERE ilike(name, '%j%') !!! note "Примечание" Для случая UTF-8 мы используем триграммное расстояние. Вычисление n-граммного расстояния не совсем честное. Мы используем 2-х байтные хэши для хэширования n-грамм, а затем вычисляем (не)симметрическую разность между хэш таблицами – могут возникнуть коллизии. В формате UTF-8 без учета регистра мы не используем честную функцию `tolower` – мы обнуляем 5-й бит (нумерация с нуля) каждого байта кодовой точки, а также первый бит нулевого байта, если байтов больше 1 – это работает для латиницы и почти для всех кириллических букв. + +## countMatches(haystack, pattern) {#countmatcheshaystack-pattern} + +Возвращает количество совпадений, найденных в строке `haystack`, для регулярного выражения `pattern`. 
+ +**Синтаксис** + +``` sql +countMatches(haystack, pattern) +``` + +**Параметры** + +- `haystack` — строка, по которой выполняется поиск. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `pattern` — регулярное выражение, построенное по синтаксическим правилам [re2](https://github.com/google/re2/wiki/Syntax). [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +- Количество совпадений. + +Тип: [UInt64](../../sql-reference/data-types/int-uint.md). + +**Примеры** + +Запрос: + +``` sql +SELECT countMatches('foobar.com', 'o+'); +``` + +Результат: + +``` text +┌─countMatches('foobar.com', 'o+')─┐ +│ 2 │ +└──────────────────────────────────┘ +``` + +Запрос: + +``` sql +SELECT countMatches('aaaa', 'aa'); +``` + +Результат: + +``` text +┌─countMatches('aaaa', 'aa')────┐ +│ 2 │ +└───────────────────────────────┘ +``` ## countSubstrings {#countSubstrings} diff --git a/docs/ru/sql-reference/statements/select/index.md b/docs/ru/sql-reference/statements/select/index.md index c2e05f05079..bf4ae44a6f1 100644 --- a/docs/ru/sql-reference/statements/select/index.md +++ b/docs/ru/sql-reference/statements/select/index.md @@ -44,7 +44,7 @@ SELECT [DISTINCT] expr_list - [Секция SELECT](#select-clause) - [Секция DISTINCT](distinct.md) - [Секция LIMIT](limit.md) -- [Секция UNION ALL](union-all.md) +- [Секция UNION ALL](union.md) - [Секция INTO OUTFILE](into-outfile.md) - [Секция FORMAT](format.md) diff --git a/docs/ru/sql-reference/statements/select/union-all.md b/docs/ru/sql-reference/statements/select/union-all.md deleted file mode 100644 index b9d1f485a7b..00000000000 --- a/docs/ru/sql-reference/statements/select/union-all.md +++ /dev/null @@ -1,34 +0,0 @@ ---- -toc_title: UNION ALL ---- - -# Секция UNION ALL {#union-all-clause} - -Вы можете использовать `UNION ALL` чтобы объединить любое количество `SELECT` запросы путем расширения их результатов. 
Пример: - -``` sql -SELECT CounterID, 1 AS table, toInt64(count()) AS c - FROM test.hits - GROUP BY CounterID - -UNION ALL - -SELECT CounterID, 2 AS table, sum(Sign) AS c - FROM test.visits - GROUP BY CounterID - HAVING c > 0 -``` - -Результирующие столбцы сопоставляются по их индексу (порядку внутри `SELECT`). Если имена столбцов не совпадают, то имена для конечного результата берутся из первого запроса. - -При объединении выполняет приведение типов. Например, если два запроса имеют одно и то же поле с не-`Nullable` и `Nullable` совместимыми типами, полученные в результате `UNION ALL` данные будут иметь `Nullable` тип. - -Запросы, которые являются частью `UNION ALL` не могут быть заключен в круглые скобки. [ORDER BY](order-by.md) и [LIMIT](limit.md) применяются к отдельным запросам, а не к конечному результату. Если вам нужно применить преобразование к конечному результату, вы можете разместить все объединенные с помощью `UNION ALL` запросы в подзапрос в секции [FROM](from.md). - -## Ограничения {#limitations} - -Поддерживается только `UNION ALL`. Обычный `UNION` (`UNION DISTINCT`) не поддерживается. Если вам это нужно `UNION DISTINCT`, вы можете написать `SELECT DISTINCT` из подзапроса, содержащего `UNION ALL`. - -## Детали реализации {#implementation-details} - -Запросы, которые являются частью `UNION ALL` выполняются параллельно, и их результаты могут быть смешаны вместе. diff --git a/docs/ru/sql-reference/statements/select/union.md b/docs/ru/sql-reference/statements/select/union.md new file mode 100644 index 00000000000..8f1dc11c802 --- /dev/null +++ b/docs/ru/sql-reference/statements/select/union.md @@ -0,0 +1,81 @@ +--- +toc_title: UNION +--- + +# Секция UNION {#union-clause} + +Вы можете использовать `UNION` в двух режимах: `UNION ALL` или `UNION DISTINCT`. + +Если `UNION` используется без указания `ALL` или `DISTINCT`, то его поведение определяется настройкой `union_default_mode`. 
Разница между `UNION ALL` и `UNION DISTINCT` в том, что `UNION DISTINCT` удаляет повторяющиеся строки из результата объединения.
+ +Запрос: + +```sql +SET union_default_mode = 'DISTINCT'; +SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 2; +``` + +Результат: + +```text +┌─1─┐ +│ 1 │ +└───┘ +┌─1─┐ +│ 2 │ +└───┘ +┌─1─┐ +│ 3 │ +└───┘ +``` + +Запрос: + +```sql +SET union_default_mode = 'ALL'; +SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 2; +``` + +Результат: + +```text +┌─1─┐ +│ 1 │ +└───┘ +┌─1─┐ +│ 2 │ +└───┘ +┌─1─┐ +│ 2 │ +└───┘ +┌─1─┐ +│ 3 │ +└───┘ +``` + +Запросы, которые являются частью `UNION/UNION ALL/UNION DISTINCT`, выполняются параллельно, и их результаты могут быть смешаны вместе. + +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/select/union/) diff --git a/docs/tr/sql-reference/statements/select/union-all.md b/docs/tr/sql-reference/statements/select/union-all.md deleted file mode 120000 index 837caae2698..00000000000 --- a/docs/tr/sql-reference/statements/select/union-all.md +++ /dev/null @@ -1 +0,0 @@ -../../../../en/sql-reference/statements/select/union-all.md \ No newline at end of file diff --git a/docs/tr/sql-reference/statements/select/union.md b/docs/tr/sql-reference/statements/select/union.md new file mode 100644 index 00000000000..0eb8db0be7a --- /dev/null +++ b/docs/tr/sql-reference/statements/select/union.md @@ -0,0 +1 @@ +../../../../en/sql-reference/statements/select/union.md \ No newline at end of file diff --git a/docs/zh/interfaces/formats.md b/docs/zh/interfaces/formats.md index 58d06916ed8..b37ef559aa7 100644 --- a/docs/zh/interfaces/formats.md +++ b/docs/zh/interfaces/formats.md @@ -1,179 +1,237 @@ -# 输入输出格式 {#formats} +--- +toc_priority: 21 +toc_title: 输入/输出格式 +--- -ClickHouse 可以接受多种数据格式,可以在 (`INSERT`) 以及 (`SELECT`) 请求中使用。 +# 输入/输出格式 {#formats} -下列表格列出了支持的数据格式以及在 (`INSERT`) 以及 (`SELECT`) 请求中使用它们的方式。 +ClickHouse可以接受和返回各种格式的数据。输入支持的格式可以用来解析提供给`INSERT`的数据,可以从文件备份表(如File, URL或HDFS)执行`SELECT`,或者读取外部字典。输出支持的格式可用于获取`SELECT`的结果,并支持执行`INSERT`文件的表中。 -| 格式 | INSERT | SELECT | 
-|-----------------------------------------------------------------|--------|--------| -| [TabSeparated](#tabseparated) | ✔ | ✔ | -| [TabSeparatedRaw](#tabseparatedraw) | ✗ | ✔ | -| [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ | -| [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔ | ✔ | -| [模板](#format-template) | ✔ | ✔ | -| [TemplateIgnoreSpaces](#templateignorespaces) | ✔ | ✗ | -| [CSV](#csv) | ✔ | ✔ | -| [CSVWithNames](#csvwithnames) | ✔ | ✔ | -| [自定义分离](#format-customseparated) | ✔ | ✔ | -| [值](#data-format-values) | ✔ | ✔ | -| [垂直](#vertical) | ✗ | ✔ | -| VerticalRaw | ✗ | ✔ | -| [JSON](#json) | ✗ | ✔ | -| [JSONCompact](#jsoncompact) | ✗ | ✔ | -| [JSONEachRow](#jsoneachrow) | ✔ | ✔ | -| [TSKV](#tskv) | ✔ | ✔ | -| [漂亮](#pretty) | ✗ | ✔ | -| [PrettyCompact](#prettycompact) | ✗ | ✔ | -| [PrettyCompactMonoBlock](#prettycompactmonoblock) | ✗ | ✔ | -| [PrettyNoEscapes](#prettynoescapes) | ✗ | ✔ | -| [PrettySpace](#prettyspace) | ✗ | ✔ | -| [Protobuf](#protobuf) | ✔ | ✔ | -| [Avro](#data-format-avro) | ✔ | ✔ | -| [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ | -| [镶木地板](#data-format-parquet) | ✔ | ✔ | -| [ORC](#data-format-orc) | ✔ | ✗ | -| [RowBinary](#rowbinary) | ✔ | ✔ | -| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ | -| [本地人](#native) | ✔ | ✔ | -| [Null](#null) | ✗ | ✔ | -| [XML](#xml) | ✗ | ✔ | -| [CapnProto](#capnproto) | ✔ | ✔ | +以下是支持的格式: + +| 格式 | 输入 | 输出 | +|-----------------------------------------------------------------------------------------|-------|--------| +| [TabSeparated](#tabseparated) | ✔ | ✔ | +| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ | +| [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ | +| [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔ | ✔ | +| [Template](#format-template) | ✔ | ✔ | +| [TemplateIgnoreSpaces](#templateignorespaces) | ✔ | ✗ | +| [CSV](#csv) | ✔ | ✔ | +| [CSVWithNames](#csvwithnames) | ✔ | ✔ | +| 
[CustomSeparated](#format-customseparated) | ✔ | ✔ | +| [Values](#data-format-values) | ✔ | ✔ | +| [Vertical](#vertical) | ✗ | ✔ | +| [VerticalRaw](#verticalraw) | ✗ | ✔ | +| [JSON](#json) | ✗ | ✔ | +| [JSONAsString](#jsonasstring) | ✔ | ✗ | +| [JSONString](#jsonstring) | ✗ | ✔ | +| [JSONCompact](#jsoncompact) | ✗ | ✔ | +| [JSONCompactString](#jsoncompactstring) | ✗ | ✔ | +| [JSONEachRow](#jsoneachrow) | ✔ | ✔ | +| [JSONEachRowWithProgress](#jsoneachrowwithprogress) | ✗ | ✔ | +| [JSONStringEachRow](#jsonstringeachrow) | ✔ | ✔ | +| [JSONStringEachRowWithProgress](#jsonstringeachrowwithprogress) | ✗ | ✔ | +| [JSONCompactEachRow](#jsoncompacteachrow) | ✔ | ✔ | +| [JSONCompactEachRowWithNamesAndTypes](#jsoncompacteachrowwithnamesandtypes) | ✔ | ✔ | +| [JSONCompactStringEachRow](#jsoncompactstringeachrow) | ✔ | ✔ | +| [JSONCompactStringEachRowWithNamesAndTypes](#jsoncompactstringeachrowwithnamesandtypes) | ✔ | ✔ | +| [TSKV](#tskv) | ✔ | ✔ | +| [Pretty](#pretty) | ✗ | ✔ | +| [PrettyCompact](#prettycompact) | ✗ | ✔ | +| [PrettyCompactMonoBlock](#prettycompactmonoblock) | ✗ | ✔ | +| [PrettyNoEscapes](#prettynoescapes) | ✗ | ✔ | +| [PrettySpace](#prettyspace) | ✗ | ✔ | +| [Protobuf](#protobuf) | ✔ | ✔ | +| [ProtobufSingle](#protobufsingle) | ✔ | ✔ | +| [Avro](#data-format-avro) | ✔ | ✔ | +| [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ | +| [Parquet](#data-format-parquet) | ✔ | ✔ | +| [Arrow](#data-format-arrow) | ✔ | ✔ | +| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ | +| [ORC](#data-format-orc) | ✔ | ✗ | +| [RowBinary](#rowbinary) | ✔ | ✔ | +| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ | +| [Native](#native) | ✔ | ✔ | +| [Null](#null) | ✗ | ✔ | +| [XML](#xml) | ✗ | ✔ | +| [CapnProto](#capnproto) | ✔ | ✗ | +| [LineAsString](#lineasstring) | ✔ | ✗ | + +您可以使用ClickHouse设置控制一些格式处理参数。更多详情设置请参考[设置](../operations/settings/settings.md) ## TabSeparated {#tabseparated} -在 TabSeparated 格式中,数据按行写入。每行包含由制表符分隔的值。除了行中的最后一个值(后面紧跟换行符)之外,每个值都跟随一个制表符。 
在任何地方都可以使用严格的 Unix 命令行。最后一行还必须在最后包含换行符。值以文本格式编写,不包含引号,并且要转义特殊字符。 +在TabSeparated分隔格式中,数据按行写入。每行包含由制表符分隔的值。每个值后跟一个制表符,除了行中最后一个值后跟换行。在任何地方都采用严格的Unix换行。最后一行还必须在末尾包含换行。值以文本格式编写,不包含引号,并使用转义的特殊字符。 -这种格式也可以用 `TSV` 来表示。 +这种格式也可以用`TSV`来表示。 -TabSeparated 格式非常方便用于自定义程序或脚本处理数据。HTTP 客户端接口默认会用这种格式,命令行客户端批量模式下也会用这种格式。这种格式允许在不同数据库之间传输数据。例如,从 MYSQL 中导出数据然后导入到 ClickHouse 中,反之亦然。 +`TabSeparated`格式便于使用自定义程序和脚本处理数据。默认情况下,它在HTTP接口和命令行客户端的批处理模式中使用。这种格式还允许在不同dbms之间传输数据。例如,您可以从MySQL获取转储并将其上传到ClickHouse,反之亦然。 -TabSeparated 格式支持输出数据总值(当使用 WITH TOTALS) 以及极值(当 ‘extremes’ 设置是1)。这种情况下,总值和极值输出在主数据的后面。主要的数据,总值,极值会以一个空行隔开,例如: +`TabSeparated`格式支持输出total值(与TOTALS一起使用时)和extreme值(当`extreme`被设置为1时)。在这种情况下,total值和extreme值会在主数据后输出。主要结果、总值和极值之间用空行分隔。示例: ``` sql SELECT EventDate, count() AS c FROM test.hits GROUP BY EventDate WITH TOTALS ORDER BY EventDate FORMAT TabSeparated`` ``` - 2014-03-17 1406958 - 2014-03-18 1383658 - 2014-03-19 1405797 - 2014-03-20 1353623 - 2014-03-21 1245779 - 2014-03-22 1031592 - 2014-03-23 1046491 +``` text +2014-03-17 1406958 +2014-03-18 1383658 +2014-03-19 1405797 +2014-03-20 1353623 +2014-03-21 1245779 +2014-03-22 1031592 +2014-03-23 1046491 - 1970-01-01 8873898 +1970-01-01 8873898 - 2014-03-17 1031592 - 2014-03-23 1406958 +2014-03-17 1031592 +2014-03-23 1406958 +``` -### 数据解析方式 {#shu-ju-jie-xi-fang-shi} +### 数据格式化 {#data-formatting} -整数以十进制形式写入。数字在开头可以包含额外的 `+` 字符(解析时忽略,格式化时不记录)。非负数不能包含负号。 读取时,允许将空字符串解析为零,或者(对于带符号的类型)将仅包含负号的字符串解析为零。 不符合相应数据类型的数字可能会被解析为不同的数字,而不会显示错误消息。 +整数是用十进制形式写的。数字可以在开头包含一个额外的`+`字符(解析时忽略,格式化时不记录)。非负数不能包含负号。在读取时,允许将空字符串解析为零,或者(对于有符号类型)将仅由一个负号组成的字符串解析为零。不符合相应数据类型的数字可以被解析为不同的数字,而不会出现错误消息。 -浮点数以十进制形式写入。点号用作小数点分隔符。支持指数等符号,如’inf’,‘+ inf’,‘-inf’和’nan’。 浮点数的输入可以以小数点开始或结束。 -格式化的时候,浮点数的精确度可能会丢失。 -解析的时候,没有严格需要去读取与机器可以表示的最接近的数值。 +浮点数以十进制形式书写。`.`号用作十进制分隔符。支持指数符号,如`inf`、`+inf`、`-inf`和`nan`。浮点数的条目可以以小数点开始或结束。 +在格式化期间,浮点数可能会丢失准确性。 +在解析期间,并不严格要求读取与机器可以表示的最接近的数值。 -日期会以 YYYY-MM-DD 格式写入和解析,但会以任何字符作为分隔符。 -带时间的日期会以 YYYY-MM-DD hh:mm:ss 格式写入和解析,但会以任何字符作为分隔符。 
-这一切都发生在客户端或服务器启动时的系统时区(取决于哪一种格式的数据)。对于具有时间的日期,夏时制时间未指定。 因此,如果转储在夏令时中有时间,则转储不会明确地匹配数据,解析将选择两者之一。 -在读取操作期间,不正确的日期和具有时间的日期可以使用自然溢出或空日期和时间进行分析,而不会出现错误消息。 +日期以YYYY-MM-DD格式编写,并以相同的格式解析,但使用任何字符作为分隔符。 +日期和时间以`YYYY-MM-DD hh:mm:ss`的格式书写,并以相同的格式解析,但使用任何字符作为分隔符。 +这一切都发生在客户端或服务器启动时的系统时区(取决于它们对数据的格式)。对于带有时间的日期,夏时制时间未指定。因此,如果转储在夏令时有时间,则转储不会明确地与数据匹配,解析将选择这两次中的一次。 +在读取操作期间,不正确的日期和具有时间的日期可以使用自然溢出或null日期和时间进行分析,而不会出现错误消息。 -有个例外情况,Unix 时间戳格式(10个十进制数字)也支持使用时间解析日期。结果不是时区相关的。格式 YYYY-MM-DD hh:mm:ss和 NNNNNNNNNN 会自动区分。 +有个例外情况,Unix时间戳格式也支持用时间解析日期(如果它恰好由10个十进制数字组成)。其结果与时间区域无关。格式`YYYY-MM-DD hh:mm:ss`和`NNNNNNNNNN`是自动区分的。 -字符串以反斜线转义的特殊字符输出。 以下转义序列用于输出:`\b`,`\f`,`\r`,`\n`,`\t`,`\0`,`\'`,`\\`。 解析还支持`\a`,`\v`和`\xHH`(十六进制转义字符)和任何`\c`字符,其中`c`是任何字符(这些序列被转换为`c`)。 因此,读取数据支持可以将换行符写为`\n`或`\`的格式,或者换行。例如,字符串 `Hello world` 在单词之间换行而不是空格可以解析为以下任何形式: +字符串以反斜杠转义的特殊字符输出。下面的转义序列用于输出:`\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\'`, `\\`。解析还支持`\a`、`\v`和`\xHH`(十六进制转义字符)和任何`\c`字符,其中`c`是任何字符(这些序列被转换为`c`)。因此,读取数据支持这样一种格式,即可以将换行符写成`\n`或`\`,或者写成换行符。例如,字符串`Hello world`在单词之间有换行符,而不是空格,可以用以下语法进行解析: - Hello\nworld +``` text +Hello\nworld - Hello\ - world +Hello\ +world +``` -第二种形式是支持的,因为 MySQL 读取 tab-separated 格式数据集的时候也会使用它。 +第二种形式是支持的,因为MySQL读取tab-separated格式数据集的时候也会使用它。 -在 TabSeparated 格式中传递数据时需要转义的最小字符集为:Tab,换行符(LF)和反斜杠。 +在TabSeparated分隔格式传递数据时需要转义的最小字符集:`Tab`、换行符(LF)和反斜杠。 -只有一小组符号会被转义。你可以轻易地找到一个字符串值,但这不会正常在你的终端显示。 +只有一小部分符号被转义。您可以很容易地找到一个字符串值,而您的终端将在输出中不显示它。 -数组写在方括号内的逗号分隔值列表中。 通常情况下,数组中的数字项目会被拼凑,但日期,带时间的日期以及字符串将使用与上面相同的转义规则用单引号引起来。 +数组写在方括号内的逗号分隔值列表中。数组中的数字项按正常格式进行格式化。`Date`和`DateTime`类型用单引号表示。字符串使用与上面相同的转义规则在单引号中编写。 -[NULL](../sql-reference/syntax.md) 将输出为 `\N`。 +[NULL](../sql-reference/syntax.md)将输出为`\N`。 + +[Nested](../sql-reference/data-types/nested-data-structures/nested.md)结构的每个元素都表示为数组。 + +示例: + +``` sql +CREATE TABLE nestedt +( + `id` UInt8, + `aux` Nested( + a UInt8, + b String + ) +) +ENGINE = TinyLog +``` + +``` sql +INSERT INTO nestedt Values ( 1, [1], ['a']) +``` + +``` sql +SELECT * FROM nestedt FORMAT TSV +``` 
+ +``` text +1 [1] ['a'] +``` ## TabSeparatedRaw {#tabseparatedraw} -与 `TabSeparated` 格式不一样的是,行数据是不会被转义的。 -该格式仅适用于输出查询结果,但不适用于解析输入(将数据插入到表中)。 +与`TabSeparated`格式的不同之处在于,写入的行没有转义。 +使用这种格式解析时,每个字段中不允许使用制表符或换行符。 -这种格式也可以使用名称 `TSVRaw` 来表示。 +这种格式也可以使用名称`TSVRaw`来表示。 ## TabSeparatedWithNames {#tabseparatedwithnames} -与 `TabSeparated` 格式不一样的是,第一行会显示列的名称。 -在解析过程中,第一行完全被忽略。您不能使用列名来确定其位置或检查其正确性。 -(未来可能会加入解析头行的功能) +与`TabSeparated`格式不同的是列名写在第一行。 +在解析过程中,第一行被完全忽略。不能使用列名来确定它们的位置或检查它们的正确性。 +(将来可能会添加对头行解析的支持。) -这种格式也可以使用名称 `TSVWithNames` 来表示。 +这种格式也可以使用名称`TSVWithNames`来表示。 ## TabSeparatedWithNamesAndTypes {#tabseparatedwithnamesandtypes} -与 `TabSeparated` 格式不一样的是,第一行会显示列的名称,第二行会显示列的类型。 -在解析过程中,第一行和第二行完全被忽略。 +与`TabSeparated`格式不同的是列名写在第一行,而列类型写在第二行。 +在解析过程中,将完全忽略第一行和第二行。 -这种格式也可以使用名称 `TSVWithNamesAndTypes` 来表示。 +这种格式也可以使用名称`TSVWithNamesAndTypes`来表示。 -## 模板 {#format-template} +## Template {#format-template} -此格式允许为具有指定转义规则的值指定带有占位符的自定义格式字符串。 +此格式允许指定带有占位符的自定义格式字符串,这些占位符用于指定转义规则。 -它使用设置 `format_schema`, `format_schema_rows`, `format_schema_rows_between_delimiter` and some settings of other formats (e.g. `output_format_json_quote_64bit_integers` 使用时 `JSON` 逃跑,进一步查看) +它使用设置`format_schema`, `format_schema_rows`, `format_schema_rows_between_delimiter`以及其他格式的一些设置(例如转义`JSON`时使用`output_format_json_quote_64bit_integers`) -格式字符串 `format_schema_rows` 使用以下语法指定行格式: +设置`format_template_row`指定文件的路径,该文件包含以下语法的行格式字符串: `delimiter_1${column_1:serializeAs_1}delimiter_2${column_2:serializeAs_2} ... delimiter_N`, - where `delimiter_i` is a delimiter between values (`$` symbol can be escaped as `$$`), - `column_i` is a name of a column whose values are to be selected or inserted (if empty, then column will be skipped), - `serializeAs_i` is an escaping rule for the column values. 
The following escaping rules are supported: +其中,`delimiter_i`是值之间的分隔符(`$`符号可以转义为`$$`), +`column_i`是要选择或插入其值的列的名称或索引(如果为空,则跳过该列), +`serializeAs_i`是列值的转义规则。支持以下转义规则: - - `CSV`, `JSON`, `XML` (similarly to the formats of the same names) - - `Escaped` (similarly to `TSV`) - - `Quoted` (similarly to `Values`) - - `Raw` (without escaping, similarly to `TSVRaw`) - - `None` (no escaping rule, see further) +- `CSV`, `JSON`, `XML` (类似于相同名称的格式) +- `Escaped` (类似于`TSV`) +- `Quoted` (类似于`Values`) +- `Raw` (类似于`TSVRaw`) +- `None` - If escaping rule is omitted, then`None` will be used. `XML` and `Raw` are suitable only for output. +如果省略了转义规则,那么将使用`None`。`XML`和`Raw`只适用于输出。 - So, for the following format string: +对于下面的格式字符串: `Search phrase: ${SearchPhrase:Quoted}, count: ${c:Escaped}, ad price: $$${price:JSON};` - the values of `SearchPhrase`, `c` and `price` columns, which are escaped as `Quoted`, `Escaped` and `JSON` will be printed (for select) or will be expected (for insert) between `Search phrase: `, `, count: `, `, ad price: $` and `;` delimiters respectively. For example: +`SearchPhrase`、`c`和`price`列的值被转义为`quotation`、`Escaped`和`JSON`将分别在`Search phrase:`, `, count: `, `, ad price: $`和`;`分隔符之间打印(用于选择)或expected(用于插入)。例如: - `Search phrase: 'bathroom interior design', count: 2166, ad price: $3;` +`Search phrase: 'bathroom interior design', count: 2166, ad price: $3;` -该 `format_schema_rows_between_delimiter` setting指定行之间的分隔符,该分隔符在除最后一行之外的每一行之后打印(或预期) (`\n` 默认情况下) +`format_template_rows_between_delimiter`设置指定行之间的分隔符,它将打印(或expected)在每一行之后,最后一行除外(默认为`\n`)。 -格式字符串 `format_schema` 具有相同的语法 `format_schema_rows` 并允许指定前缀,后缀和打印一些附加信息的方式。 它包含以下占位符而不是列名: +设置`format_template_resultset`指定文件路径,该文件包含resultset的格式字符串。resultset的格式字符串与row的格式字符串具有相同的语法,允许指定前缀、后缀和打印一些附加信息的方法。它包含以下占位符而不是列名: -- `data` 包含数据的行 `format_schema_rows` 格式,由分隔 `format_schema_rows_between_delimiter`. 
此占位符必须是格式字符串中的第一个占位符。 -- `totals` 是包含总值的行 `format_schema_rows` 格式(与总计一起使用时) -- `min` 是具有最小值的行 `format_schema_rows` 格式(当极值设置为1时) -- `max` 是具有最大值的行 `format_schema_rows` 格式(当极值设置为1时) -- `rows` 输出行总数 -- `rows_before_limit` 是没有限制的最小行数。 仅当查询包含LIMIT时输出。 如果查询包含GROUP BY,则rows_before_limit_at_least是没有限制的确切行数。 -- `time` 请求执行时间以秒为单位 +- `data` `format_template_row`格式的数据行,由`format_template_rows_between_delimiter`分隔。此占位符必须是格式字符串中的第一个占位符。 +- `totals` `format_template_row`格式的总值(和WITH TOTALS一起使用) +- `min` `format_template_row`格式的最小值(当极值设置为1时) +- `max` `format_template_row`格式的最大值(当极值设置为1时) +- `rows` 输出行的总数 +- `rows_before_limit` 没有LIMIT的最小行数。仅当查询包含LIMIT时输出。如果查询包含GROUP BY,那么rows_before_limit_at_least就是没有LIMIT的确切行数。 +- `time` 请求执行时间(秒) - `rows_read` 已读取的行数 -- `bytes_read` 被读取的字节数(未压缩) +- `bytes_read` 已读取(未压缩)的字节数 -占位符 `data`, `totals`, `min` 和 `max` 必须没有指定转义规则(或 `None` 必须明确指定)。 其余的占位符可能具有指定的任何转义规则。 -如果 `format_schema` 设置为空字符串, `${data}` 用作默认值。 -对于插入查询格式允许跳过一些列或一些字段,如果前缀或后缀(见示例)。 +占位符`data`、`totals`、`min`和`max`必须没有指定转义规则(或者必须显式指定`None`)。其余占位符可以指定任何转义规则。 +如果`format_template_resultset`设置为空字符串,则使用`${data}`作为默认值。 +对于insert查询,格式允许跳过某些列或某些字段的前缀或后缀(参见示例)。 -`Select` 示例: +Select示例: ``` sql -SELECT SearchPhrase, count() AS c FROM test.hits GROUP BY SearchPhrase ORDER BY c DESC LIMIT 5 -FORMAT Template -SETTINGS format_schema = ' +SELECT SearchPhrase, count() AS c FROM test.hits GROUP BY SearchPhrase ORDER BY c DESC LIMIT 5 FORMAT Template SETTINGS +format_template_resultset = '/some/path/resultset.format', format_template_row = '/some/path/row.format', format_template_rows_between_delimiter = '\n ' +``` + +`/some/path/resultset.format`: + +``` text +