Merge remote-tracking branch 'origin/master' into aku/faster-perf

This commit is contained in:
Alexander Kuzmenkov 2020-09-14 10:18:28 +03:00
commit 30bc5e19b7
139 changed files with 3807 additions and 851 deletions

View File

@ -17,5 +17,4 @@ ClickHouse is an open-source column-oriented database management system that all
## Upcoming Events
* [ClickHouse Data Integration Virtual Meetup](https://www.eventbrite.com/e/clickhouse-september-virtual-meetup-data-integration-tickets-117421895049) on September 10, 2020.
* [ClickHouse talk at Ya.Subbotnik (in Russian)](https://ya.cc/t/cIBI-3yECj5JF) on September 12, 2020.

View File

@ -1,8 +1,6 @@
#pragma once
#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <string>
#include <type_traits>

View File

@ -10,42 +10,51 @@ results of a `SELECT`, and to perform `INSERT`s into a file-backed table.
The supported formats are:
| Format | Input | Output |
|-----------------------------------------------------------------|-------|--------|
| [TabSeparated](#tabseparated) | ✔ | ✔ |
| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ |
| [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ |
| [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔ | ✔ |
| [Template](#format-template) | ✔ | ✔ |
| [TemplateIgnoreSpaces](#templateignorespaces) | ✔ | ✗ |
| [CSV](#csv) | ✔ | ✔ |
| [CSVWithNames](#csvwithnames) | ✔ | ✔ |
| [CustomSeparated](#format-customseparated) | ✔ | ✔ |
| [Values](#data-format-values) | ✔ | ✔ |
| [Vertical](#vertical) | ✗ | ✔ |
| [VerticalRaw](#verticalraw) | ✗ | ✔ |
| [JSON](#json) | ✗ | ✔ |
| [JSONCompact](#jsoncompact) | ✗ | ✔ |
| [JSONEachRow](#jsoneachrow) | ✔ | ✔ |
| [TSKV](#tskv) | ✔ | ✔ |
| [Pretty](#pretty) | ✗ | ✔ |
| [PrettyCompact](#prettycompact) | ✗ | ✔ |
| [PrettyCompactMonoBlock](#prettycompactmonoblock) | ✗ | ✔ |
| [PrettyNoEscapes](#prettynoescapes) | ✗ | ✔ |
| [PrettySpace](#prettyspace) | ✗ | ✔ |
| [Protobuf](#protobuf) | ✔ | ✔ |
| [Avro](#data-format-avro) | ✔ | ✔ |
| [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ |
| [Parquet](#data-format-parquet) | ✔ | ✔ |
| [Arrow](#data-format-arrow) | ✔ | ✔ |
| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ |
| [ORC](#data-format-orc) | ✔ | ✗ |
| [RowBinary](#rowbinary) | ✔ | ✔ |
| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ |
| [Native](#native) | ✔ | ✔ |
| [Null](#null) | ✗ | ✔ |
| [XML](#xml) | ✗ | ✔ |
| [CapnProto](#capnproto) | ✔ | ✗ |
| Format | Input | Output |
|-----------------------------------------------------------------------------------------|-------|--------|
| [TabSeparated](#tabseparated) | ✔ | ✔ |
| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ |
| [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ |
| [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔ | ✔ |
| [Template](#format-template) | ✔ | ✔ |
| [TemplateIgnoreSpaces](#templateignorespaces) | ✔ | ✗ |
| [CSV](#csv) | ✔ | ✔ |
| [CSVWithNames](#csvwithnames) | ✔ | ✔ |
| [CustomSeparated](#format-customseparated) | ✔ | ✔ |
| [Values](#data-format-values) | ✔ | ✔ |
| [Vertical](#vertical) | ✗ | ✔ |
| [VerticalRaw](#verticalraw) | ✗ | ✔ |
| [JSON](#json) | ✗ | ✔ |
| [JSONString](#jsonstring) | ✗ | ✔ |
| [JSONCompact](#jsoncompact) | ✗ | ✔ |
| [JSONCompactString](#jsoncompactstring) | ✗ | ✔ |
| [JSONEachRow](#jsoneachrow) | ✔ | ✔ |
| [JSONEachRowWithProgress](#jsoneachrowwithprogress) | ✗ | ✔ |
| [JSONStringEachRow](#jsonstringeachrow) | ✔ | ✔ |
| [JSONStringEachRowWithProgress](#jsonstringeachrowwithprogress) | ✗ | ✔ |
| [JSONCompactEachRow](#jsoncompacteachrow) | ✔ | ✔ |
| [JSONCompactEachRowWithNamesAndTypes](#jsoncompacteachrowwithnamesandtypes) | ✔ | ✔ |
| [JSONCompactStringEachRow](#jsoncompactstringeachrow) | ✔ | ✔ |
| [JSONCompactStringEachRowWithNamesAndTypes](#jsoncompactstringeachrowwithnamesandtypes) | ✔ | ✔ |
| [TSKV](#tskv) | ✔ | ✔ |
| [Pretty](#pretty) | ✗ | ✔ |
| [PrettyCompact](#prettycompact) | ✗ | ✔ |
| [PrettyCompactMonoBlock](#prettycompactmonoblock) | ✗ | ✔ |
| [PrettyNoEscapes](#prettynoescapes) | ✗ | ✔ |
| [PrettySpace](#prettyspace) | ✗ | ✔ |
| [Protobuf](#protobuf) | ✔ | ✔ |
| [Avro](#data-format-avro) | ✔ | ✔ |
| [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ |
| [Parquet](#data-format-parquet) | ✔ | ✔ |
| [Arrow](#data-format-arrow) | ✔ | ✔ |
| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ |
| [ORC](#data-format-orc) | ✔ | ✗ |
| [RowBinary](#rowbinary) | ✔ | ✔ |
| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ |
| [Native](#native) | ✔ | ✔ |
| [Null](#null) | ✗ | ✔ |
| [XML](#xml) | ✗ | ✔ |
| [CapnProto](#capnproto) | ✔ | ✗ |
You can control some format processing parameters with the ClickHouse settings. For more information read the [Settings](../operations/settings/settings.md) section.
@ -392,62 +401,41 @@ SELECT SearchPhrase, count() AS c FROM test.hits GROUP BY SearchPhrase WITH TOTA
"meta":
[
{
"name": "SearchPhrase",
"name": "'hello'",
"type": "String"
},
{
"name": "c",
"name": "multiply(42, number)",
"type": "UInt64"
},
{
"name": "range(5)",
"type": "Array(UInt8)"
}
],
"data":
[
{
"SearchPhrase": "",
"c": "8267016"
"'hello'": "hello",
"multiply(42, number)": "0",
"range(5)": [0,1,2,3,4]
},
{
"SearchPhrase": "bathroom interior design",
"c": "2166"
"'hello'": "hello",
"multiply(42, number)": "42",
"range(5)": [0,1,2,3,4]
},
{
"SearchPhrase": "yandex",
"c": "1655"
},
{
"SearchPhrase": "spring 2014 fashion",
"c": "1549"
},
{
"SearchPhrase": "freeform photos",
"c": "1480"
"'hello'": "hello",
"multiply(42, number)": "84",
"range(5)": [0,1,2,3,4]
}
],
"totals":
{
"SearchPhrase": "",
"c": "8873898"
},
"rows": 3,
"extremes":
{
"min":
{
"SearchPhrase": "",
"c": "1480"
},
"max":
{
"SearchPhrase": "",
"c": "8267016"
}
},
"rows": 5,
"rows_before_limit_at_least": 141137
"rows_before_limit_at_least": 3
}
```
@ -468,63 +456,165 @@ ClickHouse supports [NULL](../sql-reference/syntax.md), which is displayed as `n
See also the [JSONEachRow](#jsoneachrow) format.
## JSONString {#jsonstring}
Differs from JSON only in that data fields are output as strings, not as typed JSON values.
Example:
```json
{
"meta":
[
{
"name": "'hello'",
"type": "String"
},
{
"name": "multiply(42, number)",
"type": "UInt64"
},
{
"name": "range(5)",
"type": "Array(UInt8)"
}
],
"data":
[
{
"'hello'": "hello",
"multiply(42, number)": "0",
"range(5)": "[0,1,2,3,4]"
},
{
"'hello'": "hello",
"multiply(42, number)": "42",
"range(5)": "[0,1,2,3,4]"
},
{
"'hello'": "hello",
"multiply(42, number)": "84",
"range(5)": "[0,1,2,3,4]"
}
],
"rows": 3,
"rows_before_limit_at_least": 3
}
```
## JSONCompact {#jsoncompact}
## JSONCompactString {#jsoncompactstring}
Differs from JSON only in that data rows are output in arrays, not in objects.
Example:
``` json
// JSONCompact
{
"meta":
[
{
"name": "SearchPhrase",
"name": "'hello'",
"type": "String"
},
{
"name": "c",
"name": "multiply(42, number)",
"type": "UInt64"
},
{
"name": "range(5)",
"type": "Array(UInt8)"
}
],
"data":
[
["", "8267016"],
["bathroom interior design", "2166"],
["yandex", "1655"],
["fashion trends spring 2014", "1549"],
["freeform photo", "1480"]
["hello", "0", [0,1,2,3,4]],
["hello", "42", [0,1,2,3,4]],
["hello", "84", [0,1,2,3,4]]
],
"totals": ["","8873898"],
"rows": 3,
"extremes":
{
"min": ["","1480"],
"max": ["","8267016"]
},
"rows": 5,
"rows_before_limit_at_least": 141137
"rows_before_limit_at_least": 3
}
```
This format is only appropriate for outputting a query result, but not for parsing (retrieving data to insert in a table).
See also the `JSONEachRow` format.
```json
// JSONCompactString
{
"meta":
[
{
"name": "'hello'",
"type": "String"
},
{
"name": "multiply(42, number)",
"type": "UInt64"
},
{
"name": "range(5)",
"type": "Array(UInt8)"
}
],
## JSONEachRow {#jsoneachrow}
"data":
[
["hello", "0", "[0,1,2,3,4]"],
["hello", "42", "[0,1,2,3,4]"],
["hello", "84", "[0,1,2,3,4]"]
],
When using this format, ClickHouse outputs rows as separated, newline-delimited JSON objects, but the data as a whole is not valid JSON.
"rows": 3,
``` json
{"SearchPhrase":"curtain designs","count()":"1064"}
{"SearchPhrase":"baku","count()":"1000"}
{"SearchPhrase":"","count()":"8267016"}
"rows_before_limit_at_least": 3
}
```
When inserting the data, you should provide a separate JSON object for each row.
## JSONEachRow {#jsoneachrow}
## JSONStringEachRow {#jsonstringeachrow}
## JSONCompactEachRow {#jsoncompacteachrow}
## JSONCompactStringEachRow {#jsoncompactstringeachrow}
When using these formats, ClickHouse outputs rows as separated, newline-delimited JSON values, but the data as a whole is not valid JSON.
``` json
{"some_int":42,"some_str":"hello","some_tuple":[1,"a"]} // JSONEachRow
[42,"hello",[1,"a"]] // JSONCompactEachRow
["42","hello","(1,'a')"] // JSONCompactStringEachRow
```
When inserting the data, you should provide a separate JSON value for each row.
## JSONEachRowWithProgress {#jsoneachrowwithprogress}
## JSONStringEachRowWithProgress {#jsonstringeachrowwithprogress}
Differs from JSONEachRow/JSONStringEachRow in that ClickHouse will also yield progress information as JSON objects.
```json
{"row":{"'hello'":"hello","multiply(42, number)":"0","range(5)":[0,1,2,3,4]}}
{"row":{"'hello'":"hello","multiply(42, number)":"42","range(5)":[0,1,2,3,4]}}
{"row":{"'hello'":"hello","multiply(42, number)":"84","range(5)":[0,1,2,3,4]}}
{"progress":{"read_rows":"3","read_bytes":"24","written_rows":"0","written_bytes":"0","total_rows_to_read":"3"}}
```
## JSONCompactEachRowWithNamesAndTypes {#jsoncompacteachrowwithnamesandtypes}
## JSONCompactStringEachRowWithNamesAndTypes {#jsoncompactstringeachrowwithnamesandtypes}
Differs from JSONCompactEachRow/JSONCompactStringEachRow in that the column names and types are written as the first two rows.
```json
["'hello'", "multiply(42, number)", "range(5)"]
["String", "UInt64", "Array(UInt8)"]
["hello", "0", [0,1,2,3,4]]
["hello", "42", [0,1,2,3,4]]
["hello", "84", [0,1,2,3,4]]
```
### Inserting Data {#inserting-data}

View File

@ -6,6 +6,7 @@ Columns:
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time.
- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Event time with microseconds resolution.
- `name` ([String](../../sql-reference/data-types/string.md)) — Metric name.
- `value` ([Float64](../../sql-reference/data-types/float.md)) — Metric value.
@ -16,18 +17,18 @@ SELECT * FROM system.asynchronous_metric_log LIMIT 10
```
``` text
┌─event_date─┬──────────event_time─┬─name─────────────────────────────────────┬────value─┐
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.arenas.all.pmuzzy │ 0
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.arenas.all.pdirty │ 4214
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.background_thread.run_intervals │ 0
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.background_thread.num_runs │ 0 │
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.retained │ 17657856
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.mapped │ 71471104
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.resident │ 61538304 │
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.metadata │ 6199264 │
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.allocated │ 38074336
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.epoch │ 2
└────────────┴─────────────────────┴──────────────────────────────────────────┴──────────┘
┌─event_date─┬──────────event_time─┬────event_time_microseconds─┬─name─────────────────────────────────────┬────value─┐
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ CPUFrequencyMHz_0 │ 2120.9
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.arenas.all.pmuzzy │ 743
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.arenas.all.pdirty │ 26288
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.background_thread.run_intervals │ 0 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.background_thread.num_runs │ 0
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.retained │ 60694528
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.mapped │ 303161344 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.resident │ 260931584 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.metadata │ 12079488
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.allocated │ 133756128
└────────────┴─────────────────────┴────────────────────────────┴──────────────────────────────────────────┴──────────┘
```
**See Also**

View File

@ -10,12 +10,16 @@ Columns:
- `progress` (Float64) — The percentage of completed work from 0 to 1.
- `num_parts` (UInt64) — The number of pieces to be merged.
- `result_part_name` (String) — The name of the part that will be formed as the result of merging.
- `is_mutation` (UInt8) - 1 if this process is a part mutation.
- `is_mutation` (UInt8) — 1 if this process is a part mutation.
- `total_size_bytes_compressed` (UInt64) — The total size of the compressed data in the merged chunks.
- `total_size_marks` (UInt64) — The total number of marks in the merged parts.
- `bytes_read_uncompressed` (UInt64) — Number of bytes read, uncompressed.
- `rows_read` (UInt64) — Number of rows read.
- `bytes_written_uncompressed` (UInt64) — Number of bytes written, uncompressed.
- `rows_written` (UInt64) — Number of rows written.
- `memory_usage` (UInt64) — Memory consumption of the merge process.
- `thread_id` (UInt64) — Thread ID of the merge process.
- `merge_type` — The type of current merge. Empty if it's a mutation.
- `merge_algorithm` — The algorithm used in current merge. Empty if it's a mutation.
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/merges) <!--hide-->

View File

@ -23,28 +23,28 @@ SELECT * FROM system.metric_log LIMIT 1 FORMAT Vertical;
``` text
Row 1:
──────
event_date: 2020-02-18
event_time: 2020-02-18 07:15:33
milliseconds: 554
ProfileEvent_Query: 0
ProfileEvent_SelectQuery: 0
ProfileEvent_InsertQuery: 0
ProfileEvent_FileOpen: 0
ProfileEvent_Seek: 0
ProfileEvent_ReadBufferFromFileDescriptorRead: 1
ProfileEvent_ReadBufferFromFileDescriptorReadFailed: 0
ProfileEvent_ReadBufferFromFileDescriptorReadBytes: 0
ProfileEvent_WriteBufferFromFileDescriptorWrite: 1
ProfileEvent_WriteBufferFromFileDescriptorWriteFailed: 0
ProfileEvent_WriteBufferFromFileDescriptorWriteBytes: 56
event_date: 2020-09-05
event_time: 2020-09-05 16:22:33
event_time_microseconds: 2020-09-05 16:22:33.196807
milliseconds: 196
ProfileEvent_Query: 0
ProfileEvent_SelectQuery: 0
ProfileEvent_InsertQuery: 0
ProfileEvent_FailedQuery: 0
ProfileEvent_FailedSelectQuery: 0
...
CurrentMetric_Query: 0
CurrentMetric_Merge: 0
CurrentMetric_PartMutation: 0
CurrentMetric_ReplicatedFetch: 0
CurrentMetric_ReplicatedSend: 0
CurrentMetric_ReplicatedChecks: 0
...
CurrentMetric_Revision: 54439
CurrentMetric_VersionInteger: 20009001
CurrentMetric_RWLockWaitingReaders: 0
CurrentMetric_RWLockWaitingWriters: 0
CurrentMetric_RWLockActiveReaders: 0
CurrentMetric_RWLockActiveWriters: 0
CurrentMetric_GlobalThread: 74
CurrentMetric_GlobalThreadActive: 26
CurrentMetric_LocalThread: 0
CurrentMetric_LocalThreadActive: 0
CurrentMetric_DistributedFilesToInsert: 0
```
**See also**

View File

@ -515,6 +515,29 @@ SELECT
└────────────────┴────────────┘
```
## formatReadableQuantity(x) {#formatreadablequantityx}
Accepts a number. Returns a rounded number with a suffix (thousand, million, billion, etc.) as a string.
It is useful for making big numbers easier for humans to read.
Example:
``` sql
SELECT
arrayJoin([1024, 1234 * 1000, (4567 * 1000) * 1000, 98765432101234]) AS number,
formatReadableQuantity(number) AS number_for_humans
```
``` text
┌─────────number─┬─number_for_humans─┐
│ 1024 │ 1.02 thousand │
│ 1234000 │ 1.23 million │
│ 4567000000 │ 4.57 billion │
│ 98765432101234 │ 98.77 trillion │
└────────────────┴───────────────────┘
```
## least(a, b) {#leasta-b}
Returns the smallest value from a and b.

View File

@ -1,20 +1,18 @@
---
machine_translated: true
machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
toc_priority: 49
toc_title: Copia de seguridad de datos
---
# Copia de seguridad de datos {#data-backup}
Mientras [replicación](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [no puede simplemente eliminar tablas con un motor similar a MergeTree que contenga más de 50 Gb de datos](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). Sin embargo, estas garantías no cubren todos los casos posibles y pueden eludirse.
Mientras que la [replicación](../engines/table-engines/mergetree-family/replication.md) proporciona protección contra fallos de hardware, no protege de errores humanos: el borrado accidental de datos, eliminar la tabla equivocada o una tabla en el clúster equivocado, y bugs de software que dan como resultado un procesado incorrecto de los datos o la corrupción de los datos. En muchos casos, errores como estos afectarán a todas las réplicas. ClickHouse dispone de salvaguardas para prevenir algunos tipos de errores — por ejemplo, por defecto [no se puede simplemente eliminar tablas con un motor similar a MergeTree que contenga más de 50 Gb de datos](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). Sin embargo, estas salvaguardas no cubren todos los casos posibles y pueden eludirse.
Para mitigar eficazmente los posibles errores humanos, debe preparar cuidadosamente una estrategia para realizar copias de seguridad y restaurar sus datos **previamente**.
Cada empresa tiene diferentes recursos disponibles y requisitos comerciales, por lo que no existe una solución universal para las copias de seguridad y restauraciones de ClickHouse que se adapten a cada situación. Lo que funciona para un gigabyte de datos probablemente no funcionará para decenas de petabytes. Hay una variedad de posibles enfoques con sus propios pros y contras, que se discutirán a continuación. Es una buena idea utilizar varios enfoques en lugar de solo uno para compensar sus diversas deficiencias.
Cada empresa tiene diferentes recursos disponibles y requisitos comerciales, por lo que no existe una solución universal para las copias de seguridad y restauraciones de ClickHouse que se adapten a cada situación. Lo que funciona para un gigabyte de datos probablemente no funcionará para decenas de petabytes. Hay una variedad de posibles enfoques con sus propios pros y contras, que se discutirán a continuación. Es una buena idea utilizar varios enfoques en lugar de uno solo para compensar sus diversas deficiencias.
!!! note "Nota"
Tenga en cuenta que si realizó una copia de seguridad de algo y nunca intentó restaurarlo, es probable que la restauración no funcione correctamente cuando realmente la necesite (o al menos tomará más tiempo de lo que las empresas pueden tolerar). Por lo tanto, cualquiera que sea el enfoque de copia de seguridad que elija, asegúrese de automatizar el proceso de restauración también y practicarlo en un clúster de ClickHouse de repuesto regularmente.
Tenga en cuenta que si realizó una copia de seguridad de algo y nunca intentó restaurarlo, es probable que la restauración no funcione correctamente cuando realmente la necesite (o al menos tomará más tiempo de lo que las empresas pueden tolerar). Por lo tanto, cualquiera que sea el enfoque de copia de seguridad que elija, asegúrese de automatizar el proceso de restauración también y ponerlo en práctica en un clúster de ClickHouse de repuesto regularmente.
## Duplicar datos de origen en otro lugar {#duplicating-source-data-somewhere-else}
@ -32,7 +30,7 @@ Para volúmenes de datos más pequeños, un simple `INSERT INTO ... SELECT ...`
## Manipulaciones con piezas {#manipulations-with-parts}
ClickHouse permite usar el `ALTER TABLE ... FREEZE PARTITION ...` consulta para crear una copia local de particiones de tabla. Esto se implementa utilizando enlaces duros al `/var/lib/clickhouse/shadow/` carpeta, por lo que generalmente no consume espacio adicional en disco para datos antiguos. Las copias creadas de archivos no son manejadas por el servidor ClickHouse, por lo que puede dejarlas allí: tendrá una copia de seguridad simple que no requiere ningún sistema externo adicional, pero seguirá siendo propenso a problemas de hardware. Por esta razón, es mejor copiarlos de forma remota en otra ubicación y luego eliminar las copias locales. Los sistemas de archivos distribuidos y los almacenes de objetos siguen siendo una buena opción para esto, pero los servidores de archivos conectados normales con una capacidad lo suficientemente grande podrían funcionar también (en este caso, la transferencia ocurrirá a través del sistema de archivos de red o tal vez [rsync](https://en.wikipedia.org/wiki/Rsync)).
ClickHouse permite usar la consulta `ALTER TABLE ... FREEZE PARTITION ...` para crear una copia local de particiones de tabla. Esto se implementa utilizando enlaces duros a la carpeta `/var/lib/clickhouse/shadow/`, por lo que generalmente no consume espacio adicional en disco para datos antiguos. Las copias creadas de archivos no son manejadas por el servidor ClickHouse, por lo que puede dejarlas allí: tendrá una copia de seguridad simple que no requiere ningún sistema externo adicional, pero seguirá siendo propenso a problemas de hardware. Por esta razón, es mejor copiarlos de forma remota en otra ubicación y luego eliminar las copias locales. Los sistemas de archivos distribuidos y los almacenes de objetos siguen siendo una buena opción para esto, pero los servidores de archivos conectados normales con una capacidad lo suficientemente grande podrían funcionar también (en este caso, la transferencia ocurrirá a través del sistema de archivos de red o tal vez [rsync](https://en.wikipedia.org/wiki/Rsync)).
Para obtener más información sobre las consultas relacionadas con las manipulaciones de particiones, consulte [Documentación de ALTER](../sql-reference/statements/alter.md#alter_manipulations-with-partitions).

View File

@ -28,6 +28,8 @@ ClickHouse может принимать (`INSERT`) и отдавать (`SELECT
| [PrettySpace](#prettyspace) | ✗ | ✔ |
| [Protobuf](#protobuf) | ✔ | ✔ |
| [Parquet](#data-format-parquet) | ✔ | ✔ |
| [Arrow](#data-format-arrow) | ✔ | ✔ |
| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ |
| [ORC](#data-format-orc) | ✔ | ✗ |
| [RowBinary](#rowbinary) | ✔ | ✔ |
| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ |
@ -947,6 +949,12 @@ ClickHouse пишет и читает сообщения `Protocol Buffers` в
## Avro {#data-format-avro}
[Apache Avro](https://avro.apache.org/) — это ориентированный на строки фреймворк для сериализации данных. Разработан в рамках проекта Apache Hadoop.
В ClickHouse формат Avro поддерживает чтение и запись [файлов данных Avro](https://avro.apache.org/docs/current/spec.html#Object+Container+Files).
[Логические типы Avro](https://avro.apache.org/docs/current/spec.html#Logical+Types)
## AvroConfluent {#data-format-avro-confluent}
Для формата `AvroConfluent` ClickHouse поддерживает декодирование сообщений `Avro` с одним объектом. Такие сообщения используются с [Kafka](http://kafka.apache.org/) и реестром схем [Confluent](https://docs.confluent.io/current/schema-registry/index.html).
@ -996,7 +1004,7 @@ SELECT * FROM topic1_stream;
## Parquet {#data-format-parquet}
[Apache Parquet](http://parquet.apache.org/) — формат поколоночного хранения данных, который распространён в экосистеме Hadoop. Для формата `Parquet` ClickHouse поддерживает операции чтения и записи.
[Apache Parquet](https://parquet.apache.org/) — формат поколоночного хранения данных, который распространён в экосистеме Hadoop. Для формата `Parquet` ClickHouse поддерживает операции чтения и записи.
### Соответствие типов данных {#sootvetstvie-tipov-dannykh}
@ -1042,6 +1050,16 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_
Для обмена данными с экосистемой Hadoop можно использовать движки таблиц [HDFS](../engines/table-engines/integrations/hdfs.md).
## Arrow {#data-format-arrow}
[Apache Arrow](https://arrow.apache.org/) поставляется с двумя встроенными поколоночными форматами хранения. ClickHouse поддерживает операции чтения и записи для этих форматов.
`Arrow` — это Apache Arrow's "file mode" формат. Он предназначен для произвольного доступа в памяти.
## ArrowStream {#data-format-arrow-stream}
`ArrowStream` — это Apache Arrow's "stream mode" формат. Он предназначен для обработки потоков в памяти.
## ORC {#data-format-orc}
[Apache ORC](https://orc.apache.org/) - это column-oriented формат данных, распространённый в экосистеме Hadoop. Вы можете только вставлять данные этого формата в ClickHouse.

View File

@ -508,6 +508,29 @@ SELECT
└────────────────┴────────────┘
```
## formatReadableQuantity(x) {#formatreadablequantityx}
Принимает число. Возвращает округленное число с суффиксом (thousand, million, billion и т.д.) в виде строки.
Облегчает визуальное восприятие больших чисел живым человеком.
Пример:
``` sql
SELECT
arrayJoin([1024, 1234 * 1000, (4567 * 1000) * 1000, 98765432101234]) AS number,
formatReadableQuantity(number) AS number_for_humans
```
``` text
┌─────────number─┬─number_for_humans─┐
│ 1024 │ 1.02 thousand │
│ 1234000 │ 1.23 million │
│ 4567000000 │ 4.57 billion │
│ 98765432101234 │ 98.77 trillion │
└────────────────┴───────────────────┘
```
## least(a, b) {#leasta-b}
Возвращает наименьшее значение из a и b.

View File

@ -55,4 +55,50 @@ FROM numbers(3)
└────────────┴────────────┴──────────────┴────────────────┴─────────────────┴──────────────────────┘
```
# Случайные функции для работы со строками {#random-functions-for-working-with-strings}
## randomString {#random-string}
## randomFixedString {#random-fixed-string}
## randomPrintableASCII {#random-printable-ascii}
## randomStringUTF8 {#random-string-utf8}
## fuzzBits {#fuzzbits}
**Синтаксис**
``` sql
fuzzBits([s], [prob])
```
Инвертирует каждый бит `s` с вероятностью `prob`.
**Параметры**
- `s` — `String` or `FixedString`
- `prob` — constant `Float32/64`
**Возвращаемое значение**
Измененная случайным образом строка с тем же типом, что и `s`.
**Пример**
Запрос:
``` sql
SELECT fuzzBits(materialize('abacaba'), 0.1)
FROM numbers(3)
```
Результат:
``` text
┌─fuzzBits(materialize('abacaba'), 0.1)─┐
│ abaaaja │
│ a*cjab+ │
│ aeca2A │
└───────────────────────────────────────┘
```
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/random_functions/) <!--hide-->

View File

@ -513,4 +513,95 @@ SELECT parseDateTimeBestEffort('10 20:19')
- [toDate](#todate)
- [toDateTime](#todatetime)
## toUnixTimestamp64Milli
## toUnixTimestamp64Micro
## toUnixTimestamp64Nano
Преобразует значение `DateTime64` в значение `Int64` с фиксированной точностью менее одной секунды.
Входное значение округляется соответствующим образом вверх или вниз в зависимости от его точности. Обратите внимание, что возвращаемое значение - это временная метка в UTC, а не в часовом поясе `DateTime64`.
**Синтаксис**
``` sql
toUnixTimestamp64Milli(value)
```
**Параметры**
- `value` — значение `DateTime64` с любой точностью.
**Возвращаемое значение**
- Значение `value`, преобразованное в тип данных `Int64`.
**Примеры**
Запрос:
``` sql
WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64
SELECT toUnixTimestamp64Milli(dt64)
```
Ответ:
``` text
┌─toUnixTimestamp64Milli(dt64)─┐
│ 1568650812345 │
└──────────────────────────────┘
```
Запрос:
``` sql
WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64
SELECT toUnixTimestamp64Nano(dt64)
```
Ответ:
``` text
┌─toUnixTimestamp64Nano(dt64)─┐
│ 1568650812345678000 │
└─────────────────────────────┘
```
## fromUnixTimestamp64Milli
## fromUnixTimestamp64Micro
## fromUnixTimestamp64Nano
Преобразует значение `Int64` в значение `DateTime64` с фиксированной точностью менее одной секунды и дополнительным часовым поясом. Входное значение округляется соответствующим образом вверх или вниз в зависимости от его точности. Обратите внимание, что входное значение обрабатывается как метка времени UTC, а не метка времени в заданном (или неявном) часовом поясе.
**Синтаксис**
``` sql
fromUnixTimestamp64Milli(value [, timezone])
```
**Параметры**
- `value` — значение типа `Int64` с любой точностью.
- `timezone` — (не обязательный параметр) часовой пояс в формате `String` для возвращаемого результата.
**Возвращаемое значение**
- Значение `value`, преобразованное в тип данных `DateTime64`.
**Пример**
Запрос:
``` sql
WITH CAST(1234567891011, 'Int64') AS i64
SELECT fromUnixTimestamp64Milli(i64, 'UTC')
```
Ответ:
``` text
┌─fromUnixTimestamp64Milli(i64, 'UTC')─┐
│ 2009-02-13 23:31:31.011 │
└──────────────────────────────────────┘
```
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/type_conversion_functions/) <!--hide-->

View File

@ -5,13 +5,15 @@ toc_title: Представление
# CREATE VIEW {#create-view}
``` sql
CREATE [MATERIALIZED] VIEW [IF NOT EXISTS] [db.]table_name [TO[db.]name] [ENGINE = engine] [POPULATE] AS SELECT ...
```
Создаёт представление. Представления бывают двух видов - обычные и материализованные (MATERIALIZED).
Обычные представления не хранят никаких данных, а всего лишь производят чтение из другой таблицы. То есть, обычное представление - не более чем сохранённый запрос. При чтении из представления, этот сохранённый запрос, используется в качестве подзапроса в секции FROM.
## Обычные представления {#normal}
``` sql
CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] AS SELECT ...
```
Normal views don't store any data, they just perform a read from another table on each access. In other words, a normal view is nothing more than a saved query. When reading from a view, this saved query is used as a subquery in the [FROM](../../../sql-reference/statements/select/from.md) clause.
Для примера, пусть вы создали представление:
@ -31,15 +33,24 @@ SELECT a, b, c FROM view
SELECT a, b, c FROM (SELECT ...)
```
Материализованные (MATERIALIZED) представления хранят данные, преобразованные соответствующим запросом SELECT.
## Материализованные представления {#materialized}
При создании материализованного представления без использования `TO [db].[table]`, нужно обязательно указать ENGINE - движок таблицы для хранения данных.
``` sql
CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]name] [ENGINE = engine] [POPULATE] AS SELECT ...
```
Материализованные (MATERIALIZED) представления хранят данные, преобразованные соответствующим запросом [SELECT](../../../sql-reference/statements/select/index.md).
При создании материализованного представления без использования `TO [db].[table]`, нужно обязательно указать `ENGINE` - движок таблицы для хранения данных.
При создании материализованного представления с использованием `TO [db].[table]`, нельзя указывать `POPULATE`.
Материализованное представление устроено следующим образом: при вставке данных в таблицу, указанную в SELECT-е, кусок вставляемых данных преобразуется этим запросом SELECT, и полученный результат вставляется в представление.
Если указано POPULATE, то при создании представления, в него будут вставлены имеющиеся данные таблицы, как если бы был сделан запрос `CREATE TABLE ... AS SELECT ...` . Иначе, представление будет содержать только данные, вставляемые в таблицу после создания представления. Не рекомендуется использовать POPULATE, так как вставляемые в таблицу данные во время создания представления, не попадут в него.
!!! important "Важно"
Материализованные представления в ClickHouse больше похожи на `after insert` триггеры. Если в запросе материализованного представления есть агрегирование, оно применяется только к вставляемому блоку записей. Любые изменения существующих данных исходной таблицы (например обновление, удаление, удаление раздела и т.д.) не изменяют материализованное представление.
Если указано `POPULATE`, то при создании представления, в него будут вставлены имеющиеся данные таблицы, как если бы был сделан запрос `CREATE TABLE ... AS SELECT ...` . Иначе, представление будет содержать только данные, вставляемые в таблицу после создания представления. Не рекомендуется использовать POPULATE, так как вставляемые в таблицу данные во время создания представления, не попадут в него.
Запрос `SELECT` может содержать `DISTINCT`, `GROUP BY`, `ORDER BY`, `LIMIT`… Следует иметь в виду, что соответствующие преобразования будут выполняться независимо, на каждый блок вставляемых данных. Например, при наличии `GROUP BY`, данные будут агрегироваться при вставке, но только в рамках одной пачки вставляемых данных. Далее, данные не будут доагрегированы. Исключение - использование ENGINE, производящего агрегацию данных самостоятельно, например, `SummingMergeTree`.
@ -50,4 +61,4 @@ SELECT a, b, c FROM (SELECT ...)
Отсутствует отдельный запрос для удаления представлений. Чтобы удалить представление, следует использовать `DROP TABLE`.
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/create/view)
<!--hide-->
<!--hide-->

View File

@ -5,18 +5,35 @@ toc_title: DROP
# DROP {#drop}
Запрос имеет два вида: `DROP DATABASE` и `DROP TABLE`.
Удаляет существующий объект.
Если указано `IF EXISTS` - не выдавать ошибку, если объекта не существует.
## DROP DATABASE {#drop-database}
``` sql
DROP DATABASE [IF EXISTS] db [ON CLUSTER cluster]
```
Удаляет все таблицы в базе данных db, затем удаляет саму базу данных db.
## DROP TABLE {#drop-table}
``` sql
DROP [TEMPORARY] TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster]
```
Удаляет таблицу.
Если указано `IF EXISTS` - не выдавать ошибку, если таблица не существует или база данных не существует.
## DROP DICTIONARY {#drop-dictionary}
``` sql
DROP DICTIONARY [IF EXISTS] [db.]name
```
Удаляет словарь.
## DROP USER {#drop-user-statement}
@ -41,6 +58,7 @@ DROP USER [IF EXISTS] name [,...] [ON CLUSTER cluster_name]
DROP ROLE [IF EXISTS] name [,...] [ON CLUSTER cluster_name]
```
## DROP ROW POLICY {#drop-row-policy-statement}
Удаляет политику доступа к строкам.
@ -80,5 +98,13 @@ DROP [SETTINGS] PROFILE [IF EXISTS] name [,...] [ON CLUSTER cluster_name]
```
## DROP VIEW {#drop-view}
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/drop/) <!--hide-->
``` sql
DROP VIEW [IF EXISTS] [db.]name [ON CLUSTER cluster]
```
Удаляет представление. Представления могут быть удалены и командой `DROP TABLE`, но команда `DROP VIEW` проверяет, что `[db.]name` является представлением.
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/drop/) <!--hide-->

View File

@ -16,6 +16,7 @@ option (ENABLE_CLICKHOUSE_COMPRESSOR "Enable clickhouse-compressor" ${ENABLE_CLI
option (ENABLE_CLICKHOUSE_COPIER "Enable clickhouse-copier" ${ENABLE_CLICKHOUSE_ALL})
option (ENABLE_CLICKHOUSE_FORMAT "Enable clickhouse-format" ${ENABLE_CLICKHOUSE_ALL})
option (ENABLE_CLICKHOUSE_OBFUSCATOR "Enable clickhouse-obfuscator" ${ENABLE_CLICKHOUSE_ALL})
option (ENABLE_CLICKHOUSE_GIT_IMPORT "Enable clickhouse-git-import" ${ENABLE_CLICKHOUSE_ALL})
option (ENABLE_CLICKHOUSE_ODBC_BRIDGE "Enable clickhouse-odbc-bridge" ${ENABLE_CLICKHOUSE_ALL})
if (CLICKHOUSE_SPLIT_BINARY)
@ -91,21 +92,22 @@ add_subdirectory (copier)
add_subdirectory (format)
add_subdirectory (obfuscator)
add_subdirectory (install)
add_subdirectory (git-import)
if (ENABLE_CLICKHOUSE_ODBC_BRIDGE)
add_subdirectory (odbc-bridge)
endif ()
if (CLICKHOUSE_ONE_SHARED)
add_library(clickhouse-lib SHARED ${CLICKHOUSE_SERVER_SOURCES} ${CLICKHOUSE_CLIENT_SOURCES} ${CLICKHOUSE_LOCAL_SOURCES} ${CLICKHOUSE_BENCHMARK_SOURCES} ${CLICKHOUSE_COPIER_SOURCES} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_SOURCES} ${CLICKHOUSE_COMPRESSOR_SOURCES} ${CLICKHOUSE_FORMAT_SOURCES} ${CLICKHOUSE_OBFUSCATOR_SOURCES} ${CLICKHOUSE_ODBC_BRIDGE_SOURCES})
target_link_libraries(clickhouse-lib ${CLICKHOUSE_SERVER_LINK} ${CLICKHOUSE_CLIENT_LINK} ${CLICKHOUSE_LOCAL_LINK} ${CLICKHOUSE_BENCHMARK_LINK} ${CLICKHOUSE_COPIER_LINK} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_LINK} ${CLICKHOUSE_COMPRESSOR_LINK} ${CLICKHOUSE_FORMAT_LINK} ${CLICKHOUSE_OBFUSCATOR_LINK} ${CLICKHOUSE_ODBC_BRIDGE_LINK})
target_include_directories(clickhouse-lib ${CLICKHOUSE_SERVER_INCLUDE} ${CLICKHOUSE_CLIENT_INCLUDE} ${CLICKHOUSE_LOCAL_INCLUDE} ${CLICKHOUSE_BENCHMARK_INCLUDE} ${CLICKHOUSE_COPIER_INCLUDE} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_INCLUDE} ${CLICKHOUSE_COMPRESSOR_INCLUDE} ${CLICKHOUSE_FORMAT_INCLUDE} ${CLICKHOUSE_OBFUSCATOR_INCLUDE} ${CLICKHOUSE_ODBC_BRIDGE_INCLUDE})
add_library(clickhouse-lib SHARED ${CLICKHOUSE_SERVER_SOURCES} ${CLICKHOUSE_CLIENT_SOURCES} ${CLICKHOUSE_LOCAL_SOURCES} ${CLICKHOUSE_BENCHMARK_SOURCES} ${CLICKHOUSE_COPIER_SOURCES} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_SOURCES} ${CLICKHOUSE_COMPRESSOR_SOURCES} ${CLICKHOUSE_FORMAT_SOURCES} ${CLICKHOUSE_OBFUSCATOR_SOURCES} ${CLICKHOUSE_GIT_IMPORT_SOURCES} ${CLICKHOUSE_ODBC_BRIDGE_SOURCES})
target_link_libraries(clickhouse-lib ${CLICKHOUSE_SERVER_LINK} ${CLICKHOUSE_CLIENT_LINK} ${CLICKHOUSE_LOCAL_LINK} ${CLICKHOUSE_BENCHMARK_LINK} ${CLICKHOUSE_COPIER_LINK} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_LINK} ${CLICKHOUSE_COMPRESSOR_LINK} ${CLICKHOUSE_FORMAT_LINK} ${CLICKHOUSE_OBFUSCATOR_LINK} ${CLICKHOUSE_GIT_IMPORT_LINK} ${CLICKHOUSE_ODBC_BRIDGE_LINK})
target_include_directories(clickhouse-lib ${CLICKHOUSE_SERVER_INCLUDE} ${CLICKHOUSE_CLIENT_INCLUDE} ${CLICKHOUSE_LOCAL_INCLUDE} ${CLICKHOUSE_BENCHMARK_INCLUDE} ${CLICKHOUSE_COPIER_INCLUDE} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_INCLUDE} ${CLICKHOUSE_COMPRESSOR_INCLUDE} ${CLICKHOUSE_FORMAT_INCLUDE} ${CLICKHOUSE_OBFUSCATOR_INCLUDE} ${CLICKHOUSE_GIT_IMPORT_INCLUDE} ${CLICKHOUSE_ODBC_BRIDGE_INCLUDE})
set_target_properties(clickhouse-lib PROPERTIES SOVERSION ${VERSION_MAJOR}.${VERSION_MINOR} VERSION ${VERSION_SO} OUTPUT_NAME clickhouse DEBUG_POSTFIX "")
install (TARGETS clickhouse-lib LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT clickhouse)
endif()
if (CLICKHOUSE_SPLIT_BINARY)
set (CLICKHOUSE_ALL_TARGETS clickhouse-server clickhouse-client clickhouse-local clickhouse-benchmark clickhouse-extract-from-config clickhouse-compressor clickhouse-format clickhouse-obfuscator clickhouse-copier)
set (CLICKHOUSE_ALL_TARGETS clickhouse-server clickhouse-client clickhouse-local clickhouse-benchmark clickhouse-extract-from-config clickhouse-compressor clickhouse-format clickhouse-obfuscator clickhouse-git-import clickhouse-copier)
if (ENABLE_CLICKHOUSE_ODBC_BRIDGE)
list (APPEND CLICKHOUSE_ALL_TARGETS clickhouse-odbc-bridge)
@ -149,6 +151,9 @@ else ()
if (ENABLE_CLICKHOUSE_OBFUSCATOR)
clickhouse_target_link_split_lib(clickhouse obfuscator)
endif ()
if (ENABLE_CLICKHOUSE_GIT_IMPORT)
clickhouse_target_link_split_lib(clickhouse git-import)
endif ()
if (ENABLE_CLICKHOUSE_INSTALL)
clickhouse_target_link_split_lib(clickhouse install)
endif ()
@ -199,6 +204,11 @@ else ()
install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-obfuscator DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-obfuscator)
endif ()
if (ENABLE_CLICKHOUSE_GIT_IMPORT)
add_custom_target (clickhouse-git-import ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-git-import DEPENDS clickhouse)
install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-git-import DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-git-import)
endif ()
if(ENABLE_CLICKHOUSE_ODBC_BRIDGE)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-odbc-bridge)
endif()

View File

@ -902,74 +902,127 @@ private:
return processMultiQuery(text);
}
bool processMultiQuery(const String & text)
bool processMultiQuery(const String & all_queries_text)
{
const bool test_mode = config().has("testmode");
{ /// disable logs if expects errors
TestHint test_hint(test_mode, text);
TestHint test_hint(test_mode, all_queries_text);
if (test_hint.clientError() || test_hint.serverError())
processTextAsSingleQuery("SET send_logs_level = 'none'");
}
/// Several queries separated by ';'.
/// INSERT data is ended by the end of line, not ';'.
/// An exception is VALUES format where we also support semicolon in
/// addition to end of line.
const char * begin = text.data();
const char * end = begin + text.size();
const char * this_query_begin = all_queries_text.data();
const char * all_queries_end = all_queries_text.data() + all_queries_text.size();
while (begin < end)
while (this_query_begin < all_queries_end)
{
const char * pos = begin;
ASTPtr orig_ast = parseQuery(pos, end, true);
// Use the token iterator to skip any whitespace, semicolons and
// comments at the beginning of the query. An example from regression
// tests:
// insert into table t values ('invalid'); -- { serverError 469 }
// select 1
// Here the test hint comment gets parsed as a part of second query.
// We parse the `INSERT VALUES` up to the semicolon, and the rest
// looks like a two-line query:
// -- { serverError 469 }
// select 1
// and we expect it to fail with error 469, but this hint is actually
// for the previous query. Test hints should go after the query, so
// we can fix this by skipping leading comments. Token iterator skips
// comments and whitespace by itself, so we only have to check for
// semicolons.
// The code block is to limit visibility of `tokens` because we have
// another such variable further down the code, and get warnings for
// that.
{
Tokens tokens(this_query_begin, all_queries_end);
IParser::Pos token_iterator(tokens,
context.getSettingsRef().max_parser_depth);
while (token_iterator->type == TokenType::Semicolon
&& token_iterator.isValid())
{
++token_iterator;
}
this_query_begin = token_iterator->begin;
if (this_query_begin >= all_queries_end)
{
break;
}
}
if (!orig_ast)
// Try to parse the query.
const char * this_query_end = this_query_begin;
parsed_query = parseQuery(this_query_end, all_queries_end, true);
if (!parsed_query)
{
if (ignore_error)
{
Tokens tokens(begin, end);
Tokens tokens(this_query_begin, all_queries_end);
IParser::Pos token_iterator(tokens, context.getSettingsRef().max_parser_depth);
while (token_iterator->type != TokenType::Semicolon && token_iterator.isValid())
++token_iterator;
begin = token_iterator->end;
this_query_begin = token_iterator->end;
continue;
}
return true;
}
auto * insert = orig_ast->as<ASTInsertQuery>();
if (insert && insert->data)
// INSERT queries may have the inserted data in the query text
// that follows the query itself, e.g. "insert into t format CSV 1;2".
// They need special handling. First of all, here we find where the
// inserted data ends. In multi-query mode, it is delimited by a
// newline.
// The VALUES format needs even more handling -- we also allow the
// data to be delimited by semicolon. This case is handled later by
// the format parser itself.
auto * insert_ast = parsed_query->as<ASTInsertQuery>();
if (insert_ast && insert_ast->data)
{
pos = find_first_symbols<'\n'>(insert->data, end);
insert->end = pos;
this_query_end = find_first_symbols<'\n'>(insert_ast->data, all_queries_end);
insert_ast->end = this_query_end;
query_to_send = all_queries_text.substr(
this_query_begin - all_queries_text.data(),
insert_ast->data - this_query_begin);
}
else
{
query_to_send = all_queries_text.substr(
this_query_begin - all_queries_text.data(),
this_query_end - this_query_begin);
}
String str = text.substr(begin - text.data(), pos - begin);
// full_query is the query + inline INSERT data.
full_query = all_queries_text.substr(
this_query_begin - all_queries_text.data(),
this_query_end - this_query_begin);
begin = pos;
while (isWhitespaceASCII(*begin) || *begin == ';')
++begin;
TestHint test_hint(test_mode, str);
// Look for the hint in the text of query + insert data, if any.
// e.g. insert into t format CSV 'a' -- { serverError 123 }.
TestHint test_hint(test_mode, full_query);
expected_client_error = test_hint.clientError();
expected_server_error = test_hint.serverError();
try
{
auto ast_to_process = orig_ast;
if (insert && insert->data)
processParsedSingleQuery();
if (insert_ast && insert_ast->data)
{
ast_to_process = nullptr;
processTextAsSingleQuery(str);
}
else
{
parsed_query = ast_to_process;
full_query = str;
query_to_send = str;
processParsedSingleQuery();
// For VALUES format: use the end of inline data as reported
// by the format parser (it is saved in sendData()). This
// allows us to handle queries like:
// insert into t values (1); select 1
//, where the inline data is delimited by semicolon and not
// by a newline.
this_query_end = parsed_query->as<ASTInsertQuery>()->end;
}
}
catch (...)
@ -977,7 +1030,7 @@ private:
last_exception_received_from_server = std::make_unique<Exception>(getCurrentExceptionMessage(true), getCurrentExceptionCode());
actual_client_error = last_exception_received_from_server->code();
if (!ignore_error && (!actual_client_error || actual_client_error != expected_client_error))
std::cerr << "Error on processing query: " << str << std::endl << last_exception_received_from_server->message();
std::cerr << "Error on processing query: " << full_query << std::endl << last_exception_received_from_server->message();
received_exception_from_server = true;
}
@ -991,6 +1044,8 @@ private:
else
return false;
}
this_query_begin = this_query_end;
}
return true;
@ -1415,7 +1470,7 @@ private:
void sendData(Block & sample, const ColumnsDescription & columns_description)
{
/// If INSERT data must be sent.
const auto * parsed_insert_query = parsed_query->as<ASTInsertQuery>();
auto * parsed_insert_query = parsed_query->as<ASTInsertQuery>();
if (!parsed_insert_query)
return;
@ -1424,6 +1479,9 @@ private:
/// Send data contained in the query.
ReadBufferFromMemory data_in(parsed_insert_query->data, parsed_insert_query->end - parsed_insert_query->data);
sendDataFrom(data_in, sample, columns_description);
// Remember where the data ended. We use this info later to determine
// where the next query begins.
parsed_insert_query->end = data_in.buffer().begin() + data_in.count();
}
else if (!is_interactive)
{

View File

@ -12,5 +12,6 @@
#cmakedefine01 ENABLE_CLICKHOUSE_COMPRESSOR
#cmakedefine01 ENABLE_CLICKHOUSE_FORMAT
#cmakedefine01 ENABLE_CLICKHOUSE_OBFUSCATOR
#cmakedefine01 ENABLE_CLICKHOUSE_GIT_IMPORT
#cmakedefine01 ENABLE_CLICKHOUSE_INSTALL
#cmakedefine01 ENABLE_CLICKHOUSE_ODBC_BRIDGE

View File

@ -0,0 +1,10 @@
# Build description of the clickhouse-git-import program.

# Source files of the program.
set (CLICKHOUSE_GIT_IMPORT_SOURCES git-import.cpp)
# Link dependencies of the program, stored together with the PRIVATE scope keyword.
set (CLICKHOUSE_GIT_IMPORT_LINK
PRIVATE
boost::program_options
dbms
)
# NOTE(review): clickhouse_program_add is defined elsewhere; presumably it picks up the
# CLICKHOUSE_GIT_IMPORT_* variables set above to declare the target -- confirm.
clickhouse_program_add(git-import)

View File

@ -0,0 +1,2 @@
/// Entry point of the git-import tool; it is defined in another translation unit.
int mainEntryClickHouseGitImport(int argc, char ** argv);

/// Stand-alone `main`: hands the command line over to the tool's entry point unchanged
/// and uses its result as the process exit code.
int main(int argc_, char ** argv_)
{
    return mainEntryClickHouseGitImport(argc_, argv_);
}

File diff suppressed because it is too large Load Diff

View File

@ -205,6 +205,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
"clickhouse-benchmark",
"clickhouse-copier",
"clickhouse-obfuscator",
"clickhouse-git-import",
"clickhouse-compressor",
"clickhouse-format",
"clickhouse-extract-from-config"

View File

@ -46,6 +46,9 @@ int mainEntryClickHouseClusterCopier(int argc, char ** argv);
#if ENABLE_CLICKHOUSE_OBFUSCATOR
int mainEntryClickHouseObfuscator(int argc, char ** argv);
#endif
#if ENABLE_CLICKHOUSE_GIT_IMPORT
int mainEntryClickHouseGitImport(int argc, char ** argv);
#endif
#if ENABLE_CLICKHOUSE_INSTALL
int mainEntryClickHouseInstall(int argc, char ** argv);
int mainEntryClickHouseStart(int argc, char ** argv);
@ -91,6 +94,9 @@ std::pair<const char *, MainFunc> clickhouse_applications[] =
#if ENABLE_CLICKHOUSE_OBFUSCATOR
{"obfuscator", mainEntryClickHouseObfuscator},
#endif
#if ENABLE_CLICKHOUSE_GIT_IMPORT
{"git-import", mainEntryClickHouseGitImport},
#endif
#if ENABLE_CLICKHOUSE_INSTALL
{"install", mainEntryClickHouseInstall},
{"start", mainEntryClickHouseStart},

View File

@ -181,6 +181,15 @@ void AccessControlManager::addUsersConfigStorage(
const String & preprocessed_dir_,
const zkutil::GetZooKeeper & get_zookeeper_function_)
{
auto storages = getStoragesPtr();
for (const auto & storage : *storages)
{
if (auto users_config_storage = typeid_cast<std::shared_ptr<UsersConfigAccessStorage>>(storage))
{
if (users_config_storage->getStoragePath() == users_config_path_)
return;
}
}
auto check_setting_name_function = [this](const std::string_view & setting_name) { checkSettingNameIsAllowed(setting_name); };
auto new_storage = std::make_shared<UsersConfigAccessStorage>(storage_name_, check_setting_name_function);
new_storage->load(users_config_path_, include_from_path_, preprocessed_dir_, get_zookeeper_function_);
@ -210,17 +219,36 @@ void AccessControlManager::startPeriodicReloadingUsersConfigs()
void AccessControlManager::addDiskStorage(const String & directory_, bool readonly_)
{
addStorage(std::make_shared<DiskAccessStorage>(directory_, readonly_));
addDiskStorage(DiskAccessStorage::STORAGE_TYPE, directory_, readonly_);
}
/// Adds a disk access storage named `storage_name_` located at `directory_`.
/// If a DiskAccessStorage with an equal path is already registered, no new storage is created;
/// in that case the existing storage is switched to read-only when `readonly_` is true
/// (it is never switched back to writable here).
void AccessControlManager::addDiskStorage(const String & storage_name_, const String & directory_, bool readonly_)
{
    auto registered_storages = getStoragesPtr();
    for (const auto & candidate : *registered_storages)
    {
        auto existing_disk_storage = typeid_cast<std::shared_ptr<DiskAccessStorage>>(candidate);

        /// Skip storages of other types and disk storages located elsewhere.
        if (!existing_disk_storage || !existing_disk_storage->isStoragePathEqual(directory_))
            continue;

        if (readonly_)
            existing_disk_storage->setReadOnly(readonly_);
        return;
    }

    addStorage(std::make_shared<DiskAccessStorage>(storage_name_, directory_, readonly_));
}
/// Adds a memory access storage named `storage_name_`,
/// unless some MemoryAccessStorage is already registered (then nothing is done).
void AccessControlManager::addMemoryStorage(const String & storage_name_)
{
    auto registered_storages = getStoragesPtr();
    for (const auto & candidate : *registered_storages)
    {
        const bool is_memory_storage = static_cast<bool>(typeid_cast<std::shared_ptr<MemoryAccessStorage>>(candidate));
        if (is_memory_storage)
            return;
    }

    addStorage(std::make_shared<MemoryAccessStorage>(storage_name_));
}

View File

@ -218,6 +218,16 @@ namespace
}
/// Converts a directory path to its weakly canonical form
/// and makes sure the result ends with a path separator.
String makeDirectoryPathCanonical(const String & directory_path)
{
    std::filesystem::path result = std::filesystem::weakly_canonical(directory_path);

    /// A non-empty last component means there is no trailing separator yet.
    const bool needs_trailing_separator = result.has_filename();
    if (needs_trailing_separator)
        result += std::filesystem::path::preferred_separator;

    return result;
}
/// Calculates the path to a file named <id>.sql for saving an access entity.
String getEntityFilePath(const String & directory_path, const UUID & id)
{
@ -298,22 +308,17 @@ DiskAccessStorage::DiskAccessStorage(const String & directory_path_, bool readon
{
}
DiskAccessStorage::DiskAccessStorage(const String & storage_name_, const String & directory_path_, bool readonly_)
: IAccessStorage(storage_name_)
{
auto canonical_directory_path = std::filesystem::weakly_canonical(directory_path_);
if (canonical_directory_path.has_filename())
canonical_directory_path += std::filesystem::path::preferred_separator;
directory_path = makeDirectoryPathCanonical(directory_path_);
readonly = readonly_;
std::error_code create_dir_error_code;
std::filesystem::create_directories(canonical_directory_path, create_dir_error_code);
std::filesystem::create_directories(directory_path, create_dir_error_code);
if (!std::filesystem::exists(canonical_directory_path) || !std::filesystem::is_directory(canonical_directory_path) || create_dir_error_code)
throw Exception("Couldn't create directory " + canonical_directory_path.string() + " reason: '" + create_dir_error_code.message() + "'", ErrorCodes::DIRECTORY_DOESNT_EXIST);
directory_path = canonical_directory_path;
readonly = readonly_;
if (!std::filesystem::exists(directory_path) || !std::filesystem::is_directory(directory_path) || create_dir_error_code)
throw Exception("Couldn't create directory " + directory_path + " reason: '" + create_dir_error_code.message() + "'", ErrorCodes::DIRECTORY_DOESNT_EXIST);
bool should_rebuild_lists = std::filesystem::exists(getNeedRebuildListsMarkFilePath(directory_path));
if (!should_rebuild_lists)
@ -337,6 +342,12 @@ DiskAccessStorage::~DiskAccessStorage()
}
bool DiskAccessStorage::isStoragePathEqual(const String & directory_path_) const
{
return getStoragePath() == makeDirectoryPathCanonical(directory_path_);
}
void DiskAccessStorage::clear()
{
entries_by_id.clear();
@ -426,33 +437,41 @@ bool DiskAccessStorage::writeLists()
void DiskAccessStorage::scheduleWriteLists(EntityType type)
{
if (failed_to_write_lists)
return;
return; /// We don't try to write list files after the first fail.
/// The next restart of the server will invoke rebuilding of the list files.
bool already_scheduled = !types_of_lists_to_write.empty();
types_of_lists_to_write.insert(type);
if (already_scheduled)
return;
if (lists_writing_thread_is_waiting)
return; /// If the lists' writing thread is still waiting we can update `types_of_lists_to_write` easily,
/// without restarting that thread.
if (lists_writing_thread.joinable())
lists_writing_thread.join();
/// Create the 'need_rebuild_lists.mark' file.
/// This file will be used later to find out if writing lists is successful or not.
std::ofstream{getNeedRebuildListsMarkFilePath(directory_path)};
startListsWritingThread();
lists_writing_thread = ThreadFromGlobalPool{&DiskAccessStorage::listsWritingThreadFunc, this};
lists_writing_thread_is_waiting = true;
}
void DiskAccessStorage::startListsWritingThread()
void DiskAccessStorage::listsWritingThreadFunc()
{
if (lists_writing_thread.joinable())
std::unique_lock lock{mutex};
{
if (!lists_writing_thread_exited)
return;
lists_writing_thread.detach();
/// It's better not to write the lists files too often, that's why we need
/// the following timeout.
const auto timeout = std::chrono::minutes(1);
SCOPE_EXIT({ lists_writing_thread_is_waiting = false; });
if (lists_writing_thread_should_exit.wait_for(lock, timeout) != std::cv_status::timeout)
return; /// The destructor requires us to exit.
}
lists_writing_thread_exited = false;
lists_writing_thread = ThreadFromGlobalPool{&DiskAccessStorage::listsWritingThreadFunc, this};
writeLists();
}
@ -466,21 +485,6 @@ void DiskAccessStorage::stopListsWritingThread()
}
void DiskAccessStorage::listsWritingThreadFunc()
{
std::unique_lock lock{mutex};
SCOPE_EXIT({ lists_writing_thread_exited = true; });
/// It's better not to write the lists files too often, that's why we need
/// the following timeout.
const auto timeout = std::chrono::minutes(1);
if (lists_writing_thread_should_exit.wait_for(lock, timeout) != std::cv_status::timeout)
return; /// The destructor requires us to exit.
writeLists();
}
/// Reads and parses all the "<id>.sql" files from a specified directory
/// and then saves the files "users.list", "roles.list", etc. to the same directory.
bool DiskAccessStorage::rebuildLists()

View File

@ -18,7 +18,11 @@ public:
~DiskAccessStorage() override;
const char * getStorageType() const override { return STORAGE_TYPE; }
String getStoragePath() const override { return directory_path; }
bool isStoragePathEqual(const String & directory_path_) const;
void setReadOnly(bool readonly_) { readonly = readonly_; }
bool isStorageReadOnly() const override { return readonly; }
private:
@ -42,9 +46,8 @@ private:
void scheduleWriteLists(EntityType type);
bool rebuildLists();
void startListsWritingThread();
void stopListsWritingThread();
void listsWritingThreadFunc();
void stopListsWritingThread();
void insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, Notifications & notifications);
void removeNoLock(const UUID & id, Notifications & notifications);
@ -67,14 +70,14 @@ private:
void prepareNotifications(const UUID & id, const Entry & entry, bool remove, Notifications & notifications) const;
String directory_path;
bool readonly;
std::atomic<bool> readonly;
std::unordered_map<UUID, Entry> entries_by_id;
std::unordered_map<std::string_view, Entry *> entries_by_name_and_type[static_cast<size_t>(EntityType::MAX)];
boost::container::flat_set<EntityType> types_of_lists_to_write;
bool failed_to_write_lists = false; /// Whether writing of the list files has been failed since the recent restart of the server.
ThreadFromGlobalPool lists_writing_thread; /// List files are written in a separate thread.
std::condition_variable lists_writing_thread_should_exit; /// Signals `lists_writing_thread` to exit.
std::atomic<bool> lists_writing_thread_exited = false;
bool lists_writing_thread_is_waiting = false;
mutable std::list<OnChangedHandler> handlers_by_type[static_cast<size_t>(EntityType::MAX)];
mutable std::mutex mutex;
};

View File

@ -7,6 +7,7 @@
#include <common/unaligned.h>
#include <Core/Field.h>
#include <Core/BigInt.h>
#include <Common/assert_cast.h>
namespace DB
@ -130,7 +131,7 @@ public:
void insertFrom(const IColumn & src, size_t n) override
{
data.push_back(static_cast<const Self &>(src).getData()[n]);
data.push_back(assert_cast<const Self &>(src).getData()[n]);
}
void insertData(const char * pos, size_t) override
@ -205,14 +206,14 @@ public:
/// This method implemented in header because it could be possibly devirtualized.
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override
{
return CompareHelper<T>::compare(data[n], static_cast<const Self &>(rhs_).data[m], nan_direction_hint);
return CompareHelper<T>::compare(data[n], assert_cast<const Self &>(rhs_).data[m], nan_direction_hint);
}
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override
{
return this->template doCompareColumn<Self>(static_cast<const Self &>(rhs), rhs_row_num, row_indexes,
return this->template doCompareColumn<Self>(assert_cast<const Self &>(rhs), rhs_row_num, row_indexes,
compare_results, direction, nan_direction_hint);
}

View File

@ -68,8 +68,14 @@ String Macros::expand(const String & s,
res += database_name;
else if (macro_name == "table" && !table_name.empty())
res += table_name;
else if (macro_name == "uuid" && uuid != UUIDHelpers::Nil)
else if (macro_name == "uuid")
{
if (uuid == UUIDHelpers::Nil)
throw Exception("Macro 'uuid' and empty arguments of ReplicatedMergeTree "
"are supported only for ON CLUSTER queries with Atomic database engine",
ErrorCodes::SYNTAX_ERROR);
res += toString(uuid);
}
else
throw Exception("No macro '" + macro_name +
"' in config while processing substitutions in '" + s + "' at '"

View File

@ -57,7 +57,16 @@ ShellCommand::~ShellCommand()
LOG_WARNING(getLogger(), "Cannot kill shell command pid {} errno '{}'", pid, errnoToString(retcode));
}
else if (!wait_called)
tryWait();
{
try
{
tryWait();
}
catch (...)
{
tryLogCurrentException(getLogger());
}
}
}
void ShellCommand::logCommand(const char * filename, char * const argv[])
@ -74,7 +83,8 @@ void ShellCommand::logCommand(const char * filename, char * const argv[])
LOG_TRACE(ShellCommand::getLogger(), "Will start shell command '{}' with arguments {}", filename, args.str());
}
std::unique_ptr<ShellCommand> ShellCommand::executeImpl(const char * filename, char * const argv[], bool pipe_stdin_only, bool terminate_in_destructor)
std::unique_ptr<ShellCommand> ShellCommand::executeImpl(
const char * filename, char * const argv[], bool pipe_stdin_only, bool terminate_in_destructor)
{
logCommand(filename, argv);
@ -130,7 +140,8 @@ std::unique_ptr<ShellCommand> ShellCommand::executeImpl(const char * filename, c
_exit(int(ReturnCodes::CANNOT_EXEC));
}
std::unique_ptr<ShellCommand> res(new ShellCommand(pid, pipe_stdin.fds_rw[1], pipe_stdout.fds_rw[0], pipe_stderr.fds_rw[0], terminate_in_destructor));
std::unique_ptr<ShellCommand> res(new ShellCommand(
pid, pipe_stdin.fds_rw[1], pipe_stdout.fds_rw[0], pipe_stderr.fds_rw[0], terminate_in_destructor));
LOG_TRACE(getLogger(), "Started shell command '{}' with pid {}", filename, pid);
@ -143,7 +154,8 @@ std::unique_ptr<ShellCommand> ShellCommand::executeImpl(const char * filename, c
}
std::unique_ptr<ShellCommand> ShellCommand::execute(const std::string & command, bool pipe_stdin_only, bool terminate_in_destructor)
std::unique_ptr<ShellCommand> ShellCommand::execute(
const std::string & command, bool pipe_stdin_only, bool terminate_in_destructor)
{
/// Arguments in non-constant chunks of memory (as required for `execv`).
/// Moreover, their copying must be done before calling `vfork`, so after `vfork` do a minimum of things.
@ -157,7 +169,8 @@ std::unique_ptr<ShellCommand> ShellCommand::execute(const std::string & command,
}
std::unique_ptr<ShellCommand> ShellCommand::executeDirect(const std::string & path, const std::vector<std::string> & arguments, bool terminate_in_destructor)
std::unique_ptr<ShellCommand> ShellCommand::executeDirect(
const std::string & path, const std::vector<std::string> & arguments, bool terminate_in_destructor)
{
size_t argv_sum_size = path.size() + 1;
for (const auto & arg : arguments)
@ -186,6 +199,10 @@ int ShellCommand::tryWait()
{
wait_called = true;
in.close();
out.close();
err.close();
LOG_TRACE(getLogger(), "Will wait for shell command pid {}", pid);
int status = 0;

View File

@ -50,21 +50,22 @@ uint64_t readLengthEncodedNumber(ReadBuffer & buffer)
uint64_t buf = 0;
buffer.readStrict(c);
auto cc = static_cast<uint8_t>(c);
if (cc < 0xfc)
switch (cc)
{
return cc;
}
else if (cc < 0xfd)
{
buffer.readStrict(reinterpret_cast<char *>(&buf), 2);
}
else if (cc < 0xfe)
{
buffer.readStrict(reinterpret_cast<char *>(&buf), 3);
}
else
{
buffer.readStrict(reinterpret_cast<char *>(&buf), 8);
/// NULL
case 0xfb:
break;
case 0xfc:
buffer.readStrict(reinterpret_cast<char *>(&buf), 2);
break;
case 0xfd:
buffer.readStrict(reinterpret_cast<char *>(&buf), 3);
break;
case 0xfe:
buffer.readStrict(reinterpret_cast<char *>(&buf), 8);
break;
default:
return cc;
}
return buf;
}

View File

@ -171,7 +171,7 @@ namespace MySQLReplication
/// Ignore MySQL 8.0 optional metadata fields.
/// https://mysqlhighavailability.com/more-metadata-is-written-into-binary-log/
payload.ignore(payload.available() - CHECKSUM_CRC32_SIGNATURE_LENGTH);
payload.ignoreAll();
}
/// Types that are not used in the binlog event:
@ -221,6 +221,7 @@ namespace MySQLReplication
}
case MYSQL_TYPE_NEWDECIMAL:
case MYSQL_TYPE_STRING: {
/// Big-Endian
auto b0 = UInt16(meta[pos] << 8);
auto b1 = UInt8(meta[pos + 1]);
column_meta.emplace_back(UInt16(b0 + b1));
@ -231,6 +232,7 @@ namespace MySQLReplication
case MYSQL_TYPE_BIT:
case MYSQL_TYPE_VARCHAR:
case MYSQL_TYPE_VAR_STRING: {
/// Little-Endian
auto b0 = UInt8(meta[pos]);
auto b1 = UInt16(meta[pos + 1] << 8);
column_meta.emplace_back(UInt16(b0 + b1));
@ -911,7 +913,7 @@ namespace MySQLReplication
break;
}
}
payload.tryIgnore(CHECKSUM_CRC32_SIGNATURE_LENGTH);
payload.ignoreAll();
}
}

View File

@ -283,6 +283,7 @@ int main(int argc, char ** argv)
}
{
/// mysql_protocol --host=172.17.0.3 --user=root --password=123 --db=sbtest
try
{
boost::program_options::options_description desc("Allowed options");

View File

@ -308,16 +308,30 @@ ReturnType DataTypeNullable::deserializeTextQuoted(IColumn & column, ReadBuffer
const DataTypePtr & nested_data_type)
{
return safeDeserialize<ReturnType>(column, *nested_data_type,
[&istr] { return checkStringByFirstCharacterAndAssertTheRestCaseInsensitive("NULL", istr); },
[&istr]
{
return checkStringByFirstCharacterAndAssertTheRestCaseInsensitive("NULL", istr);
},
[&nested_data_type, &istr, &settings] (IColumn & nested) { nested_data_type->deserializeAsTextQuoted(nested, istr, settings); });
}
void DataTypeNullable::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
safeDeserialize(column, *nested_data_type,
[&istr] { return checkStringByFirstCharacterAndAssertTheRestCaseInsensitive("NULL", istr); },
[this, &istr, &settings] (IColumn & nested) { nested_data_type->deserializeAsWholeText(nested, istr, settings); });
deserializeWholeText<void>(column, istr, settings, nested_data_type);
}
/// Deserializes a whole-text value into `column` via safeDeserialize.
/// The input is first checked for a NULL marker: either "NULL" (matched case-insensitively)
/// or "ᴺᵁᴸᴸ"; the nested value is read with `deserializeAsWholeText` of the nested type.
template <typename ReturnType>
ReturnType DataTypeNullable::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings,
    const DataTypePtr & nested_data_type)
{
    auto check_for_null = [&istr]
    {
        return checkStringByFirstCharacterAndAssertTheRestCaseInsensitive("NULL", istr)
            || checkStringByFirstCharacterAndAssertTheRest("ᴺᵁᴸᴸ", istr);
    };

    auto deserialize_nested = [&nested_data_type, &istr, &settings] (IColumn & nested)
    {
        nested_data_type->deserializeAsWholeText(nested, istr, settings);
    };

    return safeDeserialize<ReturnType>(column, *nested_data_type, check_for_null, deserialize_nested);
}
@ -544,6 +558,7 @@ DataTypePtr removeNullable(const DataTypePtr & type)
}
/// Explicit instantiations of the static DataTypeNullable::deserialize* templates for ReturnType = bool,
/// one per text format variant (whole text, escaped, quoted, CSV).
template bool DataTypeNullable::deserializeWholeText<bool>(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const DataTypePtr & nested);
template bool DataTypeNullable::deserializeTextEscaped<bool>(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const DataTypePtr & nested);
template bool DataTypeNullable::deserializeTextQuoted<bool>(IColumn & column, ReadBuffer & istr, const FormatSettings &, const DataTypePtr & nested);
template bool DataTypeNullable::deserializeTextCSV<bool>(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const DataTypePtr & nested);

View File

@ -103,6 +103,8 @@ public:
/// If ReturnType is bool, check for NULL and deserialize value into non-nullable column (and return true) or insert default value of nested type (and return false)
/// If ReturnType is void, deserialize Nullable(T)
template <typename ReturnType = bool>
static ReturnType deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const DataTypePtr & nested);
template <typename ReturnType = bool>
static ReturnType deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const DataTypePtr & nested);
template <typename ReturnType = bool>
static ReturnType deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &, const DataTypePtr & nested);

View File

@ -195,6 +195,7 @@ void MaterializeMySQLSyncThread::synchronization(const String & mysql_version)
}
catch (...)
{
client.disconnect();
tryLogCurrentException(log);
getDatabase(database_name).setException(std::current_exception());
}
@ -206,6 +207,7 @@ void MaterializeMySQLSyncThread::stopSynchronization()
{
sync_quit = true;
background_thread_pool->join();
client.disconnect();
}
}

View File

@ -324,13 +324,86 @@ void FormatFactory::registerFileSegmentationEngine(const String & name, FileSegm
target = std::move(file_segmentation_engine);
}
/// Forward declarations of the per-format registration functions.
/// Each one takes the factory by reference; the FormatFactory constructor below calls them with *this.
/// File Segmentation Engines for parallel reading
void registerFileSegmentationEngineTabSeparated(FormatFactory & factory);
void registerFileSegmentationEngineCSV(FormatFactory & factory);
void registerFileSegmentationEngineJSONEachRow(FormatFactory & factory);
void registerFileSegmentationEngineRegexp(FormatFactory & factory);
void registerFileSegmentationEngineJSONAsString(FormatFactory & factory);
/// Formats for both input/output.
void registerInputFormatNative(FormatFactory & factory);
void registerOutputFormatNative(FormatFactory & factory);
void registerInputFormatProcessorNative(FormatFactory & factory);
void registerOutputFormatProcessorNative(FormatFactory & factory);
void registerInputFormatProcessorRowBinary(FormatFactory & factory);
void registerOutputFormatProcessorRowBinary(FormatFactory & factory);
void registerInputFormatProcessorTabSeparated(FormatFactory & factory);
void registerOutputFormatProcessorTabSeparated(FormatFactory & factory);
void registerInputFormatProcessorValues(FormatFactory & factory);
void registerOutputFormatProcessorValues(FormatFactory & factory);
void registerInputFormatProcessorCSV(FormatFactory & factory);
void registerOutputFormatProcessorCSV(FormatFactory & factory);
void registerInputFormatProcessorTSKV(FormatFactory & factory);
void registerOutputFormatProcessorTSKV(FormatFactory & factory);
void registerInputFormatProcessorJSONEachRow(FormatFactory & factory);
void registerOutputFormatProcessorJSONEachRow(FormatFactory & factory);
void registerInputFormatProcessorJSONCompactEachRow(FormatFactory & factory);
void registerOutputFormatProcessorJSONCompactEachRow(FormatFactory & factory);
void registerInputFormatProcessorProtobuf(FormatFactory & factory);
void registerOutputFormatProcessorProtobuf(FormatFactory & factory);
void registerInputFormatProcessorTemplate(FormatFactory & factory);
void registerOutputFormatProcessorTemplate(FormatFactory & factory);
void registerInputFormatProcessorMsgPack(FormatFactory & factory);
void registerOutputFormatProcessorMsgPack(FormatFactory & factory);
void registerInputFormatProcessorORC(FormatFactory & factory);
void registerOutputFormatProcessorORC(FormatFactory & factory);
void registerInputFormatProcessorParquet(FormatFactory & factory);
void registerOutputFormatProcessorParquet(FormatFactory & factory);
void registerInputFormatProcessorArrow(FormatFactory & factory);
void registerOutputFormatProcessorArrow(FormatFactory & factory);
void registerInputFormatProcessorAvro(FormatFactory & factory);
void registerOutputFormatProcessorAvro(FormatFactory & factory);
/// Output only (presentational) formats.
void registerOutputFormatNull(FormatFactory & factory);
void registerOutputFormatProcessorPretty(FormatFactory & factory);
void registerOutputFormatProcessorPrettyCompact(FormatFactory & factory);
void registerOutputFormatProcessorPrettySpace(FormatFactory & factory);
void registerOutputFormatProcessorVertical(FormatFactory & factory);
void registerOutputFormatProcessorJSON(FormatFactory & factory);
void registerOutputFormatProcessorJSONCompact(FormatFactory & factory);
void registerOutputFormatProcessorJSONEachRowWithProgress(FormatFactory & factory);
void registerOutputFormatProcessorXML(FormatFactory & factory);
void registerOutputFormatProcessorODBCDriver2(FormatFactory & factory);
void registerOutputFormatProcessorNull(FormatFactory & factory);
void registerOutputFormatProcessorMySQLWire(FormatFactory & factory);
void registerOutputFormatProcessorMarkdown(FormatFactory & factory);
void registerOutputFormatProcessorPostgreSQLWire(FormatFactory & factory);
/// Input only formats.
void registerInputFormatProcessorRegexp(FormatFactory & factory);
void registerInputFormatProcessorJSONAsString(FormatFactory & factory);
void registerInputFormatProcessorLineAsString(FormatFactory & factory);
void registerInputFormatProcessorCapnProto(FormatFactory & factory);
FormatFactory::FormatFactory()
{
registerFileSegmentationEngineTabSeparated(*this);
registerFileSegmentationEngineCSV(*this);
registerFileSegmentationEngineJSONEachRow(*this);
registerFileSegmentationEngineRegexp(*this);
registerFileSegmentationEngineJSONAsString(*this);
registerInputFormatNative(*this);
registerOutputFormatNative(*this);
registerOutputFormatProcessorJSONEachRowWithProgress(*this);
registerInputFormatProcessorNative(*this);
registerOutputFormatProcessorNative(*this);
registerInputFormatProcessorRowBinary(*this);
@ -349,8 +422,11 @@ FormatFactory::FormatFactory()
registerOutputFormatProcessorJSONCompactEachRow(*this);
registerInputFormatProcessorProtobuf(*this);
registerOutputFormatProcessorProtobuf(*this);
registerInputFormatProcessorTemplate(*this);
registerOutputFormatProcessorTemplate(*this);
registerInputFormatProcessorMsgPack(*this);
registerOutputFormatProcessorMsgPack(*this);
#if !defined(ARCADIA_BUILD)
registerInputFormatProcessorCapnProto(*this);
registerInputFormatProcessorORC(*this);
registerOutputFormatProcessorORC(*this);
registerInputFormatProcessorParquet(*this);
@ -360,18 +436,6 @@ FormatFactory::FormatFactory()
registerInputFormatProcessorAvro(*this);
registerOutputFormatProcessorAvro(*this);
#endif
registerInputFormatProcessorTemplate(*this);
registerOutputFormatProcessorTemplate(*this);
registerInputFormatProcessorRegexp(*this);
registerInputFormatProcessorMsgPack(*this);
registerOutputFormatProcessorMsgPack(*this);
registerInputFormatProcessorJSONAsString(*this);
registerFileSegmentationEngineTabSeparated(*this);
registerFileSegmentationEngineCSV(*this);
registerFileSegmentationEngineJSONEachRow(*this);
registerFileSegmentationEngineRegexp(*this);
registerFileSegmentationEngineJSONAsString(*this);
registerOutputFormatNull(*this);
@ -381,12 +445,20 @@ FormatFactory::FormatFactory()
registerOutputFormatProcessorVertical(*this);
registerOutputFormatProcessorJSON(*this);
registerOutputFormatProcessorJSONCompact(*this);
registerOutputFormatProcessorJSONEachRowWithProgress(*this);
registerOutputFormatProcessorXML(*this);
registerOutputFormatProcessorODBCDriver2(*this);
registerOutputFormatProcessorNull(*this);
registerOutputFormatProcessorMySQLWire(*this);
registerOutputFormatProcessorMarkdown(*this);
registerOutputFormatProcessorPostgreSQLWire(*this);
registerInputFormatProcessorRegexp(*this);
registerInputFormatProcessorJSONAsString(*this);
registerInputFormatProcessorLineAsString(*this);
#if !defined(ARCADIA_BUILD)
registerInputFormatProcessorCapnProto(*this);
#endif
}
FormatFactory & FormatFactory::instance()

View File

@ -141,73 +141,4 @@ private:
const Creators & getCreators(const String & name) const;
};
/// Formats for both input/output.
void registerInputFormatNative(FormatFactory & factory);
void registerOutputFormatNative(FormatFactory & factory);
void registerInputFormatProcessorNative(FormatFactory & factory);
void registerOutputFormatProcessorNative(FormatFactory & factory);
void registerInputFormatProcessorRowBinary(FormatFactory & factory);
void registerOutputFormatProcessorRowBinary(FormatFactory & factory);
void registerInputFormatProcessorTabSeparated(FormatFactory & factory);
void registerOutputFormatProcessorTabSeparated(FormatFactory & factory);
void registerInputFormatProcessorValues(FormatFactory & factory);
void registerOutputFormatProcessorValues(FormatFactory & factory);
void registerInputFormatProcessorCSV(FormatFactory & factory);
void registerOutputFormatProcessorCSV(FormatFactory & factory);
void registerInputFormatProcessorTSKV(FormatFactory & factory);
void registerOutputFormatProcessorTSKV(FormatFactory & factory);
void registerInputFormatProcessorJSONEachRow(FormatFactory & factory);
void registerOutputFormatProcessorJSONEachRow(FormatFactory & factory);
void registerInputFormatProcessorJSONCompactEachRow(FormatFactory & factory);
void registerOutputFormatProcessorJSONCompactEachRow(FormatFactory & factory);
void registerInputFormatProcessorParquet(FormatFactory & factory);
void registerOutputFormatProcessorParquet(FormatFactory & factory);
void registerInputFormatProcessorArrow(FormatFactory & factory);
void registerOutputFormatProcessorArrow(FormatFactory & factory);
void registerInputFormatProcessorProtobuf(FormatFactory & factory);
void registerOutputFormatProcessorProtobuf(FormatFactory & factory);
void registerInputFormatProcessorAvro(FormatFactory & factory);
void registerOutputFormatProcessorAvro(FormatFactory & factory);
void registerInputFormatProcessorTemplate(FormatFactory & factory);
void registerOutputFormatProcessorTemplate(FormatFactory & factory);
void registerInputFormatProcessorMsgPack(FormatFactory & factory);
void registerOutputFormatProcessorMsgPack(FormatFactory & factory);
void registerInputFormatProcessorORC(FormatFactory & factory);
void registerOutputFormatProcessorORC(FormatFactory & factory);
/// File Segmentation Engines for parallel reading
void registerFileSegmentationEngineTabSeparated(FormatFactory & factory);
void registerFileSegmentationEngineCSV(FormatFactory & factory);
void registerFileSegmentationEngineJSONEachRow(FormatFactory & factory);
void registerFileSegmentationEngineRegexp(FormatFactory & factory);
void registerFileSegmentationEngineJSONAsString(FormatFactory & factory);
/// Output only (presentational) formats.
void registerOutputFormatNull(FormatFactory & factory);
void registerOutputFormatProcessorPretty(FormatFactory & factory);
void registerOutputFormatProcessorPrettyCompact(FormatFactory & factory);
void registerOutputFormatProcessorPrettySpace(FormatFactory & factory);
void registerOutputFormatProcessorPrettyASCII(FormatFactory & factory);
void registerOutputFormatProcessorVertical(FormatFactory & factory);
void registerOutputFormatProcessorJSON(FormatFactory & factory);
void registerOutputFormatProcessorJSONCompact(FormatFactory & factory);
void registerOutputFormatProcessorJSONEachRowWithProgress(FormatFactory & factory);
void registerOutputFormatProcessorXML(FormatFactory & factory);
void registerOutputFormatProcessorODBCDriver2(FormatFactory & factory);
void registerOutputFormatProcessorNull(FormatFactory & factory);
void registerOutputFormatProcessorMySQLWire(FormatFactory & factory);
void registerOutputFormatProcessorMarkdown(FormatFactory & factory);
void registerOutputFormatProcessorPostgreSQLWire(FormatFactory & factory);
/// Input only formats.
void registerInputFormatProcessorCapnProto(FormatFactory & factory);
void registerInputFormatProcessorRegexp(FormatFactory & factory);
void registerInputFormatProcessorJSONAsString(FormatFactory & factory);
}

View File

@ -53,8 +53,28 @@ endif()
target_include_directories(clickhouse_functions SYSTEM PRIVATE ${SPARSEHASH_INCLUDE_DIR})
# Won't generate debug info for files with heavy template instantiation to achieve faster linking and lower size.
target_compile_options(clickhouse_functions PRIVATE "-g0")
if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE"
OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO"
OR CMAKE_BUILD_TYPE_UC STREQUAL "MINSIZEREL")
set (STRIP_DSF_DEFAULT ON)
else()
set (STRIP_DSF_DEFAULT OFF)
endif()
option(STRIP_DEBUG_SYMBOLS_FUNCTIONS
"Do not generate debugger info for ClickHouse functions.
Provides faster linking and lower binary size.
Tradeoff is the inability to debug some source files with e.g. gdb
(empty stack frames and no local variables)."
${STRIP_DSF_DEFAULT})
if (STRIP_DEBUG_SYMBOLS_FUNCTIONS)
message(WARNING "Not generating debugger info for ClickHouse functions")
target_compile_options(clickhouse_functions PRIVATE "-g0")
else()
message(STATUS "Generating debugger info for ClickHouse functions")
endif()
if (USE_ICU)
target_link_libraries (clickhouse_functions PRIVATE ${ICU_LIBRARIES})

View File

@ -561,6 +561,8 @@ public:
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true>
class FunctionBinaryArithmetic : public IFunction
{
static constexpr const bool is_plus = IsOperation<Op>::plus;
static constexpr const bool is_minus = IsOperation<Op>::minus;
static constexpr const bool is_multiply = IsOperation<Op>::multiply;
static constexpr const bool is_division = IsOperation<Op>::division;
@ -612,9 +614,7 @@ class FunctionBinaryArithmetic : public IFunction
/// Special case when the function is plus or minus, one of arguments is Date/DateTime and another is Interval.
/// We construct another function (example: addMonths) and call it.
static constexpr bool function_is_plus = IsOperation<Op>::plus;
static constexpr bool function_is_minus = IsOperation<Op>::minus;
if constexpr (!function_is_plus && !function_is_minus)
if constexpr (!is_plus && !is_minus)
return {};
const DataTypePtr & type_time = first_is_date_or_datetime ? type0 : type1;
@ -631,21 +631,21 @@ class FunctionBinaryArithmetic : public IFunction
return {};
}
if (second_is_date_or_datetime && function_is_minus)
if (second_is_date_or_datetime && is_minus)
throw Exception("Wrong order of arguments for function " + getName() + ": argument of type Interval cannot be first.",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
std::string function_name;
if (interval_data_type)
{
function_name = String(function_is_plus ? "add" : "subtract") + interval_data_type->getKind().toString() + 's';
function_name = String(is_plus ? "add" : "subtract") + interval_data_type->getKind().toString() + 's';
}
else
{
if (isDate(type_time))
function_name = function_is_plus ? "addDays" : "subtractDays";
function_name = is_plus ? "addDays" : "subtractDays";
else
function_name = function_is_plus ? "addSeconds" : "subtractSeconds";
function_name = is_plus ? "addSeconds" : "subtractSeconds";
}
return FunctionFactory::instance().get(function_name, context);
@ -653,7 +653,7 @@ class FunctionBinaryArithmetic : public IFunction
bool isAggregateMultiply(const DataTypePtr & type0, const DataTypePtr & type1) const
{
if constexpr (!IsOperation<Op>::multiply)
if constexpr (!is_multiply)
return false;
WhichDataType which0(type0);
@ -665,7 +665,7 @@ class FunctionBinaryArithmetic : public IFunction
bool isAggregateAddition(const DataTypePtr & type0, const DataTypePtr & type1) const
{
if constexpr (!IsOperation<Op>::plus)
if constexpr (!is_plus)
return false;
WhichDataType which0(type0);
@ -994,8 +994,6 @@ public:
if constexpr (!std::is_same_v<ResultDataType, InvalidType>)
{
constexpr bool result_is_decimal = IsDataTypeDecimal<LeftDataType> || IsDataTypeDecimal<RightDataType>;
using T0 = typename LeftDataType::FieldType;
using T1 = typename RightDataType::FieldType;
using ResultType = typename ResultDataType::FieldType;
@ -1003,112 +1001,91 @@ public:
using ColVecT1 = std::conditional_t<IsDecimalNumber<T1>, ColumnDecimal<T1>, ColumnVector<T1>>;
using ColVecResult = std::conditional_t<IsDecimalNumber<ResultType>, ColumnDecimal<ResultType>, ColumnVector<ResultType>>;
/// Decimal operations need scale. Operations are on result type.
using OpImpl = std::conditional_t<IsDataTypeDecimal<ResultDataType>,
DecimalBinaryOperation<T0, T1, Op, ResultType>,
BinaryOperationImpl<T0, T1, Op<T0, T1>, ResultType>>;
auto col_left_raw = block.getByPosition(arguments[0]).column.get();
auto col_right_raw = block.getByPosition(arguments[1]).column.get();
if (auto col_left = checkAndGetColumnConst<ColVecT0>(col_left_raw))
{
if (auto col_right = checkAndGetColumnConst<ColVecT1>(col_right_raw))
{
/// the only case with a non-vector result
if constexpr (result_is_decimal)
{
ResultDataType type = decimalResultType<is_multiply, is_division>(left, right);
typename ResultDataType::FieldType scale_a = type.scaleFactorFor(left, is_multiply);
typename ResultDataType::FieldType scale_b = type.scaleFactorFor(right, is_multiply || is_division);
if constexpr (IsDataTypeDecimal<RightDataType> && is_division)
scale_a = right.getScaleMultiplier();
auto res = OpImpl::constantConstant(col_left->template getValue<T0>(), col_right->template getValue<T1>(),
scale_a, scale_b, check_decimal_overflow);
block.getByPosition(result).column =
ResultDataType(type.getPrecision(), type.getScale()).createColumnConst(
col_left->size(), toField(res, type.getScale()));
}
else
{
auto res = OpImpl::constantConstant(col_left->template getValue<T0>(), col_right->template getValue<T1>());
block.getByPosition(result).column = ResultDataType().createColumnConst(col_left->size(), toField(res));
}
return true;
}
}
auto col_left_const = checkAndGetColumnConst<ColVecT0>(col_left_raw);
auto col_right_const = checkAndGetColumnConst<ColVecT1>(col_right_raw);
typename ColVecResult::MutablePtr col_res = nullptr;
if constexpr (result_is_decimal)
auto col_left = checkAndGetColumn<ColVecT0>(col_left_raw);
auto col_right = checkAndGetColumn<ColVecT1>(col_right_raw);
if constexpr (IsDataTypeDecimal<LeftDataType> || IsDataTypeDecimal<RightDataType>)
{
using OpImpl = DecimalBinaryOperation<T0, T1, Op, ResultType>;
ResultDataType type = decimalResultType<is_multiply, is_division>(left, right);
col_res = ColVecResult::create(0, type.getScale());
}
else
col_res = ColVecResult::create();
auto & vec_res = col_res->getData();
vec_res.resize(block.rows());
typename ResultDataType::FieldType scale_a = type.scaleFactorFor(left, is_multiply);
typename ResultDataType::FieldType scale_b = type.scaleFactorFor(right, is_multiply || is_division);
if constexpr (IsDataTypeDecimal<RightDataType> && is_division)
scale_a = right.getScaleMultiplier();
if (auto col_left_const = checkAndGetColumnConst<ColVecT0>(col_left_raw))
{
if (auto col_right = checkAndGetColumn<ColVecT1>(col_right_raw))
/// non-vector result
if (col_left_const && col_right_const)
{
if constexpr (result_is_decimal)
{
ResultDataType type = decimalResultType<is_multiply, is_division>(left, right);
auto res = OpImpl::constantConstant(col_left_const->template getValue<T0>(), col_right_const->template getValue<T1>(),
scale_a, scale_b, check_decimal_overflow);
typename ResultDataType::FieldType scale_a = type.scaleFactorFor(left, is_multiply);
typename ResultDataType::FieldType scale_b = type.scaleFactorFor(right, is_multiply || is_division);
if constexpr (IsDataTypeDecimal<RightDataType> && is_division)
scale_a = right.getScaleMultiplier();
block.getByPosition(result).column = ResultDataType(type.getPrecision(), type.getScale()).createColumnConst(
col_left_const->size(), toField(res, type.getScale()));
return true;
}
OpImpl::constantVector(col_left_const->template getValue<T0>(), col_right->getData(), vec_res,
scale_a, scale_b, check_decimal_overflow);
}
else
OpImpl::constantVector(col_left_const->template getValue<T0>(), col_right->getData().data(), vec_res.data(), vec_res.size());
col_res = ColVecResult::create(0, type.getScale());
auto & vec_res = col_res->getData();
vec_res.resize(block.rows());
if (col_left && col_right)
{
OpImpl::vectorVector(col_left->getData(), col_right->getData(), vec_res, scale_a, scale_b, check_decimal_overflow);
}
else if (col_left_const && col_right)
{
OpImpl::constantVector(col_left_const->template getValue<T0>(), col_right->getData(), vec_res,
scale_a, scale_b, check_decimal_overflow);
}
else if (col_left && col_right_const)
{
OpImpl::vectorConstant(col_left->getData(), col_right_const->template getValue<T1>(), vec_res,
scale_a, scale_b, check_decimal_overflow);
}
else
return false;
}
else if (auto col_left = checkAndGetColumn<ColVecT0>(col_left_raw))
else
{
if constexpr (result_is_decimal)
using OpImpl = BinaryOperationImpl<T0, T1, Op<T0, T1>, ResultType>;
/// non-vector result
if (col_left_const && col_right_const)
{
ResultDataType type = decimalResultType<is_multiply, is_division>(left, right);
auto res = OpImpl::constantConstant(col_left_const->template getValue<T0>(), col_right_const->template getValue<T1>());
block.getByPosition(result).column = ResultDataType().createColumnConst(col_left_const->size(), toField(res));
return true;
}
typename ResultDataType::FieldType scale_a = type.scaleFactorFor(left, is_multiply);
typename ResultDataType::FieldType scale_b = type.scaleFactorFor(right, is_multiply || is_division);
if constexpr (IsDataTypeDecimal<RightDataType> && is_division)
scale_a = right.getScaleMultiplier();
col_res = ColVecResult::create();
auto & vec_res = col_res->getData();
vec_res.resize(block.rows());
if (auto col_right = checkAndGetColumn<ColVecT1>(col_right_raw))
{
OpImpl::vectorVector(col_left->getData(), col_right->getData(), vec_res, scale_a, scale_b,
check_decimal_overflow);
}
else if (auto col_right_const = checkAndGetColumnConst<ColVecT1>(col_right_raw))
{
OpImpl::vectorConstant(col_left->getData(), col_right_const->template getValue<T1>(), vec_res,
scale_a, scale_b, check_decimal_overflow);
}
else
return false;
if (col_left && col_right)
{
OpImpl::vectorVector(col_left->getData().data(), col_right->getData().data(), vec_res.data(), vec_res.size());
}
else if (col_left_const && col_right)
{
OpImpl::constantVector(col_left_const->template getValue<T0>(), col_right->getData().data(), vec_res.data(), vec_res.size());
}
else if (col_left && col_right_const)
{
OpImpl::vectorConstant(col_left->getData().data(), col_right_const->template getValue<T1>(), vec_res.data(), vec_res.size());
}
else
{
if (auto col_right = checkAndGetColumn<ColVecT1>(col_right_raw))
OpImpl::vectorVector(col_left->getData().data(), col_right->getData().data(), vec_res.data(), vec_res.size());
else if (auto col_right_const = checkAndGetColumnConst<ColVecT1>(col_right_raw))
OpImpl::vectorConstant(col_left->getData().data(), col_right_const->template getValue<T1>(), vec_res.data(), vec_res.size());
else
return false;
}
return false;
}
else
return false;
block.getByPosition(result).column = std::move(col_res);
return true;

View File

@ -1,10 +1,10 @@
#include <Functions/FunctionJoinGet.h>
#include <Columns/ColumnString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Interpreters/Context.h>
#include <Interpreters/HashJoin.h>
#include <Columns/ColumnString.h>
#include <Storages/StorageJoin.h>
@ -16,19 +16,35 @@ namespace ErrorCodes
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
/// Collects the key columns from the arguments and stores the result of HashJoin::joinGet
/// into the result position of the block. The row count argument is unused.
template <bool or_null>
void ExecutableFunctionJoinGet<or_null>::execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t)
{
    /// Arguments 0 and 1 are skipped here; key columns start at index 2.
    constexpr size_t first_key_argument = 2;

    Block key_columns;
    for (size_t pos = first_key_argument; pos < arguments.size(); ++pos)
        key_columns.insert(block.getByPosition(arguments[pos]));

    block.getByPosition(result) = join->joinGet(key_columns, result_block);
}
/// Creates the executable form of the function. It receives the join and a one-column block
/// (empty column of the return type, named after the requested attribute).
template <bool or_null>
ExecutableFunctionImplPtr FunctionJoinGet<or_null>::prepare(const Block &, const ColumnNumbers &, size_t) const
{
    Block result_header{{return_type->createColumn(), return_type, attr_name}};
    return std::make_unique<ExecutableFunctionJoinGet<or_null>>(join, result_header);
}
static auto getJoin(const ColumnsWithTypeAndName & arguments, const Context & context)
{
if (arguments.size() != 3)
throw Exception{"Function joinGet takes 3 arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
String join_name;
if (const auto * name_col = checkAndGetColumnConst<ColumnString>(arguments[0].column.get()))
{
join_name = name_col->getValue<String>();
}
else
throw Exception{"Illegal type " + arguments[0].type->getName() + " of first argument of function joinGet, expected a const string.",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
throw Exception(
"Illegal type " + arguments[0].type->getName() + " of first argument of function joinGet, expected a const string.",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
size_t dot = join_name.find('.');
String database_name;
@ -43,10 +59,12 @@ static auto getJoin(const ColumnsWithTypeAndName & arguments, const Context & co
++dot;
}
String table_name = join_name.substr(dot);
if (table_name.empty())
throw Exception("joinGet does not allow empty table name", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
auto table = DatabaseCatalog::instance().getTable({database_name, table_name}, context);
auto storage_join = std::dynamic_pointer_cast<StorageJoin>(table);
if (!storage_join)
throw Exception{"Table " + join_name + " should have engine StorageJoin", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
throw Exception("Table " + join_name + " should have engine StorageJoin", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
String attr_name;
if (const auto * name_col = checkAndGetColumnConst<ColumnString>(arguments[1].column.get()))
@ -54,57 +72,30 @@ static auto getJoin(const ColumnsWithTypeAndName & arguments, const Context & co
attr_name = name_col->getValue<String>();
}
else
throw Exception{"Illegal type " + arguments[1].type->getName()
+ " of second argument of function joinGet, expected a const string.",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
throw Exception(
"Illegal type " + arguments[1].type->getName() + " of second argument of function joinGet, expected a const string.",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return std::make_pair(storage_join, attr_name);
}
template <bool or_null>
FunctionBaseImplPtr JoinGetOverloadResolver<or_null>::build(const ColumnsWithTypeAndName & arguments, const DataTypePtr &) const
{
if (arguments.size() < 3)
throw Exception(
"Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size())
+ ", should be greater or equal to 3",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
auto [storage_join, attr_name] = getJoin(arguments, context);
auto join = storage_join->getJoin();
DataTypes data_types(arguments.size());
DataTypes data_types(arguments.size() - 2);
for (size_t i = 2; i < arguments.size(); ++i)
data_types[i - 2] = arguments[i].type;
auto return_type = join->joinGetCheckAndGetReturnType(data_types, attr_name, or_null);
auto table_lock = storage_join->lockForShare(context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout);
for (size_t i = 0; i < arguments.size(); ++i)
data_types[i] = arguments[i].type;
auto return_type = join->joinGetReturnType(attr_name, or_null);
return std::make_unique<FunctionJoinGet<or_null>>(table_lock, storage_join, join, attr_name, data_types, return_type);
}
template <bool or_null>
DataTypePtr JoinGetOverloadResolver<or_null>::getReturnType(const ColumnsWithTypeAndName & arguments) const
{
auto [storage_join, attr_name] = getJoin(arguments, context);
auto join = storage_join->getJoin();
return join->joinGetReturnType(attr_name, or_null);
}
template <bool or_null>
void ExecutableFunctionJoinGet<or_null>::execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count)
{
auto ctn = block.getByPosition(arguments[2]);
if (isColumnConst(*ctn.column))
ctn.column = ctn.column->cloneResized(1);
ctn.name = ""; // make sure the key name never collide with the join columns
Block key_block = {ctn};
join->joinGet(key_block, attr_name, or_null);
auto & result_ctn = key_block.getByPosition(1);
if (isColumnConst(*ctn.column))
result_ctn.column = ColumnConst::create(result_ctn.column, input_rows_count);
block.getByPosition(result) = result_ctn;
}
template <bool or_null>
ExecutableFunctionImplPtr FunctionJoinGet<or_null>::prepare(const Block &, const ColumnNumbers &, size_t) const
{
return std::make_unique<ExecutableFunctionJoinGet<or_null>>(join, attr_name);
}
void registerFunctionJoinGet(FunctionFactory & factory)
{
// joinGet

View File

@ -13,14 +13,14 @@ template <bool or_null>
class ExecutableFunctionJoinGet final : public IExecutableFunctionImpl
{
public:
ExecutableFunctionJoinGet(HashJoinPtr join_, String attr_name_)
: join(std::move(join_)), attr_name(std::move(attr_name_)) {}
ExecutableFunctionJoinGet(HashJoinPtr join_, const Block & result_block_)
: join(std::move(join_)), result_block(result_block_) {}
static constexpr auto name = or_null ? "joinGetOrNull" : "joinGet";
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForConstants() const override { return true; }
bool useDefaultImplementationForLowCardinalityColumns() const override { return true; }
bool useDefaultImplementationForConstants() const override { return true; }
void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override;
@ -28,7 +28,7 @@ public:
private:
HashJoinPtr join;
const String attr_name;
Block result_block;
};
template <bool or_null>
@ -77,13 +77,14 @@ public:
String getName() const override { return name; }
FunctionBaseImplPtr build(const ColumnsWithTypeAndName & arguments, const DataTypePtr &) const override;
DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments) const override;
DataTypePtr getReturnType(const ColumnsWithTypeAndName &) const override { return {}; } // Not used
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForLowCardinalityColumns() const override { return true; }
bool isVariadic() const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0, 1}; }
private:
const Context & context;

View File

@ -9,6 +9,7 @@ void registerFunctionsFormatting(FunctionFactory & factory)
{
factory.registerFunction<FunctionBitmaskToList>();
factory.registerFunction<FunctionFormatReadableSize>();
factory.registerFunction<FunctionFormatReadableQuantity>();
}
}

View File

@ -202,4 +202,80 @@ private:
}
};
/// Function formatReadableQuantity(x): takes one native numeric argument and returns a String column
/// in which every row holds the text written by the formatReadableQuantity() helper for that value.
class FunctionFormatReadableQuantity : public IFunction
{
public:
    static constexpr auto name = "formatReadableQuantity";
    static FunctionPtr create(const Context &) { return std::make_shared<FunctionFormatReadableQuantity>(); }

    String getName() const override { return name; }

    size_t getNumberOfArguments() const override { return 1; }

    /// The result is always String; any argument that is not a native number is rejected.
    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
    {
        const IDataType & argument_type = *arguments[0];

        if (isNativeNumber(argument_type))
            return std::make_shared<DataTypeString>();

        throw Exception("Cannot format " + argument_type.getName() + " as quantity", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
    }

    bool useDefaultImplementationForConstants() const override { return true; }

    /// Tries each supported numeric column type in turn; the first one that matches produces the result.
    void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) const override
    {
        const bool handled = executeType<UInt8>(block, arguments, result)
            || executeType<UInt16>(block, arguments, result)
            || executeType<UInt32>(block, arguments, result)
            || executeType<UInt64>(block, arguments, result)
            || executeType<Int8>(block, arguments, result)
            || executeType<Int16>(block, arguments, result)
            || executeType<Int32>(block, arguments, result)
            || executeType<Int64>(block, arguments, result)
            || executeType<Float32>(block, arguments, result)
            || executeType<Float64>(block, arguments, result);

        if (handled)
            return;

        throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
                + " of argument of function " + getName(),
            ErrorCodes::ILLEGAL_COLUMN);
    }

private:
    /// Handles the case when the argument is a ColumnVector<T>.
    /// Returns false (leaving the block untouched) if the argument column has a different type.
    template <typename T>
    bool executeType(Block & block, const ColumnNumbers & arguments, size_t result) const
    {
        const auto * source_column = checkAndGetColumn<ColumnVector<T>>(block.getByPosition(arguments[0]).column.get());
        if (!source_column)
            return false;

        auto result_column = ColumnString::create();

        const typename ColumnVector<T>::Container & source_values = source_column->getData();
        ColumnString::Chars & result_chars = result_column->getChars();
        ColumnString::Offsets & result_offsets = result_column->getOffsets();

        const size_t rows = source_values.size();
        /// Initial size of the character buffer: two bytes per row.
        result_chars.resize(rows * 2);
        result_offsets.resize(rows);

        WriteBufferFromVector<ColumnString::Chars> out(result_chars);
        for (size_t row = 0; row < rows; ++row)
        {
            formatReadableQuantity(static_cast<double>(source_values[row]), out);
            /// Every string is followed by a zero byte; the offset is the byte count written so far.
            writeChar(0, out);
            result_offsets[row] = out.count();
        }
        out.finalize();

        block.getByPosition(result).column = std::move(result_column);
        return true;
    }
};
}

View File

@ -3,5 +3,17 @@ add_headers_and_sources(clickhouse_functions_gatherutils .)
add_library(clickhouse_functions_gatherutils ${clickhouse_functions_gatherutils_sources} ${clickhouse_functions_gatherutils_headers})
target_link_libraries(clickhouse_functions_gatherutils PRIVATE dbms)

# Tell the sources whether the compiler recognises these warnings, so that the pragmas
# silencing them are only emitted when the warning names are known.
# The flags must be spelled in full ("-W..."): check_cxx_compiler_flag passes its argument
# to the compiler verbatim, and a bare "suggest-override" is not a valid option.
include(CheckCXXCompilerFlag)
check_cxx_compiler_flag("-Wsuggest-override" HAS_SUGGEST_OVERRIDE)
check_cxx_compiler_flag("-Wsuggest-destructor-override" HAS_SUGGEST_DESTRUCTOR_OVERRIDE)

# NOTE(review): these definitions are PRIVATE; if headers of this library that test the macros
# are included from other targets, the macros will be undefined there — confirm.
if (HAS_SUGGEST_OVERRIDE)
    target_compile_definitions(clickhouse_functions_gatherutils PRIVATE HAS_SUGGEST_OVERRIDE)
endif()

if (HAS_SUGGEST_DESTRUCTOR_OVERRIDE)
    target_compile_definitions(clickhouse_functions_gatherutils PRIVATE HAS_SUGGEST_DESTRUCTOR_OVERRIDE)
endif()

# Optionally skip debug info for files with heavy template instantiation to achieve faster
# linking and lower size. This must stay behind the option: an unconditional "-g0" would make
# STRIP_DEBUG_SYMBOLS_FUNCTIONS meaningless.
if (STRIP_DEBUG_SYMBOLS_FUNCTIONS)
    target_compile_options(clickhouse_functions_gatherutils PRIVATE "-g0")
endif()

View File

@ -129,9 +129,13 @@ struct NumericArraySource : public ArraySourceImpl<NumericArraySource<T>>
#pragma GCC diagnostic ignored "-Wsuggest-override"
#elif __clang_major__ >= 11
#pragma GCC diagnostic push
#ifdef HAS_SUGGEST_OVERRIDE
#pragma GCC diagnostic ignored "-Wsuggest-override"
#endif
#ifdef HAS_SUGGEST_DESTRUCTOR_OVERRIDE
#pragma GCC diagnostic ignored "-Wsuggest-destructor-override"
#endif
#endif
template <typename Base>
struct ConstSource : public Base

View File

@ -3,8 +3,9 @@ add_headers_and_sources(clickhouse_functions_url .)
add_library(clickhouse_functions_url ${clickhouse_functions_url_sources} ${clickhouse_functions_url_headers})
target_link_libraries(clickhouse_functions_url PRIVATE dbms)

# Optionally skip debug info for files with heavy template instantiation to achieve faster
# linking and lower size. This must stay behind the option: an unconditional "-g0" would make
# STRIP_DEBUG_SYMBOLS_FUNCTIONS meaningless.
if (STRIP_DEBUG_SYMBOLS_FUNCTIONS)
    target_compile_options(clickhouse_functions_url PRIVATE "-g0")
endif()

# TODO: move Functions/Regexps.h to some lib and use here
target_link_libraries(clickhouse_functions_url PRIVATE hyperscan)

View File

@ -3,5 +3,6 @@ add_headers_and_sources(clickhouse_functions_array .)
add_library(clickhouse_functions_array ${clickhouse_functions_array_sources} ${clickhouse_functions_array_headers})
target_link_libraries(clickhouse_functions_array PRIVATE dbms clickhouse_functions_gatherutils)

# Optionally skip debug info for files with heavy template instantiation to achieve faster
# linking and lower size. This must stay behind the option: an unconditional "-g0" would make
# STRIP_DEBUG_SYMBOLS_FUNCTIONS meaningless.
if (STRIP_DEBUG_SYMBOLS_FUNCTIONS)
    target_compile_options(clickhouse_functions_array PRIVATE "-g0")
endif()

View File

@ -13,7 +13,6 @@ template <typename A, typename B>
struct DivideFloatingImpl
{
using ResultType = typename NumberTraits::ResultOfFloatingPointDivision<A, B>::Type;
static const constexpr bool allow_decimal = true;
static const constexpr bool allow_fixed_string = false;
template <typename Result = ResultType>

View File

@ -604,7 +604,6 @@ private:
const ColumnUInt8 * cond_col, Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count)
{
/// Convert both columns to the common type (if needed).
const ColumnWithTypeAndName & arg1 = block.getByPosition(arguments[1]);
const ColumnWithTypeAndName & arg2 = block.getByPosition(arguments[2]);
@ -765,10 +764,22 @@ private:
return ColumnNullable::create(materialized, ColumnUInt8::create(column->size(), 0));
}
static ColumnPtr getNestedColumn(const ColumnPtr & column)
/// Strips Nullable from a column, descending through Const wrappers. Examples:
/// Nullable(size = 1, Int32(size = 1), UInt8(size = 1)) -> Int32(size = 1)
/// Const(size = 0, Nullable(size = 1, Int32(size = 1), UInt8(size = 1))) ->
/// Const(size = 0, Int32(size = 1))
static ColumnPtr recursiveGetNestedColumnWithoutNullable(const ColumnPtr & column)
{
    /// Nullable cannot contain Nullable, so unwrapping once is enough.
    if (const auto * as_nullable = checkAndGetColumn<ColumnNullable>(*column))
        return as_nullable->getNestedColumnPtr();

    /// Keep the Const wrapper with its size, but strip Nullable from the wrapped data column.
    if (const auto * as_const = checkAndGetColumn<ColumnConst>(*column))
    {
        return ColumnConst::create(
            recursiveGetNestedColumnWithoutNullable(as_const->getDataColumnPtr()),
            column->size());
    }

    /// Any other column is returned as is.
    return column;
}
@ -826,12 +837,12 @@ private:
{
arg_cond,
{
getNestedColumn(arg_then.column),
recursiveGetNestedColumnWithoutNullable(arg_then.column),
removeNullable(arg_then.type),
""
},
{
getNestedColumn(arg_else.column),
recursiveGetNestedColumnWithoutNullable(arg_else.column),
removeNullable(arg_else.type),
""
},

View File

@ -9,7 +9,6 @@ template <typename A, typename B>
struct MinusImpl
{
using ResultType = typename NumberTraits::ResultOfSubtraction<A, B>::Type;
static const constexpr bool allow_decimal = true;
static const constexpr bool allow_fixed_string = false;
template <typename Result = ResultType>

View File

@ -9,7 +9,6 @@ template <typename A, typename B>
struct MultiplyImpl
{
using ResultType = typename NumberTraits::ResultOfAdditionMultiplication<A, B>::Type;
static const constexpr bool allow_decimal = true;
static const constexpr bool allow_fixed_string = false;
template <typename Result = ResultType>

View File

@ -9,8 +9,8 @@ template <typename A, typename B>
struct PlusImpl
{
using ResultType = typename NumberTraits::ResultOfAdditionMultiplication<A, B>::Type;
static const constexpr bool allow_decimal = true;
static const constexpr bool allow_fixed_string = false;
static const constexpr bool is_commutative = true;
template <typename Result = ResultType>
static inline NO_SANITIZE_UNDEFINED Result apply(A a, B b)

View File

@ -77,6 +77,9 @@ ReadBufferFromFile::~ReadBufferFromFile()
void ReadBufferFromFile::close()
{
if (fd < 0)
return;
if (0 != ::close(fd))
throw Exception("Cannot close file", ErrorCodes::CANNOT_CLOSE_FILE);

View File

@ -92,6 +92,9 @@ WriteBufferFromFile::~WriteBufferFromFile()
/// Close file before destruction of object.
void WriteBufferFromFile::close()
{
if (fd < 0)
return;
next();
if (0 != ::close(fd))

View File

@ -2,6 +2,7 @@
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypeString.h>
#include <Interpreters/AsynchronousMetrics.h>
@ -13,10 +14,11 @@ Block AsynchronousMetricLogElement::createBlock()
{
ColumnsWithTypeAndName columns;
columns.emplace_back(std::make_shared<DataTypeDate>(), "event_date");
columns.emplace_back(std::make_shared<DataTypeDateTime>(), "event_time");
columns.emplace_back(std::make_shared<DataTypeString>(), "name");
columns.emplace_back(std::make_shared<DataTypeFloat64>(), "value");
columns.emplace_back(std::make_shared<DataTypeDate>(), "event_date");
columns.emplace_back(std::make_shared<DataTypeDateTime>(), "event_time");
columns.emplace_back(std::make_shared<DataTypeDateTime64>(6), "event_time_microseconds");
columns.emplace_back(std::make_shared<DataTypeString>(), "name");
columns.emplace_back(std::make_shared<DataTypeFloat64>(), "value");
return Block(columns);
}
@ -28,6 +30,7 @@ void AsynchronousMetricLogElement::appendToBlock(MutableColumns & columns) const
columns[column_idx++]->insert(event_date);
columns[column_idx++]->insert(event_time);
columns[column_idx++]->insert(event_time_microseconds);
columns[column_idx++]->insert(metric_name);
columns[column_idx++]->insert(value);
}
@ -38,6 +41,11 @@ inline UInt64 time_in_milliseconds(std::chrono::time_point<std::chrono::system_c
return std::chrono::duration_cast<std::chrono::milliseconds>(timepoint.time_since_epoch()).count();
}
/// Number of whole microseconds between the system_clock epoch and `timepoint`.
inline UInt64 time_in_microseconds(std::chrono::time_point<std::chrono::system_clock> timepoint)
{
    const auto since_epoch = timepoint.time_since_epoch();
    const auto as_microseconds = std::chrono::duration_cast<std::chrono::microseconds>(since_epoch);
    return as_microseconds.count();
}
inline UInt64 time_in_seconds(std::chrono::time_point<std::chrono::system_clock> timepoint)
{
@ -50,6 +58,7 @@ void AsynchronousMetricLog::addValues(const AsynchronousMetricValues & values)
const auto now = std::chrono::system_clock::now();
element.event_time = time_in_seconds(now);
element.event_time_microseconds = time_in_microseconds(now);
element.event_date = DateLUT::instance().toDayNum(element.event_time);
for (const auto & [key, value] : values)

View File

@ -22,6 +22,7 @@ struct AsynchronousMetricLogElement
{
UInt16 event_date;
time_t event_time;
UInt64 event_time_microseconds;
std::string metric_name;
double value;

View File

@ -23,6 +23,7 @@
#include <Storages/MergeTree/MergeTreeSettings.h>
#include <Storages/CompressionCodecSelector.h>
#include <Storages/StorageS3Settings.h>
#include <Storages/LiveView/TemporaryLiveViewCleaner.h>
#include <Disks/DiskLocal.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <Interpreters/ActionLocksManager.h>
@ -430,6 +431,7 @@ struct ContextShared
if (system_logs)
system_logs->shutdown();
TemporaryLiveViewCleaner::shutdown();
DatabaseCatalog::shutdown();
/// Preemptive destruction is important, because these objects may have a refcount to ContextShared (cyclic reference).
@ -487,6 +489,12 @@ Context Context::createGlobal(ContextShared * shared)
return res;
}
/// Initialization performed for the global context: calls DatabaseCatalog::init with a pointer
/// to this context, then TemporaryLiveViewCleaner::init with a reference to it.
void Context::initGlobal()
{
DatabaseCatalog::init(this);
TemporaryLiveViewCleaner::init(*this);
}
/// Allocates a new ContextShared and hands its ownership to a SharedContextHolder.
SharedContextHolder Context::createShared()
{
    auto shared_part = std::make_unique<ContextShared>();
    return SharedContextHolder(std::move(shared_part));
}
View File

@ -445,11 +445,7 @@ public:
/// Makes this context its own query context.
void makeQueryContext() { query_context = this; }
/// Makes this context its own session context.
void makeSessionContext() { session_context = this; }
/// Makes this context the global one.
/// initGlobal() is called first; it performs the global initialization (DatabaseCatalog::init and
/// TemporaryLiveViewCleaner::init), after which `global_context` is pointed at this object.
/// There must be exactly one definition: a second body that calls DatabaseCatalog::init directly
/// would be a redefinition and would skip the TemporaryLiveViewCleaner initialization.
void makeGlobalContext() { initGlobal(); global_context = this; }
const Settings & getSettingsRef() const { return settings; }
@ -625,6 +621,8 @@ public:
private:
std::unique_lock<std::recursive_mutex> getLock() const;
void initGlobal();
/// Compute and set actual user settings, client_info.current_user should be set
void calculateAccessRights();

View File

@ -893,6 +893,8 @@ private:
cancelLoading(info);
}
putBackFinishedThreadsToPool();
/// All loadings have unique loading IDs.
size_t loading_id = next_id_counter++;
info.loading_id = loading_id;
@ -914,6 +916,21 @@ private:
}
}
/// Joins the thread of every loading listed in `recently_finished_loadings`, removes those
/// threads from `loading_threads`, and then empties the list.
void putBackFinishedThreadsToPool()
{
    for (const auto finished_id : recently_finished_loadings)
    {
        const auto found = loading_threads.find(finished_id);
        if (found == loading_threads.end())
            continue;

        /// Take the thread out of the map before waiting for it.
        auto finished_thread = std::move(found->second);
        loading_threads.erase(found);
        finished_thread.join(); /// It's very likely that the thread has already finished.
    }

    recently_finished_loadings.clear();
}
static void cancelLoading(Info & info)
{
if (!info.isLoading())
@ -1095,12 +1112,11 @@ private:
}
min_id_to_finish_loading_dependencies.erase(std::this_thread::get_id());
auto it = loading_threads.find(loading_id);
if (it != loading_threads.end())
{
it->second.detach();
loading_threads.erase(it);
}
/// Add `loading_id` to the list of recently finished loadings.
/// This list is used to later put the threads which finished loading back to the thread pool.
/// (We can't put the loading thread back to the thread pool immediately here because at this point
/// the loading thread is about to finish but it's not finished yet right now.)
recently_finished_loadings.push_back(loading_id);
}
/// Calculate next update time for loaded_object. Can be called without mutex locking,
@ -1158,6 +1174,7 @@ private:
bool always_load_everything = false;
std::atomic<bool> enable_async_loading = false;
std::unordered_map<size_t, ThreadFromGlobalPool> loading_threads;
std::vector<size_t> recently_finished_loadings;
std::unordered_map<std::thread::id, size_t> min_id_to_finish_loading_dependencies;
size_t next_id_counter = 1; /// should always be > 0
mutable pcg64 rnd_engine{randomSeed()};

View File

@ -41,6 +41,7 @@ namespace ErrorCodes
extern const int SYNTAX_ERROR;
extern const int SET_SIZE_LIMIT_EXCEEDED;
extern const int TYPE_MISMATCH;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
namespace
@ -1109,27 +1110,34 @@ void HashJoin::joinBlockImplCross(Block & block, ExtraBlockPtr & not_processed)
block = block.cloneWithColumns(std::move(dst_columns));
}
/// Checks that the first column of both blocks has the same type once Nullable is removed.
/// Throws TYPE_MISMATCH, naming both columns and their types, if the types differ.
static void checkTypeOfKey(const Block & block_left, const Block & block_right)
{
    const auto & left_key = block_left.safeGetByPosition(0);
    const auto & right_key = block_right.safeGetByPosition(0);

    auto left_type = removeNullable(left_key.type);
    auto right_type = removeNullable(right_key.type);

    if (left_type->equals(*right_type))
        return;

    throw Exception("Type mismatch of columns to joinGet by: "
        + left_key.name + " " + left_type->getName() + " at left, "
        + right_key.name + " " + right_type->getName() + " at right",
        ErrorCodes::TYPE_MISMATCH);
}
DataTypePtr HashJoin::joinGetReturnType(const String & column_name, bool or_null) const
/// Validates the key argument types of joinGet / joinGetOrNull against the keys of the right
/// table and looks up `column_name` among the columns added by the join.
/// data_types  - types of the key arguments, in key order.
/// column_name - name of the column to fetch.
/// or_null     - if true, the type of the fetched column is made Nullable.
/// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH if the number of key types differs from the number of
/// right-table keys, TYPE_MISMATCH if a key type differs from the table's key type (Nullable is
/// ignored on both sides), NO_SUCH_COLUMN_IN_TABLE if `column_name` is not present.
DataTypePtr HashJoin::joinGetCheckAndGetReturnType(const DataTypes & data_types, const String & column_name, bool or_null) const
{
    std::shared_lock lock(data->rwlock);

    size_t num_keys = data_types.size();
    size_t expected_num_keys = right_table_keys.columns();
    if (expected_num_keys != num_keys)
        throw Exception(
            /// Report both counts: the number passed and the number the table actually requires.
            "Number of arguments for function joinGet" + toString(or_null ? "OrNull" : "")
                + " doesn't match: passed " + toString(num_keys) + ", should be equal to " + toString(expected_num_keys),
            ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

    for (size_t i = 0; i < num_keys; ++i)
    {
        const auto & left_type_origin = data_types[i];
        const auto & [c2, right_type_origin, right_name] = right_table_keys.safeGetByPosition(i);
        /// Nullability is not part of the comparison.
        auto left_type = removeNullable(left_type_origin);
        auto right_type = removeNullable(right_type_origin);
        if (!left_type->equals(*right_type))
            throw Exception(
                "Type mismatch in joinGet key " + toString(i) + ": found type " + left_type->getName() + ", while the needed type is "
                    + right_type->getName(),
                ErrorCodes::TYPE_MISMATCH);
    }

    if (!sample_block_with_columns_to_add.has(column_name))
        throw Exception("StorageJoin doesn't contain column " + column_name, ErrorCodes::NO_SUCH_COLUMN_IN_TABLE);

    auto elem = sample_block_with_columns_to_add.getByName(column_name);
    if (or_null)
        elem.type = makeNullable(elem.type);
@ -1138,34 +1146,33 @@ DataTypePtr HashJoin::joinGetReturnType(const String & column_name, bool or_null
template <typename Maps>
void HashJoin::joinGetImpl(Block & block, const Block & block_with_columns_to_add, const Maps & maps_) const
ColumnWithTypeAndName HashJoin::joinGetImpl(const Block & block, const Block & block_with_columns_to_add, const Maps & maps_) const
{
joinBlockImpl<ASTTableJoin::Kind::Left, ASTTableJoin::Strictness::RightAny>(
block, {block.getByPosition(0).name}, block_with_columns_to_add, maps_);
// Assemble the key block with correct names.
Block keys;
for (size_t i = 0; i < block.columns(); ++i)
{
auto key = block.getByPosition(i);
key.name = key_names_right[i];
keys.insert(std::move(key));
}
joinBlockImpl<ASTTableJoin::Kind::Left, ASTTableJoin::Strictness::Any>(
keys, key_names_right, block_with_columns_to_add, maps_);
return keys.getByPosition(keys.columns() - 1);
}
// TODO: support composite key
// TODO: return multiple columns as named tuple
// TODO: return array of values when strictness == ASTTableJoin::Strictness::All
void HashJoin::joinGet(Block & block, const String & column_name, bool or_null) const
ColumnWithTypeAndName HashJoin::joinGet(const Block & block, const Block & block_with_columns_to_add) const
{
std::shared_lock lock(data->rwlock);
if (key_names_right.size() != 1)
throw Exception("joinGet only supports StorageJoin containing exactly one key", ErrorCodes::UNSUPPORTED_JOIN_KEYS);
checkTypeOfKey(block, right_table_keys);
auto elem = sample_block_with_columns_to_add.getByName(column_name);
if (or_null)
elem.type = makeNullable(elem.type);
elem.column = elem.type->createColumn();
if ((strictness == ASTTableJoin::Strictness::Any || strictness == ASTTableJoin::Strictness::RightAny) &&
kind == ASTTableJoin::Kind::Left)
{
joinGetImpl(block, {elem}, std::get<MapsOne>(data->maps));
return joinGetImpl(block, block_with_columns_to_add, std::get<MapsOne>(data->maps));
}
else
throw Exception("joinGet only supports StorageJoin of type Left Any", ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN);

View File

@ -162,11 +162,11 @@ public:
*/
void joinBlock(Block & block, ExtraBlockPtr & not_processed) override;
/// Infer the return type for joinGet function
DataTypePtr joinGetReturnType(const String & column_name, bool or_null) const;
/// Check joinGet arguments and infer the return type.
DataTypePtr joinGetCheckAndGetReturnType(const DataTypes & data_types, const String & column_name, bool or_null) const;
/// Used by joinGet function that turns StorageJoin into a dictionary
void joinGet(Block & block, const String & column_name, bool or_null) const;
/// Used by joinGet function that turns StorageJoin into a dictionary.
ColumnWithTypeAndName joinGet(const Block & block, const Block & block_with_columns_to_add) const;
/** Keep "totals" (separate part of dataset, see WITH TOTALS) to use later.
*/
@ -389,7 +389,7 @@ private:
void joinBlockImplCross(Block & block, ExtraBlockPtr & not_processed) const;
template <typename Maps>
void joinGetImpl(Block & block, const Block & block_with_columns_to_add, const Maps & maps_) const;
ColumnWithTypeAndName joinGetImpl(const Block & block, const Block & block_with_columns_to_add, const Maps & maps_) const;
static Type chooseMethod(const ColumnRawPtrs & key_columns, Sizes & key_sizes);
};

View File

@ -10,6 +10,7 @@ namespace DB
{
class Context;
using DatabaseAndTable = std::pair<DatabasePtr, StoragePtr>;
class AccessRightsElements;
/** Allow to either drop table with all its data (DROP),
* or remove information about table (just forget) from server (DETACH),

View File

@ -2,6 +2,7 @@
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDateTime64.h>
namespace DB
@ -11,9 +12,10 @@ Block MetricLogElement::createBlock()
{
ColumnsWithTypeAndName columns_with_type_and_name;
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeDate>(), "event_date");
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeDateTime>(), "event_time");
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt64>(), "milliseconds");
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeDate>(), "event_date");
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeDateTime>(), "event_time");
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeDateTime64>(6), "event_time_microseconds");
columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt64>(), "milliseconds");
for (size_t i = 0, end = ProfileEvents::end(); i < end; ++i)
{
@ -41,6 +43,7 @@ void MetricLogElement::appendToBlock(MutableColumns & columns) const
columns[column_idx++]->insert(DateLUT::instance().toDayNum(event_time));
columns[column_idx++]->insert(event_time);
columns[column_idx++]->insert(event_time_microseconds);
columns[column_idx++]->insert(milliseconds);
for (size_t i = 0, end = ProfileEvents::end(); i < end; ++i)
@ -80,6 +83,10 @@ inline UInt64 time_in_milliseconds(std::chrono::time_point<std::chrono::system_c
return std::chrono::duration_cast<std::chrono::milliseconds>(timepoint.time_since_epoch()).count();
}
/// Number of whole microseconds between the system_clock epoch and `timepoint`.
inline UInt64 time_in_microseconds(std::chrono::time_point<std::chrono::system_clock> timepoint)
{
    const auto since_epoch = timepoint.time_since_epoch();
    const auto as_microseconds = std::chrono::duration_cast<std::chrono::microseconds>(since_epoch);
    return as_microseconds.count();
}
inline UInt64 time_in_seconds(std::chrono::time_point<std::chrono::system_clock> timepoint)
{
@ -102,6 +109,7 @@ void MetricLog::metricThreadFunction()
MetricLogElement elem;
elem.event_time = std::chrono::system_clock::to_time_t(current_time);
elem.event_time_microseconds = time_in_microseconds(current_time);
elem.milliseconds = time_in_milliseconds(current_time) - time_in_seconds(current_time) * 1000;
elem.profile_events.resize(ProfileEvents::end());

View File

@ -18,6 +18,7 @@ namespace DB
struct MetricLogElement
{
time_t event_time{};
UInt64 event_time_microseconds{};
UInt64 milliseconds{};
std::vector<ProfileEvents::Count> profile_events;

View File

@ -28,7 +28,7 @@ inline bool functionIsLikeOperator(const std::string & name)
/// Returns true for every function whose name starts with "joinGet" (e.g. joinGet, joinGetOrNull).
/// Names starting with "dictGet" must not match here.
inline bool functionIsJoinGet(const std::string & name)
{
    return startsWith(name, "joinGet");
}
inline bool functionIsDictGet(const std::string & name)

View File

@ -110,7 +110,7 @@ void ASTColumnsReplaceTransformer::replaceChildren(ASTPtr & node, const ASTPtr &
if (const auto * id = child->as<ASTIdentifier>())
{
if (id->shortName() == name)
child = replacement;
child = replacement->clone();
}
else
replaceChildren(child, replacement, name);

View File

@ -1,4 +1,5 @@
#include <IO/ReadHelpers.h>
#include <IO/ReadBufferFromString.h>
#include <Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h>
#include <Formats/FormatFactory.h>
@ -19,8 +20,9 @@ JSONCompactEachRowRowInputFormat::JSONCompactEachRowRowInputFormat(ReadBuffer &
const Block & header_,
Params params_,
const FormatSettings & format_settings_,
bool with_names_)
: IRowInputFormat(header_, in_, std::move(params_)), format_settings(format_settings_), with_names(with_names_)
bool with_names_,
bool yield_strings_)
: IRowInputFormat(header_, in_, std::move(params_)), format_settings(format_settings_), with_names(with_names_), yield_strings(yield_strings_)
{
const auto & sample = getPort().getHeader();
size_t num_columns = sample.columns();
@ -200,10 +202,26 @@ void JSONCompactEachRowRowInputFormat::readField(size_t index, MutableColumns &
{
read_columns[index] = true;
const auto & type = data_types[index];
if (format_settings.null_as_default && !type->isNullable())
read_columns[index] = DataTypeNullable::deserializeTextJSON(*columns[index], in, format_settings, type);
if (yield_strings)
{
String str;
readJSONString(str, in);
ReadBufferFromString buf(str);
if (format_settings.null_as_default && !type->isNullable())
read_columns[index] = DataTypeNullable::deserializeWholeText(*columns[index], buf, format_settings, type);
else
type->deserializeAsWholeText(*columns[index], buf, format_settings);
}
else
type->deserializeAsTextJSON(*columns[index], in, format_settings);
{
if (format_settings.null_as_default && !type->isNullable())
read_columns[index] = DataTypeNullable::deserializeTextJSON(*columns[index], in, format_settings, type);
else
type->deserializeAsTextJSON(*columns[index], in, format_settings);
}
}
catch (Exception & e)
{
@ -225,7 +243,7 @@ void registerInputFormatProcessorJSONCompactEachRow(FormatFactory & factory)
IRowInputFormat::Params params,
const FormatSettings & settings)
{
return std::make_shared<JSONCompactEachRowRowInputFormat>(buf, sample, std::move(params), settings, false);
return std::make_shared<JSONCompactEachRowRowInputFormat>(buf, sample, std::move(params), settings, false, false);
});
factory.registerInputFormatProcessor("JSONCompactEachRowWithNamesAndTypes", [](
@ -234,7 +252,25 @@ void registerInputFormatProcessorJSONCompactEachRow(FormatFactory & factory)
IRowInputFormat::Params params,
const FormatSettings & settings)
{
return std::make_shared<JSONCompactEachRowRowInputFormat>(buf, sample, std::move(params), settings, true);
return std::make_shared<JSONCompactEachRowRowInputFormat>(buf, sample, std::move(params), settings, true, false);
});
factory.registerInputFormatProcessor("JSONCompactStringsEachRow", [](
ReadBuffer & buf,
const Block & sample,
IRowInputFormat::Params params,
const FormatSettings & settings)
{
return std::make_shared<JSONCompactEachRowRowInputFormat>(buf, sample, std::move(params), settings, false, true);
});
factory.registerInputFormatProcessor("JSONCompactStringsEachRowWithNamesAndTypes", [](
ReadBuffer & buf,
const Block & sample,
IRowInputFormat::Params params,
const FormatSettings & settings)
{
return std::make_shared<JSONCompactEachRowRowInputFormat>(buf, sample, std::move(params), settings, true, true);
});
}

View File

@ -1,7 +1,5 @@
#pragma once
#pragma once
#include <Core/Block.h>
#include <Processors/Formats/IRowInputFormat.h>
#include <Formats/FormatSettings.h>
@ -12,12 +10,23 @@ namespace DB
class ReadBuffer;
/** A stream for reading data in JSONCompactEachRow and JSONCompactEachRowWithNamesAndTypes formats
/** A stream for reading data in a bunch of formats:
* - JSONCompactEachRow
* - JSONCompactEachRowWithNamesAndTypes
* - JSONCompactStringsEachRow
* - JSONCompactStringsEachRowWithNamesAndTypes
*
*/
class JSONCompactEachRowRowInputFormat : public IRowInputFormat
{
public:
JSONCompactEachRowRowInputFormat(ReadBuffer & in_, const Block & header_, Params params_, const FormatSettings & format_settings_, bool with_names_);
JSONCompactEachRowRowInputFormat(
ReadBuffer & in_,
const Block & header_,
Params params_,
const FormatSettings & format_settings_,
bool with_names_,
bool yield_strings_);
String getName() const override { return "JSONCompactEachRowRowInputFormat"; }
@ -48,7 +57,10 @@ private:
/// This is for the correct exceptions in skipping unknown fields.
std::vector<String> names_of_columns;
/// For *WithNamesAndTypes formats.
bool with_names;
/// For JSONCompactString* formats.
bool yield_strings;
};
}

View File

@ -12,8 +12,9 @@ JSONCompactEachRowRowOutputFormat::JSONCompactEachRowRowOutputFormat(WriteBuffer
const Block & header_,
FormatFactory::WriteCallback callback,
const FormatSettings & settings_,
bool with_names_)
: IRowOutputFormat(header_, out_, callback), settings(settings_), with_names(with_names_)
bool with_names_,
bool yield_strings_)
: IRowOutputFormat(header_, out_, callback), settings(settings_), with_names(with_names_), yield_strings(yield_strings_)
{
const auto & sample = getPort(PortKind::Main).getHeader();
NamesAndTypesList columns(sample.getNamesAndTypesList());
@ -23,7 +24,15 @@ JSONCompactEachRowRowOutputFormat::JSONCompactEachRowRowOutputFormat(WriteBuffer
/// Writes one field of the current row to `out`, exactly once.
/// With `yield_strings` set, the value is first serialized as plain text and that text is then
/// written as a JSON string; otherwise the value is serialized directly as JSON.
void JSONCompactEachRowRowOutputFormat::writeField(const IColumn & column, const IDataType & type, size_t row_num)
{
    if (yield_strings)
    {
        /// Render into a temporary buffer so the text can be escaped as a JSON string.
        WriteBufferFromOwnString buf;
        type.serializeAsText(column, row_num, buf, settings);
        writeJSONString(buf.str(), out, settings);
    }
    else
        type.serializeAsTextJSON(column, row_num, out, settings);
}
@ -97,7 +106,7 @@ void registerOutputFormatProcessorJSONCompactEachRow(FormatFactory & factory)
FormatFactory::WriteCallback callback,
const FormatSettings & format_settings)
{
return std::make_shared<JSONCompactEachRowRowOutputFormat>(buf, sample, callback, format_settings, false);
return std::make_shared<JSONCompactEachRowRowOutputFormat>(buf, sample, callback, format_settings, false, false);
});
factory.registerOutputFormatProcessor("JSONCompactEachRowWithNamesAndTypes", [](
@ -106,7 +115,25 @@ void registerOutputFormatProcessorJSONCompactEachRow(FormatFactory & factory)
FormatFactory::WriteCallback callback,
const FormatSettings &format_settings)
{
return std::make_shared<JSONCompactEachRowRowOutputFormat>(buf, sample, callback, format_settings, true);
return std::make_shared<JSONCompactEachRowRowOutputFormat>(buf, sample, callback, format_settings, true, false);
});
factory.registerOutputFormatProcessor("JSONCompactStringsEachRow", [](
WriteBuffer & buf,
const Block & sample,
FormatFactory::WriteCallback callback,
const FormatSettings & format_settings)
{
return std::make_shared<JSONCompactEachRowRowOutputFormat>(buf, sample, callback, format_settings, false, true);
});
factory.registerOutputFormatProcessor("JSONCompactStringsEachRowWithNamesAndTypes", [](
WriteBuffer &buf,
const Block &sample,
FormatFactory::WriteCallback callback,
const FormatSettings &format_settings)
{
return std::make_shared<JSONCompactEachRowRowOutputFormat>(buf, sample, callback, format_settings, true, true);
});
}

View File

@ -15,7 +15,13 @@ namespace DB
class JSONCompactEachRowRowOutputFormat : public IRowOutputFormat
{
public:
JSONCompactEachRowRowOutputFormat(WriteBuffer & out_, const Block & header_, FormatFactory::WriteCallback callback, const FormatSettings & settings_, bool with_names);
JSONCompactEachRowRowOutputFormat(
WriteBuffer & out_,
const Block & header_,
FormatFactory::WriteCallback callback,
const FormatSettings & settings_,
bool with_names_,
bool yield_strings_);
String getName() const override { return "JSONCompactEachRowRowOutputFormat"; }
@ -41,5 +47,6 @@ private:
NamesAndTypes fields;
bool with_names;
bool yield_strings;
};
}

View File

@ -8,15 +8,28 @@ namespace DB
{
JSONCompactRowOutputFormat::JSONCompactRowOutputFormat(
WriteBuffer & out_, const Block & header, FormatFactory::WriteCallback callback, const FormatSettings & settings_)
: JSONRowOutputFormat(out_, header, callback, settings_)
WriteBuffer & out_,
const Block & header,
FormatFactory::WriteCallback callback,
const FormatSettings & settings_,
bool yield_strings_)
: JSONRowOutputFormat(out_, header, callback, settings_, yield_strings_)
{
}
/// Writes one field of the current row to `*ostr`, exactly once, and advances `field_number`.
/// With `yield_strings` set, the value is first serialized as plain text and that text is then
/// written as a JSON string; otherwise the value is serialized directly as JSON.
void JSONCompactRowOutputFormat::writeField(const IColumn & column, const IDataType & type, size_t row_num)
{
    if (yield_strings)
    {
        /// Render into a temporary buffer so the text can be escaped as a JSON string.
        WriteBufferFromOwnString buf;
        type.serializeAsText(column, row_num, buf, settings);
        writeJSONString(buf.str(), *ostr, settings);
    }
    else
        type.serializeAsTextJSON(column, row_num, *ostr, settings);

    ++field_number;
}
@ -83,7 +96,16 @@ void registerOutputFormatProcessorJSONCompact(FormatFactory & factory)
FormatFactory::WriteCallback callback,
const FormatSettings & format_settings)
{
return std::make_shared<JSONCompactRowOutputFormat>(buf, sample, callback, format_settings);
return std::make_shared<JSONCompactRowOutputFormat>(buf, sample, callback, format_settings, false);
});
factory.registerOutputFormatProcessor("JSONCompactStrings", [](
WriteBuffer & buf,
const Block & sample,
FormatFactory::WriteCallback callback,
const FormatSettings & format_settings)
{
return std::make_shared<JSONCompactRowOutputFormat>(buf, sample, callback, format_settings, true);
});
}

View File

@ -11,12 +11,17 @@ namespace DB
struct FormatSettings;
/** The stream for outputting data in the JSONCompact format.
/** The stream for outputting data in the JSONCompact- formats.
*/
class JSONCompactRowOutputFormat : public JSONRowOutputFormat
{
public:
JSONCompactRowOutputFormat(WriteBuffer & out_, const Block & header, FormatFactory::WriteCallback callback, const FormatSettings & settings_);
JSONCompactRowOutputFormat(
WriteBuffer & out_,
const Block & header,
FormatFactory::WriteCallback callback,
const FormatSettings & settings_,
bool yield_strings_);
String getName() const override { return "JSONCompactRowOutputFormat"; }
@ -37,7 +42,6 @@ protected:
}
void writeTotalsFieldDelimiter() override;
};
}

View File

@ -1,4 +1,5 @@
#include <IO/ReadHelpers.h>
#include <IO/ReadBufferFromString.h>
#include <Processors/Formats/Impl/JSONEachRowRowInputFormat.h>
#include <Formats/JSONEachRowUtils.h>
@ -29,8 +30,12 @@ enum
JSONEachRowRowInputFormat::JSONEachRowRowInputFormat(
ReadBuffer & in_, const Block & header_, Params params_, const FormatSettings & format_settings_)
: IRowInputFormat(header_, in_, std::move(params_)), format_settings(format_settings_), name_map(header_.columns())
ReadBuffer & in_,
const Block & header_,
Params params_,
const FormatSettings & format_settings_,
bool yield_strings_)
: IRowInputFormat(header_, in_, std::move(params_)), format_settings(format_settings_), name_map(header_.columns()), yield_strings(yield_strings_)
{
size_t num_columns = getPort().getHeader().columns();
for (size_t i = 0; i < num_columns; ++i)
@ -135,10 +140,26 @@ void JSONEachRowRowInputFormat::readField(size_t index, MutableColumns & columns
{
seen_columns[index] = read_columns[index] = true;
const auto & type = getPort().getHeader().getByPosition(index).type;
if (format_settings.null_as_default && !type->isNullable())
read_columns[index] = DataTypeNullable::deserializeTextJSON(*columns[index], in, format_settings, type);
if (yield_strings)
{
String str;
readJSONString(str, in);
ReadBufferFromString buf(str);
if (format_settings.null_as_default && !type->isNullable())
read_columns[index] = DataTypeNullable::deserializeWholeText(*columns[index], buf, format_settings, type);
else
type->deserializeAsWholeText(*columns[index], buf, format_settings);
}
else
type->deserializeAsTextJSON(*columns[index], in, format_settings);
{
if (format_settings.null_as_default && !type->isNullable())
read_columns[index] = DataTypeNullable::deserializeTextJSON(*columns[index], in, format_settings, type);
else
type->deserializeAsTextJSON(*columns[index], in, format_settings);
}
}
catch (Exception & e)
{
@ -318,13 +339,23 @@ void registerInputFormatProcessorJSONEachRow(FormatFactory & factory)
IRowInputFormat::Params params,
const FormatSettings & settings)
{
return std::make_shared<JSONEachRowRowInputFormat>(buf, sample, std::move(params), settings);
return std::make_shared<JSONEachRowRowInputFormat>(buf, sample, std::move(params), settings, false);
});
factory.registerInputFormatProcessor("JSONStringsEachRow", [](
ReadBuffer & buf,
const Block & sample,
IRowInputFormat::Params params,
const FormatSettings & settings)
{
return std::make_shared<JSONEachRowRowInputFormat>(buf, sample, std::move(params), settings, true);
});
}
void registerFileSegmentationEngineJSONEachRow(FormatFactory & factory)
{
factory.registerFileSegmentationEngine("JSONEachRow", &fileSegmentationEngineJSONEachRowImpl);
factory.registerFileSegmentationEngine("JSONStringsEachRow", &fileSegmentationEngineJSONEachRowImpl);
}
}

View File

@ -20,7 +20,12 @@ class ReadBuffer;
class JSONEachRowRowInputFormat : public IRowInputFormat
{
public:
JSONEachRowRowInputFormat(ReadBuffer & in_, const Block & header_, Params params_, const FormatSettings & format_settings_);
JSONEachRowRowInputFormat(
ReadBuffer & in_,
const Block & header_,
Params params_,
const FormatSettings & format_settings_,
bool yield_strings_);
String getName() const override { return "JSONEachRowRowInputFormat"; }
@ -75,6 +80,8 @@ private:
bool data_in_square_brackets = false;
bool allow_new_rows = true;
bool yield_strings;
};
}

View File

@ -8,8 +8,13 @@ namespace DB
{
JSONEachRowRowOutputFormat::JSONEachRowRowOutputFormat(WriteBuffer & out_, const Block & header_, FormatFactory::WriteCallback callback, const FormatSettings & settings_)
: IRowOutputFormat(header_, out_, callback), settings(settings_)
JSONEachRowRowOutputFormat::JSONEachRowRowOutputFormat(
WriteBuffer & out_,
const Block & header_,
FormatFactory::WriteCallback callback,
const FormatSettings & settings_,
bool yield_strings_)
: IRowOutputFormat(header_, out_, callback), settings(settings_), yield_strings(yield_strings_)
{
const auto & sample = getPort(PortKind::Main).getHeader();
size_t columns = sample.columns();
@ -27,7 +32,17 @@ void JSONEachRowRowOutputFormat::writeField(const IColumn & column, const IDataT
{
writeString(fields[field_number], out);
writeChar(':', out);
type.serializeAsTextJSON(column, row_num, out, settings);
if (yield_strings)
{
WriteBufferFromOwnString buf;
type.serializeAsText(column, row_num, buf, settings);
writeJSONString(buf.str(), out, settings);
}
else
type.serializeAsTextJSON(column, row_num, out, settings);
++field_number;
}
@ -59,7 +74,16 @@ void registerOutputFormatProcessorJSONEachRow(FormatFactory & factory)
FormatFactory::WriteCallback callback,
const FormatSettings & format_settings)
{
return std::make_shared<JSONEachRowRowOutputFormat>(buf, sample, callback, format_settings);
return std::make_shared<JSONEachRowRowOutputFormat>(buf, sample, callback, format_settings, false);
});
factory.registerOutputFormatProcessor("JSONStringsEachRow", [](
WriteBuffer & buf,
const Block & sample,
FormatFactory::WriteCallback callback,
const FormatSettings & format_settings)
{
return std::make_shared<JSONEachRowRowOutputFormat>(buf, sample, callback, format_settings, true);
});
}

View File

@ -15,7 +15,12 @@ namespace DB
class JSONEachRowRowOutputFormat : public IRowOutputFormat
{
public:
JSONEachRowRowOutputFormat(WriteBuffer & out_, const Block & header_, FormatFactory::WriteCallback callback, const FormatSettings & settings_);
JSONEachRowRowOutputFormat(
WriteBuffer & out_,
const Block & header_,
FormatFactory::WriteCallback callback,
const FormatSettings & settings_,
bool yield_strings_);
String getName() const override { return "JSONEachRowRowOutputFormat"; }
@ -35,6 +40,9 @@ private:
Names fields;
FormatSettings settings;
protected:
bool yield_strings;
};
}

View File

@ -36,7 +36,16 @@ void registerOutputFormatProcessorJSONEachRowWithProgress(FormatFactory & factor
FormatFactory::WriteCallback callback,
const FormatSettings & format_settings)
{
return std::make_shared<JSONEachRowWithProgressRowOutputFormat>(buf, sample, callback, format_settings);
return std::make_shared<JSONEachRowWithProgressRowOutputFormat>(buf, sample, callback, format_settings, false);
});
factory.registerOutputFormatProcessor("JSONStringsEachRowWithProgress", [](
WriteBuffer & buf,
const Block & sample,
FormatFactory::WriteCallback callback,
const FormatSettings & format_settings)
{
return std::make_shared<JSONEachRowWithProgressRowOutputFormat>(buf, sample, callback, format_settings, true);
});
}

View File

@ -7,8 +7,13 @@
namespace DB
{
JSONRowOutputFormat::JSONRowOutputFormat(WriteBuffer & out_, const Block & header, FormatFactory::WriteCallback callback, const FormatSettings & settings_)
: IRowOutputFormat(header, out_, callback), settings(settings_)
JSONRowOutputFormat::JSONRowOutputFormat(
WriteBuffer & out_,
const Block & header,
FormatFactory::WriteCallback callback,
const FormatSettings & settings_,
bool yield_strings_)
: IRowOutputFormat(header, out_, callback), settings(settings_), yield_strings(yield_strings_)
{
const auto & sample = getPort(PortKind::Main).getHeader();
NamesAndTypesList columns(sample.getNamesAndTypesList());
@ -71,7 +76,17 @@ void JSONRowOutputFormat::writeField(const IColumn & column, const IDataType & t
writeCString("\t\t\t", *ostr);
writeString(fields[field_number].name, *ostr);
writeCString(": ", *ostr);
type.serializeAsTextJSON(column, row_num, *ostr, settings);
if (yield_strings)
{
WriteBufferFromOwnString buf;
type.serializeAsText(column, row_num, buf, settings);
writeJSONString(buf.str(), *ostr, settings);
}
else
type.serializeAsTextJSON(column, row_num, *ostr, settings);
++field_number;
}
@ -80,7 +95,17 @@ void JSONRowOutputFormat::writeTotalsField(const IColumn & column, const IDataTy
writeCString("\t\t", *ostr);
writeString(fields[field_number].name, *ostr);
writeCString(": ", *ostr);
type.serializeAsTextJSON(column, row_num, *ostr, settings);
if (yield_strings)
{
WriteBufferFromOwnString buf;
type.serializeAsText(column, row_num, buf, settings);
writeJSONString(buf.str(), *ostr, settings);
}
else
type.serializeAsTextJSON(column, row_num, *ostr, settings);
++field_number;
}
@ -249,7 +274,16 @@ void registerOutputFormatProcessorJSON(FormatFactory & factory)
FormatFactory::WriteCallback callback,
const FormatSettings & format_settings)
{
return std::make_shared<JSONRowOutputFormat>(buf, sample, callback, format_settings);
return std::make_shared<JSONRowOutputFormat>(buf, sample, callback, format_settings, false);
});
factory.registerOutputFormatProcessor("JSONStrings", [](
WriteBuffer & buf,
const Block & sample,
FormatFactory::WriteCallback callback,
const FormatSettings & format_settings)
{
return std::make_shared<JSONRowOutputFormat>(buf, sample, callback, format_settings, true);
});
}

View File

@ -16,7 +16,12 @@ namespace DB
class JSONRowOutputFormat : public IRowOutputFormat
{
public:
JSONRowOutputFormat(WriteBuffer & out_, const Block & header, FormatFactory::WriteCallback callback, const FormatSettings & settings_);
JSONRowOutputFormat(
WriteBuffer & out_,
const Block & header,
FormatFactory::WriteCallback callback,
const FormatSettings & settings_,
bool yield_strings_);
String getName() const override { return "JSONRowOutputFormat"; }
@ -78,6 +83,8 @@ protected:
Progress progress;
Stopwatch watch;
FormatSettings settings;
bool yield_strings;
};
}

View File

@ -0,0 +1,85 @@
#include <Processors/Formats/Impl/LineAsStringRowInputFormat.h>
#include <Formats/JSONEachRowUtils.h>
#include <common/find_symbols.h>
#include <IO/ReadHelpers.h>
namespace DB
{
namespace ErrorCodes
{
extern const int INCORRECT_QUERY;
}
/// Creates the format reading from `in_` through the peekable buffer `buf`.
/// Throws INCORRECT_QUERY unless the header consists of exactly one column of type String.
LineAsStringRowInputFormat::LineAsStringRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_) :
    IRowInputFormat(header_, in_, std::move(params_)), buf(in)
{
    /// The number of columns is checked first and must be exactly one:
    /// with an empty header there is no element 0 whose type could be inspected.
    if (header_.columns() != 1 || header_.getDataTypes()[0]->getTypeId() != TypeIndex::String)
    {
        throw Exception("This input format is only suitable for tables with a single column of type String.", ErrorCodes::INCORRECT_QUERY);
    }
}
/// Resets the state of the base IRowInputFormat parser and then of the peekable buffer `buf`.
void LineAsStringRowInputFormat::resetParser()
{
IRowInputFormat::resetParser();
buf.reset();
}
/// Reads one line from `buf` and appends it to `column` as a single value, without the terminating '\n'.
/// While searching for the end of the line, the character that follows a backslash is skipped,
/// so a backslash-escaped '\n' does not terminate the line. The inserted data is the raw text of the line:
/// backslashes are not removed or unescaped here.
void LineAsStringRowInputFormat::readLineObject(IColumn & column)
{
/// Presumably remembers the current position as the start of the line; rollbackToCheckpoint() below returns to it.
PeekableReadBufferCheckpoint checkpoint{buf};
/// Despite its name, `newline` is true while the terminating '\n' has NOT been found yet.
bool newline = true;
/// Set when the search stopped at the end of the buffer instead of at a '\n'.
bool over = false;
char * pos;
while (newline)
{
/// Jump to the next '\n' or '\\' (or to the end of the buffer if there is none).
pos = find_first_symbols<'\n', '\\'>(buf.position(), buf.buffer().end());
buf.position() = pos;
if (buf.position() == buf.buffer().end())
{
/// NOTE(review): the end of the currently available buffer is treated as the end of the line
/// without checking whether more input follows - confirm a line cannot continue in the next chunk.
over = true;
break;
}
else if (*buf.position() == '\n')
{
newline = false;
}
else if (*buf.position() == '\\')
{
/// Step over the backslash and, if there is more data, over the character it escapes.
++buf.position();
if (!buf.eof())
++buf.position();
}
}
buf.makeContinuousMemoryFromCheckpointToPos();
/// `end` points just past the line: past the '\n' if one was found, otherwise at the stop position.
char * end = over ? buf.position(): ++buf.position();
buf.rollbackToCheckpoint();
/// After the rollback buf.position() is the start of the line; the length excludes the '\n' when it was found.
column.insertData(buf.position(), end - (over ? 0 : 1) - buf.position());
/// Continue reading after the consumed line.
buf.position() = end;
}
/// Reads the next line into the only column.
/// Returns true if a row was appended to `columns`, false if the input is exhausted and nothing was read.
/// The result must not depend on whether more data follows the row just read:
/// otherwise the last line of the input would be appended and still reported as "no row".
bool LineAsStringRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &)
{
    if (buf.eof())
        return false;

    readLineObject(*columns[0]);
    return true;
}
/// Registers the "LineAsString" input format in the factory.
void registerInputFormatProcessorLineAsString(FormatFactory & factory)
{
    /// The FormatSettings argument is not used by this format.
    auto create_format = [](
        ReadBuffer & in,
        const Block & header,
        const RowInputFormatParams & format_params,
        const FormatSettings &)
    {
        return std::make_shared<LineAsStringRowInputFormat>(header, in, format_params);
    };

    factory.registerInputFormatProcessor("LineAsString", create_format);
}
}

View File

@ -0,0 +1,31 @@
#pragma once
#include <Processors/Formats/IRowInputFormat.h>
#include <Formats/FormatFactory.h>
#include <IO/PeekableReadBuffer.h>
namespace DB
{
class ReadBuffer;
/// This format reads the input line by line: every line (terminated by '\n') becomes one row.
/// Each line is stored as a whole into a String value, without the terminating '\n'.
/// This format can only be used with a table that has a single column of type String.
class LineAsStringRowInputFormat : public IRowInputFormat
{
public:
/// Throws if `header_` is not suitable for this format (see the constructor definition for the exact check).
LineAsStringRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_);
/// Reads the next line into the first column.
bool readRow(MutableColumns & columns, RowReadExtension & ext) override;
String getName() const override { return "LineAsStringRowInputFormat"; }
void resetParser() override;
private:
/// Reads a single line from `buf` and appends it to `column`.
void readLineObject(IColumn & column);
/// Peekable buffer the lines are read from.
PeekableReadBuffer buf;
};
}

View File

@ -33,6 +33,7 @@ SRCS(
Formats/Impl/JSONEachRowRowOutputFormat.cpp
Formats/Impl/JSONEachRowWithProgressRowOutputFormat.cpp
Formats/Impl/JSONRowOutputFormat.cpp
Formats/Impl/LineAsStringRowInputFormat.cpp
Formats/Impl/MarkdownRowOutputFormat.cpp
Formats/Impl/MsgPackRowInputFormat.cpp
Formats/Impl/MsgPackRowOutputFormat.cpp

View File

@ -16,27 +16,17 @@ class LiveViewBlockInputStream : public IBlockInputStream
using NonBlockingResult = std::pair<Block, bool>;
public:
~LiveViewBlockInputStream() override
{
/// Start storage no users thread
/// if we are the last active user
if (!storage->is_dropped && blocks_ptr.use_count() < 3)
storage->startNoUsersThread(temporary_live_view_timeout_sec);
}
LiveViewBlockInputStream(std::shared_ptr<StorageLiveView> storage_,
std::shared_ptr<BlocksPtr> blocks_ptr_,
std::shared_ptr<BlocksMetadataPtr> blocks_metadata_ptr_,
std::shared_ptr<bool> active_ptr_,
const bool has_limit_, const UInt64 limit_,
const UInt64 heartbeat_interval_sec_,
const UInt64 temporary_live_view_timeout_sec_)
const UInt64 heartbeat_interval_sec_)
: storage(std::move(storage_)), blocks_ptr(std::move(blocks_ptr_)),
blocks_metadata_ptr(std::move(blocks_metadata_ptr_)),
active_ptr(std::move(active_ptr_)),
has_limit(has_limit_), limit(limit_),
heartbeat_interval_usec(heartbeat_interval_sec_ * 1000000),
temporary_live_view_timeout_sec(temporary_live_view_timeout_sec_)
heartbeat_interval_usec(heartbeat_interval_sec_ * 1000000)
{
/// grab active pointer
active = active_ptr.lock();
@ -205,7 +195,6 @@ private:
Int64 num_updates = -1;
bool end_of_blocks = false;
UInt64 heartbeat_interval_usec;
UInt64 temporary_live_view_timeout_sec;
UInt64 last_event_timestamp_usec = 0;
};

View File

@ -34,13 +34,6 @@ class LiveViewEventsBlockInputStream : public IBlockInputStream
using NonBlockingResult = std::pair<Block, bool>;
public:
~LiveViewEventsBlockInputStream() override
{
/// Start storage no users thread
/// if we are the last active user
if (!storage->is_dropped && blocks_ptr.use_count() < 3)
storage->startNoUsersThread(temporary_live_view_timeout_sec);
}
/// length default -2 because we want LIMIT to specify number of updates so that LIMIT 1 waits for 1 update
/// and LIMIT 0 just returns data without waiting for any updates
LiveViewEventsBlockInputStream(std::shared_ptr<StorageLiveView> storage_,
@ -48,14 +41,12 @@ public:
std::shared_ptr<BlocksMetadataPtr> blocks_metadata_ptr_,
std::shared_ptr<bool> active_ptr_,
const bool has_limit_, const UInt64 limit_,
const UInt64 heartbeat_interval_sec_,
const UInt64 temporary_live_view_timeout_sec_)
const UInt64 heartbeat_interval_sec_)
: storage(std::move(storage_)), blocks_ptr(std::move(blocks_ptr_)),
blocks_metadata_ptr(std::move(blocks_metadata_ptr_)),
active_ptr(std::move(active_ptr_)), has_limit(has_limit_),
limit(limit_),
heartbeat_interval_usec(heartbeat_interval_sec_ * 1000000),
temporary_live_view_timeout_sec(temporary_live_view_timeout_sec_)
heartbeat_interval_usec(heartbeat_interval_sec_ * 1000000)
{
/// grab active pointer
active = active_ptr.lock();
@ -236,7 +227,6 @@ private:
Int64 num_updates = -1;
bool end_of_blocks = false;
UInt64 heartbeat_interval_usec;
UInt64 temporary_live_view_timeout_sec;
UInt64 last_event_timestamp_usec = 0;
Poco::Timestamp timestamp;
};

View File

@ -12,10 +12,8 @@ limitations under the License. */
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTWatchQuery.h>
#include <Parsers/ASTDropQuery.h>
#include <Parsers/ASTLiteral.h>
#include <Interpreters/Context.h>
#include <Interpreters/InterpreterDropQuery.h>
#include <Interpreters/InterpreterSelectQuery.h>
#include <DataStreams/IBlockOutputStream.h>
#include <DataStreams/OneBlockInputStream.h>
@ -31,6 +29,7 @@ limitations under the License. */
#include <Storages/LiveView/LiveViewBlockOutputStream.h>
#include <Storages/LiveView/LiveViewEventsBlockInputStream.h>
#include <Storages/LiveView/StorageBlocks.h>
#include <Storages/LiveView/TemporaryLiveViewCleaner.h>
#include <Storages/StorageFactory.h>
#include <Parsers/ASTTablesInSelectQuery.h>
@ -276,7 +275,7 @@ StorageLiveView::StorageLiveView(
if (query.live_view_timeout)
{
is_temporary = true;
temporary_live_view_timeout = *query.live_view_timeout;
temporary_live_view_timeout = std::chrono::seconds{*query.live_view_timeout};
}
blocks_ptr = std::make_shared<BlocksPtr>();
@ -384,128 +383,21 @@ void StorageLiveView::checkTableCanBeDropped() const
}
}
void StorageLiveView::noUsersThread(std::shared_ptr<StorageLiveView> storage, const UInt64 & timeout)
{
bool drop_table = false;
if (storage->shutdown_called)
return;
auto table_id = storage->getStorageID();
{
while (true)
{
std::unique_lock lock(storage->no_users_thread_wakeup_mutex);
if (!storage->no_users_thread_condition.wait_for(lock, std::chrono::seconds(timeout), [&] { return storage->no_users_thread_wakeup; }))
{
storage->no_users_thread_wakeup = false;
if (storage->shutdown_called)
return;
if (storage->hasUsers())
return;
if (!DatabaseCatalog::instance().getDependencies(table_id).empty())
continue;
drop_table = true;
}
break;
}
}
if (drop_table)
{
if (DatabaseCatalog::instance().tryGetTable(table_id, storage->global_context))
{
try
{
/// We create and execute `drop` query for this table
auto drop_query = std::make_shared<ASTDropQuery>();
drop_query->database = table_id.database_name;
drop_query->table = table_id.table_name;
drop_query->kind = ASTDropQuery::Kind::Drop;
ASTPtr ast_drop_query = drop_query;
InterpreterDropQuery drop_interpreter(ast_drop_query, storage->global_context);
drop_interpreter.execute();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
}
}
void StorageLiveView::startNoUsersThread(const UInt64 & timeout)
{
bool expected = false;
if (!start_no_users_thread_called.compare_exchange_strong(expected, true))
return;
if (is_temporary)
{
std::lock_guard no_users_thread_lock(no_users_thread_mutex);
if (shutdown_called)
return;
if (no_users_thread.joinable())
{
{
std::lock_guard lock(no_users_thread_wakeup_mutex);
no_users_thread_wakeup = true;
no_users_thread_condition.notify_one();
}
no_users_thread.join();
}
{
std::lock_guard lock(no_users_thread_wakeup_mutex);
no_users_thread_wakeup = false;
}
if (!is_dropped)
no_users_thread = std::thread(&StorageLiveView::noUsersThread,
std::static_pointer_cast<StorageLiveView>(shared_from_this()), timeout);
}
start_no_users_thread_called = false;
}
void StorageLiveView::startup()
{
startNoUsersThread(temporary_live_view_timeout);
if (is_temporary)
TemporaryLiveViewCleaner::instance().addView(std::static_pointer_cast<StorageLiveView>(shared_from_this()));
}
void StorageLiveView::shutdown()
{
shutdown_called = true;
DatabaseCatalog::instance().removeDependency(select_table_id, getStorageID());
bool expected = false;
if (!shutdown_called.compare_exchange_strong(expected, true))
return;
/// WATCH queries should be stopped after setting shutdown_called to true.
/// Otherwise livelock is possible for LiveView table in Atomic database:
/// WATCH query will wait for table to be dropped and DatabaseCatalog will wait for queries to finish
{
std::lock_guard no_users_thread_lock(no_users_thread_mutex);
if (no_users_thread.joinable())
{
{
std::lock_guard lock(no_users_thread_wakeup_mutex);
no_users_thread_wakeup = true;
no_users_thread_condition.notify_one();
}
}
}
}
StorageLiveView::~StorageLiveView()
{
shutdown();
{
std::lock_guard lock(no_users_thread_mutex);
if (no_users_thread.joinable())
no_users_thread.detach();
}
}
void StorageLiveView::drop()
@ -573,18 +465,7 @@ BlockInputStreams StorageLiveView::watch(
auto reader = std::make_shared<LiveViewEventsBlockInputStream>(
std::static_pointer_cast<StorageLiveView>(shared_from_this()),
blocks_ptr, blocks_metadata_ptr, active_ptr, has_limit, limit,
context.getSettingsRef().live_view_heartbeat_interval.totalSeconds(),
temporary_live_view_timeout);
{
std::lock_guard no_users_thread_lock(no_users_thread_mutex);
if (no_users_thread.joinable())
{
std::lock_guard lock(no_users_thread_wakeup_mutex);
no_users_thread_wakeup = true;
no_users_thread_condition.notify_one();
}
}
context.getSettingsRef().live_view_heartbeat_interval.totalSeconds());
{
std::lock_guard lock(mutex);
@ -604,18 +485,7 @@ BlockInputStreams StorageLiveView::watch(
auto reader = std::make_shared<LiveViewBlockInputStream>(
std::static_pointer_cast<StorageLiveView>(shared_from_this()),
blocks_ptr, blocks_metadata_ptr, active_ptr, has_limit, limit,
context.getSettingsRef().live_view_heartbeat_interval.totalSeconds(),
temporary_live_view_timeout);
{
std::lock_guard no_users_thread_lock(no_users_thread_mutex);
if (no_users_thread.joinable())
{
std::lock_guard lock(no_users_thread_wakeup_mutex);
no_users_thread_wakeup = true;
no_users_thread_condition.notify_one();
}
}
context.getSettingsRef().live_view_heartbeat_interval.totalSeconds());
{
std::lock_guard lock(mutex);

View File

@ -38,6 +38,10 @@ using ASTPtr = std::shared_ptr<IAST>;
using BlocksMetadataPtr = std::shared_ptr<BlocksMetadata>;
using MergeableBlocksPtr = std::shared_ptr<MergeableBlocks>;
class Pipe;
using Pipes = std::vector<Pipe>;
class StorageLiveView final : public ext::shared_ptr_helper<StorageLiveView>, public IStorage
{
friend struct ext::shared_ptr_helper<StorageLiveView>;
@ -70,7 +74,9 @@ public:
NamesAndTypesList getVirtuals() const override;
bool isTemporary() { return is_temporary; }
bool isTemporary() const { return is_temporary; }
std::chrono::seconds getTimeout() const { return temporary_live_view_timeout; }
/// Check if we have any readers
/// must be called with mutex locked
@ -85,11 +91,7 @@ public:
{
return active_ptr.use_count() > 1;
}
/// No users thread mutex, predicate and wake up condition
void startNoUsersThread(const UInt64 & timeout);
std::mutex no_users_thread_wakeup_mutex;
bool no_users_thread_wakeup = false;
std::condition_variable no_users_thread_condition;
/// Get blocks hash
/// must be called with mutex locked
String getBlocksHashKey()
@ -175,6 +177,8 @@ private:
std::unique_ptr<Context> live_view_context;
bool is_temporary = false;
std::chrono::seconds temporary_live_view_timeout;
/// Mutex to protect access to sample block and inner_blocks_query
mutable std::mutex sample_block_lock;
mutable Block sample_block;
@ -193,14 +197,7 @@ private:
std::shared_ptr<BlocksMetadataPtr> blocks_metadata_ptr;
MergeableBlocksPtr mergeable_blocks;
/// Background thread for temporary tables
/// which drops this table if there are no users
static void noUsersThread(std::shared_ptr<StorageLiveView> storage, const UInt64 & timeout);
std::mutex no_users_thread_mutex;
std::thread no_users_thread;
std::atomic<bool> shutdown_called = false;
std::atomic<bool> start_no_users_thread_called = false;
UInt64 temporary_live_view_timeout;
StorageLiveView(
const StorageID & table_id_,

View File

@ -0,0 +1,148 @@
#include <Storages/LiveView/TemporaryLiveViewCleaner.h>
#include <Storages/LiveView/StorageLiveView.h>
#include <Interpreters/DatabaseCatalog.h>
#include <Interpreters/InterpreterDropQuery.h>
#include <Parsers/ASTDropQuery.h>
#include <algorithm>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
namespace
{
void executeDropQuery(const StorageID & storage_id, Context & context)
{
if (!DatabaseCatalog::instance().isTableExist(storage_id, context))
return;
try
{
/// We create and execute `drop` query for this table
auto drop_query = std::make_shared<ASTDropQuery>();
drop_query->database = storage_id.database_name;
drop_query->table = storage_id.table_name;
drop_query->kind = ASTDropQuery::Kind::Drop;
ASTPtr ast_drop_query = drop_query;
InterpreterDropQuery drop_interpreter(ast_drop_query, context);
drop_interpreter.execute();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
}
std::unique_ptr<TemporaryLiveViewCleaner> TemporaryLiveViewCleaner::the_instance;
/// Creates the single instance of the cleaner.
/// Throws LOGICAL_ERROR if the instance has already been created.
void TemporaryLiveViewCleaner::init(Context & global_context_)
{
if (the_instance)
throw Exception("TemporaryLiveViewCleaner already initialized", ErrorCodes::LOGICAL_ERROR);
/// Plain `new` is used here; the constructor is private, so it is only accessible from inside the class.
the_instance.reset(new TemporaryLiveViewCleaner(global_context_));
}
/// Destroys the single instance (if any); the destructor stops the background thread.
void TemporaryLiveViewCleaner::shutdown()
{
the_instance.reset();
}
/// Only stores a reference to the context; the background thread is not started here, it is started by addView().
TemporaryLiveViewCleaner::TemporaryLiveViewCleaner(Context & global_context_)
: global_context(global_context_)
{
}
/// Stops and joins the background thread if it was started.
TemporaryLiveViewCleaner::~TemporaryLiveViewCleaner()
{
stopBackgroundThread();
}
void TemporaryLiveViewCleaner::addView(const std::shared_ptr<StorageLiveView> & view)
{
if (!view->isTemporary())
return;
auto current_time = std::chrono::system_clock::now();
auto time_of_next_check = current_time + view->getTimeout();
std::lock_guard lock{mutex};
/// Keep the vector `views` sorted by time of next check.
StorageAndTimeOfCheck storage_and_time_of_check{view, time_of_next_check};
views.insert(std::upper_bound(views.begin(), views.end(), storage_and_time_of_check), storage_and_time_of_check);
if (!background_thread.joinable())
background_thread = ThreadFromGlobalPool{&TemporaryLiveViewCleaner::backgroundThreadFunc, this};
background_thread_wake_up.notify_one();
}
/// Body of the background thread.
/// Sleeps until the earliest scheduled check, then goes through the views whose time has come:
/// entries of already destroyed storages are removed, views without users and without dependencies are dropped,
/// and every checked view is rescheduled one timeout later.
/// Runs until stopBackgroundThread() asks it to exit.
void TemporaryLiveViewCleaner::backgroundThreadFunc()
{
    std::unique_lock lock{mutex};
    while (!background_thread_should_exit)
    {
        if (views.empty())
        {
            /// Nothing is scheduled: sleep until addView() or stopBackgroundThread() notifies us.
            /// The thread must not exit here: addView() starts a thread only if `background_thread` is not joinable,
            /// and a finished but not joined thread is presumably still joinable (as with std::thread),
            /// so views added after an exit would never be checked.
            background_thread_wake_up.wait(lock);
        }
        else
        {
            /// Take a copy of the time point: the mutex is released while waiting,
            /// so addView() can insert into `views` and invalidate references to its elements.
            const auto time_of_next_check = views.front().time_of_check;
            background_thread_wake_up.wait_until(lock, time_of_next_check);
        }

        if (background_thread_should_exit)
            return;

        auto current_time = std::chrono::system_clock::now();
        std::vector<StorageID> storages_to_drop;

        auto it = views.begin();
        while (it != views.end())
        {
            std::shared_ptr<StorageLiveView> storage = it->storage.lock();
            auto & time_of_check = it->time_of_check;
            if (!storage)
            {
                /// Storage has been already removed.
                it = views.erase(it);
                continue;
            }

            ++it;

            if (current_time < time_of_check)
                break; /// It's not the time to check it yet.

            /// Schedule the next check of this view.
            time_of_check = current_time + storage->getTimeout();

            auto storage_id = storage->getStorageID();
            if (storage->hasUsers() || !DatabaseCatalog::instance().getDependencies(storage_id).empty())
                continue;

            storages_to_drop.emplace_back(storage_id);
        }

        /// The times of check were changed in place above and views can have different timeouts,
        /// so restore the order which addView(), `views.front()` and the early `break` above rely on.
        std::sort(views.begin(), views.end());

        /// Dropping a table can take a while, so it's done without holding the mutex.
        lock.unlock();
        for (const auto & storage_id : storages_to_drop)
            executeDropQuery(storage_id, global_context);
        lock.lock();
    }
}

/// Asks the background thread to exit and waits until it finishes. Does nothing if the thread was not started.
void TemporaryLiveViewCleaner::stopBackgroundThread()
{
    std::unique_lock lock{mutex};
    if (!background_thread.joinable())
        return;

    /// The flag is set under the mutex, so the thread either sees it before going to sleep
    /// or is already sleeping and gets the notification.
    background_thread_should_exit = true;
    background_thread_wake_up.notify_one();

    /// The mutex must be released before joining: the thread needs to lock it to wake up and exit.
    lock.unlock();
    background_thread.join();
}
}

View File

@ -0,0 +1,51 @@
#pragma once
#include <Common/ThreadPool.h>
#include <chrono>
namespace DB
{
class StorageLiveView;
struct StorageID;
/// This class removes temporary live views in the background thread when it's possible.
/// There should be only a single instance of this class.
class TemporaryLiveViewCleaner
{
public:
/// Returns the single instance. `the_instance` is dereferenced without a check, so init() must have been called before.
static TemporaryLiveViewCleaner & instance() { return *the_instance; }
/// Drops a specified live view after a while if it's temporary.
void addView(const std::shared_ptr<StorageLiveView> & view);
/// Should be called once.
static void init(Context & global_context_);
/// Destroys the single instance.
static void shutdown();
private:
/// The destructor is private, so the deleter of `the_instance` has to be a friend to be able to call it.
friend std::unique_ptr<TemporaryLiveViewCleaner>::deleter_type;
TemporaryLiveViewCleaner(Context & global_context_);
~TemporaryLiveViewCleaner();
void backgroundThreadFunc();
void stopBackgroundThread();
/// A live view (held weakly) together with the time when it should be checked next.
struct StorageAndTimeOfCheck
{
std::weak_ptr<StorageLiveView> storage;
std::chrono::system_clock::time_point time_of_check;
/// Entries are ordered by the time of check only.
bool operator <(const StorageAndTimeOfCheck & other) const { return time_of_check < other.time_of_check; }
};
static std::unique_ptr<TemporaryLiveViewCleaner> the_instance;
Context & global_context;
/// Locked by addView(), backgroundThreadFunc() and stopBackgroundThread().
std::mutex mutex;
/// Views to check; addView() inserts new entries in the order of `time_of_check`.
std::vector<StorageAndTimeOfCheck> views;
ThreadFromGlobalPool background_thread;
std::atomic<bool> background_thread_should_exit = false;
/// Notified when a view is added and when the background thread should exit.
std::condition_variable background_thread_wake_up;
};
}

View File

@ -0,0 +1,26 @@
#include <Storages/MergeTree/MergeAlgorithm.h>
#include <Common/Exception.h>
namespace DB
{
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
/// Returns the name of the merge algorithm: "Undecided", "Horizontal" or "Vertical".
/// Throws NOT_IMPLEMENTED if the value matches none of the enumerators.
String toString(MergeAlgorithm merge_algorithm)
{
switch (merge_algorithm)
{
case MergeAlgorithm::Undecided:
return "Undecided";
case MergeAlgorithm::Horizontal:
return "Horizontal";
case MergeAlgorithm::Vertical:
return "Vertical";
}
/// Reached only if the value is not one of the enumerators handled above.
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unknown MergeAlgorithm {}", static_cast<UInt64>(merge_algorithm));
}
}

View File

@ -0,0 +1,17 @@
#pragma once
#include <Core/Types.h>
namespace DB
{
/// Algorithm used to merge data parts.
/// Use toString(MergeAlgorithm) to get the name of a value.
enum class MergeAlgorithm
{
Undecided, /// Not running yet
Horizontal, /// per-row merge of all columns
Vertical /// per-row merge of PK and secondary indices columns, per-column gather for non-PK columns
};
String toString(MergeAlgorithm merge_algorithm);
}

View File

@ -24,6 +24,7 @@ MergeListElement::MergeListElement(const std::string & database_, const std::str
, num_parts{future_part.parts.size()}
, thread_id{getThreadId()}
, merge_type{future_part.merge_type}
, merge_algorithm{MergeAlgorithm::Undecided}
{
for (const auto & source_part : future_part.parts)
{
@ -74,6 +75,7 @@ MergeInfo MergeListElement::getInfo() const
res.memory_usage = memory_tracker.get();
res.thread_id = thread_id;
res.merge_type = toString(merge_type);
res.merge_algorithm = toString(merge_algorithm);
for (const auto & source_part_name : source_part_names)
res.source_part_names.emplace_back(source_part_name);

View File

@ -3,6 +3,7 @@
#include <Common/Stopwatch.h>
#include <Common/CurrentMetrics.h>
#include <Common/MemoryTracker.h>
#include <Storages/MergeTree/MergeAlgorithm.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/MergeType.h>
#include <memory>
@ -47,6 +48,7 @@ struct MergeInfo
UInt64 memory_usage;
UInt64 thread_id;
std::string merge_type;
std::string merge_algorithm;
};
struct FutureMergedMutatedPart;
@ -90,6 +92,7 @@ struct MergeListElement : boost::noncopyable
UInt64 thread_id;
MergeType merge_type;
MergeAlgorithm merge_algorithm;
MergeListElement(const std::string & database, const std::string & table, const FutureMergedMutatedPart & future_part);

View File

@ -62,10 +62,6 @@ namespace ErrorCodes
extern const int ABORTED;
}
using MergeAlgorithm = MergeTreeDataMergerMutator::MergeAlgorithm;
/// Do not start to merge parts, if free space is less than sum size of parts times specified coefficient.
/// This value is chosen to not allow big merges to eat all free space. Thus allowing small merges to proceed.
static const double DISK_USAGE_COEFFICIENT_TO_SELECT = 2;
@ -699,6 +695,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
size_t sum_input_rows_upper_bound = merge_entry->total_rows_count;
MergeAlgorithm merge_alg = chooseMergeAlgorithm(parts, sum_input_rows_upper_bound, gathering_columns, deduplicate, need_remove_expired_values);
merge_entry->merge_algorithm = merge_alg;
LOG_DEBUG(log, "Selected MergeAlgorithm: {}", ((merge_alg == MergeAlgorithm::Vertical) ? "Vertical" : "Horizontal"));
@ -1238,7 +1235,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor
}
MergeTreeDataMergerMutator::MergeAlgorithm MergeTreeDataMergerMutator::chooseMergeAlgorithm(
MergeAlgorithm MergeTreeDataMergerMutator::chooseMergeAlgorithm(
const MergeTreeData::DataPartsVector & parts, size_t sum_rows_upper_bound,
const NamesAndTypesList & gathering_columns, bool deduplicate, bool need_remove_expired_values) const
{

View File

@ -6,6 +6,7 @@
#include <functional>
#include <Common/ActionBlocker.h>
#include <Storages/MergeTree/TTLMergeSelector.h>
#include <Storages/MergeTree/MergeAlgorithm.h>
#include <Storages/MergeTree/MergeType.h>
@ -226,12 +227,6 @@ public :
ActionBlocker merges_blocker;
ActionBlocker ttl_merges_blocker;
enum class MergeAlgorithm
{
Horizontal, /// per-row merge of all columns
Vertical /// per-row merge of PK and secondary indices columns, per-column gather for non-PK columns
};
private:
MergeAlgorithm chooseMergeAlgorithm(

View File

@ -395,9 +395,10 @@ static StoragePtr create(const StorageFactory::Arguments & args)
if (replicated)
{
bool has_arguments = arg_num + 2 <= arg_cnt && engine_args[arg_num]->as<ASTLiteral>() && engine_args[arg_num + 1]->as<ASTLiteral>();
bool has_arguments = arg_num + 2 <= arg_cnt;
bool has_valid_arguments = has_arguments && engine_args[arg_num]->as<ASTLiteral>() && engine_args[arg_num + 1]->as<ASTLiteral>();
if (has_arguments)
if (has_valid_arguments)
{
const auto * ast = engine_args[arg_num]->as<ASTLiteral>();
if (ast && ast->value.getType() == Field::Types::String)
@ -420,7 +421,7 @@ static StoragePtr create(const StorageFactory::Arguments & args)
"No replica name in config" + getMergeTreeVerboseHelp(is_extended_storage_def), ErrorCodes::NO_REPLICA_NAME_GIVEN);
++arg_num;
}
else if (is_extended_storage_def)
else if (is_extended_storage_def && !has_arguments)
{
/// Try to use default values if the arguments are not specified.
/// It works for ON CLUSTER queries when the database engine is Atomic and there are {shard} and {replica} macros in the config.
@ -428,7 +429,7 @@ static StoragePtr create(const StorageFactory::Arguments & args)
replica_name = "{replica}"; /// TODO maybe use hostname if {replica} is not defined?
}
else
throw Exception("Expected zookeper_path and replica_name arguments", ErrorCodes::BAD_ARGUMENTS);
throw Exception("Expected two string literal arguments: zookeper_path and replica_name", ErrorCodes::BAD_ARGUMENTS);
/// Allow implicit {uuid} macros only for zookeeper_path in ON CLUSTER queries
bool is_on_cluster = args.local_context.getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY;

View File

@ -105,14 +105,14 @@ SelectQueryDescription SelectQueryDescription::getSelectQueryFromASTForMatView(c
if (new_select.list_of_selects->children.size() != 1)
throw Exception("UNION is not supported for MATERIALIZED VIEW", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW);
SelectQueryDescription result;
result.inner_query = new_select.list_of_selects->children.at(0)->clone();
auto & select_query = result.inner_query->as<ASTSelectQuery &>();
auto & new_inner_query = new_select.list_of_selects->children.at(0);
auto & select_query = new_inner_query->as<ASTSelectQuery &>();
checkAllowedQueries(select_query);
SelectQueryDescription result;
result.select_table_id = extractDependentTableFromSelectQuery(select_query, context);
result.select_query = select->clone();
result.select_query = new_select.clone();
result.inner_query = new_inner_query->clone();
return result;
}

View File

@ -216,7 +216,31 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree(
getStorageID().getFullTableName() + " (StorageReplicatedMergeTree::mutationsFinalizingTask)", [this] { mutationsFinalizingTask(); });
if (global_context.hasZooKeeper())
current_zookeeper = global_context.getZooKeeper();
{
/// It's possible for getZooKeeper() to time out if the ZooKeeper host(s) can't
/// be reached. In such cases Poco::Exception is thrown after a connection
/// timeout - refer to src/Common/ZooKeeper/ZooKeeperImpl.cpp:866 for more info.
///
/// A side effect of this is that the CreateQuery gets interrupted and exits,
/// but the data directories for the tables being created aren't cleaned up.
/// This unclean state will hinder table creation on any retry, which will
/// complain that the directory for the table already exists.
///
/// To achieve a clean state on failed table creation, catch this error and
/// call dropIfEmpty() only if the operation isn't ATTACH, then rethrow
/// the exception. Without this, the directory for the table needs
/// to be manually deleted before retrying the CreateQuery.
try
{
current_zookeeper = global_context.getZooKeeper();
}
catch (...)
{
if (!attach)
dropIfEmpty();
throw;
}
}
bool skip_sanity_checks = false;
@ -239,7 +263,10 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree(
if (!current_zookeeper)
{
if (!attach)
{
dropIfEmpty();
throw Exception("Can't create replicated table without ZooKeeper", ErrorCodes::NO_ZOOKEEPER);
}
/// Do not activate the replica. It will be readonly.
LOG_ERROR(log, "No ZooKeeper: table will be in readonly mode.");
@ -589,7 +616,10 @@ bool StorageReplicatedMergeTree::createTableIfNotExists(const StorageMetadataPtr
return true;
}
throw Exception("Cannot create table, because it is created concurrently every time or because of logical error", ErrorCodes::LOGICAL_ERROR);
/// Do not use the LOGICAL_ERROR code here, because this may also happen if the user has specified a wrong zookeeper_path.
throw Exception("Cannot create table, because it is created concurrently every time "
"or because of wrong zookeeper_path "
"or because of logical error", ErrorCodes::REPLICA_IS_ALREADY_EXIST);
}
void StorageReplicatedMergeTree::createReplica(const StorageMetadataPtr & metadata_snapshot)

View File

@ -31,6 +31,7 @@ NamesAndTypesList StorageSystemMerges::getNamesAndTypes()
{"memory_usage", std::make_shared<DataTypeUInt64>()},
{"thread_id", std::make_shared<DataTypeUInt64>()},
{"merge_type", std::make_shared<DataTypeString>()},
{"merge_algorithm", std::make_shared<DataTypeString>()},
};
}
@ -67,9 +68,15 @@ void StorageSystemMerges::fillData(MutableColumns & res_columns, const Context &
res_columns[i++]->insert(merge.memory_usage);
res_columns[i++]->insert(merge.thread_id);
if (!merge.is_mutation)
{
res_columns[i++]->insert(merge.merge_type);
res_columns[i++]->insert(merge.merge_algorithm);
}
else
{
res_columns[i++]->insertDefault();
res_columns[i++]->insertDefault();
}
}
}

View File

@ -22,6 +22,7 @@ SRCS(
IStorage.cpp
KeyDescription.cpp
LiveView/StorageLiveView.cpp
LiveView/TemporaryLiveViewCleaner.cpp
MergeTree/ActiveDataPartSet.cpp
MergeTree/AllMergeSelector.cpp
MergeTree/BackgroundProcessingPool.cpp
@ -36,6 +37,7 @@ SRCS(
MergeTree/KeyCondition.cpp
MergeTree/LevelMergeSelector.cpp
MergeTree/localBackup.cpp
MergeTree/MergeAlgorithm.cpp
MergeTree/MergedBlockOutputStream.cpp
MergeTree/MergedColumnOnlyOutputStream.cpp
MergeTree/MergeList.cpp

View File

@ -298,7 +298,7 @@ def test_mixed_granularity_single_node(start_dynamic_cluster, node):
#still works
assert node.query("SELECT count() from table_with_default_granularity") == '6\n'
@pytest.mark.skip(reason="flaky")
def test_version_update_two_nodes(start_dynamic_cluster):
node11.query("INSERT INTO table_with_default_granularity VALUES (toDate('2018-10-01'), 1, 333), (toDate('2018-10-02'), 2, 444)")
node12.query("SYSTEM SYNC REPLICA table_with_default_granularity", timeout=20)

Some files were not shown because too many files have changed in this diff Show More