Merge branch 'master' of https://github.com/ClickHouse/ClickHouse
This commit is contained in:
commit 5e45179998
@@ -17,5 +17,4 @@ ClickHouse is an open-source column-oriented database management system that all

## Upcoming Events

* [ClickHouse Data Integration Virtual Meetup](https://www.eventbrite.com/e/clickhouse-september-virtual-meetup-data-integration-tickets-117421895049) on September 10, 2020.
* [ClickHouse talk at Ya.Subbotnik (in Russian)](https://ya.cc/t/cIBI-3yECj5JF) on September 12, 2020.
@@ -1,8 +1,6 @@
#pragma once

#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <string>
#include <type_traits>
@@ -6,6 +6,7 @@ Columns:

- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time.
- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Event time with microseconds resolution.
- `name` ([String](../../sql-reference/data-types/string.md)) — Metric name.
- `value` ([Float64](../../sql-reference/data-types/float.md)) — Metric value.

@@ -16,18 +17,18 @@ SELECT * FROM system.asynchronous_metric_log LIMIT 10
```

``` text
┌─event_date─┬──────────event_time─┬─name─────────────────────────────────────┬────value─┐
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.arenas.all.pmuzzy │ 0 │
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.arenas.all.pdirty │ 4214 │
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.background_thread.run_intervals │ 0 │
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.background_thread.num_runs │ 0 │
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.retained │ 17657856 │
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.mapped │ 71471104 │
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.resident │ 61538304 │
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.metadata │ 6199264 │
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.allocated │ 38074336 │
│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.epoch │ 2 │
└────────────┴─────────────────────┴──────────────────────────────────────────┴──────────┘
┌─event_date─┬──────────event_time─┬────event_time_microseconds─┬─name─────────────────────────────────────┬─────value─┐
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ CPUFrequencyMHz_0 │ 2120.9 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.arenas.all.pmuzzy │ 743 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.arenas.all.pdirty │ 26288 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.background_thread.run_intervals │ 0 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.background_thread.num_runs │ 0 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.retained │ 60694528 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.mapped │ 303161344 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.resident │ 260931584 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.metadata │ 12079488 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.allocated │ 133756128 │
└────────────┴─────────────────────┴────────────────────────────┴──────────────────────────────────────────┴───────────┘
```

**See Also**
@@ -23,28 +23,28 @@ SELECT * FROM system.metric_log LIMIT 1 FORMAT Vertical;
``` text
Row 1:
──────
event_date: 2020-02-18
event_time: 2020-02-18 07:15:33
milliseconds: 554
ProfileEvent_Query: 0
ProfileEvent_SelectQuery: 0
ProfileEvent_InsertQuery: 0
ProfileEvent_FileOpen: 0
ProfileEvent_Seek: 0
ProfileEvent_ReadBufferFromFileDescriptorRead: 1
ProfileEvent_ReadBufferFromFileDescriptorReadFailed: 0
ProfileEvent_ReadBufferFromFileDescriptorReadBytes: 0
ProfileEvent_WriteBufferFromFileDescriptorWrite: 1
ProfileEvent_WriteBufferFromFileDescriptorWriteFailed: 0
ProfileEvent_WriteBufferFromFileDescriptorWriteBytes: 56
event_date: 2020-09-05
event_time: 2020-09-05 16:22:33
event_time_microseconds: 2020-09-05 16:22:33.196807
milliseconds: 196
ProfileEvent_Query: 0
ProfileEvent_SelectQuery: 0
ProfileEvent_InsertQuery: 0
ProfileEvent_FailedQuery: 0
ProfileEvent_FailedSelectQuery: 0
...
CurrentMetric_Query: 0
CurrentMetric_Merge: 0
CurrentMetric_PartMutation: 0
CurrentMetric_ReplicatedFetch: 0
CurrentMetric_ReplicatedSend: 0
CurrentMetric_ReplicatedChecks: 0
...
CurrentMetric_Revision: 54439
CurrentMetric_VersionInteger: 20009001
CurrentMetric_RWLockWaitingReaders: 0
CurrentMetric_RWLockWaitingWriters: 0
CurrentMetric_RWLockActiveReaders: 0
CurrentMetric_RWLockActiveWriters: 0
CurrentMetric_GlobalThread: 74
CurrentMetric_GlobalThreadActive: 26
CurrentMetric_LocalThread: 0
CurrentMetric_LocalThreadActive: 0
CurrentMetric_DistributedFilesToInsert: 0
```

**See also**
@@ -515,6 +515,29 @@ SELECT
└────────────────┴────────────┘
```

## formatReadableQuantity(x) {#formatreadablequantityx}

Accepts a number. Returns a rounded number with a suffix (thousand, million, billion, etc.) as a string.

It is useful for reading big numbers by humans.

Example:

``` sql
SELECT
    arrayJoin([1024, 1234 * 1000, (4567 * 1000) * 1000, 98765432101234]) AS number,
    formatReadableQuantity(number) AS number_for_humans
```

``` text
┌─────────number─┬─number_for_humans─┐
│ 1024 │ 1.02 thousand │
│ 1234000 │ 1.23 million │
│ 4567000000 │ 4.57 billion │
│ 98765432101234 │ 98.77 trillion │
└────────────────┴───────────────────┘
```

## least(a, b) {#leasta-b}

Returns the smallest value from a and b.
@@ -28,6 +28,8 @@ ClickHouse can accept (`INSERT`) and return (`SELECT`
| [PrettySpace](#prettyspace) | ✗ | ✔ |
| [Protobuf](#protobuf) | ✔ | ✔ |
| [Parquet](#data-format-parquet) | ✔ | ✔ |
| [Arrow](#data-format-arrow) | ✔ | ✔ |
| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ |
| [ORC](#data-format-orc) | ✔ | ✗ |
| [RowBinary](#rowbinary) | ✔ | ✔ |
| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ |
@@ -947,6 +949,12 @@ ClickHouse writes and reads `Protocol Buffers` messages in

## Avro {#data-format-avro}

[Apache Avro](https://avro.apache.org/) is a row-oriented data serialization framework developed within the Apache Hadoop project.

The ClickHouse Avro format supports reading and writing [Avro data files](https://avro.apache.org/docs/current/spec.html#Object+Container+Files).

[Avro logical types](https://avro.apache.org/docs/current/spec.html#Logical+Types)

## AvroConfluent {#data-format-avro-confluent}

For the `AvroConfluent` format, ClickHouse supports decoding single-object `Avro` messages. Such messages are used with [Kafka](http://kafka.apache.org/) and the [Confluent](https://docs.confluent.io/current/schema-registry/index.html) Schema Registry.
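To make the registry lookup concrete, here is a minimal sketch (the table, topic, and host names are hypothetical; `format_avro_schema_registry_url` is the setting the `AvroConfluent` format uses to resolve schema IDs):

``` sql
-- Hypothetical Kafka engine table whose messages are AvroConfluent-encoded.
CREATE TABLE topic1_stream (field1 String, field2 Int32)
ENGINE = Kafka()
SETTINGS kafka_broker_list = 'kafka-broker:9092',
         kafka_topic_list = 'topic1',
         kafka_group_name = 'group1',
         kafka_format = 'AvroConfluent';

-- Point ClickHouse at the Confluent Schema Registry, then read.
SET format_avro_schema_registry_url = 'http://schema-registry:8081';
SELECT * FROM topic1_stream;
```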
@@ -996,7 +1004,7 @@ SELECT * FROM topic1_stream;

## Parquet {#data-format-parquet}

[Apache Parquet](http://parquet.apache.org/) is a columnar storage format widespread in the Hadoop ecosystem. ClickHouse supports read and write operations for the `Parquet` format.
[Apache Parquet](https://parquet.apache.org/) is a columnar storage format widespread in the Hadoop ecosystem. ClickHouse supports read and write operations for the `Parquet` format.

### Data type matching {#sootvetstvie-tipov-dannykh}

@@ -1042,6 +1050,16 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_

To exchange data with the Hadoop ecosystem, you can use the [HDFS](../engines/table-engines/integrations/hdfs.md) table engines.

## Arrow {#data-format-arrow}

[Apache Arrow](https://arrow.apache.org/) comes with two built-in columnar storage formats. ClickHouse supports read and write operations for both of them.

`Arrow` is Apache Arrow's "file mode" format. It is designed for in-memory random access.

## ArrowStream {#data-format-arrow-stream}

`ArrowStream` is Apache Arrow's "stream mode" format. It is designed for in-memory stream processing.
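By analogy with the Parquet client example referenced above, a minimal sketch of exporting data in the Arrow format (the table name is a placeholder; run the query via clickhouse-client and redirect the output to a file):

``` sql
-- e.g. clickhouse-client --query="SELECT * FROM some_table FORMAT Arrow" > data.arrow
SELECT * FROM some_table FORMAT Arrow
```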
## ORC {#data-format-orc}

[Apache ORC](https://orc.apache.org/) is a column-oriented data format widespread in the Hadoop ecosystem. You can only insert data in this format into ClickHouse.

@@ -508,6 +508,29 @@ SELECT
└────────────────┴────────────┘
```

## formatReadableQuantity(x) {#formatreadablequantityx}

Accepts a number. Returns a rounded number with a suffix (thousand, million, billion, etc.) as a string.

It makes big numbers easier for a human to read.

Example:

``` sql
SELECT
    arrayJoin([1024, 1234 * 1000, (4567 * 1000) * 1000, 98765432101234]) AS number,
    formatReadableQuantity(number) AS number_for_humans
```

``` text
┌─────────number─┬─number_for_humans─┐
│ 1024 │ 1.02 thousand │
│ 1234000 │ 1.23 million │
│ 4567000000 │ 4.57 billion │
│ 98765432101234 │ 98.77 trillion │
└────────────────┴───────────────────┘
```

## least(a, b) {#leasta-b}

Returns the smallest value of a and b.
@@ -55,4 +55,50 @@ FROM numbers(3)
└────────────┴────────────┴──────────────┴────────────────┴─────────────────┴──────────────────────┘
```

# Random functions for working with strings {#random-functions-for-working-with-strings}

## randomString {#random-string}

## randomFixedString {#random-fixed-string}

## randomPrintableASCII {#random-printable-ascii}

## randomStringUTF8 {#random-string-utf8}

## fuzzBits {#fuzzbits}

**Syntax**

``` sql
fuzzBits([s], [prob])
```

Flips each bit of `s` with probability `prob`.

**Parameters**

- `s` — `String` or `FixedString`
- `prob` — constant `Float32/64`

**Returned value**

A randomly modified string of the same type as `s`.

**Example**

Query:

``` sql
SELECT fuzzBits(materialize('abacaba'), 0.1)
FROM numbers(3)
```

Result:

``` text
┌─fuzzBits(materialize('abacaba'), 0.1)─┐
│ abaaaja │
│ a*cjab+ │
│ aeca2A │
└───────────────────────────────────────┘
```

[Original article](https://clickhouse.tech/docs/ru/query_language/functions/random_functions/) <!--hide-->

@@ -513,4 +513,95 @@ SELECT parseDateTimeBestEffort('10 20:19')
- [toDate](#todate)
- [toDateTime](#todatetime)

## toUnixTimestamp64Milli
## toUnixTimestamp64Micro
## toUnixTimestamp64Nano

Converts a `DateTime64` value to an `Int64` value with fixed sub-second precision.
The input value is scaled up or down accordingly, depending on its precision. Note that the returned value is a timestamp in UTC, not in the time zone of the `DateTime64`.

**Syntax**

``` sql
toUnixTimestamp64Milli(value)
```

**Parameters**

- `value` — a `DateTime64` value with any precision.

**Returned value**

- The `value` converted to the `Int64` data type.

**Examples**

Query:

``` sql
WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64
SELECT toUnixTimestamp64Milli(dt64)
```

Result:

``` text
┌─toUnixTimestamp64Milli(dt64)─┐
│ 1568650812345 │
└──────────────────────────────┘
```

Query:

``` sql
WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64
SELECT toUnixTimestamp64Nano(dt64)
```

Result:

``` text
┌─toUnixTimestamp64Nano(dt64)─┐
│ 1568650812345678000 │
└─────────────────────────────┘
```

## fromUnixTimestamp64Milli
## fromUnixTimestamp64Micro
## fromUnixTimestamp64Nano

Converts an `Int64` value to a `DateTime64` value with fixed sub-second precision and an optional time zone. The input value is scaled up or down accordingly, depending on its precision. Note that the input value is treated as a UTC timestamp, not a timestamp in the given (or implicit) time zone.

**Syntax**

``` sql
fromUnixTimestamp64Milli(value [, timezone])
```

**Parameters**

- `value` — an `Int64` value with any precision.
- `timezone` — (optional) the time zone (`String`) for the returned result.

**Returned value**

- The `value` converted to the `DateTime64` data type.

**Example**

Query:

``` sql
WITH CAST(1234567891011, 'Int64') AS i64
SELECT fromUnixTimestamp64Milli(i64, 'UTC')
```

Result:

``` text
┌─fromUnixTimestamp64Milli(i64, 'UTC')─┐
│ 2009-02-13 23:31:31.011 │
└──────────────────────────────────────┘
```

[Original article](https://clickhouse.tech/docs/ru/query_language/functions/type_conversion_functions/) <!--hide-->

@@ -902,74 +902,127 @@ private:
        return processMultiQuery(text);
    }

    bool processMultiQuery(const String & text)
    bool processMultiQuery(const String & all_queries_text)
    {
        const bool test_mode = config().has("testmode");

        {   /// disable logs if it expects errors
            TestHint test_hint(test_mode, text);
            TestHint test_hint(test_mode, all_queries_text);
            if (test_hint.clientError() || test_hint.serverError())
                processTextAsSingleQuery("SET send_logs_level = 'none'");
        }

        /// Several queries separated by ';'.
        /// INSERT data is ended by the end of line, not ';'.
        /// An exception is VALUES format where we also support semicolon in
        /// addition to end of line.

        const char * begin = text.data();
        const char * end = begin + text.size();
        const char * this_query_begin = all_queries_text.data();
        const char * all_queries_end = all_queries_text.data() + all_queries_text.size();

        while (begin < end)
        while (this_query_begin < all_queries_end)
        {
            const char * pos = begin;
            ASTPtr orig_ast = parseQuery(pos, end, true);
            // Use the token iterator to skip any whitespace, semicolons and
            // comments at the beginning of the query. An example from regression
            // tests:
            //      insert into table t values ('invalid'); -- { serverError 469 }
            //      select 1
            // Here the test hint comment gets parsed as a part of second query.
            // We parse the `INSERT VALUES` up to the semicolon, and the rest
            // looks like a two-line query:
            //      -- { serverError 469 }
            //      select 1
            // and we expect it to fail with error 469, but this hint is actually
            // for the previous query. Test hints should go after the query, so
            // we can fix this by skipping leading comments. Token iterator skips
            // comments and whitespace by itself, so we only have to check for
            // semicolons.
            // The code block is to limit visibility of `tokens` because we have
            // another such variable further down the code, and get warnings for
            // that.
            {
                Tokens tokens(this_query_begin, all_queries_end);
                IParser::Pos token_iterator(tokens,
                    context.getSettingsRef().max_parser_depth);
                while (token_iterator->type == TokenType::Semicolon
                        && token_iterator.isValid())
                {
                    ++token_iterator;
                }
                this_query_begin = token_iterator->begin;
                if (this_query_begin >= all_queries_end)
                {
                    break;
                }
            }

            if (!orig_ast)
            // Try to parse the query.
            const char * this_query_end = this_query_begin;
            parsed_query = parseQuery(this_query_end, all_queries_end, true);

            if (!parsed_query)
            {
                if (ignore_error)
                {
                    Tokens tokens(begin, end);
                    Tokens tokens(this_query_begin, all_queries_end);
                    IParser::Pos token_iterator(tokens, context.getSettingsRef().max_parser_depth);
                    while (token_iterator->type != TokenType::Semicolon && token_iterator.isValid())
                        ++token_iterator;
                    begin = token_iterator->end;
                    this_query_begin = token_iterator->end;

                    continue;
                }
                return true;
            }

            auto * insert = orig_ast->as<ASTInsertQuery>();

            if (insert && insert->data)
            // INSERT queries may have the inserted data in the query text
            // that follows the query itself, e.g. "insert into t format CSV 1;2".
            // They need special handling. First of all, here we find where the
            // inserted data ends. In multi-query mode, it is delimited by a
            // newline.
            // The VALUES format needs even more handling -- we also allow the
            // data to be delimited by semicolon. This case is handled later by
            // the format parser itself.
            auto * insert_ast = parsed_query->as<ASTInsertQuery>();
            if (insert_ast && insert_ast->data)
            {
                pos = find_first_symbols<'\n'>(insert->data, end);
                insert->end = pos;
                this_query_end = find_first_symbols<'\n'>(insert_ast->data, all_queries_end);
                insert_ast->end = this_query_end;
                query_to_send = all_queries_text.substr(
                    this_query_begin - all_queries_text.data(),
                    insert_ast->data - this_query_begin);
            }
            else
            {
                query_to_send = all_queries_text.substr(
                    this_query_begin - all_queries_text.data(),
                    this_query_end - this_query_begin);
            }

            String str = text.substr(begin - text.data(), pos - begin);
            // full_query is the query + inline INSERT data.
            full_query = all_queries_text.substr(
                this_query_begin - all_queries_text.data(),
                this_query_end - this_query_begin);

            begin = pos;
            while (isWhitespaceASCII(*begin) || *begin == ';')
                ++begin;

            TestHint test_hint(test_mode, str);
            // Look for the hint in the text of query + insert data, if any.
            // e.g. insert into t format CSV 'a' -- { serverError 123 }.
            TestHint test_hint(test_mode, full_query);
            expected_client_error = test_hint.clientError();
            expected_server_error = test_hint.serverError();

            try
            {
                auto ast_to_process = orig_ast;
                if (insert && insert->data)
                processParsedSingleQuery();

                if (insert_ast && insert_ast->data)
                {
                    ast_to_process = nullptr;
                    processTextAsSingleQuery(str);
                }
                else
                {
                    parsed_query = ast_to_process;
                    full_query = str;
                    query_to_send = str;
                    processParsedSingleQuery();
                    // For VALUES format: use the end of inline data as reported
                    // by the format parser (it is saved in sendData()). This
                    // allows us to handle queries like:
                    //   insert into t values (1); select 1
                    //, where the inline data is delimited by semicolon and not
                    // by a newline.
                    this_query_end = parsed_query->as<ASTInsertQuery>()->end;
                }
            }
            catch (...)
@@ -977,7 +1030,7 @@ private:
                last_exception_received_from_server = std::make_unique<Exception>(getCurrentExceptionMessage(true), getCurrentExceptionCode());
                actual_client_error = last_exception_received_from_server->code();
                if (!ignore_error && (!actual_client_error || actual_client_error != expected_client_error))
                    std::cerr << "Error on processing query: " << str << std::endl << last_exception_received_from_server->message();
                    std::cerr << "Error on processing query: " << full_query << std::endl << last_exception_received_from_server->message();
                received_exception_from_server = true;
            }

@@ -991,6 +1044,8 @@ private:
            else
                return false;
        }

        this_query_begin = this_query_end;
    }

    return true;
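The VALUES special case above is easiest to see on a concrete multi-query input; this sketch mirrors the stateless test added later in this commit (the table name is arbitrary):

``` sql
-- The inline VALUES data ends at the semicolon, so the SELECT on the same
-- line is parsed as the next query instead of being swallowed as data.
create table if not exists t_306 (a int) engine Memory;
insert into t_306 values (1); select 11111;
```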
@@ -1415,7 +1470,7 @@ private:
    void sendData(Block & sample, const ColumnsDescription & columns_description)
    {
        /// If INSERT data must be sent.
        const auto * parsed_insert_query = parsed_query->as<ASTInsertQuery>();
        auto * parsed_insert_query = parsed_query->as<ASTInsertQuery>();
        if (!parsed_insert_query)
            return;

@@ -1424,6 +1479,9 @@ private:
            /// Send data contained in the query.
            ReadBufferFromMemory data_in(parsed_insert_query->data, parsed_insert_query->end - parsed_insert_query->data);
            sendDataFrom(data_in, sample, columns_description);
            // Remember where the data ended. We use this info later to determine
            // where the next query begins.
            parsed_insert_query->end = data_in.buffer().begin() + data_in.count();
        }
        else if (!is_interactive)
        {
@@ -68,8 +68,14 @@ String Macros::expand(const String & s,
            res += database_name;
        else if (macro_name == "table" && !table_name.empty())
            res += table_name;
        else if (macro_name == "uuid" && uuid != UUIDHelpers::Nil)
        else if (macro_name == "uuid")
        {
            if (uuid == UUIDHelpers::Nil)
                throw Exception("Macro 'uuid' and empty arguments of ReplicatedMergeTree "
                                "are supported only for ON CLUSTER queries with Atomic database engine",
                                ErrorCodes::SYNTAX_ERROR);
            res += toString(uuid);
        }
        else
            throw Exception("No macro '" + macro_name +
                "' in config while processing substitutions in '" + s + "' at '"
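For context, a minimal sketch of the DDL this change is guarding (cluster and table names are hypothetical): the {uuid} macro only has a value when the table lives in an Atomic database, which is why a Nil UUID must be rejected here rather than silently skipped.

``` sql
-- ON CLUSTER DDL using the {uuid} macro in the ZooKeeper path; valid only
-- when the database engine is Atomic, so each replica substitutes the same UUID.
CREATE TABLE db.t ON CLUSTER my_cluster (x UInt64)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{uuid}/{shard}', '{replica}')
ORDER BY x;
```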
@@ -50,21 +50,22 @@ uint64_t readLengthEncodedNumber(ReadBuffer & buffer)
    uint64_t buf = 0;
    buffer.readStrict(c);
    auto cc = static_cast<uint8_t>(c);
    if (cc < 0xfc)
    switch (cc)
    {
        return cc;
    }
    else if (cc < 0xfd)
    {
        buffer.readStrict(reinterpret_cast<char *>(&buf), 2);
    }
    else if (cc < 0xfe)
    {
        buffer.readStrict(reinterpret_cast<char *>(&buf), 3);
    }
    else
    {
        buffer.readStrict(reinterpret_cast<char *>(&buf), 8);
        /// NULL
        case 0xfb:
            break;
        case 0xfc:
            buffer.readStrict(reinterpret_cast<char *>(&buf), 2);
            break;
        case 0xfd:
            buffer.readStrict(reinterpret_cast<char *>(&buf), 3);
            break;
        case 0xfe:
            buffer.readStrict(reinterpret_cast<char *>(&buf), 8);
            break;
        default:
            return cc;
    }
    return buf;
}
@@ -171,7 +171,7 @@ namespace MySQLReplication

        /// Ignore MySQL 8.0 optional metadata fields.
        /// https://mysqlhighavailability.com/more-metadata-is-written-into-binary-log/
        payload.ignore(payload.available() - CHECKSUM_CRC32_SIGNATURE_LENGTH);
        payload.ignoreAll();
    }

    /// Types that are not used in the binlog event:
@@ -221,6 +221,7 @@ namespace MySQLReplication
            }
            case MYSQL_TYPE_NEWDECIMAL:
            case MYSQL_TYPE_STRING: {
                /// Big-Endian
                auto b0 = UInt16(meta[pos] << 8);
                auto b1 = UInt8(meta[pos + 1]);
                column_meta.emplace_back(UInt16(b0 + b1));
@@ -231,6 +232,7 @@ namespace MySQLReplication
            case MYSQL_TYPE_BIT:
            case MYSQL_TYPE_VARCHAR:
            case MYSQL_TYPE_VAR_STRING: {
                /// Little-Endian
                auto b0 = UInt8(meta[pos]);
                auto b1 = UInt16(meta[pos + 1] << 8);
                column_meta.emplace_back(UInt16(b0 + b1));
@@ -911,7 +913,7 @@ namespace MySQLReplication
                break;
            }
        }
        payload.tryIgnore(CHECKSUM_CRC32_SIGNATURE_LENGTH);
        payload.ignoreAll();
    }
}
@@ -283,6 +283,7 @@ int main(int argc, char ** argv)
    }

    {
        /// mysql_protocol --host=172.17.0.3 --user=root --password=123 --db=sbtest
        try
        {
            boost::program_options::options_description desc("Allowed options");
@@ -195,6 +195,7 @@ void MaterializeMySQLSyncThread::synchronization(const String & mysql_version)
    }
    catch (...)
    {
        client.disconnect();
        tryLogCurrentException(log);
        getDatabase(database_name).setException(std::current_exception());
    }
@@ -206,6 +207,7 @@ void MaterializeMySQLSyncThread::stopSynchronization()
    {
        sync_quit = true;
        background_thread_pool->join();
        client.disconnect();
    }
}
@@ -390,6 +390,7 @@ void registerOutputFormatProcessorPostgreSQLWire(FormatFactory & factory);

void registerInputFormatProcessorRegexp(FormatFactory & factory);
void registerInputFormatProcessorJSONAsString(FormatFactory & factory);
void registerInputFormatProcessorLineAsString(FormatFactory & factory);
void registerInputFormatProcessorCapnProto(FormatFactory & factory);

FormatFactory::FormatFactory()
@@ -454,6 +455,7 @@ FormatFactory::FormatFactory()

    registerInputFormatProcessorRegexp(*this);
    registerInputFormatProcessorJSONAsString(*this);
    registerInputFormatProcessorLineAsString(*this);
#if !defined(ARCADIA_BUILD)
    registerInputFormatProcessorCapnProto(*this);
#endif
@@ -53,8 +53,28 @@ endif()

target_include_directories(clickhouse_functions SYSTEM PRIVATE ${SPARSEHASH_INCLUDE_DIR})

# Won't generate debug info for files with heavy template instantiation to achieve faster linking and lower size.
target_compile_options(clickhouse_functions PRIVATE "-g0")
if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE"
    OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO"
    OR CMAKE_BUILD_TYPE_UC STREQUAL "MINSIZEREL")
    set (STRIP_DSF_DEFAULT ON)
else()
    set (STRIP_DSF_DEFAULT OFF)
endif()


option(STRIP_DEBUG_SYMBOLS_FUNCTIONS
    "Do not generate debugger info for ClickHouse functions.
    Provides faster linking and lower binary size.
    Tradeoff is the inability to debug some source files with e.g. gdb
    (empty stack frames and no local variables)."
    ${STRIP_DSF_DEFAULT})

if (STRIP_DEBUG_SYMBOLS_FUNCTIONS)
    message(WARNING "Not generating debugger info for ClickHouse functions")
    target_compile_options(clickhouse_functions PRIVATE "-g0")
else()
    message(STATUS "Generating debugger info for ClickHouse functions")
endif()

if (USE_ICU)
    target_link_libraries (clickhouse_functions PRIVATE ${ICU_LIBRARIES})
@@ -9,6 +9,7 @@ void registerFunctionsFormatting(FunctionFactory & factory)
{
    factory.registerFunction<FunctionBitmaskToList>();
    factory.registerFunction<FunctionFormatReadableSize>();
    factory.registerFunction<FunctionFormatReadableQuantity>();
}

}
@@ -202,4 +202,80 @@ private:
    }
};


class FunctionFormatReadableQuantity : public IFunction
{
public:
    static constexpr auto name = "formatReadableQuantity";
    static FunctionPtr create(const Context &) { return std::make_shared<FunctionFormatReadableQuantity>(); }

    String getName() const override
    {
        return name;
    }

    size_t getNumberOfArguments() const override { return 1; }

    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
    {
        const IDataType & type = *arguments[0];

        if (!isNativeNumber(type))
            throw Exception("Cannot format " + type.getName() + " as quantity", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

        return std::make_shared<DataTypeString>();
    }

    bool useDefaultImplementationForConstants() const override { return true; }

    void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) const override
    {
        if (!(executeType<UInt8>(block, arguments, result)
            || executeType<UInt16>(block, arguments, result)
            || executeType<UInt32>(block, arguments, result)
            || executeType<UInt64>(block, arguments, result)
            || executeType<Int8>(block, arguments, result)
            || executeType<Int16>(block, arguments, result)
            || executeType<Int32>(block, arguments, result)
            || executeType<Int64>(block, arguments, result)
            || executeType<Float32>(block, arguments, result)
            || executeType<Float64>(block, arguments, result)))
            throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
                + " of argument of function " + getName(),
                ErrorCodes::ILLEGAL_COLUMN);
    }

private:
    template <typename T>
    bool executeType(Block & block, const ColumnNumbers & arguments, size_t result) const
    {
        if (const ColumnVector<T> * col_from = checkAndGetColumn<ColumnVector<T>>(block.getByPosition(arguments[0]).column.get()))
        {
            auto col_to = ColumnString::create();

            const typename ColumnVector<T>::Container & vec_from = col_from->getData();
            ColumnString::Chars & data_to = col_to->getChars();
            ColumnString::Offsets & offsets_to = col_to->getOffsets();
            size_t size = vec_from.size();
            data_to.resize(size * 2);
            offsets_to.resize(size);

            WriteBufferFromVector<ColumnString::Chars> buf_to(data_to);

            for (size_t i = 0; i < size; ++i)
            {
                formatReadableQuantity(static_cast<double>(vec_from[i]), buf_to);
                writeChar(0, buf_to);
                offsets_to[i] = buf_to.count();
            }

            buf_to.finalize();
            block.getByPosition(result).column = std::move(col_to);
            return true;
        }

        return false;
    }
};

}
@@ -3,5 +3,6 @@ add_headers_and_sources(clickhouse_functions_gatherutils .)
add_library(clickhouse_functions_gatherutils ${clickhouse_functions_gatherutils_sources} ${clickhouse_functions_gatherutils_headers})
target_link_libraries(clickhouse_functions_gatherutils PRIVATE dbms)

# Won't generate debug info for files with heavy template instantiation to achieve faster linking and lower size.
target_compile_options(clickhouse_functions_gatherutils PRIVATE "-g0")
if (STRIP_DEBUG_SYMBOLS_FUNCTIONS)
    target_compile_options(clickhouse_functions_gatherutils PRIVATE "-g0")
endif()
@@ -3,8 +3,9 @@ add_headers_and_sources(clickhouse_functions_url .)
add_library(clickhouse_functions_url ${clickhouse_functions_url_sources} ${clickhouse_functions_url_headers})
target_link_libraries(clickhouse_functions_url PRIVATE dbms)

# Won't generate debug info for files with heavy template instantiation to achieve faster linking and lower size.
target_compile_options(clickhouse_functions_url PRIVATE "-g0")
if (STRIP_DEBUG_SYMBOLS_FUNCTIONS)
    target_compile_options(clickhouse_functions_url PRIVATE "-g0")
endif()

# TODO: move Functions/Regexps.h to some lib and use here
target_link_libraries(clickhouse_functions_url PRIVATE hyperscan)
@@ -3,5 +3,6 @@ add_headers_and_sources(clickhouse_functions_array .)
add_library(clickhouse_functions_array ${clickhouse_functions_array_sources} ${clickhouse_functions_array_headers})
target_link_libraries(clickhouse_functions_array PRIVATE dbms clickhouse_functions_gatherutils)

# Won't generate debug info for files with heavy template instantiation to achieve faster linking and lower size.
target_compile_options(clickhouse_functions_array PRIVATE "-g0")
if (STRIP_DEBUG_SYMBOLS_FUNCTIONS)
    target_compile_options(clickhouse_functions_array PRIVATE "-g0")
endif()
@@ -2,6 +2,7 @@
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypeString.h>
#include <Interpreters/AsynchronousMetrics.h>

@@ -13,10 +14,11 @@ Block AsynchronousMetricLogElement::createBlock()
{
    ColumnsWithTypeAndName columns;

    columns.emplace_back(std::make_shared<DataTypeDate>(), "event_date");
    columns.emplace_back(std::make_shared<DataTypeDateTime>(), "event_time");
    columns.emplace_back(std::make_shared<DataTypeString>(), "name");
    columns.emplace_back(std::make_shared<DataTypeFloat64>(), "value");
    columns.emplace_back(std::make_shared<DataTypeDate>(), "event_date");
    columns.emplace_back(std::make_shared<DataTypeDateTime>(), "event_time");
    columns.emplace_back(std::make_shared<DataTypeDateTime64>(6), "event_time_microseconds");
    columns.emplace_back(std::make_shared<DataTypeString>(), "name");
    columns.emplace_back(std::make_shared<DataTypeFloat64>(), "value");

    return Block(columns);
}
@@ -28,6 +30,7 @@ void AsynchronousMetricLogElement::appendToBlock(MutableColumns & columns) const

    columns[column_idx++]->insert(event_date);
    columns[column_idx++]->insert(event_time);
    columns[column_idx++]->insert(event_time_microseconds);
    columns[column_idx++]->insert(metric_name);
    columns[column_idx++]->insert(value);
}
@@ -38,6 +41,11 @@ inline UInt64 time_in_milliseconds(std::chrono::time_point<std::chrono::system_c
    return std::chrono::duration_cast<std::chrono::milliseconds>(timepoint.time_since_epoch()).count();
}

inline UInt64 time_in_microseconds(std::chrono::time_point<std::chrono::system_clock> timepoint)
{
    return std::chrono::duration_cast<std::chrono::microseconds>(timepoint.time_since_epoch()).count();
}


inline UInt64 time_in_seconds(std::chrono::time_point<std::chrono::system_clock> timepoint)
{
@@ -50,6 +58,7 @@ void AsynchronousMetricLog::addValues(const AsynchronousMetricValues & values)

    const auto now = std::chrono::system_clock::now();
    element.event_time = time_in_seconds(now);
    element.event_time_microseconds = time_in_microseconds(now);
    element.event_date = DateLUT::instance().toDayNum(element.event_time);

    for (const auto & [key, value] : values)

@@ -22,6 +22,7 @@ struct AsynchronousMetricLogElement
{
    UInt16 event_date;
    time_t event_time;
    UInt64 event_time_microseconds;
    std::string metric_name;
    double value;
@@ -2,6 +2,7 @@
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDateTime64.h>


namespace DB
@@ -11,9 +12,10 @@ Block MetricLogElement::createBlock()
{
    ColumnsWithTypeAndName columns_with_type_and_name;

    columns_with_type_and_name.emplace_back(std::make_shared<DataTypeDate>(), "event_date");
    columns_with_type_and_name.emplace_back(std::make_shared<DataTypeDateTime>(), "event_time");
    columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt64>(), "milliseconds");
    columns_with_type_and_name.emplace_back(std::make_shared<DataTypeDate>(), "event_date");
    columns_with_type_and_name.emplace_back(std::make_shared<DataTypeDateTime>(), "event_time");
    columns_with_type_and_name.emplace_back(std::make_shared<DataTypeDateTime64>(6), "event_time_microseconds");
    columns_with_type_and_name.emplace_back(std::make_shared<DataTypeUInt64>(), "milliseconds");

    for (size_t i = 0, end = ProfileEvents::end(); i < end; ++i)
    {
@@ -41,6 +43,7 @@ void MetricLogElement::appendToBlock(MutableColumns & columns) const

    columns[column_idx++]->insert(DateLUT::instance().toDayNum(event_time));
    columns[column_idx++]->insert(event_time);
    columns[column_idx++]->insert(event_time_microseconds);
    columns[column_idx++]->insert(milliseconds);

    for (size_t i = 0, end = ProfileEvents::end(); i < end; ++i)
@@ -80,6 +83,10 @@ inline UInt64 time_in_milliseconds(std::chrono::time_point<std::chrono::system_c
    return std::chrono::duration_cast<std::chrono::milliseconds>(timepoint.time_since_epoch()).count();
}

inline UInt64 time_in_microseconds(std::chrono::time_point<std::chrono::system_clock> timepoint)
{
    return std::chrono::duration_cast<std::chrono::microseconds>(timepoint.time_since_epoch()).count();
}

inline UInt64 time_in_seconds(std::chrono::time_point<std::chrono::system_clock> timepoint)
{
@@ -102,6 +109,7 @@ void MetricLog::metricThreadFunction()

    MetricLogElement elem;
    elem.event_time = std::chrono::system_clock::to_time_t(current_time);
    elem.event_time_microseconds = time_in_microseconds(current_time);
    elem.milliseconds = time_in_milliseconds(current_time) - time_in_seconds(current_time) * 1000;

    elem.profile_events.resize(ProfileEvents::end());

@@ -18,6 +18,7 @@ namespace DB
struct MetricLogElement
{
    time_t event_time{};
    UInt64 event_time_microseconds{};
    UInt64 milliseconds{};

    std::vector<ProfileEvents::Count> profile_events;
src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp (new file, 85 lines)
@@ -0,0 +1,85 @@
#include <Processors/Formats/Impl/LineAsStringRowInputFormat.h>
#include <Formats/JSONEachRowUtils.h>
#include <common/find_symbols.h>
#include <IO/ReadHelpers.h>

namespace DB
{

namespace ErrorCodes
{
    extern const int INCORRECT_QUERY;
}

LineAsStringRowInputFormat::LineAsStringRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_) :
    IRowInputFormat(header_, in_, std::move(params_)), buf(in)
{
    if (header_.columns() > 1 || header_.getDataTypes()[0]->getTypeId() != TypeIndex::String)
    {
        throw Exception("This input format is only suitable for tables with a single column of type String.", ErrorCodes::INCORRECT_QUERY);
    }
}

void LineAsStringRowInputFormat::resetParser()
{
    IRowInputFormat::resetParser();
    buf.reset();
}

void LineAsStringRowInputFormat::readLineObject(IColumn & column)
{
    PeekableReadBufferCheckpoint checkpoint{buf};
    bool newline = true;
    bool over = false;

    char * pos;

    while (newline)
    {
        pos = find_first_symbols<'\n', '\\'>(buf.position(), buf.buffer().end());
        buf.position() = pos;
        if (buf.position() == buf.buffer().end())
        {
            over = true;
            break;
        }
        else if (*buf.position() == '\n')
        {
            newline = false;
        }
        else if (*buf.position() == '\\')
        {
            ++buf.position();
            if (!buf.eof())
                ++buf.position();
        }
    }

    buf.makeContinuousMemoryFromCheckpointToPos();
    char * end = over ? buf.position() : ++buf.position();
    buf.rollbackToCheckpoint();
    column.insertData(buf.position(), end - (over ? 0 : 1) - buf.position());
    buf.position() = end;
}

bool LineAsStringRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &)
{
    if (!buf.eof())
        readLineObject(*columns[0]);

    return !buf.eof();
}

void registerInputFormatProcessorLineAsString(FormatFactory & factory)
{
    factory.registerInputFormatProcessor("LineAsString", [](
        ReadBuffer & buf,
        const Block & sample,
        const RowInputFormatParams & params,
        const FormatSettings &)
    {
        return std::make_shared<LineAsStringRowInputFormat>(sample, buf, params);
    });
}

}
src/Processors/Formats/Impl/LineAsStringRowInputFormat.h (new file, 31 lines)
@@ -0,0 +1,31 @@
#pragma once

#include <Processors/Formats/IRowInputFormat.h>
#include <Formats/FormatFactory.h>
#include <IO/PeekableReadBuffer.h>

namespace DB
{

class ReadBuffer;

/// This format parses a sequence of lines separated by newlines.
/// Each line is parsed as a whole to string.
/// This format can only parse a table with a single field of type String.

class LineAsStringRowInputFormat : public IRowInputFormat
{
public:
    LineAsStringRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_);

    bool readRow(MutableColumns & columns, RowReadExtension & ext) override;
    String getName() const override { return "LineAsStringRowInputFormat"; }
    void resetParser() override;

private:
    void readLineObject(IColumn & column);

    PeekableReadBuffer buf;
};

}
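As a usage sketch for the new format (the table name is arbitrary; the stateless test later in this commit exercises the same path): each raw input line becomes one String value, so the target table must have exactly one String column.

``` sql
CREATE TABLE lines (field String) ENGINE = Memory;
-- The data is fed as raw lines after the query, e.g. from the shell:
--   echo 'first line' | clickhouse-client --query="INSERT INTO lines FORMAT LineAsString"
SELECT * FROM lines;
```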
@@ -33,6 +33,7 @@ SRCS(
    Formats/Impl/JSONEachRowRowOutputFormat.cpp
    Formats/Impl/JSONEachRowWithProgressRowOutputFormat.cpp
    Formats/Impl/JSONRowOutputFormat.cpp
    Formats/Impl/LineAsStringRowInputFormat.cpp
    Formats/Impl/MarkdownRowOutputFormat.cpp
    Formats/Impl/MsgPackRowInputFormat.cpp
    Formats/Impl/MsgPackRowOutputFormat.cpp
@@ -395,9 +395,10 @@ static StoragePtr create(const StorageFactory::Arguments & args)

    if (replicated)
    {
        bool has_arguments = arg_num + 2 <= arg_cnt && engine_args[arg_num]->as<ASTLiteral>() && engine_args[arg_num + 1]->as<ASTLiteral>();
        bool has_arguments = arg_num + 2 <= arg_cnt;
        bool has_valid_arguments = has_arguments && engine_args[arg_num]->as<ASTLiteral>() && engine_args[arg_num + 1]->as<ASTLiteral>();

        if (has_arguments)
        if (has_valid_arguments)
        {
            const auto * ast = engine_args[arg_num]->as<ASTLiteral>();
            if (ast && ast->value.getType() == Field::Types::String)
@@ -420,7 +421,7 @@ static StoragePtr create(const StorageFactory::Arguments & args)
                    "No replica name in config" + getMergeTreeVerboseHelp(is_extended_storage_def), ErrorCodes::NO_REPLICA_NAME_GIVEN);
            ++arg_num;
        }
        else if (is_extended_storage_def)
        else if (is_extended_storage_def && !has_arguments)
        {
            /// Try to use default values if arguments are not specified.
            /// It works for ON CLUSTER queries when database engine is Atomic and there are {shard} and {replica} in config.
@@ -428,7 +429,7 @@ static StoragePtr create(const StorageFactory::Arguments & args)
            replica_name = "{replica}"; /// TODO maybe use hostname if {replica} is not defined?
        }
        else
            throw Exception("Expected zookeeper_path and replica_name arguments", ErrorCodes::BAD_ARGUMENTS);
            throw Exception("Expected two string literal arguments: zookeeper_path and replica_name", ErrorCodes::BAD_ARGUMENTS);

        /// Allow implicit {uuid} macros only for zookeeper_path in ON CLUSTER queries
        bool is_on_cluster = args.local_context.getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY;
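The default-arguments path this hunk enables corresponds to DDL like the following sketch (cluster and table names are hypothetical, and the exact default ZooKeeper path comes from server config; the one shown in the comment is illustrative): with the extended storage definition and an Atomic database, both engine arguments may be omitted and are filled in from config macros.

``` sql
-- No explicit zookeeper_path/replica_name: defaults built from macros such as
-- '/clickhouse/tables/{uuid}/{shard}' and '{replica}' are substituted.
CREATE TABLE db.t ON CLUSTER my_cluster (x UInt64)
ENGINE = ReplicatedMergeTree
ORDER BY x;
```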
@@ -0,0 +1,3 @@
<yandex>
    <asynchronous_metrics_update_period_s>1</asynchronous_metrics_update_period_s>
</yandex>
tests/integration/test_asynchronous_metric_log_table/test.py (new file, 59 lines)
@@ -0,0 +1,59 @@
import time

import pytest
from helpers.cluster import ClickHouseCluster

cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', with_zookeeper=True,
                             main_configs=['configs/asynchronous_metrics_update_period_s.xml'])


@pytest.fixture(scope="module")
def started_cluster():
    try:
        cluster.start()
        yield cluster

    finally:
        cluster.shutdown()


# Tests that the event_time_microseconds field in system.asynchronous_metric_log table gets populated.
# Asynchronous metrics are updated once every 60s by default. To make the test run faster, the setting
# asynchronous_metrics_update_period_s is set to 1s in the config above so that the metrics are populated
# faster and are available for querying during the test.
def test_event_time_microseconds_field(started_cluster):
    try:
        cluster.start()
        node1.query("SET log_queries = 1;")
        node1.query("CREATE DATABASE replica;")
        query_create = '''CREATE TABLE replica.test
        (
            id Int64,
            event_time DateTime
        )
        Engine=MergeTree()
        PARTITION BY toYYYYMMDD(event_time)
        ORDER BY id;'''
        time.sleep(2)
        node1.query(query_create)
        node1.query('''INSERT INTO replica.test VALUES (1, now())''')
        node1.query("SYSTEM FLUSH LOGS;")
        # query assumes that the event_time field is accurate
        equals_query = '''WITH (
            (
                SELECT event_time_microseconds
                FROM system.asynchronous_metric_log
                ORDER BY event_time DESC
                LIMIT 1
            ) AS time_with_microseconds,
            (
                SELECT event_time
                FROM system.asynchronous_metric_log
                ORDER BY event_time DESC
                LIMIT 1
            ) AS time)
        SELECT if(dateDiff('second', toDateTime(time_with_microseconds), toDateTime(time)) = 0, 'ok', 'fail')'''
        assert "ok\n" in node1.query(equals_query)
    finally:
        cluster.shutdown()
@@ -2,3 +2,5 @@
2 Hello, world 00000000-0000-0000-0000-000000000000 2016-01-02 2016-01-02 03:04:00 [0,1]
3 hello, world! ab41bdd6-5cd4-11e7-907b-a6006ad3dba0 2016-01-03 2016-01-02 03:00:00 []
4 World ab41bdd6-5cd4-11e7-907b-a6006ad3dba0 2016-01-04 2016-12-11 10:09:08 [3,2,1]
11111
1
@@ -5,3 +5,12 @@ INSERT INTO insert VALUES (1, 'Hello', 'ab41bdd6-5cd4-11e7-907b-a6006ad3dba0', '

SELECT * FROM insert ORDER BY i;
DROP TABLE insert;

-- Test the case where the VALUES are delimited by semicolon and a query follows
-- w/o newline. With most formats the query in the same line would be ignored or
-- lead to an error, but VALUES are an exception and support semicolon delimiter,
-- in addition to the newline.
create table if not exists t_306 (a int) engine Memory;
insert into t_306 values (1); select 11111;
select * from t_306;
drop table if exists t_306;
@@ -174,6 +174,7 @@ SELECT sipHash64(NULL);
SELECT protocol(NULL);
SELECT toInt16OrZero(NULL);
SELECT formatReadableSize(NULL);
SELECT formatReadableQuantity(NULL);
SELECT concatAssumeInjective(NULL);
SELECT toString(NULL);
SELECT MACStringToNum(NULL);
@@ -0,0 +1,8 @@
"id" : 1,
"date" : "01.01.2020",
"string" : "123{{{\"\\",
"array" : [1, 2, 3],

Finally implement this new feature.
42 ClickHouse
42 ClickHouse is a `fast` #open-source# (OLAP) database "management" :system:
tests/queries/0_stateless/01460_line_as_string_format.sh (new executable file, 31 lines)
@@ -0,0 +1,31 @@
#!/usr/bin/env bash

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. "$CURDIR"/../shell_config.sh

$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS line_as_string1";
$CLICKHOUSE_CLIENT --query="CREATE TABLE line_as_string1(field String) ENGINE = Memory";

echo '"id" : 1,
"date" : "01.01.2020",
"string" : "123{{{\"\\",
"array" : [1, 2, 3],

Finally implement this new feature.' | $CLICKHOUSE_CLIENT --query="INSERT INTO line_as_string1 FORMAT LineAsString";

$CLICKHOUSE_CLIENT --query="SELECT * FROM line_as_string1";
$CLICKHOUSE_CLIENT --query="DROP TABLE line_as_string1"

$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS line_as_string2";
$CLICKHOUSE_CLIENT --query="create table line_as_string2(
    a UInt64 default 42,
    b String materialized toString(a),
    c String
) engine=MergeTree() order by tuple();";

$CLICKHOUSE_CLIENT --query="INSERT INTO line_as_string2(c) values ('ClickHouse')";

echo 'ClickHouse is a `fast` #open-source# (OLAP) 'database' "management" :system:' | $CLICKHOUSE_CLIENT --query="INSERT INTO line_as_string2(c) FORMAT LineAsString";

$CLICKHOUSE_CLIENT --query="SELECT * FROM line_as_string2 order by c";
$CLICKHOUSE_CLIENT --query="DROP TABLE line_as_string2"
@@ -0,0 +1,2 @@
01473_metric_log_table_event_start_time_microseconds_test
ok
tests/queries/0_stateless/01473_event_time_microseconds.sql (new file, 24 lines)
@@ -0,0 +1,24 @@
-- This file contains tests for the event_time_microseconds field for various tables.
-- Note: Only event_time_microseconds for asynchronous_metric_log table is tested via
-- an integration test as those metrics take 60s by default to be updated.
-- Refer: tests/integration/test_asynchronous_metric_log_table.

set log_queries = 1;

select '01473_metric_log_table_event_start_time_microseconds_test';
system flush logs;
-- query assumes that the event_time field is accurate.
WITH (
    (
        SELECT event_time_microseconds
        FROM system.metric_log
        ORDER BY event_time DESC
        LIMIT 1
    ) AS time_with_microseconds,
    (
        SELECT event_time
        FROM system.metric_log
        ORDER BY event_time DESC
        LIMIT 1
    ) AS time)
SELECT if(dateDiff('second', toDateTime(time_with_microseconds), toDateTime(time)) = 0, 'ok', 'fail')
@@ -0,0 +1 @@
SELECT NULL = countEqual(materialize([arrayJoin([NULL, NULL, NULL]), NULL AS x, arrayJoin([255, 1025, NULL, NULL]), arrayJoin([2, 1048576, NULL, NULL])]), materialize(x)) format Null;
@@ -0,0 +1,50 @@
1.00 1.00 1.00
2.72 2.00 2.00
7.39 7.00 7.00
20.09 20.00 20.00
54.60 54.00 54.00
148.41 148.00 148.00
403.43 403.00 403.00
1.10 thousand 1.10 thousand 1.10 thousand
2.98 thousand 2.98 thousand 2.98 thousand
8.10 thousand 8.10 thousand 8.10 thousand
22.03 thousand 22.03 thousand 22.03 thousand
59.87 thousand 59.87 thousand 59.87 thousand
162.75 thousand 162.75 thousand 162.75 thousand
442.41 thousand 442.41 thousand 442.41 thousand
1.20 million 1.20 million 1.20 million
3.27 million 3.27 million 3.27 million
8.89 million 8.89 million 8.89 million
24.15 million 24.15 million 24.15 million
65.66 million 65.66 million 65.66 million
178.48 million 178.48 million 178.48 million
485.17 million 485.17 million 485.17 million
1.32 billion 1.32 billion 1.32 billion
3.58 billion 3.58 billion -2.15 billion
9.74 billion 9.74 billion -2.15 billion
26.49 billion 26.49 billion -2.15 billion
72.00 billion 72.00 billion -2.15 billion
195.73 billion 195.73 billion -2.15 billion
532.05 billion 532.05 billion -2.15 billion
1.45 trillion 1.45 trillion -2.15 billion
3.93 trillion 3.93 trillion -2.15 billion
10.69 trillion 10.69 trillion -2.15 billion
29.05 trillion 29.05 trillion -2.15 billion
78.96 trillion 78.96 trillion -2.15 billion
214.64 trillion 214.64 trillion -2.15 billion
583.46 trillion 583.46 trillion -2.15 billion
1.59 quadrillion 1.59 quadrillion -2.15 billion
4.31 quadrillion 4.31 quadrillion -2.15 billion
11.72 quadrillion 11.72 quadrillion -2.15 billion
31.86 quadrillion 31.86 quadrillion -2.15 billion
86.59 quadrillion 86.59 quadrillion -2.15 billion
235.39 quadrillion 235.39 quadrillion -2.15 billion
639.84 quadrillion 639.84 quadrillion -2.15 billion
1739.27 quadrillion 1739.27 quadrillion -2.15 billion
4727.84 quadrillion 4727.84 quadrillion -2.15 billion
12851.60 quadrillion 12851.60 quadrillion -2.15 billion
34934.27 quadrillion 0.00 -2.15 billion
94961.19 quadrillion 0.00 -2.15 billion
258131.29 quadrillion 0.00 -2.15 billion
701673.59 quadrillion 0.00 -2.15 billion
1907346.57 quadrillion 0.00 -2.15 billion
@@ -0,0 +1,4 @@
WITH round(exp(number), 6) AS x, toUInt64(x) AS y, toInt32(x) AS z
SELECT formatReadableQuantity(x), formatReadableQuantity(y), formatReadableQuantity(z)
FROM system.numbers
LIMIT 50;
@@ -1,5 +1,6 @@
v20.8.2.3-stable 2020-09-08
v20.7.2.30-stable 2020-08-31
v20.6.6.7-stable 2020-09-11
v20.6.5.8-stable 2020-09-03
v20.6.4.44-stable 2020-08-20
v20.6.3.28-stable 2020-08-07