Merge remote-tracking branch 'origin' into fix-buffer-overflow-in-token-extractor

Alexey Milovidov 2021-04-01 16:30:10 +03:00
commit fbb4b05df0
31 changed files with 525 additions and 108 deletions

View File

@ -25,6 +25,12 @@ namespace common
return x - y;
}
template <typename T>
inline auto NO_SANITIZE_UNDEFINED negateIgnoreOverflow(T x)
{
return -x;
}
template <typename T>
inline bool addOverflow(T x, T y, T & res)
{

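For context, a standalone sketch of how these helpers behave (hypothetical demo code, assuming this header is available as `<common/arithmeticOverflow.h>`): `negateIgnoreOverflow` tolerates the two's-complement wrap of the minimum value that a plain `-x` would turn into undefined behaviour, while the checked helpers such as `addOverflow` report whether the result wrapped.

```cpp
#include <cassert>
#include <cstdint>
#include <limits>
#include <common/arithmeticOverflow.h>  // assumed include path for this header

int main()
{
    const int64_t min = std::numeric_limits<int64_t>::min();

    /// Plain `-min` is undefined behaviour for signed integers; the helper is
    /// marked NO_SANITIZE_UNDEFINED, so the wrap back to `min` is accepted.
    assert(common::negateIgnoreOverflow(min) == min);

    /// Checked addition: returns true when the result wrapped.
    int64_t res;
    assert(common::addOverflow(min, int64_t{-1}, res));
}
```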
View File

@ -59,6 +59,7 @@ toc_title: Adopters
| <a href="https://www.idealista.com" class="favicon">Idealista</a> | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.tech/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) |
| <a href="https://www.infovista.com/" class="favicon">Infovista</a> | Networks | Analytics | — | — | [Slides in English, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) |
| <a href="https://www.innogames.com" class="favicon">InnoGames</a> | Games | Metrics, Logging | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) |
| <a href="https://instabug.com/" class="favicon">Instabug</a> | APM Platform | Main product | — | — | [A quote from Co-Founder](https://altinity.com/) |
| <a href="https://www.instana.com" class="favicon">Instana</a> | APM Platform | Main product | — | — | [Twitter post](https://twitter.com/mieldonkers/status/1248884119158882304) |
| <a href="https://integros.com" class="favicon">Integros</a> | Platform for video services | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) |
| <a href="https://ippon.tech" class="favicon">Ippon Technologies</a> | Technology Consulting | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=205) |
@ -78,6 +79,7 @@ toc_title: Adopters
| <a href="https://mux.com/" class="favicon">MUX</a> | Online Video | Video Analytics | — | — | [Talk in English, August 2019](https://altinity.com/presentations/2019/8/13/how-clickhouse-became-the-default-analytics-database-for-mux/) |
| <a href="https://www.mgid.com/" class="favicon">MGID</a> | Ad network | Web-analytics | — | — | [Blog post in Russian, April 2020](http://gs-studio.com/news-about-it/32777----clickhouse---c) |
| <a href="https://www.netskope.com/" class="favicon">Netskope</a> | Network Security | — | — | — | [Job advertisement, March 2021](https://www.mendeley.com/careers/job/senior-software-developer-backend-developer-1346348) |
| <a href="https://niclabs.cl/" class="favicon">NIC Labs</a> | Network Monitoring | RaTA-DNS | — | — | [Blog post, March 2021](https://niclabs.cl/ratadns/2021/03/Clickhouse) |
| <a href="https://getnoc.com/" class="favicon">NOC Project</a> | Network Monitoring | Analytics | Main Product | — | [Official Website](https://getnoc.com/features/big-data/) |
| <a href="https://www.nuna.com/" class="favicon">Nuna Inc.</a> | Health Data Analytics | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=170) |
| <a href="https://www.oneapm.com/" class="favicon">OneAPM</a> | Monitorings and Data Analysis | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) |
@ -98,6 +100,7 @@ toc_title: Adopters
| <a href="https://rspamd.com/" class="favicon">Rspamd</a> | Antispam | Analytics | — | — | [Official Website](https://rspamd.com/doc/modules/clickhouse.html) |
| <a href="https://rusiem.com/en" class="favicon">RuSIEM</a> | SIEM | Main Product | — | — | [Official Website](https://rusiem.com/en/products/architecture) |
| <a href="https://www.s7.ru" class="favicon">S7 Airlines</a> | Airlines | Metrics, Logging | — | — | [Talk in Russian, March 2019](https://www.youtube.com/watch?v=nwG68klRpPg&t=15s) |
| <a href="https://www.sberbank.com/index" class="favicon">Sber</a> | Banking, Fintech, Retail, Cloud, Media | — | — | — | [Job advertisement, March 2021](https://career.habr.com/vacancies/1000073536) |
| <a href="https://www.scireum.de/" class="favicon">scireum GmbH</a> | e-Commerce | Main product | — | — | [Talk in German, February 2020](https://www.youtube.com/watch?v=7QWAn5RbyR4) |
| <a href="https://segment.com/" class="favicon">Segment</a> | Data processing | Main product | 9 * i3en.3xlarge nodes 7.5TB NVME SSDs, 96GB Memory, 12 vCPUs | — | [Slides, 2019](https://slides.com/abraithwaite/segment-clickhouse) |
| <a href="https://sembot.io/" class="favicon">sembot.io</a> | Shopping Ads | — | — | — | A comment on LinkedIn, 2020 |
@ -119,6 +122,7 @@ toc_title: Adopters
| <a href="https://www.tinybird.co/" class="favicon">Tinybird</a> | Real-time Data Products | Data processing | — | — | [Official website](https://www.tinybird.co/) |
| <a href="https://trafficstars.com/" class="favicon">Traffic Stars</a> | AD network | — | — | — | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) |
| <a href="https://www.uber.com" class="favicon">Uber</a> | Taxi | Logging | — | — | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/uber.pdf) |
| <a href="https://hello.utmstat.com/" class="favicon">UTMSTAT</a> | Analytics | Main product | — | — | [Blog post, June 2020](https://vc.ru/tribuna/133956-striming-dannyh-iz-servisa-skvoznoy-analitiki-v-clickhouse) |
| <a href="https://vk.com" class="favicon">VKontakte</a> | Social Network | Statistics, Logging | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) |
| <a href="https://www.vmware.com/" class="favicon">VMWare</a> | Cloud | VeloCloud, SDN | — | — | [Product documentation](https://docs.vmware.com/en/vRealize-Operations-Manager/8.3/com.vmware.vcom.metrics.doc/GUID-A9AD72E1-C948-4CA2-971B-919385AB3CA8.html) |
| <a href="https://www.walmartlabs.com/" class="favicon">Walmart Labs</a> | Internet, Retail | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=144) |
@ -133,6 +137,7 @@ toc_title: Adopters
| <a href="https://cloud.yandex.ru/services/datalens" class="favicon">Yandex DataLens</a> | Business Intelligence | Main product | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/datalens.pdf) |
| <a href="https://market.yandex.ru/" class="favicon">Yandex Market</a> | e-Commerce | Metrics, Logging | — | — | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) |
| <a href="https://metrica.yandex.com" class="favicon">Yandex Metrica</a> | Web analytics | Main product | 630 servers in one cluster, 360 servers in another cluster, 1862 servers in one department | 133 PiB / 8.31 PiB / 120 trillion records | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/introduction/#13) |
| <a href="https://www.yotascale.com/" class="favicon">Yotascale</a> | Cloud | Data pipeline | — | 2 bn records/day | [LinkedIn (Accomplishments)](https://www.linkedin.com/in/adilsaleem/) |
| <a href="https://htc-cs.ru/" class="favicon">ЦВТ</a> | Software Development | Metrics, Logging | — | — | [Blog Post, March 2019, in Russian](https://vc.ru/dev/62715-kak-my-stroili-monitoring-na-prometheus-clickhouse-i-elk) |
| <a href="https://mkb.ru/" class="favicon">МКБ</a> | Bank | Web-system monitoring | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf) |
| <a href="https://cft.ru/" class="favicon">ЦФТ</a> | Banking, Financial products, Payments | — | — | — | [Meetup in Russian, April 2020](https://team.cft.ru/events/162) |

View File

@ -385,8 +385,6 @@ reinterpretAsUUID(fixed_string)
- `fixed_string` — Big-endian byte string. [FixedString](../../sql-reference/data-types/fixedstring.md#fixedstring).
## reinterpret(x, T) {#type_conversion_function-reinterpret}
**Returned value**
- The UUID type value. [UUID](../../sql-reference/data-types/uuid.md#uuid-data-type).
@ -398,9 +396,7 @@ String to UUID.
Query:
``` sql
SELECT reinterpret(toInt8(-1), 'UInt8') as int_to_uint,
reinterpret(toInt8(1), 'Float32') as int_to_float,
reinterpret('1', 'UInt32') as string_to_int;
SELECT reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f')));
```
Result:
@ -431,15 +427,51 @@ Result:
└─────────────────────┘
```
## reinterpret(x, T) {#type_conversion_function-reinterpret}
Uses the same source in-memory byte sequence for the `x` value and reinterprets it to the destination type.
Query:
```sql
SELECT reinterpret(toInt8(-1), 'UInt8') as int_to_uint,
reinterpret(toInt8(1), 'Float32') as int_to_float,
reinterpret('1', 'UInt32') as string_to_int;
```
Result:
```
┌─int_to_uint─┬─int_to_float─┬─string_to_int─┐
│ 255 │ 1e-45 │ 49 │
└─────────────┴──────────────┴───────────────┘
```
## CAST(x, T) {#type_conversion_function-cast}
Converts input value `x` to the `T` data type.
Converts the input value `x` to the `T` data type. Unlike the `reinterpret` function, it uses the external representation of the `x` value.
The syntax `CAST(x AS t)` is also supported.
Note that if the value `x` does not fit the bounds of type `T`, the function overflows. For example, `CAST(-1, 'UInt8')` returns 255.
**Example**
**Examples**
Query:
```sql
SELECT
cast(toInt8(-1), 'UInt8') AS cast_int_to_uint,
cast(toInt8(1), 'Float32') AS cast_int_to_float,
cast('1', 'UInt32') AS cast_string_to_int
```
Result:
```
┌─cast_int_to_uint─┬─cast_int_to_float─┬─cast_string_to_int─┐
│ 255 │ 1 │ 1 │
└──────────────────┴───────────────────┴────────────────────┘
```
Query:
@ -634,6 +666,7 @@ Result:
```
## parseDateTimeBestEffort {#parsedatetimebesteffort}
## parseDateTime32BestEffort {#parsedatetime32besteffort}
Converts a date and time in the [String](../../sql-reference/data-types/string.md) representation to [DateTime](../../sql-reference/data-types/datetime.md#data_type-datetime) data type.
@ -822,10 +855,12 @@ Result:
```
## parseDateTimeBestEffortOrNull {#parsedatetimebesteffortornull}
## parseDateTime32BestEffortOrNull {#parsedatetime32besteffortornull}
Same as for [parseDateTimeBestEffort](#parsedatetimebesteffort) except that it returns null when it encounters a date format that cannot be processed.
Same as for [parseDateTimeBestEffort](#parsedatetimebesteffort) except that it returns `NULL` when it encounters a date format that cannot be processed.
## parseDateTimeBestEffortOrZero {#parsedatetimebesteffortorzero}
## parseDateTime32BestEffortOrZero {#parsedatetime32besteffortorzero}
Same as for [parseDateTimeBestEffort](#parsedatetimebesteffort) except that it returns zero date or zero date time when it encounters a date format that cannot be processed.
@ -1001,6 +1036,57 @@ Result:
└─────────────────────────────────┘
```
## parseDateTime64BestEffort {#parsedatetime64besteffort}
Same as the [parseDateTimeBestEffort](#parsedatetimebesteffort) function, but also parses milliseconds and microseconds and returns a `DateTime64(3)` or `DateTime64(6)` value depending on the specified precision.
**Syntax**
``` sql
parseDateTime64BestEffort(time_string [, precision [, time_zone]])
```
**Parameters**
- `time_string` — String containing a date or date with time to convert. [String](../../sql-reference/data-types/string.md).
- `precision` — `3` for milliseconds, `6` for microseconds. Default `3`. Optional. [UInt8](../../sql-reference/data-types/int-uint.md).
- `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../../sql-reference/data-types/string.md).
**Examples**
Query:
```sql
SELECT parseDateTime64BestEffort('2021-01-01') AS a, toTypeName(a) AS t
UNION ALL
SELECT parseDateTime64BestEffort('2021-01-01 01:01:00.12346') AS a, toTypeName(a) AS t
UNION ALL
SELECT parseDateTime64BestEffort('2021-01-01 01:01:00.12346',6) AS a, toTypeName(a) AS t
UNION ALL
SELECT parseDateTime64BestEffort('2021-01-01 01:01:00.12346',3,'Europe/Moscow') AS a, toTypeName(a) AS t
FORMAT PrettyCompactMonoBlock
```
Result:
```
┌──────────────────────────a─┬─t──────────────────────────────┐
│ 2021-01-01 01:01:00.123000 │ DateTime64(3) │
│ 2021-01-01 00:00:00.000000 │ DateTime64(3) │
│ 2021-01-01 01:01:00.123460 │ DateTime64(6) │
│ 2020-12-31 22:01:00.123000 │ DateTime64(3, 'Europe/Moscow') │
└────────────────────────────┴────────────────────────────────┘
```
## parseDateTime64BestEffortOrNull {#parsedatetime64besteffortornull}
Same as for [parseDateTime64BestEffort](#parsedatetime64besteffort) except that it returns `NULL` when it encounters a date format that cannot be processed.
## parseDateTime64BestEffortOrZero {#parsedatetime64besteffortorzero}
Same as for [parseDateTime64BestEffort](#parsedatetime64besteffort) except that it returns zero date or zero date time when it encounters a date format that cannot be processed.
## toLowCardinality {#tolowcardinality}
Converts the input parameter to the [LowCardinality](../../sql-reference/data-types/lowcardinality.md) version of the same data type.
@ -1009,7 +1095,7 @@ To convert data from the `LowCardinality` data type use the [CAST](#type_convers
**Syntax**
``` sql
```sql
toLowCardinality(expr)
```
@ -1027,7 +1113,7 @@ Type: `LowCardinality(expr_result_type)`
Query:
``` sql
```sql
SELECT toLowCardinality('1');
```
@ -1045,7 +1131,8 @@ Result:
## toUnixTimestamp64Nano {#tounixtimestamp64nano}
Converts a `DateTime64` to a `Int64` value with fixed sub-second precision. Input value is scaled up or down appropriately depending on it precision. Please note that output value is a timestamp in UTC, not in timezone of `DateTime64`.
Converts a `DateTime64` to an `Int64` value with fixed sub-second precision.
The input value is scaled up or down appropriately depending on its precision. Please note that the output value is a timestamp in UTC, not in the timezone of `DateTime64`.
**Syntax**
@ -1078,6 +1165,8 @@ Result:
└──────────────────────────────┘
```
Query:
``` sql
WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64
SELECT toUnixTimestamp64Nano(dt64);

View File

@ -423,15 +423,51 @@ SELECT uuid = uuid2;
└─────────────────────┘
```
## reinterpret(x, T) {#type_conversion_function-reinterpret}
Uses the same source in-memory byte sequence for the `x` value and reinterprets it as the destination data type.
Query:
```sql
SELECT reinterpret(toInt8(-1), 'UInt8') as int_to_uint,
reinterpret(toInt8(1), 'Float32') as int_to_float,
reinterpret('1', 'UInt32') as string_to_int;
```
Result:
```
┌─int_to_uint─┬─int_to_float─┬─string_to_int─┐
│ 255 │ 1e-45 │ 49 │
└─────────────┴──────────────┴───────────────┘
```
## CAST(x, T) {#type_conversion_function-cast}
Converts the input value `x` to the specified data type `T`.
Converts the input value `x` to the specified data type `T`. Unlike the `reinterpret` function, it uses the external representation of the `x` value.
The syntax `CAST(x AS t)` is also supported.
Note that if the value `x` does not fit the bounds of type `T`, the function overflows. For example, `CAST(-1, 'UInt8')` returns 255.
**Example**
**Examples**
Query:
```sql
SELECT
cast(toInt8(-1), 'UInt8') AS cast_int_to_uint,
cast(toInt8(1), 'Float32') AS cast_int_to_float,
cast('1', 'UInt32') AS cast_string_to_int
```
Result:
```
┌─cast_int_to_uint─┬─cast_int_to_float─┬─cast_string_to_int─┐
│ 255 │ 1 │ 1 │
└──────────────────┴───────────────────┴────────────────────┘
```
Query:
@ -511,7 +547,8 @@ SELECT cast(-1, 'UInt8') as uint8;
``` text
┌─uint8─┐
│ 255 │
└─────
└───────┘
```
Query:
@ -627,6 +664,7 @@ SELECT
```
## parseDateTimeBestEffort {#parsedatetimebesteffort}
## parseDateTime32BestEffort {#parsedatetime32besteffort}
Converts a date and time in [String](../../sql-reference/functions/type-conversion-functions.md) representation to the [DateTime](../../sql-reference/functions/type-conversion-functions.md#data_type-datetime) data type.
@ -814,6 +852,16 @@ AS parseDateTimeBestEffortUS;
└───────────────────────────┘
```
## parseDateTimeBestEffortOrNull {#parsedatetimebesteffortornull}
## parseDateTime32BestEffortOrNull {#parsedatetime32besteffortornull}
Works the same as [parseDateTimeBestEffort](#parsedatetimebesteffort), but returns `NULL` when it encounters a date format that cannot be processed.
## parseDateTimeBestEffortOrZero {#parsedatetimebesteffortorzero}
## parseDateTime32BestEffortOrZero {#parsedatetime32besteffortorzero}
Works the same as [parseDateTimeBestEffort](#parsedatetimebesteffort), but returns a zero date or zero date and time when it encounters a date format that cannot be processed.
## parseDateTimeBestEffortUSOrNull {#parsedatetimebesteffortusornull}
Works like [parseDateTimeBestEffortUS](#parsedatetimebesteffortUS), but unlike it, returns `NULL` if the input string cannot be converted to the [DateTime](../../sql-reference/data-types/datetime.md) data type.
@ -986,9 +1034,100 @@ SELECT parseDateTimeBestEffortUSOrZero('02.2021') AS parseDateTimeBestEffortUSOr
└─────────────────────────────────┘
```
## toUnixTimestamp64Milli
## toUnixTimestamp64Micro
## toUnixTimestamp64Nano
## parseDateTime64BestEffort {#parsedatetime64besteffort}
Works the same as the [parseDateTimeBestEffort](#parsedatetimebesteffort) function, but also parses milliseconds and microseconds and returns a `DateTime64(3)` or `DateTime64(6)` data type depending on the specified precision.
**Syntax**
``` sql
parseDateTime64BestEffort(time_string [, precision [, time_zone]])
```
**Parameters**
- `time_string` — String containing a date or date with time to convert. [String](../../sql-reference/data-types/string.md).
- `precision` — `3` for milliseconds, `6` for microseconds. Default `3`. Optional. [UInt8](../../sql-reference/data-types/int-uint.md).
- `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../../sql-reference/data-types/string.md).
**Examples**
Query:
```sql
SELECT parseDateTime64BestEffort('2021-01-01') AS a, toTypeName(a) AS t
UNION ALL
SELECT parseDateTime64BestEffort('2021-01-01 01:01:00.12346') AS a, toTypeName(a) AS t
UNION ALL
SELECT parseDateTime64BestEffort('2021-01-01 01:01:00.12346',6) AS a, toTypeName(a) AS t
UNION ALL
SELECT parseDateTime64BestEffort('2021-01-01 01:01:00.12346',3,'Europe/Moscow') AS a, toTypeName(a) AS t
FORMAT PrettyCompactMonoBlock
```
Result:
```
┌──────────────────────────a─┬─t──────────────────────────────┐
│ 2021-01-01 01:01:00.123000 │ DateTime64(3) │
│ 2021-01-01 00:00:00.000000 │ DateTime64(3) │
│ 2021-01-01 01:01:00.123460 │ DateTime64(6) │
│ 2020-12-31 22:01:00.123000 │ DateTime64(3, 'Europe/Moscow') │
└────────────────────────────┴────────────────────────────────┘
```
## parseDateTime64BestEffortOrNull {#parsedatetime64besteffortornull}
Works the same as the [parseDateTime64BestEffort](#parsedatetime64besteffort) function, but returns `NULL` when it encounters a date format that cannot be processed.
## parseDateTime64BestEffortOrZero {#parsedatetime64besteffortorzero}
Works the same as the [parseDateTime64BestEffort](#parsedatetime64besteffort) function, but returns a "zero" date and time when it encounters a date format that cannot be processed.
## toLowCardinality {#tolowcardinality}
Converts the input data to the [LowCardinality](../data-types/lowcardinality.md) version of the same data type.
To convert data from the `LowCardinality` data type, use the [CAST](#type_conversion_function-cast) function. For example, `CAST(x as String)`.
**Syntax**
```sql
toLowCardinality(expr)
```
**Arguments**
- `expr` — [Expression](../syntax.md#syntax-expressions) resulting in one of the [supported data types](../data-types/index.md#data_types).
**Returned value**
- The result of converting `expr`.
Type: `LowCardinality(expr_result_type)`
**Example**
Query:
```sql
SELECT toLowCardinality('1');
```
Result:
```text
┌─toLowCardinality('1')─┐
│ 1 │
└───────────────────────┘
```
## toUnixTimestamp64Milli {#tounixtimestamp64milli}
## toUnixTimestamp64Micro {#tounixtimestamp64micro}
## toUnixTimestamp64Nano {#tounixtimestamp64nano}
Converts a `DateTime64` value to an `Int64` value with fixed sub-second precision.
The input value is scaled up or down appropriately depending on its precision. Please note that the returned value is a timestamp in UTC, not in the timezone of `DateTime64`.
@ -1039,9 +1178,11 @@ SELECT toUnixTimestamp64Nano(dt64);
└─────────────────────────────┘
```
## fromUnixTimestamp64Milli
## fromUnixTimestamp64Micro
## fromUnixTimestamp64Nano
## fromUnixTimestamp64Milli {#fromunixtimestamp64milli}
## fromUnixTimestamp64Micro {#fromunixtimestamp64micro}
## fromUnixTimestamp64Nano {#fromunixtimestamp64nano}
Converts an `Int64` value to a `DateTime64` value with fixed sub-second precision and an optional timezone. The input value is scaled up or down appropriately depending on its precision. Please note that the input value is treated as a UTC timestamp, not as a timestamp in the given (or implicit) timezone.
@ -1077,45 +1218,6 @@ SELECT fromUnixTimestamp64Milli(i64, 'UTC');
└──────────────────────────────────────┘
```
## toLowCardinality {#tolowcardinality}
Converts the input data to the [LowCardinality](../data-types/lowcardinality.md) version of the same data type.
To convert data from the `LowCardinality` data type, use the [CAST](#type_conversion_function-cast) function. For example, `CAST(x as String)`.
**Syntax**
```sql
toLowCardinality(expr)
```
**Arguments**
- `expr` — [Expression](../syntax.md#syntax-expressions) resulting in one of the [supported data types](../data-types/index.md#data_types).
**Returned value**
- The result of converting `expr`.
Type: `LowCardinality(expr_result_type)`
**Example**
Query:
```sql
SELECT toLowCardinality('1');
```
Result:
```text
┌─toLowCardinality('1')─┐
│ 1 │
└───────────────────────┘
```
## formatRow {#formatrow}
Converts arbitrary expressions to a string in the specified format.

View File

@ -0,0 +1,62 @@
#pragma once
#include <stdexcept>
#include <cstddef>
#include <cstdlib>
#include <limits>
#include <new>
#include <Common/CurrentMemoryTracker.h>
/// Implementation of std::allocator interface that tracks memory with MemoryTracker.
/// NOTE We already plug MemoryTracker into new/delete operators. So, everything works even with default allocator.
/// But it is enabled only if jemalloc is used (to obtain the size of the allocation on call to delete).
/// And jemalloc is disabled for builds with sanitizers. In these cases memory was not always tracked.
template <typename T>
struct AllocatorWithMemoryTracking
{
typedef T value_type;
AllocatorWithMemoryTracking() = default;
template <typename U>
constexpr AllocatorWithMemoryTracking(const AllocatorWithMemoryTracking<U> &) noexcept
{
}
[[nodiscard]] T * allocate(size_t n)
{
if (n > std::numeric_limits<size_t>::max() / sizeof(T))
throw std::bad_alloc();
size_t bytes = n * sizeof(T);
CurrentMemoryTracker::alloc(bytes);
T * p = static_cast<T *>(malloc(bytes));
if (!p)
throw std::bad_alloc();
return p;
}
void deallocate(T * p, size_t n) noexcept
{
free(p);
size_t bytes = n * sizeof(T);
CurrentMemoryTracker::free(bytes);
}
};
template <typename T, typename U>
bool operator==(const AllocatorWithMemoryTracking <T> &, const AllocatorWithMemoryTracking <U> &)
{
return true;
}
template <typename T, typename U>
bool operator!=(const AllocatorWithMemoryTracking <T> &, const AllocatorWithMemoryTracking <U> &)
{
return false;
}
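A usage sketch (illustrative, not part of this commit): because the allocator models the `std::allocator` interface, it can be plugged into any standard container so the container's heap usage is charged to the current query's MemoryTracker — which is exactly how `FieldVector` is wired up in `Field.h` below.

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>
#include <Common/AllocatorWithMemoryTracking.h>

/// Every allocation and deallocation made by this vector goes through
/// CurrentMemoryTracker::alloc/free, so its growth is charged against
/// limits such as max_memory_usage instead of going unnoticed.
using TrackedVector = std::vector<uint64_t, AllocatorWithMemoryTracking<uint64_t>>;

void fill(TrackedVector & values, size_t n)
{
    values.reserve(n);  /// tracked: one allocation of n * sizeof(uint64_t) bytes
    for (size_t i = 0; i < n; ++i)
        values.push_back(i);
}
```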

View File

@ -8,6 +8,7 @@
#include <Common/Exception.h>
#include <Common/UInt128.h>
#include <Common/AllocatorWithMemoryTracking.h>
#include <Core/Types.h>
#include <Core/Defines.h>
#include <Core/DecimalFunctions.h>
@ -35,7 +36,7 @@ template <typename T>
using NearestFieldType = typename NearestFieldTypeImpl<T>::Type;
class Field;
using FieldVector = std::vector<Field>;
using FieldVector = std::vector<Field, AllocatorWithMemoryTracking<Field>>;
/// Array and Tuple use the same storage type -- FieldVector, but we declare
/// distinct types for them, so that the caller can choose whether it wants to

View File

@ -41,7 +41,7 @@ void NativeBlockOutputStream::flush()
}
void NativeBlockOutputStream::writeData(const IDataType & type, const ColumnPtr & column, WriteBuffer & ostr, UInt64 offset, UInt64 limit)
static void writeData(const IDataType & type, const ColumnPtr & column, WriteBuffer & ostr, UInt64 offset, UInt64 limit)
{
/** If there are columns-constants - then we materialize them.
* (Since the data type does not know how to serialize / deserialize constants.)

View File

@ -30,8 +30,6 @@ public:
void write(const Block & block) override;
void flush() override;
static void writeData(const IDataType & type, const ColumnPtr & column, WriteBuffer & ostr, UInt64 offset, UInt64 limit);
String getContentType() const override { return "application/octet-stream"; }
private:

View File

@ -6,7 +6,9 @@
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include "Core/ColumnWithTypeAndName.h"
#include <common/arithmeticOverflow.h>
#include <Core/ColumnWithTypeAndName.h>
namespace DB
{
@ -159,27 +161,27 @@ private:
if constexpr (is_str_key)
{
// have to use Field structs to get strings
key = arg.key_column.operator[](offset + j).get<KeyType>();
key = arg.key_column[offset + j].get<KeyType>();
}
else
{
key = assert_cast<const ColumnVector<KeyType> &>(arg.key_column).getData()[offset + j];
}
auto value = arg.val_column.operator[](offset + j).get<ValType>();
ValType value = arg.val_column[offset + j].get<ValType>();
if constexpr (op_type == OpTypes::ADD)
{
const auto [it, inserted] = summing_map.insert({key, value});
if (!inserted)
it->second += value;
it->second = common::addIgnoreOverflow(it->second, value);
}
else
{
static_assert(op_type == OpTypes::SUBTRACT);
const auto [it, inserted] = summing_map.insert({key, first ? value : -value});
const auto [it, inserted] = summing_map.insert({key, first ? value : common::negateIgnoreOverflow(value)});
if (!inserted)
it->second -= value;
it->second = common::subIgnoreOverflow(it->second, value);
}
}
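The accumulation above is the usual insert-or-fold idiom; a minimal sketch in isolation (hypothetical key and value types — the original is templated over `KeyType`/`ValType`):

```cpp
#include <cstdint>
#include <map>
#include <common/arithmeticOverflow.h>  // assumed include path, as in this file

/// insert() returns {iterator, inserted}: a new key stores the value as-is,
/// an existing key folds the value into the running sum. The overflow-ignoring
/// helper replaces a raw `+=`, whose signed wraparound UBSan would flag.
void accumulate(std::map<uint64_t, int64_t> & summing_map, uint64_t key, int64_t value)
{
    const auto [it, inserted] = summing_map.insert({key, value});
    if (!inserted)
        it->second = common::addIgnoreOverflow(it->second, value);
}
```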

View File

@ -49,7 +49,6 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
extern const int SYNTAX_ERROR;
extern const int LOGICAL_ERROR;
extern const int NOT_IMPLEMENTED;
}
@ -610,8 +609,8 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p
node->frame.begin_preceding = false;
if (node->frame.begin_type == WindowFrame::BoundaryType::Unbounded)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED,
"Frame start UNBOUNDED FOLLOWING is not implemented");
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Frame start cannot be UNBOUNDED FOLLOWING");
}
}
else
@ -667,8 +666,8 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p
node->frame.end_preceding = true;
if (node->frame.end_type == WindowFrame::BoundaryType::Unbounded)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED,
"Frame end UNBOUNDED PRECEDING is not implemented");
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Frame end cannot be UNBOUNDED PRECEDING");
}
}
else if (keyword_following.ignore(pos, expected))

View File

@ -80,9 +80,11 @@ namespace DB
}
void ParallelFormattingOutputFormat::collectorThreadFunction()
void ParallelFormattingOutputFormat::collectorThreadFunction(const ThreadGroupStatusPtr & thread_group)
{
setThreadName("Collector");
if (thread_group)
CurrentThread::attachToIfDetached(thread_group);
try
{
@ -135,9 +137,11 @@ namespace DB
}
void ParallelFormattingOutputFormat::formatterThreadFunction(size_t current_unit_number)
void ParallelFormattingOutputFormat::formatterThreadFunction(size_t current_unit_number, const ThreadGroupStatusPtr & thread_group)
{
setThreadName("Formatter");
if (thread_group)
CurrentThread::attachToIfDetached(thread_group);
try
{

View File

@ -76,7 +76,10 @@ public:
/// Just heuristic. We need one thread for collecting, one thread for receiving chunks
/// and n threads for formatting.
processing_units.resize(params.max_threads_for_parallel_formatting + 2);
collector_thread = ThreadFromGlobalPool([&] { collectorThreadFunction(); });
collector_thread = ThreadFromGlobalPool([thread_group = CurrentThread::getGroup(), this]
{
collectorThreadFunction(thread_group);
});
LOG_TRACE(&Poco::Logger::get("ParallelFormattingOutputFormat"), "Parallel formatting is being used");
}
@ -200,14 +203,17 @@ private:
void scheduleFormatterThreadForUnitWithNumber(size_t ticket_number)
{
pool.scheduleOrThrowOnError([this, ticket_number] { formatterThreadFunction(ticket_number); });
pool.scheduleOrThrowOnError([this, thread_group = CurrentThread::getGroup(), ticket_number]
{
formatterThreadFunction(ticket_number, thread_group);
});
}
/// Collects all temporary buffers into main WriteBuffer.
void collectorThreadFunction();
void collectorThreadFunction(const ThreadGroupStatusPtr & thread_group);
/// This function is executed in ThreadPool and the only purpose of it is to format one Chunk into a continuous buffer in memory.
void formatterThreadFunction(size_t current_unit_number);
void formatterThreadFunction(size_t current_unit_number, const ThreadGroupStatusPtr & thread_group);
};
}
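The capture-then-attach idiom above, sketched in isolation (names taken from this diff; a sketch assuming ClickHouse's `CurrentThread`, `ThreadPool`, and `setThreadName` headers): the scheduling thread snapshots its thread group, and the worker attaches to it so memory tracking and profile events are accounted to the originating query rather than lost in the pool.

```cpp
#include <Common/CurrentThread.h>
#include <Common/ThreadPool.h>
#include <Common/setThreadName.h>

/// Sketch of the pattern used by both the collector and formatter threads.
void scheduleWorker(ThreadPool & pool)
{
    pool.scheduleOrThrowOnError([thread_group = CurrentThread::getGroup()]
    {
        setThreadName("Worker");
        if (thread_group)
            CurrentThread::attachToIfDetached(thread_group);  /// join the query's accounting
        /// ... do the actual formatting/collecting work here ...
    });
}
```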

View File

@ -66,14 +66,16 @@ void CollapsingSortedAlgorithm::insertRow(RowRef & row)
merged_data.insertRow(*row.all_columns, row.row_num, row.owned_chunk->getNumRows());
}
void CollapsingSortedAlgorithm::insertRows()
std::optional<Chunk> CollapsingSortedAlgorithm::insertRows()
{
if (count_positive == 0 && count_negative == 0)
{
/// No input rows have been read.
return;
return {};
}
std::optional<Chunk> res;
if (last_is_positive || count_positive != count_negative)
{
if (count_positive <= count_negative && !only_positive_sign)
@ -86,6 +88,9 @@ void CollapsingSortedAlgorithm::insertRows()
if (count_positive >= count_negative)
{
if (merged_data.hasEnoughRows())
res = merged_data.pull();
insertRow(last_positive_row);
if (out_row_sources_buf)
@ -107,10 +112,16 @@ void CollapsingSortedAlgorithm::insertRows()
out_row_sources_buf->write(
reinterpret_cast<const char *>(current_row_sources.data()),
current_row_sources.size() * sizeof(RowSourcePart));
return res;
}
IMergingAlgorithm::Status CollapsingSortedAlgorithm::merge()
{
/// Rare case, which may happen when index_granularity is 1, but we needed to insert 2 rows inside insertRows().
if (merged_data.hasEnoughRows())
return Status(merged_data.pull());
/// Take rows in required order and put them into `merged_data`, while the rows are no more than `max_block_size`
while (queue.isValid())
{
@ -132,15 +143,14 @@ IMergingAlgorithm::Status CollapsingSortedAlgorithm::merge()
setRowRef(last_row, current);
bool key_differs = !last_row.hasEqualSortColumnsWith(current_row);
/// if there are enough rows and the last one is calculated completely
if (key_differs && merged_data.hasEnoughRows())
return Status(merged_data.pull());
if (key_differs)
{
/// if there are enough rows and the last one is calculated completely
if (merged_data.hasEnoughRows())
return Status(merged_data.pull());
/// We write data for the previous primary key.
insertRows();
auto res = insertRows();
current_row.swap(last_row);
@ -151,6 +161,12 @@ IMergingAlgorithm::Status CollapsingSortedAlgorithm::merge()
first_negative_pos = 0;
last_positive_pos = 0;
current_row_sources.resize(0);
/// Here we can return ready chunk.
/// Next iteration, last_row == current_row, and all the counters are zeroed.
/// So, current_row should be correctly processed.
if (res)
return Status(std::move(*res));
}
/// Initially, skip all rows. On insert, unskip "corner" rows.
@ -194,7 +210,15 @@ IMergingAlgorithm::Status CollapsingSortedAlgorithm::merge()
}
}
insertRows();
if (auto res = insertRows())
{
/// Queue is empty, and we have inserted all the rows.
/// Set counter to zero so that insertRows() will return immediately next time.
count_positive = 0;
count_negative = 0;
return Status(std::move(*res));
}
return Status(merged_data.pull(), true);
}

View File

@ -66,7 +66,11 @@ private:
void reportIncorrectData();
void insertRow(RowRef & row);
void insertRows();
/// Insert ready rows into merged_data. We may want to insert 0, 1 or 2 rows.
/// It may happen that 2 rows are going to be inserted, but the merged data has free space for only 1 row.
/// In this case, a Chunk with the ready rows is pulled from merged_data before the second insertion.
std::optional<Chunk> insertRows();
};
}

View File

@ -1409,22 +1409,49 @@ void TCPHandler::sendData(const Block & block)
{
initBlockOutput(block);
writeVarUInt(Protocol::Server::Data, *out);
/// Send external table name (empty name is the main table)
writeStringBinary("", *out);
auto prev_bytes_written_out = out->count();
auto prev_bytes_written_compressed_out = state.maybe_compressed_out->count();
/// For testing hedged requests
const Settings & settings = query_context->getSettingsRef();
if (block.rows() > 0 && settings.sleep_in_send_data_ms.totalMilliseconds())
try
{
out->next();
std::chrono::milliseconds ms(settings.sleep_in_send_data_ms.totalMilliseconds());
std::this_thread::sleep_for(ms);
}
writeVarUInt(Protocol::Server::Data, *out);
/// Send external table name (empty name is the main table)
writeStringBinary("", *out);
state.block_out->write(block);
state.maybe_compressed_out->next();
out->next();
/// For testing hedged requests
const Settings & settings = query_context->getSettingsRef();
if (block.rows() > 0 && settings.sleep_in_send_data_ms.totalMilliseconds())
{
out->next();
std::chrono::milliseconds ms(settings.sleep_in_send_data_ms.totalMilliseconds());
std::this_thread::sleep_for(ms);
}
state.block_out->write(block);
state.maybe_compressed_out->next();
out->next();
}
catch (...)
{
/// In case of unsuccessful write, if the buffer with written data was not flushed,
/// we will rollback write to avoid breaking the protocol.
/// (otherwise the client will not be able to receive exception after unfinished data
/// as it will expect the continuation of the data).
/// Otherwise this shows up as a hang on the client side or a message like "Data compressed with different methods".
if (state.compression == Protocol::Compression::Enable)
{
auto extra_bytes_written_compressed = state.maybe_compressed_out->count() - prev_bytes_written_compressed_out;
if (state.maybe_compressed_out->offset() >= extra_bytes_written_compressed)
state.maybe_compressed_out->position() -= extra_bytes_written_compressed;
}
auto extra_bytes_written_out = out->count() - prev_bytes_written_out;
if (out->offset() >= extra_bytes_written_out)
out->position() -= extra_bytes_written_out;
throw;
}
}
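The rollback relies on a `WriteBuffer` invariant visible in this code: bytes accumulate in the working buffer until `next()` flushes them, `count()` is the total number of bytes written, and `offset()` is the still-unflushed tail. A simplified sketch of the recovery step (hypothetical helper, same arithmetic as the catch block above):

```cpp
#include <IO/WriteBuffer.h>  // assumed ClickHouse include path

/// Undo everything written after `prev_count`, provided none of it has been
/// flushed yet. Once bytes have reached the socket, rollback is impossible
/// and the exception must be allowed to break the connection.
void rollbackUnflushed(DB::WriteBuffer & out, size_t prev_count)
{
    size_t extra_bytes_written = out.count() - prev_count;
    if (out.offset() >= extra_bytes_written)
        out.position() -= extra_bytes_written;
}
```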

View File

@ -253,12 +253,20 @@ def test_rabbitmq_csv_with_delimiter(rabbitmq_cluster):
@pytest.mark.timeout(240)
def test_rabbitmq_tsv_with_delimiter(rabbitmq_cluster):
instance.query('''
DROP TABLE IF EXISTS test.view;
DROP TABLE IF EXISTS test.consumer;
CREATE TABLE test.rabbitmq (key UInt64, value UInt64)
ENGINE = RabbitMQ
SETTINGS rabbitmq_host_port = 'rabbitmq1:5672',
rabbitmq_exchange_name = 'tsv',
rabbitmq_format = 'TSV',
rabbitmq_queue_base = 'tsv',
rabbitmq_row_delimiter = '\\n';
CREATE TABLE test.view (key UInt64, value UInt64)
ENGINE = MergeTree()
ORDER BY key;
CREATE MATERIALIZED VIEW test.consumer TO test.view AS
SELECT * FROM test.rabbitmq;
''')
credentials = pika.PlainCredentials('root', 'clickhouse')
@ -272,13 +280,11 @@ def test_rabbitmq_tsv_with_delimiter(rabbitmq_cluster):
for message in messages:
channel.basic_publish(exchange='tsv', routing_key='', body=message)
connection.close()
time.sleep(1)
result = ''
while True:
result += instance.query('SELECT * FROM test.rabbitmq ORDER BY key', ignore_error=True)
result = instance.query('SELECT * FROM test.view ORDER BY key')
if rabbitmq_check_result(result):
break

View File

@ -58,7 +58,7 @@ OPTIMIZE TABLE four_rows_per_granule FINAL;
SELECT COUNT(*) FROM four_rows_per_granule;
SELECT distinct(marks) from system.parts WHERE table = 'four_rows_per_granule' and database=currentDatabase() and active=1;
SELECT sum(marks) from system.parts WHERE table = 'four_rows_per_granule' and database=currentDatabase() and active=1;
INSERT INTO four_rows_per_granule (p, k, v1, v2, Sign) VALUES ('2018-05-15', 1, 1000, 2000, 1), ('2018-05-16', 2, 3000, 4000, 1), ('2018-05-17', 3, 5000, 6000, 1), ('2018-05-18', 4, 7000, 8000, 1);

View File

@ -0,0 +1,4 @@
-8191 8193
-8191 8193
0 2
0 2

View File

@ -0,0 +1,53 @@
DROP TABLE IF EXISTS collapsing_table;
SET optimize_on_insert = 0;
CREATE TABLE collapsing_table
(
key UInt64,
value UInt64,
Sign Int8
)
ENGINE = CollapsingMergeTree(Sign)
ORDER BY key
SETTINGS
vertical_merge_algorithm_min_rows_to_activate=0,
vertical_merge_algorithm_min_columns_to_activate=0,
min_bytes_for_wide_part = 0;
INSERT INTO collapsing_table SELECT if(number == 8192, 8191, number), 1, if(number == 8192, +1, -1) FROM numbers(8193);
SELECT sum(Sign), count() from collapsing_table;
OPTIMIZE TABLE collapsing_table FINAL;
SELECT sum(Sign), count() from collapsing_table;
DROP TABLE IF EXISTS collapsing_table;
DROP TABLE IF EXISTS collapsing_suspicious_granularity;
CREATE TABLE collapsing_suspicious_granularity
(
key UInt64,
value UInt64,
Sign Int8
)
ENGINE = CollapsingMergeTree(Sign)
ORDER BY key
SETTINGS
vertical_merge_algorithm_min_rows_to_activate=0,
vertical_merge_algorithm_min_columns_to_activate=0,
min_bytes_for_wide_part = 0,
index_granularity = 1;
INSERT INTO collapsing_suspicious_granularity VALUES (1, 1, -1) (1, 1, 1);
SELECT sum(Sign), count() from collapsing_suspicious_granularity;
OPTIMIZE TABLE collapsing_suspicious_granularity FINAL;
SELECT sum(Sign), count() from collapsing_suspicious_granularity;
DROP TABLE IF EXISTS collapsing_suspicious_granularity;

View File

@ -0,0 +1 @@
\N (([0,10,255],[-9223372036854775808,1025,0]),[255,NULL]) \N ([0,255],3,[-2]) [NULL]

View File

@ -0,0 +1 @@
SELECT toInt32([toUInt8(NULL)], NULL), (mapSubtract(([toUInt8(256), 10], [toInt32(-9223372036854775808), 1025]), ([toUInt8(65535), 0], [toInt16(0.), -9223372036854775808])), [toUInt8(-1), toInt32(([toUInt8(9223372036854775807), -1], [toInt32(255), 65536]), NULL)]), toUInt8(([2, 9223372036854775807], [toFloat32('0.0000065536'), 2]), 9223372036854775807, NULL), ([toUInt8(1024), 255], toUInt8(3), [toInt16(-2)]), [NULL];

View File

@ -0,0 +1,2 @@
SET max_memory_usage = '500M';
SELECT sumMap([number], [number]) FROM system.numbers_mt; -- { serverError 241 }

View File

@ -0,0 +1 @@
Code: 241

View File

@ -0,0 +1,7 @@
#!/usr/bin/env bash
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&max_memory_usage=1G" -d "SELECT range(65535) FROM system.one ARRAY JOIN range(65536) AS number" | grep -oF 'Code: 241'

View File

@ -0,0 +1,2 @@
SET max_memory_usage = '1G';
SELECT range(65535) FROM system.one ARRAY JOIN range(65536) AS number; -- { serverError 241 }

View File

@ -0,0 +1,2 @@
Code: 241
Code: 241

View File

@ -0,0 +1,8 @@
#!/usr/bin/env bash
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
$CLICKHOUSE_CLIENT --compress 0 --max_memory_usage 1G --query "SELECT range(65535) FROM system.one ARRAY JOIN range(65536) AS number" 2>&1 | grep -oF 'Code: 241' | head -n1
$CLICKHOUSE_CLIENT --compress 1 --max_memory_usage 1G --query "SELECT range(65535) FROM system.one ARRAY JOIN range(65536) AS number" 2>&1 | grep -oF 'Code: 241' | head -n1

View File

@ -39,6 +39,7 @@ v20.9.5.5-stable 2020-11-13
v20.9.4.76-stable 2020-10-29
v20.9.3.45-stable 2020-10-09
v20.9.2.20-stable 2020-09-22
v20.8.15.11-lts 2021-04-01
v20.8.14.4-lts 2021-03-03
v20.8.13.15-lts 2021-02-20
v20.8.12.2-lts 2021-01-16
