From d5bc9b514cdd4e106eb5a92bf0c2c650b333c0a8 Mon Sep 17 00:00:00 2001 From: cnmade Date: Wed, 23 Feb 2022 11:09:35 +0800 Subject: [PATCH 01/50] Translate zh/faq/general/who-use: rename old file --- ...{who-is-using-clickhouse.md => who-is-using-clickhouse.md.bak} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename docs/zh/faq/general/{who-is-using-clickhouse.md => who-is-using-clickhouse.md.bak} (100%) diff --git a/docs/zh/faq/general/who-is-using-clickhouse.md b/docs/zh/faq/general/who-is-using-clickhouse.md.bak similarity index 100% rename from docs/zh/faq/general/who-is-using-clickhouse.md rename to docs/zh/faq/general/who-is-using-clickhouse.md.bak From 65f2b2e8cf984a856c710be32df91f592cb39dff Mon Sep 17 00:00:00 2001 From: cnmade Date: Wed, 23 Feb 2022 11:10:42 +0800 Subject: [PATCH 02/50] Translate zh/faq/general/who-use: reimport file --- .../zh/faq/general/who-is-using-clickhouse.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 docs/zh/faq/general/who-is-using-clickhouse.md diff --git a/docs/zh/faq/general/who-is-using-clickhouse.md b/docs/zh/faq/general/who-is-using-clickhouse.md new file mode 100644 index 00000000000..b7ff867d726 --- /dev/null +++ b/docs/zh/faq/general/who-is-using-clickhouse.md @@ -0,0 +1,19 @@ +--- +title: Who is using ClickHouse? +toc_hidden: true +toc_priority: 9 +--- + +# Who Is Using ClickHouse? {#who-is-using-clickhouse} + +Being an open-source product makes this question not so straightforward to answer. You do not have to tell anyone if you want to start using ClickHouse, you just go grab source code or pre-compiled packages. There’s no contract to sign and the [Apache 2.0 license](https://github.com/ClickHouse/ClickHouse/blob/master/LICENSE) allows for unconstrained software distribution. + +Also, the technology stack is often in a grey zone of what’s covered by an NDA. Some companies consider technologies they use as a competitive advantage even if they are open-source and do not allow employees to share any details publicly. Some see some PR risks and allow employees to share implementation details only with their PR department approval. + +So how to tell who is using ClickHouse? + +One way is to **ask around**. If it’s not in writing, people are much more willing to share what technologies are used in their companies, what the use cases are, what kind of hardware is used, data volumes, etc. We’re talking with users regularly on [ClickHouse Meetups](https://www.youtube.com/channel/UChtmrD-dsdpspr42P_PyRAw/playlists) all over the world and have heard stories about 1000+ companies that use ClickHouse. Unfortunately, that’s not reproducible and we try to treat such stories as if they were told under NDA to avoid any potential troubles. But you can come to any of our future meetups and talk with other users on your own. There are multiple ways how meetups are announced, for example, you can subscribe to [our Twitter](http://twitter.com/ClickHouseDB/). + +The second way is to look for companies **publicly saying** that they use ClickHouse. It’s more substantial because there’s usually some hard evidence like a blog post, talk video recording, slide deck, etc. We collect the collection of links to such evidence on our **[Adopters](../../introduction/adopters.md)** page. Feel free to contribute the story of your employer or just some links you’ve stumbled upon (but try not to violate your NDA in the process). 
+ +You can find names of very large companies in the adopters list, like Bloomberg, Cisco, China Telecom, Tencent, or Uber, but with the first approach, we found that there are many more. For example, if you take [the list of largest IT companies by Forbes (2020)](https://www.forbes.com/sites/hanktucker/2020/05/13/worlds-largest-technology-companies-2020-apple-stays-on-top-zoom-and-uber-debut/) over half of them are using ClickHouse in some way. Also, it would be unfair not to mention [Yandex](../../introduction/history.md), the company which initially open-sourced ClickHouse in 2016 and happens to be one of the largest IT companies in Europe. From 5f6f89c91e7581a81cd501ff92af4afa379c87d6 Mon Sep 17 00:00:00 2001 From: cnmade Date: Wed, 23 Feb 2022 11:21:53 +0800 Subject: [PATCH 03/50] Translate zh/faq/general/who-use: translate to zh --- docs/zh/faq/general/who-is-using-clickhouse.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/zh/faq/general/who-is-using-clickhouse.md b/docs/zh/faq/general/who-is-using-clickhouse.md index b7ff867d726..3bfd94719d8 100644 --- a/docs/zh/faq/general/who-is-using-clickhouse.md +++ b/docs/zh/faq/general/who-is-using-clickhouse.md @@ -1,19 +1,19 @@ --- -title: Who is using ClickHouse? +title: 谁在使用 ClickHouse? toc_hidden: true toc_priority: 9 --- -# Who Is Using ClickHouse? {#who-is-using-clickhouse} +# 谁在使用 ClickHouse? {#who-is-using-clickhouse} -Being an open-source product makes this question not so straightforward to answer. You do not have to tell anyone if you want to start using ClickHouse, you just go grab source code or pre-compiled packages. There’s no contract to sign and the [Apache 2.0 license](https://github.com/ClickHouse/ClickHouse/blob/master/LICENSE) allows for unconstrained software distribution. +作为一个开源产品,这个问题的答案并不那么简单。如果你想开始使用ClickHouse,你不需要告诉任何人,你只需要获取源代码或预编译包。不需要签署任何合同,[Apache 2.0许可证](https://github.com/ClickHouse/ClickHouse/blob/master/LICENSE)允许不受约束的软件分发。 -Also, the technology stack is often in a grey zone of what’s covered by an NDA. Some companies consider technologies they use as a competitive advantage even if they are open-source and do not allow employees to share any details publicly. Some see some PR risks and allow employees to share implementation details only with their PR department approval. +此外,技术堆栈通常处于保密协议所涵盖的灰色地带。一些公司认为他们使用的技术是一种竞争优势,即使这些技术是开源的,并且不允许员工公开分享任何细节。一些公司看到了一些公关风险,只允许员工在获得公关部门批准后分享实施细节。 -So how to tell who is using ClickHouse? +那么,如何辨别谁在使用ClickHouse呢? -One way is to **ask around**. If it’s not in writing, people are much more willing to share what technologies are used in their companies, what the use cases are, what kind of hardware is used, data volumes, etc. We’re talking with users regularly on [ClickHouse Meetups](https://www.youtube.com/channel/UChtmrD-dsdpspr42P_PyRAw/playlists) all over the world and have heard stories about 1000+ companies that use ClickHouse. Unfortunately, that’s not reproducible and we try to treat such stories as if they were told under NDA to avoid any potential troubles. But you can come to any of our future meetups and talk with other users on your own. There are multiple ways how meetups are announced, for example, you can subscribe to [our Twitter](http://twitter.com/ClickHouseDB/). 
+一种方法是询问周围的人。如果不是书面形式,人们更愿意分享他们公司使用的技术、用例、使用的硬件类型、数据量等。我们定期在[ClickHouse meetup](https://www.youtube.com/channel/UChtmrD-dsdpspr42P_PyRAw/playlists)上与世界各地的用户进行交流,并听到了大约1000多家使用ClickHouse的公司的故事。不幸的是,这是不可复制的,我们试图把这些故事当作是在保密协议下被告知的,以避免任何潜在的麻烦。但你可以参加我们未来的任何聚会,并与其他用户单独交谈。有多种方式宣布聚会,例如,你可以订阅[我们的Twitter](http://twitter.com/ClickHouseDB/)。 -The second way is to look for companies **publicly saying** that they use ClickHouse. It’s more substantial because there’s usually some hard evidence like a blog post, talk video recording, slide deck, etc. We collect the collection of links to such evidence on our **[Adopters](../../introduction/adopters.md)** page. Feel free to contribute the story of your employer or just some links you’ve stumbled upon (but try not to violate your NDA in the process). +第二种方法是寻找**公开表示**使用ClickHouse的公司。因为通常会有一些确凿的证据,如博客文章、谈话视频录音、幻灯片等。我们在我们的[**Adopters**](../../introduction/adopters.md)页面上收集指向此类证据的链接。你可以随意提供你雇主的故事,或者只是一些你偶然发现的链接(但尽量不要在这个过程中违反保密协议)。 -You can find names of very large companies in the adopters list, like Bloomberg, Cisco, China Telecom, Tencent, or Uber, but with the first approach, we found that there are many more. For example, if you take [the list of largest IT companies by Forbes (2020)](https://www.forbes.com/sites/hanktucker/2020/05/13/worlds-largest-technology-companies-2020-apple-stays-on-top-zoom-and-uber-debut/) over half of them are using ClickHouse in some way. Also, it would be unfair not to mention [Yandex](../../introduction/history.md), the company which initially open-sourced ClickHouse in 2016 and happens to be one of the largest IT companies in Europe. +你可以在采用者名单中找到一些非常大的公司,比如彭博社、思科、中国电信、腾讯或优步,但通过第一种方法,我们发现还有更多。例如,如果你看看《福布斯》[(2020年)列出的最大IT公司名单](https://www.forbes.com/sites/hanktucker/2020/05/13/worlds-largest-technology-companies-2020-apple-stays-on-top-zoom-and-uber-debut/),超过一半的公司都在以某种方式使用ClickHouse。此外,不提[Yandex](../../introduction/history.md)是不公平的,该公司最初于2016年开放ClickHouse,碰巧是欧洲最大的it公司之一。 \ No newline at end of file From 2bf12030dc0a277891dfa4cc9d560a9b518ac448 Mon Sep 17 00:00:00 2001 From: cnmade Date: Wed, 23 Feb 2022 11:22:53 +0800 Subject: [PATCH 04/50] Translate zh/faq/general/who-use: remove bak file --- docs/zh/faq/general/who-is-using-clickhouse.md.bak | 1 - 1 file changed, 1 deletion(-) delete mode 120000 docs/zh/faq/general/who-is-using-clickhouse.md.bak diff --git a/docs/zh/faq/general/who-is-using-clickhouse.md.bak b/docs/zh/faq/general/who-is-using-clickhouse.md.bak deleted file mode 120000 index b4e9782df7e..00000000000 --- a/docs/zh/faq/general/who-is-using-clickhouse.md.bak +++ /dev/null @@ -1 +0,0 @@ -../../../en/faq/general/who-is-using-clickhouse.md \ No newline at end of file From b5ab8f97c6f17f8f3b67dbc3db65c08e516a745e Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 23 Feb 2022 14:31:53 +0000 Subject: [PATCH 05/50] Fix untuple condition in IN function --- src/Functions/in.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Functions/in.cpp b/src/Functions/in.cpp index 87e5886b247..469b98ed00d 100644 --- a/src/Functions/in.cpp +++ b/src/Functions/in.cpp @@ -121,7 +121,8 @@ public: auto set = column_set->getData(); auto set_types = set->getDataTypes(); - if (tuple && (set_types.size() != 1 || !set_types[0]->equals(*type_tuple))) + + if (tuple && set_types.size() != 1 && set_types.size() == tuple->tupleSize()) { const auto & tuple_columns = tuple->getColumns(); const DataTypes & tuple_types = type_tuple->getElements(); From 2d0616bd6af7cd76268027f9ce86ac3b0331b146 Mon Sep 17 00:00:00 2001 From: 
SiderZhang Date: Wed, 23 Feb 2022 23:27:38 +0800 Subject: [PATCH 06/50] add a Zeppelin interpreter for ClickHouse --- docs/en/interfaces/third-party/gui.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/en/interfaces/third-party/gui.md b/docs/en/interfaces/third-party/gui.md index 393974c60c4..1d1d3612ce5 100644 --- a/docs/en/interfaces/third-party/gui.md +++ b/docs/en/interfaces/third-party/gui.md @@ -143,6 +143,10 @@ Features: - Backup and restore. - RBAC. +### Zeppelin-Interpreter-for-ClickHouse {#zeppelin-interpreter-for-clickhouse} + +[Zeppelin-Interpreter-for-ClickHouse](https://github.com/SiderZhang/Zeppelin-Interpreter-for-ClickHouse) is a Zeppelin interpreter for ClickHouse. Compared with JDBC interpreter, it can provide better timeout control for long running queries. + ## Commercial {#commercial} ### DataGrip {#datagrip} From fd03f3c3095c357bf618ce1ad80a8804ee0a1ee7 Mon Sep 17 00:00:00 2001 From: tangjiangling Date: Thu, 24 Feb 2022 00:32:09 +0800 Subject: [PATCH 07/50] Update datetimes range in doc --- docs/en/sql-reference/data-types/date.md | 2 ++ docs/en/sql-reference/data-types/datetime.md | 2 +- docs/en/sql-reference/data-types/datetime64.md | 2 +- docs/ja/sql-reference/data-types/date.md | 2 ++ docs/ja/sql-reference/data-types/datetime.md | 2 +- docs/ja/sql-reference/data-types/datetime64.md | 2 ++ docs/ru/sql-reference/data-types/date.md | 2 ++ docs/ru/sql-reference/data-types/datetime.md | 2 +- docs/ru/sql-reference/data-types/datetime64.md | 2 +- docs/zh/sql-reference/data-types/date.md | 2 ++ docs/zh/sql-reference/data-types/datetime.md | 2 ++ docs/zh/sql-reference/data-types/datetime64.md | 2 ++ 12 files changed, 19 insertions(+), 5 deletions(-) diff --git a/docs/en/sql-reference/data-types/date.md b/docs/en/sql-reference/data-types/date.md index bd39de14d47..828f9ee70f5 100644 --- a/docs/en/sql-reference/data-types/date.md +++ b/docs/en/sql-reference/data-types/date.md @@ -7,6 +7,8 @@ toc_title: Date A date. Stored in two bytes as the number of days since 1970-01-01 (unsigned). Allows storing values from just after the beginning of the Unix Epoch to the upper threshold defined by a constant at the compilation stage (currently, this is until the year 2149, but the final fully-supported year is 2148). +Supported range of values: \[1970-01-01, 2149-06-06\]. + The date value is stored without the time zone. **Example** diff --git a/docs/en/sql-reference/data-types/datetime.md b/docs/en/sql-reference/data-types/datetime.md index 1f9a79b5444..5daddfb7fd9 100644 --- a/docs/en/sql-reference/data-types/datetime.md +++ b/docs/en/sql-reference/data-types/datetime.md @@ -13,7 +13,7 @@ Syntax: DateTime([timezone]) ``` -Supported range of values: \[1970-01-01 00:00:00, 2105-12-31 23:59:59\]. +Supported range of values: \[1970-01-01 00:00:00, 2106-02-07 06:28:15\]. Resolution: 1 second. diff --git a/docs/en/sql-reference/data-types/datetime64.md b/docs/en/sql-reference/data-types/datetime64.md index bac1a080a80..c0f69f2ba85 100644 --- a/docs/en/sql-reference/data-types/datetime64.md +++ b/docs/en/sql-reference/data-types/datetime64.md @@ -18,7 +18,7 @@ DateTime64(precision, [timezone]) Internally, stores data as a number of ‘ticks’ since epoch start (1970-01-01 00:00:00 UTC) as Int64. The tick resolution is determined by the precision parameter. 
Additionally, the `DateTime64` type can store time zone that is the same for the entire column, that affects how the values of the `DateTime64` type values are displayed in text format and how the values specified as strings are parsed (‘2020-01-01 05:00:01.000’). The time zone is not stored in the rows of the table (or in resultset), but is stored in the column metadata. See details in [DateTime](../../sql-reference/data-types/datetime.md). -Supported range from January 1, 1925 till November 11, 2283. +Supported range of values: \[1925-01-01 00:00:00, 2283-11-11 23:59:59.99999999\] (Note: The precision of the maximum value is 8). ## Examples {#examples} diff --git a/docs/ja/sql-reference/data-types/date.md b/docs/ja/sql-reference/data-types/date.md index 47b105627c2..20ce8d524ea 100644 --- a/docs/ja/sql-reference/data-types/date.md +++ b/docs/ja/sql-reference/data-types/date.md @@ -7,6 +7,8 @@ toc_title: "\u65E5\u4ED8" 日付型です。 1970-01-01 からの日数が2バイトの符号なし整数として格納されます。 UNIX時間の開始直後から、変換段階で定数として定義される上限しきい値までの値を格納できます(現在は2106年までですが、一年分を完全にサポートしているのは2105年までです)。 +サポートされる値の範囲: \[1970-01-01, 2149-06-06\]. + 日付値は、タイムゾーンなしで格納されます。 [元の記事](https://clickhouse.com/docs/en/data_types/date/) diff --git a/docs/ja/sql-reference/data-types/datetime.md b/docs/ja/sql-reference/data-types/datetime.md index 50f06cee21c..5db7288634f 100644 --- a/docs/ja/sql-reference/data-types/datetime.md +++ b/docs/ja/sql-reference/data-types/datetime.md @@ -15,7 +15,7 @@ toc_title: DateTime DateTime([timezone]) ``` -サポートされる値の範囲: \[1970-01-01 00:00:00, 2105-12-31 23:59:59\]. +サポートされる値の範囲: \[1970-01-01 00:00:00, 2106-02-07 06:28:15\]. 解像度:1秒. diff --git a/docs/ja/sql-reference/data-types/datetime64.md b/docs/ja/sql-reference/data-types/datetime64.md index 12a31595b70..7d1e560654d 100644 --- a/docs/ja/sql-reference/data-types/datetime64.md +++ b/docs/ja/sql-reference/data-types/datetime64.md @@ -19,6 +19,8 @@ DateTime64(precision, [timezone]) 内部的には、データを ‘ticks’ エポック開始(1970-01-01 00:00:00UTC)以来、Int64として。 目盛りの解像度は、精度パラメータによって決定されます。 さらに、 `DateTime64` 型は、列全体で同じタイムゾーンを格納することができます。 `DateTime64` 型の値はテキスト形式で表示され、文字列として指定された値がどのように解析されるか (‘2020-01-01 05:00:01.000’). タイムゾーンは、テーブルの行(またはresultset)には格納されませんが、列メタデータに格納されます。 詳細はを参照。 [DateTime](datetime.md). +サポートされる値の範囲: \[1925-01-01 00:00:00, 2283-11-11 23:59:59.99999999\] (注)最大値の精度は、8). + ## 例 {#examples} **1.** テーブルの作成 `DateTime64`-列を入力し、そこにデータを挿入する: diff --git a/docs/ru/sql-reference/data-types/date.md b/docs/ru/sql-reference/data-types/date.md index 17b4ec99d9a..7157f0dc4c7 100644 --- a/docs/ru/sql-reference/data-types/date.md +++ b/docs/ru/sql-reference/data-types/date.md @@ -7,6 +7,8 @@ toc_title: Date Дата. Хранится в двух байтах в виде (беззнакового) числа дней, прошедших от 1970-01-01. Позволяет хранить значения от чуть больше, чем начала unix-эпохи до верхнего порога, определяющегося константой на этапе компиляции (сейчас - до 2106 года, последний полностью поддерживаемый год - 2105). +Диапазон значений: \[1970-01-01, 2149-06-06\]. + Дата хранится без учёта часового пояса. **Пример** diff --git a/docs/ru/sql-reference/data-types/datetime.md b/docs/ru/sql-reference/data-types/datetime.md index c9804f57c33..804d590e65d 100644 --- a/docs/ru/sql-reference/data-types/datetime.md +++ b/docs/ru/sql-reference/data-types/datetime.md @@ -13,7 +13,7 @@ toc_title: DateTime DateTime([timezone]) ``` -Диапазон значений: \[1970-01-01 00:00:00, 2105-12-31 23:59:59\]. +Диапазон значений: \[1970-01-01 00:00:00, 2106-02-07 06:28:15\]. Точность: 1 секунда. 
diff --git a/docs/ru/sql-reference/data-types/datetime64.md b/docs/ru/sql-reference/data-types/datetime64.md index 869543dbbaf..01ee26d3496 100644 --- a/docs/ru/sql-reference/data-types/datetime64.md +++ b/docs/ru/sql-reference/data-types/datetime64.md @@ -18,7 +18,7 @@ DateTime64(precision, [timezone]) Данные хранятся в виде количества ‘тиков’, прошедших с момента начала эпохи (1970-01-01 00:00:00 UTC), в Int64. Размер тика определяется параметром precision. Дополнительно, тип `DateTime64` позволяет хранить часовой пояс, единый для всей колонки, который влияет на то, как будут отображаться значения типа `DateTime64` в текстовом виде и как будут парситься значения заданные в виде строк (‘2020-01-01 05:00:01.000’). Часовой пояс не хранится в строках таблицы (выборки), а хранится в метаданных колонки. Подробнее см. [DateTime](datetime.md). -Поддерживаются значения от 1 января 1925 г. и до 11 ноября 2283 г. +Диапазон значений: \[1925-01-01 00:00:00, 2283-11-11 23:59:59.99999999\] (Примечание: Точность максимального значения составляет 8). ## Примеры {#examples} diff --git a/docs/zh/sql-reference/data-types/date.md b/docs/zh/sql-reference/data-types/date.md index 8f1e0752179..ab5d3acae1b 100644 --- a/docs/zh/sql-reference/data-types/date.md +++ b/docs/zh/sql-reference/data-types/date.md @@ -2,4 +2,6 @@ 日期类型,用两个字节存储,表示从 1970-01-01 (无符号) 到当前的日期值。允许存储从 Unix 纪元开始到编译阶段定义的上限阈值常量(目前上限是2106年,但最终完全支持的年份为2105)。最小值输出为1970-01-01。 +值的范围: \[1970-01-01, 2149-06-06\]。 + 日期中没有存储时区信息。 diff --git a/docs/zh/sql-reference/data-types/datetime.md b/docs/zh/sql-reference/data-types/datetime.md index 0b3a7524f63..b6c8c3d2d35 100644 --- a/docs/zh/sql-reference/data-types/datetime.md +++ b/docs/zh/sql-reference/data-types/datetime.md @@ -2,6 +2,8 @@ 时间戳类型。用四个字节(无符号的)存储 Unix 时间戳)。允许存储与日期类型相同的范围内的值。最小值为 1970-01-01 00:00:00。时间戳类型值精确到秒(没有闰秒)。 +值的范围: \[1970-01-01 00:00:00, 2106-02-07 06:28:15\]。 + ## 时区 {#shi-qu} 使用启动客户端或服务器时的系统时区,时间戳是从文本(分解为组件)转换为二进制并返回。在文本格式中,有关夏令时的信息会丢失。 diff --git a/docs/zh/sql-reference/data-types/datetime64.md b/docs/zh/sql-reference/data-types/datetime64.md index 46e8e9a5fa4..4a112275259 100644 --- a/docs/zh/sql-reference/data-types/datetime64.md +++ b/docs/zh/sql-reference/data-types/datetime64.md @@ -19,6 +19,8 @@ DateTime64(precision, [timezone]) 在内部,此类型以Int64类型将数据存储为自Linux纪元开始(1970-01-01 00:00:00UTC)的时间刻度数(ticks)。时间刻度的分辨率由precision参数确定。此外,`DateTime64` 类型可以像存储其他数据列一样存储时区信息,时区会影响 `DateTime64` 类型的值如何以文本格式显示,以及如何解析以字符串形式指定的时间数据 (‘2020-01-01 05:00:01.000’)。时区不存储在表的行中(也不在resultset中),而是存储在列的元数据中。详细信息请参考 [DateTime](datetime.md) 数据类型. 
+值的范围: \[1925-01-01 00:00:00, 2283-11-11 23:59:59.99999999\] (注意: 最大值的精度是8)。 + ## 示例 {#examples} **1.** 创建一个具有 `DateTime64` 类型列的表,并向其中插入数据: From 606be191b0068c67450e8497a64aacabef90660e Mon Sep 17 00:00:00 2001 From: tangjiangling Date: Thu, 24 Feb 2022 01:12:51 +0800 Subject: [PATCH 08/50] Empty commit From ffae1345461a82d4fb23e310a6b6f291c791ba8f Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 23 Feb 2022 21:10:27 +0100 Subject: [PATCH 09/50] Update gui.md --- docs/en/interfaces/third-party/gui.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/interfaces/third-party/gui.md b/docs/en/interfaces/third-party/gui.md index 1d1d3612ce5..c0e270b7207 100644 --- a/docs/en/interfaces/third-party/gui.md +++ b/docs/en/interfaces/third-party/gui.md @@ -145,7 +145,7 @@ Features: ### Zeppelin-Interpreter-for-ClickHouse {#zeppelin-interpreter-for-clickhouse} -[Zeppelin-Interpreter-for-ClickHouse](https://github.com/SiderZhang/Zeppelin-Interpreter-for-ClickHouse) is a Zeppelin interpreter for ClickHouse. Compared with JDBC interpreter, it can provide better timeout control for long running queries. +[Zeppelin-Interpreter-for-ClickHouse](https://github.com/SiderZhang/Zeppelin-Interpreter-for-ClickHouse) is a [Zeppelin](https://zeppelin.apache.org) interpreter for ClickHouse. Compared with JDBC interpreter, it can provide better timeout control for long running queries. ## Commercial {#commercial} From 3deef45e068162c0124de179dccefa8c211af928 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 23 Feb 2022 21:41:57 +0100 Subject: [PATCH 10/50] Add a blog post about version 22.2 --- .../blog/en/2022/clickhouse-v22.2-released.md | 90 +++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 website/blog/en/2022/clickhouse-v22.2-released.md diff --git a/website/blog/en/2022/clickhouse-v22.2-released.md b/website/blog/en/2022/clickhouse-v22.2-released.md new file mode 100644 index 00000000000..9abdf6d5b43 --- /dev/null +++ b/website/blog/en/2022/clickhouse-v22.2-released.md @@ -0,0 +1,90 @@ +--- +title: 'ClickHouse 22.2 Released' +image: 'https://blog-images.clickhouse.com/en/2022/clickhouse-v22-2/featured.jpg' +date: '2022-02-23' +author: 'Alexey Milovidov' +tags: ['company', 'community'] +--- + +We prepared a new ClickHouse release 22.2, so it's nice if you have tried it on 2022-02-22. If not, you can try it today. + +The 22.2 release includes 2140 new commits from 118 contributors, including 41 new contributors: + +> Aaron Katz, Andre Marianiello, Andrew, Andrii Buriachevskyi, Brian Hunter, CoolT2, Federico Rodriguez, Filippov Denis, Gaurav Kumar, Geoff Genz, HarryLeeIBM, Heena Bansal, ILya Limarenko, Igor Nikonov, IlyaTsoi, Jake Liu, JaySon-Huang, Lemore, Leonid Krylov, Michail Safronov, Mikhail Fursov, Nikita, RogerYK, Roy Bellingan, Saad Ur Rahman, W, Yakov Olkhovskiy, alexeypavlenko, cnmade, grantovsky, hanqf-git, liuneng1994, mlkui, s-kat, tesw yew isal, vahid-sohrabloo, yakov-olkhovskiy, zhifeng, zkun, zxealous, 박동철. + +Let me tell what is the most interesting in 22.2. + +## Projections are production ready + +Projections allow you to have multiple data representations in the same table. For example, you can have data aggregations along with the raw data. There are no restrictions on what aggregate functions can be used - you can have count distinct, quantiles and whatever you want. You can have data in multiple different sorting orders. 
ClickHouse will automatically select the most suitable projection for your query, so the query will be automatically optimized. + +Projections are somewhat similar to Materialized Views, which also allows you to have incremental aggregation and multiple sorting orders. But unlike Materialized Views, projections are updated atomically and consistently with the main table. The data for projections is being stored in the same "data parts" of the table and is being merged in the same way as the main data. + +The feature is developed by **Amos Bird**, a prominent ClickHouse contributor. The [prototype](https://github.com/ClickHouse/ClickHouse/pull/20202) has been available since Feb 2021, it has been merged in the main codebase by **Nikolai Kochetov** in May 2021 under experimental flag, and after 21 follow-up pull requests we ensured that it passed the full set of test suites and enabled it by default. + +Read an example of how to optimize queries with projections [in our docs](https://clickhouse.com/docs/en/getting-started/example-datasets/uk-price-paid/#speedup-with-projections). + +## Control of file creation and rewriting on data export + +When you export your data with `INSERT INTO TABLE FUNCTION` statement into `file`, `s3` or `hdfs` and the target file already exists, now you can control how to deal with it: you can append new data into the file if it is possible or to rewrite it with a new data, or create another file with similar name like 'data.1.parquet.gz'. + +Some storage systems like `s3` and some formats like `Parquet` don't support data appending. In previous ClickHouse versions, if you insert multiple times into a file with Parquet data format, you will end up with a file that is not recognized by other systems. Now you can choose between throwing exceptions on subsequent inserts or creating more files. + +So, the new settings were introduced: `s3_truncate_on_insert`, `s3_create_new_file_on_insert`, `hdfs_truncate_on_insert`, `hdfs_create_new_file_on_insert`, `engine_file_allow_create_multiple_files`. + +This feature [is developed](https://github.com/ClickHouse/ClickHouse/pull/33302) by **Pavel Kruglov**. + +## Custom deduplication token + +`ReplicatedMergeTree` and `MergeTree` types of tables implement block-level deduplication. When a block of data is inserted, it's cryptographic hash is calculated and if the same block was already inserted before, then the duplicate is skipped and insert query succeeds. This makes it possible to implement exactly-once semantics for inserts. + +In ClickHouse version 22.2 you can provide your own deduplication token instead of an automatically calculated hash. This makes sense if you already have batch identifiers from some other system and you want to reuse them. It also makes sense when blocks can be identical but they should actually be inserted multiple times. Or the opposite - when blocks contain some random data and you want to deduplicate only by significant columns. + +This is implemented by adding a setting `insert_deduplication_token`. The feature contributed by **Igor Nikonov**. + +## DEFAULT keyword for INSERT + +A small addition for SQL compatibility - now we allow using `DEFAULT` keyword instead of a value in `INSERT INTO ... VALUES` statement. It looks like this: + +`INSERT INTO test VALUES (1, 'Hello', DEFAULT)` + +Thanks for this feature to **Andrii Buriachevskyi**. + +## EPHEMERAL columns + +A column in a table can have `DEFAULT` expression like `c INT DEFAULT a + b`. 
In ClickHouse you can also use `MATERIALIZED` instead of `DEFAULT` if you want the column to be always calculated with the provided expression instead of allowing a user to insert data. And you can use `ALIAS` if you don't want the column to be stored at all but instead to be calculated on the fly if referenced. + +Since version 22.2 a new type of column is added: `EPHEMERAL` column. The user can insert data into this column but the column is not stored in a table, it's ephemeral. The purpose of this column is to provide data to calculate other columns that can reference it with `DEFAULT` or `MATERIALIZED` expressions. + +This feature is made by **Yakov Olkhovskiy**. + +## Improvements for multi-disk configuration + +You can configure multiple disks to store ClickHouse data instead of managing RAID and ClickHouse will automatically manage the data placement. + +Since version 22.2 ClickHouse can automatically repair broken disks without server restart by downloading the missing parts from replicas and placing them on the healthy disks. + +This feature is implemented by **Amos Bird** and is already being used for more than 1.5 years in production in KuaiShou. + +Another improvement is the option to specify TTL MOVE TO DISK/VOLUME **IF EXISTS**. It allows replicas with non-uniform disk configuration and to have one replica to move old data to cold storage while another replica will have all the data on hot storage. Data will be moved only on replicas that have the specified disk or volume, hence *if exists*. This is developed by **Anton Popov**. + +## Flexible memory limits + +We split per-query and per-user memory limits to a pair of hard and soft limits. The settings `max_memory_usage` and `max_memory_usage_for_user` act as hard limits. When memory consumption is approaching the hard limit, an exception will be thrown. Two other settings: `max_guaranteed_memory_usage` and `max_guaranteed_memory_usage_for_user` act like soft limits. + +A query will be allowed to use more memory than a soft limit if there is available memory. But if there will be memory shortage (with relative to per user hard limit or total per server memory consumption), we calculate the "overcommit ratio" - how more memory every query is consumed in relative to the soft limit, and we will kill the most overcommitted query to let other queries to run. + +In short, your query will not be limited to a few gigabytes of RAM if you have hundreds gigabytes available. + +This experimental feature is implemented by **Dmitry Novik** and is continuing to be developed. + +## Shell-style comments in SQL + +Now we allow comments starting with `# ` or `#!`, similarly to MySQL. The variant with `#!` allows using shell scripts with "shebang" interpreted by `clickhouse-local`. + +This feature is contributed by **Aaron Katz**. Very nice. + + +## ... And Many More + +Read the [full changelog](https://github.com/ClickHouse/ClickHouse/blob/master/CHANGELOG.md) for the 22.2 release and follow [the roadmap](https://github.com/ClickHouse/ClickHouse/issues/32513). 
From a66a217b3fbcb29e82fd5f3940186f88385a7227 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 23 Feb 2022 21:46:03 +0100 Subject: [PATCH 11/50] Add a blog post about version 22.2 --- website/blog/en/2022/clickhouse-v22.2-released.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/website/blog/en/2022/clickhouse-v22.2-released.md b/website/blog/en/2022/clickhouse-v22.2-released.md index 9abdf6d5b43..ea4b6d73f1e 100644 --- a/website/blog/en/2022/clickhouse-v22.2-released.md +++ b/website/blog/en/2022/clickhouse-v22.2-released.md @@ -87,4 +87,6 @@ This feature is contributed by **Aaron Katz**. Very nice. ## ... And Many More +Maxim Kita, Danila Kutenin, Anton Popov, zhanglistar, Federico Rodriguez, Raúl Marín, Amos Bird and Alexey Milovidov have contributed a ton of performance optimizations for this release. We are obsessed with high performance, as usual :) + Read the [full changelog](https://github.com/ClickHouse/ClickHouse/blob/master/CHANGELOG.md) for the 22.2 release and follow [the roadmap](https://github.com/ClickHouse/ClickHouse/issues/32513). From a736c58f87b98980183b2cbc25701063dfb2bf20 Mon Sep 17 00:00:00 2001 From: Rich Raposa Date: Wed, 23 Feb 2022 14:56:13 -0700 Subject: [PATCH 12/50] Update clickhouse-v22.2-released.md This looks great. I just proposed some grammatical changes. --- .../blog/en/2022/clickhouse-v22.2-released.md | 38 +++++++++---------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/website/blog/en/2022/clickhouse-v22.2-released.md b/website/blog/en/2022/clickhouse-v22.2-released.md index ea4b6d73f1e..3987b3b36b3 100644 --- a/website/blog/en/2022/clickhouse-v22.2-released.md +++ b/website/blog/en/2022/clickhouse-v22.2-released.md @@ -6,57 +6,55 @@ author: 'Alexey Milovidov' tags: ['company', 'community'] --- -We prepared a new ClickHouse release 22.2, so it's nice if you have tried it on 2022-02-22. If not, you can try it today. - -The 22.2 release includes 2140 new commits from 118 contributors, including 41 new contributors: +On 2022-02-22, we released version 22.2 of ClickHouse! (That's a lot of 2's!) This latest release includes 2,140 new commits from 118 contributors, including 41 new contributors: > Aaron Katz, Andre Marianiello, Andrew, Andrii Buriachevskyi, Brian Hunter, CoolT2, Federico Rodriguez, Filippov Denis, Gaurav Kumar, Geoff Genz, HarryLeeIBM, Heena Bansal, ILya Limarenko, Igor Nikonov, IlyaTsoi, Jake Liu, JaySon-Huang, Lemore, Leonid Krylov, Michail Safronov, Mikhail Fursov, Nikita, RogerYK, Roy Bellingan, Saad Ur Rahman, W, Yakov Olkhovskiy, alexeypavlenko, cnmade, grantovsky, hanqf-git, liuneng1994, mlkui, s-kat, tesw yew isal, vahid-sohrabloo, yakov-olkhovskiy, zhifeng, zkun, zxealous, 박동철. -Let me tell what is the most interesting in 22.2. +Let me tell what is most interesting in 22.2... ## Projections are production ready -Projections allow you to have multiple data representations in the same table. For example, you can have data aggregations along with the raw data. There are no restrictions on what aggregate functions can be used - you can have count distinct, quantiles and whatever you want. You can have data in multiple different sorting orders. ClickHouse will automatically select the most suitable projection for your query, so the query will be automatically optimized. +Projections allow you to have multiple data representations in the same table. For example, you can have data aggregations along with the raw data. 
There are no restrictions on which aggregate functions can be used - you can have count distinct, quantiles, or whatever you want. You can have data in multiple different sorting orders. ClickHouse will automatically select the most suitable projection for your query, so the query will be automatically optimized. -Projections are somewhat similar to Materialized Views, which also allows you to have incremental aggregation and multiple sorting orders. But unlike Materialized Views, projections are updated atomically and consistently with the main table. The data for projections is being stored in the same "data parts" of the table and is being merged in the same way as the main data. +Projections are somewhat similar to Materialized Views, which also allow you to have incremental aggregation and multiple sorting orders. But unlike Materialized Views, projections are updated atomically and consistently with the main table. The data for projections is being stored in the same "data parts" of the table and is being merged in the same way as the main data. -The feature is developed by **Amos Bird**, a prominent ClickHouse contributor. The [prototype](https://github.com/ClickHouse/ClickHouse/pull/20202) has been available since Feb 2021, it has been merged in the main codebase by **Nikolai Kochetov** in May 2021 under experimental flag, and after 21 follow-up pull requests we ensured that it passed the full set of test suites and enabled it by default. +The feature was developed by **Amos Bird**, a prominent ClickHouse contributor. The [prototype](https://github.com/ClickHouse/ClickHouse/pull/20202) has been available since Feb 2021, it has been merged in the main codebase by **Nikolai Kochetov** in May 2021 under experimental flag, and after 21 follow-up pull requests we ensured that it passed the full set of test suites and enabled it by default. Read an example of how to optimize queries with projections [in our docs](https://clickhouse.com/docs/en/getting-started/example-datasets/uk-price-paid/#speedup-with-projections). ## Control of file creation and rewriting on data export -When you export your data with `INSERT INTO TABLE FUNCTION` statement into `file`, `s3` or `hdfs` and the target file already exists, now you can control how to deal with it: you can append new data into the file if it is possible or to rewrite it with a new data, or create another file with similar name like 'data.1.parquet.gz'. +When you export your data with an `INSERT INTO TABLE FUNCTION` statement into `file`, `s3` or `hdfs` and the target file already exists, now you can control how to deal with it: you can append new data into the file if it is possible, or rewrite it with a new data, or create another file with a similar name like 'data.1.parquet.gz'. Some storage systems like `s3` and some formats like `Parquet` don't support data appending. In previous ClickHouse versions, if you insert multiple times into a file with Parquet data format, you will end up with a file that is not recognized by other systems. Now you can choose between throwing exceptions on subsequent inserts or creating more files. -So, the new settings were introduced: `s3_truncate_on_insert`, `s3_create_new_file_on_insert`, `hdfs_truncate_on_insert`, `hdfs_create_new_file_on_insert`, `engine_file_allow_create_multiple_files`. +So, new settings were introduced: `s3_truncate_on_insert`, `s3_create_new_file_on_insert`, `hdfs_truncate_on_insert`, `hdfs_create_new_file_on_insert`, `engine_file_allow_create_multiple_files`. 
-This feature [is developed](https://github.com/ClickHouse/ClickHouse/pull/33302) by **Pavel Kruglov**. +This feature [was developed](https://github.com/ClickHouse/ClickHouse/pull/33302) by **Pavel Kruglov**. ## Custom deduplication token -`ReplicatedMergeTree` and `MergeTree` types of tables implement block-level deduplication. When a block of data is inserted, it's cryptographic hash is calculated and if the same block was already inserted before, then the duplicate is skipped and insert query succeeds. This makes it possible to implement exactly-once semantics for inserts. +`ReplicatedMergeTree` and `MergeTree` types of tables implement block-level deduplication. When a block of data is inserted, it's cryptographic hash is calculated and if the same block was already inserted before, then the duplicate is skipped and the insert query succeeds. This makes it possible to implement exactly-once semantics for inserts. In ClickHouse version 22.2 you can provide your own deduplication token instead of an automatically calculated hash. This makes sense if you already have batch identifiers from some other system and you want to reuse them. It also makes sense when blocks can be identical but they should actually be inserted multiple times. Or the opposite - when blocks contain some random data and you want to deduplicate only by significant columns. -This is implemented by adding a setting `insert_deduplication_token`. The feature contributed by **Igor Nikonov**. +This is implemented by adding the setting `insert_deduplication_token`. The feature was contributed by **Igor Nikonov**. ## DEFAULT keyword for INSERT -A small addition for SQL compatibility - now we allow using `DEFAULT` keyword instead of a value in `INSERT INTO ... VALUES` statement. It looks like this: +A small addition for SQL compatibility - now we allow using the `DEFAULT` keyword instead of a value in `INSERT INTO ... VALUES` statement. It looks like this: `INSERT INTO test VALUES (1, 'Hello', DEFAULT)` -Thanks for this feature to **Andrii Buriachevskyi**. +Thanks to **Andrii Buriachevskyi** for this feature. ## EPHEMERAL columns -A column in a table can have `DEFAULT` expression like `c INT DEFAULT a + b`. In ClickHouse you can also use `MATERIALIZED` instead of `DEFAULT` if you want the column to be always calculated with the provided expression instead of allowing a user to insert data. And you can use `ALIAS` if you don't want the column to be stored at all but instead to be calculated on the fly if referenced. +A column in a table can have a `DEFAULT` expression like `c INT DEFAULT a + b`. In ClickHouse you can also use `MATERIALIZED` instead of `DEFAULT` if you want the column to be always calculated with the provided expression instead of allowing a user to insert data. And you can use `ALIAS` if you don't want the column to be stored at all but instead to be calculated on the fly if referenced. Since version 22.2 a new type of column is added: `EPHEMERAL` column. The user can insert data into this column but the column is not stored in a table, it's ephemeral. The purpose of this column is to provide data to calculate other columns that can reference it with `DEFAULT` or `MATERIALIZED` expressions. -This feature is made by **Yakov Olkhovskiy**. +This feature was made by **Yakov Olkhovskiy**. 
## Improvements for multi-disk configuration @@ -66,7 +64,7 @@ Since version 22.2 ClickHouse can automatically repair broken disks without serv This feature is implemented by **Amos Bird** and is already being used for more than 1.5 years in production in KuaiShou. -Another improvement is the option to specify TTL MOVE TO DISK/VOLUME **IF EXISTS**. It allows replicas with non-uniform disk configuration and to have one replica to move old data to cold storage while another replica will have all the data on hot storage. Data will be moved only on replicas that have the specified disk or volume, hence *if exists*. This is developed by **Anton Popov**. +Another improvement is the option to specify TTL MOVE TO DISK/VOLUME **IF EXISTS**. It allows replicas with non-uniform disk configuration and to have one replica to move old data to cold storage while another replica has all the data on hot storage. Data will be moved only on replicas that have the specified disk or volume, hence *if exists*. This was developed by **Anton Popov**. ## Flexible memory limits @@ -82,11 +80,11 @@ This experimental feature is implemented by **Dmitry Novik** and is continuing t Now we allow comments starting with `# ` or `#!`, similarly to MySQL. The variant with `#!` allows using shell scripts with "shebang" interpreted by `clickhouse-local`. -This feature is contributed by **Aaron Katz**. Very nice. +This feature was contributed by **Aaron Katz**. Very nice. -## ... And Many More +## And many more... -Maxim Kita, Danila Kutenin, Anton Popov, zhanglistar, Federico Rodriguez, Raúl Marín, Amos Bird and Alexey Milovidov have contributed a ton of performance optimizations for this release. We are obsessed with high performance, as usual :) +Maxim Kita, Danila Kutenin, Anton Popov, zhanglistar, Federico Rodriguez, Raúl Marín, Amos Bird and Alexey Milovidov have contributed a ton of performance optimizations for this release. We are obsessed with high performance, as usual. :) Read the [full changelog](https://github.com/ClickHouse/ClickHouse/blob/master/CHANGELOG.md) for the 22.2 release and follow [the roadmap](https://github.com/ClickHouse/ClickHouse/issues/32513). From 8a5ac182f0fda7936aa05f4eafabd7ab045f7e54 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Wed, 23 Feb 2022 18:32:37 -0500 Subject: [PATCH 13/50] Update array-join.md --- .../statements/select/array-join.md | 21 ++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/select/array-join.md b/docs/en/sql-reference/statements/select/array-join.md index b4d99aaf6b2..eb5ac3a91e8 100644 --- a/docs/en/sql-reference/statements/select/array-join.md +++ b/docs/en/sql-reference/statements/select/array-join.md @@ -127,7 +127,7 @@ ARRAY JOIN [1, 2, 3] AS arr_external; └─────────────┴──────────────┘ ``` -Multiple arrays can be comma-separated in the `ARRAY JOIN` clause. In this case, `JOIN` is performed with them simultaneously (the direct sum, not the cartesian product). Note that all the arrays must have the same size. Example: +Multiple arrays can be comma-separated in the `ARRAY JOIN` clause. In this case, `JOIN` is performed with them simultaneously (the direct sum, not the cartesian product). Note that all the arrays must have the same size by default. 
Example: ``` sql SELECT s, arr, a, num, mapped @@ -162,6 +162,25 @@ ARRAY JOIN arr AS a, arrayEnumerate(arr) AS num; │ World │ [3,4,5] │ 5 │ 3 │ [1,2,3] │ └───────┴─────────┴───┴─────┴─────────────────────┘ ``` +Multiple arrays with different size can be joined by using: `SETTINGS enable_unaligned_array_join = 1`. Example: + +```sql +SELECT s, arr, a, b +FROM arrays_test ARRAY JOIN arr as a, [['a','b'],['c']] as b +SETTINGS enable_unaligned_array_join = 1; +``` + +```text +┌─s───────┬─arr─────┬─a─┬─b─────────┐ +│ Hello │ [1,2] │ 1 │ ['a','b'] │ +│ Hello │ [1,2] │ 2 │ ['c'] │ +│ World │ [3,4,5] │ 3 │ ['a','b'] │ +│ World │ [3,4,5] │ 4 │ ['c'] │ +│ World │ [3,4,5] │ 5 │ [] │ +│ Goodbye │ [] │ 0 │ ['a','b'] │ +│ Goodbye │ [] │ 0 │ ['c'] │ +└─────────┴─────────┴───┴───────────┘ +``` ## ARRAY JOIN with Nested Data Structure {#array-join-with-nested-data-structure} From 84a7c404396b7285e275a6873ee6e839f533aa05 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 24 Feb 2022 02:35:58 +0300 Subject: [PATCH 14/50] Update clickhouse-v22.2-released.md --- website/blog/en/2022/clickhouse-v22.2-released.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/blog/en/2022/clickhouse-v22.2-released.md b/website/blog/en/2022/clickhouse-v22.2-released.md index 3987b3b36b3..0d9d91d6432 100644 --- a/website/blog/en/2022/clickhouse-v22.2-released.md +++ b/website/blog/en/2022/clickhouse-v22.2-released.md @@ -34,7 +34,7 @@ This feature [was developed](https://github.com/ClickHouse/ClickHouse/pull/33302 ## Custom deduplication token -`ReplicatedMergeTree` and `MergeTree` types of tables implement block-level deduplication. When a block of data is inserted, it's cryptographic hash is calculated and if the same block was already inserted before, then the duplicate is skipped and the insert query succeeds. This makes it possible to implement exactly-once semantics for inserts. +`ReplicatedMergeTree` and `MergeTree` types of tables implement block-level deduplication. When a block of data is inserted, its cryptographic hash is calculated and if the same block was already inserted before, then the duplicate is skipped and the insert query succeeds. This makes it possible to implement exactly-once semantics for inserts. In ClickHouse version 22.2 you can provide your own deduplication token instead of an automatically calculated hash. This makes sense if you already have batch identifiers from some other system and you want to reuse them. It also makes sense when blocks can be identical but they should actually be inserted multiple times. Or the opposite - when blocks contain some random data and you want to deduplicate only by significant columns. From ea9ef40c95a9b2943b35e5c2115b79369780a0e1 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 24 Feb 2022 02:37:07 +0300 Subject: [PATCH 15/50] Update clickhouse-v22.2-released.md --- website/blog/en/2022/clickhouse-v22.2-released.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/blog/en/2022/clickhouse-v22.2-released.md b/website/blog/en/2022/clickhouse-v22.2-released.md index 0d9d91d6432..9493ca69b61 100644 --- a/website/blog/en/2022/clickhouse-v22.2-released.md +++ b/website/blog/en/2022/clickhouse-v22.2-released.md @@ -6,7 +6,7 @@ author: 'Alexey Milovidov' tags: ['company', 'community'] --- -On 2022-02-22, we released version 22.2 of ClickHouse! (That's a lot of 2's!) 
This latest release includes 2,140 new commits from 118 contributors, including 41 new contributors: +We prepared a new ClickHouse release 22.2, so it's nice if you have tried it on 2022-02-22. If not, you can try it today. This latest release includes 2,140 new commits from 118 contributors, including 41 new contributors: > Aaron Katz, Andre Marianiello, Andrew, Andrii Buriachevskyi, Brian Hunter, CoolT2, Federico Rodriguez, Filippov Denis, Gaurav Kumar, Geoff Genz, HarryLeeIBM, Heena Bansal, ILya Limarenko, Igor Nikonov, IlyaTsoi, Jake Liu, JaySon-Huang, Lemore, Leonid Krylov, Michail Safronov, Mikhail Fursov, Nikita, RogerYK, Roy Bellingan, Saad Ur Rahman, W, Yakov Olkhovskiy, alexeypavlenko, cnmade, grantovsky, hanqf-git, liuneng1994, mlkui, s-kat, tesw yew isal, vahid-sohrabloo, yakov-olkhovskiy, zhifeng, zkun, zxealous, 박동철. From 838d931137f0c8aa0867b26f757d55cb3e3f4e51 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Wed, 23 Feb 2022 18:37:13 -0500 Subject: [PATCH 16/50] Update array-join.md --- docs/en/sql-reference/statements/select/array-join.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/select/array-join.md b/docs/en/sql-reference/statements/select/array-join.md index eb5ac3a91e8..f138bcc45c7 100644 --- a/docs/en/sql-reference/statements/select/array-join.md +++ b/docs/en/sql-reference/statements/select/array-join.md @@ -162,7 +162,7 @@ ARRAY JOIN arr AS a, arrayEnumerate(arr) AS num; │ World │ [3,4,5] │ 5 │ 3 │ [1,2,3] │ └───────┴─────────┴───┴─────┴─────────────────────┘ ``` -Multiple arrays with different size can be joined by using: `SETTINGS enable_unaligned_array_join = 1`. Example: +Multiple arrays with different sizes can be joined by using: `SETTINGS enable_unaligned_array_join = 1`. 
Example: ```sql SELECT s, arr, a, b From 8c42d16c95efef2b3d31f1abef3c3de70ad36b4e Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 23 Feb 2022 14:37:15 +0000 Subject: [PATCH 17/50] Add test in_untuple_issue_34810 --- .../02226_in_untuple_issue_34810.reference | 1 + .../0_stateless/02226_in_untuple_issue_34810.sql | 13 +++++++++++++ 2 files changed, 14 insertions(+) create mode 100644 tests/queries/0_stateless/02226_in_untuple_issue_34810.reference create mode 100644 tests/queries/0_stateless/02226_in_untuple_issue_34810.sql diff --git a/tests/queries/0_stateless/02226_in_untuple_issue_34810.reference b/tests/queries/0_stateless/02226_in_untuple_issue_34810.reference new file mode 100644 index 00000000000..84cc85e3715 --- /dev/null +++ b/tests/queries/0_stateless/02226_in_untuple_issue_34810.reference @@ -0,0 +1 @@ +2001 2 diff --git a/tests/queries/0_stateless/02226_in_untuple_issue_34810.sql b/tests/queries/0_stateless/02226_in_untuple_issue_34810.sql new file mode 100644 index 00000000000..a313d526e9d --- /dev/null +++ b/tests/queries/0_stateless/02226_in_untuple_issue_34810.sql @@ -0,0 +1,13 @@ +DROP TABLE IF EXISTS calendar; +DROP TABLE IF EXISTS events32; + +CREATE TABLE calendar ( `year` Int64, `month` Int64 ) ENGINE = TinyLog; +INSERT INTO calendar VALUES (2000, 1), (2001, 2), (2000, 3); + +CREATE TABLE events32 ( `year` Int32, `month` Int32 ) ENGINE = TinyLog; +INSERT INTO events32 VALUES (2001, 2), (2001, 3); + +SELECT * FROM calendar WHERE (year, month) IN ( SELECT (year, month) FROM events32 ); + +DROP TABLE IF EXISTS calendar; +DROP TABLE IF EXISTS events32; From b792812b61c10911055e5597b9edfec19049fe43 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 24 Feb 2022 20:45:44 +0300 Subject: [PATCH 18/50] Update website/blog/en/2022/clickhouse-v22.2-released.md Co-authored-by: Christoph Wurm --- website/blog/en/2022/clickhouse-v22.2-released.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/blog/en/2022/clickhouse-v22.2-released.md b/website/blog/en/2022/clickhouse-v22.2-released.md index 9493ca69b61..95b83880d0e 100644 --- a/website/blog/en/2022/clickhouse-v22.2-released.md +++ b/website/blog/en/2022/clickhouse-v22.2-released.md @@ -10,7 +10,7 @@ We prepared a new ClickHouse release 22.2, so it's nice if you have tried it on > Aaron Katz, Andre Marianiello, Andrew, Andrii Buriachevskyi, Brian Hunter, CoolT2, Federico Rodriguez, Filippov Denis, Gaurav Kumar, Geoff Genz, HarryLeeIBM, Heena Bansal, ILya Limarenko, Igor Nikonov, IlyaTsoi, Jake Liu, JaySon-Huang, Lemore, Leonid Krylov, Michail Safronov, Mikhail Fursov, Nikita, RogerYK, Roy Bellingan, Saad Ur Rahman, W, Yakov Olkhovskiy, alexeypavlenko, cnmade, grantovsky, hanqf-git, liuneng1994, mlkui, s-kat, tesw yew isal, vahid-sohrabloo, yakov-olkhovskiy, zhifeng, zkun, zxealous, 박동철. -Let me tell what is most interesting in 22.2... +Let me tell you what is most interesting in 22.2... 
## Projections are production ready From e77d907cc054c776939f315318f454429f77599d Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 24 Feb 2022 20:46:04 +0300 Subject: [PATCH 19/50] Update website/blog/en/2022/clickhouse-v22.2-released.md Co-authored-by: Christoph Wurm --- website/blog/en/2022/clickhouse-v22.2-released.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/blog/en/2022/clickhouse-v22.2-released.md b/website/blog/en/2022/clickhouse-v22.2-released.md index 95b83880d0e..511412397cc 100644 --- a/website/blog/en/2022/clickhouse-v22.2-released.md +++ b/website/blog/en/2022/clickhouse-v22.2-released.md @@ -24,7 +24,7 @@ Read an example of how to optimize queries with projections [in our docs](https: ## Control of file creation and rewriting on data export -When you export your data with an `INSERT INTO TABLE FUNCTION` statement into `file`, `s3` or `hdfs` and the target file already exists, now you can control how to deal with it: you can append new data into the file if it is possible, or rewrite it with a new data, or create another file with a similar name like 'data.1.parquet.gz'. +When you export your data with an `INSERT INTO TABLE FUNCTION` statement into `file`, `s3` or `hdfs` and the target file already exists, you can now control how to deal with it: you can append new data into the file if it is possible, rewrite it with new data, or create another file with a similar name like 'data.1.parquet.gz'. Some storage systems like `s3` and some formats like `Parquet` don't support data appending. In previous ClickHouse versions, if you insert multiple times into a file with Parquet data format, you will end up with a file that is not recognized by other systems. Now you can choose between throwing exceptions on subsequent inserts or creating more files. From fecf0ad086a0c1cc3d6bf6d29a3d60f23e2e68ac Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 24 Feb 2022 20:46:14 +0300 Subject: [PATCH 20/50] Update website/blog/en/2022/clickhouse-v22.2-released.md Co-authored-by: Christoph Wurm --- website/blog/en/2022/clickhouse-v22.2-released.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/blog/en/2022/clickhouse-v22.2-released.md b/website/blog/en/2022/clickhouse-v22.2-released.md index 511412397cc..4fb53f29fde 100644 --- a/website/blog/en/2022/clickhouse-v22.2-released.md +++ b/website/blog/en/2022/clickhouse-v22.2-released.md @@ -62,7 +62,7 @@ You can configure multiple disks to store ClickHouse data instead of managing RA Since version 22.2 ClickHouse can automatically repair broken disks without server restart by downloading the missing parts from replicas and placing them on the healthy disks. -This feature is implemented by **Amos Bird** and is already being used for more than 1.5 years in production in KuaiShou. +This feature was implemented by **Amos Bird** and is already being used for more than 1.5 years in production at Kuaishou. Another improvement is the option to specify TTL MOVE TO DISK/VOLUME **IF EXISTS**. It allows replicas with non-uniform disk configuration and to have one replica to move old data to cold storage while another replica has all the data on hot storage. Data will be moved only on replicas that have the specified disk or volume, hence *if exists*. This was developed by **Anton Popov**. 
From 8c38c51d3391c20ba5676dc247f9e32f61601f96 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 24 Feb 2022 20:46:57 +0300 Subject: [PATCH 21/50] Update website/blog/en/2022/clickhouse-v22.2-released.md Co-authored-by: Christoph Wurm --- website/blog/en/2022/clickhouse-v22.2-released.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/blog/en/2022/clickhouse-v22.2-released.md b/website/blog/en/2022/clickhouse-v22.2-released.md index 4fb53f29fde..44c6b18d9a6 100644 --- a/website/blog/en/2022/clickhouse-v22.2-released.md +++ b/website/blog/en/2022/clickhouse-v22.2-released.md @@ -68,7 +68,7 @@ Another improvement is the option to specify TTL MOVE TO DISK/VOLUME **IF EXISTS ## Flexible memory limits -We split per-query and per-user memory limits to a pair of hard and soft limits. The settings `max_memory_usage` and `max_memory_usage_for_user` act as hard limits. When memory consumption is approaching the hard limit, an exception will be thrown. Two other settings: `max_guaranteed_memory_usage` and `max_guaranteed_memory_usage_for_user` act like soft limits. +We split per-query and per-user memory limits into a pair of hard and soft limits. The settings `max_memory_usage` and `max_memory_usage_for_user` act as hard limits. When memory consumption is approaching the hard limit, an exception will be thrown. Two other settings: `max_guaranteed_memory_usage` and `max_guaranteed_memory_usage_for_user` act as soft limits. A query will be allowed to use more memory than a soft limit if there is available memory. But if there will be memory shortage (with relative to per user hard limit or total per server memory consumption), we calculate the "overcommit ratio" - how more memory every query is consumed in relative to the soft limit, and we will kill the most overcommitted query to let other queries to run. From d0050f6ff48247f4b8bb8439287565581931070b Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 24 Feb 2022 20:47:07 +0300 Subject: [PATCH 22/50] Update website/blog/en/2022/clickhouse-v22.2-released.md Co-authored-by: Christoph Wurm --- website/blog/en/2022/clickhouse-v22.2-released.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/blog/en/2022/clickhouse-v22.2-released.md b/website/blog/en/2022/clickhouse-v22.2-released.md index 44c6b18d9a6..73d6637e41a 100644 --- a/website/blog/en/2022/clickhouse-v22.2-released.md +++ b/website/blog/en/2022/clickhouse-v22.2-released.md @@ -70,7 +70,7 @@ Another improvement is the option to specify TTL MOVE TO DISK/VOLUME **IF EXISTS We split per-query and per-user memory limits into a pair of hard and soft limits. The settings `max_memory_usage` and `max_memory_usage_for_user` act as hard limits. When memory consumption is approaching the hard limit, an exception will be thrown. Two other settings: `max_guaranteed_memory_usage` and `max_guaranteed_memory_usage_for_user` act as soft limits. -A query will be allowed to use more memory than a soft limit if there is available memory. But if there will be memory shortage (with relative to per user hard limit or total per server memory consumption), we calculate the "overcommit ratio" - how more memory every query is consumed in relative to the soft limit, and we will kill the most overcommitted query to let other queries to run. +A query will be allowed to use more memory than a soft limit if there is available memory. 
But if there will be memory shortage (relative to the per-user hard limit or total per-server memory consumption), we calculate the "overcommit ratio" - how much more memory every query is consuming relative to the soft limit - and we will kill the most overcommitted query to let other queries run. In short, your query will not be limited to a few gigabytes of RAM if you have hundreds gigabytes available. From b3a4e4ead205426bd44977c4e19069c35458bad5 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 24 Feb 2022 20:47:15 +0300 Subject: [PATCH 23/50] Update website/blog/en/2022/clickhouse-v22.2-released.md Co-authored-by: Christoph Wurm --- website/blog/en/2022/clickhouse-v22.2-released.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/blog/en/2022/clickhouse-v22.2-released.md b/website/blog/en/2022/clickhouse-v22.2-released.md index 73d6637e41a..528654135f5 100644 --- a/website/blog/en/2022/clickhouse-v22.2-released.md +++ b/website/blog/en/2022/clickhouse-v22.2-released.md @@ -72,7 +72,7 @@ We split per-query and per-user memory limits into a pair of hard and soft limit A query will be allowed to use more memory than a soft limit if there is available memory. But if there will be memory shortage (relative to the per-user hard limit or total per-server memory consumption), we calculate the "overcommit ratio" - how much more memory every query is consuming relative to the soft limit - and we will kill the most overcommitted query to let other queries run. -In short, your query will not be limited to a few gigabytes of RAM if you have hundreds gigabytes available. +In short, your query will not be limited to a few gigabytes of RAM if you have hundreds of gigabytes available. This experimental feature is implemented by **Dmitry Novik** and is continuing to be developed. From 8fc66a64414f6598ae81ceac650cd487eb906065 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 24 Feb 2022 20:47:28 +0300 Subject: [PATCH 24/50] Update website/blog/en/2022/clickhouse-v22.2-released.md Co-authored-by: Christoph Wurm --- website/blog/en/2022/clickhouse-v22.2-released.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/blog/en/2022/clickhouse-v22.2-released.md b/website/blog/en/2022/clickhouse-v22.2-released.md index 528654135f5..d2d36b86b3e 100644 --- a/website/blog/en/2022/clickhouse-v22.2-released.md +++ b/website/blog/en/2022/clickhouse-v22.2-released.md @@ -74,7 +74,7 @@ A query will be allowed to use more memory than a soft limit if there is availab In short, your query will not be limited to a few gigabytes of RAM if you have hundreds of gigabytes available. -This experimental feature is implemented by **Dmitry Novik** and is continuing to be developed. +This experimental feature was implemented by **Dmitry Novik** and is continuing to be developed. 
## Shell-style comments in SQL From 170f8fbc00435dd404a10f6229e4f905b638206e Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 24 Feb 2022 20:47:35 +0300 Subject: [PATCH 25/50] Update website/blog/en/2022/clickhouse-v22.2-released.md Co-authored-by: Christoph Wurm --- website/blog/en/2022/clickhouse-v22.2-released.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/blog/en/2022/clickhouse-v22.2-released.md b/website/blog/en/2022/clickhouse-v22.2-released.md index d2d36b86b3e..d55b0e6bcf0 100644 --- a/website/blog/en/2022/clickhouse-v22.2-released.md +++ b/website/blog/en/2022/clickhouse-v22.2-released.md @@ -78,7 +78,7 @@ This experimental feature was implemented by **Dmitry Novik** and is continuing ## Shell-style comments in SQL -Now we allow comments starting with `# ` or `#!`, similarly to MySQL. The variant with `#!` allows using shell scripts with "shebang" interpreted by `clickhouse-local`. +Now we allow comments starting with `# ` or `#!`, similar to MySQL. The variant with `#!` allows using shell scripts with "shebang" interpreted by `clickhouse-local`. This feature was contributed by **Aaron Katz**. Very nice. From 2e9434c2a2712861b9b91bf9047a12df8ceb3e87 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Thu, 24 Feb 2022 20:44:12 -0400 Subject: [PATCH 26/50] Update array.md --- docs/en/sql-reference/data-types/array.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/data-types/array.md b/docs/en/sql-reference/data-types/array.md index 4e7e7390e41..a8dad7ba989 100644 --- a/docs/en/sql-reference/data-types/array.md +++ b/docs/en/sql-reference/data-types/array.md @@ -5,7 +5,7 @@ toc_title: Array(T) # Array(t) {#data-type-array} -An array of `T`-type items. `T` can be any data type, including an array. +An array of `T`-type items, with the starting array index as 1. `T` can be any data type, including an array. ## Creating an Array {#creating-an-array} From f3f6602180f9a2bc7fbcacdf1b3738fb16aae01a Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Thu, 24 Feb 2022 20:47:24 -0400 Subject: [PATCH 27/50] Update array.md --- docs/ru/sql-reference/data-types/array.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/data-types/array.md b/docs/ru/sql-reference/data-types/array.md index abc16751a79..e37bdf1af77 100644 --- a/docs/ru/sql-reference/data-types/array.md +++ b/docs/ru/sql-reference/data-types/array.md @@ -5,7 +5,7 @@ toc_title: Array(T) # Array(T) {#data-type-array} -Массив из элементов типа `T`. `T` может любым, в том числе массивом. Таким образом поддерживаются многомерные массивы. +Массив из элементов типа `T`. `T` может любым, в том числе массивом. Таким образом поддерживаются многомерные массивы. Первый элемент массива имеет индекс 1. 
## Создание массива {#creating-an-array} From a50d44bd693bd385a63f0ed403eeb82107a58d46 Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 25 Feb 2022 10:26:24 +0000 Subject: [PATCH 28/50] Update version_date.tsv after v22.2.3.5-stable --- utils/list-versions/version_date.tsv | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 664dbf837fd..3f9a9cdc24c 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,3 +1,4 @@ +v22.2.3.5-stable 2022-02-25 v22.2.2.1-stable 2022-02-17 v22.1.3.7-stable 2022-01-23 v22.1.2.2-stable 2022-01-19 From ef3cbdcf6a97ef04aa83f6a6398437fe46535108 Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 25 Feb 2022 10:53:21 +0000 Subject: [PATCH 29/50] Add check_prerequisites to release.py --- tests/ci/release.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/ci/release.py b/tests/ci/release.py index 4ffb04b0aae..6e0d984bce8 100755 --- a/tests/ci/release.py +++ b/tests/ci/release.py @@ -78,7 +78,15 @@ class Release: self._git.update() self.version = get_version_from_repo() + def check_prerequisites(self): + """ + Check tooling installed in the system + """ + self.run("gh auth status") + self.run("git status") + def do(self, check_dirty: bool, check_branch: bool, with_prestable: bool): + self.check_prerequisites() if check_dirty: logging.info("Checking if repo is clean") From 93de1b1c99071b36f63790139058d087131ebbc5 Mon Sep 17 00:00:00 2001 From: Alexandre Snarskii Date: Fri, 25 Feb 2022 14:05:00 +0300 Subject: [PATCH 30/50] Implement MemoryStatisticsOS for FreeBSD --- src/Common/ErrorCodes.cpp | 2 + src/Common/MemoryStatisticsOS.cpp | 56 +++++++++++++++++++++++- src/Common/MemoryStatisticsOS.h | 13 +++++- src/Interpreters/AsynchronousMetrics.cpp | 6 ++- src/Interpreters/AsynchronousMetrics.h | 4 +- 5 files changed, 77 insertions(+), 4 deletions(-) diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index e991daf3209..bb79f6b02a1 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -612,6 +612,8 @@ M(641, CANNOT_APPEND_TO_FILE) \ M(642, CANNOT_PACK_ARCHIVE) \ M(643, CANNOT_UNPACK_ARCHIVE) \ + M(644, CANNOT_SYSCTL) \ + M(645, KERNEL_STRUCTURE_SIZE_MISMATCH) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Common/MemoryStatisticsOS.cpp b/src/Common/MemoryStatisticsOS.cpp index 220d7a2013b..274d8d36c8b 100644 --- a/src/Common/MemoryStatisticsOS.cpp +++ b/src/Common/MemoryStatisticsOS.cpp @@ -1,7 +1,11 @@ -#if defined(OS_LINUX) +#if defined(OS_LINUX) || defined(OS_FREEBSD) #include #include +#if defined(OS_FREEBSD) +#include +#include +#endif #include #include #include @@ -18,6 +22,8 @@ namespace DB { +#if defined(OS_LINUX) + namespace ErrorCodes { extern const int FILE_DOESNT_EXIST; @@ -103,6 +109,54 @@ MemoryStatisticsOS::Data MemoryStatisticsOS::get() const return data; } +#endif + +#if defined(OS_FREEBSD) + +namespace ErrorCodes +{ + extern const int CANNOT_SYSCTL; + extern const int KERNEL_STRUCTURE_SIZE_MISMATCH; +} + +MemoryStatisticsOS::MemoryStatisticsOS() +{ + pagesize = ::getpagesize(); + self = ::getpid(); +} + +MemoryStatisticsOS::~MemoryStatisticsOS() +{ +} + +MemoryStatisticsOS::Data MemoryStatisticsOS::get() const +{ + Data data; + int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PID, self }; + struct kinfo_proc kp; + size_t len = sizeof(struct kinfo_proc); + + if (-1 == ::sysctl(mib, 4, &kp, &len, NULL, 0)) + throwFromErrno("Cannot sysctl(kern.proc.pid." 
+ std::to_string(self) + ")", ErrorCodes::CANNOT_SYSCTL); + + if (sizeof(struct kinfo_proc) != len) + throw DB::Exception(DB::ErrorCodes::KERNEL_STRUCTURE_SIZE_MISMATCH, "Kernel returns structure of {} bytes instead of expected {}", + len, sizeof(struct kinfo_proc)); + + if (sizeof(struct kinfo_proc) != kp.ki_structsize) + throw DB::Exception(DB::ErrorCodes::KERNEL_STRUCTURE_SIZE_MISMATCH, "Kernel stucture size ({}) does not match expected ({}).", + kp.ki_structsize, sizeof(struct kinfo_proc)); + + data.virt = kp.ki_size; + data.resident = kp.ki_rssize * pagesize; + data.code = kp.ki_tsize * pagesize; + data.data_and_stack = (kp.ki_dsize + kp.ki_ssize) * pagesize; + + return data; +} + +#endif + } #endif diff --git a/src/Common/MemoryStatisticsOS.h b/src/Common/MemoryStatisticsOS.h index 0893e333007..43f3fdf2f24 100644 --- a/src/Common/MemoryStatisticsOS.h +++ b/src/Common/MemoryStatisticsOS.h @@ -1,6 +1,9 @@ #pragma once -#if defined(OS_LINUX) +#if defined(OS_LINUX) || defined(OS_FREEBSD) #include +#if defined(OS_FREEBSD) +#include +#endif namespace DB @@ -23,7 +26,9 @@ public: { uint64_t virt; uint64_t resident; +#if defined(OS_LINUX) uint64_t shared; +#endif uint64_t code; uint64_t data_and_stack; }; @@ -35,7 +40,13 @@ public: Data get() const; private: +#if defined(OS_LINUX) int fd; +#endif +#if defined(OS_FREEBSD) + int pagesize; + pid_t self; +#endif }; } diff --git a/src/Interpreters/AsynchronousMetrics.cpp b/src/Interpreters/AsynchronousMetrics.cpp index b8262370a0d..c87ce12c2fa 100644 --- a/src/Interpreters/AsynchronousMetrics.cpp +++ b/src/Interpreters/AsynchronousMetrics.cpp @@ -620,13 +620,15 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti new_values["Uptime"] = getContext()->getUptimeSeconds(); /// Process process memory usage according to OS -#if defined(OS_LINUX) +#if defined(OS_LINUX) || defined(OS_FREEBSD) { MemoryStatisticsOS::Data data = memory_stat.get(); new_values["MemoryVirtual"] = data.virt; new_values["MemoryResident"] = data.resident; +#if !defined(OS_FREEBSD) new_values["MemoryShared"] = data.shared; +#endif new_values["MemoryCode"] = data.code; new_values["MemoryDataAndStack"] = data.data_and_stack; @@ -653,7 +655,9 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti CurrentMetrics::set(CurrentMetrics::MemoryTracking, new_amount); } } +#endif +#if defined(OS_LINUX) if (loadavg) { try diff --git a/src/Interpreters/AsynchronousMetrics.h b/src/Interpreters/AsynchronousMetrics.h index 3c7581ce1a3..e4bcb2890f3 100644 --- a/src/Interpreters/AsynchronousMetrics.h +++ b/src/Interpreters/AsynchronousMetrics.h @@ -76,9 +76,11 @@ private: bool first_run = true; std::chrono::system_clock::time_point previous_update_time; -#if defined(OS_LINUX) +#if defined(OS_LINUX) || defined(OS_FREEBSD) MemoryStatisticsOS memory_stat; +#endif +#if defined(OS_LINUX) std::optional meminfo; std::optional loadavg; std::optional proc_stat; From 5c38203f7d62881fb261d79b58ff75f492f41f6c Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 25 Feb 2022 10:57:39 +0000 Subject: [PATCH 31/50] Argument --commit for release.py is required --- tests/ci/release.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/ci/release.py b/tests/ci/release.py index 6e0d984bce8..fb24ec89154 100755 --- a/tests/ci/release.py +++ b/tests/ci/release.py @@ -393,6 +393,12 @@ def parse_args() -> argparse.Namespace: "`gh` (github-cli) commands", ) + parser.add_argument( + "--commit", + required=True, + type=commit, + 
help="commit create a release", + ) parser.add_argument( "--repo", default="ClickHouse/ClickHouse", @@ -413,12 +419,6 @@ def parse_args() -> argparse.Namespace: dest="release_type", help="a release type, new branch is created only for 'major' and 'minor'", ) - parser.add_argument( - "--commit", - default=git.sha, - type=commit, - help="commit create a release, default to HEAD", - ) parser.add_argument("--with-prestable", default=True, help=argparse.SUPPRESS) parser.add_argument( "--no-prestable", From 9032cc6c3d9cdd8f3aafe4f5e8b838c3d19e4111 Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 25 Feb 2022 11:17:45 +0000 Subject: [PATCH 32/50] Fix stylecheck for tests/ci/release.py --- tests/ci/release.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/ci/release.py b/tests/ci/release.py index fb24ec89154..89182dc7428 100755 --- a/tests/ci/release.py +++ b/tests/ci/release.py @@ -11,7 +11,6 @@ from version_helper import ( FILE_WITH_VERSION_PATH, ClickHouseVersion, VersionType, - git, get_abs_path, get_version_from_repo, update_cmake_version, From 45b157f9098a0c193fd01f1540c8188a4f5f2787 Mon Sep 17 00:00:00 2001 From: Vladimir C Date: Fri, 25 Feb 2022 12:37:12 +0100 Subject: [PATCH 33/50] Use SYSTEM_ERROR in MemoryStatisticsOS for FreeBSD --- src/Common/ErrorCodes.cpp | 2 -- src/Common/MemoryStatisticsOS.cpp | 9 ++++----- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index bb79f6b02a1..e991daf3209 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -612,8 +612,6 @@ M(641, CANNOT_APPEND_TO_FILE) \ M(642, CANNOT_PACK_ARCHIVE) \ M(643, CANNOT_UNPACK_ARCHIVE) \ - M(644, CANNOT_SYSCTL) \ - M(645, KERNEL_STRUCTURE_SIZE_MISMATCH) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Common/MemoryStatisticsOS.cpp b/src/Common/MemoryStatisticsOS.cpp index 274d8d36c8b..57af9855b8f 100644 --- a/src/Common/MemoryStatisticsOS.cpp +++ b/src/Common/MemoryStatisticsOS.cpp @@ -115,8 +115,7 @@ MemoryStatisticsOS::Data MemoryStatisticsOS::get() const namespace ErrorCodes { - extern const int CANNOT_SYSCTL; - extern const int KERNEL_STRUCTURE_SIZE_MISMATCH; + extern const int SYSTEM_ERROR; } MemoryStatisticsOS::MemoryStatisticsOS() @@ -137,14 +136,14 @@ MemoryStatisticsOS::Data MemoryStatisticsOS::get() const size_t len = sizeof(struct kinfo_proc); if (-1 == ::sysctl(mib, 4, &kp, &len, NULL, 0)) - throwFromErrno("Cannot sysctl(kern.proc.pid." + std::to_string(self) + ")", ErrorCodes::CANNOT_SYSCTL); + throwFromErrno("Cannot sysctl(kern.proc.pid." 
+ std::to_string(self) + ")", ErrorCodes::SYSTEM_ERROR); if (sizeof(struct kinfo_proc) != len) - throw DB::Exception(DB::ErrorCodes::KERNEL_STRUCTURE_SIZE_MISMATCH, "Kernel returns structure of {} bytes instead of expected {}", + throw DB::Exception(DB::ErrorCodes::SYSTEM_ERROR, "Kernel returns structure of {} bytes instead of expected {}", len, sizeof(struct kinfo_proc)); if (sizeof(struct kinfo_proc) != kp.ki_structsize) - throw DB::Exception(DB::ErrorCodes::KERNEL_STRUCTURE_SIZE_MISMATCH, "Kernel stucture size ({}) does not match expected ({}).", + throw DB::Exception(DB::ErrorCodes::SYSTEM_ERROR, "Kernel stucture size ({}) does not match expected ({}).", kp.ki_structsize, sizeof(struct kinfo_proc)); data.virt = kp.ki_size; From c39efd80061f5491aeefcd45726f5605b64560fa Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 25 Feb 2022 16:13:32 +0300 Subject: [PATCH 34/50] Update README.md --- docker/packager/README.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/docker/packager/README.md b/docker/packager/README.md index 223d70916d4..a78feb8d7fc 100644 --- a/docker/packager/README.md +++ b/docker/packager/README.md @@ -3,25 +3,25 @@ compilers and build settings. Correctly configured Docker daemon is single depen Usage: -Build deb package with `clang-11` in `debug` mode: +Build deb package with `clang-14` in `debug` mode: ``` $ mkdir deb/test_output -$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=clang-11 --build-type=debug +$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=clang-14 --build-type=debug $ ls -l deb/test_output --rw-r--r-- 1 root root 3730 clickhouse-client_18.14.2+debug_all.deb --rw-r--r-- 1 root root 84221888 clickhouse-common-static_18.14.2+debug_amd64.deb --rw-r--r-- 1 root root 255967314 clickhouse-common-static-dbg_18.14.2+debug_amd64.deb --rw-r--r-- 1 root root 14940 clickhouse-server_18.14.2+debug_all.deb --rw-r--r-- 1 root root 340206010 clickhouse-server-base_18.14.2+debug_amd64.deb --rw-r--r-- 1 root root 7900 clickhouse-server-common_18.14.2+debug_all.deb +-rw-r--r-- 1 root root 3730 clickhouse-client_22.2.2+debug_all.deb +-rw-r--r-- 1 root root 84221888 clickhouse-common-static_22.2.2+debug_amd64.deb +-rw-r--r-- 1 root root 255967314 clickhouse-common-static-dbg_22.2.2+debug_amd64.deb +-rw-r--r-- 1 root root 14940 clickhouse-server_22.2.2+debug_all.deb +-rw-r--r-- 1 root root 340206010 clickhouse-server-base_22.2.2+debug_amd64.deb +-rw-r--r-- 1 root root 7900 clickhouse-server-common_22.2.2+debug_all.deb ``` -Build ClickHouse binary with `clang-11` and `address` sanitizer in `relwithdebuginfo` +Build ClickHouse binary with `clang-14` and `address` sanitizer in `relwithdebuginfo` mode: ``` $ mkdir $HOME/some_clickhouse -$ ./packager --output-dir=$HOME/some_clickhouse --package-type binary --compiler=clang-11 --sanitizer=address +$ ./packager --output-dir=$HOME/some_clickhouse --package-type binary --compiler=clang-14 --sanitizer=address $ ls -l $HOME/some_clickhouse -rwxr-xr-x 1 root root 787061952 clickhouse lrwxrwxrwx 1 root root 10 clickhouse-benchmark -> clickhouse From 4a14b25ca62afef81b5d87bdcd70ee69bb575416 Mon Sep 17 00:00:00 2001 From: Alexandre Snarskii Date: Fri, 25 Feb 2022 17:02:49 +0300 Subject: [PATCH 35/50] use getPageSize, correct typo --- src/Common/MemoryStatisticsOS.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/MemoryStatisticsOS.cpp b/src/Common/MemoryStatisticsOS.cpp index 57af9855b8f..c61d8d7f531 100644 
--- a/src/Common/MemoryStatisticsOS.cpp +++ b/src/Common/MemoryStatisticsOS.cpp @@ -120,7 +120,7 @@ namespace ErrorCodes MemoryStatisticsOS::MemoryStatisticsOS() { - pagesize = ::getpagesize(); + pagesize = static_cast(::getPageSize()); self = ::getpid(); } @@ -143,7 +143,7 @@ MemoryStatisticsOS::Data MemoryStatisticsOS::get() const len, sizeof(struct kinfo_proc)); if (sizeof(struct kinfo_proc) != kp.ki_structsize) - throw DB::Exception(DB::ErrorCodes::SYSTEM_ERROR, "Kernel stucture size ({}) does not match expected ({}).", + throw DB::Exception(DB::ErrorCodes::SYSTEM_ERROR, "Kernel structure size ({}) does not match expected ({}).", kp.ki_structsize, sizeof(struct kinfo_proc)); data.virt = kp.ki_size; From 40075d4c6b5e7259a6a783f29e0649bb1aca9e4b Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 25 Feb 2022 14:50:38 +0100 Subject: [PATCH 36/50] Fix --- src/Databases/SQLite/SQLiteUtils.cpp | 37 ++++++++++--------- ...2227_test_create_empty_sqlite_db.reference | 1 + .../02227_test_create_empty_sqlite_db.sh | 32 ++++++++++++++++ 3 files changed, 52 insertions(+), 18 deletions(-) create mode 100644 tests/queries/0_stateless/02227_test_create_empty_sqlite_db.reference create mode 100755 tests/queries/0_stateless/02227_test_create_empty_sqlite_db.sh diff --git a/src/Databases/SQLite/SQLiteUtils.cpp b/src/Databases/SQLite/SQLiteUtils.cpp index 5b38caeabee..0bb16ea3c8f 100644 --- a/src/Databases/SQLite/SQLiteUtils.cpp +++ b/src/Databases/SQLite/SQLiteUtils.cpp @@ -23,35 +23,36 @@ void processSQLiteError(const String & message, bool throw_on_error) LOG_ERROR(&Poco::Logger::get("SQLiteEngine"), fmt::runtime(message)); } - String validateSQLiteDatabasePath(const String & path, const String & user_files_path, bool throw_on_error) { - String canonical_user_files_path = fs::canonical(user_files_path); - - String canonical_path; - std::error_code err; - if (fs::path(path).is_relative()) - canonical_path = fs::canonical(fs::path(user_files_path) / path, err); - else - canonical_path = fs::canonical(path, err); + return fs::absolute(fs::path(user_files_path) / path).lexically_normal(); - if (err) - processSQLiteError(fmt::format("SQLite database path '{}' is invalid. Error: {}", path, err.message()), throw_on_error); + String absolute_path = fs::absolute(path).lexically_normal(); + String absolute_user_files_path = fs::absolute(user_files_path).lexically_normal(); - if (!canonical_path.starts_with(canonical_user_files_path)) + if (!absolute_path.starts_with(absolute_user_files_path)) + { processSQLiteError(fmt::format("SQLite database file path '{}' must be inside 'user_files' directory", path), throw_on_error); - - return canonical_path; + return ""; + } + return absolute_path; } - -SQLitePtr openSQLiteDB(const String & database_path, ContextPtr context, bool throw_on_error) +SQLitePtr openSQLiteDB(const String & path, ContextPtr context, bool throw_on_error) { - auto validated_path = validateSQLiteDatabasePath(database_path, context->getUserFilesPath(), throw_on_error); + auto user_files_path = context->getUserFilesPath(); + auto database_path = validateSQLiteDatabasePath(path, user_files_path, throw_on_error); + + /// For attach database there is no throw mode. 
+ if (database_path.empty()) + return nullptr; + + if (!fs::exists(database_path)) + LOG_WARNING(&Poco::Logger::get("SQLite"), "SQLite database path {} does not exist, will create an empty SQLite database", database_path); sqlite3 * tmp_sqlite_db = nullptr; - int status = sqlite3_open(validated_path.c_str(), &tmp_sqlite_db); + int status = sqlite3_open(database_path.c_str(), &tmp_sqlite_db); if (status != SQLITE_OK) { diff --git a/tests/queries/0_stateless/02227_test_create_empty_sqlite_db.reference b/tests/queries/0_stateless/02227_test_create_empty_sqlite_db.reference new file mode 100644 index 00000000000..472973e965a --- /dev/null +++ b/tests/queries/0_stateless/02227_test_create_empty_sqlite_db.reference @@ -0,0 +1 @@ +table1 diff --git a/tests/queries/0_stateless/02227_test_create_empty_sqlite_db.sh b/tests/queries/0_stateless/02227_test_create_empty_sqlite_db.sh new file mode 100755 index 00000000000..253d3f3149d --- /dev/null +++ b/tests/queries/0_stateless/02227_test_create_empty_sqlite_db.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# See 01658_read_file_to_string_column.sh +user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') + +function cleanup() +{ + ${CLICKHOUSE_CLIENT} --query="DROP DATABASE IF EXISTS ${CURR_DATABASE}" + rm -r "${DB_PATH}" +} +trap cleanup EXIT + +export CURR_DATABASE="test_01889_sqllite_${CLICKHOUSE_DATABASE}" + +DB_PATH=${user_files_path}/${CURR_DATABASE}_db1 + +${CLICKHOUSE_CLIENT} --multiquery --multiline --query=""" +DROP DATABASE IF EXISTS ${CURR_DATABASE}; +CREATE DATABASE ${CURR_DATABASE} ENGINE = SQLite('${DB_PATH}'); +SHOW TABLES FROM ${CURR_DATABASE}; +""" + +sqlite3 "${DB_PATH}" 'CREATE TABLE table1 (col1 text, col2 smallint);' + +${CLICKHOUSE_CLIENT} --multiquery --multiline --query=""" +SHOW TABLES FROM ${CURR_DATABASE}; +""" From 92d2cff045bae0268d6959ff9c520ce7a26afd53 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 25 Feb 2022 16:04:11 +0100 Subject: [PATCH 37/50] Fix --- programs/odbc-bridge/ODBCBlockInputStream.cpp | 2 -- src/Core/PostgreSQL/insertPostgreSQLValue.cpp | 2 -- tests/integration/test_storage_postgresql/test.py | 10 ++++++++++ 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/programs/odbc-bridge/ODBCBlockInputStream.cpp b/programs/odbc-bridge/ODBCBlockInputStream.cpp index 3cf10171a94..45ab4e51d8f 100644 --- a/programs/odbc-bridge/ODBCBlockInputStream.cpp +++ b/programs/odbc-bridge/ODBCBlockInputStream.cpp @@ -149,8 +149,6 @@ void ODBCSource::insertValue( DateTime64 time = 0; const auto * datetime_type = assert_cast(data_type.get()); readDateTime64Text(time, datetime_type->getScale(), in, datetime_type->getTimeZone()); - if (time < 0) - time = 0; assert_cast(column).insertValue(time); break; } diff --git a/src/Core/PostgreSQL/insertPostgreSQLValue.cpp b/src/Core/PostgreSQL/insertPostgreSQLValue.cpp index f4d47049554..b51bbafef54 100644 --- a/src/Core/PostgreSQL/insertPostgreSQLValue.cpp +++ b/src/Core/PostgreSQL/insertPostgreSQLValue.cpp @@ -108,8 +108,6 @@ void insertPostgreSQLValue( ReadBufferFromString in(value); DateTime64 time = 0; readDateTime64Text(time, 6, in, assert_cast(data_type.get())->getTimeZone()); - if (time < 0) - time = 0; assert_cast(column).insertValue(time); break; } diff --git 
a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py index 87337a6b459..55be61e052b 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -447,6 +447,16 @@ def test_where_false(started_cluster): cursor.execute("DROP TABLE test") +def test_datetime64(started_cluster): + cursor = started_cluster.postgres_conn.cursor() + cursor.execute("drop table if exists test") + cursor.execute("create table test (ts timestamp)") + cursor.execute("insert into test select '1960-01-01 20:00:00';") + + result = node1.query("select * from postgresql(postgres1, table='test')") + assert(result.strip() == '1960-01-01 20:00:00.000000') + + if __name__ == '__main__': cluster.start() input("Cluster created, press any key to destroy...") From c231fdf930ea2f77b0061f68949c638303b72ec9 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 25 Feb 2022 17:27:56 +0000 Subject: [PATCH 38/50] Update version_date.tsv after v22.1.4.30-stable --- utils/list-versions/version_date.tsv | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 3f9a9cdc24c..a81cc96ab6c 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,5 +1,6 @@ v22.2.3.5-stable 2022-02-25 v22.2.2.1-stable 2022-02-17 +v22.1.4.30-stable 2022-02-25 v22.1.3.7-stable 2022-01-23 v22.1.2.2-stable 2022-01-19 v21.12.4.1-stable 2022-01-23 From 2a062d414b9b93145f5cee1d5d37f3a1f83fe0dc Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 25 Feb 2022 21:48:18 +0300 Subject: [PATCH 39/50] Update style.md --- docs/en/development/style.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/development/style.md b/docs/en/development/style.md index 49b2f68b9f3..72a0359671d 100644 --- a/docs/en/development/style.md +++ b/docs/en/development/style.md @@ -322,7 +322,7 @@ std::string getName() const override { return "Memory"; } class StorageMemory : public IStorage ``` -**4.** `using` are named the same way as classes, or with `_t` on the end. +**4.** `using` are named the same way as classes. **5.** Names of template type arguments: in simple cases, use `T`; `T`, `U`; `T1`, `T2`. From 22a01c077a85adb64bca5b71ea8ab7fcc0435ba0 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 25 Feb 2022 21:49:24 +0300 Subject: [PATCH 40/50] Update style.md --- docs/en/development/style.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/development/style.md b/docs/en/development/style.md index 72a0359671d..ac4326b908c 100644 --- a/docs/en/development/style.md +++ b/docs/en/development/style.md @@ -490,7 +490,7 @@ if (0 != close(fd)) throwFromErrno("Cannot close file " + file_name, ErrorCodes::CANNOT_CLOSE_FILE); ``` -`Do not use assert`. +You can use assert to check invariants in code. **4.** Exception types. From c624a530cc089c1bcd1972cd4b3938940c2db9b0 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 25 Feb 2022 21:51:39 +0300 Subject: [PATCH 41/50] Update style.md --- docs/en/development/style.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/development/style.md b/docs/en/development/style.md index ac4326b908c..1c863d6b914 100644 --- a/docs/en/development/style.md +++ b/docs/en/development/style.md @@ -571,7 +571,7 @@ Don’t use these types for numbers: `signed/unsigned long`, `long long`, `short **13.** Passing arguments. 
-Pass complex values by reference (including `std::string`). +Pass complex values by value if they are going to be moved and use std::move; pass by reference if you want to update value in a loop. If a function captures ownership of an object created in the heap, make the argument type `shared_ptr` or `unique_ptr`. @@ -581,7 +581,7 @@ In most cases, just use `return`. Do not write `return std::move(res)`. If the function allocates an object on heap and returns it, use `shared_ptr` or `unique_ptr`. -In rare cases you might need to return the value via an argument. In this case, the argument should be a reference. +In rare cases (updating a value in a loop) you might need to return the value via an argument. In this case, the argument should be a reference. ``` cpp using AggregateFunctionPtr = std::shared_ptr; From ba6032a353ff34ed56c9252db292c5cbe28d8b9a Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Sat, 26 Feb 2022 01:31:04 +0100 Subject: [PATCH 42/50] Update src/Databases/SQLite/SQLiteUtils.cpp Co-authored-by: Vladimir C --- src/Databases/SQLite/SQLiteUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Databases/SQLite/SQLiteUtils.cpp b/src/Databases/SQLite/SQLiteUtils.cpp index 0bb16ea3c8f..88e6d0ab6fd 100644 --- a/src/Databases/SQLite/SQLiteUtils.cpp +++ b/src/Databases/SQLite/SQLiteUtils.cpp @@ -49,7 +49,7 @@ SQLitePtr openSQLiteDB(const String & path, ContextPtr context, bool throw_on_er return nullptr; if (!fs::exists(database_path)) - LOG_WARNING(&Poco::Logger::get("SQLite"), "SQLite database path {} does not exist, will create an empty SQLite database", database_path); + LOG_DEBUG(&Poco::Logger::get("SQLite"), "SQLite database path {} does not exist, will create an empty SQLite database", database_path); sqlite3 * tmp_sqlite_db = nullptr; int status = sqlite3_open(database_path.c_str(), &tmp_sqlite_db); From 99bd56e2deeeda291b299481796300cd4c260516 Mon Sep 17 00:00:00 2001 From: Hongbin Date: Mon, 28 Feb 2022 08:15:37 +0800 Subject: [PATCH 43/50] Fix some code comments style --- .../AggregateFunctionArgMinMax.h | 2 +- src/Storages/Cache/ExternalDataSourceCache.cpp | 10 +++++----- src/Storages/Cache/ExternalDataSourceCache.h | 2 +- src/Storages/Cache/IRemoteFileMetadata.h | 2 +- src/Storages/Cache/RemoteCacheController.cpp | 10 +++++----- src/Storages/Cache/RemoteCacheController.h | 10 +++++----- src/Storages/Distributed/DirectoryMonitor.cpp | 14 +++++++------- src/Storages/Distributed/DirectoryMonitor.h | 2 +- src/Storages/FileLog/FileLogDirectoryWatcher.h | 2 +- src/Storages/FileLog/FileLogSource.h | 2 +- src/Storages/FileLog/StorageFileLog.h | 6 +++--- src/Storages/HDFS/StorageHDFS.h | 2 +- src/Storages/System/attachSystemTablesImpl.h | 4 ++-- src/TableFunctions/TableFunctionRemote.h | 4 ++-- src/TableFunctions/TableFunctionS3.h | 2 +- src/TableFunctions/TableFunctionS3Cluster.h | 2 +- src/TableFunctions/TableFunctionURL.h | 2 +- src/TableFunctions/TableFunctionValues.h | 2 +- 18 files changed, 40 insertions(+), 40 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionArgMinMax.h b/src/AggregateFunctions/AggregateFunctionArgMinMax.h index 4441ad0cc26..516d33f42de 100644 --- a/src/AggregateFunctions/AggregateFunctionArgMinMax.h +++ b/src/AggregateFunctions/AggregateFunctionArgMinMax.h @@ -16,7 +16,7 @@ namespace ErrorCodes } -/// For possible values for template parameters, see AggregateFunctionMinMaxAny.h +/// For possible values for template parameters, see 
'AggregateFunctionMinMaxAny.h'. template struct AggregateFunctionArgMinMaxData { diff --git a/src/Storages/Cache/ExternalDataSourceCache.cpp b/src/Storages/Cache/ExternalDataSourceCache.cpp index 711dfeebcae..0f78c8d3511 100644 --- a/src/Storages/Cache/ExternalDataSourceCache.cpp +++ b/src/Storages/Cache/ExternalDataSourceCache.cpp @@ -144,7 +144,7 @@ void ExternalDataSourceCache::initOnce(ContextPtr context, const String & root_d local_cache_bytes_read_before_flush = bytes_read_before_flush_; lru_caches = std::make_unique(limit_size_); - /// create if root_dir not exists + /// Create if root_dir not exists. if (!fs::exists(fs::path(root_dir))) { fs::create_directories(fs::path(root_dir)); @@ -156,7 +156,7 @@ void ExternalDataSourceCache::initOnce(ContextPtr context, const String & root_d String ExternalDataSourceCache::calculateLocalPath(IRemoteFileMetadataPtr metadata) const { - // add version into the full_path, and not block to read the new version + // Add version into the full_path, and not block to read the new version. String full_path = metadata->getName() + ":" + metadata->remote_path + ":" + metadata->getVersion(); UInt128 hashcode = sipHash128(full_path.c_str(), full_path.size()); String hashcode_str = getHexUIntLowercase(hashcode); @@ -166,7 +166,7 @@ String ExternalDataSourceCache::calculateLocalPath(IRemoteFileMetadataPtr metada std::pair, std::unique_ptr> ExternalDataSourceCache::createReader( ContextPtr context, IRemoteFileMetadataPtr remote_file_metadata, std::unique_ptr & read_buffer) { - // If something is wrong on startup, rollback to read from the original ReadBuffer + // If something is wrong on startup, rollback to read from the original ReadBuffer. if (!isInitialized()) { LOG_ERROR(log, "ExternalDataSourceCache has not been initialized"); @@ -180,7 +180,7 @@ std::pair, std::unique_ptr> Externa auto cache = lru_caches->get(local_path); if (cache) { - // the remote file has been updated, need to redownload + // The remote file has been updated, need to redownload. if (!cache->value().isValid() || cache->value().isModified(remote_file_metadata)) { LOG_TRACE( @@ -201,7 +201,7 @@ std::pair, std::unique_ptr> Externa if (!fs::exists(local_path)) fs::create_directories(local_path); - // cache is not found or is invalid, try to remove it at first + // Cache is not found or is invalid, try to remove it at first. lru_caches->tryRemove(local_path); auto new_cache_controller diff --git a/src/Storages/Cache/ExternalDataSourceCache.h b/src/Storages/Cache/ExternalDataSourceCache.h index 3e2bbb05104..c555198e4c4 100644 --- a/src/Storages/Cache/ExternalDataSourceCache.h +++ b/src/Storages/Cache/ExternalDataSourceCache.h @@ -78,7 +78,7 @@ protected: ExternalDataSourceCache(); private: - // root directory of local cache for remote filesystem + // Root directory of local cache for remote filesystem. String root_dir; size_t local_cache_bytes_read_before_flush = 0; diff --git a/src/Storages/Cache/IRemoteFileMetadata.h b/src/Storages/Cache/IRemoteFileMetadata.h index 0b07103b786..1d152fcdd15 100644 --- a/src/Storages/Cache/IRemoteFileMetadata.h +++ b/src/Storages/Cache/IRemoteFileMetadata.h @@ -18,7 +18,7 @@ public: // serialize virtual String toString() const = 0; - // used for comparing two file metadatas are the same or not. + // Used for comparing two file metadatas are the same or not. 
virtual String getVersion() const = 0; String remote_path; diff --git a/src/Storages/Cache/RemoteCacheController.cpp b/src/Storages/Cache/RemoteCacheController.cpp index b0bb31c09e7..b5fc38fffcd 100644 --- a/src/Storages/Cache/RemoteCacheController.cpp +++ b/src/Storages/Cache/RemoteCacheController.cpp @@ -31,9 +31,9 @@ std::shared_ptr RemoteCacheController::recover(const std: auto cache_controller = std::make_shared(nullptr, local_path_, 0); if (cache_controller->file_status != DOWNLOADED) { - // do not load this invalid cached file and clear it. the clear action is in + // Do not load this invalid cached file and clear it. the clear action is in // ExternalDataSourceCache::recoverTask(), because deleting directories during iteration will - // cause unexpected behaviors + // cause unexpected behaviors. LOG_INFO(log, "Recover cached file failed. local path:{}", local_path_.string()); return nullptr; } @@ -76,7 +76,7 @@ RemoteCacheController::RemoteCacheController( , local_cache_bytes_read_before_flush(cache_bytes_before_flush_) , current_offset(0) { - // on recover, file_metadata_ptr is null, but it will be allocated after loading from metadata.txt + // On recover, file_metadata_ptr is null, but it will be allocated after loading from metadata.txt // when we allocate a whole new file cache,file_metadata_ptr must not be null. if (file_metadata_ptr) { @@ -106,14 +106,14 @@ void RemoteCacheController::waitMoreData(size_t start_offset_, size_t end_offset std::unique_lock lock{mutex}; if (file_status == DOWNLOADED) { - // finish reading + // Finish reading. if (start_offset_ >= current_offset) { lock.unlock(); return; } } - else // block until more data is ready + else // Block until more data is ready. { if (current_offset >= end_offset_) { diff --git a/src/Storages/Cache/RemoteCacheController.h b/src/Storages/Cache/RemoteCacheController.h index 6047dbd5eb4..ca2cb837e34 100644 --- a/src/Storages/Cache/RemoteCacheController.h +++ b/src/Storages/Cache/RemoteCacheController.h @@ -29,7 +29,7 @@ public: IRemoteFileMetadataPtr file_metadata_, const std::filesystem::path & local_path_, size_t cache_bytes_before_flush_); ~RemoteCacheController(); - // recover from local disk + // Recover from local disk. static std::shared_ptr recover(const std::filesystem::path & local_path); /** @@ -40,9 +40,9 @@ public: void close(); /** - * called in LocalCachedFileReader read(), the reading process would be blocked until + * Called in LocalCachedFileReader read(), the reading process would be blocked until * enough data be downloaded. - * If the file has finished download, the process would unblocked + * If the file has finished download, the process would unblocked. */ void waitMoreData(size_t start_offset_, size_t end_offset_); @@ -69,7 +69,7 @@ public: void startBackgroundDownload(std::unique_ptr in_readbuffer_, BackgroundSchedulePool & thread_pool); private: - // flush file and status information + // Flush file and status information. void flush(bool need_flush_status = false); BackgroundSchedulePool::TaskHolder download_task_holder; @@ -79,7 +79,7 @@ private: std::condition_variable more_data_signal; String metadata_class; - LocalFileStatus file_status = TO_DOWNLOAD; // for tracking download process + LocalFileStatus file_status = TO_DOWNLOAD; // For tracking download process. 
IRemoteFileMetadataPtr file_metadata_ptr; std::filesystem::path local_path; diff --git a/src/Storages/Distributed/DirectoryMonitor.cpp b/src/Storages/Distributed/DirectoryMonitor.cpp index d7422b1ddbc..d833371a742 100644 --- a/src/Storages/Distributed/DirectoryMonitor.cpp +++ b/src/Storages/Distributed/DirectoryMonitor.cpp @@ -219,7 +219,7 @@ namespace return distributed_header; } - /// remote_error argument is used to decide whether some errors should be + /// 'remote_error' argument is used to decide whether some errors should be /// ignored or not, in particular: /// /// - ATTEMPT_TO_READ_AFTER_EOF should not be ignored @@ -399,7 +399,7 @@ void StorageDistributedDirectoryMonitor::flushAllData() { processFiles(files); - /// Update counters + /// Update counters. getFiles(); } } @@ -475,7 +475,7 @@ void StorageDistributedDirectoryMonitor::run() break; } - /// Update counters + /// Update counters. getFiles(); if (!quit && do_sleep) @@ -491,8 +491,8 @@ ConnectionPoolPtr StorageDistributedDirectoryMonitor::createPool(const std::stri const auto & shards_info = cluster->getShardsInfo(); const auto & shards_addresses = cluster->getShardsAddresses(); - /// check new format shard{shard_index}_replica{replica_index} - /// (shard_index and replica_index starts from 1) + /// Check new format shard{shard_index}_replica{replica_index} + /// (shard_index and replica_index starts from 1). if (address.shard_index != 0) { if (!address.replica_index) @@ -511,7 +511,7 @@ ConnectionPoolPtr StorageDistributedDirectoryMonitor::createPool(const std::stri return shard_info.per_replica_pools[address.replica_index - 1]; } - /// existing connections pool have a higher priority + /// Existing connections pool have a higher priority. for (size_t shard_index = 0; shard_index < shards_info.size(); ++shard_index) { const Cluster::Addresses & replicas_addresses = shards_addresses[shard_index]; @@ -1152,7 +1152,7 @@ void StorageDistributedDirectoryMonitor::markAsSend(const std::string & file_pat bool StorageDistributedDirectoryMonitor::maybeMarkAsBroken(const std::string & file_path, const Exception & e) { - /// mark file as broken if necessary + /// Mark file as broken if necessary. if (isFileBrokenErrorCode(e.code(), e.isRemoteException())) { markAsBroken(file_path); diff --git a/src/Storages/Distributed/DirectoryMonitor.h b/src/Storages/Distributed/DirectoryMonitor.h index 307b57a5668..3c0f3b29dde 100644 --- a/src/Storages/Distributed/DirectoryMonitor.h +++ b/src/Storages/Distributed/DirectoryMonitor.h @@ -52,7 +52,7 @@ public: static std::shared_ptr createSourceFromFile(const String & file_name); - /// For scheduling via DistributedBlockOutputStream + /// For scheduling via DistributedBlockOutputStream. bool addAndSchedule(size_t file_size, size_t ms); struct InternalStatus diff --git a/src/Storages/FileLog/FileLogDirectoryWatcher.h b/src/Storages/FileLog/FileLogDirectoryWatcher.h index 0b0c86397aa..bc855a1c2fa 100644 --- a/src/Storages/FileLog/FileLogDirectoryWatcher.h +++ b/src/Storages/FileLog/FileLogDirectoryWatcher.h @@ -45,7 +45,7 @@ public: private: friend class DirectoryWatcherBase; - /// Here must pass by value, otherwise will lead to stack-use-of-scope + /// Here must pass by value, otherwise will lead to stack-use-of-scope. 
void onItemAdded(DirectoryWatcherBase::DirectoryEvent ev); void onItemRemoved(DirectoryWatcherBase::DirectoryEvent ev); void onItemModified(DirectoryWatcherBase::DirectoryEvent ev); diff --git a/src/Storages/FileLog/FileLogSource.h b/src/Storages/FileLog/FileLogSource.h index c9fd1cc5a79..f1cc83b4a06 100644 --- a/src/Storages/FileLog/FileLogSource.h +++ b/src/Storages/FileLog/FileLogSource.h @@ -52,7 +52,7 @@ private: Block virtual_header; /// The start pos and end pos of files responsible by this stream, - /// does not include end + /// does not include end. size_t start; size_t end; }; diff --git a/src/Storages/FileLog/StorageFileLog.h b/src/Storages/FileLog/StorageFileLog.h index f7e67747965..1f5078ab68e 100644 --- a/src/Storages/FileLog/StorageFileLog.h +++ b/src/Storages/FileLog/StorageFileLog.h @@ -56,7 +56,7 @@ public: enum class FileStatus { - OPEN, /// first time open file after table start up + OPEN, /// First time open file after table start up. NO_CHANGE, UPDATED, REMOVED, @@ -83,7 +83,7 @@ public: { InodeToFileMeta meta_by_inode; FileNameToContext context_by_name; - /// file names without path + /// File names without path. Names file_names; }; @@ -199,7 +199,7 @@ private: /// Used in shutdown() void serialize() const; - /// Used in FileSource closeFileAndStoreMeta(file_name); + /// Used in FileSource closeFileAndStoreMeta(file_name). void serialize(UInt64 inode, const FileMeta & file_meta) const; void deserialize(); diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index 0e23e45f4c5..8c709c7d4d3 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -150,7 +150,7 @@ private: std::unique_ptr read_buf; std::unique_ptr pipeline; std::unique_ptr reader; - /// onCancel and generate can be called concurrently + /// OnCancel and generate can be called concurrently. std::mutex reader_mutex; String current_path; diff --git a/src/Storages/System/attachSystemTablesImpl.h b/src/Storages/System/attachSystemTablesImpl.h index 4f83a0a4fda..b6080d15f2c 100644 --- a/src/Storages/System/attachSystemTablesImpl.h +++ b/src/Storages/System/attachSystemTablesImpl.h @@ -12,13 +12,13 @@ void attach(ContextPtr context, IDatabase & system_database, const String & tabl assert(system_database.getDatabaseName() == DatabaseCatalog::SYSTEM_DATABASE); if (system_database.getUUID() == UUIDHelpers::Nil) { - /// Attach to Ordinary database + /// Attach to Ordinary database. auto table_id = StorageID(DatabaseCatalog::SYSTEM_DATABASE, table_name); system_database.attachTable(context, table_name, StorageT::create(table_id, std::forward(args)...)); } else { - /// Attach to Atomic database + /// Attach to Atomic database. /// NOTE: UUIDs are not persistent, but it's ok since no data are stored on disk for these storages /// and path is actually not used auto table_id = StorageID(DatabaseCatalog::SYSTEM_DATABASE, table_name, UUIDHelpers::generateV4()); diff --git a/src/TableFunctions/TableFunctionRemote.h b/src/TableFunctions/TableFunctionRemote.h index 976397ddc45..6f28f1ec9de 100644 --- a/src/TableFunctions/TableFunctionRemote.h +++ b/src/TableFunctions/TableFunctionRemote.h @@ -10,10 +10,10 @@ namespace DB /* remote ('address', db, table) - creates a temporary StorageDistributed. * To get the table structure, a DESC TABLE request is made to the remote server. - * For example + * For example: * SELECT count() FROM remote('example01-01-1', merge, hits) - go to `example01-01-1`, in the merge database, the hits table. 
* An expression that generates a set of shards and replicas can also be specified as the host name - see below. - * Also, there is a cluster version of the function: cluster('existing_cluster_name', 'db', 'table') + * Also, there is a cluster version of the function: cluster('existing_cluster_name', 'db', 'table'). */ class TableFunctionRemote : public ITableFunction { diff --git a/src/TableFunctions/TableFunctionS3.h b/src/TableFunctions/TableFunctionS3.h index 374e653072e..bd0226e348a 100644 --- a/src/TableFunctions/TableFunctionS3.h +++ b/src/TableFunctions/TableFunctionS3.h @@ -13,7 +13,7 @@ namespace DB class Context; -/* s3(source, [access_key_id, secret_access_key,] format, structure[, compression]) - creates a temporary storage for a file in S3 +/* s3(source, [access_key_id, secret_access_key,] format, structure[, compression]) - creates a temporary storage for a file in S3. */ class TableFunctionS3 : public ITableFunction { diff --git a/src/TableFunctions/TableFunctionS3Cluster.h b/src/TableFunctions/TableFunctionS3Cluster.h index cc857725ce6..a0e4db56fea 100644 --- a/src/TableFunctions/TableFunctionS3Cluster.h +++ b/src/TableFunctions/TableFunctionS3Cluster.h @@ -13,7 +13,7 @@ namespace DB class Context; /** - * s3Cluster(cluster_name, source, [access_key_id, secret_access_key,] format, structure) + * S3 cluster(cluster_name, source, [access_key_id, secret_access_key,] format, structure) * A table function, which allows to process many files from S3 on a specific cluster * On initiator it creates a connection to _all_ nodes in cluster, discloses asterics * in S3 file path and dispatch each file dynamically. diff --git a/src/TableFunctions/TableFunctionURL.h b/src/TableFunctions/TableFunctionURL.h index 798a37dc478..35483b1a04a 100644 --- a/src/TableFunctions/TableFunctionURL.h +++ b/src/TableFunctions/TableFunctionURL.h @@ -10,7 +10,7 @@ namespace DB class Context; -/* url(source, format[, structure, compression]) - creates a temporary storage from url +/* url(source, format[, structure, compression]) - creates a temporary storage from url. */ class TableFunctionURL : public ITableFunctionFileLike { diff --git a/src/TableFunctions/TableFunctionValues.h b/src/TableFunctions/TableFunctionValues.h index f01bcf6e20e..61ce5158086 100644 --- a/src/TableFunctions/TableFunctionValues.h +++ b/src/TableFunctions/TableFunctionValues.h @@ -5,7 +5,7 @@ namespace DB { /* values(structure, values...) - creates a temporary storage filling columns with values - * values is case-insensitive table function + * values is case-insensitive table function. 
*/ class TableFunctionValues : public ITableFunction { From 644f9168fab88117508cc7a43067557b9a9a0a15 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 28 Feb 2022 09:12:17 +0300 Subject: [PATCH 44/50] Ignore per-column TTL in CREATE TABLE AS if new table engine does not support it Follow-up for: #6968 Signed-off-by: Azat Khuzhin --- src/Interpreters/InterpreterCreateQuery.cpp | 5 +++ src/Storages/ColumnsDescription.cpp | 16 ++++++++++ src/Storages/ColumnsDescription.h | 1 + ...02230_create_table_as_ignore_ttl.reference | 32 +++++++++++++++++++ .../02230_create_table_as_ignore_ttl.sql | 18 +++++++++++ 5 files changed, 72 insertions(+) create mode 100644 tests/queries/0_stateless/02230_create_table_as_ignore_ttl.reference create mode 100644 tests/queries/0_stateless/02230_create_table_as_ignore_ttl.sql diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 71db15dc46f..16d6e2d0652 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -642,6 +642,11 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti properties.indices = as_storage_metadata->getSecondaryIndices(); properties.projections = as_storage_metadata->getProjections().clone(); } + else + { + /// Only MergeTree support TTL + properties.columns.resetColumnTTLs(); + } properties.constraints = as_storage_metadata->getConstraints(); } diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 3733d1214b0..8b08f5f28dd 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -635,6 +635,22 @@ ColumnsDescription::ColumnTTLs ColumnsDescription::getColumnTTLs() const return ret; } +void ColumnsDescription::resetColumnTTLs() +{ + std::vector old_columns; + old_columns.reserve(columns.size()); + for (const auto & col : columns) + old_columns.emplace_back(col); + + columns.clear(); + + for (auto & col : old_columns) + { + col.ttl.reset(); + add(col); + } +} + String ColumnsDescription::toString() const { diff --git a/src/Storages/ColumnsDescription.h b/src/Storages/ColumnsDescription.h index 6cf863c6cb4..9fb03c70be9 100644 --- a/src/Storages/ColumnsDescription.h +++ b/src/Storages/ColumnsDescription.h @@ -104,6 +104,7 @@ public: using ColumnTTLs = std::unordered_map; ColumnTTLs getColumnTTLs() const; + void resetColumnTTLs(); bool has(const String & column_name) const; bool hasNested(const String & column_name) const; diff --git a/tests/queries/0_stateless/02230_create_table_as_ignore_ttl.reference b/tests/queries/0_stateless/02230_create_table_as_ignore_ttl.reference new file mode 100644 index 00000000000..5236875e209 --- /dev/null +++ b/tests/queries/0_stateless/02230_create_table_as_ignore_ttl.reference @@ -0,0 +1,32 @@ +CREATE TABLE default.data_02230_ttl +( + `date` Date, + `key` Int32 +) +ENGINE = MergeTree +ORDER BY key +TTL date + 14 +SETTINGS index_granularity = 8192 +CREATE TABLE default.null_02230_ttl +( + `date` Date, + `key` Int32 +) +ENGINE = Null +CREATE TABLE default.data_02230_column_ttl +( + `date` Date, + `value` Int32 TTL date + 7, + `key` Int32 +) +ENGINE = MergeTree +ORDER BY key +TTL date + 14 +SETTINGS index_granularity = 8192 +CREATE TABLE default.null_02230_column_ttl +( + `date` Date, + `value` Int32, + `key` Int32 +) +ENGINE = Null diff --git a/tests/queries/0_stateless/02230_create_table_as_ignore_ttl.sql b/tests/queries/0_stateless/02230_create_table_as_ignore_ttl.sql new file mode 100644 index 
00000000000..8838f67ec83 --- /dev/null +++ b/tests/queries/0_stateless/02230_create_table_as_ignore_ttl.sql @@ -0,0 +1,18 @@ +drop table if exists data_02230_ttl; +drop table if exists null_02230_ttl; +create table data_02230_ttl (date Date, key Int) Engine=MergeTree() order by key TTL date + 14; +show create data_02230_ttl format TSVRaw; +create table null_02230_ttl engine=Null() as data_02230_ttl; +show create null_02230_ttl format TSVRaw; +drop table data_02230_ttl; +drop table null_02230_ttl; + +drop table if exists data_02230_column_ttl; +drop table if exists null_02230_column_ttl; +create table data_02230_column_ttl (date Date, value Int TTL date + 7, key Int) Engine=MergeTree() order by key TTL date + 14; +show create data_02230_column_ttl format TSVRaw; +create table null_02230_column_ttl engine=Null() as data_02230_column_ttl; +-- check that order of columns is the same +show create null_02230_column_ttl format TSVRaw; +drop table data_02230_column_ttl; +drop table null_02230_column_ttl; From d117ce48938e8f87fd3f0b2c216600f75435c675 Mon Sep 17 00:00:00 2001 From: Hongbin Date: Mon, 28 Feb 2022 16:31:58 +0800 Subject: [PATCH 45/50] fix comments --- src/TableFunctions/TableFunctionS3Cluster.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/TableFunctions/TableFunctionS3Cluster.h b/src/TableFunctions/TableFunctionS3Cluster.h index a0e4db56fea..1d9821ae7ae 100644 --- a/src/TableFunctions/TableFunctionS3Cluster.h +++ b/src/TableFunctions/TableFunctionS3Cluster.h @@ -13,7 +13,7 @@ namespace DB class Context; /** - * S3 cluster(cluster_name, source, [access_key_id, secret_access_key,] format, structure) + * s3 cluster(cluster_name, source, [access_key_id, secret_access_key,] format, structure) * A table function, which allows to process many files from S3 on a specific cluster * On initiator it creates a connection to _all_ nodes in cluster, discloses asterics * in S3 file path and dispatch each file dynamically. From c9bc4421140086a285e5e3b6c239f43dbb39c274 Mon Sep 17 00:00:00 2001 From: Hongbin Date: Mon, 28 Feb 2022 16:44:35 +0800 Subject: [PATCH 46/50] fix comments --- src/Storages/HDFS/StorageHDFS.h | 2 +- src/TableFunctions/TableFunctionS3Cluster.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index 8c709c7d4d3..c8ad6ffdeaf 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -150,7 +150,7 @@ private: std::unique_ptr read_buf; std::unique_ptr pipeline; std::unique_ptr reader; - /// OnCancel and generate can be called concurrently. + /// onCancel and generate can be called concurrently. std::mutex reader_mutex; String current_path; diff --git a/src/TableFunctions/TableFunctionS3Cluster.h b/src/TableFunctions/TableFunctionS3Cluster.h index 1d9821ae7ae..35d18631ae1 100644 --- a/src/TableFunctions/TableFunctionS3Cluster.h +++ b/src/TableFunctions/TableFunctionS3Cluster.h @@ -13,7 +13,7 @@ namespace DB class Context; /** - * s3 cluster(cluster_name, source, [access_key_id, secret_access_key,] format, structure) + * s3cluster(cluster_name, source, [access_key_id, secret_access_key,] format, structure) * A table function, which allows to process many files from S3 on a specific cluster * On initiator it creates a connection to _all_ nodes in cluster, discloses asterics * in S3 file path and dispatch each file dynamically. 
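Taking the `s3Cluster` signature documented in the header comment above at face value, a hedged usage sketch might read as follows; the cluster name, bucket URL and column list are placeholders invented for the example rather than anything taken from the patches:

```sql
-- Hypothetical values throughout: cluster, URL and schema are placeholders.
SELECT count()
FROM s3Cluster(
    'my_cluster',                                        -- an existing cluster defined in remote_servers
    'https://my-bucket.s3.amazonaws.com/data/*.parquet', -- the glob is expanded on the initiator
    'Parquet',
    'id UInt64, value String');
```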
From 067df156eb41e92ed7e463172695c0c2837570ab Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 28 Feb 2022 11:54:27 +0300 Subject: [PATCH 47/50] Use proper exit value on exit by signal (like in glibc) Signed-off-by: Azat Khuzhin --- src/Client/ClientBase.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 2829f9e9f46..97950a73b3a 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -233,7 +233,7 @@ public: void interruptSignalHandler(int signum) { if (exit_on_signal.test_and_set()) - _exit(signum); + _exit(128 + signum); } From 895c40686c9fb99b16f2b5102c40a33f556accae Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 28 Feb 2022 12:12:28 +0300 Subject: [PATCH 48/50] Tiny cleanup of ClientBase Signed-off-by: Azat Khuzhin --- src/Client/ClientBase.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 97950a73b3a..4aedb12afcb 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -229,7 +229,7 @@ public: static bool cancelled() { return exit_on_signal.test(); } }; -/// This signal handler is set only for sigint. +/// This signal handler is set only for SIGINT. void interruptSignalHandler(int signum) { if (exit_on_signal.test_and_set()) @@ -243,22 +243,22 @@ ClientBase::ClientBase() = default; void ClientBase::setupSignalHandler() { - exit_on_signal.test_and_set(); + exit_on_signal.test_and_set(); - struct sigaction new_act; - memset(&new_act, 0, sizeof(new_act)); + struct sigaction new_act; + memset(&new_act, 0, sizeof(new_act)); - new_act.sa_handler = interruptSignalHandler; - new_act.sa_flags = 0; + new_act.sa_handler = interruptSignalHandler; + new_act.sa_flags = 0; #if defined(OS_DARWIN) sigemptyset(&new_act.sa_mask); #else - if (sigemptyset(&new_act.sa_mask)) + if (sigemptyset(&new_act.sa_mask)) throw Exception(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler."); #endif - if (sigaction(SIGINT, &new_act, nullptr)) + if (sigaction(SIGINT, &new_act, nullptr)) throw Exception(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER, "Cannot set signal handler."); } From f66ad2ee482a2df25784a49015c46fbe0e9125d7 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 26 Feb 2022 23:40:24 +0300 Subject: [PATCH 49/50] Stop processing multiqueries in clickhouse-client/local on SIGINT Signed-off-by: Azat Khuzhin --- src/Client/ClientBase.cpp | 13 ++++++++----- src/Client/ClientBase.h | 4 +++- ...9_client_stop_multiquery_in_SIGINT.reference | 0 .../02229_client_stop_multiquery_in_SIGINT.sh | 17 +++++++++++++++++ 4 files changed, 28 insertions(+), 6 deletions(-) create mode 100644 tests/queries/0_stateless/02229_client_stop_multiquery_in_SIGINT.reference create mode 100755 tests/queries/0_stateless/02229_client_stop_multiquery_in_SIGINT.sh diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 4aedb12afcb..7dfa60ad560 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -702,7 +702,6 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa /// Also checks if query execution should be cancelled. void ClientBase::receiveResult(ASTPtr parsed_query) { - bool cancelled = false; QueryInterruptHandler query_interrupt_handler; // TODO: get the poll_interval from commandline. @@ -773,7 +772,7 @@ void ClientBase::receiveResult(ASTPtr parsed_query) /// Receive a part of the result, or progress info or an exception and process it. 
/// Returns true if one should continue receiving packets. /// Output of result is suppressed if query was cancelled. -bool ClientBase::receiveAndProcessPacket(ASTPtr parsed_query, bool cancelled) +bool ClientBase::receiveAndProcessPacket(ASTPtr parsed_query, bool cancelled_) { Packet packet = connection->receivePacket(); @@ -783,7 +782,7 @@ bool ClientBase::receiveAndProcessPacket(ASTPtr parsed_query, bool cancelled) return true; case Protocol::Server::Data: - if (!cancelled) + if (!cancelled_) onData(packet.block, parsed_query); return true; @@ -796,12 +795,12 @@ bool ClientBase::receiveAndProcessPacket(ASTPtr parsed_query, bool cancelled) return true; case Protocol::Server::Totals: - if (!cancelled) + if (!cancelled_) onTotals(packet.block, parsed_query); return true; case Protocol::Server::Extremes: - if (!cancelled) + if (!cancelled_) onExtremes(packet.block, parsed_query); return true; @@ -1265,6 +1264,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin { resetOutput(); have_error = false; + cancelled = false; client_exception.reset(); server_exception.reset(); @@ -1392,6 +1392,9 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( String & query_to_execute, ASTPtr & parsed_query, const String & all_queries_text, std::optional & current_exception) { + if (!is_interactive && cancelled) + return MultiQueryProcessingStage::QUERIES_END; + if (this_query_begin >= all_queries_end) return MultiQueryProcessingStage::QUERIES_END; diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 0a11745b996..a92888868a4 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -108,7 +108,7 @@ protected: private: void receiveResult(ASTPtr parsed_query); - bool receiveAndProcessPacket(ASTPtr parsed_query, bool cancelled); + bool receiveAndProcessPacket(ASTPtr parsed_query, bool cancelled_); void receiveLogs(ASTPtr parsed_query); bool receiveSampleBlock(Block & out, ColumnsDescription & columns_description, ASTPtr parsed_query); bool receiveEndOfQuery(); @@ -259,6 +259,8 @@ protected: }; std::vector hosts_and_ports{}; + + bool cancelled = false; }; } diff --git a/tests/queries/0_stateless/02229_client_stop_multiquery_in_SIGINT.reference b/tests/queries/0_stateless/02229_client_stop_multiquery_in_SIGINT.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02229_client_stop_multiquery_in_SIGINT.sh b/tests/queries/0_stateless/02229_client_stop_multiquery_in_SIGINT.sh new file mode 100755 index 00000000000..171dcc52c9c --- /dev/null +++ b/tests/queries/0_stateless/02229_client_stop_multiquery_in_SIGINT.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +timeout -s INT 3s $CLICKHOUSE_CLIENT --max_block_size 1 -nm -q " + SELECT sleep(1) FROM numbers(100) FORMAT Null; + SELECT 'FAIL'; +" + +timeout -s INT 3s $CLICKHOUSE_LOCAL --max_block_size 1 -nm -q " + SELECT sleep(1) FROM numbers(100) FORMAT Null; + SELECT 'FAIL'; +" + +exit 0 From af4362e40a995af76be449dc87728b0a1c174028 Mon Sep 17 00:00:00 2001 From: Filatenkov Artur <58165623+FArthur-cmd@users.noreply.github.com> Date: Mon, 28 Feb 2022 21:30:02 +0300 Subject: [PATCH 50/50] Improve certificate reloader (#34887) * add ec support * Add test * fix tests and improve code style --- src/Server/CertificateReloader.cpp | 2 +- src/Server/CertificateReloader.h | 2 +- .../configs/ECcert.crt | 16 ++++++++++++ .../configs/ECcert.key | 6 +++++ .../test_reload_certificate/test.py | 25 +++++++++++++++++++ 5 files changed, 49 insertions(+), 2 deletions(-) create mode 100644 tests/integration/test_reload_certificate/configs/ECcert.crt create mode 100644 tests/integration/test_reload_certificate/configs/ECcert.key diff --git a/src/Server/CertificateReloader.cpp b/src/Server/CertificateReloader.cpp index f3f366876da..aaffd08365c 100644 --- a/src/Server/CertificateReloader.cpp +++ b/src/Server/CertificateReloader.cpp @@ -37,7 +37,7 @@ int CertificateReloader::setCertificate(SSL * ssl) return -1; SSL_use_certificate(ssl, const_cast(current->cert.certificate())); - SSL_use_RSAPrivateKey(ssl, current->key.impl()->getRSA()); + SSL_use_PrivateKey(ssl, const_cast(static_cast(current->key))); int err = SSL_check_private_key(ssl); if (err != 1) diff --git a/src/Server/CertificateReloader.h b/src/Server/CertificateReloader.h index 7f93b006875..88c732c2db6 100644 --- a/src/Server/CertificateReloader.h +++ b/src/Server/CertificateReloader.h @@ -74,7 +74,7 @@ private: struct Data { Poco::Crypto::X509Certificate cert; - Poco::Crypto::RSAKey key; + Poco::Crypto::EVPPKey key; Data(std::string cert_path, std::string key_path); }; diff --git a/tests/integration/test_reload_certificate/configs/ECcert.crt b/tests/integration/test_reload_certificate/configs/ECcert.crt new file mode 100644 index 00000000000..b87ce0099dc --- /dev/null +++ b/tests/integration/test_reload_certificate/configs/ECcert.crt @@ -0,0 +1,16 @@ +-----BEGIN CERTIFICATE----- +MIICkzCCAhigAwIBAgIUcrahhUuSDdw60Wyfo2E4kVUWWQ8wCgYIKoZIzj0EAwIw +fzELMAkGA1UEBhMCQ04xEzARBgNVBAgMClNvbWUtU3RhdGUxDTALBgNVBAcMBGNp +dHkxEDAOBgNVBAoMB2NvbXBhbnkxEDAOBgNVBAsMB3NlY3Rpb24xEjAQBgNVBAMM +CWxvY2FsaG9zdDEUMBIGCSqGSIb3DQEJARYFZW1haWwwIBcNMjIwMjI3MTg1NzQz +WhgPMjEyMjAyMDMxODU3NDNaMH8xCzAJBgNVBAYTAkNOMRMwEQYDVQQIDApTb21l +LVN0YXRlMQ0wCwYDVQQHDARjaXR5MRAwDgYDVQQKDAdjb21wYW55MRAwDgYDVQQL +DAdzZWN0aW9uMRIwEAYDVQQDDAlsb2NhbGhvc3QxFDASBgkqhkiG9w0BCQEWBWVt +YWlsMHYwEAYHKoZIzj0CAQYFK4EEACIDYgAEgoPY/r89/83zHzmpbsA+kW3YflVQ +tKXO8Kl7ki5q+v1qUu3xmr4HttNxvHLOCfK798KMGg9y+NO5y4D4D2ZgLGxkNt8X +yWvhkbe3xKdGSqBpplbLT+M9FtmQ6tzzzFJVo1MwUTAdBgNVHQ4EFgQUmpLPeJBD +ID5s1AeWsVIEt6Z/ca0wHwYDVR0jBBgwFoAUmpLPeJBDID5s1AeWsVIEt6Z/ca0w +DwYDVR0TAQH/BAUwAwEB/zAKBggqhkjOPQQDAgNpADBmAjEAv4uNU4NgprBgNQxk +fIZpJCf/TpisuVsLUHXl8JrMVKKVUf7zr59GH2yiOoukfD5hAjEAlCohSA6/Ken4 +JWkKPCrfnsBZ7VX8Y+4ZqLKuG+IGAu2vQTg+Jc6M23M1vEgi1dqf +-----END CERTIFICATE----- diff --git a/tests/integration/test_reload_certificate/configs/ECcert.key b/tests/integration/test_reload_certificate/configs/ECcert.key new file mode 100644 index 00000000000..b127f8a53fe --- /dev/null +++ b/tests/integration/test_reload_certificate/configs/ECcert.key @@ -0,0 +1,6 @@ +-----BEGIN PRIVATE KEY----- 
+MIG2AgEAMBAGByqGSM49AgEGBSuBBAAiBIGeMIGbAgEBBDAJbfB78wfRHn5A4x3e +EAqrFk/hbBD+c8snbFgjQqxg4qTcp154Rc01B9V0US27MJuhZANiAASCg9j+vz3/ +zfMfOaluwD6Rbdh+VVC0pc7wqXuSLmr6/WpS7fGavge203G8cs4J8rv3wowaD3L4 +07nLgPgPZmAsbGQ23xfJa+GRt7fEp0ZKoGmmVstP4z0W2ZDq3PPMUlU= +-----END PRIVATE KEY----- diff --git a/tests/integration/test_reload_certificate/test.py b/tests/integration/test_reload_certificate/test.py index dc0c391d6f0..d37fd1bccbc 100644 --- a/tests/integration/test_reload_certificate/test.py +++ b/tests/integration/test_reload_certificate/test.py @@ -6,6 +6,7 @@ SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) cluster = ClickHouseCluster(__file__) node = cluster.add_instance('node', main_configs=["configs/first.crt", "configs/first.key", "configs/second.crt", "configs/second.key", + "configs/ECcert.crt", "configs/ECcert.key", "configs/cert.xml"]) @pytest.fixture(scope="module", autouse=True) @@ -73,3 +74,27 @@ def test_first_than_second_cert(): assert False except: assert True + +def test_ECcert_reload(): + # Set first key + change_config_to_key('first') + + # Command with correct certificate + assert node.exec_in_container(['curl', '--silent', '--cacert', '/etc/clickhouse-server/config.d/{cur_name}.crt'.format(cur_name='first'), + 'https://localhost:8443/']) == 'Ok.\n' + + # Change to other key + change_config_to_key('ECcert') + + # Command with correct certificate + assert node.exec_in_container(['curl', '--silent', '--cacert', '/etc/clickhouse-server/config.d/{cur_name}.crt'.format(cur_name='ECcert'), + 'https://localhost:8443/']) == 'Ok.\n' + + # Command with wrong certificate + # Same as previous + try: + node.exec_in_container(['curl', '--silent', '--cacert', '/etc/clickhouse-server/config.d/{cur_name}.crt'.format(cur_name='first'), + 'https://localhost:8443/']) + assert False + except: + assert True
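
A note on the signal-handling pattern introduced in patches 47-49 above: the shell convention is that a process terminated by signal N reports exit status 128 + N, which is why the handler now calls _exit(128 + signum), and the cancelled flag lets the multiquery loop stop scheduling further statements after a SIGINT. The sketch below is a minimal, standalone illustration of that idea only, not the actual ClientBase logic (the real client arms and disarms the flag around each query via QueryInterruptHandler); it assumes a POSIX system and a C++20 compiler for std::atomic_flag::test().

    #include <atomic>
    #include <cstring>
    #include <iostream>
    #include <signal.h>
    #include <unistd.h>

    static std::atomic_flag exit_on_signal;  /// clear by default since C++20

    /// First SIGINT: set the flag so the main loop can cancel remaining work.
    /// Second SIGINT (flag already set): exit with the conventional 128 + signum.
    static void interruptSignalHandler(int signum)
    {
        if (exit_on_signal.test_and_set())
            _exit(128 + signum);
    }

    int main()
    {
        struct sigaction act;
        std::memset(&act, 0, sizeof(act));
        act.sa_handler = interruptSignalHandler;
        sigemptyset(&act.sa_mask);
        if (sigaction(SIGINT, &act, nullptr))
            return 1;

        /// Stand-in for a multiquery batch: stop starting new "queries" once cancelled.
        for (int query = 0; query < 5; ++query)
        {
            if (exit_on_signal.test())  /// C++20
            {
                std::cout << "cancelled, skipping remaining queries" << std::endl;
                break;
            }
            std::cout << "running query " << query << std::endl;
            sleep(1);
        }
        return 0;
    }

Under these assumptions, pressing Ctrl+C once lets the loop stop cleanly at the next check, while pressing it twice makes the process exit with status 130 (128 + SIGINT), the same value a shell reports for a child killed by SIGINT.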