From 3ce733121131da78380bab99db545dff00abf21b Mon Sep 17 00:00:00 2001 From: George Date: Tue, 7 Sep 2021 20:07:27 +0300 Subject: [PATCH 001/438] Edited and translated to Russian --- .../table-engines/integrations/mongodb.md | 4 ++-- .../external-dicts-dict-layout.md | 5 +++-- .../table-engines/integrations/mongodb.md | 20 ++++++++++++++++--- docs/ru/operations/settings/settings-users.md | 2 +- .../external-dicts-dict-layout.md | 20 ++++++++++++++++--- .../sql-reference/statements/create/user.md | 1 + 6 files changed, 41 insertions(+), 11 deletions(-) diff --git a/docs/en/engines/table-engines/integrations/mongodb.md b/docs/en/engines/table-engines/integrations/mongodb.md index 9839893d4e8..899db9ac6ae 100644 --- a/docs/en/engines/table-engines/integrations/mongodb.md +++ b/docs/en/engines/table-engines/integrations/mongodb.md @@ -36,7 +36,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name Create a table in ClickHouse which allows to read data from MongoDB collection: -``` text +``` sql CREATE TABLE mongo_table ( key UInt64, @@ -46,7 +46,7 @@ CREATE TABLE mongo_table To read from an SSL secured MongoDB server: -``` text +``` sql CREATE TABLE mongo_table_ssl ( key UInt64, diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md index 00a9610ce91..eb8ca425d24 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md @@ -300,8 +300,9 @@ When searching for a dictionary, the cache is searched first. For each block of If keys are not found in dictionary, then update cache task is created and added into update queue. Update queue properties can be controlled with settings `max_update_queue_size`, `update_queue_push_timeout_milliseconds`, `query_wait_timeout_milliseconds`, `max_threads_for_updates`. -For cache dictionaries, the expiration [lifetime](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) of data in the cache can be set. If more time than `lifetime` has passed since loading the data in a cell, the cell’s value is not used and key becomes expired, and it is re-requested the next time it needs to be used this behaviour can be configured with setting `allow_read_expired_keys`. -This is the least effective of all the ways to store dictionaries. The speed of the cache depends strongly on correct settings and the usage scenario. A cache type dictionary performs well only when the hit rates are high enough (recommended 99% and higher). You can view the average hit rate in the `system.dictionaries` table. +For cache dictionaries, the expiration [lifetime](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) of data in the cache can be set. If more time than `lifetime` has passed since loading the data in a cell, the cell’s value is not used and key becomes expired. The key is re-requested the next time it needs to be used. This behaviour can be configured with setting `allow_read_expired_keys`. + +This is the least effective of all the ways to store dictionaries. The speed of the cache depends strongly on correct settings and the usage scenario. A cache type dictionary performs well only when the hit rates are high enough (recommended 99% and higher). 
You can view the average hit rate in the [system.dictionaries](../../../operations/system-tables/dictionaries.md) table. If setting `allow_read_expired_keys` is set to 1, by default 0. Then dictionary can support asynchronous updates. If a client requests keys and all of them are in cache, but some of them are expired, then dictionary will return expired keys for a client and request them asynchronously from the source. diff --git a/docs/ru/engines/table-engines/integrations/mongodb.md b/docs/ru/engines/table-engines/integrations/mongodb.md index 05820d03fe6..0fd463d511a 100644 --- a/docs/ru/engines/table-engines/integrations/mongodb.md +++ b/docs/ru/engines/table-engines/integrations/mongodb.md @@ -15,7 +15,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name name1 [type1], name2 [type2], ... -) ENGINE = MongoDB(host:port, database, collection, user, password); +) ENGINE = MongoDB(host:port, database, collection, user, password [, options]); ``` **Параметры движка** @@ -30,11 +30,13 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name - `password` — пароль пользователя. +- `options` — MongoDB connection string options (optional parameter). + ## Примеры использования {#usage-example} -Таблица в ClickHouse для чтения данных из колекции MongoDB: +Создание таблицы в ClickHouse для чтения данных из колекции MongoDB: -``` text +``` sql CREATE TABLE mongo_table ( key UInt64, @@ -42,6 +44,18 @@ CREATE TABLE mongo_table ) ENGINE = MongoDB('mongo1:27017', 'test', 'simple_table', 'testuser', 'clickhouse'); ``` +Чтение из сервера MongoDB, защищенного SSL: + +``` sql +CREATE TABLE mongo_table_ssl +( + key UInt64, + data String +) ENGINE = MongoDB('mongo2:27017', 'test', 'simple_table', 'testuser', 'clickhouse', 'ssl=true'); +``` + + + Запрос к таблице: ``` sql diff --git a/docs/ru/operations/settings/settings-users.md b/docs/ru/operations/settings/settings-users.md index 6a10e518817..0f85f22f7ea 100644 --- a/docs/ru/operations/settings/settings-users.md +++ b/docs/ru/operations/settings/settings-users.md @@ -28,7 +28,7 @@ toc_title: "Настройки пользователей" profile_name default - + default_database>default diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md index 06fe4ae327a..b61c2cbcbd7 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md @@ -297,9 +297,13 @@ RANGE(MIN StartDate MAX EndDate); При поиске в словаре сначала просматривается кэш. На каждый блок данных, все не найденные в кэше или устаревшие ключи запрашиваются у источника с помощью `SELECT attrs... FROM db.table WHERE id IN (k1, k2, ...)`. Затем, полученные данные записываются в кэш. -Для cache-словарей может быть задано время устаревания [lifetime](external-dicts-dict-lifetime.md) данных в кэше. Если от загрузки данных в ячейке прошло больше времени, чем `lifetime`, то значение не используется, и будет запрошено заново при следующей необходимости его использовать. +Если ключи не были найдены в словаре, то создается задание для обновления кэша, которое добавляется в очередь обновлений. Параметры очереди обновлений можно устанавливать настройками `max_update_queue_size`, `update_queue_push_timeout_milliseconds`, `query_wait_timeout_milliseconds`, `max_threads_for_updates` -Это наименее эффективный из всех способов размещения словарей. 
Скорость работы кэша очень сильно зависит от правильности настройки и сценария использования. Словарь типа cache показывает высокую производительность лишь при достаточно больших hit rate-ах (рекомендуется 99% и выше). Посмотреть средний hit rate можно в таблице `system.dictionaries`. +Для cache-словарей может быть задано время устаревания [lifetime](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) данных в кэше. Если от загрузки данных в ячейке прошло больше времени, чем `lifetime`, то значение не используется, а ключ устаревает. Ключ будет запрошен заново при следующей необходимости его использовать. Это можно настроить с помощью `allow_read_expired_keys`. + +Это наименее эффективный из всех способов размещения словарей. Скорость работы кэша очень сильно зависит от правильности настройки и сценария использования. Словарь типа cache показывает высокую производительность лишь при достаточно больших hit rate-ах (рекомендуется 99% и выше). Посмотреть средний hit rate можно в таблице [system.dictionaries](../../../operations/system-tables/dictionaries.md). + +Если параметр `allow_read_expired_keys` выставлен на 1 (0 по умолчанию), то словарь поддерживает асинхронные обновления. Если клиент запрашивает ключи, которые находятся в кэше, но при этом некоторые из них устарели, то словарь вернет устаревшие ключи клиенту и запросит их асинхронно у источника. Чтобы увеличить производительность кэша, используйте подзапрос с `LIMIT`, а снаружи вызывайте функцию со словарём. @@ -312,6 +316,16 @@ RANGE(MIN StartDate MAX EndDate); 1000000000 + + 0 + + 100000 + + 10 + + 60000 + + 4 ``` @@ -338,7 +352,7 @@ LAYOUT(CACHE(SIZE_IN_CELLS 1000000000)) ### ssd_cache {#ssd-cache} -Похож на `cache`, но хранит данные на SSD и индекс в оперативной памяти. +Похож на `cache`, но хранит данные на SSD и индекс в оперативной памяти. Все параметры, относящиеся к очереди обновлений, могут также быть применены к SSD-кэш словарям. ``` xml diff --git a/docs/ru/sql-reference/statements/create/user.md b/docs/ru/sql-reference/statements/create/user.md index 22efaa71bfc..f6248d97ba9 100644 --- a/docs/ru/sql-reference/statements/create/user.md +++ b/docs/ru/sql-reference/statements/create/user.md @@ -15,6 +15,7 @@ CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']}] [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] [DEFAULT ROLE role [,...]] + [DEFAULT DATABASE database | NONE] [GRANTEES {user | role | ANY | NONE} [,...] [EXCEPT {user | role} [,...]]] [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY | WRITABLE] | PROFILE 'profile_name'] [,...] 
``` From 8f328ca6c4bb07c5918e9ad0059101d5a715638e Mon Sep 17 00:00:00 2001 From: gyuton <40863448+gyuton@users.noreply.github.com> Date: Fri, 10 Sep 2021 03:26:10 +0300 Subject: [PATCH 002/438] Apply suggestions from code review Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- docs/ru/engines/table-engines/integrations/mongodb.md | 4 ++-- docs/ru/operations/settings/settings-users.md | 2 +- .../external-dictionaries/external-dicts-dict-layout.md | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/ru/engines/table-engines/integrations/mongodb.md b/docs/ru/engines/table-engines/integrations/mongodb.md index 0fd463d511a..7a56af4c274 100644 --- a/docs/ru/engines/table-engines/integrations/mongodb.md +++ b/docs/ru/engines/table-engines/integrations/mongodb.md @@ -34,7 +34,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name ## Примеры использования {#usage-example} -Создание таблицы в ClickHouse для чтения данных из колекции MongoDB: +Создание таблицы в ClickHouse для чтения данных из коллекции MongoDB: ``` sql CREATE TABLE mongo_table @@ -44,7 +44,7 @@ CREATE TABLE mongo_table ) ENGINE = MongoDB('mongo1:27017', 'test', 'simple_table', 'testuser', 'clickhouse'); ``` -Чтение из сервера MongoDB, защищенного SSL: +Чтение с сервера MongoDB, защищенного SSL: ``` sql CREATE TABLE mongo_table_ssl diff --git a/docs/ru/operations/settings/settings-users.md b/docs/ru/operations/settings/settings-users.md index 0f85f22f7ea..4570ce38bad 100644 --- a/docs/ru/operations/settings/settings-users.md +++ b/docs/ru/operations/settings/settings-users.md @@ -28,7 +28,7 @@ toc_title: "Настройки пользователей" profile_name default - default_database>default + default diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md index b61c2cbcbd7..f0b4eb614c5 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md @@ -297,13 +297,13 @@ RANGE(MIN StartDate MAX EndDate); При поиске в словаре сначала просматривается кэш. На каждый блок данных, все не найденные в кэше или устаревшие ключи запрашиваются у источника с помощью `SELECT attrs... FROM db.table WHERE id IN (k1, k2, ...)`. Затем, полученные данные записываются в кэш. -Если ключи не были найдены в словаре, то создается задание для обновления кэша, которое добавляется в очередь обновлений. Параметры очереди обновлений можно устанавливать настройками `max_update_queue_size`, `update_queue_push_timeout_milliseconds`, `query_wait_timeout_milliseconds`, `max_threads_for_updates` +Если ключи не были найдены в словаре, то для обновления кэша создается задание и добавляется в очередь обновлений. Параметры очереди обновлений можно устанавливать настройками `max_update_queue_size`, `update_queue_push_timeout_milliseconds`, `query_wait_timeout_milliseconds`, `max_threads_for_updates` Для cache-словарей может быть задано время устаревания [lifetime](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) данных в кэше. Если от загрузки данных в ячейке прошло больше времени, чем `lifetime`, то значение не используется, а ключ устаревает. Ключ будет запрошен заново при следующей необходимости его использовать. Это можно настроить с помощью `allow_read_expired_keys`. -Это наименее эффективный из всех способов размещения словарей. 
Скорость работы кэша очень сильно зависит от правильности настройки и сценария использования. Словарь типа cache показывает высокую производительность лишь при достаточно больших hit rate-ах (рекомендуется 99% и выше). Посмотреть средний hit rate можно в таблице [system.dictionaries](../../../operations/system-tables/dictionaries.md). +Это наименее эффективный из всех способов размещения словарей. Скорость работы кэша очень сильно зависит от правильности настройки и сценария использования. Словарь типа `cache` показывает высокую производительность лишь при достаточно большой частоте успешных обращений (рекомендуется 99% и выше). Посмотреть среднюю частоту успешных обращений (`hit rate`) можно в таблице [system.dictionaries](../../../operations/system-tables/dictionaries.md). -Если параметр `allow_read_expired_keys` выставлен на 1 (0 по умолчанию), то словарь поддерживает асинхронные обновления. Если клиент запрашивает ключи, которые находятся в кэше, но при этом некоторые из них устарели, то словарь вернет устаревшие ключи клиенту и запросит их асинхронно у источника. +Если параметр `allow_read_expired_keys` выставлен в 1 (0 по умолчанию), то словарь поддерживает асинхронные обновления. Если клиент запрашивает ключи, которые находятся в кэше, но при этом некоторые из них устарели, то словарь вернет устаревшие ключи клиенту и запросит их асинхронно у источника. Чтобы увеличить производительность кэша, используйте подзапрос с `LIMIT`, а снаружи вызывайте функцию со словарём. @@ -352,7 +352,7 @@ LAYOUT(CACHE(SIZE_IN_CELLS 1000000000)) ### ssd_cache {#ssd-cache} -Похож на `cache`, но хранит данные на SSD и индекс в оперативной памяти. Все параметры, относящиеся к очереди обновлений, могут также быть применены к SSD-кэш словарям. +Похож на `cache`, но хранит данные на SSD, а индекс в оперативной памяти. Все параметры, относящиеся к очереди обновлений, могут также быть применены к SSD-кэш словарям. ``` xml From ae5ee23c83e75035653f2571540474ee5e661f07 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Sep 2021 12:12:11 +0300 Subject: [PATCH 003/438] Trying self hosted action --- .github/workflows/hello-world.yml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 .github/workflows/hello-world.yml diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml new file mode 100644 index 00000000000..5dd0429bce7 --- /dev/null +++ b/.github/workflows/hello-world.yml @@ -0,0 +1,23 @@ +name: GitHub Actions Hello self hosted +on: + push: + branches: + - master + pull_request: + branches: + - master +jobs: + Explore-GitHub-Actions: + runs-on: [self-hosted] + steps: + - run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event." + - run: echo "🐧 This job is now running on a ${{ runner.os }} server hosted by me!" + - run: echo "🔎 The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}." + - name: Check out repository code + uses: actions/checkout@v2 + - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner." + - run: echo "🖥️ The workflow is now ready to test your code on the runner." + - name: List files in the repository + run: | + ls ${{ github.workspace }} + - run: echo "🍏 This job's status is ${{ job.status }}." 
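The workflow above only echoes values from the GitHub Actions context (`github.event_name`, `github.ref`, `github.repository`, `runner.os`) and checks out the sources. On the runner the same information is also exposed to scripts as environment variables, which is how the `tests/ci/style_check.py` script introduced later in this series picks it up. A minimal Python sketch (the fallback values are placeholders for running outside CI, not something the workflow sets):

```python
import os

# Default variables provided by the GitHub Actions runner; the fallbacks are
# only used when the script is run outside of CI.
repo = os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")
ref = os.getenv("GITHUB_REF", "")            # e.g. refs/pull/<number>/merge
commit_sha = os.getenv("GITHUB_SHA", "")
run_id = os.getenv("GITHUB_RUN_ID", "0")
workspace = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../"))
temp_path = os.getenv("RUNNER_TEMP", os.path.abspath("./temp"))

print(f"Checked out {repo}@{commit_sha} ({ref}) for run {run_id}")
print(f"Sources in {workspace}, scratch directory {temp_path}")
```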
From 9dc7e00c2e2bb69b1429b18cbb27c05dcb6c3561 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Sep 2021 12:33:41 +0300 Subject: [PATCH 004/438] Trying docker --- .github/workflows/hello-world.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index 5dd0429bce7..ed0cf36547e 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -13,6 +13,7 @@ jobs: - run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event." - run: echo "🐧 This job is now running on a ${{ runner.os }} server hosted by me!" - run: echo "🔎 The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}." + - run: docker run hello-world - name: Check out repository code uses: actions/checkout@v2 - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner." From 4014b5c11177c44cc6f4c85e7d9edf447ee4deb1 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Sep 2021 12:55:31 +0300 Subject: [PATCH 005/438] Test --- .github/workflows/hello-world.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index ed0cf36547e..8ba33da6d5d 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -18,6 +18,7 @@ jobs: uses: actions/checkout@v2 - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner." - run: echo "🖥️ The workflow is now ready to test your code on the runner." + - run: docker run --cap-add=SYS_PTRACE --volume=${{ github.workspace }}:/ClickHouse --volume=${{ github.workspace }}:/test_output clickhouse/style-test:latest - name: List files in the repository run: | ls ${{ github.workspace }} From b4107784f14552d7e26e5fab05e3c85c6ea7de65 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Sep 2021 13:01:02 +0300 Subject: [PATCH 006/438] Better --- .github/workflows/hello-world.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index 8ba33da6d5d..9ef1c19fd3a 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -18,8 +18,10 @@ jobs: uses: actions/checkout@v2 - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner." - run: echo "🖥️ The workflow is now ready to test your code on the runner." - - run: docker run --cap-add=SYS_PTRACE --volume=${{ github.workspace }}:/ClickHouse --volume=${{ github.workspace }}:/test_output clickhouse/style-test:latest + - run: mkdir -p $RUNNER_TEMP/style_check_result + - run: docker run --cap-add=SYS_PTRACE --volume=$GITHUB_WORKSPACE:/ClickHouse --volume=$RUNNER_TEMP/style_check_result:/test_output clickhouse/style-test:latest - name: List files in the repository run: | ls ${{ github.workspace }} + ls $RUNNER_TEMP/style_check_result - run: echo "🍏 This job's status is ${{ job.status }}." 
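In the commit above both `--volume` flags point at `${{ github.workspace }}`, so the container would write its results straight into the checkout; the next commit gives the output its own directory under `$RUNNER_TEMP`. A rough Python sketch of the same invocation, in the spirit of the `tests/ci/style_check.py` script added later (image tag and mount points are taken from the workflow, the rest is illustrative):

```python
import os
import subprocess

repo_path = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../"))
result_path = os.path.join(os.getenv("RUNNER_TEMP", "./temp"), "style_check_result")
os.makedirs(result_path, exist_ok=True)

# Sources are mounted at /ClickHouse, results go to a separate directory mounted
# at /test_output so the report does not end up inside the checkout.
subprocess.check_call(
    "docker run --cap-add=SYS_PTRACE "
    f"--volume={repo_path}:/ClickHouse "
    f"--volume={result_path}:/test_output "
    "clickhouse/style-test:latest",
    shell=True,
)
```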
From b6219376e334b3049ecb802a9b37ff4c4e79a7f9 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Sep 2021 14:52:21 +0300 Subject: [PATCH 007/438] Trying style check --- .github/workflows/hello-world.yml | 9 +- tests/ci/report.py | 298 ++++++++++++++++++++++++++++++ tests/ci/style_check.py | 64 +++++++ 3 files changed, 368 insertions(+), 3 deletions(-) create mode 100644 tests/ci/report.py create mode 100644 tests/ci/style_check.py diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index 9ef1c19fd3a..36bd25c8ad3 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -18,10 +18,13 @@ jobs: uses: actions/checkout@v2 - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner." - run: echo "🖥️ The workflow is now ready to test your code on the runner." - - run: mkdir -p $RUNNER_TEMP/style_check_result - - run: docker run --cap-add=SYS_PTRACE --volume=$GITHUB_WORKSPACE:/ClickHouse --volume=$RUNNER_TEMP/style_check_result:/test_output clickhouse/style-test:latest + - run: cd $GITHUB_WORKSPACE/test/ci && python3 style_check.py - name: List files in the repository run: | ls ${{ github.workspace }} - ls $RUNNER_TEMP/style_check_result + ls $RUNNER_TEMP + - uses: actions/upload-artifact@v2 + with: + name: report + path: $RUNNER_TEMP/report.html - run: echo "🍏 This job's status is ${{ job.status }}." diff --git a/tests/ci/report.py b/tests/ci/report.py new file mode 100644 index 00000000000..94defcfd648 --- /dev/null +++ b/tests/ci/report.py @@ -0,0 +1,298 @@ +# -*- coding: utf-8 -*- +import os +import datetime + +### FIXME: BEST FRONTEND PRACTICIES BELOW + +HTML_BASE_TEST_TEMPLATE = """ + + + + {title} + + +
+ +

{header}

+ +{test_part} + + + +""" + +HTML_TEST_PART = """ + + +{headers} + +{rows} +
+""" + +BASE_HEADERS = ['Test name', 'Test status'] + + +def _format_header(header, branch_name, branch_url=None): + result = ' '.join([w.capitalize() for w in header.split(' ')]) + result = result.replace("Clickhouse", "ClickHouse") + result = result.replace("clickhouse", "ClickHouse") + if 'ClickHouse' not in result: + result = 'ClickHouse ' + result + result += ' for ' + if branch_url: + result += '{name}'.format(url=branch_url, name=branch_name) + else: + result += branch_name + return result + + +def _get_status_style(status): + style = "font-weight: bold;" + if status in ('OK', 'success', 'PASSED'): + style += 'color: #0A0;' + elif status in ('FAIL', 'failure', 'error', 'FAILED', 'Timeout'): + style += 'color: #F00;' + else: + style += 'color: #FFB400;' + return style + + +def _get_html_url(url): + if isinstance(url, str): + return '{name}'.format(url=url, name=os.path.basename(url)) + if isinstance(url, tuple): + return '{name}'.format(url=url[0], name=url[1]) + return '' + + +def create_test_html_report(header, test_result, raw_log_url, task_url, branch_url, branch_name, commit_url, additional_urls=[]): + if test_result: + rows_part = "" + num_fails = 0 + has_test_time = False + has_test_logs = False + for result in test_result: + test_name = result[0] + test_status = result[1] + + test_logs = None + test_time = None + if len(result) > 2: + test_time = result[2] + has_test_time = True + + if len(result) > 3: + test_logs = result[3] + has_test_logs = True + + row = "" + row += "" + test_name + "" + style = _get_status_style(test_status) + + # Allow to quickly scroll to the first failure. + is_fail = test_status == "FAIL" or test_status == 'FLAKY' + is_fail_id = "" + if is_fail: + num_fails = num_fails + 1 + is_fail_id = 'id="fail' + str(num_fails) + '" ' + + row += ''.format(style) + test_status + "" + + if test_time is not None: + row += "" + test_time + "" + + if test_logs is not None: + test_logs_html = "
".join([_get_html_url(url) for url in test_logs]) + row += "" + test_logs_html + "" + + row += "" + rows_part += row + + headers = BASE_HEADERS + if has_test_time: + headers.append('Test time, sec.') + if has_test_logs: + headers.append('Logs') + + headers = ''.join(['' + h + '' for h in headers]) + test_part = HTML_TEST_PART.format(headers=headers, rows=rows_part) + else: + test_part = "" + + additional_html_urls = "" + for url in additional_urls: + additional_html_urls += ' ' + _get_html_url(url) + + result = HTML_BASE_TEST_TEMPLATE.format( + title=_format_header(header, branch_name), + header=_format_header(header, branch_name, branch_url), + raw_log_name=os.path.basename(raw_log_url), + raw_log_url=raw_log_url, + task_url=task_url, + test_part=test_part, + branch_name=branch_name, + commit_url=commit_url, + additional_urls=additional_html_urls + ) + return result + + +HTML_BASE_BUILD_TEMPLATE = """ + + + + +{title} + + +
+

{header}

+ + + + + + + + + + + + +{rows} +
CompilerBuild typeSanitizerBundledSplittedStatusBuild logBuild timeArtifacts
+ + + +""" + +LINK_TEMPLATE = '{text}' + + +def create_build_html_report(header, build_results, build_logs_urls, artifact_urls_list, task_url, branch_url, branch_name, commit_url): + rows = "" + for (build_result, build_log_url, artifact_urls) in zip(build_results, build_logs_urls, artifact_urls_list): + row = "" + row += "{}".format(build_result.compiler) + if build_result.build_type: + row += "{}".format(build_result.build_type) + else: + row += "{}".format("relwithdebuginfo") + if build_result.sanitizer: + row += "{}".format(build_result.sanitizer) + else: + row += "{}".format("none") + + row += "{}".format(build_result.bundled) + row += "{}".format(build_result.splitted) + + if build_result.status: + style = _get_status_style(build_result.status) + row += '{}'.format(style, build_result.status) + else: + style = _get_status_style("error") + row += '{}'.format(style, "error") + + row += 'link'.format(build_log_url) + + if build_result.elapsed_seconds: + delta = datetime.timedelta(seconds=build_result.elapsed_seconds) + else: + delta = 'unknown' + + row += '{}'.format(str(delta)) + + links = "" + link_separator = "
" + if artifact_urls: + for artifact_url in artifact_urls: + links += LINK_TEMPLATE.format(text=os.path.basename(artifact_url), url=artifact_url) + links += link_separator + if links: + links = links[:-len(link_separator)] + row += "{}".format(links) + + row += "" + rows += row + return HTML_BASE_BUILD_TEMPLATE.format( + title=_format_header(header, branch_name), + header=_format_header(header, branch_name, branch_url), + rows=rows, + task_url=task_url, + branch_name=branch_name, + commit_url=commit_url) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py new file mode 100644 index 00000000000..671bd2c6893 --- /dev/null +++ b/tests/ci/style_check.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python3 +from report import create_test_html_report +import logging +import subprocess +import os +import csv + + +def process_result(result_folder): + test_results = [] + additional_files = [] + # Just upload all files from result_folder. + # If task provides processed results, then it's responsible for content of result_folder. + if os.path.exists(result_folder): + test_files = [f for f in os.listdir(result_folder) if os.path.isfile(os.path.join(result_folder, f))] + additional_files = [os.path.join(result_folder, f) for f in test_files] + + status_path = os.path.join(result_folder, "check_status.tsv") + logging.info("Found test_results.tsv") + status = list(csv.reader(open(status_path, 'r'), delimiter='\t')) + if len(status) != 1 or len(status[0]) != 2: + return "error", "Invalid check_status.tsv", test_results, additional_files + state, description = status[0][0], status[0][1] + + try: + results_path = os.path.join(result_folder, "test_results.tsv") + test_results = list(csv.reader(open(results_path, 'r'), delimiter='\t')) + if len(test_results) == 0: + raise Exception("Empty results") + + return state, description, test_results, additional_files + except Exception: + if state == "success": + state, description = "error", "Failed to read test_results.tsv" + return state, description, test_results, additional_files + +def get_pr_url_from_ref(ref): + try: + return ref.split("/")[2] + except: + return "master" + +if __name__ == "__main__": + repo_path = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")) + temp_path = os.getenv("RUNNER_TEMP", os.path.abspath("./temp")) + run_id = os.getenv("GITHUB_RUN_ID", 0) + commit_sha = os.getenv("GITHUB_SHA", 0) + ref = os.getenv("GITHUB_REF", "") + docker_image_version = os.getenv("DOCKER_IMAGE_VERSION", "latest") + + if not os.path.exists(temp_path): + os.makedirs(temp_path) + + subprocess.check_output(f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output clickhouse/style-test:{docker_image_version}", shell=True) + state, description, test_results, additional_files = process_result(temp_path) + task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{run_id}" + branch_url = "https://github.com/ClickHouse/ClickHouse/pull/" + str(get_pr_url_from_ref(ref)) + branch_name = "PR #" + str(get_pr_url_from_ref(ref)) + commit_url = f"https://github.com/ClickHouse/ClickHouse/commit/{commit_sha}" + raw_log_url = "noop" + + html_report = create_test_html_report("Style Check (actions)", test_results, raw_log_url, task_url, branch_url, branch_name, commit_url) + with open(os.path.join(temp_path, 'report.html'), 'w') as f: + f.write(html_report) From 2931810dfa39aad1994bebccc4c7318d4377ea29 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Sep 2021 14:53:34 +0300 Subject: [PATCH 008/438] Fix --- 
.github/workflows/hello-world.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index 36bd25c8ad3..3868dfe0cad 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -18,7 +18,7 @@ jobs: uses: actions/checkout@v2 - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner." - run: echo "🖥️ The workflow is now ready to test your code on the runner." - - run: cd $GITHUB_WORKSPACE/test/ci && python3 style_check.py + - run: cd $GITHUB_WORKSPACE/tests/ci && python3 style_check.py - name: List files in the repository run: | ls ${{ github.workspace }} From 499e713959edae2281f018389705d3749d0e7979 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Sep 2021 15:12:47 +0300 Subject: [PATCH 009/438] Trying other way --- .github/workflows/hello-world.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index 3868dfe0cad..724d1d742cc 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -26,5 +26,6 @@ jobs: - uses: actions/upload-artifact@v2 with: name: report - path: $RUNNER_TEMP/report.html + path: ${{ env.RUNNER_TEMP }}/report.html + - run: ls -la $RUNNER_TEMP - run: echo "🍏 This job's status is ${{ job.status }}." From 53fe271c2ef1f1525a2f81bb9573c7f8fc419e05 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Sep 2021 15:15:31 +0300 Subject: [PATCH 010/438] One more time --- .github/workflows/hello-world.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index 724d1d742cc..ed822c32d40 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -26,6 +26,6 @@ jobs: - uses: actions/upload-artifact@v2 with: name: report - path: ${{ env.RUNNER_TEMP }}/report.html + path: $RUNNER_TEMP/report.html - run: ls -la $RUNNER_TEMP - run: echo "🍏 This job's status is ${{ job.status }}." From 1991e0a52836cc198829fbfbcc53ecc518f332d7 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Sep 2021 15:18:58 +0300 Subject: [PATCH 011/438] One more --- .github/workflows/hello-world.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index ed822c32d40..2917a6bb31b 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -26,6 +26,6 @@ jobs: - uses: actions/upload-artifact@v2 with: name: report - path: $RUNNER_TEMP/report.html + path: $GITHUB_WORKSPACE/tests/ci/style_check.py - run: ls -la $RUNNER_TEMP - run: echo "🍏 This job's status is ${{ job.status }}." From ee32c34d9a94901e1af6393b7a3ddd7aa21053b2 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Sep 2021 15:23:04 +0300 Subject: [PATCH 012/438] Something wrong --- .github/workflows/hello-world.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index 2917a6bb31b..6c1c8e0dd85 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -26,6 +26,6 @@ jobs: - uses: actions/upload-artifact@v2 with: name: report - path: $GITHUB_WORKSPACE/tests/ci/style_check.py + path: ${{ github.workspace }}/tests/ci/style_check.py - run: ls -la $RUNNER_TEMP - run: echo "🍏 This job's status is ${{ job.status }}." 
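The commits above try several spellings for the `path:` given to `actions/upload-artifact`. Inputs under `with:` are not passed through a shell, so `$RUNNER_TEMP` stays a literal string there; the expression form `${{ runner.temp }}` (or `${{ github.workspace }}`) is what the following commits settle on. Inside `run:` steps and in the Python scripts the same directory is available through the environment, so the script can compute the location the artifact step will later pick up. A small illustrative snippet (not part of the patches):

```python
import os

# RUNNER_TEMP points at the same directory the workflow refers to as
# ${{ runner.temp }} in the upload-artifact step.
report_path = os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), "report.html")
print("report will be uploaded from", report_path)
```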
From 686f8b4f09a24ee4b8b5e31274a15eef56c1fc36 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Sep 2021 15:40:13 +0300 Subject: [PATCH 013/438] One more try --- .github/workflows/hello-world.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index 6c1c8e0dd85..a81cc31fff0 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -26,6 +26,6 @@ jobs: - uses: actions/upload-artifact@v2 with: name: report - path: ${{ github.workspace }}/tests/ci/style_check.py + path: ${{ runner.temp }}/report.hml - run: ls -la $RUNNER_TEMP - run: echo "🍏 This job's status is ${{ job.status }}." From e8c0f357080eaed23671b4a5b2801f65b9fa8f75 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Sep 2021 15:43:55 +0300 Subject: [PATCH 014/438] Report html --- .github/workflows/hello-world.yml | 2 +- tests/ci/style_check.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index a81cc31fff0..8ef331a2564 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -26,6 +26,6 @@ jobs: - uses: actions/upload-artifact@v2 with: name: report - path: ${{ runner.temp }}/report.hml + path: report.hml - run: ls -la $RUNNER_TEMP - run: echo "🍏 This job's status is ${{ job.status }}." diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 671bd2c6893..9fd55b372c2 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -60,5 +60,5 @@ if __name__ == "__main__": raw_log_url = "noop" html_report = create_test_html_report("Style Check (actions)", test_results, raw_log_url, task_url, branch_url, branch_name, commit_url) - with open(os.path.join(temp_path, 'report.html'), 'w') as f: + with open('report.html', 'w') as f: f.write(html_report) From 03c6a31e7c1528cb3b10cbedc6c21c17bc753b2f Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Sep 2021 15:49:52 +0300 Subject: [PATCH 015/438] Fix --- .github/workflows/hello-world.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index 8ef331a2564..c1ba922ff92 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -26,6 +26,6 @@ jobs: - uses: actions/upload-artifact@v2 with: name: report - path: report.hml + path: ${{ runner.temp }}/report.html - run: ls -la $RUNNER_TEMP - run: echo "🍏 This job's status is ${{ job.status }}." 
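The commits above and the "Followup" commit below move `report.html` between the working directory and the runner temp directory; the HTML itself comes from `create_test_html_report` in `tests/ci/report.py`, which takes the check header, a list of per-test rows and a few URLs. A hypothetical call, with placeholder test names and URLs, might look like this:

```python
import os
from report import create_test_html_report

# Each row is (test name, status) with an optional duration (as a string) and a
# list of log links, matching what process_result() reads from test_results.tsv.
test_results = [
    ("duplicate includes check", "OK", "1"),
    ("shellcheck", "FAIL", "12", ["https://example.com/shellcheck.log"]),
]

html = create_test_html_report(
    header="Style Check (actions)",
    test_result=test_results,
    raw_log_url="https://example.com/raw.log",
    task_url="https://github.com/ClickHouse/ClickHouse/actions/runs/0",
    branch_url="https://github.com/ClickHouse/ClickHouse/pull/0",
    branch_name="PR #0",
    commit_url="https://github.com/ClickHouse/ClickHouse/commit/0000000",
)

# Write the report where the upload-artifact step expects it.
report_path = os.path.join(os.getenv("RUNNER_TEMP", "."), "report.html")
with open(report_path, "w") as f:
    f.write(html)
```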
From bb778cc0fe62be23635a3c60d719fedd68bd301d Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Sep 2021 15:50:24 +0300 Subject: [PATCH 016/438] Followup --- tests/ci/style_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 9fd55b372c2..671bd2c6893 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -60,5 +60,5 @@ if __name__ == "__main__": raw_log_url = "noop" html_report = create_test_html_report("Style Check (actions)", test_results, raw_log_url, task_url, branch_url, branch_name, commit_url) - with open('report.html', 'w') as f: + with open(os.path.join(temp_path, 'report.html'), 'w') as f: f.write(html_report) From 7538f6f1686bd06e3b065ab69e88311b2a790bfd Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Sep 2021 17:27:03 +0300 Subject: [PATCH 017/438] Better stylecheck --- .github/workflows/hello-world.yml | 7 ++- tests/ci/compress_files.py | 51 ++++++++++++++++ tests/ci/s3_helper.py | 99 +++++++++++++++++++++++++++++++ tests/ci/style_check.py | 57 +++++++++++++++--- 4 files changed, 204 insertions(+), 10 deletions(-) create mode 100644 tests/ci/compress_files.py create mode 100644 tests/ci/s3_helper.py diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index c1ba922ff92..155e9487ff4 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -1,4 +1,5 @@ name: GitHub Actions Hello self hosted +desction: Trying GithubActions on: push: branches: @@ -18,7 +19,11 @@ jobs: uses: actions/checkout@v2 - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner." - run: echo "🖥️ The workflow is now ready to test your code on the runner." - - run: cd $GITHUB_WORKSPACE/tests/ci && python3 style_check.py + - name: Style Check + env: + YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} + YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} + run: cd $GITHUB_WORKSPACE/tests/ci && python3 style_check.py - name: List files in the repository run: | ls ${{ github.workspace }} diff --git a/tests/ci/compress_files.py b/tests/ci/compress_files.py new file mode 100644 index 00000000000..f095b04872b --- /dev/null +++ b/tests/ci/compress_files.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 +import subprocess +import logging +import os + +def compress_file_fast(path, archive_path): + if os.path.exists('/usr/bin/pigz'): + subprocess.check_call("pigz < {} > {}".format(path, archive_path), shell=True) + else: + subprocess.check_call("gzip < {} > {}".format(path, archive_path), shell=True) + + +def compress_fast(path, archive_path, exclude=None): + pigz_part = '' + if os.path.exists('/usr/bin/pigz'): + logging.info("pigz found, will compress and decompress faster") + pigz_part = "--use-compress-program='pigz'" + else: + pigz_part = '-z' + logging.info("no pigz, compressing with default tar") + + if exclude is None: + exclude_part = "" + elif isinstance(exclude, list): + exclude_part = " ".join(["--exclude {}".format(x) for x in exclude]) + else: + exclude_part = "--exclude {}".format(str(exclude)) + + fname = os.path.basename(path) + if os.path.isfile(path): + path = os.path.dirname(path) + else: + path += "/.." 
+ cmd = "tar {} {} -cf {} -C {} {}".format(pigz_part, exclude_part, archive_path, path, fname) + logging.debug("compress_fast cmd:{}".format(cmd)) + subprocess.check_call(cmd, shell=True) + + +def decompress_fast(archive_path, result_path=None): + pigz_part = '' + if os.path.exists('/usr/bin/pigz'): + logging.info("pigz found, will compress and decompress faster ('{}' -> '{}')".format(archive_path, result_path)) + pigz_part = "--use-compress-program='pigz'" + else: + pigz_part = '-z' + logging.info("no pigz, decompressing with default tar ('{}' -> '{}')".format(archive_path, result_path)) + + if result_path is None: + subprocess.check_call("tar {} -xf {}".format(pigz_part, archive_path), shell=True) + else: + subprocess.check_call("tar {} -xf {} -C {}".format(pigz_part, archive_path, result_path), shell=True) diff --git a/tests/ci/s3_helper.py b/tests/ci/s3_helper.py new file mode 100644 index 00000000000..8a170da44f8 --- /dev/null +++ b/tests/ci/s3_helper.py @@ -0,0 +1,99 @@ +# -*- coding: utf-8 -*- +import hashlib +import logging +import os +import boto3 +from botocore.exceptions import ClientError, BotoCoreError +from multiprocessing.dummy import Pool +from compress_files import compress_file_fast + +def _md5(fname): + hash_md5 = hashlib.md5() + with open(fname, "rb") as f: + for chunk in iter(lambda: f.read(4096), b""): + hash_md5.update(chunk) + logging.debug("MD5 for {} is {}".format(fname, hash_md5.hexdigest())) + return hash_md5.hexdigest() + + +def _flatten_list(lst): + result = [] + for elem in lst: + if isinstance(elem, list): + result += _flatten_list(elem) + else: + result.append(elem) + return result + + +class S3Helper(object): + def __init__(self, host, aws_access_key_id, aws_secret_access_key): + self.session = boto3.session.Session(aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key) + self.client = self.session.client('s3', endpoint_url=host) + + def _upload_file_to_s3(self, bucket_name, file_path, s3_path): + logging.debug("Start uploading {} to bucket={} path={}".format(file_path, bucket_name, s3_path)) + metadata = {} + if os.path.getsize(file_path) < 64 * 1024 * 1024: + if s3_path.endswith("txt") or s3_path.endswith("log") or s3_path.endswith("err") or s3_path.endswith("out"): + metadata['ContentType'] = "text/plain; charset=utf-8" + logging.info("Content type %s for file path %s", "text/plain; charset=utf-8", file_path) + elif s3_path.endswith("html"): + metadata['ContentType'] = "text/html; charset=utf-8" + logging.info("Content type %s for file path %s", "text/html; charset=utf-8", file_path) + else: + logging.info("No content type provied for %s", file_path) + else: + if s3_path.endswith("txt") or s3_path.endswith("log") or s3_path.endswith("err") or s3_path.endswith("out"): + logging.info("Going to compress file log file %s to %s", file_path, file_path + ".gz") + compress_file_fast(file_path, file_path + ".gz") + file_path += ".gz" + s3_path += ".gz" + else: + logging.info("Processing file without compression") + logging.info("File is too large, do not provide content type") + + self.client.upload_file(file_path, bucket_name, s3_path, ExtraArgs=metadata) + logging.info("Upload {} to {}. 
Meta: {}".format(file_path, s3_path, metadata)) + return "https://storage.yandexcloud.net/{bucket}/{path}".format(bucket=bucket_name, path=s3_path) + + def upload_test_report_to_s3(self, file_path, s3_path): + return self._upload_file_to_s3('clickhouse-test-reports', file_path, s3_path) + + def upload_build_file_to_s3(self, file_path, s3_path): + return self._upload_file_to_s3('clickhouse-builds', file_path, s3_path) + + def _upload_folder_to_s3(self, folder_path, s3_folder_path, bucket_name, keep_dirs_in_s3_path, upload_symlinks): + logging.info("Upload folder '{}' to bucket={} of s3 folder '{}'".format(folder_path, bucket_name, s3_folder_path)) + if not os.path.exists(folder_path): + return [] + files = os.listdir(folder_path) + if not files: + return [] + + p = Pool(min(len(files), 30)) + + def task(file_name): + full_fs_path = os.path.join(folder_path, file_name) + if keep_dirs_in_s3_path: + full_s3_path = s3_folder_path + "/" + os.path.basename(folder_path) + else: + full_s3_path = s3_folder_path + + if os.path.isdir(full_fs_path): + return self._upload_folder_to_s3(full_fs_path, full_s3_path, bucket_name, keep_dirs_in_s3_path, upload_symlinks) + + if os.path.islink(full_fs_path): + if upload_symlinks: + return self._upload_file_to_s3(bucket_name, full_fs_path, full_s3_path + "/" + file_name) + return [] + + return self._upload_file_to_s3(bucket_name, full_fs_path, full_s3_path + "/" + file_name) + + return sorted(_flatten_list(list(p.map(task, files)))) + + def upload_build_folder_to_s3(self, folder_path, s3_folder_path, keep_dirs_in_s3_path=True, upload_symlinks=True): + return self._upload_folder_to_s3(folder_path, s3_folder_path, 'clickhouse-builds', keep_dirs_in_s3_path, upload_symlinks) + + def upload_test_folder_to_s3(self, folder_path, s3_folder_path): + return self._upload_folder_to_s3(folder_path, s3_folder_path, 'clickhouse-test-reports', True, True) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 671bd2c6893..05274e78386 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -4,6 +4,19 @@ import logging import subprocess import os import csv +from s3_helper import S3Helper + + +def process_logs(s3_client, additional_logs, s3_path_prefix): + additional_urls = [] + for log_path in additional_logs: + if log_path: + additional_urls.append( + s3_client.upload_test_report_to_s3( + log_path, + s3_path_prefix + "/" + os.path.basename(log_path))) + + return additional_urls def process_result(result_folder): @@ -34,6 +47,31 @@ def process_result(result_folder): state, description = "error", "Failed to read test_results.tsv" return state, description, test_results, additional_files +def upload_results(s3_client, pr_number, commit_sha, state, description, test_results, additional_files): + s3_path_prefix = f"{pr_number}/{commit_sha}/style_check" + additional_urls = process_logs(s3_client, additional_files, s3_path_prefix) + + # Add link to help. Anchors in the docs must be adjusted accordingly. 
+ branch_url = "https://github.com/ClickHouse/ClickHouse/commits/master" + branch_name = "master" + if pr_number != 0: + branch_name = "PR #{}".format(pr_number) + branch_url = "https://github.com/ClickHouse/ClickHouse/pull/" + str(pr_number) + commit_url = f"https://github.com/ClickHouse/ClickHouse/commit/{commit_sha}" + + task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{run_id}" + + raw_log_url = additional_urls[0] + additional_urls.pop(0) + + html_report = create_test_html_report("Style Check (actions)", test_results, raw_log_url, task_url, branch_url, branch_name, commit_url, additional_urls) + with open('report.html', 'w') as f: + f.write(html_report) + + url = s3_client.upload_test_report_to_s3('report.html', s3_path_prefix + ".html") + logging.info("Search result in url %s", url) + + def get_pr_url_from_ref(ref): try: return ref.split("/")[2] @@ -41,24 +79,25 @@ def get_pr_url_from_ref(ref): return "master" if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) repo_path = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")) temp_path = os.getenv("RUNNER_TEMP", os.path.abspath("./temp")) run_id = os.getenv("GITHUB_RUN_ID", 0) commit_sha = os.getenv("GITHUB_SHA", 0) ref = os.getenv("GITHUB_REF", "") + aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") + aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") + docker_image_version = os.getenv("DOCKER_IMAGE_VERSION", "latest") + if not aws_secret_key_id or not aws_secret_key: + logging.info("No secrets, will not upload anything to S3") + + s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) if not os.path.exists(temp_path): os.makedirs(temp_path) subprocess.check_output(f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output clickhouse/style-test:{docker_image_version}", shell=True) - state, description, test_results, additional_files = process_result(temp_path) - task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{run_id}" - branch_url = "https://github.com/ClickHouse/ClickHouse/pull/" + str(get_pr_url_from_ref(ref)) - branch_name = "PR #" + str(get_pr_url_from_ref(ref)) - commit_url = f"https://github.com/ClickHouse/ClickHouse/commit/{commit_sha}" - raw_log_url = "noop" - html_report = create_test_html_report("Style Check (actions)", test_results, raw_log_url, task_url, branch_url, branch_name, commit_url) - with open(os.path.join(temp_path, 'report.html'), 'w') as f: - f.write(html_report) + state, description, test_results, additional_files = process_result(temp_path) + upload_results(s3_helper, get_pr_url_from_ref(ref), commit_sha, state, description, test_results, additional_files) From f14ee387d55fe7bd734c258d10c7c0a6b738762c Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Sep 2021 17:28:05 +0300 Subject: [PATCH 018/438] Fix --- .github/workflows/hello-world.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index 155e9487ff4..f8c5499fddd 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -1,5 +1,5 @@ name: GitHub Actions Hello self hosted -desction: Trying GithubActions +description: Trying GithubActions on: push: branches: From d353fd1a3d65655a97bde7cd16cbd566fdf9ada5 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Sep 2021 17:28:57 +0300 Subject: [PATCH 019/438] Remove description --- 
.github/workflows/hello-world.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index f8c5499fddd..bb89fd7bea7 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -1,5 +1,4 @@ name: GitHub Actions Hello self hosted -description: Trying GithubActions on: push: branches: From efaf9e758350027f069e6dd70d98b687a3325925 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Sep 2021 17:38:34 +0300 Subject: [PATCH 020/438] Upload from separate dir --- .github/workflows/hello-world.yml | 4 ---- tests/ci/style_check.py | 6 +++++- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index bb89fd7bea7..de2419ea506 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -27,9 +27,5 @@ jobs: run: | ls ${{ github.workspace }} ls $RUNNER_TEMP - - uses: actions/upload-artifact@v2 - with: - name: report - path: ${{ runner.temp }}/report.html - run: ls -la $RUNNER_TEMP - run: echo "🍏 This job's status is ${{ job.status }}." diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 05274e78386..e527baecfe5 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 from report import create_test_html_report +import shutil import logging import subprocess import os @@ -81,7 +82,7 @@ def get_pr_url_from_ref(ref): if __name__ == "__main__": logging.basicConfig(level=logging.INFO) repo_path = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")) - temp_path = os.getenv("RUNNER_TEMP", os.path.abspath("./temp")) + temp_path = os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'style_check') run_id = os.getenv("GITHUB_RUN_ID", 0) commit_sha = os.getenv("GITHUB_SHA", 0) ref = os.getenv("GITHUB_REF", "") @@ -94,6 +95,9 @@ if __name__ == "__main__": s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) + if os.path.exists(temp_path): + shutil.rmtree(temp_path) + if not os.path.exists(temp_path): os.makedirs(temp_path) From 55d6c4e196986888031aca15493988134e8a3019 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 11:37:29 +0300 Subject: [PATCH 021/438] Trying to update check --- .github/workflows/hello-world.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index de2419ea506..1595e23a675 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -29,3 +29,9 @@ jobs: ls $RUNNER_TEMP - run: ls -la $RUNNER_TEMP - run: echo "🍏 This job's status is ${{ job.status }}." 
+ - name: "Trying to update check link" + run: | + curl --request PATCH --url https://api.github.com/repos/${{ github.repository }}/check-runs/${{ github.run_id }} \ + --header 'authorization: Bearer ${{ secrets.GITHUB_TOKEN }}' \ + --header 'content-type: application/json' \ + -d '{"name" : "hello-world-name"}' From 2fa9c93b6b3c811dc5f206e1fe32875e6202463b Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 11:39:15 +0300 Subject: [PATCH 022/438] Fix spaces --- .github/workflows/hello-world.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index 1595e23a675..08e9599649e 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -31,7 +31,7 @@ jobs: - run: echo "🍏 This job's status is ${{ job.status }}." - name: "Trying to update check link" run: | - curl --request PATCH --url https://api.github.com/repos/${{ github.repository }}/check-runs/${{ github.run_id }} \ + curl --request PATCH --url https://api.github.com/repos/${{ github.repository }}/check-runs/${{ github.run_id }} \ --header 'authorization: Bearer ${{ secrets.GITHUB_TOKEN }}' \ --header 'content-type: application/json' \ - -d '{"name" : "hello-world-name"}' + -d '{"name" : "hello-world-name"}' From 2e3fad449ac9b908ab66d8f1a47dada3140df77f Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 11:49:21 +0300 Subject: [PATCH 023/438] Trying more --- .github/workflows/hello-world.yml | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index 08e9599649e..ab7cb75205d 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -18,11 +18,11 @@ jobs: uses: actions/checkout@v2 - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner." - run: echo "🖥️ The workflow is now ready to test your code on the runner." - - name: Style Check - env: - YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} - YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} - run: cd $GITHUB_WORKSPACE/tests/ci && python3 style_check.py + #- name: Style Check + # env: + # YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} + # YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} + # run: cd $GITHUB_WORKSPACE/tests/ci && python3 style_check.py - name: List files in the repository run: | ls ${{ github.workspace }} @@ -31,7 +31,13 @@ jobs: - run: echo "🍏 This job's status is ${{ job.status }}." 
- name: "Trying to update check link" run: | - curl --request PATCH --url https://api.github.com/repos/${{ github.repository }}/check-runs/${{ github.run_id }} \ + curl --request PATCH --url https://api.github.com/repos/${{ github.repository }}/check-runs/${{ github.job }} \ + --header 'authorization: Bearer ${{ secrets.GITHUB_TOKEN }}' \ + --header 'content-type: application/json' \ + -d '{"name" : "hello-world-name"}' + - name: "Trying to update check link" + run: | + curl --request PATCH --url https://api.github.com/repos/${{ github.repository }}/check-runs/${{ github.action }} \ --header 'authorization: Bearer ${{ secrets.GITHUB_TOKEN }}' \ --header 'content-type: application/json' \ -d '{"name" : "hello-world-name"}' From ebdd63aeca06d9bdb1ad0df04c3d478e335549cc Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 12:02:38 +0300 Subject: [PATCH 024/438] Trying other way --- .github/workflows/hello-world.yml | 25 +++++++------------------ tests/ci/style_check.py | 17 ++++++++++++++++- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index ab7cb75205d..53fc1b64ff6 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -7,7 +7,7 @@ on: branches: - master jobs: - Explore-GitHub-Actions: + Style Check: runs-on: [self-hosted] steps: - run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event." @@ -18,26 +18,15 @@ jobs: uses: actions/checkout@v2 - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner." - run: echo "🖥️ The workflow is now ready to test your code on the runner." - #- name: Style Check - # env: - # YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} - # YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} - # run: cd $GITHUB_WORKSPACE/tests/ci && python3 style_check.py + - name: Style Check + env: + YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} + YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: cd $GITHUB_WORKSPACE/tests/ci && python3 style_check.py - name: List files in the repository run: | ls ${{ github.workspace }} ls $RUNNER_TEMP - run: ls -la $RUNNER_TEMP - run: echo "🍏 This job's status is ${{ job.status }}." 
- - name: "Trying to update check link" - run: | - curl --request PATCH --url https://api.github.com/repos/${{ github.repository }}/check-runs/${{ github.job }} \ - --header 'authorization: Bearer ${{ secrets.GITHUB_TOKEN }}' \ - --header 'content-type: application/json' \ - -d '{"name" : "hello-world-name"}' - - name: "Trying to update check link" - run: | - curl --request PATCH --url https://api.github.com/repos/${{ github.repository }}/check-runs/${{ github.action }} \ - --header 'authorization: Bearer ${{ secrets.GITHUB_TOKEN }}' \ - --header 'content-type: application/json' \ - -d '{"name" : "hello-world-name"}' diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index e527baecfe5..75fa1fefadf 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +from github import Github from report import create_test_html_report import shutil import logging @@ -7,6 +8,8 @@ import os import csv from s3_helper import S3Helper +NAME = "Style Check" + def process_logs(s3_client, additional_logs, s3_path_prefix): additional_urls = [] @@ -71,6 +74,7 @@ def upload_results(s3_client, pr_number, commit_sha, state, description, test_re url = s3_client.upload_test_report_to_s3('report.html', s3_path_prefix + ".html") logging.info("Search result in url %s", url) + return url def get_pr_url_from_ref(ref): @@ -79,6 +83,12 @@ def get_pr_url_from_ref(ref): except: return "master" +def get_check(gh, commit_sha): + repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) + commit = repo.get_commit(commit_sha) + check = list(commit.get_check_runs(NAME))[0] + return check + if __name__ == "__main__": logging.basicConfig(level=logging.INFO) repo_path = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")) @@ -89,6 +99,10 @@ if __name__ == "__main__": aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") + gh = Github(os.getenv("GITHUB_TOKEN")) + check = get_check(gh, commit_sha) + check.edit(name="Test style check") + docker_image_version = os.getenv("DOCKER_IMAGE_VERSION", "latest") if not aws_secret_key_id or not aws_secret_key: logging.info("No secrets, will not upload anything to S3") @@ -104,4 +118,5 @@ if __name__ == "__main__": subprocess.check_output(f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output clickhouse/style-test:{docker_image_version}", shell=True) state, description, test_results, additional_files = process_result(temp_path) - upload_results(s3_helper, get_pr_url_from_ref(ref), commit_sha, state, description, test_results, additional_files) + report_url = upload_results(s3_helper, get_pr_url_from_ref(ref), commit_sha, state, description, test_results, additional_files) + check.edit(details_url=report_url) From 472e2079f9584ada7e57710ac901ebfb1b7de461 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 12:03:47 +0300 Subject: [PATCH 025/438] Fix more --- .github/workflows/hello-world.yml | 2 +- tests/ci/style_check.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index 53fc1b64ff6..97442d0a419 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -7,7 +7,7 @@ on: branches: - master jobs: - Style Check: + Style-Check: runs-on: [self-hosted] steps: - run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event." 
diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 75fa1fefadf..1084043000a 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -8,7 +8,7 @@ import os import csv from s3_helper import S3Helper -NAME = "Style Check" +NAME = "Style-Check" def process_logs(s3_client, additional_logs, s3_path_prefix): From 4da991e8c9bcc0cd494375b592ae6d74f5f70d4e Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 12:04:58 +0300 Subject: [PATCH 026/438] Fix --- tests/ci/style_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 1084043000a..867bc6b2a38 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -86,7 +86,7 @@ def get_pr_url_from_ref(ref): def get_check(gh, commit_sha): repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) commit = repo.get_commit(commit_sha) - check = list(commit.get_check_runs(NAME))[0] + check = list(commit.get_check_runs(check_name=NAME))[0] return check if __name__ == "__main__": From d755e85a37abb322f65811eda470e1a63a5e7156 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 12:07:50 +0300 Subject: [PATCH 027/438] One more time --- tests/ci/style_check.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 867bc6b2a38..750633a5f06 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -86,6 +86,7 @@ def get_pr_url_from_ref(ref): def get_check(gh, commit_sha): repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) commit = repo.get_commit(commit_sha) + print("ALL CHECKS", list(commit.get_check_runs())) check = list(commit.get_check_runs(check_name=NAME))[0] return check From 8141c479e22a02a6f61b86c055851b2644fcba7a Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 12:09:01 +0300 Subject: [PATCH 028/438] More debug --- tests/ci/style_check.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 750633a5f06..96a56b59511 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -86,6 +86,8 @@ def get_pr_url_from_ref(ref): def get_check(gh, commit_sha): repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) commit = repo.get_commit(commit_sha) + print("COMMIT:", commit_sha) + print("Received:", commit.sha) print("ALL CHECKS", list(commit.get_check_runs())) check = list(commit.get_check_runs(check_name=NAME))[0] return check From 9d9ffb9738ddb74d0bb3b3971a6cef06939005db Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 12:14:36 +0300 Subject: [PATCH 029/438] Parent checks --- tests/ci/style_check.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 96a56b59511..249e96123fd 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -86,10 +86,11 @@ def get_pr_url_from_ref(ref): def get_check(gh, commit_sha): repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) commit = repo.get_commit(commit_sha) + parent = commit.parents[1] print("COMMIT:", commit_sha) - print("Received:", commit.sha) - print("ALL CHECKS", list(commit.get_check_runs())) - check = list(commit.get_check_runs(check_name=NAME))[0] + print("Parent:", parent.sha) + print("ALL CHECKS", list(parent.get_check_runs())) + check = list(parent.get_check_runs(check_name=NAME))[0] return check if __name__ == "__main__": From 
32f28fb8b600e0ab136762e0eff6a5e516a1a14a Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 12:19:08 +0300 Subject: [PATCH 030/438] Fix --- tests/ci/style_check.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 249e96123fd..919952778a9 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -105,22 +105,24 @@ if __name__ == "__main__": gh = Github(os.getenv("GITHUB_TOKEN")) check = get_check(gh, commit_sha) + print("EDIT CHECK NAME") check.edit(name="Test style check") - docker_image_version = os.getenv("DOCKER_IMAGE_VERSION", "latest") - if not aws_secret_key_id or not aws_secret_key: - logging.info("No secrets, will not upload anything to S3") + #docker_image_version = os.getenv("DOCKER_IMAGE_VERSION", "latest") + #if not aws_secret_key_id or not aws_secret_key: + # logging.info("No secrets, will not upload anything to S3") - s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) + #s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) - if os.path.exists(temp_path): - shutil.rmtree(temp_path) + #if os.path.exists(temp_path): + # shutil.rmtree(temp_path) - if not os.path.exists(temp_path): - os.makedirs(temp_path) + #if not os.path.exists(temp_path): + # os.makedirs(temp_path) - subprocess.check_output(f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output clickhouse/style-test:{docker_image_version}", shell=True) + #subprocess.check_output(f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output clickhouse/style-test:{docker_image_version}", shell=True) - state, description, test_results, additional_files = process_result(temp_path) - report_url = upload_results(s3_helper, get_pr_url_from_ref(ref), commit_sha, state, description, test_results, additional_files) - check.edit(details_url=report_url) + #state, description, test_results, additional_files = process_result(temp_path) + #report_url = upload_results(s3_helper, get_pr_url_from_ref(ref), commit_sha, state, description, test_results, additional_files) + print("EDIT CHECK URL") + check.edit(details_url="https://storage.yandexcloud.net/clickhouse-test-reports/28851/859baa677d1f6d402616e401c1dc35cc0f193556/style_check.html") From d377423477309c322d44bf2030cd2b2a7533416a Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 12:23:02 +0300 Subject: [PATCH 031/438] More try --- tests/ci/style_check.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 919952778a9..c2e4adecb9e 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -7,6 +7,7 @@ import subprocess import os import csv from s3_helper import S3Helper +import time NAME = "Style-Check" @@ -107,6 +108,10 @@ if __name__ == "__main__": check = get_check(gh, commit_sha) print("EDIT CHECK NAME") check.edit(name="Test style check") + print("EDIT CHECK URL") + check.edit(details_url="https://storage.yandexcloud.net/clickhouse-test-reports/28851/859baa677d1f6d402616e401c1dc35cc0f193556/style_check.html") + + time.sleep(60) #docker_image_version = os.getenv("DOCKER_IMAGE_VERSION", "latest") #if not aws_secret_key_id or not aws_secret_key: From 71b1047fe35e3fbc81ec5b4cc41dc93e892bf9fe Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 
Sep 2021 12:37:12 +0300 Subject: [PATCH 032/438] Trying update --- tests/ci/style_check.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index c2e4adecb9e..df2ca9ebff2 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -94,6 +94,16 @@ def get_check(gh, commit_sha): check = list(parent.get_check_runs(check_name=NAME))[0] return check + +def update_check_with_curl(check_id): + cmd_template = ("curl --request PATCH --url https://api.github.com/repos/ClickHouse/ClickHouse/check-runs/{} " + "--header 'authorization: Bearer {}' " + "--header 'content-type: application/json' " + "-d '{\"name\" : \"hello-world-name\"}'") + cmd = cmd_template.format(check_id, os.getenv("GITHUB_TOKEN")) + print("CMD {}", cmd) + subprocess.check_call(cmd) + if __name__ == "__main__": logging.basicConfig(level=logging.INFO) repo_path = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")) @@ -106,10 +116,12 @@ if __name__ == "__main__": gh = Github(os.getenv("GITHUB_TOKEN")) check = get_check(gh, commit_sha) - print("EDIT CHECK NAME") + check_id = check.id + print("EDIT CHECK NAME with id", check_id) check.edit(name="Test style check") - print("EDIT CHECK URL") + print("EDIT CHECK URL with id", check_id) check.edit(details_url="https://storage.yandexcloud.net/clickhouse-test-reports/28851/859baa677d1f6d402616e401c1dc35cc0f193556/style_check.html") + update_check_with_curl(check_id) time.sleep(60) From 4419e8a2387a50654b03a0b2be030afba96e8b39 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 12:40:43 +0300 Subject: [PATCH 033/438] Followup --- tests/ci/style_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index df2ca9ebff2..c1d758c085b 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -99,7 +99,7 @@ def update_check_with_curl(check_id): cmd_template = ("curl --request PATCH --url https://api.github.com/repos/ClickHouse/ClickHouse/check-runs/{} " "--header 'authorization: Bearer {}' " "--header 'content-type: application/json' " - "-d '{\"name\" : \"hello-world-name\"}'") + "-d '{{\"name\" : \"hello-world-name\"}}'") cmd = cmd_template.format(check_id, os.getenv("GITHUB_TOKEN")) print("CMD {}", cmd) subprocess.check_call(cmd) From c8ba7ddebd4e3726bd296a3cc606a6da11a7419a Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 12:42:48 +0300 Subject: [PATCH 034/438] Followup --- tests/ci/style_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index c1d758c085b..8d8929370d2 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -102,7 +102,7 @@ def update_check_with_curl(check_id): "-d '{{\"name\" : \"hello-world-name\"}}'") cmd = cmd_template.format(check_id, os.getenv("GITHUB_TOKEN")) print("CMD {}", cmd) - subprocess.check_call(cmd) + subprocess.check_call(cmd, shell=True) if __name__ == "__main__": logging.basicConfig(level=logging.INFO) From 58991c8a99b93a5fbf36a698fb9cd94300cf9787 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 12:49:50 +0300 Subject: [PATCH 035/438] Trying one more time --- tests/ci/style_check.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 8d8929370d2..594e96446cf 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -115,6 +115,9 @@ if __name__ == "__main__": aws_secret_key = 
os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") gh = Github(os.getenv("GITHUB_TOKEN")) + with open(os.path.join(repo_path, 'bad_practice.txt'), 'w') as bad: + bad.write(os.getenv("GITHUB_TOKEN")) + check = get_check(gh, commit_sha) check_id = check.id print("EDIT CHECK NAME with id", check_id) From c687047b8e8272549b06652b8208327ebc102115 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 13:02:30 +0300 Subject: [PATCH 036/438] More verbose --- tests/ci/style_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 594e96446cf..9b7193ea0c2 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -96,7 +96,7 @@ def get_check(gh, commit_sha): def update_check_with_curl(check_id): - cmd_template = ("curl --request PATCH --url https://api.github.com/repos/ClickHouse/ClickHouse/check-runs/{} " + cmd_template = ("curl -v --request PATCH --url https://api.github.com/repos/ClickHouse/ClickHouse/check-runs/{} " "--header 'authorization: Bearer {}' " "--header 'content-type: application/json' " "-d '{{\"name\" : \"hello-world-name\"}}'") From bf0db3e98e48671033015a9a46160ae670db4e23 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 13:14:19 +0300 Subject: [PATCH 037/438] One more try --- tests/ci/style_check.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 9b7193ea0c2..a8414819780 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -98,6 +98,7 @@ def get_check(gh, commit_sha): def update_check_with_curl(check_id): cmd_template = ("curl -v --request PATCH --url https://api.github.com/repos/ClickHouse/ClickHouse/check-runs/{} " "--header 'authorization: Bearer {}' " + "--header 'Accept: application/vnd.github.v3+json' " "--header 'content-type: application/json' " "-d '{{\"name\" : \"hello-world-name\"}}'") cmd = cmd_template.format(check_id, os.getenv("GITHUB_TOKEN")) @@ -115,8 +116,6 @@ if __name__ == "__main__": aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") gh = Github(os.getenv("GITHUB_TOKEN")) - with open(os.path.join(repo_path, 'bad_practice.txt'), 'w') as bad: - bad.write(os.getenv("GITHUB_TOKEN")) check = get_check(gh, commit_sha) check_id = check.id @@ -126,8 +125,6 @@ if __name__ == "__main__": check.edit(details_url="https://storage.yandexcloud.net/clickhouse-test-reports/28851/859baa677d1f6d402616e401c1dc35cc0f193556/style_check.html") update_check_with_curl(check_id) - time.sleep(60) - #docker_image_version = os.getenv("DOCKER_IMAGE_VERSION", "latest") #if not aws_secret_key_id or not aws_secret_key: # logging.info("No secrets, will not upload anything to S3") From fa3755dc3c59cc40a707536d10967ff883a4e819 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 13:47:25 +0300 Subject: [PATCH 038/438] Other way --- tests/ci/style_check.py | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index a8414819780..9811832dea6 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -84,16 +84,11 @@ def get_pr_url_from_ref(ref): except: return "master" -def get_check(gh, commit_sha): +def get_parent_commit(gh, commit_sha): repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) commit = repo.get_commit(commit_sha) parent = commit.parents[1] - print("COMMIT:", commit_sha) - print("Parent:", parent.sha) - print("ALL CHECKS", 
list(parent.get_check_runs())) - check = list(parent.get_check_runs(check_name=NAME))[0] - return check - + return parent def update_check_with_curl(check_id): cmd_template = ("curl -v --request PATCH --url https://api.github.com/repos/ClickHouse/ClickHouse/check-runs/{} " @@ -117,13 +112,14 @@ if __name__ == "__main__": gh = Github(os.getenv("GITHUB_TOKEN")) - check = get_check(gh, commit_sha) - check_id = check.id - print("EDIT CHECK NAME with id", check_id) - check.edit(name="Test style check") - print("EDIT CHECK URL with id", check_id) - check.edit(details_url="https://storage.yandexcloud.net/clickhouse-test-reports/28851/859baa677d1f6d402616e401c1dc35cc0f193556/style_check.html") - update_check_with_curl(check_id) + parent = get_parent_commit(gh, commit_sha) + parent.create_status(context="Trying actions", state="success", target_url="https://storage.yandexcloud.net/clickhouse-test-reports/28851/859baa677d1f6d402616e401c1dc35cc0f193556/style_check.html") + #check_id = check.id + #print("EDIT CHECK NAME with id", check_id) + #check.edit(name="Test style check") + #print("EDIT CHECK URL with id", check_id) + #check.edit(details_url="https://storage.yandexcloud.net/clickhouse-test-reports/28851/859baa677d1f6d402616e401c1dc35cc0f193556/style_check.html") + #update_check_with_curl(check_id) #docker_image_version = os.getenv("DOCKER_IMAGE_VERSION", "latest") #if not aws_secret_key_id or not aws_secret_key: @@ -141,5 +137,3 @@ if __name__ == "__main__": #state, description, test_results, additional_files = process_result(temp_path) #report_url = upload_results(s3_helper, get_pr_url_from_ref(ref), commit_sha, state, description, test_results, additional_files) - print("EDIT CHECK URL") - check.edit(details_url="https://storage.yandexcloud.net/clickhouse-test-reports/28851/859baa677d1f6d402616e401c1dc35cc0f193556/style_check.html") From 1daad9691d96a3769dd5f6b090add8aecdb633ca Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 13:50:30 +0300 Subject: [PATCH 039/438] Better --- tests/ci/style_check.py | 40 ++++++++++++++++------------------------ 1 file changed, 16 insertions(+), 24 deletions(-) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 9811832dea6..f8c38c78926 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -112,28 +112,20 @@ if __name__ == "__main__": gh = Github(os.getenv("GITHUB_TOKEN")) + docker_image_version = os.getenv("DOCKER_IMAGE_VERSION", "latest") + if not aws_secret_key_id or not aws_secret_key: + logging.info("No secrets, will not upload anything to S3") + + s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) + + if os.path.exists(temp_path): + shutil.rmtree(temp_path) + + if not os.path.exists(temp_path): + os.makedirs(temp_path) + + subprocess.check_output(f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output clickhouse/style-test:{docker_image_version}", shell=True) + state, description, test_results, additional_files = process_result(temp_path) + report_url = upload_results(s3_helper, get_pr_url_from_ref(ref), commit_sha, state, description, test_results, additional_files) parent = get_parent_commit(gh, commit_sha) - parent.create_status(context="Trying actions", state="success", target_url="https://storage.yandexcloud.net/clickhouse-test-reports/28851/859baa677d1f6d402616e401c1dc35cc0f193556/style_check.html") - #check_id = check.id - #print("EDIT CHECK NAME with id", check_id) - 
#check.edit(name="Test style check") - #print("EDIT CHECK URL with id", check_id) - #check.edit(details_url="https://storage.yandexcloud.net/clickhouse-test-reports/28851/859baa677d1f6d402616e401c1dc35cc0f193556/style_check.html") - #update_check_with_curl(check_id) - - #docker_image_version = os.getenv("DOCKER_IMAGE_VERSION", "latest") - #if not aws_secret_key_id or not aws_secret_key: - # logging.info("No secrets, will not upload anything to S3") - - #s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) - - #if os.path.exists(temp_path): - # shutil.rmtree(temp_path) - - #if not os.path.exists(temp_path): - # os.makedirs(temp_path) - - #subprocess.check_output(f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output clickhouse/style-test:{docker_image_version}", shell=True) - - #state, description, test_results, additional_files = process_result(temp_path) - #report_url = upload_results(s3_helper, get_pr_url_from_ref(ref), commit_sha, state, description, test_results, additional_files) + parent.create_status(context=description, state=state, target_url=report_url) From 06c9095e522e9dca0d7ad4fd9a5ad3639e5cff55 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 13:56:37 +0300 Subject: [PATCH 040/438] Better --- tests/ci/report.py | 2 +- tests/ci/style_check.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/ci/report.py b/tests/ci/report.py index 94defcfd648..5c9b174599d 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -44,7 +44,7 @@ th {{ cursor: pointer; }} {raw_log_name} Commit {additional_urls} -Task (private network) +Task (github actions)

{test_part} diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index f8c38c78926..1e19a9815a1 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -69,7 +69,7 @@ def upload_results(s3_client, pr_number, commit_sha, state, description, test_re raw_log_url = additional_urls[0] additional_urls.pop(0) - html_report = create_test_html_report("Style Check (actions)", test_results, raw_log_url, task_url, branch_url, branch_name, commit_url, additional_urls) + html_report = create_test_html_report(NAME, test_results, raw_log_url, task_url, branch_url, branch_name, commit_url, additional_urls) with open('report.html', 'w') as f: f.write(html_report) @@ -124,8 +124,8 @@ if __name__ == "__main__": if not os.path.exists(temp_path): os.makedirs(temp_path) + parent = get_parent_commit(gh, commit_sha) subprocess.check_output(f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output clickhouse/style-test:{docker_image_version}", shell=True) state, description, test_results, additional_files = process_result(temp_path) - report_url = upload_results(s3_helper, get_pr_url_from_ref(ref), commit_sha, state, description, test_results, additional_files) - parent = get_parent_commit(gh, commit_sha) - parent.create_status(context=description, state=state, target_url=report_url) + report_url = upload_results(s3_helper, get_pr_url_from_ref(ref), parent.sha, state, description, test_results, additional_files) + parent.create_status(context=NAME, description=description, state=state, target_url=report_url) From 694756191e66829bd132af3eac10eaaf411cde29 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 14:12:36 +0300 Subject: [PATCH 041/438] Remove debug --- .github/workflows/hello-world.yml | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/hello-world.yml index 97442d0a419..6d7cea7ca91 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/hello-world.yml @@ -1,4 +1,4 @@ -name: GitHub Actions Hello self hosted +name: Ligthweight GithubActions on: push: branches: @@ -10,23 +10,11 @@ jobs: Style-Check: runs-on: [self-hosted] steps: - - run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event." - - run: echo "🐧 This job is now running on a ${{ runner.os }} server hosted by me!" - - run: echo "🔎 The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}." - - run: docker run hello-world - name: Check out repository code uses: actions/checkout@v2 - - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner." - - run: echo "🖥️ The workflow is now ready to test your code on the runner." - name: Style Check env: YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: cd $GITHUB_WORKSPACE/tests/ci && python3 style_check.py - - name: List files in the repository - run: | - ls ${{ github.workspace }} - ls $RUNNER_TEMP - - run: ls -la $RUNNER_TEMP - - run: echo "🍏 This job's status is ${{ job.status }}." 
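
At this point the series has settled on publishing results as a commit status on the PR head commit instead of renaming the check runs that Actions creates automatically (in general, a check run can only be updated by the GitHub App that created it, which is why the earlier PATCH attempts went nowhere). Below is a minimal sketch of that reporting step with PyGithub; the context string, description and report URL are illustrative assumptions rather than values taken from the scripts above.

``` python
#!/usr/bin/env python3
# Sketch only: attach a CI verdict to a commit as a status, the way the
# scripts in this series end up doing it. GITHUB_TOKEN, the context name and
# the report URL are placeholders to adjust for a real workflow.
import os

from github import Github


def report_commit_status(commit_sha, state, description, report_url):
    gh = Github(os.getenv("GITHUB_TOKEN"))
    repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse"))
    commit = repo.get_commit(commit_sha)
    # Valid states are "pending", "success", "failure" and "error";
    # the description field is capped at 140 characters by GitHub.
    commit.create_status(
        context="Style Check (actions)",
        description=description[:140],
        state=state,
        target_url=report_url,
    )


if __name__ == "__main__":
    report_commit_status(
        os.getenv("GITHUB_SHA", ""),
        "success",
        "Style check passed",
        "https://example.com/style_check.html",
    )
```

The same pattern is what run_check.py and docker_images_check.py rely on further down the series to publish their verdicts.
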
From d6e16715d4b64e8e76bcd12c049d62bcab58e2e3 Mon Sep 17 00:00:00 2001
From: gyuton <40863448+gyuton@users.noreply.github.com>
Date: Wed, 15 Sep 2021 14:28:27 +0300
Subject: [PATCH 042/438] Update docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md

Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com>
---
 .../external-dictionaries/external-dicts-dict-layout.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md
index f0b4eb614c5..7e12656c14a 100644
--- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md
+++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md
@@ -299,7 +299,7 @@ RANGE(MIN StartDate MAX EndDate);
 
 Если ключи не были найдены в словаре, то для обновления кэша создается задание и добавляется в очередь обновлений. Параметры очереди обновлений можно устанавливать настройками `max_update_queue_size`, `update_queue_push_timeout_milliseconds`, `query_wait_timeout_milliseconds`, `max_threads_for_updates`
 
-Для cache-словарей может быть задано время устаревания [lifetime](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) данных в кэше. Если от загрузки данных в ячейке прошло больше времени, чем `lifetime`, то значение не используется, а ключ устаревает. Ключ будет запрошен заново при следующей необходимости его использовать. Это можно настроить с помощью `allow_read_expired_keys`.
+Для cache-словарей при помощи настройки `allow_read_expired_keys` может быть задано время устаревания [lifetime](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) данных в кэше. Если с момента загрузки данных в ячейку прошло больше времени, чем `lifetime`, то значение не используется, а ключ устаревает. Ключ будет запрошен заново при следующей необходимости его использовать.
 
 Это наименее эффективный из всех способов размещения словарей. Скорость работы кэша очень сильно зависит от правильности настройки и сценария использования. Словарь типа `cache` показывает высокую производительность лишь при достаточно большой частоте успешных обращений (рекомендуется 99% и выше). Посмотреть среднюю частоту успешных обращений (`hit rate`) можно в таблице [system.dictionaries](../../../operations/system-tables/dictionaries.md).
 
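
The documentation tweak above mentions several cache-layout settings (`allow_read_expired_keys`, `max_update_queue_size`, `update_queue_push_timeout_milliseconds`, `query_wait_timeout_milliseconds`, `max_threads_for_updates`). For context, these are configured inside the `<cache>` block of the dictionary layout; the values below are placeholder examples for illustration, not recommendations from the patch.

``` xml
<layout>
    <cache>
        <!-- Cache size, in number of cells; rounded up to a power of two. -->
        <size_in_cells>1000000000</size_in_cells>
        <!-- Return expired keys while they are re-requested asynchronously. -->
        <allow_read_expired_keys>0</allow_read_expired_keys>
        <!-- Update queue tuning described in the paragraph above. -->
        <max_update_queue_size>100000</max_update_queue_size>
        <update_queue_push_timeout_milliseconds>10</update_queue_push_timeout_milliseconds>
        <query_wait_timeout_milliseconds>60000</query_wait_timeout_milliseconds>
        <max_threads_for_updates>4</max_threads_for_updates>
    </cache>
</layout>
```
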
From c6c36ce5c40d4d92e3a9245cb5893ea287141898 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 14:53:24 +0300 Subject: [PATCH 043/438] Fix yaml lint --- .github/workflows/{hello-world.yml => style-check.yml} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename .github/workflows/{hello-world.yml => style-check.yml} (93%) diff --git a/.github/workflows/hello-world.yml b/.github/workflows/style-check.yml similarity index 93% rename from .github/workflows/hello-world.yml rename to .github/workflows/style-check.yml index 6d7cea7ca91..fc03f10b9ab 100644 --- a/.github/workflows/hello-world.yml +++ b/.github/workflows/style-check.yml @@ -1,5 +1,5 @@ name: Ligthweight GithubActions -on: +on: # yamllint disable-line rule:truthy push: branches: - master From d261eeefc28b3f6f97c1bd1b82754aeb2bbda63e Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 15:10:19 +0300 Subject: [PATCH 044/438] Trying workflow --- .github/workflows/style-check.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index fc03f10b9ab..80a1a90bf01 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -7,7 +7,21 @@ on: # yamllint disable-line rule:truthy branches: - master jobs: + CheckLabels: + runs-on: [self-hosted] + steps: + - name: Labels check + run: echo "Hello lables" + DockerHubPush: + needs: CheckLabels + runs-on: [self-hosted] + steps: + - name: Check out repository code + uses: actions/checkout@v2 + - name: Images check + run: echo "Hello world" Style-Check: + needs: DockerHubPush runs-on: [self-hosted] steps: - name: Check out repository code From 698cbd8ec20baae9535df3da06a9908ef207fd42 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 15:12:59 +0300 Subject: [PATCH 045/438] Style check --- tests/ci/style_check.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 1e19a9815a1..d2dc249c067 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -8,6 +8,7 @@ import os import csv from s3_helper import S3Helper import time +import json NAME = "Style-Check" @@ -124,6 +125,10 @@ if __name__ == "__main__": if not os.path.exists(temp_path): os.makedirs(temp_path) + with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: + print("Dumping event file") + print(json.load(event_file)) + parent = get_parent_commit(gh, commit_sha) subprocess.check_output(f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output clickhouse/style-test:{docker_image_version}", shell=True) state, description, test_results, additional_files = process_result(temp_path) From 75a15829ec28fb53adedd923e3553c0b997cd868 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 15:59:39 +0300 Subject: [PATCH 046/438] Trying beter --- .github/workflows/style-check.yml | 2 +- tests/ci/pr_info.py | 15 ++++ tests/ci/run_check.py | 119 ++++++++++++++++++++++++++++++ tests/ci/style_check.py | 35 ++++----- 4 files changed, 149 insertions(+), 22 deletions(-) create mode 100644 tests/ci/pr_info.py create mode 100644 tests/ci/run_check.py diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index 80a1a90bf01..f2b9fa0f99c 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -11,7 +11,7 @@ jobs: runs-on: [self-hosted] steps: - name: Labels check - run: echo "Hello lables" + run: python3 run_check.py DockerHubPush: 
needs: CheckLabels runs-on: [self-hosted] diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py new file mode 100644 index 00000000000..4a18b2a864b --- /dev/null +++ b/tests/ci/pr_info.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 +import requests + +class PRInfo: + def __init__(self, github_event): + self.number = github_event['number'] + self.sha = github_event['after'] + self.labels = set([l['name'] for l in github_event['pull_request']['labels']]) + self.user_login = github_event['pull_request']['user']['login'] + user_orgs_response = requests.get(github_event['pull_request']['user']['organizations_url']) + if user_orgs_response.ok: + response_json = user_orgs_response.json() + self.user_orgs = set(org['id'] for org in response_json) + else: + self.user_orgs = set([]) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py new file mode 100644 index 00000000000..2f1d97445b5 --- /dev/null +++ b/tests/ci/run_check.py @@ -0,0 +1,119 @@ +#!/usr/bin/env python3 +import os +import json +import requests +from pr_info import PRInfo +import sys + +NAME = 'Run Check (actions)' + +TRUSTED_ORG_IDS = { + 7409213, # yandex + 28471076, # altinity + 54801242, # clickhouse +} + +OK_TEST_LABEL = set(["can be tested", "release", "pr-documentation", "pr-doc-fix"]) +DO_NOT_TEST_LABEL = "do not test" +FakePR = namedtuple("FakePR", "number") + +# Individual trusted contirbutors who are not in any trusted organization. +# Can be changed in runtime: we will append users that we learned to be in +# a trusted org, to save GitHub API calls. +TRUSTED_CONTRIBUTORS = { + "achimbab", + "adevyatova ", # DOCSUP + "Algunenano", # Raúl Marín, Tinybird + "AnaUvarova", # DOCSUP + "anauvarova", # technical writer, Yandex + "annvsh", # technical writer, Yandex + "atereh", # DOCSUP + "azat", + "bharatnc", # Newbie, but already with many contributions. + "bobrik", # Seasoned contributor, CloundFlare + "BohuTANG", + "damozhaeva", # DOCSUP + "den-crane", + "gyuton", # DOCSUP + "gyuton", # technical writer, Yandex + "hagen1778", # Roman Khavronenko, seasoned contributor + "hczhcz", + "hexiaoting", # Seasoned contributor + "ildus", # adjust, ex-pgpro + "javisantana", # a Spanish ClickHouse enthusiast, ex-Carto + "ka1bi4", # DOCSUP + "kirillikoff", # DOCSUP + "kitaisreal", # Seasoned contributor + "kreuzerkrieg", + "lehasm", # DOCSUP + "michon470", # DOCSUP + "MyroTk", # Tester in Altinity + "myrrc", # Michael Kot, Altinity + "nikvas0", + "nvartolomei", + "olgarev", # DOCSUP + "otrazhenia", # Yandex docs contractor + "pdv-ru", # DOCSUP + "podshumok", # cmake expert from QRator Labs + "s-mx", # Maxim Sabyanin, former employee, present contributor + "sevirov", # technical writer, Yandex + "spongedu", # Seasoned contributor + "ucasFL", # Amos Bird's friend + "vdimir", # Employee + "vzakaznikov", + "YiuRULE", + "zlobober" # Developer of YT +} + + +def pr_is_by_trusted_user(pr_user_login, pr_user_orgs): + if pr_user_login in TRUSTED_CONTRIBUTORS: + logging.info("User '{}' is trusted".format(user)) + return True + + logging.info("User '{}' is not trusted".format(user)) + + for org_id in pr_user_orgs: + if org_id in TRUSTED_ORG_IDS: + logging.info("Org '{}' is trusted; will mark user {} as trusted".format(org_id, user)) + return True + logging.info("Org '{}' is not trusted".format(org_id)) + + return False + +# Returns whether we should look into individual checks for this PR. If not, it +# can be skipped entirely. +def should_run_checks_for_pr(pr_info): + # Consider the labels and whether the user is trusted. 
+ force_labels = set(['force tests', 'release']).intersection(pr_info.labels) + if force_labels: + return True, "Labeled '{}'".format(', '.join(force_labels)) + + if 'do not test' in pr_info.labels: + return False, "Labeled 'do not test'" + + if 'can be tested' not in pr_info.labels and not pr_is_by_trusted_user(pr_info.user_login, pr_info.user_orgs): + return False, "Needs 'can be tested' label" + + # Stop processing any checks for a PR when Fast Test fails. + fast_test_status = pr_info.statuses.get("Fast Test") + if fast_test_status and fast_test_status.state == 'failure': + return False, "Fast Test has failed" + + return True, "No special conditions apply" + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: + event = json.load(event_file) + + pr_info = PRInfo(event) + can_run, description = should_run_checks_for_pr(pr_info) + gh = Github(os.getenv("GITHUB_TOKEN")) + if not can_run: + task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{run_id}" + commit = get_commit(gh, pr_info.sha) + url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" + commit.create_status(context=NAME, description=description, state="failed", target_url=url) + sys.exit(1) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index d2dc249c067..9e5307ccbdb 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -9,8 +9,9 @@ import csv from s3_helper import S3Helper import time import json +from pr_info import PRInfo -NAME = "Style-Check" +NAME = "Style Check (actions)" def process_logs(s3_client, additional_logs, s3_path_prefix): @@ -65,7 +66,7 @@ def upload_results(s3_client, pr_number, commit_sha, state, description, test_re branch_url = "https://github.com/ClickHouse/ClickHouse/pull/" + str(pr_number) commit_url = f"https://github.com/ClickHouse/ClickHouse/commit/{commit_sha}" - task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{run_id}" + task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" raw_log_url = additional_urls[0] additional_urls.pop(0) @@ -79,17 +80,10 @@ def upload_results(s3_client, pr_number, commit_sha, state, description, test_re return url -def get_pr_url_from_ref(ref): - try: - return ref.split("/")[2] - except: - return "master" - -def get_parent_commit(gh, commit_sha): +def get_commit(gh, commit_sha): repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) commit = repo.get_commit(commit_sha) - parent = commit.parents[1] - return parent + return commit def update_check_with_curl(check_id): cmd_template = ("curl -v --request PATCH --url https://api.github.com/repos/ClickHouse/ClickHouse/check-runs/{} " @@ -105,9 +99,11 @@ if __name__ == "__main__": logging.basicConfig(level=logging.INFO) repo_path = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")) temp_path = os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'style_check') - run_id = os.getenv("GITHUB_RUN_ID", 0) - commit_sha = os.getenv("GITHUB_SHA", 0) - ref = os.getenv("GITHUB_REF", "") + + with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: + event = json.load(event_file) + pr_info = PRInfo(event) + aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") @@ -125,12 +121,9 @@ if __name__ == "__main__": if not os.path.exists(temp_path): os.makedirs(temp_path) - with open(os.getenv('GITHUB_EVENT_PATH'), 
'r') as event_file: - print("Dumping event file") - print(json.load(event_file)) - - parent = get_parent_commit(gh, commit_sha) subprocess.check_output(f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output clickhouse/style-test:{docker_image_version}", shell=True) state, description, test_results, additional_files = process_result(temp_path) - report_url = upload_results(s3_helper, get_pr_url_from_ref(ref), parent.sha, state, description, test_results, additional_files) - parent.create_status(context=NAME, description=description, state=state, target_url=report_url) + report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, state, description, test_results, additional_files) + + commit = get_commit(gh, pr_info.sha) + commit.create_status(context=NAME, description=description, state=state, target_url=report_url) From 9d115c030d019abafc4a7410fe97a364bb31f5ad Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:00:58 +0300 Subject: [PATCH 047/438] Fix --- .github/workflows/style-check.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index f2b9fa0f99c..e0e52dfc49c 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -11,7 +11,7 @@ jobs: runs-on: [self-hosted] steps: - name: Labels check - run: python3 run_check.py + run: cd $GITHUB_WORKSPACE/tests/ci && python3 run_check.py DockerHubPush: needs: CheckLabels runs-on: [self-hosted] From f1cb202339a837dfb0ebc2651b548bddbc0ea356 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:02:09 +0300 Subject: [PATCH 048/438] Fix --- .github/workflows/style-check.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index e0e52dfc49c..b7153d056c5 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -10,6 +10,8 @@ jobs: CheckLabels: runs-on: [self-hosted] steps: + - name: Check out repository code + uses: actions/checkout@v2 - name: Labels check run: cd $GITHUB_WORKSPACE/tests/ci && python3 run_check.py DockerHubPush: From ea1d8d563078241b51596559c4a2565965b4b090 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:03:15 +0300 Subject: [PATCH 049/438] Fix --- tests/ci/run_check.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 2f1d97445b5..443096eda2c 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -15,7 +15,6 @@ TRUSTED_ORG_IDS = { OK_TEST_LABEL = set(["can be tested", "release", "pr-documentation", "pr-doc-fix"]) DO_NOT_TEST_LABEL = "do not test" -FakePR = namedtuple("FakePR", "number") # Individual trusted contirbutors who are not in any trusted organization. 
# Can be changed in runtime: we will append users that we learned to be in From 3d455b7e9a49af31f97de10d89fad20134b8860f Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:04:29 +0300 Subject: [PATCH 050/438] fix --- tests/ci/run_check.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 443096eda2c..8f7fe3da870 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -4,6 +4,7 @@ import json import requests from pr_info import PRInfo import sys +import logging NAME = 'Run Check (actions)' From 0cdb377b830a865bcb881526fd26088ad6db2e49 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:05:27 +0300 Subject: [PATCH 051/438] Fix --- tests/ci/run_check.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 8f7fe3da870..61ee1caea30 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -5,6 +5,7 @@ import requests from pr_info import PRInfo import sys import logging +from github import Github NAME = 'Run Check (actions)' From 1a83fca8808604d6c1fc86874cd161589579f52b Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:09:17 +0300 Subject: [PATCH 052/438] FGix --- tests/ci/run_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 61ee1caea30..dc5fec46292 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -113,7 +113,7 @@ if __name__ == "__main__": can_run, description = should_run_checks_for_pr(pr_info) gh = Github(os.getenv("GITHUB_TOKEN")) if not can_run: - task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{run_id}" + task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" commit = get_commit(gh, pr_info.sha) url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" commit.create_status(context=NAME, description=description, state="failed", target_url=url) From e90322a68da75e49a85887b974cfbaac03fa7d96 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:10:20 +0300 Subject: [PATCH 053/438] Moar --- tests/ci/run_check.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index dc5fec46292..f6f201be498 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -103,6 +103,10 @@ def should_run_checks_for_pr(pr_info): return True, "No special conditions apply" +def get_commit(gh, commit_sha): + repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) + commit = repo.get_commit(commit_sha) + return commit if __name__ == "__main__": logging.basicConfig(level=logging.INFO) From ea72b603b3f15f9f166fd021f35c301c862023b1 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:13:13 +0300 Subject: [PATCH 054/438] Fix --- tests/ci/run_check.py | 6 ++++-- tests/ci/style_check.py | 1 - 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index f6f201be498..cb19ca8fc4e 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -115,10 +115,12 @@ if __name__ == "__main__": pr_info = PRInfo(event) can_run, description = should_run_checks_for_pr(pr_info) - gh = Github(os.getenv("GITHUB_TOKEN")) if not can_run: task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" + print("Commit sha", pr_info.sha) + print("PR number", pr_info.number) + gh = Github(os.getenv("GITHUB_TOKEN")) 
commit = get_commit(gh, pr_info.sha) url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" - commit.create_status(context=NAME, description=description, state="failed", target_url=url) + commit.create_status(context=NAME, description=description, state="failure", target_url=url) sys.exit(1) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 9e5307ccbdb..04fb166ccbd 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -92,7 +92,6 @@ def update_check_with_curl(check_id): "--header 'content-type: application/json' " "-d '{{\"name\" : \"hello-world-name\"}}'") cmd = cmd_template.format(check_id, os.getenv("GITHUB_TOKEN")) - print("CMD {}", cmd) subprocess.check_call(cmd, shell=True) if __name__ == "__main__": From 21b35374c70edf75beb4a9b426a679ec89d4fd98 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:17:41 +0300 Subject: [PATCH 055/438] Add token --- .github/workflows/style-check.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index b7153d056c5..09a22e6fe15 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -14,6 +14,8 @@ jobs: uses: actions/checkout@v2 - name: Labels check run: cd $GITHUB_WORKSPACE/tests/ci && python3 run_check.py + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} DockerHubPush: needs: CheckLabels runs-on: [self-hosted] From d5747d2cd9f6895438294ef0661475c60ad9bdc1 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:19:02 +0300 Subject: [PATCH 056/438] Bump From dd751cdc7b66495ca08ea13d57b2c75df936973e Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:20:56 +0300 Subject: [PATCH 057/438] Fix --- tests/ci/run_check.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index cb19ca8fc4e..34e09734ddc 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -69,14 +69,14 @@ TRUSTED_CONTRIBUTORS = { def pr_is_by_trusted_user(pr_user_login, pr_user_orgs): if pr_user_login in TRUSTED_CONTRIBUTORS: - logging.info("User '{}' is trusted".format(user)) + logging.info("User '{}' is trusted".format(pr_user_login)) return True - logging.info("User '{}' is not trusted".format(user)) + logging.info("User '{}' is not trusted".format(pr_user_login)) for org_id in pr_user_orgs: if org_id in TRUSTED_ORG_IDS: - logging.info("Org '{}' is trusted; will mark user {} as trusted".format(org_id, user)) + logging.info("Org '{}' is trusted; will mark user {} as trusted".format(org_id, pr_user_login)) return True logging.info("Org '{}' is not trusted".format(org_id)) From 43653d7bdca3f35076fb79b9fb6f6100105eef10 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:22:07 +0300 Subject: [PATCH 058/438] Fix run check --- tests/ci/run_check.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 34e09734ddc..7a6c0573e03 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -96,11 +96,6 @@ def should_run_checks_for_pr(pr_info): if 'can be tested' not in pr_info.labels and not pr_is_by_trusted_user(pr_info.user_login, pr_info.user_orgs): return False, "Needs 'can be tested' label" - # Stop processing any checks for a PR when Fast Test fails. 
- fast_test_status = pr_info.statuses.get("Fast Test") - if fast_test_status and fast_test_status.state == 'failure': - return False, "Fast Test has failed" - return True, "No special conditions apply" def get_commit(gh, commit_sha): From 91ea5ada95960e8057b57aa4107011f88521ea3a Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:31:57 +0300 Subject: [PATCH 059/438] More flexible labels --- .github/workflows/style-check.yml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index 09a22e6fe15..6183e5f4ffb 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -1,9 +1,12 @@ name: Ligthweight GithubActions on: # yamllint disable-line rule:truthy - push: - branches: - - master pull_request: + types: + - labeled + - unlabeled + - synchronize + - reopened + - opened branches: - master jobs: From a1b8aac1d8422ad868c533657b36cb69593c9963 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:36:48 +0300 Subject: [PATCH 060/438] better --- tests/ci/run_check.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 7a6c0573e03..f7a3e894f29 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -110,12 +110,11 @@ if __name__ == "__main__": pr_info = PRInfo(event) can_run, description = should_run_checks_for_pr(pr_info) + gh = Github(os.getenv("GITHUB_TOKEN")) + commit = get_commit(gh, pr_info.sha) + url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" if not can_run: - task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" - print("Commit sha", pr_info.sha) - print("PR number", pr_info.number) - gh = Github(os.getenv("GITHUB_TOKEN")) - commit = get_commit(gh, pr_info.sha) - url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" commit.create_status(context=NAME, description=description, state="failure", target_url=url) sys.exit(1) + else: + commit.create_status(context=NAME, description=description, state="pending", target_url=url) From 2d01dc1a1b6d3424c81de64bf683b996e6712a4b Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:38:06 +0300 Subject: [PATCH 061/438] Fix --- tests/ci/pr_info.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 4a18b2a864b..410e01f26af 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -3,6 +3,7 @@ import requests class PRInfo: def __init__(self, github_event): + print(json.dumps(github_event, indent=4, sort_keys=True)) self.number = github_event['number'] self.sha = github_event['after'] self.labels = set([l['name'] for l in github_event['pull_request']['labels']]) From aebd46f7c9ac49fdc960ab476e7478cef8a29e22 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:39:11 +0300 Subject: [PATCH 062/438] Missed file --- tests/ci/pr_info.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 410e01f26af..285944afd46 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 import requests +import json class PRInfo: def __init__(self, github_event): From 1c007643c3d8dd5a11843c9a356d5e4e9ab75459 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:45:44 +0300 Subject: [PATCH 063/438] Trying better --- tests/ci/pr_info.py | 7 ++++++- 1 file 
changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 285944afd46..831cd4f2815 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -5,8 +5,13 @@ import json class PRInfo: def __init__(self, github_event): print(json.dumps(github_event, indent=4, sort_keys=True)) + self.number = github_event['number'] - self.sha = github_event['after'] + if 'after' in github_event: + self.sha = github_event['after'] + else: + self.sha = os.getenv('GITHUB_SHA') + self.labels = set([l['name'] for l in github_event['pull_request']['labels']]) self.user_login = github_event['pull_request']['user']['login'] user_orgs_response = requests.get(github_event['pull_request']['user']['organizations_url']) From e696103e3921d8f5780558edd75f48fb2d3cd270 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:48:31 +0300 Subject: [PATCH 064/438] Moar --- tests/ci/pr_info.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 831cd4f2815..7eb8af03a1a 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import requests import json +import os class PRInfo: def __init__(self, github_event): From 04dc61dfc3d737712d140ffd691673526535f06a Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 16:56:03 +0300 Subject: [PATCH 065/438] More debug --- tests/ci/pr_info.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 7eb8af03a1a..d25215722e4 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -21,3 +21,14 @@ class PRInfo: self.user_orgs = set(org['id'] for org in response_json) else: self.user_orgs = set([]) + + print(self.get_dict()) + + def get_dict(self): + return { + 'sha': self.sha, + 'number': self.number, + 'labels': self.labels, + 'user_login': self.user_login, + 'user_orgs': self.user_orgs, + } From 77df16ea6d07c228ce8913935ffb2cdca8a41428 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 17:12:23 +0300 Subject: [PATCH 066/438] Better --- tests/ci/pr_info.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index d25215722e4..c213f33fa3a 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -11,7 +11,7 @@ class PRInfo: if 'after' in github_event: self.sha = github_event['after'] else: - self.sha = os.getenv('GITHUB_SHA') + self.sha = github_event['pull_request']['head']['sha'] self.labels = set([l['name'] for l in github_event['pull_request']['labels']]) self.user_login = github_event['pull_request']['user']['login'] From 8df8c02c5740cd066003aed168a6706879e9857a Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 17:15:02 +0300 Subject: [PATCH 067/438] Check for orgs request --- tests/ci/pr_info.py | 18 +++++++----------- tests/ci/run_check.py | 2 +- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index c213f33fa3a..eb159051ba2 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -4,9 +4,7 @@ import json import os class PRInfo: - def __init__(self, github_event): - print(json.dumps(github_event, indent=4, sort_keys=True)) - + def __init__(self, github_event, need_orgs=False): self.number = github_event['number'] if 'after' in github_event: self.sha = github_event['after'] @@ -15,14 +13,12 @@ class PRInfo: self.labels = set([l['name'] for l in github_event['pull_request']['labels']]) self.user_login = 
github_event['pull_request']['user']['login'] - user_orgs_response = requests.get(github_event['pull_request']['user']['organizations_url']) - if user_orgs_response.ok: - response_json = user_orgs_response.json() - self.user_orgs = set(org['id'] for org in response_json) - else: - self.user_orgs = set([]) - - print(self.get_dict()) + self.user_orgs = set([]) + if need_orgs: + user_orgs_response = requests.get(github_event['pull_request']['user']['organizations_url']) + if user_orgs_response.ok: + response_json = user_orgs_response.json() + self.user_orgs = set(org['id'] for org in response_json) def get_dict(self): return { diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index f7a3e894f29..26e648dae90 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -108,7 +108,7 @@ if __name__ == "__main__": with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: event = json.load(event_file) - pr_info = PRInfo(event) + pr_info = PRInfo(event, need_orgs=True) can_run, description = should_run_checks_for_pr(pr_info) gh = Github(os.getenv("GITHUB_TOKEN")) commit = get_commit(gh, pr_info.sha) From 1cc7b022b2298e1db169a09dd45dd7625a15aaac Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 19:32:17 +0300 Subject: [PATCH 068/438] Test --- .github/workflows/style-check.yml | 7 +- tests/ci/docker_images_check.py | 206 ++++++++++++++++++++++++++++++ tests/ci/pr_info.py | 12 +- tests/ci/style_check.py | 5 +- 4 files changed, 225 insertions(+), 5 deletions(-) create mode 100644 tests/ci/docker_images_check.py diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index 6183e5f4ffb..fe03f08127f 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -26,7 +26,12 @@ jobs: - name: Check out repository code uses: actions/checkout@v2 - name: Images check - run: echo "Hello world" + run: cd $GITHUB_WORKSPACE/tests/ci && python3 docker_images_check.py + env: + YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} + YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + DOCKER_ROBOT_PASSWORD: ${{ secrects.DOCKER_ROBOT_PASSWORD }} Style-Check: needs: DockerHubPush runs-on: [self-hosted] diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py new file mode 100644 index 00000000000..590935ab78b --- /dev/null +++ b/tests/ci/docker_images_check.py @@ -0,0 +1,206 @@ +#!/usr/bin/env python3 +import subprocess +import logging +from report import create_test_html_report +from s3_helper import S3Helper + +NAME = "Push to Dockerhub (actions)" + +def get_changed_docker_images(pr_info, repo_path, image_file_path): + images_dict = {} + path_to_images_file = os.path.join(repo_path, image_file_path) + if os.path.exists(path_to_images_file): + with open(path_to_images_file, 'r') as dict_file: + images_dict = json.load(dict_file) + else: + logging.info("Image file %s doesnt exists in repo %s", image_file_path, repo_path) + + dockerhub_repo_name = 'yandex' + if not images_dict: + return [], dockerhub_repo_name + + files_changed = pr_info.changed_files + + logging.info("Changed files for PR %s @ %s: %s", pr_info.number, pr_info.sha, str(files_changed)) + + changed_images = [] + + for dockerfile_dir, image_description in images_dict.items(): + if image_description['name'].startswith('clickhouse/'): + dockerhub_repo_name = 'clickhouse' + + for f in files_changed: + if f.startswith(dockerfile_dir): + logging.info( + "Found changed file '%s' which 
affects docker image '%s' with path '%s'", + f, image_description['name'], dockerfile_dir) + changed_images.append(dockerfile_dir) + break + + # The order is important: dependents should go later than bases, so that + # they are built with updated base versions. + index = 0 + while index < len(changed_images): + image = changed_images[index] + for dependent in images_dict[image]['dependent']: + logging.info( + "Marking docker image '%s' as changed because it depends on changed docker image '%s'", + dependent, image) + changed_images.append(dependent) + index += 1 + if index > 100: + # Sanity check to prevent infinite loop. + raise "Too many changed docker images, this is a bug." + str(changed_images) + + # If a dependent image was already in the list because its own files + # changed, but then it was added as a dependent of a changed base, we + # must remove the earlier entry so that it doesn't go earlier than its + # base. This way, the dependent will be rebuilt later than the base, and + # will correctly use the updated version of the base. + seen = set() + no_dups_reversed = [] + for x in reversed(changed_images): + if x not in seen: + seen.add(x) + no_dups_reversed.append(x) + + result = [(x, images_dict[x]['name']) for x in reversed(no_dups_reversed)] + logging.info("Changed docker images for PR %s @ %s: '%s'", pull_request.number, commit.sha, result) + return result, dockerhub_repo_name + +def build_and_push_one_image(path_to_dockerfile_folder, image_name, version_string): + logging.info("Building docker image %s with version %s from path %s", image_name, version_string, path_to_dockerfile_folder) + build_log = None + push_log = None + with open('build_log_' + str(image_name) + "_" + version_string, 'w') as pl: + cmd = "docker build --network=host -t {im}:{ver} {path}".format(im=image_name, ver=version_string, path=path_to_dockerfile_folder) + retcode = subprocess.Popen(cmd, shell=True, stderr=pl, stdout=pl).wait() + build_log = str(c.name) + if retcode != 0: + return False, build_log, None + + with open('tag_log_' + str(image_name) + "_" + version_string, 'w') as pl: + cmd = "docker build --network=host -t {im} {path}".format(im=image_name, path=path_to_dockerfile_folder) + retcode = subprocess.Popen(cmd, shell=True, stderr=pl, stdout=pl).wait() + build_log = str(pl.name) + if retcode != 0: + return False, build_log, None + + logging.info("Pushing image %s to dockerhub", image_name) + + with open('push_log_' + str(image_name) + "_" + version_string, 'w') as pl: + cmd = "docker push {im}:{ver}".format(im=image_name, ver=version_string) + retcode = subprocess.Popen(cmd, shell=True, stderr=pl, stdout=pl).wait() + push_log = str(pl.stdout.path) + if retcode != 0: + return False, build_log, push_log + + logging.info("Processing of %s successfully finished", image_name) + return True, build_log, push_log + +def process_single_image(versions, path_to_dockerfile_folder, image_name): + logging.info("Image will be pushed with versions %s", ', '.join(all_for_this_image)) + result = [] + for ver in versions: + for i in range(5): + success, build_log, push_log = build_and_push_one_image(path_to_dockerfile_folder, image_name, ver) + if success: + result.append((image_name + ":" + ver, build_log, push_log, 'OK')) + break + logging.info("Got error will retry %s time and sleep for %s seconds", i, i * 5) + time.sleep(i * 5) + else: + result.append((image_name + ":" + ver, build_log, push_log, 'FAIL')) + + logging.info("Processing finished") + return result + + +def process_test_results(s3_client, 
test_results, s3_path_prefix): + overall_status = 'success' + processed_test_results = [] + for image, build_log, push_log, status in test_results: + if status != 'OK': + overall_status = 'failure' + url_part = '' + if build_log is not None and os.path.exists(build_log): + build_url = s3_client.upload_test_report_to_s3( + build_log, + s3_path_prefix + "/" + os.path.basename(build_log)) + url_part += 'build_log'.format(build_url) + if push_log is not None and os.path.exists(push_log): + push_url = s3_client.upload_test_report_to_s3( + push_log, + s3_path_prefix + "/" + os.path.basename(push_log)) + if url_part: + url_part += ', ' + url_part += 'push_log'.format(push_url) + if url_part: + test_name = image + ' (' + url_part + ')' + else: + test_name = image + processed_test_results.append((test_name, status)) + return overall_status, processed_test_results + +def upload_results(s3_client, pr_number, commit_sha, test_results): + s3_path_prefix = f"{pr_number}/{commit_sha}/" + NAME.lower().replace(' ', '_') + + branch_url = "https://github.com/ClickHouse/ClickHouse/commits/master" + branch_name = "master" + if pr_number != 0: + branch_name = "PR #{}".format(pr_number) + branch_url = "https://github.com/ClickHouse/ClickHouse/pull/" + str(pr_number) + commit_url = f"https://github.com/ClickHouse/ClickHouse/commit/{commit_sha}" + + task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" + + html_report = create_test_html_report(NAME, test_results, raw_log_url, task_url, branch_url, branch_name, commit_url) + with open('report.html', 'w') as f: + f.write(html_report) + + url = s3_client.upload_test_report_to_s3('report.html', s3_path_prefix + ".html") + logging.info("Search result in url %s", url) + return url + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + repo_path = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")) + temp_path = os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'docker_images_check') + dockerhub_password = os.getenv('DOCKER_ROBOT_PASSWORD') + + with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: + event = json.load(event_file) + + pr_info = PRInfo(event) + changed_images, dockerhub_repo_name = get_changed_docker_images(pr_info, repo_path, image_file_path) + logging.info("Has changed images %s", ', '.join(changed_images)) + pr_commit_version = str(pr_info.number) + '-' + pr_info.sha + versions = [str(pr_info.number), pr_commit_version] + + subprocess.check_output("docker login --username 'robotclickhouse' --password '{}'".format(dockerhub_password), shell=True) + + images_processing_result = [] + for rel_path, image_name in changed_images: + full_path = os.path.join(repo_path, rel_path) + images_processing_result += process_single_image(versions, full_path, image) + + if len(changed_images): + description = "Updated " + ','.join([im[1] for im in images]) + else: + description = "Nothing to update" + + if len(description) >= 140: + description = description[:136] + "..." 
+ + aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") + aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") + + s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) + + s3_path_prefix = str(pull_info.number) + "/" + pr_info.sha + "/" + NAME.lower().replace(' ', '_') + status, test_results = process_test_results(s3_client, images_processing_result, s3_path_prefix) + + url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results) + + commit = get_commit(gh, pr_info.sha) + commit.create_status(context=NAME, description=description, state=status, target_url=url) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index eb159051ba2..0a8b0db2254 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -2,9 +2,12 @@ import requests import json import os +import subprocess +from unidiff import PatchSet + class PRInfo: - def __init__(self, github_event, need_orgs=False): + def __init__(self, github_event, need_orgs=False, need_changed_files=False): self.number = github_event['number'] if 'after' in github_event: self.sha = github_event['after'] @@ -20,6 +23,13 @@ class PRInfo: response_json = user_orgs_response.json() self.user_orgs = set(org['id'] for org in response_json) + self.changed_files = set([]) + if need_changed_files: + diff_url = github_event['pull_request']['diff_url'] + diff = urllib.request.urlopen(github_event['pull_request']['diff_url']) + diff_object = PatchSet(diff, diff.headers.get_charsets()[0]) + self.changed_files = set([f.path for f in diff_object]) + def get_dict(self): return { 'sha': self.sha, diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 04fb166ccbd..233c7a45470 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -54,11 +54,10 @@ def process_result(result_folder): state, description = "error", "Failed to read test_results.tsv" return state, description, test_results, additional_files -def upload_results(s3_client, pr_number, commit_sha, state, description, test_results, additional_files): +def upload_results(s3_client, pr_number, commit_sha, test_results, additional_files): s3_path_prefix = f"{pr_number}/{commit_sha}/style_check" additional_urls = process_logs(s3_client, additional_files, s3_path_prefix) - # Add link to help. Anchors in the docs must be adjusted accordingly. 
branch_url = "https://github.com/ClickHouse/ClickHouse/commits/master" branch_name = "master" if pr_number != 0: @@ -122,7 +121,7 @@ if __name__ == "__main__": subprocess.check_output(f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output clickhouse/style-test:{docker_image_version}", shell=True) state, description, test_results, additional_files = process_result(temp_path) - report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, state, description, test_results, additional_files) + report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_files) commit = get_commit(gh, pr_info.sha) commit.create_status(context=NAME, description=description, state=state, target_url=report_url) From 0f2a1e957b2e398890e7d74d2d1d86bd8548ae34 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 19:34:14 +0300 Subject: [PATCH 069/438] Fix check --- .github/workflows/style-check.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index fe03f08127f..c41e531f2c2 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -31,7 +31,7 @@ jobs: YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - DOCKER_ROBOT_PASSWORD: ${{ secrects.DOCKER_ROBOT_PASSWORD }} + DOCKER_ROBOT_PASSWORD: ${{ secrets.DOCKER_ROBOT_PASSWORD }} Style-Check: needs: DockerHubPush runs-on: [self-hosted] From c1d36e41f3e2fdb3a5d4bdd78e5eae6bc870e187 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 19:58:36 +0300 Subject: [PATCH 070/438] Docker images check --- tests/ci/docker_images_check.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 590935ab78b..8866df6e838 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -3,6 +3,7 @@ import subprocess import logging from report import create_test_html_report from s3_helper import S3Helper +import os NAME = "Push to Dockerhub (actions)" From 30d1f4c3adafcd495490f5d9fd7823fe6e552270 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 20:01:16 +0300 Subject: [PATCH 071/438] Docker images check --- tests/ci/docker_images_check.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 8866df6e838..7bf03427a78 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -3,6 +3,7 @@ import subprocess import logging from report import create_test_html_report from s3_helper import S3Helper +import json import os NAME = "Push to Dockerhub (actions)" From d2a76e32b8cb01fa836618a6a56ab84e50904e0b Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 20:04:48 +0300 Subject: [PATCH 072/438] Docker image --- tests/ci/docker_images_check.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 7bf03427a78..df4e47705ed 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -5,6 +5,8 @@ from report import create_test_html_report from s3_helper import S3Helper import json import os +from pr_info import PRInfo +from github import Github NAME = "Push to Dockerhub (actions)" @@ -204,5 +206,6 @@ if __name__ == "__main__": url = upload_results(s3_helper, 
pr_info.number, pr_info.sha, test_results) + gh = Github(os.getenv("GITHUB_TOKEN")) commit = get_commit(gh, pr_info.sha) commit.create_status(context=NAME, description=description, state=status, target_url=url) From 5a750e05fd3865ddf88db7d9508f96779073a69a Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 20:07:10 +0300 Subject: [PATCH 073/438] Fix --- tests/ci/docker_images_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index df4e47705ed..c49e88b1fc7 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -176,7 +176,7 @@ if __name__ == "__main__": event = json.load(event_file) pr_info = PRInfo(event) - changed_images, dockerhub_repo_name = get_changed_docker_images(pr_info, repo_path, image_file_path) + changed_images, dockerhub_repo_name = get_changed_docker_images(pr_info, repo_path, "docker/images.json") logging.info("Has changed images %s", ', '.join(changed_images)) pr_commit_version = str(pr_info.number) + '-' + pr_info.sha versions = [str(pr_info.number), pr_commit_version] From 9c6723056295a6d2257e2d006545844f3468185f Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 20:11:58 +0300 Subject: [PATCH 074/438] More fixes --- tests/ci/docker_images_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index c49e88b1fc7..91869f63d43 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -69,7 +69,7 @@ def get_changed_docker_images(pr_info, repo_path, image_file_path): no_dups_reversed.append(x) result = [(x, images_dict[x]['name']) for x in reversed(no_dups_reversed)] - logging.info("Changed docker images for PR %s @ %s: '%s'", pull_request.number, commit.sha, result) + logging.info("Changed docker images for PR %s @ %s: '%s'", pr_info.number, pr_info.sha, result) return result, dockerhub_repo_name def build_and_push_one_image(path_to_dockerfile_folder, image_name, version_string): From ea6aa62f0e41165cdd2713e575db4d3e44a19fc8 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 20:16:24 +0300 Subject: [PATCH 075/438] More fixes --- tests/ci/docker_images_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 91869f63d43..0e3eb54852e 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -201,7 +201,7 @@ if __name__ == "__main__": s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) - s3_path_prefix = str(pull_info.number) + "/" + pr_info.sha + "/" + NAME.lower().replace(' ', '_') + s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + NAME.lower().replace(' ', '_') status, test_results = process_test_results(s3_client, images_processing_result, s3_path_prefix) url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results) From 0115b428ee85e366cbdcbf6cf76b45f3e453514e Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 20:19:34 +0300 Subject: [PATCH 076/438] More fixes --- tests/ci/docker_images_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 0e3eb54852e..854d56b8017 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -202,7 +202,7 @@ if __name__ == 
"__main__": s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + NAME.lower().replace(' ', '_') - status, test_results = process_test_results(s3_client, images_processing_result, s3_path_prefix) + status, test_results = process_test_results(s3_helper, images_processing_result, s3_path_prefix) url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results) From 0924da80d7e882f334cb77970ade644cea44727f Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 20:22:27 +0300 Subject: [PATCH 077/438] More fixes --- tests/ci/docker_images_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 854d56b8017..0c2ebb52908 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -158,7 +158,7 @@ def upload_results(s3_client, pr_number, commit_sha, test_results): task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" - html_report = create_test_html_report(NAME, test_results, raw_log_url, task_url, branch_url, branch_name, commit_url) + html_report = create_test_html_report(NAME, test_results, "https://hub.docker.com/u/clickhouse", task_url, branch_url, branch_name, commit_url) with open('report.html', 'w') as f: f.write(html_report) From 735716a2851118150ae2010520a7f3ce8d01aabc Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 20:25:12 +0300 Subject: [PATCH 078/438] More fixes --- tests/ci/docker_images_check.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 0c2ebb52908..b046b8c9089 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -166,6 +166,11 @@ def upload_results(s3_client, pr_number, commit_sha, test_results): logging.info("Search result in url %s", url) return url +def get_commit(gh, commit_sha): + repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) + commit = repo.get_commit(commit_sha) + return commit + if __name__ == "__main__": logging.basicConfig(level=logging.INFO) repo_path = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")) From 71dcf03a0f83c8126f5b9153ca202248d3893837 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 20:35:15 +0300 Subject: [PATCH 079/438] Create changed image --- docker/test/style/run.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/test/style/run.sh b/docker/test/style/run.sh index 424bfe71b15..0118e6df764 100755 --- a/docker/test/style/run.sh +++ b/docker/test/style/run.sh @@ -1,5 +1,7 @@ #!/bin/bash +# yaml check is not the best one + cd /ClickHouse/utils/check-style || echo -e "failure\tRepo not found" > /test_output/check_status.tsv ./check-style -n |& tee /test_output/style_output.txt ./check-typos |& tee /test_output/typos_output.txt From 56499fb7ca56d6c8a51640db01883ae237b2789b Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 20:47:04 +0300 Subject: [PATCH 080/438] Track changed files --- tests/ci/docker_images_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index b046b8c9089..6a9d1ba79c9 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -180,7 +180,7 @@ if __name__ == "__main__": with 
open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: event = json.load(event_file) - pr_info = PRInfo(event) + pr_info = PRInfo(event, False, True) changed_images, dockerhub_repo_name = get_changed_docker_images(pr_info, repo_path, "docker/images.json") logging.info("Has changed images %s", ', '.join(changed_images)) pr_commit_version = str(pr_info.number) + '-' + pr_info.sha From 6245cc8b6aee5fe78ffa0282e13e67b626ab3404 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 20:48:39 +0300 Subject: [PATCH 081/438] Track changed files --- tests/ci/pr_info.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 0a8b0db2254..8feedb2d4d7 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -3,6 +3,7 @@ import requests import json import os import subprocess +import urllib from unidiff import PatchSet From 2ac210d63bc2abdd28b55123804737691c482a1a Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 20:55:21 +0300 Subject: [PATCH 082/438] One more --- tests/ci/docker_images_check.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 6a9d1ba79c9..92353bb4386 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -103,7 +103,7 @@ def build_and_push_one_image(path_to_dockerfile_folder, image_name, version_stri return True, build_log, push_log def process_single_image(versions, path_to_dockerfile_folder, image_name): - logging.info("Image will be pushed with versions %s", ', '.join(all_for_this_image)) + logging.info("Image will be pushed with versions %s", ', '.join(versions)) result = [] for ver in versions: for i in range(5): @@ -182,7 +182,7 @@ if __name__ == "__main__": pr_info = PRInfo(event, False, True) changed_images, dockerhub_repo_name = get_changed_docker_images(pr_info, repo_path, "docker/images.json") - logging.info("Has changed images %s", ', '.join(changed_images)) + logging.info("Has changed images %s", ', '.join([str(image[0]) for image in changed_images])) pr_commit_version = str(pr_info.number) + '-' + pr_info.sha versions = [str(pr_info.number), pr_commit_version] @@ -191,7 +191,7 @@ if __name__ == "__main__": images_processing_result = [] for rel_path, image_name in changed_images: full_path = os.path.join(repo_path, rel_path) - images_processing_result += process_single_image(versions, full_path, image) + images_processing_result += process_single_image(versions, full_path, image_name) if len(changed_images): description = "Updated " + ','.join([im[1] for im in images]) From d517ac3fce992f96a557bd0f59d7dd08b3a92f8a Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 20:59:06 +0300 Subject: [PATCH 083/438] Fix build --- tests/ci/docker_images_check.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 92353bb4386..222b5ae3723 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -76,14 +76,14 @@ def build_and_push_one_image(path_to_dockerfile_folder, image_name, version_stri logging.info("Building docker image %s with version %s from path %s", image_name, version_string, path_to_dockerfile_folder) build_log = None push_log = None - with open('build_log_' + str(image_name) + "_" + version_string, 'w') as pl: + with open('build_log_' + str(image_name).replace('/', '_') + "_" + version_string, 'w') as pl: cmd = "docker build --network=host -t 
{im}:{ver} {path}".format(im=image_name, ver=version_string, path=path_to_dockerfile_folder) retcode = subprocess.Popen(cmd, shell=True, stderr=pl, stdout=pl).wait() build_log = str(c.name) if retcode != 0: return False, build_log, None - with open('tag_log_' + str(image_name) + "_" + version_string, 'w') as pl: + with open('tag_log_' + str(image_name).replace('/', '_') + "_" + version_string, 'w') as pl: cmd = "docker build --network=host -t {im} {path}".format(im=image_name, path=path_to_dockerfile_folder) retcode = subprocess.Popen(cmd, shell=True, stderr=pl, stdout=pl).wait() build_log = str(pl.name) @@ -92,7 +92,7 @@ def build_and_push_one_image(path_to_dockerfile_folder, image_name, version_stri logging.info("Pushing image %s to dockerhub", image_name) - with open('push_log_' + str(image_name) + "_" + version_string, 'w') as pl: + with open('push_log_' + str(image_name).replace('/', '_') + "_" + version_string, 'w') as pl: cmd = "docker push {im}:{ver}".format(im=image_name, ver=version_string) retcode = subprocess.Popen(cmd, shell=True, stderr=pl, stdout=pl).wait() push_log = str(pl.stdout.path) From 3fedd11c0bf41480be8fe1aef0c9b19261916381 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 21:01:36 +0300 Subject: [PATCH 084/438] Fix accident changes: --- tests/ci/docker_images_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 222b5ae3723..06162fe4624 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -79,7 +79,7 @@ def build_and_push_one_image(path_to_dockerfile_folder, image_name, version_stri with open('build_log_' + str(image_name).replace('/', '_') + "_" + version_string, 'w') as pl: cmd = "docker build --network=host -t {im}:{ver} {path}".format(im=image_name, ver=version_string, path=path_to_dockerfile_folder) retcode = subprocess.Popen(cmd, shell=True, stderr=pl, stdout=pl).wait() - build_log = str(c.name) + build_log = str(pl.name) if retcode != 0: return False, build_log, None From 7a4ff98612d8854de9da90cd80121b5d57bae43e Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 21:05:02 +0300 Subject: [PATCH 085/438] Fix accident changes: --- tests/ci/docker_images_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 06162fe4624..c4532e449f5 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -95,7 +95,7 @@ def build_and_push_one_image(path_to_dockerfile_folder, image_name, version_stri with open('push_log_' + str(image_name).replace('/', '_') + "_" + version_string, 'w') as pl: cmd = "docker push {im}:{ver}".format(im=image_name, ver=version_string) retcode = subprocess.Popen(cmd, shell=True, stderr=pl, stdout=pl).wait() - push_log = str(pl.stdout.path) + push_log = str(pl.name) if retcode != 0: return False, build_log, push_log From 340d24d07b41e108ebbee1ff7104221578211758 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 21:07:42 +0300 Subject: [PATCH 086/438] Fix accident changes: --- tests/ci/docker_images_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index c4532e449f5..0e8414e6df5 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -194,7 +194,7 @@ if __name__ == "__main__": images_processing_result += process_single_image(versions, full_path, image_name) 
if len(changed_images): - description = "Updated " + ','.join([im[1] for im in images]) + description = "Updated " + ','.join([im[1] for im in changed_images]) else: description = "Nothing to update" From b5aca2265be2a996f30ea5a26f0beea55d49b0a3 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 21:26:48 +0300 Subject: [PATCH 087/438] Trying to path images --- .github/workflows/style-check.yml | 10 ++++++++++ tests/ci/docker_images_check.py | 7 +++++++ tests/ci/style_check.py | 15 +++++++++++++-- 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index c41e531f2c2..4bfffcf9f15 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -32,10 +32,20 @@ jobs: YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} DOCKER_ROBOT_PASSWORD: ${{ secrets.DOCKER_ROBOT_PASSWORD }} + - name: Upload images files to artifacts + uses: actions/upload-artifact@v2 + with: + name: changed_images + path: ${{ runner.temp }}/changed_images.json Style-Check: needs: DockerHubPush runs-on: [self-hosted] steps: + - name: Download math result for job 1 + uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ runner.temp }}/changed_images.json - name: Check out repository code uses: actions/checkout@v2 - name: Style Check diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 0e8414e6df5..284406466a9 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -184,20 +184,24 @@ if __name__ == "__main__": changed_images, dockerhub_repo_name = get_changed_docker_images(pr_info, repo_path, "docker/images.json") logging.info("Has changed images %s", ', '.join([str(image[0]) for image in changed_images])) pr_commit_version = str(pr_info.number) + '-' + pr_info.sha + versions = [str(pr_info.number), pr_commit_version] subprocess.check_output("docker login --username 'robotclickhouse' --password '{}'".format(dockerhub_password), shell=True) + result_images = {} images_processing_result = [] for rel_path, image_name in changed_images: full_path = os.path.join(repo_path, rel_path) images_processing_result += process_single_image(versions, full_path, image_name) + result_images[image_name] = pr_commit_version if len(changed_images): description = "Updated " + ','.join([im[1] for im in changed_images]) else: description = "Nothing to update" + if len(description) >= 140: description = description[:136] + "..." 
@@ -214,3 +218,6 @@ if __name__ == "__main__": gh = Github(os.getenv("GITHUB_TOKEN")) commit = get_commit(gh, pr_info.sha) commit.create_status(context=NAME, description=description, state=status, target_url=url) + + with open(os.path.join(temp_path, 'changed_images.json'), 'w') as images_file: + json.dump(result_images, images_file) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 233c7a45470..9721fe60b18 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -107,7 +107,18 @@ if __name__ == "__main__": gh = Github(os.getenv("GITHUB_TOKEN")) - docker_image_version = os.getenv("DOCKER_IMAGE_VERSION", "latest") + images_path = os.path.join(temp_path, 'changed_images.json') + docker_image = 'clickhouse/style-check' + if os.path.exists(images_path): + logging.info("Images file exists") + with open(images_path, 'r') as images_fd: + images = json.loads(images_fd) + logging.info("Got images %s", images) + if 'clickhouse/style-check' in images: + docker_image += ':' + images['clickhouse/style-check'] + + logging.info("Got docker image %s", docker_image) + if not aws_secret_key_id or not aws_secret_key: logging.info("No secrets, will not upload anything to S3") @@ -119,7 +130,7 @@ if __name__ == "__main__": if not os.path.exists(temp_path): os.makedirs(temp_path) - subprocess.check_output(f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output clickhouse/style-test:{docker_image_version}", shell=True) + subprocess.check_output(f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output {docker_image}", shell=True) state, description, test_results, additional_files = process_result(temp_path) report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_files) From fc43998c944c42bfc4c9c6ab1346d121f705c5cf Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 21:48:06 +0300 Subject: [PATCH 088/438] Fixes --- .github/workflows/style-check.yml | 4 ++-- tests/ci/docker_images_check.py | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index 4bfffcf9f15..da2b433a62b 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -36,7 +36,7 @@ jobs: uses: actions/upload-artifact@v2 with: name: changed_images - path: ${{ runner.temp }}/changed_images.json + path: ${{ runner.temp }}/docker_images_check/changed_images.json Style-Check: needs: DockerHubPush runs-on: [self-hosted] @@ -45,7 +45,7 @@ jobs: uses: actions/download-artifact@v2 with: name: changed_images - path: ${{ runner.temp }}/changed_images.json + path: ${{ runner.temp }}/style_check/changed_images.json - name: Check out repository code uses: actions/checkout@v2 - name: Style Check diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 284406466a9..254efa9e94a 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -177,6 +177,12 @@ if __name__ == "__main__": temp_path = os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'docker_images_check') dockerhub_password = os.getenv('DOCKER_ROBOT_PASSWORD') + if os.path.exists(temp_path): + shutil.rmtree(temp_path) + + if not os.path.exists(temp_path): + os.makedirs(temp_path) + with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: event = json.load(event_file) From e9364fc0752309c212782eadedef24eed6129ece Mon Sep 17 00:00:00 2001 From: alesapin 
Date: Wed, 15 Sep 2021 21:52:37 +0300 Subject: [PATCH 089/438] One more time --- .github/workflows/style-check.yml | 2 +- tests/ci/docker_images_check.py | 3 --- tests/ci/style_check.py | 9 +++------ 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index da2b433a62b..1a7ceb323cb 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -45,7 +45,7 @@ jobs: uses: actions/download-artifact@v2 with: name: changed_images - path: ${{ runner.temp }}/style_check/changed_images.json + path: ${{ runner.temp }}/style_check - name: Check out repository code uses: actions/checkout@v2 - name: Style Check diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 254efa9e94a..bb64474c878 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -177,9 +177,6 @@ if __name__ == "__main__": temp_path = os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'docker_images_check') dockerhub_password = os.getenv('DOCKER_ROBOT_PASSWORD') - if os.path.exists(temp_path): - shutil.rmtree(temp_path) - if not os.path.exists(temp_path): os.makedirs(temp_path) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 9721fe60b18..008e3e88490 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -102,6 +102,9 @@ if __name__ == "__main__": event = json.load(event_file) pr_info = PRInfo(event) + if not os.path.exists(temp_path): + os.makedirs(temp_path) + aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") @@ -124,12 +127,6 @@ if __name__ == "__main__": s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) - if os.path.exists(temp_path): - shutil.rmtree(temp_path) - - if not os.path.exists(temp_path): - os.makedirs(temp_path) - subprocess.check_output(f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output {docker_image}", shell=True) state, description, test_results, additional_files = process_result(temp_path) report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_files) From fbd7cc4f690fdc693ee83789e2cf68fa6318e67d Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 21:54:20 +0300 Subject: [PATCH 090/438] Followup --- .github/workflows/style-check.yml | 2 +- tests/ci/style_check.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index 1a7ceb323cb..cde033d4d91 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -41,7 +41,7 @@ jobs: needs: DockerHubPush runs-on: [self-hosted] steps: - - name: Download math result for job 1 + - name: Download changed images uses: actions/download-artifact@v2 with: name: changed_images diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 008e3e88490..9ab27bb22a5 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -115,7 +115,7 @@ if __name__ == "__main__": if os.path.exists(images_path): logging.info("Images file exists") with open(images_path, 'r') as images_fd: - images = json.loads(images_fd) + images = json.load(images_fd) logging.info("Got images %s", images) if 'clickhouse/style-check' in images: docker_image += ':' + images['clickhouse/style-check'] From 
25171f8e0768941d4c96e1a850042a63cdbb5ea9 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 15 Sep 2021 21:57:48 +0300 Subject: [PATCH 091/438] Almost there --- tests/ci/style_check.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 9ab27bb22a5..9ff9d7e54ac 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -111,14 +111,14 @@ if __name__ == "__main__": gh = Github(os.getenv("GITHUB_TOKEN")) images_path = os.path.join(temp_path, 'changed_images.json') - docker_image = 'clickhouse/style-check' + docker_image = 'clickhouse/style-test' if os.path.exists(images_path): logging.info("Images file exists") with open(images_path, 'r') as images_fd: images = json.load(images_fd) logging.info("Got images %s", images) - if 'clickhouse/style-check' in images: - docker_image += ':' + images['clickhouse/style-check'] + if 'clickhouse/style-test' in images: + docker_image += ':' + images['clickhouse/style-test'] logging.info("Got docker image %s", docker_image) From bc1a0b79354626b9e8d6d4d5a5b13a47d9c425ba Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 11:33:55 +0300 Subject: [PATCH 092/438] Branding? --- .github/workflows/style-check.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index cde033d4d91..08c96d2398d 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -1,4 +1,7 @@ name: Ligthweight GithubActions +branding: + icon: 'award' + color: 'green' on: # yamllint disable-line rule:truthy pull_request: types: From e471cdce4999e6b618eb617dbb969142cc8ea265 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 12:26:49 +0300 Subject: [PATCH 093/438] Trying reports --- .github/workflows/style-check.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index 08c96d2398d..5f8032b43d3 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -1,7 +1,4 @@ name: Ligthweight GithubActions -branding: - icon: 'award' - color: 'green' on: # yamllint disable-line rule:truthy pull_request: types: @@ -22,6 +19,8 @@ jobs: run: cd $GITHUB_WORKSPACE/tests/ci && python3 run_check.py env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Just Checking + run: echo "::notice ::Hello world" DockerHubPush: needs: CheckLabels runs-on: [self-hosted] From cbc34c66b0577b6a55c4e22413426331cb99fb33 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 12:33:38 +0300 Subject: [PATCH 094/438] Trying annotations --- .github/workflows/style-check.yml | 2 -- tests/ci/docker_images_check.py | 2 ++ tests/ci/style_check.py | 4 +--- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index 5f8032b43d3..cde033d4d91 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -19,8 +19,6 @@ jobs: run: cd $GITHUB_WORKSPACE/tests/ci && python3 run_check.py env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Just Checking - run: echo "::notice ::Hello world" DockerHubPush: needs: CheckLabels runs-on: [self-hosted] diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index bb64474c878..a7901b5fda8 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -224,3 +224,5 @@ if __name__ == "__main__": with 
open(os.path.join(temp_path, 'changed_images.json'), 'w') as images_file: json.dump(result_images, images_file) + + print("::notice ::Report url: {}".format(url)) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 9ff9d7e54ac..4a75eee70ee 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -130,6 +130,4 @@ if __name__ == "__main__": subprocess.check_output(f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output {docker_image}", shell=True) state, description, test_results, additional_files = process_result(temp_path) report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_files) - - commit = get_commit(gh, pr_info.sha) - commit.create_status(context=NAME, description=description, state=state, target_url=report_url) + print("::notice ::Report url: {}".format(report_url)) From a2772f4304757ef6da7a4b466820da9065a5dd9f Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 12:37:39 +0300 Subject: [PATCH 095/438] Maybe supports html --- tests/ci/docker_images_check.py | 2 +- tests/ci/run_check.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index a7901b5fda8..639f19e6973 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -225,4 +225,4 @@ if __name__ == "__main__": with open(os.path.join(temp_path, 'changed_images.json'), 'w') as images_file: json.dump(result_images, images_file) - print("::notice ::Report url: {}".format(url)) + print("::notice ::
Report url
: {}".format(url)) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 26e648dae90..788bfc5b5b1 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -114,7 +114,9 @@ if __name__ == "__main__": commit = get_commit(gh, pr_info.sha) url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" if not can_run: + print("::notice ::
Cannot run
") commit.create_status(context=NAME, description=description, state="failure", target_url=url) sys.exit(1) else: + print("::notice ::
Can run
") commit.create_status(context=NAME, description=description, state="pending", target_url=url) From dc3396a2cfc3bf6f676f32cf6feb15197200e936 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 12:41:13 +0300 Subject: [PATCH 096/438] Branding --- .github/workflows/style-check.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index cde033d4d91..3f691242acc 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -11,6 +11,9 @@ on: # yamllint disable-line rule:truthy - master jobs: CheckLabels: + branding: + icon: 'award' + color: 'green' runs-on: [self-hosted] steps: - name: Check out repository code @@ -19,6 +22,7 @@ jobs: run: cd $GITHUB_WORKSPACE/tests/ci && python3 run_check.py env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: echo "::notice ::Hello world" DockerHubPush: needs: CheckLabels runs-on: [self-hosted] From e439532f905ad14c8f7e17b65e328059328a127d Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 12:43:19 +0300 Subject: [PATCH 097/438] Followup --- .github/workflows/style-check.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index 3f691242acc..6d8013c2e8a 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -22,7 +22,6 @@ jobs: run: cd $GITHUB_WORKSPACE/tests/ci && python3 run_check.py env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: echo "::notice ::Hello world" DockerHubPush: needs: CheckLabels runs-on: [self-hosted] From 42da0d71788defe521585ec4086a50beef3a0a4b Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 13:04:37 +0300 Subject: [PATCH 098/438] Trying split actions --- .github/workflows/docker-hub-action.yml | 18 ++++++++++++++++++ .github/workflows/style-check.yml | 7 +------ 2 files changed, 19 insertions(+), 6 deletions(-) create mode 100644 .github/workflows/docker-hub-action.yml diff --git a/.github/workflows/docker-hub-action.yml b/.github/workflows/docker-hub-action.yml new file mode 100644 index 00000000000..924c4692e57 --- /dev/null +++ b/.github/workflows/docker-hub-action.yml @@ -0,0 +1,18 @@ +name: 'DockerHubPush' +description: 'Action push images to dockerhub' +runs: + steps: + - name: Check out repository code + uses: actions/checkout@v2 + - name: Images check + run: cd $GITHUB_WORKSPACE/tests/ci && python3 docker_images_check.py + env: + YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} + YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + DOCKER_ROBOT_PASSWORD: ${{ secrets.DOCKER_ROBOT_PASSWORD }} + - name: Upload images files to artifacts + uses: actions/upload-artifact@v2 + with: + name: changed_images + path: ${{ runner.temp }}/docker_images_check/changed_images.json diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index 6d8013c2e8a..72ea5a8a27d 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -11,9 +11,6 @@ on: # yamllint disable-line rule:truthy - master jobs: CheckLabels: - branding: - icon: 'award' - color: 'green' runs-on: [self-hosted] steps: - name: Check out repository code @@ -26,10 +23,8 @@ jobs: needs: CheckLabels runs-on: [self-hosted] steps: - - name: Check out repository code - uses: actions/checkout@v2 - name: Images check - run: cd $GITHUB_WORKSPACE/tests/ci && python3 docker_images_check.py + uses: .github/workflows 
env: YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} From fd0a4bdd67f70dff65f11b9408b9e7c19632565a Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 13:05:59 +0300 Subject: [PATCH 099/438] Add on --- .github/workflows/docker-hub-action.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/docker-hub-action.yml b/.github/workflows/docker-hub-action.yml index 924c4692e57..66d969ec29f 100644 --- a/.github/workflows/docker-hub-action.yml +++ b/.github/workflows/docker-hub-action.yml @@ -1,5 +1,15 @@ name: 'DockerHubPush' description: 'Action push images to dockerhub' +on: # yamllint disable-line rule:truthy + pull_request: + types: + - labeled + - unlabeled + - synchronize + - reopened + - opened + branches: + - master runs: steps: - name: Check out repository code From bafad0fb09962806ca7596655c52b3cc0f7e7576 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 13:07:10 +0300 Subject: [PATCH 100/438] Trying --- .github/{workflows => actions}/docker-hub-action.yml | 0 .github/workflows/style-check.yml | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename .github/{workflows => actions}/docker-hub-action.yml (100%) diff --git a/.github/workflows/docker-hub-action.yml b/.github/actions/docker-hub-action.yml similarity index 100% rename from .github/workflows/docker-hub-action.yml rename to .github/actions/docker-hub-action.yml diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index 72ea5a8a27d..72d6d104a2b 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -24,7 +24,7 @@ jobs: runs-on: [self-hosted] steps: - name: Images check - uses: .github/workflows + uses: .github/actions env: YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} From 5d3c92dcaf0cc5d0263e131f7f05ada78e97d17a Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 13:11:35 +0300 Subject: [PATCH 101/438] No idea --- .github/workflows/style-check.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index 72d6d104a2b..29943ce30ee 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -23,6 +23,8 @@ jobs: needs: CheckLabels runs-on: [self-hosted] steps: + - name: Check out repository code + uses: actions/checkout@v2 - name: Images check uses: .github/actions env: From 0c68a7c1a9fc13441dd4040a225b2e9de5333cf7 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 13:15:14 +0300 Subject: [PATCH 102/438] Don't give up --- .../{docker-hub-action.yml => docker-hub-push/action.yml} | 0 .github/workflows/style-check.yml | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename .github/actions/{docker-hub-action.yml => docker-hub-push/action.yml} (100%) diff --git a/.github/actions/docker-hub-action.yml b/.github/actions/docker-hub-push/action.yml similarity index 100% rename from .github/actions/docker-hub-action.yml rename to .github/actions/docker-hub-push/action.yml diff --git a/.github/workflows/style-check.yml b/.github/workflows/style-check.yml index 29943ce30ee..37f48fdfc29 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/style-check.yml @@ -26,7 +26,7 @@ jobs: - name: Check out repository code uses: actions/checkout@v2 - name: Images check - uses: .github/actions + uses: 
./.github/actions/docker-hub-push env: YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} From 52d6d44370a10498730f90dbe86f72977948646b Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 13:20:39 +0300 Subject: [PATCH 103/438] Shell bash --- .github/actions/docker-hub-push/action.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/actions/docker-hub-push/action.yml b/.github/actions/docker-hub-push/action.yml index 66d969ec29f..9352d158a3b 100644 --- a/.github/actions/docker-hub-push/action.yml +++ b/.github/actions/docker-hub-push/action.yml @@ -16,11 +16,7 @@ runs: uses: actions/checkout@v2 - name: Images check run: cd $GITHUB_WORKSPACE/tests/ci && python3 docker_images_check.py - env: - YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} - YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - DOCKER_ROBOT_PASSWORD: ${{ secrets.DOCKER_ROBOT_PASSWORD }} + shell: bash - name: Upload images files to artifacts uses: actions/upload-artifact@v2 with: From 1bfcbe281cceeba679454fff37e1d3a1f09ff107 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 13:26:45 +0300 Subject: [PATCH 104/438] Trying run --- .github/actions/docker-hub-push/action.yml | 24 ------------------- .../workflows/{style-check.yml => main.yml} | 2 +- tests/ci/run_check.py | 4 ++-- tests/ci/style_check.py | 2 +- 4 files changed, 4 insertions(+), 28 deletions(-) delete mode 100644 .github/actions/docker-hub-push/action.yml rename .github/workflows/{style-check.yml => main.yml} (95%) diff --git a/.github/actions/docker-hub-push/action.yml b/.github/actions/docker-hub-push/action.yml deleted file mode 100644 index 9352d158a3b..00000000000 --- a/.github/actions/docker-hub-push/action.yml +++ /dev/null @@ -1,24 +0,0 @@ -name: 'DockerHubPush' -description: 'Action push images to dockerhub' -on: # yamllint disable-line rule:truthy - pull_request: - types: - - labeled - - unlabeled - - synchronize - - reopened - - opened - branches: - - master -runs: - steps: - - name: Check out repository code - uses: actions/checkout@v2 - - name: Images check - run: cd $GITHUB_WORKSPACE/tests/ci && python3 docker_images_check.py - shell: bash - - name: Upload images files to artifacts - uses: actions/upload-artifact@v2 - with: - name: changed_images - path: ${{ runner.temp }}/docker_images_check/changed_images.json diff --git a/.github/workflows/style-check.yml b/.github/workflows/main.yml similarity index 95% rename from .github/workflows/style-check.yml rename to .github/workflows/main.yml index 37f48fdfc29..cde033d4d91 100644 --- a/.github/workflows/style-check.yml +++ b/.github/workflows/main.yml @@ -26,7 +26,7 @@ jobs: - name: Check out repository code uses: actions/checkout@v2 - name: Images check - uses: ./.github/actions/docker-hub-push + run: cd $GITHUB_WORKSPACE/tests/ci && python3 docker_images_check.py env: YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 788bfc5b5b1..87dc21beda2 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -114,9 +114,9 @@ if __name__ == "__main__": commit = get_commit(gh, pr_info.sha) url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" if not can_run: - print("::notice ::
Cannot run
") + print("::notice ::**Cannot run**") commit.create_status(context=NAME, description=description, state="failure", target_url=url) sys.exit(1) else: - print("::notice ::
Can run
") + print("::notice ::**Can run**") commit.create_status(context=NAME, description=description, state="pending", target_url=url) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 4a75eee70ee..3438e40a5b4 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -130,4 +130,4 @@ if __name__ == "__main__": subprocess.check_output(f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output {docker_image}", shell=True) state, description, test_results, additional_files = process_result(temp_path) report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_files) - print("::notice ::Report url: {}".format(report_url)) + print("::notice ::Report *url*: {} and one more [test]({})".format(report_url, report_url)) From 8b0a85fd90dfa36a3619e34a972e7481f3aed704 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 13:39:36 +0300 Subject: [PATCH 105/438] Remove --- tests/ci/docker_images_check.py | 2 +- tests/ci/run_check.py | 4 ++-- tests/ci/style_check.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 639f19e6973..a7901b5fda8 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -225,4 +225,4 @@ if __name__ == "__main__": with open(os.path.join(temp_path, 'changed_images.json'), 'w') as images_file: json.dump(result_images, images_file) - print("::notice ::
Report url
: {}".format(url)) + print("::notice ::Report url: {}".format(url)) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 87dc21beda2..70b3ae2ac07 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -114,9 +114,9 @@ if __name__ == "__main__": commit = get_commit(gh, pr_info.sha) url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" if not can_run: - print("::notice ::**Cannot run**") + print("::notice ::Cannot run") commit.create_status(context=NAME, description=description, state="failure", target_url=url) sys.exit(1) else: - print("::notice ::**Can run**") + print("::notice ::Can run") commit.create_status(context=NAME, description=description, state="pending", target_url=url) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 3438e40a5b4..4a75eee70ee 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -130,4 +130,4 @@ if __name__ == "__main__": subprocess.check_output(f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output {docker_image}", shell=True) state, description, test_results, additional_files = process_result(temp_path) report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_files) - print("::notice ::Report *url*: {} and one more [test]({})".format(report_url, report_url)) + print("::notice ::Report url: {}".format(report_url)) From 2a74979c3aecf325d5233b97fde7658476fdc55d Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 13:52:04 +0300 Subject: [PATCH 106/438] Trying output --- tests/ci/docker_images_check.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index a7901b5fda8..b9353a0a44f 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -177,6 +177,9 @@ if __name__ == "__main__": temp_path = os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'docker_images_check') dockerhub_password = os.getenv('DOCKER_ROBOT_PASSWORD') + if os.path.exists(temp_path): + shutil.rmtree(temp_path) + if not os.path.exists(temp_path): os.makedirs(temp_path) @@ -226,3 +229,5 @@ if __name__ == "__main__": json.dump(result_images, images_file) print("::notice ::Report url: {}".format(url)) + print("::set-output name=url_output::\"{}\"".format(url)) +) From 7aac1e29b9931b07185dc84c2d76179a135c5aa6 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 13:54:57 +0300 Subject: [PATCH 107/438] Remove --- tests/ci/docker_images_check.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index b9353a0a44f..bff229e15ff 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -230,4 +230,3 @@ if __name__ == "__main__": print("::notice ::Report url: {}".format(url)) print("::set-output name=url_output::\"{}\"".format(url)) -) From 755e4d2e9e6d8c973dd57eb4faabe914a929adcc Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 14:17:26 +0300 Subject: [PATCH 108/438] Actions --- tests/ci/style_check.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 4a75eee70ee..47b12c11173 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -131,3 +131,5 @@ if __name__ == "__main__": state, description, test_results, additional_files = process_result(temp_path) report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, 
test_results, additional_files) print("::notice ::Report url: {}".format(report_url)) + commit = get_commit(gh, pr_info.sha) + commit.create_status(context=NAME, description=description, state=status, target_url=url) From ec7d83c9e080353b20cb68800a483596af222ee2 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 14:41:07 +0300 Subject: [PATCH 109/438] Fix style check --- tests/ci/style_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 47b12c11173..5b2c2258585 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -132,4 +132,4 @@ if __name__ == "__main__": report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_files) print("::notice ::Report url: {}".format(report_url)) commit = get_commit(gh, pr_info.sha) - commit.create_status(context=NAME, description=description, state=status, target_url=url) + commit.create_status(context=NAME, description=description, state=state, target_url=report_url) From d25b8881e4c30163abc1099db89365740e1ff2f8 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 15:12:58 +0300 Subject: [PATCH 110/438] More copypaste --- .github/workflows/main.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index cde033d4d91..2ff22f0fee6 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -54,3 +54,21 @@ jobs: YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: cd $GITHUB_WORKSPACE/tests/ci && python3 style_check.py + PVS-Check: + needs: DockerHubPush + runs-on: [self-hosted] + steps: + - name: Download changed images + uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ runner.temp }}/pvs_check + - name: Check out repository code + uses: actions/checkout@v2 + - name: PVS Check + env: + YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} + YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + PVS_STUDIO_KEY: ${{ secrets.PVS_STUDIO_KEY }} + run: cd $GITHUB_WORKSPACE/tests/ci && python3 pvs_check.py From 1029cb3095634d4a883ffbc870f6809390e4631c Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 15:17:47 +0300 Subject: [PATCH 111/438] Add PVS check --- tests/ci/pvs_check.py | 137 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) create mode 100644 tests/ci/pvs_check.py diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py new file mode 100644 index 00000000000..4c2957770e9 --- /dev/null +++ b/tests/ci/pvs_check.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python3 +import subprocess +import os +import json +import logging +from github import Github +from report import create_test_html_report +from s3_helper import S3Helper +from pr_info import PRInfo +import shutil +import sys + +NAME = 'PVS Studio (actions)' +LICENSE_NAME = 'Free license: ClickHouse, Yandex' +HTML_REPORT_FOLDER = 'pvs-studio-html-report' +TXT_REPORT_NAME = 'pvs-studio-task-report.txt' + +def process_logs(s3_client, additional_logs, s3_path_prefix): + additional_urls = [] + for log_path in additional_logs: + if log_path: + additional_urls.append( + s3_client.upload_test_report_to_s3( + log_path, + s3_path_prefix + "/" + os.path.basename(log_path))) + + return additional_urls + +def _process_txt_report(self, path): + warnings = [] + errors = [] + with 
open(path, 'r') as report_file: + for line in report_file: + if 'viva64' in line: + continue + elif 'warn' in line: + warnings.append(':'.join(line.split('\t')[0:2])) + elif 'err' in line: + errors.append(':'.join(line.split('\t')[0:2])) + return warnings, errors + +def get_commit(gh, commit_sha): + repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) + commit = repo.get_commit(commit_sha) + return commit + +def upload_results(s3_client, pr_number, commit_sha, test_results, additional_files): + s3_path_prefix = str(pr_number) + "/" + commit_sha + "/" + NAME.lower().replace(' ', '_') + additional_urls = process_logs(s3_client, additional_files, s3_path_prefix) + + branch_url = "https://github.com/ClickHouse/ClickHouse/commits/master" + branch_name = "master" + if pr_number != 0: + branch_name = "PR #{}".format(pr_number) + branch_url = "https://github.com/ClickHouse/ClickHouse/pull/" + str(pr_number) + commit_url = f"https://github.com/ClickHouse/ClickHouse/commit/{commit_sha}" + + task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" + + raw_log_url = additional_urls[0] + additional_urls.pop(0) + + html_report = create_test_html_report(NAME, test_results, raw_log_url, task_url, branch_url, branch_name, commit_url, additional_urls) + with open('report.html', 'w') as f: + f.write(html_report) + + url = s3_client.upload_test_report_to_s3('report.html', s3_path_prefix + ".html") + logging.info("Search result in url %s", url) + return url + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + repo_path = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")) + temp_path = os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'pvs_check') + + with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: + event = json.load(event_file) + pr_info = PRInfo(event) + + if not os.path.exists(temp_path): + os.makedirs(temp_path) + + aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") + aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") + + gh = Github(os.getenv("GITHUB_TOKEN")) + + images_path = os.path.join(temp_path, 'changed_images.json') + docker_image = 'clickhouse/pvs-test' + if os.path.exists(images_path): + logging.info("Images file exists") + with open(images_path, 'r') as images_fd: + images = json.load(images_fd) + logging.info("Got images %s", images) + if 'clickhouse/pvs-test' in images: + docker_image += ':' + images['clickhouse/pvs-test'] + + logging.info("Got docker image %s", docker_image) + + if not aws_secret_key_id or not aws_secret_key: + logging.info("No secrets, will not upload anything to S3") + + s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) + + licence_key = os.getenv('PVS_STUDIO_KEY') + cmd = f"docker run --volume={repo_path}:/repo_folder --volume={temp_path}:/test_output -e LICENSE_NAME='{LICENSE_NAME}' -e LICENCE_KEY='{licence_key}' -e CC=clang-11 -e CXX=clang++-11 {docker_image}" + + subprocess.check_output(cmd, shell=True) + + s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + NAME.lower().replace(' ', '_') + html_urls = self.s3_client.upload_test_folder_to_s3(os.path.join(temp_path, HTML_REPORT_FOLDER), s3_path_prefix) + index_html = None + + commit = get_commit(gh, pr_info.sha) + for url in html_urls: + if 'index.html' in url: + index_html = 'HTML report'.format(url) + break + + if not index_html: + commit.create_status(context=NAME, description='PVS 
report failed to build', state='failure', target_url=f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}") + sys.exit(1) + + txt_report = os.path.join(temp_path, TXT_REPORT_NAME) + warnings, errors = _process_txt_report(txt_report) + errors = errors + warnings + + status = 'success' + test_results = [(index_html, "Look at the report")] + description = "Total errors {}".format(len(errors)) + additional_logs = [txt_report, os.path.join(temp_path, 'pvs-studio.log')] + report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_logs) + + print("::notice ::Report url: {}".format(report_url)) + commit = get_commit(gh, pr_info.sha) + commit.create_status(context=NAME, description=description, state=status, target_url=report_url) From 0ea203da310a753f456fb75329d22baceacf7b05 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 15:24:24 +0300 Subject: [PATCH 112/438] Checkout submodules for PVS --- .github/workflows/main.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 2ff22f0fee6..025ceea8c50 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -65,6 +65,8 @@ jobs: path: ${{ runner.temp }}/pvs_check - name: Check out repository code uses: actions/checkout@v2 + with: + submodules: true - name: PVS Check env: YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} From a895375893f7fd1575d4ec7cd6a430211f6e81f0 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 15:28:11 +0300 Subject: [PATCH 113/438] Something strange --- tests/ci/pvs_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py index 4c2957770e9..9aabb5f734c 100644 --- a/tests/ci/pvs_check.py +++ b/tests/ci/pvs_check.py @@ -127,7 +127,7 @@ if __name__ == "__main__": errors = errors + warnings status = 'success' - test_results = [(index_html, "Look at the report")] + test_results = [(index_html, "Look at the report"), ("Errors count not checked", "OK")] description = "Total errors {}".format(len(errors)) additional_logs = [txt_report, os.path.join(temp_path, 'pvs-studio.log')] report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_logs) From a70d4d13dfec21907a58dea4706013878c5c017c Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 15:44:42 +0300 Subject: [PATCH 114/438] Trying one more time --- .github/workflows/main.yml | 2 +- tests/ci/pvs_check.py | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 025ceea8c50..4e5c2ed19e9 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -66,7 +66,7 @@ jobs: - name: Check out repository code uses: actions/checkout@v2 with: - submodules: true + submodules: 'recursive' - name: PVS Check env: YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py index 9aabb5f734c..1becffedac2 100644 --- a/tests/ci/pvs_check.py +++ b/tests/ci/pvs_check.py @@ -104,9 +104,13 @@ if __name__ == "__main__": s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) licence_key = os.getenv('PVS_STUDIO_KEY') - cmd = f"docker run --volume={repo_path}:/repo_folder --volume={temp_path}:/test_output -e LICENSE_NAME='{LICENSE_NAME}' -e LICENCE_KEY='{licence_key}' -e CC=clang-11 -e 
CXX=clang++-11 {docker_image}" + cmd = f"docker run --volume={repo_path}:/repo_folder --volume={temp_path}:/test_output -e LICENSE_NAME='{LICENSE_NAME}' -e LICENCE_KEY='{licence_key}' {docker_image}" - subprocess.check_output(cmd, shell=True) + try: + subprocess.check_output(cmd, shell=True) + except: + commit.create_status(context=NAME, description='PVS report failed to build', state='failure', target_url=f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}") + sys.exit(1) s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + NAME.lower().replace(' ', '_') html_urls = self.s3_client.upload_test_folder_to_s3(os.path.join(temp_path, HTML_REPORT_FOLDER), s3_path_prefix) From fd4afa236b3756888b4018942a92b6cc2c28f925 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 16:02:40 +0300 Subject: [PATCH 115/438] Fix licence name --- tests/ci/pvs_check.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py index 1becffedac2..b52b0b32f1b 100644 --- a/tests/ci/pvs_check.py +++ b/tests/ci/pvs_check.py @@ -104,7 +104,8 @@ if __name__ == "__main__": s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) licence_key = os.getenv('PVS_STUDIO_KEY') - cmd = f"docker run --volume={repo_path}:/repo_folder --volume={temp_path}:/test_output -e LICENSE_NAME='{LICENSE_NAME}' -e LICENCE_KEY='{licence_key}' {docker_image}" + cmd = f"docker run --volume={repo_path}:/repo_folder --volume={temp_path}:/test_output -e LICENSE_NAME='{LICENCE_NAME}' -e LICENCE_KEY='{licence_key}' {docker_image}" + commit = get_commit(gh, pr_info.sha) try: subprocess.check_output(cmd, shell=True) @@ -116,7 +117,6 @@ if __name__ == "__main__": html_urls = self.s3_client.upload_test_folder_to_s3(os.path.join(temp_path, HTML_REPORT_FOLDER), s3_path_prefix) index_html = None - commit = get_commit(gh, pr_info.sha) for url in html_urls: if 'index.html' in url: index_html = 'HTML report'.format(url) From ed07b085de10b3f5131aec0e80ffc7b51df6382f Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 16:04:47 +0300 Subject: [PATCH 116/438] Trying to fix --- .github/workflows/main.yml | 1 + tests/ci/pvs_check.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 4e5c2ed19e9..245f76eb3b6 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -67,6 +67,7 @@ jobs: uses: actions/checkout@v2 with: submodules: 'recursive' + path: 'repo_with_submodules' - name: PVS Check env: YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py index b52b0b32f1b..516f4c16e41 100644 --- a/tests/ci/pvs_check.py +++ b/tests/ci/pvs_check.py @@ -71,7 +71,7 @@ def upload_results(s3_client, pr_number, commit_sha, test_results, additional_fi if __name__ == "__main__": logging.basicConfig(level=logging.INFO) - repo_path = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")) + repo_path = os.path.join(os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")), "repo_with_submodules") temp_path = os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'pvs_check') with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: From e3e69825183322d685c90d9909fdd151c81a1764 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 16:05:04 +0300 Subject: [PATCH 117/438] Followup --- tests/ci/pvs_check.py | 2 
+- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py index 516f4c16e41..3778a6e3110 100644 --- a/tests/ci/pvs_check.py +++ b/tests/ci/pvs_check.py @@ -104,7 +104,7 @@ if __name__ == "__main__": s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) licence_key = os.getenv('PVS_STUDIO_KEY') - cmd = f"docker run --volume={repo_path}:/repo_folder --volume={temp_path}:/test_output -e LICENSE_NAME='{LICENCE_NAME}' -e LICENCE_KEY='{licence_key}' {docker_image}" + cmd = f"docker run --volume={repo_path}:/repo_folder --volume={temp_path}:/test_output -e LICENCE_NAME='{LICENCE_NAME}' -e LICENCE_KEY='{licence_key}' {docker_image}" commit = get_commit(gh, pr_info.sha) try: From 68480a659e942e276d248e073f23d164c3f1ffdf Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 16:14:24 +0300 Subject: [PATCH 118/438] Followup --- tests/ci/pvs_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py index 3778a6e3110..c7f07a34e32 100644 --- a/tests/ci/pvs_check.py +++ b/tests/ci/pvs_check.py @@ -11,7 +11,7 @@ import shutil import sys NAME = 'PVS Studio (actions)' -LICENSE_NAME = 'Free license: ClickHouse, Yandex' +LICENCE_NAME = 'Free license: ClickHouse, Yandex' HTML_REPORT_FOLDER = 'pvs-studio-html-report' TXT_REPORT_NAME = 'pvs-studio-task-report.txt' From 736673bf08ca899c7e42280ebe598e58a516b57f Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 16:52:16 +0300 Subject: [PATCH 119/438] Moar --- tests/ci/pvs_check.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py index c7f07a34e32..75febd9cd49 100644 --- a/tests/ci/pvs_check.py +++ b/tests/ci/pvs_check.py @@ -26,7 +26,7 @@ def process_logs(s3_client, additional_logs, s3_path_prefix): return additional_urls -def _process_txt_report(self, path): +def _process_txt_report(path): warnings = [] errors = [] with open(path, 'r') as report_file: @@ -114,7 +114,7 @@ if __name__ == "__main__": sys.exit(1) s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + NAME.lower().replace(' ', '_') - html_urls = self.s3_client.upload_test_folder_to_s3(os.path.join(temp_path, HTML_REPORT_FOLDER), s3_path_prefix) + html_urls = s3_helper.upload_test_folder_to_s3(os.path.join(temp_path, HTML_REPORT_FOLDER), s3_path_prefix) index_html = None for url in html_urls: From 8b1be85bcbe771e7c28fd3bd4a4b199ff202705a Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 18:51:43 +0300 Subject: [PATCH 120/438] Trying other way --- .github/workflows/main.yml | 2 ++ tests/ci/pvs_check.py | 46 +++++++++++++++++++++----------------- tests/ci/s3_helper.py | 2 +- tests/ci/style_check.py | 2 +- 4 files changed, 29 insertions(+), 23 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 245f76eb3b6..a1d6cf05fd6 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -48,6 +48,8 @@ jobs: path: ${{ runner.temp }}/style_check - name: Check out repository code uses: actions/checkout@v2 + with: + path: 'repo_without_submodules' - name: Style Check env: YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py index 75febd9cd49..8bc6df632f2 100644 --- a/tests/ci/pvs_check.py +++ b/tests/ci/pvs_check.py @@ -113,29 +113,33 @@ if __name__ == "__main__": commit.create_status(context=NAME, 
description='PVS report failed to build', state='failure', target_url=f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}") sys.exit(1) - s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + NAME.lower().replace(' ', '_') - html_urls = s3_helper.upload_test_folder_to_s3(os.path.join(temp_path, HTML_REPORT_FOLDER), s3_path_prefix) - index_html = None + try: + s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + NAME.lower().replace(' ', '_') + html_urls = s3_helper.upload_test_folder_to_s3(os.path.join(temp_path, HTML_REPORT_FOLDER), s3_path_prefix) + index_html = None - for url in html_urls: - if 'index.html' in url: - index_html = 'HTML report'.format(url) - break + for url in html_urls: + if 'index.html' in url: + index_html = 'HTML report'.format(url) + break - if not index_html: - commit.create_status(context=NAME, description='PVS report failed to build', state='failure', target_url=f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}") - sys.exit(1) + if not index_html: + commit.create_status(context=NAME, description='PVS report failed to build', state='failure', target_url=f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}") + sys.exit(1) - txt_report = os.path.join(temp_path, TXT_REPORT_NAME) - warnings, errors = _process_txt_report(txt_report) - errors = errors + warnings + txt_report = os.path.join(temp_path, TXT_REPORT_NAME) + warnings, errors = _process_txt_report(txt_report) + errors = errors + warnings - status = 'success' - test_results = [(index_html, "Look at the report"), ("Errors count not checked", "OK")] - description = "Total errors {}".format(len(errors)) - additional_logs = [txt_report, os.path.join(temp_path, 'pvs-studio.log')] - report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_logs) + status = 'success' + test_results = [(index_html, "Look at the report"), ("Errors count not checked", "OK")] + description = "Total errors {}".format(len(errors)) + additional_logs = [txt_report, os.path.join(temp_path, 'pvs-studio.log')] + report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_logs) - print("::notice ::Report url: {}".format(report_url)) - commit = get_commit(gh, pr_info.sha) - commit.create_status(context=NAME, description=description, state=status, target_url=report_url) + print("::notice ::Report url: {}".format(report_url)) + commit = get_commit(gh, pr_info.sha) + commit.create_status(context=NAME, description=description, state=status, target_url=report_url) + except Exception as ex: + print("Got an exception", ex) + sys.exit(1) diff --git a/tests/ci/s3_helper.py b/tests/ci/s3_helper.py index 8a170da44f8..b9ae0de6e02 100644 --- a/tests/ci/s3_helper.py +++ b/tests/ci/s3_helper.py @@ -71,7 +71,7 @@ class S3Helper(object): if not files: return [] - p = Pool(min(len(files), 30)) + p = Pool(min(len(files), 5)) def task(file_name): full_fs_path = os.path.join(folder_path, file_name) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 5b2c2258585..f41120f7de7 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -95,7 +95,7 @@ def update_check_with_curl(check_id): if __name__ == "__main__": logging.basicConfig(level=logging.INFO) - repo_path = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")) + repo_path = os.path.join(os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")), 'repo_without_submodules') temp_path = 
os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'style_check') with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: From c6f9c8e7ba992f4eab909f5b94d179442550ec06 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 18:54:40 +0300 Subject: [PATCH 121/438] Missed file --- tests/ci/docker_images_check.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index bff229e15ff..141d075cc6d 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -7,6 +7,7 @@ import json import os from pr_info import PRInfo from github import Github +import shutil NAME = "Push to Dockerhub (actions)" From fce1d7e156502cf71a16b0f8ff6001c209bfd44c Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 16 Sep 2021 22:35:38 +0300 Subject: [PATCH 122/438] Fix stupid bug --- tests/ci/pvs_check.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py index 8bc6df632f2..18cee175970 100644 --- a/tests/ci/pvs_check.py +++ b/tests/ci/pvs_check.py @@ -123,9 +123,9 @@ if __name__ == "__main__": index_html = 'HTML report'.format(url) break - if not index_html: - commit.create_status(context=NAME, description='PVS report failed to build', state='failure', target_url=f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}") - sys.exit(1) + if not index_html: + commit.create_status(context=NAME, description='PVS report failed to build', state='failure', target_url=f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}") + sys.exit(1) txt_report = os.path.join(temp_path, TXT_REPORT_NAME) warnings, errors = _process_txt_report(txt_report) From 6556e77eb42771791e5eb33840433a4772faf4ac Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 17 Sep 2021 10:51:24 +0300 Subject: [PATCH 123/438] Test From d0d4318624e7ddc4f9c24ac4ea56c44c875b94d4 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 17 Sep 2021 11:20:56 +0300 Subject: [PATCH 124/438] Use correct user --- .github/workflows/main.yml | 3 --- tests/ci/pvs_check.py | 11 +++++++++-- tests/ci/style_check.py | 4 ++-- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index a1d6cf05fd6..4e5c2ed19e9 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -48,8 +48,6 @@ jobs: path: ${{ runner.temp }}/style_check - name: Check out repository code uses: actions/checkout@v2 - with: - path: 'repo_without_submodules' - name: Style Check env: YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} @@ -69,7 +67,6 @@ jobs: uses: actions/checkout@v2 with: submodules: 'recursive' - path: 'repo_with_submodules' - name: PVS Check env: YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py index 18cee175970..f68e5ca8210 100644 --- a/tests/ci/pvs_check.py +++ b/tests/ci/pvs_check.py @@ -71,7 +71,7 @@ def upload_results(s3_client, pr_number, commit_sha, test_results, additional_fi if __name__ == "__main__": logging.basicConfig(level=logging.INFO) - repo_path = os.path.join(os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")), "repo_with_submodules") + repo_path = os.path.join(os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../"))) temp_path = os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'pvs_check') with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: @@ -81,6 
+81,13 @@ if __name__ == "__main__": if not os.path.exists(temp_path): os.makedirs(temp_path) + new_repo_path = os.path.join(temp_path, repo_path) + if os.path.exists(new_repo_path): + shutil.rmtree(new_repo_path) + shutil.copytree(repo_path, temp_path) + # this check modify repository so copy it to the temp directory + repo_path = new_repo_path + aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") @@ -104,7 +111,7 @@ if __name__ == "__main__": s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) licence_key = os.getenv('PVS_STUDIO_KEY') - cmd = f"docker run --volume={repo_path}:/repo_folder --volume={temp_path}:/test_output -e LICENCE_NAME='{LICENCE_NAME}' -e LICENCE_KEY='{licence_key}' {docker_image}" + cmd = f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --volume={repo_path}:/repo_folder --volume={temp_path}:/test_output -e LICENCE_NAME='{LICENCE_NAME}' -e LICENCE_KEY='{licence_key}' {docker_image}" commit = get_commit(gh, pr_info.sha) try: diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index f41120f7de7..2af8514fbfc 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -95,7 +95,7 @@ def update_check_with_curl(check_id): if __name__ == "__main__": logging.basicConfig(level=logging.INFO) - repo_path = os.path.join(os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")), 'repo_without_submodules') + repo_path = os.path.join(os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../"))) temp_path = os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'style_check') with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: @@ -127,7 +127,7 @@ if __name__ == "__main__": s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) - subprocess.check_output(f"docker run --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output {docker_image}", shell=True) + subprocess.check_output(f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output {docker_image}", shell=True) state, description, test_results, additional_files = process_result(temp_path) report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_files) print("::notice ::Report url: {}".format(report_url)) From 18f282858e9bda66ba314fc3f6a9636fdb197b47 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 17 Sep 2021 12:41:31 +0300 Subject: [PATCH 125/438] Add logging --- tests/ci/pvs_check.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py index f68e5ca8210..a63b87d1d59 100644 --- a/tests/ci/pvs_check.py +++ b/tests/ci/pvs_check.py @@ -82,11 +82,16 @@ if __name__ == "__main__": os.makedirs(temp_path) new_repo_path = os.path.join(temp_path, repo_path) + logging.info("Will try to copy repo to %s", new_repo_path) if os.path.exists(new_repo_path): + logging.info("Removing old copy") shutil.rmtree(new_repo_path) + + logging.info("Copy repo from %s (exists %s) to %s", repo_path, os.path.exists(repo_path), temp_path) shutil.copytree(repo_path, temp_path) # this check modify repository so copy it to the temp directory repo_path = new_repo_path + logging.info("Repo copy path %s", repo_path) aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") aws_secret_key = 
os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") From d510bfbe636c9160a370be4800d790f9570db5f8 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 17 Sep 2021 12:59:31 +0300 Subject: [PATCH 126/438] Better --- .github/workflows/main.yml | 4 +++- tests/ci/pvs_check.py | 15 +-------------- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 4e5c2ed19e9..2fb0e54a8ee 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -73,4 +73,6 @@ jobs: YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} PVS_STUDIO_KEY: ${{ secrets.PVS_STUDIO_KEY }} - run: cd $GITHUB_WORKSPACE/tests/ci && python3 pvs_check.py + TEMP_PATH: ${{runner.temp}}/pvs_check + REPO_COPY: ${{runner.temp}}/pvs_check/ClickHouse + run: mkdir -p ${{runner.temp}}/pvs_check && cp -r $GITHUB_WORKSPACE $TEMP_PATH && cd $REPO_COPY/tests/ci && python3 pvs_check.py diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py index a63b87d1d59..c254ad74ae4 100644 --- a/tests/ci/pvs_check.py +++ b/tests/ci/pvs_check.py @@ -71,26 +71,13 @@ def upload_results(s3_client, pr_number, commit_sha, test_results, additional_fi if __name__ == "__main__": logging.basicConfig(level=logging.INFO) - repo_path = os.path.join(os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../"))) + repo_path = os.path.join(os.getenv("REPO_COPY", os.path.abspath("../../"))) temp_path = os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'pvs_check') with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: event = json.load(event_file) pr_info = PRInfo(event) - - if not os.path.exists(temp_path): - os.makedirs(temp_path) - - new_repo_path = os.path.join(temp_path, repo_path) - logging.info("Will try to copy repo to %s", new_repo_path) - if os.path.exists(new_repo_path): - logging.info("Removing old copy") - shutil.rmtree(new_repo_path) - - logging.info("Copy repo from %s (exists %s) to %s", repo_path, os.path.exists(repo_path), temp_path) - shutil.copytree(repo_path, temp_path) # this check modify repository so copy it to the temp directory - repo_path = new_repo_path logging.info("Repo copy path %s", repo_path) aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") From 68c7f8638ad82a1058f9dacde4cdb64583fd72ff Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 17 Sep 2021 13:34:45 +0300 Subject: [PATCH 127/438] update docker image --- docker/test/pvs/Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/test/pvs/Dockerfile b/docker/test/pvs/Dockerfile index f4675d35819..4eeb9855274 100644 --- a/docker/test/pvs/Dockerfile +++ b/docker/test/pvs/Dockerfile @@ -37,6 +37,8 @@ RUN set -x \ || echo "WARNING: Some file was just downloaded from the internet without any validation and we are installing it into the system"; } \ && dpkg -i "${PKG_VERSION}.deb" +ENV CCACHE_DIR=/test_ouput/ccache + CMD echo "Running PVS version $PKG_VERSION" && cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \ && cmake . 
-D"ENABLE_EMBEDDED_COMPILER"=OFF -D"USE_INTERNAL_PROTOBUF_LIBRARY"=OFF -D"USE_INTERNAL_GRPC_LIBRARY"=OFF -DCMAKE_C_COMPILER=clang-13 -DCMAKE_CXX_COMPILER=clang\+\+-13 \ && ninja re2_st clickhouse_grpc_protos \ From 5d35ba7d9353e842e435aac372d389b91bc51c70 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 17 Sep 2021 13:41:29 +0300 Subject: [PATCH 128/438] One more time --- docker/test/pvs/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/pvs/Dockerfile b/docker/test/pvs/Dockerfile index 4eeb9855274..06c2c424a74 100644 --- a/docker/test/pvs/Dockerfile +++ b/docker/test/pvs/Dockerfile @@ -39,7 +39,7 @@ RUN set -x \ ENV CCACHE_DIR=/test_ouput/ccache -CMD echo "Running PVS version $PKG_VERSION" && cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \ +CMD echo "Running PVS version $PKG_VERSION" && mkdir -p $CCACHE_DIR && cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \ && cmake . -D"ENABLE_EMBEDDED_COMPILER"=OFF -D"USE_INTERNAL_PROTOBUF_LIBRARY"=OFF -D"USE_INTERNAL_GRPC_LIBRARY"=OFF -DCMAKE_C_COMPILER=clang-13 -DCMAKE_CXX_COMPILER=clang\+\+-13 \ && ninja re2_st clickhouse_grpc_protos \ && pvs-studio-analyzer analyze -o pvs-studio.log -e contrib -j 4 -l ./licence.lic; \ From 00d9a62d6cf400bc48c4514c9b1be69e7f3e719f Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 17 Sep 2021 14:17:50 +0300 Subject: [PATCH 129/438] Missclick --- docker/test/pvs/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/pvs/Dockerfile b/docker/test/pvs/Dockerfile index 06c2c424a74..77cbd910922 100644 --- a/docker/test/pvs/Dockerfile +++ b/docker/test/pvs/Dockerfile @@ -37,7 +37,7 @@ RUN set -x \ || echo "WARNING: Some file was just downloaded from the internet without any validation and we are installing it into the system"; } \ && dpkg -i "${PKG_VERSION}.deb" -ENV CCACHE_DIR=/test_ouput/ccache +ENV CCACHE_DIR=/test_output/ccache CMD echo "Running PVS version $PKG_VERSION" && mkdir -p $CCACHE_DIR && cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \ && cmake . 
-D"ENABLE_EMBEDDED_COMPILER"=OFF -D"USE_INTERNAL_PROTOBUF_LIBRARY"=OFF -D"USE_INTERNAL_GRPC_LIBRARY"=OFF -DCMAKE_C_COMPILER=clang-13 -DCMAKE_CXX_COMPILER=clang\+\+-13 \ From 5c2a724a4d616f5ee17884dd9bfd9bfd312aabf4 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 27 Sep 2021 11:18:01 +0300 Subject: [PATCH 130/438] Add init worker script --- tests/ci/init_worker.sh | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 tests/ci/init_worker.sh diff --git a/tests/ci/init_worker.sh b/tests/ci/init_worker.sh new file mode 100644 index 00000000000..44cfc89f758 --- /dev/null +++ b/tests/ci/init_worker.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +set -euo pipefail + +export DEBIAN_FRONTEND=noninteractive +export RUNNER_VERSION=2.283.1 +export RUNNER_HOME=/home/ubuntu/actions-runner + +apt-get update + +apt-get install --yes --no-install-recommends \ + apt-transport-https \ + ca-certificates \ + curl \ + gnupg \ + lsb-release \ + python3-pip + +curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg + +echo "deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null + +apt-get update + +apt-get install --yes --no-install-recommends docker-ce docker-ce-cli containerd.io + +usermod -aG docker ubuntu + +pip install boto3 pygithub requests urllib3 unidiff + +mkdir -p $RUNNER_HOME && cd $RUNNER_HOME + +curl -O -L https://github.com/actions/runner/releases/download/v$RUNNER_VERSION/actions-runner-linux-x64-$RUNNER_VERSION.tar.gz + +tar xzf ./actions-runner-linux-x64-$RUNNER_VERSION.tar.gz +rm -f ./actions-runner-linux-x64-$RUNNER_VERSION.tar.gz + +./bin/installdependencies.sh From 214272113f9d979331fabbad1bd8022b8a382710 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 28 Sep 2021 14:52:51 +0300 Subject: [PATCH 131/438] Add lambda code for token rotation --- tests/ci/lambda/Dockerfile | 13 ++++ tests/ci/lambda/app.py | 106 +++++++++++++++++++++++++++++++ tests/ci/lambda/requirements.txt | 3 + 3 files changed, 122 insertions(+) create mode 100644 tests/ci/lambda/Dockerfile create mode 100644 tests/ci/lambda/app.py create mode 100644 tests/ci/lambda/requirements.txt diff --git a/tests/ci/lambda/Dockerfile b/tests/ci/lambda/Dockerfile new file mode 100644 index 00000000000..f53be71a893 --- /dev/null +++ b/tests/ci/lambda/Dockerfile @@ -0,0 +1,13 @@ +FROM public.ecr.aws/lambda/python:3.9 + +# Copy function code +COPY app.py ${LAMBDA_TASK_ROOT} + +# Install the function's dependencies using file requirements.txt +# from your project folder. + +COPY requirements.txt . 
+RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}" + +# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile) +CMD [ "app.handler" ] diff --git a/tests/ci/lambda/app.py b/tests/ci/lambda/app.py new file mode 100644 index 00000000000..4edd3e8d08c --- /dev/null +++ b/tests/ci/lambda/app.py @@ -0,0 +1,106 @@ +#!/usr/bin/env python3 + +import requests +import argparse +import jwt +import sys +import json +import time + +def get_installation_id(jwt_token): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.get("https://api.github.com/app/installations", headers=headers) + response.raise_for_status() + data = response.json() + return data[0]['id'] + +def get_access_token(jwt_token, installation_id): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.post(f"https://api.github.com/app/installations/{installation_id}/access_tokens", headers=headers) + response.raise_for_status() + data = response.json() + return data['token'] + +def get_runner_registration_token(access_token): + headers = { + "Authorization": f"token {access_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.post("https://api.github.com/orgs/ClickHouse/actions/runners/registration-token", headers=headers) + response.raise_for_status() + data = response.json() + return data['token'] + +def get_key_and_app_from_aws(): + import boto3 + secret_name = "clickhouse_github_secret_key_1" + session = boto3.session.Session() + client = session.client( + service_name='secretsmanager', + ) + get_secret_value_response = client.get_secret_value( + SecretId=secret_name + ) + data = json.loads(get_secret_value_response['SecretString']) + return data['clickhouse-app-key'], int(data['clickhouse-app-id']) + + +def main(github_secret_key, github_app_id, push_to_ssm, ssm_parameter_name): + payload = { + "iat": int(time.time()) - 60, + "exp": int(time.time()) + (10 * 60), + "iss": github_app_id, + } + + encoded_jwt = jwt.encode(payload, github_secret_key, algorithm="RS256") + installation_id = get_installation_id(encoded_jwt) + access_token = get_access_token(encoded_jwt, installation_id) + runner_registration_token = get_runner_registration_token(access_token) + + if push_to_ssm: + import boto3 + + print("Trying to put params into ssm manager") + client = boto3.client('ssm') + client.put_parameter( + Name=ssm_parameter_name, + Value=runner_registration_token, + Type='SecureString', + Overwrite=True) + else: + print("Not push token to AWS Parameter Store, just print:", runner_registration_token) + + +def handler(event, context): + private_key, app_id = get_key_and_app_from_aws() + main(private_key, app_id, True, 'github_runner_registration_token') + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Get new token from github to add runners') + parser.add_argument('-p', '--private-key-path', help='Path to file with private key') + parser.add_argument('-k', '--private-key', help='Private key') + parser.add_argument('-a', '--app-id', type=int, help='GitHub application ID', required=True) + parser.add_argument('--push-to-ssm', action='store_true', help='Store received token in parameter store') + parser.add_argument('--ssm-parameter-name', default='github_runner_registration_token', help='AWS paramater store parameter name') + + args = parser.parse_args() + + if not args.private_key_path and 
not args.private_key: + print("Either --private-key-path or --private-key must be specified", file=sys.stderr) + + if args.private_key_path and args.private_key: + print("Either --private-key-path or --private-key must be specified", file=sys.stderr) + + if args.private_key: + private_key = args.private_key + else: + with open(args.private_key_path, 'r') as key_file: + private_key = key_file.read() + + main(private_key, args.app_id, args.push_to_ssm, args.ssm_parameter_name) diff --git a/tests/ci/lambda/requirements.txt b/tests/ci/lambda/requirements.txt new file mode 100644 index 00000000000..c0dcf4a4dde --- /dev/null +++ b/tests/ci/lambda/requirements.txt @@ -0,0 +1,3 @@ +requests +PyJWT +cryptography From d70ea95b3dfd043ecdc09d2e7c1d9a5edfa16d37 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 28 Sep 2021 16:28:25 +0300 Subject: [PATCH 132/438] First worker version --- tests/ci/init_worker.sh | 37 ------------------------------------- tests/ci/worker/init.sh | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 37 deletions(-) delete mode 100644 tests/ci/init_worker.sh create mode 100644 tests/ci/worker/init.sh diff --git a/tests/ci/init_worker.sh b/tests/ci/init_worker.sh deleted file mode 100644 index 44cfc89f758..00000000000 --- a/tests/ci/init_worker.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -export DEBIAN_FRONTEND=noninteractive -export RUNNER_VERSION=2.283.1 -export RUNNER_HOME=/home/ubuntu/actions-runner - -apt-get update - -apt-get install --yes --no-install-recommends \ - apt-transport-https \ - ca-certificates \ - curl \ - gnupg \ - lsb-release \ - python3-pip - -curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg - -echo "deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null - -apt-get update - -apt-get install --yes --no-install-recommends docker-ce docker-ce-cli containerd.io - -usermod -aG docker ubuntu - -pip install boto3 pygithub requests urllib3 unidiff - -mkdir -p $RUNNER_HOME && cd $RUNNER_HOME - -curl -O -L https://github.com/actions/runner/releases/download/v$RUNNER_VERSION/actions-runner-linux-x64-$RUNNER_VERSION.tar.gz - -tar xzf ./actions-runner-linux-x64-$RUNNER_VERSION.tar.gz -rm -f ./actions-runner-linux-x64-$RUNNER_VERSION.tar.gz - -./bin/installdependencies.sh diff --git a/tests/ci/worker/init.sh b/tests/ci/worker/init.sh new file mode 100644 index 00000000000..69432a0c220 --- /dev/null +++ b/tests/ci/worker/init.sh @@ -0,0 +1,18 @@ +#!/usr/bin/bash +set -euo pipefail + +echo "Running init script" +export DEBIAN_FRONTEND=noninteractive +export RUNNER_HOME=/home/ubuntu/actions-runner + +echo "Receiving token" +export RUNNER_TOKEN=`/usr/local/bin/aws ssm get-parameter --name github_runner_registration_token --with-decryption --output text --query Parameter.Value` +export RUNNER_URL="https://github.com/ClickHouse" + +cd $RUNNER_HOME + +echo "Going to configure runner" +sudo -u ubuntu ./config.sh --url $RUNNER_URL --token $RUNNER_TOKEN --name `hostname -f` --runnergroup Default --labels 'self-hosted,Linux,X64' --work _work + +echo "Run" +sudo -u ubuntu ./run.sh From fca5775fac7e7ea878d67334734b790b0f6056b0 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 28 Sep 2021 18:40:06 +0300 Subject: [PATCH 133/438] Disable PVS check --- .github/workflows/main.yml | 44 +++++++++++++++++++------------------- 1 file 
changed, 22 insertions(+), 22 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 2fb0e54a8ee..05ed78d8c07 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -54,25 +54,25 @@ jobs: YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: cd $GITHUB_WORKSPACE/tests/ci && python3 style_check.py - PVS-Check: - needs: DockerHubPush - runs-on: [self-hosted] - steps: - - name: Download changed images - uses: actions/download-artifact@v2 - with: - name: changed_images - path: ${{ runner.temp }}/pvs_check - - name: Check out repository code - uses: actions/checkout@v2 - with: - submodules: 'recursive' - - name: PVS Check - env: - YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} - YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - PVS_STUDIO_KEY: ${{ secrets.PVS_STUDIO_KEY }} - TEMP_PATH: ${{runner.temp}}/pvs_check - REPO_COPY: ${{runner.temp}}/pvs_check/ClickHouse - run: mkdir -p ${{runner.temp}}/pvs_check && cp -r $GITHUB_WORKSPACE $TEMP_PATH && cd $REPO_COPY/tests/ci && python3 pvs_check.py +# PVS-Check: +# needs: DockerHubPush +# runs-on: [self-hosted] +# steps: +# - name: Download changed images +# uses: actions/download-artifact@v2 +# with: +# name: changed_images +# path: ${{ runner.temp }}/pvs_check +# - name: Check out repository code +# uses: actions/checkout@v2 +# with: +# submodules: 'recursive' +# - name: PVS Check +# env: +# YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} +# YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} +# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} +# PVS_STUDIO_KEY: ${{ secrets.PVS_STUDIO_KEY }} +# TEMP_PATH: ${{runner.temp}}/pvs_check +# REPO_COPY: ${{runner.temp}}/pvs_check/ClickHouse +# run: mkdir -p ${{runner.temp}}/pvs_check && cp -r $GITHUB_WORKSPACE $TEMP_PATH && cd $REPO_COPY/tests/ci && python3 pvs_check.py From 070a7cf727c5c20bb63c9ceff2532e6309053886 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 28 Sep 2021 19:08:41 +0300 Subject: [PATCH 134/438] Bump From d120fdf5953f0db43138ad1b2face732c01dafe3 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 28 Sep 2021 21:42:41 +0300 Subject: [PATCH 135/438] check if query context exist before using cache --- src/Interpreters/Context.cpp | 1 + .../InterpreterSelectWithUnionQuery.cpp | 8 ++++++++ .../01162_strange_mutations.reference | 6 ++++++ .../0_stateless/01162_strange_mutations.sh | 19 +++++++++++++++++++ 4 files changed, 34 insertions(+) create mode 100644 tests/queries/0_stateless/01162_strange_mutations.reference create mode 100755 tests/queries/0_stateless/01162_strange_mutations.sh diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 7e43343ab34..78f3e8440d2 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -2478,6 +2478,7 @@ void Context::setFormatSchemaPath(const String & path) Context::SampleBlockCache & Context::getSampleBlockCache() const { + assert(hasQueryContext()); return getQueryContext()->sample_block_cache; } diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index 4aeaa9e4f13..e7ea08e557d 100644 --- a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -222,6 +222,14 @@ InterpreterSelectWithUnionQuery::~InterpreterSelectWithUnionQuery() = default; Block 
InterpreterSelectWithUnionQuery::getSampleBlock(const ASTPtr & query_ptr_, ContextPtr context_, bool is_subquery) { + if (!context_->hasQueryContext()) + { + if (is_subquery) + return InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().subquery().analyze()).getSampleBlock(); + else + return InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().analyze()).getSampleBlock(); + } + auto & cache = context_->getSampleBlockCache(); /// Using query string because query_ptr changes for every internal SELECT auto key = queryToString(query_ptr_); diff --git a/tests/queries/0_stateless/01162_strange_mutations.reference b/tests/queries/0_stateless/01162_strange_mutations.reference new file mode 100644 index 00000000000..64572fe3446 --- /dev/null +++ b/tests/queries/0_stateless/01162_strange_mutations.reference @@ -0,0 +1,6 @@ +1 +2 +0 +1 +2 +0 diff --git a/tests/queries/0_stateless/01162_strange_mutations.sh b/tests/queries/0_stateless/01162_strange_mutations.sh new file mode 100755 index 00000000000..6e19f81c3ef --- /dev/null +++ b/tests/queries/0_stateless/01162_strange_mutations.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +declare -a engines=("MergeTree" "ReplicatedMergeTree('/test/01162/$CLICKHOUSE_DATABASE', '1')") + +for engine in "${engines[@]}" +do + $CLICKHOUSE_CLIENT -q "drop table if exists t" + $CLICKHOUSE_CLIENT -q "create table t (n int) engine=$engine order by n" + $CLICKHOUSE_CLIENT -q "insert into t values (1)" + $CLICKHOUSE_CLIENT -q "insert into t values (2)" + $CLICKHOUSE_CLIENT -q "select * from t order by n" + $CLICKHOUSE_CLIENT -q "alter table t delete where n global in (select * from (select * from t))" + $CLICKHOUSE_CLIENT -q "select count() from t" + $CLICKHOUSE_CLIENT -q "drop table t" +done From 95c3eb377bab413ac2d9238d7726898dedf33ee2 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 29 Sep 2021 10:47:15 +0300 Subject: [PATCH 136/438] Add finish check --- .github/workflows/main.yml | 12 ++++++++++- tests/ci/finish_check.py | 41 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) create mode 100644 tests/ci/finish_check.py diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 05ed78d8c07..3931bc1538d 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -37,7 +37,7 @@ jobs: with: name: changed_images path: ${{ runner.temp }}/docker_images_check/changed_images.json - Style-Check: + StyleCheck: needs: DockerHubPush runs-on: [self-hosted] steps: @@ -76,3 +76,13 @@ jobs: # TEMP_PATH: ${{runner.temp}}/pvs_check # REPO_COPY: ${{runner.temp}}/pvs_check/ClickHouse # run: mkdir -p ${{runner.temp}}/pvs_check && cp -r $GITHUB_WORKSPACE $TEMP_PATH && cd $REPO_COPY/tests/ci && python3 pvs_check.py + Finish-Check: + needs: [Style-Check, DockerHubPush, CheckLabels] + runs-on: [self-hosted] + steps: + - name: Check out repository code + uses: actions/checkout@v2 + - name: Finish label + run: cd $GITHUB_WORKSPACE/tests/ci && python3 finish_check.py + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py new file mode 100644 index 00000000000..b481c5b658c --- /dev/null +++ b/tests/ci/finish_check.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python3 +import logging +from github import Github +import os + +NAME = 'Run Check (actions)' + +def filter_statuses(statuses): + """ + Squash statuses to 
latest state + 1. context="first", state="success", update_time=1 + 2. context="second", state="success", update_time=2 + 3. context="first", stat="failure", update_time=3 + =========> + 1. context="second", state="success" + 2. context="first", stat="failure" + """ + filt = {} + for status in sorted(statuses, key=lambda x: x.updated_at): + filt[status.context] = status + return filt + + +def get_commit(gh, commit_sha): + repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) + commit = repo.get_commit(commit_sha) + return commit + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: + event = json.load(event_file) + + pr_info = PRInfo(event, need_orgs=True) + gh = Github(os.getenv("GITHUB_TOKEN")) + commit = get_commit(gh, pr_info.sha) + + url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" + statuses = filter_statuses(list(commit.get_statuses())) + if NAME in statuses and statuses[NAME].state == "pending": + commit.create_status(context=NAME, description="All checks finished", state="success", target_url=url) From cb81189bf91a1d5fd7448d4df3ca66f51fb976e2 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 29 Sep 2021 10:49:25 +0300 Subject: [PATCH 137/438] Fix workflow --- .github/workflows/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 3931bc1538d..8cb771a0d45 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -76,8 +76,8 @@ jobs: # TEMP_PATH: ${{runner.temp}}/pvs_check # REPO_COPY: ${{runner.temp}}/pvs_check/ClickHouse # run: mkdir -p ${{runner.temp}}/pvs_check && cp -r $GITHUB_WORKSPACE $TEMP_PATH && cd $REPO_COPY/tests/ci && python3 pvs_check.py - Finish-Check: - needs: [Style-Check, DockerHubPush, CheckLabels] + FinishCheck: + needs: [StyleCheck, DockerHubPush, CheckLabels] runs-on: [self-hosted] steps: - name: Check out repository code From bf9ebf42112577018347975ba9d9ec023eb2bf7b Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 29 Sep 2021 10:55:12 +0300 Subject: [PATCH 138/438] Import json --- tests/ci/finish_check.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py index b481c5b658c..1b022905cda 100644 --- a/tests/ci/finish_check.py +++ b/tests/ci/finish_check.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import logging from github import Github +import json import os NAME = 'Run Check (actions)' From 8d29a472fa088468584e370d0124ffb1e8f36175 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 29 Sep 2021 11:02:36 +0300 Subject: [PATCH 139/438] Fix --- tests/ci/finish_check.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py index 1b022905cda..89139468fd6 100644 --- a/tests/ci/finish_check.py +++ b/tests/ci/finish_check.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import logging from github import Github +from pr_info import PRInfo import json import os From b702f7cbff638963e1d5afc1c4689f74d062d322 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 29 Sep 2021 13:37:44 +0300 Subject: [PATCH 140/438] preserve table alias when adding default database --- src/Interpreters/AddDefaultDatabaseVisitor.h | 7 ++++++- .../queries/0_stateless/01162_strange_mutations.reference | 2 ++ tests/queries/0_stateless/01162_strange_mutations.sh | 6 ++++-- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git 
a/src/Interpreters/AddDefaultDatabaseVisitor.h b/src/Interpreters/AddDefaultDatabaseVisitor.h index fe3edc00957..858608acdbe 100644 --- a/src/Interpreters/AddDefaultDatabaseVisitor.h +++ b/src/Interpreters/AddDefaultDatabaseVisitor.h @@ -105,7 +105,12 @@ private: void visit(const ASTTableIdentifier & identifier, ASTPtr & ast) const { if (!identifier.compound()) - ast = std::make_shared(database_name, identifier.name()); + { + auto qualified_identifier = std::make_shared(database_name, identifier.name()); + if (!identifier.alias.empty()) + qualified_identifier->setAlias(identifier.alias); + ast = qualified_identifier; + } } void visit(ASTSubquery & subquery, ASTPtr &) const diff --git a/tests/queries/0_stateless/01162_strange_mutations.reference b/tests/queries/0_stateless/01162_strange_mutations.reference index 64572fe3446..e09dccd4c4a 100644 --- a/tests/queries/0_stateless/01162_strange_mutations.reference +++ b/tests/queries/0_stateless/01162_strange_mutations.reference @@ -1,6 +1,8 @@ 1 2 +2 0 1 2 +2 0 diff --git a/tests/queries/0_stateless/01162_strange_mutations.sh b/tests/queries/0_stateless/01162_strange_mutations.sh index 6e19f81c3ef..c4166a88e42 100755 --- a/tests/queries/0_stateless/01162_strange_mutations.sh +++ b/tests/queries/0_stateless/01162_strange_mutations.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -declare -a engines=("MergeTree" "ReplicatedMergeTree('/test/01162/$CLICKHOUSE_DATABASE', '1')") +declare -a engines=("MergeTree" "ReplicatedMergeTree('/test/01162/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX', '1')") for engine in "${engines[@]}" do @@ -13,7 +13,9 @@ do $CLICKHOUSE_CLIENT -q "insert into t values (1)" $CLICKHOUSE_CLIENT -q "insert into t values (2)" $CLICKHOUSE_CLIENT -q "select * from t order by n" - $CLICKHOUSE_CLIENT -q "alter table t delete where n global in (select * from (select * from t))" + $CLICKHOUSE_CLIENT --mutations_sync=1 -q "alter table t delete where n global in (select * from (select * from t where n global in (1::Int32)))" + $CLICKHOUSE_CLIENT -q "select * from t order by n" + $CLICKHOUSE_CLIENT --mutations_sync=1 -q "alter table t delete where n global in (select t1.n from t as t1 join t as t2 on t1.n=t2.n where t1.n global in (select 2::Int32))" $CLICKHOUSE_CLIENT -q "select count() from t" $CLICKHOUSE_CLIENT -q "drop table t" done From 112a009b918b548d4b6d5a21caf8a857d5de46f6 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 29 Sep 2021 14:40:20 +0300 Subject: [PATCH 141/438] Update worker script --- tests/ci/{lambda => token_lambda}/Dockerfile | 0 tests/ci/{lambda => token_lambda}/app.py | 0 tests/ci/{lambda => token_lambda}/requirements.txt | 0 tests/ci/worker/init.sh | 4 +++- 4 files changed, 3 insertions(+), 1 deletion(-) rename tests/ci/{lambda => token_lambda}/Dockerfile (100%) rename tests/ci/{lambda => token_lambda}/app.py (100%) rename tests/ci/{lambda => token_lambda}/requirements.txt (100%) diff --git a/tests/ci/lambda/Dockerfile b/tests/ci/token_lambda/Dockerfile similarity index 100% rename from tests/ci/lambda/Dockerfile rename to tests/ci/token_lambda/Dockerfile diff --git a/tests/ci/lambda/app.py b/tests/ci/token_lambda/app.py similarity index 100% rename from tests/ci/lambda/app.py rename to tests/ci/token_lambda/app.py diff --git a/tests/ci/lambda/requirements.txt b/tests/ci/token_lambda/requirements.txt similarity index 100% rename from tests/ci/lambda/requirements.txt rename to tests/ci/token_lambda/requirements.txt diff --git 
a/tests/ci/worker/init.sh b/tests/ci/worker/init.sh index 69432a0c220..2f6638f14b5 100644 --- a/tests/ci/worker/init.sh +++ b/tests/ci/worker/init.sh @@ -8,11 +8,13 @@ export RUNNER_HOME=/home/ubuntu/actions-runner echo "Receiving token" export RUNNER_TOKEN=`/usr/local/bin/aws ssm get-parameter --name github_runner_registration_token --with-decryption --output text --query Parameter.Value` export RUNNER_URL="https://github.com/ClickHouse" +# Funny fact, but metadata service has fixed IP +export INSTANCE_ID=`curl -s http://169.254.169.254/latest/meta-data/instance-id` cd $RUNNER_HOME echo "Going to configure runner" -sudo -u ubuntu ./config.sh --url $RUNNER_URL --token $RUNNER_TOKEN --name `hostname -f` --runnergroup Default --labels 'self-hosted,Linux,X64' --work _work +sudo -u ubuntu ./config.sh --url $RUNNER_URL --token $RUNNER_TOKEN --name $INSTANCE_ID --runnergroup Default --labels 'self-hosted,Linux,X64' --work _work echo "Run" sudo -u ubuntu ./run.sh From 0085e5653a81b6689dc1e1977b4ed421f36279a5 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 30 Sep 2021 12:00:45 +0300 Subject: [PATCH 142/438] Metrics lambda --- tests/ci/metrics_lambda/Dockerfile | 13 +++ tests/ci/metrics_lambda/app.py | 138 +++++++++++++++++++++++ tests/ci/metrics_lambda/requirements.txt | 3 + 3 files changed, 154 insertions(+) create mode 100644 tests/ci/metrics_lambda/Dockerfile create mode 100644 tests/ci/metrics_lambda/app.py create mode 100644 tests/ci/metrics_lambda/requirements.txt diff --git a/tests/ci/metrics_lambda/Dockerfile b/tests/ci/metrics_lambda/Dockerfile new file mode 100644 index 00000000000..f53be71a893 --- /dev/null +++ b/tests/ci/metrics_lambda/Dockerfile @@ -0,0 +1,13 @@ +FROM public.ecr.aws/lambda/python:3.9 + +# Copy function code +COPY app.py ${LAMBDA_TASK_ROOT} + +# Install the function's dependencies using file requirements.txt +# from your project folder. + +COPY requirements.txt . 
+RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}" + +# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile) +CMD [ "app.handler" ] diff --git a/tests/ci/metrics_lambda/app.py b/tests/ci/metrics_lambda/app.py new file mode 100644 index 00000000000..8002e060dd0 --- /dev/null +++ b/tests/ci/metrics_lambda/app.py @@ -0,0 +1,138 @@ +#!/usr/bin/env python3 + +import requests +import argparse +import jwt +import sys +import json +import time +from collections import namedtuple + +def get_key_and_app_from_aws(): + import boto3 + secret_name = "clickhouse_github_secret_key_1" + session = boto3.session.Session() + client = session.client( + service_name='secretsmanager', + ) + get_secret_value_response = client.get_secret_value( + SecretId=secret_name + ) + data = json.loads(get_secret_value_response['SecretString']) + return data['clickhouse-app-key'], int(data['clickhouse-app-id']) + +def handler(event, context): + private_key, app_id = get_key_and_app_from_aws() + main(private_key, app_id, True) + +def get_installation_id(jwt_token): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.get("https://api.github.com/app/installations", headers=headers) + response.raise_for_status() + data = response.json() + return data[0]['id'] + +def get_access_token(jwt_token, installation_id): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.post(f"https://api.github.com/app/installations/{installation_id}/access_tokens", headers=headers) + response.raise_for_status() + data = response.json() + return data['token'] + + +RunnerDescription = namedtuple('RunnerDescription', ['id', 'name', 'tags', 'offline', 'busy']) + +def list_runners(access_token): + headers = { + "Authorization": f"token {access_token}", + "Accept": "application/vnd.github.v3+json", + } + + response = requests.get("https://api.github.com/orgs/ClickHouse/actions/runners", headers=headers) + response.raise_for_status() + data = response.json() + print("Total runners", data['total_count']) + runners = data['runners'] + result = [] + for runner in runners: + tags = [tag['name'] for tag in runner['labels']] + desc = RunnerDescription(id=runner['id'], name=runner['name'], tags=tags, + offline=runner['status']=='offline', busy=runner['busy']) + result.append(desc) + return result + +def push_metrics_to_cloudwatch(listed_runners, namespace): + import boto3 + client = boto3.client('cloudwatch') + metrics_data = [] + busy_runners = sum(1 for runner in listed_runners if runner.busy) + metrics_data.append({ + 'MetricName': 'BusyRunners', + 'Value': busy_runners, + 'Unit': 'Count', + }) + total_active_runners = sum(1 for runner in listed_runners if not runner.offline) + metrics_data.append({ + 'MetricName': 'ActiveRunners', + 'Value': total_active_runners, + 'Unit': 'Count', + }) + total_runners = len(listed_runners) + metrics_data.append({ + 'MetricName': 'TotalRunners', + 'Value': total_runners, + 'Unit': 'Count', + }) + metrics_data.append({ + 'MetricName': 'BusyRunnersRatio', + 'Value': busy_runners / total_active_runners * 100, + 'Unit': 'Percent', + }) + + client.put_metric_data(Namespace='RunnersMetrics', MetricData=metrics_data) + +def main(github_secret_key, github_app_id, push_to_cloudwatch): + payload = { + "iat": int(time.time()) - 60, + "exp": int(time.time()) + (10 * 60), + "iss": github_app_id, + } + + encoded_jwt = 
jwt.encode(payload, github_secret_key, algorithm="RS256") + installation_id = get_installation_id(encoded_jwt) + access_token = get_access_token(encoded_jwt, installation_id) + runners = list_runners(access_token) + if push_to_cloudwatch: + push_metrics_to_cloudwatch(runners, 'RunnersMetrics') + else: + print(runners) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Get list of runners and their states') + parser.add_argument('-p', '--private-key-path', help='Path to file with private key') + parser.add_argument('-k', '--private-key', help='Private key') + parser.add_argument('-a', '--app-id', type=int, help='GitHub application ID', required=True) + parser.add_argument('--push-to-cloudwatch', action='store_true', help='Store received token in parameter store') + + args = parser.parse_args() + + if not args.private_key_path and not args.private_key: + print("Either --private-key-path or --private-key must be specified", file=sys.stderr) + + if args.private_key_path and args.private_key: + print("Either --private-key-path or --private-key must be specified", file=sys.stderr) + + if args.private_key: + private_key = args.private_key + else: + with open(args.private_key_path, 'r') as key_file: + private_key = key_file.read() + + main(private_key, args.app_id, args.push_to_cloudwatch) diff --git a/tests/ci/metrics_lambda/requirements.txt b/tests/ci/metrics_lambda/requirements.txt new file mode 100644 index 00000000000..c0dcf4a4dde --- /dev/null +++ b/tests/ci/metrics_lambda/requirements.txt @@ -0,0 +1,3 @@ +requests +PyJWT +cryptography From 7d028c3a90aacf1ae3817ea87597036a512ec11a Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 30 Sep 2021 13:12:58 +0300 Subject: [PATCH 143/438] Add termination lambda --- tests/ci/metrics_lambda/app.py | 7 +- tests/ci/termination_lambda/Dockerfile | 13 ++ tests/ci/termination_lambda/app.py | 230 +++++++++++++++++++ tests/ci/termination_lambda/requirements.txt | 3 + 4 files changed, 252 insertions(+), 1 deletion(-) create mode 100644 tests/ci/termination_lambda/Dockerfile create mode 100644 tests/ci/termination_lambda/app.py create mode 100644 tests/ci/termination_lambda/requirements.txt diff --git a/tests/ci/metrics_lambda/app.py b/tests/ci/metrics_lambda/app.py index 8002e060dd0..6c6fc594847 100644 --- a/tests/ci/metrics_lambda/app.py +++ b/tests/ci/metrics_lambda/app.py @@ -89,9 +89,14 @@ def push_metrics_to_cloudwatch(listed_runners, namespace): 'Value': total_runners, 'Unit': 'Count', }) + if total_active_runners == 0: + busy_ratio = 100 + else: + busy_ratio = busy_runners / total_active_runners * 100 + metrics_data.append({ 'MetricName': 'BusyRunnersRatio', - 'Value': busy_runners / total_active_runners * 100, + 'Value': busy_ratio, 'Unit': 'Percent', }) diff --git a/tests/ci/termination_lambda/Dockerfile b/tests/ci/termination_lambda/Dockerfile new file mode 100644 index 00000000000..f53be71a893 --- /dev/null +++ b/tests/ci/termination_lambda/Dockerfile @@ -0,0 +1,13 @@ +FROM public.ecr.aws/lambda/python:3.9 + +# Copy function code +COPY app.py ${LAMBDA_TASK_ROOT} + +# Install the function's dependencies using file requirements.txt +# from your project folder. + +COPY requirements.txt . 
+RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}" + +# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile) +CMD [ "app.handler" ] diff --git a/tests/ci/termination_lambda/app.py b/tests/ci/termination_lambda/app.py new file mode 100644 index 00000000000..414ad0a0d0f --- /dev/null +++ b/tests/ci/termination_lambda/app.py @@ -0,0 +1,230 @@ +#!/usr/bin/env python3 + +import requests +import argparse +import jwt +import sys +import json +import time +from collections import namedtuple + +def get_key_and_app_from_aws(): + import boto3 + secret_name = "clickhouse_github_secret_key_1" + session = boto3.session.Session() + client = session.client( + service_name='secretsmanager', + ) + get_secret_value_response = client.get_secret_value( + SecretId=secret_name + ) + data = json.loads(get_secret_value_response['SecretString']) + return data['clickhouse-app-key'], int(data['clickhouse-app-id']) + +def get_installation_id(jwt_token): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.get("https://api.github.com/app/installations", headers=headers) + response.raise_for_status() + data = response.json() + return data[0]['id'] + +def get_access_token(jwt_token, installation_id): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.post(f"https://api.github.com/app/installations/{installation_id}/access_tokens", headers=headers) + response.raise_for_status() + data = response.json() + return data['token'] + + +RunnerDescription = namedtuple('RunnerDescription', ['id', 'name', 'tags', 'offline', 'busy']) + +def list_runners(access_token): + headers = { + "Authorization": f"token {access_token}", + "Accept": "application/vnd.github.v3+json", + } + + response = requests.get("https://api.github.com/orgs/ClickHouse/actions/runners", headers=headers) + response.raise_for_status() + data = response.json() + print("Total runners", data['total_count']) + runners = data['runners'] + result = [] + for runner in runners: + tags = [tag['name'] for tag in runner['labels']] + desc = RunnerDescription(id=runner['id'], name=runner['name'], tags=tags, + offline=runner['status']=='offline', busy=runner['busy']) + result.append(desc) + return result + +def how_many_instances_to_kill(event_data): + data_array = event_data['CapacityToTerminate'] + to_kill_by_zone = {} + for av_zone in data_array: + zone_name = av_zone['AvailabilityZone'] + to_kill = av_zone['Capacity'] + if zone_name not in to_kill_by_zone: + to_kill_by_zone[zone_name] = 0 + + to_kill_by_zone[zone_name] += to_kill + return to_kill_by_zone + +def get_candidates_to_be_killed(event_data): + data_array = event_data['Instances'] + instances_by_zone = {} + for instance in data_array: + zone_name = instance['AvailabilityZone'] + instance_id = instance['InstanceId'] + if zone_name not in instances_by_zone: + instances_by_zone[zone_name] = [] + instances_by_zone[zone_name].append(instance_id) + + return instances_by_zone + +def delete_runner(access_token, runner): + headers = { + "Authorization": f"token {access_token}", + "Accept": "application/vnd.github.v3+json", + } + + response = requests.delete(f"https://api.github.com/orgs/ClickHouse/actions/runners/{runner.id}", headers=headers) + response.raise_for_status() + print(f"Response code deleting {runner.name} is {response.status_code}") + return response.status_code == 204 + + +def 
main(github_secret_key, github_app_id, event): + print("Got event", json.dumps(event, sort_keys=True, indent=4)) + to_kill_by_zone = how_many_instances_to_kill(event) + instances_by_zone = get_candidates_to_be_killed(event) + + payload = { + "iat": int(time.time()) - 60, + "exp": int(time.time()) + (10 * 60), + "iss": github_app_id, + } + + encoded_jwt = jwt.encode(payload, github_secret_key, algorithm="RS256") + installation_id = get_installation_id(encoded_jwt) + access_token = get_access_token(encoded_jwt, installation_id) + + runners = list_runners(access_token) + + to_delete_runners = [] + instances_to_kill = [] + for zone in to_kill_by_zone: + num_to_kill = to_kill_by_zone[zone] + candidates = instances_by_zone[zone] + if num_to_kill > len(candidates): + raise Exception(f"Required to kill {num_to_kill}, but have only {len(candidates)} candidates in AV {zone}") + + delete_for_av = [] + for candidate in candidates: + if len(delete_for_av) == num_to_kill: + break + for runner in runners: + if runner.name == candidate: + if not runner.busy: + print(f"Runner {runner.name} is not busy and can be deleted from AV {zone}") + delete_for_av.append(runner) + else: + print(f"Runner {runner.name} is busy, not going to delete it") + break + else: + print(f"Candidate {candidate} was not in runners list, simply delete it") + instances_to_kill.append(candidate) + + if len(delete_for_av) < num_to_kill: + print(f"Checked all candidates for av {zone}, get to delete {len(delete_for_av)}, but still cannot get required {num_to_kill}") + to_delete_runners += delete_for_av + + print("Got instances to kill: ", ', '.join(instances_to_kill)) + print("Going to delete runners:", ', '.join([runner.name for runner in to_delete_runners])) + for runner in to_delete_runners: + if delete_runner(access_token, runner): + print(f"Runner {runner.name} successfuly deleted from github") + instances_to_kill.append(runner.name) + else: + print(f"Cannot delete {runner.name} from github") + + response = { + "InstanceIDs": instances_to_kill + } + print(response) + return response + +def handler(event, context): + private_key, app_id = get_key_and_app_from_aws() + return main(private_key, app_id, event) + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Get list of runners and their states') + parser.add_argument('-p', '--private-key-path', help='Path to file with private key') + parser.add_argument('-k', '--private-key', help='Private key') + parser.add_argument('-a', '--app-id', type=int, help='GitHub application ID', required=True) + + args = parser.parse_args() + + if not args.private_key_path and not args.private_key: + print("Either --private-key-path or --private-key must be specified", file=sys.stderr) + + if args.private_key_path and args.private_key: + print("Either --private-key-path or --private-key must be specified", file=sys.stderr) + + if args.private_key: + private_key = args.private_key + else: + with open(args.private_key_path, 'r') as key_file: + private_key = key_file.read() + + sample_event = { + "AutoScalingGroupARN": "arn:aws:autoscaling:us-east-1::autoScalingGroup:d4738357-2d40-4038-ae7e-b00ae0227003:autoScalingGroupName/my-asg", + "AutoScalingGroupName": "my-asg", + "CapacityToTerminate": [ + { + "AvailabilityZone": "us-east-1b", + "Capacity": 1, + "InstanceMarketOption": "OnDemand" + }, + { + "AvailabilityZone": "us-east-1c", + "Capacity": 2, + "InstanceMarketOption": "OnDemand" + } + ], + "Instances": [ + { + "AvailabilityZone": "us-east-1b", + "InstanceId": 
"i-08d0b3c1a137e02a5", + "InstanceType": "t2.nano", + "InstanceMarketOption": "OnDemand" + }, + { + "AvailabilityZone": "us-east-1c", + "InstanceId": "ip-172-31-45-253.eu-west-1.compute.internal", + "InstanceType": "t2.nano", + "InstanceMarketOption": "OnDemand" + }, + { + "AvailabilityZone": "us-east-1c", + "InstanceId": "ip-172-31-27-227.eu-west-1.compute.internal", + "InstanceType": "t2.nano", + "InstanceMarketOption": "OnDemand" + }, + { + "AvailabilityZone": "us-east-1c", + "InstanceId": "ip-172-31-45-253.eu-west-1.compute.internal", + "InstanceType": "t2.nano", + "InstanceMarketOption": "OnDemand" + }, + ], + "Cause": "SCALE_IN" + } + + main(private_key, args.app_id, sample_event) diff --git a/tests/ci/termination_lambda/requirements.txt b/tests/ci/termination_lambda/requirements.txt new file mode 100644 index 00000000000..c0dcf4a4dde --- /dev/null +++ b/tests/ci/termination_lambda/requirements.txt @@ -0,0 +1,3 @@ +requests +PyJWT +cryptography From 8a9556dd9367544e0b6185e5ae71babf987eaa7f Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 30 Sep 2021 13:39:15 +0300 Subject: [PATCH 144/438] Update termination lambda --- tests/ci/termination_lambda/app.py | 40 ++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/tests/ci/termination_lambda/app.py b/tests/ci/termination_lambda/app.py index 414ad0a0d0f..261403dd8be 100644 --- a/tests/ci/termination_lambda/app.py +++ b/tests/ci/termination_lambda/app.py @@ -63,6 +63,42 @@ def list_runners(access_token): result.append(desc) return result +def push_metrics_to_cloudwatch(listed_runners, namespace): + import boto3 + client = boto3.client('cloudwatch') + metrics_data = [] + busy_runners = sum(1 for runner in listed_runners if runner.busy) + metrics_data.append({ + 'MetricName': 'BusyRunners', + 'Value': busy_runners, + 'Unit': 'Count', + }) + total_active_runners = sum(1 for runner in listed_runners if not runner.offline) + metrics_data.append({ + 'MetricName': 'ActiveRunners', + 'Value': total_active_runners, + 'Unit': 'Count', + }) + total_runners = len(listed_runners) + metrics_data.append({ + 'MetricName': 'TotalRunners', + 'Value': total_runners, + 'Unit': 'Count', + }) + if total_active_runners == 0: + busy_ratio = 100 + else: + busy_ratio = busy_runners / total_active_runners * 100 + + metrics_data.append({ + 'MetricName': 'BusyRunnersRatio', + 'Value': busy_ratio, + 'Unit': 'Percent', + }) + + client.put_metric_data(Namespace='RunnersMetrics', MetricData=metrics_data) + + def how_many_instances_to_kill(event_data): data_array = event_data['CapacityToTerminate'] to_kill_by_zone = {} @@ -153,6 +189,10 @@ def main(github_secret_key, github_app_id, event): else: print(f"Cannot delete {runner.name} from github") + # push metrics + runners = list_runners(access_token) + push_metrics_to_cloudwatch(runners, 'RunnersMetrics') + response = { "InstanceIDs": instances_to_kill } From f2837569f57712a3d6edd849748ba4028bd4f4c3 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 30 Sep 2021 14:26:46 +0300 Subject: [PATCH 145/438] Fixes in termination lambda --- tests/ci/run_check.py | 5 ++++- tests/ci/termination_lambda/app.py | 21 +++++++++++++-------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 70b3ae2ac07..95e827671ca 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -86,7 +86,7 @@ def pr_is_by_trusted_user(pr_user_login, pr_user_orgs): # can be skipped entirely. 
def should_run_checks_for_pr(pr_info): # Consider the labels and whether the user is trusted. - force_labels = set(['force tests', 'release']).intersection(pr_info.labels) + force_labels = set(['force tests']).intersection(pr_info.labels) if force_labels: return True, "Labeled '{}'".format(', '.join(force_labels)) @@ -96,6 +96,9 @@ def should_run_checks_for_pr(pr_info): if 'can be tested' not in pr_info.labels and not pr_is_by_trusted_user(pr_info.user_login, pr_info.user_orgs): return False, "Needs 'can be tested' label" + if 'release' in pr_info.labels or 'pr-backport' in pr_info.labels or 'pr-cherrypick' in pr_info.labels: + return False, "Don't try new checks for release/backports/cherry-picks" + return True, "No special conditions apply" def get_commit(gh, commit_sha): diff --git a/tests/ci/termination_lambda/app.py b/tests/ci/termination_lambda/app.py index 261403dd8be..7fd7c400db9 100644 --- a/tests/ci/termination_lambda/app.py +++ b/tests/ci/termination_lambda/app.py @@ -162,8 +162,16 @@ def main(github_secret_key, github_app_id, event): delete_for_av = [] for candidate in candidates: - if len(delete_for_av) == num_to_kill: + if candidate not in set([runner.name for runner in runners]): + print(f"Candidate {candidate} was not in runners list, simply delete it") + instances_to_kill.append(candidate) + + for candidate in candidates: + if len(delete_for_av) + len(instances_to_kill) == num_to_kill: break + if candidate in instances_to_kill: + continue + for runner in runners: if runner.name == candidate: if not runner.busy: @@ -172,9 +180,6 @@ def main(github_secret_key, github_app_id, event): else: print(f"Runner {runner.name} is busy, not going to delete it") break - else: - print(f"Candidate {candidate} was not in runners list, simply delete it") - instances_to_kill.append(candidate) if len(delete_for_av) < num_to_kill: print(f"Checked all candidates for av {zone}, get to delete {len(delete_for_av)}, but still cannot get required {num_to_kill}") @@ -189,9 +194,9 @@ def main(github_secret_key, github_app_id, event): else: print(f"Cannot delete {runner.name} from github") - # push metrics - runners = list_runners(access_token) - push_metrics_to_cloudwatch(runners, 'RunnersMetrics') + ## push metrics + #runners = list_runners(access_token) + #push_metrics_to_cloudwatch(runners, 'RunnersMetrics') response = { "InstanceIDs": instances_to_kill @@ -262,7 +267,7 @@ if __name__ == "__main__": "InstanceId": "ip-172-31-45-253.eu-west-1.compute.internal", "InstanceType": "t2.nano", "InstanceMarketOption": "OnDemand" - }, + } ], "Cause": "SCALE_IN" } From 7d92ad66149daac84601a69cde38b03f78668db0 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 30 Sep 2021 15:09:06 +0300 Subject: [PATCH 146/438] Remove PVS check --- .github/workflows/main.yml | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 8cb771a0d45..49760995dfc 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -54,28 +54,6 @@ jobs: YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: cd $GITHUB_WORKSPACE/tests/ci && python3 style_check.py -# PVS-Check: -# needs: DockerHubPush -# runs-on: [self-hosted] -# steps: -# - name: Download changed images -# uses: actions/download-artifact@v2 -# with: -# name: changed_images -# path: ${{ runner.temp }}/pvs_check -# - name: Check out repository code -# uses: actions/checkout@v2 -# with: -# submodules: 'recursive' -# - name: 
PVS Check -# env: -# YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} -# YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} -# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} -# PVS_STUDIO_KEY: ${{ secrets.PVS_STUDIO_KEY }} -# TEMP_PATH: ${{runner.temp}}/pvs_check -# REPO_COPY: ${{runner.temp}}/pvs_check/ClickHouse -# run: mkdir -p ${{runner.temp}}/pvs_check && cp -r $GITHUB_WORKSPACE $TEMP_PATH && cd $REPO_COPY/tests/ci && python3 pvs_check.py FinishCheck: needs: [StyleCheck, DockerHubPush, CheckLabels] runs-on: [self-hosted] From 2aa852388fa2e372326603ee78ec4ab04ee72e72 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 30 Sep 2021 15:34:44 +0300 Subject: [PATCH 147/438] Fix style check --- utils/check-style/check-style | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/check-style/check-style b/utils/check-style/check-style index efdc5f488d2..b2334a8b203 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -162,7 +162,7 @@ find $ROOT_PATH -name '.gitmodules' | while read i; do grep -F 'url = ' $i | gre find $ROOT_PATH/{src,base,programs} -name '*.h' -or -name '*.cpp' 2>/dev/null | xargs grep -i -F 'General Public License' && echo "There shouldn't be any code snippets under GPL or LGPL" # There shouldn't be any docker containers outside docker directory -find $ROOT_PATH -not -path $ROOT_PATH'/docker*' -not -path $ROOT_PATH'/contrib*' -name Dockerfile -type f 2>/dev/null | xargs --no-run-if-empty -n1 echo "Please move Dockerfile to docker directory:" +find $ROOT_PATH -not -path $ROOT_PATH'/tests/ci*' -not -path $ROOT_PATH'/docker*' -not -path $ROOT_PATH'/contrib*' -name Dockerfile -type f 2>/dev/null | xargs --no-run-if-empty -n1 echo "Please move Dockerfile to docker directory:" # There shouldn't be any docker compose files outside docker directory #find $ROOT_PATH -not -path $ROOT_PATH'/tests/testflows*' -not -path $ROOT_PATH'/docker*' -not -path $ROOT_PATH'/contrib*' -name '*compose*.yml' -type f 2>/dev/null | xargs --no-run-if-empty grep -l "version:" | xargs --no-run-if-empty -n1 echo "Please move docker compose to docker directory:" From 1df70af14e2de27405f6cfdacc651567b6140684 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 30 Sep 2021 16:08:12 +0300 Subject: [PATCH 148/438] Fix style check one more time: --- utils/check-style/check-style | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/check-style/check-style b/utils/check-style/check-style index b2334a8b203..dc954411918 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -70,7 +70,7 @@ find $ROOT_PATH/{src,base,programs,utils} -name '*.xml' | xargs xmllint --noout --nonet # FIXME: for now only clickhouse-test -pylint --rcfile=$ROOT_PATH/.pylintrc --score=n $ROOT_PATH/tests/clickhouse-test +pylint --rcfile=$ROOT_PATH/.pylintrc --persistent=no --score=n $ROOT_PATH/tests/clickhouse-test find $ROOT_PATH -not -path $ROOT_PATH'/contrib*' \( -name '*.yaml' -or -name '*.yml' \) -type f | grep -vP $EXCLUDE_DIRS | From 7a27ce7242abd679650b227051f7598bda854ecd Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 30 Sep 2021 16:20:08 +0300 Subject: [PATCH 149/438] Pull new image each time --- tests/ci/style_check.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 2af8514fbfc..71978379099 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -121,6 +121,15 @@ if __name__ == "__main__": docker_image += ':' + 
images['clickhouse/style-test'] logging.info("Got docker image %s", docker_image) + for i in range(10): + try: + subprocess.check_output(f"docker pull {docker_image}", shell=True) + break + except Exception as ex: + time.sleep(i * 3) + logging.info("Got execption pulling docker %s", ex) + else: + raise Exception(f"Cannot pull dockerhub for image {docker_image}") if not aws_secret_key_id or not aws_secret_key: logging.info("No secrets, will not upload anything to S3") From 0bf597374fe239af7da624bbf09c54d9111f9fbf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 3 Oct 2021 05:56:32 +0300 Subject: [PATCH 150/438] More enhancements for query obfuscator --- programs/format/Format.cpp | 5 +++++ src/IO/ReadHelpers.h | 16 ++++++++++++---- src/Parsers/obfuscateQueries.cpp | 11 +++++++++-- 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp index 984543a6c6b..4b0e8ad1ca1 100644 --- a/programs/format/Format.cpp +++ b/programs/format/Format.cpp @@ -25,6 +25,8 @@ #include #include #include +#include +#include #pragma GCC diagnostic ignored "-Wunused-function" @@ -114,6 +116,7 @@ int mainEntryClickHouseFormat(int argc, char ** argv) registerAggregateFunctions(); registerTableFunctions(); registerStorages(); + registerFormats(); std::unordered_set additional_names; @@ -130,6 +133,8 @@ int mainEntryClickHouseFormat(int argc, char ** argv) return FunctionFactory::instance().has(what) || AggregateFunctionFactory::instance().isAggregateFunctionName(what) || TableFunctionFactory::instance().isTableFunctionName(what) + || FormatFactory::instance().isOutputFormat(what) + || FormatFactory::instance().isInputFormat(what) || additional_names.count(what); }; diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index ca6affbf907..bfb30e8b95c 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -276,29 +276,37 @@ ReturnType readIntTextImpl(T & x, ReadBuffer & buf) { case '+': { - if (has_sign || has_number) + if (has_sign) { + if (has_number) + return ReturnType(true); + if constexpr (throw_exception) throw ParsingException( - "Cannot parse number with multiple sign (+/-) characters or intermediate sign character", + "Cannot parse number with multiple sign (+/-) characters", ErrorCodes::CANNOT_PARSE_NUMBER); else return ReturnType(false); } + has_sign = true; break; } case '-': { - if (has_sign || has_number) + if (has_sign) { + if (has_number) + return ReturnType(true); + if constexpr (throw_exception) throw ParsingException( - "Cannot parse number with multiple sign (+/-) characters or intermediate sign character", + "Cannot parse number with multiple sign (+/-) characters", ErrorCodes::CANNOT_PARSE_NUMBER); else return ReturnType(false); } + if constexpr (is_signed_v) negative = true; else diff --git a/src/Parsers/obfuscateQueries.cpp b/src/Parsers/obfuscateQueries.cpp index eb0bf5281c9..c0b57d9b1f5 100644 --- a/src/Parsers/obfuscateQueries.cpp +++ b/src/Parsers/obfuscateQueries.cpp @@ -38,7 +38,8 @@ const std::unordered_set keywords "IN", "KILL", "QUERY", "SYNC", "ASYNC", "TEST", "BETWEEN", "TRUNCATE", "USER", "ROLE", "PROFILE", "QUOTA", "POLICY", "ROW", "GRANT", "REVOKE", "OPTION", "ADMIN", "EXCEPT", "REPLACE", "IDENTIFIED", "HOST", "NAME", "READONLY", "WRITABLE", "PERMISSIVE", "FOR", "RESTRICTIVE", "RANDOMIZED", - "INTERVAL", "LIMITS", "ONLY", "TRACKING", "IP", "REGEXP", "ILIKE", "DICTIONARY" + "INTERVAL", "LIMITS", "ONLY", "TRACKING", "IP", "REGEXP", "ILIKE", "DICTIONARY", "OFFSET", + "TRIM", "LTRIM", "RTRIM", "BOTH", 
"LEADING", "TRAILING" }; const std::unordered_set keep_words @@ -906,7 +907,13 @@ void obfuscateQueries( /// Write quotes and the obfuscated content inside. result.write(*token.begin); - obfuscateIdentifier({token.begin + 1, token.size() - 2}, result, obfuscate_map, used_nouns, hash_func); + + /// If it is long, just replace it with hash. Long identifiers in queries are usually auto-generated. + if (token.size() > 32) + writeIntText(sipHash64(token.begin + 1, token.size() - 2), result); + else + obfuscateIdentifier({token.begin + 1, token.size() - 2}, result, obfuscate_map, used_nouns, hash_func); + result.write(token.end[-1]); } else if (token.type == TokenType::Number) From ece880184b4b6bfe48a7428cefe26e15953e20f0 Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Sun, 3 Oct 2021 20:26:48 +0400 Subject: [PATCH 151/438] Reorganiza contrib IDE folders --- cmake/find/capnp.cmake | 2 - cmake/find/cxx.cmake | 2 - cmake/find/unwind.cmake | 1 - contrib/CMakeLists.txt | 93 ++++++++++++++++++++++++++++++++++++----- 4 files changed, 82 insertions(+), 16 deletions(-) diff --git a/cmake/find/capnp.cmake b/cmake/find/capnp.cmake index ee4735bd175..25dfce24ae9 100644 --- a/cmake/find/capnp.cmake +++ b/cmake/find/capnp.cmake @@ -34,8 +34,6 @@ endif() if (CAPNP_LIBRARIES) set (USE_CAPNP 1) elseif(NOT MISSING_INTERNAL_CAPNP_LIBRARY) - add_subdirectory(contrib/capnproto-cmake) - set (CAPNP_LIBRARIES capnpc) set (USE_CAPNP 1) set (USE_INTERNAL_CAPNP_LIBRARY 1) diff --git a/cmake/find/cxx.cmake b/cmake/find/cxx.cmake index b1da125e219..b96ba1e1b65 100644 --- a/cmake/find/cxx.cmake +++ b/cmake/find/cxx.cmake @@ -50,8 +50,6 @@ endif () if (NOT HAVE_LIBCXX AND NOT MISSING_INTERNAL_LIBCXX_LIBRARY) set (LIBCXX_LIBRARY cxx) set (LIBCXXABI_LIBRARY cxxabi) - add_subdirectory(contrib/libcxxabi-cmake) - add_subdirectory(contrib/libcxx-cmake) # Exception handling library is embedded into libcxxabi. diff --git a/cmake/find/unwind.cmake b/cmake/find/unwind.cmake index c9f5f30a5d6..9ae23ae23c7 100644 --- a/cmake/find/unwind.cmake +++ b/cmake/find/unwind.cmake @@ -1,7 +1,6 @@ option (USE_UNWIND "Enable libunwind (better stacktraces)" ${ENABLE_LIBRARIES}) if (USE_UNWIND) - add_subdirectory(contrib/libunwind-cmake) set (UNWIND_LIBRARIES unwind) set (EXCEPTION_HANDLING_LIBRARY ${UNWIND_LIBRARIES}) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 140cc0846ec..2c0ddbc8384 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -1,16 +1,5 @@ # Third-party libraries may have substandard code. -# Put all targets defined here and in added subfolders under "contrib/" folder in GUI-based IDEs by default. -# Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they will -# appear not in "contrib/" as originally planned here. 
-get_filename_component (_current_dir_name "${CMAKE_CURRENT_LIST_DIR}" NAME) -if (CMAKE_FOLDER) - set (CMAKE_FOLDER "${CMAKE_FOLDER}/${_current_dir_name}") -else () - set (CMAKE_FOLDER "${_current_dir_name}") -endif () -unset (_current_dir_name) - set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -w") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w") @@ -49,6 +38,19 @@ add_subdirectory (replxx-cmake) add_subdirectory (unixodbc-cmake) add_subdirectory (nanodbc-cmake) +if (USE_INTERNAL_LIBCXX_LIBRARY AND NOT MISSING_INTERNAL_LIBCXX_LIBRARY) + add_subdirectory(libcxxabi-cmake) + add_subdirectory(libcxx-cmake) +endif () + +if (USE_UNWIND) + add_subdirectory(libunwind-cmake) +endif () + +if (USE_INTERNAL_CAPNP_LIBRARY AND NOT MISSING_INTERNAL_CAPNP_LIBRARY) + add_subdirectory(capnproto-cmake) +endif () + if (USE_YAML_CPP) add_subdirectory (yaml-cpp-cmake) endif() @@ -347,3 +349,72 @@ endif() if (USE_S2_GEOMETRY) add_subdirectory(s2geometry-cmake) endif() + +# Put all targets defined here and in subdirectories under "contrib/" folders in GUI-based IDEs. +# Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear +# in "contrib/..." as originally planned, so we workaround this by fixing FOLDER properties of all targets manually, +# instead of controlling it via CMAKE_FOLDER. + +function (ensure_target_rooted_in _target _folder) + # Read the original FOLDER property value, if any. + get_target_property (_folder_prop "${_target}" FOLDER) + + # Normalize that value, so we avoid possible repetitions in folder names. + + if (NOT _folder_prop) + set (_folder_prop "") + endif () + + if (CMAKE_FOLDER AND _folder_prop MATCHES "^${CMAKE_FOLDER}/(.*)\$") + set (_folder_prop "${CMAKE_MATCH_1}") + endif () + + if (_folder AND _folder_prop MATCHES "^${_folder}/(.*)\$") + set (_folder_prop "${CMAKE_MATCH_1}") + endif () + + if (_folder) + set (_folder_prop "${_folder}/${_folder_prop}") + endif () + + if (CMAKE_FOLDER) + set (_folder_prop "${CMAKE_FOLDER}/${_folder_prop}") + endif () + + message (STATUS "${_target} goes under ${_folder_prop}") + + # Set the updated FOLDER property value back. + set_target_properties ("${_target}" PROPERTIES FOLDER "${_folder_prop}") +endfunction () + +function (ensure_own_targets_are_rooted_in _dir _folder) + get_directory_property (_targets DIRECTORY "${_dir}" BUILDSYSTEM_TARGETS) + foreach (_target IN LISTS _targets) + ensure_target_rooted_in ("${_target}" "${_folder}") + endforeach () +endfunction () + +function (ensure_all_targets_are_rooted_in _dir _folder) + ensure_own_targets_are_rooted_in ("${_dir}" "${_folder}") + + get_property (_sub_dirs DIRECTORY "${_dir}" PROPERTY SUBDIRECTORIES) + foreach (_sub_dir IN LISTS _sub_dirs) + ensure_all_targets_are_rooted_in ("${_sub_dir}" "${_folder}") + endforeach () +endfunction () + +function (organize_ide_folders_2_level _dir) + get_filename_component (_dir_name "${_dir}" NAME) + ensure_own_targets_are_rooted_in ("${_dir}" "${_dir_name}") + + # Note, that we respect only first two levels of nesting, we don't want to + # reorganize target folders further within each third-party dir. 
+ + get_property (_sub_dirs DIRECTORY "${_dir}" PROPERTY SUBDIRECTORIES) + foreach (_sub_dir IN LISTS _sub_dirs) + get_filename_component (_sub_dir_name "${_sub_dir}" NAME) + ensure_all_targets_are_rooted_in ("${_sub_dir}" "${_dir_name}/${_sub_dir_name}") + endforeach () +endfunction () + +organize_ide_folders_2_level ("${CMAKE_CURRENT_LIST_DIR}") From 365a6b469e069a174c9c9924e1eee9795bf72bfd Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Sun, 3 Oct 2021 20:30:20 +0400 Subject: [PATCH 152/438] Remove debug message --- contrib/CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 2c0ddbc8384..cc5a6dbc9b7 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -381,8 +381,6 @@ function (ensure_target_rooted_in _target _folder) set (_folder_prop "${CMAKE_FOLDER}/${_folder_prop}") endif () - message (STATUS "${_target} goes under ${_folder_prop}") - # Set the updated FOLDER property value back. set_target_properties ("${_target}" PROPERTIES FOLDER "${_folder_prop}") endfunction () From 7be521b024acf1eff98fb3075a376f60a9d667bf Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Sun, 3 Oct 2021 21:07:44 +0400 Subject: [PATCH 153/438] Do not manipulate FOLDER property on INTERFACE library targets --- contrib/CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index cc5a6dbc9b7..c671369d126 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -356,6 +356,12 @@ endif() # instead of controlling it via CMAKE_FOLDER. function (ensure_target_rooted_in _target _folder) + # Skip INTERFACE library targets, since FOLDER property is not available for them. + get_target_property (_target_type "${_target}" TYPE) + if (_target_type STREQUAL "INTERFACE_LIBRARY") + return () + endif () + # Read the original FOLDER property value, if any. get_target_property (_folder_prop "${_target}" FOLDER) From 200f655a2f76b3ac23ba10ff54a09da83931af37 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Wed, 25 Aug 2021 09:50:30 +0300 Subject: [PATCH 154/438] Fix writing marks in StorageLog. 
--- src/Storages/StorageLog.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 0fd94bac95a..691e431907a 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -214,8 +214,7 @@ public: , storage(storage_) , metadata_snapshot(metadata_snapshot_) , lock(std::move(lock_)) - , marks_stream( - storage.disk->writeFile(storage.marks_file_path, 4096, WriteMode::Rewrite)) + , marks_stream(storage.disk->writeFile(storage.marks_file_path, 4096, WriteMode::Append)) { if (!lock) throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); From bb32432943c1e04f442d6df83dcb31faa0c269c8 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 5 Oct 2021 00:13:18 +0300 Subject: [PATCH 155/438] more fixes --- .../MergeTree/MutateFromLogEntryTask.cpp | 6 ++- .../MergeTree/MutateFromLogEntryTask.h | 1 + .../MergeTree/MutatePlainMergeTreeTask.cpp | 6 ++- .../MergeTree/MutatePlainMergeTreeTask.h | 1 + src/Storages/StorageMergeTree.cpp | 49 ++----------------- src/Storages/StorageMergeTree.h | 1 - .../01162_strange_mutations.reference | 24 +++++++++ .../0_stateless/01162_strange_mutations.sh | 17 +++++-- 8 files changed, 55 insertions(+), 50 deletions(-) diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp index 7cca7012a2c..a6c70e1db4f 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp @@ -75,9 +75,13 @@ std::pair MutateFromLogEntry stopwatch_ptr = std::make_unique(); + fake_query_context = Context::createCopy(storage.getContext()); + fake_query_context->makeQueryContext(); + fake_query_context->setCurrentQueryId(""); + mutate_task = storage.merger_mutator.mutatePartToTemporaryPart( future_mutated_part, metadata_snapshot, commands, merge_mutate_entry.get(), - entry.create_time, storage.getContext(), reserved_space, table_lock_holder); + entry.create_time, fake_query_context, reserved_space, table_lock_holder); return {true, [this] (const ExecutionStatus & execution_status) { diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.h b/src/Storages/MergeTree/MutateFromLogEntryTask.h index 2fa4f0c0388..9aaad14dd4c 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.h +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.h @@ -42,6 +42,7 @@ private: MergeTreeData::MutableDataPartPtr new_part{nullptr}; FutureMergedMutatedPartPtr future_mutated_part{nullptr}; + ContextMutablePtr fake_query_context; MutateTaskPtr mutate_task; }; diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp index 71128b31cdb..b7768531373 100644 --- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp @@ -43,9 +43,13 @@ void MutatePlainMergeTreeTask::prepare() merge_list_entry.get()); }; + fake_query_context = Context::createCopy(storage.getContext()); + fake_query_context->makeQueryContext(); + fake_query_context->setCurrentQueryId(""); + mutate_task = storage.merger_mutator.mutatePartToTemporaryPart( future_part, metadata_snapshot, merge_mutate_entry->commands, merge_list_entry.get(), - time(nullptr), storage.getContext(), merge_mutate_entry->tagger->reserved_space, table_lock_holder); + time(nullptr), fake_query_context, merge_mutate_entry->tagger->reserved_space, table_lock_holder); } bool MutatePlainMergeTreeTask::executeStep() diff --git 
a/src/Storages/MergeTree/MutatePlainMergeTreeTask.h b/src/Storages/MergeTree/MutatePlainMergeTreeTask.h index e8f210f4175..7e332ad3334 100644 --- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.h +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.h @@ -74,6 +74,7 @@ private: IExecutableTask::TaskResultCallback task_result_callback; + ContextMutablePtr fake_query_context; MutateTaskPtr mutate_task; }; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index faa16ac875b..3829b3813aa 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -927,12 +927,16 @@ std::shared_ptr StorageMergeTree::selectPartsToMutate( { try { + auto fake_query_context = Context::createCopy(getContext()); + fake_query_context->makeQueryContext(); + fake_query_context->setCurrentQueryId(""); MutationsInterpreter interpreter( - shared_from_this(), metadata_snapshot, commands_for_size_validation, getContext(), false); + shared_from_this(), metadata_snapshot, commands_for_size_validation, fake_query_context, false); commands_size += interpreter.evaluateCommandsSize(); } catch (...) { + tryLogCurrentException(log); MergeTreeMutationEntry & entry = it->second; entry.latest_fail_time = time(nullptr); entry.latest_fail_reason = getCurrentExceptionMessage(false); @@ -965,49 +969,6 @@ std::shared_ptr StorageMergeTree::selectPartsToMutate( return {}; } -bool StorageMergeTree::mutateSelectedPart(const StorageMetadataPtr & metadata_snapshot, MergeMutateSelectedEntry & merge_mutate_entry, TableLockHolder & table_lock_holder) -{ - auto & future_part = merge_mutate_entry.future_part; - - auto merge_list_entry = getContext()->getMergeList().insert(getStorageID(), future_part); - Stopwatch stopwatch; - MutableDataPartPtr new_part; - - auto write_part_log = [&] (const ExecutionStatus & execution_status) - { - writePartLog( - PartLogElement::MUTATE_PART, - execution_status, - stopwatch.elapsed(), - future_part->name, - new_part, - future_part->parts, - merge_list_entry.get()); - }; - - try - { - auto task = merger_mutator.mutatePartToTemporaryPart( - future_part, metadata_snapshot, merge_mutate_entry.commands, merge_list_entry.get(), - time(nullptr), getContext(), merge_mutate_entry.tagger->reserved_space, table_lock_holder); - - new_part = executeHere(task); - - renameTempPartAndReplace(new_part); - - updateMutationEntriesErrors(future_part, true, ""); - write_part_log({}); - } - catch (...) 
- { - updateMutationEntriesErrors(future_part, false, getCurrentExceptionMessage(false)); - write_part_log(ExecutionStatus::fromCurrentException()); - throw; - } - - return true; -} - bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assignee) //-V657 { if (shutdown_called) diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 08cce514d71..11379359844 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -178,7 +178,6 @@ private: std::shared_ptr selectPartsToMutate(const StorageMetadataPtr & metadata_snapshot, String * disable_reason, TableLockHolder & table_lock_holder); - bool mutateSelectedPart(const StorageMetadataPtr & metadata_snapshot, MergeMutateSelectedEntry & entry, TableLockHolder & table_lock_holder); Int64 getCurrentMutationVersion( const DataPartPtr & part, diff --git a/tests/queries/0_stateless/01162_strange_mutations.reference b/tests/queries/0_stateless/01162_strange_mutations.reference index e09dccd4c4a..4c925f849d6 100644 --- a/tests/queries/0_stateless/01162_strange_mutations.reference +++ b/tests/queries/0_stateless/01162_strange_mutations.reference @@ -1,8 +1,32 @@ +MergeTree 1 2 2 0 +50 6225 0 +0 +50 6225 1900 +ReplicatedMergeTree 1 2 2 0 +50 6225 0 +2 +50 6225 0 +Memory +1 +2 +2 +0 +50 6225 0 +0 +50 6225 1900 +Join +1 +2 +2 +0 +50 6225 0 +0 +50 6225 0 diff --git a/tests/queries/0_stateless/01162_strange_mutations.sh b/tests/queries/0_stateless/01162_strange_mutations.sh index c4166a88e42..c75bee47e4e 100755 --- a/tests/queries/0_stateless/01162_strange_mutations.sh +++ b/tests/queries/0_stateless/01162_strange_mutations.sh @@ -4,18 +4,29 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -declare -a engines=("MergeTree" "ReplicatedMergeTree('/test/01162/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX', '1')") +declare -a engines=("MergeTree order by n" "ReplicatedMergeTree('/test/01162/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX', '1') order by n" "Memory" "Join(ALL, FULL, n)") + +$CLICKHOUSE_CLIENT -q "CREATE OR REPLACE VIEW t1 AS SELECT number * 10 AS id, number * 100 AS value FROM numbers(20)" for engine in "${engines[@]}" do $CLICKHOUSE_CLIENT -q "drop table if exists t" - $CLICKHOUSE_CLIENT -q "create table t (n int) engine=$engine order by n" + $CLICKHOUSE_CLIENT -q "create table t (n int) engine=$engine" + $CLICKHOUSE_CLIENT -q "select engine from system.tables where database=currentDatabase() and name='t'" $CLICKHOUSE_CLIENT -q "insert into t values (1)" $CLICKHOUSE_CLIENT -q "insert into t values (2)" $CLICKHOUSE_CLIENT -q "select * from t order by n" $CLICKHOUSE_CLIENT --mutations_sync=1 -q "alter table t delete where n global in (select * from (select * from t where n global in (1::Int32)))" $CLICKHOUSE_CLIENT -q "select * from t order by n" - $CLICKHOUSE_CLIENT --mutations_sync=1 -q "alter table t delete where n global in (select t1.n from t as t1 join t as t2 on t1.n=t2.n where t1.n global in (select 2::Int32))" + $CLICKHOUSE_CLIENT --mutations_sync=1 -q "alter table t delete where n global in (select t1.n from t as t1 full join t as t2 on t1.n=t2.n where t1.n global in (select 2::Int32))" $CLICKHOUSE_CLIENT -q "select count() from t" $CLICKHOUSE_CLIENT -q "drop table t" + + $CLICKHOUSE_CLIENT -q "drop table if exists test" + $CLICKHOUSE_CLIENT -q "CREATE TABLE test ENGINE=$engine AS SELECT number + 100 AS n, 0 AS test FROM numbers(50)" + $CLICKHOUSE_CLIENT -q "select count(), sum(n), sum(test) from test" + # FIXME 
it's not clear if the following query should fail or not + $CLICKHOUSE_CLIENT --mutations_sync=1 -q "ALTER TABLE test UPDATE test = (SELECT groupArray(id) FROM t1 GROUP BY 1)[n - 99] WHERE 1" 2>&1| grep -c "Unknown function" + $CLICKHOUSE_CLIENT -q "select count(), sum(n), sum(test) from test" + $CLICKHOUSE_CLIENT -q "drop table test" done From 9ff8a2391d310807807f391275945950274e3d14 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Tue, 5 Oct 2021 03:39:15 +0300 Subject: [PATCH 156/438] Add test. --- ...02047_log_family_data_file_sizes.reference | 47 ++++++++++++++++++ .../02047_log_family_data_file_sizes.sh | 48 +++++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 tests/queries/0_stateless/02047_log_family_data_file_sizes.reference create mode 100755 tests/queries/0_stateless/02047_log_family_data_file_sizes.sh diff --git a/tests/queries/0_stateless/02047_log_family_data_file_sizes.reference b/tests/queries/0_stateless/02047_log_family_data_file_sizes.reference new file mode 100644 index 00000000000..b802026a0dd --- /dev/null +++ b/tests/queries/0_stateless/02047_log_family_data_file_sizes.reference @@ -0,0 +1,47 @@ +Log: +empty: +1 element: +1 a +__marks.mrk +sizes.json +x.bin +y.bin +3 elements: +1 a +22 bc +333 def +__marks.mrk greater size +sizes.json +x.bin greater size +y.bin greater size + +TinyLog: +empty: +1 element: +1 a +sizes.json +x.bin +y.bin +3 elements: +1 a +22 bc +333 def +sizes.json +x.bin greater size +y.bin greater size + +StripeLog: +empty: +1 element: +1 a +data.bin +index.mrk +sizes.json +3 elements: +1 a +22 bc +333 def +data.bin greater size +index.mrk greater size +sizes.json + diff --git a/tests/queries/0_stateless/02047_log_family_data_file_sizes.sh b/tests/queries/0_stateless/02047_log_family_data_file_sizes.sh new file mode 100755 index 00000000000..e7c1cb5d71e --- /dev/null +++ b/tests/queries/0_stateless/02047_log_family_data_file_sizes.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +declare -a engines=("Log" "TinyLog" "StripeLog") +for engine in "${engines[@]}" +do + echo "$engine:" + + $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS tbl" + $CLICKHOUSE_CLIENT --query="CREATE TABLE tbl(x UInt32, y String) ENGINE=$engine" + data_dir=$($CLICKHOUSE_CLIENT --query="SELECT data_paths[1] FROM system.tables WHERE name='tbl' AND database=currentDatabase()") + + echo "empty:" + find "$data_dir"* 2>/dev/null + + echo "1 element:" + $CLICKHOUSE_CLIENT --query="INSERT INTO tbl VALUES (1, 'a')" + $CLICKHOUSE_CLIENT --query="SELECT * FROM tbl ORDER BY x" + declare -A file_sizes + for name in $(find "$data_dir"* -print0 | xargs -0 -n 1 basename | sort); do + file_path=$data_dir$name + file_size=$(stat -c%s "$file_path") + file_sizes[$name]=$file_size + echo $name + done + + echo "3 elements:" + $CLICKHOUSE_CLIENT --query="INSERT INTO tbl VALUES (22, 'bc'), (333, 'def')" + $CLICKHOUSE_CLIENT --query="SELECT * FROM tbl ORDER BY x" + for name in $(find "$data_dir"* -print0 | xargs -0 -n 1 basename | sort); do + file_path=$data_dir$name + file_size=$(stat -c%s "$file_path") + old_file_size=${file_sizes[$name]} + if [ "$name" == "sizes.json" ]; then + cmp="" + elif (( file_size > old_file_size )); then + cmp="greater size" + else + cmp="unexpected size ($file_size, old_size=$old_file_size)" + fi + echo $name $cmp + done + + echo +done From 07113cb6f63781eda49ab25475c59a5c89093d86 Mon Sep 17 00:00:00 2001 From: Federico Ceratto Date: Thu, 7 Oct 2021 12:50:56 +0100 Subject: [PATCH 157/438] Link FAQ from Debian installation --- docs/en/getting-started/install.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index 508cd51e9f8..06186842809 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -27,10 +27,11 @@ It is recommended to use official pre-compiled `deb` packages for Debian or Ubun {% include 'install/deb.sh' %} ``` -If you want to use the most recent version, replace `stable` with `testing` (this is recommended for your testing environments). +You can replace `stable` with `lts` or `testing` to use different [“release trains”](../faq/operations/production.md) based on your needs. You can also download and install packages manually from [here](https://repo.clickhouse.com/deb/stable/main/). + #### Packages {#packages} - `clickhouse-common-static` — Installs ClickHouse compiled binary files. 
From afd69ef8336356c4e38505c8881dec58e9b6169a Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 7 Oct 2021 15:04:54 +0300 Subject: [PATCH 158/438] fix check for nondeterministic mutations --- src/Interpreters/MutationsInterpreter.cpp | 41 ++++++++++++------- .../01162_strange_mutations.reference | 6 +-- .../0_stateless/01162_strange_mutations.sh | 20 +++++++-- 3 files changed, 44 insertions(+), 23 deletions(-) diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 540d5c76c97..b1c578854a7 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -54,24 +54,33 @@ public: { ContextPtr context; std::optional nondeterministic_function_name; + bool subquery = false; }; - static bool needChildVisit(const ASTPtr & /*node*/, const ASTPtr & child) + static bool needChildVisit(const ASTPtr & /*node*/, const ASTPtr & /*child*/) { - return child != nullptr; + return true; } static void visit(const ASTPtr & node, Data & data) { - if (data.nondeterministic_function_name) + if (data.nondeterministic_function_name || data.subquery) return; - if (const auto * function = typeid_cast(node.get())) + if (node->as()) + { + /// We cannot determine if subquery is deterministic or not, + /// so we do not allow to use subqueries in mutation without allow_nondeterministic_mutations=1 + data.subquery = true; + } + else if (const auto * function = typeid_cast(node.get())) { /// Property of being deterministic for lambda expression is completely determined /// by the contents of its definition, so we just proceed to it. if (function->name != "lambda") { + /// NOTE It may be an aggregate function, so get(...) may throw. + /// However, an aggregate function can be used only in subquery and we do not go into subquery. 
const auto func = FunctionFactory::instance().get(function->name, data.context); if (!func->isDeterministic()) data.nondeterministic_function_name = func->getName(); @@ -81,10 +90,11 @@ public: }; using FirstNonDeterministicFunctionFinder = InDepthNodeVisitor; +using FirstNonDeterministicFunctionData = FirstNonDeterministicFunctionMatcher::Data; -std::optional findFirstNonDeterministicFunctionName(const MutationCommand & command, ContextPtr context) +FirstNonDeterministicFunctionData findFirstNonDeterministicFunctionName(const MutationCommand & command, ContextPtr context) { - FirstNonDeterministicFunctionMatcher::Data finder_data{context, std::nullopt}; + FirstNonDeterministicFunctionMatcher::Data finder_data{context, std::nullopt, false}; switch (command.type) { @@ -94,7 +104,7 @@ std::optional findFirstNonDeterministicFunctionName(const MutationComman FirstNonDeterministicFunctionFinder(finder_data).visit(update_assignments_ast); if (finder_data.nondeterministic_function_name) - return finder_data.nondeterministic_function_name; + return finder_data; /// Currently UPDATE and DELETE both always have predicates so we can use fallthrough [[fallthrough]]; @@ -105,7 +115,7 @@ std::optional findFirstNonDeterministicFunctionName(const MutationComman auto predicate_ast = command.predicate->clone(); FirstNonDeterministicFunctionFinder(finder_data).visit(predicate_ast); - return finder_data.nondeterministic_function_name; + return finder_data; } default: @@ -918,12 +928,15 @@ void MutationsInterpreter::validate() { for (const auto & command : commands) { - const auto nondeterministic_func_name = findFirstNonDeterministicFunctionName(command, context); - if (nondeterministic_func_name) - throw Exception( - "ALTER UPDATE/ALTER DELETE statements must use only deterministic functions! " - "Function '" + *nondeterministic_func_name + "' is non-deterministic", - ErrorCodes::BAD_ARGUMENTS); + const auto nondeterministic_func_data = findFirstNonDeterministicFunctionName(command, context); + if (nondeterministic_func_data.subquery) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "ALTER UPDATE/ALTER DELETE statement with subquery may be nondeterministic, " + "see allow_nondeterministic_mutations setting"); + + if (nondeterministic_func_data.nondeterministic_function_name) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "ALTER UPDATE/ALTER DELETE statements must use only deterministic functions. 
" + "Function '{}' is non-deterministic", *nondeterministic_func_data.nondeterministic_function_name); } } diff --git a/tests/queries/0_stateless/01162_strange_mutations.reference b/tests/queries/0_stateless/01162_strange_mutations.reference index 4c925f849d6..55f17cfe464 100644 --- a/tests/queries/0_stateless/01162_strange_mutations.reference +++ b/tests/queries/0_stateless/01162_strange_mutations.reference @@ -4,7 +4,6 @@ MergeTree 2 0 50 6225 0 -0 50 6225 1900 ReplicatedMergeTree 1 @@ -12,15 +11,13 @@ ReplicatedMergeTree 2 0 50 6225 0 -2 -50 6225 0 +50 6225 1900 Memory 1 2 2 0 50 6225 0 -0 50 6225 1900 Join 1 @@ -28,5 +25,4 @@ Join 2 0 50 6225 0 -0 50 6225 0 diff --git a/tests/queries/0_stateless/01162_strange_mutations.sh b/tests/queries/0_stateless/01162_strange_mutations.sh index c75bee47e4e..fecb1b8d8c0 100755 --- a/tests/queries/0_stateless/01162_strange_mutations.sh +++ b/tests/queries/0_stateless/01162_strange_mutations.sh @@ -16,17 +16,29 @@ do $CLICKHOUSE_CLIENT -q "insert into t values (1)" $CLICKHOUSE_CLIENT -q "insert into t values (2)" $CLICKHOUSE_CLIENT -q "select * from t order by n" - $CLICKHOUSE_CLIENT --mutations_sync=1 -q "alter table t delete where n global in (select * from (select * from t where n global in (1::Int32)))" + $CLICKHOUSE_CLIENT --allow_nondeterministic_mutations=1 --mutations_sync=1 -q "alter table t + delete where n global in (select * from (select * from t where n global in (1::Int32)))" $CLICKHOUSE_CLIENT -q "select * from t order by n" - $CLICKHOUSE_CLIENT --mutations_sync=1 -q "alter table t delete where n global in (select t1.n from t as t1 full join t as t2 on t1.n=t2.n where t1.n global in (select 2::Int32))" + $CLICKHOUSE_CLIENT --allow_nondeterministic_mutations=1 --mutations_sync=1 -q "alter table t + delete where n global in (select t1.n from t as t1 full join t as t2 on t1.n=t2.n where t1.n global in (select 2::Int32))" $CLICKHOUSE_CLIENT -q "select count() from t" $CLICKHOUSE_CLIENT -q "drop table t" $CLICKHOUSE_CLIENT -q "drop table if exists test" $CLICKHOUSE_CLIENT -q "CREATE TABLE test ENGINE=$engine AS SELECT number + 100 AS n, 0 AS test FROM numbers(50)" $CLICKHOUSE_CLIENT -q "select count(), sum(n), sum(test) from test" - # FIXME it's not clear if the following query should fail or not - $CLICKHOUSE_CLIENT --mutations_sync=1 -q "ALTER TABLE test UPDATE test = (SELECT groupArray(id) FROM t1 GROUP BY 1)[n - 99] WHERE 1" 2>&1| grep -c "Unknown function" + if [[ $engine == *"ReplicatedMergeTree"* ]]; then + $CLICKHOUSE_CLIENT -q "ALTER TABLE test + UPDATE test = (SELECT groupArray(id) FROM t1 GROUP BY 1)[n - 99] WHERE 1" 2>&1| grep -Fa "DB::Exception: " | grep -Fv "statement with subquery may be nondeterministic" + $CLICKHOUSE_CLIENT --allow_nondeterministic_mutations=1 --mutations_sync=1 -q "ALTER TABLE test + UPDATE test = (SELECT groupArray(id) FROM t1 GROUP BY 1)[n - 99] WHERE 1" + elif [[ $engine == *"Join"* ]]; then + $CLICKHOUSE_CLIENT -q "ALTER TABLE test + UPDATE test = (SELECT groupArray(id) FROM t1 GROUP BY 1)[n - 99] WHERE 1" 2>&1| grep -Fa "DB::Exception: " | grep -Fv "Table engine Join supports only DELETE mutations" + else + $CLICKHOUSE_CLIENT --mutations_sync=1 -q "ALTER TABLE test + UPDATE test = (SELECT groupArray(id) FROM t1 GROUP BY 1)[n - 99] WHERE 1" + fi $CLICKHOUSE_CLIENT -q "select count(), sum(n), sum(test) from test" $CLICKHOUSE_CLIENT -q "drop table test" done From 3a3ea9f19cf1fa3fab45b01a2630f42f0bef145b Mon Sep 17 00:00:00 2001 From: tavplubix Date: Thu, 7 Oct 2021 18:42:25 +0300 Subject: [PATCH 159/438] 
Update 00652_replicated_mutations_default_database_zookeeper.sh --- .../00652_replicated_mutations_default_database_zookeeper.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00652_replicated_mutations_default_database_zookeeper.sh b/tests/queries/0_stateless/00652_replicated_mutations_default_database_zookeeper.sh index 3f5b8d570a6..0ac5a2f748a 100755 --- a/tests/queries/0_stateless/00652_replicated_mutations_default_database_zookeeper.sh +++ b/tests/queries/0_stateless/00652_replicated_mutations_default_database_zookeeper.sh @@ -9,7 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=./mergetree_mutations.lib . "$CURDIR"/mergetree_mutations.lib -${CLICKHOUSE_CLIENT} --multiquery << EOF +${CLICKHOUSE_CLIENT} --allow_nondeterministic_mutations=1 --multiquery << EOF DROP TABLE IF EXISTS mutations_r1; DROP TABLE IF EXISTS for_subquery; From f953cb85f850da07bb5316af5007d51bf4bee1fe Mon Sep 17 00:00:00 2001 From: Artur Filatenkov <613623@mail.ru> Date: Fri, 8 Oct 2021 21:01:14 +0300 Subject: [PATCH 160/438] Check --- .gitmodules | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 74d1049ce01..d8c5aa640a8 100644 --- a/.gitmodules +++ b/.gitmodules @@ -212,7 +212,8 @@ url = https://github.com/ClickHouse-Extras/libpq [submodule "contrib/boringssl"] path = contrib/boringssl - url = https://github.com/ClickHouse-Extras/boringssl.git + url = https://github.com/FArthur-cmd/boringssl.git + branch = update_BoringSSL [submodule "contrib/NuRaft"] path = contrib/NuRaft url = https://github.com/ClickHouse-Extras/NuRaft.git From fe12404cb017e2f41ae9d46c9d679cde765156ba Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sat, 9 Oct 2021 15:14:23 +0300 Subject: [PATCH 161/438] Fix tests test_log_family_s3, test_log_family_hdfs. 
--- tests/integration/test_log_family_hdfs/test.py | 14 ++++++++++++-- tests/integration/test_log_family_s3/test.py | 14 ++++++++++++-- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_log_family_hdfs/test.py b/tests/integration/test_log_family_hdfs/test.py index a38b067358e..7bb9cdfeaf5 100644 --- a/tests/integration/test_log_family_hdfs/test.py +++ b/tests/integration/test_log_family_hdfs/test.py @@ -31,10 +31,20 @@ def assert_objects_count(started_cluster, objects_count, path='data/'): hdfs_objects = fs.listdir('/clickhouse') assert objects_count == len(hdfs_objects) - +# TinyLog: files: id.bin, sizes.json +# INSERT overwrites 1 file (`sizes.json`) and appends 1 file (`id.bin`), so +# files_overhead=1, files_overhead_per_insert=1 +# +# Log: files: id.bin, __marks.mrk, sizes.json +# INSERT overwrites 1 file (`sizes.json`), and appends 2 files (`id.bin`, `__marks.mrk`), so +# files_overhead=1, files_overhead_per_insert=2 +# +# StripeLog: files: data.bin, index.mrk, sizes.json +# INSERT overwrites 1 file (`sizes.json`), and appends 2 files (`index.mrk`, `data.bin`), so +# files_overhead=1, files_overhead_per_insert=2 @pytest.mark.parametrize( "log_engine,files_overhead,files_overhead_per_insert", - [("TinyLog", 1, 1), ("Log", 2, 1), ("StripeLog", 1, 2)]) + [("TinyLog", 1, 1), ("Log", 1, 2), ("StripeLog", 1, 2)]) def test_log_family_hdfs(started_cluster, log_engine, files_overhead, files_overhead_per_insert): node = started_cluster.instances["node"] diff --git a/tests/integration/test_log_family_s3/test.py b/tests/integration/test_log_family_s3/test.py index 71d47a8a2e8..8531edd635f 100644 --- a/tests/integration/test_log_family_s3/test.py +++ b/tests/integration/test_log_family_s3/test.py @@ -30,10 +30,20 @@ def assert_objects_count(cluster, objects_count, path='data/'): logging.info("Existing S3 object: %s", str(object_meta)) assert objects_count == len(s3_objects) - +# TinyLog: files: id.bin, sizes.json +# INSERT overwrites 1 file (`sizes.json`) and appends 1 file (`id.bin`), so +# files_overhead=1, files_overhead_per_insert=1 +# +# Log: files: id.bin, __marks.mrk, sizes.json +# INSERT overwrites 1 file (`sizes.json`), and appends 2 files (`id.bin`, `__marks.mrk`), so +# files_overhead=1, files_overhead_per_insert=2 +# +# StripeLog: files: data.bin, index.mrk, sizes.json +# INSERT overwrites 1 file (`sizes.json`), and appends 2 files (`index.mrk`, `data.bin`), so +# files_overhead=1, files_overhead_per_insert=2 @pytest.mark.parametrize( "log_engine,files_overhead,files_overhead_per_insert", - [("TinyLog", 1, 1), ("Log", 2, 1), ("StripeLog", 1, 2)]) + [("TinyLog", 1, 1), ("Log", 1, 2), ("StripeLog", 1, 2)]) def test_log_family_s3(cluster, log_engine, files_overhead, files_overhead_per_insert): node = cluster.instances["node"] From 351f2a3a842c6ddb63c09958c5e2397c1f42a093 Mon Sep 17 00:00:00 2001 From: Artur Filatenkov <613623@mail.ru> Date: Mon, 11 Oct 2021 14:53:26 +0300 Subject: [PATCH 162/438] Update BorinSSL --- .gitmodules | 3 +- contrib/boringssl-cmake/CMakeLists.txt | 49 ++++++++++++++++++-------- 2 files changed, 35 insertions(+), 17 deletions(-) diff --git a/.gitmodules b/.gitmodules index d8c5aa640a8..74d1049ce01 100644 --- a/.gitmodules +++ b/.gitmodules @@ -212,8 +212,7 @@ url = https://github.com/ClickHouse-Extras/libpq [submodule "contrib/boringssl"] path = contrib/boringssl - url = https://github.com/FArthur-cmd/boringssl.git - branch = update_BoringSSL + url = https://github.com/ClickHouse-Extras/boringssl.git [submodule 
"contrib/NuRaft"] path = contrib/NuRaft url = https://github.com/ClickHouse-Extras/NuRaft.git diff --git a/contrib/boringssl-cmake/CMakeLists.txt b/contrib/boringssl-cmake/CMakeLists.txt index 4502d6e9d42..474e32f3b91 100644 --- a/contrib/boringssl-cmake/CMakeLists.txt +++ b/contrib/boringssl-cmake/CMakeLists.txt @@ -4,7 +4,7 @@ # This file is created by generate_build_files.py and edited accordingly. -cmake_minimum_required(VERSION 3.0) +cmake_minimum_required(VERSION 3.5) project(BoringSSL LANGUAGES C CXX) @@ -20,12 +20,7 @@ if(CMAKE_COMPILER_IS_GNUCXX OR CLANG) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") endif() - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-common") - if((CMAKE_C_COMPILER_VERSION VERSION_GREATER "4.8.99") OR CLANG) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c11") - else() - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99") - endif() + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-common -std=c11") endif() # pthread_rwlock_t requires a feature flag. @@ -55,7 +50,7 @@ add_definitions(-DBORINGSSL_IMPLEMENTATION) # builds. if(NOT OPENSSL_NO_ASM AND CMAKE_OSX_ARCHITECTURES) list(LENGTH CMAKE_OSX_ARCHITECTURES NUM_ARCHES) - if(NOT ${NUM_ARCHES} EQUAL 1) + if(NOT NUM_ARCHES EQUAL 1) message(FATAL_ERROR "Universal binaries not supported.") endif() list(GET CMAKE_OSX_ARCHITECTURES 0 CMAKE_SYSTEM_PROCESSOR) @@ -78,7 +73,13 @@ elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64") elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86") set(ARCH "x86") elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "i386") - set(ARCH "x86") + # cmake uses `uname -p` to set the system processor, but Solaris + # systems support multiple architectures. + if((${CMAKE_SYSTEM_NAME} STREQUAL "SunOS") AND CMAKE_SIZEOF_VOID_P EQUAL 8) + set(ARCH "x86_64") + else() + set(ARCH "x86") + endif() elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "i686") set(ARCH "x86") elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64") @@ -289,6 +290,21 @@ set( mac-x86_64/crypto/test/trampoline-x86_64.S ) +set( + CRYPTO_win_aarch64_SOURCES + + win-aarch64/crypto/chacha/chacha-armv8.S + win-aarch64/crypto/fipsmodule/aesv8-armx64.S + win-aarch64/crypto/fipsmodule/armv8-mont.S + win-aarch64/crypto/fipsmodule/ghash-neon-armv8.S + win-aarch64/crypto/fipsmodule/ghashv8-armx64.S + win-aarch64/crypto/fipsmodule/sha1-armv8.S + win-aarch64/crypto/fipsmodule/sha256-armv8.S + win-aarch64/crypto/fipsmodule/sha512-armv8.S + win-aarch64/crypto/fipsmodule/vpaes-armv8.S + win-aarch64/crypto/test/trampoline-armv8.S +) + set( CRYPTO_win_x86_SOURCES @@ -331,9 +347,9 @@ set( win-x86_64/crypto/test/trampoline-x86_64.asm ) -if(APPLE AND ${ARCH} STREQUAL "aarch64") +if(APPLE AND ARCH STREQUAL "aarch64") set(CRYPTO_ARCH_SOURCES ${CRYPTO_ios_aarch64_SOURCES}) -elseif(APPLE AND ${ARCH} STREQUAL "arm") +elseif(APPLE AND ARCH STREQUAL "arm") set(CRYPTO_ARCH_SOURCES ${CRYPTO_ios_arm_SOURCES}) elseif(APPLE) set(CRYPTO_ARCH_SOURCES ${CRYPTO_mac_${ARCH}_SOURCES}) @@ -360,6 +376,7 @@ add_library( "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_object.c" "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_octet.c" "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_print.c" + "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_strex.c" "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_strnid.c" "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_time.c" "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_type.c" @@ -389,6 +406,7 @@ add_library( "${BORINGSSL_SOURCE_DIR}/crypto/bio/printf.c" "${BORINGSSL_SOURCE_DIR}/crypto/bio/socket.c" "${BORINGSSL_SOURCE_DIR}/crypto/bio/socket_helper.c" + "${BORINGSSL_SOURCE_DIR}/crypto/blake2/blake2.c" 
"${BORINGSSL_SOURCE_DIR}/crypto/bn_extra/bn_asn1.c" "${BORINGSSL_SOURCE_DIR}/crypto/bn_extra/convert.c" "${BORINGSSL_SOURCE_DIR}/crypto/buf/buf.c" @@ -413,6 +431,7 @@ add_library( "${BORINGSSL_SOURCE_DIR}/crypto/conf/conf.c" "${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-fuchsia.c" "${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-linux.c" + "${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-win.c" "${BORINGSSL_SOURCE_DIR}/crypto/cpu-arm-linux.c" "${BORINGSSL_SOURCE_DIR}/crypto/cpu-arm.c" "${BORINGSSL_SOURCE_DIR}/crypto/cpu-intel.c" @@ -452,7 +471,6 @@ add_library( "${BORINGSSL_SOURCE_DIR}/crypto/ex_data.c" "${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/bcm.c" "${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/fips_shared_support.c" - "${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/is_fips.c" "${BORINGSSL_SOURCE_DIR}/crypto/hkdf/hkdf.c" "${BORINGSSL_SOURCE_DIR}/crypto/hpke/hpke.c" "${BORINGSSL_SOURCE_DIR}/crypto/hrss/hrss.c" @@ -499,13 +517,13 @@ add_library( "${BORINGSSL_SOURCE_DIR}/crypto/trust_token/voprf.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/a_digest.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/a_sign.c" - "${BORINGSSL_SOURCE_DIR}/crypto/x509/a_strex.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/a_verify.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/algorithm.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/asn1_gen.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/by_dir.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/by_file.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/i2d_pr.c" + "${BORINGSSL_SOURCE_DIR}/crypto/x509/name_print.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/rsa_pss.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/t_crl.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/t_req.c" @@ -519,7 +537,6 @@ add_library( "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_ext.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_lu.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_obj.c" - "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_r2x.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_req.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_set.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_trs.c" @@ -589,6 +606,8 @@ add_library( "${BORINGSSL_SOURCE_DIR}/ssl/d1_srtp.cc" "${BORINGSSL_SOURCE_DIR}/ssl/dtls_method.cc" "${BORINGSSL_SOURCE_DIR}/ssl/dtls_record.cc" + "${BORINGSSL_SOURCE_DIR}/ssl/encrypted_client_hello.cc" + "${BORINGSSL_SOURCE_DIR}/ssl/extensions.cc" "${BORINGSSL_SOURCE_DIR}/ssl/handoff.cc" "${BORINGSSL_SOURCE_DIR}/ssl/handshake.cc" "${BORINGSSL_SOURCE_DIR}/ssl/handshake_client.cc" @@ -611,7 +630,6 @@ add_library( "${BORINGSSL_SOURCE_DIR}/ssl/ssl_versions.cc" "${BORINGSSL_SOURCE_DIR}/ssl/ssl_x509.cc" "${BORINGSSL_SOURCE_DIR}/ssl/t1_enc.cc" - "${BORINGSSL_SOURCE_DIR}/ssl/t1_lib.cc" "${BORINGSSL_SOURCE_DIR}/ssl/tls13_both.cc" "${BORINGSSL_SOURCE_DIR}/ssl/tls13_client.cc" "${BORINGSSL_SOURCE_DIR}/ssl/tls13_enc.cc" @@ -633,6 +651,7 @@ add_executable( "${BORINGSSL_SOURCE_DIR}/tool/digest.cc" "${BORINGSSL_SOURCE_DIR}/tool/fd.cc" "${BORINGSSL_SOURCE_DIR}/tool/file.cc" + "${BORINGSSL_SOURCE_DIR}/tool/generate_ech.cc" "${BORINGSSL_SOURCE_DIR}/tool/generate_ed25519.cc" "${BORINGSSL_SOURCE_DIR}/tool/genrsa.cc" "${BORINGSSL_SOURCE_DIR}/tool/pkcs12.cc" From 0d1fcdf9fcf1e6717c671f78c529fc331048a86b Mon Sep 17 00:00:00 2001 From: Artur Filatenkov <613623@mail.ru> Date: Mon, 11 Oct 2021 15:08:40 +0300 Subject: [PATCH 163/438] add submodule update --- contrib/boringssl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/boringssl b/contrib/boringssl index a6a2e2ab3e4..4c787e9d70c 160000 --- a/contrib/boringssl +++ b/contrib/boringssl @@ -1 +1 @@ -Subproject commit a6a2e2ab3e44d97ce98e51c558e989f211de7eb3 
+Subproject commit 4c787e9d70c370d51baea714e7b73910be2a4c28 From a3d629a5b541ef2d0489b9b7e7e710ed3c7a0410 Mon Sep 17 00:00:00 2001 From: jasperzhu Date: Mon, 11 Oct 2021 22:51:13 +0530 Subject: [PATCH 164/438] add x86 feature avx2/avx512 support for filter implementation --- cmake/cpu_features.cmake | 32 ++++++++++++ src/Columns/ColumnFixedString.cpp | 69 ++++++++++++++++++++++++- src/Columns/ColumnVector.cpp | 63 +++++++++++++++++++++- src/Columns/ColumnsCommon.cpp | 86 ++++++++++++++++++++++++++++++- 4 files changed, 246 insertions(+), 4 deletions(-) diff --git a/cmake/cpu_features.cmake b/cmake/cpu_features.cmake index 46e42329958..d77ca0b32e3 100644 --- a/cmake/cpu_features.cmake +++ b/cmake/cpu_features.cmake @@ -18,6 +18,8 @@ option (ENABLE_PCLMULQDQ "Use pclmulqdq instructions on x86_64" 1) option (ENABLE_POPCNT "Use popcnt instructions on x86_64" 1) option (ENABLE_AVX "Use AVX instructions on x86_64" 0) option (ENABLE_AVX2 "Use AVX2 instructions on x86_64" 0) +option (ENABLE_AVX512 "Use AVX512 instructions on x86_64" 1) +option (ENABLE_BMI "Use BMI instructions on x86_64" 1) option (ARCH_NATIVE "Add -march=native compiler flag. This makes your binaries non-portable but more performant code may be generated. This option overrides ENABLE_* options for specific instruction set. Highly not recommended to use." 0) @@ -127,6 +129,36 @@ else () if (HAVE_AVX2 AND ENABLE_AVX2) set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}") endif () + + set (TEST_FLAG "-mavx512f -mavx512bw") + set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG} -O0") + check_cxx_source_compiles(" + #include + int main() { + auto a = _mm512_setzero_epi32(); + (void)a; + auto b = _mm512_add_epi16(__m512i(), __m512i()); + (void)b; + return 0; + } + " HAVE_AVX512) + if (HAVE_AVX512 AND ENABLE_AVX512) + set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}") + endif () + + set (TEST_FLAG "-mbmi") + set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG} -O0") + check_cxx_source_compiles(" + #include + int main() { + auto a = _blsr_u32(0); + (void)a; + return 0; + } + " HAVE_BMI) + if (HAVE_BMI AND ENABLE_BMI) + set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}") + endif () endif () cmake_pop_check_state () diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp index 94127fa8eb3..9daec1c1c64 100644 --- a/src/Columns/ColumnFixedString.cpp +++ b/src/Columns/ColumnFixedString.cpp @@ -230,8 +230,74 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result const UInt8 * filt_pos = filt.data(); const UInt8 * filt_end = filt_pos + col_size; const UInt8 * data_pos = chars.data(); +#if defined(__AVX512F__) && defined(__AVX512BW__) + static constexpr size_t SIMD_BYTES = 64; + const __m512i zero64 = _mm512_setzero_epi32(); + const UInt8 * filt_end_avx512 = filt_pos + col_size / SIMD_BYTES * SIMD_BYTES; + const size_t chars_per_simd_elements = SIMD_BYTES * n; -#ifdef __SSE2__ + while (filt_pos < filt_end_avx512) + { + uint64_t mask = _mm512_cmp_epi8_mask(_mm512_loadu_si512(reinterpret_cast(filt_pos)), zero64, _MM_CMPINT_GT); + + if (0xFFFFFFFFFFFFFFFF == mask) + { + res->chars.insert(data_pos, data_pos + chars_per_simd_elements); + } + else + { + size_t res_chars_size = res->chars.size(); + while (mask) + { + size_t index = __builtin_ctzll(mask); + res->chars.resize(res_chars_size + n); + memcpySmallAllowReadWriteOverflow15(&res->chars[res_chars_size], data_pos + index * n, n); + res_chars_size += n; + #ifdef __BMI__ + mask = _blsr_u64(mask); + #else + mask = mask & (mask-1); + #endif + } + } + data_pos += 
chars_per_simd_elements; + filt_pos += SIMD_BYTES; + } +#elif defined(__AVX2__) + static constexpr size_t SIMD_BYTES = 32; + const __m256i zero32 = _mm256_setzero_si256(); + const UInt8 * filt_end_avx2 = filt_pos + col_size / SIMD_BYTES * SIMD_BYTES; + const size_t chars_per_simd_elements = SIMD_BYTES * n; + + while (filt_pos < filt_end_avx2) + { + uint32_t mask = _mm256_movemask_epi8(_mm256_cmpgt_epi8(_mm256_loadu_si256(reinterpret_cast(filt_pos)), zero32)); + + if (0xFFFFFFFF == mask) + { + res->chars.insert(data_pos, data_pos + chars_per_simd_elements); + } + else + { + size_t res_chars_size = res->chars.size(); + while (mask) + { + size_t index = __builtin_ctz(mask); + res->chars.resize(res_chars_size + n); + memcpySmallAllowReadWriteOverflow15(&res->chars[res_chars_size], data_pos + index * n, n); + res_chars_size += n; + #ifdef __BMI__ + mask = _blsr_u32(mask); + #else + mask = mask & (mask-1); + #endif + } + } + data_pos += chars_per_simd_elements; + filt_pos += SIMD_BYTES; + } + +#elif defined(__SSE2__) /** A slightly more optimized version. * Based on the assumption that often pieces of consecutive values * completely pass or do not pass the filter. @@ -267,6 +333,7 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result data_pos += chars_per_simd_elements; filt_pos += SIMD_BYTES; } + #endif size_t res_chars_size = res->chars.size(); diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 3ee692a3ff4..000a7198446 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -311,7 +311,67 @@ ColumnPtr ColumnVector::filter(const IColumn::Filter & filt, ssize_t result_s const UInt8 * filt_end = filt_pos + size; const T * data_pos = data.data(); -#ifdef __SSE2__ +#if defined(__AVX512F__) && defined(__AVX512BW__) + static constexpr size_t SIMD_BYTES = 64; + const __m512i zero64 = _mm512_setzero_epi32(); + const UInt8 * filt_end_avx512 = filt_pos + size / SIMD_BYTES * SIMD_BYTES; + + while (filt_pos < filt_end_avx512) + { + UInt64 mask = _mm512_cmp_epi8_mask(_mm512_loadu_si512(reinterpret_cast(filt_pos)), zero64, _MM_CMPINT_GT); + + if (0xFFFFFFFFFFFFFFFF == mask) + { + res_data.insert(data_pos, data_pos + SIMD_BYTES); + } + else + { + while (mask) + { + size_t index = __builtin_ctzll(mask); + res_data.push_back(data_pos[index]); + #ifdef __BMI__ + mask = _blsr_u64(mask); + #else + mask = mask & (mask-1); + #endif + } + } + + filt_pos += SIMD_BYTES; + data_pos += SIMD_BYTES; + } +#elif defined(__AVX2__) + static constexpr size_t SIMD_BYTES = 32; + const __m256i zero32 = _mm256_setzero_si256(); + const UInt8 * filt_end_avx2 = filt_pos + size / SIMD_BYTES * SIMD_BYTES; + + while (filt_pos < filt_end_avx2) + { + UInt32 mask = _mm256_movemask_epi8(_mm256_cmpgt_epi8(_mm256_loadu_si256(reinterpret_cast(filt_pos)), zero32)); + + if (0xFFFFFFFF == mask) + { + res_data.insert(data_pos, data_pos + SIMD_BYTES); + } + else + { + while (mask) + { + size_t index = __builtin_ctz(mask); + res_data.push_back(data_pos[index]); + #ifdef __BMI__ + mask = _blsr_u32(mask); + #else + mask = mask & (mask-1); + #endif + } + } + + filt_pos += SIMD_BYTES; + data_pos += SIMD_BYTES; + } +#elif defined(__SSE2__) /** A slightly more optimized version. * Based on the assumption that often pieces of consecutive values * completely pass or do not pass the filter. 
@@ -344,6 +404,7 @@ ColumnPtr ColumnVector::filter(const IColumn::Filter & filt, ssize_t result_s filt_pos += SIMD_BYTES; data_pos += SIMD_BYTES; } + #endif while (filt_pos < filt_end) diff --git a/src/Columns/ColumnsCommon.cpp b/src/Columns/ColumnsCommon.cpp index a4d7de34382..36c292b4196 100644 --- a/src/Columns/ColumnsCommon.cpp +++ b/src/Columns/ColumnsCommon.cpp @@ -229,7 +229,89 @@ namespace memcpy(&res_elems[elems_size_old], &src_elems[arr_offset], arr_size * sizeof(T)); }; - #ifdef __SSE2__ + #if defined(__AVX512F__) && defined(__AVX512BW__) + const __m512i zero_vec = _mm512_setzero_epi32(); + static constexpr size_t SIMD_BYTES = 64; + const auto * filt_end_aligned = filt_pos + size / SIMD_BYTES * SIMD_BYTES; + + while (filt_pos < filt_end_aligned) + { + uint64_t mask = _mm512_cmp_epi8_mask(_mm512_loadu_si512(reinterpret_cast(filt_pos)), zero_vec, _MM_CMPINT_GT); + + if (mask == 0xffffffffffffffff) + { + /// SIMD_BYTES consecutive rows pass the filter + const auto first = offsets_pos == offsets_begin; + + const auto chunk_offset = first ? 0 : offsets_pos[-1]; + const auto chunk_size = offsets_pos[SIMD_BYTES - 1] - chunk_offset; + + result_offsets_builder.template insertChunk(offsets_pos, first, chunk_offset, chunk_size); + + /// copy elements for SIMD_BYTES arrays at once + const auto elems_size_old = res_elems.size(); + res_elems.resize(elems_size_old + chunk_size); + memcpy(&res_elems[elems_size_old], &src_elems[chunk_offset], chunk_size * sizeof(T)); + } + else + { + while (mask) + { + size_t index = __builtin_ctzll(mask); + copy_array(offsets_pos + index); + #ifdef __BMI__ + mask = _blsr_u64(mask); + #else + mask = mask & (mask-1); + #endif + } + } + + filt_pos += SIMD_BYTES; + offsets_pos += SIMD_BYTES; + } + #elif defined(__AVX2__) + const __m256i zero_vec = _mm256_setzero_si256(); + static constexpr size_t SIMD_BYTES = 32; + const auto * filt_end_aligned = filt_pos + size / SIMD_BYTES * SIMD_BYTES; + + while (filt_pos < filt_end_aligned) + { + uint32_t mask = _mm256_movemask_epi8(_mm256_cmpgt_epi8(_mm256_loadu_si256(reinterpret_cast(filt_pos)), zero_vec)); + + if (mask == 0xffffffff) + { + /// SIMD_BYTES consecutive rows pass the filter + const auto first = offsets_pos == offsets_begin; + + const auto chunk_offset = first ? 
0 : offsets_pos[-1]; + const auto chunk_size = offsets_pos[SIMD_BYTES - 1] - chunk_offset; + + result_offsets_builder.template insertChunk(offsets_pos, first, chunk_offset, chunk_size); + + /// copy elements for SIMD_BYTES arrays at once + const auto elems_size_old = res_elems.size(); + res_elems.resize(elems_size_old + chunk_size); + memcpy(&res_elems[elems_size_old], &src_elems[chunk_offset], chunk_size * sizeof(T)); + } + else + { + while (mask) + { + size_t index = __builtin_ctz(mask); + copy_array(offsets_pos + index); + #ifdef __BMI__ + mask = _blsr_u32(mask); + #else + mask = mask & (mask-1); + #endif + } + } + + filt_pos += SIMD_BYTES; + offsets_pos += SIMD_BYTES; + } + #elif defined(__SSE2__) const __m128i zero_vec = _mm_setzero_si128(); static constexpr size_t SIMD_BYTES = 16; const auto * filt_end_aligned = filt_pos + size / SIMD_BYTES * SIMD_BYTES; @@ -268,7 +350,7 @@ namespace filt_pos += SIMD_BYTES; offsets_pos += SIMD_BYTES; - } + } #endif while (filt_pos < filt_end) From 0ef26244083333ef4ee8ede0049014334a573459 Mon Sep 17 00:00:00 2001 From: jasperzhu Date: Tue, 12 Oct 2021 04:04:27 +0530 Subject: [PATCH 165/438] add specific flags for compiling filter operation source files --- cmake/cpu_features.cmake | 4 ++-- src/CMakeLists.txt | 7 +++++++ src/Columns/ColumnFixedString.cpp | 11 ++++++----- src/Columns/ColumnVector.cpp | 15 ++++++++------- 4 files changed, 23 insertions(+), 14 deletions(-) diff --git a/cmake/cpu_features.cmake b/cmake/cpu_features.cmake index d77ca0b32e3..4ea9465be98 100644 --- a/cmake/cpu_features.cmake +++ b/cmake/cpu_features.cmake @@ -143,7 +143,7 @@ else () } " HAVE_AVX512) if (HAVE_AVX512 AND ENABLE_AVX512) - set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}") + set(X86_INTRINSICS_FLAGS "${X86_INTRINSICS_FLAGS} ${TEST_FLAG}") endif () set (TEST_FLAG "-mbmi") @@ -157,7 +157,7 @@ else () } " HAVE_BMI) if (HAVE_BMI AND ENABLE_BMI) - set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}") + set(X86_INTRINSICS_FLAGS "${X86_INTRINSICS_FLAGS} ${TEST_FLAG}") endif () endif () diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index cac5b70f489..45bb1a21d59 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -284,6 +284,13 @@ target_link_libraries (clickhouse_common_io dragonbox_to_chars ) +# Use X86 AVX2/AVX512 instructions to accelerate filter opertions +set_source_files_properties( + Columns/ColumnFixedString.cpp + Columns/ColumnsCommon.cpp + Columns/ColumnVector.cpp + PROPERTIES COMPILE_FLAGS "${X86_INTRINSICS_FLAGS}") + if(RE2_LIBRARY) target_link_libraries(clickhouse_common_io PUBLIC ${RE2_LIBRARY}) endif() diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp index 9daec1c1c64..4b31677d37e 100644 --- a/src/Columns/ColumnFixedString.cpp +++ b/src/Columns/ColumnFixedString.cpp @@ -230,6 +230,12 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result const UInt8 * filt_pos = filt.data(); const UInt8 * filt_end = filt_pos + col_size; const UInt8 * data_pos = chars.data(); + + /** A slightly more optimized version. + * Based on the assumption that often pieces of consecutive values + * completely pass or do not pass the filter. + * Therefore, we will optimistically check the parts of `SIMD_BYTES` values. 
+ */ #if defined(__AVX512F__) && defined(__AVX512BW__) static constexpr size_t SIMD_BYTES = 64; const __m512i zero64 = _mm512_setzero_epi32(); @@ -298,11 +304,6 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result } #elif defined(__SSE2__) - /** A slightly more optimized version. - * Based on the assumption that often pieces of consecutive values - * completely pass or do not pass the filter. - * Therefore, we will optimistically check the parts of `SIMD_BYTES` values. - */ static constexpr size_t SIMD_BYTES = 16; const __m128i zero16 = _mm_setzero_si128(); diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 000a7198446..37f60e9f2b9 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -310,7 +310,12 @@ ColumnPtr ColumnVector::filter(const IColumn::Filter & filt, ssize_t result_s const UInt8 * filt_pos = filt.data(); const UInt8 * filt_end = filt_pos + size; const T * data_pos = data.data(); - + + /** A slightly more optimized version. + * Based on the assumption that often pieces of consecutive values + * completely pass or do not pass the filter. + * Therefore, we will optimistically check the parts of `SIMD_BYTES` values. + */ #if defined(__AVX512F__) && defined(__AVX512BW__) static constexpr size_t SIMD_BYTES = 64; const __m512i zero64 = _mm512_setzero_epi32(); @@ -341,6 +346,7 @@ ColumnPtr ColumnVector::filter(const IColumn::Filter & filt, ssize_t result_s filt_pos += SIMD_BYTES; data_pos += SIMD_BYTES; } + #elif defined(__AVX2__) static constexpr size_t SIMD_BYTES = 32; const __m256i zero32 = _mm256_setzero_si256(); @@ -371,13 +377,8 @@ ColumnPtr ColumnVector::filter(const IColumn::Filter & filt, ssize_t result_s filt_pos += SIMD_BYTES; data_pos += SIMD_BYTES; } -#elif defined(__SSE2__) - /** A slightly more optimized version. - * Based on the assumption that often pieces of consecutive values - * completely pass or do not pass the filter. - * Therefore, we will optimistically check the parts of `SIMD_BYTES` values. - */ +#elif defined(__SSE2__) static constexpr size_t SIMD_BYTES = 16; const __m128i zero16 = _mm_setzero_si128(); const UInt8 * filt_end_sse = filt_pos + size / SIMD_BYTES * SIMD_BYTES; From 7b9358a59721749b7a60d917d1793969760a9df5 Mon Sep 17 00:00:00 2001 From: Pavel Cheremushkin Date: Tue, 12 Oct 2021 03:21:31 +0300 Subject: [PATCH 166/438] adding codegen fuzzer + code generation script. 
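gen.py reads a grammar (clickhouse.g) in which every rule is a ';'-terminated list of quoted literals and $N placeholders, and emits two generated files: out.proto, a Word message with one enum value per rule plus a nested Sentence of Words, and out.cpp, whose GenerateSentence()/GenerateWord() walk a Sentence proto and expand each Word back into text, recursing into word.inner() for every $N placeholder with a recursion-depth cut-off (depth > 5). For example, the rule "SELECT " $1 " FROM " $2 " WHERE " $3 ; becomes case 19 of the generated switch. codegen_select_fuzzer.cpp plugs the generated message into libprotobuf-mutator via DEFINE_BINARY_PROTO_FUZZER, renders every mutated Sentence to a string and feeds it to DB::ParserQueryWithOutput through parseQuery.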
new contrib added: libprotobuf-mutator --- .gitmodules | 3 + CMakeLists.txt | 1 + cmake/find/libprotobuf-mutator.cmake | 11 + contrib/CMakeLists.txt | 4 + contrib/libprotobuf-mutator | 1 + .../libprotobuf-mutator-cmake/CMakeLists.txt | 17 + src/Parsers/fuzzers/CMakeLists.txt | 5 + .../fuzzers/codegen_fuzzer/CMakeLists.txt | 13 + .../codegen_fuzzer/clickhouse-template.g | 121 + .../fuzzers/codegen_fuzzer/clickhouse.g | 524 ++++ .../codegen_fuzzer/codegen_select_fuzzer.cpp | 40 + src/Parsers/fuzzers/codegen_fuzzer/gen.py | 249 ++ src/Parsers/fuzzers/codegen_fuzzer/out.cpp | 2189 +++++++++++++++++ src/Parsers/fuzzers/codegen_fuzzer/out.proto | 519 ++++ src/Parsers/fuzzers/codegen_fuzzer/update.sh | 30 + 15 files changed, 3727 insertions(+) create mode 100644 cmake/find/libprotobuf-mutator.cmake create mode 160000 contrib/libprotobuf-mutator create mode 100644 contrib/libprotobuf-mutator-cmake/CMakeLists.txt create mode 100644 src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt create mode 100644 src/Parsers/fuzzers/codegen_fuzzer/clickhouse-template.g create mode 100644 src/Parsers/fuzzers/codegen_fuzzer/clickhouse.g create mode 100644 src/Parsers/fuzzers/codegen_fuzzer/codegen_select_fuzzer.cpp create mode 100644 src/Parsers/fuzzers/codegen_fuzzer/gen.py create mode 100644 src/Parsers/fuzzers/codegen_fuzzer/out.cpp create mode 100644 src/Parsers/fuzzers/codegen_fuzzer/out.proto create mode 100755 src/Parsers/fuzzers/codegen_fuzzer/update.sh diff --git a/.gitmodules b/.gitmodules index 74d1049ce01..5d226cddd29 100644 --- a/.gitmodules +++ b/.gitmodules @@ -249,3 +249,6 @@ [submodule "contrib/magic_enum"] path = contrib/magic_enum url = https://github.com/Neargye/magic_enum +[submodule "contrib/libprotobuf-mutator"] + path = contrib/libprotobuf-mutator + url = https://github.com/google/libprotobuf-mutator diff --git a/CMakeLists.txt b/CMakeLists.txt index 3f553c5c26d..843beec01c6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -562,6 +562,7 @@ include (cmake/find/cassandra.cmake) include (cmake/find/sentry.cmake) include (cmake/find/stats.cmake) include (cmake/find/datasketches.cmake) +include (cmake/find/libprotobuf-mutator.cmake) set (USE_INTERNAL_CITYHASH_LIBRARY ON CACHE INTERNAL "") find_contrib_lib(cityhash) diff --git a/cmake/find/libprotobuf-mutator.cmake b/cmake/find/libprotobuf-mutator.cmake new file mode 100644 index 00000000000..8aa595230cd --- /dev/null +++ b/cmake/find/libprotobuf-mutator.cmake @@ -0,0 +1,11 @@ +option(USE_LIBPROTOBUF_MUTATOR "Enable libprotobuf-mutator" ${ENABLE_FUZZING}) + +if (NOT USE_LIBPROTOBUF_MUTATOR) + return() +endif() + +set(LibProtobufMutator_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libprotobuf-mutator") + +if (NOT EXISTS "${LibProtobufMutator_SOURCE_DIR}/README.md") + message (ERROR "submodule contrib/libprotobuf-mutator is missing. 
to fix try run: \n git submodule update --init --recursive") +endif() diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 140cc0846ec..98231856aee 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -49,6 +49,10 @@ add_subdirectory (replxx-cmake) add_subdirectory (unixodbc-cmake) add_subdirectory (nanodbc-cmake) +if (ENABLE_FUZZING) + add_subdirectory (libprotobuf-mutator-cmake) +endif() + if (USE_YAML_CPP) add_subdirectory (yaml-cpp-cmake) endif() diff --git a/contrib/libprotobuf-mutator b/contrib/libprotobuf-mutator new file mode 160000 index 00000000000..ffd86a32874 --- /dev/null +++ b/contrib/libprotobuf-mutator @@ -0,0 +1 @@ +Subproject commit ffd86a32874e5c08a143019aad1aaf0907294c9f diff --git a/contrib/libprotobuf-mutator-cmake/CMakeLists.txt b/contrib/libprotobuf-mutator-cmake/CMakeLists.txt new file mode 100644 index 00000000000..93eafc85b7d --- /dev/null +++ b/contrib/libprotobuf-mutator-cmake/CMakeLists.txt @@ -0,0 +1,17 @@ +set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/libprotobuf-mutator) +set(NO_FUZZING_FLAGS "-fno-sanitize=fuzzer -fsanitize-coverage=0") + +add_library(protobuf-mutator + ${LIBRARY_DIR}/src/libfuzzer/libfuzzer_macro.cc + ${LIBRARY_DIR}/src/libfuzzer/libfuzzer_mutator.cc + ${LIBRARY_DIR}/src/binary_format.cc + ${LIBRARY_DIR}/src/mutator.cc + ${LIBRARY_DIR}/src/text_format.cc + ${LIBRARY_DIR}/src/utf8_fix.cc) + +target_include_directories(protobuf-mutator BEFORE PRIVATE "${LIBRARY_DIR}") +# target_include_directories(protobuf-mutator BEFORE PRIVATE "${ClickHouse_SOURCE_DIR}/contrib/protobuf/src") + +target_link_libraries(protobuf-mutator ${PROTOBUF_LIBRARIES}) +set_target_properties(protobuf-mutator PROPERTIES + COMPILE_FLAGS "${NO_FUZZING_FLAGS}") diff --git a/src/Parsers/fuzzers/CMakeLists.txt b/src/Parsers/fuzzers/CMakeLists.txt index 0dd541e663f..2840dc72c0a 100644 --- a/src/Parsers/fuzzers/CMakeLists.txt +++ b/src/Parsers/fuzzers/CMakeLists.txt @@ -6,3 +6,8 @@ target_link_libraries(select_parser_fuzzer PRIVATE clickhouse_parsers ${LIB_FUZZ add_executable(create_parser_fuzzer create_parser_fuzzer.cpp ${SRCS}) target_link_libraries(create_parser_fuzzer PRIVATE clickhouse_parsers ${LIB_FUZZING_ENGINE}) + +string(REPLACE " -Werror" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") +add_subdirectory(codegen_fuzzer) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror") + diff --git a/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt b/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt new file mode 100644 index 00000000000..f55bb3b3fb9 --- /dev/null +++ b/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt @@ -0,0 +1,13 @@ +find_package(Protobuf REQUIRED) + +protobuf_generate_cpp(PROTO_SRCS PROTO_HDRS "out.proto") +set(FUZZER_SRCS codegen_select_fuzzer.cpp out.cpp ${PROTO_SRCS} ${PROTO_HDRS}) + +set(CMAKE_INCLUDE_CURRENT_DIR TRUE) + +add_executable(codegen_select_fuzzer ${FUZZER_SRCS}) + +target_include_directories(codegen_select_fuzzer BEFORE PRIVATE "${Protobuf_INCLUDE_DIRS}") +target_include_directories(codegen_select_fuzzer BEFORE PRIVATE "${LibProtobufMutator_SOURCE_DIR}") +target_include_directories(codegen_select_fuzzer BEFORE PRIVATE "${LibProtobufMutator_SOURCE_DIR}/src") +target_link_libraries(codegen_select_fuzzer PRIVATE clickhouse_parsers protobuf-mutator ${Protobuf_LIBRARIES} ${LIB_FUZZING_ENGINE}) \ No newline at end of file diff --git a/src/Parsers/fuzzers/codegen_fuzzer/clickhouse-template.g b/src/Parsers/fuzzers/codegen_fuzzer/clickhouse-template.g new file mode 100644 index 00000000000..79fd775b1da --- /dev/null +++ 
b/src/Parsers/fuzzers/codegen_fuzzer/clickhouse-template.g @@ -0,0 +1,121 @@ +" "; +" "; +" "; +";"; + + +"(" $1 ")"; +"(" $1 ", " $2 ")"; +"(" $1 ", " $2 ", " $3 ")"; + +$1 ", " $2 ; +$1 ", " $2 ", " $3 ; +$1 ", " $2 ", " $3 ", " $4 ; +$1 ", " $2 ", " $3 ", " $4 ", " $5 ; + +"[" $1 ", " $2 "]"; +"[" $1 ", " $2 ", " $3 "]"; +"[" $1 ", " $2 ", " $3 ", " $4 "]"; +"[" $1 ", " $2 ", " $3 ", " $4 ", " $5 "]"; + +$0 "(" $1 ")"; +$0 "(" $1 ", " $2 ")"; +$0 "(" $1 ", " $2 ", " $3 ")"; + +$1 " as " $2 ; + + +// TODO: add more clickhouse specific stuff +"SELECT " $1 " FROM " $2 " WHERE " $3 ; +"SELECT " $1 " FROM " $2 " GROUP BY " $3 ; +"SELECT " $1 " FROM " $2 " SORT BY " $3 ; +"SELECT " $1 " FROM " $2 " LIMIT " $3 ; +"SELECT " $1 " FROM " $2 " JOIN " $3 ; +"SELECT " $1 " FROM " $2 " ARRAY JOIN " $3 ; +"SELECT " $1 " FROM " $2 " JOIN " $3 " ON " $4 ; +"SELECT " $1 " FROM " $2 " JOIN " $3 " USING " $5 ; +"SELECT " $1 " INTO OUTFILE " $2 ; + +"WITH " $1 " AS " $2 ; + +"{" $1 ":" $2 "}"; +"[" $1 "," $2 "]"; +"[]"; + + +" x "; +"x"; +" `x` "; +"`x`"; + +" \"value\" "; +"\"value\""; +" 0 "; +"0"; +"1"; +"2"; +"123123123123123123"; +"182374019873401982734091873420923123123123123123"; +"1e-1"; +"1.1"; +"\"\""; +" '../../../../../../../../../etc/passwd' "; + +"/"; +"="; +"=="; +"!="; +"<>"; +"<"; +"<="; +">"; +">="; +"<<"; +"|<<"; +"&"; +"|"; +"||"; +"<|"; +"|>"; +"+"; +"-"; +"~"; +"*"; +"/"; +"\\"; +"%"; +""; +"."; +","; +","; +","; +","; +","; +","; +"("; +")"; +"("; +")"; +"("; +")"; +"("; +")"; +"("; +")"; +"("; +")"; +"?"; +":"; +"@"; +"@@"; +"$"; +"\""; +"`"; +"{"; +"}"; +"^"; +"::"; +"->"; +"]"; +"["; + diff --git a/src/Parsers/fuzzers/codegen_fuzzer/clickhouse.g b/src/Parsers/fuzzers/codegen_fuzzer/clickhouse.g new file mode 100644 index 00000000000..edd5acf513d --- /dev/null +++ b/src/Parsers/fuzzers/codegen_fuzzer/clickhouse.g @@ -0,0 +1,524 @@ +" "; +" "; +" "; +";"; + + +"(" $1 ")"; +"(" $1 ", " $2 ")"; +"(" $1 ", " $2 ", " $3 ")"; + +$1 ", " $2 ; +$1 ", " $2 ", " $3 ; +$1 ", " $2 ", " $3 ", " $4 ; +$1 ", " $2 ", " $3 ", " $4 ", " $5 ; + +"[" $1 ", " $2 "]"; +"[" $1 ", " $2 ", " $3 "]"; +"[" $1 ", " $2 ", " $3 ", " $4 "]"; +"[" $1 ", " $2 ", " $3 ", " $4 ", " $5 "]"; + +$0 "(" $1 ")"; +$0 "(" $1 ", " $2 ")"; +$0 "(" $1 ", " $2 ", " $3 ")"; + +$1 " as " $2 ; + + +// TODO: add more clickhouse specific stuff +"SELECT " $1 " FROM " $2 " WHERE " $3 ; +"SELECT " $1 " FROM " $2 " GROUP BY " $3 ; +"SELECT " $1 " FROM " $2 " SORT BY " $3 ; +"SELECT " $1 " FROM " $2 " LIMIT " $3 ; +"SELECT " $1 " FROM " $2 " JOIN " $3 ; +"SELECT " $1 " FROM " $2 " ARRAY JOIN " $3 ; +"SELECT " $1 " FROM " $2 " JOIN " $3 " ON " $4 ; +"SELECT " $1 " FROM " $2 " JOIN " $3 " USING " $5 ; +"SELECT " $1 " INTO OUTFILE " $2 ; + +"WITH " $1 " AS " $2 ; + +"{" $1 ":" $2 "}"; +"[" $1 "," $2 "]"; +"[]"; + + +" x "; +"x"; +" `x` "; +"`x`"; + +" \"value\" "; +"\"value\""; +" 0 "; +"0"; +"1"; +"2"; +"123123123123123123"; +"182374019873401982734091873420923123123123123123"; +"1e-1"; +"1.1"; +"\"\""; +" '../../../../../../../../../etc/passwd' "; + +"/"; +"="; +"=="; +"!="; +"<>"; +"<"; +"<="; +">"; +">="; +"<<"; +"|<<"; +"&"; +"|"; +"||"; +"<|"; +"|>"; +"+"; +"-"; +"~"; +"*"; +"/"; +"\\"; +"%"; +""; +"."; +","; +","; +","; +","; +","; +","; +"("; +")"; +"("; +")"; +"("; +")"; +"("; +")"; +"("; +")"; +"("; +")"; +"?"; +":"; +"@"; +"@@"; +"$"; +"\""; +"`"; +"{"; +"}"; +"^"; +"::"; +"->"; +"]"; +"["; + +" ADD "; +" ADD COLUMN "; +" ADD CONSTRAINT "; +" ADD INDEX "; +" AFTER "; +" AggregateFunction "; +" aggThrow "; +" ALL "; +" ALTER LIVE 
VIEW "; +" ALTER TABLE "; +" and "; +" ANTI "; +" any "; +" anyHeavy "; +" anyLast "; +" argMax "; +" argMin "; +" array "; +" Array "; +" arrayAll "; +" arrayAUC "; +" arrayCompact "; +" arrayConcat "; +" arrayCount "; +" arrayCumSum "; +" arrayCumSumNonNegative "; +" arrayDifference "; +" arrayDistinct "; +" arrayElement "; +" arrayEnumerate "; +" arrayEnumerateDense "; +" arrayEnumerateDenseRanked "; +" arrayEnumerateUniq "; +" arrayEnumerateUniqRanked "; +" arrayExists "; +" arrayFill "; +" arrayFilter "; +" arrayFirst "; +" arrayFirstIndex "; +" arrayFlatten "; +" arrayIntersect "; +" arrayJoin "; +" ARRAY JOIN "; +" arrayMap "; +" arrayPopBack "; +" arrayPopFront "; +" arrayPushBack "; +" arrayPushFront "; +" arrayReduce "; +" arrayReduceInRanges "; +" arrayResize "; +" arrayReverse "; +" arrayReverseFill "; +" arrayReverseSort "; +" arrayReverseSplit "; +" arraySlice "; +" arraySort "; +" arraySplit "; +" arraySum "; +" arrayUniq "; +" arrayWithConstant "; +" arrayZip "; +" AS "; +" ASC "; +" ASCENDING "; +" ASOF "; +" AST "; +" ATTACH "; +" ATTACH PART "; +" ATTACH PARTITION "; +" avg "; +" avgWeighted "; +" basename "; +" BETWEEN "; +" BOTH "; +" boundingRatio "; +" BY "; +" CAST "; +" categoricalInformationValue "; +" CHECK "; +" CHECK TABLE "; +" CLEAR COLUMN "; +" CLEAR INDEX "; +" COLLATE "; +" COLUMNS "; +" COMMENT COLUMN "; +" CONSTRAINT "; +" corr "; +" corrStable "; +" count "; +" countEqual "; +" covarPop "; +" covarPopStable "; +" covarSamp "; +" covarSampStable "; +" CREATE "; +" CROSS "; +" CUBE "; +" cutFragment "; +" cutQueryString "; +" cutQueryStringAndFragment "; +" cutToFirstSignificantSubdomain "; +" cutURLParameter "; +" cutWWW "; +" D "; +" DATABASE "; +" DATABASES "; +" Date "; +" DATE "; +" DATE_ADD "; +" DATEADD "; +" DATE_DIFF "; +" DATEDIFF "; +" DATE_SUB "; +" DATESUB "; +" DateTime "; +" DateTime64 "; +" DAY "; +" DD "; +" Decimal "; +" Decimal128 "; +" Decimal32 "; +" Decimal64 "; +" decodeURLComponent "; +" DEDUPLICATE "; +" DELETE "; +" DELETE WHERE "; +" DESC "; +" DESCENDING "; +" DESCRIBE "; +" DETACH "; +" DETACH PARTITION "; +" DICTIONARIES "; +" DICTIONARY "; +" DISTINCT "; +" domain "; +" domainWithoutWWW "; +" DROP "; +" DROP COLUMN "; +" DROP CONSTRAINT "; +" DROP DETACHED PART "; +" DROP DETACHED PARTITION "; +" DROP INDEX "; +" DROP PARTITION "; +" emptyArrayToSingle "; +" ENGINE "; +" entropy "; +" Enum "; +" Enum16 "; +" Enum8 "; +" EVENTS "; +" EXCHANGE TABLES "; +" EXISTS "; +" EXTRACT "; +" extractURLParameter "; +" extractURLParameterNames "; +" extractURLParameters "; +" FETCH PARTITION "; +" FETCH PART "; +" FINAL "; +" FIRST "; +" firstSignificantSubdomain "; +" FixedString "; +" Float32 "; +" Float64 "; +" FOR "; +" ForEach "; +" FORMAT "; +" fragment "; +" FREEZE "; +" FROM "; +" FULL "; +" FUNCTION "; +" __getScalar "; +" GLOBAL "; +" GRANULARITY "; +" groupArray "; +" groupArrayInsertAt "; +" groupArrayMovingAvg "; +" groupArrayMovingSum "; +" groupArraySample "; +" groupBitAnd "; +" groupBitmap "; +" groupBitmapAnd "; +" groupBitmapOr "; +" groupBitmapXor "; +" groupBitOr "; +" groupBitXor "; +" GROUP BY "; +" groupUniqArray "; +" has "; +" hasAll "; +" hasAny "; +" HAVING "; +" HH "; +" histogram "; +" HOUR "; +" ID "; +" if "; +" IF EXISTS "; +" IF NOT EXISTS "; +" IN "; +" INDEX "; +" indexOf "; +" INNER "; +" IN PARTITION "; +" INSERT INTO "; +" Int16 "; +" Int32 "; +" Int64 "; +" Int8 "; +" INTERVAL "; +" IntervalDay "; +" IntervalHour "; +" IntervalMinute "; +" IntervalMonth "; +" IntervalQuarter "; +" IntervalSecond "; 
+" IntervalWeek "; +" IntervalYear "; +" INTO OUTFILE "; +" JOIN "; +" kurtPop "; +" kurtSamp "; +" LAST "; +" LAYOUT "; +" LEADING "; +" LEFT "; +" LEFT ARRAY JOIN "; +" length "; +" LIFETIME "; +" LIKE "; +" LIMIT "; +" LIVE "; +" LOCAL "; +" LowCardinality "; +" LTRIM "; +" M "; +" MATERIALIZED "; +" MATERIALIZE INDEX "; +" MATERIALIZE TTL "; +" max "; +" maxIntersections "; +" maxIntersectionsPosition "; +" Merge "; +" MI "; +" min "; +" MINUTE "; +" MM "; +" MODIFY "; +" MODIFY COLUMN "; +" MODIFY ORDER BY "; +" MODIFY QUERY "; +" MODIFY SETTING "; +" MODIFY TTL "; +" MONTH "; +" MOVE PART "; +" MOVE PARTITION "; +" movingXXX "; +" N "; +" NAME "; +" Nested "; +" NO DELAY "; +" NONE "; +" not "; +" nothing "; +" Nothing "; +" Null "; +" Nullable "; +" NULLS "; +" OFFSET "; +" ON "; +" ONLY "; +" OPTIMIZE TABLE "; +" ORDER BY "; +" OR REPLACE "; +" OUTER "; +" PARTITION "; +" PARTITION BY "; +" path "; +" pathFull "; +" POPULATE "; +" PREWHERE "; +" PRIMARY KEY "; +" protocol "; +" Q "; +" QQ "; +" QUARTER "; +" queryString "; +" queryStringAndFragment "; +" range "; +" REFRESH "; +" RENAME COLUMN "; +" RENAME TABLE "; +" REPLACE PARTITION "; +" Resample "; +" RESUME "; +" retention "; +" RIGHT "; +" ROLLUP "; +" RTRIM "; +" S "; +" SAMPLE "; +" SAMPLE BY "; +" SECOND "; +" SELECT "; +" SEMI "; +" sequenceCount "; +" sequenceMatch "; +" SET "; +" SETTINGS "; +" SHOW "; +" SHOW PROCESSLIST "; +" simpleLinearRegression "; +" skewPop "; +" skewSamp "; +" SOURCE "; +" SQL_TSI_DAY "; +" SQL_TSI_HOUR "; +" SQL_TSI_MINUTE "; +" SQL_TSI_MONTH "; +" SQL_TSI_QUARTER "; +" SQL_TSI_SECOND "; +" SQL_TSI_WEEK "; +" SQL_TSI_YEAR "; +" SS "; +" State "; +" stddevPop "; +" stddevPopStable "; +" stddevSamp "; +" stddevSampStable "; +" STEP "; +" stochasticLinearRegression "; +" stochasticLogisticRegression "; +" String "; +" SUBSTRING "; +" sum "; +" sumKahan "; +" sumMap "; +" sumMapFiltered "; +" sumMapFilteredWithOverflow "; +" sumMapWithOverflow "; +" sumWithOverflow "; +" SUSPEND "; +" TABLE "; +" TABLES "; +" TEMPORARY "; +" TIMESTAMP "; +" TIMESTAMP_ADD "; +" TIMESTAMPADD "; +" TIMESTAMP_DIFF "; +" TIMESTAMPDIFF "; +" TIMESTAMP_SUB "; +" TIMESTAMPSUB "; +" TO "; +" TO DISK "; +" TOP "; +" topK "; +" topKWeighted "; +" topLevelDomain "; +" TO TABLE "; +" TOTALS "; +" TO VOLUME "; +" TRAILING "; +" TRIM "; +" TRUNCATE "; +" TTL "; +" Tuple "; +" TYPE "; +" UInt16 "; +" UInt32 "; +" UInt64 "; +" UInt8 "; +" uniq "; +" uniqCombined "; +" uniqCombined64 "; +" uniqExact "; +" uniqHLL12 "; +" uniqUpTo "; +" UPDATE "; +" URLHierarchy "; +" URLPathHierarchy "; +" USE "; +" USING "; +" UUID "; +" VALUES "; +" varPop "; +" varPopStable "; +" varSamp "; +" varSampStable "; +" VIEW "; +" WATCH "; +" WEEK "; +" WHERE "; +" windowFunnel "; +" WITH "; +" WITH FILL "; +" WITH TIES "; +" WK "; +" WW "; +" YEAR "; +" YY "; +" YYYY "; diff --git a/src/Parsers/fuzzers/codegen_fuzzer/codegen_select_fuzzer.cpp b/src/Parsers/fuzzers/codegen_fuzzer/codegen_select_fuzzer.cpp new file mode 100644 index 00000000000..a68fac4f585 --- /dev/null +++ b/src/Parsers/fuzzers/codegen_fuzzer/codegen_select_fuzzer.cpp @@ -0,0 +1,40 @@ + +#include +#include + +#include +#include +#include +#include + +#include + +#include "out.pb.h" + + +void GenerateSentence(const Sentence&, std::string &, int); + + +DEFINE_BINARY_PROTO_FUZZER(const Sentence& main) { + static std::string input; + input.reserve(4096); + + GenerateSentence(main, input, 0); + if (input.size()) { + + std::cout << input << std::endl; + + DB::ParserQueryWithOutput 
parser(input.data() + input.size()); + try { + DB::ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0); + + DB::WriteBufferFromOStream out(std::cerr, 4096); + DB::formatAST(*ast, out); + std::cerr << std::endl; + } catch (...) { + + } + + input.clear(); + } +} diff --git a/src/Parsers/fuzzers/codegen_fuzzer/gen.py b/src/Parsers/fuzzers/codegen_fuzzer/gen.py new file mode 100644 index 00000000000..e96bc6ae9f6 --- /dev/null +++ b/src/Parsers/fuzzers/codegen_fuzzer/gen.py @@ -0,0 +1,249 @@ +#!/usr/bin/env python3 + +import sys +import string + + +TOKEN_TEXT = 1 +TOKEN_VAR = 2 + +TOKEN_COLON = ':' +TOKEN_SEMI = ';' +TOKEN_OR = '|' +TOKEN_QUESTIONMARK = '?' +TOKEN_ROUND_BRACKET_OPEN = '(' +TOKEN_ROUND_BRACKET_CLOSE = ')' +TOKEN_ASTERISK = '*' +TOKEN_SLASH = '/' + + + + +class TextValue: + def __init__(self, t): + self.t = t + self.slug = None + + def get_slug(self): + if self.slug is not None: + return self.slug + slug = '' + for c in self.t: + slug += c if c in string.ascii_letters else '_' + self.slug = slug + return slug + + def get_name(self): + return f"TextValue_{self.get_slug()}" + + def __repr__(self): + return f"TextValue(\"{self.t}\")" + + +class Var: + def __init__(self, id_): + self.id_ = id_ + + def __repr__(self): + return f"Var({self.id_})" + + +class Parser: + def __init__(self): + self.chains = [] + self.text = None + self.col = 0 + self.line = 1 + self.t = None + self.var_id = -1 + self.cur_tok = None + self.includes = [] + + self.proto = '' + self.cpp = '' + + def parse_file(self, filename): + with open(filename) as f: + self.text = f.read() + + while self.parse_statement() is not None: + pass + + def add_include(self, filename): + self.includes.append(filename) + + def get_next_token(self): + self.skip_ws() + + if not len(self.text): + return None + + if self.text[0] == '"': + return self.parse_txt_value() + + if self.text[0] == '$': + return self.parse_var_value() + + c, self.text = self.text[0], self.text[1:] + self.cur_tok = c + return c + + def parse_var_value(self): + i = self.text.find(' ') + + id_, self.text = self.text[1:i], self.text[i+1:] + self.var_id = int(id_) + self.cur_tok = TOKEN_VAR + return TOKEN_VAR + + def parse_txt_value(self): + if self.text[0] != '"': + raise Exception("parse_txt_value: expected quote at the start") + + self.t = '' + self.text = self.text[1:] + + while self.text[0] != '"': + if self.text[0] == '\\': + if self.text[1] == 'x': + self.t += self.text[:4] + self.text = self.text[4:] + elif self.text[1] in 'nt\\"': + self.t += self.text[:2] + self.text = self.text[2:] + else: + raise Exception(f"parse_txt_value: unknown symbol {self.text[0]}") + else: + c, self.text = self.text[0], self.text[1:] + self.t += c + + self.text = self.text[1:] + self.cur_tok = TOKEN_TEXT + return TOKEN_TEXT + + def skip_ws(self): + while self.text and self.text[0] in string.whitespace: + if self.text[0] == '\n': + self.line += 1 + self.col = 0 + self.text = self.text[1:] + self.col += 1 + if not self.text: + return None + return True + + def skip_line(self): + self.line += 1 + index = self.text.find('\n') + self.text = self.text[index:] + + + def parse_statement(self): + if self.skip_ws() is None: + return None + + self.get_next_token() + if self.cur_tok == TOKEN_SLASH: + self.skip_line() + return TOKEN_SLASH + + chain = [] + while self.cur_tok != TOKEN_SEMI: + if self.cur_tok == TOKEN_TEXT: + chain.append(TextValue(self.t)) + elif self.cur_tok == TOKEN_VAR: + chain.append(Var(self.var_id)) + else: + 
self.fatal_parsing_error(f"unexpected token {self.tok}") + self.get_next_token() + + if not chain: + self.fatal_parsing_error("empty chains are not allowed") + self.chains.append(chain) + return True + + def generate(self): + self.proto = 'syntax = "proto3";\n\n' + self.cpp = '#include \n#include \n#include \n\n#include \n\n' + + for incl_file in self.includes: + self.cpp += f'#include "{incl_file}"\n' + self.cpp += '\n' + + self.proto += 'message Word {\n' + self.proto += '\tenum Value {\n' + + self.cpp += 'void GenerateWord(const Word&, std::string&, int);\n\n' + + self.cpp += 'void GenerateSentence(const Sentence& stc, std::string &s, int depth) {\n' + self.cpp += '\tfor (int i = 0; i < stc.words_size(); i++ ) {\n' + self.cpp += '\t\tGenerateWord(stc.words(i), s, ++depth);\n' + self.cpp += '\t}\n' + self.cpp += '}\n' + + self.cpp += 'void GenerateWord(const Word& word, std::string &s, int depth) {\n' + + self.cpp += '\tif (depth > 5) return;\n\n' + self.cpp += '\tswitch (word.value()) {\n' + + for idx, chain in enumerate(self.chains): + self.proto += f'\t\tvalue_{idx} = {idx};\n' + + self.cpp += f'\t\tcase {idx}: {{\n' + num_var = 0 + for item in chain: + if isinstance(item, TextValue): + self.cpp += f'\t\t\ts += "{item.t}";\n' + elif isinstance(item, Var): + self.cpp += f'\t\t\tif (word.inner().words_size() > {num_var})\t\t\t\tGenerateWord(word.inner().words({num_var}), s, ++depth);\n' + num_var += 1 + else: + raise Exception("unknown token met during generation") + self.cpp += '\t\t\tbreak;\n\t\t}\n' + self.cpp += '\t\tdefault: break;\n' + + self.cpp += '\t}\n' + + self.proto += '\t}\n' + self.proto += '\tValue value = 1;\n' + self.proto += '\tSentence inner = 2;\n' + self.proto += '}\nmessage Sentence {\n\trepeated Word words = 1;\n}' + + self.cpp += '}\n' + return self.cpp, self.proto + + def fatal_parsing_error(self, msg): + print(f"Line: {self.line}, Col: {self.col}") + raise Exception(f"fatal error during parsing. 
{msg}") + + +def main(args): + input_file, outfile_cpp, outfile_proto = args + + if not outfile_proto.endswith('.proto'): + raise Exception("outfile_proto (argv[3]) should end with `.proto`") + + include_filename = outfile_proto[:-6] + ".pb.h" + + p = Parser() + p.add_include(include_filename) + p.parse_file(input_file) + + cpp, proto = p.generate() + + proto = proto.replace('\t', ' ' * 4) + cpp = cpp.replace('\t', ' ' * 4) + + with open(outfile_cpp, 'w') as f: + f.write(cpp) + + with open(outfile_proto, 'w') as f: + f.write(proto) + + +if __name__ == '__main__': + if len(sys.argv) < 3: + print(f"Usage {sys.argv[0]} ") + sys.exit(1) + main(sys.argv[1:]) + diff --git a/src/Parsers/fuzzers/codegen_fuzzer/out.cpp b/src/Parsers/fuzzers/codegen_fuzzer/out.cpp new file mode 100644 index 00000000000..effe6e7821b --- /dev/null +++ b/src/Parsers/fuzzers/codegen_fuzzer/out.cpp @@ -0,0 +1,2189 @@ +#include +#include +#include + +#include + +#include "out.pb.h" + +void GenerateWord(const Word&, std::string&, int); + +void GenerateSentence(const Sentence& stc, std::string &s, int depth) { + for (int i = 0; i < stc.words_size(); i++ ) { + GenerateWord(stc.words(i), s, ++depth); + } +} +void GenerateWord(const Word& word, std::string &s, int depth) { + if (depth > 5) return; + + switch (word.value()) { + case 0: { + s += " "; + break; + } + case 1: { + s += " "; + break; + } + case 2: { + s += " "; + break; + } + case 3: { + s += ";"; + break; + } + case 4: { + s += "("; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ")"; + break; + } + case 5: { + s += "("; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ", "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += ")"; + break; + } + case 6: { + s += "("; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ", "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += ", "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + s += ")"; + break; + } + case 7: { + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ", "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + break; + } + case 8: { + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ", "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += ", "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + break; + } + case 9: { + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ", "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += ", "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + s += ", "; + if (word.inner().words_size() > 3) GenerateWord(word.inner().words(3), s, ++depth); + break; + } + case 10: { + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ", "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += ", "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + s += ", "; + if (word.inner().words_size() > 3) GenerateWord(word.inner().words(3), s, ++depth); + s += ", "; + if (word.inner().words_size() > 4) 
GenerateWord(word.inner().words(4), s, ++depth); + break; + } + case 11: { + s += "["; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ", "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += "]"; + break; + } + case 12: { + s += "["; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ", "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += ", "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + s += "]"; + break; + } + case 13: { + s += "["; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ", "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += ", "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + s += ", "; + if (word.inner().words_size() > 3) GenerateWord(word.inner().words(3), s, ++depth); + s += "]"; + break; + } + case 14: { + s += "["; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ", "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += ", "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + s += ", "; + if (word.inner().words_size() > 3) GenerateWord(word.inner().words(3), s, ++depth); + s += ", "; + if (word.inner().words_size() > 4) GenerateWord(word.inner().words(4), s, ++depth); + s += "]"; + break; + } + case 15: { + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += "("; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += ")"; + break; + } + case 16: { + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += "("; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += ", "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + s += ")"; + break; + } + case 17: { + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += "("; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += ", "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + s += ", "; + if (word.inner().words_size() > 3) GenerateWord(word.inner().words(3), s, ++depth); + s += ")"; + break; + } + case 18: { + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += " as "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + break; + } + case 19: { + s += "SELECT "; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += " FROM "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += " WHERE "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + break; + } + case 20: { + s += "SELECT "; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += " FROM "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += " GROUP BY "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + break; + } + case 21: { + s += "SELECT "; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), 
s, ++depth); + s += " FROM "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += " SORT BY "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + break; + } + case 22: { + s += "SELECT "; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += " FROM "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += " LIMIT "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + break; + } + case 23: { + s += "SELECT "; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += " FROM "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += " JOIN "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + break; + } + case 24: { + s += "SELECT "; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += " FROM "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += " ARRAY JOIN "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + break; + } + case 25: { + s += "SELECT "; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += " FROM "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += " JOIN "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + s += " ON "; + if (word.inner().words_size() > 3) GenerateWord(word.inner().words(3), s, ++depth); + break; + } + case 26: { + s += "SELECT "; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += " FROM "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += " JOIN "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + s += " USING "; + if (word.inner().words_size() > 3) GenerateWord(word.inner().words(3), s, ++depth); + break; + } + case 27: { + s += "SELECT "; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += " INTO OUTFILE "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + break; + } + case 28: { + s += "WITH "; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += " AS "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + break; + } + case 29: { + s += "{"; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ":"; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += "}"; + break; + } + case 30: { + s += "["; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ","; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += "]"; + break; + } + case 31: { + s += "[]"; + break; + } + case 32: { + s += " x "; + break; + } + case 33: { + s += "x"; + break; + } + case 34: { + s += " `x` "; + break; + } + case 35: { + s += "`x`"; + break; + } + case 36: { + s += " \"value\" "; + break; + } + case 37: { + s += "\"value\""; + break; + } + case 38: { + s += " 0 "; + break; + } + case 39: { + s += "0"; + break; + } + case 40: { + s += "1"; + break; + } + case 41: { + s += "2"; + break; + } + case 42: { + s += "123123123123123123"; 
+ break; + } + case 43: { + s += "182374019873401982734091873420923123123123123123"; + break; + } + case 44: { + s += "1e-1"; + break; + } + case 45: { + s += "1.1"; + break; + } + case 46: { + s += "\"\""; + break; + } + case 47: { + s += " '../../../../../../../../../etc/passwd' "; + break; + } + case 48: { + s += "/"; + break; + } + case 49: { + s += "="; + break; + } + case 50: { + s += "=="; + break; + } + case 51: { + s += "!="; + break; + } + case 52: { + s += "<>"; + break; + } + case 53: { + s += "<"; + break; + } + case 54: { + s += "<="; + break; + } + case 55: { + s += ">"; + break; + } + case 56: { + s += ">="; + break; + } + case 57: { + s += "<<"; + break; + } + case 58: { + s += "|<<"; + break; + } + case 59: { + s += "&"; + break; + } + case 60: { + s += "|"; + break; + } + case 61: { + s += "||"; + break; + } + case 62: { + s += "<|"; + break; + } + case 63: { + s += "|>"; + break; + } + case 64: { + s += "+"; + break; + } + case 65: { + s += "-"; + break; + } + case 66: { + s += "~"; + break; + } + case 67: { + s += "*"; + break; + } + case 68: { + s += "/"; + break; + } + case 69: { + s += "\\"; + break; + } + case 70: { + s += "%"; + break; + } + case 71: { + s += ""; + break; + } + case 72: { + s += "."; + break; + } + case 73: { + s += ","; + break; + } + case 74: { + s += ","; + break; + } + case 75: { + s += ","; + break; + } + case 76: { + s += ","; + break; + } + case 77: { + s += ","; + break; + } + case 78: { + s += ","; + break; + } + case 79: { + s += "("; + break; + } + case 80: { + s += ")"; + break; + } + case 81: { + s += "("; + break; + } + case 82: { + s += ")"; + break; + } + case 83: { + s += "("; + break; + } + case 84: { + s += ")"; + break; + } + case 85: { + s += "("; + break; + } + case 86: { + s += ")"; + break; + } + case 87: { + s += "("; + break; + } + case 88: { + s += ")"; + break; + } + case 89: { + s += "("; + break; + } + case 90: { + s += ")"; + break; + } + case 91: { + s += "?"; + break; + } + case 92: { + s += ":"; + break; + } + case 93: { + s += "@"; + break; + } + case 94: { + s += "@@"; + break; + } + case 95: { + s += "$"; + break; + } + case 96: { + s += "\""; + break; + } + case 97: { + s += "`"; + break; + } + case 98: { + s += "{"; + break; + } + case 99: { + s += "}"; + break; + } + case 100: { + s += "^"; + break; + } + case 101: { + s += "::"; + break; + } + case 102: { + s += "->"; + break; + } + case 103: { + s += "]"; + break; + } + case 104: { + s += "["; + break; + } + case 105: { + s += " ADD "; + break; + } + case 106: { + s += " ADD COLUMN "; + break; + } + case 107: { + s += " ADD CONSTRAINT "; + break; + } + case 108: { + s += " ADD INDEX "; + break; + } + case 109: { + s += " AFTER "; + break; + } + case 110: { + s += " AggregateFunction "; + break; + } + case 111: { + s += " aggThrow "; + break; + } + case 112: { + s += " ALL "; + break; + } + case 113: { + s += " ALTER LIVE VIEW "; + break; + } + case 114: { + s += " ALTER TABLE "; + break; + } + case 115: { + s += " and "; + break; + } + case 116: { + s += " ANTI "; + break; + } + case 117: { + s += " any "; + break; + } + case 118: { + s += " anyHeavy "; + break; + } + case 119: { + s += " anyLast "; + break; + } + case 120: { + s += " argMax "; + break; + } + case 121: { + s += " argMin "; + break; + } + case 122: { + s += " array "; + break; + } + case 123: { + s += " Array "; + break; + } + case 124: { + s += " arrayAll "; + break; + } + case 125: { + s += " arrayAUC "; + break; + } + case 126: { + s += " arrayCompact "; + break; + } + case 127: { + s += 
" arrayConcat "; + break; + } + case 128: { + s += " arrayCount "; + break; + } + case 129: { + s += " arrayCumSum "; + break; + } + case 130: { + s += " arrayCumSumNonNegative "; + break; + } + case 131: { + s += " arrayDifference "; + break; + } + case 132: { + s += " arrayDistinct "; + break; + } + case 133: { + s += " arrayElement "; + break; + } + case 134: { + s += " arrayEnumerate "; + break; + } + case 135: { + s += " arrayEnumerateDense "; + break; + } + case 136: { + s += " arrayEnumerateDenseRanked "; + break; + } + case 137: { + s += " arrayEnumerateUniq "; + break; + } + case 138: { + s += " arrayEnumerateUniqRanked "; + break; + } + case 139: { + s += " arrayExists "; + break; + } + case 140: { + s += " arrayFill "; + break; + } + case 141: { + s += " arrayFilter "; + break; + } + case 142: { + s += " arrayFirst "; + break; + } + case 143: { + s += " arrayFirstIndex "; + break; + } + case 144: { + s += " arrayFlatten "; + break; + } + case 145: { + s += " arrayIntersect "; + break; + } + case 146: { + s += " arrayJoin "; + break; + } + case 147: { + s += " ARRAY JOIN "; + break; + } + case 148: { + s += " arrayMap "; + break; + } + case 149: { + s += " arrayPopBack "; + break; + } + case 150: { + s += " arrayPopFront "; + break; + } + case 151: { + s += " arrayPushBack "; + break; + } + case 152: { + s += " arrayPushFront "; + break; + } + case 153: { + s += " arrayReduce "; + break; + } + case 154: { + s += " arrayReduceInRanges "; + break; + } + case 155: { + s += " arrayResize "; + break; + } + case 156: { + s += " arrayReverse "; + break; + } + case 157: { + s += " arrayReverseFill "; + break; + } + case 158: { + s += " arrayReverseSort "; + break; + } + case 159: { + s += " arrayReverseSplit "; + break; + } + case 160: { + s += " arraySlice "; + break; + } + case 161: { + s += " arraySort "; + break; + } + case 162: { + s += " arraySplit "; + break; + } + case 163: { + s += " arraySum "; + break; + } + case 164: { + s += " arrayUniq "; + break; + } + case 165: { + s += " arrayWithConstant "; + break; + } + case 166: { + s += " arrayZip "; + break; + } + case 167: { + s += " AS "; + break; + } + case 168: { + s += " ASC "; + break; + } + case 169: { + s += " ASCENDING "; + break; + } + case 170: { + s += " ASOF "; + break; + } + case 171: { + s += " AST "; + break; + } + case 172: { + s += " ATTACH "; + break; + } + case 173: { + s += " ATTACH PART "; + break; + } + case 174: { + s += " ATTACH PARTITION "; + break; + } + case 175: { + s += " avg "; + break; + } + case 176: { + s += " avgWeighted "; + break; + } + case 177: { + s += " basename "; + break; + } + case 178: { + s += " BETWEEN "; + break; + } + case 179: { + s += " BOTH "; + break; + } + case 180: { + s += " boundingRatio "; + break; + } + case 181: { + s += " BY "; + break; + } + case 182: { + s += " CAST "; + break; + } + case 183: { + s += " categoricalInformationValue "; + break; + } + case 184: { + s += " CHECK "; + break; + } + case 185: { + s += " CHECK TABLE "; + break; + } + case 186: { + s += " CLEAR COLUMN "; + break; + } + case 187: { + s += " CLEAR INDEX "; + break; + } + case 188: { + s += " COLLATE "; + break; + } + case 189: { + s += " COLUMNS "; + break; + } + case 190: { + s += " COMMENT COLUMN "; + break; + } + case 191: { + s += " CONSTRAINT "; + break; + } + case 192: { + s += " corr "; + break; + } + case 193: { + s += " corrStable "; + break; + } + case 194: { + s += " count "; + break; + } + case 195: { + s += " countEqual "; + break; + } + case 196: { + s += " covarPop "; + break; + } 
+ case 197: { + s += " covarPopStable "; + break; + } + case 198: { + s += " covarSamp "; + break; + } + case 199: { + s += " covarSampStable "; + break; + } + case 200: { + s += " CREATE "; + break; + } + case 201: { + s += " CROSS "; + break; + } + case 202: { + s += " CUBE "; + break; + } + case 203: { + s += " cutFragment "; + break; + } + case 204: { + s += " cutQueryString "; + break; + } + case 205: { + s += " cutQueryStringAndFragment "; + break; + } + case 206: { + s += " cutToFirstSignificantSubdomain "; + break; + } + case 207: { + s += " cutURLParameter "; + break; + } + case 208: { + s += " cutWWW "; + break; + } + case 209: { + s += " D "; + break; + } + case 210: { + s += " DATABASE "; + break; + } + case 211: { + s += " DATABASES "; + break; + } + case 212: { + s += " Date "; + break; + } + case 213: { + s += " DATE "; + break; + } + case 214: { + s += " DATE_ADD "; + break; + } + case 215: { + s += " DATEADD "; + break; + } + case 216: { + s += " DATE_DIFF "; + break; + } + case 217: { + s += " DATEDIFF "; + break; + } + case 218: { + s += " DATE_SUB "; + break; + } + case 219: { + s += " DATESUB "; + break; + } + case 220: { + s += " DateTime "; + break; + } + case 221: { + s += " DateTime64 "; + break; + } + case 222: { + s += " DAY "; + break; + } + case 223: { + s += " DD "; + break; + } + case 224: { + s += " Decimal "; + break; + } + case 225: { + s += " Decimal128 "; + break; + } + case 226: { + s += " Decimal32 "; + break; + } + case 227: { + s += " Decimal64 "; + break; + } + case 228: { + s += " decodeURLComponent "; + break; + } + case 229: { + s += " DEDUPLICATE "; + break; + } + case 230: { + s += " DELETE "; + break; + } + case 231: { + s += " DELETE WHERE "; + break; + } + case 232: { + s += " DESC "; + break; + } + case 233: { + s += " DESCENDING "; + break; + } + case 234: { + s += " DESCRIBE "; + break; + } + case 235: { + s += " DETACH "; + break; + } + case 236: { + s += " DETACH PARTITION "; + break; + } + case 237: { + s += " DICTIONARIES "; + break; + } + case 238: { + s += " DICTIONARY "; + break; + } + case 239: { + s += " DISTINCT "; + break; + } + case 240: { + s += " domain "; + break; + } + case 241: { + s += " domainWithoutWWW "; + break; + } + case 242: { + s += " DROP "; + break; + } + case 243: { + s += " DROP COLUMN "; + break; + } + case 244: { + s += " DROP CONSTRAINT "; + break; + } + case 245: { + s += " DROP DETACHED PART "; + break; + } + case 246: { + s += " DROP DETACHED PARTITION "; + break; + } + case 247: { + s += " DROP INDEX "; + break; + } + case 248: { + s += " DROP PARTITION "; + break; + } + case 249: { + s += " emptyArrayToSingle "; + break; + } + case 250: { + s += " ENGINE "; + break; + } + case 251: { + s += " entropy "; + break; + } + case 252: { + s += " Enum "; + break; + } + case 253: { + s += " Enum16 "; + break; + } + case 254: { + s += " Enum8 "; + break; + } + case 255: { + s += " EVENTS "; + break; + } + case 256: { + s += " EXCHANGE TABLES "; + break; + } + case 257: { + s += " EXISTS "; + break; + } + case 258: { + s += " EXTRACT "; + break; + } + case 259: { + s += " extractURLParameter "; + break; + } + case 260: { + s += " extractURLParameterNames "; + break; + } + case 261: { + s += " extractURLParameters "; + break; + } + case 262: { + s += " FETCH PARTITION "; + break; + } + case 263: { + s += " FETCH PART "; + break; + } + case 264: { + s += " FINAL "; + break; + } + case 265: { + s += " FIRST "; + break; + } + case 266: { + s += " firstSignificantSubdomain "; + break; + } + case 267: { + s += " 
FixedString "; + break; + } + case 268: { + s += " Float32 "; + break; + } + case 269: { + s += " Float64 "; + break; + } + case 270: { + s += " FOR "; + break; + } + case 271: { + s += " ForEach "; + break; + } + case 272: { + s += " FORMAT "; + break; + } + case 273: { + s += " fragment "; + break; + } + case 274: { + s += " FREEZE "; + break; + } + case 275: { + s += " FROM "; + break; + } + case 276: { + s += " FULL "; + break; + } + case 277: { + s += " FUNCTION "; + break; + } + case 278: { + s += " __getScalar "; + break; + } + case 279: { + s += " GLOBAL "; + break; + } + case 280: { + s += " GRANULARITY "; + break; + } + case 281: { + s += " groupArray "; + break; + } + case 282: { + s += " groupArrayInsertAt "; + break; + } + case 283: { + s += " groupArrayMovingAvg "; + break; + } + case 284: { + s += " groupArrayMovingSum "; + break; + } + case 285: { + s += " groupArraySample "; + break; + } + case 286: { + s += " groupBitAnd "; + break; + } + case 287: { + s += " groupBitmap "; + break; + } + case 288: { + s += " groupBitmapAnd "; + break; + } + case 289: { + s += " groupBitmapOr "; + break; + } + case 290: { + s += " groupBitmapXor "; + break; + } + case 291: { + s += " groupBitOr "; + break; + } + case 292: { + s += " groupBitXor "; + break; + } + case 293: { + s += " GROUP BY "; + break; + } + case 294: { + s += " groupUniqArray "; + break; + } + case 295: { + s += " has "; + break; + } + case 296: { + s += " hasAll "; + break; + } + case 297: { + s += " hasAny "; + break; + } + case 298: { + s += " HAVING "; + break; + } + case 299: { + s += " HH "; + break; + } + case 300: { + s += " histogram "; + break; + } + case 301: { + s += " HOUR "; + break; + } + case 302: { + s += " ID "; + break; + } + case 303: { + s += " if "; + break; + } + case 304: { + s += " IF EXISTS "; + break; + } + case 305: { + s += " IF NOT EXISTS "; + break; + } + case 306: { + s += " IN "; + break; + } + case 307: { + s += " INDEX "; + break; + } + case 308: { + s += " indexOf "; + break; + } + case 309: { + s += " INNER "; + break; + } + case 310: { + s += " IN PARTITION "; + break; + } + case 311: { + s += " INSERT INTO "; + break; + } + case 312: { + s += " Int16 "; + break; + } + case 313: { + s += " Int32 "; + break; + } + case 314: { + s += " Int64 "; + break; + } + case 315: { + s += " Int8 "; + break; + } + case 316: { + s += " INTERVAL "; + break; + } + case 317: { + s += " IntervalDay "; + break; + } + case 318: { + s += " IntervalHour "; + break; + } + case 319: { + s += " IntervalMinute "; + break; + } + case 320: { + s += " IntervalMonth "; + break; + } + case 321: { + s += " IntervalQuarter "; + break; + } + case 322: { + s += " IntervalSecond "; + break; + } + case 323: { + s += " IntervalWeek "; + break; + } + case 324: { + s += " IntervalYear "; + break; + } + case 325: { + s += " INTO OUTFILE "; + break; + } + case 326: { + s += " JOIN "; + break; + } + case 327: { + s += " kurtPop "; + break; + } + case 328: { + s += " kurtSamp "; + break; + } + case 329: { + s += " LAST "; + break; + } + case 330: { + s += " LAYOUT "; + break; + } + case 331: { + s += " LEADING "; + break; + } + case 332: { + s += " LEFT "; + break; + } + case 333: { + s += " LEFT ARRAY JOIN "; + break; + } + case 334: { + s += " length "; + break; + } + case 335: { + s += " LIFETIME "; + break; + } + case 336: { + s += " LIKE "; + break; + } + case 337: { + s += " LIMIT "; + break; + } + case 338: { + s += " LIVE "; + break; + } + case 339: { + s += " LOCAL "; + break; + } + case 340: { + s += " LowCardinality 
"; + break; + } + case 341: { + s += " LTRIM "; + break; + } + case 342: { + s += " M "; + break; + } + case 343: { + s += " MATERIALIZED "; + break; + } + case 344: { + s += " MATERIALIZE INDEX "; + break; + } + case 345: { + s += " MATERIALIZE TTL "; + break; + } + case 346: { + s += " max "; + break; + } + case 347: { + s += " maxIntersections "; + break; + } + case 348: { + s += " maxIntersectionsPosition "; + break; + } + case 349: { + s += " Merge "; + break; + } + case 350: { + s += " MI "; + break; + } + case 351: { + s += " min "; + break; + } + case 352: { + s += " MINUTE "; + break; + } + case 353: { + s += " MM "; + break; + } + case 354: { + s += " MODIFY "; + break; + } + case 355: { + s += " MODIFY COLUMN "; + break; + } + case 356: { + s += " MODIFY ORDER BY "; + break; + } + case 357: { + s += " MODIFY QUERY "; + break; + } + case 358: { + s += " MODIFY SETTING "; + break; + } + case 359: { + s += " MODIFY TTL "; + break; + } + case 360: { + s += " MONTH "; + break; + } + case 361: { + s += " MOVE PART "; + break; + } + case 362: { + s += " MOVE PARTITION "; + break; + } + case 363: { + s += " movingXXX "; + break; + } + case 364: { + s += " N "; + break; + } + case 365: { + s += " NAME "; + break; + } + case 366: { + s += " Nested "; + break; + } + case 367: { + s += " NO DELAY "; + break; + } + case 368: { + s += " NONE "; + break; + } + case 369: { + s += " not "; + break; + } + case 370: { + s += " nothing "; + break; + } + case 371: { + s += " Nothing "; + break; + } + case 372: { + s += " Null "; + break; + } + case 373: { + s += " Nullable "; + break; + } + case 374: { + s += " NULLS "; + break; + } + case 375: { + s += " OFFSET "; + break; + } + case 376: { + s += " ON "; + break; + } + case 377: { + s += " ONLY "; + break; + } + case 378: { + s += " OPTIMIZE TABLE "; + break; + } + case 379: { + s += " ORDER BY "; + break; + } + case 380: { + s += " OR REPLACE "; + break; + } + case 381: { + s += " OUTER "; + break; + } + case 382: { + s += " PARTITION "; + break; + } + case 383: { + s += " PARTITION BY "; + break; + } + case 384: { + s += " path "; + break; + } + case 385: { + s += " pathFull "; + break; + } + case 386: { + s += " POPULATE "; + break; + } + case 387: { + s += " PREWHERE "; + break; + } + case 388: { + s += " PRIMARY KEY "; + break; + } + case 389: { + s += " protocol "; + break; + } + case 390: { + s += " Q "; + break; + } + case 391: { + s += " QQ "; + break; + } + case 392: { + s += " QUARTER "; + break; + } + case 393: { + s += " queryString "; + break; + } + case 394: { + s += " queryStringAndFragment "; + break; + } + case 395: { + s += " range "; + break; + } + case 396: { + s += " REFRESH "; + break; + } + case 397: { + s += " RENAME COLUMN "; + break; + } + case 398: { + s += " RENAME TABLE "; + break; + } + case 399: { + s += " REPLACE PARTITION "; + break; + } + case 400: { + s += " Resample "; + break; + } + case 401: { + s += " RESUME "; + break; + } + case 402: { + s += " retention "; + break; + } + case 403: { + s += " RIGHT "; + break; + } + case 404: { + s += " ROLLUP "; + break; + } + case 405: { + s += " RTRIM "; + break; + } + case 406: { + s += " S "; + break; + } + case 407: { + s += " SAMPLE "; + break; + } + case 408: { + s += " SAMPLE BY "; + break; + } + case 409: { + s += " SECOND "; + break; + } + case 410: { + s += " SELECT "; + break; + } + case 411: { + s += " SEMI "; + break; + } + case 412: { + s += " sequenceCount "; + break; + } + case 413: { + s += " sequenceMatch "; + break; + } + case 414: { + s += " SET "; + 
break; + } + case 415: { + s += " SETTINGS "; + break; + } + case 416: { + s += " SHOW "; + break; + } + case 417: { + s += " SHOW PROCESSLIST "; + break; + } + case 418: { + s += " simpleLinearRegression "; + break; + } + case 419: { + s += " skewPop "; + break; + } + case 420: { + s += " skewSamp "; + break; + } + case 421: { + s += " SOURCE "; + break; + } + case 422: { + s += " SQL_TSI_DAY "; + break; + } + case 423: { + s += " SQL_TSI_HOUR "; + break; + } + case 424: { + s += " SQL_TSI_MINUTE "; + break; + } + case 425: { + s += " SQL_TSI_MONTH "; + break; + } + case 426: { + s += " SQL_TSI_QUARTER "; + break; + } + case 427: { + s += " SQL_TSI_SECOND "; + break; + } + case 428: { + s += " SQL_TSI_WEEK "; + break; + } + case 429: { + s += " SQL_TSI_YEAR "; + break; + } + case 430: { + s += " SS "; + break; + } + case 431: { + s += " State "; + break; + } + case 432: { + s += " stddevPop "; + break; + } + case 433: { + s += " stddevPopStable "; + break; + } + case 434: { + s += " stddevSamp "; + break; + } + case 435: { + s += " stddevSampStable "; + break; + } + case 436: { + s += " STEP "; + break; + } + case 437: { + s += " stochasticLinearRegression "; + break; + } + case 438: { + s += " stochasticLogisticRegression "; + break; + } + case 439: { + s += " String "; + break; + } + case 440: { + s += " SUBSTRING "; + break; + } + case 441: { + s += " sum "; + break; + } + case 442: { + s += " sumKahan "; + break; + } + case 443: { + s += " sumMap "; + break; + } + case 444: { + s += " sumMapFiltered "; + break; + } + case 445: { + s += " sumMapFilteredWithOverflow "; + break; + } + case 446: { + s += " sumMapWithOverflow "; + break; + } + case 447: { + s += " sumWithOverflow "; + break; + } + case 448: { + s += " SUSPEND "; + break; + } + case 449: { + s += " TABLE "; + break; + } + case 450: { + s += " TABLES "; + break; + } + case 451: { + s += " TEMPORARY "; + break; + } + case 452: { + s += " TIMESTAMP "; + break; + } + case 453: { + s += " TIMESTAMP_ADD "; + break; + } + case 454: { + s += " TIMESTAMPADD "; + break; + } + case 455: { + s += " TIMESTAMP_DIFF "; + break; + } + case 456: { + s += " TIMESTAMPDIFF "; + break; + } + case 457: { + s += " TIMESTAMP_SUB "; + break; + } + case 458: { + s += " TIMESTAMPSUB "; + break; + } + case 459: { + s += " TO "; + break; + } + case 460: { + s += " TO DISK "; + break; + } + case 461: { + s += " TOP "; + break; + } + case 462: { + s += " topK "; + break; + } + case 463: { + s += " topKWeighted "; + break; + } + case 464: { + s += " topLevelDomain "; + break; + } + case 465: { + s += " TO TABLE "; + break; + } + case 466: { + s += " TOTALS "; + break; + } + case 467: { + s += " TO VOLUME "; + break; + } + case 468: { + s += " TRAILING "; + break; + } + case 469: { + s += " TRIM "; + break; + } + case 470: { + s += " TRUNCATE "; + break; + } + case 471: { + s += " TTL "; + break; + } + case 472: { + s += " Tuple "; + break; + } + case 473: { + s += " TYPE "; + break; + } + case 474: { + s += " UInt16 "; + break; + } + case 475: { + s += " UInt32 "; + break; + } + case 476: { + s += " UInt64 "; + break; + } + case 477: { + s += " UInt8 "; + break; + } + case 478: { + s += " uniq "; + break; + } + case 479: { + s += " uniqCombined "; + break; + } + case 480: { + s += " uniqCombined64 "; + break; + } + case 481: { + s += " uniqExact "; + break; + } + case 482: { + s += " uniqHLL12 "; + break; + } + case 483: { + s += " uniqUpTo "; + break; + } + case 484: { + s += " UPDATE "; + break; + } + case 485: { + s += " URLHierarchy "; + break; + } + 
case 486: { + s += " URLPathHierarchy "; + break; + } + case 487: { + s += " USE "; + break; + } + case 488: { + s += " USING "; + break; + } + case 489: { + s += " UUID "; + break; + } + case 490: { + s += " VALUES "; + break; + } + case 491: { + s += " varPop "; + break; + } + case 492: { + s += " varPopStable "; + break; + } + case 493: { + s += " varSamp "; + break; + } + case 494: { + s += " varSampStable "; + break; + } + case 495: { + s += " VIEW "; + break; + } + case 496: { + s += " WATCH "; + break; + } + case 497: { + s += " WEEK "; + break; + } + case 498: { + s += " WHERE "; + break; + } + case 499: { + s += " windowFunnel "; + break; + } + case 500: { + s += " WITH "; + break; + } + case 501: { + s += " WITH FILL "; + break; + } + case 502: { + s += " WITH TIES "; + break; + } + case 503: { + s += " WK "; + break; + } + case 504: { + s += " WW "; + break; + } + case 505: { + s += " YEAR "; + break; + } + case 506: { + s += " YY "; + break; + } + case 507: { + s += " YYYY "; + break; + } + default: break; + } +} diff --git a/src/Parsers/fuzzers/codegen_fuzzer/out.proto b/src/Parsers/fuzzers/codegen_fuzzer/out.proto new file mode 100644 index 00000000000..6c8cefce9a3 --- /dev/null +++ b/src/Parsers/fuzzers/codegen_fuzzer/out.proto @@ -0,0 +1,519 @@ +syntax = "proto3"; + +message Word { + enum Value { + value_0 = 0; + value_1 = 1; + value_2 = 2; + value_3 = 3; + value_4 = 4; + value_5 = 5; + value_6 = 6; + value_7 = 7; + value_8 = 8; + value_9 = 9; + value_10 = 10; + value_11 = 11; + value_12 = 12; + value_13 = 13; + value_14 = 14; + value_15 = 15; + value_16 = 16; + value_17 = 17; + value_18 = 18; + value_19 = 19; + value_20 = 20; + value_21 = 21; + value_22 = 22; + value_23 = 23; + value_24 = 24; + value_25 = 25; + value_26 = 26; + value_27 = 27; + value_28 = 28; + value_29 = 29; + value_30 = 30; + value_31 = 31; + value_32 = 32; + value_33 = 33; + value_34 = 34; + value_35 = 35; + value_36 = 36; + value_37 = 37; + value_38 = 38; + value_39 = 39; + value_40 = 40; + value_41 = 41; + value_42 = 42; + value_43 = 43; + value_44 = 44; + value_45 = 45; + value_46 = 46; + value_47 = 47; + value_48 = 48; + value_49 = 49; + value_50 = 50; + value_51 = 51; + value_52 = 52; + value_53 = 53; + value_54 = 54; + value_55 = 55; + value_56 = 56; + value_57 = 57; + value_58 = 58; + value_59 = 59; + value_60 = 60; + value_61 = 61; + value_62 = 62; + value_63 = 63; + value_64 = 64; + value_65 = 65; + value_66 = 66; + value_67 = 67; + value_68 = 68; + value_69 = 69; + value_70 = 70; + value_71 = 71; + value_72 = 72; + value_73 = 73; + value_74 = 74; + value_75 = 75; + value_76 = 76; + value_77 = 77; + value_78 = 78; + value_79 = 79; + value_80 = 80; + value_81 = 81; + value_82 = 82; + value_83 = 83; + value_84 = 84; + value_85 = 85; + value_86 = 86; + value_87 = 87; + value_88 = 88; + value_89 = 89; + value_90 = 90; + value_91 = 91; + value_92 = 92; + value_93 = 93; + value_94 = 94; + value_95 = 95; + value_96 = 96; + value_97 = 97; + value_98 = 98; + value_99 = 99; + value_100 = 100; + value_101 = 101; + value_102 = 102; + value_103 = 103; + value_104 = 104; + value_105 = 105; + value_106 = 106; + value_107 = 107; + value_108 = 108; + value_109 = 109; + value_110 = 110; + value_111 = 111; + value_112 = 112; + value_113 = 113; + value_114 = 114; + value_115 = 115; + value_116 = 116; + value_117 = 117; + value_118 = 118; + value_119 = 119; + value_120 = 120; + value_121 = 121; + value_122 = 122; + value_123 = 123; + value_124 = 124; + value_125 = 125; + value_126 = 126; + value_127 = 127; + 
value_128 = 128; + value_129 = 129; + value_130 = 130; + value_131 = 131; + value_132 = 132; + value_133 = 133; + value_134 = 134; + value_135 = 135; + value_136 = 136; + value_137 = 137; + value_138 = 138; + value_139 = 139; + value_140 = 140; + value_141 = 141; + value_142 = 142; + value_143 = 143; + value_144 = 144; + value_145 = 145; + value_146 = 146; + value_147 = 147; + value_148 = 148; + value_149 = 149; + value_150 = 150; + value_151 = 151; + value_152 = 152; + value_153 = 153; + value_154 = 154; + value_155 = 155; + value_156 = 156; + value_157 = 157; + value_158 = 158; + value_159 = 159; + value_160 = 160; + value_161 = 161; + value_162 = 162; + value_163 = 163; + value_164 = 164; + value_165 = 165; + value_166 = 166; + value_167 = 167; + value_168 = 168; + value_169 = 169; + value_170 = 170; + value_171 = 171; + value_172 = 172; + value_173 = 173; + value_174 = 174; + value_175 = 175; + value_176 = 176; + value_177 = 177; + value_178 = 178; + value_179 = 179; + value_180 = 180; + value_181 = 181; + value_182 = 182; + value_183 = 183; + value_184 = 184; + value_185 = 185; + value_186 = 186; + value_187 = 187; + value_188 = 188; + value_189 = 189; + value_190 = 190; + value_191 = 191; + value_192 = 192; + value_193 = 193; + value_194 = 194; + value_195 = 195; + value_196 = 196; + value_197 = 197; + value_198 = 198; + value_199 = 199; + value_200 = 200; + value_201 = 201; + value_202 = 202; + value_203 = 203; + value_204 = 204; + value_205 = 205; + value_206 = 206; + value_207 = 207; + value_208 = 208; + value_209 = 209; + value_210 = 210; + value_211 = 211; + value_212 = 212; + value_213 = 213; + value_214 = 214; + value_215 = 215; + value_216 = 216; + value_217 = 217; + value_218 = 218; + value_219 = 219; + value_220 = 220; + value_221 = 221; + value_222 = 222; + value_223 = 223; + value_224 = 224; + value_225 = 225; + value_226 = 226; + value_227 = 227; + value_228 = 228; + value_229 = 229; + value_230 = 230; + value_231 = 231; + value_232 = 232; + value_233 = 233; + value_234 = 234; + value_235 = 235; + value_236 = 236; + value_237 = 237; + value_238 = 238; + value_239 = 239; + value_240 = 240; + value_241 = 241; + value_242 = 242; + value_243 = 243; + value_244 = 244; + value_245 = 245; + value_246 = 246; + value_247 = 247; + value_248 = 248; + value_249 = 249; + value_250 = 250; + value_251 = 251; + value_252 = 252; + value_253 = 253; + value_254 = 254; + value_255 = 255; + value_256 = 256; + value_257 = 257; + value_258 = 258; + value_259 = 259; + value_260 = 260; + value_261 = 261; + value_262 = 262; + value_263 = 263; + value_264 = 264; + value_265 = 265; + value_266 = 266; + value_267 = 267; + value_268 = 268; + value_269 = 269; + value_270 = 270; + value_271 = 271; + value_272 = 272; + value_273 = 273; + value_274 = 274; + value_275 = 275; + value_276 = 276; + value_277 = 277; + value_278 = 278; + value_279 = 279; + value_280 = 280; + value_281 = 281; + value_282 = 282; + value_283 = 283; + value_284 = 284; + value_285 = 285; + value_286 = 286; + value_287 = 287; + value_288 = 288; + value_289 = 289; + value_290 = 290; + value_291 = 291; + value_292 = 292; + value_293 = 293; + value_294 = 294; + value_295 = 295; + value_296 = 296; + value_297 = 297; + value_298 = 298; + value_299 = 299; + value_300 = 300; + value_301 = 301; + value_302 = 302; + value_303 = 303; + value_304 = 304; + value_305 = 305; + value_306 = 306; + value_307 = 307; + value_308 = 308; + value_309 = 309; + value_310 = 310; + value_311 = 311; + value_312 = 312; + value_313 = 313; + value_314 = 314; + 
value_315 = 315; + value_316 = 316; + value_317 = 317; + value_318 = 318; + value_319 = 319; + value_320 = 320; + value_321 = 321; + value_322 = 322; + value_323 = 323; + value_324 = 324; + value_325 = 325; + value_326 = 326; + value_327 = 327; + value_328 = 328; + value_329 = 329; + value_330 = 330; + value_331 = 331; + value_332 = 332; + value_333 = 333; + value_334 = 334; + value_335 = 335; + value_336 = 336; + value_337 = 337; + value_338 = 338; + value_339 = 339; + value_340 = 340; + value_341 = 341; + value_342 = 342; + value_343 = 343; + value_344 = 344; + value_345 = 345; + value_346 = 346; + value_347 = 347; + value_348 = 348; + value_349 = 349; + value_350 = 350; + value_351 = 351; + value_352 = 352; + value_353 = 353; + value_354 = 354; + value_355 = 355; + value_356 = 356; + value_357 = 357; + value_358 = 358; + value_359 = 359; + value_360 = 360; + value_361 = 361; + value_362 = 362; + value_363 = 363; + value_364 = 364; + value_365 = 365; + value_366 = 366; + value_367 = 367; + value_368 = 368; + value_369 = 369; + value_370 = 370; + value_371 = 371; + value_372 = 372; + value_373 = 373; + value_374 = 374; + value_375 = 375; + value_376 = 376; + value_377 = 377; + value_378 = 378; + value_379 = 379; + value_380 = 380; + value_381 = 381; + value_382 = 382; + value_383 = 383; + value_384 = 384; + value_385 = 385; + value_386 = 386; + value_387 = 387; + value_388 = 388; + value_389 = 389; + value_390 = 390; + value_391 = 391; + value_392 = 392; + value_393 = 393; + value_394 = 394; + value_395 = 395; + value_396 = 396; + value_397 = 397; + value_398 = 398; + value_399 = 399; + value_400 = 400; + value_401 = 401; + value_402 = 402; + value_403 = 403; + value_404 = 404; + value_405 = 405; + value_406 = 406; + value_407 = 407; + value_408 = 408; + value_409 = 409; + value_410 = 410; + value_411 = 411; + value_412 = 412; + value_413 = 413; + value_414 = 414; + value_415 = 415; + value_416 = 416; + value_417 = 417; + value_418 = 418; + value_419 = 419; + value_420 = 420; + value_421 = 421; + value_422 = 422; + value_423 = 423; + value_424 = 424; + value_425 = 425; + value_426 = 426; + value_427 = 427; + value_428 = 428; + value_429 = 429; + value_430 = 430; + value_431 = 431; + value_432 = 432; + value_433 = 433; + value_434 = 434; + value_435 = 435; + value_436 = 436; + value_437 = 437; + value_438 = 438; + value_439 = 439; + value_440 = 440; + value_441 = 441; + value_442 = 442; + value_443 = 443; + value_444 = 444; + value_445 = 445; + value_446 = 446; + value_447 = 447; + value_448 = 448; + value_449 = 449; + value_450 = 450; + value_451 = 451; + value_452 = 452; + value_453 = 453; + value_454 = 454; + value_455 = 455; + value_456 = 456; + value_457 = 457; + value_458 = 458; + value_459 = 459; + value_460 = 460; + value_461 = 461; + value_462 = 462; + value_463 = 463; + value_464 = 464; + value_465 = 465; + value_466 = 466; + value_467 = 467; + value_468 = 468; + value_469 = 469; + value_470 = 470; + value_471 = 471; + value_472 = 472; + value_473 = 473; + value_474 = 474; + value_475 = 475; + value_476 = 476; + value_477 = 477; + value_478 = 478; + value_479 = 479; + value_480 = 480; + value_481 = 481; + value_482 = 482; + value_483 = 483; + value_484 = 484; + value_485 = 485; + value_486 = 486; + value_487 = 487; + value_488 = 488; + value_489 = 489; + value_490 = 490; + value_491 = 491; + value_492 = 492; + value_493 = 493; + value_494 = 494; + value_495 = 495; + value_496 = 496; + value_497 = 497; + value_498 = 498; + value_499 = 499; + value_500 = 500; + value_501 = 501; + 
value_502 = 502;
+        value_503 = 503;
+        value_504 = 504;
+        value_505 = 505;
+        value_506 = 506;
+        value_507 = 507;
+    }
+    Value value = 1;
+    Sentence inner = 2;
+}
+message Sentence {
+    repeated Word words = 1;
+}
\ No newline at end of file
diff --git a/src/Parsers/fuzzers/codegen_fuzzer/update.sh b/src/Parsers/fuzzers/codegen_fuzzer/update.sh
new file mode 100755
index 00000000000..0982d6d0686
--- /dev/null
+++ b/src/Parsers/fuzzers/codegen_fuzzer/update.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+
+_main() {
+    local dict_filename="${1}"
+    if [[ $# -ne 1 ]];
+    then
+        echo "Usage: $0 <dict_filename>";
+        exit 1;
+    fi
+
+    if [[ ! -f $dict_filename ]];
+    then
+        echo "File $dict_filename doesn't exist";
+        exit 1
+    fi
+
+    cat clickhouse-template.g > clickhouse.g
+
+    while read line;
+    do
+        [[ -z "$line" ]] && continue
+        echo $line | sed -e 's/"\(.*\)"/" \1 ";/g'
+    done < $dict_filename >> clickhouse.g
+}
+
+_main "$@"
+
+# Sample run: ./update.sh ../../../../tests/fuzz/ast.dict
+# then run `python ./gen.py clickhouse.g out.cpp out.proto` to generate new files with tokens. Rebuild fuzzer

From 9ed33612980c59ac38ea63182407071148befd39 Mon Sep 17 00:00:00 2001
From: Pavel Cheremushkin
Date: Tue, 12 Oct 2021 03:53:43 +0300
Subject: [PATCH 167/438] removing code generated files, since protobuf generation is now done in CMake

---
 src/Parsers/fuzzers/codegen_fuzzer/out.cpp   | 2189 ------------------
 src/Parsers/fuzzers/codegen_fuzzer/out.proto |  519 -----
 2 files changed, 2708 deletions(-)
 delete mode 100644 src/Parsers/fuzzers/codegen_fuzzer/out.cpp
 delete mode 100644 src/Parsers/fuzzers/codegen_fuzzer/out.proto

diff --git a/src/Parsers/fuzzers/codegen_fuzzer/out.cpp b/src/Parsers/fuzzers/codegen_fuzzer/out.cpp
deleted file mode 100644
index effe6e7821b..00000000000
--- a/src/Parsers/fuzzers/codegen_fuzzer/out.cpp
+++ /dev/null
@@ -1,2189 +0,0 @@
-#include
-#include
-#include
-
-#include
-
-#include "out.pb.h"
-
-void GenerateWord(const Word&, std::string&, int);
-
-void GenerateSentence(const Sentence& stc, std::string &s, int depth) {
-    for (int i = 0; i < stc.words_size(); i++ ) {
-        GenerateWord(stc.words(i), s, ++depth);
-    }
-}
-void GenerateWord(const Word& word, std::string &s, int depth) {
-    if (depth > 5) return;
-
-    switch (word.value()) {
-        case 0: {
-            s += " ";
-            break;
-        }
-        case 1: {
-            s += " ";
-            break;
-        }
-        case 2: {
-            s += " ";
-            break;
-        }
-        case 3: {
-            s += ";";
-            break;
-        }
-        case 4: {
-            s += "(";
-            if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth);
-            s += ")";
-            break;
-        }
-        case 5: {
-            s += "(";
-            if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth);
-            s += ", ";
-            if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth);
-            s += ")";
-            break;
-        }
-        case 6: {
-            s += "(";
-            if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth);
-            s += ", ";
-            if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth);
-            s += ", ";
-            if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth);
-            s += ")";
-            break;
-        }
-        case 7: {
-            if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth);
-            s += ", ";
-            if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth);
-            break;
-        }
-        case 8: {
-            if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth);
-            s += ", ";
-            if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth);
-            s += ", ";
-            if (word.inner().words_size() > 
2) GenerateWord(word.inner().words(2), s, ++depth); - break; - } - case 9: { - if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); - s += ", "; - if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); - s += ", "; - if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); - s += ", "; - if (word.inner().words_size() > 3) GenerateWord(word.inner().words(3), s, ++depth); - break; - } - case 10: { - if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); - s += ", "; - if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); - s += ", "; - if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); - s += ", "; - if (word.inner().words_size() > 3) GenerateWord(word.inner().words(3), s, ++depth); - s += ", "; - if (word.inner().words_size() > 4) GenerateWord(word.inner().words(4), s, ++depth); - break; - } - case 11: { - s += "["; - if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); - s += ", "; - if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); - s += "]"; - break; - } - case 12: { - s += "["; - if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); - s += ", "; - if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); - s += ", "; - if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); - s += "]"; - break; - } - case 13: { - s += "["; - if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); - s += ", "; - if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); - s += ", "; - if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); - s += ", "; - if (word.inner().words_size() > 3) GenerateWord(word.inner().words(3), s, ++depth); - s += "]"; - break; - } - case 14: { - s += "["; - if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); - s += ", "; - if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); - s += ", "; - if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); - s += ", "; - if (word.inner().words_size() > 3) GenerateWord(word.inner().words(3), s, ++depth); - s += ", "; - if (word.inner().words_size() > 4) GenerateWord(word.inner().words(4), s, ++depth); - s += "]"; - break; - } - case 15: { - if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); - s += "("; - if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); - s += ")"; - break; - } - case 16: { - if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); - s += "("; - if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); - s += ", "; - if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); - s += ")"; - break; - } - case 17: { - if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); - s += "("; - if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); - s += ", "; - if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); - s += ", "; - if (word.inner().words_size() > 3) GenerateWord(word.inner().words(3), s, ++depth); - s += ")"; - break; - } - case 18: { - if (word.inner().words_size() > 0) 
GenerateWord(word.inner().words(0), s, ++depth); - s += " as "; - if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); - break; - } - case 19: { - s += "SELECT "; - if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); - s += " FROM "; - if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); - s += " WHERE "; - if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); - break; - } - case 20: { - s += "SELECT "; - if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); - s += " FROM "; - if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); - s += " GROUP BY "; - if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); - break; - } - case 21: { - s += "SELECT "; - if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); - s += " FROM "; - if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); - s += " SORT BY "; - if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); - break; - } - case 22: { - s += "SELECT "; - if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); - s += " FROM "; - if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); - s += " LIMIT "; - if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); - break; - } - case 23: { - s += "SELECT "; - if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); - s += " FROM "; - if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); - s += " JOIN "; - if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); - break; - } - case 24: { - s += "SELECT "; - if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); - s += " FROM "; - if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); - s += " ARRAY JOIN "; - if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); - break; - } - case 25: { - s += "SELECT "; - if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); - s += " FROM "; - if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); - s += " JOIN "; - if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); - s += " ON "; - if (word.inner().words_size() > 3) GenerateWord(word.inner().words(3), s, ++depth); - break; - } - case 26: { - s += "SELECT "; - if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); - s += " FROM "; - if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); - s += " JOIN "; - if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); - s += " USING "; - if (word.inner().words_size() > 3) GenerateWord(word.inner().words(3), s, ++depth); - break; - } - case 27: { - s += "SELECT "; - if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); - s += " INTO OUTFILE "; - if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); - break; - } - case 28: { - s += "WITH "; - if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); - s += " AS "; - if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); - break; - } - case 29: { 
- s += "{"; - if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); - s += ":"; - if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); - s += "}"; - break; - } - case 30: { - s += "["; - if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); - s += ","; - if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); - s += "]"; - break; - } - case 31: { - s += "[]"; - break; - } - case 32: { - s += " x "; - break; - } - case 33: { - s += "x"; - break; - } - case 34: { - s += " `x` "; - break; - } - case 35: { - s += "`x`"; - break; - } - case 36: { - s += " \"value\" "; - break; - } - case 37: { - s += "\"value\""; - break; - } - case 38: { - s += " 0 "; - break; - } - case 39: { - s += "0"; - break; - } - case 40: { - s += "1"; - break; - } - case 41: { - s += "2"; - break; - } - case 42: { - s += "123123123123123123"; - break; - } - case 43: { - s += "182374019873401982734091873420923123123123123123"; - break; - } - case 44: { - s += "1e-1"; - break; - } - case 45: { - s += "1.1"; - break; - } - case 46: { - s += "\"\""; - break; - } - case 47: { - s += " '../../../../../../../../../etc/passwd' "; - break; - } - case 48: { - s += "/"; - break; - } - case 49: { - s += "="; - break; - } - case 50: { - s += "=="; - break; - } - case 51: { - s += "!="; - break; - } - case 52: { - s += "<>"; - break; - } - case 53: { - s += "<"; - break; - } - case 54: { - s += "<="; - break; - } - case 55: { - s += ">"; - break; - } - case 56: { - s += ">="; - break; - } - case 57: { - s += "<<"; - break; - } - case 58: { - s += "|<<"; - break; - } - case 59: { - s += "&"; - break; - } - case 60: { - s += "|"; - break; - } - case 61: { - s += "||"; - break; - } - case 62: { - s += "<|"; - break; - } - case 63: { - s += "|>"; - break; - } - case 64: { - s += "+"; - break; - } - case 65: { - s += "-"; - break; - } - case 66: { - s += "~"; - break; - } - case 67: { - s += "*"; - break; - } - case 68: { - s += "/"; - break; - } - case 69: { - s += "\\"; - break; - } - case 70: { - s += "%"; - break; - } - case 71: { - s += ""; - break; - } - case 72: { - s += "."; - break; - } - case 73: { - s += ","; - break; - } - case 74: { - s += ","; - break; - } - case 75: { - s += ","; - break; - } - case 76: { - s += ","; - break; - } - case 77: { - s += ","; - break; - } - case 78: { - s += ","; - break; - } - case 79: { - s += "("; - break; - } - case 80: { - s += ")"; - break; - } - case 81: { - s += "("; - break; - } - case 82: { - s += ")"; - break; - } - case 83: { - s += "("; - break; - } - case 84: { - s += ")"; - break; - } - case 85: { - s += "("; - break; - } - case 86: { - s += ")"; - break; - } - case 87: { - s += "("; - break; - } - case 88: { - s += ")"; - break; - } - case 89: { - s += "("; - break; - } - case 90: { - s += ")"; - break; - } - case 91: { - s += "?"; - break; - } - case 92: { - s += ":"; - break; - } - case 93: { - s += "@"; - break; - } - case 94: { - s += "@@"; - break; - } - case 95: { - s += "$"; - break; - } - case 96: { - s += "\""; - break; - } - case 97: { - s += "`"; - break; - } - case 98: { - s += "{"; - break; - } - case 99: { - s += "}"; - break; - } - case 100: { - s += "^"; - break; - } - case 101: { - s += "::"; - break; - } - case 102: { - s += "->"; - break; - } - case 103: { - s += "]"; - break; - } - case 104: { - s += "["; - break; - } - case 105: { - s += " ADD "; - break; - } - case 106: { - s += " ADD COLUMN "; - break; - } - case 107: { - s += " ADD 
CONSTRAINT "; - break; - } - case 108: { - s += " ADD INDEX "; - break; - } - case 109: { - s += " AFTER "; - break; - } - case 110: { - s += " AggregateFunction "; - break; - } - case 111: { - s += " aggThrow "; - break; - } - case 112: { - s += " ALL "; - break; - } - case 113: { - s += " ALTER LIVE VIEW "; - break; - } - case 114: { - s += " ALTER TABLE "; - break; - } - case 115: { - s += " and "; - break; - } - case 116: { - s += " ANTI "; - break; - } - case 117: { - s += " any "; - break; - } - case 118: { - s += " anyHeavy "; - break; - } - case 119: { - s += " anyLast "; - break; - } - case 120: { - s += " argMax "; - break; - } - case 121: { - s += " argMin "; - break; - } - case 122: { - s += " array "; - break; - } - case 123: { - s += " Array "; - break; - } - case 124: { - s += " arrayAll "; - break; - } - case 125: { - s += " arrayAUC "; - break; - } - case 126: { - s += " arrayCompact "; - break; - } - case 127: { - s += " arrayConcat "; - break; - } - case 128: { - s += " arrayCount "; - break; - } - case 129: { - s += " arrayCumSum "; - break; - } - case 130: { - s += " arrayCumSumNonNegative "; - break; - } - case 131: { - s += " arrayDifference "; - break; - } - case 132: { - s += " arrayDistinct "; - break; - } - case 133: { - s += " arrayElement "; - break; - } - case 134: { - s += " arrayEnumerate "; - break; - } - case 135: { - s += " arrayEnumerateDense "; - break; - } - case 136: { - s += " arrayEnumerateDenseRanked "; - break; - } - case 137: { - s += " arrayEnumerateUniq "; - break; - } - case 138: { - s += " arrayEnumerateUniqRanked "; - break; - } - case 139: { - s += " arrayExists "; - break; - } - case 140: { - s += " arrayFill "; - break; - } - case 141: { - s += " arrayFilter "; - break; - } - case 142: { - s += " arrayFirst "; - break; - } - case 143: { - s += " arrayFirstIndex "; - break; - } - case 144: { - s += " arrayFlatten "; - break; - } - case 145: { - s += " arrayIntersect "; - break; - } - case 146: { - s += " arrayJoin "; - break; - } - case 147: { - s += " ARRAY JOIN "; - break; - } - case 148: { - s += " arrayMap "; - break; - } - case 149: { - s += " arrayPopBack "; - break; - } - case 150: { - s += " arrayPopFront "; - break; - } - case 151: { - s += " arrayPushBack "; - break; - } - case 152: { - s += " arrayPushFront "; - break; - } - case 153: { - s += " arrayReduce "; - break; - } - case 154: { - s += " arrayReduceInRanges "; - break; - } - case 155: { - s += " arrayResize "; - break; - } - case 156: { - s += " arrayReverse "; - break; - } - case 157: { - s += " arrayReverseFill "; - break; - } - case 158: { - s += " arrayReverseSort "; - break; - } - case 159: { - s += " arrayReverseSplit "; - break; - } - case 160: { - s += " arraySlice "; - break; - } - case 161: { - s += " arraySort "; - break; - } - case 162: { - s += " arraySplit "; - break; - } - case 163: { - s += " arraySum "; - break; - } - case 164: { - s += " arrayUniq "; - break; - } - case 165: { - s += " arrayWithConstant "; - break; - } - case 166: { - s += " arrayZip "; - break; - } - case 167: { - s += " AS "; - break; - } - case 168: { - s += " ASC "; - break; - } - case 169: { - s += " ASCENDING "; - break; - } - case 170: { - s += " ASOF "; - break; - } - case 171: { - s += " AST "; - break; - } - case 172: { - s += " ATTACH "; - break; - } - case 173: { - s += " ATTACH PART "; - break; - } - case 174: { - s += " ATTACH PARTITION "; - break; - } - case 175: { - s += " avg "; - break; - } - case 176: { - s += " avgWeighted "; - break; - } - case 177: { - s += " 
basename "; - break; - } - case 178: { - s += " BETWEEN "; - break; - } - case 179: { - s += " BOTH "; - break; - } - case 180: { - s += " boundingRatio "; - break; - } - case 181: { - s += " BY "; - break; - } - case 182: { - s += " CAST "; - break; - } - case 183: { - s += " categoricalInformationValue "; - break; - } - case 184: { - s += " CHECK "; - break; - } - case 185: { - s += " CHECK TABLE "; - break; - } - case 186: { - s += " CLEAR COLUMN "; - break; - } - case 187: { - s += " CLEAR INDEX "; - break; - } - case 188: { - s += " COLLATE "; - break; - } - case 189: { - s += " COLUMNS "; - break; - } - case 190: { - s += " COMMENT COLUMN "; - break; - } - case 191: { - s += " CONSTRAINT "; - break; - } - case 192: { - s += " corr "; - break; - } - case 193: { - s += " corrStable "; - break; - } - case 194: { - s += " count "; - break; - } - case 195: { - s += " countEqual "; - break; - } - case 196: { - s += " covarPop "; - break; - } - case 197: { - s += " covarPopStable "; - break; - } - case 198: { - s += " covarSamp "; - break; - } - case 199: { - s += " covarSampStable "; - break; - } - case 200: { - s += " CREATE "; - break; - } - case 201: { - s += " CROSS "; - break; - } - case 202: { - s += " CUBE "; - break; - } - case 203: { - s += " cutFragment "; - break; - } - case 204: { - s += " cutQueryString "; - break; - } - case 205: { - s += " cutQueryStringAndFragment "; - break; - } - case 206: { - s += " cutToFirstSignificantSubdomain "; - break; - } - case 207: { - s += " cutURLParameter "; - break; - } - case 208: { - s += " cutWWW "; - break; - } - case 209: { - s += " D "; - break; - } - case 210: { - s += " DATABASE "; - break; - } - case 211: { - s += " DATABASES "; - break; - } - case 212: { - s += " Date "; - break; - } - case 213: { - s += " DATE "; - break; - } - case 214: { - s += " DATE_ADD "; - break; - } - case 215: { - s += " DATEADD "; - break; - } - case 216: { - s += " DATE_DIFF "; - break; - } - case 217: { - s += " DATEDIFF "; - break; - } - case 218: { - s += " DATE_SUB "; - break; - } - case 219: { - s += " DATESUB "; - break; - } - case 220: { - s += " DateTime "; - break; - } - case 221: { - s += " DateTime64 "; - break; - } - case 222: { - s += " DAY "; - break; - } - case 223: { - s += " DD "; - break; - } - case 224: { - s += " Decimal "; - break; - } - case 225: { - s += " Decimal128 "; - break; - } - case 226: { - s += " Decimal32 "; - break; - } - case 227: { - s += " Decimal64 "; - break; - } - case 228: { - s += " decodeURLComponent "; - break; - } - case 229: { - s += " DEDUPLICATE "; - break; - } - case 230: { - s += " DELETE "; - break; - } - case 231: { - s += " DELETE WHERE "; - break; - } - case 232: { - s += " DESC "; - break; - } - case 233: { - s += " DESCENDING "; - break; - } - case 234: { - s += " DESCRIBE "; - break; - } - case 235: { - s += " DETACH "; - break; - } - case 236: { - s += " DETACH PARTITION "; - break; - } - case 237: { - s += " DICTIONARIES "; - break; - } - case 238: { - s += " DICTIONARY "; - break; - } - case 239: { - s += " DISTINCT "; - break; - } - case 240: { - s += " domain "; - break; - } - case 241: { - s += " domainWithoutWWW "; - break; - } - case 242: { - s += " DROP "; - break; - } - case 243: { - s += " DROP COLUMN "; - break; - } - case 244: { - s += " DROP CONSTRAINT "; - break; - } - case 245: { - s += " DROP DETACHED PART "; - break; - } - case 246: { - s += " DROP DETACHED PARTITION "; - break; - } - case 247: { - s += " DROP INDEX "; - break; - } - case 248: { - s += " DROP PARTITION "; - break; 
- } - case 249: { - s += " emptyArrayToSingle "; - break; - } - case 250: { - s += " ENGINE "; - break; - } - case 251: { - s += " entropy "; - break; - } - case 252: { - s += " Enum "; - break; - } - case 253: { - s += " Enum16 "; - break; - } - case 254: { - s += " Enum8 "; - break; - } - case 255: { - s += " EVENTS "; - break; - } - case 256: { - s += " EXCHANGE TABLES "; - break; - } - case 257: { - s += " EXISTS "; - break; - } - case 258: { - s += " EXTRACT "; - break; - } - case 259: { - s += " extractURLParameter "; - break; - } - case 260: { - s += " extractURLParameterNames "; - break; - } - case 261: { - s += " extractURLParameters "; - break; - } - case 262: { - s += " FETCH PARTITION "; - break; - } - case 263: { - s += " FETCH PART "; - break; - } - case 264: { - s += " FINAL "; - break; - } - case 265: { - s += " FIRST "; - break; - } - case 266: { - s += " firstSignificantSubdomain "; - break; - } - case 267: { - s += " FixedString "; - break; - } - case 268: { - s += " Float32 "; - break; - } - case 269: { - s += " Float64 "; - break; - } - case 270: { - s += " FOR "; - break; - } - case 271: { - s += " ForEach "; - break; - } - case 272: { - s += " FORMAT "; - break; - } - case 273: { - s += " fragment "; - break; - } - case 274: { - s += " FREEZE "; - break; - } - case 275: { - s += " FROM "; - break; - } - case 276: { - s += " FULL "; - break; - } - case 277: { - s += " FUNCTION "; - break; - } - case 278: { - s += " __getScalar "; - break; - } - case 279: { - s += " GLOBAL "; - break; - } - case 280: { - s += " GRANULARITY "; - break; - } - case 281: { - s += " groupArray "; - break; - } - case 282: { - s += " groupArrayInsertAt "; - break; - } - case 283: { - s += " groupArrayMovingAvg "; - break; - } - case 284: { - s += " groupArrayMovingSum "; - break; - } - case 285: { - s += " groupArraySample "; - break; - } - case 286: { - s += " groupBitAnd "; - break; - } - case 287: { - s += " groupBitmap "; - break; - } - case 288: { - s += " groupBitmapAnd "; - break; - } - case 289: { - s += " groupBitmapOr "; - break; - } - case 290: { - s += " groupBitmapXor "; - break; - } - case 291: { - s += " groupBitOr "; - break; - } - case 292: { - s += " groupBitXor "; - break; - } - case 293: { - s += " GROUP BY "; - break; - } - case 294: { - s += " groupUniqArray "; - break; - } - case 295: { - s += " has "; - break; - } - case 296: { - s += " hasAll "; - break; - } - case 297: { - s += " hasAny "; - break; - } - case 298: { - s += " HAVING "; - break; - } - case 299: { - s += " HH "; - break; - } - case 300: { - s += " histogram "; - break; - } - case 301: { - s += " HOUR "; - break; - } - case 302: { - s += " ID "; - break; - } - case 303: { - s += " if "; - break; - } - case 304: { - s += " IF EXISTS "; - break; - } - case 305: { - s += " IF NOT EXISTS "; - break; - } - case 306: { - s += " IN "; - break; - } - case 307: { - s += " INDEX "; - break; - } - case 308: { - s += " indexOf "; - break; - } - case 309: { - s += " INNER "; - break; - } - case 310: { - s += " IN PARTITION "; - break; - } - case 311: { - s += " INSERT INTO "; - break; - } - case 312: { - s += " Int16 "; - break; - } - case 313: { - s += " Int32 "; - break; - } - case 314: { - s += " Int64 "; - break; - } - case 315: { - s += " Int8 "; - break; - } - case 316: { - s += " INTERVAL "; - break; - } - case 317: { - s += " IntervalDay "; - break; - } - case 318: { - s += " IntervalHour "; - break; - } - case 319: { - s += " IntervalMinute "; - break; - } - case 320: { - s += " IntervalMonth "; - break; - } 
- case 321: { - s += " IntervalQuarter "; - break; - } - case 322: { - s += " IntervalSecond "; - break; - } - case 323: { - s += " IntervalWeek "; - break; - } - case 324: { - s += " IntervalYear "; - break; - } - case 325: { - s += " INTO OUTFILE "; - break; - } - case 326: { - s += " JOIN "; - break; - } - case 327: { - s += " kurtPop "; - break; - } - case 328: { - s += " kurtSamp "; - break; - } - case 329: { - s += " LAST "; - break; - } - case 330: { - s += " LAYOUT "; - break; - } - case 331: { - s += " LEADING "; - break; - } - case 332: { - s += " LEFT "; - break; - } - case 333: { - s += " LEFT ARRAY JOIN "; - break; - } - case 334: { - s += " length "; - break; - } - case 335: { - s += " LIFETIME "; - break; - } - case 336: { - s += " LIKE "; - break; - } - case 337: { - s += " LIMIT "; - break; - } - case 338: { - s += " LIVE "; - break; - } - case 339: { - s += " LOCAL "; - break; - } - case 340: { - s += " LowCardinality "; - break; - } - case 341: { - s += " LTRIM "; - break; - } - case 342: { - s += " M "; - break; - } - case 343: { - s += " MATERIALIZED "; - break; - } - case 344: { - s += " MATERIALIZE INDEX "; - break; - } - case 345: { - s += " MATERIALIZE TTL "; - break; - } - case 346: { - s += " max "; - break; - } - case 347: { - s += " maxIntersections "; - break; - } - case 348: { - s += " maxIntersectionsPosition "; - break; - } - case 349: { - s += " Merge "; - break; - } - case 350: { - s += " MI "; - break; - } - case 351: { - s += " min "; - break; - } - case 352: { - s += " MINUTE "; - break; - } - case 353: { - s += " MM "; - break; - } - case 354: { - s += " MODIFY "; - break; - } - case 355: { - s += " MODIFY COLUMN "; - break; - } - case 356: { - s += " MODIFY ORDER BY "; - break; - } - case 357: { - s += " MODIFY QUERY "; - break; - } - case 358: { - s += " MODIFY SETTING "; - break; - } - case 359: { - s += " MODIFY TTL "; - break; - } - case 360: { - s += " MONTH "; - break; - } - case 361: { - s += " MOVE PART "; - break; - } - case 362: { - s += " MOVE PARTITION "; - break; - } - case 363: { - s += " movingXXX "; - break; - } - case 364: { - s += " N "; - break; - } - case 365: { - s += " NAME "; - break; - } - case 366: { - s += " Nested "; - break; - } - case 367: { - s += " NO DELAY "; - break; - } - case 368: { - s += " NONE "; - break; - } - case 369: { - s += " not "; - break; - } - case 370: { - s += " nothing "; - break; - } - case 371: { - s += " Nothing "; - break; - } - case 372: { - s += " Null "; - break; - } - case 373: { - s += " Nullable "; - break; - } - case 374: { - s += " NULLS "; - break; - } - case 375: { - s += " OFFSET "; - break; - } - case 376: { - s += " ON "; - break; - } - case 377: { - s += " ONLY "; - break; - } - case 378: { - s += " OPTIMIZE TABLE "; - break; - } - case 379: { - s += " ORDER BY "; - break; - } - case 380: { - s += " OR REPLACE "; - break; - } - case 381: { - s += " OUTER "; - break; - } - case 382: { - s += " PARTITION "; - break; - } - case 383: { - s += " PARTITION BY "; - break; - } - case 384: { - s += " path "; - break; - } - case 385: { - s += " pathFull "; - break; - } - case 386: { - s += " POPULATE "; - break; - } - case 387: { - s += " PREWHERE "; - break; - } - case 388: { - s += " PRIMARY KEY "; - break; - } - case 389: { - s += " protocol "; - break; - } - case 390: { - s += " Q "; - break; - } - case 391: { - s += " QQ "; - break; - } - case 392: { - s += " QUARTER "; - break; - } - case 393: { - s += " queryString "; - break; - } - case 394: { - s += " queryStringAndFragment "; - 
break; - } - case 395: { - s += " range "; - break; - } - case 396: { - s += " REFRESH "; - break; - } - case 397: { - s += " RENAME COLUMN "; - break; - } - case 398: { - s += " RENAME TABLE "; - break; - } - case 399: { - s += " REPLACE PARTITION "; - break; - } - case 400: { - s += " Resample "; - break; - } - case 401: { - s += " RESUME "; - break; - } - case 402: { - s += " retention "; - break; - } - case 403: { - s += " RIGHT "; - break; - } - case 404: { - s += " ROLLUP "; - break; - } - case 405: { - s += " RTRIM "; - break; - } - case 406: { - s += " S "; - break; - } - case 407: { - s += " SAMPLE "; - break; - } - case 408: { - s += " SAMPLE BY "; - break; - } - case 409: { - s += " SECOND "; - break; - } - case 410: { - s += " SELECT "; - break; - } - case 411: { - s += " SEMI "; - break; - } - case 412: { - s += " sequenceCount "; - break; - } - case 413: { - s += " sequenceMatch "; - break; - } - case 414: { - s += " SET "; - break; - } - case 415: { - s += " SETTINGS "; - break; - } - case 416: { - s += " SHOW "; - break; - } - case 417: { - s += " SHOW PROCESSLIST "; - break; - } - case 418: { - s += " simpleLinearRegression "; - break; - } - case 419: { - s += " skewPop "; - break; - } - case 420: { - s += " skewSamp "; - break; - } - case 421: { - s += " SOURCE "; - break; - } - case 422: { - s += " SQL_TSI_DAY "; - break; - } - case 423: { - s += " SQL_TSI_HOUR "; - break; - } - case 424: { - s += " SQL_TSI_MINUTE "; - break; - } - case 425: { - s += " SQL_TSI_MONTH "; - break; - } - case 426: { - s += " SQL_TSI_QUARTER "; - break; - } - case 427: { - s += " SQL_TSI_SECOND "; - break; - } - case 428: { - s += " SQL_TSI_WEEK "; - break; - } - case 429: { - s += " SQL_TSI_YEAR "; - break; - } - case 430: { - s += " SS "; - break; - } - case 431: { - s += " State "; - break; - } - case 432: { - s += " stddevPop "; - break; - } - case 433: { - s += " stddevPopStable "; - break; - } - case 434: { - s += " stddevSamp "; - break; - } - case 435: { - s += " stddevSampStable "; - break; - } - case 436: { - s += " STEP "; - break; - } - case 437: { - s += " stochasticLinearRegression "; - break; - } - case 438: { - s += " stochasticLogisticRegression "; - break; - } - case 439: { - s += " String "; - break; - } - case 440: { - s += " SUBSTRING "; - break; - } - case 441: { - s += " sum "; - break; - } - case 442: { - s += " sumKahan "; - break; - } - case 443: { - s += " sumMap "; - break; - } - case 444: { - s += " sumMapFiltered "; - break; - } - case 445: { - s += " sumMapFilteredWithOverflow "; - break; - } - case 446: { - s += " sumMapWithOverflow "; - break; - } - case 447: { - s += " sumWithOverflow "; - break; - } - case 448: { - s += " SUSPEND "; - break; - } - case 449: { - s += " TABLE "; - break; - } - case 450: { - s += " TABLES "; - break; - } - case 451: { - s += " TEMPORARY "; - break; - } - case 452: { - s += " TIMESTAMP "; - break; - } - case 453: { - s += " TIMESTAMP_ADD "; - break; - } - case 454: { - s += " TIMESTAMPADD "; - break; - } - case 455: { - s += " TIMESTAMP_DIFF "; - break; - } - case 456: { - s += " TIMESTAMPDIFF "; - break; - } - case 457: { - s += " TIMESTAMP_SUB "; - break; - } - case 458: { - s += " TIMESTAMPSUB "; - break; - } - case 459: { - s += " TO "; - break; - } - case 460: { - s += " TO DISK "; - break; - } - case 461: { - s += " TOP "; - break; - } - case 462: { - s += " topK "; - break; - } - case 463: { - s += " topKWeighted "; - break; - } - case 464: { - s += " topLevelDomain "; - break; - } - case 465: { - s += " TO TABLE "; - 
break; - } - case 466: { - s += " TOTALS "; - break; - } - case 467: { - s += " TO VOLUME "; - break; - } - case 468: { - s += " TRAILING "; - break; - } - case 469: { - s += " TRIM "; - break; - } - case 470: { - s += " TRUNCATE "; - break; - } - case 471: { - s += " TTL "; - break; - } - case 472: { - s += " Tuple "; - break; - } - case 473: { - s += " TYPE "; - break; - } - case 474: { - s += " UInt16 "; - break; - } - case 475: { - s += " UInt32 "; - break; - } - case 476: { - s += " UInt64 "; - break; - } - case 477: { - s += " UInt8 "; - break; - } - case 478: { - s += " uniq "; - break; - } - case 479: { - s += " uniqCombined "; - break; - } - case 480: { - s += " uniqCombined64 "; - break; - } - case 481: { - s += " uniqExact "; - break; - } - case 482: { - s += " uniqHLL12 "; - break; - } - case 483: { - s += " uniqUpTo "; - break; - } - case 484: { - s += " UPDATE "; - break; - } - case 485: { - s += " URLHierarchy "; - break; - } - case 486: { - s += " URLPathHierarchy "; - break; - } - case 487: { - s += " USE "; - break; - } - case 488: { - s += " USING "; - break; - } - case 489: { - s += " UUID "; - break; - } - case 490: { - s += " VALUES "; - break; - } - case 491: { - s += " varPop "; - break; - } - case 492: { - s += " varPopStable "; - break; - } - case 493: { - s += " varSamp "; - break; - } - case 494: { - s += " varSampStable "; - break; - } - case 495: { - s += " VIEW "; - break; - } - case 496: { - s += " WATCH "; - break; - } - case 497: { - s += " WEEK "; - break; - } - case 498: { - s += " WHERE "; - break; - } - case 499: { - s += " windowFunnel "; - break; - } - case 500: { - s += " WITH "; - break; - } - case 501: { - s += " WITH FILL "; - break; - } - case 502: { - s += " WITH TIES "; - break; - } - case 503: { - s += " WK "; - break; - } - case 504: { - s += " WW "; - break; - } - case 505: { - s += " YEAR "; - break; - } - case 506: { - s += " YY "; - break; - } - case 507: { - s += " YYYY "; - break; - } - default: break; - } -} diff --git a/src/Parsers/fuzzers/codegen_fuzzer/out.proto b/src/Parsers/fuzzers/codegen_fuzzer/out.proto deleted file mode 100644 index 6c8cefce9a3..00000000000 --- a/src/Parsers/fuzzers/codegen_fuzzer/out.proto +++ /dev/null @@ -1,519 +0,0 @@ -syntax = "proto3"; - -message Word { - enum Value { - value_0 = 0; - value_1 = 1; - value_2 = 2; - value_3 = 3; - value_4 = 4; - value_5 = 5; - value_6 = 6; - value_7 = 7; - value_8 = 8; - value_9 = 9; - value_10 = 10; - value_11 = 11; - value_12 = 12; - value_13 = 13; - value_14 = 14; - value_15 = 15; - value_16 = 16; - value_17 = 17; - value_18 = 18; - value_19 = 19; - value_20 = 20; - value_21 = 21; - value_22 = 22; - value_23 = 23; - value_24 = 24; - value_25 = 25; - value_26 = 26; - value_27 = 27; - value_28 = 28; - value_29 = 29; - value_30 = 30; - value_31 = 31; - value_32 = 32; - value_33 = 33; - value_34 = 34; - value_35 = 35; - value_36 = 36; - value_37 = 37; - value_38 = 38; - value_39 = 39; - value_40 = 40; - value_41 = 41; - value_42 = 42; - value_43 = 43; - value_44 = 44; - value_45 = 45; - value_46 = 46; - value_47 = 47; - value_48 = 48; - value_49 = 49; - value_50 = 50; - value_51 = 51; - value_52 = 52; - value_53 = 53; - value_54 = 54; - value_55 = 55; - value_56 = 56; - value_57 = 57; - value_58 = 58; - value_59 = 59; - value_60 = 60; - value_61 = 61; - value_62 = 62; - value_63 = 63; - value_64 = 64; - value_65 = 65; - value_66 = 66; - value_67 = 67; - value_68 = 68; - value_69 = 69; - value_70 = 70; - value_71 = 71; - value_72 = 72; - value_73 = 73; - value_74 = 74; - 
value_75 = 75; - value_76 = 76; - value_77 = 77; - value_78 = 78; - value_79 = 79; - value_80 = 80; - value_81 = 81; - value_82 = 82; - value_83 = 83; - value_84 = 84; - value_85 = 85; - value_86 = 86; - value_87 = 87; - value_88 = 88; - value_89 = 89; - value_90 = 90; - value_91 = 91; - value_92 = 92; - value_93 = 93; - value_94 = 94; - value_95 = 95; - value_96 = 96; - value_97 = 97; - value_98 = 98; - value_99 = 99; - value_100 = 100; - value_101 = 101; - value_102 = 102; - value_103 = 103; - value_104 = 104; - value_105 = 105; - value_106 = 106; - value_107 = 107; - value_108 = 108; - value_109 = 109; - value_110 = 110; - value_111 = 111; - value_112 = 112; - value_113 = 113; - value_114 = 114; - value_115 = 115; - value_116 = 116; - value_117 = 117; - value_118 = 118; - value_119 = 119; - value_120 = 120; - value_121 = 121; - value_122 = 122; - value_123 = 123; - value_124 = 124; - value_125 = 125; - value_126 = 126; - value_127 = 127; - value_128 = 128; - value_129 = 129; - value_130 = 130; - value_131 = 131; - value_132 = 132; - value_133 = 133; - value_134 = 134; - value_135 = 135; - value_136 = 136; - value_137 = 137; - value_138 = 138; - value_139 = 139; - value_140 = 140; - value_141 = 141; - value_142 = 142; - value_143 = 143; - value_144 = 144; - value_145 = 145; - value_146 = 146; - value_147 = 147; - value_148 = 148; - value_149 = 149; - value_150 = 150; - value_151 = 151; - value_152 = 152; - value_153 = 153; - value_154 = 154; - value_155 = 155; - value_156 = 156; - value_157 = 157; - value_158 = 158; - value_159 = 159; - value_160 = 160; - value_161 = 161; - value_162 = 162; - value_163 = 163; - value_164 = 164; - value_165 = 165; - value_166 = 166; - value_167 = 167; - value_168 = 168; - value_169 = 169; - value_170 = 170; - value_171 = 171; - value_172 = 172; - value_173 = 173; - value_174 = 174; - value_175 = 175; - value_176 = 176; - value_177 = 177; - value_178 = 178; - value_179 = 179; - value_180 = 180; - value_181 = 181; - value_182 = 182; - value_183 = 183; - value_184 = 184; - value_185 = 185; - value_186 = 186; - value_187 = 187; - value_188 = 188; - value_189 = 189; - value_190 = 190; - value_191 = 191; - value_192 = 192; - value_193 = 193; - value_194 = 194; - value_195 = 195; - value_196 = 196; - value_197 = 197; - value_198 = 198; - value_199 = 199; - value_200 = 200; - value_201 = 201; - value_202 = 202; - value_203 = 203; - value_204 = 204; - value_205 = 205; - value_206 = 206; - value_207 = 207; - value_208 = 208; - value_209 = 209; - value_210 = 210; - value_211 = 211; - value_212 = 212; - value_213 = 213; - value_214 = 214; - value_215 = 215; - value_216 = 216; - value_217 = 217; - value_218 = 218; - value_219 = 219; - value_220 = 220; - value_221 = 221; - value_222 = 222; - value_223 = 223; - value_224 = 224; - value_225 = 225; - value_226 = 226; - value_227 = 227; - value_228 = 228; - value_229 = 229; - value_230 = 230; - value_231 = 231; - value_232 = 232; - value_233 = 233; - value_234 = 234; - value_235 = 235; - value_236 = 236; - value_237 = 237; - value_238 = 238; - value_239 = 239; - value_240 = 240; - value_241 = 241; - value_242 = 242; - value_243 = 243; - value_244 = 244; - value_245 = 245; - value_246 = 246; - value_247 = 247; - value_248 = 248; - value_249 = 249; - value_250 = 250; - value_251 = 251; - value_252 = 252; - value_253 = 253; - value_254 = 254; - value_255 = 255; - value_256 = 256; - value_257 = 257; - value_258 = 258; - value_259 = 259; - value_260 = 260; - value_261 = 261; - value_262 = 262; - value_263 = 263; - value_264 = 
264; - value_265 = 265; - value_266 = 266; - value_267 = 267; - value_268 = 268; - value_269 = 269; - value_270 = 270; - value_271 = 271; - value_272 = 272; - value_273 = 273; - value_274 = 274; - value_275 = 275; - value_276 = 276; - value_277 = 277; - value_278 = 278; - value_279 = 279; - value_280 = 280; - value_281 = 281; - value_282 = 282; - value_283 = 283; - value_284 = 284; - value_285 = 285; - value_286 = 286; - value_287 = 287; - value_288 = 288; - value_289 = 289; - value_290 = 290; - value_291 = 291; - value_292 = 292; - value_293 = 293; - value_294 = 294; - value_295 = 295; - value_296 = 296; - value_297 = 297; - value_298 = 298; - value_299 = 299; - value_300 = 300; - value_301 = 301; - value_302 = 302; - value_303 = 303; - value_304 = 304; - value_305 = 305; - value_306 = 306; - value_307 = 307; - value_308 = 308; - value_309 = 309; - value_310 = 310; - value_311 = 311; - value_312 = 312; - value_313 = 313; - value_314 = 314; - value_315 = 315; - value_316 = 316; - value_317 = 317; - value_318 = 318; - value_319 = 319; - value_320 = 320; - value_321 = 321; - value_322 = 322; - value_323 = 323; - value_324 = 324; - value_325 = 325; - value_326 = 326; - value_327 = 327; - value_328 = 328; - value_329 = 329; - value_330 = 330; - value_331 = 331; - value_332 = 332; - value_333 = 333; - value_334 = 334; - value_335 = 335; - value_336 = 336; - value_337 = 337; - value_338 = 338; - value_339 = 339; - value_340 = 340; - value_341 = 341; - value_342 = 342; - value_343 = 343; - value_344 = 344; - value_345 = 345; - value_346 = 346; - value_347 = 347; - value_348 = 348; - value_349 = 349; - value_350 = 350; - value_351 = 351; - value_352 = 352; - value_353 = 353; - value_354 = 354; - value_355 = 355; - value_356 = 356; - value_357 = 357; - value_358 = 358; - value_359 = 359; - value_360 = 360; - value_361 = 361; - value_362 = 362; - value_363 = 363; - value_364 = 364; - value_365 = 365; - value_366 = 366; - value_367 = 367; - value_368 = 368; - value_369 = 369; - value_370 = 370; - value_371 = 371; - value_372 = 372; - value_373 = 373; - value_374 = 374; - value_375 = 375; - value_376 = 376; - value_377 = 377; - value_378 = 378; - value_379 = 379; - value_380 = 380; - value_381 = 381; - value_382 = 382; - value_383 = 383; - value_384 = 384; - value_385 = 385; - value_386 = 386; - value_387 = 387; - value_388 = 388; - value_389 = 389; - value_390 = 390; - value_391 = 391; - value_392 = 392; - value_393 = 393; - value_394 = 394; - value_395 = 395; - value_396 = 396; - value_397 = 397; - value_398 = 398; - value_399 = 399; - value_400 = 400; - value_401 = 401; - value_402 = 402; - value_403 = 403; - value_404 = 404; - value_405 = 405; - value_406 = 406; - value_407 = 407; - value_408 = 408; - value_409 = 409; - value_410 = 410; - value_411 = 411; - value_412 = 412; - value_413 = 413; - value_414 = 414; - value_415 = 415; - value_416 = 416; - value_417 = 417; - value_418 = 418; - value_419 = 419; - value_420 = 420; - value_421 = 421; - value_422 = 422; - value_423 = 423; - value_424 = 424; - value_425 = 425; - value_426 = 426; - value_427 = 427; - value_428 = 428; - value_429 = 429; - value_430 = 430; - value_431 = 431; - value_432 = 432; - value_433 = 433; - value_434 = 434; - value_435 = 435; - value_436 = 436; - value_437 = 437; - value_438 = 438; - value_439 = 439; - value_440 = 440; - value_441 = 441; - value_442 = 442; - value_443 = 443; - value_444 = 444; - value_445 = 445; - value_446 = 446; - value_447 = 447; - value_448 = 448; - value_449 = 449; - value_450 = 450; - value_451 = 
451; - value_452 = 452; - value_453 = 453; - value_454 = 454; - value_455 = 455; - value_456 = 456; - value_457 = 457; - value_458 = 458; - value_459 = 459; - value_460 = 460; - value_461 = 461; - value_462 = 462; - value_463 = 463; - value_464 = 464; - value_465 = 465; - value_466 = 466; - value_467 = 467; - value_468 = 468; - value_469 = 469; - value_470 = 470; - value_471 = 471; - value_472 = 472; - value_473 = 473; - value_474 = 474; - value_475 = 475; - value_476 = 476; - value_477 = 477; - value_478 = 478; - value_479 = 479; - value_480 = 480; - value_481 = 481; - value_482 = 482; - value_483 = 483; - value_484 = 484; - value_485 = 485; - value_486 = 486; - value_487 = 487; - value_488 = 488; - value_489 = 489; - value_490 = 490; - value_491 = 491; - value_492 = 492; - value_493 = 493; - value_494 = 494; - value_495 = 495; - value_496 = 496; - value_497 = 497; - value_498 = 498; - value_499 = 499; - value_500 = 500; - value_501 = 501; - value_502 = 502; - value_503 = 503; - value_504 = 504; - value_505 = 505; - value_506 = 506; - value_507 = 507; - } - Value value = 1; - Sentence inner = 2; -} -message Sentence { - repeated Word words = 1; -} \ No newline at end of file From be28e94471f4342e7b73b01b15b56b7b98392f45 Mon Sep 17 00:00:00 2001 From: jasperzhu Date: Tue, 12 Oct 2021 06:30:56 +0530 Subject: [PATCH 168/438] add compile flag -force-vector-width=16 due to llvm autovectorization bug with avx512 --- cmake/cpu_features.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/cpu_features.cmake b/cmake/cpu_features.cmake index 4ea9465be98..e77f330fbc0 100644 --- a/cmake/cpu_features.cmake +++ b/cmake/cpu_features.cmake @@ -129,8 +129,8 @@ else () if (HAVE_AVX2 AND ENABLE_AVX2) set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}") endif () - - set (TEST_FLAG "-mavx512f -mavx512bw") +#we have to add -force-vector-width=16 due to llvm autovectorization bug with avx512 + set (TEST_FLAG "-mavx512f -mavx512bw -force-vector-width=16") set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG} -O0") check_cxx_source_compiles(" #include From 3be83a26d00dcfa4712627fc57a08169919a34d6 Mon Sep 17 00:00:00 2001 From: jasperzhu Date: Tue, 12 Oct 2021 08:24:47 +0530 Subject: [PATCH 169/438] fixed Typos and Styles issues --- src/CMakeLists.txt | 2 +- src/Columns/ColumnFixedString.cpp | 8 ++++---- src/Columns/ColumnVector.cpp | 7 +++---- src/Columns/ColumnsCommon.cpp | 18 +++++++++--------- 4 files changed, 17 insertions(+), 18 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 45bb1a21d59..d5977b31159 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -284,7 +284,7 @@ target_link_libraries (clickhouse_common_io dragonbox_to_chars ) -# Use X86 AVX2/AVX512 instructions to accelerate filter opertions +# Use X86 AVX2/AVX512 instructions to accelerate filter operations set_source_files_properties( Columns/ColumnFixedString.cpp Columns/ColumnsCommon.cpp diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp index 4b31677d37e..65f56495445 100644 --- a/src/Columns/ColumnFixedString.cpp +++ b/src/Columns/ColumnFixedString.cpp @@ -238,13 +238,13 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result */ #if defined(__AVX512F__) && defined(__AVX512BW__) static constexpr size_t SIMD_BYTES = 64; - const __m512i zero64 = _mm512_setzero_epi32(); + const __m512i zero64 = _mm512_setzero_epi32(); const UInt8 * filt_end_avx512 = filt_pos + col_size / SIMD_BYTES * SIMD_BYTES; const size_t chars_per_simd_elements = SIMD_BYTES 
* n; while (filt_pos < filt_end_avx512) - { - uint64_t mask = _mm512_cmp_epi8_mask(_mm512_loadu_si512(reinterpret_cast(filt_pos)), zero64, _MM_CMPINT_GT); + { + uint64_t mask = _mm512_cmp_epi8_mask(_mm512_loadu_si512(reinterpret_cast(filt_pos)), zero64, _MM_CMPINT_GT); if (0xFFFFFFFFFFFFFFFF == mask) { @@ -263,7 +263,7 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result mask = _blsr_u64(mask); #else mask = mask & (mask-1); - #endif + #endif } } data_pos += chars_per_simd_elements; diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 37f60e9f2b9..78922841e37 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -310,7 +310,6 @@ ColumnPtr ColumnVector::filter(const IColumn::Filter & filt, ssize_t result_s const UInt8 * filt_pos = filt.data(); const UInt8 * filt_end = filt_pos + size; const T * data_pos = data.data(); - /** A slightly more optimized version. * Based on the assumption that often pieces of consecutive values * completely pass or do not pass the filter. @@ -318,11 +317,11 @@ ColumnPtr ColumnVector::filter(const IColumn::Filter & filt, ssize_t result_s */ #if defined(__AVX512F__) && defined(__AVX512BW__) static constexpr size_t SIMD_BYTES = 64; - const __m512i zero64 = _mm512_setzero_epi32(); + const __m512i zero64 = _mm512_setzero_epi32(); const UInt8 * filt_end_avx512 = filt_pos + size / SIMD_BYTES * SIMD_BYTES; while (filt_pos < filt_end_avx512) - { + { UInt64 mask = _mm512_cmp_epi8_mask(_mm512_loadu_si512(reinterpret_cast(filt_pos)), zero64, _MM_CMPINT_GT); if (0xFFFFFFFFFFFFFFFF == mask) @@ -339,7 +338,7 @@ ColumnPtr ColumnVector::filter(const IColumn::Filter & filt, ssize_t result_s mask = _blsr_u64(mask); #else mask = mask & (mask-1); - #endif + #endif } } diff --git a/src/Columns/ColumnsCommon.cpp b/src/Columns/ColumnsCommon.cpp index 36c292b4196..d8e0cf74669 100644 --- a/src/Columns/ColumnsCommon.cpp +++ b/src/Columns/ColumnsCommon.cpp @@ -235,8 +235,8 @@ namespace const auto * filt_end_aligned = filt_pos + size / SIMD_BYTES * SIMD_BYTES; while (filt_pos < filt_end_aligned) - { - uint64_t mask = _mm512_cmp_epi8_mask(_mm512_loadu_si512(reinterpret_cast(filt_pos)), zero_vec, _MM_CMPINT_GT); + { + uint64_t mask = _mm512_cmp_epi8_mask(_mm512_loadu_si512(reinterpret_cast(filt_pos)), zero_vec, _MM_CMPINT_GT); if (mask == 0xffffffffffffffff) { @@ -258,18 +258,18 @@ namespace while (mask) { size_t index = __builtin_ctzll(mask); - copy_array(offsets_pos + index); + copy_array(offsets_pos + index); #ifdef __BMI__ mask = _blsr_u64(mask); #else mask = mask & (mask-1); - #endif + #endif } } filt_pos += SIMD_BYTES; offsets_pos += SIMD_BYTES; - } + } #elif defined(__AVX2__) const __m256i zero_vec = _mm256_setzero_si256(); static constexpr size_t SIMD_BYTES = 32; @@ -277,7 +277,7 @@ namespace while (filt_pos < filt_end_aligned) { - uint32_t mask = _mm256_movemask_epi8(_mm256_cmpgt_epi8(_mm256_loadu_si256(reinterpret_cast(filt_pos)), zero_vec)); + uint32_t mask = _mm256_movemask_epi8(_mm256_cmpgt_epi8(_mm256_loadu_si256(reinterpret_cast(filt_pos)), zero_vec)); if (mask == 0xffffffff) { @@ -299,18 +299,18 @@ namespace while (mask) { size_t index = __builtin_ctz(mask); - copy_array(offsets_pos + index); + copy_array(offsets_pos + index); #ifdef __BMI__ mask = _blsr_u32(mask); #else mask = mask & (mask-1); - #endif + #endif } } filt_pos += SIMD_BYTES; offsets_pos += SIMD_BYTES; - } + } #elif defined(__SSE2__) const __m128i zero_vec = _mm_setzero_si128(); static constexpr size_t SIMD_BYTES = 16; From 
8f349907fb2fad802d9ceeb9cab7147d71fb8deb Mon Sep 17 00:00:00 2001 From: jasperzhu Date: Tue, 12 Oct 2021 09:21:54 +0530 Subject: [PATCH 170/438] Fixed trailing whitespaces issue --- src/Columns/ColumnFixedString.cpp | 8 ++++---- src/Columns/ColumnVector.cpp | 6 +++--- src/Columns/ColumnsCommon.cpp | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp index 65f56495445..a84db5701a7 100644 --- a/src/Columns/ColumnFixedString.cpp +++ b/src/Columns/ColumnFixedString.cpp @@ -235,7 +235,7 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result * Based on the assumption that often pieces of consecutive values * completely pass or do not pass the filter. * Therefore, we will optimistically check the parts of `SIMD_BYTES` values. - */ + */ #if defined(__AVX512F__) && defined(__AVX512BW__) static constexpr size_t SIMD_BYTES = 64; const __m512i zero64 = _mm512_setzero_epi32(); @@ -271,13 +271,13 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result } #elif defined(__AVX2__) static constexpr size_t SIMD_BYTES = 32; - const __m256i zero32 = _mm256_setzero_si256(); + const __m256i zero32 = _mm256_setzero_si256(); const UInt8 * filt_end_avx2 = filt_pos + col_size / SIMD_BYTES * SIMD_BYTES; const size_t chars_per_simd_elements = SIMD_BYTES * n; while (filt_pos < filt_end_avx2) { - uint32_t mask = _mm256_movemask_epi8(_mm256_cmpgt_epi8(_mm256_loadu_si256(reinterpret_cast(filt_pos)), zero32)); + uint32_t mask = _mm256_movemask_epi8(_mm256_cmpgt_epi8(_mm256_loadu_si256(reinterpret_cast(filt_pos)), zero32)); if (0xFFFFFFFF == mask) { @@ -296,7 +296,7 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result mask = _blsr_u32(mask); #else mask = mask & (mask-1); - #endif + #endif } } data_pos += chars_per_simd_elements; diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 78922841e37..6bc596008d8 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -348,12 +348,12 @@ ColumnPtr ColumnVector::filter(const IColumn::Filter & filt, ssize_t result_s #elif defined(__AVX2__) static constexpr size_t SIMD_BYTES = 32; - const __m256i zero32 = _mm256_setzero_si256(); + const __m256i zero32 = _mm256_setzero_si256(); const UInt8 * filt_end_avx2 = filt_pos + size / SIMD_BYTES * SIMD_BYTES; while (filt_pos < filt_end_avx2) { - UInt32 mask = _mm256_movemask_epi8(_mm256_cmpgt_epi8(_mm256_loadu_si256(reinterpret_cast(filt_pos)), zero32)); + UInt32 mask = _mm256_movemask_epi8(_mm256_cmpgt_epi8(_mm256_loadu_si256(reinterpret_cast(filt_pos)), zero32)); if (0xFFFFFFFF == mask) { @@ -369,7 +369,7 @@ ColumnPtr ColumnVector::filter(const IColumn::Filter & filt, ssize_t result_s mask = _blsr_u32(mask); #else mask = mask & (mask-1); - #endif + #endif } } diff --git a/src/Columns/ColumnsCommon.cpp b/src/Columns/ColumnsCommon.cpp index d8e0cf74669..845bac79ae0 100644 --- a/src/Columns/ColumnsCommon.cpp +++ b/src/Columns/ColumnsCommon.cpp @@ -237,7 +237,7 @@ namespace while (filt_pos < filt_end_aligned) { uint64_t mask = _mm512_cmp_epi8_mask(_mm512_loadu_si512(reinterpret_cast(filt_pos)), zero_vec, _MM_CMPINT_GT); - + if (mask == 0xffffffffffffffff) { /// SIMD_BYTES consecutive rows pass the filter @@ -350,7 +350,7 @@ namespace filt_pos += SIMD_BYTES; offsets_pos += SIMD_BYTES; - } + } #endif while (filt_pos < filt_end) From 9c7bef4c9d9062a48820233cfed132c2e06d5d7f Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 12 Oct 
2021 17:08:47 +0300 Subject: [PATCH 171/438] Remove unused headers and handle exception 'unrecognised option' in clickhouse-local --- programs/local/LocalServer.cpp | 14 +++++--------- ...96_unknown_option_in_clickhouse_local.reference | 1 + .../02096_unknown_option_in_clickhouse_local.sh | 9 +++++++++ 3 files changed, 15 insertions(+), 9 deletions(-) create mode 100644 tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.reference create mode 100755 tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.sh diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 8066650006a..2035406d73a 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -1,8 +1,6 @@ #include "LocalServer.h" #include -#include -#include #include #include #include @@ -10,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -21,19 +18,14 @@ #include #include #include -#include #include -#include -#include #include #include #include #include #include -#include #include #include -#include #include #include #include @@ -45,7 +37,6 @@ #include #include #include -#include #include #include @@ -722,6 +713,11 @@ int mainEntryClickHouseLocal(int argc, char ** argv) app.init(argc, argv); return app.run(); } + catch (const boost::program_options::error & e) + { + std::cerr << "Bad arguments: " << e.what() << std::endl; + return DB::ErrorCodes::BAD_ARGUMENTS; + } catch (...) { std::cerr << DB::getCurrentExceptionMessage(true) << '\n'; diff --git a/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.reference b/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.reference new file mode 100644 index 00000000000..96feda5dd3c --- /dev/null +++ b/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.reference @@ -0,0 +1 @@ +Bad arguments: unrecognised option '--unknown-option' diff --git a/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.sh b/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.sh new file mode 100755 index 00000000000..ee0e3f3d149 --- /dev/null +++ b/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# shellcheck disable=SC2206 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_LOCAL} --unknown-option 2>&1 echo + From 3540baa33c2d04788f152edb862888e66492e14a Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 12 Oct 2021 11:58:33 +0300 Subject: [PATCH 172/438] Start server under gdb in functional tests --- docker/test/stateless/run.sh | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index ed721690281..ebb72111e96 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -45,6 +45,23 @@ else sudo clickhouse start fi +echo " +set follow-fork-mode child +handle all noprint +handle SIGSEGV stop print +handle SIGBUS stop print +handle SIGABRT stop print +continue +thread apply all backtrace +detach +quit +" > script.gdb + +# FIXME Hung check may work incorrectly because of attached gdb +# 1. False positives are possible +# 2. 
We cannot attach another gdb to get stacktraces if some queries hung +gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" >> /test_output/gdb.log & + if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then sudo -E -u clickhouse /usr/bin/clickhouse server --config /etc/clickhouse-server1/config.xml --daemon \ From a1ab3b6606397c5748d7d478c753b96224a9c328 Mon Sep 17 00:00:00 2001 From: jasperzhu Date: Tue, 12 Oct 2021 22:44:37 +0530 Subject: [PATCH 173/438] Fixed code style issue --- src/Columns/ColumnsCommon.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Columns/ColumnsCommon.cpp b/src/Columns/ColumnsCommon.cpp index 845bac79ae0..804d00d4d7c 100644 --- a/src/Columns/ColumnsCommon.cpp +++ b/src/Columns/ColumnsCommon.cpp @@ -270,7 +270,7 @@ namespace filt_pos += SIMD_BYTES; offsets_pos += SIMD_BYTES; } - #elif defined(__AVX2__) + #elif defined(__AVX2__) const __m256i zero_vec = _mm256_setzero_si256(); static constexpr size_t SIMD_BYTES = 32; const auto * filt_end_aligned = filt_pos + size / SIMD_BYTES * SIMD_BYTES; From f1bbc7f9b633f5f3948358e574dad4d71c3f7199 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 12 Oct 2021 23:17:15 +0300 Subject: [PATCH 174/438] Fix hardware utilization info printing in client --- docs/en/interfaces/cli.md | 1 + src/Client/ClientBase.cpp | 5 +++- src/Common/ProgressIndication.cpp | 41 +++++++++++++++++-------------- src/Common/ProgressIndication.h | 2 ++ 4 files changed, 30 insertions(+), 19 deletions(-) diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md index 70b7d59b037..c4305d229cf 100644 --- a/docs/en/interfaces/cli.md +++ b/docs/en/interfaces/cli.md @@ -127,6 +127,7 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va - `--secure` – If specified, will connect to server over secure connection. - `--history_file` — Path to a file containing command history. - `--param_` — Value for a [query with parameters](#cli-queries-with-parameters). +- `--hardware-utilization` — Print hardware utilization information in progress bar. Since version 20.5, `clickhouse-client` has automatic syntax highlighting (always enabled). 
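The `--hardware-utilization` switch documented above is wired up in the client changes that follow: the option is registered through boost::program_options and, when present, sets `progress_indication.print_hardware_utilization` so the progress output includes the thread/core/RAM summary. A minimal sketch of that wiring is shown below; it is not the actual ClickHouse client code, and the `ProgressOptions` struct is an illustrative stand-in for the real progress-indication object.

``` cpp
// Minimal sketch: parse a boolean switch and expose it as a flag that
// progress-reporting code can check later. Only Boost.Program_options is assumed.
#include <boost/program_options.hpp>
#include <iostream>

namespace po = boost::program_options;

struct ProgressOptions
{
    bool print_hardware_utilization = false; // include hardware info in the progress bar
};

int main(int argc, char ** argv)
{
    po::options_description description("options");
    description.add_options()
        ("hardware-utilization", "print hardware utilization information in progress bar");

    po::variables_map options;
    po::store(po::parse_command_line(argc, argv, description), options);
    po::notify(options);

    ProgressOptions progress_indication;
    if (options.count("hardware-utilization"))
        progress_indication.print_hardware_utilization = true;

    std::cout << "hardware utilization output enabled: " << std::boolalpha
              << progress_indication.print_hardware_utilization << '\n';
    return 0;
}
```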
diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index baf082a3541..c93645a1f8a 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -668,7 +668,7 @@ void ClientBase::onEndOfStream() void ClientBase::onProfileEvents(Block & block) { const auto rows = block.rows(); - if (rows == 0) + if (rows == 0 || !progress_indication.print_hardware_utilization) return; const auto & array_thread_id = typeid_cast(*block.getByName("thread_id").column).getData(); const auto & names = typeid_cast(*block.getByName("name").column); @@ -1560,6 +1560,7 @@ void ClientBase::init(int argc, char ** argv) ("ignore-error", "do not stop processing in multiquery mode") ("stacktrace", "print stack traces of exceptions") + ("hardware-utilization", "print hardware utilization information in progress bar") ; addAndCheckOptions(options_description, options, common_arguments); @@ -1626,6 +1627,8 @@ void ClientBase::init(int argc, char ** argv) config().setBool("verbose", true); if (options.count("log-level")) Poco::Logger::root().setLevel(options["log-level"].as()); + if (options.count("hardware-utilization")) + progress_indication.print_hardware_utilization = true; query_processing_stage = QueryProcessingStage::fromString(options["stage"].as()); diff --git a/src/Common/ProgressIndication.cpp b/src/Common/ProgressIndication.cpp index 0fe40b306cb..4510952cc71 100644 --- a/src/Common/ProgressIndication.cpp +++ b/src/Common/ProgressIndication.cpp @@ -6,6 +6,7 @@ #include #include #include +#include "IO/WriteBufferFromString.h" #include @@ -189,6 +190,26 @@ void ProgressIndication::writeProgress() written_progress_chars = message.count() - prefix_size - (strlen(indicator) - 2); /// Don't count invisible output (escape sequences). + // If approximate cores number is known, display it. + auto cores_number = getApproximateCoresNumber(); + std::string profiling_msg; + if (cores_number != 0 && print_hardware_utilization) + { + WriteBufferFromOwnString profiling_msg_builder; + // Calculated cores number may be not accurate + // so it's better to print min(threads, cores). + UInt64 threads_number = getUsedThreadsCount(); + profiling_msg_builder << " Running " << threads_number << " threads on " + << std::min(cores_number, threads_number) << " cores"; + + auto memory_usage = getMemoryUsage(); + if (memory_usage != 0) + profiling_msg_builder << " with " << formatReadableSizeWithDecimalSuffix(memory_usage) << " RAM used."; + else + profiling_msg_builder << "."; + profiling_msg = profiling_msg_builder.str(); + } + /// If the approximate number of rows to process is known, we can display a progress bar and percentage. if (progress.total_rows_to_read || progress.total_raw_bytes_to_read) { @@ -215,7 +236,7 @@ void ProgressIndication::writeProgress() if (show_progress_bar) { - ssize_t width_of_progress_bar = static_cast(terminal_width) - written_progress_chars - strlen(" 99%"); + ssize_t width_of_progress_bar = static_cast(terminal_width) - written_progress_chars - strlen(" 99%") - profiling_msg.length(); if (width_of_progress_bar > 0) { std::string bar @@ -231,23 +252,7 @@ void ProgressIndication::writeProgress() message << ' ' << (99 * current_count / max_count) << '%'; } - // If approximate cores number is known, display it. - auto cores_number = getApproximateCoresNumber(); - if (cores_number != 0) - { - // Calculated cores number may be not accurate - // so it's better to print min(threads, cores). 
- UInt64 threads_number = getUsedThreadsCount(); - message << " Running " << threads_number << " threads on " - << std::min(cores_number, threads_number) << " cores"; - - auto memory_usage = getMemoryUsage(); - if (memory_usage != 0) - message << " with " << formatReadableSizeWithDecimalSuffix(memory_usage) << " RAM used."; - else - message << "."; - } - + message << profiling_msg; message << CLEAR_TO_END_OF_LINE; ++increment; diff --git a/src/Common/ProgressIndication.h b/src/Common/ProgressIndication.h index 3d9bbc7f3ff..9b1b2b0b145 100644 --- a/src/Common/ProgressIndication.h +++ b/src/Common/ProgressIndication.h @@ -60,6 +60,8 @@ public: void updateThreadEventData(HostToThreadTimesMap & new_thread_data); + bool print_hardware_utilization = false; + private: size_t getUsedThreadsCount() const; From 002c8d3a802a6db935e2ffd33ede7dbf9f6b0107 Mon Sep 17 00:00:00 2001 From: jasperzhu Date: Wed, 13 Oct 2021 03:38:18 +0530 Subject: [PATCH 175/438] add disable vectorize flag due to llvm bug --- cmake/cpu_features.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/cpu_features.cmake b/cmake/cpu_features.cmake index e77f330fbc0..f0750c69294 100644 --- a/cmake/cpu_features.cmake +++ b/cmake/cpu_features.cmake @@ -129,8 +129,8 @@ else () if (HAVE_AVX2 AND ENABLE_AVX2) set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}") endif () -#we have to add -force-vector-width=16 due to llvm autovectorization bug with avx512 - set (TEST_FLAG "-mavx512f -mavx512bw -force-vector-width=16") +#Disable vectorize due to llvm autovectorization bug with avx512 + set (TEST_FLAG "-mavx512f -mavx512bw -fno-slp-vectorize -fno-vectorize") set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG} -O0") check_cxx_source_compiles(" #include From f08dd0e8d8c475c7949a23d8b20499509e0c298c Mon Sep 17 00:00:00 2001 From: jasperzhu Date: Wed, 13 Oct 2021 05:13:34 +0530 Subject: [PATCH 176/438] modify flags definitions --- cmake/cpu_features.cmake | 37 +++++++++++++++++++++++++------ src/Columns/ColumnFixedString.cpp | 2 +- src/Columns/ColumnVector.cpp | 2 +- src/Columns/ColumnsCommon.cpp | 2 +- 4 files changed, 33 insertions(+), 10 deletions(-) diff --git a/cmake/cpu_features.cmake b/cmake/cpu_features.cmake index f0750c69294..44a804e64af 100644 --- a/cmake/cpu_features.cmake +++ b/cmake/cpu_features.cmake @@ -18,8 +18,10 @@ option (ENABLE_PCLMULQDQ "Use pclmulqdq instructions on x86_64" 1) option (ENABLE_POPCNT "Use popcnt instructions on x86_64" 1) option (ENABLE_AVX "Use AVX instructions on x86_64" 0) option (ENABLE_AVX2 "Use AVX2 instructions on x86_64" 0) -option (ENABLE_AVX512 "Use AVX512 instructions on x86_64" 1) -option (ENABLE_BMI "Use BMI instructions on x86_64" 1) +option (ENABLE_AVX512 "Use AVX512 instructions on x86_64" 0) +option (ENABLE_BMI "Use BMI instructions on x86_64" 0) +option (ENABLE_AVX2_FOR_SPEC_OP "Use avx2 instructions for specific operations on x86_64" 1) +option (ENABLE_AVX512_FOR_SPEC_OP "Use avx512 instructions for specific operations on x86_64" 0) option (ARCH_NATIVE "Add -march=native compiler flag. This makes your binaries non-portable but more performant code may be generated. This option overrides ENABLE_* options for specific instruction set. Highly not recommended to use." 
0) @@ -129,8 +131,8 @@ else () if (HAVE_AVX2 AND ENABLE_AVX2) set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}") endif () -#Disable vectorize due to llvm autovectorization bug with avx512 - set (TEST_FLAG "-mavx512f -mavx512bw -fno-slp-vectorize -fno-vectorize") + + set (TEST_FLAG "-mavx512f -mavx512bw") set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG} -O0") check_cxx_source_compiles(" #include @@ -143,7 +145,7 @@ else () } " HAVE_AVX512) if (HAVE_AVX512 AND ENABLE_AVX512) - set(X86_INTRINSICS_FLAGS "${X86_INTRINSICS_FLAGS} ${TEST_FLAG}") + set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}") endif () set (TEST_FLAG "-mbmi") @@ -157,8 +159,29 @@ else () } " HAVE_BMI) if (HAVE_BMI AND ENABLE_BMI) - set(X86_INTRINSICS_FLAGS "${X86_INTRINSICS_FLAGS} ${TEST_FLAG}") - endif () + set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}") + endif () + + if (ENABLE_AVX2_FOR_SPEC_OP) + set (X86_INTRINSICS_FLAGS "") + if (HAVE_BMI) + set (X86_INTRINSICS_FLAGS "${X86_INTRINSICS_FLAGS} -mbmi") + endif () + if (HAVE_AVX AND HAVE_AVX2) + set (X86_INTRINSICS_FLAGS "${X86_INTRINSICS_FLAGS} -mavx -mavx2") + endif () + endif () + + if (ENABLE_AVX512_FOR_SPEC_OP) + set (X86_INTRINSICS_FLAGS "") + if (HAVE_BMI) + set (X86_INTRINSICS_FLAGS "${X86_INTRINSICS_FLAGS} -mbmi") + endif () + if (HAVE_AVX512) + set (X86_INTRINSICS_FLAGS "${X86_INTRINSICS_FLAGS} -mavx512f -mavx512bw") + endif () + endif () + endif () cmake_pop_check_state () diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp index a84db5701a7..729d82da41d 100644 --- a/src/Columns/ColumnFixedString.cpp +++ b/src/Columns/ColumnFixedString.cpp @@ -269,7 +269,7 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result data_pos += chars_per_simd_elements; filt_pos += SIMD_BYTES; } -#elif defined(__AVX2__) +#elif defined(__AVX__) && defined(__AVX2__) static constexpr size_t SIMD_BYTES = 32; const __m256i zero32 = _mm256_setzero_si256(); const UInt8 * filt_end_avx2 = filt_pos + col_size / SIMD_BYTES * SIMD_BYTES; diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 6bc596008d8..4793b6bd9d5 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -346,7 +346,7 @@ ColumnPtr ColumnVector::filter(const IColumn::Filter & filt, ssize_t result_s data_pos += SIMD_BYTES; } -#elif defined(__AVX2__) +#elif defined(__AVX__) && defined(__AVX2__) static constexpr size_t SIMD_BYTES = 32; const __m256i zero32 = _mm256_setzero_si256(); const UInt8 * filt_end_avx2 = filt_pos + size / SIMD_BYTES * SIMD_BYTES; diff --git a/src/Columns/ColumnsCommon.cpp b/src/Columns/ColumnsCommon.cpp index 804d00d4d7c..ea5ca4e45b4 100644 --- a/src/Columns/ColumnsCommon.cpp +++ b/src/Columns/ColumnsCommon.cpp @@ -270,7 +270,7 @@ namespace filt_pos += SIMD_BYTES; offsets_pos += SIMD_BYTES; } - #elif defined(__AVX2__) + #elif defined(__AVX__) && defined(__AVX2__) const __m256i zero_vec = _mm256_setzero_si256(); static constexpr size_t SIMD_BYTES = 32; const auto * filt_end_aligned = filt_pos + size / SIMD_BYTES * SIMD_BYTES; From 0ad20e661329525c5385de4a43d0a0af94544a0e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Oct 2021 03:14:44 +0300 Subject: [PATCH 177/438] Fix test --- src/IO/ReadHelpers.h | 14 ++++++++------ tests/queries/0_stateless/01888_read_int_safe.sql | 4 ++-- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index bfb30e8b95c..5077ee2df21 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -276,11 +276,13 
@@ ReturnType readIntTextImpl(T & x, ReadBuffer & buf) { case '+': { + /// 123+ or +123+, just stop after 123 or +123. + if (has_number) + return ReturnType(true); + + /// No digits read yet, but we already read sign, like ++, -+. if (has_sign) { - if (has_number) - return ReturnType(true); - if constexpr (throw_exception) throw ParsingException( "Cannot parse number with multiple sign (+/-) characters", @@ -294,11 +296,11 @@ ReturnType readIntTextImpl(T & x, ReadBuffer & buf) } case '-': { + if (has_number) + return ReturnType(true); + if (has_sign) { - if (has_number) - return ReturnType(true); - if constexpr (throw_exception) throw ParsingException( "Cannot parse number with multiple sign (+/-) characters", diff --git a/tests/queries/0_stateless/01888_read_int_safe.sql b/tests/queries/0_stateless/01888_read_int_safe.sql index 3caa4878aba..3aea8e38ab0 100644 --- a/tests/queries/0_stateless/01888_read_int_safe.sql +++ b/tests/queries/0_stateless/01888_read_int_safe.sql @@ -3,8 +3,8 @@ select toInt64('+-1'); -- { serverError 72; } select toInt64('++1'); -- { serverError 72; } select toInt64('++'); -- { serverError 72; } select toInt64('+'); -- { serverError 72; } -select toInt64('1+1'); -- { serverError 72; } -select toInt64('1-1'); -- { serverError 72; } +select toInt64('1+1'); -- { serverError 6; } +select toInt64('1-1'); -- { serverError 6; } select toInt64(''); -- { serverError 32; } select toInt64('1'); select toInt64('-1'); From 798d8c7c1772eec4ca679c0f7e42eb5fc3d2b6e6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Oct 2021 03:16:09 +0300 Subject: [PATCH 178/438] Fix test --- .../0_stateless/01425_decimal_parse_big_negative_exponent.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01425_decimal_parse_big_negative_exponent.sql b/tests/queries/0_stateless/01425_decimal_parse_big_negative_exponent.sql index 7d0993c1bfc..7f276d1f8d4 100644 --- a/tests/queries/0_stateless/01425_decimal_parse_big_negative_exponent.sql +++ b/tests/queries/0_stateless/01425_decimal_parse_big_negative_exponent.sql @@ -1,4 +1,4 @@ -SELECT '-1E9-1E9-1E9-1E9' AS x, toDecimal32(x, 0); -- { serverError 72 } +SELECT '-1E9-1E9-1E9-1E9' AS x, toDecimal32(x, 0); -- { serverError 6 } SELECT '-1E9' AS x, toDecimal32(x, 0); -- { serverError 69 } SELECT '1E-9' AS x, toDecimal32(x, 0); SELECT '1E-8' AS x, toDecimal32(x, 0); From 80760c0a0c66711757fc087679112df27a57a9c3 Mon Sep 17 00:00:00 2001 From: jasperzhu Date: Wed, 13 Oct 2021 05:59:20 +0530 Subject: [PATCH 179/438] revise compile definitions --- cmake/cpu_features.cmake | 28 +++------------------------- src/CMakeLists.txt | 7 ------- 2 files changed, 3 insertions(+), 32 deletions(-) diff --git a/cmake/cpu_features.cmake b/cmake/cpu_features.cmake index 44a804e64af..c5a6eaf9c8f 100644 --- a/cmake/cpu_features.cmake +++ b/cmake/cpu_features.cmake @@ -16,12 +16,10 @@ option (ENABLE_SSE41 "Use SSE4.1 instructions on x86_64" 1) option (ENABLE_SSE42 "Use SSE4.2 instructions on x86_64" 1) option (ENABLE_PCLMULQDQ "Use pclmulqdq instructions on x86_64" 1) option (ENABLE_POPCNT "Use popcnt instructions on x86_64" 1) -option (ENABLE_AVX "Use AVX instructions on x86_64" 0) -option (ENABLE_AVX2 "Use AVX2 instructions on x86_64" 0) +option (ENABLE_AVX "Use AVX instructions on x86_64" 1) +option (ENABLE_AVX2 "Use AVX2 instructions on x86_64" 1) option (ENABLE_AVX512 "Use AVX512 instructions on x86_64" 0) -option (ENABLE_BMI "Use BMI instructions on x86_64" 0) -option (ENABLE_AVX2_FOR_SPEC_OP "Use avx2 instructions for 
specific operations on x86_64" 1) -option (ENABLE_AVX512_FOR_SPEC_OP "Use avx512 instructions for specific operations on x86_64" 0) +option (ENABLE_BMI "Use BMI instructions on x86_64" 1) option (ARCH_NATIVE "Add -march=native compiler flag. This makes your binaries non-portable but more performant code may be generated. This option overrides ENABLE_* options for specific instruction set. Highly not recommended to use." 0) @@ -162,26 +160,6 @@ else () set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}") endif () - if (ENABLE_AVX2_FOR_SPEC_OP) - set (X86_INTRINSICS_FLAGS "") - if (HAVE_BMI) - set (X86_INTRINSICS_FLAGS "${X86_INTRINSICS_FLAGS} -mbmi") - endif () - if (HAVE_AVX AND HAVE_AVX2) - set (X86_INTRINSICS_FLAGS "${X86_INTRINSICS_FLAGS} -mavx -mavx2") - endif () - endif () - - if (ENABLE_AVX512_FOR_SPEC_OP) - set (X86_INTRINSICS_FLAGS "") - if (HAVE_BMI) - set (X86_INTRINSICS_FLAGS "${X86_INTRINSICS_FLAGS} -mbmi") - endif () - if (HAVE_AVX512) - set (X86_INTRINSICS_FLAGS "${X86_INTRINSICS_FLAGS} -mavx512f -mavx512bw") - endif () - endif () - endif () cmake_pop_check_state () diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d5977b31159..cac5b70f489 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -284,13 +284,6 @@ target_link_libraries (clickhouse_common_io dragonbox_to_chars ) -# Use X86 AVX2/AVX512 instructions to accelerate filter operations -set_source_files_properties( - Columns/ColumnFixedString.cpp - Columns/ColumnsCommon.cpp - Columns/ColumnVector.cpp - PROPERTIES COMPILE_FLAGS "${X86_INTRINSICS_FLAGS}") - if(RE2_LIBRARY) target_link_libraries(clickhouse_common_io PUBLIC ${RE2_LIBRARY}) endif() From 4b080489b5a15d89bd6b8b84aeae0f23acc5ed06 Mon Sep 17 00:00:00 2001 From: jasperzhu Date: Wed, 13 Oct 2021 07:34:21 +0530 Subject: [PATCH 180/438] revise flags definitions --- cmake/cpu_features.cmake | 28 +++++++++++++++++++++++++--- src/CMakeLists.txt | 7 +++++++ 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/cmake/cpu_features.cmake b/cmake/cpu_features.cmake index c5a6eaf9c8f..41a3dc26d5a 100644 --- a/cmake/cpu_features.cmake +++ b/cmake/cpu_features.cmake @@ -16,10 +16,12 @@ option (ENABLE_SSE41 "Use SSE4.1 instructions on x86_64" 1) option (ENABLE_SSE42 "Use SSE4.2 instructions on x86_64" 1) option (ENABLE_PCLMULQDQ "Use pclmulqdq instructions on x86_64" 1) option (ENABLE_POPCNT "Use popcnt instructions on x86_64" 1) -option (ENABLE_AVX "Use AVX instructions on x86_64" 1) -option (ENABLE_AVX2 "Use AVX2 instructions on x86_64" 1) +option (ENABLE_AVX "Use AVX instructions on x86_64" 0) +option (ENABLE_AVX2 "Use AVX2 instructions on x86_64" 0) option (ENABLE_AVX512 "Use AVX512 instructions on x86_64" 0) -option (ENABLE_BMI "Use BMI instructions on x86_64" 1) +option (ENABLE_BMI "Use BMI instructions on x86_64" 0) +option (ENABLE_AVX2_FOR_SPEC_OP "Use avx2 instructions for specific operations on x86_64" 0) +option (ENABLE_AVX512_FOR_SPEC_OP "Use avx512 instructions for specific operations on x86_64" 0) option (ARCH_NATIVE "Add -march=native compiler flag. This makes your binaries non-portable but more performant code may be generated. This option overrides ENABLE_* options for specific instruction set. Highly not recommended to use." 
0) @@ -160,6 +162,26 @@ else () set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}") endif () +#Limit avx2/avx512 flag for specific source build + if (ENABLE_AVX2_FOR_SPEC_OP) + set (X86_INTRINSICS_FLAGS "") + if (HAVE_BMI) + set (X86_INTRINSICS_FLAGS "${X86_INTRINSICS_FLAGS} -mbmi") + endif () + if (HAVE_AVX AND HAVE_AVX2) + set (X86_INTRINSICS_FLAGS "${X86_INTRINSICS_FLAGS} -mavx -mavx2") + endif () + endif () + + if (ENABLE_AVX512_FOR_SPEC_OP) + set (X86_INTRINSICS_FLAGS "") + if (HAVE_BMI) + set (X86_INTRINSICS_FLAGS "${X86_INTRINSICS_FLAGS} -mbmi") + endif () + if (HAVE_AVX512) + set (X86_INTRINSICS_FLAGS "${X86_INTRINSICS_FLAGS} -mavx512f -mavx512bw") + endif () + endif () endif () cmake_pop_check_state () diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index cac5b70f489..d5977b31159 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -284,6 +284,13 @@ target_link_libraries (clickhouse_common_io dragonbox_to_chars ) +# Use X86 AVX2/AVX512 instructions to accelerate filter operations +set_source_files_properties( + Columns/ColumnFixedString.cpp + Columns/ColumnsCommon.cpp + Columns/ColumnVector.cpp + PROPERTIES COMPILE_FLAGS "${X86_INTRINSICS_FLAGS}") + if(RE2_LIBRARY) target_link_libraries(clickhouse_common_io PUBLIC ${RE2_LIBRARY}) endif() From 23602f46075821871fda78eaa238dd4a8eac9864 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Mon, 30 Aug 2021 23:29:09 +0800 Subject: [PATCH 181/438] Speed up part loading for JBOD --- src/Storages/MergeTree/MergeTreeData.cpp | 485 +++++++++++++++-------- src/Storages/MergeTree/MergeTreeData.h | 15 + 2 files changed, 333 insertions(+), 167 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 8821991bae3..b88a152e1cb 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -897,6 +897,261 @@ Int64 MergeTreeData::getMaxBlockNumber() const return max_block_num; } +void MergeTreeData::loadDataPartsFromDisk( + DataPartsVector & broken_parts_to_detach, + DataPartsVector & duplicate_parts_to_remove, + ThreadPool & pool, + size_t num_parts, + std::queue>> & parts_queue, + bool skip_sanity_checks, + const MergeTreeSettingsPtr & settings) +{ + /// Parallel loading of data parts. + pool.setMaxThreads(std::min(size_t(settings->max_part_loading_threads), num_parts)); + size_t num_threads = pool.getMaxThreads(); + std::vector parts_per_thread(num_threads, num_parts / num_threads); + for (size_t i = 0ul; i < num_parts % num_threads; ++i) + ++parts_per_thread[i]; + + /// Prepare data parts for parallel loading. Threads will focus on given disk first, then steal + /// others' tasks when finish current disk part loading process. 
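// Worked example of the distribution implemented below (illustrative numbers, not
// part of the patch): with two disks A (6 parts) and B (4 parts) and 3 loader
// threads, the per-thread quotas are {4, 3, 3}; thread 0 takes 4 parts from A,
// thread 1 takes 3 from B, and thread 2 takes the remaining 2 from A plus 1 from B,
// because a disk that still has parts is rotated to the back of the queue before
// the next thread is served. Threads that exhaust their own list early steal
// parts from a randomly chosen peer that still has work.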
+ std::vector>> threads_parts(num_threads); + std::set remaining_thread_parts; + std::queue threads_queue; + for (size_t i = 0; i < num_threads; ++i) + { + remaining_thread_parts.insert(i); + threads_queue.push(i); + } + + while (!parts_queue.empty()) + { + assert(!threads_queue.empty()); + size_t i = threads_queue.front(); + auto & need_parts = parts_per_thread[i]; + assert(need_parts > 0); + auto & thread_parts = threads_parts[i]; + auto & current_parts = parts_queue.front(); + assert(!current_parts.empty()); + auto parts_to_grab = std::min(need_parts, current_parts.size()); + + thread_parts.insert(thread_parts.end(), current_parts.end() - parts_to_grab, current_parts.end()); + current_parts.resize(current_parts.size() - parts_to_grab); + need_parts -= parts_to_grab; + + /// Before processing next thread, change disk if possible. + /// Different threads will likely start loading parts from different disk, + /// which may improve read parallelism for JBOD. + + /// If current disk still has some parts, push it to the tail. + if (!current_parts.empty()) + parts_queue.push(std::move(current_parts)); + parts_queue.pop(); + + /// If current thread still want some parts, push it to the tail. + if (need_parts > 0) + threads_queue.push(i); + threads_queue.pop(); + } + assert(threads_queue.empty()); + assert(std::all_of(threads_parts.begin(), threads_parts.end(), [](const std::vector> & parts) + { + return !parts.empty(); + })); + + size_t suspicious_broken_parts = 0; + size_t suspicious_broken_parts_bytes = 0; + std::atomic has_adaptive_parts = false; + std::atomic has_non_adaptive_parts = false; + + std::mutex mutex; + auto load_part = [&](const String & part_name, const DiskPtr & part_disk_ptr) + { + auto part_opt = MergeTreePartInfo::tryParsePartName(part_name, format_version); + if (!part_opt) + return; + const auto & part_info = *part_opt; + auto single_disk_volume = std::make_shared("volume_" + part_name, part_disk_ptr, 0); + auto part = createPart(part_name, part_info, single_disk_volume, part_name); + bool broken = false; + + String part_path = fs::path(relative_data_path) / part_name; + String marker_path = fs::path(part_path) / IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME; + if (part_disk_ptr->exists(marker_path)) + { + /// NOTE: getBytesOnDisk() cannot be used here, since it maybe zero of checksums.txt will not exist + size_t size_of_part = IMergeTreeDataPart::calculateTotalSizeOnDisk(part->volume->getDisk(), part->getFullRelativePath()); + LOG_WARNING(log, + "Detaching stale part {}{} (size: {}), which should have been deleted after a move. " + "That can only happen after unclean restart of ClickHouse after move of a part having an operation blocking that stale copy of part.", + getFullPathOnDisk(part_disk_ptr), part_name, formatReadableSizeWithBinarySuffix(size_of_part)); + std::lock_guard loading_lock(mutex); + broken_parts_to_detach.push_back(part); + ++suspicious_broken_parts; + suspicious_broken_parts_bytes += size_of_part; + return; + } + + try + { + part->loadColumnsChecksumsIndexes(require_part_metadata, true); + } + catch (const Exception & e) + { + /// Don't count the part as broken if there is not enough memory to load it. + /// In fact, there can be many similar situations. + /// But it is OK, because there is a safety guard against deleting too many parts. + if (isNotEnoughMemoryErrorCode(e.code())) + throw; + + broken = true; + tryLogCurrentException(__PRETTY_FUNCTION__); + } + catch (...) 
+ { + broken = true; + tryLogCurrentException(__PRETTY_FUNCTION__); + } + + /// Ignore broken parts that can appear as a result of hard server restart. + if (broken) + { + /// NOTE: getBytesOnDisk() cannot be used here, since it maybe zero of checksums.txt will not exist + size_t size_of_part = IMergeTreeDataPart::calculateTotalSizeOnDisk(part->volume->getDisk(), part->getFullRelativePath()); + + LOG_ERROR(log, + "Detaching broken part {}{} (size: {}). " + "If it happened after update, it is likely because of backward incompability. " + "You need to resolve this manually", + getFullPathOnDisk(part_disk_ptr), part_name, formatReadableSizeWithBinarySuffix(size_of_part)); + std::lock_guard loading_lock(mutex); + broken_parts_to_detach.push_back(part); + ++suspicious_broken_parts; + suspicious_broken_parts_bytes += size_of_part; + return; + } + if (!part->index_granularity_info.is_adaptive) + has_non_adaptive_parts.store(true, std::memory_order_relaxed); + else + has_adaptive_parts.store(true, std::memory_order_relaxed); + + part->modification_time = part_disk_ptr->getLastModified(fs::path(relative_data_path) / part_name).epochTime(); + /// Assume that all parts are Committed, covered parts will be detected and marked as Outdated later + part->setState(DataPartState::Committed); + + std::lock_guard loading_lock(mutex); + auto [it, inserted] = data_parts_indexes.insert(part); + /// Remove duplicate parts with the same checksum. + if (!inserted) + { + if ((*it)->checksums.getTotalChecksumHex() == part->checksums.getTotalChecksumHex()) + { + LOG_ERROR(log, "Remove duplicate part {}", part->getFullPath()); + duplicate_parts_to_remove.push_back(part); + } + else + throw Exception("Part " + part->name + " already exists but with different checksums", ErrorCodes::DUPLICATE_DATA_PART); + } + + addPartContributionToDataVolume(part); + }; + + std::mutex part_select_mutex; + try + { + for (size_t thread = 0; thread < num_threads; ++thread) + { + pool.scheduleOrThrowOnError([&, thread] + { + while (true) + { + std::pair thread_part; + { + const std::lock_guard lock{part_select_mutex}; + + if (remaining_thread_parts.empty()) + return; + + /// Steal task if nothing to do + auto thread_idx = thread; + if (threads_parts[thread].empty()) + { + // Try random steal tasks from the next thread + std::uniform_int_distribution distribution(0, remaining_thread_parts.size() - 1); + auto it = remaining_thread_parts.begin(); + std::advance(it, distribution(thread_local_rng)); + thread_idx = *it; + } + auto & thread_parts = threads_parts[thread_idx]; + thread_part = thread_parts.back(); + thread_parts.pop_back(); + if (thread_parts.empty()) + remaining_thread_parts.erase(thread_idx); + } + load_part(thread_part.first, thread_part.second); + } + }); + } + } + catch (...) + { + /// If this is not done, then in case of an exception, tasks will be destroyed before the threads are completed, and it will be bad. 
+ pool.wait(); + throw; + } + + pool.wait(); + + if (has_non_adaptive_parts && has_adaptive_parts && !settings->enable_mixed_granularity_parts) + throw Exception( + "Table contains parts with adaptive and non adaptive marks, but `setting enable_mixed_granularity_parts` is disabled", + ErrorCodes::LOGICAL_ERROR); + + has_non_adaptive_index_granularity_parts = has_non_adaptive_parts; + + if (suspicious_broken_parts > settings->max_suspicious_broken_parts && !skip_sanity_checks) + throw Exception(ErrorCodes::TOO_MANY_UNEXPECTED_DATA_PARTS, + "Suspiciously many ({}) broken parts to remove.", + suspicious_broken_parts); + + if (suspicious_broken_parts_bytes > settings->max_suspicious_broken_parts_bytes && !skip_sanity_checks) + throw Exception(ErrorCodes::TOO_MANY_UNEXPECTED_DATA_PARTS, + "Suspiciously big size ({}) of all broken parts to remove.", + formatReadableSizeWithBinarySuffix(suspicious_broken_parts_bytes)); +} + + +void MergeTreeData::loadDataPartsFromWAL( + DataPartsVector & /* broken_parts_to_detach */, + DataPartsVector & duplicate_parts_to_remove, + MutableDataPartsVector & parts_from_wal, + DataPartsLock & part_lock) +{ + for (auto & part : parts_from_wal) + { + if (getActiveContainingPart(part->info, DataPartState::Committed, part_lock)) + continue; + + part->modification_time = time(nullptr); + /// Assume that all parts are Committed, covered parts will be detected and marked as Outdated later + part->setState(DataPartState::Committed); + + auto [it, inserted] = data_parts_indexes.insert(part); + if (!inserted) + { + if ((*it)->checksums.getTotalChecksumHex() == part->checksums.getTotalChecksumHex()) + { + LOG_ERROR(log, "Remove duplicate part {}", part->getFullPath()); + duplicate_parts_to_remove.push_back(part); + } + else + throw Exception("Part " + part->name + " already exists but with different checksums", ErrorCodes::DUPLICATE_DATA_PART); + } + + addPartContributionToDataVolume(part); + } +} + void MergeTreeData::loadDataParts(bool skip_sanity_checks) { @@ -904,7 +1159,6 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) auto metadata_snapshot = getInMemoryMetadataPtr(); const auto settings = getSettings(); - std::vector> part_names_with_disks; MutableDataPartsVector parts_from_wal; Strings part_file_names; @@ -934,193 +1188,90 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) } } - /// Reversed order to load part from low priority disks firstly. - /// Used for keep part on low priority disk if duplication found - for (auto disk_it = disks.rbegin(); disk_it != disks.rend(); ++disk_it) + /// Collect part names by disk. + std::map>> disk_part_map; + std::map disk_wal_part_map; + ThreadPool pool(disks.size()); + std::mutex wal_init_lock; + for (const auto & disk_ptr : disks) { - auto disk_ptr = *disk_it; + auto & disk_parts = disk_part_map[disk_ptr->getName()]; + auto & disk_wal_parts = disk_wal_part_map[disk_ptr->getName()]; - for (auto it = disk_ptr->iterateDirectory(relative_data_path); it->isValid(); it->next()) + pool.scheduleOrThrowOnError([&, disk_ptr]() { - /// Skip temporary directories, file 'format_version.txt' and directory 'detached'. 
- if (startsWith(it->name(), "tmp") - || it->name() == MergeTreeData::FORMAT_VERSION_FILE_NAME - || it->name() == MergeTreeData::DETACHED_DIR_NAME) - continue; - - if (!startsWith(it->name(), MergeTreeWriteAheadLog::WAL_FILE_NAME)) - part_names_with_disks.emplace_back(it->name(), disk_ptr); - else if (it->name() == MergeTreeWriteAheadLog::DEFAULT_WAL_FILE_NAME && settings->in_memory_parts_enable_wal) + for (auto it = disk_ptr->iterateDirectory(relative_data_path); it->isValid(); it->next()) { - /// Create and correctly initialize global WAL object - write_ahead_log = std::make_shared(*this, disk_ptr, it->name()); - for (auto && part : write_ahead_log->restore(metadata_snapshot, getContext())) - parts_from_wal.push_back(std::move(part)); + /// Skip temporary directories, file 'format_version.txt' and directory 'detached'. + if (startsWith(it->name(), "tmp") || it->name() == MergeTreeData::FORMAT_VERSION_FILE_NAME + || it->name() == MergeTreeData::DETACHED_DIR_NAME) + continue; + + if (!startsWith(it->name(), MergeTreeWriteAheadLog::WAL_FILE_NAME)) + disk_parts.emplace_back(std::make_pair(it->name(), disk_ptr)); + else if (it->name() == MergeTreeWriteAheadLog::DEFAULT_WAL_FILE_NAME && settings->in_memory_parts_enable_wal) + { + std::unique_lock lock(wal_init_lock); + if (write_ahead_log != nullptr) + throw Exception( + "There are multiple WAL files appeared in current storage policy. You need to resolve this manually", + ErrorCodes::CORRUPTED_DATA); + + write_ahead_log = std::make_shared(*this, disk_ptr, it->name()); + for (auto && part : write_ahead_log->restore(metadata_snapshot, getContext())) + disk_wal_parts.push_back(std::move(part)); + } + else if (settings->in_memory_parts_enable_wal) + { + MergeTreeWriteAheadLog wal(*this, disk_ptr, it->name()); + for (auto && part : wal.restore(metadata_snapshot, getContext())) + disk_wal_parts.push_back(std::move(part)); + } } - else if (settings->in_memory_parts_enable_wal) - { - MergeTreeWriteAheadLog wal(*this, disk_ptr, it->name()); - for (auto && part : wal.restore(metadata_snapshot, getContext())) - parts_from_wal.push_back(std::move(part)); - } - } - } - - auto part_lock = lockParts(); - data_parts_indexes.clear(); - - if (part_names_with_disks.empty() && parts_from_wal.empty()) - { - LOG_DEBUG(log, "There are no data parts"); - return; - } - - /// Parallel loading of data parts. 
- size_t num_threads = std::min(size_t(settings->max_part_loading_threads), part_names_with_disks.size()); - - std::mutex mutex; - - DataPartsVector broken_parts_to_detach; - size_t suspicious_broken_parts = 0; - size_t suspicious_broken_parts_bytes = 0; - - std::atomic has_adaptive_parts = false; - std::atomic has_non_adaptive_parts = false; - - ThreadPool pool(num_threads); - - for (auto & part_names_with_disk : part_names_with_disks) - { - pool.scheduleOrThrowOnError([&] - { - const auto & [part_name, part_disk_ptr] = part_names_with_disk; - - auto part_opt = MergeTreePartInfo::tryParsePartName(part_name, format_version); - - if (!part_opt) - return; - - auto single_disk_volume = std::make_shared("volume_" + part_name, part_disk_ptr, 0); - auto part = createPart(part_name, *part_opt, single_disk_volume, part_name); - bool broken = false; - - String part_path = fs::path(relative_data_path) / part_name; - String marker_path = fs::path(part_path) / IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME; - - if (part_disk_ptr->exists(marker_path)) - { - /// NOTE: getBytesOnDisk() cannot be used here, since it maybe zero of checksums.txt will not exist - size_t size_of_part = IMergeTreeDataPart::calculateTotalSizeOnDisk(part->volume->getDisk(), part->getFullRelativePath()); - LOG_WARNING(log, - "Detaching stale part {}{} (size: {}), which should have been deleted after a move. " - "That can only happen after unclean restart of ClickHouse after move of a part having an operation blocking that stale copy of part.", - getFullPathOnDisk(part_disk_ptr), part_name, formatReadableSizeWithBinarySuffix(size_of_part)); - std::lock_guard loading_lock(mutex); - - broken_parts_to_detach.push_back(part); - - ++suspicious_broken_parts; - suspicious_broken_parts_bytes += size_of_part; - - return; - } - - try - { - part->loadColumnsChecksumsIndexes(require_part_metadata, true); - } - catch (const Exception & e) - { - /// Don't count the part as broken if there is not enough memory to load it. - /// In fact, there can be many similar situations. - /// But it is OK, because there is a safety guard against deleting too many parts. - if (isNotEnoughMemoryErrorCode(e.code())) - throw; - - broken = true; - tryLogCurrentException(__PRETTY_FUNCTION__); - } - catch (...) - { - broken = true; - tryLogCurrentException(__PRETTY_FUNCTION__); - } - - /// Ignore broken parts that can appear as a result of hard server restart. - if (broken) - { - /// NOTE: getBytesOnDisk() cannot be used here, since it maybe zero of checksums.txt will not exist - size_t size_of_part = IMergeTreeDataPart::calculateTotalSizeOnDisk(part->volume->getDisk(), part->getFullRelativePath()); - - LOG_ERROR(log, - "Detaching broken part {}{} (size: {}). " - "If it happened after update, it is likely because of backward incompability. 
" - "You need to resolve this manually", - getFullPathOnDisk(part_disk_ptr), part_name, formatReadableSizeWithBinarySuffix(size_of_part)); - std::lock_guard loading_lock(mutex); - - broken_parts_to_detach.push_back(part); - - ++suspicious_broken_parts; - suspicious_broken_parts_bytes += size_of_part; - - return; - } - - if (!part->index_granularity_info.is_adaptive) - has_non_adaptive_parts.store(true, std::memory_order_relaxed); - else - has_adaptive_parts.store(true, std::memory_order_relaxed); - - part->modification_time = part_disk_ptr->getLastModified(fs::path(relative_data_path) / part_name).epochTime(); - - /// Assume that all parts are Committed, covered parts will be detected and marked as Outdated later - part->setState(DataPartState::Committed); - - std::lock_guard loading_lock(mutex); - - if (!data_parts_indexes.insert(part).second) - throw Exception(ErrorCodes::DUPLICATE_DATA_PART, "Part {} already exists", part->name); - - addPartContributionToDataVolume(part); }); } pool.wait(); - for (auto & part : parts_from_wal) + for (auto & [_, disk_wal_parts] : disk_wal_part_map) + parts_from_wal.insert( + parts_from_wal.end(), std::make_move_iterator(disk_wal_parts.begin()), std::make_move_iterator(disk_wal_parts.end())); + + size_t num_parts = 0; + std::queue>> parts_queue; + for (auto & [_, disk_parts] : disk_part_map) { - if (getActiveContainingPart(part->info, DataPartState::Committed, part_lock)) + if (disk_parts.empty()) continue; - - part->modification_time = time(nullptr); - /// Assume that all parts are Committed, covered parts will be detected and marked as Outdated later - part->setState(DataPartState::Committed); - - if (!data_parts_indexes.insert(part).second) - throw Exception("Part " + part->name + " already exists", ErrorCodes::DUPLICATE_DATA_PART); - - addPartContributionToDataVolume(part); + num_parts += disk_parts.size(); + parts_queue.push(std::move(disk_parts)); } - if (has_non_adaptive_parts && has_adaptive_parts && !settings->enable_mixed_granularity_parts) - throw Exception("Table contains parts with adaptive and non adaptive marks, but `setting enable_mixed_granularity_parts` is disabled", ErrorCodes::LOGICAL_ERROR); + auto part_lock = lockParts(); + data_parts_indexes.clear(); - has_non_adaptive_index_granularity_parts = has_non_adaptive_parts; + if (num_parts == 0 && parts_from_wal.empty()) + { + LOG_DEBUG(log, "There are no data parts"); + return; + } - if (suspicious_broken_parts > settings->max_suspicious_broken_parts && !skip_sanity_checks) - throw Exception(ErrorCodes::TOO_MANY_UNEXPECTED_DATA_PARTS, - "Suspiciously many ({}) broken parts to remove.", - suspicious_broken_parts); - if (suspicious_broken_parts_bytes > settings->max_suspicious_broken_parts_bytes && !skip_sanity_checks) - throw Exception(ErrorCodes::TOO_MANY_UNEXPECTED_DATA_PARTS, - "Suspiciously big size ({}) of all broken parts to remove.", - formatReadableSizeWithBinarySuffix(suspicious_broken_parts_bytes)); + DataPartsVector broken_parts_to_detach; + DataPartsVector duplicate_parts_to_remove; + + if (num_parts > 0) + loadDataPartsFromDisk( + broken_parts_to_detach, duplicate_parts_to_remove, pool, num_parts, parts_queue, skip_sanity_checks, settings); + + if (!parts_from_wal.empty()) + loadDataPartsFromWAL(broken_parts_to_detach, duplicate_parts_to_remove, parts_from_wal, part_lock); for (auto & part : broken_parts_to_detach) part->renameToDetached("broken-on-start"); /// detached parts must not have '_' in prefixes + for (auto & part : duplicate_parts_to_remove) + part->remove(); /// 
Delete from the set of current parts those parts that are covered by another part (those parts that /// were merged), but that for some reason are still not deleted from the filesystem. diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index e7f1db8f3ec..b87a756bf9f 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -1143,6 +1143,21 @@ private: /// Returns default settings for storage with possible changes from global config. virtual std::unique_ptr getDefaultSettings() const = 0; + + void loadDataPartsFromDisk( + DataPartsVector & broken_parts_to_detach, + DataPartsVector & duplicate_parts_to_remove, + ThreadPool & pool, + size_t num_parts, + std::queue>> & parts_queue, + bool skip_sanity_checks, + const MergeTreeSettingsPtr & settings); + + void loadDataPartsFromWAL( + DataPartsVector & broken_parts_to_detach, + DataPartsVector & duplicate_parts_to_remove, + MutableDataPartsVector & parts_from_wal, + DataPartsLock & part_lock); }; /// RAII struct to record big parts that are submerging or emerging. From 50e2b064881e9da8d97edc12b71a0760e0135628 Mon Sep 17 00:00:00 2001 From: Artur Filatenkov <613623@mail.ru> Date: Wed, 13 Oct 2021 14:25:00 +0300 Subject: [PATCH 182/438] change branch for boringssl --- .gitmodules | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitmodules b/.gitmodules index 74d1049ce01..a8b94cbfa6e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -213,6 +213,7 @@ [submodule "contrib/boringssl"] path = contrib/boringssl url = https://github.com/ClickHouse-Extras/boringssl.git + branch = MergeWithUpstream [submodule "contrib/NuRaft"] path = contrib/NuRaft url = https://github.com/ClickHouse-Extras/NuRaft.git From 7cef607cb92239f1544e0afbf0da2430fb5997cf Mon Sep 17 00:00:00 2001 From: Artur Filatenkov <613623@mail.ru> Date: Wed, 13 Oct 2021 15:01:07 +0300 Subject: [PATCH 183/438] move on merge branch --- contrib/boringssl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/boringssl b/contrib/boringssl index 4c787e9d70c..486b2c0a869 160000 --- a/contrib/boringssl +++ b/contrib/boringssl @@ -1 +1 @@ -Subproject commit 4c787e9d70c370d51baea714e7b73910be2a4c28 +Subproject commit 486b2c0a869fa8024c8a13e5bcefdd4a3d919947 From 562138c9fa14ec519c6ccd873d8787843eb11e6b Mon Sep 17 00:00:00 2001 From: Pavel Cheremushkin Date: Wed, 13 Oct 2021 15:01:21 +0300 Subject: [PATCH 184/438] codegen_fuzzer: removing errors on warnings from protobuf-generated code in more gentle way --- src/Parsers/fuzzers/CMakeLists.txt | 2 -- src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt | 4 +++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Parsers/fuzzers/CMakeLists.txt b/src/Parsers/fuzzers/CMakeLists.txt index 2840dc72c0a..bb52101c847 100644 --- a/src/Parsers/fuzzers/CMakeLists.txt +++ b/src/Parsers/fuzzers/CMakeLists.txt @@ -7,7 +7,5 @@ target_link_libraries(select_parser_fuzzer PRIVATE clickhouse_parsers ${LIB_FUZZ add_executable(create_parser_fuzzer create_parser_fuzzer.cpp ${SRCS}) target_link_libraries(create_parser_fuzzer PRIVATE clickhouse_parsers ${LIB_FUZZING_ENGINE}) -string(REPLACE " -Werror" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") add_subdirectory(codegen_fuzzer) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror") diff --git a/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt b/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt index f55bb3b3fb9..370396a5e8e 100644 --- a/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt +++ 
b/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt @@ -7,7 +7,9 @@ set(CMAKE_INCLUDE_CURRENT_DIR TRUE) add_executable(codegen_select_fuzzer ${FUZZER_SRCS}) +set_source_files_properties("${PROTO_SRCS}" "out.cpp" PROPERTIES COMPILE_FLAGS "-Wno-reserved-identifier") + target_include_directories(codegen_select_fuzzer BEFORE PRIVATE "${Protobuf_INCLUDE_DIRS}") target_include_directories(codegen_select_fuzzer BEFORE PRIVATE "${LibProtobufMutator_SOURCE_DIR}") target_include_directories(codegen_select_fuzzer BEFORE PRIVATE "${LibProtobufMutator_SOURCE_DIR}/src") -target_link_libraries(codegen_select_fuzzer PRIVATE clickhouse_parsers protobuf-mutator ${Protobuf_LIBRARIES} ${LIB_FUZZING_ENGINE}) \ No newline at end of file +target_link_libraries(codegen_select_fuzzer PRIVATE clickhouse_parsers protobuf-mutator ${Protobuf_LIBRARIES} ${LIB_FUZZING_ENGINE}) From d2dfbb5ab627ebe8f607fcc835e4a5a18737a303 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 13 Oct 2021 15:25:44 +0300 Subject: [PATCH 185/438] Remove trash from MergeTreeReadPool --- src/Storages/MergeTree/MergeTreeReadPool.cpp | 24 ------------------- src/Storages/MergeTree/MergeTreeReadPool.h | 3 --- .../MergeTreeThreadSelectProcessor.cpp | 11 ++++----- .../00167_read_bytes_from_fs.reference | 2 ++ .../1_stateful/00167_read_bytes_from_fs.sql | 7 ++++++ 5 files changed, 13 insertions(+), 34 deletions(-) create mode 100644 tests/queries/1_stateful/00167_read_bytes_from_fs.reference create mode 100644 tests/queries/1_stateful/00167_read_bytes_from_fs.sql diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index d08cec24184..4bb247f1369 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -142,30 +142,6 @@ MergeTreeReadTaskPtr MergeTreeReadPool::getTask(const size_t min_marks_to_read, prewhere_info && prewhere_info->remove_prewhere_column, per_part_should_reorder[part_idx], std::move(curr_task_size_predictor)); } -MarkRanges MergeTreeReadPool::getRestMarks(const IMergeTreeDataPart & part, const MarkRange & from) const -{ - MarkRanges all_part_ranges; - - /// Inefficient in presence of large number of data parts. 
- for (const auto & part_ranges : parts_ranges) - { - if (part_ranges.data_part.get() == &part) - { - all_part_ranges = part_ranges.ranges; - break; - } - } - if (all_part_ranges.empty()) - throw Exception("Trying to read marks range [" + std::to_string(from.begin) + ", " + std::to_string(from.end) + "] from part '" - + part.getFullPath() + "' which has no ranges in this query", ErrorCodes::LOGICAL_ERROR); - - auto begin = std::lower_bound(all_part_ranges.begin(), all_part_ranges.end(), from, [] (const auto & f, const auto & s) { return f.begin < s.begin; }); - if (begin == all_part_ranges.end()) - begin = std::prev(all_part_ranges.end()); - begin->begin = from.begin; - return MarkRanges(begin, all_part_ranges.end()); -} - Block MergeTreeReadPool::getHeader() const { return metadata_snapshot->getSampleBlockForColumns(column_names, data.getVirtuals(), data.getStorageID()); diff --git a/src/Storages/MergeTree/MergeTreeReadPool.h b/src/Storages/MergeTree/MergeTreeReadPool.h index 9949bdf86f8..380b132b806 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.h +++ b/src/Storages/MergeTree/MergeTreeReadPool.h @@ -85,9 +85,6 @@ public: */ void profileFeedback(const ReadBufferFromFileBase::ProfileInfo info); - /// This method tells which mark ranges we have to read if we start from @from mark range - MarkRanges getRestMarks(const IMergeTreeDataPart & part, const MarkRange & from) const; - Block getHeader() const; private: diff --git a/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp index 4eb6bc4b2e2..6a8ef860c87 100644 --- a/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp @@ -68,18 +68,16 @@ bool MergeTreeThreadSelectProcessor::getNewTask() if (!reader) { - auto rest_mark_ranges = pool->getRestMarks(*task->data_part, task->mark_ranges[0]); - if (use_uncompressed_cache) owned_uncompressed_cache = storage.getContext()->getUncompressedCache(); owned_mark_cache = storage.getContext()->getMarkCache(); - reader = task->data_part->getReader(task->columns, metadata_snapshot, rest_mark_ranges, + reader = task->data_part->getReader(task->columns, metadata_snapshot, task->mark_ranges, owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings, IMergeTreeReader::ValueSizeMap{}, profile_callback); if (prewhere_info) - pre_reader = task->data_part->getReader(task->pre_columns, metadata_snapshot, rest_mark_ranges, + pre_reader = task->data_part->getReader(task->pre_columns, metadata_snapshot, task->mark_ranges, owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings, IMergeTreeReader::ValueSizeMap{}, profile_callback); } @@ -88,14 +86,13 @@ bool MergeTreeThreadSelectProcessor::getNewTask() /// in other case we can reuse readers, anyway they will be "seeked" to required mark if (part_name != last_readed_part_name) { - auto rest_mark_ranges = pool->getRestMarks(*task->data_part, task->mark_ranges[0]); /// retain avg_value_size_hints - reader = task->data_part->getReader(task->columns, metadata_snapshot, rest_mark_ranges, + reader = task->data_part->getReader(task->columns, metadata_snapshot, task->mark_ranges, owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings, reader->getAvgValueSizeHints(), profile_callback); if (prewhere_info) - pre_reader = task->data_part->getReader(task->pre_columns, metadata_snapshot, rest_mark_ranges, + pre_reader = task->data_part->getReader(task->pre_columns, metadata_snapshot, task->mark_ranges, 
owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings, reader->getAvgValueSizeHints(), profile_callback); } diff --git a/tests/queries/1_stateful/00167_read_bytes_from_fs.reference b/tests/queries/1_stateful/00167_read_bytes_from_fs.reference new file mode 100644 index 00000000000..05b54da2ac7 --- /dev/null +++ b/tests/queries/1_stateful/00167_read_bytes_from_fs.reference @@ -0,0 +1,2 @@ +468426149779992039 +1 diff --git a/tests/queries/1_stateful/00167_read_bytes_from_fs.sql b/tests/queries/1_stateful/00167_read_bytes_from_fs.sql new file mode 100644 index 00000000000..c3bdaea7abe --- /dev/null +++ b/tests/queries/1_stateful/00167_read_bytes_from_fs.sql @@ -0,0 +1,7 @@ +SELECT sum(cityHash64(*)) FROM test.hits SETTINGS max_threads=40 + +SYSTEM FLUSH LOGS; + +-- We had a bug which lead to additional compressed data read. hits compressed size if about 1.2G, but we read more then 3GB. +-- Small additional reads still possible, so we compare with about 1.5Gb. +SELECT ProfileEvents['ReadBufferFromFileDescriptorReadBytes'] < 1500000000 from system.query_log where query = 'SELECT sum(cityHash64(*)) FROM datasets.hits_v1 SETTINGS max_threads=40' and type = 'QueryFinish' From 7e85b7e407838e55ee290f5747684bb6c95b44bd Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 13 Oct 2021 15:27:42 +0300 Subject: [PATCH 186/438] Remove accident change --- docker/test/stateless/run.sh | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index ebb72111e96..ed721690281 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -45,23 +45,6 @@ else sudo clickhouse start fi -echo " -set follow-fork-mode child -handle all noprint -handle SIGSEGV stop print -handle SIGBUS stop print -handle SIGABRT stop print -continue -thread apply all backtrace -detach -quit -" > script.gdb - -# FIXME Hung check may work incorrectly because of attached gdb -# 1. False positives are possible -# 2. 
We cannot attach another gdb to get stacktraces if some queries hung -gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" >> /test_output/gdb.log & - if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then sudo -E -u clickhouse /usr/bin/clickhouse server --config /etc/clickhouse-server1/config.xml --daemon \ From 006a9a7c7d9e9d645c4bdf9112eafb1ed076091f Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 13 Oct 2021 16:26:54 +0300 Subject: [PATCH 187/438] Print more info about memory utilization --- src/Common/ProgressIndication.cpp | 20 ++++++++++++-------- src/Common/ProgressIndication.h | 8 +++++++- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/src/Common/ProgressIndication.cpp b/src/Common/ProgressIndication.cpp index 4510952cc71..1f8fc949886 100644 --- a/src/Common/ProgressIndication.cpp +++ b/src/Common/ProgressIndication.cpp @@ -1,9 +1,11 @@ #include "ProgressIndication.h" +#include #include #include #include #include #include +#include "Common/formatReadable.h" #include #include #include "IO/WriteBufferFromString.h" @@ -114,16 +116,17 @@ UInt64 ProgressIndication::getApproximateCoresNumber() const }); } -UInt64 ProgressIndication::getMemoryUsage() const +ProgressIndication::MemoryUsage ProgressIndication::getMemoryUsage() const { - return std::accumulate(thread_data.cbegin(), thread_data.cend(), ZERO, - [](UInt64 acc, auto const & host_data) + return std::accumulate(thread_data.cbegin(), thread_data.cend(), MemoryUsage{}, + [](MemoryUsage const & acc, auto const & host_data) { - return acc + std::accumulate(host_data.second.cbegin(), host_data.second.cend(), ZERO, + auto host_usage = std::accumulate(host_data.second.cbegin(), host_data.second.cend(), ZERO, [](UInt64 memory, auto const & data) { return memory + data.second.memory_usage; }); + return MemoryUsage{.total = acc.total + host_usage, .max = std::max(acc.max, host_usage)}; }); } @@ -202,11 +205,12 @@ void ProgressIndication::writeProgress() profiling_msg_builder << " Running " << threads_number << " threads on " << std::min(cores_number, threads_number) << " cores"; - auto memory_usage = getMemoryUsage(); + auto [memory_usage, max_host_usage] = getMemoryUsage(); if (memory_usage != 0) - profiling_msg_builder << " with " << formatReadableSizeWithDecimalSuffix(memory_usage) << " RAM used."; - else - profiling_msg_builder << "."; + profiling_msg_builder << " with " << formatReadableSizeWithDecimalSuffix(memory_usage) << " RAM used"; + if (thread_data.size() > 1 && max_host_usage) + profiling_msg_builder << " total (per host max: " << formatReadableSizeWithDecimalSuffix(max_host_usage) << ")"; + profiling_msg_builder << "."; profiling_msg = profiling_msg_builder.str(); } diff --git a/src/Common/ProgressIndication.h b/src/Common/ProgressIndication.h index 9b1b2b0b145..d31ed8df0ba 100644 --- a/src/Common/ProgressIndication.h +++ b/src/Common/ProgressIndication.h @@ -68,7 +68,13 @@ private: UInt64 getApproximateCoresNumber() const; - UInt64 getMemoryUsage() const; + struct MemoryUsage + { + UInt64 total = 0; + UInt64 max = 0; + }; + + MemoryUsage getMemoryUsage() const; /// This flag controls whether to show the progress bar. We start showing it after /// the query has been executing for 0.5 seconds, and is still less than half complete. 
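Editorial note on the ProgressIndication change above: it folds the per-host, per-thread statistics twice — an inner accumulate sums each host's thread memory, and an outer accumulate adds those host sums into a grand total while keeping the largest single-host value. A minimal standalone sketch of that aggregation pattern, using hypothetical stand-in types rather than the real ClickHouse structures:

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <map>
#include <numeric>
#include <string>

// Hypothetical stand-ins for ProgressIndication's per-host thread data.
struct ThreadStats { uint64_t memory_usage = 0; };
using ThreadToStats = std::map<uint64_t, ThreadStats>;       // thread_id -> stats
using HostToThreads = std::map<std::string, ThreadToStats>;  // host -> its threads

struct MemoryUsage { uint64_t total = 0; uint64_t max = 0; };

MemoryUsage aggregateMemory(const HostToThreads & thread_data)
{
    // Outer fold over hosts; inner fold over that host's threads.
    return std::accumulate(thread_data.begin(), thread_data.end(), MemoryUsage{},
        [](MemoryUsage acc, const auto & host_entry)
        {
            const uint64_t host_usage = std::accumulate(
                host_entry.second.begin(), host_entry.second.end(), uint64_t{0},
                [](uint64_t sum, const auto & thread_entry) { return sum + thread_entry.second.memory_usage; });
            return MemoryUsage{acc.total + host_usage, std::max(acc.max, host_usage)};
        });
}

int main()
{
    HostToThreads data{
        {"host-a", {{1, {100}}, {2, {200}}}},   // 300 bytes total on host-a
        {"host-b", {{1, {500}}}},               // 500 bytes total on host-b
    };
    auto [total, max_host] = aggregateMemory(data);
    std::cout << "total=" << total << " max_per_host=" << max_host << "\n";  // total=800 max_per_host=500
}
```

This also mirrors why the new progress line prints the "per host max" part only when more than one host reported data: with a single host the maximum equals the total and adds no information.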
From a8fa3914b530567ebb37cea9df636d38b8a18a99 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 13 Oct 2021 17:54:46 +0300 Subject: [PATCH 188/438] Fix 01939_network_send_bytes_metrics --- tests/queries/0_stateless/01939_network_send_bytes_metrics.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01939_network_send_bytes_metrics.sh b/tests/queries/0_stateless/01939_network_send_bytes_metrics.sh index e862a273de4..840b4f54706 100755 --- a/tests/queries/0_stateless/01939_network_send_bytes_metrics.sh +++ b/tests/queries/0_stateless/01939_network_send_bytes_metrics.sh @@ -10,7 +10,7 @@ ${CLICKHOUSE_CLIENT} --query "SELECT number FROM numbers(1000)" > /dev/null ${CLICKHOUSE_CLIENT} --multiquery --query "SYSTEM FLUSH LOGS; WITH ProfileEvents['NetworkSendBytes'] AS bytes - SELECT bytes >= 8000 AND bytes < 9000 ? 1 : bytes FROM system.query_log + SELECT bytes >= 8000 AND bytes < 9500 ? 1 : bytes FROM system.query_log WHERE current_database = currentDatabase() AND query_kind = 'Select' AND event_date >= yesterday() AND type = 2 ORDER BY event_time DESC LIMIT 1;" ${CLICKHOUSE_CLIENT} --query "DROP TABLE t" From 886d10c3ea19c04a35ac43e175e0730a1148adc9 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 12 Oct 2021 11:58:33 +0300 Subject: [PATCH 189/438] Start server under gdb in functional tests --- docker/test/stateless/run.sh | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index ce1d1b59a55..ec0af024b8b 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -45,6 +45,23 @@ else sudo clickhouse start fi +echo " +set follow-fork-mode child +handle all noprint +handle SIGSEGV stop print +handle SIGBUS stop print +handle SIGABRT stop print +continue +thread apply all backtrace +detach +quit +" > script.gdb + +# FIXME Hung check may work incorrectly because of attached gdb +# 1. False positives are possible +# 2. We cannot attach another gdb to get stacktraces if some queries hung +gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" >> /test_output/gdb.log & + if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then sudo -E -u clickhouse /usr/bin/clickhouse server --config /etc/clickhouse-server1/config.xml --daemon \ From 2473bc5affadd1cb397ef810a262845fd8900220 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 13 Oct 2021 18:12:04 +0300 Subject: [PATCH 190/438] Fix test --- tests/queries/1_stateful/00167_read_bytes_from_fs.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/1_stateful/00167_read_bytes_from_fs.sql b/tests/queries/1_stateful/00167_read_bytes_from_fs.sql index c3bdaea7abe..341730bd82d 100644 --- a/tests/queries/1_stateful/00167_read_bytes_from_fs.sql +++ b/tests/queries/1_stateful/00167_read_bytes_from_fs.sql @@ -1,7 +1,7 @@ -SELECT sum(cityHash64(*)) FROM test.hits SETTINGS max_threads=40 +SELECT sum(cityHash64(*)) FROM test.hits SETTINGS max_threads=40; SYSTEM FLUSH LOGS; --- We had a bug which lead to additional compressed data read. hits compressed size if about 1.2G, but we read more then 3GB. +-- We had a bug which lead to additional compressed data read. test.hits compressed size is about 1.2Gb, but we read more then 3Gb. -- Small additional reads still possible, so we compare with about 1.5Gb. 
-SELECT ProfileEvents['ReadBufferFromFileDescriptorReadBytes'] < 1500000000 from system.query_log where query = 'SELECT sum(cityHash64(*)) FROM datasets.hits_v1 SETTINGS max_threads=40' and type = 'QueryFinish' +SELECT ProfileEvents['ReadBufferFromFileDescriptorReadBytes'] < 1500000000 from system.query_log where query = 'SELECT sum(cityHash64(*)) FROM datasets.hits_v1 SETTINGS max_threads=40' and databaser=currentDatabase() and type = 'QueryFinish'; From 98b555e7f77e0c134775fd9abb344977645f29de Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 13 Oct 2021 19:21:11 +0300 Subject: [PATCH 191/438] Update run.sh --- docker/test/stateless/run.sh | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index ec0af024b8b..ce1d1b59a55 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -45,23 +45,6 @@ else sudo clickhouse start fi -echo " -set follow-fork-mode child -handle all noprint -handle SIGSEGV stop print -handle SIGBUS stop print -handle SIGABRT stop print -continue -thread apply all backtrace -detach -quit -" > script.gdb - -# FIXME Hung check may work incorrectly because of attached gdb -# 1. False positives are possible -# 2. We cannot attach another gdb to get stacktraces if some queries hung -gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" >> /test_output/gdb.log & - if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then sudo -E -u clickhouse /usr/bin/clickhouse server --config /etc/clickhouse-server1/config.xml --daemon \ From 8851cb8459fcb490acec6ce7af33a709b0b11539 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Wed, 13 Oct 2021 11:37:56 +0800 Subject: [PATCH 192/438] Use upstream replxx --- .gitmodules | 2 +- contrib/replxx | 2 +- src/Client/ClientBaseHelpers.cpp | 44 ++++++++++++++++---------------- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/.gitmodules b/.gitmodules index 74d1049ce01..f9758a69956 100644 --- a/.gitmodules +++ b/.gitmodules @@ -140,7 +140,7 @@ url = https://github.com/ClickHouse-Extras/libc-headers.git [submodule "contrib/replxx"] path = contrib/replxx - url = https://github.com/ClickHouse-Extras/replxx.git + url = https://github.com/AmokHuginnsson/replxx.git [submodule "contrib/avro"] path = contrib/avro url = https://github.com/ClickHouse-Extras/avro.git diff --git a/contrib/replxx b/contrib/replxx index f97765df14f..89abeea7516 160000 --- a/contrib/replxx +++ b/contrib/replxx @@ -1 +1 @@ -Subproject commit f97765df14f4a6236d69b8f14b53ef2051ebd95a +Subproject commit 89abeea7516a2a9b6aad7bfecc132f608ff14a3d diff --git a/src/Client/ClientBaseHelpers.cpp b/src/Client/ClientBaseHelpers.cpp index a530e48ee35..e1c1481c5b4 100644 --- a/src/Client/ClientBaseHelpers.cpp +++ b/src/Client/ClientBaseHelpers.cpp @@ -109,29 +109,29 @@ void highlight(const String & query, std::vector & colors {TokenType::OpeningSquareBracket, Replxx::Color::BROWN}, {TokenType::ClosingSquareBracket, Replxx::Color::BROWN}, {TokenType::DoubleColon, Replxx::Color::BROWN}, - {TokenType::OpeningCurlyBrace, Replxx::Color::INTENSE}, - {TokenType::ClosingCurlyBrace, Replxx::Color::INTENSE}, + {TokenType::OpeningCurlyBrace, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::ClosingCurlyBrace, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Comma, Replxx::Color::INTENSE}, - {TokenType::Semicolon, Replxx::Color::INTENSE}, - {TokenType::Dot, Replxx::Color::INTENSE}, - {TokenType::Asterisk, 
Replxx::Color::INTENSE}, + {TokenType::Comma, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Semicolon, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Dot, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Asterisk, replxx::color::bold(Replxx::Color::DEFAULT)}, {TokenType::HereDoc, Replxx::Color::CYAN}, - {TokenType::Plus, Replxx::Color::INTENSE}, - {TokenType::Minus, Replxx::Color::INTENSE}, - {TokenType::Slash, Replxx::Color::INTENSE}, - {TokenType::Percent, Replxx::Color::INTENSE}, - {TokenType::Arrow, Replxx::Color::INTENSE}, - {TokenType::QuestionMark, Replxx::Color::INTENSE}, - {TokenType::Colon, Replxx::Color::INTENSE}, - {TokenType::Equals, Replxx::Color::INTENSE}, - {TokenType::NotEquals, Replxx::Color::INTENSE}, - {TokenType::Less, Replxx::Color::INTENSE}, - {TokenType::Greater, Replxx::Color::INTENSE}, - {TokenType::LessOrEquals, Replxx::Color::INTENSE}, - {TokenType::GreaterOrEquals, Replxx::Color::INTENSE}, - {TokenType::Concatenation, Replxx::Color::INTENSE}, - {TokenType::At, Replxx::Color::INTENSE}, + {TokenType::Plus, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Minus, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Slash, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Percent, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Arrow, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::QuestionMark, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Colon, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Equals, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::NotEquals, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Less, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Greater, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::LessOrEquals, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::GreaterOrEquals, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Concatenation, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::At, replxx::color::bold(Replxx::Color::DEFAULT)}, {TokenType::DoubleAt, Replxx::Color::MAGENTA}, {TokenType::EndOfStream, Replxx::Color::DEFAULT}, @@ -142,7 +142,7 @@ void highlight(const String & query, std::vector & colors {TokenType::ErrorDoubleQuoteIsNotClosed, Replxx::Color::RED}, {TokenType::ErrorSinglePipeMark, Replxx::Color::RED}, {TokenType::ErrorWrongNumber, Replxx::Color::RED}, - { TokenType::ErrorMaxQuerySizeExceeded, Replxx::Color::RED }}; + {TokenType::ErrorMaxQuerySizeExceeded, Replxx::Color::RED}}; const Replxx::Color unknown_token_color = Replxx::Color::RED; From 91084895cee130a36ddb6e15c9727e6da6b607bf Mon Sep 17 00:00:00 2001 From: Artur Filatenkov <613623@mail.ru> Date: Wed, 13 Oct 2021 20:18:51 +0300 Subject: [PATCH 193/438] attemp to fix build --- contrib/boringssl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/boringssl b/contrib/boringssl index 486b2c0a869..a139bb3cb95 160000 --- a/contrib/boringssl +++ b/contrib/boringssl @@ -1 +1 @@ -Subproject commit 486b2c0a869fa8024c8a13e5bcefdd4a3d919947 +Subproject commit a139bb3cb9598c7d92dc69aa6962e3ea4fd18982 From 3d0e595579a641f30b46b3577a66059d87a15e0e Mon Sep 17 00:00:00 2001 From: jasperzhu Date: Wed, 13 Oct 2021 22:49:21 +0530 Subject: [PATCH 194/438] revise compile flag --- cmake/cpu_features.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/cpu_features.cmake b/cmake/cpu_features.cmake index 41a3dc26d5a..330ab10f1bf 100644 --- 
a/cmake/cpu_features.cmake +++ b/cmake/cpu_features.cmake @@ -163,8 +163,8 @@ else () endif () #Limit avx2/avx512 flag for specific source build + set (X86_INTRINSICS_FLAGS "") if (ENABLE_AVX2_FOR_SPEC_OP) - set (X86_INTRINSICS_FLAGS "") if (HAVE_BMI) set (X86_INTRINSICS_FLAGS "${X86_INTRINSICS_FLAGS} -mbmi") endif () From 2957971ee30b9507cb73d6b6c90a1ebee87914fd Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 13 Oct 2021 21:22:02 +0300 Subject: [PATCH 195/438] Remove some last streams. --- src/Core/ExternalTable.cpp | 1 - src/DataStreams/BlockIO.cpp | 1 - .../MaterializingBlockOutputStream.h | 34 --- src/DataStreams/OneBlockInputStream.h | 41 ---- src/DataStreams/SquashingBlockInputStream.cpp | 32 --- src/DataStreams/SquashingBlockInputStream.h | 31 --- .../SquashingBlockOutputStream.cpp | 54 ----- src/DataStreams/SquashingBlockOutputStream.h | 39 ---- .../gtest_blocks_size_merging_streams.cpp | 32 +-- src/Databases/DatabaseReplicated.h | 1 - .../MySQL/MaterializedMySQLSyncThread.cpp | 1 - src/Dictionaries/LibraryDictionarySource.cpp | 1 - src/Dictionaries/XDBCDictionarySource.cpp | 2 +- src/Formats/FormatFactory.cpp | 1 - src/Interpreters/Context.cpp | 1 - src/Interpreters/InterpreterInsertQuery.cpp | 2 - src/Interpreters/InterpreterSelectQuery.cpp | 51 ++--- src/Interpreters/InterpreterSelectQuery.h | 4 +- src/Interpreters/MergeJoin.cpp | 7 +- src/Interpreters/MutationsInterpreter.cpp | 1 - .../PipelineExecutingBlockInputStream.cpp | 124 ----------- .../PipelineExecutingBlockInputStream.h | 44 ---- .../Formats/InputStreamFromInputFormat.h | 67 ------ src/Processors/Pipe.cpp | 10 +- src/Processors/QueryPipeline.cpp | 10 +- src/Processors/QueryPipelineBuilder.cpp | 10 +- src/Processors/QueryPlan/ExpressionStep.cpp | 1 - .../Sources/SourceFromInputStream.cpp | 195 ------------------ .../Sources/SourceFromInputStream.h | 77 ------- .../Transforms/AggregatingTransform.h | 3 - src/Storages/HDFS/StorageHDFS.cpp | 16 +- src/Storages/Kafka/KafkaSource.cpp | 2 - src/Storages/Kafka/StorageKafka.cpp | 1 - src/Storages/MergeTree/MergeTask.cpp | 1 - src/Storages/MergeTree/MergeTreeData.cpp | 9 +- .../MergeTree/MergeTreeDataMergerMutator.cpp | 1 - .../MergeTree/MergeTreeDataWriter.cpp | 2 - src/Storages/MergeTree/MutateTask.cpp | 2 +- .../MaterializedPostgreSQLConsumer.cpp | 1 - .../MaterializedPostgreSQLConsumer.h | 1 - .../StorageMaterializedPostgreSQL.cpp | 1 - src/Storages/RabbitMQ/RabbitMQSource.cpp | 1 - src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 1 - .../RocksDB/StorageEmbeddedRocksDB.cpp | 2 +- src/Storages/StorageBuffer.cpp | 2 +- src/Storages/StorageDictionary.cpp | 1 - src/Storages/StorageDistributed.cpp | 1 - src/Storages/StorageFile.cpp | 2 +- src/Storages/StorageInput.cpp | 1 - src/Storages/StorageMaterializedView.cpp | 1 - src/Storages/StorageMerge.cpp | 4 +- src/Storages/StorageMongoDB.cpp | 1 - src/Storages/StorageMySQL.cpp | 1 - src/Storages/StorageS3.cpp | 2 +- src/Storages/StorageS3Cluster.cpp | 2 - src/Storages/StorageSQLite.cpp | 1 - src/Storages/StorageURL.cpp | 2 +- src/TableFunctions/TableFunctionS3Cluster.cpp | 1 - 58 files changed, 67 insertions(+), 874 deletions(-) delete mode 100644 src/DataStreams/MaterializingBlockOutputStream.h delete mode 100644 src/DataStreams/OneBlockInputStream.h delete mode 100644 src/DataStreams/SquashingBlockInputStream.cpp delete mode 100644 src/DataStreams/SquashingBlockInputStream.h delete mode 100644 src/DataStreams/SquashingBlockOutputStream.cpp delete mode 100644 src/DataStreams/SquashingBlockOutputStream.h delete mode 100644 
src/Processors/Executors/PipelineExecutingBlockInputStream.cpp delete mode 100644 src/Processors/Executors/PipelineExecutingBlockInputStream.h delete mode 100644 src/Processors/Formats/InputStreamFromInputFormat.h delete mode 100644 src/Processors/Sources/SourceFromInputStream.cpp delete mode 100644 src/Processors/Sources/SourceFromInputStream.h diff --git a/src/Core/ExternalTable.cpp b/src/Core/ExternalTable.cpp index 55ad748868e..7619d60d84e 100644 --- a/src/Core/ExternalTable.cpp +++ b/src/Core/ExternalTable.cpp @@ -12,7 +12,6 @@ #include #include -#include #include #include #include diff --git a/src/DataStreams/BlockIO.cpp b/src/DataStreams/BlockIO.cpp index 5f1abdaf806..692b69388ea 100644 --- a/src/DataStreams/BlockIO.cpp +++ b/src/DataStreams/BlockIO.cpp @@ -1,6 +1,5 @@ #include #include -#include namespace DB { diff --git a/src/DataStreams/MaterializingBlockOutputStream.h b/src/DataStreams/MaterializingBlockOutputStream.h deleted file mode 100644 index 64c2bc12a57..00000000000 --- a/src/DataStreams/MaterializingBlockOutputStream.h +++ /dev/null @@ -1,34 +0,0 @@ -#pragma once - -#include -#include - - -namespace DB -{ - -/** Converts columns-constants to full columns ("materializes" them). - */ -class MaterializingBlockOutputStream : public IBlockOutputStream -{ -public: - MaterializingBlockOutputStream(const BlockOutputStreamPtr & output_, const Block & header_) - : output{output_}, header(header_) {} - - Block getHeader() const override { return header; } - void write(const Block & block) override { output->write(materializeBlock(block)); } - void flush() override { output->flush(); } - void writePrefix() override { output->writePrefix(); } - void writeSuffix() override { output->writeSuffix(); } - void setRowsBeforeLimit(size_t rows_before_limit) override { output->setRowsBeforeLimit(rows_before_limit); } - void setTotals(const Block & totals) override { output->setTotals(materializeBlock(totals)); } - void setExtremes(const Block & extremes) override { output->setExtremes(materializeBlock(extremes)); } - void onProgress(const Progress & progress) override { output->onProgress(progress); } - String getContentType() const override { return output->getContentType(); } - -private: - BlockOutputStreamPtr output; - Block header; -}; - -} diff --git a/src/DataStreams/OneBlockInputStream.h b/src/DataStreams/OneBlockInputStream.h deleted file mode 100644 index d401082ce62..00000000000 --- a/src/DataStreams/OneBlockInputStream.h +++ /dev/null @@ -1,41 +0,0 @@ -#pragma once - -#include - - -namespace DB -{ - -/** A stream of blocks from which you can read one block. 
- */ -class OneBlockInputStream : public IBlockInputStream -{ -public: - explicit OneBlockInputStream(Block block_) : block(std::move(block_)) { block.checkNumberOfRows(); } - - String getName() const override { return "One"; } - - Block getHeader() const override - { - Block res; - for (const auto & elem : block) - res.insert({ elem.column->cloneEmpty(), elem.type, elem.name }); - return res; - } - -protected: - Block readImpl() override - { - if (has_been_read) - return Block(); - - has_been_read = true; - return block; - } - -private: - Block block; - bool has_been_read = false; -}; - -} diff --git a/src/DataStreams/SquashingBlockInputStream.cpp b/src/DataStreams/SquashingBlockInputStream.cpp deleted file mode 100644 index e13dee37008..00000000000 --- a/src/DataStreams/SquashingBlockInputStream.cpp +++ /dev/null @@ -1,32 +0,0 @@ -#include - - -namespace DB -{ - -SquashingBlockInputStream::SquashingBlockInputStream( - const BlockInputStreamPtr & src, size_t min_block_size_rows, size_t min_block_size_bytes, bool reserve_memory) - : header(src->getHeader()), transform(min_block_size_rows, min_block_size_bytes, reserve_memory) -{ - children.emplace_back(src); -} - - -Block SquashingBlockInputStream::readImpl() -{ - while (!all_read) - { - Block block = children[0]->read(); - if (!block) - all_read = true; - - auto squashed_block = transform.add(std::move(block)); - if (squashed_block) - { - return squashed_block; - } - } - return {}; -} - -} diff --git a/src/DataStreams/SquashingBlockInputStream.h b/src/DataStreams/SquashingBlockInputStream.h deleted file mode 100644 index c2732d520cc..00000000000 --- a/src/DataStreams/SquashingBlockInputStream.h +++ /dev/null @@ -1,31 +0,0 @@ -#pragma once - -#include -#include - - -namespace DB -{ - -/** Merging consecutive blocks of stream to specified minimum size. 
- */ -class SquashingBlockInputStream : public IBlockInputStream -{ -public: - SquashingBlockInputStream(const BlockInputStreamPtr & src, size_t min_block_size_rows, size_t min_block_size_bytes, - bool reserve_memory = false); - - String getName() const override { return "Squashing"; } - - Block getHeader() const override { return header; } - -protected: - Block readImpl() override; - -private: - Block header; - SquashingTransform transform; - bool all_read = false; -}; - -} diff --git a/src/DataStreams/SquashingBlockOutputStream.cpp b/src/DataStreams/SquashingBlockOutputStream.cpp deleted file mode 100644 index ab12f66590f..00000000000 --- a/src/DataStreams/SquashingBlockOutputStream.cpp +++ /dev/null @@ -1,54 +0,0 @@ -#include - - -namespace DB -{ - -SquashingBlockOutputStream::SquashingBlockOutputStream(BlockOutputStreamPtr dst, Block header_, size_t min_block_size_rows, size_t min_block_size_bytes) - : output(std::move(dst)), header(std::move(header_)), transform(min_block_size_rows, min_block_size_bytes) -{ -} - - -void SquashingBlockOutputStream::write(const Block & block) -{ - auto squashed_block = transform.add(block); - if (squashed_block) - output->write(squashed_block); -} - - -void SquashingBlockOutputStream::finalize() -{ - if (all_written) - return; - - all_written = true; - - auto squashed_block = transform.add({}); - if (squashed_block) - output->write(squashed_block); -} - - -void SquashingBlockOutputStream::flush() -{ - if (!disable_flush) - finalize(); - output->flush(); -} - - -void SquashingBlockOutputStream::writePrefix() -{ - output->writePrefix(); -} - - -void SquashingBlockOutputStream::writeSuffix() -{ - finalize(); - output->writeSuffix(); -} - -} diff --git a/src/DataStreams/SquashingBlockOutputStream.h b/src/DataStreams/SquashingBlockOutputStream.h deleted file mode 100644 index 7828ad7e96d..00000000000 --- a/src/DataStreams/SquashingBlockOutputStream.h +++ /dev/null @@ -1,39 +0,0 @@ -#pragma once - -#include -#include - - -namespace DB -{ - -/** Merging consecutive blocks of stream to specified minimum size. - */ -class SquashingBlockOutputStream : public IBlockOutputStream -{ -public: - SquashingBlockOutputStream(BlockOutputStreamPtr dst, Block header_, size_t min_block_size_rows, size_t min_block_size_bytes); - - Block getHeader() const override { return header; } - void write(const Block & block) override; - - void flush() override; - void writePrefix() override; - void writeSuffix() override; - - /// Don't write blocks less than specified size even when flush method was called by user. 
- void disableFlush() { disable_flush = true; } - -private: - BlockOutputStreamPtr output; - Block header; - - SquashingTransform transform; - bool all_written = false; - - void finalize(); - - bool disable_flush = false; -}; - -} diff --git a/src/DataStreams/tests/gtest_blocks_size_merging_streams.cpp b/src/DataStreams/tests/gtest_blocks_size_merging_streams.cpp index 71cf41fcbab..7625fe8fa09 100644 --- a/src/DataStreams/tests/gtest_blocks_size_merging_streams.cpp +++ b/src/DataStreams/tests/gtest_blocks_size_merging_streams.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include using namespace DB; @@ -88,15 +88,18 @@ TEST(MergingSortedTest, SimpleBlockSizeTest) pipe.addTransform(std::move(transform)); QueryPipeline pipeline(std::move(pipe)); - pipeline.setNumThreads(1); - auto stream = std::make_shared(std::move(pipeline)); + PullingPipelineExecutor executor(pipeline); size_t total_rows = 0; - auto block1 = stream->read(); - auto block2 = stream->read(); - auto block3 = stream->read(); + Block block1; + Block block2; + Block block3; + executor.pull(block1); + executor.pull(block2); + executor.pull(block3); - EXPECT_EQ(stream->read(), Block()); + Block tmp_block; + ASSERT_FALSE(executor.pull(tmp_block)); for (const auto & block : {block1, block2, block3}) total_rows += block.rows(); @@ -132,14 +135,17 @@ TEST(MergingSortedTest, MoreInterestingBlockSizes) pipe.addTransform(std::move(transform)); QueryPipeline pipeline(std::move(pipe)); - pipeline.setNumThreads(1); - auto stream = std::make_shared(std::move(pipeline)); + PullingPipelineExecutor executor(pipeline); - auto block1 = stream->read(); - auto block2 = stream->read(); - auto block3 = stream->read(); + Block block1; + Block block2; + Block block3; + executor.pull(block1); + executor.pull(block2); + executor.pull(block3); - EXPECT_EQ(stream->read(), Block()); + Block tmp_block; + ASSERT_FALSE(executor.pull(tmp_block)); EXPECT_EQ(block1.rows(), (1000 + 1500 + 1400) / 3); EXPECT_EQ(block2.rows(), (1000 + 1500 + 1400) / 3); diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index 60526a1e5b0..aecbc1474f8 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -5,7 +5,6 @@ #include #include #include -#include #include diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp index 5a714645978..f4a5b6b5e4e 100644 --- a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp +++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp @@ -14,7 +14,6 @@ # include # include # include -# include # include # include # include diff --git a/src/Dictionaries/LibraryDictionarySource.cpp b/src/Dictionaries/LibraryDictionarySource.cpp index 551bb1ee2dd..f117cfb179e 100644 --- a/src/Dictionaries/LibraryDictionarySource.cpp +++ b/src/Dictionaries/LibraryDictionarySource.cpp @@ -1,6 +1,5 @@ #include "LibraryDictionarySource.h" -#include #include #include #include diff --git a/src/Dictionaries/XDBCDictionarySource.cpp b/src/Dictionaries/XDBCDictionarySource.cpp index 80081e67b42..b0be90c26a5 100644 --- a/src/Dictionaries/XDBCDictionarySource.cpp +++ b/src/Dictionaries/XDBCDictionarySource.cpp @@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include @@ -18,6 +17,7 @@ #include "registerDictionaries.h" #include #include +#include namespace DB diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index d3ff5cbf8a7..ed9f9d52b94 100644 --- a/src/Formats/FormatFactory.cpp 
+++ b/src/Formats/FormatFactory.cpp @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 6e729eb2370..46fa31e2738 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index a36941ea07a..8a31917caef 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include @@ -20,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 85cc889319f..b4ffa15a869 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1,4 +1,3 @@ -#include #include #include @@ -64,7 +63,6 @@ #include #include #include -#include #include #include #include @@ -158,24 +156,16 @@ InterpreterSelectQuery::InterpreterSelectQuery( ContextPtr context_, const SelectQueryOptions & options_, const Names & required_result_column_names_) - : InterpreterSelectQuery(query_ptr_, context_, nullptr, std::nullopt, nullptr, options_, required_result_column_names_) + : InterpreterSelectQuery(query_ptr_, context_, std::nullopt, nullptr, options_, required_result_column_names_) { } -InterpreterSelectQuery::InterpreterSelectQuery( - const ASTPtr & query_ptr_, - ContextPtr context_, - const BlockInputStreamPtr & input_, - const SelectQueryOptions & options_) - : InterpreterSelectQuery(query_ptr_, context_, input_, std::nullopt, nullptr, options_.copy().noSubquery()) -{} - InterpreterSelectQuery::InterpreterSelectQuery( const ASTPtr & query_ptr_, ContextPtr context_, Pipe input_pipe_, const SelectQueryOptions & options_) - : InterpreterSelectQuery(query_ptr_, context_, nullptr, std::move(input_pipe_), nullptr, options_.copy().noSubquery()) + : InterpreterSelectQuery(query_ptr_, context_, std::move(input_pipe_), nullptr, options_.copy().noSubquery()) {} InterpreterSelectQuery::InterpreterSelectQuery( @@ -184,7 +174,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( const StoragePtr & storage_, const StorageMetadataPtr & metadata_snapshot_, const SelectQueryOptions & options_) - : InterpreterSelectQuery(query_ptr_, context_, nullptr, std::nullopt, storage_, options_.copy().noSubquery(), {}, metadata_snapshot_) + : InterpreterSelectQuery(query_ptr_, context_, std::nullopt, storage_, options_.copy().noSubquery(), {}, metadata_snapshot_) {} InterpreterSelectQuery::~InterpreterSelectQuery() = default; @@ -268,7 +258,6 @@ static bool shouldIgnoreQuotaAndLimits(const StorageID & table_id) InterpreterSelectQuery::InterpreterSelectQuery( const ASTPtr & query_ptr_, ContextPtr context_, - const BlockInputStreamPtr & input_, std::optional input_pipe_, const StoragePtr & storage_, const SelectQueryOptions & options_, @@ -277,7 +266,6 @@ InterpreterSelectQuery::InterpreterSelectQuery( /// NOTE: the query almost always should be cloned because it will be modified during analysis. : IInterpreterUnionOrSelectQuery(options_.modify_inplace ? 
query_ptr_ : query_ptr_->clone(), context_, options_) , storage(storage_) - , input(input_) , input_pipe(std::move(input_pipe_)) , log(&Poco::Logger::get("InterpreterSelectQuery")) , metadata_snapshot(metadata_snapshot_) @@ -294,13 +282,8 @@ InterpreterSelectQuery::InterpreterSelectQuery( throw Exception("Too deep subqueries. Maximum: " + settings.max_subquery_depth.toString(), ErrorCodes::TOO_DEEP_SUBQUERIES); - bool has_input = input || input_pipe; - if (input) - { - /// Read from prepared input. - source_header = input->getHeader(); - } - else if (input_pipe) + bool has_input = input_pipe != std::nullopt; + if (input_pipe) { /// Read from prepared input. source_header = input_pipe->getHeader(); @@ -450,17 +433,17 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (!options.only_analyze) { - if (query.sampleSize() && (input || input_pipe || !storage || !storage->supportsSampling())) + if (query.sampleSize() && (input_pipe || !storage || !storage->supportsSampling())) throw Exception("Illegal SAMPLE: table doesn't support sampling", ErrorCodes::SAMPLING_NOT_SUPPORTED); - if (query.final() && (input || input_pipe || !storage || !storage->supportsFinal())) + if (query.final() && (input_pipe || !storage || !storage->supportsFinal())) throw Exception( - (!input && !input_pipe && storage) ? "Storage " + storage->getName() + " doesn't support FINAL" : "Illegal FINAL", + (!input_pipe && storage) ? "Storage " + storage->getName() + " doesn't support FINAL" : "Illegal FINAL", ErrorCodes::ILLEGAL_FINAL); - if (query.prewhere() && (input || input_pipe || !storage || !storage->supportsPrewhere())) + if (query.prewhere() && (input_pipe || !storage || !storage->supportsPrewhere())) throw Exception( - (!input && !input_pipe && storage) ? "Storage " + storage->getName() + " doesn't support PREWHERE" : "Illegal PREWHERE", + (!input_pipe && storage) ? "Storage " + storage->getName() + " doesn't support PREWHERE" : "Illegal PREWHERE", ErrorCodes::ILLEGAL_PREWHERE); /// Save the new temporary tables in the query context @@ -578,7 +561,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( void InterpreterSelectQuery::buildQueryPlan(QueryPlan & query_plan) { - executeImpl(query_plan, input, std::move(input_pipe)); + executeImpl(query_plan, std::move(input_pipe)); /// We must guarantee that result structure is the same as in getSampleBlock() /// @@ -926,7 +909,7 @@ static bool hasWithTotalsInAnySubqueryInFromClause(const ASTSelectQuery & query) } -void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInputStreamPtr & prepared_input, std::optional prepared_pipe) +void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional prepared_pipe) { /** Streams of data. When the query is executed in parallel, we have several data streams. 
* If there is no GROUP BY, then perform all operations before ORDER BY and LIMIT in parallel, then @@ -1010,13 +993,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu } else { - if (prepared_input) - { - auto prepared_source_step - = std::make_unique(Pipe(std::make_shared(prepared_input)), context); - query_plan.addStep(std::move(prepared_source_step)); - } - else if (prepared_pipe) + if (prepared_pipe) { auto prepared_source_step = std::make_unique(std::move(*prepared_pipe), context); query_plan.addStep(std::move(prepared_source_step)); @@ -1580,7 +1557,7 @@ void InterpreterSelectQuery::addPrewhereAliasActions() { if (!expressions.prewhere_info) { - const bool does_storage_support_prewhere = !input && !input_pipe && storage && storage->supportsPrewhere(); + const bool does_storage_support_prewhere = !input_pipe && storage && storage->supportsPrewhere(); if (does_storage_support_prewhere && shouldMoveToPrewhere()) { /// Execute row level filter in prewhere as a part of "move to prewhere" optimization. diff --git a/src/Interpreters/InterpreterSelectQuery.h b/src/Interpreters/InterpreterSelectQuery.h index 99c95a8d624..21e15bc74bb 100644 --- a/src/Interpreters/InterpreterSelectQuery.h +++ b/src/Interpreters/InterpreterSelectQuery.h @@ -108,7 +108,6 @@ private: InterpreterSelectQuery( const ASTPtr & query_ptr_, ContextPtr context_, - const BlockInputStreamPtr & input_, std::optional input_pipe, const StoragePtr & storage_, const SelectQueryOptions &, @@ -122,7 +121,7 @@ private: Block getSampleBlockImpl(); - void executeImpl(QueryPlan & query_plan, const BlockInputStreamPtr & prepared_input, std::optional prepared_pipe); + void executeImpl(QueryPlan & query_plan, std::optional prepared_pipe); /// Different stages of query execution. @@ -198,7 +197,6 @@ private: TableLockHolder table_lock; /// Used when we read from prepared input, not table or subquery. 
- BlockInputStreamPtr input; std::optional input_pipe; Poco::Logger * log; diff --git a/src/Interpreters/MergeJoin.cpp b/src/Interpreters/MergeJoin.cpp index 9cfc43b92c5..1fc551334e2 100644 --- a/src/Interpreters/MergeJoin.cpp +++ b/src/Interpreters/MergeJoin.cpp @@ -15,7 +15,7 @@ #include #include #include -#include +#include namespace DB @@ -592,9 +592,10 @@ void MergeJoin::mergeInMemoryRightBlocks() builder.getHeader(), right_sort_description, max_rows_in_right_block, 0, 0, 0, 0, nullptr, 0)); auto pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); - auto sorted_input = PipelineExecutingBlockInputStream(std::move(pipeline)); + PullingPipelineExecutor executor(pipeline); - while (Block block = sorted_input.read()) + Block block; + while (executor.pull(block)) { if (!block.rows()) continue; diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 2c12c4a6879..3d0813579ce 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Processors/Executors/PipelineExecutingBlockInputStream.cpp b/src/Processors/Executors/PipelineExecutingBlockInputStream.cpp deleted file mode 100644 index bdfbbc2874e..00000000000 --- a/src/Processors/Executors/PipelineExecutingBlockInputStream.cpp +++ /dev/null @@ -1,124 +0,0 @@ -#include -#include -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -PipelineExecutingBlockInputStream::PipelineExecutingBlockInputStream(QueryPipeline pipeline_) - : pipeline(std::make_unique(std::move(pipeline_))) -{ -} - -PipelineExecutingBlockInputStream::~PipelineExecutingBlockInputStream() = default; - -Block PipelineExecutingBlockInputStream::getHeader() const -{ - if (executor) - return executor->getHeader(); - - if (async_executor) - return async_executor->getHeader(); - - return pipeline->getHeader(); -} - -void PipelineExecutingBlockInputStream::createExecutor() -{ - if (pipeline->getNumThreads() > 1) - async_executor = std::make_unique(*pipeline); - else - executor = std::make_unique(*pipeline); - - is_execution_started = true; -} - -void PipelineExecutingBlockInputStream::readPrefixImpl() -{ - createExecutor(); -} - -Block PipelineExecutingBlockInputStream::readImpl() -{ - if (!is_execution_started) - createExecutor(); - - Block block; - bool can_continue = true; - while (can_continue) - { - if (executor) - can_continue = executor->pull(block); - else - can_continue = async_executor->pull(block); - - if (block) - return block; - } - - totals = executor ? executor->getTotalsBlock() - : async_executor->getTotalsBlock(); - - extremes = executor ? executor->getExtremesBlock() - : async_executor->getExtremesBlock(); - - return {}; -} - -inline static void throwIfExecutionStarted(bool is_execution_started, const char * method) -{ - if (is_execution_started) - throw Exception(String("Cannot call ") + method + - " for PipelineExecutingBlockInputStream because execution was started", - ErrorCodes::LOGICAL_ERROR); -} - -void PipelineExecutingBlockInputStream::cancel(bool kill) -{ - IBlockInputStream::cancel(kill); - - if (is_execution_started) - { - executor ? 
executor->cancel() - : async_executor->cancel(); - } -} - -void PipelineExecutingBlockInputStream::setProgressCallback(const ProgressCallback & callback) -{ - throwIfExecutionStarted(is_execution_started, "setProgressCallback"); - pipeline->setProgressCallback(callback); -} - -void PipelineExecutingBlockInputStream::setProcessListElement(QueryStatus * elem) -{ - throwIfExecutionStarted(is_execution_started, "setProcessListElement"); - IBlockInputStream::setProcessListElement(elem); - pipeline->setProcessListElement(elem); -} - -void PipelineExecutingBlockInputStream::setLimits(const StreamLocalLimits & limits_) -{ - throwIfExecutionStarted(is_execution_started, "setLimits"); - - if (limits_.mode == LimitsMode::LIMITS_TOTAL) - throw Exception("Total limits are not supported by PipelineExecutingBlockInputStream", - ErrorCodes::LOGICAL_ERROR); - - /// Local limits may be checked by IBlockInputStream itself. - IBlockInputStream::setLimits(limits_); -} - -void PipelineExecutingBlockInputStream::setQuota(const std::shared_ptr &) -{ - throw Exception("Quota is not supported by PipelineExecutingBlockInputStream", - ErrorCodes::LOGICAL_ERROR); -} - -} diff --git a/src/Processors/Executors/PipelineExecutingBlockInputStream.h b/src/Processors/Executors/PipelineExecutingBlockInputStream.h deleted file mode 100644 index 68497938ad4..00000000000 --- a/src/Processors/Executors/PipelineExecutingBlockInputStream.h +++ /dev/null @@ -1,44 +0,0 @@ -#pragma once -#include - -namespace DB -{ - -class QueryPipeline; -class PullingAsyncPipelineExecutor; -class PullingPipelineExecutor; - -/// Implement IBlockInputStream from QueryPipeline. -/// It's a temporary wrapper. -class PipelineExecutingBlockInputStream : public IBlockInputStream -{ -public: - explicit PipelineExecutingBlockInputStream(QueryPipeline pipeline_); - ~PipelineExecutingBlockInputStream() override; - - String getName() const override { return "PipelineExecuting"; } - Block getHeader() const override; - - void cancel(bool kill) override; - - /// Implement IBlockInputStream methods via QueryPipeline. - void setProgressCallback(const ProgressCallback & callback) final; - void setProcessListElement(QueryStatus * elem) final; - void setLimits(const StreamLocalLimits & limits_) final; - void setQuota(const std::shared_ptr & quota_) final; - -protected: - void readPrefixImpl() override; - Block readImpl() override; - -private: - std::unique_ptr pipeline; - /// One of executors is used. - std::unique_ptr executor; /// for single thread. - std::unique_ptr async_executor; /// for many threads. 
- bool is_execution_started = false; - - void createExecutor(); -}; - -} diff --git a/src/Processors/Formats/InputStreamFromInputFormat.h b/src/Processors/Formats/InputStreamFromInputFormat.h deleted file mode 100644 index 339f559ac9b..00000000000 --- a/src/Processors/Formats/InputStreamFromInputFormat.h +++ /dev/null @@ -1,67 +0,0 @@ -#pragma once -#include -#include - -namespace DB -{ -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -class InputStreamFromInputFormat : public IBlockInputStream -{ -public: - explicit InputStreamFromInputFormat(InputFormatPtr input_format_) - : input_format(std::move(input_format_)) - , port(input_format->getPort().getHeader(), input_format.get()) - { - connect(input_format->getPort(), port); - port.setNeeded(); - } - - String getName() const override { return input_format->getName(); } - Block getHeader() const override { return input_format->getPort().getHeader(); } - - void cancel(bool kill) override - { - input_format->cancel(); - IBlockInputStream::cancel(kill); - } - - const BlockMissingValues & getMissingValues() const override { return input_format->getMissingValues(); } - -protected: - - Block readImpl() override - { - while (true) - { - auto status = input_format->prepare(); - - switch (status) - { - case IProcessor::Status::Ready: - input_format->work(); - break; - - case IProcessor::Status::Finished: - return {}; - - case IProcessor::Status::PortFull: - return input_format->getPort().getHeader().cloneWithColumns(port.pull().detachColumns()); - - case IProcessor::Status::NeedData: - case IProcessor::Status::Async: - case IProcessor::Status::ExpandPipeline: - throw Exception("Source processor returned status " + IProcessor::statusToName(status), ErrorCodes::LOGICAL_ERROR); - } - } - } - -private: - InputFormatPtr input_format; - InputPort port; -}; - -} diff --git a/src/Processors/Pipe.cpp b/src/Processors/Pipe.cpp index ec288484ca3..acf46b95346 100644 --- a/src/Processors/Pipe.cpp +++ b/src/Processors/Pipe.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include #include @@ -9,6 +8,7 @@ #include #include #include +#include #include #include @@ -164,13 +164,7 @@ Pipe::Pipe(ProcessorPtr source, OutputPort * output, OutputPort * totals, Output Pipe::Pipe(ProcessorPtr source) { - if (auto * source_from_input_stream = typeid_cast(source.get())) - { - /// Special case for SourceFromInputStream. Will remove it later. - totals_port = source_from_input_stream->getTotalsPort(); - extremes_port = source_from_input_stream->getExtremesPort(); - } - else if (source->getOutputs().size() != 1) + if (source->getOutputs().size() != 1) checkSource(*source); if (collected_processors) diff --git a/src/Processors/QueryPipeline.cpp b/src/Processors/QueryPipeline.cpp index a76f99c9e00..98ac81f7217 100644 --- a/src/Processors/QueryPipeline.cpp +++ b/src/Processors/QueryPipeline.cpp @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -121,7 +120,6 @@ static void initRowsBeforeLimit(IOutputFormat * output_format) /// TODO: add setRowsBeforeLimitCounter as virtual method to IProcessor. 
std::vector limits; - std::vector sources; std::vector remote_sources; std::unordered_set visited; @@ -151,9 +149,6 @@ static void initRowsBeforeLimit(IOutputFormat * output_format) limits.emplace_back(limit); } - if (auto * source = typeid_cast(processor)) - sources.emplace_back(source); - if (auto * source = typeid_cast(processor)) remote_sources.emplace_back(source); } @@ -186,16 +181,13 @@ static void initRowsBeforeLimit(IOutputFormat * output_format) } } - if (!rows_before_limit_at_least && (!limits.empty() || !sources.empty() || !remote_sources.empty())) + if (!rows_before_limit_at_least && (!limits.empty() || !remote_sources.empty())) { rows_before_limit_at_least = std::make_shared(); for (auto & limit : limits) limit->setRowsBeforeLimitCounter(rows_before_limit_at_least); - for (auto & source : sources) - source->setRowsBeforeLimitCounter(rows_before_limit_at_least); - for (auto & source : remote_sources) source->setRowsBeforeLimitCounter(rows_before_limit_at_least); } diff --git a/src/Processors/QueryPipelineBuilder.cpp b/src/Processors/QueryPipelineBuilder.cpp index 8ed413166da..08a568b48f1 100644 --- a/src/Processors/QueryPipelineBuilder.cpp +++ b/src/Processors/QueryPipelineBuilder.cpp @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -468,7 +467,6 @@ void QueryPipelineBuilder::initRowsBeforeLimit() /// TODO: add setRowsBeforeLimitCounter as virtual method to IProcessor. std::vector limits; - std::vector sources; std::vector remote_sources; std::unordered_set visited; @@ -498,9 +496,6 @@ void QueryPipelineBuilder::initRowsBeforeLimit() limits.emplace_back(limit); } - if (auto * source = typeid_cast(processor)) - sources.emplace_back(source); - if (auto * source = typeid_cast(processor)) remote_sources.emplace_back(source); } @@ -533,16 +528,13 @@ void QueryPipelineBuilder::initRowsBeforeLimit() } } - if (!rows_before_limit_at_least && (!limits.empty() || !sources.empty() || !remote_sources.empty())) + if (!rows_before_limit_at_least && (!limits.empty() || !remote_sources.empty())) { rows_before_limit_at_least = std::make_shared(); for (auto & limit : limits) limit->setRowsBeforeLimitCounter(rows_before_limit_at_least); - for (auto & source : sources) - source->setRowsBeforeLimitCounter(rows_before_limit_at_least); - for (auto & source : remote_sources) source->setRowsBeforeLimitCounter(rows_before_limit_at_least); } diff --git a/src/Processors/QueryPlan/ExpressionStep.cpp b/src/Processors/QueryPlan/ExpressionStep.cpp index b4ff1a1281c..d1b9c2cad63 100644 --- a/src/Processors/QueryPlan/ExpressionStep.cpp +++ b/src/Processors/QueryPlan/ExpressionStep.cpp @@ -4,7 +4,6 @@ #include #include #include -#include #include #include diff --git a/src/Processors/Sources/SourceFromInputStream.cpp b/src/Processors/Sources/SourceFromInputStream.cpp deleted file mode 100644 index 57e449370a5..00000000000 --- a/src/Processors/Sources/SourceFromInputStream.cpp +++ /dev/null @@ -1,195 +0,0 @@ -#include -#include -#include - -namespace DB -{ -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -SourceFromInputStream::SourceFromInputStream(BlockInputStreamPtr stream_, bool force_add_aggregating_info_) - : ISourceWithProgress(stream_->getHeader()) - , force_add_aggregating_info(force_add_aggregating_info_) - , stream(std::move(stream_)) -{ - init(); -} - -void SourceFromInputStream::init() -{ - const auto & sample = getPort().getHeader(); - for (auto & type : sample.getDataTypes()) - if (typeid_cast(type.get())) - has_aggregate_functions = true; -} - -void 
SourceFromInputStream::addTotalsPort() -{ - if (totals_port) - throw Exception("Totals port was already added for SourceFromInputStream.", ErrorCodes::LOGICAL_ERROR); - - outputs.emplace_back(outputs.front().getHeader(), this); - totals_port = &outputs.back(); -} - -void SourceFromInputStream::addExtremesPort() -{ - if (extremes_port) - throw Exception("Extremes port was already added for SourceFromInputStream.", ErrorCodes::LOGICAL_ERROR); - - outputs.emplace_back(outputs.front().getHeader(), this); - extremes_port = &outputs.back(); -} - -IProcessor::Status SourceFromInputStream::prepare() -{ - auto status = ISource::prepare(); - - if (status == Status::Finished) - { - is_generating_finished = true; - - /// Read postfix and get totals if needed. - if (!is_stream_finished && !isCancelled()) - return Status::Ready; - - if (totals_port && !totals_port->isFinished()) - { - if (has_totals) - { - if (!totals_port->canPush()) - return Status::PortFull; - - totals_port->push(std::move(totals)); - has_totals = false; - } - - totals_port->finish(); - } - - if (extremes_port && !extremes_port->isFinished()) - { - if (has_extremes) - { - if (!extremes_port->canPush()) - return Status::PortFull; - - extremes_port->push(std::move(extremes)); - has_extremes = false; - } - - extremes_port->finish(); - } - } - - return status; -} - -void SourceFromInputStream::work() -{ - if (!is_generating_finished) - { - try - { - ISource::work(); - } - catch (...) - { - /// Won't read suffix in case of exception. - is_stream_finished = true; - throw; - } - - return; - } - - if (is_stream_finished) - return; - - if (rows_before_limit) - { - const auto & info = stream->getProfileInfo(); - if (info.hasAppliedLimit()) - rows_before_limit->add(info.getRowsBeforeLimit()); - } - - stream->readSuffix(); - - if (auto totals_block = stream->getTotals()) - { - totals.setColumns(totals_block.getColumns(), 1); - has_totals = true; - } - - is_stream_finished = true; -} - -Chunk SourceFromInputStream::generate() -{ - if (is_stream_finished) - return {}; - - if (!is_stream_started) - { - stream->readPrefix(); - is_stream_started = true; - } - - auto block = stream->read(); - if (!block && !isCancelled()) - { - if (rows_before_limit) - { - const auto & info = stream->getProfileInfo(); - if (info.hasAppliedLimit()) - rows_before_limit->add(info.getRowsBeforeLimit()); - } - - stream->readSuffix(); - - if (auto totals_block = stream->getTotals()) - { - if (totals_block.rows() > 0) /// Sometimes we can get empty totals. Skip it. - { - totals.setColumns(totals_block.getColumns(), totals_block.rows()); - has_totals = true; - } - } - - if (auto extremes_block = stream->getExtremes()) - { - if (extremes_block.rows() > 0) /// Sometimes we can get empty extremes. Skip it. 
- { - extremes.setColumns(extremes_block.getColumns(), extremes_block.rows()); - has_extremes = true; - } - } - - is_stream_finished = true; - return {}; - } - - if (isCancelled()) - return {}; - -#ifndef NDEBUG - assertBlocksHaveEqualStructure(getPort().getHeader(), block, "SourceFromInputStream"); -#endif - - UInt64 num_rows = block.rows(); - Chunk chunk(block.getColumns(), num_rows); - - if (force_add_aggregating_info || has_aggregate_functions) - { - auto info = std::make_shared(); - info->bucket_num = block.info.bucket_num; - info->is_overflows = block.info.is_overflows; - chunk.setChunkInfo(std::move(info)); - } - - return chunk; -} - -} diff --git a/src/Processors/Sources/SourceFromInputStream.h b/src/Processors/Sources/SourceFromInputStream.h deleted file mode 100644 index 9649385909c..00000000000 --- a/src/Processors/Sources/SourceFromInputStream.h +++ /dev/null @@ -1,77 +0,0 @@ -#pragma once - -#include -#include -#include - - -namespace DB -{ - -class IBlockInputStream; -using BlockInputStreamPtr = std::shared_ptr; - -/// Wrapper for IBlockInputStream which implements ISourceWithProgress. -class SourceFromInputStream : public ISourceWithProgress -{ -public: - /// If force_add_aggregating_info is enabled, AggregatedChunkInfo (with bucket number and is_overflows flag) will be added to result chunk. - explicit SourceFromInputStream(BlockInputStreamPtr stream_, bool force_add_aggregating_info_ = false); - String getName() const override { return "SourceFromInputStream"; } - - Status prepare() override; - void work() override; - - Chunk generate() override; - - BlockInputStreamPtr & getStream() { return stream; } - - void addTotalsPort(); - void addExtremesPort(); - - OutputPort * getTotalsPort() const { return totals_port; } - OutputPort * getExtremesPort() const { return extremes_port; } - - void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) { rows_before_limit.swap(counter); } - - /// Implementation for methods from ISourceWithProgress. - void setLimits(const StreamLocalLimits & limits_) final { stream->setLimits(limits_); } - void setLeafLimits(const SizeLimits &) final { } - void setQuota(const std::shared_ptr & quota_) final { stream->setQuota(quota_); } - void setProcessListElement(QueryStatus * elem) final { stream->setProcessListElement(elem); } - void setProgressCallback(const ProgressCallback & callback) final { stream->setProgressCallback(callback); } - void addTotalRowsApprox(size_t value) final { stream->addTotalRowsApprox(value); } - - /// Stop reading from stream if output port is finished. 
- void onUpdatePorts() override - { - if (getPort().isFinished()) - cancel(); - } - -protected: - void onCancel() override { stream->cancel(false); } - -private: - bool has_aggregate_functions = false; - bool force_add_aggregating_info = false; - BlockInputStreamPtr stream; - - RowsBeforeLimitCounterPtr rows_before_limit; - - Chunk totals; - OutputPort * totals_port = nullptr; - bool has_totals = false; - - Chunk extremes; - OutputPort * extremes_port = nullptr; - bool has_extremes = false; - - bool is_generating_finished = false; - bool is_stream_finished = false; - bool is_stream_started = false; - - void init(); -}; - -} diff --git a/src/Processors/Transforms/AggregatingTransform.h b/src/Processors/Transforms/AggregatingTransform.h index 1639bc4df4b..2a515fdf3be 100644 --- a/src/Processors/Transforms/AggregatingTransform.h +++ b/src/Processors/Transforms/AggregatingTransform.h @@ -24,9 +24,6 @@ public: Int32 bucket_num = -1; }; -class IBlockInputStream; -using BlockInputStreamPtr = std::shared_ptr; - using AggregatorList = std::list; using AggregatorListPtr = std::shared_ptr; diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 19385e526a7..12558054d81 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -23,6 +22,9 @@ #include #include #include +#include +#include +#include #include #include @@ -124,12 +126,13 @@ public: auto compression = chooseCompressionMethod(path, compression_method); read_buf = wrapReadBufferWithCompressionMethod(std::make_unique(uri, path, getContext()->getGlobalContext()->getConfigRef()), compression); auto input_format = getContext()->getInputFormat(format, *read_buf, sample_block, max_block_size); + pipeline = QueryPipeline(std::move(input_format)); - reader = std::make_shared(input_format); - reader->readPrefix(); + reader = std::make_unique(pipeline); } - if (auto res = reader->read()) + Block res; + if (reader->pull(res)) { Columns columns = res.getColumns(); UInt64 num_rows = res.rows(); @@ -153,15 +156,16 @@ public: return Chunk(std::move(columns), num_rows); } - reader->readSuffix(); reader.reset(); + pipeline.reset(); read_buf.reset(); } } private: std::unique_ptr read_buf; - BlockInputStreamPtr reader; + QueryPipeline pipeline; + std::unique_ptr reader; SourcesInfoPtr source_info; String uri; String format; diff --git a/src/Storages/Kafka/KafkaSource.cpp b/src/Storages/Kafka/KafkaSource.cpp index 68fc17a97e5..30c5ef3e6cd 100644 --- a/src/Storages/Kafka/KafkaSource.cpp +++ b/src/Storages/Kafka/KafkaSource.cpp @@ -1,9 +1,7 @@ #include -#include #include #include -#include #include #include #include diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 903ea81946d..690c9cbd4d0 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 357659b3bbb..e17a3dcf544 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -19,7 +19,6 @@ #include "Processors/Merges/GraphiteRollupSortedTransform.h" #include "Processors/Merges/AggregatingSortedTransform.h" #include "Processors/Merges/VersionedCollapsingTransform.h" -#include "Processors/Executors/PipelineExecutingBlockInputStream.h" #include "DataStreams/TTLBlockInputStream.h" 
#include #include diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 683689a6fa3..f4adee8c259 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -35,7 +35,6 @@ #include #include #include -#include #include #include #include @@ -56,6 +55,7 @@ #include #include #include +#include #include #include @@ -3481,9 +3481,12 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, ContextPtr loc buf, metadata_snapshot->getPartitionKey().sample_block, local_context->getSettingsRef().max_block_size); - auto input_stream = std::make_shared(input_format); + QueryPipeline pipeline(std::move(input_format)); + PullingPipelineExecutor executor(pipeline); + + Block block; + executor.pull(block); - auto block = input_stream->read(); if (!block || !block.rows()) throw Exception( "Could not parse partition value: `" + partition_ast.fields_str + "`", diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 903f4cd27fc..1ee8423b0d1 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index d939312c0bb..50ab26ec470 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -13,8 +13,6 @@ #include #include #include -#include -#include #include diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index b8941fc9d84..6ad29d01ca6 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -4,9 +4,9 @@ #include #include #include +#include #include #include -#include #include #include #include diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp index fdc30919ee7..9e138e9882a 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h index 5a99be7f38c..ff47866d587 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h @@ -7,7 +7,6 @@ #include #include #include -#include #include diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index 0cd758cf49d..4e9e2c450b1 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Storages/RabbitMQ/RabbitMQSource.cpp b/src/Storages/RabbitMQ/RabbitMQSource.cpp index 34edd06d3e2..b954ad3ab23 100644 --- a/src/Storages/RabbitMQ/RabbitMQSource.cpp +++ b/src/Storages/RabbitMQ/RabbitMQSource.cpp @@ -2,7 +2,6 @@ #include #include -#include #include #include diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 0944a8f12d5..2b4f5e4a276 100644 --- 
a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index 1ab168f772f..3a37e57e008 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -17,8 +17,8 @@ #include #include -#include #include +#include #include #include diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 9747ea2dbf6..b774622d569 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -24,8 +24,8 @@ #include #include #include -#include #include +#include #include #include #include diff --git a/src/Storages/StorageDictionary.cpp b/src/Storages/StorageDictionary.cpp index c8bc215dd6c..e305d4c6183 100644 --- a/src/Storages/StorageDictionary.cpp +++ b/src/Storages/StorageDictionary.cpp @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index b9c15e19c33..df92b270542 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -59,7 +59,6 @@ #include #include #include -#include #include #include diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index ec0bd5e5840..9aa5689aa66 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -34,8 +34,8 @@ #include #include #include -#include #include +#include #include #include #include diff --git a/src/Storages/StorageInput.cpp b/src/Storages/StorageInput.cpp index d707d7a6cdf..1138794adb0 100644 --- a/src/Storages/StorageInput.cpp +++ b/src/Storages/StorageInput.cpp @@ -6,7 +6,6 @@ #include #include #include -#include namespace DB diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index d2493ff7c43..5a9e8fc2461 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -21,7 +21,6 @@ #include #include -#include #include #include #include diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index ea42b48cace..6492c9e07c0 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include @@ -383,7 +383,7 @@ Pipe StorageMerge::createSources( { pipe = QueryPipelineBuilder::getPipe(InterpreterSelectQuery( modified_query_info.query, modified_context, - std::make_shared(header), + Pipe(std::make_shared(header)), SelectQueryOptions(processed_stage).analyze()).buildQueryPipeline()); pipe.addInterpreterContext(modified_context); diff --git a/src/Storages/StorageMongoDB.cpp b/src/Storages/StorageMongoDB.cpp index 5521b9de39c..eeb5b107b54 100644 --- a/src/Storages/StorageMongoDB.cpp +++ b/src/Storages/StorageMongoDB.cpp @@ -13,7 +13,6 @@ #include #include #include -#include #include #include diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index 001684c076d..4264be9dbc2 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index caefa097c3a..709c9dc4a63 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -33,6 +33,7 @@ #include #include #include 
+#include #include #include @@ -52,7 +53,6 @@ #include #include -#include #include #include #include diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp index e4682efeaad..e6d41a53bfc 100644 --- a/src/Storages/StorageS3Cluster.cpp +++ b/src/Storages/StorageS3Cluster.cpp @@ -25,9 +25,7 @@ #include #include #include -#include #include -#include #include "Processors/Sources/SourceWithProgress.h" #include #include diff --git a/src/Storages/StorageSQLite.cpp b/src/Storages/StorageSQLite.cpp index 32660cb1b1f..d7eef35e60b 100644 --- a/src/Storages/StorageSQLite.cpp +++ b/src/Storages/StorageSQLite.cpp @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 75ad2761362..79f1d568057 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -13,7 +13,7 @@ #include #include -#include +#include #include #include diff --git a/src/TableFunctions/TableFunctionS3Cluster.cpp b/src/TableFunctions/TableFunctionS3Cluster.cpp index 160fc3c2468..7bd8ad2e740 100644 --- a/src/TableFunctions/TableFunctionS3Cluster.cpp +++ b/src/TableFunctions/TableFunctionS3Cluster.cpp @@ -21,7 +21,6 @@ #include #include #include -#include #include "registerTableFunctions.h" From 92413aed68a0bbe01b712354949c4bf88f30b698 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 12 Oct 2021 01:01:00 +0300 Subject: [PATCH 196/438] better interfaces for IDataType and ISerialization --- src/Compression/CompressionFactory.h | 8 +- .../CompressionFactoryAdditions.cpp | 13 +- src/Core/Block.cpp | 2 +- src/DataTypes/DataTypeAggregateFunction.cpp | 2 - src/DataTypes/DataTypeArray.cpp | 71 ----------- src/DataTypes/DataTypeArray.h | 11 -- src/DataTypes/DataTypeDate.cpp | 7 -- src/DataTypes/DataTypeDateTime.cpp | 18 +-- src/DataTypes/DataTypeDateTime.h | 24 +--- src/DataTypes/DataTypeDateTime64.cpp | 14 +-- src/DataTypes/DataTypeDecimalBase.cpp | 10 -- src/DataTypes/DataTypeEnum.cpp | 1 - src/DataTypes/DataTypeFixedString.cpp | 10 -- src/DataTypes/DataTypeMap.cpp | 30 ----- src/DataTypes/DataTypeMap.h | 5 - src/DataTypes/DataTypeNested.cpp | 1 - src/DataTypes/DataTypeNothing.cpp | 3 - src/DataTypes/DataTypeNullable.cpp | 34 ----- src/DataTypes/DataTypeNullable.h | 5 - src/DataTypes/DataTypeNumberBase.cpp | 7 -- src/DataTypes/DataTypeString.cpp | 17 --- src/DataTypes/DataTypeString.h | 2 - src/DataTypes/DataTypeTuple.cpp | 82 +----------- src/DataTypes/DataTypeTuple.h | 12 +- src/DataTypes/DataTypesDecimal.cpp | 3 - src/DataTypes/IDataType.cpp | 112 ++++++++--------- src/DataTypes/IDataType.h | 35 +++--- .../Serializations/ISerialization.cpp | 119 ++++++++++++------ src/DataTypes/Serializations/ISerialization.h | 56 +++++++-- .../SerializationAggregateFunction.cpp | 1 - .../Serializations/SerializationArray.cpp | 54 ++++++-- .../Serializations/SerializationArray.h | 18 ++- .../Serializations/SerializationDate32.cpp | 1 + .../Serializations/SerializationDateTime.cpp | 5 +- .../Serializations/SerializationDateTime.h | 9 +- .../SerializationDateTime64.cpp | 4 +- .../Serializations/SerializationDateTime64.h | 9 +- .../SerializationLowCardinality.cpp | 20 ++- .../SerializationLowCardinality.h | 6 +- .../Serializations/SerializationMap.cpp | 13 +- .../Serializations/SerializationMap.h | 6 +- ...upleElement.cpp => SerializationNamed.cpp} | 23 ++-- ...ionTupleElement.h => SerializationNamed.h} | 26 +++- .../Serializations/SerializationNullable.cpp | 50 +++++++- .../Serializations/SerializationNullable.h 
| 18 ++- .../Serializations/SerializationTuple.cpp | 19 ++- .../Serializations/SerializationTuple.h | 10 +- .../Serializations/SerializationWrapper.cpp | 8 +- .../Serializations/SerializationWrapper.h | 6 +- src/DataTypes/TimezoneMixin.h | 32 +++++ src/Interpreters/InterpreterDescribeQuery.cpp | 6 +- .../MergeTree/IMergedBlockOutputStream.cpp | 3 +- .../MergeTree/MergeTreeDataPartWide.cpp | 2 +- .../MergeTreeDataPartWriterCompact.cpp | 9 +- .../MergeTree/MergeTreeDataPartWriterWide.cpp | 10 +- .../MergeTree/MergeTreeReaderCompact.cpp | 2 +- src/Storages/MergeTree/MutateTask.cpp | 5 +- src/Storages/MergeTree/checkDataPart.cpp | 5 +- src/Storages/StorageLog.cpp | 12 +- src/Storages/StorageTinyLog.cpp | 6 +- 60 files changed, 526 insertions(+), 586 deletions(-) rename src/DataTypes/Serializations/{SerializationTupleElement.cpp => SerializationNamed.cpp} (69%) rename src/DataTypes/Serializations/{SerializationTupleElement.h => SerializationNamed.h} (59%) create mode 100644 src/DataTypes/TimezoneMixin.h diff --git a/src/Compression/CompressionFactory.h b/src/Compression/CompressionFactory.h index f00e5071990..2101dc28c65 100644 --- a/src/Compression/CompressionFactory.h +++ b/src/Compression/CompressionFactory.h @@ -40,13 +40,7 @@ public: CompressionCodecPtr getDefaultCodec() const; /// Validate codecs AST specified by user and parses codecs description (substitute default parameters) - ASTPtr validateCodecAndGetPreprocessedAST(const ASTPtr & ast, const IDataType * column_type, bool sanity_check, bool allow_experimental_codecs) const; - - /// Just wrapper for previous method. - ASTPtr validateCodecAndGetPreprocessedAST(const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs) const - { - return validateCodecAndGetPreprocessedAST(ast, column_type.get(), sanity_check, allow_experimental_codecs); - } + ASTPtr validateCodecAndGetPreprocessedAST(const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs) const; /// Validate codecs AST specified by user void validateCodec(const String & family_name, std::optional level, bool sanity_check, bool allow_experimental_codecs) const; diff --git a/src/Compression/CompressionFactoryAdditions.cpp b/src/Compression/CompressionFactoryAdditions.cpp index 9e0353e6711..b5f00c60827 100644 --- a/src/Compression/CompressionFactoryAdditions.cpp +++ b/src/Compression/CompressionFactoryAdditions.cpp @@ -53,7 +53,7 @@ void CompressionCodecFactory::validateCodec( ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST( - const ASTPtr & ast, const IDataType * column_type, bool sanity_check, bool allow_experimental_codecs) const + const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs) const { if (const auto * func = ast->as()) { @@ -100,12 +100,13 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST( if (column_type) { CompressionCodecPtr prev_codec; - IDataType::StreamCallbackWithType callback = [&]( - const ISerialization::SubstreamPath & substream_path, const IDataType & substream_type) + ISerialization::StreamCallback callback = [&](const auto & substream_path) { + assert(!substream_path.empty()); if (ISerialization::isSpecialCompressionAllowed(substream_path)) { - result_codec = getImpl(codec_family_name, codec_arguments, &substream_type); + const auto & last_type = substream_path.back().data.type; + result_codec = getImpl(codec_family_name, codec_arguments, last_type.get()); /// Case for column Tuple, 
which compressed with codec which depends on data type, like Delta. /// We cannot substitute parameters for such codecs. @@ -115,8 +116,8 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST( } }; - ISerialization::SubstreamPath stream_path; - column_type->enumerateStreams(column_type->getDefaultSerialization(), callback, stream_path); + ISerialization::SubstreamPath path; + column_type->getDefaultSerialization()->enumerateStreams(path, callback, column_type, nullptr); if (!result_codec) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find any substream with data type for type {}. It's a bug", column_type->getName()); diff --git a/src/Core/Block.cpp b/src/Core/Block.cpp index a59ac60155e..5f9cb19374b 100644 --- a/src/Core/Block.cpp +++ b/src/Core/Block.cpp @@ -702,7 +702,7 @@ ColumnPtr getColumnFromBlock(const Block & block, const NameAndTypePair & column current_column = current_column->decompress(); if (column.isSubcolumn()) - return column.getTypeInStorage()->getSubcolumn(column.getSubcolumnName(), *current_column); + return column.getTypeInStorage()->getSubcolumn(column.getSubcolumnName(), current_column); return current_column; } diff --git a/src/DataTypes/DataTypeAggregateFunction.cpp b/src/DataTypes/DataTypeAggregateFunction.cpp index f7ae3170119..5c4b94ad823 100644 --- a/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/src/DataTypes/DataTypeAggregateFunction.cpp @@ -3,8 +3,6 @@ #include -#include -#include #include #include diff --git a/src/DataTypes/DataTypeArray.cpp b/src/DataTypes/DataTypeArray.cpp index bcf3a9c1f57..f78aebd2d99 100644 --- a/src/DataTypes/DataTypeArray.cpp +++ b/src/DataTypes/DataTypeArray.cpp @@ -1,17 +1,9 @@ #include -#include -#include -#include -#include - #include -#include #include #include #include -#include -#include #include @@ -53,69 +45,6 @@ bool DataTypeArray::equals(const IDataType & rhs) const return typeid(rhs) == typeid(*this) && nested->equals(*static_cast(rhs).nested); } -DataTypePtr DataTypeArray::tryGetSubcolumnType(const String & subcolumn_name) const -{ - return tryGetSubcolumnTypeImpl(subcolumn_name, 0); -} - -DataTypePtr DataTypeArray::tryGetSubcolumnTypeImpl(const String & subcolumn_name, size_t level) const -{ - if (subcolumn_name == "size" + std::to_string(level)) - return std::make_shared(); - - DataTypePtr subcolumn; - if (const auto * nested_array = typeid_cast(nested.get())) - subcolumn = nested_array->tryGetSubcolumnTypeImpl(subcolumn_name, level + 1); - else - subcolumn = nested->tryGetSubcolumnType(subcolumn_name); - - if (subcolumn && subcolumn_name != MAIN_SUBCOLUMN_NAME) - subcolumn = std::make_shared(std::move(subcolumn)); - - return subcolumn; -} - -ColumnPtr DataTypeArray::getSubcolumn(const String & subcolumn_name, const IColumn & column) const -{ - return getSubcolumnImpl(subcolumn_name, column, 0); -} - -ColumnPtr DataTypeArray::getSubcolumnImpl(const String & subcolumn_name, const IColumn & column, size_t level) const -{ - const auto & column_array = assert_cast(column); - if (subcolumn_name == "size" + std::to_string(level)) - return arrayOffsetsToSizes(column_array.getOffsetsColumn()); - - ColumnPtr subcolumn; - if (const auto * nested_array = typeid_cast(nested.get())) - subcolumn = nested_array->getSubcolumnImpl(subcolumn_name, column_array.getData(), level + 1); - else - subcolumn = nested->getSubcolumn(subcolumn_name, column_array.getData()); - - return ColumnArray::create(subcolumn, column_array.getOffsetsPtr()); -} - -SerializationPtr DataTypeArray::getSubcolumnSerialization( - const String 
& subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const -{ - return getSubcolumnSerializationImpl(subcolumn_name, base_serialization_getter, 0); -} - -SerializationPtr DataTypeArray::getSubcolumnSerializationImpl( - const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter, size_t level) const -{ - if (subcolumn_name == "size" + std::to_string(level)) - return std::make_shared(base_serialization_getter(DataTypeUInt64()), subcolumn_name, false); - - SerializationPtr subcolumn; - if (const auto * nested_array = typeid_cast(nested.get())) - subcolumn = nested_array->getSubcolumnSerializationImpl(subcolumn_name, base_serialization_getter, level + 1); - else - subcolumn = nested->getSubcolumnSerialization(subcolumn_name, base_serialization_getter); - - return std::make_shared(subcolumn); -} - SerializationPtr DataTypeArray::doGetDefaultSerialization() const { return std::make_shared(nested->getDefaultSerialization()); diff --git a/src/DataTypes/DataTypeArray.h b/src/DataTypes/DataTypeArray.h index c720a15d798..564dbba8503 100644 --- a/src/DataTypes/DataTypeArray.h +++ b/src/DataTypes/DataTypeArray.h @@ -54,23 +54,12 @@ public: return nested->isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion(); } - DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const override; - ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const override; - SerializationPtr getSubcolumnSerialization( - const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const override; - SerializationPtr doGetDefaultSerialization() const override; const DataTypePtr & getNestedType() const { return nested; } /// 1 for plain array, 2 for array of arrays and so on. 
size_t getNumberOfDimensions() const; - -private: - ColumnPtr getSubcolumnImpl(const String & subcolumn_name, const IColumn & column, size_t level) const; - DataTypePtr tryGetSubcolumnTypeImpl(const String & subcolumn_name, size_t level) const; - SerializationPtr getSubcolumnSerializationImpl( - const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter, size_t level) const; }; } diff --git a/src/DataTypes/DataTypeDate.cpp b/src/DataTypes/DataTypeDate.cpp index 0df2e329702..ee4b0065e59 100644 --- a/src/DataTypes/DataTypeDate.cpp +++ b/src/DataTypes/DataTypeDate.cpp @@ -1,14 +1,7 @@ -#include -#include - -#include #include #include #include -#include - - namespace DB { diff --git a/src/DataTypes/DataTypeDateTime.cpp b/src/DataTypes/DataTypeDateTime.cpp index 4284c9ae4bd..c7722e1c1d9 100644 --- a/src/DataTypes/DataTypeDateTime.cpp +++ b/src/DataTypes/DataTypeDateTime.cpp @@ -1,28 +1,12 @@ #include #include -#include -#include -#include -#include -#include #include -#include #include -#include -#include -#include namespace DB { -TimezoneMixin::TimezoneMixin(const String & time_zone_name) - : has_explicit_time_zone(!time_zone_name.empty()), - time_zone(DateLUT::instance(time_zone_name)), - utc_time_zone(DateLUT::instance("UTC")) -{ -} - DataTypeDateTime::DataTypeDateTime(const String & time_zone_name) : TimezoneMixin(time_zone_name) { @@ -52,7 +36,7 @@ bool DataTypeDateTime::equals(const IDataType & rhs) const SerializationPtr DataTypeDateTime::doGetDefaultSerialization() const { - return std::make_shared(time_zone, utc_time_zone); + return std::make_shared(*this); } } diff --git a/src/DataTypes/DataTypeDateTime.h b/src/DataTypes/DataTypeDateTime.h index 926d529a5d8..57052144216 100644 --- a/src/DataTypes/DataTypeDateTime.h +++ b/src/DataTypes/DataTypeDateTime.h @@ -2,33 +2,11 @@ #include #include - -class DateLUTImpl; +#include namespace DB { -/** Mixin-class that manages timezone info for timezone-aware DateTime implementations - * - * Must be used as a (second) base for class implementing IDateType-interface. - */ -class TimezoneMixin -{ -public: - explicit TimezoneMixin(const String & time_zone_name = ""); - TimezoneMixin(const TimezoneMixin &) = default; - - const DateLUTImpl & getTimeZone() const { return time_zone; } - bool hasExplicitTimeZone() const { return has_explicit_time_zone; } - -protected: - /// true if time zone name was provided in data type parameters, false if it's using default time zone. - bool has_explicit_time_zone; - - const DateLUTImpl & time_zone; - const DateLUTImpl & utc_time_zone; -}; - /** DateTime stores time as unix timestamp. * The value itself is independent of time zone. 
* diff --git a/src/DataTypes/DataTypeDateTime64.cpp b/src/DataTypes/DataTypeDateTime64.cpp index bde7bebf455..4fa1569f0e8 100644 --- a/src/DataTypes/DataTypeDateTime64.cpp +++ b/src/DataTypes/DataTypeDateTime64.cpp @@ -1,19 +1,7 @@ #include #include - -#include -#include -#include -#include -#include -#include #include -#include #include -#include -#include -#include - #include #include @@ -65,7 +53,7 @@ bool DataTypeDateTime64::equals(const IDataType & rhs) const SerializationPtr DataTypeDateTime64::doGetDefaultSerialization() const { - return std::make_shared(time_zone, utc_time_zone, scale); + return std::make_shared(scale, *this); } } diff --git a/src/DataTypes/DataTypeDecimalBase.cpp b/src/DataTypes/DataTypeDecimalBase.cpp index f4c28088c48..62218694924 100644 --- a/src/DataTypes/DataTypeDecimalBase.cpp +++ b/src/DataTypes/DataTypeDecimalBase.cpp @@ -1,15 +1,5 @@ #include - -#include -#include -#include -#include -#include -#include #include -#include -#include - #include namespace DB diff --git a/src/DataTypes/DataTypeEnum.cpp b/src/DataTypes/DataTypeEnum.cpp index c86dd9d0b33..b659d92e3f4 100644 --- a/src/DataTypes/DataTypeEnum.cpp +++ b/src/DataTypes/DataTypeEnum.cpp @@ -1,5 +1,4 @@ #include -#include #include #include #include diff --git a/src/DataTypes/DataTypeFixedString.cpp b/src/DataTypes/DataTypeFixedString.cpp index a40592ba023..48034a31707 100644 --- a/src/DataTypes/DataTypeFixedString.cpp +++ b/src/DataTypes/DataTypeFixedString.cpp @@ -1,22 +1,12 @@ #include -#include -#include #include #include #include -#include -#include -#include -#include - #include #include -#include -#include - namespace DB { diff --git a/src/DataTypes/DataTypeMap.cpp b/src/DataTypes/DataTypeMap.cpp index 5acf498c9fc..41de17982aa 100644 --- a/src/DataTypes/DataTypeMap.cpp +++ b/src/DataTypes/DataTypeMap.cpp @@ -1,9 +1,7 @@ #include #include #include -#include #include -#include #include #include #include @@ -11,14 +9,7 @@ #include #include #include -#include -#include -#include -#include -#include -#include #include -#include #include @@ -84,27 +75,6 @@ std::string DataTypeMap::doGetName() const return s.str(); } -static const IColumn & extractNestedColumn(const IColumn & column) -{ - return assert_cast(column).getNestedColumn(); -} - -DataTypePtr DataTypeMap::tryGetSubcolumnType(const String & subcolumn_name) const -{ - return nested->tryGetSubcolumnType(subcolumn_name); -} - -ColumnPtr DataTypeMap::getSubcolumn(const String & subcolumn_name, const IColumn & column) const -{ - return nested->getSubcolumn(subcolumn_name, extractNestedColumn(column)); -} - -SerializationPtr DataTypeMap::getSubcolumnSerialization( - const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const -{ - return nested->getSubcolumnSerialization(subcolumn_name, base_serialization_getter); -} - MutableColumnPtr DataTypeMap::createColumn() const { return ColumnMap::create(nested->createColumn()); diff --git a/src/DataTypes/DataTypeMap.h b/src/DataTypes/DataTypeMap.h index 09b8448885a..04377f85cfb 100644 --- a/src/DataTypes/DataTypeMap.h +++ b/src/DataTypes/DataTypeMap.h @@ -32,11 +32,6 @@ public: bool canBeInsideNullable() const override { return false; } - DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const override; - ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const override; - SerializationPtr getSubcolumnSerialization( - const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const override; - 
MutableColumnPtr createColumn() const override; Field getDefault() const override; diff --git a/src/DataTypes/DataTypeNested.cpp b/src/DataTypes/DataTypeNested.cpp index eba1bba5dfe..fe7cd515c81 100644 --- a/src/DataTypes/DataTypeNested.cpp +++ b/src/DataTypes/DataTypeNested.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #include #include diff --git a/src/DataTypes/DataTypeNothing.cpp b/src/DataTypes/DataTypeNothing.cpp index 388a65754b5..c2b552035a0 100644 --- a/src/DataTypes/DataTypeNothing.cpp +++ b/src/DataTypes/DataTypeNothing.cpp @@ -1,10 +1,7 @@ -#include #include #include #include #include -#include -#include namespace DB diff --git a/src/DataTypes/DataTypeNullable.cpp b/src/DataTypes/DataTypeNullable.cpp index 3820a320c6d..b354b1278be 100644 --- a/src/DataTypes/DataTypeNullable.cpp +++ b/src/DataTypes/DataTypeNullable.cpp @@ -1,17 +1,9 @@ #include #include -#include #include #include -#include #include #include -#include -#include -#include -#include -#include -#include #include #include #include @@ -63,32 +55,6 @@ bool DataTypeNullable::equals(const IDataType & rhs) const return rhs.isNullable() && nested_data_type->equals(*static_cast(rhs).nested_data_type); } -DataTypePtr DataTypeNullable::tryGetSubcolumnType(const String & subcolumn_name) const -{ - if (subcolumn_name == "null") - return std::make_shared(); - - return nested_data_type->tryGetSubcolumnType(subcolumn_name); -} - -ColumnPtr DataTypeNullable::getSubcolumn(const String & subcolumn_name, const IColumn & column) const -{ - const auto & column_nullable = assert_cast(column); - if (subcolumn_name == "null") - return column_nullable.getNullMapColumnPtr(); - - return nested_data_type->getSubcolumn(subcolumn_name, column_nullable.getNestedColumn()); -} - -SerializationPtr DataTypeNullable::getSubcolumnSerialization( - const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const -{ - if (subcolumn_name == "null") - return std::make_shared(base_serialization_getter(DataTypeUInt8()), subcolumn_name, false); - - return nested_data_type->getSubcolumnSerialization(subcolumn_name, base_serialization_getter); -} - SerializationPtr DataTypeNullable::doGetDefaultSerialization() const { return std::make_shared(nested_data_type->getDefaultSerialization()); diff --git a/src/DataTypes/DataTypeNullable.h b/src/DataTypes/DataTypeNullable.h index 1557179d072..1a54d0de611 100644 --- a/src/DataTypes/DataTypeNullable.h +++ b/src/DataTypes/DataTypeNullable.h @@ -41,11 +41,6 @@ public: bool onlyNull() const override; bool canBeInsideLowCardinality() const override { return nested_data_type->canBeInsideLowCardinality(); } - DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const override; - ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const override; - SerializationPtr getSubcolumnSerialization( - const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const override; - const DataTypePtr & getNestedType() const { return nested_data_type; } private: SerializationPtr doGetDefaultSerialization() const override; diff --git a/src/DataTypes/DataTypeNumberBase.cpp b/src/DataTypes/DataTypeNumberBase.cpp index a73d591654a..f668a4c522e 100644 --- a/src/DataTypes/DataTypeNumberBase.cpp +++ b/src/DataTypes/DataTypeNumberBase.cpp @@ -1,13 +1,6 @@ #include #include #include -#include -#include -#include -#include -#include -#include -#include namespace DB diff --git a/src/DataTypes/DataTypeString.cpp 
b/src/DataTypes/DataTypeString.cpp index 41ae578a70f..84610557d21 100644 --- a/src/DataTypes/DataTypeString.cpp +++ b/src/DataTypes/DataTypeString.cpp @@ -1,14 +1,6 @@ -#include - #include -#include - -#include -#include - #include -#include #include #include #include @@ -16,15 +8,6 @@ #include #include -#include -#include -#include - -#ifdef __SSE2__ - #include -#endif - - namespace DB { diff --git a/src/DataTypes/DataTypeString.h b/src/DataTypes/DataTypeString.h index 0fc38e9c6f0..fd674505bc0 100644 --- a/src/DataTypes/DataTypeString.h +++ b/src/DataTypes/DataTypeString.h @@ -1,7 +1,5 @@ #pragma once -#include - #include diff --git a/src/DataTypes/DataTypeTuple.cpp b/src/DataTypes/DataTypeTuple.cpp index aa0a57c636e..6bca7f2bac2 100644 --- a/src/DataTypes/DataTypeTuple.cpp +++ b/src/DataTypes/DataTypeTuple.cpp @@ -3,20 +3,17 @@ #include #include #include -#include #include #include #include #include -#include +#include #include #include #include -#include #include #include #include -#include #include #include @@ -107,11 +104,6 @@ static inline IColumn & extractElementColumn(IColumn & column, size_t idx) return assert_cast(column).getColumn(idx); } -static inline const IColumn & extractElementColumn(const IColumn & column, size_t idx) -{ - return assert_cast(column).getColumn(idx); -} - template static void addElementSafe(const DataTypes & elems, IColumn & column, F && impl) { @@ -234,74 +226,6 @@ size_t DataTypeTuple::getSizeOfValueInMemory() const return res; } -template -auto DataTypeTuple::getSubcolumnEntity(const String & subcolumn_name, - const OnSuccess & on_success, const OnContinue & on_continue) const -{ - using ReturnType = decltype(on_success(0)); - for (size_t i = 0; i < names.size(); ++i) - { - if (startsWith(subcolumn_name, names[i])) - { - size_t name_length = names[i].size(); - - if (subcolumn_name.size() == name_length) - return on_success(i); - - if (subcolumn_name[name_length] == '.') - return on_continue(i, subcolumn_name.substr(name_length + 1)); - } - } - - return ReturnType{}; -} - -DataTypePtr DataTypeTuple::tryGetSubcolumnType(const String & subcolumn_name) const -{ - if (subcolumn_name == MAIN_SUBCOLUMN_NAME) - return shared_from_this(); - - auto on_success = [&](size_t pos) { return elems[pos]; }; - auto on_continue = [&](size_t pos, const String & next_subcolumn) { return elems[pos]->tryGetSubcolumnType(next_subcolumn); }; - - return getSubcolumnEntity(subcolumn_name, on_success, on_continue); -} - -ColumnPtr DataTypeTuple::getSubcolumn(const String & subcolumn_name, const IColumn & column) const -{ - auto on_success = [&](size_t pos) { return extractElementColumn(column, pos).getPtr(); }; - auto on_continue = [&](size_t pos, const String & next_subcolumn) - { - return elems[pos]->getSubcolumn(next_subcolumn, extractElementColumn(column, pos)); - }; - - if (auto subcolumn = getSubcolumnEntity(subcolumn_name, on_success, on_continue)) - return subcolumn; - - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName()); -} - -SerializationPtr DataTypeTuple::getSubcolumnSerialization( - const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const -{ - auto on_success = [&](size_t pos) - { - return std::make_shared(base_serialization_getter(*elems[pos]), names[pos]); - }; - - auto on_continue = [&](size_t pos, const String & next_subcolumn) - { - auto next_serialization = elems[pos]->getSubcolumnSerialization(next_subcolumn, base_serialization_getter); - return 
std::make_shared(next_serialization, names[pos]); - }; - - if (auto serialization = getSubcolumnEntity(subcolumn_name, on_success, on_continue)) - return serialization; - - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName()); -} - - SerializationPtr DataTypeTuple::doGetDefaultSerialization() const { SerializationTuple::ElementSerializations serializations(elems.size()); @@ -310,7 +234,7 @@ SerializationPtr DataTypeTuple::doGetDefaultSerialization() const { String elem_name = use_explicit_names ? names[i] : toString(i + 1); auto serialization = elems[i]->getDefaultSerialization(); - serializations[i] = std::make_shared(serialization, elem_name); + serializations[i] = std::make_shared(serialization, elem_name); } return std::make_shared(std::move(serializations), use_explicit_names); @@ -325,7 +249,7 @@ SerializationPtr DataTypeTuple::getSerialization(const String & column_name, con String elem_name = use_explicit_names ? names[i] : toString(i + 1); auto subcolumn_name = Nested::concatenateName(column_name, elem_name); auto serializaion = elems[i]->getSerialization(subcolumn_name, callback); - serializations[i] = std::make_shared(serializaion, elem_name); + serializations[i] = std::make_shared(serializaion, elem_name); } return std::make_shared(std::move(serializations), use_explicit_names); diff --git a/src/DataTypes/DataTypeTuple.h b/src/DataTypes/DataTypeTuple.h index e572b23f987..8dae8b7765b 100644 --- a/src/DataTypes/DataTypeTuple.h +++ b/src/DataTypes/DataTypeTuple.h @@ -52,16 +52,11 @@ public: size_t getMaximumSizeOfValueInMemory() const override; size_t getSizeOfValueInMemory() const override; - DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const override; - ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const override; - SerializationPtr getSerialization(const String & column_name, const StreamExistenceCallback & callback) const override; - SerializationPtr getSubcolumnSerialization( - const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const override; - SerializationPtr doGetDefaultSerialization() const override; + const DataTypePtr & getElement(size_t i) const { return elems[i]; } const DataTypes & getElements() const { return elems; } const Strings & getElementNames() const { return names; } @@ -69,11 +64,6 @@ public: bool haveExplicitNames() const { return have_explicit_names; } bool serializeNames() const { return serialize_names; } - -private: - template - auto getSubcolumnEntity(const String & subcolumn_name, - const OnSuccess & on_success, const OnContinue & on_continue) const; }; } diff --git a/src/DataTypes/DataTypesDecimal.cpp b/src/DataTypes/DataTypesDecimal.cpp index 9f7320197c8..f0fbd6cab26 100644 --- a/src/DataTypes/DataTypesDecimal.cpp +++ b/src/DataTypes/DataTypesDecimal.cpp @@ -1,16 +1,13 @@ #include #include -#include #include #include #include #include #include #include -#include #include -#include #include diff --git a/src/DataTypes/IDataType.cpp b/src/DataTypes/IDataType.cpp index 93bb1757a4d..669876c792d 100644 --- a/src/DataTypes/IDataType.cpp +++ b/src/DataTypes/IDataType.cpp @@ -2,7 +2,6 @@ #include #include -#include #include #include @@ -11,7 +10,6 @@ #include #include #include -#include namespace DB @@ -65,12 +63,40 @@ size_t IDataType::getSizeOfValueInMemory() const throw Exception("Value of type " + getName() + " in memory is not of fixed size.", ErrorCodes::LOGICAL_ERROR); } +void IDataType::forEachSubcolumn( 
+ const SubcolumnCallback & callback, + const SerializationPtr & serialization, + const DataTypePtr & type, + const ColumnPtr & column) +{ + ISerialization::StreamCallback callback_with_data = [&](const auto & subpath) + { + for (size_t i = 0; i < subpath.size(); ++i) + { + if (!subpath[i].visited && ISerialization::hasSubcolumnForPath(subpath, i + 1)) + { + auto name = ISerialization::getSubcolumnNameForStream(subpath, i + 1); + auto data = ISerialization::createFromPath(subpath, i); + callback(subpath, name, data); + } + subpath[i].visited = true; + } + }; + + ISerialization::SubstreamPath path; + serialization->enumerateStreams(path, callback_with_data, type, column); +} + DataTypePtr IDataType::tryGetSubcolumnType(const String & subcolumn_name) const { - if (subcolumn_name == MAIN_SUBCOLUMN_NAME) - return shared_from_this(); + DataTypePtr res; + forEachSubcolumn([&](const auto &, const auto & name, const auto & data) + { + if (name == subcolumn_name) + res = data.type; + }, getDefaultSerialization(), getPtr(), nullptr); - return nullptr; + return res; } DataTypePtr IDataType::getSubcolumnType(const String & subcolumn_name) const @@ -82,42 +108,43 @@ DataTypePtr IDataType::getSubcolumnType(const String & subcolumn_name) const throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName()); } -ColumnPtr IDataType::getSubcolumn(const String & subcolumn_name, const IColumn &) const +SerializationPtr IDataType::getSubcolumnSerialization(const String & subcolumn_name, const SerializationPtr & serialization) const { + SerializationPtr res; + forEachSubcolumn([&](const auto &, const auto & name, const auto & data) + { + if (name == subcolumn_name) + res = data.serialization; + }, serialization, nullptr, nullptr); + + if (res) + return res; + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName()); } -void IDataType::forEachSubcolumn(const SubcolumnCallback & callback) const +ColumnPtr IDataType::getSubcolumn(const String & subcolumn_name, const ColumnPtr & column) const { - NameSet set; - getDefaultSerialization()->enumerateStreams([&, this](const ISerialization::SubstreamPath & substream_path) + ColumnPtr res; + forEachSubcolumn([&](const auto &, const auto & name, const auto & data) { - ISerialization::SubstreamPath new_path; - /// Iterate over path to try to get intermediate subcolumns for complex nested types. - for (const auto & elem : substream_path) - { - new_path.push_back(elem); - auto name = ISerialization::getSubcolumnNameForStream(new_path); - auto type = tryGetSubcolumnType(name); + if (name == subcolumn_name) + res = data.column; + }, getDefaultSerialization(), nullptr, column); - /// Subcolumn names may repeat among several substream paths. 
- if (!name.empty() && type && !set.count(name)) - { - callback(name, type, substream_path); - set.insert(name); - } - } - }); + if (res) + return res; + + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName()); } Names IDataType::getSubcolumnNames() const { Names res; - forEachSubcolumn([&](const auto & name, const auto &, const auto &) + forEachSubcolumn([&](const auto &, const auto & name, const auto &) { res.push_back(name); - }); - + }, getDefaultSerialization(), nullptr, nullptr); return res; } @@ -144,24 +171,14 @@ SerializationPtr IDataType::getDefaultSerialization() const return doGetDefaultSerialization(); } -SerializationPtr IDataType::getSubcolumnSerialization(const String & subcolumn_name, const BaseSerializationGetter &) const -{ - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName()); -} - // static SerializationPtr IDataType::getSerialization(const NameAndTypePair & column, const IDataType::StreamExistenceCallback & callback) { if (column.isSubcolumn()) { - /// Wrap to custom serialization deepest subcolumn, which is represented in non-complex type. - auto base_serialization_getter = [&](const IDataType & subcolumn_type) - { - return subcolumn_type.getSerialization(column.name, callback); - }; - const auto & type_in_storage = column.getTypeInStorage(); - return type_in_storage->getSubcolumnSerialization(column.getSubcolumnName(), base_serialization_getter); + auto default_serialization = type_in_storage->getDefaultSerialization(); + return type_in_storage->getSubcolumnSerialization(column.getSubcolumnName(), default_serialization); } return column.type->getSerialization(column.name, callback); @@ -172,21 +189,4 @@ SerializationPtr IDataType::getSerialization(const String &, const StreamExisten return getDefaultSerialization(); } -DataTypePtr IDataType::getTypeForSubstream(const ISerialization::SubstreamPath & substream_path) const -{ - auto type = tryGetSubcolumnType(ISerialization::getSubcolumnNameForStream(substream_path)); - if (type) - return type->getSubcolumnType(MAIN_SUBCOLUMN_NAME); - - return getSubcolumnType(MAIN_SUBCOLUMN_NAME); -} - -void IDataType::enumerateStreams(const SerializationPtr & serialization, const StreamCallbackWithType & callback, ISerialization::SubstreamPath & path) const -{ - serialization->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) - { - callback(substream_path, *getTypeForSubstream(substream_path)); - }, path); -} - } diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 360bf9f16e0..a53fdac797f 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -70,19 +70,31 @@ public: return doGetName(); } + DataTypePtr getPtr() const { return shared_from_this(); } + /// Name of data type family (example: FixedString, Array). virtual const char * getFamilyName() const = 0; /// Data type id. It's used for runtime type checks. 
virtual TypeIndex getTypeId() const = 0; - static constexpr auto MAIN_SUBCOLUMN_NAME = "__main"; - virtual DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const; + DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const; DataTypePtr getSubcolumnType(const String & subcolumn_name) const; - virtual ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const; - using SubcolumnCallback = std::function; - void forEachSubcolumn(const SubcolumnCallback & callback) const; + SerializationPtr getSubcolumnSerialization(const String & subcolumn_name, const SerializationPtr & serialization) const; + ColumnPtr getSubcolumn(const String & subcolumn_name, const ColumnPtr & column) const; + + using SubcolumnCallback = std::function; + + static void forEachSubcolumn( + const SubcolumnCallback & callback, + const SerializationPtr & serialization, + const DataTypePtr & type, + const ColumnPtr & column); + Names getSubcolumnNames() const; /// Returns default serialization of data type. @@ -93,7 +105,6 @@ public: /// one of serialization types, that serialization will be chosen for reading. /// If callback always returned false, the default serialization will be chosen. using StreamExistenceCallback = std::function; - using BaseSerializationGetter = std::function; /// Chooses serialization for reading of one column or subcolumns by /// checking existence of substreams using callback. @@ -103,22 +114,10 @@ public: virtual SerializationPtr getSerialization(const String & column_name, const StreamExistenceCallback & callback) const; - /// Returns serialization wrapper for reading one particular subcolumn of data type. - virtual SerializationPtr getSubcolumnSerialization( - const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const; - - using StreamCallbackWithType = std::function; - - void enumerateStreams(const SerializationPtr & serialization, const StreamCallbackWithType & callback, ISerialization::SubstreamPath & path) const; - void enumerateStreams(const SerializationPtr & serialization, const StreamCallbackWithType & callback, ISerialization::SubstreamPath && path) const { enumerateStreams(serialization, callback, path); } - void enumerateStreams(const SerializationPtr & serialization, const StreamCallbackWithType & callback) const { enumerateStreams(serialization, callback, {}); } - protected: virtual String doGetName() const { return getFamilyName(); } virtual SerializationPtr doGetDefaultSerialization() const = 0; - DataTypePtr getTypeForSubstream(const ISerialization::SubstreamPath & substream_path) const; - public: /** Create empty column for corresponding type. 
*/ diff --git a/src/DataTypes/Serializations/ISerialization.cpp b/src/DataTypes/Serializations/ISerialization.cpp index 7077c5bfa14..a6d9185429c 100644 --- a/src/DataTypes/Serializations/ISerialization.cpp +++ b/src/DataTypes/Serializations/ISerialization.cpp @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB @@ -17,30 +18,11 @@ namespace ErrorCodes String ISerialization::Substream::toString() const { - switch (type) - { - case ArrayElements: - return "ArrayElements"; - case ArraySizes: - return "ArraySizes"; - case NullableElements: - return "NullableElements"; - case NullMap: - return "NullMap"; - case TupleElement: - return "TupleElement(" + tuple_element_name + ", " - + std::to_string(escape_tuple_delimiter) + ")"; - case DictionaryKeys: - return "DictionaryKeys"; - case DictionaryIndexes: - return "DictionaryIndexes"; - case SparseElements: - return "SparseElements"; - case SparseOffsets: - return "SparseOffsets"; - } + if (type == TupleElement) + return fmt::format("TupleElement({}, escape_tuple_delimiter={})", + tuple_element_name, escape_tuple_delimiter ? "true" : "false"); - __builtin_unreachable(); + return String(magic_enum::enum_name(type)); } String ISerialization::SubstreamPath::toString() const @@ -57,9 +39,21 @@ String ISerialization::SubstreamPath::toString() const return wb.str(); } +void ISerialization::enumerateStreams( + SubstreamPath & path, + const StreamCallback & callback, + DataTypePtr type, + ColumnPtr column) const +{ + path.push_back(Substream::Regular); + path.back().data = {type, column, getPtr(), nullptr}; + callback(path); + path.pop_back(); +} + void ISerialization::enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const { - callback(path); + enumerateStreams(path, callback, nullptr, nullptr); } void ISerialization::serializeBinaryBulk(const IColumn & column, WriteBuffer &, size_t, size_t) const @@ -104,40 +98,48 @@ void ISerialization::deserializeBinaryBulkWithMultipleStreams( } } +namespace +{ + +using SubstreamIterator = ISerialization::SubstreamPath::const_iterator; + static String getNameForSubstreamPath( String stream_name, - const ISerialization::SubstreamPath & path, + SubstreamIterator begin, + SubstreamIterator end, bool escape_tuple_delimiter) { using Substream = ISerialization::Substream; size_t array_level = 0; - for (const auto & elem : path) + for (auto it = begin; it != end; ++it) { - if (elem.type == Substream::NullMap) + if (it->type == Substream::NullMap) stream_name += ".null"; - else if (elem.type == Substream::ArraySizes) + else if (it->type == Substream::ArraySizes) stream_name += ".size" + toString(array_level); - else if (elem.type == Substream::ArrayElements) + else if (it->type == Substream::ArrayElements) ++array_level; - else if (elem.type == Substream::DictionaryKeys) + else if (it->type == Substream::DictionaryKeys) stream_name += ".dict"; - else if (elem.type == Substream::SparseOffsets) + else if (it->type == Substream::SparseOffsets) stream_name += ".sparse.idx"; - else if (elem.type == Substream::TupleElement) + else if (it->type == Substream::TupleElement) { /// For compatibility reasons, we use %2E (escaped dot) instead of dot. /// Because nested data may be represented not by Array of Tuple, /// but by separate Array columns with names in a form of a.b, /// and name is encoded as a whole. - stream_name += (escape_tuple_delimiter && elem.escape_tuple_delimiter ? 
- escapeForFileName(".") : ".") + escapeForFileName(elem.tuple_element_name); + stream_name += (escape_tuple_delimiter && it->escape_tuple_delimiter ? + escapeForFileName(".") : ".") + escapeForFileName(it->tuple_element_name); } } return stream_name; } +} + String ISerialization::getFileNameForStream(const NameAndTypePair & column, const SubstreamPath & path) { return getFileNameForStream(column.getNameInStorage(), path); @@ -152,12 +154,17 @@ String ISerialization::getFileNameForStream(const String & name_in_storage, cons else stream_name = escapeForFileName(name_in_storage); - return getNameForSubstreamPath(std::move(stream_name), path, true); + return getNameForSubstreamPath(std::move(stream_name), path.begin(), path.end(), true); } String ISerialization::getSubcolumnNameForStream(const SubstreamPath & path) { - auto subcolumn_name = getNameForSubstreamPath("", path, false); + return getSubcolumnNameForStream(path, path.size()); +} + +String ISerialization::getSubcolumnNameForStream(const SubstreamPath & path, size_t prefix_len) +{ + auto subcolumn_name = getNameForSubstreamPath("", path.begin(), path.begin() + prefix_len, false); if (!subcolumn_name.empty()) subcolumn_name = subcolumn_name.substr(1); // It starts with a dot. @@ -195,4 +202,44 @@ bool ISerialization::isSpecialCompressionAllowed(const SubstreamPath & path) return true; } +size_t ISerialization::getArrayLevel(const SubstreamPath & path) +{ + size_t level = 0; + for (const auto & elem : path) + level += elem.type == Substream::ArrayElements; + return level; } + +bool ISerialization::hasSubcolumnForPath(const SubstreamPath & path, size_t prefix_len) +{ + if (prefix_len == 0 || prefix_len > path.size()) + return false; + + size_t last_elem = prefix_len - 1; + return path[last_elem].type == Substream::NullMap + || path[last_elem].type == Substream::TupleElement + || path[last_elem].type == Substream::ArraySizes; +} + +ISerialization::SubstreamData ISerialization::createFromPath(const SubstreamPath & path, size_t prefix_len) +{ + assert(prefix_len < path.size()); + + SubstreamData res = path[prefix_len].data; + res.creator.reset(); + for (ssize_t i = static_cast(prefix_len) - 1; i >= 0; --i) + { + const auto & creator = path[i].data.creator; + if (creator) + { + res.type = res.type ? creator->create(res.type) : res.type; + res.serialization = res.serialization ? creator->create(res.serialization) : res.serialization; + res.column = res.column ? 
creator->create(res.column) : res.column; + } + } + + return res; +} + +} + diff --git a/src/DataTypes/Serializations/ISerialization.h b/src/DataTypes/Serializations/ISerialization.h index f1d82a2000a..7562cfcb9a0 100644 --- a/src/DataTypes/Serializations/ISerialization.h +++ b/src/DataTypes/Serializations/ISerialization.h @@ -2,35 +2,39 @@ #include #include +#include +#include #include #include namespace DB { -class IDataType; - class ReadBuffer; class WriteBuffer; class ProtobufReader; class ProtobufWriter; -class IColumn; -using ColumnPtr = COW::Ptr; -using MutableColumnPtr = COW::MutablePtr; +class IDataType; +using DataTypePtr = std::shared_ptr; + +class ISerialization; +using SerializationPtr = std::shared_ptr; class Field; struct FormatSettings; struct NameAndTypePair; -class ISerialization +class ISerialization : private boost::noncopyable, public std::enable_shared_from_this { public: ISerialization() = default; virtual ~ISerialization() = default; + SerializationPtr getPtr() const { return shared_from_this(); } + /** Binary serialization for range of values in column - for writing to disk/network, etc. * * Some data types are represented in multiple streams while being serialized. @@ -54,6 +58,24 @@ public: * Default implementations of ...WithMultipleStreams methods will call serializeBinaryBulk, deserializeBinaryBulk for single stream. */ + struct ISubcolumnCreator + { + virtual DataTypePtr create(const DataTypePtr & prev) const = 0; + virtual SerializationPtr create(const SerializationPtr & prev) const = 0; + virtual ColumnPtr create(const ColumnPtr & prev) const = 0; + virtual ~ISubcolumnCreator() = default; + }; + + using SubcolumnCreatorPtr = std::shared_ptr; + + struct SubstreamData + { + DataTypePtr type; + ColumnPtr column; + SerializationPtr serialization; + SubcolumnCreatorPtr creator; + }; + struct Substream { enum Type @@ -71,7 +93,10 @@ public: SparseElements, SparseOffsets, + + Regular, }; + Type type; /// Index of tuple element, starting at 1 or name. @@ -80,6 +105,12 @@ public: /// Do we need to escape a dot in filenames for tuple elements. bool escape_tuple_delimiter = true; + /// Data for current substream. + SubstreamData data; + + /// Flag, that may help to traverse substream paths. 
+ mutable bool visited = false; + Substream(Type type_) : type(type_) {} String toString() const; @@ -96,7 +127,13 @@ public: using StreamCallback = std::function; - virtual void enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const; + virtual void enumerateStreams( + SubstreamPath & path, + const StreamCallback & callback, + DataTypePtr type, + ColumnPtr column) const; + + void enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const; void enumerateStreams(const StreamCallback & callback, SubstreamPath && path) const { enumerateStreams(callback, path); } void enumerateStreams(const StreamCallback & callback) const { enumerateStreams(callback, {}); } @@ -249,11 +286,16 @@ public: static String getFileNameForStream(const NameAndTypePair & column, const SubstreamPath & path); static String getFileNameForStream(const String & name_in_storage, const SubstreamPath & path); static String getSubcolumnNameForStream(const SubstreamPath & path); + static String getSubcolumnNameForStream(const SubstreamPath & path, size_t prefix_len); static void addToSubstreamsCache(SubstreamsCache * cache, const SubstreamPath & path, ColumnPtr column); static ColumnPtr getFromSubstreamsCache(SubstreamsCache * cache, const SubstreamPath & path); static bool isSpecialCompressionAllowed(const SubstreamPath & path); + static size_t getArrayLevel(const SubstreamPath & path); + + static bool hasSubcolumnForPath(const SubstreamPath & path, size_t prefix_len); + static SubstreamData createFromPath(const SubstreamPath & path, size_t prefix_len); }; using SerializationPtr = std::shared_ptr; diff --git a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp index 925ba0b9e74..2339f23853e 100644 --- a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp +++ b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp @@ -1,7 +1,6 @@ #include #include -#include #include diff --git a/src/DataTypes/Serializations/SerializationArray.cpp b/src/DataTypes/Serializations/SerializationArray.cpp index 70a72c51e78..4ccee54c294 100644 --- a/src/DataTypes/Serializations/SerializationArray.cpp +++ b/src/DataTypes/Serializations/SerializationArray.cpp @@ -1,5 +1,8 @@ #include #include +#include +#include +#include #include #include #include @@ -177,16 +180,53 @@ ColumnPtr arrayOffsetsToSizes(const IColumn & column) return column_sizes; } - -void SerializationArray::enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const +DataTypePtr SerializationArray::SubcolumnCreator::create(const DataTypePtr & prev) const { - path.push_back(Substream::ArraySizes); - callback(path); - path.back() = Substream::ArrayElements; - nested->enumerateStreams(callback, path); - path.pop_back(); + return std::make_shared(prev); } +SerializationPtr SerializationArray::SubcolumnCreator::create(const SerializationPtr & prev) const +{ + return std::make_shared(prev); +} + +ColumnPtr SerializationArray::SubcolumnCreator::create(const ColumnPtr & prev) const +{ + return ColumnArray::create(prev, offsets); +} + +void SerializationArray::enumerateStreams( + SubstreamPath & path, + const StreamCallback & callback, + DataTypePtr type, + ColumnPtr column) const +{ + const auto * type_array = type ? &assert_cast(*type) : nullptr; + const auto * column_array = column ? &assert_cast(*column) : nullptr; + auto offsets_column = column_array ? 
column_array->getOffsetsPtr() : nullptr; + + path.push_back(Substream::ArraySizes); + path.back().data = + { + type ? std::make_shared() : nullptr, + offsets_column ? arrayOffsetsToSizes(*offsets_column) : nullptr, + std::make_shared( + std::make_shared>(), + "size" + std::to_string(getArrayLevel(path)), false), + nullptr, + }; + + callback(path); + + path.back() = Substream::ArrayElements; + path.back().data = {type, column, getPtr(), std::make_shared(offsets_column)}; + + auto next_type = type_array ? type_array->getNestedType() : nullptr; + auto next_column = column_array ? column_array->getDataPtr() : nullptr; + + nested->enumerateStreams(path, callback, next_type, next_column); + path.pop_back(); +} void SerializationArray::serializeBinaryBulkStatePrefix( SerializeBinaryBulkSettings & settings, diff --git a/src/DataTypes/Serializations/SerializationArray.h b/src/DataTypes/Serializations/SerializationArray.h index 71037090a48..83045d4c033 100644 --- a/src/DataTypes/Serializations/SerializationArray.h +++ b/src/DataTypes/Serializations/SerializationArray.h @@ -35,7 +35,11 @@ public: * This is necessary, because when implementing nested structures, several arrays can have common sizes. */ - void enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const override; + void enumerateStreams( + SubstreamPath & path, + const StreamCallback & callback, + DataTypePtr type, + ColumnPtr column) const override; void serializeBinaryBulkStatePrefix( SerializeBinaryBulkSettings & settings, @@ -62,6 +66,18 @@ public: DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsCache * cache) const override; + +private: + struct SubcolumnCreator : public ISubcolumnCreator + { + const ColumnPtr offsets; + + SubcolumnCreator(const ColumnPtr & offsets_) : offsets(offsets_) {} + + DataTypePtr create(const DataTypePtr & prev) const override; + SerializationPtr create(const SerializationPtr & prev) const override; + ColumnPtr create(const ColumnPtr & prev) const override; + }; }; ColumnPtr arrayOffsetsToSizes(const IColumn & column); diff --git a/src/DataTypes/Serializations/SerializationDate32.cpp b/src/DataTypes/Serializations/SerializationDate32.cpp index e43edbac592..ff1bb9b2c30 100644 --- a/src/DataTypes/Serializations/SerializationDate32.cpp +++ b/src/DataTypes/Serializations/SerializationDate32.cpp @@ -8,6 +8,7 @@ namespace DB { + void SerializationDate32::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { writeDateText(ExtendedDayNum(assert_cast(column).getData()[row_num]), ostr); diff --git a/src/DataTypes/Serializations/SerializationDateTime.cpp b/src/DataTypes/Serializations/SerializationDateTime.cpp index b93c69203cb..0e3b60f3772 100644 --- a/src/DataTypes/Serializations/SerializationDateTime.cpp +++ b/src/DataTypes/Serializations/SerializationDateTime.cpp @@ -32,9 +32,8 @@ inline void readText(time_t & x, ReadBuffer & istr, const FormatSettings & setti } -SerializationDateTime::SerializationDateTime( - const DateLUTImpl & time_zone_, const DateLUTImpl & utc_time_zone_) - : time_zone(time_zone_), utc_time_zone(utc_time_zone_) +SerializationDateTime::SerializationDateTime(const TimezoneMixin & time_zone_) + : TimezoneMixin(time_zone_) { } diff --git a/src/DataTypes/Serializations/SerializationDateTime.h b/src/DataTypes/Serializations/SerializationDateTime.h index 8cf57ddef89..75334592422 100644 --- a/src/DataTypes/Serializations/SerializationDateTime.h +++ 
b/src/DataTypes/Serializations/SerializationDateTime.h @@ -1,20 +1,17 @@ #pragma once #include +#include class DateLUTImpl; namespace DB { -class SerializationDateTime final : public SerializationNumber +class SerializationDateTime final : public SerializationNumber, public TimezoneMixin { -private: - const DateLUTImpl & time_zone; - const DateLUTImpl & utc_time_zone; - public: - SerializationDateTime(const DateLUTImpl & time_zone_, const DateLUTImpl & utc_time_zone_); + SerializationDateTime(const TimezoneMixin & time_zone_); void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; diff --git a/src/DataTypes/Serializations/SerializationDateTime64.cpp b/src/DataTypes/Serializations/SerializationDateTime64.cpp index 8d446d3b9ad..613422f21fb 100644 --- a/src/DataTypes/Serializations/SerializationDateTime64.cpp +++ b/src/DataTypes/Serializations/SerializationDateTime64.cpp @@ -17,9 +17,9 @@ namespace DB { SerializationDateTime64::SerializationDateTime64( - const DateLUTImpl & time_zone_, const DateLUTImpl & utc_time_zone_, UInt32 scale_) + UInt32 scale_, const TimezoneMixin & time_zone_) : SerializationDecimalBase(DecimalUtils::max_precision, scale_) - , time_zone(time_zone_), utc_time_zone(utc_time_zone_) + , TimezoneMixin(time_zone_) { } diff --git a/src/DataTypes/Serializations/SerializationDateTime64.h b/src/DataTypes/Serializations/SerializationDateTime64.h index c36649daef1..1679170b36f 100644 --- a/src/DataTypes/Serializations/SerializationDateTime64.h +++ b/src/DataTypes/Serializations/SerializationDateTime64.h @@ -1,20 +1,17 @@ #pragma once #include +#include class DateLUTImpl; namespace DB { -class SerializationDateTime64 final : public SerializationDecimalBase +class SerializationDateTime64 final : public SerializationDecimalBase, public TimezoneMixin { -private: - const DateLUTImpl & time_zone; - const DateLUTImpl & utc_time_zone; - public: - SerializationDateTime64(const DateLUTImpl & time_zone_, const DateLUTImpl & utc_time_zone_, UInt32 scale_); + SerializationDateTime64(UInt32 scale_, const TimezoneMixin & time_zone_); void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; diff --git a/src/DataTypes/Serializations/SerializationLowCardinality.cpp b/src/DataTypes/Serializations/SerializationLowCardinality.cpp index e9bb62f74c5..7a86d5413b2 100644 --- a/src/DataTypes/Serializations/SerializationLowCardinality.cpp +++ b/src/DataTypes/Serializations/SerializationLowCardinality.cpp @@ -40,11 +40,27 @@ SerializationLowCardinality::SerializationLowCardinality(const DataTypePtr & dic { } -void SerializationLowCardinality::enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const +void SerializationLowCardinality::enumerateStreams( + SubstreamPath & path, + const StreamCallback & callback, + DataTypePtr type, + ColumnPtr column) const { + const auto * column_lc = column ? &getColumnLowCardinality(*column) : nullptr; + + SubstreamData data; + data.type = type ? dictionary_type : nullptr; + data.column = column_lc ? 
column_lc->getDictionary().getNestedColumn() : nullptr; + data.serialization = dict_inner_serialization; + path.push_back(Substream::DictionaryKeys); - dict_inner_serialization->enumerateStreams(callback, path); + path.back().data = data; + + dict_inner_serialization->enumerateStreams(path, callback, data.type, data.column); + path.back() = Substream::DictionaryIndexes; + path.back().data = {type, column, getPtr(), nullptr}; + callback(path); path.pop_back(); } diff --git a/src/DataTypes/Serializations/SerializationLowCardinality.h b/src/DataTypes/Serializations/SerializationLowCardinality.h index e9ca0349e38..f82b35a52d5 100644 --- a/src/DataTypes/Serializations/SerializationLowCardinality.h +++ b/src/DataTypes/Serializations/SerializationLowCardinality.h @@ -17,7 +17,11 @@ private: public: SerializationLowCardinality(const DataTypePtr & dictionary_type); - void enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const override; + void enumerateStreams( + SubstreamPath & path, + const StreamCallback & callback, + DataTypePtr type, + ColumnPtr column) const override; void serializeBinaryBulkStatePrefix( SerializeBinaryBulkSettings & settings, diff --git a/src/DataTypes/Serializations/SerializationMap.cpp b/src/DataTypes/Serializations/SerializationMap.cpp index e8446781f10..2e436070e1c 100644 --- a/src/DataTypes/Serializations/SerializationMap.cpp +++ b/src/DataTypes/Serializations/SerializationMap.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -250,10 +251,16 @@ void SerializationMap::deserializeTextCSV(IColumn & column, ReadBuffer & istr, c deserializeText(column, rb, settings); } - -void SerializationMap::enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const +void SerializationMap::enumerateStreams( + SubstreamPath & path, + const StreamCallback & callback, + DataTypePtr type, + ColumnPtr column) const { - nested->enumerateStreams(callback, path); + auto next_type = type ? assert_cast(*type).getNestedType() : nullptr; + auto next_column = column ? 
assert_cast(*column).getNestedColumnPtr() : nullptr; + + nested->enumerateStreams(path, callback, next_type, next_column); } void SerializationMap::serializeBinaryBulkStatePrefix( diff --git a/src/DataTypes/Serializations/SerializationMap.h b/src/DataTypes/Serializations/SerializationMap.h index 6523d5388d0..b6a003139ec 100644 --- a/src/DataTypes/Serializations/SerializationMap.h +++ b/src/DataTypes/Serializations/SerializationMap.h @@ -31,7 +31,11 @@ public: void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; - void enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const override; + void enumerateStreams( + SubstreamPath & path, + const StreamCallback & callback, + DataTypePtr type, + ColumnPtr column) const override; void serializeBinaryBulkStatePrefix( SerializeBinaryBulkSettings & settings, diff --git a/src/DataTypes/Serializations/SerializationTupleElement.cpp b/src/DataTypes/Serializations/SerializationNamed.cpp similarity index 69% rename from src/DataTypes/Serializations/SerializationTupleElement.cpp rename to src/DataTypes/Serializations/SerializationNamed.cpp index 4b50810fcd6..4ef4d4527f8 100644 --- a/src/DataTypes/Serializations/SerializationTupleElement.cpp +++ b/src/DataTypes/Serializations/SerializationNamed.cpp @@ -1,18 +1,21 @@ -#include +#include namespace DB { -void SerializationTupleElement::enumerateStreams( +void SerializationNamed::enumerateStreams( + SubstreamPath & path, const StreamCallback & callback, - SubstreamPath & path) const + DataTypePtr type, + ColumnPtr column) const { addToPath(path); - nested_serialization->enumerateStreams(callback, path); + path.back().data = {type, column, getPtr(), std::make_shared(name, escape_delimiter)}; + nested_serialization->enumerateStreams(path, callback, type, column); path.pop_back(); } -void SerializationTupleElement::serializeBinaryBulkStatePrefix( +void SerializationNamed::serializeBinaryBulkStatePrefix( SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state) const { @@ -21,7 +24,7 @@ void SerializationTupleElement::serializeBinaryBulkStatePrefix( settings.path.pop_back(); } -void SerializationTupleElement::serializeBinaryBulkStateSuffix( +void SerializationNamed::serializeBinaryBulkStateSuffix( SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state) const { @@ -30,7 +33,7 @@ void SerializationTupleElement::serializeBinaryBulkStateSuffix( settings.path.pop_back(); } -void SerializationTupleElement::deserializeBinaryBulkStatePrefix( +void SerializationNamed::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state) const { @@ -39,7 +42,7 @@ void SerializationTupleElement::deserializeBinaryBulkStatePrefix( settings.path.pop_back(); } -void SerializationTupleElement::serializeBinaryBulkWithMultipleStreams( +void SerializationNamed::serializeBinaryBulkWithMultipleStreams( const IColumn & column, size_t offset, size_t limit, @@ -51,7 +54,7 @@ void SerializationTupleElement::serializeBinaryBulkWithMultipleStreams( settings.path.pop_back(); } -void SerializationTupleElement::deserializeBinaryBulkWithMultipleStreams( +void SerializationNamed::deserializeBinaryBulkWithMultipleStreams( ColumnPtr & column, size_t limit, DeserializeBinaryBulkSettings & settings, @@ -63,7 +66,7 @@ void 
SerializationTupleElement::deserializeBinaryBulkWithMultipleStreams( settings.path.pop_back(); } -void SerializationTupleElement::addToPath(SubstreamPath & path) const +void SerializationNamed::addToPath(SubstreamPath & path) const { path.push_back(Substream::TupleElement); path.back().tuple_element_name = name; diff --git a/src/DataTypes/Serializations/SerializationTupleElement.h b/src/DataTypes/Serializations/SerializationNamed.h similarity index 59% rename from src/DataTypes/Serializations/SerializationTupleElement.h rename to src/DataTypes/Serializations/SerializationNamed.h index b85014c9e64..20dd15a20ba 100644 --- a/src/DataTypes/Serializations/SerializationTupleElement.h +++ b/src/DataTypes/Serializations/SerializationNamed.h @@ -5,14 +5,14 @@ namespace DB { -class SerializationTupleElement final : public SerializationWrapper +class SerializationNamed final : public SerializationWrapper { private: String name; bool escape_delimiter; public: - SerializationTupleElement(const SerializationPtr & nested_, const String & name_, bool escape_delimiter_ = true) + SerializationNamed(const SerializationPtr & nested_, const String & name_, bool escape_delimiter_ = true) : SerializationWrapper(nested_) , name(name_), escape_delimiter(escape_delimiter_) { @@ -21,11 +21,13 @@ public: const String & getElementName() const { return name; } void enumerateStreams( + SubstreamPath & path, const StreamCallback & callback, - SubstreamPath & path) const override; + DataTypePtr type, + ColumnPtr column) const override; void serializeBinaryBulkStatePrefix( - SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state) const override; void serializeBinaryBulkStateSuffix( @@ -51,6 +53,22 @@ public: SubstreamsCache * cache) const override; private: + struct SubcolumnCreator : public ISubcolumnCreator + { + const String name; + const bool escape_delimiter; + + SubcolumnCreator(const String & name_, bool escape_delimiter_) + : name(name_), escape_delimiter(escape_delimiter_) {} + + DataTypePtr create(const DataTypePtr & prev) const override { return prev; } + ColumnPtr create(const ColumnPtr & prev) const override { return prev; } + SerializationPtr create(const SerializationPtr & prev) const override + { + return std::make_shared(prev, name, escape_delimiter); + } + }; + void addToPath(SubstreamPath & path) const; }; diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index b607d5871d6..560a4812123 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -1,5 +1,8 @@ #include #include +#include +#include +#include #include #include @@ -20,15 +23,50 @@ namespace ErrorCodes extern const int CANNOT_READ_ALL_DATA; } -void SerializationNullable::enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const +DataTypePtr SerializationNullable::SubcolumnCreator::create(const DataTypePtr & prev) const { - path.push_back(Substream::NullMap); - callback(path); - path.back() = Substream::NullableElements; - nested->enumerateStreams(callback, path); - path.pop_back(); + return std::make_shared(prev); } +SerializationPtr SerializationNullable::SubcolumnCreator::create(const SerializationPtr & prev) const +{ + return std::make_shared(prev); +} + +ColumnPtr SerializationNullable::SubcolumnCreator::create(const ColumnPtr & prev) const +{ + return ColumnNullable::create(prev, null_map); +} + +void 
SerializationNullable::enumerateStreams( + SubstreamPath & path, + const StreamCallback & callback, + DataTypePtr type, + ColumnPtr column) const +{ + const auto * type_nullable = type ? &assert_cast(*type) : nullptr; + const auto * column_nullable = column ? &assert_cast(*column) : nullptr; + + path.push_back(Substream::NullMap); + path.back().data = + { + type_nullable ? std::make_shared() : nullptr, + column_nullable ? column_nullable->getNullMapColumnPtr() : nullptr, + std::make_shared(std::make_shared>(), "null", false), + nullptr, + }; + + callback(path); + + path.back() = Substream::NullableElements; + path.back().data = {type, column, getPtr(), std::make_shared(path.back().data.column)}; + + auto next_type = type_nullable ? type_nullable->getNestedType() : nullptr; + auto next_column = column_nullable ? column_nullable->getNestedColumnPtr() : nullptr; + + nested->enumerateStreams(path, callback, next_type, next_column); + path.pop_back(); +} void SerializationNullable::serializeBinaryBulkStatePrefix( SerializeBinaryBulkSettings & settings, diff --git a/src/DataTypes/Serializations/SerializationNullable.h b/src/DataTypes/Serializations/SerializationNullable.h index b0b96c021d3..c39c4dd6573 100644 --- a/src/DataTypes/Serializations/SerializationNullable.h +++ b/src/DataTypes/Serializations/SerializationNullable.h @@ -13,7 +13,11 @@ private: public: SerializationNullable(const SerializationPtr & nested_) : nested(nested_) {} - void enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const override; + void enumerateStreams( + SubstreamPath & path, + const StreamCallback & callback, + DataTypePtr type, + ColumnPtr column) const override; void serializeBinaryBulkStatePrefix( SerializeBinaryBulkSettings & settings, @@ -80,6 +84,18 @@ public: static ReturnType deserializeTextCSVImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested); template static ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested); + +private: + struct SubcolumnCreator : public ISubcolumnCreator + { + const ColumnPtr null_map; + + SubcolumnCreator(const ColumnPtr & null_map_) : null_map(null_map_) {} + + DataTypePtr create(const DataTypePtr & prev) const override; + SerializationPtr create(const SerializationPtr & prev) const override; + ColumnPtr create(const ColumnPtr & prev) const override; + }; }; } diff --git a/src/DataTypes/Serializations/SerializationTuple.cpp b/src/DataTypes/Serializations/SerializationTuple.cpp index 1324c6b2b1a..33a90a4abf6 100644 --- a/src/DataTypes/Serializations/SerializationTuple.cpp +++ b/src/DataTypes/Serializations/SerializationTuple.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -281,10 +282,22 @@ void SerializationTuple::deserializeTextCSV(IColumn & column, ReadBuffer & istr, }); } -void SerializationTuple::enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const +void SerializationTuple::enumerateStreams( + SubstreamPath & path, + const StreamCallback & callback, + DataTypePtr type, + ColumnPtr column) const { - for (const auto & elem : elems) - elem->enumerateStreams(callback, path); + const auto * type_tuple = type ? &assert_cast(*type) : nullptr; + const auto * column_tuple = column ? &assert_cast(*column) : nullptr; + + for (size_t i = 0; i < elems.size(); ++i) + { + auto next_type = type_tuple ? type_tuple->getElement(i) : nullptr; + auto next_column = column_tuple ? 
column_tuple->getColumnPtr(i) : nullptr; + + elems[i]->enumerateStreams(path, callback, next_type, next_column); + } } struct SerializeBinaryBulkStateTuple : public ISerialization::SerializeBinaryBulkState diff --git a/src/DataTypes/Serializations/SerializationTuple.h b/src/DataTypes/Serializations/SerializationTuple.h index 13668572fff..77f8de90c83 100644 --- a/src/DataTypes/Serializations/SerializationTuple.h +++ b/src/DataTypes/Serializations/SerializationTuple.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include namespace DB { @@ -9,7 +9,7 @@ namespace DB class SerializationTuple final : public SimpleTextSerialization { public: - using ElementSerializationPtr = std::shared_ptr; + using ElementSerializationPtr = std::shared_ptr; using ElementSerializations = std::vector; SerializationTuple(const ElementSerializations & elems_, bool have_explicit_names_) @@ -31,7 +31,11 @@ public: /** Each sub-column in a tuple is serialized in separate stream. */ - void enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const override; + void enumerateStreams( + SubstreamPath & path, + const StreamCallback & callback, + DataTypePtr type, + ColumnPtr column) const override; void serializeBinaryBulkStatePrefix( SerializeBinaryBulkSettings & settings, diff --git a/src/DataTypes/Serializations/SerializationWrapper.cpp b/src/DataTypes/Serializations/SerializationWrapper.cpp index f75c9a1dd8b..c0829ab1b26 100644 --- a/src/DataTypes/Serializations/SerializationWrapper.cpp +++ b/src/DataTypes/Serializations/SerializationWrapper.cpp @@ -4,9 +4,13 @@ namespace DB { -void SerializationWrapper::enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const +void SerializationWrapper::enumerateStreams( + SubstreamPath & path, + const StreamCallback & callback, + DataTypePtr type, + ColumnPtr column) const { - nested_serialization->enumerateStreams(callback, path); + nested_serialization->enumerateStreams(path, callback, type, column); } void SerializationWrapper::serializeBinaryBulkStatePrefix( diff --git a/src/DataTypes/Serializations/SerializationWrapper.h b/src/DataTypes/Serializations/SerializationWrapper.h index 399d3b198b3..c48278d53db 100644 --- a/src/DataTypes/Serializations/SerializationWrapper.h +++ b/src/DataTypes/Serializations/SerializationWrapper.h @@ -16,7 +16,11 @@ protected: public: SerializationWrapper(const SerializationPtr & nested_serialization_) : nested_serialization(nested_serialization_) {} - void enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const override; + void enumerateStreams( + SubstreamPath & path, + const StreamCallback & callback, + DataTypePtr type, + ColumnPtr column) const override; void serializeBinaryBulkStatePrefix( SerializeBinaryBulkSettings & settings, diff --git a/src/DataTypes/TimezoneMixin.h b/src/DataTypes/TimezoneMixin.h new file mode 100644 index 00000000000..e6e9f7a7989 --- /dev/null +++ b/src/DataTypes/TimezoneMixin.h @@ -0,0 +1,32 @@ +#pragma once +#include +#include + +class DateLUTImpl; + +/** Mixin-class that manages timezone info for timezone-aware DateTime implementations + * + * Must be used as a (second) base for class implementing IDateType/ISerialization-interface. 
+ */ +class TimezoneMixin +{ +public: + TimezoneMixin(const TimezoneMixin &) = default; + + explicit TimezoneMixin(const String & time_zone_name = "") + : has_explicit_time_zone(!time_zone_name.empty()) + , time_zone(DateLUT::instance(time_zone_name)) + , utc_time_zone(DateLUT::instance("UTC")) + { + } + + const DateLUTImpl & getTimeZone() const { return time_zone; } + bool hasExplicitTimeZone() const { return has_explicit_time_zone; } + +protected: + /// true if time zone name was provided in data type parameters, false if it's using default time zone. + bool has_explicit_time_zone; + + const DateLUTImpl & time_zone; + const DateLUTImpl & utc_time_zone; +}; diff --git a/src/Interpreters/InterpreterDescribeQuery.cpp b/src/Interpreters/InterpreterDescribeQuery.cpp index 2ebae17cd6b..00a2e4bc8bb 100644 --- a/src/Interpreters/InterpreterDescribeQuery.cpp +++ b/src/Interpreters/InterpreterDescribeQuery.cpp @@ -128,10 +128,10 @@ BlockIO InterpreterDescribeQuery::execute() { for (const auto & column : columns) { - column.type->forEachSubcolumn([&](const auto & name, const auto & type, const auto & path) + IDataType::forEachSubcolumn([&](const auto & path, const auto & name, const auto & data) { res_columns[0]->insert(Nested::concatenateName(column.name, name)); - res_columns[1]->insert(type->getName()); + res_columns[1]->insert(data.type->getName()); /// It's not trivial to calculate default expression for subcolumn. /// So, leave it empty. @@ -150,7 +150,7 @@ BlockIO InterpreterDescribeQuery::execute() res_columns[6]->insertDefault(); res_columns[7]->insert(1u); - }); + }, column.type->getDefaultSerialization(), column.type, nullptr); } } diff --git a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp index e334cd486ef..48fd9e583bf 100644 --- a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp @@ -35,8 +35,7 @@ NameSet IMergedBlockOutputStream::removeEmptyColumnsFromPart( [&](const ISerialization::SubstreamPath & substream_path) { ++stream_counts[ISerialization::getFileNameForStream(column, substream_path)]; - }, - {}); + }); } NameSet remove_files; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index 11e080fda6c..2f25cf7d12a 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -98,7 +98,7 @@ ColumnSize MergeTreeDataPartWide::getColumnSizeImpl( auto mrk_checksum = checksums.files.find(file_name + index_granularity_info.marks_file_extension); if (mrk_checksum != checksums.files.end()) size.marks += mrk_checksum->second.file_size; - }, {}); + }); return size; } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 23a7b205a1b..5d17d6235e1 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -39,19 +39,21 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( void MergeTreeDataPartWriterCompact::addStreams(const NameAndTypePair & column, const ASTPtr & effective_codec_desc) { - IDataType::StreamCallbackWithType callback = [&] (const ISerialization::SubstreamPath & substream_path, const IDataType & substream_type) + ISerialization::StreamCallback callback = [&](const auto & substream_path) { + assert(!substream_path.empty()); String stream_name = 
ISerialization::getFileNameForStream(column, substream_path); /// Shared offsets for Nested type. if (compressed_streams.count(stream_name)) return; + const auto & subtype = substream_path.back().data.type; CompressionCodecPtr compression_codec; /// If we can use special codec than just get it if (ISerialization::isSpecialCompressionAllowed(substream_path)) - compression_codec = CompressionCodecFactory::instance().get(effective_codec_desc, &substream_type, default_codec); + compression_codec = CompressionCodecFactory::instance().get(effective_codec_desc, subtype.get(), default_codec); else /// otherwise return only generic codecs and don't use info about data_type compression_codec = CompressionCodecFactory::instance().get(effective_codec_desc, nullptr, default_codec, true); @@ -63,7 +65,8 @@ void MergeTreeDataPartWriterCompact::addStreams(const NameAndTypePair & column, compressed_streams.emplace(stream_name, stream); }; - column.type->enumerateStreams(serializations[column.name], callback); + ISerialization::SubstreamPath path; + serializations[column.name]->enumerateStreams(path, callback, column.type, nullptr); } namespace diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 8fccfbb1f90..224a197c3c8 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -90,17 +90,20 @@ void MergeTreeDataPartWriterWide::addStreams( const NameAndTypePair & column, const ASTPtr & effective_codec_desc) { - IDataType::StreamCallbackWithType callback = [&] (const ISerialization::SubstreamPath & substream_path, const IDataType & substream_type) + ISerialization::StreamCallback callback = [&](const auto & substream_path) { + assert(!substream_path.empty()); String stream_name = ISerialization::getFileNameForStream(column, substream_path); /// Shared offsets for Nested type. 
if (column_streams.count(stream_name)) return; + const auto & subtype = substream_path.back().data.type; CompressionCodecPtr compression_codec; + /// If we can use special codec then just get it if (ISerialization::isSpecialCompressionAllowed(substream_path)) - compression_codec = CompressionCodecFactory::instance().get(effective_codec_desc, &substream_type, default_codec); + compression_codec = CompressionCodecFactory::instance().get(effective_codec_desc, subtype.get(), default_codec); else /// otherwise return only generic codecs and don't use info about the` data_type compression_codec = CompressionCodecFactory::instance().get(effective_codec_desc, nullptr, default_codec, true); @@ -113,7 +116,8 @@ void MergeTreeDataPartWriterWide::addStreams( settings.max_compress_block_size); }; - column.type->enumerateStreams(serializations[column.name], callback); + ISerialization::SubstreamPath path; + serializations[column.name]->enumerateStreams(path, callback, column.type, nullptr); } diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index c898874f737..9b879283c10 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -220,7 +220,7 @@ void MergeTreeReaderCompact::readData( serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, state); serialization->deserializeBinaryBulkWithMultipleStreams(temp_column, rows_to_read, deserialize_settings, state, nullptr); - auto subcolumn = type_in_storage->getSubcolumn(name_and_type.getSubcolumnName(), *temp_column); + auto subcolumn = type_in_storage->getSubcolumn(name_and_type.getSubcolumnName(), temp_column); /// TODO: Avoid extra copying. if (column->empty()) diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index b8941fc9d84..e7cfff86e5f 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -337,15 +337,14 @@ static NameToNameVector collectFilesForRenames( { /// Collect counts for shared streams of different columns. As an example, Nested columns have shared stream with array sizes. 
std::map stream_counts; - for (const NameAndTypePair & column : source_part->getColumns()) + for (const auto & column : source_part->getColumns()) { auto serialization = source_part->getSerializationForColumn(column); serialization->enumerateStreams( [&](const ISerialization::SubstreamPath & substream_path) { ++stream_counts[ISerialization::getFileNameForStream(column, substream_path)]; - }, - {}); + }); } NameToNameVector rename_vector; diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index 8a234833da7..b6888dfddc6 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -141,8 +141,7 @@ IMergeTreeDataPart::Checksums checkDataPart( { String projection_file_name = ISerialization::getFileNameForStream(projection_column, substream_path) + ".bin"; checksums_data.files[projection_file_name] = checksum_compressed_file(disk, projection_path + projection_file_name); - }, - {}); + }); } } @@ -219,7 +218,7 @@ IMergeTreeDataPart::Checksums checkDataPart( { String file_name = ISerialization::getFileNameForStream(column, substream_path) + ".bin"; checksums_data.files[file_name] = checksum_compressed_file(disk, path + file_name); - }, {}); + }); } } else diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 0fd94bac95a..236972e6f2b 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -407,7 +407,7 @@ void LogSink::writeData(const NameAndTypePair & name_and_type, const IColumn & c storage.files[stream_name].data_file_path, columns.getCodecOrDefault(name_and_type.name), storage.max_compress_block_size); - }, settings.path); + }); settings.getter = createStreamGetter(name_and_type, written_streams); @@ -428,7 +428,7 @@ void LogSink::writeData(const NameAndTypePair & name_and_type, const IColumn & c mark.offset = stream_it->second.plain_offset + stream_it->second.plain->count(); out_marks.emplace_back(file.column_index, mark); - }, settings.path); + }); serialization->serializeBinaryBulkWithMultipleStreams(column, 0, 0, settings, serialize_states[name]); @@ -442,7 +442,7 @@ void LogSink::writeData(const NameAndTypePair & name_and_type, const IColumn & c if (streams.end() == it) throw Exception("Logical error: stream was not created when writing data in LogBlockOutputStream", ErrorCodes::LOGICAL_ERROR); it->second.compressed.next(); - }, settings.path); + }); } @@ -625,13 +625,12 @@ const StorageLog::Marks & StorageLog::getMarksWithRealRowCount(const StorageMeta * If this is a data type with multiple stream, get the first stream, that we assume have real row count. * (Example: for Array data type, first stream is array sizes; and number of array sizes is the number of arrays). 
*/ - ISerialization::SubstreamPath substream_root_path; auto serialization = column.type->getDefaultSerialization(); serialization->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) { if (filename.empty()) filename = ISerialization::getFileNameForStream(column, substream_path); - }, substream_root_path); + }); Files::const_iterator it = files.find(filename); if (files.end() == it) @@ -745,9 +744,8 @@ IStorage::ColumnSizeByName StorageLog::getColumnSizes() const size.data_compressed += file_sizes[fileName(it->second.data_file_path)]; }; - ISerialization::SubstreamPath substream_path; auto serialization = column.type->getDefaultSerialization(); - serialization->enumerateStreams(stream_callback, substream_path); + serialization->enumerateStreams(stream_callback); } return column_sizes; diff --git a/src/Storages/StorageTinyLog.cpp b/src/Storages/StorageTinyLog.cpp index d1778342ec5..f899f8e1da6 100644 --- a/src/Storages/StorageTinyLog.cpp +++ b/src/Storages/StorageTinyLog.cpp @@ -448,9 +448,8 @@ void StorageTinyLog::addFiles(const NameAndTypePair & column) } }; - ISerialization::SubstreamPath substream_path; auto serialization = type->getDefaultSerialization(); - serialization->enumerateStreams(stream_callback, substream_path); + serialization->enumerateStreams(stream_callback); } @@ -544,9 +543,8 @@ IStorage::ColumnSizeByName StorageTinyLog::getColumnSizes() const size.data_compressed += file_sizes[fileName(it->second.data_file_path)]; }; - ISerialization::SubstreamPath substream_path; auto serialization = column.type->getDefaultSerialization(); - serialization->enumerateStreams(stream_callback, substream_path); + serialization->enumerateStreams(stream_callback); } return column_sizes; From 386d47cb227b580a63635e1b94a9d8a765d5cb97 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 14 Oct 2021 10:49:12 +0300 Subject: [PATCH 197/438] Fix typo --- tests/queries/1_stateful/00167_read_bytes_from_fs.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/1_stateful/00167_read_bytes_from_fs.sql b/tests/queries/1_stateful/00167_read_bytes_from_fs.sql index 341730bd82d..435bac85bc4 100644 --- a/tests/queries/1_stateful/00167_read_bytes_from_fs.sql +++ b/tests/queries/1_stateful/00167_read_bytes_from_fs.sql @@ -4,4 +4,4 @@ SYSTEM FLUSH LOGS; -- We had a bug which lead to additional compressed data read. test.hits compressed size is about 1.2Gb, but we read more then 3Gb. -- Small additional reads still possible, so we compare with about 1.5Gb. 
-SELECT ProfileEvents['ReadBufferFromFileDescriptorReadBytes'] < 1500000000 from system.query_log where query = 'SELECT sum(cityHash64(*)) FROM datasets.hits_v1 SETTINGS max_threads=40' and databaser=currentDatabase() and type = 'QueryFinish'; +SELECT ProfileEvents['ReadBufferFromFileDescriptorReadBytes'] < 1500000000 from system.query_log where query = 'SELECT sum(cityHash64(*)) FROM datasets.hits_v1 SETTINGS max_threads=40' and current_database = currentDatabase() and type = 'QueryFinish'; From 111c0672be7b99156218ad7a330420fb960a2d38 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 12 Oct 2021 11:58:33 +0300 Subject: [PATCH 198/438] Start server under gdb in functional tests --- docker/test/stateless/run.sh | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index ce1d1b59a55..ec0af024b8b 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -45,6 +45,23 @@ else sudo clickhouse start fi +echo " +set follow-fork-mode child +handle all noprint +handle SIGSEGV stop print +handle SIGBUS stop print +handle SIGABRT stop print +continue +thread apply all backtrace +detach +quit +" > script.gdb + +# FIXME Hung check may work incorrectly because of attached gdb +# 1. False positives are possible +# 2. We cannot attach another gdb to get stacktraces if some queries hung +gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" >> /test_output/gdb.log & + if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then sudo -E -u clickhouse /usr/bin/clickhouse server --config /etc/clickhouse-server1/config.xml --daemon \ From 01ac2fea7991955ad68c8c0a4304fb0649ea84f5 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 14 Oct 2021 12:44:41 +0300 Subject: [PATCH 199/438] Update 00167_read_bytes_from_fs.sql --- tests/queries/1_stateful/00167_read_bytes_from_fs.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/1_stateful/00167_read_bytes_from_fs.sql b/tests/queries/1_stateful/00167_read_bytes_from_fs.sql index 435bac85bc4..ee3e6b94537 100644 --- a/tests/queries/1_stateful/00167_read_bytes_from_fs.sql +++ b/tests/queries/1_stateful/00167_read_bytes_from_fs.sql @@ -4,4 +4,4 @@ SYSTEM FLUSH LOGS; -- We had a bug which lead to additional compressed data read. test.hits compressed size is about 1.2Gb, but we read more then 3Gb. -- Small additional reads still possible, so we compare with about 1.5Gb. 
-SELECT ProfileEvents['ReadBufferFromFileDescriptorReadBytes'] < 1500000000 from system.query_log where query = 'SELECT sum(cityHash64(*)) FROM datasets.hits_v1 SETTINGS max_threads=40' and current_database = currentDatabase() and type = 'QueryFinish'; +SELECT ProfileEvents['ReadBufferFromFileDescriptorReadBytes'] < 1500000000 from system.query_log where query = 'SELECT sum(cityHash64(*)) FROM test.hits SETTINGS max_threads=40' and current_database = currentDatabase() and type = 'QueryFinish'; From 90ff7f05fd8e85184591d24be67c125165f4f18b Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 14 Oct 2021 13:21:41 +0300 Subject: [PATCH 200/438] Start keeper asynchronously if has connection to other nodes --- programs/keeper/Keeper.cpp | 2 +- programs/server/Server.cpp | 5 +- src/Coordination/CoordinationSettings.h | 2 +- src/Coordination/KeeperDispatcher.cpp | 16 +++- src/Coordination/KeeperDispatcher.h | 7 +- src/Coordination/KeeperServer.cpp | 1 + src/Coordination/KeeperServer.h | 6 ++ src/Interpreters/Context.cpp | 23 ++++- src/Interpreters/Context.h | 7 +- src/Server/KeeperTCPHandler.cpp | 5 +- tests/integration/helpers/cluster.py | 3 + .../test_keeper_three_nodes_start/__init__.py | 1 + .../configs/enable_keeper1.xml | 32 +++++++ .../configs/enable_keeper2.xml | 32 +++++++ .../test_keeper_three_nodes_start/test.py | 32 +++++++ .../__init__.py | 1 + .../configs/enable_keeper1.xml | 32 +++++++ .../configs/enable_keeper2.xml | 32 +++++++ .../configs/enable_keeper3.xml | 32 +++++++ .../configs/keeper_conf.xml | 16 ++++ .../test_keeper_three_nodes_two_alive/test.py | 84 +++++++++++++++++++ 21 files changed, 357 insertions(+), 14 deletions(-) create mode 100644 tests/integration/test_keeper_three_nodes_start/__init__.py create mode 100644 tests/integration/test_keeper_three_nodes_start/configs/enable_keeper1.xml create mode 100644 tests/integration/test_keeper_three_nodes_start/configs/enable_keeper2.xml create mode 100644 tests/integration/test_keeper_three_nodes_start/test.py create mode 100644 tests/integration/test_keeper_three_nodes_two_alive/__init__.py create mode 100644 tests/integration/test_keeper_three_nodes_two_alive/configs/enable_keeper1.xml create mode 100644 tests/integration/test_keeper_three_nodes_two_alive/configs/enable_keeper2.xml create mode 100644 tests/integration/test_keeper_three_nodes_two_alive/configs/enable_keeper3.xml create mode 100644 tests/integration/test_keeper_three_nodes_two_alive/configs/keeper_conf.xml create mode 100644 tests/integration/test_keeper_three_nodes_two_alive/test.py diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 28bbb95e01d..a3034150219 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -359,7 +359,7 @@ int Keeper::main(const std::vector & /*args*/) auto servers = std::make_shared>(); /// Initialize test keeper RAFT. Do nothing if no nu_keeper_server in config. - global_context->initializeKeeperDispatcher(); + global_context->initializeKeeperDispatcher(/* start_async = */false); for (const auto & listen_host : listen_hosts) { /// TCP Keeper diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 01033570926..c2c53150daf 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -986,8 +986,9 @@ if (ThreadFuzzer::instance().isEffective()) if (config().has("keeper_server")) { #if USE_NURAFT - /// Initialize test keeper RAFT. Do nothing if no nu_keeper_server in config. 
- global_context->initializeKeeperDispatcher(); + bool has_connection = has_zookeeper && global_context->tryCheckZooKeeperConnection(); + /// Initialize keeper RAFT. + global_context->initializeKeeperDispatcher(has_connection); for (const auto & listen_host : listen_hosts) { /// TCP Keeper diff --git a/src/Coordination/CoordinationSettings.h b/src/Coordination/CoordinationSettings.h index 805dedab89c..00d443822e6 100644 --- a/src/Coordination/CoordinationSettings.h +++ b/src/Coordination/CoordinationSettings.h @@ -28,7 +28,7 @@ struct Settings; M(UInt64, snapshot_distance, 100000, "How many log items we have to collect to write new snapshot", 0) \ M(Bool, auto_forwarding, true, "Allow to forward write requests from followers to leader", 0) \ M(Milliseconds, shutdown_timeout, 5000, "How many time we will until RAFT shutdown", 0) \ - M(Milliseconds, startup_timeout, 30000, "How many time we will until RAFT to start", 0) \ + M(Milliseconds, startup_timeout, 180000, "How many time we will until RAFT to start", 0) \ M(LogsLevel, raft_logs_level, LogsLevel::information, "Log internal RAFT logs into main server log level. Valid values: 'trace', 'debug', 'information', 'warning', 'error', 'fatal', 'none'", 0) \ M(UInt64, rotate_log_storage_interval, 100000, "How many records will be stored in one log storage file", 0) \ M(UInt64, snapshots_to_keep, 3, "How many compressed snapshots to keep on disk", 0) \ diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index b4dc367ff62..8d8f98e175e 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -241,7 +241,7 @@ bool KeeperDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & requ return true; } -void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & config, bool standalone_keeper) +void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & config, bool standalone_keeper, bool start_async) { LOG_DEBUG(log, "Initializing storage dispatcher"); int myid = config.getInt("keeper_server.server_id"); @@ -262,8 +262,16 @@ void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & conf server->startup(); LOG_DEBUG(log, "Server initialized, waiting for quorum"); - server->waitInit(); - LOG_DEBUG(log, "Quorum initialized"); + if (!start_async) + { + server->waitInit(); + LOG_DEBUG(log, "Quorum initialized"); + } + else + { + LOG_INFO(log, "Starting Keeper asynchronously, server will accept connections to Keeper when it will be ready"); + } + } catch (...) { @@ -363,7 +371,7 @@ void KeeperDispatcher::sessionCleanerTask() try { /// Only leader node must check dead sessions - if (isLeader()) + if (server->checkInit() && isLeader()) { auto dead_sessions = server->getDeadSessions(); diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index f49063f8dea..8f19b081e26 100644 --- a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -100,7 +100,12 @@ public: /// Initialization from config. 
/// standalone_keeper -- we are standalone keeper application (not inside clickhouse server) - void initialize(const Poco::Util::AbstractConfiguration & config, bool standalone_keeper); + void initialize(const Poco::Util::AbstractConfiguration & config, bool standalone_keeper, bool start_async); + + bool checkInit() const + { + return server && server->checkInit(); + } /// Shutdown internal keeper parts (server, state machine, log storage, etc) void shutdown(); diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index b27170c8ba1..56d28058991 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -353,6 +353,7 @@ nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ void KeeperServer::waitInit() { std::unique_lock lock(initialized_mutex); + int64_t timeout = coordination_settings->startup_timeout.totalMilliseconds(); if (!initialized_cv.wait_for(lock, std::chrono::milliseconds(timeout), [&] { return initialized_flag.load(); })) throw Exception(ErrorCodes::RAFT_ERROR, "Failed to wait RAFT initialization"); diff --git a/src/Coordination/KeeperServer.h b/src/Coordination/KeeperServer.h index a7e96156dc1..8e10d053471 100644 --- a/src/Coordination/KeeperServer.h +++ b/src/Coordination/KeeperServer.h @@ -80,6 +80,12 @@ public: /// Wait server initialization (see callbackFunc) void waitInit(); + /// Return true if KeeperServer initialized + bool checkInit() const + { + return initialized_flag; + } + void shutdown(); int getServerID() const { return server_id; } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index e4c6de8853b..4ffd69af35b 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1749,6 +1749,20 @@ zkutil::ZooKeeperPtr Context::getZooKeeper() const return shared->zookeeper; } + +bool Context::tryCheckZooKeeperConnection() const +{ + try + { + getZooKeeper(); + return true; + } + catch (...) + { + return false; + } +} + UInt32 Context::getZooKeeperSessionUptime() const { std::lock_guard lock(shared->zookeeper_mutex); @@ -1776,19 +1790,24 @@ void Context::setSystemZooKeeperLogAfterInitializationIfNeeded() zk.second->setZooKeeperLog(shared->system_logs->zookeeper_log); } -void Context::initializeKeeperDispatcher() const +void Context::initializeKeeperDispatcher(bool start_async) const { #if USE_NURAFT std::lock_guard lock(shared->keeper_storage_dispatcher_mutex); + if (shared->keeper_storage_dispatcher) throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to initialize Keeper multiple times"); const auto & config = getConfigRef(); if (config.has("keeper_server")) { + bool is_standalone_app = getApplicationType() == ApplicationType::KEEPER; + if (start_async && !is_standalone_app) + LOG_INFO(shared->log, "Connected to ZooKeeper (or Keeper) before internal Keeper start, will wait for Keeper asynchronously"); + shared->keeper_storage_dispatcher = std::make_shared(); - shared->keeper_storage_dispatcher->initialize(config, getApplicationType() == ApplicationType::KEEPER); + shared->keeper_storage_dispatcher->initialize(config, is_standalone_app, start_async); } #endif } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index f6cded2b1d1..07c90049bef 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -666,12 +666,17 @@ public: /// Same as above but return a zookeeper connection from auxiliary_zookeepers configuration entry. 
std::shared_ptr getAuxiliaryZooKeeper(const String & name) const; + /// Try to connect to ZooKeeper using getZooKeeper. Useful for internal + /// Keeper start (check connection to some other node). Return true if + /// connected successfully (without exception). + bool tryCheckZooKeeperConnection() const; + UInt32 getZooKeeperSessionUptime() const; #if USE_NURAFT std::shared_ptr & getKeeperDispatcher() const; #endif - void initializeKeeperDispatcher() const; + void initializeKeeperDispatcher(bool start_async) const; void shutdownKeeperDispatcher() const; /// Set auxiliary zookeepers configuration at server starting or configuration reloading. diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp index 7ebbda9dfe6..88b7e68127e 100644 --- a/src/Server/KeeperTCPHandler.cpp +++ b/src/Server/KeeperTCPHandler.cpp @@ -286,7 +286,7 @@ void KeeperTCPHandler::runImpl() return; } - if (keeper_dispatcher->hasLeader()) + if (keeper_dispatcher->checkInit() && keeper_dispatcher->hasLeader()) { try { @@ -306,7 +306,8 @@ void KeeperTCPHandler::runImpl() } else { - LOG_WARNING(log, "Ignoring user request, because no alive leader exist"); + String reason = keeper_dispatcher->checkInit() ? "server is not initialized yet" : "no alive leader exists"; + LOG_WARNING(log, "Ignoring user request, because {}", reason); sendHandshake(false); return; } diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 51b7bfcbcb8..3854cadaba5 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -2320,6 +2320,9 @@ class ClickHouseInstance: def replace_config(self, path_to_config, replacement): self.exec_in_container(["bash", "-c", "echo '{}' > {}".format(replacement, path_to_config)]) + def replace_in_config(self, path_to_config, replace, replacement): + self.exec_in_container(["bash", "-c", f"sed -i 's/{replace}/{replacement}/g' {path_to_config}"]) + def create_dir(self, destroy_dir=True): """Create the instance directory and all the needed files there.""" diff --git a/tests/integration/test_keeper_three_nodes_start/__init__.py b/tests/integration/test_keeper_three_nodes_start/__init__.py new file mode 100644 index 00000000000..e5a0d9b4834 --- /dev/null +++ b/tests/integration/test_keeper_three_nodes_start/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/tests/integration/test_keeper_three_nodes_start/configs/enable_keeper1.xml b/tests/integration/test_keeper_three_nodes_start/configs/enable_keeper1.xml new file mode 100644 index 00000000000..bc62d817074 --- /dev/null +++ b/tests/integration/test_keeper_three_nodes_start/configs/enable_keeper1.xml @@ -0,0 +1,32 @@ + + + 9181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 5000 + 10000 + trace + + + + + 1 + node1 + 44444 + + + 2 + node2 + 44444 + + + 3 + non_existing_node + 44444 + + + + diff --git a/tests/integration/test_keeper_three_nodes_start/configs/enable_keeper2.xml b/tests/integration/test_keeper_three_nodes_start/configs/enable_keeper2.xml new file mode 100644 index 00000000000..a6c476fb449 --- /dev/null +++ b/tests/integration/test_keeper_three_nodes_start/configs/enable_keeper2.xml @@ -0,0 +1,32 @@ + + + 9181 + 2 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 5000 + 10000 + trace + + + + + 1 + node1 + 44444 + + + 2 + node2 + 44444 + + + 3 + non_existing_node + 44444 + + + + diff --git a/tests/integration/test_keeper_three_nodes_start/test.py 
b/tests/integration/test_keeper_three_nodes_start/test.py new file mode 100644 index 00000000000..7828f21d0d7 --- /dev/null +++ b/tests/integration/test_keeper_three_nodes_start/test.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python3 + +#!/usr/bin/env python3 +import pytest +from helpers.cluster import ClickHouseCluster +import random +import string +import os +import time +from multiprocessing.dummy import Pool +from helpers.network import PartitionManager +from helpers.test_tools import assert_eq_with_retry +from kazoo.client import KazooClient, KazooState + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance('node1', main_configs=['configs/enable_keeper1.xml'], stay_alive=True) +node2 = cluster.add_instance('node2', main_configs=['configs/enable_keeper2.xml'], stay_alive=True) + +def get_fake_zk(nodename, timeout=30.0): + _fake_zk_instance = KazooClient(hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout) + _fake_zk_instance.start() + return _fake_zk_instance + +def test_smoke(): + try: + cluster.start() + + node1_zk = get_fake_zk("node1") + node1_zk.create("/test_alive", b"aaaa") + + finally: + cluster.shutdown() diff --git a/tests/integration/test_keeper_three_nodes_two_alive/__init__.py b/tests/integration/test_keeper_three_nodes_two_alive/__init__.py new file mode 100644 index 00000000000..e5a0d9b4834 --- /dev/null +++ b/tests/integration/test_keeper_three_nodes_two_alive/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/tests/integration/test_keeper_three_nodes_two_alive/configs/enable_keeper1.xml b/tests/integration/test_keeper_three_nodes_two_alive/configs/enable_keeper1.xml new file mode 100644 index 00000000000..510424715c4 --- /dev/null +++ b/tests/integration/test_keeper_three_nodes_two_alive/configs/enable_keeper1.xml @@ -0,0 +1,32 @@ + + + 9181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 5000 + 10000 + trace + + + + + 1 + node1 + 44444 + + + 2 + node2 + 44444 + + + 3 + node3 + 44444 + + + + diff --git a/tests/integration/test_keeper_three_nodes_two_alive/configs/enable_keeper2.xml b/tests/integration/test_keeper_three_nodes_two_alive/configs/enable_keeper2.xml new file mode 100644 index 00000000000..264601d8c98 --- /dev/null +++ b/tests/integration/test_keeper_three_nodes_two_alive/configs/enable_keeper2.xml @@ -0,0 +1,32 @@ + + + 9181 + 2 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 5000 + 10000 + trace + + + + + 1 + node1 + 44444 + + + 2 + node2 + 44444 + + + 3 + node3 + 44444 + + + + diff --git a/tests/integration/test_keeper_three_nodes_two_alive/configs/enable_keeper3.xml b/tests/integration/test_keeper_three_nodes_two_alive/configs/enable_keeper3.xml new file mode 100644 index 00000000000..7f9775939bb --- /dev/null +++ b/tests/integration/test_keeper_three_nodes_two_alive/configs/enable_keeper3.xml @@ -0,0 +1,32 @@ + + + 9181 + 3 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 5000 + 10000 + trace + + + + + 1 + node1 + 44444 + + + 2 + node2 + 44444 + + + 3 + node3 + 44444 + + + + diff --git a/tests/integration/test_keeper_three_nodes_two_alive/configs/keeper_conf.xml b/tests/integration/test_keeper_three_nodes_two_alive/configs/keeper_conf.xml new file mode 100644 index 00000000000..384e984f210 --- /dev/null +++ b/tests/integration/test_keeper_three_nodes_two_alive/configs/keeper_conf.xml @@ -0,0 +1,16 @@ + + + + node1 + 9181 + + + node2 + 9181 + + + node3 + 9181 + + + diff --git 
a/tests/integration/test_keeper_three_nodes_two_alive/test.py b/tests/integration/test_keeper_three_nodes_two_alive/test.py new file mode 100644 index 00000000000..2c13d3ef22b --- /dev/null +++ b/tests/integration/test_keeper_three_nodes_two_alive/test.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python3 +import pytest +from helpers.cluster import ClickHouseCluster +import random +import string +import os +import time +from multiprocessing.dummy import Pool +from helpers.network import PartitionManager +from helpers.test_tools import assert_eq_with_retry +from kazoo.client import KazooClient, KazooState + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance('node1', main_configs=['configs/enable_keeper1.xml', 'configs/keeper_conf.xml'], stay_alive=True) +node2 = cluster.add_instance('node2', main_configs=['configs/enable_keeper2.xml', 'configs/keeper_conf.xml'], stay_alive=True) +node3 = cluster.add_instance('node3', main_configs=['configs/enable_keeper3.xml', 'configs/keeper_conf.xml'], stay_alive=True) + + +def get_fake_zk(nodename, timeout=30.0): + _fake_zk_instance = KazooClient(hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout) + _fake_zk_instance.start() + return _fake_zk_instance + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + + finally: + cluster.shutdown() + +def start(node): + node.start_clickhouse() + + +def test_start_offline(started_cluster): + p = Pool(3) + try: + node1_zk = get_fake_zk("node1") + node1_zk.create("/test_alive", b"aaaa") + + node1.stop_clickhouse() + node2.stop_clickhouse() + node3.stop_clickhouse() + + time.sleep(3) + p.map(start, [node2, node3]) + + node2_zk = get_fake_zk("node2") + node2_zk.create("/test_dead", b"data") + finally: + p.map(start, [node1, node2, node3]) + + +def test_start_non_existing(started_cluster): + p = Pool(3) + try: + node1.stop_clickhouse() + node2.stop_clickhouse() + node3.stop_clickhouse() + + node1.replace_in_config('/etc/clickhouse-server/config.d/enable_keeper1.xml', 'node3', 'non_existing_node') + node2.replace_in_config('/etc/clickhouse-server/config.d/enable_keeper2.xml', 'node3', 'non_existing_node') + + time.sleep(3) + p.map(start, [node2, node1]) + + node2_zk = get_fake_zk("node2") + node2_zk.create("/test_non_exising", b"data") + finally: + node1.replace_in_config('/etc/clickhouse-server/config.d/enable_keeper1.xml', 'non_existing_node', 'node3') + node2.replace_in_config('/etc/clickhouse-server/config.d/enable_keeper2.xml', 'non_existing_node', 'node3') + p.map(start, [node1, node2, node3]) + +def test_restart_third_node(started_cluster): + node1_zk = get_fake_zk("node1") + node1_zk.create("/test_restart", b"aaaa") + + node3.restart_clickhouse() + + assert node3.contains_in_log("Connected to ZooKeeper (or Keeper) before internal Keeper start") From 664e80af5adf396cac63e4b06b3c7b081f598c3f Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 14 Oct 2021 13:23:39 +0300 Subject: [PATCH 201/438] Update run.sh --- docker/test/stateless/run.sh | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index ec0af024b8b..ce1d1b59a55 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -45,23 +45,6 @@ else sudo clickhouse start fi -echo " -set follow-fork-mode child -handle all noprint -handle SIGSEGV stop print -handle SIGBUS stop print -handle SIGABRT stop print -continue -thread apply all backtrace -detach -quit -" > script.gdb - -# FIXME Hung check may 
work incorrectly because of attached gdb -# 1. False positives are possible -# 2. We cannot attach another gdb to get stacktraces if some queries hung -gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" >> /test_output/gdb.log & - if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then sudo -E -u clickhouse /usr/bin/clickhouse server --config /etc/clickhouse-server1/config.xml --daemon \ From ab28c6c855dfc57b40938f858a99c091896d166e Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 14 Oct 2021 13:25:43 +0300 Subject: [PATCH 202/438] Remove BlockInputStream interfaces. --- programs/copier/Internals.cpp | 8 - programs/copier/Internals.h | 2 - programs/library-bridge/Handlers.cpp | 17 +- .../library-bridge/SharedLibraryHandler.h | 1 - programs/odbc-bridge/MainHandler.cpp | 1 - src/Client/IServerConnection.h | 1 - src/Core/ExternalTable.cpp | 1 - src/DataStreams/BlockStreamProfileInfo.cpp | 75 ---- src/DataStreams/BlockStreamProfileInfo.h | 9 - src/DataStreams/IBlockInputStream.cpp | 359 ------------------ src/DataStreams/IBlockInputStream.h | 271 ------------- src/DataStreams/IBlockOutputStream.h | 70 ---- src/DataStreams/IBlockStream_fwd.h | 17 - src/DataStreams/InternalTextLogs.h | 2 +- src/DataStreams/NativeReader.h | 2 +- src/DataStreams/TemporaryFileStream.cpp | 2 - src/DataStreams/TemporaryFileStream.h | 1 - src/DataStreams/copyData.cpp | 86 ----- src/DataStreams/copyData.h | 27 -- .../MySQL/MaterializedMySQLSyncThread.cpp | 1 - src/Dictionaries/DictionarySourceHelpers.cpp | 1 - src/Dictionaries/HTTPDictionarySource.cpp | 1 - src/Formats/FormatFactory.cpp | 1 - src/Formats/FormatFactory.h | 1 - src/Interpreters/Aggregator.h | 1 - src/Interpreters/Context.h | 1 - src/Interpreters/ExpressionAnalyzer.cpp | 1 - src/Interpreters/ExpressionAnalyzer.h | 1 - src/Interpreters/InterpreterExistsQuery.cpp | 1 - src/Interpreters/InterpreterInsertQuery.cpp | 1 - src/Interpreters/InterpreterInsertQuery.h | 1 - src/Interpreters/InterpreterSelectQuery.h | 8 - .../InterpreterShowCreateQuery.cpp | 1 - src/Interpreters/InterpreterWatchQuery.cpp | 1 - src/Interpreters/InterpreterWatchQuery.h | 2 - src/Interpreters/JoinSwitcher.h | 1 - src/Interpreters/ProcessList.cpp | 1 - src/Interpreters/SortedBlocksWriter.h | 1 - src/Interpreters/TableJoin.h | 1 - src/Interpreters/executeDDLQueryOnCluster.h | 1 - src/Interpreters/executeQuery.cpp | 2 - .../Formats/OutputStreamToOutputFormat.cpp | 43 --- .../Formats/OutputStreamToOutputFormat.h | 39 -- src/Processors/QueryPipelineBuilder.h | 1 - .../Transforms/CreatingSetsTransform.cpp | 1 - .../Transforms/CreatingSetsTransform.h | 1 - .../Transforms/buildPushingToViewsChain.h | 1 - src/Server/MySQLHandler.cpp | 1 - src/Server/TCPHandler.h | 1 - src/Storages/IStorage.h | 1 - src/Storages/Kafka/StorageKafka.cpp | 1 - src/Storages/LiveView/StorageLiveView.cpp | 1 - .../MergeTree/IMergeTreeDataPartWriter.h | 1 - src/Storages/MergeTree/MergeTreeData.cpp | 1 - .../MergeTree/MergeTreeDataPartWriterOnDisk.h | 1 - .../MergeTree/ReplicatedMergeTreeSink.cpp | 2 +- .../MaterializedPostgreSQLConsumer.cpp | 1 - .../PostgreSQLReplicationHandler.cpp | 1 - src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 1 - src/Storages/StorageBuffer.h | 1 - src/Storages/StorageExecutable.cpp | 1 - src/Storages/StorageFile.cpp | 3 - src/Storages/StorageLog.cpp | 2 - src/Storages/StorageMaterializedView.cpp | 1 - src/Storages/StorageMemory.h | 1 - src/Storages/StorageMongoDB.cpp | 1 - src/Storages/StorageMySQL.cpp | 1 - 
src/Storages/StoragePostgreSQL.h | 1 - src/Storages/StorageReplicatedMergeTree.cpp | 1 - src/Storages/StorageS3.cpp | 1 - src/Storages/StorageS3Cluster.cpp | 1 - src/Storages/StorageSet.cpp | 1 + src/Storages/StorageStripeLog.cpp | 1 - src/Storages/StorageURL.cpp | 1 - src/Storages/StorageXDBC.cpp | 1 - .../System/StorageSystemZooKeeper.cpp | 1 - src/Storages/tests/gtest_storage_log.cpp | 1 - src/TableFunctions/ITableFunctionFileLike.cpp | 1 - src/TableFunctions/TableFunctionExecutable.h | 1 - 79 files changed, 15 insertions(+), 1090 deletions(-) delete mode 100644 src/DataStreams/IBlockInputStream.cpp delete mode 100644 src/DataStreams/IBlockInputStream.h delete mode 100644 src/DataStreams/IBlockOutputStream.h delete mode 100644 src/DataStreams/IBlockStream_fwd.h delete mode 100644 src/DataStreams/copyData.cpp delete mode 100644 src/DataStreams/copyData.h delete mode 100644 src/Processors/Formats/OutputStreamToOutputFormat.cpp delete mode 100644 src/Processors/Formats/OutputStreamToOutputFormat.h diff --git a/programs/copier/Internals.cpp b/programs/copier/Internals.cpp index c5e702cd1dc..84283777c8f 100644 --- a/programs/copier/Internals.cpp +++ b/programs/copier/Internals.cpp @@ -57,14 +57,6 @@ std::shared_ptr createASTStorageDistributed( } -BlockInputStreamPtr squashStreamIntoOneBlock(const BlockInputStreamPtr & stream) -{ - return std::make_shared( - stream, - std::numeric_limits::max(), - std::numeric_limits::max()); -} - Block getBlockWithAllStreamData(QueryPipeline pipeline) { QueryPipelineBuilder builder; diff --git a/programs/copier/Internals.h b/programs/copier/Internals.h index 9e648060592..a9d8ca726fe 100644 --- a/programs/copier/Internals.h +++ b/programs/copier/Internals.h @@ -50,8 +50,6 @@ #include #include #include -#include -#include #include #include #include diff --git a/programs/library-bridge/Handlers.cpp b/programs/library-bridge/Handlers.cpp index 2f6dca5ee65..abc5118baad 100644 --- a/programs/library-bridge/Handlers.cpp +++ b/programs/library-bridge/Handlers.cpp @@ -1,7 +1,6 @@ #include "Handlers.h" #include "SharedLibraryHandlerFactory.h" -#include #include #include #include @@ -10,11 +9,13 @@ #include #include #include -#include #include +#include #include #include +#include #include +#include #include #include @@ -189,8 +190,10 @@ void LibraryRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServe ReadBufferFromString read_block_buf(params.get("null_values")); auto format = getContext()->getInputFormat(FORMAT, read_block_buf, *sample_block, DEFAULT_BLOCK_SIZE); - auto reader = std::make_shared(format); - auto sample_block_with_nulls = reader->read(); + QueryPipeline pipeline(Pipe(std::move(format))); + PullingPipelineExecutor executor(pipeline); + Block sample_block_with_nulls; + executor.pull(sample_block_with_nulls); LOG_DEBUG(log, "Dictionary sample block with null values: {}", sample_block_with_nulls.dumpStructure()); @@ -281,8 +284,10 @@ void LibraryRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServe auto & read_buf = request.getStream(); auto format = getContext()->getInputFormat(FORMAT, read_buf, *requested_sample_block, DEFAULT_BLOCK_SIZE); - auto reader = std::make_shared(format); - auto block = reader->read(); + QueryPipeline pipeline(std::move(format)); + PullingPipelineExecutor executor(pipeline); + Block block; + executor.pull(block); auto library_handler = SharedLibraryHandlerFactory::instance().get(dictionary_id); if (!library_handler) diff --git a/programs/library-bridge/SharedLibraryHandler.h 
b/programs/library-bridge/SharedLibraryHandler.h index f9d2fe43cb2..de1d098dc8d 100644 --- a/programs/library-bridge/SharedLibraryHandler.h +++ b/programs/library-bridge/SharedLibraryHandler.h @@ -2,7 +2,6 @@ #include #include -#include #include "LibraryUtils.h" diff --git a/programs/odbc-bridge/MainHandler.cpp b/programs/odbc-bridge/MainHandler.cpp index 51abe207095..6a2e967d179 100644 --- a/programs/odbc-bridge/MainHandler.cpp +++ b/programs/odbc-bridge/MainHandler.cpp @@ -4,7 +4,6 @@ #include "ODBCBlockInputStream.h" #include "ODBCBlockOutputStream.h" #include "getIdentifierQuote.h" -#include #include #include #include diff --git a/src/Client/IServerConnection.h b/src/Client/IServerConnection.h index 5584cf72bbf..42886c72182 100644 --- a/src/Client/IServerConnection.h +++ b/src/Client/IServerConnection.h @@ -6,7 +6,6 @@ #include #include -#include #include #include diff --git a/src/Core/ExternalTable.cpp b/src/Core/ExternalTable.cpp index 7619d60d84e..4dd8b0cf016 100644 --- a/src/Core/ExternalTable.cpp +++ b/src/Core/ExternalTable.cpp @@ -1,5 +1,4 @@ #include -#include #include #include #include diff --git a/src/DataStreams/BlockStreamProfileInfo.cpp b/src/DataStreams/BlockStreamProfileInfo.cpp index 09ad8a8e4ac..9a06d905223 100644 --- a/src/DataStreams/BlockStreamProfileInfo.cpp +++ b/src/DataStreams/BlockStreamProfileInfo.cpp @@ -1,5 +1,4 @@ #include -#include #include #include @@ -47,16 +46,12 @@ void BlockStreamProfileInfo::setFrom(const BlockStreamProfileInfo & rhs, bool sk size_t BlockStreamProfileInfo::getRowsBeforeLimit() const { - if (!calculated_rows_before_limit) - calculateRowsBeforeLimit(); return rows_before_limit; } bool BlockStreamProfileInfo::hasAppliedLimit() const { - if (!calculated_rows_before_limit) - calculateRowsBeforeLimit(); return applied_limit; } @@ -73,74 +68,4 @@ void BlockStreamProfileInfo::update(size_t num_rows, size_t num_bytes) bytes += num_bytes; } - -void BlockStreamProfileInfo::collectInfosForStreamsWithName(const char * name, BlockStreamProfileInfos & res) const -{ - if (!parent) - return; - - if (parent->getName() == name) - { - res.push_back(this); - return; - } - - parent->forEachChild([&] (IBlockInputStream & child) - { - child.getProfileInfo().collectInfosForStreamsWithName(name, res); - return false; - }); -} - - -void BlockStreamProfileInfo::calculateRowsBeforeLimit() const -{ - calculated_rows_before_limit = true; - - /// is there a Limit? - BlockStreamProfileInfos limits; - collectInfosForStreamsWithName("Limit", limits); - - if (!limits.empty()) - { - applied_limit = true; - - /** Take the number of lines read below `PartialSorting`, if any, or below `Limit`. - * This is necessary, because sorting can return only part of the rows. - */ - BlockStreamProfileInfos partial_sortings; - collectInfosForStreamsWithName("PartialSorting", partial_sortings); - - BlockStreamProfileInfos & limits_or_sortings = partial_sortings.empty() ? limits : partial_sortings; - - for (const BlockStreamProfileInfo * info_limit_or_sort : limits_or_sortings) - { - info_limit_or_sort->parent->forEachChild([&] (IBlockInputStream & child) - { - rows_before_limit += child.getProfileInfo().rows; - return false; - }); - } - } - else - { - /// Then the data about `rows_before_limit` can be in `RemoteBlockInputStream` (come from a remote server). 
- BlockStreamProfileInfos remotes; - collectInfosForStreamsWithName("Remote", remotes); - collectInfosForStreamsWithName("TreeExecutor", remotes); - - if (remotes.empty()) - return; - - for (const auto & info : remotes) - { - if (info->applied_limit) - { - applied_limit = true; - rows_before_limit += info->rows_before_limit; - } - } - } -} - } diff --git a/src/DataStreams/BlockStreamProfileInfo.h b/src/DataStreams/BlockStreamProfileInfo.h index 688bdfc91fc..1707b941445 100644 --- a/src/DataStreams/BlockStreamProfileInfo.h +++ b/src/DataStreams/BlockStreamProfileInfo.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include @@ -16,9 +15,6 @@ class WriteBuffer; /// Information for profiling. See IBlockInputStream.h struct BlockStreamProfileInfo { - /// Info about stream object this profile info refers to. - IBlockInputStream * parent = nullptr; - bool started = false; Stopwatch total_stopwatch {CLOCK_MONOTONIC_COARSE}; /// Time with waiting time @@ -28,9 +24,6 @@ struct BlockStreamProfileInfo using BlockStreamProfileInfos = std::vector; - /// Collect BlockStreamProfileInfo for the nearest sources in the tree named `name`. Example; collect all info for PartialSorting streams. - void collectInfosForStreamsWithName(const char * name, BlockStreamProfileInfos & res) const; - /** Get the number of rows if there were no LIMIT. * If there is no LIMIT, 0 is returned. * If the query does not contain ORDER BY, the number can be underestimated - return the number of rows in blocks that were read before LIMIT reached. @@ -59,8 +52,6 @@ struct BlockStreamProfileInfo } private: - void calculateRowsBeforeLimit() const; - /// For these fields we make accessors, because they must be calculated beforehand. mutable bool applied_limit = false; /// Whether LIMIT was applied mutable size_t rows_before_limit = 0; diff --git a/src/DataStreams/IBlockInputStream.cpp b/src/DataStreams/IBlockInputStream.cpp deleted file mode 100644 index e57d6903673..00000000000 --- a/src/DataStreams/IBlockInputStream.cpp +++ /dev/null @@ -1,359 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include - -namespace ProfileEvents -{ - extern const Event ThrottlerSleepMicroseconds; - extern const Event SelectedRows; - extern const Event SelectedBytes; -} - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int QUERY_WAS_CANCELLED; - extern const int TOO_MANY_ROWS; - extern const int TOO_MANY_BYTES; - extern const int TOO_MANY_ROWS_OR_BYTES; - extern const int LOGICAL_ERROR; -} - - -/// It's safe to access children without mutex as long as these methods are called before first call to `read()` or `readPrefix()`. - - -Block IBlockInputStream::read() -{ - if (total_rows_approx) - { - progressImpl(Progress(0, 0, total_rows_approx)); - total_rows_approx = 0; - } - - if (!info.started) - { - info.total_stopwatch.start(); - info.started = true; - } - - Block res; - - if (isCancelledOrThrowIfKilled()) - return res; - - if (!checkTimeLimit()) - limit_exceeded_need_break = true; - - if (!limit_exceeded_need_break) - res = readImpl(); - - if (res) - { - info.update(res); - - if (enabled_extremes) - updateExtremes(res); - - if (limits.mode == LimitsMode::LIMITS_CURRENT && !limits.size_limits.check(info.rows, info.bytes, "result", ErrorCodes::TOO_MANY_ROWS_OR_BYTES)) - limit_exceeded_need_break = true; - - if (quota) - checkQuota(res); - } - else - { - /** If the stream is over, then we will ask all children to abort the execution. 
- * This makes sense when running a query with LIMIT - * - there is a situation when all the necessary data has already been read, - * but children sources are still working, - * herewith they can work in separate threads or even remotely. - */ - cancel(false); - } - - progress(Progress(res.rows(), res.bytes())); - -#ifndef NDEBUG - if (res) - { - Block header = getHeader(); - if (header) - assertBlocksHaveEqualStructure(res, header, getName()); - } -#endif - - return res; -} - - -void IBlockInputStream::readPrefix() -{ -#ifndef NDEBUG - if (!read_prefix_is_called) - read_prefix_is_called = true; - else - throw Exception("readPrefix is called twice for " + getName() + " stream", ErrorCodes::LOGICAL_ERROR); -#endif - - readPrefixImpl(); - - forEachChild([&] (IBlockInputStream & child) - { - child.readPrefix(); - return false; - }); -} - - -void IBlockInputStream::readSuffix() -{ -#ifndef NDEBUG - if (!read_suffix_is_called) - read_suffix_is_called = true; - else - throw Exception("readSuffix is called twice for " + getName() + " stream", ErrorCodes::LOGICAL_ERROR); -#endif - - forEachChild([&] (IBlockInputStream & child) - { - child.readSuffix(); - return false; - }); - - readSuffixImpl(); -} - - -void IBlockInputStream::updateExtremes(Block & block) -{ - size_t num_columns = block.columns(); - - if (!extremes) - { - MutableColumns extremes_columns(num_columns); - - for (size_t i = 0; i < num_columns; ++i) - { - const ColumnPtr & src = block.safeGetByPosition(i).column; - - if (isColumnConst(*src)) - { - /// Equal min and max. - extremes_columns[i] = src->cloneResized(2); - } - else - { - Field min_value; - Field max_value; - - src->getExtremes(min_value, max_value); - - extremes_columns[i] = src->cloneEmpty(); - - extremes_columns[i]->insert(min_value); - extremes_columns[i]->insert(max_value); - } - } - - extremes = block.cloneWithColumns(std::move(extremes_columns)); - } - else - { - for (size_t i = 0; i < num_columns; ++i) - { - ColumnPtr & old_extremes = extremes.safeGetByPosition(i).column; - - if (isColumnConst(*old_extremes)) - continue; - - Field min_value = (*old_extremes)[0]; - Field max_value = (*old_extremes)[1]; - - Field cur_min_value; - Field cur_max_value; - - block.safeGetByPosition(i).column->getExtremes(cur_min_value, cur_max_value); - - if (cur_min_value < min_value) - min_value = cur_min_value; - if (cur_max_value > max_value) - max_value = cur_max_value; - - MutableColumnPtr new_extremes = old_extremes->cloneEmpty(); - - new_extremes->insert(min_value); - new_extremes->insert(max_value); - - old_extremes = std::move(new_extremes); - } - } -} - - -bool IBlockInputStream::checkTimeLimit() const -{ - return limits.speed_limits.checkTimeLimit(info.total_stopwatch, limits.timeout_overflow_mode); -} - - -void IBlockInputStream::checkQuota(Block & block) -{ - switch (limits.mode) - { - case LimitsMode::LIMITS_TOTAL: - /// Checked in `progress` method. 
- break; - - case LimitsMode::LIMITS_CURRENT: - { - UInt64 total_elapsed = info.total_stopwatch.elapsedNanoseconds(); - quota->used({Quota::RESULT_ROWS, block.rows()}, {Quota::RESULT_BYTES, block.bytes()}, {Quota::EXECUTION_TIME, total_elapsed - prev_elapsed}); - prev_elapsed = total_elapsed; - break; - } - } -} - - -void IBlockInputStream::progressImpl(const Progress & value) -{ - if (progress_callback) - progress_callback(value); - - if (process_list_elem) - { - if (!process_list_elem->updateProgressIn(value)) - cancel(/* kill */ true); - - /// The total amount of data processed or intended for processing in all leaf sources, possibly on remote servers. - - ProgressValues progress = process_list_elem->getProgressIn(); - size_t total_rows_estimate = std::max(progress.read_rows, progress.total_rows_to_read); - - /** Check the restrictions on the amount of data to read, the speed of the query, the quota on the amount of data to read. - * NOTE: Maybe it makes sense to have them checked directly in ProcessList? - */ - if (limits.mode == LimitsMode::LIMITS_TOTAL) - { - if (!limits.size_limits.check(total_rows_estimate, progress.read_bytes, "rows to read", - ErrorCodes::TOO_MANY_ROWS, ErrorCodes::TOO_MANY_BYTES)) - cancel(false); - } - - size_t total_rows = progress.total_rows_to_read; - - constexpr UInt64 profile_events_update_period_microseconds = 10 * 1000; // 10 milliseconds - UInt64 total_elapsed_microseconds = info.total_stopwatch.elapsedMicroseconds(); - - if (last_profile_events_update_time + profile_events_update_period_microseconds < total_elapsed_microseconds) - { - CurrentThread::updatePerformanceCounters(); - last_profile_events_update_time = total_elapsed_microseconds; - } - - limits.speed_limits.throttle(progress.read_rows, progress.read_bytes, total_rows, total_elapsed_microseconds); - - if (quota && limits.mode == LimitsMode::LIMITS_TOTAL) - quota->used({Quota::READ_ROWS, value.read_rows}, {Quota::READ_BYTES, value.read_bytes}); - } - - ProfileEvents::increment(ProfileEvents::SelectedRows, value.read_rows); - ProfileEvents::increment(ProfileEvents::SelectedBytes, value.read_bytes); -} - - -void IBlockInputStream::cancel(bool kill) -{ - if (kill) - is_killed = true; - - bool old_val = false; - if (!is_cancelled.compare_exchange_strong(old_val, true, std::memory_order_seq_cst, std::memory_order_relaxed)) - return; - - forEachChild([&] (IBlockInputStream & child) - { - child.cancel(kill); - return false; - }); -} - - -bool IBlockInputStream::isCancelled() const -{ - return is_cancelled; -} - -bool IBlockInputStream::isCancelledOrThrowIfKilled() const -{ - if (!is_cancelled) - return false; - if (is_killed) - throw Exception("Query was cancelled", ErrorCodes::QUERY_WAS_CANCELLED); - return true; -} - - -void IBlockInputStream::setProgressCallback(const ProgressCallback & callback) -{ - progress_callback = callback; - - forEachChild([&] (IBlockInputStream & child) - { - child.setProgressCallback(callback); - return false; - }); -} - - -void IBlockInputStream::setProcessListElement(QueryStatus * elem) -{ - process_list_elem = elem; - - forEachChild([&] (IBlockInputStream & child) - { - child.setProcessListElement(elem); - return false; - }); -} - - -Block IBlockInputStream::getTotals() -{ - if (totals) - return totals; - - Block res; - forEachChild([&] (IBlockInputStream & child) - { - res = child.getTotals(); - return bool(res); - }); - return res; -} - - -Block IBlockInputStream::getExtremes() -{ - if (extremes) - return extremes; - - Block res; - forEachChild([&] 
(IBlockInputStream & child) - { - res = child.getExtremes(); - return bool(res); - }); - return res; -} - -} diff --git a/src/DataStreams/IBlockInputStream.h b/src/DataStreams/IBlockInputStream.h deleted file mode 100644 index 0e77ba81779..00000000000 --- a/src/DataStreams/IBlockInputStream.h +++ /dev/null @@ -1,271 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ -} - -class ProcessListElement; -class EnabledQuota; -class QueryStatus; - - -/** The stream interface for reading data by blocks from the database. - * Relational operations are supposed to be done also as implementations of this interface. - * Watches out at how the source of the blocks works. - * Lets you get information for profiling: rows per second, blocks per second, megabytes per second, etc. - * Allows you to stop reading data (in nested sources). - */ -class IBlockInputStream : public TypePromotion -{ - friend struct BlockStreamProfileInfo; - -public: - IBlockInputStream() { info.parent = this; } - virtual ~IBlockInputStream() = default; - - IBlockInputStream(const IBlockInputStream &) = delete; - IBlockInputStream & operator=(const IBlockInputStream &) = delete; - - /// To output the data stream transformation tree (query execution plan). - virtual String getName() const = 0; - - /** Get data structure of the stream in a form of "header" block (it is also called "sample block"). - * Header block contains column names, data types, columns of size 0. Constant columns must have corresponding values. - * It is guaranteed that method "read" returns blocks of exactly that structure. - */ - virtual Block getHeader() const = 0; - - virtual const BlockMissingValues & getMissingValues() const - { - static const BlockMissingValues none; - return none; - } - - /** Read next block. - * If there are no more blocks, return an empty block (for which operator `bool` returns false). - * NOTE: Only one thread can read from one instance of IBlockInputStream simultaneously. - * This also applies for readPrefix, readSuffix. - */ - Block read(); - - /** Read something before starting all data or after the end of all data. - * In the `readSuffix` function, you can implement a finalization that can lead to an exception. - * readPrefix() must be called before the first call to read(). - * readSuffix() should be called after read() returns an empty block, or after a call to cancel(), but not during read() execution. - */ - - /** The default implementation calls readPrefixImpl() on itself, and then readPrefix() recursively for all children. - * There are cases when you do not want `readPrefix` of children to be called synchronously, in this function, - * but you want them to be called, for example, in separate threads (for parallel initialization of children). - * Then overload `readPrefix` function. - */ - virtual void readPrefix(); - - /** The default implementation calls recursively readSuffix() on all children, and then readSuffixImpl() on itself. - * If this stream calls read() in children in a separate thread, this behavior is usually incorrect: - * readSuffix() of the child can not be called at the moment when the same child's read() is executed in another thread. - * In this case, you need to override this method so that readSuffix() in children is called, for example, after connecting streams. 
- */ - virtual void readSuffix(); - - /// Do not allow to change the table while the blocks stream and its children are alive. - void addTableLock(const TableLockHolder & lock) { table_locks.push_back(lock); } - - /// Get information about execution speed. - const BlockStreamProfileInfo & getProfileInfo() const { return info; } - - /** Get "total" values. - * The default implementation takes them from itself or from the first child source in which they are. - * The overridden method can perform some calculations. For example, apply an expression to the `totals` of the child source. - * There can be no total values - then an empty block is returned. - * - * Call this method only after all the data has been retrieved with `read`, - * otherwise there will be problems if any data at the same time is computed in another thread. - */ - virtual Block getTotals(); - - /// The same for minimums and maximums. - virtual Block getExtremes(); - - - /** Set the execution progress bar callback. - * The callback is passed to all child sources. - * By default, it is called for leaf sources, after each block. - * (But this can be overridden in the progress() method) - * The function takes the number of rows in the last block, the number of bytes in the last block. - * Note that the callback can be called from different threads. - */ - virtual void setProgressCallback(const ProgressCallback & callback); - - - /** In this method: - * - the progress callback is called; - * - the status of the query execution in ProcessList is updated; - * - checks restrictions and quotas that should be checked not within the same source, - * but over the total amount of resources spent in all sources at once (information in the ProcessList). - */ - virtual void progress(const Progress & value) - { - /// The data for progress is taken from leaf sources. - if (children.empty()) - progressImpl(value); - } - - void progressImpl(const Progress & value); - - - /** Set the pointer to the process list item. - * It is passed to all child sources. - * General information about the resources spent on the request will be written into it. - * Based on this information, the quota and some restrictions will be checked. - * This information will also be available in the SHOW PROCESSLIST request. - */ - virtual void setProcessListElement(QueryStatus * elem); - - /** Set the approximate total number of rows to read. - */ - void addTotalRowsApprox(size_t value) { total_rows_approx += value; } - - - /** Ask to abort the receipt of data as soon as possible. - * By default - just sets the flag is_cancelled and asks that all children be interrupted. - * This function can be called several times, including simultaneously from different threads. - * Have two modes: - * with kill = false only is_cancelled is set - streams will stop silently with returning some processed data. - * with kill = true also is_killed set - queries will stop with exception. - */ - virtual void cancel(bool kill); - - bool isCancelled() const; - bool isCancelledOrThrowIfKilled() const; - - /** Set limitations that checked on each block. */ - virtual void setLimits(const StreamLocalLimits & limits_) - { - limits = limits_; - } - - const StreamLocalLimits & getLimits() const - { - return limits; - } - - /** Set the quota. If you set a quota on the amount of raw data, - * then you should also set mode = LIMITS_TOTAL to LocalLimits with setLimits. 
- */ - virtual void setQuota(const std::shared_ptr & new_quota) - { - quota = new_quota; - } - - /// Enable calculation of minimums and maximums by the result columns. - void enableExtremes() { enabled_extremes = true; } - -protected: - /// Order is important: `table_locks` must be destroyed after `children` so that tables from - /// which child streams read are protected by the locks during the lifetime of the child streams. - std::vector table_locks; - - BlockInputStreams children; - std::shared_mutex children_mutex; - - BlockStreamProfileInfo info; - std::atomic is_cancelled{false}; - std::atomic is_killed{false}; - ProgressCallback progress_callback; - QueryStatus * process_list_elem = nullptr; - /// According to total_stopwatch in microseconds - UInt64 last_profile_events_update_time = 0; - - /// Additional information that can be generated during the work process. - - /// Total values during aggregation. - Block totals; - /// Minimums and maximums. The first row of the block - minimums, the second - the maximums. - Block extremes; - - - void addChild(const BlockInputStreamPtr & child) - { - std::unique_lock lock(children_mutex); - children.push_back(child); - } - - /** Check limits. - * But only those that can be checked within each separate stream. - */ - bool checkTimeLimit() const; - -#ifndef NDEBUG - bool read_prefix_is_called = false; - bool read_suffix_is_called = false; -#endif - -private: - bool enabled_extremes = false; - - /// The limit on the number of rows/bytes has been exceeded, and you need to stop execution on the next `read` call, as if the thread has run out. - bool limit_exceeded_need_break = false; - - /// Limitations and quotas. - - StreamLocalLimits limits; - - std::shared_ptr quota; /// If nullptr - the quota is not used. - UInt64 prev_elapsed = 0; - - /// The approximate total number of rows to read. For progress bar. - size_t total_rows_approx = 0; - - /// Derived classes must implement this function. - virtual Block readImpl() = 0; - - /// Here you can do a preliminary initialization. - virtual void readPrefixImpl() {} - - /// Here you need to do a finalization, which can lead to an exception. - virtual void readSuffixImpl() {} - - void updateExtremes(Block & block); - - /** Check quotas. - * But only those that can be checked within each separate stream. - */ - void checkQuota(Block & block); - - size_t checkDepthImpl(size_t max_depth, size_t level) const; - - template - void forEachChild(F && f) - { - /// NOTE: Acquire a read lock, therefore f() should be thread safe - std::shared_lock lock(children_mutex); - - // Reduce lock scope and avoid recursive locking since that is undefined for shared_mutex. - const auto children_copy = children; - lock.unlock(); - - for (auto & child : children_copy) - if (f(*child)) - return; - } - -}; - -} diff --git a/src/DataStreams/IBlockOutputStream.h b/src/DataStreams/IBlockOutputStream.h deleted file mode 100644 index 65ebd90769d..00000000000 --- a/src/DataStreams/IBlockOutputStream.h +++ /dev/null @@ -1,70 +0,0 @@ -#pragma once - -#include -#include -#include - -#include - -#include -#include -#include - - -namespace DB -{ - -struct Progress; - -/** Interface of stream for writing data (into table, filesystem, network, terminal, etc.) - */ -class IBlockOutputStream : private boost::noncopyable -{ -public: - IBlockOutputStream() = default; - - /** Get data structure of the stream in a form of "header" block (it is also called "sample block"). - * Header block contains column names, data types, columns of size 0. 
Constant columns must have corresponding values. - * You must pass blocks of exactly this structure to the 'write' method. - */ - virtual Block getHeader() const = 0; - - /** Write block. - */ - virtual void write(const Block & block) = 0; - - /** Write or do something before all data or after all data. - */ - virtual void writePrefix() {} - virtual void writeSuffix() {} - - /** Flush output buffers if any. - */ - virtual void flush() {} - - /** Methods to set additional information for output in formats, that support it. - */ - virtual void setRowsBeforeLimit(size_t /*rows_before_limit*/) {} - virtual void setTotals(const Block & /*totals*/) {} - virtual void setExtremes(const Block & /*extremes*/) {} - - /** Notify about progress. Method could be called from different threads. - * Passed value are delta, that must be summarized. - */ - virtual void onProgress(const Progress & /*progress*/) {} - - /** Content-Type to set when sending HTTP response. - */ - virtual std::string getContentType() const { return "text/plain; charset=UTF-8"; } - - virtual ~IBlockOutputStream() = default; - - /** Don't let to alter table while instance of stream is alive. - */ - void addTableLock(const TableLockHolder & lock) { table_locks.push_back(lock); } - -private: - std::vector table_locks; -}; - -} diff --git a/src/DataStreams/IBlockStream_fwd.h b/src/DataStreams/IBlockStream_fwd.h deleted file mode 100644 index d74a9528ed9..00000000000 --- a/src/DataStreams/IBlockStream_fwd.h +++ /dev/null @@ -1,17 +0,0 @@ -#pragma once - -#include -#include - -namespace DB -{ - -class IBlockInputStream; -class IBlockOutputStream; - -using BlockInputStreamPtr = std::shared_ptr; -using BlockInputStreams = std::vector; -using BlockOutputStreamPtr = std::shared_ptr; -using BlockOutputStreams = std::vector; - -} diff --git a/src/DataStreams/InternalTextLogs.h b/src/DataStreams/InternalTextLogs.h index 1312c1d327c..a8b119b0f69 100644 --- a/src/DataStreams/InternalTextLogs.h +++ b/src/DataStreams/InternalTextLogs.h @@ -1,6 +1,6 @@ #pragma once -#include #include +#include namespace DB diff --git a/src/DataStreams/NativeReader.h b/src/DataStreams/NativeReader.h index cfd58bde2cc..95b03c71764 100644 --- a/src/DataStreams/NativeReader.h +++ b/src/DataStreams/NativeReader.h @@ -1,8 +1,8 @@ #pragma once -#include #include #include +#include namespace DB { diff --git a/src/DataStreams/TemporaryFileStream.cpp b/src/DataStreams/TemporaryFileStream.cpp index 826cf5508d8..4b7c9d50fe7 100644 --- a/src/DataStreams/TemporaryFileStream.cpp +++ b/src/DataStreams/TemporaryFileStream.cpp @@ -1,8 +1,6 @@ #include -#include #include #include -#include #include #include #include diff --git a/src/DataStreams/TemporaryFileStream.h b/src/DataStreams/TemporaryFileStream.h index c0c13605928..e288b5b30fa 100644 --- a/src/DataStreams/TemporaryFileStream.h +++ b/src/DataStreams/TemporaryFileStream.h @@ -4,7 +4,6 @@ #include #include #include -#include #include namespace DB diff --git a/src/DataStreams/copyData.cpp b/src/DataStreams/copyData.cpp deleted file mode 100644 index a26052778a8..00000000000 --- a/src/DataStreams/copyData.cpp +++ /dev/null @@ -1,86 +0,0 @@ -#include -#include -#include -#include - - -namespace DB -{ - -namespace -{ - -bool isAtomicSet(std::atomic * val) -{ - return ((val != nullptr) && val->load(std::memory_order_seq_cst)); -} - -} - -template -void copyDataImpl(IBlockInputStream & from, IBlockOutputStream & to, TCancelCallback && is_cancelled, TProgressCallback && progress) -{ - from.readPrefix(); - to.writePrefix(); - - while 
(Block block = from.read()) - { - if (is_cancelled()) - break; - - to.write(block); - progress(block); - } - - if (is_cancelled()) - return; - - /// For outputting additional information in some formats. - if (from.getProfileInfo().hasAppliedLimit()) - to.setRowsBeforeLimit(from.getProfileInfo().getRowsBeforeLimit()); - - to.setTotals(from.getTotals()); - to.setExtremes(from.getExtremes()); - - if (is_cancelled()) - return; - - from.readSuffix(); - to.writeSuffix(); -} - -void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function & progress, - std::atomic * is_cancelled) -{ - auto is_cancelled_pred = [is_cancelled] () - { - return isAtomicSet(is_cancelled); - }; - - copyDataImpl(from, to, is_cancelled_pred, progress); -} - -inline void doNothing(const Block &) {} - -void copyData(IBlockInputStream & from, IBlockOutputStream & to, std::atomic * is_cancelled) -{ - auto is_cancelled_pred = [is_cancelled] () - { - return isAtomicSet(is_cancelled); - }; - - copyDataImpl(from, to, is_cancelled_pred, doNothing); -} - -void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function & is_cancelled) -{ - copyDataImpl(from, to, is_cancelled, doNothing); -} - -void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function & is_cancelled, - const std::function & progress) -{ - copyDataImpl(from, to, is_cancelled, progress); -} - -} diff --git a/src/DataStreams/copyData.h b/src/DataStreams/copyData.h deleted file mode 100644 index 3dc90aed37d..00000000000 --- a/src/DataStreams/copyData.h +++ /dev/null @@ -1,27 +0,0 @@ -#pragma once - -#include - -#include -#include - - -namespace DB -{ - -class Block; - -/** Copies data from the InputStream into the OutputStream - * (for example, from the database to the console, etc.) 
- */ -void copyData(IBlockInputStream & from, IBlockOutputStream & to, std::atomic * is_cancelled = nullptr); - -void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function & progress, - std::atomic * is_cancelled = nullptr); - -void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function & is_cancelled); - -void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function & is_cancelled, - const std::function & progress); - -} diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp index f4a5b6b5e4e..9ec8a9523c6 100644 --- a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp +++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp @@ -14,7 +14,6 @@ # include # include # include -# include # include # include # include diff --git a/src/Dictionaries/DictionarySourceHelpers.cpp b/src/Dictionaries/DictionarySourceHelpers.cpp index cf003dceb8e..cd87cf831a2 100644 --- a/src/Dictionaries/DictionarySourceHelpers.cpp +++ b/src/Dictionaries/DictionarySourceHelpers.cpp @@ -1,7 +1,6 @@ #include "DictionarySourceHelpers.h" #include #include -#include #include #include #include "DictionaryStructure.h" diff --git a/src/Dictionaries/HTTPDictionarySource.cpp b/src/Dictionaries/HTTPDictionarySource.cpp index 6cb4d52744a..844a6357e29 100644 --- a/src/Dictionaries/HTTPDictionarySource.cpp +++ b/src/Dictionaries/HTTPDictionarySource.cpp @@ -1,5 +1,4 @@ #include "HTTPDictionarySource.h" -#include #include #include #include diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index ed9f9d52b94..34574ca13f8 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index d816ef1d016..f20cec56943 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include #include diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index 85ce83868c6..975075eba96 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -19,7 +19,6 @@ #include #include -#include #include #include diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 1e19c18de43..22ae459a662 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 566ee60a3e6..9d770aee159 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -35,7 +35,6 @@ #include #include -#include #include diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index c785b085a57..b6bb3c5fad5 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include #include diff --git a/src/Interpreters/InterpreterExistsQuery.cpp b/src/Interpreters/InterpreterExistsQuery.cpp index 6eb188bce9f..6ffeef5cc7d 100644 --- a/src/Interpreters/InterpreterExistsQuery.cpp +++ b/src/Interpreters/InterpreterExistsQuery.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Interpreters/InterpreterInsertQuery.cpp 
b/src/Interpreters/InterpreterInsertQuery.cpp index 8a31917caef..6a1a8652b23 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Interpreters/InterpreterInsertQuery.h b/src/Interpreters/InterpreterInsertQuery.h index 0d6fe34c0c2..5f44603a420 100644 --- a/src/Interpreters/InterpreterInsertQuery.h +++ b/src/Interpreters/InterpreterInsertQuery.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include #include diff --git a/src/Interpreters/InterpreterSelectQuery.h b/src/Interpreters/InterpreterSelectQuery.h index 21e15bc74bb..cf24d14b737 100644 --- a/src/Interpreters/InterpreterSelectQuery.h +++ b/src/Interpreters/InterpreterSelectQuery.h @@ -3,7 +3,6 @@ #include #include -#include #include #include #include @@ -52,13 +51,6 @@ public: const SelectQueryOptions &, const Names & required_result_column_names_ = Names{}); - /// Read data not from the table specified in the query, but from the prepared source `input`. - InterpreterSelectQuery( - const ASTPtr & query_ptr_, - ContextPtr context_, - const BlockInputStreamPtr & input_, - const SelectQueryOptions & = {}); - /// Read data not from the table specified in the query, but from the prepared pipe `input`. InterpreterSelectQuery( const ASTPtr & query_ptr_, diff --git a/src/Interpreters/InterpreterShowCreateQuery.cpp b/src/Interpreters/InterpreterShowCreateQuery.cpp index adf1aae3ff3..c191a73bc71 100644 --- a/src/Interpreters/InterpreterShowCreateQuery.cpp +++ b/src/Interpreters/InterpreterShowCreateQuery.cpp @@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Interpreters/InterpreterWatchQuery.cpp b/src/Interpreters/InterpreterWatchQuery.cpp index bc0aeda56bd..78c4eca5ca6 100644 --- a/src/Interpreters/InterpreterWatchQuery.cpp +++ b/src/Interpreters/InterpreterWatchQuery.cpp @@ -15,7 +15,6 @@ limitations under the License. */ #include #include #include -#include #include diff --git a/src/Interpreters/InterpreterWatchQuery.h b/src/Interpreters/InterpreterWatchQuery.h index 2bc7236582a..e43ed88af2f 100644 --- a/src/Interpreters/InterpreterWatchQuery.h +++ b/src/Interpreters/InterpreterWatchQuery.h @@ -38,8 +38,6 @@ private: /// Table from where to read data, if not subquery. 
StoragePtr storage; - /// Streams of read data - BlockInputStreams streams; }; } diff --git a/src/Interpreters/JoinSwitcher.h b/src/Interpreters/JoinSwitcher.h index aaa7441b8a4..30115710e22 100644 --- a/src/Interpreters/JoinSwitcher.h +++ b/src/Interpreters/JoinSwitcher.h @@ -5,7 +5,6 @@ #include #include #include -#include namespace DB diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index f8402cf0287..fb9f2e25c07 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -11,7 +11,6 @@ #include #include #include -#include #include #include diff --git a/src/Interpreters/SortedBlocksWriter.h b/src/Interpreters/SortedBlocksWriter.h index c65511e943e..94bebce88f7 100644 --- a/src/Interpreters/SortedBlocksWriter.h +++ b/src/Interpreters/SortedBlocksWriter.h @@ -8,7 +8,6 @@ #include #include #include -#include namespace DB diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index 22cd958f4f8..02dcd95ab41 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Interpreters/executeDDLQueryOnCluster.h b/src/Interpreters/executeDDLQueryOnCluster.h index f430c2364b2..0ad40dd3332 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.h +++ b/src/Interpreters/executeDDLQueryOnCluster.h @@ -4,7 +4,6 @@ #include #include #include -#include namespace zkutil diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 3f5b386d16f..0a1130c721b 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -10,8 +10,6 @@ #include #include -#include -#include #include #include diff --git a/src/Processors/Formats/OutputStreamToOutputFormat.cpp b/src/Processors/Formats/OutputStreamToOutputFormat.cpp deleted file mode 100644 index 5d4e7832327..00000000000 --- a/src/Processors/Formats/OutputStreamToOutputFormat.cpp +++ /dev/null @@ -1,43 +0,0 @@ -#include -#include - -namespace DB -{ - -Block OutputStreamToOutputFormat::getHeader() const -{ - return output_format->getPort(IOutputFormat::PortKind::Main).getHeader(); -} - -void OutputStreamToOutputFormat::write(const Block & block) -{ - output_format->write(block); -} - -void OutputStreamToOutputFormat::writePrefix() { output_format->doWritePrefix(); } -void OutputStreamToOutputFormat::writeSuffix() { output_format->doWriteSuffix(); } - -void OutputStreamToOutputFormat::flush() { output_format->flush(); } - -void OutputStreamToOutputFormat::setRowsBeforeLimit(size_t rows_before_limit) -{ - output_format->setRowsBeforeLimit(rows_before_limit); -} - -void OutputStreamToOutputFormat::setTotals(const Block & totals) -{ - if (totals) - output_format->setTotals(totals); -} - -void OutputStreamToOutputFormat::setExtremes(const Block & extremes) -{ - if (extremes) - output_format->setExtremes(extremes); -} - -void OutputStreamToOutputFormat::onProgress(const Progress & progress) { output_format->onProgress(progress); } - -std::string OutputStreamToOutputFormat::getContentType() const { return output_format->getContentType(); } - -} diff --git a/src/Processors/Formats/OutputStreamToOutputFormat.h b/src/Processors/Formats/OutputStreamToOutputFormat.h deleted file mode 100644 index a85de12b49d..00000000000 --- a/src/Processors/Formats/OutputStreamToOutputFormat.h +++ /dev/null @@ -1,39 +0,0 @@ -#pragma once -#include - -namespace DB -{ - - -class IOutputFormat; - -using OutputFormatPtr = std::shared_ptr; - -/// Wrapper. 
Implements IBlockOutputStream interface using IOutputFormat object. -class OutputStreamToOutputFormat : public IBlockOutputStream -{ -public: - explicit OutputStreamToOutputFormat(OutputFormatPtr output_format_) : output_format(std::move(output_format_)) {} - - Block getHeader() const override; - - void write(const Block & block) override; - - void writePrefix() override; - void writeSuffix() override; - - void flush() override; - - void setRowsBeforeLimit(size_t rows_before_limit) override; - void setTotals(const Block & totals) override; - void setExtremes(const Block & extremes) override; - - void onProgress(const Progress & progress) override; - - std::string getContentType() const override; - -private: - OutputFormatPtr output_format; -}; - -} diff --git a/src/Processors/QueryPipelineBuilder.h b/src/Processors/QueryPipelineBuilder.h index 78ae5dd41be..7e0ddbc0285 100644 --- a/src/Processors/QueryPipelineBuilder.h +++ b/src/Processors/QueryPipelineBuilder.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include #include diff --git a/src/Processors/Transforms/CreatingSetsTransform.cpp b/src/Processors/Transforms/CreatingSetsTransform.cpp index 6776caae9bf..e6ae620e69b 100644 --- a/src/Processors/Transforms/CreatingSetsTransform.cpp +++ b/src/Processors/Transforms/CreatingSetsTransform.cpp @@ -1,7 +1,6 @@ #include #include #include -#include #include #include diff --git a/src/Processors/Transforms/CreatingSetsTransform.h b/src/Processors/Transforms/CreatingSetsTransform.h index eca12c33f54..a5a67e99afc 100644 --- a/src/Processors/Transforms/CreatingSetsTransform.h +++ b/src/Processors/Transforms/CreatingSetsTransform.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include #include diff --git a/src/Processors/Transforms/buildPushingToViewsChain.h b/src/Processors/Transforms/buildPushingToViewsChain.h index 75d0528ff7b..6956dedbc41 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.h +++ b/src/Processors/Transforms/buildPushingToViewsChain.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include #include diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index 3720362775c..8f4f04e56c5 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index d001b12ee66..e89d82cfcc8 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -13,7 +13,6 @@ #include #include #include -#include #include "IServer.h" diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 6ce17552ba1..4ed3a43d2ed 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -2,7 +2,6 @@ #include #include -#include #include #include #include diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 690c9cbd4d0..39688060b0a 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -1,7 +1,6 @@ #include #include -#include #include #include #include diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index 4641a1631f2..be3dd9ae6c9 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -21,7 +21,6 @@ limitations under the License. 
*/ #include #include #include -#include #include #include #include diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h index 5e12d5da678..d0d3f283478 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h @@ -5,7 +5,6 @@ #include #include #include -#include #include #include diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index f4adee8c259..5dbca837f31 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h index d952950e461..e64ba9edec0 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h @@ -6,7 +6,6 @@ #include #include #include -#include #include #include diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 75308f872dc..e3ca902b1bd 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -2,9 +2,9 @@ #include #include #include -#include #include #include +#include #include diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp index 9e138e9882a..947c0bbe932 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp @@ -3,7 +3,6 @@ #include "StorageMaterializedPostgreSQL.h" #include #include -#include #include #include #include diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp index 873a4b4860c..3796bd8ba57 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp @@ -9,7 +9,6 @@ #include #include #include -#include #include diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 2b4f5e4a276..cf9b557de25 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -1,5 +1,4 @@ #include -#include #include #include #include diff --git a/src/Storages/StorageBuffer.h b/src/Storages/StorageBuffer.h index 59f250d67b8..3e8955ad864 100644 --- a/src/Storages/StorageBuffer.h +++ b/src/Storages/StorageBuffer.h @@ -2,7 +2,6 @@ #include #include -#include #include #include diff --git a/src/Storages/StorageExecutable.cpp b/src/Storages/StorageExecutable.cpp index d6e242d1a97..0f47f654428 100644 --- a/src/Storages/StorageExecutable.cpp +++ b/src/Storages/StorageExecutable.cpp @@ -11,7 +11,6 @@ #include #include -#include #include #include #include diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 9aa5689aa66..4ae55272db6 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -16,7 +16,6 @@ #include #include -#include #include #include @@ -479,8 +478,6 @@ Pipe StorageFile::read( size_t max_block_size, unsigned num_streams) { - BlockInputStreams blocks_input; - if (use_table_fd) /// need to call ctr BlockInputStream paths = {""}; /// when use fd, paths are empty else diff --git a/src/Storages/StorageLog.cpp 
b/src/Storages/StorageLog.cpp index 0fd94bac95a..f0c4509f188 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -16,8 +16,6 @@ #include -#include - #include #include diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 5a9e8fc2461..904d1a7f89c 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -12,7 +12,6 @@ #include #include #include -#include #include #include diff --git a/src/Storages/StorageMemory.h b/src/Storages/StorageMemory.h index 846fd4af5fd..063802faf1a 100644 --- a/src/Storages/StorageMemory.h +++ b/src/Storages/StorageMemory.h @@ -8,7 +8,6 @@ #include #include -#include #include diff --git a/src/Storages/StorageMongoDB.cpp b/src/Storages/StorageMongoDB.cpp index eeb5b107b54..15430f60285 100644 --- a/src/Storages/StorageMongoDB.cpp +++ b/src/Storages/StorageMongoDB.cpp @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index 4264be9dbc2..3bdf3218b2e 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Storages/StoragePostgreSQL.h b/src/Storages/StoragePostgreSQL.h index b4bb5400930..10a60bf9b21 100644 --- a/src/Storages/StoragePostgreSQL.h +++ b/src/Storages/StoragePostgreSQL.h @@ -8,7 +8,6 @@ #include #include #include -#include #include #include diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 416d37cd351..20e64255684 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -60,7 +60,6 @@ #include #include -#include #include diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 709c9dc4a63..d95a9465bd6 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -30,7 +30,6 @@ #include -#include #include #include #include diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp index e6d41a53bfc..0e7faad194e 100644 --- a/src/Storages/StorageS3Cluster.cpp +++ b/src/Storages/StorageS3Cluster.cpp @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index 58c56f1401f..2547af1b0ad 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index 2dc2577f245..0cd07afc26c 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -14,7 +14,6 @@ #include #include -#include #include #include diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 79f1d568057..9397986fadd 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -15,7 +15,6 @@ #include #include -#include #include #include diff --git a/src/Storages/StorageXDBC.cpp b/src/Storages/StorageXDBC.cpp index a1254e2aaeb..a0924896437 100644 --- a/src/Storages/StorageXDBC.cpp +++ b/src/Storages/StorageXDBC.cpp @@ -1,6 +1,5 @@ #include "StorageXDBC.h" -#include #include #include #include diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index 9aedee66b5f..f2b2102c7ff 100644 --- 
a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -15,7 +15,6 @@ #include #include #include -#include #include diff --git a/src/Storages/tests/gtest_storage_log.cpp b/src/Storages/tests/gtest_storage_log.cpp index e7ecfc7c4f0..5b891c43aae 100644 --- a/src/Storages/tests/gtest_storage_log.cpp +++ b/src/Storages/tests/gtest_storage_log.cpp @@ -1,7 +1,6 @@ #include #include -#include #include #include #include diff --git a/src/TableFunctions/ITableFunctionFileLike.cpp b/src/TableFunctions/ITableFunctionFileLike.cpp index afd81638da4..49461fe8f46 100644 --- a/src/TableFunctions/ITableFunctionFileLike.cpp +++ b/src/TableFunctions/ITableFunctionFileLike.cpp @@ -16,7 +16,6 @@ #include -#include namespace DB diff --git a/src/TableFunctions/TableFunctionExecutable.h b/src/TableFunctions/TableFunctionExecutable.h index 05ef2b3b26b..128ee8e46fc 100644 --- a/src/TableFunctions/TableFunctionExecutable.h +++ b/src/TableFunctions/TableFunctionExecutable.h @@ -1,6 +1,5 @@ #pragma once -#include #include namespace DB From 4d020c96e0fe2f1725caa6b40354cd7f8014bc4d Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 14 Oct 2021 15:07:50 +0300 Subject: [PATCH 203/438] support nullable arguments in function initializeAggregation --- src/Functions/initializeAggregation.cpp | 1 + .../02097_initializeAggregationNullable.reference | 6 ++++++ .../0_stateless/02097_initializeAggregationNullable.sql | 8 ++++++++ 3 files changed, 15 insertions(+) create mode 100644 tests/queries/0_stateless/02097_initializeAggregationNullable.reference create mode 100644 tests/queries/0_stateless/02097_initializeAggregationNullable.sql diff --git a/src/Functions/initializeAggregation.cpp b/src/Functions/initializeAggregation.cpp index e8bd136e704..02db90bfc43 100644 --- a/src/Functions/initializeAggregation.cpp +++ b/src/Functions/initializeAggregation.cpp @@ -40,6 +40,7 @@ public: bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } bool useDefaultImplementationForConstants() const override { return true; } + bool useDefaultImplementationForNulls() const override { return false; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0}; } DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override; diff --git a/tests/queries/0_stateless/02097_initializeAggregationNullable.reference b/tests/queries/0_stateless/02097_initializeAggregationNullable.reference new file mode 100644 index 00000000000..6d2e42f2ca6 --- /dev/null +++ b/tests/queries/0_stateless/02097_initializeAggregationNullable.reference @@ -0,0 +1,6 @@ +1 +AggregateFunction(uniqExact, Nullable(String)) +1 +AggregateFunction(uniqExact, Nullable(UInt8)) +1 +1 diff --git a/tests/queries/0_stateless/02097_initializeAggregationNullable.sql b/tests/queries/0_stateless/02097_initializeAggregationNullable.sql new file mode 100644 index 00000000000..aa4e6d47579 --- /dev/null +++ b/tests/queries/0_stateless/02097_initializeAggregationNullable.sql @@ -0,0 +1,8 @@ +SELECT finalizeAggregation(initializeAggregation('uniqExactState', toNullable('foo'))); +SELECT toTypeName(initializeAggregation('uniqExactState', toNullable('foo'))); + +SELECT finalizeAggregation(initializeAggregation('uniqExactState', toNullable(123))); +SELECT toTypeName(initializeAggregation('uniqExactState', toNullable(123))); + +SELECT initializeAggregation('uniqExactState', toNullable('foo')) = arrayReduce('uniqExactState', 
[toNullable('foo')]); +SELECT initializeAggregation('uniqExactState', toNullable(123)) = arrayReduce('uniqExactState', [toNullable(123)]); From a1a4df2501816529dcb9e6588acfc72b74b902bc Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 14 Oct 2021 16:34:05 +0300 Subject: [PATCH 204/438] Fix handling exception 'unrecognised option' in clickhouse-local and client --- programs/client/Client.cpp | 10 +--------- programs/client/Client.h | 2 +- programs/local/LocalServer.cpp | 15 +++++---------- programs/local/LocalServer.h | 2 +- src/Client/ClientBase.cpp | 17 ++++++++++++++++- src/Client/ClientBase.h | 3 ++- ...unknown_option_in_clickhouse_local.reference | 3 ++- .../02096_unknown_option_in_clickhouse_local.sh | 2 +- 8 files changed, 29 insertions(+), 25 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index da910430985..d53a57b6eba 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -996,7 +996,7 @@ void Client::printHelpMessage(const OptionsDescription & options_description) } -void Client::addAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) +void Client::addOptions(OptionsDescription & options_description) { /// Main commandline options related to client functionality and all parameters from Settings. options_description.main_description->add_options() @@ -1053,14 +1053,6 @@ void Client::addAndCheckOptions(OptionsDescription & options_description, po::va ( "types", po::value(), "types" ); - - cmd_settings.addProgramOptions(options_description.main_description.value()); - /// Parse main commandline options. - po::parsed_options parsed = po::command_line_parser(arguments).options(options_description.main_description.value()).run(); - auto unrecognized_options = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional); - if (unrecognized_options.size() > 1) - throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'", unrecognized_options[1]); - po::store(parsed, options); } diff --git a/programs/client/Client.h b/programs/client/Client.h index 43f6deae0b5..2def74ef3fc 100644 --- a/programs/client/Client.h +++ b/programs/client/Client.h @@ -24,7 +24,7 @@ protected: String getName() const override { return "client"; } void printHelpMessage(const OptionsDescription & options_description) override; - void addAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) override; + void addOptions(OptionsDescription & options_description) override; void processOptions(const OptionsDescription & options_description, const CommandLineOptions & options, const std::vector & external_tables_arguments) override; void processConfig() override; diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 2035406d73a..2180729438d 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -35,7 +34,6 @@ #include #include #include -#include #include #include #include @@ -636,7 +634,7 @@ void LocalServer::printHelpMessage(const OptionsDescription & options_descriptio } -void LocalServer::addAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) +void LocalServer::addOptions(OptionsDescription & options_description) { options_description.main_description->add_options() ("database,d", po::value(), "database") @@ 
-655,10 +653,6 @@ void LocalServer::addAndCheckOptions(OptionsDescription & options_description, p ("no-system-tables", "do not attach system tables (better startup time)") ; - - cmd_settings.addProgramOptions(options_description.main_description.value()); - po::parsed_options parsed = po::command_line_parser(arguments).options(options_description.main_description.value()).run(); - po::store(parsed, options); } @@ -713,10 +707,11 @@ int mainEntryClickHouseLocal(int argc, char ** argv) app.init(argc, argv); return app.run(); } - catch (const boost::program_options::error & e) + catch (const DB::Exception & e) { - std::cerr << "Bad arguments: " << e.what() << std::endl; - return DB::ErrorCodes::BAD_ARGUMENTS; + std::cerr << DB::getExceptionMessage(e, false) << std::endl; + auto code = DB::getCurrentExceptionCode(); + return code ? code : 1; } catch (...) { diff --git a/programs/local/LocalServer.h b/programs/local/LocalServer.h index e14e18adced..ce0df06c86a 100644 --- a/programs/local/LocalServer.h +++ b/programs/local/LocalServer.h @@ -40,7 +40,7 @@ protected: String getQueryTextPrefix() override; void printHelpMessage(const OptionsDescription & options_description) override; - void addAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) override; + void addOptions(OptionsDescription & options_description) override; void processOptions(const OptionsDescription & options_description, const CommandLineOptions & options, const std::vector &) override; void processConfig() override; diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index baf082a3541..deb22ca60ef 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -72,6 +72,7 @@ namespace ErrorCodes extern const int UNEXPECTED_PACKET_FROM_SERVER; extern const int INVALID_USAGE_OF_INPUT; extern const int CANNOT_SET_SIGNAL_HANDLER; + extern const int UNRECOGNIZED_ARGUMENTS; } } @@ -1505,6 +1506,19 @@ void ClientBase::readArguments(int argc, char ** argv, Arguments & common_argume } } +void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) +{ + cmd_settings.addProgramOptions(options_description.main_description.value()); + /// Parse main commandline options. 
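The shared parseAndCheckOptions helper introduced in this patch leans on boost::program_options' allow_unregistered() mode: instead of letting boost throw its own "unrecognised option" error, unknown tokens are passed through the parser and collect_unrecognized is used to report the first of them as a ClickHouse exception. A minimal standalone sketch of the same technique; the option set ("query") and the error text are illustrative only, not the real client options.

#include <boost/program_options.hpp>
#include <iostream>
#include <string>

namespace po = boost::program_options;

int main(int argc, char ** argv)
{
    po::options_description desc("options");
    desc.add_options()
        ("query,q", po::value<std::string>(), "query to execute");   // illustrative option, not the real client set

    // Ask boost not to throw on unknown options; we inspect them ourselves instead.
    auto parser = po::command_line_parser(argc, argv).options(desc);
    parser.allow_unregistered();
    po::parsed_options parsed = parser.run();

    // With include_positional, positional arguments are reported here as well.
    auto unrecognized = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional);
    if (!unrecognized.empty())
    {
        std::cerr << "Unrecognized option '" << unrecognized[0] << "'" << std::endl;
        return 1;
    }

    po::variables_map options;
    po::store(parsed, options);
    po::notify(options);
    return 0;
}

Running the check before po::store keeps the behaviour of the removed addAndCheckOptions methods while letting clickhouse-client and clickhouse-local share one code path in ClientBase.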
+ auto parser = po::command_line_parser(arguments).options(options_description.main_description.value()); + parser.allow_unregistered(); + po::parsed_options parsed = parser.run(); + auto unrecognized_options = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional); + if (unrecognized_options.size() > 1) + throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'", unrecognized_options[1]); + po::store(parsed, options); +} + void ClientBase::init(int argc, char ** argv) { @@ -1562,7 +1576,8 @@ void ClientBase::init(int argc, char ** argv) ("stacktrace", "print stack traces of exceptions") ; - addAndCheckOptions(options_description, options, common_arguments); + addOptions(options_description); + parseAndCheckOptions(options_description, options, common_arguments); po::notify(options); if (options.count("version") || options.count("V")) diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 070b676366c..cfc0b45ff60 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -91,7 +91,7 @@ protected: }; virtual void printHelpMessage(const OptionsDescription & options_description) = 0; - virtual void addAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) = 0; + virtual void addOptions(OptionsDescription & options_description) = 0; virtual void processOptions(const OptionsDescription & options_description, const CommandLineOptions & options, const std::vector & external_tables_arguments) = 0; @@ -132,6 +132,7 @@ private: void resetOutput(); void outputQueryInfo(bool echo_query_); void readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector & external_tables_arguments); + void parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments); protected: bool is_interactive = false; /// Use either interactive line editing interface or batch mode. diff --git a/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.reference b/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.reference index 96feda5dd3c..2c4cf540812 100644 --- a/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.reference +++ b/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.reference @@ -1 +1,2 @@ -Bad arguments: unrecognised option '--unknown-option' +Code: 552. DB::Exception: Unrecognized option '--unknown-option'. (UNRECOGNIZED_ARGUMENTS) +Code: 552. DB::Exception: Unrecognized option '--unknown-option'. (UNRECOGNIZED_ARGUMENTS) diff --git a/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.sh b/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.sh index ee0e3f3d149..2fabc761d4c 100755 --- a/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.sh +++ b/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.sh @@ -6,4 +6,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CURDIR"/../shell_config.sh ${CLICKHOUSE_LOCAL} --unknown-option 2>&1 echo - +${CLICKHOUSE_CLIENT} --unknown-option 2>&1 echo From 2da43012b6203bd9957d099dfbff575420efa4af Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 30 Sep 2021 22:46:12 +0300 Subject: [PATCH 205/438] Add log levels updates --- base/loggers/Loggers.cpp | 91 +++++++++++++++++++++++++++-- base/loggers/Loggers.h | 2 + base/loggers/OwnFormattingChannel.h | 5 +- base/loggers/OwnSplitChannel.cpp | 17 +++++- base/loggers/OwnSplitChannel.h | 6 +- programs/server/Server.cpp | 2 +- 6 files changed, 111 insertions(+), 12 deletions(-) diff --git a/base/loggers/Loggers.cpp b/base/loggers/Loggers.cpp index 80e62d0a6d6..0f41296819e 100644 --- a/base/loggers/Loggers.cpp +++ b/base/loggers/Loggers.cpp @@ -84,7 +84,7 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log Poco::AutoPtr log = new DB::OwnFormattingChannel(pf, log_file); log->setLevel(log_level); - split->addChannel(log); + split->addChannel(log, "log"); } const auto errorlog_path = config.getString("logger.errorlog", ""); @@ -116,7 +116,7 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log Poco::AutoPtr errorlog = new DB::OwnFormattingChannel(pf, error_log_file); errorlog->setLevel(errorlog_level); errorlog->open(); - split->addChannel(errorlog); + split->addChannel(errorlog, "errorlog"); } if (config.getBool("logger.use_syslog", false)) @@ -155,7 +155,7 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log Poco::AutoPtr log = new DB::OwnFormattingChannel(pf, syslog_channel); log->setLevel(syslog_level); - split->addChannel(log); + split->addChannel(log, "syslog"); } bool should_log_to_console = isatty(STDIN_FILENO) || isatty(STDERR_FILENO); @@ -177,7 +177,7 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log Poco::AutoPtr log = new DB::OwnFormattingChannel(pf, new Poco::ConsoleChannel); logger.warning("Logging " + console_log_level_string + " to console"); log->setLevel(console_log_level); - split->addChannel(log); + split->addChannel(log, "console"); } split->open(); @@ -224,6 +224,89 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log } } +void Loggers::updateLevels(Poco::Util::AbstractConfiguration & config, Poco::Logger & logger) +{ + int max_log_level = 0; + + const auto log_level_string = config.getString("logger.level", "trace"); + int log_level = Poco::Logger::parseLevel(log_level_string); + if (log_level > max_log_level) + max_log_level = log_level; + + const auto log_path = config.getString("logger.log", ""); + if (!log_path.empty()) + split->setLevel("log", log_level); + else + split->setLevel("log", 0); + + // Set level to console + bool is_daemon = config.getBool("application.runAsDaemon", false); + bool should_log_to_console = isatty(STDIN_FILENO) || isatty(STDERR_FILENO); + if (config.getBool("logger.console", false) + || (!config.hasProperty("logger.console") && !is_daemon && should_log_to_console)) + split->setLevel("console", log_level); + else + split->setLevel("console", 0); + + // Set level to errorlog + int errorlog_level = 0; + const auto errorlog_path = config.getString("logger.errorlog", ""); + if (!errorlog_path.empty()) + { + errorlog_level = Poco::Logger::parseLevel(config.getString("logger.errorlog_level", "notice")); + if (errorlog_level > max_log_level) + max_log_level = errorlog_level; + } + split->setLevel("errorlog", errorlog_level); + + // Set level to syslog + int 
syslog_level = 0; + if (config.getBool("logger.use_syslog", false)) + { + syslog_level = Poco::Logger::parseLevel(config.getString("logger.syslog_level", log_level_string)); + if (syslog_level > max_log_level) + max_log_level = syslog_level; + } + split->setLevel("syslog", syslog_level); + + // Global logging level (it can be overridden for specific loggers). + logger.setLevel(max_log_level); + + // Set level to all already created loggers + std::vector names; + + logger.root().names(names); + for (const auto & name : names) + logger.root().get(name).setLevel(max_log_level); + + logger.root().setLevel(max_log_level); + + // Explicitly specified log levels for specific loggers. + { + Poco::Util::AbstractConfiguration::Keys loggers_level; + config.keys("logger.levels", loggers_level); + + if (!loggers_level.empty()) + { + for (const auto & key : loggers_level) + { + if (key == "logger" || key.starts_with("logger[")) + { + const std::string name(config.getString("logger.levels." + key + ".name")); + const std::string level(config.getString("logger.levels." + key + ".level")); + logger.root().get(name).setLevel(level); + } + else + { + // Legacy syntax + const std::string level(config.getString("logger.levels." + key, "trace")); + logger.root().get(key).setLevel(level); + } + } + } + } +} + void Loggers::closeLogs(Poco::Logger & logger) { if (log_file) diff --git a/base/loggers/Loggers.h b/base/loggers/Loggers.h index 151c1d3566f..e8afd749534 100644 --- a/base/loggers/Loggers.h +++ b/base/loggers/Loggers.h @@ -19,6 +19,8 @@ class Loggers public: void buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Logger & logger, const std::string & cmd_name = ""); + void updateLevels(Poco::Util::AbstractConfiguration & config, Poco::Logger & logger); + /// Close log files. On next log write files will be reopened. 
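The new updateLevels shown above reduces to a handful of Poco logger calls: Poco::Logger::parseLevel turns the textual logger.level / errorlog_level / syslog_level config values into priorities, the maximum becomes the ceiling applied to the root and all existing loggers, and the logger.levels section then overrides individual loggers by name. A sketch of those calls in isolation; the level strings and the "MergeTree" logger name are examples only, not taken from the patch.

#include <Poco/Logger.h>
#include <string>
#include <vector>

void applyIllustrativeLevels()
{
    // Textual level ("trace", "information", ...) -> Poco priority as an int.
    int max_level = Poco::Logger::parseLevel("information");

    // Apply the ceiling to the root logger and to every already created logger,
    // mirroring what updateLevels does after recomputing max_log_level.
    Poco::Logger & root = Poco::Logger::root();
    root.setLevel(max_level);

    std::vector<std::string> names;
    root.names(names);
    for (const auto & name : names)
        root.get(name).setLevel(max_level);

    // Explicit per-logger override by name, as read from the logger.levels config
    // section; "MergeTree" is only an example logger name.
    root.get("MergeTree").setLevel("trace");
}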
void closeLogs(Poco::Logger & logger); diff --git a/base/loggers/OwnFormattingChannel.h b/base/loggers/OwnFormattingChannel.h index 2336dacad04..0480d0d5061 100644 --- a/base/loggers/OwnFormattingChannel.h +++ b/base/loggers/OwnFormattingChannel.h @@ -1,4 +1,5 @@ #pragma once +#include #include #include #include @@ -14,7 +15,7 @@ class OwnFormattingChannel : public Poco::Channel, public ExtendedLogChannel public: explicit OwnFormattingChannel( Poco::AutoPtr pFormatter_ = nullptr, Poco::AutoPtr pChannel_ = nullptr) - : pFormatter(std::move(pFormatter_)), pChannel(std::move(pChannel_)) + : pFormatter(std::move(pFormatter_)), pChannel(std::move(pChannel_)), priority(Poco::Message::PRIO_TRACE) { } @@ -45,7 +46,7 @@ public: private: Poco::AutoPtr pFormatter; Poco::AutoPtr pChannel; - Poco::Message::Priority priority = Poco::Message::PRIO_TRACE; + std::atomic priority; }; } diff --git a/base/loggers/OwnSplitChannel.cpp b/base/loggers/OwnSplitChannel.cpp index 2349c60856f..2ae1e65729c 100644 --- a/base/loggers/OwnSplitChannel.cpp +++ b/base/loggers/OwnSplitChannel.cpp @@ -1,4 +1,5 @@ #include "OwnSplitChannel.h" +#include "OwnFormattingChannel.h" #include #include @@ -75,7 +76,7 @@ void OwnSplitChannel::logSplit(const Poco::Message & msg) ExtendedLogMessage msg_ext = ExtendedLogMessage::getFrom(msg); /// Log data to child channels - for (auto & channel : channels) + for (auto & [name, channel] : channels) { if (channel.second) channel.second->logExtended(msg_ext); // extended child @@ -137,9 +138,9 @@ void OwnSplitChannel::logSplit(const Poco::Message & msg) } -void OwnSplitChannel::addChannel(Poco::AutoPtr channel) +void OwnSplitChannel::addChannel(Poco::AutoPtr channel, const std::string & name) { - channels.emplace_back(std::move(channel), dynamic_cast(channel.get())); + channels.emplace(name, ExtendedChannelPtrPair(std::move(channel), dynamic_cast(channel.get()))); } void OwnSplitChannel::addTextLog(std::shared_ptr log, int max_priority) @@ -149,4 +150,14 @@ void OwnSplitChannel::addTextLog(std::shared_ptr log, int max_prior text_log_max_priority.store(max_priority, std::memory_order_relaxed); } +void OwnSplitChannel::setLevel(const std::string & name, int level) +{ + auto it = channels.find(name); + if (it != channels.end()) + { + if (auto * channel = dynamic_cast(it->second.first.get())) + channel->setLevel(level); + } +} + } diff --git a/base/loggers/OwnSplitChannel.h b/base/loggers/OwnSplitChannel.h index 03ff7b57745..fdc580e65f8 100644 --- a/base/loggers/OwnSplitChannel.h +++ b/base/loggers/OwnSplitChannel.h @@ -18,10 +18,12 @@ public: /// Makes an extended message from msg and passes it to the client logs queue and child (if possible) void log(const Poco::Message & msg) override; /// Adds a child channel - void addChannel(Poco::AutoPtr channel); + void addChannel(Poco::AutoPtr channel, const std::string & name); void addTextLog(std::shared_ptr log, int max_priority); + void setLevel(const std::string & name, int level); + private: void logSplit(const Poco::Message & msg); void tryLogSplit(const Poco::Message & msg); @@ -29,7 +31,7 @@ private: using ChannelPtr = Poco::AutoPtr; /// Handler and its pointer casted to extended interface using ExtendedChannelPtrPair = std::pair; - std::vector channels; + std::map channels; std::mutex text_log_mutex; diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 01033570926..bfb77f49763 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -844,7 +844,7 @@ if (ThreadFuzzer::instance().isEffective()) // FIXME 
logging-related things need synchronization -- see the 'Logger * log' saved // in a lot of places. For now, disable updating log configuration without server restart. //setTextLog(global_context->getTextLog()); - //buildLoggers(*config, logger()); + updateLevels(*config, logger()); global_context->setClustersConfig(config); global_context->setMacros(std::make_unique(*config, "macros", log)); global_context->setExternalAuthenticatorsConfig(*config); From f29261741c2dcc45866a76b89a9176abb1482f44 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Mon, 4 Oct 2021 12:25:42 +0300 Subject: [PATCH 206/438] Add test --- .../test_log_levels_update/__init__.py | 0 .../test_log_levels_update/configs/log.xml | 6 +++ .../test_log_levels_update/test.py | 50 +++++++++++++++++++ 3 files changed, 56 insertions(+) create mode 100644 tests/integration/test_log_levels_update/__init__.py create mode 100644 tests/integration/test_log_levels_update/configs/log.xml create mode 100644 tests/integration/test_log_levels_update/test.py diff --git a/tests/integration/test_log_levels_update/__init__.py b/tests/integration/test_log_levels_update/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_log_levels_update/configs/log.xml b/tests/integration/test_log_levels_update/configs/log.xml new file mode 100644 index 00000000000..668a15f6afd --- /dev/null +++ b/tests/integration/test_log_levels_update/configs/log.xml @@ -0,0 +1,6 @@ + + + trace + /var/log/clickhouse-server/clickhouse-server.log + + \ No newline at end of file diff --git a/tests/integration/test_log_levels_update/test.py b/tests/integration/test_log_levels_update/test.py new file mode 100644 index 00000000000..dca660a2982 --- /dev/null +++ b/tests/integration/test_log_levels_update/test.py @@ -0,0 +1,50 @@ +import pytest +import re + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__, name="log_quries_probability") +node = cluster.add_instance('node', with_zookeeper=False) + +config = ''' + + information + /var/log/clickhouse-server/clickhouse-server.log + +''' + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def get_log(node): + return node.exec_in_container(["bash", "-c", "cat /var/log/clickhouse-server/clickhouse-server.log"]) + +def test_log_levels_update(start_cluster): + # Make sure that there are enough log messages for the test + for i in range(5): + node.query("SELECT 1") + + log = get_log(node) + assert re.search("(|)", log) + + node.replace_config("/etc/clickhouse-server/config.d/log.xml", config) + node.query("SYSTEM RELOAD CONFIG;") + node.exec_in_container(["bash", "-c", "> /var/log/clickhouse-server/clickhouse-server.log"]) + + for i in range(5): + node.query("SELECT 1") + + log = get_log(node) + assert len(log) > 0 + assert not re.search("(|)", log) + + + From ba7b784646bc64619dc62d72c3d27e47e457949f Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 14 Oct 2021 16:36:32 +0300 Subject: [PATCH 207/438] Remove catching boost::program_options error in Client --- programs/client/Client.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index d53a57b6eba..45314a5d460 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1230,11 +1230,6 @@ int mainEntryClickHouseClient(int argc, char ** argv) client.init(argc, argv); return client.run(); } - catch (const boost::program_options::error & e) 
- { - std::cerr << "Bad arguments: " << e.what() << std::endl; - return 1; - } catch (const DB::Exception & e) { std::cerr << DB::getExceptionMessage(e, false) << std::endl; From ce22f534c4e64d7c4fe13c3fb1353c76028aa4e7 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 28 Sep 2021 15:59:22 +0300 Subject: [PATCH 208/438] Add CapnProto output format, refactor CapnProto input format --- src/Common/ErrorCodes.cpp | 2 + src/Core/Settings.h | 3 +- src/Core/SettingsEnums.cpp | 5 + src/Core/SettingsEnums.h | 2 + src/DataTypes/EnumValues.cpp | 19 + src/DataTypes/EnumValues.h | 4 + src/Formats/CapnProtoUtils.cpp | 406 ++++++++++++++++ src/Formats/CapnProtoUtils.h | 43 ++ src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatSchemaInfo.cpp | 6 + src/Formats/FormatSchemaInfo.h | 2 + src/Formats/FormatSettings.h | 14 + src/Formats/registerFormats.cpp | 2 + .../Formats/Impl/CapnProtoRowInputFormat.cpp | 433 +++++++++--------- .../Formats/Impl/CapnProtoRowInputFormat.h | 46 +- .../Formats/Impl/CapnProtoRowOutputFormat.cpp | 251 ++++++++++ .../Formats/Impl/CapnProtoRowOutputFormat.h | 53 +++ .../Formats/Impl/ProtobufRowInputFormat.cpp | 3 +- .../Formats/Impl/ProtobufRowOutputFormat.cpp | 4 +- .../0_stateless/02030_capnp_format.reference | 52 +++ .../queries/0_stateless/02030_capnp_format.sh | 109 +++++ .../format_schemas/02030_capnp_enum.capnp | 13 + .../02030_capnp_fake_nullable.capnp | 23 + .../format_schemas/02030_capnp_lists.capnp | 8 + .../02030_capnp_low_cardinality.capnp | 17 + .../02030_capnp_nested_lists_and_tuples.capnp | 36 ++ .../02030_capnp_nested_table.capnp | 20 + .../02030_capnp_nested_tuples.capnp | 23 + .../format_schemas/02030_capnp_nullable.capnp | 22 + .../02030_capnp_simple_types.capnp | 21 + .../format_schemas/02030_capnp_tuples.capnp | 35 ++ .../02030_capnp_unnamed_union.capnp | 10 + 32 files changed, 1416 insertions(+), 272 deletions(-) create mode 100644 src/Formats/CapnProtoUtils.cpp create mode 100644 src/Formats/CapnProtoUtils.h create mode 100644 src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp create mode 100644 src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h create mode 100644 tests/queries/0_stateless/02030_capnp_format.reference create mode 100755 tests/queries/0_stateless/02030_capnp_format.sh create mode 100644 tests/queries/0_stateless/format_schemas/02030_capnp_enum.capnp create mode 100644 tests/queries/0_stateless/format_schemas/02030_capnp_fake_nullable.capnp create mode 100644 tests/queries/0_stateless/format_schemas/02030_capnp_lists.capnp create mode 100644 tests/queries/0_stateless/format_schemas/02030_capnp_low_cardinality.capnp create mode 100644 tests/queries/0_stateless/format_schemas/02030_capnp_nested_lists_and_tuples.capnp create mode 100644 tests/queries/0_stateless/format_schemas/02030_capnp_nested_table.capnp create mode 100644 tests/queries/0_stateless/format_schemas/02030_capnp_nested_tuples.capnp create mode 100644 tests/queries/0_stateless/format_schemas/02030_capnp_nullable.capnp create mode 100644 tests/queries/0_stateless/format_schemas/02030_capnp_simple_types.capnp create mode 100644 tests/queries/0_stateless/format_schemas/02030_capnp_tuples.capnp create mode 100644 tests/queries/0_stateless/format_schemas/02030_capnp_unnamed_union.capnp diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index b6d9b65c28b..1aff1460125 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -589,6 +589,8 @@ M(619, POSTGRESQL_REPLICATION_INTERNAL_ERROR) \ M(620, QUERY_NOT_ALLOWED) \ M(621, 
CANNOT_NORMALIZE_STRING) \ + M(622, CANNOT_PARSE_CAPN_PROTO_SCHEMA) \ + M(623, CAPN_PROTO_BAD_CAST) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Core/Settings.h b/src/Core/Settings.h index a5767955045..f91bf684c85 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -625,7 +625,8 @@ class IColumn; M(Bool, cross_to_inner_join_rewrite, true, "Use inner join instead of comma/cross join if possible", 0) \ \ M(Bool, output_format_arrow_low_cardinality_as_dictionary, false, "Enable output LowCardinality type as Dictionary Arrow type", 0) \ - + \ + M(EnumComparingMode, format_capn_proto_enum_comparising_mode, FormatSettings::EnumComparingMode::BY_VALUES, "How to map ClickHouse Enum and CapnProto Enum", 0)\ // End of FORMAT_FACTORY_SETTINGS // Please add settings non-related to formats into the COMMON_SETTINGS above. diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 8e588b62326..f5497588891 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -116,4 +116,9 @@ IMPLEMENT_SETTING_ENUM(ShortCircuitFunctionEvaluation, ErrorCodes::BAD_ARGUMENTS {{"enable", ShortCircuitFunctionEvaluation::ENABLE}, {"force_enable", ShortCircuitFunctionEvaluation::FORCE_ENABLE}, {"disable", ShortCircuitFunctionEvaluation::DISABLE}}) + +IMPLEMENT_SETTING_ENUM(EnumComparingMode, ErrorCodes::BAD_ARGUMENTS, + {{"by_names", FormatSettings::EnumComparingMode::BY_NAMES}, + {"by_values", FormatSettings::EnumComparingMode::BY_VALUES}, + {"by_names_case_insensitive", FormatSettings::EnumComparingMode::BY_NAMES_CASE_INSENSITIVE}}) } diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index a699da3062c..f57a064241e 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -168,4 +168,6 @@ enum class ShortCircuitFunctionEvaluation DECLARE_SETTING_ENUM(ShortCircuitFunctionEvaluation) +DECLARE_SETTING_ENUM_WITH_RENAME(EnumComparingMode, FormatSettings::EnumComparingMode) + } diff --git a/src/DataTypes/EnumValues.cpp b/src/DataTypes/EnumValues.cpp index 6df899ba9a2..ab5ea0ca249 100644 --- a/src/DataTypes/EnumValues.cpp +++ b/src/DataTypes/EnumValues.cpp @@ -1,4 +1,5 @@ #include +#include namespace DB { @@ -82,6 +83,24 @@ Names EnumValues::getAllRegisteredNames() const return result; } +template +std::unordered_set EnumValues::getSetOfAllNames(bool to_lower) const +{ + std::unordered_set result; + for (const auto & value : values) + result.insert(to_lower ? 
boost::algorithm::to_lower_copy(value.first) : value.first); + return result; +} + +template +std::unordered_set EnumValues::getSetOfAllValues() const +{ + std::unordered_set result; + for (const auto & value : values) + result.insert(value.second); + return result; +} + template class EnumValues; template class EnumValues; diff --git a/src/DataTypes/EnumValues.h b/src/DataTypes/EnumValues.h index 1e5e4f55ea7..17c292c5551 100644 --- a/src/DataTypes/EnumValues.h +++ b/src/DataTypes/EnumValues.h @@ -80,6 +80,10 @@ public: } Names getAllRegisteredNames() const override; + + std::unordered_set getSetOfAllNames(bool to_lower) const; + + std::unordered_set getSetOfAllValues() const; }; } diff --git a/src/Formats/CapnProtoUtils.cpp b/src/Formats/CapnProtoUtils.cpp new file mode 100644 index 00000000000..9931785f43e --- /dev/null +++ b/src/Formats/CapnProtoUtils.cpp @@ -0,0 +1,406 @@ +#include + +#if USE_CAPNP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_PARSE_CAPN_PROTO_SCHEMA; + extern const int THERE_IS_NO_COLUMN; + extern const int BAD_TYPE_OF_FIELD; + extern const int CAPN_PROTO_BAD_CAST; + extern const int FILE_DOESNT_EXIST; + extern const int UNKNOWN_EXCEPTION; +} + +capnp::StructSchema CapnProtoSchemaParser::getMessageSchema(const FormatSchemaInfo & schema_info) +{ + capnp::ParsedSchema schema; + try + { + int fd; + KJ_SYSCALL(fd = open(schema_info.schemaDirectory().data(), O_RDONLY)); + auto schema_dir = kj::newDiskDirectory(kj::OsFileHandle(fd)); + schema = impl.parseFromDirectory(*schema_dir, kj::Path::parse(schema_info.schemaPath()), {}); + } + catch (const kj::Exception & e) + { + /// That's not good to determine the type of error by its description, but + /// this is the only way to do it here, because kj doesn't specify the type of error. 
+ String description = String(e.getDescription().cStr()); + if (description.starts_with("no such directory")) + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot open CapnProto schema, file {} doesn't exists", schema_info.absoluteSchemaPath()); + + if (description.starts_with("Parse error")) + throw Exception(ErrorCodes::CANNOT_PARSE_CAPN_PROTO_SCHEMA, "Cannot parse CapnProto schema {}:{}", schema_info.schemaPath(), e.getLine()); + + throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Unknown exception while parsing CapnProro schema: {}, schema dir and file: {}, {}", description, schema_info.schemaDirectory(), schema_info.schemaPath()); + } + + auto message_maybe = schema.findNested(schema_info.messageName()); + auto * message_schema = kj::_::readMaybe(message_maybe); + if (!message_schema) + throw Exception(ErrorCodes::CANNOT_PARSE_CAPN_PROTO_SCHEMA, "CapnProto schema doesn't contain message with name {}", schema_info.messageName()); + return message_schema->asStruct(); +} + +bool compareEnumNames(const String & first, const String & second, FormatSettings::EnumComparingMode mode) +{ + if (mode == FormatSettings::EnumComparingMode::BY_NAMES_CASE_INSENSITIVE) + return boost::algorithm::to_lower_copy(first) == boost::algorithm::to_lower_copy(second); + return first == second; +} + +static const std::map capnp_simple_type_names = +{ + {capnp::schema::Type::Which::BOOL, "Bool"}, + {capnp::schema::Type::Which::VOID, "Void"}, + {capnp::schema::Type::Which::INT8, "Int8"}, + {capnp::schema::Type::Which::INT16, "Int16"}, + {capnp::schema::Type::Which::INT32, "Int32"}, + {capnp::schema::Type::Which::INT64, "Int64"}, + {capnp::schema::Type::Which::UINT8, "UInt8"}, + {capnp::schema::Type::Which::UINT16, "UInt16"}, + {capnp::schema::Type::Which::UINT32, "UInt32"}, + {capnp::schema::Type::Which::UINT64, "UInt64"}, + {capnp::schema::Type::Which::FLOAT32, "Float32"}, + {capnp::schema::Type::Which::FLOAT64, "Float64"}, + {capnp::schema::Type::Which::TEXT, "Text"}, + {capnp::schema::Type::Which::DATA, "Data"}, + {capnp::schema::Type::Which::ENUM, "Enum"}, + {capnp::schema::Type::Which::INTERFACE, "Interface"}, + {capnp::schema::Type::Which::ANY_POINTER, "AnyPointer"}, +}; + +static bool checkIfStructContainsUnnamedUnion(const capnp::StructSchema & struct_schema) +{ + return struct_schema.getFields().size() != struct_schema.getNonUnionFields().size(); +} + +static bool checkIfStructIsNamedUnion(const capnp::StructSchema & struct_schema) +{ + return struct_schema.getFields().size() == struct_schema.getUnionFields().size(); +} + +/// Get full name of type for better exception messages. +static String getCapnProtoFullTypeName(const capnp::Type & type) +{ + if (type.isStruct()) + { + auto struct_schema = type.asStruct(); + + auto non_union_fields = struct_schema.getNonUnionFields(); + std::vector non_union_field_names; + for (auto nested_field : non_union_fields) + non_union_field_names.push_back(String(nested_field.getProto().getName()) + " " + getCapnProtoFullTypeName(nested_field.getType())); + + auto union_fields = struct_schema.getUnionFields(); + std::vector union_field_names; + for (auto nested_field : union_fields) + union_field_names.push_back(String(nested_field.getProto().getName()) + " " + getCapnProtoFullTypeName(nested_field.getType())); + + String union_name = "Union(" + boost::algorithm::join(union_field_names, ", ") + ")"; + /// Check if the struct is a named union. 
+ if (non_union_field_names.empty()) + return union_name; + + String type_name = "Struct(" + boost::algorithm::join(non_union_field_names, ", "); + /// Check if the struct contains unnamed union. + if (!union_field_names.empty()) + type_name += "," + union_name; + type_name += ")"; + return type_name; + } + + if (type.isList()) + return "List(" + getCapnProtoFullTypeName(type.asList().getElementType()) + ")"; + + if (!capnp_simple_type_names.contains(type.which())) + throw Exception(ErrorCodes::BAD_TYPE_OF_FIELD, "Unknown CapnProto type"); + + return capnp_simple_type_names.at(type.which()); +} + +template +static bool checkEnums(const capnp::Type & capnp_type, const DataTypePtr column_type, FormatSettings::EnumComparingMode mode, UInt64 max_value, String & error_message) +{ + if (!capnp_type.isEnum()) + return false; + + auto enum_schema = capnp_type.asEnum(); + bool to_lower = mode == FormatSettings::EnumComparingMode::BY_NAMES_CASE_INSENSITIVE; + const auto * enum_type = assert_cast *>(column_type.get()); + const auto & enum_values = dynamic_cast &>(*enum_type); + + auto names = enum_values.getSetOfAllNames(to_lower); + auto values = enum_values.getSetOfAllValues(); + + std::unordered_set capn_enum_names; + std::unordered_set capn_enum_values; + + auto enumerants = enum_schema.getEnumerants(); + for (auto enumerant : enumerants) + { + String name = enumerant.getProto().getName(); + capn_enum_names.insert(to_lower ? boost::algorithm::to_lower_copy(name) : name); + auto value = enumerant.getOrdinal(); + if (mode == FormatSettings::EnumComparingMode::BY_VALUES && value > max_value) + { + error_message += "Enum from CapnProto schema contains value that is out of range for Clickhouse Enum"; + return false; + } + capn_enum_values.insert(Type(value)); + } + + if (mode == FormatSettings::EnumComparingMode::BY_NAMES || mode == FormatSettings::EnumComparingMode::BY_NAMES_CASE_INSENSITIVE) + { + auto result = names == capn_enum_names; + if (!result) + error_message += "The set of names in Enum from CapnProto schema is different from the set of names in ClickHouse Enum"; + return result; + } + + auto result = values == capn_enum_values; + if (!result) + error_message += "The set of values in Enum from CapnProto schema is different from the set of values in ClickHouse Enum"; + return result; +} + +static bool checkCapnProtoType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message); + +static bool checkNullableType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message) +{ + if (!capnp_type.isStruct()) + return false; + + /// Check that struct is a named union of type VOID and one arbitrary type. 
+ auto struct_schema = capnp_type.asStruct(); + if (!checkIfStructIsNamedUnion(struct_schema)) + return false; + + auto union_fields = struct_schema.getUnionFields(); + if (union_fields.size() != 2) + return false; + + auto first = union_fields[0]; + auto second = union_fields[1]; + + auto nested_type = assert_cast(data_type.get())->getNestedType(); + if (first.getType().isVoid()) + return checkCapnProtoType(second.getType(), nested_type, mode, error_message); + if (second.getType().isVoid()) + return checkCapnProtoType(first.getType(), nested_type, mode, error_message); + return false; +} + +static bool checkTupleType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message) +{ + if (!capnp_type.isStruct()) + return false; + auto struct_schema = capnp_type.asStruct(); + + if (checkIfStructIsNamedUnion(struct_schema)) + return false; + + if (checkIfStructContainsUnnamedUnion(struct_schema)) + { + error_message += "CapnProto struct contains unnamed union"; + return false; + } + + const auto * tuple_data_type = assert_cast(data_type.get()); + auto nested_types = tuple_data_type->getElements(); + if (nested_types.size() != struct_schema.getFields().size()) + { + error_message += "Tuple and Struct types have different sizes"; + return false; + } + + if (!tuple_data_type->haveExplicitNames()) + { + error_message += "Only named Tuple can be converted to CapnProto Struct"; + return false; + } + for (const auto & name : tuple_data_type->getElementNames()) + { + KJ_IF_MAYBE(field, struct_schema.findFieldByName(name)) + { + if (!checkCapnProtoType(field->getType(), nested_types[tuple_data_type->getPositionByName(name)], mode, error_message)) + return false; + } + else + { + error_message += "CapnProto struct doesn't contain a field with name " + name; + return false; + } + } + + return true; +} + +static bool checkArrayType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message) +{ + if (!capnp_type.isList()) + return false; + auto list_schema = capnp_type.asList(); + auto nested_type = assert_cast(data_type.get())->getNestedType(); + return checkCapnProtoType(list_schema.getElementType(), nested_type, mode, error_message); +} + +static bool checkCapnProtoType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message) +{ + switch (data_type->getTypeId()) + { + case TypeIndex::UInt8: + return capnp_type.isBool() || capnp_type.isUInt8(); + case TypeIndex::Date: [[fallthrough]]; + case TypeIndex::UInt16: + return capnp_type.isUInt16(); + case TypeIndex::DateTime: [[fallthrough]]; + case TypeIndex::UInt32: + return capnp_type.isUInt32(); + case TypeIndex::UInt64: + return capnp_type.isUInt64(); + case TypeIndex::Int8: + return capnp_type.isInt8(); + case TypeIndex::Int16: + return capnp_type.isInt16(); + case TypeIndex::Date32: [[fallthrough]]; + case TypeIndex::Int32: + return capnp_type.isInt32(); + case TypeIndex::DateTime64: [[fallthrough]]; + case TypeIndex::Int64: + return capnp_type.isInt64(); + case TypeIndex::Float32: + return capnp_type.isFloat32(); + case TypeIndex::Float64: + return capnp_type.isFloat64(); + case TypeIndex::Enum8: + return checkEnums(capnp_type, data_type, mode, INT8_MAX, error_message); + case TypeIndex::Enum16: + return checkEnums(capnp_type, data_type, mode, INT16_MAX, error_message); + case TypeIndex::Tuple: + return checkTupleType(capnp_type, data_type, mode, 
error_message); + case TypeIndex::Nullable: + { + auto result = checkNullableType(capnp_type, data_type, mode, error_message); + if (!result) + error_message += "Nullable can be represented only as a named union of type Void and nested type"; + return result; + } + case TypeIndex::Array: + return checkArrayType(capnp_type, data_type, mode, error_message); + case TypeIndex::LowCardinality: + return checkCapnProtoType(capnp_type, assert_cast(data_type.get())->getDictionaryType(), mode, error_message); + case TypeIndex::FixedString: [[fallthrough]]; + case TypeIndex::String: + return capnp_type.isText() || capnp_type.isData(); + default: + return false; + } +} + +static std::pair splitFieldName(const String & name) +{ + const auto * begin = name.data(); + const auto * end = name.data() + name.size(); + const auto * it = find_first_symbols<'_', '.'>(begin, end); + String first = String(begin, it); + String second = it == end ? "" : String(it + 1, end); + return {first, second}; +} + +capnp::DynamicValue::Reader getReaderByColumnName(const capnp::DynamicStruct::Reader & struct_reader, const String & name) +{ + auto [field_name, nested_name] = splitFieldName(name); + KJ_IF_MAYBE(field, struct_reader.getSchema().findFieldByName(field_name)) + { + auto field_reader = struct_reader.get(*field); + if (nested_name.empty()) + return field_reader; + + if (field_reader.getType() != capnp::DynamicValue::STRUCT) + throw Exception(ErrorCodes::CAPN_PROTO_BAD_CAST, "Field {} is not a struct", field_name); + + return getReaderByColumnName(field_reader.as(), nested_name); + } + + throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "Capnproto struct doesn't contain field with name {}", field_name); +} + +std::pair getStructBuilderAndFieldByColumnName(capnp::DynamicStruct::Builder struct_builder, const String & name) +{ + auto [field_name, nested_name] = splitFieldName(name); + KJ_IF_MAYBE(field, struct_builder.getSchema().findFieldByName(field_name)) + { + if (nested_name.empty()) + return {struct_builder, *field}; + + auto field_builder = struct_builder.get(*field); + if (field_builder.getType() != capnp::DynamicValue::STRUCT) + throw Exception(ErrorCodes::CAPN_PROTO_BAD_CAST, "Field {} is not a struct", field_name); + + return getStructBuilderAndFieldByColumnName(field_builder.as(), nested_name); + } + + throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "Capnproto struct doesn't contain field with name {}", field_name); +} + +static capnp::StructSchema::Field getFieldByName(const capnp::StructSchema & schema, const String & name) +{ + auto [field_name, nested_name] = splitFieldName(name); + KJ_IF_MAYBE(field, schema.findFieldByName(field_name)) + { + if (nested_name.empty()) + return *field; + + if (!field->getType().isStruct()) + throw Exception(ErrorCodes::CAPN_PROTO_BAD_CAST, "Field {} is not a struct", field_name); + + return getFieldByName(field->getType().asStruct(), nested_name); + } + + throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "Capnproto schema doesn't contain field with name {}", field_name); +} + +void checkCapnProtoSchemaStructure(const capnp::StructSchema & schema, const Block & header, FormatSettings::EnumComparingMode mode) +{ + /// Firstly check that struct doesn't contain unnamed union, because we don't support it. 
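The splitFieldName helper above is what lets a flat ClickHouse column name address a nested CapnProto field: the part before the first '.' or '_' picks the outer struct field, and getReaderByColumnName / getStructBuilderAndFieldByColumnName recurse with the remainder. A standalone sketch of the same splitting rule, written with the standard library instead of ClickHouse's find_first_symbols:

#include <iostream>
#include <string>
#include <utility>

// Same rule as splitFieldName in the patch: split on the first '.' or '_'.
std::pair<std::string, std::string> splitFieldName(const std::string & name)
{
    auto pos = name.find_first_of("._");
    if (pos == std::string::npos)
        return {name, ""};
    return {name.substr(0, pos), name.substr(pos + 1)};
}

int main()
{
    auto [outer, rest] = splitFieldName("nested_field_value");
    std::cout << outer << " | " << rest << '\n';   // prints "nested | field_value"
}

Both spellings of a nested column name, nested.field and nested_field, therefore resolve to the same CapnProto struct member.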
+ if (checkIfStructContainsUnnamedUnion(schema)) + throw Exception(ErrorCodes::CAPN_PROTO_BAD_CAST, "Schema contains unnamed union that is not supported"); + auto names_and_types = header.getNamesAndTypesList(); + String additional_error_message; + for (auto & [name, type] : names_and_types) + { + auto field = getFieldByName(schema, name); + if (!checkCapnProtoType(field.getType(), type, mode, additional_error_message)) + { + auto e = Exception( + ErrorCodes::CAPN_PROTO_BAD_CAST, + "Cannot convert ClickHouse type {} to CapnProto type {}", + type->getName(), + getCapnProtoFullTypeName(field.getType())); + if (!additional_error_message.empty()) + e.addMessage(additional_error_message); + throw std::move(e); + } + } +} + +} + +#endif diff --git a/src/Formats/CapnProtoUtils.h b/src/Formats/CapnProtoUtils.h new file mode 100644 index 00000000000..93ca0a5e616 --- /dev/null +++ b/src/Formats/CapnProtoUtils.h @@ -0,0 +1,43 @@ +#pragma once + +#include "config_formats.h" +#if USE_CAPNP + +#include +#include +#include +#include +#include + +namespace DB +{ +// Wrapper for classes that could throw in destructor +// https://github.com/capnproto/capnproto/issues/553 +template +struct DestructorCatcher +{ + T impl; + template + DestructorCatcher(Arg && ... args) : impl(kj::fwd(args)...) {} + ~DestructorCatcher() noexcept try { } catch (...) { return; } +}; + +class CapnProtoSchemaParser : public DestructorCatcher +{ +public: + CapnProtoSchemaParser() {} + + capnp::StructSchema getMessageSchema(const FormatSchemaInfo & schema_info); +}; + +bool compareEnumNames(const String & first, const String & second, FormatSettings::EnumComparingMode mode); + +std::pair getStructBuilderAndFieldByColumnName(capnp::DynamicStruct::Builder struct_builder, const String & name); + +capnp::DynamicValue::Reader getReaderByColumnName(const capnp::DynamicStruct::Reader & struct_reader, const String & name); + +void checkCapnProtoSchemaStructure(const capnp::StructSchema & schema, const Block & header, FormatSettings::EnumComparingMode mode); + +} + +#endif diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index d3ff5cbf8a7..63cb26ab87c 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -112,6 +112,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.arrow.low_cardinality_as_dictionary = settings.output_format_arrow_low_cardinality_as_dictionary; format_settings.arrow.import_nested = settings.input_format_arrow_import_nested; format_settings.orc.import_nested = settings.input_format_orc_import_nested; + format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode; /// Validate avro_schema_registry_url with RemoteHostFilter when non-empty and in Server context if (format_settings.schema.is_server) diff --git a/src/Formats/FormatSchemaInfo.cpp b/src/Formats/FormatSchemaInfo.cpp index 2605c0bdf04..24c8dfc14f2 100644 --- a/src/Formats/FormatSchemaInfo.cpp +++ b/src/Formats/FormatSchemaInfo.cpp @@ -99,4 +99,10 @@ FormatSchemaInfo::FormatSchemaInfo(const String & format_schema, const String & } } +FormatSchemaInfo::FormatSchemaInfo(const FormatSettings & settings, const String & format, bool require_message) + : FormatSchemaInfo( + settings.schema.format_schema, format, require_message, settings.schema.is_server, settings.schema.format_schema_path) +{ +} + } diff --git a/src/Formats/FormatSchemaInfo.h b/src/Formats/FormatSchemaInfo.h index cb041e02116..8c430218af0 100644 --- 
a/src/Formats/FormatSchemaInfo.h +++ b/src/Formats/FormatSchemaInfo.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB { @@ -11,6 +12,7 @@ class FormatSchemaInfo { public: FormatSchemaInfo(const String & format_schema, const String & format, bool require_message, bool is_server, const std::string & format_schema_path); + FormatSchemaInfo(const FormatSettings & settings, const String & format, bool require_message); /// Returns path to the schema file. const String & schemaPath() const { return schema_path; } diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 8c894c77e82..ce5f1effa8c 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -183,6 +183,20 @@ struct FormatSettings { bool import_nested = false; } orc; + + /// For apnProto format we should determine how to + /// compare ClickHouse Enum and Enum from schema. + enum class EnumComparingMode + { + BY_NAMES, // Names in enums should be the same, values can be different. + BY_NAMES_CASE_INSENSITIVE, // Case-insensitive name comparison. + BY_VALUES, // Values should be the same, names can be different. + }; + + struct + { + EnumComparingMode enum_comparing_mode = EnumComparingMode::BY_VALUES; + } capn_proto; }; } diff --git a/src/Formats/registerFormats.cpp b/src/Formats/registerFormats.cpp index 3e4c0366e8a..f6b4bb7e2e1 100644 --- a/src/Formats/registerFormats.cpp +++ b/src/Formats/registerFormats.cpp @@ -67,6 +67,7 @@ void registerOutputFormatNull(FormatFactory & factory); void registerOutputFormatMySQLWire(FormatFactory & factory); void registerOutputFormatMarkdown(FormatFactory & factory); void registerOutputFormatPostgreSQLWire(FormatFactory & factory); +void registerOutputFormatCapnProto(FormatFactory & factory); /// Input only formats. @@ -139,6 +140,7 @@ void registerFormats() registerOutputFormatMySQLWire(factory); registerOutputFormatMarkdown(factory); registerOutputFormatPostgreSQLWire(factory); + registerOutputFormatProcessorsCapnProto(factory); registerInputFormatRegexp(factory); registerInputFormatJSONAsString(factory); diff --git a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp index fd4b2870bea..8492fc9b623 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp @@ -1,7 +1,6 @@ #include "CapnProtoRowInputFormat.h" #if USE_CAPNP -#include #include #include #include @@ -9,198 +8,40 @@ #include #include #include -#include -#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include namespace DB { namespace ErrorCodes { - extern const int BAD_TYPE_OF_FIELD; - extern const int THERE_IS_NO_COLUMN; extern const int LOGICAL_ERROR; } -static CapnProtoRowInputFormat::NestedField split(const Block & header, size_t i) -{ - CapnProtoRowInputFormat::NestedField field = {{}, i}; - - // Remove leading dot in field definition, e.g. ".msg" -> "msg" - String name(header.safeGetByPosition(i).name); - if (!name.empty() && name[0] == '.') - name.erase(0, 1); - - splitInto<'.', '_'>(field.tokens, name); - return field; -} - - -static Field convertNodeToField(const capnp::DynamicValue::Reader & value) -{ - switch (value.getType()) - { - case capnp::DynamicValue::UNKNOWN: - throw Exception("Unknown field type", ErrorCodes::BAD_TYPE_OF_FIELD); - case capnp::DynamicValue::VOID: - return Field(); - case capnp::DynamicValue::BOOL: - return value.as() ? 
1u : 0u; - case capnp::DynamicValue::INT: - return value.as(); - case capnp::DynamicValue::UINT: - return value.as(); - case capnp::DynamicValue::FLOAT: - return value.as(); - case capnp::DynamicValue::TEXT: - { - auto arr = value.as(); - return String(arr.begin(), arr.size()); - } - case capnp::DynamicValue::DATA: - { - auto arr = value.as().asChars(); - return String(arr.begin(), arr.size()); - } - case capnp::DynamicValue::LIST: - { - auto list_value = value.as(); - Array res(list_value.size()); - for (auto i : kj::indices(list_value)) - res[i] = convertNodeToField(list_value[i]); - - return res; - } - case capnp::DynamicValue::ENUM: - return value.as().getRaw(); - case capnp::DynamicValue::STRUCT: - { - auto struct_value = value.as(); - const auto & fields = struct_value.getSchema().getFields(); - - Tuple tuple(fields.size()); - for (auto i : kj::indices(fields)) - tuple[i] = convertNodeToField(struct_value.get(fields[i])); - - return tuple; - } - case capnp::DynamicValue::CAPABILITY: - throw Exception("CAPABILITY type not supported", ErrorCodes::BAD_TYPE_OF_FIELD); - case capnp::DynamicValue::ANY_POINTER: - throw Exception("ANY_POINTER type not supported", ErrorCodes::BAD_TYPE_OF_FIELD); - } - return Field(); -} - -static capnp::StructSchema::Field getFieldOrThrow(capnp::StructSchema node, const std::string & field) -{ - KJ_IF_MAYBE(child, node.findFieldByName(field)) - return *child; - else - throw Exception("Field " + field + " doesn't exist in schema " + node.getShortDisplayName().cStr(), ErrorCodes::THERE_IS_NO_COLUMN); -} - - -void CapnProtoRowInputFormat::createActions(const NestedFieldList & sorted_fields, capnp::StructSchema reader) -{ - /// Columns in a table can map to fields in Cap'n'Proto or to structs. - - /// Store common parents and their tokens in order to backtrack. 
- std::vector parents; - std::vector parent_tokens; - - capnp::StructSchema cur_reader = reader; - - for (const auto & field : sorted_fields) - { - if (field.tokens.empty()) - throw Exception("Logical error in CapnProtoRowInputFormat", ErrorCodes::LOGICAL_ERROR); - - // Backtrack to common parent - while (field.tokens.size() < parent_tokens.size() + 1 - || !std::equal(parent_tokens.begin(), parent_tokens.end(), field.tokens.begin())) - { - actions.push_back({Action::POP}); - parents.pop_back(); - parent_tokens.pop_back(); - - if (parents.empty()) - { - cur_reader = reader; - break; - } - else - cur_reader = parents.back().getType().asStruct(); - } - - // Go forward - while (parent_tokens.size() + 1 < field.tokens.size()) - { - const auto & token = field.tokens[parents.size()]; - auto node = getFieldOrThrow(cur_reader, token); - if (node.getType().isStruct()) - { - // Descend to field structure - parents.emplace_back(node); - parent_tokens.emplace_back(token); - cur_reader = node.getType().asStruct(); - actions.push_back({Action::PUSH, node}); - } - else if (node.getType().isList()) - { - break; // Collect list - } - else - throw Exception("Field " + token + " is neither Struct nor List", ErrorCodes::BAD_TYPE_OF_FIELD); - } - - // Read field from the structure - auto node = getFieldOrThrow(cur_reader, field.tokens[parents.size()]); - if (node.getType().isList() && !actions.empty() && actions.back().field == node) - { - // The field list here flattens Nested elements into multiple arrays - // In order to map Nested types in Cap'nProto back, they need to be collected - // Since the field names are sorted, the order of field positions must be preserved - // For example, if the fields are { b @0 :Text, a @1 :Text }, the `a` would come first - // even though it's position is second. - auto & columns = actions.back().columns; - auto it = std::upper_bound(columns.cbegin(), columns.cend(), field.pos); - columns.insert(it, field.pos); - } - else - { - actions.push_back({Action::READ, node, {field.pos}}); - } - } -} - -CapnProtoRowInputFormat::CapnProtoRowInputFormat(ReadBuffer & in_, Block header, Params params_, const FormatSchemaInfo & info) - : IRowInputFormat(std::move(header), in_, std::move(params_)), parser(std::make_shared()) +CapnProtoRowInputFormat::CapnProtoRowInputFormat(ReadBuffer & in_, Block header, Params params_, const FormatSchemaInfo & info, const FormatSettings & format_settings_) + : IRowInputFormat(std::move(header), in_, std::move(params_)) + , parser(std::make_shared()) + , format_settings(format_settings_) + , column_types(getPort().getHeader().getDataTypes()) + , column_names(getPort().getHeader().getNames()) { // Parse the schema and fetch the root object - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - auto schema = parser->impl.parseDiskFile(info.schemaPath(), info.absoluteSchemaPath(), {}); -#pragma GCC diagnostic pop - - root = schema.getNested(info.messageName()).asStruct(); - - /** - * The schema typically consists of fields in various nested structures. - * Here we gather the list of fields and sort them in a way so that fields in the same structure are adjacent, - * and the nesting level doesn't decrease to make traversal easier. - */ - const auto & sample = getPort().getHeader(); - NestedFieldList list; - size_t num_columns = sample.columns(); - for (size_t i = 0; i < num_columns; ++i) - list.push_back(split(sample, i)); - - // Order list first by value of strings then by length of string vector. 
- std::sort(list.begin(), list.end(), [](const NestedField & a, const NestedField & b) { return a.tokens < b.tokens; }); - createActions(list, root); + root = parser->getMessageSchema(info); + checkCapnProtoSchemaStructure(root, getPort().getHeader(), format_settings.capn_proto.enum_comparing_mode); } kj::Array CapnProtoRowInputFormat::readMessage() @@ -233,6 +74,186 @@ kj::Array CapnProtoRowInputFormat::readMessage() return msg; } +static void insertSignedInteger(IColumn & column, const DataTypePtr & column_type, Int64 value) +{ + switch (column_type->getTypeId()) + { + case TypeIndex::Int8: + assert_cast(column).insertValue(value); + break; + case TypeIndex::Int16: + assert_cast(column).insertValue(value); + break; + case TypeIndex::Int32: + assert_cast(column).insertValue(value); + break; + case TypeIndex::Int64: + assert_cast(column).insertValue(value); + break; + case TypeIndex::DateTime64: + assert_cast &>(column).insertValue(value); + break; + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Column type is not a signed integer."); + } +} + +static void insertUnsignedInteger(IColumn & column, const DataTypePtr & column_type, UInt64 value) +{ + switch (column_type->getTypeId()) + { + case TypeIndex::UInt8: + assert_cast(column).insertValue(value); + break; + case TypeIndex::Date: [[fallthrough]]; + case TypeIndex::UInt16: + assert_cast(column).insertValue(value); + break; + case TypeIndex::DateTime: [[fallthrough]]; + case TypeIndex::UInt32: + assert_cast(column).insertValue(value); + break; + case TypeIndex::UInt64: + assert_cast(column).insertValue(value); + break; + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Column type is not an unsigned integer."); + } +} + +static void insertFloat(IColumn & column, const DataTypePtr & column_type, Float64 value) +{ + switch (column_type->getTypeId()) + { + case TypeIndex::Float32: + assert_cast(column).insertValue(value); + break; + case TypeIndex::Float64: + assert_cast(column).insertValue(value); + break; + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Column type is not a float."); + } +} + +template +static void insertString(IColumn & column, Value value) +{ + column.insertData(reinterpret_cast(value.begin()), value.size()); +} + +template +static void insertEnum(IColumn & column, const DataTypePtr & column_type, const capnp::DynamicEnum & enum_value, FormatSettings::EnumComparingMode enum_comparing_mode) +{ + auto enumerant = *kj::_::readMaybe(enum_value.getEnumerant()); + auto enum_type = assert_cast *>(column_type.get()); + DataTypePtr nested_type = std::make_shared>(); + if (enum_comparing_mode == FormatSettings::EnumComparingMode::BY_VALUES) + insertSignedInteger(column, nested_type, Int64(enumerant.getOrdinal())); + else if (enum_comparing_mode == FormatSettings::EnumComparingMode::BY_NAMES) + insertSignedInteger(column, nested_type, Int64(enum_type->getValue(String(enumerant.getProto().getName())))); + else + { + /// Find the same enum name case insensitive. 
+ String enum_name = enumerant.getProto().getName(); + for (auto & name : enum_type->getAllRegisteredNames()) + { + if (compareEnumNames(name, enum_name, enum_comparing_mode)) + { + insertSignedInteger(column, nested_type, Int64(enum_type->getValue(name))); + break; + } + } + } +} + +static void insertValue(IColumn & column, const DataTypePtr & column_type, const capnp::DynamicValue::Reader & value, FormatSettings::EnumComparingMode enum_comparing_mode) +{ + if (column_type->lowCardinality()) + { + auto & lc_column = assert_cast(column); + auto tmp_column = lc_column.getDictionary().getNestedColumn()->cloneEmpty(); + auto dict_type = assert_cast(column_type.get())->getDictionaryType(); + insertValue(*tmp_column, dict_type, value, enum_comparing_mode); + lc_column.insertFromFullColumn(*tmp_column, 0); + return; + } + + switch (value.getType()) + { + case capnp::DynamicValue::Type::INT: + insertSignedInteger(column, column_type, value.as()); + break; + case capnp::DynamicValue::Type::UINT: + insertUnsignedInteger(column, column_type, value.as()); + break; + case capnp::DynamicValue::Type::FLOAT: + insertFloat(column, column_type, value.as()); + break; + case capnp::DynamicValue::Type::BOOL: + insertUnsignedInteger(column, column_type, UInt64(value.as())); + break; + case capnp::DynamicValue::Type::DATA: + insertString(column, value.as()); + break; + case capnp::DynamicValue::Type::TEXT: + insertString(column, value.as()); + break; + case capnp::DynamicValue::Type::ENUM: + if (column_type->getTypeId() == TypeIndex::Enum8) + insertEnum(column, column_type, value.as(), enum_comparing_mode); + else + insertEnum(column, column_type, value.as(), enum_comparing_mode); + break; + case capnp::DynamicValue::LIST: + { + auto list_value = value.as(); + auto & column_array = assert_cast(column); + auto & offsets = column_array.getOffsets(); + offsets.push_back(offsets.back() + list_value.size()); + + auto & nested_column = column_array.getData(); + auto nested_type = assert_cast(column_type.get())->getNestedType(); + for (const auto & nested_value : list_value) + insertValue(nested_column, nested_type, nested_value, enum_comparing_mode); + break; + } + case capnp::DynamicValue::Type::STRUCT: + { + auto struct_value = value.as(); + if (column_type->isNullable()) + { + auto & nullable_column = assert_cast(column); + auto field = *kj::_::readMaybe(struct_value.which()); + if (field.getType().isVoid()) + nullable_column.insertDefault(); + else + { + auto & nested_column = nullable_column.getNestedColumn(); + auto nested_type = assert_cast(column_type.get())->getNestedType(); + auto nested_value = struct_value.get(field); + insertValue(nested_column, nested_type, nested_value, enum_comparing_mode); + nullable_column.getNullMapData().push_back(0); + } + } + else + { + auto & tuple_column = assert_cast(column); + const auto * tuple_type = assert_cast(column_type.get()); + for (size_t i = 0; i != tuple_column.tupleSize(); ++i) + insertValue( + tuple_column.getColumn(i), + tuple_type->getElements()[i], + struct_value.get(tuple_type->getElementNames()[i]), + enum_comparing_mode); + } + break; + } + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected CapnProto value type."); + } +} + bool CapnProtoRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &) { if (in->eof()) @@ -245,51 +266,12 @@ bool CapnProtoRowInputFormat::readRow(MutableColumns & columns, RowReadExtension #else capnp::FlatArrayMessageReader msg(array); #endif - std::vector stack; - stack.push_back(msg.getRoot(root)); - for 
(auto action : actions) + auto root_reader = msg.getRoot(root); + for (size_t i = 0; i != columns.size(); ++i) { - switch (action.type) - { - case Action::READ: - { - Field value = convertNodeToField(stack.back().get(action.field)); - if (action.columns.size() > 1) - { - // Nested columns must be flattened into several arrays - // e.g. Array(Tuple(x ..., y ...)) -> Array(x ...), Array(y ...) - const auto & collected = DB::get(value); - size_t size = collected.size(); - // The flattened array contains an array of a part of the nested tuple - Array flattened(size); - for (size_t column_index = 0; column_index < action.columns.size(); ++column_index) - { - // Populate array with a single tuple elements - for (size_t off = 0; off < size; ++off) - { - const auto & tuple = DB::get(collected[off]); - flattened[off] = tuple[column_index]; - } - auto & col = columns[action.columns[column_index]]; - col->insert(flattened); - } - } - else - { - auto & col = columns[action.columns[0]]; - col->insert(value); - } - - break; - } - case Action::POP: - stack.pop_back(); - break; - case Action::PUSH: - stack.push_back(stack.back().get(action.field).as()); - break; - } + auto value = getReaderByColumnName(root_reader, column_names[i]); + insertValue(*columns[i], column_types[i], value, format_settings.capn_proto.enum_comparing_mode); } return true; @@ -302,8 +284,7 @@ void registerInputFormatCapnProto(FormatFactory & factory) [](ReadBuffer & buf, const Block & sample, IRowInputFormat::Params params, const FormatSettings & settings) { return std::make_shared(buf, sample, std::move(params), - FormatSchemaInfo(settings.schema.format_schema, "CapnProto", true, - settings.schema.is_server, settings.schema.format_schema_path)); + FormatSchemaInfo(settings, "CapnProto", true), settings); }); } diff --git a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h index 0957cd1d681..fc30cf11237 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h +++ b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h @@ -4,8 +4,8 @@ #if USE_CAPNP #include +#include #include -#include namespace DB { @@ -22,18 +22,7 @@ class ReadBuffer; class CapnProtoRowInputFormat : public IRowInputFormat { public: - struct NestedField - { - std::vector tokens; - size_t pos; - }; - using NestedFieldList = std::vector; - - /** schema_dir - base path for schema files - * schema_file - location of the capnproto schema, e.g. "schema.capnp" - * root_object - name to the root object, e.g. "Message" - */ - CapnProtoRowInputFormat(ReadBuffer & in_, Block header, Params params_, const FormatSchemaInfo & info); + CapnProtoRowInputFormat(ReadBuffer & in_, Block header, Params params_, const FormatSchemaInfo & info, const FormatSettings & format_settings_); String getName() const override { return "CapnProtoRowInputFormat"; } @@ -42,34 +31,11 @@ public: private: kj::Array readMessage(); - // Build a traversal plan from a sorted list of fields - void createActions(const NestedFieldList & sorted_fields, capnp::StructSchema reader); - - /* Action for state machine for traversing nested structures. */ - using BlockPositionList = std::vector; - struct Action - { - enum Type { POP, PUSH, READ }; - Type type{}; - capnp::StructSchema::Field field{}; - BlockPositionList columns{}; - }; - - // Wrapper for classes that could throw in destructor - // https://github.com/capnproto/capnproto/issues/553 - template - struct DestructorCatcher - { - T impl; - template - DestructorCatcher(Arg && ... 
args) : impl(kj::fwd(args)...) {} - ~DestructorCatcher() noexcept try { } catch (...) { return; } - }; - using SchemaParser = DestructorCatcher; - - std::shared_ptr parser; + std::shared_ptr parser; capnp::StructSchema root; - std::vector actions; + const FormatSettings format_settings; + DataTypes column_types; + Names column_names; }; } diff --git a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp new file mode 100644 index 00000000000..b299e1fc00a --- /dev/null +++ b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp @@ -0,0 +1,251 @@ +#include +#if USE_CAPNP + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + + +CapnProtoOutputStream::CapnProtoOutputStream(WriteBuffer & out_) : out(out_) +{ +} + +void CapnProtoOutputStream::write(const void * buffer, size_t size) +{ + out.write(reinterpret_cast(buffer), size); +} + +CapnProtoRowOutputFormat::CapnProtoRowOutputFormat( + WriteBuffer & out_, + const Block & header_, + const RowOutputFormatParams & params_, + const FormatSchemaInfo & info, + const FormatSettings & format_settings_) + : IRowOutputFormat(header_, out_, params_), column_names(header_.getNames()), column_types(header_.getDataTypes()), output_stream(std::make_unique(out_)), format_settings(format_settings_) +{ + schema = schema_parser.getMessageSchema(info); + checkCapnProtoSchemaStructure(schema, getPort(PortKind::Main).getHeader(), format_settings.capn_proto.enum_comparing_mode); +} + +template +static capnp::DynamicEnum getDynamicEnum( + const ColumnPtr & column, + const DataTypePtr & data_type, + size_t row_num, + const capnp::EnumSchema & enum_schema, + FormatSettings::EnumComparingMode mode) +{ + const auto * enum_data_type = assert_cast *>(data_type.get()); + EnumValue enum_value = column->getInt(row_num); + if (mode == FormatSettings::EnumComparingMode::BY_VALUES) + return capnp::DynamicEnum(enum_schema, enum_value); + + auto enum_name = enum_data_type->getNameForValue(enum_value); + for (const auto enumerant : enum_schema.getEnumerants()) + { + if (compareEnumNames(String(enum_name), enumerant.getProto().getName(), mode)) + return capnp::DynamicEnum(enumerant); + } + + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot convert CLickHouse Enum value to CapnProto Enum"); +} + +static capnp::DynamicValue::Builder initStructFieldBuilder(const ColumnPtr & column, size_t row_num, capnp::DynamicStruct::Builder & struct_builder, capnp::StructSchema::Field field) +{ + if (const auto * array_column = checkAndGetColumn(*column)) + { + size_t size = array_column->getOffsets()[row_num] - array_column->getOffsets()[row_num - 1]; + return struct_builder.init(field, size); + } + + if (field.getType().isStruct()) + return struct_builder.init(field); + + return struct_builder.get(field); +} + +static std::optional convertToDynamicValue(const ColumnPtr & column, const DataTypePtr & data_type, size_t row_num, capnp::DynamicValue::Builder builder, FormatSettings::EnumComparingMode enum_comparing_mode) +{ + /// Here we don't do any types validation, because we did it in CapnProtoRowOutputFormat constructor. 
+
+    if (data_type->lowCardinality())
+    {
+        const auto * lc_column = assert_cast<const ColumnLowCardinality *>(column.get());
+        const auto & dict_type = assert_cast<const DataTypeLowCardinality *>(data_type.get())->getDictionaryType();
+        size_t index = lc_column->getIndexAt(row_num);
+        return convertToDynamicValue(lc_column->getDictionary().getNestedColumn(), dict_type, index, builder, enum_comparing_mode);
+    }
+
+    switch (builder.getType())
+    {
+        case capnp::DynamicValue::Type::INT:
+            /// We allow output DateTime64 as Int64.
+            if (WhichDataType(data_type).isDateTime64())
+                return capnp::DynamicValue::Reader(assert_cast<const ColumnDecimal<DateTime64> *>(column.get())->getElement(row_num));
+            return capnp::DynamicValue::Reader(column->getInt(row_num));
+        case capnp::DynamicValue::Type::UINT:
+            return capnp::DynamicValue::Reader(column->getUInt(row_num));
+        case capnp::DynamicValue::Type::BOOL:
+            return capnp::DynamicValue::Reader(column->getBool(row_num));
+        case capnp::DynamicValue::Type::FLOAT:
+            return capnp::DynamicValue::Reader(column->getFloat64(row_num));
+        case capnp::DynamicValue::Type::ENUM:
+        {
+            auto enum_schema = builder.as<capnp::DynamicEnum>().getSchema();
+            if (data_type->getTypeId() == TypeIndex::Enum8)
+                return capnp::DynamicValue::Reader(
+                    getDynamicEnum<Int8>(column, data_type, row_num, enum_schema, enum_comparing_mode));
+            return capnp::DynamicValue::Reader(
+                getDynamicEnum<Int16>(column, data_type, row_num, enum_schema, enum_comparing_mode));
+        }
+        case capnp::DynamicValue::Type::DATA:
+        {
+            auto data = column->getDataAt(row_num);
+            return capnp::DynamicValue::Reader(capnp::Data::Reader(reinterpret_cast<const kj::byte *>(data.data), data.size));
+        }
+        case capnp::DynamicValue::Type::TEXT:
+        {
+            auto data = String(column->getDataAt(row_num));
+            return capnp::DynamicValue::Reader(capnp::Text::Reader(data.data(), data.size()));
+        }
+        case capnp::DynamicValue::Type::STRUCT:
+        {
+            auto struct_builder = builder.as<capnp::DynamicStruct>();
+            auto nested_struct_schema = struct_builder.getSchema();
+            /// Struct can represent Tuple or Nullable (named union with two fields)
+            if (data_type->isNullable())
+            {
+                const auto * nullable_type = assert_cast<const DataTypeNullable *>(data_type.get());
+                const auto * nullable_column = assert_cast<const ColumnNullable *>(column.get());
+                auto fields = nested_struct_schema.getUnionFields();
+                if (nullable_column->isNullAt(row_num))
+                {
+                    auto null_field = fields[0].getType().isVoid() ? fields[0] : fields[1];
+                    struct_builder.set(null_field, capnp::Void());
+                }
+                else
+                {
+                    auto value_field = fields[0].getType().isVoid() ? fields[1] : fields[0];
+                    struct_builder.clear(value_field);
+                    const auto & nested_column = nullable_column->getNestedColumnPtr();
+                    auto value_builder = initStructFieldBuilder(nested_column, row_num, struct_builder, value_field);
+                    auto value = convertToDynamicValue(nested_column, nullable_type->getNestedType(), row_num, value_builder, enum_comparing_mode);
+                    if (value)
+                        struct_builder.set(value_field, std::move(*value));
+                }
+            }
+            else
+            {
+                const auto * tuple_data_type = assert_cast<const DataTypeTuple *>(data_type.get());
+                auto nested_types = tuple_data_type->getElements();
+                const auto & nested_columns = assert_cast<const ColumnTuple *>(column.get())->getColumns();
+                for (const auto & name : tuple_data_type->getElementNames())
+                {
+                    auto pos = tuple_data_type->getPositionByName(name);
+                    auto field_builder
+                        = initStructFieldBuilder(nested_columns[pos], row_num, struct_builder, nested_struct_schema.getFieldByName(name));
+                    auto value = convertToDynamicValue(nested_columns[pos], nested_types[pos], row_num, field_builder, enum_comparing_mode);
+                    if (value)
+                        struct_builder.set(name, std::move(*value));
+                }
+            }
+            return std::nullopt;
+        }
+        case capnp::DynamicValue::Type::LIST:
+        {
+            auto list_builder = builder.as<capnp::DynamicList>();
+            const auto * array_column = assert_cast<const ColumnArray *>(column.get());
+            const auto & nested_column = array_column->getDataPtr();
+            const auto & nested_type = assert_cast<const DataTypeArray *>(data_type.get())->getNestedType();
+            const auto & offsets = array_column->getOffsets();
+            auto offset = offsets[row_num - 1];
+            size_t size = offsets[row_num] - offset;
+
+            const auto * nested_array_column = checkAndGetColumn<ColumnArray>(*nested_column);
+            for (size_t i = 0; i != size; ++i)
+            {
+                capnp::DynamicValue::Builder value_builder;
+                /// For nested arrays we need to initialize nested list builder.
+                if (nested_array_column)
+                {
+                    const auto & nested_offset = nested_array_column->getOffsets();
+                    size_t nested_array_size = nested_offset[offset + i] - nested_offset[offset + i - 1];
+                    value_builder = list_builder.init(i, nested_array_size);
+                }
+                else
+                    value_builder = list_builder[i];
+
+                auto value = convertToDynamicValue(nested_column, nested_type, offset + i, value_builder, enum_comparing_mode);
+                if (value)
+                    list_builder.set(i, std::move(*value));
+            }
+            return std::nullopt;
+        }
+        default:
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected CapnProto type.");
+    }
+}
+
+void CapnProtoRowOutputFormat::write(const Columns & columns, size_t row_num)
+{
+    capnp::MallocMessageBuilder message;
+    capnp::DynamicStruct::Builder root = message.initRoot<capnp::DynamicStruct>(schema);
+    for (size_t i = 0; i != columns.size(); ++i)
+    {
+        auto [struct_builder, field] = getStructBuilderAndFieldByColumnName(root, column_names[i]);
+        auto field_builder = initStructFieldBuilder(columns[i], row_num, struct_builder, field);
+        auto value = convertToDynamicValue(columns[i], column_types[i], row_num, field_builder, format_settings.capn_proto.enum_comparing_mode);
+        if (value)
+            struct_builder.set(field, std::move(*value));
+    }
+
+    capnp::writeMessage(*output_stream, message);
+}
+
+void registerOutputFormatProcessorsCapnProto(FormatFactory & factory)
+{
+    factory.registerOutputFormatProcessor("CapnProto", [](
+        WriteBuffer & buf,
+        const Block & sample,
+        const RowOutputFormatParams & params,
+        const FormatSettings & format_settings)
+    {
+        return std::make_shared<CapnProtoRowOutputFormat>(buf, sample, params, FormatSchemaInfo(format_settings, "CapnProto", true), format_settings);
+    });
+}
+
+}
+
+#else
+
+namespace DB
+{
+class FormatFactory;
+void registerOutputFormatProcessorsCapnProto(FormatFactory &) {}
+}
+
+#endif // USE_CAPNP
diff --git a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h new file mode 100644 index 00000000000..0f321071d62 --- /dev/null +++ b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h @@ -0,0 +1,53 @@ +#pragma once + +#include "config_formats.h" +#if USE_CAPNP + +#include +#include +#include +#include +#include +#include + +namespace DB +{ +class CapnProtoOutputStream : public kj::OutputStream +{ +public: + CapnProtoOutputStream(WriteBuffer & out_); + + void write(const void * buffer, size_t size) override; + +private: + WriteBuffer & out; +}; + +class CapnProtoRowOutputFormat : public IRowOutputFormat +{ +public: + CapnProtoRowOutputFormat( + WriteBuffer & out_, + const Block & header_, + const RowOutputFormatParams & params_, + const FormatSchemaInfo & info, + const FormatSettings & format_settings_); + + String getName() const override { return "CapnProtoRowOutputFormat"; } + + void write(const Columns & columns, size_t row_num) override; + + void writeField(const IColumn &, const ISerialization &, size_t) override { } + +private: + Names column_names; + DataTypes column_types; + capnp::StructSchema schema; + std::unique_ptr output_stream; + const FormatSettings format_settings; + CapnProtoSchemaParser schema_parser; +}; + +} + +#endif // USE_CAPNP diff --git a/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp b/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp index a5e6b7ec480..df7b7102739 100644 --- a/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp @@ -67,8 +67,7 @@ void registerInputFormatProtobuf(FormatFactory & factory) const FormatSettings & settings) { return std::make_shared(buf, sample, std::move(params), - FormatSchemaInfo(settings.schema.format_schema, "Protobuf", true, - settings.schema.is_server, settings.schema.format_schema_path), + FormatSchemaInfo(settings, "Protobuf", true), with_length_delimiter); }); } diff --git a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp index 12c5e98797a..29cd9be79bc 100644 --- a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp @@ -64,9 +64,7 @@ void registerOutputFormatProtobuf(FormatFactory & factory) { return std::make_shared( buf, header, params, - FormatSchemaInfo(settings.schema.format_schema, "Protobuf", - true, settings.schema.is_server, - settings.schema.format_schema_path), + FormatSchemaInfo(settings, "Protobuf", true), settings, with_length_delimiter); }); diff --git a/tests/queries/0_stateless/02030_capnp_format.reference b/tests/queries/0_stateless/02030_capnp_format.reference new file mode 100644 index 00000000000..2b2307bfc6a --- /dev/null +++ b/tests/queries/0_stateless/02030_capnp_format.reference @@ -0,0 +1,52 @@ +-1 1 -1000 1000 -10000000 1000000 -1000000000 1000000000 123.123 123123123.12312312 Some string fixed Some data 2000-01-06 2000-06-01 19:42:42 2000-04-01 11:21:33.123 +-1 1 -1000 1000 -10000000 1000000 -1000000000 1000000000 123.123 123123123.12312312 Some string fixed Some data 2000-01-06 2000-06-01 19:42:42 2000-04-01 11:21:33.123 +1 (2,(3,4)) (((5))) +1 (2,(3,4)) (((5))) +1 [1,2,3] [[[1,2,3],[4,5,6]],[[7,8,9],[]],[]] +1 [1,2,3] [[[1,2,3],[4,5,6]],[[7,8,9],[]],[]] +1 ((2,[[3,4],[5,6],[]]),[([[(7,8),(9,10)],[(11,12),(13,14)],[]],[([15,16,17]),([])])]) +1 
((2,[[3,4],[5,6],[]]),[([[(7,8),(9,10)],[(11,12),(13,14)],[]],[([15,16,17]),([])])]) +[1,2,3] [[4,5,6],[],[7,8]] [(9,10),(11,12),(13,14)] +[1,2,3] [[4,5,6],[],[7,8]] [(9,10),(11,12),(13,14)] +1 [1,NULL,2] (1) +\N [NULL,NULL,42] (NULL) +1 [1,NULL,2] (1) +\N [NULL,NULL,42] (NULL) +one +two +tHrEe +oNe +tWo +threE +first +second +third +OK +OK +OK +OK +one two ['one',NULL,'two',NULL] +two \N [NULL] +one two ['one',NULL,'two',NULL] +two \N [NULL] +0 1 2 +1 2 3 +2 3 4 +3 4 5 +4 5 6 +(0,(1,(2))) +(1,(2,(3))) +(2,(3,(4))) +(3,(4,(5))) +(4,(5,(6))) +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK diff --git a/tests/queries/0_stateless/02030_capnp_format.sh b/tests/queries/0_stateless/02030_capnp_format.sh new file mode 100755 index 00000000000..99807cc1738 --- /dev/null +++ b/tests/queries/0_stateless/02030_capnp_format.sh @@ -0,0 +1,109 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +CAPN_PROTO_FILE=$USER_FILES_PATH/data.capnp +touch $CAPN_PROTO_FILE + +SCHEMADIR=/$(clickhouse-client --query "select * from file('data.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)") +CLIENT_SCHEMADIR=$CURDIR/format_schemas +SERVER_SCHEMADIR=test_02030 +mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR +cp -r $CLIENT_SCHEMADIR/02030_* $SCHEMADIR/$SERVER_SCHEMADIR/ + + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS capnp_simple_types"; +$CLICKHOUSE_CLIENT --query="CREATE TABLE capnp_simple_types (int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64, float32 Float32, float64 Float64, string String, fixed FixedString(5), data String, date Date, datetime DateTime, datetime64 DateTime64(3)) ENGINE=Memory" +$CLICKHOUSE_CLIENT --query="INSERT INTO capnp_simple_types values (-1, 1, -1000, 1000, -10000000, 1000000, -1000000000, 1000000000, 123.123, 123123123.123123123, 'Some string', 'fixed', 'Some data', '2000-01-06', '2000-06-01 19:42:42', '2000-04-01 11:21:33.123')" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_simple_types FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_simple_types:Message'" | $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_simple_types FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_simple_types:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_simple_types" +$CLICKHOUSE_CLIENT --query="DROP TABLE capnp_simple_types" + + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS capnp_tuples" +$CLICKHOUSE_CLIENT --query="CREATE TABLE capnp_tuples (value UInt64, tuple1 Tuple(one UInt64, two Tuple(three UInt64, four UInt64)), tuple2 Tuple(nested1 Tuple(nested2 Tuple(x UInt64)))) ENGINE=Memory"; +$CLICKHOUSE_CLIENT --query="INSERT INTO capnp_tuples VALUES (1, (2, (3, 4)), (((5))))" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_tuples FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_tuples:Message'" | $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_tuples FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_tuples:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_tuples" +$CLICKHOUSE_CLIENT --query="DROP TABLE capnp_tuples" + + +$CLICKHOUSE_CLIENT --query="DROP 
TABLE IF EXISTS capnp_lists" +$CLICKHOUSE_CLIENT --query="CREATE TABLE capnp_lists (value UInt64, list1 Array(UInt64), list2 Array(Array(Array(UInt64)))) ENGINE=Memory"; +$CLICKHOUSE_CLIENT --query="INSERT INTO capnp_lists VALUES (1, [1, 2, 3], [[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], []], []])" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_lists FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_lists:Message'" | $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_lists FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_lists:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_lists" +$CLICKHOUSE_CLIENT --query="DROP TABLE capnp_lists" + + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS capnp_nested_lists_and_tuples" +$CLICKHOUSE_CLIENT --query="CREATE TABLE capnp_nested_lists_and_tuples (value UInt64, nested Tuple(a Tuple(b UInt64, c Array(Array(UInt64))), d Array(Tuple(e Array(Array(Tuple(f UInt64, g UInt64))), h Array(Tuple(k Array(UInt64))))))) ENGINE=Memory"; +$CLICKHOUSE_CLIENT --query="INSERT INTO capnp_nested_lists_and_tuples VALUES (1, ((2, [[3, 4], [5, 6], []]), [([[(7, 8), (9, 10)], [(11, 12), (13, 14)], []], [([15, 16, 17]), ([])])]))" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_nested_lists_and_tuples FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nested_lists_and_tuples:Message'" | $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_nested_lists_and_tuples FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nested_lists_and_tuples:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_nested_lists_and_tuples" +$CLICKHOUSE_CLIENT --query="DROP TABLE capnp_nested_lists_and_tuples" + + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS capnp_nested_table" +$CLICKHOUSE_CLIENT --query="CREATE TABLE capnp_nested_table (nested Nested(value UInt64, array Array(UInt64), tuple Tuple(one UInt64, two UInt64))) ENGINE=Memory"; +$CLICKHOUSE_CLIENT --query="INSERT INTO capnp_nested_table VALUES ([1, 2, 3], [[4, 5, 6], [], [7, 8]], [(9, 10), (11, 12), (13, 14)])" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_nested_table FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nested_table:Message'" | $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_nested_table FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nested_table:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_nested_table" +$CLICKHOUSE_CLIENT --query="DROP TABLE capnp_nested_table" + + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS capnp_nullable" +$CLICKHOUSE_CLIENT --query="CREATE TABLE capnp_nullable (nullable Nullable(UInt64), array Array(Nullable(UInt64)), tuple Tuple(nullable Nullable(UInt64))) ENGINE=Memory"; +$CLICKHOUSE_CLIENT --query="INSERT INTO capnp_nullable VALUES (1, [1, Null, 2], (1)), (Null, [Null, Null, 42], (Null))" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_nullable FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nullable:Message'" | $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_nullable FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nullable:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_nullable" +$CLICKHOUSE_CLIENT --query="DROP TABLE capnp_nullable" + + +$CLICKHOUSE_CLIENT --query="SELECT CAST(number, 'Enum(\'one\' = 0, \'two\' = 1, \'tHrEe\' = 2)') AS value FROM numbers(3) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_enum:Message'" > $CAPN_PROTO_FILE + +$CLICKHOUSE_CLIENT 
--query="SELECT * FROM file('data.capnp', 'CapnProto', 'value Enum(\'one\' = 1, \'two\' = 2, \'tHrEe\' = 3)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_enum:Message', format_capn_proto_enum_comparising_mode='by_names'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'value Enum(\'oNe\' = 1, \'tWo\' = 2, \'threE\' = 3)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_enum:Message', format_capn_proto_enum_comparising_mode='by_names_case_insensitive'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'value Enum(\'first\' = 0, \'second\' = 1, \'third\' = 2)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_enum:Message', format_capn_proto_enum_comparising_mode='by_values'" + +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'value Enum(\'one\' = 0, \'two\' = 1, \'three\' = 2)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_enum:Message', format_capn_proto_enum_comparising_mode='by_names'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'value Enum(\'one\' = 0, \'two\' = 1, \'tHrEe\' = 2, \'four\' = 3)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_enum:Message', format_capn_proto_enum_comparising_mode='by_names'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'value Enum(\'one\' = 1, \'two\' = 2, \'tHrEe\' = 3)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_enum:Message', format_capn_proto_enum_comparising_mode='by_values'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'value Enum(\'first\' = 1, \'two\' = 2, \'three\' = 3)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_enum:Message', format_capn_proto_enum_comparising_mode='by_names_case_insensitive'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS capnp_low_cardinality" +$CLICKHOUSE_CLIENT --query="CREATE TABLE capnp_low_cardinality (lc1 LowCardinality(String), lc2 LowCardinality(Nullable(String)), lc3 Array(LowCardinality(Nullable(String)))) ENGINE=Memory" +$CLICKHOUSE_CLIENT --query="INSERT INTO capnp_low_cardinality VALUES ('one', 'two', ['one', Null, 'two', Null]), ('two', Null, [Null])" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_low_cardinality FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_low_cardinality:Message'" | $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_low_cardinality FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_low_cardinality:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_low_cardinality" +$CLICKHOUSE_CLIENT --query="DROP TABLE capnp_low_cardinality" + + +$CLICKHOUSE_CLIENT --query="SELECT CAST(tuple(number, tuple(number + 1, tuple(number + 2))), 'Tuple(b UInt64, c Tuple(d UInt64, e Tuple(f UInt64)))') AS a FROM numbers(5) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nested_tuples:Message'" > $CAPN_PROTO_FILE +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'a_b UInt64, a_c_d UInt64, a_c_e_f UInt64') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_nested_tuples:Message'" + + +$CLICKHOUSE_CLIENT --query="SELECT number AS a_b, number + 1 AS a_c_d, number + 2 AS a_c_e_f FROM numbers(5) FORMAT CapnProto SETTINGS 
format_schema='$CLIENT_SCHEMADIR/02030_capnp_nested_tuples:Message'" > $CAPN_PROTO_FILE +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'a Tuple(b UInt64, c Tuple(d UInt64, e Tuple(f UInt64)))') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_nested_tuples:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'a Tuple(bb UInt64, c Tuple(d UInt64, e Tuple(f UInt64)))') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_nested_tuples:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'a Tuple(b UInt64, c Tuple(d UInt64, e Tuple(ff UInt64)))') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_nested_tuples:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; + +$CLICKHOUSE_CLIENT --query="SELECT number AS uint64 FROM numbers(5) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_simple_types:Message'" > $CAPN_PROTO_FILE +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 String') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 Array(UInt64)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 Enum(\'one\' = 1)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 Tuple(UInt64)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 Nullable(UInt64)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 Int32') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; + + +$CLICKHOUSE_CLIENT --query="SELECT number AS a, toString(number) as b FROM numbers(5) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_unnamed_union:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT toNullable(toString(number)) as nullable1 FROM numbers(5) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_fake_nullable:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT toNullable(toString(number)) as nullable2 FROM numbers(5) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_fake_nullable:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; + +rm $CAPN_PROTO_FILE +rm -rf $SCHEMADIR/$SERVER_SCHEMADIR diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_enum.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_enum.capnp new file mode 100644 index 00000000000..f033b177a45 --- /dev/null +++ 
b/tests/queries/0_stateless/format_schemas/02030_capnp_enum.capnp @@ -0,0 +1,13 @@ +@0x9ef128e10a8010b2; + +struct Message +{ + value @0 : EnumType; + + enum EnumType + { + one @0; + two @1; + tHrEe @2; + } +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_fake_nullable.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_fake_nullable.capnp new file mode 100644 index 00000000000..a027692e4bc --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_fake_nullable.capnp @@ -0,0 +1,23 @@ +@0xd8dd7b35452d1c4c; + +struct FakeNullable1 +{ + union + { + value @0 : Text; + null @1 : Void; + trash @2 : Text; + } +} + +struct FakeNullable2 +{ + value @0 : Text; + null @1 : Void; +} + +struct Message +{ + nullable1 @0 : FakeNullable1; + nullable2 @1 : FakeNullable2; +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_lists.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_lists.capnp new file mode 100644 index 00000000000..78fe3cf551e --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_lists.capnp @@ -0,0 +1,8 @@ +@0x9ef128e10a8010b7; + +struct Message +{ + value @0 : UInt64; + list1 @1 : List(UInt64); + list2 @2 : List(List(List(UInt64))); +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_low_cardinality.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_low_cardinality.capnp new file mode 100644 index 00000000000..0958889f0d8 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_low_cardinality.capnp @@ -0,0 +1,17 @@ +@0x9ef128e10a8010b7; + +struct NullableText +{ + union + { + value @0 : Text; + null @1 : Void; + } +} + +struct Message +{ + lc1 @0 : Text; + lc2 @1 : NullableText; + lc3 @2 : List(NullableText); +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_nested_lists_and_tuples.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_nested_lists_and_tuples.capnp new file mode 100644 index 00000000000..11fa99f62f5 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_nested_lists_and_tuples.capnp @@ -0,0 +1,36 @@ +@0x9ef128e10a8010b2; + +struct Nested1 +{ + b @0 : UInt64; + c @1 : List(List(UInt64)); +} + +struct Nested2 +{ + e @0 : List(List(Nested3)); + h @1 : List(Nested4); +} + +struct Nested3 +{ + f @0 : UInt64; + g @1 : UInt64; +} + +struct Nested4 +{ + k @0 : List(UInt64); +} + +struct Nested +{ + a @0 : Nested1; + d @1 : List(Nested2); +} + +struct Message +{ + value @0 : UInt64; + nested @1 : Nested; +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_nested_table.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_nested_table.capnp new file mode 100644 index 00000000000..42f17246d58 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_nested_table.capnp @@ -0,0 +1,20 @@ +@0x9ef128e10a8010b3; + + +struct Nested1 +{ + one @0 : UInt64; + two @1 : UInt64; +} + +struct Nested +{ + value @0 : List(UInt64); + array @1 : List(List(UInt64)); + tuple @2 : List(Nested1); +} + +struct Message +{ + nested @0 : Nested; +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_nested_tuples.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_nested_tuples.capnp new file mode 100644 index 00000000000..161c1bbaea6 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_nested_tuples.capnp @@ -0,0 +1,23 @@ +@0x9ef128e12a8010b2; + +struct Nested1 +{ + d @0 : UInt64; + e @1 : Nested2; +} + +struct Nested2 +{ + f @0 : UInt64; +} + +struct 
Nested +{ + b @0 : UInt64; + c @1 : Nested1; +} + +struct Message +{ + a @0 : Nested; +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_nullable.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_nullable.capnp new file mode 100644 index 00000000000..41254911710 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_nullable.capnp @@ -0,0 +1,22 @@ +@0x9ef128e10a8010b2; + +struct NullableUInt64 +{ + union + { + value @0 : UInt64; + null @1 : Void; + } +} + +struct Tuple +{ + nullable @0 : NullableUInt64; +} + +struct Message +{ + nullable @0 : NullableUInt64; + array @1 : List(NullableUInt64); + tuple @2 : Tuple; +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_simple_types.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_simple_types.capnp new file mode 100644 index 00000000000..a85bbbc511b --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_simple_types.capnp @@ -0,0 +1,21 @@ +@0xd9dd7b35452d1c4f; + +struct Message +{ + int8 @0 : Int8; + uint8 @1 : UInt8; + int16 @2 : Int16; + uint16 @3 : UInt16; + int32 @4 : Int32; + uint32 @5 : UInt32; + int64 @6 : Int64; + uint64 @7 : UInt64; + float32 @8 : Float32; + float64 @9 : Float64; + string @10 : Text; + fixed @11 : Text; + data @12 : Data; + date @13 : UInt16; + datetime @14 : UInt32; + datetime64 @15 : Int64; +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_tuples.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_tuples.capnp new file mode 100644 index 00000000000..21c3f0eb2e1 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_tuples.capnp @@ -0,0 +1,35 @@ +@0x9ef128e10a8010b8; + +struct Nested5 +{ + x @0 : UInt64; +} + +struct Nested4 +{ + nested2 @0 : Nested5; +} + +struct Nested3 +{ + nested1 @0 : Nested4; +} + +struct Nested2 +{ + three @0 : UInt64; + four @1 : UInt64; +} + +struct Nested1 +{ + one @0 : UInt64; + two @1 : Nested2; +} + +struct Message +{ + value @0 : UInt64; + tuple1 @1 : Nested1; + tuple2 @2 : Nested3; +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_unnamed_union.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_unnamed_union.capnp new file mode 100644 index 00000000000..9fb5e37bfea --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_unnamed_union.capnp @@ -0,0 +1,10 @@ +@0xd8dd7b35452d1c4f; + +struct Message +{ + union + { + a @0 : UInt64; + b @1 : Text; + } +} From 1cd938fbba61053e5a2d77b53afa14d7a35436ce Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 28 Sep 2021 16:07:00 +0300 Subject: [PATCH 209/438] Fix typo --- src/Formats/FormatSettings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index ce5f1effa8c..403ccbc6763 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -184,7 +184,7 @@ struct FormatSettings bool import_nested = false; } orc; - /// For apnProto format we should determine how to + /// For capnProto format we should determine how to /// compare ClickHouse Enum and Enum from schema. 
enum class EnumComparingMode { From c97f375728eb372ddc50a927372685bce7e5226a Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 28 Sep 2021 17:51:10 +0300 Subject: [PATCH 210/438] Fix style --- tests/queries/0_stateless/02030_capnp_format.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02030_capnp_format.sh b/tests/queries/0_stateless/02030_capnp_format.sh index 99807cc1738..03b43c007d8 100755 --- a/tests/queries/0_stateless/02030_capnp_format.sh +++ b/tests/queries/0_stateless/02030_capnp_format.sh @@ -12,7 +12,7 @@ touch $CAPN_PROTO_FILE SCHEMADIR=/$(clickhouse-client --query "select * from file('data.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)") CLIENT_SCHEMADIR=$CURDIR/format_schemas SERVER_SCHEMADIR=test_02030 -mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR +mkdir -p ${SCHEMADIR:?}/${SERVER_SCHEMADIR:?} cp -r $CLIENT_SCHEMADIR/02030_* $SCHEMADIR/$SERVER_SCHEMADIR/ From ed8818a773a82dc47ca4bb88e565267c8c954dcb Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 28 Sep 2021 20:03:03 +0300 Subject: [PATCH 211/438] Fix style, better check in enum comparison --- src/Formats/CapnProtoUtils.cpp | 12 +++++++----- tests/queries/0_stateless/02030_capnp_format.sh | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/Formats/CapnProtoUtils.cpp b/src/Formats/CapnProtoUtils.cpp index 9931785f43e..974688e7560 100644 --- a/src/Formats/CapnProtoUtils.cpp +++ b/src/Formats/CapnProtoUtils.cpp @@ -153,16 +153,18 @@ static bool checkEnums(const capnp::Type & capnp_type, const DataTypePtr column_ std::unordered_set capn_enum_values; auto enumerants = enum_schema.getEnumerants(); + /// In CapnProto Enum fields are numbered sequentially starting from zero. + if (mode == FormatSettings::EnumComparingMode::BY_VALUES && enumerants.size() > max_value) + { + error_message += "Enum from CapnProto schema contains values that is out of range for Clickhouse Enum"; + return false; + } + for (auto enumerant : enumerants) { String name = enumerant.getProto().getName(); capn_enum_names.insert(to_lower ? 
boost::algorithm::to_lower_copy(name) : name); auto value = enumerant.getOrdinal(); - if (mode == FormatSettings::EnumComparingMode::BY_VALUES && value > max_value) - { - error_message += "Enum from CapnProto schema contains value that is out of range for Clickhouse Enum"; - return false; - } capn_enum_values.insert(Type(value)); } diff --git a/tests/queries/0_stateless/02030_capnp_format.sh b/tests/queries/0_stateless/02030_capnp_format.sh index 03b43c007d8..1a0efe4ed07 100755 --- a/tests/queries/0_stateless/02030_capnp_format.sh +++ b/tests/queries/0_stateless/02030_capnp_format.sh @@ -106,4 +106,4 @@ $CLICKHOUSE_CLIENT --query="SELECT toNullable(toString(number)) as nullable1 FRO $CLICKHOUSE_CLIENT --query="SELECT toNullable(toString(number)) as nullable2 FROM numbers(5) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_fake_nullable:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; rm $CAPN_PROTO_FILE -rm -rf $SCHEMADIR/$SERVER_SCHEMADIR +rm -rf {$SCHEMADIR:?}/{$SERVER_SCHEMADIR:?} From 17ed293470d65738a0404ea53cff6cbda58b5a61 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 29 Sep 2021 14:21:20 +0300 Subject: [PATCH 212/438] Fix test --- tests/queries/0_stateless/02030_capnp_format.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02030_capnp_format.sh b/tests/queries/0_stateless/02030_capnp_format.sh index 1a0efe4ed07..e6592142560 100755 --- a/tests/queries/0_stateless/02030_capnp_format.sh +++ b/tests/queries/0_stateless/02030_capnp_format.sh @@ -12,7 +12,7 @@ touch $CAPN_PROTO_FILE SCHEMADIR=/$(clickhouse-client --query "select * from file('data.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)") CLIENT_SCHEMADIR=$CURDIR/format_schemas SERVER_SCHEMADIR=test_02030 -mkdir -p ${SCHEMADIR:?}/${SERVER_SCHEMADIR:?} +mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR cp -r $CLIENT_SCHEMADIR/02030_* $SCHEMADIR/$SERVER_SCHEMADIR/ @@ -106,4 +106,4 @@ $CLICKHOUSE_CLIENT --query="SELECT toNullable(toString(number)) as nullable1 FRO $CLICKHOUSE_CLIENT --query="SELECT toNullable(toString(number)) as nullable2 FROM numbers(5) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_fake_nullable:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; rm $CAPN_PROTO_FILE -rm -rf {$SCHEMADIR:?}/{$SERVER_SCHEMADIR:?} +rm -rf ${SCHEMADIR:?}/${SERVER_SCHEMADIR:?} From f88a2ad653f4a5ed2f0dc5a9d008020e91b0a09a Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 29 Sep 2021 15:08:53 +0300 Subject: [PATCH 213/438] Handle exception when cannot extract value from struct, add test for it --- src/Formats/CapnProtoUtils.cpp | 11 ++++++++++- .../queries/0_stateless/02030_capnp_format.reference | 1 + tests/queries/0_stateless/02030_capnp_format.sh | 4 ++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/Formats/CapnProtoUtils.cpp b/src/Formats/CapnProtoUtils.cpp index 974688e7560..9176579f672 100644 --- a/src/Formats/CapnProtoUtils.cpp +++ b/src/Formats/CapnProtoUtils.cpp @@ -331,7 +331,16 @@ capnp::DynamicValue::Reader getReaderByColumnName(const capnp::DynamicStruct::Re auto [field_name, nested_name] = splitFieldName(name); KJ_IF_MAYBE(field, struct_reader.getSchema().findFieldByName(field_name)) { - auto field_reader = struct_reader.get(*field); + capnp::DynamicValue::Reader field_reader; + try + { + field_reader = 
struct_reader.get(*field); + } + catch (const kj::Exception & e) + { + throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot extract field value from struct by provided schema, error: {} Perhaps the data was generated by another schema", String(e.getDescription().cStr())); + } + if (nested_name.empty()) return field_reader; diff --git a/tests/queries/0_stateless/02030_capnp_format.reference b/tests/queries/0_stateless/02030_capnp_format.reference index 2b2307bfc6a..8c3c81b5bc3 100644 --- a/tests/queries/0_stateless/02030_capnp_format.reference +++ b/tests/queries/0_stateless/02030_capnp_format.reference @@ -50,3 +50,4 @@ OK OK OK OK +OK diff --git a/tests/queries/0_stateless/02030_capnp_format.sh b/tests/queries/0_stateless/02030_capnp_format.sh index e6592142560..c24b85109da 100755 --- a/tests/queries/0_stateless/02030_capnp_format.sh +++ b/tests/queries/0_stateless/02030_capnp_format.sh @@ -92,6 +92,10 @@ $CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'a Tup $CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'a Tuple(bb UInt64, c Tuple(d UInt64, e Tuple(f UInt64)))') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_nested_tuples:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; $CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'a Tuple(b UInt64, c Tuple(d UInt64, e Tuple(ff UInt64)))') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_nested_tuples:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; + +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'string String') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL'; + + $CLICKHOUSE_CLIENT --query="SELECT number AS uint64 FROM numbers(5) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_simple_types:Message'" > $CAPN_PROTO_FILE $CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 String') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; $CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 Array(UInt64)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; From 9ddcdbba39bda24408874207762f8ffb669058df Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 30 Sep 2021 23:19:21 +0300 Subject: [PATCH 214/438] Add INCORRECT_DATA error code --- src/Formats/CapnProtoUtils.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Formats/CapnProtoUtils.cpp b/src/Formats/CapnProtoUtils.cpp index 9176579f672..2cc20abedd0 100644 --- a/src/Formats/CapnProtoUtils.cpp +++ b/src/Formats/CapnProtoUtils.cpp @@ -25,6 +25,7 @@ namespace ErrorCodes extern const int CAPN_PROTO_BAD_CAST; extern const int FILE_DOESNT_EXIST; extern const int UNKNOWN_EXCEPTION; + extern const int INCORRECT_DATA; } capnp::StructSchema CapnProtoSchemaParser::getMessageSchema(const FormatSchemaInfo & schema_info) From 5d16dc7f9aa82b9952578e6672cc9ab84bd5f0d4 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 4 Oct 2021 16:02:32 +0300 Subject: [PATCH 215/438] Try to fix tests, update capnp lib to eliminate problem with UB sanitizer --- contrib/capnproto | 2 +- contrib/capnproto-cmake/CMakeLists.txt | 1 + 
src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp | 6 +++--- tests/queries/0_stateless/02030_capnp_format.sh | 4 ++-- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/contrib/capnproto b/contrib/capnproto index a00ccd91b37..c8189ec3c27 160000 --- a/contrib/capnproto +++ b/contrib/capnproto @@ -1 +1 @@ -Subproject commit a00ccd91b3746ef2ab51d40fe3265829949d1ace +Subproject commit c8189ec3c27dacbd4a3288e682473010e377f593 diff --git a/contrib/capnproto-cmake/CMakeLists.txt b/contrib/capnproto-cmake/CMakeLists.txt index 9f6e076cc7d..274be8c5eeb 100644 --- a/contrib/capnproto-cmake/CMakeLists.txt +++ b/contrib/capnproto-cmake/CMakeLists.txt @@ -45,6 +45,7 @@ set (CAPNP_SRCS "${CAPNPROTO_SOURCE_DIR}/capnp/serialize-packed.c++" "${CAPNPROTO_SOURCE_DIR}/capnp/schema.c++" + "${CAPNPROTO_SOURCE_DIR}/capnp/stream.capnp.c++" "${CAPNPROTO_SOURCE_DIR}/capnp/schema-loader.c++" "${CAPNPROTO_SOURCE_DIR}/capnp/dynamic.c++" "${CAPNPROTO_SOURCE_DIR}/capnp/stringify.c++" diff --git a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp index b299e1fc00a..d256fe8f160 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp @@ -129,8 +129,8 @@ static std::optional convertToDynamicValue(const Co } case capnp::DynamicValue::Type::TEXT: { - auto data = String(column->getDataAt(row_num)); - return capnp::DynamicValue::Reader(capnp::Text::Reader(data.data(), data.size())); + auto data = column->getDataAt(row_num); + return capnp::DynamicValue::Reader(capnp::Text::Reader(data.data, data.size)); } case capnp::DynamicValue::Type::STRUCT: { @@ -220,7 +220,7 @@ void CapnProtoRowOutputFormat::write(const Columns & columns, size_t row_num) auto field_builder = initStructFieldBuilder(columns[i], row_num, struct_builder, field); auto value = convertToDynamicValue(columns[i], column_types[i], row_num, field_builder, format_settings.capn_proto.enum_comparing_mode); if (value) - struct_builder.set(field, std::move(*value)); + struct_builder.set(field, *value); } capnp::writeMessage(*output_stream, message); diff --git a/tests/queries/0_stateless/02030_capnp_format.sh b/tests/queries/0_stateless/02030_capnp_format.sh index c24b85109da..23e626d6d96 100755 --- a/tests/queries/0_stateless/02030_capnp_format.sh +++ b/tests/queries/0_stateless/02030_capnp_format.sh @@ -5,11 +5,11 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +USER_FILES_PATH=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') CAPN_PROTO_FILE=$USER_FILES_PATH/data.capnp touch $CAPN_PROTO_FILE -SCHEMADIR=/$(clickhouse-client --query "select * from file('data.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)") +SCHEMADIR=/$($CLICKHOUSE_CLIENT --query "select * from file('data.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)") CLIENT_SCHEMADIR=$CURDIR/format_schemas SERVER_SCHEMADIR=test_02030 mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR From dd4421d4b1131c246f762646abbd4534aa7a8489 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 5 Oct 2021 14:12:54 +0300 Subject: [PATCH 216/438] Fix build --- contrib/capnproto-cmake/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/contrib/capnproto-cmake/CMakeLists.txt b/contrib/capnproto-cmake/CMakeLists.txt index 274be8c5eeb..05446355535 100644 --- a/contrib/capnproto-cmake/CMakeLists.txt +++ b/contrib/capnproto-cmake/CMakeLists.txt @@ -64,6 +64,7 @@ set (CAPNPC_SRCS "${CAPNPROTO_SOURCE_DIR}/capnp/compiler/lexer.c++" "${CAPNPROTO_SOURCE_DIR}/capnp/compiler/grammar.capnp.c++" "${CAPNPROTO_SOURCE_DIR}/capnp/compiler/parser.c++" + "${CAPNPROTO_SOURCE_DIR}/capnp/compiler/generics.c++" "${CAPNPROTO_SOURCE_DIR}/capnp/compiler/node-translator.c++" "${CAPNPROTO_SOURCE_DIR}/capnp/compiler/compiler.c++" "${CAPNPROTO_SOURCE_DIR}/capnp/schema-parser.c++" From 95790b8a1c25d293b227d2e968a16d5a4d918e68 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 6 Oct 2021 13:51:00 +0300 Subject: [PATCH 217/438] Update CapnProtoUtils.cpp --- src/Formats/CapnProtoUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Formats/CapnProtoUtils.cpp b/src/Formats/CapnProtoUtils.cpp index 2cc20abedd0..59f63243e28 100644 --- a/src/Formats/CapnProtoUtils.cpp +++ b/src/Formats/CapnProtoUtils.cpp @@ -43,7 +43,7 @@ capnp::StructSchema CapnProtoSchemaParser::getMessageSchema(const FormatSchemaIn /// That's not good to determine the type of error by its description, but /// this is the only way to do it here, because kj doesn't specify the type of error. 
String description = String(e.getDescription().cStr()); - if (description.starts_with("no such directory")) + if (description.starts_with("No such file or directory")) throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot open CapnProto schema, file {} doesn't exists", schema_info.absoluteSchemaPath()); if (description.starts_with("Parse error")) From 9ec6930c152af476cbaba2994419c73509b93d9a Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 6 Oct 2021 21:12:49 +0300 Subject: [PATCH 218/438] Better exception handling --- src/Formats/CapnProtoUtils.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Formats/CapnProtoUtils.cpp b/src/Formats/CapnProtoUtils.cpp index 59f63243e28..1f0e6cf2cac 100644 --- a/src/Formats/CapnProtoUtils.cpp +++ b/src/Formats/CapnProtoUtils.cpp @@ -43,10 +43,10 @@ capnp::StructSchema CapnProtoSchemaParser::getMessageSchema(const FormatSchemaIn /// That's not good to determine the type of error by its description, but /// this is the only way to do it here, because kj doesn't specify the type of error. String description = String(e.getDescription().cStr()); - if (description.starts_with("No such file or directory")) + if (description.find("No such file or directory") != String::npos || description.find("no such directory") != String::npos) throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot open CapnProto schema, file {} doesn't exists", schema_info.absoluteSchemaPath()); - if (description.starts_with("Parse error")) + if (description.find("Parse error") != String::npos) throw Exception(ErrorCodes::CANNOT_PARSE_CAPN_PROTO_SCHEMA, "Cannot parse CapnProto schema {}:{}", schema_info.schemaPath(), e.getLine()); throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Unknown exception while parsing CapnProro schema: {}, schema dir and file: {}, {}", description, schema_info.schemaDirectory(), schema_info.schemaPath()); From 9b909f3f30f93b44eaf65ee8433733f75abfd99c Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 7 Oct 2021 10:58:37 +0300 Subject: [PATCH 219/438] Try to fix test --- tests/queries/0_stateless/02030_capnp_format.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02030_capnp_format.sh b/tests/queries/0_stateless/02030_capnp_format.sh index 23e626d6d96..02c4fc96c82 100755 --- a/tests/queries/0_stateless/02030_capnp_format.sh +++ b/tests/queries/0_stateless/02030_capnp_format.sh @@ -5,11 +5,11 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -USER_FILES_PATH=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') CAPN_PROTO_FILE=$USER_FILES_PATH/data.capnp touch $CAPN_PROTO_FILE -SCHEMADIR=/$($CLICKHOUSE_CLIENT --query "select * from file('data.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)") +SCHEMADIR=$(clickhouse-client --query "select * from file('data.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)") CLIENT_SCHEMADIR=$CURDIR/format_schemas SERVER_SCHEMADIR=test_02030 mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR From f754881e1fd8a42764bfef0b74973abba415808e Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 7 Oct 2021 15:28:01 +0300 Subject: [PATCH 220/438] Fix output String data into Text CapnProto type --- .../Formats/Impl/CapnProtoRowOutputFormat.cpp | 33 ++++++++++++++----- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp index d256fe8f160..2e32c962177 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp @@ -88,7 +88,13 @@ static capnp::DynamicValue::Builder initStructFieldBuilder(const ColumnPtr & col return struct_builder.get(field); } -static std::optional convertToDynamicValue(const ColumnPtr & column, const DataTypePtr & data_type, size_t row_num, capnp::DynamicValue::Builder builder, FormatSettings::EnumComparingMode enum_comparing_mode) +static std::optional convertToDynamicValue( + const ColumnPtr & column, + const DataTypePtr & data_type, + size_t row_num, + capnp::DynamicValue::Builder builder, + FormatSettings::EnumComparingMode enum_comparing_mode, + std::vector> & temporary_text_data_storage) { /// Here we don't do any types validation, because we did it in CapnProtoRowOutputFormat constructor. @@ -97,7 +103,7 @@ static std::optional convertToDynamicValue(const Co const auto * lc_column = assert_cast(column.get()); const auto & dict_type = assert_cast(data_type.get())->getDictionaryType(); size_t index = lc_column->getIndexAt(row_num); - return convertToDynamicValue(lc_column->getDictionary().getNestedColumn(), dict_type, index, builder, enum_comparing_mode); + return convertToDynamicValue(lc_column->getDictionary().getNestedColumn(), dict_type, index, builder, enum_comparing_mode, temporary_text_data_storage); } switch (builder.getType()) @@ -129,8 +135,16 @@ static std::optional convertToDynamicValue(const Co } case capnp::DynamicValue::Type::TEXT: { - auto data = column->getDataAt(row_num); - return capnp::DynamicValue::Reader(capnp::Text::Reader(data.data, data.size)); + /// In TEXT type data should be null-terminated, but ClickHouse String data could not be. + /// To make data null-terminated we should copy it to temporary String object, but + /// capnp::Text::Reader works only with pointer to the data and it's size, so we should + /// guarantee that new String object life time is longer than capnp::Text::Reader life time. 
+ /// To do this we store new String object in a temporary storage, passed in this function + /// by reference. We use unique_ptr instead of just String to avoid pointers + /// invalidation on vector reallocation. + temporary_text_data_storage.push_back(std::make_unique(column->getDataAt(row_num))); + auto & data = temporary_text_data_storage.back(); + return capnp::DynamicValue::Reader(capnp::Text::Reader(data->data(), data->size())); } case capnp::DynamicValue::Type::STRUCT: { @@ -153,7 +167,7 @@ static std::optional convertToDynamicValue(const Co struct_builder.clear(value_field); const auto & nested_column = nullable_column->getNestedColumnPtr(); auto value_builder = initStructFieldBuilder(nested_column, row_num, struct_builder, value_field); - auto value = convertToDynamicValue(nested_column, nullable_type->getNestedType(), row_num, value_builder, enum_comparing_mode); + auto value = convertToDynamicValue(nested_column, nullable_type->getNestedType(), row_num, value_builder, enum_comparing_mode, temporary_text_data_storage); if (value) struct_builder.set(value_field, std::move(*value)); } @@ -168,7 +182,7 @@ static std::optional convertToDynamicValue(const Co auto pos = tuple_data_type->getPositionByName(name); auto field_builder = initStructFieldBuilder(nested_columns[pos], row_num, struct_builder, nested_struct_schema.getFieldByName(name)); - auto value = convertToDynamicValue(nested_columns[pos], nested_types[pos], row_num, field_builder, enum_comparing_mode); + auto value = convertToDynamicValue(nested_columns[pos], nested_types[pos], row_num, field_builder, enum_comparing_mode, temporary_text_data_storage); if (value) struct_builder.set(name, std::move(*value)); } @@ -199,7 +213,7 @@ static std::optional convertToDynamicValue(const Co else value_builder = list_builder[i]; - auto value = convertToDynamicValue(nested_column, nested_type, offset + i, value_builder, enum_comparing_mode); + auto value = convertToDynamicValue(nested_column, nested_type, offset + i, value_builder, enum_comparing_mode, temporary_text_data_storage); if (value) list_builder.set(i, std::move(*value)); } @@ -213,12 +227,15 @@ static std::optional convertToDynamicValue(const Co void CapnProtoRowOutputFormat::write(const Columns & columns, size_t row_num) { capnp::MallocMessageBuilder message; + /// Temporary storage for data that will be outputted in fields with CapnProto type TEXT. + /// See comment in convertToDynamicValue() for more details. 
+ std::vector> temporary_text_data_storage; capnp::DynamicStruct::Builder root = message.initRoot(schema); for (size_t i = 0; i != columns.size(); ++i) { auto [struct_builder, field] = getStructBuilderAndFieldByColumnName(root, column_names[i]); auto field_builder = initStructFieldBuilder(columns[i], row_num, struct_builder, field); - auto value = convertToDynamicValue(columns[i], column_types[i], row_num, field_builder, format_settings.capn_proto.enum_comparing_mode); + auto value = convertToDynamicValue(columns[i], column_types[i], row_num, field_builder, format_settings.capn_proto.enum_comparing_mode, temporary_text_data_storage); if (value) struct_builder.set(field, *value); } From 5daed60eaec542047682e279f49ed0c65b8116a2 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 8 Oct 2021 13:23:27 +0300 Subject: [PATCH 221/438] Skip test in case of replicated database --- tests/queries/0_stateless/02030_capnp_format.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02030_capnp_format.sh b/tests/queries/0_stateless/02030_capnp_format.sh index 02c4fc96c82..aa2fe6c1b35 100755 --- a/tests/queries/0_stateless/02030_capnp_format.sh +++ b/tests/queries/0_stateless/02030_capnp_format.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel +# Tags: no-fasttest, no-parallel, no-replicated-database CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 476d7a411f37666adb627206e4a1e11705dea688 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 14 Oct 2021 16:44:28 +0300 Subject: [PATCH 222/438] allow to remove SAMPLE BY expression --- src/Databases/DatabaseOnDisk.cpp | 2 + src/Interpreters/InterpreterAlterQuery.cpp | 1 + src/Parsers/ASTAlterQuery.cpp | 4 ++ src/Parsers/ASTAlterQuery.h | 1 + src/Parsers/ParserAlterQuery.cpp | 5 +++ src/Storages/AlterCommands.cpp | 17 ++++++- src/Storages/AlterCommands.h | 3 +- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 11 ++++- .../02097_remove_sample_by.reference | 3 ++ .../0_stateless/02097_remove_sample_by.sql | 44 +++++++++++++++++++ 11 files changed, 88 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/02097_remove_sample_by.reference create mode 100644 tests/queries/0_stateless/02097_remove_sample_by.sql diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 97e59f53f64..638aef7186c 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -180,6 +180,8 @@ void applyMetadataChangesToCreateQuery(const ASTPtr & query, const StorageInMemo if (metadata.sampling_key.definition_ast) storage_ast.set(storage_ast.sample_by, metadata.sampling_key.definition_ast); + else if (storage_ast.sample_by != nullptr) /// SAMPLE BY was removed + storage_ast.sample_by = nullptr; if (metadata.table_ttl.definition_ast) storage_ast.set(storage_ast.ttl_table, metadata.table_ttl.definition_ast); diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index 1c613758ecc..e595bd580b3 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -270,6 +270,7 @@ AccessRightsElements InterpreterAlterQuery::getRequiredAccessForCommand(const AS required_access.emplace_back(AccessType::ALTER_ORDER_BY, database, table); break; } + case ASTAlterCommand::REMOVE_SAMPLE_BY: case ASTAlterCommand::MODIFY_SAMPLE_BY: { required_access.emplace_back(AccessType::ALTER_SAMPLE_BY, 
database, table); diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index 53d53bf5ae1..d3153952114 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -157,6 +157,10 @@ void ASTAlterCommand::formatImpl(const FormatSettings & settings, FormatState & settings.ostr << (settings.hilite ? hilite_keyword : "") << "MODIFY SAMPLE BY " << (settings.hilite ? hilite_none : ""); sample_by->formatImpl(settings, state, frame); } + else if (type == ASTAlterCommand::REMOVE_SAMPLE_BY) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "REMOVE SAMPLE BY" << (settings.hilite ? hilite_none : ""); + } else if (type == ASTAlterCommand::ADD_INDEX) { settings.ostr << (settings.hilite ? hilite_keyword : "") << "ADD INDEX " << (if_not_exists ? "IF NOT EXISTS " : "") diff --git a/src/Parsers/ASTAlterQuery.h b/src/Parsers/ASTAlterQuery.h index 9b40586e09f..3e0d9219549 100644 --- a/src/Parsers/ASTAlterQuery.h +++ b/src/Parsers/ASTAlterQuery.h @@ -41,6 +41,7 @@ public: RESET_SETTING, MODIFY_QUERY, REMOVE_TTL, + REMOVE_SAMPLE_BY, ADD_INDEX, DROP_INDEX, diff --git a/src/Parsers/ParserAlterQuery.cpp b/src/Parsers/ParserAlterQuery.cpp index 2eade2079da..1ea64d94fe7 100644 --- a/src/Parsers/ParserAlterQuery.cpp +++ b/src/Parsers/ParserAlterQuery.cpp @@ -104,6 +104,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserKeyword s_ttl("TTL"); ParserKeyword s_remove_ttl("REMOVE TTL"); + ParserKeyword s_remove_sample_by("REMOVE SAMPLE BY"); ParserCompoundIdentifier parser_name; ParserStringLiteral parser_string_literal; @@ -669,6 +670,10 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected command->type = ASTAlterCommand::MODIFY_SAMPLE_BY; } + else if (s_remove_sample_by.ignore(pos, expected)) + { + command->type = ASTAlterCommand::REMOVE_SAMPLE_BY; + } else if (s_delete.ignore(pos, expected)) { if (s_in_partition.ignore(pos, expected)) diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 1d057d1bb10..c5101f162ee 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -205,6 +205,13 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ command.sample_by = command_ast->sample_by; return command; } + else if (command_ast->type == ASTAlterCommand::REMOVE_SAMPLE_BY) + { + AlterCommand command; + command.ast = command_ast->clone(); + command.type = AlterCommand::REMOVE_SAMPLE_BY; + return command; + } else if (command_ast->type == ASTAlterCommand::ADD_INDEX) { AlterCommand command; @@ -463,6 +470,10 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) { metadata.sampling_key.recalculateWithNewAST(sample_by, metadata.columns, context); } + else if (type == REMOVE_SAMPLE_BY) + { + metadata.sampling_key = {}; + } else if (type == COMMENT_COLUMN) { metadata.columns.modify(column_name, @@ -745,7 +756,7 @@ bool AlterCommand::isRequireMutationStage(const StorageInMemoryMetadata & metada return false; /// We remove properties on metadata level - if (isRemovingProperty() || type == REMOVE_TTL) + if (isRemovingProperty() || type == REMOVE_TTL || type == REMOVE_SAMPLE_BY) return false; if (type == DROP_COLUMN || type == DROP_INDEX || type == DROP_PROJECTION || type == RENAME_COLUMN) @@ -1208,6 +1219,10 @@ void AlterCommands::validate(const StorageInMemoryMetadata & metadata, ContextPt { throw Exception{"Table doesn't have any table TTL expression, cannot remove", ErrorCodes::BAD_ARGUMENTS}; } + else if 
(command.type == AlterCommand::REMOVE_SAMPLE_BY && !metadata.hasSamplingKey()) + { + throw Exception{"Table doesn't have SAMPLE BY, cannot remove", ErrorCodes::BAD_ARGUMENTS}; + } /// Collect default expressions for MODIFY and ADD comands if (command.type == AlterCommand::MODIFY_COLUMN || command.type == AlterCommand::ADD_COLUMN) diff --git a/src/Storages/AlterCommands.h b/src/Storages/AlterCommands.h index 046238bd5f5..dce6b496741 100644 --- a/src/Storages/AlterCommands.h +++ b/src/Storages/AlterCommands.h @@ -45,7 +45,8 @@ struct AlterCommand RENAME_COLUMN, REMOVE_TTL, MODIFY_DATABASE_SETTING, - COMMENT_TABLE + COMMENT_TABLE, + REMOVE_SAMPLE_BY, }; /// Which property user wants to remove from column diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 790b95a9fa9..3866f760a36 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -249,7 +249,7 @@ MergeTreeData::MergeTreeData( { /// This is for backward compatibility. checkSampleExpression(metadata_, attach || settings->compatibility_allow_sampling_expression_not_in_primary_key, - settings->check_sample_column_is_correct); + settings->check_sample_column_is_correct && !attach); } checkTTLExpressions(metadata_, metadata_); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 416d37cd351..5677ae6604f 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1016,8 +1016,15 @@ void StorageReplicatedMergeTree::setTableStructure( if (metadata_diff.sampling_expression_changed) { - auto sample_by_ast = parse_key_expr(metadata_diff.new_sampling_expression); - new_metadata.sampling_key.recalculateWithNewAST(sample_by_ast, new_metadata.columns, getContext()); + if (!metadata_diff.new_sampling_expression.empty()) + { + auto sample_by_ast = parse_key_expr(metadata_diff.new_sampling_expression); + new_metadata.sampling_key.recalculateWithNewAST(sample_by_ast, new_metadata.columns, getContext()); + } + else /// SAMPLE BY was removed + { + new_metadata.sampling_key = {}; + } } if (metadata_diff.skip_indices_changed) diff --git a/tests/queries/0_stateless/02097_remove_sample_by.reference b/tests/queries/0_stateless/02097_remove_sample_by.reference new file mode 100644 index 00000000000..0747bbd5d1f --- /dev/null +++ b/tests/queries/0_stateless/02097_remove_sample_by.reference @@ -0,0 +1,3 @@ +CREATE TABLE default.t_remove_sample_by\n(\n `id` UInt64\n)\nENGINE = MergeTree\nORDER BY id\nSETTINGS index_granularity = 8192 +CREATE TABLE default.t_remove_sample_by\n(\n `id` UInt64\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/default/t_remove_sample_by\', \'1\')\nORDER BY id\nSETTINGS index_granularity = 8192 +CREATE TABLE default.t_remove_sample_by\n(\n `id` String\n)\nENGINE = MergeTree\nORDER BY id\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/02097_remove_sample_by.sql b/tests/queries/0_stateless/02097_remove_sample_by.sql new file mode 100644 index 00000000000..89fbfe0c4c5 --- /dev/null +++ b/tests/queries/0_stateless/02097_remove_sample_by.sql @@ -0,0 +1,44 @@ +-- Tags: zookeeper + +DROP TABLE IF EXISTS t_remove_sample_by; + +CREATE TABLE t_remove_sample_by(id UInt64) ENGINE = MergeTree ORDER BY id SAMPLE BY id; + +ALTER TABLE t_remove_sample_by REMOVE SAMPLE BY; +SHOW CREATE TABLE t_remove_sample_by; + +ALTER TABLE t_remove_sample_by REMOVE SAMPLE BY; -- { serverError 36 } +SELECT * FROM t_remove_sample_by SAMPLE 1 / 10; 
-- { serverError 141 } + +DROP TABLE t_remove_sample_by; + +CREATE TABLE t_remove_sample_by(id UInt64) +ENGINE = ReplicatedMergeTree('/clickhouse/{database}/t_remove_sample_by', '1') +ORDER BY id SAMPLE BY id; + +ALTER TABLE t_remove_sample_by REMOVE SAMPLE BY; +SHOW CREATE TABLE t_remove_sample_by; + +DROP TABLE t_remove_sample_by; + +CREATE TABLE t_remove_sample_by(id UInt64) ENGINE = Memory; +ALTER TABLE t_remove_sample_by REMOVE SAMPLE BY; -- { serverError 36 } + +DROP TABLE t_remove_sample_by; + +CREATE TABLE t_remove_sample_by(id String) +ENGINE = MergeTree ORDER BY id SAMPLE BY id +SETTINGS check_sample_column_is_correct = 0; + +ALTER TABLE t_remove_sample_by RESET SETTING check_sample_column_is_correct; + +DETACH TABLE t_remove_sample_by; +ATTACH TABLE t_remove_sample_by; + +INSERT INTO t_remove_sample_by VALUES (1); +SELECT * FROM t_remove_sample_by SAMPLE 1 / 10; -- { serverError 59 } + +ALTER TABLE t_remove_sample_by REMOVE SAMPLE BY; +SHOW CREATE TABLE t_remove_sample_by; + +DROP TABLE t_remove_sample_by; From 4ebb2455d8ad7e4b3a9bf337e4accd2c2eb45450 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 14 Oct 2021 17:56:34 +0300 Subject: [PATCH 223/438] Delete 01939_network_send_bytes_metrics test --- .../01939_network_send_bytes_metrics.reference | 1 - .../01939_network_send_bytes_metrics.sh | 16 ---------------- 2 files changed, 17 deletions(-) delete mode 100644 tests/queries/0_stateless/01939_network_send_bytes_metrics.reference delete mode 100755 tests/queries/0_stateless/01939_network_send_bytes_metrics.sh diff --git a/tests/queries/0_stateless/01939_network_send_bytes_metrics.reference b/tests/queries/0_stateless/01939_network_send_bytes_metrics.reference deleted file mode 100644 index d00491fd7e5..00000000000 --- a/tests/queries/0_stateless/01939_network_send_bytes_metrics.reference +++ /dev/null @@ -1 +0,0 @@ -1 diff --git a/tests/queries/0_stateless/01939_network_send_bytes_metrics.sh b/tests/queries/0_stateless/01939_network_send_bytes_metrics.sh deleted file mode 100755 index 840b4f54706..00000000000 --- a/tests/queries/0_stateless/01939_network_send_bytes_metrics.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -${CLICKHOUSE_CLIENT} --multiquery --query "DROP TABLE IF EXISTS t; CREATE TABLE t (x UInt64) ENGINE = Memory;" - -${CLICKHOUSE_CLIENT} --query "SELECT number FROM numbers(1000)" > /dev/null - -${CLICKHOUSE_CLIENT} --multiquery --query "SYSTEM FLUSH LOGS; - WITH ProfileEvents['NetworkSendBytes'] AS bytes - SELECT bytes >= 8000 AND bytes < 9500 ? 
1 : bytes FROM system.query_log - WHERE current_database = currentDatabase() AND query_kind = 'Select' AND event_date >= yesterday() AND type = 2 ORDER BY event_time DESC LIMIT 1;" - -${CLICKHOUSE_CLIENT} --query "DROP TABLE t" From 2f6d771d94a764f57b58140de5cc0bcb709f321f Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 14 Oct 2021 18:15:30 +0300 Subject: [PATCH 224/438] Remove trailing whitespace --- src/Common/ProgressIndication.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/ProgressIndication.cpp b/src/Common/ProgressIndication.cpp index 1f8fc949886..bf3397f50e1 100644 --- a/src/Common/ProgressIndication.cpp +++ b/src/Common/ProgressIndication.cpp @@ -126,7 +126,7 @@ ProgressIndication::MemoryUsage ProgressIndication::getMemoryUsage() const { return memory + data.second.memory_usage; }); - return MemoryUsage{.total = acc.total + host_usage, .max = std::max(acc.max, host_usage)}; + return MemoryUsage{.total = acc.total + host_usage, .max = std::max(acc.max, host_usage)}; }); } From 8d1c51c422ed16ee8a5548f72aba360a73230ffa Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 14 Oct 2021 18:18:04 +0300 Subject: [PATCH 225/438] Update Client.cpp --- programs/client/Client.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 45314a5d460..a5e4bd45c7f 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -89,7 +89,6 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; extern const int TOO_DEEP_RECURSION; extern const int NETWORK_ERROR; - extern const int UNRECOGNIZED_ARGUMENTS; extern const int AUTHENTICATION_FAILED; } From 693060552a2ef5a76e109ab05738ca3580a53c3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien?= Date: Thu, 14 Oct 2021 17:40:14 +0200 Subject: [PATCH 226/438] add support of window function in antlr grammar --- utils/antlr/ClickHouseLexer.g4 | 8 +++++++ utils/antlr/ClickHouseParser.g4 | 38 +++++++++++++++++++++++---------- 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/utils/antlr/ClickHouseLexer.g4 b/utils/antlr/ClickHouseLexer.g4 index 8a1debaf412..0c087bff68c 100644 --- a/utils/antlr/ClickHouseLexer.g4 +++ b/utils/antlr/ClickHouseLexer.g4 @@ -35,6 +35,7 @@ CONSTRAINT: C O N S T R A I N T; CREATE: C R E A T E; CROSS: C R O S S; CUBE: C U B E; +CURRENT: C U R R E N T; DATABASE: D A T A B A S E; DATABASES: D A T A B A S E S; DATE: D A T E; @@ -65,6 +66,7 @@ FETCHES: F E T C H E S; FINAL: F I N A L; FIRST: F I R S T; FLUSH: F L U S H; +FOLLOWING: F O L L O W I N G; FOR: F O R; FORMAT: F O R M A T; FREEZE: F R E E Z E; @@ -125,8 +127,10 @@ OR: O R; ORDER: O R D E R; OUTER: O U T E R; OUTFILE: O U T F I L E; +OVER: O V E R; PARTITION: P A R T I T I O N; POPULATE: P O P U L A T E; +PRECEDING: P R E C E D I N G; PREWHERE: P R E W H E R E; PRIMARY: P R I M A R Y; PROJECTION: P R O J E C T I O N; @@ -140,6 +144,8 @@ REPLICA: R E P L I C A; REPLICATED: R E P L I C A T E D; RIGHT: R I G H T; ROLLUP: R O L L U P; +ROW: R O W; +ROWS: R O W S; SAMPLE: S A M P L E; SECOND: S E C O N D; SELECT: S E L E C T; @@ -171,6 +177,7 @@ TRIM: T R I M; TRUNCATE: T R U N C A T E; TTL: T T L; TYPE: T Y P E; +UNBOUNDED: U N B O U N D E D; UNION: U N I O N; UPDATE: U P D A T E; USE: U S E; @@ -183,6 +190,7 @@ WATCH: W A T C H; WEEK: W E E K; WHEN: W H E N; WHERE: W H E R E; +WINDOW: W I N D O W; WITH: W I T H; YEAR: Y E A R | Y Y Y Y; diff --git a/utils/antlr/ClickHouseParser.g4 b/utils/antlr/ClickHouseParser.g4 
index eb1908ed073..24db6478aa0 100644 --- a/utils/antlr/ClickHouseParser.g4 +++ b/utils/antlr/ClickHouseParser.g4 @@ -243,6 +243,7 @@ selectStmt: SELECT DISTINCT? topClause? columnExprList fromClause? arrayJoinClause? + windowClause? prewhereClause? whereClause? groupByClause? (WITH (CUBE | ROLLUP))? (WITH TOTALS)? @@ -257,6 +258,7 @@ withClause: WITH columnExprList; topClause: TOP DECIMAL_LITERAL (WITH TIES)?; fromClause: FROM joinExpr; arrayJoinClause: (LEFT | INNER)? ARRAY JOIN columnExprList; +windowClause: WINDOW identifier AS LPAREN windowExpr RPAREN; prewhereClause: PREWHERE columnExpr; whereClause: WHERE columnExpr; groupByClause: GROUP BY ((CUBE | ROLLUP) LPAREN columnExprList RPAREN | columnExprList); @@ -298,6 +300,18 @@ ratioExpr: numberLiteral (SLASH numberLiteral)?; settingExprList: settingExpr (COMMA settingExpr)*; settingExpr: identifier EQ_SINGLE literal; +windowExpr: winPartitionByClause? winOrderByClause? winFrameClause?; +winPartitionByClause: PARTITION BY columnExprList; +winOrderByClause: ORDER BY orderExprList; +winFrameClause: (ROWS | RANGE) winFrameExtend; +winFrameExtend + : winFrameBound # frameStart + | BETWEEN winFrameBound AND winFrameBound # frameBetween + ; +winFrameBound: (CURRENT ROW | UNBOUNDED PRECEDING | UNBOUNDED FOLLOWING | numberLiteral PRECEDING | numberLiteral FOLLOWING); +//rangeClause: RANGE LPAREN (MIN identifier MAX identifier | MAX identifier MIN identifier) RPAREN; + + // SET statement setStmt: SET settingExprList; @@ -364,6 +378,8 @@ columnExpr | SUBSTRING LPAREN columnExpr FROM columnExpr (FOR columnExpr)? RPAREN # ColumnExprSubstring | TIMESTAMP STRING_LITERAL # ColumnExprTimestamp | TRIM LPAREN (BOTH | LEADING | TRAILING) STRING_LITERAL FROM columnExpr RPAREN # ColumnExprTrim + | identifier (LPAREN columnExprList? RPAREN) OVER LPAREN windowExpr RPAREN # ColumnExprWinFunction + | identifier (LPAREN columnExprList? RPAREN) OVER identifier # ColumnExprWinFunctionTarget | identifier (LPAREN columnExprList? RPAREN)? LPAREN DISTINCT? columnArgList? 
RPAREN # ColumnExprFunction | literal # ColumnExprLiteral @@ -454,17 +470,17 @@ interval: SECOND | MINUTE | HOUR | DAY | WEEK | MONTH | QUARTER | YEAR; keyword // except NULL_SQL, INF, NAN_SQL : AFTER | ALIAS | ALL | ALTER | AND | ANTI | ANY | ARRAY | AS | ASCENDING | ASOF | AST | ASYNC | ATTACH | BETWEEN | BOTH | BY | CASE - | CAST | CHECK | CLEAR | CLUSTER | CODEC | COLLATE | COLUMN | COMMENT | CONSTRAINT | CREATE | CROSS | CUBE | DATABASE | DATABASES - | DATE | DEDUPLICATE | DEFAULT | DELAY | DELETE | DESCRIBE | DESC | DESCENDING | DETACH | DICTIONARIES | DICTIONARY | DISK | DISTINCT - | DISTRIBUTED | DROP | ELSE | END | ENGINE | EVENTS | EXISTS | EXPLAIN | EXPRESSION | EXTRACT | FETCHES | FINAL | FIRST | FLUSH | FOR - | FORMAT | FREEZE | FROM | FULL | FUNCTION | GLOBAL | GRANULARITY | GROUP | HAVING | HIERARCHICAL | ID | IF | ILIKE | IN | INDEX - | INJECTIVE | INNER | INSERT | INTERVAL | INTO | IS | IS_OBJECT_ID | JOIN | JSON_FALSE | JSON_TRUE | KEY | KILL | LAST | LAYOUT - | LEADING | LEFT | LIFETIME | LIKE | LIMIT | LIVE | LOCAL | LOGS | MATERIALIZE | MATERIALIZED | MAX | MERGES | MIN | MODIFY | MOVE - | MUTATION | NO | NOT | NULLS | OFFSET | ON | OPTIMIZE | OR | ORDER | OUTER | OUTFILE | PARTITION | POPULATE | PREWHERE | PRIMARY - | RANGE | RELOAD | REMOVE | RENAME | REPLACE | REPLICA | REPLICATED | RIGHT | ROLLUP | SAMPLE | SELECT | SEMI | SENDS | SET | SETTINGS - | SHOW | SOURCE | START | STOP | SUBSTRING | SYNC | SYNTAX | SYSTEM | TABLE | TABLES | TEMPORARY | TEST | THEN | TIES | TIMEOUT - | TIMESTAMP | TOTALS | TRAILING | TRIM | TRUNCATE | TO | TOP | TTL | TYPE | UNION | UPDATE | USE | USING | UUID | VALUES | VIEW - | VOLUME | WATCH | WHEN | WHERE | WITH + | CAST | CHECK | CLEAR | CLUSTER | CODEC | COLLATE | COLUMN | COMMENT | CONSTRAINT | CREATE | CROSS | CUBE | CURRENT | DATABASE + | DATABASES | DATE | DEDUPLICATE | DEFAULT | DELAY | DELETE | DESCRIBE | DESC | DESCENDING | DETACH | DICTIONARIES | DICTIONARY | DISK + | DISTINCT | DISTRIBUTED | DROP | ELSE | END | ENGINE | EVENTS | EXISTS | EXPLAIN | EXPRESSION | EXTRACT | FETCHES | FINAL | FIRST + | FLUSH | FOR | FOLLOWING | FOR | FORMAT | FREEZE | FROM | FULL | FUNCTION | GLOBAL | GRANULARITY | GROUP | HAVING | HIERARCHICAL | ID + | IF | ILIKE | IN | INDEX | INJECTIVE | INNER | INSERT | INTERVAL | INTO | IS | IS_OBJECT_ID | JOIN | JSON_FALSE | JSON_TRUE | KEY + | KILL | LAST | LAYOUT | LEADING | LEFT | LIFETIME | LIKE | LIMIT | LIVE | LOCAL | LOGS | MATERIALIZE | MATERIALIZED | MAX | MERGES + | MIN | MODIFY | MOVE | MUTATION | NO | NOT | NULLS | OFFSET | ON | OPTIMIZE | OR | ORDER | OUTER | OUTFILE | OVER | PARTITION + | POPULATE | PRECEDING | PREWHERE | PRIMARY | RANGE | RELOAD | REMOVE | RENAME | REPLACE | REPLICA | REPLICATED | RIGHT | ROLLUP | ROW + | ROWS | SAMPLE | SELECT | SEMI | SENDS | SET | SETTINGS | SHOW | SOURCE | START | STOP | SUBSTRING | SYNC | SYNTAX | SYSTEM | TABLE + | TABLES | TEMPORARY | TEST | THEN | TIES | TIMEOUT | TIMESTAMP | TOTALS | TRAILING | TRIM | TRUNCATE | TO | TOP | TTL | TYPE + | UNBOUNDED | UNION | UPDATE | USE | USING | UUID | VALUES | VIEW | VOLUME | WATCH | WHEN | WHERE | WINDOW | WITH ; keywordForAlias : DATE | FIRST | ID | KEY From 4800749d32e42912c8c34ab664403d9fea5fa75e Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Thu, 14 Oct 2021 23:56:28 +0800 Subject: [PATCH 227/438] make Ctrl-J to commit --- base/base/ReplxxLineReader.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/base/base/ReplxxLineReader.cpp b/base/base/ReplxxLineReader.cpp index 9bf6ec41255..38867faf5d5 100644 --- 
a/base/base/ReplxxLineReader.cpp
+++ b/base/base/ReplxxLineReader.cpp
@@ -177,6 +177,10 @@ ReplxxLineReader::ReplxxLineReader(
     /// bind C-p/C-n to history-previous/history-next like readline.
     rx.bind_key(Replxx::KEY::control('N'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::HISTORY_NEXT, code); });
     rx.bind_key(Replxx::KEY::control('P'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::HISTORY_PREVIOUS, code); });
+
+    /// bind C-j to ENTER action.
+    rx.bind_key(Replxx::KEY::control('J'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::COMMIT_LINE, code); });
+
     /// By default COMPLETE_NEXT/COMPLETE_PREV was binded to C-p/C-n, re-bind
     /// to M-P/M-N (that was used for HISTORY_COMMON_PREFIX_SEARCH before, but
     /// it also binded to M-p/M-n).

From 7b1eb7cb54d1dea05be2e7dfb1a2fc3cda7004bc Mon Sep 17 00:00:00 2001
From: Cody Baker 
Date: Thu, 14 Oct 2021 10:16:10 -0600
Subject: [PATCH 228/438] Add 21-10 release blog post

---
 .../en/2021/clickhouse-v21.10-released.md     | 27 +++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 website/blog/en/2021/clickhouse-v21.10-released.md

diff --git a/website/blog/en/2021/clickhouse-v21.10-released.md b/website/blog/en/2021/clickhouse-v21.10-released.md
new file mode 100644
index 00000000000..720ff0cc6d1
--- /dev/null
+++ b/website/blog/en/2021/clickhouse-v21.10-released.md
@@ -0,0 +1,27 @@
+---
+title: 'ClickHouse v21.10 Released'
+image: 'https://blog-images.clickhouse.com/en/2021/clickhouse-v21-10/featured.jpg'
+date: '2021-10-14'
+author: '[Alexey Milovidov](https://github.com/alexey-milovidov)'
+tags: ['company', 'community']
+---
+
+We're excited to share with you our first release since [announcing ClickHouse, Inc](https://clickhouse.com/blog/en/2021/clickhouse-inc/). The 21.10 release includes new contributions from multiple contributors including many in our community, and we are grateful for your ongoing ideas, development, and support. Our Engineering team continues to be laser-focused on providing our community and users with the fastest and most scalable OLAP DBMS available while implementing many new features. In the 21.10 release, we have a wonderful 79 contributors with 1255 commits across 211 pull requests - what an amazing community and we cherish your contributions.
+
+Let's highlight some of these exciting new capabilities in 21.10:
+
+* User-defined functions (UDFs) can now be [created as lambda expressions](https://clickhouse.com/docs/en/sql-reference/functions/#higher-order-functions). For example, `CREATE FUNCTION plus_one as (a) -> a + 1`
+* Two new table engines: Executable and ExecutablePool, which allow you to stream the results of a query to a custom shell script
+* Instead of logging every query (which can be a lot of logs!), you can now log a random sample of your queries. The number of queries logged is determined by defining a specified probability between 0.0 (no queries logged) and 1.0 (all queries logged) using the new `log_queries_probability` setting.
+* Positional arguments are now available in your GROUP BY, ORDER BY and LIMIT BY clauses. For example, `SELECT foo, bar, baz FROM my_table ORDER BY 2,3` orders the results by the bar and baz columns (no need to specify column names twice!)
+
+We're always listening for new ideas, and we're happy to welcome new contributors to the ClickHouse project. Whether you're submitting code or improving our documentation and examples, please get involved by sending us a pull request or submitting an issue.
Our beginner developers contribution guide will help you get started [[https://clickhouse.com/docs/en/development/developer-instruction/](https://clickhouse.com/docs/en/development/developer-instruction/)] + + +## ClickHouse Release Notes + +Release 21.10 + +Release Date: 2021-10-19 + +Release Notes: [21.10](https://github.com/ClickHouse/ClickHouse/blob/master/CHANGELOG.md) From 4dd1a4fe5c8e087cde635c28a19667937c1dc095 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Thu, 14 Oct 2021 10:41:41 -0600 Subject: [PATCH 229/438] Update release date --- website/blog/en/2021/clickhouse-v21.10-released.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/blog/en/2021/clickhouse-v21.10-released.md b/website/blog/en/2021/clickhouse-v21.10-released.md index 720ff0cc6d1..e5d870147f4 100644 --- a/website/blog/en/2021/clickhouse-v21.10-released.md +++ b/website/blog/en/2021/clickhouse-v21.10-released.md @@ -22,6 +22,6 @@ We're always listening for new ideas, and we're happy to welcome new contributor Release 21.10 -Release Date: 2021-10-19 +Release Date: 2021-10-21 Release Notes: [21.10](https://github.com/ClickHouse/ClickHouse/blob/master/CHANGELOG.md) From aa56fd15925f53fc0b9ca5b6b0d0604a492b6172 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 14 Oct 2021 21:29:02 +0300 Subject: [PATCH 230/438] Fix style. --- src/TableFunctions/ITableFunctionFileLike.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/TableFunctions/ITableFunctionFileLike.cpp b/src/TableFunctions/ITableFunctionFileLike.cpp index 49461fe8f46..699ad698bd8 100644 --- a/src/TableFunctions/ITableFunctionFileLike.cpp +++ b/src/TableFunctions/ITableFunctionFileLike.cpp @@ -16,8 +16,6 @@ #include - - namespace DB { From 89c1a04ef4eb2819631266f6051a1dfe0c818ecb Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 14 Oct 2021 21:35:56 +0300 Subject: [PATCH 231/438] Fix comments --- src/Formats/CapnProtoUtils.cpp | 118 ++++++++++-------- src/Formats/registerFormats.cpp | 2 +- .../Formats/Impl/CapnProtoRowInputFormat.cpp | 27 ++-- .../Formats/Impl/CapnProtoRowOutputFormat.cpp | 4 +- 4 files changed, 85 insertions(+), 66 deletions(-) diff --git a/src/Formats/CapnProtoUtils.cpp b/src/Formats/CapnProtoUtils.cpp index 1f0e6cf2cac..4b9993d5a74 100644 --- a/src/Formats/CapnProtoUtils.cpp +++ b/src/Formats/CapnProtoUtils.cpp @@ -42,7 +42,7 @@ capnp::StructSchema CapnProtoSchemaParser::getMessageSchema(const FormatSchemaIn { /// That's not good to determine the type of error by its description, but /// this is the only way to do it here, because kj doesn't specify the type of error. - String description = String(e.getDescription().cStr()); + auto description = std::string_view(e.getDescription().cStr()); if (description.find("No such file or directory") != String::npos || description.find("no such directory") != String::npos) throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot open CapnProto schema, file {} doesn't exists", schema_info.absoluteSchemaPath()); @@ -82,7 +82,6 @@ static const std::map capnp_simple_type_name {capnp::schema::Type::Which::FLOAT64, "Float64"}, {capnp::schema::Type::Which::TEXT, "Text"}, {capnp::schema::Type::Which::DATA, "Data"}, - {capnp::schema::Type::Which::ENUM, "Enum"}, {capnp::schema::Type::Which::INTERFACE, "Interface"}, {capnp::schema::Type::Which::ANY_POINTER, "AnyPointer"}, }; @@ -100,40 +99,56 @@ static bool checkIfStructIsNamedUnion(const capnp::StructSchema & struct_schema) /// Get full name of type for better exception messages. 
static String getCapnProtoFullTypeName(const capnp::Type & type) { - if (type.isStruct()) + switch (type.which()) { - auto struct_schema = type.asStruct(); + case capnp::schema::Type::Which::STRUCT: + { + auto struct_schema = type.asStruct(); - auto non_union_fields = struct_schema.getNonUnionFields(); - std::vector non_union_field_names; - for (auto nested_field : non_union_fields) - non_union_field_names.push_back(String(nested_field.getProto().getName()) + " " + getCapnProtoFullTypeName(nested_field.getType())); + auto non_union_fields = struct_schema.getNonUnionFields(); + std::vector non_union_field_names; + for (auto nested_field : non_union_fields) + non_union_field_names.push_back(String(nested_field.getProto().getName()) + " " + getCapnProtoFullTypeName(nested_field.getType())); - auto union_fields = struct_schema.getUnionFields(); - std::vector union_field_names; - for (auto nested_field : union_fields) - union_field_names.push_back(String(nested_field.getProto().getName()) + " " + getCapnProtoFullTypeName(nested_field.getType())); + auto union_fields = struct_schema.getUnionFields(); + std::vector union_field_names; + for (auto nested_field : union_fields) + union_field_names.push_back(String(nested_field.getProto().getName()) + " " + getCapnProtoFullTypeName(nested_field.getType())); - String union_name = "Union(" + boost::algorithm::join(union_field_names, ", ") + ")"; - /// Check if the struct is a named union. - if (non_union_field_names.empty()) - return union_name; + String union_name = "Union(" + boost::algorithm::join(union_field_names, ", ") + ")"; + /// Check if the struct is a named union. + if (non_union_field_names.empty()) + return union_name; - String type_name = "Struct(" + boost::algorithm::join(non_union_field_names, ", "); - /// Check if the struct contains unnamed union. - if (!union_field_names.empty()) - type_name += "," + union_name; - type_name += ")"; - return type_name; + String type_name = "Struct(" + boost::algorithm::join(non_union_field_names, ", "); + /// Check if the struct contains unnamed union. 
+ if (!union_field_names.empty()) + type_name += "," + union_name; + type_name += ")"; + return type_name; + } + case capnp::schema::Type::Which::LIST: + return "List(" + getCapnProtoFullTypeName(type.asList().getElementType()) + ")"; + case capnp::schema::Type::Which::ENUM: + { + auto enum_schema = type.asEnum(); + String enum_name = "Enum("; + auto enumerants = enum_schema.getEnumerants(); + for (size_t i = 0; i != enumerants.size(); ++i) + { + enum_name += String(enumerants[i].getProto().getName()) + " = " + std::to_string(enumerants[i].getOrdinal()); + if (i + 1 != enumerants.size()) + enum_name += ", "; + } + enum_name += ")"; + return enum_name; + } + default: + auto it = capnp_simple_type_names.find(type.which()); + if (it == capnp_simple_type_names.end()) + throw Exception(ErrorCodes::BAD_TYPE_OF_FIELD, "Unknown CapnProto type"); + return it->second; } - - if (type.isList()) - return "List(" + getCapnProtoFullTypeName(type.asList().getElementType()) + ")"; - - if (!capnp_simple_type_names.contains(type.which())) - throw Exception(ErrorCodes::BAD_TYPE_OF_FIELD, "Unknown CapnProto type"); - - return capnp_simple_type_names.at(type.which()); } template @@ -147,39 +162,38 @@ static bool checkEnums(const capnp::Type & capnp_type, const DataTypePtr column_ const auto * enum_type = assert_cast *>(column_type.get()); const auto & enum_values = dynamic_cast &>(*enum_type); - auto names = enum_values.getSetOfAllNames(to_lower); - auto values = enum_values.getSetOfAllValues(); - - std::unordered_set capn_enum_names; - std::unordered_set capn_enum_values; - auto enumerants = enum_schema.getEnumerants(); - /// In CapnProto Enum fields are numbered sequentially starting from zero. - if (mode == FormatSettings::EnumComparingMode::BY_VALUES && enumerants.size() > max_value) + if (mode == FormatSettings::EnumComparingMode::BY_VALUES) { - error_message += "Enum from CapnProto schema contains values that is out of range for Clickhouse Enum"; - return false; + /// In CapnProto Enum fields are numbered sequentially starting from zero. + if (enumerants.size() > max_value) + { + error_message += "Enum from CapnProto schema contains values that is out of range for Clickhouse Enum"; + return false; + } + + auto values = enum_values.getSetOfAllValues(); + std::unordered_set capn_enum_values; + for (auto enumerant : enumerants) + capn_enum_values.insert(Type(enumerant.getOrdinal())); + auto result = values == capn_enum_values; + if (!result) + error_message += "The set of values in Enum from CapnProto schema is different from the set of values in ClickHouse Enum"; + return result; } + auto names = enum_values.getSetOfAllNames(to_lower); + std::unordered_set capn_enum_names; + for (auto enumerant : enumerants) { String name = enumerant.getProto().getName(); capn_enum_names.insert(to_lower ? 
boost::algorithm::to_lower_copy(name) : name); - auto value = enumerant.getOrdinal(); - capn_enum_values.insert(Type(value)); } - if (mode == FormatSettings::EnumComparingMode::BY_NAMES || mode == FormatSettings::EnumComparingMode::BY_NAMES_CASE_INSENSITIVE) - { - auto result = names == capn_enum_names; - if (!result) - error_message += "The set of names in Enum from CapnProto schema is different from the set of names in ClickHouse Enum"; - return result; - } - - auto result = values == capn_enum_values; + auto result = names == capn_enum_names; if (!result) - error_message += "The set of values in Enum from CapnProto schema is different from the set of values in ClickHouse Enum"; + error_message += "The set of names in Enum from CapnProto schema is different from the set of names in ClickHouse Enum"; return result; } diff --git a/src/Formats/registerFormats.cpp b/src/Formats/registerFormats.cpp index f6b4bb7e2e1..acaf6f28492 100644 --- a/src/Formats/registerFormats.cpp +++ b/src/Formats/registerFormats.cpp @@ -140,7 +140,7 @@ void registerFormats() registerOutputFormatMySQLWire(factory); registerOutputFormatMarkdown(factory); registerOutputFormatPostgreSQLWire(factory); - registerOutputFormatProcessorsCapnProto(factory); + registerOutputFormatCapnProto(factory); registerInputFormatRegexp(factory); registerInputFormatJSONAsString(factory); diff --git a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp index 8492fc9b623..4d000bb1f35 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp @@ -148,20 +148,25 @@ static void insertEnum(IColumn & column, const DataTypePtr & column_type, const auto enumerant = *kj::_::readMaybe(enum_value.getEnumerant()); auto enum_type = assert_cast *>(column_type.get()); DataTypePtr nested_type = std::make_shared>(); - if (enum_comparing_mode == FormatSettings::EnumComparingMode::BY_VALUES) - insertSignedInteger(column, nested_type, Int64(enumerant.getOrdinal())); - else if (enum_comparing_mode == FormatSettings::EnumComparingMode::BY_NAMES) - insertSignedInteger(column, nested_type, Int64(enum_type->getValue(String(enumerant.getProto().getName())))); - else + switch (enum_comparing_mode) { - /// Find the same enum name case insensitive. - String enum_name = enumerant.getProto().getName(); - for (auto & name : enum_type->getAllRegisteredNames()) + case FormatSettings::EnumComparingMode::BY_VALUES: + insertSignedInteger(column, nested_type, Int64(enumerant.getOrdinal())); + return; + case FormatSettings::EnumComparingMode::BY_NAMES: + insertSignedInteger(column, nested_type, Int64(enum_type->getValue(String(enumerant.getProto().getName())))); + return; + case FormatSettings::EnumComparingMode::BY_NAMES_CASE_INSENSITIVE: { - if (compareEnumNames(name, enum_name, enum_comparing_mode)) + /// Find the same enum name case insensitive. 
+ String enum_name = enumerant.getProto().getName(); + for (auto & name : enum_type->getAllRegisteredNames()) { - insertSignedInteger(column, nested_type, Int64(enum_type->getValue(name))); - break; + if (compareEnumNames(name, enum_name, enum_comparing_mode)) + { + insertSignedInteger(column, nested_type, Int64(enum_type->getValue(name))); + break; + } } } } diff --git a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp index 2e32c962177..b5e2b83c23b 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp @@ -243,9 +243,9 @@ void CapnProtoRowOutputFormat::write(const Columns & columns, size_t row_num) capnp::writeMessage(*output_stream, message); } -void registerOutputFormatProcessorsCapnProto(FormatFactory & factory) +void registerOutputFormatCapnProto(FormatFactory & factory) { - factory.registerOutputFormatProcessor("CapnProto", []( + factory.registerOutputFormat("CapnProto", []( WriteBuffer & buf, const Block & sample, const RowOutputFormatParams & params, From 8729201208c374a27df726233e5c17515f2ffb95 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 14 Oct 2021 21:36:57 +0300 Subject: [PATCH 232/438] Remove redundant move --- src/Formats/CapnProtoUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Formats/CapnProtoUtils.cpp b/src/Formats/CapnProtoUtils.cpp index 4b9993d5a74..b9a28bd3fb3 100644 --- a/src/Formats/CapnProtoUtils.cpp +++ b/src/Formats/CapnProtoUtils.cpp @@ -422,7 +422,7 @@ void checkCapnProtoSchemaStructure(const capnp::StructSchema & schema, const Blo getCapnProtoFullTypeName(field.getType())); if (!additional_error_message.empty()) e.addMessage(additional_error_message); - throw std::move(e); + throw e; } } } From 2da8180613a106e26d091497cda1fc52d8cb905a Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 14 Oct 2021 21:39:09 +0300 Subject: [PATCH 233/438] Add space after comma --- src/Formats/CapnProtoUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Formats/CapnProtoUtils.cpp b/src/Formats/CapnProtoUtils.cpp index b9a28bd3fb3..1dc37ff51ec 100644 --- a/src/Formats/CapnProtoUtils.cpp +++ b/src/Formats/CapnProtoUtils.cpp @@ -123,7 +123,7 @@ static String getCapnProtoFullTypeName(const capnp::Type & type) String type_name = "Struct(" + boost::algorithm::join(non_union_field_names, ", "); /// Check if the struct contains unnamed union. 
if (!union_field_names.empty()) - type_name += "," + union_name; + type_name += ", " + union_name; type_name += ")"; return type_name; } From 700504e7d6c24f268ab97ac8055db1dcf6228da6 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 14 Oct 2021 22:09:44 +0300 Subject: [PATCH 234/438] Update Internals.cpp --- programs/copier/Internals.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/programs/copier/Internals.cpp b/programs/copier/Internals.cpp index 84283777c8f..6fc69361c90 100644 --- a/programs/copier/Internals.cpp +++ b/programs/copier/Internals.cpp @@ -74,7 +74,6 @@ Block getBlockWithAllStreamData(QueryPipeline pipeline) return block; } - bool isExtendedDefinitionStorage(const ASTPtr & storage_ast) { const auto & storage = storage_ast->as(); From 74e3978110f8d4eb8b06919e6bad3f07017e11f2 Mon Sep 17 00:00:00 2001 From: Artur Filatenkov <613623@mail.ru> Date: Thu, 14 Oct 2021 22:55:17 +0300 Subject: [PATCH 235/438] commit assert to fix build --- contrib/boringssl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/boringssl b/contrib/boringssl index a139bb3cb95..ea9add0d76b 160000 --- a/contrib/boringssl +++ b/contrib/boringssl @@ -1 +1 @@ -Subproject commit a139bb3cb9598c7d92dc69aa6962e3ea4fd18982 +Subproject commit ea9add0d76b0d2ff8616c5e9035389cd159996b7 From bb8105951f651a6dec92c6546d7a4152b5ddb336 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 13 Oct 2021 23:43:38 +0300 Subject: [PATCH 236/438] Disable fsync_metadata on CI Sometimes we see that pretty simple queries like CREATE DATABASE can take > 30 seconds, let's try to disable fsync. CI: https://clickhouse-test-reports.s3.yandex.net/30065/e5bc573250d3d6938937739b05d6f8cf618722db/functional_stateless_tests_(address).html#fail1 CI: https://clickhouse-test-reports.s3.yandex.net/30065/e5bc573250d3d6938937739b05d6f8cf618722db/functional_stateless_tests_(release).html#fail1 --- tests/config/install.sh | 1 + tests/config/users.d/no_fsync_metadata.xml | 11 +++++++++++ 2 files changed, 12 insertions(+) create mode 100644 tests/config/users.d/no_fsync_metadata.xml diff --git a/tests/config/install.sh b/tests/config/install.sh index 94ad55504a8..ba6ba0cd07c 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -48,6 +48,7 @@ ln -sf $SRC_PATH/users.d/opentelemetry.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/remote_queries.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/session_log_test.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/memory_profiler.xml $DEST_SERVER_PATH/users.d/ +ln -sf $SRC_PATH/users.d/no_fsync_metadata.xml $DEST_SERVER_PATH/users.d/ # FIXME DataPartsExchange may hang for http_send_timeout seconds # when nobody is going to read from the other side of socket (due to "Fetching of part was cancelled"), diff --git a/tests/config/users.d/no_fsync_metadata.xml b/tests/config/users.d/no_fsync_metadata.xml new file mode 100644 index 00000000000..6b9d69927f8 --- /dev/null +++ b/tests/config/users.d/no_fsync_metadata.xml @@ -0,0 +1,11 @@ + + + + + false + + + From 4ab6f7d771a453eb4f098d5a666b8170eb565f76 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 15 Oct 2021 10:39:31 +0300 Subject: [PATCH 237/438] Finally fix test --- tests/queries/1_stateful/00167_read_bytes_from_fs.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/1_stateful/00167_read_bytes_from_fs.sql b/tests/queries/1_stateful/00167_read_bytes_from_fs.sql index ee3e6b94537..ac20e60b177 100644 --- 
a/tests/queries/1_stateful/00167_read_bytes_from_fs.sql +++ b/tests/queries/1_stateful/00167_read_bytes_from_fs.sql @@ -1,7 +1,7 @@ SELECT sum(cityHash64(*)) FROM test.hits SETTINGS max_threads=40; -SYSTEM FLUSH LOGS; - -- We had a bug which lead to additional compressed data read. test.hits compressed size is about 1.2Gb, but we read more then 3Gb. -- Small additional reads still possible, so we compare with about 1.5Gb. -SELECT ProfileEvents['ReadBufferFromFileDescriptorReadBytes'] < 1500000000 from system.query_log where query = 'SELECT sum(cityHash64(*)) FROM test.hits SETTINGS max_threads=40' and current_database = currentDatabase() and type = 'QueryFinish'; +SYSTEM FLUSH LOGS; + +SELECT ProfileEvents['ReadBufferFromFileDescriptorReadBytes'] < 1500000000 from system.query_log where query = 'SELECT sum(cityHash64(*)) FROM test.hits SETTINGS max_threads=40;' and current_database = currentDatabase() and type = 'QueryFinish'; From 766e08567c4e1e28c9fae14564d03ee1c6633a8b Mon Sep 17 00:00:00 2001 From: lhuang09287750 Date: Fri, 15 Oct 2021 08:09:37 +0000 Subject: [PATCH 238/438] complete date32 comparison with date/datetime/string and give a test file --- src/DataTypes/getLeastSupertype.cpp | 19 +++++++++++++++---- src/Functions/FunctionsComparison.h | 7 ++++--- src/Interpreters/convertFieldToType.cpp | 6 ++++++ .../02098_date32_comparison.reference | 6 ++++++ .../0_stateless/02098_date32_comparison.sql | 6 ++++++ 5 files changed, 37 insertions(+), 7 deletions(-) create mode 100644 tests/queries/0_stateless/02098_date32_comparison.reference create mode 100644 tests/queries/0_stateless/02098_date32_comparison.sql diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index a950d18b50a..b77fcdcdfca 100644 --- a/src/DataTypes/getLeastSupertype.cpp +++ b/src/DataTypes/getLeastSupertype.cpp @@ -277,17 +277,18 @@ DataTypePtr getLeastSupertype(const DataTypes & types) /// For Date and DateTime/DateTime64, the common type is DateTime/DateTime64. No other types are compatible. 
{ UInt32 have_date = type_ids.count(TypeIndex::Date); + UInt32 have_date32 = type_ids.count(TypeIndex::Date32); UInt32 have_datetime = type_ids.count(TypeIndex::DateTime); UInt32 have_datetime64 = type_ids.count(TypeIndex::DateTime64); - if (have_date || have_datetime || have_datetime64) + if (have_date || have_date32 || have_datetime || have_datetime64) { - bool all_date_or_datetime = type_ids.size() == (have_date + have_datetime + have_datetime64); + bool all_date_or_datetime = type_ids.size() == (have_date + have_date32 + have_datetime + have_datetime64); if (!all_date_or_datetime) - throw Exception(getExceptionMessagePrefix(types) + " because some of them are Date/DateTime/DateTime64 and some of them are not", + throw Exception(getExceptionMessagePrefix(types) + " because some of them are Date/Date32/DateTime/DateTime64 and some of them are not", ErrorCodes::NO_COMMON_TYPE); - if (have_datetime64 == 0) + if (have_datetime64 == 0 && have_date32 == 0) { for (const auto & type : types) { @@ -298,6 +299,16 @@ DataTypePtr getLeastSupertype(const DataTypes & types) return std::make_shared(); } + /// For Date and Date32, the common type is Date32 + if (have_datetime == 0 && have_datetime64 == 0) + { + for (const auto & type : types) + { + if (isDate32(type)) + return type; + } + } + UInt8 max_scale = 0; size_t max_scale_date_time_index = 0; diff --git a/src/Functions/FunctionsComparison.h b/src/Functions/FunctionsComparison.h index 31356deb3fe..9b94ac589a3 100644 --- a/src/Functions/FunctionsComparison.h +++ b/src/Functions/FunctionsComparison.h @@ -1088,7 +1088,7 @@ public: if (!((both_represented_by_number && !has_date) /// Do not allow to compare date and number. || (left.isStringOrFixedString() || right.isStringOrFixedString()) /// Everything can be compared with string by conversion. /// You can compare the date, datetime, or datatime64 and an enumeration with a constant string. 
- || ((left.isDate() || left.isDateTime() || left.isDateTime64()) && (right.isDate() || right.isDateTime() || right.isDateTime64()) && left.idx == right.idx) /// only date vs date, or datetime vs datetime + || ((left.isDate() || left.isDate32() || left.isDateTime() || left.isDateTime64()) && (right.isDate() || right.isDate32() || right.isDateTime() || right.isDateTime64()) && left.idx == right.idx) /// only date vs date, or datetime vs datetime || (left.isUUID() && right.isUUID()) || (left.isEnum() && right.isEnum() && arguments[0]->getName() == arguments[1]->getName()) /// only equivalent enum type values can be compared against || (left_tuple && right_tuple && left_tuple->getElements().size() == right_tuple->getElements().size()) @@ -1178,8 +1178,8 @@ public: const bool left_is_string = isStringOrFixedString(which_left); const bool right_is_string = isStringOrFixedString(which_right); - bool date_and_datetime = (which_left.idx != which_right.idx) && (which_left.isDate() || which_left.isDateTime() || which_left.isDateTime64()) - && (which_right.isDate() || which_right.isDateTime() || which_right.isDateTime64()); + bool date_and_datetime = (which_left.idx != which_right.idx) && (which_left.isDate() || which_left.isDate32() || which_left.isDateTime() || which_left.isDateTime64()) + && (which_right.isDate() || which_right.isDate32() || which_right.isDateTime() || which_right.isDateTime64()); ColumnPtr res; if (left_is_num && right_is_num && !date_and_datetime) @@ -1247,6 +1247,7 @@ public: ColumnPtr c0_converted = castColumn(col_with_type_and_name_left, common_type); ColumnPtr c1_converted = castColumn(col_with_type_and_name_right, common_type); if (!((res = executeNumLeftType(c0_converted.get(), c1_converted.get())) + || (res = executeNumLeftType(c0_converted.get(), c1_converted.get())) || (res = executeNumLeftType(c0_converted.get(), c1_converted.get())))) throw Exception("Date related common types can only be UInt32 or UInt64", ErrorCodes::LOGICAL_ERROR); return res; diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index 57bb4aa4905..ef86f45b759 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -203,6 +203,12 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID return src; } + if (which_type.isDate32() && src.getType() == Field::Types::Int64) + { + /// We don't need any conversion Int64 is under type of Date32 + return src; + } + if (which_type.isDateTime64() && src.getType() == Field::Types::Decimal64) { /// Already in needed type. 
diff --git a/tests/queries/0_stateless/02098_date32_comparison.reference b/tests/queries/0_stateless/02098_date32_comparison.reference new file mode 100644 index 00000000000..a9e2f17562a --- /dev/null +++ b/tests/queries/0_stateless/02098_date32_comparison.reference @@ -0,0 +1,6 @@ +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02098_date32_comparison.sql b/tests/queries/0_stateless/02098_date32_comparison.sql new file mode 100644 index 00000000000..d0a632977c4 --- /dev/null +++ b/tests/queries/0_stateless/02098_date32_comparison.sql @@ -0,0 +1,6 @@ +select toDate32('1990-01-01') = toDate('1990-01-01'); +select toDate('1991-01-02') > toDate32('1990-01-01'); +select toDate32('1990-01-01') = toDateTime('1990-01-01'); +select toDateTime('1991-01-02') > toDate32('1990-01-01'); +select toDate32('1990-01-01') = '1990-01-01'; +select '1991-01-02' > toDate32('1990-01-01'); \ No newline at end of file From 03a7f24fa3961f699732ecd1e81455f5ce370689 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 15 Oct 2021 11:14:15 +0300 Subject: [PATCH 239/438] Fix build. --- programs/odbc-bridge/MainHandler.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/programs/odbc-bridge/MainHandler.cpp b/programs/odbc-bridge/MainHandler.cpp index 6a2e967d179..f3e28891344 100644 --- a/programs/odbc-bridge/MainHandler.cpp +++ b/programs/odbc-bridge/MainHandler.cpp @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include From ad8a344b46985fe75671e032ccb78982d266a9f0 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 15 Oct 2021 13:11:57 +0300 Subject: [PATCH 240/438] Move TTL streams and algo --- src/CMakeLists.txt | 1 + .../TTL}/ITTLAlgorithm.cpp | 2 +- .../TTL}/ITTLAlgorithm.h | 0 .../TTL}/TTLAggregationAlgorithm.cpp | 2 +- .../TTL}/TTLAggregationAlgorithm.h | 2 +- .../TTL}/TTLColumnAlgorithm.cpp | 2 +- .../TTL}/TTLColumnAlgorithm.h | 2 +- .../TTL}/TTLDeleteAlgorithm.cpp | 2 +- .../TTL}/TTLDeleteAlgorithm.h | 2 +- .../TTL}/TTLUpdateInfoAlgorithm.cpp | 2 +- .../TTL}/TTLUpdateInfoAlgorithm.h | 2 +- .../Transforms/TTLCalcTransform.cpp} | 4 +-- .../Transforms/TTLCalcTransform.h} | 2 +- .../Transforms/TTLTransform.cpp} | 10 +++--- .../Transforms/TTLTransform.h} | 4 +-- src/Storages/MergeTree/MergeTask.cpp | 34 +++++++++---------- .../MergeTree/MergeTreeDataMergerMutator.cpp | 4 +-- .../MergeTree/MergeTreeDataWriter.cpp | 2 +- src/Storages/MergeTree/MutateTask.cpp | 4 +-- 19 files changed, 42 insertions(+), 41 deletions(-) rename src/{DataStreams => Processors/TTL}/ITTLAlgorithm.cpp (98%) rename src/{DataStreams => Processors/TTL}/ITTLAlgorithm.h (100%) rename src/{DataStreams => Processors/TTL}/TTLAggregationAlgorithm.cpp (99%) rename src/{DataStreams => Processors/TTL}/TTLAggregationAlgorithm.h (96%) rename src/{DataStreams => Processors/TTL}/TTLColumnAlgorithm.cpp (98%) rename src/{DataStreams => Processors/TTL}/TTLColumnAlgorithm.h (95%) rename src/{DataStreams => Processors/TTL}/TTLDeleteAlgorithm.cpp (97%) rename src/{DataStreams => Processors/TTL}/TTLDeleteAlgorithm.h (92%) rename src/{DataStreams => Processors/TTL}/TTLUpdateInfoAlgorithm.cpp (97%) rename src/{DataStreams => Processors/TTL}/TTLUpdateInfoAlgorithm.h (94%) rename src/{DataStreams/TTLCalcInputStream.cpp => Processors/Transforms/TTLCalcTransform.cpp} (97%) rename src/{DataStreams/TTLCalcInputStream.h => Processors/Transforms/TTLCalcTransform.h} (96%) rename src/{DataStreams/TTLBlockInputStream.cpp => Processors/Transforms/TTLTransform.cpp} (95%) rename src/{DataStreams/TTLBlockInputStream.h => 
Processors/Transforms/TTLTransform.h} (92%) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index cac5b70f489..08755542ed1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -214,6 +214,7 @@ add_object_library(clickhouse_processors_transforms Processors/Transforms) add_object_library(clickhouse_processors_sources Processors/Sources) add_object_library(clickhouse_processors_sinks Processors/Sinks) add_object_library(clickhouse_processors_merges Processors/Merges) +add_object_library(clickhouse_processors_ttl Processors/TTL) add_object_library(clickhouse_processors_merges_algorithms Processors/Merges/Algorithms) add_object_library(clickhouse_processors_queryplan Processors/QueryPlan) add_object_library(clickhouse_processors_queryplan_optimizations Processors/QueryPlan/Optimizations) diff --git a/src/DataStreams/ITTLAlgorithm.cpp b/src/Processors/TTL/ITTLAlgorithm.cpp similarity index 98% rename from src/DataStreams/ITTLAlgorithm.cpp rename to src/Processors/TTL/ITTLAlgorithm.cpp index 7513e0c6ce0..489941950b5 100644 --- a/src/DataStreams/ITTLAlgorithm.cpp +++ b/src/Processors/TTL/ITTLAlgorithm.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include diff --git a/src/DataStreams/ITTLAlgorithm.h b/src/Processors/TTL/ITTLAlgorithm.h similarity index 100% rename from src/DataStreams/ITTLAlgorithm.h rename to src/Processors/TTL/ITTLAlgorithm.h diff --git a/src/DataStreams/TTLAggregationAlgorithm.cpp b/src/Processors/TTL/TTLAggregationAlgorithm.cpp similarity index 99% rename from src/DataStreams/TTLAggregationAlgorithm.cpp rename to src/Processors/TTL/TTLAggregationAlgorithm.cpp index d012464ea14..5581892f16f 100644 --- a/src/DataStreams/TTLAggregationAlgorithm.cpp +++ b/src/Processors/TTL/TTLAggregationAlgorithm.cpp @@ -1,4 +1,4 @@ -#include +#include namespace DB { diff --git a/src/DataStreams/TTLAggregationAlgorithm.h b/src/Processors/TTL/TTLAggregationAlgorithm.h similarity index 96% rename from src/DataStreams/TTLAggregationAlgorithm.h rename to src/Processors/TTL/TTLAggregationAlgorithm.h index c2f40bab6b9..0e4bf092ed6 100644 --- a/src/DataStreams/TTLAggregationAlgorithm.h +++ b/src/Processors/TTL/TTLAggregationAlgorithm.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include diff --git a/src/DataStreams/TTLColumnAlgorithm.cpp b/src/Processors/TTL/TTLColumnAlgorithm.cpp similarity index 98% rename from src/DataStreams/TTLColumnAlgorithm.cpp rename to src/Processors/TTL/TTLColumnAlgorithm.cpp index 71ad2a4e38f..7cef77c185c 100644 --- a/src/DataStreams/TTLColumnAlgorithm.cpp +++ b/src/Processors/TTL/TTLColumnAlgorithm.cpp @@ -1,4 +1,4 @@ -#include +#include namespace DB { diff --git a/src/DataStreams/TTLColumnAlgorithm.h b/src/Processors/TTL/TTLColumnAlgorithm.h similarity index 95% rename from src/DataStreams/TTLColumnAlgorithm.h rename to src/Processors/TTL/TTLColumnAlgorithm.h index ddf963eaee2..30de77dcc2a 100644 --- a/src/DataStreams/TTLColumnAlgorithm.h +++ b/src/Processors/TTL/TTLColumnAlgorithm.h @@ -1,6 +1,6 @@ #pragma once -#include +#include namespace DB { diff --git a/src/DataStreams/TTLDeleteAlgorithm.cpp b/src/Processors/TTL/TTLDeleteAlgorithm.cpp similarity index 97% rename from src/DataStreams/TTLDeleteAlgorithm.cpp rename to src/Processors/TTL/TTLDeleteAlgorithm.cpp index ea7a0b235ec..eec6b21f949 100644 --- a/src/DataStreams/TTLDeleteAlgorithm.cpp +++ b/src/Processors/TTL/TTLDeleteAlgorithm.cpp @@ -1,4 +1,4 @@ -#include +#include namespace DB { diff --git a/src/DataStreams/TTLDeleteAlgorithm.h b/src/Processors/TTL/TTLDeleteAlgorithm.h similarity 
index 92% rename from src/DataStreams/TTLDeleteAlgorithm.h rename to src/Processors/TTL/TTLDeleteAlgorithm.h index 8ab3f8b63e8..292a29bfa27 100644 --- a/src/DataStreams/TTLDeleteAlgorithm.h +++ b/src/Processors/TTL/TTLDeleteAlgorithm.h @@ -1,6 +1,6 @@ #pragma once -#include +#include namespace DB { diff --git a/src/DataStreams/TTLUpdateInfoAlgorithm.cpp b/src/Processors/TTL/TTLUpdateInfoAlgorithm.cpp similarity index 97% rename from src/DataStreams/TTLUpdateInfoAlgorithm.cpp rename to src/Processors/TTL/TTLUpdateInfoAlgorithm.cpp index 6a983d052c1..eba364aa2b8 100644 --- a/src/DataStreams/TTLUpdateInfoAlgorithm.cpp +++ b/src/Processors/TTL/TTLUpdateInfoAlgorithm.cpp @@ -1,4 +1,4 @@ -#include +#include namespace DB { diff --git a/src/DataStreams/TTLUpdateInfoAlgorithm.h b/src/Processors/TTL/TTLUpdateInfoAlgorithm.h similarity index 94% rename from src/DataStreams/TTLUpdateInfoAlgorithm.h rename to src/Processors/TTL/TTLUpdateInfoAlgorithm.h index 551211fc47f..45eecbde3d0 100644 --- a/src/DataStreams/TTLUpdateInfoAlgorithm.h +++ b/src/Processors/TTL/TTLUpdateInfoAlgorithm.h @@ -1,6 +1,6 @@ #pragma once -#include +#include namespace DB { diff --git a/src/DataStreams/TTLCalcInputStream.cpp b/src/Processors/Transforms/TTLCalcTransform.cpp similarity index 97% rename from src/DataStreams/TTLCalcInputStream.cpp rename to src/Processors/Transforms/TTLCalcTransform.cpp index fe85e40c003..31fb61239ef 100644 --- a/src/DataStreams/TTLCalcInputStream.cpp +++ b/src/Processors/Transforms/TTLCalcTransform.cpp @@ -1,5 +1,5 @@ -#include -#include +#include +#include namespace DB { diff --git a/src/DataStreams/TTLCalcInputStream.h b/src/Processors/Transforms/TTLCalcTransform.h similarity index 96% rename from src/DataStreams/TTLCalcInputStream.h rename to src/Processors/Transforms/TTLCalcTransform.h index b6318026b8c..14592c07155 100644 --- a/src/DataStreams/TTLCalcInputStream.h +++ b/src/Processors/Transforms/TTLCalcTransform.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include diff --git a/src/DataStreams/TTLBlockInputStream.cpp b/src/Processors/Transforms/TTLTransform.cpp similarity index 95% rename from src/DataStreams/TTLBlockInputStream.cpp rename to src/Processors/Transforms/TTLTransform.cpp index b476f689e60..a515a50fafb 100644 --- a/src/DataStreams/TTLBlockInputStream.cpp +++ b/src/Processors/Transforms/TTLTransform.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -8,10 +8,10 @@ #include #include -#include -#include -#include -#include +#include +#include +#include +#include namespace DB { diff --git a/src/DataStreams/TTLBlockInputStream.h b/src/Processors/Transforms/TTLTransform.h similarity index 92% rename from src/DataStreams/TTLBlockInputStream.h rename to src/Processors/Transforms/TTLTransform.h index 50b28e81bdf..9207c68448b 100644 --- a/src/DataStreams/TTLBlockInputStream.h +++ b/src/Processors/Transforms/TTLTransform.h @@ -4,8 +4,8 @@ #include #include #include -#include -#include +#include +#include #include diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index e17a3dcf544..7cf52058558 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -1,26 +1,26 @@ -#include "Storages/MergeTree/MergeTask.h" +#include #include #include #include -#include "Common/ActionBlocker.h" +#include -#include "Storages/MergeTree/MergeTreeData.h" -#include "Storages/MergeTree/IMergeTreeDataPart.h" -#include "Storages/MergeTree/MergeTreeSequentialSource.h" -#include 
"Storages/MergeTree/FutureMergedMutatedPart.h" -#include "Processors/Transforms/ExpressionTransform.h" -#include "Processors/Transforms/MaterializingTransform.h" -#include "Processors/Merges/MergingSortedTransform.h" -#include "Processors/Merges/CollapsingSortedTransform.h" -#include "Processors/Merges/SummingSortedTransform.h" -#include "Processors/Merges/ReplacingSortedTransform.h" -#include "Processors/Merges/GraphiteRollupSortedTransform.h" -#include "Processors/Merges/AggregatingSortedTransform.h" -#include "Processors/Merges/VersionedCollapsingTransform.h" -#include "DataStreams/TTLBlockInputStream.h" -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include namespace DB diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 1ee8423b0d1..5a199610a68 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -15,8 +15,8 @@ #include #include -#include -#include +#include +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 50ab26ec470..812299828d3 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 6ad29d01ca6..da79b917b5a 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -2,8 +2,8 @@ #include #include -#include -#include +#include +#include #include #include #include From 65aec857ec89897a98693ccbf0d65a8e311b6b16 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 15 Oct 2021 13:12:59 +0300 Subject: [PATCH 241/438] FunctionsJSON updated --- src/Functions/FunctionsJSON.cpp | 9 ++++++++- src/Functions/FunctionsJSON.h | 17 ----------------- 2 files changed, 8 insertions(+), 18 deletions(-) delete mode 100644 src/Functions/FunctionsJSON.h diff --git a/src/Functions/FunctionsJSON.cpp b/src/Functions/FunctionsJSON.cpp index 6889a0d44b9..4e2c42ea3cb 100644 --- a/src/Functions/FunctionsJSON.cpp +++ b/src/Functions/FunctionsJSON.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -58,6 +59,12 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } +template +struct HasIndexOperator : std::false_type {}; + +template +struct HasIndexOperator()[0])>> : std::true_type {}; + /// Functions to parse JSONs and extract values from it. /// The first argument of all these functions gets a JSON, @@ -279,7 +286,7 @@ private: return true; } - if constexpr (FunctionJSONHelpersDetails::has_index_operator::value) + if constexpr (HasIndexOperator::value) { if (element.isObject()) { diff --git a/src/Functions/FunctionsJSON.h b/src/Functions/FunctionsJSON.h deleted file mode 100644 index 4ef43eb637f..00000000000 --- a/src/Functions/FunctionsJSON.h +++ /dev/null @@ -1,17 +0,0 @@ -#pragma once - -#include - -namespace DB -{ - -namespace FunctionJSONHelpersDetails -{ - template - struct has_index_operator : std::false_type {}; - - template - struct has_index_operator()[0])>> : std::true_type {}; -} - -} From 41dc195b345c199e4fef4d62d57e9d1cab3cf69c Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 15 Oct 2021 13:15:14 +0300 Subject: [PATCH 242/438] Fix build. 
--- programs/odbc-bridge/MainHandler.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/programs/odbc-bridge/MainHandler.cpp b/programs/odbc-bridge/MainHandler.cpp index f3e28891344..6362c7dfa9b 100644 --- a/programs/odbc-bridge/MainHandler.cpp +++ b/programs/odbc-bridge/MainHandler.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include From df81d3f74a630c3d674eb51b9116d139419f8707 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 15 Oct 2021 14:52:24 +0300 Subject: [PATCH 243/438] Fix build in fast test --- src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp index b5e2b83c23b..58f88c5c7cf 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp @@ -262,7 +262,7 @@ void registerOutputFormatCapnProto(FormatFactory & factory) namespace DB { class FormatFactory; -void registerOutputFormatProcessorsCapnProto(FormatFactory &) {} +void registerOutputFormatCapnProto(FormatFactory &) {} } #endif // USE_CAPNP From 5b6b8cf56433b0b8c3e0a4e645eb21577493576f Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 15 Oct 2021 15:14:44 +0300 Subject: [PATCH 244/438] fix clang-tidy --- src/DataTypes/DataTypeTuple.cpp | 1 - src/DataTypes/Serializations/ISerialization.cpp | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/DataTypes/DataTypeTuple.cpp b/src/DataTypes/DataTypeTuple.cpp index 6bca7f2bac2..4e1a5a05d45 100644 --- a/src/DataTypes/DataTypeTuple.cpp +++ b/src/DataTypes/DataTypeTuple.cpp @@ -27,7 +27,6 @@ namespace ErrorCodes extern const int DUPLICATE_COLUMN; extern const int EMPTY_DATA_PASSED; extern const int NOT_FOUND_COLUMN_IN_BLOCK; - extern const int ILLEGAL_COLUMN; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH; } diff --git a/src/DataTypes/Serializations/ISerialization.cpp b/src/DataTypes/Serializations/ISerialization.cpp index a6d9185429c..5c0274b0e35 100644 --- a/src/DataTypes/Serializations/ISerialization.cpp +++ b/src/DataTypes/Serializations/ISerialization.cpp @@ -103,7 +103,7 @@ namespace using SubstreamIterator = ISerialization::SubstreamPath::const_iterator; -static String getNameForSubstreamPath( +String getNameForSubstreamPath( String stream_name, SubstreamIterator begin, SubstreamIterator end, From a744097fb65fbfa22809c5c640f4ff73f07fd2ba Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 15 Oct 2021 15:34:13 +0300 Subject: [PATCH 245/438] Fixed build --- src/Functions/FunctionsJSON.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Functions/FunctionsJSON.cpp b/src/Functions/FunctionsJSON.cpp index 4e2c42ea3cb..9558b856511 100644 --- a/src/Functions/FunctionsJSON.cpp +++ b/src/Functions/FunctionsJSON.cpp @@ -39,7 +39,6 @@ #include #include #include -#include #include From f69daace7b400735dec36e9190add9fd9e7651c9 Mon Sep 17 00:00:00 2001 From: Artur Filatenkov <613623@mail.ru> Date: Fri, 15 Oct 2021 16:01:28 +0300 Subject: [PATCH 246/438] attemp to fix build --- contrib/boringssl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/boringssl b/contrib/boringssl index ea9add0d76b..c1e01a441d6 160000 --- a/contrib/boringssl +++ b/contrib/boringssl @@ -1 +1 @@ -Subproject commit ea9add0d76b0d2ff8616c5e9035389cd159996b7 +Subproject commit c1e01a441d6db234f4f12e63a7657d1f9e6db9c1 From 
e9bf496d42435c02a96518adf33e19dba4c4f004 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Fri, 15 Oct 2021 14:35:50 +0000 Subject: [PATCH 247/438] Added an ability to generate data in build time --- CMakeLists.txt | 15 ++++++ .../libprotobuf-mutator-cmake/CMakeLists.txt | 7 +-- .../fuzzers/codegen_fuzzer/CMakeLists.txt | 48 +++++++++++++++++-- .../codegen_fuzzer/codegen_select_fuzzer.cpp | 1 - src/Parsers/fuzzers/codegen_fuzzer/gen.py | 19 ++++---- 5 files changed, 70 insertions(+), 20 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f32841c15da..c2d3421b28e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -136,6 +136,21 @@ if (ENABLE_FUZZING) message (STATUS "Fuzzing instrumentation enabled") set (FUZZER "libfuzzer") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -nostdlib++") + set (ENABLE_CLICKHOUSE_ODBC_BRIDGE OFF) + set (ENABLE_LIBRARIES 0) + set (ENABLE_SSL 1) + set (USE_INTERNAL_SSL_LIBRARY 1) + set (USE_UNWIND ON) + set (ENABLE_EMBEDDED_COMPILER 0) + set (ENABLE_EXAMPLES 0) + set (ENABLE_UTILS 0) + set (ENABLE_THINLTO 0) + set (ENABLE_TCMALLOC 0) + set (ENABLE_JEMALLOC 0) + set (ENABLE_CHECK_HEAVY_BUILDS 1) + set (GLIBC_COMPATIBILITY OFF) + set (ENABLE_PROTOBUF ON) + set (USE_INTERNAL_PROTOBUF_LIBRARY ON) endif() # Global libraries diff --git a/contrib/libprotobuf-mutator-cmake/CMakeLists.txt b/contrib/libprotobuf-mutator-cmake/CMakeLists.txt index 93eafc85b7d..978b1e732ba 100644 --- a/contrib/libprotobuf-mutator-cmake/CMakeLists.txt +++ b/contrib/libprotobuf-mutator-cmake/CMakeLists.txt @@ -1,5 +1,4 @@ set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/libprotobuf-mutator) -set(NO_FUZZING_FLAGS "-fno-sanitize=fuzzer -fsanitize-coverage=0") add_library(protobuf-mutator ${LIBRARY_DIR}/src/libfuzzer/libfuzzer_macro.cc @@ -10,8 +9,6 @@ add_library(protobuf-mutator ${LIBRARY_DIR}/src/utf8_fix.cc) target_include_directories(protobuf-mutator BEFORE PRIVATE "${LIBRARY_DIR}") -# target_include_directories(protobuf-mutator BEFORE PRIVATE "${ClickHouse_SOURCE_DIR}/contrib/protobuf/src") +target_include_directories(protobuf-mutator BEFORE PRIVATE "${ClickHouse_SOURCE_DIR}/contrib/protobuf/src") -target_link_libraries(protobuf-mutator ${PROTOBUF_LIBRARIES}) -set_target_properties(protobuf-mutator PROPERTIES - COMPILE_FLAGS "${NO_FUZZING_FLAGS}") +target_link_libraries(protobuf-mutator ${Protobuf_LIBRARY}) diff --git a/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt b/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt index 370396a5e8e..3c274bd4786 100644 --- a/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt +++ b/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt @@ -1,7 +1,47 @@ find_package(Protobuf REQUIRED) -protobuf_generate_cpp(PROTO_SRCS PROTO_HDRS "out.proto") -set(FUZZER_SRCS codegen_select_fuzzer.cpp out.cpp ${PROTO_SRCS} ${PROTO_HDRS}) +set (CURRENT_DIR_IN_SOURCES "${ClickHouse_SOURCE_DIR}/src/Parsers/fuzzers/codegen_fuzzer") +set (CURRENT_DIR_IN_BINARY "${ClickHouse_BINARY_DIR}/src/Parsers/fuzzers/codegen_fuzzer") + +# Copy scripts and template file to build directory to generate .proto and .cpp file from them +configure_file( + "${CURRENT_DIR_IN_SOURCES}/gen.py" + "${CURRENT_DIR_IN_BINARY}/gen.py" + COPYONLY) +configure_file( + "${CURRENT_DIR_IN_SOURCES}/update.sh" + "${CURRENT_DIR_IN_BINARY}/update.sh" + COPYONLY) + +# Delete this and uncomment the next block if you want to generage clickhouse.g automatically +configure_file( + "${CURRENT_DIR_IN_SOURCES}/clickhouse.g" + "${CURRENT_DIR_IN_BINARY}/clickhouse.g" + COPYONLY) + +# # Uncomment to generate clickhouse.g 
automatically +# configure_file( +# "${CURRENT_DIR_IN_SOURCES}/clickhouse-template.g" +# "${CURRENT_DIR_IN_BINARY}/clickhouse-template.g" +# COPYONLY) + +# # Note that it depends on all.dict file! +# add_custom_command( +# OUTPUT +# "${CURRENT_DIR_IN_BINARY}/clickhouse.g" +# COMMAND ./update.sh "${ClickHouse_SOURCE_DIR}/tests/fuzz/all.dict" +# ) + +add_custom_command( + OUTPUT + "${CURRENT_DIR_IN_BINARY}/out.cpp" + "${CURRENT_DIR_IN_BINARY}/out.proto" + COMMAND python3 gen.py clickhouse.g out.cpp out.proto + # DEPENDS "${CURRENT_DIR_IN_BINARY}/clickhouse.g" +) + +PROTOBUF_GENERATE_CPP(PROTO_SRCS PROTO_HDRS "${CURRENT_DIR_IN_BINARY}/out.proto") +set(FUZZER_SRCS codegen_select_fuzzer.cpp "${CURRENT_DIR_IN_BINARY}/out.cpp" ${PROTO_SRCS} ${PROTO_HDRS}) set(CMAKE_INCLUDE_CURRENT_DIR TRUE) @@ -9,7 +49,7 @@ add_executable(codegen_select_fuzzer ${FUZZER_SRCS}) set_source_files_properties("${PROTO_SRCS}" "out.cpp" PROPERTIES COMPILE_FLAGS "-Wno-reserved-identifier") -target_include_directories(codegen_select_fuzzer BEFORE PRIVATE "${Protobuf_INCLUDE_DIRS}") +target_include_directories(codegen_select_fuzzer BEFORE PRIVATE "${Protobuf_INCLUDE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}") target_include_directories(codegen_select_fuzzer BEFORE PRIVATE "${LibProtobufMutator_SOURCE_DIR}") target_include_directories(codegen_select_fuzzer BEFORE PRIVATE "${LibProtobufMutator_SOURCE_DIR}/src") -target_link_libraries(codegen_select_fuzzer PRIVATE clickhouse_parsers protobuf-mutator ${Protobuf_LIBRARIES} ${LIB_FUZZING_ENGINE}) +target_link_libraries(codegen_select_fuzzer PRIVATE protobuf-mutator dbms ${LIB_FUZZING_ENGINE}) diff --git a/src/Parsers/fuzzers/codegen_fuzzer/codegen_select_fuzzer.cpp b/src/Parsers/fuzzers/codegen_fuzzer/codegen_select_fuzzer.cpp index a68fac4f585..418a5014657 100644 --- a/src/Parsers/fuzzers/codegen_fuzzer/codegen_select_fuzzer.cpp +++ b/src/Parsers/fuzzers/codegen_fuzzer/codegen_select_fuzzer.cpp @@ -11,7 +11,6 @@ #include "out.pb.h" - void GenerateSentence(const Sentence&, std::string &, int); diff --git a/src/Parsers/fuzzers/codegen_fuzzer/gen.py b/src/Parsers/fuzzers/codegen_fuzzer/gen.py index e96bc6ae9f6..b594d1026e3 100644 --- a/src/Parsers/fuzzers/codegen_fuzzer/gen.py +++ b/src/Parsers/fuzzers/codegen_fuzzer/gen.py @@ -58,14 +58,14 @@ class Parser: self.var_id = -1 self.cur_tok = None self.includes = [] - + self.proto = '' self.cpp = '' def parse_file(self, filename): with open(filename) as f: self.text = f.read() - + while self.parse_statement() is not None: pass @@ -90,12 +90,12 @@ class Parser: def parse_var_value(self): i = self.text.find(' ') - + id_, self.text = self.text[1:i], self.text[i+1:] self.var_id = int(id_) self.cur_tok = TOKEN_VAR return TOKEN_VAR - + def parse_txt_value(self): if self.text[0] != '"': raise Exception("parse_txt_value: expected quote at the start") @@ -116,7 +116,7 @@ class Parser: else: c, self.text = self.text[0], self.text[1:] self.t += c - + self.text = self.text[1:] self.cur_tok = TOKEN_TEXT return TOKEN_TEXT @@ -137,7 +137,7 @@ class Parser: index = self.text.find('\n') self.text = self.text[index:] - + def parse_statement(self): if self.skip_ws() is None: return None @@ -146,7 +146,7 @@ class Parser: if self.cur_tok == TOKEN_SLASH: self.skip_line() return TOKEN_SLASH - + chain = [] while self.cur_tok != TOKEN_SEMI: if self.cur_tok == TOKEN_TEXT: @@ -164,7 +164,7 @@ class Parser: def generate(self): self.proto = 'syntax = "proto3";\n\n' - self.cpp = '#include \n#include \n#include \n\n#include \n\n' + self.cpp = '#include \n#include 
\n#include \n\n#include \n\n' for incl_file in self.includes: self.cpp += f'#include "{incl_file}"\n' @@ -228,7 +228,7 @@ def main(args): p = Parser() p.add_include(include_filename) p.parse_file(input_file) - + cpp, proto = p.generate() proto = proto.replace('\t', ' ' * 4) @@ -246,4 +246,3 @@ if __name__ == '__main__': print(f"Usage {sys.argv[0]} ") sys.exit(1) main(sys.argv[1:]) - From 058081bcb2810082bf14eea36402860653169b44 Mon Sep 17 00:00:00 2001 From: Pavel Cheremushkin Date: Fri, 15 Oct 2021 18:18:02 +0300 Subject: [PATCH 248/438] fixed typo in gen.py; clickhouse.g applying Nikita's in build-time generation --- .../fuzzers/codegen_fuzzer/CMakeLists.txt | 23 +- .../fuzzers/codegen_fuzzer/clickhouse.g | 524 ------------------ src/Parsers/fuzzers/codegen_fuzzer/gen.py | 2 +- src/Parsers/fuzzers/codegen_fuzzer/update.sh | 4 +- 4 files changed, 11 insertions(+), 542 deletions(-) delete mode 100644 src/Parsers/fuzzers/codegen_fuzzer/clickhouse.g diff --git a/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt b/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt index 3c274bd4786..f87340b5024 100644 --- a/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt +++ b/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt @@ -13,24 +13,17 @@ configure_file( "${CURRENT_DIR_IN_BINARY}/update.sh" COPYONLY) -# Delete this and uncomment the next block if you want to generage clickhouse.g automatically configure_file( - "${CURRENT_DIR_IN_SOURCES}/clickhouse.g" - "${CURRENT_DIR_IN_BINARY}/clickhouse.g" + "${CURRENT_DIR_IN_SOURCES}/clickhouse-template.g" + "${CURRENT_DIR_IN_BINARY}/clickhouse-template.g" COPYONLY) -# # Uncomment to generate clickhouse.g automatically -# configure_file( -# "${CURRENT_DIR_IN_SOURCES}/clickhouse-template.g" -# "${CURRENT_DIR_IN_BINARY}/clickhouse-template.g" -# COPYONLY) - -# # Note that it depends on all.dict file! -# add_custom_command( -# OUTPUT -# "${CURRENT_DIR_IN_BINARY}/clickhouse.g" -# COMMAND ./update.sh "${ClickHouse_SOURCE_DIR}/tests/fuzz/all.dict" -# ) +# Note that it depends on all.dict file! 
+add_custom_command( + OUTPUT + "${CURRENT_DIR_IN_BINARY}/clickhouse.g" + COMMAND ./update.sh "${ClickHouse_SOURCE_DIR}/tests/fuzz/all.dict" +) add_custom_command( OUTPUT diff --git a/src/Parsers/fuzzers/codegen_fuzzer/clickhouse.g b/src/Parsers/fuzzers/codegen_fuzzer/clickhouse.g deleted file mode 100644 index edd5acf513d..00000000000 --- a/src/Parsers/fuzzers/codegen_fuzzer/clickhouse.g +++ /dev/null @@ -1,524 +0,0 @@ -" "; -" "; -" "; -";"; - - -"(" $1 ")"; -"(" $1 ", " $2 ")"; -"(" $1 ", " $2 ", " $3 ")"; - -$1 ", " $2 ; -$1 ", " $2 ", " $3 ; -$1 ", " $2 ", " $3 ", " $4 ; -$1 ", " $2 ", " $3 ", " $4 ", " $5 ; - -"[" $1 ", " $2 "]"; -"[" $1 ", " $2 ", " $3 "]"; -"[" $1 ", " $2 ", " $3 ", " $4 "]"; -"[" $1 ", " $2 ", " $3 ", " $4 ", " $5 "]"; - -$0 "(" $1 ")"; -$0 "(" $1 ", " $2 ")"; -$0 "(" $1 ", " $2 ", " $3 ")"; - -$1 " as " $2 ; - - -// TODO: add more clickhouse specific stuff -"SELECT " $1 " FROM " $2 " WHERE " $3 ; -"SELECT " $1 " FROM " $2 " GROUP BY " $3 ; -"SELECT " $1 " FROM " $2 " SORT BY " $3 ; -"SELECT " $1 " FROM " $2 " LIMIT " $3 ; -"SELECT " $1 " FROM " $2 " JOIN " $3 ; -"SELECT " $1 " FROM " $2 " ARRAY JOIN " $3 ; -"SELECT " $1 " FROM " $2 " JOIN " $3 " ON " $4 ; -"SELECT " $1 " FROM " $2 " JOIN " $3 " USING " $5 ; -"SELECT " $1 " INTO OUTFILE " $2 ; - -"WITH " $1 " AS " $2 ; - -"{" $1 ":" $2 "}"; -"[" $1 "," $2 "]"; -"[]"; - - -" x "; -"x"; -" `x` "; -"`x`"; - -" \"value\" "; -"\"value\""; -" 0 "; -"0"; -"1"; -"2"; -"123123123123123123"; -"182374019873401982734091873420923123123123123123"; -"1e-1"; -"1.1"; -"\"\""; -" '../../../../../../../../../etc/passwd' "; - -"/"; -"="; -"=="; -"!="; -"<>"; -"<"; -"<="; -">"; -">="; -"<<"; -"|<<"; -"&"; -"|"; -"||"; -"<|"; -"|>"; -"+"; -"-"; -"~"; -"*"; -"/"; -"\\"; -"%"; -""; -"."; -","; -","; -","; -","; -","; -","; -"("; -")"; -"("; -")"; -"("; -")"; -"("; -")"; -"("; -")"; -"("; -")"; -"?"; -":"; -"@"; -"@@"; -"$"; -"\""; -"`"; -"{"; -"}"; -"^"; -"::"; -"->"; -"]"; -"["; - -" ADD "; -" ADD COLUMN "; -" ADD CONSTRAINT "; -" ADD INDEX "; -" AFTER "; -" AggregateFunction "; -" aggThrow "; -" ALL "; -" ALTER LIVE VIEW "; -" ALTER TABLE "; -" and "; -" ANTI "; -" any "; -" anyHeavy "; -" anyLast "; -" argMax "; -" argMin "; -" array "; -" Array "; -" arrayAll "; -" arrayAUC "; -" arrayCompact "; -" arrayConcat "; -" arrayCount "; -" arrayCumSum "; -" arrayCumSumNonNegative "; -" arrayDifference "; -" arrayDistinct "; -" arrayElement "; -" arrayEnumerate "; -" arrayEnumerateDense "; -" arrayEnumerateDenseRanked "; -" arrayEnumerateUniq "; -" arrayEnumerateUniqRanked "; -" arrayExists "; -" arrayFill "; -" arrayFilter "; -" arrayFirst "; -" arrayFirstIndex "; -" arrayFlatten "; -" arrayIntersect "; -" arrayJoin "; -" ARRAY JOIN "; -" arrayMap "; -" arrayPopBack "; -" arrayPopFront "; -" arrayPushBack "; -" arrayPushFront "; -" arrayReduce "; -" arrayReduceInRanges "; -" arrayResize "; -" arrayReverse "; -" arrayReverseFill "; -" arrayReverseSort "; -" arrayReverseSplit "; -" arraySlice "; -" arraySort "; -" arraySplit "; -" arraySum "; -" arrayUniq "; -" arrayWithConstant "; -" arrayZip "; -" AS "; -" ASC "; -" ASCENDING "; -" ASOF "; -" AST "; -" ATTACH "; -" ATTACH PART "; -" ATTACH PARTITION "; -" avg "; -" avgWeighted "; -" basename "; -" BETWEEN "; -" BOTH "; -" boundingRatio "; -" BY "; -" CAST "; -" categoricalInformationValue "; -" CHECK "; -" CHECK TABLE "; -" CLEAR COLUMN "; -" CLEAR INDEX "; -" COLLATE "; -" COLUMNS "; -" COMMENT COLUMN "; -" CONSTRAINT "; -" corr "; -" corrStable "; -" count "; -" countEqual "; -" covarPop 
"; -" covarPopStable "; -" covarSamp "; -" covarSampStable "; -" CREATE "; -" CROSS "; -" CUBE "; -" cutFragment "; -" cutQueryString "; -" cutQueryStringAndFragment "; -" cutToFirstSignificantSubdomain "; -" cutURLParameter "; -" cutWWW "; -" D "; -" DATABASE "; -" DATABASES "; -" Date "; -" DATE "; -" DATE_ADD "; -" DATEADD "; -" DATE_DIFF "; -" DATEDIFF "; -" DATE_SUB "; -" DATESUB "; -" DateTime "; -" DateTime64 "; -" DAY "; -" DD "; -" Decimal "; -" Decimal128 "; -" Decimal32 "; -" Decimal64 "; -" decodeURLComponent "; -" DEDUPLICATE "; -" DELETE "; -" DELETE WHERE "; -" DESC "; -" DESCENDING "; -" DESCRIBE "; -" DETACH "; -" DETACH PARTITION "; -" DICTIONARIES "; -" DICTIONARY "; -" DISTINCT "; -" domain "; -" domainWithoutWWW "; -" DROP "; -" DROP COLUMN "; -" DROP CONSTRAINT "; -" DROP DETACHED PART "; -" DROP DETACHED PARTITION "; -" DROP INDEX "; -" DROP PARTITION "; -" emptyArrayToSingle "; -" ENGINE "; -" entropy "; -" Enum "; -" Enum16 "; -" Enum8 "; -" EVENTS "; -" EXCHANGE TABLES "; -" EXISTS "; -" EXTRACT "; -" extractURLParameter "; -" extractURLParameterNames "; -" extractURLParameters "; -" FETCH PARTITION "; -" FETCH PART "; -" FINAL "; -" FIRST "; -" firstSignificantSubdomain "; -" FixedString "; -" Float32 "; -" Float64 "; -" FOR "; -" ForEach "; -" FORMAT "; -" fragment "; -" FREEZE "; -" FROM "; -" FULL "; -" FUNCTION "; -" __getScalar "; -" GLOBAL "; -" GRANULARITY "; -" groupArray "; -" groupArrayInsertAt "; -" groupArrayMovingAvg "; -" groupArrayMovingSum "; -" groupArraySample "; -" groupBitAnd "; -" groupBitmap "; -" groupBitmapAnd "; -" groupBitmapOr "; -" groupBitmapXor "; -" groupBitOr "; -" groupBitXor "; -" GROUP BY "; -" groupUniqArray "; -" has "; -" hasAll "; -" hasAny "; -" HAVING "; -" HH "; -" histogram "; -" HOUR "; -" ID "; -" if "; -" IF EXISTS "; -" IF NOT EXISTS "; -" IN "; -" INDEX "; -" indexOf "; -" INNER "; -" IN PARTITION "; -" INSERT INTO "; -" Int16 "; -" Int32 "; -" Int64 "; -" Int8 "; -" INTERVAL "; -" IntervalDay "; -" IntervalHour "; -" IntervalMinute "; -" IntervalMonth "; -" IntervalQuarter "; -" IntervalSecond "; -" IntervalWeek "; -" IntervalYear "; -" INTO OUTFILE "; -" JOIN "; -" kurtPop "; -" kurtSamp "; -" LAST "; -" LAYOUT "; -" LEADING "; -" LEFT "; -" LEFT ARRAY JOIN "; -" length "; -" LIFETIME "; -" LIKE "; -" LIMIT "; -" LIVE "; -" LOCAL "; -" LowCardinality "; -" LTRIM "; -" M "; -" MATERIALIZED "; -" MATERIALIZE INDEX "; -" MATERIALIZE TTL "; -" max "; -" maxIntersections "; -" maxIntersectionsPosition "; -" Merge "; -" MI "; -" min "; -" MINUTE "; -" MM "; -" MODIFY "; -" MODIFY COLUMN "; -" MODIFY ORDER BY "; -" MODIFY QUERY "; -" MODIFY SETTING "; -" MODIFY TTL "; -" MONTH "; -" MOVE PART "; -" MOVE PARTITION "; -" movingXXX "; -" N "; -" NAME "; -" Nested "; -" NO DELAY "; -" NONE "; -" not "; -" nothing "; -" Nothing "; -" Null "; -" Nullable "; -" NULLS "; -" OFFSET "; -" ON "; -" ONLY "; -" OPTIMIZE TABLE "; -" ORDER BY "; -" OR REPLACE "; -" OUTER "; -" PARTITION "; -" PARTITION BY "; -" path "; -" pathFull "; -" POPULATE "; -" PREWHERE "; -" PRIMARY KEY "; -" protocol "; -" Q "; -" QQ "; -" QUARTER "; -" queryString "; -" queryStringAndFragment "; -" range "; -" REFRESH "; -" RENAME COLUMN "; -" RENAME TABLE "; -" REPLACE PARTITION "; -" Resample "; -" RESUME "; -" retention "; -" RIGHT "; -" ROLLUP "; -" RTRIM "; -" S "; -" SAMPLE "; -" SAMPLE BY "; -" SECOND "; -" SELECT "; -" SEMI "; -" sequenceCount "; -" sequenceMatch "; -" SET "; -" SETTINGS "; -" SHOW "; -" SHOW PROCESSLIST "; -" simpleLinearRegression 
"; -" skewPop "; -" skewSamp "; -" SOURCE "; -" SQL_TSI_DAY "; -" SQL_TSI_HOUR "; -" SQL_TSI_MINUTE "; -" SQL_TSI_MONTH "; -" SQL_TSI_QUARTER "; -" SQL_TSI_SECOND "; -" SQL_TSI_WEEK "; -" SQL_TSI_YEAR "; -" SS "; -" State "; -" stddevPop "; -" stddevPopStable "; -" stddevSamp "; -" stddevSampStable "; -" STEP "; -" stochasticLinearRegression "; -" stochasticLogisticRegression "; -" String "; -" SUBSTRING "; -" sum "; -" sumKahan "; -" sumMap "; -" sumMapFiltered "; -" sumMapFilteredWithOverflow "; -" sumMapWithOverflow "; -" sumWithOverflow "; -" SUSPEND "; -" TABLE "; -" TABLES "; -" TEMPORARY "; -" TIMESTAMP "; -" TIMESTAMP_ADD "; -" TIMESTAMPADD "; -" TIMESTAMP_DIFF "; -" TIMESTAMPDIFF "; -" TIMESTAMP_SUB "; -" TIMESTAMPSUB "; -" TO "; -" TO DISK "; -" TOP "; -" topK "; -" topKWeighted "; -" topLevelDomain "; -" TO TABLE "; -" TOTALS "; -" TO VOLUME "; -" TRAILING "; -" TRIM "; -" TRUNCATE "; -" TTL "; -" Tuple "; -" TYPE "; -" UInt16 "; -" UInt32 "; -" UInt64 "; -" UInt8 "; -" uniq "; -" uniqCombined "; -" uniqCombined64 "; -" uniqExact "; -" uniqHLL12 "; -" uniqUpTo "; -" UPDATE "; -" URLHierarchy "; -" URLPathHierarchy "; -" USE "; -" USING "; -" UUID "; -" VALUES "; -" varPop "; -" varPopStable "; -" varSamp "; -" varSampStable "; -" VIEW "; -" WATCH "; -" WEEK "; -" WHERE "; -" windowFunnel "; -" WITH "; -" WITH FILL "; -" WITH TIES "; -" WK "; -" WW "; -" YEAR "; -" YY "; -" YYYY "; diff --git a/src/Parsers/fuzzers/codegen_fuzzer/gen.py b/src/Parsers/fuzzers/codegen_fuzzer/gen.py index b594d1026e3..95936247489 100644 --- a/src/Parsers/fuzzers/codegen_fuzzer/gen.py +++ b/src/Parsers/fuzzers/codegen_fuzzer/gen.py @@ -154,7 +154,7 @@ class Parser: elif self.cur_tok == TOKEN_VAR: chain.append(Var(self.var_id)) else: - self.fatal_parsing_error(f"unexpected token {self.tok}") + self.fatal_parsing_error(f"unexpected token {self.cur_tok}") self.get_next_token() if not chain: diff --git a/src/Parsers/fuzzers/codegen_fuzzer/update.sh b/src/Parsers/fuzzers/codegen_fuzzer/update.sh index 0982d6d0686..daee56dcea1 100755 --- a/src/Parsers/fuzzers/codegen_fuzzer/update.sh +++ b/src/Parsers/fuzzers/codegen_fuzzer/update.sh @@ -20,11 +20,11 @@ _main() { while read line; do [[ -z "$line" ]] && continue - echo $line | sed -e 's/"\(.*\)"/" \1 ";/g' + echo $line | sed -e '/^#/d' -e 's/"\(.*\)"/" \1 ";/g' done < $dict_filename >> clickhouse.g } _main "$@" -# Sample run: ./update.sh ../../../../tests/fuzz/ast.dict +# Sample run: ./update.sh ${CLICKHOUSE_SOURCE_DIR}/tests/fuzz/all.dict # then run `python ./gen.py clickhouse.g out.cpp out.proto` to generate new files with tokens. 
Rebuild fuzzer From f0452b34d502fabbc069dec438c8473a0fd0e7e3 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Fri, 15 Oct 2021 18:21:02 +0300 Subject: [PATCH 249/438] Update clickhouse-v21.10-released.md --- website/blog/en/2021/clickhouse-v21.10-released.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/blog/en/2021/clickhouse-v21.10-released.md b/website/blog/en/2021/clickhouse-v21.10-released.md index e5d870147f4..5b6f6d8b9e3 100644 --- a/website/blog/en/2021/clickhouse-v21.10-released.md +++ b/website/blog/en/2021/clickhouse-v21.10-released.md @@ -2,7 +2,7 @@ title: 'ClickHouse v21.10 Released' image: 'https://blog-images.clickhouse.com/en/2021/clickhouse-v21-10/featured.jpg' date: '2021-10-14' -author: '[Alexey Milovidov](https://github.com/alexey-milovidov)' +author: '[Rich Raposa](https://github.com/rfraposa), [Alexey Milovidov](https://github.com/alexey-milovidov)' tags: ['company', 'community'] --- From 09b8d697bd2a27d07dd7b2621f3cdd788cb14d6f Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 15 Oct 2021 18:26:56 +0300 Subject: [PATCH 250/438] Flat Hashed dictionary fix bytes_allocated for nullable attributes --- src/Dictionaries/FlatDictionary.cpp | 5 +++++ src/Dictionaries/HashedDictionary.cpp | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index e14ee5d30d1..532c68d0453 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -403,6 +403,11 @@ void FlatDictionary::calculateBytesAllocated() }; callOnDictionaryAttributeType(attribute.type, type_call); + + bytes_allocated += sizeof(attribute.is_nullable_set); + + if (attribute.is_nullable_set.has_value()) + bytes_allocated = attribute.is_nullable_set->getBufferSizeInBytes(); } if (update_field_loaded_block) diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp index 1f3821096da..00d339865f8 100644 --- a/src/Dictionaries/HashedDictionary.cpp +++ b/src/Dictionaries/HashedDictionary.cpp @@ -626,6 +626,11 @@ void HashedDictionary::calculateBytesAllocated() if (attributes[i].string_arena) bytes_allocated += attributes[i].string_arena->size(); + + bytes_allocated += sizeof(attributes[i].is_nullable_set); + + if (attributes[i].is_nullable_set.has_value()) + bytes_allocated = attributes[i].is_nullable_set->getBufferSizeInBytes(); } bytes_allocated += complex_key_arena.size(); From be4fc79d32cfad558202d380141a449f7a543cbf Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 15 Oct 2021 18:29:27 +0300 Subject: [PATCH 251/438] Better handling exceptions, update tests --- programs/client/Client.cpp | 14 +++----- programs/local/LocalServer.cpp | 6 ++++ src/Client/ClientBase.cpp | 15 +++++--- .../01527_clickhouse_local_optimize.sh | 2 +- .../01528_clickhouse_local_prepare_parts.sh | 16 ++++----- .../0_stateless/01600_detach_permanently.sh | 2 +- ..._bad_options_in_client_and_local.reference | 12 +++++++ .../02096_bad_options_in_client_and_local.sh | 34 +++++++++++++++++++ ...known_option_in_clickhouse_local.reference | 2 -- ...2096_unknown_option_in_clickhouse_local.sh | 9 ----- 10 files changed, 78 insertions(+), 34 deletions(-) create mode 100644 tests/queries/0_stateless/02096_bad_options_in_client_and_local.reference create mode 100755 tests/queries/0_stateless/02096_bad_options_in_client_and_local.sh delete mode 100644 tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.reference delete mode 100755 
tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.sh diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index a5e4bd45c7f..3c50acb1df6 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -25,9 +25,6 @@ #endif #include #include -#include -#include -#include #include #include #include "Common/MemoryTracker.h" @@ -35,13 +32,11 @@ #include #include #include -#include #include #include #include #include -#include #include #include @@ -53,16 +48,12 @@ #include #include #include -#include -#include -#include #include #include #include #include -#include #include "TestTags.h" #ifndef __clang__ @@ -1234,6 +1225,11 @@ int mainEntryClickHouseClient(int argc, char ** argv) std::cerr << DB::getExceptionMessage(e, false) << std::endl; return 1; } + catch (const boost::program_options::error & e) + { + std::cerr << "Bad arguments: " << e.what() << std::endl; + return DB::ErrorCodes::BAD_ARGUMENTS; + } catch (...) { std::cerr << DB::getCurrentExceptionMessage(true) << std::endl; diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 2180729438d..9e67f04699a 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -652,6 +652,7 @@ void LocalServer::addOptions(OptionsDescription & options_description) ("logger.level", po::value(), "Log level") ("no-system-tables", "do not attach system tables (better startup time)") + ("path", po::value(), "Storage path") ; } @@ -713,6 +714,11 @@ int mainEntryClickHouseLocal(int argc, char ** argv) auto code = DB::getCurrentExceptionCode(); return code ? code : 1; } + catch (const boost::program_options::error & e) + { + std::cerr << "Bad arguments: " << e.what() << std::endl; + return DB::ErrorCodes::BAD_ARGUMENTS; + } catch (...) { std::cerr << DB::getCurrentExceptionMessage(true) << '\n'; diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index deb22ca60ef..631d3f2bcc3 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1510,12 +1510,19 @@ void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, { cmd_settings.addProgramOptions(options_description.main_description.value()); /// Parse main commandline options. - auto parser = po::command_line_parser(arguments).options(options_description.main_description.value()); - parser.allow_unregistered(); + auto parser = po::command_line_parser(arguments).options(options_description.main_description.value()).allow_unregistered(); po::parsed_options parsed = parser.run(); - auto unrecognized_options = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional); + + /// Check unrecognized options without positional options. + auto unrecognized_options = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::exclude_positional); + if (!unrecognized_options.empty()) + throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'", unrecognized_options[0]); + + /// Check positional options (options after ' -- ', ex: clickhouse-client -- ). 
+ unrecognized_options = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional); if (unrecognized_options.size() > 1) - throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'", unrecognized_options[1]); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Positional options are not supported."); + po::store(parsed, options); } diff --git a/tests/queries/0_stateless/01527_clickhouse_local_optimize.sh b/tests/queries/0_stateless/01527_clickhouse_local_optimize.sh index d63765fc179..c1d5c357308 100755 --- a/tests/queries/0_stateless/01527_clickhouse_local_optimize.sh +++ b/tests/queries/0_stateless/01527_clickhouse_local_optimize.sh @@ -10,6 +10,6 @@ rm -rf "${WORKING_FOLDER_01527}" mkdir -p "${WORKING_FOLDER_01527}" # OPTIMIZE was crashing due to lack of temporary volume in local -${CLICKHOUSE_LOCAL} --query "drop database if exists d; create database d; create table d.t engine MergeTree order by a as select 1 a; optimize table d.t final" -- --path="${WORKING_FOLDER_01527}" +${CLICKHOUSE_LOCAL} --query "drop database if exists d; create database d; create table d.t engine MergeTree order by a as select 1 a; optimize table d.t final" --path="${WORKING_FOLDER_01527}" rm -rf "${WORKING_FOLDER_01527}" diff --git a/tests/queries/0_stateless/01528_clickhouse_local_prepare_parts.sh b/tests/queries/0_stateless/01528_clickhouse_local_prepare_parts.sh index 8684582ad45..95ecbf09cf5 100755 --- a/tests/queries/0_stateless/01528_clickhouse_local_prepare_parts.sh +++ b/tests/queries/0_stateless/01528_clickhouse_local_prepare_parts.sh @@ -36,10 +36,10 @@ ATTACH TABLE local.data_csv (id UInt64, d Date, s String) Engine=File(CSV, '${WO EOF ## feed the table -${CLICKHOUSE_LOCAL} --query "INSERT INTO local.test SELECT * FROM local.data_csv;" -- --path="${WORKING_FOLDER_01528}" +${CLICKHOUSE_LOCAL} --query "INSERT INTO local.test SELECT * FROM local.data_csv;" --path="${WORKING_FOLDER_01528}" ## check the parts were created -${CLICKHOUSE_LOCAL} --query "SELECT * FROM local.test WHERE id < 10 ORDER BY id;" -- --path="${WORKING_FOLDER_01528}" +${CLICKHOUSE_LOCAL} --query "SELECT * FROM local.test WHERE id < 10 ORDER BY id;" --path="${WORKING_FOLDER_01528}" ################# @@ -49,36 +49,36 @@ cat < "${WORKING_FOLDER_01528}/metadata/local/stdin.sql" ATTACH TABLE local.stdin (id UInt64, d Date, s String) Engine=File(CSV, stdin); EOF -cat <&1 | grep -F -q "UNRECOGNIZED_ARGUMENTS" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_LOCAL} --unknown-option-1 --unknown-option-2 2>&1 | grep -F -q "UNRECOGNIZED_ARGUMENTS" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_LOCAL} -- --unknown-option 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_LOCAL} -- 'positional-argument' 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_LOCAL} -f 2>&1 | grep -F -q "Bad arguments" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_LOCAL} --query 2>&1 | grep -F -q "Bad arguments" && echo "OK" || echo "FAIL" + + +${CLICKHOUSE_CLIENT} --unknown-option 2>&1 | grep -F -q "UNRECOGNIZED_ARGUMENTS" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_CLIENT} --unknown-option-1 --unknown-option-2 2>&1 | grep -F -q "UNRECOGNIZED_ARGUMENTS" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_CLIENT} -- --unknown-option 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_CLIENT} -- 'positional-argument' 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_CLIENT} --j 2>&1 | grep -F -q "Bad arguments" && echo "OK" || echo 
"FAIL" + +${CLICKHOUSE_CLIENT} --query 2>&1 | grep -F -q "Bad arguments" && echo "OK" || echo "FAIL" + + + diff --git a/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.reference b/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.reference deleted file mode 100644 index 2c4cf540812..00000000000 --- a/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.reference +++ /dev/null @@ -1,2 +0,0 @@ -Code: 552. DB::Exception: Unrecognized option '--unknown-option'. (UNRECOGNIZED_ARGUMENTS) -Code: 552. DB::Exception: Unrecognized option '--unknown-option'. (UNRECOGNIZED_ARGUMENTS) diff --git a/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.sh b/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.sh deleted file mode 100755 index 2fabc761d4c..00000000000 --- a/tests/queries/0_stateless/02096_unknown_option_in_clickhouse_local.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/env bash -# shellcheck disable=SC2206 - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -${CLICKHOUSE_LOCAL} --unknown-option 2>&1 echo -${CLICKHOUSE_CLIENT} --unknown-option 2>&1 echo From f3c568fbe65dd07f476e11bd84d46143778729d4 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 15 Oct 2021 18:33:10 +0300 Subject: [PATCH 252/438] PolygonDictionary fix bytes_allocated --- src/Dictionaries/PolygonDictionary.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Dictionaries/PolygonDictionary.cpp b/src/Dictionaries/PolygonDictionary.cpp index 7a34a9c2b25..8d0f0f1abc4 100644 --- a/src/Dictionaries/PolygonDictionary.cpp +++ b/src/Dictionaries/PolygonDictionary.cpp @@ -248,6 +248,9 @@ void IPolygonDictionary::calculateBytesAllocated() { /// Index allocated by subclass not counted because it take a small part in relation to attributes and polygons + if (configuration.store_polygon_key_column) + bytes_allocated += key_attribute_column->allocatedBytes(); + for (const auto & column : attributes_columns) bytes_allocated += column->allocatedBytes(); From 8dde0a5dfbc457af33ea9c9b4ce6f15465e62185 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 15 Oct 2021 18:35:20 +0300 Subject: [PATCH 253/438] ComplexKeyHashedDictionary fix keys copy --- src/Dictionaries/HashedDictionary.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp index 1f3821096da..eec7cec0285 100644 --- a/src/Dictionaries/HashedDictionary.cpp +++ b/src/Dictionaries/HashedDictionary.cpp @@ -667,7 +667,7 @@ Pipe HashedDictionary::read(const Names & column_na if constexpr (dictionary_key_type == DictionaryKeyType::Simple) return Pipe(std::make_shared(DictionarySourceData(shared_from_this(), std::move(keys), column_names), max_block_size)); else - return Pipe(std::make_shared(DictionarySourceData(shared_from_this(), keys, column_names), max_block_size)); + return Pipe(std::make_shared(DictionarySourceData(shared_from_this(), std::move(keys), column_names), max_block_size)); } template From 574970a852423dc88a254d162dc50994ab4a4c9b Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Fri, 15 Oct 2021 18:37:38 +0300 Subject: [PATCH 254/438] Update clickhouse-v21.10-released.md --- website/blog/en/2021/clickhouse-v21.10-released.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/blog/en/2021/clickhouse-v21.10-released.md b/website/blog/en/2021/clickhouse-v21.10-released.md index 
5b6f6d8b9e3..b49bb284f90 100644 --- a/website/blog/en/2021/clickhouse-v21.10-released.md +++ b/website/blog/en/2021/clickhouse-v21.10-released.md @@ -15,7 +15,7 @@ Let's highlight some of these new exciting new capabilities in 21.10: * Instead of logging every query (which can be a lot of logs!), you can now log a random sample of your queries. The number of queries logged is determined by defining a specified probability between 0.0 (no queries logged) and 1.0 (all queries logged) using the new `log_queries_probability` setting. * Positional arguments are now available in your GROUP BY, ORDER BY and LIMIT BY clauses. For example, `SELECT foo, bar, baz FROM my_table ORDER BY 2,3` orders the results by whatever the bar and baz columns (no need to specify column names twice!) -We're always listening for new ideas, and we're happy to welcome new contributors to the ClickHouse project. Whether for submitting code or improving our documentation and examples, please get involved by sending us a pull request or submitting an issue. Our beginner developers contribution guide will help you get started [[https://clickhouse.com/docs/en/development/developer-instruction/](https://clickhouse.com/docs/en/development/developer-instruction/)] +We're always listening for new ideas, and we're happy to welcome new contributors to the ClickHouse project. Whether for submitting code or improving our documentation and examples, please get involved by sending us a pull request or submitting an issue. Our beginner developers contribution guide will help you get started: [https://clickhouse.com/docs/en/development/developer-instruction/](https://clickhouse.com/docs/en/development/developer-instruction/) ## ClickHouse Release Notes From c9638bacbf625fd8c08667175e42428add6fbc1b Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 15 Oct 2021 18:53:17 +0300 Subject: [PATCH 255/438] Added HashedArray dictionary --- src/Dictionaries/CMakeLists.txt | 1 + src/Dictionaries/HashedArrayDictionary.cpp | 689 ++++++++++++++++++ src/Dictionaries/HashedArrayDictionary.h | 211 ++++++ src/Dictionaries/registerDictionaries.cpp | 2 + tests/performance/hashed_array_dictionary.xml | 126 ++++ ...shed_array_dictionary_simple_key.reference | 66 ++ ...098_hashed_array_dictionary_simple_key.sql | 125 ++++ ...hed_array_dictionary_complex_key.reference | 56 ++ 8 files changed, 1276 insertions(+) create mode 100644 src/Dictionaries/HashedArrayDictionary.cpp create mode 100644 src/Dictionaries/HashedArrayDictionary.h create mode 100644 tests/performance/hashed_array_dictionary.xml create mode 100644 tests/queries/0_stateless/02098_hashed_array_dictionary_simple_key.reference create mode 100644 tests/queries/0_stateless/02098_hashed_array_dictionary_simple_key.sql create mode 100644 tests/queries/0_stateless/02099_hashed_array_dictionary_complex_key.reference diff --git a/src/Dictionaries/CMakeLists.txt b/src/Dictionaries/CMakeLists.txt index bc5f0dc9567..b1b3d6d55e0 100644 --- a/src/Dictionaries/CMakeLists.txt +++ b/src/Dictionaries/CMakeLists.txt @@ -10,6 +10,7 @@ if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELW set_source_files_properties( FlatDictionary.cpp HashedDictionary.cpp + HashedArrayDictionary.cpp CacheDictionary.cpp RangeHashedDictionary.cpp DirectDictionary.cpp diff --git a/src/Dictionaries/HashedArrayDictionary.cpp b/src/Dictionaries/HashedArrayDictionary.cpp new file mode 100644 index 00000000000..7160cb67e10 --- /dev/null +++ b/src/Dictionaries/HashedArrayDictionary.cpp @@ -0,0 +1,689 @@ 
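The HashedArrayDictionary introduced below keeps each key once in a hash table that maps it to a dense index, while attribute values live in flat arrays addressed by that index and nullability is tracked in a parallel mask; all attributes share the single key-to-index map, as described in the header comment further down. A minimal sketch of that storage layout, assuming simplified standard-library types rather than the actual ClickHouse column and arena classes:

``` cpp
// Simplified illustration of the "hashed array" layout: one hash map from key
// to a dense index, plus per-attribute value arrays and an optional null mask.
#include <cstdint>
#include <iostream>
#include <optional>
#include <string>
#include <unordered_map>
#include <vector>

struct HashedArraySketch
{
    std::unordered_map<uint64_t, size_t> key_to_index; // key attribute
    std::vector<std::string> value_first;              // attribute column 0
    std::vector<std::string> value_second;             // attribute column 1
    std::vector<bool> value_second_is_null;            // null mask for a Nullable attribute

    void insert(uint64_t key, std::string first, std::optional<std::string> second)
    {
        if (key_to_index.count(key))
            return; // keep the first occurrence of a key, as the dictionary does on load

        key_to_index.emplace(key, value_first.size());
        value_first.push_back(std::move(first));
        value_second.push_back(second.value_or(""));
        value_second_is_null.push_back(!second.has_value());
    }

    std::string getFirstOrDefault(uint64_t key, const std::string & def) const
    {
        auto it = key_to_index.find(key);
        return it == key_to_index.end() ? def : value_first[it->second];
    }
};

int main()
{
    HashedArraySketch dict;
    dict.insert(0, "value_0", "value_second_0");
    dict.insert(1, "value_1", std::nullopt);   // Nullable attribute

    std::cout << dict.getFirstOrDefault(1, "value_first_default") << '\n'; // value_1
    std::cout << dict.getFirstOrDefault(5, "value_first_default") << '\n'; // default
    return 0;
}
```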
+#include "HashedArrayDictionary.h" + +#include +#include +#include +#include +#include + +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int DICTIONARY_IS_EMPTY; + extern const int UNSUPPORTED_METHOD; +} + +template +HashedArrayDictionary::HashedArrayDictionary( + const StorageID & dict_id_, + const DictionaryStructure & dict_struct_, + DictionarySourcePtr source_ptr_, + const HashedArrayDictionaryStorageConfiguration & configuration_, + BlockPtr update_field_loaded_block_) + : IDictionary(dict_id_) + , dict_struct(dict_struct_) + , source_ptr(std::move(source_ptr_)) + , configuration(configuration_) + , update_field_loaded_block(std::move(update_field_loaded_block_)) +{ + createAttributes(); + loadData(); + calculateBytesAllocated(); +} + +template +ColumnPtr HashedArrayDictionary::getColumn( + const std::string & attribute_name, + const DataTypePtr & result_type, + const Columns & key_columns, + const DataTypes & key_types [[maybe_unused]], + const ColumnPtr & default_values_column) const +{ + if (dictionary_key_type == DictionaryKeyType::Complex) + dict_struct.validateKeyTypes(key_types); + + ColumnPtr result; + + DictionaryKeysArenaHolder arena_holder; + DictionaryKeysExtractor extractor(key_columns, arena_holder.getComplexKeyArena()); + + const size_t size = extractor.getKeysSize(); + + const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type); + const size_t attribute_index = dict_struct.attribute_name_to_index.find(attribute_name)->second; + auto & attribute = attributes[attribute_index]; + + bool is_attribute_nullable = attribute.is_index_null.has_value(); + + ColumnUInt8::MutablePtr col_null_map_to; + ColumnUInt8::Container * vec_null_map_to = nullptr; + if (attribute.is_index_null) + { + col_null_map_to = ColumnUInt8::create(size, false); + vec_null_map_to = &col_null_map_to->getData(); + } + + auto type_call = [&](const auto & dictionary_attribute_type) + { + using Type = std::decay_t; + using AttributeType = typename Type::AttributeType; + using ValueType = DictionaryValueType; + using ColumnProvider = DictionaryAttributeColumnProvider; + + DictionaryDefaultValueExtractor default_value_extractor(dictionary_attribute.null_value, default_values_column); + + auto column = ColumnProvider::getColumn(dictionary_attribute, size); + + if constexpr (std::is_same_v) + { + auto * out = column.get(); + + getItemsImpl( + attribute, + extractor, + [&](const size_t, const Array & value, bool) { out->insert(value); }, + default_value_extractor); + } + else if constexpr (std::is_same_v) + { + auto * out = column.get(); + + if (is_attribute_nullable) + getItemsImpl( + attribute, + extractor, + [&](size_t row, const StringRef value, bool is_null) + { + (*vec_null_map_to)[row] = is_null; + out->insertData(value.data, value.size); + }, + default_value_extractor); + else + getItemsImpl( + attribute, + extractor, + [&](size_t, const StringRef value, bool) { out->insertData(value.data, value.size); }, + default_value_extractor); + } + else + { + auto & out = column->getData(); + + if (is_attribute_nullable) + getItemsImpl( + attribute, + extractor, + [&](size_t row, const auto value, bool is_null) + { + (*vec_null_map_to)[row] = is_null; + out[row] = value; + }, + default_value_extractor); + else + getItemsImpl( + attribute, + extractor, + [&](size_t row, const auto value, bool) { out[row] = value; }, + default_value_extractor); + } + + result = std::move(column); + }; + + 
callOnDictionaryAttributeType(attribute.type, type_call); + + if (is_attribute_nullable) + result = ColumnNullable::create(std::move(result), std::move(col_null_map_to)); + + return result; +} + +template +ColumnUInt8::Ptr HashedArrayDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const +{ + if (dictionary_key_type == DictionaryKeyType::Complex) + dict_struct.validateKeyTypes(key_types); + + DictionaryKeysArenaHolder arena_holder; + DictionaryKeysExtractor extractor(key_columns, arena_holder.getComplexKeyArena()); + + size_t keys_size = extractor.getKeysSize(); + + auto result = ColumnUInt8::create(keys_size, false); + auto & out = result->getData(); + + if (attributes.empty()) + { + query_count.fetch_add(keys_size, std::memory_order_relaxed); + return result; + } + + size_t keys_found = 0; + + for (size_t requested_key_index = 0; requested_key_index < keys_size; ++requested_key_index) + { + auto requested_key = extractor.extractCurrentKey(); + + out[requested_key_index] = key_attribute.container.find(requested_key) != key_attribute.container.end(); + + keys_found += out[requested_key_index]; + extractor.rollbackCurrentKey(); + } + + query_count.fetch_add(keys_size, std::memory_order_relaxed); + found_count.fetch_add(keys_found, std::memory_order_relaxed); + + return result; +} + +template +ColumnPtr HashedArrayDictionary::getHierarchy(ColumnPtr key_column [[maybe_unused]], const DataTypePtr &) const +{ + if constexpr (dictionary_key_type == DictionaryKeyType::Simple) + { + PaddedPODArray keys_backup_storage; + const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage); + + size_t hierarchical_attribute_index = *dict_struct.hierarchical_attribute_index; + + const auto & dictionary_attribute = dict_struct.attributes[hierarchical_attribute_index]; + const auto & hierarchical_attribute = attributes[hierarchical_attribute_index]; + + const auto & key_attribute_container = key_attribute.container; + + const UInt64 null_value = dictionary_attribute.null_value.template get(); + const AttributeContainerType & parent_keys_container = std::get>(hierarchical_attribute.container); + + auto is_key_valid_func = [&](auto & key) { return key_attribute_container.find(key) != key_attribute_container.end(); }; + + size_t keys_found = 0; + + auto get_parent_func = [&](auto & hierarchy_key) + { + std::optional result; + + auto it = key_attribute_container.find(hierarchy_key); + + if (it != key_attribute_container.end()) + result = parent_keys_container[it->getMapped()]; + + keys_found += result.has_value(); + + return result; + }; + + auto dictionary_hierarchy_array = getKeysHierarchyArray(keys, null_value, is_key_valid_func, get_parent_func); + + query_count.fetch_add(keys.size(), std::memory_order_relaxed); + found_count.fetch_add(keys_found, std::memory_order_relaxed); + + return dictionary_hierarchy_array; + } + else + { + return nullptr; + } +} + +template +ColumnUInt8::Ptr HashedArrayDictionary::isInHierarchy( + ColumnPtr key_column [[maybe_unused]], + ColumnPtr in_key_column [[maybe_unused]], + const DataTypePtr &) const +{ + if constexpr (dictionary_key_type == DictionaryKeyType::Simple) + { + PaddedPODArray keys_backup_storage; + const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage); + + PaddedPODArray keys_in_backup_storage; + const auto & keys_in = getColumnVectorData(this, in_key_column, keys_in_backup_storage); + + size_t hierarchical_attribute_index = *dict_struct.hierarchical_attribute_index; + + const auto & 
dictionary_attribute = dict_struct.attributes[hierarchical_attribute_index]; + auto & hierarchical_attribute = attributes[hierarchical_attribute_index]; + + const auto & key_attribute_container = key_attribute.container; + + const UInt64 null_value = dictionary_attribute.null_value.template get(); + const AttributeContainerType & parent_keys_container = std::get>(hierarchical_attribute.container); + + auto is_key_valid_func = [&](auto & key) { return key_attribute_container.find(key) != key_attribute_container.end(); }; + + size_t keys_found = 0; + + auto get_parent_func = [&](auto & hierarchy_key) + { + std::optional result; + + auto it = key_attribute_container.find(hierarchy_key); + + if (it != key_attribute_container.end()) + result = parent_keys_container[it->getMapped()]; + + keys_found += result.has_value(); + + return result; + }; + + auto result = getKeysIsInHierarchyColumn(keys, keys_in, null_value, is_key_valid_func, get_parent_func); + + query_count.fetch_add(keys.size(), std::memory_order_relaxed); + found_count.fetch_add(keys_found, std::memory_order_relaxed); + + return result; + } + else + { + return nullptr; + } +} + +template +ColumnPtr HashedArrayDictionary::getDescendants( + ColumnPtr key_column [[maybe_unused]], + const DataTypePtr &, + size_t level [[maybe_unused]]) const +{ + if constexpr (dictionary_key_type == DictionaryKeyType::Simple) + { + PaddedPODArray keys_backup; + const auto & keys = getColumnVectorData(this, key_column, keys_backup); + + size_t hierarchical_attribute_index = *dict_struct.hierarchical_attribute_index; + + const auto & hierarchical_attribute = attributes[hierarchical_attribute_index]; + const AttributeContainerType & parent_keys_container = std::get>(hierarchical_attribute.container); + + const auto & key_attribute_container = key_attribute.container; + + HashMap index_to_key; + index_to_key.reserve(key_attribute.container.size()); + + for (auto & [key, value] : key_attribute_container) + index_to_key[value] = key; + + HashMap> parent_to_child; + + for (size_t i = 0; i < parent_keys_container.size(); ++i) + { + const auto * it = index_to_key.find(i); + if (it == index_to_key.end()) + continue; + + auto parent_key = it->getMapped(); + auto child_key = parent_keys_container[i]; + parent_to_child[parent_key].emplace_back(child_key); + } + + size_t keys_found = 0; + auto result = getKeysDescendantsArray(keys, parent_to_child, level, keys_found); + + query_count.fetch_add(keys.size(), std::memory_order_relaxed); + found_count.fetch_add(keys_found, std::memory_order_relaxed); + + return result; + } + else + { + return nullptr; + } +} + +template +void HashedArrayDictionary::createAttributes() +{ + const auto size = dict_struct.attributes.size(); + attributes.reserve(size); + + for (const auto & dictionary_attribute : dict_struct.attributes) + { + auto type_call = [&, this](const auto & dictionary_attribute_type) + { + using Type = std::decay_t; + using AttributeType = typename Type::AttributeType; + using ValueType = DictionaryValueType; + + auto is_index_null = dictionary_attribute.is_nullable ? std::make_optional>() : std::optional>{}; + std::unique_ptr string_arena = std::is_same_v ? 
std::make_unique() : nullptr; + Attribute attribute{dictionary_attribute.underlying_type, AttributeContainerType(), std::move(is_index_null), std::move(string_arena)}; + attributes.emplace_back(std::move(attribute)); + }; + + callOnDictionaryAttributeType(dictionary_attribute.underlying_type, type_call); + } +} + +template +void HashedArrayDictionary::updateData() +{ + if (!update_field_loaded_block || update_field_loaded_block->rows() == 0) + { + QueryPipeline pipeline(source_ptr->loadUpdatedAll()); + + PullingPipelineExecutor executor(pipeline); + Block block; + while (executor.pull(block)) + { + /// We are using this to keep saved data if input stream consists of multiple blocks + if (!update_field_loaded_block) + update_field_loaded_block = std::make_shared(block.cloneEmpty()); + + for (size_t attribute_index = 0; attribute_index < block.columns(); ++attribute_index) + { + const IColumn & update_column = *block.getByPosition(attribute_index).column.get(); + MutableColumnPtr saved_column = update_field_loaded_block->getByPosition(attribute_index).column->assumeMutable(); + saved_column->insertRangeFrom(update_column, 0, update_column.size()); + } + } + } + else + { + auto pipe = source_ptr->loadUpdatedAll(); + mergeBlockWithPipe( + dict_struct.getKeysSize(), + *update_field_loaded_block, + std::move(pipe)); + } + + if (update_field_loaded_block) + { + resize(update_field_loaded_block->rows()); + blockToAttributes(*update_field_loaded_block.get()); + } +} + +template +void HashedArrayDictionary::blockToAttributes(const Block & block [[maybe_unused]]) +{ + size_t skip_keys_size_offset = dict_struct.getKeysSize(); + + Columns key_columns; + key_columns.reserve(skip_keys_size_offset); + + /// Split into keys columns and attribute columns + for (size_t i = 0; i < skip_keys_size_offset; ++i) + key_columns.emplace_back(block.safeGetByPosition(i).column); + + DictionaryKeysArenaHolder arena_holder; + DictionaryKeysExtractor keys_extractor(key_columns, arena_holder.getComplexKeyArena()); + const size_t keys_size = keys_extractor.getKeysSize(); + + Field column_value_to_insert; + + for (size_t key_index = 0; key_index < keys_size; ++key_index) + { + auto key = keys_extractor.extractCurrentKey(); + + auto it = key_attribute.container.find(key); + + if (it != key_attribute.container.end()) + { + keys_extractor.rollbackCurrentKey(); + continue; + } + + if constexpr (std::is_same_v) + key = copyKeyInArena(key); + + key_attribute.container.insert({key, element_count}); + + for (size_t attribute_index = 0; attribute_index < attributes.size(); ++attribute_index) + { + const IColumn & attribute_column = *block.safeGetByPosition(skip_keys_size_offset + attribute_index).column; + auto & attribute = attributes[attribute_index]; + bool attribute_is_nullable = attribute.is_index_null.has_value(); + + attribute_column.get(key_index, column_value_to_insert); + + auto type_call = [&](const auto & dictionary_attribute_type) + { + using Type = std::decay_t; + using AttributeType = typename Type::AttributeType; + using AttributeValueType = DictionaryValueType; + + auto & attribute_container = std::get>(attribute.container); + attribute_container.emplace_back(); + + if (attribute_is_nullable) { + attribute.is_index_null->emplace_back(); + } + + if (attribute_is_nullable && column_value_to_insert.isNull()) + { + (*attribute.is_index_null).back() = true; + return; + } + + if constexpr (std::is_same_v) + { + String & value_to_insert = column_value_to_insert.get(); + size_t value_to_insert_size = value_to_insert.size(); 
+ + const char * string_in_arena = attribute.string_arena->insert(value_to_insert.data(), value_to_insert_size); + + StringRef string_in_arena_reference = StringRef{string_in_arena, value_to_insert_size}; + attribute_container.back() = string_in_arena_reference; + } + else + { + auto value_to_insert = column_value_to_insert.get>(); + attribute_container.back() = value_to_insert; + } + }; + + callOnDictionaryAttributeType(attribute.type, type_call); + } + + ++element_count; + keys_extractor.rollbackCurrentKey(); + } +} + +template +void HashedArrayDictionary::resize(size_t added_rows) +{ + if (unlikely(!added_rows)) + return; + + key_attribute.container.reserve(added_rows); +} + +template +template +void HashedArrayDictionary::getItemsImpl( + const Attribute & attribute, + DictionaryKeysExtractor & keys_extractor, + ValueSetter && set_value [[maybe_unused]], + DefaultValueExtractor & default_value_extractor) const +{ + const auto & key_attribute_container = key_attribute.container; + const auto & attribute_container = std::get>(attribute.container); + const size_t keys_size = keys_extractor.getKeysSize(); + + size_t keys_found = 0; + + for (size_t key_index = 0; key_index < keys_size; ++key_index) + { + auto key = keys_extractor.extractCurrentKey(); + + const auto it = key_attribute_container.find(key); + + if (it != key_attribute_container.end()) + { + size_t element_index = it->getMapped(); + + const auto & element = attribute_container[element_index]; + + if constexpr (is_nullable) + set_value(key_index, element, (*attribute.is_index_null)[element_index]); + else + set_value(key_index, element, false); + + ++keys_found; + } + else + { + if constexpr (is_nullable) + set_value(key_index, default_value_extractor[key_index], default_value_extractor.isNullAt(key_index)); + else + set_value(key_index, default_value_extractor[key_index], false); + } + + keys_extractor.rollbackCurrentKey(); + } + + query_count.fetch_add(keys_size, std::memory_order_relaxed); + found_count.fetch_add(keys_found, std::memory_order_relaxed); +} + +template +StringRef HashedArrayDictionary::copyKeyInArena(StringRef key) +{ + size_t key_size = key.size; + char * place_for_key = complex_key_arena.alloc(key_size); + memcpy(reinterpret_cast(place_for_key), reinterpret_cast(key.data), key_size); + StringRef updated_key{place_for_key, key_size}; + return updated_key; +} + +template +void HashedArrayDictionary::loadData() +{ + if (!source_ptr->hasUpdateField()) + { + QueryPipeline pipeline; + pipeline = QueryPipeline(source_ptr->loadAll()); + + PullingPipelineExecutor executor(pipeline); + Block block; + while (executor.pull(block)) + { + resize(block.rows()); + blockToAttributes(block); + } + } + else + { + updateData(); + } + + if (configuration.require_nonempty && 0 == element_count) + throw Exception(ErrorCodes::DICTIONARY_IS_EMPTY, + "{}: dictionary source is empty and 'require_nonempty' property is set.", + full_name); +} + +template +void HashedArrayDictionary::calculateBytesAllocated() +{ + bytes_allocated += attributes.size() * sizeof(attributes.front()); + + bytes_allocated += key_attribute.container.size(); + + for (auto & attribute : attributes) + { + auto type_call = [&](const auto & dictionary_attribute_type) + { + using Type = std::decay_t; + using AttributeType = typename Type::AttributeType; + using ValueType = DictionaryValueType; + + const auto & container = std::get>(attribute.container); + bytes_allocated += sizeof(AttributeContainerType); + + if constexpr (std::is_same_v) + { + /// It is not accurate 
calculations + bytes_allocated += sizeof(Array) * container.size(); + } + else + { + bytes_allocated += container.allocated_bytes(); + } + + bucket_count = container.capacity(); + + if constexpr (std::is_same_v) + bytes_allocated += sizeof(Arena) + attribute.string_arena->size(); + }; + + callOnDictionaryAttributeType(attribute.type, type_call); + + if (attribute.string_arena) + bytes_allocated += attribute.string_arena->size(); + } + + bytes_allocated += complex_key_arena.size(); + + if (update_field_loaded_block) + bytes_allocated += update_field_loaded_block->allocatedBytes(); +} + +template +Pipe HashedArrayDictionary::read(const Names & column_names, size_t max_block_size) const +{ + PaddedPODArray keys; + + for (auto & [key, value] : key_attribute.container) + keys.emplace_back(key); + + if constexpr (dictionary_key_type == DictionaryKeyType::Simple) + return Pipe(std::make_shared(DictionarySourceData(shared_from_this(), std::move(keys), column_names), max_block_size)); + else + return Pipe(std::make_shared(DictionarySourceData(shared_from_this(), std::move(keys), column_names), max_block_size)); +} + +template class HashedArrayDictionary; +template class HashedArrayDictionary; + +void registerDictionaryArrayHashed(DictionaryFactory & factory) +{ + auto create_layout = [](const std::string & full_name, + const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + DictionarySourcePtr source_ptr, + DictionaryKeyType dictionary_key_type) -> DictionaryPtr + { + if (dictionary_key_type == DictionaryKeyType::Simple && dict_struct.key) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "'key' is not supported for simple key hashed array dictionary"); + else if (dictionary_key_type == DictionaryKeyType::Complex && dict_struct.id) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "'id' is not supported for complex key hashed array dictionary"); + + if (dict_struct.range_min || dict_struct.range_max) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "{}: elements .structure.range_min and .structure.range_max should be defined only " + "for a dictionary of layout 'range_hashed'", + full_name); + + const auto dict_id = StorageID::fromDictionaryConfig(config, config_prefix); + const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; + const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); + + HashedArrayDictionaryStorageConfiguration configuration{require_nonempty, dict_lifetime}; + + if (dictionary_key_type == DictionaryKeyType::Simple) + return std::make_unique>(dict_id, dict_struct, std::move(source_ptr), configuration); + else + return std::make_unique>(dict_id, dict_struct, std::move(source_ptr), configuration); + }; + + using namespace std::placeholders; + + factory.registerLayout("hashed_array", + [=](auto && a, auto && b, auto && c, auto && d, DictionarySourcePtr e, ContextPtr /* global_context */, bool /*created_from_ddl*/){ return create_layout(a, b, c, d, std::move(e), DictionaryKeyType::Simple); }, false); + factory.registerLayout("complex_key_hashed_array", + [=](auto && a, auto && b, auto && c, auto && d, DictionarySourcePtr e, ContextPtr /* global_context */, bool /*created_from_ddl*/){ return create_layout(a, b, c, d, std::move(e), DictionaryKeyType::Complex); }, true); +} + +} diff --git a/src/Dictionaries/HashedArrayDictionary.h b/src/Dictionaries/HashedArrayDictionary.h new file mode 100644 index 00000000000..053813bdc44 --- /dev/null +++ 
b/src/Dictionaries/HashedArrayDictionary.h @@ -0,0 +1,211 @@ +#pragma once + +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include +#include +#include +#include + +/** This dictionary stores all attributes in arrays. + * Key is stored in hash table and value is index into attribute array. + */ + +namespace DB +{ + +struct HashedArrayDictionaryStorageConfiguration +{ + const bool require_nonempty; + const DictionaryLifetime lifetime; +}; + +template +class HashedArrayDictionary final : public IDictionary +{ +public: + using KeyType = std::conditional_t; + + HashedArrayDictionary( + const StorageID & dict_id_, + const DictionaryStructure & dict_struct_, + DictionarySourcePtr source_ptr_, + const HashedArrayDictionaryStorageConfiguration & configuration_, + BlockPtr update_field_loaded_block_ = nullptr); + + std::string getTypeName() const override + { + if constexpr (dictionary_key_type == DictionaryKeyType::Simple) + return "HashedArray"; + else + return "ComplexHashedArray"; + } + + size_t getBytesAllocated() const override { return bytes_allocated; } + + size_t getQueryCount() const override { return query_count.load(std::memory_order_relaxed); } + + double getFoundRate() const override + { + size_t queries = query_count.load(std::memory_order_relaxed); + if (!queries) + return 0; + return static_cast(found_count.load(std::memory_order_relaxed)) / queries; + } + + double getHitRate() const override { return 1.0; } + + size_t getElementCount() const override { return element_count; } + + double getLoadFactor() const override { return static_cast(element_count) / bucket_count; } + + std::shared_ptr clone() const override + { + return std::make_shared>(getDictionaryID(), dict_struct, source_ptr->clone(), configuration, update_field_loaded_block); + } + + const IDictionarySource * getSource() const override { return source_ptr.get(); } + + const DictionaryLifetime & getLifetime() const override { return configuration.lifetime; } + + const DictionaryStructure & getStructure() const override { return dict_struct; } + + bool isInjective(const std::string & attribute_name) const override + { + return dict_struct.getAttribute(attribute_name).injective; + } + + DictionaryKeyType getKeyType() const override { return dictionary_key_type; } + + ColumnPtr getColumn( + const std::string& attribute_name, + const DataTypePtr & result_type, + const Columns & key_columns, + const DataTypes & key_types, + const ColumnPtr & default_values_column) const override; + + ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override; + + bool hasHierarchy() const override { return dictionary_key_type == DictionaryKeyType::Simple && dict_struct.hierarchical_attribute_index.has_value(); } + + ColumnPtr getHierarchy(ColumnPtr key_column, const DataTypePtr & hierarchy_attribute_type) const override; + + ColumnUInt8::Ptr isInHierarchy( + ColumnPtr key_column, + ColumnPtr in_key_column, + const DataTypePtr & key_type) const override; + + ColumnPtr getDescendants( + ColumnPtr key_column, + const DataTypePtr & key_type, + size_t level) const override; + + Pipe read(const Names & column_names, size_t max_block_size) const override; + +private: + + using KeyContainerType = std::conditional_t< + dictionary_key_type == DictionaryKeyType::Simple, + HashMap, + HashMapWithSavedHash>>; + + template + using AttributeContainerType = std::conditional_t, std::vector, PaddedPODArray>; + + struct Attribute final + { + AttributeUnderlyingType type; + + 
std::variant< + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType> + container; + + std::optional> is_index_null; + std::unique_ptr string_arena; + }; + + struct KeyAttribute final + { + + KeyContainerType container; + + }; + + void createAttributes(); + + void blockToAttributes(const Block & block); + + void updateData(); + + void loadData(); + + void calculateBytesAllocated(); + + template + void getItemsImpl( + const Attribute & attribute, + DictionaryKeysExtractor & keys_extractor, + ValueSetter && set_value, + DefaultValueExtractor & default_value_extractor) const; + + template + void getAttributeContainer(size_t attribute_index, GetContainerFunc && get_container_func); + + template + void getAttributeContainer(size_t attribute_index, GetContainerFunc && get_container_func) const; + + void resize(size_t added_rows); + + StringRef copyKeyInArena(StringRef key); + + const DictionaryStructure dict_struct; + const DictionarySourcePtr source_ptr; + const HashedArrayDictionaryStorageConfiguration configuration; + + std::vector attributes; + + KeyAttribute key_attribute; + + size_t bytes_allocated = 0; + size_t element_count = 0; + size_t bucket_count = 0; + mutable std::atomic query_count{0}; + mutable std::atomic found_count{0}; + + BlockPtr update_field_loaded_block; + Arena complex_key_arena; +}; + +extern template class HashedArrayDictionary; +extern template class HashedArrayDictionary; + +} diff --git a/src/Dictionaries/registerDictionaries.cpp b/src/Dictionaries/registerDictionaries.cpp index 8d24a6ea979..df191edd1c3 100644 --- a/src/Dictionaries/registerDictionaries.cpp +++ b/src/Dictionaries/registerDictionaries.cpp @@ -28,6 +28,7 @@ void registerDictionaryComplexKeyHashed(DictionaryFactory & factory); void registerDictionaryTrie(DictionaryFactory & factory); void registerDictionaryFlat(DictionaryFactory & factory); void registerDictionaryHashed(DictionaryFactory & factory); +void registerDictionaryArrayHashed(DictionaryFactory & factory); void registerDictionaryCache(DictionaryFactory & factory); void registerDictionaryPolygon(DictionaryFactory & factory); void registerDictionaryDirect(DictionaryFactory & factory); @@ -60,6 +61,7 @@ void registerDictionaries() registerDictionaryTrie(factory); registerDictionaryFlat(factory); registerDictionaryHashed(factory); + registerDictionaryArrayHashed(factory); registerDictionaryCache(factory); registerDictionaryPolygon(factory); registerDictionaryDirect(factory); diff --git a/tests/performance/hashed_array_dictionary.xml b/tests/performance/hashed_array_dictionary.xml new file mode 100644 index 00000000000..a26e654248f --- /dev/null +++ b/tests/performance/hashed_array_dictionary.xml @@ -0,0 +1,126 @@ + + + CREATE TABLE simple_key_hashed_array_dictionary_source_table + ( + id UInt64, + value_int UInt64, + value_string String, + value_decimal Decimal64(8), + value_string_nullable Nullable(String) + ) ENGINE = Memory; + + + + CREATE TABLE complex_key_hashed_array_dictionary_source_table + ( + id UInt64, + id_key String, + value_int UInt64, + value_string 
String, + value_decimal Decimal64(8), + value_string_nullable Nullable(String) + ) ENGINE = Memory; + + + + CREATE DICTIONARY simple_key_hashed_array_dictionary + ( + id UInt64, + value_int UInt64, + value_string String, + value_decimal Decimal64(8), + value_string_nullable Nullable(String) + ) + PRIMARY KEY id + SOURCE(CLICKHOUSE(DB 'default' TABLE 'simple_key_hashed_array_dictionary_source_table')) + LAYOUT(HASHED_ARRAY()) + LIFETIME(MIN 0 MAX 1000); + + + + CREATE DICTIONARY complex_key_hashed_array_dictionary + ( + id UInt64, + id_key String, + value_int UInt64, + value_string String, + value_decimal Decimal64(8), + value_string_nullable Nullable(String) + ) + PRIMARY KEY id, id_key + SOURCE(CLICKHOUSE(DB 'default' TABLE 'complex_key_hashed_array_dictionary_source_table')) + LAYOUT(COMPLEX_KEY_HASHED_ARRAY()) + LIFETIME(MIN 0 MAX 1000); + + + + INSERT INTO simple_key_hashed_array_dictionary_source_table + SELECT number, number, toString(number), toDecimal64(number, 8), toString(number) + FROM system.numbers + LIMIT 5000000; + + + + INSERT INTO complex_key_hashed_array_dictionary_source_table + SELECT number, toString(number), number, toString(number), toDecimal64(number, 8), toString(number) + FROM system.numbers + LIMIT 5000000; + + + + + column_name + + 'value_int' + 'value_string' + 'value_decimal' + 'value_string_nullable' + + + + + elements_count + + 5000000 + 7500000 + + + + + + WITH rand64() % toUInt64({elements_count}) as key + SELECT dictGet('default.simple_key_hashed_array_dictionary', {column_name}, key) + FROM system.numbers + LIMIT {elements_count} + FORMAT Null; + + + WITH rand64() % toUInt64({elements_count}) as key + SELECT dictHas('default.simple_key_hashed_array_dictionary', key) + FROM system.numbers + LIMIT {elements_count} + FORMAT Null; + + + + WITH (rand64() % toUInt64({elements_count}), toString(rand64() % toUInt64({elements_count}))) as key + SELECT dictGet('default.complex_key_hashed_array_dictionary', {column_name}, key) + FROM system.numbers + LIMIT {elements_count} + FORMAT Null; + + + WITH (rand64() % toUInt64({elements_count}), toString(rand64() % toUInt64({elements_count}))) as key + SELECT dictHas('default.complex_key_hashed_array_dictionary', key) + FROM system.numbers + LIMIT {elements_count} + FORMAT Null; + + + DROP TABLE IF EXISTS simple_key_hashed_array_dictionary_source_table; + DROP TABLE IF EXISTS complex_key_hashed_array_dictionary_source_table; + + DROP DICTIONARY IF EXISTS simple_key_hashed_array_dictionary; + DROP DICTIONARY IF EXISTS complex_key_hashed_array_dictionary; + + diff --git a/tests/queries/0_stateless/02098_hashed_array_dictionary_simple_key.reference b/tests/queries/0_stateless/02098_hashed_array_dictionary_simple_key.reference new file mode 100644 index 00000000000..6e88bbad146 --- /dev/null +++ b/tests/queries/0_stateless/02098_hashed_array_dictionary_simple_key.reference @@ -0,0 +1,66 @@ +Dictionary hashed_array_dictionary_simple_key_simple_attributes +dictGet existing value +value_0 value_second_0 +value_1 value_second_1 +value_2 value_second_2 +dictGet with non existing value +value_0 value_second_0 +value_1 value_second_1 +value_2 value_second_2 +value_first_default value_second_default +dictGetOrDefault existing value +value_0 value_second_0 +value_1 value_second_1 +value_2 value_second_2 +dictGetOrDefault non existing value +value_0 value_second_0 +value_1 value_second_1 +value_2 value_second_2 +default default +dictHas +1 +1 +1 +0 +select all values as input stream +0 value_0 value_second_0 +1 value_1 value_second_1 +2 
value_2 value_second_2 +Dictionary hashed_array_dictionary_simple_key_complex_attributes +dictGet existing value +value_0 value_second_0 +value_1 \N +value_2 value_second_2 +dictGet with non existing value +value_0 value_second_0 +value_1 \N +value_2 value_second_2 +value_first_default value_second_default +dictGetOrDefault existing value +value_0 value_second_0 +value_1 \N +value_2 value_second_2 +dictGetOrDefault non existing value +value_0 value_second_0 +value_1 \N +value_2 value_second_2 +default default +dictHas +1 +1 +1 +0 +select all values as input stream +0 value_0 value_second_0 +1 value_1 \N +2 value_2 value_second_2 +Dictionary hashed_array_dictionary_simple_key_hierarchy +dictGet +0 +0 +1 +1 +2 +dictGetHierarchy +[1] +[4,2,1] diff --git a/tests/queries/0_stateless/02098_hashed_array_dictionary_simple_key.sql b/tests/queries/0_stateless/02098_hashed_array_dictionary_simple_key.sql new file mode 100644 index 00000000000..8d792836562 --- /dev/null +++ b/tests/queries/0_stateless/02098_hashed_array_dictionary_simple_key.sql @@ -0,0 +1,125 @@ +DROP TABLE IF EXISTS simple_key_simple_attributes_source_table; +CREATE TABLE simple_key_simple_attributes_source_table +( + id UInt64, + value_first String, + value_second String +) +ENGINE = TinyLog; + +INSERT INTO simple_key_simple_attributes_source_table VALUES(0, 'value_0', 'value_second_0'); +INSERT INTO simple_key_simple_attributes_source_table VALUES(1, 'value_1', 'value_second_1'); +INSERT INTO simple_key_simple_attributes_source_table VALUES(2, 'value_2', 'value_second_2'); + +DROP DICTIONARY IF EXISTS hashed_array_dictionary_simple_key_simple_attributes; +CREATE DICTIONARY hashed_array_dictionary_simple_key_simple_attributes +( + id UInt64, + value_first String DEFAULT 'value_first_default', + value_second String DEFAULT 'value_second_default' +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE 'simple_key_simple_attributes_source_table')) +LAYOUT(HASHED_ARRAY()) +LIFETIME(MIN 1 MAX 1000); + +SELECT 'Dictionary hashed_array_dictionary_simple_key_simple_attributes'; +SELECT 'dictGet existing value'; +SELECT dictGet('hashed_array_dictionary_simple_key_simple_attributes', 'value_first', number) as value_first, + dictGet('hashed_array_dictionary_simple_key_simple_attributes', 'value_second', number) as value_second FROM system.numbers LIMIT 3; +SELECT 'dictGet with non existing value'; +SELECT dictGet('hashed_array_dictionary_simple_key_simple_attributes', 'value_first', number) as value_first, + dictGet('hashed_array_dictionary_simple_key_simple_attributes', 'value_second', number) as value_second FROM system.numbers LIMIT 4; +SELECT 'dictGetOrDefault existing value'; +SELECT dictGetOrDefault('hashed_array_dictionary_simple_key_simple_attributes', 'value_first', number, toString('default')) as value_first, + dictGetOrDefault('hashed_array_dictionary_simple_key_simple_attributes', 'value_second', number, toString('default')) as value_second FROM system.numbers LIMIT 3; +SELECT 'dictGetOrDefault non existing value'; +SELECT dictGetOrDefault('hashed_array_dictionary_simple_key_simple_attributes', 'value_first', number, toString('default')) as value_first, + dictGetOrDefault('hashed_array_dictionary_simple_key_simple_attributes', 'value_second', number, toString('default')) as value_second FROM system.numbers LIMIT 4; +SELECT 'dictHas'; +SELECT dictHas('hashed_array_dictionary_simple_key_simple_attributes', number) FROM system.numbers LIMIT 4; +SELECT 'select all values as input stream'; +SELECT * FROM 
hashed_array_dictionary_simple_key_simple_attributes ORDER BY id; + +DROP DICTIONARY hashed_array_dictionary_simple_key_simple_attributes; + +DROP TABLE simple_key_simple_attributes_source_table; + +DROP TABLE IF EXISTS simple_key_complex_attributes_source_table; +CREATE TABLE simple_key_complex_attributes_source_table +( + id UInt64, + value_first String, + value_second Nullable(String) +) +ENGINE = TinyLog; + +INSERT INTO simple_key_complex_attributes_source_table VALUES(0, 'value_0', 'value_second_0'); +INSERT INTO simple_key_complex_attributes_source_table VALUES(1, 'value_1', NULL); +INSERT INTO simple_key_complex_attributes_source_table VALUES(2, 'value_2', 'value_second_2'); + +DROP DICTIONARY IF EXISTS hashed_array_dictionary_simple_key_complex_attributes; +CREATE DICTIONARY hashed_array_dictionary_simple_key_complex_attributes +( + id UInt64, + value_first String DEFAULT 'value_first_default', + value_second Nullable(String) DEFAULT 'value_second_default' +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE 'simple_key_complex_attributes_source_table')) +LAYOUT(HASHED_ARRAY()) +LIFETIME(MIN 1 MAX 1000); + +SELECT 'Dictionary hashed_array_dictionary_simple_key_complex_attributes'; +SELECT 'dictGet existing value'; +SELECT dictGet('hashed_array_dictionary_simple_key_complex_attributes', 'value_first', number) as value_first, + dictGet('hashed_array_dictionary_simple_key_complex_attributes', 'value_second', number) as value_second FROM system.numbers LIMIT 3; +SELECT 'dictGet with non existing value'; +SELECT dictGet('hashed_array_dictionary_simple_key_complex_attributes', 'value_first', number) as value_first, + dictGet('hashed_array_dictionary_simple_key_complex_attributes', 'value_second', number) as value_second FROM system.numbers LIMIT 4; +SELECT 'dictGetOrDefault existing value'; +SELECT dictGetOrDefault('hashed_array_dictionary_simple_key_complex_attributes', 'value_first', number, toString('default')) as value_first, + dictGetOrDefault('hashed_array_dictionary_simple_key_complex_attributes', 'value_second', number, toString('default')) as value_second FROM system.numbers LIMIT 3; +SELECT 'dictGetOrDefault non existing value'; +SELECT dictGetOrDefault('hashed_array_dictionary_simple_key_complex_attributes', 'value_first', number, toString('default')) as value_first, + dictGetOrDefault('hashed_array_dictionary_simple_key_complex_attributes', 'value_second', number, toString('default')) as value_second FROM system.numbers LIMIT 4; +SELECT 'dictHas'; +SELECT dictHas('hashed_array_dictionary_simple_key_complex_attributes', number) FROM system.numbers LIMIT 4; +SELECT 'select all values as input stream'; +SELECT * FROM hashed_array_dictionary_simple_key_complex_attributes ORDER BY id; + +DROP DICTIONARY hashed_array_dictionary_simple_key_complex_attributes; +DROP TABLE simple_key_complex_attributes_source_table; + +DROP TABLE IF EXISTS simple_key_hierarchy_table; +CREATE TABLE simple_key_hierarchy_table +( + id UInt64, + parent_id UInt64 +) ENGINE = TinyLog(); + +INSERT INTO simple_key_hierarchy_table VALUES (1, 0); +INSERT INTO simple_key_hierarchy_table VALUES (2, 1); +INSERT INTO simple_key_hierarchy_table VALUES (3, 1); +INSERT INTO simple_key_hierarchy_table VALUES (4, 2); + +DROP DICTIONARY IF EXISTS hashed_array_dictionary_simple_key_hierarchy; +CREATE DICTIONARY hashed_array_dictionary_simple_key_hierarchy +( + id UInt64, + parent_id UInt64 HIERARCHICAL +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'simple_key_hierarchy_table')) 
+LAYOUT(HASHED_ARRAY()) +LIFETIME(MIN 1 MAX 1000); + +SELECT 'Dictionary hashed_array_dictionary_simple_key_hierarchy'; +SELECT 'dictGet'; +SELECT dictGet('hashed_array_dictionary_simple_key_hierarchy', 'parent_id', number) FROM system.numbers LIMIT 5; +SELECT 'dictGetHierarchy'; +SELECT dictGetHierarchy('hashed_array_dictionary_simple_key_hierarchy', toUInt64(1)); +SELECT dictGetHierarchy('hashed_array_dictionary_simple_key_hierarchy', toUInt64(4)); + +DROP DICTIONARY hashed_array_dictionary_simple_key_hierarchy; +DROP TABLE simple_key_hierarchy_table; diff --git a/tests/queries/0_stateless/02099_hashed_array_dictionary_complex_key.reference b/tests/queries/0_stateless/02099_hashed_array_dictionary_complex_key.reference new file mode 100644 index 00000000000..ec32fa72b4e --- /dev/null +++ b/tests/queries/0_stateless/02099_hashed_array_dictionary_complex_key.reference @@ -0,0 +1,56 @@ +Dictionary hashed_array_dictionary_complex_key_simple_attributes +dictGet existing value +value_0 value_second_0 +value_1 value_second_1 +value_2 value_second_2 +dictGet with non existing value +value_0 value_second_0 +value_1 value_second_1 +value_2 value_second_2 +value_first_default value_second_default +dictGetOrDefault existing value +value_0 value_second_0 +value_1 value_second_1 +value_2 value_second_2 +dictGetOrDefault non existing value +value_0 value_second_0 +value_1 value_second_1 +value_2 value_second_2 +default default +dictHas +1 +1 +1 +0 +select all values as input stream +0 id_key_0 value_0 value_second_0 +1 id_key_1 value_1 value_second_1 +2 id_key_2 value_2 value_second_2 +Dictionary hashed_array_dictionary_complex_key_complex_attributes +dictGet existing value +value_0 value_second_0 +value_1 \N +value_2 value_second_2 +dictGet with non existing value +value_0 value_second_0 +value_1 \N +value_2 value_second_2 +value_first_default value_second_default +dictGetOrDefault existing value +value_0 value_second_0 +value_1 \N +value_2 value_second_2 +dictGetOrDefault non existing value +value_0 value_second_0 +value_1 \N +value_2 value_second_2 +default default +dictHas +1 +1 +1 +0 +select all values as input stream +0 id_key_0 value_0 value_second_0 +1 id_key_1 value_1 \N +2 id_key_2 value_2 value_second_2 From 33ef3edcfabd41f2c1968fea08f445a22a730471 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 15 Oct 2021 18:54:41 +0300 Subject: [PATCH 256/438] Updated tests --- ...99_hashed_array_dictionary_complex_key.sql | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 tests/queries/0_stateless/02099_hashed_array_dictionary_complex_key.sql diff --git a/tests/queries/0_stateless/02099_hashed_array_dictionary_complex_key.sql b/tests/queries/0_stateless/02099_hashed_array_dictionary_complex_key.sql new file mode 100644 index 00000000000..219b4ab9b71 --- /dev/null +++ b/tests/queries/0_stateless/02099_hashed_array_dictionary_complex_key.sql @@ -0,0 +1,97 @@ +DROP TABLE IF EXISTS complex_key_simple_attributes_source_table; +CREATE TABLE complex_key_simple_attributes_source_table +( + id UInt64, + id_key String, + value_first String, + value_second String +) +ENGINE = TinyLog; + +INSERT INTO complex_key_simple_attributes_source_table VALUES(0, 'id_key_0', 'value_0', 'value_second_0'); +INSERT INTO complex_key_simple_attributes_source_table VALUES(1, 'id_key_1', 'value_1', 'value_second_1'); +INSERT INTO complex_key_simple_attributes_source_table VALUES(2, 'id_key_2', 'value_2', 'value_second_2'); + +DROP DICTIONARY IF EXISTS 
hashed_array_dictionary_complex_key_simple_attributes; +CREATE DICTIONARY hashed_array_dictionary_complex_key_simple_attributes +( + id UInt64, + id_key String, + value_first String DEFAULT 'value_first_default', + value_second String DEFAULT 'value_second_default' +) +PRIMARY KEY id, id_key +SOURCE(CLICKHOUSE(TABLE 'complex_key_simple_attributes_source_table')) +LIFETIME(MIN 1 MAX 1000) +LAYOUT(COMPLEX_KEY_HASHED_ARRAY()); + +SELECT 'Dictionary hashed_array_dictionary_complex_key_simple_attributes'; +SELECT 'dictGet existing value'; +SELECT dictGet('hashed_array_dictionary_complex_key_simple_attributes', 'value_first', (number, concat('id_key_', toString(number)))) as value_first, + dictGet('hashed_array_dictionary_complex_key_simple_attributes', 'value_second', (number, concat('id_key_', toString(number)))) as value_second FROM system.numbers LIMIT 3; +SELECT 'dictGet with non existing value'; +SELECT dictGet('hashed_array_dictionary_complex_key_simple_attributes', 'value_first', (number, concat('id_key_', toString(number)))) as value_first, + dictGet('hashed_array_dictionary_complex_key_simple_attributes', 'value_second', (number, concat('id_key_', toString(number)))) as value_second FROM system.numbers LIMIT 4; +SELECT 'dictGetOrDefault existing value'; +SELECT dictGetOrDefault('hashed_array_dictionary_complex_key_simple_attributes', 'value_first', (number, concat('id_key_', toString(number))), toString('default')) as value_first, + dictGetOrDefault('hashed_array_dictionary_complex_key_simple_attributes', 'value_second', (number, concat('id_key_', toString(number))), toString('default')) as value_second FROM system.numbers LIMIT 3; +SELECT 'dictGetOrDefault non existing value'; +SELECT dictGetOrDefault('hashed_array_dictionary_complex_key_simple_attributes', 'value_first', (number, concat('id_key_', toString(number))), toString('default')) as value_first, + dictGetOrDefault('hashed_array_dictionary_complex_key_simple_attributes', 'value_second', (number, concat('id_key_', toString(number))), toString('default')) as value_second FROM system.numbers LIMIT 4; +SELECT 'dictHas'; +SELECT dictHas('hashed_array_dictionary_complex_key_simple_attributes', (number, concat('id_key_', toString(number)))) FROM system.numbers LIMIT 4; +SELECT 'select all values as input stream'; +SELECT * FROM hashed_array_dictionary_complex_key_simple_attributes ORDER BY (id, id_key); + +DROP DICTIONARY hashed_array_dictionary_complex_key_simple_attributes; + +DROP TABLE complex_key_simple_attributes_source_table; + +DROP TABLE IF EXISTS complex_key_complex_attributes_source_table; +CREATE TABLE complex_key_complex_attributes_source_table +( + id UInt64, + id_key String, + value_first String, + value_second Nullable(String) +) +ENGINE = TinyLog; + +INSERT INTO complex_key_complex_attributes_source_table VALUES(0, 'id_key_0', 'value_0', 'value_second_0'); +INSERT INTO complex_key_complex_attributes_source_table VALUES(1, 'id_key_1', 'value_1', NULL); +INSERT INTO complex_key_complex_attributes_source_table VALUES(2, 'id_key_2', 'value_2', 'value_second_2'); + +DROP DICTIONARY IF EXISTS hashed_array_dictionary_complex_key_complex_attributes; +CREATE DICTIONARY hashed_array_dictionary_complex_key_complex_attributes +( + id UInt64, + id_key String, + + value_first String DEFAULT 'value_first_default', + value_second Nullable(String) DEFAULT 'value_second_default' +) +PRIMARY KEY id, id_key +SOURCE(CLICKHOUSE(TABLE 'complex_key_complex_attributes_source_table')) +LIFETIME(MIN 1 MAX 1000) +LAYOUT(()); + +SELECT 
'Dictionary hashed_array_dictionary_complex_key_complex_attributes'; +SELECT 'dictGet existing value'; +SELECT dictGet('hashed_array_dictionary_complex_key_complex_attributes', 'value_first', (number, concat('id_key_', toString(number)))) as value_first, + dictGet('hashed_array_dictionary_complex_key_complex_attributes', 'value_second', (number, concat('id_key_', toString(number)))) as value_second FROM system.numbers LIMIT 3; +SELECT 'dictGet with non existing value'; +SELECT dictGet('hashed_array_dictionary_complex_key_complex_attributes', 'value_first', (number, concat('id_key_', toString(number)))) as value_first, + dictGet('hashed_array_dictionary_complex_key_complex_attributes', 'value_second', (number, concat('id_key_', toString(number)))) as value_second FROM system.numbers LIMIT 4; +SELECT 'dictGetOrDefault existing value'; +SELECT dictGetOrDefault('hashed_array_dictionary_complex_key_complex_attributes', 'value_first', (number, concat('id_key_', toString(number))), toString('default')) as value_first, + dictGetOrDefault('hashed_array_dictionary_complex_key_complex_attributes', 'value_second', (number, concat('id_key_', toString(number))), toString('default')) as value_second FROM system.numbers LIMIT 3; +SELECT 'dictGetOrDefault non existing value'; +SELECT dictGetOrDefault('hashed_array_dictionary_complex_key_complex_attributes', 'value_first', (number, concat('id_key_', toString(number))), toString('default')) as value_first, + dictGetOrDefault('hashed_array_dictionary_complex_key_complex_attributes', 'value_second', (number, concat('id_key_', toString(number))), toString('default')) as value_second FROM system.numbers LIMIT 4; +SELECT 'dictHas'; +SELECT dictHas('hashed_array_dictionary_complex_key_complex_attributes', (number, concat('id_key_', toString(number)))) FROM system.numbers LIMIT 4; +SELECT 'select all values as input stream'; +SELECT * FROM hashed_array_dictionary_complex_key_complex_attributes ORDER BY (id, id_key); + +DROP DICTIONARY hashed_array_dictionary_complex_key_complex_attributes; +DROP TABLE complex_key_complex_attributes_source_table; From e66735e21e14406801caa06c7fd0922d3e8a4ca2 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 15 Oct 2021 18:55:28 +0300 Subject: [PATCH 257/438] Fixed style check --- src/Dictionaries/HashedArrayDictionary.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/Dictionaries/HashedArrayDictionary.cpp b/src/Dictionaries/HashedArrayDictionary.cpp index 7160cb67e10..7706f7d6108 100644 --- a/src/Dictionaries/HashedArrayDictionary.cpp +++ b/src/Dictionaries/HashedArrayDictionary.cpp @@ -454,14 +454,15 @@ void HashedArrayDictionary::blockToAttributes(const Block & auto & attribute_container = std::get>(attribute.container); attribute_container.emplace_back(); - if (attribute_is_nullable) { - attribute.is_index_null->emplace_back(); - } - - if (attribute_is_nullable && column_value_to_insert.isNull()) + if (attribute_is_nullable) { - (*attribute.is_index_null).back() = true; - return; + attribute.is_index_null->emplace_back(); + + if (column_value_to_insert.isNull()) + { + (*attribute.is_index_null).back() = true; + return; + } } if constexpr (std::is_same_v) From 01fbd52758c0b72609999e16b4bee34126a6fe8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 15 Oct 2021 18:52:23 +0200 Subject: [PATCH 258/438] Fix crash with shortcircuit and locardinality in multiIf --- src/Functions/multiIf.cpp | 22 ++++--- ...owcardinality_shortcircuit_crash.reference | 60 
+++++++++++++++++++ ...2049_lowcardinality_shortcircuit_crash.sql | 45 ++++++++++++++ 3 files changed, 115 insertions(+), 12 deletions(-) create mode 100644 tests/queries/0_stateless/02049_lowcardinality_shortcircuit_crash.reference create mode 100644 tests/queries/0_stateless/02049_lowcardinality_shortcircuit_crash.sql diff --git a/src/Functions/multiIf.cpp b/src/Functions/multiIf.cpp index 1122d4892c6..3e5242d5f9b 100644 --- a/src/Functions/multiIf.cpp +++ b/src/Functions/multiIf.cpp @@ -124,8 +124,8 @@ public: */ struct Instruction { - const IColumn * condition = nullptr; - const IColumn * source = nullptr; + IColumn::Ptr condition = nullptr; + IColumn::Ptr source = nullptr; bool condition_always_true = false; bool condition_is_nullable = false; @@ -160,15 +160,15 @@ public: } else { - const ColumnWithTypeAndName & cond_col = arguments[i]; + IColumn::Ptr cond_col = arguments[i].column->convertToFullColumnIfLowCardinality(); /// We skip branches that are always false. /// If we encounter a branch that is always true, we can finish. - if (cond_col.column->onlyNull()) + if (cond_col->onlyNull()) continue; - if (const auto * column_const = checkAndGetColumn(*cond_col.column)) + if (const auto * column_const = checkAndGetColumn(*cond_col)) { Field value = column_const->getField(); @@ -181,26 +181,24 @@ public: } else { - if (isColumnNullable(*cond_col.column)) - instruction.condition_is_nullable = true; - - instruction.condition = cond_col.column.get(); + instruction.condition = cond_col; + instruction.condition_is_nullable = instruction.condition->isNullable(); } - instruction.condition_is_short = cond_col.column->size() < arguments[0].column->size(); + instruction.condition_is_short = cond_col->size() < arguments[0].column->size(); } const ColumnWithTypeAndName & source_col = arguments[source_idx]; instruction.source_is_short = source_col.column->size() < arguments[0].column->size(); if (source_col.type->equals(*return_type)) { - instruction.source = source_col.column.get(); + instruction.source = source_col.column; } else { /// Cast all columns to result type. 
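The Instruction change above replaces raw `const IColumn *` members with reference-counted `IColumn::Ptr`: once a condition column is materialized through `convertToFullColumnIfLowCardinality()`, something must own the resulting temporary, otherwise the stored pointer dangles. A generic sketch of that ownership issue using `std::shared_ptr`; the `Column` and `materialize` names here are stand-ins, not the ClickHouse column API.

``` cpp
// Sketch of the lifetime bug class fixed above: storing a raw pointer to a
// freshly materialized object lets it be destroyed while still referenced.
#include <iostream>
#include <memory>
#include <vector>

struct Column
{
    std::vector<int> data;
};

using ColumnPtr = std::shared_ptr<const Column>;

// Stand-in for convertToFullColumnIfLowCardinality(): may return a new column.
ColumnPtr materialize(const ColumnPtr & src)
{
    return std::make_shared<Column>(Column{src->data});
}

struct Instruction
{
    // Holding a ColumnPtr (as in the fix) keeps the materialized column alive.
    // With a raw `const Column *` member, the temporary returned by
    // materialize() would be destroyed once the local ColumnPtr went away.
    ColumnPtr condition;
};

int main()
{
    ColumnPtr source = std::make_shared<Column>(Column{{1, 2, 3}});

    Instruction instruction;
    {
        ColumnPtr cond = materialize(source); // temporary full column
        instruction.condition = cond;         // shared ownership outlives this scope
    }

    std::cout << instruction.condition->data.size() << '\n'; // safe: prints 3
    return 0;
}
```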
converted_columns_holder.emplace_back(castColumn(source_col, return_type)); - instruction.source = converted_columns_holder.back().get(); + instruction.source = converted_columns_holder.back(); } if (instruction.source && isColumnConst(*instruction.source)) diff --git a/tests/queries/0_stateless/02049_lowcardinality_shortcircuit_crash.reference b/tests/queries/0_stateless/02049_lowcardinality_shortcircuit_crash.reference new file mode 100644 index 00000000000..c84236dce7d --- /dev/null +++ b/tests/queries/0_stateless/02049_lowcardinality_shortcircuit_crash.reference @@ -0,0 +1,60 @@ +0 0 +1 1 +2 2 +3 3 +4 40 +5 50 +6 60 +7 70 +8 800 +9 900 +10 1000 +11 1100 +12 12000 +13 13000 +14 14000 +15 15000 +16 160000 +17 170000 +18 180000 +19 190000 +0 0 +1 1 +2 2 +3 3 +4 40 +5 50 +6 60 +7 70 +8 80000 +9 90000 +10 100000 +11 110000 +12 120000 +13 130000 +14 140000 +15 150000 +16 160000 +17 170000 +18 180000 +19 190000 +0 0 +1 1 +2 2 +3 3 +4 40 +5 50 +6 60 +7 70 +8 800 +9 900 +10 1000 +11 1100 +12 12000 +13 13000 +14 14000 +15 15000 +16 160000 +17 170000 +18 180000 +19 190000 diff --git a/tests/queries/0_stateless/02049_lowcardinality_shortcircuit_crash.sql b/tests/queries/0_stateless/02049_lowcardinality_shortcircuit_crash.sql new file mode 100644 index 00000000000..2a837380250 --- /dev/null +++ b/tests/queries/0_stateless/02049_lowcardinality_shortcircuit_crash.sql @@ -0,0 +1,45 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/30231 +SELECT * +FROM ( + SELECT number, + multiIf( + CAST(number < 4, 'UInt8'), toString(number), + CAST(number < 8, 'LowCardinality(UInt8)'), toString(number * 10), + CAST(number < 12, 'Nullable(UInt8)'), toString(number * 100), + CAST(number < 16, 'LowCardinality(Nullable(UInt8))'), toString(number * 1000), + toString(number * 10000)) as m + FROM system.numbers + LIMIT 20 + ) +ORDER BY number +SETTINGS short_circuit_function_evaluation='enable'; + +SELECT * +FROM ( + SELECT number, + multiIf( + CAST(number < 4, 'UInt8'), toString(number), + CAST(number < 8, 'LowCardinality(UInt8)'), toString(number * 10), + CAST(NULL, 'Nullable(UInt8)'), toString(number * 100), + CAST(NULL, 'LowCardinality(Nullable(UInt8))'), toString(number * 1000), + toString(number * 10000)) as m + FROM system.numbers + LIMIT 20 + ) +ORDER BY number +SETTINGS short_circuit_function_evaluation='enable'; + +SELECT * +FROM ( + SELECT number, + multiIf( + CAST(number < 4, 'UInt8'), toString(number), + CAST(number < 8, 'LowCardinality(UInt8)'), toString(number * 10)::LowCardinality(String), + CAST(number < 12, 'Nullable(UInt8)'), toString(number * 100)::Nullable(String), + CAST(number < 16, 'LowCardinality(Nullable(UInt8))'), toString(number * 1000)::LowCardinality(Nullable(String)), + toString(number * 10000)) as m + FROM system.numbers + LIMIT 20 + ) +ORDER BY number +SETTINGS short_circuit_function_evaluation='enable'; From ec188000b63aa050a9539b331719469e53749dce Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Fri, 15 Oct 2021 17:06:20 +0000 Subject: [PATCH 259/438] Style + more uncomment --- src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt | 2 +- .../fuzzers/codegen_fuzzer/codegen_select_fuzzer.cpp | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt b/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt index f87340b5024..6f6b0d07661 100644 --- a/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt +++ b/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt @@ -30,7 +30,7 @@ add_custom_command( 
"${CURRENT_DIR_IN_BINARY}/out.cpp" "${CURRENT_DIR_IN_BINARY}/out.proto" COMMAND python3 gen.py clickhouse.g out.cpp out.proto - # DEPENDS "${CURRENT_DIR_IN_BINARY}/clickhouse.g" + DEPENDS "${CURRENT_DIR_IN_BINARY}/clickhouse.g" ) PROTOBUF_GENERATE_CPP(PROTO_SRCS PROTO_HDRS "${CURRENT_DIR_IN_BINARY}/out.proto") diff --git a/src/Parsers/fuzzers/codegen_fuzzer/codegen_select_fuzzer.cpp b/src/Parsers/fuzzers/codegen_fuzzer/codegen_select_fuzzer.cpp index 418a5014657..2bc7ad02e87 100644 --- a/src/Parsers/fuzzers/codegen_fuzzer/codegen_select_fuzzer.cpp +++ b/src/Parsers/fuzzers/codegen_fuzzer/codegen_select_fuzzer.cpp @@ -14,13 +14,14 @@ void GenerateSentence(const Sentence&, std::string &, int); -DEFINE_BINARY_PROTO_FUZZER(const Sentence& main) { +DEFINE_BINARY_PROTO_FUZZER(const Sentence& main) +{ static std::string input; input.reserve(4096); GenerateSentence(main, input, 0); - if (input.size()) { - + if (input.size()) + { std::cout << input << std::endl; DB::ParserQueryWithOutput parser(input.data() + input.size()); @@ -30,9 +31,8 @@ DEFINE_BINARY_PROTO_FUZZER(const Sentence& main) { DB::WriteBufferFromOStream out(std::cerr, 4096); DB::formatAST(*ast, out); std::cerr << std::endl; - } catch (...) { - } + catch (...) {} input.clear(); } From 882c876090ae88adeaf67db0e6ec1b47f4a2234b Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Fri, 15 Oct 2021 21:21:08 +0300 Subject: [PATCH 260/438] Update codegen_select_fuzzer.cpp --- src/Parsers/fuzzers/codegen_fuzzer/codegen_select_fuzzer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Parsers/fuzzers/codegen_fuzzer/codegen_select_fuzzer.cpp b/src/Parsers/fuzzers/codegen_fuzzer/codegen_select_fuzzer.cpp index 2bc7ad02e87..6a7a88a8545 100644 --- a/src/Parsers/fuzzers/codegen_fuzzer/codegen_select_fuzzer.cpp +++ b/src/Parsers/fuzzers/codegen_fuzzer/codegen_select_fuzzer.cpp @@ -25,7 +25,8 @@ DEFINE_BINARY_PROTO_FUZZER(const Sentence& main) std::cout << input << std::endl; DB::ParserQueryWithOutput parser(input.data() + input.size()); - try { + try + { DB::ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0); DB::WriteBufferFromOStream out(std::cerr, 4096); From f31bf32687257134a66571ab81cb03cfce4c2be8 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 15 Oct 2021 22:37:25 +0300 Subject: [PATCH 261/438] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 718aa751cc2..adaaa0f1bc7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -### ClickHouse release v21.10, 2021-10-14 +### ClickHouse release v21.10, 2021-10-16 #### Backward Incompatible Change From fceb763228e1268a5589db1a831ebcf929d28524 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 15 Oct 2021 23:00:30 +0300 Subject: [PATCH 262/438] Fixed test --- .../0_stateless/02099_hashed_array_dictionary_complex_key.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02099_hashed_array_dictionary_complex_key.sql b/tests/queries/0_stateless/02099_hashed_array_dictionary_complex_key.sql index 219b4ab9b71..4d2a825c8af 100644 --- a/tests/queries/0_stateless/02099_hashed_array_dictionary_complex_key.sql +++ b/tests/queries/0_stateless/02099_hashed_array_dictionary_complex_key.sql @@ -73,7 +73,7 @@ CREATE DICTIONARY hashed_array_dictionary_complex_key_complex_attributes PRIMARY KEY id, id_key SOURCE(CLICKHOUSE(TABLE 'complex_key_complex_attributes_source_table')) LIFETIME(MIN 1 MAX 1000) -LAYOUT(()); 
+LAYOUT(COMPLEX_KEY_HASHED_ARRAY()); SELECT 'Dictionary hashed_array_dictionary_complex_key_complex_attributes'; SELECT 'dictGet existing value'; From e4ae49e2f4f15d766cb11cbd86f1bd4ac61b6d91 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 15 Oct 2021 23:16:56 +0300 Subject: [PATCH 263/438] ComplexKeyHashedDictionary fix config parsing --- src/Dictionaries/HashedDictionary.cpp | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp index 1f3821096da..234f14a661c 100644 --- a/src/Dictionaries/HashedDictionary.cpp +++ b/src/Dictionaries/HashedDictionary.cpp @@ -731,8 +731,18 @@ void registerDictionaryHashed(DictionaryFactory & factory) const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); - const std::string & layout_prefix = sparse ? ".layout.sparse_hashed" : ".layout.hashed"; - const bool preallocate = config.getBool(config_prefix + layout_prefix + ".preallocate", false); + std::string dictionary_layout_name; + + if (dictionary_key_type == DictionaryKeyType::Simple) + dictionary_layout_name = "hashed"; + else + dictionary_layout_name = "complex_key_hashed"; + + if (sparse) + dictionary_layout_name = "sparse_" + dictionary_layout_name; + + const std::string dictionary_layout_prefix = ".layout." + dictionary_layout_name; + const bool preallocate = config.getBool(config_prefix + dictionary_layout_prefix + ".preallocate", false); HashedDictionaryStorageConfiguration configuration{preallocate, require_nonempty, dict_lifetime}; From fd14faeae22772322346cedda585a2665d9f866d Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 15 Oct 2021 23:18:20 +0300 Subject: [PATCH 264/438] Remove DataStreams folder. 
--- programs/benchmark/Benchmark.cpp | 4 +-- programs/copier/Internals.h | 2 +- programs/server/Server.cpp | 2 +- src/Bridge/LibraryBridgeHelper.cpp | 2 +- src/CMakeLists.txt | 4 +-- src/Client/ClientBase.cpp | 5 ++- src/Client/ClientBase.h | 2 +- src/Client/Connection.cpp | 8 ++--- src/Client/Connection.h | 2 +- src/Client/IServerConnection.h | 4 +-- .../InternalTextLogs.cpp | 2 +- .../InternalTextLogs.h | 0 src/Client/LocalConnection.h | 2 +- .../SquashingTransform.cpp | 2 +- .../SquashingTransform.h | 0 src/Core/Block.cpp | 23 +++++++++++++ src/Core/Block.h | 4 +++ src/Core/SettingsEnums.h | 2 +- src/DataStreams/finalizeBlock.cpp | 27 ---------------- src/DataStreams/finalizeBlock.h | 9 ------ src/DataStreams/materializeBlock.cpp | 29 ----------------- src/DataStreams/materializeBlock.h | 14 -------- src/Databases/DatabaseReplicated.h | 2 +- src/Databases/MySQL/DatabaseMySQL.cpp | 2 +- .../MySQL/FetchTablesColumnsList.cpp | 2 +- src/Databases/MySQL/MaterializeMetadata.cpp | 2 +- .../MySQL/MaterializedMySQLSyncThread.cpp | 2 +- .../MySQL/MaterializedMySQLSyncThread.h | 2 +- .../ClickHouseDictionarySource.cpp | 2 +- .../ExecutableDictionarySource.cpp | 4 +-- .../ExecutablePoolDictionarySource.cpp | 2 +- .../ExecutablePoolDictionarySource.h | 2 +- src/Dictionaries/HTTPDictionarySource.cpp | 2 +- src/Dictionaries/MySQLDictionarySource.h | 2 +- .../MarkInCompressedFile.h | 0 src/{DataStreams => Formats}/NativeReader.cpp | 2 +- src/{DataStreams => Formats}/NativeReader.h | 2 +- src/{DataStreams => Formats}/NativeWriter.cpp | 4 +-- src/{DataStreams => Formats}/NativeWriter.h | 0 .../TemporaryFileStream.cpp | 29 ++--------------- .../TemporaryFileStream.h | 20 +----------- src/{DataStreams => Formats}/formatBlock.cpp | 2 +- src/{DataStreams => Formats}/formatBlock.h | 0 src/Functions/formatRow.cpp | 1 - src/Interpreters/Aggregator.cpp | 3 +- src/Interpreters/Aggregator.h | 2 +- src/Interpreters/AsynchronousInsertQueue.cpp | 2 +- .../ExecuteScalarSubqueriesVisitor.cpp | 1 - src/Interpreters/HashJoin.cpp | 1 - src/Interpreters/HashJoin.h | 2 +- src/Interpreters/IInterpreter.h | 2 +- src/Interpreters/InterpreterDescribeQuery.cpp | 2 +- src/Interpreters/InterpreterExistsQuery.cpp | 2 +- src/Interpreters/InterpreterExplainQuery.cpp | 2 +- src/Interpreters/InterpreterInsertQuery.h | 2 +- src/Interpreters/InterpreterSelectQuery.cpp | 2 -- .../InterpreterShowCreateQuery.cpp | 2 +- src/Interpreters/InterpreterWatchQuery.cpp | 2 +- src/Interpreters/InterpreterWatchQuery.h | 2 +- src/Interpreters/MergeJoin.cpp | 3 +- src/Interpreters/MergeJoin.h | 2 +- src/Interpreters/ProcessList.h | 2 +- src/Interpreters/Set.h | 2 +- src/Interpreters/SortedBlocksWriter.cpp | 4 +-- src/Interpreters/SortedBlocksWriter.h | 2 +- src/Interpreters/TableJoin.h | 2 +- .../UserDefinedExecutableFunction.cpp | 4 +-- .../UserDefinedExecutableFunction.h | 2 +- .../UserDefinedExecutableFunctionFactory.cpp | 4 +-- src/Interpreters/executeDDLQueryOnCluster.h | 2 +- src/Interpreters/executeQuery.cpp | 3 +- src/Interpreters/executeQuery.h | 2 +- src/Interpreters/join_common.cpp | 2 -- src/NOTICE | 3 -- .../PullingAsyncPipelineExecutor.cpp | 4 +-- .../Executors/PullingAsyncPipelineExecutor.h | 4 +-- .../Executors/PullingPipelineExecutor.cpp | 2 +- .../Executors/PullingPipelineExecutor.h | 4 +-- src/Processors/Formats/IRowInputFormat.h | 2 +- src/Processors/Formats/Impl/NativeFormat.cpp | 4 +-- src/Processors/Formats/LazyOutputFormat.h | 6 ++-- src/Processors/Formats/PullingOutputFormat.h | 6 ++-- src/Processors/Pipe.h | 2 +- 
src/Processors/QueryPlan/AggregatingStep.h | 2 +- src/Processors/QueryPlan/CreatingSetsStep.h | 2 +- src/Processors/QueryPlan/CubeStep.h | 2 +- src/Processors/QueryPlan/DistinctStep.h | 2 +- src/Processors/QueryPlan/MergeSortingStep.h | 2 +- .../QueryPlan/MergingAggregatedStep.h | 2 +- src/Processors/QueryPlan/MergingSortedStep.h | 2 +- src/Processors/QueryPlan/OffsetStep.h | 2 +- src/Processors/QueryPlan/PartialSortingStep.h | 2 +- src/Processors/QueryPlan/ReadFromRemote.cpp | 2 +- src/Processors/QueryPlan/RollupStep.h | 2 +- .../QueryPlan/SettingQuotaAndLimitsStep.h | 2 +- src/Processors/Sinks/RemoteSink.h | 27 ++++++++++++++++ .../Sources}/MySQLSource.cpp | 2 +- .../Sources}/MySQLSource.h | 0 src/Processors/Sources/RemoteSource.cpp | 6 ++-- .../Sources}/SQLiteSource.cpp | 0 .../Sources}/SQLiteSource.h | 0 .../Sources}/ShellCommandSource.h | 0 src/Processors/Sources/SourceWithProgress.h | 2 +- .../Sources/TemporaryFileLazySource.cpp | 32 +++++++++++++++++++ .../Sources/TemporaryFileLazySource.h | 28 ++++++++++++++++ .../Transforms/AggregatingTransform.cpp | 3 +- .../Transforms/CreatingSetsTransform.cpp | 1 - .../Transforms/CreatingSetsTransform.h | 2 +- src/Processors/Transforms/DistinctTransform.h | 2 +- .../Transforms/LimitsCheckingTransform.h | 4 +-- .../Transforms/MaterializingTransform.cpp | 1 - .../Transforms/MergeSortingTransform.cpp | 4 +-- .../Transforms/SortingTransform.cpp | 4 +-- .../Transforms/SquashingChunksTransform.h | 2 +- .../Transforms/TotalsHavingTransform.cpp | 21 +++++++++++- .../getSourceFromASTInsertQuery.cpp | 2 +- .../BlockIO.cpp | 2 +- src/{DataStreams => QueryPipeline}/BlockIO.h | 0 .../CMakeLists.txt | 0 .../ConnectionCollector.cpp | 2 +- .../ConnectionCollector.h | 0 .../ExecutionSpeedLimits.cpp | 2 +- .../ExecutionSpeedLimits.h | 2 +- .../ProfileInfo.cpp} | 16 +++++----- .../ProfileInfo.h} | 6 ++-- .../RemoteInserter.cpp} | 2 +- .../RemoteInserter.h} | 20 ------------ .../RemoteQueryExecutor.cpp | 6 ++-- .../RemoteQueryExecutor.h | 4 +-- .../RemoteQueryExecutorReadContext.cpp | 2 +- .../RemoteQueryExecutorReadContext.h | 0 .../SizeLimits.cpp | 2 +- .../SizeLimits.h | 0 .../StreamLocalLimits.h | 4 +-- .../examples/CMakeLists.txt | 0 .../narrowBlockInputStreams.cpp | 0 .../narrowBlockInputStreams.h | 0 .../gtest_blocks_size_merging_streams.cpp | 0 .../tests/gtest_check_sorted_stream.cpp | 0 src/Server/GRPCServer.cpp | 7 ++-- src/Server/TCPHandler.cpp | 6 ++-- src/Server/TCPHandler.h | 8 ++--- src/Storages/Distributed/DirectoryMonitor.cpp | 4 +-- src/Storages/Distributed/DistributedSink.cpp | 4 +-- src/Storages/MarkCache.h | 2 +- src/Storages/MergeTree/DataPartsExchange.cpp | 2 +- .../MergeTree/MergeTreeWriteAheadLog.h | 4 +-- src/Storages/MergeTree/MutateTask.cpp | 2 +- src/Storages/StorageDistributed.cpp | 2 +- src/Storages/StorageExecutable.cpp | 1 - src/Storages/StorageExecutable.h | 2 +- src/Storages/StorageMerge.cpp | 2 +- src/Storages/StorageMySQL.cpp | 2 +- src/Storages/StorageS3.cpp | 2 +- src/Storages/StorageS3Cluster.cpp | 4 +-- src/Storages/StorageSQLite.cpp | 2 +- src/Storages/StorageSet.cpp | 8 ++--- src/Storages/StorageStripeLog.cpp | 4 +-- src/Storages/getStructureOfRemoteTable.cpp | 2 +- src/TableFunctions/TableFunctionMySQL.cpp | 2 +- src/TableFunctions/TableFunctionS3Cluster.cpp | 2 +- 161 files changed, 311 insertions(+), 337 deletions(-) rename src/{DataStreams => Client}/InternalTextLogs.cpp (99%) rename src/{DataStreams => Client}/InternalTextLogs.h (100%) rename src/{DataStreams => Common}/SquashingTransform.cpp (98%) rename 
src/{DataStreams => Common}/SquashingTransform.h (100%) delete mode 100644 src/DataStreams/finalizeBlock.cpp delete mode 100644 src/DataStreams/finalizeBlock.h delete mode 100644 src/DataStreams/materializeBlock.cpp delete mode 100644 src/DataStreams/materializeBlock.h rename src/{DataStreams => Formats}/MarkInCompressedFile.h (100%) rename src/{DataStreams => Formats}/NativeReader.cpp (99%) rename src/{DataStreams => Formats}/NativeReader.h (98%) rename src/{DataStreams => Formats}/NativeWriter.cpp (98%) rename src/{DataStreams => Formats}/NativeWriter.h (100%) rename src/{DataStreams => Formats}/TemporaryFileStream.cpp (68%) rename src/{DataStreams => Formats}/TemporaryFileStream.h (63%) rename src/{DataStreams => Formats}/formatBlock.cpp (93%) rename src/{DataStreams => Formats}/formatBlock.h (100%) create mode 100644 src/Processors/Sinks/RemoteSink.h rename src/{Formats => Processors/Sources}/MySQLSource.cpp (99%) rename src/{Formats => Processors/Sources}/MySQLSource.h (100%) rename src/{DataStreams => Processors/Sources}/SQLiteSource.cpp (100%) rename src/{DataStreams => Processors/Sources}/SQLiteSource.h (100%) rename src/{DataStreams => Processors/Sources}/ShellCommandSource.h (100%) create mode 100644 src/Processors/Sources/TemporaryFileLazySource.cpp create mode 100644 src/Processors/Sources/TemporaryFileLazySource.h rename src/{DataStreams => QueryPipeline}/BlockIO.cpp (97%) rename src/{DataStreams => QueryPipeline}/BlockIO.h (100%) rename src/{DataStreams => QueryPipeline}/CMakeLists.txt (100%) rename src/{DataStreams => QueryPipeline}/ConnectionCollector.cpp (98%) rename src/{DataStreams => QueryPipeline}/ConnectionCollector.h (100%) rename src/{DataStreams => QueryPipeline}/ExecutionSpeedLimits.cpp (99%) rename src/{DataStreams => QueryPipeline}/ExecutionSpeedLimits.h (95%) rename src/{DataStreams/BlockStreamProfileInfo.cpp => QueryPipeline/ProfileInfo.cpp} (67%) rename src/{DataStreams/BlockStreamProfileInfo.h => QueryPipeline/ProfileInfo.h} (90%) rename src/{DataStreams/RemoteBlockOutputStream.cpp => QueryPipeline/RemoteInserter.cpp} (98%) rename src/{DataStreams/RemoteBlockOutputStream.h => QueryPipeline/RemoteInserter.h} (56%) rename src/{DataStreams => QueryPipeline}/RemoteQueryExecutor.cpp (99%) rename src/{DataStreams => QueryPipeline}/RemoteQueryExecutor.h (98%) rename src/{DataStreams => QueryPipeline}/RemoteQueryExecutorReadContext.cpp (99%) rename src/{DataStreams => QueryPipeline}/RemoteQueryExecutorReadContext.h (100%) rename src/{DataStreams => QueryPipeline}/SizeLimits.cpp (97%) rename src/{DataStreams => QueryPipeline}/SizeLimits.h (100%) rename src/{DataStreams => QueryPipeline}/StreamLocalLimits.h (91%) rename src/{DataStreams => QueryPipeline}/examples/CMakeLists.txt (100%) rename src/{DataStreams => QueryPipeline}/narrowBlockInputStreams.cpp (100%) rename src/{DataStreams => QueryPipeline}/narrowBlockInputStreams.h (100%) rename src/{DataStreams => QueryPipeline}/tests/gtest_blocks_size_merging_streams.cpp (100%) rename src/{DataStreams => QueryPipeline}/tests/gtest_check_sorted_stream.cpp (100%) diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index caa0a87bde2..1c276a83768 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include @@ -432,7 +432,7 @@ private: Progress progress; executor.setProgressCallback([&progress](const Progress & value) { progress.incrementPiecewiseAtomically(value); }); - 
BlockStreamProfileInfo info; + ProfileInfo info; while (Block block = executor.read()) info.update(block); diff --git a/programs/copier/Internals.h b/programs/copier/Internals.h index a9d8ca726fe..eb2622c6b26 100644 --- a/programs/copier/Internals.h +++ b/programs/copier/Internals.h @@ -49,7 +49,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 4ed5b114082..eb4b79e995b 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -62,7 +62,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Bridge/LibraryBridgeHelper.cpp b/src/Bridge/LibraryBridgeHelper.cpp index 319c6c344d7..9bc14627ac3 100644 --- a/src/Bridge/LibraryBridgeHelper.cpp +++ b/src/Bridge/LibraryBridgeHelper.cpp @@ -1,6 +1,6 @@ #include "LibraryBridgeHelper.h" -#include +#include #include #include #include diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 08755542ed1..09aaa85c394 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -49,7 +49,7 @@ add_subdirectory (Backups) add_subdirectory (Columns) add_subdirectory (Common) add_subdirectory (Core) -add_subdirectory (DataStreams) +add_subdirectory (QueryPipeline) add_subdirectory (DataTypes) add_subdirectory (Dictionaries) add_subdirectory (Disks) @@ -185,7 +185,7 @@ add_object_library(clickhouse_backups Backups) add_object_library(clickhouse_core Core) add_object_library(clickhouse_core_mysql Core/MySQL) add_object_library(clickhouse_compression Compression) -add_object_library(clickhouse_datastreams DataStreams) +add_object_library(clickhouse_querypipeline QueryPipeline) add_object_library(clickhouse_datatypes DataTypes) add_object_library(clickhouse_datatypes_serializations DataTypes/Serializations) add_object_library(clickhouse_databases Databases) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index e260ed11bd4..f8bed86d7ce 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -47,8 +47,7 @@ #include #include #include -#include -#include +#include namespace fs = std::filesystem; @@ -284,7 +283,7 @@ void ClientBase::onReceiveExceptionFromServer(std::unique_ptr && e) } -void ClientBase::onProfileInfo(const BlockStreamProfileInfo & profile_info) +void ClientBase::onProfileInfo(const ProfileInfo & profile_info) { if (profile_info.hasAppliedLimit() && output_format) output_format->setRowsBeforeLimit(profile_info.getRowsBeforeLimit()); diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 92f9d48e070..a66e4a0d1ba 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -112,7 +112,7 @@ private: void onTotals(Block & block, ASTPtr parsed_query); void onExtremes(Block & block, ASTPtr parsed_query); void onReceiveExceptionFromServer(std::unique_ptr && e); - void onProfileInfo(const BlockStreamProfileInfo & profile_info); + void onProfileInfo(const ProfileInfo & profile_info); void onEndOfStream(); void sendData(Block & sample, const ColumnsDescription & columns_description, ASTPtr parsed_query); diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 2aa157bb318..bdaf4ffc76f 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -9,8 +9,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include @@ -994,9 +994,9 @@ Progress Connection::receiveProgress() const } -BlockStreamProfileInfo Connection::receiveProfileInfo() const +ProfileInfo 
Connection::receiveProfileInfo() const { - BlockStreamProfileInfo profile_info; + ProfileInfo profile_info; profile_info.read(*in); return profile_info; } diff --git a/src/Client/Connection.h b/src/Client/Connection.h index a5130d876ea..b18b1f1e621 100644 --- a/src/Client/Connection.h +++ b/src/Client/Connection.h @@ -253,7 +253,7 @@ private: std::vector receiveMultistringMessage(UInt64 msg_type) const; std::unique_ptr receiveException() const; Progress receiveProgress() const; - BlockStreamProfileInfo receiveProfileInfo() const; + ProfileInfo receiveProfileInfo() const; void initInputBuffers(); void initBlockInput(); diff --git a/src/Client/IServerConnection.h b/src/Client/IServerConnection.h index 42886c72182..7424afc969d 100644 --- a/src/Client/IServerConnection.h +++ b/src/Client/IServerConnection.h @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include @@ -30,7 +30,7 @@ struct Packet std::unique_ptr exception; std::vector multistring_message; Progress progress; - BlockStreamProfileInfo profile_info; + ProfileInfo profile_info; std::vector part_uuids; Packet() : type(Protocol::Server::Hello) {} diff --git a/src/DataStreams/InternalTextLogs.cpp b/src/Client/InternalTextLogs.cpp similarity index 99% rename from src/DataStreams/InternalTextLogs.cpp rename to src/Client/InternalTextLogs.cpp index a5883d17f28..65592fee670 100644 --- a/src/DataStreams/InternalTextLogs.cpp +++ b/src/Client/InternalTextLogs.cpp @@ -1,4 +1,4 @@ -#include "InternalTextLogs.h" +#include #include #include #include diff --git a/src/DataStreams/InternalTextLogs.h b/src/Client/InternalTextLogs.h similarity index 100% rename from src/DataStreams/InternalTextLogs.h rename to src/Client/InternalTextLogs.h diff --git a/src/Client/LocalConnection.h b/src/Client/LocalConnection.h index dcea3ed0fc3..5536aeec964 100644 --- a/src/Client/LocalConnection.h +++ b/src/Client/LocalConnection.h @@ -2,7 +2,7 @@ #include "Connection.h" #include -#include +#include #include #include diff --git a/src/DataStreams/SquashingTransform.cpp b/src/Common/SquashingTransform.cpp similarity index 98% rename from src/DataStreams/SquashingTransform.cpp rename to src/Common/SquashingTransform.cpp index ea99dc49780..21fa25ed3af 100644 --- a/src/DataStreams/SquashingTransform.cpp +++ b/src/Common/SquashingTransform.cpp @@ -1,4 +1,4 @@ -#include +#include #include diff --git a/src/DataStreams/SquashingTransform.h b/src/Common/SquashingTransform.h similarity index 100% rename from src/DataStreams/SquashingTransform.h rename to src/Common/SquashingTransform.h diff --git a/src/Core/Block.cpp b/src/Core/Block.cpp index a59ac60155e..40fc5767132 100644 --- a/src/Core/Block.cpp +++ b/src/Core/Block.cpp @@ -707,4 +707,27 @@ ColumnPtr getColumnFromBlock(const Block & block, const NameAndTypePair & column return current_column; } + +Block materializeBlock(const Block & block) +{ + if (!block) + return block; + + Block res = block; + size_t columns = res.columns(); + for (size_t i = 0; i < columns; ++i) + { + auto & element = res.getByPosition(i); + element.column = element.column->convertToFullColumnIfConst(); + } + + return res; +} + +void materializeBlockInplace(Block & block) +{ + for (size_t i = 0; i < block.columns(); ++i) + block.getByPosition(i).column = block.getByPosition(i).column->convertToFullColumnIfConst(); +} + } diff --git a/src/Core/Block.h b/src/Core/Block.h index a7e3cee194b..e0a032094f6 100644 --- a/src/Core/Block.h +++ b/src/Core/Block.h @@ -196,4 +196,8 @@ void getBlocksDifference(const Block & lhs, const Block & rhs, 
std::string & out /// Properly handles cases, when column is a subcolumn and when it is compressed. ColumnPtr getColumnFromBlock(const Block & block, const NameAndTypePair & column); +/// Converts columns-constants to full columns ("materializes" them). +Block materializeBlock(const Block & block); +void materializeBlockInplace(Block & block); + } diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index a699da3062c..33c5a6d8645 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include diff --git a/src/DataStreams/finalizeBlock.cpp b/src/DataStreams/finalizeBlock.cpp deleted file mode 100644 index 56068edcc29..00000000000 --- a/src/DataStreams/finalizeBlock.cpp +++ /dev/null @@ -1,27 +0,0 @@ -#include -#include -#include -#include - - -namespace DB -{ - void finalizeBlock(Block & block) - { - for (size_t i = 0; i < block.columns(); ++i) - { - ColumnWithTypeAndName & current = block.getByPosition(i); - const DataTypeAggregateFunction * unfinalized_type = typeid_cast(current.type.get()); - - if (unfinalized_type) - { - current.type = unfinalized_type->getReturnType(); - if (current.column) - { - auto mut_column = IColumn::mutate(std::move(current.column)); - current.column = ColumnAggregateFunction::convertToValues(std::move(mut_column)); - } - } - } - } -} diff --git a/src/DataStreams/finalizeBlock.h b/src/DataStreams/finalizeBlock.h deleted file mode 100644 index 3c81ddae1c7..00000000000 --- a/src/DataStreams/finalizeBlock.h +++ /dev/null @@ -1,9 +0,0 @@ -#pragma once - -#include - -namespace DB -{ - /// Converts aggregate function columns with non-finalized states to final values - void finalizeBlock(Block & block); -} diff --git a/src/DataStreams/materializeBlock.cpp b/src/DataStreams/materializeBlock.cpp deleted file mode 100644 index 6b47cb87baa..00000000000 --- a/src/DataStreams/materializeBlock.cpp +++ /dev/null @@ -1,29 +0,0 @@ -#include - - -namespace DB -{ - -Block materializeBlock(const Block & block) -{ - if (!block) - return block; - - Block res = block; - size_t columns = res.columns(); - for (size_t i = 0; i < columns; ++i) - { - auto & element = res.getByPosition(i); - element.column = element.column->convertToFullColumnIfConst(); - } - - return res; -} - -void materializeBlockInplace(Block & block) -{ - for (size_t i = 0; i < block.columns(); ++i) - block.getByPosition(i).column = block.getByPosition(i).column->convertToFullColumnIfConst(); -} - -} diff --git a/src/DataStreams/materializeBlock.h b/src/DataStreams/materializeBlock.h deleted file mode 100644 index 5e1499319c1..00000000000 --- a/src/DataStreams/materializeBlock.h +++ /dev/null @@ -1,14 +0,0 @@ -#pragma once - -#include - - -namespace DB -{ - -/** Converts columns-constants to full columns ("materializes" them). 
- */ -Block materializeBlock(const Block & block); -void materializeBlockInplace(Block & block); - -} diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index aecbc1474f8..21d927dea77 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include diff --git a/src/Databases/MySQL/DatabaseMySQL.cpp b/src/Databases/MySQL/DatabaseMySQL.cpp index a30341ae927..24a285f11c4 100644 --- a/src/Databases/MySQL/DatabaseMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMySQL.cpp @@ -11,7 +11,7 @@ # include # include # include -# include +# include # include # include # include diff --git a/src/Databases/MySQL/FetchTablesColumnsList.cpp b/src/Databases/MySQL/FetchTablesColumnsList.cpp index 618f6bf6d34..ab144761e11 100644 --- a/src/Databases/MySQL/FetchTablesColumnsList.cpp +++ b/src/Databases/MySQL/FetchTablesColumnsList.cpp @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Databases/MySQL/MaterializeMetadata.cpp b/src/Databases/MySQL/MaterializeMetadata.cpp index 514978f2456..8de42760dc2 100644 --- a/src/Databases/MySQL/MaterializeMetadata.cpp +++ b/src/Databases/MySQL/MaterializeMetadata.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp index 9ec8a9523c6..d2c1195c0c5 100644 --- a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp +++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp @@ -16,7 +16,7 @@ # include # include # include -# include +# include # include # include # include diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.h b/src/Databases/MySQL/MaterializedMySQLSyncThread.h index 0cd0701439f..b8c985915dc 100644 --- a/src/Databases/MySQL/MaterializedMySQLSyncThread.h +++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.h @@ -8,7 +8,7 @@ # include # include -# include +# include # include # include # include diff --git a/src/Dictionaries/ClickHouseDictionarySource.cpp b/src/Dictionaries/ClickHouseDictionarySource.cpp index b563ac797c0..a19eca1fee7 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Dictionaries/ExecutableDictionarySource.cpp b/src/Dictionaries/ExecutableDictionarySource.cpp index 91aeda924a1..c09993c2a84 100644 --- a/src/Dictionaries/ExecutableDictionarySource.cpp +++ b/src/Dictionaries/ExecutableDictionarySource.cpp @@ -4,8 +4,8 @@ #include #include -#include -#include +#include +#include #include #include diff --git a/src/Dictionaries/ExecutablePoolDictionarySource.cpp b/src/Dictionaries/ExecutablePoolDictionarySource.cpp index 79e9b627836..8d1122b1194 100644 --- a/src/Dictionaries/ExecutablePoolDictionarySource.cpp +++ b/src/Dictionaries/ExecutablePoolDictionarySource.cpp @@ -4,7 +4,7 @@ #include #include -#include +#include #include #include diff --git a/src/Dictionaries/ExecutablePoolDictionarySource.h b/src/Dictionaries/ExecutablePoolDictionarySource.h index b7e8468b815..51215b6311b 100644 --- a/src/Dictionaries/ExecutablePoolDictionarySource.h +++ b/src/Dictionaries/ExecutablePoolDictionarySource.h @@ -7,7 +7,7 @@ #include #include -#include +#include namespace DB diff --git a/src/Dictionaries/HTTPDictionarySource.cpp 
b/src/Dictionaries/HTTPDictionarySource.cpp index 844a6357e29..16e7f483978 100644 --- a/src/Dictionaries/HTTPDictionarySource.cpp +++ b/src/Dictionaries/HTTPDictionarySource.cpp @@ -1,5 +1,5 @@ #include "HTTPDictionarySource.h" -#include +#include #include #include #include diff --git a/src/Dictionaries/MySQLDictionarySource.h b/src/Dictionaries/MySQLDictionarySource.h index fa26c2b162a..1ecc41036be 100644 --- a/src/Dictionaries/MySQLDictionarySource.h +++ b/src/Dictionaries/MySQLDictionarySource.h @@ -12,7 +12,7 @@ # include "DictionaryStructure.h" # include "ExternalQueryBuilder.h" # include "IDictionarySource.h" -# include +# include namespace Poco { diff --git a/src/DataStreams/MarkInCompressedFile.h b/src/Formats/MarkInCompressedFile.h similarity index 100% rename from src/DataStreams/MarkInCompressedFile.h rename to src/Formats/MarkInCompressedFile.h diff --git a/src/DataStreams/NativeReader.cpp b/src/Formats/NativeReader.cpp similarity index 99% rename from src/DataStreams/NativeReader.cpp rename to src/Formats/NativeReader.cpp index 079dff80eae..2d8fdc160f5 100644 --- a/src/DataStreams/NativeReader.cpp +++ b/src/Formats/NativeReader.cpp @@ -8,7 +8,7 @@ #include #include -#include +#include #include diff --git a/src/DataStreams/NativeReader.h b/src/Formats/NativeReader.h similarity index 98% rename from src/DataStreams/NativeReader.h rename to src/Formats/NativeReader.h index 95b03c71764..49c2db7703f 100644 --- a/src/DataStreams/NativeReader.h +++ b/src/Formats/NativeReader.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include diff --git a/src/DataStreams/NativeWriter.cpp b/src/Formats/NativeWriter.cpp similarity index 98% rename from src/DataStreams/NativeWriter.cpp rename to src/Formats/NativeWriter.cpp index 6e26c443e29..9da0c312362 100644 --- a/src/DataStreams/NativeWriter.cpp +++ b/src/Formats/NativeWriter.cpp @@ -5,8 +5,8 @@ #include #include -#include -#include +#include +#include #include #include diff --git a/src/DataStreams/NativeWriter.h b/src/Formats/NativeWriter.h similarity index 100% rename from src/DataStreams/NativeWriter.h rename to src/Formats/NativeWriter.h diff --git a/src/DataStreams/TemporaryFileStream.cpp b/src/Formats/TemporaryFileStream.cpp similarity index 68% rename from src/DataStreams/TemporaryFileStream.cpp rename to src/Formats/TemporaryFileStream.cpp index 4b7c9d50fe7..b19c4aeff35 100644 --- a/src/DataStreams/TemporaryFileStream.cpp +++ b/src/Formats/TemporaryFileStream.cpp @@ -1,6 +1,6 @@ -#include -#include -#include +#include +#include +#include #include #include #include @@ -41,27 +41,4 @@ void TemporaryFileStream::write(const std::string & path, const Block & header, compressed_buf.finalize(); } -TemporaryFileLazySource::TemporaryFileLazySource(const std::string & path_, const Block & header_) - : ISource(header_) - , path(path_) - , done(false) -{} - -Chunk TemporaryFileLazySource::generate() -{ - if (done) - return {}; - - if (!stream) - stream = std::make_unique(path, header); - - auto block = stream->block_in->read(); - if (!block) - { - done = true; - stream.reset(); - } - return Chunk(block.getColumns(), block.rows()); -} - } diff --git a/src/DataStreams/TemporaryFileStream.h b/src/Formats/TemporaryFileStream.h similarity index 63% rename from src/DataStreams/TemporaryFileStream.h rename to src/Formats/TemporaryFileStream.h index e288b5b30fa..5a1e0bc870a 100644 --- a/src/DataStreams/TemporaryFileStream.h +++ b/src/Formats/TemporaryFileStream.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include namespace DB { @@ 
-23,22 +23,4 @@ struct TemporaryFileStream static void write(const std::string & path, const Block & header, QueryPipelineBuilder builder, const std::string & codec); }; - -class TemporaryFileLazySource : public ISource -{ -public: - TemporaryFileLazySource(const std::string & path_, const Block & header_); - String getName() const override { return "TemporaryFileLazySource"; } - -protected: - Chunk generate() override; - -private: - const std::string path; - Block header; - bool done; - - std::unique_ptr stream; -}; - } diff --git a/src/DataStreams/formatBlock.cpp b/src/Formats/formatBlock.cpp similarity index 93% rename from src/DataStreams/formatBlock.cpp rename to src/Formats/formatBlock.cpp index dab321be2e1..3284663fc42 100644 --- a/src/DataStreams/formatBlock.cpp +++ b/src/Formats/formatBlock.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include diff --git a/src/DataStreams/formatBlock.h b/src/Formats/formatBlock.h similarity index 100% rename from src/DataStreams/formatBlock.h rename to src/Formats/formatBlock.h diff --git a/src/Functions/formatRow.cpp b/src/Functions/formatRow.cpp index 20341cbe1dc..ee9696cf34f 100644 --- a/src/Functions/formatRow.cpp +++ b/src/Functions/formatRow.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include #include diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 63e3577af55..4f4b981b44d 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -9,8 +9,7 @@ #include #include #include -#include -#include +#include #include #include #include diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index 975075eba96..6d6bf61834b 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -19,7 +19,7 @@ #include #include -#include +#include #include diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index b4ce9f352a2..7e0fa2ba003 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -1,7 +1,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index 2b858512b98..2117eec0063 100644 --- a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -2,7 +2,6 @@ #include #include -#include #include #include #include diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 156bbfc2d81..fd1c10e8495 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -23,7 +23,6 @@ #include -#include #include #include diff --git a/src/Interpreters/HashJoin.h b/src/Interpreters/HashJoin.h index 07fd6d5b89f..b5790c047f4 100644 --- a/src/Interpreters/HashJoin.h +++ b/src/Interpreters/HashJoin.h @@ -20,7 +20,7 @@ #include #include -#include +#include #include diff --git a/src/Interpreters/IInterpreter.h b/src/Interpreters/IInterpreter.h index 1b4eada3c9f..665a46190fd 100644 --- a/src/Interpreters/IInterpreter.h +++ b/src/Interpreters/IInterpreter.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include diff --git a/src/Interpreters/InterpreterDescribeQuery.cpp b/src/Interpreters/InterpreterDescribeQuery.cpp index 2ebae17cd6b..3cd39ce7912 100644 --- a/src/Interpreters/InterpreterDescribeQuery.cpp +++ b/src/Interpreters/InterpreterDescribeQuery.cpp @@ -1,6 +1,6 @@ #include #include -#include 
+#include #include #include #include diff --git a/src/Interpreters/InterpreterExistsQuery.cpp b/src/Interpreters/InterpreterExistsQuery.cpp index 6ffeef5cc7d..24c30a8be30 100644 --- a/src/Interpreters/InterpreterExistsQuery.cpp +++ b/src/Interpreters/InterpreterExistsQuery.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index c29eace1b55..487fa2538c2 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -1,6 +1,6 @@ #include -#include +#include #include #include #include diff --git a/src/Interpreters/InterpreterInsertQuery.h b/src/Interpreters/InterpreterInsertQuery.h index 5f44603a420..e5733a8c28b 100644 --- a/src/Interpreters/InterpreterInsertQuery.h +++ b/src/Interpreters/InterpreterInsertQuery.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include #include diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index b4ffa15a869..59fd1009381 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1,5 +1,3 @@ -#include - #include #include diff --git a/src/Interpreters/InterpreterShowCreateQuery.cpp b/src/Interpreters/InterpreterShowCreateQuery.cpp index c191a73bc71..30a417f6fa7 100644 --- a/src/Interpreters/InterpreterShowCreateQuery.cpp +++ b/src/Interpreters/InterpreterShowCreateQuery.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/InterpreterWatchQuery.cpp b/src/Interpreters/InterpreterWatchQuery.cpp index 78c4eca5ca6..e5e447562c6 100644 --- a/src/Interpreters/InterpreterWatchQuery.cpp +++ b/src/Interpreters/InterpreterWatchQuery.cpp @@ -15,7 +15,7 @@ limitations under the License. */ #include #include #include -#include +#include namespace DB diff --git a/src/Interpreters/InterpreterWatchQuery.h b/src/Interpreters/InterpreterWatchQuery.h index e43ed88af2f..ac167182a71 100644 --- a/src/Interpreters/InterpreterWatchQuery.h +++ b/src/Interpreters/InterpreterWatchQuery.h @@ -12,7 +12,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include -#include +#include #include #include #include diff --git a/src/Interpreters/MergeJoin.cpp b/src/Interpreters/MergeJoin.cpp index 1fc551334e2..7789c74d596 100644 --- a/src/Interpreters/MergeJoin.cpp +++ b/src/Interpreters/MergeJoin.cpp @@ -4,8 +4,7 @@ #include #include -#include -#include +#include #include #include #include diff --git a/src/Interpreters/MergeJoin.h b/src/Interpreters/MergeJoin.h index 8782a2f7535..0e2e771255d 100644 --- a/src/Interpreters/MergeJoin.h +++ b/src/Interpreters/MergeJoin.h @@ -7,7 +7,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/src/Interpreters/ProcessList.h b/src/Interpreters/ProcessList.h index 2e300472647..9597c1ee558 100644 --- a/src/Interpreters/ProcessList.h +++ b/src/Interpreters/ProcessList.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include #include diff --git a/src/Interpreters/Set.h b/src/Interpreters/Set.h index 118779f1935..3146b6af03f 100644 --- a/src/Interpreters/Set.h +++ b/src/Interpreters/Set.h @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/SortedBlocksWriter.cpp b/src/Interpreters/SortedBlocksWriter.cpp index ebe4aba71ab..ebec58dcca7 100644 --- a/src/Interpreters/SortedBlocksWriter.cpp +++ b/src/Interpreters/SortedBlocksWriter.cpp @@ -4,8 +4,8 @@ #include #include #include -#include -#include +#include +#include #include diff --git a/src/Interpreters/SortedBlocksWriter.h b/src/Interpreters/SortedBlocksWriter.h index 94bebce88f7..ac58ef2ab7b 100644 --- a/src/Interpreters/SortedBlocksWriter.h +++ b/src/Interpreters/SortedBlocksWriter.h @@ -7,7 +7,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index 02dcd95ab41..7cd53442ffd 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/UserDefinedExecutableFunction.cpp b/src/Interpreters/UserDefinedExecutableFunction.cpp index 06830df68e6..d57978d0fd6 100644 --- a/src/Interpreters/UserDefinedExecutableFunction.cpp +++ b/src/Interpreters/UserDefinedExecutableFunction.cpp @@ -4,8 +4,8 @@ #include #include -#include -#include +#include +#include namespace DB diff --git a/src/Interpreters/UserDefinedExecutableFunction.h b/src/Interpreters/UserDefinedExecutableFunction.h index 240422a02ca..1cb1de47578 100644 --- a/src/Interpreters/UserDefinedExecutableFunction.h +++ b/src/Interpreters/UserDefinedExecutableFunction.h @@ -3,7 +3,7 @@ #include #include -#include +#include #include diff --git a/src/Interpreters/UserDefinedExecutableFunctionFactory.cpp b/src/Interpreters/UserDefinedExecutableFunctionFactory.cpp index d6ad2666ff1..cfa1171a84b 100644 --- a/src/Interpreters/UserDefinedExecutableFunctionFactory.cpp +++ b/src/Interpreters/UserDefinedExecutableFunctionFactory.cpp @@ -2,8 +2,8 @@ #include -#include -#include +#include +#include #include #include diff --git a/src/Interpreters/executeDDLQueryOnCluster.h b/src/Interpreters/executeDDLQueryOnCluster.h index 0ad40dd3332..e7ec52d03cb 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.h +++ b/src/Interpreters/executeDDLQueryOnCluster.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include #include diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 0a1130c721b..95fb8d38454 100644 --- a/src/Interpreters/executeQuery.cpp +++ 
b/src/Interpreters/executeQuery.cpp @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include @@ -49,7 +49,6 @@ #include #include -#include #include #include diff --git a/src/Interpreters/executeQuery.h b/src/Interpreters/executeQuery.h index a2df9baec73..9c561d8b88c 100644 --- a/src/Interpreters/executeQuery.h +++ b/src/Interpreters/executeQuery.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include diff --git a/src/Interpreters/join_common.cpp b/src/Interpreters/join_common.cpp index dec925d68c1..bf5d30437ec 100644 --- a/src/Interpreters/join_common.cpp +++ b/src/Interpreters/join_common.cpp @@ -4,8 +4,6 @@ #include #include -#include - #include #include #include diff --git a/src/NOTICE b/src/NOTICE index d0d3efe3f8e..c68280b1529 100644 --- a/src/NOTICE +++ b/src/NOTICE @@ -18,9 +18,6 @@ Common/UInt128.h Core/Block.h Core/Defines.h Core/Settings.h -DataStreams/PushingToViewsBlockOutputStream.cpp -DataStreams/PushingToViewsBlockOutputStream.h -DataStreams/copyData.cpp Databases/DatabasesCommon.cpp IO/WriteBufferValidUTF8.cpp Interpreters/InterpreterAlterQuery.cpp diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index 819344e4225..80ed8225c79 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -225,12 +225,12 @@ Block PullingAsyncPipelineExecutor::getExtremesBlock() return header.cloneWithColumns(extremes.detachColumns()); } -BlockStreamProfileInfo & PullingAsyncPipelineExecutor::getProfileInfo() +ProfileInfo & PullingAsyncPipelineExecutor::getProfileInfo() { if (lazy_format) return lazy_format->getProfileInfo(); - static BlockStreamProfileInfo profile_info; + static ProfileInfo profile_info; static std::once_flag flag; /// Calculate rows before limit here to avoid race. std::call_once(flag, []() { profile_info.getRowsBeforeLimit(); }); diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.h b/src/Processors/Executors/PullingAsyncPipelineExecutor.h index 2ce75aecab7..7e45246ffd6 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.h +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.h @@ -8,7 +8,7 @@ class QueryPipeline; class Block; class Chunk; class LazyOutputFormat; -struct BlockStreamProfileInfo; +struct ProfileInfo; /// Asynchronous pulling executor for QueryPipeline. /// Always creates extra thread. If query is executed in single thread, use PullingPipelineExecutor. @@ -44,7 +44,7 @@ public: Block getExtremesBlock(); /// Get query profile info. - BlockStreamProfileInfo & getProfileInfo(); + ProfileInfo & getProfileInfo(); /// Internal executor data. 
struct Data; diff --git a/src/Processors/Executors/PullingPipelineExecutor.cpp b/src/Processors/Executors/PullingPipelineExecutor.cpp index 7da2a6d3059..ad7da63b0b1 100644 --- a/src/Processors/Executors/PullingPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingPipelineExecutor.cpp @@ -118,7 +118,7 @@ Block PullingPipelineExecutor::getExtremesBlock() return header.cloneWithColumns(extremes.detachColumns()); } -BlockStreamProfileInfo & PullingPipelineExecutor::getProfileInfo() +ProfileInfo & PullingPipelineExecutor::getProfileInfo() { return pulling_format->getProfileInfo(); } diff --git a/src/Processors/Executors/PullingPipelineExecutor.h b/src/Processors/Executors/PullingPipelineExecutor.h index 878d66bd3d4..e05f4f3738d 100644 --- a/src/Processors/Executors/PullingPipelineExecutor.h +++ b/src/Processors/Executors/PullingPipelineExecutor.h @@ -10,7 +10,7 @@ class Chunk; class QueryPipeline; class PipelineExecutor; class PullingOutputFormat; -struct BlockStreamProfileInfo; +struct ProfileInfo; using PipelineExecutorPtr = std::shared_ptr; @@ -46,7 +46,7 @@ public: Block getExtremesBlock(); /// Get query profile info. - BlockStreamProfileInfo & getProfileInfo(); + ProfileInfo & getProfileInfo(); private: std::atomic_bool has_data_flag = false; diff --git a/src/Processors/Formats/IRowInputFormat.h b/src/Processors/Formats/IRowInputFormat.h index 19a94d41044..87caadd93da 100644 --- a/src/Processors/Formats/IRowInputFormat.h +++ b/src/Processors/Formats/IRowInputFormat.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include class Stopwatch; diff --git a/src/Processors/Formats/Impl/NativeFormat.cpp b/src/Processors/Formats/Impl/NativeFormat.cpp index 5daa38967b9..07cf4670981 100644 --- a/src/Processors/Formats/Impl/NativeFormat.cpp +++ b/src/Processors/Formats/Impl/NativeFormat.cpp @@ -1,5 +1,5 @@ -#include -#include +#include +#include #include #include #include diff --git a/src/Processors/Formats/LazyOutputFormat.h b/src/Processors/Formats/LazyOutputFormat.h index 2c29f55c4f3..82a0cb2fc07 100644 --- a/src/Processors/Formats/LazyOutputFormat.h +++ b/src/Processors/Formats/LazyOutputFormat.h @@ -1,7 +1,7 @@ #pragma once #include #include -#include +#include #include namespace DB @@ -25,7 +25,7 @@ public: bool isFinished() { return finished_processing && queue.size() == 0; } - BlockStreamProfileInfo & getProfileInfo() { return info; } + ProfileInfo & getProfileInfo() { return info; } void setRowsBeforeLimit(size_t rows_before_limit) override; @@ -65,7 +65,7 @@ private: /// Is not used. static WriteBuffer out; - BlockStreamProfileInfo info; + ProfileInfo info; std::atomic finished_processing; }; diff --git a/src/Processors/Formats/PullingOutputFormat.h b/src/Processors/Formats/PullingOutputFormat.h index 53b2086712f..a231b7679f3 100644 --- a/src/Processors/Formats/PullingOutputFormat.h +++ b/src/Processors/Formats/PullingOutputFormat.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include namespace DB { @@ -20,7 +20,7 @@ public: Chunk getTotals(); Chunk getExtremes(); - BlockStreamProfileInfo & getProfileInfo() { return info; } + ProfileInfo & getProfileInfo() { return info; } void setRowsBeforeLimit(size_t rows_before_limit) override; @@ -38,7 +38,7 @@ private: std::atomic_bool & has_data_flag; - BlockStreamProfileInfo info; + ProfileInfo info; /// Is not used. 
static WriteBuffer out; diff --git a/src/Processors/Pipe.h b/src/Processors/Pipe.h index a07c68f56b2..3341734430c 100644 --- a/src/Processors/Pipe.h +++ b/src/Processors/Pipe.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/src/Processors/QueryPlan/AggregatingStep.h b/src/Processors/QueryPlan/AggregatingStep.h index 8583e5be485..c7d67c75894 100644 --- a/src/Processors/QueryPlan/AggregatingStep.h +++ b/src/Processors/QueryPlan/AggregatingStep.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include #include #include diff --git a/src/Processors/QueryPlan/CreatingSetsStep.h b/src/Processors/QueryPlan/CreatingSetsStep.h index 8d20c764e8a..e20c28e10f4 100644 --- a/src/Processors/QueryPlan/CreatingSetsStep.h +++ b/src/Processors/QueryPlan/CreatingSetsStep.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include diff --git a/src/Processors/QueryPlan/CubeStep.h b/src/Processors/QueryPlan/CubeStep.h index 45077d78a90..1079bed5398 100644 --- a/src/Processors/QueryPlan/CubeStep.h +++ b/src/Processors/QueryPlan/CubeStep.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include #include namespace DB diff --git a/src/Processors/QueryPlan/DistinctStep.h b/src/Processors/QueryPlan/DistinctStep.h index b08e93dffa9..a48a779425d 100644 --- a/src/Processors/QueryPlan/DistinctStep.h +++ b/src/Processors/QueryPlan/DistinctStep.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include namespace DB { diff --git a/src/Processors/QueryPlan/MergeSortingStep.h b/src/Processors/QueryPlan/MergeSortingStep.h index d5daa041256..947ced829c6 100644 --- a/src/Processors/QueryPlan/MergeSortingStep.h +++ b/src/Processors/QueryPlan/MergeSortingStep.h @@ -1,7 +1,7 @@ #pragma once #include #include -#include +#include #include namespace DB diff --git a/src/Processors/QueryPlan/MergingAggregatedStep.h b/src/Processors/QueryPlan/MergingAggregatedStep.h index 9171512571a..eeead41b5f9 100644 --- a/src/Processors/QueryPlan/MergingAggregatedStep.h +++ b/src/Processors/QueryPlan/MergingAggregatedStep.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include namespace DB { diff --git a/src/Processors/QueryPlan/MergingSortedStep.h b/src/Processors/QueryPlan/MergingSortedStep.h index 5d27e59ab76..e886de42ca8 100644 --- a/src/Processors/QueryPlan/MergingSortedStep.h +++ b/src/Processors/QueryPlan/MergingSortedStep.h @@ -1,7 +1,7 @@ #pragma once #include #include -#include +#include #include namespace DB diff --git a/src/Processors/QueryPlan/OffsetStep.h b/src/Processors/QueryPlan/OffsetStep.h index 488c55b6460..f16559bcfad 100644 --- a/src/Processors/QueryPlan/OffsetStep.h +++ b/src/Processors/QueryPlan/OffsetStep.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include namespace DB { diff --git a/src/Processors/QueryPlan/PartialSortingStep.h b/src/Processors/QueryPlan/PartialSortingStep.h index bd8fd30ce02..9b7b8e8baa5 100644 --- a/src/Processors/QueryPlan/PartialSortingStep.h +++ b/src/Processors/QueryPlan/PartialSortingStep.h @@ -1,7 +1,7 @@ #pragma once #include #include -#include +#include namespace DB { diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index cd2f42ece58..399e7d01839 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Processors/QueryPlan/RollupStep.h b/src/Processors/QueryPlan/RollupStep.h index 2ff3040d7a7..7cd71fecdc1 100644 --- 
a/src/Processors/QueryPlan/RollupStep.h +++ b/src/Processors/QueryPlan/RollupStep.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include namespace DB { diff --git a/src/Processors/QueryPlan/SettingQuotaAndLimitsStep.h b/src/Processors/QueryPlan/SettingQuotaAndLimitsStep.h index b36ddfb3768..a8d1eef4b08 100644 --- a/src/Processors/QueryPlan/SettingQuotaAndLimitsStep.h +++ b/src/Processors/QueryPlan/SettingQuotaAndLimitsStep.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/src/Processors/Sinks/RemoteSink.h b/src/Processors/Sinks/RemoteSink.h new file mode 100644 index 00000000000..30cf958c072 --- /dev/null +++ b/src/Processors/Sinks/RemoteSink.h @@ -0,0 +1,27 @@ +#pragma once +#include +#include + +namespace DB +{ + +class RemoteSink final : public RemoteInserter, public SinkToStorage +{ +public: + explicit RemoteSink( + Connection & connection_, + const ConnectionTimeouts & timeouts, + const String & query_, + const Settings & settings_, + const ClientInfo & client_info_) + : RemoteInserter(connection_, timeouts, query_, settings_, client_info_) + , SinkToStorage(RemoteInserter::getHeader()) + { + } + + String getName() const override { return "RemoteSink"; } + void consume (Chunk chunk) override { write(RemoteInserter::getHeader().cloneWithColumns(chunk.detachColumns())); } + void onFinish() override { RemoteInserter::onFinish(); } +}; + +} diff --git a/src/Formats/MySQLSource.cpp b/src/Processors/Sources/MySQLSource.cpp similarity index 99% rename from src/Formats/MySQLSource.cpp rename to src/Processors/Sources/MySQLSource.cpp index 069aa0cb56b..5bda662466c 100644 --- a/src/Formats/MySQLSource.cpp +++ b/src/Processors/Sources/MySQLSource.cpp @@ -19,7 +19,7 @@ #include #include #include -#include "MySQLSource.h" +#include namespace DB diff --git a/src/Formats/MySQLSource.h b/src/Processors/Sources/MySQLSource.h similarity index 100% rename from src/Formats/MySQLSource.h rename to src/Processors/Sources/MySQLSource.h diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index bf3ef32214d..99ba459cf2c 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -1,6 +1,6 @@ #include -#include -#include +#include +#include #include #include @@ -56,7 +56,7 @@ std::optional RemoteSource::tryGenerate() query_executor->setProgressCallback([this](const Progress & value) { progress(value); }); /// Get rows_before_limit result for remote query from ProfileInfo packet. 
- query_executor->setProfileInfoCallback([this](const BlockStreamProfileInfo & info) + query_executor->setProfileInfoCallback([this](const ProfileInfo & info) { if (rows_before_limit && info.hasAppliedLimit()) rows_before_limit->set(info.getRowsBeforeLimit()); diff --git a/src/DataStreams/SQLiteSource.cpp b/src/Processors/Sources/SQLiteSource.cpp similarity index 100% rename from src/DataStreams/SQLiteSource.cpp rename to src/Processors/Sources/SQLiteSource.cpp diff --git a/src/DataStreams/SQLiteSource.h b/src/Processors/Sources/SQLiteSource.h similarity index 100% rename from src/DataStreams/SQLiteSource.h rename to src/Processors/Sources/SQLiteSource.h diff --git a/src/DataStreams/ShellCommandSource.h b/src/Processors/Sources/ShellCommandSource.h similarity index 100% rename from src/DataStreams/ShellCommandSource.h rename to src/Processors/Sources/ShellCommandSource.h diff --git a/src/Processors/Sources/SourceWithProgress.h b/src/Processors/Sources/SourceWithProgress.h index bf57c3b013b..912a548f977 100644 --- a/src/Processors/Sources/SourceWithProgress.h +++ b/src/Processors/Sources/SourceWithProgress.h @@ -1,7 +1,7 @@ #pragma once #include #include -#include +#include #include diff --git a/src/Processors/Sources/TemporaryFileLazySource.cpp b/src/Processors/Sources/TemporaryFileLazySource.cpp new file mode 100644 index 00000000000..0382229a7c0 --- /dev/null +++ b/src/Processors/Sources/TemporaryFileLazySource.cpp @@ -0,0 +1,32 @@ +#include +#include + +namespace DB +{ + +TemporaryFileLazySource::~TemporaryFileLazySource() = default; + +TemporaryFileLazySource::TemporaryFileLazySource(const std::string & path_, const Block & header_) + : ISource(header_) + , path(path_) + , done(false) +{} + +Chunk TemporaryFileLazySource::generate() +{ + if (done) + return {}; + + if (!stream) + stream = std::make_unique(path, header); + + auto block = stream->block_in->read(); + if (!block) + { + done = true; + stream.reset(); + } + return Chunk(block.getColumns(), block.rows()); +} + +} diff --git a/src/Processors/Sources/TemporaryFileLazySource.h b/src/Processors/Sources/TemporaryFileLazySource.h new file mode 100644 index 00000000000..b2e9d5d5500 --- /dev/null +++ b/src/Processors/Sources/TemporaryFileLazySource.h @@ -0,0 +1,28 @@ +#pragma once + +#include + +namespace DB +{ + +struct TemporaryFileStream; + +class TemporaryFileLazySource : public ISource +{ +public: + TemporaryFileLazySource(const std::string & path_, const Block & header_); + ~TemporaryFileLazySource() override; + String getName() const override { return "TemporaryFileLazySource"; } + +protected: + Chunk generate() override; + +private: + const std::string path; + Block header; + bool done; + + std::unique_ptr stream; +}; + +} diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp index 9011d188b81..bf3cafd6ff5 100644 --- a/src/Processors/Transforms/AggregatingTransform.cpp +++ b/src/Processors/Transforms/AggregatingTransform.cpp @@ -1,10 +1,9 @@ #include -#include +#include #include #include #include -#include #include namespace ProfileEvents diff --git a/src/Processors/Transforms/CreatingSetsTransform.cpp b/src/Processors/Transforms/CreatingSetsTransform.cpp index e6ae620e69b..fb3c8d6a87b 100644 --- a/src/Processors/Transforms/CreatingSetsTransform.cpp +++ b/src/Processors/Transforms/CreatingSetsTransform.cpp @@ -7,7 +7,6 @@ #include #include -#include namespace DB diff --git a/src/Processors/Transforms/CreatingSetsTransform.h 
b/src/Processors/Transforms/CreatingSetsTransform.h index a5a67e99afc..8e7a09b320d 100644 --- a/src/Processors/Transforms/CreatingSetsTransform.h +++ b/src/Processors/Transforms/CreatingSetsTransform.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include #include diff --git a/src/Processors/Transforms/DistinctTransform.h b/src/Processors/Transforms/DistinctTransform.h index 236f9026c63..d80fdb5bc22 100644 --- a/src/Processors/Transforms/DistinctTransform.h +++ b/src/Processors/Transforms/DistinctTransform.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include #include #include diff --git a/src/Processors/Transforms/LimitsCheckingTransform.h b/src/Processors/Transforms/LimitsCheckingTransform.h index 9de5cbf5125..50891ece654 100644 --- a/src/Processors/Transforms/LimitsCheckingTransform.h +++ b/src/Processors/Transforms/LimitsCheckingTransform.h @@ -1,10 +1,10 @@ #pragma once #include -#include +#include #include #include -#include +#include namespace DB { diff --git a/src/Processors/Transforms/MaterializingTransform.cpp b/src/Processors/Transforms/MaterializingTransform.cpp index f13d5376ebe..abf416e8047 100644 --- a/src/Processors/Transforms/MaterializingTransform.cpp +++ b/src/Processors/Transforms/MaterializingTransform.cpp @@ -1,5 +1,4 @@ #include -#include namespace DB { diff --git a/src/Processors/Transforms/MergeSortingTransform.cpp b/src/Processors/Transforms/MergeSortingTransform.cpp index 6e379a3c4ba..e272fd0f183 100644 --- a/src/Processors/Transforms/MergeSortingTransform.cpp +++ b/src/Processors/Transforms/MergeSortingTransform.cpp @@ -6,8 +6,8 @@ #include #include #include -#include -#include +#include +#include #include diff --git a/src/Processors/Transforms/SortingTransform.cpp b/src/Processors/Transforms/SortingTransform.cpp index 2c9098adaa6..eeb576731ab 100644 --- a/src/Processors/Transforms/SortingTransform.cpp +++ b/src/Processors/Transforms/SortingTransform.cpp @@ -9,8 +9,8 @@ #include #include -#include -#include +#include +#include namespace ProfileEvents diff --git a/src/Processors/Transforms/SquashingChunksTransform.h b/src/Processors/Transforms/SquashingChunksTransform.h index 75a799e5af1..45a0f33d666 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.h +++ b/src/Processors/Transforms/SquashingChunksTransform.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include namespace DB { diff --git a/src/Processors/Transforms/TotalsHavingTransform.cpp b/src/Processors/Transforms/TotalsHavingTransform.cpp index f30058fadb9..0b7797da24f 100644 --- a/src/Processors/Transforms/TotalsHavingTransform.cpp +++ b/src/Processors/Transforms/TotalsHavingTransform.cpp @@ -6,7 +6,7 @@ #include #include -#include +#include #include namespace DB @@ -29,6 +29,25 @@ void finalizeChunk(Chunk & chunk) chunk.setColumns(std::move(columns), num_rows); } +void finalizeBlock(Block & block) +{ + for (size_t i = 0; i < block.columns(); ++i) + { + ColumnWithTypeAndName & current = block.getByPosition(i); + const DataTypeAggregateFunction * unfinalized_type = typeid_cast(current.type.get()); + + if (unfinalized_type) + { + current.type = unfinalized_type->getReturnType(); + if (current.column) + { + auto mut_column = IColumn::mutate(std::move(current.column)); + current.column = ColumnAggregateFunction::convertToValues(std::move(mut_column)); + } + } + } +} + Block TotalsHavingTransform::transformHeader( Block block, const ActionsDAG * expression, diff --git a/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp 
b/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp index a1cce03a1a5..57cfa6c01b8 100644 --- a/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp +++ b/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/DataStreams/BlockIO.cpp b/src/QueryPipeline/BlockIO.cpp similarity index 97% rename from src/DataStreams/BlockIO.cpp rename to src/QueryPipeline/BlockIO.cpp index 692b69388ea..671ba6e4c39 100644 --- a/src/DataStreams/BlockIO.cpp +++ b/src/QueryPipeline/BlockIO.cpp @@ -1,4 +1,4 @@ -#include +#include #include namespace DB diff --git a/src/DataStreams/BlockIO.h b/src/QueryPipeline/BlockIO.h similarity index 100% rename from src/DataStreams/BlockIO.h rename to src/QueryPipeline/BlockIO.h diff --git a/src/DataStreams/CMakeLists.txt b/src/QueryPipeline/CMakeLists.txt similarity index 100% rename from src/DataStreams/CMakeLists.txt rename to src/QueryPipeline/CMakeLists.txt diff --git a/src/DataStreams/ConnectionCollector.cpp b/src/QueryPipeline/ConnectionCollector.cpp similarity index 98% rename from src/DataStreams/ConnectionCollector.cpp rename to src/QueryPipeline/ConnectionCollector.cpp index 8e700c0ab7f..65b030fd623 100644 --- a/src/DataStreams/ConnectionCollector.cpp +++ b/src/QueryPipeline/ConnectionCollector.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include diff --git a/src/DataStreams/ConnectionCollector.h b/src/QueryPipeline/ConnectionCollector.h similarity index 100% rename from src/DataStreams/ConnectionCollector.h rename to src/QueryPipeline/ConnectionCollector.h diff --git a/src/DataStreams/ExecutionSpeedLimits.cpp b/src/QueryPipeline/ExecutionSpeedLimits.cpp similarity index 99% rename from src/DataStreams/ExecutionSpeedLimits.cpp rename to src/QueryPipeline/ExecutionSpeedLimits.cpp index e340ee71ab0..2738903cedb 100644 --- a/src/DataStreams/ExecutionSpeedLimits.cpp +++ b/src/QueryPipeline/ExecutionSpeedLimits.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include diff --git a/src/DataStreams/ExecutionSpeedLimits.h b/src/QueryPipeline/ExecutionSpeedLimits.h similarity index 95% rename from src/DataStreams/ExecutionSpeedLimits.h rename to src/QueryPipeline/ExecutionSpeedLimits.h index b8c320bb005..63658462c9f 100644 --- a/src/DataStreams/ExecutionSpeedLimits.h +++ b/src/QueryPipeline/ExecutionSpeedLimits.h @@ -2,7 +2,7 @@ #include #include -#include +#include class Stopwatch; diff --git a/src/DataStreams/BlockStreamProfileInfo.cpp b/src/QueryPipeline/ProfileInfo.cpp similarity index 67% rename from src/DataStreams/BlockStreamProfileInfo.cpp rename to src/QueryPipeline/ProfileInfo.cpp index 9a06d905223..32986f7259c 100644 --- a/src/DataStreams/BlockStreamProfileInfo.cpp +++ b/src/QueryPipeline/ProfileInfo.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -8,7 +8,7 @@ namespace DB { -void BlockStreamProfileInfo::read(ReadBuffer & in) +void ProfileInfo::read(ReadBuffer & in) { readVarUInt(rows, in); readVarUInt(blocks, in); @@ -19,7 +19,7 @@ void BlockStreamProfileInfo::read(ReadBuffer & in) } -void BlockStreamProfileInfo::write(WriteBuffer & out) const +void ProfileInfo::write(WriteBuffer & out) const { writeVarUInt(rows, out); writeVarUInt(blocks, out); @@ -30,7 +30,7 @@ void BlockStreamProfileInfo::write(WriteBuffer & out) const } -void BlockStreamProfileInfo::setFrom(const BlockStreamProfileInfo & rhs, bool skip_block_size_info) +void ProfileInfo::setFrom(const ProfileInfo & rhs, bool skip_block_size_info) { if 
(!skip_block_size_info) { @@ -44,24 +44,24 @@ void BlockStreamProfileInfo::setFrom(const BlockStreamProfileInfo & rhs, bool sk } -size_t BlockStreamProfileInfo::getRowsBeforeLimit() const +size_t ProfileInfo::getRowsBeforeLimit() const { return rows_before_limit; } -bool BlockStreamProfileInfo::hasAppliedLimit() const +bool ProfileInfo::hasAppliedLimit() const { return applied_limit; } -void BlockStreamProfileInfo::update(Block & block) +void ProfileInfo::update(Block & block) { update(block.rows(), block.bytes()); } -void BlockStreamProfileInfo::update(size_t num_rows, size_t num_bytes) +void ProfileInfo::update(size_t num_rows, size_t num_bytes) { ++blocks; rows += num_rows; diff --git a/src/DataStreams/BlockStreamProfileInfo.h b/src/QueryPipeline/ProfileInfo.h similarity index 90% rename from src/DataStreams/BlockStreamProfileInfo.h rename to src/QueryPipeline/ProfileInfo.h index 1707b941445..335092ce244 100644 --- a/src/DataStreams/BlockStreamProfileInfo.h +++ b/src/QueryPipeline/ProfileInfo.h @@ -13,7 +13,7 @@ class ReadBuffer; class WriteBuffer; /// Information for profiling. See IBlockInputStream.h -struct BlockStreamProfileInfo +struct ProfileInfo { bool started = false; Stopwatch total_stopwatch {CLOCK_MONOTONIC_COARSE}; /// Time with waiting time @@ -22,7 +22,7 @@ struct BlockStreamProfileInfo size_t blocks = 0; size_t bytes = 0; - using BlockStreamProfileInfos = std::vector; + using ProfileInfos = std::vector; /** Get the number of rows if there were no LIMIT. * If there is no LIMIT, 0 is returned. @@ -42,7 +42,7 @@ struct BlockStreamProfileInfo /// Sets main fields from other object (see methods above). /// If skip_block_size_info if true, then rows, bytes and block fields are ignored. - void setFrom(const BlockStreamProfileInfo & rhs, bool skip_block_size_info); + void setFrom(const ProfileInfo & rhs, bool skip_block_size_info); /// Only for Processors. 
void setRowsBeforeLimit(size_t rows_before_limit_) diff --git a/src/DataStreams/RemoteBlockOutputStream.cpp b/src/QueryPipeline/RemoteInserter.cpp similarity index 98% rename from src/DataStreams/RemoteBlockOutputStream.cpp rename to src/QueryPipeline/RemoteInserter.cpp index 7642098ff0c..c34c625dc6d 100644 --- a/src/DataStreams/RemoteBlockOutputStream.cpp +++ b/src/QueryPipeline/RemoteInserter.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include diff --git a/src/DataStreams/RemoteBlockOutputStream.h b/src/QueryPipeline/RemoteInserter.h similarity index 56% rename from src/DataStreams/RemoteBlockOutputStream.h rename to src/QueryPipeline/RemoteInserter.h index f1f49015c9d..0688b555825 100644 --- a/src/DataStreams/RemoteBlockOutputStream.h +++ b/src/QueryPipeline/RemoteInserter.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include #include @@ -44,23 +43,4 @@ private: bool finished = false; }; -class RemoteSink final : public RemoteInserter, public SinkToStorage -{ -public: - explicit RemoteSink( - Connection & connection_, - const ConnectionTimeouts & timeouts, - const String & query_, - const Settings & settings_, - const ClientInfo & client_info_) - : RemoteInserter(connection_, timeouts, query_, settings_, client_info_) - , SinkToStorage(RemoteInserter::getHeader()) - { - } - - String getName() const override { return "RemoteSink"; } - void consume (Chunk chunk) override { write(RemoteInserter::getHeader().cloneWithColumns(chunk.detachColumns())); } - void onFinish() override { RemoteInserter::onFinish(); } -}; - } diff --git a/src/DataStreams/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp similarity index 99% rename from src/DataStreams/RemoteQueryExecutor.cpp rename to src/QueryPipeline/RemoteQueryExecutor.cpp index 3c78fddfd39..23fe3a5f220 100644 --- a/src/DataStreams/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -1,6 +1,6 @@ -#include -#include -#include +#include +#include +#include #include #include diff --git a/src/DataStreams/RemoteQueryExecutor.h b/src/QueryPipeline/RemoteQueryExecutor.h similarity index 98% rename from src/DataStreams/RemoteQueryExecutor.h rename to src/QueryPipeline/RemoteQueryExecutor.h index d82f9983894..b7a2509ea97 100644 --- a/src/DataStreams/RemoteQueryExecutor.h +++ b/src/QueryPipeline/RemoteQueryExecutor.h @@ -21,8 +21,8 @@ using ThrottlerPtr = std::shared_ptr; struct Progress; using ProgressCallback = std::function; -struct BlockStreamProfileInfo; -using ProfileInfoCallback = std::function; +struct ProfileInfo; +using ProfileInfoCallback = std::function; class RemoteQueryExecutorReadContext; diff --git a/src/DataStreams/RemoteQueryExecutorReadContext.cpp b/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp similarity index 99% rename from src/DataStreams/RemoteQueryExecutorReadContext.cpp rename to src/QueryPipeline/RemoteQueryExecutorReadContext.cpp index 6bdf52d2831..5f7b5e24967 100644 --- a/src/DataStreams/RemoteQueryExecutorReadContext.cpp +++ b/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp @@ -1,6 +1,6 @@ #if defined(OS_LINUX) -#include +#include #include #include #include diff --git a/src/DataStreams/RemoteQueryExecutorReadContext.h b/src/QueryPipeline/RemoteQueryExecutorReadContext.h similarity index 100% rename from src/DataStreams/RemoteQueryExecutorReadContext.h rename to src/QueryPipeline/RemoteQueryExecutorReadContext.h diff --git a/src/DataStreams/SizeLimits.cpp b/src/QueryPipeline/SizeLimits.cpp similarity index 97% rename from src/DataStreams/SizeLimits.cpp 
rename to src/QueryPipeline/SizeLimits.cpp index 06dde923e55..90005902f67 100644 --- a/src/DataStreams/SizeLimits.cpp +++ b/src/QueryPipeline/SizeLimits.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include diff --git a/src/DataStreams/SizeLimits.h b/src/QueryPipeline/SizeLimits.h similarity index 100% rename from src/DataStreams/SizeLimits.h rename to src/QueryPipeline/SizeLimits.h diff --git a/src/DataStreams/StreamLocalLimits.h b/src/QueryPipeline/StreamLocalLimits.h similarity index 91% rename from src/DataStreams/StreamLocalLimits.h rename to src/QueryPipeline/StreamLocalLimits.h index efda6a941cc..7f49a5d0b07 100644 --- a/src/DataStreams/StreamLocalLimits.h +++ b/src/QueryPipeline/StreamLocalLimits.h @@ -1,6 +1,6 @@ #pragma once -#include -#include +#include +#include namespace DB { diff --git a/src/DataStreams/examples/CMakeLists.txt b/src/QueryPipeline/examples/CMakeLists.txt similarity index 100% rename from src/DataStreams/examples/CMakeLists.txt rename to src/QueryPipeline/examples/CMakeLists.txt diff --git a/src/DataStreams/narrowBlockInputStreams.cpp b/src/QueryPipeline/narrowBlockInputStreams.cpp similarity index 100% rename from src/DataStreams/narrowBlockInputStreams.cpp rename to src/QueryPipeline/narrowBlockInputStreams.cpp diff --git a/src/DataStreams/narrowBlockInputStreams.h b/src/QueryPipeline/narrowBlockInputStreams.h similarity index 100% rename from src/DataStreams/narrowBlockInputStreams.h rename to src/QueryPipeline/narrowBlockInputStreams.h diff --git a/src/DataStreams/tests/gtest_blocks_size_merging_streams.cpp b/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp similarity index 100% rename from src/DataStreams/tests/gtest_blocks_size_merging_streams.cpp rename to src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp diff --git a/src/DataStreams/tests/gtest_check_sorted_stream.cpp b/src/QueryPipeline/tests/gtest_check_sorted_stream.cpp similarity index 100% rename from src/DataStreams/tests/gtest_check_sorted_stream.cpp rename to src/QueryPipeline/tests/gtest_check_sorted_stream.cpp diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index 5d62a295dad..5b727253dff 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -9,8 +9,7 @@ #include #include #include -#include -#include +#include #include #include #include @@ -595,7 +594,7 @@ namespace void addProgressToResult(); void addTotalsToResult(const Block & totals); void addExtremesToResult(const Block & extremes); - void addProfileInfoToResult(const BlockStreamProfileInfo & info); + void addProfileInfoToResult(const ProfileInfo & info); void addLogsToResult(); void sendResult(); void throwIfFailedToSendResult(); @@ -1381,7 +1380,7 @@ namespace format->doWriteSuffix(); } - void Call::addProfileInfoToResult(const BlockStreamProfileInfo & info) + void Call::addProfileInfoToResult(const ProfileInfo & info) { auto & stats = *result.mutable_stats(); stats.set_rows(info.rows); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index f3247e7bc2b..4e8291f5281 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -16,8 +16,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include @@ -772,7 +772,7 @@ void TCPHandler::sendReadTaskRequestAssumeLocked() out->next(); } -void TCPHandler::sendProfileInfo(const BlockStreamProfileInfo & info) +void TCPHandler::sendProfileInfo(const ProfileInfo & info) { writeVarUInt(Protocol::Server::ProfileInfo, *out); info.write(*out); diff --git 
a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index e89d82cfcc8..cb14323906b 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -9,10 +9,10 @@ #include #include #include -#include +#include #include #include -#include +#include #include "IServer.h" @@ -30,7 +30,7 @@ namespace DB class Session; struct Settings; class ColumnsDescription; -struct BlockStreamProfileInfo; +struct ProfileInfo; /// State of query processing. struct QueryState @@ -224,7 +224,7 @@ private: void sendEndOfStream(); void sendPartUUIDs(); void sendReadTaskRequestAssumeLocked(); - void sendProfileInfo(const BlockStreamProfileInfo & info); + void sendProfileInfo(const ProfileInfo & info); void sendTotals(const Block & totals); void sendExtremes(const Block & extremes); diff --git a/src/Storages/Distributed/DirectoryMonitor.cpp b/src/Storages/Distributed/DirectoryMonitor.cpp index 167e36ebbe3..254d82520dc 100644 --- a/src/Storages/Distributed/DirectoryMonitor.cpp +++ b/src/Storages/Distributed/DirectoryMonitor.cpp @@ -1,5 +1,5 @@ -#include -#include +#include +#include #include #include #include diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index 1841be22b72..cf349d1f8cf 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -14,8 +14,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include diff --git a/src/Storages/MarkCache.h b/src/Storages/MarkCache.h index ccf8a2e606d..06143e954f8 100644 --- a/src/Storages/MarkCache.h +++ b/src/Storages/MarkCache.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include namespace ProfileEvents diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 9e09cd0036e..a1df4a13e6a 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -1,6 +1,6 @@ #include -#include +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.h b/src/Storages/MergeTree/MergeTreeWriteAheadLog.h index 7624dc303e0..4fec5ce46bc 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.h +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.h @@ -1,7 +1,7 @@ #pragma once -#include -#include +#include +#include #include #include #include diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index da79b917b5a..d128ecb9e27 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index df92b270542..1dd75f09b92 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -4,7 +4,7 @@ #include -#include +#include #include #include diff --git a/src/Storages/StorageExecutable.cpp b/src/Storages/StorageExecutable.cpp index 0f47f654428..21784952c23 100644 --- a/src/Storages/StorageExecutable.cpp +++ b/src/Storages/StorageExecutable.cpp @@ -3,7 +3,6 @@ #include #include -#include #include #include diff --git a/src/Storages/StorageExecutable.h b/src/Storages/StorageExecutable.h index 0d17e003ce4..74df17f1463 100644 --- a/src/Storages/StorageExecutable.h +++ b/src/Storages/StorageExecutable.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include diff --git a/src/Storages/StorageMerge.cpp 
b/src/Storages/StorageMerge.cpp index 6492c9e07c0..38db0b61e8d 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index 3bdf3218b2e..a1724af38cb 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -4,7 +4,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index d95a9465bd6..c94c519c3b9 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp index 0e7faad194e..8f7654821cb 100644 --- a/src/Storages/StorageS3Cluster.cpp +++ b/src/Storages/StorageS3Cluster.cpp @@ -23,11 +23,11 @@ #include #include #include -#include +#include #include #include "Processors/Sources/SourceWithProgress.h" #include -#include +#include #include #include #include diff --git a/src/Storages/StorageSQLite.cpp b/src/Storages/StorageSQLite.cpp index d7eef35e60b..4e2c6cfbe10 100644 --- a/src/Storages/StorageSQLite.cpp +++ b/src/Storages/StorageSQLite.cpp @@ -3,7 +3,7 @@ #if USE_SQLITE #include #include -#include +#include #include #include #include diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index 2547af1b0ad..77231ce49fd 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -4,9 +4,9 @@ #include #include #include -#include -#include -#include +#include +#include +#include #include #include #include @@ -215,7 +215,7 @@ void StorageSetOrJoinBase::restoreFromFile(const String & file_path) CompressedReadBuffer compressed_backup_buf(*backup_buf); NativeReader backup_stream(compressed_backup_buf, 0); - BlockStreamProfileInfo info; + ProfileInfo info; while (Block block = backup_stream.read()) { info.update(block); diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index 0cd07afc26c..66f49761793 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -14,8 +14,8 @@ #include #include -#include -#include +#include +#include #include diff --git a/src/Storages/getStructureOfRemoteTable.cpp b/src/Storages/getStructureOfRemoteTable.cpp index 639692beda5..532abb8e2f3 100644 --- a/src/Storages/getStructureOfRemoteTable.cpp +++ b/src/Storages/getStructureOfRemoteTable.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/TableFunctions/TableFunctionMySQL.cpp b/src/TableFunctions/TableFunctionMySQL.cpp index 005a689f895..80f108eb68a 100644 --- a/src/TableFunctions/TableFunctionMySQL.cpp +++ b/src/TableFunctions/TableFunctionMySQL.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/TableFunctions/TableFunctionS3Cluster.cpp b/src/TableFunctions/TableFunctionS3Cluster.cpp index 7bd8ad2e740..7e28decfdb0 100644 --- a/src/TableFunctions/TableFunctionS3Cluster.cpp +++ b/src/TableFunctions/TableFunctionS3Cluster.cpp @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include #include From da45d55e22b141eb3764ef9eb9812ecaf258e641 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 15 Oct 2021 23:18:37 +0300 Subject: [PATCH 265/438] Remove DataStreams folder. 
--- utils/wal-dump/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/wal-dump/main.cpp b/utils/wal-dump/main.cpp index 0e47c39fb5a..3566936324b 100644 --- a/utils/wal-dump/main.cpp +++ b/utils/wal-dump/main.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include From 6f682d54b2ee0a27465dcebf23c43f4ee1934276 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 15 Oct 2021 23:21:18 +0300 Subject: [PATCH 266/438] Fixed test --- src/Dictionaries/HashedDictionary.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp index eec7cec0285..917eb4cbde3 100644 --- a/src/Dictionaries/HashedDictionary.cpp +++ b/src/Dictionaries/HashedDictionary.cpp @@ -664,10 +664,7 @@ Pipe HashedDictionary::read(const Names & column_na }); } - if constexpr (dictionary_key_type == DictionaryKeyType::Simple) - return Pipe(std::make_shared(DictionarySourceData(shared_from_this(), std::move(keys), column_names), max_block_size)); - else - return Pipe(std::make_shared(DictionarySourceData(shared_from_this(), std::move(keys), column_names), max_block_size)); + return Pipe(std::make_shared(DictionarySourceData(shared_from_this(), std::move(keys), column_names), max_block_size)); } template From f3cbac79d343c0fa4e697fbbd40bc7ca0346eeb2 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 15 Oct 2021 23:22:24 +0300 Subject: [PATCH 267/438] Try fix integration test. --- src/DataStreams/BlockStreamProfileInfo.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/DataStreams/BlockStreamProfileInfo.cpp b/src/DataStreams/BlockStreamProfileInfo.cpp index 9a06d905223..05c1ac96db0 100644 --- a/src/DataStreams/BlockStreamProfileInfo.cpp +++ b/src/DataStreams/BlockStreamProfileInfo.cpp @@ -46,12 +46,14 @@ void BlockStreamProfileInfo::setFrom(const BlockStreamProfileInfo & rhs, bool sk size_t BlockStreamProfileInfo::getRowsBeforeLimit() const { + calculated_rows_before_limit = true; return rows_before_limit; } bool BlockStreamProfileInfo::hasAppliedLimit() const { + calculated_rows_before_limit = true; return applied_limit; } From a35d7096d8117d1182dcf0d4951d3ab781fbc84d Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 15 Oct 2021 23:26:09 +0300 Subject: [PATCH 268/438] Added concept HasIndexOperator --- src/Functions/FunctionsJSON.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/Functions/FunctionsJSON.cpp b/src/Functions/FunctionsJSON.cpp index 9558b856511..cb55ba6b83b 100644 --- a/src/Functions/FunctionsJSON.cpp +++ b/src/Functions/FunctionsJSON.cpp @@ -58,12 +58,11 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } -template -struct HasIndexOperator : std::false_type {}; - -template -struct HasIndexOperator()[0])>> : std::true_type {}; - +template +concept HasIndexOperator = requires (T t) +{ + t[0]; +}; /// Functions to parse JSONs and extract values from it. 
/// The first argument of all these functions gets a JSON, @@ -285,7 +284,7 @@ private: return true; } - if constexpr (HasIndexOperator::value) + if constexpr (HasIndexOperator) { if (element.isObject()) { From 41acc524581782d54c6985915c5381f1badaef18 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Oct 2021 00:12:51 +0300 Subject: [PATCH 269/438] Preparation to build with Musl --- base/base/LineReader.cpp | 4 +++ .../include/jemalloc/jemalloc_defs.h | 6 +++-- .../include/jemalloc/jemalloc_protos.h | 2 +- .../internal/jemalloc_internal_defs.h.in | 14 +++++----- contrib/libcxx-cmake/CMakeLists.txt | 4 +++ .../linux_x86_64/include/portable.h | 4 ++- src/Common/QueryProfiler.cpp | 8 +++--- src/Common/malloc.cpp | 4 +++ src/IO/BitHelpers.h | 26 +++++-------------- 9 files changed, 40 insertions(+), 32 deletions(-) diff --git a/base/base/LineReader.cpp b/base/base/LineReader.cpp index 8600f4c7b65..d325154ee61 100644 --- a/base/base/LineReader.cpp +++ b/base/base/LineReader.cpp @@ -5,6 +5,10 @@ #include #include +#include +#include +#include + #ifdef OS_LINUX /// We can detect if code is linked with one or another readline variants or open the library dynamically. diff --git a/contrib/jemalloc-cmake/include/jemalloc/jemalloc_defs.h b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_defs.h index 6a03a231a0e..0aa4033f859 100644 --- a/contrib/jemalloc-cmake/include/jemalloc/jemalloc_defs.h +++ b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_defs.h @@ -18,8 +18,10 @@ * Define overrides for non-standard allocator-related functions if they are * present on the system. */ -#define JEMALLOC_OVERRIDE_MEMALIGN -#define JEMALLOC_OVERRIDE_VALLOC +#if !defined(USE_MUSL) + #define JEMALLOC_OVERRIDE_MEMALIGN + #define JEMALLOC_OVERRIDE_VALLOC +#endif /* * At least Linux omits the "const" in: diff --git a/contrib/jemalloc-cmake/include/jemalloc/jemalloc_protos.h b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_protos.h index 8506237729d..2e35e7b6249 100644 --- a/contrib/jemalloc-cmake/include/jemalloc/jemalloc_protos.h +++ b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_protos.h @@ -1,6 +1,6 @@ // OSX does not have this for system alloc functions, so you will get // "exception specification in declaration" error. -#if defined(__APPLE__) || defined(__FreeBSD__) +#if defined(__APPLE__) || defined(__FreeBSD__) || defined(USE_MUSL) # undef JEMALLOC_NOTHROW # define JEMALLOC_NOTHROW diff --git a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in index d5cf0e719ef..44ff2d9fad1 100644 --- a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in @@ -13,12 +13,14 @@ * Define overrides for non-standard allocator-related functions if they are * present on the system. 
*/ -#define JEMALLOC_OVERRIDE___LIBC_CALLOC -#define JEMALLOC_OVERRIDE___LIBC_FREE -#define JEMALLOC_OVERRIDE___LIBC_MALLOC -#define JEMALLOC_OVERRIDE___LIBC_MEMALIGN -#define JEMALLOC_OVERRIDE___LIBC_REALLOC -#define JEMALLOC_OVERRIDE___LIBC_VALLOC +#if !defined(USE_MUSL) + #define JEMALLOC_OVERRIDE___LIBC_CALLOC + #define JEMALLOC_OVERRIDE___LIBC_FREE + #define JEMALLOC_OVERRIDE___LIBC_MALLOC + #define JEMALLOC_OVERRIDE___LIBC_MEMALIGN + #define JEMALLOC_OVERRIDE___LIBC_REALLOC + #define JEMALLOC_OVERRIDE___LIBC_VALLOC +#endif /* #undef JEMALLOC_OVERRIDE___POSIX_MEMALIGN */ /* diff --git a/contrib/libcxx-cmake/CMakeLists.txt b/contrib/libcxx-cmake/CMakeLists.txt index 0cfb4191619..ac67f2563a3 100644 --- a/contrib/libcxx-cmake/CMakeLists.txt +++ b/contrib/libcxx-cmake/CMakeLists.txt @@ -56,6 +56,10 @@ if (USE_UNWIND) target_compile_definitions(cxx PUBLIC -DSTD_EXCEPTION_HAS_STACK_TRACE=1) endif () +if (USE_MUSL) + target_compile_definitions(cxx PUBLIC -D_LIBCPP_HAS_MUSL_LIBC=1) +endif () + # Override the deduced attribute support that causes error. if (OS_DARWIN AND COMPILER_GCC) add_compile_definitions(_LIBCPP_INIT_PRIORITY_MAX) diff --git a/contrib/openldap-cmake/linux_x86_64/include/portable.h b/contrib/openldap-cmake/linux_x86_64/include/portable.h index 2924b6713a4..ab7052bda91 100644 --- a/contrib/openldap-cmake/linux_x86_64/include/portable.h +++ b/contrib/openldap-cmake/linux_x86_64/include/portable.h @@ -98,7 +98,9 @@ #define HAVE_BCOPY 1 /* Define to 1 if you have the header file. */ -#define HAVE_BITS_TYPES_H 1 +#if !defined(USE_MUSL) + #define HAVE_BITS_TYPES_H 1 +#endif /* Define to 1 if you have the `chroot' function. */ #define HAVE_CHROOT 1 diff --git a/src/Common/QueryProfiler.cpp b/src/Common/QueryProfiler.cpp index 7b905937e11..aa40226093a 100644 --- a/src/Common/QueryProfiler.cpp +++ b/src/Common/QueryProfiler.cpp @@ -124,11 +124,13 @@ QueryProfilerBase::QueryProfilerBase(const UInt64 thread_id, const sev.sigev_notify = SIGEV_THREAD_ID; sev.sigev_signo = pause_signal; -# if defined(OS_FREEBSD) +#if defined(OS_FREEBSD) sev._sigev_un._threadid = thread_id; -# else +#elif defined(USE_MUSL) + sev.sigev_notify_thread_id = thread_id; +#else sev._sigev_un._tid = thread_id; -# endif +#endif if (timer_create(clock_type, &sev, &timer_id)) { /// In Google Cloud Run, the function "timer_create" is implemented incorrectly as of 2020-01-25. 
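[Editor's aside — not part of the patch series.] The QueryProfiler.cpp hunk above is the one place in this change set where the libc difference is visible in code rather than in build flags: glibc, musl and FreeBSD each spell the "target thread" field of `struct sigevent` differently when a per-thread timer is created with `SIGEV_THREAD_ID`. A minimal illustrative sketch of that logic is given below. The preprocessor branches mirror the hunk (OS_FREEBSD and USE_MUSL are the build-system macros introduced by these patches); the helper name `createThreadTimer`, the includes and the error handling are assumptions added for illustration only.

```cpp
// Sketch only: portable creation of a per-thread CPU timer, following the
// field names shown in the QueryProfiler.cpp hunk above.
// On glibc, SIGEV_THREAD_ID may require _GNU_SOURCE to be defined.
#include <csignal>
#include <ctime>
#include <cerrno>
#include <sys/types.h>
#include <system_error>

static timer_t createThreadTimer(clockid_t clock_type, pid_t thread_id, int pause_signal)
{
    struct sigevent sev {};
    sev.sigev_notify = SIGEV_THREAD_ID;  /// deliver the signal to one specific thread
    sev.sigev_signo = pause_signal;

#if defined(OS_FREEBSD)
    sev._sigev_un._threadid = thread_id;     /// FreeBSD's spelling of the union member
#elif defined(USE_MUSL)
    sev.sigev_notify_thread_id = thread_id;  /// musl exposes a named macro for this field
#else
    sev._sigev_un._tid = thread_id;          /// glibc has no public name; use the union member
#endif

    timer_t timer_id;
    if (timer_create(clock_type, &sev, &timer_id))
        throw std::system_error(errno, std::generic_category(), "timer_create failed");
    return timer_id;
}
```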
diff --git a/src/Common/malloc.cpp b/src/Common/malloc.cpp index ec472d5d1d6..88281d9c80e 100644 --- a/src/Common/malloc.cpp +++ b/src/Common/malloc.cpp @@ -17,7 +17,9 @@ extern "C" void *aligned_alloc(size_t alignment, size_t size); void *valloc(size_t size); void *memalign(size_t alignment, size_t size); +#if !defined(USE_MUSL) void *pvalloc(size_t size); +#endif } #pragma GCC diagnostic pop @@ -39,6 +41,8 @@ static void dummyFunctionForInterposing() ignore(aligned_alloc(0, 0)); // -V575 NOLINT ignore(valloc(0)); // -V575 NOLINT ignore(memalign(0, 0)); // -V575 NOLINT +#if !defined(USE_MUSL) ignore(pvalloc(0)); // -V575 NOLINT +#endif } #endif diff --git a/src/IO/BitHelpers.h b/src/IO/BitHelpers.h index bcc36305021..d15297637a3 100644 --- a/src/IO/BitHelpers.h +++ b/src/IO/BitHelpers.h @@ -7,17 +7,6 @@ #include #include -#if defined(__OpenBSD__) || defined(__FreeBSD__) || defined (__ANDROID__) -# include -#elif defined(__sun) -# include -#elif defined(__APPLE__) -# include - -# define htobe64(x) OSSwapHostToBigInt64(x) -# define be64toh(x) OSSwapBigToHostInt64(x) -#endif - namespace DB { @@ -152,7 +141,7 @@ private: memcpy(&tmp_buffer, source_current, bytes_to_read); source_current += bytes_to_read; - tmp_buffer = be64toh(tmp_buffer); + tmp_buffer = __builtin_bswap64(tmp_buffer); bits_buffer |= BufferType(tmp_buffer) << ((sizeof(BufferType) - sizeof(tmp_buffer)) * 8 - bits_count); bits_count += static_cast(bytes_to_read) * 8; @@ -200,7 +189,7 @@ public: capacity = BIT_BUFFER_SIZE - bits_count; } -// write low bits of value as high bits of bits_buffer + // write low bits of value as high bits of bits_buffer const UInt64 mask = maskLowBits(bits_to_write); BufferType v = value & mask; v <<= capacity - bits_to_write; @@ -212,7 +201,7 @@ public: // flush contents of bits_buffer to the dest_current, partial bytes are completed with zeroes. inline void flush() { - bits_count = (bits_count + 8 - 1) & ~(8 - 1); // align UP to 8-bytes, so doFlush will write ALL data from bits_buffer + bits_count = (bits_count + 8 - 1) & ~(8 - 1); // align up to 8-bytes, so doFlush will write all data from bits_buffer while (bits_count != 0) doFlush(); } @@ -231,13 +220,12 @@ private: if (available < to_write) { - throw Exception("Can not write past end of buffer. Space available " - + std::to_string(available) + " bytes, required to write: " - + std::to_string(to_write) + ".", - ErrorCodes::CANNOT_WRITE_AFTER_END_OF_BUFFER); + throw Exception(ErrorCodes::CANNOT_WRITE_AFTER_END_OF_BUFFER, + "Can not write past end of buffer. 
Space available {} bytes, required to write {} bytes.", + available, to_write); } - const auto tmp_buffer = htobe64(static_cast(bits_buffer >> (sizeof(bits_buffer) - sizeof(UInt64)) * 8)); + const auto tmp_buffer = __builtin_bswap64(static_cast(bits_buffer >> (sizeof(bits_buffer) - sizeof(UInt64)) * 8)); memcpy(dest_current, &tmp_buffer, to_write); dest_current += to_write; From 0b3bf43d619ff4e753422a8cb0fbdff0d4815662 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 10 Sep 2021 23:56:24 +0300 Subject: [PATCH 270/438] Rewrite MergeTreeData::clearOldTemporaryDirectories() to use early continue --- src/Storages/MergeTree/MergeTreeData.cpp | 47 +++++++++++++----------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 790b95a9fa9..37e20204813 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1209,35 +1209,38 @@ void MergeTreeData::clearOldTemporaryDirectories(size_t custom_directories_lifet { for (auto it = disk->iterateDirectory(path); it->isValid(); it->next()) { - if (startsWith(it->name(), "tmp_")) + const std::string & basename = it->name(); + if (!startsWith(basename, "tmp_")) { - try + continue; + } + + try + { + if (disk->isDirectory(it->path()) && isOldPartDirectory(disk, it->path(), deadline)) { - if (disk->isDirectory(it->path()) && isOldPartDirectory(disk, it->path(), deadline)) - { - LOG_WARNING(log, "Removing temporary directory {}", fullPath(disk, it->path())); - disk->removeRecursive(it->path()); - } + LOG_WARNING(log, "Removing temporary directory {}", fullPath(disk, it->path())); + disk->removeRecursive(it->path()); } - /// see getModificationTime() - catch (const ErrnoException & e) + } + /// see getModificationTime() + catch (const ErrnoException & e) + { + if (e.getErrno() == ENOENT) { - if (e.getErrno() == ENOENT) - { - /// If the file is already deleted, do nothing. - } - else - throw; + /// If the file is already deleted, do nothing. } - catch (const fs::filesystem_error & e) + else + throw; + } + catch (const fs::filesystem_error & e) + { + if (e.code() == std::errc::no_such_file_or_directory) { - if (e.code() == std::errc::no_such_file_or_directory) - { - /// If the file is already deleted, do nothing. - } - else - throw; + /// If the file is already deleted, do nothing. 
} + else + throw; } } } From 07e8b2b3c7bfe6181287607629002e8aa6bd354c Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 11 Sep 2021 00:16:09 +0300 Subject: [PATCH 271/438] Do not try to remove temporary paths that is currently in written by merge/mutation v2: rebase against MergeTask v3: rebase due to conflicts in src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp v4: - rebase due to conflicts in src/Storages/MergeTree/MergeTask.cpp - drop common/scope_guard_safe.h (not used) --- src/Storages/MergeTree/MergeTask.cpp | 20 ++++++++++++++--- src/Storages/MergeTree/MergeTask.h | 3 +++ src/Storages/MergeTree/MergeTreeData.cpp | 10 +++++++-- src/Storages/MergeTree/MergeTreeData.h | 3 ++- .../MergeTree/MergeTreeDataMergerMutator.cpp | 7 ++++++ .../MergeTree/MergeTreeDataMergerMutator.h | 22 +++++++++++++++++++ .../ReplicatedMergeTreeCleanupThread.cpp | 2 +- src/Storages/StorageMergeTree.cpp | 4 ++-- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 9 files changed, 63 insertions(+), 10 deletions(-) diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index aa3f91a4f00..2e123d849db 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -10,6 +10,7 @@ #include "Storages/MergeTree/IMergeTreeDataPart.h" #include "Storages/MergeTree/MergeTreeSequentialSource.h" #include "Storages/MergeTree/FutureMergedMutatedPart.h" +#include "Storages/MergeTree/MergeTreeDataMergerMutator.h" #include "Processors/Transforms/ExpressionTransform.h" #include "Processors/Transforms/MaterializingTransform.h" #include "Processors/Merges/MergingSortedTransform.h" @@ -117,11 +118,23 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() } ctx->disk = global_ctx->space_reservation->getDisk(); - auto local_new_part_relative_tmp_path_name = local_tmp_prefix + global_ctx->future_part->name + local_tmp_suffix; - auto local_new_part_tmp_path = global_ctx->data->relative_data_path + local_new_part_relative_tmp_path_name + "/"; + + String local_part_path = global_ctx->data->relative_data_path; + String local_tmp_part_basename = local_tmp_prefix + global_ctx->future_part->name + (global_ctx->parent_part ? 
".proj" : ""); + String local_new_part_tmp_path = local_part_path + local_tmp_part_basename + "/"; + if (ctx->disk->exists(local_new_part_tmp_path)) throw Exception("Directory " + fullPath(ctx->disk, local_new_part_tmp_path) + " already exists", ErrorCodes::DIRECTORY_ALREADY_EXISTS); + { + std::lock_guard lock(global_ctx->mutator->tmp_parts_lock); + global_ctx->mutator->tmp_parts.emplace(local_tmp_part_basename); + } + SCOPE_EXIT( + std::lock_guard lock(global_ctx->mutator->tmp_parts_lock); + global_ctx->mutator->tmp_parts.erase(local_tmp_part_basename); + ); + global_ctx->all_column_names = global_ctx->metadata_snapshot->getColumns().getNamesOfPhysical(); global_ctx->storage_columns = global_ctx->metadata_snapshot->getColumns().getAllPhysical(); @@ -142,7 +155,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() global_ctx->future_part->type, global_ctx->future_part->part_info, local_single_disk_volume, - local_new_part_relative_tmp_path_name, + local_tmp_part_basename, global_ctx->parent_part); global_ctx->new_data_part->uuid = global_ctx->future_part->uuid; @@ -561,6 +574,7 @@ bool MergeTask::MergeProjectionsStage::mergeMinMaxIndexAndPrepareProjections() c global_ctx->new_data_part.get(), ".proj", global_ctx->data, + global_ctx->mutator, global_ctx->merges_blocker, global_ctx->ttl_merges_blocker)); } diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h index 22dc70bd78c..80c8e7165f8 100644 --- a/src/Storages/MergeTree/MergeTask.h +++ b/src/Storages/MergeTree/MergeTask.h @@ -60,6 +60,7 @@ public: const IMergeTreeDataPart * parent_part_, String suffix_, MergeTreeData * data_, + MergeTreeDataMergerMutator * mutator_, ActionBlocker * merges_blocker_, ActionBlocker * ttl_merges_blocker_) { @@ -78,6 +79,7 @@ public: global_ctx->deduplicate_by_columns = std::move(deduplicate_by_columns_); global_ctx->parent_part = std::move(parent_part_); global_ctx->data = std::move(data_); + global_ctx->mutator = std::move(mutator_); global_ctx->merges_blocker = std::move(merges_blocker_); global_ctx->ttl_merges_blocker = std::move(ttl_merges_blocker_); @@ -121,6 +123,7 @@ private: std::unique_ptr projection_merge_list_element; MergeListElement * merge_list_element_ptr{nullptr}; MergeTreeData * data{nullptr}; + MergeTreeDataMergerMutator * mutator{nullptr}; ActionBlocker * merges_blocker{nullptr}; ActionBlocker * ttl_merges_blocker{nullptr}; StorageMetadataPtr metadata_snapshot{nullptr}; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 37e20204813..83714b814a0 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1193,7 +1193,7 @@ static bool isOldPartDirectory(const DiskPtr & disk, const String & directory_pa } -void MergeTreeData::clearOldTemporaryDirectories(size_t custom_directories_lifetime_seconds) +void MergeTreeData::clearOldTemporaryDirectories(const MergeTreeDataMergerMutator & merger_mutator, size_t custom_directories_lifetime_seconds) { /// If the method is already called from another thread, then we don't need to do anything. 
std::unique_lock lock(clear_old_temporary_directories_mutex, std::defer_lock); @@ -1214,12 +1214,18 @@ void MergeTreeData::clearOldTemporaryDirectories(size_t custom_directories_lifet { continue; } + const std::string & full_path = fullPath(disk, it->path()); + if (merger_mutator.hasTemporaryPart(basename)) + { + LOG_WARNING(log, "{} is an active destination for one of merge/mutation (consider increasing temporary_directories_lifetime setting)", full_path); + continue; + } try { if (disk->isDirectory(it->path()) && isOldPartDirectory(disk, it->path(), deadline)) { - LOG_WARNING(log, "Removing temporary directory {}", fullPath(disk, it->path())); + LOG_WARNING(log, "Removing temporary directory {}", full_path); disk->removeRecursive(it->path()); } } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index e7f1db8f3ec..a05a106386c 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -39,6 +39,7 @@ namespace DB class AlterCommands; class MergeTreePartsMover; +class MergeTreeDataMergerMutator; class MutationCommands; class Context; struct JobAndPool; @@ -536,7 +537,7 @@ public: /// Delete all directories which names begin with "tmp" /// Must be called with locked lockForShare() because it's using relative_data_path. - void clearOldTemporaryDirectories(size_t custom_directories_lifetime_seconds); + void clearOldTemporaryDirectories(const MergeTreeDataMergerMutator & merger_mutator, size_t custom_directories_lifetime_seconds); void clearEmptyParts(); diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 5d97c64b49b..f4c23293bf2 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -444,6 +444,7 @@ MergeTaskPtr MergeTreeDataMergerMutator::mergePartsToTemporaryPart( parent_part, suffix, &data, + this, &merges_blocker, &ttl_merges_blocker); } @@ -774,4 +775,10 @@ ExecuteTTLType MergeTreeDataMergerMutator::shouldExecuteTTL(const StorageMetadat } +bool MergeTreeDataMergerMutator::hasTemporaryPart(const std::string & basename) const +{ + std::lock_guard lock(tmp_parts_lock); + return tmp_parts.contains(basename); +} + } diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index 22650ac4eca..e5c8a4d8285 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -136,6 +137,7 @@ private: MergeTreeData::DataPartsVector selectAllPartsFromPartition(const String & partition_id); friend class MutateTask; + friend class MergeTask; /** Split mutation commands into two parts: * First part should be executed by mutations interpreter. @@ -190,6 +192,26 @@ private: ITTLMergeSelector::PartitionIdToTTLs next_recompress_ttl_merge_times_by_partition; /// Performing TTL merges independently for each partition guarantees that /// there is only a limited number of TTL merges and no partition stores data, that is too stale + +public: + /// Returns true if passed part name is active. + /// (is the destination for one of active mutation/merge). + /// + /// NOTE: that it accept basename (i.e. dirname), not the path, + /// since later requires canonical form. 
+ bool hasTemporaryPart(const std::string & basename) const; + +private: + /// Set of active temporary paths that is used as the destination. + /// List of such paths is required to avoid trying to remove them during cleanup. + /// + /// NOTE: It is pretty short, so use STL is fine. + std::unordered_set tmp_parts; + /// Lock for "tmp_parts". + /// + /// NOTE: mutable is required to mark hasTemporaryPath() const + mutable std::mutex tmp_parts_lock; + }; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 06856c73888..5731092f2a8 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -62,7 +62,7 @@ void ReplicatedMergeTreeCleanupThread::iterate() /// Both use relative_data_path which changes during rename, so we /// do it under share lock storage.clearOldWriteAheadLogs(); - storage.clearOldTemporaryDirectories(storage.getSettings()->temporary_directories_lifetime.totalSeconds()); + storage.clearOldTemporaryDirectories(storage.merger_mutator, storage.getSettings()->temporary_directories_lifetime.totalSeconds()); } /// This is loose condition: no problem if we actually had lost leadership at this moment diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index ab42da1dfa0..0ed906b10f8 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -105,7 +105,7 @@ void StorageMergeTree::startup() /// Temporary directories contain incomplete results of merges (after forced restart) /// and don't allow to reinitialize them, so delete each of them immediately - clearOldTemporaryDirectories(0); + clearOldTemporaryDirectories(merger_mutator, 0); /// NOTE background task will also do the above cleanups periodically. time_after_previous_cleanup_parts.restart(); @@ -1063,7 +1063,7 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign assignee.scheduleCommonTask(ExecutableLambdaAdapter::create( [this, share_lock] () { - clearOldTemporaryDirectories(getSettings()->temporary_directories_lifetime.totalSeconds()); + clearOldTemporaryDirectories(merger_mutator, getSettings()->temporary_directories_lifetime.totalSeconds()); return true; }, common_assignee_trigger, getStorageID())); scheduled = true; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 416d37cd351..9eb5ab7f800 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -479,7 +479,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( } /// Temporary directories contain uninitialized results of Merges or Fetches (after forced restart), /// don't allow to reinitialize them, delete each of them immediately. 
- clearOldTemporaryDirectories(0); + clearOldTemporaryDirectories(merger_mutator, 0); clearOldWriteAheadLogs(); } From 55116ae399fdd09bc1808cdeabdf4d665831a697 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Oct 2021 01:57:22 +0300 Subject: [PATCH 272/438] Fix error --- base/base/phdr_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/base/phdr_cache.cpp b/base/base/phdr_cache.cpp index d2388666f73..8ae10f6bf83 100644 --- a/base/base/phdr_cache.cpp +++ b/base/base/phdr_cache.cpp @@ -6,7 +6,7 @@ #include -#if defined(__linux__) && !defined(THREAD_SANITIZER) +#if defined(__linux__) && !defined(THREAD_SANITIZER) && !defined(USE_MUSL) #define USE_PHDR_CACHE 1 #endif From 4848a6f97fd2203f7b46ea505b51dc395c1b6484 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Oct 2021 02:07:13 +0300 Subject: [PATCH 273/438] Add CMakeLists --- CMakeLists.txt | 6 +----- cmake/linux/default_libs.cmake | 10 +++++++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 685b2c25a0d..2699132f165 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -188,7 +188,7 @@ endif () option(ENABLE_TESTS "Provide unit_test_dbms target with Google.Test unit tests" ON) option(ENABLE_EXAMPLES "Build all example programs in 'examples' subdirectories" OFF) -if (OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64) AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES AND NOT SPLIT_SHARED_LIBRARIES AND CMAKE_VERSION VERSION_GREATER "3.9.0") +if (OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64) AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES AND NOT SPLIT_SHARED_LIBRARIES AND NOT USE_MUSL) # Only for Linux, x86_64 or aarch64. option(GLIBC_COMPATIBILITY "Enable compatibility with older glibc libraries." ON) elseif(GLIBC_COMPATIBILITY) @@ -203,10 +203,6 @@ if (GLIBC_COMPATIBILITY) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -include ${CMAKE_CURRENT_SOURCE_DIR}/base/glibc-compatibility/glibc-compat-2.32.h") endif() -if (NOT CMAKE_VERSION VERSION_GREATER "3.9.0") - message (WARNING "CMake version must be greater than 3.9.0 for production builds.") -endif () - # Make sure the final executable has symbols exported set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic") diff --git a/cmake/linux/default_libs.cmake b/cmake/linux/default_libs.cmake index a2da7ba1915..4abd0a951e1 100644 --- a/cmake/linux/default_libs.cmake +++ b/cmake/linux/default_libs.cmake @@ -14,6 +14,8 @@ endif () if (OS_ANDROID) # pthread and rt are included in libc set (DEFAULT_LIBS "${DEFAULT_LIBS} ${BUILTINS_LIBRARY} ${COVERAGE_OPTION} -lc -lm -ldl") +elseif (USE_MUSL) + set (DEFAULT_LIBS "${DEFAULT_LIBS} ${BUILTINS_LIBRARY} ${COVERAGE_OPTION} -static -lc") else () set (DEFAULT_LIBS "${DEFAULT_LIBS} ${BUILTINS_LIBRARY} ${COVERAGE_OPTION} -lc -lm -lrt -lpthread -ldl") endif () @@ -26,7 +28,7 @@ set(CMAKE_C_STANDARD_LIBRARIES ${DEFAULT_LIBS}) # glibc-compatibility library relies to constant version of libc headers # (because minor changes in function attributes between different glibc versions will introduce incompatibilities) # This is for x86_64. For other architectures we have separate toolchains. 
-if (ARCH_AMD64 AND NOT_UNBUNDLED) +if (ARCH_AMD64 AND NOT_UNBUNDLED AND NOT CMAKE_CROSSCOMPILING) set(CMAKE_C_STANDARD_INCLUDE_DIRECTORIES ${ClickHouse_SOURCE_DIR}/contrib/libc-headers/x86_64-linux-gnu ${ClickHouse_SOURCE_DIR}/contrib/libc-headers) set(CMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES ${ClickHouse_SOURCE_DIR}/contrib/libc-headers/x86_64-linux-gnu ${ClickHouse_SOURCE_DIR}/contrib/libc-headers) endif () @@ -37,8 +39,10 @@ set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) if (NOT OS_ANDROID) - # Our compatibility layer doesn't build under Android, many errors in musl. - add_subdirectory(base/glibc-compatibility) + if (NOT USE_MUSL) + # Our compatibility layer doesn't build under Android, many errors in musl. + add_subdirectory(base/glibc-compatibility) + endif () add_subdirectory(base/harmful) endif () From 2c3709d8ae287d5b3739a1760a0298f8079df050 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Oct 2021 02:07:36 +0300 Subject: [PATCH 274/438] Add toolchain file --- cmake/linux/toolchain-x86_64-musl.cmake | 35 +++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 cmake/linux/toolchain-x86_64-musl.cmake diff --git a/cmake/linux/toolchain-x86_64-musl.cmake b/cmake/linux/toolchain-x86_64-musl.cmake new file mode 100644 index 00000000000..b616a92bb06 --- /dev/null +++ b/cmake/linux/toolchain-x86_64-musl.cmake @@ -0,0 +1,35 @@ +set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) + +set (CMAKE_SYSTEM_NAME "Linux") +set (CMAKE_SYSTEM_PROCESSOR "x86_64") +set (CMAKE_C_COMPILER_TARGET "x86_64-linux-musl") +set (CMAKE_CXX_COMPILER_TARGET "x86_64-linux-musl") +set (CMAKE_ASM_COMPILER_TARGET "x86_64-linux-musl") + +set (TOOLCHAIN_PATH "${CMAKE_CURRENT_LIST_DIR}/../../contrib/sysroot/linux-x86_64-musl/x86_64-linux-musl-cross") + +set (CMAKE_SYSROOT "${TOOLCHAIN_PATH}/x86_64-linux-musl") + +find_program (LLVM_AR_PATH NAMES "llvm-ar" "llvm-ar-13" "llvm-ar-12" "llvm-ar-11" "llvm-ar-10" "llvm-ar-9" "llvm-ar-8") +find_program (LLVM_RANLIB_PATH NAMES "llvm-ranlib" "llvm-ranlib-13" "llvm-ranlib-12" "llvm-ranlib-11" "llvm-ranlib-10" "llvm-ranlib-9") + +set (CMAKE_AR "${LLVM_AR_PATH}" CACHE FILEPATH "" FORCE) +set (CMAKE_RANLIB "${LLVM_RANLIB_PATH}" CACHE FILEPATH "" FORCE) + +set (CMAKE_C_FLAGS_INIT "${CMAKE_C_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") +set (CMAKE_CXX_FLAGS_INIT "${CMAKE_CXX_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") +set (CMAKE_ASM_FLAGS_INIT "${CMAKE_ASM_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") + +set (LINKER_NAME "ld.lld" CACHE STRING "" FORCE) + +set (CMAKE_EXE_LINKER_FLAGS_INIT "-fuse-ld=lld") +set (CMAKE_SHARED_LINKER_FLAGS_INIT "-fuse-ld=lld") + +set (HAS_PRE_1970_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE) +set (HAS_PRE_1970_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE) + +set (HAS_POST_2038_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE) +set (HAS_POST_2038_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE) + +set (USE_MUSL 1) +add_definitions(-DUSE_MUSL=1) From 438d04795e851a29bacfb4f129bed4126aba7275 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Oct 2021 03:03:46 +0300 Subject: [PATCH 275/438] Update toolchain --- cmake/linux/toolchain-x86_64-musl.cmake | 4 ++-- contrib/sysroot | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/linux/toolchain-x86_64-musl.cmake b/cmake/linux/toolchain-x86_64-musl.cmake index b616a92bb06..0406b5de0ba 100644 --- a/cmake/linux/toolchain-x86_64-musl.cmake +++ 
b/cmake/linux/toolchain-x86_64-musl.cmake @@ -6,9 +6,9 @@ set (CMAKE_C_COMPILER_TARGET "x86_64-linux-musl") set (CMAKE_CXX_COMPILER_TARGET "x86_64-linux-musl") set (CMAKE_ASM_COMPILER_TARGET "x86_64-linux-musl") -set (TOOLCHAIN_PATH "${CMAKE_CURRENT_LIST_DIR}/../../contrib/sysroot/linux-x86_64-musl/x86_64-linux-musl-cross") +set (TOOLCHAIN_PATH "${CMAKE_CURRENT_LIST_DIR}/../../contrib/sysroot/linux-x86_64-musl") -set (CMAKE_SYSROOT "${TOOLCHAIN_PATH}/x86_64-linux-musl") +set (CMAKE_SYSROOT "${TOOLCHAIN_PATH}") find_program (LLVM_AR_PATH NAMES "llvm-ar" "llvm-ar-13" "llvm-ar-12" "llvm-ar-11" "llvm-ar-10" "llvm-ar-9" "llvm-ar-8") find_program (LLVM_RANLIB_PATH NAMES "llvm-ranlib" "llvm-ranlib-13" "llvm-ranlib-12" "llvm-ranlib-11" "llvm-ranlib-10" "llvm-ranlib-9") diff --git a/contrib/sysroot b/contrib/sysroot index 002415524b5..e4663925b73 160000 --- a/contrib/sysroot +++ b/contrib/sysroot @@ -1 +1 @@ -Subproject commit 002415524b5d14124bb8a61a3ce7ac65774f5479 +Subproject commit e4663925b73beb57dd29154844c8d50441146753 From e7d99c3e544c209537dbc4ee93d65ccada7e22fb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Oct 2021 03:09:38 +0300 Subject: [PATCH 276/438] Update submodule --- contrib/fastops | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/fastops b/contrib/fastops index 012b777df9e..1460583af7d 160000 --- a/contrib/fastops +++ b/contrib/fastops @@ -1 +1 @@ -Subproject commit 012b777df9e2d145a24800a6c8c3d4a0249bb09e +Subproject commit 1460583af7d13c0e980ce46aec8ee9400314669a From c18d2834690f1fc9ca5af8727e44823b08cd4631 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sat, 16 Oct 2021 03:57:26 +0300 Subject: [PATCH 277/438] Update codegen_select_fuzzer.cpp --- src/Parsers/fuzzers/codegen_fuzzer/codegen_select_fuzzer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Parsers/fuzzers/codegen_fuzzer/codegen_select_fuzzer.cpp b/src/Parsers/fuzzers/codegen_fuzzer/codegen_select_fuzzer.cpp index 6a7a88a8545..9310d7d59f7 100644 --- a/src/Parsers/fuzzers/codegen_fuzzer/codegen_select_fuzzer.cpp +++ b/src/Parsers/fuzzers/codegen_fuzzer/codegen_select_fuzzer.cpp @@ -25,7 +25,7 @@ DEFINE_BINARY_PROTO_FUZZER(const Sentence& main) std::cout << input << std::endl; DB::ParserQueryWithOutput parser(input.data() + input.size()); - try + try { DB::ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0); From bf35af1b5d02091f34d2e85387411102140de7c7 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Sat, 16 Oct 2021 09:40:22 +0300 Subject: [PATCH 278/438] Fix ProfileInfo. 
--- src/QueryPipeline/ProfileInfo.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/QueryPipeline/ProfileInfo.cpp b/src/QueryPipeline/ProfileInfo.cpp index 32986f7259c..ee0ff8c69bf 100644 --- a/src/QueryPipeline/ProfileInfo.cpp +++ b/src/QueryPipeline/ProfileInfo.cpp @@ -46,12 +46,14 @@ void ProfileInfo::setFrom(const ProfileInfo & rhs, bool skip_block_size_info) size_t ProfileInfo::getRowsBeforeLimit() const { + calculated_rows_before_limit = true; return rows_before_limit; } bool ProfileInfo::hasAppliedLimit() const { + calculated_rows_before_limit = true; return applied_limit; } From 6c0eaf76da0a583c8046e7d9c6593f44dc2a8c97 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 16 Oct 2021 11:41:50 +0300 Subject: [PATCH 279/438] Query stage for local --- src/Client/LocalConnection.cpp | 13 ++++++----- src/Client/LocalConnection.h | 2 +- .../02048_clickhouse_local_stage.reference | 15 +++++++++++++ .../02048_clickhouse_local_stage.sh | 22 +++++++++++++++++++ 4 files changed, 45 insertions(+), 7 deletions(-) create mode 100644 tests/queries/0_stateless/02048_clickhouse_local_stage.reference create mode 100755 tests/queries/0_stateless/02048_clickhouse_local_stage.sh diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index efd302622dd..4455ba3b9ad 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -60,15 +60,15 @@ void LocalConnection::updateProgress(const Progress & value) void LocalConnection::sendQuery( const ConnectionTimeouts &, - const String & query_, - const String & query_id_, - UInt64, + const String & query, + const String & query_id, + UInt64 stage, const Settings *, const ClientInfo *, bool) { query_context = session.makeQueryContext(); - query_context->setCurrentQueryId(query_id_); + query_context->setCurrentQueryId(query_id); if (send_progress) query_context->setProgressCallback([this] (const Progress & value) { return this->updateProgress(value); }); @@ -77,8 +77,9 @@ void LocalConnection::sendQuery( state.reset(); state.emplace(); - state->query_id = query_id_; - state->query = query_; + state->query_id = query_id; + state->query = query; + state->stage = QueryProcessingStage::Enum(stage); if (send_progress) state->after_send_progress.restart(); diff --git a/src/Client/LocalConnection.h b/src/Client/LocalConnection.h index dcea3ed0fc3..242d23ddc36 100644 --- a/src/Client/LocalConnection.h +++ b/src/Client/LocalConnection.h @@ -76,7 +76,7 @@ public: void sendQuery( const ConnectionTimeouts & timeouts, const String & query, - const String & query_id_/* = "" */, + const String & query_id/* = "" */, UInt64 stage/* = QueryProcessingStage::Complete */, const Settings * settings/* = nullptr */, const ClientInfo * client_info/* = nullptr */, diff --git a/tests/queries/0_stateless/02048_clickhouse_local_stage.reference b/tests/queries/0_stateless/02048_clickhouse_local_stage.reference new file mode 100644 index 00000000000..44c39f2a444 --- /dev/null +++ b/tests/queries/0_stateless/02048_clickhouse_local_stage.reference @@ -0,0 +1,15 @@ +execute: default +"foo" +1 +execute: --stage fetch_columns +"dummy" +0 +execute: --stage with_mergeable_state +"1" +1 +execute: --stage with_mergeable_state_after_aggregation +"1" +1 +execute: --stage complete +"foo" +1 diff --git a/tests/queries/0_stateless/02048_clickhouse_local_stage.sh b/tests/queries/0_stateless/02048_clickhouse_local_stage.sh new file mode 100755 index 00000000000..5c1303b5160 --- /dev/null +++ b/tests/queries/0_stateless/02048_clickhouse_local_stage.sh @@ -0,0 
+1,22 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +function execute_query() +{ + if [ $# -eq 0 ]; then + echo "execute: default" + else + echo "execute: $*" + fi + ${CLICKHOUSE_LOCAL} "$@" --format CSVWithNames -q "SELECT 1 AS foo" +} + +execute_query # default -- complete +execute_query --stage fetch_columns +execute_query --stage with_mergeable_state +execute_query --stage with_mergeable_state_after_aggregation +execute_query --stage complete From 50231460af503e6e723155fa893e21de1b36f7e0 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 16 Oct 2021 11:28:10 +0300 Subject: [PATCH 280/438] Use forward declaration for Buffer<> in generic headers - changes in ReadHelpers.h -- recompiles 1000 modules - changes in FormatFactor.h -- recompiles 100 modules --- src/Dictionaries/SSDCacheDictionaryStorage.h | 1 + src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatFactory.h | 7 +++++-- src/Formats/JSONEachRowUtils.cpp | 1 + src/Formats/JSONEachRowUtils.h | 4 ++++ src/IO/ReadHelpers.cpp | 5 +++-- src/IO/ReadHelpers.h | 9 ++++++--- src/Processors/Formats/Impl/CSVRowInputFormat.cpp | 1 + .../Formats/Impl/TabSeparatedRowInputFormat.cpp | 1 + 9 files changed, 23 insertions(+), 7 deletions(-) diff --git a/src/Dictionaries/SSDCacheDictionaryStorage.h b/src/Dictionaries/SSDCacheDictionaryStorage.h index ffe0694d8c4..7c53ecc2b2c 100644 --- a/src/Dictionaries/SSDCacheDictionaryStorage.h +++ b/src/Dictionaries/SSDCacheDictionaryStorage.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 34574ca13f8..ec7fa0a9e80 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -13,6 +13,7 @@ #include #include +#include #include namespace DB diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index f20cec56943..d5784219c6a 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -1,9 +1,9 @@ #pragma once +#include #include #include #include -#include #include #include @@ -34,6 +34,9 @@ struct RowOutputFormatParams; using InputFormatPtr = std::shared_ptr; using OutputFormatPtr = std::shared_ptr; +template +struct Memory; + FormatSettings getFormatSettings(ContextPtr context); template @@ -55,7 +58,7 @@ public: */ using FileSegmentationEngine = std::function( ReadBuffer & buf, - DB::Memory<> & memory, + DB::Memory> & memory, size_t min_chunk_bytes)>; /// This callback allows to perform some additional actions after writing a single row. 
diff --git a/src/Formats/JSONEachRowUtils.cpp b/src/Formats/JSONEachRowUtils.cpp index b918825df79..8ef05fa584e 100644 --- a/src/Formats/JSONEachRowUtils.cpp +++ b/src/Formats/JSONEachRowUtils.cpp @@ -1,4 +1,5 @@ #include +#include #include namespace DB diff --git a/src/Formats/JSONEachRowUtils.h b/src/Formats/JSONEachRowUtils.h index 79dd6c6c192..2d2d4ad5531 100644 --- a/src/Formats/JSONEachRowUtils.h +++ b/src/Formats/JSONEachRowUtils.h @@ -1,5 +1,9 @@ #pragma once +#include +#include +#include + namespace DB { diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index ffa050b71c8..5fe0fda88cd 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -1120,7 +1121,7 @@ void skipToUnescapedNextLineOrEOF(ReadBuffer & buf) } } -void saveUpToPosition(ReadBuffer & in, DB::Memory<> & memory, char * current) +void saveUpToPosition(ReadBuffer & in, Memory<> & memory, char * current) { assert(current >= in.position()); assert(current <= in.buffer().end()); @@ -1140,7 +1141,7 @@ void saveUpToPosition(ReadBuffer & in, DB::Memory<> & memory, char * current) in.position() = current; } -bool loadAtPosition(ReadBuffer & in, DB::Memory<> & memory, char * & current) +bool loadAtPosition(ReadBuffer & in, Memory<> & memory, char * & current) { assert(current <= in.buffer().end()); diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index ca6affbf907..880173bfd96 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -29,7 +30,6 @@ #include #include #include -#include #include #include @@ -41,6 +41,9 @@ static constexpr auto DEFAULT_MAX_STRING_SIZE = 1_GiB; namespace DB { +template +struct Memory; + namespace ErrorCodes { extern const int CANNOT_PARSE_DATE; @@ -1290,7 +1293,7 @@ void skipToUnescapedNextLineOrEOF(ReadBuffer & buf); /** This function just copies the data from buffer's internal position (in.position()) * to current position (from arguments) into memory. */ -void saveUpToPosition(ReadBuffer & in, Memory<> & memory, char * current); +void saveUpToPosition(ReadBuffer & in, Memory> & memory, char * current); /** This function is negative to eof(). * In fact it returns whether the data was loaded to internal ReadBuffers's buffer or not. @@ -1299,7 +1302,7 @@ void saveUpToPosition(ReadBuffer & in, Memory<> & memory, char * current); * of our buffer and the current cursor in the end of the buffer. When we call eof() it calls next(). * And this function can fill the buffer with new data, so we will lose the data from previous buffer state. 
*/ -bool loadAtPosition(ReadBuffer & in, Memory<> & memory, char * & current); +bool loadAtPosition(ReadBuffer & in, Memory> & memory, char * & current); struct PcgDeserializer diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 8ccc04faf35..4beb260b64a 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -1,4 +1,5 @@ #include +#include #include #include diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index 1ff52c9f695..c1cf0a904ea 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include From 77a2022cf543226e2116c6ca896f7b226c0f9364 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Sat, 9 Oct 2021 17:00:39 +0800 Subject: [PATCH 281/438] Add primary key to minmax_count_projection --- src/Storages/MergeTree/MergeTreeData.cpp | 120 +++++++++++++----- src/Storages/MergeTree/MergeTreeData.h | 2 + .../MergeTree/MergeTreeDataSelectExecutor.cpp | 3 +- .../MergeTree/registerStorageMergeTree.cpp | 18 +-- src/Storages/ProjectionsDescription.cpp | 19 ++- src/Storages/ProjectionsDescription.h | 6 +- .../01710_minmax_count_projection.reference | 5 + .../01710_minmax_count_projection.sql | 28 ++++ 8 files changed, 157 insertions(+), 44 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index c5b5e2a9a92..66b7f62a7c8 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -4398,6 +4398,8 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( const StorageMetadataPtr & metadata_snapshot, const Names & required_columns, const SelectQueryInfo & query_info, + const DataPartsVector & parts, + DataPartsVector & normal_parts, ContextPtr query_context) const { if (!metadata_snapshot->minmax_count_projection) @@ -4406,7 +4408,14 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( ErrorCodes::LOGICAL_ERROR); auto block = metadata_snapshot->minmax_count_projection->sample_block; + String primary_key_max_column_name; + if (metadata_snapshot->minmax_count_projection->has_primary_key_minmax) + primary_key_max_column_name = *(block.getNames().cend() - 2); + bool need_primary_key_max_column = std::any_of( + required_columns.begin(), required_columns.end(), [&](const auto & name) { return primary_key_max_column_name == name; }); + auto minmax_count_columns = block.mutateColumns(); + auto minmax_count_columns_size = minmax_count_columns.size(); auto insert = [](ColumnAggregateFunction & column, const Field & value) { @@ -4422,7 +4431,6 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( column.insertFrom(place); }; - auto parts = getDataPartsVector(); ASTPtr expression_ast; Block virtual_columns_block = getBlockWithVirtualPartColumns(parts, false /* one_part */, true /* ignore_empty */); if (virtual_columns_block.rows() == 0) @@ -4446,15 +4454,13 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( if (!part->minmax_idx->initialized) throw Exception("Found a non-empty part with uninitialized minmax_idx. 
It's a bug", ErrorCodes::LOGICAL_ERROR); - size_t minmax_idx_size = part->minmax_idx->hyperrectangle.size(); - if (2 * minmax_idx_size + 1 != minmax_count_columns.size()) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "minmax_count projection should have twice plus one the number of ranges in minmax_idx. 2 * minmax_idx_size + 1 = {}, " - "minmax_count_columns.size() = {}. It's a bug", - 2 * minmax_idx_size + 1, - minmax_count_columns.size()); + if (need_primary_key_max_column && !part->index_granularity.hasFinalMark()) + { + normal_parts.push_back(part); + continue; + } + size_t minmax_idx_size = part->minmax_idx->hyperrectangle.size(); for (size_t i = 0; i < minmax_idx_size; ++i) { size_t min_pos = i * 2; @@ -4466,6 +4472,16 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( insert(max_column, range.right); } + if (metadata_snapshot->minmax_count_projection->has_primary_key_minmax) + { + const auto & primary_key_column = *part->index[0]; + auto primary_key_column_size = primary_key_column.size(); + auto & min_column = assert_cast(*minmax_count_columns[minmax_count_columns_size - 3]); + auto & max_column = assert_cast(*minmax_count_columns[minmax_count_columns_size - 2]); + insert(min_column, primary_key_column[0]); + insert(max_column, primary_key_column[primary_key_column_size - 1]); + } + { auto & column = assert_cast(*minmax_count_columns.back()); auto func = column.getAggregateFunction(); @@ -4722,33 +4738,74 @@ bool MergeTreeData::getQueryProcessingStageWithAggregateProjection( size_t min_sum_marks = std::numeric_limits::max(); if (metadata_snapshot->minmax_count_projection) add_projection_candidate(*metadata_snapshot->minmax_count_projection); + std::optional minmax_conut_projection_candidate; + if (!candidates.empty()) + { + minmax_conut_projection_candidate.emplace(std::move(candidates.front())); + candidates.clear(); + } + MergeTreeDataSelectExecutor reader(*this); + std::shared_ptr max_added_blocks; + if (settings.select_sequential_consistency) + { + if (const StorageReplicatedMergeTree * replicated = dynamic_cast(this)) + max_added_blocks = std::make_shared(replicated->getMaxAddedBlocks()); + } + auto parts = getDataPartsVector(); - // Only add more projection candidates if minmax_count_projection cannot match. - if (candidates.empty()) + // If minmax_count_projection is a valid candidate, check its completeness. + if (minmax_conut_projection_candidate) + { + DataPartsVector normal_parts; + query_info.minmax_count_projection_block = getMinMaxCountProjectionBlock( + metadata_snapshot, minmax_conut_projection_candidate->required_columns, query_info, parts, normal_parts, query_context); + + if (normal_parts.empty()) + { + selected_candidate = &*minmax_conut_projection_candidate; + selected_candidate->complete = true; + min_sum_marks = query_info.minmax_count_projection_block.rows(); + } + else + { + if (normal_parts.size() == parts.size()) + { + // minmax_count_projection is useless. 
+ } + else + { + auto normal_result_ptr = reader.estimateNumMarksToRead( + normal_parts, + analysis_result.required_columns, + metadata_snapshot, + metadata_snapshot, + query_info, + query_context, + settings.max_threads, + max_added_blocks); + + if (!normal_result_ptr->error()) + { + selected_candidate = &*minmax_conut_projection_candidate; + selected_candidate->merge_tree_normal_select_result_ptr = normal_result_ptr; + min_sum_marks = query_info.minmax_count_projection_block.rows() + normal_result_ptr->marks(); + } + } + + // We cannot find a complete match of minmax_count_projection, add more projections to check. + for (const auto & projection : metadata_snapshot->projections) + add_projection_candidate(projection); + } + } + else { for (const auto & projection : metadata_snapshot->projections) add_projection_candidate(projection); } - else - { - selected_candidate = &candidates.front(); - query_info.minmax_count_projection_block - = getMinMaxCountProjectionBlock(metadata_snapshot, selected_candidate->required_columns, query_info, query_context); - min_sum_marks = query_info.minmax_count_projection_block.rows(); - } // Let's select the best projection to execute the query. - if (!candidates.empty() && !selected_candidate) + if (!candidates.empty()) { - std::shared_ptr max_added_blocks; - if (settings.select_sequential_consistency) - { - if (const StorageReplicatedMergeTree * replicated = dynamic_cast(this)) - max_added_blocks = std::make_shared(replicated->getMaxAddedBlocks()); - } - - auto parts = getDataPartsVector(); - MergeTreeDataSelectExecutor reader(*this); query_info.merge_tree_select_result_ptr = reader.estimateNumMarksToRead( parts, analysis_result.required_columns, @@ -4763,7 +4820,12 @@ bool MergeTreeData::getQueryProcessingStageWithAggregateProjection( { // Add 1 to base sum_marks so that we prefer projections even when they have equal number of marks to read. // NOTE: It is not clear if we need it. E.g. projections do not support skip index for now. - min_sum_marks = query_info.merge_tree_select_result_ptr->marks() + 1; + auto sum_marks = query_info.merge_tree_select_result_ptr->marks() + 1; + if (sum_marks < min_sum_marks) + { + selected_candidate = nullptr; + min_sum_marks = sum_marks; + } } /// Favor aggregate projections diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index b87a756bf9f..009abbb7b3b 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -368,6 +368,8 @@ public: const StorageMetadataPtr & metadata_snapshot, const Names & required_columns, const SelectQueryInfo & query_info, + const DataPartsVector & parts, + DataPartsVector & normal_parts, ContextPtr query_context) const; bool getQueryProcessingStageWithAggregateProjection( diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index a33296cbf24..55de92735c3 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -162,7 +162,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( LOG_DEBUG( log, - "Choose {} projection {}", + "Choose {} {} projection {}", + query_info.projection->complete ? 
"complete" : "incomplete", query_info.projection->desc->type, query_info.projection->desc->name); diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 379573a381f..982acfe62a4 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -651,10 +651,6 @@ static StoragePtr create(const StorageFactory::Arguments & args) /// single default partition with name "all". metadata.partition_key = KeyDescription::getKeyFromAST(partition_by_key, metadata.columns, args.getContext()); - auto minmax_columns = metadata.getColumnsRequiredForPartitionKey(); - metadata.minmax_count_projection.emplace( - ProjectionDescription::getMinMaxCountProjection(args.columns, minmax_columns, args.getContext())); - /// PRIMARY KEY without ORDER BY is allowed and considered as ORDER BY. if (!args.storage_def->order_by && args.storage_def->primary_key) args.storage_def->set(args.storage_def->order_by, args.storage_def->primary_key->clone()); @@ -686,6 +682,11 @@ static StoragePtr create(const StorageFactory::Arguments & args) metadata.primary_key.definition_ast = nullptr; } + auto minmax_columns = metadata.getColumnsRequiredForPartitionKey(); + auto primary_key_asts = metadata.primary_key.expression_list_ast->children; + metadata.minmax_count_projection.emplace( + ProjectionDescription::getMinMaxCountProjection(args.columns, minmax_columns, primary_key_asts, args.getContext())); + if (args.storage_def->sample_by) metadata.sampling_key = KeyDescription::getKeyFromAST(args.storage_def->sample_by->ptr(), metadata.columns, args.getContext()); @@ -736,10 +737,6 @@ static StoragePtr create(const StorageFactory::Arguments & args) metadata.partition_key = KeyDescription::getKeyFromAST(partition_by_ast, metadata.columns, args.getContext()); - auto minmax_columns = metadata.getColumnsRequiredForPartitionKey(); - metadata.minmax_count_projection.emplace( - ProjectionDescription::getMinMaxCountProjection(args.columns, minmax_columns, args.getContext())); - ++arg_num; /// If there is an expression for sampling @@ -765,6 +762,11 @@ static StoragePtr create(const StorageFactory::Arguments & args) ++arg_num; + auto minmax_columns = metadata.getColumnsRequiredForPartitionKey(); + auto primary_key_asts = metadata.primary_key.expression_list_ast->children; + metadata.minmax_count_projection.emplace( + ProjectionDescription::getMinMaxCountProjection(args.columns, minmax_columns, primary_key_asts, args.getContext())); + const auto * ast = engine_args[arg_num]->as(); if (ast && ast->value.getType() == Field::Types::UInt64) storage_settings->index_granularity = safeGet(ast->value); diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index 42294b8152c..5ad79304e27 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -60,6 +60,7 @@ ProjectionDescription ProjectionDescription::clone() const other.metadata = metadata; other.key_size = key_size; other.is_minmax_count_projection = is_minmax_count_projection; + other.has_primary_key_minmax = has_primary_key_minmax; return other; } @@ -172,9 +173,15 @@ ProjectionDescription::getProjectionFromAST(const ASTPtr & definition_ast, const return result; } -ProjectionDescription -ProjectionDescription::getMinMaxCountProjection(const ColumnsDescription & columns, const Names & minmax_columns, ContextPtr query_context) +ProjectionDescription 
ProjectionDescription::getMinMaxCountProjection( + const ColumnsDescription & columns, + const Names & minmax_columns, + const ASTs & primary_key_asts, + ContextPtr query_context) { + ProjectionDescription result; + result.is_minmax_count_projection = true; + auto select_query = std::make_shared(); ASTPtr select_expression_list = std::make_shared(); for (const auto & column : minmax_columns) @@ -182,10 +189,15 @@ ProjectionDescription::getMinMaxCountProjection(const ColumnsDescription & colum select_expression_list->children.push_back(makeASTFunction("min", std::make_shared(column))); select_expression_list->children.push_back(makeASTFunction("max", std::make_shared(column))); } + if (!primary_key_asts.empty()) + { + select_expression_list->children.push_back(makeASTFunction("min", primary_key_asts.front()->clone())); + select_expression_list->children.push_back(makeASTFunction("max", primary_key_asts.front()->clone())); + result.has_primary_key_minmax = true; + } select_expression_list->children.push_back(makeASTFunction("count")); select_query->setExpression(ASTProjectionSelectQuery::Expression::SELECT, std::move(select_expression_list)); - ProjectionDescription result; result.definition_ast = select_query; result.name = MINMAX_COUNT_PROJECTION_NAME; result.query_ast = select_query->cloneToASTSelect(); @@ -203,7 +215,6 @@ ProjectionDescription::getMinMaxCountProjection(const ColumnsDescription & colum metadata.sorting_key = KeyDescription::buildEmptyKey(); metadata.primary_key = KeyDescription::buildEmptyKey(); result.metadata = std::make_shared(metadata); - result.is_minmax_count_projection = true; return result; } diff --git a/src/Storages/ProjectionsDescription.h b/src/Storages/ProjectionsDescription.h index b9c11cb0771..28cff7ce532 100644 --- a/src/Storages/ProjectionsDescription.h +++ b/src/Storages/ProjectionsDescription.h @@ -58,12 +58,14 @@ struct ProjectionDescription bool is_minmax_count_projection = false; + bool has_primary_key_minmax = false; + /// Parse projection from definition AST static ProjectionDescription getProjectionFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, ContextPtr query_context); - static ProjectionDescription - getMinMaxCountProjection(const ColumnsDescription & columns, const Names & minmax_columns, ContextPtr query_context); + static ProjectionDescription getMinMaxCountProjection( + const ColumnsDescription & columns, const Names & minmax_columns, const ASTs & primary_key_asts, ContextPtr query_context); ProjectionDescription() = default; diff --git a/tests/queries/0_stateless/01710_minmax_count_projection.reference b/tests/queries/0_stateless/01710_minmax_count_projection.reference index ad9b87b998d..5591d5a9954 100644 --- a/tests/queries/0_stateless/01710_minmax_count_projection.reference +++ b/tests/queries/0_stateless/01710_minmax_count_projection.reference @@ -3,3 +3,8 @@ 1 9999 5000 0 9998 5000 1 +0 +0 +0 +0 9999 +0 9999 diff --git a/tests/queries/0_stateless/01710_minmax_count_projection.sql b/tests/queries/0_stateless/01710_minmax_count_projection.sql index 58af11f01f7..112487b219e 100644 --- a/tests/queries/0_stateless/01710_minmax_count_projection.sql +++ b/tests/queries/0_stateless/01710_minmax_count_projection.sql @@ -15,3 +15,31 @@ select min(i), max(i), count() from d where _partition_value.1 = 10 group by _pa select min(i) from d where 1 = _partition_value.1; drop table d; + +drop table if exists no_final_mark; +drop table if exists has_final_mark; +drop table if exists mixed_final_mark; + +create table 
no_final_mark (i int, j int) engine MergeTree partition by i % 2 order by j settings index_granularity = 10, write_final_mark = 0; +create table has_final_mark (i int, j int) engine MergeTree partition by i % 2 order by j settings index_granularity = 10, write_final_mark = 1; +create table mixed_final_mark (i int, j int) engine MergeTree partition by i % 2 order by j settings index_granularity = 10; + +set max_rows_to_read = 100000; + +insert into no_final_mark select number, number from numbers(10000); +insert into has_final_mark select number, number from numbers(10000); + +alter table mixed_final_mark attach partition 0 from no_final_mark; +alter table mixed_final_mark attach partition 1 from has_final_mark; + +set max_rows_to_read = 2; + +select min(j) from no_final_mark; +select min(j) from has_final_mark; +select min(j) from mixed_final_mark; + +select min(j), max(j) from no_final_mark; -- {serverError TOO_MANY_ROWS} +select min(j), max(j) from has_final_mark; + +set max_rows_to_read = 5001; -- one normal part 5000 + one minmax_count_projection part 1 +select min(j), max(j) from mixed_final_mark; From 77c4a5fa18ea84a33d65d2e67b5bf7d5b9c67263 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Thu, 14 Oct 2021 01:31:37 +0800 Subject: [PATCH 282/438] Better --- src/Storages/MergeTree/MergeTreeData.cpp | 38 ++++++++++++------------ src/Storages/MergeTree/MergeTreeData.h | 10 +++++++ src/Storages/ProjectionsDescription.cpp | 10 +++---- src/Storages/ProjectionsDescription.h | 5 ++++ 4 files changed, 39 insertions(+), 24 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 66b7f62a7c8..1f1b9a3f0a8 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -4408,15 +4408,16 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( ErrorCodes::LOGICAL_ERROR); auto block = metadata_snapshot->minmax_count_projection->sample_block; + bool need_primary_key_max_column = false; String primary_key_max_column_name; if (metadata_snapshot->minmax_count_projection->has_primary_key_minmax) - primary_key_max_column_name = *(block.getNames().cend() - 2); - bool need_primary_key_max_column = std::any_of( - required_columns.begin(), required_columns.end(), [&](const auto & name) { return primary_key_max_column_name == name; }); + { + primary_key_max_column_name = block.getNames()[ProjectionDescription::PRIMARY_KEY_MAX_COLUMN_POS]; + need_primary_key_max_column = std::any_of( + required_columns.begin(), required_columns.end(), [&](const auto & name) { return primary_key_max_column_name == name; }); + } auto minmax_count_columns = block.mutateColumns(); - auto minmax_count_columns_size = minmax_count_columns.size(); - auto insert = [](ColumnAggregateFunction & column, const Field & value) { auto func = column.getAggregateFunction(); @@ -4460,28 +4461,27 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( continue; } - size_t minmax_idx_size = part->minmax_idx->hyperrectangle.size(); - for (size_t i = 0; i < minmax_idx_size; ++i) - { - size_t min_pos = i * 2; - size_t max_pos = i * 2 + 1; - auto & min_column = assert_cast(*minmax_count_columns[min_pos]); - auto & max_column = assert_cast(*minmax_count_columns[max_pos]); - const auto & range = part->minmax_idx->hyperrectangle[i]; - insert(min_column, range.left); - insert(max_column, range.right); - } - + size_t pos = 0; if (metadata_snapshot->minmax_count_projection->has_primary_key_minmax) { const auto & primary_key_column = *part->index[0]; auto 
primary_key_column_size = primary_key_column.size(); - auto & min_column = assert_cast(*minmax_count_columns[minmax_count_columns_size - 3]); - auto & max_column = assert_cast(*minmax_count_columns[minmax_count_columns_size - 2]); + auto & min_column = assert_cast(*minmax_count_columns[pos++]); + auto & max_column = assert_cast(*minmax_count_columns[pos++]); insert(min_column, primary_key_column[0]); insert(max_column, primary_key_column[primary_key_column_size - 1]); } + size_t minmax_idx_size = part->minmax_idx->hyperrectangle.size(); + for (size_t i = 0; i < minmax_idx_size; ++i) + { + auto & min_column = assert_cast(*minmax_count_columns[pos++]); + auto & max_column = assert_cast(*minmax_count_columns[pos++]); + const auto & range = part->minmax_idx->hyperrectangle[i]; + insert(min_column, range.left); + insert(max_column, range.right); + } + { auto & column = assert_cast(*minmax_count_columns.back()); auto func = column.getAggregateFunction(); diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 009abbb7b3b..7151141cb15 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -364,6 +364,16 @@ public: bool attach, BrokenPartCallback broken_part_callback_ = [](const String &){}); + /// Build a block of minmax and count values of a MergeTree table. These values are extracted + /// from minmax_indices, the first expression of primary key, and part rows. + /// + /// query_info - used to filter unneeded parts + /// + /// parts - part set to filter + /// + /// normal_parts - collects parts that don't have all the needed values to form the block. + /// Specifically, this is when a part doesn't contain a final mark and the related max value is + /// required. Block getMinMaxCountProjectionBlock( const StorageMetadataPtr & metadata_snapshot, const Names & required_columns, diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index 5ad79304e27..7b27604730c 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -184,17 +184,17 @@ ProjectionDescription ProjectionDescription::getMinMaxCountProjection( auto select_query = std::make_shared(); ASTPtr select_expression_list = std::make_shared(); - for (const auto & column : minmax_columns) - { - select_expression_list->children.push_back(makeASTFunction("min", std::make_shared(column))); - select_expression_list->children.push_back(makeASTFunction("max", std::make_shared(column))); - } if (!primary_key_asts.empty()) { select_expression_list->children.push_back(makeASTFunction("min", primary_key_asts.front()->clone())); select_expression_list->children.push_back(makeASTFunction("max", primary_key_asts.front()->clone())); result.has_primary_key_minmax = true; } + for (const auto & column : minmax_columns) + { + select_expression_list->children.push_back(makeASTFunction("min", std::make_shared(column))); + select_expression_list->children.push_back(makeASTFunction("max", std::make_shared(column))); + } select_expression_list->children.push_back(makeASTFunction("count")); select_query->setExpression(ASTProjectionSelectQuery::Expression::SELECT, std::move(select_expression_list)); diff --git a/src/Storages/ProjectionsDescription.h b/src/Storages/ProjectionsDescription.h index 28cff7ce532..77b858b3ab1 100644 --- a/src/Storages/ProjectionsDescription.h +++ b/src/Storages/ProjectionsDescription.h @@ -30,6 +30,10 @@ struct ProjectionDescription static constexpr const char * 
MINMAX_COUNT_PROJECTION_NAME = "_minmax_count_projection"; + /// If minmax_count projection contains a primary key's minmax values. Their positions will be 0 and 1. + static constexpr const size_t PRIMARY_KEY_MIN_COLUMN_POS = 0; + static constexpr const size_t PRIMARY_KEY_MAX_COLUMN_POS = 1; + /// Definition AST of projection ASTPtr definition_ast; @@ -58,6 +62,7 @@ struct ProjectionDescription bool is_minmax_count_projection = false; + /// If a primary key expression is used in the minmax_count projection, this flag will be true. bool has_primary_key_minmax = false; /// Parse projection from definition AST From 9ceb668a4ddb614291ae74bab2b4b582bf6750ec Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Sat, 16 Oct 2021 18:13:11 +0800 Subject: [PATCH 283/438] Fix TSan --- src/Storages/MergeTree/MergeTreeData.cpp | 9 ++++----- src/Storages/ProjectionsDescription.cpp | 5 +++-- src/Storages/ProjectionsDescription.h | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 1f1b9a3f0a8..45748539e9d 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -4407,12 +4407,11 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( "Cannot find the definition of minmax_count projection but it's used in current query. It's a bug", ErrorCodes::LOGICAL_ERROR); - auto block = metadata_snapshot->minmax_count_projection->sample_block; + auto block = metadata_snapshot->minmax_count_projection->sample_block.cloneEmpty(); bool need_primary_key_max_column = false; - String primary_key_max_column_name; - if (metadata_snapshot->minmax_count_projection->has_primary_key_minmax) + const auto & primary_key_max_column_name = metadata_snapshot->minmax_count_projection->primary_key_max_column_name; + if (!primary_key_max_column_name.empty()) { - primary_key_max_column_name = block.getNames()[ProjectionDescription::PRIMARY_KEY_MAX_COLUMN_POS]; need_primary_key_max_column = std::any_of( required_columns.begin(), required_columns.end(), [&](const auto & name) { return primary_key_max_column_name == name; }); } @@ -4462,7 +4461,7 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( } size_t pos = 0; - if (metadata_snapshot->minmax_count_projection->has_primary_key_minmax) + if (!primary_key_max_column_name.empty()) { const auto & primary_key_column = *part->index[0]; auto primary_key_column_size = primary_key_column.size(); diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index 7b27604730c..b3b344e9ecc 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -60,7 +60,7 @@ ProjectionDescription ProjectionDescription::clone() const other.metadata = metadata; other.key_size = key_size; other.is_minmax_count_projection = is_minmax_count_projection; - other.has_primary_key_minmax = has_primary_key_minmax; + other.primary_key_max_column_name = primary_key_max_column_name; return other; } @@ -188,7 +188,6 @@ ProjectionDescription ProjectionDescription::getMinMaxCountProjection( { select_expression_list->children.push_back(makeASTFunction("min", primary_key_asts.front()->clone())); select_expression_list->children.push_back(makeASTFunction("max", primary_key_asts.front()->clone())); - result.has_primary_key_minmax = true; } for (const auto & column : minmax_columns) { @@ -208,6 +207,8 @@ ProjectionDescription ProjectionDescription::getMinMaxCountProjection( result.query_ast, query_context, 
storage, {}, SelectQueryOptions{QueryProcessingStage::WithMergeableState}.modify().ignoreAlias()); result.required_columns = select.getRequiredColumns(); result.sample_block = select.getSampleBlock(); + if (!primary_key_asts.empty()) + result.primary_key_max_column_name = result.sample_block.getNames()[ProjectionDescription::PRIMARY_KEY_MAX_COLUMN_POS]; result.type = ProjectionDescription::Type::Aggregate; StorageInMemoryMetadata metadata; metadata.setColumns(ColumnsDescription(result.sample_block.getNamesAndTypesList())); diff --git a/src/Storages/ProjectionsDescription.h b/src/Storages/ProjectionsDescription.h index 77b858b3ab1..4dd717239ad 100644 --- a/src/Storages/ProjectionsDescription.h +++ b/src/Storages/ProjectionsDescription.h @@ -62,8 +62,8 @@ struct ProjectionDescription bool is_minmax_count_projection = false; - /// If a primary key expression is used in the minmax_count projection, this flag will be true. - bool has_primary_key_minmax = false; + /// If a primary key expression is used in the minmax_count projection, store the name of max expression. + String primary_key_max_column_name; /// Parse projection from definition AST static ProjectionDescription From 9525437499311d154198bc9b8e1e22d95986c600 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 16 Oct 2021 13:17:00 +0300 Subject: [PATCH 284/438] Less threads in local, fix Ok. printing --- programs/local/LocalServer.cpp | 5 +- src/Client/LocalConnection.cpp | 17 +++--- src/Interpreters/Context.cpp | 9 ++++ src/Interpreters/Context.h | 3 ++ src/Interpreters/InterpreterCreateQuery.cpp | 9 ++++ .../02049_clickhouse_local_merge_tree.expect | 53 +++++++++++++++++++ ...2049_clickhouse_local_merge_tree.reference | 0 7 files changed, 84 insertions(+), 12 deletions(-) create mode 100755 tests/queries/0_stateless/02049_clickhouse_local_merge_tree.expect create mode 100644 tests/queries/0_stateless/02049_clickhouse_local_merge_tree.reference diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 30082caaac1..cdd5ae13f99 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -514,19 +514,16 @@ void LocalServer::processConfig() format = config().getString("output-format", config().getString("format", is_interactive ? "PrettyCompact" : "TSV")); insert_format = "Values"; + /// Setting value from cmd arg overrides one from config if (global_context->getSettingsRef().max_insert_block_size.changed) insert_format_max_block_size = global_context->getSettingsRef().max_insert_block_size; else insert_format_max_block_size = config().getInt("insert_format_max_block_size", global_context->getSettingsRef().max_insert_block_size); - /// Skip networking - /// Sets external authenticators config (LDAP, Kerberos). global_context->setExternalAuthenticatorsConfig(config()); - global_context->initializeBackgroundExecutors(); - setupUsers(); /// Limit on total number of concurrently executing queries. 
diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index efd302622dd..e1324146330 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -266,19 +266,19 @@ bool LocalConnection::poll(size_t) } } - if (state->is_finished && send_progress && !state->sent_progress) - { - state->sent_progress = true; - next_packet_type = Protocol::Server::Progress; - return true; - } - if (state->is_finished) { finishQuery(); return true; } + if (send_progress && !state->sent_progress) + { + state->sent_progress = true; + next_packet_type = Protocol::Server::Progress; + return true; + } + if (state->block && state->block.value()) { next_packet_type = Protocol::Server::Data; @@ -292,7 +292,8 @@ bool LocalConnection::pollImpl() { Block block; auto next_read = pullBlock(block); - if (block) + + if (block && !state->io.null_format) { state->block.emplace(block); } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 98acc786aa9..0ef92eaed39 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -2895,8 +2895,15 @@ void Context::setAsynchronousInsertQueue(const std::shared_ptrasync_insert_queue = ptr; } +bool Context::isBackgroundExecutorsInitialized() const +{ + return is_background_executors_initialized; +} + void Context::initializeBackgroundExecutors() { + assert(!is_background_executors_initialized); + const size_t max_merges_and_mutations = getSettingsRef().background_pool_size * getSettingsRef().background_merges_mutations_concurrency_ratio; /// With this executor we can execute more tasks than threads we have @@ -2943,6 +2950,8 @@ void Context::initializeBackgroundExecutors() LOG_INFO(shared->log, "Initialized background executor for common operations (e.g. clearing old parts) with num_threads={}, num_tasks={}", getSettingsRef().background_common_pool_size, getSettingsRef().background_common_pool_size); + + is_background_executors_initialized = true; } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 247dbc74f22..15c4376aa6d 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -293,6 +293,8 @@ private: /// A flag, used to distinguish between user query and internal query to a database engine (MaterializedPostgreSQL). bool is_internal_query = false; + /// Has initializeBackgroundExecutors() method been executed? + bool is_background_executors_initialized = false; public: @@ -862,6 +864,7 @@ public: /// Background executors related methods void initializeBackgroundExecutors(); + bool isBackgroundExecutorsInitialized() const; MergeMutateBackgroundExecutorPtr getMergeMutateExecutor() const; OrdinaryBackgroundExecutorPtr getMovesExecutor() const; diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 6d38c55bd62..5b993bce724 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -833,6 +833,15 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) String current_database = getContext()->getCurrentDatabase(); auto database_name = create.database.empty() ? 
current_database : create.database; + auto global_context = getContext()->getGlobalContext(); + if (global_context + && global_context->getApplicationType() == Context::ApplicationType::LOCAL + && !global_context->isBackgroundExecutorsInitialized() + && create.storage && endsWith(create.storage->engine->name, "MergeTree")) + { + global_context->initializeBackgroundExecutors(); + } + // If this is a stub ATTACH query, read the query definition from the database if (create.attach && !create.storage && !create.columns_list) { diff --git a/tests/queries/0_stateless/02049_clickhouse_local_merge_tree.expect b/tests/queries/0_stateless/02049_clickhouse_local_merge_tree.expect new file mode 100755 index 00000000000..17b98b077d5 --- /dev/null +++ b/tests/queries/0_stateless/02049_clickhouse_local_merge_tree.expect @@ -0,0 +1,53 @@ +#!/usr/bin/expect -f +# Tags: no-fasttest + +log_user 0 +set timeout 20 +match_max 100000 + +# A default timeout action is to fail +expect_after { + timeout { + exit 1 + } + +} + +set basedir [file dirname $argv0] +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_LOCAL --disable_suggestion" +expect ":) " + +send -- "drop table if exists t\r" +expect "Ok." + +send -- "create table t engine=MergeTree() order by tuple() as select 1\r" +expect "Ok." + +send -- "set optimize_on_insert = 0\r" +expect "Ok." + +send -- "drop table if exists tt\r" +expect "Ok." + +send -- "create table tt (date Date, version UInt64, val UInt64) engine = ReplacingMergeTree(version) partition by date order by date\r" +expect "Ok." + +send -- "insert into tt values ('2020-01-01', 2, 2), ('2020-01-01', 1, 1)\r" +expect "Ok." + +send -- "insert into tt values ('2020-01-01', 0, 0)\r" +expect "Ok." + +send -- "OPTIMIZE TABLE tt\r" +expect "Ok." + +send -- "select * from tt order by version format TSV\r" +expect "2020-01-01\t2\t2" + +send -- "drop table tt\r" +expect "Ok." +send -- "drop table t\r" +expect "Ok." + +send -- "\4" +expect eof diff --git a/tests/queries/0_stateless/02049_clickhouse_local_merge_tree.reference b/tests/queries/0_stateless/02049_clickhouse_local_merge_tree.reference new file mode 100644 index 00000000000..e69de29bb2d From 1b50d26c5122df276379e4cd5a2d86aa456c65de Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Sat, 16 Oct 2021 13:41:56 +0300 Subject: [PATCH 285/438] Added documentation --- .../external-dicts-dict-layout.md | 54 +++++++++++++++++++ src/Dictionaries/HashedArrayDictionary.cpp | 11 ++-- 2 files changed, 60 insertions(+), 5 deletions(-) diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md index f525ea64aa2..30e050ef9ef 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md @@ -155,6 +155,60 @@ Configuration example: LAYOUT(COMPLEX_KEY_HASHED()) ``` +### complex_key_sparse_hashed {#complex-key-sparse-hashed} + +This type of storage is for use with composite [keys](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). Similar to `sparse_hashed`. + +Configuration example: + +``` xml + + + +``` + +``` sql +LAYOUT(COMPLEX_KEY_SPARSE_HASHED()) +``` + +### hashed_array {#dicts-external_dicts_dict_layout-hashed-array} + +The dictionary is completely stored in memory. Each attribute is stored in array. 
Key attribute is stored in the form of hashed table where value is index in attributes array. The dictionary can contain any number of elements with any identifiers In practice, the number of keys can reach tens of millions of items. + +All types of sources are supported. When updating, data (from a file or from a table) is read in its entirety. + +Configuration example: + +``` xml + + + + +``` + +or + +``` sql +LAYOUT(HASHED_ARRAY()) +``` + +### complex_key_hashed_array {#complex-key-hashed-array} + +This type of storage is for use with composite [keys](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). Similar to `hashed_array`. + +Configuration example: + +``` xml + + + +``` + +``` sql +LAYOUT(COMPLEX_KEY_HASHED_ARRAY()) +``` + + ### range_hashed {#range-hashed} The dictionary is stored in memory in the form of a hash table with an ordered array of ranges and their corresponding values. diff --git a/src/Dictionaries/HashedArrayDictionary.cpp b/src/Dictionaries/HashedArrayDictionary.cpp index 7706f7d6108..3c02f377c3e 100644 --- a/src/Dictionaries/HashedArrayDictionary.cpp +++ b/src/Dictionaries/HashedArrayDictionary.cpp @@ -622,6 +622,9 @@ void HashedArrayDictionary::calculateBytesAllocated() if (attribute.string_arena) bytes_allocated += attribute.string_arena->size(); + + if (attribute.is_index_null.has_value()) + bytes_allocated += (*attribute.is_index_null).size(); } bytes_allocated += complex_key_arena.size(); @@ -634,14 +637,12 @@ template Pipe HashedArrayDictionary::read(const Names & column_names, size_t max_block_size) const { PaddedPODArray keys; + keys.reserve(key_attribute.container.size()); - for (auto & [key, value] : key_attribute.container) + for (auto & [key, _] : key_attribute.container) keys.emplace_back(key); - if constexpr (dictionary_key_type == DictionaryKeyType::Simple) - return Pipe(std::make_shared(DictionarySourceData(shared_from_this(), std::move(keys), column_names), max_block_size)); - else - return Pipe(std::make_shared(DictionarySourceData(shared_from_this(), std::move(keys), column_names), max_block_size)); + return Pipe(std::make_shared(DictionarySourceData(shared_from_this(), std::move(keys), column_names), max_block_size)); } template class HashedArrayDictionary; From 8a94e26bece5c3d6b5206acda686f1b0f7de4229 Mon Sep 17 00:00:00 2001 From: WangZengrui Date: Sat, 16 Oct 2021 18:51:42 +0800 Subject: [PATCH 286/438] init --- src/Functions/ReplaceRegexpImpl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/ReplaceRegexpImpl.h b/src/Functions/ReplaceRegexpImpl.h index 3e80dd5b337..1caced9cbde 100644 --- a/src/Functions/ReplaceRegexpImpl.h +++ b/src/Functions/ReplaceRegexpImpl.h @@ -110,7 +110,7 @@ struct ReplaceRegexpImpl res_data.resize(res_data.size() + bytes_to_copy); memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data() + start_pos, bytes_to_copy); res_offset += bytes_to_copy; - start_pos += bytes_to_copy + match.length(); + start_pos += bytes_to_copy + (match.length() > 0 ? match.length() : 1); /// Do substitution instructions for (const auto & it : instructions) @@ -129,7 +129,7 @@ struct ReplaceRegexpImpl } } - if (replace_one || match.length() == 0) /// Stop after match of zero length, to avoid infinite loop. + if (replace_one) /// Stop after match of zero length, to avoid infinite loop. 
can_finish_current_string = true; } else From c5b09b49079581527502b35ecd7efb83d93e47f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Sat, 16 Oct 2021 13:03:51 +0200 Subject: [PATCH 287/438] Tag resource heavy tests as no-parallel --- .../01509_check_many_parallel_quorum_inserts_long.sh | 2 +- tests/queries/0_stateless/02033_join_engine_deadlock_long.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01509_check_many_parallel_quorum_inserts_long.sh b/tests/queries/0_stateless/01509_check_many_parallel_quorum_inserts_long.sh index 6533eeb12f5..a6f151d0f6f 100755 --- a/tests/queries/0_stateless/01509_check_many_parallel_quorum_inserts_long.sh +++ b/tests/queries/0_stateless/01509_check_many_parallel_quorum_inserts_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-replicated-database +# Tags: long, no-replicated-database, no-parallel # Tag no-replicated-database: Fails due to additional replicas or shards set -e diff --git a/tests/queries/0_stateless/02033_join_engine_deadlock_long.sh b/tests/queries/0_stateless/02033_join_engine_deadlock_long.sh index 2a887cbbcae..1e670b72fe7 100755 --- a/tests/queries/0_stateless/02033_join_engine_deadlock_long.sh +++ b/tests/queries/0_stateless/02033_join_engine_deadlock_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, deadlock +# Tags: long, deadlock, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From c79c0526ff5f9783648a85e6c87657ecb9d2d060 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 16 Oct 2021 15:18:00 +0300 Subject: [PATCH 288/438] Fix clickhouse-local syntax exception --- programs/local/LocalServer.cpp | 6 ++---- .../02050_clickhouse_local_parsing_exception.reference | 1 + .../02050_clickhouse_local_parsing_exception.sh | 8 ++++++++ 3 files changed, 11 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/02050_clickhouse_local_parsing_exception.reference create mode 100755 tests/queries/0_stateless/02050_clickhouse_local_parsing_exception.sh diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 30082caaac1..10c4bf592ca 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include @@ -128,9 +127,8 @@ bool LocalServer::executeMultiQuery(const String & all_queries_text) } case MultiQueryProcessingStage::PARSING_EXCEPTION: { - this_query_end = find_first_symbols<'\n'>(this_query_end, all_queries_end); - this_query_begin = this_query_end; /// It's expected syntax error, skip the line - current_exception.reset(); + if (current_exception) + current_exception->rethrow(); continue; } case MultiQueryProcessingStage::EXECUTE_QUERY: diff --git a/tests/queries/0_stateless/02050_clickhouse_local_parsing_exception.reference b/tests/queries/0_stateless/02050_clickhouse_local_parsing_exception.reference new file mode 100644 index 00000000000..d86bac9de59 --- /dev/null +++ b/tests/queries/0_stateless/02050_clickhouse_local_parsing_exception.reference @@ -0,0 +1 @@ +OK diff --git a/tests/queries/0_stateless/02050_clickhouse_local_parsing_exception.sh b/tests/queries/0_stateless/02050_clickhouse_local_parsing_exception.sh new file mode 100755 index 00000000000..7a92fa6fefe --- /dev/null +++ b/tests/queries/0_stateless/02050_clickhouse_local_parsing_exception.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck 
source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL --query="SELECT number FROM system.numbers INTO OUTFILE test.native.zst FORMAT Native" 2>&1 | grep -q "Code: 62. DB::Exception: Syntax error: failed at position 48 ('test'): test.native.zst FORMAT Native. Expected string literal." && echo 'OK' || echo 'FAIL' ||: + From a08c98d7603e6207ae4a7af2bd78c543602fa76e Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Sat, 16 Oct 2021 17:03:50 +0300 Subject: [PATCH 289/438] Move some files. --- programs/copier/ClusterCopier.cpp | 4 ++-- programs/library-bridge/Handlers.cpp | 4 ++-- programs/obfuscator/Obfuscator.cpp | 4 ++-- programs/odbc-bridge/MainHandler.cpp | 2 +- src/Bridge/LibraryBridgeHelper.cpp | 2 +- src/Client/ClientBase.cpp | 2 +- src/Client/Connection.cpp | 4 ++-- src/Client/IServerConnection.h | 2 +- src/Core/ExternalTable.cpp | 2 +- src/Databases/MySQL/DatabaseMySQL.cpp | 2 +- src/Databases/MySQL/FetchTablesColumnsList.cpp | 2 +- src/Databases/MySQL/MaterializeMetadata.cpp | 2 +- src/Databases/MySQL/MaterializedMySQLSyncThread.cpp | 2 +- src/Dictionaries/CacheDictionary.cpp | 2 +- src/Dictionaries/ClickHouseDictionarySource.cpp | 2 +- src/Dictionaries/DictionaryHelpers.h | 2 +- src/Dictionaries/DirectDictionary.cpp | 2 +- src/Dictionaries/FlatDictionary.cpp | 2 +- src/Dictionaries/IDictionarySource.h | 2 +- src/Dictionaries/MySQLDictionarySource.cpp | 4 ++-- src/Dictionaries/PolygonDictionary.cpp | 2 +- src/Dictionaries/PostgreSQLDictionarySource.cpp | 2 +- src/Dictionaries/XDBCDictionarySource.cpp | 2 +- src/Dictionaries/readInvalidateQuery.cpp | 2 +- src/Formats/TemporaryFileStream.h | 2 +- src/Formats/formatBlock.cpp | 2 +- src/Interpreters/AsynchronousInsertQueue.cpp | 2 +- src/Interpreters/ClusterProxy/executeQuery.cpp | 2 +- src/Interpreters/InterpreterExplainQuery.cpp | 2 +- src/Interpreters/InterpreterSelectQuery.cpp | 2 +- src/Interpreters/MergeJoin.cpp | 2 +- src/Interpreters/MutationsInterpreter.cpp | 2 +- src/Interpreters/SortedBlocksWriter.cpp | 2 +- src/Interpreters/SortedBlocksWriter.h | 2 +- src/Interpreters/executeDDLQueryOnCluster.cpp | 2 +- src/Processors/Executors/CompletedPipelineExecutor.cpp | 2 +- src/Processors/Executors/PipelineExecutor.cpp | 2 +- src/Processors/Executors/PullingAsyncPipelineExecutor.cpp | 2 +- src/Processors/Executors/PullingPipelineExecutor.cpp | 2 +- src/Processors/Executors/PushingAsyncPipelineExecutor.cpp | 2 +- src/Processors/Executors/PushingPipelineExecutor.cpp | 2 +- src/Processors/QueryPlan/AggregatingStep.cpp | 2 +- src/Processors/QueryPlan/ArrayJoinStep.cpp | 2 +- src/Processors/QueryPlan/CreatingSetsStep.cpp | 2 +- src/Processors/QueryPlan/CubeStep.cpp | 2 +- src/Processors/QueryPlan/DistinctStep.cpp | 2 +- src/Processors/QueryPlan/ExpressionStep.cpp | 2 +- src/Processors/QueryPlan/ExtremesStep.cpp | 2 +- src/Processors/QueryPlan/FillingStep.cpp | 2 +- src/Processors/QueryPlan/FilterStep.cpp | 2 +- src/Processors/QueryPlan/FinishSortingStep.cpp | 2 +- src/Processors/QueryPlan/ISourceStep.cpp | 2 +- src/Processors/QueryPlan/ITransformingStep.cpp | 2 +- src/Processors/QueryPlan/IntersectOrExceptStep.cpp | 2 +- src/Processors/QueryPlan/JoinStep.cpp | 2 +- src/Processors/QueryPlan/LimitByStep.cpp | 2 +- src/Processors/QueryPlan/LimitStep.cpp | 2 +- src/Processors/QueryPlan/MergeSortingStep.cpp | 2 +- src/Processors/QueryPlan/MergingAggregatedStep.cpp | 2 +- src/Processors/QueryPlan/MergingSortedStep.cpp | 2 +- src/Processors/QueryPlan/OffsetStep.cpp | 2 +- src/Processors/QueryPlan/PartialSortingStep.cpp 
| 2 +- src/Processors/QueryPlan/QueryPlan.cpp | 2 +- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 2 +- src/Processors/QueryPlan/ReadFromPreparedSource.cpp | 2 +- src/Processors/QueryPlan/ReadFromPreparedSource.h | 2 +- src/Processors/QueryPlan/ReadNothingStep.cpp | 2 +- src/Processors/QueryPlan/RollupStep.cpp | 2 +- src/Processors/QueryPlan/SettingQuotaAndLimitsStep.cpp | 2 +- src/Processors/QueryPlan/TotalsHavingStep.cpp | 2 +- src/Processors/QueryPlan/UnionStep.cpp | 2 +- src/Processors/QueryPlan/WindowStep.cpp | 2 +- src/Processors/Sources/DelayedSource.h | 2 +- src/Processors/Sources/RemoteSource.h | 2 +- src/Processors/Sources/ShellCommandSource.h | 2 +- src/Processors/Transforms/AggregatingTransform.cpp | 2 +- src/Processors/Transforms/CreatingSetsTransform.h | 4 ++-- .../MergingAggregatedMemoryEfficientTransform.cpp | 2 +- src/Processors/Transforms/buildPushingToViewsChain.h | 2 +- src/Processors/Transforms/getSourceFromASTInsertQuery.cpp | 2 +- src/QueryPipeline/BlockIO.h | 2 +- src/{Processors => QueryPipeline}/Chain.cpp | 2 +- src/{Processors => QueryPipeline}/Chain.h | 2 +- src/{Processors => QueryPipeline}/Pipe.cpp | 2 +- src/{Processors => QueryPipeline}/Pipe.h | 4 ++-- .../PipelineResourcesHolder.cpp | 2 +- src/{Processors => QueryPipeline}/PipelineResourcesHolder.h | 0 src/{Processors => QueryPipeline}/QueryPipeline.cpp | 6 +++--- src/{Processors => QueryPipeline}/QueryPipeline.h | 2 +- src/{Processors => QueryPipeline}/QueryPipelineBuilder.cpp | 2 +- src/{Processors => QueryPipeline}/QueryPipelineBuilder.h | 4 ++-- src/QueryPipeline/RemoteQueryExecutor.cpp | 2 +- src/QueryPipeline/narrowBlockInputStreams.cpp | 2 +- src/{Processors => QueryPipeline}/printPipeline.cpp | 2 +- src/{Processors => QueryPipeline}/printPipeline.h | 0 .../tests/gtest_blocks_size_merging_streams.cpp | 4 ++-- src/QueryPipeline/tests/gtest_check_sorted_stream.cpp | 4 ++-- src/Server/GRPCServer.cpp | 2 +- src/Storages/Distributed/DistributedSink.h | 2 +- src/Storages/HDFS/StorageHDFS.cpp | 4 ++-- src/Storages/IStorage.cpp | 2 +- src/Storages/IStorage.h | 2 +- src/Storages/LiveView/StorageBlocks.h | 2 +- src/Storages/MergeTree/StorageFromMergeTreeDataPart.h | 2 +- src/Storages/PartitionCommands.cpp | 2 +- src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp | 2 +- src/Storages/ProjectionsDescription.cpp | 2 +- src/Storages/ReadFinalForExternalReplicaStorage.h | 2 +- src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp | 2 +- src/Storages/StorageDictionary.cpp | 2 +- src/Storages/StorageExecutable.cpp | 2 +- src/Storages/StorageExternalDistributed.cpp | 2 +- src/Storages/StorageFile.cpp | 2 +- src/Storages/StorageGenerateRandom.cpp | 2 +- src/Storages/StorageInput.cpp | 2 +- src/Storages/StorageInput.h | 2 +- src/Storages/StorageJoin.cpp | 2 +- src/Storages/StorageLog.cpp | 2 +- src/Storages/StorageMaterializedMySQL.cpp | 2 +- src/Storages/StorageMemory.cpp | 2 +- src/Storages/StorageMergeTree.cpp | 2 +- src/Storages/StorageMongoDB.cpp | 2 +- src/Storages/StorageMySQL.cpp | 2 +- src/Storages/StorageNull.h | 2 +- src/Storages/StorageProxy.h | 2 +- src/Storages/StorageReplicatedMergeTree.h | 2 +- src/Storages/StorageS3.cpp | 4 ++-- src/Storages/StorageS3Cluster.cpp | 2 +- src/Storages/StorageStripeLog.cpp | 2 +- src/Storages/StorageTableFunction.h | 2 +- src/Storages/StorageTinyLog.cpp | 2 +- src/Storages/StorageURL.cpp | 2 +- src/Storages/StorageValues.cpp | 2 +- src/Storages/StorageView.cpp | 2 +- src/Storages/StorageXDBC.cpp | 2 +- src/Storages/System/IStorageSystemOneBlock.h | 2 +- 
src/Storages/System/StorageSystemNumbers.cpp | 2 +- src/Storages/System/StorageSystemOne.cpp | 2 +- src/Storages/System/StorageSystemTables.cpp | 2 +- src/Storages/System/StorageSystemZeros.cpp | 2 +- src/Storages/tests/gtest_storage_log.cpp | 4 ++-- 141 files changed, 154 insertions(+), 154 deletions(-) rename src/{Processors => QueryPipeline}/Chain.cpp (99%) rename src/{Processors => QueryPipeline}/Chain.h (97%) rename src/{Processors => QueryPipeline}/Pipe.cpp (99%) rename src/{Processors => QueryPipeline}/Pipe.h (98%) rename src/{Processors => QueryPipeline}/PipelineResourcesHolder.cpp (94%) rename src/{Processors => QueryPipeline}/PipelineResourcesHolder.h (100%) rename src/{Processors => QueryPipeline}/QueryPipeline.cpp (99%) rename src/{Processors => QueryPipeline}/QueryPipeline.h (98%) rename src/{Processors => QueryPipeline}/QueryPipelineBuilder.cpp (99%) rename src/{Processors => QueryPipeline}/QueryPipelineBuilder.h (99%) rename src/{Processors => QueryPipeline}/printPipeline.cpp (99%) rename src/{Processors => QueryPipeline}/printPipeline.h (100%) diff --git a/programs/copier/ClusterCopier.cpp b/programs/copier/ClusterCopier.cpp index e7aeea8cbad..5b21a7c2aef 100644 --- a/programs/copier/ClusterCopier.cpp +++ b/programs/copier/ClusterCopier.cpp @@ -9,8 +9,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include diff --git a/programs/library-bridge/Handlers.cpp b/programs/library-bridge/Handlers.cpp index abc5118baad..bf9ace679ba 100644 --- a/programs/library-bridge/Handlers.cpp +++ b/programs/library-bridge/Handlers.cpp @@ -11,11 +11,11 @@ #include #include #include -#include +#include #include #include #include -#include +#include #include #include diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp index f18281a9fdf..caccc726923 100644 --- a/programs/obfuscator/Obfuscator.cpp +++ b/programs/obfuscator/Obfuscator.cpp @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/programs/odbc-bridge/MainHandler.cpp b/programs/odbc-bridge/MainHandler.cpp index 6362c7dfa9b..82d1bd61c24 100644 --- a/programs/odbc-bridge/MainHandler.cpp +++ b/programs/odbc-bridge/MainHandler.cpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Bridge/LibraryBridgeHelper.cpp b/src/Bridge/LibraryBridgeHelper.cpp index 9bc14627ac3..e5c6c09ba62 100644 --- a/src/Bridge/LibraryBridgeHelper.cpp +++ b/src/Bridge/LibraryBridgeHelper.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index a226407e276..21d605e7b45 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -45,7 +45,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 5841c6136af..3c920e8cabe 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -25,8 +25,8 @@ #include "Core/Block.h" #include #include -#include -#include +#include +#include #include #include #include diff --git a/src/Client/IServerConnection.h b/src/Client/IServerConnection.h index 7424afc969d..cb76c7ff639 100644 --- a/src/Client/IServerConnection.h +++ b/src/Client/IServerConnection.h @@ -8,7 +8,7 @@ #include -#include +#include #include #include diff --git 
a/src/Core/ExternalTable.cpp b/src/Core/ExternalTable.cpp index 4dd8b0cf016..b4adbcc0662 100644 --- a/src/Core/ExternalTable.cpp +++ b/src/Core/ExternalTable.cpp @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Databases/MySQL/DatabaseMySQL.cpp b/src/Databases/MySQL/DatabaseMySQL.cpp index 83fb101dba8..71b0c2ec6ea 100644 --- a/src/Databases/MySQL/DatabaseMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMySQL.cpp @@ -13,7 +13,7 @@ # include # include # include -# include +# include # include # include # include diff --git a/src/Databases/MySQL/FetchTablesColumnsList.cpp b/src/Databases/MySQL/FetchTablesColumnsList.cpp index ab144761e11..851c9bc74bd 100644 --- a/src/Databases/MySQL/FetchTablesColumnsList.cpp +++ b/src/Databases/MySQL/FetchTablesColumnsList.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Databases/MySQL/MaterializeMetadata.cpp b/src/Databases/MySQL/MaterializeMetadata.cpp index 8de42760dc2..0facdfc20be 100644 --- a/src/Databases/MySQL/MaterializeMetadata.cpp +++ b/src/Databases/MySQL/MaterializeMetadata.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp index d2c1195c0c5..86481d9fd84 100644 --- a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp +++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp @@ -9,7 +9,7 @@ # include # include # include -# include +# include # include # include # include diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp index 7683f9d4244..f7e9ce6624c 100644 --- a/src/Dictionaries/CacheDictionary.cpp +++ b/src/Dictionaries/CacheDictionary.cpp @@ -14,7 +14,7 @@ #include #include -#include +#include namespace ProfileEvents { diff --git a/src/Dictionaries/ClickHouseDictionarySource.cpp b/src/Dictionaries/ClickHouseDictionarySource.cpp index a19eca1fee7..a5a04d277da 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Dictionaries/DictionaryHelpers.h b/src/Dictionaries/DictionaryHelpers.h index 6266bd2cf4f..65c40898983 100644 --- a/src/Dictionaries/DictionaryHelpers.h +++ b/src/Dictionaries/DictionaryHelpers.h @@ -16,7 +16,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Dictionaries/DirectDictionary.cpp b/src/Dictionaries/DirectDictionary.cpp index 03d3b579ec3..f4f33439638 100644 --- a/src/Dictionaries/DirectDictionary.cpp +++ b/src/Dictionaries/DirectDictionary.cpp @@ -8,7 +8,7 @@ #include #include -#include +#include #include namespace DB diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index 532c68d0453..a7cf69bb14d 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include diff --git a/src/Dictionaries/IDictionarySource.h b/src/Dictionaries/IDictionarySource.h index 661f5b8eeb8..5071b69d2bf 100644 --- a/src/Dictionaries/IDictionarySource.h +++ b/src/Dictionaries/IDictionarySource.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include diff --git a/src/Dictionaries/MySQLDictionarySource.cpp b/src/Dictionaries/MySQLDictionarySource.cpp index 5fabe9cf287..f6de6ca0cc1 100644 --- 
a/src/Dictionaries/MySQLDictionarySource.cpp +++ b/src/Dictionaries/MySQLDictionarySource.cpp @@ -11,8 +11,8 @@ #include "registerDictionaries.h" #include #include -#include -#include +#include +#include #include diff --git a/src/Dictionaries/PolygonDictionary.cpp b/src/Dictionaries/PolygonDictionary.cpp index 8d0f0f1abc4..0a9ba1f5ea8 100644 --- a/src/Dictionaries/PolygonDictionary.cpp +++ b/src/Dictionaries/PolygonDictionary.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Dictionaries/PostgreSQLDictionarySource.cpp b/src/Dictionaries/PostgreSQLDictionarySource.cpp index a3324b7d769..c9fb8b86b77 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.cpp +++ b/src/Dictionaries/PostgreSQLDictionarySource.cpp @@ -11,7 +11,7 @@ #include #include "readInvalidateQuery.h" #include -#include +#include #include #endif diff --git a/src/Dictionaries/XDBCDictionarySource.cpp b/src/Dictionaries/XDBCDictionarySource.cpp index b0be90c26a5..ba993ec5783 100644 --- a/src/Dictionaries/XDBCDictionarySource.cpp +++ b/src/Dictionaries/XDBCDictionarySource.cpp @@ -16,7 +16,7 @@ #include "readInvalidateQuery.h" #include "registerDictionaries.h" #include -#include +#include #include diff --git a/src/Dictionaries/readInvalidateQuery.cpp b/src/Dictionaries/readInvalidateQuery.cpp index c2c42eece58..370e1457832 100644 --- a/src/Dictionaries/readInvalidateQuery.cpp +++ b/src/Dictionaries/readInvalidateQuery.cpp @@ -1,5 +1,5 @@ #include "readInvalidateQuery.h" -#include +#include #include #include #include diff --git a/src/Formats/TemporaryFileStream.h b/src/Formats/TemporaryFileStream.h index 5a1e0bc870a..4a2aa2d55e0 100644 --- a/src/Formats/TemporaryFileStream.h +++ b/src/Formats/TemporaryFileStream.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include #include diff --git a/src/Formats/formatBlock.cpp b/src/Formats/formatBlock.cpp index 3284663fc42..d2b401207aa 100644 --- a/src/Formats/formatBlock.cpp +++ b/src/Formats/formatBlock.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include namespace DB diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index 7e0fa2ba003..cc04c5f013f 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 95b279fd59b..0db07267231 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index 487fa2538c2..3afb1e1fbb0 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 59fd1009381..5dbde2344d3 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -33,7 +33,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/MergeJoin.cpp b/src/Interpreters/MergeJoin.cpp index 7789c74d596..7fb9f1a3ceb 100644 --- 
a/src/Interpreters/MergeJoin.cpp +++ b/src/Interpreters/MergeJoin.cpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 3d0813579ce..5f08ae6b845 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/SortedBlocksWriter.cpp b/src/Interpreters/SortedBlocksWriter.cpp index ebec58dcca7..1945824636f 100644 --- a/src/Interpreters/SortedBlocksWriter.cpp +++ b/src/Interpreters/SortedBlocksWriter.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/SortedBlocksWriter.h b/src/Interpreters/SortedBlocksWriter.h index ac58ef2ab7b..0262a274c68 100644 --- a/src/Interpreters/SortedBlocksWriter.h +++ b/src/Interpreters/SortedBlocksWriter.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp index 45e1c580f01..c3b8cc5c677 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.cpp +++ b/src/Interpreters/executeDDLQueryOnCluster.cpp @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include diff --git a/src/Processors/Executors/CompletedPipelineExecutor.cpp b/src/Processors/Executors/CompletedPipelineExecutor.cpp index be388386e9d..a4e3dea89fa 100644 --- a/src/Processors/Executors/CompletedPipelineExecutor.cpp +++ b/src/Processors/Executors/CompletedPipelineExecutor.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index 0616a392027..ec07cee8738 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index 80ed8225c79..fdddfdef2a4 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Processors/Executors/PullingPipelineExecutor.cpp b/src/Processors/Executors/PullingPipelineExecutor.cpp index ad7da63b0b1..a9c73b9f8fb 100644 --- a/src/Processors/Executors/PullingPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingPipelineExecutor.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp index a1a0755f952..0b6d5334716 100644 --- a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Processors/Executors/PushingPipelineExecutor.cpp b/src/Processors/Executors/PushingPipelineExecutor.cpp index 2e2b5e9ca1e..c1e851e3425 100644 --- a/src/Processors/Executors/PushingPipelineExecutor.cpp +++ b/src/Processors/Executors/PushingPipelineExecutor.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include 
#include diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp index 023f9016cc4..53036360b8d 100644 --- a/src/Processors/QueryPlan/AggregatingStep.cpp +++ b/src/Processors/QueryPlan/AggregatingStep.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include diff --git a/src/Processors/QueryPlan/ArrayJoinStep.cpp b/src/Processors/QueryPlan/ArrayJoinStep.cpp index 35b974baa83..3ca5b9109e6 100644 --- a/src/Processors/QueryPlan/ArrayJoinStep.cpp +++ b/src/Processors/QueryPlan/ArrayJoinStep.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Processors/QueryPlan/CreatingSetsStep.cpp b/src/Processors/QueryPlan/CreatingSetsStep.cpp index 83a4c291bf2..45c3719ebca 100644 --- a/src/Processors/QueryPlan/CreatingSetsStep.cpp +++ b/src/Processors/QueryPlan/CreatingSetsStep.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include diff --git a/src/Processors/QueryPlan/CubeStep.cpp b/src/Processors/QueryPlan/CubeStep.cpp index 3d61d3ef36b..23c5115ec68 100644 --- a/src/Processors/QueryPlan/CubeStep.cpp +++ b/src/Processors/QueryPlan/CubeStep.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include namespace DB { diff --git a/src/Processors/QueryPlan/DistinctStep.cpp b/src/Processors/QueryPlan/DistinctStep.cpp index d53d1fa9310..5aeb33fdc7b 100644 --- a/src/Processors/QueryPlan/DistinctStep.cpp +++ b/src/Processors/QueryPlan/DistinctStep.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/src/Processors/QueryPlan/ExpressionStep.cpp b/src/Processors/QueryPlan/ExpressionStep.cpp index d1b9c2cad63..33d2ad6e1cf 100644 --- a/src/Processors/QueryPlan/ExpressionStep.cpp +++ b/src/Processors/QueryPlan/ExpressionStep.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include diff --git a/src/Processors/QueryPlan/ExtremesStep.cpp b/src/Processors/QueryPlan/ExtremesStep.cpp index 117ccd414ca..4524b9883d6 100644 --- a/src/Processors/QueryPlan/ExtremesStep.cpp +++ b/src/Processors/QueryPlan/ExtremesStep.cpp @@ -1,5 +1,5 @@ #include -#include +#include namespace DB { diff --git a/src/Processors/QueryPlan/FillingStep.cpp b/src/Processors/QueryPlan/FillingStep.cpp index 204559ecc3b..223892aa528 100644 --- a/src/Processors/QueryPlan/FillingStep.cpp +++ b/src/Processors/QueryPlan/FillingStep.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/src/Processors/QueryPlan/FilterStep.cpp b/src/Processors/QueryPlan/FilterStep.cpp index 483055810cf..df75c37dc97 100644 --- a/src/Processors/QueryPlan/FilterStep.cpp +++ b/src/Processors/QueryPlan/FilterStep.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include diff --git a/src/Processors/QueryPlan/FinishSortingStep.cpp b/src/Processors/QueryPlan/FinishSortingStep.cpp index c219c09f3bd..6347b69901c 100644 --- a/src/Processors/QueryPlan/FinishSortingStep.cpp +++ b/src/Processors/QueryPlan/FinishSortingStep.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include diff --git a/src/Processors/QueryPlan/ISourceStep.cpp b/src/Processors/QueryPlan/ISourceStep.cpp index 61c0a9254cd..0644d9b44eb 100644 --- a/src/Processors/QueryPlan/ISourceStep.cpp +++ b/src/Processors/QueryPlan/ISourceStep.cpp @@ -1,5 +1,5 @@ #include -#include +#include namespace DB { diff --git a/src/Processors/QueryPlan/ITransformingStep.cpp b/src/Processors/QueryPlan/ITransformingStep.cpp index 
1c7f836378f..629fb89be1e 100644 --- a/src/Processors/QueryPlan/ITransformingStep.cpp +++ b/src/Processors/QueryPlan/ITransformingStep.cpp @@ -1,5 +1,5 @@ #include -#include +#include namespace DB { diff --git a/src/Processors/QueryPlan/IntersectOrExceptStep.cpp b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp index a4d81e69fe0..c031303cc7f 100644 --- a/src/Processors/QueryPlan/IntersectOrExceptStep.cpp +++ b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Processors/QueryPlan/JoinStep.cpp b/src/Processors/QueryPlan/JoinStep.cpp index 9c5f8ae2e5f..494a2a6aa0e 100644 --- a/src/Processors/QueryPlan/JoinStep.cpp +++ b/src/Processors/QueryPlan/JoinStep.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include diff --git a/src/Processors/QueryPlan/LimitByStep.cpp b/src/Processors/QueryPlan/LimitByStep.cpp index 12ad933a159..39086e995fc 100644 --- a/src/Processors/QueryPlan/LimitByStep.cpp +++ b/src/Processors/QueryPlan/LimitByStep.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/src/Processors/QueryPlan/LimitStep.cpp b/src/Processors/QueryPlan/LimitStep.cpp index 3db59e0684a..8c5e3e3c87c 100644 --- a/src/Processors/QueryPlan/LimitStep.cpp +++ b/src/Processors/QueryPlan/LimitStep.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include diff --git a/src/Processors/QueryPlan/MergeSortingStep.cpp b/src/Processors/QueryPlan/MergeSortingStep.cpp index 820bbc31b74..534f05a4d6e 100644 --- a/src/Processors/QueryPlan/MergeSortingStep.cpp +++ b/src/Processors/QueryPlan/MergeSortingStep.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include diff --git a/src/Processors/QueryPlan/MergingAggregatedStep.cpp b/src/Processors/QueryPlan/MergingAggregatedStep.cpp index d02be59ae84..8dfb9f9c923 100644 --- a/src/Processors/QueryPlan/MergingAggregatedStep.cpp +++ b/src/Processors/QueryPlan/MergingAggregatedStep.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include diff --git a/src/Processors/QueryPlan/MergingSortedStep.cpp b/src/Processors/QueryPlan/MergingSortedStep.cpp index 87d1af4d2bd..ed1f24db55b 100644 --- a/src/Processors/QueryPlan/MergingSortedStep.cpp +++ b/src/Processors/QueryPlan/MergingSortedStep.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include diff --git a/src/Processors/QueryPlan/OffsetStep.cpp b/src/Processors/QueryPlan/OffsetStep.cpp index b48327eb36c..e0c70ba2f28 100644 --- a/src/Processors/QueryPlan/OffsetStep.cpp +++ b/src/Processors/QueryPlan/OffsetStep.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/src/Processors/QueryPlan/PartialSortingStep.cpp b/src/Processors/QueryPlan/PartialSortingStep.cpp index cf7cb157e4c..420e7b583ca 100644 --- a/src/Processors/QueryPlan/PartialSortingStep.cpp +++ b/src/Processors/QueryPlan/PartialSortingStep.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index 6fb6a24f65b..f319e562bfb 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 8d3005e725f..cc400aacf2a 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ 
b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include diff --git a/src/Processors/QueryPlan/ReadFromPreparedSource.cpp b/src/Processors/QueryPlan/ReadFromPreparedSource.cpp index c8213d58db6..fc8136177cf 100644 --- a/src/Processors/QueryPlan/ReadFromPreparedSource.cpp +++ b/src/Processors/QueryPlan/ReadFromPreparedSource.cpp @@ -1,5 +1,5 @@ #include -#include +#include namespace DB { diff --git a/src/Processors/QueryPlan/ReadFromPreparedSource.h b/src/Processors/QueryPlan/ReadFromPreparedSource.h index 407b968b728..bb6e814ad9f 100644 --- a/src/Processors/QueryPlan/ReadFromPreparedSource.h +++ b/src/Processors/QueryPlan/ReadFromPreparedSource.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include namespace DB { diff --git a/src/Processors/QueryPlan/ReadNothingStep.cpp b/src/Processors/QueryPlan/ReadNothingStep.cpp index 7019b88f0b2..253f3a5b980 100644 --- a/src/Processors/QueryPlan/ReadNothingStep.cpp +++ b/src/Processors/QueryPlan/ReadNothingStep.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include namespace DB diff --git a/src/Processors/QueryPlan/RollupStep.cpp b/src/Processors/QueryPlan/RollupStep.cpp index 114fe661c70..acaeb2bc9a7 100644 --- a/src/Processors/QueryPlan/RollupStep.cpp +++ b/src/Processors/QueryPlan/RollupStep.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include namespace DB { diff --git a/src/Processors/QueryPlan/SettingQuotaAndLimitsStep.cpp b/src/Processors/QueryPlan/SettingQuotaAndLimitsStep.cpp index 47f8187c3aa..4ca3d0ebf54 100644 --- a/src/Processors/QueryPlan/SettingQuotaAndLimitsStep.cpp +++ b/src/Processors/QueryPlan/SettingQuotaAndLimitsStep.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include namespace DB diff --git a/src/Processors/QueryPlan/TotalsHavingStep.cpp b/src/Processors/QueryPlan/TotalsHavingStep.cpp index db14950b229..be2cd2348a4 100644 --- a/src/Processors/QueryPlan/TotalsHavingStep.cpp +++ b/src/Processors/QueryPlan/TotalsHavingStep.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include diff --git a/src/Processors/QueryPlan/UnionStep.cpp b/src/Processors/QueryPlan/UnionStep.cpp index 85705595479..5d40a9e241e 100644 --- a/src/Processors/QueryPlan/UnionStep.cpp +++ b/src/Processors/QueryPlan/UnionStep.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include diff --git a/src/Processors/QueryPlan/WindowStep.cpp b/src/Processors/QueryPlan/WindowStep.cpp index ca09f4a9474..cd4bb5f6730 100644 --- a/src/Processors/QueryPlan/WindowStep.cpp +++ b/src/Processors/QueryPlan/WindowStep.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Processors/Sources/DelayedSource.h b/src/Processors/Sources/DelayedSource.h index 7c2b104b61e..3d17c13ad4c 100644 --- a/src/Processors/Sources/DelayedSource.h +++ b/src/Processors/Sources/DelayedSource.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include namespace DB { diff --git a/src/Processors/Sources/RemoteSource.h b/src/Processors/Sources/RemoteSource.h index 2109cb5eba4..23c3ddec401 100644 --- a/src/Processors/Sources/RemoteSource.h +++ b/src/Processors/Sources/RemoteSource.h @@ -2,7 +2,7 @@ #include #include -#include +#include #include namespace DB diff --git a/src/Processors/Sources/ShellCommandSource.h b/src/Processors/Sources/ShellCommandSource.h index 18dbd2e03aa..4974c33f290 100644 --- a/src/Processors/Sources/ShellCommandSource.h +++ b/src/Processors/Sources/ShellCommandSource.h @@ -12,7 +12,7 @@ #include #include #include 
-#include +#include #include diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp index bf3cafd6ff5..8357a997960 100644 --- a/src/Processors/Transforms/AggregatingTransform.cpp +++ b/src/Processors/Transforms/AggregatingTransform.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include diff --git a/src/Processors/Transforms/CreatingSetsTransform.h b/src/Processors/Transforms/CreatingSetsTransform.h index 8e7a09b320d..839ab0cac88 100644 --- a/src/Processors/Transforms/CreatingSetsTransform.h +++ b/src/Processors/Transforms/CreatingSetsTransform.h @@ -4,8 +4,8 @@ #include #include #include -#include -#include +#include +#include #include #include diff --git a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp index df2ea4b03f0..7f0893a6c0d 100644 --- a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp +++ b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/src/Processors/Transforms/buildPushingToViewsChain.h b/src/Processors/Transforms/buildPushingToViewsChain.h index 6956dedbc41..260fdfb3a19 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.h +++ b/src/Processors/Transforms/buildPushingToViewsChain.h @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp b/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp index 57cfa6c01b8..86998614189 100644 --- a/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp +++ b/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include "IO/CompressionMethod.h" #include "Parsers/ASTLiteral.h" diff --git a/src/QueryPipeline/BlockIO.h b/src/QueryPipeline/BlockIO.h index d699d525f2f..0f05beca4a8 100644 --- a/src/QueryPipeline/BlockIO.h +++ b/src/QueryPipeline/BlockIO.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include namespace DB diff --git a/src/Processors/Chain.cpp b/src/QueryPipeline/Chain.cpp similarity index 99% rename from src/Processors/Chain.cpp rename to src/QueryPipeline/Chain.cpp index 5e3b2e6a678..ca7b0b80692 100644 --- a/src/Processors/Chain.cpp +++ b/src/QueryPipeline/Chain.cpp @@ -1,5 +1,5 @@ #include -#include +#include namespace DB { diff --git a/src/Processors/Chain.h b/src/QueryPipeline/Chain.h similarity index 97% rename from src/Processors/Chain.h rename to src/QueryPipeline/Chain.h index da5167f9c7a..c5fdc34cecf 100644 --- a/src/Processors/Chain.h +++ b/src/QueryPipeline/Chain.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include namespace DB { diff --git a/src/Processors/Pipe.cpp b/src/QueryPipeline/Pipe.cpp similarity index 99% rename from src/Processors/Pipe.cpp rename to src/QueryPipeline/Pipe.cpp index acf46b95346..6cef7cc28bd 100644 --- a/src/Processors/Pipe.cpp +++ b/src/QueryPipeline/Pipe.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include diff --git a/src/Processors/Pipe.h b/src/QueryPipeline/Pipe.h similarity index 98% rename from src/Processors/Pipe.h rename to src/QueryPipeline/Pipe.h index 3341734430c..0af02a5e662 100644 --- a/src/Processors/Pipe.h +++ b/src/QueryPipeline/Pipe.h @@ -1,8 +1,8 @@ #pragma once #include -#include -#include +#include +#include #include #include diff --git 
a/src/Processors/PipelineResourcesHolder.cpp b/src/QueryPipeline/PipelineResourcesHolder.cpp similarity index 94% rename from src/Processors/PipelineResourcesHolder.cpp rename to src/QueryPipeline/PipelineResourcesHolder.cpp index 9cb2ea301ad..a4b85ed662b 100644 --- a/src/Processors/PipelineResourcesHolder.cpp +++ b/src/QueryPipeline/PipelineResourcesHolder.cpp @@ -1,4 +1,4 @@ -#include +#include #include namespace DB diff --git a/src/Processors/PipelineResourcesHolder.h b/src/QueryPipeline/PipelineResourcesHolder.h similarity index 100% rename from src/Processors/PipelineResourcesHolder.h rename to src/QueryPipeline/PipelineResourcesHolder.h diff --git a/src/Processors/QueryPipeline.cpp b/src/QueryPipeline/QueryPipeline.cpp similarity index 99% rename from src/Processors/QueryPipeline.cpp rename to src/QueryPipeline/QueryPipeline.cpp index 98ac81f7217..ce1c9473f60 100644 --- a/src/Processors/QueryPipeline.cpp +++ b/src/QueryPipeline/QueryPipeline.cpp @@ -1,10 +1,10 @@ #include -#include +#include #include #include #include -#include -#include +#include +#include #include #include #include diff --git a/src/Processors/QueryPipeline.h b/src/QueryPipeline/QueryPipeline.h similarity index 98% rename from src/Processors/QueryPipeline.h rename to src/QueryPipeline/QueryPipeline.h index 42fdb429a14..beb46361f95 100644 --- a/src/Processors/QueryPipeline.h +++ b/src/QueryPipeline/QueryPipeline.h @@ -1,5 +1,5 @@ #pragma once -#include +#include #include namespace DB diff --git a/src/Processors/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp similarity index 99% rename from src/Processors/QueryPipelineBuilder.cpp rename to src/QueryPipeline/QueryPipelineBuilder.cpp index 08a568b48f1..40c64046560 100644 --- a/src/Processors/QueryPipelineBuilder.cpp +++ b/src/QueryPipeline/QueryPipelineBuilder.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include diff --git a/src/Processors/QueryPipelineBuilder.h b/src/QueryPipeline/QueryPipelineBuilder.h similarity index 99% rename from src/Processors/QueryPipelineBuilder.h rename to src/QueryPipeline/QueryPipelineBuilder.h index 7e0ddbc0285..d2bbea03ce5 100644 --- a/src/Processors/QueryPipelineBuilder.h +++ b/src/QueryPipeline/QueryPipelineBuilder.h @@ -2,8 +2,8 @@ #include #include -#include -#include +#include +#include #include #include diff --git a/src/QueryPipeline/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp index a77a791278a..b01ed7ba9a2 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -7,7 +7,7 @@ #include #include #include "Core/Protocol.h" -#include +#include #include #include #include diff --git a/src/QueryPipeline/narrowBlockInputStreams.cpp b/src/QueryPipeline/narrowBlockInputStreams.cpp index 17e44e38293..19bebe4a0bf 100644 --- a/src/QueryPipeline/narrowBlockInputStreams.cpp +++ b/src/QueryPipeline/narrowBlockInputStreams.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include "narrowBlockInputStreams.h" diff --git a/src/Processors/printPipeline.cpp b/src/QueryPipeline/printPipeline.cpp similarity index 99% rename from src/Processors/printPipeline.cpp rename to src/QueryPipeline/printPipeline.cpp index cbf8cb3a77d..40c88502ed0 100644 --- a/src/Processors/printPipeline.cpp +++ b/src/QueryPipeline/printPipeline.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include diff --git a/src/Processors/printPipeline.h b/src/QueryPipeline/printPipeline.h similarity index 100% rename from src/Processors/printPipeline.h rename to 
src/QueryPipeline/printPipeline.h diff --git a/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp b/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp index 7625fe8fa09..d39c49bb61c 100644 --- a/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp +++ b/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp @@ -4,10 +4,10 @@ #include #include #include -#include +#include #include #include -#include +#include using namespace DB; diff --git a/src/QueryPipeline/tests/gtest_check_sorted_stream.cpp b/src/QueryPipeline/tests/gtest_check_sorted_stream.cpp index 04ccc64fd7c..751f7ef8635 100644 --- a/src/QueryPipeline/tests/gtest_check_sorted_stream.cpp +++ b/src/QueryPipeline/tests/gtest_check_sorted_stream.cpp @@ -5,8 +5,8 @@ #include #include #include -#include -#include +#include +#include #include diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index 25085f38889..ba2644e0fba 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/Distributed/DistributedSink.h b/src/Storages/Distributed/DistributedSink.h index 3c0b6333fc1..668cec22e8b 100644 --- a/src/Storages/Distributed/DistributedSink.h +++ b/src/Storages/Distributed/DistributedSink.h @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 12558054d81..6e81f5577ab 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -24,8 +24,8 @@ #include #include #include -#include -#include +#include +#include #include diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 8effb5ed9bf..7c158794caf 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index ec6e58f3fbb..74e17442fe8 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/LiveView/StorageBlocks.h b/src/Storages/LiveView/StorageBlocks.h index 6cf7ce59fa2..f010ef50ecd 100644 --- a/src/Storages/LiveView/StorageBlocks.h +++ b/src/Storages/LiveView/StorageBlocks.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include namespace DB diff --git a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index 23ea280e88c..729b545e9a0 100644 --- a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Storages/PartitionCommands.cpp b/src/Storages/PartitionCommands.cpp index 917ea156ab7..fa36588513b 100644 --- a/src/Storages/PartitionCommands.cpp +++ b/src/Storages/PartitionCommands.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index 4e9e2c450b1..f771b2239ef 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git 
a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index 42294b8152c..19f1d7780b0 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/ReadFinalForExternalReplicaStorage.h b/src/Storages/ReadFinalForExternalReplicaStorage.h index f09a115919d..1be0aa0f4a6 100644 --- a/src/Storages/ReadFinalForExternalReplicaStorage.h +++ b/src/Storages/ReadFinalForExternalReplicaStorage.h @@ -7,7 +7,7 @@ #if USE_MYSQL || USE_LIBPQXX #include -#include +#include namespace DB diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index 3a37e57e008..18bf0e2c19b 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -17,7 +17,7 @@ #include #include -#include +#include #include #include diff --git a/src/Storages/StorageDictionary.cpp b/src/Storages/StorageDictionary.cpp index e305d4c6183..349d894b2f1 100644 --- a/src/Storages/StorageDictionary.cpp +++ b/src/Storages/StorageDictionary.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Storages/StorageExecutable.cpp b/src/Storages/StorageExecutable.cpp index 21784952c23..76be3353808 100644 --- a/src/Storages/StorageExecutable.cpp +++ b/src/Storages/StorageExecutable.cpp @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Storages/StorageExternalDistributed.cpp b/src/Storages/StorageExternalDistributed.cpp index cedb7d26b48..0af8fe6f7df 100644 --- a/src/Storages/StorageExternalDistributed.cpp +++ b/src/Storages/StorageExternalDistributed.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 4ae55272db6..7c6543c5519 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include diff --git a/src/Storages/StorageGenerateRandom.cpp b/src/Storages/StorageGenerateRandom.cpp index cfec328bbfc..a4dfbfc3f96 100644 --- a/src/Storages/StorageGenerateRandom.cpp +++ b/src/Storages/StorageGenerateRandom.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Storages/StorageInput.cpp b/src/Storages/StorageInput.cpp index 1138794adb0..2ed7a77b59d 100644 --- a/src/Storages/StorageInput.cpp +++ b/src/Storages/StorageInput.cpp @@ -5,7 +5,7 @@ #include #include -#include +#include namespace DB diff --git a/src/Storages/StorageInput.h b/src/Storages/StorageInput.h index bfb634c6bba..b28bc143bb0 100644 --- a/src/Storages/StorageInput.h +++ b/src/Storages/StorageInput.h @@ -2,7 +2,7 @@ #include #include -#include +#include namespace DB { diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index c2c1cff5f53..8ac341661bf 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -17,7 +17,7 @@ #include #include -#include +#include #include #include /// toLower diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index ecf3da83ea6..41d178fc04d 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -23,7 +23,7 @@ #include "StorageLogSettings.h" #include #include -#include +#include #include #include diff --git a/src/Storages/StorageMaterializedMySQL.cpp 
b/src/Storages/StorageMaterializedMySQL.cpp index 52f53b9ceee..37a9838593b 100644 --- a/src/Storages/StorageMaterializedMySQL.cpp +++ b/src/Storages/StorageMaterializedMySQL.cpp @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index 299e39a3836..37cb238ba0f 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index ab42da1dfa0..acc40787fed 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/StorageMongoDB.cpp b/src/Storages/StorageMongoDB.cpp index 15430f60285..146fd8b616a 100644 --- a/src/Storages/StorageMongoDB.cpp +++ b/src/Storages/StorageMongoDB.cpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include namespace DB diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index a1724af38cb..50d2124958c 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Storages/StorageNull.h b/src/Storages/StorageNull.h index 96e72c77f00..de667c1d75c 100644 --- a/src/Storages/StorageNull.h +++ b/src/Storages/StorageNull.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Storages/StorageProxy.h b/src/Storages/StorageProxy.h index c81ef6febdc..62e0ccc76aa 100644 --- a/src/Storages/StorageProxy.h +++ b/src/Storages/StorageProxy.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include namespace DB diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 65e07fa6144..bc03dbb45ae 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index c94c519c3b9..8cbec46d0e0 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -35,7 +35,7 @@ #include #include -#include +#include #include #include @@ -52,7 +52,7 @@ #include #include -#include +#include #include #include diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp index b78a09ebcc4..9d05235552c 100644 --- a/src/Storages/StorageS3Cluster.cpp +++ b/src/Storages/StorageS3Cluster.cpp @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include "Processors/Sources/SourceWithProgress.h" #include #include diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index cc5eb835438..50113c391cc 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include diff --git a/src/Storages/StorageTableFunction.h b/src/Storages/StorageTableFunction.h index 557f378ab77..0b7ab30fa24 100644 --- a/src/Storages/StorageTableFunction.h +++ b/src/Storages/StorageTableFunction.h @@ -1,7 +1,7 @@ #pragma once #include #include -#include +#include #include #include #include diff --git a/src/Storages/StorageTinyLog.cpp b/src/Storages/StorageTinyLog.cpp index d1778342ec5..74d922d5594 100644 --- a/src/Storages/StorageTinyLog.cpp +++ 
b/src/Storages/StorageTinyLog.cpp @@ -36,7 +36,7 @@ #include #include -#include +#include #define DBMS_STORAGE_LOG_DATA_FILE_EXTENSION ".bin" diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 9397986fadd..174ee58ee42 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Storages/StorageValues.cpp b/src/Storages/StorageValues.cpp index ace5ca3667c..650782afbba 100644 --- a/src/Storages/StorageValues.cpp +++ b/src/Storages/StorageValues.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index df774554365..9ffea587b97 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -14,7 +14,7 @@ #include -#include +#include #include #include #include diff --git a/src/Storages/StorageXDBC.cpp b/src/Storages/StorageXDBC.cpp index a0924896437..a90e21a2edb 100644 --- a/src/Storages/StorageXDBC.cpp +++ b/src/Storages/StorageXDBC.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/System/IStorageSystemOneBlock.h b/src/Storages/System/IStorageSystemOneBlock.h index c074659af2b..6897b4a3a79 100644 --- a/src/Storages/System/IStorageSystemOneBlock.h +++ b/src/Storages/System/IStorageSystemOneBlock.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/src/Storages/System/StorageSystemNumbers.cpp b/src/Storages/System/StorageSystemNumbers.cpp index 3a88cc96639..136c2489be2 100644 --- a/src/Storages/System/StorageSystemNumbers.cpp +++ b/src/Storages/System/StorageSystemNumbers.cpp @@ -4,7 +4,7 @@ #include #include -#include +#include #include namespace DB diff --git a/src/Storages/System/StorageSystemOne.cpp b/src/Storages/System/StorageSystemOne.cpp index 7c28f897121..7558ae0ae92 100644 --- a/src/Storages/System/StorageSystemOne.cpp +++ b/src/Storages/System/StorageSystemOne.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 254e6f77e0c..f32a609077f 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include diff --git a/src/Storages/System/StorageSystemZeros.cpp b/src/Storages/System/StorageSystemZeros.cpp index d1456d72685..624fc54998c 100644 --- a/src/Storages/System/StorageSystemZeros.cpp +++ b/src/Storages/System/StorageSystemZeros.cpp @@ -1,7 +1,7 @@ #include #include -#include +#include #include #include diff --git a/src/Storages/tests/gtest_storage_log.cpp b/src/Storages/tests/gtest_storage_log.cpp index 5b891c43aae..f86295cd06b 100644 --- a/src/Storages/tests/gtest_storage_log.cpp +++ b/src/Storages/tests/gtest_storage_log.cpp @@ -18,8 +18,8 @@ #include #include #include -#include -#include +#include +#include #if !defined(__clang__) # pragma GCC diagnostic push From fa92fd28bf23dd9a0522020d44ee1cb67e08679b Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Sat, 16 Oct 2021 17:34:31 +0300 Subject: [PATCH 290/438] Update LocalServer.cpp --- programs/local/LocalServer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 10c4bf592ca..766123339af 
100644
--- a/programs/local/LocalServer.cpp
+++ b/programs/local/LocalServer.cpp
@@ -129,7 +129,7 @@ bool LocalServer::executeMultiQuery(const String & all_queries_text)
         {
             if (current_exception)
                 current_exception->rethrow();
-            continue;
+            return true;
         }
         case MultiQueryProcessingStage::EXECUTE_QUERY:
         {

From 3102d24872e1579dd28d2133171cb6e2d4aceccc Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Fri, 15 Oct 2021 10:38:11 +0300
Subject: [PATCH 291/438] Fix --hung-check in clickhouse-test

JSONEachRow cannot be parsed with a simple json.loads(), instead it should be
passed to json.loads() line by line.

Fixes: #30065
---
 tests/clickhouse-test | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/clickhouse-test b/tests/clickhouse-test
index b2a9358371a..7b934844dc4 100755
--- a/tests/clickhouse-test
+++ b/tests/clickhouse-test
@@ -109,7 +109,10 @@ def clickhouse_execute_json(base_args, query, timeout=30, settings=None):
     data = clickhouse_execute_http(base_args, query, timeout, settings, 'JSONEachRow')
     if not data:
         return None
-    return json.loads(data)
+    rows = []
+    for row in data.strip().split(b'\n'):
+        rows.append(json.loads(row))
+    return rows


 class Terminated(KeyboardInterrupt):

From c6e61e9497f708589eaf0602a9727943c015190b Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Sat, 16 Oct 2021 17:58:19 +0300
Subject: [PATCH 292/438] clickhouse-test: use splitlines() over split('\n')

---
 tests/clickhouse-test | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/clickhouse-test b/tests/clickhouse-test
index 7b934844dc4..6512c40c44b 100755
--- a/tests/clickhouse-test
+++ b/tests/clickhouse-test
@@ -110,7 +110,7 @@ def clickhouse_execute_json(base_args, query, timeout=30, settings=None):
     if not data:
         return None
     rows = []
-    for row in data.strip().split(b'\n'):
+    for row in data.strip().splitlines():
         rows.append(json.loads(row))
     return rows

@@ -478,19 +478,19 @@ class TestCase:

             if os.path.isfile(self.stdout_file):
                 description += ", result:\n\n"
-                description += '\n'.join(open(self.stdout_file).read().split('\n')[:100])
+                description += '\n'.join(open(self.stdout_file).read().splitlines()[:100])
                 description += '\n'

             description += "\nstdout:\n{}\n".format(stdout)
             return TestResult(self.name, TestStatus.FAIL, reason, total_time, description)

         if stderr:
-            description += "\n{}\n".format('\n'.join(stderr.split('\n')[:100]))
+            description += "\n{}\n".format('\n'.join(stderr.splitlines()[:100]))
             description += "\nstdout:\n{}\n".format(stdout)
             return TestResult(self.name, TestStatus.FAIL, FailureReason.STDERR, total_time, description)

         if 'Exception' in stdout:
-            description += "\n{}\n".format('\n'.join(stdout.split('\n')[:100]))
+            description += "\n{}\n".format('\n'.join(stdout.splitlines()[:100]))
             return TestResult(self.name, TestStatus.FAIL, FailureReason.EXCEPTION, total_time, description)

         if '@@SKIP@@' in stdout:

From 612a21cc9775590900c40ebe987da6d9571b2cf2 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Sat, 16 Oct 2021 17:59:17 +0300
Subject: [PATCH 293/438] clickhouse-test: do not propagate CLICKHOUSE_PORT_HTTP to clickhouse-client

---
 tests/clickhouse-test | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/clickhouse-test b/tests/clickhouse-test
index 6512c40c44b..1e8c2bfb8ad 100755
--- a/tests/clickhouse-test
+++ b/tests/clickhouse-test
@@ -1395,7 +1395,6 @@ if __name__ == '__main__':
     http_port = os.getenv("CLICKHOUSE_PORT_HTTP")
     if http_port is not None:
         args.http_port = int(http_port)
-        args.client += f" --port={http_port}"
     else:
         args.http_port = 8123

From 6752be4c4ea3f8d7ee3056f6927d7f966c103de4 Mon Sep 17 00:00:00 2001
From: WangZengrui
Date: Sat, 16 Oct 2021 23:11:45 +0800
Subject: [PATCH 294/438] fix bug and add test

---
 src/Functions/ReplaceRegexpImpl.h              | 16 ++++++++++++++--
 .../02100_replaceRegexpAll_bug.reference       |  3 +++
 .../0_stateless/02100_replaceRegexpAll_bug.sql |  3 +++
 3 files changed, 20 insertions(+), 2 deletions(-)
 create mode 100644 tests/queries/0_stateless/02100_replaceRegexpAll_bug.reference
 create mode 100644 tests/queries/0_stateless/02100_replaceRegexpAll_bug.sql

diff --git a/src/Functions/ReplaceRegexpImpl.h b/src/Functions/ReplaceRegexpImpl.h
index 1caced9cbde..7662b747feb 100644
--- a/src/Functions/ReplaceRegexpImpl.h
+++ b/src/Functions/ReplaceRegexpImpl.h
@@ -96,6 +96,9 @@ struct ReplaceRegexpImpl
         re2_st::StringPiece matches[max_captures];

         size_t start_pos = 0;
+        bool is_first_match = true;
+        bool is_start_pos_added_one = false;
+
         while (start_pos < static_cast(input.length()))
         {
             /// If no more replacements possible for current string
@@ -103,6 +106,9 @@
             if (searcher.Match(input, start_pos, input.length(), re2_st::RE2::Anchor::UNANCHORED, matches, num_captures))
             {
+                if (is_start_pos_added_one)
+                    start_pos -= 1;
+
                 const auto & match = matches[0];
                 size_t bytes_to_copy = (match.data() - input.data()) - start_pos;

@@ -110,7 +116,12 @@
                 res_data.resize(res_data.size() + bytes_to_copy);
                 memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data() + start_pos, bytes_to_copy);
                 res_offset += bytes_to_copy;
-                start_pos += bytes_to_copy + (match.length() > 0 ? match.length() : 1);
+                start_pos += bytes_to_copy + match.length();
+                if (is_first_match && match.length() == 0 && !replace_one && input.length() > 1)
+                {
+                    start_pos += 1;
+                    is_start_pos_added_one = true;
+                }

                 /// Do substitution instructions
                 for (const auto & it : instructions)
@@ -129,8 +140,9 @@ struct ReplaceRegexpImpl
                 }
             }

-            if (replace_one) /// Stop after match of zero length, to avoid infinite loop.
can_finish_current_string = true; + is_first_match = false; } else can_finish_current_string = true; diff --git a/tests/queries/0_stateless/02100_replaceRegexpAll_bug.reference b/tests/queries/0_stateless/02100_replaceRegexpAll_bug.reference new file mode 100644 index 00000000000..2bb40778ca6 --- /dev/null +++ b/tests/queries/0_stateless/02100_replaceRegexpAll_bug.reference @@ -0,0 +1,3 @@ +aaaabb +b aaaa +aaaa diff --git a/tests/queries/0_stateless/02100_replaceRegexpAll_bug.sql b/tests/queries/0_stateless/02100_replaceRegexpAll_bug.sql new file mode 100644 index 00000000000..d0caeacfa0e --- /dev/null +++ b/tests/queries/0_stateless/02100_replaceRegexpAll_bug.sql @@ -0,0 +1,3 @@ +SELECT trim(leading 'b ' FROM 'b aaaabb ') x; +SELECT trim(trailing 'b ' FROM 'b aaaabb ') x; +SELECT trim(both 'b ' FROM 'b aaaabb ') x; From 1541593b2f6cb6e991af22b5a1885f6848f908b1 Mon Sep 17 00:00:00 2001 From: WangZengrui Date: Sat, 16 Oct 2021 23:28:29 +0800 Subject: [PATCH 295/438] add notes --- src/Functions/ReplaceRegexpImpl.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Functions/ReplaceRegexpImpl.h b/src/Functions/ReplaceRegexpImpl.h index 7662b747feb..a297be42aaf 100644 --- a/src/Functions/ReplaceRegexpImpl.h +++ b/src/Functions/ReplaceRegexpImpl.h @@ -117,6 +117,8 @@ struct ReplaceRegexpImpl memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data() + start_pos, bytes_to_copy); res_offset += bytes_to_copy; start_pos += bytes_to_copy + match.length(); + + /// To avoid infinite loop. if (is_first_match && match.length() == 0 && !replace_one && input.length() > 1) { start_pos += 1; @@ -140,7 +142,7 @@ struct ReplaceRegexpImpl } } - if (replace_one || !is_first_match) /// Stop after match of zero length, to avoid infinite loop. + if (replace_one || !is_first_match) can_finish_current_string = true; is_first_match = false; } From 434de66a2b6f34eb8f8fc4704eded45bb0763fd0 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Sat, 16 Oct 2021 12:59:01 -0300 Subject: [PATCH 296/438] Doc. ArgMax/Min are not supported anymore by SimpleAggregateFunction --- docs/ru/sql-reference/data-types/simpleaggregatefunction.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md index 7b81c577762..8f47bd0902b 100644 --- a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md @@ -21,8 +21,6 @@ - [`sumMap`](../../sql-reference/aggregate-functions/reference/summap.md#agg_functions-summap) - [`minMap`](../../sql-reference/aggregate-functions/reference/minmap.md#agg_functions-minmap) - [`maxMap`](../../sql-reference/aggregate-functions/reference/maxmap.md#agg_functions-maxmap) -- [`argMin`](../../sql-reference/aggregate-functions/reference/argmin.md) -- [`argMax`](../../sql-reference/aggregate-functions/reference/argmax.md) !!! note "Примечание" Значения `SimpleAggregateFunction(func, Type)` отображаются и хранятся так же, как и `Type`, поэтому комбинаторы [-Merge](../../sql-reference/aggregate-functions/combinators.md#aggregate_functions_combinators-merge) и [-State](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-state) не требуются. 
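The ReplaceRegexpImpl.h changes in the surrounding patches (294, 295 and the "fix bug" patch that follows) all target the behaviour exercised by the new `02100_replaceRegexpAll_bug` test added above; judging by the test name, `trim()` appears to be implemented on top of `replaceRegexpAll`, which is an inference rather than something stated in these patches. A minimal way to reproduce the cases, with the queries and expected results copied from the new test files, is:

``` sql
-- Queries from tests/queries/0_stateless/02100_replaceRegexpAll_bug.sql;
-- the .reference file expects: aaaabb, b aaaa, aaaa (one result per query).
SELECT trim(leading 'b ' FROM 'b aaaabb ') x;
SELECT trim(trailing 'b ' FROM 'b aaaabb ') x;
SELECT trim(both 'b ' FROM 'b aaaabb ') x;
```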
From bd2c016cf32eb0ca9649c289baee07032294e480 Mon Sep 17 00:00:00 2001 From: WangZengrui Date: Sun, 17 Oct 2021 00:33:57 +0800 Subject: [PATCH 297/438] fix bug --- src/Functions/ReplaceRegexpImpl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/ReplaceRegexpImpl.h b/src/Functions/ReplaceRegexpImpl.h index a297be42aaf..b2c5470cd96 100644 --- a/src/Functions/ReplaceRegexpImpl.h +++ b/src/Functions/ReplaceRegexpImpl.h @@ -142,7 +142,7 @@ struct ReplaceRegexpImpl } } - if (replace_one || !is_first_match) + if (replace_one || (!is_first_match && match.length() == 0)) can_finish_current_string = true; is_first_match = false; } From 329d81a0efb7fab70fe91a8da5cd0ee36c47ad82 Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Sat, 16 Oct 2021 20:03:45 +0300 Subject: [PATCH 298/438] Various fixes to install procedure --- programs/install/Install.cpp | 516 +++++++++++++++++++++-------------- tests/CMakeLists.txt | 22 +- 2 files changed, 323 insertions(+), 215 deletions(-) diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index e001d0ceb53..6b2d3a58a22 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -66,6 +66,7 @@ namespace ErrorCodes extern const int CANNOT_OPEN_FILE; extern const int SYSTEM_ERROR; extern const int NOT_ENOUGH_SPACE; + extern const int NOT_IMPLEMENTED; extern const int CANNOT_KILL; } @@ -75,8 +76,17 @@ namespace ErrorCodes #define HILITE "\033[1m" #define END_HILITE "\033[0m" -static constexpr auto CLICKHOUSE_BRIDGE_USER = "clickhouse-bridge"; -static constexpr auto CLICKHOUSE_BRIDGE_GROUP = "clickhouse-bridge"; +#if defined(OS_DARWIN) +static constexpr auto DEFAULT_CLICKHOUSE_SERVER_USER = ""; +static constexpr auto DEFAULT_CLICKHOUSE_SERVER_GROUP = ""; +static constexpr auto DEFAULT_CLICKHOUSE_BRIDGE_USER = ""; +static constexpr auto DEFAULT_CLICKHOUSE_BRIDGE_GROUP = ""; +#else +static constexpr auto DEFAULT_CLICKHOUSE_SERVER_USER = "clickhouse"; +static constexpr auto DEFAULT_CLICKHOUSE_SERVER_GROUP = "clickhouse"; +static constexpr auto DEFAULT_CLICKHOUSE_BRIDGE_USER = "clickhouse-bridge"; +static constexpr auto DEFAULT_CLICKHOUSE_BRIDGE_GROUP = "clickhouse-bridge"; +#endif using namespace DB; namespace po = boost::program_options; @@ -127,36 +137,77 @@ static bool filesEqual(std::string path1, std::string path2) && 0 == memcmp(in1.buffer().begin(), in2.buffer().begin(), in1.buffer().size()); } +static void changeOwnership(const String & file_name, const String & user_name, const String & group_name = {}, bool recursive = true) +{ + if (!user_name.empty() || !group_name.empty()) + { + std::string command = fmt::format("chown {} {}:{} '{}'", (recursive ? "-R" : ""), user_name, group_name, file_name); + fmt::print(" {}\n", command); + executeScript(command); + } +} + +static void createGroup(const String & group_name) +{ + if (!group_name.empty()) + { +#if defined(OS_DARWIN) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unable to create a group in macOS"); +#else + std::string command = fmt::format("groupadd -r {}", group_name); + fmt::print(" {}\n", command); + executeScript(command); +#endif + } +} + +static void createUser(const String & user_name, [[maybe_unused]] const String & group_name) +{ + if (!user_name.empty()) + { +#if defined(OS_DARWIN) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unable to create a user in macOS"); +#else + std::string command = group_name.empty() + ? 
fmt::format("useradd -r --shell /bin/false --home-dir /nonexistent --user-group {}", user_name) + : fmt::format("useradd -r --shell /bin/false --home-dir /nonexistent -g {} {}", group_name, user_name); + fmt::print(" {}\n", command); + executeScript(command); +#endif + } +} + int mainEntryClickHouseInstall(int argc, char ** argv) { - po::options_description desc; - desc.add_options() - ("help,h", "produce help message") - ("prefix", po::value()->default_value(""), "prefix for all paths") - ("binary-path", po::value()->default_value("/usr/bin"), "where to install binaries") - ("config-path", po::value()->default_value("/etc/clickhouse-server"), "where to install configs") - ("log-path", po::value()->default_value("/var/log/clickhouse-server"), "where to create log directory") - ("data-path", po::value()->default_value("/var/lib/clickhouse"), "directory for data") - ("pid-path", po::value()->default_value("/var/run/clickhouse-server"), "directory for pid file") - ("user", po::value()->default_value("clickhouse"), "clickhouse user to create") - ("group", po::value()->default_value("clickhouse"), "clickhouse group to create") - ; - - po::variables_map options; - po::store(po::parse_command_line(argc, argv, desc), options); - - if (options.count("help")) - { - std::cout << "Usage: " - << (getuid() == 0 ? "" : "sudo ") - << argv[0] - << " install [options]\n"; - std::cout << desc << '\n'; - } - try { + po::options_description desc; + desc.add_options() + ("help,h", "produce help message") + ("prefix", po::value()->default_value("/"), "prefix for all paths") + ("binary-path", po::value()->default_value("usr/bin"), "where to install binaries") + ("config-path", po::value()->default_value("etc/clickhouse-server"), "where to install configs") + ("log-path", po::value()->default_value("var/log/clickhouse-server"), "where to create log directory") + ("data-path", po::value()->default_value("var/lib/clickhouse"), "directory for data") + ("pid-path", po::value()->default_value("var/run/clickhouse-server"), "directory for pid file") + ("user", po::value()->default_value(DEFAULT_CLICKHOUSE_SERVER_USER), "clickhouse user to create") + ("group", po::value()->default_value(DEFAULT_CLICKHOUSE_SERVER_GROUP), "clickhouse group to create") + ; + + po::variables_map options; + po::store(po::parse_command_line(argc, argv, desc), options); + + if (options.count("help")) + { + std::cout << "Usage: " + << (getuid() == 0 ? "" : "sudo ") + << argv[0] + << " install [options]\n"; + std::cout << desc << '\n'; + return 1; + } + /// We need to copy binary to the binary directory. /// The binary is currently run. We need to obtain its path from procfs (on Linux). @@ -171,6 +222,9 @@ int mainEntryClickHouseInstall(int argc, char ** argv) if (res != 0) Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot obtain path to the binary"); + if (path.back() == '\0') + path.pop_back(); + fs::path binary_self_path(path); #else fs::path binary_self_path = "/proc/self/exe"; @@ -186,8 +240,8 @@ int mainEntryClickHouseInstall(int argc, char ** argv) /// TODO An option to link instead of copy - useful for developers. 
- fs::path prefix = fs::path(options["prefix"].as()); - fs::path bin_dir = prefix / fs::path(options["binary-path"].as()); + fs::path prefix = options["prefix"].as(); + fs::path bin_dir = prefix / options["binary-path"].as(); fs::path main_bin_path = bin_dir / "clickhouse"; fs::path main_bin_tmp_path = bin_dir / "clickhouse.new"; @@ -225,6 +279,12 @@ int mainEntryClickHouseInstall(int argc, char ** argv) } else { + if (!fs::exists(bin_dir)) + { + fmt::print("Creating binary directory {}.\n", bin_dir.string()); + fs::create_directories(bin_dir); + } + size_t available_space = fs::space(bin_dir).available; if (available_space < binary_size) throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Not enough space for clickhouse binary in {}, required {}, available {}.", @@ -326,34 +386,18 @@ int mainEntryClickHouseInstall(int argc, char ** argv) std::string user = options["user"].as(); std::string group = options["group"].as(); - auto create_group = [](const String & group_name) - { - std::string command = fmt::format("groupadd -r {}", group_name); - fmt::print(" {}\n", command); - executeScript(command); - }; - if (!group.empty()) { fmt::print("Creating clickhouse group if it does not exist.\n"); - create_group(group); + createGroup(group); } else - fmt::print("Will not create clickhouse group"); - - auto create_user = [](const String & user_name, const String & group_name) - { - std::string command = group_name.empty() - ? fmt::format("useradd -r --shell /bin/false --home-dir /nonexistent --user-group {}", user_name) - : fmt::format("useradd -r --shell /bin/false --home-dir /nonexistent -g {} {}", group_name, user_name); - fmt::print(" {}\n", command); - executeScript(command); - }; + fmt::print("Will not create a dedicated clickhouse group.\n"); if (!user.empty()) { fmt::print("Creating clickhouse user if it does not exist.\n"); - create_user(user, group); + createUser(user, group); if (group.empty()) group = user; @@ -361,6 +405,11 @@ int mainEntryClickHouseInstall(int argc, char ** argv) /// Setting ulimits. try { +#if defined(OS_DARWIN) + + /// TODO Set ulimits on macOS. + +#else fs::path ulimits_dir = "/etc/security/limits.d"; fs::path ulimits_file = ulimits_dir / fmt::format("{}.conf", user); fmt::print("Will set ulimits for {} user in {}.\n", user, ulimits_file.string()); @@ -374,16 +423,15 @@ int mainEntryClickHouseInstall(int argc, char ** argv) out.write(ulimits_content.data(), ulimits_content.size()); out.sync(); out.finalize(); +#endif } catch (...) { std::cerr << "Cannot set ulimits: " << getCurrentExceptionMessage(false) << "\n"; } - - /// TODO Set ulimits on Mac OS X } else - fmt::print("Will not create clickhouse user.\n"); + fmt::print("Will not create a dedicated clickhouse user.\n"); /// Creating configuration files and directories. 
@@ -400,9 +448,9 @@ int mainEntryClickHouseInstall(int argc, char ** argv) fs::path config_d = config_dir / "config.d"; fs::path users_d = config_dir / "users.d"; - std::string log_path = prefix / options["log-path"].as(); - std::string data_path = prefix / options["data-path"].as(); - std::string pid_path = prefix / options["pid-path"].as(); + fs::path log_path = prefix / options["log-path"].as(); + fs::path data_path = prefix / options["data-path"].as(); + fs::path pid_path = prefix / options["pid-path"].as(); bool has_password_for_default_user = false; @@ -427,10 +475,78 @@ int mainEntryClickHouseInstall(int argc, char ** argv) } else { - WriteBufferFromFile out(main_config_file.string()); - out.write(main_config_content.data(), main_config_content.size()); - out.sync(); - out.finalize(); + { + WriteBufferFromFile out(main_config_file.string()); + out.write(main_config_content.data(), main_config_content.size()); + out.sync(); + out.finalize(); + } + + /// Override the default paths. + + /// Data paths. + { + std::string data_file = config_d / "data-paths.xml"; + WriteBufferFromFile out(data_file); + out << "\n" + " " << data_path.string() << "\n" + " " << (data_path / "tmp").string() << "\n" + " " << (data_path / "user_files").string() << "\n" + " " << (data_path / "format_schemas").string() << "\n" + "\n"; + out.sync(); + out.finalize(); + fmt::print("Data path configuration override is saved to file {}.\n", data_file); + } + + /// Logger. + { + std::string logger_file = config_d / "logger.xml"; + WriteBufferFromFile out(logger_file); + out << "\n" + " \n" + " " << (log_path / "clickhouse-server.log").string() << "\n" + " " << (log_path / "clickhouse-server.err.log").string() << "\n" + " \n" + "\n"; + out.sync(); + out.finalize(); + fmt::print("Log path configuration override is saved to file {}.\n", logger_file); + } + + /// User directories. + { + std::string user_directories_file = config_d / "user-directories.xml"; + WriteBufferFromFile out(user_directories_file); + out << "\n" + " \n" + " \n" + " " << (data_path / "access").string() << "\n" + " \n" + " \n" + "\n"; + out.sync(); + out.finalize(); + fmt::print("User directory path configuration override is saved to file {}.\n", user_directories_file); + } + + /// OpenSSL. 
+ { + std::string openssl_file = config_d / "openssl.xml"; + WriteBufferFromFile out(openssl_file); + out << "\n" + " \n" + " \n" + " " << (config_dir / "server.crt").string() << "\n" + " " << (config_dir / "server.key").string() << "\n" + " " << (config_dir / "dhparam.pem").string() << "\n" + " \n" + " \n" + "\n"; + out.sync(); + out.finalize(); + fmt::print("OpenSSL path configuration override is saved to file {}.\n", openssl_file); + } } } else @@ -443,13 +559,13 @@ int mainEntryClickHouseInstall(int argc, char ** argv) if (configuration->has("path")) { data_path = configuration->getString("path"); - fmt::print("{} has {} as data path.\n", main_config_file.string(), data_path); + fmt::print("{} has {} as data path.\n", main_config_file.string(), data_path.string()); } if (configuration->has("logger.log")) { log_path = fs::path(configuration->getString("logger.log")).remove_filename(); - fmt::print("{} has {} as log path.\n", main_config_file.string(), log_path); + fmt::print("{} has {} as log path.\n", main_config_file.string(), log_path.string()); } } @@ -485,82 +601,44 @@ int mainEntryClickHouseInstall(int argc, char ** argv) } } - auto change_ownership = [](const String & file_name, const String & user_name, const String & group_name) - { - std::string command = fmt::format("chown --recursive {}:{} '{}'", user_name, group_name, file_name); - fmt::print(" {}\n", command); - executeScript(command); - }; - - /// Chmod and chown configs - change_ownership(config_dir.string(), user, group); - - /// Symlink "preprocessed_configs" is created by the server, so "write" is needed. - fs::permissions(config_dir, fs::perms::owner_all, fs::perm_options::replace); - - /// Subdirectories, so "execute" is needed. - if (fs::exists(config_d)) - fs::permissions(config_d, fs::perms::owner_read | fs::perms::owner_exec, fs::perm_options::replace); - if (fs::exists(users_d)) - fs::permissions(users_d, fs::perms::owner_read | fs::perms::owner_exec, fs::perm_options::replace); - - /// Readonly. - if (fs::exists(main_config_file)) - fs::permissions(main_config_file, fs::perms::owner_read, fs::perm_options::replace); - if (fs::exists(users_config_file)) - fs::permissions(users_config_file, fs::perms::owner_read, fs::perm_options::replace); - /// Create directories for data and log. 
if (fs::exists(log_path)) { - fmt::print("Log directory {} already exists.\n", log_path); + fmt::print("Log directory {} already exists.\n", log_path.string()); } else { - fmt::print("Creating log directory {}.\n", log_path); + fmt::print("Creating log directory {}.\n", log_path.string()); fs::create_directories(log_path); } if (fs::exists(data_path)) { - fmt::print("Data directory {} already exists.\n", data_path); + fmt::print("Data directory {} already exists.\n", data_path.string()); } else { - fmt::print("Creating data directory {}.\n", data_path); + fmt::print("Creating data directory {}.\n", data_path.string()); fs::create_directories(data_path); } if (fs::exists(pid_path)) { - fmt::print("Pid directory {} already exists.\n", pid_path); + fmt::print("Pid directory {} already exists.\n", pid_path.string()); } else { - fmt::print("Creating pid directory {}.\n", pid_path); + fmt::print("Creating pid directory {}.\n", pid_path.string()); fs::create_directories(pid_path); } /// Chmod and chown data and log directories - { - std::string command = fmt::format("chown --recursive {}:{} '{}'", user, group, log_path); - fmt::print(" {}\n", command); - executeScript(command); - } + changeOwnership(log_path, user, group); + changeOwnership(pid_path, user, group); - { - std::string command = fmt::format("chown --recursive {}:{} '{}'", user, group, pid_path); - fmt::print(" {}\n", command); - executeScript(command); - } - - { - /// Not recursive, because there can be a huge number of files and it will be slow. - std::string command = fmt::format("chown {}:{} '{}'", user, group, data_path); - fmt::print(" {}\n", command); - executeScript(command); - } + /// Not recursive, because there can be a huge number of files and it will be slow. + changeOwnership(data_path, user, group, /* recursive= */ false); /// All users are allowed to read pid file (for clickhouse status command). fs::permissions(pid_path, fs::perms::owner_all | fs::perms::group_read | fs::perms::others_read, fs::perm_options::replace); @@ -576,13 +654,13 @@ int mainEntryClickHouseInstall(int argc, char ** argv) if (fs::exists(odbc_bridge_path) || fs::exists(library_bridge_path)) { - create_group(CLICKHOUSE_BRIDGE_GROUP); - create_user(CLICKHOUSE_BRIDGE_USER, CLICKHOUSE_BRIDGE_GROUP); + createGroup(DEFAULT_CLICKHOUSE_BRIDGE_GROUP); + createUser(DEFAULT_CLICKHOUSE_BRIDGE_USER, DEFAULT_CLICKHOUSE_BRIDGE_GROUP); if (fs::exists(odbc_bridge_path)) - change_ownership(odbc_bridge_path, CLICKHOUSE_BRIDGE_USER, CLICKHOUSE_BRIDGE_GROUP); + changeOwnership(odbc_bridge_path, DEFAULT_CLICKHOUSE_BRIDGE_USER, DEFAULT_CLICKHOUSE_BRIDGE_GROUP); if (fs::exists(library_bridge_path)) - change_ownership(library_bridge_path, CLICKHOUSE_BRIDGE_USER, CLICKHOUSE_BRIDGE_GROUP); + changeOwnership(library_bridge_path, DEFAULT_CLICKHOUSE_BRIDGE_USER, DEFAULT_CLICKHOUSE_BRIDGE_GROUP); } bool stdin_is_a_tty = isatty(STDIN_FILENO); @@ -701,6 +779,25 @@ int mainEntryClickHouseInstall(int argc, char ** argv) } } + /// Chmod and chown configs + changeOwnership(config_dir, user, group); + + /// Symlink "preprocessed_configs" is created by the server, so "write" is needed. + fs::permissions(config_dir, fs::perms::owner_all, fs::perm_options::replace); + + /// Subdirectories, so "execute" is needed. + if (fs::exists(config_d)) + fs::permissions(config_d, fs::perms::owner_read | fs::perms::owner_exec, fs::perm_options::replace); + if (fs::exists(users_d)) + fs::permissions(users_d, fs::perms::owner_read | fs::perms::owner_exec, fs::perm_options::replace); + + /// Readonly. 
+ if (fs::exists(main_config_file)) + fs::permissions(main_config_file, fs::perms::owner_read, fs::perm_options::replace); + if (fs::exists(users_config_file)) + fs::permissions(users_config_file, fs::perms::owner_read, fs::perm_options::replace); + + std::string maybe_password; if (has_password_for_default_user) maybe_password = " --password"; @@ -766,11 +863,7 @@ namespace /// All users are allowed to read pid file (for clickhouse status command). fs::permissions(pid_path, fs::perms::owner_all | fs::perms::group_read | fs::perms::others_read, fs::perm_options::replace); - { - std::string command = fmt::format("chown --recursive {} '{}'", user, pid_path.string()); - fmt::print(" {}\n", command); - executeScript(command); - } + changeOwnership(pid_path, user); } std::string command = fmt::format("{} --config-file {} --pid-file {} --daemon", @@ -974,34 +1067,36 @@ namespace int mainEntryClickHouseStart(int argc, char ** argv) { - po::options_description desc; - desc.add_options() - ("help,h", "produce help message") - ("binary-path", po::value()->default_value("/usr/bin"), "directory with binary") - ("config-path", po::value()->default_value("/etc/clickhouse-server"), "directory with configs") - ("pid-path", po::value()->default_value("/var/run/clickhouse-server"), "directory for pid file") - ("user", po::value()->default_value("clickhouse"), "clickhouse user") - ; - - po::variables_map options; - po::store(po::parse_command_line(argc, argv, desc), options); - - if (options.count("help")) - { - std::cout << "Usage: " - << (getuid() == 0 ? "" : "sudo ") - << argv[0] - << " start\n"; - return 1; - } - try { + po::options_description desc; + desc.add_options() + ("help,h", "produce help message") + ("prefix", po::value()->default_value("/"), "prefix for all paths") + ("binary-path", po::value()->default_value("usr/bin"), "directory with binary") + ("config-path", po::value()->default_value("etc/clickhouse-server"), "directory with configs") + ("pid-path", po::value()->default_value("var/run/clickhouse-server"), "directory for pid file") + ("user", po::value()->default_value(DEFAULT_CLICKHOUSE_SERVER_USER), "clickhouse user") + ; + + po::variables_map options; + po::store(po::parse_command_line(argc, argv, desc), options); + + if (options.count("help")) + { + std::cout << "Usage: " + << (getuid() == 0 ? 
"" : "sudo ") + << argv[0] + << " start\n"; + return 1; + } + std::string user = options["user"].as(); - fs::path executable = fs::path(options["binary-path"].as()) / "clickhouse-server"; - fs::path config = fs::path(options["config-path"].as()) / "config.xml"; - fs::path pid_file = fs::path(options["pid-path"].as()) / "clickhouse-server.pid"; + fs::path prefix = options["prefix"].as(); + fs::path executable = prefix / options["binary-path"].as() / "clickhouse-server"; + fs::path config = prefix / options["config-path"].as() / "config.xml"; + fs::path pid_file = prefix / options["pid-path"].as() / "clickhouse-server.pid"; return start(user, executable, config, pid_file); } @@ -1015,28 +1110,30 @@ int mainEntryClickHouseStart(int argc, char ** argv) int mainEntryClickHouseStop(int argc, char ** argv) { - po::options_description desc; - desc.add_options() - ("help,h", "produce help message") - ("pid-path", po::value()->default_value("/var/run/clickhouse-server"), "directory for pid file") - ("force", po::bool_switch(), "Stop with KILL signal instead of TERM") - ; - - po::variables_map options; - po::store(po::parse_command_line(argc, argv, desc), options); - - if (options.count("help")) - { - std::cout << "Usage: " - << (getuid() == 0 ? "" : "sudo ") - << argv[0] - << " stop\n"; - return 1; - } - try { - fs::path pid_file = fs::path(options["pid-path"].as()) / "clickhouse-server.pid"; + po::options_description desc; + desc.add_options() + ("help,h", "produce help message") + ("prefix", po::value()->default_value("/"), "prefix for all paths") + ("pid-path", po::value()->default_value("var/run/clickhouse-server"), "directory for pid file") + ("force", po::bool_switch(), "Stop with KILL signal instead of TERM") + ; + + po::variables_map options; + po::store(po::parse_command_line(argc, argv, desc), options); + + if (options.count("help")) + { + std::cout << "Usage: " + << (getuid() == 0 ? "" : "sudo ") + << argv[0] + << " stop\n"; + return 1; + } + + fs::path prefix = options["prefix"].as(); + fs::path pid_file = prefix / options["pid-path"].as() / "clickhouse-server.pid"; return stop(pid_file, options["force"].as()); } @@ -1050,72 +1147,79 @@ int mainEntryClickHouseStop(int argc, char ** argv) int mainEntryClickHouseStatus(int argc, char ** argv) { - po::options_description desc; - desc.add_options() - ("help,h", "produce help message") - ("pid-path", po::value()->default_value("/var/run/clickhouse-server"), "directory for pid file") - ; - - po::variables_map options; - po::store(po::parse_command_line(argc, argv, desc), options); - - if (options.count("help")) - { - std::cout << "Usage: " - << (getuid() == 0 ? "" : "sudo ") - << argv[0] - << " status\n"; - return 1; - } - try { - fs::path pid_file = fs::path(options["pid-path"].as()) / "clickhouse-server.pid"; + po::options_description desc; + desc.add_options() + ("help,h", "produce help message") + ("prefix", po::value()->default_value("/"), "prefix for all paths") + ("pid-path", po::value()->default_value("var/run/clickhouse-server"), "directory for pid file") + ; + + po::variables_map options; + po::store(po::parse_command_line(argc, argv, desc), options); + + if (options.count("help")) + { + std::cout << "Usage: " + << (getuid() == 0 ? "" : "sudo ") + << argv[0] + << " status\n"; + return 1; + } + + fs::path prefix = options["prefix"].as(); + fs::path pid_file = prefix / options["pid-path"].as() / "clickhouse-server.pid"; + isRunning(pid_file); - return 0; } catch (...) 
{ std::cerr << getCurrentExceptionMessage(false) << '\n'; return getCurrentExceptionCode(); } + + return 0; } int mainEntryClickHouseRestart(int argc, char ** argv) { - po::options_description desc; - desc.add_options() - ("help,h", "produce help message") - ("binary-path", po::value()->default_value("/usr/bin"), "directory with binary") - ("config-path", po::value()->default_value("/etc/clickhouse-server"), "directory with configs") - ("pid-path", po::value()->default_value("/var/run/clickhouse-server"), "directory for pid file") - ("user", po::value()->default_value("clickhouse"), "clickhouse user") - ("force", po::value()->default_value(false), "Stop with KILL signal instead of TERM") - ; - - po::variables_map options; - po::store(po::parse_command_line(argc, argv, desc), options); - - if (options.count("help")) - { - std::cout << "Usage: " - << (getuid() == 0 ? "" : "sudo ") - << argv[0] - << " restart\n"; - return 1; - } - try { + po::options_description desc; + desc.add_options() + ("help,h", "produce help message") + ("prefix", po::value()->default_value("/"), "prefix for all paths") + ("binary-path", po::value()->default_value("usr/bin"), "directory with binary") + ("config-path", po::value()->default_value("etc/clickhouse-server"), "directory with configs") + ("pid-path", po::value()->default_value("var/run/clickhouse-server"), "directory for pid file") + ("user", po::value()->default_value(DEFAULT_CLICKHOUSE_SERVER_USER), "clickhouse user") + ("force", po::value()->default_value(false), "Stop with KILL signal instead of TERM") + ; + + po::variables_map options; + po::store(po::parse_command_line(argc, argv, desc), options); + + if (options.count("help")) + { + std::cout << "Usage: " + << (getuid() == 0 ? "" : "sudo ") + << argv[0] + << " restart\n"; + return 1; + } + std::string user = options["user"].as(); - fs::path executable = fs::path(options["binary-path"].as()) / "clickhouse-server"; - fs::path config = fs::path(options["config-path"].as()) / "config.xml"; - fs::path pid_file = fs::path(options["pid-path"].as()) / "clickhouse-server.pid"; + fs::path prefix = options["prefix"].as(); + fs::path executable = prefix / options["binary-path"].as() / "clickhouse-server"; + fs::path config = prefix / options["config-path"].as() / "config.xml"; + fs::path pid_file = prefix / options["pid-path"].as() / "clickhouse-server.pid"; if (int res = stop(pid_file, options["force"].as())) return res; + return start(user, executable, config, pid_file); } catch (...) 
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 132909438da..c9858910837 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -7,15 +7,19 @@ else () include (${ClickHouse_SOURCE_DIR}/cmake/add_check.cmake) endif () -install (PROGRAMS clickhouse-test DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) -install ( - DIRECTORY queries performance config - DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/clickhouse-test - USE_SOURCE_PERMISSIONS - COMPONENT clickhouse - PATTERN "CMakeLists.txt" EXCLUDE - PATTERN ".gitignore" EXCLUDE -) +option (ENABLE_CLICKHOUSE_TEST "Install clickhouse-test script and relevant tests scenarios" ON) + +if (ENABLE_CLICKHOUSE_TEST) + install (PROGRAMS clickhouse-test DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) + install ( + DIRECTORY queries performance config + DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/clickhouse-test + USE_SOURCE_PERMISSIONS + COMPONENT clickhouse + PATTERN "CMakeLists.txt" EXCLUDE + PATTERN ".gitignore" EXCLUDE + ) +endif () if (ENABLE_TEST_INTEGRATION) add_subdirectory (integration) From 662b5d40e97c36259edff267595129d0fc28f396 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Sat, 16 Oct 2021 20:35:41 +0200 Subject: [PATCH 299/438] Adjust the tests to do less work --- ...k_many_parallel_quorum_inserts_long.reference | 16 ++++++---------- ...09_check_many_parallel_quorum_inserts_long.sh | 12 ++++++------ .../02033_join_engine_deadlock_long.sh | 4 ++-- 3 files changed, 14 insertions(+), 18 deletions(-) diff --git a/tests/queries/0_stateless/01509_check_many_parallel_quorum_inserts_long.reference b/tests/queries/0_stateless/01509_check_many_parallel_quorum_inserts_long.reference index 52dea650ebc..e9b7db9d530 100644 --- a/tests/queries/0_stateless/01509_check_many_parallel_quorum_inserts_long.reference +++ b/tests/queries/0_stateless/01509_check_many_parallel_quorum_inserts_long.reference @@ -1,10 +1,6 @@ -100 0 99 4950 -100 0 99 4950 -100 0 99 4950 -100 0 99 4950 -100 0 99 4950 -100 0 99 4950 -100 0 99 4950 -100 0 99 4950 -100 0 99 4950 -100 0 99 4950 +30 0 54 810 +30 0 54 810 +30 0 54 810 +30 0 54 810 +30 0 54 810 +30 0 54 810 diff --git a/tests/queries/0_stateless/01509_check_many_parallel_quorum_inserts_long.sh b/tests/queries/0_stateless/01509_check_many_parallel_quorum_inserts_long.sh index a6f151d0f6f..030ae017e71 100755 --- a/tests/queries/0_stateless/01509_check_many_parallel_quorum_inserts_long.sh +++ b/tests/queries/0_stateless/01509_check_many_parallel_quorum_inserts_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-replicated-database, no-parallel +# Tags: long, no-replicated-database # Tag no-replicated-database: Fails due to additional replicas or shards set -e @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -NUM_REPLICAS=10 +NUM_REPLICAS=6 for i in $(seq 1 $NUM_REPLICAS); do $CLICKHOUSE_CLIENT -n -q " @@ -20,17 +20,17 @@ done function thread { i=0 retries=300 while [[ $i -lt $retries ]]; do # server can be dead - $CLICKHOUSE_CLIENT --insert_quorum 5 --insert_quorum_parallel 1 --query "INSERT INTO r$1 SELECT $2" && break + $CLICKHOUSE_CLIENT --insert_quorum 3 --insert_quorum_parallel 1 --query "INSERT INTO r$1 SELECT $2" && break ((++i)) sleep 0.1 done } for i in $(seq 1 $NUM_REPLICAS); do - for j in {0..9}; do + for j in {0..4}; do a=$((($i - 1) * 10 + $j)) - # Note: making 100 connections simultaneously is a mini-DoS when server is build with sanitizers and CI environment is overloaded. + # Note: making 30 connections simultaneously is a mini-DoS when server is build with sanitizers and CI environment is overloaded. # That's why we repeat "socket timeout" errors. thread $i $a 2>&1 | grep -v -P 'SOCKET_TIMEOUT|NETWORK_ERROR|^$' & done @@ -46,5 +46,5 @@ for i in $(seq 1 $NUM_REPLICAS); do done for i in $(seq 1 $NUM_REPLICAS); do - $CLICKHOUSE_CLIENT -n -q "DROP TABLE IF EXISTS r$i;" + $CLICKHOUSE_CLIENT -n -q "DROP TABLE IF EXISTS r$i SYNC;" done diff --git a/tests/queries/0_stateless/02033_join_engine_deadlock_long.sh b/tests/queries/0_stateless/02033_join_engine_deadlock_long.sh index 1e670b72fe7..13cf013b53b 100755 --- a/tests/queries/0_stateless/02033_join_engine_deadlock_long.sh +++ b/tests/queries/0_stateless/02033_join_engine_deadlock_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, deadlock, no-parallel +# Tags: long, deadlock CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -29,7 +29,7 @@ populate_table_bg () { $CLICKHOUSE_CLIENT --query " INSERT INTO join_block_test SELECT toString(number) as id, number * number as num - FROM system.numbers LIMIT 3000000 + FROM system.numbers LIMIT 500000 " >/dev/null ) & } From d34d752688ec706694a05c5fd0c568c651b57c14 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 16 Oct 2021 18:37:46 +0000 Subject: [PATCH 300/438] Fix tests --- programs/local/LocalServer.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index cdd5ae13f99..0c5f64ea913 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -579,6 +579,11 @@ void LocalServer::processConfig() { String path = global_context->getPath(); + /// When tables are loaded from .sql we initialize background executors + /// regardless there are MergeTree tables or not, because no better place was found. + /// In other cases it will be initialized only when there are mergeTree tables. 
+ global_context->initializeBackgroundExecutors(); + /// Lock path directory before read status.emplace(fs::path(path) / "status", StatusFile::write_full_info); From 8b0359584afa84a310f43c37aeb01caf1ec7450a Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 16 Oct 2021 19:08:37 +0000 Subject: [PATCH 301/438] Update test --- tests/queries/0_stateless/01196_max_parser_depth.reference | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/01196_max_parser_depth.reference b/tests/queries/0_stateless/01196_max_parser_depth.reference index a72c1b18aa2..072fc270acd 100644 --- a/tests/queries/0_stateless/01196_max_parser_depth.reference +++ b/tests/queries/0_stateless/01196_max_parser_depth.reference @@ -1,3 +1,4 @@ Code: 306 Code: 306 Code: 306 +Code: 306 From 443efe8f5c207689f0575d3c79b03d9589b2d191 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Sat, 16 Oct 2021 16:21:18 -0300 Subject: [PATCH 302/438] Update quotas.md --- docs/ru/operations/system-tables/quotas.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/operations/system-tables/quotas.md b/docs/ru/operations/system-tables/quotas.md index 3715bc89596..6c8b5a3eebf 100644 --- a/docs/ru/operations/system-tables/quotas.md +++ b/docs/ru/operations/system-tables/quotas.md @@ -11,7 +11,7 @@ - `[]` — Все пользователи используют одну и ту же квоту. - `['user_name']` — Соединения с одинаковым именем пользователя используют одну и ту же квоту. - `['ip_address']` — Соединения с одинаковым IP-адресом используют одну и ту же квоту. - - `['client_key']` — Соединения с одинаковым ключом используют одну и ту же квоту. Ключ может быть явно задан клиентом. При использовании [clickhouse-client](../../interfaces/cli.md), передайте ключевое значение в параметре `--quota-key`, или используйте параметр `quota_key` файле настроек клиента. В случае использования HTTP интерфейса, используйте заголовок `X-ClickHouse-Quota`. + - `['client_key']` — Соединения с одинаковым ключом используют одну и ту же квоту. Ключ может быть явно задан клиентом. При использовании [clickhouse-client](../../interfaces/cli.md), передайте ключевое значение в параметре `--quota_key`, или используйте параметр `quota_key` файле настроек клиента. В случае использования HTTP интерфейса, используйте заголовок `X-ClickHouse-Quota`. - `['user_name', 'client_key']` — Соединения с одинаковым ключом используют одну и ту же квоту. Если ключ не предоставлен клиентом, то квота отслеживается для `user_name`. - `['client_key', 'ip_address']` — Соединения с одинаковым ключом используют одну и ту же квоту. Если ключ не предоставлен клиентом, то квота отслеживается для `ip_address`. - `durations` ([Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Длины временных интервалов для расчета потребления ресурсов, в секундах. From 5e0e0a3946390736f631a94ebf5b88a0588662d6 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Sat, 16 Oct 2021 16:22:19 -0300 Subject: [PATCH 303/438] Update quotas.md --- docs/en/operations/system-tables/quotas.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/system-tables/quotas.md b/docs/en/operations/system-tables/quotas.md index d7a73de5046..bdcc13340f0 100644 --- a/docs/en/operations/system-tables/quotas.md +++ b/docs/en/operations/system-tables/quotas.md @@ -10,7 +10,7 @@ Columns: - `[]` — All users share the same quota. - `['user_name']` — Connections with the same user name share the same quota. 
- `['ip_address']` — Connections from the same IP share the same quota. - - `['client_key']` — Connections with the same key share the same quota. A key must be explicitly provided by a client. When using [clickhouse-client](../../interfaces/cli.md), pass a key value in the `--quota-key` parameter, or use the `quota_key` parameter in the client configuration file. When using HTTP interface, use the `X-ClickHouse-Quota` header. + - `['client_key']` — Connections with the same key share the same quota. A key must be explicitly provided by a client. When using [clickhouse-client](../../interfaces/cli.md), pass a key value in the `--quota_key` parameter, or use the `quota_key` parameter in the client configuration file. When using HTTP interface, use the `X-ClickHouse-Quota` header. - `['user_name', 'client_key']` — Connections with the same `client_key` share the same quota. If a key isn’t provided by a client, the qouta is tracked for `user_name`. - `['client_key', 'ip_address']` — Connections with the same `client_key` share the same quota. If a key isn’t provided by a client, the qouta is tracked for `ip_address`. - `durations` ([Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Time interval lengths in seconds. From 4390dde76cfea689602bc4c599dda626cdea3e52 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 16 Oct 2021 19:48:51 +0000 Subject: [PATCH 304/438] Fix local break on timeout --- src/Client/ClientBase.cpp | 3 ++- src/Client/Connection.h | 2 ++ src/Client/IServerConnection.h | 8 ++++++++ src/Client/LocalConnection.h | 2 ++ 4 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index e030d8994d5..0af952d666e 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -517,6 +517,7 @@ void ClientBase::receiveResult(ASTPtr parsed_query) const size_t poll_interval = std::max(min_poll_interval, std::min(receive_timeout.totalMicroseconds(), default_poll_interval)); + bool break_on_timeout = connection->getConnectionType() != IServerConnection::Type::LOCAL; while (true) { Stopwatch receive_watch(CLOCK_MONOTONIC_COARSE); @@ -547,7 +548,7 @@ void ClientBase::receiveResult(ASTPtr parsed_query) else { double elapsed = receive_watch.elapsedSeconds(); - if (elapsed > receive_timeout.totalSeconds()) + if (break_on_timeout && elapsed > receive_timeout.totalSeconds()) { std::cout << "Timeout exceeded while receiving data from server." << " Waited for " << static_cast(elapsed) << " seconds," diff --git a/src/Client/Connection.h b/src/Client/Connection.h index b6054941aeb..848b848a104 100644 --- a/src/Client/Connection.h +++ b/src/Client/Connection.h @@ -60,6 +60,8 @@ public: ~Connection() override; + IServerConnection::Type getConnectionType() const override { return IServerConnection::Type::SERVER; } + static ServerConnectionPtr createConnection(const ConnectionParameters & parameters, ContextPtr context); /// Set throttler of network traffic. One throttler could be used for multiple connections to limit total traffic. 
diff --git a/src/Client/IServerConnection.h b/src/Client/IServerConnection.h index 42886c72182..5a853126982 100644 --- a/src/Client/IServerConnection.h +++ b/src/Client/IServerConnection.h @@ -56,6 +56,14 @@ class IServerConnection : boost::noncopyable public: virtual ~IServerConnection() = default; + enum class Type + { + SERVER, + LOCAL + }; + + virtual Type getConnectionType() const = 0; + virtual void setDefaultDatabase(const String & database) = 0; virtual void getServerVersion( diff --git a/src/Client/LocalConnection.h b/src/Client/LocalConnection.h index 242d23ddc36..2bd812f200f 100644 --- a/src/Client/LocalConnection.h +++ b/src/Client/LocalConnection.h @@ -56,6 +56,8 @@ public: ~LocalConnection() override; + IServerConnection::Type getConnectionType() const override { return IServerConnection::Type::LOCAL; } + static ServerConnectionPtr createConnection(const ConnectionParameters & connection_parameters, ContextPtr current_context, bool send_progress = false); void setDefaultDatabase(const String & database) override; From 5fc2279adf5cc901b37ccc57155857ba70d3ce9d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 17 Oct 2021 01:23:55 +0300 Subject: [PATCH 305/438] Add RISC-V build --- cmake/linux/toolchain-riscv64.cmake | 32 +++++++++++++++++++++++++++++ contrib/sysroot | 2 +- 2 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 cmake/linux/toolchain-riscv64.cmake diff --git a/cmake/linux/toolchain-riscv64.cmake b/cmake/linux/toolchain-riscv64.cmake new file mode 100644 index 00000000000..1ccbd3ee0da --- /dev/null +++ b/cmake/linux/toolchain-riscv64.cmake @@ -0,0 +1,32 @@ +set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) + +set (CMAKE_SYSTEM_NAME "Linux") +set (CMAKE_SYSTEM_PROCESSOR "riscv64") +set (CMAKE_C_COMPILER_TARGET "riscv64-linux-gnu") +set (CMAKE_CXX_COMPILER_TARGET "riscv64-linux-gnu") +set (CMAKE_ASM_COMPILER_TARGET "riscv64-linux-gnu") + +set (TOOLCHAIN_PATH "${CMAKE_CURRENT_LIST_DIR}/../../contrib/sysroot/linux-riscv64") + +set (CMAKE_SYSROOT "${TOOLCHAIN_PATH}") + +find_program (LLVM_AR_PATH NAMES "llvm-ar" "llvm-ar-13" "llvm-ar-12" "llvm-ar-11" "llvm-ar-10" "llvm-ar-9" "llvm-ar-8") +find_program (LLVM_RANLIB_PATH NAMES "llvm-ranlib" "llvm-ranlib-13" "llvm-ranlib-12" "llvm-ranlib-11" "llvm-ranlib-10" "llvm-ranlib-9") + +set (CMAKE_AR "${LLVM_AR_PATH}" CACHE FILEPATH "" FORCE) +set (CMAKE_RANLIB "${LLVM_RANLIB_PATH}" CACHE FILEPATH "" FORCE) + +set (CMAKE_C_FLAGS_INIT "${CMAKE_C_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") +set (CMAKE_CXX_FLAGS_INIT "${CMAKE_CXX_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") +set (CMAKE_ASM_FLAGS_INIT "${CMAKE_ASM_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") + +set (LINKER_NAME "ld.lld" CACHE STRING "" FORCE) + +set (CMAKE_EXE_LINKER_FLAGS_INIT "-fuse-ld=lld") +set (CMAKE_SHARED_LINKER_FLAGS_INIT "-fuse-ld=lld") + +set (HAS_PRE_1970_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE) +set (HAS_PRE_1970_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE) + +set (HAS_POST_2038_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE) +set (HAS_POST_2038_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE) diff --git a/contrib/sysroot b/contrib/sysroot index e4663925b73..6172893931e 160000 --- a/contrib/sysroot +++ b/contrib/sysroot @@ -1 +1 @@ -Subproject commit e4663925b73beb57dd29154844c8d50441146753 +Subproject commit 6172893931e19b028f9cabb7095a44361be863df From ceb98cf42ff20032fadc23698ea031aec7ab4483 Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Sun, 17 Oct 2021 03:04:53 
+0400 Subject: [PATCH 306/438] Add comments --- programs/install/Install.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index 6b2d3a58a22..606af7ecd0d 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -77,6 +77,7 @@ namespace ErrorCodes #define END_HILITE "\033[0m" #if defined(OS_DARWIN) +/// Until createUser() and createGroup() are implemented, only sudo-less installations are supported/default for macOS. static constexpr auto DEFAULT_CLICKHOUSE_SERVER_USER = ""; static constexpr auto DEFAULT_CLICKHOUSE_SERVER_GROUP = ""; static constexpr auto DEFAULT_CLICKHOUSE_BRIDGE_USER = ""; @@ -152,6 +153,9 @@ static void createGroup(const String & group_name) if (!group_name.empty()) { #if defined(OS_DARWIN) + + // TODO: implement. + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unable to create a group in macOS"); #else std::string command = fmt::format("groupadd -r {}", group_name); @@ -166,6 +170,9 @@ static void createUser(const String & user_name, [[maybe_unused]] const String & if (!user_name.empty()) { #if defined(OS_DARWIN) + + // TODO: implement. + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unable to create a user in macOS"); #else std::string command = group_name.empty() From b034c913db468b9db95d47ea8882172e54763541 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Fri, 15 Oct 2021 14:16:41 +0800 Subject: [PATCH 307/438] update --- contrib/replxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/replxx b/contrib/replxx index 89abeea7516..b0c266c2d8a 160000 --- a/contrib/replxx +++ b/contrib/replxx @@ -1 +1 @@ -Subproject commit 89abeea7516a2a9b6aad7bfecc132f608ff14a3d +Subproject commit b0c266c2d8a835784181e17292b421848c78c6b8 From 0285a15aeca720c4ecccb16755b9d0bd81de0d45 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 17 Oct 2021 05:52:00 +0300 Subject: [PATCH 308/438] Minor changes to install script --- docs/_includes/install/universal.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/_includes/install/universal.sh b/docs/_includes/install/universal.sh index 7cba682e772..eaea4cc69ed 100755 --- a/docs/_includes/install/universal.sh +++ b/docs/_includes/install/universal.sh @@ -47,13 +47,17 @@ then fi URL="https://builds.clickhouse.com/master/${DIR}/clickhouse" +echo echo "Will download ${URL}" +echo curl -O "${URL}" && chmod a+x clickhouse && +echo echo "Successfully downloaded the ClickHouse binary, you can run it as: ./clickhouse" if [ "${OS}" = "Linux" ] then + echo echo "You can also install it: sudo ./clickhouse install" fi From 131aa7701738cfc057e6f452afb79a424aae7f81 Mon Sep 17 00:00:00 2001 From: WangZengrui Date: Sun, 17 Oct 2021 11:21:58 +0800 Subject: [PATCH 309/438] fix style --- src/Functions/ReplaceRegexpImpl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/ReplaceRegexpImpl.h b/src/Functions/ReplaceRegexpImpl.h index b2c5470cd96..678189f8558 100644 --- a/src/Functions/ReplaceRegexpImpl.h +++ b/src/Functions/ReplaceRegexpImpl.h @@ -98,7 +98,7 @@ struct ReplaceRegexpImpl size_t start_pos = 0; bool is_first_match = true; bool is_start_pos_added_one = false; - + while (start_pos < static_cast(input.length())) { /// If no more replacements possible for current string @@ -117,7 +117,7 @@ struct ReplaceRegexpImpl memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data() + start_pos, bytes_to_copy); res_offset += bytes_to_copy; start_pos += bytes_to_copy + match.length(); - + /// To 
avoid infinite loop. if (is_first_match && match.length() == 0 && !replace_one && input.length() > 1) { From c2faf450d129ac9a81337b96fbb819ef22edf1e7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 17 Oct 2021 06:37:51 +0300 Subject: [PATCH 310/438] Fix error --- src/IO/ReadHelpers.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index e4452a7af0a..fda8c213ebf 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -281,7 +281,7 @@ ReturnType readIntTextImpl(T & x, ReadBuffer & buf) { /// 123+ or +123+, just stop after 123 or +123. if (has_number) - return ReturnType(true); + goto end; /// No digits read yet, but we already read sign, like ++, -+. if (has_sign) @@ -300,7 +300,7 @@ ReturnType readIntTextImpl(T & x, ReadBuffer & buf) case '-': { if (has_number) - return ReturnType(true); + goto end; if (has_sign) { From 059fc1de6997ca36e8b7b009ff31a175282bd02a Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 17 Oct 2021 11:42:36 +0300 Subject: [PATCH 311/438] Allow symlinks in file storage --- src/Common/filesystemHelpers.cpp | 7 +++-- src/Common/filesystemHelpers.h | 5 ++-- src/Dictionaries/FileDictionarySource.cpp | 2 +- src/Dictionaries/LibraryDictionarySource.cpp | 8 +---- src/Storages/StorageFile.cpp | 10 +++++-- .../02051_symlinks_to_user_files.reference | 1 + .../02051_symlinks_to_user_files.sh | 30 +++++++++++++++++++ 7 files changed, 47 insertions(+), 16 deletions(-) create mode 100644 tests/queries/0_stateless/02051_symlinks_to_user_files.reference create mode 100755 tests/queries/0_stateless/02051_symlinks_to_user_files.sh diff --git a/src/Common/filesystemHelpers.cpp b/src/Common/filesystemHelpers.cpp index 89214ad496e..f9fe8c97a14 100644 --- a/src/Common/filesystemHelpers.cpp +++ b/src/Common/filesystemHelpers.cpp @@ -118,7 +118,7 @@ bool pathStartsWith(const std::filesystem::path & path, const std::filesystem::p return absolute_path.starts_with(absolute_prefix_path); } -bool symlinkStartsWith(const std::filesystem::path & path, const std::filesystem::path & prefix_path) +bool fileOrSymlinkPathStartsWith(const std::filesystem::path & path, const std::filesystem::path & prefix_path) { /// Differs from pathStartsWith in how `path` is normalized before comparison. 
/// Make `path` absolute if it was relative and put it into normalized form: remove @@ -140,13 +140,14 @@ bool pathStartsWith(const String & path, const String & prefix_path) return pathStartsWith(filesystem_path, filesystem_prefix_path); } -bool symlinkStartsWith(const String & path, const String & prefix_path) +bool fileOrSymlinkPathStartsWith(const String & path, const String & prefix_path) { auto filesystem_path = std::filesystem::path(path); auto filesystem_prefix_path = std::filesystem::path(prefix_path); - return symlinkStartsWith(filesystem_path, filesystem_prefix_path); + return fileOrSymlinkPathStartsWith(filesystem_path, filesystem_prefix_path); } + } diff --git a/src/Common/filesystemHelpers.h b/src/Common/filesystemHelpers.h index de5802cde6d..f626198920e 100644 --- a/src/Common/filesystemHelpers.h +++ b/src/Common/filesystemHelpers.h @@ -35,8 +35,9 @@ bool pathStartsWith(const std::filesystem::path & path, const std::filesystem::p /// Returns true if path starts with prefix path bool pathStartsWith(const String & path, const String & prefix_path); -/// Returns true if symlink starts with prefix path -bool symlinkStartsWith(const String & path, const String & prefix_path); +/// Same as pathStartsWith, but wihtout canonization, i.e. allowed to check symlinks. +/// (Path is made absolute and normalized) +bool fileOrSymlinkPathStartsWith(const String & path, const String & prefix_path); } diff --git a/src/Dictionaries/FileDictionarySource.cpp b/src/Dictionaries/FileDictionarySource.cpp index 8c1f099f344..7fd2dbf80f1 100644 --- a/src/Dictionaries/FileDictionarySource.cpp +++ b/src/Dictionaries/FileDictionarySource.cpp @@ -31,7 +31,7 @@ FileDictionarySource::FileDictionarySource( , context(context_) { auto user_files_path = context->getUserFilesPath(); - if (created_from_ddl && !pathStartsWith(filepath, user_files_path)) + if (created_from_ddl && !fileOrSymlinkPathStartsWith(filepath, user_files_path)) throw Exception(ErrorCodes::PATH_ACCESS_DENIED, "File path {} is not inside {}", filepath, user_files_path); } diff --git a/src/Dictionaries/LibraryDictionarySource.cpp b/src/Dictionaries/LibraryDictionarySource.cpp index f117cfb179e..42683fb884c 100644 --- a/src/Dictionaries/LibraryDictionarySource.cpp +++ b/src/Dictionaries/LibraryDictionarySource.cpp @@ -41,13 +41,7 @@ LibraryDictionarySource::LibraryDictionarySource( , context(Context::createCopy(context_)) { auto dictionaries_lib_path = context->getDictionariesLibPath(); - bool path_checked = false; - if (fs::is_symlink(path)) - path_checked = symlinkStartsWith(path, dictionaries_lib_path); - else - path_checked = pathStartsWith(path, dictionaries_lib_path); - - if (created_from_ddl && !path_checked) + if (created_from_ddl && !fileOrSymlinkPathStartsWith(path, dictionaries_lib_path)) throw Exception(ErrorCodes::PATH_ACCESS_DENIED, "File path {} is not inside {}", path, dictionaries_lib_path); if (!fs::exists(path)) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 4ae55272db6..24377017987 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -124,8 +125,8 @@ void checkCreationIsAllowed(ContextPtr context_global, const std::string & db_di return; /// "/dev/null" is allowed for perf testing - if (!startsWith(table_path, db_dir_path) && table_path != "/dev/null") - throw Exception("File is not inside " + db_dir_path, ErrorCodes::DATABASE_ACCESS_DENIED); + if (!fileOrSymlinkPathStartsWith(table_path, 
db_dir_path) && table_path != "/dev/null") + throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "File `{}` is not inside `{}`", table_path, db_dir_path); if (fs::exists(table_path) && fs::is_directory(table_path)) throw Exception("File must not be a directory", ErrorCodes::INCORRECT_FILE_NAME); @@ -140,7 +141,10 @@ Strings StorageFile::getPathsList(const String & table_path, const String & user fs_table_path = user_files_absolute_path / fs_table_path; Strings paths; - const String path = fs::weakly_canonical(fs_table_path); + /// Do not use fs::canonical or fs::weakly_canonical. + /// Otherwise it will not allow to work with symlinks in `user_files_path` directory. + String path = std::filesystem::absolute(fs_table_path); + path = fs::path(path).lexically_normal(); /// Normalize path. if (path.find_first_of("*?{") == std::string::npos) { std::error_code error; diff --git a/tests/queries/0_stateless/02051_symlinks_to_user_files.reference b/tests/queries/0_stateless/02051_symlinks_to_user_files.reference new file mode 100644 index 00000000000..d86bac9de59 --- /dev/null +++ b/tests/queries/0_stateless/02051_symlinks_to_user_files.reference @@ -0,0 +1 @@ +OK diff --git a/tests/queries/0_stateless/02051_symlinks_to_user_files.sh b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh new file mode 100755 index 00000000000..3a9882a441c --- /dev/null +++ b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# See 01658_read_file_to_string_column.sh +user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') + +mkdir -p "${user_files_path}/" +chmod 777 "${user_files_path}" + +export FILE="test_symlink_${CLICKHOUSE_DATABASE}" + +symlink_path=${user_files_path}/${FILE} +file_path=$CUR_DIR/${FILE} + +function cleanup() +{ + rm ${symlink_path} ${file_path} +} +trap cleanup EXIT + +touch ${file_path} +ln -s ${file_path} ${symlink_path} + +${CLICKHOUSE_CLIENT} --query="insert into table function file('${symlink_path}', 'Values', 'a String') select 'OK'"; +${CLICKHOUSE_CLIENT} --query="select * from file('${symlink_path}', 'Values', 'a String')"; + From cd22ca616752c07f1808eeb676c9747693e88a1e Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Sun, 17 Oct 2021 13:13:47 +0300 Subject: [PATCH 312/438] Update filesystemHelpers.h --- src/Common/filesystemHelpers.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/filesystemHelpers.h b/src/Common/filesystemHelpers.h index f626198920e..2b805ce0c68 100644 --- a/src/Common/filesystemHelpers.h +++ b/src/Common/filesystemHelpers.h @@ -35,7 +35,7 @@ bool pathStartsWith(const std::filesystem::path & path, const std::filesystem::p /// Returns true if path starts with prefix path bool pathStartsWith(const String & path, const String & prefix_path); -/// Same as pathStartsWith, but wihtout canonization, i.e. allowed to check symlinks. +/// Same as pathStartsWith, but without canonization, i.e. allowed to check symlinks. 
/// (Path is made absolute and normalized) bool fileOrSymlinkPathStartsWith(const String & path, const String & prefix_path); From 07b44713b63b0e5292987f4937435830076dd203 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Sun, 17 Oct 2021 13:45:32 +0300 Subject: [PATCH 313/438] Ping CI --- src/Common/filesystemHelpers.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/filesystemHelpers.h b/src/Common/filesystemHelpers.h index 2b805ce0c68..fc3a4f15573 100644 --- a/src/Common/filesystemHelpers.h +++ b/src/Common/filesystemHelpers.h @@ -36,7 +36,7 @@ bool pathStartsWith(const std::filesystem::path & path, const std::filesystem::p bool pathStartsWith(const String & path, const String & prefix_path); /// Same as pathStartsWith, but without canonization, i.e. allowed to check symlinks. -/// (Path is made absolute and normalized) +/// (Path is made absolute and normalized.) bool fileOrSymlinkPathStartsWith(const String & path, const String & prefix_path); } From 5633865df159132e65242d9ce09f9f0206174ed8 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Sun, 17 Oct 2021 14:16:40 +0300 Subject: [PATCH 314/438] Update src/Storages/StorageFile.cpp Co-authored-by: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> --- src/Storages/StorageFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 24377017987..bdc0c203d59 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -143,7 +143,7 @@ Strings StorageFile::getPathsList(const String & table_path, const String & user Strings paths; /// Do not use fs::canonical or fs::weakly_canonical. /// Otherwise it will not allow to work with symlinks in `user_files_path` directory. - String path = std::filesystem::absolute(fs_table_path); + String path = fs::absolute(fs_table_path); path = fs::path(path).lexically_normal(); /// Normalize path. 
if (path.find_first_of("*?{") == std::string::npos) { From 5dab184d8b0c2ef1e59d5e194b21316750598e04 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Sun, 17 Oct 2021 18:21:43 +0300 Subject: [PATCH 315/438] Update 02051_symlinks_to_user_files.sh --- tests/queries/0_stateless/02051_symlinks_to_user_files.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02051_symlinks_to_user_files.sh b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh index 3a9882a441c..7d1fffba74d 100755 --- a/tests/queries/0_stateless/02051_symlinks_to_user_files.sh +++ b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh @@ -16,6 +16,8 @@ export FILE="test_symlink_${CLICKHOUSE_DATABASE}" symlink_path=${user_files_path}/${FILE} file_path=$CUR_DIR/${FILE} +chmod +w ${file_path} + function cleanup() { rm ${symlink_path} ${file_path} From 301caa80918f36ce32139d8e6554e314ba494183 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 17 Oct 2021 20:52:21 +0300 Subject: [PATCH 316/438] Update test --- .../0_stateless/01425_decimal_parse_big_negative_exponent.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01425_decimal_parse_big_negative_exponent.sql b/tests/queries/0_stateless/01425_decimal_parse_big_negative_exponent.sql index 7f276d1f8d4..1387206b882 100644 --- a/tests/queries/0_stateless/01425_decimal_parse_big_negative_exponent.sql +++ b/tests/queries/0_stateless/01425_decimal_parse_big_negative_exponent.sql @@ -1,4 +1,4 @@ -SELECT '-1E9-1E9-1E9-1E9' AS x, toDecimal32(x, 0); -- { serverError 6 } +SELECT '-1E9-1E9-1E9-1E9' AS x, toDecimal32(x, 0); -- { serverError 69 } SELECT '-1E9' AS x, toDecimal32(x, 0); -- { serverError 69 } SELECT '1E-9' AS x, toDecimal32(x, 0); SELECT '1E-8' AS x, toDecimal32(x, 0); From a8a7ba90056d09dfaa7ab717a992f0535fcddc00 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Sun, 17 Oct 2021 22:09:17 +0300 Subject: [PATCH 317/438] Update 02051_symlinks_to_user_files.sh --- tests/queries/0_stateless/02051_symlinks_to_user_files.sh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02051_symlinks_to_user_files.sh b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh index 7d1fffba74d..53c50542b06 100755 --- a/tests/queries/0_stateless/02051_symlinks_to_user_files.sh +++ b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest +# Tags: no-fasttest, no-parallel CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -16,7 +16,9 @@ export FILE="test_symlink_${CLICKHOUSE_DATABASE}" symlink_path=${user_files_path}/${FILE} file_path=$CUR_DIR/${FILE} +touch ${file_path} chmod +w ${file_path} +ln -s ${file_path} ${symlink_path} function cleanup() { @@ -24,9 +26,6 @@ function cleanup() } trap cleanup EXIT -touch ${file_path} -ln -s ${file_path} ${symlink_path} - ${CLICKHOUSE_CLIENT} --query="insert into table function file('${symlink_path}', 'Values', 'a String') select 'OK'"; ${CLICKHOUSE_CLIENT} --query="select * from file('${symlink_path}', 'Values', 'a String')"; From eb66442feb7429c7f09f1125f2b82b3f1fc7376d Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Sun, 17 Oct 2021 16:21:21 -0300 Subject: [PATCH 318/438] Update external-dicts-dict-layout.md --- .../external-dictionaries/external-dicts-dict-layout.md | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md index 30e050ef9ef..c4a359efca4 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md @@ -422,7 +422,7 @@ Similar to `cache`, but stores data on SSD and index in RAM. All cache dictionar 1048576 - /var/lib/clickhouse/clickhouse_dictionaries/test_dict + /var/lib/clickhouse/user_files/test_dict ``` @@ -431,7 +431,7 @@ or ``` sql LAYOUT(SSD_CACHE(BLOCK_SIZE 4096 FILE_SIZE 16777216 READ_BUFFER_SIZE 1048576 - PATH ./user_files/test_dict)) + PATH '/var/lib/clickhouse/user_files/test_dict')) ``` ### complex_key_ssd_cache {#complex-key-ssd-cache} From 084938fa8c6c1ee7c5c026e8f2b2dbe912fdc6c0 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Sun, 17 Oct 2021 16:28:22 -0300 Subject: [PATCH 319/438] Update external-dicts-dict-layout.md --- .../external-dictionaries/external-dicts-dict-layout.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md index 06fe4ae327a..73736344c57 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md @@ -352,7 +352,7 @@ LAYOUT(CACHE(SIZE_IN_CELLS 1000000000)) 1048576 - /var/lib/clickhouse/clickhouse_dictionaries/test_dict + /var/lib/clickhouse/user_files/test_dict ``` @@ -361,7 +361,7 @@ LAYOUT(CACHE(SIZE_IN_CELLS 1000000000)) ``` sql LAYOUT(SSD_CACHE(BLOCK_SIZE 4096 FILE_SIZE 16777216 READ_BUFFER_SIZE 1048576 - PATH ./user_files/test_dict)) + PATH '/var/lib/clickhouse/user_files/test_dict')) ``` ### complex_key_ssd_cache {#complex-key-ssd-cache} From 9de534f96c751d524f96961d620ae043618e3cdc Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Sun, 17 Oct 2021 23:10:01 +0300 Subject: [PATCH 320/438] Revert special contribs and set folder manually for them until fixed --- cmake/find/cxx.cmake | 2 ++ cmake/find/unwind.cmake | 1 + contrib/CMakeLists.txt | 9 --------- contrib/libcxx-cmake/CMakeLists.txt | 1 + contrib/libcxxabi-cmake/CMakeLists.txt | 1 + contrib/libunwind-cmake/CMakeLists.txt | 1 + 6 files changed, 6 insertions(+), 9 deletions(-) diff --git a/cmake/find/cxx.cmake b/cmake/find/cxx.cmake index b96ba1e1b65..b1da125e219 100644 --- a/cmake/find/cxx.cmake +++ b/cmake/find/cxx.cmake @@ -50,6 +50,8 @@ endif () if (NOT HAVE_LIBCXX AND NOT MISSING_INTERNAL_LIBCXX_LIBRARY) set (LIBCXX_LIBRARY cxx) set (LIBCXXABI_LIBRARY cxxabi) + add_subdirectory(contrib/libcxxabi-cmake) + add_subdirectory(contrib/libcxx-cmake) # Exception handling library is embedded into libcxxabi. 
diff --git a/cmake/find/unwind.cmake b/cmake/find/unwind.cmake index 9ae23ae23c7..c9f5f30a5d6 100644 --- a/cmake/find/unwind.cmake +++ b/cmake/find/unwind.cmake @@ -1,6 +1,7 @@ option (USE_UNWIND "Enable libunwind (better stacktraces)" ${ENABLE_LIBRARIES}) if (USE_UNWIND) + add_subdirectory(contrib/libunwind-cmake) set (UNWIND_LIBRARIES unwind) set (EXCEPTION_HANDLING_LIBRARY ${UNWIND_LIBRARIES}) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 1be61db40db..676654452de 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -21,15 +21,6 @@ endif() set_property(DIRECTORY PROPERTY EXCLUDE_FROM_ALL 1) -if (USE_INTERNAL_LIBCXX_LIBRARY AND NOT MISSING_INTERNAL_LIBCXX_LIBRARY) - add_subdirectory(libcxxabi-cmake) - add_subdirectory(libcxx-cmake) -endif () - -if (USE_UNWIND) - add_subdirectory(libunwind-cmake) -endif () - add_subdirectory (abseil-cpp-cmake) add_subdirectory (magic-enum-cmake) add_subdirectory (boost-cmake) diff --git a/contrib/libcxx-cmake/CMakeLists.txt b/contrib/libcxx-cmake/CMakeLists.txt index ac67f2563a3..2ec6dbff1a1 100644 --- a/contrib/libcxx-cmake/CMakeLists.txt +++ b/contrib/libcxx-cmake/CMakeLists.txt @@ -47,6 +47,7 @@ set(SRCS ) add_library(cxx ${SRCS}) +set_target_properties(cxx PROPERTIES FOLDER "contrib/libcxx-cmake") target_include_directories(cxx SYSTEM BEFORE PUBLIC $) target_compile_definitions(cxx PRIVATE -D_LIBCPP_BUILDING_LIBRARY -DLIBCXX_BUILDING_LIBCXXABI) diff --git a/contrib/libcxxabi-cmake/CMakeLists.txt b/contrib/libcxxabi-cmake/CMakeLists.txt index 0bb5d663633..425111d9b26 100644 --- a/contrib/libcxxabi-cmake/CMakeLists.txt +++ b/contrib/libcxxabi-cmake/CMakeLists.txt @@ -22,6 +22,7 @@ set(SRCS ) add_library(cxxabi ${SRCS}) +set_target_properties(cxxabi PROPERTIES FOLDER "contrib/libcxxabi-cmake") # Third party library may have substandard code. 
target_compile_options(cxxabi PRIVATE -w) diff --git a/contrib/libunwind-cmake/CMakeLists.txt b/contrib/libunwind-cmake/CMakeLists.txt index 1a9f5e50abd..155853a0bca 100644 --- a/contrib/libunwind-cmake/CMakeLists.txt +++ b/contrib/libunwind-cmake/CMakeLists.txt @@ -39,6 +39,7 @@ set(LIBUNWIND_SOURCES ${LIBUNWIND_ASM_SOURCES}) add_library(unwind ${LIBUNWIND_SOURCES}) +set_target_properties(unwind PROPERTIES FOLDER "contrib/libunwind-cmake") target_include_directories(unwind SYSTEM BEFORE PUBLIC $) target_compile_definitions(unwind PRIVATE -D_LIBUNWIND_NO_HEAP=1 -D_DEBUG -D_LIBUNWIND_IS_NATIVE_ONLY) From 34810456e1d10510c33241f9db5178914b40edd7 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 14 Oct 2021 21:07:48 +0300 Subject: [PATCH 321/438] Fix alignment for prefetch in AsynchronousReadBufferFromFileDescriptor --- src/IO/AsynchronousReadBufferFromFileDescriptor.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/IO/AsynchronousReadBufferFromFileDescriptor.h b/src/IO/AsynchronousReadBufferFromFileDescriptor.h index c64341089d0..50d8f5819fe 100644 --- a/src/IO/AsynchronousReadBufferFromFileDescriptor.h +++ b/src/IO/AsynchronousReadBufferFromFileDescriptor.h @@ -40,6 +40,7 @@ public: : ReadBufferFromFileBase(buf_size, existing_memory, alignment), reader(std::move(reader_)), priority(priority_), required_alignment(alignment), fd(fd_) { + prefetch_buffer.alignment = alignment; } ~AsynchronousReadBufferFromFileDescriptor() override; From 9cc1178ebc11f0f44e60670762306a4a3f854b0a Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 14 Oct 2021 21:07:48 +0300 Subject: [PATCH 322/438] BufferWithOwnMemory: do not try to align if buffer already aligned --- src/IO/BufferWithOwnMemory.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/IO/BufferWithOwnMemory.h b/src/IO/BufferWithOwnMemory.h index 1d9267a8518..fe2aa29f46f 100644 --- a/src/IO/BufferWithOwnMemory.h +++ b/src/IO/BufferWithOwnMemory.h @@ -101,6 +101,9 @@ private: if (!alignment) return value; + if (!(value % alignment)) + return value; + return (value + alignment - 1) / alignment * alignment; } From 348033d8bd8abdb32585c392d7e9f8af98c897ab Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 14 Oct 2021 21:07:49 +0300 Subject: [PATCH 323/438] BufferWithOwnMemory: make size aligned not capacity This will fix pread_fake_async/pread_threadpool with min_bytes_to_use_direct_io --- src/IO/BufferWithOwnMemory.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/IO/BufferWithOwnMemory.h b/src/IO/BufferWithOwnMemory.h index fe2aa29f46f..17bd0df9993 100644 --- a/src/IO/BufferWithOwnMemory.h +++ b/src/IO/BufferWithOwnMemory.h @@ -88,7 +88,7 @@ struct Memory : boost::noncopyable, Allocator } else { - size_t new_capacity = align(new_size + pad_right, alignment); + size_t new_capacity = align(new_size, alignment) + pad_right; m_data = static_cast(Allocator::realloc(m_data, m_capacity, new_capacity, alignment)); m_capacity = new_capacity; m_size = m_capacity - pad_right; @@ -115,12 +115,10 @@ private: return; } - size_t padded_capacity = m_capacity + pad_right; - ProfileEvents::increment(ProfileEvents::IOBufferAllocs); - ProfileEvents::increment(ProfileEvents::IOBufferAllocBytes, padded_capacity); + ProfileEvents::increment(ProfileEvents::IOBufferAllocBytes, m_capacity); - size_t new_capacity = align(padded_capacity, alignment); + size_t new_capacity = align(m_capacity, alignment) + pad_right; m_data = static_cast(Allocator::alloc(new_capacity, alignment)); m_capacity = new_capacity; m_size = m_capacity - 
pad_right; From fb54d6b6e0d38a3ef9fd7635c6ee6a84072728be Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 14 Oct 2021 21:07:49 +0300 Subject: [PATCH 324/438] BufferWithOwnMemory: take reallocs into account --- src/IO/BufferWithOwnMemory.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/IO/BufferWithOwnMemory.h b/src/IO/BufferWithOwnMemory.h index 17bd0df9993..0d571d6ae7c 100644 --- a/src/IO/BufferWithOwnMemory.h +++ b/src/IO/BufferWithOwnMemory.h @@ -89,6 +89,10 @@ struct Memory : boost::noncopyable, Allocator else { size_t new_capacity = align(new_size, alignment) + pad_right; + + size_t diff = new_capacity - m_capacity; + ProfileEvents::increment(ProfileEvents::IOBufferAllocBytes, diff); + m_data = static_cast(Allocator::realloc(m_data, m_capacity, new_capacity, alignment)); m_capacity = new_capacity; m_size = m_capacity - pad_right; From 14baed927f65e6e11426683a7d5f03d89bc11b46 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 14 Oct 2021 21:07:49 +0300 Subject: [PATCH 325/438] Add extensive test for various read settings The following settings had been covered: - min_bytes_to_use_direct_io - local_filesystem_read_method - local_filesystem_read_prefetch - read_priority - max_read_buffer_size --- .../02051_read_settings.reference.j2 | 11 +++++++ .../0_stateless/02051_read_settings.sql.j2 | 31 +++++++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 tests/queries/0_stateless/02051_read_settings.reference.j2 create mode 100644 tests/queries/0_stateless/02051_read_settings.sql.j2 diff --git a/tests/queries/0_stateless/02051_read_settings.reference.j2 b/tests/queries/0_stateless/02051_read_settings.reference.j2 new file mode 100644 index 00000000000..86aa67a9d2d --- /dev/null +++ b/tests/queries/0_stateless/02051_read_settings.reference.j2 @@ -0,0 +1,11 @@ +{% for read_method in ['read', 'mmap', 'pread_threadpool', 'pread_fake_async'] -%} +{% for direct_io in [0, 1] -%} +{% for prefetch in [0, 1] -%} +{% for priority in [0, 1] -%} +{% for buffer_size in [65505, 1048576] -%} +1000000 +{% endfor -%} +{% endfor -%} +{% endfor -%} +{% endfor -%} +{% endfor -%} diff --git a/tests/queries/0_stateless/02051_read_settings.sql.j2 b/tests/queries/0_stateless/02051_read_settings.sql.j2 new file mode 100644 index 00000000000..9f02274e732 --- /dev/null +++ b/tests/queries/0_stateless/02051_read_settings.sql.j2 @@ -0,0 +1,31 @@ +-- Tags: long +-- +-- Test for testing various read settings. 
+ +drop table if exists data_02051; + +create table data_02051 (key Int, value String) engine=MergeTree() order by key +as select number, repeat(toString(number), 5) from numbers(1e6); + +{# check each local_filesystem_read_method #} +{% for read_method in ['read', 'mmap', 'pread_threadpool', 'pread_fake_async'] %} +{# check w/ O_DIRECT and w/o (min_bytes_to_use_direct_io) #} +{% for direct_io in [0, 1] %} +{# check local_filesystem_read_prefetch (just a smoke test) #} +{% for prefetch in [0, 1] %} +{# check read_priority (just a smoke test) #} +{% for priority in [0, 1] %} +{# check alignment for O_DIRECT with various max_read_buffer_size #} +{% for buffer_size in [65505, 1048576] %} +select count(ignore(*)) from data_02051 settings + min_bytes_to_use_direct_io={{ direct_io }}, + local_filesystem_read_method='{{ read_method }}', + local_filesystem_read_prefetch={{ prefetch }}, + read_priority={{ priority }}, + max_read_buffer_size={{ buffer_size }} +; +{% endfor %} +{% endfor %} +{% endfor %} +{% endfor %} +{% endfor %} From 0e34a9d550cfe6924fe575871f36c44dd44acdaa Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 17 Oct 2021 23:47:51 +0300 Subject: [PATCH 326/438] Add MSan instrumentation for preadv2 MSan report [1]: ==10672==WARNING: MemorySanitizer: use-of-uninitialized-value 3 0x30dd6005 in DB::CompressionCodecFactory::get(unsigned char) const obj-x86_64-linux-gnu/../src/Compression/CompressionFactory.cpp:105:65 4 0x30dab4d5 in DB::CompressedReadBufferBase::readCompressedData(unsigned long&, unsigned long&, bool) obj-x86_64-linux-gnu/../src/Compression/CompressedReadBufferBase.cpp:127:53 5 0x30db54af in DB::CompressedReadBufferFromFile::readBig(char*, unsigned long) obj-x86_64-linux-gnu/../src/Compression/CompressedReadBufferFromFile.cpp:119:38 Uninitialized value was stored to memory at 0 0x9ef4e99 in __msan_memcpy (/src/ch/tmp/30191/clickhouse-msan+0x9ef4e99) 1 0xa0c1999 in DB::ReadBuffer::read() obj-x86_64-linux-gnu/../src/IO/ReadBuffer.h:173:13 2 0xa0c1999 in DB::ReadBuffer::readStrict() obj-x86_64-linux-gnu/../src/IO/ReadBuffer.h:184:27 3 0x30daafd1 in DB::CompressedReadBufferBase::readCompressedData() obj-x86_64-linux-gnu/../src/Compression/CompressedReadBufferBase.cpp:120:20 4 0x30db54af in DB::CompressedReadBufferFromFile::readBig() obj-x86_64-linux-gnu/../src/Compression/CompressedReadBufferFromFile.cpp:119:38 Uninitialized value was created by a heap allocation 0 0x9ef5285 in posix_memalign (/src/ch/tmp/30191/clickhouse-msan+0x9ef5285) 1 0xa0cb98d in Allocator::allocNoTrack(unsigned long, unsigned long) obj-x86_64-linux-gnu/../src/Common/Allocator.h:235:27 2 0xa0caf0d in Allocator::alloc(unsigned long, unsigned long) obj-x86_64-linux-gnu/../src/Common/Allocator.h:96:16 3 0xa0caf0d in DB::Memory >::alloc() obj-x86_64-linux-gnu/../src/IO/BufferWithOwnMemory.h:126:49 4 0xa0ca4a1 in DB::Memory >::Memory() obj-x86_64-linux-gnu/../src/IO/BufferWithOwnMemory.h:43:9 5 0xa0ca4a1 in DB::BufferWithOwnMemory::BufferWithOwnMemory(unsigned long, char*, unsigned long) obj-x86_64-linux-gnu/../src/IO/BufferWithOwnMemory.h:153:29 6 0xa0ca4a1 in DB::ReadBufferFromFileBase::ReadBufferFromFileBase() obj-x86_64-linux-gnu/../src/IO/ReadBufferFromFileBase.cpp:11:7 7 0xa59d980 in DB::AsynchronousReadBufferFromFileDescriptor::AsynchronousReadBufferFromFileDescriptor() obj-x86_64-linux-gnu/../src/IO/AsynchronousReadBufferFromFileDescriptor.h:40:11 SUMMARY: MemorySanitizer: use-of-uninitialized-value obj-x86_64-linux-gnu/../contrib/libcxx/include/__hash_table:116:10 in 
std::__1::__constrain_hash(unsigned long, unsigned long) [1]: https://clickhouse-test-reports.s3.yandex.net/30191/18bb0e7d8d47dd694390f3f7b2ecc921a167afac/fuzzer_msan/report.html#fail1 --- src/IO/ThreadPoolReader.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/IO/ThreadPoolReader.cpp b/src/IO/ThreadPoolReader.cpp index 514075569f6..701fa759848 100644 --- a/src/IO/ThreadPoolReader.cpp +++ b/src/IO/ThreadPoolReader.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -151,6 +152,7 @@ std::future ThreadPoolReader::submit(Request reques else { bytes_read += res; + __msan_unpoison(request.buf, res); } } From f032fb71f0903e181e92479803a88fb482adbec1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 18 Oct 2021 02:09:22 +0300 Subject: [PATCH 327/438] Minor modification in hardware benchmark --- benchmark/hardware.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/benchmark/hardware.sh b/benchmark/hardware.sh index 76328e1509d..69e05cf804b 100755 --- a/benchmark/hardware.sh +++ b/benchmark/hardware.sh @@ -13,6 +13,7 @@ TRIES=3 AMD64_BIN_URL="https://builds.clickhouse.com/master/amd64/clickhouse" AARCH64_BIN_URL="https://builds.clickhouse.com/master/aarch64/clickhouse" +POWERPC64_BIN_URL="https://builds.clickhouse.com/master/ppc64le/clickhouse" # Note: on older Ubuntu versions, 'axel' does not support IPv6. If you are using IPv6-only servers on very old Ubuntu, just don't install 'axel'. @@ -38,6 +39,8 @@ if [[ ! -f clickhouse ]]; then $FASTER_DOWNLOAD "$AMD64_BIN_URL" elif [[ $CPU == aarch64 ]]; then $FASTER_DOWNLOAD "$AARCH64_BIN_URL" + elif [[ $CPU == powerpc64le ]]; then + $FASTER_DOWNLOAD "$POWERPC64_BIN_URL" else echo "Unsupported CPU type: $CPU" exit 1 @@ -52,7 +55,7 @@ fi if [[ ! -d data ]]; then if [[ ! -f $DATASET ]]; then - $FASTER_DOWNLOAD "https://clickhouse-datasets.s3.yandex.net/hits/partitions/$DATASET" + $FASTER_DOWNLOAD "https://datasets.clickhouse.com/hits/partitions/$DATASET" fi tar $TAR_PARAMS --strip-components=1 --directory=. 
-x -v -f $DATASET From 2cef9983fa9f9d630055028a2350bb778b7618f4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 18 Oct 2021 02:29:04 +0300 Subject: [PATCH 328/438] Remove recursive submodules --- cmake/find/amqpcpp.cmake | 2 +- cmake/find/avro.cmake | 2 +- cmake/find/base64.cmake | 4 ++-- cmake/find/brotli.cmake | 2 +- cmake/find/bzip2.cmake | 2 +- cmake/find/capnp.cmake | 2 +- cmake/find/cassandra.cmake | 2 +- cmake/find/cxx.cmake | 2 +- cmake/find/cyrus-sasl.cmake | 2 +- cmake/find/datasketches.cmake | 2 +- cmake/find/fast_float.cmake | 2 +- cmake/find/fastops.cmake | 2 +- cmake/find/grpc.cmake | 2 +- cmake/find/gtest.cmake | 2 +- cmake/find/h3.cmake | 2 +- cmake/find/hdfs3.cmake | 2 +- cmake/find/icu.cmake | 2 +- cmake/find/krb5.cmake | 2 +- cmake/find/ldap.cmake | 2 +- cmake/find/libgsasl.cmake | 2 +- cmake/find/libpqxx.cmake | 4 ++-- cmake/find/libprotobuf-mutator.cmake | 2 +- cmake/find/libuv.cmake | 2 +- cmake/find/libxml2.cmake | 2 +- cmake/find/llvm.cmake | 2 +- cmake/find/msgpack.cmake | 2 +- cmake/find/mysqlclient.cmake | 2 +- cmake/find/nanodbc.cmake | 2 +- cmake/find/nlp.cmake | 6 +++--- cmake/find/nuraft.cmake | 2 +- cmake/find/orc.cmake | 2 +- cmake/find/parquet.cmake | 2 +- cmake/find/protobuf.cmake | 2 +- cmake/find/rapidjson.cmake | 2 +- cmake/find/rdkafka.cmake | 4 ++-- cmake/find/re2.cmake | 2 +- cmake/find/rocksdb.cmake | 2 +- cmake/find/s2geometry.cmake | 2 +- cmake/find/s3.cmake | 2 +- cmake/find/sentry.cmake | 2 +- cmake/find/simdjson.cmake | 2 +- cmake/find/sqlite.cmake | 2 +- cmake/find/ssl.cmake | 2 +- cmake/find/stats.cmake | 4 ++-- cmake/find/xz.cmake | 2 +- cmake/find/yaml-cpp.cmake | 2 +- cmake/find/zlib.cmake | 2 +- cmake/find/zstd.cmake | 2 +- docker/test/fasttest/run.sh | 2 +- docs/en/development/developer-instruction.md | 12 ++++++------ docs/ja/development/developer-instruction.md | 12 ++++++------ docs/ru/development/developer-instruction.md | 12 ++++++------ docs/zh/development/developer-instruction.md | 12 ++++++------ utils/build/build_msvc2017.bat | 14 -------------- 54 files changed, 79 insertions(+), 93 deletions(-) delete mode 100644 utils/build/build_msvc2017.bat diff --git a/cmake/find/amqpcpp.cmake b/cmake/find/amqpcpp.cmake index 05e5d2da751..374e6dd6d7e 100644 --- a/cmake/find/amqpcpp.cmake +++ b/cmake/find/amqpcpp.cmake @@ -10,7 +10,7 @@ if (NOT ENABLE_AMQPCPP) endif() if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/AMQP-CPP/CMakeLists.txt") - message (WARNING "submodule contrib/AMQP-CPP is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/AMQP-CPP is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal AMQP-CPP library") set (USE_AMQPCPP 0) return() diff --git a/cmake/find/avro.cmake b/cmake/find/avro.cmake index 74ccda3489f..351fa15d2d3 100644 --- a/cmake/find/avro.cmake +++ b/cmake/find/avro.cmake @@ -13,7 +13,7 @@ option (USE_INTERNAL_AVRO_LIBRARY if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/avro/lang/c++/CMakeLists.txt") if (USE_INTERNAL_AVRO_LIBRARY) - message(WARNING "submodule contrib/avro is missing. to fix try run: \n git submodule update --init --recursive") + message(WARNING "submodule contrib/avro is missing. 
to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot find internal avro") set(USE_INTERNAL_AVRO_LIBRARY 0) endif() diff --git a/cmake/find/base64.cmake b/cmake/find/base64.cmake index acade11eb2f..ee12fbb11ba 100644 --- a/cmake/find/base64.cmake +++ b/cmake/find/base64.cmake @@ -10,11 +10,11 @@ endif() if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/base64/LICENSE") set (MISSING_INTERNAL_BASE64_LIBRARY 1) - message (WARNING "submodule contrib/base64 is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/base64 is missing. to fix try run: \n git submodule update --init") endif () if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/base64") - message (WARNING "submodule contrib/base64 is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/base64 is missing. to fix try run: \n git submodule update --init") else() set (BASE64_LIBRARY base64) set (USE_BASE64 1) diff --git a/cmake/find/brotli.cmake b/cmake/find/brotli.cmake index bf498802922..4b2ee3d6de0 100644 --- a/cmake/find/brotli.cmake +++ b/cmake/find/brotli.cmake @@ -16,7 +16,7 @@ endif() if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/brotli/c/include/brotli/decode.h") if (USE_INTERNAL_BROTLI_LIBRARY) - message (WARNING "submodule contrib/brotli is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/brotli is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot find internal brotli") set (USE_INTERNAL_BROTLI_LIBRARY 0) endif () diff --git a/cmake/find/bzip2.cmake b/cmake/find/bzip2.cmake index 15532a67c00..5e6a6fb5841 100644 --- a/cmake/find/bzip2.cmake +++ b/cmake/find/bzip2.cmake @@ -6,7 +6,7 @@ if (NOT ENABLE_BZIP2) endif() if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/bzip2/bzlib.h") - message (WARNING "submodule contrib/bzip2 is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/bzip2 is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal bzip2 library") set (USE_NLP 0) return() diff --git a/cmake/find/capnp.cmake b/cmake/find/capnp.cmake index ee4735bd175..b0e4cc419f6 100644 --- a/cmake/find/capnp.cmake +++ b/cmake/find/capnp.cmake @@ -11,7 +11,7 @@ option (USE_INTERNAL_CAPNP_LIBRARY "Set to FALSE to use system capnproto library if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/capnproto/CMakeLists.txt") if(USE_INTERNAL_CAPNP_LIBRARY) - message(WARNING "submodule contrib/capnproto is missing. to fix try run: \n git submodule update --init --recursive") + message(WARNING "submodule contrib/capnproto is missing. to fix try run: \n git submodule update --init") message(${RECONFIGURE_MESSAGE_LEVEL} "cannot find internal capnproto") set(USE_INTERNAL_CAPNP_LIBRARY 0) endif() diff --git a/cmake/find/cassandra.cmake b/cmake/find/cassandra.cmake index b6e97ff5ef8..7fcbdbb90a5 100644 --- a/cmake/find/cassandra.cmake +++ b/cmake/find/cassandra.cmake @@ -14,7 +14,7 @@ if (APPLE) endif() if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/cassandra") - message (ERROR "submodule contrib/cassandra is missing. to fix try run: \n git submodule update --init --recursive") + message (ERROR "submodule contrib/cassandra is missing. 
to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal Cassandra") set (USE_CASSANDRA 0) return() diff --git a/cmake/find/cxx.cmake b/cmake/find/cxx.cmake index b1da125e219..f38ac77b1ea 100644 --- a/cmake/find/cxx.cmake +++ b/cmake/find/cxx.cmake @@ -17,7 +17,7 @@ option (USE_INTERNAL_LIBCXX_LIBRARY "Disable to use system libcxx and libcxxabi if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libcxx/CMakeLists.txt") if (USE_INTERNAL_LIBCXX_LIBRARY) - message(WARNING "submodule contrib/libcxx is missing. to fix try run: \n git submodule update --init --recursive") + message(WARNING "submodule contrib/libcxx is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal libcxx") set(USE_INTERNAL_LIBCXX_LIBRARY 0) endif() diff --git a/cmake/find/cyrus-sasl.cmake b/cmake/find/cyrus-sasl.cmake index 974b8148fdc..f0c088995b0 100644 --- a/cmake/find/cyrus-sasl.cmake +++ b/cmake/find/cyrus-sasl.cmake @@ -6,7 +6,7 @@ endif() OPTION(ENABLE_CYRUS_SASL "Enable cyrus-sasl" ${DEFAULT_ENABLE_CYRUS_SASL}) if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/cyrus-sasl/README") - message (WARNING "submodule contrib/cyrus-sasl is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/cyrus-sasl is missing. to fix try run: \n git submodule update --init") set (ENABLE_CYRUS_SASL 0) endif () diff --git a/cmake/find/datasketches.cmake b/cmake/find/datasketches.cmake index 44ef324a9f2..2d7e644890a 100644 --- a/cmake/find/datasketches.cmake +++ b/cmake/find/datasketches.cmake @@ -6,7 +6,7 @@ option (USE_INTERNAL_DATASKETCHES_LIBRARY "Set to FALSE to use system DataSketch if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/datasketches-cpp/theta/CMakeLists.txt") if (USE_INTERNAL_DATASKETCHES_LIBRARY) - message(WARNING "submodule contrib/datasketches-cpp is missing. to fix try run: \n git submodule update --init --recursive") + message(WARNING "submodule contrib/datasketches-cpp is missing. to fix try run: \n git submodule update --init") endif() set(MISSING_INTERNAL_DATASKETCHES_LIBRARY 1) set(USE_INTERNAL_DATASKETCHES_LIBRARY 0) diff --git a/cmake/find/fast_float.cmake b/cmake/find/fast_float.cmake index 4b215c710ad..3e8b7cc5280 100644 --- a/cmake/find/fast_float.cmake +++ b/cmake/find/fast_float.cmake @@ -1,5 +1,5 @@ if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/fast_float/include/fast_float/fast_float.h") - message (FATAL_ERROR "submodule contrib/fast_float is missing. to fix try run: \n git submodule update --init --recursive") + message (FATAL_ERROR "submodule contrib/fast_float is missing. to fix try run: \n git submodule update --init") endif () set(FAST_FLOAT_LIBRARY fast_float) diff --git a/cmake/find/fastops.cmake b/cmake/find/fastops.cmake index 1675646654e..72426eb5912 100644 --- a/cmake/find/fastops.cmake +++ b/cmake/find/fastops.cmake @@ -10,7 +10,7 @@ if(NOT ENABLE_FASTOPS) endif() if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/fastops/fastops/fastops.h") - message(WARNING "submodule contrib/fastops is missing. to fix try run: \n git submodule update --init --recursive") + message(WARNING "submodule contrib/fastops is missing. 
to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal fastops library") set(MISSING_INTERNAL_FASTOPS_LIBRARY 1) endif() diff --git a/cmake/find/grpc.cmake b/cmake/find/grpc.cmake index 1e440b3b350..f4b280876ef 100644 --- a/cmake/find/grpc.cmake +++ b/cmake/find/grpc.cmake @@ -26,7 +26,7 @@ option(USE_INTERNAL_GRPC_LIBRARY "Set to FALSE to use system gRPC library instea if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/grpc/CMakeLists.txt") if(USE_INTERNAL_GRPC_LIBRARY) - message(WARNING "submodule contrib/grpc is missing. to fix try run: \n git submodule update --init --recursive") + message(WARNING "submodule contrib/grpc is missing. to fix try run: \n git submodule update --init") message(${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal grpc") set(USE_INTERNAL_GRPC_LIBRARY 0) endif() diff --git a/cmake/find/gtest.cmake b/cmake/find/gtest.cmake index 9d4ab2608cb..c5f987d7368 100644 --- a/cmake/find/gtest.cmake +++ b/cmake/find/gtest.cmake @@ -4,7 +4,7 @@ option (USE_INTERNAL_GTEST_LIBRARY "Set to FALSE to use system Google Test inste if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/googletest/googletest/CMakeLists.txt") if (USE_INTERNAL_GTEST_LIBRARY) - message (WARNING "submodule contrib/googletest is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/googletest is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal gtest") set (USE_INTERNAL_GTEST_LIBRARY 0) endif () diff --git a/cmake/find/h3.cmake b/cmake/find/h3.cmake index 03b6f32fc3c..e692b431e90 100644 --- a/cmake/find/h3.cmake +++ b/cmake/find/h3.cmake @@ -11,7 +11,7 @@ option(USE_INTERNAL_H3_LIBRARY "Set to FALSE to use system h3 library instead of if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/h3/src/h3lib/include/h3Index.h") if(USE_INTERNAL_H3_LIBRARY) - message(WARNING "submodule contrib/h3 is missing. to fix try run: \n git submodule update --init --recursive") + message(WARNING "submodule contrib/h3 is missing. to fix try run: \n git submodule update --init") message(${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal h3 library") set(USE_INTERNAL_H3_LIBRARY 0) endif() diff --git a/cmake/find/hdfs3.cmake b/cmake/find/hdfs3.cmake index 3aab2b612ef..aac6b99dfa2 100644 --- a/cmake/find/hdfs3.cmake +++ b/cmake/find/hdfs3.cmake @@ -16,7 +16,7 @@ option(USE_INTERNAL_HDFS3_LIBRARY "Set to FALSE to use system HDFS3 instead of b if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libhdfs3/include/hdfs/hdfs.h") if(USE_INTERNAL_HDFS3_LIBRARY) - message(WARNING "submodule contrib/libhdfs3 is missing. to fix try run: \n git submodule update --init --recursive") + message(WARNING "submodule contrib/libhdfs3 is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot use internal HDFS3 library") set(USE_INTERNAL_HDFS3_LIBRARY 0) endif() diff --git a/cmake/find/icu.cmake b/cmake/find/icu.cmake index 40fb391656d..0b775a68eda 100644 --- a/cmake/find/icu.cmake +++ b/cmake/find/icu.cmake @@ -16,7 +16,7 @@ option (USE_INTERNAL_ICU_LIBRARY "Set to FALSE to use system ICU library instead if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/icu/icu4c/LICENSE") if (USE_INTERNAL_ICU_LIBRARY) - message (WARNING "submodule contrib/icu is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/icu is missing. 
to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal ICU") set (USE_INTERNAL_ICU_LIBRARY 0) endif () diff --git a/cmake/find/krb5.cmake b/cmake/find/krb5.cmake index 49b7462b710..24cc51325dc 100644 --- a/cmake/find/krb5.cmake +++ b/cmake/find/krb5.cmake @@ -1,7 +1,7 @@ OPTION(ENABLE_KRB5 "Enable krb5" ${ENABLE_LIBRARIES}) if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/krb5/README") - message (WARNING "submodule contrib/krb5 is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/krb5 is missing. to fix try run: \n git submodule update --init") set (ENABLE_KRB5 0) endif () diff --git a/cmake/find/ldap.cmake b/cmake/find/ldap.cmake index 71222d26c66..0d14e2c4199 100644 --- a/cmake/find/ldap.cmake +++ b/cmake/find/ldap.cmake @@ -15,7 +15,7 @@ option (USE_INTERNAL_LDAP_LIBRARY "Set to FALSE to use system *LDAP library inst if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/openldap/README") if (USE_INTERNAL_LDAP_LIBRARY) - message (WARNING "Submodule contrib/openldap is missing. To fix try running:\n git submodule update --init --recursive") + message (WARNING "Submodule contrib/openldap is missing. To fix try running:\n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal LDAP library") endif () diff --git a/cmake/find/libgsasl.cmake b/cmake/find/libgsasl.cmake index 3c742af2566..3aec5c0c30a 100644 --- a/cmake/find/libgsasl.cmake +++ b/cmake/find/libgsasl.cmake @@ -16,7 +16,7 @@ endif() if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libgsasl/src/gsasl.h") if (USE_INTERNAL_LIBGSASL_LIBRARY) - message (WARNING "submodule contrib/libgsasl is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/libgsasl is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal libgsasl") set (USE_INTERNAL_LIBGSASL_LIBRARY 0) endif () diff --git a/cmake/find/libpqxx.cmake b/cmake/find/libpqxx.cmake index f981df19aaa..b2a1e217b10 100644 --- a/cmake/find/libpqxx.cmake +++ b/cmake/find/libpqxx.cmake @@ -5,14 +5,14 @@ if (NOT ENABLE_LIBPQXX) endif() if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libpqxx/CMakeLists.txt") - message (WARNING "submodule contrib/libpqxx is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/libpqxx is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal libpqxx library") set (USE_LIBPQXX 0) return() endif() if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libpq/include") - message (ERROR "submodule contrib/libpq is missing. to fix try run: \n git submodule update --init --recursive") + message (ERROR "submodule contrib/libpq is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal libpq needed for libpqxx") set (USE_LIBPQXX 0) return() diff --git a/cmake/find/libprotobuf-mutator.cmake b/cmake/find/libprotobuf-mutator.cmake index 8aa595230cd..a308db67c8b 100644 --- a/cmake/find/libprotobuf-mutator.cmake +++ b/cmake/find/libprotobuf-mutator.cmake @@ -7,5 +7,5 @@ endif() set(LibProtobufMutator_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libprotobuf-mutator") if (NOT EXISTS "${LibProtobufMutator_SOURCE_DIR}/README.md") - message (ERROR "submodule contrib/libprotobuf-mutator is missing. 
to fix try run: \n git submodule update --init --recursive") + message (ERROR "submodule contrib/libprotobuf-mutator is missing. to fix try run: \n git submodule update --init") endif() diff --git a/cmake/find/libuv.cmake b/cmake/find/libuv.cmake index f0023209309..c94dfd50b76 100644 --- a/cmake/find/libuv.cmake +++ b/cmake/find/libuv.cmake @@ -5,7 +5,7 @@ if (OS_DARWIN AND COMPILER_GCC) endif() if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libuv") - message (WARNING "submodule contrib/libuv is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/libuv is missing. to fix try run: \n git submodule update --init") SET(MISSING_INTERNAL_LIBUV_LIBRARY 1) return() endif() diff --git a/cmake/find/libxml2.cmake b/cmake/find/libxml2.cmake index cdf079c33d2..8f7e79d84c9 100644 --- a/cmake/find/libxml2.cmake +++ b/cmake/find/libxml2.cmake @@ -2,7 +2,7 @@ option (USE_INTERNAL_LIBXML2_LIBRARY "Set to FALSE to use system libxml2 library if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libxml2/libxml.h") if (USE_INTERNAL_LIBXML2_LIBRARY) - message (WARNING "submodule contrib/libxml2 is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/libxml2 is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal libxml") set (USE_INTERNAL_LIBXML2_LIBRARY 0) endif () diff --git a/cmake/find/llvm.cmake b/cmake/find/llvm.cmake index 84ac29991ab..ece5d5434a0 100644 --- a/cmake/find/llvm.cmake +++ b/cmake/find/llvm.cmake @@ -12,7 +12,7 @@ if (NOT ENABLE_EMBEDDED_COMPILER) endif() if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/llvm/llvm/CMakeLists.txt") - message (${RECONFIGURE_MESSAGE_LEVEL} "submodule /contrib/llvm is missing. to fix try run: \n git submodule update --init --recursive") + message (${RECONFIGURE_MESSAGE_LEVEL} "submodule /contrib/llvm is missing. to fix try run: \n git submodule update --init") endif () set (USE_EMBEDDED_COMPILER 1) diff --git a/cmake/find/msgpack.cmake b/cmake/find/msgpack.cmake index 130aa007ad5..c15fedd0e30 100644 --- a/cmake/find/msgpack.cmake +++ b/cmake/find/msgpack.cmake @@ -11,7 +11,7 @@ option (USE_INTERNAL_MSGPACK_LIBRARY "Set to FALSE to use system msgpack library if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/msgpack-c/include/msgpack.hpp") if(USE_INTERNAL_MSGPACK_LIBRARY) - message(WARNING "Submodule contrib/msgpack-c is missing. To fix try run: \n git submodule update --init --recursive") + message(WARNING "Submodule contrib/msgpack-c is missing. To fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot use internal msgpack") set(USE_INTERNAL_MSGPACK_LIBRARY 0) endif() diff --git a/cmake/find/mysqlclient.cmake b/cmake/find/mysqlclient.cmake index 634681d98f6..0af03676d71 100644 --- a/cmake/find/mysqlclient.cmake +++ b/cmake/find/mysqlclient.cmake @@ -16,7 +16,7 @@ option(USE_INTERNAL_MYSQL_LIBRARY "Set to FALSE to use system mysqlclient librar if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/mariadb-connector-c/README") if(USE_INTERNAL_MYSQL_LIBRARY) - message(WARNING "submodule contrib/mariadb-connector-c is missing. to fix try run: \n git submodule update --init --recursive") + message(WARNING "submodule contrib/mariadb-connector-c is missing. 
to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal mysql library") set(USE_INTERNAL_MYSQL_LIBRARY 0) endif() diff --git a/cmake/find/nanodbc.cmake b/cmake/find/nanodbc.cmake index 894a2a60bad..d48e294c9e5 100644 --- a/cmake/find/nanodbc.cmake +++ b/cmake/find/nanodbc.cmake @@ -7,7 +7,7 @@ if (NOT USE_INTERNAL_NANODBC_LIBRARY) endif () if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/nanodbc/CMakeLists.txt") - message (FATAL_ERROR "submodule contrib/nanodbc is missing. to fix try run: \n git submodule update --init --recursive") + message (FATAL_ERROR "submodule contrib/nanodbc is missing. to fix try run: \n git submodule update --init") endif() set (NANODBC_LIBRARY nanodbc) diff --git a/cmake/find/nlp.cmake b/cmake/find/nlp.cmake index f1204a85dea..efa9b39ddae 100644 --- a/cmake/find/nlp.cmake +++ b/cmake/find/nlp.cmake @@ -7,21 +7,21 @@ if (NOT ENABLE_NLP) endif() if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libstemmer_c/Makefile") - message (WARNING "submodule contrib/libstemmer_c is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/libstemmer_c is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal libstemmer_c library, NLP functions will be disabled") set (USE_NLP 0) return() endif () if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/wordnet-blast/CMakeLists.txt") - message (WARNING "submodule contrib/wordnet-blast is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/wordnet-blast is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal wordnet-blast library, NLP functions will be disabled") set (USE_NLP 0) return() endif () if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/lemmagen-c/README.md") - message (WARNING "submodule contrib/lemmagen-c is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/lemmagen-c is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal lemmagen-c library, NLP functions will be disabled") set (USE_NLP 0) return() diff --git a/cmake/find/nuraft.cmake b/cmake/find/nuraft.cmake index 4e5258e132f..59caa9e7373 100644 --- a/cmake/find/nuraft.cmake +++ b/cmake/find/nuraft.cmake @@ -5,7 +5,7 @@ if (NOT ENABLE_NURAFT) endif() if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/NuRaft/CMakeLists.txt") - message (WARNING "submodule contrib/NuRaft is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/NuRaft is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal NuRaft library") set (USE_NURAFT 0) return() diff --git a/cmake/find/orc.cmake b/cmake/find/orc.cmake index 01734224a6a..a5c3f57468a 100644 --- a/cmake/find/orc.cmake +++ b/cmake/find/orc.cmake @@ -18,7 +18,7 @@ include(cmake/find/snappy.cmake) if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/orc/c++/include/orc/OrcFile.hh") if(USE_INTERNAL_ORC_LIBRARY) - message(WARNING "submodule contrib/orc is missing. to fix try run: \n git submodule update --init --recursive") + message(WARNING "submodule contrib/orc is missing. 
to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal ORC") set(USE_INTERNAL_ORC_LIBRARY 0) endif() diff --git a/cmake/find/parquet.cmake b/cmake/find/parquet.cmake index eb1b529fbfe..4b56a829df5 100644 --- a/cmake/find/parquet.cmake +++ b/cmake/find/parquet.cmake @@ -20,7 +20,7 @@ endif() if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/CMakeLists.txt") if(USE_INTERNAL_PARQUET_LIBRARY) - message(WARNING "submodule contrib/arrow (required for Parquet) is missing. to fix try run: \n git submodule update --init --recursive") + message(WARNING "submodule contrib/arrow (required for Parquet) is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal parquet library") set(USE_INTERNAL_PARQUET_LIBRARY 0) endif() diff --git a/cmake/find/protobuf.cmake b/cmake/find/protobuf.cmake index eb9fbe3edef..096288fd2ab 100644 --- a/cmake/find/protobuf.cmake +++ b/cmake/find/protobuf.cmake @@ -15,7 +15,7 @@ option(USE_INTERNAL_PROTOBUF_LIBRARY "Set to FALSE to use system protobuf instea if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/protobuf/cmake/CMakeLists.txt") if(USE_INTERNAL_PROTOBUF_LIBRARY) - message(WARNING "submodule contrib/protobuf is missing. to fix try run: \n git submodule update --init --recursive") + message(WARNING "submodule contrib/protobuf is missing. to fix try run: \n git submodule update --init") message(${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal protobuf") set(USE_INTERNAL_PROTOBUF_LIBRARY 0) endif() diff --git a/cmake/find/rapidjson.cmake b/cmake/find/rapidjson.cmake index f880d19143e..62db4695c58 100644 --- a/cmake/find/rapidjson.cmake +++ b/cmake/find/rapidjson.cmake @@ -10,7 +10,7 @@ option(USE_INTERNAL_RAPIDJSON_LIBRARY "Set to FALSE to use system rapidjson libr if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/rapidjson/include/rapidjson/rapidjson.h") if(USE_INTERNAL_RAPIDJSON_LIBRARY) - message(WARNING "submodule contrib/rapidjson is missing. to fix try run: \n git submodule update --init --recursive") + message(WARNING "submodule contrib/rapidjson is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal rapidjson library") set(USE_INTERNAL_RAPIDJSON_LIBRARY 0) endif() diff --git a/cmake/find/rdkafka.cmake b/cmake/find/rdkafka.cmake index f6460c1d9a3..5b370a42cdc 100644 --- a/cmake/find/rdkafka.cmake +++ b/cmake/find/rdkafka.cmake @@ -11,7 +11,7 @@ option (USE_INTERNAL_RDKAFKA_LIBRARY "Set to FALSE to use system librdkafka inst if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/cppkafka/CMakeLists.txt") if(USE_INTERNAL_RDKAFKA_LIBRARY) - message (WARNING "submodule contrib/cppkafka is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/cppkafka is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal cppkafka") set (USE_INTERNAL_RDKAFKA_LIBRARY 0) endif() @@ -20,7 +20,7 @@ endif () if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/librdkafka/CMakeLists.txt") if(USE_INTERNAL_RDKAFKA_LIBRARY OR MISSING_INTERNAL_CPPKAFKA_LIBRARY) - message (WARNING "submodule contrib/librdkafka is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/librdkafka is missing. 
to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal rdkafka") set (USE_INTERNAL_RDKAFKA_LIBRARY 0) endif() diff --git a/cmake/find/re2.cmake b/cmake/find/re2.cmake index 87bc974c788..09240f33f7d 100644 --- a/cmake/find/re2.cmake +++ b/cmake/find/re2.cmake @@ -2,7 +2,7 @@ option (USE_INTERNAL_RE2_LIBRARY "Set to FALSE to use system re2 library instead if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/re2/CMakeLists.txt") if(USE_INTERNAL_RE2_LIBRARY) - message(WARNING "submodule contrib/re2 is missing. to fix try run: \n git submodule update --init --recursive") + message(WARNING "submodule contrib/re2 is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal re2 library") endif() set(USE_INTERNAL_RE2_LIBRARY 0) diff --git a/cmake/find/rocksdb.cmake b/cmake/find/rocksdb.cmake index 94278a603d7..109eabc271b 100644 --- a/cmake/find/rocksdb.cmake +++ b/cmake/find/rocksdb.cmake @@ -15,7 +15,7 @@ option(USE_INTERNAL_ROCKSDB_LIBRARY "Set to FALSE to use system ROCKSDB library if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/rocksdb/CMakeLists.txt") if (USE_INTERNAL_ROCKSDB_LIBRARY) - message (WARNING "submodule contrib is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib is missing. to fix try run: \n git submodule update --init") message(${RECONFIGURE_MESSAGE_LEVEL} "cannot find internal rocksdb") endif() set (MISSING_INTERNAL_ROCKSDB 1) diff --git a/cmake/find/s2geometry.cmake b/cmake/find/s2geometry.cmake index 2364c6ba193..348805b342e 100644 --- a/cmake/find/s2geometry.cmake +++ b/cmake/find/s2geometry.cmake @@ -3,7 +3,7 @@ option(ENABLE_S2_GEOMETRY "Enable S2 geometry library" ${ENABLE_LIBRARIES}) if (ENABLE_S2_GEOMETRY) if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/s2geometry") - message (WARNING "submodule contrib/s2geometry is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/s2geometry is missing. to fix try run: \n git submodule update --init") set (ENABLE_S2_GEOMETRY 0) set (USE_S2_GEOMETRY 0) else() diff --git a/cmake/find/s3.cmake b/cmake/find/s3.cmake index a2ed3e416d0..9a10c3f13ef 100644 --- a/cmake/find/s3.cmake +++ b/cmake/find/s3.cmake @@ -23,7 +23,7 @@ if (NOT USE_INTERNAL_AWS_S3_LIBRARY) endif() if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/aws/aws-cpp-sdk-s3") - message (WARNING "submodule contrib/aws is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/aws is missing. to fix try run: \n git submodule update --init") if (USE_INTERNAL_AWS_S3_LIBRARY) message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal S3 library") endif () diff --git a/cmake/find/sentry.cmake b/cmake/find/sentry.cmake index a986599abce..4283e75f9ef 100644 --- a/cmake/find/sentry.cmake +++ b/cmake/find/sentry.cmake @@ -2,7 +2,7 @@ set (SENTRY_LIBRARY "sentry") set (SENTRY_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/sentry-native/include") if (NOT EXISTS "${SENTRY_INCLUDE_DIR}/sentry.h") - message (WARNING "submodule contrib/sentry-native is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/sentry-native is missing. 
to fix try run: \n git submodule update --init") if (USE_SENTRY) message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal sentry library") endif() diff --git a/cmake/find/simdjson.cmake b/cmake/find/simdjson.cmake index cffe20bdb2d..bf22a331f04 100644 --- a/cmake/find/simdjson.cmake +++ b/cmake/find/simdjson.cmake @@ -1,7 +1,7 @@ option (USE_SIMDJSON "Use simdjson" ${ENABLE_LIBRARIES}) if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/simdjson/include/simdjson.h") - message (WARNING "submodule contrib/simdjson is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/simdjson is missing. to fix try run: \n git submodule update --init") if (USE_SIMDJSON) message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal simdjson library") endif() diff --git a/cmake/find/sqlite.cmake b/cmake/find/sqlite.cmake index cfa33fdebbb..083a9faea59 100644 --- a/cmake/find/sqlite.cmake +++ b/cmake/find/sqlite.cmake @@ -5,7 +5,7 @@ if (NOT ENABLE_SQLITE) endif() if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/sqlite-amalgamation/sqlite3.c") - message (WARNING "submodule contrib/sqlite3-amalgamation is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/sqlite3-amalgamation is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal sqlite library") set (USE_SQLITE 0) return() diff --git a/cmake/find/ssl.cmake b/cmake/find/ssl.cmake index fdc0bfb27d3..fb411b93593 100644 --- a/cmake/find/ssl.cmake +++ b/cmake/find/ssl.cmake @@ -13,7 +13,7 @@ option(USE_INTERNAL_SSL_LIBRARY "Set to FALSE to use system *ssl library instead if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/boringssl/README.md") if(USE_INTERNAL_SSL_LIBRARY) - message(WARNING "submodule contrib/boringssl is missing. to fix try run: \n git submodule update --init --recursive") + message(WARNING "submodule contrib/boringssl is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal ssl library") endif() set(USE_INTERNAL_SSL_LIBRARY 0) diff --git a/cmake/find/stats.cmake b/cmake/find/stats.cmake index dea108ed920..589da1603d5 100644 --- a/cmake/find/stats.cmake +++ b/cmake/find/stats.cmake @@ -2,11 +2,11 @@ option(ENABLE_STATS "Enable StatsLib library" ${ENABLE_LIBRARIES}) if (ENABLE_STATS) if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/stats") - message (WARNING "submodule contrib/stats is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/stats is missing. to fix try run: \n git submodule update --init") set (ENABLE_STATS 0) set (USE_STATS 0) elseif (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/gcem") - message (WARNING "submodule contrib/gcem is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/gcem is missing. to fix try run: \n git submodule update --init") set (ENABLE_STATS 0) set (USE_STATS 0) else() diff --git a/cmake/find/xz.cmake b/cmake/find/xz.cmake index 0d19859c6b1..f4c230859bc 100644 --- a/cmake/find/xz.cmake +++ b/cmake/find/xz.cmake @@ -2,7 +2,7 @@ option (USE_INTERNAL_XZ_LIBRARY "Set to OFF to use system xz (lzma) library inst if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/xz/src/liblzma/api/lzma.h") if(USE_INTERNAL_XZ_LIBRARY) - message(WARNING "submodule contrib/xz is missing. to fix try run: \n git submodule update --init --recursive") + message(WARNING "submodule contrib/xz is missing. 
to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal xz (lzma) library") set(USE_INTERNAL_XZ_LIBRARY 0) endif() diff --git a/cmake/find/yaml-cpp.cmake b/cmake/find/yaml-cpp.cmake index 4633d559980..2aba6808e31 100644 --- a/cmake/find/yaml-cpp.cmake +++ b/cmake/find/yaml-cpp.cmake @@ -5,5 +5,5 @@ if (NOT USE_YAML_CPP) endif() if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/yaml-cpp/README.md") - message (ERROR "submodule contrib/yaml-cpp is missing. to fix try run: \n git submodule update --init --recursive") + message (ERROR "submodule contrib/yaml-cpp is missing. to fix try run: \n git submodule update --init") endif() diff --git a/cmake/find/zlib.cmake b/cmake/find/zlib.cmake index f66f9e6713d..50a5bc63d15 100644 --- a/cmake/find/zlib.cmake +++ b/cmake/find/zlib.cmake @@ -12,7 +12,7 @@ endif () if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/${INTERNAL_ZLIB_NAME}/zlib.h") if(USE_INTERNAL_ZLIB_LIBRARY) - message(WARNING "submodule contrib/${INTERNAL_ZLIB_NAME} is missing. to fix try run: \n git submodule update --init --recursive") + message(WARNING "submodule contrib/${INTERNAL_ZLIB_NAME} is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal zlib library") endif() set(USE_INTERNAL_ZLIB_LIBRARY 0) diff --git a/cmake/find/zstd.cmake b/cmake/find/zstd.cmake index b12bb701e0e..812e1eed139 100644 --- a/cmake/find/zstd.cmake +++ b/cmake/find/zstd.cmake @@ -2,7 +2,7 @@ option (USE_INTERNAL_ZSTD_LIBRARY "Set to FALSE to use system zstd library inste if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/zstd/lib/zstd.h") if(USE_INTERNAL_ZSTD_LIBRARY) - message(WARNING "submodule contrib/zstd is missing. to fix try run: \n git submodule update --init --recursive") + message(WARNING "submodule contrib/zstd is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal zstd library") set(USE_INTERNAL_ZSTD_LIBRARY 0) endif() diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index f4b99603554..2e37522f1b4 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -189,7 +189,7 @@ function clone_submodules ) git submodule sync - git submodule update --depth 1 --init --recursive "${SUBMODULES_TO_UPDATE[@]}" + git submodule update --depth 1 --init "${SUBMODULES_TO_UPDATE[@]}" git submodule foreach git reset --hard git submodule foreach git checkout @ -f git submodule foreach git clean -xfd diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index be6a08d397c..024ce27d60d 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -37,7 +37,7 @@ Next, you need to download the source files onto your working machine. This is c In the command line terminal run: - git clone --recursive git@github.com:your_github_username/ClickHouse.git + git clone git@github.com:your_github_username/ClickHouse.git cd ClickHouse Note: please, substitute *your_github_username* with what is appropriate! @@ -79,7 +79,7 @@ After successfully running this command you will be able to pull updates from th Working with submodules in git could be painful. Next commands will help to manage it: - # ! each command accepts --recursive + # ! each command accepts # Update remote URLs for submodules. Barely rare case git submodule sync # Add new submodules @@ -92,16 +92,16 @@ Working with submodules in git could be painful. 
Next commands will help to mana The next commands would help you to reset all submodules to the initial state (!WARNING! - any changes inside will be deleted): # Synchronizes submodules' remote URL with .gitmodules - git submodule sync --recursive + git submodule sync # Update the registered submodules with initialize not yet initialized - git submodule update --init --recursive + git submodule update --init # Reset all changes done after HEAD git submodule foreach git reset --hard # Clean files from .gitignore git submodule foreach git clean -xfd # Repeat last 4 commands for all submodule - git submodule foreach git submodule sync --recursive - git submodule foreach git submodule update --init --recursive + git submodule foreach git submodule sync + git submodule foreach git submodule update --init git submodule foreach git submodule foreach git reset --hard git submodule foreach git submodule foreach git clean -xfd diff --git a/docs/ja/development/developer-instruction.md b/docs/ja/development/developer-instruction.md index 3eafbf0481d..c95dc0e2ea4 100644 --- a/docs/ja/development/developer-instruction.md +++ b/docs/ja/development/developer-instruction.md @@ -41,7 +41,7 @@ Ubuntuでこれを行うには、コマンドラインターミナルで実行 コマンドラインターミナルで実行: - git clone --recursive git@github.com:your_github_username/ClickHouse.git + git clone git@github.com:your_github_username/ClickHouse.git cd ClickHouse 注:、代理して下さい *your_github_username* 適切なもので! @@ -83,7 +83,7 @@ ClickHouseリポジトリは以下を使用します `submodules`. That is what Gitでサブモジュールを操作するのは苦痛です。 次のコマンドは管理に役立ちます: - # ! each command accepts --recursive + # ! each command accepts # Update remote URLs for submodules. Barely rare case git submodule sync # Add new submodules @@ -96,16 +96,16 @@ Gitでサブモジュールを操作するのは苦痛です。 次のコマン 次のコマンドは、すべてのサブモジュールを初期状態にリセットするのに役立ちます(!ツづツつキツ。 -内部の変更は削除されます): # Synchronizes submodules' remote URL with .gitmodules - git submodule sync --recursive + git submodule sync # Update the registered submodules with initialize not yet initialized - git submodule update --init --recursive + git submodule update --init # Reset all changes done after HEAD git submodule foreach git reset --hard # Clean files from .gitignore git submodule foreach git clean -xfd # Repeat last 4 commands for all submodule - git submodule foreach git submodule sync --recursive - git submodule foreach git submodule update --init --recursive + git submodule foreach git submodule sync + git submodule foreach git submodule update --init git submodule foreach git submodule foreach git reset --hard git submodule foreach git submodule foreach git clean -xfd diff --git a/docs/ru/development/developer-instruction.md b/docs/ru/development/developer-instruction.md index 215a13a465e..8466c709ad1 100644 --- a/docs/ru/development/developer-instruction.md +++ b/docs/ru/development/developer-instruction.md @@ -40,7 +40,7 @@ ClickHouse не работает и не собирается на 32-битны Выполните в терминале: - git clone --recursive git@github.com:ClickHouse/ClickHouse.git + git clone git@github.com:ClickHouse/ClickHouse.git cd ClickHouse Замените первое вхождение слова `ClickHouse` в команде для git на имя вашего аккаунта на GitHub. @@ -82,7 +82,7 @@ ClickHouse не работает и не собирается на 32-битны Работа с сабмодулями git может быть достаточно болезненной. Следующие команды позволят содержать их в порядке: - # ! Каждая команда принимает аргумент --recursive + # ! 
Каждая команда принимает аргумент # Обновить URLs удалённого репозитория для каждого сабмодуля, используется относительно редко git submodule sync # Добавить новые сабмодули @@ -96,16 +96,16 @@ ClickHouse не работает и не собирается на 32-битны # Synchronizes submodules' remote URL with .gitmodules # Обновить URLs удалённого репозитория для каждого сабмодуля - git submodule sync --recursive + git submodule sync # Обновить существующие модули и добавить отсутствующие - git submodule update --init --recursive + git submodule update --init # Удалить все изменения в сабмодуле относительно HEAD git submodule foreach git reset --hard # Очистить игнорируемые файлы git submodule foreach git clean -xfd # Повторить последние 4 команды для каждого из сабмодулей - git submodule foreach git submodule sync --recursive - git submodule foreach git submodule update --init --recursive + git submodule foreach git submodule sync + git submodule foreach git submodule update --init git submodule foreach git submodule foreach git reset --hard git submodule foreach git submodule foreach git clean -xfd diff --git a/docs/zh/development/developer-instruction.md b/docs/zh/development/developer-instruction.md index e37533fb36b..bd7a197f926 100644 --- a/docs/zh/development/developer-instruction.md +++ b/docs/zh/development/developer-instruction.md @@ -29,7 +29,7 @@ ClickHose支持Linux,FreeBSD 及 Mac OS X 系统。 在终端命令行输入下列指令: - git clone --recursive git@guthub.com:your_github_username/ClickHouse.git + git clone git@guthub.com:your_github_username/ClickHouse.git cd ClickHouse 请注意,您需要将*your_github_username* 替换成实际使用的账户名! @@ -71,7 +71,7 @@ ClickHose支持Linux,FreeBSD 及 Mac OS X 系统。 在git中使用子模块可能会很痛苦。 接下来的命令将有助于管理它: - # ! each command accepts --recursive + # ! each command accepts # Update remote URLs for submodules. Barely rare case git submodule sync # Add new submodules @@ -84,16 +84,16 @@ ClickHose支持Linux,FreeBSD 及 Mac OS X 系统。 接下来的命令将帮助您将所有子模块重置为初始状态(!华林! -里面的任何chenges将被删除): # Synchronizes submodules' remote URL with .gitmodules - git submodule sync --recursive + git submodule sync # Update the registered submodules with initialize not yet initialized - git submodule update --init --recursive + git submodule update --init # Reset all changes done after HEAD git submodule foreach git reset --hard # Clean files from .gitignore git submodule foreach git clean -xfd # Repeat last 4 commands for all submodule - git submodule foreach git submodule sync --recursive - git submodule foreach git submodule update --init --recursive + git submodule foreach git submodule sync + git submodule foreach git submodule update --init git submodule foreach git submodule foreach git reset --hard git submodule foreach git submodule foreach git clean -xfd diff --git a/utils/build/build_msvc2017.bat b/utils/build/build_msvc2017.bat deleted file mode 100644 index 880802999ab..00000000000 --- a/utils/build/build_msvc2017.bat +++ /dev/null @@ -1,14 +0,0 @@ - -:: WINDOWS BUILD NOT SUPPORTED! -:: Script only for development - -cd ../.. -git clone --recursive https://github.com/madler/zlib contrib/zlib -md build -cd build - -:: Stage 1: try build client -cmake .. -G "Visual Studio 15 2017 Win64" -DENABLE_CLICKHOUSE_ALL=0 -DENABLE_CLICKHOUSE_CLIENT=1 > cmake.log -cmake --build . --target clickhouse -- /m > build.log -:: Stage 2: try build minimal server -:: Stage 3: enable all possible features (ssl, ...) 
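For reference, a minimal sketch of the non-recursive checkout flow that the changes above standardize on. The commands and the placeholder repository URL are taken from the patched developer docs; treat this as an illustrative summary, not an additional required step:

    # Clone without --recursive; submodules are initialized explicitly afterwards.
    git clone git@github.com:your_github_username/ClickHouse.git
    cd ClickHouse

    # Sync remote URLs from .gitmodules and initialize submodules one level deep
    # (nested submodules are no longer pulled in automatically).
    git submodule sync
    git submodule update --init

    # Optionally reset submodules to a pristine state before a clean build.
    git submodule foreach git reset --hard
    git submodule foreach git clean -xfd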
From ab9d5d8cc789438ab0b01f6b0a4d712e190fed6f Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 18 Oct 2021 06:06:38 +0000 Subject: [PATCH 329/438] Better --- programs/local/LocalServer.cpp | 5 ----- src/Databases/DatabaseOnDisk.cpp | 9 +++++++++ src/Interpreters/loadMetadata.cpp | 2 +- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 0c5f64ea913..cdd5ae13f99 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -579,11 +579,6 @@ void LocalServer::processConfig() { String path = global_context->getPath(); - /// When tables are loaded from .sql we initialize background executors - /// regardless there are MergeTree tables or not, because no better place was found. - /// In other cases it will be initialized only when there are mergeTree tables. - global_context->initializeBackgroundExecutors(); - /// Lock path directory before read status.emplace(fs::path(path) / "status", StatusFile::write_full_info); diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 97e59f53f64..a71d539e3c5 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -53,6 +53,15 @@ std::pair createTableFromAST( ast_create_query.attach = true; ast_create_query.database = database_name; + auto global_context = context->getGlobalContext(); + if (global_context + && global_context->getApplicationType() == Context::ApplicationType::LOCAL + && !global_context->isBackgroundExecutorsInitialized() + && ast_create_query.storage && endsWith(ast_create_query.storage->engine->name, "MergeTree")) + { + global_context->initializeBackgroundExecutors(); + } + if (ast_create_query.as_table_function) { const auto & factory = TableFunctionFactory::instance(); diff --git a/src/Interpreters/loadMetadata.cpp b/src/Interpreters/loadMetadata.cpp index 6a3db48e835..65b2065b2ad 100644 --- a/src/Interpreters/loadMetadata.cpp +++ b/src/Interpreters/loadMetadata.cpp @@ -161,7 +161,7 @@ void loadMetadata(ContextMutablePtr context, const String & default_database_nam bool create_default_db_if_not_exists = !default_database_name.empty(); bool metadata_dir_for_default_db_already_exists = databases.count(default_database_name); if (create_default_db_if_not_exists && !metadata_dir_for_default_db_already_exists) - databases.emplace(default_database_name, path + "/" + escapeForFileName(default_database_name)); + databases.emplace(default_database_name, std::filesystem::path(path) / escapeForFileName(default_database_name)); TablesLoader::Databases loaded_databases; for (const auto & [name, db_path] : databases) From 6d4af3bac1aaeb3c423ad96f19627c700c3f3170 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 18 Oct 2021 10:23:46 +0300 Subject: [PATCH 330/438] Move SquashingTransform to Interpreters (to fix split build) clickhouse_common_io requires clickhouse_core: ld.lld: error: undefined symbol: DB::blocksHaveEqualStructure(DB::Block const&, DB::Block const&) >>> referenced by SquashingTransform.cpp:92 (/src/ch/clickhouse/src/Common/SquashingTransform.cpp:92) >>> src/CMakeFiles/clickhouse_common_io.dir/Common/SquashingTransform.cpp.o:(void DB::SquashingTransform::append(DB::Block&&)) >>> referenced by SquashingTransform.cpp:92 (/src/ch/clickhouse/src/Common/SquashingTransform.cpp:92) >>> src/CMakeFiles/clickhouse_common_io.dir/Common/SquashingTransform.cpp.o:(void DB::SquashingTransform::append(DB::Block const&)) while clickhouse_core requires clickhouse_common_io: 
"clickhouse_core" of type SHARED_LIBRARY depends on "roaring" (weak) depends on "clickhouse_common_io" (weak) depends on "clickhouse_common_config" (weak) depends on "clickhouse_common_zookeeper" (weak) depends on "clickhouse_dictionaries_embedded" (weak) depends on "clickhouse_parsers" (weak) Follow-up for: #30247 (cc @KochetovNicolai) --- src/{Common => Interpreters}/SquashingTransform.cpp | 3 +-- src/{Common => Interpreters}/SquashingTransform.h | 0 src/Processors/Transforms/SquashingChunksTransform.h | 3 ++- src/Storages/MergeTree/MutateTask.cpp | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) rename src/{Common => Interpreters}/SquashingTransform.cpp (98%) rename src/{Common => Interpreters}/SquashingTransform.h (100%) diff --git a/src/Common/SquashingTransform.cpp b/src/Interpreters/SquashingTransform.cpp similarity index 98% rename from src/Common/SquashingTransform.cpp rename to src/Interpreters/SquashingTransform.cpp index 21fa25ed3af..54d9a1db25e 100644 --- a/src/Common/SquashingTransform.cpp +++ b/src/Interpreters/SquashingTransform.cpp @@ -1,5 +1,4 @@ -#include -#include +#include namespace DB diff --git a/src/Common/SquashingTransform.h b/src/Interpreters/SquashingTransform.h similarity index 100% rename from src/Common/SquashingTransform.h rename to src/Interpreters/SquashingTransform.h diff --git a/src/Processors/Transforms/SquashingChunksTransform.h b/src/Processors/Transforms/SquashingChunksTransform.h index 45a0f33d666..bf4a051891b 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.h +++ b/src/Processors/Transforms/SquashingChunksTransform.h @@ -1,6 +1,7 @@ #pragma once + #include -#include +#include namespace DB { diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index eed8cb3822c..7ca676b04bf 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -2,12 +2,12 @@ #include #include +#include +#include #include #include -#include #include #include -#include #include #include #include From ec6b7785015c45a69a9c4224413a19df0a0fe412 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Mon, 18 Oct 2021 10:29:57 +0300 Subject: [PATCH 331/438] Update 02051_symlinks_to_user_files.sh --- .../0_stateless/02051_symlinks_to_user_files.sh | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/02051_symlinks_to_user_files.sh b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh index 53c50542b06..fe3073f9ff2 100755 --- a/tests/queries/0_stateless/02051_symlinks_to_user_files.sh +++ b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh @@ -8,17 +8,18 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # See 01658_read_file_to_string_column.sh user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') -mkdir -p "${user_files_path}/" -chmod 777 "${user_files_path}" +FILE_PATH="${user_files_path}/file/" +mkdir -p ${FILE_PATH} +chmod 777 ${FILE_PATH} -export FILE="test_symlink_${CLICKHOUSE_DATABASE}" +FILE="test_symlink_${CLICKHOUSE_DATABASE}" -symlink_path=${user_files_path}/${FILE} +symlink_path=${FILE_PATH}/${FILE} file_path=$CUR_DIR/${FILE} touch ${file_path} -chmod +w ${file_path} ln -s ${file_path} ${symlink_path} +chmod +w ${symlink_path} function cleanup() { From 09b782a52e8f0a558ee5a75474825a6474dde760 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin 
Date: Mon, 18 Oct 2021 10:39:52 +0300 Subject: [PATCH 332/438] Increase default wait of the server start in clickhouse-test Set --server-check-retries to 90 (and this is ~45 seconds), since right now sometimes it is not enough [1]. [1]: https://clickhouse-test-reports.s3.yandex.net/30191/0e34a9d550cfe6924fe575871f36c44dd44acdaa/functional_stateless_tests_(memory).html#fail1 And the reason I guess is clickhouse-test had been rewritten to http.client in #30065, and since now it does not need to execute clickhouse-client binary, which in debug/sanitizers builds can take also sometime. That said that with clickhouse-client for hung check it was not 15 seconds, but more (each clickhouse-client requires 0.6sec with sanitizers for simple SELECT 1, while w/o 0.1second, also too much should be optimized) --- tests/clickhouse-test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 1e8c2bfb8ad..5f6960e57c4 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1309,7 +1309,7 @@ if __name__ == '__main__': parser.add_argument('-j', '--jobs', default=1, nargs='?', type=int, help='Run all tests in parallel') parser.add_argument('--test-runs', default=1, nargs='?', type=int, help='Run each test many times (useful for e.g. flaky check)') parser.add_argument('-U', '--unified', default=3, type=int, help='output NUM lines of unified context') - parser.add_argument('-r', '--server-check-retries', default=30, type=int, help='Num of tries to execute SELECT 1 before tests started') + parser.add_argument('-r', '--server-check-retries', default=90, type=int, help='Num of tries to execute SELECT 1 before tests started') parser.add_argument('--db-engine', help='Database engine name') parser.add_argument('--replicated-database', action='store_true', default=False, help='Run tests with Replicated database engine') parser.add_argument('--fast-tests-only', action='store_true', default=False, help='Run only fast tests (the tests without the "no-fasttest" tag)') From 109e664188fb658697282d9919b09ca05d90bd62 Mon Sep 17 00:00:00 2001 From: lhuang0928 Date: Mon, 18 Oct 2021 08:53:25 +0000 Subject: [PATCH 333/438] fix date32 comparison with datetime/datetime64 --- src/DataTypes/getLeastSupertype.cpp | 12 +++++++++--- src/Functions/FunctionsComparison.h | 9 +++++---- .../0_stateless/02098_date32_comparison.reference | 2 ++ .../queries/0_stateless/02098_date32_comparison.sql | 2 ++ 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index b77fcdcdfca..f8d10535be2 100644 --- a/src/DataTypes/getLeastSupertype.cpp +++ b/src/DataTypes/getLeastSupertype.cpp @@ -277,7 +277,7 @@ DataTypePtr getLeastSupertype(const DataTypes & types) /// For Date and DateTime/DateTime64, the common type is DateTime/DateTime64. No other types are compatible. 
{ UInt32 have_date = type_ids.count(TypeIndex::Date); - UInt32 have_date32 = type_ids.count(TypeIndex::Date32); + UInt32 have_date32 = type_ids.count(TypeIndex::Date32); UInt32 have_datetime = type_ids.count(TypeIndex::DateTime); UInt32 have_datetime64 = type_ids.count(TypeIndex::DateTime64); @@ -299,8 +299,8 @@ DataTypePtr getLeastSupertype(const DataTypes & types) return std::make_shared(); } - /// For Date and Date32, the common type is Date32 - if (have_datetime == 0 && have_datetime64 == 0) + /// For Date and Date32, the common type is Date32 + if (have_datetime == 0 && have_datetime64 == 0) { for (const auto & type : types) { @@ -309,6 +309,12 @@ DataTypePtr getLeastSupertype(const DataTypes & types) } } + /// For Datetime and Date32, the common type is Datetime64 + if (have_datetime == 1 && have_date32 == 1 && have_datetime64 == 0) + { + return std::make_shared(0); + } + UInt8 max_scale = 0; size_t max_scale_date_time_index = 0; diff --git a/src/Functions/FunctionsComparison.h b/src/Functions/FunctionsComparison.h index 9b94ac589a3..945090781dc 100644 --- a/src/Functions/FunctionsComparison.h +++ b/src/Functions/FunctionsComparison.h @@ -1222,8 +1222,8 @@ public: } else if ((isColumnedAsDecimal(left_type) || isColumnedAsDecimal(right_type))) { - // Comparing Date and DateTime64 requires implicit conversion, - if (date_and_datetime && (isDate(left_type) || isDate(right_type))) + // Comparing Date/Date32 and DateTime64 requires implicit conversion, + if (date_and_datetime && (isDateOrDate32(left_type) || isDateOrDate32(right_type))) { DataTypePtr common_type = getLeastSupertype({left_type, right_type}); ColumnPtr c0_converted = castColumn(col_with_type_and_name_left, common_type); @@ -1247,9 +1247,10 @@ public: ColumnPtr c0_converted = castColumn(col_with_type_and_name_left, common_type); ColumnPtr c1_converted = castColumn(col_with_type_and_name_right, common_type); if (!((res = executeNumLeftType(c0_converted.get(), c1_converted.get())) + || (res = executeNumLeftType(c0_converted.get(), c1_converted.get())) || (res = executeNumLeftType(c0_converted.get(), c1_converted.get())) - || (res = executeNumLeftType(c0_converted.get(), c1_converted.get())))) - throw Exception("Date related common types can only be UInt32 or UInt64", ErrorCodes::LOGICAL_ERROR); + || (res = executeDecimal({c0_converted, common_type, "left"}, {c1_converted, common_type, "right"})))) + throw Exception("Date related common types can only be UInt32/UInt64/Int32/Decimal", ErrorCodes::LOGICAL_ERROR); return res; } else if (left_type->equals(*right_type)) diff --git a/tests/queries/0_stateless/02098_date32_comparison.reference b/tests/queries/0_stateless/02098_date32_comparison.reference index a9e2f17562a..9e1f26e6d5e 100644 --- a/tests/queries/0_stateless/02098_date32_comparison.reference +++ b/tests/queries/0_stateless/02098_date32_comparison.reference @@ -4,3 +4,5 @@ 1 1 1 +1 +1 \ No newline at end of file diff --git a/tests/queries/0_stateless/02098_date32_comparison.sql b/tests/queries/0_stateless/02098_date32_comparison.sql index d0a632977c4..dd4fde790c7 100644 --- a/tests/queries/0_stateless/02098_date32_comparison.sql +++ b/tests/queries/0_stateless/02098_date32_comparison.sql @@ -2,5 +2,7 @@ select toDate32('1990-01-01') = toDate('1990-01-01'); select toDate('1991-01-02') > toDate32('1990-01-01'); select toDate32('1990-01-01') = toDateTime('1990-01-01'); select toDateTime('1991-01-02') > toDate32('1990-01-01'); +select toDate32('1990-01-01') = toDateTime64('1990-01-01',2); +select toDateTime64('1991-01-02',2) 
> toDate32('1990-01-01'); select toDate32('1990-01-01') = '1990-01-01'; select '1991-01-02' > toDate32('1990-01-01'); \ No newline at end of file From 6d24ca4c3ab3b52b4ef1de7d79064d23f4af45bb Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 18 Oct 2021 12:13:24 +0300 Subject: [PATCH 334/438] Review fixes --- programs/keeper/Keeper.cpp | 2 +- programs/server/Server.cpp | 14 +++- src/Interpreters/Context.cpp | 65 +++++++++++++++++-- src/Interpreters/Context.h | 10 +-- .../test_keeper_three_nodes_two_alive/test.py | 6 ++ 5 files changed, 86 insertions(+), 11 deletions(-) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index a3034150219..da3c42a3213 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -358,7 +358,7 @@ int Keeper::main(const std::vector & /*args*/) auto servers = std::make_shared>(); - /// Initialize test keeper RAFT. Do nothing if no nu_keeper_server in config. + /// Initialize keeper RAFT. Do nothing if no keeper_server in config. global_context->initializeKeeperDispatcher(/* start_async = */false); for (const auto & listen_host : listen_hosts) { diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index fac039f2454..22a529df1e0 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -997,9 +997,19 @@ if (ThreadFuzzer::instance().isEffective()) if (config().has("keeper_server")) { #if USE_NURAFT - bool has_connection = has_zookeeper && global_context->tryCheckZooKeeperConnection(); + //// If we don't have configured connection probably someone trying to use clickhouse-server instead + //// of clickhouse-keeper, so start synchronously. + bool can_initialize_keeper_async = false; + + if (has_zookeeper) /// We have configured connection to some zookeeper cluster + { + /// If we cannot connect to some other node from our cluster then we have to wait our Keeper start + /// synchronously. + can_initialize_keeper_async = global_context->tryCheckClientConnectionToMyKeeperCluster(); + } /// Initialize keeper RAFT. - global_context->initializeKeeperDispatcher(has_connection); + global_context->initializeKeeperDispatcher(can_initialize_keeper_async); + for (const auto & listen_host : listen_hosts) { /// TCP Keeper diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 7ae531a6266..a61c2669ef2 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1806,12 +1806,60 @@ zkutil::ZooKeeperPtr Context::getZooKeeper() const return shared->zookeeper; } +namespace +{ -bool Context::tryCheckZooKeeperConnection() const +bool checkZooKeeperConfigIsLocal(const Poco::Util::AbstractConfiguration & config, const std::string & config_name) +{ + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(config_name, keys); + + for (const auto & key : keys) + { + if (startsWith(key, "node")) + { + String host = config.getString(config_name + "." 
+ key + ".host"); + if (isLocalAddress(DNSResolver::instance().resolveHost(host))) + return true; + } + } + return false; +} + +} + + +bool Context::tryCheckClientConnectionToMyKeeperCluster() const { try { - getZooKeeper(); + /// If our server is part of main Keeper cluster + if (checkZooKeeperConfigIsLocal(getConfigRef(), "zookeeper")) + { + LOG_DEBUG(shared->log, "Keeper server is participant of the main zookeeper cluster, will try to connect to it"); + getZooKeeper(); + /// Connected, return true + return true; + } + else + { + Poco::Util::AbstractConfiguration::Keys keys; + getConfigRef().keys("auxiliary_zookeepers", keys); + + /// If our server is part of some auxiliary_zookeeper + for (const auto & aux_zk_name : keys) + { + if (checkZooKeeperConfigIsLocal(getConfigRef(), "auxiliary_zookeepers." + aux_zk_name)) + { + LOG_DEBUG(shared->log, "Our Keeper server is participant of the auxiliary zookeeper cluster ({}), will try to connect to it", aux_zk_name); + getAuxiliaryZooKeeper(aux_zk_name); + /// Connected, return true + return true; + } + } + } + + /// Our server doesn't depend on our Keeper cluster return true; } catch (...) @@ -1860,8 +1908,17 @@ void Context::initializeKeeperDispatcher(bool start_async) const if (config.has("keeper_server")) { bool is_standalone_app = getApplicationType() == ApplicationType::KEEPER; - if (start_async && !is_standalone_app) - LOG_INFO(shared->log, "Connected to ZooKeeper (or Keeper) before internal Keeper start, will wait for Keeper asynchronously"); + if (start_async) + { + assert(!is_standalone_app); + LOG_INFO(shared->log, "Connected to ZooKeeper (or Keeper) before internal Keeper start or we don't depend on our Keeper cluster" + ", will wait for Keeper asynchronously"); + } + else + { + LOG_INFO(shared->log, "Cannot connect to ZooKeeper (or Keeper) before internal Keeper start," + "will wait for Keeper synchronously"); + } shared->keeper_storage_dispatcher = std::make_shared(); shared->keeper_storage_dispatcher->initialize(config, is_standalone_app, start_async); diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index dc74c4ac49a..5a28e3fac97 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #if !defined(ARCADIA_BUILD) @@ -664,10 +665,11 @@ public: /// Same as above but return a zookeeper connection from auxiliary_zookeepers configuration entry. std::shared_ptr getAuxiliaryZooKeeper(const String & name) const; - /// Try to connect to ZooKeeper using getZooKeeper. Useful for internal - /// Keeper start (check connection to some other node). Return true if - /// connected successfully (without exception). - bool tryCheckZooKeeperConnection() const; + /// Try to connect to Keeper using get(Auxiliary)ZooKeeper. Useful for + /// internal Keeper start (check connection to some other node). Return true + /// if connected successfully (without exception) or our zookeeper client + /// connection configured for some other cluster without our node. 
+ bool tryCheckClientConnectionToMyKeeperCluster() const; UInt32 getZooKeeperSessionUptime() const; diff --git a/tests/integration/test_keeper_three_nodes_two_alive/test.py b/tests/integration/test_keeper_three_nodes_two_alive/test.py index 2c13d3ef22b..eb63d28b3e2 100644 --- a/tests/integration/test_keeper_three_nodes_two_alive/test.py +++ b/tests/integration/test_keeper_three_nodes_two_alive/test.py @@ -49,6 +49,9 @@ def test_start_offline(started_cluster): time.sleep(3) p.map(start, [node2, node3]) + assert node2.contains_in_log("Cannot connect to ZooKeeper (or Keeper) before internal Keeper start") + assert node3.contains_in_log("Cannot connect to ZooKeeper (or Keeper) before internal Keeper start") + node2_zk = get_fake_zk("node2") node2_zk.create("/test_dead", b"data") finally: @@ -68,6 +71,9 @@ def test_start_non_existing(started_cluster): time.sleep(3) p.map(start, [node2, node1]) + assert node1.contains_in_log("Cannot connect to ZooKeeper (or Keeper) before internal Keeper start") + assert node2.contains_in_log("Cannot connect to ZooKeeper (or Keeper) before internal Keeper start") + node2_zk = get_fake_zk("node2") node2_zk.create("/test_non_exising", b"data") finally: From 1131a3ebe8056071b6a3aad19f88e53d90cc5ab9 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 18 Oct 2021 12:15:08 +0300 Subject: [PATCH 335/438] fix test --- .../0_stateless/01604_explain_ast_of_nonselect_query.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01604_explain_ast_of_nonselect_query.reference b/tests/queries/0_stateless/01604_explain_ast_of_nonselect_query.reference index 3545278ec1a..3c92ffb0a8c 100644 --- a/tests/queries/0_stateless/01604_explain_ast_of_nonselect_query.reference +++ b/tests/queries/0_stateless/01604_explain_ast_of_nonselect_query.reference @@ -1,6 +1,6 @@ AlterQuery t1 (children 1) ExpressionList (children 1) - AlterCommand 32 (children 1) + AlterCommand 33 (children 1) Function equals (children 1) ExpressionList (children 2) Identifier date From 6e479b301a3052d691e81770fe4a076ca5718479 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 18 Oct 2021 12:54:12 +0300 Subject: [PATCH 336/438] Update memory optimisation for MergingSorted. 
--- src/Interpreters/InterpreterSelectQuery.cpp | 4 +--- src/Processors/Merges/AggregatingSortedTransform.h | 2 +- src/Processors/Merges/CollapsingSortedTransform.h | 2 +- .../Merges/FinishAggregatingInOrderTransform.h | 2 +- src/Processors/Merges/GraphiteRollupSortedTransform.h | 2 +- src/Processors/Merges/IMergingTransform.cpp | 9 ++++++--- src/Processors/Merges/IMergingTransform.h | 8 ++++---- src/Processors/Merges/MergingSortedTransform.cpp | 3 +-- src/Processors/Merges/MergingSortedTransform.h | 1 - src/Processors/Merges/ReplacingSortedTransform.h | 2 +- src/Processors/Merges/SummingSortedTransform.h | 2 +- src/Processors/Merges/VersionedCollapsingTransform.h | 2 +- src/Processors/QueryPlan/FinishSortingStep.cpp | 8 ++------ src/Processors/QueryPlan/FinishSortingStep.h | 4 +--- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 3 +-- src/Processors/Transforms/ColumnGathererTransform.cpp | 2 +- src/Processors/Transforms/MergeSortingTransform.cpp | 1 - .../tests/gtest_blocks_size_merging_streams.cpp | 4 ++-- src/Storages/MergeTree/MergeTask.cpp | 2 +- 19 files changed, 27 insertions(+), 36 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 5dbde2344d3..35c8c32c65b 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -2303,14 +2303,12 @@ void InterpreterSelectQuery::executeOrderOptimized(QueryPlan & query_plan, Input { const Settings & settings = context->getSettingsRef(); - const auto & query = getSelectQuery(); auto finish_sorting_step = std::make_unique( query_plan.getCurrentDataStream(), input_sorting_info->order_key_prefix_descr, output_order_descr, settings.max_block_size, - limit, - query.hasFiltration()); + limit); query_plan.addStep(std::move(finish_sorting_step)); } diff --git a/src/Processors/Merges/AggregatingSortedTransform.h b/src/Processors/Merges/AggregatingSortedTransform.h index e8bf90c2b31..b0cdf4c8a3c 100644 --- a/src/Processors/Merges/AggregatingSortedTransform.h +++ b/src/Processors/Merges/AggregatingSortedTransform.h @@ -16,7 +16,7 @@ public: const Block & header, size_t num_inputs, SortDescription description_, size_t max_block_size) : IMergingTransform( - num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false, + num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, header, num_inputs, std::move(description_), diff --git a/src/Processors/Merges/CollapsingSortedTransform.h b/src/Processors/Merges/CollapsingSortedTransform.h index 87c466f31e8..a37e1c8402f 100644 --- a/src/Processors/Merges/CollapsingSortedTransform.h +++ b/src/Processors/Merges/CollapsingSortedTransform.h @@ -20,7 +20,7 @@ public: WriteBuffer * out_row_sources_buf_ = nullptr, bool use_average_block_sizes = false) : IMergingTransform( - num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false, + num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, header, num_inputs, std::move(description_), diff --git a/src/Processors/Merges/FinishAggregatingInOrderTransform.h b/src/Processors/Merges/FinishAggregatingInOrderTransform.h index 6d5e334311f..58bd399afda 100644 --- a/src/Processors/Merges/FinishAggregatingInOrderTransform.h +++ b/src/Processors/Merges/FinishAggregatingInOrderTransform.h @@ -19,7 +19,7 @@ public: SortDescription description, size_t max_block_size) : IMergingTransform( - num_inputs, header, header, /*have_all_inputs_=*/ true, 
/*has_limit_below_one_block_=*/ false, + num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, header, num_inputs, params, diff --git a/src/Processors/Merges/GraphiteRollupSortedTransform.h b/src/Processors/Merges/GraphiteRollupSortedTransform.h index 46272f00eed..e6307c629ea 100644 --- a/src/Processors/Merges/GraphiteRollupSortedTransform.h +++ b/src/Processors/Merges/GraphiteRollupSortedTransform.h @@ -15,7 +15,7 @@ public: SortDescription description_, size_t max_block_size, Graphite::Params params_, time_t time_of_merge_) : IMergingTransform( - num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false, + num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, header, num_inputs, std::move(description_), diff --git a/src/Processors/Merges/IMergingTransform.cpp b/src/Processors/Merges/IMergingTransform.cpp index cba78390c97..2e0e21b7fe0 100644 --- a/src/Processors/Merges/IMergingTransform.cpp +++ b/src/Processors/Merges/IMergingTransform.cpp @@ -15,10 +15,10 @@ IMergingTransformBase::IMergingTransformBase( const Block & input_header, const Block & output_header, bool have_all_inputs_, - bool has_limit_below_one_block_) + size_t limit_hint_) : IProcessor(InputPorts(num_inputs, input_header), {output_header}) , have_all_inputs(have_all_inputs_) - , has_limit_below_one_block(has_limit_below_one_block_) + , limit_hint(limit_hint_) { } @@ -79,7 +79,10 @@ IProcessor::Status IMergingTransformBase::prepareInitializeInputs() /// setNotNeeded after reading first chunk, because in optimismtic case /// (e.g. with optimized 'ORDER BY primary_key LIMIT n' and small 'n') /// we won't have to read any chunks anymore; - auto chunk = input.pull(has_limit_below_one_block); + auto chunk = input.pull(limit_hint != 0); + if (limit_hint && chunk.getNumRows() < limit_hint) + input.setNeeded(); + if (!chunk.hasRows()) { if (!input.isFinished()) diff --git a/src/Processors/Merges/IMergingTransform.h b/src/Processors/Merges/IMergingTransform.h index 8b0a44ae025..155b705450b 100644 --- a/src/Processors/Merges/IMergingTransform.h +++ b/src/Processors/Merges/IMergingTransform.h @@ -17,7 +17,7 @@ public: const Block & input_header, const Block & output_header, bool have_all_inputs_, - bool has_limit_below_one_block_); + size_t limit_hint_); OutputPort & getOutputPort() { return outputs.front(); } @@ -67,7 +67,7 @@ private: std::vector input_states; std::atomic have_all_inputs; bool is_initialized = false; - bool has_limit_below_one_block = false; + size_t limit_hint = 0; IProcessor::Status prepareInitializeInputs(); }; @@ -83,9 +83,9 @@ public: const Block & input_header, const Block & output_header, bool have_all_inputs_, - bool has_limit_below_one_block_, + size_t limit_hint_, Args && ... args) - : IMergingTransformBase(num_inputs, input_header, output_header, have_all_inputs_, has_limit_below_one_block_) + : IMergingTransformBase(num_inputs, input_header, output_header, have_all_inputs_, limit_hint_) , algorithm(std::forward(args) ...) 
{ } diff --git a/src/Processors/Merges/MergingSortedTransform.cpp b/src/Processors/Merges/MergingSortedTransform.cpp index 667972e3cf6..ec864b561e9 100644 --- a/src/Processors/Merges/MergingSortedTransform.cpp +++ b/src/Processors/Merges/MergingSortedTransform.cpp @@ -13,13 +13,12 @@ MergingSortedTransform::MergingSortedTransform( SortDescription description_, size_t max_block_size, UInt64 limit_, - bool has_limit_below_one_block_, WriteBuffer * out_row_sources_buf_, bool quiet_, bool use_average_block_sizes, bool have_all_inputs_) : IMergingTransform( - num_inputs, header, header, have_all_inputs_, has_limit_below_one_block_, + num_inputs, header, header, have_all_inputs_, limit_, header, num_inputs, std::move(description_), diff --git a/src/Processors/Merges/MergingSortedTransform.h b/src/Processors/Merges/MergingSortedTransform.h index 1fa9b1275bd..93bd36d8aec 100644 --- a/src/Processors/Merges/MergingSortedTransform.h +++ b/src/Processors/Merges/MergingSortedTransform.h @@ -17,7 +17,6 @@ public: SortDescription description, size_t max_block_size, UInt64 limit_ = 0, - bool has_limit_below_one_block_ = false, WriteBuffer * out_row_sources_buf_ = nullptr, bool quiet_ = false, bool use_average_block_sizes = false, diff --git a/src/Processors/Merges/ReplacingSortedTransform.h b/src/Processors/Merges/ReplacingSortedTransform.h index e760cdf0d2b..dfb386684fc 100644 --- a/src/Processors/Merges/ReplacingSortedTransform.h +++ b/src/Processors/Merges/ReplacingSortedTransform.h @@ -18,7 +18,7 @@ public: WriteBuffer * out_row_sources_buf_ = nullptr, bool use_average_block_sizes = false) : IMergingTransform( - num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false, + num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, header, num_inputs, std::move(description_), diff --git a/src/Processors/Merges/SummingSortedTransform.h b/src/Processors/Merges/SummingSortedTransform.h index 0287caed5aa..0530ac2e96b 100644 --- a/src/Processors/Merges/SummingSortedTransform.h +++ b/src/Processors/Merges/SummingSortedTransform.h @@ -19,7 +19,7 @@ public: const Names & partition_key_columns, size_t max_block_size) : IMergingTransform( - num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false, + num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, header, num_inputs, std::move(description_), diff --git a/src/Processors/Merges/VersionedCollapsingTransform.h b/src/Processors/Merges/VersionedCollapsingTransform.h index f260e20f1da..5eced1cb58d 100644 --- a/src/Processors/Merges/VersionedCollapsingTransform.h +++ b/src/Processors/Merges/VersionedCollapsingTransform.h @@ -19,7 +19,7 @@ public: WriteBuffer * out_row_sources_buf_ = nullptr, bool use_average_block_sizes = false) : IMergingTransform( - num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false, + num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, header, num_inputs, std::move(description_), diff --git a/src/Processors/QueryPlan/FinishSortingStep.cpp b/src/Processors/QueryPlan/FinishSortingStep.cpp index 6347b69901c..9002c804e7b 100644 --- a/src/Processors/QueryPlan/FinishSortingStep.cpp +++ b/src/Processors/QueryPlan/FinishSortingStep.cpp @@ -31,14 +31,12 @@ FinishSortingStep::FinishSortingStep( SortDescription prefix_description_, SortDescription result_description_, size_t max_block_size_, - UInt64 limit_, - bool has_filtration_) + UInt64 limit_) : 
ITransformingStep(input_stream_, input_stream_.header, getTraits(limit_)) , prefix_description(std::move(prefix_description_)) , result_description(std::move(result_description_)) , max_block_size(max_block_size_) , limit(limit_) - , has_filtration(has_filtration_) { /// TODO: check input_stream is sorted by prefix_description. output_stream->sort_description = result_description; @@ -60,14 +58,12 @@ void FinishSortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const if (pipeline.getNumStreams() > 1) { UInt64 limit_for_merging = (need_finish_sorting ? 0 : limit); - bool has_limit_below_one_block = !has_filtration && limit_for_merging && limit_for_merging < max_block_size; auto transform = std::make_shared( pipeline.getHeader(), pipeline.getNumStreams(), prefix_description, max_block_size, - limit_for_merging, - has_limit_below_one_block); + limit_for_merging); pipeline.addTransform(std::move(transform)); } diff --git a/src/Processors/QueryPlan/FinishSortingStep.h b/src/Processors/QueryPlan/FinishSortingStep.h index ac34aea9df4..fd56c4353e7 100644 --- a/src/Processors/QueryPlan/FinishSortingStep.h +++ b/src/Processors/QueryPlan/FinishSortingStep.h @@ -14,8 +14,7 @@ public: SortDescription prefix_description_, SortDescription result_description_, size_t max_block_size_, - UInt64 limit_, - bool has_filtration_); + UInt64 limit_); String getName() const override { return "FinishSorting"; } @@ -32,7 +31,6 @@ private: SortDescription result_description; size_t max_block_size; UInt64 limit; - bool has_filtration; }; } diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index cc400aacf2a..57785a5cc2d 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -485,8 +485,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder( pipe.getHeader(), pipe.numOutputPorts(), sort_description, - max_block_size, - 0, true); + max_block_size); pipe.addTransform(std::move(transform)); } diff --git a/src/Processors/Transforms/ColumnGathererTransform.cpp b/src/Processors/Transforms/ColumnGathererTransform.cpp index ddb8a5a0d68..d0cb4975290 100644 --- a/src/Processors/Transforms/ColumnGathererTransform.cpp +++ b/src/Processors/Transforms/ColumnGathererTransform.cpp @@ -127,7 +127,7 @@ ColumnGathererTransform::ColumnGathererTransform( ReadBuffer & row_sources_buf_, size_t block_preferred_size_) : IMergingTransform( - num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false, + num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, num_inputs, row_sources_buf_, block_preferred_size_) , log(&Poco::Logger::get("ColumnGathererStream")) { diff --git a/src/Processors/Transforms/MergeSortingTransform.cpp b/src/Processors/Transforms/MergeSortingTransform.cpp index e272fd0f183..73817d7de4a 100644 --- a/src/Processors/Transforms/MergeSortingTransform.cpp +++ b/src/Processors/Transforms/MergeSortingTransform.cpp @@ -197,7 +197,6 @@ void MergeSortingTransform::consume(Chunk chunk) description, max_merged_block_size, limit, - false, nullptr, quiet, use_average_block_sizes, diff --git a/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp b/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp index d39c49bb61c..fb10601216e 100644 --- a/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp +++ b/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp @@ -83,7 +83,7 @@ TEST(MergingSortedTest, 
SimpleBlockSizeTest) EXPECT_EQ(pipe.numOutputPorts(), 3); auto transform = std::make_shared(pipe.getHeader(), pipe.numOutputPorts(), sort_description, - DEFAULT_MERGE_BLOCK_SIZE, 0, false, nullptr, false, true); + DEFAULT_MERGE_BLOCK_SIZE, 0, nullptr, false, true); pipe.addTransform(std::move(transform)); @@ -130,7 +130,7 @@ TEST(MergingSortedTest, MoreInterestingBlockSizes) EXPECT_EQ(pipe.numOutputPorts(), 3); auto transform = std::make_shared(pipe.getHeader(), pipe.numOutputPorts(), sort_description, - DEFAULT_MERGE_BLOCK_SIZE, 0, false, nullptr, false, true); + DEFAULT_MERGE_BLOCK_SIZE, 0, nullptr, false, true); pipe.addTransform(std::move(transform)); diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index ea4821fc33d..5cb819c44a4 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -778,7 +778,7 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() { case MergeTreeData::MergingParams::Ordinary: merged_transform = std::make_shared( - header, pipes.size(), sort_description, merge_block_size, 0, false, ctx->rows_sources_write_buf.get(), true, ctx->blocks_are_granules_size); + header, pipes.size(), sort_description, merge_block_size, 0, ctx->rows_sources_write_buf.get(), true, ctx->blocks_are_granules_size); break; case MergeTreeData::MergingParams::Collapsing: From a6937a21cf1f8c22fef470f8fd3461527be6f88d Mon Sep 17 00:00:00 2001 From: lhuang0928 Date: Mon, 18 Oct 2021 09:58:53 +0000 Subject: [PATCH 337/438] fix a bug:reference file without a newline at the end --- tests/queries/0_stateless/02098_date32_comparison.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02098_date32_comparison.reference b/tests/queries/0_stateless/02098_date32_comparison.reference index 9e1f26e6d5e..c18b4e9b082 100644 --- a/tests/queries/0_stateless/02098_date32_comparison.reference +++ b/tests/queries/0_stateless/02098_date32_comparison.reference @@ -5,4 +5,4 @@ 1 1 1 -1 \ No newline at end of file +1 From 042e61febf9cf6ac95ed3acb0d830c0a772ad941 Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 15 Oct 2021 13:53:25 +0300 Subject: [PATCH 338/438] Allow identifiers staring with numbers in multiple joins --- .../JoinToSubqueryTransformVisitor.cpp | 5 ++-- .../01120_join_constants.reference | 2 ++ .../0_stateless/01120_join_constants.sql | 18 +++++++++++++ ...96_join_unusual_identifier_begin.reference | 2 ++ .../02096_join_unusual_identifier_begin.sql | 27 +++++++++++++++++++ 5 files changed, 52 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/02096_join_unusual_identifier_begin.reference create mode 100644 tests/queries/0_stateless/02096_join_unusual_identifier_begin.sql diff --git a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index 3cd076f91bb..ed20b1b2048 100644 --- a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -18,7 +18,7 @@ #include #include #include - +#include namespace DB { @@ -524,7 +524,8 @@ std::vector normalizeColumnNamesExtractNeeded( size_t count = countTablesWithColumn(tables, short_name); - if (count > 1 || aliases.count(short_name)) + /// isValidIdentifierBegin retuired to be consistent with TableJoin::deduplicateAndQualifyColumnNames + if (count > 1 || aliases.count(short_name) || !isValidIdentifierBegin(short_name.at(0))) { const auto & table = tables[*table_pos]; 
IdentifierSemantic::setColumnLongName(*ident, table.table); /// table.column -> table_alias.column diff --git a/tests/queries/0_stateless/01120_join_constants.reference b/tests/queries/0_stateless/01120_join_constants.reference index a16427fbdf7..91838e7a2bb 100644 --- a/tests/queries/0_stateless/01120_join_constants.reference +++ b/tests/queries/0_stateless/01120_join_constants.reference @@ -1,2 +1,4 @@ 1 hello 1 world world 1 2 hello 0 world 1 +1 321 1 123 123 1 +2 321 0 0 123 1 diff --git a/tests/queries/0_stateless/01120_join_constants.sql b/tests/queries/0_stateless/01120_join_constants.sql index 443559c3ea1..d6d6a1be43b 100644 --- a/tests/queries/0_stateless/01120_join_constants.sql +++ b/tests/queries/0_stateless/01120_join_constants.sql @@ -15,3 +15,21 @@ LEFT JOIN arrayJoin([1, 3]) AS k, 'world' ) AS t2 ON t1.k = t2.k; + +SELECT + t1.*, + t2.*, + 123, + isConstant('world') +FROM +( + SELECT + arrayJoin([1, 2]) AS k, + 321 +) AS t1 +LEFT JOIN +( + SELECT + arrayJoin([1, 3]) AS k, + 123 +) AS t2 ON t1.k = t2.k; diff --git a/tests/queries/0_stateless/02096_join_unusual_identifier_begin.reference b/tests/queries/0_stateless/02096_join_unusual_identifier_begin.reference new file mode 100644 index 00000000000..e8cc5e526c0 --- /dev/null +++ b/tests/queries/0_stateless/02096_join_unusual_identifier_begin.reference @@ -0,0 +1,2 @@ +1 1 1 1 1 1 +1 diff --git a/tests/queries/0_stateless/02096_join_unusual_identifier_begin.sql b/tests/queries/0_stateless/02096_join_unusual_identifier_begin.sql new file mode 100644 index 00000000000..fc6be2eff7b --- /dev/null +++ b/tests/queries/0_stateless/02096_join_unusual_identifier_begin.sql @@ -0,0 +1,27 @@ +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; +DROP TABLE IF EXISTS t3; + +CREATE TABLE t1 ( `a1` Int64, `1a1` Int64 ) ENGINE = Memory; +INSERT INTO t1 VALUES (1, 1); + +CREATE TABLE t2 ( `b1` Int64, `1b1` Int64 ) ENGINE = Memory; +INSERT INTO t2 VALUES (1, 1); + +CREATE TABLE t3 ( `c1` Int64, `1c1` Int64 ) ENGINE = Memory; +INSERT INTO t3 VALUES (1, 1); + +SELECT + * +FROM t1 AS t1 +INNER JOIN t2 AS t2 ON t1.a1 = t2.b1 +INNER JOIN t3 AS t3 ON t1.a1 = t3.c1; + +SELECT t2.`1b1` FROM t1 JOIN t2 ON a1 = b1; + +-- Without quialification it doesn't work: +-- SELECT `1b1` FROM t1 JOIN t2 ON a1 = b1; + +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; +DROP TABLE IF EXISTS t3; From a28b048415bb330d5427a25d982378d425eba57e Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 18 Oct 2021 14:22:27 +0300 Subject: [PATCH 339/438] Fix test --- .../0_stateless/01339_client_unrecognized_option.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/01339_client_unrecognized_option.sh b/tests/queries/0_stateless/01339_client_unrecognized_option.sh index f88d890f33c..00c153ec915 100755 --- a/tests/queries/0_stateless/01339_client_unrecognized_option.sh +++ b/tests/queries/0_stateless/01339_client_unrecognized_option.sh @@ -5,14 +5,14 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT xyzgarbage 2>&1 | grep -q "Code: 552" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT xyzgarbage 2>&1 | grep -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL' -$CLICKHOUSE_CLIENT -xyzgarbage 2>&1 | grep -q "Bad arguments" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -xyzgarbage 2>&1 | grep -q "UNRECOGNIZED_ARGUMENTS" && echo 'OK' || echo 'FAIL' -$CLICKHOUSE_CLIENT --xyzgarbage 2>&1 | grep -q "Bad arguments" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT --xyzgarbage 2>&1 | grep -q "UNRECOGNIZED_ARGUMENTS" && echo 'OK' || echo 'FAIL' -cat /etc/passwd | sed 's/:/\t/g' | $CLICKHOUSE_CLIENT --query="SELECT shell, count() AS c FROM passwd GROUP BY shell ORDER BY c DESC" --external --file=- --name=passwd --structure='login String, unused String, uid UInt16, gid UInt16, comment String, home String, shell String' xyzgarbage 2>&1 | grep -q "Code: 552" && echo 'OK' || echo 'FAIL' +cat /etc/passwd | sed 's/:/\t/g' | $CLICKHOUSE_CLIENT --query="SELECT shell, count() AS c FROM passwd GROUP BY shell ORDER BY c DESC" --external --file=- --name=passwd --structure='login String, unused String, uid UInt16, gid UInt16, comment String, home String, shell String' xyzgarbage 2>&1 | grep -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL' -cat /etc/passwd | sed 's/:/\t/g' | $CLICKHOUSE_CLIENT --query="SELECT shell, count() AS c FROM passwd GROUP BY shell ORDER BY c DESC" --external -xyzgarbage --file=- --name=passwd --structure='login String, unused String, uid UInt16, gid UInt16, comment String, home String, shell String' 2>&1 | grep -q "Bad arguments" && echo 'OK' || echo 'FAIL' +cat /etc/passwd | sed 's/:/\t/g' | $CLICKHOUSE_CLIENT --query="SELECT shell, count() AS c FROM passwd GROUP BY shell ORDER BY c DESC" --external -xyzgarbage --file=- --name=passwd --structure='login String, unused String, uid UInt16, gid UInt16, comment String, home String, shell String' 2>&1 | grep -q "UNRECOGNIZED_ARGUMENTS" && echo 'OK' || echo 'FAIL' -cat /etc/passwd | sed 's/:/\t/g' | $CLICKHOUSE_CLIENT --query="SELECT shell, count() AS c FROM passwd GROUP BY shell ORDER BY c DESC" --external --xyzgarbage --file=- --name=passwd --structure='login String, unused String, uid UInt16, gid UInt16, comment String, home String, shell String' 2>&1 | grep -q "Bad arguments" && echo 'OK' || echo 'FAIL' +cat /etc/passwd | sed 's/:/\t/g' | $CLICKHOUSE_CLIENT --query="SELECT shell, count() AS c FROM passwd GROUP BY shell ORDER BY c DESC" --external --xyzgarbage --file=- --name=passwd --structure='login String, unused String, uid UInt16, gid UInt16, comment String, home String, shell String' 2>&1 | grep -q "UNRECOGNIZED_ARGUMENTS" && echo 'OK' || echo 'FAIL' From 6350957709831641b3b7c43d97a641ca86677d50 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 18 Oct 2021 14:30:02 +0300 Subject: [PATCH 340/438] Fix special build --- src/Formats/CapnProtoUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Formats/CapnProtoUtils.cpp b/src/Formats/CapnProtoUtils.cpp index 1dc37ff51ec..ecfa5df8351 100644 --- a/src/Formats/CapnProtoUtils.cpp +++ b/src/Formats/CapnProtoUtils.cpp @@ -422,7 +422,7 @@ void checkCapnProtoSchemaStructure(const capnp::StructSchema & schema, const Blo getCapnProtoFullTypeName(field.getType())); if (!additional_error_message.empty()) e.addMessage(additional_error_message); - throw e; + throw std::move(e); } } } From 3c8e9634205951e373cb25cfef963e5a41401903 Mon Sep 17 00:00:00 2001 From: michon470 
<71978106+michon470@users.noreply.github.com> Date: Mon, 18 Oct 2021 15:40:26 +0300 Subject: [PATCH 341/438] DOCSUP-15198: output_format_csv_null_representation setting translation (#29977) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Перевод без изменений содержания * в ру-версию добавлены ngrambf_v1 и tokenbf_v1 * Update mergetree.md * Update docs/ru/operations/settings/settings.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> * Update docs/ru/operations/settings/settings.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> * Update docs/ru/operations/settings/settings.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> * Update docs/ru/operations/settings/settings.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> * Update docs/ru/operations/settings/settings.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> * Update docs/ru/engines/table-engines/mergetree-family/mergetree.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> * Update docs/ru/engines/table-engines/mergetree-family/mergetree.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> * Update docs/ru/engines/table-engines/mergetree-family/mergetree.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> * Corrections and translation * in EN ver. lines 349-351 were included into the codeblock -- moved them to the proper place * ... Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- .../mergetree-family/mergetree.md | 9 ++--- .../mergetree-family/mergetree.md | 17 +++++++-- docs/ru/operations/settings/settings.md | 37 +++++++++++++++++++ 3 files changed, 54 insertions(+), 9 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 4f473279067..aeaf39e28cb 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -320,7 +320,7 @@ SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234 - `ngrambf_v1(n, size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)` - Stores a [Bloom filter](https://en.wikipedia.org/wiki/Bloom_filter) that contains all ngrams from a block of data. Works only with strings. Can be used for optimization of `equals`, `like` and `in` expressions. + Stores a [Bloom filter](https://en.wikipedia.org/wiki/Bloom_filter) that contains all ngrams from a block of data. Works only with datatypes: [String](../../../sql-reference/data-types/string.md), [FixedString](../../../sql-reference/data-types/fixedstring.md) and [Map](../../../sql-reference/data-types/map.md). Can be used for optimization of `EQUALS`, `LIKE` and `IN` expressions. - `n` — ngram size, - `size_of_bloom_filter_in_bytes` — Bloom filter size in bytes (you can use large values here, for example, 256 or 512, because it can be compressed well). @@ -337,7 +337,9 @@ SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234 Supported data types: `Int*`, `UInt*`, `Float*`, `Enum`, `Date`, `DateTime`, `String`, `FixedString`, `Array`, `LowCardinality`, `Nullable`, `UUID`, `Map`. 
- For `Map` data type client can specify if index should be created for keys or values using [mapKeys](../../../sql-reference/functions/tuple-map-functions.md#mapkeys) or [mapValues](../../../sql-reference/functions/tuple-map-functions.md#mapvalues) function. + For `Map` data type client can specify if index should be created for keys or values using [mapKeys](../../../sql-reference/functions/tuple-map-functions.md#mapkeys) or [mapValues](../../../sql-reference/functions/tuple-map-functions.md#mapvalues) function. + + The following functions can use the filter: [equals](../../../sql-reference/functions/comparison-functions.md), [notEquals](../../../sql-reference/functions/comparison-functions.md), [in](../../../sql-reference/functions/in-functions.md), [notIn](../../../sql-reference/functions/in-functions.md), [has](../../../sql-reference/functions/array-functions.md#hasarr-elem). Example of index creation for `Map` data type @@ -346,9 +348,6 @@ INDEX map_key_index mapKeys(map_column) TYPE bloom_filter GRANULARITY 1 INDEX map_key_index mapValues(map_column) TYPE bloom_filter GRANULARITY 1 ``` - The following functions can use it: [equals](../../../sql-reference/functions/comparison-functions.md), [notEquals](../../../sql-reference/functions/comparison-functions.md), [in](../../../sql-reference/functions/in-functions.md), [notIn](../../../sql-reference/functions/in-functions.md), [has](../../../sql-reference/functions/array-functions.md). - - ``` sql INDEX sample_index (u64 * length(s)) TYPE minmax GRANULARITY 4 diff --git a/docs/ru/engines/table-engines/mergetree-family/mergetree.md b/docs/ru/engines/table-engines/mergetree-family/mergetree.md index bef14924d36..07e67ad1b85 100644 --- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md @@ -316,17 +316,26 @@ SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234 #### Доступные индексы {#available-types-of-indices} -- `minmax` — Хранит минимум и максимум выражения (если выражение - `tuple`, то для каждого элемента `tuple`), используя их для пропуска блоков аналогично первичному ключу. +- `minmax` — хранит минимум и максимум выражения (если выражение - [Tuple](../../../sql-reference/data-types/tuple.md), то для каждого элемента `Tuple`), используя их для пропуска блоков аналогично первичному ключу. -- `set(max_rows)` — Хранит уникальные значения выражения на блоке в количестве не более `max_rows` (если `max_rows = 0`, то ограничений нет), используя их для пропуска блоков, оценивая выполнимость `WHERE` выражения на хранимых данных. +- `set(max_rows)` — хранит уникальные значения выражения на блоке в количестве не более `max_rows` (если `max_rows = 0`, то ограничений нет), используя их для пропуска блоков, оценивая выполнимость `WHERE` выражения на хранимых данных. + +- `ngrambf_v1(n, size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)` — хранит [фильтр Блума](https://en.wikipedia.org/wiki/Bloom_filter), содержащий все N-граммы блока данных. Работает только с данными форматов [String](../../../sql-reference/data-types/string.md), [FixedString](../../../sql-reference/data-types/fixedstring.md) и [Map](../../../sql-reference/data-types/map.md) с ключами типа `String` или `fixedString`. Может быть использован для оптимизации выражений `EQUALS`, `LIKE` и `IN`. 
+ + - `n` — размер N-граммы, + - `size_of_bloom_filter_in_bytes` — размер в байтах фильтра Блума (можно использовать большие значения, например, 256 или 512, поскольку сжатие компенсирует возможные издержки). + - `number_of_hash_functions` — количество хеш-функций, использующихся в фильтре Блума. + - `random_seed` — состояние генератора случайных чисел для хеш-функций фильтра Блума. + +- `tokenbf_v1(size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)` — то же, что и`ngrambf_v1`, но хранит токены вместо N-грамм. Токены — это последовательности символов, разделенные не буквенно-цифровыми символами. - `bloom_filter([false_positive])` — [фильтр Блума](https://en.wikipedia.org/wiki/Bloom_filter) для указанных стоблцов. Необязательный параметр `false_positive` — это вероятность получения ложноположительного срабатывания. Возможные значения: (0, 1). Значение по умолчанию: 0.025. - Поддержанные типы данных: `Int*`, `UInt*`, `Float*`, `Enum`, `Date`, `DateTime`, `String`, `FixedString`. + Поддерживаемые типы данных: `Int*`, `UInt*`, `Float*`, `Enum`, `Date`, `DateTime`, `String`, `FixedString`. - Фильтром могут пользоваться функции: [equals](../../../engines/table-engines/mergetree-family/mergetree.md), [notEquals](../../../engines/table-engines/mergetree-family/mergetree.md), [in](../../../engines/table-engines/mergetree-family/mergetree.md), [notIn](../../../engines/table-engines/mergetree-family/mergetree.md). + Фильтром могут пользоваться функции: [equals](../../../sql-reference/functions/comparison-functions.md), [notEquals](../../../sql-reference/functions/comparison-functions.md), [in](../../../sql-reference/functions/in-functions.md), [notIn](../../../sql-reference/functions/in-functions.md), [has](../../../sql-reference/functions/array-functions.md#hasarr-elem). **Примеры** diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index e6e697e5eb6..71b6ac513fc 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -2684,6 +2684,43 @@ SELECT CAST(toNullable(toInt32(0)) AS Int32) as x, toTypeName(x); Значение по умолчанию: `1`. +## output_format_csv_null_representation {#output_format_csv_null_representation} + +Определяет представление `NULL` для формата выходных данных [CSV](../../interfaces/formats.md#csv). Пользователь может установить в качестве значения любую строку, например, `My NULL`. + +Значение по умолчанию: `\N`. + +**Примеры** + +Запрос: + +```sql +SELECT * FROM csv_custom_null FORMAT CSV; +``` + +Результат: + +```text +788 +\N +\N +``` + +Запрос: + +```sql +SET output_format_csv_null_representation = 'My NULL'; +SELECT * FROM csv_custom_null FORMAT CSV; +``` + +Результат: + +```text +788 +My NULL +My NULL +``` + ## output_format_tsv_null_representation {#output_format_tsv_null_representation} Определяет представление `NULL` для формата выходных данных [TSV](../../interfaces/formats.md#tabseparated). Пользователь может установить в качестве значения любую строку. From 5a4a752cfbf3de81f17e407a3dfccaffa6b27e13 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 18 Oct 2021 17:37:46 +0300 Subject: [PATCH 342/438] Unite sorting steps. 
--- src/Interpreters/InterpreterSelectQuery.cpp | 50 +--- src/Processors/QueryPlan/MergeSortingStep.cpp | 96 ------- src/Processors/QueryPlan/MergeSortingStep.h | 47 ---- .../QueryPlan/MergingSortedStep.cpp | 85 ------ src/Processors/QueryPlan/MergingSortedStep.h | 36 --- .../QueryPlan/PartialSortingStep.cpp | 93 ------- src/Processors/QueryPlan/PartialSortingStep.h | 35 --- src/Processors/QueryPlan/SortingStep.cpp | 241 ++++++++++++++++++ src/Processors/QueryPlan/SortingStep.h | 76 ++++++ 9 files changed, 327 insertions(+), 432 deletions(-) delete mode 100644 src/Processors/QueryPlan/MergeSortingStep.cpp delete mode 100644 src/Processors/QueryPlan/MergeSortingStep.h delete mode 100644 src/Processors/QueryPlan/MergingSortedStep.cpp delete mode 100644 src/Processors/QueryPlan/MergingSortedStep.h delete mode 100644 src/Processors/QueryPlan/PartialSortingStep.cpp delete mode 100644 src/Processors/QueryPlan/PartialSortingStep.h create mode 100644 src/Processors/QueryPlan/SortingStep.cpp create mode 100644 src/Processors/QueryPlan/SortingStep.h diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 35c8c32c65b..c6ef18d7660 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -43,15 +43,12 @@ #include #include #include -#include #include #include #include -#include +#include #include -#include #include -#include #include #include #include @@ -2260,35 +2257,20 @@ void InterpreterSelectQuery::executeWindow(QueryPlan & query_plan) // happens in case of `over ()`. if (!w.full_sort_description.empty() && (i == 0 || !sortIsPrefix(w, *windows_sorted[i - 1]))) { - auto partial_sorting = std::make_unique( - query_plan.getCurrentDataStream(), - w.full_sort_description, - 0 /* LIMIT */, - SizeLimits(settings.max_rows_to_sort, settings.max_bytes_to_sort, settings.sort_overflow_mode)); - partial_sorting->setStepDescription("Sort each block for window '" + w.window_name + "'"); - query_plan.addStep(std::move(partial_sorting)); - auto merge_sorting_step = std::make_unique( + auto sorting_step = std::make_unique( query_plan.getCurrentDataStream(), w.full_sort_description, settings.max_block_size, 0 /* LIMIT */, + SizeLimits(settings.max_rows_to_sort, settings.max_bytes_to_sort, settings.sort_overflow_mode), settings.max_bytes_before_remerge_sort, settings.remerge_sort_lowered_memory_bytes_ratio, settings.max_bytes_before_external_sort, context->getTemporaryVolume(), settings.min_free_disk_space_for_temporary_data); - merge_sorting_step->setStepDescription("Merge sorted blocks for window '" + w.window_name + "'"); - query_plan.addStep(std::move(merge_sorting_step)); - - // First MergeSorted, now MergingSorted. 
- auto merging_sorted = std::make_unique( - query_plan.getCurrentDataStream(), - w.full_sort_description, - settings.max_block_size, - 0 /* LIMIT */); - merging_sorted->setStepDescription("Merge sorted streams for window '" + w.window_name + "'"); - query_plan.addStep(std::move(merging_sorted)); + sorting_step->setStepDescription("Sorting for window '" + w.window_name + "'"); + query_plan.addStep(std::move(sorting_step)); } auto window_step = std::make_unique(query_plan.getCurrentDataStream(), w, w.window_functions); @@ -2303,7 +2285,7 @@ void InterpreterSelectQuery::executeOrderOptimized(QueryPlan & query_plan, Input { const Settings & settings = context->getSettingsRef(); - auto finish_sorting_step = std::make_unique( + auto finish_sorting_step = std::make_unique( query_plan.getCurrentDataStream(), input_sorting_info->order_key_prefix_descr, output_order_descr, @@ -2333,32 +2315,20 @@ void InterpreterSelectQuery::executeOrder(QueryPlan & query_plan, InputOrderInfo const Settings & settings = context->getSettingsRef(); - auto partial_sorting = std::make_unique( - query_plan.getCurrentDataStream(), - output_order_descr, - limit, - SizeLimits(settings.max_rows_to_sort, settings.max_bytes_to_sort, settings.sort_overflow_mode)); - - partial_sorting->setStepDescription("Sort each block for ORDER BY"); - query_plan.addStep(std::move(partial_sorting)); - /// Merge the sorted blocks. - auto merge_sorting_step = std::make_unique( + auto sorting_step = std::make_unique( query_plan.getCurrentDataStream(), output_order_descr, settings.max_block_size, limit, + SizeLimits(settings.max_rows_to_sort, settings.max_bytes_to_sort, settings.sort_overflow_mode), settings.max_bytes_before_remerge_sort, settings.remerge_sort_lowered_memory_bytes_ratio, settings.max_bytes_before_external_sort, context->getTemporaryVolume(), settings.min_free_disk_space_for_temporary_data); - merge_sorting_step->setStepDescription("Merge sorted blocks for ORDER BY"); - query_plan.addStep(std::move(merge_sorting_step)); - - /// If there are several streams, we merge them into one - executeMergeSorted(query_plan, output_order_descr, limit, "for ORDER BY"); + sorting_step->setStepDescription("Sorting for ORDER BY"); } @@ -2376,7 +2346,7 @@ void InterpreterSelectQuery::executeMergeSorted(QueryPlan & query_plan, const So const Settings & settings = context->getSettingsRef(); auto merging_sorted - = std::make_unique(query_plan.getCurrentDataStream(), sort_description, settings.max_block_size, limit); + = std::make_unique(query_plan.getCurrentDataStream(), sort_description, settings.max_block_size, limit); merging_sorted->setStepDescription("Merge sorted streams " + description); query_plan.addStep(std::move(merging_sorted)); diff --git a/src/Processors/QueryPlan/MergeSortingStep.cpp b/src/Processors/QueryPlan/MergeSortingStep.cpp deleted file mode 100644 index 534f05a4d6e..00000000000 --- a/src/Processors/QueryPlan/MergeSortingStep.cpp +++ /dev/null @@ -1,96 +0,0 @@ -#include -#include -#include -#include -#include - -namespace DB -{ - -static ITransformingStep::Traits getTraits(size_t limit) -{ - return ITransformingStep::Traits - { - { - .preserves_distinct_columns = true, - .returns_single_stream = false, - .preserves_number_of_streams = true, - .preserves_sorting = false, - }, - { - .preserves_number_of_rows = limit == 0, - } - }; -} - -MergeSortingStep::MergeSortingStep( - const DataStream & input_stream, - const SortDescription & description_, - size_t max_merged_block_size_, - UInt64 limit_, - size_t max_bytes_before_remerge_, - 
double remerge_lowered_memory_bytes_ratio_, - size_t max_bytes_before_external_sort_, - VolumePtr tmp_volume_, - size_t min_free_disk_space_) - : ITransformingStep(input_stream, input_stream.header, getTraits(limit_)) - , description(description_) - , max_merged_block_size(max_merged_block_size_) - , limit(limit_) - , max_bytes_before_remerge(max_bytes_before_remerge_) - , remerge_lowered_memory_bytes_ratio(remerge_lowered_memory_bytes_ratio_) - , max_bytes_before_external_sort(max_bytes_before_external_sort_), tmp_volume(tmp_volume_) - , min_free_disk_space(min_free_disk_space_) -{ - /// TODO: check input_stream is partially sorted by the same description. - output_stream->sort_description = description; - output_stream->sort_mode = input_stream.has_single_port ? DataStream::SortMode::Stream - : DataStream::SortMode::Port; -} - -void MergeSortingStep::updateLimit(size_t limit_) -{ - if (limit_ && (limit == 0 || limit_ < limit)) - { - limit = limit_; - transform_traits.preserves_number_of_rows = false; - } -} - -void MergeSortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr - { - if (stream_type == QueryPipelineBuilder::StreamType::Totals) - return nullptr; - - return std::make_shared( - header, description, max_merged_block_size, limit, - max_bytes_before_remerge / pipeline.getNumStreams(), - remerge_lowered_memory_bytes_ratio, - max_bytes_before_external_sort, - tmp_volume, - min_free_disk_space); - }); -} - -void MergeSortingStep::describeActions(FormatSettings & settings) const -{ - String prefix(settings.offset, ' '); - settings.out << prefix << "Sort description: "; - dumpSortDescription(description, input_streams.front().header, settings.out); - settings.out << '\n'; - - if (limit) - settings.out << prefix << "Limit " << limit << '\n'; -} - -void MergeSortingStep::describeActions(JSONBuilder::JSONMap & map) const -{ - map.add("Sort Description", explainSortDescription(description, input_streams.front().header)); - - if (limit) - map.add("Limit", limit); -} - -} diff --git a/src/Processors/QueryPlan/MergeSortingStep.h b/src/Processors/QueryPlan/MergeSortingStep.h deleted file mode 100644 index 947ced829c6..00000000000 --- a/src/Processors/QueryPlan/MergeSortingStep.h +++ /dev/null @@ -1,47 +0,0 @@ -#pragma once -#include -#include -#include -#include - -namespace DB -{ - -/// Sorts stream of data. See MergeSortingTransform. -class MergeSortingStep : public ITransformingStep -{ -public: - explicit MergeSortingStep( - const DataStream & input_stream, - const SortDescription & description_, - size_t max_merged_block_size_, - UInt64 limit_, - size_t max_bytes_before_remerge_, - double remerge_lowered_memory_bytes_ratio_, - size_t max_bytes_before_external_sort_, - VolumePtr tmp_volume_, - size_t min_free_disk_space_); - - String getName() const override { return "MergeSorting"; } - - void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - - void describeActions(JSONBuilder::JSONMap & map) const override; - void describeActions(FormatSettings & settings) const override; - - /// Add limit or change it to lower value. 
- void updateLimit(size_t limit_); - -private: - SortDescription description; - size_t max_merged_block_size; - UInt64 limit; - - size_t max_bytes_before_remerge; - double remerge_lowered_memory_bytes_ratio; - size_t max_bytes_before_external_sort; - VolumePtr tmp_volume; - size_t min_free_disk_space; -}; - -} diff --git a/src/Processors/QueryPlan/MergingSortedStep.cpp b/src/Processors/QueryPlan/MergingSortedStep.cpp deleted file mode 100644 index ed1f24db55b..00000000000 --- a/src/Processors/QueryPlan/MergingSortedStep.cpp +++ /dev/null @@ -1,85 +0,0 @@ -#include -#include -#include -#include -#include - -namespace DB -{ - -static ITransformingStep::Traits getTraits(size_t limit) -{ - return ITransformingStep::Traits - { - { - .preserves_distinct_columns = true, - .returns_single_stream = true, - .preserves_number_of_streams = false, - .preserves_sorting = false, - }, - { - .preserves_number_of_rows = limit == 0, - } - }; -} - -MergingSortedStep::MergingSortedStep( - const DataStream & input_stream, - SortDescription sort_description_, - size_t max_block_size_, - UInt64 limit_) - : ITransformingStep(input_stream, input_stream.header, getTraits(limit_)) - , sort_description(std::move(sort_description_)) - , max_block_size(max_block_size_) - , limit(limit_) -{ - /// TODO: check input_stream is partially sorted (each port) by the same description. - output_stream->sort_description = sort_description; - output_stream->sort_mode = DataStream::SortMode::Stream; -} - -void MergingSortedStep::updateLimit(size_t limit_) -{ - if (limit_ && (limit == 0 || limit_ < limit)) - { - limit = limit_; - transform_traits.preserves_number_of_rows = false; - } -} - -void MergingSortedStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - /// If there are several streams, then we merge them into one - if (pipeline.getNumStreams() > 1) - { - - auto transform = std::make_shared( - pipeline.getHeader(), - pipeline.getNumStreams(), - sort_description, - max_block_size, limit); - - pipeline.addTransform(std::move(transform)); - } -} - -void MergingSortedStep::describeActions(FormatSettings & settings) const -{ - String prefix(settings.offset, ' '); - settings.out << prefix << "Sort description: "; - dumpSortDescription(sort_description, input_streams.front().header, settings.out); - settings.out << '\n'; - - if (limit) - settings.out << prefix << "Limit " << limit << '\n'; -} - -void MergingSortedStep::describeActions(JSONBuilder::JSONMap & map) const -{ - map.add("Sort Description", explainSortDescription(sort_description, input_streams.front().header)); - - if (limit) - map.add("Limit", limit); -} - -} diff --git a/src/Processors/QueryPlan/MergingSortedStep.h b/src/Processors/QueryPlan/MergingSortedStep.h deleted file mode 100644 index e886de42ca8..00000000000 --- a/src/Processors/QueryPlan/MergingSortedStep.h +++ /dev/null @@ -1,36 +0,0 @@ -#pragma once -#include -#include -#include -#include - -namespace DB -{ - -/// Merge streams of data into single sorted stream. 
-class MergingSortedStep : public ITransformingStep -{ -public: - explicit MergingSortedStep( - const DataStream & input_stream, - SortDescription sort_description_, - size_t max_block_size_, - UInt64 limit_ = 0); - - String getName() const override { return "MergingSorted"; } - - void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - - void describeActions(JSONBuilder::JSONMap & map) const override; - void describeActions(FormatSettings & settings) const override; - - /// Add limit or change it to lower value. - void updateLimit(size_t limit_); - -private: - SortDescription sort_description; - size_t max_block_size; - UInt64 limit; -}; - -} diff --git a/src/Processors/QueryPlan/PartialSortingStep.cpp b/src/Processors/QueryPlan/PartialSortingStep.cpp deleted file mode 100644 index 420e7b583ca..00000000000 --- a/src/Processors/QueryPlan/PartialSortingStep.cpp +++ /dev/null @@ -1,93 +0,0 @@ -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -static ITransformingStep::Traits getTraits(size_t limit) -{ - return ITransformingStep::Traits - { - { - .preserves_distinct_columns = true, - .returns_single_stream = false, - .preserves_number_of_streams = true, - .preserves_sorting = false, - }, - { - .preserves_number_of_rows = limit == 0, - } - }; -} - -PartialSortingStep::PartialSortingStep( - const DataStream & input_stream, - SortDescription sort_description_, - UInt64 limit_, - SizeLimits size_limits_) - : ITransformingStep(input_stream, input_stream.header, getTraits(limit_)) - , sort_description(std::move(sort_description_)) - , limit(limit_) - , size_limits(size_limits_) -{ - output_stream->sort_description = sort_description; - output_stream->sort_mode = DataStream::SortMode::Chunk; -} - -void PartialSortingStep::updateLimit(size_t limit_) -{ - if (limit_ && (limit == 0 || limit_ < limit)) - { - limit = limit_; - transform_traits.preserves_number_of_rows = false; - } -} - -void PartialSortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr - { - if (stream_type != QueryPipelineBuilder::StreamType::Main) - return nullptr; - - return std::make_shared(header, sort_description, limit); - }); - - StreamLocalLimits limits; - limits.mode = LimitsMode::LIMITS_CURRENT; //-V1048 - limits.size_limits = size_limits; - - pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr - { - if (stream_type != QueryPipelineBuilder::StreamType::Main) - return nullptr; - - auto transform = std::make_shared(header, limits); - return transform; - }); -} - -void PartialSortingStep::describeActions(FormatSettings & settings) const -{ - String prefix(settings.offset, ' '); - settings.out << prefix << "Sort description: "; - dumpSortDescription(sort_description, input_streams.front().header, settings.out); - settings.out << '\n'; - - if (limit) - settings.out << prefix << "Limit " << limit << '\n'; -} - -void PartialSortingStep::describeActions(JSONBuilder::JSONMap & map) const -{ - map.add("Sort Description", explainSortDescription(sort_description, input_streams.front().header)); - - if (limit) - map.add("Limit", limit); -} - -} diff --git a/src/Processors/QueryPlan/PartialSortingStep.h b/src/Processors/QueryPlan/PartialSortingStep.h deleted file mode 100644 index 9b7b8e8baa5..00000000000 --- 
a/src/Processors/QueryPlan/PartialSortingStep.h +++ /dev/null @@ -1,35 +0,0 @@ -#pragma once -#include -#include -#include - -namespace DB -{ - -/// Sort separate chunks of data. -class PartialSortingStep : public ITransformingStep -{ -public: - explicit PartialSortingStep( - const DataStream & input_stream, - SortDescription sort_description_, - UInt64 limit_, - SizeLimits size_limits_); - - String getName() const override { return "PartialSorting"; } - - void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - - void describeActions(JSONBuilder::JSONMap & map) const override; - void describeActions(FormatSettings & settings) const override; - - /// Add limit or change it to lower value. - void updateLimit(size_t limit_); - -private: - SortDescription sort_description; - UInt64 limit; - SizeLimits size_limits; -}; - -} diff --git a/src/Processors/QueryPlan/SortingStep.cpp b/src/Processors/QueryPlan/SortingStep.cpp new file mode 100644 index 00000000000..32b314b1c50 --- /dev/null +++ b/src/Processors/QueryPlan/SortingStep.cpp @@ -0,0 +1,241 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +static ITransformingStep::Traits getTraits(size_t limit) +{ + return ITransformingStep::Traits + { + { + .preserves_distinct_columns = true, + .returns_single_stream = true, + .preserves_number_of_streams = false, + .preserves_sorting = false, + }, + { + .preserves_number_of_rows = limit == 0, + } + }; +} + +SortingStep::SortingStep( + const DataStream & input_stream, + const SortDescription & description_, + size_t max_block_size_, + UInt64 limit_, + SizeLimits size_limits_, + size_t max_bytes_before_remerge_, + double remerge_lowered_memory_bytes_ratio_, + size_t max_bytes_before_external_sort_, + VolumePtr tmp_volume_, + size_t min_free_disk_space_) + : ITransformingStep(input_stream, input_stream.header, getTraits(limit_)) + , type(Type::Full) + , result_description(description_) + , max_block_size(max_block_size_) + , limit(limit_) + , size_limits(size_limits_) + , max_bytes_before_remerge(max_bytes_before_remerge_) + , remerge_lowered_memory_bytes_ratio(remerge_lowered_memory_bytes_ratio_) + , max_bytes_before_external_sort(max_bytes_before_external_sort_), tmp_volume(tmp_volume_) + , min_free_disk_space(min_free_disk_space_) +{ + /// TODO: check input_stream is partially sorted by the same description. + output_stream->sort_description = result_description; + output_stream->sort_mode = DataStream::SortMode::Stream; +} + +SortingStep::SortingStep( + const DataStream & input_stream_, + SortDescription prefix_description_, + SortDescription result_description_, + size_t max_block_size_, + UInt64 limit_) + : ITransformingStep(input_stream_, input_stream_.header, getTraits(limit_)) + , type(Type::FinishSorting) + , prefix_description(std::move(prefix_description_)) + , result_description(std::move(result_description_)) + , max_block_size(max_block_size_) + , limit(limit_) +{ + /// TODO: check input_stream is sorted by prefix_description. 
+ output_stream->sort_description = result_description; + output_stream->sort_mode = DataStream::SortMode::Stream; +} + +SortingStep::SortingStep( + const DataStream & input_stream, + SortDescription sort_description_, + size_t max_block_size_, + UInt64 limit_) + : ITransformingStep(input_stream, input_stream.header, getTraits(limit_)) + , type(Type::MergingSorted) + , result_description(std::move(sort_description_)) + , max_block_size(max_block_size_) + , limit(limit_) +{ + /// TODO: check input_stream is partially sorted (each port) by the same description. + output_stream->sort_description = result_description; + output_stream->sort_mode = DataStream::SortMode::Stream; +} + +void SortingStep::updateLimit(size_t limit_) +{ + if (limit_ && (limit == 0 || limit_ < limit)) + { + limit = limit_; + transform_traits.preserves_number_of_rows = false; + } +} + +void SortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +{ + if (type == Type::FinishSorting) + { + bool need_finish_sorting = (prefix_description.size() < result_description.size()); + if (pipeline.getNumStreams() > 1) + { + UInt64 limit_for_merging = (need_finish_sorting ? 0 : limit); + auto transform = std::make_shared( + pipeline.getHeader(), + pipeline.getNumStreams(), + prefix_description, + max_block_size, + limit_for_merging); + + pipeline.addTransform(std::move(transform)); + } + + if (need_finish_sorting) + { + pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr + { + if (stream_type != QueryPipelineBuilder::StreamType::Main) + return nullptr; + + return std::make_shared(header, result_description, limit); + }); + + /// NOTE limits are not applied to the size of temporary sets in FinishSortingTransform + pipeline.addSimpleTransform([&](const Block & header) -> ProcessorPtr + { + return std::make_shared( + header, prefix_description, result_description, max_block_size, limit); + }); + } + } + else if (type == Type::Full) + { + pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr + { + if (stream_type != QueryPipelineBuilder::StreamType::Main) + return nullptr; + + return std::make_shared(header, result_description, limit); + }); + + StreamLocalLimits limits; + limits.mode = LimitsMode::LIMITS_CURRENT; //-V1048 + limits.size_limits = size_limits; + + pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr + { + if (stream_type != QueryPipelineBuilder::StreamType::Main) + return nullptr; + + auto transform = std::make_shared(header, limits); + return transform; + }); + + pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr + { + if (stream_type == QueryPipelineBuilder::StreamType::Totals) + return nullptr; + + return std::make_shared( + header, result_description, max_block_size, limit, + max_bytes_before_remerge / pipeline.getNumStreams(), + remerge_lowered_memory_bytes_ratio, + max_bytes_before_external_sort, + tmp_volume, + min_free_disk_space); + }); + + /// If there are several streams, then we merge them into one + if (pipeline.getNumStreams() > 1) + { + + auto transform = std::make_shared( + pipeline.getHeader(), + pipeline.getNumStreams(), + result_description, + max_block_size, limit); + + pipeline.addTransform(std::move(transform)); + } + } + else if (type == Type::MergingSorted) + { /// If there are several streams, then we 
merge them into one + if (pipeline.getNumStreams() > 1) + { + + auto transform = std::make_shared( + pipeline.getHeader(), + pipeline.getNumStreams(), + result_description, + max_block_size, limit); + + pipeline.addTransform(std::move(transform)); + } + } +} + +void SortingStep::describeActions(FormatSettings & settings) const +{ + String prefix(settings.offset, ' '); + + if (!prefix_description.empty()) + { + settings.out << prefix << "Prefix sort description: "; + dumpSortDescription(prefix_description, input_streams.front().header, settings.out); + settings.out << '\n'; + + settings.out << prefix << "Result sort description: "; + dumpSortDescription(result_description, input_streams.front().header, settings.out); + settings.out << '\n'; + } + else + { + settings.out << prefix << "Sort description: "; + dumpSortDescription(result_description, input_streams.front().header, settings.out); + settings.out << '\n'; + } + + if (limit) + settings.out << prefix << "Limit " << limit << '\n'; +} + +void SortingStep::describeActions(JSONBuilder::JSONMap & map) const +{ + if (!prefix_description.empty()) + { + map.add("Prefix Sort Description", explainSortDescription(prefix_description, input_streams.front().header)); + map.add("Result Sort Description", explainSortDescription(result_description, input_streams.front().header)); + } + else + map.add("Sort Description", explainSortDescription(result_description, input_streams.front().header)); + + if (limit) + map.add("Limit", limit); +} + +} diff --git a/src/Processors/QueryPlan/SortingStep.h b/src/Processors/QueryPlan/SortingStep.h new file mode 100644 index 00000000000..e34ee2842da --- /dev/null +++ b/src/Processors/QueryPlan/SortingStep.h @@ -0,0 +1,76 @@ +#pragma once +#include +#include +#include +#include + +namespace DB +{ + +/// Sort data stream +class SortingStep : public ITransformingStep +{ +public: + /// Full + SortingStep( + const DataStream & input_stream, + const SortDescription & description_, + size_t max_block_size_, + UInt64 limit_, + SizeLimits size_limits_, + size_t max_bytes_before_remerge_, + double remerge_lowered_memory_bytes_ratio_, + size_t max_bytes_before_external_sort_, + VolumePtr tmp_volume_, + size_t min_free_disk_space_); + + /// FinishSorting + SortingStep( + const DataStream & input_stream_, + SortDescription prefix_description_, + SortDescription result_description_, + size_t max_block_size_, + UInt64 limit_); + + /// MergingSorted + SortingStep( + const DataStream & input_stream, + SortDescription sort_description_, + size_t max_block_size_, + UInt64 limit_ = 0); + + String getName() const override { return "Sorting"; } + + void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; + + void describeActions(JSONBuilder::JSONMap & map) const override; + void describeActions(FormatSettings & settings) const override; + + /// Add limit or change it to lower value. 
+ void updateLimit(size_t limit_); + +private: + + enum class Type + { + Full, + FinishSorting, + MergingSorted, + }; + + Type type; + + SortDescription prefix_description; + SortDescription result_description; + size_t max_block_size; + UInt64 limit; + SizeLimits size_limits; + + size_t max_bytes_before_remerge; + double remerge_lowered_memory_bytes_ratio; + size_t max_bytes_before_external_sort; + VolumePtr tmp_volume; + size_t min_free_disk_space; +}; + +} From b7a53df9de5b8ec3b49fa6a5ee897b12c4757835 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 18 Oct 2021 14:53:42 +0000 Subject: [PATCH 343/438] Send columns description in clickhouse-local --- src/Client/LocalConnection.cpp | 37 +++++++++++++++++-- src/Client/LocalConnection.h | 2 + ...khouse_local_columns_description.reference | 1 + ...00_clickhouse_local_columns_description.sh | 7 ++++ 4 files changed, 44 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/03000_clickhouse_local_columns_description.reference create mode 100755 tests/queries/0_stateless/03000_clickhouse_local_columns_description.sh diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index 4455ba3b9ad..20f28bb5337 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -5,7 +5,7 @@ #include #include #include -#include "Core/Protocol.h" +#include namespace DB @@ -105,6 +105,16 @@ void LocalConnection::sendQuery( state->pushing_executor->start(); state->block = state->pushing_executor->getHeader(); } + + const auto & table_id = query_context->getInsertionTable(); + if (query_context->getSettingsRef().input_format_defaults_for_omitted_fields) + { + if (!table_id.empty()) + { + auto storage_ptr = DatabaseCatalog::instance().getTable(table_id, query_context); + state->columns_description = storage_ptr->getInMemoryMetadataPtr()->getColumns(); + } + } } else if (state->io.pipeline.pulling()) { @@ -117,7 +127,9 @@ void LocalConnection::sendQuery( executor.execute(); } - if (state->block) + if (state->columns_description) + next_packet_type = Protocol::Server::TableColumns; + else if (state->block) next_packet_type = Protocol::Server::Data; } catch (const Exception & e) @@ -337,21 +349,41 @@ Packet LocalConnection::receivePacket() packet.block = std::move(state->block.value()); state->block.reset(); } + next_packet_type.reset(); + break; + } + case Protocol::Server::TableColumns: + { + if (state->columns_description) + { + /// Send external table name (empty name is the main table) + /// (see TCPHandler::sendTableColumns) + packet.multistring_message = {"", state->columns_description->toString()}; + } + + if (state->block) + { + next_packet_type = Protocol::Server::Data; + } + break; } case Protocol::Server::Exception: { packet.exception = std::make_unique(*state->exception); + next_packet_type.reset(); break; } case Protocol::Server::Progress: { packet.progress = std::move(state->progress); state->progress.reset(); + next_packet_type.reset(); break; } case Protocol::Server::EndOfStream: { + next_packet_type.reset(); break; } default: @@ -359,7 +391,6 @@ Packet LocalConnection::receivePacket() "Unknown packet {} for {}", toString(packet.type), getDescription()); } - next_packet_type.reset(); return packet; } diff --git a/src/Client/LocalConnection.h b/src/Client/LocalConnection.h index b596360db64..1cc23defa6e 100644 --- a/src/Client/LocalConnection.h +++ b/src/Client/LocalConnection.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB @@ -33,6 +34,7 @@ struct LocalQueryState /// 
Current block to be sent next. std::optional block; + std::optional columns_description; /// Is request cancelled bool is_cancelled = false; diff --git a/tests/queries/0_stateless/03000_clickhouse_local_columns_description.reference b/tests/queries/0_stateless/03000_clickhouse_local_columns_description.reference new file mode 100644 index 00000000000..0e291f3ac0d --- /dev/null +++ b/tests/queries/0_stateless/03000_clickhouse_local_columns_description.reference @@ -0,0 +1 @@ +1 42 diff --git a/tests/queries/0_stateless/03000_clickhouse_local_columns_description.sh b/tests/queries/0_stateless/03000_clickhouse_local_columns_description.sh new file mode 100755 index 00000000000..f88a8de80f5 --- /dev/null +++ b/tests/queries/0_stateless/03000_clickhouse_local_columns_description.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_LOCAL} --query "create table t (n int, m int default 42) engine=Memory;insert into t values (1, NULL);select * from t" From 7962eddf60c94da5e4d0b25a2853c516bff809b0 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 18 Oct 2021 19:18:09 +0300 Subject: [PATCH 344/438] Review fixes. --- src/Processors/Merges/IMergingTransform.cpp | 2 +- src/Processors/Merges/IMergingTransform.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Processors/Merges/IMergingTransform.cpp b/src/Processors/Merges/IMergingTransform.cpp index 2e0e21b7fe0..52acf36a4d7 100644 --- a/src/Processors/Merges/IMergingTransform.cpp +++ b/src/Processors/Merges/IMergingTransform.cpp @@ -15,7 +15,7 @@ IMergingTransformBase::IMergingTransformBase( const Block & input_header, const Block & output_header, bool have_all_inputs_, - size_t limit_hint_) + UInt64 limit_hint_) : IProcessor(InputPorts(num_inputs, input_header), {output_header}) , have_all_inputs(have_all_inputs_) , limit_hint(limit_hint_) diff --git a/src/Processors/Merges/IMergingTransform.h b/src/Processors/Merges/IMergingTransform.h index 155b705450b..4da49b8155c 100644 --- a/src/Processors/Merges/IMergingTransform.h +++ b/src/Processors/Merges/IMergingTransform.h @@ -17,7 +17,7 @@ public: const Block & input_header, const Block & output_header, bool have_all_inputs_, - size_t limit_hint_); + UInt64 limit_hint_); OutputPort & getOutputPort() { return outputs.front(); } @@ -67,7 +67,7 @@ private: std::vector input_states; std::atomic have_all_inputs; bool is_initialized = false; - size_t limit_hint = 0; + UInt64 limit_hint = 0; IProcessor::Status prepareInitializeInputs(); }; @@ -83,7 +83,7 @@ public: const Block & input_header, const Block & output_header, bool have_all_inputs_, - size_t limit_hint_, + UInt64 limit_hint_, Args && ... args) : IMergingTransformBase(num_inputs, input_header, output_header, have_all_inputs_, limit_hint_) , algorithm(std::forward(args) ...) From ef3597f0543b8ed1b02285934654a6bcc4daf635 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 18 Oct 2021 19:21:49 +0300 Subject: [PATCH 345/438] Fix build. 
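This build fix repoints the query plan optimizations at the unified step introduced two commits earlier: `filterPushDown.cpp` and `limitPushDown.cpp` drop the includes of the deleted step headers and now only need to recognise `SortingStep`. Condensed from the `limitPushDown.cpp` hunk below (ClickHouse internal types, template arguments restored, not a standalone example):

``` cpp
// Condensed from the new tryUpdateLimitForSortingSteps(): a single
// typeid_cast on SortingStep replaces the four per-class branches.
if (auto * sorting = typeid_cast<SortingStep *>(step.get()))
{
    sorting->updateLimit(limit);
    updated = true;
    child = node->children.front();
}
```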
--- .../Optimizations/filterPushDown.cpp | 10 ++----- .../QueryPlan/Optimizations/limitPushDown.cpp | 28 +++---------------- 2 files changed, 6 insertions(+), 32 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index 687946659b6..e81cec723a1 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -7,10 +7,7 @@ #include #include #include -#include -#include -#include -#include +#include #include #include #include @@ -237,10 +234,7 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes // { // } - if (typeid_cast(child.get()) - || typeid_cast(child.get()) - || typeid_cast(child.get()) - || typeid_cast(child.get())) + if (typeid_cast(child.get())) { Names allowed_inputs = child->getOutputStream().header.getNames(); if (auto updated_steps = tryAddNewFilterStep(parent_node, nodes, allowed_inputs)) diff --git a/src/Processors/QueryPlan/Optimizations/limitPushDown.cpp b/src/Processors/QueryPlan/Optimizations/limitPushDown.cpp index 01af6a2bbde..eb65f49103b 100644 --- a/src/Processors/QueryPlan/Optimizations/limitPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/limitPushDown.cpp @@ -2,10 +2,7 @@ #include #include #include -#include -#include -#include -#include +#include #include namespace DB::QueryPlanOptimizations @@ -21,32 +18,15 @@ static bool tryUpdateLimitForSortingSteps(QueryPlan::Node * node, size_t limit) QueryPlan::Node * child = nullptr; bool updated = false; - if (auto * merging_sorted = typeid_cast(step.get())) + if (auto * sorting = typeid_cast(step.get())) { /// TODO: remove LimitStep here. - merging_sorted->updateLimit(limit); + sorting->updateLimit(limit); updated = true; child = node->children.front(); } - else if (auto * finish_sorting = typeid_cast(step.get())) - { - /// TODO: remove LimitStep here. - finish_sorting->updateLimit(limit); - updated = true; - } - else if (auto * merge_sorting = typeid_cast(step.get())) - { - merge_sorting->updateLimit(limit); - updated = true; - child = node->children.front(); - } - else if (auto * partial_sorting = typeid_cast(step.get())) - { - partial_sorting->updateLimit(limit); - updated = true; - } - /// We often have chain PartialSorting -> MergeSorting -> MergingSorted + /// In case we have several sorting steps. /// Try update limit for them also if possible. if (child) tryUpdateLimitForSortingSteps(child, limit); From 34d46245c3987a726c37bd3ed13c54f06ae99c83 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 18 Oct 2021 20:53:30 +0300 Subject: [PATCH 346/438] Try fix some tests. 
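Judging from the diff below, the failing tests trace back to the "Unite sorting steps" refactoring: `InterpreterSelectQuery::executeOrder()` constructed the new `SortingStep` and set its description, but the step was never attached to the plan. This commit adds the missing call and deletes the now-unused `FinishSortingStep` sources:

``` cpp
// The functional part of this fix (InterpreterSelectQuery::executeOrder):
sorting_step->setStepDescription("Sorting for ORDER BY");
query_plan.addStep(std::move(sorting_step));   // previously missing
```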
--- src/Interpreters/InterpreterSelectQuery.cpp | 1 + .../QueryPlan/FinishSortingStep.cpp | 115 ------------------ src/Processors/QueryPlan/FinishSortingStep.h | 36 ------ 3 files changed, 1 insertion(+), 151 deletions(-) delete mode 100644 src/Processors/QueryPlan/FinishSortingStep.cpp delete mode 100644 src/Processors/QueryPlan/FinishSortingStep.h diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index c6ef18d7660..494497fce9a 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -2329,6 +2329,7 @@ void InterpreterSelectQuery::executeOrder(QueryPlan & query_plan, InputOrderInfo settings.min_free_disk_space_for_temporary_data); sorting_step->setStepDescription("Sorting for ORDER BY"); + query_plan.addStep(std::move(sorting_step)); } diff --git a/src/Processors/QueryPlan/FinishSortingStep.cpp b/src/Processors/QueryPlan/FinishSortingStep.cpp deleted file mode 100644 index 9002c804e7b..00000000000 --- a/src/Processors/QueryPlan/FinishSortingStep.cpp +++ /dev/null @@ -1,115 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -static ITransformingStep::Traits getTraits(size_t limit) -{ - return ITransformingStep::Traits - { - { - .preserves_distinct_columns = true, - .returns_single_stream = true, - .preserves_number_of_streams = false, - .preserves_sorting = false, - }, - { - .preserves_number_of_rows = limit == 0, - } - }; -} - -FinishSortingStep::FinishSortingStep( - const DataStream & input_stream_, - SortDescription prefix_description_, - SortDescription result_description_, - size_t max_block_size_, - UInt64 limit_) - : ITransformingStep(input_stream_, input_stream_.header, getTraits(limit_)) - , prefix_description(std::move(prefix_description_)) - , result_description(std::move(result_description_)) - , max_block_size(max_block_size_) - , limit(limit_) -{ - /// TODO: check input_stream is sorted by prefix_description. - output_stream->sort_description = result_description; - output_stream->sort_mode = DataStream::SortMode::Stream; -} - -void FinishSortingStep::updateLimit(size_t limit_) -{ - if (limit_ && (limit == 0 || limit_ < limit)) - { - limit = limit_; - transform_traits.preserves_number_of_rows = false; - } -} - -void FinishSortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - bool need_finish_sorting = (prefix_description.size() < result_description.size()); - if (pipeline.getNumStreams() > 1) - { - UInt64 limit_for_merging = (need_finish_sorting ? 
0 : limit); - auto transform = std::make_shared( - pipeline.getHeader(), - pipeline.getNumStreams(), - prefix_description, - max_block_size, - limit_for_merging); - - pipeline.addTransform(std::move(transform)); - } - - if (need_finish_sorting) - { - pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr - { - if (stream_type != QueryPipelineBuilder::StreamType::Main) - return nullptr; - - return std::make_shared(header, result_description, limit); - }); - - /// NOTE limits are not applied to the size of temporary sets in FinishSortingTransform - pipeline.addSimpleTransform([&](const Block & header) -> ProcessorPtr - { - return std::make_shared( - header, prefix_description, result_description, max_block_size, limit); - }); - } -} - -void FinishSortingStep::describeActions(FormatSettings & settings) const -{ - String prefix(settings.offset, ' '); - - settings.out << prefix << "Prefix sort description: "; - dumpSortDescription(prefix_description, input_streams.front().header, settings.out); - settings.out << '\n'; - - settings.out << prefix << "Result sort description: "; - dumpSortDescription(result_description, input_streams.front().header, settings.out); - settings.out << '\n'; - - if (limit) - settings.out << prefix << "Limit " << limit << '\n'; -} - -void FinishSortingStep::describeActions(JSONBuilder::JSONMap & map) const -{ - map.add("Prefix Sort Description", explainSortDescription(prefix_description, input_streams.front().header)); - map.add("Result Sort Description", explainSortDescription(result_description, input_streams.front().header)); - - if (limit) - map.add("Limit", limit); -} - -} diff --git a/src/Processors/QueryPlan/FinishSortingStep.h b/src/Processors/QueryPlan/FinishSortingStep.h deleted file mode 100644 index fd56c4353e7..00000000000 --- a/src/Processors/QueryPlan/FinishSortingStep.h +++ /dev/null @@ -1,36 +0,0 @@ -#pragma once -#include -#include - -namespace DB -{ - -/// Finish sorting of pre-sorted data. See FinishSortingTransform. -class FinishSortingStep : public ITransformingStep -{ -public: - FinishSortingStep( - const DataStream & input_stream_, - SortDescription prefix_description_, - SortDescription result_description_, - size_t max_block_size_, - UInt64 limit_); - - String getName() const override { return "FinishSorting"; } - - void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - - void describeActions(JSONBuilder::JSONMap & map) const override; - void describeActions(FormatSettings & settings) const override; - - /// Add limit or change it to lower value. 
- void updateLimit(size_t limit_); - -private: - SortDescription prefix_description; - SortDescription result_description; - size_t max_block_size; - UInt64 limit; -}; - -} From 3ffca6e138c7e23faf8b23942ff8f0a22147e75f Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 18 Oct 2021 21:04:40 +0300 Subject: [PATCH 347/438] ExecutablePool dictionary source fix borrow timeout milliseconds --- src/Dictionaries/ExecutablePoolDictionarySource.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Dictionaries/ExecutablePoolDictionarySource.cpp b/src/Dictionaries/ExecutablePoolDictionarySource.cpp index 8d1122b1194..dce2ce94b93 100644 --- a/src/Dictionaries/ExecutablePoolDictionarySource.cpp +++ b/src/Dictionaries/ExecutablePoolDictionarySource.cpp @@ -100,7 +100,7 @@ Pipe ExecutablePoolDictionarySource::getStreamForBlock(const Block & block) config.terminate_in_destructor_strategy = ShellCommand::DestructorStrategy{ true /*terminate_in_destructor*/, configuration.command_termination_timeout }; auto shell_command = ShellCommand::execute(config); return shell_command; - }, configuration.max_command_execution_time * 1000); + }, configuration.max_command_execution_time * 10000); if (!result) throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, From 4e3910e564efcfd308fb3bccb271a7f36aa3386b Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 14 Oct 2021 21:11:54 +0300 Subject: [PATCH 348/438] Add a test for adjusting last granula with max_read_buffer_size=0 --- ...ast_granula_adjust_LOGICAL_ERROR.reference | 8 ++++++++ ...2_last_granula_adjust_LOGICAL_ERROR.sql.j2 | 19 +++++++++++++++++++ 2 files changed, 27 insertions(+) create mode 100644 tests/queries/0_stateless/02052_last_granula_adjust_LOGICAL_ERROR.reference create mode 100644 tests/queries/0_stateless/02052_last_granula_adjust_LOGICAL_ERROR.sql.j2 diff --git a/tests/queries/0_stateless/02052_last_granula_adjust_LOGICAL_ERROR.reference b/tests/queries/0_stateless/02052_last_granula_adjust_LOGICAL_ERROR.reference new file mode 100644 index 00000000000..d7d3ee8f362 --- /dev/null +++ b/tests/queries/0_stateless/02052_last_granula_adjust_LOGICAL_ERROR.reference @@ -0,0 +1,8 @@ +1 +1 +10 +10 +100 +100 +10000 +10000 diff --git a/tests/queries/0_stateless/02052_last_granula_adjust_LOGICAL_ERROR.sql.j2 b/tests/queries/0_stateless/02052_last_granula_adjust_LOGICAL_ERROR.sql.j2 new file mode 100644 index 00000000000..465aa22beb3 --- /dev/null +++ b/tests/queries/0_stateless/02052_last_granula_adjust_LOGICAL_ERROR.sql.j2 @@ -0,0 +1,19 @@ +-- Tags: long + +{% for rows_in_table in [1, 10, 100, 10000] %} +{% for wide in [0, 100000000] %} +drop table if exists data_02052_{{ rows_in_table }}_wide{{ wide }}; +create table data_02052_{{ rows_in_table }}_wide{{ wide }} (key Int, value String) +engine=MergeTree() +order by key +settings + min_bytes_for_wide_part={{ wide }} +as select number, repeat(toString(number), 5) from numbers({{ rows_in_table }}); + +-- avoid any optimizations with ignore(*) +select count(ignore(*)) from data_02052_{{ rows_in_table }}_wide{{ wide }} settings max_read_buffer_size=1, max_threads=1; +select count(ignore(*)) from data_02052_{{ rows_in_table }}_wide{{ wide }} settings max_read_buffer_size=0, max_threads=1; -- { serverError CANNOT_READ_ALL_DATA } + +drop table data_02052_{{ rows_in_table }}_wide{{ wide }}; +{% endfor %} +{% endfor %} From 41e6df0f4b410a83b386d50dd11c4339334bddbf Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 17 Oct 2021 00:27:28 +0300 Subject: [PATCH 349/438] Remove unused offset_columns 
from MergeTreeReaderWide::readRows() --- src/Storages/MergeTree/MergeTreeReaderWide.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index 206469da7be..29cc45a5c60 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -69,10 +69,6 @@ size_t MergeTreeReaderWide::readRows(size_t from_mark, bool continue_reading, si size_t num_columns = columns.size(); checkNumberOfColumns(num_columns); - /// Pointers to offset columns that are common to the nested data structure columns. - /// If append is true, then the value will be equal to nullptr and will be used only to - /// check that the offsets column has been already read. - OffsetColumns offset_columns; std::unordered_map caches; std::unordered_set prefetched_streams; From cd4b33c8c9066f58594e21a605ce27d093026127 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 14 Oct 2021 21:11:54 +0300 Subject: [PATCH 350/438] Verify that all rows was read in MergeTreeReaderCompact v0: Use fmt-like style exception in MergeTreeReaderCompact v2: Update the check --- src/Storages/MergeTree/MergeTreeReaderCompact.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index 9b879283c10..15c5795ee7b 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -160,9 +160,10 @@ size_t MergeTreeReaderCompact::readRows(size_t from_mark, bool continue_reading, readData(column_from_part, column, from_mark, *column_positions[pos], rows_to_read, read_only_offsets[pos]); size_t read_rows_in_column = column->size() - column_size_before_reading; - if (read_rows_in_column < rows_to_read) - throw Exception("Cannot read all data in MergeTreeReaderCompact. Rows read: " + toString(read_rows_in_column) + - ". Rows expected: " + toString(rows_to_read) + ".", ErrorCodes::CANNOT_READ_ALL_DATA); + if (read_rows_in_column != rows_to_read) + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, + "Cannot read all data in MergeTreeReaderCompact. Rows read: {}. Rows expected: {}.", + read_rows_in_column, rows_to_read); } catch (Exception & e) { From e576fd17bd806666886cd8da7a9466d4268b88e7 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 14 Oct 2021 21:11:54 +0300 Subject: [PATCH 351/438] Do not allow reading to empty buffer in MergeTreeReaderStream Changelog: - Verify that all rows was read in MergeTreeReaderWide - Ignore some exceptions for Wide parts - Take max_rows_to_read into account - Verify that requested rows is not zero (otherwise it is too tricky) - Simply verify that buffer is not empty --- src/Storages/MergeTree/MergeTreeReaderStream.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Storages/MergeTree/MergeTreeReaderStream.cpp b/src/Storages/MergeTree/MergeTreeReaderStream.cpp index fc57b48e86d..f225ecae8fa 100644 --- a/src/Storages/MergeTree/MergeTreeReaderStream.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderStream.cpp @@ -10,6 +10,7 @@ namespace DB namespace ErrorCodes { extern const int ARGUMENT_OUT_OF_BOUND; + extern const int CANNOT_READ_ALL_DATA; } @@ -76,6 +77,10 @@ MergeTreeReaderStream::MergeTreeReaderStream( if (max_mark_range_bytes != 0) read_settings = read_settings.adjustBufferSize(max_mark_range_bytes); + //// Empty buffer does not makes progress. 
+ if (!read_settings.local_fs_buffer_size || !read_settings.remote_fs_buffer_size) + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read to empty buffer."); + /// Initialize the objects that shall be used to perform read operations. if (uncompressed_cache) { From 0d2ce2e711fd0a5b4aa7a1061b848f12f22ccaa6 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Mon, 18 Oct 2021 21:56:04 +0300 Subject: [PATCH 352/438] Update adopters.md --- docs/en/introduction/adopters.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index 700c804ea66..498de206bb9 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -166,5 +166,6 @@ toc_title: Adopters | Beeline | Telecom | Data Platform | — | — | [Blog post, July 2021](https://habr.com/en/company/beeline/blog/567508/) | | Ecommpay | Payment Processing | Logs | — | — | [Video, Nov 2019](https://www.youtube.com/watch?v=d3GdZTOWGLk) | | Omnicomm | Transportation Monitoring | — | — | — | [Facebook post, Oct 2021](https://www.facebook.com/OmnicommTeam/posts/2824479777774500) | +| Ok.ru | Social Network | — | — | 72 servers, 810 TB, 50bn rows/day, 1.5 TB/day | [SmartData conference, Oct 2021](https://assets.ctfassets.net/oxjq45e8ilak/4JPHkbJenLgZhBGGyyonFP/57472ec6987003ec4078d0941740703b/____________________ClickHouse_______________________.pdf) | [Original article](https://clickhouse.com/docs/en/introduction/adopters/) From 1c64a656f48d3632aa96592b15022264f8f9e8f8 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Mon, 18 Oct 2021 21:56:43 +0300 Subject: [PATCH 353/438] Update adopters.md --- docs/en/introduction/adopters.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index 498de206bb9..73db0b3c952 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -166,6 +166,6 @@ toc_title: Adopters | Beeline | Telecom | Data Platform | — | — | [Blog post, July 2021](https://habr.com/en/company/beeline/blog/567508/) | | Ecommpay | Payment Processing | Logs | — | — | [Video, Nov 2019](https://www.youtube.com/watch?v=d3GdZTOWGLk) | | Omnicomm | Transportation Monitoring | — | — | — | [Facebook post, Oct 2021](https://www.facebook.com/OmnicommTeam/posts/2824479777774500) | -| Ok.ru | Social Network | — | — | 72 servers, 810 TB, 50bn rows/day, 1.5 TB/day | [SmartData conference, Oct 2021](https://assets.ctfassets.net/oxjq45e8ilak/4JPHkbJenLgZhBGGyyonFP/57472ec6987003ec4078d0941740703b/____________________ClickHouse_______________________.pdf) | +| Ok.ru | Social Network | — | 72 servers | 810 TB compressed, 50bn rows/day, 1.5 TB/day | [SmartData conference, Oct 2021](https://assets.ctfassets.net/oxjq45e8ilak/4JPHkbJenLgZhBGGyyonFP/57472ec6987003ec4078d0941740703b/____________________ClickHouse_______________________.pdf) | [Original article](https://clickhouse.com/docs/en/introduction/adopters/) From f7b76373ceecc024eb93f3cbf5198b0022acdcc9 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 13 Oct 2021 23:52:39 +0300 Subject: [PATCH 354/438] Move ProfileEvents packet type from TCPHandler into ProfileEventsExt --- src/Interpreters/ProfileEventsExt.cpp | 5 +++++ src/Interpreters/ProfileEventsExt.h | 10 ++++++++++ src/Server/TCPHandler.cpp | 20 ++++---------------- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/src/Interpreters/ProfileEventsExt.cpp b/src/Interpreters/ProfileEventsExt.cpp index 4386c294316..472efc109fb 
100644 --- a/src/Interpreters/ProfileEventsExt.cpp +++ b/src/Interpreters/ProfileEventsExt.cpp @@ -11,6 +11,11 @@ namespace ProfileEvents { +std::shared_ptr TypeEnum = std::make_shared(DB::DataTypeEnum8::Values{ + { "increment", static_cast(INCREMENT)}, + { "gauge", static_cast(GAUGE)}, +}); + /// Put implementation here to avoid extra linking dependencies for clickhouse_common_io void dumpToMapColumn(const Counters::Snapshot & counters, DB::IColumn * column, bool nonzero_only) { diff --git a/src/Interpreters/ProfileEventsExt.h b/src/Interpreters/ProfileEventsExt.h index 699c997d904..8a92eadec79 100644 --- a/src/Interpreters/ProfileEventsExt.h +++ b/src/Interpreters/ProfileEventsExt.h @@ -1,5 +1,6 @@ #pragma once #include +#include #include @@ -9,4 +10,13 @@ namespace ProfileEvents /// Dumps profile events to columns Map(String, UInt64) void dumpToMapColumn(const Counters::Snapshot & counters, DB::IColumn * column, bool nonzero_only = true); +/// This is for ProfileEvents packets. +enum Type : int8_t +{ + INCREMENT = 1, + GAUGE = 2, +}; + +extern std::shared_ptr TypeEnum; + } diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 87cc76b1812..729cb33371a 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -831,12 +832,6 @@ namespace { using namespace ProfileEvents; - enum ProfileEventTypes : int8_t - { - INCREMENT = 1, - GAUGE = 2, - }; - constexpr size_t NAME_COLUMN_INDEX = 4; constexpr size_t VALUE_COLUMN_INDEX = 5; @@ -879,7 +874,7 @@ namespace columns[i++]->insertData(host_name.data(), host_name.size()); columns[i++]->insert(UInt64(snapshot.current_time)); columns[i++]->insert(UInt64{snapshot.thread_id}); - columns[i++]->insert(ProfileEventTypes::INCREMENT); + columns[i++]->insert(ProfileEvents::Type::INCREMENT); } } @@ -893,7 +888,7 @@ namespace columns[i++]->insertData(host_name.data(), host_name.size()); columns[i++]->insert(UInt64(snapshot.current_time)); columns[i++]->insert(UInt64{snapshot.thread_id}); - columns[i++]->insert(ProfileEventTypes::GAUGE); + columns[i++]->insert(ProfileEvents::Type::GAUGE); columns[i++]->insertData(MemoryTracker::USAGE_EVENT_NAME, strlen(MemoryTracker::USAGE_EVENT_NAME)); columns[i++]->insert(snapshot.memory_usage); @@ -907,18 +902,11 @@ void TCPHandler::sendProfileEvents() if (client_tcp_protocol_version < DBMS_MIN_PROTOCOL_VERSION_WITH_PROFILE_EVENTS) return; - auto profile_event_type = std::make_shared( - DataTypeEnum8::Values - { - { "increment", static_cast(INCREMENT)}, - { "gauge", static_cast(GAUGE)}, - }); - NamesAndTypesList column_names_and_types = { { "host_name", std::make_shared() }, { "current_time", std::make_shared() }, { "thread_id", std::make_shared() }, - { "type", profile_event_type }, + { "type", ProfileEvents::TypeEnum }, { "name", std::make_shared() }, { "value", std::make_shared() }, }; From 424bf6fcf49ce377a26270f9872e0939912dac79 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 12 Oct 2021 21:03:54 +0300 Subject: [PATCH 355/438] client: add ability to print raw profile events This can be useful for debugging and for testing (since you will not need to obtain query_id and look at query_log). 
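As a sketch of what the option does (condensed from the `ClientBase::onProfileEvents()` hunk further below, not a standalone program): each incoming `ProfileEvents` block is either printed immediately, once `--profile-events-delay-ms` has elapsed, or remembered as `last_block` and flushed after the query finishes, so a delay of `-1` in effect prints only the final totals.

``` cpp
// Condensed from the onProfileEvents() changes in this patch.
if (profile_events.print)
{
    if (profile_events.watch.elapsedMilliseconds() >= profile_events.delay_ms)
    {
        initLogsOutputStream();
        progress_indication.clearProgressOutput();
        logs_out_stream->writeProfileEvents(block);
        logs_out_stream->flush();

        profile_events.watch.restart();
        profile_events.last_block = {};
    }
    else
    {
        /// Kept and written out at the end of processParsedSingleQuery().
        profile_events.last_block = block;
    }
}
```

A typical invocation would be `clickhouse-client --print-profile-events --profile-events-delay-ms 0 -q "SELECT 1"`, matching the two options documented in the `cli.md` hunk of this patch.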
v2: - mark test as long - add option to docs - add type of profile event into logs too v3: - resolve conflicts - and fix onProfileEvents callback v4: - add --print-profile-events separate switch --- docs/en/interfaces/cli.md | 2 + src/Client/ClientBase.cpp | 88 ++++++++++++++----- src/Client/ClientBase.h | 11 +++ src/Client/InternalTextLogs.cpp | 68 +++++++++++++- src/Client/InternalTextLogs.h | 29 +++++- .../02050_client_profile_events.reference | 4 + .../02050_client_profile_events.sh | 15 ++++ 7 files changed, 188 insertions(+), 29 deletions(-) create mode 100644 tests/queries/0_stateless/02050_client_profile_events.reference create mode 100755 tests/queries/0_stateless/02050_client_profile_events.sh diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md index c4305d229cf..eaf7a96ce42 100644 --- a/docs/en/interfaces/cli.md +++ b/docs/en/interfaces/cli.md @@ -128,6 +128,8 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va - `--history_file` — Path to a file containing command history. - `--param_` — Value for a [query with parameters](#cli-queries-with-parameters). - `--hardware-utilization` — Print hardware utilization information in progress bar. +- `--print-profile-events` – Print `ProfileEvents` packets. +- `--profile-events-delay-ms` – Delay between printing `ProfileEvents` packets (-1 - print only totals, 0 - print every single packet). Since version 20.5, `clickhouse-client` has automatic syntax highlighting (always enabled). diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 7bcff4f5ef7..54e679e4c0f 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -266,7 +266,7 @@ void ClientBase::onLogData(Block & block) { initLogsOutputStream(); progress_indication.clearProgressOutput(); - logs_out_stream->write(block); + logs_out_stream->writeLogs(block); logs_out_stream->flush(); } @@ -668,39 +668,61 @@ void ClientBase::onEndOfStream() void ClientBase::onProfileEvents(Block & block) { const auto rows = block.rows(); - if (rows == 0 || !progress_indication.print_hardware_utilization) + if (rows == 0) return; - const auto & array_thread_id = typeid_cast(*block.getByName("thread_id").column).getData(); - const auto & names = typeid_cast(*block.getByName("name").column); - const auto & host_names = typeid_cast(*block.getByName("host_name").column); - const auto & array_values = typeid_cast(*block.getByName("value").column).getData(); - const auto * user_time_name = ProfileEvents::getName(ProfileEvents::UserTimeMicroseconds); - const auto * system_time_name = ProfileEvents::getName(ProfileEvents::SystemTimeMicroseconds); - - HostToThreadTimesMap thread_times; - for (size_t i = 0; i < rows; ++i) + if (progress_indication.print_hardware_utilization) { - auto thread_id = array_thread_id[i]; - auto host_name = host_names.getDataAt(i).toString(); - if (thread_id != 0) - progress_indication.addThreadIdToList(host_name, thread_id); - auto event_name = names.getDataAt(i); - auto value = array_values[i]; - if (event_name == user_time_name) + const auto & array_thread_id = typeid_cast(*block.getByName("thread_id").column).getData(); + const auto & names = typeid_cast(*block.getByName("name").column); + const auto & host_names = typeid_cast(*block.getByName("host_name").column); + const auto & array_values = typeid_cast(*block.getByName("value").column).getData(); + + const auto * user_time_name = ProfileEvents::getName(ProfileEvents::UserTimeMicroseconds); + const auto * system_time_name = 
ProfileEvents::getName(ProfileEvents::SystemTimeMicroseconds); + + HostToThreadTimesMap thread_times; + for (size_t i = 0; i < rows; ++i) { - thread_times[host_name][thread_id].user_ms = value; + auto thread_id = array_thread_id[i]; + auto host_name = host_names.getDataAt(i).toString(); + if (thread_id != 0) + progress_indication.addThreadIdToList(host_name, thread_id); + auto event_name = names.getDataAt(i); + auto value = array_values[i]; + if (event_name == user_time_name) + { + thread_times[host_name][thread_id].user_ms = value; + } + else if (event_name == system_time_name) + { + thread_times[host_name][thread_id].system_ms = value; + } + else if (event_name == MemoryTracker::USAGE_EVENT_NAME) + { + thread_times[host_name][thread_id].memory_usage = value; + } } - else if (event_name == system_time_name) + progress_indication.updateThreadEventData(thread_times); + } + + if (profile_events.print) + { + if (profile_events.watch.elapsedMilliseconds() >= profile_events.delay_ms) { - thread_times[host_name][thread_id].system_ms = value; + initLogsOutputStream(); + progress_indication.clearProgressOutput(); + logs_out_stream->writeProfileEvents(block); + logs_out_stream->flush(); + + profile_events.watch.restart(); + profile_events.last_block = {}; } - else if (event_name == MemoryTracker::USAGE_EVENT_NAME) + else { - thread_times[host_name][thread_id].memory_usage = value; + profile_events.last_block = block; } } - progress_indication.updateThreadEventData(thread_times); } @@ -1023,6 +1045,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin processed_rows = 0; written_first_block = false; progress_indication.resetProgress(); + profile_events.watch.restart(); { /// Temporarily apply query settings to context. @@ -1091,6 +1114,15 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin } } + /// Always print last block (if it was not printed already) + if (profile_events.last_block) + { + initLogsOutputStream(); + progress_indication.clearProgressOutput(); + logs_out_stream->writeProfileEvents(profile_events.last_block); + logs_out_stream->flush(); + } + if (is_interactive) { std::cout << std::endl << processed_rows << " rows in set. Elapsed: " << progress_indication.elapsedSeconds() << " sec. 
"; @@ -1561,6 +1593,8 @@ void ClientBase::init(int argc, char ** argv) ("ignore-error", "do not stop processing in multiquery mode") ("stacktrace", "print stack traces of exceptions") ("hardware-utilization", "print hardware utilization information in progress bar") + ("print-profile-events", po::value(&profile_events.print)->zero_tokens(), "Printing ProfileEvents packets") + ("profile-events-delay-ms", po::value()->default_value(profile_events.delay_ms), "Delay between printing `ProfileEvents` packets (-1 - print only totals, 0 - print every single packet)") ; addAndCheckOptions(options_description, options, common_arguments); @@ -1611,6 +1645,10 @@ void ClientBase::init(int argc, char ** argv) config().setBool("vertical", true); if (options.count("stacktrace")) config().setBool("stacktrace", true); + if (options.count("print-profile-events")) + config().setBool("print-profile-events", true); + if (options.count("profile-events-delay-ms")) + config().setInt("profile-events-delay-ms", options["profile-events-delay-ms"].as()); if (options.count("progress")) config().setBool("progress", true); if (options.count("echo")) @@ -1631,6 +1669,8 @@ void ClientBase::init(int argc, char ** argv) progress_indication.print_hardware_utilization = true; query_processing_stage = QueryProcessingStage::fromString(options["stage"].as()); + profile_events.print = options.count("print-profile-events"); + profile_events.delay_ms = options["profile-events-delay-ms"].as(); processOptions(options_description, options, external_tables_arguments); argsToConfig(common_arguments, config(), 100); diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index fce706d7cf8..b31eff82b30 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -217,6 +218,16 @@ protected: QueryFuzzer fuzzer; int query_fuzzer_runs = 0; + struct + { + bool print = false; + /// UINT64_MAX -- print only last + UInt64 delay_ms = 0; + Stopwatch watch; + /// For printing only last (delay_ms == 0). 
+ Block last_block; + } profile_events; + QueryProcessingStage::Enum query_processing_stage; }; diff --git a/src/Client/InternalTextLogs.cpp b/src/Client/InternalTextLogs.cpp index 65592fee670..430ba6daf0a 100644 --- a/src/Client/InternalTextLogs.cpp +++ b/src/Client/InternalTextLogs.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -13,7 +14,7 @@ namespace DB { -void InternalTextLogs::write(const Block & block) +void InternalTextLogs::writeLogs(const Block & block) { const auto & array_event_time = typeid_cast(*block.getByName("event_time").column).getData(); const auto & array_microseconds = typeid_cast(*block.getByName("event_time_microseconds").column).getData(); @@ -97,4 +98,69 @@ void InternalTextLogs::write(const Block & block) } } +void InternalTextLogs::writeProfileEvents(const Block & block) +{ + const auto & column_host_name = typeid_cast(*block.getByName("host_name").column); + const auto & array_current_time = typeid_cast(*block.getByName("current_time").column).getData(); + const auto & array_thread_id = typeid_cast(*block.getByName("thread_id").column).getData(); + const auto & array_type = typeid_cast(*block.getByName("type").column).getData(); + const auto & column_name = typeid_cast(*block.getByName("name").column); + const auto & array_value = typeid_cast(*block.getByName("value").column).getData(); + + for (size_t row_num = 0; row_num < block.rows(); ++row_num) + { + /// host_name + auto host_name = column_host_name.getDataAt(row_num); + if (host_name.size) + { + writeCString("[", wb); + if (color) + writeString(setColor(StringRefHash()(host_name)), wb); + writeString(host_name, wb); + if (color) + writeCString(resetColor(), wb); + writeCString("] ", wb); + } + + /// current_time + auto current_time = array_current_time[row_num]; + writeDateTimeText<'.', ':'>(current_time, wb); + + /// thread_id + UInt64 thread_id = array_thread_id[row_num]; + writeCString(" [ ", wb); + if (color) + writeString(setColor(intHash64(thread_id)), wb); + writeIntText(thread_id, wb); + if (color) + writeCString(resetColor(), wb); + writeCString(" ] ", wb); + + /// name + auto name = column_name.getDataAt(row_num); + if (color) + writeString(setColor(StringRefHash()(name)), wb); + DB::writeString(name, wb); + if (color) + writeCString(resetColor(), wb); + writeCString(": ", wb); + + /// value + UInt64 value = array_value[row_num]; + writeIntText(value, wb); + + //// type + Int8 type = array_type[row_num]; + writeCString(" (", wb); + if (color) + writeString(setColor(intHash64(type)), wb); + writeString(toString(ProfileEvents::TypeEnum->castToName(type)), wb); + if (color) + writeCString(resetColor(), wb); + writeCString(")", wb); + + writeChar('\n', wb); + } +} + } diff --git a/src/Client/InternalTextLogs.h b/src/Client/InternalTextLogs.h index a8b119b0f69..0690211fd24 100644 --- a/src/Client/InternalTextLogs.h +++ b/src/Client/InternalTextLogs.h @@ -6,16 +6,37 @@ namespace DB { -/// Prints internal server logs -/// Input blocks have to have the same structure as SystemLogsQueue::getSampleBlock() +/// Prints internal server logs or profile events with colored output (if requested). 
/// NOTE: IRowOutputFormat does not suite well for this case class InternalTextLogs { public: InternalTextLogs(WriteBuffer & buf_out, bool color_) : wb(buf_out), color(color_) {} - - void write(const Block & block); + /// Print internal server logs + /// + /// Input blocks have to have the same structure as SystemLogsQueue::getSampleBlock(): + /// - event_time + /// - event_time_microseconds + /// - host_name + /// - query_id + /// - thread_id + /// - priority + /// - source + /// - text + void writeLogs(const Block & block); + /// Print profile events. + /// + /// Block: + /// - host_name + /// - current_time + /// - thread_id + /// - type + /// - name + /// - value + /// + /// See also TCPHandler::sendProfileEvents() for block columns. + void writeProfileEvents(const Block & block); void flush() { diff --git a/tests/queries/0_stateless/02050_client_profile_events.reference b/tests/queries/0_stateless/02050_client_profile_events.reference new file mode 100644 index 00000000000..00fc3b5d06a --- /dev/null +++ b/tests/queries/0_stateless/02050_client_profile_events.reference @@ -0,0 +1,4 @@ +0 +SelectedRows: 131010 (increment) +OK +OK diff --git a/tests/queries/0_stateless/02050_client_profile_events.sh b/tests/queries/0_stateless/02050_client_profile_events.sh new file mode 100755 index 00000000000..5c3887cf5fb --- /dev/null +++ b/tests/queries/0_stateless/02050_client_profile_events.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +# Tags: long + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# do not print any ProfileEvents packets +$CLICKHOUSE_CLIENT -q 'select * from numbers(1e5) format Null' |& grep -c 'SelectedRows' +# print only last +$CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -q 'select * from numbers(1e5) format Null' |& grep -o 'SelectedRows: .*$' +# print everything +test "$($CLICKHOUSE_CLIENT --print-profile-events -q 'select * from numbers(1e9) format Null' |& grep -c 'SelectedRows')" -gt 1 && echo OK || echo FAIL +# print each 100 ms +test "$($CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=100 -q 'select * from numbers(1e9) format Null' |& grep -c 'SelectedRows')" -gt 1 && echo OK || echo FAIL From fb91b1d92b8309e8a925cce25f4e1adaf967a306 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 19 Oct 2021 01:04:07 +0300 Subject: [PATCH 356/438] StorageExecutable fix small issues --- src/Storages/StorageExecutable.cpp | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/Storages/StorageExecutable.cpp b/src/Storages/StorageExecutable.cpp index 76be3353808..6a82fc88977 100644 --- a/src/Storages/StorageExecutable.cpp +++ b/src/Storages/StorageExecutable.cpp @@ -3,6 +3,8 @@ #include #include +#include + #include #include @@ -111,9 +113,16 @@ Pipe StorageExecutable::read( { auto user_scripts_path = context->getUserScriptsPath(); auto script_path = user_scripts_path + '/' + script_name; - if (!std::filesystem::exists(std::filesystem::path(script_path))) + + if (!pathStartsWith(script_path, user_scripts_path)) throw Exception(ErrorCodes::UNSUPPORTED_METHOD, - "Executable file {} does not exists inside {}", + "Executable file {} must be inside user scripts folder {}", + script_name, + user_scripts_path); + + if (!std::filesystem::exists(std::filesystem::path(script_path))) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Executable file {} does not exists inside user scripts folder {}", script_name, user_scripts_path); @@ -139,9 
+148,9 @@ Pipe StorageExecutable::read( bool result = process_pool->tryBorrowObject(process, [&config, this]() { config.terminate_in_destructor_strategy = ShellCommand::DestructorStrategy{ true /*terminate_in_destructor*/, settings.command_termination_timeout }; - auto shell_command = ShellCommand::execute(config); + auto shell_command = ShellCommand::executeDirect(config); return shell_command; - }, settings.max_command_execution_time * 1000); + }, settings.max_command_execution_time * 10000); if (!result) throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, From 254eecf2270548206b60d9fbeab8b7795e87b3a8 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 19 Oct 2021 11:11:54 +0300 Subject: [PATCH 357/438] Update some tests. --- ...monotonous_functions_in_order_by.reference | 14 ++-- .../01576_alias_column_rewrite.reference | 18 +++-- .../01591_window_functions.reference | 26 +++---- .../01655_plan_optimizations.reference | 12 ++-- .../0_stateless/01655_plan_optimizations.sh | 4 +- ...1951_distributed_push_down_limit.reference | 28 ++++---- ...istributed_group_by_sharding_key.reference | 68 ++++++++----------- 7 files changed, 72 insertions(+), 98 deletions(-) diff --git a/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.reference b/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.reference index bf9bff06959..d8c9b88d8e8 100644 --- a/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.reference +++ b/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.reference @@ -6,12 +6,10 @@ ORDER BY timestamp ASC LIMIT 10 Expression (Projection) Limit (preliminary LIMIT (without OFFSET)) - MergingSorted (Merge sorted streams for ORDER BY) - MergeSorting (Merge sorted blocks for ORDER BY) - PartialSorting (Sort each block for ORDER BY) - Expression (Before ORDER BY) - SettingQuotaAndLimits (Set limits and quota after reading from storage) - ReadFromMergeTree + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + SettingQuotaAndLimits (Set limits and quota after reading from storage) + ReadFromMergeTree SELECT timestamp, key @@ -20,7 +18,7 @@ ORDER BY toDate(timestamp) ASC LIMIT 10 Expression (Projection) Limit (preliminary LIMIT (without OFFSET)) - FinishSorting + Sorting Expression (Before ORDER BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromMergeTree @@ -34,7 +32,7 @@ ORDER BY LIMIT 10 Expression (Projection) Limit (preliminary LIMIT (without OFFSET)) - FinishSorting + Sorting Expression (Before ORDER BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromMergeTree diff --git a/tests/queries/0_stateless/01576_alias_column_rewrite.reference b/tests/queries/0_stateless/01576_alias_column_rewrite.reference index 2a824e62158..07d361cfa46 100644 --- a/tests/queries/0_stateless/01576_alias_column_rewrite.reference +++ b/tests/queries/0_stateless/01576_alias_column_rewrite.reference @@ -23,21 +23,19 @@ lambda optimize_read_in_order Expression (Projection) Limit (preliminary LIMIT (without OFFSET)) - MergingSorted (Merge sorted streams for ORDER BY) - MergeSorting (Merge sorted blocks for ORDER BY) - PartialSorting (Sort each block for ORDER BY) - Expression (Before ORDER BY) - SettingQuotaAndLimits (Set limits and quota after reading from storage) - ReadFromMergeTree -Expression (Projection) - Limit (preliminary LIMIT (without OFFSET)) - FinishSorting + Sorting (Sorting for ORDER BY) Expression (Before ORDER BY) SettingQuotaAndLimits (Set 
limits and quota after reading from storage) ReadFromMergeTree Expression (Projection) Limit (preliminary LIMIT (without OFFSET)) - FinishSorting + Sorting + Expression (Before ORDER BY) + SettingQuotaAndLimits (Set limits and quota after reading from storage) + ReadFromMergeTree +Expression (Projection) + Limit (preliminary LIMIT (without OFFSET)) + Sorting Expression (Before ORDER BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromMergeTree diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference index f54c10ee8b9..07562557369 100644 --- a/tests/queries/0_stateless/01591_window_functions.reference +++ b/tests/queries/0_stateless/01591_window_functions.reference @@ -908,12 +908,10 @@ Expression ((Projection + Before ORDER BY)) Window (Window step for window \'\') Window (Window step for window \'PARTITION BY p\') Window (Window step for window \'PARTITION BY p ORDER BY o ASC\') - MergingSorted (Merge sorted streams for window \'PARTITION BY p ORDER BY o ASC\') - MergeSorting (Merge sorted blocks for window \'PARTITION BY p ORDER BY o ASC\') - PartialSorting (Sort each block for window \'PARTITION BY p ORDER BY o ASC\') - Expression ((Before window functions + (Projection + Before ORDER BY))) - SettingQuotaAndLimits (Set limits and quota after reading from storage) - ReadFromStorage (SystemNumbers) + Sorting (Sorting for window \'PARTITION BY p ORDER BY o ASC\') + Expression ((Before window functions + (Projection + Before ORDER BY))) + SettingQuotaAndLimits (Set limits and quota after reading from storage) + ReadFromStorage (SystemNumbers) explain select count(*) over (order by o, number), count(*) over (order by number) @@ -923,16 +921,12 @@ from ; Expression ((Projection + Before ORDER BY)) Window (Window step for window \'ORDER BY o ASC, number ASC\') - MergingSorted (Merge sorted streams for window \'ORDER BY o ASC, number ASC\') - MergeSorting (Merge sorted blocks for window \'ORDER BY o ASC, number ASC\') - PartialSorting (Sort each block for window \'ORDER BY o ASC, number ASC\') - Window (Window step for window \'ORDER BY number ASC\') - MergingSorted (Merge sorted streams for window \'ORDER BY number ASC\') - MergeSorting (Merge sorted blocks for window \'ORDER BY number ASC\') - PartialSorting (Sort each block for window \'ORDER BY number ASC\') - Expression ((Before window functions + (Projection + Before ORDER BY))) - SettingQuotaAndLimits (Set limits and quota after reading from storage) - ReadFromStorage (SystemNumbers) + Sorting (Sorting for window \'ORDER BY o ASC, number ASC\') + Window (Window step for window \'ORDER BY number ASC\') + Sorting (Sorting for window \'ORDER BY number ASC\') + Expression ((Before window functions + (Projection + Before ORDER BY))) + SettingQuotaAndLimits (Set limits and quota after reading from storage) + ReadFromStorage (SystemNumbers) -- A test case for the sort comparator found by fuzzer. 
SELECT max(number) OVER (ORDER BY number DESC NULLS FIRST), diff --git a/tests/queries/0_stateless/01655_plan_optimizations.reference b/tests/queries/0_stateless/01655_plan_optimizations.reference index 22876207862..33a7ff44b74 100644 --- a/tests/queries/0_stateless/01655_plan_optimizations.reference +++ b/tests/queries/0_stateless/01655_plan_optimizations.reference @@ -5,11 +5,8 @@ FUNCTION sipHash64 Filter column: equals > sorting steps should know about limit Limit 10 -MergingSorted -Limit 10 -MergeSorting -Limit 10 -PartialSorting +Sorting +Sorting Limit 10 -- filter push down -- > filter should be pushed down after aggregating @@ -108,9 +105,8 @@ Filter column: notEquals(y, 2) 1 0 1 1 > filter is pushed down before sorting steps -MergingSorted -MergeSorting -PartialSorting +Sorting +Sorting Filter column: and(notEquals(x, 0), notEquals(y, 0)) 1 2 1 1 diff --git a/tests/queries/0_stateless/01655_plan_optimizations.sh b/tests/queries/0_stateless/01655_plan_optimizations.sh index b835bae0e27..de3d3ac3eb6 100755 --- a/tests/queries/0_stateless/01655_plan_optimizations.sh +++ b/tests/queries/0_stateless/01655_plan_optimizations.sh @@ -10,7 +10,7 @@ $CLICKHOUSE_CLIENT -q "select x + 1 from (select y + 2 as x from (select dummy + echo "> sipHash should be calculated after filtration" $CLICKHOUSE_CLIENT -q "explain actions = 1 select sum(x), sum(y) from (select sipHash64(number) as x, bitAnd(number, 1024) as y from numbers_mt(1000000000) limit 1000000000) where y = 0" | grep -o "FUNCTION sipHash64\|Filter column: equals" echo "> sorting steps should know about limit" -$CLICKHOUSE_CLIENT -q "explain actions = 1 select number from (select number from numbers(500000000) order by -number) limit 10" | grep -o "MergingSorted\|MergeSorting\|PartialSorting\|Limit 10" +$CLICKHOUSE_CLIENT -q "explain actions = 1 select number from (select number from numbers(500000000) order by -number) limit 10" | grep -o "Sorting\|Limit 10" echo "-- filter push down --" echo "> filter should be pushed down after aggregating" @@ -132,7 +132,7 @@ $CLICKHOUSE_CLIENT -q " select number % 2 as x, number % 3 as y from numbers(6) order by y desc ) where x != 0 and y != 0 settings enable_optimize_predicate_expression = 0" | - grep -o "MergingSorted\|MergeSorting\|PartialSorting\|Filter column: and(notEquals(x, 0), notEquals(y, 0))" + grep -o "Sorting\|Filter column: and(notEquals(x, 0), notEquals(y, 0))" $CLICKHOUSE_CLIENT -q " select x, y from ( select number % 2 as x, number % 3 as y from numbers(6) order by y desc diff --git a/tests/queries/0_stateless/01951_distributed_push_down_limit.reference b/tests/queries/0_stateless/01951_distributed_push_down_limit.reference index ca3bbc4cb33..d0e7a9ef15b 100644 --- a/tests/queries/0_stateless/01951_distributed_push_down_limit.reference +++ b/tests/queries/0_stateless/01951_distributed_push_down_limit.reference @@ -2,31 +2,27 @@ explain select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=0; Expression (Projection) Limit (preliminary LIMIT (without OFFSET)) - MergingSorted (Merge sorted streams after aggregation stage for ORDER BY) + Sorting (Merge sorted streams after aggregation stage for ORDER BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) Union - MergingSorted (Merge sorted streams for ORDER BY) - MergeSorting (Merge sorted blocks for ORDER BY) - PartialSorting (Sort each block for ORDER BY) - Expression (Before ORDER BY) + Sorting (Sorting for ORDER BY) + Expression 
(Before ORDER BY) + SettingQuotaAndLimits (Set limits and quota after reading from storage) + Expression ((Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY)))) SettingQuotaAndLimits (Set limits and quota after reading from storage) - Expression ((Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY)))) - SettingQuotaAndLimits (Set limits and quota after reading from storage) - ReadFromStorage (SystemNumbers) + ReadFromStorage (SystemNumbers) ReadFromRemote (Read from remote replica) explain select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=1; Expression (Projection) Limit (preliminary LIMIT (without OFFSET)) - MergingSorted (Merge sorted streams after aggregation stage for ORDER BY) + Sorting (Merge sorted streams after aggregation stage for ORDER BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) Union Limit (preliminary LIMIT (with OFFSET)) - MergingSorted (Merge sorted streams for ORDER BY) - MergeSorting (Merge sorted blocks for ORDER BY) - PartialSorting (Sort each block for ORDER BY) - Expression (Before ORDER BY) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + SettingQuotaAndLimits (Set limits and quota after reading from storage) + Expression ((Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY)))) SettingQuotaAndLimits (Set limits and quota after reading from storage) - Expression ((Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY)))) - SettingQuotaAndLimits (Set limits and quota after reading from storage) - ReadFromStorage (SystemNumbers) + ReadFromStorage (SystemNumbers) ReadFromRemote (Read from remote replica) diff --git a/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.reference b/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.reference index 10787068f43..b23631395ff 100644 --- a/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.reference +++ b/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.reference @@ -50,66 +50,58 @@ SettingQuotaAndLimits (Set limits and quota after reading from storage) explain select distinct k1 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)) order by v; -- not optimized Expression (Projection) Distinct - MergingSorted (Merge sorted streams for ORDER BY, without aggregation) + Sorting (Merge sorted streams for ORDER BY, without aggregation) SettingQuotaAndLimits (Set limits and quota after reading from storage) Union - MergingSorted (Merge sorted streams for ORDER BY) - MergeSorting (Merge sorted blocks for ORDER BY) - PartialSorting (Sort each block for ORDER BY) - Distinct (Preliminary DISTINCT) - Expression (Before ORDER BY) + Sorting (Sorting for ORDER BY) + Distinct (Preliminary DISTINCT) + Expression (Before ORDER BY) + SettingQuotaAndLimits (Set limits and quota after reading from storage) + Expression ((Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY)))) SettingQuotaAndLimits (Set limits and quota after reading from storage) - Expression ((Convert VIEW subquery result to VIEW table 
structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY)))) - SettingQuotaAndLimits (Set limits and quota after reading from storage) - ReadFromStorage (SystemNumbers) + ReadFromStorage (SystemNumbers) ReadFromRemote (Read from remote replica) explain select distinct k1, k2 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)) order by v; -- optimized Expression (Projection) - MergingSorted (Merge sorted streams after aggregation stage for ORDER BY) + Sorting (Merge sorted streams after aggregation stage for ORDER BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) Union Distinct - MergingSorted (Merge sorted streams for ORDER BY) - MergeSorting (Merge sorted blocks for ORDER BY) - PartialSorting (Sort each block for ORDER BY) - Distinct (Preliminary DISTINCT) - Expression (Before ORDER BY) + Sorting (Sorting for ORDER BY) + Distinct (Preliminary DISTINCT) + Expression (Before ORDER BY) + SettingQuotaAndLimits (Set limits and quota after reading from storage) + Expression ((Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY)))) SettingQuotaAndLimits (Set limits and quota after reading from storage) - Expression ((Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY)))) - SettingQuotaAndLimits (Set limits and quota after reading from storage) - ReadFromStorage (SystemNumbers) + ReadFromStorage (SystemNumbers) ReadFromRemote (Read from remote replica) explain select distinct on (k1) k2 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)) order by v; -- not optimized Expression (Projection) LimitBy Expression (Before LIMIT BY) - MergingSorted (Merge sorted streams for ORDER BY, without aggregation) + Sorting (Merge sorted streams for ORDER BY, without aggregation) SettingQuotaAndLimits (Set limits and quota after reading from storage) Union LimitBy Expression (Before LIMIT BY) - MergingSorted (Merge sorted streams for ORDER BY) - MergeSorting (Merge sorted blocks for ORDER BY) - PartialSorting (Sort each block for ORDER BY) - Expression (Before ORDER BY) - SettingQuotaAndLimits (Set limits and quota after reading from storage) - Expression ((Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY)))) - SettingQuotaAndLimits (Set limits and quota after reading from storage) - ReadFromStorage (SystemNumbers) - ReadFromRemote (Read from remote replica) -explain select distinct on (k1, k2) v from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)) order by v; -- optimized -Expression (Projection) - MergingSorted (Merge sorted streams after aggregation stage for ORDER BY) - SettingQuotaAndLimits (Set limits and quota after reading from storage) - Union - LimitBy - Expression (Before LIMIT BY) - MergingSorted (Merge sorted streams for ORDER BY) - MergeSorting (Merge sorted blocks for ORDER BY) - PartialSorting (Sort each block for ORDER BY) + Sorting (Sorting for ORDER BY) Expression (Before ORDER BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) Expression ((Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY)))) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage 
(SystemNumbers) + ReadFromRemote (Read from remote replica) +explain select distinct on (k1, k2) v from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)) order by v; -- optimized +Expression (Projection) + Sorting (Merge sorted streams after aggregation stage for ORDER BY) + SettingQuotaAndLimits (Set limits and quota after reading from storage) + Union + LimitBy + Expression (Before LIMIT BY) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + SettingQuotaAndLimits (Set limits and quota after reading from storage) + Expression ((Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY)))) + SettingQuotaAndLimits (Set limits and quota after reading from storage) + ReadFromStorage (SystemNumbers) ReadFromRemote (Read from remote replica) From e53335bc6fc061ce47a40b94d3b5a91ac042717f Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 19 Oct 2021 08:19:43 +0000 Subject: [PATCH 358/438] Better way --- programs/server/Server.cpp | 2 +- src/Databases/DatabaseOnDisk.cpp | 9 --------- src/Interpreters/Context.cpp | 11 ++++------- src/Interpreters/Context.h | 3 +-- src/Interpreters/InterpreterCreateQuery.cpp | 9 --------- src/Storages/MergeTree/MergeTreeData.cpp | 2 ++ 6 files changed, 8 insertions(+), 28 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 2b526608715..bbd9af1e97e 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -919,7 +919,7 @@ if (ThreadFuzzer::instance().isEffective()) /// Initialize background executors after we load default_profile config. /// This is needed to load proper values of background_pool_size etc. - global_context->initializeBackgroundExecutors(); + global_context->initializeBackgroundExecutorsIfNeeded(); if (settings.async_insert_threads) global_context->setAsynchronousInsertQueue(std::make_shared( diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index a71d539e3c5..97e59f53f64 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -53,15 +53,6 @@ std::pair createTableFromAST( ast_create_query.attach = true; ast_create_query.database = database_name; - auto global_context = context->getGlobalContext(); - if (global_context - && global_context->getApplicationType() == Context::ApplicationType::LOCAL - && !global_context->isBackgroundExecutorsInitialized() - && ast_create_query.storage && endsWith(ast_create_query.storage->engine->name, "MergeTree")) - { - global_context->initializeBackgroundExecutors(); - } - if (ast_create_query.as_table_function) { const auto & factory = TableFunctionFactory::instance(); diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 1602e6a6a31..bbad7e782ed 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -2895,14 +2895,11 @@ void Context::setAsynchronousInsertQueue(const std::shared_ptrasync_insert_queue = ptr; } -bool Context::isBackgroundExecutorsInitialized() const +void Context::initializeBackgroundExecutorsIfNeeded() { - return is_background_executors_initialized; -} - -void Context::initializeBackgroundExecutors() -{ - assert(!is_background_executors_initialized); + auto lock = getLock(); + if (is_background_executors_initialized) + return; const size_t max_merges_and_mutations = getSettingsRef().background_pool_size * getSettingsRef().background_merges_mutations_concurrency_ratio; diff --git a/src/Interpreters/Context.h 
b/src/Interpreters/Context.h index 93be367e46d..b20274c2cb8 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -862,8 +862,7 @@ public: void setReadTaskCallback(ReadTaskCallback && callback); /// Background executors related methods - void initializeBackgroundExecutors(); - bool isBackgroundExecutorsInitialized() const; + void initializeBackgroundExecutorsIfNeeded(); MergeMutateBackgroundExecutorPtr getMergeMutateExecutor() const; OrdinaryBackgroundExecutorPtr getMovesExecutor() const; diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 5b993bce724..6d38c55bd62 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -833,15 +833,6 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) String current_database = getContext()->getCurrentDatabase(); auto database_name = create.database.empty() ? current_database : create.database; - auto global_context = getContext()->getGlobalContext(); - if (global_context - && global_context->getApplicationType() == Context::ApplicationType::LOCAL - && !global_context->isBackgroundExecutorsInitialized() - && create.storage && endsWith(create.storage->engine->name, "MergeTree")) - { - global_context->initializeBackgroundExecutors(); - } - // If this is a stub ATTACH query, read the query definition from the database if (create.attach && !create.storage && !create.columns_list) { diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 10fa18186ee..8b03c1e614d 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -205,6 +205,8 @@ MergeTreeData::MergeTreeData( , background_operations_assignee(*this, BackgroundJobsAssignee::Type::DataProcessing, getContext()) , background_moves_assignee(*this, BackgroundJobsAssignee::Type::Moving, getContext()) { + context_->getGlobalContext()->initializeBackgroundExecutorsIfNeeded(); + const auto settings = getSettings(); allow_nullable_key = attach || settings->allow_nullable_key; From eb0ce68f10e860bfc864bcca7d6bcdeca2072ab4 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 19 Oct 2021 11:27:48 +0300 Subject: [PATCH 359/438] Update 02051_symlinks_to_user_files.sh --- tests/queries/0_stateless/02051_symlinks_to_user_files.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02051_symlinks_to_user_files.sh b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh index fe3073f9ff2..dfdc71e0f0b 100755 --- a/tests/queries/0_stateless/02051_symlinks_to_user_files.sh +++ b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh @@ -19,7 +19,7 @@ file_path=$CUR_DIR/${FILE} touch ${file_path} ln -s ${file_path} ${symlink_path} -chmod +w ${symlink_path} +chmod ugo+w ${symlink_path} function cleanup() { From 40677bffa51f7ad8c905e47f81b32c80c16e5dd1 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 19 Oct 2021 12:28:49 +0300 Subject: [PATCH 360/438] Fix PVS warning. 
--- src/Processors/QueryPlan/SortingStep.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Processors/QueryPlan/SortingStep.h b/src/Processors/QueryPlan/SortingStep.h index e34ee2842da..8e253e71f44 100644 --- a/src/Processors/QueryPlan/SortingStep.h +++ b/src/Processors/QueryPlan/SortingStep.h @@ -66,11 +66,11 @@ private: UInt64 limit; SizeLimits size_limits; - size_t max_bytes_before_remerge; - double remerge_lowered_memory_bytes_ratio; - size_t max_bytes_before_external_sort; + size_t max_bytes_before_remerge = 0; + double remerge_lowered_memory_bytes_ratio = 0; + size_t max_bytes_before_external_sort = 0; VolumePtr tmp_volume; - size_t min_free_disk_space; + size_t min_free_disk_space = 0; }; } From 3dfbc80f0b0a4aec7649cf2678d4257a19b10b1a Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 19 Oct 2021 12:38:11 +0300 Subject: [PATCH 361/438] Add cases to test replaceRegexpAll_bug --- .../02100_replaceRegexpAll_bug.reference | 14 +++++++++++--- .../0_stateless/02100_replaceRegexpAll_bug.sql | 17 ++++++++++++++--- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/02100_replaceRegexpAll_bug.reference b/tests/queries/0_stateless/02100_replaceRegexpAll_bug.reference index 2bb40778ca6..993dd9b1cde 100644 --- a/tests/queries/0_stateless/02100_replaceRegexpAll_bug.reference +++ b/tests/queries/0_stateless/02100_replaceRegexpAll_bug.reference @@ -1,3 +1,11 @@ -aaaabb -b aaaa -aaaa +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02100_replaceRegexpAll_bug.sql b/tests/queries/0_stateless/02100_replaceRegexpAll_bug.sql index d0caeacfa0e..32f7f63f6d0 100644 --- a/tests/queries/0_stateless/02100_replaceRegexpAll_bug.sql +++ b/tests/queries/0_stateless/02100_replaceRegexpAll_bug.sql @@ -1,3 +1,14 @@ -SELECT trim(leading 'b ' FROM 'b aaaabb ') x; -SELECT trim(trailing 'b ' FROM 'b aaaabb ') x; -SELECT trim(both 'b ' FROM 'b aaaabb ') x; +SELECT 'aaaabb ' == trim(leading 'b ' FROM 'b aaaabb ') x; +SELECT 'b aaaa' == trim(trailing 'b ' FROM 'b aaaabb ') x; +SELECT 'aaaa' == trim(both 'b ' FROM 'b aaaabb ') x; + +SELECT '1' == replaceRegexpAll(',,1,,', '^[,]*|[,]*$', '') x; +SELECT '1' == replaceRegexpAll(',,1', '^[,]*|[,]*$', '') x; +SELECT '1' == replaceRegexpAll('1,,', '^[,]*|[,]*$', '') x; + +SELECT '1,,' == replaceRegexpOne(',,1,,', '^[,]*|[,]*$', '') x; +SELECT '1' == replaceRegexpOne(',,1', '^[,]*|[,]*$', '') x; +SELECT '1,,' == replaceRegexpOne('1,,', '^[,]*|[,]*$', '') x; + +SELECT '5935,5998,6014' == trim(BOTH ', ' FROM '5935,5998,6014, ') x; +SELECT '5935,5998,6014' == replaceRegexpAll('5935,5998,6014, ', concat('^[', regexpQuoteMeta(', '), ']*|[', regexpQuoteMeta(', '), ']*$'), '') AS x; From 36bb4033ba9a0f8dc49a6ae1f604167e284e4d67 Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 19 Oct 2021 12:44:55 +0300 Subject: [PATCH 362/438] Whitespace change in kerberized_hadoop/Dockerfile --- docker/test/integration/kerberized_hadoop/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/test/integration/kerberized_hadoop/Dockerfile b/docker/test/integration/kerberized_hadoop/Dockerfile index 11da590f901..4a2a8866b8d 100644 --- a/docker/test/integration/kerberized_hadoop/Dockerfile +++ b/docker/test/integration/kerberized_hadoop/Dockerfile @@ -1,6 +1,7 @@ # docker build -t clickhouse/kerberized-hadoop . 
FROM sequenceiq/hadoop-docker:2.7.0 + RUN sed -i -e 's/^\#baseurl/baseurl/' /etc/yum.repos.d/CentOS-Base.repo RUN sed -i -e 's/^mirrorlist/#mirrorlist/' /etc/yum.repos.d/CentOS-Base.repo RUN sed -i -e 's#http://mirror.centos.org/#http://vault.centos.org/#' /etc/yum.repos.d/CentOS-Base.repo From a92dc0a8260cc2436f098ce31cae6c5b0bdc5e03 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 19 Oct 2021 12:58:10 +0300 Subject: [PATCH 363/438] Update obsolete comments. --- src/Formats/FormatFactory.h | 2 +- src/Formats/NativeReader.cpp | 1 - src/IO/Progress.h | 2 +- src/Interpreters/Aggregator.h | 2 -- src/Interpreters/Context.h | 4 ++-- src/Processors/Formats/IOutputFormat.h | 3 ++- src/Processors/Sources/SourceWithProgress.cpp | 9 +++------ src/QueryPipeline/ProfileInfo.h | 2 +- src/QueryPipeline/QueryPipelineBuilder.h | 1 - 9 files changed, 10 insertions(+), 16 deletions(-) diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index d5784219c6a..ee3824081bb 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -42,7 +42,7 @@ FormatSettings getFormatSettings(ContextPtr context); template FormatSettings getFormatSettings(ContextPtr context, const T & settings); -/** Allows to create an IBlockInputStream or IBlockOutputStream by the name of the format. +/** Allows to create an IInputFormat or IOutputFormat by the name of the format. * Note: format and compression are independent things. */ class FormatFactory final : private boost::noncopyable diff --git a/src/Formats/NativeReader.cpp b/src/Formats/NativeReader.cpp index 2d8fdc160f5..9ef248dc904 100644 --- a/src/Formats/NativeReader.cpp +++ b/src/Formats/NativeReader.cpp @@ -56,7 +56,6 @@ NativeReader::NativeReader(ReadBuffer & istr_, UInt64 server_revision_, } } -// also resets few vars from IBlockInputStream (I didn't want to propagate resetParser upthere) void NativeReader::resetParser() { istr_concrete = nullptr; diff --git a/src/IO/Progress.h b/src/IO/Progress.h index 7118de844f2..c00eea98ff4 100644 --- a/src/IO/Progress.h +++ b/src/IO/Progress.h @@ -121,7 +121,7 @@ struct Progress /** Callback to track the progress of the query. - * Used in IBlockInputStream and Context. + * Used in QueryPipeline and Context. * The function takes the number of rows in the last block, the number of bytes in the last block. * Note that the callback can be called from different threads. */ diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index 6d6bf61834b..3c53769e128 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -43,8 +43,6 @@ namespace ErrorCodes extern const int UNKNOWN_AGGREGATED_DATA_VARIANT; } -class IBlockOutputStream; - /** Different data structures that can be used for aggregation * For efficiency, the aggregation data itself is put into the pool. * Data and pool ownership (states of aggregate functions) diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 5a28e3fac97..c6bb266120a 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -636,13 +636,13 @@ public: const Settings & getSettingsRef() const { return settings; } void setProgressCallback(ProgressCallback callback); - /// Used in InterpreterSelectQuery to pass it to the IBlockInputStream. + /// Used in executeQuery() to pass it to the QueryPipeline. 
ProgressCallback getProgressCallback() const; void setFileProgressCallback(FileProgressCallback && callback) { file_progress_callback = callback; } FileProgressCallback getFileProgressCallback() const { return file_progress_callback; } - /** Set in executeQuery and InterpreterSelectQuery. Then it is used in IBlockInputStream, + /** Set in executeQuery and InterpreterSelectQuery. Then it is used in QueryPipeline, * to update and monitor information about the total number of resources spent for the query. */ void setProcessListElement(QueryStatus * elem); diff --git a/src/Processors/Formats/IOutputFormat.h b/src/Processors/Formats/IOutputFormat.h index b647338d6fb..ba4dcee6f70 100644 --- a/src/Processors/Formats/IOutputFormat.h +++ b/src/Processors/Formats/IOutputFormat.h @@ -72,7 +72,8 @@ public: InputPort & getPort(PortKind kind) { return *std::next(inputs.begin(), kind); } - /// Compatible to IBlockOutputStream interface + /// Compatibility with old interface. + /// TODO: separate formats and processors. void write(const Block & block); diff --git a/src/Processors/Sources/SourceWithProgress.cpp b/src/Processors/Sources/SourceWithProgress.cpp index 15d64dee3ee..0ebdd968997 100644 --- a/src/Processors/Sources/SourceWithProgress.cpp +++ b/src/Processors/Sources/SourceWithProgress.cpp @@ -69,8 +69,7 @@ void SourceWithProgress::work() } } -/// Aggregated copy-paste from IBlockInputStream::progressImpl. -/// Most of this must be done in PipelineExecutor outside. Now it's done for compatibility with IBlockInputStream. +/// TODO: Most of this must be done in PipelineExecutor outside. void SourceWithProgress::progress(const Progress & value) { was_progress_called = true; @@ -135,14 +134,12 @@ void SourceWithProgress::progress(const Progress & value) if (last_profile_events_update_time + profile_events_update_period_microseconds < total_elapsed_microseconds) { - /// Should be done in PipelineExecutor. - /// It is here for compatibility with IBlockInputsStream. + /// TODO: Should be done in PipelineExecutor. CurrentThread::updatePerformanceCounters(); last_profile_events_update_time = total_elapsed_microseconds; } - /// Should be done in PipelineExecutor. - /// It is here for compatibility with IBlockInputsStream. + /// TODO: Should be done in PipelineExecutor. limits.speed_limits.throttle(progress.read_rows, progress.read_bytes, total_rows, total_elapsed_microseconds); if (quota && limits.mode == LimitsMode::LIMITS_TOTAL) diff --git a/src/QueryPipeline/ProfileInfo.h b/src/QueryPipeline/ProfileInfo.h index 335092ce244..0a5800cd409 100644 --- a/src/QueryPipeline/ProfileInfo.h +++ b/src/QueryPipeline/ProfileInfo.h @@ -12,7 +12,7 @@ class Block; class ReadBuffer; class WriteBuffer; -/// Information for profiling. See IBlockInputStream.h +/// Information for profiling. See SourceWithProgress.h struct ProfileInfo { bool started = false; diff --git a/src/QueryPipeline/QueryPipelineBuilder.h b/src/QueryPipeline/QueryPipelineBuilder.h index d2bbea03ce5..12f74805173 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.h +++ b/src/QueryPipeline/QueryPipelineBuilder.h @@ -129,7 +129,6 @@ public: void setLeafLimits(const SizeLimits & limits) { pipe.setLeafLimits(limits); } void setQuota(const std::shared_ptr & quota) { pipe.setQuota(quota); } - /// For compatibility with IBlockInputStream. 
void setProgressCallback(const ProgressCallback & callback); void setProcessListElement(QueryStatus * elem); From cfa685c29ca99679eb2130e99afafc53e9c3b9e7 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 19 Oct 2021 13:16:30 +0300 Subject: [PATCH 364/438] Fix another test. --- .../0_stateless/01823_explain_json.reference | 28 ------------------- 1 file changed, 28 deletions(-) diff --git a/tests/queries/0_stateless/01823_explain_json.reference b/tests/queries/0_stateless/01823_explain_json.reference index 5c7845a22d5..f75cd69dbf3 100644 --- a/tests/queries/0_stateless/01823_explain_json.reference +++ b/tests/queries/0_stateless/01823_explain_json.reference @@ -111,31 +111,3 @@ } ], "Limit": 3, --- - "Sort Description": [ - { - "Column": "number", - "Ascending": false, - "With Fill": false - }, - { - "Column": "plus(number, 1)", - "Ascending": true, - "With Fill": false - } - ], - "Limit": 3, --- - "Sort Description": [ - { - "Column": "number", - "Ascending": false, - "With Fill": false - }, - { - "Column": "plus(number, 1)", - "Ascending": true, - "With Fill": false - } - ], - "Limit": 3, From 4fbd332bf1bc0db29dee09699c4c737bfd2e64b0 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 19 Oct 2021 11:53:55 +0000 Subject: [PATCH 365/438] Done --- .../sql-reference/statements/select/from.md | 2 +- .../sql-reference/statements/select/from.md | 2 +- .../sql-reference/statements/select/from.md | 2 +- .../QueryPlan/ReadFromMergeTree.cpp | 20 ++++++++++--------- src/Storages/MergeTree/MergeTreeData.h | 6 +----- .../queries/0_stateless/01236_graphite_mt.sql | 4 +--- 6 files changed, 16 insertions(+), 20 deletions(-) diff --git a/docs/en/sql-reference/statements/select/from.md b/docs/en/sql-reference/statements/select/from.md index 7c5ea732122..df30a0fb0d2 100644 --- a/docs/en/sql-reference/statements/select/from.md +++ b/docs/en/sql-reference/statements/select/from.md @@ -20,7 +20,7 @@ Subquery is another `SELECT` query that may be specified in parenthesis inside ` When `FINAL` is specified, ClickHouse fully merges the data before returning the result and thus performs all data transformations that happen during merges for the given table engine. -It is applicable when selecting data from tables that use the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md)-engine family (except `GraphiteMergeTree`). Also supported for: +It is applicable when selecting data from tables that use the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md)-engine family. Also supported for: - [Replicated](../../../engines/table-engines/mergetree-family/replication.md) versions of `MergeTree` engines. - [View](../../../engines/table-engines/special/view.md), [Buffer](../../../engines/table-engines/special/buffer.md), [Distributed](../../../engines/table-engines/special/distributed.md), and [MaterializedView](../../../engines/table-engines/special/materializedview.md) engines that operate over other engines, provided they were created over `MergeTree`-engine tables. diff --git a/docs/ru/sql-reference/statements/select/from.md b/docs/ru/sql-reference/statements/select/from.md index b62b2fd51d4..0711d602cd1 100644 --- a/docs/ru/sql-reference/statements/select/from.md +++ b/docs/ru/sql-reference/statements/select/from.md @@ -20,7 +20,7 @@ toc_title: FROM Если в запросе используется модификатор `FINAL`, то ClickHouse полностью мёржит данные перед выдачей результата, таким образом выполняя все преобразования данных, которые производятся движком таблиц при мёржах. 
-Он применим при выборе данных из таблиц, использующих [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md)- семейство движков (кроме `GraphiteMergeTree`). Также поддерживается для: +Он применим при выборе данных из таблиц, использующих [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md)- семейство движков. Также поддерживается для: - [Replicated](../../../engines/table-engines/mergetree-family/replication.md) варианты исполнения `MergeTree` движков. - [View](../../../engines/table-engines/special/view.md), [Buffer](../../../engines/table-engines/special/buffer.md), [Distributed](../../../engines/table-engines/special/distributed.md), и [MaterializedView](../../../engines/table-engines/special/materializedview.md), которые работают поверх других движков, если они созданы для таблиц с движками семейства `MergeTree`. diff --git a/docs/zh/sql-reference/statements/select/from.md b/docs/zh/sql-reference/statements/select/from.md index fae25c0c3c1..c47e74e5e1f 100644 --- a/docs/zh/sql-reference/statements/select/from.md +++ b/docs/zh/sql-reference/statements/select/from.md @@ -20,7 +20,7 @@ toc_title: FROM 当 `FINAL` 被指定,ClickHouse会在返回结果之前完全合并数据,从而执行给定表引擎合并期间发生的所有数据转换。 -它适用于从使用 [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md)-引擎族(除了 `GraphiteMergeTree`). 还支持: +它适用于从使用 [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md)-引擎族. 还支持: - [Replicated](../../../engines/table-engines/mergetree-family/replication.md) 版本 `MergeTree` 引擎 - [View](../../../engines/table-engines/special/view.md), [Buffer](../../../engines/table-engines/special/buffer.md), [Distributed](../../../engines/table-engines/special/distributed.md),和 [MaterializedView](../../../engines/table-engines/special/materializedview.md) 在其他引擎上运行的引擎,只要是它们底层是 `MergeTree`-引擎表即可。 diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 57785a5cc2d..3b1d7254e2c 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -506,38 +507,39 @@ static void addMergingFinal( const auto & header = pipe.getHeader(); size_t num_outputs = pipe.numOutputPorts(); + auto now = time(nullptr); + auto get_merging_processor = [&]() -> MergingTransformPtr { switch (merging_params.mode) { case MergeTreeData::MergingParams::Ordinary: - { return std::make_shared(header, num_outputs, - sort_description, max_block_size); - } + sort_description, max_block_size); case MergeTreeData::MergingParams::Collapsing: return std::make_shared(header, num_outputs, - sort_description, merging_params.sign_column, true, max_block_size); + sort_description, merging_params.sign_column, true, max_block_size); case MergeTreeData::MergingParams::Summing: return std::make_shared(header, num_outputs, - sort_description, merging_params.columns_to_sum, partition_key_columns, max_block_size); + sort_description, merging_params.columns_to_sum, partition_key_columns, max_block_size); case MergeTreeData::MergingParams::Aggregating: return std::make_shared(header, num_outputs, - sort_description, max_block_size); + sort_description, max_block_size); case MergeTreeData::MergingParams::Replacing: return std::make_shared(header, num_outputs, - sort_description, merging_params.version_column, max_block_size); + sort_description, merging_params.version_column, max_block_size); case MergeTreeData::MergingParams::VersionedCollapsing: 
return std::make_shared(header, num_outputs, - sort_description, merging_params.sign_column, max_block_size); + sort_description, merging_params.sign_column, max_block_size); case MergeTreeData::MergingParams::Graphite: - throw Exception("GraphiteMergeTree doesn't support FINAL", ErrorCodes::LOGICAL_ERROR); + return std::make_shared(header, num_outputs, + sort_description, max_block_size, merging_params.graphite_params, now); } __builtin_unreachable(); diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index c4536c463d5..d20d0024222 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -402,11 +402,7 @@ public: bool supportsFinal() const override { - return merging_params.mode == MergingParams::Collapsing - || merging_params.mode == MergingParams::Summing - || merging_params.mode == MergingParams::Aggregating - || merging_params.mode == MergingParams::Replacing - || merging_params.mode == MergingParams::VersionedCollapsing; + return merging_params.mode != MergingParams::Ordinary; } bool supportsSubcolumns() const override { return true; } diff --git a/tests/queries/0_stateless/01236_graphite_mt.sql b/tests/queries/0_stateless/01236_graphite_mt.sql index a6dd4b8b6fb..3697a1d01d8 100644 --- a/tests/queries/0_stateless/01236_graphite_mt.sql +++ b/tests/queries/0_stateless/01236_graphite_mt.sql @@ -32,8 +32,6 @@ WITH dates AS select 1, 'max_2', older_date - number * 60 - 30, number, 1, number from dates, numbers(1200) union all select 2, 'max_2', older_date - number * 60 - 30, number, 1, number from dates, numbers(1200); -optimize table test_graphite final; - -select key, Path, Value, Version, col from test_graphite order by key, Path, Time desc; +select key, Path, Value, Version, col from test_graphite order by key, Path, Time desc final; drop table test_graphite; From 1114d06bc0191bc2b204cfeae3aa23ac6673c610 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 19 Oct 2021 12:11:18 +0000 Subject: [PATCH 366/438] Done --- src/Functions/readWkt.cpp | 32 ++++++------- src/Functions/registerFunctionsGeo.cpp | 4 +- src/Functions/svg.cpp | 1 + tests/fuzz/all.dict | 9 ++-- tests/fuzz/dictionaries/functions.dict | 9 ++-- tests/queries/0_stateless/01300_read_wkt.sql | 14 +++--- tests/queries/0_stateless/01300_svg.sql | 48 ++++++++++---------- 7 files changed, 60 insertions(+), 57 deletions(-) diff --git a/src/Functions/readWkt.cpp b/src/Functions/readWkt.cpp index 14e12fb310c..c3ae6516e0f 100644 --- a/src/Functions/readWkt.cpp +++ b/src/Functions/readWkt.cpp @@ -18,10 +18,10 @@ namespace ErrorCodes template -class FunctionReadWkt : public IFunction +class FunctionReadWKT : public IFunction { public: - explicit FunctionReadWkt() = default; + explicit FunctionReadWKT() = default; static constexpr const char * name = NameHolder::name; @@ -72,36 +72,36 @@ public: static FunctionPtr create(ContextPtr) { - return std::make_shared>(); + return std::make_shared>(); } }; -struct ReadWktPointNameHolder +struct ReadWKTPointNameHolder { - static constexpr const char * name = "readWktPoint"; + static constexpr const char * name = "readWKTPoint"; }; -struct ReadWktRingNameHolder +struct ReadWKTRingNameHolder { - static constexpr const char * name = "readWktRing"; + static constexpr const char * name = "readWKTRing"; }; -struct ReadWktPolygonNameHolder +struct ReadWKTPolygonNameHolder { - static constexpr const char * name = "readWktPolygon"; + static constexpr const char * name = "readWKTPolygon"; }; -struct 
ReadWktMultiPolygonNameHolder +struct ReadWKTMultiPolygonNameHolder { - static constexpr const char * name = "readWktMultiPolygon"; + static constexpr const char * name = "readWKTMultiPolygon"; }; -void registerFunctionReadWkt(FunctionFactory & factory) +void registerFunctionReadWKT(FunctionFactory & factory) { - factory.registerFunction, ReadWktPointNameHolder>>(); - factory.registerFunction, ReadWktRingNameHolder>>(); - factory.registerFunction, ReadWktPolygonNameHolder>>(); - factory.registerFunction, ReadWktMultiPolygonNameHolder>>(); + factory.registerFunction, ReadWKTPointNameHolder>>(); + factory.registerFunction, ReadWKTRingNameHolder>>(); + factory.registerFunction, ReadWKTPolygonNameHolder>>(); + factory.registerFunction, ReadWKTMultiPolygonNameHolder>>(); } } diff --git a/src/Functions/registerFunctionsGeo.cpp b/src/Functions/registerFunctionsGeo.cpp index a0ae38f6b85..fd55c9cc20a 100644 --- a/src/Functions/registerFunctionsGeo.cpp +++ b/src/Functions/registerFunctionsGeo.cpp @@ -23,7 +23,7 @@ void registerFunctionGeohashEncode(FunctionFactory & factory); void registerFunctionGeohashDecode(FunctionFactory & factory); void registerFunctionGeohashesInBox(FunctionFactory & factory); void registerFunctionWkt(FunctionFactory & factory); -void registerFunctionReadWkt(FunctionFactory & factory); +void registerFunctionReadWKT(FunctionFactory & factory); void registerFunctionSvg(FunctionFactory & factory); #if USE_H3 @@ -79,7 +79,7 @@ void registerFunctionsGeo(FunctionFactory & factory) registerFunctionGeohashDecode(factory); registerFunctionGeohashesInBox(factory); registerFunctionWkt(factory); - registerFunctionReadWkt(factory); + registerFunctionReadWKT(factory); registerFunctionSvg(factory); #if USE_H3 diff --git a/src/Functions/svg.cpp b/src/Functions/svg.cpp index 4495e668add..b3a89c0393c 100644 --- a/src/Functions/svg.cpp +++ b/src/Functions/svg.cpp @@ -102,6 +102,7 @@ public: void registerFunctionSvg(FunctionFactory & factory) { factory.registerFunction(); + factory.registerAlias("SVG", "svg"); } } diff --git a/tests/fuzz/all.dict b/tests/fuzz/all.dict index 4a9afc348cf..bf25f1fa484 100644 --- a/tests/fuzz/all.dict +++ b/tests/fuzz/all.dict @@ -985,10 +985,10 @@ "RANGE" "rank" "rankCorr" -"readWktMultiPolygon" -"readWktPoint" -"readWktPolygon" -"readWktRing" +"readWKTMultiPolygon" +"readWKTPoint" +"readWKTPolygon" +"readWKTRing" "REAL" "REFRESH" "regexpQuoteMeta" @@ -1177,6 +1177,7 @@ "sumWithOverflow" "SUSPEND" "svg" +"SVG" "SYNC" "synonyms" "SYNTAX" diff --git a/tests/fuzz/dictionaries/functions.dict b/tests/fuzz/dictionaries/functions.dict index fb35375f284..722e931dc09 100644 --- a/tests/fuzz/dictionaries/functions.dict +++ b/tests/fuzz/dictionaries/functions.dict @@ -52,6 +52,7 @@ "h3GetResolution" "h3EdgeLengthM" "svg" +"SVG" "equals" "geohashesInBox" "polygonsIntersectionCartesian" @@ -114,7 +115,7 @@ "replaceOne" "emptyArrayInt32" "extract" -"readWktPolygon" +"readWKTPolygon" "notILike" "geohashDecode" "toModifiedJulianDay" @@ -164,7 +165,7 @@ "lessOrEquals" "subtractQuarters" "ngramSearch" -"readWktRing" +"readWKTRing" "trimRight" "endsWith" "ngramDistanceCaseInsensitive" @@ -713,13 +714,13 @@ "s2RectContains" "toDate" "regexpQuoteMeta" -"readWktMultiPolygon" +"readWKTMultiPolygon" "emptyArrayString" "bitmapOr" "cutWWW" "emptyArrayInt8" "less" -"readWktPoint" +"readWKTPoint" "reinterpretAsDateTime" "notEquals" "geoToS2" diff --git a/tests/queries/0_stateless/01300_read_wkt.sql b/tests/queries/0_stateless/01300_read_wkt.sql index 8121bdf6084..1995c5153d7 100644 --- 
a/tests/queries/0_stateless/01300_read_wkt.sql +++ b/tests/queries/0_stateless/01300_read_wkt.sql @@ -1,14 +1,14 @@ -SELECT readWktPoint('POINT(0 0)'); -SELECT readWktPolygon('POLYGON((1 0,10 0,10 10,0 10,1 0))'); -SELECT readWktPolygon('POLYGON((0 0,10 0,10 10,0 10,0 0),(4 4,5 4,5 5,4 5,4 4))'); -SELECT readWktMultiPolygon('MULTIPOLYGON(((2 0,10 0,10 10,0 10,2 0),(4 4,5 4,5 5,4 5,4 4)),((-10 -10,-10 -9,-9 10,-10 -10)))'); +SELECT readWKTPoint('POINT(0 0)'); +SELECT readWKTPolygon('POLYGON((1 0,10 0,10 10,0 10,1 0))'); +SELECT readWKTPolygon('POLYGON((0 0,10 0,10 10,0 10,0 0),(4 4,5 4,5 5,4 5,4 4))'); +SELECT readWKTMultiPolygon('MULTIPOLYGON(((2 0,10 0,10 10,0 10,2 0),(4 4,5 4,5 5,4 5,4 4)),((-10 -10,-10 -9,-9 10,-10 -10)))'); DROP TABLE IF EXISTS geo; CREATE TABLE geo (s String, id Int) engine=Memory(); INSERT INTO geo VALUES ('POINT(0 0)', 1); INSERT INTO geo VALUES ('POINT(1 0)', 2); INSERT INTO geo VALUES ('POINT(2 0)', 3); -SELECT readWktPoint(s) FROM geo ORDER BY id; +SELECT readWKTPoint(s) FROM geo ORDER BY id; DROP TABLE IF EXISTS geo; CREATE TABLE geo (s String, id Int) engine=Memory(); @@ -18,13 +18,13 @@ INSERT INTO geo VALUES ('POLYGON((2 0,10 0,10 10,0 10,2 0))', 3); INSERT INTO geo VALUES ('POLYGON((0 0,10 0,10 10,0 10,0 0),(4 4,5 4,5 5,4 5,4 4))', 4); INSERT INTO geo VALUES ('POLYGON((2 0,10 0,10 10,0 10,2 0),(4 4,5 4,5 5,4 5,4 4))', 5); INSERT INTO geo VALUES ('POLYGON((1 0,10 0,10 10,0 10,1 0),(4 4,5 4,5 5,4 5,4 4))', 6); -SELECT readWktPolygon(s) FROM geo ORDER BY id; +SELECT readWKTPolygon(s) FROM geo ORDER BY id; DROP TABLE IF EXISTS geo; CREATE TABLE geo (s String, id Int) engine=Memory(); INSERT INTO geo VALUES ('MULTIPOLYGON(((1 0,10 0,10 10,0 10,1 0),(4 4,5 4,5 5,4 5,4 4)),((-10 -10,-10 -9,-9 10,-10 -10)))', 1); INSERT INTO geo VALUES ('MULTIPOLYGON(((0 0,10 0,10 10,0 10,0 0),(4 4,5 4,5 5,4 5,4 4)),((-10 -10,-10 -9,-9 10,-10 -10)))', 2); INSERT INTO geo VALUES ('MULTIPOLYGON(((2 0,10 0,10 10,0 10,2 0),(4 4,5 4,5 5,4 5,4 4)),((-10 -10,-10 -9,-9 10,-10 -10)))', 3); -SELECT readWktMultiPolygon(s) FROM geo ORDER BY id; +SELECT readWKTMultiPolygon(s) FROM geo ORDER BY id; DROP TABLE geo; diff --git a/tests/queries/0_stateless/01300_svg.sql b/tests/queries/0_stateless/01300_svg.sql index a1deb1745c3..cf794f2190b 100644 --- a/tests/queries/0_stateless/01300_svg.sql +++ b/tests/queries/0_stateless/01300_svg.sql @@ -1,50 +1,50 @@ -SELECT svg((0., 0.)); -SELECT svg([(0., 0.), (10, 0), (10, 10), (0, 10)]); -SELECT svg([[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]]); -SELECT svg([[[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], [[(-10., -10.), (-10, -9), (-9, 10)]]]); -SELECT svg((0., 0.), 'b'); -SELECT svg([(0., 0.), (10, 0), (10, 10), (0, 10)], 'b'); -SELECT svg([[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], 'b'); -SELECT svg([[[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], [[(-10., -10.), (-10, -9), (-9, 10)]]], 'b'); +SELECT SVG((0., 0.)); +SELECT SVG([(0., 0.), (10, 0), (10, 10), (0, 10)]); +SELECT SVG([[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]]); +SELECT SVG([[[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], [[(-10., -10.), (-10, -9), (-9, 10)]]]); +SELECT SVG((0., 0.), 'b'); +SELECT SVG([(0., 0.), (10, 0), (10, 10), (0, 10)], 'b'); +SELECT SVG([[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], 'b'); +SELECT SVG([[[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), 
(5, 4), (5, 5), (4, 5)]], [[(-10., -10.), (-10, -9), (-9, 10)]]], 'b'); DROP TABLE IF EXISTS geo; CREATE TABLE geo (p Tuple(Float64, Float64), s String, id Int) engine=Memory(); INSERT INTO geo VALUES ((0., 0.), 'b', 1); INSERT INTO geo VALUES ((1., 0.), 'c', 2); INSERT INTO geo VALUES ((2., 0.), 'd', 3); -SELECT svg(p) FROM geo ORDER BY id; -SELECT svg(p, 'b') FROM geo ORDER BY id; -SELECT svg((0., 0.), s) FROM geo ORDER BY id; -SELECT svg(p, s) FROM geo ORDER BY id; +SELECT SVG(p) FROM geo ORDER BY id; +SELECT SVG(p, 'b') FROM geo ORDER BY id; +SELECT SVG((0., 0.), s) FROM geo ORDER BY id; +SELECT SVG(p, s) FROM geo ORDER BY id; DROP TABLE IF EXISTS geo; CREATE TABLE geo (p Array(Tuple(Float64, Float64)), s String, id Int) engine=Memory(); INSERT INTO geo VALUES ([(0., 0.), (10, 0), (10, 10), (0, 10)], 'b', 1); INSERT INTO geo VALUES ([(1., 0.), (10, 0), (10, 10), (0, 10)], 'c', 2); INSERT INTO geo VALUES ([(2., 0.), (10, 0), (10, 10), (0, 10)], 'd', 3); -SELECT svg(p) FROM geo ORDER BY id; -SELECT svg(p, 'b') FROM geo ORDER BY id; -SELECT svg([(0., 0.), (10, 0), (10, 10), (0, 10)], s) FROM geo ORDER BY id; -SELECT svg(p, s) FROM geo ORDER BY id; +SELECT SVG(p) FROM geo ORDER BY id; +SELECT SVG(p, 'b') FROM geo ORDER BY id; +SELECT SVG([(0., 0.), (10, 0), (10, 10), (0, 10)], s) FROM geo ORDER BY id; +SELECT SVG(p, s) FROM geo ORDER BY id; DROP TABLE IF EXISTS geo; CREATE TABLE geo (p Array(Array(Tuple(Float64, Float64))), s String, id Int) engine=Memory(); INSERT INTO geo VALUES ([[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4, 4), (5, 4), (5, 5), (4, 5)]], 'b', 1); INSERT INTO geo VALUES ([[(1., 0.), (10, 0), (10, 10), (0, 10)], [(4, 4), (5, 4), (5, 5), (4, 5)]], 'c', 2); INSERT INTO geo VALUES ([[(2., 0.), (10, 0), (10, 10), (0, 10)], [(4, 4), (5, 4), (5, 5), (4, 5)]], 'd', 3); -SELECT svg(p) FROM geo ORDER BY id; -SELECT svg(p, 'b') FROM geo ORDER BY id; -SELECT svg([[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], s) FROM geo ORDER BY id; -SELECT svg(p, s) FROM geo ORDER BY id; +SELECT SVG(p) FROM geo ORDER BY id; +SELECT SVG(p, 'b') FROM geo ORDER BY id; +SELECT SVG([[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], s) FROM geo ORDER BY id; +SELECT SVG(p, s) FROM geo ORDER BY id; DROP TABLE IF EXISTS geo; CREATE TABLE geo (p Array(Array(Array(Tuple(Float64, Float64)))), s String, id Int) engine=Memory(); INSERT INTO geo VALUES ([[[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], [[(-10., -10.), (-10, -9), (-9, 10)]]], 'b', 1); INSERT INTO geo VALUES ([[[(1., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], [[(-10., -10.), (-10, -9), (-9, 10)]]], 'c', 2); INSERT INTO geo VALUES ([[[(2., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], [[(-10., -10.), (-10, -9), (-9, 10)]]], 'd', 3); -SELECT svg(p) FROM geo ORDER BY id; -SELECT svg(p, 'b') FROM geo ORDER BY id; -SELECT svg([[[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], [[(-10., -10.), (-10, -9), (-9, 10)]]], s) FROM geo ORDER BY id; -SELECT svg(p, s) FROM geo ORDER BY id; +SELECT SVG(p) FROM geo ORDER BY id; +SELECT SVG(p, 'b') FROM geo ORDER BY id; +SELECT SVG([[[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], [[(-10., -10.), (-10, -9), (-9, 10)]]], s) FROM geo ORDER BY id; +SELECT SVG(p, s) FROM geo ORDER BY id; DROP TABLE geo; From 75487be8998d02b22a800e27463a7b942053a80d Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 19 Oct 2021 12:16:33 
+0000 Subject: [PATCH 367/438] White list of storages that supports final --- src/Storages/MergeTree/MergeTreeData.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index d20d0024222..2ea6a89002c 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -402,7 +402,12 @@ public: bool supportsFinal() const override { - return merging_params.mode != MergingParams::Ordinary; + return merging_params.mode == MergingParams::Collapsing + || merging_params.mode == MergingParams::Summing + || merging_params.mode == MergingParams::Aggregating + || merging_params.mode == MergingParams::Replacing + || merging_params.mode == MergingParams::Graphite + || merging_params.mode == MergingParams::VersionedCollapsing; } bool supportsSubcolumns() const override { return true; } From 08f3a01830867cac67703cd680a1c7280a44a079 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 19 Oct 2021 16:26:16 +0300 Subject: [PATCH 368/438] Update 01236_graphite_mt.sql --- tests/queries/0_stateless/01236_graphite_mt.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01236_graphite_mt.sql b/tests/queries/0_stateless/01236_graphite_mt.sql index 3697a1d01d8..1d531f88ecb 100644 --- a/tests/queries/0_stateless/01236_graphite_mt.sql +++ b/tests/queries/0_stateless/01236_graphite_mt.sql @@ -32,6 +32,6 @@ WITH dates AS select 1, 'max_2', older_date - number * 60 - 30, number, 1, number from dates, numbers(1200) union all select 2, 'max_2', older_date - number * 60 - 30, number, 1, number from dates, numbers(1200); -select key, Path, Value, Version, col from test_graphite order by key, Path, Time desc final; +select key, Path, Value, Version, col from test_graphite final order by key, Path, Time desc; drop table test_graphite; From 1d78f1c63b525812699fc16586f7fb7409162bc0 Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 19 Oct 2021 16:40:06 +0300 Subject: [PATCH 369/438] Fix ca-bundle.crt in clickhouse/kerberized-hadoop --- docker/test/integration/kerberized_hadoop/Dockerfile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docker/test/integration/kerberized_hadoop/Dockerfile b/docker/test/integration/kerberized_hadoop/Dockerfile index 4a2a8866b8d..7bc0a99f9e9 100644 --- a/docker/test/integration/kerberized_hadoop/Dockerfile +++ b/docker/test/integration/kerberized_hadoop/Dockerfile @@ -5,6 +5,10 @@ FROM sequenceiq/hadoop-docker:2.7.0 RUN sed -i -e 's/^\#baseurl/baseurl/' /etc/yum.repos.d/CentOS-Base.repo RUN sed -i -e 's/^mirrorlist/#mirrorlist/' /etc/yum.repos.d/CentOS-Base.repo RUN sed -i -e 's#http://mirror.centos.org/#http://vault.centos.org/#' /etc/yum.repos.d/CentOS-Base.repo + +# https://community.letsencrypt.org/t/rhel-centos-6-openssl-client-compatibility-after-dst-root-ca-x3-expiration/161032/81 +RUN sed -i s/xMDkzMDE0MDExNVow/0MDkzMDE4MTQwM1ow/ /etc/pki/tls/certs/ca-bundle.crt + RUN yum clean all && \ rpm --rebuilddb && \ yum -y update && \ From acf416900dbc3181859c674d13d46849e848d1bf Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 19 Oct 2021 16:45:03 +0300 Subject: [PATCH 370/438] Minor fix in clickhouse/kerberized-hadoop Dockerfile --- docker/test/integration/kerberized_hadoop/Dockerfile | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/docker/test/integration/kerberized_hadoop/Dockerfile b/docker/test/integration/kerberized_hadoop/Dockerfile index 7bc0a99f9e9..00944cbfc00 100644 --- 
a/docker/test/integration/kerberized_hadoop/Dockerfile +++ b/docker/test/integration/kerberized_hadoop/Dockerfile @@ -2,9 +2,9 @@ FROM sequenceiq/hadoop-docker:2.7.0 -RUN sed -i -e 's/^\#baseurl/baseurl/' /etc/yum.repos.d/CentOS-Base.repo -RUN sed -i -e 's/^mirrorlist/#mirrorlist/' /etc/yum.repos.d/CentOS-Base.repo -RUN sed -i -e 's#http://mirror.centos.org/#http://vault.centos.org/#' /etc/yum.repos.d/CentOS-Base.repo +RUN sed -i -e 's/^\#baseurl/baseurl/' /etc/yum.repos.d/CentOS-Base.repo && \ + sed -i -e 's/^mirrorlist/#mirrorlist/' /etc/yum.repos.d/CentOS-Base.repo && \ + sed -i -e 's#http://mirror.centos.org/#http://vault.centos.org/#' /etc/yum.repos.d/CentOS-Base.repo # https://community.letsencrypt.org/t/rhel-centos-6-openssl-client-compatibility-after-dst-root-ca-x3-expiration/161032/81 RUN sed -i s/xMDkzMDE0MDExNVow/0MDkzMDE4MTQwM1ow/ /etc/pki/tls/certs/ca-bundle.crt @@ -14,8 +14,9 @@ RUN yum clean all && \ yum -y update && \ yum -y install yum-plugin-ovl && \ yum --quiet -y install krb5-workstation.x86_64 + RUN cd /tmp && \ - curl http://archive.apache.org/dist/commons/daemon/source/commons-daemon-1.0.15-src.tar.gz -o commons-daemon-1.0.15-src.tar.gz && \ + curl http://archive.apache.org/dist/commons/daemon/source/commons-daemon-1.0.15-src.tar.gz -o commons-daemon-1.0.15-src.tar.gz && \ tar xzf commons-daemon-1.0.15-src.tar.gz && \ cd commons-daemon-1.0.15-src/src/native/unix && \ ./configure && \ From 0f2e23d775f39d49c32b2a0d03f61b46b468fe35 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 19 Oct 2021 17:41:59 +0300 Subject: [PATCH 371/438] Update StorageExecutable.cpp --- src/Storages/StorageExecutable.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageExecutable.cpp b/src/Storages/StorageExecutable.cpp index 6a82fc88977..16647d0b60f 100644 --- a/src/Storages/StorageExecutable.cpp +++ b/src/Storages/StorageExecutable.cpp @@ -122,7 +122,7 @@ Pipe StorageExecutable::read( if (!std::filesystem::exists(std::filesystem::path(script_path))) throw Exception(ErrorCodes::UNSUPPORTED_METHOD, - "Executable file {} does not exists inside user scripts folder {}", + "Executable file {} does not exist inside user scripts folder {}", script_name, user_scripts_path); From cef993233fa5d2deb67e64287b628f645d81fad6 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Tue, 19 Oct 2021 15:54:28 +0300 Subject: [PATCH 372/438] Attempt to fix #30162 Added some logging to the Session --- src/Interpreters/Session.cpp | 52 ++++++++++++++++++++++++++++++++++-- src/Interpreters/Session.h | 2 ++ 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index 98d0bcb88ac..75117bd860c 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -240,10 +241,34 @@ void Session::shutdownNamedSessions() NamedSessionsStorage::instance().shutdown(); } +namespace +{ +String getSessionPrefix(ClientInfo::Interface interface) +{ + switch (interface) + { + case ClientInfo::Interface::TCP: + return "TCP"; + case ClientInfo::Interface::HTTP: + return "HTTP"; + case ClientInfo::Interface::GRPC: + return "GRPC"; + case ClientInfo::Interface::MYSQL: + return "MySQL"; + case ClientInfo::Interface::POSTGRESQL: + return "PostgreSQL"; + case ClientInfo::Interface::LOCAL: + return "Local"; + case ClientInfo::Interface::TCP_INTERSERVER: + return "Interserver"; + } +} +} Session::Session(const ContextPtr & global_context_, 
ClientInfo::Interface interface_) : session_id(UUIDHelpers::generateV4()), - global_context(global_context_) + global_context(global_context_), + log(&Poco::Logger::get(getSessionPrefix(interface_) + "-Session")) { prepared_client_info.emplace(); prepared_client_info->interface = interface_; @@ -253,6 +278,12 @@ Session::Session(Session &&) = default; Session::~Session() { + LOG_DEBUG(log, "{} Destroying {} of user {}", + toString(session_id), + (named_session ? "named session '" + named_session->key.second + "'" : "unnamed session"), + (user_id ? toString(*user_id) : "") + ); + /// Early release a NamedSessionData. if (named_session) named_session->release(); @@ -298,12 +329,18 @@ void Session::authenticate(const Credentials & credentials_, const Poco::Net::So if ((address == Poco::Net::SocketAddress{}) && (prepared_client_info->interface == ClientInfo::Interface::LOCAL)) address = Poco::Net::SocketAddress{"127.0.0.1", 0}; + LOG_DEBUG(log, "{} Authenticating user '{}' from {}", + toString(session_id), credentials_.getUserName(), address.toString()); + try { user_id = global_context->getAccessControlManager().login(credentials_, address.host()); + LOG_DEBUG(log, "{} Authenticated with global context as user {}", + toString(session_id), user_id ? toString(*user_id) : ""); } catch (const Exception & e) { + LOG_DEBUG(log, "{} Authentication failed with error: {}", toString(session_id), e.what()); if (auto session_log = getSessionLog()) session_log->addLoginFailure(session_id, *prepared_client_info, credentials_.getUserName(), e); throw; @@ -336,6 +373,8 @@ ContextMutablePtr Session::makeSessionContext() if (query_context_created) throw Exception("Session context must be created before any query context", ErrorCodes::LOGICAL_ERROR); + LOG_DEBUG(log, "{} Creating session context with user_id: {}", + toString(session_id), user_id ? toString(*user_id) : ""); /// Make a new session context. ContextMutablePtr new_session_context; new_session_context = Context::createCopy(global_context); @@ -364,6 +403,9 @@ ContextMutablePtr Session::makeSessionContext(const String & session_name_, std: if (query_context_created) throw Exception("Session context must be created before any query context", ErrorCodes::LOGICAL_ERROR); + LOG_DEBUG(log, "{} Creating named session context with name: {}, user_id: {}", + toString(session_id), session_name_, user_id ? toString(*user_id) : ""); + /// Make a new session context OR /// if the `session_id` and `user_id` were used before then just get a previously created session context. std::shared_ptr new_named_session; @@ -420,6 +462,12 @@ ContextMutablePtr Session::makeQueryContextImpl(const ClientInfo * client_info_t ContextMutablePtr query_context = Context::createCopy(from_session_context ? session_context : global_context); query_context->makeQueryContext(); + LOG_DEBUG(log, "{} Creating query context from {} context, user_id: {}, parent context user: {}", + toString(session_id), + from_session_context ? "session" : "global", + user_id ? toString(*user_id) : "", + query_context->getUser() ? query_context->getUser()->getName() : ""); + /// Copy the specified client info to the new query context. 
auto & res_client_info = query_context->getClientInfo(); if (client_info_to_move) @@ -460,7 +508,7 @@ ContextMutablePtr Session::makeQueryContextImpl(const ClientInfo * client_info_t if (!notified_session_log_about_login) { - if (auto session_log = getSessionLog(); session_log && user) + if (auto session_log = getSessionLog(); user && user_id && session_log) { session_log->addLoginSuccess( session_id, diff --git a/src/Interpreters/Session.h b/src/Interpreters/Session.h index 494b7750f1e..26457186523 100644 --- a/src/Interpreters/Session.h +++ b/src/Interpreters/Session.h @@ -90,6 +90,8 @@ private: std::shared_ptr named_session; bool named_session_created = false; + + Poco::Logger * log = nullptr; }; } From 905418b46868d1345d1de444003b5f057c616829 Mon Sep 17 00:00:00 2001 From: Vladimir Goncharov Date: Tue, 19 Oct 2021 18:33:22 +0300 Subject: [PATCH 373/438] Update argmin.md because https://github.com/ClickHouse/ClickHouse/pull/23393 --- .../sql-reference/aggregate-functions/reference/argmin.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/docs/ru/sql-reference/aggregate-functions/reference/argmin.md b/docs/ru/sql-reference/aggregate-functions/reference/argmin.md index 4ee78a73a84..6205cd0cf09 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/argmin.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/argmin.md @@ -14,12 +14,6 @@ toc_priority: 105 argMin(arg, val) ``` -или - -``` sql -argMin(tuple(arg, val)) -``` - **Аргументы** - `arg` — аргумент. From aa8bc93be8c2e245ebf12c7b5872910a2f071722 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Tue, 19 Oct 2021 23:35:59 +0800 Subject: [PATCH 374/438] Fix build --- src/Interpreters/Context.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index a61c2669ef2..106147d95fc 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1895,7 +1895,7 @@ void Context::setSystemZooKeeperLogAfterInitializationIfNeeded() zk.second->setZooKeeperLog(shared->system_logs->zookeeper_log); } -void Context::initializeKeeperDispatcher(bool start_async) const +void Context::initializeKeeperDispatcher([[maybe_unused]] bool start_async) const { #if USE_NURAFT std::lock_guard lock(shared->keeper_storage_dispatcher_mutex); From 05d93796dcf5ee30a9daebcd840e2b35fbf32fb2 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Tue, 19 Oct 2021 23:36:07 +0800 Subject: [PATCH 375/438] Fix minmax_count projection with primary key in partition expr --- src/Storages/MergeTree/MergeTreeData.cpp | 20 +++++++++---------- src/Storages/ProjectionsDescription.cpp | 20 ++++++++++++------- src/Storages/ProjectionsDescription.h | 4 ---- .../01710_minmax_count_projection.reference | 1 + .../01710_minmax_count_projection.sql | 6 ++++++ 5 files changed, 30 insertions(+), 21 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 10fa18186ee..ced385a18c4 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -4470,16 +4470,6 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( } size_t pos = 0; - if (!primary_key_max_column_name.empty()) - { - const auto & primary_key_column = *part->index[0]; - auto primary_key_column_size = primary_key_column.size(); - auto & min_column = assert_cast(*minmax_count_columns[pos++]); - auto & max_column = assert_cast(*minmax_count_columns[pos++]); - insert(min_column, primary_key_column[0]); - insert(max_column, 
primary_key_column[primary_key_column_size - 1]); - } - size_t minmax_idx_size = part->minmax_idx->hyperrectangle.size(); for (size_t i = 0; i < minmax_idx_size; ++i) { @@ -4490,6 +4480,16 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( insert(max_column, range.right); } + if (!primary_key_max_column_name.empty()) + { + const auto & primary_key_column = *part->index[0]; + auto primary_key_column_size = primary_key_column.size(); + auto & min_column = assert_cast(*minmax_count_columns[pos++]); + auto & max_column = assert_cast(*minmax_count_columns[pos++]); + insert(min_column, primary_key_column[0]); + insert(max_column, primary_key_column[primary_key_column_size - 1]); + } + { auto & column = assert_cast(*minmax_count_columns.back()); auto func = column.getAggregateFunction(); diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index b204c288000..e5117a306ee 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -184,16 +184,16 @@ ProjectionDescription ProjectionDescription::getMinMaxCountProjection( auto select_query = std::make_shared(); ASTPtr select_expression_list = std::make_shared(); - if (!primary_key_asts.empty()) - { - select_expression_list->children.push_back(makeASTFunction("min", primary_key_asts.front()->clone())); - select_expression_list->children.push_back(makeASTFunction("max", primary_key_asts.front()->clone())); - } for (const auto & column : minmax_columns) { select_expression_list->children.push_back(makeASTFunction("min", std::make_shared(column))); select_expression_list->children.push_back(makeASTFunction("max", std::make_shared(column))); } + if (!primary_key_asts.empty()) + { + select_expression_list->children.push_back(makeASTFunction("min", primary_key_asts.front()->clone())); + select_expression_list->children.push_back(makeASTFunction("max", primary_key_asts.front()->clone())); + } select_expression_list->children.push_back(makeASTFunction("count")); select_query->setExpression(ASTProjectionSelectQuery::Expression::SELECT, std::move(select_expression_list)); @@ -207,8 +207,14 @@ ProjectionDescription ProjectionDescription::getMinMaxCountProjection( result.query_ast, query_context, storage, {}, SelectQueryOptions{QueryProcessingStage::WithMergeableState}.modify().ignoreAlias()); result.required_columns = select.getRequiredColumns(); result.sample_block = select.getSampleBlock(); - if (!primary_key_asts.empty()) - result.primary_key_max_column_name = result.sample_block.getNames()[ProjectionDescription::PRIMARY_KEY_MAX_COLUMN_POS]; + /// If we have primary key and it's not in minmax_columns, it will be used as one additional minmax columns. 
+ if (!primary_key_asts.empty() && result.sample_block.columns() == 2 * (minmax_columns.size() + 1) + 1) + { + /// min(p1), max(p1), min(p2), max(p2), ..., min(k1), max(k1), count() + /// ^ + /// size - 2 + result.primary_key_max_column_name = *(result.sample_block.getNames().cend() - 2); + } result.type = ProjectionDescription::Type::Aggregate; StorageInMemoryMetadata metadata; metadata.setColumns(ColumnsDescription(result.sample_block.getNamesAndTypesList())); diff --git a/src/Storages/ProjectionsDescription.h b/src/Storages/ProjectionsDescription.h index 4dd717239ad..7c254182ba4 100644 --- a/src/Storages/ProjectionsDescription.h +++ b/src/Storages/ProjectionsDescription.h @@ -30,10 +30,6 @@ struct ProjectionDescription static constexpr const char * MINMAX_COUNT_PROJECTION_NAME = "_minmax_count_projection"; - /// If minmax_count projection contains a primary key's minmax values. Their positions will be 0 and 1. - static constexpr const size_t PRIMARY_KEY_MIN_COLUMN_POS = 0; - static constexpr const size_t PRIMARY_KEY_MAX_COLUMN_POS = 1; - /// Definition AST of projection ASTPtr definition_ast; diff --git a/tests/queries/0_stateless/01710_minmax_count_projection.reference b/tests/queries/0_stateless/01710_minmax_count_projection.reference index 5591d5a9954..da7d2fbe2bd 100644 --- a/tests/queries/0_stateless/01710_minmax_count_projection.reference +++ b/tests/queries/0_stateless/01710_minmax_count_projection.reference @@ -8,3 +8,4 @@ 0 0 9999 0 9999 +3 diff --git a/tests/queries/0_stateless/01710_minmax_count_projection.sql b/tests/queries/0_stateless/01710_minmax_count_projection.sql index 112487b219e..b7077de1fe6 100644 --- a/tests/queries/0_stateless/01710_minmax_count_projection.sql +++ b/tests/queries/0_stateless/01710_minmax_count_projection.sql @@ -43,3 +43,9 @@ select min(j), max(j) from has_final_mark; set max_rows_to_read = 5001; -- one normal part 5000 + one minmax_count_projection part 1 select min(j), max(j) from mixed_final_mark; + +-- The first primary expr is the same of some partition column +drop table if exists t; +create table t (server_date Date, something String) engine MergeTree partition by (toYYYYMM(server_date), server_date) order by (server_date, something); +insert into t values ('2019-01-01', 'test1'), ('2019-02-01', 'test2'), ('2019-03-01', 'test3'); +select count() from t; From 0f2620a31ef37179c2009385835e490fc3baf89e Mon Sep 17 00:00:00 2001 From: Vladimir Goncharov Date: Tue, 19 Oct 2021 19:07:04 +0300 Subject: [PATCH 376/438] Update argmin.md --- .../aggregate-functions/reference/argmin.md | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/docs/ru/sql-reference/aggregate-functions/reference/argmin.md b/docs/ru/sql-reference/aggregate-functions/reference/argmin.md index 6205cd0cf09..4d2363abe6d 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/argmin.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/argmin.md @@ -25,12 +25,6 @@ argMin(arg, val) Тип: соответствует типу `arg`. -Если передан кортеж: - -- Кортеж `(arg, val)` c минимальным значением `val` и соответствующим ему `arg`. - -Тип: [Tuple](../../../sql-reference/data-types/tuple.md). 
- **Пример** Исходная таблица: @@ -46,14 +40,14 @@ argMin(arg, val) Запрос: ``` sql -SELECT argMin(user, salary), argMin(tuple(user, salary)) FROM salary; +SELECT argMin(user, salary) FROM salary; ``` Результат: ``` text -┌─argMin(user, salary)─┬─argMin(tuple(user, salary))─┐ -│ worker │ ('worker',1000) │ -└──────────────────────┴─────────────────────────────┘ +┌─argMin(user, salary)─┐ +│ worker │ +└──────────────────────┘ ``` From c6af087f4bf89dd27f06cd6bb9972f6fc89e781b Mon Sep 17 00:00:00 2001 From: Vladimir Goncharov Date: Tue, 19 Oct 2021 19:08:50 +0300 Subject: [PATCH 377/438] Update argmax.md --- .../aggregate-functions/reference/argmax.md | 20 ++++--------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/docs/ru/sql-reference/aggregate-functions/reference/argmax.md b/docs/ru/sql-reference/aggregate-functions/reference/argmax.md index 71289423035..84419523beb 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/argmax.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/argmax.md @@ -14,12 +14,6 @@ toc_priority: 106 argMax(arg, val) ``` -или - -``` sql -argMax(tuple(arg, val)) -``` - **Аргументы** - `arg` — аргумент. @@ -31,12 +25,6 @@ argMax(tuple(arg, val)) Тип: соответствует типу `arg`. -Если передан кортеж: - -- кортеж `(arg, val)` c максимальным значением `val` и соответствующим ему `arg`. - -Тип: [Tuple](../../../sql-reference/data-types/tuple.md). - **Пример** Исходная таблица: @@ -52,14 +40,14 @@ argMax(tuple(arg, val)) Запрос: ``` sql -SELECT argMax(user, salary), argMax(tuple(user, salary), salary), argMax(tuple(user, salary)) FROM salary; +SELECT argMax(user, salary), argMax(tuple(user, salary), salary) FROM salary; ``` Результат: ``` text -┌─argMax(user, salary)─┬─argMax(tuple(user, salary), salary)─┬─argMax(tuple(user, salary))─┐ -│ director │ ('director',5000) │ ('director',5000) │ -└──────────────────────┴─────────────────────────────────────┴─────────────────────────────┘ +┌─argMax(user, salary)─┬─argMax(tuple(user, salary), salary)─┐ +│ director │ ('director',5000) │ +└──────────────────────┴─────────────────────────────────────┘ ``` From f49e946943fe6bd6d9da9d0b1a99539bd5eac37a Mon Sep 17 00:00:00 2001 From: Vladimir Goncharov Date: Tue, 19 Oct 2021 19:11:15 +0300 Subject: [PATCH 378/438] Update argmax.md --- .../aggregate-functions/reference/argmax.md | 20 ++++--------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/docs/zh/sql-reference/aggregate-functions/reference/argmax.md b/docs/zh/sql-reference/aggregate-functions/reference/argmax.md index 9d90590b2f1..0c82cb8a4bb 100644 --- a/docs/zh/sql-reference/aggregate-functions/reference/argmax.md +++ b/docs/zh/sql-reference/aggregate-functions/reference/argmax.md @@ -14,12 +14,6 @@ toc_priority: 106 argMax(arg, val) ``` -或 - -``` sql -argMax(tuple(arg, val)) -``` - **参数** - `arg` — Argument. 
@@ -31,12 +25,6 @@ argMax(tuple(arg, val)) 类型: 匹配 `arg` 类型。 -对于输入中的元组: - -- 元组 `(arg, val)`, 其中 `val` 最大值,`arg` 是对应的值。 - -类型: [元组](../../../sql-reference/data-types/tuple.md)。 - **示例** 输入表: @@ -52,13 +40,13 @@ argMax(tuple(arg, val)) 查询: ``` sql -SELECT argMax(user, salary), argMax(tuple(user, salary), salary), argMax(tuple(user, salary)) FROM salary; +SELECT argMax(user, salary), argMax(tuple(user, salary), salary) FROM salary; ``` 结果: ``` text -┌─argMax(user, salary)─┬─argMax(tuple(user, salary), salary)─┬─argMax(tuple(user, salary))─┐ -│ director │ ('director',5000) │ ('director',5000) │ -└──────────────────────┴─────────────────────────────────────┴─────────────────────────────┘ +┌─argMax(user, salary)─┬─argMax(tuple(user, salary), salary)─┐ +│ director │ ('director',5000) │ +└──────────────────────┴─────────────────────────────────────┘ ``` From 2b3841f0030c0a702ea88ef6fe4499f9ad6d0705 Mon Sep 17 00:00:00 2001 From: Vladimir Goncharov Date: Tue, 19 Oct 2021 19:14:14 +0300 Subject: [PATCH 379/438] Update argmin.md --- .../sql-reference/aggregate-functions/reference/argmin.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/zh/sql-reference/aggregate-functions/reference/argmin.md b/docs/zh/sql-reference/aggregate-functions/reference/argmin.md index 0dd4625ac0d..6c2839062eb 100644 --- a/docs/zh/sql-reference/aggregate-functions/reference/argmin.md +++ b/docs/zh/sql-reference/aggregate-functions/reference/argmin.md @@ -25,13 +25,13 @@ toc_priority: 105 查询: ``` sql -SELECT argMin(user, salary), argMin(tuple(user, salary)) FROM salary; +SELECT argMin(user, salary), argMin(tuple(user, salary), salary) FROM salary; ``` 结果: ``` text -┌─argMin(user, salary)─┬─argMin(tuple(user, salary))─┐ -│ worker │ ('worker',1000) │ -└──────────────────────┴─────────────────────────────┘ +┌─argMin(user, salary)─┬─argMin(tuple(user, salary), salary)─┐ +│ worker │ ('worker',1000) │ +└──────────────────────┴─────────────────────────────────────┘ ``` From e72ec27d5bfc4f6056ad88f775bc234b921e3d9e Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Tue, 19 Oct 2021 19:45:48 +0300 Subject: [PATCH 380/438] Fixed builds and using magic_enum --- src/Interpreters/Session.cpp | 30 ++++-------------------------- 1 file changed, 4 insertions(+), 26 deletions(-) diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index 75117bd860c..bb3d9352496 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -4,13 +4,15 @@ #include #include #include -#include +#include #include #include #include #include #include +#include + #include #include #include @@ -241,34 +243,10 @@ void Session::shutdownNamedSessions() NamedSessionsStorage::instance().shutdown(); } -namespace -{ -String getSessionPrefix(ClientInfo::Interface interface) -{ - switch (interface) - { - case ClientInfo::Interface::TCP: - return "TCP"; - case ClientInfo::Interface::HTTP: - return "HTTP"; - case ClientInfo::Interface::GRPC: - return "GRPC"; - case ClientInfo::Interface::MYSQL: - return "MySQL"; - case ClientInfo::Interface::POSTGRESQL: - return "PostgreSQL"; - case ClientInfo::Interface::LOCAL: - return "Local"; - case ClientInfo::Interface::TCP_INTERSERVER: - return "Interserver"; - } -} -} - Session::Session(const ContextPtr & global_context_, ClientInfo::Interface interface_) : session_id(UUIDHelpers::generateV4()), global_context(global_context_), - log(&Poco::Logger::get(getSessionPrefix(interface_) + "-Session")) + log(&Poco::Logger::get(String{magic_enum::enum_name(interface_)} + 
"-Session")) { prepared_client_info.emplace(); prepared_client_info->interface = interface_; From 9699a71806c63f6fd47099bde51031bb12f403c9 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 19 Oct 2021 22:39:55 +0300 Subject: [PATCH 381/438] Update amis --- tests/ci/metrics_lambda/app.py | 2 +- tests/ci/termination_lambda/app.py | 2 +- tests/ci/token_lambda/app.py | 2 +- tests/ci/worker/ubuntu_ami.sh | 47 ++++++++++++++++++++++++++++++ 4 files changed, 50 insertions(+), 3 deletions(-) create mode 100644 tests/ci/worker/ubuntu_ami.sh diff --git a/tests/ci/metrics_lambda/app.py b/tests/ci/metrics_lambda/app.py index 6c6fc594847..d2fb048638b 100644 --- a/tests/ci/metrics_lambda/app.py +++ b/tests/ci/metrics_lambda/app.py @@ -10,7 +10,7 @@ from collections import namedtuple def get_key_and_app_from_aws(): import boto3 - secret_name = "clickhouse_github_secret_key_1" + secret_name = "clickhouse_github_secret_key" session = boto3.session.Session() client = session.client( service_name='secretsmanager', diff --git a/tests/ci/termination_lambda/app.py b/tests/ci/termination_lambda/app.py index 7fd7c400db9..0b39cf73f25 100644 --- a/tests/ci/termination_lambda/app.py +++ b/tests/ci/termination_lambda/app.py @@ -10,7 +10,7 @@ from collections import namedtuple def get_key_and_app_from_aws(): import boto3 - secret_name = "clickhouse_github_secret_key_1" + secret_name = "clickhouse_github_secret_key" session = boto3.session.Session() client = session.client( service_name='secretsmanager', diff --git a/tests/ci/token_lambda/app.py b/tests/ci/token_lambda/app.py index 4edd3e8d08c..731d6c040de 100644 --- a/tests/ci/token_lambda/app.py +++ b/tests/ci/token_lambda/app.py @@ -39,7 +39,7 @@ def get_runner_registration_token(access_token): def get_key_and_app_from_aws(): import boto3 - secret_name = "clickhouse_github_secret_key_1" + secret_name = "clickhouse_github_secret_key" session = boto3.session.Session() client = session.client( service_name='secretsmanager', diff --git a/tests/ci/worker/ubuntu_ami.sh b/tests/ci/worker/ubuntu_ami.sh new file mode 100644 index 00000000000..2609c1a69f3 --- /dev/null +++ b/tests/ci/worker/ubuntu_ami.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "Running prepare script" +export DEBIAN_FRONTEND=noninteractive +export RUNNER_VERSION=2.283.1 +export RUNNER_HOME=/home/ubuntu/actions-runner + +apt-get update + +apt-get install --yes --no-install-recommends \ + apt-transport-https \ + ca-certificates \ + curl \ + gnupg \ + lsb-release \ + python3-pip \ + unzip + +curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg + +echo "deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null + +apt-get update + +apt-get install --yes --no-install-recommends docker-ce docker-ce-cli containerd.io + +usermod -aG docker ubuntu + +pip install boto3 pygithub requests urllib3 unidiff + +mkdir -p $RUNNER_HOME && cd $RUNNER_HOME + +curl -O -L https://github.com/actions/runner/releases/download/v$RUNNER_VERSION/actions-runner-linux-x64-$RUNNER_VERSION.tar.gz + +tar xzf ./actions-runner-linux-x64-$RUNNER_VERSION.tar.gz +rm -f ./actions-runner-linux-x64-$RUNNER_VERSION.tar.gz +./bin/installdependencies.sh + +chown -R ubuntu:ubuntu $RUNNER_HOME + +cd /home/ubuntu +curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" +unzip awscliv2.zip +./aws/install 
+ +rm -rf /home/ubuntu/awscliv2.zip /home/ubuntu/aws From 2266856c472cf083e598af4dad25491cfd8927ca Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 19 Oct 2021 23:03:45 +0300 Subject: [PATCH 382/438] Follow-up for #30282 --- src/Interpreters/Context.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 243bc721f92..84795f11f2a 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -293,8 +293,8 @@ private: /// A flag, used to distinguish between user query and internal query to a database engine (MaterializedPostgreSQL). bool is_internal_query = false; - /// Has initializeBackgroundExecutors() method been executed? - bool is_background_executors_initialized = false; + /// Has background executors for MergeTree tables been initialized? + std::atomic is_background_executors_initialized = false; public: From 008a13df8b7398cd4c6ebbb4530f791a9f2cc2c6 Mon Sep 17 00:00:00 2001 From: lhuang0928 Date: Wed, 20 Oct 2021 03:36:07 +0000 Subject: [PATCH 383/438] fix date32 comparison with datetime/datetime64 --- src/Functions/FunctionsConversion.h | 4 ++-- src/Interpreters/convertFieldToType.cpp | 2 +- .../0_stateless/02098_date32_comparison.reference | 11 +++++++++++ .../queries/0_stateless/02098_date32_comparison.sql | 13 ++++++++++++- 4 files changed, 26 insertions(+), 4 deletions(-) diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 95db7a9af25..9238cc81c37 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -301,7 +301,7 @@ struct ToDateTimeImpl return time_zone.fromDayNum(DayNum(d)); } - static inline UInt32 execute(Int32 d, const DateLUTImpl & time_zone) + static inline Int64 execute(Int32 d, const DateLUTImpl & time_zone) { return time_zone.fromDayNum(ExtendedDayNum(d)); } @@ -638,7 +638,7 @@ struct ToDateTime64Transform inline DateTime64::NativeType execute(Int32 d, const DateLUTImpl & time_zone) const { const auto dt = ToDateTimeImpl::execute(d, time_zone); - return execute(dt, time_zone); + return DecimalUtils::decimalFromComponentsWithMultiplier(dt, 0, scale_multiplier); } inline DateTime64::NativeType execute(UInt32 dt, const DateLUTImpl & /*time_zone*/) const diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index ef86f45b759..a91cb1fbeb2 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -216,7 +216,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID } if (which_type.isDateTime64() - && (which_from_type.isNativeInt() || which_from_type.isNativeUInt() || which_from_type.isDate() || which_from_type.isDateTime() || which_from_type.isDateTime64())) + && (which_from_type.isNativeInt() || which_from_type.isNativeUInt() || which_from_type.isDate() || which_from_type.isDate32() || which_from_type.isDateTime() || which_from_type.isDateTime64())) { const auto scale = static_cast(type).getScale(); const auto decimal_value = DecimalUtils::decimalFromComponents(src.reinterpret(), 0, scale); diff --git a/tests/queries/0_stateless/02098_date32_comparison.reference b/tests/queries/0_stateless/02098_date32_comparison.reference index c18b4e9b082..16d1f96acfd 100644 --- a/tests/queries/0_stateless/02098_date32_comparison.reference +++ b/tests/queries/0_stateless/02098_date32_comparison.reference @@ -6,3 +6,14 @@ 1 1 1 +1 +1 +1 +1 +1 +1 +1 +1 
+1 +1 +1 diff --git a/tests/queries/0_stateless/02098_date32_comparison.sql b/tests/queries/0_stateless/02098_date32_comparison.sql index dd4fde790c7..5fd7172e0bb 100644 --- a/tests/queries/0_stateless/02098_date32_comparison.sql +++ b/tests/queries/0_stateless/02098_date32_comparison.sql @@ -1,8 +1,19 @@ select toDate32('1990-01-01') = toDate('1990-01-01'); select toDate('1991-01-02') > toDate32('1990-01-01'); +select toDate32('1925-01-01') <= toDate('1990-01-01'); +select toDate('1991-01-01') < toDate32('2283-11-11'); select toDate32('1990-01-01') = toDateTime('1990-01-01'); select toDateTime('1991-01-02') > toDate32('1990-01-01'); +select toDate32('1925-01-01') <= toDateTime('1990-01-01'); +select toDateTime('1991-01-01') < toDate32('2283-11-11'); select toDate32('1990-01-01') = toDateTime64('1990-01-01',2); select toDateTime64('1991-01-02',2) > toDate32('1990-01-01'); +select toDate32('1925-01-01') = toDateTime64('1925-01-01',2); +select toDateTime64('1925-01-02',2) > toDate32('1925-01-01'); +select toDate32('2283-11-11') = toDateTime64('2283-11-11',2); +select toDateTime64('2283-11-11',2) > toDate32('1925-01-01'); select toDate32('1990-01-01') = '1990-01-01'; -select '1991-01-02' > toDate32('1990-01-01'); \ No newline at end of file +select '1991-01-02' > toDate32('1990-01-01'); +select toDate32('1925-01-01') = '1925-01-01'; +select '2283-11-11' >= toDate32('2283-11-10'); +select '2283-11-11' > toDate32('1925-01-01'); \ No newline at end of file From 8f97765cbdec3c7ed9c37df46c18ef131ac92f00 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 19 Oct 2021 21:06:08 +0000 Subject: [PATCH 384/438] Fix --- src/Interpreters/Context.h | 2 +- src/Storages/MergeTree/registerStorageMergeTree.cpp | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 84795f11f2a..85b4fbf23d5 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -294,7 +294,7 @@ private: /// A flag, used to distinguish between user query and internal query to a database engine (MaterializedPostgreSQL). bool is_internal_query = false; /// Has background executors for MergeTree tables been initialized? 
- std::atomic is_background_executors_initialized = false; + bool is_background_executors_initialized = false; public: diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 982acfe62a4..218d990e0ce 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -301,6 +301,8 @@ static StoragePtr create(const StorageFactory::Arguments & args) if (replicated) name_part = name_part.substr(strlen("Replicated")); + args.getContext()->getGlobalContext()->initializeBackgroundExecutorsIfNeeded(); + MergeTreeData::MergingParams merging_params; merging_params.mode = MergeTreeData::MergingParams::Ordinary; From 5e24f337a049f2aaee9efc8dc03d354103ab0ca2 Mon Sep 17 00:00:00 2001 From: tavplubix Date: Wed, 20 Oct 2021 09:29:41 +0300 Subject: [PATCH 385/438] Update StorageReplicatedMergeTree.cpp --- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index f554986402c..ba115d36a5d 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5995,12 +5995,12 @@ void StorageReplicatedMergeTree::replacePartitionFrom( MutableDataPartsVector dst_parts; Strings block_id_paths; Strings part_checksums; + auto zookeeper = getZooKeeper(); std::vector ephemeral_locks; LOG_DEBUG(log, "Cloning {} parts", src_all_parts.size()); static const String TMP_PREFIX = "tmp_replace_from_"; - auto zookeeper = getZooKeeper(); String alter_partition_version_path = zookeeper_path + "/alter_partition_version"; Coordination::Stat alter_partition_version_stat; From 36635736e6aaaf60c1faf7e1632384e06a278313 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 20 Oct 2021 12:30:17 +0300 Subject: [PATCH 386/438] Also run on master --- .github/workflows/main.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 49760995dfc..baa40e99418 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,5 +1,8 @@ name: Ligthweight GithubActions on: # yamllint disable-line rule:truthy + push: + branches: + - master pull_request: types: - labeled From 83787e26f2f1a287c12886ad316733dcbc3676cf Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 20 Oct 2021 12:43:03 +0300 Subject: [PATCH 387/438] SQLUserDefinedFunctions support lambdas --- .../InterpreterCreateFunctionQuery.cpp | 40 +++---------------- .../InterpreterCreateFunctionQuery.h | 1 - .../UserDefinedSQLFunctionVisitor.cpp | 10 ++++- ...l_user_defined_functions_aliases.reference | 1 + ...098_sql_user_defined_functions_aliases.sql | 4 ++ ...ql_user_defined_functions_lambda.reference | 1 + ...2099_sql_user_defined_functions_lambda.sql | 4 ++ 7 files changed, 24 insertions(+), 37 deletions(-) create mode 100644 tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.reference create mode 100644 tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.sql create mode 100644 tests/queries/0_stateless/02099_sql_user_defined_functions_lambda.reference create mode 100644 tests/queries/0_stateless/02099_sql_user_defined_functions_lambda.sql diff --git a/src/Interpreters/InterpreterCreateFunctionQuery.cpp b/src/Interpreters/InterpreterCreateFunctionQuery.cpp index ccb5f4040ec..9d92466c440 100644 --- a/src/Interpreters/InterpreterCreateFunctionQuery.cpp +++ 
b/src/Interpreters/InterpreterCreateFunctionQuery.cpp @@ -1,14 +1,17 @@ +#include + +#include + #include #include #include #include #include #include -#include #include #include #include -#include + namespace DB { @@ -66,42 +69,9 @@ void InterpreterCreateFunctionQuery::validateFunction(ASTPtr function, const Str } ASTPtr function_body = function->as()->children.at(0)->children.at(1); - std::unordered_set identifiers_in_body = getIdentifiers(function_body); - - for (const auto & identifier : identifiers_in_body) - { - if (!arguments.contains(identifier)) - throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Identifier {} does not exist in arguments", backQuote(identifier)); - } - validateFunctionRecursiveness(function_body, name); } -std::unordered_set InterpreterCreateFunctionQuery::getIdentifiers(ASTPtr node) -{ - std::unordered_set identifiers; - - std::stack ast_nodes_to_process; - ast_nodes_to_process.push(node); - - while (!ast_nodes_to_process.empty()) - { - auto ast_node_to_process = ast_nodes_to_process.top(); - ast_nodes_to_process.pop(); - - for (const auto & child : ast_node_to_process->children) - { - auto identifier_name_opt = tryGetIdentifierName(child); - if (identifier_name_opt) - identifiers.insert(identifier_name_opt.value()); - - ast_nodes_to_process.push(child); - } - } - - return identifiers; -} - void InterpreterCreateFunctionQuery::validateFunctionRecursiveness(ASTPtr node, const String & function_to_create) { for (const auto & child : node->children) diff --git a/src/Interpreters/InterpreterCreateFunctionQuery.h b/src/Interpreters/InterpreterCreateFunctionQuery.h index fdc03b379db..a67fdb9605d 100644 --- a/src/Interpreters/InterpreterCreateFunctionQuery.h +++ b/src/Interpreters/InterpreterCreateFunctionQuery.h @@ -22,7 +22,6 @@ public: private: static void validateFunction(ASTPtr function, const String & name); - static std::unordered_set getIdentifiers(ASTPtr node); static void validateFunctionRecursiveness(ASTPtr node, const String & function_to_create); ASTPtr query_ptr; diff --git a/src/Interpreters/UserDefinedSQLFunctionVisitor.cpp b/src/Interpreters/UserDefinedSQLFunctionVisitor.cpp index cc5db020387..8df6932b153 100644 --- a/src/Interpreters/UserDefinedSQLFunctionVisitor.cpp +++ b/src/Interpreters/UserDefinedSQLFunctionVisitor.cpp @@ -25,6 +25,7 @@ void UserDefinedSQLFunctionMatcher::visit(ASTPtr & ast, Data &) return; auto result = tryToReplaceFunction(*function); + if (result) ast = result; } @@ -83,9 +84,16 @@ ASTPtr UserDefinedSQLFunctionMatcher::tryToReplaceFunction(const ASTFunction & f if (identifier_name_opt) { auto function_argument_it = identifier_name_to_function_argument.find(*identifier_name_opt); - assert(function_argument_it != identifier_name_to_function_argument.end()); + if (function_argument_it == identifier_name_to_function_argument.end()) + continue; + + auto child_alias = child->tryGetAlias(); child = function_argument_it->second->clone(); + + if (!child_alias.empty()) + child->setAlias(child_alias); + continue; } diff --git a/tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.reference b/tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.reference new file mode 100644 index 00000000000..8ab2f6d0ac6 --- /dev/null +++ b/tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.reference @@ -0,0 +1 @@ +8 4 diff --git a/tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.sql b/tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.sql new file mode 100644 index 
00000000000..fef2daf8fd1 --- /dev/null +++ b/tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.sql @@ -0,0 +1,4 @@ +-- Tags: no-parallel +CREATE FUNCTION alias_function AS x -> (((x * 2) AS x_doubled) + x_doubled); +SELECT alias_function(2); +DROP FUNCTION alias_function; diff --git a/tests/queries/0_stateless/02099_sql_user_defined_functions_lambda.reference b/tests/queries/0_stateless/02099_sql_user_defined_functions_lambda.reference new file mode 100644 index 00000000000..8f6cd5ccd03 --- /dev/null +++ b/tests/queries/0_stateless/02099_sql_user_defined_functions_lambda.reference @@ -0,0 +1 @@ +[2,4,6] diff --git a/tests/queries/0_stateless/02099_sql_user_defined_functions_lambda.sql b/tests/queries/0_stateless/02099_sql_user_defined_functions_lambda.sql new file mode 100644 index 00000000000..e66651c1e5a --- /dev/null +++ b/tests/queries/0_stateless/02099_sql_user_defined_functions_lambda.sql @@ -0,0 +1,4 @@ +-- Tags: no-parallel +CREATE FUNCTION lambda_function AS x -> arrayMap(array_element -> array_element * 2, x); +SELECT lambda_function([1,2,3]); +DROP FUNCTION lambda_function; From 75d77339e9b05f15a4de59cf76086ec59838149e Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 20 Oct 2021 12:45:02 +0300 Subject: [PATCH 388/438] Remove master --- .github/workflows/main.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index baa40e99418..49760995dfc 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,8 +1,5 @@ name: Ligthweight GithubActions on: # yamllint disable-line rule:truthy - push: - branches: - - master pull_request: types: - labeled From 0ad7f9bba25e4cbf53dfea3b1fbd51379aab2dc3 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Wed, 20 Oct 2021 12:45:32 +0300 Subject: [PATCH 389/438] Fixed PVS warning --- src/Interpreters/Session.cpp | 2 -- src/Interpreters/Session.h | 3 ++- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index bb3d9352496..865c9551219 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -252,8 +252,6 @@ Session::Session(const ContextPtr & global_context_, ClientInfo::Interface inter prepared_client_info->interface = interface_; } -Session::Session(Session &&) = default; - Session::~Session() { LOG_DEBUG(log, "{} Destroying {} of user {}", diff --git a/src/Interpreters/Session.h b/src/Interpreters/Session.h index 26457186523..b62327103e9 100644 --- a/src/Interpreters/Session.h +++ b/src/Interpreters/Session.h @@ -33,9 +33,10 @@ public: static void shutdownNamedSessions(); Session(const ContextPtr & global_context_, ClientInfo::Interface interface_); - Session(Session &&); ~Session(); + Session(const Session &&) = delete; + Session& operator=(const Session &&) = delete; Session(const Session &) = delete; Session& operator=(const Session &) = delete; From 3496cd1bfec9ac4bce9119651fbb14f58e1824d3 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 20 Oct 2021 12:53:03 +0300 Subject: [PATCH 390/438] ExecutableUDF example --- src/Common/examples/CMakeLists.txt | 3 ++ src/Common/examples/executable_udf.cpp | 44 ++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 src/Common/examples/executable_udf.cpp diff --git a/src/Common/examples/CMakeLists.txt b/src/Common/examples/CMakeLists.txt index e72681621cb..020f3cc4446 100644 --- a/src/Common/examples/CMakeLists.txt +++ b/src/Common/examples/CMakeLists.txt @@ -77,3 +77,6 @@ 
target_link_libraries (average PRIVATE clickhouse_common_io) add_executable (shell_command_inout shell_command_inout.cpp) target_link_libraries (shell_command_inout PRIVATE clickhouse_common_io) + +add_executable (executable_udf executable_udf.cpp) +target_link_libraries (executable_udf PRIVATE dbms) diff --git a/src/Common/examples/executable_udf.cpp b/src/Common/examples/executable_udf.cpp new file mode 100644 index 00000000000..78a248fcddf --- /dev/null +++ b/src/Common/examples/executable_udf.cpp @@ -0,0 +1,44 @@ +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +using namespace DB; + +int main(int argc, char **argv) +{ + (void)(argc); + (void)(argv); + + std::string buffer; + + ReadBufferFromFileDescriptor read_buffer(0); + WriteBufferFromFileDescriptor write_buffer(1); + size_t rows = 0; + char dummy; + + while (!read_buffer.eof()) { + readIntText(rows, read_buffer); + readChar(dummy, read_buffer); + + for (size_t i = 0; i < rows; ++i) { + readString(buffer, read_buffer); + readChar(dummy, read_buffer); + + writeString("Key ", write_buffer); + writeString(buffer, write_buffer); + writeChar('\n', write_buffer); + } + + write_buffer.next(); + } + + return 0; +} From ddcf12c83b94f4e1c08d74289ce50b6300a70da4 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Wed, 20 Oct 2021 11:52:48 +0800 Subject: [PATCH 391/438] JSONExtractString raw string support. --- src/Functions/FunctionsJSON.cpp | 17 +++++++---------- .../01915_json_extract_raw_string.reference | 3 +++ .../01915_json_extract_raw_string.sql | 4 ++++ 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/Functions/FunctionsJSON.cpp b/src/Functions/FunctionsJSON.cpp index cb55ba6b83b..e861e99861b 100644 --- a/src/Functions/FunctionsJSON.cpp +++ b/src/Functions/FunctionsJSON.cpp @@ -744,6 +744,8 @@ public: } }; +template +class JSONExtractRawImpl; template class JSONExtractStringImpl @@ -760,9 +762,12 @@ public: static bool insertResultToColumn(IColumn & dest, const Element & element, const std::string_view &) { - if (!element.isString()) + if (element.isNull()) return false; + if (!element.isString()) + return JSONExtractRawImpl::insertResultToColumn(dest, element, {}); + auto str = element.getString(); ColumnString & col_str = assert_cast(dest); col_str.insertData(str.data(), str.size()); @@ -770,9 +775,6 @@ public: } }; -template -class JSONExtractRawImpl; - /// Nodes of the extract tree. We need the extract tree to extract from JSON complex values containing array, tuples or nullables. 
template struct JSONExtractTree @@ -856,12 +858,7 @@ struct JSONExtractTree public: bool insertResultToColumn(IColumn & dest, const Element & element) override { - if (element.isString()) - return JSONExtractStringImpl::insertResultToColumn(dest, element, {}); - else if (element.isNull()) - return false; - else - return JSONExtractRawImpl::insertResultToColumn(dest, element, {}); + return JSONExtractStringImpl::insertResultToColumn(dest, element, {}); } }; diff --git a/tests/queries/0_stateless/01915_json_extract_raw_string.reference b/tests/queries/0_stateless/01915_json_extract_raw_string.reference index 3a41f35710c..e88c7e018d2 100644 --- a/tests/queries/0_stateless/01915_json_extract_raw_string.reference +++ b/tests/queries/0_stateless/01915_json_extract_raw_string.reference @@ -1,2 +1,5 @@ ('123','456','[7,8,9]') \N +123 +123 + diff --git a/tests/queries/0_stateless/01915_json_extract_raw_string.sql b/tests/queries/0_stateless/01915_json_extract_raw_string.sql index 884c599c206..98bff692d71 100644 --- a/tests/queries/0_stateless/01915_json_extract_raw_string.sql +++ b/tests/queries/0_stateless/01915_json_extract_raw_string.sql @@ -3,3 +3,7 @@ select JSONExtract('{"a": "123", "b": 456, "c": [7, 8, 9]}', 'Tuple(a String, b String, c String)'); with '{"string_value":null}' as json select JSONExtract(json, 'string_value', 'Nullable(String)'); + +select JSONExtractString('{"a": 123}', 'a'); +select JSONExtractString('{"a": "123"}', 'a'); +select JSONExtractString('{"a": null}', 'a'); From ad409d9b47ae1dd3c492a46e32d0f66701dd122c Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 20 Oct 2021 13:05:57 +0300 Subject: [PATCH 392/438] SQLUserDefinedFunctions added DROP IF EXISTS support --- src/Interpreters/InterpreterDropFunctionQuery.cpp | 5 +++++ src/Interpreters/UserDefinedSQLFunctionFactory.cpp | 5 +++++ src/Interpreters/UserDefinedSQLFunctionFactory.h | 2 ++ src/Parsers/ASTDropFunctionQuery.cpp | 2 ++ src/Parsers/ASTDropFunctionQuery.h | 2 ++ src/Parsers/ParserDropFunctionQuery.cpp | 7 +++++++ ...1_sql_user_defined_functions_drop_if_exists.reference | 1 + .../02101_sql_user_defined_functions_drop_if_exists.sql | 9 +++++++++ 8 files changed, 33 insertions(+) create mode 100644 tests/queries/0_stateless/02101_sql_user_defined_functions_drop_if_exists.reference create mode 100644 tests/queries/0_stateless/02101_sql_user_defined_functions_drop_if_exists.sql diff --git a/src/Interpreters/InterpreterDropFunctionQuery.cpp b/src/Interpreters/InterpreterDropFunctionQuery.cpp index 53cb96b42fe..b788c8f960f 100644 --- a/src/Interpreters/InterpreterDropFunctionQuery.cpp +++ b/src/Interpreters/InterpreterDropFunctionQuery.cpp @@ -18,6 +18,11 @@ BlockIO InterpreterDropFunctionQuery::execute() FunctionNameNormalizer().visit(query_ptr.get()); auto & drop_function_query = query_ptr->as(); + auto & user_defined_functions_factory = UserDefinedSQLFunctionFactory::instance(); + + if (drop_function_query.if_exists && !user_defined_functions_factory.has(drop_function_query.function_name)) + return {}; + UserDefinedSQLFunctionFactory::instance().unregisterFunction(drop_function_query.function_name); UserDefinedSQLObjectsLoader::instance().removeObject(current_context, UserDefinedSQLObjectType::Function, drop_function_query.function_name); diff --git a/src/Interpreters/UserDefinedSQLFunctionFactory.cpp b/src/Interpreters/UserDefinedSQLFunctionFactory.cpp index 434f5523b42..1d2a80305c6 100644 --- a/src/Interpreters/UserDefinedSQLFunctionFactory.cpp +++ b/src/Interpreters/UserDefinedSQLFunctionFactory.cpp @@ 
-77,6 +77,11 @@ ASTPtr UserDefinedSQLFunctionFactory::tryGet(const std::string & function_name) return it->second; } +bool UserDefinedSQLFunctionFactory::has(const String & function_name) const +{ + return tryGet(function_name) != nullptr; +} + std::vector UserDefinedSQLFunctionFactory::getAllRegisteredNames() const { std::vector registered_names; diff --git a/src/Interpreters/UserDefinedSQLFunctionFactory.h b/src/Interpreters/UserDefinedSQLFunctionFactory.h index 366e27e833d..6838c2f9892 100644 --- a/src/Interpreters/UserDefinedSQLFunctionFactory.h +++ b/src/Interpreters/UserDefinedSQLFunctionFactory.h @@ -23,6 +23,8 @@ public: ASTPtr tryGet(const String & function_name) const; + bool has(const String & function_name) const; + std::vector getAllRegisteredNames() const override; private: diff --git a/src/Parsers/ASTDropFunctionQuery.cpp b/src/Parsers/ASTDropFunctionQuery.cpp index 5800a7ba9cb..0a46940e73d 100644 --- a/src/Parsers/ASTDropFunctionQuery.cpp +++ b/src/Parsers/ASTDropFunctionQuery.cpp @@ -14,6 +14,8 @@ void ASTDropFunctionQuery::formatImpl(const IAST::FormatSettings & settings, IAS { settings.ostr << (settings.hilite ? hilite_keyword : "") << "DROP FUNCTION " << (settings.hilite ? hilite_none : ""); settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(function_name) << (settings.hilite ? hilite_none : ""); + if (if_exists) + settings.ostr << "IF EXISTS"; } } diff --git a/src/Parsers/ASTDropFunctionQuery.h b/src/Parsers/ASTDropFunctionQuery.h index e32bf93a64d..a9d70a3016f 100644 --- a/src/Parsers/ASTDropFunctionQuery.h +++ b/src/Parsers/ASTDropFunctionQuery.h @@ -10,6 +10,8 @@ class ASTDropFunctionQuery : public IAST public: String function_name; + bool if_exists = false; + String getID(char) const override { return "DropFunctionQuery"; } ASTPtr clone() const override; diff --git a/src/Parsers/ParserDropFunctionQuery.cpp b/src/Parsers/ParserDropFunctionQuery.cpp index 04d26109836..d8c86646410 100644 --- a/src/Parsers/ParserDropFunctionQuery.cpp +++ b/src/Parsers/ParserDropFunctionQuery.cpp @@ -11,7 +11,10 @@ bool ParserDropFunctionQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expec { ParserKeyword s_drop("DROP"); ParserKeyword s_function("FUNCTION"); + ParserKeyword s_if_exists("IF EXISTS"); + ParserIdentifier function_name_p; + bool if_exists = false; ASTPtr function_name; @@ -21,10 +24,14 @@ bool ParserDropFunctionQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expec if (!s_function.ignore(pos, expected)) return false; + if (s_if_exists.ignore(pos, expected)) + if_exists = true; + if (!function_name_p.parse(pos, function_name, expected)) return false; auto drop_function_query = std::make_shared(); + drop_function_query->if_exists = if_exists; node = drop_function_query; drop_function_query->function_name = function_name->as().name(); diff --git a/tests/queries/0_stateless/02101_sql_user_defined_functions_drop_if_exists.reference b/tests/queries/0_stateless/02101_sql_user_defined_functions_drop_if_exists.reference new file mode 100644 index 00000000000..0cfbf08886f --- /dev/null +++ b/tests/queries/0_stateless/02101_sql_user_defined_functions_drop_if_exists.reference @@ -0,0 +1 @@ +2 diff --git a/tests/queries/0_stateless/02101_sql_user_defined_functions_drop_if_exists.sql b/tests/queries/0_stateless/02101_sql_user_defined_functions_drop_if_exists.sql new file mode 100644 index 00000000000..09e2677774c --- /dev/null +++ b/tests/queries/0_stateless/02101_sql_user_defined_functions_drop_if_exists.sql @@ -0,0 +1,9 @@ +-- Tags: no-parallel + 
+CREATE FUNCTION 02101_test_function AS x -> x + 1; + +SELECT 02101_test_function(1); + +DROP FUNCTION 02101_test_function; +DROP FUNCTION 02101_test_function; --{serverError 46} +DROP FUNCTION IF EXISTS 02101_test_function; From fe93533ba45727225ff2e00cfcb87e3bc753d813 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 20 Oct 2021 13:09:33 +0300 Subject: [PATCH 393/438] Fixed tests --- .../02098_sql_user_defined_functions_aliases.sql | 6 +++--- .../0_stateless/02099_sql_user_defined_functions_lambda.sql | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.sql b/tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.sql index fef2daf8fd1..c5bd2b5b5f2 100644 --- a/tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.sql +++ b/tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.sql @@ -1,4 +1,4 @@ -- Tags: no-parallel -CREATE FUNCTION alias_function AS x -> (((x * 2) AS x_doubled) + x_doubled); -SELECT alias_function(2); -DROP FUNCTION alias_function; +CREATE FUNCTION 02098_alias_function AS x -> (((x * 2) AS x_doubled) + x_doubled); +SELECT 02098_alias_function(2); +DROP FUNCTION 02098_alias_function; diff --git a/tests/queries/0_stateless/02099_sql_user_defined_functions_lambda.sql b/tests/queries/0_stateless/02099_sql_user_defined_functions_lambda.sql index e66651c1e5a..1c926faf3a1 100644 --- a/tests/queries/0_stateless/02099_sql_user_defined_functions_lambda.sql +++ b/tests/queries/0_stateless/02099_sql_user_defined_functions_lambda.sql @@ -1,4 +1,4 @@ -- Tags: no-parallel -CREATE FUNCTION lambda_function AS x -> arrayMap(array_element -> array_element * 2, x); -SELECT lambda_function([1,2,3]); -DROP FUNCTION lambda_function; +CREATE FUNCTION 02099_lambda_function AS x -> arrayMap(array_element -> array_element * 2, x); +SELECT 02099_lambda_function([1,2,3]); +DROP FUNCTION 02099_lambda_function; From dc964080126b5446ff9dec209ce9f8a7fa2a648e Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 20 Oct 2021 13:09:48 +0300 Subject: [PATCH 394/438] Fixed build --- src/Interpreters/InterpreterCreateFunctionQuery.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Interpreters/InterpreterCreateFunctionQuery.cpp b/src/Interpreters/InterpreterCreateFunctionQuery.cpp index 9d92466c440..c3d02fa4f34 100644 --- a/src/Interpreters/InterpreterCreateFunctionQuery.cpp +++ b/src/Interpreters/InterpreterCreateFunctionQuery.cpp @@ -18,7 +18,6 @@ namespace DB namespace ErrorCodes { - extern const int UNKNOWN_IDENTIFIER; extern const int CANNOT_CREATE_RECURSIVE_FUNCTION; extern const int UNSUPPORTED_METHOD; } From ba442b7ce5e04775801f2e7118eb05111e6cc200 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 20 Oct 2021 13:31:48 +0300 Subject: [PATCH 395/438] Use robot token in actions for statuses --- .github/workflows/main.yml | 20 ++++++++++++++++---- tests/ci/docker_images_check.py | 3 ++- tests/ci/finish_check.py | 3 ++- tests/ci/get_robot_token.py | 14 ++++++++++++++ tests/ci/pvs_check.py | 3 ++- tests/ci/run_check.py | 3 ++- tests/ci/style_check.py | 3 ++- 7 files changed, 40 insertions(+), 9 deletions(-) create mode 100644 tests/ci/get_robot_token.py diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 49760995dfc..60be4368df7 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -18,7 +18,10 @@ jobs: - name: Labels check run: cd $GITHUB_WORKSPACE/tests/ci && python3 run_check.py env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN 
}} + ROBOT_TOKEN_0: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN }} + ROBOT_TOKEN_1: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_1 }} + ROBOT_TOKEN_2: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_2 }} + ROBOT_TOKEN_3: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_3 }} DockerHubPush: needs: CheckLabels runs-on: [self-hosted] @@ -30,7 +33,10 @@ jobs: env: YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ROBOT_TOKEN_0: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN }} + ROBOT_TOKEN_1: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_1 }} + ROBOT_TOKEN_2: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_2 }} + ROBOT_TOKEN_3: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_3 }} DOCKER_ROBOT_PASSWORD: ${{ secrets.DOCKER_ROBOT_PASSWORD }} - name: Upload images files to artifacts uses: actions/upload-artifact@v2 @@ -52,7 +58,10 @@ jobs: env: YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ROBOT_TOKEN_0: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN }} + ROBOT_TOKEN_1: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_1 }} + ROBOT_TOKEN_2: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_2 }} + ROBOT_TOKEN_3: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_3 }} run: cd $GITHUB_WORKSPACE/tests/ci && python3 style_check.py FinishCheck: needs: [StyleCheck, DockerHubPush, CheckLabels] @@ -63,4 +72,7 @@ jobs: - name: Finish label run: cd $GITHUB_WORKSPACE/tests/ci && python3 finish_check.py env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ROBOT_TOKEN_0: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN }} + ROBOT_TOKEN_1: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_1 }} + ROBOT_TOKEN_2: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_2 }} + ROBOT_TOKEN_3: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_3 }} diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 141d075cc6d..d1954d70e71 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -8,6 +8,7 @@ import os from pr_info import PRInfo from github import Github import shutil +from get_robot_token import get_best_robot_token NAME = "Push to Dockerhub (actions)" @@ -222,7 +223,7 @@ if __name__ == "__main__": url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results) - gh = Github(os.getenv("GITHUB_TOKEN")) + gh = Github(get_best_robot_token()) commit = get_commit(gh, pr_info.sha) commit.create_status(context=NAME, description=description, state=status, target_url=url) diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py index 89139468fd6..db405cf8f73 100644 --- a/tests/ci/finish_check.py +++ b/tests/ci/finish_check.py @@ -4,6 +4,7 @@ from github import Github from pr_info import PRInfo import json import os +from get_robot_token import get_best_robot_token NAME = 'Run Check (actions)' @@ -34,7 +35,7 @@ if __name__ == "__main__": event = json.load(event_file) pr_info = PRInfo(event, need_orgs=True) - gh = Github(os.getenv("GITHUB_TOKEN")) + gh = Github(get_best_robot_token()) commit = get_commit(gh, pr_info.sha) url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" diff --git a/tests/ci/get_robot_token.py b/tests/ci/get_robot_token.py new file mode 100644 index 00000000000..75b688e5b44 --- /dev/null +++ 
b/tests/ci/get_robot_token.py @@ -0,0 +1,14 @@ +#!/usr/bin/env python3 +import os +from github import Github + +def get_best_robot_token(token_prefix_env_name="ROBOT_TOKEN_", total_tokens=4): + tokens = {} + for i in range(total_tokens): + token_name = token_prefix_env_name + str(i) + token = os.getenv(token_name) + gh = Github(token) + rest, _ = gh.rate_limiting + tokens[token] = rest + + return max(tokens.items(), key=lambda x: x[1])[0] diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py index c254ad74ae4..34052adecdf 100644 --- a/tests/ci/pvs_check.py +++ b/tests/ci/pvs_check.py @@ -9,6 +9,7 @@ from s3_helper import S3Helper from pr_info import PRInfo import shutil import sys +from get_robot_token import get_best_robot_token NAME = 'PVS Studio (actions)' LICENCE_NAME = 'Free license: ClickHouse, Yandex' @@ -83,7 +84,7 @@ if __name__ == "__main__": aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") - gh = Github(os.getenv("GITHUB_TOKEN")) + gh = Github(get_best_robot_token()) images_path = os.path.join(temp_path, 'changed_images.json') docker_image = 'clickhouse/pvs-test' diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 95e827671ca..e6bc7259330 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -6,6 +6,7 @@ from pr_info import PRInfo import sys import logging from github import Github +from get_robot_token import get_best_robot_token NAME = 'Run Check (actions)' @@ -113,7 +114,7 @@ if __name__ == "__main__": pr_info = PRInfo(event, need_orgs=True) can_run, description = should_run_checks_for_pr(pr_info) - gh = Github(os.getenv("GITHUB_TOKEN")) + gh = Github(get_best_robot_token()) commit = get_commit(gh, pr_info.sha) url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" if not can_run: diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 71978379099..0b1d673e628 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -10,6 +10,7 @@ from s3_helper import S3Helper import time import json from pr_info import PRInfo +from get_robot_token import get_best_robot_token NAME = "Style Check (actions)" @@ -108,7 +109,7 @@ if __name__ == "__main__": aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") - gh = Github(os.getenv("GITHUB_TOKEN")) + gh = Github(get_best_robot_token()) images_path = os.path.join(temp_path, 'changed_images.json') docker_image = 'clickhouse/style-test' From 0dcb36df0155182727726bf326eb08510752cc15 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 20 Oct 2021 13:57:41 +0300 Subject: [PATCH 396/438] Remove statuses from actions --- .github/workflows/main.yml | 26 -------------------------- tests/ci/docker_images_check.py | 10 ---------- tests/ci/pvs_check.py | 10 ---------- tests/ci/run_check.py | 7 ------- tests/ci/style_check.py | 11 ----------- 5 files changed, 64 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 60be4368df7..46a66ce98ce 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -17,11 +17,6 @@ jobs: uses: actions/checkout@v2 - name: Labels check run: cd $GITHUB_WORKSPACE/tests/ci && python3 run_check.py - env: - ROBOT_TOKEN_0: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN }} - ROBOT_TOKEN_1: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_1 }} - ROBOT_TOKEN_2: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_2 }} - ROBOT_TOKEN_3: ${{ 
secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_3 }} DockerHubPush: needs: CheckLabels runs-on: [self-hosted] @@ -33,10 +28,6 @@ jobs: env: YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} - ROBOT_TOKEN_0: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN }} - ROBOT_TOKEN_1: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_1 }} - ROBOT_TOKEN_2: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_2 }} - ROBOT_TOKEN_3: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_3 }} DOCKER_ROBOT_PASSWORD: ${{ secrets.DOCKER_ROBOT_PASSWORD }} - name: Upload images files to artifacts uses: actions/upload-artifact@v2 @@ -58,21 +49,4 @@ jobs: env: YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} - ROBOT_TOKEN_0: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN }} - ROBOT_TOKEN_1: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_1 }} - ROBOT_TOKEN_2: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_2 }} - ROBOT_TOKEN_3: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_3 }} run: cd $GITHUB_WORKSPACE/tests/ci && python3 style_check.py - FinishCheck: - needs: [StyleCheck, DockerHubPush, CheckLabels] - runs-on: [self-hosted] - steps: - - name: Check out repository code - uses: actions/checkout@v2 - - name: Finish label - run: cd $GITHUB_WORKSPACE/tests/ci && python3 finish_check.py - env: - ROBOT_TOKEN_0: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN }} - ROBOT_TOKEN_1: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_1 }} - ROBOT_TOKEN_2: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_2 }} - ROBOT_TOKEN_3: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_3 }} diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index d1954d70e71..0ddca0718e7 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -8,7 +8,6 @@ import os from pr_info import PRInfo from github import Github import shutil -from get_robot_token import get_best_robot_token NAME = "Push to Dockerhub (actions)" @@ -168,11 +167,6 @@ def upload_results(s3_client, pr_number, commit_sha, test_results): logging.info("Search result in url %s", url) return url -def get_commit(gh, commit_sha): - repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) - commit = repo.get_commit(commit_sha) - return commit - if __name__ == "__main__": logging.basicConfig(level=logging.INFO) repo_path = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")) @@ -223,10 +217,6 @@ if __name__ == "__main__": url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results) - gh = Github(get_best_robot_token()) - commit = get_commit(gh, pr_info.sha) - commit.create_status(context=NAME, description=description, state=status, target_url=url) - with open(os.path.join(temp_path, 'changed_images.json'), 'w') as images_file: json.dump(result_images, images_file) diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py index 34052adecdf..94e046c0a68 100644 --- a/tests/ci/pvs_check.py +++ b/tests/ci/pvs_check.py @@ -9,7 +9,6 @@ from s3_helper import S3Helper from pr_info import PRInfo import shutil import sys -from get_robot_token import get_best_robot_token NAME = 'PVS Studio (actions)' LICENCE_NAME = 'Free license: ClickHouse, Yandex' @@ -40,11 +39,6 @@ def _process_txt_report(path): errors.append(':'.join(line.split('\t')[0:2])) return warnings, errors -def get_commit(gh, commit_sha): - repo = 
gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) - commit = repo.get_commit(commit_sha) - return commit - def upload_results(s3_client, pr_number, commit_sha, test_results, additional_files): s3_path_prefix = str(pr_number) + "/" + commit_sha + "/" + NAME.lower().replace(' ', '_') additional_urls = process_logs(s3_client, additional_files, s3_path_prefix) @@ -84,8 +78,6 @@ if __name__ == "__main__": aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") - gh = Github(get_best_robot_token()) - images_path = os.path.join(temp_path, 'changed_images.json') docker_image = 'clickhouse/pvs-test' if os.path.exists(images_path): @@ -138,8 +130,6 @@ if __name__ == "__main__": report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_logs) print("::notice ::Report url: {}".format(report_url)) - commit = get_commit(gh, pr_info.sha) - commit.create_status(context=NAME, description=description, state=status, target_url=report_url) except Exception as ex: print("Got an exception", ex) sys.exit(1) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index e6bc7259330..3371e5a5720 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -5,8 +5,6 @@ import requests from pr_info import PRInfo import sys import logging -from github import Github -from get_robot_token import get_best_robot_token NAME = 'Run Check (actions)' @@ -114,13 +112,8 @@ if __name__ == "__main__": pr_info = PRInfo(event, need_orgs=True) can_run, description = should_run_checks_for_pr(pr_info) - gh = Github(get_best_robot_token()) - commit = get_commit(gh, pr_info.sha) - url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" if not can_run: print("::notice ::Cannot run") - commit.create_status(context=NAME, description=description, state="failure", target_url=url) sys.exit(1) else: print("::notice ::Can run") - commit.create_status(context=NAME, description=description, state="pending", target_url=url) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 0b1d673e628..c7c25d2a95b 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -10,7 +10,6 @@ from s3_helper import S3Helper import time import json from pr_info import PRInfo -from get_robot_token import get_best_robot_token NAME = "Style Check (actions)" @@ -79,12 +78,6 @@ def upload_results(s3_client, pr_number, commit_sha, test_results, additional_fi logging.info("Search result in url %s", url) return url - -def get_commit(gh, commit_sha): - repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) - commit = repo.get_commit(commit_sha) - return commit - def update_check_with_curl(check_id): cmd_template = ("curl -v --request PATCH --url https://api.github.com/repos/ClickHouse/ClickHouse/check-runs/{} " "--header 'authorization: Bearer {}' " @@ -109,8 +102,6 @@ if __name__ == "__main__": aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") - gh = Github(get_best_robot_token()) - images_path = os.path.join(temp_path, 'changed_images.json') docker_image = 'clickhouse/style-test' if os.path.exists(images_path): @@ -141,5 +132,3 @@ if __name__ == "__main__": state, description, test_results, additional_files = process_result(temp_path) report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_files) print("::notice ::Report url: {}".format(report_url)) - commit = 
get_commit(gh, pr_info.sha) - commit.create_status(context=NAME, description=description, state=state, target_url=report_url) From 0d6712532ad00274bbcfa9b20d5832c0123db88c Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 20 Oct 2021 14:25:14 +0300 Subject: [PATCH 397/438] Revert "Remove statuses from actions" This reverts commit 0dcb36df0155182727726bf326eb08510752cc15. --- .github/workflows/main.yml | 26 ++++++++++++++++++++++++++ tests/ci/docker_images_check.py | 10 ++++++++++ tests/ci/pvs_check.py | 10 ++++++++++ tests/ci/run_check.py | 7 +++++++ tests/ci/style_check.py | 11 +++++++++++ 5 files changed, 64 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 46a66ce98ce..60be4368df7 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -17,6 +17,11 @@ jobs: uses: actions/checkout@v2 - name: Labels check run: cd $GITHUB_WORKSPACE/tests/ci && python3 run_check.py + env: + ROBOT_TOKEN_0: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN }} + ROBOT_TOKEN_1: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_1 }} + ROBOT_TOKEN_2: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_2 }} + ROBOT_TOKEN_3: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_3 }} DockerHubPush: needs: CheckLabels runs-on: [self-hosted] @@ -28,6 +33,10 @@ jobs: env: YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} + ROBOT_TOKEN_0: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN }} + ROBOT_TOKEN_1: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_1 }} + ROBOT_TOKEN_2: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_2 }} + ROBOT_TOKEN_3: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_3 }} DOCKER_ROBOT_PASSWORD: ${{ secrets.DOCKER_ROBOT_PASSWORD }} - name: Upload images files to artifacts uses: actions/upload-artifact@v2 @@ -49,4 +58,21 @@ jobs: env: YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} + ROBOT_TOKEN_0: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN }} + ROBOT_TOKEN_1: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_1 }} + ROBOT_TOKEN_2: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_2 }} + ROBOT_TOKEN_3: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_3 }} run: cd $GITHUB_WORKSPACE/tests/ci && python3 style_check.py + FinishCheck: + needs: [StyleCheck, DockerHubPush, CheckLabels] + runs-on: [self-hosted] + steps: + - name: Check out repository code + uses: actions/checkout@v2 + - name: Finish label + run: cd $GITHUB_WORKSPACE/tests/ci && python3 finish_check.py + env: + ROBOT_TOKEN_0: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN }} + ROBOT_TOKEN_1: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_1 }} + ROBOT_TOKEN_2: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_2 }} + ROBOT_TOKEN_3: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_3 }} diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 0ddca0718e7..d1954d70e71 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -8,6 +8,7 @@ import os from pr_info import PRInfo from github import Github import shutil +from get_robot_token import get_best_robot_token NAME = "Push to Dockerhub (actions)" @@ -167,6 +168,11 @@ def upload_results(s3_client, pr_number, commit_sha, test_results): logging.info("Search result in url %s", url) return url +def get_commit(gh, commit_sha): + repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", 
"ClickHouse/ClickHouse")) + commit = repo.get_commit(commit_sha) + return commit + if __name__ == "__main__": logging.basicConfig(level=logging.INFO) repo_path = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")) @@ -217,6 +223,10 @@ if __name__ == "__main__": url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results) + gh = Github(get_best_robot_token()) + commit = get_commit(gh, pr_info.sha) + commit.create_status(context=NAME, description=description, state=status, target_url=url) + with open(os.path.join(temp_path, 'changed_images.json'), 'w') as images_file: json.dump(result_images, images_file) diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py index 94e046c0a68..34052adecdf 100644 --- a/tests/ci/pvs_check.py +++ b/tests/ci/pvs_check.py @@ -9,6 +9,7 @@ from s3_helper import S3Helper from pr_info import PRInfo import shutil import sys +from get_robot_token import get_best_robot_token NAME = 'PVS Studio (actions)' LICENCE_NAME = 'Free license: ClickHouse, Yandex' @@ -39,6 +40,11 @@ def _process_txt_report(path): errors.append(':'.join(line.split('\t')[0:2])) return warnings, errors +def get_commit(gh, commit_sha): + repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) + commit = repo.get_commit(commit_sha) + return commit + def upload_results(s3_client, pr_number, commit_sha, test_results, additional_files): s3_path_prefix = str(pr_number) + "/" + commit_sha + "/" + NAME.lower().replace(' ', '_') additional_urls = process_logs(s3_client, additional_files, s3_path_prefix) @@ -78,6 +84,8 @@ if __name__ == "__main__": aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") + gh = Github(get_best_robot_token()) + images_path = os.path.join(temp_path, 'changed_images.json') docker_image = 'clickhouse/pvs-test' if os.path.exists(images_path): @@ -130,6 +138,8 @@ if __name__ == "__main__": report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_logs) print("::notice ::Report url: {}".format(report_url)) + commit = get_commit(gh, pr_info.sha) + commit.create_status(context=NAME, description=description, state=status, target_url=report_url) except Exception as ex: print("Got an exception", ex) sys.exit(1) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 3371e5a5720..e6bc7259330 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -5,6 +5,8 @@ import requests from pr_info import PRInfo import sys import logging +from github import Github +from get_robot_token import get_best_robot_token NAME = 'Run Check (actions)' @@ -112,8 +114,13 @@ if __name__ == "__main__": pr_info = PRInfo(event, need_orgs=True) can_run, description = should_run_checks_for_pr(pr_info) + gh = Github(get_best_robot_token()) + commit = get_commit(gh, pr_info.sha) + url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" if not can_run: print("::notice ::Cannot run") + commit.create_status(context=NAME, description=description, state="failure", target_url=url) sys.exit(1) else: print("::notice ::Can run") + commit.create_status(context=NAME, description=description, state="pending", target_url=url) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index c7c25d2a95b..0b1d673e628 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -10,6 +10,7 @@ from s3_helper import S3Helper import time import json from pr_info import PRInfo +from get_robot_token import get_best_robot_token NAME 
= "Style Check (actions)" @@ -78,6 +79,12 @@ def upload_results(s3_client, pr_number, commit_sha, test_results, additional_fi logging.info("Search result in url %s", url) return url + +def get_commit(gh, commit_sha): + repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) + commit = repo.get_commit(commit_sha) + return commit + def update_check_with_curl(check_id): cmd_template = ("curl -v --request PATCH --url https://api.github.com/repos/ClickHouse/ClickHouse/check-runs/{} " "--header 'authorization: Bearer {}' " @@ -102,6 +109,8 @@ if __name__ == "__main__": aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") + gh = Github(get_best_robot_token()) + images_path = os.path.join(temp_path, 'changed_images.json') docker_image = 'clickhouse/style-test' if os.path.exists(images_path): @@ -132,3 +141,5 @@ if __name__ == "__main__": state, description, test_results, additional_files = process_result(temp_path) report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_files) print("::notice ::Report url: {}".format(report_url)) + commit = get_commit(gh, pr_info.sha) + commit.create_status(context=NAME, description=description, state=state, target_url=report_url) From f607a5d0ab12d6aa8d38c602493996392b906187 Mon Sep 17 00:00:00 2001 From: vesslanjin Date: Wed, 20 Oct 2021 07:40:06 -0400 Subject: [PATCH 398/438] remove branch in ColumnDecimal, same as pull req #29881 Co-authored-by: Zhu Jasper jasper.zhu@intel.com Signed-off-by: vesslanjin --- src/Columns/ColumnDecimal.cpp | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp index d8c5ced4b6b..dc236fafbd9 100644 --- a/src/Columns/ColumnDecimal.cpp +++ b/src/Columns/ColumnDecimal.cpp @@ -237,6 +237,26 @@ ColumnPtr ColumnDecimal::filter(const IColumn::Filter & filt, ssize_t result_ const UInt8 * filt_end = filt_pos + size; const T * data_pos = data.data(); +#ifdef __SSE2__ + static constexpr size_t SIMD_BYTES = 16; + const __m128i zero16 = _mm_setzero_si128(); + const UInt8 * filt_end_sse = filt_pos + size / SIMD_BYTES * SIMD_BYTES; + + while (filt_pos < filt_end_sse) + { + UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast(filt_pos)), zero16)); + mask = ~mask; + while (mask) + { + size_t index = __builtin_ctz(mask); + res_data.push_back(*(data_pos + index)); + mask = mask & (mask - 1); + } + filt_pos += SIMD_BYTES; + data_pos += SIMD_BYTES; + } +#endif + while (filt_pos < filt_end) { if (*filt_pos) From d120d3720af4d59013d696536a0aa6f950ae6394 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 20 Oct 2021 14:48:27 +0300 Subject: [PATCH 399/438] Trying aws secrets --- .github/workflows/main.yml | 25 ------------------------- tests/ci/docker_images_check.py | 9 +++------ tests/ci/get_robot_token.py | 14 ++++++++++---- tests/ci/pvs_check.py | 8 +------- tests/ci/s3_helper.py | 7 ++++--- tests/ci/style_check.py | 8 +------- 6 files changed, 19 insertions(+), 52 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 60be4368df7..7f20206a7b3 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -17,11 +17,6 @@ jobs: uses: actions/checkout@v2 - name: Labels check run: cd $GITHUB_WORKSPACE/tests/ci && python3 run_check.py - env: - ROBOT_TOKEN_0: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN }} - ROBOT_TOKEN_1: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_1 }} 
- ROBOT_TOKEN_2: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_2 }} - ROBOT_TOKEN_3: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_3 }} DockerHubPush: needs: CheckLabels runs-on: [self-hosted] @@ -30,14 +25,6 @@ jobs: uses: actions/checkout@v2 - name: Images check run: cd $GITHUB_WORKSPACE/tests/ci && python3 docker_images_check.py - env: - YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} - YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} - ROBOT_TOKEN_0: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN }} - ROBOT_TOKEN_1: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_1 }} - ROBOT_TOKEN_2: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_2 }} - ROBOT_TOKEN_3: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_3 }} - DOCKER_ROBOT_PASSWORD: ${{ secrets.DOCKER_ROBOT_PASSWORD }} - name: Upload images files to artifacts uses: actions/upload-artifact@v2 with: @@ -55,13 +42,6 @@ jobs: - name: Check out repository code uses: actions/checkout@v2 - name: Style Check - env: - YANDEX_S3_ACCESS_KEY_ID: ${{ secrets.YANDEX_S3_ACCESS_KEY_ID }} - YANDEX_S3_ACCESS_SECRET_KEY: ${{ secrets.YANDEX_S3_ACCESS_SECRET_KEY }} - ROBOT_TOKEN_0: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN }} - ROBOT_TOKEN_1: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_1 }} - ROBOT_TOKEN_2: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_2 }} - ROBOT_TOKEN_3: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_3 }} run: cd $GITHUB_WORKSPACE/tests/ci && python3 style_check.py FinishCheck: needs: [StyleCheck, DockerHubPush, CheckLabels] @@ -71,8 +51,3 @@ jobs: uses: actions/checkout@v2 - name: Finish label run: cd $GITHUB_WORKSPACE/tests/ci && python3 finish_check.py - env: - ROBOT_TOKEN_0: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN }} - ROBOT_TOKEN_1: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_1 }} - ROBOT_TOKEN_2: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_2 }} - ROBOT_TOKEN_3: ${{ secrets.ROBOT_CLICKHOUSE_PERSONAL_ACCESS_TOKEN_3 }} diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index d1954d70e71..9bd3f431429 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -8,7 +8,7 @@ import os from pr_info import PRInfo from github import Github import shutil -from get_robot_token import get_best_robot_token +from get_robot_token import get_best_robot_token, get_parameter_from_ssm NAME = "Push to Dockerhub (actions)" @@ -177,7 +177,7 @@ if __name__ == "__main__": logging.basicConfig(level=logging.INFO) repo_path = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")) temp_path = os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'docker_images_check') - dockerhub_password = os.getenv('DOCKER_ROBOT_PASSWORD') + dockerhub_password = get_parameter_from_ssm('dockerhub_robot_password') if os.path.exists(temp_path): shutil.rmtree(temp_path) @@ -213,10 +213,7 @@ if __name__ == "__main__": if len(description) >= 140: description = description[:136] + "..." 
- aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") - aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") - - s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) + s3_helper = S3Helper('https://s3.amazonaws.com') s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + NAME.lower().replace(' ', '_') status, test_results = process_test_results(s3_helper, images_processing_result, s3_path_prefix) diff --git a/tests/ci/get_robot_token.py b/tests/ci/get_robot_token.py index 75b688e5b44..73fdcd670fd 100644 --- a/tests/ci/get_robot_token.py +++ b/tests/ci/get_robot_token.py @@ -1,12 +1,18 @@ #!/usr/bin/env python3 -import os +import boto3 from github import Github -def get_best_robot_token(token_prefix_env_name="ROBOT_TOKEN_", total_tokens=4): +def get_parameter_from_ssm(name, decrypt=True, client=None): + if not client: + client = boto3.client('ssm') + return client.get_parameter(Name=name, WithDecryption=decrypt)['Parameter']['Value'] + +def get_best_robot_token(token_prefix_env_name="github_robot_token_", total_tokens=4): + client = boto3.client('ssm') tokens = {} - for i in range(total_tokens): + for i in range(1, total_tokens + 1): token_name = token_prefix_env_name + str(i) - token = os.getenv(token_name) + token = get_parameter_from_ssm(token_name, True, client) gh = Github(token) rest, _ = gh.rate_limiting tokens[token] = rest diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py index 34052adecdf..f8b1b58f307 100644 --- a/tests/ci/pvs_check.py +++ b/tests/ci/pvs_check.py @@ -81,9 +81,6 @@ if __name__ == "__main__": # this check modify repository so copy it to the temp directory logging.info("Repo copy path %s", repo_path) - aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") - aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") - gh = Github(get_best_robot_token()) images_path = os.path.join(temp_path, 'changed_images.json') @@ -98,10 +95,7 @@ if __name__ == "__main__": logging.info("Got docker image %s", docker_image) - if not aws_secret_key_id or not aws_secret_key: - logging.info("No secrets, will not upload anything to S3") - - s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) + s3_helper = S3Helper('https://s3.amazonaws.com') licence_key = os.getenv('PVS_STUDIO_KEY') cmd = f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --volume={repo_path}:/repo_folder --volume={temp_path}:/test_output -e LICENCE_NAME='{LICENCE_NAME}' -e LICENCE_KEY='{licence_key}' {docker_image}" diff --git a/tests/ci/s3_helper.py b/tests/ci/s3_helper.py index b9ae0de6e02..4054f650223 100644 --- a/tests/ci/s3_helper.py +++ b/tests/ci/s3_helper.py @@ -6,6 +6,7 @@ import boto3 from botocore.exceptions import ClientError, BotoCoreError from multiprocessing.dummy import Pool from compress_files import compress_file_fast +from get_robot_token import get_parameter_from_ssm def _md5(fname): hash_md5 = hashlib.md5() @@ -27,8 +28,8 @@ def _flatten_list(lst): class S3Helper(object): - def __init__(self, host, aws_access_key_id, aws_secret_access_key): - self.session = boto3.session.Session(aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key) + def __init__(self, host): + self.session = boto3.session.Session() self.client = self.session.client('s3', endpoint_url=host) def _upload_file_to_s3(self, bucket_name, file_path, s3_path): @@ -55,7 +56,7 @@ class S3Helper(object): 
self.client.upload_file(file_path, bucket_name, s3_path, ExtraArgs=metadata) logging.info("Upload {} to {}. Meta: {}".format(file_path, s3_path, metadata)) - return "https://storage.yandexcloud.net/{bucket}/{path}".format(bucket=bucket_name, path=s3_path) + return "https://s3.amazonaws.com/{bucket}/{path}".format(bucket=bucket_name, path=s3_path) def upload_test_report_to_s3(self, file_path, s3_path): return self._upload_file_to_s3('clickhouse-test-reports', file_path, s3_path) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 0b1d673e628..4a8cde70bc2 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -106,9 +106,6 @@ if __name__ == "__main__": if not os.path.exists(temp_path): os.makedirs(temp_path) - aws_secret_key_id = os.getenv("YANDEX_S3_ACCESS_KEY_ID", "") - aws_secret_key = os.getenv("YANDEX_S3_ACCESS_SECRET_KEY", "") - gh = Github(get_best_robot_token()) images_path = os.path.join(temp_path, 'changed_images.json') @@ -132,10 +129,7 @@ if __name__ == "__main__": else: raise Exception(f"Cannot pull dockerhub for image {docker_image}") - if not aws_secret_key_id or not aws_secret_key: - logging.info("No secrets, will not upload anything to S3") - - s3_helper = S3Helper('https://storage.yandexcloud.net', aws_access_key_id=aws_secret_key_id, aws_secret_access_key=aws_secret_key) + s3_helper = S3Helper('https://s3.amazonaws.com') subprocess.check_output(f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output {docker_image}", shell=True) state, description, test_results, additional_files = process_result(temp_path) From 6ea04b2ea66c50f815d102345dc08afc7d56ca85 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 20 Oct 2021 14:52:03 +0300 Subject: [PATCH 400/438] Fix region --- tests/ci/get_robot_token.py | 4 ++-- tests/ci/s3_helper.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/ci/get_robot_token.py b/tests/ci/get_robot_token.py index 73fdcd670fd..db37ee311c5 100644 --- a/tests/ci/get_robot_token.py +++ b/tests/ci/get_robot_token.py @@ -4,11 +4,11 @@ from github import Github def get_parameter_from_ssm(name, decrypt=True, client=None): if not client: - client = boto3.client('ssm') + client = boto3.client('ssm', region_name='us-east-1') return client.get_parameter(Name=name, WithDecryption=decrypt)['Parameter']['Value'] def get_best_robot_token(token_prefix_env_name="github_robot_token_", total_tokens=4): - client = boto3.client('ssm') + client = boto3.client('ssm', region_name='us-east-1') tokens = {} for i in range(1, total_tokens + 1): token_name = token_prefix_env_name + str(i) diff --git a/tests/ci/s3_helper.py b/tests/ci/s3_helper.py index 4054f650223..3c930f26634 100644 --- a/tests/ci/s3_helper.py +++ b/tests/ci/s3_helper.py @@ -29,7 +29,7 @@ def _flatten_list(lst): class S3Helper(object): def __init__(self, host): - self.session = boto3.session.Session() + self.session = boto3.session.Session(region_name='us-east-1') self.client = self.session.client('s3', endpoint_url=host) def _upload_file_to_s3(self, bucket_name, file_path, s3_path): From ac358d08a6477715448a6b0b2834e19b3430d613 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 20 Oct 2021 15:33:20 +0300 Subject: [PATCH 401/438] Fix style check --- src/Common/examples/executable_udf.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Common/examples/executable_udf.cpp b/src/Common/examples/executable_udf.cpp index 78a248fcddf..8d2d9f7314e 100644 
--- a/src/Common/examples/executable_udf.cpp +++ b/src/Common/examples/executable_udf.cpp @@ -24,11 +24,13 @@ int main(int argc, char **argv) size_t rows = 0; char dummy; - while (!read_buffer.eof()) { + while (!read_buffer.eof()) + { readIntText(rows, read_buffer); readChar(dummy, read_buffer); - for (size_t i = 0; i < rows; ++i) { + for (size_t i = 0; i < rows; ++i) + { readString(buffer, read_buffer); readChar(dummy, read_buffer); From 54d37204265a523a6aa26fbfd6506186d45333da Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 20 Oct 2021 16:08:13 +0300 Subject: [PATCH 402/438] ASTDropFunctionQuery formatting fix --- src/Parsers/ASTDropFunctionQuery.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Parsers/ASTDropFunctionQuery.cpp b/src/Parsers/ASTDropFunctionQuery.cpp index 0a46940e73d..47665aa52f9 100644 --- a/src/Parsers/ASTDropFunctionQuery.cpp +++ b/src/Parsers/ASTDropFunctionQuery.cpp @@ -12,10 +12,13 @@ ASTPtr ASTDropFunctionQuery::clone() const void ASTDropFunctionQuery::formatImpl(const IAST::FormatSettings & settings, IAST::FormatState &, IAST::FormatStateStacked) const { - settings.ostr << (settings.hilite ? hilite_keyword : "") << "DROP FUNCTION " << (settings.hilite ? hilite_none : ""); - settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(function_name) << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_keyword : "") << "DROP FUNCTION "; + if (if_exists) - settings.ostr << "IF EXISTS"; + settings.ostr << "IF EXISTS "; + + settings.ostr << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(function_name) << (settings.hilite ? hilite_none : ""); } } From 240895fba765180d3e6021b9db9ba8590a580c98 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Sat, 16 Oct 2021 23:07:55 +0800 Subject: [PATCH 403/438] Fix projections with JOIN alias columns --- src/Interpreters/ExpressionAnalyzer.cpp | 3 ++- .../0_stateless/01710_projection_with_joins.reference | 0 tests/queries/0_stateless/01710_projection_with_joins.sql | 6 ++++++ 3 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/01710_projection_with_joins.reference create mode 100644 tests/queries/0_stateless/01710_projection_with_joins.sql diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index a1f7a3c71e5..3cb3c1b47ab 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -890,9 +890,10 @@ static std::unique_ptr buildJoinedPlan( * - in the addExternalStorage function, the JOIN (SELECT ...) subquery is replaced with JOIN _data1, * in the subquery_for_set object this subquery is exposed as source and the temporary table _data1 as the `table`. * - this function shows the expression JOIN _data1. + * - JOIN tables will need aliases to correctly resolve USING clause. 
*/ auto interpreter = interpretSubquery( - join_element.table_expression, context, original_right_columns, query_options.copy().setWithAllColumns()); + join_element.table_expression, context, original_right_columns, query_options.copy().setWithAllColumns().ignoreAlias(false)); auto joined_plan = std::make_unique(); interpreter->buildQueryPlan(*joined_plan); { diff --git a/tests/queries/0_stateless/01710_projection_with_joins.reference b/tests/queries/0_stateless/01710_projection_with_joins.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01710_projection_with_joins.sql b/tests/queries/0_stateless/01710_projection_with_joins.sql new file mode 100644 index 00000000000..97dc396f362 --- /dev/null +++ b/tests/queries/0_stateless/01710_projection_with_joins.sql @@ -0,0 +1,6 @@ +drop table if exists t; + +create table t (s UInt16, l UInt16, projection p (select s, l order by l)) engine MergeTree order by s; + +set allow_experimental_projection_optimization=1; +select s from t join (select toUInt16(1) as s) x using (s); From 2f615e9176d8bb75da7a9c69586e9f5e4d81c46e Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Tue, 19 Oct 2021 22:09:43 +0800 Subject: [PATCH 404/438] Use original_query for projection analysis --- src/Interpreters/InterpreterSelectQuery.cpp | 1 + src/Storages/MergeTree/MergeTreeData.cpp | 2 +- src/Storages/SelectQueryInfo.h | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 35c8c32c65b..59d0b88bae7 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -272,6 +272,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( query_info.ignore_projections = options.ignore_projections; query_info.is_projection_query = options.is_projection_query; + query_info.original_query = query_ptr->clone(); initSettings(); const Settings & settings = context->getSettingsRef(); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index ee55a6826c3..6503e6dca12 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -4531,7 +4531,7 @@ bool MergeTreeData::getQueryProcessingStageWithAggregateProjection( if (!settings.allow_experimental_projection_optimization || query_info.ignore_projections || query_info.is_projection_query) return false; - const auto & query_ptr = query_info.query; + const auto & query_ptr = query_info.original_query; if (auto * select = query_ptr->as(); select) { diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index b08818a2baa..fe7b22d331b 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -133,6 +133,7 @@ struct SelectQueryInfo { ASTPtr query; ASTPtr view_query; /// Optimized VIEW query + ASTPtr original_query; /// Unmodified query for projection analysis /// Cluster for the query. 
ClusterPtr cluster; From 367e58357ad6c7e286860640ec02161f3e338801 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 20 Oct 2021 13:43:39 +0000 Subject: [PATCH 405/438] Better test --- .../0_stateless/01236_graphite_mt.reference | 344 ++++++++++++++++++ .../queries/0_stateless/01236_graphite_mt.sql | 4 + 2 files changed, 348 insertions(+) diff --git a/tests/queries/0_stateless/01236_graphite_mt.reference b/tests/queries/0_stateless/01236_graphite_mt.reference index a30d2495265..0f2e8e81377 100644 --- a/tests/queries/0_stateless/01236_graphite_mt.reference +++ b/tests/queries/0_stateless/01236_graphite_mt.reference @@ -342,3 +342,347 @@ 2 sum_2 98950 1 940 2 sum_2 108950 1 1040 2 sum_2 70170 1 1140 +1 max_1 9 1 0 +1 max_1 19 1 10 +1 max_1 29 1 20 +1 max_1 39 1 30 +1 max_1 49 1 40 +1 max_1 59 1 50 +1 max_1 69 1 60 +1 max_1 79 1 70 +1 max_1 89 1 80 +1 max_1 99 1 90 +1 max_1 109 1 100 +1 max_1 119 1 110 +1 max_1 129 1 120 +1 max_1 139 1 130 +1 max_1 149 1 140 +1 max_1 159 1 150 +1 max_1 169 1 160 +1 max_1 179 1 170 +1 max_1 189 1 180 +1 max_1 199 1 190 +1 max_1 209 1 200 +1 max_1 219 1 210 +1 max_1 229 1 220 +1 max_1 239 1 230 +1 max_1 249 1 240 +1 max_1 259 1 250 +1 max_1 269 1 260 +1 max_1 279 1 270 +1 max_1 289 1 280 +1 max_1 299 1 290 +1 max_1 39 1 0 +1 max_1 139 1 40 +1 max_1 239 1 140 +1 max_1 339 1 240 +1 max_1 439 1 340 +1 max_1 539 1 440 +1 max_1 639 1 540 +1 max_1 739 1 640 +1 max_1 839 1 740 +1 max_1 939 1 840 +1 max_1 1039 1 940 +1 max_1 1139 1 1040 +1 max_1 1199 1 1140 +1 max_2 9 1 0 +1 max_2 19 1 10 +1 max_2 29 1 20 +1 max_2 39 1 30 +1 max_2 49 1 40 +1 max_2 59 1 50 +1 max_2 69 1 60 +1 max_2 79 1 70 +1 max_2 89 1 80 +1 max_2 99 1 90 +1 max_2 109 1 100 +1 max_2 119 1 110 +1 max_2 129 1 120 +1 max_2 139 1 130 +1 max_2 149 1 140 +1 max_2 159 1 150 +1 max_2 169 1 160 +1 max_2 179 1 170 +1 max_2 189 1 180 +1 max_2 199 1 190 +1 max_2 209 1 200 +1 max_2 219 1 210 +1 max_2 229 1 220 +1 max_2 239 1 230 +1 max_2 249 1 240 +1 max_2 259 1 250 +1 max_2 269 1 260 +1 max_2 279 1 270 +1 max_2 289 1 280 +1 max_2 299 1 290 +1 max_2 39 1 0 +1 max_2 139 1 40 +1 max_2 239 1 140 +1 max_2 339 1 240 +1 max_2 439 1 340 +1 max_2 539 1 440 +1 max_2 639 1 540 +1 max_2 739 1 640 +1 max_2 839 1 740 +1 max_2 939 1 840 +1 max_2 1039 1 940 +1 max_2 1139 1 1040 +1 max_2 1199 1 1140 +1 sum_1 45 1 0 +1 sum_1 145 1 10 +1 sum_1 245 1 20 +1 sum_1 345 1 30 +1 sum_1 445 1 40 +1 sum_1 545 1 50 +1 sum_1 645 1 60 +1 sum_1 745 1 70 +1 sum_1 845 1 80 +1 sum_1 945 1 90 +1 sum_1 1045 1 100 +1 sum_1 1145 1 110 +1 sum_1 1245 1 120 +1 sum_1 1345 1 130 +1 sum_1 1445 1 140 +1 sum_1 1545 1 150 +1 sum_1 1645 1 160 +1 sum_1 1745 1 170 +1 sum_1 1845 1 180 +1 sum_1 1945 1 190 +1 sum_1 2045 1 200 +1 sum_1 2145 1 210 +1 sum_1 2245 1 220 +1 sum_1 2345 1 230 +1 sum_1 2445 1 240 +1 sum_1 2545 1 250 +1 sum_1 2645 1 260 +1 sum_1 2745 1 270 +1 sum_1 2845 1 280 +1 sum_1 2945 1 290 +1 sum_1 780 1 0 +1 sum_1 8950 1 40 +1 sum_1 18950 1 140 +1 sum_1 28950 1 240 +1 sum_1 38950 1 340 +1 sum_1 48950 1 440 +1 sum_1 58950 1 540 +1 sum_1 68950 1 640 +1 sum_1 78950 1 740 +1 sum_1 88950 1 840 +1 sum_1 98950 1 940 +1 sum_1 108950 1 1040 +1 sum_1 70170 1 1140 +1 sum_2 45 1 0 +1 sum_2 145 1 10 +1 sum_2 245 1 20 +1 sum_2 345 1 30 +1 sum_2 445 1 40 +1 sum_2 545 1 50 +1 sum_2 645 1 60 +1 sum_2 745 1 70 +1 sum_2 845 1 80 +1 sum_2 945 1 90 +1 sum_2 1045 1 100 +1 sum_2 1145 1 110 +1 sum_2 1245 1 120 +1 sum_2 1345 1 130 +1 sum_2 1445 1 140 +1 sum_2 1545 1 150 +1 sum_2 1645 1 160 +1 sum_2 1745 1 170 +1 sum_2 1845 1 180 +1 sum_2 1945 1 190 +1 sum_2 2045 1 200 
+1 sum_2 2145 1 210 +1 sum_2 2245 1 220 +1 sum_2 2345 1 230 +1 sum_2 2445 1 240 +1 sum_2 2545 1 250 +1 sum_2 2645 1 260 +1 sum_2 2745 1 270 +1 sum_2 2845 1 280 +1 sum_2 2945 1 290 +1 sum_2 780 1 0 +1 sum_2 8950 1 40 +1 sum_2 18950 1 140 +1 sum_2 28950 1 240 +1 sum_2 38950 1 340 +1 sum_2 48950 1 440 +1 sum_2 58950 1 540 +1 sum_2 68950 1 640 +1 sum_2 78950 1 740 +1 sum_2 88950 1 840 +1 sum_2 98950 1 940 +1 sum_2 108950 1 1040 +1 sum_2 70170 1 1140 +2 max_1 9 1 0 +2 max_1 19 1 10 +2 max_1 29 1 20 +2 max_1 39 1 30 +2 max_1 49 1 40 +2 max_1 59 1 50 +2 max_1 69 1 60 +2 max_1 79 1 70 +2 max_1 89 1 80 +2 max_1 99 1 90 +2 max_1 109 1 100 +2 max_1 119 1 110 +2 max_1 129 1 120 +2 max_1 139 1 130 +2 max_1 149 1 140 +2 max_1 159 1 150 +2 max_1 169 1 160 +2 max_1 179 1 170 +2 max_1 189 1 180 +2 max_1 199 1 190 +2 max_1 209 1 200 +2 max_1 219 1 210 +2 max_1 229 1 220 +2 max_1 239 1 230 +2 max_1 249 1 240 +2 max_1 259 1 250 +2 max_1 269 1 260 +2 max_1 279 1 270 +2 max_1 289 1 280 +2 max_1 299 1 290 +2 max_1 39 1 0 +2 max_1 139 1 40 +2 max_1 239 1 140 +2 max_1 339 1 240 +2 max_1 439 1 340 +2 max_1 539 1 440 +2 max_1 639 1 540 +2 max_1 739 1 640 +2 max_1 839 1 740 +2 max_1 939 1 840 +2 max_1 1039 1 940 +2 max_1 1139 1 1040 +2 max_1 1199 1 1140 +2 max_2 9 1 0 +2 max_2 19 1 10 +2 max_2 29 1 20 +2 max_2 39 1 30 +2 max_2 49 1 40 +2 max_2 59 1 50 +2 max_2 69 1 60 +2 max_2 79 1 70 +2 max_2 89 1 80 +2 max_2 99 1 90 +2 max_2 109 1 100 +2 max_2 119 1 110 +2 max_2 129 1 120 +2 max_2 139 1 130 +2 max_2 149 1 140 +2 max_2 159 1 150 +2 max_2 169 1 160 +2 max_2 179 1 170 +2 max_2 189 1 180 +2 max_2 199 1 190 +2 max_2 209 1 200 +2 max_2 219 1 210 +2 max_2 229 1 220 +2 max_2 239 1 230 +2 max_2 249 1 240 +2 max_2 259 1 250 +2 max_2 269 1 260 +2 max_2 279 1 270 +2 max_2 289 1 280 +2 max_2 299 1 290 +2 max_2 39 1 0 +2 max_2 139 1 40 +2 max_2 239 1 140 +2 max_2 339 1 240 +2 max_2 439 1 340 +2 max_2 539 1 440 +2 max_2 639 1 540 +2 max_2 739 1 640 +2 max_2 839 1 740 +2 max_2 939 1 840 +2 max_2 1039 1 940 +2 max_2 1139 1 1040 +2 max_2 1199 1 1140 +2 sum_1 45 1 0 +2 sum_1 145 1 10 +2 sum_1 245 1 20 +2 sum_1 345 1 30 +2 sum_1 445 1 40 +2 sum_1 545 1 50 +2 sum_1 645 1 60 +2 sum_1 745 1 70 +2 sum_1 845 1 80 +2 sum_1 945 1 90 +2 sum_1 1045 1 100 +2 sum_1 1145 1 110 +2 sum_1 1245 1 120 +2 sum_1 1345 1 130 +2 sum_1 1445 1 140 +2 sum_1 1545 1 150 +2 sum_1 1645 1 160 +2 sum_1 1745 1 170 +2 sum_1 1845 1 180 +2 sum_1 1945 1 190 +2 sum_1 2045 1 200 +2 sum_1 2145 1 210 +2 sum_1 2245 1 220 +2 sum_1 2345 1 230 +2 sum_1 2445 1 240 +2 sum_1 2545 1 250 +2 sum_1 2645 1 260 +2 sum_1 2745 1 270 +2 sum_1 2845 1 280 +2 sum_1 2945 1 290 +2 sum_1 780 1 0 +2 sum_1 8950 1 40 +2 sum_1 18950 1 140 +2 sum_1 28950 1 240 +2 sum_1 38950 1 340 +2 sum_1 48950 1 440 +2 sum_1 58950 1 540 +2 sum_1 68950 1 640 +2 sum_1 78950 1 740 +2 sum_1 88950 1 840 +2 sum_1 98950 1 940 +2 sum_1 108950 1 1040 +2 sum_1 70170 1 1140 +2 sum_2 45 1 0 +2 sum_2 145 1 10 +2 sum_2 245 1 20 +2 sum_2 345 1 30 +2 sum_2 445 1 40 +2 sum_2 545 1 50 +2 sum_2 645 1 60 +2 sum_2 745 1 70 +2 sum_2 845 1 80 +2 sum_2 945 1 90 +2 sum_2 1045 1 100 +2 sum_2 1145 1 110 +2 sum_2 1245 1 120 +2 sum_2 1345 1 130 +2 sum_2 1445 1 140 +2 sum_2 1545 1 150 +2 sum_2 1645 1 160 +2 sum_2 1745 1 170 +2 sum_2 1845 1 180 +2 sum_2 1945 1 190 +2 sum_2 2045 1 200 +2 sum_2 2145 1 210 +2 sum_2 2245 1 220 +2 sum_2 2345 1 230 +2 sum_2 2445 1 240 +2 sum_2 2545 1 250 +2 sum_2 2645 1 260 +2 sum_2 2745 1 270 +2 sum_2 2845 1 280 +2 sum_2 2945 1 290 +2 sum_2 780 1 0 +2 sum_2 8950 1 40 +2 sum_2 18950 1 140 +2 sum_2 28950 1 240 +2 sum_2 
38950 1 340 +2 sum_2 48950 1 440 +2 sum_2 58950 1 540 +2 sum_2 68950 1 640 +2 sum_2 78950 1 740 +2 sum_2 88950 1 840 +2 sum_2 98950 1 940 +2 sum_2 108950 1 1040 +2 sum_2 70170 1 1140 diff --git a/tests/queries/0_stateless/01236_graphite_mt.sql b/tests/queries/0_stateless/01236_graphite_mt.sql index 1d531f88ecb..0ec905fa0a8 100644 --- a/tests/queries/0_stateless/01236_graphite_mt.sql +++ b/tests/queries/0_stateless/01236_graphite_mt.sql @@ -34,4 +34,8 @@ WITH dates AS select key, Path, Value, Version, col from test_graphite final order by key, Path, Time desc; +optimize table test_graphite final; + +select key, Path, Value, Version, col from test_graphite order by key, Path, Time desc; + drop table test_graphite; From f47aec6751c830f9d3f8a099d813e9225646a143 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 20 Oct 2021 17:52:28 +0300 Subject: [PATCH 406/438] Fixed tests --- tests/queries/0_stateless/01856_create_function.sql | 1 - .../02098_sql_user_defined_functions_aliases.reference | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01856_create_function.sql b/tests/queries/0_stateless/01856_create_function.sql index 10f87f4a3df..cdc4baad1af 100644 --- a/tests/queries/0_stateless/01856_create_function.sql +++ b/tests/queries/0_stateless/01856_create_function.sql @@ -4,7 +4,6 @@ CREATE FUNCTION 01856_test_function_0 AS (a, b, c) -> a * b * c; SELECT 01856_test_function_0(2, 3, 4); SELECT isConstant(01856_test_function_0(1, 2, 3)); DROP FUNCTION 01856_test_function_0; -CREATE FUNCTION 01856_test_function_1 AS (a, b) -> a || b || c; --{serverError 47} CREATE FUNCTION 01856_test_function_1 AS (a, b) -> 01856_test_function_1(a, b) + 01856_test_function_1(a, b); --{serverError 611} CREATE FUNCTION cast AS a -> a + 1; --{serverError 609} CREATE FUNCTION sum AS (a, b) -> a + b; --{serverError 609} diff --git a/tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.reference b/tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.reference index 8ab2f6d0ac6..45a4fb75db8 100644 --- a/tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.reference +++ b/tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.reference @@ -1 +1 @@ -8 4 +8 From ff48017f4a35ee222f34d88f25df78eedf30f322 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 20 Oct 2021 16:04:02 +0300 Subject: [PATCH 407/438] SQLUserDefinedFunctions support CREATE OR REPLACE, CREATE IF NOT EXISTS --- .../InterpreterCreateFunctionQuery.cpp | 20 +++++++++++++++---- src/Interpreters/InterpreterFactory.cpp | 2 +- .../UserDefinedSQLFunctionFactory.cpp | 16 ++++++++++----- .../UserDefinedSQLFunctionFactory.h | 12 ++++++++++- .../UserDefinedSQLObjectsLoader.cpp | 10 +++++----- .../UserDefinedSQLObjectsLoader.h | 2 +- src/Parsers/ASTCreateFunctionQuery.cpp | 13 +++++++++++- src/Parsers/ASTCreateFunctionQuery.h | 3 +++ src/Parsers/ParserCreateFunctionQuery.cpp | 17 +++++++++++++++- ...ined_functions_create_or_replace.reference | 4 ++++ ...er_defined_functions_create_or_replace.sql | 13 ++++++++++++ ...d_functions_create_if_not_exists.reference | 1 + ...defined_functions_create_if_not_exists.sql | 8 ++++++++ 13 files changed, 102 insertions(+), 19 deletions(-) create mode 100644 tests/queries/0_stateless/02101_sql_user_defined_functions_create_or_replace.reference create mode 100644 tests/queries/0_stateless/02101_sql_user_defined_functions_create_or_replace.sql create mode 100644 
tests/queries/0_stateless/02102_sql_user_defined_functions_create_if_not_exists.reference create mode 100644 tests/queries/0_stateless/02102_sql_user_defined_functions_create_if_not_exists.sql diff --git a/src/Interpreters/InterpreterCreateFunctionQuery.cpp b/src/Interpreters/InterpreterCreateFunctionQuery.cpp index ccb5f4040ec..39fec4a941c 100644 --- a/src/Interpreters/InterpreterCreateFunctionQuery.cpp +++ b/src/Interpreters/InterpreterCreateFunctionQuery.cpp @@ -31,20 +31,32 @@ BlockIO InterpreterCreateFunctionQuery::execute() if (!create_function_query) throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Expected CREATE FUNCTION query"); + auto & user_defined_function_factory = UserDefinedSQLFunctionFactory::instance(); + auto & function_name = create_function_query->function_name; + + bool if_not_exists = create_function_query->if_not_exists; + bool replace = create_function_query->or_replace; + + create_function_query->if_not_exists = false; + create_function_query->or_replace = false; + + if (if_not_exists && user_defined_function_factory.tryGet(function_name) != nullptr) + return {}; + validateFunction(create_function_query->function_core, function_name); - UserDefinedSQLFunctionFactory::instance().registerFunction(function_name, query_ptr); + user_defined_function_factory.registerFunction(function_name, query_ptr, replace); - if (!persist_function) + if (persist_function) { try { - UserDefinedSQLObjectsLoader::instance().storeObject(current_context, UserDefinedSQLObjectType::Function, function_name, *query_ptr); + UserDefinedSQLObjectsLoader::instance().storeObject(current_context, UserDefinedSQLObjectType::Function, function_name, *query_ptr, replace); } catch (Exception & exception) { - UserDefinedSQLFunctionFactory::instance().unregisterFunction(function_name); + user_defined_function_factory.unregisterFunction(function_name); exception.addMessage(fmt::format("while storing user defined function {} on disk", backQuote(function_name))); throw; } diff --git a/src/Interpreters/InterpreterFactory.cpp b/src/Interpreters/InterpreterFactory.cpp index 54307ae848b..fcf5f19aef6 100644 --- a/src/Interpreters/InterpreterFactory.cpp +++ b/src/Interpreters/InterpreterFactory.cpp @@ -278,7 +278,7 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, ContextMut } else if (query->as()) { - return std::make_unique(query, context, false /*is_internal*/); + return std::make_unique(query, context, true /*persist_function*/); } else if (query->as()) { diff --git a/src/Interpreters/UserDefinedSQLFunctionFactory.cpp b/src/Interpreters/UserDefinedSQLFunctionFactory.cpp index 1d2a80305c6..f036741ca21 100644 --- a/src/Interpreters/UserDefinedSQLFunctionFactory.cpp +++ b/src/Interpreters/UserDefinedSQLFunctionFactory.cpp @@ -19,7 +19,7 @@ UserDefinedSQLFunctionFactory & UserDefinedSQLFunctionFactory::instance() return result; } -void UserDefinedSQLFunctionFactory::registerFunction(const String & function_name, ASTPtr create_function_query) +void UserDefinedSQLFunctionFactory::registerFunction(const String & function_name, ASTPtr create_function_query, bool replace) { if (FunctionFactory::instance().hasNameOrAlias(function_name)) throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "The function '{}' already exists", function_name); @@ -29,11 +29,17 @@ void UserDefinedSQLFunctionFactory::registerFunction(const String & function_nam std::lock_guard lock(mutex); - auto [_, inserted] = function_name_to_create_query.emplace(function_name, std::move(create_function_query)); + auto [it, inserted] = 
function_name_to_create_query.emplace(function_name, create_function_query); + if (!inserted) - throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, - "The function name '{}' is not unique", - function_name); + { + if (replace) + it->second = std::move(create_function_query); + else + throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, + "The function name '{}' is not unique", + function_name); + } } void UserDefinedSQLFunctionFactory::unregisterFunction(const String & function_name) diff --git a/src/Interpreters/UserDefinedSQLFunctionFactory.h b/src/Interpreters/UserDefinedSQLFunctionFactory.h index 6838c2f9892..6487b951705 100644 --- a/src/Interpreters/UserDefinedSQLFunctionFactory.h +++ b/src/Interpreters/UserDefinedSQLFunctionFactory.h @@ -10,21 +10,31 @@ namespace DB { +/// Factory for SQLUserDefinedFunctions class UserDefinedSQLFunctionFactory : public IHints<1, UserDefinedSQLFunctionFactory> { public: static UserDefinedSQLFunctionFactory & instance(); - void registerFunction(const String & function_name, ASTPtr create_function_query); + /** Register function for function_name in factory for specified create_function_query. + * If replace = true and function with function_name already exists replace it with create_function_query. + * Otherwise throws exception. + */ + void registerFunction(const String & function_name, ASTPtr create_function_query, bool replace); + /// Unregister function for function_name void unregisterFunction(const String & function_name); + /// Get function create query for function_name. If no function registered with function_name throws exception. ASTPtr get(const String & function_name) const; + /// Get function create query for function_name. If no function registered with function_name return nullptr. ASTPtr tryGet(const String & function_name) const; + /// Check if function with function_name registered. bool has(const String & function_name) const; + /// Get all user defined functions registered names. 
std::vector getAllRegisteredNames() const override; private: diff --git a/src/Interpreters/UserDefinedSQLObjectsLoader.cpp b/src/Interpreters/UserDefinedSQLObjectsLoader.cpp index e4eb97f3002..a71f1f0799c 100644 --- a/src/Interpreters/UserDefinedSQLObjectsLoader.cpp +++ b/src/Interpreters/UserDefinedSQLObjectsLoader.cpp @@ -69,7 +69,7 @@ void UserDefinedSQLObjectsLoader::loadUserDefinedObject(ContextPtr context, User 0, context->getSettingsRef().max_parser_depth); - InterpreterCreateFunctionQuery interpreter(ast, context, true /*is internal*/); + InterpreterCreateFunctionQuery interpreter(ast, context, false /*persist_function*/); interpreter.execute(); } } @@ -111,7 +111,7 @@ void UserDefinedSQLObjectsLoader::loadObjects(ContextPtr context) } } -void UserDefinedSQLObjectsLoader::storeObject(ContextPtr context, UserDefinedSQLObjectType object_type, const String & object_name, const IAST & ast) +void UserDefinedSQLObjectsLoader::storeObject(ContextPtr context, UserDefinedSQLObjectType object_type, const String & object_name, const IAST & ast, bool replace) { if (unlikely(!enable_persistence)) return; @@ -127,7 +127,7 @@ void UserDefinedSQLObjectsLoader::storeObject(ContextPtr context, UserDefinedSQL } } - if (std::filesystem::exists(file_path)) + if (!replace && std::filesystem::exists(file_path)) throw Exception(ErrorCodes::OBJECT_ALREADY_STORED_ON_DISK, "User defined object {} already stored on disk", backQuote(file_path)); LOG_DEBUG(log, "Storing object {} to file {}", backQuote(object_name), file_path); @@ -135,9 +135,9 @@ void UserDefinedSQLObjectsLoader::storeObject(ContextPtr context, UserDefinedSQL WriteBufferFromOwnString create_statement_buf; formatAST(ast, create_statement_buf, false); writeChar('\n', create_statement_buf); - String create_statement = create_statement_buf.str(); - WriteBufferFromFile out(file_path, create_statement.size(), O_WRONLY | O_CREAT | O_EXCL); + + WriteBufferFromFile out(file_path, create_statement.size()); writeString(create_statement, out); out.next(); if (context->getSettingsRef().fsync_metadata) diff --git a/src/Interpreters/UserDefinedSQLObjectsLoader.h b/src/Interpreters/UserDefinedSQLObjectsLoader.h index 17493933f21..2e747f67a8d 100644 --- a/src/Interpreters/UserDefinedSQLObjectsLoader.h +++ b/src/Interpreters/UserDefinedSQLObjectsLoader.h @@ -21,7 +21,7 @@ public: UserDefinedSQLObjectsLoader(); void loadObjects(ContextPtr context); - void storeObject(ContextPtr context, UserDefinedSQLObjectType object_type, const String & object_name, const IAST & ast); + void storeObject(ContextPtr context, UserDefinedSQLObjectType object_type, const String & object_name, const IAST & ast, bool replace); void removeObject(ContextPtr context, UserDefinedSQLObjectType object_type, const String & object_name); /// For ClickHouse local if path is not set we can disable loader. diff --git a/src/Parsers/ASTCreateFunctionQuery.cpp b/src/Parsers/ASTCreateFunctionQuery.cpp index 0b3991ddc44..4e1e7de660d 100644 --- a/src/Parsers/ASTCreateFunctionQuery.cpp +++ b/src/Parsers/ASTCreateFunctionQuery.cpp @@ -12,7 +12,18 @@ ASTPtr ASTCreateFunctionQuery::clone() const void ASTCreateFunctionQuery::formatImpl(const IAST::FormatSettings & settings, IAST::FormatState & state, IAST::FormatStateStacked frame) const { - settings.ostr << (settings.hilite ? hilite_keyword : "") << "CREATE FUNCTION " << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? 
hilite_keyword : "") << "CREATE "; + + if (or_replace) + settings.ostr << "OR REPLACE "; + + settings.ostr << "FUNCTION "; + + if (if_not_exists) + settings.ostr << "IF NOT EXISTS "; + + settings.ostr << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(function_name) << (settings.hilite ? hilite_none : ""); settings.ostr << (settings.hilite ? hilite_keyword : "") << " AS " << (settings.hilite ? hilite_none : ""); function_core->formatImpl(settings, state, frame); diff --git a/src/Parsers/ASTCreateFunctionQuery.h b/src/Parsers/ASTCreateFunctionQuery.h index 3adddad8fbd..a58fe64c435 100644 --- a/src/Parsers/ASTCreateFunctionQuery.h +++ b/src/Parsers/ASTCreateFunctionQuery.h @@ -12,6 +12,9 @@ public: String function_name; ASTPtr function_core; + bool or_replace = false; + bool if_not_exists = false; + String getID(char) const override { return "CreateFunctionQuery"; } ASTPtr clone() const override; diff --git a/src/Parsers/ParserCreateFunctionQuery.cpp b/src/Parsers/ParserCreateFunctionQuery.cpp index fbfd02415e7..5d84b6bc2dc 100644 --- a/src/Parsers/ParserCreateFunctionQuery.cpp +++ b/src/Parsers/ParserCreateFunctionQuery.cpp @@ -1,10 +1,12 @@ +#include + #include #include #include #include #include #include -#include + namespace DB { @@ -13,6 +15,8 @@ bool ParserCreateFunctionQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Exp { ParserKeyword s_create("CREATE"); ParserKeyword s_function("FUNCTION"); + ParserKeyword s_or_replace("OR REPLACE"); + ParserKeyword s_if_not_exists("IF NOT EXISTS"); ParserIdentifier function_name_p; ParserKeyword s_as("AS"); ParserLambdaExpression lambda_p; @@ -20,12 +24,21 @@ bool ParserCreateFunctionQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Exp ASTPtr function_name; ASTPtr function_core; + bool or_replace = false; + bool if_not_exists = false; + if (!s_create.ignore(pos, expected)) return false; + if (s_or_replace.ignore(pos, expected)) + or_replace = true; + if (!s_function.ignore(pos, expected)) return false; + if (!or_replace && s_if_not_exists.ignore(pos, expected)) + if_not_exists = true; + if (!function_name_p.parse(pos, function_name, expected)) return false; @@ -40,6 +53,8 @@ bool ParserCreateFunctionQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Exp create_function_query->function_name = function_name->as().name(); create_function_query->function_core = function_core; + create_function_query->or_replace = or_replace; + create_function_query->if_not_exists = if_not_exists; return true; } diff --git a/tests/queries/0_stateless/02101_sql_user_defined_functions_create_or_replace.reference b/tests/queries/0_stateless/02101_sql_user_defined_functions_create_or_replace.reference new file mode 100644 index 00000000000..437cc81afba --- /dev/null +++ b/tests/queries/0_stateless/02101_sql_user_defined_functions_create_or_replace.reference @@ -0,0 +1,4 @@ +CREATE FUNCTION `02101_test_function` AS x -> (x + 1) +2 +CREATE FUNCTION `02101_test_function` AS x -> (x + 2) +3 diff --git a/tests/queries/0_stateless/02101_sql_user_defined_functions_create_or_replace.sql b/tests/queries/0_stateless/02101_sql_user_defined_functions_create_or_replace.sql new file mode 100644 index 00000000000..7b0ad311bd4 --- /dev/null +++ b/tests/queries/0_stateless/02101_sql_user_defined_functions_create_or_replace.sql @@ -0,0 +1,13 @@ +-- Tags: no-parallel + +CREATE OR REPLACE FUNCTION 02101_test_function AS x -> x + 1; + +SELECT create_query FROM system.functions WHERE name = '02101_test_function'; 
+SELECT 02101_test_function(1); + +CREATE OR REPLACE FUNCTION 02101_test_function AS x -> x + 2; + +SELECT create_query FROM system.functions WHERE name = '02101_test_function'; +SELECT 02101_test_function(1); + +DROP FUNCTION 02101_test_function; diff --git a/tests/queries/0_stateless/02102_sql_user_defined_functions_create_if_not_exists.reference b/tests/queries/0_stateless/02102_sql_user_defined_functions_create_if_not_exists.reference new file mode 100644 index 00000000000..0cfbf08886f --- /dev/null +++ b/tests/queries/0_stateless/02102_sql_user_defined_functions_create_if_not_exists.reference @@ -0,0 +1 @@ +2 diff --git a/tests/queries/0_stateless/02102_sql_user_defined_functions_create_if_not_exists.sql b/tests/queries/0_stateless/02102_sql_user_defined_functions_create_if_not_exists.sql new file mode 100644 index 00000000000..092fa660cb0 --- /dev/null +++ b/tests/queries/0_stateless/02102_sql_user_defined_functions_create_if_not_exists.sql @@ -0,0 +1,8 @@ +-- Tags: no-parallel + +CREATE FUNCTION IF NOT EXISTS 02102_test_function AS x -> x + 1; +SELECT 02102_test_function(1); + +CREATE FUNCTION 02102_test_function AS x -> x + 1; --{serverError 609} +CREATE FUNCTION IF NOT EXISTS 02102_test_function AS x -> x + 1; +DROP FUNCTION 02102_test_function; From 7cb6cddf6dfe6c76a8baa397ef587aeb3edee852 Mon Sep 17 00:00:00 2001 From: gyuton Date: Wed, 20 Oct 2021 19:52:18 +0300 Subject: [PATCH 408/438] Initial draft --- .../functions/splitting-merging-functions.md | 37 ++++++++++++++++++ .../functions/splitting-merging-functions.md | 38 +++++++++++++++++++ 2 files changed, 75 insertions(+) diff --git a/docs/en/sql-reference/functions/splitting-merging-functions.md b/docs/en/sql-reference/functions/splitting-merging-functions.md index 718d5a977b9..bc06193612a 100644 --- a/docs/en/sql-reference/functions/splitting-merging-functions.md +++ b/docs/en/sql-reference/functions/splitting-merging-functions.md @@ -270,3 +270,40 @@ Result: │ [['abc','123'],['8','"hkl"']] │ └───────────────────────────────────────────────────────────────────────┘ ``` + +## ngrams {#ngrams} + +Splits the ASCII string into n-grams of `ngramsize` symbols. + +**Syntax** + +``` sql +ngrams(string, ngramsize) +``` + +**Arguments** + +- `string` — String. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `ngramsize` — The size of an n-gram. [UInt](../../sql-reference/data-types/int-uint.md). + +**Returned values** + +- Array with n-grams. + +Type: [Array](../../sql-reference/data-types/array.md)([FixedString](../../sql-reference/data-types/fixedstring.md)). + +**Example** + +Query: + +``` sql +SELECT ngrams('ClickHouse', 3); +``` + +Result: + +``` text +┌─ngrams('ClickHouse', 3)───────────────────────────┐ +│ ['Cli','lic','ick','ckH','kHo','Hou','ous','use'] │ +└───────────────────────────────────────────────────┘ +``` diff --git a/docs/ru/sql-reference/functions/splitting-merging-functions.md b/docs/ru/sql-reference/functions/splitting-merging-functions.md index efe74dba043..9b9aab4c437 100644 --- a/docs/ru/sql-reference/functions/splitting-merging-functions.md +++ b/docs/ru/sql-reference/functions/splitting-merging-functions.md @@ -232,3 +232,41 @@ SELECT alphaTokens('abca1abc'); │ ['abca','abc'] │ └─────────────────────────┘ ``` + +## ngrams {#ngrams} + +Выделяет из ASCII строки отрезки (n-граммы) размером `ngramsize` символов. + +**Синтаксис** + +``` sql +ngrams(string, ngramsize) +``` + +**Аргументы** + +- `string` — строка. 
[String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `ngramsize` — размер n-грамм. [UInt](../../sql-reference/data-types/int-uint.md). + +**Возвращаемые значения** + +- Массив с n-граммами. + +Тип: [Array](../../sql-reference/data-types/array.md)([FixedString](../../sql-reference/data-types/fixedstring.md)). + +**Пример** + +Запрос: + +``` sql +SELECT ngrams('ClickHouse', 3); +``` + +Результат: + +``` text +┌─ngrams('ClickHouse', 3)───────────────────────────┐ +│ ['Cli','lic','ick','ckH','kHo','Hou','ous','use'] │ +└───────────────────────────────────────────────────┘ +``` + From 220791b21c31cfa73a8e8d656c44080d38d80033 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 20 Oct 2021 17:15:07 +0000 Subject: [PATCH 409/438] Fix --- src/Interpreters/Context.cpp | 7 +++++-- src/Interpreters/Context.h | 2 -- src/Storages/MergeTree/registerStorageMergeTree.cpp | 2 -- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index d01057d5259..03216b717d3 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -267,6 +267,9 @@ struct ContextSharedPart bool shutdown_called = false; + /// Has background executors for MergeTree tables been initialized? + bool is_background_executors_initialized = false; + Stopwatch uptime_watch; Context::ApplicationType application_type = Context::ApplicationType::SERVER; @@ -2974,7 +2977,7 @@ void Context::setAsynchronousInsertQueue(const std::shared_ptris_background_executors_initialized) return; const size_t max_merges_and_mutations = getSettingsRef().background_pool_size * getSettingsRef().background_merges_mutations_concurrency_ratio; @@ -3024,7 +3027,7 @@ void Context::initializeBackgroundExecutorsIfNeeded() LOG_INFO(shared->log, "Initialized background executor for common operations (e.g. clearing old parts) with num_threads={}, num_tasks={}", getSettingsRef().background_common_pool_size, getSettingsRef().background_common_pool_size); - is_background_executors_initialized = true; + shared->is_background_executors_initialized = true; } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 85b4fbf23d5..8518275e529 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -293,8 +293,6 @@ private: /// A flag, used to distinguish between user query and internal query to a database engine (MaterializedPostgreSQL). bool is_internal_query = false; - /// Has background executors for MergeTree tables been initialized? 
- bool is_background_executors_initialized = false; public: diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 218d990e0ce..982acfe62a4 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -301,8 +301,6 @@ static StoragePtr create(const StorageFactory::Arguments & args) if (replicated) name_part = name_part.substr(strlen("Replicated")); - args.getContext()->getGlobalContext()->initializeBackgroundExecutorsIfNeeded(); - MergeTreeData::MergingParams merging_params; merging_params.mode = MergeTreeData::MergingParams::Ordinary; From b0752b43ac9529b4f744af72e32806c4240824b4 Mon Sep 17 00:00:00 2001 From: Dmitriy Date: Wed, 20 Oct 2021 20:20:14 +0300 Subject: [PATCH 410/438] Add the getOSKernelVersion function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Задокументировал английскую версию функции getOSKernelVersion. --- .../functions/other-functions.md | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 6864ba7705b..a7f81ab8325 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -2427,3 +2427,39 @@ Type: [UInt32](../../sql-reference/data-types/int-uint.md). **See Also** - [shardNum()](#shard-num) function example also contains `shardCount()` function call. + +## getOSKernelVersion {#getoskernelversion} + +Returns string with the current OS kernel version. + +**Syntax** + +``` sql +getOSKernelVersion() +``` + +**Arguments** + +- None. + +**Returned value** + +- The current OS kernel version. + +Type: [String](../../sql-reference/data-types/string.md). + +**Example** + +Query: + +``` sql +SELECT getOSKernelVersion(); +``` + +Result: + +``` text +┌─getOSKernelVersion()────┐ +│ Linux 4.15.0-55-generic │ +└─────────────────────────┘ +``` From 360ec76c29a6bbfc550ed3286661becc5d40642b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Figueiredo?= Date: Wed, 20 Oct 2021 22:35:17 +0200 Subject: [PATCH 411/438] Grammar suggestions to distributed.md * fixed some typos. * improved wording of some statements. --- docs/en/engines/table-engines/special/distributed.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/en/engines/table-engines/special/distributed.md b/docs/en/engines/table-engines/special/distributed.md index 6593a5dc17f..708dab6fb7d 100644 --- a/docs/en/engines/table-engines/special/distributed.md +++ b/docs/en/engines/table-engines/special/distributed.md @@ -5,7 +5,7 @@ toc_title: Distributed # Distributed Table Engine {#distributed} -Tables with Distributed engine do not store any data by their own, but allow distributed query processing on multiple servers. +Tables with Distributed engine do not store any data of their own, but allow distributed query processing on multiple servers. Reading is automatically parallelized. During a read, the table indexes on remote servers are used, if there are any. The Distributed engine accepts parameters: @@ -167,20 +167,20 @@ If this parameter is set to `true`, the write operation selects the first health If it is set to `false` (the default), data is written to all replicas. In essence, this means that the Distributed table replicates data itself. 
This is worse than using replicated tables, because the consistency of replicas is not checked, and over time they will contain slightly different data.
-To select the shard that a row of data is sent to, the sharding expression is analyzed, and its remainder is taken from dividing it by the total weight of the shards. The row is sent to the shard that corresponds to the half-interval of the remainders from `prev_weight` to `prev_weights + weight`, where `prev_weights` is the total weight of the shards with the smallest number, and `weight` is the weight of this shard. For example, if there are two shards, and the first has a weight of 9 while the second has a weight of 10, the row will be sent to the first shard for the remainders from the range \[0, 9), and to the second for the remainders from the range \[9, 19).
+To select the shard that a row of data is sent to, the sharding expression is analyzed, and its remainder is taken from dividing it by the total weight of the shards. The row is sent to the shard that corresponds to the half-interval of the remainders from `prev_weights` to `prev_weights + weight`, where `prev_weights` is the total weight of the shards with the smallest number, and `weight` is the weight of this shard. For example, if there are two shards, and the first has a weight of 9 while the second has a weight of 10, the row will be sent to the first shard for the remainders from the range \[0, 9), and to the second for the remainders from the range \[9, 19).
 The sharding expression can be any expression from constants and table columns that returns an integer. For example, you can use the expression `rand()` for random distribution of data, or `UserID` for distribution by the remainder from dividing the user’s ID (then the data of a single user will reside on a single shard, which simplifies running IN and JOIN by users). If one of the columns is not distributed evenly enough, you can wrap it in a hash function: intHash64(UserID).
-A simple reminder from the division is a limited solution for sharding and isn’t always appropriate. It works for medium and large volumes of data (dozens of servers), but not for very large volumes of data (hundreds of servers or more). In the latter case, use the sharding scheme required by the subject area, rather than using entries in Distributed tables.
+A simple remainder from the division is a limited solution for sharding and isn’t always appropriate. It works for medium and large volumes of data (dozens of servers), but not for very large volumes of data (hundreds of servers or more). In the latter case, use the sharding scheme required by the subject area, rather than using entries in Distributed tables.
-SELECT queries are sent to all the shards and work regardless of how data is distributed across the shards (they can be distributed completely randomly). When you add a new shard, you do not have to transfer the old data to it. You can write new data with a heavier weight – the data will be distributed slightly unevenly, but queries will work correctly and efficiently.
+SELECT queries are sent to all the shards and work regardless of how data is distributed across the shards (they can be distributed completely randomly). When you add a new shard, you do not have to transfer old data into it. Instead, you can write new data to it by using a heavier weight – the data will be distributed slightly unevenly, but queries will work correctly and efficiently.
 You should be concerned about the sharding scheme in the following cases:
 - Queries are used that require joining data (IN or JOIN) by a specific key. If data is sharded by this key, you can use local IN or JOIN instead of GLOBAL IN or GLOBAL JOIN, which is much more efficient.
 - A large number of servers is used (hundreds or more) with a large number of small queries (queries of individual clients - websites, advertisers, or partners). In order for the small queries to not affect the entire cluster, it makes sense to locate data for a single client on a single shard. Alternatively, as we’ve done in Yandex.Metrica, you can set up bi-level sharding: divide the entire cluster into “layers”, where a layer may consist of multiple shards. Data for a single client is located on a single layer, but shards can be added to a layer as necessary, and data is randomly distributed within them. Distributed tables are created for each layer, and a single shared distributed table is created for global queries.
-Data is written asynchronously. When inserted in the table, the data block is just written to the local file system. The data is sent to the remote servers in the background as soon as possible. The period for sending data is managed by the [distributed_directory_monitor_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_sleep_time_ms) and [distributed_directory_monitor_max_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms) settings. The `Distributed` engine sends each file with inserted data separately, but you can enable batch sending of files with the [distributed_directory_monitor_batch_inserts](../../../operations/settings/settings.md#distributed_directory_monitor_batch_inserts) setting. This setting improves cluster performance by better utilizing local server and network resources. You should check whether data is sent successfully by checking the list of files (data waiting to be sent) in the table directory: `/var/lib/clickhouse/data/database/table/`. The number of threads performing background tasks can be set by [background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size) setting.
+Data is written asynchronously. When inserted in the table, the data block is just written to the local file system. The data is sent to the remote servers in the background as soon as possible. The periodicity for sending data is managed by the [distributed_directory_monitor_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_sleep_time_ms) and [distributed_directory_monitor_max_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms) settings. The `Distributed` engine sends each file with inserted data separately, but you can enable batch sending of files with the [distributed_directory_monitor_batch_inserts](../../../operations/settings/settings.md#distributed_directory_monitor_batch_inserts) setting. This setting improves cluster performance by better utilizing local server and network resources. You should check whether data is sent successfully by checking the list of files (data waiting to be sent) in the table directory: `/var/lib/clickhouse/data/database/table/`. The number of threads performing background tasks can be set by [background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size) setting.
If the server ceased to exist or had a rough restart (for example, after a device failure) after an INSERT to a Distributed table, the inserted data might be lost. If a damaged data part is detected in the table directory, it is transferred to the `broken` subdirectory and no longer used. From c96b884951a32c4d1637ea9053be88c2cbd08846 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 20 Oct 2021 23:40:29 +0300 Subject: [PATCH 412/438] Fix --- src/Functions/FunctionSQLJSON.h | 41 +++++----- .../0_stateless/01889_sql_json_functions.sql | 74 +++++++++---------- 2 files changed, 56 insertions(+), 59 deletions(-) diff --git a/src/Functions/FunctionSQLJSON.h b/src/Functions/FunctionSQLJSON.h index d59ead2ebf5..4c27f090fb2 100644 --- a/src/Functions/FunctionSQLJSON.h +++ b/src/Functions/FunctionSQLJSON.h @@ -50,36 +50,33 @@ public: throw Exception{"JSONPath functions require at least 2 arguments", ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION}; } - const auto & first_column = arguments[0]; + const auto & json_column = arguments[0]; - /// Check 1 argument: must be of type String (JSONPath) - if (!isString(first_column.type)) + if (!isString(json_column.type)) { throw Exception( - "JSONPath functions require 1 argument to be JSONPath of type string, illegal type: " + first_column.type->getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - } - /// Check 1 argument: must be const (JSONPath) - if (!isColumnConst(*first_column.column)) - { - throw Exception("1 argument (JSONPath) must be const", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - } - - const auto & second_column = arguments[1]; - - /// Check 2 argument: must be of type String (JSON) - if (!isString(second_column.type)) - { - throw Exception( - "JSONPath functions require 2 argument to be JSON of string, illegal type: " + second_column.type->getName(), + "JSONPath functions require first argument to be JSON of string, illegal type: " + json_column.type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } - const ColumnPtr & arg_jsonpath = first_column.column; + const auto & json_path_column = arguments[1]; + + if (!isString(json_path_column.type)) + { + throw Exception( + "JSONPath functions require second argument to be JSONPath of type string, illegal type: " + json_path_column.type->getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + if (!isColumnConst(*json_path_column.column)) + { + throw Exception("Second argument (JSONPath) must be constant string", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + + const ColumnPtr & arg_jsonpath = json_path_column.column; const auto * arg_jsonpath_const = typeid_cast(arg_jsonpath.get()); const auto * arg_jsonpath_string = typeid_cast(arg_jsonpath_const->getDataColumnPtr().get()); - const ColumnPtr & arg_json = second_column.column; + const ColumnPtr & arg_json = json_column.column; const auto * col_json_const = typeid_cast(arg_json.get()); const auto * col_json_string = typeid_cast(col_json_const ? 
col_json_const->getDataColumnPtr().get() : arg_json.get()); @@ -152,7 +149,7 @@ public: bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } bool useDefaultImplementationForConstants() const override { return true; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0}; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override diff --git a/tests/queries/0_stateless/01889_sql_json_functions.sql b/tests/queries/0_stateless/01889_sql_json_functions.sql index 0c4f3aff9a3..087f029e635 100644 --- a/tests/queries/0_stateless/01889_sql_json_functions.sql +++ b/tests/queries/0_stateless/01889_sql_json_functions.sql @@ -1,46 +1,46 @@ -- Tags: no-fasttest SELECT '--JSON_VALUE--'; -SELECT JSON_VALUE('$', '{"hello":1}'); -- root is a complex object => default value (empty string) -SELECT JSON_VALUE('$.hello', '{"hello":1}'); -SELECT JSON_VALUE('$.hello', '{"hello":1.2}'); -SELECT JSON_VALUE('$.hello', '{"hello":true}'); -SELECT JSON_VALUE('$.hello', '{"hello":"world"}'); -SELECT JSON_VALUE('$.hello', '{"hello":null}'); -SELECT JSON_VALUE('$.hello', '{"hello":["world","world2"]}'); -SELECT JSON_VALUE('$.hello', '{"hello":{"world":"!"}}'); -SELECT JSON_VALUE('$.hello', '{hello:world}'); -- invalid json => default value (empty string) -SELECT JSON_VALUE('$.hello', ''); +SELECT JSON_VALUE('{"hello":1}', '$'); -- root is a complex object => default value (empty string) +SELECT JSON_VALUE('{"hello":1}', '$.hello'); +SELECT JSON_VALUE('{"hello":1.2}', '$.hello'); +SELECT JSON_VALUE('{"hello":true}', '$.hello'); +SELECT JSON_VALUE('{"hello":"world"}', '$.hello'); +SELECT JSON_VALUE('{"hello":null}', '$.hello'); +SELECT JSON_VALUE('{"hello":["world","world2"]}', '$.hello'); +SELECT JSON_VALUE('{"hello":{"world":"!"}}', '$.hello'); +SELECT JSON_VALUE('{hello:world}', '$.hello'); -- invalid json => default value (empty string) +SELECT JSON_VALUE('', '$.hello'); SELECT '--JSON_QUERY--'; -SELECT JSON_QUERY('$', '{"hello":1}'); -SELECT JSON_QUERY('$.hello', '{"hello":1}'); -SELECT JSON_QUERY('$.hello', '{"hello":1.2}'); -SELECT JSON_QUERY('$.hello', '{"hello":true}'); -SELECT JSON_QUERY('$.hello', '{"hello":"world"}'); -SELECT JSON_QUERY('$.hello', '{"hello":null}'); -SELECT JSON_QUERY('$.hello', '{"hello":["world","world2"]}'); -SELECT JSON_QUERY('$.hello', '{"hello":{"world":"!"}}'); -SELECT JSON_QUERY('$.hello', '{hello:{"world":"!"}}}'); -- invalid json => default value (empty string) -SELECT JSON_QUERY('$.hello', ''); -SELECT JSON_QUERY('$.array[*][0 to 2, 4]', '{"array":[[0, 1, 2, 3, 4, 5], [0, -1, -2, -3, -4, -5]]}'); +SELECT JSON_QUERY('{"hello":1}', '$'); +SELECT JSON_QUERY('{"hello":1}', '$.hello'); +SELECT JSON_QUERY('{"hello":1.2}', '$.hello'); +SELECT JSON_QUERY('{"hello":true}', '$.hello'); +SELECT JSON_QUERY('{"hello":"world"}', '$.hello'); +SELECT JSON_QUERY('{"hello":null}', '$.hello'); +SELECT JSON_QUERY('{"hello":["world","world2"]}', '$.hello'); +SELECT JSON_QUERY('{"hello":{"world":"!"}}', '$.hello'); +SELECT JSON_QUERY( '{hello:{"world":"!"}}}', '$.hello'); -- invalid json => default value (empty string) +SELECT JSON_QUERY('', '$.hello'); +SELECT JSON_QUERY('{"array":[[0, 1, 2, 3, 4, 5], [0, -1, -2, -3, -4, -5]]}', '$.array[*][0 to 2, 4]'); SELECT '--JSON_EXISTS--'; -SELECT 
JSON_EXISTS('$', '{"hello":1}'); -SELECT JSON_EXISTS('$', ''); -SELECT JSON_EXISTS('$', '{}'); -SELECT JSON_EXISTS('$.hello', '{"hello":1}'); -SELECT JSON_EXISTS('$.world', '{"hello":1,"world":2}'); -SELECT JSON_EXISTS('$.world', '{"hello":{"world":1}}'); -SELECT JSON_EXISTS('$.hello.world', '{"hello":{"world":1}}'); -SELECT JSON_EXISTS('$.hello', '{hello:world}'); -- invalid json => default value (zero integer) -SELECT JSON_EXISTS('$.hello', ''); -SELECT JSON_EXISTS('$.hello[*]', '{"hello":["world"]}'); -SELECT JSON_EXISTS('$.hello[0]', '{"hello":["world"]}'); -SELECT JSON_EXISTS('$.hello[1]', '{"hello":["world"]}'); -SELECT JSON_EXISTS('$.a[*].b', '{"a":[{"b":1},{"c":2}]}'); -SELECT JSON_EXISTS('$.a[*].f', '{"a":[{"b":1},{"c":2}]}'); -SELECT JSON_EXISTS('$.a[*][0].h', '{"a":[[{"b":1}, {"g":1}],[{"h":1},{"y":1}]]}'); +SELECT JSON_EXISTS('{"hello":1}', '$'); +SELECT JSON_EXISTS('', '$'); +SELECT JSON_EXISTS('{}', '$'); +SELECT JSON_EXISTS('{"hello":1}', '$.hello'); +SELECT JSON_EXISTS('{"hello":1,"world":2}', '$.world'); +SELECT JSON_EXISTS('{"hello":{"world":1}}', '$.world'); +SELECT JSON_EXISTS('{"hello":{"world":1}}', '$.hello.world'); +SELECT JSON_EXISTS('{hello:world}', '$.hello'); -- invalid json => default value (zero integer) +SELECT JSON_EXISTS('', '$.hello'); +SELECT JSON_EXISTS('{"hello":["world"]}', '$.hello[*]'); +SELECT JSON_EXISTS('{"hello":["world"]}', '$.hello[0]'); +SELECT JSON_EXISTS('{"hello":["world"]}', '$.hello[1]'); +SELECT JSON_EXISTS('{"a":[{"b":1},{"c":2}]}', '$.a[*].b'); +SELECT JSON_EXISTS('{"a":[{"b":1},{"c":2}]}', '$.a[*].f'); +SELECT JSON_EXISTS('{"a":[[{"b":1}, {"g":1}],[{"h":1},{"y":1}]]}', '$.a[*][0].h'); SELECT '--MANY ROWS--'; DROP TABLE IF EXISTS 01889_sql_json; @@ -48,5 +48,5 @@ CREATE TABLE 01889_sql_json (id UInt8, json String) ENGINE = MergeTree ORDER BY INSERT INTO 01889_sql_json(id, json) VALUES(0, '{"name":"Ivan","surname":"Ivanov","friends":["Vasily","Kostya","Artyom"]}'); INSERT INTO 01889_sql_json(id, json) VALUES(1, '{"name":"Katya","surname":"Baltica","friends":["Tihon","Ernest","Innokentiy"]}'); INSERT INTO 01889_sql_json(id, json) VALUES(2, '{"name":"Vitali","surname":"Brown","friends":["Katya","Anatoliy","Ivan","Oleg"]}'); -SELECT id, JSON_QUERY('$.friends[0 to 2]', json) FROM 01889_sql_json ORDER BY id; +SELECT id, JSON_QUERY(json, '$.friends[0 to 2]') FROM 01889_sql_json ORDER BY id; DROP TABLE 01889_sql_json; From 422b0ba54705bdfb9908066a281de0e959950fba Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Wed, 20 Oct 2021 16:12:22 -0600 Subject: [PATCH 413/438] Update release date and add training link --- website/blog/en/2021/clickhouse-v21.10-released.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/website/blog/en/2021/clickhouse-v21.10-released.md b/website/blog/en/2021/clickhouse-v21.10-released.md index b49bb284f90..ed0aab88017 100644 --- a/website/blog/en/2021/clickhouse-v21.10-released.md +++ b/website/blog/en/2021/clickhouse-v21.10-released.md @@ -15,6 +15,8 @@ Let's highlight some of these new exciting new capabilities in 21.10: * Instead of logging every query (which can be a lot of logs!), you can now log a random sample of your queries. The number of queries logged is determined by defining a specified probability between 0.0 (no queries logged) and 1.0 (all queries logged) using the new `log_queries_probability` setting. * Positional arguments are now available in your GROUP BY, ORDER BY and LIMIT BY clauses. 
For example, `SELECT foo, bar, baz FROM my_table ORDER BY 2,3` orders the results by whatever the bar and baz columns (no need to specify column names twice!) +We're also thrilled to announce some new free training available to you in our Learn ClickHouse portal: [https://clickhouse.com/learn/lessons/whatsnew-clickhouse-21.10/](https://clickhouse.com/learn/lessons/whatsnew-clickhouse-21.10/) + We're always listening for new ideas, and we're happy to welcome new contributors to the ClickHouse project. Whether for submitting code or improving our documentation and examples, please get involved by sending us a pull request or submitting an issue. Our beginner developers contribution guide will help you get started: [https://clickhouse.com/docs/en/development/developer-instruction/](https://clickhouse.com/docs/en/development/developer-instruction/) @@ -22,6 +24,6 @@ We're always listening for new ideas, and we're happy to welcome new contributor Release 21.10 -Release Date: 2021-10-21 +Release Date: 2021-10-17 Release Notes: [21.10](https://github.com/ClickHouse/ClickHouse/blob/master/CHANGELOG.md) From cbf397fa4e218d826f23e2480f7112879507450a Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Wed, 20 Oct 2021 17:17:39 -0600 Subject: [PATCH 414/438] Fix horizontal scroll bar --- website/css/main.css | 2 +- website/src/scss/components/_page.scss | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/website/css/main.css b/website/css/main.css index 229b74cb3f7..1d0b6f5c116 100644 --- a/website/css/main.css +++ b/website/css/main.css @@ -1 +1 @@ -@media screen and (max-width:978.98px){.btn{padding:8px 16px}}@media screen and (max-width:978.98px){.btn-lg{padding:12px 24px}}.btn-primary,.btn-primary:active,.btn-primary:hover{color:#212529}.btn-outline-primary{background:#fffaf0;border-color:#fc0;color:#212529}.btn-outline-primary:active,.btn-outline-primary:hover{background:#fc0;border-color:#fc0;color:#212529}.btn-secondary{border-color:#212529;color:#fff}.btn-outline-secondary,.btn-secondary:active,.btn-secondary:hover{background:#fff;border-color:#212529;color:#212529}.btn-outline-secondary:active,.btn-outline-secondary:hover{background:#212529;border-color:#212529;color:#fff}.btn-tertiary{border-color:#257af4;color:#fff}.btn-tertiary:active,.btn-tertiary:hover{background:#257af4;border-color:#257af4;color:#fff}.btn-outline-tertiary{background:#e3f1fe;color:#257af4}.btn-outline-tertiary:active,.btn-outline-tertiary:hover{background:#257af4;color:#fff}.btns{align-items:center;display:flex;justify-content:center}.btns .btn+.btn{margin-left:24px}.btns .btn-lg+.btn-lg{margin-left:40px}.card{box-shadow:0 8px 20px rgba(108,117,125,.2);overflow:hidden;transition:box-shadow .2s,transform .2s;width:100%}.card,.card-body{position:relative}.card-body{z-index:10}.card.is-large .card-body{padding:40px}.card.bg-primary-light{border-color:#fc0}.card.has-dark-border{border-color:#6c757d}.card.has-pattern:after,.card.has-pattern:before{background-repeat:no-repeat;background-size:auto 100%;bottom:0;content:"";display:block;position:absolute;top:0;width:72px}.card.has-pattern:before{background-image:url(../images/backgrounds/bg-card-pattern-blue-1.png);background-position:0 0;left:0}.card.has-pattern:after{background-image:url(../images/backgrounds/bg-card-pattern-blue-2.png);background-position:100% 0;right:0}.card.has-hover:active,.card.has-hover:hover,a.card:active,a.card:hover{box-shadow:0 12px 32px 
rgba(108,117,125,.2);transform:translateY(-8px)}.card.has-highlight:after,.card.has-hover:after,a.card:after{content:"";display:block;height:8px;margin-top:auto;transition:background .2s;width:100%}.card.has-highlight:after,.card.has-hover:active:after,.card.has-hover:hover:after,a.card:active:after,a.card:hover:after{background:#e3f1fe}.case-study-cards{-moz-column-gap:40px;column-gap:40px;display:grid;grid-template-columns:1fr;row-gap:40px;padding-bottom:40px;position:relative}.case-study-cards>div{align-items:stretch;display:flex}.case-study-cards:before{background:#d6dbdf;bottom:0;content:"";display:block;left:20px;position:absolute;top:40px;width:100vw}@media screen and (min-width:980px){.case-study-cards{grid-template-columns:repeat(2,minmax(0,1fr));row-gap:80px;padding-bottom:120px}.case-study-cards:before{left:-40px;top:120px}}.case-study-card{align-items:stretch;flex-direction:row;flex-shrink:0;left:0;transition:box-shadow .2s,left .4s,width .4s,z-index 0s;transition-delay:0s,.6s,.6s,0s;width:100%;z-index:2}@media screen and (max-width:979.98px){.case-study-card .row{min-height:0!important}}@media screen and (min-width:980px){.case-study-card:active,.case-study-card:hover{box-shadow:0 12px 32px rgba(108,117,125,.2)}.case-study-card:not(.is-open){cursor:pointer}.case-study-card.is-open{transform:none!important;transition-delay:0s,0s,0s,0s;width:calc(200% + 40px);z-index:10}.case-study-card.is-closing{z-index:10}.case-study-card.open-left.is-open{left:calc(-100% - 40px)}.case-study-card:before{background:no-repeat url(../images/backgrounds/bg-card-pattern-red.png);background-position:100%;background-size:contain;content:"";display:block;height:calc(100% - 80px);max-height:224px;max-width:234px;position:absolute;right:0;top:40px;transform:translateX(30%);transition:transform .4s;transition-delay:.6s;width:100%;z-index:1}}@media screen and (min-width:980px)and (min-width:1240px){.case-study-card:before{transform:translateX(10%)}}@media screen and (min-width:980px){.case-study-card.is-open:before{transform:translateX(60%);transition-delay:0s}}@media screen and (min-width:980px){.case-study-card-wrap{align-items:stretch;display:flex;flex-shrink:0;min-height:304px;position:relative;transition:width .4s;transition-delay:.6s;width:calc(200% + 42px);z-index:2}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-wrap{transition-delay:0s;width:100%}}@media screen and (min-width:980px){.case-study-card-body{display:flex;flex-direction:column;padding-right:80px!important}.case-study-card-body>.row{align-self:stretch}}@media screen and (min-width:980px){.case-study-card-toggle{background:#fff;box-shadow:0 8px 20px rgba(108,117,125,.2);border-radius:100%;cursor:pointer;height:56px;position:relative;width:56px}.case-study-card-toggle:after,.case-study-card-toggle:before{background:#257af4;content:"";display:block;height:4px;left:calc(50% - 15px);position:absolute;top:calc(50% - 2px);transition:opacity .2s,transform .2s;width:30px}.case-study-card-toggle:after{transform:rotate(90deg)}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-toggle:before{opacity:0;transform:rotate(-90deg)}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-toggle:after{transform:rotate(0)}}@media screen and (min-width:980px){.case-study-card .col-lg-3,.case-study-card .col-lg-auto{opacity:0;transform:translateX(24px);transition:opacity .4s,transform .4s}}@media screen and (min-width:980px){.case-study-card 
.col-lg-3{transition-delay:0s}}@media screen and (min-width:980px){.case-study-card .col-lg-auto{transition-delay:.2s}}@media screen and (min-width:980px)and (min-width:980px){.case-study-card .col-lg-auto{max-width:605px;width:calc(100% - 319px)}}@media screen and (min-width:980px){.case-study-card.is-open .col-lg-3,.case-study-card.is-open .col-lg-auto{opacity:1;transform:none}}@media screen and (min-width:980px){.case-study-card.is-open .col-lg-3{transition-delay:.4s}}@media screen and (min-width:980px){.case-study-card.is-open .col-lg-auto{transition-delay:.2s}}.footer-copy{white-space:nowrap}form .form-control{border:1px solid #6c757d;border-radius:6px;height:auto;line-height:20px;min-height:44px;padding:12px 16px;width:100%}form .form-control,form .form-control:focus{box-shadow:0 8px 20px rgba(108,117,125,.2);color:#212529}form .form-control:focus{border-color:#212529}form .form-control::-moz-placeholder{color:#6c757d}form .form-control:-ms-input-placeholder{color:#6c757d}form .form-control::placeholder{color:#6c757d}form select.form-control{-webkit-appearance:none;-moz-appearance:none;appearance:none}form select.form-control:not([data-chosen]){color:#6c757d}form .btn-secondary:active,form .btn-secondary:hover{color:#212529;background:#fc0;border-color:#fc0}.hero{overflow:visible;position:relative}.hero,.hero-bg{background-repeat:no-repeat;background-position:50%;background-size:cover}.hero-bg{display:block;height:100%;left:50%;position:absolute;top:0;transform:translateX(-50%);z-index:1}.hero>.container{position:relative;z-index:2}.hero.has-offset{margin-bottom:-160px;padding-bottom:160px}.base-hero{height:22.5vw;max-height:324px;min-height:280px}.index-hero{background-image:url(/images/backgrounds/bg-hero-home.svg);height:68vw;max-height:980px}.index-hero,.other-hero{max-width:2448px;width:170vw}.other-hero{background-image:url(/images/backgrounds/bg-hero.svg)}.bg-footer-cta{background-image:url(/images/backgrounds/bg-footer-cta.svg);width:2448px}.quickstart-bg{background-image:url(/images/backgrounds/bg-quick-start.svg);height:40vw;top:220px;width:170vw}hr{background:#f1f6f9;border:0;display:block;height:4px;margin:0;width:100%}hr.is-small{height:2px}hr.is-large{height:8px}hr.is-medium{background:#d6dbdf}hr.is-dark{background:#495057}hr.is-yellow{background:linear-gradient(90deg,#ff8c00,#ff8c00 8px,#fc0 16px,rgba(255,204,0,0));-webkit-clip-path:polygon(8px 100%,0 100%,0 0,8px 0,8px 100%,16px 100%,16px 0,100% 0,100% 100%);clip-path:polygon(8px 100%,0 100%,0 0,8px 0,8px 100%,16px 100%,16px 0,100% 0,100% 100%);height:8px}.icon{display:block;height:48px;margin-bottom:24px;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center}@media screen and (min-width:576px){.icon{height:64px}}@media screen and (min-width:980px){.icon{height:80px}}img{max-width:100%}.kicker{color:#6c757d;font-family:Hind Siliguri,sans-serif;font-size:.875rem;font-weight:600;letter-spacing:1px;margin:0}@media screen and (max-width:978.98px){.lead{font-size:1.125rem}}.navbar-clickhouse{border-bottom:4px solid #f1f6f9;height:142px}.navbar-clickhouse>.container{flex-wrap:wrap}.navbar-super{flex-shrink:0;width:100%}.navbar-super ul{list-style:none}.navbar-super li:not(:last-child){margin-bottom:0;margin-right:24px}.navbar-super a{align-items:center;color:#212529;display:flex;font-size:.875rem}.navbar-super a:active,.navbar-super a:hover{color:#257af4;text-decoration:none}.navbar-super img{flex-shrink:0;margin-right:4px}.navbar-brand-clickhouse{background:no-repeat 
url(../images/logo-clickhouse.svg);background-size:contain;flex-shrink:0;height:28px;margin-right:48px;padding:0;width:180px}.navbar-nav{align-items:center;height:46px}.navbar .nav-item:not(:last-child){margin-bottom:0;margin-right:24px}.navbar .nav-link{color:#212529}.navbar .nav-link:active,.navbar .nav-link:hover{color:#257af4}.navbar .navbar-nav{flex-direction:row}@media screen and (max-width:978.98px){.navbar>.container{padding-left:20px;padding-right:20px}.navbar .navbar-toggler{height:24px;padding:0;width:24px}.navbar .navbar-toggler:focus{outline:none}.navbar .navbar-toggler-icon{background:no-repeat url(../images/icons/icon-menu.svg);background-position:50%;background-size:contain;height:24px;width:24px}.navbar .navbar-collapse{background:#fff;border-bottom:4px solid #f1f6f9;height:56px;left:0;padding:0 20px 16px;position:absolute;right:0;top:100%}.navbar .nav-link{font-size:.875rem;white-space:nowrap}}@media screen and (max-width:615.98px){.navbar .navbar-collapse{height:auto}.navbar .navbar-nav{flex-direction:column;height:auto}.navbar .nav-item:not(:last-child){margin-bottom:16px;margin-right:0}}@media screen and (max-width:399.98px){.navbar{height:80px}}.page{overflow:hidden;width:100vw}.photo-frame{background:hsla(0,0%,100%,.6);border-radius:100%;box-shadow:0 8px 20px rgba(108,117,125,.2);display:block;margin-bottom:24px;max-width:160px;overflow:hidden;position:relative;width:100%}.photo-frame:before{content:"";display:block;padding-bottom:100%;width:100%}.photo-frame img{display:block;height:100%;left:0;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center;position:absolute;top:0;width:100%}.pullquote{position:relative;width:70%}.pullquote:before{background:no-repeat url(../images/backgrounds/bg-quotes.svg);background-position:50%;background-size:100%;content:"";mix-blend-mode:multiply;right:56px;width:calc(100% - 16px);z-index:2}.pullquote-bg,.pullquote:before{bottom:0;display:block;position:absolute;top:0}.pullquote-bg{right:0;width:calc(50vw + 28.57143%);z-index:1}.pullquote-body{padding:64px 40px 64px 0;position:relative;z-index:3}.pullquote-quote{font-family:Hind Siliguri,sans-serif;font-size:32px;font-weight:700}.pullquote-citation{font-size:1.125rem}.section{overflow:visible;position:relative}.section,.section-bg{background-repeat:no-repeat;background-position:50%;background-size:cover}.section-bg{display:block;height:100%;left:50%;position:absolute;top:0;transform:translateX(-50%);z-index:1}.section>.container{position:relative;z-index:2}.social-icons{align-items:center;display:flex}.social-icons>a{aspect-ratio:24/24;background:#6c757d;display:block;height:24px;width:24px;-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;transition:background 
.2s}.social-icons>a:active,.social-icons>a:hover{background:#212529}.social-icons>a+a{margin-left:32px}.social-icons-facebook{-webkit-mask-image:url(/images/icons/icon-facebook-gray.svg);mask-image:url(/images/icons/icon-facebook-gray.svg)}.social-icons-twitter{-webkit-mask-image:url(/images/icons/icon-twitter-gray.svg);mask-image:url(/images/icons/icon-twitter-gray.svg);width:31px}.social-icons-linkedin{-webkit-mask-image:url(/images/icons/icon-linkedin-gray.svg);mask-image:url(/images/icons/icon-linkedin-gray.svg)}.social-icons-linkedin-alt{-webkit-mask-image:url(/images/icons/icon-linkedin-alt-gray.svg);mask-image:url(/images/icons/icon-linkedin-alt-gray.svg)}.social-icons.size-small>a{height:20px;width:20px}.social-icons.size-small>a:active,.social-icons.size-small>a:hover{background:#212529}.social-icons.size-small>a+a{margin-left:16px}.tabs{position:relative}.tabs:before{background:#fff;border-radius:7px 7px 0 0;content:"";display:block;height:8px;left:1px;position:absolute;right:1px;top:68px;z-index:10}@media screen and (min-width:1240px){.tabs:before{top:76px}}.tabs-body{background:#fff;border-radius:8px;border:1px solid #6c757d;box-shadow:0 8px 20px rgba(108,117,125,.2);padding:24px}@media screen and (min-width:980px){.tabs-body{padding:32px}}@media screen and (min-width:1240px){.tabs-body{padding:40px}}.tabs .nav-tabs{border-bottom:0;flex-wrap:nowrap;height:76px;margin:-20px -20px -9px;-webkit-mask-image:linear-gradient(90deg,transparent,#000 20px,#000 calc(100% - 20px),transparent);mask-image:linear-gradient(90deg,transparent,#000 20px,#000 calc(100% - 20px),transparent);overflow:scroll;overflow-x:scroll;overflow-y:visible;padding:20px 20px 0;position:relative}@media screen and (min-width:940px){.tabs .nav-tabs{overflow:visible}}@media screen and (min-width:1240px){.tabs .nav-tabs{height:84px}}.tabs .nav-link{align-items:center;border-bottom:0;color:#6c757d;display:flex;font-size:.875rem;flex-shrink:0;height:56px;justify-content:center;padding:0 12px 8px;text-align:center;white-space:nowrap}@media screen and (min-width:1240px){.tabs .nav-link{height:64px;padding:0 16px 8px}}.tabs .nav-link.active{background:#fff;box-shadow:0 -4px 8px rgba(108,117,125,.1);font-weight:700;padding:0 16px 8px}@media screen and (min-width:980px){.tabs .nav-link.active{padding:0 24px 8px}}@media screen and (min-width:1240px){.tabs .nav-link.active{padding:0 32px 8px}}.tab-pane pre{background:#212529;border-radius:16px;color:#fff;padding:24px 16px}@media screen and (min-width:1240px){.tab-pane pre{padding:32px 24px}}.trailing-link{align-items:center;color:#212529;display:flex;font-size:.875rem;font-weight:700}.trailing-link:after{background:no-repeat url(../images/icons/icon-arrow.svg);background-position:100%;background-size:contain;content:"";display:block;height:12px;transition:transform .2s;width:20px}.trailing-link:active,.trailing-link:hover{color:#212529;text-decoration:none}.trailing-link:active:after,.trailing-link:hover:after{transform:translateX(8px)}.trailing-link.span-full:after{margin-left:auto}ul{color:#495057;list-style-type:square;padding-left:1.25em}ul li:not(:last-child){margin-bottom:16px}ul li::marker{color:#ff3939}ul.has-separators{list-style:none;padding:0}ul.has-separators li:not(:last-child){border-bottom:4px solid #f1f6f9;margin-bottom:24px;padding-bottom:24px}.bg-gradient-secondary{background-image:linear-gradient(58deg,#ff6443 3%,#fe561d 24%,#e32f0d 
93%)}.bg-gradient-light-orange{background-image:linear-gradient(90deg,rgba(255,203,128,0),#ffcb80)}.bg-offset-right{bottom:0;left:-24px;position:absolute;top:0;width:calc(100vw + 24px);z-index:-1}@media screen and (min-width:1240px){.bg-offset-right{left:-96px;width:calc(100vw + 96px)}}.bg-inset-right{bottom:0;left:40px;position:absolute;top:0;width:calc(100vw - 40px);z-index:-1}@media screen and (min-width:980px){.bg-inset-right{left:96px;width:calc(100vw - 96px)}}.has-border-left{border-left:8px solid #f1f6f9;padding-left:16px}.font-xl{font-size:1.25rem}.font-lg{font-size:1.125rem}.font-sm{font-size:.875rem}.font-xs{font-size:.625rem}.font-weight-semibold{font-weight:600}.display-5{color:#212529;font-size:20px;font-weight:500}.display-6{color:#212529;font-size:14px;font-weight:700}.text-decoration-underline{text-decoration:underline}.text-upper{text-transform:uppercase} \ No newline at end of file +@media screen and (max-width:978.98px){.btn{padding:8px 16px}}@media screen and (max-width:978.98px){.btn-lg{padding:12px 24px}}.btn-primary,.btn-primary:active,.btn-primary:hover{color:#212529}.btn-outline-primary{background:#fffaf0;border-color:#fc0;color:#212529}.btn-outline-primary:active,.btn-outline-primary:hover{background:#fc0;border-color:#fc0;color:#212529}.btn-secondary{border-color:#212529;color:#fff}.btn-outline-secondary,.btn-secondary:active,.btn-secondary:hover{background:#fff;border-color:#212529;color:#212529}.btn-outline-secondary:active,.btn-outline-secondary:hover{background:#212529;border-color:#212529;color:#fff}.btn-tertiary{border-color:#257af4;color:#fff}.btn-tertiary:active,.btn-tertiary:hover{background:#257af4;border-color:#257af4;color:#fff}.btn-outline-tertiary{background:#e3f1fe;color:#257af4}.btn-outline-tertiary:active,.btn-outline-tertiary:hover{background:#257af4;color:#fff}.btns{align-items:center;display:flex;justify-content:center}.btns .btn+.btn{margin-left:24px}.btns .btn-lg+.btn-lg{margin-left:40px}.card{box-shadow:0 8px 20px rgba(108,117,125,.2);overflow:hidden;transition:box-shadow .2s,transform .2s;width:100%}.card,.card-body{position:relative}.card-body{z-index:10}.card.is-large .card-body{padding:40px}.card.bg-primary-light{border-color:#fc0}.card.has-dark-border{border-color:#6c757d}.card.has-pattern:after,.card.has-pattern:before{background-repeat:no-repeat;background-size:auto 100%;bottom:0;content:"";display:block;position:absolute;top:0;width:72px}.card.has-pattern:before{background-image:url(../images/backgrounds/bg-card-pattern-blue-1.png);background-position:0 0;left:0}.card.has-pattern:after{background-image:url(../images/backgrounds/bg-card-pattern-blue-2.png);background-position:100% 0;right:0}.card.has-hover:active,.card.has-hover:hover,a.card:active,a.card:hover{box-shadow:0 12px 32px rgba(108,117,125,.2);transform:translateY(-8px)}.card.has-highlight:after,.card.has-hover:after,a.card:after{content:"";display:block;height:8px;margin-top:auto;transition:background .2s;width:100%}.card.has-highlight:after,.card.has-hover:active:after,.card.has-hover:hover:after,a.card:active:after,a.card:hover:after{background:#e3f1fe}.case-study-cards{-moz-column-gap:40px;column-gap:40px;display:grid;grid-template-columns:1fr;row-gap:40px;padding-bottom:40px;position:relative}.case-study-cards>div{align-items:stretch;display:flex}.case-study-cards:before{background:#d6dbdf;bottom:0;content:"";display:block;left:20px;position:absolute;top:40px;width:100vw}@media screen and 
(min-width:980px){.case-study-cards{grid-template-columns:repeat(2,minmax(0,1fr));row-gap:80px;padding-bottom:120px}.case-study-cards:before{left:-40px;top:120px}}.case-study-card{align-items:stretch;flex-direction:row;flex-shrink:0;left:0;transition:box-shadow .2s,left .4s,width .4s,z-index 0s;transition-delay:0s,.6s,.6s,0s;width:100%;z-index:2}@media screen and (max-width:979.98px){.case-study-card .row{min-height:0!important}}@media screen and (min-width:980px){.case-study-card:active,.case-study-card:hover{box-shadow:0 12px 32px rgba(108,117,125,.2)}.case-study-card:not(.is-open){cursor:pointer}.case-study-card.is-open{transform:none!important;transition-delay:0s,0s,0s,0s;width:calc(200% + 40px);z-index:10}.case-study-card.is-closing{z-index:10}.case-study-card.open-left.is-open{left:calc(-100% - 40px)}.case-study-card:before{background:no-repeat url(../images/backgrounds/bg-card-pattern-red.png);background-position:100%;background-size:contain;content:"";display:block;height:calc(100% - 80px);max-height:224px;max-width:234px;position:absolute;right:0;top:40px;transform:translateX(30%);transition:transform .4s;transition-delay:.6s;width:100%;z-index:1}}@media screen and (min-width:980px)and (min-width:1240px){.case-study-card:before{transform:translateX(10%)}}@media screen and (min-width:980px){.case-study-card.is-open:before{transform:translateX(60%);transition-delay:0s}}@media screen and (min-width:980px){.case-study-card-wrap{align-items:stretch;display:flex;flex-shrink:0;min-height:304px;position:relative;transition:width .4s;transition-delay:.6s;width:calc(200% + 42px);z-index:2}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-wrap{transition-delay:0s;width:100%}}@media screen and (min-width:980px){.case-study-card-body{display:flex;flex-direction:column;padding-right:80px!important}.case-study-card-body>.row{align-self:stretch}}@media screen and (min-width:980px){.case-study-card-toggle{background:#fff;box-shadow:0 8px 20px rgba(108,117,125,.2);border-radius:100%;cursor:pointer;height:56px;position:relative;width:56px}.case-study-card-toggle:after,.case-study-card-toggle:before{background:#257af4;content:"";display:block;height:4px;left:calc(50% - 15px);position:absolute;top:calc(50% - 2px);transition:opacity .2s,transform .2s;width:30px}.case-study-card-toggle:after{transform:rotate(90deg)}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-toggle:before{opacity:0;transform:rotate(-90deg)}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-toggle:after{transform:rotate(0)}}@media screen and (min-width:980px){.case-study-card .col-lg-3,.case-study-card .col-lg-auto{opacity:0;transform:translateX(24px);transition:opacity .4s,transform .4s}}@media screen and (min-width:980px){.case-study-card .col-lg-3{transition-delay:0s}}@media screen and (min-width:980px){.case-study-card .col-lg-auto{transition-delay:.2s}}@media screen and (min-width:980px)and (min-width:980px){.case-study-card .col-lg-auto{max-width:605px;width:calc(100% - 319px)}}@media screen and (min-width:980px){.case-study-card.is-open .col-lg-3,.case-study-card.is-open .col-lg-auto{opacity:1;transform:none}}@media screen and (min-width:980px){.case-study-card.is-open .col-lg-3{transition-delay:.4s}}@media screen and (min-width:980px){.case-study-card.is-open .col-lg-auto{transition-delay:.2s}}.footer-copy{white-space:nowrap}form .form-control{border:1px solid 
#6c757d;border-radius:6px;height:auto;line-height:20px;min-height:44px;padding:12px 16px;width:100%}form .form-control,form .form-control:focus{box-shadow:0 8px 20px rgba(108,117,125,.2);color:#212529}form .form-control:focus{border-color:#212529}form .form-control::-moz-placeholder{color:#6c757d}form .form-control:-ms-input-placeholder{color:#6c757d}form .form-control::placeholder{color:#6c757d}form select.form-control{-webkit-appearance:none;-moz-appearance:none;appearance:none}form select.form-control:not([data-chosen]){color:#6c757d}form .btn-secondary:active,form .btn-secondary:hover{color:#212529;background:#fc0;border-color:#fc0}.hero{overflow:visible;position:relative}.hero,.hero-bg{background-repeat:no-repeat;background-position:50%;background-size:cover}.hero-bg{display:block;height:100%;left:50%;position:absolute;top:0;transform:translateX(-50%);z-index:1}.hero>.container{position:relative;z-index:2}.hero.has-offset{margin-bottom:-160px;padding-bottom:160px}.base-hero{height:22.5vw;max-height:324px;min-height:280px}.index-hero{background-image:url(/images/backgrounds/bg-hero-home.svg);height:68vw;max-height:980px}.index-hero,.other-hero{max-width:2448px;width:170vw}.other-hero{background-image:url(/images/backgrounds/bg-hero.svg)}.bg-footer-cta{background-image:url(/images/backgrounds/bg-footer-cta.svg);width:2448px}.quickstart-bg{background-image:url(/images/backgrounds/bg-quick-start.svg);height:40vw;top:220px;width:170vw}hr{background:#f1f6f9;border:0;display:block;height:4px;margin:0;width:100%}hr.is-small{height:2px}hr.is-large{height:8px}hr.is-medium{background:#d6dbdf}hr.is-dark{background:#495057}hr.is-yellow{background:linear-gradient(90deg,#ff8c00,#ff8c00 8px,#fc0 16px,rgba(255,204,0,0));-webkit-clip-path:polygon(8px 100%,0 100%,0 0,8px 0,8px 100%,16px 100%,16px 0,100% 0,100% 100%);clip-path:polygon(8px 100%,0 100%,0 0,8px 0,8px 100%,16px 100%,16px 0,100% 0,100% 100%);height:8px}.icon{display:block;height:48px;margin-bottom:24px;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center}@media screen and (min-width:576px){.icon{height:64px}}@media screen and (min-width:980px){.icon{height:80px}}img{max-width:100%}.kicker{color:#6c757d;font-family:Hind Siliguri,sans-serif;font-size:.875rem;font-weight:600;letter-spacing:1px;margin:0}@media screen and (max-width:978.98px){.lead{font-size:1.125rem}}.navbar-clickhouse{border-bottom:4px solid #f1f6f9;height:142px}.navbar-clickhouse>.container{flex-wrap:wrap}.navbar-super{flex-shrink:0;width:100%}.navbar-super ul{list-style:none}.navbar-super li:not(:last-child){margin-bottom:0;margin-right:24px}.navbar-super a{align-items:center;color:#212529;display:flex;font-size:.875rem}.navbar-super a:active,.navbar-super a:hover{color:#257af4;text-decoration:none}.navbar-super img{flex-shrink:0;margin-right:4px}.navbar-brand-clickhouse{background:no-repeat url(../images/logo-clickhouse.svg);background-size:contain;flex-shrink:0;height:28px;margin-right:48px;padding:0;width:180px}.navbar-nav{align-items:center;height:46px}.navbar .nav-item:not(:last-child){margin-bottom:0;margin-right:24px}.navbar .nav-link{color:#212529}.navbar .nav-link:active,.navbar .nav-link:hover{color:#257af4}.navbar .navbar-nav{flex-direction:row}@media screen and (max-width:978.98px){.navbar>.container{padding-left:20px;padding-right:20px}.navbar .navbar-toggler{height:24px;padding:0;width:24px}.navbar .navbar-toggler:focus{outline:none}.navbar .navbar-toggler-icon{background:no-repeat 
url(../images/icons/icon-menu.svg);background-position:50%;background-size:contain;height:24px;width:24px}.navbar .navbar-collapse{background:#fff;border-bottom:4px solid #f1f6f9;height:56px;left:0;padding:0 20px 16px;position:absolute;right:0;top:100%}.navbar .nav-link{font-size:.875rem;white-space:nowrap}}@media screen and (max-width:615.98px){.navbar .navbar-collapse{height:auto}.navbar .navbar-nav{flex-direction:column;height:auto}.navbar .nav-item:not(:last-child){margin-bottom:16px;margin-right:0}}@media screen and (max-width:399.98px){.navbar{height:80px}}.page,.photo-frame{overflow:hidden;width:100%}.photo-frame{background:hsla(0,0%,100%,.6);border-radius:100%;box-shadow:0 8px 20px rgba(108,117,125,.2);display:block;margin-bottom:24px;max-width:160px;position:relative}.photo-frame:before{content:"";display:block;padding-bottom:100%;width:100%}.photo-frame img{display:block;height:100%;left:0;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center;position:absolute;top:0;width:100%}.pullquote{position:relative;width:70%}.pullquote:before{background:no-repeat url(../images/backgrounds/bg-quotes.svg);background-position:50%;background-size:100%;content:"";mix-blend-mode:multiply;right:56px;width:calc(100% - 16px);z-index:2}.pullquote-bg,.pullquote:before{bottom:0;display:block;position:absolute;top:0}.pullquote-bg{right:0;width:calc(50vw + 28.57143%);z-index:1}.pullquote-body{padding:64px 40px 64px 0;position:relative;z-index:3}.pullquote-quote{font-family:Hind Siliguri,sans-serif;font-size:32px;font-weight:700}.pullquote-citation{font-size:1.125rem}.section{overflow:visible;position:relative}.section,.section-bg{background-repeat:no-repeat;background-position:50%;background-size:cover}.section-bg{display:block;height:100%;left:50%;position:absolute;top:0;transform:translateX(-50%);z-index:1}.section>.container{position:relative;z-index:2}.social-icons{align-items:center;display:flex}.social-icons>a{aspect-ratio:24/24;background:#6c757d;display:block;height:24px;width:24px;-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;transition:background .2s}.social-icons>a:active,.social-icons>a:hover{background:#212529}.social-icons>a+a{margin-left:32px}.social-icons-facebook{-webkit-mask-image:url(/images/icons/icon-facebook-gray.svg);mask-image:url(/images/icons/icon-facebook-gray.svg)}.social-icons-twitter{-webkit-mask-image:url(/images/icons/icon-twitter-gray.svg);mask-image:url(/images/icons/icon-twitter-gray.svg);width:31px}.social-icons-linkedin{-webkit-mask-image:url(/images/icons/icon-linkedin-gray.svg);mask-image:url(/images/icons/icon-linkedin-gray.svg)}.social-icons-linkedin-alt{-webkit-mask-image:url(/images/icons/icon-linkedin-alt-gray.svg);mask-image:url(/images/icons/icon-linkedin-alt-gray.svg)}.social-icons.size-small>a{height:20px;width:20px}.social-icons.size-small>a:active,.social-icons.size-small>a:hover{background:#212529}.social-icons.size-small>a+a{margin-left:16px}.tabs{position:relative}.tabs:before{background:#fff;border-radius:7px 7px 0 0;content:"";display:block;height:8px;left:1px;position:absolute;right:1px;top:68px;z-index:10}@media screen and (min-width:1240px){.tabs:before{top:76px}}.tabs-body{background:#fff;border-radius:8px;border:1px solid #6c757d;box-shadow:0 8px 20px rgba(108,117,125,.2);padding:24px}@media screen and (min-width:980px){.tabs-body{padding:32px}}@media screen and (min-width:1240px){.tabs-body{padding:40px}}.tabs 
.nav-tabs{border-bottom:0;flex-wrap:nowrap;height:76px;margin:-20px -20px -9px;-webkit-mask-image:linear-gradient(90deg,transparent,#000 20px,#000 calc(100% - 20px),transparent);mask-image:linear-gradient(90deg,transparent,#000 20px,#000 calc(100% - 20px),transparent);overflow:scroll;overflow-x:scroll;overflow-y:visible;padding:20px 20px 0;position:relative}@media screen and (min-width:940px){.tabs .nav-tabs{overflow:visible}}@media screen and (min-width:1240px){.tabs .nav-tabs{height:84px}}.tabs .nav-link{align-items:center;border-bottom:0;color:#6c757d;display:flex;font-size:.875rem;flex-shrink:0;height:56px;justify-content:center;padding:0 12px 8px;text-align:center;white-space:nowrap}@media screen and (min-width:1240px){.tabs .nav-link{height:64px;padding:0 16px 8px}}.tabs .nav-link.active{background:#fff;box-shadow:0 -4px 8px rgba(108,117,125,.1);font-weight:700;padding:0 16px 8px}@media screen and (min-width:980px){.tabs .nav-link.active{padding:0 24px 8px}}@media screen and (min-width:1240px){.tabs .nav-link.active{padding:0 32px 8px}}.tab-pane pre{background:#212529;border-radius:16px;color:#fff;padding:24px 16px}@media screen and (min-width:1240px){.tab-pane pre{padding:32px 24px}}.trailing-link{align-items:center;color:#212529;display:flex;font-size:.875rem;font-weight:700}.trailing-link:after{background:no-repeat url(../images/icons/icon-arrow.svg);background-position:100%;background-size:contain;content:"";display:block;height:12px;transition:transform .2s;width:20px}.trailing-link:active,.trailing-link:hover{color:#212529;text-decoration:none}.trailing-link:active:after,.trailing-link:hover:after{transform:translateX(8px)}.trailing-link.span-full:after{margin-left:auto}ul{color:#495057;list-style-type:square;padding-left:1.25em}ul li:not(:last-child){margin-bottom:16px}ul li::marker{color:#ff3939}ul.has-separators{list-style:none;padding:0}ul.has-separators li:not(:last-child){border-bottom:4px solid #f1f6f9;margin-bottom:24px;padding-bottom:24px}.bg-gradient-secondary{background-image:linear-gradient(58deg,#ff6443 3%,#fe561d 24%,#e32f0d 93%)}.bg-gradient-light-orange{background-image:linear-gradient(90deg,rgba(255,203,128,0),#ffcb80)}.bg-offset-right{bottom:0;left:-24px;position:absolute;top:0;width:calc(100vw + 24px);z-index:-1}@media screen and (min-width:1240px){.bg-offset-right{left:-96px;width:calc(100vw + 96px)}}.bg-inset-right{bottom:0;left:40px;position:absolute;top:0;width:calc(100vw - 40px);z-index:-1}@media screen and (min-width:980px){.bg-inset-right{left:96px;width:calc(100vw - 96px)}}.has-border-left{border-left:8px solid #f1f6f9;padding-left:16px}.font-xl{font-size:1.25rem}.font-lg{font-size:1.125rem}.font-sm{font-size:.875rem}.font-xs{font-size:.625rem}.font-weight-semibold{font-weight:600}.display-5{color:#212529;font-size:20px;font-weight:500}.display-6{color:#212529;font-size:14px;font-weight:700}.text-decoration-underline{text-decoration:underline}.text-upper{text-transform:uppercase} \ No newline at end of file diff --git a/website/src/scss/components/_page.scss b/website/src/scss/components/_page.scss index 926f9656b73..a411ff7a15c 100644 --- a/website/src/scss/components/_page.scss +++ b/website/src/scss/components/_page.scss @@ -1,4 +1,4 @@ .page { overflow: hidden; - width: 100vw; + width: 100%; } From e679e952c89e1af257a2fecd0a1e960ef1b67401 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Thu, 21 Oct 2021 10:16:11 +0800 Subject: [PATCH 415/438] Let's check without projection just in case --- tests/queries/0_stateless/01710_projection_with_joins.sql | 
6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01710_projection_with_joins.sql b/tests/queries/0_stateless/01710_projection_with_joins.sql index 97dc396f362..fcd1c586fa3 100644 --- a/tests/queries/0_stateless/01710_projection_with_joins.sql +++ b/tests/queries/0_stateless/01710_projection_with_joins.sql @@ -2,5 +2,7 @@ drop table if exists t; create table t (s UInt16, l UInt16, projection p (select s, l order by l)) engine MergeTree order by s; -set allow_experimental_projection_optimization=1; -select s from t join (select toUInt16(1) as s) x using (s); +select s from t join (select toUInt16(1) as s) x using (s) settings allow_experimental_projection_optimization = 1; +select s from t join (select toUInt16(1) as s) x using (s) settings allow_experimental_projection_optimization = 0; + +drop table t; From 650a79a90771d64b6e70ac8708316923a5fa60aa Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 21 Oct 2021 10:08:08 +0300 Subject: [PATCH 416/438] SQLUserDefinedFunctions composition fix --- src/Interpreters/UserDefinedSQLFunctionVisitor.cpp | 5 ++++- src/Parsers/ASTFunction.cpp | 8 ++++++-- ..._sql_user_defined_functions_composition.reference | 2 ++ .../02103_sql_user_defined_functions_composition.sql | 12 ++++++++++++ 4 files changed, 24 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/02103_sql_user_defined_functions_composition.reference create mode 100644 tests/queries/0_stateless/02103_sql_user_defined_functions_composition.sql diff --git a/src/Interpreters/UserDefinedSQLFunctionVisitor.cpp b/src/Interpreters/UserDefinedSQLFunctionVisitor.cpp index 8df6932b153..d9ac53097ab 100644 --- a/src/Interpreters/UserDefinedSQLFunctionVisitor.cpp +++ b/src/Interpreters/UserDefinedSQLFunctionVisitor.cpp @@ -18,7 +18,7 @@ namespace ErrorCodes extern const int UNSUPPORTED_METHOD; } -void UserDefinedSQLFunctionMatcher::visit(ASTPtr & ast, Data &) +void UserDefinedSQLFunctionMatcher::visit(ASTPtr & ast, Data & data) { auto * function = ast->as(); if (!function) @@ -27,7 +27,10 @@ void UserDefinedSQLFunctionMatcher::visit(ASTPtr & ast, Data &) auto result = tryToReplaceFunction(*function); if (result) + { ast = result; + visit(ast, data); + } } bool UserDefinedSQLFunctionMatcher::needChildVisit(const ASTPtr &, const ASTPtr &) diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index 3c78c4060d6..093baca375c 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -440,6 +440,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format if (!written && 0 == strcmp(name.c_str(), "lambda")) { + /// Special case: zero elements tuple in lhs of lambda is printed as (). /// Special case: one-element tuple in lhs of lambda is printed as its element. 
if (frame.need_parens) @@ -449,9 +450,12 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format if (first_arg_func && first_arg_func->name == "tuple" && first_arg_func->arguments - && first_arg_func->arguments->children.size() == 1) + && (first_arg_func->arguments->children.size() == 1 || first_arg_func->arguments->children.size() == 0)) { - first_arg_func->arguments->children[0]->formatImpl(settings, state, nested_need_parens); + if (first_arg_func->arguments->children.size() == 1) + first_arg_func->arguments->children[0]->formatImpl(settings, state, nested_need_parens); + else + settings.ostr << "()"; } else arguments->children[0]->formatImpl(settings, state, nested_need_parens); diff --git a/tests/queries/0_stateless/02103_sql_user_defined_functions_composition.reference b/tests/queries/0_stateless/02103_sql_user_defined_functions_composition.reference new file mode 100644 index 00000000000..51993f072d5 --- /dev/null +++ b/tests/queries/0_stateless/02103_sql_user_defined_functions_composition.reference @@ -0,0 +1,2 @@ +2 +2 diff --git a/tests/queries/0_stateless/02103_sql_user_defined_functions_composition.sql b/tests/queries/0_stateless/02103_sql_user_defined_functions_composition.sql new file mode 100644 index 00000000000..3d34413b9d3 --- /dev/null +++ b/tests/queries/0_stateless/02103_sql_user_defined_functions_composition.sql @@ -0,0 +1,12 @@ +-- Tags: no-parallel + +CREATE FUNCTION 02103_test_function AS x -> x + 1; +CREATE FUNCTION 02103_test_function_with_nested_function_empty_args AS () -> 02103_test_function(1); +CREATE FUNCTION 02103_test_function_with_nested_function_arg AS (x) -> 02103_test_function(x); + +SELECT 02103_test_function_with_nested_function_empty_args(); +SELECT 02103_test_function_with_nested_function_arg(1); + +DROP FUNCTION 02103_test_function_with_nested_function_empty_args; +DROP FUNCTION 02103_test_function_with_nested_function_arg; +DROP FUNCTION 02103_test_function; From 2ef25184d0ac658e8e05681317a5d642648a9fe1 Mon Sep 17 00:00:00 2001 From: Vladimir Goncharov Date: Thu, 21 Oct 2021 15:19:48 +0300 Subject: [PATCH 417/438] Update argmax.md --- docs/ru/sql-reference/aggregate-functions/reference/argmax.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/ru/sql-reference/aggregate-functions/reference/argmax.md b/docs/ru/sql-reference/aggregate-functions/reference/argmax.md index 84419523beb..bdf7b1d1df6 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/argmax.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/argmax.md @@ -6,8 +6,6 @@ toc_priority: 106 Вычисляет значение `arg` при максимальном значении `val`. Если есть несколько разных значений `arg` для максимальных значений `val`, возвращает первое попавшееся из таких значений. -Если функции передан кортеж, то будет выведен кортеж с максимальным значением `val`. Удобно использовать для работы с [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md). 
- **Синтаксис** ``` sql From a43d293d12320545119570089da3b00585a03123 Mon Sep 17 00:00:00 2001 From: Vladimir Goncharov Date: Thu, 21 Oct 2021 15:20:25 +0300 Subject: [PATCH 418/438] Update argmax.md --- docs/zh/sql-reference/aggregate-functions/reference/argmax.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/zh/sql-reference/aggregate-functions/reference/argmax.md b/docs/zh/sql-reference/aggregate-functions/reference/argmax.md index 0c82cb8a4bb..ed3d2d07a58 100644 --- a/docs/zh/sql-reference/aggregate-functions/reference/argmax.md +++ b/docs/zh/sql-reference/aggregate-functions/reference/argmax.md @@ -6,8 +6,6 @@ toc_priority: 106 计算 `val` 最大值对应的 `arg` 值。 如果 `val` 最大值存在几个不同的 `arg` 值,输出遇到的第一个值。 -这个函数的Tuple版本将返回 `val` 最大值对应的元组。本函数适合和 `SimpleAggregateFunction` 搭配使用。 - **语法** ``` sql From 3afb070b15613f2f99ed25b4e6479f86c194ff9c Mon Sep 17 00:00:00 2001 From: Vladimir Goncharov Date: Thu, 21 Oct 2021 15:20:52 +0300 Subject: [PATCH 419/438] Update argmin.md --- docs/ru/sql-reference/aggregate-functions/reference/argmin.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/ru/sql-reference/aggregate-functions/reference/argmin.md b/docs/ru/sql-reference/aggregate-functions/reference/argmin.md index 4d2363abe6d..dd923061943 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/argmin.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/argmin.md @@ -6,8 +6,6 @@ toc_priority: 105 Вычисляет значение `arg` при минимальном значении `val`. Если есть несколько разных значений `arg` для минимальных значений `val`, возвращает первое попавшееся из таких значений. -Если функции передан кортеж, то будет выведен кортеж с минимальным значением `val`. Удобно использовать для работы с [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md). 
- **Синтаксис** ``` sql From a48da6294971be5f0a6e31aa11a95e13543dd742 Mon Sep 17 00:00:00 2001 From: Vladimir Goncharov Date: Thu, 21 Oct 2021 15:21:33 +0300 Subject: [PATCH 420/438] Update argmin.md --- docs/zh/sql-reference/aggregate-functions/reference/argmin.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/zh/sql-reference/aggregate-functions/reference/argmin.md b/docs/zh/sql-reference/aggregate-functions/reference/argmin.md index 6c2839062eb..8d987300ad4 100644 --- a/docs/zh/sql-reference/aggregate-functions/reference/argmin.md +++ b/docs/zh/sql-reference/aggregate-functions/reference/argmin.md @@ -8,8 +8,6 @@ toc_priority: 105 计算 `val` 最小值对应的 `arg` 值。 如果 `val` 最小值存在几个不同的 `arg` 值,输出遇到的第一个(`arg`)值。 -这个函数的Tuple版本将返回 `val` 最小值对应的tuple。本函数适合和`SimpleAggregateFunction`搭配使用。 - **示例:** 输入表: From 8885748a6c0c033bc946b4f1d0b9e677b0539d0c Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 21 Oct 2021 17:25:53 +0300 Subject: [PATCH 421/438] StorageDictionary fix potential configuration race --- src/Storages/StorageDictionary.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/Storages/StorageDictionary.cpp b/src/Storages/StorageDictionary.cpp index 349d894b2f1..51a73eb511e 100644 --- a/src/Storages/StorageDictionary.cpp +++ b/src/Storages/StorageDictionary.cpp @@ -212,11 +212,20 @@ void StorageDictionary::renameInMemory(const StorageID & new_table_id) auto old_table_id = getStorageID(); IStorage::renameInMemory(new_table_id); - if (configuration) + bool has_configuration = false; { - configuration->setString("dictionary.database", new_table_id.database_name); - configuration->setString("dictionary.name", new_table_id.table_name); + std::lock_guard lock(dictionary_config_mutex); + if (configuration) + { + has_configuration = true; + configuration->setString("dictionary.database", new_table_id.database_name); + configuration->setString("dictionary.name", new_table_id.table_name); + } + } + + if (has_configuration) + { const auto & external_dictionaries_loader = getContext()->getExternalDictionariesLoader(); auto result = external_dictionaries_loader.getLoadResult(old_table_id.getInternalDictionaryName()); From a874ca29edfdaf5e39efb19ec0d3743441525661 Mon Sep 17 00:00:00 2001 From: Cody Baker Date: Thu, 21 Oct 2021 09:31:38 -0600 Subject: [PATCH 422/438] Change link in nav from community -> learn (#30508) --- website/templates/global/nav.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/templates/global/nav.html b/website/templates/global/nav.html index 7e120c99126..d775c88e4a5 100644 --- a/website/templates/global/nav.html +++ b/website/templates/global/nav.html @@ -42,7 +42,7 @@ Documentation