From 105ea620ca1ae7f48828957e3c3d701395856260 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov <1549571+filimonov@users.noreply.github.com> Date: Wed, 14 Mar 2018 00:33:56 +0100 Subject: [PATCH 01/90] Documenting numbers table function, VerticalRaw format, HTTP sessions, HTTP compression. --- docs/en/formats/verticalraw.md | 24 ++++++++++++++++++++++++ docs/en/interfaces/http_interface.md | 13 ++++++++++--- docs/en/table_functions/numbers.md | 17 +++++++++++++++++ docs/mkdocs_en.yml | 2 ++ docs/mkdocs_ru.yml | 2 ++ docs/ru/formats/verticalraw.md | 26 ++++++++++++++++++++++++++ docs/ru/interfaces/http_interface.md | 11 +++++++++-- docs/ru/table_functions/numbers.md | 16 ++++++++++++++++ 8 files changed, 106 insertions(+), 5 deletions(-) create mode 100644 docs/en/formats/verticalraw.md create mode 100644 docs/en/table_functions/numbers.md create mode 100644 docs/ru/formats/verticalraw.md create mode 100644 docs/ru/table_functions/numbers.md diff --git a/docs/en/formats/verticalraw.md b/docs/en/formats/verticalraw.md new file mode 100644 index 00000000000..440967eb598 --- /dev/null +++ b/docs/en/formats/verticalraw.md @@ -0,0 +1,24 @@ +# VerticalRaw + +Differs from `TabSeparated` format in that the rows are written without escaping. +This format is only appropriate for outputting a query result, but not for parsing (retrieving data to insert in a table). 
+ +Samples: +``` +:) SHOW CREATE TABLE geonames FORMAT VerticalRaw; +Row 1: +────── +statement: CREATE TABLE default.geonames ( geonameid UInt32, name String, asciiname String, alternatenames String, latitude Float32, longitude Float32, feature_class String, feature_code String, country_code String, cc2 String, admin1_code String, admin2_code String, admin3_code String, admin4_code String, population Int64, elevation String, dem String, timezone String, modification_date Date, date Date DEFAULT CAST('2017-12-08' AS Date)) ENGINE = MergeTree(date, geonameid, 8192) + +:) SELECT 'string with \'quotes\' and \t with some special \n characters' AS test FORMAT VerticalRaw; +Row 1: +────── +test: string with 'quotes' and with some special + characters + +-- the same in Vertical format: +:) SELECT 'string with \'quotes\' and \t with some special \n characters' AS test FORMAT Vertical; +Row 1: +────── +test: string with \'quotes\' and \t with some special \n characters +``` diff --git a/docs/en/interfaces/http_interface.md b/docs/en/interfaces/http_interface.md index 91c6790f975..8c223cf69cf 100644 --- a/docs/en/interfaces/http_interface.md +++ b/docs/en/interfaces/http_interface.md @@ -130,11 +130,15 @@ POST 'http://localhost:8123/?query=DROP TABLE t' For successful requests that don't return a data table, an empty response body is returned. -You can use compression when transmitting data. The compressed data has a non-standard format, and you will need to use the special compressor program to work with it (sudo apt-get install compressor-metrika-yandex). +You can use compression when transmitting data. +For using ClickHouse internal compression format, and you will need to use the special compressor program to work with it (sudo apt-get install compressor-metrika-yandex). If you specified 'compress=1' in the URL, the server will compress the data it sends you. 
If you specified 'decompress=1' in the URL, the server will decompress the same data that you pass in the POST method. +Also standard gzip-based HTTP compression can be used. To send gzip compressed POST data just add `Content-Encoding: gzip` to request headers, and gzip POST body. +To get response compressed, you need to add `Accept-Encoding: gzip` to request headers, and turn on ClickHouse setting called `enable_http_compression`. + You can use this to reduce network traffic when transmitting a large amount of data, or for creating dumps that are immediately compressed. You can use the 'database' URL parameter to specify the default database. @@ -190,7 +194,11 @@ $ echo 'SELECT number FROM system.numbers LIMIT 10' | curl 'http://localhost:812 For information about other parameters, see the section "SET". -In contrast to the native interface, the HTTP interface does not support the concept of sessions or session settings, does not allow aborting a query (to be exact, it allows this in only a few cases), and does not show the progress of query processing. Parsing and data formatting are performed on the server side, and using the network might be ineffective. +You can use ClickHouse sessions in the HTTP protocol. To do this, you need to specify the `session_id` GET parameter in HTTP request. You can use any alphanumeric string as a session_id. By default session will be timed out after 60 seconds of inactivity. You can change that by setting `default_session_timeout` in server config file, or by adding GET parameter `session_timeout`. You can also check the status of the session by using GET parameter `session_check=1`. When using sessions you can't run 2 queries with the same session_id simultaneously. + +You can get the progress of query execution in X-ClickHouse-Progress headers, by enabling setting send_progress_in_http_headers. + +Running queries are not aborted automatically after the HTTP connection is closed. 
Parsing and data formatting are performed on the server side, and using the network might be ineffective. The optional 'query_id' parameter can be passed as the query ID (any string). For more information, see the section "Settings, replace_running_query". The optional 'quota_key' parameter can be passed as the quota key (any string). For more information, see the section "Quotas". @@ -212,4 +220,3 @@ curl -sS 'http://localhost:8123/?max_result_bytes=4000000&buffer_size=3000000&wa ``` Use buffering to avoid situations where a query processing error occurred after the response code and HTTP headers were sent to the client. In this situation, an error message is written at the end of the response body, and on the client side, the error can only be detected at the parsing stage. - diff --git a/docs/en/table_functions/numbers.md b/docs/en/table_functions/numbers.md new file mode 100644 index 00000000000..b055f1cd56e --- /dev/null +++ b/docs/en/table_functions/numbers.md @@ -0,0 +1,17 @@ +# numbers + +`numbers(N)` - returns the table with one column named `number` (UInt64 type), containing integer numbers from 0 to N-1. + +`numbers(N)` (like a table `system.numbers`) can be used in tests or for sequences generation. 
+ +The following two queries are equivalent: +```sql +SELECT * FROM numbers(10); +SELECT * FROM system.numbers LIMIT 10; +``` + +Samples: +```sql +-- generation of sequence of dates from 2010-01-01 to 2010-12-31 +select toDate('2010-01-01') + number as d FROM numbers(365); +``` diff --git a/docs/mkdocs_en.yml b/docs/mkdocs_en.yml index 1daf36ecfd8..c94e33c7818 100644 --- a/docs/mkdocs_en.yml +++ b/docs/mkdocs_en.yml @@ -122,6 +122,7 @@ pages: - 'Introduction': 'table_functions/index.md' - 'remote': 'table_functions/remote.md' - 'merge': 'table_functions/merge.md' + - 'numbers': 'table_functions/numbers.md' - 'Formats': - 'Introduction': 'formats/index.md' @@ -133,6 +134,7 @@ pages: - 'CSVWithNames': 'formats/csvwithnames.md' - 'Values': 'formats/values.md' - 'Vertical': 'formats/vertical.md' + - 'VerticalRaw': 'formats/verticalraw.md' - 'JSON': 'formats/json.md' - 'JSONCompact': 'formats/jsoncompact.md' - 'JSONEachRow': 'formats/jsoneachrow.md' diff --git a/docs/mkdocs_ru.yml b/docs/mkdocs_ru.yml index 23734934bb5..eb8c002847d 100644 --- a/docs/mkdocs_ru.yml +++ b/docs/mkdocs_ru.yml @@ -124,6 +124,7 @@ pages: - 'Введение': 'table_functions/index.md' - 'remote': 'table_functions/remote.md' - 'merge': 'table_functions/merge.md' + - 'numbers': 'table_functions/numbers.md' - 'Форматы': - 'Введение': 'formats/index.md' @@ -135,6 +136,7 @@ pages: - 'CSVWithNames': 'formats/csvwithnames.md' - 'Values': 'formats/values.md' - 'Vertical': 'formats/vertical.md' + - 'VerticalRaw': 'formats/verticalraw.md' - 'JSON': 'formats/json.md' - 'JSONCompact': 'formats/jsoncompact.md' - 'JSONEachRow': 'formats/jsoneachrow.md' diff --git a/docs/ru/formats/verticalraw.md b/docs/ru/formats/verticalraw.md new file mode 100644 index 00000000000..9693cabe3c3 --- /dev/null +++ b/docs/ru/formats/verticalraw.md @@ -0,0 +1,26 @@ +# VerticalRaw + +Отличается от формата `Vertical` тем, что строки выводятся без экранирования. 
+Этот формат подходит только для вывода результата выполнения запроса, но не для парсинга (приёма данных для вставки в таблицу). + +Примеры: +``` +:) SHOW CREATE TABLE geonames FORMAT VerticalRaw; +Row 1: +────── +statement: CREATE TABLE default.geonames ( geonameid UInt32, name String, asciiname String, alternatenames String, latitude Float32, longitude Float32, feature_class String, feature_code String, country_code String, cc2 String, admin1_code String, admin2_code String, admin3_code String, admin4_code String, population Int64, elevation String, dem String, timezone String, modification_date Date, date Date DEFAULT CAST('2017-12-08' AS Date)) ENGINE = MergeTree(date, geonameid, 8192) + +:) SELECT 'string with \'quotes\' and \t with some special \n characters' AS test FORMAT VerticalRaw; +Row 1: +────── +test: string with 'quotes' and with some special + characters +``` + +Для сравнения - формат Vertical: +``` +:) SELECT 'string with \'quotes\' and \t with some special \n characters' AS test FORMAT Vertical; +Row 1: +────── +test: string with \'quotes\' and \t with some special \n characters +``` diff --git a/docs/ru/interfaces/http_interface.md b/docs/ru/interfaces/http_interface.md index 28cb8ce6c66..a20ca4d844e 100644 --- a/docs/ru/interfaces/http_interface.md +++ b/docs/ru/interfaces/http_interface.md @@ -132,11 +132,14 @@ POST 'http://localhost:8123/?query=DROP TABLE t' Для запросов, которые не возвращают таблицу с данными, в случае успеха, выдаётся пустое тело ответа. -Вы можете использовать сжатие при передаче данных. Формат сжатых данных нестандартный, и вам придётся использовать для работы с ним специальную программу compressor (sudo apt-get install compressor-metrika-yandex). +Вы можете использовать внутренний формат сжатия Clickhouse при передаче данных. Формат сжатых данных нестандартный, и вам придётся использовать для работы с ним специальную программу compressor (sudo apt-get install compressor-metrika-yandex). 
Если вы указали в URL compress=1, то сервер будет сжимать отправляемые вам данные. Если вы указали в URL decompress=1, то сервер будет разжимать те данные, которые вы передаёте ему POST-ом. +Также имеется возможность использования стандартного сжатия HTTP, на основе gzip. Чтобы отправить POST-запрос, сжатый с помощью gzip, добавьте к запросу заголовок `Content-Encoding: gzip`. +Чтобы ClickHouse сжимал ответ на запрос с помощью gzip, необходимо добавить `Accept-Encoding: gzip` к заголовкам запроса, и включить настройку ClickHouse `enable_http_compression`. + Это может быть использовано для уменьшения трафика по сети при передаче большого количества данных, а также для создания сразу сжатых дампов. В параметре URL database может быть указана БД по умолчанию. @@ -193,7 +196,11 @@ $ echo 'SELECT number FROM system.numbers LIMIT 10' | curl 'http://localhost:812 Об остальных параметрах смотри раздел "SET". -В отличие от родного интерфейса, HTTP интерфейс не поддерживает понятие сессии и настройки в пределах сессии, не позволяет (вернее, позволяет лишь в некоторых случаях) прервать выполнение запроса, не показывает прогресс выполнения запроса. Парсинг и форматирование данных производится на стороне сервера и использование сети может быть неэффективным. +В HTTP-протоколе можно использовать ClickHouse-сессии, для этого необходимо добавить к запросу GET-пaраметр `session_id`. В качестве идентификатора сессии можно использовать произвольную строку. По умолчанию через 60 секунд бездействия сессия будет прервана. Можно изменить этот таймаут, изменяя настройку `default_session_timeout` в конфигурации сервера, или добавив к запросу GET параметр `session_timeout`. Статус сессии можно проверить с помощью параметра `session_check=1`. В рамках одной сессии одновременно может испольняться только один запрос. + +Имеется возможность получать информацию о прогрессе выполнения запроса в залоголвках X-ClickHouse-Progress, для этого нужно включить настройку send_progress_in_http_headers. 
+ +Запущенные запросы не останавливаются автоматически при разрыве HTTP соединения. Парсинг и форматирование данных производится на стороне сервера и использование сети может быть неэффективным. Может быть передан необязательный параметр query_id - идентификатор запроса, произвольная строка. Подробнее смотрите раздел "Настройки, replace_running_query". Может быть передан необязательный параметр quota_key - ключ квоты, произвольная строка. Подробнее смотрите раздел "Квоты". diff --git a/docs/ru/table_functions/numbers.md b/docs/ru/table_functions/numbers.md new file mode 100644 index 00000000000..21bf3492ca1 --- /dev/null +++ b/docs/ru/table_functions/numbers.md @@ -0,0 +1,16 @@ +# numbers + +`numbers(N)` - возвращает таблицу с единственным столбцом number (тип UInt64), содержащую натуральные числа от 0 до N-1. + +Так же как и таблица `system.numbers` может использоваться для тестов и генерации последовательных значений. + +Следующие 2 запроса эквивалентны: +```sql +SELECT * FROM numbers(10); +SELECT * FROM system.numbers LIMIT 10; +``` +Примеры: +```sql +-- генерация последовательности всех дат от 2010-01-01 до 2010-12-31 +select toDate('2010-01-01') + number as d FROM numbers(365); +``` From 6bb8e52f71a86d88d6eb495bd7ec9cd7e4af8d7d Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov <1549571+filimonov@users.noreply.github.com> Date: Wed, 14 Mar 2018 00:42:06 +0100 Subject: [PATCH 02/90] Fixing bad copy-paste, shorten sample --- docs/en/formats/verticalraw.md | 4 ++-- docs/ru/formats/verticalraw.md | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/en/formats/verticalraw.md b/docs/en/formats/verticalraw.md index 440967eb598..9bb53ee1260 100644 --- a/docs/en/formats/verticalraw.md +++ b/docs/en/formats/verticalraw.md @@ -1,6 +1,6 @@ # VerticalRaw -Differs from `TabSeparated` format in that the rows are written without escaping. +Differs from `Vertical` format in that the rows are written without escaping. 
This format is only appropriate for outputting a query result, but not for parsing (retrieving data to insert in a table). Samples: @@ -8,7 +8,7 @@ Samples: :) SHOW CREATE TABLE geonames FORMAT VerticalRaw; Row 1: ────── -statement: CREATE TABLE default.geonames ( geonameid UInt32, name String, asciiname String, alternatenames String, latitude Float32, longitude Float32, feature_class String, feature_code String, country_code String, cc2 String, admin1_code String, admin2_code String, admin3_code String, admin4_code String, population Int64, elevation String, dem String, timezone String, modification_date Date, date Date DEFAULT CAST('2017-12-08' AS Date)) ENGINE = MergeTree(date, geonameid, 8192) +statement: CREATE TABLE default.geonames ( geonameid UInt32, date Date DEFAULT CAST('2017-12-08' AS Date)) ENGINE = MergeTree(date, geonameid, 8192) :) SELECT 'string with \'quotes\' and \t with some special \n characters' AS test FORMAT VerticalRaw; Row 1: diff --git a/docs/ru/formats/verticalraw.md b/docs/ru/formats/verticalraw.md index 9693cabe3c3..fb497430fcd 100644 --- a/docs/ru/formats/verticalraw.md +++ b/docs/ru/formats/verticalraw.md @@ -8,7 +8,7 @@ :) SHOW CREATE TABLE geonames FORMAT VerticalRaw; Row 1: ────── -statement: CREATE TABLE default.geonames ( geonameid UInt32, name String, asciiname String, alternatenames String, latitude Float32, longitude Float32, feature_class String, feature_code String, country_code String, cc2 String, admin1_code String, admin2_code String, admin3_code String, admin4_code String, population Int64, elevation String, dem String, timezone String, modification_date Date, date Date DEFAULT CAST('2017-12-08' AS Date)) ENGINE = MergeTree(date, geonameid, 8192) +statement: CREATE TABLE default.geonames ( geonameid UInt32, date Date DEFAULT CAST('2017-12-08' AS Date)) ENGINE = MergeTree(date, geonameid, 8192) :) SELECT 'string with \'quotes\' and \t with some special \n characters' AS test FORMAT VerticalRaw; Row 1: From 
efc0ed06a886ef2acb011624b7f0cb07c67debde Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov <1549571+filimonov@users.noreply.github.com> Date: Wed, 14 Mar 2018 00:49:12 +0100 Subject: [PATCH 03/90] Fixing obsolete name of clickhouse-compressor --- docs/en/interfaces/http_interface.md | 2 +- docs/ru/interfaces/http_interface.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/interfaces/http_interface.md b/docs/en/interfaces/http_interface.md index 8c223cf69cf..5c989a59d65 100644 --- a/docs/en/interfaces/http_interface.md +++ b/docs/en/interfaces/http_interface.md @@ -132,7 +132,7 @@ For successful requests that don't return a data table, an empty response body i You can use compression when transmitting data. -For using ClickHouse internal compression format, and you will need to use the special compressor program to work with it (sudo apt-get install compressor-metrika-yandex). +For using ClickHouse internal compression format, and you will need to use the special clickhouse-compressor program to work with it (installed as a part of clickhouse-client package). If you specified 'compress=1' in the URL, the server will compress the data it sends you. If you specified 'decompress=1' in the URL, the server will decompress the same data that you pass in the POST method. diff --git a/docs/ru/interfaces/http_interface.md b/docs/ru/interfaces/http_interface.md index a20ca4d844e..8f2ae4377f4 100644 --- a/docs/ru/interfaces/http_interface.md +++ b/docs/ru/interfaces/http_interface.md @@ -132,7 +132,7 @@ POST 'http://localhost:8123/?query=DROP TABLE t' Для запросов, которые не возвращают таблицу с данными, в случае успеха, выдаётся пустое тело ответа. -Вы можете использовать внутренний формат сжатия Clickhouse при передаче данных. Формат сжатых данных нестандартный, и вам придётся использовать для работы с ним специальную программу compressor (sudo apt-get install compressor-metrika-yandex). 
+Вы можете использовать внутренний формат сжатия Clickhouse при передаче данных. Формат сжатых данных нестандартный, и вам придётся использовать для работы с ним специальную программу clickhouse-compressor (устанавливается вместе с пакетом clickhouse-client). Если вы указали в URL compress=1, то сервер будет сжимать отправляемые вам данные. Если вы указали в URL decompress=1, то сервер будет разжимать те данные, которые вы передаёте ему POST-ом. From 80f8b2bdee6c481abb01dc9d5da4b53bb16bbc72 Mon Sep 17 00:00:00 2001 From: filimonov <1549571+filimonov@users.noreply.github.com> Date: Wed, 14 Mar 2018 00:52:38 +0100 Subject: [PATCH 04/90] fix word end --- docs/ru/table_functions/numbers.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/table_functions/numbers.md b/docs/ru/table_functions/numbers.md index 21bf3492ca1..bd5d566f78e 100644 --- a/docs/ru/table_functions/numbers.md +++ b/docs/ru/table_functions/numbers.md @@ -1,6 +1,6 @@ # numbers -`numbers(N)` - возвращает таблицу с единственным столбцом number (тип UInt64), содержащую натуральные числа от 0 до N-1. +`numbers(N)` - возвращает таблицу с единственным столбцом number (тип UInt64), содержащим натуральные числа от 0 до N-1. Так же как и таблица `system.numbers` может использоваться для тестов и генерации последовательных значений. 
From 88c4081a823bee118377ad658a08b941148eef0a Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 4 Jul 2018 20:02:47 +0300 Subject: [PATCH 05/90] 3578: defaults for input streams (in progress) --- contrib/CMakeLists.txt | 2 +- dbms/programs/client/Client.cpp | 5 +- dbms/src/Core/Block.h | 1 + dbms/src/Core/BlockInfo.cpp | 16 ++++ dbms/src/Core/BlockInfo.h | 20 ++++ .../AddingDefaultBlockOutputStream.cpp | 1 + .../AddingDefaultBlockOutputStream.h | 1 - .../AddingDefaultsBlockInputStream.cpp | 91 +++++++++++++++++++ .../AddingDefaultsBlockInputStream.h | 32 +++++++ .../BlockInputStreamFromRowInputStream.cpp | 20 +++- dbms/src/Formats/IRowInputStream.h | 10 ++ .../src/Formats/JSONEachRowRowInputStream.cpp | 10 +- dbms/src/Formats/JSONEachRowRowInputStream.h | 1 + .../Interpreters/evaluateMissingDefaults.cpp | 41 +++++++-- .../Interpreters/evaluateMissingDefaults.h | 5 + 15 files changed, 242 insertions(+), 14 deletions(-) create mode 100644 dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp create mode 100644 dbms/src/DataStreams/AddingDefaultsBlockInputStream.h diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 2f5e003fc2f..3021b315136 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -148,6 +148,6 @@ if (USE_INTERNAL_POCO_LIBRARY) endif () endif () -if (USE_INTERNAL_LLVM_LIBRARY) +if (ENABLE_EMBEDDED_COMPILER AND USE_INTERNAL_LLVM_LIBRARY) add_subdirectory (llvm/llvm) endif () diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index b056f82d1a7..23a9fcf2030 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -941,7 +942,9 @@ private: BlockInputStreamPtr block_input = context.getInputFormat( current_format, buf, sample, insert_format_max_block_size); - BlockInputStreamPtr async_block_input = std::make_shared(block_input); + ColumnDefaults column_defaults; // TODO: get from server + 
BlockInputStreamPtr defs_block_input = std::make_shared(block_input, column_defaults, context); + BlockInputStreamPtr async_block_input = std::make_shared(defs_block_input); async_block_input->readPrefix(); diff --git a/dbms/src/Core/Block.h b/dbms/src/Core/Block.h index 94a4147aac2..b7c19548963 100644 --- a/dbms/src/Core/Block.h +++ b/dbms/src/Core/Block.h @@ -35,6 +35,7 @@ private: public: BlockInfo info; + BlockDelayedDefaults delayed_defaults; Block() = default; Block(std::initializer_list il); diff --git a/dbms/src/Core/BlockInfo.cpp b/dbms/src/Core/BlockInfo.cpp index 77ef2e01007..15d7d9efa12 100644 --- a/dbms/src/Core/BlockInfo.cpp +++ b/dbms/src/Core/BlockInfo.cpp @@ -58,4 +58,20 @@ void BlockInfo::read(ReadBuffer & in) } } +void BlockDelayedDefaults::setBit(size_t column_idx, size_t row_idx) +{ + BitMask & mask = columns_defaults[column_idx]; + mask.resize(row_idx + 1); + mask[row_idx] = true; +} + +const BlockDelayedDefaults::BitMask & BlockDelayedDefaults::getColumnBitmask(size_t column_idx) const +{ + static BitMask none; + auto it = columns_defaults.find(column_idx); + if (it != columns_defaults.end()) + return it->second; + return none; +} + } diff --git a/dbms/src/Core/BlockInfo.h b/dbms/src/Core/BlockInfo.h index ebfbd117de7..00c0821a33e 100644 --- a/dbms/src/Core/BlockInfo.h +++ b/dbms/src/Core/BlockInfo.h @@ -1,5 +1,7 @@ #pragma once +#include + #include @@ -43,4 +45,22 @@ struct BlockInfo void read(ReadBuffer & in); }; +/// Block extention to support delayed defaults. +/// It's expected that it would be lots unset defaults or none. +/// NOTE It's possible to make better solution for sparse values. 
+class BlockDelayedDefaults +{ +public: + using BitMask = std::vector; + using MaskById = std::unordered_map; + + const BitMask & getColumnBitmask(size_t column_idx) const; + void setBit(size_t column_idx, size_t row_idx); + bool empty() const { return columns_defaults.empty(); } + size_t size() const { return columns_defaults.size(); } + +private: + MaskById columns_defaults; +}; + } diff --git a/dbms/src/DataStreams/AddingDefaultBlockOutputStream.cpp b/dbms/src/DataStreams/AddingDefaultBlockOutputStream.cpp index a43dfab6e13..52f8c3e453a 100644 --- a/dbms/src/DataStreams/AddingDefaultBlockOutputStream.cpp +++ b/dbms/src/DataStreams/AddingDefaultBlockOutputStream.cpp @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB diff --git a/dbms/src/DataStreams/AddingDefaultBlockOutputStream.h b/dbms/src/DataStreams/AddingDefaultBlockOutputStream.h index 06daf5532df..b36aaee501f 100644 --- a/dbms/src/DataStreams/AddingDefaultBlockOutputStream.h +++ b/dbms/src/DataStreams/AddingDefaultBlockOutputStream.h @@ -4,7 +4,6 @@ #include #include #include -#include namespace DB diff --git a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp new file mode 100644 index 00000000000..82c36bfab85 --- /dev/null +++ b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp @@ -0,0 +1,91 @@ +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; +} + + +AddingDefaultsBlockInputStream::AddingDefaultsBlockInputStream(const BlockInputStreamPtr & input, + const ColumnDefaults & column_defaults_, + const Context & context_) + : column_defaults(column_defaults_), + context(context_) +{ + children.push_back(input); + header = input->getHeader(); +} + + +Block AddingDefaultsBlockInputStream::readImpl() +{ + Block res = children.back()->read(); + if (!res) + return res; + + if (column_defaults.empty()) + 
return res; + + BlockDelayedDefaults delayed_defaults = res.delayed_defaults; + if (delayed_defaults.empty()) + return res; + + Block evaluate_block{res}; + for (const auto & column : column_defaults) + evaluate_block.erase(column.first); + + evaluateMissingDefaultsUnsafe(evaluate_block, header.getNamesAndTypesList(), column_defaults, context); + + ColumnsWithTypeAndName mixed_columns; + mixed_columns.reserve(std::min(column_defaults.size(), delayed_defaults.size())); + + for (const ColumnWithTypeAndName & column_def : evaluate_block) + { + const String & column_name = column_def.name; + + if (column_defaults.count(column_name) == 0) + continue; + + size_t block_column_position = res.getPositionByName(column_name); + const ColumnWithTypeAndName & column_read = res.getByPosition(block_column_position); + + if (column_read.column->size() != column_def.column->size()) + throw Exception("Mismach column sizes while adding defaults", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); + + const BlockDelayedDefaults::BitMask & mask = delayed_defaults.getColumnBitmask(block_column_position); + MutableColumnPtr column_mixed = column_read.column->cloneEmpty(); + + for (size_t row_idx = 0; row_idx < column_read.column->size(); ++row_idx) + { + if (mask[row_idx]) + column_mixed->insertFrom(*column_def.column, row_idx); + else + column_mixed->insertFrom(*column_read.column, row_idx); + } + + ColumnWithTypeAndName mix = column_read.cloneEmpty(); + mix.column = std::move(column_mixed); + mixed_columns.emplace_back(std::move(mix)); + } + + for (auto & column : mixed_columns) + { + res.erase(column.name); + res.insert(std::move(column)); + } + + return res; +} + +} diff --git a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.h b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.h new file mode 100644 index 00000000000..5caaec244da --- /dev/null +++ b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.h @@ -0,0 +1,32 @@ +#pragma once + +#include +#include +#include + + +namespace DB 
+{ + +/// Adds defaults to columns using BlockDelayedDefaults bitmask attached to Block by child InputStream. +class AddingDefaultsBlockInputStream : public IProfilingBlockInputStream +{ +public: + AddingDefaultsBlockInputStream( + const BlockInputStreamPtr & input, + const ColumnDefaults & column_defaults_, + const Context & context_); + + String getName() const override { return "AddingDefaults"; } + Block getHeader() const override { return header; } + +protected: + Block readImpl() override; + +private: + Block header; + const ColumnDefaults column_defaults; + const Context & context; +}; + +} diff --git a/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp b/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp index 21457a4a74f..aa4c2968539 100644 --- a/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp +++ b/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp @@ -16,6 +16,7 @@ namespace ErrorCodes extern const int CANNOT_PARSE_NUMBER; extern const int CANNOT_PARSE_UUID; extern const int TOO_LARGE_STRING_SIZE; + extern const int INCORRECT_NUMBER_OF_COLUMNS; } @@ -47,6 +48,7 @@ Block BlockInputStreamFromRowInputStream::readImpl() { size_t num_columns = sample.columns(); MutableColumns columns = sample.cloneEmptyColumns(); + BlockDelayedDefaults delayed_defaults; try { @@ -55,8 +57,19 @@ Block BlockInputStreamFromRowInputStream::readImpl() try { ++total_rows; - if (!row_input->read(columns)) + RowReadExtention info; + if (!row_input->extendedRead(columns, info)) break; + + for (size_t column_idx = 0; column_idx < info.read_columns.size(); ++column_idx) + { + if (!info.read_columns[column_idx]) { + size_t column_size = columns[column_idx]->size(); + if (column_size == 0) + throw Exception("Unexpected empty column", ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS); + delayed_defaults.setBit(column_idx, column_size - 1); + } + } } catch (Exception & e) { @@ -125,7 +138,10 @@ Block BlockInputStreamFromRowInputStream::readImpl() if (columns.empty() || 
columns[0]->empty()) return {}; - return sample.cloneWithColumns(std::move(columns)); + auto out_block = sample.cloneWithColumns(std::move(columns)); + if (!delayed_defaults.empty()) + out_block.delayed_defaults = std::move(delayed_defaults); + return out_block; } } diff --git a/dbms/src/Formats/IRowInputStream.h b/dbms/src/Formats/IRowInputStream.h index 49019740d10..1ffa205edc6 100644 --- a/dbms/src/Formats/IRowInputStream.h +++ b/dbms/src/Formats/IRowInputStream.h @@ -10,6 +10,15 @@ namespace DB { +/// A way to set some extentions to read and return extra information too. +struct RowReadExtention +{ + using BitMask = std::vector; + + /// IRowInputStream.extendedRead() output value. + /// Contains true for columns that actually read from the source and false for defaults + BitMask read_columns; +}; /** Interface of stream, that allows to read data by rows. */ @@ -20,6 +29,7 @@ public: * If no more rows - return false. */ virtual bool read(MutableColumns & columns) = 0; + virtual bool extendedRead(MutableColumns & columns, RowReadExtention & ) { return read(columns); } virtual void readPrefix() {} /// delimiter before begin of result virtual void readSuffix() {} /// delimiter after end of result diff --git a/dbms/src/Formats/JSONEachRowRowInputStream.cpp b/dbms/src/Formats/JSONEachRowRowInputStream.cpp index c47111e047d..15c9e6f5861 100644 --- a/dbms/src/Formats/JSONEachRowRowInputStream.cpp +++ b/dbms/src/Formats/JSONEachRowRowInputStream.cpp @@ -62,6 +62,12 @@ static void skipColonDelimeter(ReadBuffer & istr) bool JSONEachRowRowInputStream::read(MutableColumns & columns) +{ + RowReadExtention tmp; + return extendedRead(columns, tmp); +} + +bool JSONEachRowRowInputStream::extendedRead(MutableColumns & columns, RowReadExtention & ext) { skipWhitespaceIfAny(istr); @@ -84,8 +90,8 @@ bool JSONEachRowRowInputStream::read(MutableColumns & columns) /// Set of columns for which the values were read. The rest will be filled with default values. 
/// TODO Ability to provide your DEFAULTs. - bool read_columns[num_columns]; - memset(read_columns, 0, num_columns); + auto & read_columns = ext.read_columns; + read_columns.assign(num_columns, false); bool first = true; while (true) diff --git a/dbms/src/Formats/JSONEachRowRowInputStream.h b/dbms/src/Formats/JSONEachRowRowInputStream.h index 737811be51c..dea3597de29 100644 --- a/dbms/src/Formats/JSONEachRowRowInputStream.h +++ b/dbms/src/Formats/JSONEachRowRowInputStream.h @@ -23,6 +23,7 @@ public: JSONEachRowRowInputStream(ReadBuffer & istr_, const Block & header_, const FormatSettings & format_settings); bool read(MutableColumns & columns) override; + bool extendedRead(MutableColumns & columns, RowReadExtention & ext) override; bool allowSyncAfterError() const override { return true; } void syncAfterError() override; diff --git a/dbms/src/Interpreters/evaluateMissingDefaults.cpp b/dbms/src/Interpreters/evaluateMissingDefaults.cpp index 0b9bcb5417f..ad7d430c3e1 100644 --- a/dbms/src/Interpreters/evaluateMissingDefaults.cpp +++ b/dbms/src/Interpreters/evaluateMissingDefaults.cpp @@ -11,14 +11,8 @@ namespace DB { -void evaluateMissingDefaults(Block & block, - const NamesAndTypesList & required_columns, - const ColumnDefaults & column_defaults, - const Context & context) +static ASTPtr requiredExpressions(Block & block, const NamesAndTypesList & required_columns, const ColumnDefaults & column_defaults) { - if (column_defaults.empty()) - return; - ASTPtr default_expr_list = std::make_shared(); for (const auto & column : required_columns) @@ -34,6 +28,19 @@ void evaluateMissingDefaults(Block & block, setAlias(it->second.expression->clone(), it->first)); } + return default_expr_list; +} + + +void evaluateMissingDefaults(Block & block, + const NamesAndTypesList & required_columns, + const ColumnDefaults & column_defaults, + const Context & context) +{ + if (column_defaults.empty()) + return; + + ASTPtr default_expr_list = requiredExpressions(block, required_columns, 
column_defaults); /// nothing to evaluate if (default_expr_list->children.empty()) return; @@ -59,4 +66,24 @@ void evaluateMissingDefaults(Block & block, } } + +void evaluateMissingDefaultsUnsafe(Block & block, + const NamesAndTypesList & required_columns, + const std::unordered_map & column_defaults, + const Context & context) +{ + if (column_defaults.empty()) + return; + + ASTPtr default_expr_list = requiredExpressions(block, required_columns, column_defaults); + if (default_expr_list->children.empty()) + return; + + NamesAndTypesList available_columns; + for (size_t i = 0, size = block.columns(); i < size; ++i) + available_columns.emplace_back(block.getByPosition(i).name, block.getByPosition(i).type); + + ExpressionAnalyzer{default_expr_list, context, {}, available_columns}.getActions(true)->execute(block); +} + } diff --git a/dbms/src/Interpreters/evaluateMissingDefaults.h b/dbms/src/Interpreters/evaluateMissingDefaults.h index a9b46d8dee5..ce0c649f3d0 100644 --- a/dbms/src/Interpreters/evaluateMissingDefaults.h +++ b/dbms/src/Interpreters/evaluateMissingDefaults.h @@ -17,4 +17,9 @@ void evaluateMissingDefaults(Block & block, const std::unordered_map & column_defaults, const Context & context); +void evaluateMissingDefaultsUnsafe(Block & block, + const NamesAndTypesList & required_columns, + const std::unordered_map & column_defaults, + const Context & context); + } From e34bc2782ec28557060b001b02462bd1f163ff67 Mon Sep 17 00:00:00 2001 From: chertus Date: Thu, 5 Jul 2018 14:01:25 +0300 Subject: [PATCH 06/90] 3578: get ColumnDefaults from context (it's empty now) --- dbms/programs/client/Client.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 23a9fcf2030..2a26a30c0b8 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -933,16 +933,25 @@ private: void sendDataFrom(ReadBuffer & buf, Block & sample) { String current_format = 
insert_format; + ColumnDefaults column_defaults; /// Data format can be specified in the INSERT query. if (ASTInsertQuery * insert = typeid_cast(&*parsed_query)) + { if (!insert->format.empty()) current_format = insert->format; + if (context.isTableExist(insert->database, insert->table)) + { + StoragePtr table = context.getTable(insert->database, insert->table); + if (table) + column_defaults = table->getColumns().defaults; + } + } + BlockInputStreamPtr block_input = context.getInputFormat( current_format, buf, sample, insert_format_max_block_size); - ColumnDefaults column_defaults; // TODO: get from server BlockInputStreamPtr defs_block_input = std::make_shared(block_input, column_defaults, context); BlockInputStreamPtr async_block_input = std::make_shared(defs_block_input); From c26ca0232d68bda7f456e47cb6c4b014ded579c3 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 6 Jul 2018 18:49:33 +0300 Subject: [PATCH 07/90] metadata in capn proto format (in progress) --- dbms/programs/client/CMakeLists.txt | 3 +- dbms/programs/client/Client.cpp | 6 +- dbms/src/CMakeLists.txt | 3 + dbms/src/Core/Protocol.h | 16 +++-- dbms/src/Proto/CMakeLists.txt | 13 ++++ dbms/src/Proto/ServerMessage.capnp | 34 ++++++++++ dbms/src/Proto/protoHelpers.cpp | 98 +++++++++++++++++++++++++++++ dbms/src/Proto/protoHelpers.h | 11 ++++ 8 files changed, 177 insertions(+), 7 deletions(-) create mode 100644 dbms/src/Proto/CMakeLists.txt create mode 100644 dbms/src/Proto/ServerMessage.capnp create mode 100644 dbms/src/Proto/protoHelpers.cpp create mode 100644 dbms/src/Proto/protoHelpers.h diff --git a/dbms/programs/client/CMakeLists.txt b/dbms/programs/client/CMakeLists.txt index c7d2311b11e..0f84270c8c9 100644 --- a/dbms/programs/client/CMakeLists.txt +++ b/dbms/programs/client/CMakeLists.txt @@ -1,5 +1,6 @@ add_library (clickhouse-client-lib Client.cpp) -target_link_libraries (clickhouse-client-lib clickhouse_functions clickhouse_aggregate_functions ${LINE_EDITING_LIBS} 
${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries (clickhouse-client-lib clickhouse_functions clickhouse_aggregate_functions clickhouse_proto + ${LINE_EDITING_LIBS} ${Boost_PROGRAM_OPTIONS_LIBRARY}) target_include_directories (clickhouse-client-lib SYSTEM PRIVATE ${READLINE_INCLUDE_DIR}) if (CLICKHOUSE_SPLIT_BINARY) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 2a26a30c0b8..84d2d640d80 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -54,9 +54,9 @@ #include #include #include +#include #include - /// http://en.wikipedia.org/wiki/ANSI_escape_code /// Similar codes \e[s, \e[u don't work in VT100 and Mosh. @@ -1086,6 +1086,10 @@ private: last_exception = std::move(packet.exception); return false; + case Protocol::Server::CapnProto: + loadContext(packet.block.getColumnsWithTypeAndName()[0], context); + return receiveSampleBlock(out); + default: throw NetException("Unexpected packet from server (expected Data, got " + String(Protocol::Server::toString(packet.type)) + ")", ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER); diff --git a/dbms/src/CMakeLists.txt b/dbms/src/CMakeLists.txt index 30bd7c134cc..25b84ec1acc 100644 --- a/dbms/src/CMakeLists.txt +++ b/dbms/src/CMakeLists.txt @@ -14,3 +14,6 @@ add_subdirectory (Client) add_subdirectory (TableFunctions) add_subdirectory (Analyzers) add_subdirectory (Formats) +if (USE_CAPNP) + add_subdirectory (Proto) +endif () diff --git a/dbms/src/Core/Protocol.h b/dbms/src/Core/Protocol.h index cd5456cca34..5006173dacb 100644 --- a/dbms/src/Core/Protocol.h +++ b/dbms/src/Core/Protocol.h @@ -69,6 +69,7 @@ namespace Protocol Totals = 7, /// A block with totals (compressed or not). Extremes = 8, /// A block with minimums and maximums (compressed or not). TablesStatusResponse = 9, /// A response to TablesStatus request. 
+ CapnProto = 10, /// Cap'n Proto }; /// NOTE: If the type of packet argument would be Enum, the comparison packet >= 0 && packet < 10 @@ -77,8 +78,9 @@ namespace Protocol /// See https://www.securecoding.cert.org/confluence/display/cplusplus/INT36-CPP.+Do+not+use+out-of-range+enumeration+values inline const char * toString(UInt64 packet) { - static const char * data[] = { "Hello", "Data", "Exception", "Progress", "Pong", "EndOfStream", "ProfileInfo", "Totals", "Extremes", "TablesStatusResponse" }; - return packet < 10 + static const char * data[] = { "Hello", "Data", "Exception", "Progress", "Pong", "EndOfStream", "ProfileInfo", "Totals", + "Extremes", "TablesStatusResponse", "CapnProto" }; + return packet < 11 ? data[packet] : "Unknown packet"; } @@ -97,14 +99,18 @@ namespace Protocol Cancel = 3, /// Cancel the query execution. Ping = 4, /// Check that connection to the server is alive. TablesStatusRequest = 5, /// Check status of tables on the server. + /// + CapnProto = 10, /// Cap'n Proto }; inline const char * toString(UInt64 packet) { - static const char * data[] = { "Hello", "Query", "Data", "Cancel", "Ping", "TablesStatusRequest" }; - return packet < 6 + static const char * unknown = "Unknown packet"; + static const char * data[] = { "Hello", "Query", "Data", "Cancel", "Ping", "TablesStatusRequest", + unknown, unknown, unknown, unknown, "CapnProto" }; + return packet < 11 ? 
data[packet] - : "Unknown packet"; + : unknown; } } diff --git a/dbms/src/Proto/CMakeLists.txt b/dbms/src/Proto/CMakeLists.txt new file mode 100644 index 00000000000..b3871cd53e8 --- /dev/null +++ b/dbms/src/Proto/CMakeLists.txt @@ -0,0 +1,13 @@ +set (CAPNP_PATH ${CMAKE_BINARY_DIR}/contrib/capnproto/c++/src/capnp) +set (CAPNP_BIN ${CAPNP_PATH}/capnp) + +add_custom_command (OUTPUT ServerMessage.capnp.c++ ServerMessage.capnp.h + COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/ServerMessage.capnp ${CMAKE_CURRENT_BINARY_DIR}/ServerMessage.capnp + COMMAND ${CMAKE_COMMAND} -E env PATH=${CAPNP_PATH} ${CAPNP_BIN} compile -I ${CAPNP_INCLUDE_DIR} -oc++ ServerMessage.capnp + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/ServerMessage.capnp) + +add_library (clickhouse_proto ServerMessage.capnp.c++ protoHelpers.cpp) +target_link_libraries (clickhouse_proto clickhouse_common_io ${CAPNP_LIBRARY}) +target_include_directories (clickhouse_proto PUBLIC ${CAPNP_INCLUDE_DIR} ${DBMS_INCLUDE_DIR}) +target_include_directories (clickhouse_proto PRIVATE + ${CMAKE_CURRENT_BINARY_DIR} ${COMMON_INCLUDE_DIR} ${DBMS_INCLUDE_DIR} ${CITYHASH_CONTRIB_INCLUDE_DIR}) diff --git a/dbms/src/Proto/ServerMessage.capnp b/dbms/src/Proto/ServerMessage.capnp new file mode 100644 index 00000000000..317430fce91 --- /dev/null +++ b/dbms/src/Proto/ServerMessage.capnp @@ -0,0 +1,34 @@ +@0xfdd1e2948338b156; + +using Cxx = import "/capnp/c++.capnp"; +$Cxx.namespace("DB::Proto"); + +struct ColumnDefault +{ + kind @0 :UInt16; + expression @1 :Text; +} + +struct Column +{ + name @0 :Text; + type @1 :Text; + default @2 :ColumnDefault; +} + +struct Table +{ + name @0 :Text; + columns @1 :List(Column); +} + +struct Database +{ + name @0 :Text; + tables @1 :List(Table); +} + +struct Context +{ + databases @0 :List(Database); +} diff --git a/dbms/src/Proto/protoHelpers.cpp b/dbms/src/Proto/protoHelpers.cpp new file mode 100644 index 00000000000..9af677156b7 --- /dev/null +++ 
b/dbms/src/Proto/protoHelpers.cpp @@ -0,0 +1,98 @@ +#include "protoHelpers.h" +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + + +namespace DB +{ + ColumnWithTypeAndName storeContext(Context & context) + { + capnp::MallocMessageBuilder message; + Proto::Context::Builder proto_context = message.initRoot(); + + Databases dbs = context.getDatabases(); + auto proto_databases = proto_context.initDatabases(dbs.size()); + + size_t db_nomber = 0; + for (auto & pr_db : dbs) + { + const String& db_name = pr_db.first; + IDatabase& db = *pr_db.second; + + auto proto_db = proto_databases[db_nomber]; + proto_db.setName(db_name); + + std::unordered_map tables; + DatabaseIteratorPtr it_tables = db.getIterator(context); + while (it_tables->isValid()) + { + tables[it_tables->name()] = it_tables->table(); + it_tables->next(); + } + + auto proto_tables = proto_db.initTables(tables.size()); + size_t table_no = 0; + for (const auto & pr_table : tables) + { + auto current_table = proto_tables[table_no]; + current_table.setName(pr_table.first); + + const ColumnsDescription & columns = pr_table.second->getColumns(); + auto proto_columns = current_table.initColumns(columns.defaults.size()); + + size_t column_no = 0; + for (const auto& pr_column : columns.defaults) + { + const String & column_name = pr_column.first; + const ColumnDefault & def = pr_column.second; + std::stringstream ss; + ss << def.expression; + + auto current_column = proto_columns[column_no]; + current_column.setName(column_name); + current_column.getDefault().setKind(static_cast(def.kind)); + current_column.getDefault().setExpression(ss.str()); + + ++column_no; + } + + ++table_no; + } + + ++db_nomber; + } + + ColumnWithTypeAndName proto_column; + proto_column.name = "context"; + proto_column.type = std::make_shared(); + MutableColumnPtr data = proto_column.type->createColumn(); + + kj::Array serialized = messageToFlatArray(message); + 
data->insertData(reinterpret_cast(serialized.begin()), serialized.size() * sizeof(capnp::word)); + + proto_column.column = std::move(data); + return proto_column; + } + + void loadContext(const ColumnWithTypeAndName & , Context & ) + { +#if 0 + kj::Array messageToFlatArray(MessageBuilder& builder); + + capnp::MallocMessageBuilder message; + Proto::ServerMessage::Builder serverMessage = message.initRoot(); + /// TODO +#endif + } +} diff --git a/dbms/src/Proto/protoHelpers.h b/dbms/src/Proto/protoHelpers.h new file mode 100644 index 00000000000..64c99995722 --- /dev/null +++ b/dbms/src/Proto/protoHelpers.h @@ -0,0 +1,11 @@ +#pragma once + + +namespace DB +{ + class Context; + struct ColumnWithTypeAndName; + + ColumnWithTypeAndName storeContext(Context & context); + void loadContext(const ColumnWithTypeAndName & proto_column, Context & context); +} From 597778c074368b7f1759f1b157c927c156eac911 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 6 Jul 2018 22:19:06 +0300 Subject: [PATCH 08/90] metadata in capn proto (in progress) --- dbms/src/Proto/protoHelpers.cpp | 81 ++++++++++++++++++++++++++------- 1 file changed, 65 insertions(+), 16 deletions(-) diff --git a/dbms/src/Proto/protoHelpers.cpp b/dbms/src/Proto/protoHelpers.cpp index 9af677156b7..567faaba2f0 100644 --- a/dbms/src/Proto/protoHelpers.cpp +++ b/dbms/src/Proto/protoHelpers.cpp @@ -4,18 +4,39 @@ #include #include #include +#include +#include #include #include #include #include -#include -#include -#include +#include namespace DB { + template + static MutableColumnPtr serializeProto(const ColumnT & column_type, capnp::MessageBuilder & message) + { + MutableColumnPtr data = column_type.createColumn(); + + kj::Array serialized = messageToFlatArray(message); + + data->insertData(reinterpret_cast(serialized.begin()), serialized.size() * sizeof(capnp::word)); + return data; + } + + template + typename T::Reader deserializeProto(const char * data, size_t data_size) + { + const capnp::word * ptr = 
reinterpret_cast(data); + auto serialized = kj::arrayPtr(ptr, data_size / sizeof(capnp::word)); + + capnp::FlatArrayMessageReader reader(serialized); + return reader.getRoot(); + } + ColumnWithTypeAndName storeContext(Context & context) { capnp::MallocMessageBuilder message; @@ -76,23 +97,51 @@ namespace DB ColumnWithTypeAndName proto_column; proto_column.name = "context"; proto_column.type = std::make_shared(); - MutableColumnPtr data = proto_column.type->createColumn(); - - kj::Array serialized = messageToFlatArray(message); - data->insertData(reinterpret_cast(serialized.begin()), serialized.size() * sizeof(capnp::word)); - - proto_column.column = std::move(data); + proto_column.column = std::move(serializeProto(*proto_column.type, message)); return proto_column; } - void loadContext(const ColumnWithTypeAndName & , Context & ) + void loadContext(const ColumnWithTypeAndName & proto_column, Context & context) { -#if 0 - kj::Array messageToFlatArray(MessageBuilder& builder); + StringRef plain_data = proto_column.column->getDataAt(0); + size_t data_size = proto_column.column->byteSize(); + Proto::Context::Reader proto_context = deserializeProto(plain_data.data, data_size); - capnp::MallocMessageBuilder message; - Proto::ServerMessage::Builder serverMessage = message.initRoot(); - /// TODO -#endif + // or ParserCompoundColumnDeclaration ? 
+ ParserColumnDeclaration parser_defaults; + + for (auto proto_database : proto_context.getDatabases()) + { + String database_name = proto_database.getName().cStr(); + if (!context.isDatabaseExist(database_name)) + { + // TODO + } + + for (auto proto_table : proto_database.getTables()) + { + String table_name = proto_table.getName().cStr(); + if (!context.isTableExist(database_name, table_name)) + { + // TODO + } + + StoragePtr table = context.tryGetTable(database_name, table_name); + // TODO: throw on fail + + ColumnsDescription column_description; + for (auto column : proto_table.getColumns()) + { + String column_name = column.getName().cStr(); + String expression = column.getDefault().getExpression().cStr(); + ColumnDefaultKind expression_kind = static_cast(column.getDefault().getKind()); + ASTPtr ast = parseQuery(parser_defaults, expression, expression.size()); + + column_description.defaults[column_name] = ColumnDefault{expression_kind, ast}; + } + + table->setColumns(column_description); + } + } } } From 2e4c2328af07de9e229da4f498f288255d0155fd Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 9 Jul 2018 19:31:24 +0300 Subject: [PATCH 09/90] metadata in capn proto format (in progress - need apply on client) --- dbms/programs/client/Client.cpp | 2 +- dbms/programs/server/TCPHandler.cpp | 19 ++++++++++ dbms/programs/server/TCPHandler.h | 1 + dbms/src/Client/Connection.cpp | 1 + dbms/src/Proto/protoHelpers.cpp | 59 ++++++++++++++++++++--------- dbms/src/Proto/protoHelpers.h | 6 +-- 6 files changed, 67 insertions(+), 21 deletions(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 84d2d640d80..3c7f5e013ce 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -1087,7 +1087,7 @@ private: return false; case Protocol::Server::CapnProto: - loadContext(packet.block.getColumnsWithTypeAndName()[0], context); + loadContextBlock(packet.block, context); return receiveSampleBlock(out); default: diff --git 
a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp index 53ca6c8699f..128dc0090bd 100644 --- a/dbms/programs/server/TCPHandler.cpp +++ b/dbms/programs/server/TCPHandler.cpp @@ -28,6 +28,8 @@ #include +#include + #include "TCPHandler.h" #include @@ -310,6 +312,10 @@ void TCPHandler::processInsertQuery(const Settings & global_settings) */ state.io.out->writePrefix(); + /// Send query metadata (column defaults) + Block meta_block = storeContextBlock(query_context); + sendMetadata(meta_block); + /// Send block to the client - table structure. Block block = state.io.out->getHeader(); sendData(block); @@ -762,6 +768,19 @@ void TCPHandler::sendData(const Block & block) } +void TCPHandler::sendMetadata(const Block & block) +{ + initBlockOutput(block); + + writeVarUInt(Protocol::Server::CapnProto, *out); + writeStringBinary("", *out); + + state.block_out->write(block); + state.maybe_compressed_out->next(); + out->next(); +} + + void TCPHandler::sendException(const Exception & e) { writeVarUInt(Protocol::Server::Exception, *out); diff --git a/dbms/programs/server/TCPHandler.h b/dbms/programs/server/TCPHandler.h index e01987d3bbd..0a764ac9e2a 100644 --- a/dbms/programs/server/TCPHandler.h +++ b/dbms/programs/server/TCPHandler.h @@ -139,6 +139,7 @@ private: void sendHello(); void sendData(const Block & block); /// Write a block to the network. 
+ void sendMetadata(const Block & block); void sendException(const Exception & e); void sendProgress(); void sendEndOfStream(); diff --git a/dbms/src/Client/Connection.cpp b/dbms/src/Client/Connection.cpp index c461b4cafde..7dce0afa0c0 100644 --- a/dbms/src/Client/Connection.cpp +++ b/dbms/src/Client/Connection.cpp @@ -521,6 +521,7 @@ Connection::Packet Connection::receivePacket() switch (res.type) { case Protocol::Server::Data: + case Protocol::Server::CapnProto: res.block = receiveData(); return res; diff --git a/dbms/src/Proto/protoHelpers.cpp b/dbms/src/Proto/protoHelpers.cpp index 567faaba2f0..a95e5be8964 100644 --- a/dbms/src/Proto/protoHelpers.cpp +++ b/dbms/src/Proto/protoHelpers.cpp @@ -5,25 +5,30 @@ #include #include #include -#include +#include #include #include +#include #include #include #include +/// @sa https://capnproto.org/cxx.html namespace DB { - template - static MutableColumnPtr serializeProto(const ColumnT & column_type, capnp::MessageBuilder & message) + static MutableColumnPtr serializeProto(capnp::MessageBuilder & message) { - MutableColumnPtr data = column_type.createColumn(); + MutableColumnPtr data = DataTypeUInt8().createColumn(); kj::Array serialized = messageToFlatArray(message); + kj::ArrayPtr bytes = serialized.asChars(); + + data->reserve(bytes.size()); + for (size_t i = 0 ; i < bytes.size(); ++i) + data->insertData(&bytes[i], 1); - data->insertData(reinterpret_cast(serialized.begin()), serialized.size() * sizeof(capnp::word)); return data; } @@ -37,7 +42,7 @@ namespace DB return reader.getRoot(); } - ColumnWithTypeAndName storeContext(Context & context) + static ColumnWithTypeAndName storeContext(const String & column_name, Context & context) { capnp::MallocMessageBuilder message; Proto::Context::Builder proto_context = message.initRoot(); @@ -48,11 +53,14 @@ namespace DB size_t db_nomber = 0; for (auto & pr_db : dbs) { - const String& db_name = pr_db.first; - IDatabase& db = *pr_db.second; + const String & database_name = 
pr_db.first; + if (database_name == "system") + continue; + + IDatabase & db = *pr_db.second; auto proto_db = proto_databases[db_nomber]; - proto_db.setName(db_name); + proto_db.setName(database_name); std::unordered_map tables; DatabaseIteratorPtr it_tables = db.getIterator(context); @@ -95,24 +103,23 @@ namespace DB } ColumnWithTypeAndName proto_column; - proto_column.name = "context"; - proto_column.type = std::make_shared(); - proto_column.column = std::move(serializeProto(*proto_column.type, message)); + proto_column.name = column_name; + proto_column.type = std::make_shared(); + proto_column.column = std::move(serializeProto(message)); return proto_column; } - void loadContext(const ColumnWithTypeAndName & proto_column, Context & context) + static void loadContext(const ColumnWithTypeAndName & proto_column, Context & context) { StringRef plain_data = proto_column.column->getDataAt(0); size_t data_size = proto_column.column->byteSize(); Proto::Context::Reader proto_context = deserializeProto(plain_data.data, data_size); - // or ParserCompoundColumnDeclaration ? 
- ParserColumnDeclaration parser_defaults; + ParserExpressionElement parser; for (auto proto_database : proto_context.getDatabases()) { - String database_name = proto_database.getName().cStr(); + const String & database_name = proto_database.getName().cStr(); if (!context.isDatabaseExist(database_name)) { // TODO @@ -135,8 +142,8 @@ namespace DB String column_name = column.getName().cStr(); String expression = column.getDefault().getExpression().cStr(); ColumnDefaultKind expression_kind = static_cast(column.getDefault().getKind()); - ASTPtr ast = parseQuery(parser_defaults, expression, expression.size()); + ASTPtr ast = parseQuery(parser, expression, expression.size()); column_description.defaults[column_name] = ColumnDefault{expression_kind, ast}; } @@ -144,4 +151,22 @@ namespace DB } } } + + static constexpr const char * contextColumnName() + { + return "context"; + } + + Block storeContextBlock(Context & context) + { + Block block; + block.insert(storeContext(contextColumnName(), context)); + return block; + } + + void loadContextBlock(const Block & block, Context & context) + { + const ColumnWithTypeAndName & column = block.getByName(contextColumnName()); + loadContext(column, context); + } } diff --git a/dbms/src/Proto/protoHelpers.h b/dbms/src/Proto/protoHelpers.h index 64c99995722..14512d3453c 100644 --- a/dbms/src/Proto/protoHelpers.h +++ b/dbms/src/Proto/protoHelpers.h @@ -4,8 +4,8 @@ namespace DB { class Context; - struct ColumnWithTypeAndName; + class Block; - ColumnWithTypeAndName storeContext(Context & context); - void loadContext(const ColumnWithTypeAndName & proto_column, Context & context); + Block storeContextBlock(Context & context); + void loadContextBlock(const Block & block, Context & context); } From bac1f77620492b5096600479972cba42257db743 Mon Sep 17 00:00:00 2001 From: chertus Date: Tue, 10 Jul 2018 14:40:33 +0300 Subject: [PATCH 10/90] insert defaults (first working version) --- dbms/programs/client/Client.cpp | 28 
+++++++++++----------------- dbms/src/Proto/protoHelpers.cpp | 28 +++++++++------------------- dbms/src/Proto/protoHelpers.h | 14 +++++++++++++- 3 files changed, 33 insertions(+), 37 deletions(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 3c7f5e013ce..414ba81f3c5 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -865,11 +865,12 @@ private: /// Receive description of table structure. Block sample; - if (receiveSampleBlock(sample)) + TableMetaInfo table_meta(parsed_insert_query.database, parsed_insert_query.table); + if (receiveSampleBlock(sample, table_meta)) { /// If structure was received (thus, server has not thrown an exception), /// send our data with that structure. - sendData(sample); + sendData(sample, table_meta); receivePacket(); } } @@ -907,7 +908,7 @@ private: } - void sendData(Block & sample) + void sendData(Block & sample, const TableMetaInfo & table_meta) { /// If INSERT data must be sent. const ASTInsertQuery * parsed_insert_query = typeid_cast(&*parsed_query); @@ -918,35 +919,28 @@ private: { /// Send data contained in the query. ReadBufferFromMemory data_in(parsed_insert_query->data, parsed_insert_query->end - parsed_insert_query->data); - sendDataFrom(data_in, sample); + sendDataFrom(data_in, sample, table_meta); } else if (!is_interactive) { /// Send data read from stdin. - sendDataFrom(std_in, sample); + sendDataFrom(std_in, sample, table_meta); } else throw Exception("No data to insert", ErrorCodes::NO_DATA_TO_INSERT); } - void sendDataFrom(ReadBuffer & buf, Block & sample) + void sendDataFrom(ReadBuffer & buf, Block & sample, const TableMetaInfo & table_meta) { String current_format = insert_format; - ColumnDefaults column_defaults; + const ColumnDefaults & column_defaults = table_meta.column_defaults; /// Data format can be specified in the INSERT query. 
if (ASTInsertQuery * insert = typeid_cast(&*parsed_query)) { if (!insert->format.empty()) current_format = insert->format; - - if (context.isTableExist(insert->database, insert->table)) - { - StoragePtr table = context.getTable(insert->database, insert->table); - if (table) - column_defaults = table->getColumns().defaults; - } } BlockInputStreamPtr block_input = context.getInputFormat( @@ -1071,7 +1065,7 @@ private: /// Receive the block that serves as an example of the structure of table where data will be inserted. - bool receiveSampleBlock(Block & out) + bool receiveSampleBlock(Block & out, TableMetaInfo & table_meta) { Connection::Packet packet = connection->receivePacket(); @@ -1087,8 +1081,8 @@ private: return false; case Protocol::Server::CapnProto: - loadContextBlock(packet.block, context); - return receiveSampleBlock(out); + loadTableMetaInfo(packet.block, table_meta); + return receiveSampleBlock(out, table_meta); default: throw NetException("Unexpected packet from server (expected Data, got " diff --git a/dbms/src/Proto/protoHelpers.cpp b/dbms/src/Proto/protoHelpers.cpp index a95e5be8964..2ae1fda40f4 100644 --- a/dbms/src/Proto/protoHelpers.cpp +++ b/dbms/src/Proto/protoHelpers.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include @@ -109,7 +109,7 @@ namespace DB return proto_column; } - static void loadContext(const ColumnWithTypeAndName & proto_column, Context & context) + static void loadTableMetaInfo(const ColumnWithTypeAndName & proto_column, TableMetaInfo & table_meta) { StringRef plain_data = proto_column.column->getDataAt(0); size_t data_size = proto_column.column->byteSize(); @@ -120,23 +120,15 @@ namespace DB for (auto proto_database : proto_context.getDatabases()) { const String & database_name = proto_database.getName().cStr(); - if (!context.isDatabaseExist(database_name)) - { - // TODO - } + if (database_name != table_meta.database) + continue; for (auto proto_table : proto_database.getTables()) { 
String table_name = proto_table.getName().cStr(); - if (!context.isTableExist(database_name, table_name)) - { - // TODO - } + if (table_name != table_meta.table) + continue; - StoragePtr table = context.tryGetTable(database_name, table_name); - // TODO: throw on fail - - ColumnsDescription column_description; for (auto column : proto_table.getColumns()) { String column_name = column.getName().cStr(); @@ -144,10 +136,8 @@ namespace DB ColumnDefaultKind expression_kind = static_cast(column.getDefault().getKind()); ASTPtr ast = parseQuery(parser, expression, expression.size()); - column_description.defaults[column_name] = ColumnDefault{expression_kind, ast}; + table_meta.column_defaults.emplace(column_name, ColumnDefault{expression_kind, ast}); } - - table->setColumns(column_description); } } } @@ -164,9 +154,9 @@ namespace DB return block; } - void loadContextBlock(const Block & block, Context & context) + void loadTableMetaInfo(const Block & block, TableMetaInfo & table_meta) { const ColumnWithTypeAndName & column = block.getByName(contextColumnName()); - loadContext(column, context); + loadTableMetaInfo(column, table_meta); } } diff --git a/dbms/src/Proto/protoHelpers.h b/dbms/src/Proto/protoHelpers.h index 14512d3453c..ee3da1649a1 100644 --- a/dbms/src/Proto/protoHelpers.h +++ b/dbms/src/Proto/protoHelpers.h @@ -1,11 +1,23 @@ #pragma once +#include namespace DB { class Context; class Block; + struct TableMetaInfo + { + TableMetaInfo(const String & database_, const String & table_) + : database(database_), table(table_) + {} + + const String & database; + const String & table; + ColumnDefaults column_defaults; + }; + Block storeContextBlock(Context & context); - void loadContextBlock(const Block & block, Context & context); + void loadTableMetaInfo(const Block & block, TableMetaInfo & table_meta); } From 5036309d26f0423b5adeab0dd59e96d5525c2701 Mon Sep 17 00:00:00 2001 From: chertus Date: Tue, 10 Jul 2018 16:33:41 +0300 Subject: [PATCH 11/90] fix const defaults --- 
dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp index 82c36bfab85..99f5f28f88d 100644 --- a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp +++ b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp @@ -69,7 +69,12 @@ Block AddingDefaultsBlockInputStream::readImpl() for (size_t row_idx = 0; row_idx < column_read.column->size(); ++row_idx) { if (mask[row_idx]) - column_mixed->insertFrom(*column_def.column, row_idx); + { + if (column_def.column->isColumnConst()) + column_mixed->insert((*column_def.column)[row_idx]); + else + column_mixed->insertFrom(*column_def.column, row_idx); + } else column_mixed->insertFrom(*column_read.column, row_idx); } From 519102b11037759ce0b3b44cddffbc07f6516b93 Mon Sep 17 00:00:00 2001 From: chertus Date: Tue, 10 Jul 2018 20:20:55 +0300 Subject: [PATCH 12/90] defaults for http inserts --- dbms/programs/client/Client.cpp | 11 +++++---- .../InputStreamFromASTInsertQuery.cpp | 10 ++++++-- dbms/src/Proto/protoHelpers.cpp | 7 +++--- dbms/src/Proto/protoHelpers.h | 16 ++----------- dbms/src/Storages/TableMetadata.cpp | 14 +++++++++++ dbms/src/Storages/TableMetadata.h | 23 +++++++++++++++++++ 6 files changed, 57 insertions(+), 24 deletions(-) create mode 100644 dbms/src/Storages/TableMetadata.cpp create mode 100644 dbms/src/Storages/TableMetadata.h diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 414ba81f3c5..da18cbad6e1 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -55,6 +55,7 @@ #include #include #include +#include #include /// http://en.wikipedia.org/wiki/ANSI_escape_code @@ -865,7 +866,7 @@ private: /// Receive description of table structure. 
Block sample; - TableMetaInfo table_meta(parsed_insert_query.database, parsed_insert_query.table); + TableMetadata table_meta(parsed_insert_query.database, parsed_insert_query.table); if (receiveSampleBlock(sample, table_meta)) { /// If structure was received (thus, server has not thrown an exception), @@ -908,7 +909,7 @@ private: } - void sendData(Block & sample, const TableMetaInfo & table_meta) + void sendData(Block & sample, const TableMetadata & table_meta) { /// If INSERT data must be sent. const ASTInsertQuery * parsed_insert_query = typeid_cast(&*parsed_query); @@ -931,7 +932,7 @@ private: } - void sendDataFrom(ReadBuffer & buf, Block & sample, const TableMetaInfo & table_meta) + void sendDataFrom(ReadBuffer & buf, Block & sample, const TableMetadata & table_meta) { String current_format = insert_format; const ColumnDefaults & column_defaults = table_meta.column_defaults; @@ -1065,7 +1066,7 @@ private: /// Receive the block that serves as an example of the structure of table where data will be inserted. 
- bool receiveSampleBlock(Block & out, TableMetaInfo & table_meta) + bool receiveSampleBlock(Block & out, TableMetadata & table_meta) { Connection::Packet packet = connection->receivePacket(); @@ -1081,7 +1082,7 @@ private: return false; case Protocol::Server::CapnProto: - loadTableMetaInfo(packet.block, table_meta); + loadTableMetadata(packet.block, table_meta); return receiveSampleBlock(out, table_meta); default: diff --git a/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp b/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp index 0e4f876925d..163fee44ea5 100644 --- a/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp +++ b/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp @@ -4,7 +4,8 @@ #include #include #include - +#include +#include namespace DB { @@ -43,7 +44,12 @@ InputStreamFromASTInsertQuery::InputStreamFromASTInsertQuery( input_buffer_contacenated = std::make_unique(buffers); - res_stream = context.getInputFormat(format, *input_buffer_contacenated, streams.out->getHeader(), context.getSettings().max_insert_block_size); + TableMetadata table_meta(ast_insert_query->database, ast_insert_query->table); + table_meta.loadFromContext(context); + + BlockInputStreamPtr block_input = + context.getInputFormat(format, *input_buffer_contacenated, streams.out->getHeader(), context.getSettings().max_insert_block_size); + res_stream = std::make_shared(block_input, table_meta.column_defaults, context); } } diff --git a/dbms/src/Proto/protoHelpers.cpp b/dbms/src/Proto/protoHelpers.cpp index 2ae1fda40f4..5121529c207 100644 --- a/dbms/src/Proto/protoHelpers.cpp +++ b/dbms/src/Proto/protoHelpers.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -109,7 +110,7 @@ namespace DB return proto_column; } - static void loadTableMetaInfo(const ColumnWithTypeAndName & proto_column, TableMetaInfo & table_meta) + static void loadTableMetadata(const ColumnWithTypeAndName & proto_column, TableMetadata & table_meta) { StringRef 
plain_data = proto_column.column->getDataAt(0); size_t data_size = proto_column.column->byteSize(); @@ -154,9 +155,9 @@ namespace DB return block; } - void loadTableMetaInfo(const Block & block, TableMetaInfo & table_meta) + void loadTableMetadata(const Block & block, TableMetadata & table_meta) { const ColumnWithTypeAndName & column = block.getByName(contextColumnName()); - loadTableMetaInfo(column, table_meta); + loadTableMetadata(column, table_meta); } } diff --git a/dbms/src/Proto/protoHelpers.h b/dbms/src/Proto/protoHelpers.h index ee3da1649a1..c0a514a5bb1 100644 --- a/dbms/src/Proto/protoHelpers.h +++ b/dbms/src/Proto/protoHelpers.h @@ -1,23 +1,11 @@ #pragma once -#include - namespace DB { class Context; class Block; - - struct TableMetaInfo - { - TableMetaInfo(const String & database_, const String & table_) - : database(database_), table(table_) - {} - - const String & database; - const String & table; - ColumnDefaults column_defaults; - }; + class TableMetadata; Block storeContextBlock(Context & context); - void loadTableMetaInfo(const Block & block, TableMetaInfo & table_meta); + void loadTableMetadata(const Block & block, TableMetadata & table_meta); } diff --git a/dbms/src/Storages/TableMetadata.cpp b/dbms/src/Storages/TableMetadata.cpp new file mode 100644 index 00000000000..e07c6a31e97 --- /dev/null +++ b/dbms/src/Storages/TableMetadata.cpp @@ -0,0 +1,14 @@ +#include +#include +#include "TableMetadata.h" + + +namespace DB +{ + void TableMetadata::loadFromContext(const Context & context) + { + StoragePtr storage = context.getTable(database, table); + const ColumnsDescription & table_columns = storage->getColumns(); + column_defaults = table_columns.defaults; + } +} diff --git a/dbms/src/Storages/TableMetadata.h b/dbms/src/Storages/TableMetadata.h new file mode 100644 index 00000000000..a88808ed96a --- /dev/null +++ b/dbms/src/Storages/TableMetadata.h @@ -0,0 +1,23 @@ +#pragma once + +#include + +namespace DB +{ + class Context; + class Block; + + /// 
Addition information for query that could not be get from sample block + struct TableMetadata + { + TableMetadata(const String & database_, const String & table_) + : database(database_), table(table_) + {} + + const String & database; + const String & table; + ColumnDefaults column_defaults; + + void loadFromContext(const Context & context); + }; +} From fe1b393e55636aac2e8743560281711854599f5a Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 11 Jul 2018 15:05:04 +0300 Subject: [PATCH 13/90] defaults for storages with formated input --- dbms/programs/client/Client.cpp | 5 +++-- dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp | 8 +++++--- dbms/src/Storages/StorageFile.cpp | 7 ++++++- dbms/src/Storages/StorageKafka.cpp | 5 +++++ dbms/src/Storages/StorageURL.cpp | 10 ++++++++-- 5 files changed, 27 insertions(+), 8 deletions(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index da18cbad6e1..af16a4998fb 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -947,8 +947,9 @@ private: BlockInputStreamPtr block_input = context.getInputFormat( current_format, buf, sample, insert_format_max_block_size); - BlockInputStreamPtr defs_block_input = std::make_shared(block_input, column_defaults, context); - BlockInputStreamPtr async_block_input = std::make_shared(defs_block_input); + if (!column_defaults.empty()) + block_input = std::make_shared(block_input, column_defaults, context); + BlockInputStreamPtr async_block_input = std::make_shared(block_input); async_block_input->readPrefix(); diff --git a/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp b/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp index 163fee44ea5..945fc782327 100644 --- a/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp +++ b/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp @@ -47,9 +47,11 @@ InputStreamFromASTInsertQuery::InputStreamFromASTInsertQuery( TableMetadata table_meta(ast_insert_query->database, 
ast_insert_query->table); table_meta.loadFromContext(context); - BlockInputStreamPtr block_input = - context.getInputFormat(format, *input_buffer_contacenated, streams.out->getHeader(), context.getSettings().max_insert_block_size); - res_stream = std::make_shared(block_input, table_meta.column_defaults, context); + res_stream = context.getInputFormat( + format, *input_buffer_contacenated, streams.out->getHeader(), context.getSettings().max_insert_block_size); + + if (!table_meta.column_defaults.empty()) + res_stream = std::make_shared(res_stream, table_meta.column_defaults, context); } } diff --git a/dbms/src/Storages/StorageFile.cpp b/dbms/src/Storages/StorageFile.cpp index c63e3a263ed..f24686cfea3 100644 --- a/dbms/src/Storages/StorageFile.cpp +++ b/dbms/src/Storages/StorageFile.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -194,7 +195,11 @@ BlockInputStreams StorageFile::read( size_t max_block_size, unsigned /*num_streams*/) { - return BlockInputStreams(1, std::make_shared(*this, context, max_block_size)); + BlockInputStreamPtr block_input = std::make_shared(*this, context, max_block_size); + const ColumnsDescription & columns = getColumns(); + if (columns.defaults.empty()) + return {block_input}; + return {std::make_shared(block_input, columns.defaults, context)}; } diff --git a/dbms/src/Storages/StorageKafka.cpp b/dbms/src/Storages/StorageKafka.cpp index a9666bab22c..9afbbfd656f 100644 --- a/dbms/src/Storages/StorageKafka.cpp +++ b/dbms/src/Storages/StorageKafka.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -145,6 +146,10 @@ public: LOG_TRACE(storage.log, "Creating formatted reader"); read_buf = std::make_unique(consumer->stream, storage.log); reader = FormatFactory::instance().getInput(storage.format_name, *read_buf, storage.getSampleBlock(), context, max_block_size); + + const ColumnsDescription & columns = getColumns(); + if (!columns.defaults.empty()) + reader = 
std::make_shared(reader, columns.defaults, context); } ~KafkaBlockInputStream() override diff --git a/dbms/src/Storages/StorageURL.cpp b/dbms/src/Storages/StorageURL.cpp index 1c3b8246492..fa8c30c24a6 100644 --- a/dbms/src/Storages/StorageURL.cpp +++ b/dbms/src/Storages/StorageURL.cpp @@ -13,6 +13,7 @@ #include #include +#include #include @@ -135,14 +136,19 @@ BlockInputStreams StorageURL::read( size_t max_block_size, unsigned /*num_streams*/) { - return {std::make_shared( + BlockInputStreamPtr block_input = std::make_shared( uri, format_name, getName(), getSampleBlock(), context, max_block_size, - ConnectionTimeouts::getHTTPTimeouts(context.getSettingsRef()))}; + ConnectionTimeouts::getHTTPTimeouts(context.getSettingsRef())); + + const ColumnsDescription & columns = getColumns(); + if (columns.defaults.empty()) + return {block_input}; + return {std::make_shared(block_input, columns.defaults, context)}; } void StorageURL::rename(const String & /*new_path_to_db*/, const String & /*new_database_name*/, const String & /*new_table_name*/) {} From 82d22574d40370e6c1c54ed682e8abd2f6403c92 Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 11 Jul 2018 17:11:47 +0300 Subject: [PATCH 14/90] fix empty defaults mask issue and add some comments --- dbms/src/Core/Block.h | 2 + dbms/src/Core/BlockInfo.cpp | 6 +-- dbms/src/Core/BlockInfo.h | 16 ++++---- .../AddingDefaultsBlockInputStream.cpp | 37 ++++++++++--------- dbms/src/Formats/IRowInputStream.h | 6 +-- 5 files changed, 37 insertions(+), 30 deletions(-) diff --git a/dbms/src/Core/Block.h b/dbms/src/Core/Block.h index b7c19548963..0db8954247e 100644 --- a/dbms/src/Core/Block.h +++ b/dbms/src/Core/Block.h @@ -35,6 +35,8 @@ private: public: BlockInfo info; + /// Input stream could use delayed_defaults to add addition info at which rows it have inserted default values. + /// Such values would be replaced later by column defaults in AddingDefaultsBlockInputStream (if any). 
BlockDelayedDefaults delayed_defaults; Block() = default; diff --git a/dbms/src/Core/BlockInfo.cpp b/dbms/src/Core/BlockInfo.cpp index 15d7d9efa12..f214d2782e3 100644 --- a/dbms/src/Core/BlockInfo.cpp +++ b/dbms/src/Core/BlockInfo.cpp @@ -60,14 +60,14 @@ void BlockInfo::read(ReadBuffer & in) void BlockDelayedDefaults::setBit(size_t column_idx, size_t row_idx) { - BitMask & mask = columns_defaults[column_idx]; + RowsBitMask & mask = columns_defaults[column_idx]; mask.resize(row_idx + 1); mask[row_idx] = true; } -const BlockDelayedDefaults::BitMask & BlockDelayedDefaults::getColumnBitmask(size_t column_idx) const +const BlockDelayedDefaults::RowsBitMask & BlockDelayedDefaults::getDefaultsBitmask(size_t column_idx) const { - static BitMask none; + static RowsBitMask none; auto it = columns_defaults.find(column_idx); if (it != columns_defaults.end()) return it->second; diff --git a/dbms/src/Core/BlockInfo.h b/dbms/src/Core/BlockInfo.h index 00c0821a33e..abc5383ddcb 100644 --- a/dbms/src/Core/BlockInfo.h +++ b/dbms/src/Core/BlockInfo.h @@ -45,22 +45,24 @@ struct BlockInfo void read(ReadBuffer & in); }; -/// Block extention to support delayed defaults. -/// It's expected that it would be lots unset defaults or none. -/// NOTE It's possible to make better solution for sparse values. +/// Block extention to support delayed defaults. Used in AddingDefaultsBlockInputStream to replace type defauls set by RowInputStream +/// with column defaults. 
class BlockDelayedDefaults { public: - using BitMask = std::vector; - using MaskById = std::unordered_map; + using RowsBitMask = std::vector; /// a bit per row for a column - const BitMask & getColumnBitmask(size_t column_idx) const; + const RowsBitMask & getDefaultsBitmask(size_t column_idx) const; void setBit(size_t column_idx, size_t row_idx); bool empty() const { return columns_defaults.empty(); } size_t size() const { return columns_defaults.size(); } private: - MaskById columns_defaults; + using RowsMaskByColumnId = std::unordered_map; + + /// If columns_defaults[column_id][row_id] is true related value in Block should be replaced with column default. + /// It could contain less columns and rows then related block. + RowsMaskByColumnId columns_defaults; }; } diff --git a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp index 99f5f28f88d..ffde573a24f 100644 --- a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp +++ b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp @@ -63,25 +63,28 @@ Block AddingDefaultsBlockInputStream::readImpl() if (column_read.column->size() != column_def.column->size()) throw Exception("Mismach column sizes while adding defaults", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); - const BlockDelayedDefaults::BitMask & mask = delayed_defaults.getColumnBitmask(block_column_position); - MutableColumnPtr column_mixed = column_read.column->cloneEmpty(); - - for (size_t row_idx = 0; row_idx < column_read.column->size(); ++row_idx) + const auto & defaults_mask = delayed_defaults.getDefaultsBitmask(block_column_position); + if (!defaults_mask.empty()) { - if (mask[row_idx]) - { - if (column_def.column->isColumnConst()) - column_mixed->insert((*column_def.column)[row_idx]); - else - column_mixed->insertFrom(*column_def.column, row_idx); - } - else - column_mixed->insertFrom(*column_read.column, row_idx); - } + MutableColumnPtr column_mixed = 
column_read.column->cloneEmpty(); - ColumnWithTypeAndName mix = column_read.cloneEmpty(); - mix.column = std::move(column_mixed); - mixed_columns.emplace_back(std::move(mix)); + for (size_t row_idx = 0; row_idx < column_read.column->size(); ++row_idx) + { + if (defaults_mask[row_idx]) + { + if (column_def.column->isColumnConst()) + column_mixed->insert((*column_def.column)[row_idx]); + else + column_mixed->insertFrom(*column_def.column, row_idx); + } + else + column_mixed->insertFrom(*column_read.column, row_idx); + } + + ColumnWithTypeAndName mix = column_read.cloneEmpty(); + mix.column = std::move(column_mixed); + mixed_columns.emplace_back(std::move(mix)); + } } for (auto & column : mixed_columns) diff --git a/dbms/src/Formats/IRowInputStream.h b/dbms/src/Formats/IRowInputStream.h index 1ffa205edc6..8b3e7043f3b 100644 --- a/dbms/src/Formats/IRowInputStream.h +++ b/dbms/src/Formats/IRowInputStream.h @@ -10,13 +10,13 @@ namespace DB { -/// A way to set some extentions to read and return extra information too. +/// A way to set some extentions to read and return extra information too. IRowInputStream.extendedRead() output. struct RowReadExtention { using BitMask = std::vector; - /// IRowInputStream.extendedRead() output value. - /// Contains true for columns that actually read from the source and false for defaults + /// Contains one bit per column in resently read row. IRowInputStream could leave it empty, or partialy set. + /// It should contain true for columns that actually read from the source and false for defaults. 
BitMask read_columns; }; From a765aef4dbdc3ea3a362b25161d82584301c61fe Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 11 Jul 2018 19:24:29 +0300 Subject: [PATCH 15/90] fix KafkaStorage build and add sql test for defaults --- dbms/src/Storages/StorageKafka.cpp | 2 +- .../00651_insert_json_with_defaults.reference | 7 +++++++ .../00651_insert_json_with_defaults.sql | 19 +++++++++++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.reference create mode 100644 dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.sql diff --git a/dbms/src/Storages/StorageKafka.cpp b/dbms/src/Storages/StorageKafka.cpp index 9afbbfd656f..43966aec22a 100644 --- a/dbms/src/Storages/StorageKafka.cpp +++ b/dbms/src/Storages/StorageKafka.cpp @@ -147,7 +147,7 @@ public: read_buf = std::make_unique(consumer->stream, storage.log); reader = FormatFactory::instance().getInput(storage.format_name, *read_buf, storage.getSampleBlock(), context, max_block_size); - const ColumnsDescription & columns = getColumns(); + const ColumnsDescription & columns = storage.getColumns(); if (!columns.defaults.empty()) reader = std::make_shared(reader, columns.defaults, context); } diff --git a/dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.reference b/dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.reference new file mode 100644 index 00000000000..f513c6d6f40 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.reference @@ -0,0 +1,7 @@ +0 0 6 6 6 +0 5 5 1.7917595 5 +1 1 2 1.0986123 42 +1 1 2 1.0986123 42 +2 2 4 1.609438 2 +3 3 3 3 3 +4 0 4 1.609438 42 diff --git a/dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.sql b/dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.sql new file mode 100644 index 00000000000..d25ecdc1f5a --- /dev/null +++ b/dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.sql @@ -0,0 +1,19 @@ 
+CREATE DATABASE IF NOT EXISTS test; +DROP TABLE IF EXISTS test.defaults; +CREATE TABLE IF NOT EXISTS test.defaults +( + x UInt32, + y UInt32, + a UInt32 DEFAULT x + y, + b Float32 DEFAULT log(1 + x + y), + c UInt32 DEFAULT 42 +) ENGINE = Memory; + +INSERT INTO test.defaults FORMAT JSONEachRow {"x":1, "y":1}; +INSERT INTO test.defaults (x, y) SELECT x, y FROM test.defaults LIMIT 1; +INSERT INTO test.defaults FORMAT JSONEachRow {"x":2, "y":2, "c":2}; +INSERT INTO test.defaults FORMAT JSONEachRow {"x":3, "y":3, "a":3, "b":3, "c":3}; +INSERT INTO test.defaults FORMAT JSONEachRow {"x":4} {"y":5, "c":5} {"a":6, "b":6, "c":6}; + +SELECT * FROM test.defaults ORDER BY (x, y); +DROP TABLE IF EXISTS test.defaults; From 3c39f2fc691865459aba046cc4515f3532047384 Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 11 Jul 2018 21:34:12 +0300 Subject: [PATCH 16/90] fix some tests --- dbms/src/DataStreams/RemoteBlockOutputStream.cpp | 6 +++++- dbms/src/DataStreams/RemoteBlockOutputStream.h | 1 + .../00651_insert_json_with_defaults.reference | 14 +++++++------- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/dbms/src/DataStreams/RemoteBlockOutputStream.cpp b/dbms/src/DataStreams/RemoteBlockOutputStream.cpp index d9095ec91b9..7d0c17407c7 100644 --- a/dbms/src/DataStreams/RemoteBlockOutputStream.cpp +++ b/dbms/src/DataStreams/RemoteBlockOutputStream.cpp @@ -33,13 +33,17 @@ RemoteBlockOutputStream::RemoteBlockOutputStream(Connection & connection_, const if (!header) throw Exception("Logical error: empty block received as table structure", ErrorCodes::LOGICAL_ERROR); } + else if (Protocol::Server::CapnProto == packet.type) + { + metadata = packet.block; + } else if (Protocol::Server::Exception == packet.type) { packet.exception->rethrow(); return; } else - throw NetException("Unexpected packet from server (expected Data or Exception, got " + throw NetException("Unexpected packet from server (expected Data, CapnProto or Exception, got " + 
String(Protocol::Server::toString(packet.type)) + ")", ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER); } diff --git a/dbms/src/DataStreams/RemoteBlockOutputStream.h b/dbms/src/DataStreams/RemoteBlockOutputStream.h index 41740c39837..e0269a1ea4d 100644 --- a/dbms/src/DataStreams/RemoteBlockOutputStream.h +++ b/dbms/src/DataStreams/RemoteBlockOutputStream.h @@ -35,6 +35,7 @@ private: String query; const Settings * settings; Block header; + Block metadata; bool finished = false; }; diff --git a/dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.reference b/dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.reference index f513c6d6f40..9d95fa08690 100644 --- a/dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.reference +++ b/dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.reference @@ -1,7 +1,7 @@ -0 0 6 6 6 -0 5 5 1.7917595 5 -1 1 2 1.0986123 42 -1 1 2 1.0986123 42 -2 2 4 1.609438 2 -3 3 3 3 3 -4 0 4 1.609438 42 +0 0 6 6 6 +0 5 5 1.7917595 5 +1 1 2 1.0986123 42 +1 1 2 1.0986123 42 +2 2 4 1.609438 2 +3 3 3 3 3 +4 0 4 1.609438 42 From 2876aadba77cd91818fdbcc6f0679533fed52a05 Mon Sep 17 00:00:00 2001 From: chertus Date: Thu, 12 Jul 2018 11:49:20 +0300 Subject: [PATCH 17/90] fix materialized --- dbms/src/Proto/protoHelpers.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/dbms/src/Proto/protoHelpers.cpp b/dbms/src/Proto/protoHelpers.cpp index 5121529c207..c59516d42c2 100644 --- a/dbms/src/Proto/protoHelpers.cpp +++ b/dbms/src/Proto/protoHelpers.cpp @@ -136,8 +136,11 @@ namespace DB String expression = column.getDefault().getExpression().cStr(); ColumnDefaultKind expression_kind = static_cast(column.getDefault().getKind()); - ASTPtr ast = parseQuery(parser, expression, expression.size()); - table_meta.column_defaults.emplace(column_name, ColumnDefault{expression_kind, ast}); + if (expression_kind == ColumnDefaultKind::Default) + { + ASTPtr ast = parseQuery(parser, expression, expression.size()); + 
table_meta.column_defaults.emplace(column_name, ColumnDefault{expression_kind, ast}); + } } } } From a7fcae2759d2285b7feece194f66270231367c38 Mon Sep 17 00:00:00 2001 From: chertus Date: Thu, 12 Jul 2018 13:03:49 +0300 Subject: [PATCH 18/90] fix defaults with list expressions --- dbms/src/Proto/protoHelpers.cpp | 4 ++-- .../queries/0_stateless/00651_insert_json_with_defaults.sql | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/dbms/src/Proto/protoHelpers.cpp b/dbms/src/Proto/protoHelpers.cpp index c59516d42c2..329cd40b5f7 100644 --- a/dbms/src/Proto/protoHelpers.cpp +++ b/dbms/src/Proto/protoHelpers.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include @@ -116,7 +116,7 @@ namespace DB size_t data_size = proto_column.column->byteSize(); Proto::Context::Reader proto_context = deserializeProto(plain_data.data, data_size); - ParserExpressionElement parser; + ParserTernaryOperatorExpression parser; for (auto proto_database : proto_context.getDatabases()) { diff --git a/dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.sql b/dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.sql index d25ecdc1f5a..270778d9b49 100644 --- a/dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.sql +++ b/dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.sql @@ -6,7 +6,10 @@ CREATE TABLE IF NOT EXISTS test.defaults y UInt32, a UInt32 DEFAULT x + y, b Float32 DEFAULT log(1 + x + y), - c UInt32 DEFAULT 42 + c UInt32 DEFAULT 42, + d DEFAULT x + y, + e MATERIALIZED x + y, + f ALIAS x + y ) ENGINE = Memory; INSERT INTO test.defaults FORMAT JSONEachRow {"x":1, "y":1}; From f89e476c0f540fb17a89e861053114f7dcd43b35 Mon Sep 17 00:00:00 2001 From: chertus Date: Thu, 12 Jul 2018 17:33:57 +0300 Subject: [PATCH 19/90] add option to disable sending metadata --- dbms/programs/server/TCPHandler.cpp | 8 ++++++-- dbms/src/Interpreters/Settings.h | 1 + dbms/src/Storages/StorageDistributed.cpp | 3 +++ 
.../0_stateless/00651_insert_json_with_defaults.sql | 3 +-- 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp index 128dc0090bd..9f8800ec1fe 100644 --- a/dbms/programs/server/TCPHandler.cpp +++ b/dbms/programs/server/TCPHandler.cpp @@ -313,8 +313,12 @@ void TCPHandler::processInsertQuery(const Settings & global_settings) state.io.out->writePrefix(); /// Send query metadata (column defaults) - Block meta_block = storeContextBlock(query_context); - sendMetadata(meta_block); + if (global_settings.insert_sample_with_metadata && + query_context.getSettingsRef().insert_sample_with_metadata) + { + Block meta_block = storeContextBlock(query_context); + sendMetadata(meta_block); + } /// Send block to the client - table structure. Block block = state.io.out->getHeader(); diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index c4d0d7654e6..8165cc776d9 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -123,6 +123,7 @@ struct Settings M(SettingUInt64, max_concurrent_queries_for_user, 0, "The maximum number of concurrent requests per user.") \ \ M(SettingBool, insert_deduplicate, true, "For INSERT queries in the replicated table, specifies that deduplication of insertings blocks should be preformed") \ + M(SettingBool, insert_sample_with_metadata, true, "For INSERT queries, specifies that need add metadata before sample block") \ \ M(SettingUInt64, insert_quorum, 0, "For INSERT queries in the replicated table, wait writing for the specified number of replicas and linearize the addition of the data. 
0 - disabled.") \ M(SettingMilliseconds, insert_quorum_timeout, 600000, "") \ diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp index a125806515c..49b20acf9eb 100644 --- a/dbms/src/Storages/StorageDistributed.cpp +++ b/dbms/src/Storages/StorageDistributed.cpp @@ -165,6 +165,9 @@ StorageDistributed::StorageDistributed( if (num_local_shards && remote_database == database_name && remote_table == table_name) throw Exception("Distributed table " + table_name + " looks at itself", ErrorCodes::INFINITE_LOOP); } + + /// HACK: disable metadata for StorageDistributed queries + const_cast(context).getSettingsRef().insert_sample_with_metadata = false; } diff --git a/dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.sql b/dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.sql index 270778d9b49..12b9e4538d1 100644 --- a/dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.sql +++ b/dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.sql @@ -4,10 +4,9 @@ CREATE TABLE IF NOT EXISTS test.defaults ( x UInt32, y UInt32, - a UInt32 DEFAULT x + y, + a DEFAULT x + y, b Float32 DEFAULT log(1 + x + y), c UInt32 DEFAULT 42, - d DEFAULT x + y, e MATERIALIZED x + y, f ALIAS x + y ) ENGINE = Memory; From c78a67d57316a39ec40116558d00ce5e0b93fb6b Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 13 Jul 2018 01:05:03 +0300 Subject: [PATCH 20/90] fix case with unexpected aliases --- dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp index ffde573a24f..7cc90f6a641 100644 --- a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp +++ b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp @@ -43,7 +43,11 @@ Block AddingDefaultsBlockInputStream::readImpl() Block evaluate_block{res}; for (const auto & column : 
column_defaults) - evaluate_block.erase(column.first); + { + /// column_defaults contain aliases that could be ommited in evaluate_block + if (evaluate_block.has(column.first)) + evaluate_block.erase(column.first); + } evaluateMissingDefaultsUnsafe(evaluate_block, header.getNamesAndTypesList(), column_defaults, context); From 4e0d9aa8d44e92f744925797c29e9df2a94f11b8 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 13 Jul 2018 15:46:29 +0300 Subject: [PATCH 21/90] revert unneeded changes --- dbms/programs/client/Client.cpp | 4 +--- dbms/src/DataStreams/AddingDefaultBlockOutputStream.cpp | 1 - dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp | 5 ++--- dbms/src/DataStreams/RemoteBlockOutputStream.cpp | 6 +----- dbms/src/DataStreams/RemoteBlockOutputStream.h | 1 - 5 files changed, 4 insertions(+), 13 deletions(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 5dddf053482..5ee25d38c33 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -936,18 +936,16 @@ private: void sendDataFrom(ReadBuffer & buf, Block & sample, const TableMetadata & table_meta) { String current_format = insert_format; - const ColumnDefaults & column_defaults = table_meta.column_defaults; /// Data format can be specified in the INSERT query. 
if (ASTInsertQuery * insert = typeid_cast(&*parsed_query)) - { if (!insert->format.empty()) current_format = insert->format; - } BlockInputStreamPtr block_input = context.getInputFormat( current_format, buf, sample, insert_format_max_block_size); + const ColumnDefaults & column_defaults = table_meta.column_defaults; if (!column_defaults.empty()) block_input = std::make_shared(block_input, column_defaults, context); BlockInputStreamPtr async_block_input = std::make_shared(block_input); diff --git a/dbms/src/DataStreams/AddingDefaultBlockOutputStream.cpp b/dbms/src/DataStreams/AddingDefaultBlockOutputStream.cpp index 7a1ab14e7fc..fe773b40776 100644 --- a/dbms/src/DataStreams/AddingDefaultBlockOutputStream.cpp +++ b/dbms/src/DataStreams/AddingDefaultBlockOutputStream.cpp @@ -6,7 +6,6 @@ #include #include #include -#include namespace DB diff --git a/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp b/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp index 945fc782327..855840a15ee 100644 --- a/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp +++ b/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp @@ -44,12 +44,11 @@ InputStreamFromASTInsertQuery::InputStreamFromASTInsertQuery( input_buffer_contacenated = std::make_unique(buffers); + res_stream = context.getInputFormat(format, *input_buffer_contacenated, streams.out->getHeader(), context.getSettings().max_insert_block_size); + TableMetadata table_meta(ast_insert_query->database, ast_insert_query->table); table_meta.loadFromContext(context); - res_stream = context.getInputFormat( - format, *input_buffer_contacenated, streams.out->getHeader(), context.getSettings().max_insert_block_size); - if (!table_meta.column_defaults.empty()) res_stream = std::make_shared(res_stream, table_meta.column_defaults, context); } diff --git a/dbms/src/DataStreams/RemoteBlockOutputStream.cpp b/dbms/src/DataStreams/RemoteBlockOutputStream.cpp index 7d0c17407c7..d9095ec91b9 100644 --- 
a/dbms/src/DataStreams/RemoteBlockOutputStream.cpp +++ b/dbms/src/DataStreams/RemoteBlockOutputStream.cpp @@ -33,17 +33,13 @@ RemoteBlockOutputStream::RemoteBlockOutputStream(Connection & connection_, const if (!header) throw Exception("Logical error: empty block received as table structure", ErrorCodes::LOGICAL_ERROR); } - else if (Protocol::Server::CapnProto == packet.type) - { - metadata = packet.block; - } else if (Protocol::Server::Exception == packet.type) { packet.exception->rethrow(); return; } else - throw NetException("Unexpected packet from server (expected Data, CapnProto or Exception, got " + throw NetException("Unexpected packet from server (expected Data or Exception, got " + String(Protocol::Server::toString(packet.type)) + ")", ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER); } diff --git a/dbms/src/DataStreams/RemoteBlockOutputStream.h b/dbms/src/DataStreams/RemoteBlockOutputStream.h index e0269a1ea4d..41740c39837 100644 --- a/dbms/src/DataStreams/RemoteBlockOutputStream.h +++ b/dbms/src/DataStreams/RemoteBlockOutputStream.h @@ -35,7 +35,6 @@ private: String query; const Settings * settings; Block header; - Block metadata; bool finished = false; }; From 2c4949dd5d5b735574dac9f32843a341737e8920 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 13 Jul 2018 16:47:13 +0300 Subject: [PATCH 22/90] fix build without CAPNP --- cmake/find_capnp.cmake | 4 ++++ dbms/programs/client/CMakeLists.txt | 2 +- dbms/programs/client/Client.cpp | 2 ++ dbms/programs/server/TCPHandler.cpp | 2 ++ dbms/src/Proto/CMakeLists.txt | 1 + dbms/src/Proto/protoHelpers.h | 3 +++ 6 files changed, 13 insertions(+), 1 deletion(-) diff --git a/cmake/find_capnp.cmake b/cmake/find_capnp.cmake index 03ecadda6a1..426031db15e 100644 --- a/cmake/find_capnp.cmake +++ b/cmake/find_capnp.cmake @@ -1,5 +1,9 @@ option (ENABLE_CAPNP "Enable Cap'n Proto" ${NOT_MSVC}) +unset (USE_CAPNP CACHE) +unset (USE_INTERNAL_CAPNP_LIBRARY CACHE) +unset (MISSING_INTERNAL_CAPNP_LIBRARY CACHE) + if (ENABLE_CAPNP) # 
cmake 3.5.1 bug: # capnproto uses this cmake feature: diff --git a/dbms/programs/client/CMakeLists.txt b/dbms/programs/client/CMakeLists.txt index 0f84270c8c9..659e8018a49 100644 --- a/dbms/programs/client/CMakeLists.txt +++ b/dbms/programs/client/CMakeLists.txt @@ -1,5 +1,5 @@ add_library (clickhouse-client-lib Client.cpp) -target_link_libraries (clickhouse-client-lib clickhouse_functions clickhouse_aggregate_functions clickhouse_proto +target_link_libraries (clickhouse-client-lib clickhouse_functions clickhouse_aggregate_functions ${PROTO_LIB} ${LINE_EDITING_LIBS} ${Boost_PROGRAM_OPTIONS_LIBRARY}) target_include_directories (clickhouse-client-lib SYSTEM PRIVATE ${READLINE_INCLUDE_DIR}) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 5ee25d38c33..74eff658133 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -1082,7 +1082,9 @@ private: return false; case Protocol::Server::CapnProto: +#if USE_CAPNP loadTableMetadata(packet.block, table_meta); +#endif return receiveSampleBlock(out, table_meta); default: diff --git a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp index 9f8800ec1fe..c5800aa11df 100644 --- a/dbms/programs/server/TCPHandler.cpp +++ b/dbms/programs/server/TCPHandler.cpp @@ -312,6 +312,7 @@ void TCPHandler::processInsertQuery(const Settings & global_settings) */ state.io.out->writePrefix(); +#if USE_CAPNP /// Send query metadata (column defaults) if (global_settings.insert_sample_with_metadata && query_context.getSettingsRef().insert_sample_with_metadata) @@ -319,6 +320,7 @@ void TCPHandler::processInsertQuery(const Settings & global_settings) Block meta_block = storeContextBlock(query_context); sendMetadata(meta_block); } +#endif /// Send block to the client - table structure. 
Block block = state.io.out->getHeader(); diff --git a/dbms/src/Proto/CMakeLists.txt b/dbms/src/Proto/CMakeLists.txt index b3871cd53e8..4ca555914f7 100644 --- a/dbms/src/Proto/CMakeLists.txt +++ b/dbms/src/Proto/CMakeLists.txt @@ -6,6 +6,7 @@ add_custom_command (OUTPUT ServerMessage.capnp.c++ ServerMessage.capnp.h COMMAND ${CMAKE_COMMAND} -E env PATH=${CAPNP_PATH} ${CAPNP_BIN} compile -I ${CAPNP_INCLUDE_DIR} -oc++ ServerMessage.capnp WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/ServerMessage.capnp) +set (PROTO_LIB clickhouse_proto) add_library (clickhouse_proto ServerMessage.capnp.c++ protoHelpers.cpp) target_link_libraries (clickhouse_proto clickhouse_common_io ${CAPNP_LIBRARY}) target_include_directories (clickhouse_proto PUBLIC ${CAPNP_INCLUDE_DIR} ${DBMS_INCLUDE_DIR}) diff --git a/dbms/src/Proto/protoHelpers.h b/dbms/src/Proto/protoHelpers.h index c0a514a5bb1..88e3c299fa2 100644 --- a/dbms/src/Proto/protoHelpers.h +++ b/dbms/src/Proto/protoHelpers.h @@ -1,4 +1,5 @@ #pragma once +#if USE_CAPNP namespace DB { @@ -9,3 +10,5 @@ namespace DB Block storeContextBlock(Context & context); void loadTableMetadata(const Block & block, TableMetadata & table_meta); } + +#endif From bc5d5bcf4a2a79e920b8aa238e2ada77a8af488e Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 13 Jul 2018 18:10:42 +0300 Subject: [PATCH 23/90] fix build with CAPNP --- dbms/CMakeLists.txt | 1 + dbms/src/Proto/CMakeLists.txt | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index eaf21b0b6ac..3c7a7ac7198 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -208,6 +208,7 @@ if (USE_CAPNP) if (NOT USE_INTERNAL_CAPNP_LIBRARY) target_include_directories (dbms SYSTEM BEFORE PRIVATE ${CAPNP_INCLUDE_DIR}) endif () + set (PROTO_LIB clickhouse_proto) endif () if (USE_RDKAFKA) diff --git a/dbms/src/Proto/CMakeLists.txt b/dbms/src/Proto/CMakeLists.txt index 4ca555914f7..b3871cd53e8 100644 --- 
a/dbms/src/Proto/CMakeLists.txt +++ b/dbms/src/Proto/CMakeLists.txt @@ -6,7 +6,6 @@ add_custom_command (OUTPUT ServerMessage.capnp.c++ ServerMessage.capnp.h COMMAND ${CMAKE_COMMAND} -E env PATH=${CAPNP_PATH} ${CAPNP_BIN} compile -I ${CAPNP_INCLUDE_DIR} -oc++ ServerMessage.capnp WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/ServerMessage.capnp) -set (PROTO_LIB clickhouse_proto) add_library (clickhouse_proto ServerMessage.capnp.c++ protoHelpers.cpp) target_link_libraries (clickhouse_proto clickhouse_common_io ${CAPNP_LIBRARY}) target_include_directories (clickhouse_proto PUBLIC ${CAPNP_INCLUDE_DIR} ${DBMS_INCLUDE_DIR}) From 1d7f6c32a06bf7dc042486ed92d50ed880a7bf88 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 13 Jul 2018 18:32:20 +0300 Subject: [PATCH 24/90] one more build fix --- libs/libcommon/include/common/config_common.h.in | 1 + 1 file changed, 1 insertion(+) diff --git a/libs/libcommon/include/common/config_common.h.in b/libs/libcommon/include/common/config_common.h.in index 0cc0950efba..bc24a0c6fe2 100644 --- a/libs/libcommon/include/common/config_common.h.in +++ b/libs/libcommon/include/common/config_common.h.in @@ -4,6 +4,7 @@ #cmakedefine01 USE_TCMALLOC #cmakedefine01 USE_JEMALLOC +#cmakedefine01 USE_CAPNP #cmakedefine01 USE_READLINE #cmakedefine01 USE_LIBEDIT #cmakedefine01 HAVE_READLINE_HISTORY From 53f1f4794dd440fcc842c0bc845460db732709da Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 13 Jul 2018 21:06:47 +0300 Subject: [PATCH 25/90] fix inserts into unknown table, ex. 
'table function url(...)' --- dbms/src/Storages/TableMetadata.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dbms/src/Storages/TableMetadata.cpp b/dbms/src/Storages/TableMetadata.cpp index e07c6a31e97..a489e5ba92e 100644 --- a/dbms/src/Storages/TableMetadata.cpp +++ b/dbms/src/Storages/TableMetadata.cpp @@ -7,6 +7,9 @@ namespace DB { void TableMetadata::loadFromContext(const Context & context) { + if (!context.isTableExist(database, table)) + return; + StoragePtr storage = context.getTable(database, table); const ColumnsDescription & table_columns = storage->getColumns(); column_defaults = table_columns.defaults; From 925e4c7dbb43845c4fad955f30bb01d2d2bde930 Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 16 Jul 2018 14:28:22 +0300 Subject: [PATCH 26/90] backward compatibility --- dbms/cmake/version.cmake | 4 ++-- dbms/programs/server/TCPHandler.cpp | 3 ++- dbms/src/Core/Defines.h | 1 + dbms/src/Proto/protoHelpers.cpp | 8 ++++++-- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake index bba2600d441..9d5cf8d5bf4 100644 --- a/dbms/cmake/version.cmake +++ b/dbms/cmake/version.cmake @@ -1,6 +1,6 @@ # This strings autochanged from release_lib.sh: -set(VERSION_DESCRIBE v1.1.54394-testing) -set(VERSION_REVISION 54394) +set(VERSION_DESCRIBE v1.1.54400-testing) +set(VERSION_REVISION 54400) set(VERSION_GITHASH 875ea0f4eaa3592f1fe628b6a1150d91b04ad574) # end of autochange diff --git a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp index c5800aa11df..e85c08a5574 100644 --- a/dbms/programs/server/TCPHandler.cpp +++ b/dbms/programs/server/TCPHandler.cpp @@ -314,7 +314,8 @@ void TCPHandler::processInsertQuery(const Settings & global_settings) #if USE_CAPNP /// Send query metadata (column defaults) - if (global_settings.insert_sample_with_metadata && + if (client_revision >= DBMS_MIN_REVISION_WITH_PROTO_METADATA && + global_settings.insert_sample_with_metadata && 
query_context.getSettingsRef().insert_sample_with_metadata) { Block meta_block = storeContextBlock(query_context); diff --git a/dbms/src/Core/Defines.h b/dbms/src/Core/Defines.h index 2ed07dce9b6..35198c2cb4f 100644 --- a/dbms/src/Core/Defines.h +++ b/dbms/src/Core/Defines.h @@ -49,6 +49,7 @@ #define DBMS_MIN_REVISION_WITH_TABLES_STATUS 54226 #define DBMS_MIN_REVISION_WITH_TIME_ZONE_PARAMETER_IN_DATETIME_DATA_TYPE 54337 #define DBMS_MIN_REVISION_WITH_SERVER_DISPLAY_NAME 54372 +#define DBMS_MIN_REVISION_WITH_PROTO_METADATA 54400 /// Version of ClickHouse TCP protocol. Set to git tag with latest protocol change. #define DBMS_TCP_PROTOCOL_VERSION 54226 diff --git a/dbms/src/Proto/protoHelpers.cpp b/dbms/src/Proto/protoHelpers.cpp index 329cd40b5f7..0d451625ea6 100644 --- a/dbms/src/Proto/protoHelpers.cpp +++ b/dbms/src/Proto/protoHelpers.cpp @@ -160,7 +160,11 @@ namespace DB void loadTableMetadata(const Block & block, TableMetadata & table_meta) { - const ColumnWithTypeAndName & column = block.getByName(contextColumnName()); - loadTableMetadata(column, table_meta); + /// select metadata type by column name + if (block.has(contextColumnName())) + { + const ColumnWithTypeAndName & column = block.getByName(contextColumnName()); + loadTableMetadata(column, table_meta); + } } } From 2cddb4b840553d439d0a394637fcfa5fabf3fd79 Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 16 Jul 2018 17:52:02 +0300 Subject: [PATCH 27/90] send metadata only for affected table --- dbms/programs/server/TCPHandler.cpp | 12 +- dbms/src/Interpreters/Context.cpp | 15 +++ dbms/src/Interpreters/Context.h | 3 + .../Interpreters/InterpreterInsertQuery.cpp | 7 ++ .../src/Interpreters/InterpreterInsertQuery.h | 2 + dbms/src/Interpreters/executeQuery.cpp | 8 ++ dbms/src/Proto/protoHelpers.cpp | 108 +++++++----------- dbms/src/Proto/protoHelpers.h | 2 +- dbms/src/Storages/TableMetadata.cpp | 5 +- dbms/src/Storages/TableMetadata.h | 3 +- 10 files changed, 91 insertions(+), 74 deletions(-) diff --git 
a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp index e85c08a5574..096eea8512c 100644 --- a/dbms/programs/server/TCPHandler.cpp +++ b/dbms/programs/server/TCPHandler.cpp @@ -25,6 +25,7 @@ #include #include +#include #include @@ -313,13 +314,16 @@ void TCPHandler::processInsertQuery(const Settings & global_settings) state.io.out->writePrefix(); #if USE_CAPNP - /// Send query metadata (column defaults) + /// Send table metadata (column defaults) if (client_revision >= DBMS_MIN_REVISION_WITH_PROTO_METADATA && - global_settings.insert_sample_with_metadata && query_context.getSettingsRef().insert_sample_with_metadata) { - Block meta_block = storeContextBlock(query_context); - sendMetadata(meta_block); + TableMetadata table_meta(query_context.getCurrentDatabase(), query_context.getCurrentTable()); + if (table_meta.loadFromContext(query_context) && table_meta.hasDefaults()) + { + Block meta_block = storeTableMetadata(table_meta); + sendMetadata(meta_block); + } } #endif diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 9fed370cfbc..4b48b6e233c 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1003,6 +1003,12 @@ String Context::getCurrentDatabase() const } +String Context::getCurrentTable() const +{ + return current_table; +} + + String Context::getCurrentQueryId() const { return client_info.current_query_id; @@ -1017,6 +1023,15 @@ void Context::setCurrentDatabase(const String & name) } +void Context::setCurrentTable(const String & database, const String & table) +{ + auto lock = getLock(); + assertTableExists(database, table); + current_database = database; + current_table = table; +} + + void Context::setCurrentQueryId(const String & query_id) { if (!client_info.current_query_id.empty()) diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index 1c867d65e8f..4ab675dfaf2 100644 --- a/dbms/src/Interpreters/Context.h +++ 
b/dbms/src/Interpreters/Context.h @@ -104,6 +104,7 @@ private: std::shared_ptr quota; /// Current quota. By default - empty quota, that have no limits. String current_database; + String current_table; Settings settings; /// Setting for query execution. using ProgressCallback = std::function; ProgressCallback progress_callback; /// Callback for tracking progress of query execution. @@ -211,8 +212,10 @@ public: std::unique_ptr getDDLGuardIfTableDoesntExist(const String & database, const String & table, const String & message) const; String getCurrentDatabase() const; + String getCurrentTable() const; String getCurrentQueryId() const; void setCurrentDatabase(const String & name); + void setCurrentTable(const String & database, const String & table); void setCurrentQueryId(const String & query_id); String getDefaultFormat() const; /// If default_format is not specified, some global default format is returned. diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.cpp b/dbms/src/Interpreters/InterpreterInsertQuery.cpp index 86164ef2704..f11c12b67d9 100644 --- a/dbms/src/Interpreters/InterpreterInsertQuery.cpp +++ b/dbms/src/Interpreters/InterpreterInsertQuery.cpp @@ -153,4 +153,11 @@ void InterpreterInsertQuery::checkAccess(const ASTInsertQuery & query) throw Exception("Cannot insert into table in readonly mode", ErrorCodes::READONLY); } +void InterpreterInsertQuery::getDatabaseTable(String & database, String & table) const +{ + ASTInsertQuery & query = typeid_cast(*query_ptr); + database = query.database; + table = query.table; +} + } diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.h b/dbms/src/Interpreters/InterpreterInsertQuery.h index 2180ebe0550..4ec7460fb75 100644 --- a/dbms/src/Interpreters/InterpreterInsertQuery.h +++ b/dbms/src/Interpreters/InterpreterInsertQuery.h @@ -24,6 +24,8 @@ public: */ BlockIO execute() override; + void getDatabaseTable(String & database, String & table) const; + private: StoragePtr getTable(const ASTInsertQuery & query); 
Block getSampleBlock(const ASTInsertQuery & query, const StoragePtr & table); diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index de36a84fd26..ebfc71ef537 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -200,6 +200,14 @@ static std::tuple executeQueryImpl( auto interpreter = InterpreterFactory::get(ast, context, stage); res = interpreter->execute(); + if (InterpreterInsertQuery * insertInterpreter = typeid_cast(&*interpreter)) + { + String database; + String table_name; + insertInterpreter->getDatabaseTable(database, table_name); + if (!database.empty()) + context.setCurrentTable(database, table_name); + } /// Delayed initialization of query streams (required for KILL QUERY purposes) if (process_list_entry) diff --git a/dbms/src/Proto/protoHelpers.cpp b/dbms/src/Proto/protoHelpers.cpp index 0d451625ea6..371f17d2d81 100644 --- a/dbms/src/Proto/protoHelpers.cpp +++ b/dbms/src/Proto/protoHelpers.cpp @@ -33,6 +33,7 @@ namespace DB return data; } + template typename T::Reader deserializeProto(const char * data, size_t data_size) { @@ -43,78 +44,45 @@ namespace DB return reader.getRoot(); } - static ColumnWithTypeAndName storeContext(const String & column_name, Context & context) + + static MutableColumnPtr storeTableMeta(const TableMetadata & meta) { capnp::MallocMessageBuilder message; Proto::Context::Builder proto_context = message.initRoot(); - Databases dbs = context.getDatabases(); - auto proto_databases = proto_context.initDatabases(dbs.size()); + auto proto_databases = proto_context.initDatabases(1); + auto proto_db = proto_databases[0]; + proto_db.setName(meta.database); - size_t db_nomber = 0; - for (auto & pr_db : dbs) + auto proto_db_tables = proto_db.initTables(1); + auto proto_table = proto_db_tables[0]; + proto_table.setName(meta.table); + + auto proto_columns = proto_table.initColumns(meta.column_defaults.size()); + + size_t column_no = 0; + for (const auto 
& pr_column : meta.column_defaults) { - const String & database_name = pr_db.first; - if (database_name == "system") - continue; + const String & column_name = pr_column.first; + const ColumnDefault & def = pr_column.second; + std::stringstream ss; + ss << def.expression; - IDatabase & db = *pr_db.second; + auto current_column = proto_columns[column_no]; + current_column.setName(column_name); + current_column.getDefault().setKind(static_cast(def.kind)); + current_column.getDefault().setExpression(ss.str()); - auto proto_db = proto_databases[db_nomber]; - proto_db.setName(database_name); - - std::unordered_map tables; - DatabaseIteratorPtr it_tables = db.getIterator(context); - while (it_tables->isValid()) - { - tables[it_tables->name()] = it_tables->table(); - it_tables->next(); - } - - auto proto_tables = proto_db.initTables(tables.size()); - size_t table_no = 0; - for (const auto & pr_table : tables) - { - auto current_table = proto_tables[table_no]; - current_table.setName(pr_table.first); - - const ColumnsDescription & columns = pr_table.second->getColumns(); - auto proto_columns = current_table.initColumns(columns.defaults.size()); - - size_t column_no = 0; - for (const auto& pr_column : columns.defaults) - { - const String & column_name = pr_column.first; - const ColumnDefault & def = pr_column.second; - std::stringstream ss; - ss << def.expression; - - auto current_column = proto_columns[column_no]; - current_column.setName(column_name); - current_column.getDefault().setKind(static_cast(def.kind)); - current_column.getDefault().setExpression(ss.str()); - - ++column_no; - } - - ++table_no; - } - - ++db_nomber; + ++column_no; } - ColumnWithTypeAndName proto_column; - proto_column.name = column_name; - proto_column.type = std::make_shared(); - proto_column.column = std::move(serializeProto(message)); - return proto_column; + return serializeProto(message); } - static void loadTableMetadata(const ColumnWithTypeAndName & proto_column, TableMetadata & table_meta) 
+ + static void loadTableMeta(const char * data, size_t data_size, TableMetadata & table_meta) { - StringRef plain_data = proto_column.column->getDataAt(0); - size_t data_size = proto_column.column->byteSize(); - Proto::Context::Reader proto_context = deserializeProto(plain_data.data, data_size); + Proto::Context::Reader proto_context = deserializeProto(data, data_size); ParserTernaryOperatorExpression parser; @@ -146,25 +114,33 @@ namespace DB } } - static constexpr const char * contextColumnName() + + static constexpr const char * tableMetaColumnName() { - return "context"; + return "tableMeta"; } - Block storeContextBlock(Context & context) + + Block storeTableMetadata(const TableMetadata & table_meta) { + ColumnWithTypeAndName proto_column; + proto_column.name = tableMetaColumnName(); + proto_column.type = std::make_shared(); + proto_column.column = std::move(storeTableMeta(table_meta)); + Block block; - block.insert(storeContext(contextColumnName(), context)); + block.insert(std::move(proto_column)); return block; } + void loadTableMetadata(const Block & block, TableMetadata & table_meta) { /// select metadata type by column name - if (block.has(contextColumnName())) + if (block.has(tableMetaColumnName())) { - const ColumnWithTypeAndName & column = block.getByName(contextColumnName()); - loadTableMetadata(column, table_meta); + const ColumnWithTypeAndName & column = block.getByName(tableMetaColumnName()); + loadTableMeta(column.column->getDataAt(0).data, column.column->byteSize(), table_meta); } } } diff --git a/dbms/src/Proto/protoHelpers.h b/dbms/src/Proto/protoHelpers.h index 88e3c299fa2..2909ebc3e9d 100644 --- a/dbms/src/Proto/protoHelpers.h +++ b/dbms/src/Proto/protoHelpers.h @@ -7,7 +7,7 @@ namespace DB class Block; class TableMetadata; - Block storeContextBlock(Context & context); + Block storeTableMetadata(const TableMetadata & table_meta); void loadTableMetadata(const Block & block, TableMetadata & table_meta); } diff --git 
a/dbms/src/Storages/TableMetadata.cpp b/dbms/src/Storages/TableMetadata.cpp index a489e5ba92e..5549574275a 100644 --- a/dbms/src/Storages/TableMetadata.cpp +++ b/dbms/src/Storages/TableMetadata.cpp @@ -5,13 +5,14 @@ namespace DB { - void TableMetadata::loadFromContext(const Context & context) + bool TableMetadata::loadFromContext(const Context & context) { if (!context.isTableExist(database, table)) - return; + return false; StoragePtr storage = context.getTable(database, table); const ColumnsDescription & table_columns = storage->getColumns(); column_defaults = table_columns.defaults; + return true; } } diff --git a/dbms/src/Storages/TableMetadata.h b/dbms/src/Storages/TableMetadata.h index a88808ed96a..2194f2c8465 100644 --- a/dbms/src/Storages/TableMetadata.h +++ b/dbms/src/Storages/TableMetadata.h @@ -18,6 +18,7 @@ namespace DB const String & table; ColumnDefaults column_defaults; - void loadFromContext(const Context & context); + bool loadFromContext(const Context & context); + bool hasDefaults() const { return !column_defaults.empty(); } }; } From 2d3aa8cdbaf0a93c75632a2074cdffacc4d69ca7 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Mon, 16 Jul 2018 19:41:15 +0300 Subject: [PATCH 28/90] Update TableMetadata.h --- dbms/src/Storages/TableMetadata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/TableMetadata.h b/dbms/src/Storages/TableMetadata.h index 2194f2c8465..ddb5b79b68b 100644 --- a/dbms/src/Storages/TableMetadata.h +++ b/dbms/src/Storages/TableMetadata.h @@ -7,7 +7,7 @@ namespace DB class Context; class Block; - /// Addition information for query that could not be get from sample block + /// Additional information for query that could not be get from sample block struct TableMetadata { TableMetadata(const String & database_, const String & table_) From 0463fb6dd316f1a3bbe6c0b67ea213338dced3cd Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Mon, 16 Jul 2018 19:48:56 +0300 Subject: [PATCH 29/90] Update 
Settings.h --- dbms/src/Interpreters/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index 5d946a7d6cb..b07e9914f34 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -123,7 +123,7 @@ struct Settings M(SettingUInt64, max_concurrent_queries_for_user, 0, "The maximum number of concurrent requests per user.") \ \ M(SettingBool, insert_deduplicate, true, "For INSERT queries in the replicated table, specifies that deduplication of insertings blocks should be preformed") \ - M(SettingBool, insert_sample_with_metadata, true, "For INSERT queries, specifies that need add metadata before sample block") \ + M(SettingBool, insert_sample_with_metadata, true, "For INSERT queries, specifies that the server need to send metadata about column defaults to the client. This will be used to calculate default expressions.") \ \ M(SettingUInt64, insert_quorum, 0, "For INSERT queries in the replicated table, wait writing for the specified number of replicas and linearize the addition of the data. 
0 - disabled.") \ M(SettingMilliseconds, insert_quorum_timeout, 600000, "") \ From 07a782e637249a760f15d73bfecb3329cde3f902 Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 16 Jul 2018 22:28:07 +0300 Subject: [PATCH 30/90] fix wrong defaults at columns tail and some build fixes --- cmake/find_capnp.cmake | 4 ---- dbms/programs/client/Client.cpp | 1 + .../DataStreams/AddingDefaultsBlockInputStream.cpp | 2 +- .../Formats/BlockInputStreamFromRowInputStream.cpp | 11 ++--------- libs/libcommon/include/common/config_common.h.in | 1 - 5 files changed, 4 insertions(+), 15 deletions(-) diff --git a/cmake/find_capnp.cmake b/cmake/find_capnp.cmake index 426031db15e..03ecadda6a1 100644 --- a/cmake/find_capnp.cmake +++ b/cmake/find_capnp.cmake @@ -1,9 +1,5 @@ option (ENABLE_CAPNP "Enable Cap'n Proto" ${NOT_MSVC}) -unset (USE_CAPNP CACHE) -unset (USE_INTERNAL_CAPNP_LIBRARY CACHE) -unset (MISSING_INTERNAL_CAPNP_LIBRARY CACHE) - if (ENABLE_CAPNP) # cmake 3.5.1 bug: # capnproto uses this cmake feature: diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 74eff658133..51ba397a000 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp index 7cc90f6a641..63d18cd0285 100644 --- a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp +++ b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp @@ -74,7 +74,7 @@ Block AddingDefaultsBlockInputStream::readImpl() for (size_t row_idx = 0; row_idx < column_read.column->size(); ++row_idx) { - if (defaults_mask[row_idx]) + if (row_idx < defaults_mask.size() && defaults_mask[row_idx]) { if (column_def.column->isColumnConst()) column_mixed->insert((*column_def.column)[row_idx]); diff --git a/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp 
b/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp index aa4c2968539..f3ec72a4a6c 100644 --- a/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp +++ b/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp @@ -16,7 +16,6 @@ namespace ErrorCodes extern const int CANNOT_PARSE_NUMBER; extern const int CANNOT_PARSE_UUID; extern const int TOO_LARGE_STRING_SIZE; - extern const int INCORRECT_NUMBER_OF_COLUMNS; } @@ -62,14 +61,8 @@ Block BlockInputStreamFromRowInputStream::readImpl() break; for (size_t column_idx = 0; column_idx < info.read_columns.size(); ++column_idx) - { - if (!info.read_columns[column_idx]) { - size_t column_size = columns[column_idx]->size(); - if (column_size == 0) - throw Exception("Unexpected empty column", ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS); - delayed_defaults.setBit(column_idx, column_size - 1); - } - } + if (!info.read_columns[column_idx]) + delayed_defaults.setBit(column_idx, rows); } catch (Exception & e) { diff --git a/libs/libcommon/include/common/config_common.h.in b/libs/libcommon/include/common/config_common.h.in index bc24a0c6fe2..0cc0950efba 100644 --- a/libs/libcommon/include/common/config_common.h.in +++ b/libs/libcommon/include/common/config_common.h.in @@ -4,7 +4,6 @@ #cmakedefine01 USE_TCMALLOC #cmakedefine01 USE_JEMALLOC -#cmakedefine01 USE_CAPNP #cmakedefine01 USE_READLINE #cmakedefine01 USE_LIBEDIT #cmakedefine01 HAVE_READLINE_HISTORY From c7d9314189d7449641abacb3fef6bae8fff88d3b Mon Sep 17 00:00:00 2001 From: chertus Date: Tue, 17 Jul 2018 15:18:20 +0300 Subject: [PATCH 31/90] allow build with system capnp --- cmake/find_capnp.cmake | 3 +++ dbms/src/Proto/CMakeLists.txt | 7 ++----- utils/build/build_debian_unbundled.sh | 2 +- utils/travis/normal.sh | 2 +- utils/travis/pbuilder.sh | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/cmake/find_capnp.cmake b/cmake/find_capnp.cmake index 03ecadda6a1..b72746b436a 100644 --- a/cmake/find_capnp.cmake +++ b/cmake/find_capnp.cmake @@ -25,6 +25,7 @@ 
if (ENABLE_CAPNP) if (NOT USE_INTERNAL_CAPNP_LIBRARY) set (CAPNP_PATHS "/usr/local/opt/capnp/lib") + set (CAPNP_BIN_PATH "/usr/bin:/usr/local/bin") set (CAPNP_INCLUDE_PATHS "/usr/local/opt/capnp/include") find_library (CAPNP capnp PATHS ${CAPNP_PATHS}) find_library (CAPNPC capnpc PATHS ${CAPNP_PATHS}) @@ -40,6 +41,8 @@ if (ENABLE_CAPNP) set (CAPNP_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/capnproto/c++/src") set (CAPNP_LIBRARY capnpc) set (USE_CAPNP 1) + set (CAPNP_BIN_PATH ${ClickHouse_BINARY_DIR}/contrib/capnproto/c++/src/capnp) + set (CAPNP_BIN_TARGET capnp_tool) endif () endif () diff --git a/dbms/src/Proto/CMakeLists.txt b/dbms/src/Proto/CMakeLists.txt index b3871cd53e8..1f5d07089af 100644 --- a/dbms/src/Proto/CMakeLists.txt +++ b/dbms/src/Proto/CMakeLists.txt @@ -1,10 +1,7 @@ -set (CAPNP_PATH ${CMAKE_BINARY_DIR}/contrib/capnproto/c++/src/capnp) -set (CAPNP_BIN ${CAPNP_PATH}/capnp) - add_custom_command (OUTPUT ServerMessage.capnp.c++ ServerMessage.capnp.h COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/ServerMessage.capnp ${CMAKE_CURRENT_BINARY_DIR}/ServerMessage.capnp - COMMAND ${CMAKE_COMMAND} -E env PATH=${CAPNP_PATH} ${CAPNP_BIN} compile -I ${CAPNP_INCLUDE_DIR} -oc++ ServerMessage.capnp - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/ServerMessage.capnp) + COMMAND ${CMAKE_COMMAND} -E env PATH=${CAPNP_BIN_PATH} capnp compile -I ${CAPNP_INCLUDE_DIR} -oc++ ServerMessage.capnp + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/ServerMessage.capnp ${CAPNP_BIN_TARGET}) add_library (clickhouse_proto ServerMessage.capnp.c++ protoHelpers.cpp) target_link_libraries (clickhouse_proto clickhouse_common_io ${CAPNP_LIBRARY}) diff --git a/utils/build/build_debian_unbundled.sh b/utils/build/build_debian_unbundled.sh index dc47c8fc3a3..53b7a12a239 100755 --- a/utils/build/build_debian_unbundled.sh +++ b/utils/build/build_debian_unbundled.sh @@ -22,5 +22,5 @@ env TEST_RUN=1 \ `# Use all 
possible contrib libs from system` \ `# psmisc - killall` \ `# gdb - symbol test in pbuilder` \ - EXTRAPACKAGES="psmisc gdb clang-6.0 libc++abi-dev libc++-dev libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev librdkafka-dev libpoco-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev libunwind-dev googletest libcctz-dev $EXTRAPACKAGES" \ + EXTRAPACKAGES="psmisc gdb clang-6.0 libc++abi-dev libc++-dev libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev capnproto libcapnp-dev librdkafka-dev libpoco-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev libunwind-dev googletest libcctz-dev $EXTRAPACKAGES" \ pdebuild --configfile $ROOT_DIR/debian/.pbuilderrc $PDEBUILD_OPT diff --git a/utils/travis/normal.sh b/utils/travis/normal.sh index b361744a3ec..f2e935f9bfa 100755 --- a/utils/travis/normal.sh +++ b/utils/travis/normal.sh @@ -32,7 +32,7 @@ cmake $CUR_DIR/../.. 
-DCMAKE_CXX_COMPILER=`which $DEB_CXX $CXX` -DCMAKE_C_COMPIL `# Use all possible contrib libs from system` \ -DUNBUNDLED=1 \ `# Disable all features` \ - -DENABLE_CAPNP=0 -DENABLE_RDKAFKA=0 -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_TCMALLOC=0 -DENABLE_UNWIND=0 -DENABLE_MYSQL=0 -DUSE_INTERNAL_LLVM_LIBRARY=0 $CMAKE_FLAGS \ + -DENABLE_RDKAFKA=0 -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_TCMALLOC=0 -DENABLE_UNWIND=0 -DENABLE_MYSQL=0 -DUSE_INTERNAL_LLVM_LIBRARY=0 $CMAKE_FLAGS \ && ninja clickhouse-bundle \ `# Skip tests:` \ `# 00281 requires internal compiler` \ diff --git a/utils/travis/pbuilder.sh b/utils/travis/pbuilder.sh index 796dcf3e8d9..a1487ba2783 100755 --- a/utils/travis/pbuilder.sh +++ b/utils/travis/pbuilder.sh @@ -24,10 +24,10 @@ env TEST_RUN=${TEST_RUN=1} \ DEB_CC=${DEB_CC=$CC} DEB_CXX=${DEB_CXX=$CXX} \ CCACHE_SIZE=${CCACHE_SIZE:=4G} \ `# Disable all features` \ - CMAKE_FLAGS="-DCMAKE_BUILD_TYPE=Debug -DUNBUNDLED=1 -DENABLE_UNWIND=0 -DENABLE_MYSQL=0 -DENABLE_CAPNP=0 -DENABLE_RDKAFKA=0 -DUSE_INTERNAL_LLVM_LIBRARY=0 -DCMAKE_C_FLAGS_ADD='-O0 -g0' -DCMAKE_CXX_FLAGS_ADD='-O0 -g0' $CMAKE_FLAGS" \ + CMAKE_FLAGS="-DCMAKE_BUILD_TYPE=Debug -DUNBUNDLED=1 -DENABLE_UNWIND=0 -DENABLE_MYSQL=0 -DENABLE_RDKAFKA=0 -DUSE_INTERNAL_CAPNP_LIBRARY=0 -DUSE_INTERNAL_LLVM_LIBRARY=0 -DCMAKE_C_FLAGS_ADD='-O0 -g0' -DCMAKE_CXX_FLAGS_ADD='-O0 -g0' $CMAKE_FLAGS" \ `# Use all possible contrib libs from system` \ `# psmisc - killall` \ - EXTRAPACKAGES="psmisc clang-5.0 lld-5.0 liblld-5.0-dev libclang-5.0-dev liblld-5.0 libc++abi-dev libc++-dev libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev librdkafka-dev libpoco-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev $EXTRAPACKAGES" \ + EXTRAPACKAGES="psmisc clang-5.0 lld-5.0 liblld-5.0-dev libclang-5.0-dev liblld-5.0 libc++abi-dev libc++-dev libboost-program-options-dev libboost-system-dev libboost-filesystem-dev 
libboost-thread-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev libcapnp-dev capnproto librdkafka-dev libpoco-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev $EXTRAPACKAGES" \ `# Travis trusty cant unpack bionic: E: debootstrap failed, TODO: check again, can be fixed` \ DIST=${DIST=artful} \ $CUR_DIR/../../release $RELEASE_OPT From 2edda7dee56111b671f03d4744cb23a091ca22e7 Mon Sep 17 00:00:00 2001 From: chertus Date: Tue, 17 Jul 2018 16:55:55 +0300 Subject: [PATCH 32/90] try fix Travis build (use internal capnp) --- utils/travis/pbuilder.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/travis/pbuilder.sh b/utils/travis/pbuilder.sh index a1487ba2783..7257e597f1a 100755 --- a/utils/travis/pbuilder.sh +++ b/utils/travis/pbuilder.sh @@ -24,10 +24,10 @@ env TEST_RUN=${TEST_RUN=1} \ DEB_CC=${DEB_CC=$CC} DEB_CXX=${DEB_CXX=$CXX} \ CCACHE_SIZE=${CCACHE_SIZE:=4G} \ `# Disable all features` \ - CMAKE_FLAGS="-DCMAKE_BUILD_TYPE=Debug -DUNBUNDLED=1 -DENABLE_UNWIND=0 -DENABLE_MYSQL=0 -DENABLE_RDKAFKA=0 -DUSE_INTERNAL_CAPNP_LIBRARY=0 -DUSE_INTERNAL_LLVM_LIBRARY=0 -DCMAKE_C_FLAGS_ADD='-O0 -g0' -DCMAKE_CXX_FLAGS_ADD='-O0 -g0' $CMAKE_FLAGS" \ + CMAKE_FLAGS="-DCMAKE_BUILD_TYPE=Debug -DUNBUNDLED=1 -DENABLE_UNWIND=0 -DENABLE_MYSQL=0 -DENABLE_RDKAFKA=0 -DUSE_INTERNAL_LLVM_LIBRARY=0 -DCMAKE_C_FLAGS_ADD='-O0 -g0' -DCMAKE_CXX_FLAGS_ADD='-O0 -g0' $CMAKE_FLAGS" \ `# Use all possible contrib libs from system` \ `# psmisc - killall` \ - EXTRAPACKAGES="psmisc clang-5.0 lld-5.0 liblld-5.0-dev libclang-5.0-dev liblld-5.0 libc++abi-dev libc++-dev libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev libcapnp-dev capnproto librdkafka-dev libpoco-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev $EXTRAPACKAGES" \ + EXTRAPACKAGES="psmisc clang-5.0 lld-5.0 liblld-5.0-dev libclang-5.0-dev liblld-5.0 libc++abi-dev 
libc++-dev libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev librdkafka-dev libpoco-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev $EXTRAPACKAGES" \ `# Travis trusty cant unpack bionic: E: debootstrap failed, TODO: check again, can be fixed` \ DIST=${DIST=artful} \ $CUR_DIR/../../release $RELEASE_OPT From f12edacbcf79b899bbf781c42a7dc2855e2f6665 Mon Sep 17 00:00:00 2001 From: chertus Date: Tue, 17 Jul 2018 19:22:31 +0300 Subject: [PATCH 33/90] Travis build with internal capnp (normal) --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 705b6977114..401c8d96856 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,7 +14,7 @@ matrix: # update: true # sources: # - ubuntu-toolchain-r-test -# packages: [ g++-7, libicu-dev, libreadline-dev, libmysqlclient-dev, unixodbc-dev, libltdl-dev, libssl-dev, libboost-dev, zlib1g-dev, libdouble-conversion-dev, libsparsehash-dev, librdkafka-dev, libcapnp-dev, libsparsehash-dev, libgoogle-perftools-dev, bash, expect, python, python-lxml, python-termcolor, curl, perl, sudo, openssl ] +# packages: [ g++-7, libicu-dev, libreadline-dev, libmysqlclient-dev, unixodbc-dev, libltdl-dev, libssl-dev, libboost-dev, zlib1g-dev, libdouble-conversion-dev, libsparsehash-dev, librdkafka-dev, libsparsehash-dev, libgoogle-perftools-dev, bash, expect, python, python-lxml, python-termcolor, curl, perl, sudo, openssl ] # # env: # - MATRIX_EVAL="export CC=gcc-7 && export CXX=g++-7" @@ -38,7 +38,7 @@ matrix: sources: - ubuntu-toolchain-r-test - llvm-toolchain-trusty-5.0 - packages: [ ninja-build, g++-7, clang-5.0, lld-5.0, libicu-dev, libreadline-dev, libmysqlclient-dev, unixodbc-dev, libltdl-dev, libssl-dev, libboost-dev, zlib1g-dev, libdouble-conversion-dev, libsparsehash-dev, librdkafka-dev, libcapnp-dev, libsparsehash-dev, libgoogle-perftools-dev, bash, expect, 
python, python-lxml, python-termcolor, curl, perl, sudo, openssl] + packages: [ ninja-build, g++-7, clang-5.0, lld-5.0, libicu-dev, libreadline-dev, libmysqlclient-dev, unixodbc-dev, libltdl-dev, libssl-dev, libboost-dev, zlib1g-dev, libdouble-conversion-dev, libsparsehash-dev, librdkafka-dev, libsparsehash-dev, libgoogle-perftools-dev, bash, expect, python, python-lxml, python-termcolor, curl, perl, sudo, openssl] env: - MATRIX_EVAL="export CC=clang-5.0 && export CXX=clang++-5.0" From cbf1c220ae8bd17c40054111cdafd4e1f5dde0f7 Mon Sep 17 00:00:00 2001 From: chertus Date: Tue, 17 Jul 2018 21:18:46 +0300 Subject: [PATCH 34/90] better proto deserialize --- dbms/src/Proto/protoHelpers.cpp | 38 ++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/dbms/src/Proto/protoHelpers.cpp b/dbms/src/Proto/protoHelpers.cpp index 371f17d2d81..c898d182df8 100644 --- a/dbms/src/Proto/protoHelpers.cpp +++ b/dbms/src/Proto/protoHelpers.cpp @@ -19,6 +19,13 @@ namespace DB { + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + + static MutableColumnPtr serializeProto(capnp::MessageBuilder & message) { MutableColumnPtr data = DataTypeUInt8().createColumn(); @@ -34,19 +41,29 @@ namespace DB } + /// template - typename T::Reader deserializeProto(const char * data, size_t data_size) + class ProtoDeserializer { - const capnp::word * ptr = reinterpret_cast(data); - auto serialized = kj::arrayPtr(ptr, data_size / sizeof(capnp::word)); + public: + ProtoDeserializer(const char * data, size_t data_size) + : serialized(kj::arrayPtr(reinterpret_cast(data), data_size / sizeof(capnp::word))), + reader(serialized) + {} - capnp::FlatArrayMessageReader reader(serialized); - return reader.getRoot(); - } + typename T::Reader getReader() { return reader.getRoot(); } + + private: + kj::ArrayPtr serialized; + capnp::FlatArrayMessageReader reader; + }; static MutableColumnPtr storeTableMeta(const TableMetadata & meta) { + if (meta.database.empty() || 
meta.table.empty()) + throw Exception("storeTableMeta: table is not set", ErrorCodes::LOGICAL_ERROR); + capnp::MallocMessageBuilder message; Proto::Context::Builder proto_context = message.initRoot(); @@ -82,7 +99,11 @@ namespace DB static void loadTableMeta(const char * data, size_t data_size, TableMetadata & table_meta) { - Proto::Context::Reader proto_context = deserializeProto(data, data_size); + if (data == nullptr || data_size == 0) + throw Exception("loadTableMeta: empty metadata column", ErrorCodes::LOGICAL_ERROR); + + ProtoDeserializer deserializer(data, data_size); + Proto::Context::Reader proto_context = deserializer.getReader(); ParserTernaryOperatorExpression parser; @@ -140,7 +161,8 @@ namespace DB if (block.has(tableMetaColumnName())) { const ColumnWithTypeAndName & column = block.getByName(tableMetaColumnName()); - loadTableMeta(column.column->getDataAt(0).data, column.column->byteSize(), table_meta); + StringRef raw_data = column.column->getRawData(); + loadTableMeta(raw_data.data, raw_data.size, table_meta); } } } From 955293cea78736390501b46cdfa5412f8af5a87c Mon Sep 17 00:00:00 2001 From: chertus Date: Tue, 17 Jul 2018 21:42:05 +0300 Subject: [PATCH 35/90] some more build improvements --- cmake/find_capnp.cmake | 2 +- dbms/src/Proto/CMakeLists.txt | 2 +- dbms/src/Proto/protoHelpers.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/find_capnp.cmake b/cmake/find_capnp.cmake index b72746b436a..abe46316676 100644 --- a/cmake/find_capnp.cmake +++ b/cmake/find_capnp.cmake @@ -42,7 +42,7 @@ if (ENABLE_CAPNP) set (CAPNP_LIBRARY capnpc) set (USE_CAPNP 1) set (CAPNP_BIN_PATH ${ClickHouse_BINARY_DIR}/contrib/capnproto/c++/src/capnp) - set (CAPNP_BIN_TARGET capnp_tool) + set (CAPNP_BIN_TARGETS capnp_tool capnpc_cpp capnpc_capnp) endif () endif () diff --git a/dbms/src/Proto/CMakeLists.txt b/dbms/src/Proto/CMakeLists.txt index 1f5d07089af..1e150d9f86b 100644 --- a/dbms/src/Proto/CMakeLists.txt +++ b/dbms/src/Proto/CMakeLists.txt @@ 
-1,7 +1,7 @@ add_custom_command (OUTPUT ServerMessage.capnp.c++ ServerMessage.capnp.h COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/ServerMessage.capnp ${CMAKE_CURRENT_BINARY_DIR}/ServerMessage.capnp COMMAND ${CMAKE_COMMAND} -E env PATH=${CAPNP_BIN_PATH} capnp compile -I ${CAPNP_INCLUDE_DIR} -oc++ ServerMessage.capnp - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/ServerMessage.capnp ${CAPNP_BIN_TARGET}) + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/ServerMessage.capnp ${CAPNP_BIN_TARGETS}) add_library (clickhouse_proto ServerMessage.capnp.c++ protoHelpers.cpp) target_link_libraries (clickhouse_proto clickhouse_common_io ${CAPNP_LIBRARY}) diff --git a/dbms/src/Proto/protoHelpers.h b/dbms/src/Proto/protoHelpers.h index 2909ebc3e9d..7cef5a28013 100644 --- a/dbms/src/Proto/protoHelpers.h +++ b/dbms/src/Proto/protoHelpers.h @@ -5,7 +5,7 @@ namespace DB { class Context; class Block; - class TableMetadata; + struct TableMetadata; Block storeTableMetadata(const TableMetadata & table_meta); void loadTableMetadata(const Block & block, TableMetadata & table_meta); From 3d133a6a5a861161a4dde3b7b76f6ae39f9a9781 Mon Sep 17 00:00:00 2001 From: chertus Date: Tue, 17 Jul 2018 23:02:37 +0300 Subject: [PATCH 36/90] save block structure when add defaults --- .../AddingDefaultsBlockInputStream.cpp | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp index 63d18cd0285..ace3841e67b 100644 --- a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp +++ b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp @@ -51,8 +51,7 @@ Block AddingDefaultsBlockInputStream::readImpl() evaluateMissingDefaultsUnsafe(evaluate_block, header.getNamesAndTypesList(), column_defaults, context); - ColumnsWithTypeAndName mixed_columns; - 
mixed_columns.reserve(std::min(column_defaults.size(), delayed_defaults.size())); + std::unordered_map mixed_columns; for (const ColumnWithTypeAndName & column_def : evaluate_block) { @@ -85,16 +84,21 @@ Block AddingDefaultsBlockInputStream::readImpl() column_mixed->insertFrom(*column_read.column, row_idx); } - ColumnWithTypeAndName mix = column_read.cloneEmpty(); - mix.column = std::move(column_mixed); - mixed_columns.emplace_back(std::move(mix)); + mixed_columns.emplace(std::make_pair(block_column_position, std::move(column_mixed))); } } - for (auto & column : mixed_columns) + if (!mixed_columns.empty()) { - res.erase(column.name); - res.insert(std::move(column)); + /// replace columns saving block structure + MutableColumns mutation = res.mutateColumns(); + for (size_t position = 0; position < mutation.size(); ++position) + { + auto it = mixed_columns.find(position); + if (it != mixed_columns.end()) + mutation[position] = std::move(it->second); + } + res.setColumns(std::move(mutation)); } return res; From af4cceb87789a9b94265a154fa9490e08ddfe649 Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 12 Nov 2018 18:25:21 +0300 Subject: [PATCH 37/90] make branch up to date --- dbms/src/Interpreters/evaluateMissingDefaults.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/evaluateMissingDefaults.cpp b/dbms/src/Interpreters/evaluateMissingDefaults.cpp index b0280fe9ae7..8d84c8f6576 100644 --- a/dbms/src/Interpreters/evaluateMissingDefaults.cpp +++ b/dbms/src/Interpreters/evaluateMissingDefaults.cpp @@ -90,7 +90,8 @@ void evaluateMissingDefaultsUnsafe(Block & block, for (size_t i = 0, size = block.columns(); i < size; ++i) available_columns.emplace_back(block.getByPosition(i).name, block.getByPosition(i).type); - ExpressionAnalyzer{default_expr_list, context, {}, available_columns}.getActions(true)->execute(block); + auto syntax_result = SyntaxAnalyzer(context, {}).analyze(default_expr_list, available_columns); + 
ExpressionAnalyzer{default_expr_list, syntax_result, context}.getActions(true)->execute(block); } } From c1518da50f1e7620a49b9c7494f0fccc614e85e6 Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 12 Nov 2018 20:29:02 +0300 Subject: [PATCH 38/90] fix build with CAPNP --- dbms/programs/server/TCPHandler.cpp | 2 +- dbms/src/Proto/protoHelpers.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp index e6347342a42..f06fd070d98 100644 --- a/dbms/programs/server/TCPHandler.cpp +++ b/dbms/programs/server/TCPHandler.cpp @@ -30,8 +30,8 @@ #include #include #include - #include +#include #include "TCPHandler.h" diff --git a/dbms/src/Proto/protoHelpers.cpp b/dbms/src/Proto/protoHelpers.cpp index c898d182df8..cc91cbc5905 100644 --- a/dbms/src/Proto/protoHelpers.cpp +++ b/dbms/src/Proto/protoHelpers.cpp @@ -147,7 +147,7 @@ namespace ErrorCodes ColumnWithTypeAndName proto_column; proto_column.name = tableMetaColumnName(); proto_column.type = std::make_shared(); - proto_column.column = std::move(storeTableMeta(table_meta)); + proto_column.column = storeTableMeta(table_meta); Block block; block.insert(std::move(proto_column)); From 9aae1d0d8fc5211ac903464676c043db99414d79 Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 12 Nov 2018 21:34:43 +0300 Subject: [PATCH 39/90] fix branches merge mistake --- dbms/programs/client/Client.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 7642ca54787..c976d50bad8 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -1129,7 +1129,7 @@ private: #if USE_CAPNP loadTableMetadata(packet.block, table_meta); #endif - return receiveSampleBlock(packet.block, table_meta); + return receiveSampleBlock(out, table_meta); case Protocol::Server::Exception: onException(*packet.exception); From 9d3325f56e09a420f3600568bed962339ce5280e Mon Sep 17 
00:00:00 2001 From: chertus Date: Tue, 13 Nov 2018 16:36:53 +0300 Subject: [PATCH 40/90] one more fix for bad merge of branches --- .../BlockInputStreamFromRowInputStream.cpp | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp b/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp index 894c597ef14..4a23a594876 100644 --- a/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp +++ b/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp @@ -19,6 +19,7 @@ namespace ErrorCodes extern const int TOO_LARGE_STRING_SIZE; extern const int CANNOT_READ_ALL_DATA; extern const int INCORRECT_DATA; + extern const int INCORRECT_NUMBER_OF_COLUMNS; } @@ -52,6 +53,7 @@ Block BlockInputStreamFromRowInputStream::readImpl() { size_t num_columns = sample.columns(); MutableColumns columns = sample.cloneEmptyColumns(); + BlockDelayedDefaults delayed_defaults; try { @@ -60,8 +62,19 @@ Block BlockInputStreamFromRowInputStream::readImpl() try { ++total_rows; - if (!row_input->read(columns)) + RowReadExtention info; + if (!row_input->extendedRead(columns, info)) break; + + for (size_t column_idx = 0; column_idx < info.read_columns.size(); ++column_idx) + { + if (!info.read_columns[column_idx]) { + size_t column_size = columns[column_idx]->size(); + if (column_size == 0) + throw Exception("Unexpected empty column", ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS); + delayed_defaults.setBit(column_idx, column_size - 1); + } + } } catch (Exception & e) { @@ -130,7 +143,10 @@ Block BlockInputStreamFromRowInputStream::readImpl() if (columns.empty() || columns[0]->empty()) return {}; - return sample.cloneWithColumns(std::move(columns)); + auto out_block = sample.cloneWithColumns(std::move(columns)); + if (!delayed_defaults.empty()) + out_block.delayed_defaults = std::move(delayed_defaults); + return out_block; } From ebf3d6018683dcc1cffd1b243881685df69a04e1 Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 14 
Nov 2018 18:23:00 +0300 Subject: [PATCH 41/90] column defaults without CapNProto [CLICKHOUSE-3578] --- cmake/find_capnp.cmake | 3 - dbms/CMakeLists.txt | 1 - dbms/programs/client/CMakeLists.txt | 6 +- dbms/programs/client/Client.cpp | 27 +-- dbms/programs/server/TCPHandler.cpp | 35 +--- dbms/programs/server/TCPHandler.h | 1 - dbms/src/Client/Connection.cpp | 1 - dbms/src/Core/Block.cpp | 7 + dbms/src/Core/Block.h | 3 + dbms/src/Core/Defines.h | 2 +- dbms/src/Core/Protocol.h | 20 +-- .../InputStreamFromASTInsertQuery.cpp | 10 +- .../BlockInputStreamFromRowInputStream.cpp | 3 +- dbms/src/Interpreters/Settings.h | 2 +- dbms/src/Proto/CMakeLists.txt | 10 -- dbms/src/Proto/ServerMessage.capnp | 34 ---- dbms/src/Proto/protoHelpers.cpp | 168 ------------------ dbms/src/Proto/protoHelpers.h | 14 -- dbms/src/Storages/ColumnDefault.cpp | 82 ++++++++- dbms/src/Storages/ColumnDefault.h | 12 ++ dbms/src/Storages/TableMetadata.cpp | 18 -- dbms/src/Storages/TableMetadata.h | 24 --- .../00760_insert_json_with_defaults.sql | 2 + utils/build/build_debian_unbundled.sh | 2 +- utils/travis/normal.sh | 2 +- utils/travis/pbuilder.sh | 2 +- 26 files changed, 139 insertions(+), 352 deletions(-) delete mode 100644 dbms/src/Proto/CMakeLists.txt delete mode 100644 dbms/src/Proto/ServerMessage.capnp delete mode 100644 dbms/src/Proto/protoHelpers.cpp delete mode 100644 dbms/src/Proto/protoHelpers.h delete mode 100644 dbms/src/Storages/TableMetadata.cpp delete mode 100644 dbms/src/Storages/TableMetadata.h diff --git a/cmake/find_capnp.cmake b/cmake/find_capnp.cmake index a54e6c0413f..ec591afdc38 100644 --- a/cmake/find_capnp.cmake +++ b/cmake/find_capnp.cmake @@ -25,7 +25,6 @@ if (ENABLE_CAPNP) if (NOT USE_INTERNAL_CAPNP_LIBRARY) set (CAPNP_PATHS "/usr/local/opt/capnp/lib") - set (CAPNP_BIN_PATH "/usr/bin:/usr/local/bin") set (CAPNP_INCLUDE_PATHS "/usr/local/opt/capnp/include") find_library (CAPNP capnp PATHS ${CAPNP_PATHS}) find_library (CAPNPC capnpc PATHS ${CAPNP_PATHS}) @@ -41,8 +40,6 @@ if 
(ENABLE_CAPNP) set (CAPNP_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/capnproto/c++/src") set (CAPNP_LIBRARY capnpc) set (USE_CAPNP 1) - set (CAPNP_BIN_PATH ${ClickHouse_BINARY_DIR}/contrib/capnproto/c++/src/capnp) - set (CAPNP_BIN_TARGETS capnp_tool capnpc_cpp capnpc_capnp) endif () endif () diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 7dd9df62e10..dd8437d1e52 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -238,7 +238,6 @@ if (USE_CAPNP) if (NOT USE_INTERNAL_CAPNP_LIBRARY) target_include_directories (dbms SYSTEM BEFORE PRIVATE ${CAPNP_INCLUDE_DIR}) endif () - set (PROTO_LIB clickhouse_proto) endif () if (USE_RDKAFKA) diff --git a/dbms/programs/client/CMakeLists.txt b/dbms/programs/client/CMakeLists.txt index 5e99f97106b..65353094c26 100644 --- a/dbms/programs/client/CMakeLists.txt +++ b/dbms/programs/client/CMakeLists.txt @@ -1,12 +1,8 @@ add_library (clickhouse-client-lib ${LINK_MODE} Client.cpp) -target_link_libraries (clickhouse-client-lib clickhouse_common_io clickhouse_functions clickhouse_aggregate_functions - ${PROTO_LIB} ${LINE_EDITING_LIBS} ${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries (clickhouse-client-lib clickhouse_common_io clickhouse_functions clickhouse_aggregate_functions ${LINE_EDITING_LIBS} ${Boost_PROGRAM_OPTIONS_LIBRARY}) if (READLINE_INCLUDE_DIR) target_include_directories (clickhouse-client-lib SYSTEM PRIVATE ${READLINE_INCLUDE_DIR}) endif () -if (TARGET clickhouse_proto) - target_link_libraries (clickhouse-client-lib clickhouse_proto) -endif() if (CLICKHOUSE_SPLIT_BINARY) add_executable (clickhouse-client clickhouse-client.cpp) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index c976d50bad8..e8f4e1f74b9 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -56,13 +56,11 @@ #include #include #include -#include #include #include #include #include -#include -#include +#include #if USE_READLINE #include "Suggest.h" // Y_IGNORE @@ -895,12 
+893,11 @@ private: /// Receive description of table structure. Block sample; - TableMetadata table_meta(parsed_insert_query.database, parsed_insert_query.table); - if (receiveSampleBlock(sample, table_meta)) + if (receiveSampleBlock(sample)) { /// If structure was received (thus, server has not thrown an exception), /// send our data with that structure. - sendData(sample, table_meta); + sendData(sample); receiveEndOfQuery(); } } @@ -938,7 +935,7 @@ private: } - void sendData(Block & sample, const TableMetadata & table_meta) + void sendData(Block & sample) { /// If INSERT data must be sent. const ASTInsertQuery * parsed_insert_query = typeid_cast(&*parsed_query); @@ -949,19 +946,19 @@ private: { /// Send data contained in the query. ReadBufferFromMemory data_in(parsed_insert_query->data, parsed_insert_query->end - parsed_insert_query->data); - sendDataFrom(data_in, sample, table_meta); + sendDataFrom(data_in, sample); } else if (!is_interactive) { /// Send data read from stdin. - sendDataFrom(std_in, sample, table_meta); + sendDataFrom(std_in, sample); } else throw Exception("No data to insert", ErrorCodes::NO_DATA_TO_INSERT); } - void sendDataFrom(ReadBuffer & buf, Block & sample, const TableMetadata & table_meta) + void sendDataFrom(ReadBuffer & buf, Block & sample) { String current_format = insert_format; @@ -973,7 +970,7 @@ private: BlockInputStreamPtr block_input = context.getInputFormat( current_format, buf, sample, insert_format_max_block_size); - const ColumnDefaults & column_defaults = table_meta.column_defaults; + auto column_defaults = ColumnDefaultsHelper::extract(sample); if (!column_defaults.empty()) block_input = std::make_shared(block_input, column_defaults, context); BlockInputStreamPtr async_block_input = std::make_shared(block_input); @@ -1113,7 +1110,7 @@ private: /// Receive the block that serves as an example of the structure of table where data will be inserted. 
- bool receiveSampleBlock(Block & out, TableMetadata & table_meta) + bool receiveSampleBlock(Block & out) { while (true) { @@ -1125,12 +1122,6 @@ private: out = packet.block; return true; - case Protocol::Server::CapnProto: -#if USE_CAPNP - loadTableMetadata(packet.block, table_meta); -#endif - return receiveSampleBlock(out, table_meta); - case Protocol::Server::Exception: onException(*packet.exception); last_exception = std::move(packet.exception); diff --git a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp index f06fd070d98..b66c1e8bb5d 100644 --- a/dbms/programs/server/TCPHandler.cpp +++ b/dbms/programs/server/TCPHandler.cpp @@ -30,8 +30,7 @@ #include #include #include -#include -#include +#include #include "TCPHandler.h" @@ -361,22 +360,13 @@ void TCPHandler::processInsertQuery(const Settings & global_settings) */ state.io.out->writePrefix(); -#if USE_CAPNP - /// Send table metadata (column defaults) - if (client_revision >= DBMS_MIN_REVISION_WITH_PROTO_METADATA && - query_context.getSettingsRef().insert_sample_with_metadata) - { - TableMetadata table_meta(query_context.getCurrentDatabase(), query_context.getCurrentTable()); - if (table_meta.loadFromContext(query_context) && table_meta.hasDefaults()) - { - Block meta_block = storeTableMetadata(table_meta); - sendMetadata(meta_block); - } - } -#endif - /// Send block to the client - table structure. 
Block block = state.io.out->getHeader(); + + /// attach table metadata (column defaults) + if (client_revision >= DBMS_MIN_REVISION_WITH_COLUMN_DEFAULTS_METADATA) + ColumnDefaultsHelper::attachFromContext(query_context, block); + sendData(block); readData(global_settings); @@ -860,19 +850,6 @@ void TCPHandler::sendLogData(const Block & block) } -void TCPHandler::sendMetadata(const Block & block) -{ - initBlockOutput(block); - - writeVarUInt(Protocol::Server::CapnProto, *out); - writeStringBinary("", *out); - - state.block_out->write(block); - state.maybe_compressed_out->next(); - out->next(); -} - - void TCPHandler::sendException(const Exception & e, bool with_stack_trace) { writeVarUInt(Protocol::Server::Exception, *out); diff --git a/dbms/programs/server/TCPHandler.h b/dbms/programs/server/TCPHandler.h index 22177edf77a..af422921f07 100644 --- a/dbms/programs/server/TCPHandler.h +++ b/dbms/programs/server/TCPHandler.h @@ -144,7 +144,6 @@ private: void sendHello(); void sendData(const Block & block); /// Write a block to the network. 
void sendLogData(const Block & block); - void sendMetadata(const Block & block); void sendException(const Exception & e, bool with_stack_trace); void sendProgress(); void sendLogs(); diff --git a/dbms/src/Client/Connection.cpp b/dbms/src/Client/Connection.cpp index 2f23525f677..ce6246fba3a 100644 --- a/dbms/src/Client/Connection.cpp +++ b/dbms/src/Client/Connection.cpp @@ -575,7 +575,6 @@ Connection::Packet Connection::receivePacket() switch (res.type) { case Protocol::Server::Data: - case Protocol::Server::CapnProto: res.block = receiveData(); return res; diff --git a/dbms/src/Core/Block.cpp b/dbms/src/Core/Block.cpp index 716de954974..de0e831f37c 100644 --- a/dbms/src/Core/Block.cpp +++ b/dbms/src/Core/Block.cpp @@ -99,6 +99,13 @@ void Block::insertUnique(ColumnWithTypeAndName && elem) } +void Block::erase(const std::set & positions) +{ + for (auto it = positions.rbegin(); it != positions.rend(); ++it) + erase(*it); +} + + void Block::erase(size_t position) { if (data.empty()) diff --git a/dbms/src/Core/Block.h b/dbms/src/Core/Block.h index 0d6eeae9278..2ee6fc4a78f 100644 --- a/dbms/src/Core/Block.h +++ b/dbms/src/Core/Block.h @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -54,6 +55,8 @@ public: void insertUnique(ColumnWithTypeAndName && elem); /// remove the column at the specified position void erase(size_t position); + /// remove the columns at the specified positions + void erase(const std::set & positions); /// remove the column with the specified name void erase(const String & name); diff --git a/dbms/src/Core/Defines.h b/dbms/src/Core/Defines.h index 2b3650ea975..264d1d95147 100644 --- a/dbms/src/Core/Defines.h +++ b/dbms/src/Core/Defines.h @@ -51,7 +51,7 @@ /// Two-level (bucketed) aggregation is incompatible if servers are inconsistent in these rules /// (keys will be placed in different buckets and result will not be fully aggregated). 
#define DBMS_MIN_REVISION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD 54408 -#define DBMS_MIN_REVISION_WITH_PROTO_METADATA 54410 +#define DBMS_MIN_REVISION_WITH_COLUMN_DEFAULTS_METADATA 54410 /// Version of ClickHouse TCP protocol. Set to git tag with latest protocol change. #define DBMS_TCP_PROTOCOL_VERSION 54226 diff --git a/dbms/src/Core/Protocol.h b/dbms/src/Core/Protocol.h index c925886db20..27df4341de9 100644 --- a/dbms/src/Core/Protocol.h +++ b/dbms/src/Core/Protocol.h @@ -69,8 +69,7 @@ namespace Protocol Totals = 7, /// A block with totals (compressed or not). Extremes = 8, /// A block with minimums and maximums (compressed or not). TablesStatusResponse = 9, /// A response to TablesStatus request. - Log = 10, /// System logs of the query execution - CapnProto = 11, /// Cap'n Proto + Log = 10 /// System logs of the query execution }; /// NOTE: If the type of packet argument would be Enum, the comparison packet >= 0 && packet < 10 @@ -79,9 +78,8 @@ namespace Protocol /// See https://www.securecoding.cert.org/confluence/display/cplusplus/INT36-CPP.+Do+not+use+out-of-range+enumeration+values inline const char * toString(UInt64 packet) { - static const char * data[] = { "Hello", "Data", "Exception", "Progress", "Pong", "EndOfStream", "ProfileInfo", "Totals", - "Extremes", "TablesStatusResponse", "Log", "CapnProto" }; - return packet < 12 + static const char * data[] = { "Hello", "Data", "Exception", "Progress", "Pong", "EndOfStream", "ProfileInfo", "Totals", "Extremes", "TablesStatusResponse", "Log" }; + return packet < 11 ? data[packet] : "Unknown packet"; } @@ -100,17 +98,15 @@ namespace Protocol Cancel = 3, /// Cancel the query execution. Ping = 4, /// Check that connection to the server is alive. TablesStatusRequest = 5, /// Check status of tables on the server. 
- KeepAlive = 6, /// Keep the connection alive - /// - CapnProto = 11, /// Cap'n Proto + KeepAlive = 6 /// Keep the connection alive }; inline const char * toString(UInt64 packet) { - static const char * unknown = "Unknown packet"; - static const char * data[] = { "Hello", "Query", "Data", "Cancel", "Ping", "TablesStatusRequest", "KeepAlive", - unknown, unknown, unknown, unknown, "CapnProto" }; - return (packet < 12) ? data[packet] : unknown; + static const char * data[] = { "Hello", "Query", "Data", "Cancel", "Ping", "TablesStatusRequest", "KeepAlive" }; + return packet < 7 + ? data[packet] + : "Unknown packet"; } } diff --git a/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp b/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp index 855840a15ee..c4f63d3bd22 100644 --- a/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp +++ b/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include namespace DB { @@ -46,11 +46,9 @@ InputStreamFromASTInsertQuery::InputStreamFromASTInsertQuery( res_stream = context.getInputFormat(format, *input_buffer_contacenated, streams.out->getHeader(), context.getSettings().max_insert_block_size); - TableMetadata table_meta(ast_insert_query->database, ast_insert_query->table); - table_meta.loadFromContext(context); - - if (!table_meta.column_defaults.empty()) - res_stream = std::make_shared(res_stream, table_meta.column_defaults, context); + auto column_defaults = ColumnDefaultsHelper::loadFromContext(context, ast_insert_query->database, ast_insert_query->table); + if (column_defaults) + res_stream = std::make_shared(res_stream, *column_defaults, context); } } diff --git a/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp b/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp index 4a23a594876..5670830136b 100644 --- a/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp +++ b/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp @@ -68,7 +68,8 @@ Block 
BlockInputStreamFromRowInputStream::readImpl() for (size_t column_idx = 0; column_idx < info.read_columns.size(); ++column_idx) { - if (!info.read_columns[column_idx]) { + if (!info.read_columns[column_idx]) + { size_t column_size = columns[column_idx]->size(); if (column_size == 0) throw Exception("Unexpected empty column", ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS); diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index ccd5b7ffb93..428f92b6a9d 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -125,7 +125,7 @@ struct Settings M(SettingUInt64, max_concurrent_queries_for_user, 0, "The maximum number of concurrent requests per user.") \ \ M(SettingBool, insert_deduplicate, true, "For INSERT queries in the replicated table, specifies that deduplication of insertings blocks should be preformed") \ - M(SettingBool, insert_sample_with_metadata, true, "For INSERT queries, specifies that the server need to send metadata about column defaults to the client. This will be used to calculate default expressions.") \ + M(SettingBool, insert_sample_with_metadata, false, "For INSERT queries, specifies that the server need to send metadata about column defaults to the client. This will be used to calculate default expressions.") \ \ M(SettingUInt64, insert_quorum, 0, "For INSERT queries in the replicated table, wait writing for the specified number of replicas and linearize the addition of the data. 
0 - disabled.") \ M(SettingMilliseconds, insert_quorum_timeout, 600000, "") \ diff --git a/dbms/src/Proto/CMakeLists.txt b/dbms/src/Proto/CMakeLists.txt deleted file mode 100644 index 1e150d9f86b..00000000000 --- a/dbms/src/Proto/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -add_custom_command (OUTPUT ServerMessage.capnp.c++ ServerMessage.capnp.h - COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/ServerMessage.capnp ${CMAKE_CURRENT_BINARY_DIR}/ServerMessage.capnp - COMMAND ${CMAKE_COMMAND} -E env PATH=${CAPNP_BIN_PATH} capnp compile -I ${CAPNP_INCLUDE_DIR} -oc++ ServerMessage.capnp - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/ServerMessage.capnp ${CAPNP_BIN_TARGETS}) - -add_library (clickhouse_proto ServerMessage.capnp.c++ protoHelpers.cpp) -target_link_libraries (clickhouse_proto clickhouse_common_io ${CAPNP_LIBRARY}) -target_include_directories (clickhouse_proto PUBLIC ${CAPNP_INCLUDE_DIR} ${DBMS_INCLUDE_DIR}) -target_include_directories (clickhouse_proto PRIVATE - ${CMAKE_CURRENT_BINARY_DIR} ${COMMON_INCLUDE_DIR} ${DBMS_INCLUDE_DIR} ${CITYHASH_CONTRIB_INCLUDE_DIR}) diff --git a/dbms/src/Proto/ServerMessage.capnp b/dbms/src/Proto/ServerMessage.capnp deleted file mode 100644 index 317430fce91..00000000000 --- a/dbms/src/Proto/ServerMessage.capnp +++ /dev/null @@ -1,34 +0,0 @@ -@0xfdd1e2948338b156; - -using Cxx = import "/capnp/c++.capnp"; -$Cxx.namespace("DB::Proto"); - -struct ColumnDefault -{ - kind @0 :UInt16; - expression @1 :Text; -} - -struct Column -{ - name @0 :Text; - type @1 :Text; - default @2 :ColumnDefault; -} - -struct Table -{ - name @0 :Text; - columns @1 :List(Column); -} - -struct Database -{ - name @0 :Text; - tables @1 :List(Table); -} - -struct Context -{ - databases @0 :List(Database); -} diff --git a/dbms/src/Proto/protoHelpers.cpp b/dbms/src/Proto/protoHelpers.cpp deleted file mode 100644 index cc91cbc5905..00000000000 --- a/dbms/src/Proto/protoHelpers.cpp +++ /dev/null @@ -1,168 +0,0 @@ 
-#include "protoHelpers.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -/// @sa https://capnproto.org/cxx.html - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - - - static MutableColumnPtr serializeProto(capnp::MessageBuilder & message) - { - MutableColumnPtr data = DataTypeUInt8().createColumn(); - - kj::Array serialized = messageToFlatArray(message); - kj::ArrayPtr bytes = serialized.asChars(); - - data->reserve(bytes.size()); - for (size_t i = 0 ; i < bytes.size(); ++i) - data->insertData(&bytes[i], 1); - - return data; - } - - - /// - template - class ProtoDeserializer - { - public: - ProtoDeserializer(const char * data, size_t data_size) - : serialized(kj::arrayPtr(reinterpret_cast(data), data_size / sizeof(capnp::word))), - reader(serialized) - {} - - typename T::Reader getReader() { return reader.getRoot(); } - - private: - kj::ArrayPtr serialized; - capnp::FlatArrayMessageReader reader; - }; - - - static MutableColumnPtr storeTableMeta(const TableMetadata & meta) - { - if (meta.database.empty() || meta.table.empty()) - throw Exception("storeTableMeta: table is not set", ErrorCodes::LOGICAL_ERROR); - - capnp::MallocMessageBuilder message; - Proto::Context::Builder proto_context = message.initRoot(); - - auto proto_databases = proto_context.initDatabases(1); - auto proto_db = proto_databases[0]; - proto_db.setName(meta.database); - - auto proto_db_tables = proto_db.initTables(1); - auto proto_table = proto_db_tables[0]; - proto_table.setName(meta.table); - - auto proto_columns = proto_table.initColumns(meta.column_defaults.size()); - - size_t column_no = 0; - for (const auto & pr_column : meta.column_defaults) - { - const String & column_name = pr_column.first; - const ColumnDefault & def = pr_column.second; - std::stringstream ss; - ss << def.expression; - - auto current_column = proto_columns[column_no]; - 
current_column.setName(column_name); - current_column.getDefault().setKind(static_cast(def.kind)); - current_column.getDefault().setExpression(ss.str()); - - ++column_no; - } - - return serializeProto(message); - } - - - static void loadTableMeta(const char * data, size_t data_size, TableMetadata & table_meta) - { - if (data == nullptr || data_size == 0) - throw Exception("loadTableMeta: empty metadata column", ErrorCodes::LOGICAL_ERROR); - - ProtoDeserializer deserializer(data, data_size); - Proto::Context::Reader proto_context = deserializer.getReader(); - - ParserTernaryOperatorExpression parser; - - for (auto proto_database : proto_context.getDatabases()) - { - const String & database_name = proto_database.getName().cStr(); - if (database_name != table_meta.database) - continue; - - for (auto proto_table : proto_database.getTables()) - { - String table_name = proto_table.getName().cStr(); - if (table_name != table_meta.table) - continue; - - for (auto column : proto_table.getColumns()) - { - String column_name = column.getName().cStr(); - String expression = column.getDefault().getExpression().cStr(); - ColumnDefaultKind expression_kind = static_cast(column.getDefault().getKind()); - - if (expression_kind == ColumnDefaultKind::Default) - { - ASTPtr ast = parseQuery(parser, expression, expression.size()); - table_meta.column_defaults.emplace(column_name, ColumnDefault{expression_kind, ast}); - } - } - } - } - } - - - static constexpr const char * tableMetaColumnName() - { - return "tableMeta"; - } - - - Block storeTableMetadata(const TableMetadata & table_meta) - { - ColumnWithTypeAndName proto_column; - proto_column.name = tableMetaColumnName(); - proto_column.type = std::make_shared(); - proto_column.column = storeTableMeta(table_meta); - - Block block; - block.insert(std::move(proto_column)); - return block; - } - - - void loadTableMetadata(const Block & block, TableMetadata & table_meta) - { - /// select metadata type by column name - if 
(block.has(tableMetaColumnName())) - { - const ColumnWithTypeAndName & column = block.getByName(tableMetaColumnName()); - StringRef raw_data = column.column->getRawData(); - loadTableMeta(raw_data.data, raw_data.size, table_meta); - } - } -} diff --git a/dbms/src/Proto/protoHelpers.h b/dbms/src/Proto/protoHelpers.h deleted file mode 100644 index 7cef5a28013..00000000000 --- a/dbms/src/Proto/protoHelpers.h +++ /dev/null @@ -1,14 +0,0 @@ -#pragma once -#if USE_CAPNP - -namespace DB -{ - class Context; - class Block; - struct TableMetadata; - - Block storeTableMetadata(const TableMetadata & table_meta); - void loadTableMetadata(const Block & block, TableMetadata & table_meta); -} - -#endif diff --git a/dbms/src/Storages/ColumnDefault.cpp b/dbms/src/Storages/ColumnDefault.cpp index 0ff885ab1e5..a6fc0b39bb6 100644 --- a/dbms/src/Storages/ColumnDefault.cpp +++ b/dbms/src/Storages/ColumnDefault.cpp @@ -1,6 +1,14 @@ -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace DB { @@ -37,4 +45,74 @@ bool operator==(const ColumnDefault & lhs, const ColumnDefault & rhs) return lhs.kind == rhs.kind && queryToString(lhs.expression) == queryToString(rhs.expression); } +ColumnDefaults ColumnDefaultsHelper::loadFromContext(const Context & context) +{ + return loadFromContext(context, context.getCurrentDatabase(), context.getCurrentTable()); +} + +ColumnDefaults ColumnDefaultsHelper::loadFromContext(const Context & context, const String & database, const String & table) +{ + if (context.getSettingsRef().insert_sample_with_metadata) + { + if (!context.isTableExist(database, table)) + return {}; + + StoragePtr storage = context.getTable(database, table); + const ColumnsDescription & table_columns = storage->getColumns(); + return table_columns.defaults; + } + return {}; +} + +void ColumnDefaultsHelper::attachFromContext(const Context & context, Block & sample) +{ + ColumnDefaults column_defaults = 
loadFromContext(context); + if (column_defaults.empty()) + return; + + for (auto pr : column_defaults) + { + std::stringstream ss; + ss << *pr.second.expression; + + ColumnWithTypeAndName col; + col.type = std::make_shared(); + col.name = String(" ") + toString(pr.second.kind) + ' ' + pr.first + ' ' + ss.str(); + col.column = col.type->createColumnConst(sample.rows(), ""); + + sample.insert(std::move(col)); + } +} + +ColumnDefaults ColumnDefaultsHelper::extract(Block & sample) +{ + ParserTernaryOperatorExpression parser; + ColumnDefaults column_defaults; + std::set pos_to_erase; + + for (size_t i = 0; i < sample.columns(); ++i) + { + const ColumnWithTypeAndName & column_wtn = sample.safeGetByPosition(i); + + if (column_wtn.name.size() && column_wtn.name[0] == ' ') + { + String str_kind, column_name; + std::stringstream ss; + ss << column_wtn.name; + ss >> str_kind >> column_name; + String expression = column_wtn.name.substr(str_kind.size() + column_name.size() + 3); + + ColumnDefault def; + def.kind = columnDefaultKindFromString(str_kind); + def.expression = parseQuery(parser, expression, expression.size()); + + column_defaults.emplace(column_name, def); + pos_to_erase.insert(i); + } + } + + sample.erase(pos_to_erase); + return column_defaults; +} + } diff --git a/dbms/src/Storages/ColumnDefault.h b/dbms/src/Storages/ColumnDefault.h index 95eb4d5b597..dfeb05dc39c 100644 --- a/dbms/src/Storages/ColumnDefault.h +++ b/dbms/src/Storages/ColumnDefault.h @@ -9,6 +9,9 @@ namespace DB { +class Context; +class Block; + enum class ColumnDefaultKind { Default, @@ -33,5 +36,14 @@ bool operator==(const ColumnDefault & lhs, const ColumnDefault & rhs); using ColumnDefaults = std::unordered_map; +/// Static methods to manipulate column defaults +struct ColumnDefaultsHelper +{ + static void attachFromContext(const Context & context, Block & sample); + static ColumnDefaults extract(Block & sample); + + static ColumnDefaults loadFromContext(const Context & context, const String & 
database, const String & table); + static ColumnDefaults loadFromContext(const Context & context); /// FIXME: we need another way to store current table +}; } diff --git a/dbms/src/Storages/TableMetadata.cpp b/dbms/src/Storages/TableMetadata.cpp deleted file mode 100644 index 5549574275a..00000000000 --- a/dbms/src/Storages/TableMetadata.cpp +++ /dev/null @@ -1,18 +0,0 @@ -#include -#include -#include "TableMetadata.h" - - -namespace DB -{ - bool TableMetadata::loadFromContext(const Context & context) - { - if (!context.isTableExist(database, table)) - return false; - - StoragePtr storage = context.getTable(database, table); - const ColumnsDescription & table_columns = storage->getColumns(); - column_defaults = table_columns.defaults; - return true; - } -} diff --git a/dbms/src/Storages/TableMetadata.h b/dbms/src/Storages/TableMetadata.h deleted file mode 100644 index ddb5b79b68b..00000000000 --- a/dbms/src/Storages/TableMetadata.h +++ /dev/null @@ -1,24 +0,0 @@ -#pragma once - -#include - -namespace DB -{ - class Context; - class Block; - - /// Additional information for query that could not be get from sample block - struct TableMetadata - { - TableMetadata(const String & database_, const String & table_) - : database(database_), table(table_) - {} - - const String & database; - const String & table; - ColumnDefaults column_defaults; - - bool loadFromContext(const Context & context); - bool hasDefaults() const { return !column_defaults.empty(); } - }; -} diff --git a/dbms/tests/queries/0_stateless/00760_insert_json_with_defaults.sql b/dbms/tests/queries/0_stateless/00760_insert_json_with_defaults.sql index 12b9e4538d1..a141b64f98a 100644 --- a/dbms/tests/queries/0_stateless/00760_insert_json_with_defaults.sql +++ b/dbms/tests/queries/0_stateless/00760_insert_json_with_defaults.sql @@ -1,3 +1,5 @@ +SET insert_sample_with_metadata=1; + CREATE DATABASE IF NOT EXISTS test; DROP TABLE IF EXISTS test.defaults; CREATE TABLE IF NOT EXISTS test.defaults diff --git 
a/utils/build/build_debian_unbundled.sh b/utils/build/build_debian_unbundled.sh index da673f1de55..a78b0fc25de 100755 --- a/utils/build/build_debian_unbundled.sh +++ b/utils/build/build_debian_unbundled.sh @@ -22,5 +22,5 @@ env TEST_RUN=1 \ `# Use all possible contrib libs from system` \ `# psmisc - killall` \ `# gdb - symbol test in pbuilder` \ - EXTRAPACKAGES="psmisc gdb clang-6.0 libc++abi-dev libc++-dev libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev librdkafka-dev libpoco-dev unixodbc-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev libunwind-dev googletest libcctz-dev libcapnp-dev capnproto libjemalloc-dev libssl-dev $EXTRAPACKAGES" \ + EXTRAPACKAGES="psmisc gdb clang-6.0 libc++abi-dev libc++-dev libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev librdkafka-dev libpoco-dev unixodbc-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev libunwind-dev googletest libcctz-dev libcapnp-dev libjemalloc-dev libssl-dev $EXTRAPACKAGES" \ pdebuild --configfile $ROOT_DIR/debian/.pbuilderrc $PDEBUILD_OPT diff --git a/utils/travis/normal.sh b/utils/travis/normal.sh index 8c44f6a7d58..7f45641d45f 100755 --- a/utils/travis/normal.sh +++ b/utils/travis/normal.sh @@ -32,7 +32,7 @@ cmake $CUR_DIR/../.. 
-DCMAKE_CXX_COMPILER=`which $DEB_CXX $CXX` -DCMAKE_C_COMPIL `# Use all possible contrib libs from system` \ -DUNBUNDLED=1 \ `# Disable all features` \ - -DENABLE_RDKAFKA=0 -DENABLE_EMBEDDED_COMPILER=0 -DUSE_INTERNAL_LLVM_LIBRARY=0 -DENABLE_JEMALLOC=0 -DENABLE_UNWIND=0 -DENABLE_MYSQL=0 -DENABLE_POCO_ODBC=0 -DENABLE_ODBC=0 $CMAKE_FLAGS + -DENABLE_CAPNP=0 -DENABLE_RDKAFKA=0 -DENABLE_EMBEDDED_COMPILER=0 -DUSE_INTERNAL_LLVM_LIBRARY=0 -DENABLE_JEMALLOC=0 -DENABLE_UNWIND=0 -DENABLE_MYSQL=0 -DENABLE_POCO_ODBC=0 -DENABLE_ODBC=0 $CMAKE_FLAGS ninja clickhouse-bundle diff --git a/utils/travis/pbuilder.sh b/utils/travis/pbuilder.sh index 3a434219fd7..d993e8715b8 100755 --- a/utils/travis/pbuilder.sh +++ b/utils/travis/pbuilder.sh @@ -24,7 +24,7 @@ env TEST_RUN=${TEST_RUN=1} \ DEB_CC=${DEB_CC=$CC} DEB_CXX=${DEB_CXX=$CXX} \ CCACHE_SIZE=${CCACHE_SIZE:=4G} \ `# Disable all features` \ - CMAKE_FLAGS="-DCMAKE_BUILD_TYPE=Debug -DUNBUNDLED=1 -DENABLE_RDKAFKA=0 -DENABLE_JEMALLOC=0 -DENABLE_UNWIND=0 -DENABLE_MYSQL=0 -DENABLE_POCO_ODBC=0 -DENABLE_ODBC=0 -DUSE_INTERNAL_LLVM_LIBRARY=0 -DCMAKE_C_FLAGS_ADD='-O0 -g0' -DCMAKE_CXX_FLAGS_ADD='-O0 -g0' $CMAKE_FLAGS" \ + CMAKE_FLAGS="-DCMAKE_BUILD_TYPE=Debug -DUNBUNDLED=1 -DENABLE_CAPNP=0 -DENABLE_RDKAFKA=0 -DENABLE_JEMALLOC=0 -DENABLE_UNWIND=0 -DENABLE_MYSQL=0 -DENABLE_POCO_ODBC=0 -DENABLE_ODBC=0 -DUSE_INTERNAL_LLVM_LIBRARY=0 -DCMAKE_C_FLAGS_ADD='-O0 -g0' -DCMAKE_CXX_FLAGS_ADD='-O0 -g0' $CMAKE_FLAGS" \ `# Use all possible contrib libs from system` \ `# psmisc - killall` \ EXTRAPACKAGES="psmisc clang-5.0 lld-5.0 liblld-5.0-dev libclang-5.0-dev liblld-5.0 libc++abi-dev libc++-dev libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev librdkafka-dev libpoco-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev libjemalloc-dev $EXTRAPACKAGES" \ From 8284de8268e73d8d50c15defb44433a0a52085d7 Mon Sep 17 00:00:00 2001 From: chertus Date: 
Thu, 15 Nov 2018 13:32:20 +0300 Subject: [PATCH 42/90] fix build --- dbms/src/CMakeLists.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/dbms/src/CMakeLists.txt b/dbms/src/CMakeLists.txt index 410f8a4edad..f6fa96e1d47 100644 --- a/dbms/src/CMakeLists.txt +++ b/dbms/src/CMakeLists.txt @@ -13,6 +13,3 @@ add_subdirectory (AggregateFunctions) add_subdirectory (Client) add_subdirectory (TableFunctions) add_subdirectory (Formats) -if (USE_CAPNP) - add_subdirectory (Proto) -endif () From 5c67e0220554480b6dbc4dc83c5e65030c6fd75b Mon Sep 17 00:00:00 2001 From: chertus Date: Thu, 15 Nov 2018 13:39:13 +0300 Subject: [PATCH 43/90] fix build --- dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp b/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp index c4f63d3bd22..d232fee96ce 100644 --- a/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp +++ b/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp @@ -47,8 +47,8 @@ InputStreamFromASTInsertQuery::InputStreamFromASTInsertQuery( res_stream = context.getInputFormat(format, *input_buffer_contacenated, streams.out->getHeader(), context.getSettings().max_insert_block_size); auto column_defaults = ColumnDefaultsHelper::loadFromContext(context, ast_insert_query->database, ast_insert_query->table); - if (column_defaults) - res_stream = std::make_shared(res_stream, *column_defaults, context); + if (!column_defaults.empty()) + res_stream = std::make_shared(res_stream, column_defaults, context); } } From 65d34ca231a433654288889526b57ec399d58cdc Mon Sep 17 00:00:00 2001 From: chertus Date: Thu, 15 Nov 2018 18:03:13 +0300 Subject: [PATCH 44/90] do not override current_database in context [CLICKHOUSE-3578] --- dbms/programs/server/TCPHandler.cpp | 8 +++- dbms/src/Core/Block.h | 11 +++++ dbms/src/Interpreters/Context.cpp | 15 ------ dbms/src/Interpreters/Context.h | 7 +-- 
.../Interpreters/InterpreterInsertQuery.cpp | 5 +- .../src/Interpreters/InterpreterInsertQuery.h | 2 +- dbms/src/Interpreters/executeQuery.cpp | 10 +--- dbms/src/Storages/ColumnDefault.cpp | 46 +++++++++++-------- dbms/src/Storages/ColumnDefault.h | 3 +- 9 files changed, 55 insertions(+), 52 deletions(-) diff --git a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp index b66c1e8bb5d..d1ccc3e788f 100644 --- a/dbms/programs/server/TCPHandler.cpp +++ b/dbms/programs/server/TCPHandler.cpp @@ -363,9 +363,13 @@ void TCPHandler::processInsertQuery(const Settings & global_settings) /// Send block to the client - table structure. Block block = state.io.out->getHeader(); - /// attach table metadata (column defaults) + /// attach column defaults to sample block (allow client to attach defaults for ommited source values) if (client_revision >= DBMS_MIN_REVISION_WITH_COLUMN_DEFAULTS_METADATA) - ColumnDefaultsHelper::attachFromContext(query_context, block); + { + auto db_and_table = query_context.getInsertionTable(); + ColumnDefaults column_defaults = ColumnDefaultsHelper::loadFromContext(query_context, db_and_table.first, db_and_table.second); + ColumnDefaultsHelper::attach(column_defaults, block); + } sendData(block); diff --git a/dbms/src/Core/Block.h b/dbms/src/Core/Block.h index 2ee6fc4a78f..0473ff72419 100644 --- a/dbms/src/Core/Block.h +++ b/dbms/src/Core/Block.h @@ -34,6 +34,9 @@ private: Container data; IndexByName index_by_name; + /// Regular column can't start with ' ', so it's possible to attach some hidden columns with a prefix + constexpr static const char SPECIAL_COLUMN_PREFIX = ' '; + public: BlockInfo info; /// Input stream could use delayed_defaults to add addition info at which rows it have inserted default values. 
@@ -103,6 +106,14 @@ public: operator bool() const { return !data.empty(); } bool operator!() const { return data.empty(); } + static String mkSpecialColumnName(const String & col_name) { return String(1, SPECIAL_COLUMN_PREFIX) + col_name; } + static bool isSpecialColumnName(const String & col_name) { return !col_name.empty() && col_name[0] == SPECIAL_COLUMN_PREFIX; } + + static bool isSpecialColumnName(const String & col_name, const String & pattern) + { + return col_name.find(String(1, SPECIAL_COLUMN_PREFIX) + pattern) == 0; + } + /** Get a list of column names separated by commas. */ std::string dumpNames() const; diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index ad110718a27..6e7051ba387 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1014,12 +1014,6 @@ String Context::getCurrentDatabase() const } -String Context::getCurrentTable() const -{ - return current_table; -} - - String Context::getCurrentQueryId() const { return client_info.current_query_id; @@ -1034,15 +1028,6 @@ void Context::setCurrentDatabase(const String & name) } -void Context::setCurrentTable(const String & database, const String & table) -{ - auto lock = getLock(); - assertTableExists(database, table); - current_database = database; - current_table = table; -} - - void Context::setCurrentQueryId(const String & query_id) { if (!client_info.current_query_id.empty()) diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index 89153640a8f..5d6d326a460 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -118,11 +118,11 @@ private: std::shared_ptr quota; /// Current quota. By default - empty quota, that have no limits. String current_database; - String current_table; Settings settings; /// Setting for query execution. using ProgressCallback = std::function; ProgressCallback progress_callback; /// Callback for tracking progress of query execution. 
QueryStatus * process_list_elem = nullptr; /// For tracking total resource usage for query. + std::pair insertion_table; /// Saved insertion table in query context String default_format; /// Format, used when server formats data by itself and if query does not have FORMAT specification. /// Thus, used in HTTP interface. If not specified - then some globally default format is used. @@ -229,12 +229,13 @@ public: std::unique_ptr getDDLGuard(const String & database, const String & table) const; String getCurrentDatabase() const; - String getCurrentTable() const; String getCurrentQueryId() const; void setCurrentDatabase(const String & name); - void setCurrentTable(const String & database, const String & table); void setCurrentQueryId(const String & query_id); + void setInsertionTable(std::pair && db_and_table) { insertion_table = db_and_table; } + const std::pair & getInsertionTable() const { return insertion_table; } + String getDefaultFormat() const; /// If default_format is not specified, some global default format is returned. 
void setDefaultFormat(const String & name); diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.cpp b/dbms/src/Interpreters/InterpreterInsertQuery.cpp index 08147ea47d1..652b098098b 100644 --- a/dbms/src/Interpreters/InterpreterInsertQuery.cpp +++ b/dbms/src/Interpreters/InterpreterInsertQuery.cpp @@ -159,11 +159,10 @@ void InterpreterInsertQuery::checkAccess(const ASTInsertQuery & query) throw Exception("Cannot insert into table in readonly mode", ErrorCodes::READONLY); } -void InterpreterInsertQuery::getDatabaseTable(String & database, String & table) const +std::pair InterpreterInsertQuery::getDatabaseTable() const { ASTInsertQuery & query = typeid_cast(*query_ptr); - database = query.database; - table = query.table; + return {query.database, query.table}; } } diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.h b/dbms/src/Interpreters/InterpreterInsertQuery.h index 4ec7460fb75..9cde2c274fe 100644 --- a/dbms/src/Interpreters/InterpreterInsertQuery.h +++ b/dbms/src/Interpreters/InterpreterInsertQuery.h @@ -24,7 +24,7 @@ public: */ BlockIO execute() override; - void getDatabaseTable(String & database, String & table) const; + std::pair getDatabaseTable() const; private: StoragePtr getTable(const ASTInsertQuery & query); diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 34839eb2b59..2a885285b89 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -204,14 +204,8 @@ static std::tuple executeQueryImpl( auto interpreter = InterpreterFactory::get(ast, context, stage); res = interpreter->execute(); - if (InterpreterInsertQuery * insertInterpreter = typeid_cast(&*interpreter)) - { - String database; - String table_name; - insertInterpreter->getDatabaseTable(database, table_name); - if (!database.empty()) - context.setCurrentTable(database, table_name); - } + if (auto * insert_interpreter = typeid_cast(&*interpreter)) + 
context.setInsertionTable(insert_interpreter->getDatabaseTable()); if (process_list_entry) { diff --git a/dbms/src/Storages/ColumnDefault.cpp b/dbms/src/Storages/ColumnDefault.cpp index a6fc0b39bb6..cd79c5ca093 100644 --- a/dbms/src/Storages/ColumnDefault.cpp +++ b/dbms/src/Storages/ColumnDefault.cpp @@ -10,16 +10,27 @@ #include #include -namespace DB +namespace { +struct AliasNames +{ + static constexpr const char * DEFAULT = "DEFAULT"; + static constexpr const char * MATERIALIZED = "MATERIALIZED"; + static constexpr const char * ALIAS = "ALIAS"; +}; + +} + +namespace DB +{ ColumnDefaultKind columnDefaultKindFromString(const std::string & str) { static const std::unordered_map map{ - { "DEFAULT", ColumnDefaultKind::Default }, - { "MATERIALIZED", ColumnDefaultKind::Materialized }, - { "ALIAS", ColumnDefaultKind::Alias } + { AliasNames::DEFAULT, ColumnDefaultKind::Default }, + { AliasNames::MATERIALIZED, ColumnDefaultKind::Materialized }, + { AliasNames::ALIAS, ColumnDefaultKind::Alias } }; const auto it = map.find(str); @@ -30,9 +41,9 @@ ColumnDefaultKind columnDefaultKindFromString(const std::string & str) std::string toString(const ColumnDefaultKind kind) { static const std::unordered_map map{ - { ColumnDefaultKind::Default, "DEFAULT" }, - { ColumnDefaultKind::Materialized, "MATERIALIZED" }, - { ColumnDefaultKind::Alias, "ALIAS" } + { ColumnDefaultKind::Default, AliasNames::DEFAULT }, + { ColumnDefaultKind::Materialized, AliasNames::MATERIALIZED }, + { ColumnDefaultKind::Alias, AliasNames::ALIAS } }; const auto it = map.find(kind); @@ -45,11 +56,6 @@ bool operator==(const ColumnDefault & lhs, const ColumnDefault & rhs) return lhs.kind == rhs.kind && queryToString(lhs.expression) == queryToString(rhs.expression); } -ColumnDefaults ColumnDefaultsHelper::loadFromContext(const Context & context) -{ - return loadFromContext(context, context.getCurrentDatabase(), context.getCurrentTable()); -} - ColumnDefaults ColumnDefaultsHelper::loadFromContext(const Context & 
context, const String & database, const String & table) { if (context.getSettingsRef().insert_sample_with_metadata) @@ -64,9 +70,8 @@ ColumnDefaults ColumnDefaultsHelper::loadFromContext(const Context & context, co return {}; } -void ColumnDefaultsHelper::attachFromContext(const Context & context, Block & sample) +void ColumnDefaultsHelper::attach(const ColumnDefaults & column_defaults, Block & sample) { - ColumnDefaults column_defaults = loadFromContext(context); if (column_defaults.empty()) return; @@ -75,9 +80,11 @@ void ColumnDefaultsHelper::attachFromContext(const Context & context, Block & sa std::stringstream ss; ss << *pr.second.expression; + /// Serialize defaults to special columns names. + /// It looks better to send expression as a column data but sample block has 0 rows. ColumnWithTypeAndName col; col.type = std::make_shared(); - col.name = String(" ") + toString(pr.second.kind) + ' ' + pr.first + ' ' + ss.str(); + col.name = Block::mkSpecialColumnName(toString(pr.second.kind) + ' ' + pr.first + ' ' + ss.str()); col.column = col.type->createColumnConst(sample.rows(), ""); sample.insert(std::move(col)); @@ -94,17 +101,20 @@ ColumnDefaults ColumnDefaultsHelper::extract(Block & sample) { const ColumnWithTypeAndName & column_wtn = sample.safeGetByPosition(i); - if (column_wtn.name.size() && column_wtn.name[0] == ' ') + if (Block::isSpecialColumnName(column_wtn.name, AliasNames::DEFAULT) || + Block::isSpecialColumnName(column_wtn.name, AliasNames::MATERIALIZED) || + Block::isSpecialColumnName(column_wtn.name, AliasNames::ALIAS)) { String str_kind, column_name; std::stringstream ss; ss << column_wtn.name; ss >> str_kind >> column_name; - String expression = column_wtn.name.substr(str_kind.size() + column_name.size() + 3); + size_t expression_pos = str_kind.size() + column_name.size() + 3; + StringRef expression(&column_wtn.name[expression_pos], column_wtn.name.size() - expression_pos); ColumnDefault def; def.kind = columnDefaultKindFromString(str_kind); - 
def.expression = parseQuery(parser, expression, expression.size()); + def.expression = parseQuery(parser, expression.data, expression.size); column_defaults.emplace(column_name, def); pos_to_erase.insert(i); diff --git a/dbms/src/Storages/ColumnDefault.h b/dbms/src/Storages/ColumnDefault.h index dfeb05dc39c..00693b54ad5 100644 --- a/dbms/src/Storages/ColumnDefault.h +++ b/dbms/src/Storages/ColumnDefault.h @@ -39,11 +39,10 @@ using ColumnDefaults = std::unordered_map; /// Static methods to manipulate column defaults struct ColumnDefaultsHelper { - static void attachFromContext(const Context & context, Block & sample); + static void attach(const ColumnDefaults & column_defaults, Block & sample); static ColumnDefaults extract(Block & sample); static ColumnDefaults loadFromContext(const Context & context, const String & database, const String & table); - static ColumnDefaults loadFromContext(const Context & context); /// FIXME: we need another way to store current table }; } From 13646eb4c4c10c37764b40e91dea6545e1a1073e Mon Sep 17 00:00:00 2001 From: chertus Date: Thu, 15 Nov 2018 18:57:20 +0300 Subject: [PATCH 45/90] move BlockMissingValues from Block to IBlockInputStream::getMissingValues() --- dbms/src/Core/Block.h | 3 --- dbms/src/Core/BlockInfo.cpp | 4 ++-- dbms/src/Core/BlockInfo.h | 6 +++--- dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp | 2 +- dbms/src/DataStreams/IBlockInputStream.h | 7 +++++++ dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp | 7 ++----- dbms/src/Formats/BlockInputStreamFromRowInputStream.h | 3 +++ 7 files changed, 18 insertions(+), 14 deletions(-) diff --git a/dbms/src/Core/Block.h b/dbms/src/Core/Block.h index 0473ff72419..1523665f451 100644 --- a/dbms/src/Core/Block.h +++ b/dbms/src/Core/Block.h @@ -39,9 +39,6 @@ private: public: BlockInfo info; - /// Input stream could use delayed_defaults to add addition info at which rows it have inserted default values. 
- /// Such values would be replaced later by column defaults in AddingDefaultsBlockInputStream (if any). - BlockDelayedDefaults delayed_defaults; Block() = default; Block(std::initializer_list il); diff --git a/dbms/src/Core/BlockInfo.cpp b/dbms/src/Core/BlockInfo.cpp index f214d2782e3..3c8d1ccef7c 100644 --- a/dbms/src/Core/BlockInfo.cpp +++ b/dbms/src/Core/BlockInfo.cpp @@ -58,14 +58,14 @@ void BlockInfo::read(ReadBuffer & in) } } -void BlockDelayedDefaults::setBit(size_t column_idx, size_t row_idx) +void BlockMissingValues::setBit(size_t column_idx, size_t row_idx) { RowsBitMask & mask = columns_defaults[column_idx]; mask.resize(row_idx + 1); mask[row_idx] = true; } -const BlockDelayedDefaults::RowsBitMask & BlockDelayedDefaults::getDefaultsBitmask(size_t column_idx) const +const BlockMissingValues::RowsBitMask & BlockMissingValues::getDefaultsBitmask(size_t column_idx) const { static RowsBitMask none; auto it = columns_defaults.find(column_idx); diff --git a/dbms/src/Core/BlockInfo.h b/dbms/src/Core/BlockInfo.h index abc5383ddcb..9e23de688af 100644 --- a/dbms/src/Core/BlockInfo.h +++ b/dbms/src/Core/BlockInfo.h @@ -45,9 +45,8 @@ struct BlockInfo void read(ReadBuffer & in); }; -/// Block extention to support delayed defaults. Used in AddingDefaultsBlockInputStream to replace type defauls set by RowInputStream -/// with column defaults. -class BlockDelayedDefaults +/// Block extention to support delayed defaults. AddingDefaultsBlockInputStream uses it to replace missing values with column defaults. 
+class BlockMissingValues { public: using RowsBitMask = std::vector; /// a bit per row for a column @@ -56,6 +55,7 @@ public: void setBit(size_t column_idx, size_t row_idx); bool empty() const { return columns_defaults.empty(); } size_t size() const { return columns_defaults.size(); } + void clear() { columns_defaults.clear(); } private: using RowsMaskByColumnId = std::unordered_map; diff --git a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp index ace3841e67b..900a923b23a 100644 --- a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp +++ b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp @@ -37,7 +37,7 @@ Block AddingDefaultsBlockInputStream::readImpl() if (column_defaults.empty()) return res; - BlockDelayedDefaults delayed_defaults = res.delayed_defaults; + const BlockMissingValues & delayed_defaults = children.back()->getMissingValues(); if (delayed_defaults.empty()) return res; diff --git a/dbms/src/DataStreams/IBlockInputStream.h b/dbms/src/DataStreams/IBlockInputStream.h index b0f2d269f56..2e305b4b451 100644 --- a/dbms/src/DataStreams/IBlockInputStream.h +++ b/dbms/src/DataStreams/IBlockInputStream.h @@ -19,6 +19,7 @@ class IBlockInputStream; using BlockInputStreamPtr = std::shared_ptr; using BlockInputStreams = std::vector; +class BlockMissingValues; class TableStructureReadLock; using TableStructureReadLockPtr = std::shared_ptr; @@ -70,6 +71,12 @@ public: throw Exception("Method getBlockExtraInfo is not supported by the data stream " + getName(), ErrorCodes::NOT_IMPLEMENTED); } + virtual const BlockMissingValues & getMissingValues() const + { + static const BlockMissingValues none; + return none; + } + /** Read something before starting all data or after the end of all data. * In the `readSuffix` function, you can implement a finalization that can lead to an exception. * readPrefix() must be called before the first call to read(). 
diff --git a/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp b/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp index 5670830136b..1440375eb14 100644 --- a/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp +++ b/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp @@ -53,7 +53,7 @@ Block BlockInputStreamFromRowInputStream::readImpl() { size_t num_columns = sample.columns(); MutableColumns columns = sample.cloneEmptyColumns(); - BlockDelayedDefaults delayed_defaults; + delayed_defaults.clear(); try { @@ -144,10 +144,7 @@ Block BlockInputStreamFromRowInputStream::readImpl() if (columns.empty() || columns[0]->empty()) return {}; - auto out_block = sample.cloneWithColumns(std::move(columns)); - if (!delayed_defaults.empty()) - out_block.delayed_defaults = std::move(delayed_defaults); - return out_block; + return sample.cloneWithColumns(std::move(columns)); } diff --git a/dbms/src/Formats/BlockInputStreamFromRowInputStream.h b/dbms/src/Formats/BlockInputStreamFromRowInputStream.h index 72d11a02610..65d6fa04469 100644 --- a/dbms/src/Formats/BlockInputStreamFromRowInputStream.h +++ b/dbms/src/Formats/BlockInputStreamFromRowInputStream.h @@ -33,6 +33,8 @@ public: Block getHeader() const override { return sample; } + const BlockMissingValues & getMissingValues() const override { return delayed_defaults; } + protected: Block readImpl() override; @@ -40,6 +42,7 @@ private: RowInputStreamPtr row_input; Block sample; size_t max_block_size; + BlockMissingValues delayed_defaults; UInt64 allow_errors_num; Float64 allow_errors_ratio; From c642e16ee1ac3a5c9986c898e5d8c58c4e705bb7 Mon Sep 17 00:00:00 2001 From: chertus Date: Thu, 15 Nov 2018 19:57:20 +0300 Subject: [PATCH 46/90] clearer evaluateMissingDefaults [CLICKHOUSE-3578] --- .../AddingDefaultsBlockInputStream.cpp | 2 +- dbms/src/DataStreams/IBlockInputStream.h | 1 - .../Interpreters/evaluateMissingDefaults.cpp | 42 ++++++------------- .../Interpreters/evaluateMissingDefaults.h | 7 +--- 4 files changed, 
14 insertions(+), 38 deletions(-) diff --git a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp index 900a923b23a..3d02b0c6415 100644 --- a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp +++ b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp @@ -49,7 +49,7 @@ Block AddingDefaultsBlockInputStream::readImpl() evaluate_block.erase(column.first); } - evaluateMissingDefaultsUnsafe(evaluate_block, header.getNamesAndTypesList(), column_defaults, context); + evaluateMissingDefaults(evaluate_block, header.getNamesAndTypesList(), column_defaults, context, false); std::unordered_map mixed_columns; diff --git a/dbms/src/DataStreams/IBlockInputStream.h b/dbms/src/DataStreams/IBlockInputStream.h index 2e305b4b451..eb5f75ef46c 100644 --- a/dbms/src/DataStreams/IBlockInputStream.h +++ b/dbms/src/DataStreams/IBlockInputStream.h @@ -19,7 +19,6 @@ class IBlockInputStream; using BlockInputStreamPtr = std::shared_ptr; using BlockInputStreams = std::vector; -class BlockMissingValues; class TableStructureReadLock; using TableStructureReadLockPtr = std::shared_ptr; diff --git a/dbms/src/Interpreters/evaluateMissingDefaults.cpp b/dbms/src/Interpreters/evaluateMissingDefaults.cpp index 8d84c8f6576..40b75c0b673 100644 --- a/dbms/src/Interpreters/evaluateMissingDefaults.cpp +++ b/dbms/src/Interpreters/evaluateMissingDefaults.cpp @@ -29,33 +29,36 @@ static ASTPtr requiredExpressions(Block & block, const NamesAndTypesList & requi setAlias(it->second.expression->clone(), it->first)); } + if (default_expr_list->children.empty()) + return nullptr; return default_expr_list; } - void evaluateMissingDefaults(Block & block, const NamesAndTypesList & required_columns, const ColumnDefaults & column_defaults, - const Context & context) + const Context & context, bool with_block_copy) { if (column_defaults.empty()) return; ASTPtr default_expr_list = requiredExpressions(block, required_columns, column_defaults); - /// nothing 
to evaluate - if (default_expr_list->children.empty()) + if (!default_expr_list) return; + if (!with_block_copy) + { + auto syntax_result = SyntaxAnalyzer(context, {}).analyze(default_expr_list, block.getNamesAndTypesList()); + ExpressionAnalyzer{default_expr_list, syntax_result, context}.getActions(true)->execute(block); + return; + } + /** ExpressionAnalyzer eliminates "unused" columns, in order to ensure their safety * we are going to operate on a copy instead of the original block */ Block copy_block{block}; /// evaluate default values for defaulted columns - NamesAndTypesList available_columns; - for (size_t i = 0, size = block.columns(); i < size; ++i) - available_columns.emplace_back(block.getByPosition(i).name, block.getByPosition(i).type); - - auto syntax_result = SyntaxAnalyzer(context, {}).analyze(default_expr_list, available_columns); + auto syntax_result = SyntaxAnalyzer(context, {}).analyze(default_expr_list, block.getNamesAndTypesList()); ExpressionAnalyzer{default_expr_list, syntax_result, context}.getActions(true)->execute(copy_block); /// move evaluated columns to the original block, materializing them at the same time @@ -73,25 +76,4 @@ void evaluateMissingDefaults(Block & block, } } - -void evaluateMissingDefaultsUnsafe(Block & block, - const NamesAndTypesList & required_columns, - const std::unordered_map & column_defaults, - const Context & context) -{ - if (column_defaults.empty()) - return; - - ASTPtr default_expr_list = requiredExpressions(block, required_columns, column_defaults); - if (default_expr_list->children.empty()) - return; - - NamesAndTypesList available_columns; - for (size_t i = 0, size = block.columns(); i < size; ++i) - available_columns.emplace_back(block.getByPosition(i).name, block.getByPosition(i).type); - - auto syntax_result = SyntaxAnalyzer(context, {}).analyze(default_expr_list, available_columns); - ExpressionAnalyzer{default_expr_list, syntax_result, context}.getActions(true)->execute(block); -} - } diff --git 
a/dbms/src/Interpreters/evaluateMissingDefaults.h b/dbms/src/Interpreters/evaluateMissingDefaults.h index ce0c649f3d0..71f6fab9753 100644 --- a/dbms/src/Interpreters/evaluateMissingDefaults.h +++ b/dbms/src/Interpreters/evaluateMissingDefaults.h @@ -15,11 +15,6 @@ struct ColumnDefault; void evaluateMissingDefaults(Block & block, const NamesAndTypesList & required_columns, const std::unordered_map & column_defaults, - const Context & context); - -void evaluateMissingDefaultsUnsafe(Block & block, - const NamesAndTypesList & required_columns, - const std::unordered_map & column_defaults, - const Context & context); + const Context & context, bool with_block_copy = true); } From 811b824b01bbdbefe3fc0ea5935cb86476a01014 Mon Sep 17 00:00:00 2001 From: chertus Date: Thu, 15 Nov 2018 23:26:51 +0300 Subject: [PATCH 47/90] AddingDefaultsBlockInputStream fixed types optimisation --- .../AddingDefaultsBlockInputStream.cpp | 148 +++++++++++++++--- .../AddingDefaultsBlockInputStream.h | 6 + dbms/src/DataTypes/IDataType.h | 7 + 3 files changed, 137 insertions(+), 24 deletions(-) diff --git a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp index 3d02b0c6415..89202ea9ecd 100644 --- a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp +++ b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp @@ -1,19 +1,33 @@ -#include -#include -#include -#include -#include #include +#include +#include #include #include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + namespace DB { namespace ErrorCodes { + extern const int LOGICAL_ERROR; extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; + extern const int TYPE_MISMATCH; } @@ -61,30 +75,25 @@ Block AddingDefaultsBlockInputStream::readImpl() continue; size_t block_column_position = res.getPositionByName(column_name); - const ColumnWithTypeAndName & column_read = 
res.getByPosition(block_column_position); - - if (column_read.column->size() != column_def.column->size()) - throw Exception("Mismach column sizes while adding defaults", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); - + ColumnWithTypeAndName & column_read = res.getByPosition(block_column_position); const auto & defaults_mask = delayed_defaults.getDefaultsBitmask(block_column_position); + + checkCalculated(column_read, column_def, defaults_mask.size()); + if (!defaults_mask.empty()) { - MutableColumnPtr column_mixed = column_read.column->cloneEmpty(); - - for (size_t row_idx = 0; row_idx < column_read.column->size(); ++row_idx) + /// TODO: FixedString + if (isColumnedAsNumber(column_read.type) || isDecimal(column_read.type)) { - if (row_idx < defaults_mask.size() && defaults_mask[row_idx]) - { - if (column_def.column->isColumnConst()) - column_mixed->insert((*column_def.column)[row_idx]); - else - column_mixed->insertFrom(*column_def.column, row_idx); - } - else - column_mixed->insertFrom(*column_read.column, row_idx); + MutableColumnPtr column_mixed = (*std::move(column_read.column)).mutate(); + mixNumberColumns(column_read.type->getTypeId(), column_mixed, column_def.column, defaults_mask); + column_read.column = std::move(column_mixed); + } + else + { + MutableColumnPtr column_mixed = mixColumns(column_read, column_def, defaults_mask); + mixed_columns.emplace(block_column_position, std::move(column_mixed)); } - - mixed_columns.emplace(std::make_pair(block_column_position, std::move(column_mixed))); } } @@ -104,4 +113,95 @@ Block AddingDefaultsBlockInputStream::readImpl() return res; } +void AddingDefaultsBlockInputStream::checkCalculated(const ColumnWithTypeAndName & col_read, + const ColumnWithTypeAndName & col_defaults, + size_t defaults_needed) const +{ + size_t column_size = col_read.column->size(); + + if (column_size != col_defaults.column->size()) + throw Exception("Mismach column sizes while adding defaults", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); + + 
if (column_size < defaults_needed) + throw Exception("Unexpected defaults count", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); + + if (col_read.type->getTypeId() != col_defaults.type->getTypeId()) + throw Exception("Mismach column types while adding defaults", ErrorCodes::TYPE_MISMATCH); +} + +void AddingDefaultsBlockInputStream::mixNumberColumns(TypeIndex type_idx, MutableColumnPtr & column_mixed, const ColumnPtr & column_defs, + const BlockMissingValues::RowsBitMask & defaults_mask) const +{ + auto call = [&](const auto & types) -> bool + { + using Types = std::decay_t; + using DataType = typename Types::LeftType; + + if constexpr (!std::is_same_v && !std::is_same_v) + { + using FieldType = typename DataType::FieldType; + using ColVecType = std::conditional_t, ColumnDecimal, ColumnVector>; + + auto col_read = typeid_cast(column_mixed.get()); + if (!col_read) + return false; + + typename ColVecType::Container & dst = col_read->getData(); + + if (auto const_col_defs = checkAndGetColumnConst(column_defs.get())) + { + FieldType value = checkAndGetColumn(const_col_defs->getDataColumnPtr().get())->getData()[0]; + + for (size_t i = 0; i < defaults_mask.size(); ++i) + if (defaults_mask[i]) + dst[i] = value; + + return true; + } + else if (auto col_defs = checkAndGetColumn(column_defs.get())) + { + auto & src = col_defs->getData(); + for (size_t i = 0; i < defaults_mask.size(); ++i) + if (defaults_mask[i]) + dst[i] = src[i]; + + return true; + } + } + + return false; + }; + + if (!callOnIndexAndDataType(type_idx, call)) + throw Exception("Unexpected type on mixNumberColumns", ErrorCodes::LOGICAL_ERROR); +} + +MutableColumnPtr AddingDefaultsBlockInputStream::mixColumns(const ColumnWithTypeAndName & col_read, + const ColumnWithTypeAndName & col_defaults, + const BlockMissingValues::RowsBitMask & defaults_mask) const +{ + size_t column_size = col_read.column->size(); + size_t defaults_needed = defaults_mask.size(); + + MutableColumnPtr column_mixed = 
col_read.column->cloneEmpty(); + + for (size_t i = 0; i < defaults_needed; ++i) + { + if (defaults_mask[i]) + { + if (col_defaults.column->isColumnConst()) + column_mixed->insert((*col_defaults.column)[i]); + else + column_mixed->insertFrom(*col_defaults.column, i); + } + else + column_mixed->insertFrom(*col_read.column, i); + } + + for (size_t i = defaults_needed; i < column_size; ++i) + column_mixed->insertFrom(*col_read.column, i); + + return column_mixed; +} + } diff --git a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.h b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.h index 5caaec244da..6711a3daee9 100644 --- a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.h +++ b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.h @@ -27,6 +27,12 @@ private: Block header; const ColumnDefaults column_defaults; const Context & context; + + void checkCalculated(const ColumnWithTypeAndName & col_read, const ColumnWithTypeAndName & col_defaults, size_t needed) const; + MutableColumnPtr mixColumns(const ColumnWithTypeAndName & col_read, const ColumnWithTypeAndName & col_defaults, + const BlockMissingValues::RowsBitMask & defaults_mask) const; + void mixNumberColumns(TypeIndex type_idx, MutableColumnPtr & col_mixed, const ColumnPtr & col_defaults, + const BlockMissingValues::RowsBitMask & defaults_mask) const; }; } diff --git a/dbms/src/DataTypes/IDataType.h b/dbms/src/DataTypes/IDataType.h index 727d80540ce..063c69ed311 100644 --- a/dbms/src/DataTypes/IDataType.h +++ b/dbms/src/DataTypes/IDataType.h @@ -512,6 +512,13 @@ inline bool isNumber(const T & data_type) return which.isInt() || which.isUInt() || which.isFloat(); } +template +inline bool isColumnedAsNumber(const T & data_type) +{ + WhichDataType which(data_type); + return which.isInt() || which.isUInt() || which.isFloat() || which.isDateOrDateTime() || which.isUUID(); +} + template inline bool isString(const T & data_type) { From b4b58b292f70d5ddea4c7a013eb08e4abcc28fb5 Mon Sep 17 00:00:00 2001 From: 
chertus Date: Fri, 16 Nov 2018 22:19:47 +0300 Subject: [PATCH 48/90] remove hack for StorageDistributed & insert_sample_with_metadata --- dbms/src/Storages/StorageDistributed.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp index d0eb764a917..5d3860f449d 100644 --- a/dbms/src/Storages/StorageDistributed.cpp +++ b/dbms/src/Storages/StorageDistributed.cpp @@ -174,9 +174,6 @@ StorageDistributed::StorageDistributed( if (num_local_shards && remote_database == database_name && remote_table == table_name) throw Exception("Distributed table " + table_name + " looks at itself", ErrorCodes::INFINITE_LOOP); } - - /// HACK: disable metadata for StorageDistributed queries - const_cast(context).getSettingsRef().insert_sample_with_metadata = false; } From 65b654a1b654f2a6f25ca746f60d71a85aae1f9a Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Fri, 30 Nov 2018 22:49:35 +0800 Subject: [PATCH 49/90] Versatile StorageJoin This commit does the following: 1. StorageJoin with simple keys now supports reading 2. StorageJoin can be created with Join settings applied. Syntax is similar to MergeTree and Kafka 3. Left Any StorageJoin with one simple key can be used as a dictionary-like structure by function joinGet. Examples are listed in the related test file. 
--- dbms/src/Functions/FunctionJoinGet.cpp | 74 +++++ dbms/src/Functions/FunctionJoinGet.h | 53 ++++ .../registerFunctionsMiscellaneous.cpp | 2 + dbms/src/Interpreters/ExpressionActions.cpp | 13 +- dbms/src/Interpreters/ExpressionActions.h | 3 +- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 11 +- dbms/src/Interpreters/Join.cpp | 110 ++++++-- dbms/src/Interpreters/Join.h | 41 ++- dbms/src/Storages/StorageFactory.cpp | 4 +- dbms/src/Storages/StorageJoin.cpp | 254 +++++++++++++++++- dbms/src/Storages/StorageJoin.h | 13 + .../00800_versatile_storage_join.reference | 19 ++ .../00800_versatile_storage_join.sql | 51 ++++ 13 files changed, 591 insertions(+), 57 deletions(-) create mode 100644 dbms/src/Functions/FunctionJoinGet.cpp create mode 100644 dbms/src/Functions/FunctionJoinGet.h create mode 100644 dbms/tests/queries/0_stateless/00800_versatile_storage_join.reference create mode 100644 dbms/tests/queries/0_stateless/00800_versatile_storage_join.sql diff --git a/dbms/src/Functions/FunctionJoinGet.cpp b/dbms/src/Functions/FunctionJoinGet.cpp new file mode 100644 index 00000000000..d2f65148b23 --- /dev/null +++ b/dbms/src/Functions/FunctionJoinGet.cpp @@ -0,0 +1,74 @@ +#include + +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +FunctionBasePtr FunctionBuilderJoinGet::buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &) const +{ + if (arguments.size() != 3) + throw Exception{"Function " + getName() + " takes 3 arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; + + String join_name; + if (auto name_col = checkAndGetColumnConst(arguments[0].column.get())) + { + join_name = name_col->getValue(); + } + else + throw Exception{"Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName() + + ", expected a const string.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + + 
auto table = context.getTable("", join_name); + + StorageJoin * storage_join = dynamic_cast(table.get()); + + if (!storage_join) + throw Exception{"Table " + join_name + " should have engine StorageJoin", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + + auto join = storage_join->getJoin(); + String attr_name; + if (auto name_col = checkAndGetColumnConst(arguments[1].column.get())) + { + attr_name = name_col->getValue(); + } + else + throw Exception{"Illegal type " + arguments[1].type->getName() + " of second argument of function " + getName() + + ", expected a const string.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + + DataTypes data_types(arguments.size()); + + for (size_t i = 0; i < arguments.size(); ++i) + data_types[i] = arguments[i].type; + + return std::make_shared( + std::make_shared(join, attr_name), data_types, join->joinGetReturnType(attr_name)); +} + +void FunctionJoinGet::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) +{ + auto & ctn = block.getByPosition(arguments[2]); + ctn.name = ""; // make sure the key name never collides with the join columns + Block key_block = {ctn}; + join->joinGet(key_block, attr_name); + block.getByPosition(result) = key_block.getByPosition(1); +} + +void registerFunctionJoinGet(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/dbms/src/Functions/FunctionJoinGet.h b/dbms/src/Functions/FunctionJoinGet.h new file mode 100644 index 00000000000..6573dcf9405 --- /dev/null +++ b/dbms/src/Functions/FunctionJoinGet.h @@ -0,0 +1,53 @@ +#include + +namespace DB +{ +class Context; +class Join; +using JoinPtr = std::shared_ptr; + +class FunctionJoinGet final : public IFunction, public std::enable_shared_from_this +{ +public: + static constexpr auto name = "joinGet"; + + FunctionJoinGet(JoinPtr join, const String & attr_name) : join(std::move(join)), attr_name(attr_name) {} + + String getName() const override { return name; } + +protected: + DataTypePtr 
getReturnTypeImpl(const DataTypes & /*arguments*/) const override { return nullptr; } + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; + +private: + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + +private: + JoinPtr join; + const String attr_name; +}; + +class FunctionBuilderJoinGet final : public FunctionBuilderImpl +{ +public: + static constexpr auto name = "joinGet"; + static FunctionBuilderPtr create(const Context & context) { return std::make_shared(context); } + + FunctionBuilderJoinGet(const Context & context) : context(context) {} + + String getName() const override { return name; } + +protected: + FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &) const override; + DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override { return nullptr; } + +private: + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + +private: + const Context & context; +}; + +} diff --git a/dbms/src/Functions/registerFunctionsMiscellaneous.cpp b/dbms/src/Functions/registerFunctionsMiscellaneous.cpp index e483deee961..59c49a5c950 100644 --- a/dbms/src/Functions/registerFunctionsMiscellaneous.cpp +++ b/dbms/src/Functions/registerFunctionsMiscellaneous.cpp @@ -40,6 +40,7 @@ void registerFunctionToLowCardinality(FunctionFactory &); void registerFunctionLowCardinalityIndices(FunctionFactory &); void registerFunctionLowCardinalityKeys(FunctionFactory &); void registerFunctionsIn(FunctionFactory &); +void registerFunctionJoinGet(FunctionFactory &); void registerFunctionsMiscellaneous(FunctionFactory & factory) { @@ -80,6 +81,7 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory) registerFunctionLowCardinalityIndices(factory); registerFunctionLowCardinalityKeys(factory); registerFunctionsIn(factory); + 
registerFunctionJoinGet(factory); } } diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index b1fab40a654..20bfecc26d0 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -150,15 +150,18 @@ ExpressionAction ExpressionAction::arrayJoin(const NameSet & array_joined_column return a; } -ExpressionAction ExpressionAction::ordinaryJoin(std::shared_ptr join_, - const Names & join_key_names_left, - const NamesAndTypesList & columns_added_by_join_) +ExpressionAction ExpressionAction::ordinaryJoin( + std::shared_ptr join_, + const Names & join_key_names_left, + const NamesAndTypesList & columns_added_by_join_, + const NameSet & columns_added_by_join_from_right_keys_) { ExpressionAction a; a.type = JOIN; a.join = std::move(join_); a.join_key_names_left = join_key_names_left; a.columns_added_by_join = columns_added_by_join_; + a.columns_added_by_join_from_right_keys = columns_added_by_join_from_right_keys_; return a; } @@ -427,7 +430,7 @@ void ExpressionAction::execute(Block & block) const case JOIN: { - join->joinBlock(block); + join->joinBlock(block, join_key_names_left, columns_added_by_join_from_right_keys); break; } @@ -1085,7 +1088,7 @@ BlockInputStreamPtr ExpressionActions::createStreamWithNonJoinedDataIfFullOrRigh { for (const auto & action : actions) if (action.join && (action.join->getKind() == ASTTableJoin::Kind::Full || action.join->getKind() == ASTTableJoin::Kind::Right)) - return action.join->createStreamWithNonJoinedRows(source_header, max_block_size); + return action.join->createStreamWithNonJoinedRows(source_header, action.join_key_names_left, max_block_size); return {}; } diff --git a/dbms/src/Interpreters/ExpressionActions.h b/dbms/src/Interpreters/ExpressionActions.h index 781134dbeb2..8c42e8ae492 100644 --- a/dbms/src/Interpreters/ExpressionActions.h +++ b/dbms/src/Interpreters/ExpressionActions.h @@ -102,6 +102,7 @@ public: std::shared_ptr join; 
Names join_key_names_left; NamesAndTypesList columns_added_by_join; + NameSet columns_added_by_join_from_right_keys; /// For PROJECT. NamesWithAliases projection; @@ -118,7 +119,7 @@ public: static ExpressionAction addAliases(const NamesWithAliases & aliased_columns_); static ExpressionAction arrayJoin(const NameSet & array_joined_columns, bool array_join_is_left, const Context & context); static ExpressionAction ordinaryJoin(std::shared_ptr join_, const Names & join_key_names_left, - const NamesAndTypesList & columns_added_by_join_); + const NamesAndTypesList & columns_added_by_join_, const NameSet & columns_added_by_join_from_right_keys_); /// Which columns necessary to perform this action. Names getNeededColumns() const; diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index d9f2406ef4b..e12a600b6de 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -556,12 +556,13 @@ void ExpressionAnalyzer::addJoinAction(ExpressionActionsPtr & actions, bool only columns_added_by_join_list.push_back(joined_column.name_and_type); if (only_types) - actions->add(ExpressionAction::ordinaryJoin(nullptr, analyzedJoin().key_names_left, columns_added_by_join_list)); + actions->add(ExpressionAction::ordinaryJoin(nullptr, analyzedJoin().key_names_left, + columns_added_by_join_list, columns_added_by_join_from_right_keys)); else for (auto & subquery_for_set : subqueries_for_sets) if (subquery_for_set.second.join) actions->add(ExpressionAction::ordinaryJoin(subquery_for_set.second.join, analyzedJoin().key_names_left, - columns_added_by_join_list)); + columns_added_by_join_list, columns_added_by_join_from_right_keys)); } bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_types) @@ -621,10 +622,8 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty if (!subquery_for_set.join) { - JoinPtr join = std::make_shared( - 
analyzedJoin().key_names_left, analyzedJoin().key_names_right, columns_added_by_join_from_right_keys, - settings.join_use_nulls, settings.size_limits_for_join, - join_params.kind, join_params.strictness); + JoinPtr join = std::make_shared(analyzedJoin().key_names_right, settings.join_use_nulls, + settings.size_limits_for_join, join_params.kind, join_params.strictness); /** For GLOBAL JOINs (in the case, for example, of the push method for executing GLOBAL subqueries), the following occurs * - in the addExternalStorage function, the JOIN (SELECT ...) subquery is replaced with JOIN _data1, diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 00d74cc0e2d..62479422c5d 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -30,12 +30,10 @@ namespace ErrorCodes } -Join::Join(const Names & key_names_left_, const Names & key_names_right_, const NameSet & needed_key_names_right_, - bool use_nulls_, const SizeLimits & limits, ASTTableJoin::Kind kind_, ASTTableJoin::Strictness strictness_) +Join::Join(const Names & key_names_right_, bool use_nulls_, const SizeLimits & limits, + ASTTableJoin::Kind kind_, ASTTableJoin::Strictness strictness_) : kind(kind_), strictness(strictness_), - key_names_left(key_names_left_), key_names_right(key_names_right_), - needed_key_names_right(needed_key_names_right_), use_nulls(use_nulls_), log(&Logger::get("Join")), limits(limits) @@ -662,7 +660,12 @@ namespace template -void Join::joinBlockImpl(Block & block, const Maps & maps) const +void Join::joinBlockImpl( + Block & block, + const Names & key_names_left, + const NameSet & needed_key_names_right, + const Block & block_with_columns_to_add, + const Maps & maps) const { size_t keys_size = key_names_left.size(); ColumnRawPtrs key_columns(keys_size); @@ -734,8 +737,8 @@ void Join::joinBlockImpl(Block & block, const Maps & maps) const { const ColumnWithTypeAndName & src_column = sample_block_with_columns_to_add.safeGetByPosition(i); - /// 
Don't insert column if it's in left block. - if (!block.has(src_column.name)) + /// Don't insert column if it's in left block or not explicitly required. + if (!block.has(src_column.name) && block_with_columns_to_add.has(src_column.name)) { added_columns.push_back(src_column.column->cloneEmpty()); added_columns.back()->reserve(src_column.column->size()); @@ -746,7 +749,6 @@ void Join::joinBlockImpl(Block & block, const Maps & maps) const size_t rows = block.rows(); - /// Used with ANY INNER JOIN std::unique_ptr filter; bool filter_left_keys = (kind == ASTTableJoin::Kind::Inner || kind == ASTTableJoin::Kind::Right) && strictness == ASTTableJoin::Strictness::Any; @@ -875,7 +877,7 @@ void Join::joinBlockImplCross(Block & block) const } -void Join::checkTypesOfKeys(const Block & block_left, const Block & block_right) const +void Join::checkTypesOfKeys(const Block & block_left, const Names & key_names_left, const Block & block_right) const { size_t keys_size = key_names_left.size(); @@ -895,30 +897,90 @@ void Join::checkTypesOfKeys(const Block & block_left, const Block & block_right) } -void Join::joinBlock(Block & block) const +static void checkTypeOfKey(const Block & block_left, const Block & block_right) +{ + auto & [c1, left_type_origin, left_name] = block_left.safeGetByPosition(0); + auto & [c2, right_type_origin, right_name] = block_right.safeGetByPosition(0); + auto left_type = removeNullable(left_type_origin); + auto right_type = removeNullable(right_type_origin); + + if (!left_type->equals(*right_type)) + throw Exception("Type mismatch of columns to joinGet by: " + + left_name + " " + left_type->getName() + " at left, " + + right_name + " " + right_type->getName() + " at right", + ErrorCodes::TYPE_MISMATCH); +} + + +DataTypePtr Join::joinGetReturnType(const String & column_name) const +{ + std::shared_lock lock(rwlock); + + if (!sample_block_with_columns_to_add.has(column_name)) + throw Exception("StorageJoin doesn't contain column " + column_name, 
ErrorCodes::LOGICAL_ERROR); + return sample_block_with_columns_to_add.getByName(column_name).type; +} + + +template +void Join::joinGetImpl(Block & block, const String & column_name, const Maps & maps) const +{ + joinBlockImpl( + block, {block.getByPosition(0).name}, {}, {sample_block_with_columns_to_add.getByName(column_name)}, maps); +} + + +// TODO: support composite key +// TODO: return multiple columns as named tuple +// TODO: return array of values when strictness == ASTTableJoin::Strictness::All +void Join::joinGet(Block & block, const String & column_name) const +{ + std::shared_lock lock(rwlock); + + if (key_names_right.size() != 1) + throw Exception("joinGet only supports StorageJoin containing exactly one key", ErrorCodes::LOGICAL_ERROR); + + checkTypeOfKey(block, sample_block_with_keys); + + if (kind == ASTTableJoin::Kind::Left && strictness == ASTTableJoin::Strictness::Any) + joinGetImpl(block, column_name, maps_any); + else + throw Exception("joinGet only supports StorageJoin of type Left Any", ErrorCodes::LOGICAL_ERROR); +} + + +void Join::joinBlock(Block & block, const Names & key_names_left, const NameSet & needed_key_names_right) const { // std::cerr << "joinBlock: " << block.dumpStructure() << "\n"; std::shared_lock lock(rwlock); - checkTypesOfKeys(block, sample_block_with_keys); + checkTypesOfKeys(block, key_names_left, sample_block_with_keys); if (kind == ASTTableJoin::Kind::Left && strictness == ASTTableJoin::Strictness::Any) - joinBlockImpl(block, maps_any); + joinBlockImpl( + block, key_names_left, needed_key_names_right, sample_block_with_columns_to_add, maps_any); else if (kind == ASTTableJoin::Kind::Inner && strictness == ASTTableJoin::Strictness::Any) - joinBlockImpl(block, maps_any); + joinBlockImpl( + block, key_names_left, needed_key_names_right, sample_block_with_columns_to_add, maps_any); else if (kind == ASTTableJoin::Kind::Left && strictness == ASTTableJoin::Strictness::All) - joinBlockImpl(block, maps_all); + joinBlockImpl( + 
block, key_names_left, needed_key_names_right, sample_block_with_columns_to_add, maps_all); else if (kind == ASTTableJoin::Kind::Inner && strictness == ASTTableJoin::Strictness::All) - joinBlockImpl(block, maps_all); + joinBlockImpl( + block, key_names_left, needed_key_names_right, sample_block_with_columns_to_add, maps_all); else if (kind == ASTTableJoin::Kind::Full && strictness == ASTTableJoin::Strictness::Any) - joinBlockImpl(block, maps_any_full); + joinBlockImpl( + block, key_names_left, needed_key_names_right, sample_block_with_columns_to_add, maps_any_full); else if (kind == ASTTableJoin::Kind::Right && strictness == ASTTableJoin::Strictness::Any) - joinBlockImpl(block, maps_any_full); + joinBlockImpl( + block, key_names_left, needed_key_names_right, sample_block_with_columns_to_add, maps_any_full); else if (kind == ASTTableJoin::Kind::Full && strictness == ASTTableJoin::Strictness::All) - joinBlockImpl(block, maps_all_full); + joinBlockImpl( + block, key_names_left, needed_key_names_right, sample_block_with_columns_to_add, maps_all_full); else if (kind == ASTTableJoin::Kind::Right && strictness == ASTTableJoin::Strictness::All) - joinBlockImpl(block, maps_all_full); + joinBlockImpl( + block, key_names_left, needed_key_names_right, sample_block_with_columns_to_add, maps_all_full); else if (kind == ASTTableJoin::Kind::Cross) joinBlockImplCross(block); else @@ -995,14 +1057,14 @@ struct AdderNonJoined class NonJoinedBlockInputStream : public IProfilingBlockInputStream { public: - NonJoinedBlockInputStream(const Join & parent_, const Block & left_sample_block, size_t max_block_size_) + NonJoinedBlockInputStream(const Join & parent_, const Block & left_sample_block, const Names & key_names_left, size_t max_block_size_) : parent(parent_), max_block_size(max_block_size_) { /** left_sample_block contains keys and "left" columns. * result_sample_block - keys, "left" columns, and "right" columns. 
*/ - size_t num_keys = parent.key_names_left.size(); + size_t num_keys = key_names_left.size(); size_t num_columns_left = left_sample_block.columns() - num_keys; size_t num_columns_right = parent.sample_block_with_columns_to_add.columns(); @@ -1019,7 +1081,7 @@ public: column_indices_keys_and_right.reserve(num_keys + num_columns_right); std::vector is_key_column_in_left_block(num_keys + num_columns_left, false); - for (const std::string & key : parent.key_names_left) + for (const std::string & key : key_names_left) { size_t key_pos = left_sample_block.getPositionByName(key); is_key_column_in_left_block[key_pos] = true; @@ -1170,9 +1232,9 @@ private: }; -BlockInputStreamPtr Join::createStreamWithNonJoinedRows(const Block & left_sample_block, size_t max_block_size) const +BlockInputStreamPtr Join::createStreamWithNonJoinedRows(const Block & left_sample_block, const Names & key_names_left, size_t max_block_size) const { - return std::make_shared(*this, left_sample_block, max_block_size); + return std::make_shared(*this, left_sample_block, key_names_left, max_block_size); } diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index 7c288e0a008..4a864d8795e 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -219,8 +219,8 @@ struct JoinKeyGetterHashed class Join { public: - Join(const Names & key_names_left_, const Names & key_names_right_, const NameSet & needed_key_names_right_, - bool use_nulls_, const SizeLimits & limits, ASTTableJoin::Kind kind_, ASTTableJoin::Strictness strictness_); + Join(const Names & key_names_right_, bool use_nulls_, const SizeLimits & limits, + ASTTableJoin::Kind kind_, ASTTableJoin::Strictness strictness_); bool empty() { return type == Type::EMPTY; } @@ -237,7 +237,13 @@ public: /** Join data from the map (that was previously built by calls to insertFromBlock) to the block with data from "left" table. * Could be called from different threads in parallel. 
*/ - void joinBlock(Block & block) const; + void joinBlock(Block & block, const Names & key_names_left, const NameSet & needed_key_names_right) const; + + /// Infer the return type for joinGet function + DataTypePtr joinGetReturnType(const String & column_name) const; + + /// Used by joinGet function that turns StorageJoin into a dictionary + void joinGet(Block & block, const String & column_name) const; /** Keep "totals" (separate part of dataset, see WITH TOTALS) to use later. */ @@ -251,7 +257,7 @@ public: * Use only after all calls to joinBlock was done. * left_sample_block is passed without account of 'use_nulls' setting (columns will be converted to Nullable inside). */ - BlockInputStreamPtr createStreamWithNonJoinedRows(const Block & left_sample_block, size_t max_block_size) const; + BlockInputStreamPtr createStreamWithNonJoinedRows(const Block & left_sample_block, const Names & key_names_left, size_t max_block_size) const; /// Number of keys in all built JOIN maps. size_t getTotalRowCount() const; @@ -320,6 +326,16 @@ public: M(keys256) \ M(hashed) + + /// Used for reading from StorageJoin and applying joinGet function + #define APPLY_FOR_JOIN_VARIANTS_LIMITED(M) \ + M(key8) \ + M(key16) \ + M(key32) \ + M(key64) \ + M(key_string) \ + M(key_fixed_string) + enum class Type { EMPTY, @@ -353,16 +369,13 @@ public: private: friend class NonJoinedBlockInputStream; + friend class JoinBlockInputStream; ASTTableJoin::Kind kind; ASTTableJoin::Strictness strictness; - /// Names of key columns (columns for equi-JOIN) in "left" table (in the order they appear in USING clause). - const Names key_names_left; /// Names of key columns (columns for equi-JOIN) in "right" table (in the order they appear in USING clause). const Names key_names_right; - /// Names of key columns in the "right" table which should stay in block after join. - const NameSet needed_key_names_right; /// Substitute NULLs for non-JOINed rows. 
bool use_nulls; @@ -408,12 +421,20 @@ private: void init(Type type_); /// Throw an exception if blocks have different types of key columns. - void checkTypesOfKeys(const Block & block_left, const Block & block_right) const; + void checkTypesOfKeys(const Block & block_left, const Names & key_names_left, const Block & block_right) const; template - void joinBlockImpl(Block & block, const Maps & maps) const; + void joinBlockImpl( + Block & block, + const Names & key_names_left, + const NameSet & needed_key_names_right, + const Block & block_with_columns_to_add, + const Maps & maps) const; void joinBlockImplCross(Block & block) const; + + template + void joinGetImpl(Block & block, const String & column_name, const Maps & maps) const; }; using JoinPtr = std::shared_ptr; diff --git a/dbms/src/Storages/StorageFactory.cpp b/dbms/src/Storages/StorageFactory.cpp index 546e8f27843..07520af1cea 100644 --- a/dbms/src/Storages/StorageFactory.cpp +++ b/dbms/src/Storages/StorageFactory.cpp @@ -86,11 +86,11 @@ StoragePtr StorageFactory::get( name = engine_def.name; - if (storage_def->settings && !endsWith(name, "MergeTree") && name != "Kafka") + if (storage_def->settings && !endsWith(name, "MergeTree") && name != "Kafka" && name != "Join") { throw Exception( "Engine " + name + " doesn't support SETTINGS clause. 
" - "Currently only the MergeTree family of engines and Kafka engine supports it", + "Currently only the MergeTree family of engines, Kafka engine and Join engine support it", ErrorCodes::BAD_ARGUMENTS); } diff --git a/dbms/src/Storages/StorageJoin.cpp b/dbms/src/Storages/StorageJoin.cpp index 65e13e05220..3b13764d091 100644 --- a/dbms/src/Storages/StorageJoin.cpp +++ b/dbms/src/Storages/StorageJoin.cpp @@ -1,8 +1,12 @@ #include #include #include +#include #include #include +#include +#include +#include #include /// toLower #include @@ -13,6 +17,7 @@ namespace DB namespace ErrorCodes { + extern const int UNKNOWN_SET_DATA_VARIANT; extern const int NO_SUCH_COLUMN_IN_TABLE; extern const int INCOMPATIBLE_TYPE_OF_JOIN; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; @@ -24,18 +29,23 @@ StorageJoin::StorageJoin( const String & path_, const String & name_, const Names & key_names_, - ASTTableJoin::Kind kind_, ASTTableJoin::Strictness strictness_, + bool use_nulls_, + SizeLimits limits_, + ASTTableJoin::Kind kind_, + ASTTableJoin::Strictness strictness_, const ColumnsDescription & columns_) - : StorageSetOrJoinBase{path_, name_, columns_}, - key_names(key_names_), kind(kind_), strictness(strictness_) + : StorageSetOrJoinBase{path_, name_, columns_} + , key_names(key_names_) + , use_nulls(use_nulls_) + , limits(limits_) + , kind(kind_) + , strictness(strictness_) { for (const auto & key : key_names) if (!getColumns().hasPhysical(key)) throw Exception{"Key column (" + key + ") does not exist in table declaration.", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE}; - /// NOTE StorageJoin doesn't use join_use_nulls setting. 
- - join = std::make_shared(key_names, key_names, NameSet(), false /* use_nulls */, SizeLimits(), kind, strictness); + join = std::make_shared(key_names, use_nulls, limits, kind, strictness); join->setSampleBlock(getSampleBlock().sortColumns()); restore(); } @@ -48,7 +58,7 @@ void StorageJoin::truncate(const ASTPtr &) Poco::File(path + "tmp/").createDirectories(); increment = 0; - join = std::make_shared(key_names, key_names, NameSet(), false /* use_nulls */, SizeLimits(), kind, strictness); + join = std::make_shared(key_names, use_nulls, limits, kind, strictness); join->setSampleBlock(getSampleBlock().sortColumns()); } @@ -119,11 +129,237 @@ void registerStorageJoin(StorageFactory & factory) key_names.push_back(key->name); } + auto & settings = args.context.getSettingsRef(); + auto join_use_nulls = settings.join_use_nulls; + auto max_rows_in_join = settings.max_rows_in_join; + auto max_bytes_in_join = settings.max_bytes_in_join; + auto join_overflow_mode = settings.join_overflow_mode; + + if (args.storage_def && args.storage_def->settings) + { + for (const ASTSetQuery::Change & setting : args.storage_def->settings->changes) + { + if (setting.name == "join_use_nulls") join_use_nulls.set(setting.value); + else if (setting.name == "max_rows_in_join") max_rows_in_join.set(setting.value); + else if (setting.name == "max_bytes_in_join") max_bytes_in_join.set(setting.value); + else if (setting.name == "join_overflow_mode") join_overflow_mode.set(setting.value); + else + throw Exception( + "Unknown setting " + setting.name + " for storage " + args.engine_name, + ErrorCodes::BAD_ARGUMENTS); + } + } + return StorageJoin::create( - args.data_path, args.table_name, - key_names, kind, strictness, + args.data_path, + args.table_name, + key_names, + join_use_nulls.value, + SizeLimits{max_rows_in_join.value, max_bytes_in_join.value, join_overflow_mode.value}, + kind, + strictness, args.columns); }); } +template +static const char * rawData(T & t) +{ + return reinterpret_cast(&t); 
+} +template +static size_t rawSize(T &) +{ + return sizeof(T); +} +template <> +const char * rawData(const StringRef & t) +{ + return t.data; +} +template <> +size_t rawSize(const StringRef & t) +{ + return t.size; +} + +class JoinBlockInputStream : public IProfilingBlockInputStream +{ +public: + JoinBlockInputStream(const Join & parent_, size_t max_block_size_, Block & sample_block_) + : parent(parent_), lock(parent.rwlock), max_block_size(max_block_size_), sample_block(sample_block_) + { + columns.resize(sample_block.columns()); + column_indices.resize(sample_block.columns()); + column_with_null.resize(sample_block.columns()); + for (size_t i = 0; i < sample_block.columns(); ++i) + { + auto & [_, type, name] = sample_block.getByPosition(i); + if (parent.sample_block_with_keys.has(name)) + { + key_pos = i; + column_with_null[i] = parent.sample_block_with_keys.getByName(name).type->isNullable(); + } + else + { + auto pos = parent.sample_block_with_columns_to_add.getPositionByName(name); + column_indices[i] = pos; + column_with_null[i] = !parent.sample_block_with_columns_to_add.getByPosition(pos).type->equals(*type); + } + } + } + + String getName() const override { return "Join"; } + + Block getHeader() const override { return sample_block; } + + +protected: + Block readImpl() override + { + if (parent.blocks.empty()) + return Block(); + + if (parent.strictness == ASTTableJoin::Strictness::Any) + return createBlock(parent.maps_any); + else if (parent.strictness == ASTTableJoin::Strictness::All) + return createBlock(parent.maps_all); + else + throw Exception("Logical error: unknown JOIN strictness (must be ANY or ALL)", ErrorCodes::LOGICAL_ERROR); + } + +private: + const Join & parent; + std::shared_lock lock; + size_t max_block_size; + Block sample_block; + + ColumnNumbers column_indices; + std::vector column_with_null; + std::optional key_pos; + MutableColumns columns; + + std::unique_ptr> position; /// type erasure + + + template + Block createBlock(const Maps & 
maps) + { + for (size_t i = 0; i < sample_block.columns(); ++i) + { + const auto & src_col = sample_block.safeGetByPosition(i); + columns[i] = src_col.type->createColumn(); + if (column_with_null[i]) + { + if (key_pos == i) + { + // unwrap null key column + ColumnNullable & nullable_col = static_cast(*columns[i]); + columns[i] = nullable_col.getNestedColumnPtr()->assumeMutable(); + } + else + // wrap non key column with null + columns[i] = makeNullable(std::move(columns[i]))->assumeMutable(); + } + } + + size_t rows_added = 0; + + switch (parent.type) + { +#define M(TYPE) \ + case Join::Type::TYPE: \ + rows_added = fillColumns(*maps.TYPE); \ + break; + APPLY_FOR_JOIN_VARIANTS_LIMITED(M) +#undef M + + default: + throw Exception("Unknown JOIN keys variant for limited use", ErrorCodes::UNKNOWN_SET_DATA_VARIANT); + } + + if (!rows_added) + return {}; + + Block res = sample_block.cloneEmpty(); + for (size_t i = 0; i < columns.size(); ++i) + if (column_with_null[i]) + { + if (key_pos == i) + res.getByPosition(i).column = makeNullable(std::move(columns[i]))->assumeMutable(); + else + { + const ColumnNullable & nullable_col = static_cast(*columns[i]); + res.getByPosition(i).column = nullable_col.getNestedColumnPtr(); + } + } + else + res.getByPosition(i).column = std::move(columns[i]); + + return res; + } + + + template + size_t fillColumns(const Map & map) + { + size_t rows_added = 0; + + if (!position) + position = decltype(position)( + static_cast(new typename Map::const_iterator(map.begin())), + [](void * ptr) { delete reinterpret_cast(ptr); }); + + auto & it = *reinterpret_cast(position.get()); + auto end = map.end(); + + for (; it != end; ++it) + { + if constexpr (STRICTNESS == ASTTableJoin::Strictness::Any) + { + for (size_t j = 0; j < columns.size(); ++j) + if (j == key_pos) + columns[j]->insertData(rawData(it->first), rawSize(it->first)); + else + columns[j]->insertFrom(*it->second.block->getByPosition(column_indices[j]).column.get(), it->second.row_num); + 
++rows_added; + } + else + for (auto current = &static_cast(it->second); current != nullptr; + current = current->next) + { + for (size_t j = 0; j < columns.size(); ++j) + if (j == key_pos) + columns[j]->insertData(rawData(it->first), rawSize(it->first)); + else + columns[j]->insertFrom(*current->block->getByPosition(column_indices[j]).column.get(), current->row_num); + ++rows_added; + } + + if (rows_added >= max_block_size) + { + ++it; + break; + } + } + + return rows_added; + } +}; + + +// TODO: multiple stream read and index read +BlockInputStreams StorageJoin::read( + const Names & column_names, + const SelectQueryInfo & /*query_info*/, + const Context & /*context*/, + QueryProcessingStage::Enum /*processed_stage*/, + size_t max_block_size, + unsigned /*num_streams*/) +{ + check(column_names); + Block sample_block = getSampleBlockForColumns(column_names); + return {std::make_shared(*join, max_block_size, sample_block)}; +} + } diff --git a/dbms/src/Storages/StorageJoin.h b/dbms/src/Storages/StorageJoin.h index 34bda5cd8ac..25c5128a349 100644 --- a/dbms/src/Storages/StorageJoin.h +++ b/dbms/src/Storages/StorageJoin.h @@ -33,8 +33,19 @@ public: /// Verify that the data structure is suitable for implementing this type of JOIN. void assertCompatible(ASTTableJoin::Kind kind_, ASTTableJoin::Strictness strictness_) const; + BlockInputStreams read( + const Names & column_names, + const SelectQueryInfo & query_info, + const Context & context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + unsigned num_streams) override; + private: + Block sample_block; const Names & key_names; + bool use_nulls; + SizeLimits limits; ASTTableJoin::Kind kind; /// LEFT | INNER ... 
ASTTableJoin::Strictness strictness; /// ANY | ALL @@ -48,6 +59,8 @@ protected: const String & path_, const String & name_, const Names & key_names_, + bool use_nulls_, + SizeLimits limits_, ASTTableJoin::Kind kind_, ASTTableJoin::Strictness strictness_, const ColumnsDescription & columns_); }; diff --git a/dbms/tests/queries/0_stateless/00800_versatile_storage_join.reference b/dbms/tests/queries/0_stateless/00800_versatile_storage_join.reference new file mode 100644 index 00000000000..1fa9ac74e57 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00800_versatile_storage_join.reference @@ -0,0 +1,19 @@ +--------read-------- +def [1,2] 2 +abc [0] 1 +def [1,2] 2 +abc [0] 1 +def [1,2] 2 +abc [0] 1 +def [1,2] 2 +abc [0] 1 +--------joinGet-------- + +abc +def + +\N +abc +def + +[0] 1 diff --git a/dbms/tests/queries/0_stateless/00800_versatile_storage_join.sql b/dbms/tests/queries/0_stateless/00800_versatile_storage_join.sql new file mode 100644 index 00000000000..80f7616766c --- /dev/null +++ b/dbms/tests/queries/0_stateless/00800_versatile_storage_join.sql @@ -0,0 +1,51 @@ +DROP TABLE IF EXISTS test.join_any_inner; +DROP TABLE IF EXISTS test.join_any_left; +DROP TABLE IF EXISTS test.join_any_left_null; +DROP TABLE IF EXISTS test.join_all_inner; +DROP TABLE IF EXISTS test.join_all_left; +DROP TABLE IF EXISTS test.join_string_key; + +CREATE TABLE test.join_any_inner (s String, x Array(UInt8), k UInt64) ENGINE = Join(ANY, INNER, k); +CREATE TABLE test.join_any_left (s String, x Array(UInt8), k UInt64) ENGINE = Join(ANY, LEFT, k); +CREATE TABLE test.join_all_inner (s String, x Array(UInt8), k UInt64) ENGINE = Join(ALL, INNER, k); +CREATE TABLE test.join_all_left (s String, x Array(UInt8), k UInt64) ENGINE = Join(ALL, LEFT, k); + +USE test; + +INSERT INTO test.join_any_inner VALUES ('abc', [0], 1), ('def', [1, 2], 2); +INSERT INTO test.join_any_left VALUES ('abc', [0], 1), ('def', [1, 2], 2); +INSERT INTO test.join_all_inner VALUES ('abc', [0], 1), ('def', [1, 2], 2); 
+INSERT INTO test.join_all_left VALUES ('abc', [0], 1), ('def', [1, 2], 2); + +-- read from StorageJoin + +SELECT '--------read--------'; +SELECT * from test.join_any_inner; +SELECT * from test.join_any_left; +SELECT * from test.join_all_inner; +SELECT * from test.join_all_left; + +-- create StorageJoin tables with customized settings + +CREATE TABLE test.join_any_left_null (s String, k UInt64) ENGINE = Join(ANY, LEFT, k) SETTINGS join_use_nulls = 1; +INSERT INTO test.join_any_left_null VALUES ('abc', 1), ('def', 2); + +-- joinGet +SELECT '--------joinGet--------'; +SELECT joinGet('join_any_left', 's', number) FROM numbers(3); +SELECT ''; +SELECT joinGet('join_any_left_null', 's', number) FROM numbers(3); +SELECT ''; + +CREATE TABLE test.join_string_key (s String, x Array(UInt8), k UInt64) ENGINE = Join(ANY, LEFT, s); +INSERT INTO test.join_string_key VALUES ('abc', [0], 1), ('def', [1, 2], 2); +SELECT joinGet('join_string_key', 'x', 'abc'), joinGet('join_string_key', 'k', 'abc'); + +USE default; + +DROP TABLE test.join_any_inner; +DROP TABLE test.join_any_left; +DROP TABLE test.join_any_left_null; +DROP TABLE test.join_all_inner; +DROP TABLE test.join_all_left; +DROP TABLE test.join_string_key; From 933c055104dd224aca2ec31bb3c5253494de58ab Mon Sep 17 00:00:00 2001 From: chertus Date: Tue, 4 Dec 2018 17:09:47 +0300 Subject: [PATCH 50/90] CLICKHOUSE-3578 review proress --- .../src/DataStreams/AddingDefaultsBlockInputStream.cpp | 10 ++++------ dbms/src/Formats/BinaryRowInputStream.cpp | 2 +- dbms/src/Formats/BinaryRowInputStream.h | 2 +- .../src/Formats/BlockInputStreamFromRowInputStream.cpp | 8 ++++---- dbms/src/Formats/BlockInputStreamFromRowInputStream.h | 4 ++-- dbms/src/Formats/CSVRowInputStream.cpp | 2 +- dbms/src/Formats/CSVRowInputStream.h | 2 +- dbms/src/Formats/CapnProtoRowInputStream.cpp | 2 +- dbms/src/Formats/CapnProtoRowInputStream.h | 2 +- dbms/src/Formats/IRowInputStream.h | 5 ++--- dbms/src/Formats/JSONEachRowRowInputStream.cpp | 7 +------ 
dbms/src/Formats/JSONEachRowRowInputStream.h | 3 +-- dbms/src/Formats/TSKVRowInputStream.cpp | 2 +- dbms/src/Formats/TSKVRowInputStream.h | 2 +- dbms/src/Formats/TabSeparatedRowInputStream.cpp | 2 +- dbms/src/Formats/TabSeparatedRowInputStream.h | 2 +- dbms/src/Formats/ValuesRowInputStream.cpp | 2 +- dbms/src/Formats/ValuesRowInputStream.h | 2 +- dbms/src/Interpreters/evaluateMissingDefaults.cpp | 5 ++--- dbms/src/Interpreters/evaluateMissingDefaults.h | 3 ++- dbms/src/Storages/ColumnDefault.cpp | 2 -- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 3 ++- 22 files changed, 32 insertions(+), 42 deletions(-) diff --git a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp index 89202ea9ecd..9ee6b15e1b9 100644 --- a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp +++ b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp @@ -51,17 +51,15 @@ Block AddingDefaultsBlockInputStream::readImpl() if (column_defaults.empty()) return res; - const BlockMissingValues & delayed_defaults = children.back()->getMissingValues(); - if (delayed_defaults.empty()) + const BlockMissingValues & block_missing_values = children.back()->getMissingValues(); + if (block_missing_values.empty()) return res; Block evaluate_block{res}; + /// remove columns for recalculation for (const auto & column : column_defaults) - { - /// column_defaults contain aliases that could be ommited in evaluate_block if (evaluate_block.has(column.first)) evaluate_block.erase(column.first); - } evaluateMissingDefaults(evaluate_block, header.getNamesAndTypesList(), column_defaults, context, false); @@ -76,7 +74,7 @@ Block AddingDefaultsBlockInputStream::readImpl() size_t block_column_position = res.getPositionByName(column_name); ColumnWithTypeAndName & column_read = res.getByPosition(block_column_position); - const auto & defaults_mask = delayed_defaults.getDefaultsBitmask(block_column_position); + const auto & defaults_mask = 
block_missing_values.getDefaultsBitmask(block_column_position); checkCalculated(column_read, column_def, defaults_mask.size()); diff --git a/dbms/src/Formats/BinaryRowInputStream.cpp b/dbms/src/Formats/BinaryRowInputStream.cpp index eea6dce12d5..7c059782e6d 100644 --- a/dbms/src/Formats/BinaryRowInputStream.cpp +++ b/dbms/src/Formats/BinaryRowInputStream.cpp @@ -14,7 +14,7 @@ BinaryRowInputStream::BinaryRowInputStream(ReadBuffer & istr_, const Block & hea } -bool BinaryRowInputStream::read(MutableColumns & columns) +bool BinaryRowInputStream::read(MutableColumns & columns, RowReadExtension &) { if (istr.eof()) return false; diff --git a/dbms/src/Formats/BinaryRowInputStream.h b/dbms/src/Formats/BinaryRowInputStream.h index 6e4d2b9c2ce..f70e081f097 100644 --- a/dbms/src/Formats/BinaryRowInputStream.h +++ b/dbms/src/Formats/BinaryRowInputStream.h @@ -17,7 +17,7 @@ class BinaryRowInputStream : public IRowInputStream public: BinaryRowInputStream(ReadBuffer & istr_, const Block & header_); - bool read(MutableColumns & columns) override; + bool read(MutableColumns & columns, RowReadExtension &) override; private: ReadBuffer & istr; diff --git a/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp b/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp index 1440375eb14..89dc575dcb9 100644 --- a/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp +++ b/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp @@ -53,7 +53,7 @@ Block BlockInputStreamFromRowInputStream::readImpl() { size_t num_columns = sample.columns(); MutableColumns columns = sample.cloneEmptyColumns(); - delayed_defaults.clear(); + block_missing_values.clear(); try { @@ -62,8 +62,8 @@ Block BlockInputStreamFromRowInputStream::readImpl() try { ++total_rows; - RowReadExtention info; - if (!row_input->extendedRead(columns, info)) + RowReadExtension info; + if (!row_input->read(columns, info)) break; for (size_t column_idx = 0; column_idx < info.read_columns.size(); ++column_idx) @@ -73,7 +73,7 @@ 
Block BlockInputStreamFromRowInputStream::readImpl() size_t column_size = columns[column_idx]->size(); if (column_size == 0) throw Exception("Unexpected empty column", ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS); - delayed_defaults.setBit(column_idx, column_size - 1); + block_missing_values.setBit(column_idx, column_size - 1); } } } diff --git a/dbms/src/Formats/BlockInputStreamFromRowInputStream.h b/dbms/src/Formats/BlockInputStreamFromRowInputStream.h index 65d6fa04469..fcbec628e09 100644 --- a/dbms/src/Formats/BlockInputStreamFromRowInputStream.h +++ b/dbms/src/Formats/BlockInputStreamFromRowInputStream.h @@ -33,7 +33,7 @@ public: Block getHeader() const override { return sample; } - const BlockMissingValues & getMissingValues() const override { return delayed_defaults; } + const BlockMissingValues & getMissingValues() const override { return block_missing_values; } protected: Block readImpl() override; @@ -42,7 +42,7 @@ private: RowInputStreamPtr row_input; Block sample; size_t max_block_size; - BlockMissingValues delayed_defaults; + BlockMissingValues block_missing_values; UInt64 allow_errors_num; Float64 allow_errors_ratio; diff --git a/dbms/src/Formats/CSVRowInputStream.cpp b/dbms/src/Formats/CSVRowInputStream.cpp index ca8f9514312..d7f8c6eea28 100644 --- a/dbms/src/Formats/CSVRowInputStream.cpp +++ b/dbms/src/Formats/CSVRowInputStream.cpp @@ -111,7 +111,7 @@ void CSVRowInputStream::readPrefix() } -bool CSVRowInputStream::read(MutableColumns & columns) +bool CSVRowInputStream::read(MutableColumns & columns, RowReadExtension &) { if (istr.eof()) return false; diff --git a/dbms/src/Formats/CSVRowInputStream.h b/dbms/src/Formats/CSVRowInputStream.h index d7f8d96867f..c04bda57008 100644 --- a/dbms/src/Formats/CSVRowInputStream.h +++ b/dbms/src/Formats/CSVRowInputStream.h @@ -21,7 +21,7 @@ public: */ CSVRowInputStream(ReadBuffer & istr_, const Block & header_, bool with_names_, const FormatSettings & format_settings); - bool read(MutableColumns & columns) override; 
+ bool read(MutableColumns & columns, RowReadExtension &) override; void readPrefix() override; bool allowSyncAfterError() const override { return true; } void syncAfterError() override; diff --git a/dbms/src/Formats/CapnProtoRowInputStream.cpp b/dbms/src/Formats/CapnProtoRowInputStream.cpp index 8ed9d882a2e..d8d87f082ed 100644 --- a/dbms/src/Formats/CapnProtoRowInputStream.cpp +++ b/dbms/src/Formats/CapnProtoRowInputStream.cpp @@ -193,7 +193,7 @@ CapnProtoRowInputStream::CapnProtoRowInputStream(ReadBuffer & istr_, const Block } -bool CapnProtoRowInputStream::read(MutableColumns & columns) +bool CapnProtoRowInputStream::read(MutableColumns & columns, RowReadExtension &) { if (istr.eof()) return false; diff --git a/dbms/src/Formats/CapnProtoRowInputStream.h b/dbms/src/Formats/CapnProtoRowInputStream.h index a7fcce49143..a6186d7488d 100644 --- a/dbms/src/Formats/CapnProtoRowInputStream.h +++ b/dbms/src/Formats/CapnProtoRowInputStream.h @@ -34,7 +34,7 @@ public: */ CapnProtoRowInputStream(ReadBuffer & istr_, const Block & header_, const String & schema_dir, const String & schema_file, const String & root_object); - bool read(MutableColumns & columns) override; + bool read(MutableColumns & columns, RowReadExtension &) override; private: // Build a traversal plan from a sorted list of fields diff --git a/dbms/src/Formats/IRowInputStream.h b/dbms/src/Formats/IRowInputStream.h index ccfbdeb2b40..65b98d2524f 100644 --- a/dbms/src/Formats/IRowInputStream.h +++ b/dbms/src/Formats/IRowInputStream.h @@ -11,7 +11,7 @@ namespace DB { /// A way to set some extentions to read and return extra information too. IRowInputStream.extendedRead() output. -struct RowReadExtention +struct RowReadExtension { /// IRowInputStream.extendedRead() output value. /// Contains one bit per column in resently read row. IRowInputStream could leave it empty, or partialy set. @@ -27,8 +27,7 @@ public: /** Read next row and append it to the columns. * If no more rows - return false. 
*/ - virtual bool read(MutableColumns & columns) = 0; - virtual bool extendedRead(MutableColumns & columns, RowReadExtention & ) { return read(columns); } + virtual bool read(MutableColumns & columns, RowReadExtension & extra) = 0; virtual void readPrefix() {} /// delimiter before begin of result virtual void readSuffix() {} /// delimiter after end of result diff --git a/dbms/src/Formats/JSONEachRowRowInputStream.cpp b/dbms/src/Formats/JSONEachRowRowInputStream.cpp index cd5ec22ea32..ad67b7a8101 100644 --- a/dbms/src/Formats/JSONEachRowRowInputStream.cpp +++ b/dbms/src/Formats/JSONEachRowRowInputStream.cpp @@ -209,13 +209,8 @@ void JSONEachRowRowInputStream::readNestedData(const String & name, MutableColum nested_prefix_length = 0; } -bool JSONEachRowRowInputStream::read(MutableColumns & columns) -{ - RowReadExtention tmp; - return extendedRead(columns, tmp); -} -bool JSONEachRowRowInputStream::extendedRead(MutableColumns & columns, RowReadExtention & ext) +bool JSONEachRowRowInputStream::read(MutableColumns & columns, RowReadExtension & ext) { skipWhitespaceIfAny(istr); diff --git a/dbms/src/Formats/JSONEachRowRowInputStream.h b/dbms/src/Formats/JSONEachRowRowInputStream.h index 4a64bf30829..4a915d6aa9d 100644 --- a/dbms/src/Formats/JSONEachRowRowInputStream.h +++ b/dbms/src/Formats/JSONEachRowRowInputStream.h @@ -22,8 +22,7 @@ class JSONEachRowRowInputStream : public IRowInputStream public: JSONEachRowRowInputStream(ReadBuffer & istr_, const Block & header_, const FormatSettings & format_settings); - bool read(MutableColumns & columns) override; - bool extendedRead(MutableColumns & columns, RowReadExtention & ext) override; + bool read(MutableColumns & columns, RowReadExtension & ext) override; bool allowSyncAfterError() const override { return true; } void syncAfterError() override; diff --git a/dbms/src/Formats/TSKVRowInputStream.cpp b/dbms/src/Formats/TSKVRowInputStream.cpp index 56f460dafac..837dfb5afaa 100644 --- a/dbms/src/Formats/TSKVRowInputStream.cpp +++ 
b/dbms/src/Formats/TSKVRowInputStream.cpp @@ -88,7 +88,7 @@ static bool readName(ReadBuffer & buf, StringRef & ref, String & tmp) } -bool TSKVRowInputStream::read(MutableColumns & columns) +bool TSKVRowInputStream::read(MutableColumns & columns, RowReadExtension &) { if (istr.eof()) return false; diff --git a/dbms/src/Formats/TSKVRowInputStream.h b/dbms/src/Formats/TSKVRowInputStream.h index b05686dc37a..155322e90c1 100644 --- a/dbms/src/Formats/TSKVRowInputStream.h +++ b/dbms/src/Formats/TSKVRowInputStream.h @@ -25,7 +25,7 @@ class TSKVRowInputStream : public IRowInputStream public: TSKVRowInputStream(ReadBuffer & istr_, const Block & header_, const FormatSettings & format_settings); - bool read(MutableColumns & columns) override; + bool read(MutableColumns & columns, RowReadExtension &) override; bool allowSyncAfterError() const override { return true; } void syncAfterError() override; diff --git a/dbms/src/Formats/TabSeparatedRowInputStream.cpp b/dbms/src/Formats/TabSeparatedRowInputStream.cpp index b843c14bd66..181f29113db 100644 --- a/dbms/src/Formats/TabSeparatedRowInputStream.cpp +++ b/dbms/src/Formats/TabSeparatedRowInputStream.cpp @@ -75,7 +75,7 @@ static void checkForCarriageReturn(ReadBuffer & istr) } -bool TabSeparatedRowInputStream::read(MutableColumns & columns) +bool TabSeparatedRowInputStream::read(MutableColumns & columns, RowReadExtension &) { if (istr.eof()) return false; diff --git a/dbms/src/Formats/TabSeparatedRowInputStream.h b/dbms/src/Formats/TabSeparatedRowInputStream.h index e1c51251009..2435d58d703 100644 --- a/dbms/src/Formats/TabSeparatedRowInputStream.h +++ b/dbms/src/Formats/TabSeparatedRowInputStream.h @@ -22,7 +22,7 @@ public: TabSeparatedRowInputStream( ReadBuffer & istr_, const Block & header_, bool with_names_, bool with_types_, const FormatSettings & format_settings); - bool read(MutableColumns & columns) override; + bool read(MutableColumns & columns, RowReadExtension &) override; void readPrefix() override; bool 
allowSyncAfterError() const override { return true; } void syncAfterError() override; diff --git a/dbms/src/Formats/ValuesRowInputStream.cpp b/dbms/src/Formats/ValuesRowInputStream.cpp index 2dd27ce8df7..eaa8181d185 100644 --- a/dbms/src/Formats/ValuesRowInputStream.cpp +++ b/dbms/src/Formats/ValuesRowInputStream.cpp @@ -37,7 +37,7 @@ ValuesRowInputStream::ValuesRowInputStream(ReadBuffer & istr_, const Block & hea } -bool ValuesRowInputStream::read(MutableColumns & columns) +bool ValuesRowInputStream::read(MutableColumns & columns, RowReadExtension &) { size_t num_columns = columns.size(); diff --git a/dbms/src/Formats/ValuesRowInputStream.h b/dbms/src/Formats/ValuesRowInputStream.h index 49775861746..372619d4e27 100644 --- a/dbms/src/Formats/ValuesRowInputStream.h +++ b/dbms/src/Formats/ValuesRowInputStream.h @@ -23,7 +23,7 @@ public: */ ValuesRowInputStream(ReadBuffer & istr_, const Block & header_, const Context & context_, const FormatSettings & format_settings); - bool read(MutableColumns & columns) override; + bool read(MutableColumns & columns, RowReadExtension &) override; private: ReadBuffer & istr; diff --git a/dbms/src/Interpreters/evaluateMissingDefaults.cpp b/dbms/src/Interpreters/evaluateMissingDefaults.cpp index 40b75c0b673..050078b7af4 100644 --- a/dbms/src/Interpreters/evaluateMissingDefaults.cpp +++ b/dbms/src/Interpreters/evaluateMissingDefaults.cpp @@ -37,7 +37,7 @@ static ASTPtr requiredExpressions(Block & block, const NamesAndTypesList & requi void evaluateMissingDefaults(Block & block, const NamesAndTypesList & required_columns, const ColumnDefaults & column_defaults, - const Context & context, bool with_block_copy) + const Context & context, bool save_unneded_columns) { if (column_defaults.empty()) return; @@ -46,7 +46,7 @@ void evaluateMissingDefaults(Block & block, if (!default_expr_list) return; - if (!with_block_copy) + if (!save_unneded_columns) { auto syntax_result = SyntaxAnalyzer(context, {}).analyze(default_expr_list, 
block.getNamesAndTypesList()); ExpressionAnalyzer{default_expr_list, syntax_result, context}.getActions(true)->execute(block); @@ -56,7 +56,6 @@ void evaluateMissingDefaults(Block & block, /** ExpressionAnalyzer eliminates "unused" columns, in order to ensure their safety * we are going to operate on a copy instead of the original block */ Block copy_block{block}; - /// evaluate default values for defaulted columns auto syntax_result = SyntaxAnalyzer(context, {}).analyze(default_expr_list, block.getNamesAndTypesList()); ExpressionAnalyzer{default_expr_list, syntax_result, context}.getActions(true)->execute(copy_block); diff --git a/dbms/src/Interpreters/evaluateMissingDefaults.h b/dbms/src/Interpreters/evaluateMissingDefaults.h index 71f6fab9753..c65cb1680a2 100644 --- a/dbms/src/Interpreters/evaluateMissingDefaults.h +++ b/dbms/src/Interpreters/evaluateMissingDefaults.h @@ -12,9 +12,10 @@ class Context; class NamesAndTypesList; struct ColumnDefault; +/// void evaluateMissingDefaults(Block & block, const NamesAndTypesList & required_columns, const std::unordered_map & column_defaults, - const Context & context, bool with_block_copy = true); + const Context & context, bool save_unneded_columns = true); } diff --git a/dbms/src/Storages/ColumnDefault.cpp b/dbms/src/Storages/ColumnDefault.cpp index cd79c5ca093..2a89cda5caf 100644 --- a/dbms/src/Storages/ColumnDefault.cpp +++ b/dbms/src/Storages/ColumnDefault.cpp @@ -1,5 +1,3 @@ -#include - #include #include #include diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 524b8bfe8bf..0334e79f541 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -2173,7 +2173,8 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, const Context ValuesRowInputStream input_stream(buf, partition_key_sample, context, format_settings); MutableColumns columns = partition_key_sample.cloneEmptyColumns(); - if 
(!input_stream.read(columns)) + RowReadExtension unused; + if (!input_stream.read(columns, unused)) throw Exception( "Could not parse partition value: `" + partition_ast.fields_str.toString() + "`", ErrorCodes::INVALID_PARTITION_VALUE); From a71d03737c18efb89fef49a34275a844aef5e6b1 Mon Sep 17 00:00:00 2001 From: chertus Date: Tue, 4 Dec 2018 23:03:04 +0300 Subject: [PATCH 51/90] send defaults via serialized ColumnsDescription CLICKHOUSE-3578 --- dbms/programs/client/Client.cpp | 24 ++++--- dbms/programs/server/TCPHandler.cpp | 23 +++++-- dbms/programs/server/TCPHandler.h | 1 + dbms/src/Client/Connection.cpp | 14 ++++ dbms/src/Client/Connection.h | 4 ++ dbms/src/Core/Protocol.h | 20 +++++- .../InputStreamFromASTInsertQuery.cpp | 8 +-- .../Interpreters/evaluateMissingDefaults.cpp | 4 +- .../Interpreters/evaluateMissingDefaults.h | 3 +- dbms/src/Storages/ColumnDefault.cpp | 69 ------------------- dbms/src/Storages/ColumnDefault.h | 9 --- dbms/src/Storages/ColumnsDescription.cpp | 18 +++++ dbms/src/Storages/ColumnsDescription.h | 1 + 13 files changed, 93 insertions(+), 105 deletions(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index e6064ec8860..407612257a7 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -60,7 +60,7 @@ #include #include #include -#include +#include #if USE_READLINE #include "Suggest.h" // Y_IGNORE @@ -893,11 +893,12 @@ private: /// Receive description of table structure. Block sample; - if (receiveSampleBlock(sample)) + ColumnsDescription columns_description; + if (receiveSampleBlock(sample, columns_description)) { /// If structure was received (thus, server has not thrown an exception), /// send our data with that structure. 
- sendData(sample); + sendData(sample, columns_description); receiveEndOfQuery(); } } @@ -935,7 +936,7 @@ private: } - void sendData(Block & sample) + void sendData(Block & sample, const ColumnsDescription & columns_description) { /// If INSERT data must be sent. const ASTInsertQuery * parsed_insert_query = typeid_cast(&*parsed_query); @@ -946,19 +947,19 @@ private: { /// Send data contained in the query. ReadBufferFromMemory data_in(parsed_insert_query->data, parsed_insert_query->end - parsed_insert_query->data); - sendDataFrom(data_in, sample); + sendDataFrom(data_in, sample, columns_description); } else if (!is_interactive) { /// Send data read from stdin. - sendDataFrom(std_in, sample); + sendDataFrom(std_in, sample, columns_description); } else throw Exception("No data to insert", ErrorCodes::NO_DATA_TO_INSERT); } - void sendDataFrom(ReadBuffer & buf, Block & sample) + void sendDataFrom(ReadBuffer & buf, Block & sample, const ColumnsDescription & columns_description) { String current_format = insert_format; @@ -970,9 +971,10 @@ private: BlockInputStreamPtr block_input = context.getInputFormat( current_format, buf, sample, insert_format_max_block_size); - auto column_defaults = ColumnDefaultsHelper::extract(sample); + const auto & column_defaults = columns_description.defaults; if (!column_defaults.empty()) block_input = std::make_shared(block_input, column_defaults, context); + BlockInputStreamPtr async_block_input = std::make_shared(block_input); async_block_input->readPrefix(); @@ -1110,7 +1112,7 @@ private: /// Receive the block that serves as an example of the structure of table where data will be inserted. 
- bool receiveSampleBlock(Block & out) + bool receiveSampleBlock(Block & out, ColumnsDescription & columns_description) { while (true) { @@ -1131,6 +1133,10 @@ private: onLogData(packet.block); break; + case Protocol::Server::TableColumns: + columns_description = ColumnsDescription::parse(packet.multistring_message[1]); + return receiveSampleBlock(out, columns_description); + default: throw NetException("Unexpected packet from server (expected Data, Exception or Log, got " + String(Protocol::Server::toString(packet.type)) + ")", ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER); diff --git a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp index d1ccc3e788f..efd4ffc55d6 100644 --- a/dbms/programs/server/TCPHandler.cpp +++ b/dbms/programs/server/TCPHandler.cpp @@ -360,17 +360,16 @@ void TCPHandler::processInsertQuery(const Settings & global_settings) */ state.io.out->writePrefix(); - /// Send block to the client - table structure. - Block block = state.io.out->getHeader(); - - /// attach column defaults to sample block (allow client to attach defaults for ommited source values) + /// Send ColumnsDescription for insertion table if (client_revision >= DBMS_MIN_REVISION_WITH_COLUMN_DEFAULTS_METADATA) { - auto db_and_table = query_context.getInsertionTable(); - ColumnDefaults column_defaults = ColumnDefaultsHelper::loadFromContext(query_context, db_and_table.first, db_and_table.second); - ColumnDefaultsHelper::attach(column_defaults, block); + const auto & db_and_table = query_context.getInsertionTable(); + if (auto * columns = ColumnsDescription::loadFromContext(query_context, db_and_table.first, db_and_table.second)) + sendTableColumns(*columns); } + /// Send block to the client - table structure. 
+ Block block = state.io.out->getHeader(); sendData(block); readData(global_settings); @@ -853,6 +852,16 @@ void TCPHandler::sendLogData(const Block & block) out->next(); } +void TCPHandler::sendTableColumns(const ColumnsDescription & columns) +{ + writeVarUInt(Protocol::Server::TableColumns, *out); + + /// Send external table name (empty name is the main table) + writeStringBinary("", *out); + writeStringBinary(columns.toString(), *out); + + out->next(); +} void TCPHandler::sendException(const Exception & e, bool with_stack_trace) { diff --git a/dbms/programs/server/TCPHandler.h b/dbms/programs/server/TCPHandler.h index af422921f07..14189da6176 100644 --- a/dbms/programs/server/TCPHandler.h +++ b/dbms/programs/server/TCPHandler.h @@ -144,6 +144,7 @@ private: void sendHello(); void sendData(const Block & block); /// Write a block to the network. void sendLogData(const Block & block); + void sendTableColumns(const ColumnsDescription & columns); void sendException(const Exception & e, bool with_stack_trace); void sendProgress(); void sendLogs(); diff --git a/dbms/src/Client/Connection.cpp b/dbms/src/Client/Connection.cpp index ce6246fba3a..50c5ca2cebc 100644 --- a/dbms/src/Client/Connection.cpp +++ b/dbms/src/Client/Connection.cpp @@ -604,6 +604,10 @@ Connection::Packet Connection::receivePacket() res.block = receiveLogData(); return res; + case Protocol::Server::TableColumns: + res.multistring_message = receiveMultistringMessage(res.type); + return res; + case Protocol::Server::EndOfStream: return res; @@ -713,6 +717,16 @@ std::unique_ptr Connection::receiveException() } +std::vector Connection::receiveMultistringMessage(UInt64 msg_type) +{ + size_t num = Protocol::Server::wordsInMessage(msg_type); + std::vector out(num); + for (size_t i = 0; i < num; ++i) + readStringBinary(out[i], *in); + return out; +} + + Progress Connection::receiveProgress() { //LOG_TRACE(log_wrapper.get(), "Receiving progress"); diff --git a/dbms/src/Client/Connection.h 
b/dbms/src/Client/Connection.h index d8229fc3463..27b7d6bd4d8 100644 --- a/dbms/src/Client/Connection.h +++ b/dbms/src/Client/Connection.h @@ -1,5 +1,7 @@ #pragma once +#include + #include #include @@ -96,6 +98,7 @@ public: Block block; std::unique_ptr exception; + std::vector multistring_message; Progress progress; BlockStreamProfileInfo profile_info; @@ -259,6 +262,7 @@ private: Block receiveLogData(); Block receiveDataImpl(BlockInputStreamPtr & stream); + std::vector receiveMultistringMessage(UInt64 msg_type); std::unique_ptr receiveException(); Progress receiveProgress(); BlockStreamProfileInfo receiveProfileInfo(); diff --git a/dbms/src/Core/Protocol.h b/dbms/src/Core/Protocol.h index 27df4341de9..28f60cce901 100644 --- a/dbms/src/Core/Protocol.h +++ b/dbms/src/Core/Protocol.h @@ -69,7 +69,8 @@ namespace Protocol Totals = 7, /// A block with totals (compressed or not). Extremes = 8, /// A block with minimums and maximums (compressed or not). TablesStatusResponse = 9, /// A response to TablesStatus request. - Log = 10 /// System logs of the query execution + Log = 10, /// System logs of the query execution + TableColumns = 11, /// Columns' description for default values calculation }; /// NOTE: If the type of packet argument would be Enum, the comparison packet >= 0 && packet < 10 @@ -78,11 +79,24 @@ namespace Protocol /// See https://www.securecoding.cert.org/confluence/display/cplusplus/INT36-CPP.+Do+not+use+out-of-range+enumeration+values inline const char * toString(UInt64 packet) { - static const char * data[] = { "Hello", "Data", "Exception", "Progress", "Pong", "EndOfStream", "ProfileInfo", "Totals", "Extremes", "TablesStatusResponse", "Log" }; - return packet < 11 + static const char * data[] = { "Hello", "Data", "Exception", "Progress", "Pong", "EndOfStream", "ProfileInfo", "Totals", + "Extremes", "TablesStatusResponse", "Log", "TableColumns" }; + return packet < 12 ? 
data[packet] : "Unknown packet"; } + + inline size_t wordsInMessage(UInt64 msg_type) + { + switch (msg_type) + { + case TableColumns: + return 2; + default: + break; + } + return 0; + } } /// Packet types that client transmits. diff --git a/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp b/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp index d232fee96ce..b78b7a59db6 100644 --- a/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp +++ b/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include namespace DB { @@ -46,9 +46,9 @@ InputStreamFromASTInsertQuery::InputStreamFromASTInsertQuery( res_stream = context.getInputFormat(format, *input_buffer_contacenated, streams.out->getHeader(), context.getSettings().max_insert_block_size); - auto column_defaults = ColumnDefaultsHelper::loadFromContext(context, ast_insert_query->database, ast_insert_query->table); - if (!column_defaults.empty()) - res_stream = std::make_shared(res_stream, column_defaults, context); + auto columns_description = ColumnsDescription::loadFromContext(context, ast_insert_query->database, ast_insert_query->table); + if (columns_description && !columns_description->defaults.empty()) + res_stream = std::make_shared(res_stream, columns_description->defaults, context); } } diff --git a/dbms/src/Interpreters/evaluateMissingDefaults.cpp b/dbms/src/Interpreters/evaluateMissingDefaults.cpp index 050078b7af4..0b330fb00cc 100644 --- a/dbms/src/Interpreters/evaluateMissingDefaults.cpp +++ b/dbms/src/Interpreters/evaluateMissingDefaults.cpp @@ -37,7 +37,7 @@ static ASTPtr requiredExpressions(Block & block, const NamesAndTypesList & requi void evaluateMissingDefaults(Block & block, const NamesAndTypesList & required_columns, const ColumnDefaults & column_defaults, - const Context & context, bool save_unneded_columns) + const Context & context, bool save_unneeded_columns) { if (column_defaults.empty()) return; @@ -46,7 +46,7 @@ void 
evaluateMissingDefaults(Block & block, if (!default_expr_list) return; - if (!save_unneded_columns) + if (!save_unneeded_columns) { auto syntax_result = SyntaxAnalyzer(context, {}).analyze(default_expr_list, block.getNamesAndTypesList()); ExpressionAnalyzer{default_expr_list, syntax_result, context}.getActions(true)->execute(block); diff --git a/dbms/src/Interpreters/evaluateMissingDefaults.h b/dbms/src/Interpreters/evaluateMissingDefaults.h index c65cb1680a2..320fb35c9cb 100644 --- a/dbms/src/Interpreters/evaluateMissingDefaults.h +++ b/dbms/src/Interpreters/evaluateMissingDefaults.h @@ -12,10 +12,9 @@ class Context; class NamesAndTypesList; struct ColumnDefault; -/// void evaluateMissingDefaults(Block & block, const NamesAndTypesList & required_columns, const std::unordered_map & column_defaults, - const Context & context, bool save_unneded_columns = true); + const Context & context, bool save_unneeded_columns = true); } diff --git a/dbms/src/Storages/ColumnDefault.cpp b/dbms/src/Storages/ColumnDefault.cpp index 2a89cda5caf..46995e307f3 100644 --- a/dbms/src/Storages/ColumnDefault.cpp +++ b/dbms/src/Storages/ColumnDefault.cpp @@ -54,73 +54,4 @@ bool operator==(const ColumnDefault & lhs, const ColumnDefault & rhs) return lhs.kind == rhs.kind && queryToString(lhs.expression) == queryToString(rhs.expression); } -ColumnDefaults ColumnDefaultsHelper::loadFromContext(const Context & context, const String & database, const String & table) -{ - if (context.getSettingsRef().insert_sample_with_metadata) - { - if (!context.isTableExist(database, table)) - return {}; - - StoragePtr storage = context.getTable(database, table); - const ColumnsDescription & table_columns = storage->getColumns(); - return table_columns.defaults; - } - return {}; -} - -void ColumnDefaultsHelper::attach(const ColumnDefaults & column_defaults, Block & sample) -{ - if (column_defaults.empty()) - return; - - for (auto pr : column_defaults) - { - std::stringstream ss; - ss << *pr.second.expression; - 
- /// Serialize defaults to special columns names. - /// It looks better to send expression as a column data but sample block has 0 rows. - ColumnWithTypeAndName col; - col.type = std::make_shared(); - col.name = Block::mkSpecialColumnName(toString(pr.second.kind) + ' ' + pr.first + ' ' + ss.str()); - col.column = col.type->createColumnConst(sample.rows(), ""); - - sample.insert(std::move(col)); - } -} - -ColumnDefaults ColumnDefaultsHelper::extract(Block & sample) -{ - ParserTernaryOperatorExpression parser; - ColumnDefaults column_defaults; - std::set pos_to_erase; - - for (size_t i = 0; i < sample.columns(); ++i) - { - const ColumnWithTypeAndName & column_wtn = sample.safeGetByPosition(i); - - if (Block::isSpecialColumnName(column_wtn.name, AliasNames::DEFAULT) || - Block::isSpecialColumnName(column_wtn.name, AliasNames::MATERIALIZED) || - Block::isSpecialColumnName(column_wtn.name, AliasNames::ALIAS)) - { - String str_kind, column_name; - std::stringstream ss; - ss << column_wtn.name; - ss >> str_kind >> column_name; - size_t expression_pos = str_kind.size() + column_name.size() + 3; - StringRef expression(&column_wtn.name[expression_pos], column_wtn.name.size() - expression_pos); - - ColumnDefault def; - def.kind = columnDefaultKindFromString(str_kind); - def.expression = parseQuery(parser, expression.data, expression.size); - - column_defaults.emplace(column_name, def); - pos_to_erase.insert(i); - } - } - - sample.erase(pos_to_erase); - return column_defaults; -} - } diff --git a/dbms/src/Storages/ColumnDefault.h b/dbms/src/Storages/ColumnDefault.h index 00693b54ad5..292c0cf7495 100644 --- a/dbms/src/Storages/ColumnDefault.h +++ b/dbms/src/Storages/ColumnDefault.h @@ -36,13 +36,4 @@ bool operator==(const ColumnDefault & lhs, const ColumnDefault & rhs); using ColumnDefaults = std::unordered_map; -/// Static methods to manipulate column defaults -struct ColumnDefaultsHelper -{ - static void attach(const ColumnDefaults & column_defaults, Block & sample); - 
static ColumnDefaults extract(Block & sample); - - static ColumnDefaults loadFromContext(const Context & context, const String & database, const String & table); -}; - } diff --git a/dbms/src/Storages/ColumnsDescription.cpp b/dbms/src/Storages/ColumnsDescription.cpp index cb67d01a4ea..c37eaa2fc46 100644 --- a/dbms/src/Storages/ColumnsDescription.cpp +++ b/dbms/src/Storages/ColumnsDescription.cpp @@ -10,6 +10,8 @@ #include #include #include +#include +#include #include #include @@ -162,4 +164,20 @@ ColumnsDescription ColumnsDescription::parse(const String & str) return result; } +const ColumnsDescription * ColumnsDescription::loadFromContext(const Context & context, const String & db, const String & table) +{ + if (context.getSettingsRef().insert_sample_with_metadata) + { + auto db_and_table = context.getInsertionTable(); + + if (context.isTableExist(db, table)) + { + StoragePtr storage = context.getTable(db, table); + return &storage->getColumns(); + } + } + + return nullptr; +} + } diff --git a/dbms/src/Storages/ColumnsDescription.h b/dbms/src/Storages/ColumnsDescription.h index 288d2712b3b..f06a9221dfd 100644 --- a/dbms/src/Storages/ColumnsDescription.h +++ b/dbms/src/Storages/ColumnsDescription.h @@ -57,6 +57,7 @@ struct ColumnsDescription String toString() const; static ColumnsDescription parse(const String & str); + static const ColumnsDescription * loadFromContext(const Context & context, const String & db, const String & table); }; } From dcb003bebff233fb85551ca2412dc33457713857 Mon Sep 17 00:00:00 2001 From: chertus Date: Tue, 4 Dec 2018 23:10:43 +0300 Subject: [PATCH 52/90] fix for review: rename variable CLICKHOUSE-3578 --- dbms/src/Core/BlockInfo.cpp | 6 +++--- dbms/src/Core/BlockInfo.h | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/dbms/src/Core/BlockInfo.cpp b/dbms/src/Core/BlockInfo.cpp index 3c8d1ccef7c..aae9723d0ed 100644 --- a/dbms/src/Core/BlockInfo.cpp +++ b/dbms/src/Core/BlockInfo.cpp @@ -60,7 +60,7 @@ void 
BlockInfo::read(ReadBuffer & in) void BlockMissingValues::setBit(size_t column_idx, size_t row_idx) { - RowsBitMask & mask = columns_defaults[column_idx]; + RowsBitMask & mask = rows_mask_by_column_id[column_idx]; mask.resize(row_idx + 1); mask[row_idx] = true; } @@ -68,8 +68,8 @@ void BlockMissingValues::setBit(size_t column_idx, size_t row_idx) const BlockMissingValues::RowsBitMask & BlockMissingValues::getDefaultsBitmask(size_t column_idx) const { static RowsBitMask none; - auto it = columns_defaults.find(column_idx); - if (it != columns_defaults.end()) + auto it = rows_mask_by_column_id.find(column_idx); + if (it != rows_mask_by_column_id.end()) return it->second; return none; } diff --git a/dbms/src/Core/BlockInfo.h b/dbms/src/Core/BlockInfo.h index 9e23de688af..32a09d8cf70 100644 --- a/dbms/src/Core/BlockInfo.h +++ b/dbms/src/Core/BlockInfo.h @@ -53,16 +53,16 @@ public: const RowsBitMask & getDefaultsBitmask(size_t column_idx) const; void setBit(size_t column_idx, size_t row_idx); - bool empty() const { return columns_defaults.empty(); } - size_t size() const { return columns_defaults.size(); } - void clear() { columns_defaults.clear(); } + bool empty() const { return rows_mask_by_column_id.empty(); } + size_t size() const { return rows_mask_by_column_id.size(); } + void clear() { rows_mask_by_column_id.clear(); } private: using RowsMaskByColumnId = std::unordered_map; - /// If columns_defaults[column_id][row_id] is true related value in Block should be replaced with column default. + /// If rows_mask_by_column_id[column_id][row_id] is true related value in Block should be replaced with column default. /// It could contain less columns and rows then related block. 
- RowsMaskByColumnId columns_defaults; + RowsMaskByColumnId rows_mask_by_column_id; }; } From 1a7313eaa009f2bc149a7c18f87eb551478f103e Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 5 Dec 2018 15:27:21 +0300 Subject: [PATCH 53/90] resolve class name conflict --- ...lockInputStream.cpp => AddingMissedBlockInputStream.cpp} | 6 +++--- ...ultBlockInputStream.h => AddingMissedBlockInputStream.h} | 4 ++-- dbms/src/DataStreams/IBlockInputStream.h | 6 ++++++ dbms/src/Storages/StorageBuffer.cpp | 4 ++-- 4 files changed, 13 insertions(+), 7 deletions(-) rename dbms/src/DataStreams/{AddingDefaultBlockInputStream.cpp => AddingMissedBlockInputStream.cpp} (75%) rename dbms/src/DataStreams/{AddingDefaultBlockInputStream.h => AddingMissedBlockInputStream.h} (90%) diff --git a/dbms/src/DataStreams/AddingDefaultBlockInputStream.cpp b/dbms/src/DataStreams/AddingMissedBlockInputStream.cpp similarity index 75% rename from dbms/src/DataStreams/AddingDefaultBlockInputStream.cpp rename to dbms/src/DataStreams/AddingMissedBlockInputStream.cpp index 749eebda1a5..e32a7024824 100644 --- a/dbms/src/DataStreams/AddingDefaultBlockInputStream.cpp +++ b/dbms/src/DataStreams/AddingMissedBlockInputStream.cpp @@ -1,11 +1,11 @@ -#include +#include #include namespace DB { -AddingDefaultBlockInputStream::AddingDefaultBlockInputStream( +AddingMissedBlockInputStream::AddingMissedBlockInputStream( const BlockInputStreamPtr & input_, const Block & header_, const ColumnDefaults & column_defaults_, @@ -16,7 +16,7 @@ AddingDefaultBlockInputStream::AddingDefaultBlockInputStream( children.emplace_back(input); } -Block AddingDefaultBlockInputStream::readImpl() +Block AddingMissedBlockInputStream::readImpl() { Block src = children.back()->read(); if (!src) diff --git a/dbms/src/DataStreams/AddingDefaultBlockInputStream.h b/dbms/src/DataStreams/AddingMissedBlockInputStream.h similarity index 90% rename from dbms/src/DataStreams/AddingDefaultBlockInputStream.h rename to 
dbms/src/DataStreams/AddingMissedBlockInputStream.h index c0afffbfc17..07b37a56d22 100644 --- a/dbms/src/DataStreams/AddingDefaultBlockInputStream.h +++ b/dbms/src/DataStreams/AddingMissedBlockInputStream.h @@ -14,10 +14,10 @@ namespace DB * 3. Columns that materialized from other columns (materialized columns) * All three types of columns are materialized (not constants). */ -class AddingDefaultBlockInputStream : public IProfilingBlockInputStream +class AddingMissedBlockInputStream : public IProfilingBlockInputStream { public: - AddingDefaultBlockInputStream( + AddingMissedBlockInputStream( const BlockInputStreamPtr & input_, const Block & header_, const ColumnDefaults & column_defaults_, diff --git a/dbms/src/DataStreams/IBlockInputStream.h b/dbms/src/DataStreams/IBlockInputStream.h index 3e7c59ff78f..c82761dc02b 100644 --- a/dbms/src/DataStreams/IBlockInputStream.h +++ b/dbms/src/DataStreams/IBlockInputStream.h @@ -63,6 +63,12 @@ public: */ virtual Block read() = 0; + virtual const BlockMissingValues & getMissingValues() const + { + static const BlockMissingValues none; + return none; + } + /** Read something before starting all data or after the end of all data. * In the `readSuffix` function, you can implement a finalization that can lead to an exception. * readPrefix() must be called before the first call to read(). 
diff --git a/dbms/src/Storages/StorageBuffer.cpp b/dbms/src/Storages/StorageBuffer.cpp index 020824e81c3..b7707d37ec2 100644 --- a/dbms/src/Storages/StorageBuffer.cpp +++ b/dbms/src/Storages/StorageBuffer.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include @@ -198,7 +198,7 @@ BlockInputStreams StorageBuffer::read( streams_from_dst = destination->read(columns_intersection, query_info, context, processed_stage, max_block_size, num_streams); for (auto & stream : streams_from_dst) { - stream = std::make_shared( + stream = std::make_shared( stream, header_after_adding_defaults, getColumns().defaults, context); stream = std::make_shared( context, stream, header, ConvertingBlockInputStream::MatchColumnsMode::Name); From 8500335ef5c8bbe08c27ddf3bb1405068bd4bf87 Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 5 Dec 2018 15:49:15 +0300 Subject: [PATCH 54/90] cleanup unused code --- dbms/src/Core/Block.h | 11 ----------- dbms/src/Storages/ColumnDefault.cpp | 7 ------- 2 files changed, 18 deletions(-) diff --git a/dbms/src/Core/Block.h b/dbms/src/Core/Block.h index 9cddbb21cda..a3198a0fb74 100644 --- a/dbms/src/Core/Block.h +++ b/dbms/src/Core/Block.h @@ -34,9 +34,6 @@ private: Container data; IndexByName index_by_name; - /// Regular column can't start with ' ', so it's possible to attach some hidden columns with a prefix - constexpr static const char SPECIAL_COLUMN_PREFIX = ' '; - public: BlockInfo info; @@ -103,14 +100,6 @@ public: operator bool() const { return !data.empty(); } bool operator!() const { return data.empty(); } - static String mkSpecialColumnName(const String & col_name) { return String(1, SPECIAL_COLUMN_PREFIX) + col_name; } - static bool isSpecialColumnName(const String & col_name) { return !col_name.empty() && col_name[0] == SPECIAL_COLUMN_PREFIX; } - - static bool isSpecialColumnName(const String & col_name, const String & pattern) - { - return col_name.find(String(1, SPECIAL_COLUMN_PREFIX) + pattern) == 0; - } - 
/** Get a list of column names separated by commas. */ std::string dumpNames() const; diff --git a/dbms/src/Storages/ColumnDefault.cpp b/dbms/src/Storages/ColumnDefault.cpp index 55dbbbc5038..19ba69c2d94 100644 --- a/dbms/src/Storages/ColumnDefault.cpp +++ b/dbms/src/Storages/ColumnDefault.cpp @@ -1,12 +1,5 @@ #include -#include -#include #include -#include -#include -#include -#include -#include namespace { From ff8fb077a4833ada3561fefa6d7d320456d2c4aa Mon Sep 17 00:00:00 2001 From: chertus Date: Thu, 6 Dec 2018 18:29:55 +0300 Subject: [PATCH 55/90] InDepthNodeVisitor: extract AST traverse from visitor logic CLICKHOUSE-3996 --- dbms/src/Interpreters/InDepthNodeVisitor.h | 53 ++++++++++++ .../PredicateExpressionsOptimizer.cpp | 3 +- dbms/src/Interpreters/QueryAliasesVisitor.h | 1 - dbms/src/Interpreters/SyntaxAnalyzer.cpp | 3 +- .../TranslateQualifiedNamesVisitor.cpp | 83 ++++++++++++------- .../TranslateQualifiedNamesVisitor.h | 73 ++++++---------- 6 files changed, 135 insertions(+), 81 deletions(-) create mode 100644 dbms/src/Interpreters/InDepthNodeVisitor.h diff --git a/dbms/src/Interpreters/InDepthNodeVisitor.h b/dbms/src/Interpreters/InDepthNodeVisitor.h new file mode 100644 index 00000000000..997013aff1f --- /dev/null +++ b/dbms/src/Interpreters/InDepthNodeVisitor.h @@ -0,0 +1,53 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +/// Visits AST tree in depth, call fucntions for nodes according to Matcher type data. +/// You need to define Data, label, visit() and needChildVisit() in Matcher class. +template +class InDepthNodeVisitor +{ +public: + using Data = typename Matcher::Data; + + InDepthNodeVisitor(Data & data_, std::ostream * ostr_ = nullptr) + : data(data_), + visit_depth(0), + ostr(ostr_) + {} + + void visit(ASTPtr & ast) + { + DumpASTNode dump(*ast, ostr, visit_depth, Matcher::label); + + if constexpr (!_topToBottom) + visitChildren(ast); + + auto additional_nodes = Matcher::visit(ast, data); + /// visit additional nodes (ex. 
only part of children) + for (ASTPtr & node : additional_nodes) + visit(node); + + if constexpr (_topToBottom) + visitChildren(ast); + } + +private: + MatcherData & data; + size_t visit_depth; + std::ostream * ostr; + + void visitChildren(ASTPtr & ast) + { + for (auto & child : ast->children) + if (Matcher::needChildVisit(ast, child)) + visit(child); + } +}; + +} diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp index a29e161126d..84ca8b0a088 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -311,7 +311,8 @@ ASTs PredicateExpressionsOptimizer::getSelectQueryProjectionColumns(ASTPtr & ast std::unordered_map aliases; std::vector tables = getDatabaseAndTables(*select_query, context.getCurrentDatabase()); - TranslateQualifiedNamesVisitor({}, tables).visit(ast); + TranslateQualifiedNamesMatcher::Data qn_visitor_data{{}, tables}; + TranslateQualifiedNamesVisitor(qn_visitor_data).visit(ast); QueryAliasesVisitor query_aliases_visitor(aliases); query_aliases_visitor.visit(ast); QueryNormalizer(ast, aliases, settings, {}, {}).perform(); diff --git a/dbms/src/Interpreters/QueryAliasesVisitor.h b/dbms/src/Interpreters/QueryAliasesVisitor.h index fd385e8b774..cb8548bd3cf 100644 --- a/dbms/src/Interpreters/QueryAliasesVisitor.h +++ b/dbms/src/Interpreters/QueryAliasesVisitor.h @@ -3,7 +3,6 @@ #include #include #include -#include namespace DB { diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index 4f6a4b5befe..c6a15058b5f 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -228,7 +228,8 @@ void translateQualifiedNames(ASTPtr & query, ASTSelectQuery * select_query, std::vector tables = getDatabaseAndTables(*select_query, context.getCurrentDatabase()); LogAST log; - TranslateQualifiedNamesVisitor visitor(source_columns, 
tables, log.stream()); + TranslateQualifiedNamesMatcher::Data visitor_data{source_columns, tables}; + TranslateQualifiedNamesVisitor visitor(visitor_data, log.stream()); visitor.visit(query); } diff --git a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index cfae71cbac7..c169691121f 100644 --- a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -1,5 +1,6 @@ #include +#include #include #include @@ -15,10 +16,45 @@ namespace DB namespace ErrorCodes { extern const int UNKNOWN_IDENTIFIER; + extern const int UNKNOWN_ELEMENT_IN_AST; + extern const int LOGICAL_ERROR; } -void TranslateQualifiedNamesVisitor::visit(ASTIdentifier & identifier, ASTPtr & ast, const DumpASTNode & dump) const +bool TranslateQualifiedNamesMatcher::needChildVisit(ASTPtr & node, const ASTPtr & child) { + /// Do not go to FROM, JOIN, subqueries. + if (typeid_cast(child.get()) || + typeid_cast(child.get())) + return false; + + /// Processed nodes. Do not go into children. + if (typeid_cast(node.get()) || + typeid_cast(node.get()) || + typeid_cast(node.get())) + return false; + + /// ASTSelectQuery + others + return true; +} + +std::vector TranslateQualifiedNamesMatcher::visit(ASTPtr & ast, Data & data) +{ + if (auto * t = typeid_cast(ast.get())) + return visit(*t, ast, data); + if (auto * t = typeid_cast(ast.get())) + return visit(*t, ast, data); + if (auto * t = typeid_cast(ast.get())) + return visit(*t, ast, data); + if (auto * t = typeid_cast(ast.get())) + return visit(*t, ast, data); + return {}; +} + +std::vector TranslateQualifiedNamesMatcher::visit(const ASTIdentifier & identifier, ASTPtr & ast, Data & data) +{ + const NameSet & source_columns = data.source_columns; + const std::vector & tables = data.tables; + if (identifier.general()) { /// Select first table name with max number of qualifiers which can be stripped. 
@@ -38,23 +74,23 @@ void TranslateQualifiedNamesVisitor::visit(ASTIdentifier & identifier, ASTPtr & } if (max_num_qualifiers_to_strip) - { - dump.print(String("stripIdentifier ") + identifier.name, max_num_qualifiers_to_strip); stripIdentifier(ast, max_num_qualifiers_to_strip); - } /// In case if column from the joined table are in source columns, change it's name to qualified. if (best_table_pos && source_columns.count(ast->getColumnName())) { const DatabaseAndTableWithAlias & table = tables[best_table_pos]; table.makeQualifiedName(ast); - dump.print("makeQualifiedName", table.database + '.' + table.table + ' ' + ast->getColumnName()); } } + + return {}; } -void TranslateQualifiedNamesVisitor::visit(ASTQualifiedAsterisk &, ASTPtr & ast, const DumpASTNode &) const +std::vector TranslateQualifiedNamesMatcher::visit(const ASTQualifiedAsterisk & , const ASTPtr & ast, Data & data) { + const std::vector & tables = data.tables; + if (ast->children.size() != 1) throw Exception("Logical error: qualified asterisk must have exactly one child", ErrorCodes::LOGICAL_ERROR); @@ -76,51 +112,40 @@ void TranslateQualifiedNamesVisitor::visit(ASTQualifiedAsterisk &, ASTPtr & ast, if (!table_names.database.empty() && db_and_table.database == table_names.database && db_and_table.table == table_names.table) - return; + return {}; } else if (num_components == 0) { if ((!table_names.table.empty() && db_and_table.table == table_names.table) || (!table_names.alias.empty() && db_and_table.table == table_names.alias)) - return; + return {}; } } throw Exception("Unknown qualified identifier: " + ident->getAliasOrColumnName(), ErrorCodes::UNKNOWN_IDENTIFIER); } -void TranslateQualifiedNamesVisitor::visit(ASTTableJoin & join, ASTPtr &, const DumpASTNode &) const +std::vector TranslateQualifiedNamesMatcher::visit(const ASTTableJoin & join, const ASTPtr & , Data &) { /// Don't translate on_expression here in order to resolve equation parts later. 
+ std::vector out; if (join.using_expression_list) - visit(join.using_expression_list); + out.push_back(join.using_expression_list); + return out; } -void TranslateQualifiedNamesVisitor::visit(ASTSelectQuery & select, ASTPtr & ast, const DumpASTNode &) const +std::vector TranslateQualifiedNamesMatcher::visit(const ASTSelectQuery & select, const ASTPtr & , Data &) { /// If the WHERE clause or HAVING consists of a single quailified column, the reference must be translated not only in children, /// but also in where_expression and having_expression. + std::vector out; if (select.prewhere_expression) - visit(select.prewhere_expression); + out.push_back(select.prewhere_expression); if (select.where_expression) - visit(select.where_expression); + out.push_back(select.where_expression); if (select.having_expression) - visit(select.having_expression); - - visitChildren(ast); -} - -void TranslateQualifiedNamesVisitor::visitChildren(ASTPtr & ast) const -{ - for (auto & child : ast->children) - { - /// Do not go to FROM, JOIN, subqueries. - if (!typeid_cast(child.get()) - && !typeid_cast(child.get())) - { - visit(child); - } - } + out.push_back(select.having_expression); + return out; } } diff --git a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h index 0e7079545f3..bebf11d83e2 100644 --- a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h +++ b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h @@ -1,67 +1,42 @@ #pragma once -#include #include -#include -#include -#include -#include -#include -#include #include +#include namespace DB { -/// Visitors consist of functions with unified interface 'void visit(Casted & x, ASTPtr & y)', there x is y, successfully casted to Casted. -/// Both types and fuction could have const specifiers. The second argument is used by visitor to replaces AST node (y) if needed. 
+class ASTIdentifier; +class ASTQualifiedAsterisk; +struct ASTTableJoin; +class ASTSelectQuery; -/// It visits nodes, find columns (general identifiers and asterisks) and translate their names according to tables' names. -class TranslateQualifiedNamesVisitor +/// Visit one node for names qualification. @sa InDepthNodeVisitor. +class TranslateQualifiedNamesMatcher { public: - TranslateQualifiedNamesVisitor(const NameSet & source_columns_, const std::vector & tables_, - std::ostream * ostr_ = nullptr) - : source_columns(source_columns_), - tables(tables_), - visit_depth(0), - ostr(ostr_) - {} - - void visit(ASTPtr & ast) const + struct Data { - if (!tryVisit(ast) && - !tryVisit(ast) && - !tryVisit(ast) && - !tryVisit(ast)) - visitChildren(ast); /// default: do nothing, visit children - } + const NameSet & source_columns; + const std::vector & tables; + }; + + static constexpr const char * label = __FILE__; + + static std::vector visit(ASTPtr & ast, Data & data); + static bool needChildVisit(ASTPtr & node, const ASTPtr & child); private: - const NameSet & source_columns; - const std::vector & tables; - mutable size_t visit_depth; - std::ostream * ostr; - - void visit(ASTIdentifier & node, ASTPtr & ast, const DumpASTNode & dump) const; - void visit(ASTQualifiedAsterisk & node, ASTPtr & ast, const DumpASTNode & dump) const; - void visit(ASTTableJoin & node, ASTPtr & ast, const DumpASTNode & dump) const; - void visit(ASTSelectQuery & ast, ASTPtr &, const DumpASTNode & dump) const; - - void visitChildren(ASTPtr &) const; - - template - bool tryVisit(ASTPtr & ast) const - { - if (T * t = typeid_cast(ast.get())) - { - DumpASTNode dump(*ast, ostr, visit_depth, "translateQualifiedNames"); - visit(*t, ast, dump); - return true; - } - return false; - } + static std::vector visit(const ASTIdentifier & node, ASTPtr & ast, Data &); + static std::vector visit(const ASTQualifiedAsterisk & node, const ASTPtr & ast, Data &); + static std::vector visit(const ASTTableJoin & node, const 
ASTPtr & ast, Data &); + static std::vector visit(const ASTSelectQuery & node, const ASTPtr & ast, Data &); }; +/// Visits AST for names qualification. +/// It finds columns (general identifiers and asterisks) and translate their names according to tables' names. +using TranslateQualifiedNamesVisitor = InDepthNodeVisitor; + } From 60dc8fcac245d15aac1b8b3b8edb8cef4b7deb14 Mon Sep 17 00:00:00 2001 From: chertus Date: Thu, 6 Dec 2018 20:20:17 +0300 Subject: [PATCH 56/90] fixes for review CLICKHOUSE-3578 --- dbms/src/Client/Connection.cpp | 2 +- dbms/src/Core/Protocol.h | 2 +- dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp | 4 ++-- dbms/src/DataStreams/AddingMissedBlockInputStream.h | 2 +- dbms/src/Formats/IRowInputStream.h | 7 +++---- dbms/src/Storages/ColumnDefault.h | 3 --- dbms/src/Storages/ColumnsDescription.cpp | 2 -- 7 files changed, 8 insertions(+), 14 deletions(-) diff --git a/dbms/src/Client/Connection.cpp b/dbms/src/Client/Connection.cpp index 82c76324a40..923c8179ca1 100644 --- a/dbms/src/Client/Connection.cpp +++ b/dbms/src/Client/Connection.cpp @@ -718,7 +718,7 @@ std::unique_ptr Connection::receiveException() std::vector Connection::receiveMultistringMessage(UInt64 msg_type) { - size_t num = Protocol::Server::wordsInMessage(msg_type); + size_t num = Protocol::Server::stringsInMessage(msg_type); std::vector out(num); for (size_t i = 0; i < num; ++i) readStringBinary(out[i], *in); diff --git a/dbms/src/Core/Protocol.h b/dbms/src/Core/Protocol.h index 28f60cce901..b50d018f9ce 100644 --- a/dbms/src/Core/Protocol.h +++ b/dbms/src/Core/Protocol.h @@ -86,7 +86,7 @@ namespace Protocol : "Unknown packet"; } - inline size_t wordsInMessage(UInt64 msg_type) + inline size_t stringsInMessage(UInt64 msg_type) { switch (msg_type) { diff --git a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp index 9ee6b15e1b9..6b959cbc05b 100644 --- a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp +++ 
b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp @@ -118,12 +118,12 @@ void AddingDefaultsBlockInputStream::checkCalculated(const ColumnWithTypeAndName size_t column_size = col_read.column->size(); if (column_size != col_defaults.column->size()) - throw Exception("Mismach column sizes while adding defaults", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); + throw Exception("Mismatch column sizes while adding defaults", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); if (column_size < defaults_needed) throw Exception("Unexpected defaults count", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); - if (col_read.type->getTypeId() != col_defaults.type->getTypeId()) + if (!col_read.type->equals(*col_defaults.type)) throw Exception("Mismach column types while adding defaults", ErrorCodes::TYPE_MISMATCH); } diff --git a/dbms/src/DataStreams/AddingMissedBlockInputStream.h b/dbms/src/DataStreams/AddingMissedBlockInputStream.h index 07b37a56d22..b3b98509645 100644 --- a/dbms/src/DataStreams/AddingMissedBlockInputStream.h +++ b/dbms/src/DataStreams/AddingMissedBlockInputStream.h @@ -23,7 +23,7 @@ public: const ColumnDefaults & column_defaults_, const Context & context_); - String getName() const override { return "AddingDefault"; } + String getName() const override { return "AddingMissed"; } Block getHeader() const override { return header; } private: diff --git a/dbms/src/Formats/IRowInputStream.h b/dbms/src/Formats/IRowInputStream.h index 65b98d2524f..045b2343e44 100644 --- a/dbms/src/Formats/IRowInputStream.h +++ b/dbms/src/Formats/IRowInputStream.h @@ -10,12 +10,11 @@ namespace DB { -/// A way to set some extentions to read and return extra information too. IRowInputStream.extendedRead() output. +/// Contains extra information about read data. struct RowReadExtension { - /// IRowInputStream.extendedRead() output value. - /// Contains one bit per column in resently read row. IRowInputStream could leave it empty, or partialy set. 
- /// It should contain true for columns that actually read from the source and false for defaults. + /// IRowInputStream.read() output. It contains non zero for columns that actually read from the source and zero otherwise. + /// It's used to attach defaults for partially filled rows. std::vector read_columns; }; diff --git a/dbms/src/Storages/ColumnDefault.h b/dbms/src/Storages/ColumnDefault.h index 292c0cf7495..0667ce4ed57 100644 --- a/dbms/src/Storages/ColumnDefault.h +++ b/dbms/src/Storages/ColumnDefault.h @@ -9,9 +9,6 @@ namespace DB { -class Context; -class Block; - enum class ColumnDefaultKind { Default, diff --git a/dbms/src/Storages/ColumnsDescription.cpp b/dbms/src/Storages/ColumnsDescription.cpp index 049845b75ec..0926fef14e8 100644 --- a/dbms/src/Storages/ColumnsDescription.cpp +++ b/dbms/src/Storages/ColumnsDescription.cpp @@ -221,8 +221,6 @@ const ColumnsDescription * ColumnsDescription::loadFromContext(const Context & c { if (context.getSettingsRef().insert_sample_with_metadata) { - auto db_and_table = context.getInsertionTable(); - if (context.isTableExist(db, table)) { StoragePtr storage = context.getTable(db, table); From 6fad51d64207bdc82f2844af0ced0dfa09360550 Mon Sep 17 00:00:00 2001 From: chertus Date: Thu, 6 Dec 2018 22:02:42 +0300 Subject: [PATCH 57/90] QueryAliasesMatcher via InDepthNodeVisitor (bottom to top) CLICKHOUSE-3996 --- dbms/src/Interpreters/InDepthNodeVisitor.h | 2 +- .../PredicateExpressionsOptimizer.cpp | 4 +- dbms/src/Interpreters/QueryAliasesVisitor.cpp | 84 +++++++++++-------- dbms/src/Interpreters/QueryAliasesVisitor.h | 55 ++++-------- dbms/src/Interpreters/SyntaxAnalyzer.cpp | 4 +- 5 files changed, 69 insertions(+), 80 deletions(-) diff --git a/dbms/src/Interpreters/InDepthNodeVisitor.h b/dbms/src/Interpreters/InDepthNodeVisitor.h index 997013aff1f..4292da7fbdb 100644 --- a/dbms/src/Interpreters/InDepthNodeVisitor.h +++ b/dbms/src/Interpreters/InDepthNodeVisitor.h @@ -38,7 +38,7 @@ public: } private: - MatcherData & 
data; + Data & data; size_t visit_depth; std::ostream * ostr; diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp index 84ca8b0a088..8e95773f72c 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -313,8 +313,8 @@ ASTs PredicateExpressionsOptimizer::getSelectQueryProjectionColumns(ASTPtr & ast TranslateQualifiedNamesMatcher::Data qn_visitor_data{{}, tables}; TranslateQualifiedNamesVisitor(qn_visitor_data).visit(ast); - QueryAliasesVisitor query_aliases_visitor(aliases); - query_aliases_visitor.visit(ast); + QueryAliasesMatcher::Data query_aliases_data{aliases}; + QueryAliasesVisitor(query_aliases_data).visit(ast); QueryNormalizer(ast, aliases, settings, {}, {}).perform(); for (const auto & projection_column : select_query->select_expression_list->children) diff --git a/dbms/src/Interpreters/QueryAliasesVisitor.cpp b/dbms/src/Interpreters/QueryAliasesVisitor.cpp index 22818f96ffd..cd7baba0061 100644 --- a/dbms/src/Interpreters/QueryAliasesVisitor.cpp +++ b/dbms/src/Interpreters/QueryAliasesVisitor.cpp @@ -1,5 +1,6 @@ #include #include + #include #include #include @@ -16,33 +17,62 @@ namespace ErrorCodes extern const int MULTIPLE_EXPRESSIONS_FOR_ALIAS; } -void QueryAliasesVisitor::visit(const ASTPtr & ast) const +static String wrongAliasMessage(const ASTPtr & ast, const ASTPtr & prev_ast, const String & alias) { - /// Bottom-up traversal. We do not go into subqueries. 
- visitChildren(ast); + std::stringstream message; + message << "Different expressions with the same alias " << backQuoteIfNeed(alias) << ":" << std::endl; + formatAST(*ast, message, false, true); + message << std::endl << "and" << std::endl; + formatAST(*prev_ast, message, false, true); + message << std::endl; + return message.str(); +} - if (!tryVisit(ast)) - { - DumpASTNode dump(*ast, ostr, visit_depth, "getQueryAliases"); - visitOther(ast); - } + +bool QueryAliasesMatcher::needChildVisit(ASTPtr & node, const ASTPtr &) +{ + /// Don't descent into table functions and subqueries and special case for ArrayJoin. + if (typeid_cast(node.get()) || + typeid_cast(node.get()) || + typeid_cast(node.get())) + return false; + return true; +} + +std::vector QueryAliasesMatcher::visit(ASTPtr & ast, Data & data) +{ + if (auto * t = typeid_cast(ast.get())) + return visit(*t, ast, data); + if (auto * t = typeid_cast(ast.get())) + return visit(*t, ast, data); + + visitOther(ast, data); + return {}; } /// The top-level aliases in the ARRAY JOIN section have a special meaning, we will not add them /// (skip the expression list itself and its children). -void QueryAliasesVisitor::visit(const ASTArrayJoin &, const ASTPtr & ast) const +std::vector QueryAliasesMatcher::visit(const ASTArrayJoin &, const ASTPtr & ast, Data & data) { + visitOther(ast, data); + + /// @warning It breaks botom-to-top order (childs processed after node here), could lead to some effects. + /// It's possible to add ast back to result vec to save order. It will need two phase ASTArrayJoin visit (setting phase in data). + std::vector out; for (auto & child1 : ast->children) for (auto & child2 : child1->children) for (auto & child3 : child2->children) - visit(child3); + out.push_back(child3); + return out; } /// set unique aliases for all subqueries. 
this is needed, because: /// 1) content of subqueries could change after recursive analysis, and auto-generated column names could become incorrect /// 2) result of different scalar subqueries can be cached inside expressions compilation cache and must have different names -void QueryAliasesVisitor::visit(ASTSubquery & subquery, const ASTPtr & ast) const +std::vector QueryAliasesMatcher::visit(ASTSubquery & subquery, const ASTPtr & ast, Data & data) { + Aliases & aliases = data.aliases; + static std::atomic_uint64_t subquery_index = 0; if (subquery.alias.empty()) @@ -59,42 +89,22 @@ void QueryAliasesVisitor::visit(ASTSubquery & subquery, const ASTPtr & ast) cons aliases[alias] = ast; } else - visitOther(ast); + visitOther(ast, data); + return {}; } -void QueryAliasesVisitor::visitOther(const ASTPtr & ast) const +void QueryAliasesMatcher::visitOther(const ASTPtr & ast, Data & data) { + Aliases & aliases = data.aliases; + String alias = ast->tryGetAlias(); if (!alias.empty()) { if (aliases.count(alias) && ast->getTreeHash() != aliases[alias]->getTreeHash()) - throw Exception(wrongAliasMessage(ast, alias), ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS); + throw Exception(wrongAliasMessage(ast, aliases[alias], alias), ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS); aliases[alias] = ast; } } -void QueryAliasesVisitor::visitChildren(const ASTPtr & ast) const -{ - for (auto & child : ast->children) - { - /// Don't descent into table functions and subqueries and special case for ArrayJoin. 
- if (!tryVisit(ast) && - !tryVisit(ast) && - !tryVisit(ast)) - visit(child); - } -} - -String QueryAliasesVisitor::wrongAliasMessage(const ASTPtr & ast, const String & alias) const -{ - std::stringstream message; - message << "Different expressions with the same alias " << backQuoteIfNeed(alias) << ":" << std::endl; - formatAST(*ast, message, false, true); - message << std::endl << "and" << std::endl; - formatAST(*aliases[alias], message, false, true); - message << std::endl; - return message.str(); -} - } diff --git a/dbms/src/Interpreters/QueryAliasesVisitor.h b/dbms/src/Interpreters/QueryAliasesVisitor.h index cb8548bd3cf..aae211e6e83 100644 --- a/dbms/src/Interpreters/QueryAliasesVisitor.h +++ b/dbms/src/Interpreters/QueryAliasesVisitor.h @@ -1,8 +1,7 @@ #pragma once -#include -#include #include +#include namespace DB { @@ -14,47 +13,27 @@ struct ASTArrayJoin; using Aliases = std::unordered_map; -/// Visitors consist of functions with unified interface 'void visit(Casted & x, ASTPtr & y)', there x is y, successfully casted to Casted. -/// Both types and fuction could have const specifiers. The second argument is used by visitor to replaces AST node (y) if needed. - -/// Visits AST nodes and collect their aliases in one map (with links to source nodes). -class QueryAliasesVisitor +/// Visits AST node to collect aliases. 
+class QueryAliasesMatcher { public: - QueryAliasesVisitor(Aliases & aliases_, std::ostream * ostr_ = nullptr) - : aliases(aliases_), - visit_depth(0), - ostr(ostr_) - {} + struct Data + { + Aliases & aliases; + }; - void visit(const ASTPtr & ast) const; + static constexpr const char * label = __FILE__; + + static std::vector visit(ASTPtr & ast, Data & data); + static bool needChildVisit(ASTPtr & node, const ASTPtr & child); private: - Aliases & aliases; - mutable size_t visit_depth; - std::ostream * ostr; - - void visit(const ASTTableExpression &, const ASTPtr &) const {} - void visit(const ASTSelectWithUnionQuery &, const ASTPtr &) const {} - - void visit(ASTSubquery & subquery, const ASTPtr & ast) const; - void visit(const ASTArrayJoin &, const ASTPtr & ast) const; - void visitOther(const ASTPtr & ast) const; - void visitChildren(const ASTPtr & ast) const; - - template - bool tryVisit(const ASTPtr & ast) const - { - if (T * t = typeid_cast(ast.get())) - { - DumpASTNode dump(*ast, ostr, visit_depth, "getQueryAliases"); - visit(*t, ast); - return true; - } - return false; - } - - String wrongAliasMessage(const ASTPtr & ast, const String & alias) const; + static std::vector visit(ASTSubquery & subquery, const ASTPtr & ast, Data & data); + static std::vector visit(const ASTArrayJoin &, const ASTPtr & ast, Data & data); + static void visitOther(const ASTPtr & ast, Data & data); }; +/// Visits AST nodes and collect their aliases in one map (with links to source nodes). 
+using QueryAliasesVisitor = InDepthNodeVisitor; + } diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index c6a15058b5f..bc7fce2a165 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -134,8 +134,8 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( /// Creates a dictionary `aliases`: alias -> ASTPtr { LogAST log; - QueryAliasesVisitor query_aliases_visitor(result.aliases, log.stream()); - query_aliases_visitor.visit(query); + QueryAliasesMatcher::Data query_aliases_data{result.aliases}; + QueryAliasesVisitor(query_aliases_data, log.stream()).visit(query); } /// Common subexpression elimination. Rewrite rules. From 75af882cf31fecb067fbc4c8f2db798a0b588743 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 7 Dec 2018 15:34:40 +0300 Subject: [PATCH 58/90] fix AST debug print with underline symbol --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 4 ---- dbms/src/Interpreters/QueryAliasesVisitor.h | 2 +- dbms/src/Interpreters/SyntaxAnalyzer.cpp | 2 +- dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h | 2 +- dbms/src/Parsers/ASTAlterQuery.cpp | 4 ++-- dbms/src/Parsers/ASTAlterQuery.h | 6 +++--- dbms/src/Parsers/ASTAssignment.h | 2 +- dbms/src/Parsers/ASTAsterisk.h | 2 +- dbms/src/Parsers/ASTCheckQuery.h | 2 +- dbms/src/Parsers/ASTColumnDeclaration.h | 2 +- dbms/src/Parsers/ASTCreateQuery.h | 4 ++-- dbms/src/Parsers/ASTDropQuery.cpp | 8 ++++---- dbms/src/Parsers/ASTDropQuery.h | 2 +- dbms/src/Parsers/ASTExplainQuery.h | 2 +- dbms/src/Parsers/ASTExpressionList.h | 2 +- dbms/src/Parsers/ASTFunction.cpp | 4 ++-- dbms/src/Parsers/ASTFunction.h | 2 +- dbms/src/Parsers/ASTIdentifier.h | 2 +- dbms/src/Parsers/ASTInsertQuery.h | 2 +- dbms/src/Parsers/ASTKillQueryQuery.cpp | 4 ++-- dbms/src/Parsers/ASTKillQueryQuery.h | 2 +- dbms/src/Parsers/ASTLiteral.h | 2 +- dbms/src/Parsers/ASTNameTypePair.h | 2 +- dbms/src/Parsers/ASTOptimizeQuery.h | 6 ++++-- 
dbms/src/Parsers/ASTOrderByElement.h | 5 +---- dbms/src/Parsers/ASTPartition.cpp | 4 ++-- dbms/src/Parsers/ASTPartition.h | 2 +- dbms/src/Parsers/ASTQualifiedAsterisk.h | 2 +- dbms/src/Parsers/ASTQueryWithOutput.h | 2 +- dbms/src/Parsers/ASTQueryWithTableAndOutput.h | 2 +- dbms/src/Parsers/ASTRenameQuery.h | 2 +- dbms/src/Parsers/ASTSampleRatio.h | 2 +- dbms/src/Parsers/ASTSelectQuery.h | 2 +- dbms/src/Parsers/ASTSelectWithUnionQuery.h | 2 +- dbms/src/Parsers/ASTSetQuery.h | 2 +- dbms/src/Parsers/ASTShowTablesQuery.h | 2 +- dbms/src/Parsers/ASTSubquery.h | 2 +- dbms/src/Parsers/ASTSystemQuery.h | 2 +- dbms/src/Parsers/ASTTablesInSelectQuery.h | 10 +++++----- dbms/src/Parsers/ASTUseQuery.h | 2 +- dbms/src/Parsers/DumpASTNode.h | 3 +-- dbms/src/Parsers/IAST.h | 2 +- dbms/src/Parsers/TablePropertiesQueriesASTs.h | 2 +- 43 files changed, 59 insertions(+), 65 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 381770e6782..4826f38b9dc 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -31,8 +31,6 @@ #include #include #include -#include -#include #include #include @@ -62,11 +60,9 @@ #include #include -#include #include #include #include -#include #include namespace DB diff --git a/dbms/src/Interpreters/QueryAliasesVisitor.h b/dbms/src/Interpreters/QueryAliasesVisitor.h index aae211e6e83..2cd4f8f0c6b 100644 --- a/dbms/src/Interpreters/QueryAliasesVisitor.h +++ b/dbms/src/Interpreters/QueryAliasesVisitor.h @@ -22,7 +22,7 @@ public: Aliases & aliases; }; - static constexpr const char * label = __FILE__; + static constexpr const char * label = "QueryAliases"; static std::vector visit(ASTPtr & ast, Data & data); static bool needChildVisit(ASTPtr & node, const ASTPtr & child); diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index bc7fce2a165..ae6d3ae0b4e 100644 --- 
a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -46,7 +46,7 @@ namespace { using LogAST = DebugASTLog; /// set to true to enable logs -using Aliases = std::unordered_map; +using Aliases = SyntaxAnalyzerResult::Aliases; /// Add columns from storage to source_columns list. void collectSourceColumns(ASTSelectQuery * select_query, const Context & context, diff --git a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h index bebf11d83e2..3ce69dd0afa 100644 --- a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h +++ b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h @@ -23,7 +23,7 @@ public: const std::vector & tables; }; - static constexpr const char * label = __FILE__; + static constexpr const char * label = "TranslateQualifiedNames"; static std::vector visit(ASTPtr & ast, Data & data); static bool needChildVisit(ASTPtr & node, const ASTPtr & child); diff --git a/dbms/src/Parsers/ASTAlterQuery.cpp b/dbms/src/Parsers/ASTAlterQuery.cpp index 863cb299bbb..3577346df0f 100644 --- a/dbms/src/Parsers/ASTAlterQuery.cpp +++ b/dbms/src/Parsers/ASTAlterQuery.cpp @@ -196,9 +196,9 @@ void ASTAlterCommandList::formatImpl(const FormatSettings & settings, FormatStat /** Get the text that identifies this element. 
*/ -String ASTAlterQuery::getID() const +String ASTAlterQuery::getID(char delim) const { - return "AlterQuery_" + database + "_" + table; + return "AlterQuery" + (delim + database) + delim + table; } ASTPtr ASTAlterQuery::clone() const diff --git a/dbms/src/Parsers/ASTAlterQuery.h b/dbms/src/Parsers/ASTAlterQuery.h index b58b3a29b30..b73e1f38e2c 100644 --- a/dbms/src/Parsers/ASTAlterQuery.h +++ b/dbms/src/Parsers/ASTAlterQuery.h @@ -97,7 +97,7 @@ public: /// To distinguish REPLACE and ATTACH PARTITION partition FROM db.table bool replace = true; - String getID() const override { return "AlterCommand_" + std::to_string(static_cast(type)); } + String getID(char delim) const override { return "AlterCommand" + (delim + std::to_string(static_cast(type))); } ASTPtr clone() const override; @@ -116,7 +116,7 @@ public: children.push_back(command); } - String getID() const override { return "AlterCommandList"; } + String getID(char) const override { return "AlterCommandList"; } ASTPtr clone() const override; @@ -129,7 +129,7 @@ class ASTAlterQuery : public ASTQueryWithTableAndOutput, public ASTQueryWithOnCl public: ASTAlterCommandList * command_list = nullptr; - String getID() const override; + String getID(char) const override; ASTPtr clone() const override; diff --git a/dbms/src/Parsers/ASTAssignment.h b/dbms/src/Parsers/ASTAssignment.h index 18bf46c171c..6753711f9e3 100644 --- a/dbms/src/Parsers/ASTAssignment.h +++ b/dbms/src/Parsers/ASTAssignment.h @@ -12,7 +12,7 @@ public: String column_name; ASTPtr expression; - String getID() const override { return "Assignment_" + column_name; } + String getID(char delim) const override { return "Assignment" + (delim + column_name); } ASTPtr clone() const override { diff --git a/dbms/src/Parsers/ASTAsterisk.h b/dbms/src/Parsers/ASTAsterisk.h index 02a0f99895d..9a5a4efe267 100644 --- a/dbms/src/Parsers/ASTAsterisk.h +++ b/dbms/src/Parsers/ASTAsterisk.h @@ -9,7 +9,7 @@ namespace DB class ASTAsterisk : public IAST { public: - String 
getID() const override { return "Asterisk"; } + String getID(char) const override { return "Asterisk"; } ASTPtr clone() const override; void appendColumnName(WriteBuffer & ostr) const override; diff --git a/dbms/src/Parsers/ASTCheckQuery.h b/dbms/src/Parsers/ASTCheckQuery.h index a87f68c855b..595b6c2ecb6 100644 --- a/dbms/src/Parsers/ASTCheckQuery.h +++ b/dbms/src/Parsers/ASTCheckQuery.h @@ -8,7 +8,7 @@ namespace DB struct ASTCheckQuery : public ASTQueryWithTableAndOutput { /** Get the text that identifies this element. */ - String getID() const override { return ("CheckQuery_" + database + "_" + table); } + String getID(char delim) const override { return "CheckQuery" + (delim + database) + delim + table; } ASTPtr clone() const override { diff --git a/dbms/src/Parsers/ASTColumnDeclaration.h b/dbms/src/Parsers/ASTColumnDeclaration.h index 870472fcb30..e288dbbcd58 100644 --- a/dbms/src/Parsers/ASTColumnDeclaration.h +++ b/dbms/src/Parsers/ASTColumnDeclaration.h @@ -17,7 +17,7 @@ public: ASTPtr default_expression; ASTPtr comment; - String getID() const override { return "ColumnDeclaration_" + name; } + String getID(char delim) const override { return "ColumnDeclaration" + (delim + name); } ASTPtr clone() const override { diff --git a/dbms/src/Parsers/ASTCreateQuery.h b/dbms/src/Parsers/ASTCreateQuery.h index 925b12600fd..840ba345813 100644 --- a/dbms/src/Parsers/ASTCreateQuery.h +++ b/dbms/src/Parsers/ASTCreateQuery.h @@ -21,7 +21,7 @@ public: IAST * sample_by = nullptr; ASTSetQuery * settings = nullptr; - String getID() const override { return "Storage definition"; } + String getID(char) const override { return "Storage definition"; } ASTPtr clone() const override { @@ -99,7 +99,7 @@ public: ASTSelectWithUnionQuery * select = nullptr; /** Get the text that identifies this element. */ - String getID() const override { return (attach ? "AttachQuery_" : "CreateQuery_") + database + "_" + table; } + String getID(char delim) const override { return (attach ? 
"AttachQuery" : "CreateQuery") + (delim + database) + delim + table; } ASTPtr clone() const override { diff --git a/dbms/src/Parsers/ASTDropQuery.cpp b/dbms/src/Parsers/ASTDropQuery.cpp index 6b6b9b0bec2..094a34242cf 100644 --- a/dbms/src/Parsers/ASTDropQuery.cpp +++ b/dbms/src/Parsers/ASTDropQuery.cpp @@ -10,14 +10,14 @@ namespace ErrorCodes } -String ASTDropQuery::getID() const +String ASTDropQuery::getID(char delim) const { if (kind == ASTDropQuery::Kind::Drop) - return "DropQuery_" + database + "_" + table; + return "DropQuery" + (delim + database) + delim + table; else if (kind == ASTDropQuery::Kind::Detach) - return "DetachQuery_" + database + "_" + table; + return "DetachQuery" + (delim + database) + delim + table; else if (kind == ASTDropQuery::Kind::Truncate) - return "TruncateQuery_" + database + "_" + table; + return "TruncateQuery" + (delim + database) + delim + table; else throw Exception("Not supported kind of drop query.", ErrorCodes::SYNTAX_ERROR); } diff --git a/dbms/src/Parsers/ASTDropQuery.h b/dbms/src/Parsers/ASTDropQuery.h index 83b5d28e38b..1c230e30aea 100644 --- a/dbms/src/Parsers/ASTDropQuery.h +++ b/dbms/src/Parsers/ASTDropQuery.h @@ -23,7 +23,7 @@ public: bool if_exists{false}; /** Get the text that identifies this element. 
*/ - String getID() const override; + String getID(char) const override; ASTPtr clone() const override; ASTPtr getRewrittenASTWithoutOnCluster(const std::string & new_database) const override diff --git a/dbms/src/Parsers/ASTExplainQuery.h b/dbms/src/Parsers/ASTExplainQuery.h index b02731c79d3..a1eb9feecd4 100644 --- a/dbms/src/Parsers/ASTExplainQuery.h +++ b/dbms/src/Parsers/ASTExplainQuery.h @@ -20,7 +20,7 @@ public: : kind(kind_) {} - String getID() const override { return "Explain_" + toString(kind); } + String getID(char delim) const override { return "Explain" + (delim + toString(kind)); } ASTPtr clone() const override { return std::make_shared(*this); } protected: diff --git a/dbms/src/Parsers/ASTExpressionList.h b/dbms/src/Parsers/ASTExpressionList.h index cfe9cb3b714..4f77adb4009 100644 --- a/dbms/src/Parsers/ASTExpressionList.h +++ b/dbms/src/Parsers/ASTExpressionList.h @@ -11,7 +11,7 @@ namespace DB class ASTExpressionList : public IAST { public: - String getID() const override { return "ExpressionList"; } + String getID(char ) const override { return "ExpressionList"; } ASTPtr clone() const override; void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; diff --git a/dbms/src/Parsers/ASTFunction.cpp b/dbms/src/Parsers/ASTFunction.cpp index d84d77b649b..73880089f53 100644 --- a/dbms/src/Parsers/ASTFunction.cpp +++ b/dbms/src/Parsers/ASTFunction.cpp @@ -36,9 +36,9 @@ void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr) const } /** Get the text that identifies this element. 
*/ -String ASTFunction::getID() const +String ASTFunction::getID(char delim) const { - return "Function_" + name; + return "Function" + (delim + name); } ASTPtr ASTFunction::clone() const diff --git a/dbms/src/Parsers/ASTFunction.h b/dbms/src/Parsers/ASTFunction.h index 3bed72d4305..effc9a6cea9 100644 --- a/dbms/src/Parsers/ASTFunction.h +++ b/dbms/src/Parsers/ASTFunction.h @@ -19,7 +19,7 @@ public: public: /** Get text identifying the AST node. */ - String getID() const override; + String getID(char delim) const override; ASTPtr clone() const override; diff --git a/dbms/src/Parsers/ASTIdentifier.h b/dbms/src/Parsers/ASTIdentifier.h index 0ada2b60852..b8c56727e17 100644 --- a/dbms/src/Parsers/ASTIdentifier.h +++ b/dbms/src/Parsers/ASTIdentifier.h @@ -24,7 +24,7 @@ public: : name(name_), kind(kind_) { range = StringRange(name.data(), name.data() + name.size()); } /** Get the text that identifies this element. */ - String getID() const override { return "Identifier_" + name; } + String getID(char delim) const override { return "Identifier" + (delim + name); } ASTPtr clone() const override { return std::make_shared(*this); } diff --git a/dbms/src/Parsers/ASTInsertQuery.h b/dbms/src/Parsers/ASTInsertQuery.h index 9da68ca21c8..baf2a9fce8d 100644 --- a/dbms/src/Parsers/ASTInsertQuery.h +++ b/dbms/src/Parsers/ASTInsertQuery.h @@ -27,7 +27,7 @@ public: const char * end = nullptr; /** Get the text that identifies this element. 
*/ - String getID() const override { return "InsertQuery_" + database + "_" + table; } + String getID(char delim) const override { return "InsertQuery" + (delim + database) + delim + table; } ASTPtr clone() const override { diff --git a/dbms/src/Parsers/ASTKillQueryQuery.cpp b/dbms/src/Parsers/ASTKillQueryQuery.cpp index 0b9e6bcf4bc..9e7631eacc3 100644 --- a/dbms/src/Parsers/ASTKillQueryQuery.cpp +++ b/dbms/src/Parsers/ASTKillQueryQuery.cpp @@ -3,9 +3,9 @@ namespace DB { -String ASTKillQueryQuery::getID() const +String ASTKillQueryQuery::getID(char delim) const { - return "KillQueryQuery_" + (where_expression ? where_expression->getID() : "") + "_" + String(sync ? "SYNC" : "ASYNC"); + return String("KillQueryQuery") + delim + (where_expression ? where_expression->getID() : "") + delim + String(sync ? "SYNC" : "ASYNC"); } void ASTKillQueryQuery::formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const diff --git a/dbms/src/Parsers/ASTKillQueryQuery.h b/dbms/src/Parsers/ASTKillQueryQuery.h index 491bd3aecd2..7099fbccece 100644 --- a/dbms/src/Parsers/ASTKillQueryQuery.h +++ b/dbms/src/Parsers/ASTKillQueryQuery.h @@ -20,7 +20,7 @@ public: return clone; } - String getID() const override; + String getID(char) const override; void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; diff --git a/dbms/src/Parsers/ASTLiteral.h b/dbms/src/Parsers/ASTLiteral.h index d2b86cbb28c..d6b8b4efc3b 100644 --- a/dbms/src/Parsers/ASTLiteral.h +++ b/dbms/src/Parsers/ASTLiteral.h @@ -18,7 +18,7 @@ public: ASTLiteral(const Field & value_) : value(value_) {} /** Get the text that identifies this element. 
*/ - String getID() const override { return "Literal_" + applyVisitor(FieldVisitorDump(), value); } + String getID(char delim) const override { return "Literal" + (delim + applyVisitor(FieldVisitorDump(), value)); } ASTPtr clone() const override { return std::make_shared(*this); } diff --git a/dbms/src/Parsers/ASTNameTypePair.h b/dbms/src/Parsers/ASTNameTypePair.h index 9dad01df2f5..ac72448e2e9 100644 --- a/dbms/src/Parsers/ASTNameTypePair.h +++ b/dbms/src/Parsers/ASTNameTypePair.h @@ -17,7 +17,7 @@ public: ASTPtr type; /** Get the text that identifies this element. */ - String getID() const override { return "NameTypePair_" + name; } + String getID(char delim) const override { return "NameTypePair" + (delim + name); } ASTPtr clone() const override { diff --git a/dbms/src/Parsers/ASTOptimizeQuery.h b/dbms/src/Parsers/ASTOptimizeQuery.h index d228a8c905f..c93fea2b6d3 100644 --- a/dbms/src/Parsers/ASTOptimizeQuery.h +++ b/dbms/src/Parsers/ASTOptimizeQuery.h @@ -21,8 +21,10 @@ public: bool deduplicate; /** Get the text that identifies this element. */ - String getID() const override - { return "OptimizeQuery_" + database + "_" + table + (final ? "_final" : "") + (deduplicate ? "_deduplicate" : ""); } + String getID(char delim) const override + { + return "OptimizeQuery" + (delim + database) + delim + table + (final ? "_final" : "") + (deduplicate ? 
"_deduplicate" : ""); + } ASTPtr clone() const override { diff --git a/dbms/src/Parsers/ASTOrderByElement.h b/dbms/src/Parsers/ASTOrderByElement.h index 7e9459cba9b..729915400ce 100644 --- a/dbms/src/Parsers/ASTOrderByElement.h +++ b/dbms/src/Parsers/ASTOrderByElement.h @@ -27,10 +27,7 @@ public: { } - String getID() const override - { - return "OrderByElement"; - } + String getID(char) const override { return "OrderByElement"; } ASTPtr clone() const override { diff --git a/dbms/src/Parsers/ASTPartition.cpp b/dbms/src/Parsers/ASTPartition.cpp index 6f0a256ec3b..d24575b7f43 100644 --- a/dbms/src/Parsers/ASTPartition.cpp +++ b/dbms/src/Parsers/ASTPartition.cpp @@ -4,12 +4,12 @@ namespace DB { -String ASTPartition::getID() const +String ASTPartition::getID(char delim) const { if (value) return "Partition"; else - return "Partition_ID_" + id; + return "Partition_ID" + (delim + id); } ASTPtr ASTPartition::clone() const diff --git a/dbms/src/Parsers/ASTPartition.h b/dbms/src/Parsers/ASTPartition.h index b1ed866284a..d87206d7bb4 100644 --- a/dbms/src/Parsers/ASTPartition.h +++ b/dbms/src/Parsers/ASTPartition.h @@ -17,7 +17,7 @@ public: String id; - String getID() const override; + String getID(char) const override; ASTPtr clone() const override; protected: diff --git a/dbms/src/Parsers/ASTQualifiedAsterisk.h b/dbms/src/Parsers/ASTQualifiedAsterisk.h index 74cd745d033..e084d80cb94 100644 --- a/dbms/src/Parsers/ASTQualifiedAsterisk.h +++ b/dbms/src/Parsers/ASTQualifiedAsterisk.h @@ -12,7 +12,7 @@ namespace DB class ASTQualifiedAsterisk : public IAST { public: - String getID() const override { return "QualifiedAsterisk"; } + String getID(char) const override { return "QualifiedAsterisk"; } ASTPtr clone() const override { auto clone = std::make_shared(*this); diff --git a/dbms/src/Parsers/ASTQueryWithOutput.h b/dbms/src/Parsers/ASTQueryWithOutput.h index 1e927084e86..0660b1bec63 100644 --- a/dbms/src/Parsers/ASTQueryWithOutput.h +++ b/dbms/src/Parsers/ASTQueryWithOutput.h @@ 
-34,7 +34,7 @@ template class ASTQueryWithOutputImpl : public ASTQueryWithOutput { public: - String getID() const override { return ASTIDAndQueryNames::ID; } + String getID(char) const override { return ASTIDAndQueryNames::ID; } ASTPtr clone() const override { diff --git a/dbms/src/Parsers/ASTQueryWithTableAndOutput.h b/dbms/src/Parsers/ASTQueryWithTableAndOutput.h index dd9b3fce153..3f3fd036d78 100644 --- a/dbms/src/Parsers/ASTQueryWithTableAndOutput.h +++ b/dbms/src/Parsers/ASTQueryWithTableAndOutput.h @@ -30,7 +30,7 @@ template class ASTQueryWithTableAndOutputImpl : public ASTQueryWithTableAndOutput { public: - String getID() const override { return AstIDAndQueryNames::ID + ("_" + database) + "_" + table; } + String getID(char delim) const override { return AstIDAndQueryNames::ID + (delim + database) + delim + table; } ASTPtr clone() const override { diff --git a/dbms/src/Parsers/ASTRenameQuery.h b/dbms/src/Parsers/ASTRenameQuery.h index 561a88a54ce..006c8583836 100644 --- a/dbms/src/Parsers/ASTRenameQuery.h +++ b/dbms/src/Parsers/ASTRenameQuery.h @@ -29,7 +29,7 @@ public: Elements elements; /** Get the text that identifies this element. 
*/ - String getID() const override { return "Rename"; } + String getID(char) const override { return "Rename"; } ASTPtr clone() const override { diff --git a/dbms/src/Parsers/ASTSampleRatio.h b/dbms/src/Parsers/ASTSampleRatio.h index ca91d0b6cbb..787833eb4f3 100644 --- a/dbms/src/Parsers/ASTSampleRatio.h +++ b/dbms/src/Parsers/ASTSampleRatio.h @@ -28,7 +28,7 @@ public: ASTSampleRatio(Rational & ratio_) : ratio(ratio_) {} - String getID() const override { return "SampleRatio_" + toString(ratio); } + String getID(char delim) const override { return "SampleRatio" + (delim + toString(ratio)); } ASTPtr clone() const override { return std::make_shared(*this); } diff --git a/dbms/src/Parsers/ASTSelectQuery.h b/dbms/src/Parsers/ASTSelectQuery.h index eb08cf6e20c..d9bb3f11be4 100644 --- a/dbms/src/Parsers/ASTSelectQuery.h +++ b/dbms/src/Parsers/ASTSelectQuery.h @@ -16,7 +16,7 @@ class ASTSelectQuery : public IAST { public: /** Get the text that identifies this element. */ - String getID() const override { return "SelectQuery"; } + String getID(char) const override { return "SelectQuery"; } ASTPtr clone() const override; diff --git a/dbms/src/Parsers/ASTSelectWithUnionQuery.h b/dbms/src/Parsers/ASTSelectWithUnionQuery.h index c458825507e..41ec8bb1076 100644 --- a/dbms/src/Parsers/ASTSelectWithUnionQuery.h +++ b/dbms/src/Parsers/ASTSelectWithUnionQuery.h @@ -12,7 +12,7 @@ namespace DB class ASTSelectWithUnionQuery : public ASTQueryWithOutput { public: - String getID() const override { return "SelectWithUnionQuery"; } + String getID(char) const override { return "SelectWithUnionQuery"; } ASTPtr clone() const override; void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; diff --git a/dbms/src/Parsers/ASTSetQuery.h b/dbms/src/Parsers/ASTSetQuery.h index 29656b26434..756c5bdc058 100644 --- a/dbms/src/Parsers/ASTSetQuery.h +++ b/dbms/src/Parsers/ASTSetQuery.h @@ -26,7 +26,7 @@ public: Changes changes; /** Get the text 
that identifies this element. */ - String getID() const override { return "Set"; } + String getID(char) const override { return "Set"; } ASTPtr clone() const override { return std::make_shared(*this); } diff --git a/dbms/src/Parsers/ASTShowTablesQuery.h b/dbms/src/Parsers/ASTShowTablesQuery.h index 7f67a08c6e7..58915df0e60 100644 --- a/dbms/src/Parsers/ASTShowTablesQuery.h +++ b/dbms/src/Parsers/ASTShowTablesQuery.h @@ -21,7 +21,7 @@ public: bool not_like{false}; /** Get the text that identifies this element. */ - String getID() const override { return "ShowTables"; } + String getID(char) const override { return "ShowTables"; } ASTPtr clone() const override { diff --git a/dbms/src/Parsers/ASTSubquery.h b/dbms/src/Parsers/ASTSubquery.h index 513f0673c6b..8ca3291824b 100644 --- a/dbms/src/Parsers/ASTSubquery.h +++ b/dbms/src/Parsers/ASTSubquery.h @@ -13,7 +13,7 @@ class ASTSubquery : public ASTWithAlias { public: /** Get the text that identifies this element. */ - String getID() const override { return "Subquery"; } + String getID(char) const override { return "Subquery"; } ASTPtr clone() const override { diff --git a/dbms/src/Parsers/ASTSystemQuery.h b/dbms/src/Parsers/ASTSystemQuery.h index 3a4bffb19b9..bc4de9689c6 100644 --- a/dbms/src/Parsers/ASTSystemQuery.h +++ b/dbms/src/Parsers/ASTSystemQuery.h @@ -51,7 +51,7 @@ public: String target_database; String target_table; - String getID() const override { return "SYSTEM query"; } + String getID(char) const override { return "SYSTEM query"; } ASTPtr clone() const override { return std::make_shared(*this); } diff --git a/dbms/src/Parsers/ASTTablesInSelectQuery.h b/dbms/src/Parsers/ASTTablesInSelectQuery.h index 289cfd3c447..c94192b44d2 100644 --- a/dbms/src/Parsers/ASTTablesInSelectQuery.h +++ b/dbms/src/Parsers/ASTTablesInSelectQuery.h @@ -53,7 +53,7 @@ struct ASTTableExpression : public IAST ASTPtr sample_offset; using IAST::IAST; - String getID() const override { return "TableExpression"; } + String getID(char) 
const override { return "TableExpression"; } ASTPtr clone() const override; void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; }; @@ -98,7 +98,7 @@ struct ASTTableJoin : public IAST ASTPtr on_expression; using IAST::IAST; - String getID() const override { return "TableJoin"; } + String getID(char) const override { return "TableJoin"; } ASTPtr clone() const override; void formatImplBeforeTable(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const; @@ -122,7 +122,7 @@ struct ASTArrayJoin : public IAST ASTPtr expression_list; using IAST::IAST; - String getID() const override { return "ArrayJoin"; } + String getID(char) const override { return "ArrayJoin"; } ASTPtr clone() const override; void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; }; @@ -139,7 +139,7 @@ struct ASTTablesInSelectQueryElement : public IAST ASTPtr array_join; /// Arrays to JOIN. 
using IAST::IAST; - String getID() const override { return "TablesInSelectQueryElement"; } + String getID(char) const override { return "TablesInSelectQueryElement"; } ASTPtr clone() const override; void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; }; @@ -149,7 +149,7 @@ struct ASTTablesInSelectQueryElement : public IAST struct ASTTablesInSelectQuery : public IAST { using IAST::IAST; - String getID() const override { return "TablesInSelectQuery"; } + String getID(char) const override { return "TablesInSelectQuery"; } ASTPtr clone() const override; void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; }; diff --git a/dbms/src/Parsers/ASTUseQuery.h b/dbms/src/Parsers/ASTUseQuery.h index 89edc6c8a7e..f1ef1b3b408 100644 --- a/dbms/src/Parsers/ASTUseQuery.h +++ b/dbms/src/Parsers/ASTUseQuery.h @@ -15,7 +15,7 @@ public: String database; /** Get the text that identifies this element. */ - String getID() const override { return "UseQuery_" + database; } + String getID(char delim) const override { return "UseQuery" + (delim + database); } ASTPtr clone() const override { return std::make_shared(*this); } diff --git a/dbms/src/Parsers/DumpASTNode.h b/dbms/src/Parsers/DumpASTNode.h index 64c86514440..edd6b3a634a 100644 --- a/dbms/src/Parsers/DumpASTNode.h +++ b/dbms/src/Parsers/DumpASTNode.h @@ -64,8 +64,7 @@ private: String nodeId() const { - String id = ast.getID(); - std::replace(id.begin(), id.end(), '_', ' '); + String id = ast.getID(' '); return id; } diff --git a/dbms/src/Parsers/IAST.h b/dbms/src/Parsers/IAST.h index 8b1181e39c9..e30ac56738f 100644 --- a/dbms/src/Parsers/IAST.h +++ b/dbms/src/Parsers/IAST.h @@ -67,7 +67,7 @@ public: } /** Get the text that identifies this element. 
*/ - virtual String getID() const = 0; + virtual String getID(char delimiter = '_') const = 0; ASTPtr ptr() { return shared_from_this(); } diff --git a/dbms/src/Parsers/TablePropertiesQueriesASTs.h b/dbms/src/Parsers/TablePropertiesQueriesASTs.h index 6a6cf4506d1..e68a3b46e4a 100644 --- a/dbms/src/Parsers/TablePropertiesQueriesASTs.h +++ b/dbms/src/Parsers/TablePropertiesQueriesASTs.h @@ -48,7 +48,7 @@ class ASTDescribeQuery : public ASTQueryWithOutput public: ASTPtr table_expression; - String getID() const override { return "DescribeQuery"; } + String getID(char) const override { return "DescribeQuery"; } ASTPtr clone() const override { From e26c3327cf852a3153c849f0873c1955699c1c6a Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 7 Dec 2018 17:24:47 +0300 Subject: [PATCH 59/90] fix lost AST chenges in InDepthNodeVisitor --- dbms/src/Interpreters/InDepthNodeVisitor.h | 8 ++++--- dbms/src/Interpreters/QueryAliasesVisitor.cpp | 10 ++++----- dbms/src/Interpreters/QueryAliasesVisitor.h | 6 ++--- .../TranslateQualifiedNamesVisitor.cpp | 22 +++++++++---------- .../TranslateQualifiedNamesVisitor.h | 10 ++++----- dbms/src/Parsers/DumpASTNode.h | 8 ++----- 6 files changed, 31 insertions(+), 33 deletions(-) diff --git a/dbms/src/Interpreters/InDepthNodeVisitor.h b/dbms/src/Interpreters/InDepthNodeVisitor.h index 4292da7fbdb..f4186a9dd4a 100644 --- a/dbms/src/Interpreters/InDepthNodeVisitor.h +++ b/dbms/src/Interpreters/InDepthNodeVisitor.h @@ -28,10 +28,12 @@ public: if constexpr (!_topToBottom) visitChildren(ast); - auto additional_nodes = Matcher::visit(ast, data); + /// It operates with ASTPtr * cause we may want to rewrite ASTPtr in visit(). + std::vector additional_nodes = Matcher::visit(ast, data); + /// visit additional nodes (ex. 
only part of children) - for (ASTPtr & node : additional_nodes) - visit(node); + for (ASTPtr * node : additional_nodes) + visit(*node); if constexpr (_topToBottom) visitChildren(ast); diff --git a/dbms/src/Interpreters/QueryAliasesVisitor.cpp b/dbms/src/Interpreters/QueryAliasesVisitor.cpp index cd7baba0061..3c10f7da1b5 100644 --- a/dbms/src/Interpreters/QueryAliasesVisitor.cpp +++ b/dbms/src/Interpreters/QueryAliasesVisitor.cpp @@ -39,7 +39,7 @@ bool QueryAliasesMatcher::needChildVisit(ASTPtr & node, const ASTPtr &) return true; } -std::vector QueryAliasesMatcher::visit(ASTPtr & ast, Data & data) +std::vector QueryAliasesMatcher::visit(ASTPtr & ast, Data & data) { if (auto * t = typeid_cast(ast.get())) return visit(*t, ast, data); @@ -52,24 +52,24 @@ std::vector QueryAliasesMatcher::visit(ASTPtr & ast, Data & data) /// The top-level aliases in the ARRAY JOIN section have a special meaning, we will not add them /// (skip the expression list itself and its children). -std::vector QueryAliasesMatcher::visit(const ASTArrayJoin &, const ASTPtr & ast, Data & data) +std::vector QueryAliasesMatcher::visit(const ASTArrayJoin &, const ASTPtr & ast, Data & data) { visitOther(ast, data); /// @warning It breaks botom-to-top order (childs processed after node here), could lead to some effects. /// It's possible to add ast back to result vec to save order. It will need two phase ASTArrayJoin visit (setting phase in data). - std::vector out; + std::vector out; for (auto & child1 : ast->children) for (auto & child2 : child1->children) for (auto & child3 : child2->children) - out.push_back(child3); + out.push_back(&child3); return out; } /// set unique aliases for all subqueries. 
this is needed, because: /// 1) content of subqueries could change after recursive analysis, and auto-generated column names could become incorrect /// 2) result of different scalar subqueries can be cached inside expressions compilation cache and must have different names -std::vector QueryAliasesMatcher::visit(ASTSubquery & subquery, const ASTPtr & ast, Data & data) +std::vector QueryAliasesMatcher::visit(ASTSubquery & subquery, const ASTPtr & ast, Data & data) { Aliases & aliases = data.aliases; diff --git a/dbms/src/Interpreters/QueryAliasesVisitor.h b/dbms/src/Interpreters/QueryAliasesVisitor.h index 2cd4f8f0c6b..de6080f2609 100644 --- a/dbms/src/Interpreters/QueryAliasesVisitor.h +++ b/dbms/src/Interpreters/QueryAliasesVisitor.h @@ -24,12 +24,12 @@ public: static constexpr const char * label = "QueryAliases"; - static std::vector visit(ASTPtr & ast, Data & data); + static std::vector visit(ASTPtr & ast, Data & data); static bool needChildVisit(ASTPtr & node, const ASTPtr & child); private: - static std::vector visit(ASTSubquery & subquery, const ASTPtr & ast, Data & data); - static std::vector visit(const ASTArrayJoin &, const ASTPtr & ast, Data & data); + static std::vector visit(ASTSubquery & subquery, const ASTPtr & ast, Data & data); + static std::vector visit(const ASTArrayJoin &, const ASTPtr & ast, Data & data); static void visitOther(const ASTPtr & ast, Data & data); }; diff --git a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index c169691121f..6ceb0cfe524 100644 --- a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -37,7 +37,7 @@ bool TranslateQualifiedNamesMatcher::needChildVisit(ASTPtr & node, const ASTPtr return true; } -std::vector TranslateQualifiedNamesMatcher::visit(ASTPtr & ast, Data & data) +std::vector TranslateQualifiedNamesMatcher::visit(ASTPtr & ast, Data & data) { if (auto * t = 
typeid_cast(ast.get())) return visit(*t, ast, data); @@ -50,7 +50,7 @@ std::vector TranslateQualifiedNamesMatcher::visit(ASTPtr & ast, Data & d return {}; } -std::vector TranslateQualifiedNamesMatcher::visit(const ASTIdentifier & identifier, ASTPtr & ast, Data & data) +std::vector TranslateQualifiedNamesMatcher::visit(const ASTIdentifier & identifier, ASTPtr & ast, Data & data) { const NameSet & source_columns = data.source_columns; const std::vector & tables = data.tables; @@ -87,7 +87,7 @@ std::vector TranslateQualifiedNamesMatcher::visit(const ASTIdentifier & return {}; } -std::vector TranslateQualifiedNamesMatcher::visit(const ASTQualifiedAsterisk & , const ASTPtr & ast, Data & data) +std::vector TranslateQualifiedNamesMatcher::visit(const ASTQualifiedAsterisk & , const ASTPtr & ast, Data & data) { const std::vector & tables = data.tables; @@ -125,26 +125,26 @@ std::vector TranslateQualifiedNamesMatcher::visit(const ASTQualifiedAste throw Exception("Unknown qualified identifier: " + ident->getAliasOrColumnName(), ErrorCodes::UNKNOWN_IDENTIFIER); } -std::vector TranslateQualifiedNamesMatcher::visit(const ASTTableJoin & join, const ASTPtr & , Data &) +std::vector TranslateQualifiedNamesMatcher::visit(ASTTableJoin & join, const ASTPtr & , Data &) { /// Don't translate on_expression here in order to resolve equation parts later. - std::vector out; + std::vector out; if (join.using_expression_list) - out.push_back(join.using_expression_list); + out.push_back(&join.using_expression_list); return out; } -std::vector TranslateQualifiedNamesMatcher::visit(const ASTSelectQuery & select, const ASTPtr & , Data &) +std::vector TranslateQualifiedNamesMatcher::visit(ASTSelectQuery & select, const ASTPtr & , Data &) { /// If the WHERE clause or HAVING consists of a single quailified column, the reference must be translated not only in children, /// but also in where_expression and having_expression. 
- std::vector out; + std::vector out; if (select.prewhere_expression) - out.push_back(select.prewhere_expression); + out.push_back(&select.prewhere_expression); if (select.where_expression) - out.push_back(select.where_expression); + out.push_back(&select.where_expression); if (select.having_expression) - out.push_back(select.having_expression); + out.push_back(&select.having_expression); return out; } diff --git a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h index 3ce69dd0afa..59933eb3b2c 100644 --- a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h +++ b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h @@ -25,14 +25,14 @@ public: static constexpr const char * label = "TranslateQualifiedNames"; - static std::vector visit(ASTPtr & ast, Data & data); + static std::vector visit(ASTPtr & ast, Data & data); static bool needChildVisit(ASTPtr & node, const ASTPtr & child); private: - static std::vector visit(const ASTIdentifier & node, ASTPtr & ast, Data &); - static std::vector visit(const ASTQualifiedAsterisk & node, const ASTPtr & ast, Data &); - static std::vector visit(const ASTTableJoin & node, const ASTPtr & ast, Data &); - static std::vector visit(const ASTSelectQuery & node, const ASTPtr & ast, Data &); + static std::vector visit(const ASTIdentifier & node, ASTPtr & ast, Data &); + static std::vector visit(const ASTQualifiedAsterisk & node, const ASTPtr & ast, Data &); + static std::vector visit(ASTTableJoin & node, const ASTPtr & ast, Data &); + static std::vector visit(ASTSelectQuery & node, const ASTPtr & ast, Data &); }; /// Visits AST for names qualification. 
diff --git a/dbms/src/Parsers/DumpASTNode.h b/dbms/src/Parsers/DumpASTNode.h index edd6b3a634a..8eb1342ffe1 100644 --- a/dbms/src/Parsers/DumpASTNode.h +++ b/dbms/src/Parsers/DumpASTNode.h @@ -62,11 +62,7 @@ private: size_t & visit_depth; /// shared with children const char * label; - String nodeId() const - { - String id = ast.getID(' '); - return id; - } + String nodeId() const { return ast.getID(' '); } void printNode() const { @@ -77,7 +73,7 @@ private: print("alias", aslias, " "); if (!ast.children.empty()) - print("/", ast.children.size(), " "); /// slash is just a short name for 'children' here + print("children", ast.children.size(), " "); } }; From abffefc8309b05540c3c035e9119e683ea803376 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 7 Dec 2018 18:14:50 +0300 Subject: [PATCH 60/90] ExternalTablesVisitor via InDepthNodeVisitor --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 4 +- dbms/src/Interpreters/ExternalTablesVisitor.h | 52 ++++++++----------- dbms/src/Interpreters/InDepthNodeVisitor.h | 3 +- 3 files changed, 26 insertions(+), 33 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 4826f38b9dc..8105f3c28f4 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -242,8 +242,8 @@ void ExpressionAnalyzer::analyzeAggregation() void ExpressionAnalyzer::initGlobalSubqueriesAndExternalTables() { /// Adds existing external tables (not subqueries) to the external_tables dictionary. 
- ExternalTablesVisitor tables_visitor(context, external_tables); - tables_visitor.visit(query); + ExternalTablesMatcher::Data tables_data{context, external_tables}; + ExternalTablesVisitor(tables_data).visit(query); if (do_global) { diff --git a/dbms/src/Interpreters/ExternalTablesVisitor.h b/dbms/src/Interpreters/ExternalTablesVisitor.h index 95b109987e0..ffc51bf7890 100644 --- a/dbms/src/Interpreters/ExternalTablesVisitor.h +++ b/dbms/src/Interpreters/ExternalTablesVisitor.h @@ -4,49 +4,43 @@ #include #include #include +#include namespace DB { -/// Finds in the query the usage of external tables (as table identifiers). Fills in external_tables. -class ExternalTablesVisitor +/// If node is ASTIdentifier try to extract external_storage. +class ExternalTablesMatcher { public: - ExternalTablesVisitor(const Context & context_, Tables & tables) - : context(context_), - external_tables(tables) - {} - - void visit(ASTPtr & ast) const + struct Data { - /// Traverse from the bottom. Intentionally go into subqueries. 
- for (auto & child : ast->children) - visit(child); + const Context & context; + Tables & external_tables; + }; - tryVisit(ast); + static constexpr const char * label = "ExternalTables"; + + static std::vector visit(ASTPtr & ast, Data & data) + { + if (auto * t = typeid_cast(ast.get())) + return visit(*t, ast, data); + return {}; } + static bool needChildVisit(ASTPtr &, const ASTPtr &) { return true; } + private: - const Context & context; - Tables & external_tables; - - void visit(const ASTIdentifier & node, ASTPtr &) const + static std::vector visit(const ASTIdentifier & node, ASTPtr &, Data & data) { if (node.special()) - if (StoragePtr external_storage = context.tryGetExternalTable(node.name)) - external_tables[node.name] = external_storage; - } - - template - bool tryVisit(ASTPtr & ast) const - { - if (const T * t = typeid_cast(ast.get())) - { - visit(*t, ast); - return true; - } - return false; + if (StoragePtr external_storage = data.context.tryGetExternalTable(node.name)) + data.external_tables[node.name] = external_storage; + return {}; } }; +/// Finds in the query the usage of external tables. Fills in external_tables. 
+using ExternalTablesVisitor = InDepthNodeVisitor; + } diff --git a/dbms/src/Interpreters/InDepthNodeVisitor.h b/dbms/src/Interpreters/InDepthNodeVisitor.h index f4186a9dd4a..ff102d136c5 100644 --- a/dbms/src/Interpreters/InDepthNodeVisitor.h +++ b/dbms/src/Interpreters/InDepthNodeVisitor.h @@ -1,7 +1,6 @@ #pragma once -#include -#include +#include #include namespace DB From 31c629bf5588880865a02cfb26c88825e5e3f043 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 7 Dec 2018 18:36:54 +0300 Subject: [PATCH 61/90] GlobalSubqueriesMatcher via InDepthNodeVisitor --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 4 +- .../Interpreters/GlobalSubqueriesVisitor.h | 280 +++++++++--------- 2 files changed, 140 insertions(+), 144 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 8105f3c28f4..29548e6dd52 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -247,9 +247,9 @@ void ExpressionAnalyzer::initGlobalSubqueriesAndExternalTables() if (do_global) { - GlobalSubqueriesVisitor subqueries_visitor(context, subquery_depth, isRemoteStorage(), + GlobalSubqueriesMatcher::Data subqueries_data(context, subquery_depth, isRemoteStorage(), external_tables, subqueries_for_sets, has_global_subqueries); - subqueries_visitor.visit(query); + GlobalSubqueriesVisitor(subqueries_data).visit(query); } } diff --git a/dbms/src/Interpreters/GlobalSubqueriesVisitor.h b/dbms/src/Interpreters/GlobalSubqueriesVisitor.h index ec616b817b9..92c6441a114 100644 --- a/dbms/src/Interpreters/GlobalSubqueriesVisitor.h +++ b/dbms/src/Interpreters/GlobalSubqueriesVisitor.h @@ -15,175 +15,171 @@ #include #include #include +#include namespace DB { -/// Visitors consist of functions with unified interface 'void visit(Casted & x, ASTPtr & y)', there x is y, successfully casted to Casted. -/// Both types and fuction could have const specifiers. 
The second argument is used by visitor to replaces AST node (y) if needed. -/// Converts GLOBAL subqueries to external tables; Puts them into the external_tables dictionary: name -> StoragePtr. -class GlobalSubqueriesVisitor +class GlobalSubqueriesMatcher { public: - GlobalSubqueriesVisitor(const Context & context_, size_t subquery_depth_, bool is_remote_, - Tables & tables, SubqueriesForSets & subqueries_for_sets_, bool & has_global_subqueries_) - : context(context_), - subquery_depth(subquery_depth_), - is_remote(is_remote_), - external_table_id(1), - external_tables(tables), - subqueries_for_sets(subqueries_for_sets_), - has_global_subqueries(has_global_subqueries_) - {} - - void visit(ASTPtr & ast) const + struct Data { - /// Recursive calls. We do not go into subqueries. - for (auto & child : ast->children) - if (!typeid_cast(child.get())) - visit(child); + const Context & context; + size_t subquery_depth; + bool is_remote; + size_t external_table_id; + Tables & external_tables; + SubqueriesForSets & subqueries_for_sets; + bool & has_global_subqueries; - /// Bottom-up actions. - if (tryVisit(ast) || - tryVisit(ast)) + Data(const Context & context_, size_t subquery_depth_, bool is_remote_, + Tables & tables, SubqueriesForSets & subqueries_for_sets_, bool & has_global_subqueries_) + : context(context_), + subquery_depth(subquery_depth_), + is_remote(is_remote_), + external_table_id(1), + external_tables(tables), + subqueries_for_sets(subqueries_for_sets_), + has_global_subqueries(has_global_subqueries_) {} + + void addExternalStorage(ASTPtr & subquery_or_table_name_or_table_expression) + { + /// With nondistributed queries, creating temporary tables does not make sense. 
+ if (!is_remote) + return; + + ASTPtr subquery; + ASTPtr table_name; + ASTPtr subquery_or_table_name; + + if (typeid_cast(subquery_or_table_name_or_table_expression.get())) + { + table_name = subquery_or_table_name_or_table_expression; + subquery_or_table_name = table_name; + } + else if (auto ast_table_expr = typeid_cast(subquery_or_table_name_or_table_expression.get())) + { + if (ast_table_expr->database_and_table_name) + { + table_name = ast_table_expr->database_and_table_name; + subquery_or_table_name = table_name; + } + else if (ast_table_expr->subquery) + { + subquery = ast_table_expr->subquery; + subquery_or_table_name = subquery; + } + } + else if (typeid_cast(subquery_or_table_name_or_table_expression.get())) + { + subquery = subquery_or_table_name_or_table_expression; + subquery_or_table_name = subquery; + } + + if (!subquery_or_table_name) + throw Exception("Logical error: unknown AST element passed to ExpressionAnalyzer::addExternalStorage method", + ErrorCodes::LOGICAL_ERROR); + + if (table_name) + { + /// If this is already an external table, you do not need to add anything. Just remember its presence. + if (external_tables.end() != external_tables.find(static_cast(*table_name).name)) + return; + } + + /// Generate the name for the external table. + String external_table_name = "_data" + toString(external_table_id); + while (external_tables.count(external_table_name)) + { + ++external_table_id; + external_table_name = "_data" + toString(external_table_id); + } + + auto interpreter = interpretSubquery(subquery_or_table_name, context, subquery_depth, {}); + + Block sample = interpreter->getSampleBlock(); + NamesAndTypesList columns = sample.getNamesAndTypesList(); + + StoragePtr external_storage = StorageMemory::create(external_table_name, ColumnsDescription{columns}); + external_storage->startup(); + + /** We replace the subquery with the name of the temporary table. + * It is in this form, the request will go to the remote server. 
+ * This temporary table will go to the remote server, and on its side, + * instead of doing a subquery, you just need to read it. + */ + + auto database_and_table_name = createDatabaseAndTableNode("", external_table_name); + + if (auto ast_table_expr = typeid_cast(subquery_or_table_name_or_table_expression.get())) + { + ast_table_expr->subquery.reset(); + ast_table_expr->database_and_table_name = database_and_table_name; + + ast_table_expr->children.clear(); + ast_table_expr->children.emplace_back(database_and_table_name); + } + else + subquery_or_table_name_or_table_expression = database_and_table_name; + + external_tables[external_table_name] = external_storage; + subqueries_for_sets[external_table_name].source = interpreter->execute().in; + subqueries_for_sets[external_table_name].table = external_storage; + + /** NOTE If it was written IN tmp_table - the existing temporary (but not external) table, + * then a new temporary table will be created (for example, _data1), + * and the data will then be copied to it. + * Maybe this can be avoided. + */ + } + }; + + static constexpr const char * label = "GlobalSubqueries"; + + static std::vector visit(ASTPtr & ast, Data & data) + { + if (auto * t = typeid_cast(ast.get())) + visit(*t, ast, data); + if (auto * t = typeid_cast(ast.get())) + visit(*t, ast, data); + return {}; + } + + static bool needChildVisit(ASTPtr &, const ASTPtr & child) + { + /// We do not go into subqueries. 
+ if (typeid_cast(child.get())) + return false; + return true; } private: - const Context & context; - size_t subquery_depth; - bool is_remote; - mutable size_t external_table_id = 1; - Tables & external_tables; - SubqueriesForSets & subqueries_for_sets; - bool & has_global_subqueries; - /// GLOBAL IN - void visit(ASTFunction & func, ASTPtr &) const + static void visit(ASTFunction & func, ASTPtr &, Data & data) { if (func.name == "globalIn" || func.name == "globalNotIn") { - addExternalStorage(func.arguments->children.at(1)); - has_global_subqueries = true; + data.addExternalStorage(func.arguments->children[1]); + data.has_global_subqueries = true; } } /// GLOBAL JOIN - void visit(ASTTablesInSelectQueryElement & table_elem, ASTPtr &) const + static void visit(ASTTablesInSelectQueryElement & table_elem, ASTPtr &, Data & data) { if (table_elem.table_join && static_cast(*table_elem.table_join).locality == ASTTableJoin::Locality::Global) { - addExternalStorage(table_elem.table_expression); - has_global_subqueries = true; + data.addExternalStorage(table_elem.table_expression); + data.has_global_subqueries = true; } } - - template - bool tryVisit(ASTPtr & ast) const - { - if (T * t = typeid_cast(ast.get())) - { - visit(*t, ast); - return true; - } - return false; - } - - /** Initialize InterpreterSelectQuery for a subquery in the GLOBAL IN/JOIN section, - * create a temporary table of type Memory and store it in the external_tables dictionary. - */ - void addExternalStorage(ASTPtr & subquery_or_table_name_or_table_expression) const - { - /// With nondistributed queries, creating temporary tables does not make sense. 
- if (!is_remote) - return; - - ASTPtr subquery; - ASTPtr table_name; - ASTPtr subquery_or_table_name; - - if (typeid_cast(subquery_or_table_name_or_table_expression.get())) - { - table_name = subquery_or_table_name_or_table_expression; - subquery_or_table_name = table_name; - } - else if (auto ast_table_expr = typeid_cast(subquery_or_table_name_or_table_expression.get())) - { - if (ast_table_expr->database_and_table_name) - { - table_name = ast_table_expr->database_and_table_name; - subquery_or_table_name = table_name; - } - else if (ast_table_expr->subquery) - { - subquery = ast_table_expr->subquery; - subquery_or_table_name = subquery; - } - } - else if (typeid_cast(subquery_or_table_name_or_table_expression.get())) - { - subquery = subquery_or_table_name_or_table_expression; - subquery_or_table_name = subquery; - } - - if (!subquery_or_table_name) - throw Exception("Logical error: unknown AST element passed to ExpressionAnalyzer::addExternalStorage method", - ErrorCodes::LOGICAL_ERROR); - - if (table_name) - { - /// If this is already an external table, you do not need to add anything. Just remember its presence. - if (external_tables.end() != external_tables.find(static_cast(*table_name).name)) - return; - } - - /// Generate the name for the external table. - String external_table_name = "_data" + toString(external_table_id); - while (external_tables.count(external_table_name)) - { - ++external_table_id; - external_table_name = "_data" + toString(external_table_id); - } - - auto interpreter = interpretSubquery(subquery_or_table_name, context, subquery_depth, {}); - - Block sample = interpreter->getSampleBlock(); - NamesAndTypesList columns = sample.getNamesAndTypesList(); - - StoragePtr external_storage = StorageMemory::create(external_table_name, ColumnsDescription{columns}); - external_storage->startup(); - - /** We replace the subquery with the name of the temporary table. - * It is in this form, the request will go to the remote server. 
- * This temporary table will go to the remote server, and on its side, - * instead of doing a subquery, you just need to read it. - */ - - auto database_and_table_name = createDatabaseAndTableNode("", external_table_name); - - if (auto ast_table_expr = typeid_cast(subquery_or_table_name_or_table_expression.get())) - { - ast_table_expr->subquery.reset(); - ast_table_expr->database_and_table_name = database_and_table_name; - - ast_table_expr->children.clear(); - ast_table_expr->children.emplace_back(database_and_table_name); - } - else - subquery_or_table_name_or_table_expression = database_and_table_name; - - external_tables[external_table_name] = external_storage; - subqueries_for_sets[external_table_name].source = interpreter->execute().in; - subqueries_for_sets[external_table_name].table = external_storage; - - /** NOTE If it was written IN tmp_table - the existing temporary (but not external) table, - * then a new temporary table will be created (for example, _data1), - * and the data will then be copied to it. - * Maybe this can be avoided. - */ - } }; +/// Converts GLOBAL subqueries to external tables; Puts them into the external_tables dictionary: name -> StoragePtr. +using GlobalSubqueriesVisitor = InDepthNodeVisitor; + } From 2afe664d674f20d7a5a27638d255f1a05e629cd4 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 7 Dec 2018 19:28:20 +0300 Subject: [PATCH 62/90] RequiredSourceColumnsVisitor via InDepthNodeVisitor --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 16 ++- .../RequiredSourceColumnsVisitor.h | 130 ++++++++---------- 2 files changed, 69 insertions(+), 77 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 29548e6dd52..e6f29670959 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -1027,8 +1027,8 @@ void ExpressionAnalyzer::collectUsedColumns() { /// Nothing needs to be ignored for expressions in ARRAY JOIN. 
NameSet empty; - RequiredSourceColumnsVisitor visitor(available_columns, required, empty, empty, empty); - visitor.visit(expressions[i]); + RequiredSourceColumnsMatcher::Data visitor_data{available_columns, required, empty, empty, empty}; + RequiredSourceColumnsVisitor(visitor_data).visit(expressions[i]); } ignored.insert(expressions[i]->getAliasOrColumnName()); @@ -1044,15 +1044,17 @@ void ExpressionAnalyzer::collectUsedColumns() NameSet required_joined_columns; - for (const auto & left_key_ast : analyzedJoin().key_asts_left) + for (const auto & left_key_ast : syntax->analyzed_join.key_asts_left) { NameSet empty; - RequiredSourceColumnsVisitor columns_visitor(available_columns, required, ignored, empty, required_joined_columns); - columns_visitor.visit(left_key_ast); + RequiredSourceColumnsMatcher::Data columns_data{available_columns, required, ignored, empty, required_joined_columns}; + ASTPtr tmp = left_key_ast; + RequiredSourceColumnsVisitor(columns_data).visit(tmp); } - RequiredSourceColumnsVisitor columns_visitor(available_columns, required, ignored, available_joined_columns, required_joined_columns); - columns_visitor.visit(query); + RequiredSourceColumnsMatcher::Data columns_visitor_data{available_columns, required, ignored, + available_joined_columns, required_joined_columns}; + RequiredSourceColumnsVisitor(columns_visitor_data).visit(query); columns_added_by_join = analyzedJoin().available_joined_columns; for (auto it = columns_added_by_join.begin(); it != columns_added_by_join.end();) diff --git a/dbms/src/Interpreters/RequiredSourceColumnsVisitor.h b/dbms/src/Interpreters/RequiredSourceColumnsVisitor.h index ada053a3657..726023be8eb 100644 --- a/dbms/src/Interpreters/RequiredSourceColumnsVisitor.h +++ b/dbms/src/Interpreters/RequiredSourceColumnsVisitor.h @@ -18,25 +18,45 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } -/// Visitors consist of functions with unified interface 'void visit(Casted & x, ASTPtr & y)', there x is 
y, successfully casted to Casted. -/// Both types and fuction could have const specifiers. The second argument is used by visitor to replaces AST node (y) if needed. -/** Get a set of necessary columns to read from the table. - * In this case, the columns specified in ignored_names are considered unnecessary. And the ignored_names parameter can be modified. - * The set of columns available_joined_columns are the columns available from JOIN, they are not needed for reading from the main table. - * Put in required_joined_columns the set of columns available from JOIN and needed. - */ -class RequiredSourceColumnsVisitor +class RequiredSourceColumnsMatcher { public: - RequiredSourceColumnsVisitor(const NameSet & available_columns_, NameSet & required_source_columns_, NameSet & ignored_names_, - const NameSet & available_joined_columns_, NameSet & required_joined_columns_) - : available_columns(available_columns_), - required_source_columns(required_source_columns_), - ignored_names(ignored_names_), - available_joined_columns(available_joined_columns_), - required_joined_columns(required_joined_columns_) - {} + struct Data + { + const NameSet & available_columns; + NameSet & required_source_columns; + NameSet & ignored_names; + const NameSet & available_joined_columns; + NameSet & required_joined_columns; + }; + + static constexpr const char * label = "RequiredSourceColumns"; + + static bool needChildVisit(ASTPtr & node, const ASTPtr & child) + { + /// We will not go to the ARRAY JOIN section, because we need to look at the names of non-ARRAY-JOIN columns. + /// There, `collectUsedColumns` will send us separately. + if (typeid_cast(child.get()) || + typeid_cast(child.get()) || + typeid_cast(child.get()) || + typeid_cast(child.get())) + return false; + + /// Processed. Do not need children. + if (typeid_cast(node.get())) + return false; + + if (auto * f = typeid_cast(node.get())) + { + /// A special function `indexHint`. 
Everything that is inside it is not calculated + /// (and is used only for index analysis, see KeyCondition). + if (f->name == "indexHint") + return false; + } + + return true; + } /** Find all the identifiers in the query. * We will use depth first search in AST. @@ -46,36 +66,34 @@ public: * - there is some exception for the ARRAY JOIN clause (it has a slightly different identifiers); * - we put identifiers available from JOIN in required_joined_columns. */ - void visit(const ASTPtr & ast) const + static std::vector visit(ASTPtr & ast, Data & data) { - if (!tryVisit(ast) && - !tryVisit(ast)) - visitChildren(ast); + if (auto * t = typeid_cast(ast.get())) + visit(*t, ast, data); + if (auto * t = typeid_cast(ast.get())) + visit(*t, ast, data); + return {}; } private: - const NameSet & available_columns; - NameSet & required_source_columns; - NameSet & ignored_names; - const NameSet & available_joined_columns; - NameSet & required_joined_columns; - - void visit(const ASTIdentifier & node, const ASTPtr &) const + static void visit(const ASTIdentifier & node, const ASTPtr &, Data & data) { if (node.general() - && !ignored_names.count(node.name) - && !ignored_names.count(Nested::extractTableName(node.name))) + && !data.ignored_names.count(node.name) + && !data.ignored_names.count(Nested::extractTableName(node.name))) { - if (!available_joined_columns.count(node.name) - || available_columns.count(node.name)) /// Read column from left table if has. - required_source_columns.insert(node.name); + /// Read column from left table if has. 
+ if (!data.available_joined_columns.count(node.name) || data.available_columns.count(node.name)) + data.required_source_columns.insert(node.name); else - required_joined_columns.insert(node.name); + data.required_joined_columns.insert(node.name); } } - void visit(const ASTFunction & node, const ASTPtr & ast) const + static void visit(const ASTFunction & node, const ASTPtr &, Data & data) { + NameSet & ignored_names = data.ignored_names; + if (node.name == "lambda") { if (node.arguments->children.size() != 2) @@ -102,47 +120,19 @@ private: } } - visit(node.arguments->children.at(1)); + visit(node.arguments->children[1], data); for (size_t i = 0; i < added_ignored.size(); ++i) ignored_names.erase(added_ignored[i]); - - return; } - - /// A special function `indexHint`. Everything that is inside it is not calculated - /// (and is used only for index analysis, see KeyCondition). - if (node.name == "indexHint") - return; - - visitChildren(ast); - } - - void visitChildren(const ASTPtr & ast) const - { - for (auto & child : ast->children) - { - /** We will not go to the ARRAY JOIN section, because we need to look at the names of non-ARRAY-JOIN columns. - * There, `collectUsedColumns` will send us separately. - */ - if (!typeid_cast(child.get()) - && !typeid_cast(child.get()) - && !typeid_cast(child.get()) - && !typeid_cast(child.get())) - visit(child); - } - } - - template - bool tryVisit(const ASTPtr & ast) const - { - if (const T * t = typeid_cast(ast.get())) - { - visit(*t, ast); - return true; - } - return false; } }; +/** Get a set of necessary columns to read from the table. + * In this case, the columns specified in ignored_names are considered unnecessary. And the ignored_names parameter can be modified. + * The set of columns available_joined_columns are the columns available from JOIN, they are not needed for reading from the main table. + * Put in required_joined_columns the set of columns available from JOIN and needed. 
+ */ +using RequiredSourceColumnsVisitor = InDepthNodeVisitor; + } From 0c49c1959246b2a284b0cc5a012d0515c93d36e9 Mon Sep 17 00:00:00 2001 From: chertus Date: Sat, 8 Dec 2018 01:02:33 +0300 Subject: [PATCH 63/90] fix style --- dbms/src/Parsers/ASTExpressionList.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Parsers/ASTExpressionList.h b/dbms/src/Parsers/ASTExpressionList.h index 4f77adb4009..ff2fbaff95f 100644 --- a/dbms/src/Parsers/ASTExpressionList.h +++ b/dbms/src/Parsers/ASTExpressionList.h @@ -11,7 +11,7 @@ namespace DB class ASTExpressionList : public IAST { public: - String getID(char ) const override { return "ExpressionList"; } + String getID(char) const override { return "ExpressionList"; } ASTPtr clone() const override; void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; From 6bc30f225e7acd7992bf37902d0874e2288a08f2 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Sun, 9 Dec 2018 23:11:46 +0800 Subject: [PATCH 64/90] Guard `force_restore_data_flag_file` removal resemble `force_drop_table` file --- dbms/src/Interpreters/loadMetadata.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/loadMetadata.cpp b/dbms/src/Interpreters/loadMetadata.cpp index e535bafc51d..748ba0a7548 100644 --- a/dbms/src/Interpreters/loadMetadata.cpp +++ b/dbms/src/Interpreters/loadMetadata.cpp @@ -118,7 +118,14 @@ void loadMetadata(Context & context) thread_pool.wait(); if (has_force_restore_data_flag) - force_restore_data_flag_file.remove(); + try + { + force_restore_data_flag_file.remove(); + } + catch (...) 
+ { + tryLogCurrentException("Load metadata", "Can't remove force restore file to enable data santity checks"); + } } From 44d254db5f20c75ae39603d9172734afbfb0530b Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Mon, 10 Dec 2018 17:19:32 +0800 Subject: [PATCH 65/90] zh/development (#3798) * Add style && build zh translate * add style translate * fix --- docs/en/development/build.md | 3 +- docs/zh/development/build.md | 100 +++- docs/zh/development/build_osx.md | 84 +++- docs/zh/development/index.md | 5 +- docs/zh/development/style.md | 839 ++++++++++++++++++++++++++++++- 5 files changed, 1025 insertions(+), 6 deletions(-) mode change 120000 => 100644 docs/zh/development/build.md mode change 120000 => 100644 docs/zh/development/build_osx.md mode change 120000 => 100644 docs/zh/development/index.md mode change 120000 => 100644 docs/zh/development/style.md diff --git a/docs/en/development/build.md b/docs/en/development/build.md index bb434ae5b54..82d8070780b 100644 --- a/docs/en/development/build.md +++ b/docs/en/development/build.md @@ -22,9 +22,8 @@ cd ClickHouse # How to Build ClickHouse for Development -Build should work on Ubuntu Linux. +The following tutorial is based on the Ubuntu Linux system. With appropriate changes, it should also work on any other Linux distribution. -The build process is not intended to work on Mac OS X. Only x86_64 with SSE 4.2 is supported. Support for AArch64 is experimental. 
To test for SSE 4.2, do diff --git a/docs/zh/development/build.md b/docs/zh/development/build.md deleted file mode 120000 index 480dbc2e9f5..00000000000 --- a/docs/zh/development/build.md +++ /dev/null @@ -1 +0,0 @@ -../../en/development/build.md \ No newline at end of file diff --git a/docs/zh/development/build.md b/docs/zh/development/build.md new file mode 100644 index 00000000000..6b8db88306f --- /dev/null +++ b/docs/zh/development/build.md @@ -0,0 +1,99 @@ +# 如何构建 ClickHouse 发布包 + +## 安装 Git 和 Pbuilder + +```bash +sudo apt-get update +sudo apt-get install git pbuilder debhelper lsb-release fakeroot sudo debian-archive-keyring debian-keyring +``` + +## 拉取 ClickHouse 源码 + +```bash +git clone --recursive --branch stable https://github.com/yandex/ClickHouse.git +cd ClickHouse +``` + +## 运行发布脚本 + +```bash +./release +``` + +# 如何在开发过程中编译 ClickHouse + +以下教程是在 Ubuntu Linux 中进行编译的示例。 +通过适当的更改,它应该可以适用于任何其他的 Linux 发行版。 +仅支持具有 SSE 4.2的 x86_64。 对 AArch64 的支持是实验性的。 + +测试是否支持 SSE 4.2,执行: + +```bash +grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not supported" +``` + +## 安装 Git 和 CMake + +```bash +sudo apt-get install git cmake ninja-build +``` + +Or cmake3 instead of cmake on older systems. +或者在早期版本的系统中用 cmake3 替代 cmake + +## 安装 GCC 7 + +There are several ways to do this. 
+ +### 安装 PPA 包 + +```bash +sudo apt-get install software-properties-common +sudo apt-add-repository ppa:ubuntu-toolchain-r/test +sudo apt-get update +sudo apt-get install gcc-7 g++-7 +``` + +### 源码安装 gcc + +请查看 [ci/build-gcc-from-sources.sh](https://github.com/yandex/ClickHouse/blob/master/ci/build-gcc-from-sources.sh) + +## 使用 GCC 7 来编译 + +```bash +export CC=gcc-7 +export CXX=g++-7 +``` + +## 安装所需的工具依赖库 + +```bash +sudo apt-get install libicu-dev libreadline-dev +``` + +## 拉取 ClickHouse 源码 + +```bash +git clone --recursive git@github.com:yandex/ClickHouse.git +# or: git clone --recursive https://github.com/yandex/ClickHouse.git + +cd ClickHouse +``` + +For the latest stable version, switch to the `stable` branch. + +## 编译 ClickHouse + +```bash +mkdir build +cd build +cmake .. +ninja +cd .. +``` + +若要创建一个执行文件, 执行 `ninja clickhouse`。 +这个命令会使得 `dbms/programs/clickhouse` 文件可执行,您可以使用 `client` or `server` 参数运行。 + + +[来源文章](https://clickhouse.yandex/docs/en/development/build/) diff --git a/docs/zh/development/build_osx.md b/docs/zh/development/build_osx.md deleted file mode 120000 index f9adaf24584..00000000000 --- a/docs/zh/development/build_osx.md +++ /dev/null @@ -1 +0,0 @@ -../../en/development/build_osx.md \ No newline at end of file diff --git a/docs/zh/development/build_osx.md b/docs/zh/development/build_osx.md new file mode 100644 index 00000000000..449586b17cc --- /dev/null +++ b/docs/zh/development/build_osx.md @@ -0,0 +1,83 @@ +# 在 Mac OS X 中编译 ClickHouse + +ClickHouse 支持在 Mac OS X 10.12 版本中编译。若您在用更早的操作系统版本,可以尝试在指令中使用 `Gentoo Prefix` 和 `clang sl`. 
+通过适当的更改,它应该可以适用于任何其他的 Linux 发行版。 + +## 安装 Homebrew + +```bash +/usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" +``` + +## 安装编译器,工具库 + +```bash +brew install cmake ninja gcc icu4c mariadb-connector-c openssl libtool gettext readline +``` + +## 拉取 ClickHouse 源码 + +```bash +git clone --recursive git@github.com:yandex/ClickHouse.git +# or: git clone --recursive https://github.com/yandex/ClickHouse.git + +cd ClickHouse +``` + +For the latest stable version, switch to the `stable` branch. + +## 编译 ClickHouse + +```bash +mkdir build +cd build +cmake .. -DCMAKE_CXX_COMPILER=`which g++-8` -DCMAKE_C_COMPILER=`which gcc-8` +ninja +cd .. +``` + +## 注意事项 + +若你想运行 clickhouse-server,请先确保增加系统的最大文件数配置。 + +!!! 注意 + 可能需要用 sudo + +为此,请创建以下文件: + +/Library/LaunchDaemons/limit.maxfiles.plist: +``` xml +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" + "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> +<plist version="1.0"> + <dict> + <key>Label</key> + <string>limit.maxfiles</string> + <key>ProgramArguments</key> + <array> + <string>launchctl</string> + <string>limit</string> + <string>maxfiles</string> + <string>524288</string> + <string>524288</string> + </array> + <key>RunAtLoad</key> + <true/> + <key>ServiceIPC</key> + <false/> + </dict> +</plist> +``` + +执行以下命令: +``` bash +$ sudo chown root:wheel /Library/LaunchDaemons/limit.maxfiles.plist +``` + +然后重启。 + +可以通过 `ulimit -n` 命令来检查是否生效。 + + +[来源文章](https://clickhouse.yandex/docs/en/development/build_osx/) diff --git a/docs/zh/development/index.md b/docs/zh/development/index.md deleted file mode 120000 index 1e2ad97dcc5..00000000000 --- a/docs/zh/development/index.md +++ /dev/null @@ -1 +0,0 @@ -../../en/development/index.md \ No newline at end of file diff --git a/docs/zh/development/index.md b/docs/zh/development/index.md new file mode 100644 index 00000000000..7053cfcb7b4 --- /dev/null +++ b/docs/zh/development/index.md @@ -0,0 +1,4 @@ +# ClickHouse 开发 + + +[来源文章](https://clickhouse.yandex/docs/en/development/) diff --git a/docs/zh/development/style.md b/docs/zh/development/style.md deleted file mode 120000 index c1bbf11f421..00000000000 --- a/docs/zh/development/style.md +++ /dev/null @@ -1 +0,0 @@ -../../en/development/style.md \ No newline at end of file diff --git 
a/docs/zh/development/style.md b/docs/zh/development/style.md new file mode 100644 index 00000000000..d31cd450b6e --- /dev/null +++ b/docs/zh/development/style.md @@ -0,0 +1,838 @@ +# 如何编写 C++ 代码 + +## 一般建议 + +**1.** 以下是建议,而不是要求。 + +**2.** 如果你在修改代码,遵守已有的风格是有意义的。 + +**3.** 代码的风格需保持一致。一致的风格有利于阅读代码,并且方便检索代码。 + +**4.** 许多规则没有逻辑原因; 它们是由既定的做法决定的。 + +## 格式化 + +**1.** 大多数格式化可以用 `clang-format` 自动完成。 + +**2.** 缩进是4个空格。 配置开发环境,使得 TAB 代表添加四个空格。 + +**3.** 左右花括号需在单独的行。 + +```cpp +inline void readBoolText(bool & x, ReadBuffer & buf) +{ + char tmp = '0'; + readChar(tmp, buf); + x = tmp != '0'; +} +``` + +**4.** 若整个方法体仅有一行 `描述`, 则可以放到单独的行上。 在花括号周围放置空格(除了行尾的空格)。 + +```cpp +inline size_t mask() const { return buf_size() - 1; } +inline size_t place(HashValue x) const { return x & mask(); } +``` + +**5.** 对于函数。 不要在括号周围放置空格。 + +```cpp +void reinsert(const Value & x) +``` + +```cpp +memcpy(&buf[place_value], &x, sizeof(x)); +``` + +**6.** 在`if`,`for`,`while`和其他表达式中,在开括号前面插入一个空格(与函数声明相反)。 + +```cpp +for (size_t i = 0; i < rows; i += storage.index_granularity) +``` + +**7.** 在二元运算符(`+`,`-`,`*`,`/`,`%`,...)和三元运算符 `?:` 周围添加空格。 + +```cpp +UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0'); +UInt8 month = (s[5] - '0') * 10 + (s[6] - '0'); +UInt8 day = (s[8] - '0') * 10 + (s[9] - '0'); +``` + +**8.** 若有换行,新行应该以运算符开头,并且增加对应的缩进。 + +```cpp +if (elapsed_ns) + message << " (" + << rows_read_on_server * 1000000000 / elapsed_ns << " rows/s., " + << bytes_read_on_server * 1000.0 / elapsed_ns << " MB/s.) "; +``` + +**9.** 如果需要,可以在一行内使用空格来对齐。 + +```cpp +dst.ClickLogID = click.LogID; +dst.ClickEventID = click.EventID; +dst.ClickGoodEvent = click.GoodEvent; +``` + +**10.** 不要在 `.`,`->` 周围加入空格 + +如有必要,运算符可以包裹到下一行。 在这种情况下,它前面的偏移量增加。 + +**11.** 不要使用空格来分开一元运算符 (`--`, `++`, `*`, `&`, ...) 
和参数。 + +**12.** 在逗号后面加一个空格,而不是在之前。同样的规则也适合 `for` 循环中的分号。 + +**13.** 不要用空格分开 `[]` 运算符。 + +**14.** 在 `template <...>` 表达式中,在 `template` 和 `<` 中加入一个空格,在 `<` 后面或在 `>` 前面都不要有空格。 + +```cpp +template <typename TKey, typename TValue> +struct AggregatedStatElement +{} +``` + +**15.** 在类和结构体中, `public`, `private` 以及 `protected` 同 `class/struct` 无需缩进,其他代码须缩进。 + +```cpp +template <typename T> +class MultiVersion +{ +public: + /// Version of object for usage. shared_ptr manage lifetime of version. + using Version = std::shared_ptr<const T>; + ... +} +``` + +**16.** 如果对整个文件使用相同的 `namespace`,并且没有其他重要的东西,则 `namespace` 中不需要偏移量。 + +**17.** 在 `if`, `for`, `while` 中包裹的代码块中,若代码是一个单行的 `statement`,那么大括号是可选的。 可以将 `statement` 放到一行中。这个规则同样适用于嵌套的 `if`, `for`, `while`, ... + +但是如果内部 `statement` 包含大括号或 `else`,则外部块应该用大括号括起来。 + +```cpp +/// Finish write. +for (auto & stream : streams) + stream.second->finalize(); +``` + +**18.** 行的末尾不应该包含空格。 + +**19.** 源文件应该用 UTF-8 编码。 + +**20.** 非ASCII字符可用于字符串文字。 + +```cpp +<< ", " << (timer.elapsed() / chunks_stats.hits) << " μsec/hit."; +``` + +**21.** 不要在一行中写入多个表达式。 + +**22.** 将函数内部的代码段分组,并将它们与不超过一行的空行分开。 + +**23.** 将 函数,类用一个或两个空行分开。 + +**24.** `const` 必须写在类型名称之前。 + +```cpp +//correct +const char * pos +const std::string & s +//incorrect +char const * pos +``` + +**25.** 声明指针或引用时,`*` 和 `&` 符号两边应该都用空格分隔。 + +```cpp +//correct +const char * pos +//incorrect +const char* pos +const char *pos +``` + +**26.** 使用模板类型时,使用 `using` 关键字对它们进行别名(最简单的情况除外)。 + +换句话说,模板参数仅在 `using` 中指定,并且不在代码中重复。 + +`using`可以在本地声明,例如在函数内部。 + +```cpp +//correct +using FileStreams = std::map<std::string, std::shared_ptr<Stream>>; +FileStreams streams; +//incorrect +std::map<std::string, std::shared_ptr<Stream>> streams; +``` + +**27.** 不要在一个语句中声明不同类型的多个变量。 + +```cpp +//incorrect +int x, *y; +``` + +**28.** 不要使用C风格的类型转换。 + +```cpp +//incorrect +std::cerr << (int)c << std::endl; +//correct +std::cerr << static_cast<int>(c) << std::endl; +``` + +**29.** 在类和结构中,组成员和函数分别在每个可见范围内。 + +**30.** 对于小类和结构,没有必要将方法声明与实现分开。 + +对于任何类或结构中的小方法也是如此。 + +对于模板化类和结构,不要将方法声明与实现分开(因为否则它们必须在同一个转换单元中定义) + +**31.** 您可以将换行规则定在140个字符,而不是80个字符。 + 
+**32.** 如果不需要 postfix,请始终使用前缀增量/减量运算符。 + +```cpp +for (Names::const_iterator it = column_names.begin(); it != column_names.end(); ++it) +``` + +## Comments + +**1.** 请务必为所有非常重要的代码部分添加注释。 + +这是非常重要的。 编写注释可能会帮助您意识到代码不是必需的,或者设计错误。 + +```cpp +/** Part of piece of memory, that can be used. + * For example, if internal_buffer is 1MB, and there was only 10 bytes loaded to buffer from file for reading, + * then working_buffer will have size of only 10 bytes + * (working_buffer.end() will point to position right after those 10 bytes available for read). + */ +``` + +**2.** 注释可以尽可能详细。 + +**3.** 在他们描述的代码之前放置注释。 在极少数情况下,注释可以在代码之后,在同一行上。 + +```cpp +/** Parses and executes the query. +*/ +void executeQuery( + ReadBuffer & istr, /// Where to read the query from (and data for INSERT, if applicable) + WriteBuffer & ostr, /// Where to write the result + Context & context, /// DB, tables, data types, engines, functions, aggregate functions... + BlockInputStreamPtr & query_plan, /// Here could be written the description on how query was executed + QueryProcessingStage::Enum stage = QueryProcessingStage::Complete /// Up to which stage process the SELECT query + ) +``` + +**4.** 注释应该只用英文撰写。 + +**5.** 如果您正在编写库,请在主头文件中包含解释它的详细注释。 + +**6.** 请勿添加无效的注释。 特别是,不要留下像这样的空注释: + +```cpp +/* +* Procedure Name: +* Original procedure name: +* Author: +* Date of creation: +* Dates of modification: +* Modification authors: +* Original file name: +* Purpose: +* Intent: +* Designation: +* Classes used: +* Constants: +* Local variables: +* Parameters: +* Date of creation: +* Purpose: +*/ +``` + +这个示例来源于 [http://home.tamk.fi/~jaalto/course/coding-style/doc/unmaintainable-code/](http://home.tamk.fi/~jaalto/course/coding-style/doc/unmaintainable-code/)。 + +**7.** 不要在每个文件的开头写入垃圾注释(作者,创建日期...)。 + +**8.** 单行注释用三个斜杆: `///` ,多行注释以 `/**`开始。 这些注释会当做文档。 + +注意:您可以使用 Doxygen 从这些注释中生成文档。 但是通常不使用 Doxygen,因为在 IDE 中导航代码更方便。 + +**9.** 多行注释的开头和结尾不得有空行(关闭多行注释的行除外)。 + +**10.** 要注释掉代码,请使用基本注释,而不是“记录”注释。 + +**11.** 
在提交之前删除代码的无效注释部分。 + +**12.** 不要在注释或代码中使用亵渎语言。 + +**13.** 不要使用大写字母。 不要使用过多的标点符号。 + +```cpp +/// WHAT THE FAIL??? +``` + +**14.** 不要使用注释来制作分隔符。 + +```cpp +///****************************************************** +``` + +**15.** 不要在注释中开始讨论。 + +```cpp +/// Why did you do this stuff? +``` + +**16.** 没有必要在块的末尾写一条注释来描述它的含义。 + +```cpp +/// for +``` + +## Names + +**1.** 在变量和类成员的名称中使用带下划线的小写字母。 + +```cpp +size_t max_block_size; +``` + +**2.** 对于函数(方法)的名称,请使用以小写字母开头的驼峰标识。 + + ```cpp + std::string getName() const override { return "Memory"; } + ``` + +**3.** 对于类(结构)的名称,使用以大写字母开头的驼峰标识。接口名称用I前缀。 + + ```cpp + class StorageMemory : public IStorage + ``` + +**4.** `using` 的命名方式与类相同,或者以 `_t` 结尾命名。 + +**5.** 模板类型参数的名称:在简单的情况下,使用`T`; `T`,`U`; `T1`,`T2`。 + +对于更复杂的情况,要么遵循类名规则,要么添加前缀`T`。 + +```cpp +template <typename TKey, typename TValue> +struct AggregatedStatElement +``` + +**6.** 模板常量参数的名称:遵循变量名称的规则,或者在简单的情况下使用 `N`。 + +```cpp +template <bool without_www> +struct ExtractDomain +``` + +**7.** 对于抽象类型(接口),用 `I` 前缀。 + +```cpp +class IBlockInputStream +``` + +**8.** 如果在本地使用变量,则可以使用短名称。 + +在所有其他情况下,请使用能描述含义的名称。 + +```cpp +bool info_successfully_loaded = false; +``` + +**9.** `define` 和全局常量的名称使用带下划线的 `ALL_CAPS`。 + +```cpp +#define MAX_SRC_TABLE_NAMES_TO_STORE 1000 +``` + +**10.** 文件名应使用与其内容相同的样式。 + +如果文件包含单个类,则以与该类名称相同的方式命名该文件。 + +如果文件包含单个函数,则以与函数名称相同的方式命名文件。 + +**11.** 如果名称包含缩写,则: + +- 对于变量名,缩写应使用小写字母 `mysql_connection`(不是 `mySQL_connection` )。 +- 对于类和函数的名称,请将大写字母保留在缩写 `MySQLConnection`(不是 `MySqlConnection`)。 + +**12.** 仅用于初始化类成员的构造方法参数的命名方式应与类成员相同,但最后使用下划线。 + +```cpp +FileQueueProcessor( + const std::string & path_, + const std::string & prefix_, + std::shared_ptr<FileHandler> handler_) + : path(path_), + prefix(prefix_), + handler(handler_), + log(&Logger::get("FileQueueProcessor")) +{ +} +``` + +如果构造函数体中未使用该参数,则可以省略下划线后缀。 + +**13.** 局部变量和类成员的名称没有区别(不需要前缀)。 + +```cpp +timer (not m_timer) +``` + +**14.** 对于 `enum` 中的常量,请使用带大写字母的驼峰标识。ALL_CAPS 也可以接受。如果 `enum` 是非本地的,请使用 `enum class`。 + +```cpp +enum class CompressionMethod +{ + QuickLZ = 0, + LZ4 = 1, +}; 
+``` + +**15.** 所有名字必须是英文。不允许音译俄语单词。 + +``` +not Stroka +``` + +**16.** 缩写须是众所周知的(当您可以在维基百科或搜索引擎中轻松找到缩写的含义时)。 + +``` +`AST`, `SQL`. + +Not `NVDH` (some random letters) +``` + +如果缩短版本是常用的,则可以接受不完整的单词。 + +如果注释中旁边包含全名,您也可以使用缩写。 + +**17.** C++ 源码文件名称必须为 `.cpp` 拓展名。 头文件必须为 `.h` 拓展名。 + +## 如何编写代码 + +**1.** 内存管理。 + +手动内存释放 (`delete`) 只能在库代码中使用。 + +在库代码中, `delete` 运算符只能在析构函数中使用。 + +在应用程序代码中,内存必须由拥有它的对象释放。 + +示例: + +- 最简单的方法是将对象放在堆栈上,或使其成为另一个类的成员。 +- 对于大量小对象,请使用容器。 +- 对于自动释放少量在堆中的对象,可以用 `shared_ptr/unique_ptr`。 + +**2.** 资源管理。 + +使用 `RAII` 以及查看以上说明。 + +**3.** 错误处理。 + +在大多数情况下,您只需要抛出一个异常,而不需要捕获它(因为`RAII`)。 + +在离线数据处理应用程序中,通常可以接受不捕获异常。 + +在处理用户请求的服务器中,通常足以捕获连接处理程序顶层的异常。 + +在线程函数中,你应该在 `join` 之后捕获并保留所有异常以在主线程中重新抛出它们。 + +```cpp +/// If there weren't any calculations yet, calculate the first block synchronously +if (!started) +{ + calculate(); + started = true; +} +else /// If calculations are already in progress, wait for the result + pool.wait(); + +if (exception) + exception->rethrow(); +``` + +不处理就不要隐藏异常。 永远不要盲目地把所有异常都记录到日志中。 + +```cpp +//Not correct +catch (...) 
{} +``` + +如果您需要忽略某些异常,请仅针对特定异常执行此操作并重新抛出其余异常。 + +```cpp +catch (const DB::Exception & e) +{ + if (e.code() == ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION) + return nullptr; + else + throw; +} +``` + +当使用具有返回码或 `errno` 的函数时,请始终检查结果并在出现错误时抛出异常。 + +```cpp +if (0 != close(fd)) + throwFromErrno("Cannot close file " + file_name, ErrorCodes::CANNOT_CLOSE_FILE); +``` + +`不要使用断言`。 + +**4.** 异常类型。 + +不需要在应用程序代码中使用复杂的异常层次结构。 系统管理员应该可以理解异常文本。 + +**5.** 从析构函数中抛出异常。 + +不建议这样做,但允许这样做。 + +按照以下选项: + +- 创建一个函数( `done()` 或 `finalize()` ),它将提前完成所有可能导致异常的工作。 如果调用了该函数,则稍后在析构函数中应该没有异常。 +- 过于复杂的任务(例如通过网络发送消息)可以放在单独的方法中,类用户必须在销毁之前调用它们。 +- 如果析构函数中存在异常,则最好记录它而不是隐藏它(如果 logger 可用)。 +- 在简单的应用程序中,依赖于`std::terminate`(对于C++ 11中默认情况下为 `noexcept` 的情况)来处理异常是可以接受的。 + +**6.** 匿名代码块。 + +您可以在单个函数内创建单独的代码块,以使某些变量成为局部变量,以便在退出块时调用析构函数。 + +```cpp +Block block = data.in->read(); + +{ + std::lock_guard lock(mutex); + data.ready = true; + data.block = block; +} + +ready_any.set(); +``` + +**7.** 多线程。 + +在离线数据处理程序中: + +- 尝试在单个CPU核心上获得最佳性能。 然后,您可以根据需要并行化代码。 + +在服务端应用中: + +- 使用线程池来处理请求。 此时,我们还没有任何需要用户空间上下文切换的任务。 + +Fork不用于并行化。 + +**8.** 同步线程。 + +通常可以使不同的线程使用不同的存储单元(甚至更好:不同的缓存线),并且不使用任何线程同步(除了`joinAll`)。 + +如果需要同步,在大多数情况下,在 `lock_guard` 下使用互斥量就足够了。 + +在其他情况下,使用系统同步原语。不要使用忙等待。 + +仅在最简单的情况下才应使用原子操作。 + +除非是您的主要专业领域,否则不要尝试实施无锁数据结构。 + +**9.** 指针和引用。 + +大部分情况下,请用引用。 + +**10.** 常量。 + +使用 const 引用,指向常量的指针,`const_iterator`和 const 指针。 + +将 `const` 视为默认值,仅在必要时使用非 `const`。 + +当按值传递变量时,使用 `const` 通常没有意义。 + +**11.** 无符号。 + +必要时使用`unsigned`。 + +**12.** 数值类型。 + +使用 `UInt8`, `UInt16`, `UInt32`, `UInt64`, `Int8`, `Int16`, `Int32`, 以及 `Int64`, `size_t`, `ssize_t` 还有 `ptrdiff_t`。 + +不要使用这些类型:`signed / unsigned long`,`long long`,`short`,`signed / unsigned char`,`char`。 + +**13.** 参数传递。 + +通过引用传递复杂类型 (包括 `std::string`)。 + +如果函数中传递堆中创建的对象,则使参数类型为 `shared_ptr` 或者 `unique_ptr`. 
+ +**14.** 返回值 + +大部分情况下使用 `return`。不要使用 `[return std::move(res)]{.strike}`。 + +如果函数在堆上分配对象并返回它,请使用 `shared_ptr` 或 `unique_ptr`。 + +在极少数情况下,您可能需要通过参数返回值。 在这种情况下,参数应该是引用传递的。 + +```cpp +using AggregateFunctionPtr = std::shared_ptr; + +/** Allows creating an aggregate function by its name. + */ +class AggregateFunctionFactory +{ +public: + AggregateFunctionFactory(); + AggregateFunctionPtr get(const String & name, const DataTypes & argument_types) const; +``` + +**15.** 命名空间。 + +没有必要为应用程序代码使用单独的 `namespace` 。 + +小型库也不需要这个。 + +对于中大型库,须将所有代码放在 `namespace` 中。 + +在库的 `.h` 文件中,您可以使用 `namespace detail` 来隐藏应用程序代码不需要的实现细节。 + +在 `.cpp` 文件中,您可以使用 `static` 或匿名命名空间来隐藏符号。 + +同样 `namespace` 可用于 `enum` 以防止相应的名称落入外部 `namespace`(但最好使用`enum class`)。 + +**16.** 延迟初始化。 + +如果初始化需要参数,那么通常不应该编写默认构造函数。 + +如果稍后您需要延迟初始化,则可以添加将创建无效对象的默认构造函数。 或者,对于少量对象,您可以使用 `shared_ptr / unique_ptr`。 + +```cpp +Loader(DB::Connection * connection_, const std::string & query, size_t max_block_size_); + +/// For deferred initialization +Loader() {} +``` + +**17.** 虚函数。 + +如果该类不是用于多态使用,则不需要将函数设置为虚拟。这也适用于析构函数。 + +**18.** 编码。 + +在所有情况下使用 UTF-8 编码。使用 `std::string` and `char *`。不要使用 `std::wstring` 和 `wchar_t`。 + +**19.** 日志。 + +请参阅代码中的示例。 + +在提交之前,删除所有无意义和调试日志记录,以及任何其他类型的调试输出。 + +应该避免循环记录日志,即使在 Trace 级别也是如此。 + +日志必须在任何日志记录级别都可读。 + +在大多数情况下,只应在应用程序代码中使用日志记录。 + +日志消息必须用英文写成。 + +对于系统管理员来说,日志最好是可以理解的。 + +不要在日志中使用亵渎语言。 + +在日志中使用UTF-8编码。 在极少数情况下,您可以在日志中使用非ASCII字符。 + +**20.** 输入-输出。 + +不要使用 `iostreams` 在对应用程序性能至关重要的内部循环中(并且永远不要使用 `stringstream` )。 + +使用 `DB/IO` 库替代。 + +**21.** 日期和时间。 + +参考 `DateLUT` 库。 + +**22.** 引入头文件。 + +一直用 `#pragma once` 而不是其他宏。 + +**23.** using 语法 + +`using namespace` 不会被使用。 您可以使用特定的 `using`。 但是在类或函数中使它成为局部的。 + +**24.** 不要使用 `trailing return type` 为必要的功能。 + +```cpp +[auto f() -> void;]{.strike} +``` + +**25.** 声明和初始化变量。 + +```cpp +//right way +std::string s = "Hello"; +std::string s{"Hello"}; + +//wrong way +auto s = std::string{"Hello"}; +``` + +**26.** 对于虚函数,在基类中编写 `virtual`,但在后代类中写 `override` 
而不是`virtual`。 + +## 没有用到的 C++ 特性。 + +**1.** 不使用虚拟继承。 + +**2.** 不使用 C++03 中的异常说明符。 + +## 平台 + +**1.** 我们为特定平台编写代码。 + +但在其他条件相同的情况下,首选跨平台或可移植代码。 + +**2.** 语言: C++17. + +**3.** 编译器: `gcc`。 此时(2017年12月),代码使用7.2版编译。(它也可以使用`clang 4` 编译) + +使用标准库 (`libstdc++` 或 `libc++`)。 + +**4.** 操作系统:Linux Ubuntu,不比 Precise 早。 + +**5.** 代码是为x86_64 CPU架构编写的。 + +CPU指令集是我们服务器中支持的最小集合。 目前,它是SSE 4.2。 + +**6.** 使用 `-Wall -Wextra -Werror` 编译参数。 + +**7.** 对所有库使用静态链接,除了那些难以静态连接的库(参见 `ldd` 命令的输出). + +**8.** 使用发布的设置来开发和调试代码。 + +## 工具 + +**1.** KDevelop 是一个好的 IDE. + +**2.** 调试可以使用 `gdb`, `valgrind` (`memcheck`), `strace`, `-fsanitize=...`, 或 `tcmalloc_minimal_debug`. + +**3.** 对于性能分析,使用 `Linux Perf`, `valgrind` (`callgrind`),或者 `strace -cf`。 + +**4.** 源代码用 Git 作版本控制。 + +**5.** 使用 `CMake` 构建。 + +**6.** 程序的发布使用 `deb` 安装包。 + +**7.** 提交到 master 分支的代码不能破坏编译。 + +虽然只有选定的修订被认为是可行的。 + +**8.** 尽可能经常地进行提交,即使代码只是部分准备好了。 + +目的明确的功能,使用分支。 + +如果 `master` 分支中的代码尚不可构建,请在 `push` 之前将其从构建中排除。您需要在几天内完成或删除它。 + +**9.** 对于非平凡的更改,请使用分支并在服务器上发布它们。 + +**10.** 未使用的代码将从 repo 中删除。 + +## 库 + +**1.** 使用 C++14 标准库(允许实验性功能),以及 `boost` 和 `Poco` 框架。 + +**2.** 如有必要,您可以使用 OS 包中提供的任何已知库。 + +如果有一个好的解决方案已经可用,那就使用它,即使这意味着你必须安装另一个库。 + +(但要准备从代码中删除不好的库) + +**3.** 如果软件包中没有您需要的库,或者有过时的版本或错误的编译类型,则可以安装不在软件包中的库。 + +**4.** 如果库很小并且没有自己的复杂构建系统,请将源文件放在 `contrib` 文件夹中。 + +**5.** 始终优先考虑已经使用的库。 + +## 一般建议 + +**1.** 尽可能精简代码。 + +**2.** 尝试用最简单的方式实现。 + +**3.** 在你知道代码是如何工作以及内部循环如何运作之前,不要编写代码。 + +**4.** 在最简单的情况下,使用 `using` 而不是类或结构。 + +**5.** 如果可能,不要编写复制构造函数,赋值运算符,析构函数(虚拟函数除外,如果类包含至少一个虚函数),移动构造函数或移动赋值运算符。 换句话说,编译器生成的函数必须正常工作。 您可以使用 `default`。 + +**6.** 鼓励简化代码。 尽可能减小代码的大小。 + +## 其他建议 + +**1.** 为 `stddef.h` 中的类型明确指定 `std::`。 + +不推荐。 换句话说,我们建议写 `size_t` 而不是 `std::size_t`,因为它更短。 + +也接受添加 `std::`。 + +**2.** 为标准C库中的函数明确指定 `std::` + +不推荐。换句话说,写 `memcpy` 而不是`std::memcpy`。 + +原因是有类似的非标准功能,例如 `memmem`。我们偶尔会使用这些功能。`namespace std`中不存在这些函数。 + +如果你到处都写 `std::memcpy` 而不是 `memcpy`,那么没有 `std::` 的 `memmem` 会显得很奇怪。 + +不过,如果您愿意,仍然可以使用 `std::`。 + +**3.** 
当标准C++库中提供相同的函数时,使用C中的函数。 + +如果它更高效,这是可以接受的。 + +例如,使用`memcpy`而不是`std::copy`来复制大块内存。 + +**4.** 函数的多行参数。 + +允许以下任何包装样式: + +```cpp +function( + T1 x1, + T2 x2) +``` + +```cpp +function( + size_t left, size_t right, + const & RangesInDataParts ranges, + size_t limit) +``` + +```cpp +function(size_t left, size_t right, + const & RangesInDataParts ranges, + size_t limit) +``` + +```cpp +function(size_t left, size_t right, + const & RangesInDataParts ranges, + size_t limit) +``` + +```cpp +function( + size_t left, + size_t right, + const & RangesInDataParts ranges, + size_t limit) +``` + +[来源文章](https://clickhouse.yandex/docs/en/development/style/) From 0901694b1560f779b1623df481a24309fc200091 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 10 Dec 2018 12:40:57 +0300 Subject: [PATCH 66/90] Update integration tests image --- dbms/tests/integration/image/Dockerfile | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/dbms/tests/integration/image/Dockerfile b/dbms/tests/integration/image/Dockerfile index 4216f8efffb..897c210d7ac 100644 --- a/dbms/tests/integration/image/Dockerfile +++ b/dbms/tests/integration/image/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu +FROM ubuntu:18.04 RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes --force-yes \ @@ -16,7 +16,9 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes - module-init-tools \ cgroupfs-mount \ python-pip \ - tzdata + tzdata \ + libreadline-dev \ + libicu-dev ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone @@ -24,7 +26,7 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone RUN pip install pytest docker-compose==1.22.0 docker dicttoxml kazoo PyMySQL psycopg2 ENV DOCKER_CHANNEL stable -ENV DOCKER_VERSION 18.09.0 +ENV DOCKER_VERSION 17.09.1-ce RUN set -eux; \ \ From 61cd6fe2d312a20c9e6f057b947477bf48d04e26 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 10 Dec 
2018 13:21:32 +0300 Subject: [PATCH 67/90] Remove LowCardinaity for joined keys. --- dbms/src/Interpreters/Join.cpp | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 7707eed6933..17e21628ebc 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -15,6 +15,7 @@ #include #include +#include namespace DB @@ -259,10 +260,12 @@ void Join::setSampleBlock(const Block & block) size_t keys_size = key_names_right.size(); ColumnRawPtrs key_columns(keys_size); + Colums materialized_columns(keys_size); for (size_t i = 0; i < keys_size; ++i) { - key_columns[i] = block.getByName(key_names_right[i]).column.get(); + materialized_columns[i] = recursiveRemoveLowCardinality(block.getByName(key_names_right[i]).column); + key_columns[i] = materialized_columns[i].get(); /// We will join only keys, where all components are not NULL. if (key_columns[i]->isColumnNullable()) @@ -281,7 +284,10 @@ void Join::setSampleBlock(const Block & block) const auto & name = sample_block_with_columns_to_add.getByPosition(pos).name; if (key_names_right.end() != std::find(key_names_right.begin(), key_names_right.end(), name)) { - sample_block_with_keys.insert(sample_block_with_columns_to_add.getByPosition(pos)); + auto & col = sample_block_with_columns_to_add.getByPosition(pos); + col.column = recursiveRemoveLowCardinality(col.column); + col.type = recursiveRemoveLowCardinality(col.type); + sample_block_with_keys.insert(col); sample_block_with_columns_to_add.erase(pos); } else @@ -428,11 +434,13 @@ bool Join::insertFromBlock(const Block & block) /// Rare case, when keys are constant. To avoid code bloat, simply materialize them. Columns materialized_columns; + materialized_columns.reserve(keys_size); /// Memoize key columns to work. 
for (size_t i = 0; i < keys_size; ++i) { - key_columns[i] = block.getByName(key_names_right[i]).column.get(); + materialized_columns.emplace_back(recursiveRemoveLowCardinality(block.getByName(key_names_right[i]).column)); + key_columns[i] = materialized_columns.back().get(); if (ColumnPtr converted = key_columns[i]->convertToFullColumnIfConst()) { @@ -669,11 +677,13 @@ void Join::joinBlockImpl(Block & block, const Maps & maps) const /// Rare case, when keys are constant. To avoid code bloat, simply materialize them. Columns materialized_columns; + materialized_columns.reserve(keys_size); /// Memoize key columns to work with. for (size_t i = 0; i < keys_size; ++i) { - key_columns[i] = block.getByName(key_names_left[i]).column.get(); + materialized_columns.emplace_back(recursiveRemoveLowCardinality(block.getByName(key_names_left[i]).column)); + key_columns[i] = materialized_columns.back().get(); if (ColumnPtr converted = key_columns[i]->convertToFullColumnIfConst()) { @@ -883,8 +893,8 @@ void Join::checkTypesOfKeys(const Block & block_left, const Block & block_right) { /// Compare up to Nullability. - DataTypePtr left_type = removeNullable(block_left.getByName(key_names_left[i]).type); - DataTypePtr right_type = removeNullable(block_right.getByName(key_names_right[i]).type); + DataTypePtr left_type = removeNullable(recursiveRemoveLowCardinality(block_left.getByName(key_names_left[i]).type)); + DataTypePtr right_type = removeNullable(recursiveRemoveLowCardinality(block_right.getByName(key_names_right[i]).type)); if (!left_type->equals(*right_type)) throw Exception("Type mismatch of columns to JOIN by: " From e4fd13f2377c9549917633f40baa14355face259 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 10 Dec 2018 13:23:45 +0300 Subject: [PATCH 68/90] Remove LowCardinaity for joined keys. 
--- dbms/src/Interpreters/Join.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 17e21628ebc..fa7c201d67c 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -260,7 +260,7 @@ void Join::setSampleBlock(const Block & block) size_t keys_size = key_names_right.size(); ColumnRawPtrs key_columns(keys_size); - Colums materialized_columns(keys_size); + Columns materialized_columns(keys_size); for (size_t i = 0; i < keys_size; ++i) { From 66d73025d7eec6a00ce27580b887c2070f42ca70 Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 10 Dec 2018 13:55:27 +0300 Subject: [PATCH 69/90] fix RequiredSourceColumnsMatcher lambda --- .../RequiredSourceColumnsVisitor.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/dbms/src/Interpreters/RequiredSourceColumnsVisitor.h b/dbms/src/Interpreters/RequiredSourceColumnsVisitor.h index 726023be8eb..ebf948ffeb8 100644 --- a/dbms/src/Interpreters/RequiredSourceColumnsVisitor.h +++ b/dbms/src/Interpreters/RequiredSourceColumnsVisitor.h @@ -18,7 +18,11 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } - +/** Get a set of necessary columns to read from the table. + * In this case, the columns specified in ignored_names are considered unnecessary. And the ignored_names parameter can be modified. + * The set of columns available_joined_columns are the columns available from JOIN, they are not needed for reading from the main table. + * Put in required_joined_columns the set of columns available from JOIN and needed. + */ class RequiredSourceColumnsMatcher { public: @@ -49,9 +53,9 @@ public: if (auto * f = typeid_cast(node.get())) { - /// A special function `indexHint`. Everything that is inside it is not calculated - /// (and is used only for index analysis, see KeyCondition). - if (f->name == "indexHint") + /// "indexHint" is a special function for index analysis. 
Everything that is inside it is not calculated. @sa KeyCondition + /// "lambda" visits its children itself. + if (f->name == "indexHint" || f->name == "lambda") return false; } @@ -120,6 +124,7 @@ private: } } + /// @note It's a special case where we visit children inside the matcher, not in the visitor. visit(node.arguments->children[1], data); for (size_t i = 0; i < added_ignored.size(); ++i) @@ -128,11 +133,7 @@ private: } }; -/** Get a set of necessary columns to read from the table. - * In this case, the columns specified in ignored_names are considered unnecessary. And the ignored_names parameter can be modified. - * The set of columns available_joined_columns are the columns available from JOIN, they are not needed for reading from the main table. - * Put in required_joined_columns the set of columns available from JOIN and needed. - */ +/// Get a set of necessary columns to read from the table. using RequiredSourceColumnsVisitor = InDepthNodeVisitor; } From 723932930da0bea67bcf0069a90fa32d2313d137 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 10 Dec 2018 14:21:29 +0300 Subject: [PATCH 70/90] Added test with low cardinality join.
--- .../00800_low_cardinality_join.reference | 41 +++++++++++++++++++ .../00800_low_cardinality_join.sql | 27 ++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00800_low_cardinality_join.reference create mode 100644 dbms/tests/queries/0_stateless/00800_low_cardinality_join.sql diff --git a/dbms/tests/queries/0_stateless/00800_low_cardinality_join.reference b/dbms/tests/queries/0_stateless/00800_low_cardinality_join.reference new file mode 100644 index 00000000000..31d1de2d8c7 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00800_low_cardinality_join.reference @@ -0,0 +1,41 @@ +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +- +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +- +0 1 +1 2 +2 0 +0 1 +1 2 +2 0 +0 1 +1 2 +2 0 +0 1 +1 2 +2 0 +0 1 +1 2 +2 0 +0 1 +1 2 +2 \N +0 1 +1 2 +2 \N diff --git a/dbms/tests/queries/0_stateless/00800_low_cardinality_join.sql b/dbms/tests/queries/0_stateless/00800_low_cardinality_join.sql new file mode 100644 index 00000000000..07ad6d54624 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00800_low_cardinality_join.sql @@ -0,0 +1,27 @@ +select * from (select dummy as val from system.one) any left join (select dummy as val from system.one) using val; +select * from (select toLowCardinality(dummy) as val from system.one) any left join (select dummy as val from system.one) using val; +select * from (select dummy as val from system.one) any left join (select toLowCardinality(dummy) as val from system.one) using val; +select * from (select toLowCardinality(dummy) as val from system.one) any left join (select toLowCardinality(dummy) as val from system.one) using val; +select * from (select toLowCardinality(toNullable(dummy)) as val from system.one) any left join (select dummy as val from system.one) using val; +select * from (select dummy as val from system.one) any left join (select toLowCardinality(toNullable(dummy)) as val from system.one) using val; +select * from (select 
toLowCardinality(toNullable(dummy)) as val from system.one) any left join (select toLowCardinality(dummy) as val from system.one) using val; +select * from (select toLowCardinality(dummy) as val from system.one) any left join (select toLowCardinality(toNullable(dummy)) as val from system.one) using val; +select * from (select toLowCardinality(toNullable(dummy)) as val from system.one) any left join (select toLowCardinality(toNullable(dummy)) as val from system.one) using val; +select '-'; +select * from (select dummy as val from system.one) any left join (select dummy as val from system.one) on val + 0 = val * 1; +select * from (select toLowCardinality(dummy) as val from system.one) any left join (select dummy as val from system.one) on val + 0 = val * 1; +select * from (select dummy as val from system.one) any left join (select toLowCardinality(dummy) as val from system.one) on val + 0 = val * 1; +select * from (select toLowCardinality(dummy) as val from system.one) any left join (select toLowCardinality(dummy) as val from system.one) on val + 0 = val * 1; +select * from (select toLowCardinality(toNullable(dummy)) as val from system.one) any left join (select dummy as val from system.one) on val + 0 = val * 1; +select * from (select dummy as val from system.one) any left join (select toLowCardinality(toNullable(dummy)) as val from system.one) on val + 0 = val * 1; +select * from (select toLowCardinality(toNullable(dummy)) as val from system.one) any left join (select toLowCardinality(dummy) as val from system.one) on val + 0 = val * 1; +select * from (select toLowCardinality(dummy) as val from system.one) any left join (select toLowCardinality(toNullable(dummy)) as val from system.one) on val + 0 = val * 1; +select * from (select toLowCardinality(toNullable(dummy)) as val from system.one) any left join (select toLowCardinality(toNullable(dummy)) as val from system.one) on val + 0 = val * 1; +select '-'; +select * from (select number as l from system.numbers limit 
3) any left join (select number as r from system.numbers limit 3) on l + 1 = r * 1; +select * from (select toLowCardinality(number) as l from system.numbers limit 3) any left join (select number as r from system.numbers limit 3) on l + 1 = r * 1; +select * from (select number as l from system.numbers limit 3) any left join (select toLowCardinality(number) as r from system.numbers limit 3) on l + 1 = r * 1; +select * from (select toLowCardinality(number) as l from system.numbers limit 3) any left join (select toLowCardinality(number) as r from system.numbers limit 3) on l + 1 = r * 1; +select * from (select toLowCardinality(toNullable(number)) as l from system.numbers limit 3) any left join (select toLowCardinality(number) as r from system.numbers limit 3) on l + 1 = r * 1; +select * from (select toLowCardinality(number) as l from system.numbers limit 3) any left join (select toLowCardinality(toNullable(number)) as r from system.numbers limit 3) on l + 1 = r * 1; +select * from (select toLowCardinality(toNullable(number)) as l from system.numbers limit 3) any left join (select toLowCardinality(toNullable(number)) as r from system.numbers limit 3) on l + 1 = r * 1; From 1a6127dce4a1660254071b8474efa5f2a1b44be4 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 10 Dec 2018 15:09:37 +0300 Subject: [PATCH 71/90] Remove LowCardinality from NativeBlockOutputStream types for old clients. 
--- dbms/programs/server/TCPHandler.cpp | 11 +++++++++++ dbms/src/DataStreams/NativeBlockInputStream.cpp | 3 ++- dbms/src/DataStreams/NativeBlockOutputStream.cpp | 10 +++++++++- dbms/src/DataTypes/DataTypeLowCardinalityHelpers.cpp | 3 +++ 4 files changed, 25 insertions(+), 2 deletions(-) diff --git a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp index 108a630a0dd..6b1d4407a20 100644 --- a/dbms/programs/server/TCPHandler.cpp +++ b/dbms/programs/server/TCPHandler.cpp @@ -389,6 +389,17 @@ void TCPHandler::processOrdinaryQuery() /// Send header-block, to allow client to prepare output format for data to send. { Block header = state.io.in->getHeader(); + + /// Send data to old clients without low cardinality type. + if (client_revision && client_revision < DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE) + { + for (auto & column : header) + { + column.column = recursiveRemoveLowCardinality(column.column); + column.type = recursiveRemoveLowCardinality(column.type); + } + } + if (header) sendData(header); } diff --git a/dbms/src/DataStreams/NativeBlockInputStream.cpp b/dbms/src/DataStreams/NativeBlockInputStream.cpp index 33afbb0aa9e..7cd4a571a60 100644 --- a/dbms/src/DataStreams/NativeBlockInputStream.cpp +++ b/dbms/src/DataStreams/NativeBlockInputStream.cpp @@ -153,7 +153,8 @@ Block NativeBlockInputStream::readImpl() column.column = std::move(read_column); - if (server_revision && server_revision < DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE) + /// Support insert from old clients without low cardinality type. 
+ if (header && server_revision && server_revision < DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE) { column.column = recursiveLowCardinalityConversion(column.column, column.type, header.getByPosition(i).type); column.type = header.getByPosition(i).type; diff --git a/dbms/src/DataStreams/NativeBlockOutputStream.cpp b/dbms/src/DataStreams/NativeBlockOutputStream.cpp index 8be3373fc44..c87d82b2506 100644 --- a/dbms/src/DataStreams/NativeBlockOutputStream.cpp +++ b/dbms/src/DataStreams/NativeBlockOutputStream.cpp @@ -9,6 +9,7 @@ #include #include +#include namespace DB { @@ -100,7 +101,14 @@ void NativeBlockOutputStream::write(const Block & block) mark.offset_in_decompressed_block = ostr_concrete->getRemainingBytes(); } - const ColumnWithTypeAndName & column = block.safeGetByPosition(i); + ColumnWithTypeAndName column = block.safeGetByPosition(i); + + /// Send data to old clients without low cardinality type. + if (client_revision && client_revision < DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE) + { + column.column = recursiveRemoveLowCardinality(column.column); + column.type = recursiveRemoveLowCardinality(column.type); + } /// Name writeStringBinary(column.name, ostr); diff --git a/dbms/src/DataTypes/DataTypeLowCardinalityHelpers.cpp b/dbms/src/DataTypes/DataTypeLowCardinalityHelpers.cpp index b212c8ebdaa..215b21f7994 100644 --- a/dbms/src/DataTypes/DataTypeLowCardinalityHelpers.cpp +++ b/dbms/src/DataTypes/DataTypeLowCardinalityHelpers.cpp @@ -69,6 +69,9 @@ ColumnPtr recursiveRemoveLowCardinality(const ColumnPtr & column) ColumnPtr recursiveLowCardinalityConversion(const ColumnPtr & column, const DataTypePtr & from_type, const DataTypePtr & to_type) { + if (!column) + return column; + if (from_type->equals(*to_type)) return column; From 6a90abc543e362a5738bc796f72ce88d31397d35 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 10 Dec 2018 15:54:02 +0300 Subject: [PATCH 72/90] Support SUBSTRING(expr FROM start FOR length) --- 
dbms/src/Parsers/ExpressionElementParsers.cpp | 73 ++++++++++++++++++- dbms/src/Parsers/ExpressionElementParsers.h | 7 ++ .../00765_sql_compatibility_aliases.reference | 2 + .../00765_sql_compatibility_aliases.sql | 2 + 4 files changed, 83 insertions(+), 1 deletion(-) diff --git a/dbms/src/Parsers/ExpressionElementParsers.cpp b/dbms/src/Parsers/ExpressionElementParsers.cpp index f3e9e43aa19..0912d2a5b7b 100644 --- a/dbms/src/Parsers/ExpressionElementParsers.cpp +++ b/dbms/src/Parsers/ExpressionElementParsers.cpp @@ -317,6 +317,76 @@ bool ParserCastExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expect return false; } +bool ParserSubstringExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + /// Either SUBSTRING(expr FROM start) or SUBSTRING(expr FROM start FOR length) or SUBSTRING(expr, start, length) + /// The latter will be parsed normally as a function later. + + ASTPtr expr_node; + ASTPtr start_node; + ASTPtr length_node; + + if (!ParserKeyword("SUBSTRING").ignore(pos, expected)) + return false; + + if (pos->type != TokenType::OpeningRoundBracket) + return false; + ++pos; + + if (!ParserExpression().parse(pos, expr_node, expected)) + return false; + + if (pos->type != TokenType::Comma) + { + if (!ParserKeyword("FROM").ignore(pos, expected)) + return false; + } + else + { + ++pos; + } + + if (!ParserExpression().parse(pos, start_node, expected)) + return false; + + if (pos->type == TokenType::ClosingRoundBracket) + { + ++pos; + } + else + { + if (pos->type != TokenType::Comma) + { + if (!ParserKeyword("FOR").ignore(pos, expected)) + return false; + } + else + { + ++pos; + } + + if (!ParserExpression().parse(pos, length_node, expected)) + return false; + + ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected); + } + + /// Convert to canonical representation in functional form: SUBSTRING(expr, start, length) + + auto expr_list_args = std::make_shared(); + expr_list_args->children = {expr_node, start_node}; + + if 
(length_node) + expr_list_args->children.push_back(length_node); + + auto func_node = std::make_shared(); + func_node->name = "substring"; + func_node->arguments = std::move(expr_list_args); + func_node->children.push_back(func_node->arguments); + + node = std::move(func_node); + return true; +} bool ParserExtractExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { @@ -678,8 +748,9 @@ bool ParserExpressionElement::parseImpl(Pos & pos, ASTPtr & node, Expected & exp || ParserArrayOfLiterals().parse(pos, node, expected) || ParserArray().parse(pos, node, expected) || ParserLiteral().parse(pos, node, expected) - || ParserExtractExpression().parse(pos, node, expected) || ParserCastExpression().parse(pos, node, expected) + || ParserExtractExpression().parse(pos, node, expected) + || ParserSubstringExpression().parse(pos, node, expected) || ParserCase().parse(pos, node, expected) || ParserFunction().parse(pos, node, expected) || ParserQualifiedAsterisk().parse(pos, node, expected) diff --git a/dbms/src/Parsers/ExpressionElementParsers.h b/dbms/src/Parsers/ExpressionElementParsers.h index 32e1c57dce2..a52864d97d1 100644 --- a/dbms/src/Parsers/ExpressionElementParsers.h +++ b/dbms/src/Parsers/ExpressionElementParsers.h @@ -96,6 +96,13 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; +class ParserSubstringExpression : public IParserBase +{ +protected: + const char * getName() const override { return "SUBSTRING expression"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + class ParserExtractExpression : public IParserBase { protected: diff --git a/dbms/tests/queries/0_stateless/00765_sql_compatibility_aliases.reference b/dbms/tests/queries/0_stateless/00765_sql_compatibility_aliases.reference index f774720f9ff..7a70e443c1b 100644 --- a/dbms/tests/queries/0_stateless/00765_sql_compatibility_aliases.reference +++ b/dbms/tests/queries/0_stateless/00765_sql_compatibility_aliases.reference @@ 
-8,3 +8,5 @@ fo oo o 1 +oo +o diff --git a/dbms/tests/queries/0_stateless/00765_sql_compatibility_aliases.sql b/dbms/tests/queries/0_stateless/00765_sql_compatibility_aliases.sql index c7ce18d2b45..248514d134b 100644 --- a/dbms/tests/queries/0_stateless/00765_sql_compatibility_aliases.sql +++ b/dbms/tests/queries/0_stateless/00765_sql_compatibility_aliases.sql @@ -10,3 +10,5 @@ select SUBSTRING('foo', 1, 2); select Substr('foo', 2); select mid('foo', 3); select IF(3>2, 1, 0); +select substring('foo' from 1 + 1); +select SUBSTRING('foo' FROM 2 FOR 1); From 9236e94e1b61ac039db1e6ad266bc48bf64d2562 Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 10 Dec 2018 16:02:45 +0300 Subject: [PATCH 73/90] ExecuteScalarSubqueriesVisitor via InDepthNodeVisitor --- .../ExecuteScalarSubqueriesVisitor.cpp | 26 +++---- .../ExecuteScalarSubqueriesVisitor.h | 74 +++++++++---------- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 10 +-- .../PredicateExpressionsOptimizer.cpp | 4 +- dbms/src/Interpreters/SyntaxAnalyzer.cpp | 12 +-- 5 files changed, 59 insertions(+), 67 deletions(-) diff --git a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index 3be5095e23a..7769d60b4b5 100644 --- a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -35,17 +35,17 @@ static ASTPtr addTypeConversion(std::unique_ptr && ast, const String return res; } -void ExecuteScalarSubqueriesVisitor::visit(const ASTSubquery & subquery, ASTPtr & ast) const +void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr & ast, Data & data) { - Context subquery_context = context; - Settings subquery_settings = context.getSettings(); + Context subquery_context = data.context; + Settings subquery_settings = data.context.getSettings(); subquery_settings.max_result_rows = 1; subquery_settings.extremes = 0; subquery_context.setSettings(subquery_settings); ASTPtr 
subquery_select = subquery.children.at(0); BlockIO res = InterpreterSelectWithUnionQuery( - subquery_select, subquery_context, {}, QueryProcessingStage::Complete, subquery_depth + 1).execute(); + subquery_select, subquery_context, {}, QueryProcessingStage::Complete, data.subquery_depth + 1).execute(); Block block; try @@ -100,31 +100,29 @@ void ExecuteScalarSubqueriesVisitor::visit(const ASTSubquery & subquery, ASTPtr } } - -void ExecuteScalarSubqueriesVisitor::visit(const ASTTableExpression &, ASTPtr &) const -{ - /// Don't descend into subqueries in FROM section. -} - -void ExecuteScalarSubqueriesVisitor::visit(const ASTFunction & func, ASTPtr & ast) const +std::vector ExecuteScalarSubqueriesMatcher::visit(const ASTFunction & func, ASTPtr & ast, Data &) { /// Don't descend into subqueries in arguments of IN operator. /// But if an argument is not subquery, than deeper may be scalar subqueries and we need to descend in them. + std::vector out; if (functionIsInOrGlobalInOperator(func.name)) { for (auto & child : ast->children) { if (child != func.arguments) - visit(child); + out.push_back(&child); else for (size_t i = 0, size = func.arguments->children.size(); i < size; ++i) if (i != 1 || !typeid_cast(func.arguments->children[i].get())) - visit(func.arguments->children[i]); + out.push_back(&func.arguments->children[i]); } } else - visitChildren(ast); + for (auto & child : ast->children) + out.push_back(&child); + + return out; } } diff --git a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.h b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.h index d0e5c520a69..b3e87429d89 100644 --- a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.h +++ b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.h @@ -2,10 +2,10 @@ #include #include -#include #include #include #include +#include namespace DB { @@ -29,51 +29,45 @@ namespace DB * Scalar subqueries are executed on the request-initializer server. 
* The request is sent to remote servers with already substituted constants. */ -class ExecuteScalarSubqueriesVisitor +class ExecuteScalarSubqueriesMatcher { public: - ExecuteScalarSubqueriesVisitor(const Context & context_, size_t subquery_depth_, std::ostream * ostr_ = nullptr) - : context(context_), - subquery_depth(subquery_depth_), - visit_depth(0), - ostr(ostr_) - {} - - void visit(ASTPtr & ast) const + struct Data { - if (!tryVisit(ast) && - !tryVisit(ast) && - !tryVisit(ast)) - visitChildren(ast); + const Context & context; + size_t subquery_depth; + }; + + static constexpr const char * label = "ExecuteScalarSubqueries"; + + static bool needChildVisit(ASTPtr & node, const ASTPtr &) + { + /// Processed + if (typeid_cast(node.get()) || + typeid_cast(node.get())) + return false; + + /// Don't descend into subqueries in FROM section + if (typeid_cast(node.get())) + return false; + + return true; + } + + static std::vector visit(ASTPtr & ast, Data & data) + { + if (auto * t = typeid_cast(ast.get())) + visit(*t, ast, data); + if (auto * t = typeid_cast(ast.get())) + return visit(*t, ast, data); + return {}; } private: - const Context & context; - size_t subquery_depth; - mutable size_t visit_depth; - std::ostream * ostr; - - void visit(const ASTSubquery & subquery, ASTPtr & ast) const; - void visit(const ASTFunction & func, ASTPtr & ast) const; - void visit(const ASTTableExpression &, ASTPtr &) const; - - void visitChildren(ASTPtr & ast) const - { - for (auto & child : ast->children) - visit(child); - } - - template - bool tryVisit(ASTPtr & ast) const - { - if (const T * t = typeid_cast(ast.get())) - { - DumpASTNode dump(*ast, ostr, visit_depth, "executeScalarSubqueries"); - visit(*t, ast); - return true; - } - return false; - } + static void visit(const ASTSubquery & subquery, ASTPtr & ast, Data & data); + static std::vector visit(const ASTFunction & func, ASTPtr & ast, Data & data); }; +using ExecuteScalarSubqueriesVisitor = InDepthNodeVisitor; + } diff --git 
a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index e6f29670959..7e0998cf275 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -242,12 +242,12 @@ void ExpressionAnalyzer::analyzeAggregation() void ExpressionAnalyzer::initGlobalSubqueriesAndExternalTables() { /// Adds existing external tables (not subqueries) to the external_tables dictionary. - ExternalTablesMatcher::Data tables_data{context, external_tables}; + ExternalTablesVisitor::Data tables_data{context, external_tables}; ExternalTablesVisitor(tables_data).visit(query); if (do_global) { - GlobalSubqueriesMatcher::Data subqueries_data(context, subquery_depth, isRemoteStorage(), + GlobalSubqueriesVisitor::Data subqueries_data(context, subquery_depth, isRemoteStorage(), external_tables, subqueries_for_sets, has_global_subqueries); GlobalSubqueriesVisitor(subqueries_data).visit(query); } @@ -1027,7 +1027,7 @@ void ExpressionAnalyzer::collectUsedColumns() { /// Nothing needs to be ignored for expressions in ARRAY JOIN. 
NameSet empty; - RequiredSourceColumnsMatcher::Data visitor_data{available_columns, required, empty, empty, empty}; + RequiredSourceColumnsVisitor::Data visitor_data{available_columns, required, empty, empty, empty}; RequiredSourceColumnsVisitor(visitor_data).visit(expressions[i]); } @@ -1047,12 +1047,12 @@ void ExpressionAnalyzer::collectUsedColumns() for (const auto & left_key_ast : syntax->analyzed_join.key_asts_left) { NameSet empty; - RequiredSourceColumnsMatcher::Data columns_data{available_columns, required, ignored, empty, required_joined_columns}; + RequiredSourceColumnsVisitor::Data columns_data{available_columns, required, ignored, empty, required_joined_columns}; ASTPtr tmp = left_key_ast; RequiredSourceColumnsVisitor(columns_data).visit(tmp); } - RequiredSourceColumnsMatcher::Data columns_visitor_data{available_columns, required, ignored, + RequiredSourceColumnsVisitor::Data columns_visitor_data{available_columns, required, ignored, available_joined_columns, required_joined_columns}; RequiredSourceColumnsVisitor(columns_visitor_data).visit(query); diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp index 8e95773f72c..af84eac7f91 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -311,9 +311,9 @@ ASTs PredicateExpressionsOptimizer::getSelectQueryProjectionColumns(ASTPtr & ast std::unordered_map aliases; std::vector tables = getDatabaseAndTables(*select_query, context.getCurrentDatabase()); - TranslateQualifiedNamesMatcher::Data qn_visitor_data{{}, tables}; + TranslateQualifiedNamesVisitor::Data qn_visitor_data{{}, tables}; TranslateQualifiedNamesVisitor(qn_visitor_data).visit(ast); - QueryAliasesMatcher::Data query_aliases_data{aliases}; + QueryAliasesVisitor::Data query_aliases_data{aliases}; QueryAliasesVisitor(query_aliases_data).visit(ast); QueryNormalizer(ast, aliases, settings, {}, 
{}).perform(); diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index ae6d3ae0b4e..30124b509a7 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -134,7 +134,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( /// Creates a dictionary `aliases`: alias -> ASTPtr { LogAST log; - QueryAliasesMatcher::Data query_aliases_data{result.aliases}; + QueryAliasesVisitor::Data query_aliases_data{result.aliases}; QueryAliasesVisitor(query_aliases_data, log.stream()).visit(query); } @@ -228,7 +228,7 @@ void translateQualifiedNames(ASTPtr & query, ASTSelectQuery * select_query, std::vector tables = getDatabaseAndTables(*select_query, context.getCurrentDatabase()); LogAST log; - TranslateQualifiedNamesMatcher::Data visitor_data{source_columns, tables}; + TranslateQualifiedNamesVisitor::Data visitor_data{source_columns, tables}; TranslateQualifiedNamesVisitor visitor(visitor_data, log.stream()); visitor.visit(query); } @@ -342,8 +342,8 @@ void executeScalarSubqueries(ASTPtr & query, const ASTSelectQuery * select_query if (!select_query) { - ExecuteScalarSubqueriesVisitor visitor(context, subquery_depth, log.stream()); - visitor.visit(query); + ExecuteScalarSubqueriesVisitor::Data visitor_data{context, subquery_depth}; + ExecuteScalarSubqueriesVisitor(visitor_data, log.stream()).visit(query); } else { @@ -353,8 +353,8 @@ void executeScalarSubqueries(ASTPtr & query, const ASTSelectQuery * select_query if (!typeid_cast(child.get()) && !typeid_cast(child.get())) { - ExecuteScalarSubqueriesVisitor visitor(context, subquery_depth, log.stream()); - visitor.visit(child); + ExecuteScalarSubqueriesVisitor::Data visitor_data{context, subquery_depth}; + ExecuteScalarSubqueriesVisitor(visitor_data, log.stream()).visit(child); } } } From 49d80bf8f5df10eddd29b5380e5535ab1e31ac10 Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 10 Dec 2018 16:19:09 +0300 Subject: [PATCH 74/90] 
ExecuteScalarSubqueriesVisitor (move code from h to cpp) --- .../ExecuteScalarSubqueriesVisitor.cpp | 23 +++++++++++++ .../ExecuteScalarSubqueriesVisitor.h | 34 ++++--------------- 2 files changed, 29 insertions(+), 28 deletions(-) diff --git a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index 7769d60b4b5..9cea690a39b 100644 --- a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -35,6 +35,29 @@ static ASTPtr addTypeConversion(std::unique_ptr && ast, const String return res; } +bool ExecuteScalarSubqueriesMatcher::needChildVisit(ASTPtr & node, const ASTPtr &) +{ + /// Processed + if (typeid_cast(node.get()) || + typeid_cast(node.get())) + return false; + + /// Don't descend into subqueries in FROM section + if (typeid_cast(node.get())) + return false; + + return true; +} + +std::vector ExecuteScalarSubqueriesMatcher::visit(ASTPtr & ast, Data & data) +{ + if (auto * t = typeid_cast(ast.get())) + visit(*t, ast, data); + if (auto * t = typeid_cast(ast.get())) + return visit(*t, ast, data); + return {}; +} + void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr & ast, Data & data) { Context subquery_context = data.context; diff --git a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.h b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.h index b3e87429d89..555b7334204 100644 --- a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.h +++ b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.h @@ -1,17 +1,15 @@ #pragma once #include -#include -#include -#include -#include #include namespace DB { -/// Visitors consist of functions with unified interface 'void visit(Casted & x, ASTPtr & y)', there x is y, successfully casted to Casted. -/// Both types and fuction could have const specifiers. The second argument is used by visitor to replaces AST node (y) if needed. 
+class Context; +class ASTSubquery; +class ASTFunction; +struct ASTTableExpression; /** Replace subqueries that return exactly one row * ("scalar" subqueries) to the corresponding constants. @@ -40,28 +38,8 @@ public: static constexpr const char * label = "ExecuteScalarSubqueries"; - static bool needChildVisit(ASTPtr & node, const ASTPtr &) - { - /// Processed - if (typeid_cast(node.get()) || - typeid_cast(node.get())) - return false; - - /// Don't descend into subqueries in FROM section - if (typeid_cast(node.get())) - return false; - - return true; - } - - static std::vector visit(ASTPtr & ast, Data & data) - { - if (auto * t = typeid_cast(ast.get())) - visit(*t, ast, data); - if (auto * t = typeid_cast(ast.get())) - return visit(*t, ast, data); - return {}; - } + static bool needChildVisit(ASTPtr & node, const ASTPtr &); + static std::vector visit(ASTPtr & ast, Data & data); private: static void visit(const ASTSubquery & subquery, ASTPtr & ast, Data & data); From 9f9bf0cb1b84025080fa1c370fe9ddce67d97937 Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 10 Dec 2018 16:49:36 +0300 Subject: [PATCH 75/90] ArrayJoinedColumnsVisitor via InDepthNodeVisitor --- .../Interpreters/ArrayJoinedColumnsVisitor.h | 90 +++++++++---------- dbms/src/Interpreters/InDepthNodeVisitor.h | 2 +- dbms/src/Interpreters/SyntaxAnalyzer.cpp | 8 +- 3 files changed, 45 insertions(+), 55 deletions(-) diff --git a/dbms/src/Interpreters/ArrayJoinedColumnsVisitor.h b/dbms/src/Interpreters/ArrayJoinedColumnsVisitor.h index 96e37cd6a42..de75f4622ef 100644 --- a/dbms/src/Interpreters/ArrayJoinedColumnsVisitor.h +++ b/dbms/src/Interpreters/ArrayJoinedColumnsVisitor.h @@ -9,42 +9,55 @@ #include #include +#include + namespace DB { -/// Visitors consist of functions with unified interface 'void visit(Casted & x, ASTPtr & y)', there x is y, successfully casted to Casted. -/// Both types and fuction could have const specifiers. The second argument is used by visitor to replaces AST node (y) if needed. 
- /// Fills the array_join_result_to_source: on which columns-arrays to replicate, and how to call them after that. -class ArrayJoinedColumnsVisitor +class ArrayJoinedColumnsMatcher { public: - ArrayJoinedColumnsVisitor(NameToNameMap & array_join_name_to_alias_, - NameToNameMap & array_join_alias_to_name_, - NameToNameMap & array_join_result_to_source_) - : array_join_name_to_alias(array_join_name_to_alias_), - array_join_alias_to_name(array_join_alias_to_name_), - array_join_result_to_source(array_join_result_to_source_) - {} - - void visit(ASTPtr & ast) const + struct Data { - if (!tryVisit(ast) && - !tryVisit(ast)) - visitChildren(ast); + NameToNameMap & array_join_name_to_alias; + NameToNameMap & array_join_alias_to_name; + NameToNameMap & array_join_result_to_source; + }; + + static constexpr const char * label = "ArrayJoinedColumns"; + + static bool needChildVisit(ASTPtr & node, const ASTPtr & child) + { + /// Processed + if (typeid_cast(node.get())) + return false; + + if (typeid_cast(node.get())) + return false; + + if (typeid_cast(child.get()) || + typeid_cast(child.get())) + return false; + + return true; + } + + static std::vector visit(ASTPtr & ast, Data & data) + { + if (auto * t = typeid_cast(ast.get())) + visit(*t, ast, data); + return {}; } private: - NameToNameMap & array_join_name_to_alias; - NameToNameMap & array_join_alias_to_name; - NameToNameMap & array_join_result_to_source; - - void visit(const ASTTablesInSelectQuery &, ASTPtr &) const - {} - - void visit(const ASTIdentifier & node, ASTPtr &) const + static void visit(const ASTIdentifier & node, ASTPtr &, Data & data) { + NameToNameMap & array_join_name_to_alias = data.array_join_name_to_alias; + NameToNameMap & array_join_alias_to_name = data.array_join_alias_to_name; + NameToNameMap & array_join_result_to_source = data.array_join_result_to_source; + if (!node.general()) return; @@ -74,34 +87,11 @@ private: /** Example: SELECT ParsedParams.Key1 FROM ... ARRAY JOIN ParsedParams AS PP. 
*/ array_join_result_to_source[ /// PP.Key1 -> ParsedParams.Key1 - Nested::concatenateName(array_join_name_to_alias[splitted.first], splitted.second)] = node.name; + Nested::concatenateName(array_join_name_to_alias[splitted.first], splitted.second)] = node.name; } } - - void visit(const ASTSubquery &, ASTPtr &) const - {} - - void visit(const ASTSelectQuery &, ASTPtr &) const - {} - - void visitChildren(ASTPtr & ast) const - { - for (auto & child : ast->children) - if (!tryVisit(child) && - !tryVisit(child)) - visit(child); - } - - template - bool tryVisit(ASTPtr & ast) const - { - if (const T * t = typeid_cast(ast.get())) - { - visit(*t, ast); - return true; - } - return false; - } }; +using ArrayJoinedColumnsVisitor = InDepthNodeVisitor; + } diff --git a/dbms/src/Interpreters/InDepthNodeVisitor.h b/dbms/src/Interpreters/InDepthNodeVisitor.h index ff102d136c5..bdeb8ddb234 100644 --- a/dbms/src/Interpreters/InDepthNodeVisitor.h +++ b/dbms/src/Interpreters/InDepthNodeVisitor.h @@ -6,7 +6,7 @@ namespace DB { -/// Visits AST tree in depth, call fucntions for nodes according to Matcher type data. +/// Visits AST tree in depth, call functions for nodes according to Matcher type data. /// You need to define Data, label, visit() and needChildVisit() in Matcher class. 
template class InDepthNodeVisitor diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index 30124b509a7..78c90a0be8c 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -697,10 +697,10 @@ void getArrayJoinedColumns(ASTPtr & query, SyntaxAnalyzerResult & result, const } { - ArrayJoinedColumnsVisitor visitor(result.array_join_name_to_alias, - result.array_join_alias_to_name, - result.array_join_result_to_source); - visitor.visit(query); + ArrayJoinedColumnsVisitor::Data visitor_data{result.array_join_name_to_alias, + result.array_join_alias_to_name, + result.array_join_result_to_source}; + ArrayJoinedColumnsVisitor(visitor_data).visit(query); } /// If the result of ARRAY JOIN is not used, it is necessary to ARRAY-JOIN any column, From 5cb9f9ea2fc6b9f16b4e73160dd9e567e26563cb Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 10 Dec 2018 17:29:08 +0300 Subject: [PATCH 76/90] fix ident --- docs/zh/getting_started/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/zh/getting_started/index.md b/docs/zh/getting_started/index.md index ca108db655a..fd2efaabdeb 100644 --- a/docs/zh/getting_started/index.md +++ b/docs/zh/getting_started/index.md @@ -52,7 +52,7 @@ ClickHouse包含访问控制配置,它们位于`users.xml`文件中(与'config ```text Client: dbms/programs/clickhouse-client Server: dbms/programs/clickhouse-server - ``` +``` 在服务器中为数据创建如下目录: From becad378ca4e70263854fa282010398abf3e7f33 Mon Sep 17 00:00:00 2001 From: mf5137 Date: Mon, 10 Dec 2018 16:23:21 +0100 Subject: [PATCH 77/90] Fixes from comments of #3695 --- docker/server/Dockerfile | 12 +++++++++++- docker/server/entrypoint.sh | 32 ++++++++++++++++++++++---------- 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index f52eb61799e..db81c29ae17 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -16,19 +16,29 @@ RUN apt-get 
update \ && env DEBIAN_FRONTEND=noninteractive \ apt-get install --allow-unauthenticated --yes --no-install-recommends \ clickhouse-common-static=$version \ + clickhouse-client=$version \ clickhouse-server=$version \ libgcc-7-dev \ + locales \ + tzdata \ + wget \ && rm -rf \ /var/lib/apt/lists/* \ /var/cache/debconf \ /tmp/* \ && apt-get clean +ADD https://github.com/tianon/gosu/releases/download/1.10/gosu-amd64 /bin/gosu + +RUN locale-gen en_US.UTF-8 +ENV LANG en_US.UTF-8 +ENV LANGUAGE en_US:en +ENV LC_ALL en_US.UTF-8 + RUN mkdir /docker-entrypoint-initdb.d COPY docker_related_config.xml /etc/clickhouse-server/config.d/ COPY entrypoint.sh /entrypoint.sh -ADD https://github.com/tianon/gosu/releases/download/1.10/gosu-amd64 /bin/gosu RUN chmod +x \ /entrypoint.sh \ diff --git a/docker/server/entrypoint.sh b/docker/server/entrypoint.sh index 1cd3a799c15..c44ec3e5a9f 100644 --- a/docker/server/entrypoint.sh +++ b/docker/server/entrypoint.sh @@ -5,14 +5,18 @@ CLICKHOUSE_CONFIG="${CLICKHOUSE_CONFIG:-/etc/clickhouse-server/config.xml}" USER="$(id -u clickhouse)" GROUP="$(id -g clickhouse)" +# port is needed to check if clickhouse-server is ready for connections +HTTP_PORT="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=http_port)" + # get CH directories locations -DATA_DIR="$(grep -oP '\K(.*)(?=[/?])' $CLICKHOUSE_CONFIG || true)" -TMP_DIR="$(grep -oP '\K(.*)(?=[/?])' $CLICKHOUSE_CONFIG || true)" -USER_PATH="$(grep -oP '\K(.*)(?=)' $CLICKHOUSE_CONFIG || true)" -LOG_PATH="$(grep -oP '\K(.*)(?=)' $CLICKHOUSE_CONFIG || true)" +DATA_DIR="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=path || true)" +TMP_DIR="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=tmp_path || true)" +USER_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=user_files_path || true)" +LOG_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=logger.log || true)" LOG_DIR="$(dirname 
$LOG_PATH || true)" -ERROR_LOG_PATH="$(grep -oP '\K(.*)(?=)' $CLICKHOUSE_CONFIG || true)" +ERROR_LOG_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=logger.errorlog || true)" ERROR_LOG_DIR="$(dirname $ERROR_LOG_PATH || true)" +FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=format_schema_path || true)" # ensure directories exist mkdir -p \ @@ -20,7 +24,8 @@ mkdir -p \ "$ERROR_LOG_DIR" \ "$LOG_DIR" \ "$TMP_DIR" \ - "$USER_PATH" + "$USER_PATH" \ + "$FORMAT_SCHEMA_PATH" # ensure proper directories permissions chown -R $USER:$GROUP \ @@ -28,14 +33,21 @@ chown -R $USER:$GROUP \ "$ERROR_LOG_DIR" \ "$LOG_DIR" \ "$TMP_DIR" \ - "$USER_PATH" + "$USER_PATH" \ + "$FORMAT_SCHEMA_PATH" if [ -n "$(ls /docker-entrypoint-initdb.d/)" ]; then gosu clickhouse /usr/bin/clickhouse-server --config-file=$CLICKHOUSE_CONFIG & pid="$!" - sleep 1 - clickhouseclient=( clickhouse client --multiquery ) + # check if clickhouse is ready to accept connections + # will try to send ping clickhouse via http_port (max 12 retries, with 1 sec delay) + if ! wget --spider --quiet --tries=12 --waitretry=1 --retry-connrefused "http://localhost:$HTTP_PORT/ping" ; then + echo >&2 'ClickHouse init process failed.' + exit 1 + fi + + clickhouseclient=( clickhouse-client --multiquery ) echo for f in /docker-entrypoint-initdb.d/*; do case "$f" in @@ -56,7 +68,7 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ]; then done if ! kill -s TERM "$pid" || ! wait "$pid"; then - echo >&2 'ClickHouse init process failed.' + echo >&2 'Finishing of ClickHouse init process failed.' 
exit 1 fi fi From c9e98c8debee48592d4e6b860995604d4254adbe Mon Sep 17 00:00:00 2001 From: proller Date: Mon, 10 Dec 2018 18:25:45 +0300 Subject: [PATCH 78/90] clang-format of dbms/src/Dictionaries/* --- dbms/src/Dictionaries/CacheDictionary.cpp | 300 +++++----- dbms/src/Dictionaries/CacheDictionary.h | 141 ++--- dbms/src/Dictionaries/CacheDictionary.inc.h | 129 ++--- .../ClickHouseDictionarySource.cpp | 128 +++-- .../Dictionaries/ClickHouseDictionarySource.h | 16 +- .../ComplexKeyCacheDictionary.cpp | 164 +++--- .../Dictionaries/ComplexKeyCacheDictionary.h | 180 +++--- ...acheDictionary_createAttributeWithType.cpp | 16 +- ...exKeyCacheDictionary_setAttributeValue.cpp | 57 +- ...cheDictionary_setDefaultAttributeValue.cpp | 45 +- .../ComplexKeyHashedDictionary.cpp | 521 +++++++++++------- .../Dictionaries/ComplexKeyHashedDictionary.h | 171 +++--- .../Dictionaries/DictionaryBlockInputStream.h | 412 ++++++++------ .../DictionaryBlockInputStreamBase.cpp | 1 - .../DictionaryBlockInputStreamBase.h | 1 - dbms/src/Dictionaries/DictionaryFactory.cpp | 8 +- .../Dictionaries/DictionarySourceFactory.cpp | 14 +- .../Dictionaries/DictionarySourceHelpers.cpp | 20 +- .../Dictionaries/DictionarySourceHelpers.h | 10 +- dbms/src/Dictionaries/DictionaryStructure.cpp | 194 ++++--- dbms/src/Dictionaries/DictionaryStructure.h | 11 +- .../Embedded/GeoDictionariesLoader.cpp | 6 +- .../Embedded/GeoDictionariesLoader.h | 6 +- .../Embedded/GeodataProviders/Entries.h | 3 +- .../GeodataProviders/HierarchiesProvider.cpp | 14 +- .../GeodataProviders/HierarchiesProvider.h | 14 +- .../HierarchyFormatReader.cpp | 5 +- .../GeodataProviders/HierarchyFormatReader.h | 7 +- .../GeodataProviders/IHierarchiesProvider.h | 3 +- .../GeodataProviders/INamesProvider.h | 6 +- .../GeodataProviders/NamesFormatReader.h | 6 +- .../GeodataProviders/NamesProvider.cpp | 11 +- .../Embedded/GeodataProviders/NamesProvider.h | 12 +- .../Embedded/IGeoDictionariesLoader.h | 18 +- .../Embedded/RegionsHierarchies.cpp | 2 +- 
.../Embedded/RegionsHierarchies.h | 6 +- .../Embedded/RegionsHierarchy.cpp | 44 +- .../Dictionaries/Embedded/RegionsHierarchy.h | 2 +- .../Dictionaries/Embedded/RegionsNames.cpp | 19 +- dbms/src/Dictionaries/Embedded/RegionsNames.h | 35 +- .../Embedded/TechDataHierarchy.cpp | 6 +- .../Dictionaries/Embedded/TechDataHierarchy.h | 28 +- .../ExecutableDictionarySource.cpp | 188 +++---- .../Dictionaries/ExecutableDictionarySource.h | 11 +- .../src/Dictionaries/ExternalQueryBuilder.cpp | 17 +- dbms/src/Dictionaries/ExternalQueryBuilder.h | 10 +- .../ExternalResultDescription.cpp | 9 +- .../Dictionaries/ExternalResultDescription.h | 1 - .../src/Dictionaries/FileDictionarySource.cpp | 26 +- dbms/src/Dictionaries/FileDictionarySource.h | 9 +- dbms/src/Dictionaries/FlatDictionary.cpp | 471 ++++++++++------ dbms/src/Dictionaries/FlatDictionary.h | 114 ++-- .../src/Dictionaries/HTTPDictionarySource.cpp | 83 ++- dbms/src/Dictionaries/HTTPDictionarySource.h | 20 +- dbms/src/Dictionaries/HashedDictionary.cpp | 505 ++++++++++------- dbms/src/Dictionaries/HashedDictionary.h | 119 ++-- dbms/src/Dictionaries/IDictionary.h | 18 +- dbms/src/Dictionaries/IDictionarySource.h | 6 +- .../Dictionaries/LibraryDictionarySource.cpp | 31 +- .../Dictionaries/LibraryDictionarySource.h | 12 +- .../Dictionaries/MongoDBBlockInputStream.cpp | 120 ++-- .../Dictionaries/MongoDBBlockInputStream.h | 11 +- .../Dictionaries/MongoDBDictionarySource.cpp | 161 +++--- .../Dictionaries/MongoDBDictionarySource.h | 40 +- .../Dictionaries/MySQLBlockInputStream.cpp | 89 +-- dbms/src/Dictionaries/MySQLBlockInputStream.h | 11 +- .../Dictionaries/MySQLDictionarySource.cpp | 89 +-- dbms/src/Dictionaries/MySQLDictionarySource.h | 31 +- .../src/Dictionaries/ODBCBlockInputStream.cpp | 91 +-- dbms/src/Dictionaries/ODBCBlockInputStream.h | 10 +- .../RangeDictionaryBlockInputStream.h | 201 ++++--- .../Dictionaries/RangeHashedDictionary.cpp | 365 ++++++++---- dbms/src/Dictionaries/RangeHashedDictionary.h | 101 ++-- 
dbms/src/Dictionaries/TrieDictionary.cpp | 488 +++++++++------- dbms/src/Dictionaries/TrieDictionary.h | 167 +++--- .../src/Dictionaries/XDBCDictionarySource.cpp | 142 ++--- dbms/src/Dictionaries/XDBCDictionarySource.h | 7 +- dbms/src/Dictionaries/readInvalidateQuery.cpp | 7 +- dbms/src/Dictionaries/readInvalidateQuery.h | 1 - .../Dictionaries/writeParenthesisedString.cpp | 1 - .../Dictionaries/writeParenthesisedString.h | 2 - 81 files changed, 3791 insertions(+), 2786 deletions(-) diff --git a/dbms/src/Dictionaries/CacheDictionary.cpp b/dbms/src/Dictionaries/CacheDictionary.cpp index 08a0752a23f..253b9124963 100644 --- a/dbms/src/Dictionaries/CacheDictionary.cpp +++ b/dbms/src/Dictionaries/CacheDictionary.cpp @@ -1,48 +1,47 @@ #include "CacheDictionary.h" #include -#include #include -#include +#include #include +#include #include -#include -#include -#include -#include -#include #include +#include +#include +#include +#include +#include #include -#include "DictionaryBlockInputStream.h" -#include -#include #include -#include "DictionaryFactory.h" +#include +#include #include "CacheDictionary.inc.h" +#include "DictionaryBlockInputStream.h" +#include "DictionaryFactory.h" namespace ProfileEvents { - extern const Event DictCacheKeysRequested; - extern const Event DictCacheKeysRequestedMiss; - extern const Event DictCacheKeysRequestedFound; - extern const Event DictCacheKeysExpired; - extern const Event DictCacheKeysNotFound; - extern const Event DictCacheKeysHit; - extern const Event DictCacheRequestTimeNs; - extern const Event DictCacheRequests; - extern const Event DictCacheLockWriteNs; - extern const Event DictCacheLockReadNs; +extern const Event DictCacheKeysRequested; +extern const Event DictCacheKeysRequestedMiss; +extern const Event DictCacheKeysRequestedFound; +extern const Event DictCacheKeysExpired; +extern const Event DictCacheKeysNotFound; +extern const Event DictCacheKeysHit; +extern const Event DictCacheRequestTimeNs; +extern const Event 
DictCacheRequests; +extern const Event DictCacheLockWriteNs; +extern const Event DictCacheLockReadNs; } namespace CurrentMetrics { - extern const Metric DictCacheRequests; +extern const Metric DictCacheRequests; } namespace DB { - namespace ErrorCodes { extern const int TYPE_MISMATCH; @@ -61,15 +60,20 @@ inline size_t CacheDictionary::getCellIdx(const Key id) const } -CacheDictionary::CacheDictionary(const std::string & name, const DictionaryStructure & dict_struct, - DictionarySourcePtr source_ptr, const DictionaryLifetime dict_lifetime, +CacheDictionary::CacheDictionary( + const std::string & name, + const DictionaryStructure & dict_struct, + DictionarySourcePtr source_ptr, + const DictionaryLifetime dict_lifetime, const size_t size) - : name{name}, dict_struct(dict_struct), - source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime), - size{roundUpToPowerOfTwoOrZero(std::max(size, size_t(max_collision_length)))}, - size_overlap_mask{this->size - 1}, - cells{this->size}, - rnd_engine(randomSeed()) + : name{name} + , dict_struct(dict_struct) + , source_ptr{std::move(source_ptr)} + , dict_lifetime(dict_lifetime) + , size{roundUpToPowerOfTwoOrZero(std::max(size, size_t(max_collision_length)))} + , size_overlap_mask{this->size - 1} + , cells{this->size} + , rnd_engine(randomSeed()) { if (!this->source_ptr->supportsSelectiveLoad()) throw Exception{name + ": source cannot be used with CacheDictionary", ErrorCodes::UNSUPPORTED_METHOD}; @@ -79,32 +83,36 @@ CacheDictionary::CacheDictionary(const std::string & name, const DictionaryStruc CacheDictionary::CacheDictionary(const CacheDictionary & other) : CacheDictionary{other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime, other.size} -{} +{ +} void CacheDictionary::toParent(const PaddedPODArray & ids, PaddedPODArray & out) const { const auto null_value = std::get(hierarchical_attribute->null_values); - getItemsNumber(*hierarchical_attribute, ids, out, [&] (const size_t) { return null_value; 
}); + getItemsNumber(*hierarchical_attribute, ids, out, [&](const size_t) { return null_value; }); } /// Allow to use single value in same way as array. -static inline CacheDictionary::Key getAt(const PaddedPODArray & arr, const size_t idx) { return arr[idx]; } -static inline CacheDictionary::Key getAt(const CacheDictionary::Key & value, const size_t) { return value; } +static inline CacheDictionary::Key getAt(const PaddedPODArray & arr, const size_t idx) +{ + return arr[idx]; +} +static inline CacheDictionary::Key getAt(const CacheDictionary::Key & value, const size_t) +{ + return value; +} template -void CacheDictionary::isInImpl( - const PaddedPODArray & child_ids, - const AncestorType & ancestor_ids, - PaddedPODArray & out) const +void CacheDictionary::isInImpl(const PaddedPODArray & child_ids, const AncestorType & ancestor_ids, PaddedPODArray & out) const { /// Transform all children to parents until ancestor id or null_value will be reached. size_t out_size = out.size(); - memset(out.data(), 0xFF, out_size); /// 0xFF means "not calculated" + memset(out.data(), 0xFF, out_size); /// 0xFF means "not calculated" const auto null_value = std::get(hierarchical_attribute->null_values); @@ -164,25 +172,17 @@ void CacheDictionary::isInImpl( } void CacheDictionary::isInVectorVector( - const PaddedPODArray & child_ids, - const PaddedPODArray & ancestor_ids, - PaddedPODArray & out) const + const PaddedPODArray & child_ids, const PaddedPODArray & ancestor_ids, PaddedPODArray & out) const { isInImpl(child_ids, ancestor_ids, out); } -void CacheDictionary::isInVectorConstant( - const PaddedPODArray & child_ids, - const Key ancestor_id, - PaddedPODArray & out) const +void CacheDictionary::isInVectorConstant(const PaddedPODArray & child_ids, const Key ancestor_id, PaddedPODArray & out) const { isInImpl(child_ids, ancestor_id, out); } -void CacheDictionary::isInConstantVector( - const Key child_id, - const PaddedPODArray & ancestor_ids, - PaddedPODArray & out) const +void 
CacheDictionary::isInConstantVector(const Key child_id, const PaddedPODArray & ancestor_ids, PaddedPODArray & out) const { /// Special case with single child value. @@ -213,33 +213,34 @@ void CacheDictionary::getString(const std::string & attribute_name, const Padded { auto & attribute = getAttribute(attribute_name); if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String)) - throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; + throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), + ErrorCodes::TYPE_MISMATCH}; const auto null_value = StringRef{std::get(attribute.null_values)}; - getItemsString(attribute, ids, out, [&] (const size_t) { return null_value; }); + getItemsString(attribute, ids, out, [&](const size_t) { return null_value; }); } void CacheDictionary::getString( - const std::string & attribute_name, const PaddedPODArray & ids, const ColumnString * const def, - ColumnString * const out) const + const std::string & attribute_name, const PaddedPODArray & ids, const ColumnString * const def, ColumnString * const out) const { auto & attribute = getAttribute(attribute_name); if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String)) - throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; + throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), + ErrorCodes::TYPE_MISMATCH}; - getItemsString(attribute, ids, out, [&] (const size_t row) { return def->getDataAt(row); }); + getItemsString(attribute, ids, out, [&](const size_t row) { return def->getDataAt(row); }); } void CacheDictionary::getString( - const std::string & attribute_name, const PaddedPODArray & ids, const String & def, - ColumnString * const out) const + const 
std::string & attribute_name, const PaddedPODArray & ids, const String & def, ColumnString * const out) const { auto & attribute = getAttribute(attribute_name); if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String)) - throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; + throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), + ErrorCodes::TYPE_MISMATCH}; - getItemsString(attribute, ids, out, [&] (const size_t) { return StringRef{def}; }); + getItemsString(attribute, ids, out, [&](const size_t) { return StringRef{def}; }); } @@ -329,21 +330,21 @@ void CacheDictionary::has(const PaddedPODArray & ids, PaddedPODArray return; std::vector required_ids(outdated_ids.size()); - std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), - [] (auto & pair) { return pair.first; }); + std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), [](auto & pair) { return pair.first; }); /// request new values - update(required_ids, - [&] (const auto id, const auto) - { - for (const auto row : outdated_ids[id]) - out[row] = true; - }, - [&] (const auto id, const auto) - { - for (const auto row : outdated_ids[id]) - out[row] = false; - }); + update( + required_ids, + [&](const auto id, const auto) + { + for (const auto row : outdated_ids[id]) + out[row] = true; + }, + [&](const auto id, const auto) + { + for (const auto row : outdated_ids[id]) + out[row] = false; + }); } @@ -362,7 +363,7 @@ void CacheDictionary::createAttributes() if (attribute.hierarchical) { - hierarchical_attribute = & attributes.back(); + hierarchical_attribute = &attributes.back(); if (hierarchical_attribute->type != AttributeUnderlyingType::UInt64) throw Exception{name + ": hierarchical attribute must be UInt64.", ErrorCodes::TYPE_MISMATCH}; @@ -376,12 +377,12 @@ 
CacheDictionary::Attribute CacheDictionary::createAttributeWithType(const Attrib switch (type) { -#define DISPATCH(TYPE) \ - case AttributeUnderlyingType::TYPE: \ - attr.null_values = TYPE(null_value.get>()); \ - attr.arrays = std::make_unique>(size); \ - bytes_allocated += size * sizeof(TYPE); \ - break; +#define DISPATCH(TYPE) \ + case AttributeUnderlyingType::TYPE: \ + attr.null_values = TYPE(null_value.get>()); \ + attr.arrays = std::make_unique>(size); \ + bytes_allocated += size * sizeof(TYPE); \ + break; DISPATCH(UInt8) DISPATCH(UInt16) DISPATCH(UInt32) @@ -413,17 +414,39 @@ void CacheDictionary::setDefaultAttributeValue(Attribute & attribute, const Key { switch (attribute.type) { - case AttributeUnderlyingType::UInt8: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::UInt16: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::UInt32: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::UInt64: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::UInt128: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::Int8: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::Int16: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::Int32: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::Int64: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::Float32: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::Float64: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; + case AttributeUnderlyingType::UInt8: + 
std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::UInt16: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::UInt32: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::UInt64: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::UInt128: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::Int8: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::Int16: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::Int32: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::Int64: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::Float32: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::Float64: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; case AttributeUnderlyingType::Decimal32: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); @@ -457,21 +480,49 @@ void CacheDictionary::setAttributeValue(Attribute & attribute, const Key idx, co { switch (attribute.type) { - case AttributeUnderlyingType::UInt8: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::UInt16: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::UInt32: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::UInt64: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::UInt128: std::get>(attribute.arrays)[idx] = value.get(); break; - case 
AttributeUnderlyingType::Int8: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Int16: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Int32: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Int64: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Float32: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Float64: std::get>(attribute.arrays)[idx] = value.get(); break; + case AttributeUnderlyingType::UInt8: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::UInt16: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::UInt32: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::UInt64: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::UInt128: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Int8: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Int16: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Int32: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Int64: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Float32: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Float64: + std::get>(attribute.arrays)[idx] = value.get(); + break; - case AttributeUnderlyingType::Decimal32: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Decimal64: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Decimal128: std::get>(attribute.arrays)[idx] = value.get(); break; + case AttributeUnderlyingType::Decimal32: + 
std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Decimal64: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Decimal128: + std::get>(attribute.arrays)[idx] = value.get(); + break; case AttributeUnderlyingType::String: { @@ -509,8 +560,8 @@ CacheDictionary::Attribute & CacheDictionary::getAttribute(const std::string & a bool CacheDictionary::isEmptyCell(const UInt64 idx) const { - return (idx != zero_cell_idx && cells[idx].id == 0) || (cells[idx].data - == ext::safe_bit_cast(CellMetadata::time_point_t())); + return (idx != zero_cell_idx && cells[idx].id == 0) + || (cells[idx].data == ext::safe_bit_cast(CellMetadata::time_point_t())); } PaddedPODArray CacheDictionary::getCachedIds() const @@ -537,36 +588,31 @@ BlockInputStreamPtr CacheDictionary::getBlockInputStream(const Names & column_na void registerDictionaryCache(DictionaryFactory & factory) { - auto create_layout = [=]( - const std::string & name, - const DictionaryStructure & dict_struct, - const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, - DictionarySourcePtr source_ptr - ) -> DictionaryPtr { - + auto create_layout = [=](const std::string & name, + const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + DictionarySourcePtr source_ptr) -> DictionaryPtr { if (dict_struct.key) - throw Exception {"'key' is not supported for dictionary of layout 'cache'", ErrorCodes::UNSUPPORTED_METHOD}; + throw Exception{"'key' is not supported for dictionary of layout 'cache'", ErrorCodes::UNSUPPORTED_METHOD}; if (dict_struct.range_min || dict_struct.range_max) - throw Exception {name - + ": elements .structure.range_min and .structure.range_max should be defined only " - "for a dictionary of layout 'range_hashed'", - ErrorCodes::BAD_ARGUMENTS}; + throw Exception{name + + ": elements .structure.range_min and .structure.range_max should 
be defined only " + "for a dictionary of layout 'range_hashed'", + ErrorCodes::BAD_ARGUMENTS}; const auto & layout_prefix = config_prefix + ".layout"; const auto size = config.getInt(layout_prefix + ".cache.size_in_cells"); if (size == 0) - throw Exception {name + ": dictionary of layout 'cache' cannot have 0 cells", ErrorCodes::TOO_SMALL_BUFFER_SIZE}; + throw Exception{name + ": dictionary of layout 'cache' cannot have 0 cells", ErrorCodes::TOO_SMALL_BUFFER_SIZE}; const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); if (require_nonempty) - throw Exception {name + ": dictionary of layout 'cache' cannot have 'require_nonempty' attribute set", - ErrorCodes::BAD_ARGUMENTS}; + throw Exception{name + ": dictionary of layout 'cache' cannot have 'require_nonempty' attribute set", + ErrorCodes::BAD_ARGUMENTS}; - const DictionaryLifetime dict_lifetime {config, config_prefix + ".lifetime"}; + const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, size); - - }; factory.registerLayout("cache", create_layout); } diff --git a/dbms/src/Dictionaries/CacheDictionary.h b/dbms/src/Dictionaries/CacheDictionary.h index 8b72daaca23..1f8754c0c0a 100644 --- a/dbms/src/Dictionaries/CacheDictionary.h +++ b/dbms/src/Dictionaries/CacheDictionary.h @@ -1,31 +1,33 @@ #pragma once -#include "IDictionary.h" -#include "IDictionarySource.h" -#include "DictionaryStructure.h" -#include -#include -#include -#include -#include -#include #include #include -#include +#include #include -#include -#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include "DictionaryStructure.h" +#include "IDictionary.h" +#include "IDictionarySource.h" namespace DB { - class CacheDictionary final : public IDictionary { public: - CacheDictionary(const std::string & name, const DictionaryStructure & dict_struct, - DictionarySourcePtr 
source_ptr, const DictionaryLifetime dict_lifetime, + CacheDictionary( + const std::string & name, + const DictionaryStructure & dict_struct, + DictionarySourcePtr source_ptr, + const DictionaryLifetime dict_lifetime, const size_t size); CacheDictionary(const CacheDictionary & other); @@ -42,16 +44,12 @@ public: double getHitRate() const override { - return static_cast(hit_count.load(std::memory_order_acquire)) / - query_count.load(std::memory_order_relaxed); + return static_cast(hit_count.load(std::memory_order_acquire)) / query_count.load(std::memory_order_relaxed); } size_t getElementCount() const override { return element_count.load(std::memory_order_relaxed); } - double getLoadFactor() const override - { - return static_cast(element_count.load(std::memory_order_relaxed)) / size; - } + double getLoadFactor() const override { return static_cast(element_count.load(std::memory_order_relaxed)) / size; } bool isCached() const override { return true; } @@ -63,10 +61,7 @@ public: const DictionaryStructure & getStructure() const override { return dict_struct; } - std::chrono::time_point getCreationTime() const override - { - return creation_time; - } + std::chrono::time_point getCreationTime() const override { return creation_time; } bool isInjective(const std::string & attribute_name) const override { @@ -77,14 +72,15 @@ public: void toParent(const PaddedPODArray & ids, PaddedPODArray & out) const override; - void isInVectorVector(const PaddedPODArray & child_ids, const PaddedPODArray & ancestor_ids, PaddedPODArray & out) const override; + void isInVectorVector( + const PaddedPODArray & child_ids, const PaddedPODArray & ancestor_ids, PaddedPODArray & out) const override; void isInVectorConstant(const PaddedPODArray & child_ids, const Key ancestor_id, PaddedPODArray & out) const override; void isInConstantVector(const Key child_id, const PaddedPODArray & ancestor_ids, PaddedPODArray & out) const override; template using ResultArrayType = std::conditional_t, 
DecimalPaddedPODArray, PaddedPODArray>; -#define DECLARE(TYPE)\ +#define DECLARE(TYPE) \ void get##TYPE(const std::string & attribute_name, const PaddedPODArray & ids, ResultArrayType & out) const; DECLARE(UInt8) DECLARE(UInt16) @@ -104,9 +100,11 @@ public: void getString(const std::string & attribute_name, const PaddedPODArray & ids, ColumnString * out) const; -#define DECLARE(TYPE)\ - void get##TYPE(\ - const std::string & attribute_name, const PaddedPODArray & ids, const PaddedPODArray & def,\ +#define DECLARE(TYPE) \ + void get##TYPE( \ + const std::string & attribute_name, \ + const PaddedPODArray & ids, \ + const PaddedPODArray & def, \ ResultArrayType & out) const; DECLARE(UInt8) DECLARE(UInt16) @@ -124,11 +122,11 @@ public: DECLARE(Decimal128) #undef DECLARE - void getString( - const std::string & attribute_name, const PaddedPODArray & ids, const ColumnString * const def, - ColumnString * const out) const; + void + getString(const std::string & attribute_name, const PaddedPODArray & ids, const ColumnString * const def, ColumnString * const out) + const; -#define DECLARE(TYPE)\ +#define DECLARE(TYPE) \ void get##TYPE(const std::string & attribute_name, const PaddedPODArray & ids, const TYPE def, ResultArrayType & out) const; DECLARE(UInt8) DECLARE(UInt16) @@ -146,17 +144,17 @@ public: DECLARE(Decimal128) #undef DECLARE - void getString( - const std::string & attribute_name, const PaddedPODArray & ids, const String & def, - ColumnString * const out) const; + void getString(const std::string & attribute_name, const PaddedPODArray & ids, const String & def, ColumnString * const out) const; void has(const PaddedPODArray & ids, PaddedPODArray & out) const override; BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override; private: - template using ContainerType = Value[]; - template using ContainerPtrType = std::unique_ptr>; + template + using ContainerType = Value[]; + template + using ContainerPtrType = 
std::unique_ptr>; struct CellMetadata final { @@ -183,19 +181,39 @@ private: { AttributeUnderlyingType type; std::variant< - UInt8, UInt16, UInt32, UInt64, + UInt8, + UInt16, + UInt32, + UInt64, UInt128, - Int8, Int16, Int32, Int64, - Decimal32, Decimal64, Decimal128, - Float32, Float64, - String> null_values; + Int8, + Int16, + Int32, + Int64, + Decimal32, + Decimal64, + Decimal128, + Float32, + Float64, + String> + null_values; std::variant< - ContainerPtrType, ContainerPtrType, ContainerPtrType, ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, ContainerPtrType, - ContainerPtrType, ContainerPtrType, ContainerPtrType, ContainerPtrType, - ContainerPtrType, ContainerPtrType, ContainerPtrType, - ContainerPtrType, ContainerPtrType, - ContainerPtrType> arrays; + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType> + arrays; }; void createAttributes(); @@ -205,29 +223,17 @@ private: template void getItemsNumber( - Attribute & attribute, - const PaddedPODArray & ids, - ResultArrayType & out, - DefaultGetter && get_default) const; + Attribute & attribute, const PaddedPODArray & ids, ResultArrayType & out, DefaultGetter && get_default) const; template void getItemsNumberImpl( - Attribute & attribute, - const PaddedPODArray & ids, - ResultArrayType & out, - DefaultGetter && get_default) const; + Attribute & attribute, const PaddedPODArray & ids, ResultArrayType & out, DefaultGetter && get_default) const; template - void getItemsString( - Attribute & attribute, - const PaddedPODArray & ids, - ColumnString * out, - DefaultGetter && get_default) const; + void getItemsString(Attribute & attribute, const PaddedPODArray & ids, ColumnString * out, DefaultGetter && get_default) const; template - void update( - const std::vector & requested_ids, PresentIdHandler && on_cell_updated, - 
AbsentIdHandler && on_id_not_found) const; + void update(const std::vector & requested_ids, PresentIdHandler && on_cell_updated, AbsentIdHandler && on_id_not_found) const; PaddedPODArray getCachedIds() const; @@ -251,10 +257,7 @@ private: FindResult findCellIdx(const Key & id, const CellMetadata::time_point_t now) const; template - void isInImpl( - const PaddedPODArray & child_ids, - const AncestorType & ancestor_ids, - PaddedPODArray & out) const; + void isInImpl(const PaddedPODArray & child_ids, const AncestorType & ancestor_ids, PaddedPODArray & out) const; const std::string name; const DictionaryStructure dict_struct; diff --git a/dbms/src/Dictionaries/CacheDictionary.inc.h b/dbms/src/Dictionaries/CacheDictionary.inc.h index 6fc082ab267..25d6786fbd3 100644 --- a/dbms/src/Dictionaries/CacheDictionary.inc.h +++ b/dbms/src/Dictionaries/CacheDictionary.inc.h @@ -1,34 +1,33 @@ #include "CacheDictionary.h" -#include -#include -#include +#include #include #include -#include +#include +#include +#include namespace ProfileEvents { - extern const Event DictCacheKeysRequested; - extern const Event DictCacheKeysRequestedMiss; - extern const Event DictCacheKeysRequestedFound; - extern const Event DictCacheKeysExpired; - extern const Event DictCacheKeysNotFound; - extern const Event DictCacheKeysHit; - extern const Event DictCacheRequestTimeNs; - extern const Event DictCacheRequests; - extern const Event DictCacheLockWriteNs; - extern const Event DictCacheLockReadNs; +extern const Event DictCacheKeysRequested; +extern const Event DictCacheKeysRequestedMiss; +extern const Event DictCacheKeysRequestedFound; +extern const Event DictCacheKeysExpired; +extern const Event DictCacheKeysNotFound; +extern const Event DictCacheKeysHit; +extern const Event DictCacheRequestTimeNs; +extern const Event DictCacheRequests; +extern const Event DictCacheLockWriteNs; +extern const Event DictCacheLockReadNs; } namespace CurrentMetrics { - extern const Metric DictCacheRequests; +extern const 
Metric DictCacheRequests; } namespace DB { - namespace ErrorCodes { extern const int TYPE_MISMATCH; @@ -36,13 +35,12 @@ namespace ErrorCodes template void CacheDictionary::getItemsNumber( - Attribute & attribute, - const PaddedPODArray & ids, - ResultArrayType & out, - DefaultGetter && get_default) const + Attribute & attribute, const PaddedPODArray & ids, ResultArrayType & out, DefaultGetter && get_default) const { - if (false) {} -#define DISPATCH(TYPE) \ + if (false) + { + } +#define DISPATCH(TYPE) \ else if (attribute.type == AttributeUnderlyingType::TYPE) \ getItemsNumberImpl(attribute, ids, out, std::forward(get_default)); DISPATCH(UInt8) @@ -60,16 +58,12 @@ void CacheDictionary::getItemsNumber( DISPATCH(Decimal64) DISPATCH(Decimal128) #undef DISPATCH - else - throw Exception("Unexpected type of attribute: " + toString(attribute.type), ErrorCodes::LOGICAL_ERROR); + else throw Exception("Unexpected type of attribute: " + toString(attribute.type), ErrorCodes::LOGICAL_ERROR); } template void CacheDictionary::getItemsNumberImpl( - Attribute & attribute, - const PaddedPODArray & ids, - ResultArrayType & out, - DefaultGetter && get_default) const + Attribute & attribute, const PaddedPODArray & ids, ResultArrayType & out, DefaultGetter && get_default) const { /// Mapping: -> { all indices `i` of `ids` such that `ids[i]` = } std::unordered_map> outdated_ids; @@ -122,31 +116,28 @@ void CacheDictionary::getItemsNumberImpl( return; std::vector required_ids(outdated_ids.size()); - std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), - [] (auto & pair) { return pair.first; }); + std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), [](auto & pair) { return pair.first; }); /// request new values - update(required_ids, - [&] (const auto id, const auto cell_idx) - { - const auto attribute_value = attribute_array[cell_idx]; + update( + required_ids, + [&](const auto id, const auto cell_idx) + { + const 
auto attribute_value = attribute_array[cell_idx]; - for (const size_t row : outdated_ids[id]) - out[row] = static_cast(attribute_value); - }, - [&] (const auto id, const auto) - { - for (const size_t row : outdated_ids[id]) - out[row] = get_default(row); - }); + for (const size_t row : outdated_ids[id]) + out[row] = static_cast(attribute_value); + }, + [&](const auto id, const auto) + { + for (const size_t row : outdated_ids[id]) + out[row] = get_default(row); + }); } template void CacheDictionary::getItemsString( - Attribute & attribute, - const PaddedPODArray & ids, - ColumnString * out, - DefaultGetter && get_default) const + Attribute & attribute, const PaddedPODArray & ids, ColumnString * out, DefaultGetter && get_default) const { const auto rows = ext::size(ids); @@ -245,22 +236,22 @@ void CacheDictionary::getItemsString( if (!outdated_ids.empty()) { std::vector required_ids(outdated_ids.size()); - std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), - [] (auto & pair) { return pair.first; }); + std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), [](auto & pair) { return pair.first; }); - update(required_ids, - [&] (const auto id, const auto cell_idx) - { - const auto attribute_value = attribute_array[cell_idx]; + update( + required_ids, + [&](const auto id, const auto cell_idx) + { + const auto attribute_value = attribute_array[cell_idx]; - map[id] = String{attribute_value}; - total_length += (attribute_value.size + 1) * outdated_ids[id].size(); - }, - [&] (const auto id, const auto) - { - for (const auto row : outdated_ids[id]) - total_length += get_default(row).size + 1; - }); + map[id] = String{attribute_value}; + total_length += (attribute_value.size + 1) * outdated_ids[id].size(); + }, + [&](const auto id, const auto) + { + for (const auto row : outdated_ids[id]) + total_length += get_default(row).size + 1; + }); } out->getChars().reserve(total_length); @@ -277,19 +268,13 @@ 
void CacheDictionary::getItemsString( template void CacheDictionary::update( - const std::vector & requested_ids, - PresentIdHandler && on_cell_updated, - AbsentIdHandler && on_id_not_found) const + const std::vector & requested_ids, PresentIdHandler && on_cell_updated, AbsentIdHandler && on_id_not_found) const { std::unordered_map remaining_ids{requested_ids.size()}; for (const auto id : requested_ids) - remaining_ids.insert({ id, 0 }); + remaining_ids.insert({id, 0}); - std::uniform_int_distribution distribution - { - dict_lifetime.min_sec, - dict_lifetime.max_sec - }; + std::uniform_int_distribution distribution{dict_lifetime.min_sec, dict_lifetime.max_sec}; const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs}; @@ -310,10 +295,8 @@ void CacheDictionary::update( const auto & ids = id_column->getData(); /// cache column pointers - const auto column_ptrs = ext::map(ext::range(0, attributes.size()), [&block] (size_t i) - { - return block.safeGetByPosition(i + 1).column.get(); - }); + const auto column_ptrs = ext::map( + ext::range(0, attributes.size()), [&block](size_t i) { return block.safeGetByPosition(i + 1).column.get(); }); for (const auto i : ext::range(0, ids.size())) { diff --git a/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp b/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp index 161a157ffaa..faa93055073 100644 --- a/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -1,21 +1,20 @@ #include "ClickHouseDictionarySource.h" -#include "ExternalQueryBuilder.h" -#include "writeParenthesisedString.h" +#include #include #include -#include "readInvalidateQuery.h" +#include #include #include -#include #include -#include #include "DictionarySourceFactory.h" #include "DictionaryStructure.h" +#include "ExternalQueryBuilder.h" +#include "readInvalidateQuery.h" +#include "writeParenthesisedString.h" namespace DB { - namespace ErrorCodes { extern const int 
UNSUPPORTED_METHOD; @@ -25,61 +24,81 @@ namespace ErrorCodes static const size_t MAX_CONNECTIONS = 16; static ConnectionPoolWithFailoverPtr createPool( - const std::string & host, UInt16 port, bool secure, const std::string & db, - const std::string & user, const std::string & password, const Context & context) + const std::string & host, + UInt16 port, + bool secure, + const std::string & db, + const std::string & user, + const std::string & password, + const Context & context) { auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(context.getSettingsRef()); ConnectionPoolPtrs pools; pools.emplace_back(std::make_shared( - MAX_CONNECTIONS, host, port, db, user, password, timeouts, "ClickHouseDictionarySource", - Protocol::Compression::Enable, - secure ? Protocol::Secure::Enable : Protocol::Secure::Disable)); + MAX_CONNECTIONS, + host, + port, + db, + user, + password, + timeouts, + "ClickHouseDictionarySource", + Protocol::Compression::Enable, + secure ? Protocol::Secure::Enable : Protocol::Secure::Disable)); return std::make_shared(pools, LoadBalancing::RANDOM); } ClickHouseDictionarySource::ClickHouseDictionarySource( - const DictionaryStructure & dict_struct_, - const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, - const Block & sample_block, Context & context) - : update_time{std::chrono::system_clock::from_time_t(0)}, - dict_struct{dict_struct_}, - host{config.getString(config_prefix + ".host")}, - port(config.getInt(config_prefix + ".port")), - secure(config.getBool(config_prefix + ".secure", false)), - user{config.getString(config_prefix + ".user", "")}, - password{config.getString(config_prefix + ".password", "")}, - db{config.getString(config_prefix + ".db", "")}, - table{config.getString(config_prefix + ".table")}, - where{config.getString(config_prefix + ".where", "")}, - update_field{config.getString(config_prefix + ".update_field", "")}, - invalidate_query{config.getString(config_prefix + 
".invalidate_query", "")}, - query_builder{dict_struct, db, table, where, IdentifierQuotingStyle::Backticks}, - sample_block{sample_block}, context(context), - is_local{isLocalAddress({ host, port }, config.getInt("tcp_port", 0))}, - pool{is_local ? nullptr : createPool(host, port, secure, db, user, password, context)}, - load_all_query{query_builder.composeLoadAllQuery()} -{} + const DictionaryStructure & dict_struct_, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + const Block & sample_block, + Context & context) + : update_time{std::chrono::system_clock::from_time_t(0)} + , dict_struct{dict_struct_} + , host{config.getString(config_prefix + ".host")} + , port(config.getInt(config_prefix + ".port")) + , secure(config.getBool(config_prefix + ".secure", false)) + , user{config.getString(config_prefix + ".user", "")} + , password{config.getString(config_prefix + ".password", "")} + , db{config.getString(config_prefix + ".db", "")} + , table{config.getString(config_prefix + ".table")} + , where{config.getString(config_prefix + ".where", "")} + , update_field{config.getString(config_prefix + ".update_field", "")} + , invalidate_query{config.getString(config_prefix + ".invalidate_query", "")} + , query_builder{dict_struct, db, table, where, IdentifierQuotingStyle::Backticks} + , sample_block{sample_block} + , context(context) + , is_local{isLocalAddress({host, port}, config.getInt("tcp_port", 0))} + , pool{is_local ? 
nullptr : createPool(host, port, secure, db, user, password, context)} + , load_all_query{query_builder.composeLoadAllQuery()} +{ +} ClickHouseDictionarySource::ClickHouseDictionarySource(const ClickHouseDictionarySource & other) - : update_time{other.update_time}, - dict_struct{other.dict_struct}, - host{other.host}, port{other.port}, - secure{other.secure}, - user{other.user}, password{other.password}, - db{other.db}, table{other.table}, - where{other.where}, - update_field{other.update_field}, - invalidate_query{other.invalidate_query}, - invalidate_query_response{other.invalidate_query_response}, - query_builder{dict_struct, db, table, where, IdentifierQuotingStyle::Backticks}, - sample_block{other.sample_block}, context(other.context), - is_local{other.is_local}, - pool{is_local ? nullptr : createPool(host, port, secure, db, user, password, context)}, - load_all_query{other.load_all_query} -{} + : update_time{other.update_time} + , dict_struct{other.dict_struct} + , host{other.host} + , port{other.port} + , secure{other.secure} + , user{other.user} + , password{other.password} + , db{other.db} + , table{other.table} + , where{other.where} + , update_field{other.update_field} + , invalidate_query{other.invalidate_query} + , invalidate_query_response{other.invalidate_query_response} + , query_builder{dict_struct, db, table, where, IdentifierQuotingStyle::Backticks} + , sample_block{other.sample_block} + , context(other.context) + , is_local{other.is_local} + , pool{is_local ? 
nullptr : createPool(host, port, secure, db, user, password, context)} + , load_all_query{other.load_all_query} +{ +} std::string ClickHouseDictionarySource::getUpdateFieldAndDate() { @@ -119,17 +138,14 @@ BlockInputStreamPtr ClickHouseDictionarySource::loadUpdatedAll() BlockInputStreamPtr ClickHouseDictionarySource::loadIds(const std::vector & ids) { - return createStreamForSelectiveLoad( - query_builder.composeLoadIdsQuery(ids)); + return createStreamForSelectiveLoad(query_builder.composeLoadIdsQuery(ids)); } -BlockInputStreamPtr ClickHouseDictionarySource::loadKeys( - const Columns & key_columns, const std::vector & requested_rows) +BlockInputStreamPtr ClickHouseDictionarySource::loadKeys(const Columns & key_columns, const std::vector & requested_rows) { return createStreamForSelectiveLoad( - query_builder.composeLoadKeysQuery( - key_columns, requested_rows, ExternalQueryBuilder::IN_WITH_TUPLES)); + query_builder.composeLoadKeysQuery(key_columns, requested_rows, ExternalQueryBuilder::IN_WITH_TUPLES)); } bool ClickHouseDictionarySource::isModified() const @@ -167,7 +183,7 @@ std::string ClickHouseDictionarySource::doInvalidateQuery(const std::string & re if (is_local) { auto input_block = executeQuery(request, context, true).in; - return readInvalidateQuery(dynamic_cast((*input_block))); + return readInvalidateQuery(dynamic_cast((*input_block))); } else { diff --git a/dbms/src/Dictionaries/ClickHouseDictionarySource.h b/dbms/src/Dictionaries/ClickHouseDictionarySource.h index 89db23737bc..bf8653932f7 100644 --- a/dbms/src/Dictionaries/ClickHouseDictionarySource.h +++ b/dbms/src/Dictionaries/ClickHouseDictionarySource.h @@ -1,15 +1,14 @@ #pragma once -#include "IDictionarySource.h" +#include +#include #include "DictionaryStructure.h" #include "ExternalQueryBuilder.h" -#include -#include +#include "IDictionarySource.h" namespace DB { - /** Allows loading dictionaries from local or remote ClickHouse instance * @todo use ConnectionPoolWithFailover * @todo invent a 
way to keep track of source modifications @@ -17,10 +16,12 @@ namespace DB class ClickHouseDictionarySource final : public IDictionarySource { public: - ClickHouseDictionarySource(const DictionaryStructure & dict_struct_, + ClickHouseDictionarySource( + const DictionaryStructure & dict_struct_, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, - const Block & sample_block, Context & context); + const Block & sample_block, + Context & context); /// copy-constructor is provided in order to support cloneability ClickHouseDictionarySource(const ClickHouseDictionarySource & other); @@ -31,8 +32,7 @@ public: BlockInputStreamPtr loadIds(const std::vector & ids) override; - BlockInputStreamPtr loadKeys( - const Columns & key_columns, const std::vector & requested_rows) override; + BlockInputStreamPtr loadKeys(const Columns & key_columns, const std::vector & requested_rows) override; bool isModified() const override; bool supportsSelectiveLoad() const override { return true; } diff --git a/dbms/src/Dictionaries/ComplexKeyCacheDictionary.cpp b/dbms/src/Dictionaries/ComplexKeyCacheDictionary.cpp index 61693a3538a..1d71d072e9b 100644 --- a/dbms/src/Dictionaries/ComplexKeyCacheDictionary.cpp +++ b/dbms/src/Dictionaries/ComplexKeyCacheDictionary.cpp @@ -1,40 +1,38 @@ #include "ComplexKeyCacheDictionary.h" -#include "DictionaryBlockInputStream.h" #include #include -#include -#include -#include -#include #include -#include +#include +#include +#include +#include #include +#include +#include "DictionaryBlockInputStream.h" #include "DictionaryFactory.h" namespace ProfileEvents { - - extern const Event DictCacheKeysRequested; - extern const Event DictCacheKeysRequestedMiss; - extern const Event DictCacheKeysRequestedFound; - extern const Event DictCacheKeysExpired; - extern const Event DictCacheKeysNotFound; - extern const Event DictCacheKeysHit; - extern const Event DictCacheRequestTimeNs; - extern const Event DictCacheLockWriteNs; - extern const 
Event DictCacheLockReadNs; +extern const Event DictCacheKeysRequested; +extern const Event DictCacheKeysRequestedMiss; +extern const Event DictCacheKeysRequestedFound; +extern const Event DictCacheKeysExpired; +extern const Event DictCacheKeysNotFound; +extern const Event DictCacheKeysHit; +extern const Event DictCacheRequestTimeNs; +extern const Event DictCacheLockWriteNs; +extern const Event DictCacheLockReadNs; } namespace CurrentMetrics { - extern const Metric DictCacheRequests; +extern const Metric DictCacheRequests; } namespace DB { - namespace ErrorCodes { extern const int TYPE_MISMATCH; @@ -52,13 +50,19 @@ inline UInt64 ComplexKeyCacheDictionary::getCellIdx(const StringRef key) const } -ComplexKeyCacheDictionary::ComplexKeyCacheDictionary(const std::string & name, const DictionaryStructure & dict_struct, - DictionarySourcePtr source_ptr, const DictionaryLifetime dict_lifetime, +ComplexKeyCacheDictionary::ComplexKeyCacheDictionary( + const std::string & name, + const DictionaryStructure & dict_struct, + DictionarySourcePtr source_ptr, + const DictionaryLifetime dict_lifetime, const size_t size) - : name{name}, dict_struct(dict_struct), source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime), - size{roundUpToPowerOfTwoOrZero(std::max(size, size_t(max_collision_length)))}, - size_overlap_mask{this->size - 1}, - rnd_engine(randomSeed()) + : name{name} + , dict_struct(dict_struct) + , source_ptr{std::move(source_ptr)} + , dict_lifetime(dict_lifetime) + , size{roundUpToPowerOfTwoOrZero(std::max(size, size_t(max_collision_length)))} + , size_overlap_mask{this->size - 1} + , rnd_engine(randomSeed()) { if (!this->source_ptr->supportsSelectiveLoad()) throw Exception{name + ": source cannot be used with ComplexKeyCacheDictionary", ErrorCodes::UNSUPPORTED_METHOD}; @@ -68,47 +72,56 @@ ComplexKeyCacheDictionary::ComplexKeyCacheDictionary(const std::string & name, c ComplexKeyCacheDictionary::ComplexKeyCacheDictionary(const ComplexKeyCacheDictionary & other) : 
ComplexKeyCacheDictionary{other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime, other.size} -{} +{ +} void ComplexKeyCacheDictionary::getString( - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, - ColumnString * out) const + const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const { dict_struct.validateKeyTypes(key_types); auto & attribute = getAttribute(attribute_name); if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String)) - throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; + throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), + ErrorCodes::TYPE_MISMATCH}; const auto null_value = StringRef{std::get(attribute.null_values)}; - getItemsString(attribute, key_columns, out, [&] (const size_t) { return null_value; }); + getItemsString(attribute, key_columns, out, [&](const size_t) { return null_value; }); } void ComplexKeyCacheDictionary::getString( - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, - const ColumnString * const def, ColumnString * const out) const + const std::string & attribute_name, + const Columns & key_columns, + const DataTypes & key_types, + const ColumnString * const def, + ColumnString * const out) const { dict_struct.validateKeyTypes(key_types); auto & attribute = getAttribute(attribute_name); if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String)) - throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; + throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), + ErrorCodes::TYPE_MISMATCH}; - getItemsString(attribute, 
key_columns, out, [&] (const size_t row) { return def->getDataAt(row); }); + getItemsString(attribute, key_columns, out, [&](const size_t row) { return def->getDataAt(row); }); } void ComplexKeyCacheDictionary::getString( - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, - const String & def, ColumnString * const out) const + const std::string & attribute_name, + const Columns & key_columns, + const DataTypes & key_types, + const String & def, + ColumnString * const out) const { dict_struct.validateKeyTypes(key_types); auto & attribute = getAttribute(attribute_name); if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String)) - throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; + throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), + ErrorCodes::TYPE_MISMATCH}; - getItemsString(attribute, key_columns, out, [&] (const size_t) { return StringRef{def}; }); + getItemsString(attribute, key_columns, out, [&](const size_t) { return StringRef{def}; }); } /// returns cell_idx (always valid for replacing), 'cell is valid' flag, 'cell is outdated' flag, @@ -118,7 +131,8 @@ void ComplexKeyCacheDictionary::getString( /// true true impossible /// /// todo: split this func to two: find_for_get and find_for_set -ComplexKeyCacheDictionary::FindResult ComplexKeyCacheDictionary::findCellIdx(const StringRef & key, const CellMetadata::time_point_t now, const size_t hash) const +ComplexKeyCacheDictionary::FindResult +ComplexKeyCacheDictionary::findCellIdx(const StringRef & key, const CellMetadata::time_point_t now, const size_t hash) const { auto pos = hash; auto oldest_id = pos; @@ -211,17 +225,20 @@ void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes return; std::vector required_rows(outdated_keys.size()); - 
std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), - [] (auto & pair) { return pair.second.front(); }); + std::transform( + std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.second.front(); }); /// request new values - update(key_columns, keys_array, required_rows, - [&] (const StringRef key, const auto) + update( + key_columns, + keys_array, + required_rows, + [&](const StringRef key, const auto) { for (const auto out_idx : outdated_keys[key]) out[out_idx] = true; }, - [&] (const StringRef key, const auto) + [&](const StringRef key, const auto) { for (const auto out_idx : outdated_keys[key]) out[out_idx] = false; @@ -242,7 +259,8 @@ void ComplexKeyCacheDictionary::createAttributes() attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value)); if (attribute.hierarchical) - throw Exception{name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(), ErrorCodes::TYPE_MISMATCH}; + throw Exception{name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(), + ErrorCodes::TYPE_MISMATCH}; } } @@ -273,8 +291,7 @@ void ComplexKeyCacheDictionary::freeKey(const StringRef key) const template StringRef ComplexKeyCacheDictionary::placeKeysInPool( - const size_t row, const Columns & key_columns, StringRefs & keys, - const std::vector & key_attributes, Pool & pool) + const size_t row, const Columns & key_columns, StringRefs & keys, const std::vector & key_attributes, Pool & pool) { const auto keys_size = key_columns.size(); size_t sum_keys_size{}; @@ -313,22 +330,27 @@ StringRef ComplexKeyCacheDictionary::placeKeysInPool( } } - return { place, sum_keys_size }; + return {place, sum_keys_size}; } /// Explicit instantiations. 
template StringRef ComplexKeyCacheDictionary::placeKeysInPool( - const size_t row, const Columns & key_columns, StringRefs & keys, - const std::vector & key_attributes, Arena & pool); + const size_t row, + const Columns & key_columns, + StringRefs & keys, + const std::vector & key_attributes, + Arena & pool); template StringRef ComplexKeyCacheDictionary::placeKeysInPool( - const size_t row, const Columns & key_columns, StringRefs & keys, - const std::vector & key_attributes, ArenaWithFreeLists & pool); + const size_t row, + const Columns & key_columns, + StringRefs & keys, + const std::vector & key_attributes, + ArenaWithFreeLists & pool); -StringRef ComplexKeyCacheDictionary::placeKeysInFixedSizePool( - const size_t row, const Columns & key_columns) const +StringRef ComplexKeyCacheDictionary::placeKeysInFixedSizePool(const size_t row, const Columns & key_columns) const { const auto res = fixed_size_keys_pool->alloc(); auto place = res; @@ -340,14 +362,14 @@ StringRef ComplexKeyCacheDictionary::placeKeysInFixedSizePool( place += key.size; } - return { res, key_size }; + return {res, key_size}; } StringRef ComplexKeyCacheDictionary::copyIntoArena(StringRef src, Arena & arena) { char * allocated = arena.alloc(src.size); memcpy(allocated, src.data, src.size); - return { allocated, src.size }; + return {allocated, src.size}; } StringRef ComplexKeyCacheDictionary::copyKey(const StringRef key) const @@ -355,13 +377,14 @@ StringRef ComplexKeyCacheDictionary::copyKey(const StringRef key) const const auto res = key_size_is_fixed ? 
fixed_size_keys_pool->alloc() : keys_pool->alloc(key.size); memcpy(res, key.data, key.size); - return { res, key.size }; + return {res, key.size}; } bool ComplexKeyCacheDictionary::isEmptyCell(const UInt64 idx) const { - return (cells[idx].key == StringRef{} && (idx != zero_cell_idx - || cells[idx].data == ext::safe_bit_cast(CellMetadata::time_point_t()))); + return ( + cells[idx].key == StringRef{} + && (idx != zero_cell_idx || cells[idx].data == ext::safe_bit_cast(CellMetadata::time_point_t()))); } BlockInputStreamPtr ComplexKeyCacheDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const @@ -371,8 +394,7 @@ BlockInputStreamPtr ComplexKeyCacheDictionary::getBlockInputStream(const Names & const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; for (auto idx : ext::range(0, cells.size())) - if (!isEmptyCell(idx) - && !cells[idx].isDefault()) + if (!isEmptyCell(idx) && !cells[idx].isDefault()) keys.push_back(cells[idx].key); } @@ -382,26 +404,24 @@ BlockInputStreamPtr ComplexKeyCacheDictionary::getBlockInputStream(const Names & void registerDictionaryComplexKeyCache(DictionaryFactory & factory) { - auto create_layout = [=]( - const std::string & name, - const DictionaryStructure & dict_struct, - const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, - DictionarySourcePtr source_ptr - ) -> DictionaryPtr { + auto create_layout = [=](const std::string & name, + const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + DictionarySourcePtr source_ptr) -> DictionaryPtr { if (!dict_struct.key) - throw Exception {"'key' is required for dictionary of layout 'complex_key_hashed'", ErrorCodes::BAD_ARGUMENTS}; + throw Exception{"'key' is required for dictionary of layout 'complex_key_hashed'", ErrorCodes::BAD_ARGUMENTS}; const auto & layout_prefix = config_prefix + ".layout"; const auto size = 
config.getInt(layout_prefix + ".complex_key_cache.size_in_cells"); if (size == 0) - throw Exception {name + ": dictionary of layout 'cache' cannot have 0 cells", ErrorCodes::TOO_SMALL_BUFFER_SIZE}; + throw Exception{name + ": dictionary of layout 'cache' cannot have 0 cells", ErrorCodes::TOO_SMALL_BUFFER_SIZE}; const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); if (require_nonempty) - throw Exception {name + ": dictionary of layout 'cache' cannot have 'require_nonempty' attribute set", - ErrorCodes::BAD_ARGUMENTS}; + throw Exception{name + ": dictionary of layout 'cache' cannot have 'require_nonempty' attribute set", + ErrorCodes::BAD_ARGUMENTS}; - const DictionaryLifetime dict_lifetime {config, config_prefix + ".lifetime"}; + const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, size); }; factory.registerLayout("complex_key_cache", create_layout); diff --git a/dbms/src/Dictionaries/ComplexKeyCacheDictionary.h b/dbms/src/Dictionaries/ComplexKeyCacheDictionary.h index f60e142db5e..22a2d51e963 100644 --- a/dbms/src/Dictionaries/ComplexKeyCacheDictionary.h +++ b/dbms/src/Dictionaries/ComplexKeyCacheDictionary.h @@ -3,23 +3,23 @@ #include #include #include +#include #include #include -#include #include #include +#include #include #include #include #include -#include "DictionaryStructure.h" -#include "IDictionary.h" -#include "IDictionarySource.h" #include #include #include #include -#include +#include "DictionaryStructure.h" +#include "IDictionary.h" +#include "IDictionarySource.h" namespace ProfileEvents @@ -40,7 +40,8 @@ namespace DB class ComplexKeyCacheDictionary final : public IDictionaryBase { public: - ComplexKeyCacheDictionary(const std::string & name, + ComplexKeyCacheDictionary( + const std::string & name, const DictionaryStructure & dict_struct, DictionarySourcePtr source_ptr, const DictionaryLifetime dict_lifetime, 
@@ -48,25 +49,13 @@ public: ComplexKeyCacheDictionary(const ComplexKeyCacheDictionary & other); - std::string getKeyDescription() const - { - return key_description; - } + std::string getKeyDescription() const { return key_description; } - std::exception_ptr getCreationException() const override - { - return {}; - } + std::exception_ptr getCreationException() const override { return {}; } - std::string getName() const override - { - return name; - } + std::string getName() const override { return name; } - std::string getTypeName() const override - { - return "ComplexKeyCache"; - } + std::string getTypeName() const override { return "ComplexKeyCache"; } size_t getBytesAllocated() const override { @@ -74,55 +63,28 @@ public: + (string_arena ? string_arena->size() : 0); } - size_t getQueryCount() const override - { - return query_count.load(std::memory_order_relaxed); - } + size_t getQueryCount() const override { return query_count.load(std::memory_order_relaxed); } double getHitRate() const override { return static_cast(hit_count.load(std::memory_order_acquire)) / query_count.load(std::memory_order_relaxed); } - size_t getElementCount() const override - { - return element_count.load(std::memory_order_relaxed); - } + size_t getElementCount() const override { return element_count.load(std::memory_order_relaxed); } - double getLoadFactor() const override - { - return static_cast(element_count.load(std::memory_order_relaxed)) / size; - } + double getLoadFactor() const override { return static_cast(element_count.load(std::memory_order_relaxed)) / size; } - bool isCached() const override - { - return true; - } + bool isCached() const override { return true; } - std::unique_ptr clone() const override - { - return std::make_unique(*this); - } + std::unique_ptr clone() const override { return std::make_unique(*this); } - const IDictionarySource * getSource() const override - { - return source_ptr.get(); - } + const IDictionarySource * getSource() const override { return 
source_ptr.get(); } - const DictionaryLifetime & getLifetime() const override - { - return dict_lifetime; - } + const DictionaryLifetime & getLifetime() const override { return dict_lifetime; } - const DictionaryStructure & getStructure() const override - { - return dict_struct; - } + const DictionaryStructure & getStructure() const override { return dict_struct; } - std::chrono::time_point getCreationTime() const override - { - return creation_time; - } + std::chrono::time_point getCreationTime() const override { return creation_time; } bool isInjective(const std::string & attribute_name) const override { @@ -155,11 +117,12 @@ public: void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const; -#define DECLARE(TYPE) \ - void get##TYPE(const std::string & attribute_name, \ - const Columns & key_columns, \ - const DataTypes & key_types, \ - const PaddedPODArray & def, \ +#define DECLARE(TYPE) \ + void get##TYPE( \ + const std::string & attribute_name, \ + const Columns & key_columns, \ + const DataTypes & key_types, \ + const PaddedPODArray & def, \ ResultArrayType & out) const; DECLARE(UInt8) DECLARE(UInt16) @@ -177,17 +140,19 @@ public: DECLARE(Decimal128) #undef DECLARE - void getString(const std::string & attribute_name, + void getString( + const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const ColumnString * const def, ColumnString * const out) const; -#define DECLARE(TYPE) \ - void get##TYPE(const std::string & attribute_name, \ - const Columns & key_columns, \ - const DataTypes & key_types, \ - const TYPE def, \ +#define DECLARE(TYPE) \ + void get##TYPE( \ + const std::string & attribute_name, \ + const Columns & key_columns, \ + const DataTypes & key_types, \ + const TYPE def, \ ResultArrayType & out) const; DECLARE(UInt8) DECLARE(UInt16) @@ -205,7 +170,8 @@ public: DECLARE(Decimal128) #undef DECLARE - void getString(const std::string & 
attribute_name, + void getString( + const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const String & def, @@ -216,9 +182,12 @@ public: BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override; private: - template using MapType = HashMapWithSavedHash; - template using ContainerType = Value[]; - template using ContainerPtrType = std::unique_ptr>; + template + using MapType = HashMapWithSavedHash; + template + using ContainerType = Value[]; + template + using ContainerPtrType = std::unique_ptr>; struct CellMetadata final { @@ -235,32 +204,35 @@ private: time_point_urep_t data; /// Sets expiration time, resets `is_default` flag to false - time_point_t expiresAt() const - { - return ext::safe_bit_cast(data & EXPIRES_AT_MASK); - } - void setExpiresAt(const time_point_t & t) - { - data = ext::safe_bit_cast(t); - } + time_point_t expiresAt() const { return ext::safe_bit_cast(data & EXPIRES_AT_MASK); } + void setExpiresAt(const time_point_t & t) { data = ext::safe_bit_cast(t); } - bool isDefault() const - { - return (data & IS_DEFAULT_MASK) == IS_DEFAULT_MASK; - } - void setDefault() - { - data |= IS_DEFAULT_MASK; - } + bool isDefault() const { return (data & IS_DEFAULT_MASK) == IS_DEFAULT_MASK; } + void setDefault() { data |= IS_DEFAULT_MASK; } }; struct Attribute final { AttributeUnderlyingType type; - std::variant null_values; - std::variant, + std::variant< + UInt8, + UInt16, + UInt32, + UInt64, + UInt128, + Int8, + Int16, + Int32, + Int64, + Decimal32, + Decimal64, + Decimal128, + Float32, + Float64, + String> + null_values; + std::variant< + ContainerPtrType, ContainerPtrType, ContainerPtrType, ContainerPtrType, @@ -283,8 +255,8 @@ private: Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value); template - void getItemsNumber( - Attribute & attribute, const Columns & key_columns, PaddedPODArray & out, DefaultGetter && get_default) const 
+ void + getItemsNumber(Attribute & attribute, const Columns & key_columns, PaddedPODArray & out, DefaultGetter && get_default) const { if (false) { @@ -372,7 +344,8 @@ private: std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.second.front(); }); /// request new values - update(key_columns, + update( + key_columns, keys_array, required_rows, [&](const StringRef key, const size_t cell_idx) @@ -497,7 +470,8 @@ private: return pair.second.front(); }); - update(key_columns, + update( + key_columns, keys_array, required_rows, [&](const StringRef key, const size_t cell_idx) @@ -531,7 +505,8 @@ private: } template - void update(const Columns & in_key_columns, + void update( + const Columns & in_key_columns, const PODArray & in_keys, const std::vector & in_requested_rows, PresentKeyHandler && on_cell_updated, @@ -561,8 +536,10 @@ private: const auto key_columns = ext::map( ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; }); - const auto attribute_columns = ext::map(ext::range(0, attributes_size), - [&](const size_t attribute_idx) { return block.safeGetByPosition(keys_size + attribute_idx).column; }); + const auto attribute_columns = ext::map(ext::range(0, attributes_size), [&](const size_t attribute_idx) + { + return block.safeGetByPosition(keys_size + attribute_idx).column; + }); const auto rows_num = block.rows(); @@ -693,7 +670,8 @@ private: void freeKey(const StringRef key) const; template - static StringRef placeKeysInPool(const size_t row, + static StringRef placeKeysInPool( + const size_t row, const Columns & key_columns, StringRefs & keys, const std::vector & key_attributes, diff --git a/dbms/src/Dictionaries/ComplexKeyCacheDictionary_createAttributeWithType.cpp b/dbms/src/Dictionaries/ComplexKeyCacheDictionary_createAttributeWithType.cpp index 843c389dcb0..8cfa1471f79 100644 --- 
a/dbms/src/Dictionaries/ComplexKeyCacheDictionary_createAttributeWithType.cpp +++ b/dbms/src/Dictionaries/ComplexKeyCacheDictionary_createAttributeWithType.cpp @@ -2,19 +2,19 @@ namespace DB { - -ComplexKeyCacheDictionary::Attribute ComplexKeyCacheDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value) +ComplexKeyCacheDictionary::Attribute +ComplexKeyCacheDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value) { Attribute attr{type, {}, {}}; switch (type) { -#define DISPATCH(TYPE) \ - case AttributeUnderlyingType::TYPE: \ - attr.null_values = TYPE(null_value.get>()); \ - attr.arrays = std::make_unique>(size); \ - bytes_allocated += size * sizeof(TYPE); \ - break; +#define DISPATCH(TYPE) \ + case AttributeUnderlyingType::TYPE: \ + attr.null_values = TYPE(null_value.get>()); \ + attr.arrays = std::make_unique>(size); \ + bytes_allocated += size * sizeof(TYPE); \ + break; DISPATCH(UInt8) DISPATCH(UInt16) DISPATCH(UInt32) diff --git a/dbms/src/Dictionaries/ComplexKeyCacheDictionary_setAttributeValue.cpp b/dbms/src/Dictionaries/ComplexKeyCacheDictionary_setAttributeValue.cpp index 9a3d34eb2c7..7b3a44214c5 100644 --- a/dbms/src/Dictionaries/ComplexKeyCacheDictionary_setAttributeValue.cpp +++ b/dbms/src/Dictionaries/ComplexKeyCacheDictionary_setAttributeValue.cpp @@ -2,26 +2,53 @@ namespace DB { - void ComplexKeyCacheDictionary::setAttributeValue(Attribute & attribute, const size_t idx, const Field & value) const { switch (attribute.type) { - case AttributeUnderlyingType::UInt8: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::UInt16: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::UInt32: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::UInt64: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::UInt128: std::get>(attribute.arrays)[idx] = 
value.get(); break; - case AttributeUnderlyingType::Int8: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Int16: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Int32: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Int64: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Float32: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Float64: std::get>(attribute.arrays)[idx] = value.get(); break; + case AttributeUnderlyingType::UInt8: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::UInt16: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::UInt32: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::UInt64: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::UInt128: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Int8: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Int16: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Int32: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Int64: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Float32: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Float64: + std::get>(attribute.arrays)[idx] = value.get(); + break; - case AttributeUnderlyingType::Decimal32: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Decimal64: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Decimal128: std::get>(attribute.arrays)[idx] = value.get(); break; + case 
AttributeUnderlyingType::Decimal32: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Decimal64: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Decimal128: + std::get>(attribute.arrays)[idx] = value.get(); + break; case AttributeUnderlyingType::String: { diff --git a/dbms/src/Dictionaries/ComplexKeyCacheDictionary_setDefaultAttributeValue.cpp b/dbms/src/Dictionaries/ComplexKeyCacheDictionary_setDefaultAttributeValue.cpp index 7477e01da9c..89cf1506f90 100644 --- a/dbms/src/Dictionaries/ComplexKeyCacheDictionary_setDefaultAttributeValue.cpp +++ b/dbms/src/Dictionaries/ComplexKeyCacheDictionary_setDefaultAttributeValue.cpp @@ -2,22 +2,43 @@ namespace DB { - void ComplexKeyCacheDictionary::setDefaultAttributeValue(Attribute & attribute, const size_t idx) const { switch (attribute.type) { - case AttributeUnderlyingType::UInt8: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::UInt16: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::UInt32: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::UInt64: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::UInt128: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::Int8: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::Int16: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::Int32: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::Int64: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::Float32: std::get>(attribute.arrays)[idx] = 
std::get(attribute.null_values); break; - case AttributeUnderlyingType::Float64: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; + case AttributeUnderlyingType::UInt8: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::UInt16: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::UInt32: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::UInt64: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::UInt128: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::Int8: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::Int16: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::Int32: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::Int64: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::Float32: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::Float64: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; case AttributeUnderlyingType::Decimal32: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); diff --git a/dbms/src/Dictionaries/ComplexKeyHashedDictionary.cpp b/dbms/src/Dictionaries/ComplexKeyHashedDictionary.cpp index cdf01668bd2..bfd808c5914 100644 --- a/dbms/src/Dictionaries/ComplexKeyHashedDictionary.cpp +++ b/dbms/src/Dictionaries/ComplexKeyHashedDictionary.cpp @@ -1,12 +1,11 @@ +#include "ComplexKeyHashedDictionary.h" #include #include -#include "ComplexKeyHashedDictionary.h" #include 
"DictionaryBlockInputStream.h" #include "DictionaryFactory.h" namespace DB { - namespace ErrorCodes { extern const int TYPE_MISMATCH; @@ -16,12 +15,19 @@ namespace ErrorCodes } ComplexKeyHashedDictionary::ComplexKeyHashedDictionary( - const std::string & name, const DictionaryStructure & dict_struct, DictionarySourcePtr source_ptr, - const DictionaryLifetime dict_lifetime, bool require_nonempty, BlockPtr saved_block) - : name{name}, dict_struct(dict_struct), source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime), - require_nonempty(require_nonempty), saved_block{std::move(saved_block)} + const std::string & name, + const DictionaryStructure & dict_struct, + DictionarySourcePtr source_ptr, + const DictionaryLifetime dict_lifetime, + bool require_nonempty, + BlockPtr saved_block) + : name{name} + , dict_struct(dict_struct) + , source_ptr{std::move(source_ptr)} + , dict_lifetime(dict_lifetime) + , require_nonempty(require_nonempty) + , saved_block{std::move(saved_block)} { - createAttributes(); try @@ -38,27 +44,30 @@ ComplexKeyHashedDictionary::ComplexKeyHashedDictionary( } ComplexKeyHashedDictionary::ComplexKeyHashedDictionary(const ComplexKeyHashedDictionary & other) - : ComplexKeyHashedDictionary{other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime, other.require_nonempty, other.saved_block} + : ComplexKeyHashedDictionary{ + other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime, other.require_nonempty, other.saved_block} { } -#define DECLARE(TYPE)\ -void ComplexKeyHashedDictionary::get##TYPE(\ - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types,\ - ResultArrayType & out) const\ -{\ - dict_struct.validateKeyTypes(key_types);\ - \ - const auto & attribute = getAttribute(attribute_name);\ - if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE))\ - throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + 
toString(attribute.type), ErrorCodes::TYPE_MISMATCH};\ - \ - const auto null_value = std::get(attribute.null_values);\ - \ - getItemsNumber(attribute, key_columns,\ - [&] (const size_t row, const auto value) { out[row] = value; },\ - [&] (const size_t) { return null_value; });\ -} +#define DECLARE(TYPE) \ + void ComplexKeyHashedDictionary::get##TYPE( \ + const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType & out) const \ + { \ + dict_struct.validateKeyTypes(key_types); \ + \ + const auto & attribute = getAttribute(attribute_name); \ + if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \ + throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), \ + ErrorCodes::TYPE_MISMATCH}; \ + \ + const auto null_value = std::get(attribute.null_values); \ + \ + getItemsNumber( \ + attribute, \ + key_columns, \ + [&](const size_t row, const auto value) { out[row] = value; }, \ + [&](const size_t) { return null_value; }); \ + } DECLARE(UInt8) DECLARE(UInt16) DECLARE(UInt32) @@ -76,37 +85,45 @@ DECLARE(Decimal128) #undef DECLARE void ComplexKeyHashedDictionary::getString( - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, - ColumnString * out) const + const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const { dict_struct.validateKeyTypes(key_types); const auto & attribute = getAttribute(attribute_name); if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String)) - throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; + throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), + ErrorCodes::TYPE_MISMATCH}; const auto & null_value = 
StringRef{std::get(attribute.null_values)}; - getItemsImpl(attribute, key_columns, - [&] (const size_t, const StringRef value) { out->insertData(value.data, value.size); }, - [&] (const size_t) { return null_value; }); + getItemsImpl( + attribute, + key_columns, + [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); }, + [&](const size_t) { return null_value; }); } -#define DECLARE(TYPE)\ -void ComplexKeyHashedDictionary::get##TYPE(\ - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types,\ - const PaddedPODArray & def, ResultArrayType & out) const\ -{\ - dict_struct.validateKeyTypes(key_types);\ - \ - const auto & attribute = getAttribute(attribute_name);\ - if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE))\ - throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH};\ - \ - getItemsNumber(attribute, key_columns,\ - [&] (const size_t row, const auto value) { out[row] = value; },\ - [&] (const size_t row) { return def[row]; });\ -} +#define DECLARE(TYPE) \ + void ComplexKeyHashedDictionary::get##TYPE( \ + const std::string & attribute_name, \ + const Columns & key_columns, \ + const DataTypes & key_types, \ + const PaddedPODArray & def, \ + ResultArrayType & out) const \ + { \ + dict_struct.validateKeyTypes(key_types); \ + \ + const auto & attribute = getAttribute(attribute_name); \ + if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \ + throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), \ + ErrorCodes::TYPE_MISMATCH}; \ + \ + getItemsNumber( \ + attribute, \ + key_columns, \ + [&](const size_t row, const auto value) { out[row] = value; }, \ + [&](const size_t row) { return def[row]; }); \ + } DECLARE(UInt8) DECLARE(UInt16) DECLARE(UInt32) @@ -124,35 +141,44 @@ DECLARE(Decimal128) #undef 
DECLARE void ComplexKeyHashedDictionary::getString( - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, - const ColumnString * const def, ColumnString * const out) const + const std::string & attribute_name, + const Columns & key_columns, + const DataTypes & key_types, + const ColumnString * const def, + ColumnString * const out) const { dict_struct.validateKeyTypes(key_types); const auto & attribute = getAttribute(attribute_name); if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String)) - throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; + throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), + ErrorCodes::TYPE_MISMATCH}; - getItemsImpl(attribute, key_columns, - [&] (const size_t, const StringRef value) { out->insertData(value.data, value.size); }, - [&] (const size_t row) { return def->getDataAt(row); }); + getItemsImpl( + attribute, + key_columns, + [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); }, + [&](const size_t row) { return def->getDataAt(row); }); } -#define DECLARE(TYPE)\ -void ComplexKeyHashedDictionary::get##TYPE(\ - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types,\ - const TYPE def, ResultArrayType & out) const\ -{\ - dict_struct.validateKeyTypes(key_types);\ - \ - const auto & attribute = getAttribute(attribute_name);\ - if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE))\ - throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH};\ - \ - getItemsNumber(attribute, key_columns,\ - [&] (const size_t row, const auto value) { out[row] = value; },\ - [&] (const size_t) { return def; });\ -} +#define DECLARE(TYPE) \ + void 
ComplexKeyHashedDictionary::get##TYPE( \ + const std::string & attribute_name, \ + const Columns & key_columns, \ + const DataTypes & key_types, \ + const TYPE def, \ + ResultArrayType & out) const \ + { \ + dict_struct.validateKeyTypes(key_types); \ + \ + const auto & attribute = getAttribute(attribute_name); \ + if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \ + throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), \ + ErrorCodes::TYPE_MISMATCH}; \ + \ + getItemsNumber( \ + attribute, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \ + } DECLARE(UInt8) DECLARE(UInt16) DECLARE(UInt32) @@ -170,18 +196,24 @@ DECLARE(Decimal128) #undef DECLARE void ComplexKeyHashedDictionary::getString( - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, - const String & def, ColumnString * const out) const + const std::string & attribute_name, + const Columns & key_columns, + const DataTypes & key_types, + const String & def, + ColumnString * const out) const { dict_struct.validateKeyTypes(key_types); const auto & attribute = getAttribute(attribute_name); if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String)) - throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; + throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), + ErrorCodes::TYPE_MISMATCH}; - getItemsImpl(attribute, key_columns, - [&] (const size_t, const StringRef value) { out->insertData(value.data, value.size); }, - [&] (const size_t) { return StringRef{def}; }); + getItemsImpl( + attribute, + key_columns, + [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); }, + [&](const size_t) { return StringRef{def}; }); } void 
ComplexKeyHashedDictionary::has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray & out) const @@ -192,22 +224,52 @@ void ComplexKeyHashedDictionary::has(const Columns & key_columns, const DataType switch (attribute.type) { - case AttributeUnderlyingType::UInt8: has(attribute, key_columns, out); break; - case AttributeUnderlyingType::UInt16: has(attribute, key_columns, out); break; - case AttributeUnderlyingType::UInt32: has(attribute, key_columns, out); break; - case AttributeUnderlyingType::UInt64: has(attribute, key_columns, out); break; - case AttributeUnderlyingType::UInt128: has(attribute, key_columns, out); break; - case AttributeUnderlyingType::Int8: has(attribute, key_columns, out); break; - case AttributeUnderlyingType::Int16: has(attribute, key_columns, out); break; - case AttributeUnderlyingType::Int32: has(attribute, key_columns, out); break; - case AttributeUnderlyingType::Int64: has(attribute, key_columns, out); break; - case AttributeUnderlyingType::Float32: has(attribute, key_columns, out); break; - case AttributeUnderlyingType::Float64: has(attribute, key_columns, out); break; - case AttributeUnderlyingType::String: has(attribute, key_columns, out); break; + case AttributeUnderlyingType::UInt8: + has(attribute, key_columns, out); + break; + case AttributeUnderlyingType::UInt16: + has(attribute, key_columns, out); + break; + case AttributeUnderlyingType::UInt32: + has(attribute, key_columns, out); + break; + case AttributeUnderlyingType::UInt64: + has(attribute, key_columns, out); + break; + case AttributeUnderlyingType::UInt128: + has(attribute, key_columns, out); + break; + case AttributeUnderlyingType::Int8: + has(attribute, key_columns, out); + break; + case AttributeUnderlyingType::Int16: + has(attribute, key_columns, out); + break; + case AttributeUnderlyingType::Int32: + has(attribute, key_columns, out); + break; + case AttributeUnderlyingType::Int64: + has(attribute, key_columns, out); + break; + case 
AttributeUnderlyingType::Float32: + has(attribute, key_columns, out); + break; + case AttributeUnderlyingType::Float64: + has(attribute, key_columns, out); + break; + case AttributeUnderlyingType::String: + has(attribute, key_columns, out); + break; - case AttributeUnderlyingType::Decimal32: has(attribute, key_columns, out); break; - case AttributeUnderlyingType::Decimal64: has(attribute, key_columns, out); break; - case AttributeUnderlyingType::Decimal128: has(attribute, key_columns, out); break; + case AttributeUnderlyingType::Decimal32: + has(attribute, key_columns, out); + break; + case AttributeUnderlyingType::Decimal64: + has(attribute, key_columns, out); + break; + case AttributeUnderlyingType::Decimal128: + has(attribute, key_columns, out); + break; } } @@ -222,7 +284,8 @@ void ComplexKeyHashedDictionary::createAttributes() attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value)); if (attribute.hierarchical) - throw Exception{name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(), ErrorCodes::TYPE_MISMATCH}; + throw Exception{name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(), + ErrorCodes::TYPE_MISMATCH}; } } @@ -236,17 +299,13 @@ void ComplexKeyHashedDictionary::blockToAttributes(const Block & block) const auto rows = block.rows(); element_count += rows; - const auto key_column_ptrs = ext::map(ext::range(0, keys_size), - [&](const size_t attribute_idx) - { - return block.safeGetByPosition(attribute_idx).column; - }); + const auto key_column_ptrs = ext::map( + ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; }); - const auto attribute_column_ptrs = ext::map(ext::range(0, attributes_size), - [&](const size_t attribute_idx) - { - return block.safeGetByPosition(keys_size + attribute_idx).column; - }); + const auto attribute_column_ptrs = ext::map(ext::range(0, attributes_size), [&](const 
size_t attribute_idx) + { + return block.safeGetByPosition(keys_size + attribute_idx).column; + }); for (const auto row_idx : ext::range(0, rows)) { @@ -304,18 +363,14 @@ void ComplexKeyHashedDictionary::updateData() stream->readPrefix(); while (Block block = stream->read()) { - const auto saved_key_column_ptrs = ext::map(ext::range(0, keys_size), [&](const size_t key_idx) - { - return saved_block->safeGetByPosition(key_idx).column; - }); + const auto saved_key_column_ptrs = ext::map( + ext::range(0, keys_size), [&](const size_t key_idx) { return saved_block->safeGetByPosition(key_idx).column; }); - const auto update_key_column_ptrs = ext::map(ext::range(0, keys_size), [&](const size_t key_idx) - { - return block.safeGetByPosition(key_idx).column; - }); + const auto update_key_column_ptrs = ext::map( + ext::range(0, keys_size), [&](const size_t key_idx) { return block.safeGetByPosition(key_idx).column; }); Arena temp_key_pool; - ContainerType > update_key_hash; + ContainerType> update_key_hash; for (size_t i = 0; i < block.rows(); ++i) { @@ -389,21 +444,49 @@ void ComplexKeyHashedDictionary::calculateBytesAllocated() { switch (attribute.type) { - case AttributeUnderlyingType::UInt8: addAttributeSize(attribute); break; - case AttributeUnderlyingType::UInt16: addAttributeSize(attribute); break; - case AttributeUnderlyingType::UInt32: addAttributeSize(attribute); break; - case AttributeUnderlyingType::UInt64: addAttributeSize(attribute); break; - case AttributeUnderlyingType::UInt128: addAttributeSize(attribute); break; - case AttributeUnderlyingType::Int8: addAttributeSize(attribute); break; - case AttributeUnderlyingType::Int16: addAttributeSize(attribute); break; - case AttributeUnderlyingType::Int32: addAttributeSize(attribute); break; - case AttributeUnderlyingType::Int64: addAttributeSize(attribute); break; - case AttributeUnderlyingType::Float32: addAttributeSize(attribute); break; - case AttributeUnderlyingType::Float64: addAttributeSize(attribute); break; + 
case AttributeUnderlyingType::UInt8: + addAttributeSize(attribute); + break; + case AttributeUnderlyingType::UInt16: + addAttributeSize(attribute); + break; + case AttributeUnderlyingType::UInt32: + addAttributeSize(attribute); + break; + case AttributeUnderlyingType::UInt64: + addAttributeSize(attribute); + break; + case AttributeUnderlyingType::UInt128: + addAttributeSize(attribute); + break; + case AttributeUnderlyingType::Int8: + addAttributeSize(attribute); + break; + case AttributeUnderlyingType::Int16: + addAttributeSize(attribute); + break; + case AttributeUnderlyingType::Int32: + addAttributeSize(attribute); + break; + case AttributeUnderlyingType::Int64: + addAttributeSize(attribute); + break; + case AttributeUnderlyingType::Float32: + addAttributeSize(attribute); + break; + case AttributeUnderlyingType::Float64: + addAttributeSize(attribute); + break; - case AttributeUnderlyingType::Decimal32: addAttributeSize(attribute); break; - case AttributeUnderlyingType::Decimal64: addAttributeSize(attribute); break; - case AttributeUnderlyingType::Decimal128: addAttributeSize(attribute); break; + case AttributeUnderlyingType::Decimal32: + addAttributeSize(attribute); + break; + case AttributeUnderlyingType::Decimal64: + addAttributeSize(attribute); + break; + case AttributeUnderlyingType::Decimal128: + addAttributeSize(attribute); + break; case AttributeUnderlyingType::String: { @@ -425,27 +508,56 @@ void ComplexKeyHashedDictionary::createAttributeImpl(Attribute & attribute, cons attribute.maps.emplace>(); } -ComplexKeyHashedDictionary::Attribute ComplexKeyHashedDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value) +ComplexKeyHashedDictionary::Attribute +ComplexKeyHashedDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value) { Attribute attr{type, {}, {}, {}}; switch (type) { - case AttributeUnderlyingType::UInt8: createAttributeImpl(attr, null_value); break; - case 
AttributeUnderlyingType::UInt16: createAttributeImpl(attr, null_value); break; - case AttributeUnderlyingType::UInt32: createAttributeImpl(attr, null_value); break; - case AttributeUnderlyingType::UInt64: createAttributeImpl(attr, null_value); break; - case AttributeUnderlyingType::UInt128: createAttributeImpl(attr, null_value); break; - case AttributeUnderlyingType::Int8: createAttributeImpl(attr, null_value); break; - case AttributeUnderlyingType::Int16: createAttributeImpl(attr, null_value); break; - case AttributeUnderlyingType::Int32: createAttributeImpl(attr, null_value); break; - case AttributeUnderlyingType::Int64: createAttributeImpl(attr, null_value); break; - case AttributeUnderlyingType::Float32: createAttributeImpl(attr, null_value); break; - case AttributeUnderlyingType::Float64: createAttributeImpl(attr, null_value); break; + case AttributeUnderlyingType::UInt8: + createAttributeImpl(attr, null_value); + break; + case AttributeUnderlyingType::UInt16: + createAttributeImpl(attr, null_value); + break; + case AttributeUnderlyingType::UInt32: + createAttributeImpl(attr, null_value); + break; + case AttributeUnderlyingType::UInt64: + createAttributeImpl(attr, null_value); + break; + case AttributeUnderlyingType::UInt128: + createAttributeImpl(attr, null_value); + break; + case AttributeUnderlyingType::Int8: + createAttributeImpl(attr, null_value); + break; + case AttributeUnderlyingType::Int16: + createAttributeImpl(attr, null_value); + break; + case AttributeUnderlyingType::Int32: + createAttributeImpl(attr, null_value); + break; + case AttributeUnderlyingType::Int64: + createAttributeImpl(attr, null_value); + break; + case AttributeUnderlyingType::Float32: + createAttributeImpl(attr, null_value); + break; + case AttributeUnderlyingType::Float64: + createAttributeImpl(attr, null_value); + break; - case AttributeUnderlyingType::Decimal32: createAttributeImpl(attr, null_value); break; - case AttributeUnderlyingType::Decimal64: createAttributeImpl(attr, 
null_value); break; - case AttributeUnderlyingType::Decimal128: createAttributeImpl(attr, null_value); break; + case AttributeUnderlyingType::Decimal32: + createAttributeImpl(attr, null_value); + break; + case AttributeUnderlyingType::Decimal64: + createAttributeImpl(attr, null_value); + break; + case AttributeUnderlyingType::Decimal128: + createAttributeImpl(attr, null_value); + break; case AttributeUnderlyingType::String: { @@ -462,15 +574,14 @@ ComplexKeyHashedDictionary::Attribute ComplexKeyHashedDictionary::createAttribut template void ComplexKeyHashedDictionary::getItemsNumber( - const Attribute & attribute, - const Columns & key_columns, - ValueSetter && set_value, - DefaultGetter && get_default) const + const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const { - if (false) {} -#define DISPATCH(TYPE) \ - else if (attribute.type == AttributeUnderlyingType::TYPE) \ - getItemsImpl(attribute, key_columns, std::forward(set_value), std::forward(get_default)); + if (false) + { + } +#define DISPATCH(TYPE) \ + else if (attribute.type == AttributeUnderlyingType::TYPE) getItemsImpl( \ + attribute, key_columns, std::forward(set_value), std::forward(get_default)); DISPATCH(UInt8) DISPATCH(UInt16) DISPATCH(UInt32) @@ -486,16 +597,12 @@ void ComplexKeyHashedDictionary::getItemsNumber( DISPATCH(Decimal64) DISPATCH(Decimal128) #undef DISPATCH - else - throw Exception("Unexpected type of attribute: " + toString(attribute.type), ErrorCodes::LOGICAL_ERROR); + else throw Exception("Unexpected type of attribute: " + toString(attribute.type), ErrorCodes::LOGICAL_ERROR); } template void ComplexKeyHashedDictionary::getItemsImpl( - const Attribute & attribute, - const Columns & key_columns, - ValueSetter && set_value, - DefaultGetter && get_default) const + const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const { const auto & attr = 
std::get>(attribute.maps); @@ -524,7 +631,7 @@ template bool ComplexKeyHashedDictionary::setAttributeValueImpl(Attribute & attribute, const StringRef key, const T value) { auto & map = std::get>(attribute.maps); - const auto pair = map.insert({ key, value }); + const auto pair = map.insert({key, value}); return pair.second; } @@ -532,28 +639,42 @@ bool ComplexKeyHashedDictionary::setAttributeValue(Attribute & attribute, const { switch (attribute.type) { - case AttributeUnderlyingType::UInt8: return setAttributeValueImpl(attribute, key, value.get()); - case AttributeUnderlyingType::UInt16: return setAttributeValueImpl(attribute, key, value.get()); - case AttributeUnderlyingType::UInt32: return setAttributeValueImpl(attribute, key, value.get()); - case AttributeUnderlyingType::UInt64: return setAttributeValueImpl(attribute, key, value.get()); - case AttributeUnderlyingType::UInt128: return setAttributeValueImpl(attribute, key, value.get()); - case AttributeUnderlyingType::Int8: return setAttributeValueImpl(attribute, key, value.get()); - case AttributeUnderlyingType::Int16: return setAttributeValueImpl(attribute, key, value.get()); - case AttributeUnderlyingType::Int32: return setAttributeValueImpl(attribute, key, value.get()); - case AttributeUnderlyingType::Int64: return setAttributeValueImpl(attribute, key, value.get()); - case AttributeUnderlyingType::Float32: return setAttributeValueImpl(attribute, key, value.get()); - case AttributeUnderlyingType::Float64: return setAttributeValueImpl(attribute, key, value.get()); + case AttributeUnderlyingType::UInt8: + return setAttributeValueImpl(attribute, key, value.get()); + case AttributeUnderlyingType::UInt16: + return setAttributeValueImpl(attribute, key, value.get()); + case AttributeUnderlyingType::UInt32: + return setAttributeValueImpl(attribute, key, value.get()); + case AttributeUnderlyingType::UInt64: + return setAttributeValueImpl(attribute, key, value.get()); + case AttributeUnderlyingType::UInt128: + return 
setAttributeValueImpl(attribute, key, value.get()); + case AttributeUnderlyingType::Int8: + return setAttributeValueImpl(attribute, key, value.get()); + case AttributeUnderlyingType::Int16: + return setAttributeValueImpl(attribute, key, value.get()); + case AttributeUnderlyingType::Int32: + return setAttributeValueImpl(attribute, key, value.get()); + case AttributeUnderlyingType::Int64: + return setAttributeValueImpl(attribute, key, value.get()); + case AttributeUnderlyingType::Float32: + return setAttributeValueImpl(attribute, key, value.get()); + case AttributeUnderlyingType::Float64: + return setAttributeValueImpl(attribute, key, value.get()); - case AttributeUnderlyingType::Decimal32: return setAttributeValueImpl(attribute, key, value.get()); - case AttributeUnderlyingType::Decimal64: return setAttributeValueImpl(attribute, key, value.get()); - case AttributeUnderlyingType::Decimal128: return setAttributeValueImpl(attribute, key, value.get()); + case AttributeUnderlyingType::Decimal32: + return setAttributeValueImpl(attribute, key, value.get()); + case AttributeUnderlyingType::Decimal64: + return setAttributeValueImpl(attribute, key, value.get()); + case AttributeUnderlyingType::Decimal128: + return setAttributeValueImpl(attribute, key, value.get()); case AttributeUnderlyingType::String: { auto & map = std::get>(attribute.maps); const auto & string = value.get(); const auto string_in_arena = attribute.string_arena->insert(string.data(), string.size()); - const auto pair = map.insert({ key, StringRef{string_in_arena, string.size()} }); + const auto pair = map.insert({key, StringRef{string_in_arena, string.size()}}); return pair.second; } } @@ -570,8 +691,7 @@ const ComplexKeyHashedDictionary::Attribute & ComplexKeyHashedDictionary::getAtt return attributes[it->second]; } -StringRef ComplexKeyHashedDictionary::placeKeysInPool( - const size_t row, const Columns & key_columns, StringRefs & keys, Arena & pool) +StringRef 
ComplexKeyHashedDictionary::placeKeysInPool(const size_t row, const Columns & key_columns, StringRefs & keys, Arena & pool) { const auto keys_size = key_columns.size(); size_t sum_keys_size{}; @@ -590,7 +710,7 @@ StringRef ComplexKeyHashedDictionary::placeKeysInPool( key_start += keys[j].size; } - return { block_start, sum_keys_size }; + return {block_start, sum_keys_size}; } template @@ -623,22 +743,37 @@ std::vector ComplexKeyHashedDictionary::getKeys() const switch (attribute.type) { - case AttributeUnderlyingType::UInt8: return getKeys(attribute); - case AttributeUnderlyingType::UInt16: return getKeys(attribute); - case AttributeUnderlyingType::UInt32: return getKeys(attribute); - case AttributeUnderlyingType::UInt64: return getKeys(attribute); - case AttributeUnderlyingType::UInt128: return getKeys(attribute); - case AttributeUnderlyingType::Int8: return getKeys(attribute); - case AttributeUnderlyingType::Int16: return getKeys(attribute); - case AttributeUnderlyingType::Int32: return getKeys(attribute); - case AttributeUnderlyingType::Int64: return getKeys(attribute); - case AttributeUnderlyingType::Float32: return getKeys(attribute); - case AttributeUnderlyingType::Float64: return getKeys(attribute); - case AttributeUnderlyingType::String: return getKeys(attribute); + case AttributeUnderlyingType::UInt8: + return getKeys(attribute); + case AttributeUnderlyingType::UInt16: + return getKeys(attribute); + case AttributeUnderlyingType::UInt32: + return getKeys(attribute); + case AttributeUnderlyingType::UInt64: + return getKeys(attribute); + case AttributeUnderlyingType::UInt128: + return getKeys(attribute); + case AttributeUnderlyingType::Int8: + return getKeys(attribute); + case AttributeUnderlyingType::Int16: + return getKeys(attribute); + case AttributeUnderlyingType::Int32: + return getKeys(attribute); + case AttributeUnderlyingType::Int64: + return getKeys(attribute); + case AttributeUnderlyingType::Float32: + return getKeys(attribute); + case 
AttributeUnderlyingType::Float64: + return getKeys(attribute); + case AttributeUnderlyingType::String: + return getKeys(attribute); - case AttributeUnderlyingType::Decimal32: return getKeys(attribute); - case AttributeUnderlyingType::Decimal64: return getKeys(attribute); - case AttributeUnderlyingType::Decimal128: return getKeys(attribute); + case AttributeUnderlyingType::Decimal32: + return getKeys(attribute); + case AttributeUnderlyingType::Decimal64: + return getKeys(attribute); + case AttributeUnderlyingType::Decimal128: + return getKeys(attribute); } return {}; } @@ -663,17 +798,15 @@ BlockInputStreamPtr ComplexKeyHashedDictionary::getBlockInputStream(const Names void registerDictionaryComplexKeyHashed(DictionaryFactory & factory) { - auto create_layout = [=]( - const std::string & name, - const DictionaryStructure & dict_struct, - const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, - DictionarySourcePtr source_ptr - ) -> DictionaryPtr { + auto create_layout = [=](const std::string & name, + const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + DictionarySourcePtr source_ptr) -> DictionaryPtr { if (!dict_struct.key) - throw Exception {"'key' is required for dictionary of layout 'complex_key_hashed'", ErrorCodes::BAD_ARGUMENTS}; + throw Exception{"'key' is required for dictionary of layout 'complex_key_hashed'", ErrorCodes::BAD_ARGUMENTS}; - const DictionaryLifetime dict_lifetime {config, config_prefix + ".lifetime"}; + const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty); }; diff --git a/dbms/src/Dictionaries/ComplexKeyHashedDictionary.h b/dbms/src/Dictionaries/ComplexKeyHashedDictionary.h index 859266fb5d1..81b350dd43e 100644 --- 
a/dbms/src/Dictionaries/ComplexKeyHashedDictionary.h +++ b/dbms/src/Dictionaries/ComplexKeyHashedDictionary.h @@ -1,30 +1,33 @@ #pragma once -#include "IDictionary.h" -#include "IDictionarySource.h" -#include "DictionaryStructure.h" -#include -#include -#include -#include -#include -#include #include #include #include +#include +#include +#include +#include +#include +#include +#include "DictionaryStructure.h" +#include "IDictionary.h" +#include "IDictionarySource.h" namespace DB { - using BlockPtr = std::shared_ptr; class ComplexKeyHashedDictionary final : public IDictionaryBase { public: ComplexKeyHashedDictionary( - const std::string & name, const DictionaryStructure & dict_struct, DictionarySourcePtr source_ptr, - const DictionaryLifetime dict_lifetime, bool require_nonempty, BlockPtr saved_block = nullptr); + const std::string & name, + const DictionaryStructure & dict_struct, + DictionarySourcePtr source_ptr, + const DictionaryLifetime dict_lifetime, + bool require_nonempty, + BlockPtr saved_block = nullptr); ComplexKeyHashedDictionary(const ComplexKeyHashedDictionary & other); @@ -56,10 +59,7 @@ public: const DictionaryStructure & getStructure() const override { return dict_struct; } - std::chrono::time_point getCreationTime() const override - { - return creation_time; - } + std::chrono::time_point getCreationTime() const override { return creation_time; } bool isInjective(const std::string & attribute_name) const override { @@ -69,9 +69,33 @@ public: template using ResultArrayType = std::conditional_t, DecimalPaddedPODArray, PaddedPODArray>; -#define DECLARE(TYPE)\ - void get##TYPE(\ - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types,\ +#define DECLARE(TYPE) \ + void get##TYPE( \ + const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType & out) const; + DECLARE(UInt8) + DECLARE(UInt16) + DECLARE(UInt32) + DECLARE(UInt64) + DECLARE(UInt128) + DECLARE(Int8) + 
DECLARE(Int16) + DECLARE(Int32) + DECLARE(Int64) + DECLARE(Float32) + DECLARE(Float64) + DECLARE(Decimal32) + DECLARE(Decimal64) + DECLARE(Decimal128) +#undef DECLARE + + void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const; + +#define DECLARE(TYPE) \ + void get##TYPE( \ + const std::string & attribute_name, \ + const Columns & key_columns, \ + const DataTypes & key_types, \ + const PaddedPODArray & def, \ ResultArrayType & out) const; DECLARE(UInt8) DECLARE(UInt16) @@ -90,13 +114,19 @@ public: #undef DECLARE void getString( - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, - ColumnString * out) const; + const std::string & attribute_name, + const Columns & key_columns, + const DataTypes & key_types, + const ColumnString * const def, + ColumnString * const out) const; -#define DECLARE(TYPE)\ - void get##TYPE(\ - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types,\ - const PaddedPODArray & def, ResultArrayType & out) const; +#define DECLARE(TYPE) \ + void get##TYPE( \ + const std::string & attribute_name, \ + const Columns & key_columns, \ + const DataTypes & key_types, \ + const TYPE def, \ + ResultArrayType & out) const; DECLARE(UInt8) DECLARE(UInt16) DECLARE(UInt32) @@ -114,57 +144,57 @@ public: #undef DECLARE void getString( - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, - const ColumnString * const def, ColumnString * const out) const; - -#define DECLARE(TYPE)\ - void get##TYPE(\ - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types,\ - const TYPE def, ResultArrayType & out) const; - DECLARE(UInt8) - DECLARE(UInt16) - DECLARE(UInt32) - DECLARE(UInt64) - DECLARE(UInt128) - DECLARE(Int8) - DECLARE(Int16) - DECLARE(Int32) - DECLARE(Int64) - DECLARE(Float32) - DECLARE(Float64) - DECLARE(Decimal32) - 
DECLARE(Decimal64) - DECLARE(Decimal128) -#undef DECLARE - - void getString( - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, - const String & def, ColumnString * const out) const; + const std::string & attribute_name, + const Columns & key_columns, + const DataTypes & key_types, + const String & def, + ColumnString * const out) const; void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray & out) const; BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override; private: - template using ContainerType = HashMapWithSavedHash; + template + using ContainerType = HashMapWithSavedHash; struct Attribute final { AttributeUnderlyingType type; std::variant< - UInt8, UInt16, UInt32, UInt64, + UInt8, + UInt16, + UInt32, + UInt64, UInt128, - Int8, Int16, Int32, Int64, - Decimal32, Decimal64, Decimal128, - Float32, Float64, - String> null_values; + Int8, + Int16, + Int32, + Int64, + Decimal32, + Decimal64, + Decimal128, + Float32, + Float64, + String> + null_values; std::variant< - ContainerType, ContainerType, ContainerType, ContainerType, + ContainerType, + ContainerType, + ContainerType, + ContainerType, ContainerType, - ContainerType, ContainerType, ContainerType, ContainerType, - ContainerType, ContainerType, ContainerType, - ContainerType, ContainerType, - ContainerType> maps; + ContainerType, + ContainerType, + ContainerType, + ContainerType, + ContainerType, + ContainerType, + ContainerType, + ContainerType, + ContainerType, + ContainerType> + maps; std::unique_ptr string_arena; }; @@ -188,18 +218,12 @@ private: template - void getItemsNumber( - const Attribute & attribute, - const Columns & key_columns, - ValueSetter && set_value, - DefaultGetter && get_default) const; + void + getItemsNumber(const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const; template - void getItemsImpl( - const 
Attribute & attribute, - const Columns & key_columns, - ValueSetter && set_value, - DefaultGetter && get_default) const; + void + getItemsImpl(const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const; template @@ -209,8 +233,7 @@ private: const Attribute & getAttribute(const std::string & attribute_name) const; - static StringRef placeKeysInPool( - const size_t row, const Columns & key_columns, StringRefs & keys, Arena & pool); + static StringRef placeKeysInPool(const size_t row, const Columns & key_columns, StringRefs & keys, Arena & pool); template void has(const Attribute & attribute, const Columns & key_columns, PaddedPODArray & out) const; diff --git a/dbms/src/Dictionaries/DictionaryBlockInputStream.h b/dbms/src/Dictionaries/DictionaryBlockInputStream.h index f1778a9fa6d..afdc26cdba3 100644 --- a/dbms/src/Dictionaries/DictionaryBlockInputStream.h +++ b/dbms/src/Dictionaries/DictionaryBlockInputStream.h @@ -1,22 +1,21 @@ #pragma once -#include +#include #include #include +#include #include +#include #include #include +#include +#include #include "DictionaryBlockInputStreamBase.h" #include "DictionaryStructure.h" #include "IDictionary.h" -#include -#include -#include -#include namespace DB { - namespace ErrorCodes { extern const int LOGICAL_ERROR; @@ -32,28 +31,30 @@ class DictionaryBlockInputStream : public DictionaryBlockInputStreamBase public: using DictionaryPtr = std::shared_ptr; - DictionaryBlockInputStream(std::shared_ptr dictionary, size_t max_block_size, - PaddedPODArray && ids, const Names & column_names); + DictionaryBlockInputStream( + std::shared_ptr dictionary, size_t max_block_size, PaddedPODArray && ids, const Names & column_names); - DictionaryBlockInputStream(std::shared_ptr dictionary, size_t max_block_size, - const std::vector & keys, const Names & column_names); + DictionaryBlockInputStream( + std::shared_ptr dictionary, + size_t max_block_size, + const std::vector & keys, + 
const Names & column_names); - using GetColumnsFunction = - std::function & attributes)>; + using GetColumnsFunction = std::function & attributes)>; // Used to separate key columns format for storage and view. // Calls get_key_columns_function to get key column for dictionary get fuction call // and get_view_columns_function to get key representation. // Now used in trie dictionary, where columns are stored as ip and mask, and are showed as string - DictionaryBlockInputStream(std::shared_ptr dictionary, size_t max_block_size, - const Columns & data_columns, const Names & column_names, - GetColumnsFunction && get_key_columns_function, - GetColumnsFunction && get_view_columns_function); + DictionaryBlockInputStream( + std::shared_ptr dictionary, + size_t max_block_size, + const Columns & data_columns, + const Names & column_names, + GetColumnsFunction && get_key_columns_function, + GetColumnsFunction && get_view_columns_function); - String getName() const override - { - return "Dictionary"; - } + String getName() const override { return "Dictionary"; } protected: Block getBlock(size_t start, size_t size) const override; @@ -65,8 +66,8 @@ private: using DictionaryGetter = void (DictionaryType::*)(const std::string &, const PaddedPODArray &, PaddedPODArray &) const; template - using DictionaryDecimalGetter = - void (DictionaryType::*)(const std::string &, const PaddedPODArray &, DecimalPaddedPODArray &) const; + using DictionaryDecimalGetter + = void (DictionaryType::*)(const std::string &, const PaddedPODArray &, DecimalPaddedPODArray &) const; using DictionaryStringGetter = void (DictionaryType::*)(const std::string &, const PaddedPODArray &, ColumnString *) const; @@ -75,61 +76,103 @@ private: using GetterByKey = void (DictionaryType::*)(const std::string &, const Columns &, const DataTypes &, PaddedPODArray & out) const; template - using DecimalGetterByKey = - void (DictionaryType::*)(const std::string &, const Columns &, const DataTypes &, DecimalPaddedPODArray & 
out) const; + using DecimalGetterByKey + = void (DictionaryType::*)(const std::string &, const Columns &, const DataTypes &, DecimalPaddedPODArray & out) const; using StringGetterByKey = void (DictionaryType::*)(const std::string &, const Columns &, const DataTypes &, ColumnString * out) const; // call getXXX // for single key dictionaries template - void callGetter(DictionaryGetter getter, const PaddedPODArray & ids_to_fill, - const Columns & keys, const DataTypes & data_types, - Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const; + void callGetter( + DictionaryGetter getter, + const PaddedPODArray & ids_to_fill, + const Columns & keys, + const DataTypes & data_types, + Container & container, + const DictionaryAttribute & attribute, + const DictionaryType & dictionary) const; template - void callGetter(DictionaryDecimalGetter getter, const PaddedPODArray & ids_to_fill, - const Columns & keys, const DataTypes & data_types, - Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const; + void callGetter( + DictionaryDecimalGetter getter, + const PaddedPODArray & ids_to_fill, + const Columns & keys, + const DataTypes & data_types, + Container & container, + const DictionaryAttribute & attribute, + const DictionaryType & dictionary) const; template - void callGetter(DictionaryStringGetter getter, const PaddedPODArray & ids_to_fill, - const Columns & keys, const DataTypes & data_types, - Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const; + void callGetter( + DictionaryStringGetter getter, + const PaddedPODArray & ids_to_fill, + const Columns & keys, + const DataTypes & data_types, + Container & container, + const DictionaryAttribute & attribute, + const DictionaryType & dictionary) const; // for complex complex key dictionaries template - void callGetter(GetterByKey getter, const PaddedPODArray & ids_to_fill, - const 
Columns & keys, const DataTypes & data_types, - Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const; + void callGetter( + GetterByKey getter, + const PaddedPODArray & ids_to_fill, + const Columns & keys, + const DataTypes & data_types, + Container & container, + const DictionaryAttribute & attribute, + const DictionaryType & dictionary) const; template - void callGetter(DecimalGetterByKey getter, const PaddedPODArray & ids_to_fill, - const Columns & keys, const DataTypes & data_types, - Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const; + void callGetter( + DecimalGetterByKey getter, + const PaddedPODArray & ids_to_fill, + const Columns & keys, + const DataTypes & data_types, + Container & container, + const DictionaryAttribute & attribute, + const DictionaryType & dictionary) const; template - void callGetter(StringGetterByKey getter, const PaddedPODArray & ids_to_fill, - const Columns & keys, const DataTypes & data_types, - Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const; + void callGetter( + StringGetterByKey getter, + const PaddedPODArray & ids_to_fill, + const Columns & keys, + const DataTypes & data_types, + Container & container, + const DictionaryAttribute & attribute, + const DictionaryType & dictionary) const; template