From 105ea620ca1ae7f48828957e3c3d701395856260 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov <1549571+filimonov@users.noreply.github.com> Date: Wed, 14 Mar 2018 00:33:56 +0100 Subject: [PATCH 01/88] Documenting numbers table function, VerticalRaw format, HTTP sessions, HTTP compression. --- docs/en/formats/verticalraw.md | 24 ++++++++++++++++++++++++ docs/en/interfaces/http_interface.md | 13 ++++++++++--- docs/en/table_functions/numbers.md | 17 +++++++++++++++++ docs/mkdocs_en.yml | 2 ++ docs/mkdocs_ru.yml | 2 ++ docs/ru/formats/verticalraw.md | 26 ++++++++++++++++++++++++++ docs/ru/interfaces/http_interface.md | 11 +++++++++-- docs/ru/table_functions/numbers.md | 16 ++++++++++++++++ 8 files changed, 106 insertions(+), 5 deletions(-) create mode 100644 docs/en/formats/verticalraw.md create mode 100644 docs/en/table_functions/numbers.md create mode 100644 docs/ru/formats/verticalraw.md create mode 100644 docs/ru/table_functions/numbers.md diff --git a/docs/en/formats/verticalraw.md b/docs/en/formats/verticalraw.md new file mode 100644 index 00000000000..440967eb598 --- /dev/null +++ b/docs/en/formats/verticalraw.md @@ -0,0 +1,24 @@ +# VerticalRaw + +Differs from `TabSeparated` format in that the rows are written without escaping. +This format is only appropriate for outputting a query result, but not for parsing (retrieving data to insert in a table). 
+ +Samples: +``` +:) SHOW CREATE TABLE geonames FORMAT VerticalRaw; +Row 1: +────── +statement: CREATE TABLE default.geonames ( geonameid UInt32, name String, asciiname String, alternatenames String, latitude Float32, longitude Float32, feature_class String, feature_code String, country_code String, cc2 String, admin1_code String, admin2_code String, admin3_code String, admin4_code String, population Int64, elevation String, dem String, timezone String, modification_date Date, date Date DEFAULT CAST('2017-12-08' AS Date)) ENGINE = MergeTree(date, geonameid, 8192) + +:) SELECT 'string with \'quotes\' and \t with some special \n characters' AS test FORMAT VerticalRaw; +Row 1: +────── +test: string with 'quotes' and with some special + characters + +-- the same in Vertical format: +:) SELECT 'string with \'quotes\' and \t with some special \n characters' AS test FORMAT Vertical; +Row 1: +────── +test: string with \'quotes\' and \t with some special \n characters +``` diff --git a/docs/en/interfaces/http_interface.md b/docs/en/interfaces/http_interface.md index 91c6790f975..8c223cf69cf 100644 --- a/docs/en/interfaces/http_interface.md +++ b/docs/en/interfaces/http_interface.md @@ -130,11 +130,15 @@ POST 'http://localhost:8123/?query=DROP TABLE t' For successful requests that don't return a data table, an empty response body is returned. -You can use compression when transmitting data. The compressed data has a non-standard format, and you will need to use the special compressor program to work with it (sudo apt-get install compressor-metrika-yandex). +You can use compression when transmitting data. +For using ClickHouse internal compression format, and you will need to use the special compressor program to work with it (sudo apt-get install compressor-metrika-yandex). If you specified 'compress=1' in the URL, the server will compress the data it sends you. 
If you specified 'decompress=1' in the URL, the server will decompress the same data that you pass in the POST method. +Standard gzip-based HTTP compression can also be used. To send gzip-compressed POST data, add `Content-Encoding: gzip` to the request headers and gzip the POST body. +To get a compressed response, add `Accept-Encoding: gzip` to the request headers and turn on the ClickHouse setting `enable_http_compression`. + You can use this to reduce network traffic when transmitting a large amount of data, or for creating dumps that are immediately compressed. You can use the 'database' URL parameter to specify the default database. @@ -190,7 +194,11 @@ $ echo 'SELECT number FROM system.numbers LIMIT 10' | curl 'http://localhost:812 For information about other parameters, see the section "SET". -In contrast to the native interface, the HTTP interface does not support the concept of sessions or session settings, does not allow aborting a query (to be exact, it allows this in only a few cases), and does not show the progress of query processing. Parsing and data formatting are performed on the server side, and using the network might be ineffective. +You can use ClickHouse sessions in the HTTP protocol. To do this, specify the `session_id` GET parameter in the HTTP request. You can use any alphanumeric string as the session_id. By default, a session times out after 60 seconds of inactivity. You can change this by setting `default_session_timeout` in the server config file, or by adding the GET parameter `session_timeout`. You can also check the status of the session by using the GET parameter `session_check=1`. When using sessions, you can't run two queries with the same session_id simultaneously. + +You can get the progress of query execution in X-ClickHouse-Progress headers by enabling the `send_progress_in_http_headers` setting. + +Running queries are not aborted automatically after the HTTP connection is closed. 
Parsing and data formatting are performed on the server side, and using the network might be ineffective. The optional 'query_id' parameter can be passed as the query ID (any string). For more information, see the section "Settings, replace_running_query". The optional 'quota_key' parameter can be passed as the quota key (any string). For more information, see the section "Quotas". @@ -212,4 +220,3 @@ curl -sS 'http://localhost:8123/?max_result_bytes=4000000&buffer_size=3000000&wa ``` Use buffering to avoid situations where a query processing error occurred after the response code and HTTP headers were sent to the client. In this situation, an error message is written at the end of the response body, and on the client side, the error can only be detected at the parsing stage. - diff --git a/docs/en/table_functions/numbers.md b/docs/en/table_functions/numbers.md new file mode 100644 index 00000000000..b055f1cd56e --- /dev/null +++ b/docs/en/table_functions/numbers.md @@ -0,0 +1,17 @@ +# numbers + +`numbers(N)` - returns a table with a single column named `number` (UInt64 type) that contains the integers from 0 to N-1. + +Like the `system.numbers` table, `numbers(N)` can be used in tests or for generating sequences. 
+ +The following two queries are equivalent: +```sql +SELECT * FROM numbers(10); +SELECT * FROM system.numbers LIMIT 10; +``` + +Example: +```sql +-- generate a sequence of dates from 2010-01-01 to 2010-12-31 +select toDate('2010-01-01') + number as d FROM numbers(365); +``` diff --git a/docs/mkdocs_en.yml b/docs/mkdocs_en.yml index 1daf36ecfd8..c94e33c7818 100644 --- a/docs/mkdocs_en.yml +++ b/docs/mkdocs_en.yml @@ -122,6 +122,7 @@ pages: - 'Introduction': 'table_functions/index.md' - 'remote': 'table_functions/remote.md' - 'merge': 'table_functions/merge.md' + - 'numbers': 'table_functions/numbers.md' - 'Formats': - 'Introduction': 'formats/index.md' @@ -133,6 +134,7 @@ pages: - 'CSVWithNames': 'formats/csvwithnames.md' - 'Values': 'formats/values.md' - 'Vertical': 'formats/vertical.md' + - 'VerticalRaw': 'formats/verticalraw.md' - 'JSON': 'formats/json.md' - 'JSONCompact': 'formats/jsoncompact.md' - 'JSONEachRow': 'formats/jsoneachrow.md' diff --git a/docs/mkdocs_ru.yml b/docs/mkdocs_ru.yml index 23734934bb5..eb8c002847d 100644 --- a/docs/mkdocs_ru.yml +++ b/docs/mkdocs_ru.yml @@ -124,6 +124,7 @@ pages: - 'Введение': 'table_functions/index.md' - 'remote': 'table_functions/remote.md' - 'merge': 'table_functions/merge.md' + - 'numbers': 'table_functions/numbers.md' - 'Форматы': - 'Введение': 'formats/index.md' @@ -135,6 +136,7 @@ pages: - 'CSVWithNames': 'formats/csvwithnames.md' - 'Values': 'formats/values.md' - 'Vertical': 'formats/vertical.md' + - 'VerticalRaw': 'formats/verticalraw.md' - 'JSON': 'formats/json.md' - 'JSONCompact': 'formats/jsoncompact.md' - 'JSONEachRow': 'formats/jsoneachrow.md' diff --git a/docs/ru/formats/verticalraw.md b/docs/ru/formats/verticalraw.md new file mode 100644 index 00000000000..9693cabe3c3 --- /dev/null +++ b/docs/ru/formats/verticalraw.md @@ -0,0 +1,26 @@ +# VerticalRaw + +Отличается от формата `Vertical` тем, что строки выводятся без экранирования. 
+Этот формат подходит только для вывода результата выполнения запроса, но не для парсинга (приёма данных для вставки в таблицу). + +Примеры: +``` +:) SHOW CREATE TABLE geonames FORMAT VerticalRaw; +Row 1: +────── +statement: CREATE TABLE default.geonames ( geonameid UInt32, name String, asciiname String, alternatenames String, latitude Float32, longitude Float32, feature_class String, feature_code String, country_code String, cc2 String, admin1_code String, admin2_code String, admin3_code String, admin4_code String, population Int64, elevation String, dem String, timezone String, modification_date Date, date Date DEFAULT CAST('2017-12-08' AS Date)) ENGINE = MergeTree(date, geonameid, 8192) + +:) SELECT 'string with \'quotes\' and \t with some special \n characters' AS test FORMAT VerticalRaw; +Row 1: +────── +test: string with 'quotes' and with some special + characters +``` + +Для сравнения - формат Vertical: +``` +:) SELECT 'string with \'quotes\' and \t with some special \n characters' AS test FORMAT Vertical; +Row 1: +────── +test: string with \'quotes\' and \t with some special \n characters +``` diff --git a/docs/ru/interfaces/http_interface.md b/docs/ru/interfaces/http_interface.md index 28cb8ce6c66..a20ca4d844e 100644 --- a/docs/ru/interfaces/http_interface.md +++ b/docs/ru/interfaces/http_interface.md @@ -132,11 +132,14 @@ POST 'http://localhost:8123/?query=DROP TABLE t' Для запросов, которые не возвращают таблицу с данными, в случае успеха, выдаётся пустое тело ответа. -Вы можете использовать сжатие при передаче данных. Формат сжатых данных нестандартный, и вам придётся использовать для работы с ним специальную программу compressor (sudo apt-get install compressor-metrika-yandex). +Вы можете использовать внутренний формат сжатия Clickhouse при передаче данных. Формат сжатых данных нестандартный, и вам придётся использовать для работы с ним специальную программу compressor (sudo apt-get install compressor-metrika-yandex). 
Если вы указали в URL compress=1, то сервер будет сжимать отправляемые вам данные. Если вы указали в URL decompress=1, то сервер будет разжимать те данные, которые вы передаёте ему POST-ом. +Также имеется возможность использования стандартного сжатия HTTP на основе gzip. Чтобы отправить POST-запрос, сжатый с помощью gzip, добавьте к запросу заголовок `Content-Encoding: gzip`. +Чтобы ClickHouse сжимал ответ на запрос с помощью gzip, необходимо добавить `Accept-Encoding: gzip` к заголовкам запроса, и включить настройку ClickHouse `enable_http_compression`. + Это может быть использовано для уменьшения трафика по сети при передаче большого количества данных, а также для создания сразу сжатых дампов. В параметре URL database может быть указана БД по умолчанию. @@ -193,7 +196,11 @@ $ echo 'SELECT number FROM system.numbers LIMIT 10' | curl 'http://localhost:812 Об остальных параметрах смотри раздел "SET". -В отличие от родного интерфейса, HTTP интерфейс не поддерживает понятие сессии и настройки в пределах сессии, не позволяет (вернее, позволяет лишь в некоторых случаях) прервать выполнение запроса, не показывает прогресс выполнения запроса. Парсинг и форматирование данных производится на стороне сервера и использование сети может быть неэффективным. +В HTTP-протоколе можно использовать ClickHouse-сессии, для этого необходимо добавить к запросу GET-параметр `session_id`. В качестве идентификатора сессии можно использовать произвольную строку. По умолчанию через 60 секунд бездействия сессия будет прервана. Можно изменить этот таймаут, изменяя настройку `default_session_timeout` в конфигурации сервера, или добавив к запросу GET-параметр `session_timeout`. Статус сессии можно проверить с помощью параметра `session_check=1`. В рамках одной сессии одновременно может исполняться только один запрос. + +Имеется возможность получать информацию о прогрессе выполнения запроса в заголовках X-ClickHouse-Progress, для этого нужно включить настройку send_progress_in_http_headers. 
+ +Запущенные запросы не останавливаются автоматически при разрыве HTTP соединения. Парсинг и форматирование данных производится на стороне сервера и использование сети может быть неэффективным. Может быть передан необязательный параметр query_id - идентификатор запроса, произвольная строка. Подробнее смотрите раздел "Настройки, replace_running_query". Может быть передан необязательный параметр quota_key - ключ квоты, произвольная строка. Подробнее смотрите раздел "Квоты". diff --git a/docs/ru/table_functions/numbers.md b/docs/ru/table_functions/numbers.md new file mode 100644 index 00000000000..21bf3492ca1 --- /dev/null +++ b/docs/ru/table_functions/numbers.md @@ -0,0 +1,16 @@ +# numbers + +`numbers(N)` - возвращает таблицу с единственным столбцом number (тип UInt64), содержащую натуральные числа от 0 до N-1. + +Так же как и таблица `system.numbers` может использоваться для тестов и генерации последовательных значений. + +Следующие 2 запроса эквивалентны: +```sql +SELECT * FROM numbers(10); +SELECT * FROM system.numbers LIMIT 10; +``` +Примеры: +```sql +-- генерация последовательности всех дат от 2010-01-01 до 2010-12-31 +select toDate('2010-01-01') + number as d FROM numbers(365); +``` From 6bb8e52f71a86d88d6eb495bd7ec9cd7e4af8d7d Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov <1549571+filimonov@users.noreply.github.com> Date: Wed, 14 Mar 2018 00:42:06 +0100 Subject: [PATCH 02/88] Fixing bad copy-paste, shorten sample --- docs/en/formats/verticalraw.md | 4 ++-- docs/ru/formats/verticalraw.md | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/en/formats/verticalraw.md b/docs/en/formats/verticalraw.md index 440967eb598..9bb53ee1260 100644 --- a/docs/en/formats/verticalraw.md +++ b/docs/en/formats/verticalraw.md @@ -1,6 +1,6 @@ # VerticalRaw -Differs from `TabSeparated` format in that the rows are written without escaping. +Differs from `Vertical` format in that the rows are written without escaping. 
This format is only appropriate for outputting a query result, but not for parsing (retrieving data to insert in a table). Samples: @@ -8,7 +8,7 @@ Samples: :) SHOW CREATE TABLE geonames FORMAT VerticalRaw; Row 1: ────── -statement: CREATE TABLE default.geonames ( geonameid UInt32, name String, asciiname String, alternatenames String, latitude Float32, longitude Float32, feature_class String, feature_code String, country_code String, cc2 String, admin1_code String, admin2_code String, admin3_code String, admin4_code String, population Int64, elevation String, dem String, timezone String, modification_date Date, date Date DEFAULT CAST('2017-12-08' AS Date)) ENGINE = MergeTree(date, geonameid, 8192) +statement: CREATE TABLE default.geonames ( geonameid UInt32, date Date DEFAULT CAST('2017-12-08' AS Date)) ENGINE = MergeTree(date, geonameid, 8192) :) SELECT 'string with \'quotes\' and \t with some special \n characters' AS test FORMAT VerticalRaw; Row 1: diff --git a/docs/ru/formats/verticalraw.md b/docs/ru/formats/verticalraw.md index 9693cabe3c3..fb497430fcd 100644 --- a/docs/ru/formats/verticalraw.md +++ b/docs/ru/formats/verticalraw.md @@ -8,7 +8,7 @@ :) SHOW CREATE TABLE geonames FORMAT VerticalRaw; Row 1: ────── -statement: CREATE TABLE default.geonames ( geonameid UInt32, name String, asciiname String, alternatenames String, latitude Float32, longitude Float32, feature_class String, feature_code String, country_code String, cc2 String, admin1_code String, admin2_code String, admin3_code String, admin4_code String, population Int64, elevation String, dem String, timezone String, modification_date Date, date Date DEFAULT CAST('2017-12-08' AS Date)) ENGINE = MergeTree(date, geonameid, 8192) +statement: CREATE TABLE default.geonames ( geonameid UInt32, date Date DEFAULT CAST('2017-12-08' AS Date)) ENGINE = MergeTree(date, geonameid, 8192) :) SELECT 'string with \'quotes\' and \t with some special \n characters' AS test FORMAT VerticalRaw; Row 1: From 
efc0ed06a886ef2acb011624b7f0cb07c67debde Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov <1549571+filimonov@users.noreply.github.com> Date: Wed, 14 Mar 2018 00:49:12 +0100 Subject: [PATCH 03/88] Fixing obsolete name of clickhouse-compressor --- docs/en/interfaces/http_interface.md | 2 +- docs/ru/interfaces/http_interface.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/interfaces/http_interface.md b/docs/en/interfaces/http_interface.md index 8c223cf69cf..5c989a59d65 100644 --- a/docs/en/interfaces/http_interface.md +++ b/docs/en/interfaces/http_interface.md @@ -132,7 +132,7 @@ For successful requests that don't return a data table, an empty response body i You can use compression when transmitting data. -For using ClickHouse internal compression format, and you will need to use the special compressor program to work with it (sudo apt-get install compressor-metrika-yandex). +To use the ClickHouse internal compression format, you will need the special clickhouse-compressor program to work with it (installed as part of the clickhouse-client package). If you specified 'compress=1' in the URL, the server will compress the data it sends you. If you specified 'decompress=1' in the URL, the server will decompress the same data that you pass in the POST method. diff --git a/docs/ru/interfaces/http_interface.md b/docs/ru/interfaces/http_interface.md index a20ca4d844e..8f2ae4377f4 100644 --- a/docs/ru/interfaces/http_interface.md +++ b/docs/ru/interfaces/http_interface.md @@ -132,7 +132,7 @@ POST 'http://localhost:8123/?query=DROP TABLE t' Для запросов, которые не возвращают таблицу с данными, в случае успеха, выдаётся пустое тело ответа. -Вы можете использовать внутренний формат сжатия Clickhouse при передаче данных. Формат сжатых данных нестандартный, и вам придётся использовать для работы с ним специальную программу compressor (sudo apt-get install compressor-metrika-yandex). 
+Вы можете использовать внутренний формат сжатия Clickhouse при передаче данных. Формат сжатых данных нестандартный, и вам придётся использовать для работы с ним специальную программу clickhouse-compressor (устанавливается вместе с пакетом clickhouse-client). Если вы указали в URL compress=1, то сервер будет сжимать отправляемые вам данные. Если вы указали в URL decompress=1, то сервер будет разжимать те данные, которые вы передаёте ему POST-ом. From 80f8b2bdee6c481abb01dc9d5da4b53bb16bbc72 Mon Sep 17 00:00:00 2001 From: filimonov <1549571+filimonov@users.noreply.github.com> Date: Wed, 14 Mar 2018 00:52:38 +0100 Subject: [PATCH 04/88] fix word end --- docs/ru/table_functions/numbers.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/table_functions/numbers.md b/docs/ru/table_functions/numbers.md index 21bf3492ca1..bd5d566f78e 100644 --- a/docs/ru/table_functions/numbers.md +++ b/docs/ru/table_functions/numbers.md @@ -1,6 +1,6 @@ # numbers -`numbers(N)` - возвращает таблицу с единственным столбцом number (тип UInt64), содержащую натуральные числа от 0 до N-1. +`numbers(N)` - возвращает таблицу с единственным столбцом number (тип UInt64), содержащим натуральные числа от 0 до N-1. Так же как и таблица `system.numbers` может использоваться для тестов и генерации последовательных значений. 
From 88c4081a823bee118377ad658a08b941148eef0a Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 4 Jul 2018 20:02:47 +0300 Subject: [PATCH 05/88] 3578: defaults for input streams (in progress) --- contrib/CMakeLists.txt | 2 +- dbms/programs/client/Client.cpp | 5 +- dbms/src/Core/Block.h | 1 + dbms/src/Core/BlockInfo.cpp | 16 ++++ dbms/src/Core/BlockInfo.h | 20 ++++ .../AddingDefaultBlockOutputStream.cpp | 1 + .../AddingDefaultBlockOutputStream.h | 1 - .../AddingDefaultsBlockInputStream.cpp | 91 +++++++++++++++++++ .../AddingDefaultsBlockInputStream.h | 32 +++++++ .../BlockInputStreamFromRowInputStream.cpp | 20 +++- dbms/src/Formats/IRowInputStream.h | 10 ++ .../src/Formats/JSONEachRowRowInputStream.cpp | 10 +- dbms/src/Formats/JSONEachRowRowInputStream.h | 1 + .../Interpreters/evaluateMissingDefaults.cpp | 41 +++++++-- .../Interpreters/evaluateMissingDefaults.h | 5 + 15 files changed, 242 insertions(+), 14 deletions(-) create mode 100644 dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp create mode 100644 dbms/src/DataStreams/AddingDefaultsBlockInputStream.h diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 2f5e003fc2f..3021b315136 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -148,6 +148,6 @@ if (USE_INTERNAL_POCO_LIBRARY) endif () endif () -if (USE_INTERNAL_LLVM_LIBRARY) +if (ENABLE_EMBEDDED_COMPILER AND USE_INTERNAL_LLVM_LIBRARY) add_subdirectory (llvm/llvm) endif () diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index b056f82d1a7..23a9fcf2030 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -941,7 +942,9 @@ private: BlockInputStreamPtr block_input = context.getInputFormat( current_format, buf, sample, insert_format_max_block_size); - BlockInputStreamPtr async_block_input = std::make_shared(block_input); + ColumnDefaults column_defaults; // TODO: get from server + 
BlockInputStreamPtr defs_block_input = std::make_shared(block_input, column_defaults, context); + BlockInputStreamPtr async_block_input = std::make_shared(defs_block_input); async_block_input->readPrefix(); diff --git a/dbms/src/Core/Block.h b/dbms/src/Core/Block.h index 94a4147aac2..b7c19548963 100644 --- a/dbms/src/Core/Block.h +++ b/dbms/src/Core/Block.h @@ -35,6 +35,7 @@ private: public: BlockInfo info; + BlockDelayedDefaults delayed_defaults; Block() = default; Block(std::initializer_list il); diff --git a/dbms/src/Core/BlockInfo.cpp b/dbms/src/Core/BlockInfo.cpp index 77ef2e01007..15d7d9efa12 100644 --- a/dbms/src/Core/BlockInfo.cpp +++ b/dbms/src/Core/BlockInfo.cpp @@ -58,4 +58,20 @@ void BlockInfo::read(ReadBuffer & in) } } +void BlockDelayedDefaults::setBit(size_t column_idx, size_t row_idx) +{ + BitMask & mask = columns_defaults[column_idx]; + mask.resize(row_idx + 1); + mask[row_idx] = true; +} + +const BlockDelayedDefaults::BitMask & BlockDelayedDefaults::getColumnBitmask(size_t column_idx) const +{ + static BitMask none; + auto it = columns_defaults.find(column_idx); + if (it != columns_defaults.end()) + return it->second; + return none; +} + } diff --git a/dbms/src/Core/BlockInfo.h b/dbms/src/Core/BlockInfo.h index ebfbd117de7..00c0821a33e 100644 --- a/dbms/src/Core/BlockInfo.h +++ b/dbms/src/Core/BlockInfo.h @@ -1,5 +1,7 @@ #pragma once +#include + #include @@ -43,4 +45,22 @@ struct BlockInfo void read(ReadBuffer & in); }; +/// Block extention to support delayed defaults. +/// It's expected that it would be lots unset defaults or none. +/// NOTE It's possible to make better solution for sparse values. 
+class BlockDelayedDefaults +{ +public: + using BitMask = std::vector; + using MaskById = std::unordered_map; + + const BitMask & getColumnBitmask(size_t column_idx) const; + void setBit(size_t column_idx, size_t row_idx); + bool empty() const { return columns_defaults.empty(); } + size_t size() const { return columns_defaults.size(); } + +private: + MaskById columns_defaults; +}; + } diff --git a/dbms/src/DataStreams/AddingDefaultBlockOutputStream.cpp b/dbms/src/DataStreams/AddingDefaultBlockOutputStream.cpp index a43dfab6e13..52f8c3e453a 100644 --- a/dbms/src/DataStreams/AddingDefaultBlockOutputStream.cpp +++ b/dbms/src/DataStreams/AddingDefaultBlockOutputStream.cpp @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB diff --git a/dbms/src/DataStreams/AddingDefaultBlockOutputStream.h b/dbms/src/DataStreams/AddingDefaultBlockOutputStream.h index 06daf5532df..b36aaee501f 100644 --- a/dbms/src/DataStreams/AddingDefaultBlockOutputStream.h +++ b/dbms/src/DataStreams/AddingDefaultBlockOutputStream.h @@ -4,7 +4,6 @@ #include #include #include -#include namespace DB diff --git a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp new file mode 100644 index 00000000000..82c36bfab85 --- /dev/null +++ b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp @@ -0,0 +1,91 @@ +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; +} + + +AddingDefaultsBlockInputStream::AddingDefaultsBlockInputStream(const BlockInputStreamPtr & input, + const ColumnDefaults & column_defaults_, + const Context & context_) + : column_defaults(column_defaults_), + context(context_) +{ + children.push_back(input); + header = input->getHeader(); +} + + +Block AddingDefaultsBlockInputStream::readImpl() +{ + Block res = children.back()->read(); + if (!res) + return res; + + if (column_defaults.empty()) + 
return res; + + BlockDelayedDefaults delayed_defaults = res.delayed_defaults; + if (delayed_defaults.empty()) + return res; + + Block evaluate_block{res}; + for (const auto & column : column_defaults) + evaluate_block.erase(column.first); + + evaluateMissingDefaultsUnsafe(evaluate_block, header.getNamesAndTypesList(), column_defaults, context); + + ColumnsWithTypeAndName mixed_columns; + mixed_columns.reserve(std::min(column_defaults.size(), delayed_defaults.size())); + + for (const ColumnWithTypeAndName & column_def : evaluate_block) + { + const String & column_name = column_def.name; + + if (column_defaults.count(column_name) == 0) + continue; + + size_t block_column_position = res.getPositionByName(column_name); + const ColumnWithTypeAndName & column_read = res.getByPosition(block_column_position); + + if (column_read.column->size() != column_def.column->size()) + throw Exception("Mismach column sizes while adding defaults", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); + + const BlockDelayedDefaults::BitMask & mask = delayed_defaults.getColumnBitmask(block_column_position); + MutableColumnPtr column_mixed = column_read.column->cloneEmpty(); + + for (size_t row_idx = 0; row_idx < column_read.column->size(); ++row_idx) + { + if (mask[row_idx]) + column_mixed->insertFrom(*column_def.column, row_idx); + else + column_mixed->insertFrom(*column_read.column, row_idx); + } + + ColumnWithTypeAndName mix = column_read.cloneEmpty(); + mix.column = std::move(column_mixed); + mixed_columns.emplace_back(std::move(mix)); + } + + for (auto & column : mixed_columns) + { + res.erase(column.name); + res.insert(std::move(column)); + } + + return res; +} + +} diff --git a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.h b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.h new file mode 100644 index 00000000000..5caaec244da --- /dev/null +++ b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.h @@ -0,0 +1,32 @@ +#pragma once + +#include +#include +#include + + +namespace DB 
+{ + +/// Adds defaults to columns using BlockDelayedDefaults bitmask attached to Block by child InputStream. +class AddingDefaultsBlockInputStream : public IProfilingBlockInputStream +{ +public: + AddingDefaultsBlockInputStream( + const BlockInputStreamPtr & input, + const ColumnDefaults & column_defaults_, + const Context & context_); + + String getName() const override { return "AddingDefaults"; } + Block getHeader() const override { return header; } + +protected: + Block readImpl() override; + +private: + Block header; + const ColumnDefaults column_defaults; + const Context & context; +}; + +} diff --git a/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp b/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp index 21457a4a74f..aa4c2968539 100644 --- a/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp +++ b/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp @@ -16,6 +16,7 @@ namespace ErrorCodes extern const int CANNOT_PARSE_NUMBER; extern const int CANNOT_PARSE_UUID; extern const int TOO_LARGE_STRING_SIZE; + extern const int INCORRECT_NUMBER_OF_COLUMNS; } @@ -47,6 +48,7 @@ Block BlockInputStreamFromRowInputStream::readImpl() { size_t num_columns = sample.columns(); MutableColumns columns = sample.cloneEmptyColumns(); + BlockDelayedDefaults delayed_defaults; try { @@ -55,8 +57,19 @@ Block BlockInputStreamFromRowInputStream::readImpl() try { ++total_rows; - if (!row_input->read(columns)) + RowReadExtention info; + if (!row_input->extendedRead(columns, info)) break; + + for (size_t column_idx = 0; column_idx < info.read_columns.size(); ++column_idx) + { + if (!info.read_columns[column_idx]) { + size_t column_size = columns[column_idx]->size(); + if (column_size == 0) + throw Exception("Unexpected empty column", ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS); + delayed_defaults.setBit(column_idx, column_size - 1); + } + } } catch (Exception & e) { @@ -125,7 +138,10 @@ Block BlockInputStreamFromRowInputStream::readImpl() if (columns.empty() || 
columns[0]->empty()) return {}; - return sample.cloneWithColumns(std::move(columns)); + auto out_block = sample.cloneWithColumns(std::move(columns)); + if (!delayed_defaults.empty()) + out_block.delayed_defaults = std::move(delayed_defaults); + return out_block; } } diff --git a/dbms/src/Formats/IRowInputStream.h b/dbms/src/Formats/IRowInputStream.h index 49019740d10..1ffa205edc6 100644 --- a/dbms/src/Formats/IRowInputStream.h +++ b/dbms/src/Formats/IRowInputStream.h @@ -10,6 +10,15 @@ namespace DB { +/// A way to set some extentions to read and return extra information too. +struct RowReadExtention +{ + using BitMask = std::vector; + + /// IRowInputStream.extendedRead() output value. + /// Contains true for columns that actually read from the source and false for defaults + BitMask read_columns; +}; /** Interface of stream, that allows to read data by rows. */ @@ -20,6 +29,7 @@ public: * If no more rows - return false. */ virtual bool read(MutableColumns & columns) = 0; + virtual bool extendedRead(MutableColumns & columns, RowReadExtention & ) { return read(columns); } virtual void readPrefix() {} /// delimiter before begin of result virtual void readSuffix() {} /// delimiter after end of result diff --git a/dbms/src/Formats/JSONEachRowRowInputStream.cpp b/dbms/src/Formats/JSONEachRowRowInputStream.cpp index c47111e047d..15c9e6f5861 100644 --- a/dbms/src/Formats/JSONEachRowRowInputStream.cpp +++ b/dbms/src/Formats/JSONEachRowRowInputStream.cpp @@ -62,6 +62,12 @@ static void skipColonDelimeter(ReadBuffer & istr) bool JSONEachRowRowInputStream::read(MutableColumns & columns) +{ + RowReadExtention tmp; + return extendedRead(columns, tmp); +} + +bool JSONEachRowRowInputStream::extendedRead(MutableColumns & columns, RowReadExtention & ext) { skipWhitespaceIfAny(istr); @@ -84,8 +90,8 @@ bool JSONEachRowRowInputStream::read(MutableColumns & columns) /// Set of columns for which the values were read. The rest will be filled with default values. 
/// TODO Ability to provide your DEFAULTs. - bool read_columns[num_columns]; - memset(read_columns, 0, num_columns); + auto & read_columns = ext.read_columns; + read_columns.assign(num_columns, false); bool first = true; while (true) diff --git a/dbms/src/Formats/JSONEachRowRowInputStream.h b/dbms/src/Formats/JSONEachRowRowInputStream.h index 737811be51c..dea3597de29 100644 --- a/dbms/src/Formats/JSONEachRowRowInputStream.h +++ b/dbms/src/Formats/JSONEachRowRowInputStream.h @@ -23,6 +23,7 @@ public: JSONEachRowRowInputStream(ReadBuffer & istr_, const Block & header_, const FormatSettings & format_settings); bool read(MutableColumns & columns) override; + bool extendedRead(MutableColumns & columns, RowReadExtention & ext) override; bool allowSyncAfterError() const override { return true; } void syncAfterError() override; diff --git a/dbms/src/Interpreters/evaluateMissingDefaults.cpp b/dbms/src/Interpreters/evaluateMissingDefaults.cpp index 0b9bcb5417f..ad7d430c3e1 100644 --- a/dbms/src/Interpreters/evaluateMissingDefaults.cpp +++ b/dbms/src/Interpreters/evaluateMissingDefaults.cpp @@ -11,14 +11,8 @@ namespace DB { -void evaluateMissingDefaults(Block & block, - const NamesAndTypesList & required_columns, - const ColumnDefaults & column_defaults, - const Context & context) +static ASTPtr requiredExpressions(Block & block, const NamesAndTypesList & required_columns, const ColumnDefaults & column_defaults) { - if (column_defaults.empty()) - return; - ASTPtr default_expr_list = std::make_shared(); for (const auto & column : required_columns) @@ -34,6 +28,19 @@ void evaluateMissingDefaults(Block & block, setAlias(it->second.expression->clone(), it->first)); } + return default_expr_list; +} + + +void evaluateMissingDefaults(Block & block, + const NamesAndTypesList & required_columns, + const ColumnDefaults & column_defaults, + const Context & context) +{ + if (column_defaults.empty()) + return; + + ASTPtr default_expr_list = requiredExpressions(block, required_columns, 
column_defaults); /// nothing to evaluate if (default_expr_list->children.empty()) return; @@ -59,4 +66,24 @@ void evaluateMissingDefaults(Block & block, } } + +void evaluateMissingDefaultsUnsafe(Block & block, + const NamesAndTypesList & required_columns, + const std::unordered_map & column_defaults, + const Context & context) +{ + if (column_defaults.empty()) + return; + + ASTPtr default_expr_list = requiredExpressions(block, required_columns, column_defaults); + if (default_expr_list->children.empty()) + return; + + NamesAndTypesList available_columns; + for (size_t i = 0, size = block.columns(); i < size; ++i) + available_columns.emplace_back(block.getByPosition(i).name, block.getByPosition(i).type); + + ExpressionAnalyzer{default_expr_list, context, {}, available_columns}.getActions(true)->execute(block); +} + } diff --git a/dbms/src/Interpreters/evaluateMissingDefaults.h b/dbms/src/Interpreters/evaluateMissingDefaults.h index a9b46d8dee5..ce0c649f3d0 100644 --- a/dbms/src/Interpreters/evaluateMissingDefaults.h +++ b/dbms/src/Interpreters/evaluateMissingDefaults.h @@ -17,4 +17,9 @@ void evaluateMissingDefaults(Block & block, const std::unordered_map & column_defaults, const Context & context); +void evaluateMissingDefaultsUnsafe(Block & block, + const NamesAndTypesList & required_columns, + const std::unordered_map & column_defaults, + const Context & context); + } From e34bc2782ec28557060b001b02462bd1f163ff67 Mon Sep 17 00:00:00 2001 From: chertus Date: Thu, 5 Jul 2018 14:01:25 +0300 Subject: [PATCH 06/88] 3578: get ColumnDefaults from context (it's empty now) --- dbms/programs/client/Client.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 23a9fcf2030..2a26a30c0b8 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -933,16 +933,25 @@ private: void sendDataFrom(ReadBuffer & buf, Block & sample) { String current_format = 
insert_format; + ColumnDefaults column_defaults; /// Data format can be specified in the INSERT query. if (ASTInsertQuery * insert = typeid_cast(&*parsed_query)) + { if (!insert->format.empty()) current_format = insert->format; + if (context.isTableExist(insert->database, insert->table)) + { + StoragePtr table = context.getTable(insert->database, insert->table); + if (table) + column_defaults = table->getColumns().defaults; + } + } + BlockInputStreamPtr block_input = context.getInputFormat( current_format, buf, sample, insert_format_max_block_size); - ColumnDefaults column_defaults; // TODO: get from server BlockInputStreamPtr defs_block_input = std::make_shared(block_input, column_defaults, context); BlockInputStreamPtr async_block_input = std::make_shared(defs_block_input); From c26ca0232d68bda7f456e47cb6c4b014ded579c3 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 6 Jul 2018 18:49:33 +0300 Subject: [PATCH 07/88] metadata in capn proto format (in progress) --- dbms/programs/client/CMakeLists.txt | 3 +- dbms/programs/client/Client.cpp | 6 +- dbms/src/CMakeLists.txt | 3 + dbms/src/Core/Protocol.h | 16 +++-- dbms/src/Proto/CMakeLists.txt | 13 ++++ dbms/src/Proto/ServerMessage.capnp | 34 ++++++++++ dbms/src/Proto/protoHelpers.cpp | 98 +++++++++++++++++++++++++++++ dbms/src/Proto/protoHelpers.h | 11 ++++ 8 files changed, 177 insertions(+), 7 deletions(-) create mode 100644 dbms/src/Proto/CMakeLists.txt create mode 100644 dbms/src/Proto/ServerMessage.capnp create mode 100644 dbms/src/Proto/protoHelpers.cpp create mode 100644 dbms/src/Proto/protoHelpers.h diff --git a/dbms/programs/client/CMakeLists.txt b/dbms/programs/client/CMakeLists.txt index c7d2311b11e..0f84270c8c9 100644 --- a/dbms/programs/client/CMakeLists.txt +++ b/dbms/programs/client/CMakeLists.txt @@ -1,5 +1,6 @@ add_library (clickhouse-client-lib Client.cpp) -target_link_libraries (clickhouse-client-lib clickhouse_functions clickhouse_aggregate_functions ${LINE_EDITING_LIBS} 
${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries (clickhouse-client-lib clickhouse_functions clickhouse_aggregate_functions clickhouse_proto + ${LINE_EDITING_LIBS} ${Boost_PROGRAM_OPTIONS_LIBRARY}) target_include_directories (clickhouse-client-lib SYSTEM PRIVATE ${READLINE_INCLUDE_DIR}) if (CLICKHOUSE_SPLIT_BINARY) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 2a26a30c0b8..84d2d640d80 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -54,9 +54,9 @@ #include #include #include +#include #include - /// http://en.wikipedia.org/wiki/ANSI_escape_code /// Similar codes \e[s, \e[u don't work in VT100 and Mosh. @@ -1086,6 +1086,10 @@ private: last_exception = std::move(packet.exception); return false; + case Protocol::Server::CapnProto: + loadContext(packet.block.getColumnsWithTypeAndName()[0], context); + return receiveSampleBlock(out); + default: throw NetException("Unexpected packet from server (expected Data, got " + String(Protocol::Server::toString(packet.type)) + ")", ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER); diff --git a/dbms/src/CMakeLists.txt b/dbms/src/CMakeLists.txt index 30bd7c134cc..25b84ec1acc 100644 --- a/dbms/src/CMakeLists.txt +++ b/dbms/src/CMakeLists.txt @@ -14,3 +14,6 @@ add_subdirectory (Client) add_subdirectory (TableFunctions) add_subdirectory (Analyzers) add_subdirectory (Formats) +if (USE_CAPNP) + add_subdirectory (Proto) +endif () diff --git a/dbms/src/Core/Protocol.h b/dbms/src/Core/Protocol.h index cd5456cca34..5006173dacb 100644 --- a/dbms/src/Core/Protocol.h +++ b/dbms/src/Core/Protocol.h @@ -69,6 +69,7 @@ namespace Protocol Totals = 7, /// A block with totals (compressed or not). Extremes = 8, /// A block with minimums and maximums (compressed or not). TablesStatusResponse = 9, /// A response to TablesStatus request. 
+ CapnProto = 10, /// Cap'n Proto }; /// NOTE: If the type of packet argument would be Enum, the comparison packet >= 0 && packet < 10 @@ -77,8 +78,9 @@ namespace Protocol /// See https://www.securecoding.cert.org/confluence/display/cplusplus/INT36-CPP.+Do+not+use+out-of-range+enumeration+values inline const char * toString(UInt64 packet) { - static const char * data[] = { "Hello", "Data", "Exception", "Progress", "Pong", "EndOfStream", "ProfileInfo", "Totals", "Extremes", "TablesStatusResponse" }; - return packet < 10 + static const char * data[] = { "Hello", "Data", "Exception", "Progress", "Pong", "EndOfStream", "ProfileInfo", "Totals", + "Extremes", "TablesStatusResponse", "CapnProto" }; + return packet < 11 ? data[packet] : "Unknown packet"; } @@ -97,14 +99,18 @@ namespace Protocol Cancel = 3, /// Cancel the query execution. Ping = 4, /// Check that connection to the server is alive. TablesStatusRequest = 5, /// Check status of tables on the server. + /// + CapnProto = 10, /// Cap'n Proto }; inline const char * toString(UInt64 packet) { - static const char * data[] = { "Hello", "Query", "Data", "Cancel", "Ping", "TablesStatusRequest" }; - return packet < 6 + static const char * unknown = "Unknown packet"; + static const char * data[] = { "Hello", "Query", "Data", "Cancel", "Ping", "TablesStatusRequest", + unknown, unknown, unknown, unknown, "CapnProto" }; + return packet < 11 ? 
data[packet] - : "Unknown packet"; + : unknown; } } diff --git a/dbms/src/Proto/CMakeLists.txt b/dbms/src/Proto/CMakeLists.txt new file mode 100644 index 00000000000..b3871cd53e8 --- /dev/null +++ b/dbms/src/Proto/CMakeLists.txt @@ -0,0 +1,13 @@ +set (CAPNP_PATH ${CMAKE_BINARY_DIR}/contrib/capnproto/c++/src/capnp) +set (CAPNP_BIN ${CAPNP_PATH}/capnp) + +add_custom_command (OUTPUT ServerMessage.capnp.c++ ServerMessage.capnp.h + COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/ServerMessage.capnp ${CMAKE_CURRENT_BINARY_DIR}/ServerMessage.capnp + COMMAND ${CMAKE_COMMAND} -E env PATH=${CAPNP_PATH} ${CAPNP_BIN} compile -I ${CAPNP_INCLUDE_DIR} -oc++ ServerMessage.capnp + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/ServerMessage.capnp) + +add_library (clickhouse_proto ServerMessage.capnp.c++ protoHelpers.cpp) +target_link_libraries (clickhouse_proto clickhouse_common_io ${CAPNP_LIBRARY}) +target_include_directories (clickhouse_proto PUBLIC ${CAPNP_INCLUDE_DIR} ${DBMS_INCLUDE_DIR}) +target_include_directories (clickhouse_proto PRIVATE + ${CMAKE_CURRENT_BINARY_DIR} ${COMMON_INCLUDE_DIR} ${DBMS_INCLUDE_DIR} ${CITYHASH_CONTRIB_INCLUDE_DIR}) diff --git a/dbms/src/Proto/ServerMessage.capnp b/dbms/src/Proto/ServerMessage.capnp new file mode 100644 index 00000000000..317430fce91 --- /dev/null +++ b/dbms/src/Proto/ServerMessage.capnp @@ -0,0 +1,34 @@ +@0xfdd1e2948338b156; + +using Cxx = import "/capnp/c++.capnp"; +$Cxx.namespace("DB::Proto"); + +struct ColumnDefault +{ + kind @0 :UInt16; + expression @1 :Text; +} + +struct Column +{ + name @0 :Text; + type @1 :Text; + default @2 :ColumnDefault; +} + +struct Table +{ + name @0 :Text; + columns @1 :List(Column); +} + +struct Database +{ + name @0 :Text; + tables @1 :List(Table); +} + +struct Context +{ + databases @0 :List(Database); +} diff --git a/dbms/src/Proto/protoHelpers.cpp b/dbms/src/Proto/protoHelpers.cpp new file mode 100644 index 00000000000..9af677156b7 --- /dev/null +++ 
b/dbms/src/Proto/protoHelpers.cpp @@ -0,0 +1,98 @@ +#include "protoHelpers.h" +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + + +namespace DB +{ + ColumnWithTypeAndName storeContext(Context & context) + { + capnp::MallocMessageBuilder message; + Proto::Context::Builder proto_context = message.initRoot(); + + Databases dbs = context.getDatabases(); + auto proto_databases = proto_context.initDatabases(dbs.size()); + + size_t db_nomber = 0; + for (auto & pr_db : dbs) + { + const String& db_name = pr_db.first; + IDatabase& db = *pr_db.second; + + auto proto_db = proto_databases[db_nomber]; + proto_db.setName(db_name); + + std::unordered_map tables; + DatabaseIteratorPtr it_tables = db.getIterator(context); + while (it_tables->isValid()) + { + tables[it_tables->name()] = it_tables->table(); + it_tables->next(); + } + + auto proto_tables = proto_db.initTables(tables.size()); + size_t table_no = 0; + for (const auto & pr_table : tables) + { + auto current_table = proto_tables[table_no]; + current_table.setName(pr_table.first); + + const ColumnsDescription & columns = pr_table.second->getColumns(); + auto proto_columns = current_table.initColumns(columns.defaults.size()); + + size_t column_no = 0; + for (const auto& pr_column : columns.defaults) + { + const String & column_name = pr_column.first; + const ColumnDefault & def = pr_column.second; + std::stringstream ss; + ss << def.expression; + + auto current_column = proto_columns[column_no]; + current_column.setName(column_name); + current_column.getDefault().setKind(static_cast(def.kind)); + current_column.getDefault().setExpression(ss.str()); + + ++column_no; + } + + ++table_no; + } + + ++db_nomber; + } + + ColumnWithTypeAndName proto_column; + proto_column.name = "context"; + proto_column.type = std::make_shared(); + MutableColumnPtr data = proto_column.type->createColumn(); + + kj::Array serialized = messageToFlatArray(message); + 
data->insertData(reinterpret_cast(serialized.begin()), serialized.size() * sizeof(capnp::word)); + + proto_column.column = std::move(data); + return proto_column; + } + + void loadContext(const ColumnWithTypeAndName & , Context & ) + { +#if 0 + kj::Array messageToFlatArray(MessageBuilder& builder); + + capnp::MallocMessageBuilder message; + Proto::ServerMessage::Builder serverMessage = message.initRoot(); + /// TODO +#endif + } +} diff --git a/dbms/src/Proto/protoHelpers.h b/dbms/src/Proto/protoHelpers.h new file mode 100644 index 00000000000..64c99995722 --- /dev/null +++ b/dbms/src/Proto/protoHelpers.h @@ -0,0 +1,11 @@ +#pragma once + + +namespace DB +{ + class Context; + struct ColumnWithTypeAndName; + + ColumnWithTypeAndName storeContext(Context & context); + void loadContext(const ColumnWithTypeAndName & proto_column, Context & context); +} From 597778c074368b7f1759f1b157c927c156eac911 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 6 Jul 2018 22:19:06 +0300 Subject: [PATCH 08/88] metadata in capn proto (in progress) --- dbms/src/Proto/protoHelpers.cpp | 81 ++++++++++++++++++++++++++------- 1 file changed, 65 insertions(+), 16 deletions(-) diff --git a/dbms/src/Proto/protoHelpers.cpp b/dbms/src/Proto/protoHelpers.cpp index 9af677156b7..567faaba2f0 100644 --- a/dbms/src/Proto/protoHelpers.cpp +++ b/dbms/src/Proto/protoHelpers.cpp @@ -4,18 +4,39 @@ #include #include #include +#include +#include #include #include #include #include -#include -#include -#include +#include namespace DB { + template + static MutableColumnPtr serializeProto(const ColumnT & column_type, capnp::MessageBuilder & message) + { + MutableColumnPtr data = column_type.createColumn(); + + kj::Array serialized = messageToFlatArray(message); + + data->insertData(reinterpret_cast(serialized.begin()), serialized.size() * sizeof(capnp::word)); + return data; + } + + template + typename T::Reader deserializeProto(const char * data, size_t data_size) + { + const capnp::word * ptr = 
reinterpret_cast(data); + auto serialized = kj::arrayPtr(ptr, data_size / sizeof(capnp::word)); + + capnp::FlatArrayMessageReader reader(serialized); + return reader.getRoot(); + } + ColumnWithTypeAndName storeContext(Context & context) { capnp::MallocMessageBuilder message; @@ -76,23 +97,51 @@ namespace DB ColumnWithTypeAndName proto_column; proto_column.name = "context"; proto_column.type = std::make_shared(); - MutableColumnPtr data = proto_column.type->createColumn(); - - kj::Array serialized = messageToFlatArray(message); - data->insertData(reinterpret_cast(serialized.begin()), serialized.size() * sizeof(capnp::word)); - - proto_column.column = std::move(data); + proto_column.column = std::move(serializeProto(*proto_column.type, message)); return proto_column; } - void loadContext(const ColumnWithTypeAndName & , Context & ) + void loadContext(const ColumnWithTypeAndName & proto_column, Context & context) { -#if 0 - kj::Array messageToFlatArray(MessageBuilder& builder); + StringRef plain_data = proto_column.column->getDataAt(0); + size_t data_size = proto_column.column->byteSize(); + Proto::Context::Reader proto_context = deserializeProto(plain_data.data, data_size); - capnp::MallocMessageBuilder message; - Proto::ServerMessage::Builder serverMessage = message.initRoot(); - /// TODO -#endif + // or ParserCompoundColumnDeclaration ? 
+ ParserColumnDeclaration parser_defaults; + + for (auto proto_database : proto_context.getDatabases()) + { + String database_name = proto_database.getName().cStr(); + if (!context.isDatabaseExist(database_name)) + { + // TODO + } + + for (auto proto_table : proto_database.getTables()) + { + String table_name = proto_table.getName().cStr(); + if (!context.isTableExist(database_name, table_name)) + { + // TODO + } + + StoragePtr table = context.tryGetTable(database_name, table_name); + // TODO: throw on fail + + ColumnsDescription column_description; + for (auto column : proto_table.getColumns()) + { + String column_name = column.getName().cStr(); + String expression = column.getDefault().getExpression().cStr(); + ColumnDefaultKind expression_kind = static_cast(column.getDefault().getKind()); + ASTPtr ast = parseQuery(parser_defaults, expression, expression.size()); + + column_description.defaults[column_name] = ColumnDefault{expression_kind, ast}; + } + + table->setColumns(column_description); + } + } } } From 2e4c2328af07de9e229da4f498f288255d0155fd Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 9 Jul 2018 19:31:24 +0300 Subject: [PATCH 09/88] metadata in capn proto format (in progress - need apply on client) --- dbms/programs/client/Client.cpp | 2 +- dbms/programs/server/TCPHandler.cpp | 19 ++++++++++ dbms/programs/server/TCPHandler.h | 1 + dbms/src/Client/Connection.cpp | 1 + dbms/src/Proto/protoHelpers.cpp | 59 ++++++++++++++++++++--------- dbms/src/Proto/protoHelpers.h | 6 +-- 6 files changed, 67 insertions(+), 21 deletions(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 84d2d640d80..3c7f5e013ce 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -1087,7 +1087,7 @@ private: return false; case Protocol::Server::CapnProto: - loadContext(packet.block.getColumnsWithTypeAndName()[0], context); + loadContextBlock(packet.block, context); return receiveSampleBlock(out); default: diff --git 
a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp index 53ca6c8699f..128dc0090bd 100644 --- a/dbms/programs/server/TCPHandler.cpp +++ b/dbms/programs/server/TCPHandler.cpp @@ -28,6 +28,8 @@ #include +#include + #include "TCPHandler.h" #include @@ -310,6 +312,10 @@ void TCPHandler::processInsertQuery(const Settings & global_settings) */ state.io.out->writePrefix(); + /// Send query metadata (column defaults) + Block meta_block = storeContextBlock(query_context); + sendMetadata(meta_block); + /// Send block to the client - table structure. Block block = state.io.out->getHeader(); sendData(block); @@ -762,6 +768,19 @@ void TCPHandler::sendData(const Block & block) } +void TCPHandler::sendMetadata(const Block & block) +{ + initBlockOutput(block); + + writeVarUInt(Protocol::Server::CapnProto, *out); + writeStringBinary("", *out); + + state.block_out->write(block); + state.maybe_compressed_out->next(); + out->next(); +} + + void TCPHandler::sendException(const Exception & e) { writeVarUInt(Protocol::Server::Exception, *out); diff --git a/dbms/programs/server/TCPHandler.h b/dbms/programs/server/TCPHandler.h index e01987d3bbd..0a764ac9e2a 100644 --- a/dbms/programs/server/TCPHandler.h +++ b/dbms/programs/server/TCPHandler.h @@ -139,6 +139,7 @@ private: void sendHello(); void sendData(const Block & block); /// Write a block to the network. 
+ void sendMetadata(const Block & block); void sendException(const Exception & e); void sendProgress(); void sendEndOfStream(); diff --git a/dbms/src/Client/Connection.cpp b/dbms/src/Client/Connection.cpp index c461b4cafde..7dce0afa0c0 100644 --- a/dbms/src/Client/Connection.cpp +++ b/dbms/src/Client/Connection.cpp @@ -521,6 +521,7 @@ Connection::Packet Connection::receivePacket() switch (res.type) { case Protocol::Server::Data: + case Protocol::Server::CapnProto: res.block = receiveData(); return res; diff --git a/dbms/src/Proto/protoHelpers.cpp b/dbms/src/Proto/protoHelpers.cpp index 567faaba2f0..a95e5be8964 100644 --- a/dbms/src/Proto/protoHelpers.cpp +++ b/dbms/src/Proto/protoHelpers.cpp @@ -5,25 +5,30 @@ #include #include #include -#include +#include #include #include +#include #include #include #include +/// @sa https://capnproto.org/cxx.html namespace DB { - template - static MutableColumnPtr serializeProto(const ColumnT & column_type, capnp::MessageBuilder & message) + static MutableColumnPtr serializeProto(capnp::MessageBuilder & message) { - MutableColumnPtr data = column_type.createColumn(); + MutableColumnPtr data = DataTypeUInt8().createColumn(); kj::Array serialized = messageToFlatArray(message); + kj::ArrayPtr bytes = serialized.asChars(); + + data->reserve(bytes.size()); + for (size_t i = 0 ; i < bytes.size(); ++i) + data->insertData(&bytes[i], 1); - data->insertData(reinterpret_cast(serialized.begin()), serialized.size() * sizeof(capnp::word)); return data; } @@ -37,7 +42,7 @@ namespace DB return reader.getRoot(); } - ColumnWithTypeAndName storeContext(Context & context) + static ColumnWithTypeAndName storeContext(const String & column_name, Context & context) { capnp::MallocMessageBuilder message; Proto::Context::Builder proto_context = message.initRoot(); @@ -48,11 +53,14 @@ namespace DB size_t db_nomber = 0; for (auto & pr_db : dbs) { - const String& db_name = pr_db.first; - IDatabase& db = *pr_db.second; + const String & database_name = 
pr_db.first; + if (database_name == "system") + continue; + + IDatabase & db = *pr_db.second; auto proto_db = proto_databases[db_nomber]; - proto_db.setName(db_name); + proto_db.setName(database_name); std::unordered_map tables; DatabaseIteratorPtr it_tables = db.getIterator(context); @@ -95,24 +103,23 @@ namespace DB } ColumnWithTypeAndName proto_column; - proto_column.name = "context"; - proto_column.type = std::make_shared(); - proto_column.column = std::move(serializeProto(*proto_column.type, message)); + proto_column.name = column_name; + proto_column.type = std::make_shared(); + proto_column.column = std::move(serializeProto(message)); return proto_column; } - void loadContext(const ColumnWithTypeAndName & proto_column, Context & context) + static void loadContext(const ColumnWithTypeAndName & proto_column, Context & context) { StringRef plain_data = proto_column.column->getDataAt(0); size_t data_size = proto_column.column->byteSize(); Proto::Context::Reader proto_context = deserializeProto(plain_data.data, data_size); - // or ParserCompoundColumnDeclaration ? 
- ParserColumnDeclaration parser_defaults; + ParserExpressionElement parser; for (auto proto_database : proto_context.getDatabases()) { - String database_name = proto_database.getName().cStr(); + const String & database_name = proto_database.getName().cStr(); if (!context.isDatabaseExist(database_name)) { // TODO @@ -135,8 +142,8 @@ namespace DB String column_name = column.getName().cStr(); String expression = column.getDefault().getExpression().cStr(); ColumnDefaultKind expression_kind = static_cast(column.getDefault().getKind()); - ASTPtr ast = parseQuery(parser_defaults, expression, expression.size()); + ASTPtr ast = parseQuery(parser, expression, expression.size()); column_description.defaults[column_name] = ColumnDefault{expression_kind, ast}; } @@ -144,4 +151,22 @@ namespace DB } } } + + static constexpr const char * contextColumnName() + { + return "context"; + } + + Block storeContextBlock(Context & context) + { + Block block; + block.insert(storeContext(contextColumnName(), context)); + return block; + } + + void loadContextBlock(const Block & block, Context & context) + { + const ColumnWithTypeAndName & column = block.getByName(contextColumnName()); + loadContext(column, context); + } } diff --git a/dbms/src/Proto/protoHelpers.h b/dbms/src/Proto/protoHelpers.h index 64c99995722..14512d3453c 100644 --- a/dbms/src/Proto/protoHelpers.h +++ b/dbms/src/Proto/protoHelpers.h @@ -4,8 +4,8 @@ namespace DB { class Context; - struct ColumnWithTypeAndName; + class Block; - ColumnWithTypeAndName storeContext(Context & context); - void loadContext(const ColumnWithTypeAndName & proto_column, Context & context); + Block storeContextBlock(Context & context); + void loadContextBlock(const Block & block, Context & context); } From bac1f77620492b5096600479972cba42257db743 Mon Sep 17 00:00:00 2001 From: chertus Date: Tue, 10 Jul 2018 14:40:33 +0300 Subject: [PATCH 10/88] insert defaults (first working version) --- dbms/programs/client/Client.cpp | 28 
+++++++++++----------------- dbms/src/Proto/protoHelpers.cpp | 28 +++++++++------------------- dbms/src/Proto/protoHelpers.h | 14 +++++++++++++- 3 files changed, 33 insertions(+), 37 deletions(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 3c7f5e013ce..414ba81f3c5 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -865,11 +865,12 @@ private: /// Receive description of table structure. Block sample; - if (receiveSampleBlock(sample)) + TableMetaInfo table_meta(parsed_insert_query.database, parsed_insert_query.table); + if (receiveSampleBlock(sample, table_meta)) { /// If structure was received (thus, server has not thrown an exception), /// send our data with that structure. - sendData(sample); + sendData(sample, table_meta); receivePacket(); } } @@ -907,7 +908,7 @@ private: } - void sendData(Block & sample) + void sendData(Block & sample, const TableMetaInfo & table_meta) { /// If INSERT data must be sent. const ASTInsertQuery * parsed_insert_query = typeid_cast(&*parsed_query); @@ -918,35 +919,28 @@ private: { /// Send data contained in the query. ReadBufferFromMemory data_in(parsed_insert_query->data, parsed_insert_query->end - parsed_insert_query->data); - sendDataFrom(data_in, sample); + sendDataFrom(data_in, sample, table_meta); } else if (!is_interactive) { /// Send data read from stdin. - sendDataFrom(std_in, sample); + sendDataFrom(std_in, sample, table_meta); } else throw Exception("No data to insert", ErrorCodes::NO_DATA_TO_INSERT); } - void sendDataFrom(ReadBuffer & buf, Block & sample) + void sendDataFrom(ReadBuffer & buf, Block & sample, const TableMetaInfo & table_meta) { String current_format = insert_format; - ColumnDefaults column_defaults; + const ColumnDefaults & column_defaults = table_meta.column_defaults; /// Data format can be specified in the INSERT query. 
if (ASTInsertQuery * insert = typeid_cast(&*parsed_query)) { if (!insert->format.empty()) current_format = insert->format; - - if (context.isTableExist(insert->database, insert->table)) - { - StoragePtr table = context.getTable(insert->database, insert->table); - if (table) - column_defaults = table->getColumns().defaults; - } } BlockInputStreamPtr block_input = context.getInputFormat( @@ -1071,7 +1065,7 @@ private: /// Receive the block that serves as an example of the structure of table where data will be inserted. - bool receiveSampleBlock(Block & out) + bool receiveSampleBlock(Block & out, TableMetaInfo & table_meta) { Connection::Packet packet = connection->receivePacket(); @@ -1087,8 +1081,8 @@ private: return false; case Protocol::Server::CapnProto: - loadContextBlock(packet.block, context); - return receiveSampleBlock(out); + loadTableMetaInfo(packet.block, table_meta); + return receiveSampleBlock(out, table_meta); default: throw NetException("Unexpected packet from server (expected Data, got " diff --git a/dbms/src/Proto/protoHelpers.cpp b/dbms/src/Proto/protoHelpers.cpp index a95e5be8964..2ae1fda40f4 100644 --- a/dbms/src/Proto/protoHelpers.cpp +++ b/dbms/src/Proto/protoHelpers.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include @@ -109,7 +109,7 @@ namespace DB return proto_column; } - static void loadContext(const ColumnWithTypeAndName & proto_column, Context & context) + static void loadTableMetaInfo(const ColumnWithTypeAndName & proto_column, TableMetaInfo & table_meta) { StringRef plain_data = proto_column.column->getDataAt(0); size_t data_size = proto_column.column->byteSize(); @@ -120,23 +120,15 @@ namespace DB for (auto proto_database : proto_context.getDatabases()) { const String & database_name = proto_database.getName().cStr(); - if (!context.isDatabaseExist(database_name)) - { - // TODO - } + if (database_name != table_meta.database) + continue; for (auto proto_table : proto_database.getTables()) { 
String table_name = proto_table.getName().cStr(); - if (!context.isTableExist(database_name, table_name)) - { - // TODO - } + if (table_name != table_meta.table) + continue; - StoragePtr table = context.tryGetTable(database_name, table_name); - // TODO: throw on fail - - ColumnsDescription column_description; for (auto column : proto_table.getColumns()) { String column_name = column.getName().cStr(); @@ -144,10 +136,8 @@ namespace DB ColumnDefaultKind expression_kind = static_cast(column.getDefault().getKind()); ASTPtr ast = parseQuery(parser, expression, expression.size()); - column_description.defaults[column_name] = ColumnDefault{expression_kind, ast}; + table_meta.column_defaults.emplace(column_name, ColumnDefault{expression_kind, ast}); } - - table->setColumns(column_description); } } } @@ -164,9 +154,9 @@ namespace DB return block; } - void loadContextBlock(const Block & block, Context & context) + void loadTableMetaInfo(const Block & block, TableMetaInfo & table_meta) { const ColumnWithTypeAndName & column = block.getByName(contextColumnName()); - loadContext(column, context); + loadTableMetaInfo(column, table_meta); } } diff --git a/dbms/src/Proto/protoHelpers.h b/dbms/src/Proto/protoHelpers.h index 14512d3453c..ee3da1649a1 100644 --- a/dbms/src/Proto/protoHelpers.h +++ b/dbms/src/Proto/protoHelpers.h @@ -1,11 +1,23 @@ #pragma once +#include namespace DB { class Context; class Block; + struct TableMetaInfo + { + TableMetaInfo(const String & database_, const String & table_) + : database(database_), table(table_) + {} + + const String & database; + const String & table; + ColumnDefaults column_defaults; + }; + Block storeContextBlock(Context & context); - void loadContextBlock(const Block & block, Context & context); + void loadTableMetaInfo(const Block & block, TableMetaInfo & table_meta); } From 5036309d26f0423b5adeab0dd59e96d5525c2701 Mon Sep 17 00:00:00 2001 From: chertus Date: Tue, 10 Jul 2018 16:33:41 +0300 Subject: [PATCH 11/88] fix const defaults --- 
dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp index 82c36bfab85..99f5f28f88d 100644 --- a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp +++ b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp @@ -69,7 +69,12 @@ Block AddingDefaultsBlockInputStream::readImpl() for (size_t row_idx = 0; row_idx < column_read.column->size(); ++row_idx) { if (mask[row_idx]) - column_mixed->insertFrom(*column_def.column, row_idx); + { + if (column_def.column->isColumnConst()) + column_mixed->insert((*column_def.column)[row_idx]); + else + column_mixed->insertFrom(*column_def.column, row_idx); + } else column_mixed->insertFrom(*column_read.column, row_idx); } From 519102b11037759ce0b3b44cddffbc07f6516b93 Mon Sep 17 00:00:00 2001 From: chertus Date: Tue, 10 Jul 2018 20:20:55 +0300 Subject: [PATCH 12/88] defaults for http inserts --- dbms/programs/client/Client.cpp | 11 +++++---- .../InputStreamFromASTInsertQuery.cpp | 10 ++++++-- dbms/src/Proto/protoHelpers.cpp | 7 +++--- dbms/src/Proto/protoHelpers.h | 16 ++----------- dbms/src/Storages/TableMetadata.cpp | 14 +++++++++++ dbms/src/Storages/TableMetadata.h | 23 +++++++++++++++++++ 6 files changed, 57 insertions(+), 24 deletions(-) create mode 100644 dbms/src/Storages/TableMetadata.cpp create mode 100644 dbms/src/Storages/TableMetadata.h diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 414ba81f3c5..da18cbad6e1 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -55,6 +55,7 @@ #include #include #include +#include #include /// http://en.wikipedia.org/wiki/ANSI_escape_code @@ -865,7 +866,7 @@ private: /// Receive description of table structure. 
Block sample; - TableMetaInfo table_meta(parsed_insert_query.database, parsed_insert_query.table); + TableMetadata table_meta(parsed_insert_query.database, parsed_insert_query.table); if (receiveSampleBlock(sample, table_meta)) { /// If structure was received (thus, server has not thrown an exception), @@ -908,7 +909,7 @@ private: } - void sendData(Block & sample, const TableMetaInfo & table_meta) + void sendData(Block & sample, const TableMetadata & table_meta) { /// If INSERT data must be sent. const ASTInsertQuery * parsed_insert_query = typeid_cast(&*parsed_query); @@ -931,7 +932,7 @@ private: } - void sendDataFrom(ReadBuffer & buf, Block & sample, const TableMetaInfo & table_meta) + void sendDataFrom(ReadBuffer & buf, Block & sample, const TableMetadata & table_meta) { String current_format = insert_format; const ColumnDefaults & column_defaults = table_meta.column_defaults; @@ -1065,7 +1066,7 @@ private: /// Receive the block that serves as an example of the structure of table where data will be inserted. 
- bool receiveSampleBlock(Block & out, TableMetaInfo & table_meta) + bool receiveSampleBlock(Block & out, TableMetadata & table_meta) { Connection::Packet packet = connection->receivePacket(); @@ -1081,7 +1082,7 @@ private: return false; case Protocol::Server::CapnProto: - loadTableMetaInfo(packet.block, table_meta); + loadTableMetadata(packet.block, table_meta); return receiveSampleBlock(out, table_meta); default: diff --git a/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp b/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp index 0e4f876925d..163fee44ea5 100644 --- a/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp +++ b/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp @@ -4,7 +4,8 @@ #include #include #include - +#include +#include namespace DB { @@ -43,7 +44,12 @@ InputStreamFromASTInsertQuery::InputStreamFromASTInsertQuery( input_buffer_contacenated = std::make_unique(buffers); - res_stream = context.getInputFormat(format, *input_buffer_contacenated, streams.out->getHeader(), context.getSettings().max_insert_block_size); + TableMetadata table_meta(ast_insert_query->database, ast_insert_query->table); + table_meta.loadFromContext(context); + + BlockInputStreamPtr block_input = + context.getInputFormat(format, *input_buffer_contacenated, streams.out->getHeader(), context.getSettings().max_insert_block_size); + res_stream = std::make_shared(block_input, table_meta.column_defaults, context); } } diff --git a/dbms/src/Proto/protoHelpers.cpp b/dbms/src/Proto/protoHelpers.cpp index 2ae1fda40f4..5121529c207 100644 --- a/dbms/src/Proto/protoHelpers.cpp +++ b/dbms/src/Proto/protoHelpers.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -109,7 +110,7 @@ namespace DB return proto_column; } - static void loadTableMetaInfo(const ColumnWithTypeAndName & proto_column, TableMetaInfo & table_meta) + static void loadTableMetadata(const ColumnWithTypeAndName & proto_column, TableMetadata & table_meta) { StringRef 
plain_data = proto_column.column->getDataAt(0); size_t data_size = proto_column.column->byteSize(); @@ -154,9 +155,9 @@ namespace DB return block; } - void loadTableMetaInfo(const Block & block, TableMetaInfo & table_meta) + void loadTableMetadata(const Block & block, TableMetadata & table_meta) { const ColumnWithTypeAndName & column = block.getByName(contextColumnName()); - loadTableMetaInfo(column, table_meta); + loadTableMetadata(column, table_meta); } } diff --git a/dbms/src/Proto/protoHelpers.h b/dbms/src/Proto/protoHelpers.h index ee3da1649a1..c0a514a5bb1 100644 --- a/dbms/src/Proto/protoHelpers.h +++ b/dbms/src/Proto/protoHelpers.h @@ -1,23 +1,11 @@ #pragma once -#include - namespace DB { class Context; class Block; - - struct TableMetaInfo - { - TableMetaInfo(const String & database_, const String & table_) - : database(database_), table(table_) - {} - - const String & database; - const String & table; - ColumnDefaults column_defaults; - }; + class TableMetadata; Block storeContextBlock(Context & context); - void loadTableMetaInfo(const Block & block, TableMetaInfo & table_meta); + void loadTableMetadata(const Block & block, TableMetadata & table_meta); } diff --git a/dbms/src/Storages/TableMetadata.cpp b/dbms/src/Storages/TableMetadata.cpp new file mode 100644 index 00000000000..e07c6a31e97 --- /dev/null +++ b/dbms/src/Storages/TableMetadata.cpp @@ -0,0 +1,14 @@ +#include +#include +#include "TableMetadata.h" + + +namespace DB +{ + void TableMetadata::loadFromContext(const Context & context) + { + StoragePtr storage = context.getTable(database, table); + const ColumnsDescription & table_columns = storage->getColumns(); + column_defaults = table_columns.defaults; + } +} diff --git a/dbms/src/Storages/TableMetadata.h b/dbms/src/Storages/TableMetadata.h new file mode 100644 index 00000000000..a88808ed96a --- /dev/null +++ b/dbms/src/Storages/TableMetadata.h @@ -0,0 +1,23 @@ +#pragma once + +#include + +namespace DB +{ + class Context; + class Block; + + /// 
Addition information for query that could not be get from sample block + struct TableMetadata + { + TableMetadata(const String & database_, const String & table_) + : database(database_), table(table_) + {} + + const String & database; + const String & table; + ColumnDefaults column_defaults; + + void loadFromContext(const Context & context); + }; +} From fe1b393e55636aac2e8743560281711854599f5a Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 11 Jul 2018 15:05:04 +0300 Subject: [PATCH 13/88] defaults for storages with formated input --- dbms/programs/client/Client.cpp | 5 +++-- dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp | 8 +++++--- dbms/src/Storages/StorageFile.cpp | 7 ++++++- dbms/src/Storages/StorageKafka.cpp | 5 +++++ dbms/src/Storages/StorageURL.cpp | 10 ++++++++-- 5 files changed, 27 insertions(+), 8 deletions(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index da18cbad6e1..af16a4998fb 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -947,8 +947,9 @@ private: BlockInputStreamPtr block_input = context.getInputFormat( current_format, buf, sample, insert_format_max_block_size); - BlockInputStreamPtr defs_block_input = std::make_shared(block_input, column_defaults, context); - BlockInputStreamPtr async_block_input = std::make_shared(defs_block_input); + if (!column_defaults.empty()) + block_input = std::make_shared(block_input, column_defaults, context); + BlockInputStreamPtr async_block_input = std::make_shared(block_input); async_block_input->readPrefix(); diff --git a/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp b/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp index 163fee44ea5..945fc782327 100644 --- a/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp +++ b/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp @@ -47,9 +47,11 @@ InputStreamFromASTInsertQuery::InputStreamFromASTInsertQuery( TableMetadata table_meta(ast_insert_query->database, 
ast_insert_query->table); table_meta.loadFromContext(context); - BlockInputStreamPtr block_input = - context.getInputFormat(format, *input_buffer_contacenated, streams.out->getHeader(), context.getSettings().max_insert_block_size); - res_stream = std::make_shared(block_input, table_meta.column_defaults, context); + res_stream = context.getInputFormat( + format, *input_buffer_contacenated, streams.out->getHeader(), context.getSettings().max_insert_block_size); + + if (!table_meta.column_defaults.empty()) + res_stream = std::make_shared(res_stream, table_meta.column_defaults, context); } } diff --git a/dbms/src/Storages/StorageFile.cpp b/dbms/src/Storages/StorageFile.cpp index c63e3a263ed..f24686cfea3 100644 --- a/dbms/src/Storages/StorageFile.cpp +++ b/dbms/src/Storages/StorageFile.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -194,7 +195,11 @@ BlockInputStreams StorageFile::read( size_t max_block_size, unsigned /*num_streams*/) { - return BlockInputStreams(1, std::make_shared(*this, context, max_block_size)); + BlockInputStreamPtr block_input = std::make_shared(*this, context, max_block_size); + const ColumnsDescription & columns = getColumns(); + if (columns.defaults.empty()) + return {block_input}; + return {std::make_shared(block_input, columns.defaults, context)}; } diff --git a/dbms/src/Storages/StorageKafka.cpp b/dbms/src/Storages/StorageKafka.cpp index a9666bab22c..9afbbfd656f 100644 --- a/dbms/src/Storages/StorageKafka.cpp +++ b/dbms/src/Storages/StorageKafka.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -145,6 +146,10 @@ public: LOG_TRACE(storage.log, "Creating formatted reader"); read_buf = std::make_unique(consumer->stream, storage.log); reader = FormatFactory::instance().getInput(storage.format_name, *read_buf, storage.getSampleBlock(), context, max_block_size); + + const ColumnsDescription & columns = getColumns(); + if (!columns.defaults.empty()) + reader = 
std::make_shared(reader, columns.defaults, context); } ~KafkaBlockInputStream() override diff --git a/dbms/src/Storages/StorageURL.cpp b/dbms/src/Storages/StorageURL.cpp index 1c3b8246492..fa8c30c24a6 100644 --- a/dbms/src/Storages/StorageURL.cpp +++ b/dbms/src/Storages/StorageURL.cpp @@ -13,6 +13,7 @@ #include #include +#include #include @@ -135,14 +136,19 @@ BlockInputStreams StorageURL::read( size_t max_block_size, unsigned /*num_streams*/) { - return {std::make_shared( + BlockInputStreamPtr block_input = std::make_shared( uri, format_name, getName(), getSampleBlock(), context, max_block_size, - ConnectionTimeouts::getHTTPTimeouts(context.getSettingsRef()))}; + ConnectionTimeouts::getHTTPTimeouts(context.getSettingsRef())); + + const ColumnsDescription & columns = getColumns(); + if (columns.defaults.empty()) + return {block_input}; + return {std::make_shared(block_input, columns.defaults, context)}; } void StorageURL::rename(const String & /*new_path_to_db*/, const String & /*new_database_name*/, const String & /*new_table_name*/) {} From 82d22574d40370e6c1c54ed682e8abd2f6403c92 Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 11 Jul 2018 17:11:47 +0300 Subject: [PATCH 14/88] fix empty defaults mask issue and add some comments --- dbms/src/Core/Block.h | 2 + dbms/src/Core/BlockInfo.cpp | 6 +-- dbms/src/Core/BlockInfo.h | 16 ++++---- .../AddingDefaultsBlockInputStream.cpp | 37 ++++++++++--------- dbms/src/Formats/IRowInputStream.h | 6 +-- 5 files changed, 37 insertions(+), 30 deletions(-) diff --git a/dbms/src/Core/Block.h b/dbms/src/Core/Block.h index b7c19548963..0db8954247e 100644 --- a/dbms/src/Core/Block.h +++ b/dbms/src/Core/Block.h @@ -35,6 +35,8 @@ private: public: BlockInfo info; + /// Input stream could use delayed_defaults to add addition info at which rows it have inserted default values. + /// Such values would be replaced later by column defaults in AddingDefaultsBlockInputStream (if any). 
BlockDelayedDefaults delayed_defaults; Block() = default; diff --git a/dbms/src/Core/BlockInfo.cpp b/dbms/src/Core/BlockInfo.cpp index 15d7d9efa12..f214d2782e3 100644 --- a/dbms/src/Core/BlockInfo.cpp +++ b/dbms/src/Core/BlockInfo.cpp @@ -60,14 +60,14 @@ void BlockInfo::read(ReadBuffer & in) void BlockDelayedDefaults::setBit(size_t column_idx, size_t row_idx) { - BitMask & mask = columns_defaults[column_idx]; + RowsBitMask & mask = columns_defaults[column_idx]; mask.resize(row_idx + 1); mask[row_idx] = true; } -const BlockDelayedDefaults::BitMask & BlockDelayedDefaults::getColumnBitmask(size_t column_idx) const +const BlockDelayedDefaults::RowsBitMask & BlockDelayedDefaults::getDefaultsBitmask(size_t column_idx) const { - static BitMask none; + static RowsBitMask none; auto it = columns_defaults.find(column_idx); if (it != columns_defaults.end()) return it->second; diff --git a/dbms/src/Core/BlockInfo.h b/dbms/src/Core/BlockInfo.h index 00c0821a33e..abc5383ddcb 100644 --- a/dbms/src/Core/BlockInfo.h +++ b/dbms/src/Core/BlockInfo.h @@ -45,22 +45,24 @@ struct BlockInfo void read(ReadBuffer & in); }; -/// Block extention to support delayed defaults. -/// It's expected that it would be lots unset defaults or none. -/// NOTE It's possible to make better solution for sparse values. +/// Block extention to support delayed defaults. Used in AddingDefaultsBlockInputStream to replace type defauls set by RowInputStream +/// with column defaults. 
class BlockDelayedDefaults { public: - using BitMask = std::vector; - using MaskById = std::unordered_map; + using RowsBitMask = std::vector; /// a bit per row for a column - const BitMask & getColumnBitmask(size_t column_idx) const; + const RowsBitMask & getDefaultsBitmask(size_t column_idx) const; void setBit(size_t column_idx, size_t row_idx); bool empty() const { return columns_defaults.empty(); } size_t size() const { return columns_defaults.size(); } private: - MaskById columns_defaults; + using RowsMaskByColumnId = std::unordered_map; + + /// If columns_defaults[column_id][row_id] is true related value in Block should be replaced with column default. + /// It could contain less columns and rows then related block. + RowsMaskByColumnId columns_defaults; }; } diff --git a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp index 99f5f28f88d..ffde573a24f 100644 --- a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp +++ b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp @@ -63,25 +63,28 @@ Block AddingDefaultsBlockInputStream::readImpl() if (column_read.column->size() != column_def.column->size()) throw Exception("Mismach column sizes while adding defaults", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); - const BlockDelayedDefaults::BitMask & mask = delayed_defaults.getColumnBitmask(block_column_position); - MutableColumnPtr column_mixed = column_read.column->cloneEmpty(); - - for (size_t row_idx = 0; row_idx < column_read.column->size(); ++row_idx) + const auto & defaults_mask = delayed_defaults.getDefaultsBitmask(block_column_position); + if (!defaults_mask.empty()) { - if (mask[row_idx]) - { - if (column_def.column->isColumnConst()) - column_mixed->insert((*column_def.column)[row_idx]); - else - column_mixed->insertFrom(*column_def.column, row_idx); - } - else - column_mixed->insertFrom(*column_read.column, row_idx); - } + MutableColumnPtr column_mixed = 
column_read.column->cloneEmpty(); - ColumnWithTypeAndName mix = column_read.cloneEmpty(); - mix.column = std::move(column_mixed); - mixed_columns.emplace_back(std::move(mix)); + for (size_t row_idx = 0; row_idx < column_read.column->size(); ++row_idx) + { + if (defaults_mask[row_idx]) + { + if (column_def.column->isColumnConst()) + column_mixed->insert((*column_def.column)[row_idx]); + else + column_mixed->insertFrom(*column_def.column, row_idx); + } + else + column_mixed->insertFrom(*column_read.column, row_idx); + } + + ColumnWithTypeAndName mix = column_read.cloneEmpty(); + mix.column = std::move(column_mixed); + mixed_columns.emplace_back(std::move(mix)); + } } for (auto & column : mixed_columns) diff --git a/dbms/src/Formats/IRowInputStream.h b/dbms/src/Formats/IRowInputStream.h index 1ffa205edc6..8b3e7043f3b 100644 --- a/dbms/src/Formats/IRowInputStream.h +++ b/dbms/src/Formats/IRowInputStream.h @@ -10,13 +10,13 @@ namespace DB { -/// A way to set some extentions to read and return extra information too. +/// A way to set some extentions to read and return extra information too. IRowInputStream.extendedRead() output. struct RowReadExtention { using BitMask = std::vector; - /// IRowInputStream.extendedRead() output value. - /// Contains true for columns that actually read from the source and false for defaults + /// Contains one bit per column in resently read row. IRowInputStream could leave it empty, or partialy set. + /// It should contain true for columns that actually read from the source and false for defaults. 
BitMask read_columns; }; From a765aef4dbdc3ea3a362b25161d82584301c61fe Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 11 Jul 2018 19:24:29 +0300 Subject: [PATCH 15/88] fix KafkaStorage build and add sql test for defaults --- dbms/src/Storages/StorageKafka.cpp | 2 +- .../00651_insert_json_with_defaults.reference | 7 +++++++ .../00651_insert_json_with_defaults.sql | 19 +++++++++++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.reference create mode 100644 dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.sql diff --git a/dbms/src/Storages/StorageKafka.cpp b/dbms/src/Storages/StorageKafka.cpp index 9afbbfd656f..43966aec22a 100644 --- a/dbms/src/Storages/StorageKafka.cpp +++ b/dbms/src/Storages/StorageKafka.cpp @@ -147,7 +147,7 @@ public: read_buf = std::make_unique(consumer->stream, storage.log); reader = FormatFactory::instance().getInput(storage.format_name, *read_buf, storage.getSampleBlock(), context, max_block_size); - const ColumnsDescription & columns = getColumns(); + const ColumnsDescription & columns = storage.getColumns(); if (!columns.defaults.empty()) reader = std::make_shared(reader, columns.defaults, context); } diff --git a/dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.reference b/dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.reference new file mode 100644 index 00000000000..f513c6d6f40 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.reference @@ -0,0 +1,7 @@ +0 0 6 6 6 +0 5 5 1.7917595 5 +1 1 2 1.0986123 42 +1 1 2 1.0986123 42 +2 2 4 1.609438 2 +3 3 3 3 3 +4 0 4 1.609438 42 diff --git a/dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.sql b/dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.sql new file mode 100644 index 00000000000..d25ecdc1f5a --- /dev/null +++ b/dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.sql @@ -0,0 +1,19 @@ 
+CREATE DATABASE IF NOT EXISTS test; +DROP TABLE IF EXISTS test.defaults; +CREATE TABLE IF NOT EXISTS test.defaults +( + x UInt32, + y UInt32, + a UInt32 DEFAULT x + y, + b Float32 DEFAULT log(1 + x + y), + c UInt32 DEFAULT 42 +) ENGINE = Memory; + +INSERT INTO test.defaults FORMAT JSONEachRow {"x":1, "y":1}; +INSERT INTO test.defaults (x, y) SELECT x, y FROM test.defaults LIMIT 1; +INSERT INTO test.defaults FORMAT JSONEachRow {"x":2, "y":2, "c":2}; +INSERT INTO test.defaults FORMAT JSONEachRow {"x":3, "y":3, "a":3, "b":3, "c":3}; +INSERT INTO test.defaults FORMAT JSONEachRow {"x":4} {"y":5, "c":5} {"a":6, "b":6, "c":6}; + +SELECT * FROM test.defaults ORDER BY (x, y); +DROP TABLE IF EXISTS test.defaults; From 3c39f2fc691865459aba046cc4515f3532047384 Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 11 Jul 2018 21:34:12 +0300 Subject: [PATCH 16/88] fix some tests --- dbms/src/DataStreams/RemoteBlockOutputStream.cpp | 6 +++++- dbms/src/DataStreams/RemoteBlockOutputStream.h | 1 + .../00651_insert_json_with_defaults.reference | 14 +++++++------- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/dbms/src/DataStreams/RemoteBlockOutputStream.cpp b/dbms/src/DataStreams/RemoteBlockOutputStream.cpp index d9095ec91b9..7d0c17407c7 100644 --- a/dbms/src/DataStreams/RemoteBlockOutputStream.cpp +++ b/dbms/src/DataStreams/RemoteBlockOutputStream.cpp @@ -33,13 +33,17 @@ RemoteBlockOutputStream::RemoteBlockOutputStream(Connection & connection_, const if (!header) throw Exception("Logical error: empty block received as table structure", ErrorCodes::LOGICAL_ERROR); } + else if (Protocol::Server::CapnProto == packet.type) + { + metadata = packet.block; + } else if (Protocol::Server::Exception == packet.type) { packet.exception->rethrow(); return; } else - throw NetException("Unexpected packet from server (expected Data or Exception, got " + throw NetException("Unexpected packet from server (expected Data, CapnProto or Exception, got " + 
String(Protocol::Server::toString(packet.type)) + ")", ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER); } diff --git a/dbms/src/DataStreams/RemoteBlockOutputStream.h b/dbms/src/DataStreams/RemoteBlockOutputStream.h index 41740c39837..e0269a1ea4d 100644 --- a/dbms/src/DataStreams/RemoteBlockOutputStream.h +++ b/dbms/src/DataStreams/RemoteBlockOutputStream.h @@ -35,6 +35,7 @@ private: String query; const Settings * settings; Block header; + Block metadata; bool finished = false; }; diff --git a/dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.reference b/dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.reference index f513c6d6f40..9d95fa08690 100644 --- a/dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.reference +++ b/dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.reference @@ -1,7 +1,7 @@ -0 0 6 6 6 -0 5 5 1.7917595 5 -1 1 2 1.0986123 42 -1 1 2 1.0986123 42 -2 2 4 1.609438 2 -3 3 3 3 3 -4 0 4 1.609438 42 +0 0 6 6 6 +0 5 5 1.7917595 5 +1 1 2 1.0986123 42 +1 1 2 1.0986123 42 +2 2 4 1.609438 2 +3 3 3 3 3 +4 0 4 1.609438 42 From 2876aadba77cd91818fdbcc6f0679533fed52a05 Mon Sep 17 00:00:00 2001 From: chertus Date: Thu, 12 Jul 2018 11:49:20 +0300 Subject: [PATCH 17/88] fix materialized --- dbms/src/Proto/protoHelpers.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/dbms/src/Proto/protoHelpers.cpp b/dbms/src/Proto/protoHelpers.cpp index 5121529c207..c59516d42c2 100644 --- a/dbms/src/Proto/protoHelpers.cpp +++ b/dbms/src/Proto/protoHelpers.cpp @@ -136,8 +136,11 @@ namespace DB String expression = column.getDefault().getExpression().cStr(); ColumnDefaultKind expression_kind = static_cast(column.getDefault().getKind()); - ASTPtr ast = parseQuery(parser, expression, expression.size()); - table_meta.column_defaults.emplace(column_name, ColumnDefault{expression_kind, ast}); + if (expression_kind == ColumnDefaultKind::Default) + { + ASTPtr ast = parseQuery(parser, expression, expression.size()); + 
table_meta.column_defaults.emplace(column_name, ColumnDefault{expression_kind, ast}); + } } } } From a7fcae2759d2285b7feece194f66270231367c38 Mon Sep 17 00:00:00 2001 From: chertus Date: Thu, 12 Jul 2018 13:03:49 +0300 Subject: [PATCH 18/88] fix defaults with list expressions --- dbms/src/Proto/protoHelpers.cpp | 4 ++-- .../queries/0_stateless/00651_insert_json_with_defaults.sql | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/dbms/src/Proto/protoHelpers.cpp b/dbms/src/Proto/protoHelpers.cpp index c59516d42c2..329cd40b5f7 100644 --- a/dbms/src/Proto/protoHelpers.cpp +++ b/dbms/src/Proto/protoHelpers.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include @@ -116,7 +116,7 @@ namespace DB size_t data_size = proto_column.column->byteSize(); Proto::Context::Reader proto_context = deserializeProto(plain_data.data, data_size); - ParserExpressionElement parser; + ParserTernaryOperatorExpression parser; for (auto proto_database : proto_context.getDatabases()) { diff --git a/dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.sql b/dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.sql index d25ecdc1f5a..270778d9b49 100644 --- a/dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.sql +++ b/dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.sql @@ -6,7 +6,10 @@ CREATE TABLE IF NOT EXISTS test.defaults y UInt32, a UInt32 DEFAULT x + y, b Float32 DEFAULT log(1 + x + y), - c UInt32 DEFAULT 42 + c UInt32 DEFAULT 42, + d DEFAULT x + y, + e MATERIALIZED x + y, + f ALIAS x + y ) ENGINE = Memory; INSERT INTO test.defaults FORMAT JSONEachRow {"x":1, "y":1}; From f89e476c0f540fb17a89e861053114f7dcd43b35 Mon Sep 17 00:00:00 2001 From: chertus Date: Thu, 12 Jul 2018 17:33:57 +0300 Subject: [PATCH 19/88] add option to disable sending metadata --- dbms/programs/server/TCPHandler.cpp | 8 ++++++-- dbms/src/Interpreters/Settings.h | 1 + dbms/src/Storages/StorageDistributed.cpp | 3 +++ 
.../0_stateless/00651_insert_json_with_defaults.sql | 3 +-- 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp index 128dc0090bd..9f8800ec1fe 100644 --- a/dbms/programs/server/TCPHandler.cpp +++ b/dbms/programs/server/TCPHandler.cpp @@ -313,8 +313,12 @@ void TCPHandler::processInsertQuery(const Settings & global_settings) state.io.out->writePrefix(); /// Send query metadata (column defaults) - Block meta_block = storeContextBlock(query_context); - sendMetadata(meta_block); + if (global_settings.insert_sample_with_metadata && + query_context.getSettingsRef().insert_sample_with_metadata) + { + Block meta_block = storeContextBlock(query_context); + sendMetadata(meta_block); + } /// Send block to the client - table structure. Block block = state.io.out->getHeader(); diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index c4d0d7654e6..8165cc776d9 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -123,6 +123,7 @@ struct Settings M(SettingUInt64, max_concurrent_queries_for_user, 0, "The maximum number of concurrent requests per user.") \ \ M(SettingBool, insert_deduplicate, true, "For INSERT queries in the replicated table, specifies that deduplication of insertings blocks should be preformed") \ + M(SettingBool, insert_sample_with_metadata, true, "For INSERT queries, specifies that need add metadata before sample block") \ \ M(SettingUInt64, insert_quorum, 0, "For INSERT queries in the replicated table, wait writing for the specified number of replicas and linearize the addition of the data. 
0 - disabled.") \ M(SettingMilliseconds, insert_quorum_timeout, 600000, "") \ diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp index a125806515c..49b20acf9eb 100644 --- a/dbms/src/Storages/StorageDistributed.cpp +++ b/dbms/src/Storages/StorageDistributed.cpp @@ -165,6 +165,9 @@ StorageDistributed::StorageDistributed( if (num_local_shards && remote_database == database_name && remote_table == table_name) throw Exception("Distributed table " + table_name + " looks at itself", ErrorCodes::INFINITE_LOOP); } + + /// HACK: disable metadata for StorageDistributed queries + const_cast(context).getSettingsRef().insert_sample_with_metadata = false; } diff --git a/dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.sql b/dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.sql index 270778d9b49..12b9e4538d1 100644 --- a/dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.sql +++ b/dbms/tests/queries/0_stateless/00651_insert_json_with_defaults.sql @@ -4,10 +4,9 @@ CREATE TABLE IF NOT EXISTS test.defaults ( x UInt32, y UInt32, - a UInt32 DEFAULT x + y, + a DEFAULT x + y, b Float32 DEFAULT log(1 + x + y), c UInt32 DEFAULT 42, - d DEFAULT x + y, e MATERIALIZED x + y, f ALIAS x + y ) ENGINE = Memory; From c78a67d57316a39ec40116558d00ce5e0b93fb6b Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 13 Jul 2018 01:05:03 +0300 Subject: [PATCH 20/88] fix case with unexpected aliases --- dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp index ffde573a24f..7cc90f6a641 100644 --- a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp +++ b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp @@ -43,7 +43,11 @@ Block AddingDefaultsBlockInputStream::readImpl() Block evaluate_block{res}; for (const auto & column : 
column_defaults) - evaluate_block.erase(column.first); + { + /// column_defaults contain aliases that could be ommited in evaluate_block + if (evaluate_block.has(column.first)) + evaluate_block.erase(column.first); + } evaluateMissingDefaultsUnsafe(evaluate_block, header.getNamesAndTypesList(), column_defaults, context); From 4e0d9aa8d44e92f744925797c29e9df2a94f11b8 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 13 Jul 2018 15:46:29 +0300 Subject: [PATCH 21/88] revert unneeded changes --- dbms/programs/client/Client.cpp | 4 +--- dbms/src/DataStreams/AddingDefaultBlockOutputStream.cpp | 1 - dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp | 5 ++--- dbms/src/DataStreams/RemoteBlockOutputStream.cpp | 6 +----- dbms/src/DataStreams/RemoteBlockOutputStream.h | 1 - 5 files changed, 4 insertions(+), 13 deletions(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 5dddf053482..5ee25d38c33 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -936,18 +936,16 @@ private: void sendDataFrom(ReadBuffer & buf, Block & sample, const TableMetadata & table_meta) { String current_format = insert_format; - const ColumnDefaults & column_defaults = table_meta.column_defaults; /// Data format can be specified in the INSERT query. 
if (ASTInsertQuery * insert = typeid_cast(&*parsed_query)) - { if (!insert->format.empty()) current_format = insert->format; - } BlockInputStreamPtr block_input = context.getInputFormat( current_format, buf, sample, insert_format_max_block_size); + const ColumnDefaults & column_defaults = table_meta.column_defaults; if (!column_defaults.empty()) block_input = std::make_shared(block_input, column_defaults, context); BlockInputStreamPtr async_block_input = std::make_shared(block_input); diff --git a/dbms/src/DataStreams/AddingDefaultBlockOutputStream.cpp b/dbms/src/DataStreams/AddingDefaultBlockOutputStream.cpp index 7a1ab14e7fc..fe773b40776 100644 --- a/dbms/src/DataStreams/AddingDefaultBlockOutputStream.cpp +++ b/dbms/src/DataStreams/AddingDefaultBlockOutputStream.cpp @@ -6,7 +6,6 @@ #include #include #include -#include namespace DB diff --git a/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp b/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp index 945fc782327..855840a15ee 100644 --- a/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp +++ b/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp @@ -44,12 +44,11 @@ InputStreamFromASTInsertQuery::InputStreamFromASTInsertQuery( input_buffer_contacenated = std::make_unique(buffers); + res_stream = context.getInputFormat(format, *input_buffer_contacenated, streams.out->getHeader(), context.getSettings().max_insert_block_size); + TableMetadata table_meta(ast_insert_query->database, ast_insert_query->table); table_meta.loadFromContext(context); - res_stream = context.getInputFormat( - format, *input_buffer_contacenated, streams.out->getHeader(), context.getSettings().max_insert_block_size); - if (!table_meta.column_defaults.empty()) res_stream = std::make_shared(res_stream, table_meta.column_defaults, context); } diff --git a/dbms/src/DataStreams/RemoteBlockOutputStream.cpp b/dbms/src/DataStreams/RemoteBlockOutputStream.cpp index 7d0c17407c7..d9095ec91b9 100644 --- 
a/dbms/src/DataStreams/RemoteBlockOutputStream.cpp +++ b/dbms/src/DataStreams/RemoteBlockOutputStream.cpp @@ -33,17 +33,13 @@ RemoteBlockOutputStream::RemoteBlockOutputStream(Connection & connection_, const if (!header) throw Exception("Logical error: empty block received as table structure", ErrorCodes::LOGICAL_ERROR); } - else if (Protocol::Server::CapnProto == packet.type) - { - metadata = packet.block; - } else if (Protocol::Server::Exception == packet.type) { packet.exception->rethrow(); return; } else - throw NetException("Unexpected packet from server (expected Data, CapnProto or Exception, got " + throw NetException("Unexpected packet from server (expected Data or Exception, got " + String(Protocol::Server::toString(packet.type)) + ")", ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER); } diff --git a/dbms/src/DataStreams/RemoteBlockOutputStream.h b/dbms/src/DataStreams/RemoteBlockOutputStream.h index e0269a1ea4d..41740c39837 100644 --- a/dbms/src/DataStreams/RemoteBlockOutputStream.h +++ b/dbms/src/DataStreams/RemoteBlockOutputStream.h @@ -35,7 +35,6 @@ private: String query; const Settings * settings; Block header; - Block metadata; bool finished = false; }; From 2c4949dd5d5b735574dac9f32843a341737e8920 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 13 Jul 2018 16:47:13 +0300 Subject: [PATCH 22/88] fix build without CAPNP --- cmake/find_capnp.cmake | 4 ++++ dbms/programs/client/CMakeLists.txt | 2 +- dbms/programs/client/Client.cpp | 2 ++ dbms/programs/server/TCPHandler.cpp | 2 ++ dbms/src/Proto/CMakeLists.txt | 1 + dbms/src/Proto/protoHelpers.h | 3 +++ 6 files changed, 13 insertions(+), 1 deletion(-) diff --git a/cmake/find_capnp.cmake b/cmake/find_capnp.cmake index 03ecadda6a1..426031db15e 100644 --- a/cmake/find_capnp.cmake +++ b/cmake/find_capnp.cmake @@ -1,5 +1,9 @@ option (ENABLE_CAPNP "Enable Cap'n Proto" ${NOT_MSVC}) +unset (USE_CAPNP CACHE) +unset (USE_INTERNAL_CAPNP_LIBRARY CACHE) +unset (MISSING_INTERNAL_CAPNP_LIBRARY CACHE) + if (ENABLE_CAPNP) # 
cmake 3.5.1 bug: # capnproto uses this cmake feature: diff --git a/dbms/programs/client/CMakeLists.txt b/dbms/programs/client/CMakeLists.txt index 0f84270c8c9..659e8018a49 100644 --- a/dbms/programs/client/CMakeLists.txt +++ b/dbms/programs/client/CMakeLists.txt @@ -1,5 +1,5 @@ add_library (clickhouse-client-lib Client.cpp) -target_link_libraries (clickhouse-client-lib clickhouse_functions clickhouse_aggregate_functions clickhouse_proto +target_link_libraries (clickhouse-client-lib clickhouse_functions clickhouse_aggregate_functions ${PROTO_LIB} ${LINE_EDITING_LIBS} ${Boost_PROGRAM_OPTIONS_LIBRARY}) target_include_directories (clickhouse-client-lib SYSTEM PRIVATE ${READLINE_INCLUDE_DIR}) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 5ee25d38c33..74eff658133 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -1082,7 +1082,9 @@ private: return false; case Protocol::Server::CapnProto: +#if USE_CAPNP loadTableMetadata(packet.block, table_meta); +#endif return receiveSampleBlock(out, table_meta); default: diff --git a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp index 9f8800ec1fe..c5800aa11df 100644 --- a/dbms/programs/server/TCPHandler.cpp +++ b/dbms/programs/server/TCPHandler.cpp @@ -312,6 +312,7 @@ void TCPHandler::processInsertQuery(const Settings & global_settings) */ state.io.out->writePrefix(); +#if USE_CAPNP /// Send query metadata (column defaults) if (global_settings.insert_sample_with_metadata && query_context.getSettingsRef().insert_sample_with_metadata) @@ -319,6 +320,7 @@ void TCPHandler::processInsertQuery(const Settings & global_settings) Block meta_block = storeContextBlock(query_context); sendMetadata(meta_block); } +#endif /// Send block to the client - table structure. 
Block block = state.io.out->getHeader(); diff --git a/dbms/src/Proto/CMakeLists.txt b/dbms/src/Proto/CMakeLists.txt index b3871cd53e8..4ca555914f7 100644 --- a/dbms/src/Proto/CMakeLists.txt +++ b/dbms/src/Proto/CMakeLists.txt @@ -6,6 +6,7 @@ add_custom_command (OUTPUT ServerMessage.capnp.c++ ServerMessage.capnp.h COMMAND ${CMAKE_COMMAND} -E env PATH=${CAPNP_PATH} ${CAPNP_BIN} compile -I ${CAPNP_INCLUDE_DIR} -oc++ ServerMessage.capnp WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/ServerMessage.capnp) +set (PROTO_LIB clickhouse_proto) add_library (clickhouse_proto ServerMessage.capnp.c++ protoHelpers.cpp) target_link_libraries (clickhouse_proto clickhouse_common_io ${CAPNP_LIBRARY}) target_include_directories (clickhouse_proto PUBLIC ${CAPNP_INCLUDE_DIR} ${DBMS_INCLUDE_DIR}) diff --git a/dbms/src/Proto/protoHelpers.h b/dbms/src/Proto/protoHelpers.h index c0a514a5bb1..88e3c299fa2 100644 --- a/dbms/src/Proto/protoHelpers.h +++ b/dbms/src/Proto/protoHelpers.h @@ -1,4 +1,5 @@ #pragma once +#if USE_CAPNP namespace DB { @@ -9,3 +10,5 @@ namespace DB Block storeContextBlock(Context & context); void loadTableMetadata(const Block & block, TableMetadata & table_meta); } + +#endif From bc5d5bcf4a2a79e920b8aa238e2ada77a8af488e Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 13 Jul 2018 18:10:42 +0300 Subject: [PATCH 23/88] fix build with CAPNP --- dbms/CMakeLists.txt | 1 + dbms/src/Proto/CMakeLists.txt | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index eaf21b0b6ac..3c7a7ac7198 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -208,6 +208,7 @@ if (USE_CAPNP) if (NOT USE_INTERNAL_CAPNP_LIBRARY) target_include_directories (dbms SYSTEM BEFORE PRIVATE ${CAPNP_INCLUDE_DIR}) endif () + set (PROTO_LIB clickhouse_proto) endif () if (USE_RDKAFKA) diff --git a/dbms/src/Proto/CMakeLists.txt b/dbms/src/Proto/CMakeLists.txt index 4ca555914f7..b3871cd53e8 100644 --- 
a/dbms/src/Proto/CMakeLists.txt +++ b/dbms/src/Proto/CMakeLists.txt @@ -6,7 +6,6 @@ add_custom_command (OUTPUT ServerMessage.capnp.c++ ServerMessage.capnp.h COMMAND ${CMAKE_COMMAND} -E env PATH=${CAPNP_PATH} ${CAPNP_BIN} compile -I ${CAPNP_INCLUDE_DIR} -oc++ ServerMessage.capnp WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/ServerMessage.capnp) -set (PROTO_LIB clickhouse_proto) add_library (clickhouse_proto ServerMessage.capnp.c++ protoHelpers.cpp) target_link_libraries (clickhouse_proto clickhouse_common_io ${CAPNP_LIBRARY}) target_include_directories (clickhouse_proto PUBLIC ${CAPNP_INCLUDE_DIR} ${DBMS_INCLUDE_DIR}) From 1d7f6c32a06bf7dc042486ed92d50ed880a7bf88 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 13 Jul 2018 18:32:20 +0300 Subject: [PATCH 24/88] one more build fix --- libs/libcommon/include/common/config_common.h.in | 1 + 1 file changed, 1 insertion(+) diff --git a/libs/libcommon/include/common/config_common.h.in b/libs/libcommon/include/common/config_common.h.in index 0cc0950efba..bc24a0c6fe2 100644 --- a/libs/libcommon/include/common/config_common.h.in +++ b/libs/libcommon/include/common/config_common.h.in @@ -4,6 +4,7 @@ #cmakedefine01 USE_TCMALLOC #cmakedefine01 USE_JEMALLOC +#cmakedefine01 USE_CAPNP #cmakedefine01 USE_READLINE #cmakedefine01 USE_LIBEDIT #cmakedefine01 HAVE_READLINE_HISTORY From 53f1f4794dd440fcc842c0bc845460db732709da Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 13 Jul 2018 21:06:47 +0300 Subject: [PATCH 25/88] fix inserts into unknown table, ex. 
'table function url(...)' --- dbms/src/Storages/TableMetadata.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dbms/src/Storages/TableMetadata.cpp b/dbms/src/Storages/TableMetadata.cpp index e07c6a31e97..a489e5ba92e 100644 --- a/dbms/src/Storages/TableMetadata.cpp +++ b/dbms/src/Storages/TableMetadata.cpp @@ -7,6 +7,9 @@ namespace DB { void TableMetadata::loadFromContext(const Context & context) { + if (!context.isTableExist(database, table)) + return; + StoragePtr storage = context.getTable(database, table); const ColumnsDescription & table_columns = storage->getColumns(); column_defaults = table_columns.defaults; From 925e4c7dbb43845c4fad955f30bb01d2d2bde930 Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 16 Jul 2018 14:28:22 +0300 Subject: [PATCH 26/88] backward compatibility --- dbms/cmake/version.cmake | 4 ++-- dbms/programs/server/TCPHandler.cpp | 3 ++- dbms/src/Core/Defines.h | 1 + dbms/src/Proto/protoHelpers.cpp | 8 ++++++-- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake index bba2600d441..9d5cf8d5bf4 100644 --- a/dbms/cmake/version.cmake +++ b/dbms/cmake/version.cmake @@ -1,6 +1,6 @@ # This strings autochanged from release_lib.sh: -set(VERSION_DESCRIBE v1.1.54394-testing) -set(VERSION_REVISION 54394) +set(VERSION_DESCRIBE v1.1.54400-testing) +set(VERSION_REVISION 54400) set(VERSION_GITHASH 875ea0f4eaa3592f1fe628b6a1150d91b04ad574) # end of autochange diff --git a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp index c5800aa11df..e85c08a5574 100644 --- a/dbms/programs/server/TCPHandler.cpp +++ b/dbms/programs/server/TCPHandler.cpp @@ -314,7 +314,8 @@ void TCPHandler::processInsertQuery(const Settings & global_settings) #if USE_CAPNP /// Send query metadata (column defaults) - if (global_settings.insert_sample_with_metadata && + if (client_revision >= DBMS_MIN_REVISION_WITH_PROTO_METADATA && + global_settings.insert_sample_with_metadata && 
query_context.getSettingsRef().insert_sample_with_metadata) { Block meta_block = storeContextBlock(query_context); diff --git a/dbms/src/Core/Defines.h b/dbms/src/Core/Defines.h index 2ed07dce9b6..35198c2cb4f 100644 --- a/dbms/src/Core/Defines.h +++ b/dbms/src/Core/Defines.h @@ -49,6 +49,7 @@ #define DBMS_MIN_REVISION_WITH_TABLES_STATUS 54226 #define DBMS_MIN_REVISION_WITH_TIME_ZONE_PARAMETER_IN_DATETIME_DATA_TYPE 54337 #define DBMS_MIN_REVISION_WITH_SERVER_DISPLAY_NAME 54372 +#define DBMS_MIN_REVISION_WITH_PROTO_METADATA 54400 /// Version of ClickHouse TCP protocol. Set to git tag with latest protocol change. #define DBMS_TCP_PROTOCOL_VERSION 54226 diff --git a/dbms/src/Proto/protoHelpers.cpp b/dbms/src/Proto/protoHelpers.cpp index 329cd40b5f7..0d451625ea6 100644 --- a/dbms/src/Proto/protoHelpers.cpp +++ b/dbms/src/Proto/protoHelpers.cpp @@ -160,7 +160,11 @@ namespace DB void loadTableMetadata(const Block & block, TableMetadata & table_meta) { - const ColumnWithTypeAndName & column = block.getByName(contextColumnName()); - loadTableMetadata(column, table_meta); + /// select metadata type by column name + if (block.has(contextColumnName())) + { + const ColumnWithTypeAndName & column = block.getByName(contextColumnName()); + loadTableMetadata(column, table_meta); + } } } From 2cddb4b840553d439d0a394637fcfa5fabf3fd79 Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 16 Jul 2018 17:52:02 +0300 Subject: [PATCH 27/88] send metadata only for affected table --- dbms/programs/server/TCPHandler.cpp | 12 +- dbms/src/Interpreters/Context.cpp | 15 +++ dbms/src/Interpreters/Context.h | 3 + .../Interpreters/InterpreterInsertQuery.cpp | 7 ++ .../src/Interpreters/InterpreterInsertQuery.h | 2 + dbms/src/Interpreters/executeQuery.cpp | 8 ++ dbms/src/Proto/protoHelpers.cpp | 108 +++++++----------- dbms/src/Proto/protoHelpers.h | 2 +- dbms/src/Storages/TableMetadata.cpp | 5 +- dbms/src/Storages/TableMetadata.h | 3 +- 10 files changed, 91 insertions(+), 74 deletions(-) diff --git 
a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp index e85c08a5574..096eea8512c 100644 --- a/dbms/programs/server/TCPHandler.cpp +++ b/dbms/programs/server/TCPHandler.cpp @@ -25,6 +25,7 @@ #include #include +#include #include @@ -313,13 +314,16 @@ void TCPHandler::processInsertQuery(const Settings & global_settings) state.io.out->writePrefix(); #if USE_CAPNP - /// Send query metadata (column defaults) + /// Send table metadata (column defaults) if (client_revision >= DBMS_MIN_REVISION_WITH_PROTO_METADATA && - global_settings.insert_sample_with_metadata && query_context.getSettingsRef().insert_sample_with_metadata) { - Block meta_block = storeContextBlock(query_context); - sendMetadata(meta_block); + TableMetadata table_meta(query_context.getCurrentDatabase(), query_context.getCurrentTable()); + if (table_meta.loadFromContext(query_context) && table_meta.hasDefaults()) + { + Block meta_block = storeTableMetadata(table_meta); + sendMetadata(meta_block); + } } #endif diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 9fed370cfbc..4b48b6e233c 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1003,6 +1003,12 @@ String Context::getCurrentDatabase() const } +String Context::getCurrentTable() const +{ + return current_table; +} + + String Context::getCurrentQueryId() const { return client_info.current_query_id; @@ -1017,6 +1023,15 @@ void Context::setCurrentDatabase(const String & name) } +void Context::setCurrentTable(const String & database, const String & table) +{ + auto lock = getLock(); + assertTableExists(database, table); + current_database = database; + current_table = table; +} + + void Context::setCurrentQueryId(const String & query_id) { if (!client_info.current_query_id.empty()) diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index 1c867d65e8f..4ab675dfaf2 100644 --- a/dbms/src/Interpreters/Context.h +++ 
b/dbms/src/Interpreters/Context.h @@ -104,6 +104,7 @@ private: std::shared_ptr quota; /// Current quota. By default - empty quota, that have no limits. String current_database; + String current_table; Settings settings; /// Setting for query execution. using ProgressCallback = std::function; ProgressCallback progress_callback; /// Callback for tracking progress of query execution. @@ -211,8 +212,10 @@ public: std::unique_ptr getDDLGuardIfTableDoesntExist(const String & database, const String & table, const String & message) const; String getCurrentDatabase() const; + String getCurrentTable() const; String getCurrentQueryId() const; void setCurrentDatabase(const String & name); + void setCurrentTable(const String & database, const String & table); void setCurrentQueryId(const String & query_id); String getDefaultFormat() const; /// If default_format is not specified, some global default format is returned. diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.cpp b/dbms/src/Interpreters/InterpreterInsertQuery.cpp index 86164ef2704..f11c12b67d9 100644 --- a/dbms/src/Interpreters/InterpreterInsertQuery.cpp +++ b/dbms/src/Interpreters/InterpreterInsertQuery.cpp @@ -153,4 +153,11 @@ void InterpreterInsertQuery::checkAccess(const ASTInsertQuery & query) throw Exception("Cannot insert into table in readonly mode", ErrorCodes::READONLY); } +void InterpreterInsertQuery::getDatabaseTable(String & database, String & table) const +{ + ASTInsertQuery & query = typeid_cast(*query_ptr); + database = query.database; + table = query.table; +} + } diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.h b/dbms/src/Interpreters/InterpreterInsertQuery.h index 2180ebe0550..4ec7460fb75 100644 --- a/dbms/src/Interpreters/InterpreterInsertQuery.h +++ b/dbms/src/Interpreters/InterpreterInsertQuery.h @@ -24,6 +24,8 @@ public: */ BlockIO execute() override; + void getDatabaseTable(String & database, String & table) const; + private: StoragePtr getTable(const ASTInsertQuery & query); 
Block getSampleBlock(const ASTInsertQuery & query, const StoragePtr & table); diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index de36a84fd26..ebfc71ef537 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -200,6 +200,14 @@ static std::tuple executeQueryImpl( auto interpreter = InterpreterFactory::get(ast, context, stage); res = interpreter->execute(); + if (InterpreterInsertQuery * insertInterpreter = typeid_cast(&*interpreter)) + { + String database; + String table_name; + insertInterpreter->getDatabaseTable(database, table_name); + if (!database.empty()) + context.setCurrentTable(database, table_name); + } /// Delayed initialization of query streams (required for KILL QUERY purposes) if (process_list_entry) diff --git a/dbms/src/Proto/protoHelpers.cpp b/dbms/src/Proto/protoHelpers.cpp index 0d451625ea6..371f17d2d81 100644 --- a/dbms/src/Proto/protoHelpers.cpp +++ b/dbms/src/Proto/protoHelpers.cpp @@ -33,6 +33,7 @@ namespace DB return data; } + template typename T::Reader deserializeProto(const char * data, size_t data_size) { @@ -43,78 +44,45 @@ namespace DB return reader.getRoot(); } - static ColumnWithTypeAndName storeContext(const String & column_name, Context & context) + + static MutableColumnPtr storeTableMeta(const TableMetadata & meta) { capnp::MallocMessageBuilder message; Proto::Context::Builder proto_context = message.initRoot(); - Databases dbs = context.getDatabases(); - auto proto_databases = proto_context.initDatabases(dbs.size()); + auto proto_databases = proto_context.initDatabases(1); + auto proto_db = proto_databases[0]; + proto_db.setName(meta.database); - size_t db_nomber = 0; - for (auto & pr_db : dbs) + auto proto_db_tables = proto_db.initTables(1); + auto proto_table = proto_db_tables[0]; + proto_table.setName(meta.table); + + auto proto_columns = proto_table.initColumns(meta.column_defaults.size()); + + size_t column_no = 0; + for (const auto 
& pr_column : meta.column_defaults) { - const String & database_name = pr_db.first; - if (database_name == "system") - continue; + const String & column_name = pr_column.first; + const ColumnDefault & def = pr_column.second; + std::stringstream ss; + ss << def.expression; - IDatabase & db = *pr_db.second; + auto current_column = proto_columns[column_no]; + current_column.setName(column_name); + current_column.getDefault().setKind(static_cast(def.kind)); + current_column.getDefault().setExpression(ss.str()); - auto proto_db = proto_databases[db_nomber]; - proto_db.setName(database_name); - - std::unordered_map tables; - DatabaseIteratorPtr it_tables = db.getIterator(context); - while (it_tables->isValid()) - { - tables[it_tables->name()] = it_tables->table(); - it_tables->next(); - } - - auto proto_tables = proto_db.initTables(tables.size()); - size_t table_no = 0; - for (const auto & pr_table : tables) - { - auto current_table = proto_tables[table_no]; - current_table.setName(pr_table.first); - - const ColumnsDescription & columns = pr_table.second->getColumns(); - auto proto_columns = current_table.initColumns(columns.defaults.size()); - - size_t column_no = 0; - for (const auto& pr_column : columns.defaults) - { - const String & column_name = pr_column.first; - const ColumnDefault & def = pr_column.second; - std::stringstream ss; - ss << def.expression; - - auto current_column = proto_columns[column_no]; - current_column.setName(column_name); - current_column.getDefault().setKind(static_cast(def.kind)); - current_column.getDefault().setExpression(ss.str()); - - ++column_no; - } - - ++table_no; - } - - ++db_nomber; + ++column_no; } - ColumnWithTypeAndName proto_column; - proto_column.name = column_name; - proto_column.type = std::make_shared(); - proto_column.column = std::move(serializeProto(message)); - return proto_column; + return serializeProto(message); } - static void loadTableMetadata(const ColumnWithTypeAndName & proto_column, TableMetadata & table_meta) 
+ + static void loadTableMeta(const char * data, size_t data_size, TableMetadata & table_meta) { - StringRef plain_data = proto_column.column->getDataAt(0); - size_t data_size = proto_column.column->byteSize(); - Proto::Context::Reader proto_context = deserializeProto(plain_data.data, data_size); + Proto::Context::Reader proto_context = deserializeProto(data, data_size); ParserTernaryOperatorExpression parser; @@ -146,25 +114,33 @@ namespace DB } } - static constexpr const char * contextColumnName() + + static constexpr const char * tableMetaColumnName() { - return "context"; + return "tableMeta"; } - Block storeContextBlock(Context & context) + + Block storeTableMetadata(const TableMetadata & table_meta) { + ColumnWithTypeAndName proto_column; + proto_column.name = tableMetaColumnName(); + proto_column.type = std::make_shared(); + proto_column.column = std::move(storeTableMeta(table_meta)); + Block block; - block.insert(storeContext(contextColumnName(), context)); + block.insert(std::move(proto_column)); return block; } + void loadTableMetadata(const Block & block, TableMetadata & table_meta) { /// select metadata type by column name - if (block.has(contextColumnName())) + if (block.has(tableMetaColumnName())) { - const ColumnWithTypeAndName & column = block.getByName(contextColumnName()); - loadTableMetadata(column, table_meta); + const ColumnWithTypeAndName & column = block.getByName(tableMetaColumnName()); + loadTableMeta(column.column->getDataAt(0).data, column.column->byteSize(), table_meta); } } } diff --git a/dbms/src/Proto/protoHelpers.h b/dbms/src/Proto/protoHelpers.h index 88e3c299fa2..2909ebc3e9d 100644 --- a/dbms/src/Proto/protoHelpers.h +++ b/dbms/src/Proto/protoHelpers.h @@ -7,7 +7,7 @@ namespace DB class Block; class TableMetadata; - Block storeContextBlock(Context & context); + Block storeTableMetadata(const TableMetadata & table_meta); void loadTableMetadata(const Block & block, TableMetadata & table_meta); } diff --git 
a/dbms/src/Storages/TableMetadata.cpp b/dbms/src/Storages/TableMetadata.cpp index a489e5ba92e..5549574275a 100644 --- a/dbms/src/Storages/TableMetadata.cpp +++ b/dbms/src/Storages/TableMetadata.cpp @@ -5,13 +5,14 @@ namespace DB { - void TableMetadata::loadFromContext(const Context & context) + bool TableMetadata::loadFromContext(const Context & context) { if (!context.isTableExist(database, table)) - return; + return false; StoragePtr storage = context.getTable(database, table); const ColumnsDescription & table_columns = storage->getColumns(); column_defaults = table_columns.defaults; + return true; } } diff --git a/dbms/src/Storages/TableMetadata.h b/dbms/src/Storages/TableMetadata.h index a88808ed96a..2194f2c8465 100644 --- a/dbms/src/Storages/TableMetadata.h +++ b/dbms/src/Storages/TableMetadata.h @@ -18,6 +18,7 @@ namespace DB const String & table; ColumnDefaults column_defaults; - void loadFromContext(const Context & context); + bool loadFromContext(const Context & context); + bool hasDefaults() const { return !column_defaults.empty(); } }; } From 2d3aa8cdbaf0a93c75632a2074cdffacc4d69ca7 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Mon, 16 Jul 2018 19:41:15 +0300 Subject: [PATCH 28/88] Update TableMetadata.h --- dbms/src/Storages/TableMetadata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/TableMetadata.h b/dbms/src/Storages/TableMetadata.h index 2194f2c8465..ddb5b79b68b 100644 --- a/dbms/src/Storages/TableMetadata.h +++ b/dbms/src/Storages/TableMetadata.h @@ -7,7 +7,7 @@ namespace DB class Context; class Block; - /// Addition information for query that could not be get from sample block + /// Additional information for query that could not be get from sample block struct TableMetadata { TableMetadata(const String & database_, const String & table_) From 0463fb6dd316f1a3bbe6c0b67ea213338dced3cd Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Mon, 16 Jul 2018 19:48:56 +0300 Subject: [PATCH 29/88] Update 
Settings.h --- dbms/src/Interpreters/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index 5d946a7d6cb..b07e9914f34 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -123,7 +123,7 @@ struct Settings M(SettingUInt64, max_concurrent_queries_for_user, 0, "The maximum number of concurrent requests per user.") \ \ M(SettingBool, insert_deduplicate, true, "For INSERT queries in the replicated table, specifies that deduplication of insertings blocks should be preformed") \ - M(SettingBool, insert_sample_with_metadata, true, "For INSERT queries, specifies that need add metadata before sample block") \ + M(SettingBool, insert_sample_with_metadata, true, "For INSERT queries, specifies that the server need to send metadata about column defaults to the client. This will be used to calculate default expressions.") \ \ M(SettingUInt64, insert_quorum, 0, "For INSERT queries in the replicated table, wait writing for the specified number of replicas and linearize the addition of the data. 
0 - disabled.") \ M(SettingMilliseconds, insert_quorum_timeout, 600000, "") \ From 07a782e637249a760f15d73bfecb3329cde3f902 Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 16 Jul 2018 22:28:07 +0300 Subject: [PATCH 30/88] fix wrong defaults at columns tail and some build fixes --- cmake/find_capnp.cmake | 4 ---- dbms/programs/client/Client.cpp | 1 + .../DataStreams/AddingDefaultsBlockInputStream.cpp | 2 +- .../Formats/BlockInputStreamFromRowInputStream.cpp | 11 ++--------- libs/libcommon/include/common/config_common.h.in | 1 - 5 files changed, 4 insertions(+), 15 deletions(-) diff --git a/cmake/find_capnp.cmake b/cmake/find_capnp.cmake index 426031db15e..03ecadda6a1 100644 --- a/cmake/find_capnp.cmake +++ b/cmake/find_capnp.cmake @@ -1,9 +1,5 @@ option (ENABLE_CAPNP "Enable Cap'n Proto" ${NOT_MSVC}) -unset (USE_CAPNP CACHE) -unset (USE_INTERNAL_CAPNP_LIBRARY CACHE) -unset (MISSING_INTERNAL_CAPNP_LIBRARY CACHE) - if (ENABLE_CAPNP) # cmake 3.5.1 bug: # capnproto uses this cmake feature: diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 74eff658133..51ba397a000 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp index 7cc90f6a641..63d18cd0285 100644 --- a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp +++ b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp @@ -74,7 +74,7 @@ Block AddingDefaultsBlockInputStream::readImpl() for (size_t row_idx = 0; row_idx < column_read.column->size(); ++row_idx) { - if (defaults_mask[row_idx]) + if (row_idx < defaults_mask.size() && defaults_mask[row_idx]) { if (column_def.column->isColumnConst()) column_mixed->insert((*column_def.column)[row_idx]); diff --git a/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp 
b/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp index aa4c2968539..f3ec72a4a6c 100644 --- a/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp +++ b/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp @@ -16,7 +16,6 @@ namespace ErrorCodes extern const int CANNOT_PARSE_NUMBER; extern const int CANNOT_PARSE_UUID; extern const int TOO_LARGE_STRING_SIZE; - extern const int INCORRECT_NUMBER_OF_COLUMNS; } @@ -62,14 +61,8 @@ Block BlockInputStreamFromRowInputStream::readImpl() break; for (size_t column_idx = 0; column_idx < info.read_columns.size(); ++column_idx) - { - if (!info.read_columns[column_idx]) { - size_t column_size = columns[column_idx]->size(); - if (column_size == 0) - throw Exception("Unexpected empty column", ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS); - delayed_defaults.setBit(column_idx, column_size - 1); - } - } + if (!info.read_columns[column_idx]) + delayed_defaults.setBit(column_idx, rows); } catch (Exception & e) { diff --git a/libs/libcommon/include/common/config_common.h.in b/libs/libcommon/include/common/config_common.h.in index bc24a0c6fe2..0cc0950efba 100644 --- a/libs/libcommon/include/common/config_common.h.in +++ b/libs/libcommon/include/common/config_common.h.in @@ -4,7 +4,6 @@ #cmakedefine01 USE_TCMALLOC #cmakedefine01 USE_JEMALLOC -#cmakedefine01 USE_CAPNP #cmakedefine01 USE_READLINE #cmakedefine01 USE_LIBEDIT #cmakedefine01 HAVE_READLINE_HISTORY From c7d9314189d7449641abacb3fef6bae8fff88d3b Mon Sep 17 00:00:00 2001 From: chertus Date: Tue, 17 Jul 2018 15:18:20 +0300 Subject: [PATCH 31/88] allow build with system capnp --- cmake/find_capnp.cmake | 3 +++ dbms/src/Proto/CMakeLists.txt | 7 ++----- utils/build/build_debian_unbundled.sh | 2 +- utils/travis/normal.sh | 2 +- utils/travis/pbuilder.sh | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/cmake/find_capnp.cmake b/cmake/find_capnp.cmake index 03ecadda6a1..b72746b436a 100644 --- a/cmake/find_capnp.cmake +++ b/cmake/find_capnp.cmake @@ -25,6 +25,7 @@ 
if (ENABLE_CAPNP) if (NOT USE_INTERNAL_CAPNP_LIBRARY) set (CAPNP_PATHS "/usr/local/opt/capnp/lib") + set (CAPNP_BIN_PATH "/usr/bin:/usr/local/bin") set (CAPNP_INCLUDE_PATHS "/usr/local/opt/capnp/include") find_library (CAPNP capnp PATHS ${CAPNP_PATHS}) find_library (CAPNPC capnpc PATHS ${CAPNP_PATHS}) @@ -40,6 +41,8 @@ if (ENABLE_CAPNP) set (CAPNP_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/capnproto/c++/src") set (CAPNP_LIBRARY capnpc) set (USE_CAPNP 1) + set (CAPNP_BIN_PATH ${ClickHouse_BINARY_DIR}/contrib/capnproto/c++/src/capnp) + set (CAPNP_BIN_TARGET capnp_tool) endif () endif () diff --git a/dbms/src/Proto/CMakeLists.txt b/dbms/src/Proto/CMakeLists.txt index b3871cd53e8..1f5d07089af 100644 --- a/dbms/src/Proto/CMakeLists.txt +++ b/dbms/src/Proto/CMakeLists.txt @@ -1,10 +1,7 @@ -set (CAPNP_PATH ${CMAKE_BINARY_DIR}/contrib/capnproto/c++/src/capnp) -set (CAPNP_BIN ${CAPNP_PATH}/capnp) - add_custom_command (OUTPUT ServerMessage.capnp.c++ ServerMessage.capnp.h COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/ServerMessage.capnp ${CMAKE_CURRENT_BINARY_DIR}/ServerMessage.capnp - COMMAND ${CMAKE_COMMAND} -E env PATH=${CAPNP_PATH} ${CAPNP_BIN} compile -I ${CAPNP_INCLUDE_DIR} -oc++ ServerMessage.capnp - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/ServerMessage.capnp) + COMMAND ${CMAKE_COMMAND} -E env PATH=${CAPNP_BIN_PATH} capnp compile -I ${CAPNP_INCLUDE_DIR} -oc++ ServerMessage.capnp + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/ServerMessage.capnp ${CAPNP_BIN_TARGET}) add_library (clickhouse_proto ServerMessage.capnp.c++ protoHelpers.cpp) target_link_libraries (clickhouse_proto clickhouse_common_io ${CAPNP_LIBRARY}) diff --git a/utils/build/build_debian_unbundled.sh b/utils/build/build_debian_unbundled.sh index dc47c8fc3a3..53b7a12a239 100755 --- a/utils/build/build_debian_unbundled.sh +++ b/utils/build/build_debian_unbundled.sh @@ -22,5 +22,5 @@ env TEST_RUN=1 \ `# Use all 
possible contrib libs from system` \ `# psmisc - killall` \ `# gdb - symbol test in pbuilder` \ - EXTRAPACKAGES="psmisc gdb clang-6.0 libc++abi-dev libc++-dev libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev librdkafka-dev libpoco-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev libunwind-dev googletest libcctz-dev $EXTRAPACKAGES" \ + EXTRAPACKAGES="psmisc gdb clang-6.0 libc++abi-dev libc++-dev libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev capnproto libcapnp-dev librdkafka-dev libpoco-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev libunwind-dev googletest libcctz-dev $EXTRAPACKAGES" \ pdebuild --configfile $ROOT_DIR/debian/.pbuilderrc $PDEBUILD_OPT diff --git a/utils/travis/normal.sh b/utils/travis/normal.sh index b361744a3ec..f2e935f9bfa 100755 --- a/utils/travis/normal.sh +++ b/utils/travis/normal.sh @@ -32,7 +32,7 @@ cmake $CUR_DIR/../.. 
-DCMAKE_CXX_COMPILER=`which $DEB_CXX $CXX` -DCMAKE_C_COMPIL `# Use all possible contrib libs from system` \ -DUNBUNDLED=1 \ `# Disable all features` \ - -DENABLE_CAPNP=0 -DENABLE_RDKAFKA=0 -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_TCMALLOC=0 -DENABLE_UNWIND=0 -DENABLE_MYSQL=0 -DUSE_INTERNAL_LLVM_LIBRARY=0 $CMAKE_FLAGS \ + -DENABLE_RDKAFKA=0 -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_TCMALLOC=0 -DENABLE_UNWIND=0 -DENABLE_MYSQL=0 -DUSE_INTERNAL_LLVM_LIBRARY=0 $CMAKE_FLAGS \ && ninja clickhouse-bundle \ `# Skip tests:` \ `# 00281 requires internal compiler` \ diff --git a/utils/travis/pbuilder.sh b/utils/travis/pbuilder.sh index 796dcf3e8d9..a1487ba2783 100755 --- a/utils/travis/pbuilder.sh +++ b/utils/travis/pbuilder.sh @@ -24,10 +24,10 @@ env TEST_RUN=${TEST_RUN=1} \ DEB_CC=${DEB_CC=$CC} DEB_CXX=${DEB_CXX=$CXX} \ CCACHE_SIZE=${CCACHE_SIZE:=4G} \ `# Disable all features` \ - CMAKE_FLAGS="-DCMAKE_BUILD_TYPE=Debug -DUNBUNDLED=1 -DENABLE_UNWIND=0 -DENABLE_MYSQL=0 -DENABLE_CAPNP=0 -DENABLE_RDKAFKA=0 -DUSE_INTERNAL_LLVM_LIBRARY=0 -DCMAKE_C_FLAGS_ADD='-O0 -g0' -DCMAKE_CXX_FLAGS_ADD='-O0 -g0' $CMAKE_FLAGS" \ + CMAKE_FLAGS="-DCMAKE_BUILD_TYPE=Debug -DUNBUNDLED=1 -DENABLE_UNWIND=0 -DENABLE_MYSQL=0 -DENABLE_RDKAFKA=0 -DUSE_INTERNAL_CAPNP_LIBRARY=0 -DUSE_INTERNAL_LLVM_LIBRARY=0 -DCMAKE_C_FLAGS_ADD='-O0 -g0' -DCMAKE_CXX_FLAGS_ADD='-O0 -g0' $CMAKE_FLAGS" \ `# Use all possible contrib libs from system` \ `# psmisc - killall` \ - EXTRAPACKAGES="psmisc clang-5.0 lld-5.0 liblld-5.0-dev libclang-5.0-dev liblld-5.0 libc++abi-dev libc++-dev libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev librdkafka-dev libpoco-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev $EXTRAPACKAGES" \ + EXTRAPACKAGES="psmisc clang-5.0 lld-5.0 liblld-5.0-dev libclang-5.0-dev liblld-5.0 libc++abi-dev libc++-dev libboost-program-options-dev libboost-system-dev libboost-filesystem-dev 
libboost-thread-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev libcapnp-dev capnproto librdkafka-dev libpoco-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev $EXTRAPACKAGES" \ `# Travis trusty cant unpack bionic: E: debootstrap failed, TODO: check again, can be fixed` \ DIST=${DIST=artful} \ $CUR_DIR/../../release $RELEASE_OPT From 2edda7dee56111b671f03d4744cb23a091ca22e7 Mon Sep 17 00:00:00 2001 From: chertus Date: Tue, 17 Jul 2018 16:55:55 +0300 Subject: [PATCH 32/88] try fix Travis build (use internal capnp) --- utils/travis/pbuilder.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/travis/pbuilder.sh b/utils/travis/pbuilder.sh index a1487ba2783..7257e597f1a 100755 --- a/utils/travis/pbuilder.sh +++ b/utils/travis/pbuilder.sh @@ -24,10 +24,10 @@ env TEST_RUN=${TEST_RUN=1} \ DEB_CC=${DEB_CC=$CC} DEB_CXX=${DEB_CXX=$CXX} \ CCACHE_SIZE=${CCACHE_SIZE:=4G} \ `# Disable all features` \ - CMAKE_FLAGS="-DCMAKE_BUILD_TYPE=Debug -DUNBUNDLED=1 -DENABLE_UNWIND=0 -DENABLE_MYSQL=0 -DENABLE_RDKAFKA=0 -DUSE_INTERNAL_CAPNP_LIBRARY=0 -DUSE_INTERNAL_LLVM_LIBRARY=0 -DCMAKE_C_FLAGS_ADD='-O0 -g0' -DCMAKE_CXX_FLAGS_ADD='-O0 -g0' $CMAKE_FLAGS" \ + CMAKE_FLAGS="-DCMAKE_BUILD_TYPE=Debug -DUNBUNDLED=1 -DENABLE_UNWIND=0 -DENABLE_MYSQL=0 -DENABLE_RDKAFKA=0 -DUSE_INTERNAL_LLVM_LIBRARY=0 -DCMAKE_C_FLAGS_ADD='-O0 -g0' -DCMAKE_CXX_FLAGS_ADD='-O0 -g0' $CMAKE_FLAGS" \ `# Use all possible contrib libs from system` \ `# psmisc - killall` \ - EXTRAPACKAGES="psmisc clang-5.0 lld-5.0 liblld-5.0-dev libclang-5.0-dev liblld-5.0 libc++abi-dev libc++-dev libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev libcapnp-dev capnproto librdkafka-dev libpoco-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev $EXTRAPACKAGES" \ + EXTRAPACKAGES="psmisc clang-5.0 lld-5.0 liblld-5.0-dev libclang-5.0-dev liblld-5.0 libc++abi-dev 
libc++-dev libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev librdkafka-dev libpoco-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev $EXTRAPACKAGES" \ `# Travis trusty cant unpack bionic: E: debootstrap failed, TODO: check again, can be fixed` \ DIST=${DIST=artful} \ $CUR_DIR/../../release $RELEASE_OPT From f12edacbcf79b899bbf781c42a7dc2855e2f6665 Mon Sep 17 00:00:00 2001 From: chertus Date: Tue, 17 Jul 2018 19:22:31 +0300 Subject: [PATCH 33/88] Travis build with internal capnp (normal) --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 705b6977114..401c8d96856 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,7 +14,7 @@ matrix: # update: true # sources: # - ubuntu-toolchain-r-test -# packages: [ g++-7, libicu-dev, libreadline-dev, libmysqlclient-dev, unixodbc-dev, libltdl-dev, libssl-dev, libboost-dev, zlib1g-dev, libdouble-conversion-dev, libsparsehash-dev, librdkafka-dev, libcapnp-dev, libsparsehash-dev, libgoogle-perftools-dev, bash, expect, python, python-lxml, python-termcolor, curl, perl, sudo, openssl ] +# packages: [ g++-7, libicu-dev, libreadline-dev, libmysqlclient-dev, unixodbc-dev, libltdl-dev, libssl-dev, libboost-dev, zlib1g-dev, libdouble-conversion-dev, libsparsehash-dev, librdkafka-dev, libsparsehash-dev, libgoogle-perftools-dev, bash, expect, python, python-lxml, python-termcolor, curl, perl, sudo, openssl ] # # env: # - MATRIX_EVAL="export CC=gcc-7 && export CXX=g++-7" @@ -38,7 +38,7 @@ matrix: sources: - ubuntu-toolchain-r-test - llvm-toolchain-trusty-5.0 - packages: [ ninja-build, g++-7, clang-5.0, lld-5.0, libicu-dev, libreadline-dev, libmysqlclient-dev, unixodbc-dev, libltdl-dev, libssl-dev, libboost-dev, zlib1g-dev, libdouble-conversion-dev, libsparsehash-dev, librdkafka-dev, libcapnp-dev, libsparsehash-dev, libgoogle-perftools-dev, bash, expect, 
python, python-lxml, python-termcolor, curl, perl, sudo, openssl] + packages: [ ninja-build, g++-7, clang-5.0, lld-5.0, libicu-dev, libreadline-dev, libmysqlclient-dev, unixodbc-dev, libltdl-dev, libssl-dev, libboost-dev, zlib1g-dev, libdouble-conversion-dev, libsparsehash-dev, librdkafka-dev, libsparsehash-dev, libgoogle-perftools-dev, bash, expect, python, python-lxml, python-termcolor, curl, perl, sudo, openssl] env: - MATRIX_EVAL="export CC=clang-5.0 && export CXX=clang++-5.0" From cbf1c220ae8bd17c40054111cdafd4e1f5dde0f7 Mon Sep 17 00:00:00 2001 From: chertus Date: Tue, 17 Jul 2018 21:18:46 +0300 Subject: [PATCH 34/88] better proto deserialize --- dbms/src/Proto/protoHelpers.cpp | 38 ++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/dbms/src/Proto/protoHelpers.cpp b/dbms/src/Proto/protoHelpers.cpp index 371f17d2d81..c898d182df8 100644 --- a/dbms/src/Proto/protoHelpers.cpp +++ b/dbms/src/Proto/protoHelpers.cpp @@ -19,6 +19,13 @@ namespace DB { + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + + static MutableColumnPtr serializeProto(capnp::MessageBuilder & message) { MutableColumnPtr data = DataTypeUInt8().createColumn(); @@ -34,19 +41,29 @@ namespace DB } + /// template - typename T::Reader deserializeProto(const char * data, size_t data_size) + class ProtoDeserializer { - const capnp::word * ptr = reinterpret_cast(data); - auto serialized = kj::arrayPtr(ptr, data_size / sizeof(capnp::word)); + public: + ProtoDeserializer(const char * data, size_t data_size) + : serialized(kj::arrayPtr(reinterpret_cast(data), data_size / sizeof(capnp::word))), + reader(serialized) + {} - capnp::FlatArrayMessageReader reader(serialized); - return reader.getRoot(); - } + typename T::Reader getReader() { return reader.getRoot(); } + + private: + kj::ArrayPtr serialized; + capnp::FlatArrayMessageReader reader; + }; static MutableColumnPtr storeTableMeta(const TableMetadata & meta) { + if (meta.database.empty() || 
meta.table.empty()) + throw Exception("storeTableMeta: table is not set", ErrorCodes::LOGICAL_ERROR); + capnp::MallocMessageBuilder message; Proto::Context::Builder proto_context = message.initRoot(); @@ -82,7 +99,11 @@ namespace DB static void loadTableMeta(const char * data, size_t data_size, TableMetadata & table_meta) { - Proto::Context::Reader proto_context = deserializeProto(data, data_size); + if (data == nullptr || data_size == 0) + throw Exception("loadTableMeta: empty metadata column", ErrorCodes::LOGICAL_ERROR); + + ProtoDeserializer deserializer(data, data_size); + Proto::Context::Reader proto_context = deserializer.getReader(); ParserTernaryOperatorExpression parser; @@ -140,7 +161,8 @@ namespace DB if (block.has(tableMetaColumnName())) { const ColumnWithTypeAndName & column = block.getByName(tableMetaColumnName()); - loadTableMeta(column.column->getDataAt(0).data, column.column->byteSize(), table_meta); + StringRef raw_data = column.column->getRawData(); + loadTableMeta(raw_data.data, raw_data.size, table_meta); } } } From 955293cea78736390501b46cdfa5412f8af5a87c Mon Sep 17 00:00:00 2001 From: chertus Date: Tue, 17 Jul 2018 21:42:05 +0300 Subject: [PATCH 35/88] some more build improvements --- cmake/find_capnp.cmake | 2 +- dbms/src/Proto/CMakeLists.txt | 2 +- dbms/src/Proto/protoHelpers.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/find_capnp.cmake b/cmake/find_capnp.cmake index b72746b436a..abe46316676 100644 --- a/cmake/find_capnp.cmake +++ b/cmake/find_capnp.cmake @@ -42,7 +42,7 @@ if (ENABLE_CAPNP) set (CAPNP_LIBRARY capnpc) set (USE_CAPNP 1) set (CAPNP_BIN_PATH ${ClickHouse_BINARY_DIR}/contrib/capnproto/c++/src/capnp) - set (CAPNP_BIN_TARGET capnp_tool) + set (CAPNP_BIN_TARGETS capnp_tool capnpc_cpp capnpc_capnp) endif () endif () diff --git a/dbms/src/Proto/CMakeLists.txt b/dbms/src/Proto/CMakeLists.txt index 1f5d07089af..1e150d9f86b 100644 --- a/dbms/src/Proto/CMakeLists.txt +++ b/dbms/src/Proto/CMakeLists.txt @@ 
-1,7 +1,7 @@ add_custom_command (OUTPUT ServerMessage.capnp.c++ ServerMessage.capnp.h COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/ServerMessage.capnp ${CMAKE_CURRENT_BINARY_DIR}/ServerMessage.capnp COMMAND ${CMAKE_COMMAND} -E env PATH=${CAPNP_BIN_PATH} capnp compile -I ${CAPNP_INCLUDE_DIR} -oc++ ServerMessage.capnp - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/ServerMessage.capnp ${CAPNP_BIN_TARGET}) + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/ServerMessage.capnp ${CAPNP_BIN_TARGETS}) add_library (clickhouse_proto ServerMessage.capnp.c++ protoHelpers.cpp) target_link_libraries (clickhouse_proto clickhouse_common_io ${CAPNP_LIBRARY}) diff --git a/dbms/src/Proto/protoHelpers.h b/dbms/src/Proto/protoHelpers.h index 2909ebc3e9d..7cef5a28013 100644 --- a/dbms/src/Proto/protoHelpers.h +++ b/dbms/src/Proto/protoHelpers.h @@ -5,7 +5,7 @@ namespace DB { class Context; class Block; - class TableMetadata; + struct TableMetadata; Block storeTableMetadata(const TableMetadata & table_meta); void loadTableMetadata(const Block & block, TableMetadata & table_meta); From 3d133a6a5a861161a4dde3b7b76f6ae39f9a9781 Mon Sep 17 00:00:00 2001 From: chertus Date: Tue, 17 Jul 2018 23:02:37 +0300 Subject: [PATCH 36/88] save block structure when add defaults --- .../AddingDefaultsBlockInputStream.cpp | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp index 63d18cd0285..ace3841e67b 100644 --- a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp +++ b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp @@ -51,8 +51,7 @@ Block AddingDefaultsBlockInputStream::readImpl() evaluateMissingDefaultsUnsafe(evaluate_block, header.getNamesAndTypesList(), column_defaults, context); - ColumnsWithTypeAndName mixed_columns; - 
mixed_columns.reserve(std::min(column_defaults.size(), delayed_defaults.size())); + std::unordered_map mixed_columns; for (const ColumnWithTypeAndName & column_def : evaluate_block) { @@ -85,16 +84,21 @@ Block AddingDefaultsBlockInputStream::readImpl() column_mixed->insertFrom(*column_read.column, row_idx); } - ColumnWithTypeAndName mix = column_read.cloneEmpty(); - mix.column = std::move(column_mixed); - mixed_columns.emplace_back(std::move(mix)); + mixed_columns.emplace(std::make_pair(block_column_position, std::move(column_mixed))); } } - for (auto & column : mixed_columns) + if (!mixed_columns.empty()) { - res.erase(column.name); - res.insert(std::move(column)); + /// replace columns saving block structure + MutableColumns mutation = res.mutateColumns(); + for (size_t position = 0; position < mutation.size(); ++position) + { + auto it = mixed_columns.find(position); + if (it != mixed_columns.end()) + mutation[position] = std::move(it->second); + } + res.setColumns(std::move(mutation)); } return res; From af4cceb87789a9b94265a154fa9490e08ddfe649 Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 12 Nov 2018 18:25:21 +0300 Subject: [PATCH 37/88] make branch up to date --- dbms/src/Interpreters/evaluateMissingDefaults.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/evaluateMissingDefaults.cpp b/dbms/src/Interpreters/evaluateMissingDefaults.cpp index b0280fe9ae7..8d84c8f6576 100644 --- a/dbms/src/Interpreters/evaluateMissingDefaults.cpp +++ b/dbms/src/Interpreters/evaluateMissingDefaults.cpp @@ -90,7 +90,8 @@ void evaluateMissingDefaultsUnsafe(Block & block, for (size_t i = 0, size = block.columns(); i < size; ++i) available_columns.emplace_back(block.getByPosition(i).name, block.getByPosition(i).type); - ExpressionAnalyzer{default_expr_list, context, {}, available_columns}.getActions(true)->execute(block); + auto syntax_result = SyntaxAnalyzer(context, {}).analyze(default_expr_list, available_columns); + 
ExpressionAnalyzer{default_expr_list, syntax_result, context}.getActions(true)->execute(block); } } From c1518da50f1e7620a49b9c7494f0fccc614e85e6 Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 12 Nov 2018 20:29:02 +0300 Subject: [PATCH 38/88] fix build with CAPNP --- dbms/programs/server/TCPHandler.cpp | 2 +- dbms/src/Proto/protoHelpers.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp index e6347342a42..f06fd070d98 100644 --- a/dbms/programs/server/TCPHandler.cpp +++ b/dbms/programs/server/TCPHandler.cpp @@ -30,8 +30,8 @@ #include #include #include - #include +#include #include "TCPHandler.h" diff --git a/dbms/src/Proto/protoHelpers.cpp b/dbms/src/Proto/protoHelpers.cpp index c898d182df8..cc91cbc5905 100644 --- a/dbms/src/Proto/protoHelpers.cpp +++ b/dbms/src/Proto/protoHelpers.cpp @@ -147,7 +147,7 @@ namespace ErrorCodes ColumnWithTypeAndName proto_column; proto_column.name = tableMetaColumnName(); proto_column.type = std::make_shared(); - proto_column.column = std::move(storeTableMeta(table_meta)); + proto_column.column = storeTableMeta(table_meta); Block block; block.insert(std::move(proto_column)); From 9aae1d0d8fc5211ac903464676c043db99414d79 Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 12 Nov 2018 21:34:43 +0300 Subject: [PATCH 39/88] fix branches merge mistake --- dbms/programs/client/Client.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 7642ca54787..c976d50bad8 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -1129,7 +1129,7 @@ private: #if USE_CAPNP loadTableMetadata(packet.block, table_meta); #endif - return receiveSampleBlock(packet.block, table_meta); + return receiveSampleBlock(out, table_meta); case Protocol::Server::Exception: onException(*packet.exception); From 9d3325f56e09a420f3600568bed962339ce5280e Mon Sep 17 
00:00:00 2001 From: chertus Date: Tue, 13 Nov 2018 16:36:53 +0300 Subject: [PATCH 40/88] one more fix for bad merge of branches --- .../BlockInputStreamFromRowInputStream.cpp | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp b/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp index 894c597ef14..4a23a594876 100644 --- a/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp +++ b/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp @@ -19,6 +19,7 @@ namespace ErrorCodes extern const int TOO_LARGE_STRING_SIZE; extern const int CANNOT_READ_ALL_DATA; extern const int INCORRECT_DATA; + extern const int INCORRECT_NUMBER_OF_COLUMNS; } @@ -52,6 +53,7 @@ Block BlockInputStreamFromRowInputStream::readImpl() { size_t num_columns = sample.columns(); MutableColumns columns = sample.cloneEmptyColumns(); + BlockDelayedDefaults delayed_defaults; try { @@ -60,8 +62,19 @@ Block BlockInputStreamFromRowInputStream::readImpl() try { ++total_rows; - if (!row_input->read(columns)) + RowReadExtention info; + if (!row_input->extendedRead(columns, info)) break; + + for (size_t column_idx = 0; column_idx < info.read_columns.size(); ++column_idx) + { + if (!info.read_columns[column_idx]) { + size_t column_size = columns[column_idx]->size(); + if (column_size == 0) + throw Exception("Unexpected empty column", ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS); + delayed_defaults.setBit(column_idx, column_size - 1); + } + } } catch (Exception & e) { @@ -130,7 +143,10 @@ Block BlockInputStreamFromRowInputStream::readImpl() if (columns.empty() || columns[0]->empty()) return {}; - return sample.cloneWithColumns(std::move(columns)); + auto out_block = sample.cloneWithColumns(std::move(columns)); + if (!delayed_defaults.empty()) + out_block.delayed_defaults = std::move(delayed_defaults); + return out_block; } From ebf3d6018683dcc1cffd1b243881685df69a04e1 Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 14 
Nov 2018 18:23:00 +0300 Subject: [PATCH 41/88] column defaults without CapNProto [CLICKHOUSE-3578] --- cmake/find_capnp.cmake | 3 - dbms/CMakeLists.txt | 1 - dbms/programs/client/CMakeLists.txt | 6 +- dbms/programs/client/Client.cpp | 27 +-- dbms/programs/server/TCPHandler.cpp | 35 +--- dbms/programs/server/TCPHandler.h | 1 - dbms/src/Client/Connection.cpp | 1 - dbms/src/Core/Block.cpp | 7 + dbms/src/Core/Block.h | 3 + dbms/src/Core/Defines.h | 2 +- dbms/src/Core/Protocol.h | 20 +-- .../InputStreamFromASTInsertQuery.cpp | 10 +- .../BlockInputStreamFromRowInputStream.cpp | 3 +- dbms/src/Interpreters/Settings.h | 2 +- dbms/src/Proto/CMakeLists.txt | 10 -- dbms/src/Proto/ServerMessage.capnp | 34 ---- dbms/src/Proto/protoHelpers.cpp | 168 ------------------ dbms/src/Proto/protoHelpers.h | 14 -- dbms/src/Storages/ColumnDefault.cpp | 82 ++++++++- dbms/src/Storages/ColumnDefault.h | 12 ++ dbms/src/Storages/TableMetadata.cpp | 18 -- dbms/src/Storages/TableMetadata.h | 24 --- .../00760_insert_json_with_defaults.sql | 2 + utils/build/build_debian_unbundled.sh | 2 +- utils/travis/normal.sh | 2 +- utils/travis/pbuilder.sh | 2 +- 26 files changed, 139 insertions(+), 352 deletions(-) delete mode 100644 dbms/src/Proto/CMakeLists.txt delete mode 100644 dbms/src/Proto/ServerMessage.capnp delete mode 100644 dbms/src/Proto/protoHelpers.cpp delete mode 100644 dbms/src/Proto/protoHelpers.h delete mode 100644 dbms/src/Storages/TableMetadata.cpp delete mode 100644 dbms/src/Storages/TableMetadata.h diff --git a/cmake/find_capnp.cmake b/cmake/find_capnp.cmake index a54e6c0413f..ec591afdc38 100644 --- a/cmake/find_capnp.cmake +++ b/cmake/find_capnp.cmake @@ -25,7 +25,6 @@ if (ENABLE_CAPNP) if (NOT USE_INTERNAL_CAPNP_LIBRARY) set (CAPNP_PATHS "/usr/local/opt/capnp/lib") - set (CAPNP_BIN_PATH "/usr/bin:/usr/local/bin") set (CAPNP_INCLUDE_PATHS "/usr/local/opt/capnp/include") find_library (CAPNP capnp PATHS ${CAPNP_PATHS}) find_library (CAPNPC capnpc PATHS ${CAPNP_PATHS}) @@ -41,8 +40,6 @@ if 
(ENABLE_CAPNP) set (CAPNP_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/capnproto/c++/src") set (CAPNP_LIBRARY capnpc) set (USE_CAPNP 1) - set (CAPNP_BIN_PATH ${ClickHouse_BINARY_DIR}/contrib/capnproto/c++/src/capnp) - set (CAPNP_BIN_TARGETS capnp_tool capnpc_cpp capnpc_capnp) endif () endif () diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 7dd9df62e10..dd8437d1e52 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -238,7 +238,6 @@ if (USE_CAPNP) if (NOT USE_INTERNAL_CAPNP_LIBRARY) target_include_directories (dbms SYSTEM BEFORE PRIVATE ${CAPNP_INCLUDE_DIR}) endif () - set (PROTO_LIB clickhouse_proto) endif () if (USE_RDKAFKA) diff --git a/dbms/programs/client/CMakeLists.txt b/dbms/programs/client/CMakeLists.txt index 5e99f97106b..65353094c26 100644 --- a/dbms/programs/client/CMakeLists.txt +++ b/dbms/programs/client/CMakeLists.txt @@ -1,12 +1,8 @@ add_library (clickhouse-client-lib ${LINK_MODE} Client.cpp) -target_link_libraries (clickhouse-client-lib clickhouse_common_io clickhouse_functions clickhouse_aggregate_functions - ${PROTO_LIB} ${LINE_EDITING_LIBS} ${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries (clickhouse-client-lib clickhouse_common_io clickhouse_functions clickhouse_aggregate_functions ${LINE_EDITING_LIBS} ${Boost_PROGRAM_OPTIONS_LIBRARY}) if (READLINE_INCLUDE_DIR) target_include_directories (clickhouse-client-lib SYSTEM PRIVATE ${READLINE_INCLUDE_DIR}) endif () -if (TARGET clickhouse_proto) - target_link_libraries (clickhouse-client-lib clickhouse_proto) -endif() if (CLICKHOUSE_SPLIT_BINARY) add_executable (clickhouse-client clickhouse-client.cpp) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index c976d50bad8..e8f4e1f74b9 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -56,13 +56,11 @@ #include #include #include -#include #include #include #include #include -#include -#include +#include #if USE_READLINE #include "Suggest.h" // Y_IGNORE @@ -895,12 
+893,11 @@ private: /// Receive description of table structure. Block sample; - TableMetadata table_meta(parsed_insert_query.database, parsed_insert_query.table); - if (receiveSampleBlock(sample, table_meta)) + if (receiveSampleBlock(sample)) { /// If structure was received (thus, server has not thrown an exception), /// send our data with that structure. - sendData(sample, table_meta); + sendData(sample); receiveEndOfQuery(); } } @@ -938,7 +935,7 @@ private: } - void sendData(Block & sample, const TableMetadata & table_meta) + void sendData(Block & sample) { /// If INSERT data must be sent. const ASTInsertQuery * parsed_insert_query = typeid_cast(&*parsed_query); @@ -949,19 +946,19 @@ private: { /// Send data contained in the query. ReadBufferFromMemory data_in(parsed_insert_query->data, parsed_insert_query->end - parsed_insert_query->data); - sendDataFrom(data_in, sample, table_meta); + sendDataFrom(data_in, sample); } else if (!is_interactive) { /// Send data read from stdin. - sendDataFrom(std_in, sample, table_meta); + sendDataFrom(std_in, sample); } else throw Exception("No data to insert", ErrorCodes::NO_DATA_TO_INSERT); } - void sendDataFrom(ReadBuffer & buf, Block & sample, const TableMetadata & table_meta) + void sendDataFrom(ReadBuffer & buf, Block & sample) { String current_format = insert_format; @@ -973,7 +970,7 @@ private: BlockInputStreamPtr block_input = context.getInputFormat( current_format, buf, sample, insert_format_max_block_size); - const ColumnDefaults & column_defaults = table_meta.column_defaults; + auto column_defaults = ColumnDefaultsHelper::extract(sample); if (!column_defaults.empty()) block_input = std::make_shared(block_input, column_defaults, context); BlockInputStreamPtr async_block_input = std::make_shared(block_input); @@ -1113,7 +1110,7 @@ private: /// Receive the block that serves as an example of the structure of table where data will be inserted. 
- bool receiveSampleBlock(Block & out, TableMetadata & table_meta) + bool receiveSampleBlock(Block & out) { while (true) { @@ -1125,12 +1122,6 @@ private: out = packet.block; return true; - case Protocol::Server::CapnProto: -#if USE_CAPNP - loadTableMetadata(packet.block, table_meta); -#endif - return receiveSampleBlock(out, table_meta); - case Protocol::Server::Exception: onException(*packet.exception); last_exception = std::move(packet.exception); diff --git a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp index f06fd070d98..b66c1e8bb5d 100644 --- a/dbms/programs/server/TCPHandler.cpp +++ b/dbms/programs/server/TCPHandler.cpp @@ -30,8 +30,7 @@ #include #include #include -#include -#include +#include #include "TCPHandler.h" @@ -361,22 +360,13 @@ void TCPHandler::processInsertQuery(const Settings & global_settings) */ state.io.out->writePrefix(); -#if USE_CAPNP - /// Send table metadata (column defaults) - if (client_revision >= DBMS_MIN_REVISION_WITH_PROTO_METADATA && - query_context.getSettingsRef().insert_sample_with_metadata) - { - TableMetadata table_meta(query_context.getCurrentDatabase(), query_context.getCurrentTable()); - if (table_meta.loadFromContext(query_context) && table_meta.hasDefaults()) - { - Block meta_block = storeTableMetadata(table_meta); - sendMetadata(meta_block); - } - } -#endif - /// Send block to the client - table structure. 
Block block = state.io.out->getHeader(); + + /// attach table metadata (column defaults) + if (client_revision >= DBMS_MIN_REVISION_WITH_COLUMN_DEFAULTS_METADATA) + ColumnDefaultsHelper::attachFromContext(query_context, block); + sendData(block); readData(global_settings); @@ -860,19 +850,6 @@ void TCPHandler::sendLogData(const Block & block) } -void TCPHandler::sendMetadata(const Block & block) -{ - initBlockOutput(block); - - writeVarUInt(Protocol::Server::CapnProto, *out); - writeStringBinary("", *out); - - state.block_out->write(block); - state.maybe_compressed_out->next(); - out->next(); -} - - void TCPHandler::sendException(const Exception & e, bool with_stack_trace) { writeVarUInt(Protocol::Server::Exception, *out); diff --git a/dbms/programs/server/TCPHandler.h b/dbms/programs/server/TCPHandler.h index 22177edf77a..af422921f07 100644 --- a/dbms/programs/server/TCPHandler.h +++ b/dbms/programs/server/TCPHandler.h @@ -144,7 +144,6 @@ private: void sendHello(); void sendData(const Block & block); /// Write a block to the network. 
void sendLogData(const Block & block); - void sendMetadata(const Block & block); void sendException(const Exception & e, bool with_stack_trace); void sendProgress(); void sendLogs(); diff --git a/dbms/src/Client/Connection.cpp b/dbms/src/Client/Connection.cpp index 2f23525f677..ce6246fba3a 100644 --- a/dbms/src/Client/Connection.cpp +++ b/dbms/src/Client/Connection.cpp @@ -575,7 +575,6 @@ Connection::Packet Connection::receivePacket() switch (res.type) { case Protocol::Server::Data: - case Protocol::Server::CapnProto: res.block = receiveData(); return res; diff --git a/dbms/src/Core/Block.cpp b/dbms/src/Core/Block.cpp index 716de954974..de0e831f37c 100644 --- a/dbms/src/Core/Block.cpp +++ b/dbms/src/Core/Block.cpp @@ -99,6 +99,13 @@ void Block::insertUnique(ColumnWithTypeAndName && elem) } +void Block::erase(const std::set & positions) +{ + for (auto it = positions.rbegin(); it != positions.rend(); ++it) + erase(*it); +} + + void Block::erase(size_t position) { if (data.empty()) diff --git a/dbms/src/Core/Block.h b/dbms/src/Core/Block.h index 0d6eeae9278..2ee6fc4a78f 100644 --- a/dbms/src/Core/Block.h +++ b/dbms/src/Core/Block.h @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -54,6 +55,8 @@ public: void insertUnique(ColumnWithTypeAndName && elem); /// remove the column at the specified position void erase(size_t position); + /// remove the columns at the specified positions + void erase(const std::set & positions); /// remove the column with the specified name void erase(const String & name); diff --git a/dbms/src/Core/Defines.h b/dbms/src/Core/Defines.h index 2b3650ea975..264d1d95147 100644 --- a/dbms/src/Core/Defines.h +++ b/dbms/src/Core/Defines.h @@ -51,7 +51,7 @@ /// Two-level (bucketed) aggregation is incompatible if servers are inconsistent in these rules /// (keys will be placed in different buckets and result will not be fully aggregated). 
#define DBMS_MIN_REVISION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD 54408 -#define DBMS_MIN_REVISION_WITH_PROTO_METADATA 54410 +#define DBMS_MIN_REVISION_WITH_COLUMN_DEFAULTS_METADATA 54410 /// Version of ClickHouse TCP protocol. Set to git tag with latest protocol change. #define DBMS_TCP_PROTOCOL_VERSION 54226 diff --git a/dbms/src/Core/Protocol.h b/dbms/src/Core/Protocol.h index c925886db20..27df4341de9 100644 --- a/dbms/src/Core/Protocol.h +++ b/dbms/src/Core/Protocol.h @@ -69,8 +69,7 @@ namespace Protocol Totals = 7, /// A block with totals (compressed or not). Extremes = 8, /// A block with minimums and maximums (compressed or not). TablesStatusResponse = 9, /// A response to TablesStatus request. - Log = 10, /// System logs of the query execution - CapnProto = 11, /// Cap'n Proto + Log = 10 /// System logs of the query execution }; /// NOTE: If the type of packet argument would be Enum, the comparison packet >= 0 && packet < 10 @@ -79,9 +78,8 @@ namespace Protocol /// See https://www.securecoding.cert.org/confluence/display/cplusplus/INT36-CPP.+Do+not+use+out-of-range+enumeration+values inline const char * toString(UInt64 packet) { - static const char * data[] = { "Hello", "Data", "Exception", "Progress", "Pong", "EndOfStream", "ProfileInfo", "Totals", - "Extremes", "TablesStatusResponse", "Log", "CapnProto" }; - return packet < 12 + static const char * data[] = { "Hello", "Data", "Exception", "Progress", "Pong", "EndOfStream", "ProfileInfo", "Totals", "Extremes", "TablesStatusResponse", "Log" }; + return packet < 11 ? data[packet] : "Unknown packet"; } @@ -100,17 +98,15 @@ namespace Protocol Cancel = 3, /// Cancel the query execution. Ping = 4, /// Check that connection to the server is alive. TablesStatusRequest = 5, /// Check status of tables on the server. 
- KeepAlive = 6, /// Keep the connection alive - /// - CapnProto = 11, /// Cap'n Proto + KeepAlive = 6 /// Keep the connection alive }; inline const char * toString(UInt64 packet) { - static const char * unknown = "Unknown packet"; - static const char * data[] = { "Hello", "Query", "Data", "Cancel", "Ping", "TablesStatusRequest", "KeepAlive", - unknown, unknown, unknown, unknown, "CapnProto" }; - return (packet < 12) ? data[packet] : unknown; + static const char * data[] = { "Hello", "Query", "Data", "Cancel", "Ping", "TablesStatusRequest", "KeepAlive" }; + return packet < 7 + ? data[packet] + : "Unknown packet"; } } diff --git a/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp b/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp index 855840a15ee..c4f63d3bd22 100644 --- a/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp +++ b/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include namespace DB { @@ -46,11 +46,9 @@ InputStreamFromASTInsertQuery::InputStreamFromASTInsertQuery( res_stream = context.getInputFormat(format, *input_buffer_contacenated, streams.out->getHeader(), context.getSettings().max_insert_block_size); - TableMetadata table_meta(ast_insert_query->database, ast_insert_query->table); - table_meta.loadFromContext(context); - - if (!table_meta.column_defaults.empty()) - res_stream = std::make_shared(res_stream, table_meta.column_defaults, context); + auto column_defaults = ColumnDefaultsHelper::loadFromContext(context, ast_insert_query->database, ast_insert_query->table); + if (column_defaults) + res_stream = std::make_shared(res_stream, *column_defaults, context); } } diff --git a/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp b/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp index 4a23a594876..5670830136b 100644 --- a/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp +++ b/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp @@ -68,7 +68,8 @@ Block 
BlockInputStreamFromRowInputStream::readImpl() for (size_t column_idx = 0; column_idx < info.read_columns.size(); ++column_idx) { - if (!info.read_columns[column_idx]) { + if (!info.read_columns[column_idx]) + { size_t column_size = columns[column_idx]->size(); if (column_size == 0) throw Exception("Unexpected empty column", ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS); diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index ccd5b7ffb93..428f92b6a9d 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -125,7 +125,7 @@ struct Settings M(SettingUInt64, max_concurrent_queries_for_user, 0, "The maximum number of concurrent requests per user.") \ \ M(SettingBool, insert_deduplicate, true, "For INSERT queries in the replicated table, specifies that deduplication of insertings blocks should be preformed") \ - M(SettingBool, insert_sample_with_metadata, true, "For INSERT queries, specifies that the server need to send metadata about column defaults to the client. This will be used to calculate default expressions.") \ + M(SettingBool, insert_sample_with_metadata, false, "For INSERT queries, specifies that the server need to send metadata about column defaults to the client. This will be used to calculate default expressions.") \ \ M(SettingUInt64, insert_quorum, 0, "For INSERT queries in the replicated table, wait writing for the specified number of replicas and linearize the addition of the data. 
0 - disabled.") \ M(SettingMilliseconds, insert_quorum_timeout, 600000, "") \ diff --git a/dbms/src/Proto/CMakeLists.txt b/dbms/src/Proto/CMakeLists.txt deleted file mode 100644 index 1e150d9f86b..00000000000 --- a/dbms/src/Proto/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -add_custom_command (OUTPUT ServerMessage.capnp.c++ ServerMessage.capnp.h - COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/ServerMessage.capnp ${CMAKE_CURRENT_BINARY_DIR}/ServerMessage.capnp - COMMAND ${CMAKE_COMMAND} -E env PATH=${CAPNP_BIN_PATH} capnp compile -I ${CAPNP_INCLUDE_DIR} -oc++ ServerMessage.capnp - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/ServerMessage.capnp ${CAPNP_BIN_TARGETS}) - -add_library (clickhouse_proto ServerMessage.capnp.c++ protoHelpers.cpp) -target_link_libraries (clickhouse_proto clickhouse_common_io ${CAPNP_LIBRARY}) -target_include_directories (clickhouse_proto PUBLIC ${CAPNP_INCLUDE_DIR} ${DBMS_INCLUDE_DIR}) -target_include_directories (clickhouse_proto PRIVATE - ${CMAKE_CURRENT_BINARY_DIR} ${COMMON_INCLUDE_DIR} ${DBMS_INCLUDE_DIR} ${CITYHASH_CONTRIB_INCLUDE_DIR}) diff --git a/dbms/src/Proto/ServerMessage.capnp b/dbms/src/Proto/ServerMessage.capnp deleted file mode 100644 index 317430fce91..00000000000 --- a/dbms/src/Proto/ServerMessage.capnp +++ /dev/null @@ -1,34 +0,0 @@ -@0xfdd1e2948338b156; - -using Cxx = import "/capnp/c++.capnp"; -$Cxx.namespace("DB::Proto"); - -struct ColumnDefault -{ - kind @0 :UInt16; - expression @1 :Text; -} - -struct Column -{ - name @0 :Text; - type @1 :Text; - default @2 :ColumnDefault; -} - -struct Table -{ - name @0 :Text; - columns @1 :List(Column); -} - -struct Database -{ - name @0 :Text; - tables @1 :List(Table); -} - -struct Context -{ - databases @0 :List(Database); -} diff --git a/dbms/src/Proto/protoHelpers.cpp b/dbms/src/Proto/protoHelpers.cpp deleted file mode 100644 index cc91cbc5905..00000000000 --- a/dbms/src/Proto/protoHelpers.cpp +++ /dev/null @@ -1,168 +0,0 @@ 
-#include "protoHelpers.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -/// @sa https://capnproto.org/cxx.html - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - - - static MutableColumnPtr serializeProto(capnp::MessageBuilder & message) - { - MutableColumnPtr data = DataTypeUInt8().createColumn(); - - kj::Array serialized = messageToFlatArray(message); - kj::ArrayPtr bytes = serialized.asChars(); - - data->reserve(bytes.size()); - for (size_t i = 0 ; i < bytes.size(); ++i) - data->insertData(&bytes[i], 1); - - return data; - } - - - /// - template - class ProtoDeserializer - { - public: - ProtoDeserializer(const char * data, size_t data_size) - : serialized(kj::arrayPtr(reinterpret_cast(data), data_size / sizeof(capnp::word))), - reader(serialized) - {} - - typename T::Reader getReader() { return reader.getRoot(); } - - private: - kj::ArrayPtr serialized; - capnp::FlatArrayMessageReader reader; - }; - - - static MutableColumnPtr storeTableMeta(const TableMetadata & meta) - { - if (meta.database.empty() || meta.table.empty()) - throw Exception("storeTableMeta: table is not set", ErrorCodes::LOGICAL_ERROR); - - capnp::MallocMessageBuilder message; - Proto::Context::Builder proto_context = message.initRoot(); - - auto proto_databases = proto_context.initDatabases(1); - auto proto_db = proto_databases[0]; - proto_db.setName(meta.database); - - auto proto_db_tables = proto_db.initTables(1); - auto proto_table = proto_db_tables[0]; - proto_table.setName(meta.table); - - auto proto_columns = proto_table.initColumns(meta.column_defaults.size()); - - size_t column_no = 0; - for (const auto & pr_column : meta.column_defaults) - { - const String & column_name = pr_column.first; - const ColumnDefault & def = pr_column.second; - std::stringstream ss; - ss << def.expression; - - auto current_column = proto_columns[column_no]; - 
current_column.setName(column_name); - current_column.getDefault().setKind(static_cast(def.kind)); - current_column.getDefault().setExpression(ss.str()); - - ++column_no; - } - - return serializeProto(message); - } - - - static void loadTableMeta(const char * data, size_t data_size, TableMetadata & table_meta) - { - if (data == nullptr || data_size == 0) - throw Exception("loadTableMeta: empty metadata column", ErrorCodes::LOGICAL_ERROR); - - ProtoDeserializer deserializer(data, data_size); - Proto::Context::Reader proto_context = deserializer.getReader(); - - ParserTernaryOperatorExpression parser; - - for (auto proto_database : proto_context.getDatabases()) - { - const String & database_name = proto_database.getName().cStr(); - if (database_name != table_meta.database) - continue; - - for (auto proto_table : proto_database.getTables()) - { - String table_name = proto_table.getName().cStr(); - if (table_name != table_meta.table) - continue; - - for (auto column : proto_table.getColumns()) - { - String column_name = column.getName().cStr(); - String expression = column.getDefault().getExpression().cStr(); - ColumnDefaultKind expression_kind = static_cast(column.getDefault().getKind()); - - if (expression_kind == ColumnDefaultKind::Default) - { - ASTPtr ast = parseQuery(parser, expression, expression.size()); - table_meta.column_defaults.emplace(column_name, ColumnDefault{expression_kind, ast}); - } - } - } - } - } - - - static constexpr const char * tableMetaColumnName() - { - return "tableMeta"; - } - - - Block storeTableMetadata(const TableMetadata & table_meta) - { - ColumnWithTypeAndName proto_column; - proto_column.name = tableMetaColumnName(); - proto_column.type = std::make_shared(); - proto_column.column = storeTableMeta(table_meta); - - Block block; - block.insert(std::move(proto_column)); - return block; - } - - - void loadTableMetadata(const Block & block, TableMetadata & table_meta) - { - /// select metadata type by column name - if 
(block.has(tableMetaColumnName())) - { - const ColumnWithTypeAndName & column = block.getByName(tableMetaColumnName()); - StringRef raw_data = column.column->getRawData(); - loadTableMeta(raw_data.data, raw_data.size, table_meta); - } - } -} diff --git a/dbms/src/Proto/protoHelpers.h b/dbms/src/Proto/protoHelpers.h deleted file mode 100644 index 7cef5a28013..00000000000 --- a/dbms/src/Proto/protoHelpers.h +++ /dev/null @@ -1,14 +0,0 @@ -#pragma once -#if USE_CAPNP - -namespace DB -{ - class Context; - class Block; - struct TableMetadata; - - Block storeTableMetadata(const TableMetadata & table_meta); - void loadTableMetadata(const Block & block, TableMetadata & table_meta); -} - -#endif diff --git a/dbms/src/Storages/ColumnDefault.cpp b/dbms/src/Storages/ColumnDefault.cpp index 0ff885ab1e5..a6fc0b39bb6 100644 --- a/dbms/src/Storages/ColumnDefault.cpp +++ b/dbms/src/Storages/ColumnDefault.cpp @@ -1,6 +1,14 @@ -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace DB { @@ -37,4 +45,74 @@ bool operator==(const ColumnDefault & lhs, const ColumnDefault & rhs) return lhs.kind == rhs.kind && queryToString(lhs.expression) == queryToString(rhs.expression); } +ColumnDefaults ColumnDefaultsHelper::loadFromContext(const Context & context) +{ + return loadFromContext(context, context.getCurrentDatabase(), context.getCurrentTable()); +} + +ColumnDefaults ColumnDefaultsHelper::loadFromContext(const Context & context, const String & database, const String & table) +{ + if (context.getSettingsRef().insert_sample_with_metadata) + { + if (!context.isTableExist(database, table)) + return {}; + + StoragePtr storage = context.getTable(database, table); + const ColumnsDescription & table_columns = storage->getColumns(); + return table_columns.defaults; + } + return {}; +} + +void ColumnDefaultsHelper::attachFromContext(const Context & context, Block & sample) +{ + ColumnDefaults column_defaults = 
loadFromContext(context); + if (column_defaults.empty()) + return; + + for (auto pr : column_defaults) + { + std::stringstream ss; + ss << *pr.second.expression; + + ColumnWithTypeAndName col; + col.type = std::make_shared(); + col.name = String(" ") + toString(pr.second.kind) + ' ' + pr.first + ' ' + ss.str(); + col.column = col.type->createColumnConst(sample.rows(), ""); + + sample.insert(std::move(col)); + } +} + +ColumnDefaults ColumnDefaultsHelper::extract(Block & sample) +{ + ParserTernaryOperatorExpression parser; + ColumnDefaults column_defaults; + std::set pos_to_erase; + + for (size_t i = 0; i < sample.columns(); ++i) + { + const ColumnWithTypeAndName & column_wtn = sample.safeGetByPosition(i); + + if (column_wtn.name.size() && column_wtn.name[0] == ' ') + { + String str_kind, column_name; + std::stringstream ss; + ss << column_wtn.name; + ss >> str_kind >> column_name; + String expression = column_wtn.name.substr(str_kind.size() + column_name.size() + 3); + + ColumnDefault def; + def.kind = columnDefaultKindFromString(str_kind); + def.expression = parseQuery(parser, expression, expression.size()); + + column_defaults.emplace(column_name, def); + pos_to_erase.insert(i); + } + } + + sample.erase(pos_to_erase); + return column_defaults; +} + } diff --git a/dbms/src/Storages/ColumnDefault.h b/dbms/src/Storages/ColumnDefault.h index 95eb4d5b597..dfeb05dc39c 100644 --- a/dbms/src/Storages/ColumnDefault.h +++ b/dbms/src/Storages/ColumnDefault.h @@ -9,6 +9,9 @@ namespace DB { +class Context; +class Block; + enum class ColumnDefaultKind { Default, @@ -33,5 +36,14 @@ bool operator==(const ColumnDefault & lhs, const ColumnDefault & rhs); using ColumnDefaults = std::unordered_map; +/// Static methods to manipulate column defaults +struct ColumnDefaultsHelper +{ + static void attachFromContext(const Context & context, Block & sample); + static ColumnDefaults extract(Block & sample); + + static ColumnDefaults loadFromContext(const Context & context, const String & 
database, const String & table); + static ColumnDefaults loadFromContext(const Context & context); /// FIXME: we need another way to store current table +}; } diff --git a/dbms/src/Storages/TableMetadata.cpp b/dbms/src/Storages/TableMetadata.cpp deleted file mode 100644 index 5549574275a..00000000000 --- a/dbms/src/Storages/TableMetadata.cpp +++ /dev/null @@ -1,18 +0,0 @@ -#include -#include -#include "TableMetadata.h" - - -namespace DB -{ - bool TableMetadata::loadFromContext(const Context & context) - { - if (!context.isTableExist(database, table)) - return false; - - StoragePtr storage = context.getTable(database, table); - const ColumnsDescription & table_columns = storage->getColumns(); - column_defaults = table_columns.defaults; - return true; - } -} diff --git a/dbms/src/Storages/TableMetadata.h b/dbms/src/Storages/TableMetadata.h deleted file mode 100644 index ddb5b79b68b..00000000000 --- a/dbms/src/Storages/TableMetadata.h +++ /dev/null @@ -1,24 +0,0 @@ -#pragma once - -#include - -namespace DB -{ - class Context; - class Block; - - /// Additional information for query that could not be get from sample block - struct TableMetadata - { - TableMetadata(const String & database_, const String & table_) - : database(database_), table(table_) - {} - - const String & database; - const String & table; - ColumnDefaults column_defaults; - - bool loadFromContext(const Context & context); - bool hasDefaults() const { return !column_defaults.empty(); } - }; -} diff --git a/dbms/tests/queries/0_stateless/00760_insert_json_with_defaults.sql b/dbms/tests/queries/0_stateless/00760_insert_json_with_defaults.sql index 12b9e4538d1..a141b64f98a 100644 --- a/dbms/tests/queries/0_stateless/00760_insert_json_with_defaults.sql +++ b/dbms/tests/queries/0_stateless/00760_insert_json_with_defaults.sql @@ -1,3 +1,5 @@ +SET insert_sample_with_metadata=1; + CREATE DATABASE IF NOT EXISTS test; DROP TABLE IF EXISTS test.defaults; CREATE TABLE IF NOT EXISTS test.defaults diff --git 
a/utils/build/build_debian_unbundled.sh b/utils/build/build_debian_unbundled.sh index da673f1de55..a78b0fc25de 100755 --- a/utils/build/build_debian_unbundled.sh +++ b/utils/build/build_debian_unbundled.sh @@ -22,5 +22,5 @@ env TEST_RUN=1 \ `# Use all possible contrib libs from system` \ `# psmisc - killall` \ `# gdb - symbol test in pbuilder` \ - EXTRAPACKAGES="psmisc gdb clang-6.0 libc++abi-dev libc++-dev libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev librdkafka-dev libpoco-dev unixodbc-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev libunwind-dev googletest libcctz-dev libcapnp-dev capnproto libjemalloc-dev libssl-dev $EXTRAPACKAGES" \ + EXTRAPACKAGES="psmisc gdb clang-6.0 libc++abi-dev libc++-dev libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev librdkafka-dev libpoco-dev unixodbc-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev libunwind-dev googletest libcctz-dev libcapnp-dev libjemalloc-dev libssl-dev $EXTRAPACKAGES" \ pdebuild --configfile $ROOT_DIR/debian/.pbuilderrc $PDEBUILD_OPT diff --git a/utils/travis/normal.sh b/utils/travis/normal.sh index 8c44f6a7d58..7f45641d45f 100755 --- a/utils/travis/normal.sh +++ b/utils/travis/normal.sh @@ -32,7 +32,7 @@ cmake $CUR_DIR/../.. 
-DCMAKE_CXX_COMPILER=`which $DEB_CXX $CXX` -DCMAKE_C_COMPIL `# Use all possible contrib libs from system` \ -DUNBUNDLED=1 \ `# Disable all features` \ - -DENABLE_RDKAFKA=0 -DENABLE_EMBEDDED_COMPILER=0 -DUSE_INTERNAL_LLVM_LIBRARY=0 -DENABLE_JEMALLOC=0 -DENABLE_UNWIND=0 -DENABLE_MYSQL=0 -DENABLE_POCO_ODBC=0 -DENABLE_ODBC=0 $CMAKE_FLAGS + -DENABLE_CAPNP=0 -DENABLE_RDKAFKA=0 -DENABLE_EMBEDDED_COMPILER=0 -DUSE_INTERNAL_LLVM_LIBRARY=0 -DENABLE_JEMALLOC=0 -DENABLE_UNWIND=0 -DENABLE_MYSQL=0 -DENABLE_POCO_ODBC=0 -DENABLE_ODBC=0 $CMAKE_FLAGS ninja clickhouse-bundle diff --git a/utils/travis/pbuilder.sh b/utils/travis/pbuilder.sh index 3a434219fd7..d993e8715b8 100755 --- a/utils/travis/pbuilder.sh +++ b/utils/travis/pbuilder.sh @@ -24,7 +24,7 @@ env TEST_RUN=${TEST_RUN=1} \ DEB_CC=${DEB_CC=$CC} DEB_CXX=${DEB_CXX=$CXX} \ CCACHE_SIZE=${CCACHE_SIZE:=4G} \ `# Disable all features` \ - CMAKE_FLAGS="-DCMAKE_BUILD_TYPE=Debug -DUNBUNDLED=1 -DENABLE_RDKAFKA=0 -DENABLE_JEMALLOC=0 -DENABLE_UNWIND=0 -DENABLE_MYSQL=0 -DENABLE_POCO_ODBC=0 -DENABLE_ODBC=0 -DUSE_INTERNAL_LLVM_LIBRARY=0 -DCMAKE_C_FLAGS_ADD='-O0 -g0' -DCMAKE_CXX_FLAGS_ADD='-O0 -g0' $CMAKE_FLAGS" \ + CMAKE_FLAGS="-DCMAKE_BUILD_TYPE=Debug -DUNBUNDLED=1 -DENABLE_CAPNP=0 -DENABLE_RDKAFKA=0 -DENABLE_JEMALLOC=0 -DENABLE_UNWIND=0 -DENABLE_MYSQL=0 -DENABLE_POCO_ODBC=0 -DENABLE_ODBC=0 -DUSE_INTERNAL_LLVM_LIBRARY=0 -DCMAKE_C_FLAGS_ADD='-O0 -g0' -DCMAKE_CXX_FLAGS_ADD='-O0 -g0' $CMAKE_FLAGS" \ `# Use all possible contrib libs from system` \ `# psmisc - killall` \ EXTRAPACKAGES="psmisc clang-5.0 lld-5.0 liblld-5.0-dev libclang-5.0-dev liblld-5.0 libc++abi-dev libc++-dev libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev librdkafka-dev libpoco-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev libjemalloc-dev $EXTRAPACKAGES" \ From 8284de8268e73d8d50c15defb44433a0a52085d7 Mon Sep 17 00:00:00 2001 From: chertus Date: 
Thu, 15 Nov 2018 13:32:20 +0300 Subject: [PATCH 42/88] fix build --- dbms/src/CMakeLists.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/dbms/src/CMakeLists.txt b/dbms/src/CMakeLists.txt index 410f8a4edad..f6fa96e1d47 100644 --- a/dbms/src/CMakeLists.txt +++ b/dbms/src/CMakeLists.txt @@ -13,6 +13,3 @@ add_subdirectory (AggregateFunctions) add_subdirectory (Client) add_subdirectory (TableFunctions) add_subdirectory (Formats) -if (USE_CAPNP) - add_subdirectory (Proto) -endif () From 5c67e0220554480b6dbc4dc83c5e65030c6fd75b Mon Sep 17 00:00:00 2001 From: chertus Date: Thu, 15 Nov 2018 13:39:13 +0300 Subject: [PATCH 43/88] fix build --- dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp b/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp index c4f63d3bd22..d232fee96ce 100644 --- a/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp +++ b/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp @@ -47,8 +47,8 @@ InputStreamFromASTInsertQuery::InputStreamFromASTInsertQuery( res_stream = context.getInputFormat(format, *input_buffer_contacenated, streams.out->getHeader(), context.getSettings().max_insert_block_size); auto column_defaults = ColumnDefaultsHelper::loadFromContext(context, ast_insert_query->database, ast_insert_query->table); - if (column_defaults) - res_stream = std::make_shared(res_stream, *column_defaults, context); + if (!column_defaults.empty()) + res_stream = std::make_shared(res_stream, column_defaults, context); } } From 65d34ca231a433654288889526b57ec399d58cdc Mon Sep 17 00:00:00 2001 From: chertus Date: Thu, 15 Nov 2018 18:03:13 +0300 Subject: [PATCH 44/88] do not override current_database in context [CLICKHOUSE-3578] --- dbms/programs/server/TCPHandler.cpp | 8 +++- dbms/src/Core/Block.h | 11 +++++ dbms/src/Interpreters/Context.cpp | 15 ------ dbms/src/Interpreters/Context.h | 7 +-- 
.../Interpreters/InterpreterInsertQuery.cpp | 5 +- .../src/Interpreters/InterpreterInsertQuery.h | 2 +- dbms/src/Interpreters/executeQuery.cpp | 10 +--- dbms/src/Storages/ColumnDefault.cpp | 46 +++++++++++-------- dbms/src/Storages/ColumnDefault.h | 3 +- 9 files changed, 55 insertions(+), 52 deletions(-) diff --git a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp index b66c1e8bb5d..d1ccc3e788f 100644 --- a/dbms/programs/server/TCPHandler.cpp +++ b/dbms/programs/server/TCPHandler.cpp @@ -363,9 +363,13 @@ void TCPHandler::processInsertQuery(const Settings & global_settings) /// Send block to the client - table structure. Block block = state.io.out->getHeader(); - /// attach table metadata (column defaults) + /// attach column defaults to sample block (allow client to attach defaults for ommited source values) if (client_revision >= DBMS_MIN_REVISION_WITH_COLUMN_DEFAULTS_METADATA) - ColumnDefaultsHelper::attachFromContext(query_context, block); + { + auto db_and_table = query_context.getInsertionTable(); + ColumnDefaults column_defaults = ColumnDefaultsHelper::loadFromContext(query_context, db_and_table.first, db_and_table.second); + ColumnDefaultsHelper::attach(column_defaults, block); + } sendData(block); diff --git a/dbms/src/Core/Block.h b/dbms/src/Core/Block.h index 2ee6fc4a78f..0473ff72419 100644 --- a/dbms/src/Core/Block.h +++ b/dbms/src/Core/Block.h @@ -34,6 +34,9 @@ private: Container data; IndexByName index_by_name; + /// Regular column can't start with ' ', so it's possible to attach some hidden columns with a prefix + constexpr static const char SPECIAL_COLUMN_PREFIX = ' '; + public: BlockInfo info; /// Input stream could use delayed_defaults to add addition info at which rows it have inserted default values. 
@@ -103,6 +106,14 @@ public: operator bool() const { return !data.empty(); } bool operator!() const { return data.empty(); } + static String mkSpecialColumnName(const String & col_name) { return String(1, SPECIAL_COLUMN_PREFIX) + col_name; } + static bool isSpecialColumnName(const String & col_name) { return !col_name.empty() && col_name[0] == SPECIAL_COLUMN_PREFIX; } + + static bool isSpecialColumnName(const String & col_name, const String & pattern) + { + return col_name.find(String(1, SPECIAL_COLUMN_PREFIX) + pattern) == 0; + } + /** Get a list of column names separated by commas. */ std::string dumpNames() const; diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index ad110718a27..6e7051ba387 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1014,12 +1014,6 @@ String Context::getCurrentDatabase() const } -String Context::getCurrentTable() const -{ - return current_table; -} - - String Context::getCurrentQueryId() const { return client_info.current_query_id; @@ -1034,15 +1028,6 @@ void Context::setCurrentDatabase(const String & name) } -void Context::setCurrentTable(const String & database, const String & table) -{ - auto lock = getLock(); - assertTableExists(database, table); - current_database = database; - current_table = table; -} - - void Context::setCurrentQueryId(const String & query_id) { if (!client_info.current_query_id.empty()) diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index 89153640a8f..5d6d326a460 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -118,11 +118,11 @@ private: std::shared_ptr quota; /// Current quota. By default - empty quota, that have no limits. String current_database; - String current_table; Settings settings; /// Setting for query execution. using ProgressCallback = std::function; ProgressCallback progress_callback; /// Callback for tracking progress of query execution. 
QueryStatus * process_list_elem = nullptr; /// For tracking total resource usage for query. + std::pair insertion_table; /// Saved insertion table in query context String default_format; /// Format, used when server formats data by itself and if query does not have FORMAT specification. /// Thus, used in HTTP interface. If not specified - then some globally default format is used. @@ -229,12 +229,13 @@ public: std::unique_ptr getDDLGuard(const String & database, const String & table) const; String getCurrentDatabase() const; - String getCurrentTable() const; String getCurrentQueryId() const; void setCurrentDatabase(const String & name); - void setCurrentTable(const String & database, const String & table); void setCurrentQueryId(const String & query_id); + void setInsertionTable(std::pair && db_and_table) { insertion_table = db_and_table; } + const std::pair & getInsertionTable() const { return insertion_table; } + String getDefaultFormat() const; /// If default_format is not specified, some global default format is returned. 
void setDefaultFormat(const String & name); diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.cpp b/dbms/src/Interpreters/InterpreterInsertQuery.cpp index 08147ea47d1..652b098098b 100644 --- a/dbms/src/Interpreters/InterpreterInsertQuery.cpp +++ b/dbms/src/Interpreters/InterpreterInsertQuery.cpp @@ -159,11 +159,10 @@ void InterpreterInsertQuery::checkAccess(const ASTInsertQuery & query) throw Exception("Cannot insert into table in readonly mode", ErrorCodes::READONLY); } -void InterpreterInsertQuery::getDatabaseTable(String & database, String & table) const +std::pair InterpreterInsertQuery::getDatabaseTable() const { ASTInsertQuery & query = typeid_cast(*query_ptr); - database = query.database; - table = query.table; + return {query.database, query.table}; } } diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.h b/dbms/src/Interpreters/InterpreterInsertQuery.h index 4ec7460fb75..9cde2c274fe 100644 --- a/dbms/src/Interpreters/InterpreterInsertQuery.h +++ b/dbms/src/Interpreters/InterpreterInsertQuery.h @@ -24,7 +24,7 @@ public: */ BlockIO execute() override; - void getDatabaseTable(String & database, String & table) const; + std::pair getDatabaseTable() const; private: StoragePtr getTable(const ASTInsertQuery & query); diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 34839eb2b59..2a885285b89 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -204,14 +204,8 @@ static std::tuple executeQueryImpl( auto interpreter = InterpreterFactory::get(ast, context, stage); res = interpreter->execute(); - if (InterpreterInsertQuery * insertInterpreter = typeid_cast(&*interpreter)) - { - String database; - String table_name; - insertInterpreter->getDatabaseTable(database, table_name); - if (!database.empty()) - context.setCurrentTable(database, table_name); - } + if (auto * insert_interpreter = typeid_cast(&*interpreter)) + 
context.setInsertionTable(insert_interpreter->getDatabaseTable()); if (process_list_entry) { diff --git a/dbms/src/Storages/ColumnDefault.cpp b/dbms/src/Storages/ColumnDefault.cpp index a6fc0b39bb6..cd79c5ca093 100644 --- a/dbms/src/Storages/ColumnDefault.cpp +++ b/dbms/src/Storages/ColumnDefault.cpp @@ -10,16 +10,27 @@ #include #include -namespace DB +namespace { +struct AliasNames +{ + static constexpr const char * DEFAULT = "DEFAULT"; + static constexpr const char * MATERIALIZED = "MATERIALIZED"; + static constexpr const char * ALIAS = "ALIAS"; +}; + +} + +namespace DB +{ ColumnDefaultKind columnDefaultKindFromString(const std::string & str) { static const std::unordered_map map{ - { "DEFAULT", ColumnDefaultKind::Default }, - { "MATERIALIZED", ColumnDefaultKind::Materialized }, - { "ALIAS", ColumnDefaultKind::Alias } + { AliasNames::DEFAULT, ColumnDefaultKind::Default }, + { AliasNames::MATERIALIZED, ColumnDefaultKind::Materialized }, + { AliasNames::ALIAS, ColumnDefaultKind::Alias } }; const auto it = map.find(str); @@ -30,9 +41,9 @@ ColumnDefaultKind columnDefaultKindFromString(const std::string & str) std::string toString(const ColumnDefaultKind kind) { static const std::unordered_map map{ - { ColumnDefaultKind::Default, "DEFAULT" }, - { ColumnDefaultKind::Materialized, "MATERIALIZED" }, - { ColumnDefaultKind::Alias, "ALIAS" } + { ColumnDefaultKind::Default, AliasNames::DEFAULT }, + { ColumnDefaultKind::Materialized, AliasNames::MATERIALIZED }, + { ColumnDefaultKind::Alias, AliasNames::ALIAS } }; const auto it = map.find(kind); @@ -45,11 +56,6 @@ bool operator==(const ColumnDefault & lhs, const ColumnDefault & rhs) return lhs.kind == rhs.kind && queryToString(lhs.expression) == queryToString(rhs.expression); } -ColumnDefaults ColumnDefaultsHelper::loadFromContext(const Context & context) -{ - return loadFromContext(context, context.getCurrentDatabase(), context.getCurrentTable()); -} - ColumnDefaults ColumnDefaultsHelper::loadFromContext(const Context & 
context, const String & database, const String & table) { if (context.getSettingsRef().insert_sample_with_metadata) @@ -64,9 +70,8 @@ ColumnDefaults ColumnDefaultsHelper::loadFromContext(const Context & context, co return {}; } -void ColumnDefaultsHelper::attachFromContext(const Context & context, Block & sample) +void ColumnDefaultsHelper::attach(const ColumnDefaults & column_defaults, Block & sample) { - ColumnDefaults column_defaults = loadFromContext(context); if (column_defaults.empty()) return; @@ -75,9 +80,11 @@ void ColumnDefaultsHelper::attachFromContext(const Context & context, Block & sa std::stringstream ss; ss << *pr.second.expression; + /// Serialize defaults to special columns names. + /// It looks better to send expression as a column data but sample block has 0 rows. ColumnWithTypeAndName col; col.type = std::make_shared(); - col.name = String(" ") + toString(pr.second.kind) + ' ' + pr.first + ' ' + ss.str(); + col.name = Block::mkSpecialColumnName(toString(pr.second.kind) + ' ' + pr.first + ' ' + ss.str()); col.column = col.type->createColumnConst(sample.rows(), ""); sample.insert(std::move(col)); @@ -94,17 +101,20 @@ ColumnDefaults ColumnDefaultsHelper::extract(Block & sample) { const ColumnWithTypeAndName & column_wtn = sample.safeGetByPosition(i); - if (column_wtn.name.size() && column_wtn.name[0] == ' ') + if (Block::isSpecialColumnName(column_wtn.name, AliasNames::DEFAULT) || + Block::isSpecialColumnName(column_wtn.name, AliasNames::MATERIALIZED) || + Block::isSpecialColumnName(column_wtn.name, AliasNames::ALIAS)) { String str_kind, column_name; std::stringstream ss; ss << column_wtn.name; ss >> str_kind >> column_name; - String expression = column_wtn.name.substr(str_kind.size() + column_name.size() + 3); + size_t expression_pos = str_kind.size() + column_name.size() + 3; + StringRef expression(&column_wtn.name[expression_pos], column_wtn.name.size() - expression_pos); ColumnDefault def; def.kind = columnDefaultKindFromString(str_kind); - 
def.expression = parseQuery(parser, expression, expression.size()); + def.expression = parseQuery(parser, expression.data, expression.size); column_defaults.emplace(column_name, def); pos_to_erase.insert(i); diff --git a/dbms/src/Storages/ColumnDefault.h b/dbms/src/Storages/ColumnDefault.h index dfeb05dc39c..00693b54ad5 100644 --- a/dbms/src/Storages/ColumnDefault.h +++ b/dbms/src/Storages/ColumnDefault.h @@ -39,11 +39,10 @@ using ColumnDefaults = std::unordered_map; /// Static methods to manipulate column defaults struct ColumnDefaultsHelper { - static void attachFromContext(const Context & context, Block & sample); + static void attach(const ColumnDefaults & column_defaults, Block & sample); static ColumnDefaults extract(Block & sample); static ColumnDefaults loadFromContext(const Context & context, const String & database, const String & table); - static ColumnDefaults loadFromContext(const Context & context); /// FIXME: we need another way to store current table }; } From 13646eb4c4c10c37764b40e91dea6545e1a1073e Mon Sep 17 00:00:00 2001 From: chertus Date: Thu, 15 Nov 2018 18:57:20 +0300 Subject: [PATCH 45/88] move BlockMissingValues from Block to IBlockInputStream::getMissingValues() --- dbms/src/Core/Block.h | 3 --- dbms/src/Core/BlockInfo.cpp | 4 ++-- dbms/src/Core/BlockInfo.h | 6 +++--- dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp | 2 +- dbms/src/DataStreams/IBlockInputStream.h | 7 +++++++ dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp | 7 ++----- dbms/src/Formats/BlockInputStreamFromRowInputStream.h | 3 +++ 7 files changed, 18 insertions(+), 14 deletions(-) diff --git a/dbms/src/Core/Block.h b/dbms/src/Core/Block.h index 0473ff72419..1523665f451 100644 --- a/dbms/src/Core/Block.h +++ b/dbms/src/Core/Block.h @@ -39,9 +39,6 @@ private: public: BlockInfo info; - /// Input stream could use delayed_defaults to add addition info at which rows it have inserted default values. 
- /// Such values would be replaced later by column defaults in AddingDefaultsBlockInputStream (if any). - BlockDelayedDefaults delayed_defaults; Block() = default; Block(std::initializer_list il); diff --git a/dbms/src/Core/BlockInfo.cpp b/dbms/src/Core/BlockInfo.cpp index f214d2782e3..3c8d1ccef7c 100644 --- a/dbms/src/Core/BlockInfo.cpp +++ b/dbms/src/Core/BlockInfo.cpp @@ -58,14 +58,14 @@ void BlockInfo::read(ReadBuffer & in) } } -void BlockDelayedDefaults::setBit(size_t column_idx, size_t row_idx) +void BlockMissingValues::setBit(size_t column_idx, size_t row_idx) { RowsBitMask & mask = columns_defaults[column_idx]; mask.resize(row_idx + 1); mask[row_idx] = true; } -const BlockDelayedDefaults::RowsBitMask & BlockDelayedDefaults::getDefaultsBitmask(size_t column_idx) const +const BlockMissingValues::RowsBitMask & BlockMissingValues::getDefaultsBitmask(size_t column_idx) const { static RowsBitMask none; auto it = columns_defaults.find(column_idx); diff --git a/dbms/src/Core/BlockInfo.h b/dbms/src/Core/BlockInfo.h index abc5383ddcb..9e23de688af 100644 --- a/dbms/src/Core/BlockInfo.h +++ b/dbms/src/Core/BlockInfo.h @@ -45,9 +45,8 @@ struct BlockInfo void read(ReadBuffer & in); }; -/// Block extention to support delayed defaults. Used in AddingDefaultsBlockInputStream to replace type defauls set by RowInputStream -/// with column defaults. -class BlockDelayedDefaults +/// Block extention to support delayed defaults. AddingDefaultsBlockInputStream uses it to replace missing values with column defaults. 
+class BlockMissingValues { public: using RowsBitMask = std::vector; /// a bit per row for a column @@ -56,6 +55,7 @@ public: void setBit(size_t column_idx, size_t row_idx); bool empty() const { return columns_defaults.empty(); } size_t size() const { return columns_defaults.size(); } + void clear() { columns_defaults.clear(); } private: using RowsMaskByColumnId = std::unordered_map; diff --git a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp index ace3841e67b..900a923b23a 100644 --- a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp +++ b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp @@ -37,7 +37,7 @@ Block AddingDefaultsBlockInputStream::readImpl() if (column_defaults.empty()) return res; - BlockDelayedDefaults delayed_defaults = res.delayed_defaults; + const BlockMissingValues & delayed_defaults = children.back()->getMissingValues(); if (delayed_defaults.empty()) return res; diff --git a/dbms/src/DataStreams/IBlockInputStream.h b/dbms/src/DataStreams/IBlockInputStream.h index b0f2d269f56..2e305b4b451 100644 --- a/dbms/src/DataStreams/IBlockInputStream.h +++ b/dbms/src/DataStreams/IBlockInputStream.h @@ -19,6 +19,7 @@ class IBlockInputStream; using BlockInputStreamPtr = std::shared_ptr; using BlockInputStreams = std::vector; +class BlockMissingValues; class TableStructureReadLock; using TableStructureReadLockPtr = std::shared_ptr; @@ -70,6 +71,12 @@ public: throw Exception("Method getBlockExtraInfo is not supported by the data stream " + getName(), ErrorCodes::NOT_IMPLEMENTED); } + virtual const BlockMissingValues & getMissingValues() const + { + static const BlockMissingValues none; + return none; + } + /** Read something before starting all data or after the end of all data. * In the `readSuffix` function, you can implement a finalization that can lead to an exception. * readPrefix() must be called before the first call to read(). 
diff --git a/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp b/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp index 5670830136b..1440375eb14 100644 --- a/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp +++ b/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp @@ -53,7 +53,7 @@ Block BlockInputStreamFromRowInputStream::readImpl() { size_t num_columns = sample.columns(); MutableColumns columns = sample.cloneEmptyColumns(); - BlockDelayedDefaults delayed_defaults; + delayed_defaults.clear(); try { @@ -144,10 +144,7 @@ Block BlockInputStreamFromRowInputStream::readImpl() if (columns.empty() || columns[0]->empty()) return {}; - auto out_block = sample.cloneWithColumns(std::move(columns)); - if (!delayed_defaults.empty()) - out_block.delayed_defaults = std::move(delayed_defaults); - return out_block; + return sample.cloneWithColumns(std::move(columns)); } diff --git a/dbms/src/Formats/BlockInputStreamFromRowInputStream.h b/dbms/src/Formats/BlockInputStreamFromRowInputStream.h index 72d11a02610..65d6fa04469 100644 --- a/dbms/src/Formats/BlockInputStreamFromRowInputStream.h +++ b/dbms/src/Formats/BlockInputStreamFromRowInputStream.h @@ -33,6 +33,8 @@ public: Block getHeader() const override { return sample; } + const BlockMissingValues & getMissingValues() const override { return delayed_defaults; } + protected: Block readImpl() override; @@ -40,6 +42,7 @@ private: RowInputStreamPtr row_input; Block sample; size_t max_block_size; + BlockMissingValues delayed_defaults; UInt64 allow_errors_num; Float64 allow_errors_ratio; From c642e16ee1ac3a5c9986c898e5d8c58c4e705bb7 Mon Sep 17 00:00:00 2001 From: chertus Date: Thu, 15 Nov 2018 19:57:20 +0300 Subject: [PATCH 46/88] clearer evaluateMissingDefaults [CLICKHOUSE-3578] --- .../AddingDefaultsBlockInputStream.cpp | 2 +- dbms/src/DataStreams/IBlockInputStream.h | 1 - .../Interpreters/evaluateMissingDefaults.cpp | 42 ++++++------------- .../Interpreters/evaluateMissingDefaults.h | 7 +--- 4 files changed, 
14 insertions(+), 38 deletions(-) diff --git a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp index 900a923b23a..3d02b0c6415 100644 --- a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp +++ b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp @@ -49,7 +49,7 @@ Block AddingDefaultsBlockInputStream::readImpl() evaluate_block.erase(column.first); } - evaluateMissingDefaultsUnsafe(evaluate_block, header.getNamesAndTypesList(), column_defaults, context); + evaluateMissingDefaults(evaluate_block, header.getNamesAndTypesList(), column_defaults, context, false); std::unordered_map mixed_columns; diff --git a/dbms/src/DataStreams/IBlockInputStream.h b/dbms/src/DataStreams/IBlockInputStream.h index 2e305b4b451..eb5f75ef46c 100644 --- a/dbms/src/DataStreams/IBlockInputStream.h +++ b/dbms/src/DataStreams/IBlockInputStream.h @@ -19,7 +19,6 @@ class IBlockInputStream; using BlockInputStreamPtr = std::shared_ptr; using BlockInputStreams = std::vector; -class BlockMissingValues; class TableStructureReadLock; using TableStructureReadLockPtr = std::shared_ptr; diff --git a/dbms/src/Interpreters/evaluateMissingDefaults.cpp b/dbms/src/Interpreters/evaluateMissingDefaults.cpp index 8d84c8f6576..40b75c0b673 100644 --- a/dbms/src/Interpreters/evaluateMissingDefaults.cpp +++ b/dbms/src/Interpreters/evaluateMissingDefaults.cpp @@ -29,33 +29,36 @@ static ASTPtr requiredExpressions(Block & block, const NamesAndTypesList & requi setAlias(it->second.expression->clone(), it->first)); } + if (default_expr_list->children.empty()) + return nullptr; return default_expr_list; } - void evaluateMissingDefaults(Block & block, const NamesAndTypesList & required_columns, const ColumnDefaults & column_defaults, - const Context & context) + const Context & context, bool with_block_copy) { if (column_defaults.empty()) return; ASTPtr default_expr_list = requiredExpressions(block, required_columns, column_defaults); - /// nothing 
to evaluate - if (default_expr_list->children.empty()) + if (!default_expr_list) return; + if (!with_block_copy) + { + auto syntax_result = SyntaxAnalyzer(context, {}).analyze(default_expr_list, block.getNamesAndTypesList()); + ExpressionAnalyzer{default_expr_list, syntax_result, context}.getActions(true)->execute(block); + return; + } + /** ExpressionAnalyzer eliminates "unused" columns, in order to ensure their safety * we are going to operate on a copy instead of the original block */ Block copy_block{block}; /// evaluate default values for defaulted columns - NamesAndTypesList available_columns; - for (size_t i = 0, size = block.columns(); i < size; ++i) - available_columns.emplace_back(block.getByPosition(i).name, block.getByPosition(i).type); - - auto syntax_result = SyntaxAnalyzer(context, {}).analyze(default_expr_list, available_columns); + auto syntax_result = SyntaxAnalyzer(context, {}).analyze(default_expr_list, block.getNamesAndTypesList()); ExpressionAnalyzer{default_expr_list, syntax_result, context}.getActions(true)->execute(copy_block); /// move evaluated columns to the original block, materializing them at the same time @@ -73,25 +76,4 @@ void evaluateMissingDefaults(Block & block, } } - -void evaluateMissingDefaultsUnsafe(Block & block, - const NamesAndTypesList & required_columns, - const std::unordered_map & column_defaults, - const Context & context) -{ - if (column_defaults.empty()) - return; - - ASTPtr default_expr_list = requiredExpressions(block, required_columns, column_defaults); - if (default_expr_list->children.empty()) - return; - - NamesAndTypesList available_columns; - for (size_t i = 0, size = block.columns(); i < size; ++i) - available_columns.emplace_back(block.getByPosition(i).name, block.getByPosition(i).type); - - auto syntax_result = SyntaxAnalyzer(context, {}).analyze(default_expr_list, available_columns); - ExpressionAnalyzer{default_expr_list, syntax_result, context}.getActions(true)->execute(block); -} - } diff --git 
a/dbms/src/Interpreters/evaluateMissingDefaults.h b/dbms/src/Interpreters/evaluateMissingDefaults.h index ce0c649f3d0..71f6fab9753 100644 --- a/dbms/src/Interpreters/evaluateMissingDefaults.h +++ b/dbms/src/Interpreters/evaluateMissingDefaults.h @@ -15,11 +15,6 @@ struct ColumnDefault; void evaluateMissingDefaults(Block & block, const NamesAndTypesList & required_columns, const std::unordered_map & column_defaults, - const Context & context); - -void evaluateMissingDefaultsUnsafe(Block & block, - const NamesAndTypesList & required_columns, - const std::unordered_map & column_defaults, - const Context & context); + const Context & context, bool with_block_copy = true); } From 811b824b01bbdbefe3fc0ea5935cb86476a01014 Mon Sep 17 00:00:00 2001 From: chertus Date: Thu, 15 Nov 2018 23:26:51 +0300 Subject: [PATCH 47/88] AddingDefaultsBlockInputStream fixed types optimisation --- .../AddingDefaultsBlockInputStream.cpp | 148 +++++++++++++++--- .../AddingDefaultsBlockInputStream.h | 6 + dbms/src/DataTypes/IDataType.h | 7 + 3 files changed, 137 insertions(+), 24 deletions(-) diff --git a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp index 3d02b0c6415..89202ea9ecd 100644 --- a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp +++ b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp @@ -1,19 +1,33 @@ -#include -#include -#include -#include -#include #include +#include +#include #include #include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + namespace DB { namespace ErrorCodes { + extern const int LOGICAL_ERROR; extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; + extern const int TYPE_MISMATCH; } @@ -61,30 +75,25 @@ Block AddingDefaultsBlockInputStream::readImpl() continue; size_t block_column_position = res.getPositionByName(column_name); - const ColumnWithTypeAndName & column_read = 
res.getByPosition(block_column_position); - - if (column_read.column->size() != column_def.column->size()) - throw Exception("Mismach column sizes while adding defaults", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); - + ColumnWithTypeAndName & column_read = res.getByPosition(block_column_position); const auto & defaults_mask = delayed_defaults.getDefaultsBitmask(block_column_position); + + checkCalculated(column_read, column_def, defaults_mask.size()); + if (!defaults_mask.empty()) { - MutableColumnPtr column_mixed = column_read.column->cloneEmpty(); - - for (size_t row_idx = 0; row_idx < column_read.column->size(); ++row_idx) + /// TODO: FixedString + if (isColumnedAsNumber(column_read.type) || isDecimal(column_read.type)) { - if (row_idx < defaults_mask.size() && defaults_mask[row_idx]) - { - if (column_def.column->isColumnConst()) - column_mixed->insert((*column_def.column)[row_idx]); - else - column_mixed->insertFrom(*column_def.column, row_idx); - } - else - column_mixed->insertFrom(*column_read.column, row_idx); + MutableColumnPtr column_mixed = (*std::move(column_read.column)).mutate(); + mixNumberColumns(column_read.type->getTypeId(), column_mixed, column_def.column, defaults_mask); + column_read.column = std::move(column_mixed); + } + else + { + MutableColumnPtr column_mixed = mixColumns(column_read, column_def, defaults_mask); + mixed_columns.emplace(block_column_position, std::move(column_mixed)); } - - mixed_columns.emplace(std::make_pair(block_column_position, std::move(column_mixed))); } } @@ -104,4 +113,95 @@ Block AddingDefaultsBlockInputStream::readImpl() return res; } +void AddingDefaultsBlockInputStream::checkCalculated(const ColumnWithTypeAndName & col_read, + const ColumnWithTypeAndName & col_defaults, + size_t defaults_needed) const +{ + size_t column_size = col_read.column->size(); + + if (column_size != col_defaults.column->size()) + throw Exception("Mismach column sizes while adding defaults", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); + + 
if (column_size < defaults_needed) + throw Exception("Unexpected defaults count", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); + + if (col_read.type->getTypeId() != col_defaults.type->getTypeId()) + throw Exception("Mismach column types while adding defaults", ErrorCodes::TYPE_MISMATCH); +} + +void AddingDefaultsBlockInputStream::mixNumberColumns(TypeIndex type_idx, MutableColumnPtr & column_mixed, const ColumnPtr & column_defs, + const BlockMissingValues::RowsBitMask & defaults_mask) const +{ + auto call = [&](const auto & types) -> bool + { + using Types = std::decay_t; + using DataType = typename Types::LeftType; + + if constexpr (!std::is_same_v && !std::is_same_v) + { + using FieldType = typename DataType::FieldType; + using ColVecType = std::conditional_t, ColumnDecimal, ColumnVector>; + + auto col_read = typeid_cast(column_mixed.get()); + if (!col_read) + return false; + + typename ColVecType::Container & dst = col_read->getData(); + + if (auto const_col_defs = checkAndGetColumnConst(column_defs.get())) + { + FieldType value = checkAndGetColumn(const_col_defs->getDataColumnPtr().get())->getData()[0]; + + for (size_t i = 0; i < defaults_mask.size(); ++i) + if (defaults_mask[i]) + dst[i] = value; + + return true; + } + else if (auto col_defs = checkAndGetColumn(column_defs.get())) + { + auto & src = col_defs->getData(); + for (size_t i = 0; i < defaults_mask.size(); ++i) + if (defaults_mask[i]) + dst[i] = src[i]; + + return true; + } + } + + return false; + }; + + if (!callOnIndexAndDataType(type_idx, call)) + throw Exception("Unexpected type on mixNumberColumns", ErrorCodes::LOGICAL_ERROR); +} + +MutableColumnPtr AddingDefaultsBlockInputStream::mixColumns(const ColumnWithTypeAndName & col_read, + const ColumnWithTypeAndName & col_defaults, + const BlockMissingValues::RowsBitMask & defaults_mask) const +{ + size_t column_size = col_read.column->size(); + size_t defaults_needed = defaults_mask.size(); + + MutableColumnPtr column_mixed = 
col_read.column->cloneEmpty(); + + for (size_t i = 0; i < defaults_needed; ++i) + { + if (defaults_mask[i]) + { + if (col_defaults.column->isColumnConst()) + column_mixed->insert((*col_defaults.column)[i]); + else + column_mixed->insertFrom(*col_defaults.column, i); + } + else + column_mixed->insertFrom(*col_read.column, i); + } + + for (size_t i = defaults_needed; i < column_size; ++i) + column_mixed->insertFrom(*col_read.column, i); + + return column_mixed; +} + } diff --git a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.h b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.h index 5caaec244da..6711a3daee9 100644 --- a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.h +++ b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.h @@ -27,6 +27,12 @@ private: Block header; const ColumnDefaults column_defaults; const Context & context; + + void checkCalculated(const ColumnWithTypeAndName & col_read, const ColumnWithTypeAndName & col_defaults, size_t needed) const; + MutableColumnPtr mixColumns(const ColumnWithTypeAndName & col_read, const ColumnWithTypeAndName & col_defaults, + const BlockMissingValues::RowsBitMask & defaults_mask) const; + void mixNumberColumns(TypeIndex type_idx, MutableColumnPtr & col_mixed, const ColumnPtr & col_defaults, + const BlockMissingValues::RowsBitMask & defaults_mask) const; }; } diff --git a/dbms/src/DataTypes/IDataType.h b/dbms/src/DataTypes/IDataType.h index 727d80540ce..063c69ed311 100644 --- a/dbms/src/DataTypes/IDataType.h +++ b/dbms/src/DataTypes/IDataType.h @@ -512,6 +512,13 @@ inline bool isNumber(const T & data_type) return which.isInt() || which.isUInt() || which.isFloat(); } +template +inline bool isColumnedAsNumber(const T & data_type) +{ + WhichDataType which(data_type); + return which.isInt() || which.isUInt() || which.isFloat() || which.isDateOrDateTime() || which.isUUID(); +} + template inline bool isString(const T & data_type) { From b4b58b292f70d5ddea4c7a013eb08e4abcc28fb5 Mon Sep 17 00:00:00 2001 From: 
chertus Date: Fri, 16 Nov 2018 22:19:47 +0300 Subject: [PATCH 48/88] remove hack for StorageDistributed & insert_sample_with_metadata --- dbms/src/Storages/StorageDistributed.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp index d0eb764a917..5d3860f449d 100644 --- a/dbms/src/Storages/StorageDistributed.cpp +++ b/dbms/src/Storages/StorageDistributed.cpp @@ -174,9 +174,6 @@ StorageDistributed::StorageDistributed( if (num_local_shards && remote_database == database_name && remote_table == table_name) throw Exception("Distributed table " + table_name + " looks at itself", ErrorCodes::INFINITE_LOOP); } - - /// HACK: disable metadata for StorageDistributed queries - const_cast(context).getSettingsRef().insert_sample_with_metadata = false; } From 65b654a1b654f2a6f25ca746f60d71a85aae1f9a Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Fri, 30 Nov 2018 22:49:35 +0800 Subject: [PATCH 49/88] Versatile StorageJoin This commit does the following: 1. StorageJoin with simple keys now supports reading 2. StorageJoin can be created with Join settings applied. Syntax is similar to MergeTree and Kafka 3. Left Any StorageJoin with one simple key can be used as a dictionary-like structure by function joinGet. Examples are listed in the related test file. 
--- dbms/src/Functions/FunctionJoinGet.cpp | 74 +++++ dbms/src/Functions/FunctionJoinGet.h | 53 ++++ .../registerFunctionsMiscellaneous.cpp | 2 + dbms/src/Interpreters/ExpressionActions.cpp | 13 +- dbms/src/Interpreters/ExpressionActions.h | 3 +- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 11 +- dbms/src/Interpreters/Join.cpp | 110 ++++++-- dbms/src/Interpreters/Join.h | 41 ++- dbms/src/Storages/StorageFactory.cpp | 4 +- dbms/src/Storages/StorageJoin.cpp | 254 +++++++++++++++++- dbms/src/Storages/StorageJoin.h | 13 + .../00800_versatile_storage_join.reference | 19 ++ .../00800_versatile_storage_join.sql | 51 ++++ 13 files changed, 591 insertions(+), 57 deletions(-) create mode 100644 dbms/src/Functions/FunctionJoinGet.cpp create mode 100644 dbms/src/Functions/FunctionJoinGet.h create mode 100644 dbms/tests/queries/0_stateless/00800_versatile_storage_join.reference create mode 100644 dbms/tests/queries/0_stateless/00800_versatile_storage_join.sql diff --git a/dbms/src/Functions/FunctionJoinGet.cpp b/dbms/src/Functions/FunctionJoinGet.cpp new file mode 100644 index 00000000000..d2f65148b23 --- /dev/null +++ b/dbms/src/Functions/FunctionJoinGet.cpp @@ -0,0 +1,74 @@ +#include + +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +FunctionBasePtr FunctionBuilderJoinGet::buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &) const +{ + if (arguments.size() != 3) + throw Exception{"Function " + getName() + " takes 3 arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; + + String join_name; + if (auto name_col = checkAndGetColumnConst(arguments[0].column.get())) + { + join_name = name_col->getValue(); + } + else + throw Exception{"Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName() + + ", expected a const string.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + + 
auto table = context.getTable("", join_name); + + StorageJoin * storage_join = dynamic_cast(table.get()); + + if (!storage_join) + throw Exception{"Table " + join_name + " should have engine StorageJoin", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + + auto join = storage_join->getJoin(); + String attr_name; + if (auto name_col = checkAndGetColumnConst(arguments[1].column.get())) + { + attr_name = name_col->getValue(); + } + else + throw Exception{"Illegal type " + arguments[1].type->getName() + " of second argument of function " + getName() + + ", expected a const string.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + + DataTypes data_types(arguments.size()); + + for (size_t i = 0; i < arguments.size(); ++i) + data_types[i] = arguments[i].type; + + return std::make_shared( + std::make_shared(join, attr_name), data_types, join->joinGetReturnType(attr_name)); +} + +void FunctionJoinGet::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) +{ + auto & ctn = block.getByPosition(arguments[2]); + ctn.name = ""; // make sure the key name never collide with the join columns + Block key_block = {ctn}; + join->joinGet(key_block, attr_name); + block.getByPosition(result) = key_block.getByPosition(1); +} + +void registerFunctionJoinGet(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/dbms/src/Functions/FunctionJoinGet.h b/dbms/src/Functions/FunctionJoinGet.h new file mode 100644 index 00000000000..6573dcf9405 --- /dev/null +++ b/dbms/src/Functions/FunctionJoinGet.h @@ -0,0 +1,53 @@ +#include + +namespace DB +{ +class Context; +class Join; +using JoinPtr = std::shared_ptr; + +class FunctionJoinGet final : public IFunction, public std::enable_shared_from_this +{ +public: + static constexpr auto name = "joinGet"; + + FunctionJoinGet(JoinPtr join, const String & attr_name) : join(std::move(join)), attr_name(attr_name) {} + + String getName() const override { return name; } + +protected: + DataTypePtr 
getReturnTypeImpl(const DataTypes & /*arguments*/) const override { return nullptr; } + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override; + +private: + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + +private: + JoinPtr join; + const String attr_name; +}; + +class FunctionBuilderJoinGet final : public FunctionBuilderImpl +{ +public: + static constexpr auto name = "joinGet"; + static FunctionBuilderPtr create(const Context & context) { return std::make_shared(context); } + + FunctionBuilderJoinGet(const Context & context) : context(context) {} + + String getName() const override { return name; } + +protected: + FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &) const override; + DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override { return nullptr; } + +private: + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + +private: + const Context & context; +}; + +} diff --git a/dbms/src/Functions/registerFunctionsMiscellaneous.cpp b/dbms/src/Functions/registerFunctionsMiscellaneous.cpp index e483deee961..59c49a5c950 100644 --- a/dbms/src/Functions/registerFunctionsMiscellaneous.cpp +++ b/dbms/src/Functions/registerFunctionsMiscellaneous.cpp @@ -40,6 +40,7 @@ void registerFunctionToLowCardinality(FunctionFactory &); void registerFunctionLowCardinalityIndices(FunctionFactory &); void registerFunctionLowCardinalityKeys(FunctionFactory &); void registerFunctionsIn(FunctionFactory &); +void registerFunctionJoinGet(FunctionFactory &); void registerFunctionsMiscellaneous(FunctionFactory & factory) { @@ -80,6 +81,7 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory) registerFunctionLowCardinalityIndices(factory); registerFunctionLowCardinalityKeys(factory); registerFunctionsIn(factory); + 
registerFunctionJoinGet(factory); } } diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index b1fab40a654..20bfecc26d0 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -150,15 +150,18 @@ ExpressionAction ExpressionAction::arrayJoin(const NameSet & array_joined_column return a; } -ExpressionAction ExpressionAction::ordinaryJoin(std::shared_ptr join_, - const Names & join_key_names_left, - const NamesAndTypesList & columns_added_by_join_) +ExpressionAction ExpressionAction::ordinaryJoin( + std::shared_ptr join_, + const Names & join_key_names_left, + const NamesAndTypesList & columns_added_by_join_, + const NameSet & columns_added_by_join_from_right_keys_) { ExpressionAction a; a.type = JOIN; a.join = std::move(join_); a.join_key_names_left = join_key_names_left; a.columns_added_by_join = columns_added_by_join_; + a.columns_added_by_join_from_right_keys = columns_added_by_join_from_right_keys_; return a; } @@ -427,7 +430,7 @@ void ExpressionAction::execute(Block & block) const case JOIN: { - join->joinBlock(block); + join->joinBlock(block, join_key_names_left, columns_added_by_join_from_right_keys); break; } @@ -1085,7 +1088,7 @@ BlockInputStreamPtr ExpressionActions::createStreamWithNonJoinedDataIfFullOrRigh { for (const auto & action : actions) if (action.join && (action.join->getKind() == ASTTableJoin::Kind::Full || action.join->getKind() == ASTTableJoin::Kind::Right)) - return action.join->createStreamWithNonJoinedRows(source_header, max_block_size); + return action.join->createStreamWithNonJoinedRows(source_header, action.join_key_names_left, max_block_size); return {}; } diff --git a/dbms/src/Interpreters/ExpressionActions.h b/dbms/src/Interpreters/ExpressionActions.h index 781134dbeb2..8c42e8ae492 100644 --- a/dbms/src/Interpreters/ExpressionActions.h +++ b/dbms/src/Interpreters/ExpressionActions.h @@ -102,6 +102,7 @@ public: std::shared_ptr join; 
Names join_key_names_left; NamesAndTypesList columns_added_by_join; + NameSet columns_added_by_join_from_right_keys; /// For PROJECT. NamesWithAliases projection; @@ -118,7 +119,7 @@ public: static ExpressionAction addAliases(const NamesWithAliases & aliased_columns_); static ExpressionAction arrayJoin(const NameSet & array_joined_columns, bool array_join_is_left, const Context & context); static ExpressionAction ordinaryJoin(std::shared_ptr join_, const Names & join_key_names_left, - const NamesAndTypesList & columns_added_by_join_); + const NamesAndTypesList & columns_added_by_join_, const NameSet & columns_added_by_join_from_right_keys_); /// Which columns necessary to perform this action. Names getNeededColumns() const; diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index d9f2406ef4b..e12a600b6de 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -556,12 +556,13 @@ void ExpressionAnalyzer::addJoinAction(ExpressionActionsPtr & actions, bool only columns_added_by_join_list.push_back(joined_column.name_and_type); if (only_types) - actions->add(ExpressionAction::ordinaryJoin(nullptr, analyzedJoin().key_names_left, columns_added_by_join_list)); + actions->add(ExpressionAction::ordinaryJoin(nullptr, analyzedJoin().key_names_left, + columns_added_by_join_list, columns_added_by_join_from_right_keys)); else for (auto & subquery_for_set : subqueries_for_sets) if (subquery_for_set.second.join) actions->add(ExpressionAction::ordinaryJoin(subquery_for_set.second.join, analyzedJoin().key_names_left, - columns_added_by_join_list)); + columns_added_by_join_list, columns_added_by_join_from_right_keys)); } bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_types) @@ -621,10 +622,8 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty if (!subquery_for_set.join) { - JoinPtr join = std::make_shared( - 
analyzedJoin().key_names_left, analyzedJoin().key_names_right, columns_added_by_join_from_right_keys, - settings.join_use_nulls, settings.size_limits_for_join, - join_params.kind, join_params.strictness); + JoinPtr join = std::make_shared(analyzedJoin().key_names_right, settings.join_use_nulls, + settings.size_limits_for_join, join_params.kind, join_params.strictness); /** For GLOBAL JOINs (in the case, for example, of the push method for executing GLOBAL subqueries), the following occurs * - in the addExternalStorage function, the JOIN (SELECT ...) subquery is replaced with JOIN _data1, diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 00d74cc0e2d..62479422c5d 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -30,12 +30,10 @@ namespace ErrorCodes } -Join::Join(const Names & key_names_left_, const Names & key_names_right_, const NameSet & needed_key_names_right_, - bool use_nulls_, const SizeLimits & limits, ASTTableJoin::Kind kind_, ASTTableJoin::Strictness strictness_) +Join::Join(const Names & key_names_right_, bool use_nulls_, const SizeLimits & limits, + ASTTableJoin::Kind kind_, ASTTableJoin::Strictness strictness_) : kind(kind_), strictness(strictness_), - key_names_left(key_names_left_), key_names_right(key_names_right_), - needed_key_names_right(needed_key_names_right_), use_nulls(use_nulls_), log(&Logger::get("Join")), limits(limits) @@ -662,7 +660,12 @@ namespace template -void Join::joinBlockImpl(Block & block, const Maps & maps) const +void Join::joinBlockImpl( + Block & block, + const Names & key_names_left, + const NameSet & needed_key_names_right, + const Block & block_with_columns_to_add, + const Maps & maps) const { size_t keys_size = key_names_left.size(); ColumnRawPtrs key_columns(keys_size); @@ -734,8 +737,8 @@ void Join::joinBlockImpl(Block & block, const Maps & maps) const { const ColumnWithTypeAndName & src_column = sample_block_with_columns_to_add.safeGetByPosition(i); - /// 
Don't insert column if it's in left block. - if (!block.has(src_column.name)) + /// Don't insert column if it's in left block or not explicitly required. + if (!block.has(src_column.name) && block_with_columns_to_add.has(src_column.name)) { added_columns.push_back(src_column.column->cloneEmpty()); added_columns.back()->reserve(src_column.column->size()); @@ -746,7 +749,6 @@ void Join::joinBlockImpl(Block & block, const Maps & maps) const size_t rows = block.rows(); - /// Used with ANY INNER JOIN std::unique_ptr filter; bool filter_left_keys = (kind == ASTTableJoin::Kind::Inner || kind == ASTTableJoin::Kind::Right) && strictness == ASTTableJoin::Strictness::Any; @@ -875,7 +877,7 @@ void Join::joinBlockImplCross(Block & block) const } -void Join::checkTypesOfKeys(const Block & block_left, const Block & block_right) const +void Join::checkTypesOfKeys(const Block & block_left, const Names & key_names_left, const Block & block_right) const { size_t keys_size = key_names_left.size(); @@ -895,30 +897,90 @@ void Join::checkTypesOfKeys(const Block & block_left, const Block & block_right) } -void Join::joinBlock(Block & block) const +static void checkTypeOfKey(const Block & block_left, const Block & block_right) +{ + auto & [c1, left_type_origin, left_name] = block_left.safeGetByPosition(0); + auto & [c2, right_type_origin, right_name] = block_right.safeGetByPosition(0); + auto left_type = removeNullable(left_type_origin); + auto right_type = removeNullable(right_type_origin); + + if (!left_type->equals(*right_type)) + throw Exception("Type mismatch of columns to joinGet by: " + + left_name + " " + left_type->getName() + " at left, " + + right_name + " " + right_type->getName() + " at right", + ErrorCodes::TYPE_MISMATCH); +} + + +DataTypePtr Join::joinGetReturnType(const String & column_name) const +{ + std::shared_lock lock(rwlock); + + if (!sample_block_with_columns_to_add.has(column_name)) + throw Exception("StorageJoin doesn't contain column " + column_name, 
ErrorCodes::LOGICAL_ERROR); + return sample_block_with_columns_to_add.getByName(column_name).type; +} + + +template +void Join::joinGetImpl(Block & block, const String & column_name, const Maps & maps) const +{ + joinBlockImpl( + block, {block.getByPosition(0).name}, {}, {sample_block_with_columns_to_add.getByName(column_name)}, maps); +} + + +// TODO: support composite key +// TODO: return multible columns as named tuple +// TODO: return array of values when strictness == ASTTableJoin::Strictness::All +void Join::joinGet(Block & block, const String & column_name) const +{ + std::shared_lock lock(rwlock); + + if (key_names_right.size() != 1) + throw Exception("joinGet only supports StorageJoin containing exactly one key", ErrorCodes::LOGICAL_ERROR); + + checkTypeOfKey(block, sample_block_with_keys); + + if (kind == ASTTableJoin::Kind::Left && strictness == ASTTableJoin::Strictness::Any) + joinGetImpl(block, column_name, maps_any); + else + throw Exception("joinGet only supports StorageJoin of type Left Any", ErrorCodes::LOGICAL_ERROR); +} + + +void Join::joinBlock(Block & block, const Names & key_names_left, const NameSet & needed_key_names_right) const { // std::cerr << "joinBlock: " << block.dumpStructure() << "\n"; std::shared_lock lock(rwlock); - checkTypesOfKeys(block, sample_block_with_keys); + checkTypesOfKeys(block, key_names_left, sample_block_with_keys); if (kind == ASTTableJoin::Kind::Left && strictness == ASTTableJoin::Strictness::Any) - joinBlockImpl(block, maps_any); + joinBlockImpl( + block, key_names_left, needed_key_names_right, sample_block_with_columns_to_add, maps_any); else if (kind == ASTTableJoin::Kind::Inner && strictness == ASTTableJoin::Strictness::Any) - joinBlockImpl(block, maps_any); + joinBlockImpl( + block, key_names_left, needed_key_names_right, sample_block_with_columns_to_add, maps_any); else if (kind == ASTTableJoin::Kind::Left && strictness == ASTTableJoin::Strictness::All) - joinBlockImpl(block, maps_all); + joinBlockImpl( + 
block, key_names_left, needed_key_names_right, sample_block_with_columns_to_add, maps_all); else if (kind == ASTTableJoin::Kind::Inner && strictness == ASTTableJoin::Strictness::All) - joinBlockImpl(block, maps_all); + joinBlockImpl( + block, key_names_left, needed_key_names_right, sample_block_with_columns_to_add, maps_all); else if (kind == ASTTableJoin::Kind::Full && strictness == ASTTableJoin::Strictness::Any) - joinBlockImpl(block, maps_any_full); + joinBlockImpl( + block, key_names_left, needed_key_names_right, sample_block_with_columns_to_add, maps_any_full); else if (kind == ASTTableJoin::Kind::Right && strictness == ASTTableJoin::Strictness::Any) - joinBlockImpl(block, maps_any_full); + joinBlockImpl( + block, key_names_left, needed_key_names_right, sample_block_with_columns_to_add, maps_any_full); else if (kind == ASTTableJoin::Kind::Full && strictness == ASTTableJoin::Strictness::All) - joinBlockImpl(block, maps_all_full); + joinBlockImpl( + block, key_names_left, needed_key_names_right, sample_block_with_columns_to_add, maps_all_full); else if (kind == ASTTableJoin::Kind::Right && strictness == ASTTableJoin::Strictness::All) - joinBlockImpl(block, maps_all_full); + joinBlockImpl( + block, key_names_left, needed_key_names_right, sample_block_with_columns_to_add, maps_all_full); else if (kind == ASTTableJoin::Kind::Cross) joinBlockImplCross(block); else @@ -995,14 +1057,14 @@ struct AdderNonJoined class NonJoinedBlockInputStream : public IProfilingBlockInputStream { public: - NonJoinedBlockInputStream(const Join & parent_, const Block & left_sample_block, size_t max_block_size_) + NonJoinedBlockInputStream(const Join & parent_, const Block & left_sample_block, const Names & key_names_left, size_t max_block_size_) : parent(parent_), max_block_size(max_block_size_) { /** left_sample_block contains keys and "left" columns. * result_sample_block - keys, "left" columns, and "right" columns. 
*/ - size_t num_keys = parent.key_names_left.size(); + size_t num_keys = key_names_left.size(); size_t num_columns_left = left_sample_block.columns() - num_keys; size_t num_columns_right = parent.sample_block_with_columns_to_add.columns(); @@ -1019,7 +1081,7 @@ public: column_indices_keys_and_right.reserve(num_keys + num_columns_right); std::vector is_key_column_in_left_block(num_keys + num_columns_left, false); - for (const std::string & key : parent.key_names_left) + for (const std::string & key : key_names_left) { size_t key_pos = left_sample_block.getPositionByName(key); is_key_column_in_left_block[key_pos] = true; @@ -1170,9 +1232,9 @@ private: }; -BlockInputStreamPtr Join::createStreamWithNonJoinedRows(const Block & left_sample_block, size_t max_block_size) const +BlockInputStreamPtr Join::createStreamWithNonJoinedRows(const Block & left_sample_block, const Names & key_names_left, size_t max_block_size) const { - return std::make_shared(*this, left_sample_block, max_block_size); + return std::make_shared(*this, left_sample_block, key_names_left, max_block_size); } diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index 7c288e0a008..4a864d8795e 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -219,8 +219,8 @@ struct JoinKeyGetterHashed class Join { public: - Join(const Names & key_names_left_, const Names & key_names_right_, const NameSet & needed_key_names_right_, - bool use_nulls_, const SizeLimits & limits, ASTTableJoin::Kind kind_, ASTTableJoin::Strictness strictness_); + Join(const Names & key_names_right_, bool use_nulls_, const SizeLimits & limits, + ASTTableJoin::Kind kind_, ASTTableJoin::Strictness strictness_); bool empty() { return type == Type::EMPTY; } @@ -237,7 +237,13 @@ public: /** Join data from the map (that was previously built by calls to insertFromBlock) to the block with data from "left" table. * Could be called from different threads in parallel. 
*/ - void joinBlock(Block & block) const; + void joinBlock(Block & block, const Names & key_names_left, const NameSet & needed_key_names_right) const; + + /// Infer the return type for joinGet function + DataTypePtr joinGetReturnType(const String & column_name) const; + + /// Used by joinGet function that turns StorageJoin into a dictionary + void joinGet(Block & block, const String & column_name) const; /** Keep "totals" (separate part of dataset, see WITH TOTALS) to use later. */ @@ -251,7 +257,7 @@ public: * Use only after all calls to joinBlock was done. * left_sample_block is passed without account of 'use_nulls' setting (columns will be converted to Nullable inside). */ - BlockInputStreamPtr createStreamWithNonJoinedRows(const Block & left_sample_block, size_t max_block_size) const; + BlockInputStreamPtr createStreamWithNonJoinedRows(const Block & left_sample_block, const Names & key_names_left, size_t max_block_size) const; /// Number of keys in all built JOIN maps. size_t getTotalRowCount() const; @@ -320,6 +326,16 @@ public: M(keys256) \ M(hashed) + + /// Used for reading from StorageJoin and applying joinGet function + #define APPLY_FOR_JOIN_VARIANTS_LIMITED(M) \ + M(key8) \ + M(key16) \ + M(key32) \ + M(key64) \ + M(key_string) \ + M(key_fixed_string) + enum class Type { EMPTY, @@ -353,16 +369,13 @@ public: private: friend class NonJoinedBlockInputStream; + friend class JoinBlockInputStream; ASTTableJoin::Kind kind; ASTTableJoin::Strictness strictness; - /// Names of key columns (columns for equi-JOIN) in "left" table (in the order they appear in USING clause). - const Names key_names_left; /// Names of key columns (columns for equi-JOIN) in "right" table (in the order they appear in USING clause). const Names key_names_right; - /// Names of key columns in the "right" table which should stay in block after join. - const NameSet needed_key_names_right; /// Substitute NULLs for non-JOINed rows. 
bool use_nulls; @@ -408,12 +421,20 @@ private: void init(Type type_); /// Throw an exception if blocks have different types of key columns. - void checkTypesOfKeys(const Block & block_left, const Block & block_right) const; + void checkTypesOfKeys(const Block & block_left, const Names & key_names_left, const Block & block_right) const; template - void joinBlockImpl(Block & block, const Maps & maps) const; + void joinBlockImpl( + Block & block, + const Names & key_names_left, + const NameSet & needed_key_names_right, + const Block & block_with_columns_to_add, + const Maps & maps) const; void joinBlockImplCross(Block & block) const; + + template + void joinGetImpl(Block & block, const String & column_name, const Maps & maps) const; }; using JoinPtr = std::shared_ptr; diff --git a/dbms/src/Storages/StorageFactory.cpp b/dbms/src/Storages/StorageFactory.cpp index 546e8f27843..07520af1cea 100644 --- a/dbms/src/Storages/StorageFactory.cpp +++ b/dbms/src/Storages/StorageFactory.cpp @@ -86,11 +86,11 @@ StoragePtr StorageFactory::get( name = engine_def.name; - if (storage_def->settings && !endsWith(name, "MergeTree") && name != "Kafka") + if (storage_def->settings && !endsWith(name, "MergeTree") && name != "Kafka" && name != "Join") { throw Exception( "Engine " + name + " doesn't support SETTINGS clause. 
" - "Currently only the MergeTree family of engines and Kafka engine supports it", + "Currently only the MergeTree family of engines, Kafka engine and Join engine support it", ErrorCodes::BAD_ARGUMENTS); } diff --git a/dbms/src/Storages/StorageJoin.cpp b/dbms/src/Storages/StorageJoin.cpp index 65e13e05220..3b13764d091 100644 --- a/dbms/src/Storages/StorageJoin.cpp +++ b/dbms/src/Storages/StorageJoin.cpp @@ -1,8 +1,12 @@ #include #include #include +#include #include #include +#include +#include +#include #include /// toLower #include @@ -13,6 +17,7 @@ namespace DB namespace ErrorCodes { + extern const int UNKNOWN_SET_DATA_VARIANT; extern const int NO_SUCH_COLUMN_IN_TABLE; extern const int INCOMPATIBLE_TYPE_OF_JOIN; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; @@ -24,18 +29,23 @@ StorageJoin::StorageJoin( const String & path_, const String & name_, const Names & key_names_, - ASTTableJoin::Kind kind_, ASTTableJoin::Strictness strictness_, + bool use_nulls_, + SizeLimits limits_, + ASTTableJoin::Kind kind_, + ASTTableJoin::Strictness strictness_, const ColumnsDescription & columns_) - : StorageSetOrJoinBase{path_, name_, columns_}, - key_names(key_names_), kind(kind_), strictness(strictness_) + : StorageSetOrJoinBase{path_, name_, columns_} + , key_names(key_names_) + , use_nulls(use_nulls_) + , limits(limits_) + , kind(kind_) + , strictness(strictness_) { for (const auto & key : key_names) if (!getColumns().hasPhysical(key)) throw Exception{"Key column (" + key + ") does not exist in table declaration.", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE}; - /// NOTE StorageJoin doesn't use join_use_nulls setting. 
- - join = std::make_shared(key_names, key_names, NameSet(), false /* use_nulls */, SizeLimits(), kind, strictness); + join = std::make_shared(key_names, use_nulls, limits, kind, strictness); join->setSampleBlock(getSampleBlock().sortColumns()); restore(); } @@ -48,7 +58,7 @@ void StorageJoin::truncate(const ASTPtr &) Poco::File(path + "tmp/").createDirectories(); increment = 0; - join = std::make_shared(key_names, key_names, NameSet(), false /* use_nulls */, SizeLimits(), kind, strictness); + join = std::make_shared(key_names, use_nulls, limits, kind, strictness); join->setSampleBlock(getSampleBlock().sortColumns()); } @@ -119,11 +129,237 @@ void registerStorageJoin(StorageFactory & factory) key_names.push_back(key->name); } + auto & settings = args.context.getSettingsRef(); + auto join_use_nulls = settings.join_use_nulls; + auto max_rows_in_join = settings.max_rows_in_join; + auto max_bytes_in_join = settings.max_bytes_in_join; + auto join_overflow_mode = settings.join_overflow_mode; + + if (args.storage_def && args.storage_def->settings) + { + for (const ASTSetQuery::Change & setting : args.storage_def->settings->changes) + { + if (setting.name == "join_use_nulls") join_use_nulls.set(setting.value); + else if (setting.name == "max_rows_in_join") max_rows_in_join.set(setting.value); + else if (setting.name == "max_bytes_in_join") max_bytes_in_join.set(setting.value); + else if (setting.name == "join_overflow_mode") join_overflow_mode.set(setting.value); + else + throw Exception( + "Unknown setting " + setting.name + " for storage " + args.engine_name, + ErrorCodes::BAD_ARGUMENTS); + } + } + return StorageJoin::create( - args.data_path, args.table_name, - key_names, kind, strictness, + args.data_path, + args.table_name, + key_names, + join_use_nulls.value, + SizeLimits{max_rows_in_join.value, max_bytes_in_join.value, join_overflow_mode.value}, + kind, + strictness, args.columns); }); } +template +static const char * rawData(T & t) +{ + return reinterpret_cast(&t); 
+} +template +static size_t rawSize(T &) +{ + return sizeof(T); +} +template <> +const char * rawData(const StringRef & t) +{ + return t.data; +} +template <> +size_t rawSize(const StringRef & t) +{ + return t.size; +} + +class JoinBlockInputStream : public IProfilingBlockInputStream +{ +public: + JoinBlockInputStream(const Join & parent_, size_t max_block_size_, Block & sample_block_) + : parent(parent_), lock(parent.rwlock), max_block_size(max_block_size_), sample_block(sample_block_) + { + columns.resize(sample_block.columns()); + column_indices.resize(sample_block.columns()); + column_with_null.resize(sample_block.columns()); + for (size_t i = 0; i < sample_block.columns(); ++i) + { + auto & [_, type, name] = sample_block.getByPosition(i); + if (parent.sample_block_with_keys.has(name)) + { + key_pos = i; + column_with_null[i] = parent.sample_block_with_keys.getByName(name).type->isNullable(); + } + else + { + auto pos = parent.sample_block_with_columns_to_add.getPositionByName(name); + column_indices[i] = pos; + column_with_null[i] = !parent.sample_block_with_columns_to_add.getByPosition(pos).type->equals(*type); + } + } + } + + String getName() const override { return "Join"; } + + Block getHeader() const override { return sample_block; } + + +protected: + Block readImpl() override + { + if (parent.blocks.empty()) + return Block(); + + if (parent.strictness == ASTTableJoin::Strictness::Any) + return createBlock(parent.maps_any); + else if (parent.strictness == ASTTableJoin::Strictness::All) + return createBlock(parent.maps_all); + else + throw Exception("Logical error: unknown JOIN strictness (must be ANY or ALL)", ErrorCodes::LOGICAL_ERROR); + } + +private: + const Join & parent; + std::shared_lock lock; + size_t max_block_size; + Block sample_block; + + ColumnNumbers column_indices; + std::vector column_with_null; + std::optional key_pos; + MutableColumns columns; + + std::unique_ptr> position; /// type erasure + + + template + Block createBlock(const Maps & 
maps) + { + for (size_t i = 0; i < sample_block.columns(); ++i) + { + const auto & src_col = sample_block.safeGetByPosition(i); + columns[i] = src_col.type->createColumn(); + if (column_with_null[i]) + { + if (key_pos == i) + { + // unwrap null key column + ColumnNullable & nullable_col = static_cast(*columns[i]); + columns[i] = nullable_col.getNestedColumnPtr()->assumeMutable(); + } + else + // wrap non key column with null + columns[i] = makeNullable(std::move(columns[i]))->assumeMutable(); + } + } + + size_t rows_added = 0; + + switch (parent.type) + { +#define M(TYPE) \ + case Join::Type::TYPE: \ + rows_added = fillColumns(*maps.TYPE); \ + break; + APPLY_FOR_JOIN_VARIANTS_LIMITED(M) +#undef M + + default: + throw Exception("Unknown JOIN keys variant for limited use", ErrorCodes::UNKNOWN_SET_DATA_VARIANT); + } + + if (!rows_added) + return {}; + + Block res = sample_block.cloneEmpty(); + for (size_t i = 0; i < columns.size(); ++i) + if (column_with_null[i]) + { + if (key_pos == i) + res.getByPosition(i).column = makeNullable(std::move(columns[i]))->assumeMutable(); + else + { + const ColumnNullable & nullable_col = static_cast(*columns[i]); + res.getByPosition(i).column = nullable_col.getNestedColumnPtr(); + } + } + else + res.getByPosition(i).column = std::move(columns[i]); + + return res; + } + + + template + size_t fillColumns(const Map & map) + { + size_t rows_added = 0; + + if (!position) + position = decltype(position)( + static_cast(new typename Map::const_iterator(map.begin())), + [](void * ptr) { delete reinterpret_cast(ptr); }); + + auto & it = *reinterpret_cast(position.get()); + auto end = map.end(); + + for (; it != end; ++it) + { + if constexpr (STRICTNESS == ASTTableJoin::Strictness::Any) + { + for (size_t j = 0; j < columns.size(); ++j) + if (j == key_pos) + columns[j]->insertData(rawData(it->first), rawSize(it->first)); + else + columns[j]->insertFrom(*it->second.block->getByPosition(column_indices[j]).column.get(), it->second.row_num); + 
++rows_added; + } + else + for (auto current = &static_cast(it->second); current != nullptr; + current = current->next) + { + for (size_t j = 0; j < columns.size(); ++j) + if (j == key_pos) + columns[j]->insertData(rawData(it->first), rawSize(it->first)); + else + columns[j]->insertFrom(*current->block->getByPosition(column_indices[j]).column.get(), current->row_num); + ++rows_added; + } + + if (rows_added >= max_block_size) + { + ++it; + break; + } + } + + return rows_added; + } +}; + + +// TODO: multiple stream read and index read +BlockInputStreams StorageJoin::read( + const Names & column_names, + const SelectQueryInfo & /*query_info*/, + const Context & /*context*/, + QueryProcessingStage::Enum /*processed_stage*/, + size_t max_block_size, + unsigned /*num_streams*/) +{ + check(column_names); + Block sample_block = getSampleBlockForColumns(column_names); + return {std::make_shared(*join, max_block_size, sample_block)}; +} + } diff --git a/dbms/src/Storages/StorageJoin.h b/dbms/src/Storages/StorageJoin.h index 34bda5cd8ac..25c5128a349 100644 --- a/dbms/src/Storages/StorageJoin.h +++ b/dbms/src/Storages/StorageJoin.h @@ -33,8 +33,19 @@ public: /// Verify that the data structure is suitable for implementing this type of JOIN. void assertCompatible(ASTTableJoin::Kind kind_, ASTTableJoin::Strictness strictness_) const; + BlockInputStreams read( + const Names & column_names, + const SelectQueryInfo & query_info, + const Context & context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + unsigned num_streams) override; + private: + Block sample_block; const Names & key_names; + bool use_nulls; + SizeLimits limits; ASTTableJoin::Kind kind; /// LEFT | INNER ... 
ASTTableJoin::Strictness strictness; /// ANY | ALL @@ -48,6 +59,8 @@ protected: const String & path_, const String & name_, const Names & key_names_, + bool use_nulls_, + SizeLimits limits_, ASTTableJoin::Kind kind_, ASTTableJoin::Strictness strictness_, const ColumnsDescription & columns_); }; diff --git a/dbms/tests/queries/0_stateless/00800_versatile_storage_join.reference b/dbms/tests/queries/0_stateless/00800_versatile_storage_join.reference new file mode 100644 index 00000000000..1fa9ac74e57 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00800_versatile_storage_join.reference @@ -0,0 +1,19 @@ +--------read-------- +def [1,2] 2 +abc [0] 1 +def [1,2] 2 +abc [0] 1 +def [1,2] 2 +abc [0] 1 +def [1,2] 2 +abc [0] 1 +--------joinGet-------- + +abc +def + +\N +abc +def + +[0] 1 diff --git a/dbms/tests/queries/0_stateless/00800_versatile_storage_join.sql b/dbms/tests/queries/0_stateless/00800_versatile_storage_join.sql new file mode 100644 index 00000000000..80f7616766c --- /dev/null +++ b/dbms/tests/queries/0_stateless/00800_versatile_storage_join.sql @@ -0,0 +1,51 @@ +DROP TABLE IF EXISTS test.join_any_inner; +DROP TABLE IF EXISTS test.join_any_left; +DROP TABLE IF EXISTS test.join_any_left_null; +DROP TABLE IF EXISTS test.join_all_inner; +DROP TABLE IF EXISTS test.join_all_left; +DROP TABLE IF EXISTS test.join_string_key; + +CREATE TABLE test.join_any_inner (s String, x Array(UInt8), k UInt64) ENGINE = Join(ANY, INNER, k); +CREATE TABLE test.join_any_left (s String, x Array(UInt8), k UInt64) ENGINE = Join(ANY, LEFT, k); +CREATE TABLE test.join_all_inner (s String, x Array(UInt8), k UInt64) ENGINE = Join(ALL, INNER, k); +CREATE TABLE test.join_all_left (s String, x Array(UInt8), k UInt64) ENGINE = Join(ALL, LEFT, k); + +USE test; + +INSERT INTO test.join_any_inner VALUES ('abc', [0], 1), ('def', [1, 2], 2); +INSERT INTO test.join_any_left VALUES ('abc', [0], 1), ('def', [1, 2], 2); +INSERT INTO test.join_all_inner VALUES ('abc', [0], 1), ('def', [1, 2], 2); 
+INSERT INTO test.join_all_left VALUES ('abc', [0], 1), ('def', [1, 2], 2); + +-- read from StorageJoin + +SELECT '--------read--------'; +SELECT * from test.join_any_inner; +SELECT * from test.join_any_left; +SELECT * from test.join_all_inner; +SELECT * from test.join_all_left; + +-- create StorageJoin tables with customized settings + +CREATE TABLE test.join_any_left_null (s String, k UInt64) ENGINE = Join(ANY, LEFT, k) SETTINGS join_use_nulls = 1; +INSERT INTO test.join_any_left_null VALUES ('abc', 1), ('def', 2); + +-- joinGet +SELECT '--------joinGet--------'; +SELECT joinGet('join_any_left', 's', number) FROM numbers(3); +SELECT ''; +SELECT joinGet('join_any_left_null', 's', number) FROM numbers(3); +SELECT ''; + +CREATE TABLE test.join_string_key (s String, x Array(UInt8), k UInt64) ENGINE = Join(ANY, LEFT, s); +INSERT INTO test.join_string_key VALUES ('abc', [0], 1), ('def', [1, 2], 2); +SELECT joinGet('join_string_key', 'x', 'abc'), joinGet('join_string_key', 'k', 'abc'); + +USE default; + +DROP TABLE test.join_any_inner; +DROP TABLE test.join_any_left; +DROP TABLE test.join_any_left_null; +DROP TABLE test.join_all_inner; +DROP TABLE test.join_all_left; +DROP TABLE test.join_string_key; From 933c055104dd224aca2ec31bb3c5253494de58ab Mon Sep 17 00:00:00 2001 From: chertus Date: Tue, 4 Dec 2018 17:09:47 +0300 Subject: [PATCH 50/88] CLICKHOUSE-3578 review proress --- .../src/DataStreams/AddingDefaultsBlockInputStream.cpp | 10 ++++------ dbms/src/Formats/BinaryRowInputStream.cpp | 2 +- dbms/src/Formats/BinaryRowInputStream.h | 2 +- .../src/Formats/BlockInputStreamFromRowInputStream.cpp | 8 ++++---- dbms/src/Formats/BlockInputStreamFromRowInputStream.h | 4 ++-- dbms/src/Formats/CSVRowInputStream.cpp | 2 +- dbms/src/Formats/CSVRowInputStream.h | 2 +- dbms/src/Formats/CapnProtoRowInputStream.cpp | 2 +- dbms/src/Formats/CapnProtoRowInputStream.h | 2 +- dbms/src/Formats/IRowInputStream.h | 5 ++--- dbms/src/Formats/JSONEachRowRowInputStream.cpp | 7 +------ 
dbms/src/Formats/JSONEachRowRowInputStream.h | 3 +-- dbms/src/Formats/TSKVRowInputStream.cpp | 2 +- dbms/src/Formats/TSKVRowInputStream.h | 2 +- dbms/src/Formats/TabSeparatedRowInputStream.cpp | 2 +- dbms/src/Formats/TabSeparatedRowInputStream.h | 2 +- dbms/src/Formats/ValuesRowInputStream.cpp | 2 +- dbms/src/Formats/ValuesRowInputStream.h | 2 +- dbms/src/Interpreters/evaluateMissingDefaults.cpp | 5 ++--- dbms/src/Interpreters/evaluateMissingDefaults.h | 3 ++- dbms/src/Storages/ColumnDefault.cpp | 2 -- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 3 ++- 22 files changed, 32 insertions(+), 42 deletions(-) diff --git a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp index 89202ea9ecd..9ee6b15e1b9 100644 --- a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp +++ b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp @@ -51,17 +51,15 @@ Block AddingDefaultsBlockInputStream::readImpl() if (column_defaults.empty()) return res; - const BlockMissingValues & delayed_defaults = children.back()->getMissingValues(); - if (delayed_defaults.empty()) + const BlockMissingValues & block_missing_values = children.back()->getMissingValues(); + if (block_missing_values.empty()) return res; Block evaluate_block{res}; + /// remove columns for recalculation for (const auto & column : column_defaults) - { - /// column_defaults contain aliases that could be ommited in evaluate_block if (evaluate_block.has(column.first)) evaluate_block.erase(column.first); - } evaluateMissingDefaults(evaluate_block, header.getNamesAndTypesList(), column_defaults, context, false); @@ -76,7 +74,7 @@ Block AddingDefaultsBlockInputStream::readImpl() size_t block_column_position = res.getPositionByName(column_name); ColumnWithTypeAndName & column_read = res.getByPosition(block_column_position); - const auto & defaults_mask = delayed_defaults.getDefaultsBitmask(block_column_position); + const auto & defaults_mask = 
block_missing_values.getDefaultsBitmask(block_column_position); checkCalculated(column_read, column_def, defaults_mask.size()); diff --git a/dbms/src/Formats/BinaryRowInputStream.cpp b/dbms/src/Formats/BinaryRowInputStream.cpp index eea6dce12d5..7c059782e6d 100644 --- a/dbms/src/Formats/BinaryRowInputStream.cpp +++ b/dbms/src/Formats/BinaryRowInputStream.cpp @@ -14,7 +14,7 @@ BinaryRowInputStream::BinaryRowInputStream(ReadBuffer & istr_, const Block & hea } -bool BinaryRowInputStream::read(MutableColumns & columns) +bool BinaryRowInputStream::read(MutableColumns & columns, RowReadExtension &) { if (istr.eof()) return false; diff --git a/dbms/src/Formats/BinaryRowInputStream.h b/dbms/src/Formats/BinaryRowInputStream.h index 6e4d2b9c2ce..f70e081f097 100644 --- a/dbms/src/Formats/BinaryRowInputStream.h +++ b/dbms/src/Formats/BinaryRowInputStream.h @@ -17,7 +17,7 @@ class BinaryRowInputStream : public IRowInputStream public: BinaryRowInputStream(ReadBuffer & istr_, const Block & header_); - bool read(MutableColumns & columns) override; + bool read(MutableColumns & columns, RowReadExtension &) override; private: ReadBuffer & istr; diff --git a/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp b/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp index 1440375eb14..89dc575dcb9 100644 --- a/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp +++ b/dbms/src/Formats/BlockInputStreamFromRowInputStream.cpp @@ -53,7 +53,7 @@ Block BlockInputStreamFromRowInputStream::readImpl() { size_t num_columns = sample.columns(); MutableColumns columns = sample.cloneEmptyColumns(); - delayed_defaults.clear(); + block_missing_values.clear(); try { @@ -62,8 +62,8 @@ Block BlockInputStreamFromRowInputStream::readImpl() try { ++total_rows; - RowReadExtention info; - if (!row_input->extendedRead(columns, info)) + RowReadExtension info; + if (!row_input->read(columns, info)) break; for (size_t column_idx = 0; column_idx < info.read_columns.size(); ++column_idx) @@ -73,7 +73,7 @@ 
Block BlockInputStreamFromRowInputStream::readImpl() size_t column_size = columns[column_idx]->size(); if (column_size == 0) throw Exception("Unexpected empty column", ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS); - delayed_defaults.setBit(column_idx, column_size - 1); + block_missing_values.setBit(column_idx, column_size - 1); } } } diff --git a/dbms/src/Formats/BlockInputStreamFromRowInputStream.h b/dbms/src/Formats/BlockInputStreamFromRowInputStream.h index 65d6fa04469..fcbec628e09 100644 --- a/dbms/src/Formats/BlockInputStreamFromRowInputStream.h +++ b/dbms/src/Formats/BlockInputStreamFromRowInputStream.h @@ -33,7 +33,7 @@ public: Block getHeader() const override { return sample; } - const BlockMissingValues & getMissingValues() const override { return delayed_defaults; } + const BlockMissingValues & getMissingValues() const override { return block_missing_values; } protected: Block readImpl() override; @@ -42,7 +42,7 @@ private: RowInputStreamPtr row_input; Block sample; size_t max_block_size; - BlockMissingValues delayed_defaults; + BlockMissingValues block_missing_values; UInt64 allow_errors_num; Float64 allow_errors_ratio; diff --git a/dbms/src/Formats/CSVRowInputStream.cpp b/dbms/src/Formats/CSVRowInputStream.cpp index ca8f9514312..d7f8c6eea28 100644 --- a/dbms/src/Formats/CSVRowInputStream.cpp +++ b/dbms/src/Formats/CSVRowInputStream.cpp @@ -111,7 +111,7 @@ void CSVRowInputStream::readPrefix() } -bool CSVRowInputStream::read(MutableColumns & columns) +bool CSVRowInputStream::read(MutableColumns & columns, RowReadExtension &) { if (istr.eof()) return false; diff --git a/dbms/src/Formats/CSVRowInputStream.h b/dbms/src/Formats/CSVRowInputStream.h index d7f8d96867f..c04bda57008 100644 --- a/dbms/src/Formats/CSVRowInputStream.h +++ b/dbms/src/Formats/CSVRowInputStream.h @@ -21,7 +21,7 @@ public: */ CSVRowInputStream(ReadBuffer & istr_, const Block & header_, bool with_names_, const FormatSettings & format_settings); - bool read(MutableColumns & columns) override; 
+ bool read(MutableColumns & columns, RowReadExtension &) override; void readPrefix() override; bool allowSyncAfterError() const override { return true; } void syncAfterError() override; diff --git a/dbms/src/Formats/CapnProtoRowInputStream.cpp b/dbms/src/Formats/CapnProtoRowInputStream.cpp index 8ed9d882a2e..d8d87f082ed 100644 --- a/dbms/src/Formats/CapnProtoRowInputStream.cpp +++ b/dbms/src/Formats/CapnProtoRowInputStream.cpp @@ -193,7 +193,7 @@ CapnProtoRowInputStream::CapnProtoRowInputStream(ReadBuffer & istr_, const Block } -bool CapnProtoRowInputStream::read(MutableColumns & columns) +bool CapnProtoRowInputStream::read(MutableColumns & columns, RowReadExtension &) { if (istr.eof()) return false; diff --git a/dbms/src/Formats/CapnProtoRowInputStream.h b/dbms/src/Formats/CapnProtoRowInputStream.h index a7fcce49143..a6186d7488d 100644 --- a/dbms/src/Formats/CapnProtoRowInputStream.h +++ b/dbms/src/Formats/CapnProtoRowInputStream.h @@ -34,7 +34,7 @@ public: */ CapnProtoRowInputStream(ReadBuffer & istr_, const Block & header_, const String & schema_dir, const String & schema_file, const String & root_object); - bool read(MutableColumns & columns) override; + bool read(MutableColumns & columns, RowReadExtension &) override; private: // Build a traversal plan from a sorted list of fields diff --git a/dbms/src/Formats/IRowInputStream.h b/dbms/src/Formats/IRowInputStream.h index ccfbdeb2b40..65b98d2524f 100644 --- a/dbms/src/Formats/IRowInputStream.h +++ b/dbms/src/Formats/IRowInputStream.h @@ -11,7 +11,7 @@ namespace DB { /// A way to set some extentions to read and return extra information too. IRowInputStream.extendedRead() output. -struct RowReadExtention +struct RowReadExtension { /// IRowInputStream.extendedRead() output value. /// Contains one bit per column in resently read row. IRowInputStream could leave it empty, or partialy set. @@ -27,8 +27,7 @@ public: /** Read next row and append it to the columns. * If no more rows - return false. 
*/ - virtual bool read(MutableColumns & columns) = 0; - virtual bool extendedRead(MutableColumns & columns, RowReadExtention & ) { return read(columns); } + virtual bool read(MutableColumns & columns, RowReadExtension & extra) = 0; virtual void readPrefix() {} /// delimiter before begin of result virtual void readSuffix() {} /// delimiter after end of result diff --git a/dbms/src/Formats/JSONEachRowRowInputStream.cpp b/dbms/src/Formats/JSONEachRowRowInputStream.cpp index cd5ec22ea32..ad67b7a8101 100644 --- a/dbms/src/Formats/JSONEachRowRowInputStream.cpp +++ b/dbms/src/Formats/JSONEachRowRowInputStream.cpp @@ -209,13 +209,8 @@ void JSONEachRowRowInputStream::readNestedData(const String & name, MutableColum nested_prefix_length = 0; } -bool JSONEachRowRowInputStream::read(MutableColumns & columns) -{ - RowReadExtention tmp; - return extendedRead(columns, tmp); -} -bool JSONEachRowRowInputStream::extendedRead(MutableColumns & columns, RowReadExtention & ext) +bool JSONEachRowRowInputStream::read(MutableColumns & columns, RowReadExtension & ext) { skipWhitespaceIfAny(istr); diff --git a/dbms/src/Formats/JSONEachRowRowInputStream.h b/dbms/src/Formats/JSONEachRowRowInputStream.h index 4a64bf30829..4a915d6aa9d 100644 --- a/dbms/src/Formats/JSONEachRowRowInputStream.h +++ b/dbms/src/Formats/JSONEachRowRowInputStream.h @@ -22,8 +22,7 @@ class JSONEachRowRowInputStream : public IRowInputStream public: JSONEachRowRowInputStream(ReadBuffer & istr_, const Block & header_, const FormatSettings & format_settings); - bool read(MutableColumns & columns) override; - bool extendedRead(MutableColumns & columns, RowReadExtention & ext) override; + bool read(MutableColumns & columns, RowReadExtension & ext) override; bool allowSyncAfterError() const override { return true; } void syncAfterError() override; diff --git a/dbms/src/Formats/TSKVRowInputStream.cpp b/dbms/src/Formats/TSKVRowInputStream.cpp index 56f460dafac..837dfb5afaa 100644 --- a/dbms/src/Formats/TSKVRowInputStream.cpp +++ 
b/dbms/src/Formats/TSKVRowInputStream.cpp @@ -88,7 +88,7 @@ static bool readName(ReadBuffer & buf, StringRef & ref, String & tmp) } -bool TSKVRowInputStream::read(MutableColumns & columns) +bool TSKVRowInputStream::read(MutableColumns & columns, RowReadExtension &) { if (istr.eof()) return false; diff --git a/dbms/src/Formats/TSKVRowInputStream.h b/dbms/src/Formats/TSKVRowInputStream.h index b05686dc37a..155322e90c1 100644 --- a/dbms/src/Formats/TSKVRowInputStream.h +++ b/dbms/src/Formats/TSKVRowInputStream.h @@ -25,7 +25,7 @@ class TSKVRowInputStream : public IRowInputStream public: TSKVRowInputStream(ReadBuffer & istr_, const Block & header_, const FormatSettings & format_settings); - bool read(MutableColumns & columns) override; + bool read(MutableColumns & columns, RowReadExtension &) override; bool allowSyncAfterError() const override { return true; } void syncAfterError() override; diff --git a/dbms/src/Formats/TabSeparatedRowInputStream.cpp b/dbms/src/Formats/TabSeparatedRowInputStream.cpp index b843c14bd66..181f29113db 100644 --- a/dbms/src/Formats/TabSeparatedRowInputStream.cpp +++ b/dbms/src/Formats/TabSeparatedRowInputStream.cpp @@ -75,7 +75,7 @@ static void checkForCarriageReturn(ReadBuffer & istr) } -bool TabSeparatedRowInputStream::read(MutableColumns & columns) +bool TabSeparatedRowInputStream::read(MutableColumns & columns, RowReadExtension &) { if (istr.eof()) return false; diff --git a/dbms/src/Formats/TabSeparatedRowInputStream.h b/dbms/src/Formats/TabSeparatedRowInputStream.h index e1c51251009..2435d58d703 100644 --- a/dbms/src/Formats/TabSeparatedRowInputStream.h +++ b/dbms/src/Formats/TabSeparatedRowInputStream.h @@ -22,7 +22,7 @@ public: TabSeparatedRowInputStream( ReadBuffer & istr_, const Block & header_, bool with_names_, bool with_types_, const FormatSettings & format_settings); - bool read(MutableColumns & columns) override; + bool read(MutableColumns & columns, RowReadExtension &) override; void readPrefix() override; bool 
allowSyncAfterError() const override { return true; } void syncAfterError() override; diff --git a/dbms/src/Formats/ValuesRowInputStream.cpp b/dbms/src/Formats/ValuesRowInputStream.cpp index 2dd27ce8df7..eaa8181d185 100644 --- a/dbms/src/Formats/ValuesRowInputStream.cpp +++ b/dbms/src/Formats/ValuesRowInputStream.cpp @@ -37,7 +37,7 @@ ValuesRowInputStream::ValuesRowInputStream(ReadBuffer & istr_, const Block & hea } -bool ValuesRowInputStream::read(MutableColumns & columns) +bool ValuesRowInputStream::read(MutableColumns & columns, RowReadExtension &) { size_t num_columns = columns.size(); diff --git a/dbms/src/Formats/ValuesRowInputStream.h b/dbms/src/Formats/ValuesRowInputStream.h index 49775861746..372619d4e27 100644 --- a/dbms/src/Formats/ValuesRowInputStream.h +++ b/dbms/src/Formats/ValuesRowInputStream.h @@ -23,7 +23,7 @@ public: */ ValuesRowInputStream(ReadBuffer & istr_, const Block & header_, const Context & context_, const FormatSettings & format_settings); - bool read(MutableColumns & columns) override; + bool read(MutableColumns & columns, RowReadExtension &) override; private: ReadBuffer & istr; diff --git a/dbms/src/Interpreters/evaluateMissingDefaults.cpp b/dbms/src/Interpreters/evaluateMissingDefaults.cpp index 40b75c0b673..050078b7af4 100644 --- a/dbms/src/Interpreters/evaluateMissingDefaults.cpp +++ b/dbms/src/Interpreters/evaluateMissingDefaults.cpp @@ -37,7 +37,7 @@ static ASTPtr requiredExpressions(Block & block, const NamesAndTypesList & requi void evaluateMissingDefaults(Block & block, const NamesAndTypesList & required_columns, const ColumnDefaults & column_defaults, - const Context & context, bool with_block_copy) + const Context & context, bool save_unneded_columns) { if (column_defaults.empty()) return; @@ -46,7 +46,7 @@ void evaluateMissingDefaults(Block & block, if (!default_expr_list) return; - if (!with_block_copy) + if (!save_unneded_columns) { auto syntax_result = SyntaxAnalyzer(context, {}).analyze(default_expr_list, 
block.getNamesAndTypesList()); ExpressionAnalyzer{default_expr_list, syntax_result, context}.getActions(true)->execute(block); @@ -56,7 +56,6 @@ void evaluateMissingDefaults(Block & block, /** ExpressionAnalyzer eliminates "unused" columns, in order to ensure their safety * we are going to operate on a copy instead of the original block */ Block copy_block{block}; - /// evaluate default values for defaulted columns auto syntax_result = SyntaxAnalyzer(context, {}).analyze(default_expr_list, block.getNamesAndTypesList()); ExpressionAnalyzer{default_expr_list, syntax_result, context}.getActions(true)->execute(copy_block); diff --git a/dbms/src/Interpreters/evaluateMissingDefaults.h b/dbms/src/Interpreters/evaluateMissingDefaults.h index 71f6fab9753..c65cb1680a2 100644 --- a/dbms/src/Interpreters/evaluateMissingDefaults.h +++ b/dbms/src/Interpreters/evaluateMissingDefaults.h @@ -12,9 +12,10 @@ class Context; class NamesAndTypesList; struct ColumnDefault; +/// void evaluateMissingDefaults(Block & block, const NamesAndTypesList & required_columns, const std::unordered_map & column_defaults, - const Context & context, bool with_block_copy = true); + const Context & context, bool save_unneded_columns = true); } diff --git a/dbms/src/Storages/ColumnDefault.cpp b/dbms/src/Storages/ColumnDefault.cpp index cd79c5ca093..2a89cda5caf 100644 --- a/dbms/src/Storages/ColumnDefault.cpp +++ b/dbms/src/Storages/ColumnDefault.cpp @@ -1,5 +1,3 @@ -#include - #include #include #include diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 524b8bfe8bf..0334e79f541 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -2173,7 +2173,8 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, const Context ValuesRowInputStream input_stream(buf, partition_key_sample, context, format_settings); MutableColumns columns = partition_key_sample.cloneEmptyColumns(); - if 
(!input_stream.read(columns)) + RowReadExtension unused; + if (!input_stream.read(columns, unused)) throw Exception( "Could not parse partition value: `" + partition_ast.fields_str.toString() + "`", ErrorCodes::INVALID_PARTITION_VALUE); From a71d03737c18efb89fef49a34275a844aef5e6b1 Mon Sep 17 00:00:00 2001 From: chertus Date: Tue, 4 Dec 2018 23:03:04 +0300 Subject: [PATCH 51/88] send defaults via serialized ColumnsDescription CLICKHOUSE-3578 --- dbms/programs/client/Client.cpp | 24 ++++--- dbms/programs/server/TCPHandler.cpp | 23 +++++-- dbms/programs/server/TCPHandler.h | 1 + dbms/src/Client/Connection.cpp | 14 ++++ dbms/src/Client/Connection.h | 4 ++ dbms/src/Core/Protocol.h | 20 +++++- .../InputStreamFromASTInsertQuery.cpp | 8 +-- .../Interpreters/evaluateMissingDefaults.cpp | 4 +- .../Interpreters/evaluateMissingDefaults.h | 3 +- dbms/src/Storages/ColumnDefault.cpp | 69 ------------------- dbms/src/Storages/ColumnDefault.h | 9 --- dbms/src/Storages/ColumnsDescription.cpp | 18 +++++ dbms/src/Storages/ColumnsDescription.h | 1 + 13 files changed, 93 insertions(+), 105 deletions(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index e6064ec8860..407612257a7 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -60,7 +60,7 @@ #include #include #include -#include +#include #if USE_READLINE #include "Suggest.h" // Y_IGNORE @@ -893,11 +893,12 @@ private: /// Receive description of table structure. Block sample; - if (receiveSampleBlock(sample)) + ColumnsDescription columns_description; + if (receiveSampleBlock(sample, columns_description)) { /// If structure was received (thus, server has not thrown an exception), /// send our data with that structure. 
- sendData(sample); + sendData(sample, columns_description); receiveEndOfQuery(); } } @@ -935,7 +936,7 @@ private: } - void sendData(Block & sample) + void sendData(Block & sample, const ColumnsDescription & columns_description) { /// If INSERT data must be sent. const ASTInsertQuery * parsed_insert_query = typeid_cast(&*parsed_query); @@ -946,19 +947,19 @@ private: { /// Send data contained in the query. ReadBufferFromMemory data_in(parsed_insert_query->data, parsed_insert_query->end - parsed_insert_query->data); - sendDataFrom(data_in, sample); + sendDataFrom(data_in, sample, columns_description); } else if (!is_interactive) { /// Send data read from stdin. - sendDataFrom(std_in, sample); + sendDataFrom(std_in, sample, columns_description); } else throw Exception("No data to insert", ErrorCodes::NO_DATA_TO_INSERT); } - void sendDataFrom(ReadBuffer & buf, Block & sample) + void sendDataFrom(ReadBuffer & buf, Block & sample, const ColumnsDescription & columns_description) { String current_format = insert_format; @@ -970,9 +971,10 @@ private: BlockInputStreamPtr block_input = context.getInputFormat( current_format, buf, sample, insert_format_max_block_size); - auto column_defaults = ColumnDefaultsHelper::extract(sample); + const auto & column_defaults = columns_description.defaults; if (!column_defaults.empty()) block_input = std::make_shared(block_input, column_defaults, context); + BlockInputStreamPtr async_block_input = std::make_shared(block_input); async_block_input->readPrefix(); @@ -1110,7 +1112,7 @@ private: /// Receive the block that serves as an example of the structure of table where data will be inserted. 
- bool receiveSampleBlock(Block & out) + bool receiveSampleBlock(Block & out, ColumnsDescription & columns_description) { while (true) { @@ -1131,6 +1133,10 @@ private: onLogData(packet.block); break; + case Protocol::Server::TableColumns: + columns_description = ColumnsDescription::parse(packet.multistring_message[1]); + return receiveSampleBlock(out, columns_description); + default: throw NetException("Unexpected packet from server (expected Data, Exception or Log, got " + String(Protocol::Server::toString(packet.type)) + ")", ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER); diff --git a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp index d1ccc3e788f..efd4ffc55d6 100644 --- a/dbms/programs/server/TCPHandler.cpp +++ b/dbms/programs/server/TCPHandler.cpp @@ -360,17 +360,16 @@ void TCPHandler::processInsertQuery(const Settings & global_settings) */ state.io.out->writePrefix(); - /// Send block to the client - table structure. - Block block = state.io.out->getHeader(); - - /// attach column defaults to sample block (allow client to attach defaults for ommited source values) + /// Send ColumnsDescription for insertion table if (client_revision >= DBMS_MIN_REVISION_WITH_COLUMN_DEFAULTS_METADATA) { - auto db_and_table = query_context.getInsertionTable(); - ColumnDefaults column_defaults = ColumnDefaultsHelper::loadFromContext(query_context, db_and_table.first, db_and_table.second); - ColumnDefaultsHelper::attach(column_defaults, block); + const auto & db_and_table = query_context.getInsertionTable(); + if (auto * columns = ColumnsDescription::loadFromContext(query_context, db_and_table.first, db_and_table.second)) + sendTableColumns(*columns); } + /// Send block to the client - table structure. 
+ Block block = state.io.out->getHeader(); sendData(block); readData(global_settings); @@ -853,6 +852,16 @@ void TCPHandler::sendLogData(const Block & block) out->next(); } +void TCPHandler::sendTableColumns(const ColumnsDescription & columns) +{ + writeVarUInt(Protocol::Server::TableColumns, *out); + + /// Send external table name (empty name is the main table) + writeStringBinary("", *out); + writeStringBinary(columns.toString(), *out); + + out->next(); +} void TCPHandler::sendException(const Exception & e, bool with_stack_trace) { diff --git a/dbms/programs/server/TCPHandler.h b/dbms/programs/server/TCPHandler.h index af422921f07..14189da6176 100644 --- a/dbms/programs/server/TCPHandler.h +++ b/dbms/programs/server/TCPHandler.h @@ -144,6 +144,7 @@ private: void sendHello(); void sendData(const Block & block); /// Write a block to the network. void sendLogData(const Block & block); + void sendTableColumns(const ColumnsDescription & columns); void sendException(const Exception & e, bool with_stack_trace); void sendProgress(); void sendLogs(); diff --git a/dbms/src/Client/Connection.cpp b/dbms/src/Client/Connection.cpp index ce6246fba3a..50c5ca2cebc 100644 --- a/dbms/src/Client/Connection.cpp +++ b/dbms/src/Client/Connection.cpp @@ -604,6 +604,10 @@ Connection::Packet Connection::receivePacket() res.block = receiveLogData(); return res; + case Protocol::Server::TableColumns: + res.multistring_message = receiveMultistringMessage(res.type); + return res; + case Protocol::Server::EndOfStream: return res; @@ -713,6 +717,16 @@ std::unique_ptr Connection::receiveException() } +std::vector Connection::receiveMultistringMessage(UInt64 msg_type) +{ + size_t num = Protocol::Server::wordsInMessage(msg_type); + std::vector out(num); + for (size_t i = 0; i < num; ++i) + readStringBinary(out[i], *in); + return out; +} + + Progress Connection::receiveProgress() { //LOG_TRACE(log_wrapper.get(), "Receiving progress"); diff --git a/dbms/src/Client/Connection.h 
b/dbms/src/Client/Connection.h index d8229fc3463..27b7d6bd4d8 100644 --- a/dbms/src/Client/Connection.h +++ b/dbms/src/Client/Connection.h @@ -1,5 +1,7 @@ #pragma once +#include + #include #include @@ -96,6 +98,7 @@ public: Block block; std::unique_ptr exception; + std::vector multistring_message; Progress progress; BlockStreamProfileInfo profile_info; @@ -259,6 +262,7 @@ private: Block receiveLogData(); Block receiveDataImpl(BlockInputStreamPtr & stream); + std::vector receiveMultistringMessage(UInt64 msg_type); std::unique_ptr receiveException(); Progress receiveProgress(); BlockStreamProfileInfo receiveProfileInfo(); diff --git a/dbms/src/Core/Protocol.h b/dbms/src/Core/Protocol.h index 27df4341de9..28f60cce901 100644 --- a/dbms/src/Core/Protocol.h +++ b/dbms/src/Core/Protocol.h @@ -69,7 +69,8 @@ namespace Protocol Totals = 7, /// A block with totals (compressed or not). Extremes = 8, /// A block with minimums and maximums (compressed or not). TablesStatusResponse = 9, /// A response to TablesStatus request. - Log = 10 /// System logs of the query execution + Log = 10, /// System logs of the query execution + TableColumns = 11, /// Columns' description for default values calculation }; /// NOTE: If the type of packet argument would be Enum, the comparison packet >= 0 && packet < 10 @@ -78,11 +79,24 @@ namespace Protocol /// See https://www.securecoding.cert.org/confluence/display/cplusplus/INT36-CPP.+Do+not+use+out-of-range+enumeration+values inline const char * toString(UInt64 packet) { - static const char * data[] = { "Hello", "Data", "Exception", "Progress", "Pong", "EndOfStream", "ProfileInfo", "Totals", "Extremes", "TablesStatusResponse", "Log" }; - return packet < 11 + static const char * data[] = { "Hello", "Data", "Exception", "Progress", "Pong", "EndOfStream", "ProfileInfo", "Totals", + "Extremes", "TablesStatusResponse", "Log", "TableColumns" }; + return packet < 12 ? 
data[packet] : "Unknown packet"; } + + inline size_t wordsInMessage(UInt64 msg_type) + { + switch (msg_type) + { + case TableColumns: + return 2; + default: + break; + } + return 0; + } } /// Packet types that client transmits. diff --git a/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp b/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp index d232fee96ce..b78b7a59db6 100644 --- a/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp +++ b/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include namespace DB { @@ -46,9 +46,9 @@ InputStreamFromASTInsertQuery::InputStreamFromASTInsertQuery( res_stream = context.getInputFormat(format, *input_buffer_contacenated, streams.out->getHeader(), context.getSettings().max_insert_block_size); - auto column_defaults = ColumnDefaultsHelper::loadFromContext(context, ast_insert_query->database, ast_insert_query->table); - if (!column_defaults.empty()) - res_stream = std::make_shared(res_stream, column_defaults, context); + auto columns_description = ColumnsDescription::loadFromContext(context, ast_insert_query->database, ast_insert_query->table); + if (columns_description && !columns_description->defaults.empty()) + res_stream = std::make_shared(res_stream, columns_description->defaults, context); } } diff --git a/dbms/src/Interpreters/evaluateMissingDefaults.cpp b/dbms/src/Interpreters/evaluateMissingDefaults.cpp index 050078b7af4..0b330fb00cc 100644 --- a/dbms/src/Interpreters/evaluateMissingDefaults.cpp +++ b/dbms/src/Interpreters/evaluateMissingDefaults.cpp @@ -37,7 +37,7 @@ static ASTPtr requiredExpressions(Block & block, const NamesAndTypesList & requi void evaluateMissingDefaults(Block & block, const NamesAndTypesList & required_columns, const ColumnDefaults & column_defaults, - const Context & context, bool save_unneded_columns) + const Context & context, bool save_unneeded_columns) { if (column_defaults.empty()) return; @@ -46,7 +46,7 @@ void 
evaluateMissingDefaults(Block & block, if (!default_expr_list) return; - if (!save_unneded_columns) + if (!save_unneeded_columns) { auto syntax_result = SyntaxAnalyzer(context, {}).analyze(default_expr_list, block.getNamesAndTypesList()); ExpressionAnalyzer{default_expr_list, syntax_result, context}.getActions(true)->execute(block); diff --git a/dbms/src/Interpreters/evaluateMissingDefaults.h b/dbms/src/Interpreters/evaluateMissingDefaults.h index c65cb1680a2..320fb35c9cb 100644 --- a/dbms/src/Interpreters/evaluateMissingDefaults.h +++ b/dbms/src/Interpreters/evaluateMissingDefaults.h @@ -12,10 +12,9 @@ class Context; class NamesAndTypesList; struct ColumnDefault; -/// void evaluateMissingDefaults(Block & block, const NamesAndTypesList & required_columns, const std::unordered_map & column_defaults, - const Context & context, bool save_unneded_columns = true); + const Context & context, bool save_unneeded_columns = true); } diff --git a/dbms/src/Storages/ColumnDefault.cpp b/dbms/src/Storages/ColumnDefault.cpp index 2a89cda5caf..46995e307f3 100644 --- a/dbms/src/Storages/ColumnDefault.cpp +++ b/dbms/src/Storages/ColumnDefault.cpp @@ -54,73 +54,4 @@ bool operator==(const ColumnDefault & lhs, const ColumnDefault & rhs) return lhs.kind == rhs.kind && queryToString(lhs.expression) == queryToString(rhs.expression); } -ColumnDefaults ColumnDefaultsHelper::loadFromContext(const Context & context, const String & database, const String & table) -{ - if (context.getSettingsRef().insert_sample_with_metadata) - { - if (!context.isTableExist(database, table)) - return {}; - - StoragePtr storage = context.getTable(database, table); - const ColumnsDescription & table_columns = storage->getColumns(); - return table_columns.defaults; - } - return {}; -} - -void ColumnDefaultsHelper::attach(const ColumnDefaults & column_defaults, Block & sample) -{ - if (column_defaults.empty()) - return; - - for (auto pr : column_defaults) - { - std::stringstream ss; - ss << *pr.second.expression; - 
- /// Serialize defaults to special columns names. - /// It looks better to send expression as a column data but sample block has 0 rows. - ColumnWithTypeAndName col; - col.type = std::make_shared(); - col.name = Block::mkSpecialColumnName(toString(pr.second.kind) + ' ' + pr.first + ' ' + ss.str()); - col.column = col.type->createColumnConst(sample.rows(), ""); - - sample.insert(std::move(col)); - } -} - -ColumnDefaults ColumnDefaultsHelper::extract(Block & sample) -{ - ParserTernaryOperatorExpression parser; - ColumnDefaults column_defaults; - std::set pos_to_erase; - - for (size_t i = 0; i < sample.columns(); ++i) - { - const ColumnWithTypeAndName & column_wtn = sample.safeGetByPosition(i); - - if (Block::isSpecialColumnName(column_wtn.name, AliasNames::DEFAULT) || - Block::isSpecialColumnName(column_wtn.name, AliasNames::MATERIALIZED) || - Block::isSpecialColumnName(column_wtn.name, AliasNames::ALIAS)) - { - String str_kind, column_name; - std::stringstream ss; - ss << column_wtn.name; - ss >> str_kind >> column_name; - size_t expression_pos = str_kind.size() + column_name.size() + 3; - StringRef expression(&column_wtn.name[expression_pos], column_wtn.name.size() - expression_pos); - - ColumnDefault def; - def.kind = columnDefaultKindFromString(str_kind); - def.expression = parseQuery(parser, expression.data, expression.size); - - column_defaults.emplace(column_name, def); - pos_to_erase.insert(i); - } - } - - sample.erase(pos_to_erase); - return column_defaults; -} - } diff --git a/dbms/src/Storages/ColumnDefault.h b/dbms/src/Storages/ColumnDefault.h index 00693b54ad5..292c0cf7495 100644 --- a/dbms/src/Storages/ColumnDefault.h +++ b/dbms/src/Storages/ColumnDefault.h @@ -36,13 +36,4 @@ bool operator==(const ColumnDefault & lhs, const ColumnDefault & rhs); using ColumnDefaults = std::unordered_map; -/// Static methods to manipulate column defaults -struct ColumnDefaultsHelper -{ - static void attach(const ColumnDefaults & column_defaults, Block & sample); - 
static ColumnDefaults extract(Block & sample); - - static ColumnDefaults loadFromContext(const Context & context, const String & database, const String & table); -}; - } diff --git a/dbms/src/Storages/ColumnsDescription.cpp b/dbms/src/Storages/ColumnsDescription.cpp index cb67d01a4ea..c37eaa2fc46 100644 --- a/dbms/src/Storages/ColumnsDescription.cpp +++ b/dbms/src/Storages/ColumnsDescription.cpp @@ -10,6 +10,8 @@ #include #include #include +#include +#include #include #include @@ -162,4 +164,20 @@ ColumnsDescription ColumnsDescription::parse(const String & str) return result; } +const ColumnsDescription * ColumnsDescription::loadFromContext(const Context & context, const String & db, const String & table) +{ + if (context.getSettingsRef().insert_sample_with_metadata) + { + auto db_and_table = context.getInsertionTable(); + + if (context.isTableExist(db, table)) + { + StoragePtr storage = context.getTable(db, table); + return &storage->getColumns(); + } + } + + return nullptr; +} + } diff --git a/dbms/src/Storages/ColumnsDescription.h b/dbms/src/Storages/ColumnsDescription.h index 288d2712b3b..f06a9221dfd 100644 --- a/dbms/src/Storages/ColumnsDescription.h +++ b/dbms/src/Storages/ColumnsDescription.h @@ -57,6 +57,7 @@ struct ColumnsDescription String toString() const; static ColumnsDescription parse(const String & str); + static const ColumnsDescription * loadFromContext(const Context & context, const String & db, const String & table); }; } From dcb003bebff233fb85551ca2412dc33457713857 Mon Sep 17 00:00:00 2001 From: chertus Date: Tue, 4 Dec 2018 23:10:43 +0300 Subject: [PATCH 52/88] fix for review: rename variable CLICKHOUSE-3578 --- dbms/src/Core/BlockInfo.cpp | 6 +++--- dbms/src/Core/BlockInfo.h | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/dbms/src/Core/BlockInfo.cpp b/dbms/src/Core/BlockInfo.cpp index 3c8d1ccef7c..aae9723d0ed 100644 --- a/dbms/src/Core/BlockInfo.cpp +++ b/dbms/src/Core/BlockInfo.cpp @@ -60,7 +60,7 @@ void 
BlockInfo::read(ReadBuffer & in) void BlockMissingValues::setBit(size_t column_idx, size_t row_idx) { - RowsBitMask & mask = columns_defaults[column_idx]; + RowsBitMask & mask = rows_mask_by_column_id[column_idx]; mask.resize(row_idx + 1); mask[row_idx] = true; } @@ -68,8 +68,8 @@ void BlockMissingValues::setBit(size_t column_idx, size_t row_idx) const BlockMissingValues::RowsBitMask & BlockMissingValues::getDefaultsBitmask(size_t column_idx) const { static RowsBitMask none; - auto it = columns_defaults.find(column_idx); - if (it != columns_defaults.end()) + auto it = rows_mask_by_column_id.find(column_idx); + if (it != rows_mask_by_column_id.end()) return it->second; return none; } diff --git a/dbms/src/Core/BlockInfo.h b/dbms/src/Core/BlockInfo.h index 9e23de688af..32a09d8cf70 100644 --- a/dbms/src/Core/BlockInfo.h +++ b/dbms/src/Core/BlockInfo.h @@ -53,16 +53,16 @@ public: const RowsBitMask & getDefaultsBitmask(size_t column_idx) const; void setBit(size_t column_idx, size_t row_idx); - bool empty() const { return columns_defaults.empty(); } - size_t size() const { return columns_defaults.size(); } - void clear() { columns_defaults.clear(); } + bool empty() const { return rows_mask_by_column_id.empty(); } + size_t size() const { return rows_mask_by_column_id.size(); } + void clear() { rows_mask_by_column_id.clear(); } private: using RowsMaskByColumnId = std::unordered_map; - /// If columns_defaults[column_id][row_id] is true related value in Block should be replaced with column default. + /// If rows_mask_by_column_id[column_id][row_id] is true related value in Block should be replaced with column default. /// It could contain less columns and rows then related block. 
- RowsMaskByColumnId columns_defaults; + RowsMaskByColumnId rows_mask_by_column_id; }; } From 1a7313eaa009f2bc149a7c18f87eb551478f103e Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 5 Dec 2018 15:27:21 +0300 Subject: [PATCH 53/88] resolve class name conflict --- ...lockInputStream.cpp => AddingMissedBlockInputStream.cpp} | 6 +++--- ...ultBlockInputStream.h => AddingMissedBlockInputStream.h} | 4 ++-- dbms/src/DataStreams/IBlockInputStream.h | 6 ++++++ dbms/src/Storages/StorageBuffer.cpp | 4 ++-- 4 files changed, 13 insertions(+), 7 deletions(-) rename dbms/src/DataStreams/{AddingDefaultBlockInputStream.cpp => AddingMissedBlockInputStream.cpp} (75%) rename dbms/src/DataStreams/{AddingDefaultBlockInputStream.h => AddingMissedBlockInputStream.h} (90%) diff --git a/dbms/src/DataStreams/AddingDefaultBlockInputStream.cpp b/dbms/src/DataStreams/AddingMissedBlockInputStream.cpp similarity index 75% rename from dbms/src/DataStreams/AddingDefaultBlockInputStream.cpp rename to dbms/src/DataStreams/AddingMissedBlockInputStream.cpp index 749eebda1a5..e32a7024824 100644 --- a/dbms/src/DataStreams/AddingDefaultBlockInputStream.cpp +++ b/dbms/src/DataStreams/AddingMissedBlockInputStream.cpp @@ -1,11 +1,11 @@ -#include +#include #include namespace DB { -AddingDefaultBlockInputStream::AddingDefaultBlockInputStream( +AddingMissedBlockInputStream::AddingMissedBlockInputStream( const BlockInputStreamPtr & input_, const Block & header_, const ColumnDefaults & column_defaults_, @@ -16,7 +16,7 @@ AddingDefaultBlockInputStream::AddingDefaultBlockInputStream( children.emplace_back(input); } -Block AddingDefaultBlockInputStream::readImpl() +Block AddingMissedBlockInputStream::readImpl() { Block src = children.back()->read(); if (!src) diff --git a/dbms/src/DataStreams/AddingDefaultBlockInputStream.h b/dbms/src/DataStreams/AddingMissedBlockInputStream.h similarity index 90% rename from dbms/src/DataStreams/AddingDefaultBlockInputStream.h rename to 
dbms/src/DataStreams/AddingMissedBlockInputStream.h index c0afffbfc17..07b37a56d22 100644 --- a/dbms/src/DataStreams/AddingDefaultBlockInputStream.h +++ b/dbms/src/DataStreams/AddingMissedBlockInputStream.h @@ -14,10 +14,10 @@ namespace DB * 3. Columns that materialized from other columns (materialized columns) * All three types of columns are materialized (not constants). */ -class AddingDefaultBlockInputStream : public IProfilingBlockInputStream +class AddingMissedBlockInputStream : public IProfilingBlockInputStream { public: - AddingDefaultBlockInputStream( + AddingMissedBlockInputStream( const BlockInputStreamPtr & input_, const Block & header_, const ColumnDefaults & column_defaults_, diff --git a/dbms/src/DataStreams/IBlockInputStream.h b/dbms/src/DataStreams/IBlockInputStream.h index 3e7c59ff78f..c82761dc02b 100644 --- a/dbms/src/DataStreams/IBlockInputStream.h +++ b/dbms/src/DataStreams/IBlockInputStream.h @@ -63,6 +63,12 @@ public: */ virtual Block read() = 0; + virtual const BlockMissingValues & getMissingValues() const + { + static const BlockMissingValues none; + return none; + } + /** Read something before starting all data or after the end of all data. * In the `readSuffix` function, you can implement a finalization that can lead to an exception. * readPrefix() must be called before the first call to read(). 
diff --git a/dbms/src/Storages/StorageBuffer.cpp b/dbms/src/Storages/StorageBuffer.cpp index 020824e81c3..b7707d37ec2 100644 --- a/dbms/src/Storages/StorageBuffer.cpp +++ b/dbms/src/Storages/StorageBuffer.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include @@ -198,7 +198,7 @@ BlockInputStreams StorageBuffer::read( streams_from_dst = destination->read(columns_intersection, query_info, context, processed_stage, max_block_size, num_streams); for (auto & stream : streams_from_dst) { - stream = std::make_shared( + stream = std::make_shared( stream, header_after_adding_defaults, getColumns().defaults, context); stream = std::make_shared( context, stream, header, ConvertingBlockInputStream::MatchColumnsMode::Name); From 8500335ef5c8bbe08c27ddf3bb1405068bd4bf87 Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 5 Dec 2018 15:49:15 +0300 Subject: [PATCH 54/88] cleanup unused code --- dbms/src/Core/Block.h | 11 ----------- dbms/src/Storages/ColumnDefault.cpp | 7 ------- 2 files changed, 18 deletions(-) diff --git a/dbms/src/Core/Block.h b/dbms/src/Core/Block.h index 9cddbb21cda..a3198a0fb74 100644 --- a/dbms/src/Core/Block.h +++ b/dbms/src/Core/Block.h @@ -34,9 +34,6 @@ private: Container data; IndexByName index_by_name; - /// Regular column can't start with ' ', so it's possible to attach some hidden columns with a prefix - constexpr static const char SPECIAL_COLUMN_PREFIX = ' '; - public: BlockInfo info; @@ -103,14 +100,6 @@ public: operator bool() const { return !data.empty(); } bool operator!() const { return data.empty(); } - static String mkSpecialColumnName(const String & col_name) { return String(1, SPECIAL_COLUMN_PREFIX) + col_name; } - static bool isSpecialColumnName(const String & col_name) { return !col_name.empty() && col_name[0] == SPECIAL_COLUMN_PREFIX; } - - static bool isSpecialColumnName(const String & col_name, const String & pattern) - { - return col_name.find(String(1, SPECIAL_COLUMN_PREFIX) + pattern) == 0; - } - 
/** Get a list of column names separated by commas. */ std::string dumpNames() const; diff --git a/dbms/src/Storages/ColumnDefault.cpp b/dbms/src/Storages/ColumnDefault.cpp index 55dbbbc5038..19ba69c2d94 100644 --- a/dbms/src/Storages/ColumnDefault.cpp +++ b/dbms/src/Storages/ColumnDefault.cpp @@ -1,12 +1,5 @@ #include -#include -#include #include -#include -#include -#include -#include -#include namespace { From ff8fb077a4833ada3561fefa6d7d320456d2c4aa Mon Sep 17 00:00:00 2001 From: chertus Date: Thu, 6 Dec 2018 18:29:55 +0300 Subject: [PATCH 55/88] InDepthNodeVisitor: extract AST traverse from visitor logic CLICKHOUSE-3996 --- dbms/src/Interpreters/InDepthNodeVisitor.h | 53 ++++++++++++ .../PredicateExpressionsOptimizer.cpp | 3 +- dbms/src/Interpreters/QueryAliasesVisitor.h | 1 - dbms/src/Interpreters/SyntaxAnalyzer.cpp | 3 +- .../TranslateQualifiedNamesVisitor.cpp | 83 ++++++++++++------- .../TranslateQualifiedNamesVisitor.h | 73 ++++++---------- 6 files changed, 135 insertions(+), 81 deletions(-) create mode 100644 dbms/src/Interpreters/InDepthNodeVisitor.h diff --git a/dbms/src/Interpreters/InDepthNodeVisitor.h b/dbms/src/Interpreters/InDepthNodeVisitor.h new file mode 100644 index 00000000000..997013aff1f --- /dev/null +++ b/dbms/src/Interpreters/InDepthNodeVisitor.h @@ -0,0 +1,53 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +/// Visits AST tree in depth, call fucntions for nodes according to Matcher type data. +/// You need to define Data, label, visit() and needChildVisit() in Matcher class. +template +class InDepthNodeVisitor +{ +public: + using Data = typename Matcher::Data; + + InDepthNodeVisitor(Data & data_, std::ostream * ostr_ = nullptr) + : data(data_), + visit_depth(0), + ostr(ostr_) + {} + + void visit(ASTPtr & ast) + { + DumpASTNode dump(*ast, ostr, visit_depth, Matcher::label); + + if constexpr (!_topToBottom) + visitChildren(ast); + + auto additional_nodes = Matcher::visit(ast, data); + /// visit additional nodes (ex. 
only part of children) + for (ASTPtr & node : additional_nodes) + visit(node); + + if constexpr (_topToBottom) + visitChildren(ast); + } + +private: + MatcherData & data; + size_t visit_depth; + std::ostream * ostr; + + void visitChildren(ASTPtr & ast) + { + for (auto & child : ast->children) + if (Matcher::needChildVisit(ast, child)) + visit(child); + } +}; + +} diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp index a29e161126d..84ca8b0a088 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -311,7 +311,8 @@ ASTs PredicateExpressionsOptimizer::getSelectQueryProjectionColumns(ASTPtr & ast std::unordered_map aliases; std::vector tables = getDatabaseAndTables(*select_query, context.getCurrentDatabase()); - TranslateQualifiedNamesVisitor({}, tables).visit(ast); + TranslateQualifiedNamesMatcher::Data qn_visitor_data{{}, tables}; + TranslateQualifiedNamesVisitor(qn_visitor_data).visit(ast); QueryAliasesVisitor query_aliases_visitor(aliases); query_aliases_visitor.visit(ast); QueryNormalizer(ast, aliases, settings, {}, {}).perform(); diff --git a/dbms/src/Interpreters/QueryAliasesVisitor.h b/dbms/src/Interpreters/QueryAliasesVisitor.h index fd385e8b774..cb8548bd3cf 100644 --- a/dbms/src/Interpreters/QueryAliasesVisitor.h +++ b/dbms/src/Interpreters/QueryAliasesVisitor.h @@ -3,7 +3,6 @@ #include #include #include -#include namespace DB { diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index 4f6a4b5befe..c6a15058b5f 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -228,7 +228,8 @@ void translateQualifiedNames(ASTPtr & query, ASTSelectQuery * select_query, std::vector tables = getDatabaseAndTables(*select_query, context.getCurrentDatabase()); LogAST log; - TranslateQualifiedNamesVisitor visitor(source_columns, 
tables, log.stream()); + TranslateQualifiedNamesMatcher::Data visitor_data{source_columns, tables}; + TranslateQualifiedNamesVisitor visitor(visitor_data, log.stream()); visitor.visit(query); } diff --git a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index cfae71cbac7..c169691121f 100644 --- a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -1,5 +1,6 @@ #include +#include #include #include @@ -15,10 +16,45 @@ namespace DB namespace ErrorCodes { extern const int UNKNOWN_IDENTIFIER; + extern const int UNKNOWN_ELEMENT_IN_AST; + extern const int LOGICAL_ERROR; } -void TranslateQualifiedNamesVisitor::visit(ASTIdentifier & identifier, ASTPtr & ast, const DumpASTNode & dump) const +bool TranslateQualifiedNamesMatcher::needChildVisit(ASTPtr & node, const ASTPtr & child) { + /// Do not go to FROM, JOIN, subqueries. + if (typeid_cast(child.get()) || + typeid_cast(child.get())) + return false; + + /// Processed nodes. Do not go into children. + if (typeid_cast(node.get()) || + typeid_cast(node.get()) || + typeid_cast(node.get())) + return false; + + /// ASTSelectQuery + others + return true; +} + +std::vector TranslateQualifiedNamesMatcher::visit(ASTPtr & ast, Data & data) +{ + if (auto * t = typeid_cast(ast.get())) + return visit(*t, ast, data); + if (auto * t = typeid_cast(ast.get())) + return visit(*t, ast, data); + if (auto * t = typeid_cast(ast.get())) + return visit(*t, ast, data); + if (auto * t = typeid_cast(ast.get())) + return visit(*t, ast, data); + return {}; +} + +std::vector TranslateQualifiedNamesMatcher::visit(const ASTIdentifier & identifier, ASTPtr & ast, Data & data) +{ + const NameSet & source_columns = data.source_columns; + const std::vector & tables = data.tables; + if (identifier.general()) { /// Select first table name with max number of qualifiers which can be stripped. 
@@ -38,23 +74,23 @@ void TranslateQualifiedNamesVisitor::visit(ASTIdentifier & identifier, ASTPtr & } if (max_num_qualifiers_to_strip) - { - dump.print(String("stripIdentifier ") + identifier.name, max_num_qualifiers_to_strip); stripIdentifier(ast, max_num_qualifiers_to_strip); - } /// In case if column from the joined table are in source columns, change it's name to qualified. if (best_table_pos && source_columns.count(ast->getColumnName())) { const DatabaseAndTableWithAlias & table = tables[best_table_pos]; table.makeQualifiedName(ast); - dump.print("makeQualifiedName", table.database + '.' + table.table + ' ' + ast->getColumnName()); } } + + return {}; } -void TranslateQualifiedNamesVisitor::visit(ASTQualifiedAsterisk &, ASTPtr & ast, const DumpASTNode &) const +std::vector TranslateQualifiedNamesMatcher::visit(const ASTQualifiedAsterisk & , const ASTPtr & ast, Data & data) { + const std::vector & tables = data.tables; + if (ast->children.size() != 1) throw Exception("Logical error: qualified asterisk must have exactly one child", ErrorCodes::LOGICAL_ERROR); @@ -76,51 +112,40 @@ void TranslateQualifiedNamesVisitor::visit(ASTQualifiedAsterisk &, ASTPtr & ast, if (!table_names.database.empty() && db_and_table.database == table_names.database && db_and_table.table == table_names.table) - return; + return {}; } else if (num_components == 0) { if ((!table_names.table.empty() && db_and_table.table == table_names.table) || (!table_names.alias.empty() && db_and_table.table == table_names.alias)) - return; + return {}; } } throw Exception("Unknown qualified identifier: " + ident->getAliasOrColumnName(), ErrorCodes::UNKNOWN_IDENTIFIER); } -void TranslateQualifiedNamesVisitor::visit(ASTTableJoin & join, ASTPtr &, const DumpASTNode &) const +std::vector TranslateQualifiedNamesMatcher::visit(const ASTTableJoin & join, const ASTPtr & , Data &) { /// Don't translate on_expression here in order to resolve equation parts later. 
+ std::vector out; if (join.using_expression_list) - visit(join.using_expression_list); + out.push_back(join.using_expression_list); + return out; } -void TranslateQualifiedNamesVisitor::visit(ASTSelectQuery & select, ASTPtr & ast, const DumpASTNode &) const +std::vector TranslateQualifiedNamesMatcher::visit(const ASTSelectQuery & select, const ASTPtr & , Data &) { /// If the WHERE clause or HAVING consists of a single quailified column, the reference must be translated not only in children, /// but also in where_expression and having_expression. + std::vector out; if (select.prewhere_expression) - visit(select.prewhere_expression); + out.push_back(select.prewhere_expression); if (select.where_expression) - visit(select.where_expression); + out.push_back(select.where_expression); if (select.having_expression) - visit(select.having_expression); - - visitChildren(ast); -} - -void TranslateQualifiedNamesVisitor::visitChildren(ASTPtr & ast) const -{ - for (auto & child : ast->children) - { - /// Do not go to FROM, JOIN, subqueries. - if (!typeid_cast(child.get()) - && !typeid_cast(child.get())) - { - visit(child); - } - } + out.push_back(select.having_expression); + return out; } } diff --git a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h index 0e7079545f3..bebf11d83e2 100644 --- a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h +++ b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h @@ -1,67 +1,42 @@ #pragma once -#include #include -#include -#include -#include -#include -#include -#include #include +#include namespace DB { -/// Visitors consist of functions with unified interface 'void visit(Casted & x, ASTPtr & y)', there x is y, successfully casted to Casted. -/// Both types and fuction could have const specifiers. The second argument is used by visitor to replaces AST node (y) if needed. 
+class ASTIdentifier; +class ASTQualifiedAsterisk; +struct ASTTableJoin; +class ASTSelectQuery; -/// It visits nodes, find columns (general identifiers and asterisks) and translate their names according to tables' names. -class TranslateQualifiedNamesVisitor +/// Visit one node for names qualification. @sa InDepthNodeVisitor. +class TranslateQualifiedNamesMatcher { public: - TranslateQualifiedNamesVisitor(const NameSet & source_columns_, const std::vector & tables_, - std::ostream * ostr_ = nullptr) - : source_columns(source_columns_), - tables(tables_), - visit_depth(0), - ostr(ostr_) - {} - - void visit(ASTPtr & ast) const + struct Data { - if (!tryVisit(ast) && - !tryVisit(ast) && - !tryVisit(ast) && - !tryVisit(ast)) - visitChildren(ast); /// default: do nothing, visit children - } + const NameSet & source_columns; + const std::vector & tables; + }; + + static constexpr const char * label = __FILE__; + + static std::vector visit(ASTPtr & ast, Data & data); + static bool needChildVisit(ASTPtr & node, const ASTPtr & child); private: - const NameSet & source_columns; - const std::vector & tables; - mutable size_t visit_depth; - std::ostream * ostr; - - void visit(ASTIdentifier & node, ASTPtr & ast, const DumpASTNode & dump) const; - void visit(ASTQualifiedAsterisk & node, ASTPtr & ast, const DumpASTNode & dump) const; - void visit(ASTTableJoin & node, ASTPtr & ast, const DumpASTNode & dump) const; - void visit(ASTSelectQuery & ast, ASTPtr &, const DumpASTNode & dump) const; - - void visitChildren(ASTPtr &) const; - - template - bool tryVisit(ASTPtr & ast) const - { - if (T * t = typeid_cast(ast.get())) - { - DumpASTNode dump(*ast, ostr, visit_depth, "translateQualifiedNames"); - visit(*t, ast, dump); - return true; - } - return false; - } + static std::vector visit(const ASTIdentifier & node, ASTPtr & ast, Data &); + static std::vector visit(const ASTQualifiedAsterisk & node, const ASTPtr & ast, Data &); + static std::vector visit(const ASTTableJoin & node, const 
ASTPtr & ast, Data &); + static std::vector visit(const ASTSelectQuery & node, const ASTPtr & ast, Data &); }; +/// Visits AST for names qualification. +/// It finds columns (general identifiers and asterisks) and translate their names according to tables' names. +using TranslateQualifiedNamesVisitor = InDepthNodeVisitor; + } From 60dc8fcac245d15aac1b8b3b8edb8cef4b7deb14 Mon Sep 17 00:00:00 2001 From: chertus Date: Thu, 6 Dec 2018 20:20:17 +0300 Subject: [PATCH 56/88] fixes for review CLICKHOUSE-3578 --- dbms/src/Client/Connection.cpp | 2 +- dbms/src/Core/Protocol.h | 2 +- dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp | 4 ++-- dbms/src/DataStreams/AddingMissedBlockInputStream.h | 2 +- dbms/src/Formats/IRowInputStream.h | 7 +++---- dbms/src/Storages/ColumnDefault.h | 3 --- dbms/src/Storages/ColumnsDescription.cpp | 2 -- 7 files changed, 8 insertions(+), 14 deletions(-) diff --git a/dbms/src/Client/Connection.cpp b/dbms/src/Client/Connection.cpp index 82c76324a40..923c8179ca1 100644 --- a/dbms/src/Client/Connection.cpp +++ b/dbms/src/Client/Connection.cpp @@ -718,7 +718,7 @@ std::unique_ptr Connection::receiveException() std::vector Connection::receiveMultistringMessage(UInt64 msg_type) { - size_t num = Protocol::Server::wordsInMessage(msg_type); + size_t num = Protocol::Server::stringsInMessage(msg_type); std::vector out(num); for (size_t i = 0; i < num; ++i) readStringBinary(out[i], *in); diff --git a/dbms/src/Core/Protocol.h b/dbms/src/Core/Protocol.h index 28f60cce901..b50d018f9ce 100644 --- a/dbms/src/Core/Protocol.h +++ b/dbms/src/Core/Protocol.h @@ -86,7 +86,7 @@ namespace Protocol : "Unknown packet"; } - inline size_t wordsInMessage(UInt64 msg_type) + inline size_t stringsInMessage(UInt64 msg_type) { switch (msg_type) { diff --git a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp index 9ee6b15e1b9..6b959cbc05b 100644 --- a/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp +++ 
b/dbms/src/DataStreams/AddingDefaultsBlockInputStream.cpp @@ -118,12 +118,12 @@ void AddingDefaultsBlockInputStream::checkCalculated(const ColumnWithTypeAndName size_t column_size = col_read.column->size(); if (column_size != col_defaults.column->size()) - throw Exception("Mismach column sizes while adding defaults", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); + throw Exception("Mismatch column sizes while adding defaults", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); if (column_size < defaults_needed) throw Exception("Unexpected defaults count", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); - if (col_read.type->getTypeId() != col_defaults.type->getTypeId()) + if (!col_read.type->equals(*col_defaults.type)) throw Exception("Mismach column types while adding defaults", ErrorCodes::TYPE_MISMATCH); } diff --git a/dbms/src/DataStreams/AddingMissedBlockInputStream.h b/dbms/src/DataStreams/AddingMissedBlockInputStream.h index 07b37a56d22..b3b98509645 100644 --- a/dbms/src/DataStreams/AddingMissedBlockInputStream.h +++ b/dbms/src/DataStreams/AddingMissedBlockInputStream.h @@ -23,7 +23,7 @@ public: const ColumnDefaults & column_defaults_, const Context & context_); - String getName() const override { return "AddingDefault"; } + String getName() const override { return "AddingMissed"; } Block getHeader() const override { return header; } private: diff --git a/dbms/src/Formats/IRowInputStream.h b/dbms/src/Formats/IRowInputStream.h index 65b98d2524f..045b2343e44 100644 --- a/dbms/src/Formats/IRowInputStream.h +++ b/dbms/src/Formats/IRowInputStream.h @@ -10,12 +10,11 @@ namespace DB { -/// A way to set some extentions to read and return extra information too. IRowInputStream.extendedRead() output. +/// Contains extra information about read data. struct RowReadExtension { - /// IRowInputStream.extendedRead() output value. - /// Contains one bit per column in resently read row. IRowInputStream could leave it empty, or partialy set. 
- /// It should contain true for columns that actually read from the source and false for defaults. + /// IRowInputStream.read() output. It contains non zero for columns that actually read from the source and zero otherwise. + /// It's used to attach defaults for partially filled rows. std::vector read_columns; }; diff --git a/dbms/src/Storages/ColumnDefault.h b/dbms/src/Storages/ColumnDefault.h index 292c0cf7495..0667ce4ed57 100644 --- a/dbms/src/Storages/ColumnDefault.h +++ b/dbms/src/Storages/ColumnDefault.h @@ -9,9 +9,6 @@ namespace DB { -class Context; -class Block; - enum class ColumnDefaultKind { Default, diff --git a/dbms/src/Storages/ColumnsDescription.cpp b/dbms/src/Storages/ColumnsDescription.cpp index 049845b75ec..0926fef14e8 100644 --- a/dbms/src/Storages/ColumnsDescription.cpp +++ b/dbms/src/Storages/ColumnsDescription.cpp @@ -221,8 +221,6 @@ const ColumnsDescription * ColumnsDescription::loadFromContext(const Context & c { if (context.getSettingsRef().insert_sample_with_metadata) { - auto db_and_table = context.getInsertionTable(); - if (context.isTableExist(db, table)) { StoragePtr storage = context.getTable(db, table); From 6fad51d64207bdc82f2844af0ced0dfa09360550 Mon Sep 17 00:00:00 2001 From: chertus Date: Thu, 6 Dec 2018 22:02:42 +0300 Subject: [PATCH 57/88] QueryAliasesMatcher via InDepthNodeVisitor (bottom to top) CLICKHOUSE-3996 --- dbms/src/Interpreters/InDepthNodeVisitor.h | 2 +- .../PredicateExpressionsOptimizer.cpp | 4 +- dbms/src/Interpreters/QueryAliasesVisitor.cpp | 84 +++++++++++-------- dbms/src/Interpreters/QueryAliasesVisitor.h | 55 ++++-------- dbms/src/Interpreters/SyntaxAnalyzer.cpp | 4 +- 5 files changed, 69 insertions(+), 80 deletions(-) diff --git a/dbms/src/Interpreters/InDepthNodeVisitor.h b/dbms/src/Interpreters/InDepthNodeVisitor.h index 997013aff1f..4292da7fbdb 100644 --- a/dbms/src/Interpreters/InDepthNodeVisitor.h +++ b/dbms/src/Interpreters/InDepthNodeVisitor.h @@ -38,7 +38,7 @@ public: } private: - MatcherData & 
data; + Data & data; size_t visit_depth; std::ostream * ostr; diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp index 84ca8b0a088..8e95773f72c 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -313,8 +313,8 @@ ASTs PredicateExpressionsOptimizer::getSelectQueryProjectionColumns(ASTPtr & ast TranslateQualifiedNamesMatcher::Data qn_visitor_data{{}, tables}; TranslateQualifiedNamesVisitor(qn_visitor_data).visit(ast); - QueryAliasesVisitor query_aliases_visitor(aliases); - query_aliases_visitor.visit(ast); + QueryAliasesMatcher::Data query_aliases_data{aliases}; + QueryAliasesVisitor(query_aliases_data).visit(ast); QueryNormalizer(ast, aliases, settings, {}, {}).perform(); for (const auto & projection_column : select_query->select_expression_list->children) diff --git a/dbms/src/Interpreters/QueryAliasesVisitor.cpp b/dbms/src/Interpreters/QueryAliasesVisitor.cpp index 22818f96ffd..cd7baba0061 100644 --- a/dbms/src/Interpreters/QueryAliasesVisitor.cpp +++ b/dbms/src/Interpreters/QueryAliasesVisitor.cpp @@ -1,5 +1,6 @@ #include #include + #include #include #include @@ -16,33 +17,62 @@ namespace ErrorCodes extern const int MULTIPLE_EXPRESSIONS_FOR_ALIAS; } -void QueryAliasesVisitor::visit(const ASTPtr & ast) const +static String wrongAliasMessage(const ASTPtr & ast, const ASTPtr & prev_ast, const String & alias) { - /// Bottom-up traversal. We do not go into subqueries. 
- visitChildren(ast); + std::stringstream message; + message << "Different expressions with the same alias " << backQuoteIfNeed(alias) << ":" << std::endl; + formatAST(*ast, message, false, true); + message << std::endl << "and" << std::endl; + formatAST(*prev_ast, message, false, true); + message << std::endl; + return message.str(); +} - if (!tryVisit(ast)) - { - DumpASTNode dump(*ast, ostr, visit_depth, "getQueryAliases"); - visitOther(ast); - } + +bool QueryAliasesMatcher::needChildVisit(ASTPtr & node, const ASTPtr &) +{ + /// Don't descent into table functions and subqueries and special case for ArrayJoin. + if (typeid_cast(node.get()) || + typeid_cast(node.get()) || + typeid_cast(node.get())) + return false; + return true; +} + +std::vector QueryAliasesMatcher::visit(ASTPtr & ast, Data & data) +{ + if (auto * t = typeid_cast(ast.get())) + return visit(*t, ast, data); + if (auto * t = typeid_cast(ast.get())) + return visit(*t, ast, data); + + visitOther(ast, data); + return {}; } /// The top-level aliases in the ARRAY JOIN section have a special meaning, we will not add them /// (skip the expression list itself and its children). -void QueryAliasesVisitor::visit(const ASTArrayJoin &, const ASTPtr & ast) const +std::vector QueryAliasesMatcher::visit(const ASTArrayJoin &, const ASTPtr & ast, Data & data) { + visitOther(ast, data); + + /// @warning It breaks botom-to-top order (childs processed after node here), could lead to some effects. + /// It's possible to add ast back to result vec to save order. It will need two phase ASTArrayJoin visit (setting phase in data). + std::vector out; for (auto & child1 : ast->children) for (auto & child2 : child1->children) for (auto & child3 : child2->children) - visit(child3); + out.push_back(child3); + return out; } /// set unique aliases for all subqueries. 
this is needed, because: /// 1) content of subqueries could change after recursive analysis, and auto-generated column names could become incorrect /// 2) result of different scalar subqueries can be cached inside expressions compilation cache and must have different names -void QueryAliasesVisitor::visit(ASTSubquery & subquery, const ASTPtr & ast) const +std::vector QueryAliasesMatcher::visit(ASTSubquery & subquery, const ASTPtr & ast, Data & data) { + Aliases & aliases = data.aliases; + static std::atomic_uint64_t subquery_index = 0; if (subquery.alias.empty()) @@ -59,42 +89,22 @@ void QueryAliasesVisitor::visit(ASTSubquery & subquery, const ASTPtr & ast) cons aliases[alias] = ast; } else - visitOther(ast); + visitOther(ast, data); + return {}; } -void QueryAliasesVisitor::visitOther(const ASTPtr & ast) const +void QueryAliasesMatcher::visitOther(const ASTPtr & ast, Data & data) { + Aliases & aliases = data.aliases; + String alias = ast->tryGetAlias(); if (!alias.empty()) { if (aliases.count(alias) && ast->getTreeHash() != aliases[alias]->getTreeHash()) - throw Exception(wrongAliasMessage(ast, alias), ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS); + throw Exception(wrongAliasMessage(ast, aliases[alias], alias), ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS); aliases[alias] = ast; } } -void QueryAliasesVisitor::visitChildren(const ASTPtr & ast) const -{ - for (auto & child : ast->children) - { - /// Don't descent into table functions and subqueries and special case for ArrayJoin. 
- if (!tryVisit(ast) && - !tryVisit(ast) && - !tryVisit(ast)) - visit(child); - } -} - -String QueryAliasesVisitor::wrongAliasMessage(const ASTPtr & ast, const String & alias) const -{ - std::stringstream message; - message << "Different expressions with the same alias " << backQuoteIfNeed(alias) << ":" << std::endl; - formatAST(*ast, message, false, true); - message << std::endl << "and" << std::endl; - formatAST(*aliases[alias], message, false, true); - message << std::endl; - return message.str(); -} - } diff --git a/dbms/src/Interpreters/QueryAliasesVisitor.h b/dbms/src/Interpreters/QueryAliasesVisitor.h index cb8548bd3cf..aae211e6e83 100644 --- a/dbms/src/Interpreters/QueryAliasesVisitor.h +++ b/dbms/src/Interpreters/QueryAliasesVisitor.h @@ -1,8 +1,7 @@ #pragma once -#include -#include #include +#include namespace DB { @@ -14,47 +13,27 @@ struct ASTArrayJoin; using Aliases = std::unordered_map; -/// Visitors consist of functions with unified interface 'void visit(Casted & x, ASTPtr & y)', there x is y, successfully casted to Casted. -/// Both types and fuction could have const specifiers. The second argument is used by visitor to replaces AST node (y) if needed. - -/// Visits AST nodes and collect their aliases in one map (with links to source nodes). -class QueryAliasesVisitor +/// Visits AST node to collect aliases. 
+class QueryAliasesMatcher { public: - QueryAliasesVisitor(Aliases & aliases_, std::ostream * ostr_ = nullptr) - : aliases(aliases_), - visit_depth(0), - ostr(ostr_) - {} + struct Data + { + Aliases & aliases; + }; - void visit(const ASTPtr & ast) const; + static constexpr const char * label = __FILE__; + + static std::vector visit(ASTPtr & ast, Data & data); + static bool needChildVisit(ASTPtr & node, const ASTPtr & child); private: - Aliases & aliases; - mutable size_t visit_depth; - std::ostream * ostr; - - void visit(const ASTTableExpression &, const ASTPtr &) const {} - void visit(const ASTSelectWithUnionQuery &, const ASTPtr &) const {} - - void visit(ASTSubquery & subquery, const ASTPtr & ast) const; - void visit(const ASTArrayJoin &, const ASTPtr & ast) const; - void visitOther(const ASTPtr & ast) const; - void visitChildren(const ASTPtr & ast) const; - - template - bool tryVisit(const ASTPtr & ast) const - { - if (T * t = typeid_cast(ast.get())) - { - DumpASTNode dump(*ast, ostr, visit_depth, "getQueryAliases"); - visit(*t, ast); - return true; - } - return false; - } - - String wrongAliasMessage(const ASTPtr & ast, const String & alias) const; + static std::vector visit(ASTSubquery & subquery, const ASTPtr & ast, Data & data); + static std::vector visit(const ASTArrayJoin &, const ASTPtr & ast, Data & data); + static void visitOther(const ASTPtr & ast, Data & data); }; +/// Visits AST nodes and collect their aliases in one map (with links to source nodes). 
+using QueryAliasesVisitor = InDepthNodeVisitor; + } diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index c6a15058b5f..bc7fce2a165 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -134,8 +134,8 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( /// Creates a dictionary `aliases`: alias -> ASTPtr { LogAST log; - QueryAliasesVisitor query_aliases_visitor(result.aliases, log.stream()); - query_aliases_visitor.visit(query); + QueryAliasesMatcher::Data query_aliases_data{result.aliases}; + QueryAliasesVisitor(query_aliases_data, log.stream()).visit(query); } /// Common subexpression elimination. Rewrite rules. From 75af882cf31fecb067fbc4c8f2db798a0b588743 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 7 Dec 2018 15:34:40 +0300 Subject: [PATCH 58/88] fix AST debug print with underline symbol --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 4 ---- dbms/src/Interpreters/QueryAliasesVisitor.h | 2 +- dbms/src/Interpreters/SyntaxAnalyzer.cpp | 2 +- dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h | 2 +- dbms/src/Parsers/ASTAlterQuery.cpp | 4 ++-- dbms/src/Parsers/ASTAlterQuery.h | 6 +++--- dbms/src/Parsers/ASTAssignment.h | 2 +- dbms/src/Parsers/ASTAsterisk.h | 2 +- dbms/src/Parsers/ASTCheckQuery.h | 2 +- dbms/src/Parsers/ASTColumnDeclaration.h | 2 +- dbms/src/Parsers/ASTCreateQuery.h | 4 ++-- dbms/src/Parsers/ASTDropQuery.cpp | 8 ++++---- dbms/src/Parsers/ASTDropQuery.h | 2 +- dbms/src/Parsers/ASTExplainQuery.h | 2 +- dbms/src/Parsers/ASTExpressionList.h | 2 +- dbms/src/Parsers/ASTFunction.cpp | 4 ++-- dbms/src/Parsers/ASTFunction.h | 2 +- dbms/src/Parsers/ASTIdentifier.h | 2 +- dbms/src/Parsers/ASTInsertQuery.h | 2 +- dbms/src/Parsers/ASTKillQueryQuery.cpp | 4 ++-- dbms/src/Parsers/ASTKillQueryQuery.h | 2 +- dbms/src/Parsers/ASTLiteral.h | 2 +- dbms/src/Parsers/ASTNameTypePair.h | 2 +- dbms/src/Parsers/ASTOptimizeQuery.h | 6 ++++-- 
dbms/src/Parsers/ASTOrderByElement.h | 5 +---- dbms/src/Parsers/ASTPartition.cpp | 4 ++-- dbms/src/Parsers/ASTPartition.h | 2 +- dbms/src/Parsers/ASTQualifiedAsterisk.h | 2 +- dbms/src/Parsers/ASTQueryWithOutput.h | 2 +- dbms/src/Parsers/ASTQueryWithTableAndOutput.h | 2 +- dbms/src/Parsers/ASTRenameQuery.h | 2 +- dbms/src/Parsers/ASTSampleRatio.h | 2 +- dbms/src/Parsers/ASTSelectQuery.h | 2 +- dbms/src/Parsers/ASTSelectWithUnionQuery.h | 2 +- dbms/src/Parsers/ASTSetQuery.h | 2 +- dbms/src/Parsers/ASTShowTablesQuery.h | 2 +- dbms/src/Parsers/ASTSubquery.h | 2 +- dbms/src/Parsers/ASTSystemQuery.h | 2 +- dbms/src/Parsers/ASTTablesInSelectQuery.h | 10 +++++----- dbms/src/Parsers/ASTUseQuery.h | 2 +- dbms/src/Parsers/DumpASTNode.h | 3 +-- dbms/src/Parsers/IAST.h | 2 +- dbms/src/Parsers/TablePropertiesQueriesASTs.h | 2 +- 43 files changed, 59 insertions(+), 65 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 381770e6782..4826f38b9dc 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -31,8 +31,6 @@ #include #include #include -#include -#include #include #include @@ -62,11 +60,9 @@ #include #include -#include #include #include #include -#include #include namespace DB diff --git a/dbms/src/Interpreters/QueryAliasesVisitor.h b/dbms/src/Interpreters/QueryAliasesVisitor.h index aae211e6e83..2cd4f8f0c6b 100644 --- a/dbms/src/Interpreters/QueryAliasesVisitor.h +++ b/dbms/src/Interpreters/QueryAliasesVisitor.h @@ -22,7 +22,7 @@ public: Aliases & aliases; }; - static constexpr const char * label = __FILE__; + static constexpr const char * label = "QueryAliases"; static std::vector visit(ASTPtr & ast, Data & data); static bool needChildVisit(ASTPtr & node, const ASTPtr & child); diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index bc7fce2a165..ae6d3ae0b4e 100644 --- 
a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -46,7 +46,7 @@ namespace { using LogAST = DebugASTLog; /// set to true to enable logs -using Aliases = std::unordered_map; +using Aliases = SyntaxAnalyzerResult::Aliases; /// Add columns from storage to source_columns list. void collectSourceColumns(ASTSelectQuery * select_query, const Context & context, diff --git a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h index bebf11d83e2..3ce69dd0afa 100644 --- a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h +++ b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h @@ -23,7 +23,7 @@ public: const std::vector & tables; }; - static constexpr const char * label = __FILE__; + static constexpr const char * label = "TranslateQualifiedNames"; static std::vector visit(ASTPtr & ast, Data & data); static bool needChildVisit(ASTPtr & node, const ASTPtr & child); diff --git a/dbms/src/Parsers/ASTAlterQuery.cpp b/dbms/src/Parsers/ASTAlterQuery.cpp index 863cb299bbb..3577346df0f 100644 --- a/dbms/src/Parsers/ASTAlterQuery.cpp +++ b/dbms/src/Parsers/ASTAlterQuery.cpp @@ -196,9 +196,9 @@ void ASTAlterCommandList::formatImpl(const FormatSettings & settings, FormatStat /** Get the text that identifies this element. 
*/ -String ASTAlterQuery::getID() const +String ASTAlterQuery::getID(char delim) const { - return "AlterQuery_" + database + "_" + table; + return "AlterQuery" + (delim + database) + delim + table; } ASTPtr ASTAlterQuery::clone() const diff --git a/dbms/src/Parsers/ASTAlterQuery.h b/dbms/src/Parsers/ASTAlterQuery.h index b58b3a29b30..b73e1f38e2c 100644 --- a/dbms/src/Parsers/ASTAlterQuery.h +++ b/dbms/src/Parsers/ASTAlterQuery.h @@ -97,7 +97,7 @@ public: /// To distinguish REPLACE and ATTACH PARTITION partition FROM db.table bool replace = true; - String getID() const override { return "AlterCommand_" + std::to_string(static_cast(type)); } + String getID(char delim) const override { return "AlterCommand" + (delim + std::to_string(static_cast(type))); } ASTPtr clone() const override; @@ -116,7 +116,7 @@ public: children.push_back(command); } - String getID() const override { return "AlterCommandList"; } + String getID(char) const override { return "AlterCommandList"; } ASTPtr clone() const override; @@ -129,7 +129,7 @@ class ASTAlterQuery : public ASTQueryWithTableAndOutput, public ASTQueryWithOnCl public: ASTAlterCommandList * command_list = nullptr; - String getID() const override; + String getID(char) const override; ASTPtr clone() const override; diff --git a/dbms/src/Parsers/ASTAssignment.h b/dbms/src/Parsers/ASTAssignment.h index 18bf46c171c..6753711f9e3 100644 --- a/dbms/src/Parsers/ASTAssignment.h +++ b/dbms/src/Parsers/ASTAssignment.h @@ -12,7 +12,7 @@ public: String column_name; ASTPtr expression; - String getID() const override { return "Assignment_" + column_name; } + String getID(char delim) const override { return "Assignment" + (delim + column_name); } ASTPtr clone() const override { diff --git a/dbms/src/Parsers/ASTAsterisk.h b/dbms/src/Parsers/ASTAsterisk.h index 02a0f99895d..9a5a4efe267 100644 --- a/dbms/src/Parsers/ASTAsterisk.h +++ b/dbms/src/Parsers/ASTAsterisk.h @@ -9,7 +9,7 @@ namespace DB class ASTAsterisk : public IAST { public: - String 
getID() const override { return "Asterisk"; } + String getID(char) const override { return "Asterisk"; } ASTPtr clone() const override; void appendColumnName(WriteBuffer & ostr) const override; diff --git a/dbms/src/Parsers/ASTCheckQuery.h b/dbms/src/Parsers/ASTCheckQuery.h index a87f68c855b..595b6c2ecb6 100644 --- a/dbms/src/Parsers/ASTCheckQuery.h +++ b/dbms/src/Parsers/ASTCheckQuery.h @@ -8,7 +8,7 @@ namespace DB struct ASTCheckQuery : public ASTQueryWithTableAndOutput { /** Get the text that identifies this element. */ - String getID() const override { return ("CheckQuery_" + database + "_" + table); } + String getID(char delim) const override { return "CheckQuery" + (delim + database) + delim + table; } ASTPtr clone() const override { diff --git a/dbms/src/Parsers/ASTColumnDeclaration.h b/dbms/src/Parsers/ASTColumnDeclaration.h index 870472fcb30..e288dbbcd58 100644 --- a/dbms/src/Parsers/ASTColumnDeclaration.h +++ b/dbms/src/Parsers/ASTColumnDeclaration.h @@ -17,7 +17,7 @@ public: ASTPtr default_expression; ASTPtr comment; - String getID() const override { return "ColumnDeclaration_" + name; } + String getID(char delim) const override { return "ColumnDeclaration" + (delim + name); } ASTPtr clone() const override { diff --git a/dbms/src/Parsers/ASTCreateQuery.h b/dbms/src/Parsers/ASTCreateQuery.h index 925b12600fd..840ba345813 100644 --- a/dbms/src/Parsers/ASTCreateQuery.h +++ b/dbms/src/Parsers/ASTCreateQuery.h @@ -21,7 +21,7 @@ public: IAST * sample_by = nullptr; ASTSetQuery * settings = nullptr; - String getID() const override { return "Storage definition"; } + String getID(char) const override { return "Storage definition"; } ASTPtr clone() const override { @@ -99,7 +99,7 @@ public: ASTSelectWithUnionQuery * select = nullptr; /** Get the text that identifies this element. */ - String getID() const override { return (attach ? "AttachQuery_" : "CreateQuery_") + database + "_" + table; } + String getID(char delim) const override { return (attach ? 
"AttachQuery" : "CreateQuery") + (delim + database) + delim + table; } ASTPtr clone() const override { diff --git a/dbms/src/Parsers/ASTDropQuery.cpp b/dbms/src/Parsers/ASTDropQuery.cpp index 6b6b9b0bec2..094a34242cf 100644 --- a/dbms/src/Parsers/ASTDropQuery.cpp +++ b/dbms/src/Parsers/ASTDropQuery.cpp @@ -10,14 +10,14 @@ namespace ErrorCodes } -String ASTDropQuery::getID() const +String ASTDropQuery::getID(char delim) const { if (kind == ASTDropQuery::Kind::Drop) - return "DropQuery_" + database + "_" + table; + return "DropQuery" + (delim + database) + delim + table; else if (kind == ASTDropQuery::Kind::Detach) - return "DetachQuery_" + database + "_" + table; + return "DetachQuery" + (delim + database) + delim + table; else if (kind == ASTDropQuery::Kind::Truncate) - return "TruncateQuery_" + database + "_" + table; + return "TruncateQuery" + (delim + database) + delim + table; else throw Exception("Not supported kind of drop query.", ErrorCodes::SYNTAX_ERROR); } diff --git a/dbms/src/Parsers/ASTDropQuery.h b/dbms/src/Parsers/ASTDropQuery.h index 83b5d28e38b..1c230e30aea 100644 --- a/dbms/src/Parsers/ASTDropQuery.h +++ b/dbms/src/Parsers/ASTDropQuery.h @@ -23,7 +23,7 @@ public: bool if_exists{false}; /** Get the text that identifies this element. 
*/ - String getID() const override; + String getID(char) const override; ASTPtr clone() const override; ASTPtr getRewrittenASTWithoutOnCluster(const std::string & new_database) const override diff --git a/dbms/src/Parsers/ASTExplainQuery.h b/dbms/src/Parsers/ASTExplainQuery.h index b02731c79d3..a1eb9feecd4 100644 --- a/dbms/src/Parsers/ASTExplainQuery.h +++ b/dbms/src/Parsers/ASTExplainQuery.h @@ -20,7 +20,7 @@ public: : kind(kind_) {} - String getID() const override { return "Explain_" + toString(kind); } + String getID(char delim) const override { return "Explain" + (delim + toString(kind)); } ASTPtr clone() const override { return std::make_shared(*this); } protected: diff --git a/dbms/src/Parsers/ASTExpressionList.h b/dbms/src/Parsers/ASTExpressionList.h index cfe9cb3b714..4f77adb4009 100644 --- a/dbms/src/Parsers/ASTExpressionList.h +++ b/dbms/src/Parsers/ASTExpressionList.h @@ -11,7 +11,7 @@ namespace DB class ASTExpressionList : public IAST { public: - String getID() const override { return "ExpressionList"; } + String getID(char ) const override { return "ExpressionList"; } ASTPtr clone() const override; void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; diff --git a/dbms/src/Parsers/ASTFunction.cpp b/dbms/src/Parsers/ASTFunction.cpp index d84d77b649b..73880089f53 100644 --- a/dbms/src/Parsers/ASTFunction.cpp +++ b/dbms/src/Parsers/ASTFunction.cpp @@ -36,9 +36,9 @@ void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr) const } /** Get the text that identifies this element. 
*/ -String ASTFunction::getID() const +String ASTFunction::getID(char delim) const { - return "Function_" + name; + return "Function" + (delim + name); } ASTPtr ASTFunction::clone() const diff --git a/dbms/src/Parsers/ASTFunction.h b/dbms/src/Parsers/ASTFunction.h index 3bed72d4305..effc9a6cea9 100644 --- a/dbms/src/Parsers/ASTFunction.h +++ b/dbms/src/Parsers/ASTFunction.h @@ -19,7 +19,7 @@ public: public: /** Get text identifying the AST node. */ - String getID() const override; + String getID(char delim) const override; ASTPtr clone() const override; diff --git a/dbms/src/Parsers/ASTIdentifier.h b/dbms/src/Parsers/ASTIdentifier.h index 0ada2b60852..b8c56727e17 100644 --- a/dbms/src/Parsers/ASTIdentifier.h +++ b/dbms/src/Parsers/ASTIdentifier.h @@ -24,7 +24,7 @@ public: : name(name_), kind(kind_) { range = StringRange(name.data(), name.data() + name.size()); } /** Get the text that identifies this element. */ - String getID() const override { return "Identifier_" + name; } + String getID(char delim) const override { return "Identifier" + (delim + name); } ASTPtr clone() const override { return std::make_shared(*this); } diff --git a/dbms/src/Parsers/ASTInsertQuery.h b/dbms/src/Parsers/ASTInsertQuery.h index 9da68ca21c8..baf2a9fce8d 100644 --- a/dbms/src/Parsers/ASTInsertQuery.h +++ b/dbms/src/Parsers/ASTInsertQuery.h @@ -27,7 +27,7 @@ public: const char * end = nullptr; /** Get the text that identifies this element. 
*/ - String getID() const override { return "InsertQuery_" + database + "_" + table; } + String getID(char delim) const override { return "InsertQuery" + (delim + database) + delim + table; } ASTPtr clone() const override { diff --git a/dbms/src/Parsers/ASTKillQueryQuery.cpp b/dbms/src/Parsers/ASTKillQueryQuery.cpp index 0b9e6bcf4bc..9e7631eacc3 100644 --- a/dbms/src/Parsers/ASTKillQueryQuery.cpp +++ b/dbms/src/Parsers/ASTKillQueryQuery.cpp @@ -3,9 +3,9 @@ namespace DB { -String ASTKillQueryQuery::getID() const +String ASTKillQueryQuery::getID(char delim) const { - return "KillQueryQuery_" + (where_expression ? where_expression->getID() : "") + "_" + String(sync ? "SYNC" : "ASYNC"); + return String("KillQueryQuery") + delim + (where_expression ? where_expression->getID() : "") + delim + String(sync ? "SYNC" : "ASYNC"); } void ASTKillQueryQuery::formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const diff --git a/dbms/src/Parsers/ASTKillQueryQuery.h b/dbms/src/Parsers/ASTKillQueryQuery.h index 491bd3aecd2..7099fbccece 100644 --- a/dbms/src/Parsers/ASTKillQueryQuery.h +++ b/dbms/src/Parsers/ASTKillQueryQuery.h @@ -20,7 +20,7 @@ public: return clone; } - String getID() const override; + String getID(char) const override; void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; diff --git a/dbms/src/Parsers/ASTLiteral.h b/dbms/src/Parsers/ASTLiteral.h index d2b86cbb28c..d6b8b4efc3b 100644 --- a/dbms/src/Parsers/ASTLiteral.h +++ b/dbms/src/Parsers/ASTLiteral.h @@ -18,7 +18,7 @@ public: ASTLiteral(const Field & value_) : value(value_) {} /** Get the text that identifies this element. 
*/ - String getID() const override { return "Literal_" + applyVisitor(FieldVisitorDump(), value); } + String getID(char delim) const override { return "Literal" + (delim + applyVisitor(FieldVisitorDump(), value)); } ASTPtr clone() const override { return std::make_shared(*this); } diff --git a/dbms/src/Parsers/ASTNameTypePair.h b/dbms/src/Parsers/ASTNameTypePair.h index 9dad01df2f5..ac72448e2e9 100644 --- a/dbms/src/Parsers/ASTNameTypePair.h +++ b/dbms/src/Parsers/ASTNameTypePair.h @@ -17,7 +17,7 @@ public: ASTPtr type; /** Get the text that identifies this element. */ - String getID() const override { return "NameTypePair_" + name; } + String getID(char delim) const override { return "NameTypePair" + (delim + name); } ASTPtr clone() const override { diff --git a/dbms/src/Parsers/ASTOptimizeQuery.h b/dbms/src/Parsers/ASTOptimizeQuery.h index d228a8c905f..c93fea2b6d3 100644 --- a/dbms/src/Parsers/ASTOptimizeQuery.h +++ b/dbms/src/Parsers/ASTOptimizeQuery.h @@ -21,8 +21,10 @@ public: bool deduplicate; /** Get the text that identifies this element. */ - String getID() const override - { return "OptimizeQuery_" + database + "_" + table + (final ? "_final" : "") + (deduplicate ? "_deduplicate" : ""); } + String getID(char delim) const override + { + return "OptimizeQuery" + (delim + database) + delim + table + (final ? "_final" : "") + (deduplicate ? 
"_deduplicate" : ""); + } ASTPtr clone() const override { diff --git a/dbms/src/Parsers/ASTOrderByElement.h b/dbms/src/Parsers/ASTOrderByElement.h index 7e9459cba9b..729915400ce 100644 --- a/dbms/src/Parsers/ASTOrderByElement.h +++ b/dbms/src/Parsers/ASTOrderByElement.h @@ -27,10 +27,7 @@ public: { } - String getID() const override - { - return "OrderByElement"; - } + String getID(char) const override { return "OrderByElement"; } ASTPtr clone() const override { diff --git a/dbms/src/Parsers/ASTPartition.cpp b/dbms/src/Parsers/ASTPartition.cpp index 6f0a256ec3b..d24575b7f43 100644 --- a/dbms/src/Parsers/ASTPartition.cpp +++ b/dbms/src/Parsers/ASTPartition.cpp @@ -4,12 +4,12 @@ namespace DB { -String ASTPartition::getID() const +String ASTPartition::getID(char delim) const { if (value) return "Partition"; else - return "Partition_ID_" + id; + return "Partition_ID" + (delim + id); } ASTPtr ASTPartition::clone() const diff --git a/dbms/src/Parsers/ASTPartition.h b/dbms/src/Parsers/ASTPartition.h index b1ed866284a..d87206d7bb4 100644 --- a/dbms/src/Parsers/ASTPartition.h +++ b/dbms/src/Parsers/ASTPartition.h @@ -17,7 +17,7 @@ public: String id; - String getID() const override; + String getID(char) const override; ASTPtr clone() const override; protected: diff --git a/dbms/src/Parsers/ASTQualifiedAsterisk.h b/dbms/src/Parsers/ASTQualifiedAsterisk.h index 74cd745d033..e084d80cb94 100644 --- a/dbms/src/Parsers/ASTQualifiedAsterisk.h +++ b/dbms/src/Parsers/ASTQualifiedAsterisk.h @@ -12,7 +12,7 @@ namespace DB class ASTQualifiedAsterisk : public IAST { public: - String getID() const override { return "QualifiedAsterisk"; } + String getID(char) const override { return "QualifiedAsterisk"; } ASTPtr clone() const override { auto clone = std::make_shared(*this); diff --git a/dbms/src/Parsers/ASTQueryWithOutput.h b/dbms/src/Parsers/ASTQueryWithOutput.h index 1e927084e86..0660b1bec63 100644 --- a/dbms/src/Parsers/ASTQueryWithOutput.h +++ b/dbms/src/Parsers/ASTQueryWithOutput.h @@ 
-34,7 +34,7 @@ template class ASTQueryWithOutputImpl : public ASTQueryWithOutput { public: - String getID() const override { return ASTIDAndQueryNames::ID; } + String getID(char) const override { return ASTIDAndQueryNames::ID; } ASTPtr clone() const override { diff --git a/dbms/src/Parsers/ASTQueryWithTableAndOutput.h b/dbms/src/Parsers/ASTQueryWithTableAndOutput.h index dd9b3fce153..3f3fd036d78 100644 --- a/dbms/src/Parsers/ASTQueryWithTableAndOutput.h +++ b/dbms/src/Parsers/ASTQueryWithTableAndOutput.h @@ -30,7 +30,7 @@ template class ASTQueryWithTableAndOutputImpl : public ASTQueryWithTableAndOutput { public: - String getID() const override { return AstIDAndQueryNames::ID + ("_" + database) + "_" + table; } + String getID(char delim) const override { return AstIDAndQueryNames::ID + (delim + database) + delim + table; } ASTPtr clone() const override { diff --git a/dbms/src/Parsers/ASTRenameQuery.h b/dbms/src/Parsers/ASTRenameQuery.h index 561a88a54ce..006c8583836 100644 --- a/dbms/src/Parsers/ASTRenameQuery.h +++ b/dbms/src/Parsers/ASTRenameQuery.h @@ -29,7 +29,7 @@ public: Elements elements; /** Get the text that identifies this element. 
*/ - String getID() const override { return "Rename"; } + String getID(char) const override { return "Rename"; } ASTPtr clone() const override { diff --git a/dbms/src/Parsers/ASTSampleRatio.h b/dbms/src/Parsers/ASTSampleRatio.h index ca91d0b6cbb..787833eb4f3 100644 --- a/dbms/src/Parsers/ASTSampleRatio.h +++ b/dbms/src/Parsers/ASTSampleRatio.h @@ -28,7 +28,7 @@ public: ASTSampleRatio(Rational & ratio_) : ratio(ratio_) {} - String getID() const override { return "SampleRatio_" + toString(ratio); } + String getID(char delim) const override { return "SampleRatio" + (delim + toString(ratio)); } ASTPtr clone() const override { return std::make_shared(*this); } diff --git a/dbms/src/Parsers/ASTSelectQuery.h b/dbms/src/Parsers/ASTSelectQuery.h index eb08cf6e20c..d9bb3f11be4 100644 --- a/dbms/src/Parsers/ASTSelectQuery.h +++ b/dbms/src/Parsers/ASTSelectQuery.h @@ -16,7 +16,7 @@ class ASTSelectQuery : public IAST { public: /** Get the text that identifies this element. */ - String getID() const override { return "SelectQuery"; } + String getID(char) const override { return "SelectQuery"; } ASTPtr clone() const override; diff --git a/dbms/src/Parsers/ASTSelectWithUnionQuery.h b/dbms/src/Parsers/ASTSelectWithUnionQuery.h index c458825507e..41ec8bb1076 100644 --- a/dbms/src/Parsers/ASTSelectWithUnionQuery.h +++ b/dbms/src/Parsers/ASTSelectWithUnionQuery.h @@ -12,7 +12,7 @@ namespace DB class ASTSelectWithUnionQuery : public ASTQueryWithOutput { public: - String getID() const override { return "SelectWithUnionQuery"; } + String getID(char) const override { return "SelectWithUnionQuery"; } ASTPtr clone() const override; void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; diff --git a/dbms/src/Parsers/ASTSetQuery.h b/dbms/src/Parsers/ASTSetQuery.h index 29656b26434..756c5bdc058 100644 --- a/dbms/src/Parsers/ASTSetQuery.h +++ b/dbms/src/Parsers/ASTSetQuery.h @@ -26,7 +26,7 @@ public: Changes changes; /** Get the text 
that identifies this element. */ - String getID() const override { return "Set"; } + String getID(char) const override { return "Set"; } ASTPtr clone() const override { return std::make_shared(*this); } diff --git a/dbms/src/Parsers/ASTShowTablesQuery.h b/dbms/src/Parsers/ASTShowTablesQuery.h index 7f67a08c6e7..58915df0e60 100644 --- a/dbms/src/Parsers/ASTShowTablesQuery.h +++ b/dbms/src/Parsers/ASTShowTablesQuery.h @@ -21,7 +21,7 @@ public: bool not_like{false}; /** Get the text that identifies this element. */ - String getID() const override { return "ShowTables"; } + String getID(char) const override { return "ShowTables"; } ASTPtr clone() const override { diff --git a/dbms/src/Parsers/ASTSubquery.h b/dbms/src/Parsers/ASTSubquery.h index 513f0673c6b..8ca3291824b 100644 --- a/dbms/src/Parsers/ASTSubquery.h +++ b/dbms/src/Parsers/ASTSubquery.h @@ -13,7 +13,7 @@ class ASTSubquery : public ASTWithAlias { public: /** Get the text that identifies this element. */ - String getID() const override { return "Subquery"; } + String getID(char) const override { return "Subquery"; } ASTPtr clone() const override { diff --git a/dbms/src/Parsers/ASTSystemQuery.h b/dbms/src/Parsers/ASTSystemQuery.h index 3a4bffb19b9..bc4de9689c6 100644 --- a/dbms/src/Parsers/ASTSystemQuery.h +++ b/dbms/src/Parsers/ASTSystemQuery.h @@ -51,7 +51,7 @@ public: String target_database; String target_table; - String getID() const override { return "SYSTEM query"; } + String getID(char) const override { return "SYSTEM query"; } ASTPtr clone() const override { return std::make_shared(*this); } diff --git a/dbms/src/Parsers/ASTTablesInSelectQuery.h b/dbms/src/Parsers/ASTTablesInSelectQuery.h index 289cfd3c447..c94192b44d2 100644 --- a/dbms/src/Parsers/ASTTablesInSelectQuery.h +++ b/dbms/src/Parsers/ASTTablesInSelectQuery.h @@ -53,7 +53,7 @@ struct ASTTableExpression : public IAST ASTPtr sample_offset; using IAST::IAST; - String getID() const override { return "TableExpression"; } + String getID(char) 
const override { return "TableExpression"; } ASTPtr clone() const override; void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; }; @@ -98,7 +98,7 @@ struct ASTTableJoin : public IAST ASTPtr on_expression; using IAST::IAST; - String getID() const override { return "TableJoin"; } + String getID(char) const override { return "TableJoin"; } ASTPtr clone() const override; void formatImplBeforeTable(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const; @@ -122,7 +122,7 @@ struct ASTArrayJoin : public IAST ASTPtr expression_list; using IAST::IAST; - String getID() const override { return "ArrayJoin"; } + String getID(char) const override { return "ArrayJoin"; } ASTPtr clone() const override; void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; }; @@ -139,7 +139,7 @@ struct ASTTablesInSelectQueryElement : public IAST ASTPtr array_join; /// Arrays to JOIN. 
using IAST::IAST; - String getID() const override { return "TablesInSelectQueryElement"; } + String getID(char) const override { return "TablesInSelectQueryElement"; } ASTPtr clone() const override; void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; }; @@ -149,7 +149,7 @@ struct ASTTablesInSelectQueryElement : public IAST struct ASTTablesInSelectQuery : public IAST { using IAST::IAST; - String getID() const override { return "TablesInSelectQuery"; } + String getID(char) const override { return "TablesInSelectQuery"; } ASTPtr clone() const override; void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; }; diff --git a/dbms/src/Parsers/ASTUseQuery.h b/dbms/src/Parsers/ASTUseQuery.h index 89edc6c8a7e..f1ef1b3b408 100644 --- a/dbms/src/Parsers/ASTUseQuery.h +++ b/dbms/src/Parsers/ASTUseQuery.h @@ -15,7 +15,7 @@ public: String database; /** Get the text that identifies this element. */ - String getID() const override { return "UseQuery_" + database; } + String getID(char delim) const override { return "UseQuery" + (delim + database); } ASTPtr clone() const override { return std::make_shared(*this); } diff --git a/dbms/src/Parsers/DumpASTNode.h b/dbms/src/Parsers/DumpASTNode.h index 64c86514440..edd6b3a634a 100644 --- a/dbms/src/Parsers/DumpASTNode.h +++ b/dbms/src/Parsers/DumpASTNode.h @@ -64,8 +64,7 @@ private: String nodeId() const { - String id = ast.getID(); - std::replace(id.begin(), id.end(), '_', ' '); + String id = ast.getID(' '); return id; } diff --git a/dbms/src/Parsers/IAST.h b/dbms/src/Parsers/IAST.h index 8b1181e39c9..e30ac56738f 100644 --- a/dbms/src/Parsers/IAST.h +++ b/dbms/src/Parsers/IAST.h @@ -67,7 +67,7 @@ public: } /** Get the text that identifies this element. 
*/ - virtual String getID() const = 0; + virtual String getID(char delimiter = '_') const = 0; ASTPtr ptr() { return shared_from_this(); } diff --git a/dbms/src/Parsers/TablePropertiesQueriesASTs.h b/dbms/src/Parsers/TablePropertiesQueriesASTs.h index 6a6cf4506d1..e68a3b46e4a 100644 --- a/dbms/src/Parsers/TablePropertiesQueriesASTs.h +++ b/dbms/src/Parsers/TablePropertiesQueriesASTs.h @@ -48,7 +48,7 @@ class ASTDescribeQuery : public ASTQueryWithOutput public: ASTPtr table_expression; - String getID() const override { return "DescribeQuery"; } + String getID(char) const override { return "DescribeQuery"; } ASTPtr clone() const override { From e26c3327cf852a3153c849f0873c1955699c1c6a Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 7 Dec 2018 17:24:47 +0300 Subject: [PATCH 59/88] fix lost AST chenges in InDepthNodeVisitor --- dbms/src/Interpreters/InDepthNodeVisitor.h | 8 ++++--- dbms/src/Interpreters/QueryAliasesVisitor.cpp | 10 ++++----- dbms/src/Interpreters/QueryAliasesVisitor.h | 6 ++--- .../TranslateQualifiedNamesVisitor.cpp | 22 +++++++++---------- .../TranslateQualifiedNamesVisitor.h | 10 ++++----- dbms/src/Parsers/DumpASTNode.h | 8 ++----- 6 files changed, 31 insertions(+), 33 deletions(-) diff --git a/dbms/src/Interpreters/InDepthNodeVisitor.h b/dbms/src/Interpreters/InDepthNodeVisitor.h index 4292da7fbdb..f4186a9dd4a 100644 --- a/dbms/src/Interpreters/InDepthNodeVisitor.h +++ b/dbms/src/Interpreters/InDepthNodeVisitor.h @@ -28,10 +28,12 @@ public: if constexpr (!_topToBottom) visitChildren(ast); - auto additional_nodes = Matcher::visit(ast, data); + /// It operates with ASTPtr * cause we may want to rewrite ASTPtr in visit(). + std::vector additional_nodes = Matcher::visit(ast, data); + /// visit additional nodes (ex. 
only part of children) - for (ASTPtr & node : additional_nodes) - visit(node); + for (ASTPtr * node : additional_nodes) + visit(*node); if constexpr (_topToBottom) visitChildren(ast); diff --git a/dbms/src/Interpreters/QueryAliasesVisitor.cpp b/dbms/src/Interpreters/QueryAliasesVisitor.cpp index cd7baba0061..3c10f7da1b5 100644 --- a/dbms/src/Interpreters/QueryAliasesVisitor.cpp +++ b/dbms/src/Interpreters/QueryAliasesVisitor.cpp @@ -39,7 +39,7 @@ bool QueryAliasesMatcher::needChildVisit(ASTPtr & node, const ASTPtr &) return true; } -std::vector QueryAliasesMatcher::visit(ASTPtr & ast, Data & data) +std::vector QueryAliasesMatcher::visit(ASTPtr & ast, Data & data) { if (auto * t = typeid_cast(ast.get())) return visit(*t, ast, data); @@ -52,24 +52,24 @@ std::vector QueryAliasesMatcher::visit(ASTPtr & ast, Data & data) /// The top-level aliases in the ARRAY JOIN section have a special meaning, we will not add them /// (skip the expression list itself and its children). -std::vector QueryAliasesMatcher::visit(const ASTArrayJoin &, const ASTPtr & ast, Data & data) +std::vector QueryAliasesMatcher::visit(const ASTArrayJoin &, const ASTPtr & ast, Data & data) { visitOther(ast, data); /// @warning It breaks botom-to-top order (childs processed after node here), could lead to some effects. /// It's possible to add ast back to result vec to save order. It will need two phase ASTArrayJoin visit (setting phase in data). - std::vector out; + std::vector out; for (auto & child1 : ast->children) for (auto & child2 : child1->children) for (auto & child3 : child2->children) - out.push_back(child3); + out.push_back(&child3); return out; } /// set unique aliases for all subqueries. 
this is needed, because: /// 1) content of subqueries could change after recursive analysis, and auto-generated column names could become incorrect /// 2) result of different scalar subqueries can be cached inside expressions compilation cache and must have different names -std::vector QueryAliasesMatcher::visit(ASTSubquery & subquery, const ASTPtr & ast, Data & data) +std::vector QueryAliasesMatcher::visit(ASTSubquery & subquery, const ASTPtr & ast, Data & data) { Aliases & aliases = data.aliases; diff --git a/dbms/src/Interpreters/QueryAliasesVisitor.h b/dbms/src/Interpreters/QueryAliasesVisitor.h index 2cd4f8f0c6b..de6080f2609 100644 --- a/dbms/src/Interpreters/QueryAliasesVisitor.h +++ b/dbms/src/Interpreters/QueryAliasesVisitor.h @@ -24,12 +24,12 @@ public: static constexpr const char * label = "QueryAliases"; - static std::vector visit(ASTPtr & ast, Data & data); + static std::vector visit(ASTPtr & ast, Data & data); static bool needChildVisit(ASTPtr & node, const ASTPtr & child); private: - static std::vector visit(ASTSubquery & subquery, const ASTPtr & ast, Data & data); - static std::vector visit(const ASTArrayJoin &, const ASTPtr & ast, Data & data); + static std::vector visit(ASTSubquery & subquery, const ASTPtr & ast, Data & data); + static std::vector visit(const ASTArrayJoin &, const ASTPtr & ast, Data & data); static void visitOther(const ASTPtr & ast, Data & data); }; diff --git a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index c169691121f..6ceb0cfe524 100644 --- a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -37,7 +37,7 @@ bool TranslateQualifiedNamesMatcher::needChildVisit(ASTPtr & node, const ASTPtr return true; } -std::vector TranslateQualifiedNamesMatcher::visit(ASTPtr & ast, Data & data) +std::vector TranslateQualifiedNamesMatcher::visit(ASTPtr & ast, Data & data) { if (auto * t = 
typeid_cast(ast.get())) return visit(*t, ast, data); @@ -50,7 +50,7 @@ std::vector TranslateQualifiedNamesMatcher::visit(ASTPtr & ast, Data & d return {}; } -std::vector TranslateQualifiedNamesMatcher::visit(const ASTIdentifier & identifier, ASTPtr & ast, Data & data) +std::vector TranslateQualifiedNamesMatcher::visit(const ASTIdentifier & identifier, ASTPtr & ast, Data & data) { const NameSet & source_columns = data.source_columns; const std::vector & tables = data.tables; @@ -87,7 +87,7 @@ std::vector TranslateQualifiedNamesMatcher::visit(const ASTIdentifier & return {}; } -std::vector TranslateQualifiedNamesMatcher::visit(const ASTQualifiedAsterisk & , const ASTPtr & ast, Data & data) +std::vector TranslateQualifiedNamesMatcher::visit(const ASTQualifiedAsterisk & , const ASTPtr & ast, Data & data) { const std::vector & tables = data.tables; @@ -125,26 +125,26 @@ std::vector TranslateQualifiedNamesMatcher::visit(const ASTQualifiedAste throw Exception("Unknown qualified identifier: " + ident->getAliasOrColumnName(), ErrorCodes::UNKNOWN_IDENTIFIER); } -std::vector TranslateQualifiedNamesMatcher::visit(const ASTTableJoin & join, const ASTPtr & , Data &) +std::vector TranslateQualifiedNamesMatcher::visit(ASTTableJoin & join, const ASTPtr & , Data &) { /// Don't translate on_expression here in order to resolve equation parts later. - std::vector out; + std::vector out; if (join.using_expression_list) - out.push_back(join.using_expression_list); + out.push_back(&join.using_expression_list); return out; } -std::vector TranslateQualifiedNamesMatcher::visit(const ASTSelectQuery & select, const ASTPtr & , Data &) +std::vector TranslateQualifiedNamesMatcher::visit(ASTSelectQuery & select, const ASTPtr & , Data &) { /// If the WHERE clause or HAVING consists of a single quailified column, the reference must be translated not only in children, /// but also in where_expression and having_expression. 
- std::vector out; + std::vector out; if (select.prewhere_expression) - out.push_back(select.prewhere_expression); + out.push_back(&select.prewhere_expression); if (select.where_expression) - out.push_back(select.where_expression); + out.push_back(&select.where_expression); if (select.having_expression) - out.push_back(select.having_expression); + out.push_back(&select.having_expression); return out; } diff --git a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h index 3ce69dd0afa..59933eb3b2c 100644 --- a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h +++ b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h @@ -25,14 +25,14 @@ public: static constexpr const char * label = "TranslateQualifiedNames"; - static std::vector visit(ASTPtr & ast, Data & data); + static std::vector visit(ASTPtr & ast, Data & data); static bool needChildVisit(ASTPtr & node, const ASTPtr & child); private: - static std::vector visit(const ASTIdentifier & node, ASTPtr & ast, Data &); - static std::vector visit(const ASTQualifiedAsterisk & node, const ASTPtr & ast, Data &); - static std::vector visit(const ASTTableJoin & node, const ASTPtr & ast, Data &); - static std::vector visit(const ASTSelectQuery & node, const ASTPtr & ast, Data &); + static std::vector visit(const ASTIdentifier & node, ASTPtr & ast, Data &); + static std::vector visit(const ASTQualifiedAsterisk & node, const ASTPtr & ast, Data &); + static std::vector visit(ASTTableJoin & node, const ASTPtr & ast, Data &); + static std::vector visit(ASTSelectQuery & node, const ASTPtr & ast, Data &); }; /// Visits AST for names qualification. 
diff --git a/dbms/src/Parsers/DumpASTNode.h b/dbms/src/Parsers/DumpASTNode.h index edd6b3a634a..8eb1342ffe1 100644 --- a/dbms/src/Parsers/DumpASTNode.h +++ b/dbms/src/Parsers/DumpASTNode.h @@ -62,11 +62,7 @@ private: size_t & visit_depth; /// shared with children const char * label; - String nodeId() const - { - String id = ast.getID(' '); - return id; - } + String nodeId() const { return ast.getID(' '); } void printNode() const { @@ -77,7 +73,7 @@ private: print("alias", aslias, " "); if (!ast.children.empty()) - print("/", ast.children.size(), " "); /// slash is just a short name for 'children' here + print("children", ast.children.size(), " "); } }; From abffefc8309b05540c3c035e9119e683ea803376 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 7 Dec 2018 18:14:50 +0300 Subject: [PATCH 60/88] ExternalTablesVisitor via InDepthNodeVisitor --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 4 +- dbms/src/Interpreters/ExternalTablesVisitor.h | 52 ++++++++----------- dbms/src/Interpreters/InDepthNodeVisitor.h | 3 +- 3 files changed, 26 insertions(+), 33 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 4826f38b9dc..8105f3c28f4 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -242,8 +242,8 @@ void ExpressionAnalyzer::analyzeAggregation() void ExpressionAnalyzer::initGlobalSubqueriesAndExternalTables() { /// Adds existing external tables (not subqueries) to the external_tables dictionary. 
- ExternalTablesVisitor tables_visitor(context, external_tables); - tables_visitor.visit(query); + ExternalTablesMatcher::Data tables_data{context, external_tables}; + ExternalTablesVisitor(tables_data).visit(query); if (do_global) { diff --git a/dbms/src/Interpreters/ExternalTablesVisitor.h b/dbms/src/Interpreters/ExternalTablesVisitor.h index 95b109987e0..ffc51bf7890 100644 --- a/dbms/src/Interpreters/ExternalTablesVisitor.h +++ b/dbms/src/Interpreters/ExternalTablesVisitor.h @@ -4,49 +4,43 @@ #include #include #include +#include namespace DB { -/// Finds in the query the usage of external tables (as table identifiers). Fills in external_tables. -class ExternalTablesVisitor +/// If node is ASTIdentifier try to extract external_storage. +class ExternalTablesMatcher { public: - ExternalTablesVisitor(const Context & context_, Tables & tables) - : context(context_), - external_tables(tables) - {} - - void visit(ASTPtr & ast) const + struct Data { - /// Traverse from the bottom. Intentionally go into subqueries. 
- for (auto & child : ast->children) - visit(child); + const Context & context; + Tables & external_tables; + }; - tryVisit(ast); + static constexpr const char * label = "ExternalTables"; + + static std::vector visit(ASTPtr & ast, Data & data) + { + if (auto * t = typeid_cast(ast.get())) + return visit(*t, ast, data); + return {}; } + static bool needChildVisit(ASTPtr &, const ASTPtr &) { return true; } + private: - const Context & context; - Tables & external_tables; - - void visit(const ASTIdentifier & node, ASTPtr &) const + static std::vector visit(const ASTIdentifier & node, ASTPtr &, Data & data) { if (node.special()) - if (StoragePtr external_storage = context.tryGetExternalTable(node.name)) - external_tables[node.name] = external_storage; - } - - template - bool tryVisit(ASTPtr & ast) const - { - if (const T * t = typeid_cast(ast.get())) - { - visit(*t, ast); - return true; - } - return false; + if (StoragePtr external_storage = data.context.tryGetExternalTable(node.name)) + data.external_tables[node.name] = external_storage; + return {}; } }; +/// Finds in the query the usage of external tables. Fills in external_tables. 
+using ExternalTablesVisitor = InDepthNodeVisitor; + } diff --git a/dbms/src/Interpreters/InDepthNodeVisitor.h b/dbms/src/Interpreters/InDepthNodeVisitor.h index f4186a9dd4a..ff102d136c5 100644 --- a/dbms/src/Interpreters/InDepthNodeVisitor.h +++ b/dbms/src/Interpreters/InDepthNodeVisitor.h @@ -1,7 +1,6 @@ #pragma once -#include -#include +#include #include namespace DB From 31c629bf5588880865a02cfb26c88825e5e3f043 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 7 Dec 2018 18:36:54 +0300 Subject: [PATCH 61/88] GlobalSubqueriesMatcher via InDepthNodeVisitor --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 4 +- .../Interpreters/GlobalSubqueriesVisitor.h | 280 +++++++++--------- 2 files changed, 140 insertions(+), 144 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 8105f3c28f4..29548e6dd52 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -247,9 +247,9 @@ void ExpressionAnalyzer::initGlobalSubqueriesAndExternalTables() if (do_global) { - GlobalSubqueriesVisitor subqueries_visitor(context, subquery_depth, isRemoteStorage(), + GlobalSubqueriesMatcher::Data subqueries_data(context, subquery_depth, isRemoteStorage(), external_tables, subqueries_for_sets, has_global_subqueries); - subqueries_visitor.visit(query); + GlobalSubqueriesVisitor(subqueries_data).visit(query); } } diff --git a/dbms/src/Interpreters/GlobalSubqueriesVisitor.h b/dbms/src/Interpreters/GlobalSubqueriesVisitor.h index ec616b817b9..92c6441a114 100644 --- a/dbms/src/Interpreters/GlobalSubqueriesVisitor.h +++ b/dbms/src/Interpreters/GlobalSubqueriesVisitor.h @@ -15,175 +15,171 @@ #include #include #include +#include namespace DB { -/// Visitors consist of functions with unified interface 'void visit(Casted & x, ASTPtr & y)', there x is y, successfully casted to Casted. -/// Both types and fuction could have const specifiers. 
The second argument is used by visitor to replaces AST node (y) if needed. -/// Converts GLOBAL subqueries to external tables; Puts them into the external_tables dictionary: name -> StoragePtr. -class GlobalSubqueriesVisitor +class GlobalSubqueriesMatcher { public: - GlobalSubqueriesVisitor(const Context & context_, size_t subquery_depth_, bool is_remote_, - Tables & tables, SubqueriesForSets & subqueries_for_sets_, bool & has_global_subqueries_) - : context(context_), - subquery_depth(subquery_depth_), - is_remote(is_remote_), - external_table_id(1), - external_tables(tables), - subqueries_for_sets(subqueries_for_sets_), - has_global_subqueries(has_global_subqueries_) - {} - - void visit(ASTPtr & ast) const + struct Data { - /// Recursive calls. We do not go into subqueries. - for (auto & child : ast->children) - if (!typeid_cast(child.get())) - visit(child); + const Context & context; + size_t subquery_depth; + bool is_remote; + size_t external_table_id; + Tables & external_tables; + SubqueriesForSets & subqueries_for_sets; + bool & has_global_subqueries; - /// Bottom-up actions. - if (tryVisit(ast) || - tryVisit(ast)) + Data(const Context & context_, size_t subquery_depth_, bool is_remote_, + Tables & tables, SubqueriesForSets & subqueries_for_sets_, bool & has_global_subqueries_) + : context(context_), + subquery_depth(subquery_depth_), + is_remote(is_remote_), + external_table_id(1), + external_tables(tables), + subqueries_for_sets(subqueries_for_sets_), + has_global_subqueries(has_global_subqueries_) {} + + void addExternalStorage(ASTPtr & subquery_or_table_name_or_table_expression) + { + /// With nondistributed queries, creating temporary tables does not make sense. 
+ if (!is_remote) + return; + + ASTPtr subquery; + ASTPtr table_name; + ASTPtr subquery_or_table_name; + + if (typeid_cast(subquery_or_table_name_or_table_expression.get())) + { + table_name = subquery_or_table_name_or_table_expression; + subquery_or_table_name = table_name; + } + else if (auto ast_table_expr = typeid_cast(subquery_or_table_name_or_table_expression.get())) + { + if (ast_table_expr->database_and_table_name) + { + table_name = ast_table_expr->database_and_table_name; + subquery_or_table_name = table_name; + } + else if (ast_table_expr->subquery) + { + subquery = ast_table_expr->subquery; + subquery_or_table_name = subquery; + } + } + else if (typeid_cast(subquery_or_table_name_or_table_expression.get())) + { + subquery = subquery_or_table_name_or_table_expression; + subquery_or_table_name = subquery; + } + + if (!subquery_or_table_name) + throw Exception("Logical error: unknown AST element passed to ExpressionAnalyzer::addExternalStorage method", + ErrorCodes::LOGICAL_ERROR); + + if (table_name) + { + /// If this is already an external table, you do not need to add anything. Just remember its presence. + if (external_tables.end() != external_tables.find(static_cast(*table_name).name)) + return; + } + + /// Generate the name for the external table. + String external_table_name = "_data" + toString(external_table_id); + while (external_tables.count(external_table_name)) + { + ++external_table_id; + external_table_name = "_data" + toString(external_table_id); + } + + auto interpreter = interpretSubquery(subquery_or_table_name, context, subquery_depth, {}); + + Block sample = interpreter->getSampleBlock(); + NamesAndTypesList columns = sample.getNamesAndTypesList(); + + StoragePtr external_storage = StorageMemory::create(external_table_name, ColumnsDescription{columns}); + external_storage->startup(); + + /** We replace the subquery with the name of the temporary table. + * It is in this form, the request will go to the remote server. 
+ * This temporary table will go to the remote server, and on its side, + * instead of doing a subquery, you just need to read it. + */ + + auto database_and_table_name = createDatabaseAndTableNode("", external_table_name); + + if (auto ast_table_expr = typeid_cast(subquery_or_table_name_or_table_expression.get())) + { + ast_table_expr->subquery.reset(); + ast_table_expr->database_and_table_name = database_and_table_name; + + ast_table_expr->children.clear(); + ast_table_expr->children.emplace_back(database_and_table_name); + } + else + subquery_or_table_name_or_table_expression = database_and_table_name; + + external_tables[external_table_name] = external_storage; + subqueries_for_sets[external_table_name].source = interpreter->execute().in; + subqueries_for_sets[external_table_name].table = external_storage; + + /** NOTE If it was written IN tmp_table - the existing temporary (but not external) table, + * then a new temporary table will be created (for example, _data1), + * and the data will then be copied to it. + * Maybe this can be avoided. + */ + } + }; + + static constexpr const char * label = "GlobalSubqueries"; + + static std::vector visit(ASTPtr & ast, Data & data) + { + if (auto * t = typeid_cast(ast.get())) + visit(*t, ast, data); + if (auto * t = typeid_cast(ast.get())) + visit(*t, ast, data); + return {}; + } + + static bool needChildVisit(ASTPtr &, const ASTPtr & child) + { + /// We do not go into subqueries. 
+ if (typeid_cast(child.get())) + return false; + return true; } private: - const Context & context; - size_t subquery_depth; - bool is_remote; - mutable size_t external_table_id = 1; - Tables & external_tables; - SubqueriesForSets & subqueries_for_sets; - bool & has_global_subqueries; - /// GLOBAL IN - void visit(ASTFunction & func, ASTPtr &) const + static void visit(ASTFunction & func, ASTPtr &, Data & data) { if (func.name == "globalIn" || func.name == "globalNotIn") { - addExternalStorage(func.arguments->children.at(1)); - has_global_subqueries = true; + data.addExternalStorage(func.arguments->children[1]); + data.has_global_subqueries = true; } } /// GLOBAL JOIN - void visit(ASTTablesInSelectQueryElement & table_elem, ASTPtr &) const + static void visit(ASTTablesInSelectQueryElement & table_elem, ASTPtr &, Data & data) { if (table_elem.table_join && static_cast(*table_elem.table_join).locality == ASTTableJoin::Locality::Global) { - addExternalStorage(table_elem.table_expression); - has_global_subqueries = true; + data.addExternalStorage(table_elem.table_expression); + data.has_global_subqueries = true; } } - - template - bool tryVisit(ASTPtr & ast) const - { - if (T * t = typeid_cast(ast.get())) - { - visit(*t, ast); - return true; - } - return false; - } - - /** Initialize InterpreterSelectQuery for a subquery in the GLOBAL IN/JOIN section, - * create a temporary table of type Memory and store it in the external_tables dictionary. - */ - void addExternalStorage(ASTPtr & subquery_or_table_name_or_table_expression) const - { - /// With nondistributed queries, creating temporary tables does not make sense. 
- if (!is_remote) - return; - - ASTPtr subquery; - ASTPtr table_name; - ASTPtr subquery_or_table_name; - - if (typeid_cast(subquery_or_table_name_or_table_expression.get())) - { - table_name = subquery_or_table_name_or_table_expression; - subquery_or_table_name = table_name; - } - else if (auto ast_table_expr = typeid_cast(subquery_or_table_name_or_table_expression.get())) - { - if (ast_table_expr->database_and_table_name) - { - table_name = ast_table_expr->database_and_table_name; - subquery_or_table_name = table_name; - } - else if (ast_table_expr->subquery) - { - subquery = ast_table_expr->subquery; - subquery_or_table_name = subquery; - } - } - else if (typeid_cast(subquery_or_table_name_or_table_expression.get())) - { - subquery = subquery_or_table_name_or_table_expression; - subquery_or_table_name = subquery; - } - - if (!subquery_or_table_name) - throw Exception("Logical error: unknown AST element passed to ExpressionAnalyzer::addExternalStorage method", - ErrorCodes::LOGICAL_ERROR); - - if (table_name) - { - /// If this is already an external table, you do not need to add anything. Just remember its presence. - if (external_tables.end() != external_tables.find(static_cast(*table_name).name)) - return; - } - - /// Generate the name for the external table. - String external_table_name = "_data" + toString(external_table_id); - while (external_tables.count(external_table_name)) - { - ++external_table_id; - external_table_name = "_data" + toString(external_table_id); - } - - auto interpreter = interpretSubquery(subquery_or_table_name, context, subquery_depth, {}); - - Block sample = interpreter->getSampleBlock(); - NamesAndTypesList columns = sample.getNamesAndTypesList(); - - StoragePtr external_storage = StorageMemory::create(external_table_name, ColumnsDescription{columns}); - external_storage->startup(); - - /** We replace the subquery with the name of the temporary table. - * It is in this form, the request will go to the remote server. 
- * This temporary table will go to the remote server, and on its side, - * instead of doing a subquery, you just need to read it. - */ - - auto database_and_table_name = createDatabaseAndTableNode("", external_table_name); - - if (auto ast_table_expr = typeid_cast(subquery_or_table_name_or_table_expression.get())) - { - ast_table_expr->subquery.reset(); - ast_table_expr->database_and_table_name = database_and_table_name; - - ast_table_expr->children.clear(); - ast_table_expr->children.emplace_back(database_and_table_name); - } - else - subquery_or_table_name_or_table_expression = database_and_table_name; - - external_tables[external_table_name] = external_storage; - subqueries_for_sets[external_table_name].source = interpreter->execute().in; - subqueries_for_sets[external_table_name].table = external_storage; - - /** NOTE If it was written IN tmp_table - the existing temporary (but not external) table, - * then a new temporary table will be created (for example, _data1), - * and the data will then be copied to it. - * Maybe this can be avoided. - */ - } }; +/// Converts GLOBAL subqueries to external tables; Puts them into the external_tables dictionary: name -> StoragePtr. +using GlobalSubqueriesVisitor = InDepthNodeVisitor; + } From 2afe664d674f20d7a5a27638d255f1a05e629cd4 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 7 Dec 2018 19:28:20 +0300 Subject: [PATCH 62/88] RequiredSourceColumnsVisitor via InDepthNodeVisitor --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 16 ++- .../RequiredSourceColumnsVisitor.h | 130 ++++++++---------- 2 files changed, 69 insertions(+), 77 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 29548e6dd52..e6f29670959 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -1027,8 +1027,8 @@ void ExpressionAnalyzer::collectUsedColumns() { /// Nothing needs to be ignored for expressions in ARRAY JOIN. 
NameSet empty; - RequiredSourceColumnsVisitor visitor(available_columns, required, empty, empty, empty); - visitor.visit(expressions[i]); + RequiredSourceColumnsMatcher::Data visitor_data{available_columns, required, empty, empty, empty}; + RequiredSourceColumnsVisitor(visitor_data).visit(expressions[i]); } ignored.insert(expressions[i]->getAliasOrColumnName()); @@ -1044,15 +1044,17 @@ void ExpressionAnalyzer::collectUsedColumns() NameSet required_joined_columns; - for (const auto & left_key_ast : analyzedJoin().key_asts_left) + for (const auto & left_key_ast : syntax->analyzed_join.key_asts_left) { NameSet empty; - RequiredSourceColumnsVisitor columns_visitor(available_columns, required, ignored, empty, required_joined_columns); - columns_visitor.visit(left_key_ast); + RequiredSourceColumnsMatcher::Data columns_data{available_columns, required, ignored, empty, required_joined_columns}; + ASTPtr tmp = left_key_ast; + RequiredSourceColumnsVisitor(columns_data).visit(tmp); } - RequiredSourceColumnsVisitor columns_visitor(available_columns, required, ignored, available_joined_columns, required_joined_columns); - columns_visitor.visit(query); + RequiredSourceColumnsMatcher::Data columns_visitor_data{available_columns, required, ignored, + available_joined_columns, required_joined_columns}; + RequiredSourceColumnsVisitor(columns_visitor_data).visit(query); columns_added_by_join = analyzedJoin().available_joined_columns; for (auto it = columns_added_by_join.begin(); it != columns_added_by_join.end();) diff --git a/dbms/src/Interpreters/RequiredSourceColumnsVisitor.h b/dbms/src/Interpreters/RequiredSourceColumnsVisitor.h index ada053a3657..726023be8eb 100644 --- a/dbms/src/Interpreters/RequiredSourceColumnsVisitor.h +++ b/dbms/src/Interpreters/RequiredSourceColumnsVisitor.h @@ -18,25 +18,45 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } -/// Visitors consist of functions with unified interface 'void visit(Casted & x, ASTPtr & y)', there x is 
y, successfully casted to Casted. -/// Both types and fuction could have const specifiers. The second argument is used by visitor to replaces AST node (y) if needed. -/** Get a set of necessary columns to read from the table. - * In this case, the columns specified in ignored_names are considered unnecessary. And the ignored_names parameter can be modified. - * The set of columns available_joined_columns are the columns available from JOIN, they are not needed for reading from the main table. - * Put in required_joined_columns the set of columns available from JOIN and needed. - */ -class RequiredSourceColumnsVisitor +class RequiredSourceColumnsMatcher { public: - RequiredSourceColumnsVisitor(const NameSet & available_columns_, NameSet & required_source_columns_, NameSet & ignored_names_, - const NameSet & available_joined_columns_, NameSet & required_joined_columns_) - : available_columns(available_columns_), - required_source_columns(required_source_columns_), - ignored_names(ignored_names_), - available_joined_columns(available_joined_columns_), - required_joined_columns(required_joined_columns_) - {} + struct Data + { + const NameSet & available_columns; + NameSet & required_source_columns; + NameSet & ignored_names; + const NameSet & available_joined_columns; + NameSet & required_joined_columns; + }; + + static constexpr const char * label = "RequiredSourceColumns"; + + static bool needChildVisit(ASTPtr & node, const ASTPtr & child) + { + /// We will not go to the ARRAY JOIN section, because we need to look at the names of non-ARRAY-JOIN columns. + /// There, `collectUsedColumns` will send us separately. + if (typeid_cast(child.get()) || + typeid_cast(child.get()) || + typeid_cast(child.get()) || + typeid_cast(child.get())) + return false; + + /// Processed. Do not need children. + if (typeid_cast(node.get())) + return false; + + if (auto * f = typeid_cast(node.get())) + { + /// A special function `indexHint`. 
Everything that is inside it is not calculated + /// (and is used only for index analysis, see KeyCondition). + if (f->name == "indexHint") + return false; + } + + return true; + } /** Find all the identifiers in the query. * We will use depth first search in AST. @@ -46,36 +66,34 @@ public: * - there is some exception for the ARRAY JOIN clause (it has a slightly different identifiers); * - we put identifiers available from JOIN in required_joined_columns. */ - void visit(const ASTPtr & ast) const + static std::vector visit(ASTPtr & ast, Data & data) { - if (!tryVisit(ast) && - !tryVisit(ast)) - visitChildren(ast); + if (auto * t = typeid_cast(ast.get())) + visit(*t, ast, data); + if (auto * t = typeid_cast(ast.get())) + visit(*t, ast, data); + return {}; } private: - const NameSet & available_columns; - NameSet & required_source_columns; - NameSet & ignored_names; - const NameSet & available_joined_columns; - NameSet & required_joined_columns; - - void visit(const ASTIdentifier & node, const ASTPtr &) const + static void visit(const ASTIdentifier & node, const ASTPtr &, Data & data) { if (node.general() - && !ignored_names.count(node.name) - && !ignored_names.count(Nested::extractTableName(node.name))) + && !data.ignored_names.count(node.name) + && !data.ignored_names.count(Nested::extractTableName(node.name))) { - if (!available_joined_columns.count(node.name) - || available_columns.count(node.name)) /// Read column from left table if has. - required_source_columns.insert(node.name); + /// Read column from left table if has. 
+ if (!data.available_joined_columns.count(node.name) || data.available_columns.count(node.name)) + data.required_source_columns.insert(node.name); else - required_joined_columns.insert(node.name); + data.required_joined_columns.insert(node.name); } } - void visit(const ASTFunction & node, const ASTPtr & ast) const + static void visit(const ASTFunction & node, const ASTPtr &, Data & data) { + NameSet & ignored_names = data.ignored_names; + if (node.name == "lambda") { if (node.arguments->children.size() != 2) @@ -102,47 +120,19 @@ private: } } - visit(node.arguments->children.at(1)); + visit(node.arguments->children[1], data); for (size_t i = 0; i < added_ignored.size(); ++i) ignored_names.erase(added_ignored[i]); - - return; } - - /// A special function `indexHint`. Everything that is inside it is not calculated - /// (and is used only for index analysis, see KeyCondition). - if (node.name == "indexHint") - return; - - visitChildren(ast); - } - - void visitChildren(const ASTPtr & ast) const - { - for (auto & child : ast->children) - { - /** We will not go to the ARRAY JOIN section, because we need to look at the names of non-ARRAY-JOIN columns. - * There, `collectUsedColumns` will send us separately. - */ - if (!typeid_cast(child.get()) - && !typeid_cast(child.get()) - && !typeid_cast(child.get()) - && !typeid_cast(child.get())) - visit(child); - } - } - - template - bool tryVisit(const ASTPtr & ast) const - { - if (const T * t = typeid_cast(ast.get())) - { - visit(*t, ast); - return true; - } - return false; } }; +/** Get a set of necessary columns to read from the table. + * In this case, the columns specified in ignored_names are considered unnecessary. And the ignored_names parameter can be modified. + * The set of columns available_joined_columns are the columns available from JOIN, they are not needed for reading from the main table. + * Put in required_joined_columns the set of columns available from JOIN and needed. 
+ */ +using RequiredSourceColumnsVisitor = InDepthNodeVisitor; + } From 0c49c1959246b2a284b0cc5a012d0515c93d36e9 Mon Sep 17 00:00:00 2001 From: chertus Date: Sat, 8 Dec 2018 01:02:33 +0300 Subject: [PATCH 63/88] fix style --- dbms/src/Parsers/ASTExpressionList.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Parsers/ASTExpressionList.h b/dbms/src/Parsers/ASTExpressionList.h index 4f77adb4009..ff2fbaff95f 100644 --- a/dbms/src/Parsers/ASTExpressionList.h +++ b/dbms/src/Parsers/ASTExpressionList.h @@ -11,7 +11,7 @@ namespace DB class ASTExpressionList : public IAST { public: - String getID(char ) const override { return "ExpressionList"; } + String getID(char) const override { return "ExpressionList"; } ASTPtr clone() const override; void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; From 6bc30f225e7acd7992bf37902d0874e2288a08f2 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Sun, 9 Dec 2018 23:11:46 +0800 Subject: [PATCH 64/88] Guard `force_restore_data_flag_file` removal resemble `force_drop_table` file --- dbms/src/Interpreters/loadMetadata.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/loadMetadata.cpp b/dbms/src/Interpreters/loadMetadata.cpp index e535bafc51d..748ba0a7548 100644 --- a/dbms/src/Interpreters/loadMetadata.cpp +++ b/dbms/src/Interpreters/loadMetadata.cpp @@ -118,7 +118,14 @@ void loadMetadata(Context & context) thread_pool.wait(); if (has_force_restore_data_flag) - force_restore_data_flag_file.remove(); + try + { + force_restore_data_flag_file.remove(); + } + catch (...) + { + tryLogCurrentException("Load metadata", "Can't remove force restore file to enable data santity checks"); + } } From 61cd6fe2d312a20c9e6f057b947477bf48d04e26 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 10 Dec 2018 13:21:32 +0300 Subject: [PATCH 65/88] Remove LowCardinality for joined keys.
--- dbms/src/Interpreters/Join.cpp | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 7707eed6933..17e21628ebc 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -15,6 +15,7 @@ #include #include +#include namespace DB @@ -259,10 +260,12 @@ void Join::setSampleBlock(const Block & block) size_t keys_size = key_names_right.size(); ColumnRawPtrs key_columns(keys_size); + Colums materialized_columns(keys_size); for (size_t i = 0; i < keys_size; ++i) { - key_columns[i] = block.getByName(key_names_right[i]).column.get(); + materialized_columns[i] = recursiveRemoveLowCardinality(block.getByName(key_names_right[i]).column); + key_columns[i] = materialized_columns[i].get(); /// We will join only keys, where all components are not NULL. if (key_columns[i]->isColumnNullable()) @@ -281,7 +284,10 @@ void Join::setSampleBlock(const Block & block) const auto & name = sample_block_with_columns_to_add.getByPosition(pos).name; if (key_names_right.end() != std::find(key_names_right.begin(), key_names_right.end(), name)) { - sample_block_with_keys.insert(sample_block_with_columns_to_add.getByPosition(pos)); + auto & col = sample_block_with_columns_to_add.getByPosition(pos); + col.column = recursiveRemoveLowCardinality(col.column); + col.type = recursiveRemoveLowCardinality(col.type); + sample_block_with_keys.insert(col); sample_block_with_columns_to_add.erase(pos); } else @@ -428,11 +434,13 @@ bool Join::insertFromBlock(const Block & block) /// Rare case, when keys are constant. To avoid code bloat, simply materialize them. Columns materialized_columns; + materialized_columns.reserve(keys_size); /// Memoize key columns to work. 
+ for (size_t i = 0; i < keys_size; ++i) { - key_columns[i] = block.getByName(key_names_right[i]).column.get(); + materialized_columns.emplace_back(recursiveRemoveLowCardinality(block.getByName(key_names_right[i]).column)); + key_columns[i] = materialized_columns.back().get(); if (ColumnPtr converted = key_columns[i]->convertToFullColumnIfConst()) { @@ -669,11 +677,13 @@ void Join::joinBlockImpl(Block & block, const Maps & maps) const /// Rare case, when keys are constant. To avoid code bloat, simply materialize them. Columns materialized_columns; + materialized_columns.reserve(keys_size); /// Memoize key columns to work with. for (size_t i = 0; i < keys_size; ++i) { - key_columns[i] = block.getByName(key_names_left[i]).column.get(); + materialized_columns.emplace_back(recursiveRemoveLowCardinality(block.getByName(key_names_left[i]).column)); + key_columns[i] = materialized_columns.back().get(); if (ColumnPtr converted = key_columns[i]->convertToFullColumnIfConst()) { @@ -883,8 +893,8 @@ void Join::checkTypesOfKeys(const Block & block_left, const Block & block_right) { /// Compare up to Nullability. - DataTypePtr left_type = removeNullable(block_left.getByName(key_names_left[i]).type); - DataTypePtr right_type = removeNullable(block_right.getByName(key_names_right[i]).type); + DataTypePtr left_type = removeNullable(recursiveRemoveLowCardinality(block_left.getByName(key_names_left[i]).type)); + DataTypePtr right_type = removeNullable(recursiveRemoveLowCardinality(block_right.getByName(key_names_right[i]).type)); if (!left_type->equals(*right_type)) throw Exception("Type mismatch of columns to JOIN by: " From e4fd13f2377c9549917633f40baa14355face259 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 10 Dec 2018 13:23:45 +0300 Subject: [PATCH 66/88] Remove LowCardinality for joined keys.
--- dbms/src/Interpreters/Join.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 17e21628ebc..fa7c201d67c 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -260,7 +260,7 @@ void Join::setSampleBlock(const Block & block) size_t keys_size = key_names_right.size(); ColumnRawPtrs key_columns(keys_size); - Colums materialized_columns(keys_size); + Columns materialized_columns(keys_size); for (size_t i = 0; i < keys_size; ++i) { From 66d73025d7eec6a00ce27580b887c2070f42ca70 Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 10 Dec 2018 13:55:27 +0300 Subject: [PATCH 67/88] fix RequiredSourceColumnsMatcher lambda --- .../RequiredSourceColumnsVisitor.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/dbms/src/Interpreters/RequiredSourceColumnsVisitor.h b/dbms/src/Interpreters/RequiredSourceColumnsVisitor.h index 726023be8eb..ebf948ffeb8 100644 --- a/dbms/src/Interpreters/RequiredSourceColumnsVisitor.h +++ b/dbms/src/Interpreters/RequiredSourceColumnsVisitor.h @@ -18,7 +18,11 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } - +/** Get a set of necessary columns to read from the table. + * In this case, the columns specified in ignored_names are considered unnecessary. And the ignored_names parameter can be modified. + * The set of columns available_joined_columns are the columns available from JOIN, they are not needed for reading from the main table. + * Put in required_joined_columns the set of columns available from JOIN and needed. + */ class RequiredSourceColumnsMatcher { public: @@ -49,9 +53,9 @@ public: if (auto * f = typeid_cast(node.get())) { - /// A special function `indexHint`. Everything that is inside it is not calculated - /// (and is used only for index analysis, see KeyCondition). - if (f->name == "indexHint") + /// "indexHint" is a special function for index analysis. 
Everything that is inside it is not calculated. @sa KeyCondition + /// "lambda" visit children itself. + if (f->name == "indexHint" || f->name == "lambda") return false; } @@ -120,6 +124,7 @@ private: } } + /// @note It's a special case where we visit children inside the matcher, not in visitor. visit(node.arguments->children[1], data); for (size_t i = 0; i < added_ignored.size(); ++i) @@ -128,11 +133,7 @@ private: } }; -/** Get a set of necessary columns to read from the table. - * In this case, the columns specified in ignored_names are considered unnecessary. And the ignored_names parameter can be modified. - * The set of columns available_joined_columns are the columns available from JOIN, they are not needed for reading from the main table. - * Put in required_joined_columns the set of columns available from JOIN and needed. - */ +/// Get a set of necessary columns to read from the table. using RequiredSourceColumnsVisitor = InDepthNodeVisitor; } From 723932930da0bea67bcf0069a90fa32d2313d137 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 10 Dec 2018 14:21:29 +0300 Subject: [PATCH 68/88] Added test with low cardinality join. 
--- .../00800_low_cardinality_join.reference | 41 +++++++++++++++++++ .../00800_low_cardinality_join.sql | 27 ++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00800_low_cardinality_join.reference create mode 100644 dbms/tests/queries/0_stateless/00800_low_cardinality_join.sql diff --git a/dbms/tests/queries/0_stateless/00800_low_cardinality_join.reference b/dbms/tests/queries/0_stateless/00800_low_cardinality_join.reference new file mode 100644 index 00000000000..31d1de2d8c7 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00800_low_cardinality_join.reference @@ -0,0 +1,41 @@ +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +- +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +- +0 1 +1 2 +2 0 +0 1 +1 2 +2 0 +0 1 +1 2 +2 0 +0 1 +1 2 +2 0 +0 1 +1 2 +2 0 +0 1 +1 2 +2 \N +0 1 +1 2 +2 \N diff --git a/dbms/tests/queries/0_stateless/00800_low_cardinality_join.sql b/dbms/tests/queries/0_stateless/00800_low_cardinality_join.sql new file mode 100644 index 00000000000..07ad6d54624 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00800_low_cardinality_join.sql @@ -0,0 +1,27 @@ +select * from (select dummy as val from system.one) any left join (select dummy as val from system.one) using val; +select * from (select toLowCardinality(dummy) as val from system.one) any left join (select dummy as val from system.one) using val; +select * from (select dummy as val from system.one) any left join (select toLowCardinality(dummy) as val from system.one) using val; +select * from (select toLowCardinality(dummy) as val from system.one) any left join (select toLowCardinality(dummy) as val from system.one) using val; +select * from (select toLowCardinality(toNullable(dummy)) as val from system.one) any left join (select dummy as val from system.one) using val; +select * from (select dummy as val from system.one) any left join (select toLowCardinality(toNullable(dummy)) as val from system.one) using val; +select * from (select 
toLowCardinality(toNullable(dummy)) as val from system.one) any left join (select toLowCardinality(dummy) as val from system.one) using val; +select * from (select toLowCardinality(dummy) as val from system.one) any left join (select toLowCardinality(toNullable(dummy)) as val from system.one) using val; +select * from (select toLowCardinality(toNullable(dummy)) as val from system.one) any left join (select toLowCardinality(toNullable(dummy)) as val from system.one) using val; +select '-'; +select * from (select dummy as val from system.one) any left join (select dummy as val from system.one) on val + 0 = val * 1; +select * from (select toLowCardinality(dummy) as val from system.one) any left join (select dummy as val from system.one) on val + 0 = val * 1; +select * from (select dummy as val from system.one) any left join (select toLowCardinality(dummy) as val from system.one) on val + 0 = val * 1; +select * from (select toLowCardinality(dummy) as val from system.one) any left join (select toLowCardinality(dummy) as val from system.one) on val + 0 = val * 1; +select * from (select toLowCardinality(toNullable(dummy)) as val from system.one) any left join (select dummy as val from system.one) on val + 0 = val * 1; +select * from (select dummy as val from system.one) any left join (select toLowCardinality(toNullable(dummy)) as val from system.one) on val + 0 = val * 1; +select * from (select toLowCardinality(toNullable(dummy)) as val from system.one) any left join (select toLowCardinality(dummy) as val from system.one) on val + 0 = val * 1; +select * from (select toLowCardinality(dummy) as val from system.one) any left join (select toLowCardinality(toNullable(dummy)) as val from system.one) on val + 0 = val * 1; +select * from (select toLowCardinality(toNullable(dummy)) as val from system.one) any left join (select toLowCardinality(toNullable(dummy)) as val from system.one) on val + 0 = val * 1; +select '-'; +select * from (select number as l from system.numbers limit 
3) any left join (select number as r from system.numbers limit 3) on l + 1 = r * 1; +select * from (select toLowCardinality(number) as l from system.numbers limit 3) any left join (select number as r from system.numbers limit 3) on l + 1 = r * 1; +select * from (select number as l from system.numbers limit 3) any left join (select toLowCardinality(number) as r from system.numbers limit 3) on l + 1 = r * 1; +select * from (select toLowCardinality(number) as l from system.numbers limit 3) any left join (select toLowCardinality(number) as r from system.numbers limit 3) on l + 1 = r * 1; +select * from (select toLowCardinality(toNullable(number)) as l from system.numbers limit 3) any left join (select toLowCardinality(number) as r from system.numbers limit 3) on l + 1 = r * 1; +select * from (select toLowCardinality(number) as l from system.numbers limit 3) any left join (select toLowCardinality(toNullable(number)) as r from system.numbers limit 3) on l + 1 = r * 1; +select * from (select toLowCardinality(toNullable(number)) as l from system.numbers limit 3) any left join (select toLowCardinality(toNullable(number)) as r from system.numbers limit 3) on l + 1 = r * 1; From 1a6127dce4a1660254071b8474efa5f2a1b44be4 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 10 Dec 2018 15:09:37 +0300 Subject: [PATCH 69/88] Remove LowCardinality from NativeBlockOutputStream types for old clients. 
--- dbms/programs/server/TCPHandler.cpp | 11 +++++++++++ dbms/src/DataStreams/NativeBlockInputStream.cpp | 3 ++- dbms/src/DataStreams/NativeBlockOutputStream.cpp | 10 +++++++++- dbms/src/DataTypes/DataTypeLowCardinalityHelpers.cpp | 3 +++ 4 files changed, 25 insertions(+), 2 deletions(-) diff --git a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp index 108a630a0dd..6b1d4407a20 100644 --- a/dbms/programs/server/TCPHandler.cpp +++ b/dbms/programs/server/TCPHandler.cpp @@ -389,6 +389,17 @@ void TCPHandler::processOrdinaryQuery() /// Send header-block, to allow client to prepare output format for data to send. { Block header = state.io.in->getHeader(); + + /// Send data to old clients without low cardinality type. + if (client_revision && client_revision < DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE) + { + for (auto & column : header) + { + column.column = recursiveRemoveLowCardinality(column.column); + column.type = recursiveRemoveLowCardinality(column.type); + } + } + if (header) sendData(header); } diff --git a/dbms/src/DataStreams/NativeBlockInputStream.cpp b/dbms/src/DataStreams/NativeBlockInputStream.cpp index 33afbb0aa9e..7cd4a571a60 100644 --- a/dbms/src/DataStreams/NativeBlockInputStream.cpp +++ b/dbms/src/DataStreams/NativeBlockInputStream.cpp @@ -153,7 +153,8 @@ Block NativeBlockInputStream::readImpl() column.column = std::move(read_column); - if (server_revision && server_revision < DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE) + /// Support insert from old clients without low cardinality type. 
+ if (header && server_revision && server_revision < DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE) { column.column = recursiveLowCardinalityConversion(column.column, column.type, header.getByPosition(i).type); column.type = header.getByPosition(i).type; diff --git a/dbms/src/DataStreams/NativeBlockOutputStream.cpp b/dbms/src/DataStreams/NativeBlockOutputStream.cpp index 8be3373fc44..c87d82b2506 100644 --- a/dbms/src/DataStreams/NativeBlockOutputStream.cpp +++ b/dbms/src/DataStreams/NativeBlockOutputStream.cpp @@ -9,6 +9,7 @@ #include #include +#include namespace DB { @@ -100,7 +101,14 @@ void NativeBlockOutputStream::write(const Block & block) mark.offset_in_decompressed_block = ostr_concrete->getRemainingBytes(); } - const ColumnWithTypeAndName & column = block.safeGetByPosition(i); + ColumnWithTypeAndName column = block.safeGetByPosition(i); + + /// Send data to old clients without low cardinality type. + if (client_revision && client_revision < DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE) + { + column.column = recursiveRemoveLowCardinality(column.column); + column.type = recursiveRemoveLowCardinality(column.type); + } /// Name writeStringBinary(column.name, ostr); diff --git a/dbms/src/DataTypes/DataTypeLowCardinalityHelpers.cpp b/dbms/src/DataTypes/DataTypeLowCardinalityHelpers.cpp index b212c8ebdaa..215b21f7994 100644 --- a/dbms/src/DataTypes/DataTypeLowCardinalityHelpers.cpp +++ b/dbms/src/DataTypes/DataTypeLowCardinalityHelpers.cpp @@ -69,6 +69,9 @@ ColumnPtr recursiveRemoveLowCardinality(const ColumnPtr & column) ColumnPtr recursiveLowCardinalityConversion(const ColumnPtr & column, const DataTypePtr & from_type, const DataTypePtr & to_type) { + if (!column) + return column; + if (from_type->equals(*to_type)) return column; From 6a90abc543e362a5738bc796f72ce88d31397d35 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 10 Dec 2018 15:54:02 +0300 Subject: [PATCH 70/88] Support SUBSTRING(expr FROM start FOR length) --- 
dbms/src/Parsers/ExpressionElementParsers.cpp | 73 ++++++++++++++++++- dbms/src/Parsers/ExpressionElementParsers.h | 7 ++ .../00765_sql_compatibility_aliases.reference | 2 + .../00765_sql_compatibility_aliases.sql | 2 + 4 files changed, 83 insertions(+), 1 deletion(-) diff --git a/dbms/src/Parsers/ExpressionElementParsers.cpp b/dbms/src/Parsers/ExpressionElementParsers.cpp index f3e9e43aa19..0912d2a5b7b 100644 --- a/dbms/src/Parsers/ExpressionElementParsers.cpp +++ b/dbms/src/Parsers/ExpressionElementParsers.cpp @@ -317,6 +317,76 @@ bool ParserCastExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expect return false; } +bool ParserSubstringExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + /// Either SUBSTRING(expr FROM start) or SUBSTRING(expr FROM start FOR length) or SUBSTRING(expr, start, length) + /// The latter will be parsed normally as a function later. + + ASTPtr expr_node; + ASTPtr start_node; + ASTPtr length_node; + + if (!ParserKeyword("SUBSTRING").ignore(pos, expected)) + return false; + + if (pos->type != TokenType::OpeningRoundBracket) + return false; + ++pos; + + if (!ParserExpression().parse(pos, expr_node, expected)) + return false; + + if (pos->type != TokenType::Comma) + { + if (!ParserKeyword("FROM").ignore(pos, expected)) + return false; + } + else + { + ++pos; + } + + if (!ParserExpression().parse(pos, start_node, expected)) + return false; + + if (pos->type == TokenType::ClosingRoundBracket) + { + ++pos; + } + else + { + if (pos->type != TokenType::Comma) + { + if (!ParserKeyword("FOR").ignore(pos, expected)) + return false; + } + else + { + ++pos; + } + + if (!ParserExpression().parse(pos, length_node, expected)) + return false; + + ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected); + } + + /// Convert to canonical representation in functional form: SUBSTRING(expr, start, length) + + auto expr_list_args = std::make_shared(); + expr_list_args->children = {expr_node, start_node}; + + if 
(length_node) + expr_list_args->children.push_back(length_node); + + auto func_node = std::make_shared(); + func_node->name = "substring"; + func_node->arguments = std::move(expr_list_args); + func_node->children.push_back(func_node->arguments); + + node = std::move(func_node); + return true; +} bool ParserExtractExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { @@ -678,8 +748,9 @@ bool ParserExpressionElement::parseImpl(Pos & pos, ASTPtr & node, Expected & exp || ParserArrayOfLiterals().parse(pos, node, expected) || ParserArray().parse(pos, node, expected) || ParserLiteral().parse(pos, node, expected) - || ParserExtractExpression().parse(pos, node, expected) || ParserCastExpression().parse(pos, node, expected) + || ParserExtractExpression().parse(pos, node, expected) + || ParserSubstringExpression().parse(pos, node, expected) || ParserCase().parse(pos, node, expected) || ParserFunction().parse(pos, node, expected) || ParserQualifiedAsterisk().parse(pos, node, expected) diff --git a/dbms/src/Parsers/ExpressionElementParsers.h b/dbms/src/Parsers/ExpressionElementParsers.h index 32e1c57dce2..a52864d97d1 100644 --- a/dbms/src/Parsers/ExpressionElementParsers.h +++ b/dbms/src/Parsers/ExpressionElementParsers.h @@ -96,6 +96,13 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; +class ParserSubstringExpression : public IParserBase +{ +protected: + const char * getName() const override { return "SUBSTRING expression"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + class ParserExtractExpression : public IParserBase { protected: diff --git a/dbms/tests/queries/0_stateless/00765_sql_compatibility_aliases.reference b/dbms/tests/queries/0_stateless/00765_sql_compatibility_aliases.reference index f774720f9ff..7a70e443c1b 100644 --- a/dbms/tests/queries/0_stateless/00765_sql_compatibility_aliases.reference +++ b/dbms/tests/queries/0_stateless/00765_sql_compatibility_aliases.reference @@ 
-8,3 +8,5 @@ fo oo o 1 +oo +o diff --git a/dbms/tests/queries/0_stateless/00765_sql_compatibility_aliases.sql b/dbms/tests/queries/0_stateless/00765_sql_compatibility_aliases.sql index c7ce18d2b45..248514d134b 100644 --- a/dbms/tests/queries/0_stateless/00765_sql_compatibility_aliases.sql +++ b/dbms/tests/queries/0_stateless/00765_sql_compatibility_aliases.sql @@ -10,3 +10,5 @@ select SUBSTRING('foo', 1, 2); select Substr('foo', 2); select mid('foo', 3); select IF(3>2, 1, 0); +select substring('foo' from 1 + 1); +select SUBSTRING('foo' FROM 2 FOR 1); From 9236e94e1b61ac039db1e6ad266bc48bf64d2562 Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 10 Dec 2018 16:02:45 +0300 Subject: [PATCH 71/88] ExecuteScalarSubqueriesVisitor via InDepthNodeVisitor --- .../ExecuteScalarSubqueriesVisitor.cpp | 26 +++---- .../ExecuteScalarSubqueriesVisitor.h | 74 +++++++++---------- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 10 +-- .../PredicateExpressionsOptimizer.cpp | 4 +- dbms/src/Interpreters/SyntaxAnalyzer.cpp | 12 +-- 5 files changed, 59 insertions(+), 67 deletions(-) diff --git a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index 3be5095e23a..7769d60b4b5 100644 --- a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -35,17 +35,17 @@ static ASTPtr addTypeConversion(std::unique_ptr && ast, const String return res; } -void ExecuteScalarSubqueriesVisitor::visit(const ASTSubquery & subquery, ASTPtr & ast) const +void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr & ast, Data & data) { - Context subquery_context = context; - Settings subquery_settings = context.getSettings(); + Context subquery_context = data.context; + Settings subquery_settings = data.context.getSettings(); subquery_settings.max_result_rows = 1; subquery_settings.extremes = 0; subquery_context.setSettings(subquery_settings); ASTPtr 
subquery_select = subquery.children.at(0); BlockIO res = InterpreterSelectWithUnionQuery( - subquery_select, subquery_context, {}, QueryProcessingStage::Complete, subquery_depth + 1).execute(); + subquery_select, subquery_context, {}, QueryProcessingStage::Complete, data.subquery_depth + 1).execute(); Block block; try @@ -100,31 +100,29 @@ void ExecuteScalarSubqueriesVisitor::visit(const ASTSubquery & subquery, ASTPtr } } - -void ExecuteScalarSubqueriesVisitor::visit(const ASTTableExpression &, ASTPtr &) const -{ - /// Don't descend into subqueries in FROM section. -} - -void ExecuteScalarSubqueriesVisitor::visit(const ASTFunction & func, ASTPtr & ast) const +std::vector ExecuteScalarSubqueriesMatcher::visit(const ASTFunction & func, ASTPtr & ast, Data &) { /// Don't descend into subqueries in arguments of IN operator. /// But if an argument is not subquery, than deeper may be scalar subqueries and we need to descend in them. + std::vector out; if (functionIsInOrGlobalInOperator(func.name)) { for (auto & child : ast->children) { if (child != func.arguments) - visit(child); + out.push_back(&child); else for (size_t i = 0, size = func.arguments->children.size(); i < size; ++i) if (i != 1 || !typeid_cast(func.arguments->children[i].get())) - visit(func.arguments->children[i]); + out.push_back(&func.arguments->children[i]); } } else - visitChildren(ast); + for (auto & child : ast->children) + out.push_back(&child); + + return out; } } diff --git a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.h b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.h index d0e5c520a69..b3e87429d89 100644 --- a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.h +++ b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.h @@ -2,10 +2,10 @@ #include #include -#include #include #include #include +#include namespace DB { @@ -29,51 +29,45 @@ namespace DB * Scalar subqueries are executed on the request-initializer server. 
* The request is sent to remote servers with already substituted constants. */ -class ExecuteScalarSubqueriesVisitor +class ExecuteScalarSubqueriesMatcher { public: - ExecuteScalarSubqueriesVisitor(const Context & context_, size_t subquery_depth_, std::ostream * ostr_ = nullptr) - : context(context_), - subquery_depth(subquery_depth_), - visit_depth(0), - ostr(ostr_) - {} - - void visit(ASTPtr & ast) const + struct Data { - if (!tryVisit(ast) && - !tryVisit(ast) && - !tryVisit(ast)) - visitChildren(ast); + const Context & context; + size_t subquery_depth; + }; + + static constexpr const char * label = "ExecuteScalarSubqueries"; + + static bool needChildVisit(ASTPtr & node, const ASTPtr &) + { + /// Processed + if (typeid_cast(node.get()) || + typeid_cast(node.get())) + return false; + + /// Don't descend into subqueries in FROM section + if (typeid_cast(node.get())) + return false; + + return true; + } + + static std::vector visit(ASTPtr & ast, Data & data) + { + if (auto * t = typeid_cast(ast.get())) + visit(*t, ast, data); + if (auto * t = typeid_cast(ast.get())) + return visit(*t, ast, data); + return {}; } private: - const Context & context; - size_t subquery_depth; - mutable size_t visit_depth; - std::ostream * ostr; - - void visit(const ASTSubquery & subquery, ASTPtr & ast) const; - void visit(const ASTFunction & func, ASTPtr & ast) const; - void visit(const ASTTableExpression &, ASTPtr &) const; - - void visitChildren(ASTPtr & ast) const - { - for (auto & child : ast->children) - visit(child); - } - - template - bool tryVisit(ASTPtr & ast) const - { - if (const T * t = typeid_cast(ast.get())) - { - DumpASTNode dump(*ast, ostr, visit_depth, "executeScalarSubqueries"); - visit(*t, ast); - return true; - } - return false; - } + static void visit(const ASTSubquery & subquery, ASTPtr & ast, Data & data); + static std::vector visit(const ASTFunction & func, ASTPtr & ast, Data & data); }; +using ExecuteScalarSubqueriesVisitor = InDepthNodeVisitor; + } diff --git 
a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index e6f29670959..7e0998cf275 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -242,12 +242,12 @@ void ExpressionAnalyzer::analyzeAggregation() void ExpressionAnalyzer::initGlobalSubqueriesAndExternalTables() { /// Adds existing external tables (not subqueries) to the external_tables dictionary. - ExternalTablesMatcher::Data tables_data{context, external_tables}; + ExternalTablesVisitor::Data tables_data{context, external_tables}; ExternalTablesVisitor(tables_data).visit(query); if (do_global) { - GlobalSubqueriesMatcher::Data subqueries_data(context, subquery_depth, isRemoteStorage(), + GlobalSubqueriesVisitor::Data subqueries_data(context, subquery_depth, isRemoteStorage(), external_tables, subqueries_for_sets, has_global_subqueries); GlobalSubqueriesVisitor(subqueries_data).visit(query); } @@ -1027,7 +1027,7 @@ void ExpressionAnalyzer::collectUsedColumns() { /// Nothing needs to be ignored for expressions in ARRAY JOIN. 
NameSet empty; - RequiredSourceColumnsMatcher::Data visitor_data{available_columns, required, empty, empty, empty}; + RequiredSourceColumnsVisitor::Data visitor_data{available_columns, required, empty, empty, empty}; RequiredSourceColumnsVisitor(visitor_data).visit(expressions[i]); } @@ -1047,12 +1047,12 @@ void ExpressionAnalyzer::collectUsedColumns() for (const auto & left_key_ast : syntax->analyzed_join.key_asts_left) { NameSet empty; - RequiredSourceColumnsMatcher::Data columns_data{available_columns, required, ignored, empty, required_joined_columns}; + RequiredSourceColumnsVisitor::Data columns_data{available_columns, required, ignored, empty, required_joined_columns}; ASTPtr tmp = left_key_ast; RequiredSourceColumnsVisitor(columns_data).visit(tmp); } - RequiredSourceColumnsMatcher::Data columns_visitor_data{available_columns, required, ignored, + RequiredSourceColumnsVisitor::Data columns_visitor_data{available_columns, required, ignored, available_joined_columns, required_joined_columns}; RequiredSourceColumnsVisitor(columns_visitor_data).visit(query); diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp index 8e95773f72c..af84eac7f91 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -311,9 +311,9 @@ ASTs PredicateExpressionsOptimizer::getSelectQueryProjectionColumns(ASTPtr & ast std::unordered_map aliases; std::vector tables = getDatabaseAndTables(*select_query, context.getCurrentDatabase()); - TranslateQualifiedNamesMatcher::Data qn_visitor_data{{}, tables}; + TranslateQualifiedNamesVisitor::Data qn_visitor_data{{}, tables}; TranslateQualifiedNamesVisitor(qn_visitor_data).visit(ast); - QueryAliasesMatcher::Data query_aliases_data{aliases}; + QueryAliasesVisitor::Data query_aliases_data{aliases}; QueryAliasesVisitor(query_aliases_data).visit(ast); QueryNormalizer(ast, aliases, settings, {}, 
{}).perform(); diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index ae6d3ae0b4e..30124b509a7 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -134,7 +134,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( /// Creates a dictionary `aliases`: alias -> ASTPtr { LogAST log; - QueryAliasesMatcher::Data query_aliases_data{result.aliases}; + QueryAliasesVisitor::Data query_aliases_data{result.aliases}; QueryAliasesVisitor(query_aliases_data, log.stream()).visit(query); } @@ -228,7 +228,7 @@ void translateQualifiedNames(ASTPtr & query, ASTSelectQuery * select_query, std::vector tables = getDatabaseAndTables(*select_query, context.getCurrentDatabase()); LogAST log; - TranslateQualifiedNamesMatcher::Data visitor_data{source_columns, tables}; + TranslateQualifiedNamesVisitor::Data visitor_data{source_columns, tables}; TranslateQualifiedNamesVisitor visitor(visitor_data, log.stream()); visitor.visit(query); } @@ -342,8 +342,8 @@ void executeScalarSubqueries(ASTPtr & query, const ASTSelectQuery * select_query if (!select_query) { - ExecuteScalarSubqueriesVisitor visitor(context, subquery_depth, log.stream()); - visitor.visit(query); + ExecuteScalarSubqueriesVisitor::Data visitor_data{context, subquery_depth}; + ExecuteScalarSubqueriesVisitor(visitor_data, log.stream()).visit(query); } else { @@ -353,8 +353,8 @@ void executeScalarSubqueries(ASTPtr & query, const ASTSelectQuery * select_query if (!typeid_cast(child.get()) && !typeid_cast(child.get())) { - ExecuteScalarSubqueriesVisitor visitor(context, subquery_depth, log.stream()); - visitor.visit(child); + ExecuteScalarSubqueriesVisitor::Data visitor_data{context, subquery_depth}; + ExecuteScalarSubqueriesVisitor(visitor_data, log.stream()).visit(child); } } } From 49d80bf8f5df10eddd29b5380e5535ab1e31ac10 Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 10 Dec 2018 16:19:09 +0300 Subject: [PATCH 72/88] 
ExecuteScalarSubqueriesVisitor (move code from h to cpp) --- .../ExecuteScalarSubqueriesVisitor.cpp | 23 +++++++++++++ .../ExecuteScalarSubqueriesVisitor.h | 34 ++++--------------- 2 files changed, 29 insertions(+), 28 deletions(-) diff --git a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index 7769d60b4b5..9cea690a39b 100644 --- a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -35,6 +35,29 @@ static ASTPtr addTypeConversion(std::unique_ptr && ast, const String return res; } +bool ExecuteScalarSubqueriesMatcher::needChildVisit(ASTPtr & node, const ASTPtr &) +{ + /// Processed + if (typeid_cast(node.get()) || + typeid_cast(node.get())) + return false; + + /// Don't descend into subqueries in FROM section + if (typeid_cast(node.get())) + return false; + + return true; +} + +std::vector ExecuteScalarSubqueriesMatcher::visit(ASTPtr & ast, Data & data) +{ + if (auto * t = typeid_cast(ast.get())) + visit(*t, ast, data); + if (auto * t = typeid_cast(ast.get())) + return visit(*t, ast, data); + return {}; +} + void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr & ast, Data & data) { Context subquery_context = data.context; diff --git a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.h b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.h index b3e87429d89..555b7334204 100644 --- a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.h +++ b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.h @@ -1,17 +1,15 @@ #pragma once #include -#include -#include -#include -#include #include namespace DB { -/// Visitors consist of functions with unified interface 'void visit(Casted & x, ASTPtr & y)', there x is y, successfully casted to Casted. -/// Both types and fuction could have const specifiers. The second argument is used by visitor to replaces AST node (y) if needed. 
+class Context; +class ASTSubquery; +class ASTFunction; +struct ASTTableExpression; /** Replace subqueries that return exactly one row * ("scalar" subqueries) to the corresponding constants. @@ -40,28 +38,8 @@ public: static constexpr const char * label = "ExecuteScalarSubqueries"; - static bool needChildVisit(ASTPtr & node, const ASTPtr &) - { - /// Processed - if (typeid_cast(node.get()) || - typeid_cast(node.get())) - return false; - - /// Don't descend into subqueries in FROM section - if (typeid_cast(node.get())) - return false; - - return true; - } - - static std::vector visit(ASTPtr & ast, Data & data) - { - if (auto * t = typeid_cast(ast.get())) - visit(*t, ast, data); - if (auto * t = typeid_cast(ast.get())) - return visit(*t, ast, data); - return {}; - } + static bool needChildVisit(ASTPtr & node, const ASTPtr &); + static std::vector visit(ASTPtr & ast, Data & data); private: static void visit(const ASTSubquery & subquery, ASTPtr & ast, Data & data); From 9f9bf0cb1b84025080fa1c370fe9ddce67d97937 Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 10 Dec 2018 16:49:36 +0300 Subject: [PATCH 73/88] ArrayJoinedColumnsVisitor via InDepthNodeVisitor --- .../Interpreters/ArrayJoinedColumnsVisitor.h | 90 +++++++++---------- dbms/src/Interpreters/InDepthNodeVisitor.h | 2 +- dbms/src/Interpreters/SyntaxAnalyzer.cpp | 8 +- 3 files changed, 45 insertions(+), 55 deletions(-) diff --git a/dbms/src/Interpreters/ArrayJoinedColumnsVisitor.h b/dbms/src/Interpreters/ArrayJoinedColumnsVisitor.h index 96e37cd6a42..de75f4622ef 100644 --- a/dbms/src/Interpreters/ArrayJoinedColumnsVisitor.h +++ b/dbms/src/Interpreters/ArrayJoinedColumnsVisitor.h @@ -9,42 +9,55 @@ #include #include +#include + namespace DB { -/// Visitors consist of functions with unified interface 'void visit(Casted & x, ASTPtr & y)', there x is y, successfully casted to Casted. -/// Both types and fuction could have const specifiers. The second argument is used by visitor to replaces AST node (y) if needed. 
- /// Fills the array_join_result_to_source: on which columns-arrays to replicate, and how to call them after that. -class ArrayJoinedColumnsVisitor +class ArrayJoinedColumnsMatcher { public: - ArrayJoinedColumnsVisitor(NameToNameMap & array_join_name_to_alias_, - NameToNameMap & array_join_alias_to_name_, - NameToNameMap & array_join_result_to_source_) - : array_join_name_to_alias(array_join_name_to_alias_), - array_join_alias_to_name(array_join_alias_to_name_), - array_join_result_to_source(array_join_result_to_source_) - {} - - void visit(ASTPtr & ast) const + struct Data { - if (!tryVisit(ast) && - !tryVisit(ast)) - visitChildren(ast); + NameToNameMap & array_join_name_to_alias; + NameToNameMap & array_join_alias_to_name; + NameToNameMap & array_join_result_to_source; + }; + + static constexpr const char * label = "ArrayJoinedColumns"; + + static bool needChildVisit(ASTPtr & node, const ASTPtr & child) + { + /// Processed + if (typeid_cast(node.get())) + return false; + + if (typeid_cast(node.get())) + return false; + + if (typeid_cast(child.get()) || + typeid_cast(child.get())) + return false; + + return true; + } + + static std::vector visit(ASTPtr & ast, Data & data) + { + if (auto * t = typeid_cast(ast.get())) + visit(*t, ast, data); + return {}; } private: - NameToNameMap & array_join_name_to_alias; - NameToNameMap & array_join_alias_to_name; - NameToNameMap & array_join_result_to_source; - - void visit(const ASTTablesInSelectQuery &, ASTPtr &) const - {} - - void visit(const ASTIdentifier & node, ASTPtr &) const + static void visit(const ASTIdentifier & node, ASTPtr &, Data & data) { + NameToNameMap & array_join_name_to_alias = data.array_join_name_to_alias; + NameToNameMap & array_join_alias_to_name = data.array_join_alias_to_name; + NameToNameMap & array_join_result_to_source = data.array_join_result_to_source; + if (!node.general()) return; @@ -74,34 +87,11 @@ private: /** Example: SELECT ParsedParams.Key1 FROM ... ARRAY JOIN ParsedParams AS PP. 
*/ array_join_result_to_source[ /// PP.Key1 -> ParsedParams.Key1 - Nested::concatenateName(array_join_name_to_alias[splitted.first], splitted.second)] = node.name; + Nested::concatenateName(array_join_name_to_alias[splitted.first], splitted.second)] = node.name; } } - - void visit(const ASTSubquery &, ASTPtr &) const - {} - - void visit(const ASTSelectQuery &, ASTPtr &) const - {} - - void visitChildren(ASTPtr & ast) const - { - for (auto & child : ast->children) - if (!tryVisit(child) && - !tryVisit(child)) - visit(child); - } - - template - bool tryVisit(ASTPtr & ast) const - { - if (const T * t = typeid_cast(ast.get())) - { - visit(*t, ast); - return true; - } - return false; - } }; +using ArrayJoinedColumnsVisitor = InDepthNodeVisitor; + } diff --git a/dbms/src/Interpreters/InDepthNodeVisitor.h b/dbms/src/Interpreters/InDepthNodeVisitor.h index ff102d136c5..bdeb8ddb234 100644 --- a/dbms/src/Interpreters/InDepthNodeVisitor.h +++ b/dbms/src/Interpreters/InDepthNodeVisitor.h @@ -6,7 +6,7 @@ namespace DB { -/// Visits AST tree in depth, call fucntions for nodes according to Matcher type data. +/// Visits AST tree in depth, call functions for nodes according to Matcher type data. /// You need to define Data, label, visit() and needChildVisit() in Matcher class. 
template class InDepthNodeVisitor diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index 30124b509a7..78c90a0be8c 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -697,10 +697,10 @@ void getArrayJoinedColumns(ASTPtr & query, SyntaxAnalyzerResult & result, const } { - ArrayJoinedColumnsVisitor visitor(result.array_join_name_to_alias, - result.array_join_alias_to_name, - result.array_join_result_to_source); - visitor.visit(query); + ArrayJoinedColumnsVisitor::Data visitor_data{result.array_join_name_to_alias, + result.array_join_alias_to_name, + result.array_join_result_to_source}; + ArrayJoinedColumnsVisitor(visitor_data).visit(query); } /// If the result of ARRAY JOIN is not used, it is necessary to ARRAY-JOIN any column, From 5cb9f9ea2fc6b9f16b4e73160dd9e567e26563cb Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 10 Dec 2018 17:29:08 +0300 Subject: [PATCH 74/88] fix ident --- docs/zh/getting_started/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/zh/getting_started/index.md b/docs/zh/getting_started/index.md index ca108db655a..fd2efaabdeb 100644 --- a/docs/zh/getting_started/index.md +++ b/docs/zh/getting_started/index.md @@ -52,7 +52,7 @@ ClickHouse包含访问控制配置,它们位于`users.xml`文件中(与'config ```text Client: dbms/programs/clickhouse-client Server: dbms/programs/clickhouse-server - ``` +``` 在服务器中为数据创建如下目录: From becad378ca4e70263854fa282010398abf3e7f33 Mon Sep 17 00:00:00 2001 From: mf5137 Date: Mon, 10 Dec 2018 16:23:21 +0100 Subject: [PATCH 75/88] Fixes from comments of #3695 --- docker/server/Dockerfile | 12 +++++++++++- docker/server/entrypoint.sh | 32 ++++++++++++++++++++++---------- 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index f52eb61799e..db81c29ae17 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -16,19 +16,29 @@ RUN apt-get 
update \ && env DEBIAN_FRONTEND=noninteractive \ apt-get install --allow-unauthenticated --yes --no-install-recommends \ clickhouse-common-static=$version \ + clickhouse-client=$version \ clickhouse-server=$version \ libgcc-7-dev \ + locales \ + tzdata \ + wget \ && rm -rf \ /var/lib/apt/lists/* \ /var/cache/debconf \ /tmp/* \ && apt-get clean +ADD https://github.com/tianon/gosu/releases/download/1.10/gosu-amd64 /bin/gosu + +RUN locale-gen en_US.UTF-8 +ENV LANG en_US.UTF-8 +ENV LANGUAGE en_US:en +ENV LC_ALL en_US.UTF-8 + RUN mkdir /docker-entrypoint-initdb.d COPY docker_related_config.xml /etc/clickhouse-server/config.d/ COPY entrypoint.sh /entrypoint.sh -ADD https://github.com/tianon/gosu/releases/download/1.10/gosu-amd64 /bin/gosu RUN chmod +x \ /entrypoint.sh \ diff --git a/docker/server/entrypoint.sh b/docker/server/entrypoint.sh index 1cd3a799c15..c44ec3e5a9f 100644 --- a/docker/server/entrypoint.sh +++ b/docker/server/entrypoint.sh @@ -5,14 +5,18 @@ CLICKHOUSE_CONFIG="${CLICKHOUSE_CONFIG:-/etc/clickhouse-server/config.xml}" USER="$(id -u clickhouse)" GROUP="$(id -g clickhouse)" +# port is needed to check if clickhouse-server is ready for connections +HTTP_PORT="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=http_port)" + # get CH directories locations -DATA_DIR="$(grep -oP '\K(.*)(?=[/?])' $CLICKHOUSE_CONFIG || true)" -TMP_DIR="$(grep -oP '\K(.*)(?=[/?])' $CLICKHOUSE_CONFIG || true)" -USER_PATH="$(grep -oP '\K(.*)(?=)' $CLICKHOUSE_CONFIG || true)" -LOG_PATH="$(grep -oP '\K(.*)(?=)' $CLICKHOUSE_CONFIG || true)" +DATA_DIR="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=path || true)" +TMP_DIR="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=tmp_path || true)" +USER_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=user_files_path || true)" +LOG_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=logger.log || true)" LOG_DIR="$(dirname 
$LOG_PATH || true)" -ERROR_LOG_PATH="$(grep -oP '\K(.*)(?=)' $CLICKHOUSE_CONFIG || true)" +ERROR_LOG_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=logger.errorlog || true)" ERROR_LOG_DIR="$(dirname $ERROR_LOG_PATH || true)" +FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=format_schema_path || true)" # ensure directories exist mkdir -p \ @@ -20,7 +24,8 @@ mkdir -p \ "$ERROR_LOG_DIR" \ "$LOG_DIR" \ "$TMP_DIR" \ - "$USER_PATH" + "$USER_PATH" \ + "$FORMAT_SCHEMA_PATH" # ensure proper directories permissions chown -R $USER:$GROUP \ @@ -28,14 +33,21 @@ chown -R $USER:$GROUP \ "$ERROR_LOG_DIR" \ "$LOG_DIR" \ "$TMP_DIR" \ - "$USER_PATH" + "$USER_PATH" \ + "$FORMAT_SCHEMA_PATH" if [ -n "$(ls /docker-entrypoint-initdb.d/)" ]; then gosu clickhouse /usr/bin/clickhouse-server --config-file=$CLICKHOUSE_CONFIG & pid="$!" - sleep 1 - clickhouseclient=( clickhouse client --multiquery ) + # check if clickhouse is ready to accept connections + # will try to send ping clickhouse via http_port (max 12 retries, with 1 sec delay) + if ! wget --spider --quiet --tries=12 --waitretry=1 --retry-connrefused "http://localhost:$HTTP_PORT/ping" ; then + echo >&2 'ClickHouse init process failed.' + exit 1 + fi + + clickhouseclient=( clickhouse-client --multiquery ) echo for f in /docker-entrypoint-initdb.d/*; do case "$f" in @@ -56,7 +68,7 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ]; then done if ! kill -s TERM "$pid" || ! wait "$pid"; then - echo >&2 'ClickHouse init process failed.' + echo >&2 'Finishing of ClickHouse init process failed.' 
exit 1 fi fi From c9e98c8debee48592d4e6b860995604d4254adbe Mon Sep 17 00:00:00 2001 From: proller Date: Mon, 10 Dec 2018 18:25:45 +0300 Subject: [PATCH 76/88] clang-format of dbms/src/Dictionaries/* --- dbms/src/Dictionaries/CacheDictionary.cpp | 300 +++++----- dbms/src/Dictionaries/CacheDictionary.h | 141 ++--- dbms/src/Dictionaries/CacheDictionary.inc.h | 129 ++--- .../ClickHouseDictionarySource.cpp | 128 +++-- .../Dictionaries/ClickHouseDictionarySource.h | 16 +- .../ComplexKeyCacheDictionary.cpp | 164 +++--- .../Dictionaries/ComplexKeyCacheDictionary.h | 180 +++--- ...acheDictionary_createAttributeWithType.cpp | 16 +- ...exKeyCacheDictionary_setAttributeValue.cpp | 57 +- ...cheDictionary_setDefaultAttributeValue.cpp | 45 +- .../ComplexKeyHashedDictionary.cpp | 521 +++++++++++------- .../Dictionaries/ComplexKeyHashedDictionary.h | 171 +++--- .../Dictionaries/DictionaryBlockInputStream.h | 412 ++++++++------ .../DictionaryBlockInputStreamBase.cpp | 1 - .../DictionaryBlockInputStreamBase.h | 1 - dbms/src/Dictionaries/DictionaryFactory.cpp | 8 +- .../Dictionaries/DictionarySourceFactory.cpp | 14 +- .../Dictionaries/DictionarySourceHelpers.cpp | 20 +- .../Dictionaries/DictionarySourceHelpers.h | 10 +- dbms/src/Dictionaries/DictionaryStructure.cpp | 194 ++++--- dbms/src/Dictionaries/DictionaryStructure.h | 11 +- .../Embedded/GeoDictionariesLoader.cpp | 6 +- .../Embedded/GeoDictionariesLoader.h | 6 +- .../Embedded/GeodataProviders/Entries.h | 3 +- .../GeodataProviders/HierarchiesProvider.cpp | 14 +- .../GeodataProviders/HierarchiesProvider.h | 14 +- .../HierarchyFormatReader.cpp | 5 +- .../GeodataProviders/HierarchyFormatReader.h | 7 +- .../GeodataProviders/IHierarchiesProvider.h | 3 +- .../GeodataProviders/INamesProvider.h | 6 +- .../GeodataProviders/NamesFormatReader.h | 6 +- .../GeodataProviders/NamesProvider.cpp | 11 +- .../Embedded/GeodataProviders/NamesProvider.h | 12 +- .../Embedded/IGeoDictionariesLoader.h | 18 +- .../Embedded/RegionsHierarchies.cpp | 2 +- 
.../Embedded/RegionsHierarchies.h | 6 +- .../Embedded/RegionsHierarchy.cpp | 44 +- .../Dictionaries/Embedded/RegionsHierarchy.h | 2 +- .../Dictionaries/Embedded/RegionsNames.cpp | 19 +- dbms/src/Dictionaries/Embedded/RegionsNames.h | 35 +- .../Embedded/TechDataHierarchy.cpp | 6 +- .../Dictionaries/Embedded/TechDataHierarchy.h | 28 +- .../ExecutableDictionarySource.cpp | 188 +++---- .../Dictionaries/ExecutableDictionarySource.h | 11 +- .../src/Dictionaries/ExternalQueryBuilder.cpp | 17 +- dbms/src/Dictionaries/ExternalQueryBuilder.h | 10 +- .../ExternalResultDescription.cpp | 9 +- .../Dictionaries/ExternalResultDescription.h | 1 - .../src/Dictionaries/FileDictionarySource.cpp | 26 +- dbms/src/Dictionaries/FileDictionarySource.h | 9 +- dbms/src/Dictionaries/FlatDictionary.cpp | 471 ++++++++++------ dbms/src/Dictionaries/FlatDictionary.h | 114 ++-- .../src/Dictionaries/HTTPDictionarySource.cpp | 83 ++- dbms/src/Dictionaries/HTTPDictionarySource.h | 20 +- dbms/src/Dictionaries/HashedDictionary.cpp | 505 ++++++++++------- dbms/src/Dictionaries/HashedDictionary.h | 119 ++-- dbms/src/Dictionaries/IDictionary.h | 18 +- dbms/src/Dictionaries/IDictionarySource.h | 6 +- .../Dictionaries/LibraryDictionarySource.cpp | 31 +- .../Dictionaries/LibraryDictionarySource.h | 12 +- .../Dictionaries/MongoDBBlockInputStream.cpp | 120 ++-- .../Dictionaries/MongoDBBlockInputStream.h | 11 +- .../Dictionaries/MongoDBDictionarySource.cpp | 161 +++--- .../Dictionaries/MongoDBDictionarySource.h | 40 +- .../Dictionaries/MySQLBlockInputStream.cpp | 89 +-- dbms/src/Dictionaries/MySQLBlockInputStream.h | 11 +- .../Dictionaries/MySQLDictionarySource.cpp | 89 +-- dbms/src/Dictionaries/MySQLDictionarySource.h | 31 +- .../src/Dictionaries/ODBCBlockInputStream.cpp | 91 +-- dbms/src/Dictionaries/ODBCBlockInputStream.h | 10 +- .../RangeDictionaryBlockInputStream.h | 201 ++++--- .../Dictionaries/RangeHashedDictionary.cpp | 365 ++++++++---- dbms/src/Dictionaries/RangeHashedDictionary.h | 101 ++-- 
dbms/src/Dictionaries/TrieDictionary.cpp | 488 +++++++++------- dbms/src/Dictionaries/TrieDictionary.h | 167 +++--- .../src/Dictionaries/XDBCDictionarySource.cpp | 142 ++--- dbms/src/Dictionaries/XDBCDictionarySource.h | 7 +- dbms/src/Dictionaries/readInvalidateQuery.cpp | 7 +- dbms/src/Dictionaries/readInvalidateQuery.h | 1 - .../Dictionaries/writeParenthesisedString.cpp | 1 - .../Dictionaries/writeParenthesisedString.h | 2 - 81 files changed, 3791 insertions(+), 2786 deletions(-) diff --git a/dbms/src/Dictionaries/CacheDictionary.cpp b/dbms/src/Dictionaries/CacheDictionary.cpp index 08a0752a23f..253b9124963 100644 --- a/dbms/src/Dictionaries/CacheDictionary.cpp +++ b/dbms/src/Dictionaries/CacheDictionary.cpp @@ -1,48 +1,47 @@ #include "CacheDictionary.h" #include -#include #include -#include +#include #include +#include #include -#include -#include -#include -#include -#include #include +#include +#include +#include +#include +#include #include -#include "DictionaryBlockInputStream.h" -#include -#include #include -#include "DictionaryFactory.h" +#include +#include #include "CacheDictionary.inc.h" +#include "DictionaryBlockInputStream.h" +#include "DictionaryFactory.h" namespace ProfileEvents { - extern const Event DictCacheKeysRequested; - extern const Event DictCacheKeysRequestedMiss; - extern const Event DictCacheKeysRequestedFound; - extern const Event DictCacheKeysExpired; - extern const Event DictCacheKeysNotFound; - extern const Event DictCacheKeysHit; - extern const Event DictCacheRequestTimeNs; - extern const Event DictCacheRequests; - extern const Event DictCacheLockWriteNs; - extern const Event DictCacheLockReadNs; +extern const Event DictCacheKeysRequested; +extern const Event DictCacheKeysRequestedMiss; +extern const Event DictCacheKeysRequestedFound; +extern const Event DictCacheKeysExpired; +extern const Event DictCacheKeysNotFound; +extern const Event DictCacheKeysHit; +extern const Event DictCacheRequestTimeNs; +extern const Event 
DictCacheRequests; +extern const Event DictCacheLockWriteNs; +extern const Event DictCacheLockReadNs; } namespace CurrentMetrics { - extern const Metric DictCacheRequests; +extern const Metric DictCacheRequests; } namespace DB { - namespace ErrorCodes { extern const int TYPE_MISMATCH; @@ -61,15 +60,20 @@ inline size_t CacheDictionary::getCellIdx(const Key id) const } -CacheDictionary::CacheDictionary(const std::string & name, const DictionaryStructure & dict_struct, - DictionarySourcePtr source_ptr, const DictionaryLifetime dict_lifetime, +CacheDictionary::CacheDictionary( + const std::string & name, + const DictionaryStructure & dict_struct, + DictionarySourcePtr source_ptr, + const DictionaryLifetime dict_lifetime, const size_t size) - : name{name}, dict_struct(dict_struct), - source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime), - size{roundUpToPowerOfTwoOrZero(std::max(size, size_t(max_collision_length)))}, - size_overlap_mask{this->size - 1}, - cells{this->size}, - rnd_engine(randomSeed()) + : name{name} + , dict_struct(dict_struct) + , source_ptr{std::move(source_ptr)} + , dict_lifetime(dict_lifetime) + , size{roundUpToPowerOfTwoOrZero(std::max(size, size_t(max_collision_length)))} + , size_overlap_mask{this->size - 1} + , cells{this->size} + , rnd_engine(randomSeed()) { if (!this->source_ptr->supportsSelectiveLoad()) throw Exception{name + ": source cannot be used with CacheDictionary", ErrorCodes::UNSUPPORTED_METHOD}; @@ -79,32 +83,36 @@ CacheDictionary::CacheDictionary(const std::string & name, const DictionaryStruc CacheDictionary::CacheDictionary(const CacheDictionary & other) : CacheDictionary{other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime, other.size} -{} +{ +} void CacheDictionary::toParent(const PaddedPODArray & ids, PaddedPODArray & out) const { const auto null_value = std::get(hierarchical_attribute->null_values); - getItemsNumber(*hierarchical_attribute, ids, out, [&] (const size_t) { return null_value; 
}); + getItemsNumber(*hierarchical_attribute, ids, out, [&](const size_t) { return null_value; }); } /// Allow to use single value in same way as array. -static inline CacheDictionary::Key getAt(const PaddedPODArray & arr, const size_t idx) { return arr[idx]; } -static inline CacheDictionary::Key getAt(const CacheDictionary::Key & value, const size_t) { return value; } +static inline CacheDictionary::Key getAt(const PaddedPODArray & arr, const size_t idx) +{ + return arr[idx]; +} +static inline CacheDictionary::Key getAt(const CacheDictionary::Key & value, const size_t) +{ + return value; +} template -void CacheDictionary::isInImpl( - const PaddedPODArray & child_ids, - const AncestorType & ancestor_ids, - PaddedPODArray & out) const +void CacheDictionary::isInImpl(const PaddedPODArray & child_ids, const AncestorType & ancestor_ids, PaddedPODArray & out) const { /// Transform all children to parents until ancestor id or null_value will be reached. size_t out_size = out.size(); - memset(out.data(), 0xFF, out_size); /// 0xFF means "not calculated" + memset(out.data(), 0xFF, out_size); /// 0xFF means "not calculated" const auto null_value = std::get(hierarchical_attribute->null_values); @@ -164,25 +172,17 @@ void CacheDictionary::isInImpl( } void CacheDictionary::isInVectorVector( - const PaddedPODArray & child_ids, - const PaddedPODArray & ancestor_ids, - PaddedPODArray & out) const + const PaddedPODArray & child_ids, const PaddedPODArray & ancestor_ids, PaddedPODArray & out) const { isInImpl(child_ids, ancestor_ids, out); } -void CacheDictionary::isInVectorConstant( - const PaddedPODArray & child_ids, - const Key ancestor_id, - PaddedPODArray & out) const +void CacheDictionary::isInVectorConstant(const PaddedPODArray & child_ids, const Key ancestor_id, PaddedPODArray & out) const { isInImpl(child_ids, ancestor_id, out); } -void CacheDictionary::isInConstantVector( - const Key child_id, - const PaddedPODArray & ancestor_ids, - PaddedPODArray & out) const +void 
CacheDictionary::isInConstantVector(const Key child_id, const PaddedPODArray & ancestor_ids, PaddedPODArray & out) const { /// Special case with single child value. @@ -213,33 +213,34 @@ void CacheDictionary::getString(const std::string & attribute_name, const Padded { auto & attribute = getAttribute(attribute_name); if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String)) - throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; + throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), + ErrorCodes::TYPE_MISMATCH}; const auto null_value = StringRef{std::get(attribute.null_values)}; - getItemsString(attribute, ids, out, [&] (const size_t) { return null_value; }); + getItemsString(attribute, ids, out, [&](const size_t) { return null_value; }); } void CacheDictionary::getString( - const std::string & attribute_name, const PaddedPODArray & ids, const ColumnString * const def, - ColumnString * const out) const + const std::string & attribute_name, const PaddedPODArray & ids, const ColumnString * const def, ColumnString * const out) const { auto & attribute = getAttribute(attribute_name); if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String)) - throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; + throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), + ErrorCodes::TYPE_MISMATCH}; - getItemsString(attribute, ids, out, [&] (const size_t row) { return def->getDataAt(row); }); + getItemsString(attribute, ids, out, [&](const size_t row) { return def->getDataAt(row); }); } void CacheDictionary::getString( - const std::string & attribute_name, const PaddedPODArray & ids, const String & def, - ColumnString * const out) const + const 
std::string & attribute_name, const PaddedPODArray & ids, const String & def, ColumnString * const out) const { auto & attribute = getAttribute(attribute_name); if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String)) - throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; + throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), + ErrorCodes::TYPE_MISMATCH}; - getItemsString(attribute, ids, out, [&] (const size_t) { return StringRef{def}; }); + getItemsString(attribute, ids, out, [&](const size_t) { return StringRef{def}; }); } @@ -329,21 +330,21 @@ void CacheDictionary::has(const PaddedPODArray & ids, PaddedPODArray return; std::vector required_ids(outdated_ids.size()); - std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), - [] (auto & pair) { return pair.first; }); + std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), [](auto & pair) { return pair.first; }); /// request new values - update(required_ids, - [&] (const auto id, const auto) - { - for (const auto row : outdated_ids[id]) - out[row] = true; - }, - [&] (const auto id, const auto) - { - for (const auto row : outdated_ids[id]) - out[row] = false; - }); + update( + required_ids, + [&](const auto id, const auto) + { + for (const auto row : outdated_ids[id]) + out[row] = true; + }, + [&](const auto id, const auto) + { + for (const auto row : outdated_ids[id]) + out[row] = false; + }); } @@ -362,7 +363,7 @@ void CacheDictionary::createAttributes() if (attribute.hierarchical) { - hierarchical_attribute = & attributes.back(); + hierarchical_attribute = &attributes.back(); if (hierarchical_attribute->type != AttributeUnderlyingType::UInt64) throw Exception{name + ": hierarchical attribute must be UInt64.", ErrorCodes::TYPE_MISMATCH}; @@ -376,12 +377,12 @@ 
CacheDictionary::Attribute CacheDictionary::createAttributeWithType(const Attrib switch (type) { -#define DISPATCH(TYPE) \ - case AttributeUnderlyingType::TYPE: \ - attr.null_values = TYPE(null_value.get>()); \ - attr.arrays = std::make_unique>(size); \ - bytes_allocated += size * sizeof(TYPE); \ - break; +#define DISPATCH(TYPE) \ + case AttributeUnderlyingType::TYPE: \ + attr.null_values = TYPE(null_value.get>()); \ + attr.arrays = std::make_unique>(size); \ + bytes_allocated += size * sizeof(TYPE); \ + break; DISPATCH(UInt8) DISPATCH(UInt16) DISPATCH(UInt32) @@ -413,17 +414,39 @@ void CacheDictionary::setDefaultAttributeValue(Attribute & attribute, const Key { switch (attribute.type) { - case AttributeUnderlyingType::UInt8: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::UInt16: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::UInt32: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::UInt64: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::UInt128: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::Int8: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::Int16: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::Int32: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::Int64: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::Float32: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::Float64: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; + case AttributeUnderlyingType::UInt8: + 
std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::UInt16: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::UInt32: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::UInt64: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::UInt128: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::Int8: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::Int16: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::Int32: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::Int64: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::Float32: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::Float64: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; case AttributeUnderlyingType::Decimal32: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); @@ -457,21 +480,49 @@ void CacheDictionary::setAttributeValue(Attribute & attribute, const Key idx, co { switch (attribute.type) { - case AttributeUnderlyingType::UInt8: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::UInt16: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::UInt32: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::UInt64: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::UInt128: std::get>(attribute.arrays)[idx] = value.get(); break; - case 
AttributeUnderlyingType::Int8: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Int16: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Int32: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Int64: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Float32: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Float64: std::get>(attribute.arrays)[idx] = value.get(); break; + case AttributeUnderlyingType::UInt8: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::UInt16: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::UInt32: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::UInt64: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::UInt128: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Int8: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Int16: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Int32: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Int64: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Float32: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Float64: + std::get>(attribute.arrays)[idx] = value.get(); + break; - case AttributeUnderlyingType::Decimal32: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Decimal64: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Decimal128: std::get>(attribute.arrays)[idx] = value.get(); break; + case AttributeUnderlyingType::Decimal32: + 
std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Decimal64: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Decimal128: + std::get>(attribute.arrays)[idx] = value.get(); + break; case AttributeUnderlyingType::String: { @@ -509,8 +560,8 @@ CacheDictionary::Attribute & CacheDictionary::getAttribute(const std::string & a bool CacheDictionary::isEmptyCell(const UInt64 idx) const { - return (idx != zero_cell_idx && cells[idx].id == 0) || (cells[idx].data - == ext::safe_bit_cast(CellMetadata::time_point_t())); + return (idx != zero_cell_idx && cells[idx].id == 0) + || (cells[idx].data == ext::safe_bit_cast(CellMetadata::time_point_t())); } PaddedPODArray CacheDictionary::getCachedIds() const @@ -537,36 +588,31 @@ BlockInputStreamPtr CacheDictionary::getBlockInputStream(const Names & column_na void registerDictionaryCache(DictionaryFactory & factory) { - auto create_layout = [=]( - const std::string & name, - const DictionaryStructure & dict_struct, - const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, - DictionarySourcePtr source_ptr - ) -> DictionaryPtr { - + auto create_layout = [=](const std::string & name, + const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + DictionarySourcePtr source_ptr) -> DictionaryPtr { if (dict_struct.key) - throw Exception {"'key' is not supported for dictionary of layout 'cache'", ErrorCodes::UNSUPPORTED_METHOD}; + throw Exception{"'key' is not supported for dictionary of layout 'cache'", ErrorCodes::UNSUPPORTED_METHOD}; if (dict_struct.range_min || dict_struct.range_max) - throw Exception {name - + ": elements .structure.range_min and .structure.range_max should be defined only " - "for a dictionary of layout 'range_hashed'", - ErrorCodes::BAD_ARGUMENTS}; + throw Exception{name + + ": elements .structure.range_min and .structure.range_max should 
be defined only " + "for a dictionary of layout 'range_hashed'", + ErrorCodes::BAD_ARGUMENTS}; const auto & layout_prefix = config_prefix + ".layout"; const auto size = config.getInt(layout_prefix + ".cache.size_in_cells"); if (size == 0) - throw Exception {name + ": dictionary of layout 'cache' cannot have 0 cells", ErrorCodes::TOO_SMALL_BUFFER_SIZE}; + throw Exception{name + ": dictionary of layout 'cache' cannot have 0 cells", ErrorCodes::TOO_SMALL_BUFFER_SIZE}; const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); if (require_nonempty) - throw Exception {name + ": dictionary of layout 'cache' cannot have 'require_nonempty' attribute set", - ErrorCodes::BAD_ARGUMENTS}; + throw Exception{name + ": dictionary of layout 'cache' cannot have 'require_nonempty' attribute set", + ErrorCodes::BAD_ARGUMENTS}; - const DictionaryLifetime dict_lifetime {config, config_prefix + ".lifetime"}; + const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, size); - - }; factory.registerLayout("cache", create_layout); } diff --git a/dbms/src/Dictionaries/CacheDictionary.h b/dbms/src/Dictionaries/CacheDictionary.h index 8b72daaca23..1f8754c0c0a 100644 --- a/dbms/src/Dictionaries/CacheDictionary.h +++ b/dbms/src/Dictionaries/CacheDictionary.h @@ -1,31 +1,33 @@ #pragma once -#include "IDictionary.h" -#include "IDictionarySource.h" -#include "DictionaryStructure.h" -#include -#include -#include -#include -#include -#include #include #include -#include +#include #include -#include -#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include "DictionaryStructure.h" +#include "IDictionary.h" +#include "IDictionarySource.h" namespace DB { - class CacheDictionary final : public IDictionary { public: - CacheDictionary(const std::string & name, const DictionaryStructure & dict_struct, - DictionarySourcePtr 
source_ptr, const DictionaryLifetime dict_lifetime, + CacheDictionary( + const std::string & name, + const DictionaryStructure & dict_struct, + DictionarySourcePtr source_ptr, + const DictionaryLifetime dict_lifetime, const size_t size); CacheDictionary(const CacheDictionary & other); @@ -42,16 +44,12 @@ public: double getHitRate() const override { - return static_cast(hit_count.load(std::memory_order_acquire)) / - query_count.load(std::memory_order_relaxed); + return static_cast(hit_count.load(std::memory_order_acquire)) / query_count.load(std::memory_order_relaxed); } size_t getElementCount() const override { return element_count.load(std::memory_order_relaxed); } - double getLoadFactor() const override - { - return static_cast(element_count.load(std::memory_order_relaxed)) / size; - } + double getLoadFactor() const override { return static_cast(element_count.load(std::memory_order_relaxed)) / size; } bool isCached() const override { return true; } @@ -63,10 +61,7 @@ public: const DictionaryStructure & getStructure() const override { return dict_struct; } - std::chrono::time_point getCreationTime() const override - { - return creation_time; - } + std::chrono::time_point getCreationTime() const override { return creation_time; } bool isInjective(const std::string & attribute_name) const override { @@ -77,14 +72,15 @@ public: void toParent(const PaddedPODArray & ids, PaddedPODArray & out) const override; - void isInVectorVector(const PaddedPODArray & child_ids, const PaddedPODArray & ancestor_ids, PaddedPODArray & out) const override; + void isInVectorVector( + const PaddedPODArray & child_ids, const PaddedPODArray & ancestor_ids, PaddedPODArray & out) const override; void isInVectorConstant(const PaddedPODArray & child_ids, const Key ancestor_id, PaddedPODArray & out) const override; void isInConstantVector(const Key child_id, const PaddedPODArray & ancestor_ids, PaddedPODArray & out) const override; template using ResultArrayType = std::conditional_t, 
DecimalPaddedPODArray, PaddedPODArray>; -#define DECLARE(TYPE)\ +#define DECLARE(TYPE) \ void get##TYPE(const std::string & attribute_name, const PaddedPODArray & ids, ResultArrayType & out) const; DECLARE(UInt8) DECLARE(UInt16) @@ -104,9 +100,11 @@ public: void getString(const std::string & attribute_name, const PaddedPODArray & ids, ColumnString * out) const; -#define DECLARE(TYPE)\ - void get##TYPE(\ - const std::string & attribute_name, const PaddedPODArray & ids, const PaddedPODArray & def,\ +#define DECLARE(TYPE) \ + void get##TYPE( \ + const std::string & attribute_name, \ + const PaddedPODArray & ids, \ + const PaddedPODArray & def, \ ResultArrayType & out) const; DECLARE(UInt8) DECLARE(UInt16) @@ -124,11 +122,11 @@ public: DECLARE(Decimal128) #undef DECLARE - void getString( - const std::string & attribute_name, const PaddedPODArray & ids, const ColumnString * const def, - ColumnString * const out) const; + void + getString(const std::string & attribute_name, const PaddedPODArray & ids, const ColumnString * const def, ColumnString * const out) + const; -#define DECLARE(TYPE)\ +#define DECLARE(TYPE) \ void get##TYPE(const std::string & attribute_name, const PaddedPODArray & ids, const TYPE def, ResultArrayType & out) const; DECLARE(UInt8) DECLARE(UInt16) @@ -146,17 +144,17 @@ public: DECLARE(Decimal128) #undef DECLARE - void getString( - const std::string & attribute_name, const PaddedPODArray & ids, const String & def, - ColumnString * const out) const; + void getString(const std::string & attribute_name, const PaddedPODArray & ids, const String & def, ColumnString * const out) const; void has(const PaddedPODArray & ids, PaddedPODArray & out) const override; BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override; private: - template using ContainerType = Value[]; - template using ContainerPtrType = std::unique_ptr>; + template + using ContainerType = Value[]; + template + using ContainerPtrType = 
std::unique_ptr>; struct CellMetadata final { @@ -183,19 +181,39 @@ private: { AttributeUnderlyingType type; std::variant< - UInt8, UInt16, UInt32, UInt64, + UInt8, + UInt16, + UInt32, + UInt64, UInt128, - Int8, Int16, Int32, Int64, - Decimal32, Decimal64, Decimal128, - Float32, Float64, - String> null_values; + Int8, + Int16, + Int32, + Int64, + Decimal32, + Decimal64, + Decimal128, + Float32, + Float64, + String> + null_values; std::variant< - ContainerPtrType, ContainerPtrType, ContainerPtrType, ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, ContainerPtrType, - ContainerPtrType, ContainerPtrType, ContainerPtrType, ContainerPtrType, - ContainerPtrType, ContainerPtrType, ContainerPtrType, - ContainerPtrType, ContainerPtrType, - ContainerPtrType> arrays; + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType> + arrays; }; void createAttributes(); @@ -205,29 +223,17 @@ private: template void getItemsNumber( - Attribute & attribute, - const PaddedPODArray & ids, - ResultArrayType & out, - DefaultGetter && get_default) const; + Attribute & attribute, const PaddedPODArray & ids, ResultArrayType & out, DefaultGetter && get_default) const; template void getItemsNumberImpl( - Attribute & attribute, - const PaddedPODArray & ids, - ResultArrayType & out, - DefaultGetter && get_default) const; + Attribute & attribute, const PaddedPODArray & ids, ResultArrayType & out, DefaultGetter && get_default) const; template - void getItemsString( - Attribute & attribute, - const PaddedPODArray & ids, - ColumnString * out, - DefaultGetter && get_default) const; + void getItemsString(Attribute & attribute, const PaddedPODArray & ids, ColumnString * out, DefaultGetter && get_default) const; template - void update( - const std::vector & requested_ids, PresentIdHandler && on_cell_updated, - 
AbsentIdHandler && on_id_not_found) const; + void update(const std::vector & requested_ids, PresentIdHandler && on_cell_updated, AbsentIdHandler && on_id_not_found) const; PaddedPODArray getCachedIds() const; @@ -251,10 +257,7 @@ private: FindResult findCellIdx(const Key & id, const CellMetadata::time_point_t now) const; template - void isInImpl( - const PaddedPODArray & child_ids, - const AncestorType & ancestor_ids, - PaddedPODArray & out) const; + void isInImpl(const PaddedPODArray & child_ids, const AncestorType & ancestor_ids, PaddedPODArray & out) const; const std::string name; const DictionaryStructure dict_struct; diff --git a/dbms/src/Dictionaries/CacheDictionary.inc.h b/dbms/src/Dictionaries/CacheDictionary.inc.h index 6fc082ab267..25d6786fbd3 100644 --- a/dbms/src/Dictionaries/CacheDictionary.inc.h +++ b/dbms/src/Dictionaries/CacheDictionary.inc.h @@ -1,34 +1,33 @@ #include "CacheDictionary.h" -#include -#include -#include +#include #include #include -#include +#include +#include +#include namespace ProfileEvents { - extern const Event DictCacheKeysRequested; - extern const Event DictCacheKeysRequestedMiss; - extern const Event DictCacheKeysRequestedFound; - extern const Event DictCacheKeysExpired; - extern const Event DictCacheKeysNotFound; - extern const Event DictCacheKeysHit; - extern const Event DictCacheRequestTimeNs; - extern const Event DictCacheRequests; - extern const Event DictCacheLockWriteNs; - extern const Event DictCacheLockReadNs; +extern const Event DictCacheKeysRequested; +extern const Event DictCacheKeysRequestedMiss; +extern const Event DictCacheKeysRequestedFound; +extern const Event DictCacheKeysExpired; +extern const Event DictCacheKeysNotFound; +extern const Event DictCacheKeysHit; +extern const Event DictCacheRequestTimeNs; +extern const Event DictCacheRequests; +extern const Event DictCacheLockWriteNs; +extern const Event DictCacheLockReadNs; } namespace CurrentMetrics { - extern const Metric DictCacheRequests; +extern const 
Metric DictCacheRequests; } namespace DB { - namespace ErrorCodes { extern const int TYPE_MISMATCH; @@ -36,13 +35,12 @@ namespace ErrorCodes template void CacheDictionary::getItemsNumber( - Attribute & attribute, - const PaddedPODArray & ids, - ResultArrayType & out, - DefaultGetter && get_default) const + Attribute & attribute, const PaddedPODArray & ids, ResultArrayType & out, DefaultGetter && get_default) const { - if (false) {} -#define DISPATCH(TYPE) \ + if (false) + { + } +#define DISPATCH(TYPE) \ else if (attribute.type == AttributeUnderlyingType::TYPE) \ getItemsNumberImpl(attribute, ids, out, std::forward(get_default)); DISPATCH(UInt8) @@ -60,16 +58,12 @@ void CacheDictionary::getItemsNumber( DISPATCH(Decimal64) DISPATCH(Decimal128) #undef DISPATCH - else - throw Exception("Unexpected type of attribute: " + toString(attribute.type), ErrorCodes::LOGICAL_ERROR); + else throw Exception("Unexpected type of attribute: " + toString(attribute.type), ErrorCodes::LOGICAL_ERROR); } template void CacheDictionary::getItemsNumberImpl( - Attribute & attribute, - const PaddedPODArray & ids, - ResultArrayType & out, - DefaultGetter && get_default) const + Attribute & attribute, const PaddedPODArray & ids, ResultArrayType & out, DefaultGetter && get_default) const { /// Mapping: -> { all indices `i` of `ids` such that `ids[i]` = } std::unordered_map> outdated_ids; @@ -122,31 +116,28 @@ void CacheDictionary::getItemsNumberImpl( return; std::vector required_ids(outdated_ids.size()); - std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), - [] (auto & pair) { return pair.first; }); + std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), [](auto & pair) { return pair.first; }); /// request new values - update(required_ids, - [&] (const auto id, const auto cell_idx) - { - const auto attribute_value = attribute_array[cell_idx]; + update( + required_ids, + [&](const auto id, const auto cell_idx) + { + const 
auto attribute_value = attribute_array[cell_idx]; - for (const size_t row : outdated_ids[id]) - out[row] = static_cast(attribute_value); - }, - [&] (const auto id, const auto) - { - for (const size_t row : outdated_ids[id]) - out[row] = get_default(row); - }); + for (const size_t row : outdated_ids[id]) + out[row] = static_cast(attribute_value); + }, + [&](const auto id, const auto) + { + for (const size_t row : outdated_ids[id]) + out[row] = get_default(row); + }); } template void CacheDictionary::getItemsString( - Attribute & attribute, - const PaddedPODArray & ids, - ColumnString * out, - DefaultGetter && get_default) const + Attribute & attribute, const PaddedPODArray & ids, ColumnString * out, DefaultGetter && get_default) const { const auto rows = ext::size(ids); @@ -245,22 +236,22 @@ void CacheDictionary::getItemsString( if (!outdated_ids.empty()) { std::vector required_ids(outdated_ids.size()); - std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), - [] (auto & pair) { return pair.first; }); + std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), [](auto & pair) { return pair.first; }); - update(required_ids, - [&] (const auto id, const auto cell_idx) - { - const auto attribute_value = attribute_array[cell_idx]; + update( + required_ids, + [&](const auto id, const auto cell_idx) + { + const auto attribute_value = attribute_array[cell_idx]; - map[id] = String{attribute_value}; - total_length += (attribute_value.size + 1) * outdated_ids[id].size(); - }, - [&] (const auto id, const auto) - { - for (const auto row : outdated_ids[id]) - total_length += get_default(row).size + 1; - }); + map[id] = String{attribute_value}; + total_length += (attribute_value.size + 1) * outdated_ids[id].size(); + }, + [&](const auto id, const auto) + { + for (const auto row : outdated_ids[id]) + total_length += get_default(row).size + 1; + }); } out->getChars().reserve(total_length); @@ -277,19 +268,13 @@ 
void CacheDictionary::getItemsString( template void CacheDictionary::update( - const std::vector & requested_ids, - PresentIdHandler && on_cell_updated, - AbsentIdHandler && on_id_not_found) const + const std::vector & requested_ids, PresentIdHandler && on_cell_updated, AbsentIdHandler && on_id_not_found) const { std::unordered_map remaining_ids{requested_ids.size()}; for (const auto id : requested_ids) - remaining_ids.insert({ id, 0 }); + remaining_ids.insert({id, 0}); - std::uniform_int_distribution distribution - { - dict_lifetime.min_sec, - dict_lifetime.max_sec - }; + std::uniform_int_distribution distribution{dict_lifetime.min_sec, dict_lifetime.max_sec}; const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs}; @@ -310,10 +295,8 @@ void CacheDictionary::update( const auto & ids = id_column->getData(); /// cache column pointers - const auto column_ptrs = ext::map(ext::range(0, attributes.size()), [&block] (size_t i) - { - return block.safeGetByPosition(i + 1).column.get(); - }); + const auto column_ptrs = ext::map( + ext::range(0, attributes.size()), [&block](size_t i) { return block.safeGetByPosition(i + 1).column.get(); }); for (const auto i : ext::range(0, ids.size())) { diff --git a/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp b/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp index 161a157ffaa..faa93055073 100644 --- a/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -1,21 +1,20 @@ #include "ClickHouseDictionarySource.h" -#include "ExternalQueryBuilder.h" -#include "writeParenthesisedString.h" +#include #include #include -#include "readInvalidateQuery.h" +#include #include #include -#include #include -#include #include "DictionarySourceFactory.h" #include "DictionaryStructure.h" +#include "ExternalQueryBuilder.h" +#include "readInvalidateQuery.h" +#include "writeParenthesisedString.h" namespace DB { - namespace ErrorCodes { extern const int 
UNSUPPORTED_METHOD; @@ -25,61 +24,81 @@ namespace ErrorCodes static const size_t MAX_CONNECTIONS = 16; static ConnectionPoolWithFailoverPtr createPool( - const std::string & host, UInt16 port, bool secure, const std::string & db, - const std::string & user, const std::string & password, const Context & context) + const std::string & host, + UInt16 port, + bool secure, + const std::string & db, + const std::string & user, + const std::string & password, + const Context & context) { auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(context.getSettingsRef()); ConnectionPoolPtrs pools; pools.emplace_back(std::make_shared( - MAX_CONNECTIONS, host, port, db, user, password, timeouts, "ClickHouseDictionarySource", - Protocol::Compression::Enable, - secure ? Protocol::Secure::Enable : Protocol::Secure::Disable)); + MAX_CONNECTIONS, + host, + port, + db, + user, + password, + timeouts, + "ClickHouseDictionarySource", + Protocol::Compression::Enable, + secure ? Protocol::Secure::Enable : Protocol::Secure::Disable)); return std::make_shared(pools, LoadBalancing::RANDOM); } ClickHouseDictionarySource::ClickHouseDictionarySource( - const DictionaryStructure & dict_struct_, - const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, - const Block & sample_block, Context & context) - : update_time{std::chrono::system_clock::from_time_t(0)}, - dict_struct{dict_struct_}, - host{config.getString(config_prefix + ".host")}, - port(config.getInt(config_prefix + ".port")), - secure(config.getBool(config_prefix + ".secure", false)), - user{config.getString(config_prefix + ".user", "")}, - password{config.getString(config_prefix + ".password", "")}, - db{config.getString(config_prefix + ".db", "")}, - table{config.getString(config_prefix + ".table")}, - where{config.getString(config_prefix + ".where", "")}, - update_field{config.getString(config_prefix + ".update_field", "")}, - invalidate_query{config.getString(config_prefix + 
".invalidate_query", "")}, - query_builder{dict_struct, db, table, where, IdentifierQuotingStyle::Backticks}, - sample_block{sample_block}, context(context), - is_local{isLocalAddress({ host, port }, config.getInt("tcp_port", 0))}, - pool{is_local ? nullptr : createPool(host, port, secure, db, user, password, context)}, - load_all_query{query_builder.composeLoadAllQuery()} -{} + const DictionaryStructure & dict_struct_, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + const Block & sample_block, + Context & context) + : update_time{std::chrono::system_clock::from_time_t(0)} + , dict_struct{dict_struct_} + , host{config.getString(config_prefix + ".host")} + , port(config.getInt(config_prefix + ".port")) + , secure(config.getBool(config_prefix + ".secure", false)) + , user{config.getString(config_prefix + ".user", "")} + , password{config.getString(config_prefix + ".password", "")} + , db{config.getString(config_prefix + ".db", "")} + , table{config.getString(config_prefix + ".table")} + , where{config.getString(config_prefix + ".where", "")} + , update_field{config.getString(config_prefix + ".update_field", "")} + , invalidate_query{config.getString(config_prefix + ".invalidate_query", "")} + , query_builder{dict_struct, db, table, where, IdentifierQuotingStyle::Backticks} + , sample_block{sample_block} + , context(context) + , is_local{isLocalAddress({host, port}, config.getInt("tcp_port", 0))} + , pool{is_local ? 
nullptr : createPool(host, port, secure, db, user, password, context)} + , load_all_query{query_builder.composeLoadAllQuery()} +{ +} ClickHouseDictionarySource::ClickHouseDictionarySource(const ClickHouseDictionarySource & other) - : update_time{other.update_time}, - dict_struct{other.dict_struct}, - host{other.host}, port{other.port}, - secure{other.secure}, - user{other.user}, password{other.password}, - db{other.db}, table{other.table}, - where{other.where}, - update_field{other.update_field}, - invalidate_query{other.invalidate_query}, - invalidate_query_response{other.invalidate_query_response}, - query_builder{dict_struct, db, table, where, IdentifierQuotingStyle::Backticks}, - sample_block{other.sample_block}, context(other.context), - is_local{other.is_local}, - pool{is_local ? nullptr : createPool(host, port, secure, db, user, password, context)}, - load_all_query{other.load_all_query} -{} + : update_time{other.update_time} + , dict_struct{other.dict_struct} + , host{other.host} + , port{other.port} + , secure{other.secure} + , user{other.user} + , password{other.password} + , db{other.db} + , table{other.table} + , where{other.where} + , update_field{other.update_field} + , invalidate_query{other.invalidate_query} + , invalidate_query_response{other.invalidate_query_response} + , query_builder{dict_struct, db, table, where, IdentifierQuotingStyle::Backticks} + , sample_block{other.sample_block} + , context(other.context) + , is_local{other.is_local} + , pool{is_local ? 
nullptr : createPool(host, port, secure, db, user, password, context)} + , load_all_query{other.load_all_query} +{ +} std::string ClickHouseDictionarySource::getUpdateFieldAndDate() { @@ -119,17 +138,14 @@ BlockInputStreamPtr ClickHouseDictionarySource::loadUpdatedAll() BlockInputStreamPtr ClickHouseDictionarySource::loadIds(const std::vector & ids) { - return createStreamForSelectiveLoad( - query_builder.composeLoadIdsQuery(ids)); + return createStreamForSelectiveLoad(query_builder.composeLoadIdsQuery(ids)); } -BlockInputStreamPtr ClickHouseDictionarySource::loadKeys( - const Columns & key_columns, const std::vector & requested_rows) +BlockInputStreamPtr ClickHouseDictionarySource::loadKeys(const Columns & key_columns, const std::vector & requested_rows) { return createStreamForSelectiveLoad( - query_builder.composeLoadKeysQuery( - key_columns, requested_rows, ExternalQueryBuilder::IN_WITH_TUPLES)); + query_builder.composeLoadKeysQuery(key_columns, requested_rows, ExternalQueryBuilder::IN_WITH_TUPLES)); } bool ClickHouseDictionarySource::isModified() const @@ -167,7 +183,7 @@ std::string ClickHouseDictionarySource::doInvalidateQuery(const std::string & re if (is_local) { auto input_block = executeQuery(request, context, true).in; - return readInvalidateQuery(dynamic_cast((*input_block))); + return readInvalidateQuery(dynamic_cast((*input_block))); } else { diff --git a/dbms/src/Dictionaries/ClickHouseDictionarySource.h b/dbms/src/Dictionaries/ClickHouseDictionarySource.h index 89db23737bc..bf8653932f7 100644 --- a/dbms/src/Dictionaries/ClickHouseDictionarySource.h +++ b/dbms/src/Dictionaries/ClickHouseDictionarySource.h @@ -1,15 +1,14 @@ #pragma once -#include "IDictionarySource.h" +#include +#include #include "DictionaryStructure.h" #include "ExternalQueryBuilder.h" -#include -#include +#include "IDictionarySource.h" namespace DB { - /** Allows loading dictionaries from local or remote ClickHouse instance * @todo use ConnectionPoolWithFailover * @todo invent a 
way to keep track of source modifications @@ -17,10 +16,12 @@ namespace DB class ClickHouseDictionarySource final : public IDictionarySource { public: - ClickHouseDictionarySource(const DictionaryStructure & dict_struct_, + ClickHouseDictionarySource( + const DictionaryStructure & dict_struct_, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, - const Block & sample_block, Context & context); + const Block & sample_block, + Context & context); /// copy-constructor is provided in order to support cloneability ClickHouseDictionarySource(const ClickHouseDictionarySource & other); @@ -31,8 +32,7 @@ public: BlockInputStreamPtr loadIds(const std::vector & ids) override; - BlockInputStreamPtr loadKeys( - const Columns & key_columns, const std::vector & requested_rows) override; + BlockInputStreamPtr loadKeys(const Columns & key_columns, const std::vector & requested_rows) override; bool isModified() const override; bool supportsSelectiveLoad() const override { return true; } diff --git a/dbms/src/Dictionaries/ComplexKeyCacheDictionary.cpp b/dbms/src/Dictionaries/ComplexKeyCacheDictionary.cpp index 61693a3538a..1d71d072e9b 100644 --- a/dbms/src/Dictionaries/ComplexKeyCacheDictionary.cpp +++ b/dbms/src/Dictionaries/ComplexKeyCacheDictionary.cpp @@ -1,40 +1,38 @@ #include "ComplexKeyCacheDictionary.h" -#include "DictionaryBlockInputStream.h" #include #include -#include -#include -#include -#include #include -#include +#include +#include +#include +#include #include +#include +#include "DictionaryBlockInputStream.h" #include "DictionaryFactory.h" namespace ProfileEvents { - - extern const Event DictCacheKeysRequested; - extern const Event DictCacheKeysRequestedMiss; - extern const Event DictCacheKeysRequestedFound; - extern const Event DictCacheKeysExpired; - extern const Event DictCacheKeysNotFound; - extern const Event DictCacheKeysHit; - extern const Event DictCacheRequestTimeNs; - extern const Event DictCacheLockWriteNs; - extern const 
Event DictCacheLockReadNs; +extern const Event DictCacheKeysRequested; +extern const Event DictCacheKeysRequestedMiss; +extern const Event DictCacheKeysRequestedFound; +extern const Event DictCacheKeysExpired; +extern const Event DictCacheKeysNotFound; +extern const Event DictCacheKeysHit; +extern const Event DictCacheRequestTimeNs; +extern const Event DictCacheLockWriteNs; +extern const Event DictCacheLockReadNs; } namespace CurrentMetrics { - extern const Metric DictCacheRequests; +extern const Metric DictCacheRequests; } namespace DB { - namespace ErrorCodes { extern const int TYPE_MISMATCH; @@ -52,13 +50,19 @@ inline UInt64 ComplexKeyCacheDictionary::getCellIdx(const StringRef key) const } -ComplexKeyCacheDictionary::ComplexKeyCacheDictionary(const std::string & name, const DictionaryStructure & dict_struct, - DictionarySourcePtr source_ptr, const DictionaryLifetime dict_lifetime, +ComplexKeyCacheDictionary::ComplexKeyCacheDictionary( + const std::string & name, + const DictionaryStructure & dict_struct, + DictionarySourcePtr source_ptr, + const DictionaryLifetime dict_lifetime, const size_t size) - : name{name}, dict_struct(dict_struct), source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime), - size{roundUpToPowerOfTwoOrZero(std::max(size, size_t(max_collision_length)))}, - size_overlap_mask{this->size - 1}, - rnd_engine(randomSeed()) + : name{name} + , dict_struct(dict_struct) + , source_ptr{std::move(source_ptr)} + , dict_lifetime(dict_lifetime) + , size{roundUpToPowerOfTwoOrZero(std::max(size, size_t(max_collision_length)))} + , size_overlap_mask{this->size - 1} + , rnd_engine(randomSeed()) { if (!this->source_ptr->supportsSelectiveLoad()) throw Exception{name + ": source cannot be used with ComplexKeyCacheDictionary", ErrorCodes::UNSUPPORTED_METHOD}; @@ -68,47 +72,56 @@ ComplexKeyCacheDictionary::ComplexKeyCacheDictionary(const std::string & name, c ComplexKeyCacheDictionary::ComplexKeyCacheDictionary(const ComplexKeyCacheDictionary & other) : 
ComplexKeyCacheDictionary{other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime, other.size} -{} +{ +} void ComplexKeyCacheDictionary::getString( - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, - ColumnString * out) const + const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const { dict_struct.validateKeyTypes(key_types); auto & attribute = getAttribute(attribute_name); if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String)) - throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; + throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), + ErrorCodes::TYPE_MISMATCH}; const auto null_value = StringRef{std::get(attribute.null_values)}; - getItemsString(attribute, key_columns, out, [&] (const size_t) { return null_value; }); + getItemsString(attribute, key_columns, out, [&](const size_t) { return null_value; }); } void ComplexKeyCacheDictionary::getString( - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, - const ColumnString * const def, ColumnString * const out) const + const std::string & attribute_name, + const Columns & key_columns, + const DataTypes & key_types, + const ColumnString * const def, + ColumnString * const out) const { dict_struct.validateKeyTypes(key_types); auto & attribute = getAttribute(attribute_name); if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String)) - throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; + throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), + ErrorCodes::TYPE_MISMATCH}; - getItemsString(attribute, 
key_columns, out, [&] (const size_t row) { return def->getDataAt(row); }); + getItemsString(attribute, key_columns, out, [&](const size_t row) { return def->getDataAt(row); }); } void ComplexKeyCacheDictionary::getString( - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, - const String & def, ColumnString * const out) const + const std::string & attribute_name, + const Columns & key_columns, + const DataTypes & key_types, + const String & def, + ColumnString * const out) const { dict_struct.validateKeyTypes(key_types); auto & attribute = getAttribute(attribute_name); if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String)) - throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; + throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), + ErrorCodes::TYPE_MISMATCH}; - getItemsString(attribute, key_columns, out, [&] (const size_t) { return StringRef{def}; }); + getItemsString(attribute, key_columns, out, [&](const size_t) { return StringRef{def}; }); } /// returns cell_idx (always valid for replacing), 'cell is valid' flag, 'cell is outdated' flag, @@ -118,7 +131,8 @@ void ComplexKeyCacheDictionary::getString( /// true true impossible /// /// todo: split this func to two: find_for_get and find_for_set -ComplexKeyCacheDictionary::FindResult ComplexKeyCacheDictionary::findCellIdx(const StringRef & key, const CellMetadata::time_point_t now, const size_t hash) const +ComplexKeyCacheDictionary::FindResult +ComplexKeyCacheDictionary::findCellIdx(const StringRef & key, const CellMetadata::time_point_t now, const size_t hash) const { auto pos = hash; auto oldest_id = pos; @@ -211,17 +225,20 @@ void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes return; std::vector required_rows(outdated_keys.size()); - 
std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), - [] (auto & pair) { return pair.second.front(); }); + std::transform( + std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.second.front(); }); /// request new values - update(key_columns, keys_array, required_rows, - [&] (const StringRef key, const auto) + update( + key_columns, + keys_array, + required_rows, + [&](const StringRef key, const auto) { for (const auto out_idx : outdated_keys[key]) out[out_idx] = true; }, - [&] (const StringRef key, const auto) + [&](const StringRef key, const auto) { for (const auto out_idx : outdated_keys[key]) out[out_idx] = false; @@ -242,7 +259,8 @@ void ComplexKeyCacheDictionary::createAttributes() attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value)); if (attribute.hierarchical) - throw Exception{name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(), ErrorCodes::TYPE_MISMATCH}; + throw Exception{name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(), + ErrorCodes::TYPE_MISMATCH}; } } @@ -273,8 +291,7 @@ void ComplexKeyCacheDictionary::freeKey(const StringRef key) const template StringRef ComplexKeyCacheDictionary::placeKeysInPool( - const size_t row, const Columns & key_columns, StringRefs & keys, - const std::vector & key_attributes, Pool & pool) + const size_t row, const Columns & key_columns, StringRefs & keys, const std::vector & key_attributes, Pool & pool) { const auto keys_size = key_columns.size(); size_t sum_keys_size{}; @@ -313,22 +330,27 @@ StringRef ComplexKeyCacheDictionary::placeKeysInPool( } } - return { place, sum_keys_size }; + return {place, sum_keys_size}; } /// Explicit instantiations. 
template StringRef ComplexKeyCacheDictionary::placeKeysInPool( - const size_t row, const Columns & key_columns, StringRefs & keys, - const std::vector & key_attributes, Arena & pool); + const size_t row, + const Columns & key_columns, + StringRefs & keys, + const std::vector & key_attributes, + Arena & pool); template StringRef ComplexKeyCacheDictionary::placeKeysInPool( - const size_t row, const Columns & key_columns, StringRefs & keys, - const std::vector & key_attributes, ArenaWithFreeLists & pool); + const size_t row, + const Columns & key_columns, + StringRefs & keys, + const std::vector & key_attributes, + ArenaWithFreeLists & pool); -StringRef ComplexKeyCacheDictionary::placeKeysInFixedSizePool( - const size_t row, const Columns & key_columns) const +StringRef ComplexKeyCacheDictionary::placeKeysInFixedSizePool(const size_t row, const Columns & key_columns) const { const auto res = fixed_size_keys_pool->alloc(); auto place = res; @@ -340,14 +362,14 @@ StringRef ComplexKeyCacheDictionary::placeKeysInFixedSizePool( place += key.size; } - return { res, key_size }; + return {res, key_size}; } StringRef ComplexKeyCacheDictionary::copyIntoArena(StringRef src, Arena & arena) { char * allocated = arena.alloc(src.size); memcpy(allocated, src.data, src.size); - return { allocated, src.size }; + return {allocated, src.size}; } StringRef ComplexKeyCacheDictionary::copyKey(const StringRef key) const @@ -355,13 +377,14 @@ StringRef ComplexKeyCacheDictionary::copyKey(const StringRef key) const const auto res = key_size_is_fixed ? 
fixed_size_keys_pool->alloc() : keys_pool->alloc(key.size); memcpy(res, key.data, key.size); - return { res, key.size }; + return {res, key.size}; } bool ComplexKeyCacheDictionary::isEmptyCell(const UInt64 idx) const { - return (cells[idx].key == StringRef{} && (idx != zero_cell_idx - || cells[idx].data == ext::safe_bit_cast(CellMetadata::time_point_t()))); + return ( + cells[idx].key == StringRef{} + && (idx != zero_cell_idx || cells[idx].data == ext::safe_bit_cast(CellMetadata::time_point_t()))); } BlockInputStreamPtr ComplexKeyCacheDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const @@ -371,8 +394,7 @@ BlockInputStreamPtr ComplexKeyCacheDictionary::getBlockInputStream(const Names & const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; for (auto idx : ext::range(0, cells.size())) - if (!isEmptyCell(idx) - && !cells[idx].isDefault()) + if (!isEmptyCell(idx) && !cells[idx].isDefault()) keys.push_back(cells[idx].key); } @@ -382,26 +404,24 @@ BlockInputStreamPtr ComplexKeyCacheDictionary::getBlockInputStream(const Names & void registerDictionaryComplexKeyCache(DictionaryFactory & factory) { - auto create_layout = [=]( - const std::string & name, - const DictionaryStructure & dict_struct, - const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, - DictionarySourcePtr source_ptr - ) -> DictionaryPtr { + auto create_layout = [=](const std::string & name, + const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + DictionarySourcePtr source_ptr) -> DictionaryPtr { if (!dict_struct.key) - throw Exception {"'key' is required for dictionary of layout 'complex_key_hashed'", ErrorCodes::BAD_ARGUMENTS}; + throw Exception{"'key' is required for dictionary of layout 'complex_key_hashed'", ErrorCodes::BAD_ARGUMENTS}; const auto & layout_prefix = config_prefix + ".layout"; const auto size = 
config.getInt(layout_prefix + ".complex_key_cache.size_in_cells"); if (size == 0) - throw Exception {name + ": dictionary of layout 'cache' cannot have 0 cells", ErrorCodes::TOO_SMALL_BUFFER_SIZE}; + throw Exception{name + ": dictionary of layout 'cache' cannot have 0 cells", ErrorCodes::TOO_SMALL_BUFFER_SIZE}; const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); if (require_nonempty) - throw Exception {name + ": dictionary of layout 'cache' cannot have 'require_nonempty' attribute set", - ErrorCodes::BAD_ARGUMENTS}; + throw Exception{name + ": dictionary of layout 'cache' cannot have 'require_nonempty' attribute set", + ErrorCodes::BAD_ARGUMENTS}; - const DictionaryLifetime dict_lifetime {config, config_prefix + ".lifetime"}; + const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, size); }; factory.registerLayout("complex_key_cache", create_layout); diff --git a/dbms/src/Dictionaries/ComplexKeyCacheDictionary.h b/dbms/src/Dictionaries/ComplexKeyCacheDictionary.h index f60e142db5e..22a2d51e963 100644 --- a/dbms/src/Dictionaries/ComplexKeyCacheDictionary.h +++ b/dbms/src/Dictionaries/ComplexKeyCacheDictionary.h @@ -3,23 +3,23 @@ #include #include #include +#include #include #include -#include #include #include +#include #include #include #include #include -#include "DictionaryStructure.h" -#include "IDictionary.h" -#include "IDictionarySource.h" #include #include #include #include -#include +#include "DictionaryStructure.h" +#include "IDictionary.h" +#include "IDictionarySource.h" namespace ProfileEvents @@ -40,7 +40,8 @@ namespace DB class ComplexKeyCacheDictionary final : public IDictionaryBase { public: - ComplexKeyCacheDictionary(const std::string & name, + ComplexKeyCacheDictionary( + const std::string & name, const DictionaryStructure & dict_struct, DictionarySourcePtr source_ptr, const DictionaryLifetime dict_lifetime, 
@@ -48,25 +49,13 @@ public: ComplexKeyCacheDictionary(const ComplexKeyCacheDictionary & other); - std::string getKeyDescription() const - { - return key_description; - } + std::string getKeyDescription() const { return key_description; } - std::exception_ptr getCreationException() const override - { - return {}; - } + std::exception_ptr getCreationException() const override { return {}; } - std::string getName() const override - { - return name; - } + std::string getName() const override { return name; } - std::string getTypeName() const override - { - return "ComplexKeyCache"; - } + std::string getTypeName() const override { return "ComplexKeyCache"; } size_t getBytesAllocated() const override { @@ -74,55 +63,28 @@ public: + (string_arena ? string_arena->size() : 0); } - size_t getQueryCount() const override - { - return query_count.load(std::memory_order_relaxed); - } + size_t getQueryCount() const override { return query_count.load(std::memory_order_relaxed); } double getHitRate() const override { return static_cast(hit_count.load(std::memory_order_acquire)) / query_count.load(std::memory_order_relaxed); } - size_t getElementCount() const override - { - return element_count.load(std::memory_order_relaxed); - } + size_t getElementCount() const override { return element_count.load(std::memory_order_relaxed); } - double getLoadFactor() const override - { - return static_cast(element_count.load(std::memory_order_relaxed)) / size; - } + double getLoadFactor() const override { return static_cast(element_count.load(std::memory_order_relaxed)) / size; } - bool isCached() const override - { - return true; - } + bool isCached() const override { return true; } - std::unique_ptr clone() const override - { - return std::make_unique(*this); - } + std::unique_ptr clone() const override { return std::make_unique(*this); } - const IDictionarySource * getSource() const override - { - return source_ptr.get(); - } + const IDictionarySource * getSource() const override { return 
source_ptr.get(); } - const DictionaryLifetime & getLifetime() const override - { - return dict_lifetime; - } + const DictionaryLifetime & getLifetime() const override { return dict_lifetime; } - const DictionaryStructure & getStructure() const override - { - return dict_struct; - } + const DictionaryStructure & getStructure() const override { return dict_struct; } - std::chrono::time_point getCreationTime() const override - { - return creation_time; - } + std::chrono::time_point getCreationTime() const override { return creation_time; } bool isInjective(const std::string & attribute_name) const override { @@ -155,11 +117,12 @@ public: void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const; -#define DECLARE(TYPE) \ - void get##TYPE(const std::string & attribute_name, \ - const Columns & key_columns, \ - const DataTypes & key_types, \ - const PaddedPODArray & def, \ +#define DECLARE(TYPE) \ + void get##TYPE( \ + const std::string & attribute_name, \ + const Columns & key_columns, \ + const DataTypes & key_types, \ + const PaddedPODArray & def, \ ResultArrayType & out) const; DECLARE(UInt8) DECLARE(UInt16) @@ -177,17 +140,19 @@ public: DECLARE(Decimal128) #undef DECLARE - void getString(const std::string & attribute_name, + void getString( + const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const ColumnString * const def, ColumnString * const out) const; -#define DECLARE(TYPE) \ - void get##TYPE(const std::string & attribute_name, \ - const Columns & key_columns, \ - const DataTypes & key_types, \ - const TYPE def, \ +#define DECLARE(TYPE) \ + void get##TYPE( \ + const std::string & attribute_name, \ + const Columns & key_columns, \ + const DataTypes & key_types, \ + const TYPE def, \ ResultArrayType & out) const; DECLARE(UInt8) DECLARE(UInt16) @@ -205,7 +170,8 @@ public: DECLARE(Decimal128) #undef DECLARE - void getString(const std::string & 
attribute_name, + void getString( + const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, const String & def, @@ -216,9 +182,12 @@ public: BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override; private: - template using MapType = HashMapWithSavedHash; - template using ContainerType = Value[]; - template using ContainerPtrType = std::unique_ptr>; + template + using MapType = HashMapWithSavedHash; + template + using ContainerType = Value[]; + template + using ContainerPtrType = std::unique_ptr>; struct CellMetadata final { @@ -235,32 +204,35 @@ private: time_point_urep_t data; /// Sets expiration time, resets `is_default` flag to false - time_point_t expiresAt() const - { - return ext::safe_bit_cast(data & EXPIRES_AT_MASK); - } - void setExpiresAt(const time_point_t & t) - { - data = ext::safe_bit_cast(t); - } + time_point_t expiresAt() const { return ext::safe_bit_cast(data & EXPIRES_AT_MASK); } + void setExpiresAt(const time_point_t & t) { data = ext::safe_bit_cast(t); } - bool isDefault() const - { - return (data & IS_DEFAULT_MASK) == IS_DEFAULT_MASK; - } - void setDefault() - { - data |= IS_DEFAULT_MASK; - } + bool isDefault() const { return (data & IS_DEFAULT_MASK) == IS_DEFAULT_MASK; } + void setDefault() { data |= IS_DEFAULT_MASK; } }; struct Attribute final { AttributeUnderlyingType type; - std::variant null_values; - std::variant, + std::variant< + UInt8, + UInt16, + UInt32, + UInt64, + UInt128, + Int8, + Int16, + Int32, + Int64, + Decimal32, + Decimal64, + Decimal128, + Float32, + Float64, + String> + null_values; + std::variant< + ContainerPtrType, ContainerPtrType, ContainerPtrType, ContainerPtrType, @@ -283,8 +255,8 @@ private: Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value); template - void getItemsNumber( - Attribute & attribute, const Columns & key_columns, PaddedPODArray & out, DefaultGetter && get_default) const 
+ void + getItemsNumber(Attribute & attribute, const Columns & key_columns, PaddedPODArray & out, DefaultGetter && get_default) const { if (false) { @@ -372,7 +344,8 @@ private: std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.second.front(); }); /// request new values - update(key_columns, + update( + key_columns, keys_array, required_rows, [&](const StringRef key, const size_t cell_idx) @@ -497,7 +470,8 @@ private: return pair.second.front(); }); - update(key_columns, + update( + key_columns, keys_array, required_rows, [&](const StringRef key, const size_t cell_idx) @@ -531,7 +505,8 @@ private: } template - void update(const Columns & in_key_columns, + void update( + const Columns & in_key_columns, const PODArray & in_keys, const std::vector & in_requested_rows, PresentKeyHandler && on_cell_updated, @@ -561,8 +536,10 @@ private: const auto key_columns = ext::map( ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; }); - const auto attribute_columns = ext::map(ext::range(0, attributes_size), - [&](const size_t attribute_idx) { return block.safeGetByPosition(keys_size + attribute_idx).column; }); + const auto attribute_columns = ext::map(ext::range(0, attributes_size), [&](const size_t attribute_idx) + { + return block.safeGetByPosition(keys_size + attribute_idx).column; + }); const auto rows_num = block.rows(); @@ -693,7 +670,8 @@ private: void freeKey(const StringRef key) const; template - static StringRef placeKeysInPool(const size_t row, + static StringRef placeKeysInPool( + const size_t row, const Columns & key_columns, StringRefs & keys, const std::vector & key_attributes, diff --git a/dbms/src/Dictionaries/ComplexKeyCacheDictionary_createAttributeWithType.cpp b/dbms/src/Dictionaries/ComplexKeyCacheDictionary_createAttributeWithType.cpp index 843c389dcb0..8cfa1471f79 100644 --- 
a/dbms/src/Dictionaries/ComplexKeyCacheDictionary_createAttributeWithType.cpp +++ b/dbms/src/Dictionaries/ComplexKeyCacheDictionary_createAttributeWithType.cpp @@ -2,19 +2,19 @@ namespace DB { - -ComplexKeyCacheDictionary::Attribute ComplexKeyCacheDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value) +ComplexKeyCacheDictionary::Attribute +ComplexKeyCacheDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value) { Attribute attr{type, {}, {}}; switch (type) { -#define DISPATCH(TYPE) \ - case AttributeUnderlyingType::TYPE: \ - attr.null_values = TYPE(null_value.get>()); \ - attr.arrays = std::make_unique>(size); \ - bytes_allocated += size * sizeof(TYPE); \ - break; +#define DISPATCH(TYPE) \ + case AttributeUnderlyingType::TYPE: \ + attr.null_values = TYPE(null_value.get>()); \ + attr.arrays = std::make_unique>(size); \ + bytes_allocated += size * sizeof(TYPE); \ + break; DISPATCH(UInt8) DISPATCH(UInt16) DISPATCH(UInt32) diff --git a/dbms/src/Dictionaries/ComplexKeyCacheDictionary_setAttributeValue.cpp b/dbms/src/Dictionaries/ComplexKeyCacheDictionary_setAttributeValue.cpp index 9a3d34eb2c7..7b3a44214c5 100644 --- a/dbms/src/Dictionaries/ComplexKeyCacheDictionary_setAttributeValue.cpp +++ b/dbms/src/Dictionaries/ComplexKeyCacheDictionary_setAttributeValue.cpp @@ -2,26 +2,53 @@ namespace DB { - void ComplexKeyCacheDictionary::setAttributeValue(Attribute & attribute, const size_t idx, const Field & value) const { switch (attribute.type) { - case AttributeUnderlyingType::UInt8: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::UInt16: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::UInt32: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::UInt64: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::UInt128: std::get>(attribute.arrays)[idx] = 
value.get(); break; - case AttributeUnderlyingType::Int8: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Int16: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Int32: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Int64: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Float32: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Float64: std::get>(attribute.arrays)[idx] = value.get(); break; + case AttributeUnderlyingType::UInt8: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::UInt16: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::UInt32: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::UInt64: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::UInt128: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Int8: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Int16: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Int32: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Int64: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Float32: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Float64: + std::get>(attribute.arrays)[idx] = value.get(); + break; - case AttributeUnderlyingType::Decimal32: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Decimal64: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingType::Decimal128: std::get>(attribute.arrays)[idx] = value.get(); break; + case 
AttributeUnderlyingType::Decimal32: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Decimal64: + std::get>(attribute.arrays)[idx] = value.get(); + break; + case AttributeUnderlyingType::Decimal128: + std::get>(attribute.arrays)[idx] = value.get(); + break; case AttributeUnderlyingType::String: { diff --git a/dbms/src/Dictionaries/ComplexKeyCacheDictionary_setDefaultAttributeValue.cpp b/dbms/src/Dictionaries/ComplexKeyCacheDictionary_setDefaultAttributeValue.cpp index 7477e01da9c..89cf1506f90 100644 --- a/dbms/src/Dictionaries/ComplexKeyCacheDictionary_setDefaultAttributeValue.cpp +++ b/dbms/src/Dictionaries/ComplexKeyCacheDictionary_setDefaultAttributeValue.cpp @@ -2,22 +2,43 @@ namespace DB { - void ComplexKeyCacheDictionary::setDefaultAttributeValue(Attribute & attribute, const size_t idx) const { switch (attribute.type) { - case AttributeUnderlyingType::UInt8: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::UInt16: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::UInt32: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::UInt64: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::UInt128: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::Int8: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::Int16: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::Int32: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::Int64: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; - case AttributeUnderlyingType::Float32: std::get>(attribute.arrays)[idx] = 
std::get(attribute.null_values); break; - case AttributeUnderlyingType::Float64: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; + case AttributeUnderlyingType::UInt8: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::UInt16: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::UInt32: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::UInt64: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::UInt128: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::Int8: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::Int16: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::Int32: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::Int64: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::Float32: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; + case AttributeUnderlyingType::Float64: + std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); + break; case AttributeUnderlyingType::Decimal32: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); diff --git a/dbms/src/Dictionaries/ComplexKeyHashedDictionary.cpp b/dbms/src/Dictionaries/ComplexKeyHashedDictionary.cpp index cdf01668bd2..bfd808c5914 100644 --- a/dbms/src/Dictionaries/ComplexKeyHashedDictionary.cpp +++ b/dbms/src/Dictionaries/ComplexKeyHashedDictionary.cpp @@ -1,12 +1,11 @@ +#include "ComplexKeyHashedDictionary.h" #include #include -#include "ComplexKeyHashedDictionary.h" #include 
"DictionaryBlockInputStream.h" #include "DictionaryFactory.h" namespace DB { - namespace ErrorCodes { extern const int TYPE_MISMATCH; @@ -16,12 +15,19 @@ namespace ErrorCodes } ComplexKeyHashedDictionary::ComplexKeyHashedDictionary( - const std::string & name, const DictionaryStructure & dict_struct, DictionarySourcePtr source_ptr, - const DictionaryLifetime dict_lifetime, bool require_nonempty, BlockPtr saved_block) - : name{name}, dict_struct(dict_struct), source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime), - require_nonempty(require_nonempty), saved_block{std::move(saved_block)} + const std::string & name, + const DictionaryStructure & dict_struct, + DictionarySourcePtr source_ptr, + const DictionaryLifetime dict_lifetime, + bool require_nonempty, + BlockPtr saved_block) + : name{name} + , dict_struct(dict_struct) + , source_ptr{std::move(source_ptr)} + , dict_lifetime(dict_lifetime) + , require_nonempty(require_nonempty) + , saved_block{std::move(saved_block)} { - createAttributes(); try @@ -38,27 +44,30 @@ ComplexKeyHashedDictionary::ComplexKeyHashedDictionary( } ComplexKeyHashedDictionary::ComplexKeyHashedDictionary(const ComplexKeyHashedDictionary & other) - : ComplexKeyHashedDictionary{other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime, other.require_nonempty, other.saved_block} + : ComplexKeyHashedDictionary{ + other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime, other.require_nonempty, other.saved_block} { } -#define DECLARE(TYPE)\ -void ComplexKeyHashedDictionary::get##TYPE(\ - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types,\ - ResultArrayType & out) const\ -{\ - dict_struct.validateKeyTypes(key_types);\ - \ - const auto & attribute = getAttribute(attribute_name);\ - if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE))\ - throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + 
toString(attribute.type), ErrorCodes::TYPE_MISMATCH};\ - \ - const auto null_value = std::get(attribute.null_values);\ - \ - getItemsNumber(attribute, key_columns,\ - [&] (const size_t row, const auto value) { out[row] = value; },\ - [&] (const size_t) { return null_value; });\ -} +#define DECLARE(TYPE) \ + void ComplexKeyHashedDictionary::get##TYPE( \ + const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType & out) const \ + { \ + dict_struct.validateKeyTypes(key_types); \ + \ + const auto & attribute = getAttribute(attribute_name); \ + if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \ + throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), \ + ErrorCodes::TYPE_MISMATCH}; \ + \ + const auto null_value = std::get(attribute.null_values); \ + \ + getItemsNumber( \ + attribute, \ + key_columns, \ + [&](const size_t row, const auto value) { out[row] = value; }, \ + [&](const size_t) { return null_value; }); \ + } DECLARE(UInt8) DECLARE(UInt16) DECLARE(UInt32) @@ -76,37 +85,45 @@ DECLARE(Decimal128) #undef DECLARE void ComplexKeyHashedDictionary::getString( - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, - ColumnString * out) const + const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const { dict_struct.validateKeyTypes(key_types); const auto & attribute = getAttribute(attribute_name); if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String)) - throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; + throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), + ErrorCodes::TYPE_MISMATCH}; const auto & null_value = 
StringRef{std::get(attribute.null_values)}; - getItemsImpl(attribute, key_columns, - [&] (const size_t, const StringRef value) { out->insertData(value.data, value.size); }, - [&] (const size_t) { return null_value; }); + getItemsImpl( + attribute, + key_columns, + [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); }, + [&](const size_t) { return null_value; }); } -#define DECLARE(TYPE)\ -void ComplexKeyHashedDictionary::get##TYPE(\ - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types,\ - const PaddedPODArray & def, ResultArrayType & out) const\ -{\ - dict_struct.validateKeyTypes(key_types);\ - \ - const auto & attribute = getAttribute(attribute_name);\ - if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE))\ - throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH};\ - \ - getItemsNumber(attribute, key_columns,\ - [&] (const size_t row, const auto value) { out[row] = value; },\ - [&] (const size_t row) { return def[row]; });\ -} +#define DECLARE(TYPE) \ + void ComplexKeyHashedDictionary::get##TYPE( \ + const std::string & attribute_name, \ + const Columns & key_columns, \ + const DataTypes & key_types, \ + const PaddedPODArray & def, \ + ResultArrayType & out) const \ + { \ + dict_struct.validateKeyTypes(key_types); \ + \ + const auto & attribute = getAttribute(attribute_name); \ + if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \ + throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), \ + ErrorCodes::TYPE_MISMATCH}; \ + \ + getItemsNumber( \ + attribute, \ + key_columns, \ + [&](const size_t row, const auto value) { out[row] = value; }, \ + [&](const size_t row) { return def[row]; }); \ + } DECLARE(UInt8) DECLARE(UInt16) DECLARE(UInt32) @@ -124,35 +141,44 @@ DECLARE(Decimal128) #undef 
DECLARE void ComplexKeyHashedDictionary::getString( - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, - const ColumnString * const def, ColumnString * const out) const + const std::string & attribute_name, + const Columns & key_columns, + const DataTypes & key_types, + const ColumnString * const def, + ColumnString * const out) const { dict_struct.validateKeyTypes(key_types); const auto & attribute = getAttribute(attribute_name); if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String)) - throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; + throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), + ErrorCodes::TYPE_MISMATCH}; - getItemsImpl(attribute, key_columns, - [&] (const size_t, const StringRef value) { out->insertData(value.data, value.size); }, - [&] (const size_t row) { return def->getDataAt(row); }); + getItemsImpl( + attribute, + key_columns, + [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); }, + [&](const size_t row) { return def->getDataAt(row); }); } -#define DECLARE(TYPE)\ -void ComplexKeyHashedDictionary::get##TYPE(\ - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types,\ - const TYPE def, ResultArrayType & out) const\ -{\ - dict_struct.validateKeyTypes(key_types);\ - \ - const auto & attribute = getAttribute(attribute_name);\ - if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE))\ - throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH};\ - \ - getItemsNumber(attribute, key_columns,\ - [&] (const size_t row, const auto value) { out[row] = value; },\ - [&] (const size_t) { return def; });\ -} +#define DECLARE(TYPE) \ + void 
ComplexKeyHashedDictionary::get##TYPE( \ + const std::string & attribute_name, \ + const Columns & key_columns, \ + const DataTypes & key_types, \ + const TYPE def, \ + ResultArrayType & out) const \ + { \ + dict_struct.validateKeyTypes(key_types); \ + \ + const auto & attribute = getAttribute(attribute_name); \ + if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE)) \ + throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), \ + ErrorCodes::TYPE_MISMATCH}; \ + \ + getItemsNumber( \ + attribute, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \ + } DECLARE(UInt8) DECLARE(UInt16) DECLARE(UInt32) @@ -170,18 +196,24 @@ DECLARE(Decimal128) #undef DECLARE void ComplexKeyHashedDictionary::getString( - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, - const String & def, ColumnString * const out) const + const std::string & attribute_name, + const Columns & key_columns, + const DataTypes & key_types, + const String & def, + ColumnString * const out) const { dict_struct.validateKeyTypes(key_types); const auto & attribute = getAttribute(attribute_name); if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String)) - throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH}; + throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), + ErrorCodes::TYPE_MISMATCH}; - getItemsImpl(attribute, key_columns, - [&] (const size_t, const StringRef value) { out->insertData(value.data, value.size); }, - [&] (const size_t) { return StringRef{def}; }); + getItemsImpl( + attribute, + key_columns, + [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); }, + [&](const size_t) { return StringRef{def}; }); } void 
ComplexKeyHashedDictionary::has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray & out) const @@ -192,22 +224,52 @@ void ComplexKeyHashedDictionary::has(const Columns & key_columns, const DataType switch (attribute.type) { - case AttributeUnderlyingType::UInt8: has(attribute, key_columns, out); break; - case AttributeUnderlyingType::UInt16: has(attribute, key_columns, out); break; - case AttributeUnderlyingType::UInt32: has(attribute, key_columns, out); break; - case AttributeUnderlyingType::UInt64: has(attribute, key_columns, out); break; - case AttributeUnderlyingType::UInt128: has(attribute, key_columns, out); break; - case AttributeUnderlyingType::Int8: has(attribute, key_columns, out); break; - case AttributeUnderlyingType::Int16: has(attribute, key_columns, out); break; - case AttributeUnderlyingType::Int32: has(attribute, key_columns, out); break; - case AttributeUnderlyingType::Int64: has(attribute, key_columns, out); break; - case AttributeUnderlyingType::Float32: has(attribute, key_columns, out); break; - case AttributeUnderlyingType::Float64: has(attribute, key_columns, out); break; - case AttributeUnderlyingType::String: has(attribute, key_columns, out); break; + case AttributeUnderlyingType::UInt8: + has(attribute, key_columns, out); + break; + case AttributeUnderlyingType::UInt16: + has(attribute, key_columns, out); + break; + case AttributeUnderlyingType::UInt32: + has(attribute, key_columns, out); + break; + case AttributeUnderlyingType::UInt64: + has(attribute, key_columns, out); + break; + case AttributeUnderlyingType::UInt128: + has(attribute, key_columns, out); + break; + case AttributeUnderlyingType::Int8: + has(attribute, key_columns, out); + break; + case AttributeUnderlyingType::Int16: + has(attribute, key_columns, out); + break; + case AttributeUnderlyingType::Int32: + has(attribute, key_columns, out); + break; + case AttributeUnderlyingType::Int64: + has(attribute, key_columns, out); + break; + case 
AttributeUnderlyingType::Float32: + has(attribute, key_columns, out); + break; + case AttributeUnderlyingType::Float64: + has(attribute, key_columns, out); + break; + case AttributeUnderlyingType::String: + has(attribute, key_columns, out); + break; - case AttributeUnderlyingType::Decimal32: has(attribute, key_columns, out); break; - case AttributeUnderlyingType::Decimal64: has(attribute, key_columns, out); break; - case AttributeUnderlyingType::Decimal128: has(attribute, key_columns, out); break; + case AttributeUnderlyingType::Decimal32: + has(attribute, key_columns, out); + break; + case AttributeUnderlyingType::Decimal64: + has(attribute, key_columns, out); + break; + case AttributeUnderlyingType::Decimal128: + has(attribute, key_columns, out); + break; } } @@ -222,7 +284,8 @@ void ComplexKeyHashedDictionary::createAttributes() attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value)); if (attribute.hierarchical) - throw Exception{name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(), ErrorCodes::TYPE_MISMATCH}; + throw Exception{name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(), + ErrorCodes::TYPE_MISMATCH}; } } @@ -236,17 +299,13 @@ void ComplexKeyHashedDictionary::blockToAttributes(const Block & block) const auto rows = block.rows(); element_count += rows; - const auto key_column_ptrs = ext::map(ext::range(0, keys_size), - [&](const size_t attribute_idx) - { - return block.safeGetByPosition(attribute_idx).column; - }); + const auto key_column_ptrs = ext::map( + ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; }); - const auto attribute_column_ptrs = ext::map(ext::range(0, attributes_size), - [&](const size_t attribute_idx) - { - return block.safeGetByPosition(keys_size + attribute_idx).column; - }); + const auto attribute_column_ptrs = ext::map(ext::range(0, attributes_size), [&](const 
size_t attribute_idx) + { + return block.safeGetByPosition(keys_size + attribute_idx).column; + }); for (const auto row_idx : ext::range(0, rows)) { @@ -304,18 +363,14 @@ void ComplexKeyHashedDictionary::updateData() stream->readPrefix(); while (Block block = stream->read()) { - const auto saved_key_column_ptrs = ext::map(ext::range(0, keys_size), [&](const size_t key_idx) - { - return saved_block->safeGetByPosition(key_idx).column; - }); + const auto saved_key_column_ptrs = ext::map( + ext::range(0, keys_size), [&](const size_t key_idx) { return saved_block->safeGetByPosition(key_idx).column; }); - const auto update_key_column_ptrs = ext::map(ext::range(0, keys_size), [&](const size_t key_idx) - { - return block.safeGetByPosition(key_idx).column; - }); + const auto update_key_column_ptrs = ext::map( + ext::range(0, keys_size), [&](const size_t key_idx) { return block.safeGetByPosition(key_idx).column; }); Arena temp_key_pool; - ContainerType > update_key_hash; + ContainerType> update_key_hash; for (size_t i = 0; i < block.rows(); ++i) { @@ -389,21 +444,49 @@ void ComplexKeyHashedDictionary::calculateBytesAllocated() { switch (attribute.type) { - case AttributeUnderlyingType::UInt8: addAttributeSize(attribute); break; - case AttributeUnderlyingType::UInt16: addAttributeSize(attribute); break; - case AttributeUnderlyingType::UInt32: addAttributeSize(attribute); break; - case AttributeUnderlyingType::UInt64: addAttributeSize(attribute); break; - case AttributeUnderlyingType::UInt128: addAttributeSize(attribute); break; - case AttributeUnderlyingType::Int8: addAttributeSize(attribute); break; - case AttributeUnderlyingType::Int16: addAttributeSize(attribute); break; - case AttributeUnderlyingType::Int32: addAttributeSize(attribute); break; - case AttributeUnderlyingType::Int64: addAttributeSize(attribute); break; - case AttributeUnderlyingType::Float32: addAttributeSize(attribute); break; - case AttributeUnderlyingType::Float64: addAttributeSize(attribute); break; + 
case AttributeUnderlyingType::UInt8: + addAttributeSize(attribute); + break; + case AttributeUnderlyingType::UInt16: + addAttributeSize(attribute); + break; + case AttributeUnderlyingType::UInt32: + addAttributeSize(attribute); + break; + case AttributeUnderlyingType::UInt64: + addAttributeSize(attribute); + break; + case AttributeUnderlyingType::UInt128: + addAttributeSize(attribute); + break; + case AttributeUnderlyingType::Int8: + addAttributeSize(attribute); + break; + case AttributeUnderlyingType::Int16: + addAttributeSize(attribute); + break; + case AttributeUnderlyingType::Int32: + addAttributeSize(attribute); + break; + case AttributeUnderlyingType::Int64: + addAttributeSize(attribute); + break; + case AttributeUnderlyingType::Float32: + addAttributeSize(attribute); + break; + case AttributeUnderlyingType::Float64: + addAttributeSize(attribute); + break; - case AttributeUnderlyingType::Decimal32: addAttributeSize(attribute); break; - case AttributeUnderlyingType::Decimal64: addAttributeSize(attribute); break; - case AttributeUnderlyingType::Decimal128: addAttributeSize(attribute); break; + case AttributeUnderlyingType::Decimal32: + addAttributeSize(attribute); + break; + case AttributeUnderlyingType::Decimal64: + addAttributeSize(attribute); + break; + case AttributeUnderlyingType::Decimal128: + addAttributeSize(attribute); + break; case AttributeUnderlyingType::String: { @@ -425,27 +508,56 @@ void ComplexKeyHashedDictionary::createAttributeImpl(Attribute & attribute, cons attribute.maps.emplace>(); } -ComplexKeyHashedDictionary::Attribute ComplexKeyHashedDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value) +ComplexKeyHashedDictionary::Attribute +ComplexKeyHashedDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value) { Attribute attr{type, {}, {}, {}}; switch (type) { - case AttributeUnderlyingType::UInt8: createAttributeImpl(attr, null_value); break; - case 
AttributeUnderlyingType::UInt16: createAttributeImpl(attr, null_value); break; - case AttributeUnderlyingType::UInt32: createAttributeImpl(attr, null_value); break; - case AttributeUnderlyingType::UInt64: createAttributeImpl(attr, null_value); break; - case AttributeUnderlyingType::UInt128: createAttributeImpl(attr, null_value); break; - case AttributeUnderlyingType::Int8: createAttributeImpl(attr, null_value); break; - case AttributeUnderlyingType::Int16: createAttributeImpl(attr, null_value); break; - case AttributeUnderlyingType::Int32: createAttributeImpl(attr, null_value); break; - case AttributeUnderlyingType::Int64: createAttributeImpl(attr, null_value); break; - case AttributeUnderlyingType::Float32: createAttributeImpl(attr, null_value); break; - case AttributeUnderlyingType::Float64: createAttributeImpl(attr, null_value); break; + case AttributeUnderlyingType::UInt8: + createAttributeImpl(attr, null_value); + break; + case AttributeUnderlyingType::UInt16: + createAttributeImpl(attr, null_value); + break; + case AttributeUnderlyingType::UInt32: + createAttributeImpl(attr, null_value); + break; + case AttributeUnderlyingType::UInt64: + createAttributeImpl(attr, null_value); + break; + case AttributeUnderlyingType::UInt128: + createAttributeImpl(attr, null_value); + break; + case AttributeUnderlyingType::Int8: + createAttributeImpl(attr, null_value); + break; + case AttributeUnderlyingType::Int16: + createAttributeImpl(attr, null_value); + break; + case AttributeUnderlyingType::Int32: + createAttributeImpl(attr, null_value); + break; + case AttributeUnderlyingType::Int64: + createAttributeImpl(attr, null_value); + break; + case AttributeUnderlyingType::Float32: + createAttributeImpl(attr, null_value); + break; + case AttributeUnderlyingType::Float64: + createAttributeImpl(attr, null_value); + break; - case AttributeUnderlyingType::Decimal32: createAttributeImpl(attr, null_value); break; - case AttributeUnderlyingType::Decimal64: createAttributeImpl(attr, 
null_value); break; - case AttributeUnderlyingType::Decimal128: createAttributeImpl(attr, null_value); break; + case AttributeUnderlyingType::Decimal32: + createAttributeImpl(attr, null_value); + break; + case AttributeUnderlyingType::Decimal64: + createAttributeImpl(attr, null_value); + break; + case AttributeUnderlyingType::Decimal128: + createAttributeImpl(attr, null_value); + break; case AttributeUnderlyingType::String: { @@ -462,15 +574,14 @@ ComplexKeyHashedDictionary::Attribute ComplexKeyHashedDictionary::createAttribut template void ComplexKeyHashedDictionary::getItemsNumber( - const Attribute & attribute, - const Columns & key_columns, - ValueSetter && set_value, - DefaultGetter && get_default) const + const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const { - if (false) {} -#define DISPATCH(TYPE) \ - else if (attribute.type == AttributeUnderlyingType::TYPE) \ - getItemsImpl(attribute, key_columns, std::forward(set_value), std::forward(get_default)); + if (false) + { + } +#define DISPATCH(TYPE) \ + else if (attribute.type == AttributeUnderlyingType::TYPE) getItemsImpl( \ + attribute, key_columns, std::forward(set_value), std::forward(get_default)); DISPATCH(UInt8) DISPATCH(UInt16) DISPATCH(UInt32) @@ -486,16 +597,12 @@ void ComplexKeyHashedDictionary::getItemsNumber( DISPATCH(Decimal64) DISPATCH(Decimal128) #undef DISPATCH - else - throw Exception("Unexpected type of attribute: " + toString(attribute.type), ErrorCodes::LOGICAL_ERROR); + else throw Exception("Unexpected type of attribute: " + toString(attribute.type), ErrorCodes::LOGICAL_ERROR); } template void ComplexKeyHashedDictionary::getItemsImpl( - const Attribute & attribute, - const Columns & key_columns, - ValueSetter && set_value, - DefaultGetter && get_default) const + const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const { const auto & attr = 
std::get>(attribute.maps); @@ -524,7 +631,7 @@ template bool ComplexKeyHashedDictionary::setAttributeValueImpl(Attribute & attribute, const StringRef key, const T value) { auto & map = std::get>(attribute.maps); - const auto pair = map.insert({ key, value }); + const auto pair = map.insert({key, value}); return pair.second; } @@ -532,28 +639,42 @@ bool ComplexKeyHashedDictionary::setAttributeValue(Attribute & attribute, const { switch (attribute.type) { - case AttributeUnderlyingType::UInt8: return setAttributeValueImpl(attribute, key, value.get()); - case AttributeUnderlyingType::UInt16: return setAttributeValueImpl(attribute, key, value.get()); - case AttributeUnderlyingType::UInt32: return setAttributeValueImpl(attribute, key, value.get()); - case AttributeUnderlyingType::UInt64: return setAttributeValueImpl(attribute, key, value.get()); - case AttributeUnderlyingType::UInt128: return setAttributeValueImpl(attribute, key, value.get()); - case AttributeUnderlyingType::Int8: return setAttributeValueImpl(attribute, key, value.get()); - case AttributeUnderlyingType::Int16: return setAttributeValueImpl(attribute, key, value.get()); - case AttributeUnderlyingType::Int32: return setAttributeValueImpl(attribute, key, value.get()); - case AttributeUnderlyingType::Int64: return setAttributeValueImpl(attribute, key, value.get()); - case AttributeUnderlyingType::Float32: return setAttributeValueImpl(attribute, key, value.get()); - case AttributeUnderlyingType::Float64: return setAttributeValueImpl(attribute, key, value.get()); + case AttributeUnderlyingType::UInt8: + return setAttributeValueImpl(attribute, key, value.get()); + case AttributeUnderlyingType::UInt16: + return setAttributeValueImpl(attribute, key, value.get()); + case AttributeUnderlyingType::UInt32: + return setAttributeValueImpl(attribute, key, value.get()); + case AttributeUnderlyingType::UInt64: + return setAttributeValueImpl(attribute, key, value.get()); + case AttributeUnderlyingType::UInt128: + return 
setAttributeValueImpl(attribute, key, value.get()); + case AttributeUnderlyingType::Int8: + return setAttributeValueImpl(attribute, key, value.get()); + case AttributeUnderlyingType::Int16: + return setAttributeValueImpl(attribute, key, value.get()); + case AttributeUnderlyingType::Int32: + return setAttributeValueImpl(attribute, key, value.get()); + case AttributeUnderlyingType::Int64: + return setAttributeValueImpl(attribute, key, value.get()); + case AttributeUnderlyingType::Float32: + return setAttributeValueImpl(attribute, key, value.get()); + case AttributeUnderlyingType::Float64: + return setAttributeValueImpl(attribute, key, value.get()); - case AttributeUnderlyingType::Decimal32: return setAttributeValueImpl(attribute, key, value.get()); - case AttributeUnderlyingType::Decimal64: return setAttributeValueImpl(attribute, key, value.get()); - case AttributeUnderlyingType::Decimal128: return setAttributeValueImpl(attribute, key, value.get()); + case AttributeUnderlyingType::Decimal32: + return setAttributeValueImpl(attribute, key, value.get()); + case AttributeUnderlyingType::Decimal64: + return setAttributeValueImpl(attribute, key, value.get()); + case AttributeUnderlyingType::Decimal128: + return setAttributeValueImpl(attribute, key, value.get()); case AttributeUnderlyingType::String: { auto & map = std::get>(attribute.maps); const auto & string = value.get(); const auto string_in_arena = attribute.string_arena->insert(string.data(), string.size()); - const auto pair = map.insert({ key, StringRef{string_in_arena, string.size()} }); + const auto pair = map.insert({key, StringRef{string_in_arena, string.size()}}); return pair.second; } } @@ -570,8 +691,7 @@ const ComplexKeyHashedDictionary::Attribute & ComplexKeyHashedDictionary::getAtt return attributes[it->second]; } -StringRef ComplexKeyHashedDictionary::placeKeysInPool( - const size_t row, const Columns & key_columns, StringRefs & keys, Arena & pool) +StringRef 
ComplexKeyHashedDictionary::placeKeysInPool(const size_t row, const Columns & key_columns, StringRefs & keys, Arena & pool) { const auto keys_size = key_columns.size(); size_t sum_keys_size{}; @@ -590,7 +710,7 @@ StringRef ComplexKeyHashedDictionary::placeKeysInPool( key_start += keys[j].size; } - return { block_start, sum_keys_size }; + return {block_start, sum_keys_size}; } template @@ -623,22 +743,37 @@ std::vector ComplexKeyHashedDictionary::getKeys() const switch (attribute.type) { - case AttributeUnderlyingType::UInt8: return getKeys(attribute); - case AttributeUnderlyingType::UInt16: return getKeys(attribute); - case AttributeUnderlyingType::UInt32: return getKeys(attribute); - case AttributeUnderlyingType::UInt64: return getKeys(attribute); - case AttributeUnderlyingType::UInt128: return getKeys(attribute); - case AttributeUnderlyingType::Int8: return getKeys(attribute); - case AttributeUnderlyingType::Int16: return getKeys(attribute); - case AttributeUnderlyingType::Int32: return getKeys(attribute); - case AttributeUnderlyingType::Int64: return getKeys(attribute); - case AttributeUnderlyingType::Float32: return getKeys(attribute); - case AttributeUnderlyingType::Float64: return getKeys(attribute); - case AttributeUnderlyingType::String: return getKeys(attribute); + case AttributeUnderlyingType::UInt8: + return getKeys(attribute); + case AttributeUnderlyingType::UInt16: + return getKeys(attribute); + case AttributeUnderlyingType::UInt32: + return getKeys(attribute); + case AttributeUnderlyingType::UInt64: + return getKeys(attribute); + case AttributeUnderlyingType::UInt128: + return getKeys(attribute); + case AttributeUnderlyingType::Int8: + return getKeys(attribute); + case AttributeUnderlyingType::Int16: + return getKeys(attribute); + case AttributeUnderlyingType::Int32: + return getKeys(attribute); + case AttributeUnderlyingType::Int64: + return getKeys(attribute); + case AttributeUnderlyingType::Float32: + return getKeys(attribute); + case 
AttributeUnderlyingType::Float64: + return getKeys(attribute); + case AttributeUnderlyingType::String: + return getKeys(attribute); - case AttributeUnderlyingType::Decimal32: return getKeys(attribute); - case AttributeUnderlyingType::Decimal64: return getKeys(attribute); - case AttributeUnderlyingType::Decimal128: return getKeys(attribute); + case AttributeUnderlyingType::Decimal32: + return getKeys(attribute); + case AttributeUnderlyingType::Decimal64: + return getKeys(attribute); + case AttributeUnderlyingType::Decimal128: + return getKeys(attribute); } return {}; } @@ -663,17 +798,15 @@ BlockInputStreamPtr ComplexKeyHashedDictionary::getBlockInputStream(const Names void registerDictionaryComplexKeyHashed(DictionaryFactory & factory) { - auto create_layout = [=]( - const std::string & name, - const DictionaryStructure & dict_struct, - const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, - DictionarySourcePtr source_ptr - ) -> DictionaryPtr { + auto create_layout = [=](const std::string & name, + const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + DictionarySourcePtr source_ptr) -> DictionaryPtr { if (!dict_struct.key) - throw Exception {"'key' is required for dictionary of layout 'complex_key_hashed'", ErrorCodes::BAD_ARGUMENTS}; + throw Exception{"'key' is required for dictionary of layout 'complex_key_hashed'", ErrorCodes::BAD_ARGUMENTS}; - const DictionaryLifetime dict_lifetime {config, config_prefix + ".lifetime"}; + const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty); }; diff --git a/dbms/src/Dictionaries/ComplexKeyHashedDictionary.h b/dbms/src/Dictionaries/ComplexKeyHashedDictionary.h index 859266fb5d1..81b350dd43e 100644 --- 
a/dbms/src/Dictionaries/ComplexKeyHashedDictionary.h +++ b/dbms/src/Dictionaries/ComplexKeyHashedDictionary.h @@ -1,30 +1,33 @@ #pragma once -#include "IDictionary.h" -#include "IDictionarySource.h" -#include "DictionaryStructure.h" -#include -#include -#include -#include -#include -#include #include #include #include +#include +#include +#include +#include +#include +#include +#include "DictionaryStructure.h" +#include "IDictionary.h" +#include "IDictionarySource.h" namespace DB { - using BlockPtr = std::shared_ptr; class ComplexKeyHashedDictionary final : public IDictionaryBase { public: ComplexKeyHashedDictionary( - const std::string & name, const DictionaryStructure & dict_struct, DictionarySourcePtr source_ptr, - const DictionaryLifetime dict_lifetime, bool require_nonempty, BlockPtr saved_block = nullptr); + const std::string & name, + const DictionaryStructure & dict_struct, + DictionarySourcePtr source_ptr, + const DictionaryLifetime dict_lifetime, + bool require_nonempty, + BlockPtr saved_block = nullptr); ComplexKeyHashedDictionary(const ComplexKeyHashedDictionary & other); @@ -56,10 +59,7 @@ public: const DictionaryStructure & getStructure() const override { return dict_struct; } - std::chrono::time_point getCreationTime() const override - { - return creation_time; - } + std::chrono::time_point getCreationTime() const override { return creation_time; } bool isInjective(const std::string & attribute_name) const override { @@ -69,9 +69,33 @@ public: template using ResultArrayType = std::conditional_t, DecimalPaddedPODArray, PaddedPODArray>; -#define DECLARE(TYPE)\ - void get##TYPE(\ - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types,\ +#define DECLARE(TYPE) \ + void get##TYPE( \ + const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType & out) const; + DECLARE(UInt8) + DECLARE(UInt16) + DECLARE(UInt32) + DECLARE(UInt64) + DECLARE(UInt128) + DECLARE(Int8) + 
DECLARE(Int16) + DECLARE(Int32) + DECLARE(Int64) + DECLARE(Float32) + DECLARE(Float64) + DECLARE(Decimal32) + DECLARE(Decimal64) + DECLARE(Decimal128) +#undef DECLARE + + void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const; + +#define DECLARE(TYPE) \ + void get##TYPE( \ + const std::string & attribute_name, \ + const Columns & key_columns, \ + const DataTypes & key_types, \ + const PaddedPODArray & def, \ ResultArrayType & out) const; DECLARE(UInt8) DECLARE(UInt16) @@ -90,13 +114,19 @@ public: #undef DECLARE void getString( - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, - ColumnString * out) const; + const std::string & attribute_name, + const Columns & key_columns, + const DataTypes & key_types, + const ColumnString * const def, + ColumnString * const out) const; -#define DECLARE(TYPE)\ - void get##TYPE(\ - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types,\ - const PaddedPODArray & def, ResultArrayType & out) const; +#define DECLARE(TYPE) \ + void get##TYPE( \ + const std::string & attribute_name, \ + const Columns & key_columns, \ + const DataTypes & key_types, \ + const TYPE def, \ + ResultArrayType & out) const; DECLARE(UInt8) DECLARE(UInt16) DECLARE(UInt32) @@ -114,57 +144,57 @@ public: #undef DECLARE void getString( - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, - const ColumnString * const def, ColumnString * const out) const; - -#define DECLARE(TYPE)\ - void get##TYPE(\ - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types,\ - const TYPE def, ResultArrayType & out) const; - DECLARE(UInt8) - DECLARE(UInt16) - DECLARE(UInt32) - DECLARE(UInt64) - DECLARE(UInt128) - DECLARE(Int8) - DECLARE(Int16) - DECLARE(Int32) - DECLARE(Int64) - DECLARE(Float32) - DECLARE(Float64) - DECLARE(Decimal32) - 
DECLARE(Decimal64) - DECLARE(Decimal128) -#undef DECLARE - - void getString( - const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, - const String & def, ColumnString * const out) const; + const std::string & attribute_name, + const Columns & key_columns, + const DataTypes & key_types, + const String & def, + ColumnString * const out) const; void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray & out) const; BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override; private: - template using ContainerType = HashMapWithSavedHash; + template + using ContainerType = HashMapWithSavedHash; struct Attribute final { AttributeUnderlyingType type; std::variant< - UInt8, UInt16, UInt32, UInt64, + UInt8, + UInt16, + UInt32, + UInt64, UInt128, - Int8, Int16, Int32, Int64, - Decimal32, Decimal64, Decimal128, - Float32, Float64, - String> null_values; + Int8, + Int16, + Int32, + Int64, + Decimal32, + Decimal64, + Decimal128, + Float32, + Float64, + String> + null_values; std::variant< - ContainerType, ContainerType, ContainerType, ContainerType, + ContainerType, + ContainerType, + ContainerType, + ContainerType, ContainerType, - ContainerType, ContainerType, ContainerType, ContainerType, - ContainerType, ContainerType, ContainerType, - ContainerType, ContainerType, - ContainerType> maps; + ContainerType, + ContainerType, + ContainerType, + ContainerType, + ContainerType, + ContainerType, + ContainerType, + ContainerType, + ContainerType, + ContainerType> + maps; std::unique_ptr string_arena; }; @@ -188,18 +218,12 @@ private: template - void getItemsNumber( - const Attribute & attribute, - const Columns & key_columns, - ValueSetter && set_value, - DefaultGetter && get_default) const; + void + getItemsNumber(const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const; template - void getItemsImpl( - const 
Attribute & attribute, - const Columns & key_columns, - ValueSetter && set_value, - DefaultGetter && get_default) const; + void + getItemsImpl(const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const; template @@ -209,8 +233,7 @@ private: const Attribute & getAttribute(const std::string & attribute_name) const; - static StringRef placeKeysInPool( - const size_t row, const Columns & key_columns, StringRefs & keys, Arena & pool); + static StringRef placeKeysInPool(const size_t row, const Columns & key_columns, StringRefs & keys, Arena & pool); template void has(const Attribute & attribute, const Columns & key_columns, PaddedPODArray & out) const; diff --git a/dbms/src/Dictionaries/DictionaryBlockInputStream.h b/dbms/src/Dictionaries/DictionaryBlockInputStream.h index f1778a9fa6d..afdc26cdba3 100644 --- a/dbms/src/Dictionaries/DictionaryBlockInputStream.h +++ b/dbms/src/Dictionaries/DictionaryBlockInputStream.h @@ -1,22 +1,21 @@ #pragma once -#include +#include #include #include +#include #include +#include #include #include +#include +#include #include "DictionaryBlockInputStreamBase.h" #include "DictionaryStructure.h" #include "IDictionary.h" -#include -#include -#include -#include namespace DB { - namespace ErrorCodes { extern const int LOGICAL_ERROR; @@ -32,28 +31,30 @@ class DictionaryBlockInputStream : public DictionaryBlockInputStreamBase public: using DictionaryPtr = std::shared_ptr; - DictionaryBlockInputStream(std::shared_ptr dictionary, size_t max_block_size, - PaddedPODArray && ids, const Names & column_names); + DictionaryBlockInputStream( + std::shared_ptr dictionary, size_t max_block_size, PaddedPODArray && ids, const Names & column_names); - DictionaryBlockInputStream(std::shared_ptr dictionary, size_t max_block_size, - const std::vector & keys, const Names & column_names); + DictionaryBlockInputStream( + std::shared_ptr dictionary, + size_t max_block_size, + const std::vector & keys, + 
const Names & column_names); - using GetColumnsFunction = - std::function & attributes)>; + using GetColumnsFunction = std::function & attributes)>; // Used to separate key columns format for storage and view. // Calls get_key_columns_function to get key column for dictionary get fuction call // and get_view_columns_function to get key representation. // Now used in trie dictionary, where columns are stored as ip and mask, and are showed as string - DictionaryBlockInputStream(std::shared_ptr dictionary, size_t max_block_size, - const Columns & data_columns, const Names & column_names, - GetColumnsFunction && get_key_columns_function, - GetColumnsFunction && get_view_columns_function); + DictionaryBlockInputStream( + std::shared_ptr dictionary, + size_t max_block_size, + const Columns & data_columns, + const Names & column_names, + GetColumnsFunction && get_key_columns_function, + GetColumnsFunction && get_view_columns_function); - String getName() const override - { - return "Dictionary"; - } + String getName() const override { return "Dictionary"; } protected: Block getBlock(size_t start, size_t size) const override; @@ -65,8 +66,8 @@ private: using DictionaryGetter = void (DictionaryType::*)(const std::string &, const PaddedPODArray &, PaddedPODArray &) const; template - using DictionaryDecimalGetter = - void (DictionaryType::*)(const std::string &, const PaddedPODArray &, DecimalPaddedPODArray &) const; + using DictionaryDecimalGetter + = void (DictionaryType::*)(const std::string &, const PaddedPODArray &, DecimalPaddedPODArray &) const; using DictionaryStringGetter = void (DictionaryType::*)(const std::string &, const PaddedPODArray &, ColumnString *) const; @@ -75,61 +76,103 @@ private: using GetterByKey = void (DictionaryType::*)(const std::string &, const Columns &, const DataTypes &, PaddedPODArray & out) const; template - using DecimalGetterByKey = - void (DictionaryType::*)(const std::string &, const Columns &, const DataTypes &, DecimalPaddedPODArray & 
out) const; + using DecimalGetterByKey + = void (DictionaryType::*)(const std::string &, const Columns &, const DataTypes &, DecimalPaddedPODArray & out) const; using StringGetterByKey = void (DictionaryType::*)(const std::string &, const Columns &, const DataTypes &, ColumnString * out) const; // call getXXX // for single key dictionaries template - void callGetter(DictionaryGetter getter, const PaddedPODArray & ids_to_fill, - const Columns & keys, const DataTypes & data_types, - Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const; + void callGetter( + DictionaryGetter getter, + const PaddedPODArray & ids_to_fill, + const Columns & keys, + const DataTypes & data_types, + Container & container, + const DictionaryAttribute & attribute, + const DictionaryType & dictionary) const; template - void callGetter(DictionaryDecimalGetter getter, const PaddedPODArray & ids_to_fill, - const Columns & keys, const DataTypes & data_types, - Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const; + void callGetter( + DictionaryDecimalGetter getter, + const PaddedPODArray & ids_to_fill, + const Columns & keys, + const DataTypes & data_types, + Container & container, + const DictionaryAttribute & attribute, + const DictionaryType & dictionary) const; template - void callGetter(DictionaryStringGetter getter, const PaddedPODArray & ids_to_fill, - const Columns & keys, const DataTypes & data_types, - Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const; + void callGetter( + DictionaryStringGetter getter, + const PaddedPODArray & ids_to_fill, + const Columns & keys, + const DataTypes & data_types, + Container & container, + const DictionaryAttribute & attribute, + const DictionaryType & dictionary) const; // for complex complex key dictionaries template - void callGetter(GetterByKey getter, const PaddedPODArray & ids_to_fill, - const 
Columns & keys, const DataTypes & data_types, - Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const; + void callGetter( + GetterByKey getter, + const PaddedPODArray & ids_to_fill, + const Columns & keys, + const DataTypes & data_types, + Container & container, + const DictionaryAttribute & attribute, + const DictionaryType & dictionary) const; template - void callGetter(DecimalGetterByKey getter, const PaddedPODArray & ids_to_fill, - const Columns & keys, const DataTypes & data_types, - Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const; + void callGetter( + DecimalGetterByKey getter, + const PaddedPODArray & ids_to_fill, + const Columns & keys, + const DataTypes & data_types, + Container & container, + const DictionaryAttribute & attribute, + const DictionaryType & dictionary) const; template - void callGetter(StringGetterByKey getter, const PaddedPODArray & ids_to_fill, - const Columns & keys, const DataTypes & data_types, - Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const; + void callGetter( + StringGetterByKey getter, + const PaddedPODArray & ids_to_fill, + const Columns & keys, + const DataTypes & data_types, + Container & container, + const DictionaryAttribute & attribute, + const DictionaryType & dictionary) const; template