From 034566fff352f429ba59f5ff51c468ded01d0c87 Mon Sep 17 00:00:00 2001
From: Kirill Shvakov
Date: Wed, 14 Feb 2018 09:21:15 +0200
Subject: [PATCH 001/209] Use custom partition key for the query logs table

---
 dbms/src/Interpreters/Context.cpp              | 19 +++++++++++++++----
 dbms/src/Server/config.xml                     |  2 +-
 .../en/operations/server_settings/settings.md  |  4 ++++
 .../ru/operations/server_settings/settings.md  |  4 ++++
 4 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp
index ad8383d9f63..4b82c8349aa 100644
--- a/dbms/src/Interpreters/Context.cpp
+++ b/dbms/src/Interpreters/Context.cpp
@@ -1423,13 +1423,24 @@ QueryLog & Context::getQueryLog()
 
         auto & config = getConfigRef();
 
-        String database = config.getString("query_log.database", "system");
-        String table = config.getString("query_log.table", "query_log");
+        String database = config.getString("query_log.database", "system");
+        String table = config.getString("query_log.table", "query_log");
+        String partition_by = config.getString("query_log.partition_by", "month");
         size_t flush_interval_milliseconds = config.getUInt64(
                 "query_log.flush_interval_milliseconds", DEFAULT_QUERY_LOG_FLUSH_INTERVAL_MILLISECONDS);
+
+        String engine;
 
-        system_logs->query_log = std::make_unique<QueryLog>(
-            *global_context, database, table, "ENGINE = MergeTree(event_date, event_time, 1024)", flush_interval_milliseconds);
+        if (partition_by == "day")
+            engine = "ENGINE = MergeTree() PARTITION BY (event_date) ORDER BY (event_time) SETTINGS index_granularity = 1024";
+        else if (partition_by == "week")
+            engine = "ENGINE = MergeTree() PARTITION BY (toMonday(event_date)) ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024";
+        else if (partition_by == "month")
+            engine = "ENGINE = MergeTree(event_date, event_time, 1024)";
+        else
+            throw Exception("Logical error: invalid value for query_log.partition_by", ErrorCodes::LOGICAL_ERROR);
+
+        system_logs->query_log = std::make_unique<QueryLog>(*global_context, database, table, engine, flush_interval_milliseconds);
     }
 
     return *system_logs->query_log;
diff --git a/dbms/src/Server/config.xml b/dbms/src/Server/config.xml
index ca75025ac7b..c3c33b4b45d 100644
--- a/dbms/src/Server/config.xml
+++ b/dbms/src/Server/config.xml
@@ -226,7 +226,7 @@
     -->
         <database>system</database>
        <table>query_log</table>
-
+        <partition_by>month</partition_by>
         <flush_interval_milliseconds>7500</flush_interval_milliseconds>
     </query_log>
diff --git a/docs/en/operations/server_settings/settings.md b/docs/en/operations/server_settings/settings.md
index 12d0690db29..c67cce8e333 100644
--- a/docs/en/operations/server_settings/settings.md
+++ b/docs/en/operations/server_settings/settings.md
@@ -518,6 +518,7 @@ Use the following parameters to configure logging:
 
 - database – Name of the database.
 - table – Name of the table.
+- partition_by - Sets the partition key, possible values: day, week, month.
 - flush_interval_milliseconds – Interval for flushing data from memory to the disk.
 
 **Example**
 
 ```xml
 <part_log>
     <database>system</database>
     <table>part_log</table>
+    <partition_by>day</partition_by>
     <flush_interval_milliseconds>7500</flush_interval_milliseconds>
 </part_log>
 ```
@@ -560,6 +562,7 @@ Use the following parameters to configure logging:
 
 - database – Name of the database.
 - table – Name of the table.
+- partition_by - Sets the partition key, possible values: day, week, month.
 - flush_interval_milliseconds – Interval for flushing data from memory to the disk.
 
 If the table doesn't exist, ClickHouse will create it. If the structure of the query log changed when the ClickHouse server was updated, the table with the old structure is renamed, and a new table is created automatically.
 
@@ -570,6 +573,7 @@ If the table doesn't exist, ClickHouse will create it. If the structure of the q
 
 ```xml
 <query_log>
     <database>system</database>
     <table>query_log</table>
+    <partition_by>day</partition_by>
     <flush_interval_milliseconds>7500</flush_interval_milliseconds>
 </query_log>
 ```
diff --git a/docs/ru/operations/server_settings/settings.md b/docs/ru/operations/server_settings/settings.md
index 66984962f17..b503f92e69b 100644
--- a/docs/ru/operations/server_settings/settings.md
+++ b/docs/ru/operations/server_settings/settings.md
@@ -520,6 +520,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat
 
 - database - Имя базы данных.
 - table - Имя таблицы.
+- partition_by - Устанавливает ключ партиционирования, доступные значения: day, week, month.
 - flush_interval_milliseconds - Период сброса данных из оперативной памяти на диск.
 
 ```xml
 <part_log>
     <database>system</database>
     <table>part_log</table>
+    <partition_by>day</partition_by>
     <flush_interval_milliseconds>7500</flush_interval_milliseconds>
 </part_log>
 ```
@@ -563,6 +565,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat
 
 - database - Имя базы данных.
 - table - Имя таблицы.
+- partition_by - Устанавливает ключ партиционирования, доступные значения: day, week, month.
 - flush_interval_milliseconds - Период сброса данных из оперативной памяти на диск.
 
 Если таблица не существует, то ClickHouse создаст её. Если структура журнала запросов изменилась при обновлении сервера ClickHouse, то таблица со старой структурой переименовывается, а новая таблица создается автоматически.
 
@@ -573,6 +576,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat
 
 ```xml
 <query_log>
     <database>system</database>
     <table>query_log</table>
+    <partition_by>day</partition_by>
     <flush_interval_milliseconds>7500</flush_interval_milliseconds>
 </query_log>
 ```
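Taken together, patch 001 makes the server build the query_log table definition from the new `partition_by` setting. A condensed, self-contained sketch of that mapping (a plain function standing in for the logic inside Context::getQueryLog(); the engine strings are taken verbatim from the hunk above):

```cpp
#include <stdexcept>
#include <string>

/// Mirrors the if/else chain added to Context::getQueryLog() in patch 001:
/// "day" and "week" use the newer custom-partitioning syntax, while "month"
/// keeps the classic MergeTree(event_date, event_time, 1024) definition.
std::string queryLogEngine(const std::string & partition_by)
{
    if (partition_by == "day")
        return "ENGINE = MergeTree() PARTITION BY (event_date) ORDER BY (event_time) SETTINGS index_granularity = 1024";
    if (partition_by == "week")
        return "ENGINE = MergeTree() PARTITION BY (toMonday(event_date)) ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024";
    if (partition_by == "month")  /// default when query_log.partition_by is absent
        return "ENGINE = MergeTree(event_date, event_time, 1024)";
    throw std::invalid_argument("invalid value for query_log.partition_by");
}
```

Patch 002 below then drops this fixed whitelist in favour of splicing an arbitrary partition expression directly into the engine string.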
From ddb618b1d75a6c30c1db6b5a41b9afba2e7f9731 Mon Sep 17 00:00:00 2001
From: Kirill Shvakov
Date: Wed, 14 Feb 2018 10:01:41 +0200
Subject: [PATCH 002/209] Use custom expr for query log

---
 dbms/src/Interpreters/Context.cpp              | 13 ++-----------
 dbms/src/Server/config.xml                     | 10 +++++++++-
 docs/en/operations/server_settings/settings.md |  8 ++++----
 docs/ru/operations/server_settings/settings.md |  8 ++++----
 4 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp
index 4b82c8349aa..2bc0aa3c4b7 100644
--- a/dbms/src/Interpreters/Context.cpp
+++ b/dbms/src/Interpreters/Context.cpp
@@ -1425,20 +1425,11 @@ QueryLog & Context::getQueryLog()
 
         String database = config.getString("query_log.database", "system");
         String table = config.getString("query_log.table", "query_log");
-        String partition_by = config.getString("query_log.partition_by", "month");
+        String partition_by = config.getString("query_log.partition_by", "toYYYYMM(event_date)");
         size_t flush_interval_milliseconds = config.getUInt64(
                 "query_log.flush_interval_milliseconds", DEFAULT_QUERY_LOG_FLUSH_INTERVAL_MILLISECONDS);
 
-        String engine;
-
-        if (partition_by == "day")
-            engine = "ENGINE = MergeTree() PARTITION BY (event_date) ORDER BY (event_time) SETTINGS index_granularity = 1024";
-        else if (partition_by == "week")
-            engine = "ENGINE = MergeTree() PARTITION BY (toMonday(event_date)) ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024";
-        else if (partition_by == "month")
-            engine = "ENGINE = MergeTree(event_date, event_time, 1024)";
-        else
-            throw Exception("Logical error: invalid value for query_log.partition_by", ErrorCodes::LOGICAL_ERROR);
+        String engine = "ENGINE = MergeTree() PARTITION BY (" + partition_by + ") ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024";
 
         system_logs->query_log = std::make_unique<QueryLog>(*global_context, database, table, engine, flush_interval_milliseconds);
     }
diff --git a/dbms/src/Server/config.xml b/dbms/src/Server/config.xml
index c3c33b4b45d..e0b4ea563ac 100644
--- a/dbms/src/Server/config.xml
+++ b/dbms/src/Server/config.xml
@@ -226,7 +226,15 @@
     -->
         <database>system</database>
        <table>query_log</table>
-        <partition_by>month</partition_by>
+        <!--
+            PARTITION BY expr https://clickhouse.yandex/docs/en/table_engines/custom_partitioning_key/
+            Example:
+                event_date
+                toMonday(event_date)
+                toYYYYMM(event_date)
+                toStartOfHour(event_time)
+        -->
+        <partition_by>toYYYYMM(event_date)</partition_by>
         <flush_interval_milliseconds>7500</flush_interval_milliseconds>
     </query_log>
diff --git a/docs/en/operations/server_settings/settings.md b/docs/en/operations/server_settings/settings.md
index c67cce8e333..e5b6cb399ef 100644
--- a/docs/en/operations/server_settings/settings.md
+++ b/docs/en/operations/server_settings/settings.md
@@ -518,7 +518,7 @@ Use the following parameters to configure logging:
 
 - database – Name of the database.
 - table – Name of the table.
-- partition_by - Sets the partition key, possible values: day, week, month.
+- partition_by - Sets the [custom partition key](../../table_engines/custom_partitioning_key.md#custom-partitioning-key).
 - flush_interval_milliseconds – Interval for flushing data from memory to the disk.
 
 **Example**
 
 ```xml
 <part_log>
     <database>system</database>
     <table>part_log</table>
-    <partition_by>day</partition_by>
+    <partition_by>toMonday(event_date)</partition_by>
     <flush_interval_milliseconds>7500</flush_interval_milliseconds>
 </part_log>
 ```
@@ -562,7 +562,7 @@ Use the following parameters to configure logging:
 
 - database – Name of the database.
 - table – Name of the table.
-- partition_by - Sets the partition key, possible values: day, week, month.
+- partition_by - Sets the [custom partition key](../../table_engines/custom_partitioning_key.md#custom-partitioning-key).
 - flush_interval_milliseconds – Interval for flushing data from memory to the disk.
 
 If the table doesn't exist, ClickHouse will create it. If the structure of the query log changed when the ClickHouse server was updated, the table with the old structure is renamed, and a new table is created automatically.
 
@@ -573,7 +573,7 @@ If the table doesn't exist, ClickHouse will create it. If the structure of the q
 
 ```xml
 <query_log>
     <database>system</database>
     <table>query_log</table>
-    <partition_by>day</partition_by>
+    <partition_by>toMonday(event_date)</partition_by>
     <flush_interval_milliseconds>7500</flush_interval_milliseconds>
 </query_log>
 ```
diff --git a/docs/ru/operations/server_settings/settings.md b/docs/ru/operations/server_settings/settings.md
index b503f92e69b..9f8e6e1072e 100644
--- a/docs/ru/operations/server_settings/settings.md
+++ b/docs/ru/operations/server_settings/settings.md
@@ -520,7 +520,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat
 
 - database - Имя базы данных.
 - table - Имя таблицы.
-- partition_by - Устанавливает ключ партиционирования, доступные значения: day, week, month.
+- partition_by - Устанавливает [произвольный ключ партиционирования](../../table_engines/custom_partitioning_key.md#custom-partitioning-key).
 - flush_interval_milliseconds - Период сброса данных из оперативной памяти на диск.
 
 ```xml
 <part_log>
     <database>system</database>
     <table>part_log</table>
-    <partition_by>day</partition_by>
+    <partition_by>toMonday(event_date)</partition_by>
     <flush_interval_milliseconds>7500</flush_interval_milliseconds>
 </part_log>
 ```
@@ -565,7 +565,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat
 
 - database - Имя базы данных.
 - table - Имя таблицы.
-- partition_by - Устанавливает ключ партиционирования, доступные значения: day, week, month.
+- partition_by - Устанавливает [произвольный ключ партиционирования](../../table_engines/custom_partitioning_key.md#custom-partitioning-key).
 - flush_interval_milliseconds - Период сброса данных из оперативной памяти на диск.
 
 Если таблица не существует, то ClickHouse создаст её. Если структура журнала запросов изменилась при обновлении сервера ClickHouse, то таблица со старой структурой переименовывается, а новая таблица создается автоматически.
 
@@ -576,7 +576,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat
 
 ```xml
 <query_log>
     <database>system</database>
     <table>query_log</table>
-    <partition_by>day</partition_by>
+    <partition_by>toMonday(event_date)</partition_by>
     <flush_interval_milliseconds>7500</flush_interval_milliseconds>
 </query_log>
``` From d2fcfb3de62c70b47066dcd94413f09a8d2a2ec5 Mon Sep 17 00:00:00 2001 From: proller Date: Thu, 15 Feb 2018 23:57:08 +0300 Subject: [PATCH 003/209] Allow to disable compiling -server or other parts to static binary --- dbms/src/Server/CMakeLists.txt | 105 ++++++++++++++++++++++-------- dbms/src/Server/config_tools.h.in | 11 ++++ dbms/src/Server/main.cpp | 35 ++++++++++ 3 files changed, 125 insertions(+), 26 deletions(-) create mode 100644 dbms/src/Server/config_tools.h.in diff --git a/dbms/src/Server/CMakeLists.txt b/dbms/src/Server/CMakeLists.txt index ff0cd4871f7..adfd3309f06 100644 --- a/dbms/src/Server/CMakeLists.txt +++ b/dbms/src/Server/CMakeLists.txt @@ -2,6 +2,18 @@ # that contain multiple execution modes (client, server, etc.) # each of them is built and linked as a separate library, defined below. + +option (ENABLE_CLICKHOUSE_ALL "Enable all tools" ON) +option (ENABLE_CLICKHOUSE_SERVER "Enable server" ${ENABLE_CLICKHOUSE_ALL}) +option (ENABLE_CLICKHOUSE_CLIENT "Enable client" ${ENABLE_CLICKHOUSE_ALL}) +option (ENABLE_CLICKHOUSE_LOCAL "Enable local" ${ENABLE_CLICKHOUSE_ALL}) +option (ENABLE_CLICKHOUSE_BENCHMARK "Enable benchmark" ${ENABLE_CLICKHOUSE_ALL}) +option (ENABLE_CLICKHOUSE_PERFORMANCE "Enable performance" ${ENABLE_CLICKHOUSE_ALL}) +option (ENABLE_CLICKHOUSE_TOOLS "Enable tools: compressor format extract-from-config-lib" ${ENABLE_CLICKHOUSE_ALL}) +option (ENABLE_CLICKHOUSE_COPIER "Enable copier" ${ENABLE_CLICKHOUSE_COPIER}) + +configure_file (config_tools.h.in ${CMAKE_CURRENT_BINARY_DIR}/config_tools.h) + add_library (clickhouse-server-lib ConfigReloader.cpp HTTPHandler.cpp @@ -91,53 +103,94 @@ if (CLICKHOUSE_SPLIT_BINARY) add_custom_target (clickhouse ALL DEPENDS clickhouse-bundle) else () add_executable (clickhouse main.cpp) + target_link_libraries (clickhouse clickhouse_common_io) target_include_directories (clickhouse BEFORE PRIVATE ${COMMON_INCLUDE_DIR}) + target_include_directories (clickhouse PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) if (USE_EMBEDDED_COMPILER) target_link_libraries (clickhouse clickhouse-compiler-lib) endif () - + if (ENABLE_CLICKHOUSE_SERVER) + target_link_libraries (clickhouse clickhouse-server-lib) + endif () + if (ENABLE_CLICKHOUSE_CLIENT) + target_link_libraries (clickhouse clickhouse-client-lib) + endif () + if (ENABLE_CLICKHOUSE_LOCAL) + target_link_libraries (clickhouse clickhouse-local-lib) + endif () + if (ENABLE_CLICKHOUSE_BENCHMARK) + target_link_libraries (clickhouse clickhouse-benchmark-lib) + endif () + if (ENABLE_CLICKHOUSE_PERFORMANCE) + target_link_libraries (clickhouse clickhouse-performance-test-lib) + endif () + if (ENABLE_CLICKHOUSE_TOOLS) target_link_libraries (clickhouse - clickhouse-server-lib - clickhouse-client-lib - clickhouse-local-lib - clickhouse-benchmark-lib - clickhouse-performance-test-lib clickhouse-extract-from-config-lib clickhouse-compressor-lib clickhouse-format-lib - clickhouse-cluster-copier-lib ) + endif () + if (ENABLE_CLICKHOUSE_COPIER) + target_link_libraries (clickhouse clickhouse-cluster-copier-lib) + endif () - add_custom_target (clickhouse-server ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-server DEPENDS clickhouse) - add_custom_target (clickhouse-client ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-client DEPENDS clickhouse) - add_custom_target (clickhouse-local ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-local DEPENDS clickhouse) - add_custom_target (clickhouse-benchmark ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse 
clickhouse-benchmark DEPENDS clickhouse) - add_custom_target (clickhouse-performance-test ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-performance-test DEPENDS clickhouse) - add_custom_target (clickhouse-extract-from-config ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-extract-from-config DEPENDS clickhouse) - add_custom_target (clickhouse-compressor ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-compressor DEPENDS clickhouse) - add_custom_target (clickhouse-format ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-format DEPENDS clickhouse) - add_custom_target (clickhouse-cluster-copier ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-cluster-copier DEPENDS clickhouse) + set (CLICKHOUSE_BUNDLE) + if (ENABLE_CLICKHOUSE_SERVER) + add_custom_target (clickhouse-server ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-server DEPENDS clickhouse) + install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-server DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) + list(APPEND CLICKHOUSE_BUNDLE clickhouse-server) + endif () + if (ENABLE_CLICKHOUSE_CLIENT) + add_custom_target (clickhouse-client ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-client DEPENDS clickhouse) + install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-client DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) + list(APPEND CLICKHOUSE_BUNDLE clickhouse-client) + endif () + if (ENABLE_CLICKHOUSE_LOCAL) + add_custom_target (clickhouse-local ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-local DEPENDS clickhouse) + install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-local DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) + list(APPEND CLICKHOUSE_BUNDLE clickhouse-local) + endif () + if (ENABLE_CLICKHOUSE_BENCHMARK) + add_custom_target (clickhouse-benchmark ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-benchmark DEPENDS clickhouse) + install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-benchmark DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) + list(APPEND CLICKHOUSE_BUNDLE clickhouse-benchmark) + endif () + if (ENABLE_CLICKHOUSE_PERFORMANCE) + add_custom_target (clickhouse-performance-test ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-performance-test DEPENDS clickhouse) + install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-performance-test DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) + list(APPEND CLICKHOUSE_BUNDLE clickhouse-performance-test) + endif () + if (ENABLE_CLICKHOUSE_TOOLS) + add_custom_target (clickhouse-extract-from-config ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-extract-from-config DEPENDS clickhouse) + add_custom_target (clickhouse-compressor ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-compressor DEPENDS clickhouse) + add_custom_target (clickhouse-format ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-format DEPENDS clickhouse) + install (FILES + ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-extract-from-config + ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-compressor + ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-format + DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) + list(APPEND CLICKHOUSE_BUNDLE clickhouse-extract-from-config clickhouse-compressor clickhouse-format) + endif () + if (ENABLE_CLICKHOUSE_COPIER) + add_custom_target (clickhouse-cluster-copier ALL COMMAND ${CMAKE_COMMAND} -E 
create_symlink clickhouse clickhouse-cluster-copier DEPENDS clickhouse)
+        install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-cluster-copier DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
+        list(APPEND CLICKHOUSE_BUNDLE clickhouse-cluster-copier)
+    endif ()
 
     # install always because the debian package wants these files:
     add_custom_target (clickhouse-clang ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-clang DEPENDS clickhouse)
     add_custom_target (clickhouse-lld ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-lld DEPENDS clickhouse)
+    list(APPEND CLICKHOUSE_BUNDLE clickhouse-clang clickhouse-lld)
 
     install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
+
     install (FILES
-       ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-server
-       ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-client
-       ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-local
-       ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-benchmark
-       ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-performance-test
-       ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-extract-from-config
-       ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-compressor
-       ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-format
-       ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-cluster-copier
        ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-clang
        ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-lld
        DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
 
-    add_custom_target (clickhouse-bundle ALL DEPENDS clickhouse-server clickhouse-client clickhouse-local clickhouse-benchmark clickhouse-performance-test clickhouse-extract-from-config clickhouse-compressor clickhouse-format clickhouse-clang clickhouse-lld)
+    add_custom_target (clickhouse-bundle ALL DEPENDS ${CLICKHOUSE_BUNDLE})
 endif ()
 
 install (
diff --git a/dbms/src/Server/config_tools.h.in b/dbms/src/Server/config_tools.h.in
new file mode 100644
index 00000000000..2e8337c74b9
--- /dev/null
+++ b/dbms/src/Server/config_tools.h.in
@@ -0,0 +1,11 @@
+#pragma once
+
+// .h autogenerated by cmake!
+ +#cmakedefine01 ENABLE_CLICKHOUSE_SERVER +#cmakedefine01 ENABLE_CLICKHOUSE_CLIENT +#cmakedefine01 ENABLE_CLICKHOUSE_LOCAL +#cmakedefine01 ENABLE_CLICKHOUSE_BENCHMARK +#cmakedefine01 ENABLE_CLICKHOUSE_PERFORMANCE +#cmakedefine01 ENABLE_CLICKHOUSE_TOOLS +#cmakedefine01 ENABLE_CLICKHOUSE_COPIER diff --git a/dbms/src/Server/main.cpp b/dbms/src/Server/main.cpp index b569f2ba611..c174dc1a3d0 100644 --- a/dbms/src/Server/main.cpp +++ b/dbms/src/Server/main.cpp @@ -1,24 +1,45 @@ + +#include #include #include +#include #if USE_TCMALLOC #include #endif +#if ENABLE_CLICKHOUSE_SERVER #include "Server.h" +#endif +#if ENABLE_CLICKHOUSE_LOCAL #include "LocalServer.h" +#endif #include /// Universal executable for various clickhouse applications +#if ENABLE_CLICKHOUSE_SERVER int mainEntryClickHouseServer(int argc, char ** argv); +#endif +#if ENABLE_CLICKHOUSE_CLIENT int mainEntryClickHouseClient(int argc, char ** argv); +#endif +#if ENABLE_CLICKHOUSE_LOCAL int mainEntryClickHouseLocal(int argc, char ** argv); +#endif +#if ENABLE_CLICKHOUSE_BENCHMARK int mainEntryClickHouseBenchmark(int argc, char ** argv); +#endif +#if ENABLE_CLICKHOUSE_PERFORMANCE int mainEntryClickHousePerformanceTest(int argc, char ** argv); +#endif +#if ENABLE_CLICKHOUSE_TOOLS int mainEntryClickHouseExtractFromConfig(int argc, char ** argv); int mainEntryClickHouseCompressor(int argc, char ** argv); int mainEntryClickHouseFormat(int argc, char ** argv); +#endif +#if ENABLE_CLICKHOUSE_COPIER int mainEntryClickHouseClusterCopier(int argc, char ** argv); +#endif #if USE_EMBEDDED_COMPILER int mainEntryClickHouseClang(int argc, char ** argv); @@ -34,15 +55,29 @@ using MainFunc = int (*)(int, char**); /// Add an item here to register new application std::pair clickhouse_applications[] = { +#if ENABLE_CLICKHOUSE_LOCAL {"local", mainEntryClickHouseLocal}, +#endif +#if ENABLE_CLICKHOUSE_CLIENT {"client", mainEntryClickHouseClient}, +#endif +#if ENABLE_CLICKHOUSE_BENCHMARK {"benchmark", mainEntryClickHouseBenchmark}, +#endif +#if ENABLE_CLICKHOUSE_SERVER {"server", mainEntryClickHouseServer}, +#endif +#if ENABLE_CLICKHOUSE_PERFORMANCE {"performance-test", mainEntryClickHousePerformanceTest}, +#endif +#if ENABLE_CLICKHOUSE_TOOLS {"extract-from-config", mainEntryClickHouseExtractFromConfig}, {"compressor", mainEntryClickHouseCompressor}, {"format", mainEntryClickHouseFormat}, +#endif +#if ENABLE_CLICKHOUSE_COPIER {"copier", mainEntryClickHouseClusterCopier}, +#endif #if USE_EMBEDDED_COMPILER {"clang", mainEntryClickHouseClang}, {"clang++", mainEntryClickHouseClang}, From df6c5aa2ee6e2128fc540a5d52f522acf432583c Mon Sep 17 00:00:00 2001 From: BayoNet Date: Fri, 16 Feb 2018 21:47:52 +0300 Subject: [PATCH 004/209] Typo is fixed. --- docs/en/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/index.md b/docs/en/index.md index 72efa70802b..586c18297a8 100644 --- a/docs/en/index.md +++ b/docs/en/index.md @@ -39,7 +39,7 @@ We'll say that the following is true for the OLAP (online analytical processing) - Data is updated in fairly large batches (> 1000 rows), not by single rows; or it is not updated at all. - Data is added to the DB but is not modified. - For reads, quite a large number of rows are extracted from the DB, but only a small subset of columns. -- Tables are "wide," meaning they contain a large number of columns. +- Tables are "wide", meaning they contain a large number of columns. - Queries are relatively rare (usually hundreds of queries per server or less per second). 
- For simple queries, latencies around 50 ms are allowed. - Column values are fairly small: numbers and short strings (for example, 60 bytes per URL). From fa50fe80a01fcfc96ecb0fee83ab4cde00d66a0c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 19 Feb 2018 03:45:32 +0300 Subject: [PATCH 005/209] Added method "getHeader" in IBlockOutputStream: development [#CLICKHOUSE-2] --- dbms/src/Client/Connection.cpp | 2 +- .../AddingDefaultBlockOutputStream.cpp | 2 +- .../AddingDefaultBlockOutputStream.h | 5 +- .../AggregatingBlockInputStream.cpp | 4 +- dbms/src/DataStreams/BlockIO.h | 3 - .../BlockOutputStreamFromRowOutputStream.cpp | 4 +- .../BlockOutputStreamFromRowOutputStream.h | 7 ++- .../DataStreams/CountingBlockOutputStream.h | 3 +- dbms/src/DataStreams/FormatFactory.cpp | 50 +++++++-------- dbms/src/DataStreams/IBlockOutputStream.h | 8 ++- .../InputStreamFromASTInsertQuery.cpp | 2 +- .../MaterializingBlockOutputStream.h | 6 +- .../MergeSortingBlockInputStream.cpp | 30 ++++----- .../MergeSortingBlockInputStream.h | 2 +- .../DataStreams/NativeBlockOutputStream.cpp | 4 +- .../src/DataStreams/NativeBlockOutputStream.h | 5 +- dbms/src/DataStreams/NullBlockOutputStream.h | 5 ++ .../NullableAdapterBlockInputStream.cpp | 18 +++--- .../NullableAdapterBlockInputStream.h | 4 +- .../ODBCDriverBlockOutputStream.cpp | 9 ++- .../DataStreams/ODBCDriverBlockOutputStream.h | 5 +- .../ParallelAggregatingBlockInputStream.cpp | 3 +- .../DataStreams/PrettyBlockOutputStream.cpp | 5 +- .../src/DataStreams/PrettyBlockOutputStream.h | 4 +- .../PrettyCompactBlockOutputStream.h | 4 +- .../PrettySpaceBlockOutputStream.h | 4 +- .../ProhibitColumnsBlockOutputStream.cpp | 23 ------- .../ProhibitColumnsBlockOutputStream.h | 31 ---------- .../PushingToViewsBlockOutputStream.cpp | 4 +- .../PushingToViewsBlockOutputStream.h | 4 +- .../DataStreams/RemoteBlockOutputStream.cpp | 29 +++------ .../src/DataStreams/RemoteBlockOutputStream.h | 10 +-- .../DataStreams/SquashingBlockOutputStream.h | 1 + .../tests/block_row_transforms.cpp | 2 +- .../DataStreams/tests/expression_stream.cpp | 2 +- dbms/src/DataStreams/tests/filter_stream.cpp | 2 +- .../tests/filter_stream_hitlog.cpp | 2 +- dbms/src/DataStreams/tests/native_streams.cpp | 2 +- dbms/src/DataStreams/tests/sorting_stream.cpp | 2 +- .../tests/tab_separated_streams.cpp | 2 +- dbms/src/Interpreters/Aggregator.cpp | 2 +- .../Interpreters/InterpreterCreateQuery.cpp | 41 +++---------- .../Interpreters/InterpreterInsertQuery.cpp | 61 +++++++++---------- .../src/Interpreters/InterpreterInsertQuery.h | 10 +-- dbms/src/Interpreters/tests/expression.cpp | 2 +- dbms/src/Server/ClusterCopier.cpp | 2 +- dbms/src/Server/TCPHandler.cpp | 13 ++-- dbms/src/Server/TCPHandler.h | 2 +- .../DistributedBlockOutputStream.cpp | 17 ++++-- .../DistributedBlockOutputStream.h | 2 +- .../MergeTree/MergeTreeBlockOutputStream.cpp | 6 ++ .../MergeTree/MergeTreeBlockOutputStream.h | 1 + dbms/src/Storages/MergeTree/MergeTreeData.cpp | 2 +- .../MergeTree/MergeTreeDataMerger.cpp | 2 +- .../MergeTree/MergedBlockOutputStream.cpp | 4 +- .../MergeTree/MergedBlockOutputStream.h | 6 +- .../ReplicatedMergeTreeBlockOutputStream.cpp | 6 ++ .../ReplicatedMergeTreeBlockOutputStream.h | 1 + dbms/src/Storages/StorageBuffer.cpp | 2 + dbms/src/Storages/StorageFile.cpp | 3 +- dbms/src/Storages/StorageLog.cpp | 1 + dbms/src/Storages/StorageMemory.cpp | 2 + dbms/src/Storages/StorageNull.h | 2 +- .../Storages/StorageReplicatedMergeTree.cpp | 2 +- dbms/src/Storages/StorageSet.cpp | 3 +- dbms/src/Storages/StorageStripeLog.cpp 
| 4 +- dbms/src/Storages/StorageTinyLog.cpp | 2 + dbms/src/Storages/tests/hit_log.cpp | 2 +- dbms/src/Storages/tests/storage_log.cpp | 2 +- dbms/src/Storages/tests/system_numbers.cpp | 2 +- 70 files changed, 239 insertions(+), 282 deletions(-) delete mode 100644 dbms/src/DataStreams/ProhibitColumnsBlockOutputStream.cpp delete mode 100644 dbms/src/DataStreams/ProhibitColumnsBlockOutputStream.h diff --git a/dbms/src/Client/Connection.cpp b/dbms/src/Client/Connection.cpp index 4d0b20f0168..3c6c8d75d0e 100644 --- a/dbms/src/Client/Connection.cpp +++ b/dbms/src/Client/Connection.cpp @@ -393,7 +393,7 @@ void Connection::sendData(const Block & block, const String & name) else maybe_compressed_out = out; - block_out = std::make_shared(*maybe_compressed_out, server_revision); + block_out = std::make_shared(*maybe_compressed_out, server_revision, block.cloneEmpty()); } writeVarUInt(Protocol::Client::Data, *out); diff --git a/dbms/src/DataStreams/AddingDefaultBlockOutputStream.cpp b/dbms/src/DataStreams/AddingDefaultBlockOutputStream.cpp index a52c35da8ad..a2f46fccf8a 100644 --- a/dbms/src/DataStreams/AddingDefaultBlockOutputStream.cpp +++ b/dbms/src/DataStreams/AddingDefaultBlockOutputStream.cpp @@ -10,7 +10,7 @@ namespace DB { -void AddingDefaultBlockOutputStream::write(const DB::Block & block) +void AddingDefaultBlockOutputStream::write(const Block & block) { Block res = block; diff --git a/dbms/src/DataStreams/AddingDefaultBlockOutputStream.h b/dbms/src/DataStreams/AddingDefaultBlockOutputStream.h index 9ac92e74ac9..b54fb475a4b 100644 --- a/dbms/src/DataStreams/AddingDefaultBlockOutputStream.h +++ b/dbms/src/DataStreams/AddingDefaultBlockOutputStream.h @@ -19,16 +19,18 @@ class AddingDefaultBlockOutputStream : public IBlockOutputStream public: AddingDefaultBlockOutputStream( const BlockOutputStreamPtr & output_, + const Block & header_, NamesAndTypesList required_columns_, const ColumnDefaults & column_defaults_, const Context & context_, bool only_explicit_column_defaults_) - : output(output_), required_columns(required_columns_), + : output(output_), header(header_), required_columns(required_columns_), column_defaults(column_defaults_), context(context_), only_explicit_column_defaults(only_explicit_column_defaults_) { } + Block getHeader() const override { return header; } void write(const Block & block) override; void flush() override; @@ -38,6 +40,7 @@ public: private: BlockOutputStreamPtr output; + Block header; NamesAndTypesList required_columns; const ColumnDefaults column_defaults; const Context & context; diff --git a/dbms/src/DataStreams/AggregatingBlockInputStream.cpp b/dbms/src/DataStreams/AggregatingBlockInputStream.cpp index 8896c40e511..ce91333bfe0 100644 --- a/dbms/src/DataStreams/AggregatingBlockInputStream.cpp +++ b/dbms/src/DataStreams/AggregatingBlockInputStream.cpp @@ -76,7 +76,7 @@ Block AggregatingBlockInputStream::readImpl() AggregatingBlockInputStream::TemporaryFileStream::TemporaryFileStream(const std::string & path) - : file_in(path), compressed_in(file_in), block_in(std::make_shared(compressed_in, ClickHouseRevision::get())) {} - + : file_in(path), compressed_in(file_in), + block_in(std::make_shared(compressed_in, ClickHouseRevision::get())) {} } diff --git a/dbms/src/DataStreams/BlockIO.h b/dbms/src/DataStreams/BlockIO.h index 8cd19db9154..6d97e30e510 100644 --- a/dbms/src/DataStreams/BlockIO.h +++ b/dbms/src/DataStreams/BlockIO.h @@ -21,8 +21,6 @@ struct BlockIO BlockInputStreamPtr in; BlockOutputStreamPtr out; - Block out_sample; /// Example of a block to be 
written to `out`. - /// Callbacks for query logging could be set here. std::function finish_callback; std::function exception_callback; @@ -50,7 +48,6 @@ struct BlockIO process_list_entry = rhs.process_list_entry; in = rhs.in; out = rhs.out; - out_sample = rhs.out_sample; finish_callback = rhs.finish_callback; exception_callback = rhs.exception_callback; diff --git a/dbms/src/DataStreams/BlockOutputStreamFromRowOutputStream.cpp b/dbms/src/DataStreams/BlockOutputStreamFromRowOutputStream.cpp index 8c466bc3c00..3206e918232 100644 --- a/dbms/src/DataStreams/BlockOutputStreamFromRowOutputStream.cpp +++ b/dbms/src/DataStreams/BlockOutputStreamFromRowOutputStream.cpp @@ -5,8 +5,8 @@ namespace DB { -BlockOutputStreamFromRowOutputStream::BlockOutputStreamFromRowOutputStream(RowOutputStreamPtr row_output_) - : row_output(row_output_), first_row(true) {} +BlockOutputStreamFromRowOutputStream::BlockOutputStreamFromRowOutputStream(RowOutputStreamPtr row_output_, const Block & header_) + : row_output(row_output_), header(header_) {} void BlockOutputStreamFromRowOutputStream::write(const Block & block) diff --git a/dbms/src/DataStreams/BlockOutputStreamFromRowOutputStream.h b/dbms/src/DataStreams/BlockOutputStreamFromRowOutputStream.h index 63743f7827a..dfb6f49ecec 100644 --- a/dbms/src/DataStreams/BlockOutputStreamFromRowOutputStream.h +++ b/dbms/src/DataStreams/BlockOutputStreamFromRowOutputStream.h @@ -13,7 +13,9 @@ namespace DB class BlockOutputStreamFromRowOutputStream : public IBlockOutputStream { public: - BlockOutputStreamFromRowOutputStream(RowOutputStreamPtr row_output_); + BlockOutputStreamFromRowOutputStream(RowOutputStreamPtr row_output_, const Block & header_); + + Block getHeader() const override { return header; } void write(const Block & block) override; void writePrefix() override { row_output->writePrefix(); } void writeSuffix() override { row_output->writeSuffix(); } @@ -29,7 +31,8 @@ public: private: RowOutputStreamPtr row_output; - bool first_row; + Block header; + bool first_row = true; }; } diff --git a/dbms/src/DataStreams/CountingBlockOutputStream.h b/dbms/src/DataStreams/CountingBlockOutputStream.h index 63ece36c2b0..0918d33f113 100644 --- a/dbms/src/DataStreams/CountingBlockOutputStream.h +++ b/dbms/src/DataStreams/CountingBlockOutputStream.h @@ -12,7 +12,6 @@ namespace DB class CountingBlockOutputStream : public IBlockOutputStream { public: - CountingBlockOutputStream(const BlockOutputStreamPtr & stream_) : stream(stream_) {} @@ -31,6 +30,7 @@ public: return progress; } + Block getHeader() const override { return stream->getHeader(); } void write(const Block & block) override; void writePrefix() override { stream->writePrefix(); } @@ -40,7 +40,6 @@ public: String getContentType() const override { return stream->getContentType(); } protected: - BlockOutputStreamPtr stream; Progress progress; ProgressCallback progress_callback; diff --git a/dbms/src/DataStreams/FormatFactory.cpp b/dbms/src/DataStreams/FormatFactory.cpp index d871a4b23a5..a985c9f3dc0 100644 --- a/dbms/src/DataStreams/FormatFactory.cpp +++ b/dbms/src/DataStreams/FormatFactory.cpp @@ -141,66 +141,66 @@ static BlockOutputStreamPtr getOutputImpl(const String & name, WriteBuffer & buf FormatSettingsJSON json_settings(settings.output_format_json_quote_64bit_integers, settings.output_format_json_quote_denormals); if (name == "Native") - return std::make_shared(buf); + return std::make_shared(buf, 0, sample); else if (name == "RowBinary") - return std::make_shared(std::make_shared(buf)); + return 
std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<BinaryRowOutputStream>(buf), sample);
     else if (name == "TabSeparated" || name == "TSV")
-        return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<TabSeparatedRowOutputStream>(buf, sample));
+        return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<TabSeparatedRowOutputStream>(buf, sample), sample);
     else if (name == "TabSeparatedWithNames" || name == "TSVWithNames")
-        return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<TabSeparatedRowOutputStream>(buf, sample, true));
+        return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<TabSeparatedRowOutputStream>(buf, sample, true), sample);
     else if (name == "TabSeparatedWithNamesAndTypes" || name == "TSVWithNamesAndTypes")
-        return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<TabSeparatedRowOutputStream>(buf, sample, true, true));
+        return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<TabSeparatedRowOutputStream>(buf, sample, true, true), sample);
     else if (name == "TabSeparatedRaw" || name == "TSVRaw")
-        return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<TabSeparatedRawRowOutputStream>(buf, sample));
+        return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<TabSeparatedRawRowOutputStream>(buf, sample), sample);
     else if (name == "CSV")
-        return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<CSVRowOutputStream>(buf, sample));
+        return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<CSVRowOutputStream>(buf, sample), sample);
     else if (name == "CSVWithNames")
-        return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<CSVRowOutputStream>(buf, sample, true));
+        return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<CSVRowOutputStream>(buf, sample, true), sample);
     else if (name == "Pretty")
-        return std::make_shared<PrettyBlockOutputStream>(buf, false, settings.output_format_pretty_max_rows, context);
+        return std::make_shared<PrettyBlockOutputStream>(buf, sample, false, settings.output_format_pretty_max_rows, context);
     else if (name == "PrettyCompact")
-        return std::make_shared<PrettyCompactBlockOutputStream>(buf, false, settings.output_format_pretty_max_rows, context);
+        return std::make_shared<PrettyCompactBlockOutputStream>(buf, sample, false, settings.output_format_pretty_max_rows, context);
     else if (name == "PrettyCompactMonoBlock")
     {
-        BlockOutputStreamPtr dst = std::make_shared<PrettyCompactBlockOutputStream>(buf, false, settings.output_format_pretty_max_rows, context);
+        BlockOutputStreamPtr dst = std::make_shared<PrettyCompactBlockOutputStream>(buf, sample, false, settings.output_format_pretty_max_rows, context);
         auto res = std::make_shared<SquashingBlockOutputStream>(dst, settings.output_format_pretty_max_rows, 0);
         res->disableFlush();
         return res;
     }
     else if (name == "PrettySpace")
-        return std::make_shared<PrettySpaceBlockOutputStream>(buf, false, settings.output_format_pretty_max_rows, context);
+        return std::make_shared<PrettySpaceBlockOutputStream>(buf, sample, false, settings.output_format_pretty_max_rows, context);
     else if (name == "PrettyNoEscapes")
-        return std::make_shared<PrettyBlockOutputStream>(buf, true, settings.output_format_pretty_max_rows, context);
+        return std::make_shared<PrettyBlockOutputStream>(buf, sample, true, settings.output_format_pretty_max_rows, context);
     else if (name == "PrettyCompactNoEscapes")
-        return std::make_shared<PrettyCompactBlockOutputStream>(buf, true, settings.output_format_pretty_max_rows, context);
+        return std::make_shared<PrettyCompactBlockOutputStream>(buf, sample, true, settings.output_format_pretty_max_rows, context);
     else if (name == "PrettySpaceNoEscapes")
-        return std::make_shared<PrettySpaceBlockOutputStream>(buf, true, settings.output_format_pretty_max_rows, context);
+        return std::make_shared<PrettySpaceBlockOutputStream>(buf, sample, true, settings.output_format_pretty_max_rows, context);
     else if (name == "Vertical")
         return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<VerticalRowOutputStream>(
-            buf, sample, settings.output_format_pretty_max_rows));
+            buf, sample, settings.output_format_pretty_max_rows), sample);
     else if (name == "VerticalRaw")
         return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<VerticalRawRowOutputStream>(
-            buf, sample, settings.output_format_pretty_max_rows));
+            buf, sample, settings.output_format_pretty_max_rows), sample);
     else if (name == "Values")
-        return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<ValuesRowOutputStream>(buf));
+        return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<ValuesRowOutputStream>(buf), sample);
     else if (name == "JSON")
         return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<JSONRowOutputStream>(
-            buf, sample, settings.output_format_write_statistics, json_settings));
+            buf, sample, settings.output_format_write_statistics, json_settings), sample);
     else if (name == "JSONCompact")
         return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<JSONCompactRowOutputStream>(
-            buf, sample, settings.output_format_write_statistics, json_settings));
+            buf, sample, settings.output_format_write_statistics, json_settings), sample);
     else if (name == "JSONEachRow")
         return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<JSONEachRowRowOutputStream>(
-            buf, sample, json_settings));
+            buf, sample, json_settings), sample);
     else if (name == "XML")
         return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<XMLRowOutputStream>(buf, sample,
-            settings.output_format_write_statistics));
+            settings.output_format_write_statistics), sample);
     else if (name == "TSKV")
-        return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<TSKVRowOutputStream>(buf, sample));
+        return std::make_shared<BlockOutputStreamFromRowOutputStream>(std::make_shared<TSKVRowOutputStream>(buf, sample), sample);
     else if (name == "ODBCDriver")
         return std::make_shared<ODBCDriverBlockOutputStream>(buf, sample);
     else if (name == "Null")
-        return std::make_shared<NullBlockOutputStream>();
+        return std::make_shared<NullBlockOutputStream>(sample);
     else
         throw Exception("Unknown format " + name, ErrorCodes::UNKNOWN_FORMAT);
 }
@@ -211,7 +211,7 @@ BlockOutputStreamPtr FormatFactory::getOutput(const String & name, WriteBuffer &
 
     /** Materialization is needed, because formats can use the functions `IDataType`,
       * which only work with full columns.
       */
-    return std::make_shared<MaterializingBlockOutputStream>(getOutputImpl(name, buf, sample, context));
+    return std::make_shared<MaterializingBlockOutputStream>(getOutputImpl(name, buf, materializeBlock(sample), context), sample);
 }
 
 }
diff --git a/dbms/src/DataStreams/IBlockOutputStream.h b/dbms/src/DataStreams/IBlockOutputStream.h
index 58e6607f383..e33fced86a3 100644
--- a/dbms/src/DataStreams/IBlockOutputStream.h
+++ b/dbms/src/DataStreams/IBlockOutputStream.h
@@ -4,12 +4,12 @@
 #include <string>
 #include <vector>
 #include <boost/noncopyable.hpp>
+#include <Core/Block.h>
 
 
 namespace DB
 {
 
-class Block;
 struct Progress;
 class TableStructureReadLock;
 
@@ -26,6 +26,12 @@ class IBlockOutputStream : private boost::noncopyable
 public:
     IBlockOutputStream() {}
 
+    /** Get data structure of the stream in a form of "header" block (it is also called "sample block").
+      * Header block contains column names, data types, columns of size 0. Constant columns must have corresponding values.
+      * You must pass blocks of exactly this structure to the 'write' method.
+      */
+    virtual Block getHeader() const = 0;
+
     /** Write block.
*/ virtual void write(const Block & block) = 0; diff --git a/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp b/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp index 9f5f10d19bf..0e4f876925d 100644 --- a/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp +++ b/dbms/src/DataStreams/InputStreamFromASTInsertQuery.cpp @@ -43,7 +43,7 @@ InputStreamFromASTInsertQuery::InputStreamFromASTInsertQuery( input_buffer_contacenated = std::make_unique(buffers); - res_stream = context.getInputFormat(format, *input_buffer_contacenated, streams.out_sample, context.getSettings().max_insert_block_size); + res_stream = context.getInputFormat(format, *input_buffer_contacenated, streams.out->getHeader(), context.getSettings().max_insert_block_size); } } diff --git a/dbms/src/DataStreams/MaterializingBlockOutputStream.h b/dbms/src/DataStreams/MaterializingBlockOutputStream.h index 2d8489156f4..9e1efeb29d3 100644 --- a/dbms/src/DataStreams/MaterializingBlockOutputStream.h +++ b/dbms/src/DataStreams/MaterializingBlockOutputStream.h @@ -12,9 +12,10 @@ namespace DB class MaterializingBlockOutputStream : public IBlockOutputStream { public: - MaterializingBlockOutputStream(const BlockOutputStreamPtr & output) - : output{output} {} + MaterializingBlockOutputStream(const BlockOutputStreamPtr & output, const Block & header) + : output{output}, header(header) {} + Block getHeader() const override { return header; } void write(const Block & block) override { output->write(materializeBlock(block)); } void flush() override { output->flush(); } void writePrefix() override { output->writePrefix(); } @@ -27,6 +28,7 @@ public: private: BlockOutputStreamPtr output; + Block header; }; } diff --git a/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp b/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp index 6a5c5aba0f6..288d5d9e6ef 100644 --- a/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp +++ b/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp @@ -34,29 +34,29 @@ static void removeConstantsFromBlock(Block & block) } } -static void removeConstantsFromSortDescription(const Block & sample_block, SortDescription & description) +static void removeConstantsFromSortDescription(const Block & header, SortDescription & description) { description.erase(std::remove_if(description.begin(), description.end(), [&](const SortColumnDescription & elem) { if (!elem.column_name.empty()) - return sample_block.getByName(elem.column_name).column->isColumnConst(); + return header.getByName(elem.column_name).column->isColumnConst(); else - return sample_block.safeGetByPosition(elem.column_number).column->isColumnConst(); + return header.safeGetByPosition(elem.column_number).column->isColumnConst(); }), description.end()); } /** Add into block, whose constant columns was removed by previous function, - * constant columns from sample_block (which must have structure as before removal of constants from block). + * constant columns from header (which must have structure as before removal of constants from block). 
*/ -static void enrichBlockWithConstants(Block & block, const Block & sample_block) +static void enrichBlockWithConstants(Block & block, const Block & header) { size_t rows = block.rows(); - size_t columns = sample_block.columns(); + size_t columns = header.columns(); for (size_t i = 0; i < columns; ++i) { - const auto & col_type_name = sample_block.getByPosition(i); + const auto & col_type_name = header.getByPosition(i); if (col_type_name.column->isColumnConst()) block.insert(i, {col_type_name.column->cloneResized(rows), col_type_name.type, col_type_name.name}); } @@ -65,6 +65,12 @@ static void enrichBlockWithConstants(Block & block, const Block & sample_block) Block MergeSortingBlockInputStream::readImpl() { + if (!header) + { + header = getHeader(); + removeConstantsFromSortDescription(header, description); + } + /** Algorithm: * - read to memory blocks from source stream; * - if too much of them and if external sorting is enabled, @@ -77,12 +83,6 @@ Block MergeSortingBlockInputStream::readImpl() { while (Block block = children.back()->read()) { - if (!sample_block) - { - sample_block = block.cloneEmpty(); - removeConstantsFromSortDescription(sample_block, description); - } - /// If there were only const columns in sort description, then there is no need to sort. /// Return the blocks as is. if (description.empty()) @@ -103,7 +103,7 @@ Block MergeSortingBlockInputStream::readImpl() const std::string & path = temporary_files.back()->path(); WriteBufferFromFile file_buf(path); CompressedWriteBuffer compressed_buf(file_buf); - NativeBlockOutputStream block_out(compressed_buf); + NativeBlockOutputStream block_out(compressed_buf, 0, block.cloneEmpty()); MergeSortingBlocksBlockInputStream block_in(blocks, description, max_merged_block_size, limit); LOG_INFO(log, "Sorting and writing part of data into temporary file " + path); @@ -148,7 +148,7 @@ Block MergeSortingBlockInputStream::readImpl() Block res = impl->read(); if (res) - enrichBlockWithConstants(res, sample_block); + enrichBlockWithConstants(res, header); return res; } diff --git a/dbms/src/DataStreams/MergeSortingBlockInputStream.h b/dbms/src/DataStreams/MergeSortingBlockInputStream.h index 613169899d8..4b203182d19 100644 --- a/dbms/src/DataStreams/MergeSortingBlockInputStream.h +++ b/dbms/src/DataStreams/MergeSortingBlockInputStream.h @@ -107,7 +107,7 @@ private: /// Before operation, will remove constant columns from blocks. And after, place constant columns back. /// (to avoid excessive virtual function calls and because constants cannot be serialized in Native format for temporary files) /// Save original block structure here. - Block sample_block; + Block header; /// Everything below is for external sorting. 
    std::vector<std::unique_ptr<Poco::TemporaryFile>> temporary_files;

diff --git a/dbms/src/DataStreams/NativeBlockOutputStream.cpp b/dbms/src/DataStreams/NativeBlockOutputStream.cpp
index 0e38a3e5bd7..b02d435b39f 100644
--- a/dbms/src/DataStreams/NativeBlockOutputStream.cpp
+++ b/dbms/src/DataStreams/NativeBlockOutputStream.cpp
@@ -20,9 +20,9 @@ namespace ErrorCodes
 
 
 NativeBlockOutputStream::NativeBlockOutputStream(
-    WriteBuffer & ostr_, UInt64 client_revision_,
+    WriteBuffer & ostr_, UInt64 client_revision_, const Block & header_,
     WriteBuffer * index_ostr_, size_t initial_size_of_file_)
-    : ostr(ostr_), client_revision(client_revision_),
+    : ostr(ostr_), client_revision(client_revision_), header(header_),
     index_ostr(index_ostr_), initial_size_of_file(initial_size_of_file_)
 {
     if (index_ostr)
diff --git a/dbms/src/DataStreams/NativeBlockOutputStream.h b/dbms/src/DataStreams/NativeBlockOutputStream.h
index d76cb827863..7e3f14e06ea 100644
--- a/dbms/src/DataStreams/NativeBlockOutputStream.h
+++ b/dbms/src/DataStreams/NativeBlockOutputStream.h
@@ -23,9 +23,10 @@ public:
     /** If non-zero client_revision is specified, additional block information can be written.
       */
     NativeBlockOutputStream(
-        WriteBuffer & ostr_, UInt64 client_revision_ = 0,
+        WriteBuffer & ostr_, UInt64 client_revision_, const Block & header_,
         WriteBuffer * index_ostr_ = nullptr, size_t initial_size_of_file_ = 0);
 
+    Block getHeader() const override { return header; }
     void write(const Block & block) override;
     void flush() override;
 
@@ -36,7 +37,7 @@ private:
     WriteBuffer & ostr;
     UInt64 client_revision;
-
+    Block header;
     WriteBuffer * index_ostr;
     size_t initial_size_of_file;  /// The initial size of the data file, if `append` done. Used for the index.
     /// If you need to write index, then `ostr` must be a CompressedWriteBuffer.
diff --git a/dbms/src/DataStreams/NullBlockOutputStream.h b/dbms/src/DataStreams/NullBlockOutputStream.h index ad0c398629a..3d437527960 100644 --- a/dbms/src/DataStreams/NullBlockOutputStream.h +++ b/dbms/src/DataStreams/NullBlockOutputStream.h @@ -11,7 +11,12 @@ namespace DB class NullBlockOutputStream : public IBlockOutputStream { public: + NullBlockOutputStream(const Block & header) : header(header) {} + Block getHeader() const override { return header; } void write(const Block &) override {} + +private: + Block header; }; } diff --git a/dbms/src/DataStreams/NullableAdapterBlockInputStream.cpp b/dbms/src/DataStreams/NullableAdapterBlockInputStream.cpp index 60fe4013595..f10ff9b876b 100644 --- a/dbms/src/DataStreams/NullableAdapterBlockInputStream.cpp +++ b/dbms/src/DataStreams/NullableAdapterBlockInputStream.cpp @@ -16,10 +16,10 @@ namespace ErrorCodes NullableAdapterBlockInputStream::NullableAdapterBlockInputStream( const BlockInputStreamPtr & input, - const Block & in_sample_, const Block & out_sample_) - : header(out_sample_) + const Block & src_header_, const Block & res_header_) + : header(res_header_) { - buildActions(in_sample_, out_sample_); + buildActions(src_header_, res_header_); children.push_back(input); } @@ -83,12 +83,12 @@ Block NullableAdapterBlockInputStream::readImpl() } void NullableAdapterBlockInputStream::buildActions( - const Block & in_sample, - const Block & out_sample) + const Block & src_header, + const Block & res_header) { - size_t in_size = in_sample.columns(); + size_t in_size = src_header.columns(); - if (out_sample.columns() != in_size) + if (res_header.columns() != in_size) throw Exception("Number of columns in INSERT SELECT doesn't match", ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH); actions.reserve(in_size); @@ -96,8 +96,8 @@ void NullableAdapterBlockInputStream::buildActions( for (size_t i = 0; i < in_size; ++i) { - const auto & in_elem = in_sample.getByPosition(i); - const auto & out_elem = out_sample.getByPosition(i); + const auto & in_elem = src_header.getByPosition(i); + const auto & out_elem = res_header.getByPosition(i); bool is_in_nullable = in_elem.type->isNullable(); bool is_out_nullable = out_elem.type->isNullable(); diff --git a/dbms/src/DataStreams/NullableAdapterBlockInputStream.h b/dbms/src/DataStreams/NullableAdapterBlockInputStream.h index 47e064ecdf2..ac21f852e3e 100644 --- a/dbms/src/DataStreams/NullableAdapterBlockInputStream.h +++ b/dbms/src/DataStreams/NullableAdapterBlockInputStream.h @@ -18,7 +18,7 @@ namespace DB class NullableAdapterBlockInputStream : public IProfilingBlockInputStream { public: - NullableAdapterBlockInputStream(const BlockInputStreamPtr & input, const Block & in_sample_, const Block & out_sample_); + NullableAdapterBlockInputStream(const BlockInputStreamPtr & input, const Block & src_header_, const Block & res_header_); String getName() const override { return "NullableAdapterBlockInputStream"; } @@ -48,7 +48,7 @@ private: /// which describes the columns from which we fetch data inside an INSERT /// query, and the target sample block which contains the columns /// we insert data into. 
- void buildActions(const Block & in_sample, const Block & out_sample); + void buildActions(const Block & src_header, const Block & res_header); private: Block header; diff --git a/dbms/src/DataStreams/ODBCDriverBlockOutputStream.cpp b/dbms/src/DataStreams/ODBCDriverBlockOutputStream.cpp index 71161eeb117..da961948907 100644 --- a/dbms/src/DataStreams/ODBCDriverBlockOutputStream.cpp +++ b/dbms/src/DataStreams/ODBCDriverBlockOutputStream.cpp @@ -7,9 +7,8 @@ namespace DB { -ODBCDriverBlockOutputStream::ODBCDriverBlockOutputStream(WriteBuffer & out_, const Block & sample_) - : out(out_) - , sample(sample_) +ODBCDriverBlockOutputStream::ODBCDriverBlockOutputStream(WriteBuffer & out_, const Block & header_) + : out(out_), header(header_) { } @@ -43,7 +42,7 @@ void ODBCDriverBlockOutputStream::write(const Block & block) void ODBCDriverBlockOutputStream::writePrefix() { - const size_t columns = sample.columns(); + const size_t columns = header.columns(); /// Number of columns. writeVarUInt(columns, out); @@ -51,7 +50,7 @@ void ODBCDriverBlockOutputStream::writePrefix() /// Names and types of columns. for (size_t i = 0; i < columns; ++i) { - const ColumnWithTypeAndName & col = sample.getByPosition(i); + const ColumnWithTypeAndName & col = header.getByPosition(i); writeStringBinary(col.name, out); writeStringBinary(col.type->getName(), out); diff --git a/dbms/src/DataStreams/ODBCDriverBlockOutputStream.h b/dbms/src/DataStreams/ODBCDriverBlockOutputStream.h index 09795b72a3a..a40603c356e 100644 --- a/dbms/src/DataStreams/ODBCDriverBlockOutputStream.h +++ b/dbms/src/DataStreams/ODBCDriverBlockOutputStream.h @@ -19,8 +19,9 @@ class WriteBuffer; class ODBCDriverBlockOutputStream : public IBlockOutputStream { public: - ODBCDriverBlockOutputStream(WriteBuffer & out_, const Block & sample_); + ODBCDriverBlockOutputStream(WriteBuffer & out_, const Block & header_); + Block getHeader() const override { return header; } void write(const Block & block) override; void writePrefix() override; @@ -29,7 +30,7 @@ public: private: WriteBuffer & out; - const Block sample; + const Block header; }; } diff --git a/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp b/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp index 8476f3020af..9405cbfd389 100644 --- a/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp +++ b/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.cpp @@ -100,7 +100,8 @@ Block ParallelAggregatingBlockInputStream::readImpl() ParallelAggregatingBlockInputStream::TemporaryFileStream::TemporaryFileStream(const std::string & path) - : file_in(path), compressed_in(file_in), block_in(std::make_shared(compressed_in, ClickHouseRevision::get())) {} + : file_in(path), compressed_in(file_in), + block_in(std::make_shared(compressed_in, ClickHouseRevision::get())) {} diff --git a/dbms/src/DataStreams/PrettyBlockOutputStream.cpp b/dbms/src/DataStreams/PrettyBlockOutputStream.cpp index 9c556ed22c5..58b61664b7c 100644 --- a/dbms/src/DataStreams/PrettyBlockOutputStream.cpp +++ b/dbms/src/DataStreams/PrettyBlockOutputStream.cpp @@ -17,8 +17,9 @@ namespace ErrorCodes } -PrettyBlockOutputStream::PrettyBlockOutputStream(WriteBuffer & ostr_, bool no_escapes_, size_t max_rows_, const Context & context_) - : ostr(ostr_), max_rows(max_rows_), no_escapes(no_escapes_), context(context_) +PrettyBlockOutputStream::PrettyBlockOutputStream( + WriteBuffer & ostr_, const Block & header_, bool no_escapes_, size_t max_rows_, const Context & context_) + : ostr(ostr_), header(header_), 
max_rows(max_rows_), no_escapes(no_escapes_), context(context_) { struct winsize w; if (0 == ioctl(STDOUT_FILENO, TIOCGWINSZ, &w)) diff --git a/dbms/src/DataStreams/PrettyBlockOutputStream.h b/dbms/src/DataStreams/PrettyBlockOutputStream.h index 9c6eef51705..7702cd46435 100644 --- a/dbms/src/DataStreams/PrettyBlockOutputStream.h +++ b/dbms/src/DataStreams/PrettyBlockOutputStream.h @@ -17,8 +17,9 @@ class PrettyBlockOutputStream : public IBlockOutputStream { public: /// no_escapes - do not use ANSI escape sequences - to display in the browser, not in the console. - PrettyBlockOutputStream(WriteBuffer & ostr_, bool no_escapes_, size_t max_rows_, const Context & context_); + PrettyBlockOutputStream(WriteBuffer & ostr_, const Block & header_, bool no_escapes_, size_t max_rows_, const Context & context_); + Block getHeader() const override { return header; } void write(const Block & block) override; void writeSuffix() override; @@ -32,6 +33,7 @@ protected: void writeExtremes(); WriteBuffer & ostr; + const Block header; size_t max_rows; size_t total_rows = 0; size_t terminal_width = 0; diff --git a/dbms/src/DataStreams/PrettyCompactBlockOutputStream.h b/dbms/src/DataStreams/PrettyCompactBlockOutputStream.h index 38e6fae71cf..82a3a44f720 100644 --- a/dbms/src/DataStreams/PrettyCompactBlockOutputStream.h +++ b/dbms/src/DataStreams/PrettyCompactBlockOutputStream.h @@ -11,8 +11,8 @@ namespace DB class PrettyCompactBlockOutputStream : public PrettyBlockOutputStream { public: - PrettyCompactBlockOutputStream(WriteBuffer & ostr_, bool no_escapes_, size_t max_rows_, const Context & context_) - : PrettyBlockOutputStream(ostr_, no_escapes_, max_rows_, context_) {} + PrettyCompactBlockOutputStream(WriteBuffer & ostr_, const Block & header_, bool no_escapes_, size_t max_rows_, const Context & context_) + : PrettyBlockOutputStream(ostr_, header_, no_escapes_, max_rows_, context_) {} void write(const Block & block) override; diff --git a/dbms/src/DataStreams/PrettySpaceBlockOutputStream.h b/dbms/src/DataStreams/PrettySpaceBlockOutputStream.h index 2fd78fa883f..6dbd2c748c4 100644 --- a/dbms/src/DataStreams/PrettySpaceBlockOutputStream.h +++ b/dbms/src/DataStreams/PrettySpaceBlockOutputStream.h @@ -11,8 +11,8 @@ namespace DB class PrettySpaceBlockOutputStream : public PrettyBlockOutputStream { public: - PrettySpaceBlockOutputStream(WriteBuffer & ostr_, bool no_escapes_, size_t max_rows_, const Context & context_) - : PrettyBlockOutputStream(ostr_, no_escapes_, max_rows_, context_) {} + PrettySpaceBlockOutputStream(WriteBuffer & ostr_, const Block & header_, bool no_escapes_, size_t max_rows_, const Context & context_) + : PrettyBlockOutputStream(ostr_, header_, no_escapes_, max_rows_, context_) {} void write(const Block & block) override; void writeSuffix() override; diff --git a/dbms/src/DataStreams/ProhibitColumnsBlockOutputStream.cpp b/dbms/src/DataStreams/ProhibitColumnsBlockOutputStream.cpp deleted file mode 100644 index 0ef5d981da1..00000000000 --- a/dbms/src/DataStreams/ProhibitColumnsBlockOutputStream.cpp +++ /dev/null @@ -1,23 +0,0 @@ -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int ILLEGAL_COLUMN; -} - - -void ProhibitColumnsBlockOutputStream::write(const Block & block) -{ - for (const auto & column : columns) - if (block.has(column.name)) - throw Exception{"Cannot insert column " + column.name, ErrorCodes::ILLEGAL_COLUMN}; - - output->write(block); -} - -} diff --git a/dbms/src/DataStreams/ProhibitColumnsBlockOutputStream.h 
b/dbms/src/DataStreams/ProhibitColumnsBlockOutputStream.h deleted file mode 100644 index 732eece7904..00000000000 --- a/dbms/src/DataStreams/ProhibitColumnsBlockOutputStream.h +++ /dev/null @@ -1,31 +0,0 @@ -#pragma once - -#include -#include - - -namespace DB -{ - -/// Throws exception on encountering prohibited column in block -class ProhibitColumnsBlockOutputStream : public IBlockOutputStream -{ -public: - ProhibitColumnsBlockOutputStream(const BlockOutputStreamPtr & output, const NamesAndTypesList & columns) - : output{output}, columns{columns} - { - } - -private: - void write(const Block & block) override; - - void flush() override { output->flush(); } - - void writePrefix() override { output->writePrefix(); } - void writeSuffix() override { output->writeSuffix(); } - - BlockOutputStreamPtr output; - NamesAndTypesList columns; -}; - -} diff --git a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp index 99711fc31e3..621423044ae 100644 --- a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp +++ b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp @@ -6,8 +6,8 @@ namespace DB { PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream( - String database, String table, StoragePtr storage, - const Context & context_, const ASTPtr & query_ptr_, bool no_destination) + const String & database, const String & table, const StoragePtr & storage, + const Context & context_, const ASTPtr & query_ptr_, bool no_destination) : context(context_), query_ptr(query_ptr_) { /** TODO This is a very important line. At any insertion into the table one of streams should own lock. diff --git a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.h b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.h index 0fe47f677fd..237c4ef73a1 100644 --- a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.h +++ b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.h @@ -19,9 +19,11 @@ class ReplicatedMergeTreeBlockOutputStream; class PushingToViewsBlockOutputStream : public IBlockOutputStream { public: - PushingToViewsBlockOutputStream(String database, String table, StoragePtr storage, + PushingToViewsBlockOutputStream( + const String & database, const String & table, const StoragePtr & storage, const Context & context_, const ASTPtr & query_ptr_, bool no_destination = false); + Block getHeader() const override { return storage->getSampleBlock(); } void write(const Block & block) override; void flush() override diff --git a/dbms/src/DataStreams/RemoteBlockOutputStream.cpp b/dbms/src/DataStreams/RemoteBlockOutputStream.cpp index 659f30c465b..e3a861d9dab 100644 --- a/dbms/src/DataStreams/RemoteBlockOutputStream.cpp +++ b/dbms/src/DataStreams/RemoteBlockOutputStream.cpp @@ -19,24 +19,18 @@ namespace ErrorCodes RemoteBlockOutputStream::RemoteBlockOutputStream(Connection & connection_, const String & query_, const Settings * settings_) : connection(connection_), query(query_), settings(settings_) { -} - - -void RemoteBlockOutputStream::writePrefix() -{ - /** Send query and receive "sample block", that describe table structure. - * Sample block is needed to know, what structure is required for blocks to be passed to 'write' method. + /** Send query and receive "header", that describes the table structure. + * The header is needed to know what structure is required for blocks to be passed to the 'write' method. 
*/ - connection.sendQuery(query, "", QueryProcessingStage::Complete, settings, nullptr); Connection::Packet packet = connection.receivePacket(); if (Protocol::Server::Data == packet.type) { - sample_block = packet.block; + header = packet.block; - if (!sample_block) + if (!header) throw Exception("Logical error: empty block received as table structure", ErrorCodes::LOGICAL_ERROR); } else if (Protocol::Server::Exception == packet.type) @@ -46,23 +40,18 @@ void RemoteBlockOutputStream::writePrefix() } else throw NetException("Unexpected packet from server (expected Data or Exception, got " - + String(Protocol::Server::toString(packet.type)) + ")", ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER); + + String(Protocol::Server::toString(packet.type)) + ")", ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER); } void RemoteBlockOutputStream::write(const Block & block) { - if (!sample_block) - throw Exception("You must call IBlockOutputStream::writePrefix before IBlockOutputStream::write", ErrorCodes::LOGICAL_ERROR); - - if (!blocksHaveEqualStructure(block, sample_block)) + if (!blocksHaveEqualStructure(block, header)) { std::stringstream message; message << "Block structure is different from table structure.\n" - << "\nTable structure:\n(" << sample_block.dumpStructure() << ")\nBlock structure:\n(" << block.dumpStructure() << ")\n"; - - LOG_ERROR(&Logger::get("RemoteBlockOutputStream"), message.str()); - throw DB::Exception(message.str()); + << "\nTable structure:\n(" << header.dumpStructure() << ")\nBlock structure:\n(" << block.dumpStructure() << ")\n"; + throw Exception(message.str()); } connection.sendData(block); @@ -71,7 +60,7 @@ void RemoteBlockOutputStream::write(const Block & block) void RemoteBlockOutputStream::writePrepared(ReadBuffer & input, size_t size) { - /// We cannot use 'sample_block'. Input must contain block with proper structure. + /// We cannot use 'header'. Input must contain block with proper structure. connection.sendPreparedData(input, size); } diff --git a/dbms/src/DataStreams/RemoteBlockOutputStream.h b/dbms/src/DataStreams/RemoteBlockOutputStream.h index 33b3af86754..6f21fcf138f 100644 --- a/dbms/src/DataStreams/RemoteBlockOutputStream.h +++ b/dbms/src/DataStreams/RemoteBlockOutputStream.h @@ -19,14 +19,8 @@ class RemoteBlockOutputStream : public IBlockOutputStream public: RemoteBlockOutputStream(Connection & connection_, const String & query_, const Settings * settings_ = nullptr); + Block getHeader() const override { return header; } - /// You can call this method after 'writePrefix', to get table required structure. (You must send data with that structure). 
- Block getSampleBlock() const - { - return sample_block; - } - - void writePrefix() override; void write(const Block & block) override; void writeSuffix() override; @@ -37,7 +31,7 @@ private: Connection & connection; String query; const Settings * settings; - Block sample_block; + Block header; }; } diff --git a/dbms/src/DataStreams/SquashingBlockOutputStream.h b/dbms/src/DataStreams/SquashingBlockOutputStream.h index df3cf262fa6..9e660de59f1 100644 --- a/dbms/src/DataStreams/SquashingBlockOutputStream.h +++ b/dbms/src/DataStreams/SquashingBlockOutputStream.h @@ -14,6 +14,7 @@ class SquashingBlockOutputStream : public IBlockOutputStream public: SquashingBlockOutputStream(BlockOutputStreamPtr & dst, size_t min_block_size_rows, size_t min_block_size_bytes); + Block getHeader() const override { return output->getHeader(); } void write(const Block & block) override; void flush() override; diff --git a/dbms/src/DataStreams/tests/block_row_transforms.cpp b/dbms/src/DataStreams/tests/block_row_transforms.cpp index 6e216c59158..5f826542271 100644 --- a/dbms/src/DataStreams/tests/block_row_transforms.cpp +++ b/dbms/src/DataStreams/tests/block_row_transforms.cpp @@ -44,7 +44,7 @@ try RowInputStreamPtr row_input = std::make_shared(in_buf, sample); BlockInputStreamFromRowInputStream block_input(row_input, sample, DEFAULT_INSERT_BLOCK_SIZE, 0, 0); RowOutputStreamPtr row_output = std::make_shared(out_buf, sample); - BlockOutputStreamFromRowOutputStream block_output(row_output); + BlockOutputStreamFromRowOutputStream block_output(row_output, sample); copyData(block_input, block_output); } diff --git a/dbms/src/DataStreams/tests/expression_stream.cpp b/dbms/src/DataStreams/tests/expression_stream.cpp index f52a8f9c105..53d79634c80 100644 --- a/dbms/src/DataStreams/tests/expression_stream.cpp +++ b/dbms/src/DataStreams/tests/expression_stream.cpp @@ -56,7 +56,7 @@ try WriteBufferFromOStream out1(std::cout); RowOutputStreamPtr out2 = std::make_shared(out1, expression->getSampleBlock()); - BlockOutputStreamFromRowOutputStream out(out2); + BlockOutputStreamFromRowOutputStream out(out2, expression->getSampleBlock()); { Stopwatch stopwatch; diff --git a/dbms/src/DataStreams/tests/filter_stream.cpp b/dbms/src/DataStreams/tests/filter_stream.cpp index 1f5549204b0..da3b2f7d5ad 100644 --- a/dbms/src/DataStreams/tests/filter_stream.cpp +++ b/dbms/src/DataStreams/tests/filter_stream.cpp @@ -61,7 +61,7 @@ try WriteBufferFromOStream ob(std::cout); RowOutputStreamPtr out_ = std::make_shared(ob, expression->getSampleBlock()); - BlockOutputStreamFromRowOutputStream out(out_); + BlockOutputStreamFromRowOutputStream out(out_, expression->getSampleBlock()); { diff --git a/dbms/src/DataStreams/tests/filter_stream_hitlog.cpp b/dbms/src/DataStreams/tests/filter_stream_hitlog.cpp index 4561ffde4be..bbfd65b1f93 100644 --- a/dbms/src/DataStreams/tests/filter_stream_hitlog.cpp +++ b/dbms/src/DataStreams/tests/filter_stream_hitlog.cpp @@ -134,7 +134,7 @@ int main(int, char **) WriteBufferFromOStream ob(std::cout); RowOutputStreamPtr out_ = std::make_shared(ob, expression->getSampleBlock()); - BlockOutputStreamFromRowOutputStream out(out_); + BlockOutputStreamFromRowOutputStream out(out_, in->getHeader()); copyData(*in, out); } diff --git a/dbms/src/DataStreams/tests/native_streams.cpp b/dbms/src/DataStreams/tests/native_streams.cpp index d06d8aa3714..bd0a998f88e 100644 --- a/dbms/src/DataStreams/tests/native_streams.cpp +++ b/dbms/src/DataStreams/tests/native_streams.cpp @@ -106,7 +106,7 @@ try BlockInputStreamPtr in = 
table->read(column_names, {}, Context::createGlobal(), stage, 8192, 1)[0]; WriteBufferFromFileDescriptor out1(STDOUT_FILENO); CompressedWriteBuffer out2(out1); - NativeBlockOutputStream out3(out2, ClickHouseRevision::get()); + NativeBlockOutputStream out3(out2, ClickHouseRevision::get(), in->getHeader()); copyData(*in, out3); } diff --git a/dbms/src/DataStreams/tests/sorting_stream.cpp b/dbms/src/DataStreams/tests/sorting_stream.cpp index d3a32f4adf1..cd6fe515a53 100644 --- a/dbms/src/DataStreams/tests/sorting_stream.cpp +++ b/dbms/src/DataStreams/tests/sorting_stream.cpp @@ -152,7 +152,7 @@ try WriteBufferFromOStream ob(std::cout); RowOutputStreamPtr out_ = std::make_shared(ob, sample); - BlockOutputStreamFromRowOutputStream out(out_); + BlockOutputStreamFromRowOutputStream out(out_, sample); copyData(*in, out); diff --git a/dbms/src/DataStreams/tests/tab_separated_streams.cpp b/dbms/src/DataStreams/tests/tab_separated_streams.cpp index 2cc6abf9835..c765135484b 100644 --- a/dbms/src/DataStreams/tests/tab_separated_streams.cpp +++ b/dbms/src/DataStreams/tests/tab_separated_streams.cpp @@ -38,7 +38,7 @@ try RowOutputStreamPtr row_output = std::make_shared(out_buf, sample); BlockInputStreamFromRowInputStream block_input(row_input, sample, DEFAULT_INSERT_BLOCK_SIZE, 0, 0); - BlockOutputStreamFromRowOutputStream block_output(row_output); + BlockOutputStreamFromRowOutputStream block_output(row_output, sample); copyData(block_input, block_output); return 0; diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index df1fb72584c..7bc3f7df644 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -840,7 +840,7 @@ void Aggregator::writeToTemporaryFile(AggregatedDataVariants & data_variants) const std::string & path = file->path(); WriteBufferFromFile file_buf(path); CompressedWriteBuffer compressed_buf(file_buf); - NativeBlockOutputStream block_out(compressed_buf, ClickHouseRevision::get()); + NativeBlockOutputStream block_out(compressed_buf, ClickHouseRevision::get(), getHeader(false)); LOG_DEBUG(log, "Writing part of aggregation data into temporary file " << path << "."); ProfileEvents::increment(ProfileEvents::ExternalAggregationWritePart); diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index aa4bf23fc20..88c12f17364 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -10,17 +10,12 @@ #include #include -#include -#include -#include -#include -#include - #include #include #include #include #include +#include #include #include #include @@ -33,6 +28,7 @@ #include #include #include +#include #include #include @@ -43,8 +39,10 @@ #include + namespace DB { + namespace ErrorCodes { extern const int DIRECTORY_DOESNT_EXIST; @@ -474,13 +472,9 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) if (create.select && (create.is_view || create.is_materialized_view)) create.select->setDatabaseIfNeeded(current_database); - std::unique_ptr interpreter_select; Block as_select_sample; if (create.select && (!create.attach || !create.columns)) - { - interpreter_select = std::make_unique(create.select->clone(), context); - as_select_sample = interpreter_select->getSampleBlock(); - } + as_select_sample = InterpreterSelectQuery::getSampleBlock(create.select->clone(), context); String as_database_name = create.as_database.empty() ? 
current_database : create.as_database; String as_table_name = create.as_table; @@ -554,28 +548,13 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) if (create.select && !create.attach && !create.is_view && (!create.is_materialized_view || create.is_populate)) { - auto table_lock = res->lockStructure(true, __PRETTY_FUNCTION__); + auto insert = std::make_shared<ASTInsertQuery>(); - /// Also see InterpreterInsertQuery. - BlockOutputStreamPtr out; + insert->database = database_name; + insert->table = table_name; + insert->select = create.select->clone(); - out = std::make_shared<PushingToViewsBlockOutputStream>( - create.database, create.table, res, create.is_temporary ? context.getSessionContext() : context, query_ptr); - - out = std::make_shared<MaterializingBlockOutputStream>(out); - - /// @note shouldn't these two contexts be session contexts in case of temporary table? - bool strict_insert_defaults = static_cast<bool>(context.getSettingsRef().strict_insert_defaults); - out = std::make_shared<AddingDefaultBlockOutputStream>( - out, columns.columns, columns.column_defaults, context, strict_insert_defaults); - - if (!context.getSettingsRef().insert_allow_materialized_columns) - out = std::make_shared<ProhibitColumnsBlockOutputStream>(out, columns.materialized_columns); - - BlockIO io; - io.in = std::make_shared<NullAndDoCopyBlockInputStream>(interpreter_select->execute().in, out); - - return io; + return InterpreterInsertQuery(insert, context, context.getSettingsRef().insert_allow_materialized_columns).execute(); } return {}; diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.cpp b/dbms/src/Interpreters/InterpreterInsertQuery.cpp index 63eea9542b8..3091457d384 100644 --- a/dbms/src/Interpreters/InterpreterInsertQuery.cpp +++ b/dbms/src/Interpreters/InterpreterInsertQuery.cpp @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -25,7 +24,7 @@ namespace ProfileEvents { -extern const Event InsertQuery; + extern const Event InsertQuery; } namespace DB @@ -34,6 +33,7 @@ namespace ErrorCodes { extern const int NO_SUCH_COLUMN_IN_TABLE; extern const int READONLY; + extern const int ILLEGAL_COLUMN; } @@ -45,10 +45,8 @@ InterpreterInsertQuery::InterpreterInsertQuery( } -StoragePtr InterpreterInsertQuery::loadTable() +StoragePtr InterpreterInsertQuery::getTable(const ASTInsertQuery & query) { - ASTInsertQuery & query = typeid_cast<ASTInsertQuery &>(*query_ptr); - if (query.table_function) { auto table_function = typeid_cast<const ASTFunction *>(query.table_function.get()); @@ -60,23 +58,15 @@ StoragePtr InterpreterInsertQuery::loadTable() return context.getTable(query.database, query.table); } -StoragePtr InterpreterInsertQuery::getTable() +Block InterpreterInsertQuery::getSampleBlock(const ASTInsertQuery & query, const StoragePtr & table) { - if (!cached_table) - cached_table = loadTable(); - - return cached_table; -} - -Block InterpreterInsertQuery::getSampleBlock() -{ - ASTInsertQuery & query = typeid_cast<ASTInsertQuery &>(*query_ptr); + Block table_sample_non_materialized = table->getSampleBlockNonMaterialized(); /// If the query does not include information about columns if (!query.columns) - return getTable()->getSampleBlockNonMaterialized(); + return table_sample_non_materialized; - Block table_sample = getTable()->getSampleBlock(); + Block table_sample = table->getSampleBlock(); /// Form the block based on the column names from the query Block res; @@ -88,13 +78,11 @@ Block InterpreterInsertQuery::getSampleBlock() if (!table_sample.has(current_name)) throw Exception("No such column " + current_name + " in table " + query.table, ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); - ColumnWithTypeAndName col; - col.name = current_name; - col.type = 
table_sample.getByName(current_name).type; - col.column = col.type->createColumn(); - res.insert(std::move(col)); - } + if (!allow_materialized && !table_sample_non_materialized.has(current_name)) + throw Exception("Cannot insert column " + current_name + ", because it is MATERIALIZED column.", ErrorCodes::ILLEGAL_COLUMN); + res.insert(ColumnWithTypeAndName(table_sample.getByName(current_name).type, current_name)); + } return res; } @@ -103,7 +91,7 @@ BlockIO InterpreterInsertQuery::execute() { ASTInsertQuery & query = typeid_cast<ASTInsertQuery &>(*query_ptr); checkAccess(query); - StoragePtr table = getTable(); + StoragePtr table = getTable(query); auto table_lock = table->lockStructure(true, __PRETTY_FUNCTION__); @@ -114,13 +102,11 @@ BlockIO InterpreterInsertQuery::execute() out = std::make_shared<PushingToViewsBlockOutputStream>(query.database, query.table, table, context, query_ptr, query.no_destination); - out = std::make_shared<MaterializingBlockOutputStream>(out); + out = std::make_shared<MaterializingBlockOutputStream>(out, table->getSampleBlock()); out = std::make_shared<AddingDefaultBlockOutputStream>( - out, required_columns, table->column_defaults, context, static_cast<bool>(context.getSettingsRef().strict_insert_defaults)); - - if (!allow_materialized) - out = std::make_shared<ProhibitColumnsBlockOutputStream>(out, table->materialized_columns); + out, getSampleBlock(query, table), required_columns, table->column_defaults, context, + static_cast<bool>(context.getSettingsRef().strict_insert_defaults)); out = std::make_shared<SquashingBlockOutputStream>( out, context.getSettingsRef().min_insert_block_size_rows, context.getSettingsRef().min_insert_block_size_bytes); @@ -130,12 +116,11 @@ BlockIO InterpreterInsertQuery::execute() out = std::move(out_wrapper); BlockIO res; - res.out_sample = getSampleBlock(); /// What type of query: INSERT or INSERT SELECT? if (!query.select) { - res.out = out; + res.out = std::move(out); } else { @@ -143,14 +128,23 @@ BlockIO InterpreterInsertQuery::execute() res.in = interpreter_select.execute().in; - res.in = std::make_shared<NullableAdapterBlockInputStream>(res.in, res.in->getHeader(), res.out_sample); - res.in = std::make_shared<CastTypeBlockInputStream>(context, res.in, res.out_sample); + res.in = std::make_shared<NullableAdapterBlockInputStream>(res.in, res.in->getHeader(), res.out->getHeader()); + res.in = std::make_shared<CastTypeBlockInputStream>(context, res.in, res.out->getHeader()); res.in = std::make_shared<NullAndDoCopyBlockInputStream>(res.in, out); + + if (!allow_materialized) + { + Block in_header = res.in->getHeader(); + for (const auto & name_type : table->materialized_columns) + if (in_header.has(name_type.name)) + throw Exception("Cannot insert column " + name_type.name + ", because it is MATERIALIZED column.", ErrorCodes::ILLEGAL_COLUMN); + } } return res; } + void InterpreterInsertQuery::checkAccess(const ASTInsertQuery & query) { const Settings & settings = context.getSettingsRef(); @@ -163,4 +157,5 @@ void InterpreterInsertQuery::checkAccess(const ASTInsertQuery & query) throw Exception("Cannot insert into table in readonly mode", ErrorCodes::READONLY); } + } diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.h b/dbms/src/Interpreters/InterpreterInsertQuery.h index 9bdc5cfcaba..2180ebe0550 100644 --- a/dbms/src/Interpreters/InterpreterInsertQuery.h +++ b/dbms/src/Interpreters/InterpreterInsertQuery.h @@ -25,14 +25,8 @@ public: BlockIO execute() override; private: - /// Cache storage to avoid double table function call. 
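For reference, the column-filtering rule that the new getSampleBlock above implements can be written as a standalone sketch. The function name and the use of std::vector<std::string> instead of ClickHouse's Names are illustrative assumptions, not part of the patch:

```cpp
#include <string>
#include <vector>
#include <stdexcept>

#include <Core/Block.h>

using namespace DB;

// Given the full table header and the column names listed in the INSERT,
// build the header of the blocks the client must send.
Block buildInsertHeader(const Block & table_sample,
                        const Block & table_sample_non_materialized,
                        const std::vector<std::string> & requested_columns,
                        bool allow_materialized)
{
    /// No explicit column list: the client must send all ordinary columns.
    if (requested_columns.empty())
        return table_sample_non_materialized;

    Block res;
    for (const auto & name : requested_columns)
    {
        if (!table_sample.has(name))
            throw std::runtime_error("No such column " + name);

        /// MATERIALIZED columns may be listed only when explicitly allowed.
        if (!allow_materialized && !table_sample_non_materialized.has(name))
            throw std::runtime_error("Cannot insert MATERIALIZED column " + name);

        /// Types come from the table; the column itself stays empty.
        res.insert(ColumnWithTypeAndName(table_sample.getByName(name).type, name));
    }
    return res;
}
```
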
- StoragePtr cached_table; - StoragePtr loadTable(); - - StoragePtr getTable(); - - Block getSampleBlock(); - + StoragePtr getTable(const ASTInsertQuery & query); + Block getSampleBlock(const ASTInsertQuery & query, const StoragePtr & table); void checkAccess(const ASTInsertQuery & query); ASTPtr query_ptr; diff --git a/dbms/src/Interpreters/tests/expression.cpp b/dbms/src/Interpreters/tests/expression.cpp index 0b2ead8bd52..734f89623ea 100644 --- a/dbms/src/Interpreters/tests/expression.cpp +++ b/dbms/src/Interpreters/tests/expression.cpp @@ -125,7 +125,7 @@ int main(int argc, char ** argv) LimitBlockInputStream lis(is, 20, std::max(0, static_cast(n) - 20)); WriteBufferFromOStream out_buf(std::cout); RowOutputStreamPtr os_ = std::make_shared(out_buf, block); - BlockOutputStreamFromRowOutputStream os(os_); + BlockOutputStreamFromRowOutputStream os(os_, is->getHeader()); copyData(lis, os); } diff --git a/dbms/src/Server/ClusterCopier.cpp b/dbms/src/Server/ClusterCopier.cpp index 02dbf7367c2..96b13012c18 100644 --- a/dbms/src/Server/ClusterCopier.cpp +++ b/dbms/src/Server/ClusterCopier.cpp @@ -1520,7 +1520,7 @@ protected: try { RemoteBlockInputStream stream(*connection, query, {}, context, &current_settings); - NullBlockOutputStream output; + NullBlockOutputStream output({}); copyData(stream, output); if (increment_and_check_exit()) diff --git a/dbms/src/Server/TCPHandler.cpp b/dbms/src/Server/TCPHandler.cpp index e27d4f088e0..345cf118a5b 100644 --- a/dbms/src/Server/TCPHandler.cpp +++ b/dbms/src/Server/TCPHandler.cpp @@ -288,7 +288,7 @@ void TCPHandler::processInsertQuery(const Settings & global_settings) state.io.out->writePrefix(); /// Send block to the client - table structure. - Block block = state.io.out_sample; + Block block = state.io.out->getHeader(); sendData(block); readData(global_settings); @@ -417,7 +417,7 @@ void TCPHandler::sendTotals() if (totals) { - initBlockOutput(); + initBlockOutput(totals); writeVarUInt(Protocol::Server::Totals, *out); writeStringBinary("", *out); @@ -438,7 +438,7 @@ void TCPHandler::sendExtremes() if (extremes) { - initBlockOutput(); + initBlockOutput(extremes); writeVarUInt(Protocol::Server::Extremes, *out); writeStringBinary("", *out); @@ -662,7 +662,7 @@ void TCPHandler::initBlockInput() } -void TCPHandler::initBlockOutput() +void TCPHandler::initBlockOutput(const Block & block) { if (!state.block_out) { @@ -674,7 +674,8 @@ void TCPHandler::initBlockOutput() state.block_out = std::make_shared<NativeBlockOutputStream>( *state.maybe_compressed_out, - client_revision); + client_revision, + block.cloneEmpty()); } } @@ -715,7 +716,7 @@ bool TCPHandler::isQueryCancelled() void TCPHandler::sendData(const Block & block) { - initBlockOutput(); + initBlockOutput(block); writeVarUInt(Protocol::Server::Data, *out); writeStringBinary("", *out); diff --git a/dbms/src/Server/TCPHandler.h b/dbms/src/Server/TCPHandler.h index 93b82acd7ea..444d8eb4990 100644 --- a/dbms/src/Server/TCPHandler.h +++ b/dbms/src/Server/TCPHandler.h @@ -140,7 +140,7 @@ private: /// Creates state.block_in/block_out for blocks read/write, depending on whether compression is enabled. 
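The initBlockOutput change above threads the header through block.cloneEmpty(). To make the semantics concrete, here is a small standalone illustration (a hypothetical test program assuming ClickHouse's Block and DataTypeUInt64 headers; not part of the patch):

```cpp
#include <cassert>

#include <Core/Block.h>
#include <DataTypes/DataTypesNumber.h>

using namespace DB;

int main()
{
    Block block;
    auto type = std::make_shared<DataTypeUInt64>();
    auto column = type->createColumn();
    column->insert(UInt64(42));
    block.insert({std::move(column), type, "x"});

    /// cloneEmpty() keeps column names and types but drops all rows, which
    /// is exactly what an output stream needs as its header.
    Block header = block.cloneEmpty();
    assert(header.columns() == 1);
    assert(header.rows() == 0);
    assert(header.getByPosition(0).name == "x");
    return 0;
}
```
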
void initBlockInput(); - void initBlockOutput(); + void initBlockOutput(const Block & block); bool isQueryCancelled(); diff --git a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp index 2d2a5c45434..5534ee39eed 100644 --- a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp +++ b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp @@ -33,6 +33,7 @@ #include #include + namespace CurrentMetrics { extern const Metric DistributedSend; @@ -53,14 +54,20 @@ namespace ErrorCodes } -DistributedBlockOutputStream::DistributedBlockOutputStream(StorageDistributed & storage, const ASTPtr & query_ast, const ClusterPtr & cluster_, - const Settings & settings_, bool insert_sync_, UInt64 insert_timeout_) - : storage(storage), query_ast(query_ast), cluster(cluster_), settings(settings_), insert_sync(insert_sync_), - insert_timeout(insert_timeout_) +DistributedBlockOutputStream::DistributedBlockOutputStream( + StorageDistributed & storage, const ASTPtr & query_ast, const ClusterPtr & cluster_, + const Settings & settings_, bool insert_sync_, UInt64 insert_timeout_) + : storage(storage), query_ast(query_ast), cluster(cluster_), settings(settings_), insert_sync(insert_sync_), insert_timeout(insert_timeout_) { } +Block DistributedBlockOutputStream::getHeader() const +{ + return storage.getSampleBlock(); +} + + void DistributedBlockOutputStream::writePrefix() { deadline = std::chrono::steady_clock::now() + std::chrono::seconds(insert_timeout); @@ -469,7 +476,7 @@ void DistributedBlockOutputStream::writeToShard(const Block & block, const std:: WriteBufferFromFile out{block_file_tmp_path}; CompressedWriteBuffer compress{out}; - NativeBlockOutputStream stream{compress, ClickHouseRevision::get()}; + NativeBlockOutputStream stream{compress, ClickHouseRevision::get(), block.cloneEmpty()}; writeStringBinary(query_string, out); diff --git a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h index de8dc12649b..5cddd1ec92e 100644 --- a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h +++ b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h @@ -35,8 +35,8 @@ public: DistributedBlockOutputStream(StorageDistributed & storage, const ASTPtr & query_ast, const ClusterPtr & cluster_, const Settings & settings_, bool insert_sync_, UInt64 insert_timeout_); + Block getHeader() const override; void write(const Block & block) override; - void writePrefix() override; private: diff --git a/dbms/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp index 46b5f470439..af0a207bafc 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp @@ -6,6 +6,12 @@ namespace DB { +Block MergeTreeBlockOutputStream::getHeader() const +{ + return storage.getSampleBlock(); +} + + void MergeTreeBlockOutputStream::write(const Block & block) { storage.data.delayInsertIfNeeded(); diff --git a/dbms/src/Storages/MergeTree/MergeTreeBlockOutputStream.h b/dbms/src/Storages/MergeTree/MergeTreeBlockOutputStream.h index 1ecf621f0f9..64243b6e7bf 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBlockOutputStream.h +++ b/dbms/src/Storages/MergeTree/MergeTreeBlockOutputStream.h @@ -16,6 +16,7 @@ public: MergeTreeBlockOutputStream(StorageMergeTree & storage_) : storage(storage_) {} + Block getHeader() const override; void write(const 
Block & block) override; private: diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 3d95b9408a0..2e259d2467a 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -1221,7 +1221,7 @@ MergeTreeData::AlterDataPartTransactionPtr MergeTreeData::alterDataPart( * temporary column name ('converting_column_name') created in 'createConvertExpression' method * will have old name of shared offsets for arrays. */ - MergedColumnOnlyOutputStream out(*this, full_path + part->name + '/', true /* sync */, compression_settings, true /* skip_offsets */); + MergedColumnOnlyOutputStream out(*this, in.getHeader(), full_path + part->name + '/', true /* sync */, compression_settings, true /* skip_offsets */); in.readPrefix(); out.writePrefix(); diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp index 0bc73afc690..e2cbdc10980 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp @@ -736,7 +736,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMerger::mergePartsToTemporaryPart rows_sources_read_buf.seek(0, 0); ColumnGathererStream column_gathered_stream(column_name, column_part_streams, rows_sources_read_buf); - MergedColumnOnlyOutputStream column_to(data, new_part_tmp_path, false, compression_settings, offset_written); + MergedColumnOnlyOutputStream column_to(data, column_gathered_stream.getHeader(), new_part_tmp_path, false, compression_settings, offset_written); size_t column_elems_written = 0; column_to.writePrefix(); diff --git a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 9c680b814d5..f2acaa787b4 100644 --- a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -465,12 +465,12 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm /// Implementation of MergedColumnOnlyOutputStream. MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( - MergeTreeData & storage_, String part_path_, bool sync_, CompressionSettings compression_settings, bool skip_offsets_) + MergeTreeData & storage_, const Block & header_, String part_path_, bool sync_, CompressionSettings compression_settings, bool skip_offsets_) : IMergedBlockOutputStream( storage_, storage_.context.getSettings().min_compress_block_size, storage_.context.getSettings().max_compress_block_size, compression_settings, storage_.context.getSettings().min_bytes_to_use_direct_io), - part_path(part_path_), sync(sync_), skip_offsets(skip_offsets_) + header(header_), part_path(part_path_), sync(sync_), skip_offsets(skip_offsets_) { } diff --git a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.h b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.h index 60196c3ecdd..4b83f959991 100644 --- a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.h +++ b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.h @@ -105,6 +105,8 @@ public: std::string getPartPath() const; + Block getHeader() const override { return storage.getSampleBlock(); } + /// If the data is pre-sorted. 
void write(const Block & block) override; @@ -149,13 +151,15 @@ class MergedColumnOnlyOutputStream final : public IMergedBlockOutputStream { public: MergedColumnOnlyOutputStream( - MergeTreeData & storage_, String part_path_, bool sync_, CompressionSettings compression_settings, bool skip_offsets_); + MergeTreeData & storage_, const Block & header_, String part_path_, bool sync_, CompressionSettings compression_settings, bool skip_offsets_); + Block getHeader() const override { return header; } void write(const Block & block) override; void writeSuffix() override; MergeTreeData::DataPart::Checksums writeSuffixAndGetChecksums(); private: + Block header; String part_path; bool initialized = false; diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp index a36a400aea5..e34f0f68b6d 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp @@ -39,6 +39,12 @@ ReplicatedMergeTreeBlockOutputStream::ReplicatedMergeTreeBlockOutputStream( } +Block ReplicatedMergeTreeBlockOutputStream::getHeader() const +{ + return storage.getSampleBlock(); +} + + /// Allow to verify that the session in ZooKeeper is still alive. static void assertSessionIsNotExpired(zkutil::ZooKeeperPtr & zookeeper) { diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.h index 12d8f97a1d7..29ca8657038 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.h @@ -25,6 +25,7 @@ public: ReplicatedMergeTreeBlockOutputStream(StorageReplicatedMergeTree & storage_, size_t quorum_, size_t quorum_timeout_ms_, bool deduplicate_); + Block getHeader() const override; void write(const Block & block) override; /// For ATTACHing existing data on filesystem. 
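To illustrate the contract that this series establishes, here is a minimal hypothetical sink (not part of the patch) that counts rows. The only requirement the new method adds is that every output stream can report, before any data is written, the structure of the blocks it expects - here simply the header it was constructed with:

```cpp
#include <DataStreams/IBlockOutputStream.h>

namespace DB
{

class CountingSinkBlockOutputStream : public IBlockOutputStream
{
public:
    explicit CountingSinkBlockOutputStream(const Block & header_) : header(header_) {}

    /// The structure this sink expects; callers may use it to convert blocks.
    Block getHeader() const override { return header; }

    void write(const Block & block) override { rows += block.rows(); }

    size_t getRows() const { return rows; }

private:
    Block header;
    size_t rows = 0;
};

}
```
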
diff --git a/dbms/src/Storages/StorageBuffer.cpp b/dbms/src/Storages/StorageBuffer.cpp index ec4f7b498da..be6922997d1 100644 --- a/dbms/src/Storages/StorageBuffer.cpp +++ b/dbms/src/Storages/StorageBuffer.cpp @@ -208,6 +208,8 @@ class BufferBlockOutputStream : public IBlockOutputStream public: explicit BufferBlockOutputStream(StorageBuffer & storage_) : storage(storage_) {} + Block getHeader() const override { return storage.getSampleBlock(); } + void write(const Block & block) override { if (!block) diff --git a/dbms/src/Storages/StorageFile.cpp b/dbms/src/Storages/StorageFile.cpp index 1bbe9d6d00c..78fb3990978 100644 --- a/dbms/src/Storages/StorageFile.cpp +++ b/dbms/src/Storages/StorageFile.cpp @@ -184,7 +184,6 @@ BlockInputStreams StorageFile::read( class StorageFileBlockOutputStream : public IBlockOutputStream { public: - explicit StorageFileBlockOutputStream(StorageFile & storage_) : storage(storage_), lock(storage.rwlock) { @@ -205,6 +204,8 @@ public: writer = FormatFactory().getOutput(storage.format_name, *write_buf, storage.getSampleBlock(), storage.context_global); } + Block getHeader() const override { return storage.getSampleBlock(); } + void write(const Block & block) override { writer->write(block); diff --git a/dbms/src/Storages/StorageLog.cpp b/dbms/src/Storages/StorageLog.cpp index 443510fea42..711d48621f0 100644 --- a/dbms/src/Storages/StorageLog.cpp +++ b/dbms/src/Storages/StorageLog.cpp @@ -127,6 +127,7 @@ public: } } + Block getHeader() const override { return storage.getSampleBlock(); } void write(const Block & block) override; void writeSuffix() override; diff --git a/dbms/src/Storages/StorageMemory.cpp b/dbms/src/Storages/StorageMemory.cpp index 89ce474b065..96af6a9a138 100644 --- a/dbms/src/Storages/StorageMemory.cpp +++ b/dbms/src/Storages/StorageMemory.cpp @@ -61,6 +61,8 @@ class MemoryBlockOutputStream : public IBlockOutputStream public: explicit MemoryBlockOutputStream(StorageMemory & storage_) : storage(storage_) {} + Block getHeader() const override { return storage.getSampleBlock(); } + void write(const Block & block) override { storage.check(block, true); diff --git a/dbms/src/Storages/StorageNull.h b/dbms/src/Storages/StorageNull.h index e413ff2d930..ef3f0f6fcd1 100644 --- a/dbms/src/Storages/StorageNull.h +++ b/dbms/src/Storages/StorageNull.h @@ -33,7 +33,7 @@ public: BlockOutputStreamPtr write(const ASTPtr &, const Settings &) override { - return std::make_shared<NullBlockOutputStream>(); + return std::make_shared<NullBlockOutputStream>(getSampleBlock()); } void rename(const String & /*new_path_to_db*/, const String & /*new_database_name*/, const String & new_table_name) override diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 14cc102f609..de3e1f5886c 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -3214,7 +3214,7 @@ void StorageReplicatedMergeTree::sendRequestToLeaderReplica(const ASTPtr & query "", "", timeouts, "ClickHouse replica"); RemoteBlockInputStream stream(connection, formattedAST(new_query), {}, context, &settings); - NullBlockOutputStream output; + NullBlockOutputStream output({}); copyData(stream, output); return; diff --git a/dbms/src/Storages/StorageSet.cpp b/dbms/src/Storages/StorageSet.cpp index 498e475a465..bdbfca46d57 100644 --- a/dbms/src/Storages/StorageSet.cpp +++ b/dbms/src/Storages/StorageSet.cpp @@ -33,6 +33,7 @@ public: SetOrJoinBlockOutputStream(StorageSetOrJoinBase & table_, const String & backup_path_, const String & 
backup_tmp_path_, const String & backup_file_name_); + Block getHeader() const override { return table.getSampleBlock(); } void write(const Block & block) override; void writeSuffix() override; @@ -54,7 +55,7 @@ SetOrJoinBlockOutputStream::SetOrJoinBlockOutputStream(StorageSetOrJoinBase & ta backup_file_name(backup_file_name_), backup_buf(backup_tmp_path + backup_file_name), compressed_backup_buf(backup_buf), - backup_stream(compressed_backup_buf) + backup_stream(compressed_backup_buf, 0, table.getSampleBlock()) { } diff --git a/dbms/src/Storages/StorageStripeLog.cpp b/dbms/src/Storages/StorageStripeLog.cpp index 1d2d31e27bb..570dd3f9ea4 100644 --- a/dbms/src/Storages/StorageStripeLog.cpp +++ b/dbms/src/Storages/StorageStripeLog.cpp @@ -136,7 +136,7 @@ public: data_out(data_out_compressed, CompressionSettings(CompressionMethod::LZ4), storage.max_compress_block_size), index_out_compressed(storage.full_path() + "index.mrk", INDEX_BUFFER_SIZE, O_WRONLY | O_APPEND | O_CREAT), index_out(index_out_compressed), - block_out(data_out, 0, &index_out, Poco::File(storage.full_path() + "data.bin").getSize()) + block_out(data_out, 0, storage.getSampleBlock(), &index_out, Poco::File(storage.full_path() + "data.bin").getSize()) { } @@ -152,6 +152,8 @@ public: } } + Block getHeader() const override { return storage.getSampleBlock(); } + void write(const Block & block) override { block_out.write(block); diff --git a/dbms/src/Storages/StorageTinyLog.cpp b/dbms/src/Storages/StorageTinyLog.cpp index 957d1ca9aff..4ed7d6dfff2 100644 --- a/dbms/src/Storages/StorageTinyLog.cpp +++ b/dbms/src/Storages/StorageTinyLog.cpp @@ -118,6 +118,8 @@ public: } } + Block getHeader() const override { return storage.getSampleBlock(); } + void write(const Block & block) override; void writeSuffix() override; diff --git a/dbms/src/Storages/tests/hit_log.cpp b/dbms/src/Storages/tests/hit_log.cpp index bd1777db18f..3dd75206a2b 100644 --- a/dbms/src/Storages/tests/hit_log.cpp +++ b/dbms/src/Storages/tests/hit_log.cpp @@ -134,7 +134,7 @@ try BlockInputStreamPtr in = table->read(column_names, {}, Context::createGlobal(), stage, 8192, 1)[0]; RowOutputStreamPtr out_ = std::make_shared(out_buf, sample); - BlockOutputStreamFromRowOutputStream out(out_); + BlockOutputStreamFromRowOutputStream out(out_, sample); copyData(*in, out); } diff --git a/dbms/src/Storages/tests/storage_log.cpp b/dbms/src/Storages/tests/storage_log.cpp index 70c73d8c0b5..6d9cb5d0def 100644 --- a/dbms/src/Storages/tests/storage_log.cpp +++ b/dbms/src/Storages/tests/storage_log.cpp @@ -93,7 +93,7 @@ try LimitBlockInputStream in_limit(in, 10, 0); RowOutputStreamPtr output_ = std::make_shared(out_buf, sample); - BlockOutputStreamFromRowOutputStream output(output_); + BlockOutputStreamFromRowOutputStream output(output_, sample); copyData(in_limit, output); } diff --git a/dbms/src/Storages/tests/system_numbers.cpp b/dbms/src/Storages/tests/system_numbers.cpp index d2d0f9785b2..93e31939555 100644 --- a/dbms/src/Storages/tests/system_numbers.cpp +++ b/dbms/src/Storages/tests/system_numbers.cpp @@ -31,7 +31,7 @@ try LimitBlockInputStream input(table->read(column_names, {}, Context::createGlobal(), stage, 10, 1)[0], 10, 96); RowOutputStreamPtr output_ = std::make_shared(out_buf, sample); - BlockOutputStreamFromRowOutputStream output(output_); + BlockOutputStreamFromRowOutputStream output(output_, sample); copyData(input, output); From cd7a4de4122db20c41ac620c6c3c66206f7ce2c7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 19 Feb 2018 06:00:16 +0300 Subject: [PATCH 
006/209] Added method "getHeader" in IBlockOutputStream: development [#CLICKHOUSE-2] --- dbms/src/Interpreters/InterpreterCreateQuery.cpp | 6 ++++-- dbms/src/Interpreters/InterpreterInsertQuery.cpp | 15 ++++++++------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 88c12f17364..58060b6d2c6 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -550,11 +550,13 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) { auto insert = std::make_shared<ASTInsertQuery>(); - insert->database = database_name; + if (!create.is_temporary) + insert->database = database_name; + insert->table = table_name; insert->select = create.select->clone(); - return InterpreterInsertQuery(insert, context, context.getSettingsRef().insert_allow_materialized_columns).execute(); + return InterpreterInsertQuery(insert, context.getSessionContext(), context.getSettingsRef().insert_allow_materialized_columns).execute(); } return {}; diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.cpp b/dbms/src/Interpreters/InterpreterInsertQuery.cpp index 3091457d384..60cd5ea70cb 100644 --- a/dbms/src/Interpreters/InterpreterInsertQuery.cpp +++ b/dbms/src/Interpreters/InterpreterInsertQuery.cpp @@ -22,6 +22,7 @@ #include #include + namespace ProfileEvents { extern const Event InsertQuery; } namespace DB { + namespace ErrorCodes { extern const int NO_SUCH_COLUMN_IN_TABLE; @@ -54,7 +56,7 @@ StoragePtr InterpreterInsertQuery::getTable(const ASTInsertQuery & query) return factory.get(table_function->name, context)->execute(query.table_function, context); } - /// In what table to write. + /// Into what table to write. return context.getTable(query.database, query.table); } @@ -116,13 +118,10 @@ BlockIO InterpreterInsertQuery::execute() out = std::move(out_wrapper); BlockIO res; + res.out = std::move(out); /// What type of query: INSERT or INSERT SELECT? 
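A sketch of how a caller drives the BlockIO that execute() returns after this change (the function name and structure are illustrative, not part of the patch): a plain INSERT exposes only 'out'; an INSERT SELECT exposes only 'in', and reading 'in' to completion performs the copy, as the hunk below shows.

```cpp
#include <DataStreams/BlockIO.h>

using namespace DB;

void driveInsert(BlockIO & io)
{
    if (io.out)
    {
        /// Plain INSERT: the caller feeds blocks shaped like io.out->getHeader().
        io.out->writePrefix();
        /// ... io.out->write(block) for every block received from the client ...
        io.out->writeSuffix();
    }
    else
    {
        /// INSERT SELECT: res.out was reset to nullptr; pulling from 'in'
        /// makes NullAndDoCopyBlockInputStream copy select -> table.
        while (io.in->read())
            ;
    }
}
```
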
- if (!query.select) - { - res.out = std::move(out); - } - else + if (query.select) { InterpreterSelectQuery interpreter_select{query.select, context}; res.in = interpreter_select.execute().in; res.in = std::make_shared<NullableAdapterBlockInputStream>(res.in, res.in->getHeader(), res.out->getHeader()); res.in = std::make_shared<CastTypeBlockInputStream>(context, res.in, res.out->getHeader()); - res.in = std::make_shared<NullAndDoCopyBlockInputStream>(res.in, out); + res.in = std::make_shared<NullAndDoCopyBlockInputStream>(res.in, res.out); + + res.out = nullptr; if (!allow_materialized) { From 74c56c2def6cea88f1600ec02d5bbeb170dfced2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 19 Feb 2018 06:56:08 +0300 Subject: [PATCH 007/209] Added method "getHeader" in IBlockOutputStream: development [#CLICKHOUSE-2] --- .../IProfilingBlockInputStream.cpp | 43 +++++++++++++++++++ .../MergeTreeBaseBlockInputStream.cpp | 24 +++++++---- .../MergeTree/MergeTreeBaseBlockInputStream.h | 2 +- .../MergeTree/MergeTreeBlockInputStream.cpp | 4 +- .../MergeTreeThreadBlockInputStream.cpp | 4 +- .../tests/queries/0_stateless/00294_enums.sql | 16 +++---- 6 files changed, 73 insertions(+), 20 deletions(-) diff --git a/dbms/src/DataStreams/IProfilingBlockInputStream.cpp b/dbms/src/DataStreams/IProfilingBlockInputStream.cpp index eb3715044e1..3878e452c11 100644 --- a/dbms/src/DataStreams/IProfilingBlockInputStream.cpp +++ b/dbms/src/DataStreams/IProfilingBlockInputStream.cpp @@ -4,6 +4,8 @@ #include #include #include +#include + namespace DB { @@ -15,6 +17,38 @@ namespace ErrorCodes extern const int TIMEOUT_EXCEEDED; extern const int TOO_SLOW; extern const int LOGICAL_ERROR; + extern const int BLOCKS_HAVE_DIFFERENT_STRUCTURE; } + +static void checkBlockStructure(const Block & block, const Block & header) [[maybe_unused]] +{ + size_t columns = header.columns(); + if (block.columns() != columns) + throw Exception("Block structure mismatch: different number of columns:\n" + + block.dumpStructure() + "\n" + header.dumpStructure(), ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE); + + for (size_t i = 0; i < columns; ++i) + { + const auto & expected = header.getByPosition(i); + const auto & actual = block.getByPosition(i); + + if (actual.name != expected.name) + throw Exception("Block structure mismatch: different names of columns:\n" + + block.dumpStructure() + "\n" + header.dumpStructure(), ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE); + + if (!actual.type->equals(*expected.type)) + throw Exception("Block structure mismatch: different types:\n" + + block.dumpStructure() + "\n" + header.dumpStructure(), ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE); + + if (actual.column->getName() != expected.column->getName()) + throw Exception("Block structure mismatch: different columns:\n" + + block.dumpStructure() + "\n" + header.dumpStructure(), ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE); + + if (actual.column->isColumnConst() && expected.column->isColumnConst() + && static_cast<const ColumnConst &>(*actual.column).getField() != static_cast<const ColumnConst &>(*expected.column).getField()) + throw Exception("Block structure mismatch: different values of constants", ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE); + } } @@ -70,6 +104,15 @@ Block IProfilingBlockInputStream::read() progress(Progress(res.rows(), res.bytes())); +#ifndef NDEBUG + if (res) + { + Block header = getHeader(); + if (header) + checkBlockStructure(res, header); + } +#endif + return res; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeBaseBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeBaseBlockInputStream.cpp index bf1e9a6399d..8076c54a0bb 100644 --- 
a/dbms/src/Storages/MergeTree/MergeTreeBaseBlockInputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeBaseBlockInputStream.cpp @@ -470,7 +470,7 @@ Block MergeTreeBaseBlockInputStream::readFromPart() } -void MergeTreeBaseBlockInputStream::injectVirtualColumns(Block & block) +void MergeTreeBaseBlockInputStream::injectVirtualColumns(Block & block) const { const auto rows = block.rows(); @@ -482,17 +482,23 @@ void MergeTreeBaseBlockInputStream::injectVirtualColumns(Block & block) { if (virt_column_name == "_part") { - block.insert(ColumnWithTypeAndName{ - DataTypeString().createColumnConst(rows, task->data_part->name)->convertToFullColumnIfConst(), - std::make_shared<DataTypeString>(), - virt_column_name}); + ColumnPtr column; + if (rows) + column = DataTypeString().createColumnConst(rows, task->data_part->name)->convertToFullColumnIfConst(); + else + column = DataTypeString().createColumn(); + + block.insert({ column, std::make_shared<DataTypeString>(), virt_column_name}); } else if (virt_column_name == "_part_index") { - block.insert(ColumnWithTypeAndName{ - DataTypeUInt64().createColumnConst(rows, static_cast<UInt64>(task->part_index_in_query))->convertToFullColumnIfConst(), - std::make_shared<DataTypeUInt64>(), - virt_column_name}); + ColumnPtr column; + if (rows) + column = DataTypeUInt64().createColumnConst(rows, static_cast<UInt64>(task->part_index_in_query))->convertToFullColumnIfConst(); + else + column = DataTypeUInt64().createColumn(); + + block.insert({ column, std::make_shared<DataTypeUInt64>(), virt_column_name}); } } } diff --git a/dbms/src/Storages/MergeTree/MergeTreeBaseBlockInputStream.h b/dbms/src/Storages/MergeTree/MergeTreeBaseBlockInputStream.h index 024f39a619c..6586027f9b6 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBaseBlockInputStream.h +++ b/dbms/src/Storages/MergeTree/MergeTreeBaseBlockInputStream.h @@ -42,7 +42,7 @@ protected: Block readFromPart(); - void injectVirtualColumns(Block & block); + void injectVirtualColumns(Block & block) const; protected: MergeTreeData & storage; diff --git a/dbms/src/Storages/MergeTree/MergeTreeBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeBlockInputStream.cpp index fcfb00084cf..dd0335c81a0 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBlockInputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeBlockInputStream.cpp @@ -65,7 +65,9 @@ MergeTreeBlockInputStream::MergeTreeBlockInputStream( Block MergeTreeBlockInputStream::getHeader() const { - return storage.getSampleBlockForColumns(ordered_names); + Block res = storage.getSampleBlockForColumns(ordered_names); + injectVirtualColumns(res); + return res; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeThreadBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeThreadBlockInputStream.cpp index ec961f28a99..2a749513154 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeThreadBlockInputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeThreadBlockInputStream.cpp @@ -40,7 +40,9 @@ MergeTreeThreadBlockInputStream::MergeTreeThreadBlockInputStream( Block MergeTreeThreadBlockInputStream::getHeader() const { - return pool->getHeader(); + auto res = pool->getHeader(); + injectVirtualColumns(res); + return res; }; diff --git a/dbms/tests/queries/0_stateless/00294_enums.sql b/dbms/tests/queries/0_stateless/00294_enums.sql index 13c8302fbd4..24a75126990 100644 --- a/dbms/tests/queries/0_stateless/00294_enums.sql +++ b/dbms/tests/queries/0_stateless/00294_enums.sql @@ -2,9 +2,9 @@ set max_threads = 1; drop table if exists test.enums; create table test.enums ( - d Date default '2015-12-29', k default 0, - e Enum8('world' = 2, 'hello' = 1), 
sign Enum8('minus' = -1, 'plus' = 1), - letter Enum16('a' = 0, 'b' = 1, 'c' = 2, '*' = -256) + d Date default '2015-12-29', k default 0, + e Enum8('world' = 2, 'hello' = 1), sign Enum8('minus' = -1, 'plus' = 1), + letter Enum16('a' = 0, 'b' = 1, 'c' = 2, '*' = -256) ) engine = MergeTree(d, k, 1); desc table test.enums; @@ -21,16 +21,16 @@ select * from test.enums ORDER BY _part; -- expand `e` and `sign` from Enum8 to Enum16 without changing values, change values of `letter` without changing type alter table test.enums - modify column e Enum16('world' = 2, 'hello' = 1, '!' = 3), - modify column sign Enum16('minus' = -1, 'plus' = 1), - modify column letter Enum16('a' = 0, 'b' = 1, 'c' = 2, 'no letter' = -256); + modify column e Enum16('world' = 2, 'hello' = 1, '!' = 3), + modify column sign Enum16('minus' = -1, 'plus' = 1), + modify column letter Enum16('a' = 0, 'b' = 1, 'c' = 2, 'no letter' = -256); desc table test.enums; select * from test.enums ORDER BY _part; alter table test.enums - modify column e Enum8('world' = 2, 'hello' = 1, '!' = 3), - modify column sign Enum8('minus' = -1, 'plus' = 1); + modify column e Enum8('world' = 2, 'hello' = 1, '!' = 3), + modify column sign Enum8('minus' = -1, 'plus' = 1); desc table test.enums; From 8b7003ade6f97abdc2cf2b36d520a5cb44aee8fe Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 19 Feb 2018 06:57:24 +0300 Subject: [PATCH 008/209] Added method "getHeader" in IBlockOutputStream: development [#CLICKHOUSE-2] --- dbms/src/DataStreams/IProfilingBlockInputStream.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/DataStreams/IProfilingBlockInputStream.cpp b/dbms/src/DataStreams/IProfilingBlockInputStream.cpp index 3878e452c11..2fa41804d51 100644 --- a/dbms/src/DataStreams/IProfilingBlockInputStream.cpp +++ b/dbms/src/DataStreams/IProfilingBlockInputStream.cpp @@ -21,7 +21,7 @@ namespace ErrorCodes } -static void checkBlockStructure(const Block & block, const Block & header) [[maybe_unused]] +[[maybe_unused]] static void checkBlockStructure(const Block & block, const Block & header) { size_t columns = header.columns(); if (block.columns() != columns) From 307fc93d1dfb1c665f385c370d161951ad943ac9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 19 Feb 2018 07:03:31 +0300 Subject: [PATCH 009/209] Added method "getHeader" in IBlockOutputStream: development [#CLICKHOUSE-2] --- dbms/src/DataStreams/FilterBlockInputStream.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/DataStreams/FilterBlockInputStream.cpp b/dbms/src/DataStreams/FilterBlockInputStream.cpp index d9f0c9142a7..2f6115863cf 100644 --- a/dbms/src/DataStreams/FilterBlockInputStream.cpp +++ b/dbms/src/DataStreams/FilterBlockInputStream.cpp @@ -44,7 +44,7 @@ FilterBlockInputStream::FilterBlockInputStream(const BlockInputStreamPtr & input && !constant_filter_description.always_true) { /// Replace the filter column to a constant with value 1. 
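Patch 008 above moves [[maybe_unused]] in front of the function. For a function declaration the attribute must precede the declaration (or follow the declarator-id); placed after the parameter list it appertains to the function type, where maybe_unused is not applicable, and compilers reject or ignore it there. A minimal standalone illustration (hypothetical helper names, not from the patch):

```cpp
[[maybe_unused]] static void helper() {}    // OK: never called, yet no -Wunused-function warning.

// static void helper2() [[maybe_unused]] {}   // Ill-formed placement: attribute would
//                                             // apply to the function type, not the function.

int main()
{
    return 0;
}
```
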
- auto header_filter_elem = header.getByPosition(filter_column); + auto & header_filter_elem = header.getByPosition(filter_column); header_filter_elem.column = header_filter_elem.type->createColumnConst(header.rows(), UInt64(1)); } } From 900755555f829ecba3188449e9c94422103d7554 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 19 Feb 2018 07:11:55 +0300 Subject: [PATCH 010/209] Added method "getHeader" in IBlockOutputStream: development [#CLICKHOUSE-2] --- dbms/src/DataStreams/MergeSortingBlockInputStream.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/DataStreams/MergeSortingBlockInputStream.h b/dbms/src/DataStreams/MergeSortingBlockInputStream.h index 4b203182d19..63069d9674b 100644 --- a/dbms/src/DataStreams/MergeSortingBlockInputStream.h +++ b/dbms/src/DataStreams/MergeSortingBlockInputStream.h @@ -38,7 +38,7 @@ public: bool isSortedOutput() const override { return true; } const SortDescription & getSortDescription() const override { return description; } - Block getHeader() const override { return children.at(0)->getHeader(); } + Block getHeader() const override { return blocks.at(0)->cloneEmpty(); } protected: Block readImpl() override; From 17320cc40b9e326bf41ee431ea47d36e51bd33c9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 19 Feb 2018 07:12:30 +0300 Subject: [PATCH 011/209] Added method "getHeader" in IBlockOutputStream: development [#CLICKHOUSE-2] --- dbms/src/DataStreams/MergeSortingBlockInputStream.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/DataStreams/MergeSortingBlockInputStream.h b/dbms/src/DataStreams/MergeSortingBlockInputStream.h index 63069d9674b..c77ab313b6f 100644 --- a/dbms/src/DataStreams/MergeSortingBlockInputStream.h +++ b/dbms/src/DataStreams/MergeSortingBlockInputStream.h @@ -38,7 +38,7 @@ public: bool isSortedOutput() const override { return true; } const SortDescription & getSortDescription() const override { return description; } - Block getHeader() const override { return blocks.at(0)->cloneEmpty(); } + Block getHeader() const override { return blocks.at(0).cloneEmpty(); } protected: Block readImpl() override; From 399ede6404b9e6b034bdf91e5637235f34e45117 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 19 Feb 2018 07:29:56 +0300 Subject: [PATCH 012/209] Added method "getHeader" in IBlockOutputStream: development [#CLICKHOUSE-2] --- dbms/src/DataStreams/MergeSortingBlockInputStream.cpp | 2 +- dbms/src/DataStreams/MergeSortingBlockInputStream.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp b/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp index 288d5d9e6ef..ed165fc0e82 100644 --- a/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp +++ b/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp @@ -155,7 +155,7 @@ Block MergeSortingBlockInputStream::readImpl() MergeSortingBlocksBlockInputStream::MergeSortingBlocksBlockInputStream( Blocks & blocks_, SortDescription & description_, size_t max_merged_block_size_, size_t limit_) - : blocks(blocks_), description(description_), max_merged_block_size(max_merged_block_size_), limit(limit_) + : blocks(blocks_), header(blocks.at(0).cloneEmpty()), description(description_), max_merged_block_size(max_merged_block_size_), limit(limit_) { Blocks nonempty_blocks; for (const auto & block : blocks) diff --git a/dbms/src/DataStreams/MergeSortingBlockInputStream.h b/dbms/src/DataStreams/MergeSortingBlockInputStream.h index c77ab313b6f..416dc0ecce7 
100644 --- a/dbms/src/DataStreams/MergeSortingBlockInputStream.h +++ b/dbms/src/DataStreams/MergeSortingBlockInputStream.h @@ -38,13 +38,14 @@ public: bool isSortedOutput() const override { return true; } const SortDescription & getSortDescription() const override { return description; } - Block getHeader() const override { return blocks.at(0).cloneEmpty(); } + Block getHeader() const override { return header; } protected: Block readImpl() override; private: Blocks & blocks; + Block header; SortDescription description; size_t max_merged_block_size; size_t limit; From 2e2c70edb5b6d20a1487a56adc77fb76366b6c6b Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Thu, 8 Feb 2018 14:07:58 +0300 Subject: [PATCH 013/209] Add table retries. [#CLICKHOUSE-3346] --- dbms/src/Server/ClusterCopier.cpp | 203 ++++++++++++++++++------------ 1 file changed, 120 insertions(+), 83 deletions(-) diff --git a/dbms/src/Server/ClusterCopier.cpp b/dbms/src/Server/ClusterCopier.cpp index 02dbf7367c2..b07874261e2 100644 --- a/dbms/src/Server/ClusterCopier.cpp +++ b/dbms/src/Server/ClusterCopier.cpp @@ -151,7 +151,7 @@ using TasksShard = std::vector<TaskShardPtr>; using TasksTable = std::list<TaskTable>; using ClusterPartitions = std::map<String, ClusterPartition>; -/// Since we could drop only the whole parition on cluster, set of the same patitions in a cluster is atomic entity +/// Contains all cluster shards (sorted by neighborhood) containing a partition struct ClusterPartition { TasksShard shards; /// having that partition Stopwatch watch; UInt64 bytes_copied = 0; UInt64 rows_copied = 0; + + size_t total_tries = 0; }; @@ -257,6 +259,8 @@ struct TaskTable TasksShard local_shards; ClusterPartitions cluster_partitions; + NameSet finished_cluster_partitions; + ClusterPartition & getClusterPartition(const String & partition_name) { auto it = cluster_partitions.find(partition_name); @@ -705,6 +709,110 @@ public: zookeeper->createAncestors(getWorkersPath() + "/"); } + + static constexpr size_t max_table_tries = 1000; + static constexpr size_t max_partition_tries = 3; + + bool tryProcessTable(TaskTable & task_table) + { + /// Process each partition that is present in cluster + for (auto & elem : task_table.cluster_partitions) + { + const String & partition_name = elem.first; + ClusterPartition & cluster_partition = elem.second; + const TasksShard & shards_with_partition = cluster_partition.shards; + + if (cluster_partition.total_tries == 0) + cluster_partition.watch.restart(); + else + cluster_partition.watch.start(); + SCOPE_EXIT(cluster_partition.watch.stop()); + + bool partition_is_done = false; + size_t num_partition_tries = 0; + + /// Retry partition processing + while (!partition_is_done && num_partition_tries < max_partition_tries) + { + ++num_partition_tries; + ++cluster_partition.total_tries; + + LOG_DEBUG(log, "Processing partition " << partition_name << " for the whole cluster" + << " (" << shards_with_partition.size() << " shards)"); + + size_t num_successful_shards = 0; + + /// Process each source shard and copy current partition + /// NOTE: shards are sorted by "distance" to current host + for (const TaskShardPtr & shard : shards_with_partition) + { + auto it_shard_partition = shard->partitions.find(partition_name); + if (it_shard_partition == shard->partitions.end()) + throw Exception("There is no such partition in a shard. 
This is a bug.", ErrorCodes::LOGICAL_ERROR); + + TaskPartition & task_shard_partition = it_shard_partition->second; + if (processPartitionTask(task_shard_partition)) + ++num_successful_shards; + } + + try + { + partition_is_done = (num_successful_shards == shards_with_partition.size()) + && checkPartitionIsDone(task_table, partition_name, shards_with_partition); + } + catch (...) + { + tryLogCurrentException(log); + partition_is_done = false; + } + + if (!partition_is_done) + std::this_thread::sleep_for(default_sleep_time); + } + + if (partition_is_done) + { + task_table.finished_cluster_partitions.emplace(partition_name); + + task_table.bytes_copied += cluster_partition.bytes_copied; + task_table.rows_copied += cluster_partition.rows_copied; + + double elapsed = cluster_partition.watch.elapsedSeconds(); + + LOG_INFO(log, "It took " << std::setprecision(2) << elapsed << " seconds to copy partition " << partition_name + << ": " << formatReadableSizeWithDecimalSuffix(cluster_partition.bytes_copied) + << " uncompressed bytes and " + << formatReadableQuantity(cluster_partition.rows_copied) << " rows are copied"); + + if (cluster_partition.rows_copied) + { + LOG_INFO(log, "Average partition speed: " + << formatReadableSizeWithDecimalSuffix(cluster_partition.bytes_copied / elapsed) << " per second."); + } + + if (task_table.rows_copied) + { + LOG_INFO(log, "Average table " << task_table.table_id << " speed: " + << formatReadableSizeWithDecimalSuffix(task_table.bytes_copied / elapsed) + << " per second."); + } + } + } + + size_t required_partitions = task_table.cluster_partitions.size(); + size_t finished_partitions = task_table.finished_cluster_partitions.size(); + + bool table_is_done = task_table.finished_cluster_partitions.size() >= task_table.cluster_partitions.size(); + if (!table_is_done) + { + LOG_INFO(log, "Table " + task_table.table_id + " is not processed yet." 
+ << "Copied " << finished_partitions << " of " << required_partitions << ", will retry"); + } + + return table_is_done; + } + + void process() { for (TaskTable & task_table : task_cluster->table_tasks) @@ -714,91 +822,20 @@ public: task_table.watch.restart(); - /// An optimization: first of all, try to process all partitions of the local shards -// for (const TaskShardPtr & shard : task_table.local_shards) -// { -// for (auto & task_partition : shard->partitions) -// { -// LOG_DEBUG(log, "Processing partition " << task_partition.first << " for local shard " << shard->numberInCluster()); -// processPartitionTask(task_partition.second); -// } -// } + bool table_is_done = false; + size_t num_table_tries = 0; - /// Then check and copy all shards until the whole partition is copied - for (auto & elem : task_table.cluster_partitions) + /// Retry table processing + while (!table_is_done && num_table_tries < max_table_tries) { - const String & partition_name = elem.first; - ClusterPartition & cluster_partition = elem.second; - const TasksShard & shards_with_partition = cluster_partition.shards; + table_is_done = tryProcessTable(task_table); + ++num_table_tries; + } - cluster_partition.watch.restart(); - - bool is_done = false; - size_t num_tries = 0; - constexpr size_t max_tries = 1000; - - do - { - LOG_DEBUG(log, "Processing partition " << partition_name << " for the whole cluster" - << " (" << shards_with_partition.size() << " shards)"); - - size_t num_successful_shards = 0; - - for (const TaskShardPtr & shard : shards_with_partition) - { - auto it_shard_partition = shard->partitions.find(partition_name); - if (it_shard_partition == shard->partitions.end()) - throw Exception("There are no such partition in a shard. This is a bug.", ErrorCodes::LOGICAL_ERROR); - - TaskPartition & task_shard_partition = it_shard_partition->second; - if (processPartitionTask(task_shard_partition)) - ++num_successful_shards; - } - - try - { - is_done = (num_successful_shards == shards_with_partition.size()) - && checkPartitionIsDone(task_table, partition_name, shards_with_partition); - } - catch (...) - { - tryLogCurrentException(log); - is_done = false; - } - - if (!is_done) - std::this_thread::sleep_for(default_sleep_time); - - ++num_tries; - } while (!is_done && num_tries < max_tries); - - if (is_done) - { - task_table.bytes_copied += cluster_partition.bytes_copied; - task_table.rows_copied += cluster_partition.rows_copied; - double elapsed = cluster_partition.watch.elapsedSeconds(); - - LOG_INFO(log, "It took " << std::setprecision(2) << elapsed << " seconds to copy partition " << partition_name - << ": " << formatReadableSizeWithDecimalSuffix(cluster_partition.bytes_copied) << " uncompressed bytes" - << " and " << formatReadableQuantity(cluster_partition.rows_copied) << " rows are copied"); - - if (cluster_partition.rows_copied) - { - LOG_INFO(log, "Average partition speed: " - << formatReadableSizeWithDecimalSuffix(cluster_partition.bytes_copied / elapsed) << " per second."); - } - - if (task_table.rows_copied) - { - LOG_INFO(log, "Average table " << task_table.table_id << " speed: " - << formatReadableSizeWithDecimalSuffix(task_table.bytes_copied / elapsed) << " per second."); - } - } - else - { - throw Exception("Too many retries while copying partition " + partition_name + ". Try the next one", - ErrorCodes::UNFINISHED); - } + if (!table_is_done) + { + throw Exception("Too many tries to process table " + task_table.table_id + ". 
Abort remaining execution", + ErrorCodes::UNFINISHED); } } } From f6a63c4d0cf0c11ddeca4e0fdd6912538f0de7ae Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Tue, 13 Feb 2018 21:42:59 +0300 Subject: [PATCH 014/209] Autoupdate of ClusterCopier settings. [#CLICKHOUSE-3346] --- dbms/src/Server/ClusterCopier.cpp | 218 +++++++++++++++++++----------- 1 file changed, 141 insertions(+), 77 deletions(-) diff --git a/dbms/src/Server/ClusterCopier.cpp b/dbms/src/Server/ClusterCopier.cpp index b07874261e2..89ea0ba39b6 100644 --- a/dbms/src/Server/ClusterCopier.cpp +++ b/dbms/src/Server/ClusterCopier.cpp @@ -279,7 +279,12 @@ struct TaskTable struct TaskCluster { - TaskCluster(const String & task_zookeeper_path_, const Poco::Util::AbstractConfiguration & config, const String & base_key, const String & default_local_database_); + TaskCluster(const String & task_zookeeper_path_, const String & default_local_database_) + : task_zookeeper_path(task_zookeeper_path_), default_local_database(default_local_database_) {} + + void loadTasks(const Poco::Util::AbstractConfiguration & config, const String & base_key = ""); + + void reloadSettings(const Poco::Util::AbstractConfiguration & config, const String & base_key = ""); /// Base node for all tasks. Its structure: /// workers/ - directory with active workers (amount of them is less or equal max_workers) @@ -287,6 +292,9 @@ struct TaskCluster /// table_table1/ - directories with per-partition copying status String task_zookeeper_path; + /// Database used to create temporary Distributed tables + String default_local_database; + /// Limits number of simultaneous workers size_t max_workers = 0; @@ -297,15 +305,11 @@ struct TaskCluster /// Settings used to insert data Settings settings_push; + String clusters_prefix; + /// Subtasks TasksTable table_tasks; - /// Database used to create temporary Distributed tables - String default_local_database; - - /// Path to remote_servers in task config - String clusters_prefix; - std::random_device random_device; pcg64 random_engine; }; @@ -565,32 +569,12 @@ void TaskTable::initShards(RandomEngine && random_engine) local_shards.assign(all_shards.begin(), it_first_remote); } -TaskCluster::TaskCluster(const String & task_zookeeper_path_, const Poco::Util::AbstractConfiguration & config, const String & base_key, - const String & default_local_database_) + +void DB::TaskCluster::loadTasks(const Poco::Util::AbstractConfiguration & config, const String & base_key) { String prefix = base_key.empty() ? 
"" : base_key + "."; - task_zookeeper_path = task_zookeeper_path_; - - default_local_database = default_local_database_; - - max_workers = config.getUInt64(prefix + "max_workers"); - - if (config.has(prefix + "settings")) - { - settings_common.loadSettingsFromConfig(prefix + "settings", config); - settings_pull = settings_common; - settings_push = settings_common; - } - - if (config.has(prefix + "settings_pull")) - settings_pull.loadSettingsFromConfig(prefix + "settings_pull", config); - - if (config.has(prefix + "settings_push")) - settings_push.loadSettingsFromConfig(prefix + "settings_push", config); - clusters_prefix = prefix + "remote_servers"; - if (!config.has(clusters_prefix)) throw Exception("You should specify list of clusters in " + clusters_prefix, ErrorCodes::BAD_ARGUMENTS); @@ -603,6 +587,36 @@ TaskCluster::TaskCluster(const String & task_zookeeper_path_, const Poco::Util:: } } +void DB::TaskCluster::reloadSettings(const Poco::Util::AbstractConfiguration & config, const String & base_key) +{ + String prefix = base_key.empty() ? "" : base_key + "."; + + max_workers = config.getUInt64(prefix + "max_workers"); + + settings_common = Settings(); + if (config.has(prefix + "settings")) + settings_common.loadSettingsFromConfig(prefix + "settings", config); + + settings_pull = settings_common; + if (config.has(prefix + "settings_pull")) + settings_pull.loadSettingsFromConfig(prefix + "settings_pull", config); + + settings_push = settings_common; + if (config.has(prefix + "settings_push")) + settings_push.loadSettingsFromConfig(prefix + "settings_push", config); + + /// Override important settings + settings_pull.load_balancing = LoadBalancing::NEAREST_HOSTNAME; + settings_pull.limits.readonly = 1; + settings_pull.max_threads = 1; + settings_pull.max_block_size = std::min(8192UL, settings_pull.max_block_size.value); + settings_pull.preferred_block_size_bytes = 0; + + settings_push.insert_distributed_timeout = 0; + settings_push.insert_distributed_sync = 1; +} + + } // end of an anonymous namespace @@ -628,27 +642,22 @@ public: void init() { - String description_path = task_zookeeper_path + "/description"; - String task_config_str = getZooKeeper()->get(description_path); + auto zookeeper = getZooKeeper(); - task_cluster_config = getConfigurationFromXMLString(task_config_str); - task_cluster = std::make_unique(task_zookeeper_path, *task_cluster_config, "", working_database_name); + task_description_watch_callback = [this] (zkutil::ZooKeeper &, int, int, const char *) + { + UInt64 version = ++task_descprtion_version; + LOG_DEBUG(log, "Task description should be updated, local version " << version); + }; - /// Override important settings - Settings & settings_pull = task_cluster->settings_pull; - settings_pull.load_balancing = LoadBalancing::NEAREST_HOSTNAME; - settings_pull.limits.readonly = 1; - settings_pull.max_threads = 1; - settings_pull.max_block_size = std::min(8192UL, settings_pull.max_block_size.value); - settings_pull.preferred_block_size_bytes = 0; + task_description_path = task_zookeeper_path + "/description"; + task_cluster = std::make_unique(task_zookeeper_path, working_database_name); - Settings & settings_push = task_cluster->settings_push; - settings_push.insert_distributed_timeout = 0; - settings_push.insert_distributed_sync = 1; + reloadTaskDescription(); + task_cluster_initial_config = task_cluster_current_config; - /// Set up clusters - context.getSettingsRef() = task_cluster->settings_common; - context.setClustersConfig(task_cluster_config, 
task_cluster->clusters_prefix); + task_cluster->loadTasks(*task_cluster_initial_config); + context.setClustersConfig(task_cluster_initial_config, task_cluster->clusters_prefix); /// Set up shards and their priority task_cluster->random_engine.seed(task_cluster->random_device()); @@ -705,13 +714,41 @@ public: } } - auto zookeeper = getZooKeeper(); - zookeeper->createAncestors(getWorkersPath() + "/"); + getZooKeeper()->createAncestors(getWorkersPath() + "/"); } + void reloadTaskDescription() + { + String task_config_str; + zkutil::Stat stat; + int code; + + getZooKeeper()->tryGetWatch(task_description_path, task_config_str, &stat, task_description_watch_callback, &code); + if (code != ZOK) + throw Exception("Can't get description node " + task_description_path, ErrorCodes::BAD_ARGUMENTS); + + LOG_DEBUG(log, "Loading description, zxid=" << task_descprtion_current_stat.czxid); + auto config = getConfigurationFromXMLString(task_config_str); + + /// Setup settings + task_cluster->reloadSettings(*config); + context.getSettingsRef() = task_cluster->settings_common; + + task_cluster_current_config = config; + task_descprtion_current_stat = stat; + } + + void updateConfigIfNeeded() + { + UInt64 version_to_update = task_descprtion_version; + if (task_descprtion_current_version == version_to_update) + return; + + task_descprtion_current_version = version_to_update; + } static constexpr size_t max_table_tries = 1000; - static constexpr size_t max_partition_tries = 3; + static constexpr size_t max_partition_tries = 1; bool tryProcessTable(TaskTable & task_table) { @@ -934,7 +971,8 @@ protected: return getWorkersPath() + "/" + host_id; } - zkutil::EphemeralNodeHolder::Ptr createWorkerNodeAndWaitIfNeed(const zkutil::ZooKeeperPtr & zookeeper, const String & task_description) + zkutil::EphemeralNodeHolder::Ptr createTaskWorkerNodeAndWaitIfNeed(const zkutil::ZooKeeperPtr & zookeeper, + const String & description) { while (true) { @@ -944,12 +982,12 @@ protected: if (static_cast(stat.numChildren) >= task_cluster->max_workers) { LOG_DEBUG(log, "Too many workers (" << stat.numChildren << ", maximum " << task_cluster->max_workers << ")" - << ". Postpone processing " << task_description); + << ". Postpone processing " << description); std::this_thread::sleep_for(default_sleep_time); } else { - return std::make_shared(getCurrentWorkerNodePath(), *zookeeper, true, false, task_description); + return std::make_shared(getCurrentWorkerNodePath(), *zookeeper, true, false, description); } } } @@ -1072,17 +1110,32 @@ protected: return true; } + bool processPartitionTask(TaskPartition & task_partition) { + bool res; + try { - return processPartitionTaskImpl(task_partition); + res = processPartitionTaskImpl(task_partition); } catch (...) { tryLogCurrentException(log, "An error occurred while processing partition " + task_partition.name); return false; } + + /// At the end of each task check if the config is updated + try + { + updateConfigIfNeeded(); + } + catch (...) 
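// Flow note on the block above: updateConfigIfNeeded() only compares the atomic
// task_descprtion_version — bumped by the ZooKeeper watch callback registered in
// init() whenever the description node changes — against the last applied
// version; the call to reloadTaskDescription() that actually re-reads the node
// is wired in by a later patch in this series. Errors during the refresh are
// logged and swallowed in the catch body that follows, so a running copy task
// keeps the previously loaded settings instead of aborting.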
+ { + tryLogCurrentException(log, "An error occurred while updating the config"); + } + + return res; } bool processPartitionTaskImpl(TaskPartition & task_partition) @@ -1125,7 +1178,7 @@ protected: /// Load balancing - auto worker_node_holder = createWorkerNodeAndWaitIfNeed(zookeeper, current_task_status_path); + auto worker_node_holder = createTaskWorkerNodeAndWaitIfNeed(zookeeper, current_task_status_path); LOG_DEBUG(log, "Processing " << current_task_status_path); @@ -1187,14 +1240,14 @@ protected: zookeeper->createAncestors(current_task_status_path); /// We need to update table definitions for each part, it could be changed after ALTER - ASTPtr create_query_pull_ast; + ASTPtr query_create_pull_table; { /// Fetch and parse (possibly) new definition auto connection_entry = task_shard.info.pool->get(&task_cluster->settings_pull); String create_query_pull_str = getRemoteCreateTable(task_table.table_pull, *connection_entry, &task_cluster->settings_pull); ParserCreateQuery parser_create_query; - create_query_pull_ast = parseQuery(parser_create_query, create_query_pull_str); + query_create_pull_table = parseQuery(parser_create_query, create_query_pull_str); } /// Create local Distributed tables: @@ -1210,7 +1263,7 @@ protected: auto storage_shard_ast = createASTStorageDistributed(shard_read_cluster_name, task_table.table_pull.first, task_table.table_pull.second); const auto & storage_split_ast = task_table.engine_split_ast; - auto create_query_ast = removeAliasColumnsFromCreateQuery(create_query_pull_ast); + auto create_query_ast = removeAliasColumnsFromCreateQuery(query_create_pull_table); auto create_table_pull_ast = rewriteCreateQueryStorage(create_query_ast, table_shard, storage_shard_ast); auto create_table_split_ast = rewriteCreateQueryStorage(create_query_ast, table_split, storage_split_ast); @@ -1278,12 +1331,15 @@ protected: /// Try create table (if not exists) on each shard { - auto create_query_push_ast = rewriteCreateQueryStorage(create_query_pull_ast, task_table.table_push, task_table.engine_push_ast); + auto create_query_push_ast = rewriteCreateQueryStorage(query_create_pull_table, task_table.table_push, task_table.engine_push_ast); typeid_cast(*create_query_push_ast).if_not_exists = true; String query = queryToString(create_query_push_ast); LOG_DEBUG(log, "Create remote push tables. Query: " << query); - executeQueryOnCluster(task_table.cluster_push, query, create_query_push_ast, &task_cluster->settings_push, PoolMode::GET_MANY); + size_t shards = executeQueryOnCluster(task_table.cluster_push, query, create_query_push_ast, &task_cluster->settings_push, + PoolMode::GET_MANY); + LOG_DEBUG(log, "Remote push tables have been created on " << shards << " shards of " + << task_table.cluster_push->getShardCount()); } /// Do the copying @@ -1545,28 +1601,28 @@ protected: /// Will try to make as many as possible queries if (shard.hasRemoteConnections()) { - Settings current_settings = *settings; + Settings current_settings = settings ? *settings : task_cluster->settings_common; current_settings.max_parallel_replicas = num_remote_replicas ? 
num_remote_replicas : 1; std::vector connections = shard.pool->getMany(¤t_settings, pool_mode); for (auto & connection : connections) { - if (!connection.isNull()) - { - try - { - RemoteBlockInputStream stream(*connection, query, {}, context, ¤t_settings); - NullBlockOutputStream output; - copyData(stream, output); + if (connection.isNull()) + continue; - if (increment_and_check_exit()) - return; - } - catch (const Exception & e) - { - LOG_INFO(log, getCurrentExceptionMessage(false, true)); - } + try + { + RemoteBlockInputStream stream(*connection, query, context, ¤t_settings); + NullBlockOutputStream output; + copyData(stream, output); + + if (increment_and_check_exit()) + return; + } + catch (const Exception & e) + { + LOG_INFO(log, getCurrentExceptionMessage(false, true)); } } } @@ -1604,17 +1660,25 @@ protected: private: ConfigurationPtr zookeeper_config; String task_zookeeper_path; + String task_description_path; String host_id; String working_database_name; - bool is_safe_mode = false; - double copy_fault_probability = 0.0; + UInt64 task_descprtion_current_version = 1; + std::atomic task_descprtion_version{1}; + zkutil::WatchCallback task_description_watch_callback; + + ConfigurationPtr task_cluster_initial_config; + ConfigurationPtr task_cluster_current_config; + zkutil::Stat task_descprtion_current_stat; - ConfigurationPtr task_cluster_config; std::unique_ptr task_cluster; zkutil::ZooKeeperPtr current_zookeeper; + bool is_safe_mode = false; + double copy_fault_probability = 0.0; + Context & context; Poco::Logger * log; From 369f88f65d3fdeb00f3ef55ad91eab5e51d388df Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Wed, 14 Feb 2018 18:11:39 +0300 Subject: [PATCH 015/209] Rewritten and improved sync distributed insert. [#CLICKHOUSE-3346] --- .../DataStreams/RemoteBlockOutputStream.cpp | 10 + .../src/DataStreams/RemoteBlockOutputStream.h | 5 + dbms/src/Interpreters/Cluster.cpp | 58 ++- dbms/src/Interpreters/Cluster.h | 2 + .../DistributedBlockOutputStream.cpp | 379 +++++++++--------- .../DistributedBlockOutputStream.h | 87 ++-- .../test.py | 9 +- 7 files changed, 306 insertions(+), 244 deletions(-) mode change 100644 => 100755 dbms/tests/integration/test_insert_into_distributed_sync_async/test.py diff --git a/dbms/src/DataStreams/RemoteBlockOutputStream.cpp b/dbms/src/DataStreams/RemoteBlockOutputStream.cpp index 659f30c465b..d6284670127 100644 --- a/dbms/src/DataStreams/RemoteBlockOutputStream.cpp +++ b/dbms/src/DataStreams/RemoteBlockOutputStream.cpp @@ -28,6 +28,7 @@ void RemoteBlockOutputStream::writePrefix() * Sample block is needed to know, what structure is required for blocks to be passed to 'write' method. 
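 * (As of this patch the stream also tracks its lifecycle: writePrefix() sets
 * query_sent before talking to the server, writeSuffix() sets finished, and
 * the new destructor disconnects the connection whenever a query was sent but
 * never finished, so a half-completed insert cannot leave the connection stuck
 * in the middle of the protocol.)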
*/ + query_sent = true; connection.sendQuery(query, "", QueryProcessingStage::Complete, settings, nullptr); Connection::Packet packet = connection.receivePacket(); @@ -93,6 +94,15 @@ void RemoteBlockOutputStream::writeSuffix() else throw NetException("Unexpected packet from server (expected EndOfStream or Exception, got " + String(Protocol::Server::toString(packet.type)) + ")", ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER); + + finished = true; +} + +RemoteBlockOutputStream::~RemoteBlockOutputStream() +{ + /// If interrupted in the middle of the loop of communication with the server, then interrupt the connection + if (query_sent && !finished) + connection.disconnect(); } } diff --git a/dbms/src/DataStreams/RemoteBlockOutputStream.h b/dbms/src/DataStreams/RemoteBlockOutputStream.h index 33b3af86754..7c917393e4e 100644 --- a/dbms/src/DataStreams/RemoteBlockOutputStream.h +++ b/dbms/src/DataStreams/RemoteBlockOutputStream.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB @@ -33,11 +34,15 @@ public: /// Send pre-serialized and possibly pre-compressed block of data, that will be read from 'input'. void writePrepared(ReadBuffer & input, size_t size = 0); + ~RemoteBlockOutputStream() override; + private: Connection & connection; String query; const Settings * settings; Block sample_block; + bool query_sent = false; + bool finished = false; }; } diff --git a/dbms/src/Interpreters/Cluster.cpp b/dbms/src/Interpreters/Cluster.cpp index 7b4857431b6..995a93e4324 100644 --- a/dbms/src/Interpreters/Cluster.cpp +++ b/dbms/src/Interpreters/Cluster.cpp @@ -210,26 +210,29 @@ Cluster::Cluster(Poco::Util::AbstractConfiguration & config, const Settings & se info.weight = weight; if (address.is_local) + { info.local_addresses.push_back(address); + info.per_replica_pools = {nullptr}; + } else { - ConnectionPoolPtrs pools; - pools.push_back(std::make_shared( + ConnectionPoolPtr pool = std::make_shared( settings.distributed_connections_pool_size, address.host_name, address.port, address.resolved_address, address.default_database, address.user, address.password, ConnectionTimeouts::getTCPTimeouts(settings).getSaturated(settings.limits.max_execution_time), - "server", Protocol::Compression::Enable, Protocol::Encryption::Disable)); + "server", Protocol::Compression::Enable, Protocol::Encryption::Disable); info.pool = std::make_shared( - std::move(pools), settings.load_balancing, settings.connections_with_failover_max_tries); + ConnectionPoolPtrs{pool}, settings.load_balancing, settings.connections_with_failover_max_tries); + info.per_replica_pools = {std::move(pool)}; } if (weight) slot_to_shard.insert(std::end(slot_to_shard), weight, shards_info.size()); - shards_info.push_back(info); - addresses_with_failover.push_back(addresses); + shards_info.emplace_back(std::move(info)); + addresses_with_failover.emplace_back(std::move(addresses)); } else if (startsWith(key, "shard")) { @@ -282,34 +285,42 @@ Cluster::Cluster(Poco::Util::AbstractConfiguration & config, const Settings & se Addresses shard_local_addresses; - ConnectionPoolPtrs replicas; - replicas.reserve(replica_addresses.size()); + ConnectionPoolPtrs remote_replicas_pools; + ConnectionPoolPtrs all_replicas_pools; + remote_replicas_pools.reserve(replica_addresses.size()); + all_replicas_pools.reserve(replica_addresses.size()); for (const auto & replica : replica_addresses) { if (replica.is_local) + { shard_local_addresses.push_back(replica); + all_replicas_pools.emplace_back(nullptr); + } else { - replicas.emplace_back(std::make_shared( + auto replica_pool 
= std::make_shared( settings.distributed_connections_pool_size, replica.host_name, replica.port, replica.resolved_address, replica.default_database, replica.user, replica.password, ConnectionTimeouts::getTCPTimeouts(settings).getSaturated(settings.limits.max_execution_time), - "server", Protocol::Compression::Enable, Protocol::Encryption::Disable)); + "server", Protocol::Compression::Enable, Protocol::Encryption::Disable); + + remote_replicas_pools.emplace_back(replica_pool); + all_replicas_pools.emplace_back(replica_pool); } } ConnectionPoolWithFailoverPtr shard_pool; - if (!replicas.empty()) + if (!remote_replicas_pools.empty()) shard_pool = std::make_shared( - std::move(replicas), settings.load_balancing, settings.connections_with_failover_max_tries); + std::move(remote_replicas_pools), settings.load_balancing, settings.connections_with_failover_max_tries); if (weight) slot_to_shard.insert(std::end(slot_to_shard), weight, shards_info.size()); shards_info.push_back({std::move(dir_name_for_internal_replication), current_shard_num, weight, - shard_local_addresses, shard_pool, internal_replication}); + std::move(shard_local_addresses), std::move(shard_pool), std::move(all_replicas_pools), internal_replication}); } else throw Exception("Unknown element in config: " + key, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); @@ -337,31 +348,38 @@ Cluster::Cluster(const Settings & settings, const std::vector( + auto replica_pool = std::make_shared( settings.distributed_connections_pool_size, replica.host_name, replica.port, replica.resolved_address, replica.default_database, replica.user, replica.password, ConnectionTimeouts::getHTTPTimeouts(settings).getSaturated(settings.limits.max_execution_time), - "server", Protocol::Compression::Enable, Protocol::Encryption::Disable)); + "server", Protocol::Compression::Enable, Protocol::Encryption::Disable); + all_replicas.emplace_back(replica_pool); + remote_replicas.emplace_back(replica_pool); } } ConnectionPoolWithFailoverPtr shard_pool = std::make_shared( - std::move(replicas), settings.load_balancing, settings.connections_with_failover_max_tries); + std::move(remote_replicas), settings.load_balancing, settings.connections_with_failover_max_tries); slot_to_shard.insert(std::end(slot_to_shard), default_weight, shards_info.size()); - shards_info.push_back({{}, current_shard_num, default_weight, std::move(shard_local_addresses), shard_pool, false}); + shards_info.push_back({{}, current_shard_num, default_weight, std::move(shard_local_addresses), std::move(shard_pool), + std::move(all_replicas), false}); ++current_shard_num; } diff --git a/dbms/src/Interpreters/Cluster.h b/dbms/src/Interpreters/Cluster.h index 658ffc08a13..77277445020 100644 --- a/dbms/src/Interpreters/Cluster.h +++ b/dbms/src/Interpreters/Cluster.h @@ -99,6 +99,8 @@ public: Addresses local_addresses; /// nullptr if there are no remote addresses ConnectionPoolWithFailoverPtr pool; + /// Connection pool for each replica, contains nullptr for local replicas + ConnectionPoolPtrs per_replica_pools; bool has_internal_replication; }; diff --git a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp index 2d2a5c45434..fe8344da118 100644 --- a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp +++ b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp @@ -86,16 +86,104 @@ void DistributedBlockOutputStream::writeAsync(const Block & block) } -ThreadPool::Job DistributedBlockOutputStream::createWritingJob( - 
WritingJobContext & context, const Block & block, const Cluster::Address & address, size_t shard_id, size_t job_id)
+std::string DistributedBlockOutputStream::getCurrentStateDescription()
+{
+    std::stringstream buffer;
+    const auto & addresses = cluster->getShardsAddresses();
+
+    buffer << "Insertion status:\n";
+    for (auto & shard_jobs : per_shard_jobs)
+        for (JobInfo & job : shard_jobs)
+        {
+            buffer << "Wrote " << job.blocks_written << " blocks and " << job.rows_written << " rows"
+                   << " on shard " << job.shard_index << " replica " << job.replica_index
+                   << ", " << addresses[job.shard_index][job.replica_index].readableString() << "\n";
+        }
+
+    return buffer.str();
+}
+
+
+void DistributedBlockOutputStream::initWritingJobs()
+{
+    const auto & addresses_with_failovers = cluster->getShardsAddresses();
+    const auto & shards_info = cluster->getShardsInfo();
+
+    remote_jobs_count = 0;
+    local_jobs_count = 0;
+    per_shard_jobs.resize(shards_info.size());
+
+    for (size_t shard_index : ext::range(0, shards_info.size()))
+    {
+        const auto & shard_info = shards_info[shard_index];
+        auto & shard_jobs = per_shard_jobs[shard_index];
+
+        /// If hasInternalReplication, then prefer local replica
+        if (!shard_info.hasInternalReplication() || !shard_info.isLocal())
+        {
+            const auto & replicas = addresses_with_failovers[shard_index];
+
+            for (size_t replica_index : ext::range(0, replicas.size()))
+            {
+                if (!replicas[replica_index].is_local)
+                {
+                    shard_jobs.emplace_back(shard_index, replica_index, false);
+                    ++remote_jobs_count;
+
+                    if (shard_info.hasInternalReplication())
+                        break;
+                }
+            }
+        }
+
+        if (shard_info.isLocal())
+        {
+            shard_jobs.emplace_back(shard_index, 0, true);
+            ++local_jobs_count;
+        }
+    }
+}
+
+
+void DistributedBlockOutputStream::waitForJobs()
+{
+    size_t jobs_count = remote_jobs_count + local_jobs_count;
+    auto cond = [this, jobs_count] { return finished_jobs_count >= jobs_count; };
+
+    if (insert_timeout)
+    {
+        bool were_jobs_finished;
+        {
+            std::unique_lock<std::mutex> lock(mutex);
+            were_jobs_finished = cond_var.wait_until(lock, deadline, cond);
+        }
+
+        pool->wait();
+
+        if (!were_jobs_finished)
+        {
+            ProfileEvents::increment(ProfileEvents::DistributedSyncInsertionTimeoutExceeded);
+            throw Exception("Synchronous distributed insert timeout exceeded.", ErrorCodes::TIMEOUT_EXCEEDED);
+        }
+    }
+    else
+    {
+        std::unique_lock<std::mutex> lock(mutex);
+        cond_var.wait(lock, cond);
+        pool->wait();
+    }
+}
+
+
+ThreadPool::Job DistributedBlockOutputStream::runWritingJob(DistributedBlockOutputStream::JobInfo & job)
 {
     auto memory_tracker = current_memory_tracker;
-    return [this, memory_tracker, & context, & block, & address, shard_id, job_id]()
+    return [this, memory_tracker, &job]()
     {
         SCOPE_EXIT({
-            std::lock_guard lock(context.mutex);
-            ++context.finished_jobs_count;
-            context.cond_var.notify_one();
+            std::lock_guard<std::mutex> lock(mutex);
+            ++finished_jobs_count;
+            cond_var.notify_one();
         });
 
         if (!current_memory_tracker)
@@ -104,183 +192,106 @@ ThreadPool::Job DistributedBlockOutputStream::createWritingJob(
             setThreadName("DistrOutStrProc");
         }
 
-        const auto & shard_info = cluster->getShardsInfo()[shard_id];
-        if (address.is_local)
+        const auto & shard_info = cluster->getShardsInfo()[job.shard_index];
+        const auto & addresses = cluster->getShardsAddresses();
+        Block & block = current_blocks.at(job.shard_index);
+
+        if (!job.is_local_job)
         {
-            writeToLocal(block, shard_info.getLocalNodeCount());
-            context.done_local_jobs[job_id] = true;
+            if (!job.stream)
+            {
+                if (shard_info.hasInternalReplication())
+                {
+                    /// Skip replica_index in case of internal replication
+                    if (per_shard_jobs[job.shard_index].size() != 1)
+                        throw Exception("There are several writing jobs for an automatically replicated shard", ErrorCodes::LOGICAL_ERROR);
+
+                    /// TODO: it makes sense to rewrite skip_unavailable_shards and max_parallel_replicas here
+                    auto connections = shard_info.pool->getMany(&settings, PoolMode::GET_ONE);
+                    if (connections.empty() || connections.front().isNull())
+                        throw Exception("Expected exactly one connection for shard " + toString(job.shard_index), ErrorCodes::LOGICAL_ERROR);
+
+                    job.connection_entry = std::move(connections.front());
+                }
+                else
+                {
+                    const auto & replica = addresses.at(job.shard_index).at(job.replica_index);
+
+                    const ConnectionPoolPtr & connection_pool = shard_info.per_replica_pools.at(job.replica_index);
+                    if (!connection_pool)
+                        throw Exception("Connection pool for replica " + replica.readableString() + " does not exist", ErrorCodes::LOGICAL_ERROR);
+
+                    job.connection_entry = connection_pool->get(&settings);
+                    if (job.connection_entry.isNull())
+                        throw Exception("Got empty connection for replica " + replica.readableString(), ErrorCodes::LOGICAL_ERROR);
+                }
+
+                if (throttler)
+                    job.connection_entry->setThrottler(throttler);
+
+                job.stream = std::make_shared<RemoteBlockOutputStream>(*job.connection_entry, query_string, &settings);
+                job.stream->writePrefix();
+            }
+
+            CurrentMetrics::Increment metric_increment{CurrentMetrics::DistributedSend};
+            job.stream->write(block);
+        }
+        else
+        {
+            if (!job.stream)
+            {
+                /// Forward user settings
+                job.local_context = std::make_unique<Context>(storage.context);
+                job.local_context->setSettings(settings);
+
+                InterpreterInsertQuery interp(query_ast, *job.local_context);
+                job.stream = interp.execute().out;
+                job.stream->writePrefix();
+            }
+
+            size_t num_repetitions = shard_info.getLocalNodeCount();
+            for (size_t i = 0; i < num_repetitions; ++i)
+                job.stream->write(block);
+        }
+
+        ++job.blocks_written;
+        job.rows_written += block.rows();
     };
 }
 
-std::string DistributedBlockOutputStream::getCurrentStateDescription(const WritingJobContext & context)
-{
-    const Cluster::ShardsInfo & shards_info = cluster->getShardsInfo();
-    String description;
-    WriteBufferFromString buffer(description);
-
-    buffer << "Insertion status:\n";
-
-    auto writeDescription = [&buffer](const Cluster::Address & address, size_t shard_id, size_t blocks_wrote)
-    {
-        buffer << "Wrote " << blocks_wrote << " blocks on shard " << shard_id << " replica ";
-        buffer << address.toString() << '\n';
-    };
-
-    const auto addresses_with_failovers = cluster->getShardsAddresses();
-
-    size_t remote_job_id = 0;
-    size_t local_job_id = 0;
-    for (size_t shard_id : ext::range(0, shards_info.size()))
-    {
-        const auto & shard_info = shards_info[shard_id];
-        /// If hasInternalReplication, than prefer local replica
-        if (!shard_info.hasInternalReplication() || !shard_info.isLocal())
-        {
-            for (const auto & address : addresses_with_failovers[shard_id])
-                if (!address.is_local)
-                {
-                    writeDescription(address, shard_id, blocks_inserted + (context.done_remote_jobs[remote_job_id] ? 1 : 0));
-                    ++remote_job_id;
-                    if (shard_info.hasInternalReplication())
-                        break;
-                }
-        }
-
-        if (shard_info.isLocal())
-        {
-            const auto & address = shard_info.local_addresses.front();
-            writeDescription(address, shard_id, blocks_inserted + (context.done_local_jobs[local_job_id] ? 
1 : 0)); - ++local_job_id; - } - } - - return description; -} - - -void DistributedBlockOutputStream::createWritingJobs(WritingJobContext & context, const Blocks & blocks) -{ - const auto & addresses_with_failovers = cluster->getShardsAddresses(); - const Cluster::ShardsInfo & shards_info = cluster->getShardsInfo(); - - size_t remote_job_id = 0; - size_t local_job_id = 0; - for (size_t shard_id : ext::range(0, blocks.size())) - { - const auto & shard_info = shards_info[shard_id]; - /// If hasInternalReplication, than prefer local replica - if (!shard_info.hasInternalReplication() || !shard_info.isLocal()) - { - for (const auto & address : addresses_with_failovers[shard_id]) - if (!address.is_local) - { - pool->schedule(createWritingJob(context, blocks[shard_id], address, shard_id, remote_job_id)); - ++remote_job_id; - if (shard_info.hasInternalReplication()) - break; - } - } - - if (shards_info[shard_id].isLocal()) - { - const auto & address = shards_info[shard_id].local_addresses.front(); - pool->schedule(createWritingJob(context, blocks[shard_id], address, shard_id, local_job_id)); - ++local_job_id; - } - } -} - - -void DistributedBlockOutputStream::calculateJobsCount() -{ - remote_jobs_count = 0; - local_jobs_count = 0; - - const auto & addresses_with_failovers = cluster->getShardsAddresses(); - - const auto & shards_info = cluster->getShardsInfo(); - for (size_t shard_id : ext::range(0, shards_info.size())) - { - const auto & shard_info = shards_info[shard_id]; - /// If hasInternalReplication, than prefer local replica - if (!shard_info.hasInternalReplication() || !shard_info.isLocal()) - { - for (const auto & address : addresses_with_failovers[shard_id]) - if (!address.is_local) - { - ++remote_jobs_count; - if (shard_info.hasInternalReplication()) - break; - } - } - - local_jobs_count += shard_info.isLocal() ? 1 : 0; - } -} - - -void DistributedBlockOutputStream::waitForUnfinishedJobs(WritingJobContext & context) -{ - size_t jobs_count = remote_jobs_count + local_jobs_count; - auto cond = [& context, jobs_count] { return context.finished_jobs_count == jobs_count; }; - - if (insert_timeout) - { - bool were_jobs_finished; - { - std::unique_lock lock(context.mutex); - were_jobs_finished = context.cond_var.wait_until(lock, deadline, cond); - } - if (!were_jobs_finished) - { - pool->wait(); - ProfileEvents::increment(ProfileEvents::DistributedSyncInsertionTimeoutExceeded); - throw Exception("Timeout exceeded.", ErrorCodes::TIMEOUT_EXCEEDED); - } - } - else - { - std::unique_lock lock(context.mutex); - context.cond_var.wait(lock, cond); - } - pool->wait(); -} - - void DistributedBlockOutputStream::writeSync(const Block & block) { if (!pool) { /// Deferred initialization. Only for sync insertion. - calculateJobsCount(); + initWritingJobs(); pool.emplace(remote_jobs_count + local_jobs_count); + query_string = queryToString(query_ast); + + if (!throttler && (settings.limits.max_network_bandwidth || settings.limits.max_network_bytes)) + { + throttler = std::make_shared(settings.limits.max_network_bandwidth, settings.limits.max_network_bytes, + "Network bandwidth limit for a query exceeded."); + } } - WritingJobContext context; - context.done_remote_jobs.assign(remote_jobs_count, false); - context.done_local_jobs.assign(local_jobs_count, false); - context.finished_jobs_count = 0; + const auto & shards_info = cluster->getShardsInfo(); + current_blocks = shards_info.size() > 1 ? 
splitBlock(block) : Blocks({block});
-
-    const Cluster::ShardsInfo & shards_info = cluster->getShardsInfo();
-    Blocks blocks = shards_info.size() > 1 ? splitBlock(block) : Blocks({block});
-    createWritingJobs(context, blocks);
+
+    /// Run jobs in parallel for each block and wait for them
+    finished_jobs_count = 0;
+    for (size_t shard_index : ext::range(0, current_blocks.size()))
+        for (JobInfo & job : per_shard_jobs.at(shard_index))
+            pool->schedule(runWritingJob(job));
 
     try
     {
-        waitForUnfinishedJobs(context);
+        waitForJobs();
    }
     catch (Exception & exception)
     {
-        exception.addMessage(getCurrentStateDescription(context));
+        exception.addMessage(getCurrentStateDescription());
         throw;
     }
 
@@ -288,6 +299,25 @@ void DistributedBlockOutputStream::writeSync(const Block & block)
 }
 
 
+void DistributedBlockOutputStream::writeSuffix()
+{
+    if (insert_sync && pool)
+    {
+        finished_jobs_count = 0;
+        for (auto & shard_jobs : per_shard_jobs)
+            for (JobInfo & job : shard_jobs)
+            {
+                if (job.stream)
+                    pool->schedule([&job] () { job.stream->writeSuffix(); });
+            }
+
+        pool->wait();
+
+        LOG_DEBUG(&Logger::get("DistributedBlockOutputStream"), getCurrentStateDescription());
+    }
+}
+
+
 IColumn::Selector DistributedBlockOutputStream::createSelector(Block block)
 {
     storage.getShardingKeyExpr()->execute(block);
@@ -393,21 +423,10 @@ void DistributedBlockOutputStream::writeAsyncImpl(const Block & block, const siz
 
 void DistributedBlockOutputStream::writeToLocal(const Block & block, const size_t repeats)
 {
-    std::unique_ptr<Context> local_context;
-    std::optional<InterpreterInsertQuery> interp;
-
-    /// Async insert does not support settings forwarding yet whereas sync one supports
-    if (insert_sync)
-        interp.emplace(query_ast, storage.context);
-    else
-    {
-        /// Overwrite global settings by user settings
-        local_context = std::make_unique<Context>(storage.context);
-        local_context->setSettings(settings);
-        interp.emplace(query_ast, *local_context);
-    }
+    InterpreterInsertQuery interp(query_ast, storage.context);
 
-    auto block_io = interp->execute();
+    auto block_io = interp.execute();
     block_io.out->writePrefix();
 
     for (size_t i = 0; i < repeats; ++i)
@@ -417,22 +436,6 @@ void DistributedBlockOutputStream::writeToLocal(const Block & block, const size_
 }
 
 
-void DistributedBlockOutputStream::writeToShardSync(const Block & block, const std::string & connection_pool_name)
-{
-    auto pool = storage.requireConnectionPool(connection_pool_name);
-    auto connection = pool->get();
-
-    const auto & query_string = queryToString(query_ast);
-    RemoteBlockOutputStream remote{*connection, query_string, &settings};
-
-    CurrentMetrics::Increment metric_increment{CurrentMetrics::DistributedSend};
-
-    remote.writePrefix();
-    remote.write(block);
-    remote.writeSuffix();
-}
-
-
 void DistributedBlockOutputStream::writeToShard(const Block & block, const std::vector<std::string> & dir_names)
 {
     /** tmp directory is used to ensure atomicity of transactions
diff --git a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h
index de8dc12649b..cd9abaa0b89 100644
--- a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h
+++ b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.h
@@ -3,6 +3,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -39,37 +40,15 @@ public:
 
     void writePrefix() override;
 
+    void writeSuffix() override;
+
 private:
-    void writeAsync(const Block & block);
-
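// The declarations removed in this hunk and re-added below replace the old
// WritingJobContext — parallel done_remote_jobs/done_local_jobs vectors indexed
// by job id — with a per-job JobInfo struct that owns its connection entry,
// output stream and blocks/rows counters, so the whole state of one writing
// job lives in one place.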
-    /// Performs synchronous insertion to remote nodes. If timeout_exceeded flag was set, throws.
-    void writeSync(const Block & block);
-
-    void calculateJobsCount();
-
-    struct WritingJobContext
-    {
-        /// Remote job per replica.
-        std::vector done_remote_jobs;
-        /// Local job per shard.
-        std::vector done_local_jobs;
-        std::atomic finished_jobs_count;
-        std::mutex mutex;
-        std::condition_variable cond_var;
-    };
-
-    ThreadPool::Job createWritingJob(WritingJobContext & context, const Block & block,
-                                     const Cluster::Address & address, size_t shard_id, size_t job_id);
-
-    void createWritingJobs(WritingJobContext & context, const Blocks & blocks);
-
-    void waitForUnfinishedJobs(WritingJobContext & context);
-
-    /// Returns the number of blocks was written for each cluster node. Uses during exception handling.
-    std::string getCurrentStateDescription(const WritingJobContext & context);
 
     IColumn::Selector createSelector(Block block);
 
+
+    void writeAsync(const Block & block);
+
     /// Split block between shards.
     Blocks splitBlock(const Block & block);
 
@@ -82,21 +61,63 @@ private:
 
     void writeToShard(const Block & block, const std::vector<std::string> & dir_names);
 
-    /// Performs synchronous insertion to remote node.
-    void writeToShardSync(const Block & block, const std::string & connection_pool_name);
+
+    /// Performs synchronous insertion to remote nodes. If timeout_exceeded flag was set, throws.
+    void writeSync(const Block & block);
+
+    void initWritingJobs();
+
+    struct JobInfo;
+    ThreadPool::Job runWritingJob(JobInfo & job);
+
+    void waitForJobs();
+
+    /// Returns the number of blocks written for each cluster node. Used during exception handling.
+    std::string getCurrentStateDescription();
 
 private:
     StorageDistributed & storage;
     ASTPtr query_ast;
     ClusterPtr cluster;
     const Settings & settings;
-    bool insert_sync;
-    UInt64 insert_timeout;
     size_t blocks_inserted = 0;
+
+    bool insert_sync;
+
+    /// Sync-related stuff
+    UInt64 insert_timeout;
     std::chrono::steady_clock::time_point deadline;
-    size_t remote_jobs_count;
-    size_t local_jobs_count;
     std::optional<ThreadPool> pool;
+    ThrottlerPtr throttler;
+    String query_string;
+
+    struct JobInfo
+    {
+        JobInfo() = default;
+        JobInfo(size_t shard_index, size_t replica_index, bool is_local_job)
+            : shard_index(shard_index), replica_index(replica_index), is_local_job(is_local_job) {}
+
+        size_t shard_index = 0;
+        size_t replica_index = 0;
+        bool is_local_job = false;
+
+        ConnectionPool::Entry connection_entry;
+        std::unique_ptr<Context> local_context;
+        BlockOutputStreamPtr stream;
+
+        UInt64 blocks_written = 0;
+        UInt64 rows_written = 0;
+    };
+
+    std::vector<std::list<JobInfo>> per_shard_jobs;
+    Blocks current_blocks;
+
+    size_t remote_jobs_count = 0;
+    size_t local_jobs_count = 0;
+
+    std::atomic finished_jobs_count{0};
+    std::mutex mutex;
+    std::condition_variable cond_var;
 };
 
 }
diff --git a/dbms/tests/integration/test_insert_into_distributed_sync_async/test.py b/dbms/tests/integration/test_insert_into_distributed_sync_async/test.py
old mode 100644
new mode 100755
index 5b4a76bd3b1..552cb957c77
--- a/dbms/tests/integration/test_insert_into_distributed_sync_async/test.py
+++ b/dbms/tests/integration/test_insert_into_distributed_sync_async/test.py
@@ -1,9 +1,12 @@
+#!/usr/bin/env python2
+import sys
+import os
 from contextlib import contextmanager
 
-from helpers.network import PartitionManager
-from helpers.test_tools import TSV
-
 import pytest
 
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from helpers.network import PartitionManager
+from helpers.test_tools import TSV
 from helpers.cluster import ClickHouseCluster
 from helpers.client import QueryRuntimeException, 
QueryTimeoutExceedException From 585b80acf518e9e598d886538250267b7fc76c02 Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Thu, 15 Feb 2018 02:01:34 +0300 Subject: [PATCH 016/209] Fixed typo. [#CLICKHOUSE-3346] --- dbms/src/Server/ClusterCopier.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/dbms/src/Server/ClusterCopier.cpp b/dbms/src/Server/ClusterCopier.cpp index 89ea0ba39b6..2bccec2cef7 100644 --- a/dbms/src/Server/ClusterCopier.cpp +++ b/dbms/src/Server/ClusterCopier.cpp @@ -744,6 +744,9 @@ public: if (task_descprtion_current_version == version_to_update) return; + LOG_DEBUG(log, "Updating task description"); + reloadTaskDescription(); + task_descprtion_current_version = version_to_update; } @@ -816,7 +819,7 @@ public: double elapsed = cluster_partition.watch.elapsedSeconds(); - LOG_INFO(log, "It took " << std::setprecision(2) << elapsed << " seconds to copy partition " << partition_name + LOG_INFO(log, "It took " << std::fixed << std::setprecision(2) << elapsed << " seconds to copy partition " << partition_name << ": " << formatReadableSizeWithDecimalSuffix(cluster_partition.bytes_copied) << " uncompressed bytes and " << formatReadableQuantity(cluster_partition.rows_copied) << " rows are copied"); @@ -983,7 +986,10 @@ protected: { LOG_DEBUG(log, "Too many workers (" << stat.numChildren << ", maximum " << task_cluster->max_workers << ")" << ". Postpone processing " << description); + std::this_thread::sleep_for(default_sleep_time); + + updateConfigIfNeeded(); } else { @@ -1122,7 +1128,7 @@ protected: catch (...) { tryLogCurrentException(log, "An error occurred while processing partition " + task_partition.name); - return false; + res = false; } /// At the end of each task check if the config is updated From 7ff1346685e7371262126d1d4e81a87fb6b16dfd Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 19 Feb 2018 20:31:30 +0300 Subject: [PATCH 017/209] Removed UnsortedMergeTree, part 2 [#CLICKHOUSE-2] --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 7 ++----- dbms/src/Storages/MergeTree/MergeTreeData.h | 6 ++---- dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp | 6 +----- .../src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp | 5 +---- dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp | 6 +++--- dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp | 6 +++--- 6 files changed, 12 insertions(+), 24 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 2e259d2467a..a13542e3b5f 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -113,10 +113,8 @@ MergeTreeData::MergeTreeData( { merging_params.check(columns); - if (primary_expr_ast && merging_params.mode == MergingParams::Unsorted) - throw Exception("Primary key cannot be set for UnsortedMergeTree", ErrorCodes::BAD_ARGUMENTS); - if (!primary_expr_ast && merging_params.mode != MergingParams::Unsorted) - throw Exception("Primary key can be empty only for UnsortedMergeTree", ErrorCodes::BAD_ARGUMENTS); + if (!primary_expr_ast) /// TODO Allow tables without primary key. 
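    /// (With UnsortedMergeTree gone, an empty primary key is rejected outright
    /// for now; the TODO above records the intent to allow key-less tables later.)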
+ throw Exception("Primary key cannot be empty", ErrorCodes::BAD_ARGUMENTS); initPrimaryKey(); @@ -402,7 +400,6 @@ String MergeTreeData::MergingParams::getModeName() const case Collapsing: return "Collapsing"; case Summing: return "Summing"; case Aggregating: return "Aggregating"; - case Unsorted: return "Unsorted"; case Replacing: return "Replacing"; case Graphite: return "Graphite"; case VersionedCollapsing: return "VersionedCollapsing"; diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 87164d687d4..dbcd948aaae 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -78,8 +78,6 @@ namespace ErrorCodes /// column is set, keep the latest row with the maximal version. /// - Summing - sum all numeric columns not contained in the primary key for all rows with the same primary key. /// - Aggregating - merge columns containing aggregate function states for all rows with the same primary key. -/// - Unsorted - during the merge the data is not sorted but merely concatenated; this allows reading the data -/// in the same batches as they were written. /// - Graphite - performs coarsening of historical data for Graphite (a system for quantitative monitoring). /// The MergeTreeData class contains a list of parts and the data structure parameters. @@ -239,7 +237,6 @@ public: Collapsing = 1, Summing = 2, Aggregating = 3, - Unsorted = 4, Replacing = 5, Graphite = 6, VersionedCollapsing = 7, @@ -269,7 +266,7 @@ public: /// Attach the table corresponding to the directory in full_path (must end with /), with the given columns. /// Correctness of names and paths is not checked. /// - /// primary_expr_ast - expression used for sorting; empty for UnsortedMergeTree. + /// primary_expr_ast - expression used for sorting; /// date_column_name - if not empty, the name of the Date column used for partitioning by month. /// Otherwise, partition_expr_ast is used for partitioning. /// require_part_metadata - should checksums.txt and columns.txt exist in the part directory. 
@@ -442,6 +439,7 @@ public: broken_part_callback(name); } + bool hasPrimaryKey() const { return !primary_sort_descr.empty(); } ExpressionActionsPtr getPrimaryExpression() const { return primary_expr; } ExpressionActionsPtr getSecondarySortExpression() const { return secondary_sort_expr; } /// may return nullptr SortDescription getPrimarySortDescription() const { return primary_sort_descr; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp index e2cbdc10980..11b43e69ee6 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp @@ -592,7 +592,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMerger::mergePartsToTemporaryPart input->setProgressCallback(MergeProgressCallback( merge_entry, sum_input_rows_upper_bound, column_sizes, watch_prev_elapsed, merge_alg)); - if (data.merging_params.mode != MergeTreeData::MergingParams::Unsorted) + if (data.hasPrimaryKey()) src_streams.emplace_back(std::make_shared( std::make_shared(BlockInputStreamPtr(std::move(input)), data.getPrimaryExpression()))); else @@ -642,10 +642,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMerger::mergePartsToTemporaryPart src_streams, sort_desc, data.merging_params.sign_column, DEFAULT_MERGE_BLOCK_SIZE, false, rows_sources_write_buf.get()); break; - case MergeTreeData::MergingParams::Unsorted: - merged_stream = std::make_unique(src_streams); - break; - default: throw Exception("Unknown mode of operation for MergeTreeData: " + toString(data.merging_params.mode), ErrorCodes::LOGICAL_ERROR); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 79d5d226f31..dc7018bb4d6 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -520,7 +520,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::read( { RangesInDataPart ranges(part, part_index++); - if (data.merging_params.mode != MergeTreeData::MergingParams::Unsorted) + if (data.hasPrimaryKey()) ranges.ranges = markRangesFromPKRange(part->index, key_condition, settings); else ranges.ranges = MarkRanges{MarkRange{0, part->marks_count}}; @@ -830,9 +830,6 @@ BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal to_merge, data.getSortDescription(), data.merging_params.sign_column, max_block_size, true); break; - case MergeTreeData::MergingParams::Unsorted: - throw Exception("UnsortedMergeTree doesn't support FINAL", ErrorCodes::LOGICAL_ERROR); - case MergeTreeData::MergingParams::Graphite: throw Exception("GraphiteMergeTree doesn't support FINAL", ErrorCodes::LOGICAL_ERROR); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 3d63966484a..60ed5650ead 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -172,8 +172,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa dir.createDirectories(); - /// If you need to calculate some columns to sort, we do it. - if (data.merging_params.mode != MergeTreeData::MergingParams::Unsorted) + /// If we need to calculate some columns to sort. 
+ if (data.hasPrimaryKey()) { data.getPrimaryExpression()->execute(block); auto secondary_sort_expr = data.getSecondarySortExpression(); @@ -188,7 +188,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa /// Sort. IColumn::Permutation * perm_ptr = nullptr; IColumn::Permutation perm; - if (data.merging_params.mode != MergeTreeData::MergingParams::Unsorted) + if (data.hasPrimaryKey()) { if (!isAlreadySorted(block, sort_descr)) { diff --git a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp index f2acaa787b4..d3162810a1c 100644 --- a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -293,7 +293,7 @@ void MergedBlockOutputStream::writeSuffixAndFinalizePart( if (additional_column_checksums) checksums = std::move(*additional_column_checksums); - if (storage.merging_params.mode != MergeTreeData::MergingParams::Unsorted) + if (index_stream) { index_stream->next(); checksums.files["primary.idx"].file_size = index_stream->count(); @@ -354,7 +354,7 @@ void MergedBlockOutputStream::init() { Poco::File(part_path).createDirectories(); - if (storage.merging_params.mode != MergeTreeData::MergingParams::Unsorted) + if (storage.hasPrimaryKey()) { index_file_stream = std::make_unique( part_path + "primary.idx", DBMS_DEFAULT_BUFFER_SIZE, O_TRUNC | O_CREAT | O_WRONLY); @@ -443,7 +443,7 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm /// Write index. The index contains Primary Key value for each `index_granularity` row. for (size_t i = index_offset; i < rows; i += storage.index_granularity) { - if (storage.merging_params.mode != MergeTreeData::MergingParams::Unsorted) + if (storage.hasPrimaryKey()) { for (size_t j = 0, size = primary_columns.size(); j < size; ++j) { From 3b550789b04e00c63e1585bd8f473d94eb157301 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 19 Feb 2018 21:15:30 +0300 Subject: [PATCH 018/209] Added method "getHeader" in IBlockOutputStream: development [#CLICKHOUSE-2] --- dbms/src/Storages/StorageDistributed.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp index e6683d6218b..32be27fe2a4 100644 --- a/dbms/src/Storages/StorageDistributed.cpp +++ b/dbms/src/Storages/StorageDistributed.cpp @@ -1,4 +1,5 @@ #include +#include #include @@ -195,8 +196,8 @@ BlockInputStreams StorageDistributed::read( if (settings.global_subqueries_method == GlobalSubqueriesMethod::PUSH) external_tables = context.getExternalTables(); - Block header = InterpreterSelectQuery(query_info.query, context, processed_stage, 0, - std::make_shared(getSampleBlockForColumns(column_names))).execute().in->getHeader(); + Block header = materializeBlock(InterpreterSelectQuery(query_info.query, context, processed_stage, 0, + std::make_shared(getSampleBlockForColumns(column_names))).execute().in->getHeader()); ClusterProxy::SelectStreamFactory select_stream_factory( header, processed_stage, QualifiedTableName{remote_database, remote_table}, external_tables); From b747c83b1514f87c6bc69dd636853bcac8d832b4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 19 Feb 2018 22:18:51 +0300 Subject: [PATCH 019/209] Added method "getHeader" in IBlockOutputStream: development [#CLICKHOUSE-2] --- .../DataStreams/CastTypeBlockInputStream.cpp | 57 +++++++++---------- .../DataStreams/CastTypeBlockInputStream.h | 15 ++--- 
dbms/src/DataStreams/IBlockInputStream.h | 4 +-
 .../IProfilingBlockInputStream.cpp | 19 ++++---
 .../DataStreams/IProfilingBlockInputStream.h | 2 +
 .../NullableAdapterBlockInputStream.cpp | 57 ++++++++++---------
 .../NullableAdapterBlockInputStream.h | 11 ++--
 7 files changed, 79 insertions(+), 86 deletions(-)

diff --git a/dbms/src/DataStreams/CastTypeBlockInputStream.cpp b/dbms/src/DataStreams/CastTypeBlockInputStream.cpp
index 9dbd0962c93..d19e9764793 100644
--- a/dbms/src/DataStreams/CastTypeBlockInputStream.cpp
+++ b/dbms/src/DataStreams/CastTypeBlockInputStream.cpp
@@ -8,11 +8,35 @@ namespace DB

 CastTypeBlockInputStream::CastTypeBlockInputStream(
     const Context & context_,
-    const BlockInputStreamPtr & input_,
-    const Block & reference_definition_)
-    : context(context_), ref_definition(reference_definition_)
+    const BlockInputStreamPtr & input,
+    const Block & reference_definition)
+    : context(context_)
 {
-    children.emplace_back(input_);
+    children.emplace_back(input);
+
+    Block input_header = input->getHeader();
+
+    for (size_t col_num = 0, num_columns = input_header.columns(); col_num < num_columns; ++col_num)
+    {
+        const auto & elem = input_header.getByPosition(col_num);
+
+        /// Skip; if it is a problem, it will be detected at the next pipeline stage
+        if (!reference_definition.has(elem.name))
+            continue;
+
+        const auto & ref_column = reference_definition.getByName(elem.name);
+
+        /// Add a cast only if the source and destination types are different.
+        if (ref_column.type->equals(*elem.type))
+        {
+            header.insert(elem);
+        }
+        else
+        {
+            header.insert({ castColumn(elem, ref_column.type, context), ref_column.type, elem.name });
+            cast_description.emplace(col_num, ref_column.type);
+        }
+    }
 }

 String CastTypeBlockInputStream::getName() const
@@ -27,12 +51,6 @@ Block CastTypeBlockInputStream::readImpl()
     if (!block)
         return block;

-    if (!initialized)
-    {
-        initialized = true;
-        initialize(block);
-    }
-
     if (cast_description.empty())
         return block;
@@ -53,23 +71,4 @@ Block CastTypeBlockInputStream::readImpl()
     return res;
 }

-
-void CastTypeBlockInputStream::initialize(const Block & src_block)
-{
-    for (size_t src_col = 0, num_columns = src_block.columns(); src_col < num_columns; ++src_col)
-    {
-        const auto & src_column = src_block.getByPosition(src_col);
-
-        /// Skip, if it is a problem, it will be detected on the next pipeline stage
-        if (!ref_definition.has(src_column.name))
-            continue;
-
-        const auto & ref_column = ref_definition.getByName(src_column.name);
-
-        /// Force conversion if source and destination types is different.
-        if (!ref_column.type->equals(*src_column.type))
-            cast_description.emplace(src_col, ref_column.type);
-    }
-}
-
 }
diff --git a/dbms/src/DataStreams/CastTypeBlockInputStream.h b/dbms/src/DataStreams/CastTypeBlockInputStream.h
index b92a7ffa31f..f84f6dacf0e 100644
--- a/dbms/src/DataStreams/CastTypeBlockInputStream.h
+++ b/dbms/src/DataStreams/CastTypeBlockInputStream.h
@@ -7,7 +7,7 @@ namespace DB
 {

-/// Implicitly converts string and numeric values to Enum, numeric types to other numeric types.
+/// Implicitly converts column types to those of a reference definition (e.g. string and numeric values to Enum, numeric types to other numeric types).
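+/// A minimal usage sketch (hypothetical `source` stream and `expected_header`, not part of this
+/// patch): the conversions are now derived from the input header in the constructor, so
+/// getHeader() already reports the post-cast structure before any block is read.
+///
+///     BlockInputStreamPtr casting = std::make_shared<CastTypeBlockInputStream>(
+///         context, source, expected_header);
+///     Block converted = casting->read();  /// columns arrive cast to the reference types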
class CastTypeBlockInputStream : public IProfilingBlockInputStream { public: @@ -17,18 +17,13 @@ public: String getName() const override; - Block getHeader() const override { return ref_definition; } - -protected: - Block readImpl() override; + Block getHeader() const override { return header; } private: - const Context & context; - Block ref_definition; + Block readImpl() override; - /// Initializes cast_description and prepares tmp_conversion_block - void initialize(const Block & src_block); - bool initialized = false; + const Context & context; + Block header; /// Describes required conversions on source block /// Contains column numbers in source block that should be converted diff --git a/dbms/src/DataStreams/IBlockInputStream.h b/dbms/src/DataStreams/IBlockInputStream.h index 77ee6b94157..cf158f9b46c 100644 --- a/dbms/src/DataStreams/IBlockInputStream.h +++ b/dbms/src/DataStreams/IBlockInputStream.h @@ -118,9 +118,7 @@ private: size_t checkDepthImpl(size_t max_depth, size_t level) const; - /** Get text that identifies this source and the entire subtree. - * Unlike getID - without taking into account the parameters. - */ + /// Get text with names of this source and the entire subtree. String getTreeID() const; }; diff --git a/dbms/src/DataStreams/IProfilingBlockInputStream.cpp b/dbms/src/DataStreams/IProfilingBlockInputStream.cpp index 2fa41804d51..eee930e6248 100644 --- a/dbms/src/DataStreams/IProfilingBlockInputStream.cpp +++ b/dbms/src/DataStreams/IProfilingBlockInputStream.cpp @@ -21,11 +21,11 @@ namespace ErrorCodes } -[[maybe_unused]] static void checkBlockStructure(const Block & block, const Block & header) +void IProfilingBlockInputStream::checkBlockStructure(const Block & block, const Block & header) { size_t columns = header.columns(); if (block.columns() != columns) - throw Exception("Block structure mismatch: different number of columns:\n" + throw Exception("Block structure mismatch in " + getName() + " stream: different number of columns:\n" + block.dumpStructure() + "\n" + header.dumpStructure(), ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE); for (size_t i = 0; i < columns; ++i) @@ -34,20 +34,21 @@ namespace ErrorCodes const auto & actual = block.getByPosition(i); if (actual.name != expected.name) - throw Exception("Block structure mismatch: different names of columns:\n" - + block.dumpStructure() + "\n" + header.dumpStructure(), ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE); + throw Exception("Block structure mismatch in " + getName() + " stream: different names of columns:\n" + + block.dumpStructure() + "\n" + header.dumpStructure(), ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE); if (!actual.type->equals(*expected.type)) - throw Exception("Block structure mismatch: different types:\n" - + block.dumpStructure() + "\n" + header.dumpStructure(), ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE); + throw Exception("Block structure mismatch in " + getName() + " stream: different types:\n" + + block.dumpStructure() + "\n" + header.dumpStructure(), ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE); if (actual.column->getName() != expected.column->getName()) - throw Exception("Block structure mismatch: different columns:\n" - + block.dumpStructure() + "\n" + header.dumpStructure(), ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE); + throw Exception("Block structure mismatch in " + getName() + " stream: different columns:\n" + + block.dumpStructure() + "\n" + header.dumpStructure(), ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE); if (actual.column->isColumnConst() && 
expected.column->isColumnConst() && static_cast(*actual.column).getField() != static_cast(*expected.column).getField()) - throw Exception("Block structure mismatch: different values of constants", ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE); + throw Exception("Block structure mismatch in " + getName() + " stream: different values of constants", + ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE); } } diff --git a/dbms/src/DataStreams/IProfilingBlockInputStream.h b/dbms/src/DataStreams/IProfilingBlockInputStream.h index 048473be30c..062d6929fcc 100644 --- a/dbms/src/DataStreams/IProfilingBlockInputStream.h +++ b/dbms/src/DataStreams/IProfilingBlockInputStream.h @@ -224,6 +224,8 @@ private: * It is done so that sending occurs only in the upper stream. */ void collectAndSendTotalRowsApprox(); + + void checkBlockStructure(const Block & block, const Block & header); }; } diff --git a/dbms/src/DataStreams/NullableAdapterBlockInputStream.cpp b/dbms/src/DataStreams/NullableAdapterBlockInputStream.cpp index f10ff9b876b..d7d23633b72 100644 --- a/dbms/src/DataStreams/NullableAdapterBlockInputStream.cpp +++ b/dbms/src/DataStreams/NullableAdapterBlockInputStream.cpp @@ -14,32 +14,19 @@ namespace ErrorCodes extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; } -NullableAdapterBlockInputStream::NullableAdapterBlockInputStream( - const BlockInputStreamPtr & input, - const Block & src_header_, const Block & res_header_) - : header(res_header_) -{ - buildActions(src_header_, res_header_); - children.push_back(input); -} -Block NullableAdapterBlockInputStream::readImpl() +static Block transform(const Block & block, const NullableAdapterBlockInputStream::Actions & actions, const std::vector> & rename) { - Block block = children.back()->read(); - - if (!block && !must_transform) - return block; + size_t num_columns = block.columns(); Block res; - size_t s = block.columns(); - - for (size_t i = 0; i < s; ++i) + for (size_t i = 0; i < num_columns; ++i) { const auto & elem = block.getByPosition(i); switch (actions[i]) { - case TO_ORDINARY: + case NullableAdapterBlockInputStream::TO_ORDINARY: { const auto & nullable_col = static_cast(*elem.column); const auto & nullable_type = static_cast(*elem.type); @@ -54,11 +41,10 @@ Block NullableAdapterBlockInputStream::readImpl() res.insert({ nullable_col.getNestedColumnPtr(), nullable_type.getNestedType(), - rename[i].value_or(elem.name) - }); + rename[i].value_or(elem.name)}); break; } - case TO_NULLABLE: + case NullableAdapterBlockInputStream::TO_NULLABLE: { ColumnPtr null_map = ColumnUInt8::create(elem.column->size(), 0); @@ -68,12 +54,9 @@ Block NullableAdapterBlockInputStream::readImpl() rename[i].value_or(elem.name)}); break; } - case NONE: + case NullableAdapterBlockInputStream::NONE: { - if (rename[i]) - res.insert({elem.column, elem.type, *rename[i]}); - else - res.insert(elem); + res.insert({elem.column, elem.type, rename[i].value_or(elem.name)}); break; } } @@ -82,6 +65,27 @@ Block NullableAdapterBlockInputStream::readImpl() return res; } + +NullableAdapterBlockInputStream::NullableAdapterBlockInputStream( + const BlockInputStreamPtr & input, + const Block & src_header, const Block & res_header) +{ + buildActions(src_header, res_header); + children.push_back(input); + header = transform(src_header, actions, rename); +} + + +Block NullableAdapterBlockInputStream::readImpl() +{ + Block block = children.back()->read(); + + if (!block) + return block; + + return transform(block, actions, rename); +} + void NullableAdapterBlockInputStream::buildActions( const Block & 
src_header, const Block & res_header) @@ -113,9 +117,6 @@ void NullableAdapterBlockInputStream::buildActions( rename.emplace_back(std::make_optional(out_elem.name)); else rename.emplace_back(); - - if (actions.back() != NONE || rename.back()) - must_transform = true; } } diff --git a/dbms/src/DataStreams/NullableAdapterBlockInputStream.h b/dbms/src/DataStreams/NullableAdapterBlockInputStream.h index ac21f852e3e..60c2b2ec16e 100644 --- a/dbms/src/DataStreams/NullableAdapterBlockInputStream.h +++ b/dbms/src/DataStreams/NullableAdapterBlockInputStream.h @@ -18,16 +18,13 @@ namespace DB class NullableAdapterBlockInputStream : public IProfilingBlockInputStream { public: - NullableAdapterBlockInputStream(const BlockInputStreamPtr & input, const Block & src_header_, const Block & res_header_); + NullableAdapterBlockInputStream(const BlockInputStreamPtr & input, const Block & src_header, const Block & res_header); - String getName() const override { return "NullableAdapterBlockInputStream"; } + String getName() const override { return "NullableAdapter"; } Block getHeader() const override { return header; } -protected: - Block readImpl() override; -private: /// Given a column of a block we have just read, /// how must we process it? enum Action @@ -44,17 +41,17 @@ private: using Actions = std::vector; private: + Block readImpl() override; + /// Determine the actions to be taken using the source sample block, /// which describes the columns from which we fetch data inside an INSERT /// query, and the target sample block which contains the columns /// we insert data into. void buildActions(const Block & src_header, const Block & res_header); -private: Block header; Actions actions; std::vector> rename; - bool must_transform = false; }; } From ee4b5d25278b6a76c69d7faf8b1d0955776818eb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 19 Feb 2018 23:23:25 +0300 Subject: [PATCH 020/209] Added method "getHeader" in IBlockOutputStream: development [#CLICKHOUSE-2] --- .../AddingDefaultBlockOutputStream.cpp | 3 - dbms/src/Interpreters/ExpressionActions.cpp | 61 ++++++------------- dbms/src/Interpreters/ExpressionActions.h | 9 +-- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 8 ++- .../Interpreters/InterpreterSelectQuery.cpp | 10 +-- 5 files changed, 28 insertions(+), 63 deletions(-) diff --git a/dbms/src/DataStreams/AddingDefaultBlockOutputStream.cpp b/dbms/src/DataStreams/AddingDefaultBlockOutputStream.cpp index a2f46fccf8a..205b665c712 100644 --- a/dbms/src/DataStreams/AddingDefaultBlockOutputStream.cpp +++ b/dbms/src/DataStreams/AddingDefaultBlockOutputStream.cpp @@ -71,9 +71,6 @@ void AddingDefaultBlockOutputStream::write(const Block & block) } /// Computes explicitly specified values (in column_defaults) by default. - /** @todo if somehow block does not contain values for implicitly-defaulted columns that are prerequisites - * for explicitly-defaulted ones, exception will be thrown during evaluating such columns - * (implicitly-defaulted columns are evaluated on the line after following one. 
*/ evaluateMissingDefaults(res, required_columns, column_defaults, context); output->write(res); diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index 1f8a5bb8f0d..d34cbe1d5b1 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -142,39 +142,11 @@ ExpressionAction ExpressionAction::ordinaryJoin(std::shared_ptr join } -ExpressionActions::Actions ExpressionAction::getPrerequisites(Block & sample_block) -{ - ExpressionActions::Actions res; - - if (type == APPLY_FUNCTION) - { - if (sample_block.has(result_name)) - throw Exception("Column '" + result_name + "' already exists", ErrorCodes::DUPLICATE_COLUMN); - - ColumnsWithTypeAndName arguments(argument_names.size()); - for (size_t i = 0; i < argument_names.size(); ++i) - { - if (!sample_block.has(argument_names[i])) - throw Exception("Unknown identifier: '" + argument_names[i] + "'", ErrorCodes::UNKNOWN_IDENTIFIER); - arguments[i] = sample_block.getByName(argument_names[i]); - } - - function = function_builder->build(arguments); - result_type = function->getReturnType(); - } - - return res; -} - void ExpressionAction::prepare(Block & sample_block) { // std::cerr << "preparing: " << toString() << std::endl; /** Constant expressions should be evaluated, and put the result in sample_block. - * For non-constant columns, put the nullptr as the column in sample_block. - * - * The fact that only for constant expressions column != nullptr, - * can be used later when optimizing the query. */ switch (type) @@ -567,40 +539,43 @@ void ExpressionActions::addInput(const NameAndTypePair & column) void ExpressionActions::add(const ExpressionAction & action, Names & out_new_columns) { - NameSet temp_names; - addImpl(action, temp_names, out_new_columns); + addImpl(action, out_new_columns); } void ExpressionActions::add(const ExpressionAction & action) { - NameSet temp_names; Names new_names; - addImpl(action, temp_names, new_names); + addImpl(action, new_names); } -void ExpressionActions::addImpl(ExpressionAction action, NameSet & current_names, Names & new_names) +void ExpressionActions::addImpl(ExpressionAction action, Names & new_names) { if (sample_block.has(action.result_name)) return; - if (current_names.count(action.result_name)) - throw Exception("Cyclic function prerequisites: " + action.result_name, ErrorCodes::LOGICAL_ERROR); - - current_names.insert(action.result_name); - if (action.result_name != "") new_names.push_back(action.result_name); new_names.insert(new_names.end(), action.array_joined_columns.begin(), action.array_joined_columns.end()); - Actions prerequisites = action.getPrerequisites(sample_block); + if (action.type == ExpressionAction::APPLY_FUNCTION) + { + if (sample_block.has(action.result_name)) + throw Exception("Column '" + action.result_name + "' already exists", ErrorCodes::DUPLICATE_COLUMN); - for (size_t i = 0; i < prerequisites.size(); ++i) - addImpl(prerequisites[i], current_names, new_names); + ColumnsWithTypeAndName arguments(action.argument_names.size()); + for (size_t i = 0; i < action.argument_names.size(); ++i) + { + if (!sample_block.has(action.argument_names[i])) + throw Exception("Unknown identifier: '" + action.argument_names[i] + "'", ErrorCodes::UNKNOWN_IDENTIFIER); + arguments[i] = sample_block.getByName(action.argument_names[i]); + } + + action.function = action.function_builder->build(arguments); + action.result_type = action.function->getReturnType(); + } action.prepare(sample_block); 
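+    /// At this point prepare() has already placed the action's result column into
+    /// sample_block (evaluating it when it is constant). Since the prerequisites
+    /// mechanism is gone, addImpl() is no longer recursive, so the old cycle
+    /// detection via current_names (removed below) is unnecessary.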
actions.push_back(action); - - current_names.erase(action.result_name); } void ExpressionActions::prependProjectInput() diff --git a/dbms/src/Interpreters/ExpressionActions.h b/dbms/src/Interpreters/ExpressionActions.h index 59434c741ac..ac855add69d 100644 --- a/dbms/src/Interpreters/ExpressionActions.h +++ b/dbms/src/Interpreters/ExpressionActions.h @@ -99,7 +99,6 @@ public: static ExpressionAction ordinaryJoin(std::shared_ptr join_, const NamesAndTypesList & columns_added_by_join_); /// Which columns necessary to perform this action. - /// If this `Action` is not already added to `ExpressionActions`, the returned list may be incomplete, because `prerequisites` are not taken into account. Names getNeededColumns() const; std::string toString() const; @@ -107,7 +106,6 @@ public: private: friend class ExpressionActions; - std::vector getPrerequisites(Block & sample_block); void prepare(Block & sample_block); void execute(Block & block) const; void executeOnTotals(Block & block) const; @@ -147,8 +145,7 @@ public: void add(const ExpressionAction & action); - /// Adds new column names to out_new_columns - /// (formed as a result of the added action and its prerequisites). + /// Adds new column names to out_new_columns (formed as a result of the added action). void add(const ExpressionAction & action, Names & out_new_columns); /// Adds to the beginning the removal of all extra columns. @@ -208,9 +205,7 @@ private: void checkLimits(Block & block) const; - /// Adds all `prerequisites` first, then the action itself. - /// current_names - columns whose `prerequisites` are currently being processed. - void addImpl(ExpressionAction action, NameSet & current_names, Names & new_names); + void addImpl(ExpressionAction action, Names & new_names); /// Try to improve something without changing the lists of input and output columns. 
void optimize(); diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 259cd8f3503..ccd1615826e 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -2653,7 +2653,13 @@ Block ExpressionAnalyzer::getSelectSampleBlock() temp_actions->add(ExpressionAction::project(result_columns)); - return temp_actions->getSampleBlock(); + Block res = temp_actions->getSampleBlock(); + + for (auto & elem : res) + if (!elem.column) + elem.column = elem.type->createColumn(); + + return res; } void ExpressionAnalyzer::getActionsBeforeAggregation(const ASTPtr & ast, ExpressionActionsPtr & actions, bool no_subqueries) diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index be916e0b7b0..8a057c3eda8 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -374,15 +374,7 @@ DataTypes InterpreterSelectQuery::getReturnTypes() Block InterpreterSelectQuery::getSampleBlock() { - Block block = query_analyzer->getSelectSampleBlock(); - /// create non-zero columns so that SampleBlock can be - /// written (read) with BlockOut(In)putStreams - for (size_t i = 0; i < block.columns(); ++i) - { - ColumnWithTypeAndName & col = block.safeGetByPosition(i); - col.column = col.type->createColumn(); - } - return block; + return query_analyzer->getSelectSampleBlock(); } From 582798b77f7345f5514f5b8d3cc7c999cc305e72 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 19 Feb 2018 23:42:05 +0300 Subject: [PATCH 021/209] Added method "getHeader" in IBlockOutputStream: development [#CLICKHOUSE-2] --- dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp b/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp index 455a05e3ad5..5e1d3835d71 100644 --- a/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp +++ b/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -108,6 +109,7 @@ void CreatingSetsBlockInputStream::createOne(SubqueryForSet & subquery) if (!done_with_table) { + block = materializeBlock(block); table_out->write(block); rows_to_transfer += block.rows(); From bb05235608339396474c52de4a9ba12a61c99590 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 20 Feb 2018 00:01:46 +0300 Subject: [PATCH 022/209] Added method "getHeader" in IBlockOutputStream: development [#CLICKHOUSE-2] --- dbms/src/Interpreters/Join.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index c469fc733a8..d5b7104d8d1 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -11,6 +11,8 @@ #include #include +#include + #include #include @@ -281,7 +283,7 @@ void Join::setSampleBlock(const Block & block) /// Choose data structure to use for JOIN. init(chooseMethod(key_columns, key_sizes)); - sample_block_with_columns_to_add = block; + sample_block_with_columns_to_add = materializeBlock(block); /// Move from `sample_block_with_columns_to_add` key columns to `sample_block_with_keys`, keeping the order. size_t pos = 0; @@ -462,8 +464,8 @@ bool Join::insertFromBlock(const Block & block) if (getFullness(kind)) { - /** Transfer the key columns to the beginning of the block. 
- * This is where NonJoinedBlockInputStream will wait for them.
+ /** Move the key columns to the beginning of the block.
+ * This is where NonJoinedBlockInputStream will expect them.
 */
        size_t key_num = 0;
        for (const auto & name : key_names_right)

From 4add285d55497436045d2c1f1b4dc1194e8474bf Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Tue, 20 Feb 2018 02:39:16 +0300
Subject: [PATCH 023/209] Added method "getHeader" in IBlockOutputStream: development [#CLICKHOUSE-2]

---
 dbms/src/Interpreters/InterpreterSelectQuery.cpp | 11 -----------
 dbms/src/Interpreters/InterpreterSelectQuery.h | 1 -
 2 files changed, 12 deletions(-)

diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp
index 8a057c3eda8..1b6ed45e5d3 100644
--- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp
@@ -361,17 +361,6 @@ void InterpreterSelectQuery::getDatabaseAndTableNames(String & database_name, St
 }

-DataTypes InterpreterSelectQuery::getReturnTypes()
-{
-    DataTypes res;
-    const NamesAndTypesList & columns = query_analyzer->getSelectSampleBlock().getNamesAndTypesList();
-    for (auto & column : columns)
-        res.push_back(column.type);
-
-    return res;
-}
-
-
 Block InterpreterSelectQuery::getSampleBlock()
 {
     return query_analyzer->getSelectSampleBlock();
 }

diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.h b/dbms/src/Interpreters/InterpreterSelectQuery.h
index cfc61e35f2c..b794907d23d 100644
--- a/dbms/src/Interpreters/InterpreterSelectQuery.h
+++ b/dbms/src/Interpreters/InterpreterSelectQuery.h
@@ -81,7 +81,6 @@ public:
     const BlockInputStreams & executeWithoutUnion();

     /// TODO It's confusing that these methods return result structure for the case of QueryProcessingStage::Complete regardless of the actual 'to_stage'.
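+    /// getReturnTypes() is deleted below because a header Block carries the same information;
+    /// a caller that only needs the types can derive them, e.g. (illustrative sketch):
+    ///
+    ///     DataTypes types;
+    ///     for (const auto & elem : interpreter.getSampleBlock())
+    ///         types.push_back(elem.type);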
- DataTypes getReturnTypes(); Block getSampleBlock(); static Block getSampleBlock( From fa6420b34d4e3681a8720e507c2744b5949db87e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 20 Feb 2018 04:14:38 +0300 Subject: [PATCH 024/209] Added method "getHeader" in IBlockOutputStream: development [#CLICKHOUSE-2] --- dbms/src/Core/Block.cpp | 69 ++++++++++++++----- dbms/src/Core/Block.h | 7 +- dbms/src/Core/ColumnWithTypeAndName.cpp | 32 +++++---- dbms/src/Core/ColumnWithTypeAndName.h | 7 +- .../MergingSortedBlockInputStream.cpp | 15 +--- .../DataStreams/RemoteBlockOutputStream.cpp | 9 +-- dbms/src/DataStreams/UnionBlockInputStream.h | 8 +++ .../Interpreters/InterpreterCheckQuery.cpp | 43 ------------ dbms/src/Interpreters/InterpreterCheckQuery.h | 3 - .../Interpreters/InterpreterSelectQuery.cpp | 14 ---- dbms/src/Storages/StorageBuffer.cpp | 5 +- 11 files changed, 92 insertions(+), 120 deletions(-) diff --git a/dbms/src/Core/Block.cpp b/dbms/src/Core/Block.cpp index 328aa6074f0..4d3889da8b9 100644 --- a/dbms/src/Core/Block.cpp +++ b/dbms/src/Core/Block.cpp @@ -6,6 +6,8 @@ #include #include +#include + #include #include @@ -18,6 +20,7 @@ namespace ErrorCodes extern const int POSITION_OUT_OF_BOUND; extern const int NOT_FOUND_COLUMN_IN_BLOCK; extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; + extern const int BLOCKS_HAVE_DIFFERENT_STRUCTURE; } @@ -276,13 +279,7 @@ std::string Block::dumpStructure() const { if (it != data.begin()) out << ", "; - - out << it->name << ' ' << it->type->getName(); - - if (it->column) - out << ' ' << it->column->dumpStructure(); - else - out << " nullptr"; + it->dumpStructure(out); } return out.str(); } @@ -379,22 +376,58 @@ Names Block::getNames() const } -bool blocksHaveEqualStructure(const Block & lhs, const Block & rhs) +template +static ReturnType checkBlockStructure(const Block & lhs, const Block & rhs, const std::string & context_description) { - size_t columns = lhs.columns(); - if (rhs.columns() != columns) - return false; + auto on_error = [](const std::string & message [[maybe_unused]], int code [[maybe_unused]]) + { + if constexpr (std::is_same_v) + throw Exception(message, code); + else + return false; + }; + + size_t columns = rhs.columns(); + if (lhs.columns() != columns) + return on_error("Block structure mismatch in " + context_description + " stream: different number of columns:\n" + + lhs.dumpStructure() + "\n" + rhs.dumpStructure(), ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE); for (size_t i = 0; i < columns; ++i) { - const IDataType & lhs_type = *lhs.safeGetByPosition(i).type; - const IDataType & rhs_type = *rhs.safeGetByPosition(i).type; + const auto & expected = rhs.getByPosition(i); + const auto & actual = lhs.getByPosition(i); - if (!lhs_type.equals(rhs_type)) - return false; + if (actual.name != expected.name) + return on_error("Block structure mismatch in " + context_description + " stream: different names of columns:\n" + + lhs.dumpStructure() + "\n" + rhs.dumpStructure(), ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE); + + if (!actual.type->equals(*expected.type)) + return on_error("Block structure mismatch in " + context_description + " stream: different types:\n" + + lhs.dumpStructure() + "\n" + rhs.dumpStructure(), ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE); + + if (actual.column->getName() != expected.column->getName()) + return on_error("Block structure mismatch in " + context_description + " stream: different columns:\n" + + lhs.dumpStructure() + "\n" + rhs.dumpStructure(), ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE); + + if 
(actual.column->isColumnConst() && expected.column->isColumnConst() + && static_cast(*actual.column).getField() != static_cast(*expected.column).getField()) + return on_error("Block structure mismatch in " + context_description + " stream: different values of constants", + ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE); } - return true; + return ReturnType(true); +} + + +bool blocksHaveEqualStructure(const Block & lhs, const Block & rhs) +{ + return checkBlockStructure(lhs, rhs, {}); +} + + +void assertBlocksHaveEqualStructure(const Block & lhs, const Block & rhs, const std::string & context_description) +{ + checkBlockStructure(lhs, rhs, context_description); } @@ -453,12 +486,12 @@ void getBlocksDifference(const Block & lhs, const Block & rhs, std::string & out for (auto it = left_columns.rbegin(); it != left_columns.rend(); ++it) { - lhs_diff_writer << it->prettyPrint(); + lhs_diff_writer << it->dumpStructure(); lhs_diff_writer << ", position: " << lhs.getPositionByName(it->name) << '\n'; } for (auto it = right_columns.rbegin(); it != right_columns.rend(); ++it) { - rhs_diff_writer << it->prettyPrint(); + rhs_diff_writer << it->dumpStructure(); rhs_diff_writer << ", position: " << rhs.getPositionByName(it->name) << '\n'; } } diff --git a/dbms/src/Core/Block.h b/dbms/src/Core/Block.h index 0513f50456e..7c836e49532 100644 --- a/dbms/src/Core/Block.h +++ b/dbms/src/Core/Block.h @@ -137,10 +137,13 @@ using Blocks = std::vector; using BlocksList = std::list; -/// Compare column types for blocks. The order of the columns matters. Names do not matter. +/// Compare number of columns, data types, column types, column names, and values of constant columns. bool blocksHaveEqualStructure(const Block & lhs, const Block & rhs); -/// Calculate difference in structure of blocks and write description into output strings. +/// Throw exception when blocks are different. +void assertBlocksHaveEqualStructure(const Block & lhs, const Block & rhs, const std::string & context_description); + +/// Calculate difference in structure of blocks and write description into output strings. NOTE It doesn't compare values of constant columns. 
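+/// For a hard check use assertBlocksHaveEqualStructure() declared above; a usage sketch
+/// (hypothetical `block` and `expected_header`):
+///
+///     assertBlocksHaveEqualStructure(block, expected_header, "MyStream");
+///     /// on mismatch, throws BLOCKS_HAVE_DIFFERENT_STRUCTURE describing the difference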
void getBlocksDifference(const Block & lhs, const Block & rhs, std::string & out_lhs_diff, std::string & out_rhs_diff); diff --git a/dbms/src/Core/ColumnWithTypeAndName.cpp b/dbms/src/Core/ColumnWithTypeAndName.cpp index 37afe8a4641..9acc2d56408 100644 --- a/dbms/src/Core/ColumnWithTypeAndName.cpp +++ b/dbms/src/Core/ColumnWithTypeAndName.cpp @@ -1,6 +1,7 @@ #include #include #include +#include namespace DB @@ -19,7 +20,7 @@ ColumnWithTypeAndName ColumnWithTypeAndName::cloneEmpty() const } -bool ColumnWithTypeAndName::operator== (const ColumnWithTypeAndName & other) const +bool ColumnWithTypeAndName::operator==(const ColumnWithTypeAndName & other) const { return name == other.name && ((!type && !other.type) || (type && other.type && type->equals(*other.type))) @@ -27,20 +28,25 @@ bool ColumnWithTypeAndName::operator== (const ColumnWithTypeAndName & other) con } -String ColumnWithTypeAndName::prettyPrint() const +void ColumnWithTypeAndName::dumpStructure(WriteBuffer & out) const +{ + out << name; + + if (type) + out << ' ' << type->getName(); + else + out << " nullptr"; + + if (column) + out << ' ' << column->dumpStructure(); + else + out << " nullptr"; +} + +String ColumnWithTypeAndName::dumpStructure() const { WriteBufferFromOwnString out; - writeString(name, out); - if (type) - { - writeChar(' ', out); - writeString(type->getName(), out); - } - if (column) - { - writeChar(' ', out); - writeString(column->getName(), out); - } + dumpStructure(out); return out.str(); } diff --git a/dbms/src/Core/ColumnWithTypeAndName.h b/dbms/src/Core/ColumnWithTypeAndName.h index edf61430abf..9c52145f581 100644 --- a/dbms/src/Core/ColumnWithTypeAndName.h +++ b/dbms/src/Core/ColumnWithTypeAndName.h @@ -7,6 +7,9 @@ namespace DB { +class WriteBuffer; + + /** Column data along with its data type and name. * Column data could be nullptr - to represent just 'header' of column. * Name could be either name from a table or some temporary generated name during expression evaluation. 
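  * dumpStructure() below prints all three parts (name, type, column), falling back to
  * "nullptr" for a missing piece; the "Block structure mismatch" exceptions rely on it
  * through Block::dumpStructure().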
@@ -28,7 +31,9 @@ struct ColumnWithTypeAndName ColumnWithTypeAndName cloneEmpty() const; bool operator==(const ColumnWithTypeAndName & other) const; - String prettyPrint() const; + + void dumpStructure(WriteBuffer & out) const; + String dumpStructure() const; }; } diff --git a/dbms/src/DataStreams/MergingSortedBlockInputStream.cpp b/dbms/src/DataStreams/MergingSortedBlockInputStream.cpp index aee5b34899f..c256e49e60e 100644 --- a/dbms/src/DataStreams/MergingSortedBlockInputStream.cpp +++ b/dbms/src/DataStreams/MergingSortedBlockInputStream.cpp @@ -11,7 +11,6 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; - extern const int BLOCKS_HAVE_DIFFERENT_STRUCTURE; } @@ -92,19 +91,7 @@ void MergingSortedBlockInputStream::init(Block & header, MutableColumns & merged if (!*shared_block_ptr) continue; - size_t src_columns = shared_block_ptr->columns(); - size_t dst_columns = header.columns(); - - if (src_columns != dst_columns) - throw Exception("Merging blocks have different number of columns (" - + toString(src_columns) + " and " + toString(dst_columns) + ")", - ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH); - - for (size_t i = 0; i < src_columns; ++i) - if (!blocksHaveEqualStructure(*shared_block_ptr, header)) - throw Exception("Merging blocks have different names or types of columns:\n" - + shared_block_ptr->dumpStructure() + "\nand\n" + header.dumpStructure(), - ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE); + assertBlocksHaveEqualStructure(*shared_block_ptr, header, getName()); } merged_columns.resize(num_columns); diff --git a/dbms/src/DataStreams/RemoteBlockOutputStream.cpp b/dbms/src/DataStreams/RemoteBlockOutputStream.cpp index e3a861d9dab..bd9cef66790 100644 --- a/dbms/src/DataStreams/RemoteBlockOutputStream.cpp +++ b/dbms/src/DataStreams/RemoteBlockOutputStream.cpp @@ -46,14 +46,7 @@ RemoteBlockOutputStream::RemoteBlockOutputStream(Connection & connection_, const void RemoteBlockOutputStream::write(const Block & block) { - if (!blocksHaveEqualStructure(block, header)) - { - std::stringstream message; - message << "Block structure is different from table structure.\n" - << "\nTable structure:\n(" << header.dumpStructure() << ")\nBlock structure:\n(" << block.dumpStructure() << ")\n"; - throw Exception(message.str()); - } - + assertBlocksHaveEqualStructure(block, header, "RemoteBlockOutputStream"); connection.sendData(block); } diff --git a/dbms/src/DataStreams/UnionBlockInputStream.h b/dbms/src/DataStreams/UnionBlockInputStream.h index 176cb472a61..f45ff396cb6 100644 --- a/dbms/src/DataStreams/UnionBlockInputStream.h +++ b/dbms/src/DataStreams/UnionBlockInputStream.h @@ -82,6 +82,14 @@ public: children = inputs; if (additional_input_at_end) children.push_back(additional_input_at_end); + + size_t num_children = children.size(); + if (num_children > 1) + { + Block header = children.at(0)->getHeader(); + for (size_t i = 1; i < num_children; ++i) + assertBlocksHaveEqualStructure(children[i]->getHeader(), header, "UNION"); + } } String getName() const override { return "Union"; } diff --git a/dbms/src/Interpreters/InterpreterCheckQuery.cpp b/dbms/src/Interpreters/InterpreterCheckQuery.cpp index d96344190b5..068b7897105 100644 --- a/dbms/src/Interpreters/InterpreterCheckQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCheckQuery.cpp @@ -92,49 +92,6 @@ InterpreterCheckQuery::InterpreterCheckQuery(const ASTPtr & query_ptr_, const Co { } -Block InterpreterCheckQuery::getSampleBlock() const -{ - Block block; - ColumnWithTypeAndName col; - - 
col.name = "status"; - col.type = std::make_shared(); - col.column = col.type->createColumn(); - block.insert(col); - - col.name = "host_name"; - col.type = std::make_shared(); - col.column = col.type->createColumn(); - block.insert(col); - - col.name = "host_address"; - col.type = std::make_shared(); - col.column = col.type->createColumn(); - block.insert(col); - - col.name = "port"; - col.type = std::make_shared(); - col.column = col.type->createColumn(); - block.insert(col); - - col.name = "user"; - col.type = std::make_shared(); - col.column = col.type->createColumn(); - block.insert(col); - - col.name = "structure_class"; - col.type = std::make_shared(); - col.column = col.type->createColumn(); - block.insert(col); - - col.name = "structure"; - col.type = std::make_shared(); - col.column = col.type->createColumn(); - block.insert(col); - - return block; -} - BlockIO InterpreterCheckQuery::execute() { diff --git a/dbms/src/Interpreters/InterpreterCheckQuery.h b/dbms/src/Interpreters/InterpreterCheckQuery.h index dc2e9cc8bb1..a0a0d677235 100644 --- a/dbms/src/Interpreters/InterpreterCheckQuery.h +++ b/dbms/src/Interpreters/InterpreterCheckQuery.h @@ -16,9 +16,6 @@ public: BlockIO execute() override; -private: - Block getSampleBlock() const; - private: ASTPtr query_ptr; diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 1b6ed45e5d3..78c22b3de82 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -213,20 +213,6 @@ void InterpreterSelectQuery::basicInit(const BlockInputStreamPtr & input) if (input) streams.push_back(input); - - if (is_first_select_inside_union_all) - { - /// We check that the results of all SELECT queries are compatible. - Block first = getSampleBlock(); - for (auto p = next_select_in_union_all.get(); p != nullptr; p = p->next_select_in_union_all.get()) - { - Block current = p->getSampleBlock(); - if (!blocksHaveEqualStructure(first, current)) - throw Exception("Result structures mismatch in the SELECT queries of the UNION ALL chain. Found result structure:\n\n" + current.dumpStructure() - + "\n\nwhile expecting:\n\n" + first.dumpStructure() + "\n\ninstead", - ErrorCodes::UNION_ALL_RESULT_STRUCTURES_MISMATCH); - } - } } void InterpreterSelectQuery::initQueryAnalyzer() diff --git a/dbms/src/Storages/StorageBuffer.cpp b/dbms/src/Storages/StorageBuffer.cpp index be6922997d1..1b32ba197ff 100644 --- a/dbms/src/Storages/StorageBuffer.cpp +++ b/dbms/src/Storages/StorageBuffer.cpp @@ -44,7 +44,6 @@ namespace DB namespace ErrorCodes { extern const int INFINITE_LOOP; - extern const int BLOCKS_HAVE_DIFFERENT_STRUCTURE; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } @@ -150,9 +149,7 @@ static void appendBlock(const Block & from, Block & to) if (!to) throw Exception("Cannot append to empty block", ErrorCodes::LOGICAL_ERROR); - if (!blocksHaveEqualStructure(from, to)) - throw Exception("Cannot append block to buffer: block has different structure. 
" - "Block: " + from.dumpStructure() + ", Buffer: " + to.dumpStructure(), ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE); + assertBlocksHaveEqualStructure(from, to, "Buffer"); from.checkNumberOfRows(); to.checkNumberOfRows(); From 13f1d5c67d9b8f08de1bace49d45e11677f31af0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 20 Feb 2018 04:25:56 +0300 Subject: [PATCH 025/209] Added method "getHeader" in IBlockOutputStream: development [#CLICKHOUSE-2] --- .../IProfilingBlockInputStream.cpp | 34 +------------------ .../DataStreams/IProfilingBlockInputStream.h | 2 -- 2 files changed, 1 insertion(+), 35 deletions(-) diff --git a/dbms/src/DataStreams/IProfilingBlockInputStream.cpp b/dbms/src/DataStreams/IProfilingBlockInputStream.cpp index eee930e6248..7a76e424109 100644 --- a/dbms/src/DataStreams/IProfilingBlockInputStream.cpp +++ b/dbms/src/DataStreams/IProfilingBlockInputStream.cpp @@ -21,38 +21,6 @@ namespace ErrorCodes } -void IProfilingBlockInputStream::checkBlockStructure(const Block & block, const Block & header) -{ - size_t columns = header.columns(); - if (block.columns() != columns) - throw Exception("Block structure mismatch in " + getName() + " stream: different number of columns:\n" - + block.dumpStructure() + "\n" + header.dumpStructure(), ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE); - - for (size_t i = 0; i < columns; ++i) - { - const auto & expected = header.getByPosition(i); - const auto & actual = block.getByPosition(i); - - if (actual.name != expected.name) - throw Exception("Block structure mismatch in " + getName() + " stream: different names of columns:\n" - + block.dumpStructure() + "\n" + header.dumpStructure(), ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE); - - if (!actual.type->equals(*expected.type)) - throw Exception("Block structure mismatch in " + getName() + " stream: different types:\n" - + block.dumpStructure() + "\n" + header.dumpStructure(), ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE); - - if (actual.column->getName() != expected.column->getName()) - throw Exception("Block structure mismatch in " + getName() + " stream: different columns:\n" - + block.dumpStructure() + "\n" + header.dumpStructure(), ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE); - - if (actual.column->isColumnConst() && expected.column->isColumnConst() - && static_cast(*actual.column).getField() != static_cast(*expected.column).getField()) - throw Exception("Block structure mismatch in " + getName() + " stream: different values of constants", - ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE); - } -} - - IProfilingBlockInputStream::IProfilingBlockInputStream() { info.parent = this; @@ -110,7 +78,7 @@ Block IProfilingBlockInputStream::read() { Block header = getHeader(); if (header) - checkBlockStructure(res, header); + assertBlocksHaveEqualStructure(res, header, getName()); } #endif diff --git a/dbms/src/DataStreams/IProfilingBlockInputStream.h b/dbms/src/DataStreams/IProfilingBlockInputStream.h index 062d6929fcc..048473be30c 100644 --- a/dbms/src/DataStreams/IProfilingBlockInputStream.h +++ b/dbms/src/DataStreams/IProfilingBlockInputStream.h @@ -224,8 +224,6 @@ private: * It is done so that sending occurs only in the upper stream. 
 */
    void collectAndSendTotalRowsApprox();
-
-    void checkBlockStructure(const Block & block, const Block & header);
 };

 }

From 8942d7c127c59cfcd674b4e8bc4e983b4051b5cf Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Tue, 20 Feb 2018 04:38:42 +0300
Subject: [PATCH 026/209] Added method "getHeader" in IBlockOutputStream: development [#CLICKHOUSE-2]

---
 dbms/src/Interpreters/ExpressionAnalyzer.cpp | 86 +++-----------------
 dbms/src/Interpreters/Settings.h | 2 -
 dbms/src/Interpreters/SettingsCommon.h | 67 ---------------
 dbms/src/Storages/StorageDistributed.cpp | 7 +-
 4 files changed, 14 insertions(+), 148 deletions(-)

diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp
index ccd1615826e..629260ab8c5 100644
--- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp
+++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp
@@ -802,84 +802,24 @@ void ExpressionAnalyzer::addExternalStorage(ASTPtr & subquery_or_table_name_or_t
     StoragePtr external_storage = StorageMemory::create(external_table_name, columns, NamesAndTypesList{}, NamesAndTypesList{}, ColumnDefaults{});
     external_storage->startup();

-    /** There are two ways to perform distributed GLOBAL subqueries.
-      *
-      * "push" method:
-      * Subquery data is sent to all remote servers, where they are then used.
-      * For this method, the data is sent in the form of "external tables" and will be available on each remote server by the name of the type _data1.
-      * Replace in the query a subquery for this name.
-      *
-      * "pull" method:
-      * Remote servers download the subquery data from the request initiating server.
-      * For this method, replace the subquery with another subquery of the form (SELECT * FROM remote ('host: port', _query_QUERY_ID, _data1))
-      * This subquery, in fact, says - "you need to download data from there."
-      *
-      * The "pull" method takes precedence, because in it a remote server can decide that it does not need data and does not download it in such cases.
-      */
+    /** We replace the subquery with the name of the temporary table.
+      * It is in this form that the query will be sent to the remote server.
+      * The temporary table will be shipped to the remote server, and on its side,
+      * instead of executing the subquery, it will simply be read.
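+      * (The "pull" alternative described in the deleted comment above - remote servers
+      * downloading the subquery data from the initiator - is dropped below together with
+      * the global_subqueries_method setting, so "push" is now the only behaviour.)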
- */ + ast_table_expr->subquery.reset(); + ast_table_expr->database_and_table_name = database_and_table_name; - auto database_and_table_name = std::make_shared(StringRange(), external_table_name, ASTIdentifier::Table); - - if (auto ast_table_expr = typeid_cast(subquery_or_table_name_or_table_expression.get())) - { - ast_table_expr->subquery.reset(); - ast_table_expr->database_and_table_name = database_and_table_name; - - ast_table_expr->children.clear(); - ast_table_expr->children.emplace_back(database_and_table_name); - } - else - subquery_or_table_name_or_table_expression = database_and_table_name; - } - else if (settings.global_subqueries_method == GlobalSubqueriesMethod::PULL) - { - throw Exception("Support for 'pull' method of execution of global subqueries is disabled.", ErrorCodes::SUPPORT_IS_DISABLED); - - /// TODO -/* String host_port = getFQDNOrHostName() + ":" + toString(context.getTCPPort()); - String database = "_query_" + context.getCurrentQueryId(); - - auto subquery = std::make_shared(); - subquery_or_table_name = subquery; - - auto select = std::make_shared(); - subquery->children.push_back(select); - - auto exp_list = std::make_shared(); - select->select_expression_list = exp_list; - select->children.push_back(select->select_expression_list); - - Names column_names = external_storage->getColumnNamesList(); - for (const auto & name : column_names) - exp_list->children.push_back(std::make_shared(StringRange(), name)); - - auto table_func = std::make_shared(); - select->table = table_func; - select->children.push_back(select->table); - - table_func->name = "remote"; - auto args = std::make_shared(); - table_func->arguments = args; - table_func->children.push_back(table_func->arguments); - - auto address_lit = std::make_shared(StringRange(), host_port); - args->children.push_back(address_lit); - - auto database_lit = std::make_shared(StringRange(), database); - args->children.push_back(database_lit); - - auto table_lit = std::make_shared(StringRange(), external_table_name); - args->children.push_back(table_lit);*/ + ast_table_expr->children.clear(); + ast_table_expr->children.emplace_back(database_and_table_name); } else - throw Exception("Unknown global subqueries execution method", ErrorCodes::UNKNOWN_GLOBAL_SUBQUERIES_METHOD); + subquery_or_table_name_or_table_expression = database_and_table_name; external_tables[external_table_name] = external_storage; subqueries_for_sets[external_table_name].source = interpreter->execute().in; diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index 4946be8acb6..0a725509186 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -113,8 +113,6 @@ struct Settings \ M(SettingDistributedProductMode, distributed_product_mode, DistributedProductMode::DENY, "How are distributed subqueries performed inside IN or JOIN sections?") \ \ - M(SettingGlobalSubqueriesMethod, global_subqueries_method, GlobalSubqueriesMethod::PUSH, "The method for executing GLOBAL subqueries.") \ - \ M(SettingUInt64, max_concurrent_queries_for_user, 0, "The maximum number of concurrent requests per user.") \ \ M(SettingBool, insert_deduplicate, true, "For INSERT queries in the replicated table, specifies that deduplication of insertings blocks should be preformed") \ diff --git a/dbms/src/Interpreters/SettingsCommon.h b/dbms/src/Interpreters/SettingsCommon.h index 55ffc48dff1..59c1c0dac28 100644 --- a/dbms/src/Interpreters/SettingsCommon.h +++ b/dbms/src/Interpreters/SettingsCommon.h @@ -672,73 +672,6 @@ struct 
SettingDistributedProductMode } }; -/// Method for executing global distributed subqueries. -enum class GlobalSubqueriesMethod -{ - PUSH = 0, /// Send the subquery data to all remote servers. - PULL = 1, /// Remote servers will download the subquery data from the initiating server. -}; - -struct SettingGlobalSubqueriesMethod -{ - GlobalSubqueriesMethod value; - bool changed = false; - - SettingGlobalSubqueriesMethod(GlobalSubqueriesMethod x = GlobalSubqueriesMethod::PUSH) : value(x) {} - - operator GlobalSubqueriesMethod() const { return value; } - SettingGlobalSubqueriesMethod & operator= (GlobalSubqueriesMethod x) { set(x); return *this; } - - static GlobalSubqueriesMethod getGlobalSubqueriesMethod(const String & s) - { - if (s == "push") - return GlobalSubqueriesMethod::PUSH; - if (s == "pull") - return GlobalSubqueriesMethod::PULL; - - throw Exception("Unknown global subqueries execution method: '" + s + "', must be one of 'push', 'pull'", - ErrorCodes::UNKNOWN_GLOBAL_SUBQUERIES_METHOD); - } - - String toString() const - { - const char * strings[] = { "push", "pull" }; - - if (value < GlobalSubqueriesMethod::PUSH || value > GlobalSubqueriesMethod::PULL) - throw Exception("Unknown global subqueries execution method", ErrorCodes::UNKNOWN_GLOBAL_SUBQUERIES_METHOD); - - return strings[static_cast(value)]; - } - - void set(GlobalSubqueriesMethod x) - { - value = x; - changed = true; - } - - void set(const Field & x) - { - set(safeGet(x)); - } - - void set(const String & x) - { - set(getGlobalSubqueriesMethod(x)); - } - - void set(ReadBuffer & buf) - { - String x; - readBinary(x, buf); - set(x); - } - - void write(WriteBuffer & buf) const - { - writeBinary(toString(), buf); - } -}; - struct SettingString { diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp index 32be27fe2a4..b191f598a0e 100644 --- a/dbms/src/Storages/StorageDistributed.cpp +++ b/dbms/src/Storages/StorageDistributed.cpp @@ -191,16 +191,11 @@ BlockInputStreams StorageDistributed::read( const auto & modified_query_ast = rewriteSelectQuery( query_info.query, remote_database, remote_table); - Tables external_tables; - - if (settings.global_subqueries_method == GlobalSubqueriesMethod::PUSH) - external_tables = context.getExternalTables(); - Block header = materializeBlock(InterpreterSelectQuery(query_info.query, context, processed_stage, 0, std::make_shared(getSampleBlockForColumns(column_names))).execute().in->getHeader()); ClusterProxy::SelectStreamFactory select_stream_factory( - header, processed_stage, QualifiedTableName{remote_database, remote_table}, external_tables); + header, processed_stage, QualifiedTableName{remote_database, remote_table}, context.getExternalTables()); return ClusterProxy::executeQuery( select_stream_factory, cluster, modified_query_ast, context, settings); From 5bc6bd55b11f4dc37487756e184aa822278c2ec3 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Mon, 19 Feb 2018 18:31:43 +0300 Subject: [PATCH 027/209] allow PreCommitted parts to contain each other, clean up code --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 450 ++++++++---------- dbms/src/Storages/MergeTree/MergeTreeData.h | 85 ++-- .../Storages/MergeTree/MergeTreePartInfo.h | 5 + .../Storages/StorageReplicatedMergeTree.cpp | 4 +- 4 files changed, 249 insertions(+), 295 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 3d95b9408a0..b73eb7759b8 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ 
b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -108,8 +108,8 @@ MergeTreeData::MergeTreeData( full_path(full_path_), broken_part_callback(broken_part_callback_), log_name(database_name + "." + table_name), log(&Logger::get(log_name + " (Data)")), - data_parts_by_name(data_parts_indexes.get()), - data_parts_by_state_and_name(data_parts_indexes.get()) + data_parts_by_info(data_parts_indexes.get()), + data_parts_by_state_and_info(data_parts_indexes.get()) { merging_params.check(columns); @@ -418,7 +418,7 @@ Int64 MergeTreeData::getMaxDataPartIndex() std::lock_guard lock_all(data_parts_mutex); Int64 max_block_id = 0; - for (const DataPartPtr & part : data_parts_by_name) + for (const DataPartPtr & part : data_parts_by_info) max_block_id = std::max(max_block_id, part->info.max_block); return max_block_id; @@ -552,11 +552,11 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) if (data_parts_indexes.size() >= 2) { - /// Now all parts are committed, so data_parts_by_state_and_name == committed_parts_range - auto prev_jt = data_parts_by_state_and_name.begin(); + /// Now all parts are committed, so data_parts_by_state_and_info == committed_parts_range + auto prev_jt = data_parts_by_state_and_info.begin(); auto curr_jt = std::next(prev_jt); - auto deactivate_part = [&] (DataPartIteratorByStateAndName it) + auto deactivate_part = [&] (DataPartIteratorByStateAndInfo it) { (*it)->remove_time = (*it)->modification_time; modifyPartState(it, DataPartState::Outdated); @@ -564,7 +564,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) (*prev_jt)->assertState({DataPartState::Committed}); - while (curr_jt != data_parts_by_state_and_name.end() && (*curr_jt)->state == DataPartState::Committed) + while (curr_jt != data_parts_by_state_and_info.end() && (*curr_jt)->state == DataPartState::Committed) { /// Don't consider data parts belonging to different partitions. if ((*curr_jt)->info.partition_id != (*prev_jt)->info.partition_id) @@ -664,7 +664,7 @@ MergeTreeData::DataPartsVector MergeTreeData::grabOldParts() return res; time_t now = time(nullptr); - std::vector parts_to_delete; + std::vector parts_to_delete; { std::lock_guard lock_parts(data_parts_mutex); @@ -674,7 +674,7 @@ MergeTreeData::DataPartsVector MergeTreeData::grabOldParts() { const DataPartPtr & part = *it; - if (part.unique() && /// Grab only parts that is not using by anyone (SELECTs for example) + if (part.unique() && /// Grab only parts that are not used by anyone (SELECTs for example). 
part->remove_time < now && now - part->remove_time > settings.old_parts_lifetime.totalSeconds()) { @@ -716,9 +716,9 @@ void MergeTreeData::removePartsFinally(const MergeTreeData::DataPartsVector & pa /// TODO: use data_parts iterators instead of pointers for (auto & part : parts) { - auto it = data_parts_by_name.find(part->info); - if (it == data_parts_by_name.end()) - throw Exception("Deleting data part " + part->name + " is not exist", ErrorCodes::LOGICAL_ERROR); + auto it = data_parts_by_info.find(part->info); + if (it == data_parts_by_info.end()) + throw Exception("Deleting data part " + part->name + " doesn't exist", ErrorCodes::LOGICAL_ERROR); (*it)->assertState({DataPartState::Deleting}); @@ -1362,6 +1362,58 @@ MergeTreeData::AlterDataPartTransaction::~AlterDataPartTransaction() } +MergeTreeData::DataPartsVector MergeTreeData::getActivePartsToReplace( + const MergeTreePartInfo & new_part_info, + DataPartPtr & out_covering_part, + std::lock_guard & /* data_parts_lock */) const +{ + /// Parts contained in the part are consecutive in data_parts, intersecting the insertion place for the part itself. + auto it_middle = data_parts_by_state_and_info.lower_bound(DataPartStateAndInfo(DataPartState::Committed, new_part_info)); + auto committed_parts_range = getDataPartsStateRange(DataPartState::Committed); + + /// Go to the left. + DataPartIteratorByStateAndInfo begin = it_middle; + while (begin != committed_parts_range.begin()) + { + auto prev = std::prev(begin); + + if (!new_part_info.contains((*prev)->info)) + { + if ((*prev)->info.contains(new_part_info)) + { + out_covering_part = *prev; + return {}; + } + break; + } + + begin = prev; + } + + /// Go to the right. + DataPartIteratorByStateAndInfo end = it_middle; + while (end != committed_parts_range.end()) + { + if ((*end)->info == new_part_info) + throw Exception("Unexpected duplicate part " + (*end)->getNameWithState() + ". 
It is a bug.", ErrorCodes::LOGICAL_ERROR); + + if (!new_part_info.contains((*end)->info)) + { + if ((*end)->info.contains(new_part_info)) + { + out_covering_part = *end; + return {}; + } + break; + } + + ++end; + } + + return DataPartsVector{begin, end}; +} + + void MergeTreeData::renameTempPartAndAdd(MutableDataPartPtr & part, SimpleIncrement * increment, Transaction * out_transaction) { auto removed = renameTempPartAndReplace(part, increment, out_transaction); @@ -1375,184 +1427,91 @@ void MergeTreeData::renameTempPartAndAdd(MutableDataPartPtr & part, SimpleIncrem MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace( MutableDataPartPtr & part, SimpleIncrement * increment, Transaction * out_transaction) { - if (out_transaction && out_transaction->data) - throw Exception("Using the same MergeTreeData::Transaction for overlapping transactions is invalid", ErrorCodes::LOGICAL_ERROR); + if (out_transaction && out_transaction->data && out_transaction->data != this) + throw Exception("The same MergeTreeData::Transaction cannot be used for different tables", + ErrorCodes::LOGICAL_ERROR); + + std::lock_guard lock(data_parts_mutex); part->assertState({DataPartState::Temporary}); MergeTreePartInfo part_info = part->info; String part_name; - DataPartsVector replaced_parts; - std::vector replaced_iterators; + if (DataPartPtr existing_part_in_partition = getAnyPartInPartition(part->info.partition_id, lock)) { - std::unique_lock lock(data_parts_mutex); - - if (DataPartPtr existing_part_in_partition = getAnyPartInPartition(part->info.partition_id, lock)) - { - if (part->partition.value != existing_part_in_partition->partition.value) - throw Exception( - "Partition value mismatch between two parts with the same partition ID. Existing part: " - + existing_part_in_partition->name + ", newly added part: " + part->name, - ErrorCodes::CORRUPTED_DATA); - } - - /** It is important that obtaining new block number and adding that block to parts set is done atomically. - * Otherwise there is race condition - merge of blocks could happen in interval that doesn't yet contain new part. 
- */ - if (increment) - part_info.min_block = part_info.max_block = increment->get(); - - if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) - part_name = part_info.getPartNameV0(part->getMinDate(), part->getMaxDate()); - else - part_name = part_info.getPartName(); - - LOG_TRACE(log, "Renaming temporary part " << part->relative_path << " to " << part_name << "."); - - auto it_duplicate = data_parts_by_name.find(part_info); - if (it_duplicate != data_parts_by_name.end()) - { - String message = "Part " + (*it_duplicate)->getNameWithState() + " already exists"; - - if ((*it_duplicate)->checkState({DataPartState::Outdated, DataPartState::Deleting})) - { - throw Exception(message + ", but it will be deleted soon", ErrorCodes::PART_IS_TEMPORARILY_LOCKED); - } - - throw Exception(message, ErrorCodes::DUPLICATE_DATA_PART); - } - - /// Check that part is not covered and doesn't cover other in-progress parts, it makes sense only for Replicated* engines - if (out_transaction) - { - auto check_coverage = [&part_info, &part_name] (const DataPartPtr & part) - { - if (part_info.contains(part->info)) - throw Exception("Cannot add part " + part_name + " covering pre-committed part " + part->name, ErrorCodes::PART_IS_TEMPORARILY_LOCKED); - else if (part->info.contains(part_info)) - throw Exception("Cannot add part " + part_name + " covered by pre-committed part " + part->name, ErrorCodes::PART_IS_TEMPORARILY_LOCKED); - }; - - auto it_middle = data_parts_by_state_and_name.lower_bound(DataPartStateAndInfo(DataPartState::PreCommitted, part_info)); - - auto precommitted_parts_range = getDataPartsStateRange(DataPartState::PreCommitted); - - for (auto it = it_middle; it != precommitted_parts_range.begin();) - { - --it; - check_coverage(*it); - } - - for (auto it = it_middle; it != precommitted_parts_range.end();) - { - check_coverage(*it); - ++it; - } - } - - /// Is the part covered by some other part? - DataPartPtr covering_part; - - auto it_middle = data_parts_by_state_and_name.lower_bound(DataPartStateAndInfo(DataPartState::Committed, part_info)); - - /// Parts contained in the part are consecutive in data_parts, intersecting the insertion place for the part itself. - auto committed_parts_range = getDataPartsStateRange(DataPartState::Committed); - - /// Go to the left. - for (auto it = it_middle; it != committed_parts_range.begin();) - { - --it; - - if (!part_info.contains((*it)->info)) - { - if ((*it)->info.contains(part_info)) - covering_part = *it; - break; - } - - replaced_iterators.push_back(it); - } - - /// Parts must be in ascending order. - std::reverse(replaced_iterators.begin(), replaced_iterators.end()); - - /// Go to the right. - for (auto it = it_middle; it != committed_parts_range.end();) - { - if ((*it)->name == part_name) - throw Exception("Unexpected duplicate part " + (*it)->getNameWithState() + ". 
It is a bug.", ErrorCodes::LOGICAL_ERROR); - - if (!part_info.contains((*it)->info)) - { - if ((*it)->info.contains(part_info)) - covering_part = *it; - break; - } - - replaced_iterators.push_back(it); - ++it; - } - - if (covering_part) - { - LOG_WARNING(log, "Tried to add obsolete part " << part_name << " covered by " << covering_part->getNameWithState()); - - /// It is a temporary part, we want to delete it from filesystem immediately - /// Other fields remain the same - part->remove_time = time(nullptr); - part->is_temp = true; - - /// Nothing to commit or rollback - if (out_transaction) - { - out_transaction->data = this; - out_transaction->parts_to_add_on_rollback = {}; - out_transaction->parts_to_remove_on_rollback = {}; - } - - /// We replaced nothing - return {}; - } - - /// All checks are passed. Now we can rename the part on disk. - /// So, we maintain invariant: if a non-temporary part in filesystem then it is in data_parts - /// - /// Ordinary MergeTree engines (they don't use out_transaction) commit parts immediately, - /// whereas ReplicatedMergeTree uses intermediate PreCommitted state - part->name = part_name; - part->info = part_info; - part->is_temp = false; - part->state = (out_transaction) ? DataPartState::PreCommitted : DataPartState::Committed; - part->renameTo(part_name); - - data_parts_indexes.insert(part); - - replaced_parts.reserve(replaced_iterators.size()); - for (auto it_replacing_part : replaced_iterators) - replaced_parts.emplace_back(*it_replacing_part); - - if (!out_transaction) - { - addPartContributionToColumnSizes(part); - - auto current_time = time(nullptr); - for (auto it_replacing_part : replaced_iterators) - { - (*it_replacing_part)->remove_time = current_time; - modifyPartState(it_replacing_part, DataPartState::Outdated); - removePartContributionToColumnSizes(*it_replacing_part); - } - } - else - { - out_transaction->data = this; - out_transaction->parts_to_add_on_rollback = replaced_parts; - out_transaction->parts_to_remove_on_rollback = {part}; - } + if (part->partition.value != existing_part_in_partition->partition.value) + throw Exception( + "Partition value mismatch between two parts with the same partition ID. Existing part: " + + existing_part_in_partition->name + ", newly added part: " + part->name, + ErrorCodes::CORRUPTED_DATA); } - return replaced_parts; + /** It is important that obtaining new block number and adding that block to parts set is done atomically. + * Otherwise there is race condition - merge of blocks could happen in interval that doesn't yet contain new part. 
+ */ + if (increment) + part_info.min_block = part_info.max_block = increment->get(); + + if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) + part_name = part_info.getPartNameV0(part->getMinDate(), part->getMaxDate()); + else + part_name = part_info.getPartName(); + + LOG_TRACE(log, "Renaming temporary part " << part->relative_path << " to " << part_name << "."); + + auto it_duplicate = data_parts_by_info.find(part_info); + if (it_duplicate != data_parts_by_info.end()) + { + String message = "Part " + (*it_duplicate)->getNameWithState() + " already exists"; + + if ((*it_duplicate)->checkState({DataPartState::Outdated, DataPartState::Deleting})) + throw Exception(message + ", but it will be deleted soon", ErrorCodes::PART_IS_TEMPORARILY_LOCKED); + + throw Exception(message, ErrorCodes::DUPLICATE_DATA_PART); + } + + DataPartPtr covering_part; + DataPartsVector covered_parts = getActivePartsToReplace(part_info, covering_part, lock); + + if (covering_part) + { + LOG_WARNING(log, "Tried to add obsolete part " << part_name << " covered by " << covering_part->getNameWithState()); + return {}; + } + + /// All checks are passed. Now we can rename the part on disk. + /// So, we maintain invariant: if a non-temporary part in filesystem then it is in data_parts + /// + /// If out_transaction is null, we commit the part to the active set immediately, else add it to the transaction. + part->name = part_name; + part->info = part_info; + part->is_temp = false; + part->state = DataPartState::PreCommitted; + part->renameTo(part_name); + + auto part_it = data_parts_indexes.insert(part).first; + + if (out_transaction) + { + out_transaction->data = this; + out_transaction->precommitted_parts.insert(part); + } + else + { + auto current_time = time(nullptr); + for (const DataPartPtr & covered_part : covered_parts) + { + covered_part->remove_time = current_time; + modifyPartState(covered_part, DataPartState::Outdated); + removePartContributionToColumnSizes(covered_part); + } + + modifyPartState(part_it, DataPartState::Committed); + addPartContributionToColumnSizes(part); + } + + return covered_parts; } void MergeTreeData::removePartsFromWorkingSet(const DataPartsVector & remove, bool clear_without_timeout) @@ -1561,7 +1520,7 @@ void MergeTreeData::removePartsFromWorkingSet(const DataPartsVector & remove, bo for (auto & part : remove) { - if (!data_parts_by_name.count(part->info)) + if (!data_parts_by_info.count(part->info)) throw Exception("Part " + part->getNameWithState() + " not found in data_parts", ErrorCodes::LOGICAL_ERROR); part->assertState({DataPartState::PreCommitted, DataPartState::Committed, DataPartState::Outdated}); @@ -1586,8 +1545,8 @@ void MergeTreeData::renameAndDetachPart(const DataPartPtr & part_to_detach, cons std::lock_guard lock(data_parts_mutex); - auto it_part = data_parts_by_name.find(part_to_detach->info); - if (it_part == data_parts_by_name.end()) + auto it_part = data_parts_by_info.find(part_to_detach->info); + if (it_part == data_parts_by_info.end()) throw Exception("No such data part " + part_to_detach->getNameWithState(), ErrorCodes::NO_SUCH_DATA_PART); /// What if part_to_detach is reference to *it_part? Make a new owner just in case. 
@@ -1619,16 +1578,16 @@ void MergeTreeData::renameAndDetachPart(const DataPartPtr & part_to_detach, cons return state == DataPartState::Committed || state == DataPartState::Outdated; }; - auto update_error = [&] (DataPartIteratorByAndName it) + auto update_error = [&] (DataPartIteratorByInfo it) { error = true; error_parts += (*it)->getNameWithState() + " "; }; - auto it_middle = data_parts_by_name.lower_bound(part->info); + auto it_middle = data_parts_by_info.lower_bound(part->info); /// Restore the leftmost part covered by the part - if (it_middle != data_parts_by_name.begin()) + if (it_middle != data_parts_by_info.begin()) { auto it = std::prev(it_middle); @@ -1654,7 +1613,7 @@ void MergeTreeData::renameAndDetachPart(const DataPartPtr & part_to_detach, cons error = true; /// Restore "right" parts - for (auto it = it_middle; it != data_parts_by_name.end() && part->contains(**it); ++it) + for (auto it = it_middle; it != data_parts_by_info.end() && part->contains(**it); ++it) { if ((*it)->info.min_block < pos) continue; @@ -1776,7 +1735,7 @@ MergeTreeData::DataPartPtr MergeTreeData::getActiveContainingPart(const String & auto committed_parts_range = getDataPartsStateRange(DataPartState::Committed); /// The part can be covered only by the previous or the next one in data_parts. - auto it = data_parts_by_state_and_name.lower_bound(DataPartStateAndInfo(DataPartState::Committed, part_info)); + auto it = data_parts_by_state_and_info.lower_bound(DataPartStateAndInfo(DataPartState::Committed, part_info)); if (it != committed_parts_range.end()) { @@ -1803,8 +1762,8 @@ MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const String & part_na std::lock_guard lock(data_parts_mutex); - auto it = data_parts_by_name.find(part_info); - if (it == data_parts_by_name.end()) + auto it = data_parts_by_info.find(part_info); + if (it == data_parts_by_info.end()) return nullptr; for (auto state : valid_states) @@ -2067,7 +2026,7 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, const Context String partition_id = partition.getID(*this); { - std::unique_lock data_parts_lock(data_parts_mutex); + std::lock_guard data_parts_lock(data_parts_mutex); DataPartPtr existing_part_in_partition = getAnyPartInPartition(partition_id, data_parts_lock); if (existing_part_in_partition && existing_part_in_partition->partition.value != partition.value) { @@ -2115,7 +2074,7 @@ MergeTreeData::DataPartsVector MergeTreeData::getAllDataPartsVector(MergeTreeDat DataPartsVector res; { std::lock_guard lock(data_parts_mutex); - res.assign(data_parts_by_name.begin(), data_parts_by_name.end()); + res.assign(data_parts_by_info.begin(), data_parts_by_info.end()); if (out_states != nullptr) { @@ -2153,14 +2112,14 @@ MergeTreeData::DataPartsVector MergeTreeData::getDataPartsVector() const } MergeTreeData::DataPartPtr MergeTreeData::getAnyPartInPartition( - const String & partition_id, std::unique_lock & /*data_parts_lock*/) + const String & partition_id, std::lock_guard & /*data_parts_lock*/) { auto min_block = std::numeric_limits::min(); MergeTreePartInfo dummy_part_info(partition_id, min_block, min_block, 0); - auto it = data_parts_by_state_and_name.lower_bound(DataPartStateAndInfo(DataPartState::Committed, dummy_part_info)); + auto it = data_parts_by_state_and_info.lower_bound(DataPartStateAndInfo(DataPartState::Committed, dummy_part_info)); - if (it != data_parts_by_state_and_name.end() && (*it)->state == DataPartState::Committed && (*it)->info.partition_id == partition_id) + if (it != 
data_parts_by_state_and_info.end() && (*it)->state == DataPartState::Committed && (*it)->info.partition_id == partition_id) return *it; return nullptr; @@ -2171,79 +2130,60 @@ void MergeTreeData::Transaction::rollback() if (!isEmpty()) { std::stringstream ss; - if (!parts_to_remove_on_rollback.empty()) - { - ss << " Removing parts:"; - for (const auto & part : parts_to_remove_on_rollback) - ss << " " << part->relative_path; - ss << "."; - } - if (!parts_to_add_on_rollback.empty()) - { - ss << " Adding parts: "; - for (const auto & part : parts_to_add_on_rollback) - ss << " " << part->relative_path; - ss << "."; - } - + ss << " Removing parts:"; + for (const auto & part : precommitted_parts) + ss << " " << part->relative_path; + ss << "."; LOG_DEBUG(data->log, "Undoing transaction." << ss.str()); - /// PreCommitted -> Outdated - replaceParts(DataPartState::Outdated, DataPartState::Committed, true); + data->removePartsFromWorkingSet( + DataPartsVector(precommitted_parts.begin(), precommitted_parts.end()), + /* clear_without_timeout = */ true); } clear(); } -void MergeTreeData::Transaction::commit() +MergeTreeData::DataPartsVector MergeTreeData::Transaction::commit() { + DataPartsVector total_covered_parts; + if (!isEmpty()) { - /// PreCommitted -> Committed, Committed -> Outdated - replaceParts(DataPartState::Committed, DataPartState::Outdated, false); + std::lock_guard data_parts_lock(data->data_parts_mutex); + + auto current_time = time(nullptr); + for (const DataPartPtr & part : precommitted_parts) + { + DataPartPtr covering_part; + DataPartsVector covered_parts = data->getActivePartsToReplace(part->info, covering_part, data_parts_lock); + if (covering_part) + { + LOG_WARNING(data->log, "Tried to commit obsolete part " << part->name + << " covered by " << covering_part->getNameWithState()); + + part->remove_time = 0; /// The part will be removed without waiting for old_parts_lifetime seconds. + data->modifyPartState(part, DataPartState::Outdated); + } + else + { + total_covered_parts.insert(total_covered_parts.end(), covered_parts.begin(), covered_parts.end()); + for (const DataPartPtr & covered_part : covered_parts) + { + covered_part->remove_time = current_time; + data->modifyPartState(covered_part, DataPartState::Outdated); + data->removePartContributionToColumnSizes(covered_part); + } + + data->modifyPartState(part, DataPartState::Committed); + data->addPartContributionToColumnSizes(part); + } + } } clear(); -} -void MergeTreeData::Transaction::replaceParts(MergeTreeData::DataPartState move_precommitted_to, - MergeTreeData::DataPartState move_committed_to, bool remove_without_delay) -{ - auto & committed_parts = parts_to_add_on_rollback; - auto & precommitted_parts = parts_to_remove_on_rollback; - - /// TODO: also make sense to activate CleanupThread's cv - auto remove_time = (remove_without_delay) ? 
0 : time(nullptr); - - { - std::lock_guard lock(data->data_parts_mutex); - - for (auto & part : committed_parts) - part->assertState({DataPartState::Committed}); - for (auto & part : precommitted_parts) - part->assertState({DataPartState::PreCommitted}); - - /// If it is rollback then do nothing, else make it Outdated and remove their size contribution - if (move_committed_to != DataPartState::Committed) - { - for (const DataPartPtr & part : committed_parts) - { - data->modifyPartState(part, move_committed_to); - part->remove_time = remove_time; - data->removePartContributionToColumnSizes(part); - } - } - - /// If it is rollback just change state to Outdated, else change state to Committed and add their size contribution - for (auto & part : precommitted_parts) - { - data->modifyPartState(part, move_precommitted_to); - if (move_precommitted_to == DataPartState::Committed) - data->addPartContributionToColumnSizes(part); - else - part->remove_time = remove_time; - } - } + return total_covered_parts; } bool MergeTreeData::isPrimaryKeyColumn(const ASTPtr &node) const diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 87164d687d4..cc9e6ff4d15 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -145,20 +145,24 @@ public: using DataParts = std::set; using DataPartsVector = std::vector; - /// Some operations on the set of parts return a Transaction object. + /// Auxiliary object to add a set of parts into the working set in two steps: + /// * First, as PreCommitted parts (the parts are ready, but not yet in the active set). + /// * Next, if commit() is called, the parts are added to the active set and the parts that are + /// covered by them are marked Outdated. /// If neither commit() nor rollback() was called, the destructor rollbacks the operation. class Transaction : private boost::noncopyable { public: Transaction() {} - void commit(); + /// Return parts marked Obsolete as a result of the transaction commit. + DataPartsVector commit(); void rollback(); bool isEmpty() const { - return parts_to_add_on_rollback.empty() && parts_to_remove_on_rollback.empty(); + return precommitted_parts.empty(); } ~Transaction() @@ -172,23 +176,18 @@ public: tryLogCurrentException("~MergeTreeData::Transaction"); } } + private: friend class MergeTreeData; MergeTreeData * data = nullptr; - - /// What to do on rollback. - DataPartsVector parts_to_remove_on_rollback; - DataPartsVector parts_to_add_on_rollback; + DataParts precommitted_parts; void clear() { data = nullptr; - parts_to_remove_on_rollback.clear(); - parts_to_add_on_rollback.clear(); + precommitted_parts.clear(); } - - void replaceParts(DataPartState move_precommitted_to, DataPartState move_committed_to, bool remove_without_delay); }; /// An object that stores the names of temporary files created in the part directory during ALTER of its @@ -368,14 +367,17 @@ public: /// If until is non-null, wake up from the sleep earlier if the event happened. void delayInsertIfNeeded(Poco::Event * until = nullptr); - /// Renames temporary part to a permanent part and adds it to the working set. - /// If increment != nullptr, part index is determing using increment. Otherwise part index remains unchanged. + /// Renames temporary part to a permanent part and adds it to the parts set. /// It is assumed that the part does not intersect with existing parts. 
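The reworked Transaction above is an RAII guard: parts staged through renameTempPartAndReplace stay PreCommitted until commit(), and the destructor rolls back whatever was never committed (fetchPart in StorageReplicatedMergeTree below relies on exactly this when a ZooKeeper operation throws). A minimal self-contained model of that discipline, with invented names and ints standing in for parts:

    #include <iostream>
    #include <stdexcept>
    #include <vector>

    /// Toy model of the two-step commit: staged items become visible only
    /// on commit(); the destructor rolls back anything still staged.
    class Transaction
    {
    public:
        void stage(int part) { staged.push_back(part); }       /// PreCommitted

        void commit(std::vector<int> & active)                 /// PreCommitted -> Committed
        {
            active.insert(active.end(), staged.begin(), staged.end());
            staged.clear();
        }

        ~Transaction()
        {
            if (!staged.empty())
                std::cout << "rolling back " << staged.size() << " part(s)\n";
        }

    private:
        std::vector<int> staged;
    };

    int main()
    {
        std::vector<int> active;

        try
        {
            Transaction txn;
            txn.stage(1);                              /// renameTempPartAndReplace(..., &txn)
            throw std::runtime_error("e.g. a ZooKeeper op failed");
            txn.commit(active);                        /// never reached
        }
        catch (const std::exception &) {}

        std::cout << "active parts: " << active.size() << '\n';   /// 0 - nothing leaked
    }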
- /// If out_transaction != nullptr, sets it to an object allowing to rollback part addition (but not the renaming). + /// If increment != nullptr, part index is determing using increment. Otherwise part index remains unchanged. + /// If out_transaction != nullptr, adds the part in the PreCommitted state (the part will be added to the + /// active set later with out_transaction->commit()). + /// Else, commits the part immediately. void renameTempPartAndAdd(MutableDataPartPtr & part, SimpleIncrement * increment = nullptr, Transaction * out_transaction = nullptr); - /// The same as renameTempPartAndAdd but the part can intersect existing parts. - /// Deletes and returns all parts covered by the added part (in ascending order). + /// The same as renameTempPartAndAdd but the block range of the part can contain existing parts. + /// Returns all parts covered by the added part (in ascending order). + /// If out_transaction == nullptr, marks covered parts as Outdated. DataPartsVector renameTempPartAndReplace( MutableDataPartPtr & part, SimpleIncrement * increment = nullptr, Transaction * out_transaction = nullptr); @@ -573,8 +575,8 @@ private: /// Work with data parts - struct TagByName{}; - struct TagByStateAndName{}; + struct TagByInfo{}; + struct TagByStateAndInfo{}; static const MergeTreePartInfo & dataPartPtrToInfo(const DataPartPtr & part) { @@ -588,14 +590,14 @@ private: using DataPartsIndexes = boost::multi_index_container, + boost::multi_index::tag, boost::multi_index::global_fun >, - /// Index by (State, Name), is used to obtain ordered slices of parts with the same state + /// Index by (State, Info), is used to obtain ordered slices of parts with the same state boost::multi_index::ordered_unique< - boost::multi_index::tag, + boost::multi_index::tag, boost::multi_index::global_fun, LessStateDataPart > @@ -605,16 +607,16 @@ private: /// Current set of data parts. 
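data_parts_indexes, defined just above, keeps a single storage with two ordered views: point lookup by part info and ordered slices by (state, info). A toy model of the same layout, using boost::multi_index with member keys in place of the global_fun extractors (Part, the tag names, and the integer state are simplified stand-ins):

    #include <boost/multi_index_container.hpp>
    #include <boost/multi_index/composite_key.hpp>
    #include <boost/multi_index/member.hpp>
    #include <boost/multi_index/ordered_index.hpp>
    #include <cassert>
    #include <iterator>
    #include <string>

    namespace bmi = boost::multi_index;

    /// Toy part: unique by name, sliceable by (state, name).
    struct Part
    {
        std::string name;
        int state = 0;    /// say 0 = PreCommitted, 1 = Committed, 2 = Outdated
    };

    struct ByName {};
    struct ByStateAndName {};

    using Parts = bmi::multi_index_container<
        Part,
        bmi::indexed_by<
            bmi::ordered_unique<bmi::tag<ByName>,
                bmi::member<Part, std::string, &Part::name>>,
            bmi::ordered_unique<bmi::tag<ByStateAndName>,
                bmi::composite_key<Part,
                    bmi::member<Part, int, &Part::state>,
                    bmi::member<Part, std::string, &Part::name>>>>>;

    int main()
    {
        Parts parts;
        parts.insert({"all_1_1_0", 1});
        parts.insert({"all_2_2_0", 1});
        parts.insert({"all_3_3_0", 0});

        /// Like modifyPartState(): key fields may only change through
        /// modify(), so that both views stay consistent.
        auto & by_name = parts.get<ByName>();
        by_name.modify(by_name.find("all_3_3_0"), [](Part & p) { p.state = 1; });

        /// Like getDataPartsStateRange(): an ordered slice of one state,
        /// obtained by a partial composite-key lookup.
        auto range = parts.get<ByStateAndName>().equal_range(1);
        assert(std::distance(range.first, range.second) == 3);
    }

modify() is the only safe way to change a key field of an element, which is why MergeTreeData funnels every state change through modifyPartState().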
mutable std::mutex data_parts_mutex; DataPartsIndexes data_parts_indexes; - DataPartsIndexes::index::type & data_parts_by_name; - DataPartsIndexes::index::type & data_parts_by_state_and_name; + DataPartsIndexes::index::type & data_parts_by_info; + DataPartsIndexes::index::type & data_parts_by_state_and_info; - using DataPartIteratorByAndName = DataPartsIndexes::index::type::iterator; - using DataPartIteratorByStateAndName = DataPartsIndexes::index::type::iterator; + using DataPartIteratorByInfo = DataPartsIndexes::index::type::iterator; + using DataPartIteratorByStateAndInfo = DataPartsIndexes::index::type::iterator; - boost::iterator_range getDataPartsStateRange(DataPartState state) const + boost::iterator_range getDataPartsStateRange(DataPartState state) const { - auto begin = data_parts_by_state_and_name.lower_bound(state, LessStateDataPart()); - auto end = data_parts_by_state_and_name.upper_bound(state, LessStateDataPart()); + auto begin = data_parts_by_state_and_info.lower_bound(state, LessStateDataPart()); + auto end = data_parts_by_state_and_info.upper_bound(state, LessStateDataPart()); return {begin, end}; } @@ -623,25 +625,25 @@ private: return [state] (const DataPartPtr & part) { part->state = state; }; } - void modifyPartState(DataPartIteratorByStateAndName it, DataPartState state) + void modifyPartState(DataPartIteratorByStateAndInfo it, DataPartState state) { - if (!data_parts_by_state_and_name.modify(it, getStateModifier(state))) + if (!data_parts_by_state_and_info.modify(it, getStateModifier(state))) throw Exception("Can't modify " + (*it)->getNameWithState(), ErrorCodes::LOGICAL_ERROR); } - void modifyPartState(DataPartIteratorByAndName it, DataPartState state) + void modifyPartState(DataPartIteratorByInfo it, DataPartState state) { - if (!data_parts_by_state_and_name.modify(data_parts_indexes.project(it), getStateModifier(state))) + if (!data_parts_by_state_and_info.modify(data_parts_indexes.project(it), getStateModifier(state))) throw Exception("Can't modify " + (*it)->getNameWithState(), ErrorCodes::LOGICAL_ERROR); } void modifyPartState(const DataPartPtr & part, DataPartState state) { - auto it = data_parts_by_name.find(part->info); - if (it == data_parts_by_name.end() || (*it).get() != part.get()) - throw Exception("Part " + part->name + " is not exists", ErrorCodes::LOGICAL_ERROR); + auto it = data_parts_by_info.find(part->info); + if (it == data_parts_by_info.end() || (*it).get() != part.get()) + throw Exception("Part " + part->name + " doesn't exist", ErrorCodes::LOGICAL_ERROR); - if (!data_parts_by_state_and_name.modify(data_parts_indexes.project(it), getStateModifier(state))) + if (!data_parts_by_state_and_info.modify(data_parts_indexes.project(it), getStateModifier(state))) throw Exception("Can't modify " + (*it)->getNameWithState(), ErrorCodes::LOGICAL_ERROR); } @@ -672,7 +674,14 @@ private: void removePartContributionToColumnSizes(const DataPartPtr & part); /// If there is no part in the partition with ID `partition_id`, returns empty ptr. Should be called under the lock. - DataPartPtr getAnyPartInPartition(const String & partition_id, std::unique_lock & data_parts_lock); + DataPartPtr getAnyPartInPartition(const String & partition_id, std::lock_guard & data_parts_lock); + + /// Return parts in the Committed set that are covered by the new_part_info or the part that covers it. + /// Will check that the new part doesn't already exist and that it doesn't intersect existing part. 
+ DataPartsVector getActivePartsToReplace( + const MergeTreePartInfo & new_part_info, + DataPartPtr & out_covering_part, + std::lock_guard & data_parts_lock) const; /// Checks whether the column is in the primary key. bool isPrimaryKeyColumn(const ASTPtr &node) const; diff --git a/dbms/src/Storages/MergeTree/MergeTreePartInfo.h b/dbms/src/Storages/MergeTree/MergeTreePartInfo.h index 4bc660c84f1..fcee1ef0e7f 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartInfo.h +++ b/dbms/src/Storages/MergeTree/MergeTreePartInfo.h @@ -29,6 +29,11 @@ struct MergeTreePartInfo < std::forward_as_tuple(rhs.partition_id, rhs.min_block, rhs.max_block, rhs.level); } + bool operator==(const MergeTreePartInfo & rhs) const + { + return !(*this < rhs || rhs < *this); + } + /// Contains another part (obtained after merging another part with some other) bool contains(const MergeTreePartInfo & rhs) const { diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 14cc102f609..7441e52bf61 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -2259,13 +2259,13 @@ bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const Strin checkPartAndAddToZooKeeper(part, ops, part_name); MergeTreeData::Transaction transaction; - replaced_parts = data.renameTempPartAndReplace(part, nullptr, &transaction); + data.renameTempPartAndReplace(part, nullptr, &transaction); /// Do not commit if the part is obsolete if (!transaction.isEmpty()) { getZooKeeper()->multi(ops); - transaction.commit(); + replaced_parts = transaction.commit(); } /** If a quorum is tracked for this part, you must update it. From 53b5c5f54cc26be4dab164b4e34aa1de0ef728c0 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Mon, 19 Feb 2018 19:12:16 +0300 Subject: [PATCH 028/209] add check for intersecting parts when adding new parts --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 15 +++++++++++++-- dbms/src/Storages/MergeTree/MergeTreeData.h | 1 + dbms/src/Storages/MergeTree/MergeTreePartInfo.h | 8 ++++++++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index b73eb7759b8..248ddbce53f 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -1364,6 +1364,7 @@ MergeTreeData::AlterDataPartTransaction::~AlterDataPartTransaction() MergeTreeData::DataPartsVector MergeTreeData::getActivePartsToReplace( const MergeTreePartInfo & new_part_info, + const String & new_part_name, DataPartPtr & out_covering_part, std::lock_guard & /* data_parts_lock */) const { @@ -1384,6 +1385,11 @@ MergeTreeData::DataPartsVector MergeTreeData::getActivePartsToReplace( out_covering_part = *prev; return {}; } + + if (!new_part_info.isDisjoint((*prev)->info)) + throw Exception("Part " + new_part_name + " intersects previous part " + (*prev)->getNameWithState() + + ". It is a bug.", ErrorCodes::LOGICAL_ERROR); + break; } @@ -1404,6 +1410,11 @@ MergeTreeData::DataPartsVector MergeTreeData::getActivePartsToReplace( out_covering_part = *end; return {}; } + + if (!new_part_info.isDisjoint((*end)->info)) + throw Exception("Part " + new_part_name + " intersects next part " + (*end)->getNameWithState() + + ". 
It is a bug.", ErrorCodes::LOGICAL_ERROR); + break; } @@ -1472,7 +1483,7 @@ MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace( } DataPartPtr covering_part; - DataPartsVector covered_parts = getActivePartsToReplace(part_info, covering_part, lock); + DataPartsVector covered_parts = getActivePartsToReplace(part_info, part_name, covering_part, lock); if (covering_part) { @@ -2156,7 +2167,7 @@ MergeTreeData::DataPartsVector MergeTreeData::Transaction::commit() for (const DataPartPtr & part : precommitted_parts) { DataPartPtr covering_part; - DataPartsVector covered_parts = data->getActivePartsToReplace(part->info, covering_part, data_parts_lock); + DataPartsVector covered_parts = data->getActivePartsToReplace(part->info, part->name, covering_part, data_parts_lock); if (covering_part) { LOG_WARNING(data->log, "Tried to commit obsolete part " << part->name diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index cc9e6ff4d15..4b0da8ac5f7 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -680,6 +680,7 @@ private: /// Will check that the new part doesn't already exist and that it doesn't intersect existing part. DataPartsVector getActivePartsToReplace( const MergeTreePartInfo & new_part_info, + const String & new_part_name, DataPartPtr & out_covering_part, std::lock_guard & data_parts_lock) const; diff --git a/dbms/src/Storages/MergeTree/MergeTreePartInfo.h b/dbms/src/Storages/MergeTree/MergeTreePartInfo.h index fcee1ef0e7f..c7982909771 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartInfo.h +++ b/dbms/src/Storages/MergeTree/MergeTreePartInfo.h @@ -43,6 +43,14 @@ struct MergeTreePartInfo && level >= rhs.level; } + /// True if parts do not intersect in any way. + bool isDisjoint(const MergeTreePartInfo & rhs) const + { + return partition_id != rhs.partition_id + || min_block > rhs.max_block + || max_block < rhs.min_block; + } + String getPartName() const; String getPartNameV0(DayNum_t left_date, DayNum_t right_date) const; From da87abc422277cfcc2b9bc8d845270a2b5aa7523 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Mon, 19 Feb 2018 20:32:37 +0300 Subject: [PATCH 029/209] fix condition for the 'intersects previous part' log message --- dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp index 0bc73afc690..fe605ed8cdc 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp @@ -189,7 +189,7 @@ bool MergeTreeDataMerger::selectPartsToMerge( /// Check for consistency of data parts. If assertion is failed, it requires immediate investigation. 
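contains() and isDisjoint() split the possible relationships between two parts into covered, disjoint, and partial intersection; the new checks treat the last case as a logical error. A self-contained illustration, reduced to a single partition with no level field (Info is an invented stand-in, not the real MergeTreePartInfo):

    #include <cassert>

    /// Invented stand-in for MergeTreePartInfo: one partition, no level.
    struct Info
    {
        int min_block;
        int max_block;

        bool contains(const Info & rhs) const
        {
            return min_block <= rhs.min_block && max_block >= rhs.max_block;
        }

        /// Mirrors MergeTreePartInfo::isDisjoint, minus the partition check.
        bool isDisjoint(const Info & rhs) const
        {
            return min_block > rhs.max_block || max_block < rhs.min_block;
        }
    };

    int main()
    {
        Info merged{1, 10}, inner{3, 5}, later{11, 12}, straddling{8, 12};

        assert(merged.contains(inner));       /// covered: fine, gets replaced
        assert(merged.isDisjoint(later));     /// unrelated: fine, kept as is

        /// Partial intersection: neither contains the other, not disjoint.
        /// This is the case the new checks report as "It is a bug".
        assert(!merged.contains(straddling) && !straddling.contains(merged)
            && !merged.isDisjoint(straddling));

        /// Ranges that merely touch already intersect, which is why the
        /// selectPartsToMerge consistency check just below compares with `<=`.
        assert(!Info{10, 15}.isDisjoint(merged));
    }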
if (prev_part && part->info.partition_id == (*prev_part)->info.partition_id - && part->info.min_block < (*prev_part)->info.max_block) + && part->info.min_block <= (*prev_part)->info.max_block) { LOG_ERROR(log, "Part " << part->name << " intersects previous part " << (*prev_part)->name); } From a890473c02ee2609bdd024d41a3360982039e0de Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 21 Feb 2018 06:26:06 +0300 Subject: [PATCH 030/209] Preparation [#CLICKHOUSE-2] --- .../Interpreters/InterpreterSelectQuery.cpp | 310 +++++++++--------- .../src/Interpreters/InterpreterSelectQuery.h | 110 ++++--- 2 files changed, 210 insertions(+), 210 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 78c22b3de82..7ec72bf5bd0 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -69,7 +69,7 @@ namespace ErrorCodes InterpreterSelectQuery::~InterpreterSelectQuery() = default; -void InterpreterSelectQuery::init(const BlockInputStreamPtr & input, const Names & required_column_names) +void InterpreterSelectQuery::init(const Names & required_column_names) { ProfileEvents::increment(ProfileEvents::SelectQuery); @@ -95,15 +95,14 @@ void InterpreterSelectQuery::init(const BlockInputStreamPtr & input, const Names ASTSelectQuery & head_query = static_cast(*head); tail = head_query.next_union_all; - interpreter->next_select_in_union_all = - std::make_unique(head, context, to_stage, subquery_depth); + interpreter->next_select_in_union_all = std::make_unique(head, context, to_stage, subquery_depth); interpreter = interpreter->next_select_in_union_all.get(); } } if (is_first_select_inside_union_all && hasAsterisk()) { - basicInit(input); + basicInit(); // We execute this code here, because otherwise the following kind of query would not work // SELECT X FROM (SELECT * FROM (SELECT 1 AS X, 2 AS Y) UNION ALL SELECT 3, 4) @@ -133,7 +132,7 @@ void InterpreterSelectQuery::init(const BlockInputStreamPtr & input, const Names } } - basicInit(input); + basicInit(); } } @@ -146,7 +145,7 @@ bool InterpreterSelectQuery::hasAggregation(const ASTSelectQuery & query_ptr) return false; } -void InterpreterSelectQuery::basicInit(const BlockInputStreamPtr & input) +void InterpreterSelectQuery::basicInit() { /// Read from prepared input. 
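Each SELECT of a UNION ALL chain gets its own interpreter, and the head owns the rest through next_select_in_union_all; executeWithoutUnionImpl later walks this list and concatenates the per-branch streams. The ownership and traversal pattern, reduced to its essentials (Node and its fields are invented for the sketch):

    #include <iostream>
    #include <memory>

    /// One interpreter per SELECT; the head owns the next one, recursively.
    struct Node
    {
        int id;
        std::unique_ptr<Node> next;   /// plays the role of next_select_in_union_all

        explicit Node(int id_) : id(id_) {}
    };

    int main()
    {
        /// Built the way init() does it: a raw cursor walks the chain while
        /// each new node is owned by its predecessor.
        auto head = std::make_unique<Node>(0);
        Node * cursor = head.get();
        for (int i = 1; i < 3; ++i)
        {
            cursor->next = std::make_unique<Node>(i);
            cursor = cursor->next.get();
        }

        /// Traversed the way executeWithoutUnionImpl and hasAsterisk do it.
        for (Node * p = head->next.get(); p != nullptr; p = p->next.get())
            std::cout << "UNION ALL branch " << p->id << '\n';
    }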
if (input) @@ -210,9 +209,6 @@ void InterpreterSelectQuery::basicInit(const BlockInputStreamPtr & input) for (const auto & it : query_analyzer->getExternalTables()) if (!context.tryGetExternalTable(it.first)) context.addExternalTable(it.first, it.second); - - if (input) - streams.push_back(input); } void InterpreterSelectQuery::initQueryAnalyzer() @@ -224,16 +220,17 @@ void InterpreterSelectQuery::initQueryAnalyzer() } InterpreterSelectQuery::InterpreterSelectQuery(const ASTPtr & query_ptr_, const Context & context_, QueryProcessingStage::Enum to_stage_, - size_t subquery_depth_, BlockInputStreamPtr input) + size_t subquery_depth_, const BlockInputStreamPtr & input) : query_ptr(query_ptr_) , query(typeid_cast(*query_ptr)) , context(context_) , to_stage(to_stage_) , subquery_depth(subquery_depth_) , is_first_select_inside_union_all(query.isUnionAllHead()) + , input(input) , log(&Logger::get("InterpreterSelectQuery")) { - init(input); + init({}); } InterpreterSelectQuery::InterpreterSelectQuery(OnlyAnalyzeTag, const ASTPtr & query_ptr_, const Context & context_) @@ -250,14 +247,14 @@ InterpreterSelectQuery::InterpreterSelectQuery(OnlyAnalyzeTag, const ASTPtr & qu InterpreterSelectQuery::InterpreterSelectQuery(const ASTPtr & query_ptr_, const Context & context_, const Names & required_column_names_, - QueryProcessingStage::Enum to_stage_, size_t subquery_depth_, BlockInputStreamPtr input) + QueryProcessingStage::Enum to_stage_, size_t subquery_depth_, const BlockInputStreamPtr & input) : InterpreterSelectQuery(query_ptr_, context_, required_column_names_, {}, to_stage_, subquery_depth_, input) { } InterpreterSelectQuery::InterpreterSelectQuery(const ASTPtr & query_ptr_, const Context & context_, const Names & required_column_names_, - const NamesAndTypesList & table_column_names_, QueryProcessingStage::Enum to_stage_, size_t subquery_depth_, BlockInputStreamPtr input) + const NamesAndTypesList & table_column_names_, QueryProcessingStage::Enum to_stage_, size_t subquery_depth_, const BlockInputStreamPtr & input) : query_ptr(query_ptr_) , query(typeid_cast(*query_ptr)) , context(context_) @@ -265,9 +262,10 @@ InterpreterSelectQuery::InterpreterSelectQuery(const ASTPtr & query_ptr_, const , subquery_depth(subquery_depth_) , table_column_names(table_column_names_) , is_first_select_inside_union_all(query.isUnionAllHead()) + , input(input) , log(&Logger::get("InterpreterSelectQuery")) { - init(input, required_column_names_); + init(required_column_names_); } bool InterpreterSelectQuery::hasAsterisk() const @@ -276,13 +274,9 @@ bool InterpreterSelectQuery::hasAsterisk() const return true; if (is_first_select_inside_union_all) - { for (auto p = next_select_in_union_all.get(); p != nullptr; p = p->next_select_in_union_all.get()) - { if (p->query.hasAsterisk()) return true; - } - } return false; } @@ -290,10 +284,8 @@ bool InterpreterSelectQuery::hasAsterisk() const void InterpreterSelectQuery::renameColumns() { if (is_first_select_inside_union_all) - { for (auto p = next_select_in_union_all.get(); p != nullptr; p = p->next_select_in_union_all.get()) p->query.renameColumns(query); - } } void InterpreterSelectQuery::rewriteExpressionList(const Names & required_column_names) @@ -302,21 +294,15 @@ void InterpreterSelectQuery::rewriteExpressionList(const Names & required_column return; if (is_first_select_inside_union_all) - { for (auto p = next_select_in_union_all.get(); p != nullptr; p = p->next_select_in_union_all.get()) - { if (p->query.distinct) return; - } - } 
query.rewriteSelectExpressionList(required_column_names); if (is_first_select_inside_union_all) - { for (auto p = next_select_in_union_all.get(); p != nullptr; p = p->next_select_in_union_all.get()) p->query.rewriteSelectExpressionList(required_column_names); - } } void InterpreterSelectQuery::getDatabaseAndTableNames(String & database_name, String & table_name) @@ -361,12 +347,16 @@ Block InterpreterSelectQuery::getSampleBlock(const ASTPtr & query_ptr_, const Co BlockIO InterpreterSelectQuery::execute() { - (void) executeWithoutUnion(); + Pipeline pipeline; - executeUnion(); + if (input) + pipeline.streams.push_back(input); + + executeWithoutUnionImpl(pipeline); + executeUnion(pipeline); /// Constraints on the result, the quota on the result, and also callback for progress. - if (IProfilingBlockInputStream * stream = dynamic_cast(streams[0].get())) + if (IProfilingBlockInputStream * stream = dynamic_cast(pipeline.firstStream().get())) { /// Constraints apply only to the final result. if (to_stage == QueryProcessingStage::Complete) @@ -385,34 +375,44 @@ BlockIO InterpreterSelectQuery::execute() } BlockIO res; - res.in = streams[0]; + res.in = pipeline.firstStream(); return res; } -const BlockInputStreams & InterpreterSelectQuery::executeWithoutUnion() +BlockInputStreams InterpreterSelectQuery::executeWithoutUnion() +{ + Pipeline pipeline; + + if (input) + pipeline.streams.push_back(input); + + executeWithoutUnionImpl(pipeline); + + return pipeline.streams; +} + +void InterpreterSelectQuery::executeWithoutUnionImpl(Pipeline & pipeline) { if (is_first_select_inside_union_all) { - executeSingleQuery(); + executeSingleQuery(pipeline); for (auto p = next_select_in_union_all.get(); p != nullptr; p = p->next_select_in_union_all.get()) { - p->executeSingleQuery(); - const auto & others = p->streams; - streams.insert(streams.end(), others.begin(), others.end()); + Pipeline other_pipeline; + p->executeSingleQuery(other_pipeline); + pipeline.streams.insert(pipeline.streams.end(), other_pipeline.streams.begin(), other_pipeline.streams.end()); } - transformStreams([&](auto & stream) + pipeline.transform([&](auto & stream) { stream = std::make_shared(stream); }); } else - executeSingleQuery(); - - return streams; + executeSingleQuery(pipeline); } -void InterpreterSelectQuery::executeSingleQuery() +void InterpreterSelectQuery::executeSingleQuery(Pipeline & pipeline) { /** Streams of data. When the query is executed in parallel, we have several data streams. * If there is no GROUP BY, then perform all operations before ORDER BY and LIMIT in parallel, then @@ -429,7 +429,7 @@ void InterpreterSelectQuery::executeSingleQuery() union_within_single_query = false; /** Read the data from Storage. from_stage - to what stage the request was completed in Storage. 
*/ - QueryProcessingStage::Enum from_stage = executeFetchColumns(); + QueryProcessingStage::Enum from_stage = executeFetchColumns(pipeline); LOG_TRACE(log, QueryProcessingStage::toString(from_stage) << " -> " << QueryProcessingStage::toString(to_stage)); @@ -480,7 +480,7 @@ void InterpreterSelectQuery::executeSingleQuery() const ASTTableJoin & join = static_cast(*query.join()->table_join); if (join.kind == ASTTableJoin::Kind::Full || join.kind == ASTTableJoin::Kind::Right) - stream_with_non_joined_data = before_join->createStreamWithNonJoinedDataIfFullOrRightJoin(settings.max_block_size); + pipeline.stream_with_non_joined_data = before_join->createStreamWithNonJoinedDataIfFullOrRightJoin(settings.max_block_size); } if (query_analyzer->appendWhere(chain, !first_stage)) @@ -546,18 +546,18 @@ void InterpreterSelectQuery::executeSingleQuery() if (first_stage) { if (has_join) - for (auto & stream : streams) /// Applies to all sources except stream_with_non_joined_data. + for (auto & stream : pipeline.streams) /// Applies to all sources except stream_with_non_joined_data. stream = std::make_shared(stream, before_join); if (has_where) - executeWhere(before_where); + executeWhere(pipeline, before_where); if (need_aggregate) - executeAggregation(before_aggregation, aggregate_overflow_row, aggregate_final); + executeAggregation(pipeline, before_aggregation, aggregate_overflow_row, aggregate_final); else { - executeExpression(before_order_and_select); - executeDistinct(true, selected_columns); + executeExpression(pipeline, before_order_and_select); + executeDistinct(pipeline, true, selected_columns); } /** For distributed query processing, @@ -568,13 +568,13 @@ void InterpreterSelectQuery::executeSingleQuery() if (!second_stage && !need_aggregate && !has_having) { if (has_order_by) - executeOrder(); + executeOrder(pipeline); if (has_order_by && query.limit_length) - executeDistinct(false, selected_columns); + executeDistinct(pipeline, false, selected_columns); if (query.limit_length) - executePreLimit(); + executePreLimit(pipeline); } } @@ -586,24 +586,24 @@ void InterpreterSelectQuery::executeSingleQuery() { /// If you need to combine aggregated results from multiple servers if (!first_stage) - executeMergeAggregated(aggregate_overflow_row, aggregate_final); + executeMergeAggregated(pipeline, aggregate_overflow_row, aggregate_final); if (!aggregate_final) - executeTotalsAndHaving(has_having, before_having, aggregate_overflow_row); + executeTotalsAndHaving(pipeline, has_having, before_having, aggregate_overflow_row); else if (has_having) - executeHaving(before_having); + executeHaving(pipeline, before_having); - executeExpression(before_order_and_select); - executeDistinct(true, selected_columns); + executeExpression(pipeline, before_order_and_select); + executeDistinct(pipeline, true, selected_columns); - need_second_distinct_pass = query.distinct && hasMoreThanOneStream(); + need_second_distinct_pass = query.distinct && pipeline.hasMoreThanOneStream(); } else { - need_second_distinct_pass = query.distinct && hasMoreThanOneStream(); + need_second_distinct_pass = query.distinct && pipeline.hasMoreThanOneStream(); if (query.group_by_with_totals && !aggregate_final) - executeTotalsAndHaving(false, nullptr, aggregate_overflow_row); + executeTotalsAndHaving(pipeline, false, nullptr, aggregate_overflow_row); } if (has_order_by) @@ -613,17 +613,17 @@ void InterpreterSelectQuery::executeSingleQuery() * - therefore, we merge the sorted streams from remote servers. 
*/ if (!first_stage && !need_aggregate && !(query.group_by_with_totals && !aggregate_final)) - executeMergeSorted(); + executeMergeSorted(pipeline); else /// Otherwise, just sort. - executeOrder(); + executeOrder(pipeline); } - executeProjection(final_projection); + executeProjection(pipeline, final_projection); /// At this stage, we can calculate the minimums and maximums, if necessary. if (settings.extremes) { - transformStreams([&](auto & stream) + pipeline.transform([&](auto & stream) { if (IProfilingBlockInputStream * p_stream = dynamic_cast(stream.get())) p_stream->enableExtremes(); @@ -633,36 +633,36 @@ void InterpreterSelectQuery::executeSingleQuery() /** Optimization - if there are several sources and there is LIMIT, then first apply the preliminary LIMIT, * limiting the number of entries in each up to `offset + limit`. */ - if (query.limit_length && hasMoreThanOneStream() && !query.distinct && !query.limit_by_expression_list) - executePreLimit(); + if (query.limit_length && pipeline.hasMoreThanOneStream() && !query.distinct && !query.limit_by_expression_list) + executePreLimit(pipeline); - if (stream_with_non_joined_data || need_second_distinct_pass) + if (pipeline.stream_with_non_joined_data || need_second_distinct_pass) union_within_single_query = true; /// To execute LIMIT BY we should merge all streams together. - if (query.limit_by_expression_list && hasMoreThanOneStream()) + if (query.limit_by_expression_list && pipeline.hasMoreThanOneStream()) union_within_single_query = true; if (union_within_single_query) - executeUnion(); + executeUnion(pipeline); - if (streams.size() == 1) + if (pipeline.streams.size() == 1) { /** If there was more than one stream, * then DISTINCT needs to be performed once again after merging all streams. */ if (need_second_distinct_pass) - executeDistinct(false, Names()); + executeDistinct(pipeline, false, Names()); - executeLimitBy(); - executeLimit(); + executeLimitBy(pipeline); + executeLimit(pipeline); } } } SubqueriesForSets subqueries_for_sets = query_analyzer->getSubqueriesForSets(); if (!subqueries_for_sets.empty()) - executeSubqueriesInSetsAndJoins(subqueries_for_sets); + executeSubqueriesInSetsAndJoins(pipeline, subqueries_for_sets); } @@ -678,7 +678,7 @@ static void getLimitLengthAndOffset(ASTSelectQuery & query, size_t & length, siz } } -QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns() +QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline & pipeline) { /// The subquery interpreter, if the subquery std::optional interpreter_subquery; @@ -827,30 +827,29 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns() } /// If there was no already prepared input. 
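Every execute* step above rewires the pipeline in place: transform() takes each stream slot by reference and rebinds it to a wrapper that owns the previous stream, so the pipeline grows as a chain of decorators. A stripped-down model of that pattern (Stream, Source, and AddConstant are invented for the sketch; the real wrappers are FilterBlockInputStream, ExpressionBlockInputStream, and friends):

    #include <iostream>
    #include <memory>
    #include <vector>

    struct Stream
    {
        virtual ~Stream() = default;
        virtual int read() = 0;   /// toy stand-in for reading the next block
    };

    struct Source : Stream
    {
        int n = 0;
        int read() override { return n++; }
    };

    /// A decorator: owns its input and post-processes what it produces.
    struct AddConstant : Stream
    {
        std::shared_ptr<Stream> in;
        int c;
        AddConstant(std::shared_ptr<Stream> in_, int c_) : in(std::move(in_)), c(c_) {}
        int read() override { return in->read() + c; }
    };

    struct Pipeline
    {
        std::vector<std::shared_ptr<Stream>> streams;

        template <typename Transform>
        void transform(Transform && fn)
        {
            for (auto & stream : streams)
                fn(stream);   /// rebinds each slot to a wrapper over the old stream
        }
    };

    int main()
    {
        Pipeline pipeline;
        pipeline.streams.push_back(std::make_shared<Source>());
        pipeline.streams.push_back(std::make_shared<Source>());

        /// Same shape as executeWhere / executeExpression above:
        pipeline.transform([](auto & stream)
        {
            stream = std::make_shared<AddConstant>(stream, 100);
        });

        for (const auto & stream : pipeline.streams)
            std::cout << stream->read() << '\n';   /// prints 100 twice
    }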
- if (streams.empty()) - streams = storage->read(required_columns, query_info, context, from_stage, max_block_size, max_streams); + if (pipeline.streams.empty()) + pipeline.streams = storage->read(required_columns, query_info, context, from_stage, max_block_size, max_streams); - if (streams.empty()) - streams.emplace_back(std::make_shared(storage->getSampleBlockForColumns(required_columns))); + if (pipeline.streams.empty()) + pipeline.streams.emplace_back(std::make_shared(storage->getSampleBlockForColumns(required_columns))); if (alias_actions) { /// Wrap each stream returned from the table to calculate and add ALIAS columns - transformStreams([&] (auto & stream) + pipeline.transform([&] (auto & stream) { stream = std::make_shared(stream, alias_actions); }); } - transformStreams([&](auto & stream) + pipeline.transform([&](auto & stream) { stream->addTableLock(table_lock); }); } else { - const auto & subquery_streams = interpreter_subquery->executeWithoutUnion(); - streams.insert(streams.end(), subquery_streams.begin(), subquery_streams.end()); + interpreter_subquery->executeWithoutUnionImpl(pipeline); } /** Set the limits and quota for reading data, the speed and time of the query. @@ -871,7 +870,7 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns() QuotaForIntervals & quota = context.getQuota(); - transformStreams([&](auto & stream) + pipeline.transform([&](auto & stream) { if (IProfilingBlockInputStream * p_stream = dynamic_cast(stream.get())) { @@ -885,18 +884,18 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns() } -void InterpreterSelectQuery::executeWhere(const ExpressionActionsPtr & expression) +void InterpreterSelectQuery::executeWhere(Pipeline & pipeline, const ExpressionActionsPtr & expression) { - transformStreams([&](auto & stream) + pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, expression, query.where_expression->getColumnName()); }); } -void InterpreterSelectQuery::executeAggregation(const ExpressionActionsPtr & expression, bool overflow_row, bool final) +void InterpreterSelectQuery::executeAggregation(Pipeline & pipeline, const ExpressionActionsPtr & expression, bool overflow_row, bool final) { - transformStreams([&](auto & stream) + pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, expression); }); @@ -905,7 +904,7 @@ void InterpreterSelectQuery::executeAggregation(const ExpressionActionsPtr & exp AggregateDescriptions aggregates; query_analyzer->getAggregateInfo(key_names, aggregates); - Block header = streams[0]->getHeader(); + Block header = pipeline.firstStream()->getHeader(); ColumnNumbers keys; for (const auto & name : key_names) keys.push_back(header.getPositionByName(name)); @@ -920,7 +919,7 @@ void InterpreterSelectQuery::executeAggregation(const ExpressionActionsPtr & exp * 1. Parallel aggregation is done, and the results should be merged in parallel. * 2. An aggregation is done with store of temporary data on the disk, and they need to be merged in a memory efficient way. 
*/ - bool allow_to_use_two_level_group_by = streams.size() > 1 || settings.limits.max_bytes_before_external_group_by != 0; + bool allow_to_use_two_level_group_by = pipeline.streams.size() > 1 || settings.limits.max_bytes_before_external_group_by != 0; Aggregator::Params params(header, keys, aggregates, overflow_row, settings.limits.max_rows_to_group_by, settings.limits.group_by_overflow_mode, @@ -931,43 +930,43 @@ void InterpreterSelectQuery::executeAggregation(const ExpressionActionsPtr & exp context.getTemporaryPath()); /// If there are several sources, then we perform parallel aggregation - if (streams.size() > 1) + if (pipeline.streams.size() > 1) { - streams[0] = std::make_shared( - streams, stream_with_non_joined_data, params, final, + pipeline.firstStream() = std::make_shared( + pipeline.streams, pipeline.stream_with_non_joined_data, params, final, max_streams, settings.aggregation_memory_efficient_merge_threads ? static_cast(settings.aggregation_memory_efficient_merge_threads) : static_cast(settings.max_threads)); - stream_with_non_joined_data = nullptr; - streams.resize(1); + pipeline.stream_with_non_joined_data = nullptr; + pipeline.streams.resize(1); } else { BlockInputStreams inputs; - if (!streams.empty()) - inputs.push_back(streams[0]); + if (!pipeline.streams.empty()) + inputs.push_back(pipeline.firstStream()); else - streams.resize(1); + pipeline.streams.resize(1); - if (stream_with_non_joined_data) - inputs.push_back(stream_with_non_joined_data); + if (pipeline.stream_with_non_joined_data) + inputs.push_back(pipeline.stream_with_non_joined_data); - streams[0] = std::make_shared(std::make_shared(inputs), params, final); + pipeline.firstStream() = std::make_shared(std::make_shared(inputs), params, final); - stream_with_non_joined_data = nullptr; + pipeline.stream_with_non_joined_data = nullptr; } } -void InterpreterSelectQuery::executeMergeAggregated(bool overflow_row, bool final) +void InterpreterSelectQuery::executeMergeAggregated(Pipeline & pipeline, bool overflow_row, bool final) { Names key_names; AggregateDescriptions aggregates; query_analyzer->getAggregateInfo(key_names, aggregates); - Block header = streams[0]->getHeader(); + Block header = pipeline.firstStream()->getHeader(); ColumnNumbers keys; for (const auto & name : key_names) @@ -995,48 +994,48 @@ void InterpreterSelectQuery::executeMergeAggregated(bool overflow_row, bool fina if (!settings.distributed_aggregation_memory_efficient) { /// We union several sources into one, parallelizing the work. - executeUnion(); + executeUnion(pipeline); /// Now merge the aggregated blocks - streams[0] = std::make_shared(streams[0], params, final, settings.max_threads); + pipeline.firstStream() = std::make_shared(pipeline.firstStream(), params, final, settings.max_threads); } else { - streams[0] = std::make_shared(streams, params, final, + pipeline.firstStream() = std::make_shared(pipeline.streams, params, final, max_streams, settings.aggregation_memory_efficient_merge_threads ? 
static_cast(settings.aggregation_memory_efficient_merge_threads) : static_cast(settings.max_threads)); - streams.resize(1); + pipeline.streams.resize(1); } } -void InterpreterSelectQuery::executeHaving(const ExpressionActionsPtr & expression) +void InterpreterSelectQuery::executeHaving(Pipeline & pipeline, const ExpressionActionsPtr & expression) { - transformStreams([&](auto & stream) + pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, expression, query.having_expression->getColumnName()); }); } -void InterpreterSelectQuery::executeTotalsAndHaving(bool has_having, const ExpressionActionsPtr & expression, bool overflow_row) +void InterpreterSelectQuery::executeTotalsAndHaving(Pipeline & pipeline, bool has_having, const ExpressionActionsPtr & expression, bool overflow_row) { - executeUnion(); + executeUnion(pipeline); const Settings & settings = context.getSettingsRef(); - streams[0] = std::make_shared( - streams[0], overflow_row, expression, + pipeline.firstStream() = std::make_shared( + pipeline.firstStream(), overflow_row, expression, has_having ? query.having_expression->getColumnName() : "", settings.totals_mode, settings.totals_auto_threshold); } -void InterpreterSelectQuery::executeExpression(const ExpressionActionsPtr & expression) +void InterpreterSelectQuery::executeExpression(Pipeline & pipeline, const ExpressionActionsPtr & expression) { - transformStreams([&](auto & stream) + pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, expression); }); @@ -1078,14 +1077,14 @@ static size_t getLimitForSorting(ASTSelectQuery & query) } -void InterpreterSelectQuery::executeOrder() +void InterpreterSelectQuery::executeOrder(Pipeline & pipeline) { SortDescription order_descr = getSortDescription(query); size_t limit = getLimitForSorting(query); const Settings & settings = context.getSettingsRef(); - transformStreams([&](auto & stream) + pipeline.transform([&](auto & stream) { auto sorting_stream = std::make_shared(stream, order_descr, limit); @@ -1101,16 +1100,16 @@ void InterpreterSelectQuery::executeOrder() }); /// If there are several streams, we merge them into one - executeUnion(); + executeUnion(pipeline); /// Merge the sorted blocks. - streams[0] = std::make_shared( - streams[0], order_descr, settings.max_block_size, limit, + pipeline.firstStream() = std::make_shared( + pipeline.firstStream(), order_descr, settings.max_block_size, limit, settings.limits.max_bytes_before_external_sort, context.getTemporaryPath()); } -void InterpreterSelectQuery::executeMergeSorted() +void InterpreterSelectQuery::executeMergeSorted(Pipeline & pipeline) { SortDescription order_descr = getSortDescription(query); size_t limit = getLimitForSorting(query); @@ -1118,33 +1117,33 @@ void InterpreterSelectQuery::executeMergeSorted() const Settings & settings = context.getSettingsRef(); /// If there are several streams, then we merge them into one - if (hasMoreThanOneStream()) + if (pipeline.hasMoreThanOneStream()) { /** MergingSortedBlockInputStream reads the sources sequentially. * To make the data on the remote servers prepared in parallel, we wrap it in AsynchronousBlockInputStream. */ - transformStreams([&](auto & stream) + pipeline.transform([&](auto & stream) { stream = std::make_shared(stream); }); /// Merge the sorted sources into one sorted source. 
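executeOrder sorts each stream independently, and when several sorted sources remain, MergingSortedBlockInputStream performs a k-way merge over them. The underlying heap-based merge, sketched over plain vectors rather than block streams (function and type names here are ours, not ClickHouse's):

    #include <iostream>
    #include <queue>
    #include <utility>
    #include <vector>

    /// K-way merge of already-sorted runs, as MergingSortedBlockInputStream
    /// does over sorted block streams. O(N log K).
    std::vector<int> mergeSorted(const std::vector<std::vector<int>> & runs)
    {
        using Cursor = std::pair<int, size_t>;   /// (current value, run index)
        auto greater = [](const Cursor & a, const Cursor & b) { return a.first > b.first; };
        std::priority_queue<Cursor, std::vector<Cursor>, decltype(greater)> heap(greater);

        std::vector<size_t> pos(runs.size(), 0);
        for (size_t i = 0; i < runs.size(); ++i)
            if (!runs[i].empty())
                heap.push({runs[i][0], i});

        std::vector<int> result;
        while (!heap.empty())
        {
            auto [value, run] = heap.top();
            heap.pop();
            result.push_back(value);
            if (++pos[run] < runs[run].size())
                heap.push({runs[run][pos[run]], run});
        }
        return result;
    }

    int main()
    {
        /// Each inner vector plays the role of one sorted source stream.
        for (int value : mergeSorted({{1, 4, 7}, {2, 5}, {0, 9}}))
            std::cout << value << ' ';   /// 0 1 2 4 5 7 9
    }

The merge keeps only one cursor per source and reads the sources sequentially, which is exactly why the remote streams are first wrapped in AsynchronousBlockInputStream: the data is prepared in parallel even though the merge consumes it one block at a time.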
- streams[0] = std::make_shared(streams, order_descr, settings.max_block_size, limit); - streams.resize(1); + pipeline.firstStream() = std::make_shared(pipeline.streams, order_descr, settings.max_block_size, limit); + pipeline.streams.resize(1); } } -void InterpreterSelectQuery::executeProjection(const ExpressionActionsPtr & expression) +void InterpreterSelectQuery::executeProjection(Pipeline & pipeline, const ExpressionActionsPtr & expression) { - transformStreams([&](auto & stream) + pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, expression); }); } -void InterpreterSelectQuery::executeDistinct(bool before_order, Names columns) +void InterpreterSelectQuery::executeDistinct(Pipeline & pipeline, bool before_order, Names columns) { if (query.distinct) { @@ -1160,7 +1159,7 @@ void InterpreterSelectQuery::executeDistinct(bool before_order, Names columns) if (!query.order_expression_list || !before_order) limit_for_distinct = limit_length + limit_offset; - transformStreams([&](auto & stream) + pipeline.transform([&](auto & stream) { if (stream->isGroupedOutput()) stream = std::make_shared(stream, settings.limits, limit_for_distinct, columns); @@ -1168,33 +1167,33 @@ void InterpreterSelectQuery::executeDistinct(bool before_order, Names columns) stream = std::make_shared(stream, settings.limits, limit_for_distinct, columns); }); - if (hasMoreThanOneStream()) + if (pipeline.hasMoreThanOneStream()) union_within_single_query = true; } } -void InterpreterSelectQuery::executeUnion() +void InterpreterSelectQuery::executeUnion(Pipeline & pipeline) { /// If there are still several streams, then we combine them into one - if (hasMoreThanOneStream()) + if (pipeline.hasMoreThanOneStream()) { - streams[0] = std::make_shared>(streams, stream_with_non_joined_data, max_streams); - stream_with_non_joined_data = nullptr; - streams.resize(1); + pipeline.firstStream() = std::make_shared>(pipeline.streams, pipeline.stream_with_non_joined_data, max_streams); + pipeline.stream_with_non_joined_data = nullptr; + pipeline.streams.resize(1); union_within_single_query = false; } - else if (stream_with_non_joined_data) + else if (pipeline.stream_with_non_joined_data) { - streams.push_back(stream_with_non_joined_data); - stream_with_non_joined_data = nullptr; + pipeline.streams.push_back(pipeline.stream_with_non_joined_data); + pipeline.stream_with_non_joined_data = nullptr; union_within_single_query = false; } } /// Preliminary LIMIT - is used in every source, if there are several sources, before they are combined. 
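The preliminary LIMIT has to keep offset + length rows per source, with no per-source offset: before the union it is unknown which source the first offset rows of the final result will come from. A small worked model of the two-step limit, with plain vectors standing in for streams:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    int main()
    {
        const size_t limit_length = 10, limit_offset = 5;

        /// Three sources of 100 rows each.
        std::vector<std::vector<int>> sources(3, std::vector<int>(100, 1));

        /// executePreLimit: cut every source to offset + length rows, offset 0.
        for (auto & source : sources)
            source.resize(std::min(source.size(), limit_length + limit_offset));

        /// executeUnion: concatenate; the order between sources is irrelevant here.
        std::vector<int> unioned;
        for (const auto & source : sources)
            unioned.insert(unioned.end(), source.begin(), source.end());

        /// executeLimit: only now can the real offset be applied.
        const size_t end = std::min(unioned.size(), limit_offset + limit_length);
        std::vector<int> result(unioned.begin() + limit_offset, unioned.begin() + end);

        assert(result.size() == limit_length);
    }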
/// Preliminary LIMIT - is used in every source, if there are several sources, before they are combined.
-void InterpreterSelectQuery::executePreLimit()
+void InterpreterSelectQuery::executePreLimit(Pipeline & pipeline)
 {
     size_t limit_length = 0;
     size_t limit_offset = 0;
@@ -1203,18 +1202,18 @@
     /// If there is LIMIT
     if (query.limit_length)
     {
-        transformStreams([&](auto & stream)
+        pipeline.transform([&](auto & stream)
         {
             stream = std::make_shared<LimitBlockInputStream>(stream, limit_length + limit_offset, 0, false);
         });

-        if (hasMoreThanOneStream())
+        if (pipeline.hasMoreThanOneStream())
             union_within_single_query = true;
     }
 }

-void InterpreterSelectQuery::executeLimitBy()
+void InterpreterSelectQuery::executeLimitBy(Pipeline & pipeline)
 {
     if (!query.limit_by_value || !query.limit_by_expression_list)
         return;
@@ -1227,7 +1226,7 @@
         columns.emplace_back(elem->getAliasOrColumnName());
     }

-    transformStreams([&](auto & stream)
+    pipeline.transform([&](auto & stream)
     {
         stream = std::make_shared<LimitByBlockInputStream>(
             stream, value, columns
@@ -1236,7 +1235,7 @@
 }

-void InterpreterSelectQuery::executeLimit()
+void InterpreterSelectQuery::executeLimit(Pipeline & pipeline)
 {
     size_t limit_length = 0;
     size_t limit_offset = 0;
@@ -1286,7 +1285,7 @@
         }
     }

-    transformStreams([&](auto & stream)
+    pipeline.transform([&](auto & stream)
     {
         stream = std::make_shared<LimitBlockInputStream>(stream, limit_length, limit_offset, always_read_till_end);
     });
@@ -1294,31 +1293,16 @@
 }

-void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(SubqueriesForSets & subqueries_for_sets)
+void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(Pipeline & pipeline, SubqueriesForSets & subqueries_for_sets)
 {
     const Settings & settings = context.getSettingsRef();

-    executeUnion();
-    streams[0] = std::make_shared<CreatingSetsBlockInputStream>(streams[0], subqueries_for_sets, settings.limits);
-}
-
-template <typename Transform>
-void InterpreterSelectQuery::transformStreams(Transform && transform)
-{
-    for (auto & stream : streams)
-        transform(stream);
-
-    if (stream_with_non_joined_data)
-        transform(stream_with_non_joined_data);
-}
-
-
-bool InterpreterSelectQuery::hasMoreThanOneStream() const
-{
-    return streams.size() + (stream_with_non_joined_data ? 1 : 0) > 1;
+    executeUnion(pipeline);
+    pipeline.firstStream() = std::make_shared<CreatingSetsBlockInputStream>(pipeline.firstStream(), subqueries_for_sets, settings.limits);
 }

+/// TODO This is trash.
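Aside on the refactoring above: the deleted transformStreams and hasMoreThanOneStream helpers reappear as members of the new Pipeline struct (defined in the header diff below), so every execute* step now works on an explicit pipeline argument instead of interpreter-wide state. A condensed, compilable model of the pattern; the field and method names follow the patch, everything else is simplified:

```cpp
// Minimal model of Pipeline::transform: wrap every ordinary stream plus the
// optional "non-joined rows" stream with the same transformation.
#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct Stream { std::string description; };
using StreamPtr = std::shared_ptr<Stream>;

struct Pipeline
{
    std::vector<StreamPtr> streams;
    StreamPtr stream_with_non_joined_data;

    template <typename Transform>
    void transform(Transform && fn)
    {
        for (auto & stream : streams)
            fn(stream);
        if (stream_with_non_joined_data)
            fn(stream_with_non_joined_data);
    }
};

int main()
{
    Pipeline pipeline;
    pipeline.streams = {std::make_shared<Stream>(Stream{"source-0"}),
                        std::make_shared<Stream>(Stream{"source-1"})};

    // "Wrapping" a stream is modeled here by rewriting its description.
    pipeline.transform([](StreamPtr & stream)
    {
        stream = std::make_shared<Stream>(Stream{"Filter(" + stream->description + ")"});
    });

    for (auto & s : pipeline.streams)
        std::cout << s->description << '\n';  // Filter(source-0), Filter(source-1)
}
```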
void InterpreterSelectQuery::ignoreWithTotals() { query.group_by_with_totals = false; diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.h b/dbms/src/Interpreters/InterpreterSelectQuery.h index b794907d23d..48a14656151 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.h +++ b/dbms/src/Interpreters/InterpreterSelectQuery.h @@ -50,7 +50,7 @@ public: const Context & context_, QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete, size_t subquery_depth_ = 0, - BlockInputStreamPtr input = nullptr); + const BlockInputStreamPtr & input = nullptr); InterpreterSelectQuery( const ASTPtr & query_ptr_, @@ -58,7 +58,7 @@ public: const Names & required_column_names, QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete, size_t subquery_depth_ = 0, - BlockInputStreamPtr input = nullptr); + const BlockInputStreamPtr & input = nullptr); InterpreterSelectQuery( const ASTPtr & query_ptr_, @@ -67,7 +67,7 @@ public: const NamesAndTypesList & table_column_names_, QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete, size_t subquery_depth_ = 0, - BlockInputStreamPtr input = nullptr); + const BlockInputStreamPtr & input = nullptr); ~InterpreterSelectQuery(); @@ -78,7 +78,7 @@ public: /** Execute the query without union of threads, if it is possible. */ - const BlockInputStreams & executeWithoutUnion(); + BlockInputStreams executeWithoutUnion(); /// TODO It's confusing that these methods return result structure for the case of QueryProcessingStage::Complete regardless to the actual 'to_stage'. Block getSampleBlock(); @@ -88,23 +88,55 @@ public: const Context & context_); private: - /** - * - Optimization if an object is created only to call getSampleBlock(): consider only the first SELECT of the UNION ALL chain, because - * the first SELECT is sufficient to determine the required columns. - */ + struct Pipeline + { + /** Streams of data. + * The source data streams are produced in the executeFetchColumns function. + * Then they are converted (wrapped in other streams) using the `execute*` functions, + * to get the whole pipeline running the query. + */ + BlockInputStreams streams; + + /** When executing FULL or RIGHT JOIN, there will be a data stream from which you can read "not joined" rows. + * It has a special meaning, since reading from it should be done after reading from the main streams. + * It is joined to the main streams in UnionBlockInputStream or ParallelAggregatingBlockInputStream. + */ + BlockInputStreamPtr stream_with_non_joined_data; + + BlockInputStreamPtr & firstStream() { return streams.at(0); } + + template + void transform(Transform && transform) + { + for (auto & stream : streams) + transform(stream); + + if (stream_with_non_joined_data) + transform(stream_with_non_joined_data); + } + + bool hasMoreThanOneStream() const + { + return streams.size() + (stream_with_non_joined_data ? 1 : 0) > 1; + } + }; + + /** - Optimization if an object is created only to call getSampleBlock(): consider only the first SELECT of the UNION ALL chain, because + * the first SELECT is sufficient to determine the required columns. 
+ */ struct OnlyAnalyzeTag {}; InterpreterSelectQuery( OnlyAnalyzeTag, const ASTPtr & query_ptr_, const Context & context_); - void init(const BlockInputStreamPtr & input, const Names & required_column_names = Names{}); - void basicInit(const BlockInputStreamPtr & input); + void init(const Names & required_column_names); + void basicInit(); void initQueryAnalyzer(); bool hasAggregation(const ASTSelectQuery & query_ptr); /// Execute one SELECT query from the UNION ALL chain. - void executeSingleQuery(); + void executeSingleQuery(Pipeline & pipeline); /** Leave only the necessary columns of the SELECT section in each query of the UNION ALL chain. * However, if you use at least one DISTINCT in the chain, then all the columns are considered necessary, @@ -135,30 +167,24 @@ private: /// Different stages of query execution. /// Fetch data from the table. Returns the stage to which the query was processed in Storage. - QueryProcessingStage::Enum executeFetchColumns(); + QueryProcessingStage::Enum executeFetchColumns(Pipeline & pipeline); - void executeWhere(const ExpressionActionsPtr & expression); - void executeAggregation(const ExpressionActionsPtr & expression, bool overflow_row, bool final); - void executeMergeAggregated(bool overflow_row, bool final); - void executeTotalsAndHaving(bool has_having, const ExpressionActionsPtr & expression, bool overflow_row); - void executeHaving(const ExpressionActionsPtr & expression); - void executeExpression(const ExpressionActionsPtr & expression); - void executeOrder(); - void executeMergeSorted(); - void executePreLimit(); - void executeUnion(); - void executeLimitBy(); - void executeLimit(); - void executeProjection(const ExpressionActionsPtr & expression); - void executeDistinct(bool before_order, Names columns); - void executeSubqueriesInSetsAndJoins(std::unordered_map & subqueries_for_sets); - - template - void transformStreams(Transform && transform); - - bool hasNoData() const; - - bool hasMoreThanOneStream() const; + void executeWithoutUnionImpl(Pipeline & pipeline); + void executeWhere(Pipeline & pipeline, const ExpressionActionsPtr & expression); + void executeAggregation(Pipeline & pipeline, const ExpressionActionsPtr & expression, bool overflow_row, bool final); + void executeMergeAggregated(Pipeline & pipeline, bool overflow_row, bool final); + void executeTotalsAndHaving(Pipeline & pipeline, bool has_having, const ExpressionActionsPtr & expression, bool overflow_row); + void executeHaving(Pipeline & pipeline, const ExpressionActionsPtr & expression); + void executeExpression(Pipeline & pipeline, const ExpressionActionsPtr & expression); + void executeOrder(Pipeline & pipeline); + void executeMergeSorted(Pipeline & pipeline); + void executePreLimit(Pipeline & pipeline); + void executeUnion(Pipeline & pipeline); + void executeLimitBy(Pipeline & pipeline); + void executeLimit(Pipeline & pipeline); + void executeProjection(Pipeline & pipeline, const ExpressionActionsPtr & expression); + void executeDistinct(Pipeline & pipeline, bool before_order, Names columns); + void executeSubqueriesInSetsAndJoins(Pipeline & pipeline, std::unordered_map & subqueries_for_sets); void ignoreWithTotals(); @@ -181,19 +207,6 @@ private: /// How many streams we ask for storage to produce, and in how many threads we will do further processing. size_t max_streams = 1; - /** Streams of data. - * The source data streams are produced in the executeFetchColumns function. 
- * Then they are converted (wrapped in other streams) using the `execute*` functions, - * to get the whole pipeline running the query. - */ - BlockInputStreams streams; - - /** When executing FULL or RIGHT JOIN, there will be a data stream from which you can read "not joined" rows. - * It has a special meaning, since reading from it should be done after reading from the main streams. - * It is joined to the main streams in UnionBlockInputStream or ParallelAggregatingBlockInputStream. - */ - BlockInputStreamPtr stream_with_non_joined_data; - /// Is it the first SELECT query of the UNION ALL chain? bool is_first_select_inside_union_all; @@ -207,6 +220,9 @@ private: StoragePtr storage; TableStructureReadLockPtr table_lock; + /// Used when we read from prepared input, not table or subquery. + BlockInputStreamPtr input; + /// Do union of streams within a SELECT query? bool union_within_single_query = false; From 33e42a9001beb767e48cffef32b369fd1a762751 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 21 Feb 2018 06:48:46 +0300 Subject: [PATCH 031/209] Better [#CLICKHOUSE-2] --- dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp | 3 ++- dbms/src/DataStreams/PushingToViewsBlockOutputStream.h | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp index 621423044ae..06296b8b34b 100644 --- a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp +++ b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp @@ -1,4 +1,5 @@ -#include "PushingToViewsBlockOutputStream.h" +#include +#include #include diff --git a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.h b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.h index 237c4ef73a1..7703b81dc4e 100644 --- a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.h +++ b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.h @@ -4,7 +4,6 @@ #include #include #include -#include #include From 63a9f961983524c4b57d549944aceccc2b4693cb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 21 Feb 2018 06:49:10 +0300 Subject: [PATCH 032/209] Preparation [#CLICKHOUSE-2] --- .../Interpreters/InterpreterSelectQuery.cpp | 92 ++++++++----------- .../src/Interpreters/InterpreterSelectQuery.h | 13 +-- 2 files changed, 42 insertions(+), 63 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 7ec72bf5bd0..f3d8b0201f6 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -109,7 +110,7 @@ void InterpreterSelectQuery::init(const Names & required_column_names) // because the asterisk is replaced with columns only when query_analyzer objects are created in basicInit(). renameColumns(); - if (!required_column_names.empty() && (table_column_names.size() != required_column_names.size())) + if (!required_column_names.empty() && (source_header.columns() != required_column_names.size())) { rewriteExpressionList(required_column_names); /// Now there is obsolete information to execute the query. We update this information. 
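Annotation: from this patch on, the interpreter carries a `Block source_header` in place of the old `NamesAndTypesList table_column_names`, as the init() hunk above (which now sizes against `source_header.columns()`) shows. A header block bundles column names, types and zero-row column prototypes, so a single value serves both as a schema and as an empty result. A toy model of the idea, with illustrative types rather than ClickHouse's:

```cpp
// "Header = empty block": same names and types as real data, zero rows.
#include <iostream>
#include <string>
#include <vector>

struct Column { std::string name; std::string type; std::vector<std::string> data; };

struct Block
{
    std::vector<Column> columns;
    size_t rows() const { return columns.empty() ? 0 : columns[0].data.size(); }
};

Block makeHeader(const Block & any_block)
{
    Block header = any_block;
    for (auto & col : header.columns)
        col.data.clear();  // keep schema, drop rows
    return header;
}

int main()
{
    Block block{{{"id", "UInt64", {"1", "2"}}, {"name", "String", {"a", "b"}}}};
    Block header = makeHeader(block);
    std::cout << header.columns.size() << " columns, " << header.rows() << " rows\n";  // 2 columns, 0 rows
}
```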
@@ -127,7 +128,7 @@ void InterpreterSelectQuery::init(const Names & required_column_names) { for (auto p = next_select_in_union_all.get(); p != nullptr; p = p->next_select_in_union_all.get()) p->query_analyzer = std::make_unique( - p->query_ptr, p->context, p->storage, p->table_column_names, p->subquery_depth, + p->query_ptr, p->context, p->storage, p->source_header.getNamesAndTypesList(), p->subquery_depth, false, p->query_analyzer->getSubqueriesForSets()); } } @@ -150,8 +151,7 @@ void InterpreterSelectQuery::basicInit() /// Read from prepared input. if (input) { - if (table_column_names.empty()) - table_column_names = input->getHeader().getNamesAndTypesList(); + source_header = input->getHeader(); } else { @@ -160,8 +160,7 @@ void InterpreterSelectQuery::basicInit() /// Read from subquery. if (table_expression && typeid_cast(table_expression.get())) { - if (table_column_names.empty()) - table_column_names = InterpreterSelectQuery::getSampleBlock(table_expression, context).getNamesAndTypesList(); + source_header = InterpreterSelectQuery::getSampleBlock(table_expression, context); } else { @@ -186,15 +185,14 @@ void InterpreterSelectQuery::basicInit() } table_lock = storage->lockStructure(false, __PRETTY_FUNCTION__); - if (table_column_names.empty()) - table_column_names = storage->getColumnsListNonMaterialized(); + source_header = storage->getSampleBlockNonMaterialized(); } } - if (table_column_names.empty()) + if (!source_header) throw Exception("There are no available columns", ErrorCodes::THERE_IS_NO_COLUMN); - query_analyzer = std::make_unique(query_ptr, context, storage, table_column_names, subquery_depth, !only_analyze); + query_analyzer = std::make_unique(query_ptr, context, storage, source_header.getNamesAndTypesList(), subquery_depth, !only_analyze); if (query.sample_size() && (input || !storage || !storage->supportsSampling())) throw Exception("Illegal SAMPLE: table doesn't support sampling", ErrorCodes::SAMPLING_NOT_SUPPORTED); @@ -213,14 +211,29 @@ void InterpreterSelectQuery::basicInit() void InterpreterSelectQuery::initQueryAnalyzer() { - query_analyzer = std::make_unique(query_ptr, context, storage, table_column_names, subquery_depth, !only_analyze); + query_analyzer = std::make_unique(query_ptr, context, storage, source_header.getNamesAndTypesList(), subquery_depth, !only_analyze); for (auto p = next_select_in_union_all.get(); p != nullptr; p = p->next_select_in_union_all.get()) - p->query_analyzer = std::make_unique(p->query_ptr, p->context, p->storage, p->table_column_names, p->subquery_depth, !only_analyze); + p->query_analyzer = std::make_unique(p->query_ptr, p->context, p->storage, p->source_header.getNamesAndTypesList(), p->subquery_depth, !only_analyze); } -InterpreterSelectQuery::InterpreterSelectQuery(const ASTPtr & query_ptr_, const Context & context_, QueryProcessingStage::Enum to_stage_, - size_t subquery_depth_, const BlockInputStreamPtr & input) +InterpreterSelectQuery::InterpreterSelectQuery( + const ASTPtr & query_ptr_, + const Context & context_, + QueryProcessingStage::Enum to_stage_, + size_t subquery_depth_, + const BlockInputStreamPtr & input) + : InterpreterSelectQuery(query_ptr_, context_, {}, to_stage_, subquery_depth_, input) +{ +} + +InterpreterSelectQuery::InterpreterSelectQuery( + const ASTPtr & query_ptr_, + const Context & context_, + const Names & required_column_names_, + QueryProcessingStage::Enum to_stage_, + size_t subquery_depth_, + const BlockInputStreamPtr & input) : query_ptr(query_ptr_) , query(typeid_cast(*query_ptr)) , 
context(context_) @@ -230,9 +243,10 @@ InterpreterSelectQuery::InterpreterSelectQuery(const ASTPtr & query_ptr_, const , input(input) , log(&Logger::get("InterpreterSelectQuery")) { - init({}); + init(required_column_names_); } + InterpreterSelectQuery::InterpreterSelectQuery(OnlyAnalyzeTag, const ASTPtr & query_ptr_, const Context & context_) : query_ptr(query_ptr_) , query(typeid_cast(*query_ptr)) @@ -245,28 +259,6 @@ InterpreterSelectQuery::InterpreterSelectQuery(OnlyAnalyzeTag, const ASTPtr & qu init({}); } -InterpreterSelectQuery::InterpreterSelectQuery(const ASTPtr & query_ptr_, const Context & context_, - const Names & required_column_names_, - QueryProcessingStage::Enum to_stage_, size_t subquery_depth_, const BlockInputStreamPtr & input) - : InterpreterSelectQuery(query_ptr_, context_, required_column_names_, {}, to_stage_, subquery_depth_, input) -{ -} - -InterpreterSelectQuery::InterpreterSelectQuery(const ASTPtr & query_ptr_, const Context & context_, - const Names & required_column_names_, - const NamesAndTypesList & table_column_names_, QueryProcessingStage::Enum to_stage_, size_t subquery_depth_, const BlockInputStreamPtr & input) - : query_ptr(query_ptr_) - , query(typeid_cast(*query_ptr)) - , context(context_) - , to_stage(to_stage_) - , subquery_depth(subquery_depth_) - , table_column_names(table_column_names_) - , is_first_select_inside_union_all(query.isUnionAllHead()) - , input(input) - , log(&Logger::get("InterpreterSelectQuery")) -{ - init(required_column_names_); -} bool InterpreterSelectQuery::hasAsterisk() const { @@ -335,7 +327,9 @@ void InterpreterSelectQuery::getDatabaseAndTableNames(String & database_name, St Block InterpreterSelectQuery::getSampleBlock() { - return query_analyzer->getSelectSampleBlock(); + Pipeline pipeline; + executeWithoutUnionImpl(pipeline, std::make_shared(source_header)); + return pipeline.firstStream()->getHeader(); } @@ -348,11 +342,7 @@ Block InterpreterSelectQuery::getSampleBlock(const ASTPtr & query_ptr_, const Co BlockIO InterpreterSelectQuery::execute() { Pipeline pipeline; - - if (input) - pipeline.streams.push_back(input); - - executeWithoutUnionImpl(pipeline); + executeWithoutUnionImpl(pipeline, input); executeUnion(pipeline); /// Constraints on the result, the quota on the result, and also callback for progress. @@ -382,17 +372,15 @@ BlockIO InterpreterSelectQuery::execute() BlockInputStreams InterpreterSelectQuery::executeWithoutUnion() { Pipeline pipeline; - - if (input) - pipeline.streams.push_back(input); - - executeWithoutUnionImpl(pipeline); - + executeWithoutUnionImpl(pipeline, input); return pipeline.streams; } -void InterpreterSelectQuery::executeWithoutUnionImpl(Pipeline & pipeline) +void InterpreterSelectQuery::executeWithoutUnionImpl(Pipeline & pipeline, const BlockInputStreamPtr & input) { + if (input) + pipeline.streams.push_back(input); + if (is_first_select_inside_union_all) { executeSingleQuery(pipeline); @@ -716,7 +704,7 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline required_columns_expr_list->children.emplace_back(std::make_shared(StringRange(), column)); } - alias_actions = ExpressionAnalyzer{required_columns_expr_list, context, storage, table_column_names}.getActions(true); + alias_actions = ExpressionAnalyzer{required_columns_expr_list, context, storage, source_header.getNamesAndTypesList()}.getActions(true); /// The set of required columns could be added as a result of adding an action to calculate ALIAS. 
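Annotation: getSampleBlock() above now derives the result header by building the pipeline over an empty block of `source_header` and asking the first stream for its header; the stripped template argument in that `std::make_shared` call is presumably `OneBlockInputStream`, judging from the single-block source it constructs. Pushing schema-only data through the very transforms used at execution time keeps the advertised header from drifting away from what execution actually produces. The trick in miniature (illustrative code):

```cpp
// Derive the output schema by running an empty "block" through the transform chain.
#include <functional>
#include <iostream>
#include <string>
#include <vector>

using Block = std::vector<std::string>;                 // column names only, zero rows
using Transform = std::function<Block(const Block &)>;

int main()
{
    std::vector<Transform> chain = {
        [](const Block & in) { Block out = in; out.push_back("x_plus_1"); return out; },  // expression step
        [](const Block &)    { return Block{"x_plus_1"}; },                               // projection step
    };

    Block header = {"x"};  // empty source block: schema, no data
    for (auto & step : chain)
        header = step(header);

    for (auto & name : header)
        std::cout << name << '\n';  // x_plus_1
}
```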
required_columns = alias_actions->getRequiredColumns(); @@ -849,7 +837,7 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline } else { - interpreter_subquery->executeWithoutUnionImpl(pipeline); + interpreter_subquery->executeWithoutUnionImpl(pipeline, input); } /** Set the limits and quota for reading data, the speed and time of the query. diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.h b/dbms/src/Interpreters/InterpreterSelectQuery.h index 48a14656151..52dca53ceb8 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.h +++ b/dbms/src/Interpreters/InterpreterSelectQuery.h @@ -60,15 +60,6 @@ public: size_t subquery_depth_ = 0, const BlockInputStreamPtr & input = nullptr); - InterpreterSelectQuery( - const ASTPtr & query_ptr_, - const Context & context_, - const Names & required_column_names, - const NamesAndTypesList & table_column_names_, - QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete, - size_t subquery_depth_ = 0, - const BlockInputStreamPtr & input = nullptr); - ~InterpreterSelectQuery(); /** Execute a query, possibly part of UNION ALL chain. @@ -169,7 +160,7 @@ private: /// Fetch data from the table. Returns the stage to which the query was processed in Storage. QueryProcessingStage::Enum executeFetchColumns(Pipeline & pipeline); - void executeWithoutUnionImpl(Pipeline & pipeline); + void executeWithoutUnionImpl(Pipeline & pipeline, const BlockInputStreamPtr & input); void executeWhere(Pipeline & pipeline, const ExpressionActionsPtr & expression); void executeAggregation(Pipeline & pipeline, const ExpressionActionsPtr & expression, bool overflow_row, bool final); void executeMergeAggregated(Pipeline & pipeline, bool overflow_row, bool final); @@ -202,7 +193,7 @@ private: QueryProcessingStage::Enum to_stage; size_t subquery_depth; std::unique_ptr query_analyzer; - NamesAndTypesList table_column_names; + Block source_header; /// How many streams we ask for storage to produce, and in how many threads we will do further processing. 
size_t max_streams = 1; From 19a946d8af7ebe688316ac2b2a0497f33bf0203d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 21 Feb 2018 06:54:55 +0300 Subject: [PATCH 033/209] Preparation [#CLICKHOUSE-2] --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 25 ------------------- dbms/src/Interpreters/ExpressionAnalyzer.h | 3 --- .../src/Interpreters/InterpreterSelectQuery.h | 1 - 3 files changed, 29 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 629260ab8c5..cfcd7989df5 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -2577,31 +2577,6 @@ void ExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & chain) con } -Block ExpressionAnalyzer::getSelectSampleBlock() -{ - assertSelect(); - - ExpressionActionsPtr temp_actions = std::make_shared(aggregated_columns, settings); - NamesWithAliases result_columns; - - ASTs asts = select_query->select_expression_list->children; - for (size_t i = 0; i < asts.size(); ++i) - { - result_columns.emplace_back(asts[i]->getColumnName(), asts[i]->getAliasOrColumnName()); - getRootActions(asts[i], true, false, temp_actions); - } - - temp_actions->add(ExpressionAction::project(result_columns)); - - Block res = temp_actions->getSampleBlock(); - - for (auto & elem : res) - if (!elem.column) - elem.column = elem.type->createColumn(); - - return res; -} - void ExpressionAnalyzer::getActionsBeforeAggregation(const ASTPtr & ast, ExpressionActionsPtr & actions, bool no_subqueries) { ASTFunction * node = typeid_cast(ast.get()); diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h index ccd60b296cb..41e56c691a5 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.h +++ b/dbms/src/Interpreters/ExpressionAnalyzer.h @@ -135,9 +135,6 @@ public: */ const Tables & getExternalTables() const { return external_tables; } - /// If ast is a SELECT query, it gets the aliases and column types from the SELECT section. - Block getSelectSampleBlock(); - /// Create Set-s that we can from IN section to use the index on them. void makeSetsForIndex(); diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.h b/dbms/src/Interpreters/InterpreterSelectQuery.h index 52dca53ceb8..14416e5fd46 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.h +++ b/dbms/src/Interpreters/InterpreterSelectQuery.h @@ -71,7 +71,6 @@ public: */ BlockInputStreams executeWithoutUnion(); - /// TODO It's confusing that these methods return result structure for the case of QueryProcessingStage::Complete regardless to the actual 'to_stage'. 
Block getSampleBlock(); static Block getSampleBlock( From 76cf9eb40b29d9f1921ac20a7992d5115e1e2f2a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 21 Feb 2018 07:14:03 +0300 Subject: [PATCH 034/209] Fixed error [#CLICKHOUSE-2] --- dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.h b/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.h index 53066a946e9..5325e5d463c 100644 --- a/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.h +++ b/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.h @@ -29,7 +29,7 @@ private: const Block header; QueryProcessingStage::Enum processed_stage; QualifiedTableName main_table; - const Tables & external_tables; + Tables external_tables; }; } From 652dfc8c36b42d19109c7776c3f7740fb465505c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 21 Feb 2018 07:18:08 +0300 Subject: [PATCH 035/209] Fixed error [#CLICKHOUSE-2] --- dbms/src/Storages/MergeTree/MergeTreeBaseBlockInputStream.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeBaseBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeBaseBlockInputStream.cpp index 27cdc08b07a..b5095e79118 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBaseBlockInputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeBaseBlockInputStream.cpp @@ -414,7 +414,7 @@ Block MergeTreeBaseBlockInputStream::readFromPart() /// Replace column with condition value from PREWHERE to a constant. if (!task->remove_prewhere_column) - res.getByName(prewhere_column_name).column = DataTypeUInt8().createColumnConst(rows, UInt64(1)); + res.getByName(prewhere_column_name).column = DataTypeUInt8().createColumnConst(rows, UInt64(1))->convertToFullColumnIfConst(); } if (res) From 09ac108f4c9637680a058a651935bd959b07888d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 21 Feb 2018 07:38:26 +0300 Subject: [PATCH 036/209] Fixed error [#CLICKHOUSE-2] --- .../src/DataStreams/NativeBlockInputStream.cpp | 2 +- dbms/src/Storages/StorageStripeLog.cpp | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/dbms/src/DataStreams/NativeBlockInputStream.cpp b/dbms/src/DataStreams/NativeBlockInputStream.cpp index 1ec7c902065..787ba974546 100644 --- a/dbms/src/DataStreams/NativeBlockInputStream.cpp +++ b/dbms/src/DataStreams/NativeBlockInputStream.cpp @@ -52,7 +52,7 @@ NativeBlockInputStream::NativeBlockInputStream(ReadBuffer & istr_, UInt64 server for (const auto & column : index_block_it->columns) { auto type = DataTypeFactory::instance().get(column.type); - header.insert({ type->createColumn(), type, column.name }); + header.insert(ColumnWithTypeAndName{ type, column.name }); } } diff --git a/dbms/src/Storages/StorageStripeLog.cpp b/dbms/src/Storages/StorageStripeLog.cpp index 570dd3f9ea4..5f746dc246c 100644 --- a/dbms/src/Storages/StorageStripeLog.cpp +++ b/dbms/src/Storages/StorageStripeLog.cpp @@ -55,21 +55,20 @@ public: : storage(storage_), max_read_buffer_size(max_read_buffer_size_), index(index_), index_begin(index_begin_), index_end(index_end_) { + if (index_begin != index_end) + { + for (const auto & column : index_begin->columns) + { + auto type = DataTypeFactory::instance().get(column.type); + header.insert(ColumnWithTypeAndName{ type, column.name }); + } + } } String getName() const override { return "StripeLog"; } Block getHeader() const override { - if (index_begin == index_end) - return {}; 
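Annotation: the SelectStreamFactory change above (patch 034) deserves a comment. A `const Tables &` member stores only a reference, so when the constructor argument is a temporary, the member dangles the moment the full expression ends; a by-value `Tables` member owns its copy. A minimal reproduction of the bug class; the struct names here are illustrative and `Tables` merely stands in for the real alias:

```cpp
#include <iostream>
#include <map>
#include <string>
#include <utility>

using Tables = std::map<std::string, int>;

Tables makeTables() { return {{"t", 1}}; }

struct Dangling
{
    explicit Dangling(const Tables & tables_) : tables(tables_) {}
    const Tables & tables;  // refers to the caller's object, whatever its lifetime
};

struct Fixed
{
    explicit Fixed(Tables tables_) : tables(std::move(tables_)) {}
    Tables tables;          // owns a copy
};

int main()
{
    Dangling d(makeTables());  // the temporary argument dies at the end of this statement
    Fixed f(makeTables());     // the copy lives as long as `f`
    (void)d;                   // reading d.tables here would be undefined behaviour
    std::cout << f.tables.size() << '\n';
}
```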
- - Block header; - for (const auto & column : index_begin->columns) - { - auto type = DataTypeFactory::instance().get(column.type); - header.insert({ type->createColumn(), type, column.name }); - } return header; }; @@ -102,6 +101,7 @@ private: std::shared_ptr index; IndexForNativeFormat::Blocks::const_iterator index_begin; IndexForNativeFormat::Blocks::const_iterator index_end; + Block header; /** optional - to create objects only on first reading * and delete objects (release buffers) after the source is exhausted From aad0c62e671cdd92949a5d6e7931ee8b585b2ee3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 21 Feb 2018 08:11:53 +0300 Subject: [PATCH 037/209] Fixed error [#CLICKHOUSE-2] --- .../MergeTree/MergeTreeBlockInputStream.cpp | 23 ++++++++++++++++--- .../MergeTree/MergeTreeBlockInputStream.h | 1 + dbms/src/Storages/MergeTree/MergeTreeData.cpp | 7 ++++-- 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeBlockInputStream.cpp index dd0335c81a0..13fa4100a70 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBlockInputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeBlockInputStream.cpp @@ -60,14 +60,31 @@ MergeTreeBlockInputStream::MergeTreeBlockInputStream( << " rows starting from " << all_mark_ranges.front().begin * storage.index_granularity); setTotalRowsApprox(total_rows); + + header = storage.getSampleBlockForColumns(ordered_names); + + /// Types may be different during ALTER (when this stream is used to perform an ALTER). + /// NOTE: We may use similar code to implement non blocking ALTERs. + for (const auto & name_type : data_part->columns) + { + if (header.has(name_type.name)) + { + auto & elem = header.getByName(name_type.name); + if (!elem.type->equals(*name_type.type)) + { + elem.type = name_type.type; + elem.column = elem.type->createColumn(); + } + } + } + + injectVirtualColumns(header); } Block MergeTreeBlockInputStream::getHeader() const { - Block res = storage.getSampleBlockForColumns(ordered_names); - injectVirtualColumns(res); - return res; + return header; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeBlockInputStream.h b/dbms/src/Storages/MergeTree/MergeTreeBlockInputStream.h index 9739cfd49fc..13bd55ba7eb 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBlockInputStream.h +++ b/dbms/src/Storages/MergeTree/MergeTreeBlockInputStream.h @@ -48,6 +48,7 @@ protected: bool getNewTask() override; private: + Block header; /// Used by Task Names ordered_names; diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index a13542e3b5f..6e114af737a 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -1058,14 +1058,17 @@ void MergeTreeData::createConvertExpression(const DataPartPtr & part, const Name if (part && !out_rename_map.empty()) { WriteBufferFromOwnString out; - out << "Will rename "; + out << "Will "; bool first = true; for (const auto & from_to : out_rename_map) { if (!first) out << ", "; first = false; - out << from_to.first << " to " << from_to.second; + if (from_to.second.empty()) + out << "remove " << from_to.first; + else + out << "rename " << from_to.first << " to " << from_to.second; } out << " in part " << part->name; LOG_DEBUG(log, out.str()); From 4e3a0f0ac647bc53e64852d7a1113e403b77657e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 21 Feb 2018 09:25:21 +0300 Subject: [PATCH 038/209] Fixed error 
[#CLICKHOUSE-2] --- dbms/src/Interpreters/Aggregator.cpp | 3 +- .../Interpreters/InterpreterSelectQuery.cpp | 79 +++++++++++-------- 2 files changed, 47 insertions(+), 35 deletions(-) diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 7bc3f7df644..c0a998a3081 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -130,7 +131,7 @@ Block Aggregator::getHeader(bool final) const } } - return res; + return materializeBlock(res); } diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index f3d8b0201f6..04647e09ccb 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -64,6 +64,7 @@ namespace ErrorCodes extern const int ILLEGAL_FINAL; extern const int ILLEGAL_PREWHERE; extern const int TOO_MUCH_COLUMNS; + extern const int LOGICAL_ERROR; } @@ -329,7 +330,8 @@ Block InterpreterSelectQuery::getSampleBlock() { Pipeline pipeline; executeWithoutUnionImpl(pipeline, std::make_shared(source_header)); - return pipeline.firstStream()->getHeader(); + auto res = pipeline.firstStream()->getHeader(); + return res; } @@ -785,9 +787,20 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline query_analyzer->makeSetsForIndex(); - /// Initialize the initial data streams to which the query transforms are superimposed. Table or subquery? - if (!interpreter_subquery) + /// Initialize the initial data streams to which the query transforms are superimposed. Table or subquery or prepared input? + if (!pipeline.streams.empty()) { + /// Prepared input. + } + else if (interpreter_subquery) + { + /// Subquery. + interpreter_subquery->executeWithoutUnionImpl(pipeline, {}); + } + else if (storage) + { + /// Table. + if (max_streams == 0) throw Exception("Logical error: zero number of streams requested", ErrorCodes::LOGICAL_ERROR); @@ -834,39 +847,37 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline { stream->addTableLock(table_lock); }); + + /** Set the limits and quota for reading data, the speed and time of the query. + * Such restrictions are checked on the initiating server of the request, and not on remote servers. + * Because the initiating server has a summary of the execution of the request on all servers. + */ + if (to_stage == QueryProcessingStage::Complete) + { + IProfilingBlockInputStream::LocalLimits limits; + limits.mode = IProfilingBlockInputStream::LIMITS_TOTAL; + limits.max_rows_to_read = settings.limits.max_rows_to_read; + limits.max_bytes_to_read = settings.limits.max_bytes_to_read; + limits.read_overflow_mode = settings.limits.read_overflow_mode; + limits.max_execution_time = settings.limits.max_execution_time; + limits.timeout_overflow_mode = settings.limits.timeout_overflow_mode; + limits.min_execution_speed = settings.limits.min_execution_speed; + limits.timeout_before_checking_execution_speed = settings.limits.timeout_before_checking_execution_speed; + + QuotaForIntervals & quota = context.getQuota(); + + pipeline.transform([&](auto & stream) + { + if (IProfilingBlockInputStream * p_stream = dynamic_cast(stream.get())) + { + p_stream->setLimits(limits); + p_stream->setQuota(quota); + } + }); + } } else - { - interpreter_subquery->executeWithoutUnionImpl(pipeline, input); - } - - /** Set the limits and quota for reading data, the speed and time of the query. 
- * Such restrictions are checked on the initiating server of the request, and not on remote servers. - * Because the initiating server has a summary of the execution of the request on all servers. - */ - if (storage && to_stage == QueryProcessingStage::Complete) - { - IProfilingBlockInputStream::LocalLimits limits; - limits.mode = IProfilingBlockInputStream::LIMITS_TOTAL; - limits.max_rows_to_read = settings.limits.max_rows_to_read; - limits.max_bytes_to_read = settings.limits.max_bytes_to_read; - limits.read_overflow_mode = settings.limits.read_overflow_mode; - limits.max_execution_time = settings.limits.max_execution_time; - limits.timeout_overflow_mode = settings.limits.timeout_overflow_mode; - limits.min_execution_speed = settings.limits.min_execution_speed; - limits.timeout_before_checking_execution_speed = settings.limits.timeout_before_checking_execution_speed; - - QuotaForIntervals & quota = context.getQuota(); - - pipeline.transform([&](auto & stream) - { - if (IProfilingBlockInputStream * p_stream = dynamic_cast(stream.get())) - { - p_stream->setLimits(limits); - p_stream->setQuota(quota); - } - }); - } + throw Exception("Logical error in InterpreterSelectQuery: nowhere to read", ErrorCodes::LOGICAL_ERROR); return from_stage; } From 1a026e6131e508c8416ebfde2e9c817a765689bc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 21 Feb 2018 09:47:32 +0300 Subject: [PATCH 039/209] Fixed error [#CLICKHOUSE-2] --- dbms/src/DataStreams/CastTypeBlockInputStream.cpp | 4 +++- dbms/src/Storages/StorageMerge.cpp | 7 ++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/dbms/src/DataStreams/CastTypeBlockInputStream.cpp b/dbms/src/DataStreams/CastTypeBlockInputStream.cpp index d19e9764793..5028799d41d 100644 --- a/dbms/src/DataStreams/CastTypeBlockInputStream.cpp +++ b/dbms/src/DataStreams/CastTypeBlockInputStream.cpp @@ -20,9 +20,11 @@ CastTypeBlockInputStream::CastTypeBlockInputStream( { const auto & elem = input_header.getByPosition(col_num); - /// Skip, if it is a problem, it will be detected on the next pipeline stage if (!reference_definition.has(elem.name)) + { + header.insert(elem); continue; + } const auto & ref_column = reference_definition.getByName(elem.name); diff --git a/dbms/src/Storages/StorageMerge.cpp b/dbms/src/Storages/StorageMerge.cpp index 39be47eab9b..13d82db6fc3 100644 --- a/dbms/src/Storages/StorageMerge.cpp +++ b/dbms/src/Storages/StorageMerge.cpp @@ -137,7 +137,6 @@ BlockInputStreams StorageMerge::read( const unsigned num_streams) { BlockInputStreams res; - Block header = getSampleBlockForColumns(column_names); Names virt_column_names, real_column_names; for (const auto & it : column_names) @@ -180,6 +179,8 @@ BlockInputStreams StorageMerge::read( Context modified_context = context; modified_context.getSettingsRef().optimize_move_to_prewhere = false; + Block header = getSampleBlockForColumns(real_column_names); + size_t tables_count = selected_tables.size(); size_t curr_table_number = 0; for (auto it = selected_tables.begin(); it != selected_tables.end(); ++it, ++curr_table_number) @@ -262,14 +263,10 @@ BlockInputStreams StorageMerge::read( stream->addTableLock(table_lock); for (auto & virtual_column : virt_column_names) - { if (virtual_column == "_table") - { for (auto & stream : source_streams) stream = std::make_shared>( stream, std::make_shared(), table->getTableName(), "_table"); - } - } res.insert(res.end(), source_streams.begin(), source_streams.end()); } From b1fc5eed45122be4d168d315eceb46336b1a6d91 Mon Sep 17 00:00:00 2001 From: Alexey 
Milovidov Date: Wed, 21 Feb 2018 10:34:40 +0300 Subject: [PATCH 040/209] Fixed error [#CLICKHOUSE-2] --- ...ergingAggregatedMemoryEfficientBlockInputStream.cpp | 10 +++++----- .../MergingAggregatedMemoryEfficientBlockInputStream.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp index 56e816cb05a..3b38dae42a9 100644 --- a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp +++ b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp @@ -219,10 +219,10 @@ Block MergingAggregatedMemoryEfficientBlockInputStream::readImpl() parallel_merge_data->merged_blocks_changed.wait(lock, [this] { - return parallel_merge_data->finish /// Requested to finish early. - || parallel_merge_data->exception /// An error in merging thread. - || parallel_merge_data->exhausted /// No more data in sources. - || !parallel_merge_data->merged_blocks.empty(); /// Have another merged block. + return parallel_merge_data->finish /// Requested to finish early. + || parallel_merge_data->exception /// An error in merging thread. + || parallel_merge_data->exhausted /// No more data in sources. + || !parallel_merge_data->merged_blocks.empty(); /// Have another merged block. }); if (parallel_merge_data->exception) @@ -493,7 +493,7 @@ MergingAggregatedMemoryEfficientBlockInputStream::BlocksToMerge MergingAggregate while (true) { - if (current_bucket_num == NUM_BUCKETS) + if (current_bucket_num >= NUM_BUCKETS) { /// All ordinary data was processed. Maybe, there are also 'overflows'-blocks. // std::cerr << "at end\n"; diff --git a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.h b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.h index f7b978e9c66..69af976c52c 100644 --- a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.h +++ b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.h @@ -84,7 +84,7 @@ protected: Block readImpl() override; private: - static constexpr size_t NUM_BUCKETS = 256; + static constexpr int NUM_BUCKETS = 256; Aggregator aggregator; bool final; From 72aac966b174f431058507b5fedac5e1c1d0bfef Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 21 Feb 2018 11:16:01 +0300 Subject: [PATCH 041/209] Fixed error with order of columns in FULL JOIN that may lead to segfault [#CLICKHOUSE-2] --- dbms/src/Interpreters/ExpressionActions.cpp | 12 ++---------- dbms/src/Interpreters/ExpressionActions.h | 2 +- dbms/src/Interpreters/InterpreterSelectQuery.cpp | 11 +++++++---- dbms/src/Interpreters/Join.cpp | 4 ++-- dbms/src/Interpreters/Join.h | 2 +- 5 files changed, 13 insertions(+), 18 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index d34cbe1d5b1..d8bcb2936fd 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -992,19 +992,11 @@ void ExpressionActions::optimizeArrayJoin() } -BlockInputStreamPtr ExpressionActions::createStreamWithNonJoinedDataIfFullOrRightJoin(size_t max_block_size) const +BlockInputStreamPtr ExpressionActions::createStreamWithNonJoinedDataIfFullOrRightJoin(const Block & source_header, size_t max_block_size) const { for (const auto & action : actions) - { if (action.join && (action.join->getKind() == ASTTableJoin::Kind::Full || action.join->getKind() == ASTTableJoin::Kind::Right)) - { - 
Block left_sample_block; - for (const auto & input_elem : input_columns) - left_sample_block.insert(ColumnWithTypeAndName{ input_elem.type, input_elem.name }); - - return action.join->createStreamWithNonJoinedRows(left_sample_block, max_block_size); - } - } + return action.join->createStreamWithNonJoinedRows(source_header, max_block_size); return {}; } diff --git a/dbms/src/Interpreters/ExpressionActions.h b/dbms/src/Interpreters/ExpressionActions.h index ac855add69d..f29e53a1d7e 100644 --- a/dbms/src/Interpreters/ExpressionActions.h +++ b/dbms/src/Interpreters/ExpressionActions.h @@ -195,7 +195,7 @@ public: static std::string getSmallestColumn(const NamesAndTypesList & columns); - BlockInputStreamPtr createStreamWithNonJoinedDataIfFullOrRightJoin(size_t max_block_size) const; + BlockInputStreamPtr createStreamWithNonJoinedDataIfFullOrRightJoin(const Block & source_header, size_t max_block_size) const; private: NamesAndTypesList input_columns; diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 04647e09ccb..da60ef7ada4 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -467,10 +467,6 @@ void InterpreterSelectQuery::executeSingleQuery(Pipeline & pipeline) has_join = true; before_join = chain.getLastActions(); chain.addStep(); - - const ASTTableJoin & join = static_cast(*query.join()->table_join); - if (join.kind == ASTTableJoin::Kind::Full || join.kind == ASTTableJoin::Kind::Right) - pipeline.stream_with_non_joined_data = before_join->createStreamWithNonJoinedDataIfFullOrRightJoin(settings.max_block_size); } if (query_analyzer->appendWhere(chain, !first_stage)) @@ -536,8 +532,15 @@ void InterpreterSelectQuery::executeSingleQuery(Pipeline & pipeline) if (first_stage) { if (has_join) + { + const ASTTableJoin & join = static_cast(*query.join()->table_join); + if (join.kind == ASTTableJoin::Kind::Full || join.kind == ASTTableJoin::Kind::Right) + pipeline.stream_with_non_joined_data = before_join->createStreamWithNonJoinedDataIfFullOrRightJoin( + pipeline.firstStream()->getHeader(), settings.max_block_size); + for (auto & stream : pipeline.streams) /// Applies to all sources except stream_with_non_joined_data. stream = std::make_shared(stream, before_join); + } if (has_where) executeWhere(pipeline, before_where); diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index d5b7104d8d1..18e1e056a03 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -992,7 +992,7 @@ public: size_t num_columns_left = left_sample_block.columns() - num_keys; size_t num_columns_right = parent.sample_block_with_columns_to_add.columns(); - result_sample_block = left_sample_block; + result_sample_block = materializeBlock(left_sample_block); /// Add columns from the right-side table to the block. for (size_t i = 0; i < num_columns_right; ++i) @@ -1156,7 +1156,7 @@ private: }; -BlockInputStreamPtr Join::createStreamWithNonJoinedRows(Block & left_sample_block, size_t max_block_size) const +BlockInputStreamPtr Join::createStreamWithNonJoinedRows(const Block & left_sample_block, size_t max_block_size) const { return std::make_shared(*this, left_sample_block, max_block_size); } diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index 5e7ad9728c1..20df3829af9 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -253,7 +253,7 @@ public: * Use only after all calls to joinBlock was done. 
* left_sample_block is passed without account of 'use_nulls' setting (columns will be converted to Nullable inside). */ - BlockInputStreamPtr createStreamWithNonJoinedRows(Block & left_sample_block, size_t max_block_size) const; + BlockInputStreamPtr createStreamWithNonJoinedRows(const Block & left_sample_block, size_t max_block_size) const; /// Number of keys in all built JOIN maps. size_t getTotalRowCount() const; From 8e91a5e1f1176dc66bb237e9b6d8e1716c4224c1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 21 Feb 2018 11:19:17 +0300 Subject: [PATCH 042/209] Added test [#CLICKHOUSE-2] --- .../0_stateless/00577_full_join_segfault.reference | 8 ++++++++ .../queries/0_stateless/00577_full_join_segfault.sql | 7 +++++++ 2 files changed, 15 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00577_full_join_segfault.reference create mode 100644 dbms/tests/queries/0_stateless/00577_full_join_segfault.sql diff --git a/dbms/tests/queries/0_stateless/00577_full_join_segfault.reference b/dbms/tests/queries/0_stateless/00577_full_join_segfault.reference new file mode 100644 index 00000000000..e91c16f7902 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00577_full_join_segfault.reference @@ -0,0 +1,8 @@ +0 hello 123 0 +1 0 hello 456 +0 hello 123 +1 0 +0 hello 123 \N \N +1 \N \N hello 456 +0 hello 123 +1 \N \N diff --git a/dbms/tests/queries/0_stateless/00577_full_join_segfault.sql b/dbms/tests/queries/0_stateless/00577_full_join_segfault.sql new file mode 100644 index 00000000000..a53c9ffe4eb --- /dev/null +++ b/dbms/tests/queries/0_stateless/00577_full_join_segfault.sql @@ -0,0 +1,7 @@ +SELECT k, a1, b1, a2, b2 FROM (SELECT 0 AS k, 'hello' AS a1, 123 AS b1, a1) ANY FULL OUTER JOIN (SELECT 1 AS k, 'hello' AS a2, 456 AS b2, a2) USING (k) ORDER BY k; +SELECT k, a, b FROM (SELECT 0 AS k, 'hello' AS a, 123 AS b, a) ANY FULL OUTER JOIN (SELECT 1 AS k) USING (k) ORDER BY k; + +SET join_use_nulls = 1; + +SELECT k, a1, b1, a2, b2 FROM (SELECT 0 AS k, 'hello' AS a1, 123 AS b1, a1) ANY FULL OUTER JOIN (SELECT 1 AS k, 'hello' AS a2, 456 AS b2, a2) USING (k) ORDER BY k; +SELECT k, a, b FROM (SELECT 0 AS k, 'hello' AS a, 123 AS b, a) ANY FULL OUTER JOIN (SELECT 1 AS k) USING (k) ORDER BY k; From fef53b1963dcd35eaf21d075117441b1c312b48d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 21 Feb 2018 11:34:38 +0300 Subject: [PATCH 043/209] Tiny modification [#CLICKHOUSE-2] --- dbms/src/DataStreams/IProfilingBlockInputStream.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/dbms/src/DataStreams/IProfilingBlockInputStream.cpp b/dbms/src/DataStreams/IProfilingBlockInputStream.cpp index 7a76e424109..ec8eb407f9c 100644 --- a/dbms/src/DataStreams/IProfilingBlockInputStream.cpp +++ b/dbms/src/DataStreams/IProfilingBlockInputStream.cpp @@ -4,7 +4,6 @@ #include #include #include -#include namespace DB From d73e9b67ef3fd238adc6a3a025defedf2cd85953 Mon Sep 17 00:00:00 2001 From: BayoNet Date: Wed, 21 Feb 2018 13:03:55 +0300 Subject: [PATCH 044/209] Editing of russian sources including based on english sources changes. 
--- docs/ru/agg_functions/reference.md | 4 ++-- docs/ru/dicts/external_dicts_dict_structure.md | 10 +++++----- docs/ru/functions/higher_order_functions.md | 16 ++++++++++++++++ docs/ru/table_engines/dictionary.md | 18 +++++++++++------- 4 files changed, 34 insertions(+), 14 deletions(-) diff --git a/docs/ru/agg_functions/reference.md b/docs/ru/agg_functions/reference.md index a6fee523c09..b31d4b5496b 100644 --- a/docs/ru/agg_functions/reference.md +++ b/docs/ru/agg_functions/reference.md @@ -256,8 +256,8 @@ GROUP BY timeslot ## quantileTimingWeighted(level)(x, weight) -Отличается от функции quantileTiming наличием второго аргумента - «веса». Вес - неотрицательное целое число. -Результат считается так же, как если бы в функцию quantileTiming значение x было передано weight количество раз. +Отличается от функции `quantileTiming` наличием второго аргумента - «веса». Вес - неотрицательное целое число. +Результат считается так же, как если бы в функцию `quantileTiming` значение `x` было передано `weight` количество раз. ## quantileExact(level)(x) diff --git a/docs/ru/dicts/external_dicts_dict_structure.md b/docs/ru/dicts/external_dicts_dict_structure.md index 99792b8c465..b032003ec8c 100644 --- a/docs/ru/dicts/external_dicts_dict_structure.md +++ b/docs/ru/dicts/external_dicts_dict_structure.md @@ -25,8 +25,8 @@ В структуре описываются столбцы: -- `` -[ключевой столбец](external_dicts_dict_structure.md#dicts-external_dicts_dict_structure-key). -- `` -[столбец данных](external_dicts_dict_structure.md#dicts-external_dicts_dict_structure-attributes). Столбцов может быть много. +- `` - [ключевой столбец](external_dicts_dict_structure.md#dicts-external_dicts_dict_structure-key). +- `` - [столбец данных](external_dicts_dict_structure.md#dicts-external_dicts_dict_structure-attributes). Столбцов может быть много. @@ -66,9 +66,7 @@ ClickHouse поддерживает следующие виды ключей: Ключем может быть кортеж (`tuple`) из полей произвольных типов. [layout](external_dicts_dict_layout.md#dicts-external_dicts_dict_layout) в этом случае должен быть `complex_key_hashed` или `complex_key_cache`.
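Annotation: the dictionary documentation hunk above describes composite (complex) keys: a tuple of fields of arbitrary types, possibly a single element, served by the `complex_key_hashed` and `complex_key_cache` layouts. As a rough standalone analogue (illustrative only, unrelated to ClickHouse internals), a composite key is simply a tuple made hashable:

```cpp
// A composite key: several fields combined into one hashable lookup key.
#include <cstdint>
#include <iostream>
#include <string>
#include <tuple>
#include <unordered_map>

using Key = std::tuple<std::string, uint64_t>;  // e.g. (field1 String, field2 UInt64)

struct KeyHash
{
    size_t operator()(const Key & k) const
    {
        size_t h1 = std::hash<std::string>{}(std::get<0>(k));
        size_t h2 = std::hash<uint64_t>{}(std::get<1>(k));
        return h1 ^ (h2 * 0x9e3779b97f4a7c15ULL);  // simple hash combine
    }
};

int main()
{
    std::unordered_map<Key, std::string, KeyHash> dict;
    dict[Key{"region", 42}] = "value";
    std::cout << dict[Key{"region", 42}] << '\n';
}
```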
- -Cоставной ключ может состоять и из одного элемента, что даёт возможность использовать в качестве ключа, например, строку. - +Cоставной ключ может состоять из одного элемента. Это даёт возможность использовать в качестве ключа, например, строку.
Структура ключа задаётся в элементе ``. Поля ключа задаются в том же формате, что и [атрибуты](external_dicts_dict_structure.md#dicts-external_dicts_dict_structure-attributes) словаря. Пример: @@ -107,6 +105,7 @@ Cоставной ключ может состоять и из одного эл rand64() true true + true
``` @@ -119,4 +118,5 @@ Cоставной ключ может состоять и из одного эл - `expression` - Атрибут может быть выражением. Тег не обязательный. - `hierarchical` - Поддержка иерархии. Отображение в идентификатор родителя. По умолчанию, `false`. - `injective` - Признак инъективности отображения `id -> attribute`. Если `true`, то можно оптимизировать `GROUP BY`. По умолчанию, `false`. +- `is_object_id` - Признак того, что запрос выполняется к документу MongoDB по `ObjectID`. diff --git a/docs/ru/functions/higher_order_functions.md b/docs/ru/functions/higher_order_functions.md index 5e42a726698..bc48e9dc663 100644 --- a/docs/ru/functions/higher_order_functions.md +++ b/docs/ru/functions/higher_order_functions.md @@ -62,3 +62,19 @@ SELECT ### arrayFirstIndex(func, arr1, ...) Вернуть индекс первого элемента массива arr1, для которого функция func возвращает не 0. + +### arrayCumSum(\[func,\] arr1, ...) + +Возвращает совокупную сумму элементов массива, полученную при применении функции `func` к каждому элементу массива `arr`. + +Пример: + +```sql +SELECT arrayCumSum([1, 1, 1, 1]) AS res +``` + +```text +┌─res──────────┐ +│ [1, 2, 3, 4] │ +└──────────────┘ +``` diff --git a/docs/ru/table_engines/dictionary.md b/docs/ru/table_engines/dictionary.md index dd9f3216667..255bb9aaa24 100644 --- a/docs/ru/table_engines/dictionary.md +++ b/docs/ru/table_engines/dictionary.md @@ -4,7 +4,7 @@ Движок `Dictionary` отображает данные словаря как таблицу ClickHouse. -К примеру у нас есть словарь `products` со следующим конфигом: +Рассмотрим для примера словарь `products` со следующей конфигурацией: ```xml @@ -37,7 +37,7 @@ ``` -Проверяем: +Запрос данных словаря: ```sql select name, type, key, attribute.names, attribute.types, bytes_allocated, element_count,source from system.dictionaries where name = 'products'; @@ -60,14 +60,18 @@ WHERE name = 'products' └──────────┴──────┴────────┴─────────────────┴─────────────────┴─────────────────┴───────────────┴─────────────────┘ ``` -В таком виде данные из словаря можно смотреть/использовать при помощи функций [dictGet*](../functions/ext_dict_functions.md#ext_dict_functions). -Это бывает не всегда угодно, когда требуется выполнять JOIN операции или просто получить данные находящиеся сейчас в словаре. +В таком виде данные из словаря можно получить при помощи функций [dictGet*](../functions/ext_dict_functions.md#ext_dict_functions). -Мы можем использовать джок таблицы `Dictionary` для отображения данных словаря в таблицу. +Такое представление неудобно, когда нам необходимо получить данные в чистом виде, а также при выполнении операции `JOIN`. Для этих случаев можно использовать движок `Dictionary`, который отобразит данные словаря в таблицу. 
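Annotation: the paragraph above states the point of the Dictionary engine: the key-to-attributes mapping becomes an ordinary scannable table that can be selected from and joined against. The same idea in miniature, as illustrative C++ mirroring the `products` example on this page:

```cpp
// Toy version of what the Dictionary engine does: expose a key -> attributes
// map as rows, so it can be read like a table.
#include <cstdint>
#include <iostream>
#include <map>
#include <string>

int main()
{
    std::map<uint64_t, std::string> products = {{1, "abc"}, {2, "def"}};

    // "SELECT product_id, title FROM products"
    for (const auto & [product_id, title] : products)
        std::cout << product_id << '\t' << title << '\n';
}
```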
-Синтаксис: `CREATE TABLE %table_name% (%fields%) engine = Dictionary(%dictionary_name%)`
+Синтаксис:
+
+```
+CREATE TABLE %table_name% (%fields%) engine = Dictionary(%dictionary_name%)
+```
+
+
-Попробуем:
+Пример использования:

 ```sql
 create table products (product_id UInt64, title String) Engine = Dictionary(products);

From 9d348846d6898488af0a625423bb2aaef16f35a7 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Wed, 21 Feb 2018 16:57:58 +0300
Subject: [PATCH 045/209] fix row position with max version for vertical merge
 of ReplacingMergeTree #1914

---
 dbms/src/DataStreams/ReplacingSortedBlockInputStream.cpp | 4 +++-
 dbms/src/DataStreams/ReplacingSortedBlockInputStream.h   | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/dbms/src/DataStreams/ReplacingSortedBlockInputStream.cpp b/dbms/src/DataStreams/ReplacingSortedBlockInputStream.cpp
index 687d5ae19f4..96ac8a98355 100644
--- a/dbms/src/DataStreams/ReplacingSortedBlockInputStream.cpp
+++ b/dbms/src/DataStreams/ReplacingSortedBlockInputStream.cpp
@@ -17,7 +17,7 @@ void ReplacingSortedBlockInputStream::insertRow(MutableColumns & merged_columns,
     if (out_row_sources_buf)
     {
         /// true flag value means "skip row"
-        current_row_sources.back().setSkipFlag(false);
+        current_row_sources[max_pos].setSkipFlag(false);

         out_row_sources_buf->write(reinterpret_cast<const char *>(current_row_sources.data()),
                                    current_row_sources.size() * sizeof(RowSourcePart));
@@ -96,6 +96,7 @@ void ReplacingSortedBlockInputStream::merge(MutableColumns & merged_columns, std::priority_queue<SortCursor> & queue)
         }

         /// Initially, skip all rows. Unskip last on insert.
+        size_t current_pos = current_row_sources.size();
         if (out_row_sources_buf)
             current_row_sources.emplace_back(current.impl->order, true);

@@ -103,6 +104,7 @@ void ReplacingSortedBlockInputStream::merge(MutableColumns & merged_columns, std::priority_queue<SortCursor> & queue)
         if (version >= max_version)
         {
             max_version = version;
+            max_pos = current_pos;
             setRowRef(selected_row, current);
         }
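Annotation: the one-line change above is the heart of patch 045. insertRow() used to unskip `current_row_sources.back()`, but the row carrying the maximum version is not necessarily the last row seen for a key, so a vertical merge could keep the wrong physical row. The fix records the position of the maximum alongside its value; the same logic in a standalone sketch (the test added in patch 046 below exercises exactly the versions-arrive-as-2-then-1 ordering):

```cpp
// Track the *position* of the row with the maximum version, not just the value:
// the last row seen is not necessarily the one to keep.
#include <cassert>
#include <cstddef>
#include <vector>

size_t argmaxVersion(const std::vector<unsigned> & versions)
{
    unsigned max_version = 0;
    size_t max_pos = 0;
    for (size_t i = 0; i < versions.size(); ++i)
        if (versions[i] >= max_version)  // same comparison as the merge loop
        {
            max_version = versions[i];
            max_pos = i;
        }
    return max_pos;
}

int main()
{
    // Rows for one primary key arrive with versions 2, then 1. Row 0 must be
    // kept, but the old code unskipped the last recorded source, i.e. row 1.
    std::vector<unsigned> versions = {2, 1};
    assert(argmaxVersion(versions) == 0);
}
```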
diff --git a/dbms/src/DataStreams/ReplacingSortedBlockInputStream.h b/dbms/src/DataStreams/ReplacingSortedBlockInputStream.h
index 0ab6b185833..34d903f49f3 100644
--- a/dbms/src/DataStreams/ReplacingSortedBlockInputStream.h
+++ b/dbms/src/DataStreams/ReplacingSortedBlockInputStream.h
@@ -43,6 +43,7 @@ private:

     RowRef selected_row;    /// Last row with maximum version for current primary key.
     UInt64 max_version = 0; /// Max version for current primary key.
+    size_t max_pos = 0;     /// Position (in current_row_sources) of the row with max version.

     PODArray<RowSourcePart> current_row_sources;    /// Sources of rows with the current primary key

From da9b8dbd2ca2e312af3bbaee83b6c03fae70b1cc Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Wed, 21 Feb 2018 17:13:29 +0300
Subject: [PATCH 046/209] added test for ReplacingMergeTree vertical merge
 #1914

---
 .../00577_replacing_merge_tree_vertical_merge.reference  | 4 ++++
 .../00577_replacing_merge_tree_vertical_merge.sql | 8 ++++++++
 2 files changed, 12 insertions(+)
 create mode 100644 dbms/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.reference
 create mode 100644 dbms/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.sql

diff --git a/dbms/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.reference b/dbms/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.reference
new file mode 100644
index 00000000000..e0f8c3bae3f
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.reference
@@ -0,0 +1,4 @@
+2018-01-01 0 0
+2018-01-01 1 1
+2018-01-01 2 2
+2018-01-01 2 2
diff --git a/dbms/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.sql b/dbms/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.sql
new file mode 100644
index 00000000000..25b53d9b169
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.sql
@@ -0,0 +1,8 @@
+drop table if exists test.tab;
+create table test.tab (date Date, version UInt64, val UInt64) engine = ReplacingMergeTree(version) partition by date order by date settings enable_vertical_merge_algorithm = 1, vertical_merge_algorithm_min_rows_to_activate = 1, vertical_merge_algorithm_min_columns_to_activate = 0;
+insert into test.tab values ('2018-01-01', 2, 2), ('2018-01-01', 1, 1);
+insert into test.tab values ('2018-01-01', 0, 0);
+select * from test.tab order by version;
+OPTIMIZE TABLE test.tab;
+select * from test.tab;
+

From e3422ea52e9581fb041903723e0eeb127d2fb795 Mon Sep 17 00:00:00 2001
From: Alexey Zatelepin
Date: Wed, 21 Feb 2018 17:43:10 +0300
Subject: [PATCH 047/209] MergeTree without primary key [#CLICKHOUSE-3462]

---
 dbms/src/Storages/MergeTree/MergeTreeData.cpp |  7 +--
 .../MergeTree/registerStorageMergeTree.cpp    |  4 ++
 ..._merge_trees_without_primary_key.reference | 14 +++++
 .../00578_merge_trees_without_primary_key.sql | 56 +++++++++++++++++++
 4 files changed, 76 insertions(+), 5 deletions(-)
 create mode 100644 dbms/tests/queries/0_stateless/00578_merge_trees_without_primary_key.reference
 create mode 100644 dbms/tests/queries/0_stateless/00578_merge_trees_without_primary_key.sql

diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
index f8f08028257..fc921f894b2 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
@@ -113,12 +113,12 @@ MergeTreeData::MergeTreeData(
 {
     merging_params.check(columns);

-    if (!primary_expr_ast) /// TODO Allow tables without primary key.
+    if (!primary_expr_ast)
         throw Exception("Primary key cannot be empty", ErrorCodes::BAD_ARGUMENTS);

     initPrimaryKey();

-    if (sampling_expression && (!primary_expr_ast || !primary_key_sample.has(sampling_expression->getColumnName()))
+    if (sampling_expression && (!primary_key_sample.has(sampling_expression->getColumnName()))
         && !attach && !settings.compatibility_allow_sampling_expression_not_in_primary_key) /// This is for backward compatibility.
throw Exception("Sampling expression must be present in the primary key", ErrorCodes::BAD_ARGUMENTS); @@ -191,9 +191,6 @@ static void checkForAllowedKeyColumns(const ColumnWithTypeAndName & element, con void MergeTreeData::initPrimaryKey() { - if (!primary_expr_ast) - return; - auto addSortDescription = [](SortDescription & descr, const ASTPtr & expr_ast) { descr.reserve(descr.size() + expr_ast->children.size()); diff --git a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp index 2c1f6ddf270..89787fca4f0 100644 --- a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -586,6 +586,10 @@ static StoragePtr create(const StorageFactory::Arguments & args) if (args.storage_def->order_by) primary_expr_list = extractKeyExpressionList(*args.storage_def->order_by); + else + throw Exception("You must provide an ORDER BY expression in the table definition. " + "If you don't want this table to be sorted, use ORDER BY tuple()", + ErrorCodes::BAD_ARGUMENTS); if (args.storage_def->sample_by) sampling_expression = args.storage_def->sample_by->ptr(); diff --git a/dbms/tests/queries/0_stateless/00578_merge_trees_without_primary_key.reference b/dbms/tests/queries/0_stateless/00578_merge_trees_without_primary_key.reference new file mode 100644 index 00000000000..ccff1dac5cd --- /dev/null +++ b/dbms/tests/queries/0_stateless/00578_merge_trees_without_primary_key.reference @@ -0,0 +1,14 @@ +*** MergeTree *** +1 a +5 b +2 c +4 d +3 e +*** ReplacingMergeTree *** +1 a 5 +--- +1 a 5 +*** CollapsingMergeTree *** +3 c 1 +--- +3 c 1 diff --git a/dbms/tests/queries/0_stateless/00578_merge_trees_without_primary_key.sql b/dbms/tests/queries/0_stateless/00578_merge_trees_without_primary_key.sql new file mode 100644 index 00000000000..efd881ae006 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00578_merge_trees_without_primary_key.sql @@ -0,0 +1,56 @@ +SELECT '*** MergeTree ***'; + +DROP TABLE IF EXISTS test.unsorted; +CREATE TABLE test.unsorted (x UInt32, y String) ENGINE MergeTree ORDER BY tuple(); + +INSERT INTO test.unsorted VALUES (1, 'a'), (5, 'b'); +INSERT INTO test.unsorted VALUES (2, 'c'), (4, 'd'); +INSERT INTO test.unsorted VALUES (3, 'e'); + +OPTIMIZE TABLE test.unsorted PARTITION tuple() FINAL; + +SELECT * FROM test.unsorted; + +DROP TABLE test.unsorted; + + +SELECT '*** ReplacingMergeTree ***'; + +DROP TABLE IF EXISTS test.unsorted_replacing; + +CREATE TABLE test.unsorted_replacing (x UInt32, s String, v UInt32) ENGINE ReplacingMergeTree(v) ORDER BY tuple(); + +INSERT INTO test.unsorted_replacing VALUES (1, 'a', 5), (5, 'b', 4); +INSERT INTO test.unsorted_replacing VALUES (2, 'c', 3), (4, 'd', 2); +INSERT INTO test.unsorted_replacing VALUES (3, 'e', 1); + +SELECT * FROM test.unsorted_replacing FINAL; + +SELECT '---'; + +OPTIMIZE TABLE test.unsorted_replacing PARTITION tuple() FINAL; + +SELECT * FROM test.unsorted_replacing; + +DROP TABLE test.unsorted_replacing; + + +SELECT '*** CollapsingMergeTree ***'; + +DROP TABLE IF EXISTS test.unsorted_collapsing; + +CREATE TABLE test.unsorted_collapsing (x UInt32, s String, sign Int8) ENGINE CollapsingMergeTree(sign) ORDER BY tuple(); + +INSERT INTO test.unsorted_collapsing VALUES (1, 'a', 1); +INSERT INTO test.unsorted_collapsing VALUES (1, 'a', -1), (2, 'b', 1); +INSERT INTO test.unsorted_collapsing VALUES (2, 'b', -1), (3, 'c', 1); + +SELECT * FROM test.unsorted_collapsing FINAL; + +SELECT '---'; + +OPTIMIZE TABLE 
test.unsorted_collapsing PARTITION tuple() FINAL; + +SELECT * FROM test.unsorted_collapsing; + +DROP TABLE test.unsorted_collapsing; From 6fdf82e452e76739c4006bd46857b41570611d21 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Wed, 21 Feb 2018 18:43:24 +0300 Subject: [PATCH 048/209] allow the primary key and the partition key to use the same expression [#CLICKHOUSE-3455] --- .../Storages/MergeTree/MergeTreeDataWriter.cpp | 4 +++- ...rimary_keys_using_same_expression.reference | 6 ++++++ ..._and_primary_keys_using_same_expression.sql | 18 ++++++++++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 dbms/tests/queries/0_stateless/00579_merge_tree_partition_and_primary_keys_using_same_expression.reference create mode 100644 dbms/tests/queries/0_stateless/00579_merge_tree_partition_and_primary_keys_using_same_expression.sql diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 60ed5650ead..9449255ffe3 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -103,7 +103,9 @@ BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts(const Block & block if (partitions_count == 1) { /// A typical case is when there is one partition (you do not need to split anything). - result.emplace_back(std::move(block_copy), get_partition(0)); + /// NOTE: returning a copy of the original block so that calculated partition key columns + /// do not interfere with possible calculated primary key columns of the same name. + result.emplace_back(Block(block), get_partition(0)); return result; } diff --git a/dbms/tests/queries/0_stateless/00579_merge_tree_partition_and_primary_keys_using_same_expression.reference b/dbms/tests/queries/0_stateless/00579_merge_tree_partition_and_primary_keys_using_same_expression.reference new file mode 100644 index 00000000000..74af60e7bb1 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00579_merge_tree_partition_and_primary_keys_using_same_expression.reference @@ -0,0 +1,6 @@ +2018-02-19 12:00:00 +2018-02-20 12:00:00 +2018-02-21 12:00:00 +--- +2018-02-19 12:00:00 +2018-02-21 12:00:00 diff --git a/dbms/tests/queries/0_stateless/00579_merge_tree_partition_and_primary_keys_using_same_expression.sql b/dbms/tests/queries/0_stateless/00579_merge_tree_partition_and_primary_keys_using_same_expression.sql new file mode 100644 index 00000000000..f897de5a645 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00579_merge_tree_partition_and_primary_keys_using_same_expression.sql @@ -0,0 +1,18 @@ +DROP TABLE IF EXISTS test.partition_and_primary_keys_using_same_expression; + +CREATE TABLE test.partition_and_primary_keys_using_same_expression(dt DateTime) + ENGINE MergeTree PARTITION BY toDate(dt) ORDER BY toDayOfWeek(toDate(dt)); + +INSERT INTO test.partition_and_primary_keys_using_same_expression + VALUES ('2018-02-19 12:00:00'); +INSERT INTO test.partition_and_primary_keys_using_same_expression + VALUES ('2018-02-20 12:00:00'), ('2018-02-21 12:00:00'); + +SELECT * FROM test.partition_and_primary_keys_using_same_expression ORDER BY dt; + +SELECT '---'; + +ALTER TABLE test.partition_and_primary_keys_using_same_expression DROP PARTITION '2018-02-20'; +SELECT * FROM test.partition_and_primary_keys_using_same_expression ORDER BY dt; + +DROP TABLE test.partition_and_primary_keys_using_same_expression; From 47becc0b92572b876fc78653dbf71db5cc404bc3 Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Wed, 21 Feb 2018 20:06:29 
+0300
Subject: [PATCH 049/209] Try to reduce false positives of the sanity check. More
 detailed diagnostics. [#CLICKHOUSE-3571]

---
 .../Storages/MergeTree/MergeTreePartInfo.h    |  4 ++
 .../Storages/StorageReplicatedMergeTree.cpp   | 71 +++++++++++++++----
 .../integration/test_random_inserts/test.sh   |  6 +-
 3 files changed, 65 insertions(+), 16 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreePartInfo.h b/dbms/src/Storages/MergeTree/MergeTreePartInfo.h
index 4bc660c84f1..a5be3010f95 100644
--- a/dbms/src/Storages/MergeTree/MergeTreePartInfo.h
+++ b/dbms/src/Storages/MergeTree/MergeTreePartInfo.h
@@ -40,6 +40,10 @@ struct MergeTreePartInfo
     String getPartName() const;
     String getPartNameV0(DayNum_t left_date, DayNum_t right_date) const;
 
+    UInt64 getBlocksCount() const
+    {
+        return static_cast<UInt64>(max_block - min_block + 1);
+    }
 
     static MergeTreePartInfo fromPartName(const String & part_name, MergeTreeDataFormatVersion format_version);
 
diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp
index 14cc102f609..42023e82e4e 100644
--- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp
@@ -741,6 +741,7 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks)
 
     /// Which local parts to add to ZK.
     MergeTreeData::DataPartsVector parts_to_add;
+    UInt64 parts_to_add_rows = 0;
 
     /// Which parts should be taken from other replicas.
     Strings parts_to_fetch;
@@ -756,6 +757,7 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks)
         {
             parts_to_add.push_back(containing);
             unexpected_parts.erase(containing);
+            parts_to_add_rows += containing->rows_count;
         }
     }
     else
@@ -768,21 +770,53 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks)
     for (const String & name : parts_to_fetch)
         expected_parts.erase(name);
 
+
     /** For the sanity check, among the parts that are on the filesystem but not in ZK, we consider only the non-recent parts.
      * Unexpected new parts usually appear only because they did not have time to be registered in ZK before a hard server restart.
      * They also come from deduplicated parts that did not have time to retire.
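      * (A part with level > 0 has already participated in a merge, so it cannot be a mere leftover of a recent insert.)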
      */
     size_t unexpected_parts_nonnew = 0;
+    UInt64 unexpected_parts_nonnew_rows = 0;
+    UInt64 unexpected_parts_rows = 0;
     for (const auto & part : unexpected_parts)
+    {
         if (part->info.level > 0)
+        {
             ++unexpected_parts_nonnew;
+            unexpected_parts_nonnew_rows += part->rows_count;
+        }
 
-    String sanity_report = "There are "
-        + toString(unexpected_parts.size()) + " unexpected parts ("
-        + toString(unexpected_parts_nonnew) + " of them is not just-written), "
-        + toString(parts_to_add.size()) + " unexpectedly merged parts, "
-        + toString(expected_parts.size()) + " missing obsolete parts, "
-        + toString(parts_to_fetch.size()) + " missing parts";
+        unexpected_parts_rows += part->rows_count;
+    }
+
+
+    /// Additional helpful statistics
+    auto get_blocks_count_in_data_part = [&] (const String & part_name) -> UInt64
+    {
+        MergeTreePartInfo part_info;
+        if (MergeTreePartInfo::tryParsePartName(part_name, &part_info, data.format_version))
+            return part_info.getBlocksCount();
+
+        LOG_ERROR(log, "Unexpected part name: " << part_name);
+        return 0;
+    };
+
+    UInt64 parts_to_fetch_blocks = 0;
+    for (const String & name : parts_to_fetch)
+        parts_to_fetch_blocks += get_blocks_count_in_data_part(name);
+
+    UInt64 expected_parts_blocks = 0;
+    for (const String & name : expected_parts)
+        expected_parts_blocks += get_blocks_count_in_data_part(name);
+
+
+    std::stringstream sanity_report;
+    sanity_report << "There are "
+        << unexpected_parts.size() << " unexpected parts with " << unexpected_parts_rows << " rows ("
+        << unexpected_parts_nonnew << " of them are not just-written, with " << unexpected_parts_nonnew_rows << " rows), "
+        << parts_to_add.size() << " unexpectedly merged parts with " << parts_to_add_rows << " rows, "
+        << expected_parts.size() << " missing obsolete parts (with " << expected_parts_blocks << " blocks), "
+        << parts_to_fetch.size() << " missing parts (with " << parts_to_fetch_blocks << " blocks).";
 
     /** We can automatically synchronize data,
       * if the ratio of the total number of errors to the total number of parts (minimum - on the local filesystem or in ZK)
@@ -793,17 +827,28 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks)
       * In this case, the protection mechanism does not allow the server to start.
       */
-    size_t min_parts_local_or_expected = std::min(expected_parts_vec.size(), parts.size());
-    size_t total_difference = parts_to_add.size() + unexpected_parts_nonnew + parts_to_fetch.size();
+    UInt64 total_rows_on_filesystem = 0;
+    for (const auto & part : parts)
+        total_rows_on_filesystem += part->rows_count;
 
-    bool insane = total_difference > min_parts_local_or_expected * data.settings.replicated_max_ratio_of_wrong_parts;
+    UInt64 total_suspicious_rows = parts_to_add_rows + unexpected_parts_rows;
+    UInt64 total_suspicious_rows_no_new = parts_to_add_rows + unexpected_parts_nonnew_rows;
+
+    bool insane = total_suspicious_rows > total_rows_on_filesystem * data.settings.replicated_max_ratio_of_wrong_parts;
 
     if (insane && !skip_sanity_checks)
-        throw Exception("The local set of parts of table " + getTableName() + " doesn't look like the set of parts in ZooKeeper. "
-            + sanity_report, ErrorCodes::TOO_MANY_UNEXPECTED_DATA_PARTS);
+    {
+        std::stringstream why;
+        why << "The local set of parts of table " << database_name << "."
<< table_name << " doesn't look like the set of parts " + << "in ZooKeeper: " + << formatReadableQuantity(total_suspicious_rows) << " rows of " << formatReadableQuantity(total_rows_on_filesystem) + << " total rows in filesystem are suspicious."; - if (total_difference > 0) - LOG_WARNING(log, sanity_report); + throw Exception(why.str() + " " + sanity_report.str(), ErrorCodes::TOO_MANY_UNEXPECTED_DATA_PARTS); + } + + if (total_suspicious_rows_no_new > 0) + LOG_WARNING(log, sanity_report.str()); /// Add information to the ZK about the parts that cover the missing parts. for (const MergeTreeData::DataPartPtr & part : parts_to_add) diff --git a/dbms/tests/integration/test_random_inserts/test.sh b/dbms/tests/integration/test_random_inserts/test.sh index 76f1b05e61e..9022f307d56 100755 --- a/dbms/tests/integration/test_random_inserts/test.sh +++ b/dbms/tests/integration/test_random_inserts/test.sh @@ -2,9 +2,9 @@ #set -e [[ -n "$1" ]] && host="$1" || host="localhost" -[[ -n "$2" ]] && min_timestamp="$2" || min_timestamp=$(( $(date +%s) - 10 )) -[[ -n "$3" ]] && max_timestamp="$3" || max_timestamp=$(( $(date +%s) + 10 )) -[[ -n "$4" ]] && iters_per_timestamp="$4" || iters_per_timestamp=1 +[[ -n "$2" ]] && min_timestamp="$2" || min_timestamp=$(( $(date +%s) - 60 )) +[[ -n "$3" ]] && max_timestamp="$3" || max_timestamp=$(( $(date +%s) + 60 )) +[[ -n "$4" ]] && iters_per_timestamp="$4" || iters_per_timestamp=5 timestamps=`seq $min_timestamp $max_timestamp` From fbe4066c1566f11713fc0500505ead0a8ca84d1a Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Wed, 21 Feb 2018 00:03:38 +0300 Subject: [PATCH 050/209] Add support of any partition key. [#CLICKHOUSE-3606] --- dbms/src/Server/ClusterCopier.cpp | 414 ++++++++++++------ dbms/src/Server/ClusterCopier.h | 5 +- .../test_cluster_copier/task0_description.xml | 2 +- .../task_month_to_week_description.xml | 89 ++++ .../integration/test_cluster_copier/test.py | 118 +++-- 5 files changed, 456 insertions(+), 172 deletions(-) create mode 100644 dbms/tests/integration/test_cluster_copier/task_month_to_week_description.xml diff --git a/dbms/src/Server/ClusterCopier.cpp b/dbms/src/Server/ClusterCopier.cpp index 2bccec2cef7..ed06710c19e 100644 --- a/dbms/src/Server/ClusterCopier.cpp +++ b/dbms/src/Server/ClusterCopier.cpp @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -88,8 +89,20 @@ static ConfigurationPtr getConfigurationFromXMLString(const std::string & xml_da namespace { + using DatabaseAndTableName = std::pair; +String getDatabaseDotTable(const String & database, const String & table) +{ + return backQuoteIfNeed(database) + "." 
+ backQuoteIfNeed(table); +} + +String getDatabaseDotTable(const DatabaseAndTableName & db_and_table) +{ + return getDatabaseDotTable(db_and_table.first, db_and_table.second); +} + + enum class TaskState { @@ -138,35 +151,24 @@ struct TaskStateWithOwner /// Hierarchical description of the tasks -struct TaskPartition; +struct ShardPartition; struct TaskShard; struct TaskTable; struct TaskCluster; struct ClusterPartition; -using TasksPartition = std::map; +using TasksPartition = std::map; using ShardInfo = Cluster::ShardInfo; using TaskShardPtr = std::shared_ptr; using TasksShard = std::vector; using TasksTable = std::list; using ClusterPartitions = std::map; -/// Contains all cluster shards (sorted by neighborhood) containig a partition -struct ClusterPartition + +/// Just destination partition of a shard +struct ShardPartition { - TasksShard shards; /// having that partition - - Stopwatch watch; - UInt64 bytes_copied = 0; - UInt64 rows_copied = 0; - - size_t total_tries = 0; -}; - - -struct TaskPartition -{ - TaskPartition(TaskShard & parent, const String & name_) : task_shard(parent), name(name_) {} + ShardPartition(TaskShard & parent, const String & name_quoted_) : task_shard(parent), name(name_quoted_) {} String getPartitionPath() const; String getCommonPartitionIsDirtyPath() const; @@ -204,11 +206,39 @@ struct TaskShard UInt32 numberInCluster() const { return info.shard_num; } UInt32 indexInCluster() const { return info.shard_num - 1; } - TasksPartition partitions; + String getDescription() const; + /// Used to sort clusters by thier proximity ShardPriority priority; + + /// Column with unique destination partitions (computed from engine_push_partition_key expr.) in the shard + ColumnWithTypeAndName partition_key_column; + + /// There is a task for each destination partition + TasksPartition partition_tasks; + + /// Last CREATE TABLE query of the table of the shard + ASTPtr current_pull_table_create_query; + + /// Internal distributed tables + DatabaseAndTableName table_read_shard; + DatabaseAndTableName table_split_shard; }; + +/// Contains all cluster shards that contain a partition (and sorted by the proximity) +struct ClusterPartition +{ + TasksShard shards; /// having that partition + + Stopwatch watch; + UInt64 bytes_copied = 0; + UInt64 rows_copied = 0; + + size_t total_tries = 0; +}; + + struct TaskTable { TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfiguration & config, const String & prefix, @@ -235,6 +265,7 @@ struct TaskTable /// Storage of destination table String engine_push_str; ASTPtr engine_push_ast; + ASTPtr engine_push_partition_key_ast; /// Local Distributed table used to split data DatabaseAndTableName table_split; @@ -252,7 +283,8 @@ struct TaskTable /// Filter partitions that should be copied bool has_enabled_partitions = false; - NameSet enabled_partitions; + Strings enabled_partitions; + NameSet enabled_partitions_set; /// Prioritized list of shards TasksShard all_shards; @@ -277,6 +309,7 @@ struct TaskTable void initShards(RandomEngine && random_engine); }; + struct TaskCluster { TaskCluster(const String & task_zookeeper_path_, const String & default_local_database_) @@ -284,6 +317,7 @@ struct TaskCluster void loadTasks(const Poco::Util::AbstractConfiguration & config, const String & base_key = ""); + /// Set (or update) settings and max_workers param void reloadSettings(const Poco::Util::AbstractConfiguration & config, const String & base_key = ""); /// Base node for all tasks. 
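The node layout elided above is easiest to see with concrete values. Below is a standalone sketch (illustrative names and values, not part of the patch) of the hierarchy that the path getters in the following hunks compose; note that getPartitionPath() also passes the partition name through escapeForFileName(), which is the identity for a plain value such as 201701:

```cpp
#include <iostream>
#include <string>

// Illustrative only: mirrors TaskTable::getPartitionPath() and
// ShardPartition::getShardStatusPath() for a hypothetical task.
int main()
{
    std::string task_zookeeper_path = "/clickhouse-copier/task_simple"; // root node of one copying task
    std::string table_id = "dst_cluster.merge.hits";                    // <dst cluster>.<db>.<table>
    std::string partition_name = "201701";                              // quoted partition key value
    unsigned shard_number = 1;                                          // numberInCluster()

    std::string partition_path = task_zookeeper_path + "/tables/" + table_id + "/" + partition_name;
    std::string shard_status_path = partition_path + "/shards/" + std::to_string(shard_number);

    std::cout << partition_path << '\n'      // .../tables/dst_cluster.merge.hits/201701
              << shard_status_path << '\n';  // .../tables/dst_cluster.merge.hits/201701/shards/1
}
```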
Its structure: @@ -315,17 +349,6 @@ struct TaskCluster }; -String getDatabaseDotTable(const String & database, const String & table) -{ - return backQuoteIfNeed(database) + "." + backQuoteIfNeed(table); -} - -String getDatabaseDotTable(const DatabaseAndTableName & db_and_table) -{ - return getDatabaseDotTable(db_and_table.first, db_and_table.second); -} - - /// Atomically checks that is_dirty node is not exists, and made the remaining op /// Returns relative number of failed operation in the second field (the passed op has 0 index) static void checkNoNodeAndCommit( @@ -381,42 +404,43 @@ Block getBlockWithAllStreamData(const BlockInputStreamPtr & stream) return squashStreamIntoOneBlock(stream)->read(); } -// Path getters + +/// Path getters String TaskTable::getPartitionPath(const String & partition_name) const { - return task_cluster.task_zookeeper_path // root - + "/tables/" + table_id // tables/dst_cluster.merge.hits - + "/" + partition_name; // 201701 + return task_cluster.task_zookeeper_path // root + + "/tables/" + table_id // tables/dst_cluster.merge.hits + + "/" + escapeForFileName(partition_name); // 201701 } -String TaskPartition::getPartitionPath() const +String ShardPartition::getPartitionPath() const { return task_shard.task_table.getPartitionPath(name); } -String TaskPartition::getShardStatusPath() const +String ShardPartition::getShardStatusPath() const { // /root/table_test.hits/201701/1 return getPartitionPath() + "/shards/" + toString(task_shard.numberInCluster()); } -String TaskPartition::getPartitionShardsPath() const +String ShardPartition::getPartitionShardsPath() const { return getPartitionPath() + "/shards"; } -String TaskPartition::getPartitionActiveWorkersPath() const +String ShardPartition::getPartitionActiveWorkersPath() const { return getPartitionPath() + "/partition_active_workers"; } -String TaskPartition::getActiveWorkerPath() const +String ShardPartition::getActiveWorkerPath() const { return getPartitionActiveWorkersPath() + "/" + toString(task_shard.numberInCluster()); } -String TaskPartition::getCommonPartitionIsDirtyPath() const +String ShardPartition::getCommonPartitionIsDirtyPath() const { return getPartitionPath() + "/is_dirty"; } @@ -426,6 +450,58 @@ String TaskTable::getPartitionIsDirtyPath(const String & partition_name) const return getPartitionPath(partition_name) + "/is_dirty"; } +String DB::TaskShard::getDescription() const +{ + return "№" + toString(numberInCluster()) + + " of pull table " + getDatabaseDotTable(task_table.table_pull) + + " of cluster " + task_table.cluster_pull_name; +} + + + +static bool isExtedndedDefinitionStorage(const ASTPtr & storage_ast) +{ + const ASTStorage & storage = typeid_cast(*storage_ast); + return storage.partition_by || storage.order_by || storage.sample_by; +} + +static ASTPtr extractPartitionKey(const ASTPtr & storage_ast) +{ + String storage_str = queryToString(storage_ast); + + const ASTStorage & storage = typeid_cast(*storage_ast); + const ASTFunction & engine = typeid_cast(*storage.engine); + + if (!endsWith(engine.name, "MergeTree")) + { + throw Exception("Unsupported engine was specified in " + storage_str + ", only *MergeTree engines are supported", + ErrorCodes::BAD_ARGUMENTS); + } + + ASTPtr arguments_ast = engine.arguments->clone(); + ASTs & arguments = typeid_cast(*arguments_ast).children; + + if (isExtedndedDefinitionStorage(storage_ast)) + { + if (storage.partition_by) + return storage.partition_by->clone(); + + static const char * all = "all"; + return std::make_shared(StringRange(all, all + 
strlen(all)), Field(all, strlen(all))); + } + else + { + bool is_replicated = startsWith(engine.name, "Replicated"); + size_t min_args = is_replicated ? 3 : 1; + + if (arguments.size() < min_args) + throw Exception("Expected at least " + toString(min_args) + " arguments in " + storage_str, ErrorCodes::BAD_ARGUMENTS); + + ASTPtr & month_arg = is_replicated ? arguments[2] : arguments[1]; + return makeASTFunction("toYYYYMM", month_arg->clone()); + } +} + TaskTable::TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfiguration & config, const String & prefix_, const String & table_key) @@ -453,6 +529,7 @@ TaskTable::TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfigurati { ParserStorage parser_storage; engine_push_ast = parseQuery(parser_storage, engine_push_str); + engine_push_partition_key_ast = extractPartitionKey(engine_push_ast); } sharding_key_str = config.getString(table_prefix + "sharding_key"); @@ -482,13 +559,12 @@ TaskTable::TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfigurati Strings keys; config.keys(enabled_partitions_prefix, keys); - Strings partitions; if (keys.empty()) { /// Parse list of partition from space-separated string String partitions_str = config.getString(table_prefix + "enabled_partitions"); boost::trim_if(partitions_str, isWhitespaceASCII); - boost::split(partitions, partitions_str, isWhitespaceASCII, boost::token_compress_on); + boost::split(enabled_partitions, partitions_str, isWhitespaceASCII, boost::token_compress_on); } else { @@ -498,13 +574,12 @@ TaskTable::TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfigurati if (!startsWith(key, "partition")) throw Exception("Unknown key " + key + " in " + enabled_partitions_prefix, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); - partitions.emplace_back(config.getString(enabled_partitions_prefix + "." + key)); + enabled_partitions.emplace_back(config.getString(enabled_partitions_prefix + "." 
+ key)); } } - std::copy(partitions.begin(), partitions.end(), std::inserter(enabled_partitions, enabled_partitions.begin())); + std::copy(enabled_partitions.begin(), enabled_partitions.end(), std::inserter(enabled_partitions_set, enabled_partitions_set.begin())); } - } @@ -670,9 +745,11 @@ public: LOG_DEBUG(log, "Loaded " << task_cluster->table_tasks.size() << " table tasks"); - /// Compute set of partitions, set of partitions aren't changed + /// Compute set of partitions, assume set of partitions aren't changed during the processing for (auto & task_table : task_cluster->table_tasks) { + LOG_DEBUG(log, "Set up table task " << task_table.table_id); + for (const TaskShardPtr & task_shard : task_table.all_shards) { if (task_shard->info.pool == nullptr) @@ -681,36 +758,72 @@ public: ErrorCodes::LOGICAL_ERROR); } - LOG_DEBUG(log, "Set up table task " << task_table.table_id << " (pull from " - << "cluster " << task_table.cluster_pull_name - << ", table " << getDatabaseDotTable(task_table.table_pull) - << ", shard " << task_shard->info.shard_num << ")"); + LOG_DEBUG(log, "Set up shard " << task_shard->getDescription()); + LOG_DEBUG(log, "There are " << task_table.all_shards.size() << " shards, " << task_table.local_shards.size() << " of them are local ones"); - LOG_DEBUG(log, "There are " - << task_table.all_shards.size() << " shards, " - << task_table.local_shards.size() << " of them are remote ones"); + auto existing_partitions_names = getShardPartitions(*task_shard); + Strings filtered_partitions_names; - auto connection_entry = task_shard->info.pool->get(&task_cluster->settings_pull); - LOG_DEBUG(log, "Will get meta information for shard " << task_shard->numberInCluster() - << " from replica " << connection_entry->getDescription()); - - Strings partitions = getRemotePartitions(task_table.table_pull, *connection_entry, &task_cluster->settings_pull); - for (const String & partition_name : partitions) + /// Check that user specified correct partition names + auto check_partition_format = [&] (const String & partition_text_quoted) { - /// Do not process partition if it is not in enabled_partitions list - if (task_table.has_enabled_partitions && !task_table.enabled_partitions.count(partition_name)) + const DataTypePtr & type = task_shard->partition_key_column.type; + MutableColumnPtr column_dummy = type->createColumn(); + ReadBufferFromString rb(partition_text_quoted); + + try { - LOG_DEBUG(log, "Will skip partition " << partition_name); - continue; + type->deserializeTextQuoted(*column_dummy, rb); + } + catch (Exception & e) + { + throw Exception("Partition " + partition_text_quoted + " has incorrect format. 
" + e.displayText(), ErrorCodes::BAD_ARGUMENTS); + } + }; + + if (task_table.has_enabled_partitions) + { + /// Process partition in order specified by + for (const String & partition_name : task_table.enabled_partitions) + { + check_partition_format(partition_name); + auto it = existing_partitions_names.find(partition_name); + + /// Do not process partition if it is not in enabled_partitions list + if (it == existing_partitions_names.end()) + { + LOG_WARNING(log, "There is no enabled " << partition_name << " specified in enabled_partitions in shard " + << task_shard->getDescription()); + continue; + } + + filtered_partitions_names.emplace_back(*it); } - task_shard->partitions.emplace(partition_name, TaskPartition(*task_shard, partition_name)); + for (const String & partition_name : existing_partitions_names) + { + if (!task_table.enabled_partitions_set.count(partition_name)) + { + LOG_DEBUG(log, "Partition " << partition_name << " will not be processed, since it is not in " + << "enabled_partitions of " << task_table.table_id); + } + } + } + else + { + for (const String & partition_name : existing_partitions_names) + filtered_partitions_names.emplace_back(partition_name); + } + + for (const String & partition_name : filtered_partitions_names) + { + task_shard->partition_tasks.emplace(partition_name, ShardPartition(*task_shard, partition_name)); ClusterPartition & cluster_partition = task_table.cluster_partitions[partition_name]; cluster_partition.shards.emplace_back(task_shard); } - LOG_DEBUG(log, "Will fetch " << task_shard->partitions.size() << " partitions"); + LOG_DEBUG(log, "Will copy " << task_shard->partition_tasks.size() << " partitions from shard " << task_shard->getDescription()); } } @@ -786,11 +899,11 @@ public: /// NOTE: shards are sorted by "distance" to current host for (const TaskShardPtr & shard : shards_with_partition) { - auto it_shard_partition = shard->partitions.find(partition_name); - if (it_shard_partition == shard->partitions.end()) + auto it_shard_partition = shard->partition_tasks.find(partition_name); + if (it_shard_partition == shard->partition_tasks.end()) throw Exception("There are no such partition in a shard. 
This is a bug.", ErrorCodes::LOGICAL_ERROR); - TaskPartition & task_shard_partition = it_shard_partition->second; + ShardPartition & task_shard_partition = it_shard_partition->second; if (processPartitionTask(task_shard_partition)) ++num_successful_shards; } @@ -905,7 +1018,7 @@ public: Strings status_paths; for (auto & shard : shards_with_partition) { - TaskPartition & task_shard_partition = shard->partitions.find(partition_name)->second; + ShardPartition & task_shard_partition = shard->partition_tasks.find(partition_name)->second; status_paths.emplace_back(task_shard_partition.getShardStatusPath()); } @@ -998,6 +1111,7 @@ protected: } } + /// Removes MATERIALIZED and ALIAS columns from create table query static ASTPtr removeAliasColumnsFromCreateQuery(const ASTPtr & query_ast) { const ASTs & column_asts = typeid_cast(*query_ast).columns->children; @@ -1025,6 +1139,7 @@ protected: return new_query_ast; } + /// Replaces ENGINE and table name in a create query std::shared_ptr rewriteCreateQueryStorage(const ASTPtr & create_query_ast, const DatabaseAndTableName & new_table, const ASTPtr & new_storage_ast) { ASTCreateQuery & create = typeid_cast(*create_query_ast); @@ -1043,7 +1158,7 @@ protected: return res; } - bool tryDropPartition(TaskPartition & task_partition, const zkutil::ZooKeeperPtr & zookeeper) + bool tryDropPartition(ShardPartition & task_partition, const zkutil::ZooKeeperPtr & zookeeper) { if (is_safe_mode) throw Exception("DROP PARTITION is prohibited in safe mode", ErrorCodes::NOT_IMPLEMENTED); @@ -1117,7 +1232,7 @@ protected: } - bool processPartitionTask(TaskPartition & task_partition) + bool processPartitionTask(ShardPartition & task_partition) { bool res; @@ -1144,7 +1259,7 @@ protected: return res; } - bool processPartitionTaskImpl(TaskPartition & task_partition) + bool processPartitionTaskImpl(ShardPartition & task_partition) { TaskShard & task_shard = task_partition.task_shard; TaskTable & task_table = task_shard.task_table; @@ -1172,7 +1287,7 @@ protected: { String query; query += "SELECT " + fields + " FROM " + getDatabaseDotTable(from_table); - query += " WHERE (_part LIKE '" + task_partition.name + "%')"; + query += " WHERE (" + queryToString(task_table.engine_push_partition_key_ast) + " = " + task_partition.name + ")"; if (!task_table.where_condition_str.empty()) query += " AND (" + task_table.where_condition_str + ")"; if (!limit.empty()) @@ -1245,45 +1360,13 @@ protected: zookeeper->createAncestors(current_task_status_path); - /// We need to update table definitions for each part, it could be changed after ALTER - ASTPtr query_create_pull_table; - { - /// Fetch and parse (possibly) new definition - auto connection_entry = task_shard.info.pool->get(&task_cluster->settings_pull); - String create_query_pull_str = getRemoteCreateTable(task_table.table_pull, *connection_entry, &task_cluster->settings_pull); - - ParserCreateQuery parser_create_query; - query_create_pull_table = parseQuery(parser_create_query, create_query_pull_str); - } - - /// Create local Distributed tables: - /// a table fetching data from current shard and a table inserting data to the whole destination cluster - DatabaseAndTableName table_shard(working_database_name, ".read_shard." + task_table.table_id); - DatabaseAndTableName table_split(working_database_name, ".split." + task_table.table_id); - { - /// Create special cluster with single shard - String shard_read_cluster_name = ".read_shard." 
+ task_table.cluster_pull_name; - ClusterPtr cluster_pull_current_shard = task_table.cluster_pull->getClusterWithSingleShard(task_shard.indexInCluster()); - context.setCluster(shard_read_cluster_name, cluster_pull_current_shard); - - auto storage_shard_ast = createASTStorageDistributed(shard_read_cluster_name, task_table.table_pull.first, task_table.table_pull.second); - const auto & storage_split_ast = task_table.engine_split_ast; - - auto create_query_ast = removeAliasColumnsFromCreateQuery(query_create_pull_table); - auto create_table_pull_ast = rewriteCreateQueryStorage(create_query_ast, table_shard, storage_shard_ast); - auto create_table_split_ast = rewriteCreateQueryStorage(create_query_ast, table_split, storage_split_ast); - - //LOG_DEBUG(log, "Create shard reading table. Query: " << queryToString(create_table_pull_ast)); - dropAndCreateLocalTable(create_table_pull_ast); - - //LOG_DEBUG(log, "Create split table. Query: " << queryToString(create_table_split_ast)); - dropAndCreateLocalTable(create_table_split_ast); - } + /// We need to update table definitions for each partition, it could be changed after ALTER + createShardInternalTables(task_shard); /// Check that destination partition is empty if we are first worker /// NOTE: this check is incorrect if pull and push tables have different partition key! { - ASTPtr query_select_ast = get_select_query(table_split, "count()"); + ASTPtr query_select_ast = get_select_query(task_shard.table_split_shard, "count()"); UInt64 count; { Context local_context = context; @@ -1292,9 +1375,8 @@ protected: local_context.getSettingsRef().skip_unavailable_shards = true; InterpreterSelectQuery interperter(query_select_ast, local_context); - BlockIO io = interperter.execute(); - Block block = getBlockWithAllStreamData(io.in); + Block block = getBlockWithAllStreamData(interperter.execute().in); count = (block) ? block.safeGetByPosition(0).column->getUInt(0) : 0; } @@ -1337,15 +1419,15 @@ protected: /// Try create table (if not exists) on each shard { - auto create_query_push_ast = rewriteCreateQueryStorage(query_create_pull_table, task_table.table_push, task_table.engine_push_ast); + auto create_query_push_ast = rewriteCreateQueryStorage(task_shard.current_pull_table_create_query, task_table.table_push, task_table.engine_push_ast); typeid_cast(*create_query_push_ast).if_not_exists = true; String query = queryToString(create_query_push_ast); - LOG_DEBUG(log, "Create remote push tables. Query: " << query); + LOG_DEBUG(log, "Create destination tables. Query: " << query); size_t shards = executeQueryOnCluster(task_table.cluster_push, query, create_query_push_ast, &task_cluster->settings_push, PoolMode::GET_MANY); - LOG_DEBUG(log, "Remote push tables have been created on " << shards << " shards of " - << task_table.cluster_push->getShardCount()); + LOG_DEBUG(log, "Destination tables " << getDatabaseDotTable(task_table.table_push) << " have been created on " << shards + << " shards of " << task_table.cluster_push->getShardCount()); } /// Do the copying @@ -1359,14 +1441,14 @@ protected: } // Select all fields - ASTPtr query_select_ast = get_select_query(table_shard, "*", inject_fault ? "1" : ""); + ASTPtr query_select_ast = get_select_query(task_shard.table_read_shard, "*", inject_fault ? 
"1" : ""); LOG_DEBUG(log, "Executing SELECT query: " << queryToString(query_select_ast)); ASTPtr query_insert_ast; { String query; - query += "INSERT INTO " + getDatabaseDotTable(table_split) + " VALUES "; + query += "INSERT INTO " + getDatabaseDotTable(task_shard.table_split_shard) + " VALUES "; ParserQuery p_query(query.data() + query.size()); query_insert_ast = parseQuery(p_query, query); @@ -1521,35 +1603,90 @@ protected: return typeid_cast(*block.safeGetByPosition(0).column).getDataAt(0).toString(); } - Strings getRemotePartitions(const DatabaseAndTableName & table, Connection & connection, const Settings * settings = nullptr) + ASTPtr getCreateTableForPullShard(TaskShard & task_shard) { - Block block; + /// Fetch and parse (possibly) new definition + auto connection_entry = task_shard.info.pool->get(&task_cluster->settings_pull); + String create_query_pull_str = getRemoteCreateTable(task_shard.task_table.table_pull, *connection_entry, + &task_cluster->settings_pull); + + ParserCreateQuery parser_create_query; + return parseQuery(parser_create_query, create_query_pull_str); + } + + void createShardInternalTables(TaskShard & task_shard) + { + TaskTable & task_table = task_shard.task_table; + + /// We need to update table definitions for each part, it could be changed after ALTER + task_shard.current_pull_table_create_query = getCreateTableForPullShard(task_shard); + + /// Create local Distributed tables: + /// a table fetching data from current shard and a table inserting data to the whole destination cluster + String read_shard_prefix = ".read_shard_" + toString(task_shard.indexInCluster()) + "."; + String split_shard_prefix = ".split."; + task_shard.table_read_shard = DatabaseAndTableName(working_database_name, read_shard_prefix + task_table.table_id); + task_shard.table_split_shard = DatabaseAndTableName(working_database_name, split_shard_prefix + task_table.table_id); + + /// Create special cluster with single shard + String shard_read_cluster_name = read_shard_prefix + task_table.cluster_pull_name; + ClusterPtr cluster_pull_current_shard = task_table.cluster_pull->getClusterWithSingleShard(task_shard.indexInCluster()); + context.setCluster(shard_read_cluster_name, cluster_pull_current_shard); + + auto storage_shard_ast = createASTStorageDistributed(shard_read_cluster_name, task_table.table_pull.first, task_table.table_pull.second); + const auto & storage_split_ast = task_table.engine_split_ast; + + auto create_query_ast = removeAliasColumnsFromCreateQuery(task_shard.current_pull_table_create_query); + auto create_table_pull_ast = rewriteCreateQueryStorage(create_query_ast, task_shard.table_read_shard, storage_shard_ast); + auto create_table_split_ast = rewriteCreateQueryStorage(create_query_ast, task_shard.table_split_shard, storage_split_ast); + + //LOG_DEBUG(log, "Create shard reading table. Query: " << queryToString(create_table_pull_ast)); + dropAndCreateLocalTable(create_table_pull_ast); + + //LOG_DEBUG(log, "Create split table. 
Query: " << queryToString(create_table_split_ast)); + dropAndCreateLocalTable(create_table_split_ast); + } + + + std::set getShardPartitions(TaskShard & task_shard) + { + createShardInternalTables(task_shard); + + TaskTable & task_table = task_shard.task_table; + + String query; { WriteBufferFromOwnString wb; - wb << "SELECT DISTINCT partition FROM system.parts WHERE" - << " database = " << DB::quote << table.first - << " AND table = " << DB::quote << table.second; - - block = getBlockWithAllStreamData(std::make_shared( - connection, wb.str(), Block{{ ColumnString::create(), std::make_shared(), "partition" }}, context, settings)); + wb << "SELECT DISTINCT " << queryToString(task_table.engine_push_partition_key_ast) << " AS partition FROM" + << " " << getDatabaseDotTable(task_shard.table_read_shard) << " ORDER BY partition DESC"; + query = wb.str(); } - Strings res; + LOG_DEBUG(log, "Computing destination partition set, executing query: " << query); + + ParserQuery parser_query(query.data() + query.size()); + ASTPtr query_ast = parseQuery(parser_query, query); + + Context local_context = context; + InterpreterSelectQuery interp(query_ast, local_context); + Block block = getBlockWithAllStreamData(interp.execute().in); + + std::set res; if (block) { - auto & partition_col = typeid_cast(*block.getByName("partition").column); - for (size_t i = 0; i < partition_col.size(); ++i) - res.push_back(partition_col.getDataAt(i).toString()); - } - else - { - if (!existsRemoteTable(table, connection)) + ColumnWithTypeAndName & column = block.getByPosition(0); + task_shard.partition_key_column = column; + + for (size_t i = 0; i < column.column->size(); ++i) { - throw Exception("Table " + getDatabaseDotTable(table) + " is not exists on server " - + connection.getDescription(), ErrorCodes::UNKNOWN_TABLE); + WriteBufferFromOwnString wb; + column.type->serializeTextQuoted(*column.column, i, wb); + res.emplace(wb.str()); } } + LOG_DEBUG(log, "There are " << res.size() << " destination partitions in shard " << task_shard.getDescription()); + return res; } @@ -1610,7 +1747,7 @@ protected: Settings current_settings = settings ? *settings : task_cluster->settings_common; current_settings.max_parallel_replicas = num_remote_replicas ? num_remote_replicas : 1; - std::vector connections = shard.pool->getMany(¤t_settings, pool_mode); + auto connections = shard.pool->getMany(¤t_settings, pool_mode); for (auto & connection : connections) { @@ -1619,7 +1756,8 @@ protected: try { - RemoteBlockInputStream stream(*connection, query, context, ¤t_settings); + /// CREATE TABLE and DROP PARTITION return empty block + RemoteBlockInputStream stream(*connection, query, Block(), context, ¤t_settings); NullBlockOutputStream output; copyData(stream, output); @@ -1780,7 +1918,7 @@ void ClusterCopierApp::setupLogging() } Poco::AutoPtr formatter(new Poco::PatternFormatter); - formatter->setProperty("pattern", "%L%Y-%m-%d %H:%M:%S.%i [ %I ] <%p> %s: %t"); + formatter->setProperty("pattern", "%L%Y-%m-%d %H:%M:%S.%i <%p> %s: %t"); Poco::AutoPtr formatting_channel(new Poco::FormattingChannel(formatter)); formatting_channel->setChannel(split_channel); split_channel->open(); @@ -1840,7 +1978,7 @@ int ClusterCopierApp::main(const std::vector &) } catch (...) { - std::cerr << DB::getCurrentExceptionMessage(true) << "\n"; + tryLogCurrentException(&Poco::Logger::root(), __PRETTY_FUNCTION__); auto code = getCurrentExceptionCode(); return (code) ? 
code : -1; diff --git a/dbms/src/Server/ClusterCopier.h b/dbms/src/Server/ClusterCopier.h index 0c53d348158..1a76a0c8c11 100644 --- a/dbms/src/Server/ClusterCopier.h +++ b/dbms/src/Server/ClusterCopier.h @@ -99,9 +99,8 @@ NOTE: If the first worker starts insert data and detects that destination partition is not empty then the partition will be dropped and refilled, take it into account if you already have some data in destination tables. You could directly - specify partitions that should be copied in . - - NOTE: Currently partition key of source and destination tables should be the same. + specify partitions that should be copied in , they should be in quoted format like partition column of + system.parts table. --> ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/hits2/{shard}/hits2', '{replica}', EventDate, (CounterID, EventDate), 8192) diff --git a/dbms/tests/integration/test_cluster_copier/task0_description.xml b/dbms/tests/integration/test_cluster_copier/task0_description.xml index e3a0345ddd1..c54c07cddae 100644 --- a/dbms/tests/integration/test_cluster_copier/task0_description.xml +++ b/dbms/tests/integration/test_cluster_copier/task0_description.xml @@ -30,7 +30,7 @@ 0 1 2 - ENGINE=ReplicatedMergeTree('/clickhouse/tables/cluster{cluster}/{shard}', '{replica}') PARTITION BY d % 3 ORDER BY d SETTINGS index_granularity = 16 + ENGINE=ReplicatedMergeTree('/clickhouse/tables/cluster{cluster}/{shard}/hits', '{replica}') PARTITION BY d % 3 ORDER BY d SETTINGS index_granularity = 16 d + 1 diff --git a/dbms/tests/integration/test_cluster_copier/task_month_to_week_description.xml b/dbms/tests/integration/test_cluster_copier/task_month_to_week_description.xml new file mode 100644 index 00000000000..82cd16a6b6c --- /dev/null +++ b/dbms/tests/integration/test_cluster_copier/task_month_to_week_description.xml @@ -0,0 +1,89 @@ + + + + 4 + + + + 1 + + + + + + cluster0 + default + a + + cluster1 + default + b + + + + + ENGINE=ReplicatedMergeTree('/clickhouse/tables/cluster{cluster}/{shard}/b', '{replica}') PARTITION BY toMonday(date) ORDER BY d + + + d + 1 + + + + + + + + + + + true + + s0_0_0 + 9000 + + + s0_0_1 + 9000 + + + + true + + s0_1_0 + 9000 + + + + + + + true + + s1_0_0 + 9000 + + + s1_0_1 + 9000 + + + + true + + s1_1_0 + 9000 + + + + 255.255.255.255 + 9000 + + + + + + \ No newline at end of file diff --git a/dbms/tests/integration/test_cluster_copier/test.py b/dbms/tests/integration/test_cluster_copier/test.py index f14e83d5895..8ef4e27b913 100644 --- a/dbms/tests/integration/test_cluster_copier/test.py +++ b/dbms/tests/integration/test_cluster_copier/test.py @@ -69,27 +69,86 @@ def started_cluster(): cluster.shutdown() -def _test_copying(cmd_options): - instance = cluster.instances['s0_0_0'] +class Task1: - ddl_check_query(instance, "DROP TABLE IF EXISTS hits ON CLUSTER cluster0") - ddl_check_query(instance, "DROP TABLE IF EXISTS hits ON CLUSTER cluster1") - ddl_check_query(instance, "DROP TABLE IF EXISTS hits_all ON CLUSTER cluster0") - ddl_check_query(instance, "DROP TABLE IF EXISTS hits_all ON CLUSTER cluster1") + def __init__(self, cluster): + self.cluster = cluster + self.zk_task_path="/clickhouse-copier/task_simple" + self.copier_task_config = open(os.path.join(CURRENT_TEST_DIR, 'task0_description.xml'), 'r').read() - ddl_check_query(instance, "CREATE TABLE hits ON CLUSTER cluster0 (d UInt64, d1 UInt64 MATERIALIZED d+1) ENGINE=ReplicatedMergeTree('/clickhouse/tables/cluster_{cluster}/{shard}', '{replica}') PARTITION BY d % 3 ORDER BY d SETTINGS index_granularity = 16") - 
ddl_check_query(instance, "CREATE TABLE hits_all ON CLUSTER cluster0 (d UInt64) ENGINE=Distributed(cluster0, default, hits, d)") - ddl_check_query(instance, "CREATE TABLE hits_all ON CLUSTER cluster1 (d UInt64) ENGINE=Distributed(cluster1, default, hits, d + 1)") - instance.query("INSERT INTO hits_all SELECT * FROM system.numbers LIMIT 1002") + + def start(self): + instance = cluster.instances['s0_0_0'] + + for cluster_num in ["0", "1"]: + ddl_check_query(instance, "DROP DATABASE IF EXISTS default ON CLUSTER cluster{}".format(cluster_num)) + ddl_check_query(instance, "CREATE DATABASE IF NOT EXISTS default ON CLUSTER cluster{}".format(cluster_num)) + + ddl_check_query(instance, "CREATE TABLE hits ON CLUSTER cluster0 (d UInt64, d1 UInt64 MATERIALIZED d+1) ENGINE=ReplicatedMergeTree('/clickhouse/tables/cluster_{cluster}/{shard}/hits', '{replica}') PARTITION BY d % 3 ORDER BY d SETTINGS index_granularity = 16") + ddl_check_query(instance, "CREATE TABLE hits_all ON CLUSTER cluster0 (d UInt64) ENGINE=Distributed(cluster0, default, hits, d)") + ddl_check_query(instance, "CREATE TABLE hits_all ON CLUSTER cluster1 (d UInt64) ENGINE=Distributed(cluster1, default, hits, d + 1)") + instance.query("INSERT INTO hits_all SELECT * FROM system.numbers LIMIT 1002", settings={"insert_distributed_sync": 1}) + + + def check(self): + assert TSV(self.cluster.instances['s0_0_0'].query("SELECT count() FROM hits_all")) == TSV("1002\n") + assert TSV(self.cluster.instances['s1_0_0'].query("SELECT count() FROM hits_all")) == TSV("1002\n") + + assert TSV(self.cluster.instances['s1_0_0'].query("SELECT DISTINCT d % 2 FROM hits")) == TSV("1\n") + assert TSV(self.cluster.instances['s1_1_0'].query("SELECT DISTINCT d % 2 FROM hits")) == TSV("0\n") + + instance = self.cluster.instances['s0_0_0'] + ddl_check_query(instance, "DROP TABLE hits_all ON CLUSTER cluster0") + ddl_check_query(instance, "DROP TABLE hits_all ON CLUSTER cluster1") + ddl_check_query(instance, "DROP TABLE hits ON CLUSTER cluster0") + ddl_check_query(instance, "DROP TABLE hits ON CLUSTER cluster1") + + +class Task2: + + def __init__(self, cluster): + self.cluster = cluster + self.zk_task_path="/clickhouse-copier/task_month_to_week_partition" + self.copier_task_config = open(os.path.join(CURRENT_TEST_DIR, 'task_month_to_week_description.xml'), 'r').read() + + + def start(self): + instance = cluster.instances['s0_0_0'] + + for cluster_num in ["0", "1"]: + ddl_check_query(instance, "DROP DATABASE IF EXISTS default ON CLUSTER cluster{}".format(cluster_num)) + ddl_check_query(instance, "CREATE DATABASE IF NOT EXISTS default ON CLUSTER cluster{}".format(cluster_num)) + + ddl_check_query(instance, "CREATE TABLE a ON CLUSTER cluster0 (date Date, d UInt64, d1 UInt64 ALIAS d+1) ENGINE=ReplicatedMergeTree('/clickhouse/tables/cluster_{cluster}/{shard}/a', '{replica}', date, intHash64(d), (date, intHash64(d)), 8192)") + ddl_check_query(instance, "CREATE TABLE a_all ON CLUSTER cluster0 (date Date, d UInt64) ENGINE=Distributed(cluster0, default, a, d)") + + instance.query("INSERT INTO a_all SELECT toDate(17581 + number) AS date, number AS d FROM system.numbers LIMIT 85", settings={"insert_distributed_sync": 1}) + + + def check(self): + assert TSV(self.cluster.instances['s0_0_0'].query("SELECT count() FROM cluster(cluster0, default, a)")) == TSV("85\n") + assert TSV(self.cluster.instances['s1_0_0'].query("SELECT count(), uniqExact(date) FROM cluster(cluster1, default, b)")) == TSV("85\t85\n") + + assert TSV(self.cluster.instances['s1_0_0'].query("SELECT DISTINCT d % 2 FROM 
b")) == TSV("1\n") + assert TSV(self.cluster.instances['s1_1_0'].query("SELECT DISTINCT d % 2 FROM b")) == TSV("0\n") + + assert TSV(self.cluster.instances['s1_0_0'].query("SELECT uniqExact(partition) IN (12, 13) FROM system.parts WHERE active AND database='default' AND table='b'")) == TSV("1\n") + assert TSV(self.cluster.instances['s1_1_0'].query("SELECT uniqExact(partition) IN (12, 13) FROM system.parts WHERE active AND database='default' AND table='b'")) == TSV("1\n") + + instance = cluster.instances['s0_0_0'] + ddl_check_query(instance, "DROP TABLE a ON CLUSTER cluster0") + ddl_check_query(instance, "DROP TABLE b ON CLUSTER cluster1") + + +def execute_task(task, cmd_options): + task.start() zk = cluster.get_kazoo_client('zoo1') print "Use ZooKeeper server: {}:{}".format(zk.hosts[0][0], zk.hosts[0][1]) - zk_task_path = "/clickhouse-copier/task_simple" + zk_task_path = task.zk_task_path zk.ensure_path(zk_task_path) - - copier_task_config = open(os.path.join(CURRENT_TEST_DIR, 'task0_description.xml'), 'r').read() - zk.create(zk_task_path + "/description", copier_task_config) + zk.create(zk_task_path + "/description", task.copier_task_config) # Run cluster-copier processes on each node docker_api = docker.from_env().api @@ -97,7 +156,7 @@ def _test_copying(cmd_options): cmd = ['/usr/bin/clickhouse', 'copier', '--config', '/etc/clickhouse-server/config-preprocessed.xml', - '--task-path', '/clickhouse-copier/task_simple', + '--task-path', zk_task_path, '--base-dir', '/var/log/clickhouse-server/copier'] cmd += cmd_options @@ -119,27 +178,26 @@ def _test_copying(cmd_options): assert res['ExitCode'] == 0, "Instance: {} ({}). Info: {}".format(instance.name, instance.ip_address, repr(res)) - assert TSV(cluster.instances['s0_0_0'].query("SELECT count() FROM hits_all")) == TSV("1002\n") - assert TSV(cluster.instances['s1_0_0'].query("SELECT count() FROM hits_all")) == TSV("1002\n") - - assert TSV(cluster.instances['s1_0_0'].query("SELECT DISTINCT d % 2 FROM hits")) == TSV("1\n") - assert TSV(cluster.instances['s1_1_0'].query("SELECT DISTINCT d % 2 FROM hits")) == TSV("0\n") - - zk.delete(zk_task_path, recursive=True) - ddl_check_query(instance, "DROP TABLE hits_all ON CLUSTER cluster0") - ddl_check_query(instance, "DROP TABLE hits_all ON CLUSTER cluster1") - ddl_check_query(instance, "DROP TABLE hits ON CLUSTER cluster0") - ddl_check_query(instance, "DROP TABLE hits ON CLUSTER cluster1") + try: + task.check() + finally: + zk.delete(zk_task_path, recursive=True) -def test_copy_simple(started_cluster): - _test_copying([]) +def test_copy1_simple(started_cluster): + execute_task(Task1(started_cluster), []) -def test_copy_with_recovering(started_cluster): - _test_copying(['--copy-fault-probability', str(COPYING_FAIL_PROBABILITY)]) +def test_copy1_with_recovering(started_cluster): + execute_task(Task1(started_cluster), ['--copy-fault-probability', str(COPYING_FAIL_PROBABILITY)]) +def test_copy_month_to_week_partition(started_cluster): + execute_task(Task2(started_cluster), []) + +def test_copy_month_to_week_partition(started_cluster): + execute_task(Task2(started_cluster), ['--copy-fault-probability', str(0.1)]) + if __name__ == '__main__': with contextmanager(started_cluster)() as cluster: for name, instance in cluster.instances.items(): From 610f70fbeb8775944ae2caad73f04b88974a1b39 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Wed, 21 Feb 2018 20:05:21 +0300 Subject: [PATCH 051/209] forbid non-deterministic functions in primary and partition keys [#CLICKHOUSE-3455] --- 
dbms/src/Functions/FunctionsCoding.h | 2 + dbms/src/Functions/FunctionsDateTime.h | 6 ++ .../Functions/FunctionsEmbeddedDictionaries.h | 8 +++ .../Functions/FunctionsExternalDictionaries.h | 14 +++++ dbms/src/Functions/FunctionsExternalModels.h | 2 + dbms/src/Functions/FunctionsMiscellaneous.cpp | 22 +++++++ dbms/src/Functions/IFunction.h | 5 ++ dbms/src/Storages/MergeTree/MergeTreeData.cpp | 61 ++++++++++--------- dbms/src/Storages/MergeTree/MergeTreeData.h | 3 +- .../MergeTree/MergeTreeDataWriter.cpp | 6 +- .../Storages/MergeTree/MergeTreePartition.cpp | 16 ++--- 11 files changed, 104 insertions(+), 41 deletions(-) diff --git a/dbms/src/Functions/FunctionsCoding.h b/dbms/src/Functions/FunctionsCoding.h index b91d60acb97..be9ffb68d66 100644 --- a/dbms/src/Functions/FunctionsCoding.h +++ b/dbms/src/Functions/FunctionsCoding.h @@ -1111,6 +1111,8 @@ public: return std::make_shared(); } + bool isDeterministic() override { return false; } + void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, size_t result) override { auto col_res = ColumnVector::create(); diff --git a/dbms/src/Functions/FunctionsDateTime.h b/dbms/src/Functions/FunctionsDateTime.h index c9f3335ebfa..6644c5aa94f 100644 --- a/dbms/src/Functions/FunctionsDateTime.h +++ b/dbms/src/Functions/FunctionsDateTime.h @@ -1208,6 +1208,8 @@ public: return std::make_shared(); } + bool isDeterministic() override { return false; } + void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, size_t result) override { block.getByPosition(result).column = DataTypeUInt32().createColumnConst( @@ -1235,6 +1237,8 @@ public: return std::make_shared(); } + bool isDeterministic() override { return false; } + void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, size_t result) override { block.getByPosition(result).column = DataTypeUInt16().createColumnConst( @@ -1262,6 +1266,8 @@ public: return std::make_shared(); } + bool isDeterministic() override { return false; } + void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, size_t result) override { block.getByPosition(result).column = DataTypeUInt16().createColumnConst( diff --git a/dbms/src/Functions/FunctionsEmbeddedDictionaries.h b/dbms/src/Functions/FunctionsEmbeddedDictionaries.h index cb7ded5d174..7d39ad8d543 100644 --- a/dbms/src/Functions/FunctionsEmbeddedDictionaries.h +++ b/dbms/src/Functions/FunctionsEmbeddedDictionaries.h @@ -218,6 +218,8 @@ public: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } + bool isDeterministic() override { return false; } + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override { /// The dictionary key that defines the "point of view". @@ -312,6 +314,8 @@ public: return std::make_shared(); } + bool isDeterministic() override { return false; } + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override { /// The dictionary key that defines the "point of view". @@ -446,6 +450,8 @@ public: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } + bool isDeterministic() override { return false; } + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override { /// The dictionary key that defines the "point of view". 
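        /// (All dictGet*/region* functions in these dictionary headers receive the same one-line
        /// isDeterministic() = false override: dictionaries can be reloaded between queries,
        /// so two evaluations with identical arguments may return different values.)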
@@ -720,6 +726,8 @@ public: bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } + bool isDeterministic() override { return false; } + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override { RegionsNames::Language language = RegionsNames::Language::RU; diff --git a/dbms/src/Functions/FunctionsExternalDictionaries.h b/dbms/src/Functions/FunctionsExternalDictionaries.h index af5e2b751f9..cb77d84b751 100644 --- a/dbms/src/Functions/FunctionsExternalDictionaries.h +++ b/dbms/src/Functions/FunctionsExternalDictionaries.h @@ -94,6 +94,8 @@ private: return std::make_shared(); } + bool isDeterministic() override { return false; } + void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override { const auto dict_name_col = checkAndGetColumnConst(block.getByPosition(arguments[0]).column.get()); @@ -274,6 +276,8 @@ private: return std::make_shared(); } + bool isDeterministic() override { return false; } + void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override { const auto dict_name_col = checkAndGetColumnConst(block.getByPosition(arguments[0]).column.get()); @@ -535,6 +539,8 @@ private: return std::make_shared(); } + bool isDeterministic() override { return false; } + void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override { const auto dict_name_col = checkAndGetColumnConst(block.getByPosition(arguments[0]).column.get()); @@ -821,6 +827,8 @@ private: return std::make_shared(); } + bool isDeterministic() override { return false; } + void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override { const auto dict_name_col = checkAndGetColumnConst(block.getByPosition(arguments[0]).column.get()); @@ -1134,6 +1142,8 @@ private: return std::make_shared(); } + bool isDeterministic() override { return false; } + void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override { const auto dict_name_col = checkAndGetColumnConst(block.getByPosition(arguments[0]).column.get()); @@ -1379,6 +1389,8 @@ private: return std::make_shared(std::make_shared()); } + bool isDeterministic() override { return false; } + void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override { const auto dict_name_col = checkAndGetColumnConst(block.getByPosition(arguments[0]).column.get()); @@ -1549,6 +1561,8 @@ private: return std::make_shared(); } + bool isDeterministic() override { return false; } + void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override { const auto dict_name_col = checkAndGetColumnConst(block.getByPosition(arguments[0]).column.get()); diff --git a/dbms/src/Functions/FunctionsExternalModels.h b/dbms/src/Functions/FunctionsExternalModels.h index 74149920e6f..74822db9962 100644 --- a/dbms/src/Functions/FunctionsExternalModels.h +++ b/dbms/src/Functions/FunctionsExternalModels.h @@ -23,6 +23,8 @@ public: bool isVariadic() const override { return true; } + bool isDeterministic() override { return false; } + size_t getNumberOfArguments() const override { return 0; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; diff --git a/dbms/src/Functions/FunctionsMiscellaneous.cpp b/dbms/src/Functions/FunctionsMiscellaneous.cpp index 58ca5ce1d2d..33abc2dfd41 100644 --- a/dbms/src/Functions/FunctionsMiscellaneous.cpp +++ 
b/dbms/src/Functions/FunctionsMiscellaneous.cpp @@ -104,6 +104,8 @@ public: return std::make_shared(); } + bool isDeterministic() override { return false; } + void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, const size_t result) override { block.getByPosition(result).column = DataTypeString().createColumnConst(block.rows(), db_name); @@ -126,6 +128,8 @@ public: return name; } + bool isDeterministic() override { return false; } + bool isDeterministicInScopeOfQuery() override { return false; @@ -391,6 +395,8 @@ public: return name; } + bool isDeterministic() override { return false; } + bool isDeterministicInScopeOfQuery() override { return false; @@ -434,6 +440,8 @@ public: return 0; } + bool isDeterministic() override { return false; } + bool isDeterministicInScopeOfQuery() override { return false; @@ -482,6 +490,8 @@ public: return 0; } + bool isDeterministic() override { return false; } + bool isDeterministicInScopeOfQuery() override { return false; @@ -524,6 +534,8 @@ public: return 0; } + bool isDeterministic() override { return false; } + bool isDeterministicInScopeOfQuery() override { return false; @@ -889,6 +901,8 @@ public: } /** It could return many different values for single argument. */ + bool isDeterministic() override { return false; } + bool isDeterministicInScopeOfQuery() override { return false; @@ -1288,6 +1302,8 @@ public: return std::make_shared(); } + bool isDeterministic() override { return false; } + void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, size_t result) override { block.getByPosition(result).column = DataTypeUInt32().createColumnConst(block.rows(), static_cast(uptime)); @@ -1323,6 +1339,8 @@ public: return std::make_shared(); } + bool isDeterministic() override { return false; } + void executeImpl(Block & block, const ColumnNumbers & /*arguments*/, size_t result) override { block.getByPosition(result).column = DataTypeString().createColumnConst(block.rows(), DateLUT::instance().getTimeZone()); @@ -1355,6 +1373,8 @@ public: return 1; } + bool isDeterministic() override { return false; } + bool isDeterministicInScopeOfQuery() override { return false; @@ -1632,6 +1652,8 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override; + bool isDeterministic() override { return false; } + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override; private: diff --git a/dbms/src/Functions/IFunction.h b/dbms/src/Functions/IFunction.h index bb9aeffc71d..b7791268c79 100644 --- a/dbms/src/Functions/IFunction.h +++ b/dbms/src/Functions/IFunction.h @@ -126,6 +126,9 @@ public: * (even for distributed query), but not deterministic in general. * Example: now(). Another example: functions that work with periodically updated dictionaries. */ + + virtual bool isDeterministic() { return true; } + virtual bool isDeterministicInScopeOfQuery() { return true; } /** Lets you know if the function is monotonic in a range of values.
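With the flag declared on IFunction (and forwarded by the adapter in the next hunk), key validation can consult it: the MergeTreeData.cpp change that follows walks every action of the compiled key expression and rejects array joins and non-deterministic functions. A compressed sketch of that walk, using plain stand-in types (FunctionStub, ActionStub) instead of the real ExpressionActions machinery:

#include <cstdio>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

struct FunctionStub
{
    std::string name;
    bool deterministic = true;
};

struct ActionStub
{
    enum Type { APPLY_FUNCTION, ARRAY_JOIN, OTHER };
    Type type = OTHER;
    std::shared_ptr<FunctionStub> function; // set when type == APPLY_FUNCTION
};

// Same shape as checkKeyExpression: every step that builds the key must be
// deterministic and free of array joins.
void checkKeyActions(const std::vector<ActionStub> & actions, const std::string & key_name)
{
    for (const auto & action : actions)
    {
        if (action.type == ActionStub::ARRAY_JOIN)
            throw std::runtime_error(key_name + " key cannot contain array joins");

        if (action.type == ActionStub::APPLY_FUNCTION && !action.function->deterministic)
            throw std::runtime_error(key_name + " key cannot contain non-deterministic functions, "
                                     "but contains function " + action.function->name);
    }
    // The real function additionally rejects constant and nullable columns
    // by inspecting the expression's sample block, as the hunk below shows.
}

int main()
{
    std::vector<ActionStub> actions{
        {ActionStub::APPLY_FUNCTION, std::make_shared<FunctionStub>(FunctionStub{"rand", false})}};
    try
    {
        checkKeyActions(actions, "Partition");
    }
    catch (const std::exception & e)
    {
        std::puts(e.what());
    }
}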
@@ -320,6 +323,8 @@ public: bool isInjective(const Block & sample_block) override { return function->isInjective(sample_block); } + bool isDeterministic() override { return function->isDeterministic(); } + bool isDeterministicInScopeOfQuery() override { return function->isDeterministicInScopeOfQuery(); } bool hasInformationAboutMonotonicity() const override { return function->hasInformationAboutMonotonicity(); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index fc921f894b2..9fde52b1151 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -178,14 +178,32 @@ MergeTreeData::MergeTreeData( } -static void checkForAllowedKeyColumns(const ColumnWithTypeAndName & element, const std::string & key_name) +static void checkKeyExpression(const ExpressionActions & expr, const Block & sample_block, const String & key_name) { - const ColumnPtr & column = element.column; - if (column && (column->isColumnConst() || column->isDummy())) - throw Exception{key_name + " key cannot contain constants", ErrorCodes::ILLEGAL_COLUMN}; + for (const ExpressionAction & action : expr.getActions()) + { + if (action.type == ExpressionAction::ARRAY_JOIN) + throw Exception(key_name + " key cannot contain array joins"); - if (element.type->isNullable()) - throw Exception{key_name + " key cannot contain nullable columns", ErrorCodes::ILLEGAL_COLUMN}; + if (action.type == ExpressionAction::APPLY_FUNCTION) + { + IFunctionBase & func = *action.function; + if (!func.isDeterministic()) + throw Exception(key_name + " key cannot contain non-deterministic functions, " + "but contains function " + func.getName(), + ErrorCodes::BAD_ARGUMENTS); + } + } + + for (const ColumnWithTypeAndName & element : sample_block) + { + const ColumnPtr & column = element.column; + if (column && (column->isColumnConst() || column->isDummy())) + throw Exception{key_name + " key cannot contain constants", ErrorCodes::ILLEGAL_COLUMN}; + + if (element.type->isNullable()) + throw Exception{key_name + " key cannot contain nullable columns", ErrorCodes::ILLEGAL_COLUMN}; + } } @@ -213,14 +231,9 @@ void MergeTreeData::initPrimaryKey() primary_key_sample = projected_expr->getSampleBlock(); } + checkKeyExpression(*primary_expr, primary_key_sample, "Primary"); + size_t primary_key_size = primary_key_sample.columns(); - - /// A primary key cannot contain constants. It is meaningless. - /// (And also couldn't work because primary key is serialized with method of IDataType that doesn't support constants). - /// Also a primary key must not contain any nullable column. 
- for (size_t i = 0; i < primary_key_size; ++i) - checkForAllowedKeyColumns(primary_key_sample.getByPosition(i), "Primary"); - primary_key_data_types.resize(primary_key_size); for (size_t i = 0; i < primary_key_size; ++i) primary_key_data_types[i] = primary_key_sample.getByPosition(i).type; @@ -235,8 +248,7 @@ void MergeTreeData::initPrimaryKey() ExpressionAnalyzer(secondary_sort_expr_ast, context, nullptr, getColumnsList()).getActions(true); auto secondary_key_sample = projected_expr->getSampleBlock(); - for (size_t i = 0; i < secondary_key_sample.columns(); ++i) - checkForAllowedKeyColumns(secondary_key_sample.getByPosition(i), "Secondary"); + checkKeyExpression(*secondary_sort_expr, secondary_key_sample, "Secondary"); } } @@ -250,14 +262,11 @@ void MergeTreeData::initPartitionKey() for (const ASTPtr & ast : partition_expr_ast->children) { String col_name = ast->getColumnName(); - partition_expr_columns.emplace_back(col_name); - - const ColumnWithTypeAndName & element = partition_expr->getSampleBlock().getByName(col_name); - checkForAllowedKeyColumns(element, "Partition"); - - partition_expr_column_types.emplace_back(element.type); + partition_key_sample.insert(partition_expr->getSampleBlock().getByName(col_name)); } + checkKeyExpression(*partition_expr, partition_key_sample, "Partition"); + /// Add all columns used in the partition key to the min-max index. const NamesAndTypesList & minmax_idx_columns_with_types = partition_expr->getRequiredColumnsWithTypes(); minmax_idx_expr = std::make_shared(minmax_idx_columns_with_types, context.getSettingsRef()); @@ -2025,7 +2034,7 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, const Context /// Re-parse partition key fields using the information about expected field types. - size_t fields_count = partition_expr_column_types.size(); + size_t fields_count = partition_key_sample.columns(); if (partition_ast.fields_count != fields_count) throw Exception( "Wrong number of fields in the partition expression: " + toString(partition_ast.fields_count) + @@ -2041,12 +2050,8 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, const Context ReadBufferFromMemory right_paren_buf(")", 1); ConcatReadBuffer buf({&left_paren_buf, &fields_buf, &right_paren_buf}); - Block header; - for (size_t i = 0; i < fields_count; ++i) - header.insert(ColumnWithTypeAndName(partition_expr_column_types[i], partition_expr_columns[i])); - - ValuesRowInputStream input_stream(buf, header, context, /* interpret_expressions = */true); - MutableColumns columns = header.cloneEmptyColumns(); + ValuesRowInputStream input_stream(buf, partition_key_sample, context, /* interpret_expressions = */true); + MutableColumns columns = partition_key_sample.cloneEmptyColumns(); if (!input_stream.read(columns)) throw Exception( diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index dbcd948aaae..ff2c3987357 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -524,8 +524,7 @@ public: ASTPtr partition_expr_ast; ExpressionActionsPtr partition_expr; - Names partition_expr_columns; - DataTypes partition_expr_column_types; + Block partition_key_sample; ExpressionActionsPtr minmax_idx_expr; Names minmax_idx_columns; diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 9449255ffe3..fabbca3a473 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ 
b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -81,9 +81,9 @@ BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts(const Block & block data.partition_expr->execute(block_copy); ColumnRawPtrs partition_columns; - partition_columns.reserve(data.partition_expr_columns.size()); - for (const String & name : data.partition_expr_columns) - partition_columns.emplace_back(block_copy.getByName(name).column.get()); + partition_columns.reserve(data.partition_key_sample.columns()); + for (const ColumnWithTypeAndName & element : data.partition_key_sample) + partition_columns.emplace_back(block_copy.getByName(element.name).column.get()); PODArray partition_num_to_first_row; IColumn::Selector selector; diff --git a/dbms/src/Storages/MergeTree/MergeTreePartition.cpp b/dbms/src/Storages/MergeTree/MergeTreePartition.cpp index 56de34f9d84..b95916b2164 100644 --- a/dbms/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreePartition.cpp @@ -23,7 +23,7 @@ static ReadBufferFromFile openForReading(const String & path) /// So if you want to change this method, be sure to guarantee compatibility with existing table data. String MergeTreePartition::getID(const MergeTreeData & storage) const { - if (value.size() != storage.partition_expr_columns.size()) + if (value.size() != storage.partition_key_sample.columns()) throw Exception("Invalid partition key size: " + toString(value.size()), ErrorCodes::LOGICAL_ERROR); if (value.empty()) @@ -51,7 +51,7 @@ String MergeTreePartition::getID(const MergeTreeData & storage) const if (i > 0) result += '-'; - if (typeid_cast(storage.partition_expr_column_types[i].get())) + if (typeid_cast(storage.partition_key_sample.getByPosition(i).type.get())) result += toString(DateLUT::instance().toNumYYYYMMDD(DayNum_t(value[i].safeGet()))); else result += applyVisitor(to_string_visitor, value[i]); @@ -79,7 +79,7 @@ String MergeTreePartition::getID(const MergeTreeData & storage) const void MergeTreePartition::serializeTextQuoted(const MergeTreeData & storage, WriteBuffer & out) const { - size_t key_size = storage.partition_expr_column_types.size(); + size_t key_size = storage.partition_key_sample.columns(); if (key_size == 0) { @@ -95,7 +95,7 @@ void MergeTreePartition::serializeTextQuoted(const MergeTreeData & storage, Writ if (i > 0) writeCString(", ", out); - const DataTypePtr & type = storage.partition_expr_column_types[i]; + const DataTypePtr & type = storage.partition_key_sample.getByPosition(i).type; auto column = type->createColumn(); column->insert(value[i]); type->serializeTextQuoted(*column, 0, out); @@ -111,9 +111,9 @@ void MergeTreePartition::load(const MergeTreeData & storage, const String & part return; ReadBufferFromFile file = openForReading(part_path + "partition.dat"); - value.resize(storage.partition_expr_column_types.size()); - for (size_t i = 0; i < storage.partition_expr_column_types.size(); ++i) - storage.partition_expr_column_types[i]->deserializeBinary(value[i], file); + value.resize(storage.partition_key_sample.columns()); + for (size_t i = 0; i < storage.partition_key_sample.columns(); ++i) + storage.partition_key_sample.getByPosition(i).type->deserializeBinary(value[i], file); } void MergeTreePartition::store(const MergeTreeData & storage, const String & part_path, MergeTreeDataPartChecksums & checksums) const @@ -124,7 +124,7 @@ void MergeTreePartition::store(const MergeTreeData & storage, const String & par WriteBufferFromFile out(part_path + "partition.dat"); HashingWriteBuffer out_hashing(out); for (size_t i 
= 0; i < value.size(); ++i) - storage.partition_expr_column_types[i]->serializeBinary(value[i], out_hashing); + storage.partition_key_sample.getByPosition(i).type->serializeBinary(value[i], out_hashing); out_hashing.next(); checksums.files["partition.dat"].file_size = out_hashing.count(); checksums.files["partition.dat"].file_hash = out_hashing.getHash();

From 361a27485dc195fb09d80ab0be45d769f42a68dc Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Wed, 21 Feb 2018 21:44:33 +0300
Subject: [PATCH 052/209] Some progress on documentation (#1942)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* update presentations
* CLICKHOUSE-2936: redirect from clickhouse.yandex.ru and clickhouse.yandex.com
* update submodule
* lost files
* CLICKHOUSE-2981: prefer sphinx docs over original reference
* CLICKHOUSE-2981: docs styles more similar to main website + add flags to switch language links
* update presentations
* Less confusing directory structure (docs -> doc/reference/)
* Minify sphinx docs too
* Website release script: fail fast + pass docker hash on deploy
* Do not underline links in docs
* shorter
* cleanup docker images
* tune nginx config
* CLICKHOUSE-3043: get rid of habrastorage links
* Lost translation
* CLICKHOUSE-2936: temporary client-side redirect
* behaves weird in test
* put redirect back
* CLICKHOUSE-3047: copy docs txts to public too
* move to proper file
* remove old pages to avoid confusion
* Remove reference redirect warning for now
* Refresh README.md
* Yellow buttons in docs
* Use svg flags instead of unicode ones in docs
* fix test website instance
* Put flags to separate files
* wrong flag
* Copy Yandex.Metrica introduction from main page to docs
* Yet another home page structure change, couple new blocks (CLICKHOUSE-3045)
* Update Contacts section
* CLICKHOUSE-2849: more detailed legal information
* CLICKHOUSE-2978 preparation - split by files
* More changes in Contacts block
* Tune texts on index page
* update presentations
* One more benchmark
* Add usage sections to index page, adapted from slides
* Get the roadmap started, based on slides from last ClickHouse Meetup
* CLICKHOUSE-2977: some rendering tuning
* Get rid of excessive section in the end of getting started
* Make headers linkable
* CLICKHOUSE-2981: links to editing reference - https://github.com/yandex/ClickHouse/issues/849
* CLICKHOUSE-2981: fix mobile styles in docs
* Ban crawling of duplicating docs
* Open some external links in new tab
* Ban old docs too
* Lots of trivial fixes in english docs
* Lots of trivial fixes in russian docs
* Remove getting started copies in markdown
* Add Yandex.Webmaster
* Fix some sphinx warnings
* More warnings fixed in english docs
* More sphinx warnings fixed
* Add code-block:: text
* More code-block:: text
* These headers look not that well
* Better switch between documentation languages
* merge use_case.rst into ya_metrika_task.rst
* Edit the agg_functions.rst texts
* Add lost empty lines
* Lost blank lines
* Add new logo sizes
* update presentations
* Next step in migrating to new documentation
* Fix all warnings in en reference
* Fix all warnings in ru reference
* Re-arrange existing reference
* Move operation tips to main reference
* Fix typos noticed by milovidov@
* Get rid of zookeeper.md
* Looks like duplicate of tutorial.html
* Fix some mess with html tags in tutorial
* No idea why nobody noticed this before, but it was completely not clear where to get the data
* Match code block styling between main and tutorial pages (in favor of the latter)
* Get rid of some copypaste in tutorial
* Normalize header styles
* Move example_datasets to sphinx
* Move presentations submodule to website
* Move and update README.md
* No point in duplicating articles from habrahabr here
* Move development-related docs as is for now
* doc/reference/ -> docs/ (to match the URL on website)
* Adapt links to match the previous commit
* Adapt development docs to rst (still lacks translation and strikethrough support)
* clean on release
* blacklist presentations in gulp
* strikethrough support in sphinx
* just copy development folder for now
* fix weird introduction in style article
* Style guide translation (WIP)
* Finish style guide translation to English
* gulp clean separately
* Update year in LICENSE
* Initial CONTRIBUTING.md
* Fix remaining links to old docs in tutorial
* Some tutorial fixes
* Typo
* Another typo
* Update list of authors from yandex-team according to git log
* Fix diff with master
* couple fixes in en what_is_clickhouse.rst
* Try different link to blog in Russian
* Swap words
* Slightly larger line height
* CLICKHOUSE-3089: disable hyphenation in docs
* update presentations
* Fix copying of txt files
* update submodule
* CLICKHOUSE-3108: fix overflow issues in mobile version
* Less weird tutorial header in mobile version
* CLICKHOUSE-3073: skip sourcemaps by default
* CLICKHOUSE-3067: rename item in docs navigation
* fix list markup
* CLICKHOUSE-3067: some documentation style tuning
* CLICKHOUSE-3067: less laggy single page documentation
* update presentations
* YQL-3278: add some links to ClickHouse Meetup in Berlin on October 5, 2017
* Add "time series" keyword
* Switch link to next event
* Switch link to next event #2
* smaller font
* Remove Palo Alto link
* Add link to Success stories list
* better title
* Update index.html
* Update index.html
* Do not expect gulp in $PATH
* Add link to Beijing meetup
* ignore presentations
* introduce requirements.txt
* Apply hacks by bayonet@ using monkey patching
* Simplify and fix patching of "single" docs on Mac OS (it still has a bug on chunk borders though)
* remove hidden symbol
* s/2016–2017/2016–2018/g
* Add some place to put virtualenv
* mkdocs was missing from requirements.txt
* This way it hurts eyes less
* Change header layout + add flags
* yandex_fonts.css -> custom.css
* Larger docs logo
* Shorter link
* Link to home from logo
* Borrow some more styles from main page
* Tune some links
* Remove shadow
* Add header border
* Header font
* Better flag margin
* Improve single page mode
* Fix search results hover
* Fix some MarkDown errors
* Silence useless error
* Get rid of index.html's
* Enable syntax highlight
* Fix link label in ru
* More style fixes in documentation scripts
---
 .gitignore | 1 +
 docs/build.sh | 2 +-
 docs/concatenate.py | 45 +++++++-------
 docs/create_contents.py | 24 ++++----
 docs/en/operations/access_rights.md | 2 +-
 .../en/operations/server_settings/settings.md | 2 +-
 .../stylesheets/application.ac64251e.css | 4 +-
 .../{yandex_fonts.css => custom.css} | 55 +++++++++++++++++-
 docs/mkdocs-material-theme/base.html | 8 ++-
 .../partials/footer.html | 5 --
 .../partials/header.html | 38 +++++++-----
 docs/mkdocs-material-theme/partials/nav.html | 27 ++++++---
 docs/mkdocs_en.yml | 11 ++--
 docs/mkdocs_en_single_page.yml | 11 ++--
 docs/mkdocs_ru.yml | 9 ++-
 docs/mkdocs_ru_single_page.yml | 7 ++-
 docs/requirements.txt | 8 +++
 docs/validate_headers_structures_in_md.py | 58 ++++++++++---------
 website/index.html | 3 +-
 website/tutorial.html | 2 +-
 20 files
changed, 215 insertions(+), 107 deletions(-) rename docs/mkdocs-material-theme/assets/stylesheets/{yandex_fonts.css => custom.css} (77%) diff --git a/.gitignore b/.gitignore index 5738aec9dca..ac01f15002f 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ build /docs/en_single_page/ /docs/ru_single_page/ +/docs/venv/ # callgrind files callgrind.out.* diff --git a/docs/build.sh b/docs/build.sh index 5fbff59a744..17ebfbc32e3 100755 --- a/docs/build.sh +++ b/docs/build.sh @@ -8,7 +8,7 @@ fi for lang in $LANGS; do echo -e "\n\nLANG=$lang. Creating single page source" - mkdir $lang'_single_page' + mkdir $lang'_single_page' 2>/dev/null cp -r $lang/images $lang'_single_page' ./concatenate.py $lang echo -e "\n\nLANG=$lang. Building multipage..." diff --git a/docs/concatenate.py b/docs/concatenate.py index 0a964e8a54a..a2843fd79a3 100755 --- a/docs/concatenate.py +++ b/docs/concatenate.py @@ -14,7 +14,6 @@ # - For non-http links without an anchor, the script logs an error and cuts them from the resulting single-page document. - import codecs import sys import re @@ -29,19 +28,19 @@ if not os.path.exists(sys.argv[1]): print "Pass language_dir correctly. For example, 'ru'." sys.exit(2) -#Configuration -PROJ_CONFIG = 'mkdocs_'+sys.argv[1]+'.yml' -SINGLE_PAGE = sys.argv[1]+'_single_page/index.md' -DOCS_DIR = sys.argv[1]+'/' +# Configuration +PROJ_CONFIG = 'mkdocs_' + sys.argv[1] + '.yml' +SINGLE_PAGE = sys.argv[1] + '_single_page/index.md' +DOCS_DIR = sys.argv[1] + '/' # 1. Open mkdocs.yml file and read `pages` configuration to get an ordered list of files cfg_file = open(PROJ_CONFIG) -files_to_concatenate=[] +files_to_concatenate = [] -for l in cfg_file : - if( '.md' in l ) and ('single_page' not in l): - path = (l[l.index(':')+1:]).strip(" '\n") +for l in cfg_file: + if('.md' in l) and ('single_page' not in l): + path = (l[l.index(':') + 1:]).strip(" '\n") files_to_concatenate.append(path) print str(len(files_to_concatenate)) + " files will be concatenated into single md-file.\nFiles:" @@ -57,30 +56,30 @@ for path in files_to_concatenate: single_page_file.write('\n\n') - file = open(DOCS_DIR+path) + file = open(DOCS_DIR + path) - #function is passed into re.sub() to process links - def link_proc( matchObj ): + # function is passed into re.sub() to process links + def link_proc(matchObj): text, link = matchObj.group().strip('[)').split('](') if link.startswith('http'): - return '['+text+']('+link+')' - else : + return '[' + text + '](' + link + ')' + else: sharp_pos = link.find('#') if sharp_pos > -1: - return '['+text+']('+link[sharp_pos:]+')' - else : - print 'ERROR: Link ['+text+']('+link+') in file '+path+' has no anchor. Please provide it.' - #return '['+text+'](#'+link.replace('/','-')+')' + return '[' + text + '](' + link[sharp_pos:] + ')' + else: + print 'ERROR: Link [' + text + '](' + link + ') in file ' + path + ' has no anchor. Please provide it.'
+ # return '['+text+'](#'+link.replace('/','-')+')' for l in file: # Processing links in a string + l = re.sub(r'\[.+?\]\(.+?\)', link_proc, l) # Correcting headers levels if not first_file: if( l.startswith('#') ): l='#'+l else : first_file = False single_page_file.write(l) diff --git a/docs/create_contents.py b/docs/create_contents.py index 46b822d6c51..c2f8ed58534 100644 --- a/docs/create_contents.py +++ b/docs/create_contents.py @@ -4,12 +4,13 @@ SOURCES_TREE = 'ru' from os import walk + def get_header(filepath): f = open(filepath) header = '' for line in f: - if line.startswith('#') : -# print line + if line.startswith('#'): + # print line header = line[1:].strip(' \n') break @@ -17,19 +18,22 @@ def get_header(filepath): return header -pages_file = open("strings_for_pages.txt","w") -md_links_file = open("links_for_md.txt","w") +pages_file = open("strings_for_pages.txt", "w") +md_links_file = open("links_for_md.txt", "w") for (dirpath, dirnames, filenames) in walk(SOURCES_TREE): - for filename in filenames : + for filename in filenames: - if '.md' not in filename: continue + if '.md' not in filename: + continue - header = get_header(dirpath+'/'+filename) - path = dirpath.replace('docs/','')+'/'+filename + header = get_header(dirpath + '/' + filename) + path = dirpath.replace('docs/', '') + '/' + filename - if filename == 'index.md': pages_file.write("- '" + header + "': " + "'" + path + "'\n") - else: pages_file.write(" - '" + header + "': " + "'" + path + "'\n") + if filename == 'index.md': + pages_file.write("- '" + header + "': " + "'" + path + "'\n") + else: + pages_file.write(" - '" + header + "': " + "'" + path + "'\n") md_links_file.write("[" + header + "](" + path + ")\n") diff --git a/docs/en/operations/access_rights.md b/docs/en/operations/access_rights.md index e2e79d7f2aa..9879dab9a99 100644 --- a/docs/en/operations/access_rights.md +++ b/docs/en/operations/access_rights.md @@ -58,7 +58,7 @@ Users are recorded in the 'users' section. We'll look at a fragment of the `user You can see a declaration from two users: `default` and `web`. We added the `web` user separately. -The `default` user is chosen in cases when the username is not passed. The `default` user is also used for distributed query processing, if the configuration of the server or cluster doesn't specify `user` and `password` (see the section on the [Distributed](../table_engines/distributed.html) engine). +The `default` user is chosen in cases when the username is not passed. The `default` user is also used for distributed query processing, if the configuration of the server or cluster doesn't specify `user` and `password` (see the section on the [Distributed](../table_engines/distributed.md#distributed_distributed) engine). The user that is used for exchanging information between servers combined in a cluster must not have substantial restrictions or quotas – otherwise, distributed queries will fail. diff --git a/docs/en/operations/server_settings/settings.md b/docs/en/operations/server_settings/settings.md index 08505556f11..fe24bab9d85 100644 --- a/docs/en/operations/server_settings/settings.md +++ b/docs/en/operations/server_settings/settings.md @@ -67,7 +67,7 @@ ClickHouse checks ` min_part_size` and ` min_part_size_ratio` and processes th The default database. -Use a [ SHOW DATABASES](../query_language/queries.md#query_language_queries_show_databases) query to get a list of databases.
+Use a [ SHOW DATABASES](../../query_language/queries.md#query_language_queries_show_databases) query to get a list of databases.

**Example**

diff --git a/docs/mkdocs-material-theme/assets/stylesheets/application.ac64251e.css b/docs/mkdocs-material-theme/assets/stylesheets/application.ac64251e.css
index da350a2b17d..1383191d726 100644
--- a/docs/mkdocs-material-theme/assets/stylesheets/application.ac64251e.css
+++ b/docs/mkdocs-material-theme/assets/stylesheets/application.ac64251e.css
@@ -1,2 +1,2 @@
[two-line hunk of minified, machine-generated CSS omitted: the file is the compiled mkdocs-material stylesheet, and the patch text breaks off partway through this hunk]
details.error>.admonition-title:before,.md-typeset details.error>summary:before{color:#ff1744;content:"\E3E7"}.md-typeset .admonition.bug,.md-typeset details.bug{border-left:.4rem solid #f50057}.md-typeset .admonition.bug>.admonition-title,.md-typeset .admonition.bug>summary,.md-typeset details.bug>.admonition-title,.md-typeset details.bug>summary{border-bottom:.1rem solid rgba(245,0,87,.1);background-color:rgba(245,0,87,.1)}.md-typeset .admonition.bug>.admonition-title:before,.md-typeset .admonition.bug>summary:before,.md-typeset details.bug>.admonition-title:before,.md-typeset details.bug>summary:before{color:#f50057;content:"\E868"}.md-typeset .admonition.cite,.md-typeset .admonition.quote,.md-typeset details.cite,.md-typeset details.quote{border-left:.4rem solid #9e9e9e}.md-typeset .admonition.cite>.admonition-title,.md-typeset .admonition.cite>summary,.md-typeset .admonition.quote>.admonition-title,.md-typeset .admonition.quote>summary,.md-typeset details.cite>.admonition-title,.md-typeset details.cite>summary,.md-typeset details.quote>.admonition-title,.md-typeset details.quote>summary{border-bottom:.1rem solid hsla(0,0%,62%,.1);background-color:hsla(0,0%,62%,.1)}.md-typeset .admonition.cite>.admonition-title:before,.md-typeset .admonition.cite>summary:before,.md-typeset .admonition.quote>.admonition-title:before,.md-typeset .admonition.quote>summary:before,.md-typeset details.cite>.admonition-title:before,.md-typeset details.cite>summary:before,.md-typeset details.quote>.admonition-title:before,.md-typeset details.quote>summary:before{color:#9e9e9e;content:"\E244"}.codehilite .o,.codehilite .ow,.md-typeset .highlight .o,.md-typeset .highlight .ow{color:inherit}.codehilite .ge,.md-typeset .highlight .ge{color:#000}.codehilite .gr,.md-typeset .highlight .gr{color:#a00}.codehilite .gh,.md-typeset .highlight .gh{color:#999}.codehilite .go,.md-typeset .highlight .go{color:#888}.codehilite .gp,.md-typeset .highlight .gp{color:#555}.codehilite .gs,.md-typeset .highlight .gs{color:inherit}.codehilite .gu,.md-typeset .highlight .gu{color:#aaa}.codehilite .gt,.md-typeset .highlight .gt{color:#a00}.codehilite .gd,.md-typeset .highlight .gd{background-color:#fdd}.codehilite .gi,.md-typeset .highlight .gi{background-color:#dfd}.codehilite .k,.md-typeset .highlight .k{color:#3b78e7}.codehilite .kc,.md-typeset .highlight .kc{color:#a71d5d}.codehilite .kd,.codehilite .kn,.md-typeset .highlight .kd,.md-typeset .highlight .kn{color:#3b78e7}.codehilite .kp,.md-typeset .highlight .kp{color:#a71d5d}.codehilite .kr,.codehilite .kt,.md-typeset .highlight .kr,.md-typeset .highlight .kt{color:#3e61a2}.codehilite .c,.codehilite .cm,.md-typeset .highlight .c,.md-typeset .highlight .cm{color:#999}.codehilite .cp,.md-typeset .highlight .cp{color:#666}.codehilite .c1,.codehilite .ch,.codehilite .cs,.md-typeset .highlight .c1,.md-typeset .highlight .ch,.md-typeset .highlight .cs{color:#999}.codehilite .na,.codehilite .nb,.md-typeset .highlight .na,.md-typeset .highlight .nb{color:#c2185b}.codehilite .bp,.md-typeset .highlight .bp{color:#3e61a2}.codehilite .nc,.md-typeset .highlight .nc{color:#c2185b}.codehilite .no,.md-typeset .highlight .no{color:#3e61a2}.codehilite .nd,.codehilite .ni,.md-typeset .highlight .nd,.md-typeset .highlight .ni{color:#666}.codehilite .ne,.codehilite .nf,.md-typeset .highlight .ne,.md-typeset .highlight .nf{color:#c2185b}.codehilite .nl,.md-typeset .highlight .nl{color:#3b5179}.codehilite .nn,.md-typeset .highlight .nn{color:#ec407a}.codehilite .nt,.md-typeset .highlight 
.nt{color:#3b78e7}.codehilite .nv,.codehilite .vc,.codehilite .vg,.codehilite .vi,.md-typeset .highlight .nv,.md-typeset .highlight .vc,.md-typeset .highlight .vg,.md-typeset .highlight .vi{color:#3e61a2}.codehilite .nx,.md-typeset .highlight .nx{color:#ec407a}.codehilite .il,.codehilite .m,.codehilite .mf,.codehilite .mh,.codehilite .mi,.codehilite .mo,.md-typeset .highlight .il,.md-typeset .highlight .m,.md-typeset .highlight .mf,.md-typeset .highlight .mh,.md-typeset .highlight .mi,.md-typeset .highlight .mo{color:#e74c3c}.codehilite .s,.codehilite .sb,.codehilite .sc,.md-typeset .highlight .s,.md-typeset .highlight .sb,.md-typeset .highlight .sc{color:#0d904f}.codehilite .sd,.md-typeset .highlight .sd{color:#999}.codehilite .s2,.md-typeset .highlight .s2{color:#0d904f}.codehilite .se,.codehilite .sh,.codehilite .si,.codehilite .sx,.md-typeset .highlight .se,.md-typeset .highlight .sh,.md-typeset .highlight .si,.md-typeset .highlight .sx{color:#183691}.codehilite .sr,.md-typeset .highlight .sr{color:#009926}.codehilite .s1,.codehilite .ss,.md-typeset .highlight .s1,.md-typeset .highlight .ss{color:#0d904f}.codehilite .err,.md-typeset .highlight .err{color:#a61717}.codehilite .w,.md-typeset .highlight .w{color:transparent}.codehilite .hll,.md-typeset .highlight .hll{display:block;margin:0 -1.2rem;padding:0 1.2rem;background-color:rgba(255,235,59,.5)}.md-typeset .codehilite,.md-typeset .highlight{position:relative;margin:1em 0;padding:0;border-radius:.2rem;background-color:hsla(0,0%,93%,.5);color:#37474f;line-height:1.4;-webkit-overflow-scrolling:touch}.md-typeset .codehilite code,.md-typeset .codehilite pre,.md-typeset .highlight code,.md-typeset .highlight pre{display:block;margin:0;padding:1.05rem 1.2rem;background-color:transparent;overflow:auto;vertical-align:top}.md-typeset .codehilite code::-webkit-scrollbar,.md-typeset .codehilite pre::-webkit-scrollbar,.md-typeset .highlight code::-webkit-scrollbar,.md-typeset .highlight pre::-webkit-scrollbar{width:.4rem;height:.4rem}.md-typeset .codehilite code::-webkit-scrollbar-thumb,.md-typeset .codehilite pre::-webkit-scrollbar-thumb,.md-typeset .highlight code::-webkit-scrollbar-thumb,.md-typeset .highlight pre::-webkit-scrollbar-thumb{background-color:rgba(0,0,0,.26)}.md-typeset .codehilite code::-webkit-scrollbar-thumb:hover,.md-typeset .codehilite pre::-webkit-scrollbar-thumb:hover,.md-typeset .highlight code::-webkit-scrollbar-thumb:hover,.md-typeset .highlight pre::-webkit-scrollbar-thumb:hover{background-color:#536dfe}.md-typeset pre.codehilite,.md-typeset pre.highlight{overflow:visible}.md-typeset pre.codehilite code,.md-typeset pre.highlight code{display:block;padding:1.05rem 1.2rem;overflow:auto}.md-typeset .codehilitetable{display:block;margin:1em 0;border-radius:.2em;font-size:1.6rem;overflow:hidden}.md-typeset .codehilitetable tbody,.md-typeset .codehilitetable td{display:block;padding:0}.md-typeset .codehilitetable tr{display:-webkit-box;display:-ms-flexbox;display:flex}.md-typeset .codehilitetable .codehilite,.md-typeset .codehilitetable .highlight,.md-typeset .codehilitetable .linenodiv{margin:0;border-radius:0}.md-typeset .codehilitetable .linenodiv{padding:1.05rem 1.2rem}.md-typeset .codehilitetable .linenos{background-color:rgba(0,0,0,.07);color:rgba(0,0,0,.26);-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.md-typeset .codehilitetable .linenos pre{margin:0;padding:0;background-color:transparent;color:inherit;text-align:right}.md-typeset .codehilitetable 
.code{-webkit-box-flex:1;-ms-flex:1;flex:1;overflow:hidden}.md-typeset>.codehilitetable{-webkit-box-shadow:none;box-shadow:none}.md-typeset [id^="fnref:"]{display:inline-block}.md-typeset [id^="fnref:"]:target{margin-top:-7.6rem;padding-top:7.6rem;pointer-events:none}.md-typeset [id^="fn:"]:before{display:none;height:0;content:""}.md-typeset [id^="fn:"]:target:before{display:block;margin-top:-7rem;padding-top:7rem;pointer-events:none}.md-typeset .footnote{color:rgba(0,0,0,.54);font-size:1.28rem}.md-typeset .footnote ol{margin-left:0}.md-typeset .footnote li{-webkit-transition:color .25s;transition:color .25s}.md-typeset .footnote li:target{color:rgba(0,0,0,.87)}.md-typeset .footnote li :first-child{margin-top:0}.md-typeset .footnote li:hover .footnote-backref,.md-typeset .footnote li:target .footnote-backref{-webkit-transform:translateX(0);transform:translateX(0);opacity:1}.md-typeset .footnote li:hover .footnote-backref:hover,.md-typeset .footnote li:target .footnote-backref{color:#536dfe}.md-typeset .footnote-ref{display:inline-block;pointer-events:auto}.md-typeset .footnote-ref:before{display:inline;margin:0 .2em;border-left:.1rem solid rgba(0,0,0,.26);font-size:1.25em;content:"";vertical-align:-.5rem}.md-typeset .footnote-backref{display:inline-block;-webkit-transform:translateX(.5rem);transform:translateX(.5rem);-webkit-transition:color .25s,opacity .125s .125s,-webkit-transform .25s .125s;transition:color .25s,opacity .125s .125s,-webkit-transform .25s .125s;transition:transform .25s .125s,color .25s,opacity .125s .125s;transition:transform .25s .125s,color .25s,opacity .125s .125s,-webkit-transform .25s .125s;color:rgba(0,0,0,.26);font-size:0;opacity:0;vertical-align:text-bottom}.md-typeset .footnote-backref:before{font-size:1.6rem;content:"\E31B"}.md-typeset .headerlink{display:inline-block;margin-left:1rem;-webkit-transform:translateY(.5rem);transform:translateY(.5rem);-webkit-transition:color .25s,opacity .125s .25s,-webkit-transform .25s .25s;transition:color .25s,opacity .125s .25s,-webkit-transform .25s .25s;transition:transform .25s .25s,color .25s,opacity .125s .25s;transition:transform .25s .25s,color .25s,opacity .125s .25s,-webkit-transform .25s .25s;opacity:0}html body .md-typeset .headerlink{color:rgba(0,0,0,.26)}.md-typeset h1[id] .headerlink{display:none}.md-typeset h2[id]:before{display:block;margin-top:-.8rem;padding-top:.8rem;content:""}.md-typeset h2[id]:target:before{margin-top:-6.8rem;padding-top:6.8rem}.md-typeset h2[id] .headerlink:focus,.md-typeset h2[id]:hover .headerlink,.md-typeset h2[id]:target .headerlink{-webkit-transform:translate(0);transform:translate(0);opacity:1}.md-typeset h2[id] .headerlink:focus,.md-typeset h2[id]:hover .headerlink:hover,.md-typeset h2[id]:target .headerlink{color:#536dfe}.md-typeset h3[id]:before{display:block;margin-top:-.9rem;padding-top:.9rem;content:""}.md-typeset h3[id]:target:before{margin-top:-6.9rem;padding-top:6.9rem}.md-typeset h3[id] .headerlink:focus,.md-typeset h3[id]:hover .headerlink,.md-typeset h3[id]:target .headerlink{-webkit-transform:translate(0);transform:translate(0);opacity:1}.md-typeset h3[id] .headerlink:focus,.md-typeset h3[id]:hover .headerlink:hover,.md-typeset h3[id]:target .headerlink{color:#536dfe}.md-typeset h4[id]:before{display:block;margin-top:-.9rem;padding-top:.9rem;content:""}.md-typeset h4[id]:target:before{margin-top:-6.9rem;padding-top:6.9rem}.md-typeset h4[id] .headerlink:focus,.md-typeset h4[id]:hover .headerlink,.md-typeset h4[id]:target 
.headerlink{-webkit-transform:translate(0);transform:translate(0);opacity:1}.md-typeset h4[id] .headerlink:focus,.md-typeset h4[id]:hover .headerlink:hover,.md-typeset h4[id]:target .headerlink{color:#536dfe}.md-typeset h5[id]:before{display:block;margin-top:-1.1rem;padding-top:1.1rem;content:""}.md-typeset h5[id]:target:before{margin-top:-7.1rem;padding-top:7.1rem}.md-typeset h5[id] .headerlink:focus,.md-typeset h5[id]:hover .headerlink,.md-typeset h5[id]:target .headerlink{-webkit-transform:translate(0);transform:translate(0);opacity:1}.md-typeset h5[id] .headerlink:focus,.md-typeset h5[id]:hover .headerlink:hover,.md-typeset h5[id]:target .headerlink{color:#536dfe}.md-typeset h6[id]:before{display:block;margin-top:-1.1rem;padding-top:1.1rem;content:""}.md-typeset h6[id]:target:before{margin-top:-7.1rem;padding-top:7.1rem}.md-typeset h6[id] .headerlink:focus,.md-typeset h6[id]:hover .headerlink,.md-typeset h6[id]:target .headerlink{-webkit-transform:translate(0);transform:translate(0);opacity:1}.md-typeset h6[id] .headerlink:focus,.md-typeset h6[id]:hover .headerlink:hover,.md-typeset h6[id]:target .headerlink{color:#536dfe}.md-typeset .MJXc-display{margin:.75em 0;padding:.75em 0;overflow:auto;-webkit-overflow-scrolling:touch}.md-typeset .MathJax_CHTML{outline:0}.md-typeset .critic.comment,.md-typeset del.critic,.md-typeset ins.critic{margin:0 .25em;padding:.0625em 0;border-radius:.2rem;-webkit-box-decoration-break:clone;box-decoration-break:clone}.md-typeset del.critic{background-color:#fdd;-webkit-box-shadow:.25em 0 0 #fdd,-.25em 0 0 #fdd;box-shadow:.25em 0 0 #fdd,-.25em 0 0 #fdd}.md-typeset ins.critic{background-color:#dfd;-webkit-box-shadow:.25em 0 0 #dfd,-.25em 0 0 #dfd;box-shadow:.25em 0 0 #dfd,-.25em 0 0 #dfd}.md-typeset .critic.comment{background-color:hsla(0,0%,93%,.5);color:#37474f;-webkit-box-shadow:.25em 0 0 hsla(0,0%,93%,.5),-.25em 0 0 hsla(0,0%,93%,.5);box-shadow:.25em 0 0 hsla(0,0%,93%,.5),-.25em 0 0 hsla(0,0%,93%,.5)}.md-typeset .critic.comment:before{padding-right:.125em;color:rgba(0,0,0,.26);content:"\E0B7";vertical-align:-.125em}.md-typeset .critic.block{display:block;margin:1em 0;padding-right:1.6rem;padding-left:1.6rem;-webkit-box-shadow:none;box-shadow:none}.md-typeset .critic.block :first-child{margin-top:.5em}.md-typeset .critic.block :last-child{margin-bottom:.5em}.md-typeset details{padding-top:0}.md-typeset details[open]>summary:after{-webkit-transform:rotate(180deg);transform:rotate(180deg)}.md-typeset details:not([open]){padding-bottom:0}.md-typeset details:not([open])>summary{border-bottom:none}.md-typeset details summary{padding-right:4rem}.no-details .md-typeset details:not([open])>*{display:none}.no-details .md-typeset details:not([open]) summary{display:block}.md-typeset summary{display:block;outline:none;cursor:pointer}.md-typeset summary::-webkit-details-marker{display:none}.md-typeset summary:after{position:absolute;top:.8rem;right:1.2rem;color:rgba(0,0,0,.26);font-size:2rem;content:"\E313"}.md-typeset .emojione{width:2rem;vertical-align:text-top}.md-typeset code.codehilite,.md-typeset code.highlight{margin:0 .29412em;padding:.07353em 0}.md-typeset .task-list-item{position:relative;list-style-type:none}.md-typeset .task-list-item [type=checkbox]{position:absolute;top:.45em;left:-2em}.md-typeset .task-list-control .task-list-indicator:before{position:absolute;top:.15em;left:-1.25em;color:rgba(0,0,0,.26);font-size:1.25em;content:"\E835";vertical-align:-.25em}.md-typeset .task-list-control 
[type=checkbox]:checked+.task-list-indicator:before{content:"\E834"}.md-typeset .task-list-control [type=checkbox]{opacity:0;z-index:-1}@media print{.md-typeset a:after{color:rgba(0,0,0,.54);content:" [" attr(href) "]"}.md-typeset code,.md-typeset pre{white-space:pre-wrap}.md-typeset code{-webkit-box-shadow:none;box-shadow:none;-webkit-box-decoration-break:initial;box-decoration-break:slice}.md-clipboard,.md-content__icon,.md-footer,.md-header,.md-sidebar,.md-tabs,.md-typeset .headerlink{display:none}}@media only screen and (max-width:44.9375em){.md-typeset pre{margin:1em -1.6rem;border-radius:0}.md-typeset pre>code{padding:1.05rem 1.6rem}.md-footer-nav__link--prev .md-footer-nav__title{display:none}.md-search-result__teaser{max-height:5rem;-webkit-line-clamp:3}.codehilite .hll,.md-typeset .highlight .hll{margin:0 -1.6rem;padding:0 1.6rem}.md-typeset>.codehilite,.md-typeset>.highlight{margin:1em -1.6rem;border-radius:0}.md-typeset>.codehilite code,.md-typeset>.codehilite pre,.md-typeset>.highlight code,.md-typeset>.highlight pre{padding:1.05rem 1.6rem}.md-typeset>.codehilitetable{margin:1em -1.6rem;border-radius:0}.md-typeset>.codehilitetable .codehilite>code,.md-typeset>.codehilitetable .codehilite>pre,.md-typeset>.codehilitetable .highlight>code,.md-typeset>.codehilitetable .highlight>pre,.md-typeset>.codehilitetable .linenodiv{padding:1rem 1.6rem}.md-typeset>p>.MJXc-display{margin:.75em -1.6rem;padding:.25em 1.6rem}}@media only screen and (min-width:100em){html{font-size:68.75%}}@media only screen and (min-width:125em){html{font-size:75%}}@media only screen and (max-width:59.9375em){body[data-md-state=lock]{overflow:hidden}.ios body[data-md-state=lock] .md-container{display:none}html .md-nav__link[for=toc]{display:block;padding-right:4.8rem}html .md-nav__link[for=toc]:after{color:inherit;content:"\E8DE"}html .md-nav__link[for=toc]+.md-nav__link{display:none}html .md-nav__link[for=toc]~.md-nav{display:-webkit-box;display:-ms-flexbox;display:flex}.md-nav__source{display:block;padding:0 .4rem;background-color:rgba(50,64,144,.9675);color:#fff}.md-search__overlay{position:absolute;top:.4rem;left:.4rem;width:3.6rem;height:3.6rem;-webkit-transform-origin:center;transform-origin:center;-webkit-transition:opacity .2s .2s,-webkit-transform .3s .1s;transition:opacity .2s .2s,-webkit-transform .3s .1s;transition:transform .3s .1s,opacity .2s .2s;transition:transform .3s .1s,opacity .2s .2s,-webkit-transform .3s .1s;border-radius:2rem;background-color:#fff;overflow:hidden;pointer-events:none}[data-md-toggle=search]:checked~.md-header .md-search__overlay{-webkit-transition:opacity .1s,-webkit-transform .4s;transition:opacity .1s,-webkit-transform .4s;transition:transform .4s,opacity .1s;transition:transform .4s,opacity .1s,-webkit-transform .4s;opacity:1}.md-search__inner{position:fixed;top:0;left:100%;width:100%;height:100%;-webkit-transform:translateX(5%);transform:translateX(5%);-webkit-transition:left 0s .3s,opacity .15s .15s,-webkit-transform .15s cubic-bezier(.4,0,.2,1) .15s;transition:left 0s .3s,opacity .15s .15s,-webkit-transform .15s cubic-bezier(.4,0,.2,1) .15s;transition:left 0s .3s,transform .15s cubic-bezier(.4,0,.2,1) .15s,opacity .15s .15s;transition:left 0s .3s,transform .15s cubic-bezier(.4,0,.2,1) .15s,opacity .15s .15s,-webkit-transform .15s cubic-bezier(.4,0,.2,1) .15s;opacity:0;z-index:2}[data-md-toggle=search]:checked~.md-header .md-search__inner{left:0;-webkit-transform:translateX(0);transform:translateX(0);-webkit-transition:left 0s 0s,opacity .15s .15s,-webkit-transform .15s 
cubic-bezier(.1,.7,.1,1) .15s;transition:left 0s 0s,opacity .15s .15s,-webkit-transform .15s cubic-bezier(.1,.7,.1,1) .15s;transition:left 0s 0s,transform .15s cubic-bezier(.1,.7,.1,1) .15s,opacity .15s .15s;transition:left 0s 0s,transform .15s cubic-bezier(.1,.7,.1,1) .15s,opacity .15s .15s,-webkit-transform .15s cubic-bezier(.1,.7,.1,1) .15s;opacity:1}.md-search__input{width:100%;height:4.8rem;font-size:1.8rem}.md-search__icon[for=search]{top:1.2rem;left:1.6rem}.md-search__icon[for=search][for=search]:before{content:"\E5C4"}.md-search__icon[type=reset]{top:1.2rem;right:1.6rem}.md-search__output{top:4.8rem;bottom:0}.md-search-result__article--document:before{display:none}}@media only screen and (max-width:76.1875em){[data-md-toggle=drawer]:checked~.md-overlay{width:100%;height:100%;-webkit-transition:width 0s,height 0s,opacity .25s;transition:width 0s,height 0s,opacity .25s;opacity:1}.md-header-nav__button.md-icon--home,.md-header-nav__button.md-logo{display:none}.md-hero__inner{margin-top:4.8rem;margin-bottom:2.4rem}.md-nav{background-color:#fff}.md-nav--primary,.md-nav--primary .md-nav{display:-webkit-box;display:-ms-flexbox;display:flex;position:absolute;top:0;right:0;left:0;-webkit-box-orient:vertical;-webkit-box-direction:normal;-ms-flex-direction:column;flex-direction:column;height:100%;z-index:1}.md-nav--primary .md-nav__item,.md-nav--primary .md-nav__title{font-size:1.6rem;line-height:1.5}html .md-nav--primary .md-nav__title{position:relative;height:11.2rem;padding:6rem 1.6rem .4rem;background-color:rgba(0,0,0,.07);color:rgba(0,0,0,.54);font-weight:400;line-height:4.8rem;white-space:nowrap;cursor:pointer}html .md-nav--primary .md-nav__title:before{display:block;position:absolute;top:.4rem;left:.4rem;width:4rem;height:4rem;color:rgba(0,0,0,.54)}html .md-nav--primary .md-nav__title~.md-nav__list{background-color:#fff;-webkit-box-shadow:0 .1rem 0 rgba(0,0,0,.07) inset;box-shadow:inset 0 .1rem 0 rgba(0,0,0,.07)}html .md-nav--primary .md-nav__title~.md-nav__list>.md-nav__item:first-child{border-top:0}html .md-nav--primary .md-nav__title--site{position:relative;background-color:#3f51b5;color:#fff}html .md-nav--primary .md-nav__title--site .md-nav__button{display:block;position:absolute;top:.4rem;left:.4rem;width:6.4rem;height:6.4rem;font-size:4.8rem}html .md-nav--primary .md-nav__title--site:before{display:none}.md-nav--primary .md-nav__list{-webkit-box-flex:1;-ms-flex:1;flex:1;overflow-y:auto}.md-nav--primary .md-nav__item{padding:0;border-top:.1rem solid rgba(0,0,0,.07)}.md-nav--primary .md-nav__item--nested>.md-nav__link{padding-right:4.8rem}.md-nav--primary .md-nav__item--nested>.md-nav__link:after{content:"\E315"}.md-nav--primary .md-nav__link{position:relative;margin-top:0;padding:1.2rem 1.6rem}.md-nav--primary .md-nav__link:after{position:absolute;top:50%;right:1.2rem;margin-top:-1.2rem;color:inherit;font-size:2.4rem}.md-nav--primary .md-nav--secondary .md-nav__link{position:static}.md-nav--primary .md-nav--secondary .md-nav{position:static;background-color:transparent}.md-nav--primary .md-nav--secondary .md-nav .md-nav__link{padding-left:2.8rem}.md-nav--primary .md-nav--secondary .md-nav .md-nav .md-nav__link{padding-left:4rem}.md-nav--primary .md-nav--secondary .md-nav .md-nav .md-nav .md-nav__link{padding-left:5.2rem}.md-nav--primary .md-nav--secondary .md-nav .md-nav .md-nav .md-nav 
.md-nav__link{padding-left:6.4rem}.md-nav__toggle~.md-nav{display:-webkit-box;display:-ms-flexbox;display:flex;-webkit-transform:translateX(100%);transform:translateX(100%);-webkit-transition:opacity .125s .05s,-webkit-transform .25s cubic-bezier(.8,0,.6,1);transition:opacity .125s .05s,-webkit-transform .25s cubic-bezier(.8,0,.6,1);transition:transform .25s cubic-bezier(.8,0,.6,1),opacity .125s .05s;transition:transform .25s cubic-bezier(.8,0,.6,1),opacity .125s .05s,-webkit-transform .25s cubic-bezier(.8,0,.6,1);opacity:0}.no-csstransforms3d .md-nav__toggle~.md-nav{display:none}.md-nav__toggle:checked~.md-nav{-webkit-transform:translateX(0);transform:translateX(0);-webkit-transition:opacity .125s .125s,-webkit-transform .25s cubic-bezier(.4,0,.2,1);transition:opacity .125s .125s,-webkit-transform .25s cubic-bezier(.4,0,.2,1);transition:transform .25s cubic-bezier(.4,0,.2,1),opacity .125s .125s;transition:transform .25s cubic-bezier(.4,0,.2,1),opacity .125s .125s,-webkit-transform .25s cubic-bezier(.4,0,.2,1);opacity:1}.no-csstransforms3d .md-nav__toggle:checked~.md-nav{display:-webkit-box;display:-ms-flexbox;display:flex}.md-sidebar--primary{position:fixed;top:0;left:-24.2rem;width:24.2rem;height:100%;-webkit-transform:translateX(0);transform:translateX(0);-webkit-transition:-webkit-transform .25s cubic-bezier(.4,0,.2,1),-webkit-box-shadow .25s;transition:-webkit-transform .25s cubic-bezier(.4,0,.2,1),-webkit-box-shadow .25s;transition:transform .25s cubic-bezier(.4,0,.2,1),box-shadow .25s;transition:transform .25s cubic-bezier(.4,0,.2,1),box-shadow .25s,-webkit-transform .25s cubic-bezier(.4,0,.2,1),-webkit-box-shadow .25s;background-color:#fff;z-index:3}.no-csstransforms3d .md-sidebar--primary{display:none}[data-md-toggle=drawer]:checked~.md-container .md-sidebar--primary{-webkit-box-shadow:0 8px 10px 1px rgba(0,0,0,.14),0 3px 14px 2px rgba(0,0,0,.12),0 5px 5px -3px rgba(0,0,0,.4);box-shadow:0 8px 10px 1px rgba(0,0,0,.14),0 3px 14px 2px rgba(0,0,0,.12),0 5px 5px -3px rgba(0,0,0,.4);-webkit-transform:translateX(24.2rem);transform:translateX(24.2rem)}.no-csstransforms3d [data-md-toggle=drawer]:checked~.md-container .md-sidebar--primary{display:block}.md-sidebar--primary .md-sidebar__scrollwrap{overflow:hidden;position:absolute;top:0;right:0;bottom:0;left:0;margin:0}.md-tabs{display:none}}@media only screen and (min-width:60em){.md-content{margin-right:24.2rem}.md-header-nav__button.md-icon--search{display:none}.md-header-nav__source{display:block;width:23rem;max-width:23rem;margin-left:2.8rem;padding-right:1.2rem}.md-search{padding:.4rem}.md-search__overlay{position:fixed;top:0;left:0;width:0;height:0;-webkit-transition:width 0s .25s,height 0s .25s,opacity .25s;transition:width 0s .25s,height 0s .25s,opacity .25s;background-color:rgba(0,0,0,.54);cursor:pointer}[data-md-toggle=search]:checked~.md-header .md-search__overlay{width:100%;height:100%;-webkit-transition:width 0s,height 0s,opacity .25s;transition:width 0s,height 0s,opacity .25s;opacity:1}.md-search__inner{position:relative;width:23rem;padding:.2rem 0;float:right;-webkit-transition:width .25s cubic-bezier(.1,.7,.1,1);transition:width .25s cubic-bezier(.1,.7,.1,1)}.md-search__form,.md-search__input{border-radius:.2rem}.md-search__input{width:100%;height:3.6rem;padding-left:4.4rem;-webkit-transition:background-color .25s cubic-bezier(.1,.7,.1,1),color .25s cubic-bezier(.1,.7,.1,1);transition:background-color .25s cubic-bezier(.1,.7,.1,1),color .25s 
cubic-bezier(.1,.7,.1,1);background-color:rgba(0,0,0,.26);color:inherit;font-size:1.6rem}.md-search__input+.md-search__icon{color:inherit}.md-search__input::-webkit-input-placeholder{color:hsla(0,0%,100%,.7)}.md-search__input:-ms-input-placeholder,.md-search__input::-ms-input-placeholder{color:hsla(0,0%,100%,.7)}.md-search__input::placeholder{color:hsla(0,0%,100%,.7)}.md-search__input:hover{background-color:hsla(0,0%,100%,.12)}[data-md-toggle=search]:checked~.md-header .md-search__input{border-radius:.2rem .2rem 0 0;background-color:#fff;color:rgba(0,0,0,.87);text-overflow:none}[data-md-toggle=search]:checked~.md-header .md-search__input+.md-search__icon,[data-md-toggle=search]:checked~.md-header .md-search__input::-webkit-input-placeholder{color:rgba(0,0,0,.54)}[data-md-toggle=search]:checked~.md-header .md-search__input+.md-search__icon,[data-md-toggle=search]:checked~.md-header .md-search__input:-ms-input-placeholder,[data-md-toggle=search]:checked~.md-header .md-search__input::-ms-input-placeholder{color:rgba(0,0,0,.54)}[data-md-toggle=search]:checked~.md-header .md-search__input+.md-search__icon,[data-md-toggle=search]:checked~.md-header .md-search__input::placeholder{color:rgba(0,0,0,.54)}.md-search__output{top:3.8rem;-webkit-transition:opacity .4s;transition:opacity .4s;opacity:0}[data-md-toggle=search]:checked~.md-header .md-search__output{-webkit-box-shadow:0 6px 10px 0 rgba(0,0,0,.14),0 1px 18px 0 rgba(0,0,0,.12),0 3px 5px -1px rgba(0,0,0,.4);box-shadow:0 6px 10px 0 rgba(0,0,0,.14),0 1px 18px 0 rgba(0,0,0,.12),0 3px 5px -1px rgba(0,0,0,.4);opacity:1}.md-search__scrollwrap{max-height:0}[data-md-toggle=search]:checked~.md-header .md-search__scrollwrap{max-height:75vh}.md-search__scrollwrap::-webkit-scrollbar{width:.4rem;height:.4rem}.md-search__scrollwrap::-webkit-scrollbar-thumb{background-color:rgba(0,0,0,.26)}.md-search__scrollwrap::-webkit-scrollbar-thumb:hover{background-color:#536dfe}.md-search-result__article,.md-search-result__meta{padding-left:4.4rem}.md-sidebar--secondary{display:block;margin-left:100%;-webkit-transform:translate(-100%);transform:translate(-100%)}}@media only screen and (min-width:76.25em){.md-content{margin-left:24.2rem}.md-content__inner{margin-right:2.4rem;margin-left:2.4rem}.md-header-nav__button.md-icon--menu{display:none}.md-nav[data-md-state=animate]{-webkit-transition:max-height .25s cubic-bezier(.86,0,.07,1);transition:max-height .25s cubic-bezier(.86,0,.07,1)}.md-nav__toggle~.md-nav{max-height:0;overflow:hidden}.md-nav[data-md-state=expand],.md-nav__toggle:checked~.md-nav{max-height:100%}.md-nav__item--nested>.md-nav>.md-nav__title{display:none}.md-nav__item--nested>.md-nav__link:after{display:inline-block;-webkit-transform-origin:.45em .45em;transform-origin:.45em .45em;-webkit-transform-style:preserve-3d;transform-style:preserve-3d;vertical-align:-.125em}.js .md-nav__item--nested>.md-nav__link:after{-webkit-transition:-webkit-transform .4s;transition:-webkit-transform .4s;transition:transform .4s;transition:transform .4s,-webkit-transform .4s}.md-nav__item--nested .md-nav__toggle:checked~.md-nav__link:after{-webkit-transform:rotateX(180deg);transform:rotateX(180deg)}.md-search__scrollwrap,[data-md-toggle=search]:checked~.md-header .md-search__inner{width:68.8rem}.md-sidebar--secondary{margin-left:122rem}.md-tabs~.md-main .md-nav--primary>.md-nav__list>.md-nav__item--nested{font-size:0}.md-tabs--active~.md-main .md-nav--primary .md-nav__title--site{display:none}.md-tabs--active~.md-main 
.md-nav--primary>.md-nav__list>.md-nav__item{font-size:0}.md-tabs--active~.md-main .md-nav--primary>.md-nav__list>.md-nav__item--nested{display:none;font-size:1.4rem;overflow:auto}.md-tabs--active~.md-main .md-nav--primary>.md-nav__list>.md-nav__item--nested>.md-nav__link{margin-top:0;font-weight:700;pointer-events:none}.md-tabs--active~.md-main .md-nav--primary>.md-nav__list>.md-nav__item--nested>.md-nav__link:after{display:none}.md-tabs--active~.md-main .md-nav--primary>.md-nav__list>.md-nav__item--active{display:block}.md-tabs--active~.md-main .md-nav[data-md-level="1"]{max-height:none}.md-tabs--active~.md-main .md-nav[data-md-level="1"]>.md-nav__list>.md-nav__item{padding-left:0}}@media only screen and (min-width:45em){.md-footer-nav__link{width:50%}.md-footer-copyright{max-width:75%;float:left}.md-footer-social{padding:1.2rem 0;float:right}}@media only screen and (max-width:29.9375em){[data-md-toggle=search]:checked~.md-header .md-search__overlay{-webkit-transform:scale(45);transform:scale(45)}}@media only screen and (min-width:30em) and (max-width:44.9375em){[data-md-toggle=search]:checked~.md-header .md-search__overlay{-webkit-transform:scale(60);transform:scale(60)}}@media only screen and (min-width:45em) and (max-width:59.9375em){[data-md-toggle=search]:checked~.md-header .md-search__overlay{-webkit-transform:scale(75);transform:scale(75)}}@media only screen and (min-width:60em) and (max-width:76.1875em){.md-search__scrollwrap,[data-md-toggle=search]:checked~.md-header .md-search__inner{width:46.8rem}.md-search-result__teaser{max-height:5rem;-webkit-line-clamp:3}} -/*# sourceMappingURL=data:application/json;charset=utf-8;base64,eyJ2ZXJzaW9uIjozLCJzb3VyY2VzIjpbXSwibmFtZXMiOltdLCJtYXBwaW5ncyI6IiIsImZpbGUiOiJhc3NldHMvc3R5bGVzaGVldHMvYXBwbGljYXRpb24uYWM2NDI1MWUuY3NzIiwic291cmNlUm9vdCI6IiJ9*/ \ No newline at end of file +html{-webkit-box-sizing:border-box;box-sizing:border-box}*,:after,:before{-webkit-box-sizing:inherit;box-sizing:inherit}html{-webkit-text-size-adjust:none;-moz-text-size-adjust:none;-ms-text-size-adjust:none;text-size-adjust:none}body{margin:0}hr{overflow:visible;-webkit-box-sizing:content-box;box-sizing:content-box}a{-webkit-text-decoration-skip:objects}a,button,input,label{-webkit-tap-highlight-color:transparent}a{color:inherit;text-decoration:none}a:active,a:hover{outline-width:0}small,sub,sup{font-size:80%}sub,sup{position:relative;line-height:0;vertical-align:baseline}sub{bottom:-.25em}sup{top:-.5em}img{border-style:none}table{border-collapse:collapse;border-spacing:0}td,th{font-weight:400;vertical-align:top}button{padding:0;background:transparent;font-size:inherit}button,input{border:0;outline:0}.md-clipboard:before,.md-icon,.md-nav__button,.md-nav__link:after,.md-nav__title:before,.md-search-result__article--document:before,.md-source-file:before,.md-typeset .admonition>.admonition-title:before,.md-typeset .admonition>summary:before,.md-typeset .critic.comment:before,.md-typeset .footnote-backref,.md-typeset .task-list-control .task-list-indicator:before,.md-typeset details>.admonition-title:before,.md-typeset details>summary:before,.md-typeset summary:after{font-family:Material 
Icons;font-style:normal;font-variant:normal;font-weight:400;line-height:1;text-transform:none;white-space:nowrap;speak:none;word-wrap:normal;direction:ltr}.md-content__icon,.md-footer-nav__button,.md-header-nav__button,.md-nav__button,.md-nav__title:before,.md-search-result__article--document:before{display:inline-block;margin:.4rem;padding:.8rem;font-size:2.4rem;cursor:pointer}.md-icon--arrow-back:before{content:"\E5C4"}.md-icon--arrow-forward:before{content:"\E5C8"}.md-icon--menu:before{content:"\E5D2"}.md-icon--search:before{content:"\E8B6"}body{-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale}body,input{color:rgba(0,0,0,.87);-webkit-font-feature-settings:"kern","liga";font-feature-settings:"kern","liga";font-family:Helvetica Neue,Helvetica,Arial,sans-serif}code,kbd,pre{color:rgba(0,0,0,.87);-webkit-font-feature-settings:"kern";font-feature-settings:"kern";font-family:Courier New,Courier,monospace}.md-typeset{font-size:1.6rem;line-height:1.6;-webkit-print-color-adjust:exact}.md-typeset blockquote,.md-typeset ol,.md-typeset p,.md-typeset ul{margin:1em 0}.md-typeset h1{margin:0 0 4rem;color:rgba(0,0,0,.54);font-size:3.125rem;line-height:1.3}.md-typeset h1,.md-typeset h2{font-weight:300;letter-spacing:-.01em}.md-typeset h2{margin:4rem 0 1.6rem;font-size:2.5rem;line-height:1.4}.md-typeset h3{margin:3.2rem 0 1.6rem;font-size:2rem;font-weight:400;letter-spacing:-.01em;line-height:1.5}.md-typeset h2+h3{margin-top:1.6rem}.md-typeset h4{font-size:1.6rem}.md-typeset h4,.md-typeset h5,.md-typeset h6{margin:1.6rem 0;font-weight:700;letter-spacing:-.01em}.md-typeset h5,.md-typeset h6{color:rgba(0,0,0,.54);font-size:1.28rem}.md-typeset h5{text-transform:uppercase}.md-typeset hr{margin:1.5em 0;border-bottom:.1rem dotted rgba(0,0,0,.26)}.md-typeset a{color:#3f51b5;word-break:break-word}.md-typeset a,.md-typeset a:before{-webkit-transition:color .125s;transition:color .125s}.md-typeset a:active,.md-typeset a:hover{color:#536dfe}.md-typeset code,.md-typeset pre{background-color:hsla(0,0%,93%,.5);color:#37474f;font-size:85%}.md-typeset code{margin:0 .29412em;padding:.07353em 0;border-radius:.2rem;-webkit-box-shadow:.29412em 0 0 hsla(0,0%,93%,.5),-.29412em 0 0 hsla(0,0%,93%,.5);box-shadow:.29412em 0 0 hsla(0,0%,93%,.5),-.29412em 0 0 hsla(0,0%,93%,.5);word-break:break-word;-webkit-box-decoration-break:clone;box-decoration-break:clone}.md-typeset h1 code,.md-typeset h2 code,.md-typeset h3 code,.md-typeset h4 code,.md-typeset h5 code,.md-typeset h6 code{margin:0;background-color:transparent;-webkit-box-shadow:none;box-shadow:none}.md-typeset a>code{margin:inherit;padding:inherit;border-radius:none;background-color:inherit;color:inherit;-webkit-box-shadow:none;box-shadow:none}.md-typeset pre{position:relative;margin:1em 0;border-radius:.2rem;line-height:1.4;-webkit-overflow-scrolling:touch}.md-typeset pre>code{display:block;margin:0;padding:1.05rem 1.2rem;background-color:transparent;font-size:inherit;-webkit-box-shadow:none;box-shadow:none;-webkit-box-decoration-break:none;box-decoration-break:none;overflow:auto}.md-typeset pre>code::-webkit-scrollbar{width:.4rem;height:.4rem}.md-typeset pre>code::-webkit-scrollbar-thumb{background-color:rgba(0,0,0,.26)}.md-typeset pre>code::-webkit-scrollbar-thumb:hover{background-color:#536dfe}.md-typeset kbd{padding:0 .29412em;border:.1rem solid #c9c9c9;border-radius:.2rem;border-bottom-color:#bcbcbc;background-color:#fcfcfc;color:#555;font-size:85%;-webkit-box-shadow:0 .1rem 0 #b0b0b0;box-shadow:0 .1rem 0 #b0b0b0;word-break:break-word}.md-typeset 
mark{margin:0 .25em;padding:.0625em 0;border-radius:.2rem;background-color:rgba(255,235,59,.5);-webkit-box-shadow:.25em 0 0 rgba(255,235,59,.5),-.25em 0 0 rgba(255,235,59,.5);box-shadow:.25em 0 0 rgba(255,235,59,.5),-.25em 0 0 rgba(255,235,59,.5);word-break:break-word;-webkit-box-decoration-break:clone;box-decoration-break:clone}.md-typeset abbr{border-bottom:.1rem dotted rgba(0,0,0,.54);text-decoration:none;cursor:help}.md-typeset small{opacity:.75}.md-typeset sub,.md-typeset sup{margin-left:.07812em}.md-typeset blockquote{padding-left:1.2rem;border-left:.4rem solid rgba(0,0,0,.26);color:rgba(0,0,0,.54)}.md-typeset ul{list-style-type:disc}.md-typeset ol,.md-typeset ul{margin-left:.625em;padding:0}.md-typeset ol ol,.md-typeset ul ol{list-style-type:lower-alpha}.md-typeset ol ol ol,.md-typeset ul ol ol{list-style-type:lower-roman}.md-typeset ol li,.md-typeset ul li{margin-bottom:.5em;margin-left:1.25em}.md-typeset ol li blockquote,.md-typeset ol li p,.md-typeset ul li blockquote,.md-typeset ul li p{margin:.5em 0}.md-typeset ol li:last-child,.md-typeset ul li:last-child{margin-bottom:0}.md-typeset ol li ol,.md-typeset ol li ul,.md-typeset ul li ol,.md-typeset ul li ul{margin:.5em 0 .5em .625em}.md-typeset dd{margin:1em 0 1em 1.875em}.md-typeset iframe,.md-typeset img,.md-typeset svg{max-width:100%}.md-typeset table:not([class]){-webkit-box-shadow:0 2px 2px 0 rgba(0,0,0,.14),0 1px 5px 0 rgba(0,0,0,.12),0 3px 1px -2px rgba(0,0,0,.2);box-shadow:0 2px 2px 0 rgba(0,0,0,.14),0 1px 5px 0 rgba(0,0,0,.12),0 3px 1px -2px rgba(0,0,0,.2);display:inline-block;max-width:100%;border-radius:.2rem;font-size:1.28rem;overflow:auto;-webkit-overflow-scrolling:touch}.md-typeset table:not([class])+*{margin-top:1.5em}.md-typeset table:not([class]) td:not([align]),.md-typeset table:not([class]) th:not([align]){text-align:left}.md-typeset table:not([class]) th{min-width:10rem;padding:1.2rem 1.6rem;background-color:rgba(0,0,0,.54);color:#fff;vertical-align:top}.md-typeset table:not([class]) td{padding:1.2rem 1.6rem;border-top:.1rem solid rgba(0,0,0,.07);vertical-align:top}.md-typeset table:not([class]) tr:first-child td{border-top:0}.md-typeset table:not([class]) a{word-break:normal}.md-typeset__scrollwrap{margin:1em -1.6rem;overflow-x:auto;-webkit-overflow-scrolling:touch}.md-typeset .md-typeset__table{display:inline-block;margin-bottom:.5em;padding:0 1.6rem}.md-typeset .md-typeset__table table{display:table;width:100%;margin:0;overflow:hidden}html{font-size:62.5%;overflow-x:hidden}body,html{height:100%}body{position:relative}hr{display:block;height:.1rem;padding:0;border:0}.md-svg{display:none}.md-grid{max-width:122rem;margin-right:auto;margin-left:auto}.md-container,.md-main{overflow:auto}.md-container{display:table;width:100%;height:100%;padding-top:4.8rem;table-layout:fixed}.md-main{display:table-row;height:100%}.md-main__inner{height:100%;padding-top:3rem;padding-bottom:.1rem}.md-toggle{display:none}.md-overlay{position:fixed;top:0;width:0;height:0;-webkit-transition:width 0s .25s,height 0s .25s,opacity .25s;transition:width 0s .25s,height 0s .25s,opacity 
.25s;background-color:rgba(0,0,0,.54);opacity:0;z-index:3}.md-flex{display:table}.md-flex__cell{display:table-cell;position:relative;vertical-align:top}.md-flex__cell--shrink{width:0}.md-flex__cell--stretch{display:table;width:100%;table-layout:fixed}.md-flex__ellipsis{display:table-cell;text-overflow:ellipsis;white-space:nowrap;overflow:hidden}@page{margin:25mm}.md-clipboard{position:absolute;top:.6rem;right:.6rem;width:2.8rem;height:2.8rem;border-radius:.2rem;font-size:1.6rem;cursor:pointer;z-index:1;-webkit-backface-visibility:hidden;backface-visibility:hidden}.md-clipboard:before{-webkit-transition:color .25s,opacity .25s;transition:color .25s,opacity .25s;color:rgba(0,0,0,.54);content:"\E14D";opacity:.25}.codehilite:hover .md-clipboard:before,.md-typeset .highlight:hover .md-clipboard:before,pre:hover .md-clipboard:before{opacity:1}.md-clipboard:active:before,.md-clipboard:hover:before{color:#536dfe}.md-clipboard__message{display:block;position:absolute;top:0;right:3.4rem;padding:.6rem 1rem;-webkit-transform:translateX(.8rem);transform:translateX(.8rem);-webkit-transition:opacity .175s,-webkit-transform .25s cubic-bezier(.9,.1,.9,0);transition:opacity .175s,-webkit-transform .25s cubic-bezier(.9,.1,.9,0);transition:transform .25s cubic-bezier(.9,.1,.9,0),opacity .175s;transition:transform .25s cubic-bezier(.9,.1,.9,0),opacity .175s,-webkit-transform .25s cubic-bezier(.9,.1,.9,0);border-radius:.2rem;background-color:rgba(0,0,0,.54);color:#fff;font-size:1.28rem;white-space:nowrap;opacity:0;pointer-events:none}.md-clipboard__message--active{-webkit-transform:translateX(0);transform:translateX(0);-webkit-transition:opacity .175s 75ms,-webkit-transform .25s cubic-bezier(.4,0,.2,1);transition:opacity .175s 75ms,-webkit-transform .25s cubic-bezier(.4,0,.2,1);transition:transform .25s cubic-bezier(.4,0,.2,1),opacity .175s 75ms;transition:transform .25s cubic-bezier(.4,0,.2,1),opacity .175s 75ms,-webkit-transform .25s cubic-bezier(.4,0,.2,1);opacity:1;pointer-events:auto}.md-clipboard__message:before{content:attr(aria-label)}.md-clipboard__message:after{display:block;position:absolute;top:50%;right:-.4rem;width:0;margin-top:-.4rem;border-width:.4rem 0 .4rem .4rem;border-style:solid;border-color:transparent rgba(0,0,0,.54);content:""}.md-content__inner{margin:0 1.6rem 2.4rem;padding-top:1.2rem}.md-content__inner:before{display:block;height:.8rem;content:""}.md-content__inner>:last-child{margin-bottom:0}.md-content__icon{position:relative;margin:.8rem 0;padding:0;float:right}.md-typeset .md-content__icon{color:rgba(0,0,0,.26)}.md-header{position:fixed;top:0;right:0;left:0;height:4.8rem;-webkit-transition:background-color .25s,color .25s;transition:background-color .25s,color .25s;background-color:#3f51b5;color:#fff;z-index:2;-webkit-backface-visibility:hidden;backface-visibility:hidden}.md-header,.no-js .md-header{-webkit-box-shadow:none;box-shadow:none}.md-header-nav{padding:0 .4rem}.md-header-nav__button{position:relative;-webkit-transition:opacity .25s;transition:opacity .25s;z-index:1}.md-header-nav__button:hover{opacity:.7}.md-header-nav__button.md-logo *{display:block}.no-js .md-header-nav__button.md-icon--search{display:none}.md-header-nav__topic{display:block;position:absolute;-webkit-transition:opacity .15s,-webkit-transform .4s cubic-bezier(.1,.7,.1,1);transition:opacity .15s,-webkit-transform .4s cubic-bezier(.1,.7,.1,1);transition:transform .4s cubic-bezier(.1,.7,.1,1),opacity .15s;transition:transform .4s cubic-bezier(.1,.7,.1,1),opacity .15s,-webkit-transform .4s 
cubic-bezier(.1,.7,.1,1);text-overflow:ellipsis;white-space:nowrap;overflow:hidden}.md-header-nav__topic+.md-header-nav__topic{-webkit-transform:translateX(2.5rem);transform:translateX(2.5rem);-webkit-transition:opacity .15s,-webkit-transform .4s cubic-bezier(1,.7,.1,.1);transition:opacity .15s,-webkit-transform .4s cubic-bezier(1,.7,.1,.1);transition:transform .4s cubic-bezier(1,.7,.1,.1),opacity .15s;transition:transform .4s cubic-bezier(1,.7,.1,.1),opacity .15s,-webkit-transform .4s cubic-bezier(1,.7,.1,.1);opacity:0;z-index:-1;pointer-events:none}.no-js .md-header-nav__topic{position:static}.md-header-nav__title{padding:0 2rem;font-size:1.8rem;line-height:4.8rem}.md-header-nav__title[data-md-state=active] .md-header-nav__topic{-webkit-transform:translateX(-2.5rem);transform:translateX(-2.5rem);-webkit-transition:opacity .15s,-webkit-transform .4s cubic-bezier(1,.7,.1,.1);transition:opacity .15s,-webkit-transform .4s cubic-bezier(1,.7,.1,.1);transition:transform .4s cubic-bezier(1,.7,.1,.1),opacity .15s;transition:transform .4s cubic-bezier(1,.7,.1,.1),opacity .15s,-webkit-transform .4s cubic-bezier(1,.7,.1,.1);opacity:0;z-index:-1;pointer-events:none}.md-header-nav__title[data-md-state=active] .md-header-nav__topic+.md-header-nav__topic{-webkit-transform:translateX(0);transform:translateX(0);-webkit-transition:opacity .15s,-webkit-transform .4s cubic-bezier(.1,.7,.1,1);transition:opacity .15s,-webkit-transform .4s cubic-bezier(.1,.7,.1,1);transition:transform .4s cubic-bezier(.1,.7,.1,1),opacity .15s;transition:transform .4s cubic-bezier(.1,.7,.1,1),opacity .15s,-webkit-transform .4s cubic-bezier(.1,.7,.1,1);opacity:1;z-index:0;pointer-events:auto}.md-header-nav__source{display:none}.md-hero{-webkit-transition:background .25s;transition:background .25s;background-color:#3f51b5;color:#fff;font-size:2rem;overflow:hidden}.md-hero__inner{margin-top:2rem;padding:1.6rem 1.6rem .8rem;-webkit-transition:opacity .25s,-webkit-transform .4s cubic-bezier(.1,.7,.1,1);transition:opacity .25s,-webkit-transform .4s cubic-bezier(.1,.7,.1,1);transition:transform .4s cubic-bezier(.1,.7,.1,1),opacity .25s;transition:transform .4s cubic-bezier(.1,.7,.1,1),opacity .25s,-webkit-transform .4s cubic-bezier(.1,.7,.1,1);-webkit-transition-delay:.1s;transition-delay:.1s}[data-md-state=hidden] .md-hero__inner{pointer-events:none;-webkit-transform:translateY(1.25rem);transform:translateY(1.25rem);-webkit-transition:opacity .1s 0s,-webkit-transform 0s .4s;transition:opacity .1s 0s,-webkit-transform 0s .4s;transition:transform 0s .4s,opacity .1s 0s;transition:transform 0s .4s,opacity .1s 0s,-webkit-transform 0s .4s;opacity:0}.md-hero--expand .md-hero__inner{margin-bottom:2.4rem}.md-footer-nav{background-color:rgba(0,0,0,.87);color:#fff}.md-footer-nav__inner{padding:.4rem;overflow:auto}.md-footer-nav__link{padding-top:2.8rem;padding-bottom:.8rem;-webkit-transition:opacity .25s;transition:opacity .25s}.md-footer-nav__link:hover{opacity:.7}.md-footer-nav__link--prev{width:25%;float:left}.md-footer-nav__link--next{width:75%;float:right;text-align:right}.md-footer-nav__button{-webkit-transition:background .25s;transition:background .25s}.md-footer-nav__title{position:relative;padding:0 2rem;font-size:1.8rem;line-height:4.8rem}.md-footer-nav__direction{position:absolute;right:0;left:0;margin-top:-2rem;padding:0 2rem;color:hsla(0,0%,100%,.7);font-size:1.5rem}.md-footer-meta{background-color:rgba(0,0,0,.895)}.md-footer-meta__inner{padding:.4rem;overflow:auto}html .md-footer-meta.md-typeset a{color:hsla(0,0%,100%,.7)}html 
.md-footer-meta.md-typeset a:focus,html .md-footer-meta.md-typeset a:hover{color:#fff}.md-footer-copyright{margin:0 1.2rem;padding:.8rem 0;color:hsla(0,0%,100%,.3);font-size:1.28rem}.md-footer-copyright__highlight{color:hsla(0,0%,100%,.7)}.md-footer-social{margin:0 .8rem;padding:.4rem 0 1.2rem}.md-footer-social__link{display:inline-block;width:3.2rem;height:3.2rem;font-size:1.6rem;text-align:center}.md-footer-social__link:before{line-height:1.9}.md-nav{font-size:1.4rem;line-height:1.3}.md-nav--secondary .md-nav__link--active{color:#3f51b5}.md-nav__title{display:block;padding:0 1.2rem;font-weight:700;text-overflow:ellipsis;overflow:hidden}.md-nav__title:before{display:none;content:"\E5C4"}.md-nav__title .md-nav__button{display:none}.md-nav__list{margin:0;padding:0;list-style:none}.md-nav__item{padding:0 1.2rem}.md-nav__item:last-child{padding-bottom:1.2rem}.md-nav__item .md-nav__item{padding-right:0}.md-nav__item .md-nav__item:last-child{padding-bottom:0}.md-nav__button img{width:100%;height:auto}.md-nav__link{display:block;margin-top:.625em;-webkit-transition:color .125s;transition:color .125s;text-overflow:ellipsis;cursor:pointer;overflow:hidden}.md-nav__item--nested>.md-nav__link:after{content:"\E313"}html .md-nav__link[for=toc],html .md-nav__link[for=toc]+.md-nav__link:after,html .md-nav__link[for=toc]~.md-nav{display:none}.md-nav__link[data-md-state=blur]{color:rgba(0,0,0,.54)}.md-nav__link:active{color:#3f51b5}.md-nav__item--nested>.md-nav__link{color:inherit}.md-nav__link:focus,.md-nav__link:hover{color:#536dfe}.md-nav__source,.no-js .md-search{display:none}.md-search__overlay{opacity:0;z-index:1}.md-search__form{position:relative}.md-search__input{position:relative;padding:0 4.8rem 0 7.2rem;text-overflow:ellipsis;z-index:2}.md-search__input::-webkit-input-placeholder{-webkit-transition:color .25s cubic-bezier(.1,.7,.1,1);transition:color .25s cubic-bezier(.1,.7,.1,1)}.md-search__input:-ms-input-placeholder,.md-search__input::-ms-input-placeholder{-webkit-transition:color .25s cubic-bezier(.1,.7,.1,1);transition:color .25s cubic-bezier(.1,.7,.1,1)}.md-search__input::placeholder{-webkit-transition:color .25s cubic-bezier(.1,.7,.1,1);transition:color .25s cubic-bezier(.1,.7,.1,1)}.md-search__input::-webkit-input-placeholder,.md-search__input~.md-search__icon{color:rgba(0,0,0,.54)}.md-search__input:-ms-input-placeholder,.md-search__input::-ms-input-placeholder,.md-search__input~.md-search__icon{color:rgba(0,0,0,.54)}.md-search__input::placeholder,.md-search__input~.md-search__icon{color:rgba(0,0,0,.54)}.md-search__input::-ms-clear{display:none}.md-search__icon{position:absolute;-webkit-transition:color .25s cubic-bezier(.1,.7,.1,1),opacity .25s;transition:color .25s cubic-bezier(.1,.7,.1,1),opacity .25s;font-size:2.4rem;cursor:pointer;z-index:2}.md-search__icon:hover{opacity:.7}.md-search__icon[for=search]{top:.6rem;left:1rem}.md-search__icon[for=search]:before{content:"\E8B6"}.md-search__icon[type=reset]{top:.6rem;right:1rem;-webkit-transform:scale(.125);transform:scale(.125);-webkit-transition:opacity .15s,-webkit-transform .15s cubic-bezier(.1,.7,.1,1);transition:opacity .15s,-webkit-transform .15s cubic-bezier(.1,.7,.1,1);transition:transform .15s cubic-bezier(.1,.7,.1,1),opacity .15s;transition:transform .15s cubic-bezier(.1,.7,.1,1),opacity .15s,-webkit-transform .15s cubic-bezier(.1,.7,.1,1);opacity:0}[data-md-toggle=search]:checked~.md-header 
.md-search__input:valid~.md-search__icon[type=reset]{-webkit-transform:scale(1);transform:scale(1);opacity:1}[data-md-toggle=search]:checked~.md-header .md-search__input:valid~.md-search__icon[type=reset]:hover{opacity:.7}.md-search__output{position:absolute;width:100%;border-radius:0 0 .2rem .2rem;overflow:hidden;z-index:1}.md-search__scrollwrap{height:100%;background-color:#fff;-webkit-box-shadow:0 .1rem 0 rgba(0,0,0,.07) inset;box-shadow:inset 0 .1rem 0 rgba(0,0,0,.07);overflow-y:auto;-webkit-overflow-scrolling:touch}.md-search-result{color:rgba(0,0,0,.87);word-break:break-word}.md-search-result__meta{padding:0 1.6rem;background-color:rgba(0,0,0,.07);color:rgba(0,0,0,.54);font-size:1.28rem;line-height:3.6rem}.md-search-result__list{margin:0;padding:0;border-top:.1rem solid rgba(0,0,0,.07);list-style:none}.md-search-result__item{-webkit-box-shadow:0 -.1rem 0 rgba(0,0,0,.07);box-shadow:0 -.1rem 0 rgba(0,0,0,.07)}.md-search-result__link{display:block;-webkit-transition:background .25s;transition:background .25s;outline:0;overflow:hidden}.md-search-result__link:hover,.md-search-result__link[data-md-state=active]{background-color:rgba(83,109,254,.1)}.md-search-result__link:hover .md-search-result__article:before,.md-search-result__link[data-md-state=active] .md-search-result__article:before{opacity:.7}.md-search-result__link:last-child .md-search-result__teaser{margin-bottom:1.2rem}.md-search-result__article{position:relative;padding:0 1.6rem;overflow:auto}.md-search-result__article--document:before{position:absolute;left:0;margin:.2rem;-webkit-transition:opacity .25s;transition:opacity .25s;color:rgba(0,0,0,.54);content:"\E880"}.md-search-result__article--document .md-search-result__title{margin:1.1rem 0;font-size:1.6rem;font-weight:400;line-height:1.4}.md-search-result__title{margin:.5em 0;font-size:1.28rem;font-weight:700;line-height:1.4}.md-search-result__teaser{display:-webkit-box;max-height:3.3rem;margin:.5em 0;color:rgba(0,0,0,.54);font-size:1.28rem;line-height:1.4;text-overflow:ellipsis;overflow:hidden;-webkit-box-orient:vertical;-webkit-line-clamp:2}.md-search-result em{font-style:normal;font-weight:700;text-decoration:underline}.md-sidebar{position:absolute;width:24.2rem;padding:2.4rem 0;overflow:hidden}.md-sidebar[data-md-state=lock]{position:fixed;top:4.8rem}.md-sidebar--secondary{display:none}.md-sidebar__scrollwrap{max-height:100%;margin:0 .4rem;overflow-y:auto;-webkit-backface-visibility:hidden;backface-visibility:hidden}.md-sidebar__scrollwrap::-webkit-scrollbar{width:.4rem;height:.4rem}.md-sidebar__scrollwrap::-webkit-scrollbar-thumb{background-color:rgba(0,0,0,.26)}.md-sidebar__scrollwrap::-webkit-scrollbar-thumb:hover{background-color:#536dfe}@-webkit-keyframes md-source__facts--done{0%{height:0}to{height:1.3rem}}@keyframes md-source__facts--done{0%{height:0}to{height:1.3rem}}@-webkit-keyframes md-source__fact--done{0%{-webkit-transform:translateY(100%);transform:translateY(100%);opacity:0}50%{opacity:0}to{-webkit-transform:translateY(0);transform:translateY(0);opacity:1}}@keyframes md-source__fact--done{0%{-webkit-transform:translateY(100%);transform:translateY(100%);opacity:0}50%{opacity:0}to{-webkit-transform:translateY(0);transform:translateY(0);opacity:1}}.md-source{display:block;padding-right:1.2rem;-webkit-transition:opacity .25s;transition:opacity 
.25s;font-size:1.3rem;line-height:1.2;white-space:nowrap}.md-source:hover{opacity:.7}.md-source:after,.md-source__icon{display:inline-block;height:4.8rem;content:"";vertical-align:middle}.md-source__icon{width:4.8rem}.md-source__icon svg{width:2.4rem;height:2.4rem;margin-top:1.2rem;margin-left:1.2rem}.md-source__icon+.md-source__repository{margin-left:-4.4rem;padding-left:4rem}.md-source__repository{display:inline-block;max-width:100%;margin-left:1.2rem;font-weight:700;text-overflow:ellipsis;overflow:hidden;vertical-align:middle}.md-source__facts{margin:0;padding:0;font-size:1.1rem;font-weight:700;list-style-type:none;opacity:.75;overflow:hidden}[data-md-state=done] .md-source__facts{-webkit-animation:md-source__facts--done .25s ease-in;animation:md-source__facts--done .25s ease-in}.md-source__fact{float:left}[data-md-state=done] .md-source__fact{-webkit-animation:md-source__fact--done .4s ease-out;animation:md-source__fact--done .4s ease-out}.md-source__fact:before{margin:0 .2rem;content:"\B7"}.md-source__fact:first-child:before{display:none}.md-source-file{display:inline-block;margin:1em .5em 1em 0;padding-right:.5rem;border-radius:.2rem;background-color:rgba(0,0,0,.07);font-size:1.28rem;list-style-type:none;cursor:pointer;overflow:hidden}.md-source-file:before{display:inline-block;margin-right:.5rem;padding:.5rem;background-color:rgba(0,0,0,.26);color:#fff;font-size:1.6rem;content:"\E86F";vertical-align:middle}html .md-source-file{-webkit-transition:background .4s,color .4s,-webkit-box-shadow .4s cubic-bezier(.4,0,.2,1);transition:background .4s,color .4s,-webkit-box-shadow .4s cubic-bezier(.4,0,.2,1);transition:background .4s,color .4s,box-shadow .4s cubic-bezier(.4,0,.2,1);transition:background .4s,color .4s,box-shadow .4s cubic-bezier(.4,0,.2,1),-webkit-box-shadow .4s cubic-bezier(.4,0,.2,1)}html .md-source-file:before{-webkit-transition:inherit;transition:inherit}html body .md-typeset .md-source-file{color:rgba(0,0,0,.54)}.md-source-file:hover{-webkit-box-shadow:0 0 8px rgba(0,0,0,.18),0 8px 16px rgba(0,0,0,.36);box-shadow:0 0 8px rgba(0,0,0,.18),0 8px 16px rgba(0,0,0,.36)}.md-source-file:hover:before{background-color:#536dfe}.md-tabs{width:100%;-webkit-transition:background .25s;transition:background .25s;background-color:#3f51b5;color:#fff;overflow:auto}.md-tabs__list{margin:0;margin-left:.4rem;padding:0;list-style:none;white-space:nowrap}.md-tabs__item{display:inline-block;height:4.8rem;padding-right:1.2rem;padding-left:1.2rem}.md-tabs__link{display:block;margin-top:1.6rem;-webkit-transition:opacity .25s,-webkit-transform .4s cubic-bezier(.1,.7,.1,1);transition:opacity .25s,-webkit-transform .4s cubic-bezier(.1,.7,.1,1);transition:transform .4s cubic-bezier(.1,.7,.1,1),opacity .25s;transition:transform .4s cubic-bezier(.1,.7,.1,1),opacity .25s,-webkit-transform .4s cubic-bezier(.1,.7,.1,1);font-size:1.4rem;opacity:.7}.md-tabs__link--active,.md-tabs__link:hover{color:inherit;opacity:1}.md-tabs__item:nth-child(2) .md-tabs__link{-webkit-transition-delay:.02s;transition-delay:.02s}.md-tabs__item:nth-child(3) .md-tabs__link{-webkit-transition-delay:.04s;transition-delay:.04s}.md-tabs__item:nth-child(4) .md-tabs__link{-webkit-transition-delay:.06s;transition-delay:.06s}.md-tabs__item:nth-child(5) .md-tabs__link{-webkit-transition-delay:.08s;transition-delay:.08s}.md-tabs__item:nth-child(6) .md-tabs__link{-webkit-transition-delay:.1s;transition-delay:.1s}.md-tabs__item:nth-child(7) .md-tabs__link{-webkit-transition-delay:.12s;transition-delay:.12s}.md-tabs__item:nth-child(8) 
.md-tabs__link{-webkit-transition-delay:.14s;transition-delay:.14s}.md-tabs__item:nth-child(9) .md-tabs__link{-webkit-transition-delay:.16s;transition-delay:.16s}.md-tabs__item:nth-child(10) .md-tabs__link{-webkit-transition-delay:.18s;transition-delay:.18s}.md-tabs__item:nth-child(11) .md-tabs__link{-webkit-transition-delay:.2s;transition-delay:.2s}.md-tabs__item:nth-child(12) .md-tabs__link{-webkit-transition-delay:.22s;transition-delay:.22s}.md-tabs__item:nth-child(13) .md-tabs__link{-webkit-transition-delay:.24s;transition-delay:.24s}.md-tabs__item:nth-child(14) .md-tabs__link{-webkit-transition-delay:.26s;transition-delay:.26s}.md-tabs__item:nth-child(15) .md-tabs__link{-webkit-transition-delay:.28s;transition-delay:.28s}.md-tabs__item:nth-child(16) .md-tabs__link{-webkit-transition-delay:.3s;transition-delay:.3s}.md-tabs[data-md-state=hidden]{pointer-events:none}.md-tabs[data-md-state=hidden] .md-tabs__link{-webkit-transform:translateY(50%);transform:translateY(50%);-webkit-transition:color .25s,opacity .1s,-webkit-transform 0s .4s;transition:color .25s,opacity .1s,-webkit-transform 0s .4s;transition:color .25s,transform 0s .4s,opacity .1s;transition:color .25s,transform 0s .4s,opacity .1s,-webkit-transform 0s .4s;opacity:0}.md-typeset .admonition,.md-typeset details{-webkit-box-shadow:0 2px 2px 0 rgba(0,0,0,.14),0 1px 5px 0 rgba(0,0,0,.12),0 3px 1px -2px rgba(0,0,0,.2);box-shadow:0 2px 2px 0 rgba(0,0,0,.14),0 1px 5px 0 rgba(0,0,0,.12),0 3px 1px -2px rgba(0,0,0,.2);position:relative;margin:1.5625em 0;padding:1.2rem 1.2rem 0;border-left:.4rem solid #448aff;border-radius:.2rem;font-size:1.28rem}.md-typeset .admonition :first-child,.md-typeset details :first-child{margin-top:0}html .md-typeset .admonition :last-child,html .md-typeset details :last-child{margin-bottom:0;padding-bottom:1.2rem}.md-typeset .admonition .admonition,.md-typeset .admonition details,.md-typeset details .admonition,.md-typeset details details{margin:1em 0}.md-typeset .admonition>.admonition-title,.md-typeset .admonition>summary,.md-typeset details>.admonition-title,.md-typeset details>summary{margin:-1.2rem -1.2rem 0;padding:.8rem 1.2rem .8rem 4rem;border-bottom:.1rem solid rgba(68,138,255,.1);background-color:rgba(68,138,255,.1);font-weight:700}html .md-typeset .admonition>.admonition-title,html .md-typeset .admonition>summary,html .md-typeset details>.admonition-title,html .md-typeset details>summary{padding-bottom:.8rem}.md-typeset .admonition>.admonition-title:before,.md-typeset .admonition>summary:before,.md-typeset details>.admonition-title:before,.md-typeset details>summary:before{position:absolute;left:1.2rem;color:#448aff;font-size:2rem;content:"\E3C9"}.md-typeset .admonition.summary,.md-typeset .admonition.tldr,.md-typeset details.summary,.md-typeset details.tldr{border-left:.4rem solid #00b0ff}.md-typeset .admonition.summary>.admonition-title,.md-typeset .admonition.summary>summary,.md-typeset .admonition.tldr>.admonition-title,.md-typeset .admonition.tldr>summary,.md-typeset details.summary>.admonition-title,.md-typeset details.summary>summary,.md-typeset details.tldr>.admonition-title,.md-typeset details.tldr>summary{border-bottom:.1rem solid rgba(0,176,255,.1);background-color:rgba(0,176,255,.1)}.md-typeset .admonition.summary>.admonition-title:before,.md-typeset .admonition.summary>summary:before,.md-typeset .admonition.tldr>.admonition-title:before,.md-typeset .admonition.tldr>summary:before,.md-typeset details.summary>.admonition-title:before,.md-typeset details.summary>summary:before,.md-typeset 
details.tldr>.admonition-title:before,.md-typeset details.tldr>summary:before{color:#00b0ff;content:"\E8D2"}.md-typeset .admonition.info,.md-typeset .admonition.todo,.md-typeset details.info,.md-typeset details.todo{border-left:.4rem solid #00b8d4}.md-typeset .admonition.info>.admonition-title,.md-typeset .admonition.info>summary,.md-typeset .admonition.todo>.admonition-title,.md-typeset .admonition.todo>summary,.md-typeset details.info>.admonition-title,.md-typeset details.info>summary,.md-typeset details.todo>.admonition-title,.md-typeset details.todo>summary{border-bottom:.1rem solid rgba(0,184,212,.1);background-color:rgba(0,184,212,.1)}.md-typeset .admonition.info>.admonition-title:before,.md-typeset .admonition.info>summary:before,.md-typeset .admonition.todo>.admonition-title:before,.md-typeset .admonition.todo>summary:before,.md-typeset details.info>.admonition-title:before,.md-typeset details.info>summary:before,.md-typeset details.todo>.admonition-title:before,.md-typeset details.todo>summary:before{color:#00b8d4;content:"\E88E"}.md-typeset .admonition.hint,.md-typeset .admonition.important,.md-typeset .admonition.tip,.md-typeset details.hint,.md-typeset details.important,.md-typeset details.tip{border-left:.4rem solid #00bfa5}.md-typeset .admonition.hint>.admonition-title,.md-typeset .admonition.hint>summary,.md-typeset .admonition.important>.admonition-title,.md-typeset .admonition.important>summary,.md-typeset .admonition.tip>.admonition-title,.md-typeset .admonition.tip>summary,.md-typeset details.hint>.admonition-title,.md-typeset details.hint>summary,.md-typeset details.important>.admonition-title,.md-typeset details.important>summary,.md-typeset details.tip>.admonition-title,.md-typeset details.tip>summary{border-bottom:.1rem solid rgba(0,191,165,.1);background-color:rgba(0,191,165,.1)}.md-typeset .admonition.hint>.admonition-title:before,.md-typeset .admonition.hint>summary:before,.md-typeset .admonition.important>.admonition-title:before,.md-typeset .admonition.important>summary:before,.md-typeset .admonition.tip>.admonition-title:before,.md-typeset .admonition.tip>summary:before,.md-typeset details.hint>.admonition-title:before,.md-typeset details.hint>summary:before,.md-typeset details.important>.admonition-title:before,.md-typeset details.important>summary:before,.md-typeset details.tip>.admonition-title:before,.md-typeset details.tip>summary:before{color:#00bfa5;content:"\E80E"}.md-typeset .admonition.check,.md-typeset .admonition.done,.md-typeset .admonition.success,.md-typeset details.check,.md-typeset details.done,.md-typeset details.success{border-left:.4rem solid #00c853}.md-typeset .admonition.check>.admonition-title,.md-typeset .admonition.check>summary,.md-typeset .admonition.done>.admonition-title,.md-typeset .admonition.done>summary,.md-typeset .admonition.success>.admonition-title,.md-typeset .admonition.success>summary,.md-typeset details.check>.admonition-title,.md-typeset details.check>summary,.md-typeset details.done>.admonition-title,.md-typeset details.done>summary,.md-typeset details.success>.admonition-title,.md-typeset details.success>summary{border-bottom:.1rem solid rgba(0,200,83,.1);background-color:rgba(0,200,83,.1)}.md-typeset .admonition.check>.admonition-title:before,.md-typeset .admonition.check>summary:before,.md-typeset .admonition.done>.admonition-title:before,.md-typeset .admonition.done>summary:before,.md-typeset .admonition.success>.admonition-title:before,.md-typeset .admonition.success>summary:before,.md-typeset 
details.check>.admonition-title:before,.md-typeset details.check>summary:before,.md-typeset details.done>.admonition-title:before,.md-typeset details.done>summary:before,.md-typeset details.success>.admonition-title:before,.md-typeset details.success>summary:before{color:#00c853;content:"\E876"}.md-typeset .admonition.faq,.md-typeset .admonition.help,.md-typeset .admonition.question,.md-typeset details.faq,.md-typeset details.help,.md-typeset details.question{border-left:.4rem solid #64dd17}.md-typeset .admonition.faq>.admonition-title,.md-typeset .admonition.faq>summary,.md-typeset .admonition.help>.admonition-title,.md-typeset .admonition.help>summary,.md-typeset .admonition.question>.admonition-title,.md-typeset .admonition.question>summary,.md-typeset details.faq>.admonition-title,.md-typeset details.faq>summary,.md-typeset details.help>.admonition-title,.md-typeset details.help>summary,.md-typeset details.question>.admonition-title,.md-typeset details.question>summary{border-bottom:.1rem solid rgba(100,221,23,.1);background-color:rgba(100,221,23,.1)}.md-typeset .admonition.faq>.admonition-title:before,.md-typeset .admonition.faq>summary:before,.md-typeset .admonition.help>.admonition-title:before,.md-typeset .admonition.help>summary:before,.md-typeset .admonition.question>.admonition-title:before,.md-typeset .admonition.question>summary:before,.md-typeset details.faq>.admonition-title:before,.md-typeset details.faq>summary:before,.md-typeset details.help>.admonition-title:before,.md-typeset details.help>summary:before,.md-typeset details.question>.admonition-title:before,.md-typeset details.question>summary:before{color:#64dd17;content:"\E887"}.md-typeset .admonition.attention,.md-typeset .admonition.caution,.md-typeset .admonition.warning,.md-typeset details.attention,.md-typeset details.caution,.md-typeset details.warning{border-left:.4rem solid #ff9100}.md-typeset .admonition.attention>.admonition-title,.md-typeset .admonition.attention>summary,.md-typeset .admonition.caution>.admonition-title,.md-typeset .admonition.caution>summary,.md-typeset .admonition.warning>.admonition-title,.md-typeset .admonition.warning>summary,.md-typeset details.attention>.admonition-title,.md-typeset details.attention>summary,.md-typeset details.caution>.admonition-title,.md-typeset details.caution>summary,.md-typeset details.warning>.admonition-title,.md-typeset details.warning>summary{border-bottom:.1rem solid rgba(255,145,0,.1);background-color:rgba(255,145,0,.1)}.md-typeset .admonition.attention>.admonition-title:before,.md-typeset .admonition.attention>summary:before,.md-typeset .admonition.caution>.admonition-title:before,.md-typeset .admonition.caution>summary:before,.md-typeset .admonition.warning>.admonition-title:before,.md-typeset .admonition.warning>summary:before,.md-typeset details.attention>.admonition-title:before,.md-typeset details.attention>summary:before,.md-typeset details.caution>.admonition-title:before,.md-typeset details.caution>summary:before,.md-typeset details.warning>.admonition-title:before,.md-typeset details.warning>summary:before{color:#ff9100;content:"\E002"}.md-typeset .admonition.fail,.md-typeset .admonition.failure,.md-typeset .admonition.missing,.md-typeset details.fail,.md-typeset details.failure,.md-typeset details.missing{border-left:.4rem solid #ff5252}.md-typeset .admonition.fail>.admonition-title,.md-typeset .admonition.fail>summary,.md-typeset .admonition.failure>.admonition-title,.md-typeset .admonition.failure>summary,.md-typeset 
.admonition.missing>.admonition-title,.md-typeset .admonition.missing>summary,.md-typeset details.fail>.admonition-title,.md-typeset details.fail>summary,.md-typeset details.failure>.admonition-title,.md-typeset details.failure>summary,.md-typeset details.missing>.admonition-title,.md-typeset details.missing>summary{border-bottom:.1rem solid rgba(255,82,82,.1);background-color:rgba(255,82,82,.1)}.md-typeset .admonition.fail>.admonition-title:before,.md-typeset .admonition.fail>summary:before,.md-typeset .admonition.failure>.admonition-title:before,.md-typeset .admonition.failure>summary:before,.md-typeset .admonition.missing>.admonition-title:before,.md-typeset .admonition.missing>summary:before,.md-typeset details.fail>.admonition-title:before,.md-typeset details.fail>summary:before,.md-typeset details.failure>.admonition-title:before,.md-typeset details.failure>summary:before,.md-typeset details.missing>.admonition-title:before,.md-typeset details.missing>summary:before{color:#ff5252;content:"\E14C"}.md-typeset .admonition.danger,.md-typeset .admonition.error,.md-typeset details.danger,.md-typeset details.error{border-left:.4rem solid #ff1744}.md-typeset .admonition.danger>.admonition-title,.md-typeset .admonition.danger>summary,.md-typeset .admonition.error>.admonition-title,.md-typeset .admonition.error>summary,.md-typeset details.danger>.admonition-title,.md-typeset details.danger>summary,.md-typeset details.error>.admonition-title,.md-typeset details.error>summary{border-bottom:.1rem solid rgba(255,23,68,.1);background-color:rgba(255,23,68,.1)}.md-typeset .admonition.danger>.admonition-title:before,.md-typeset .admonition.danger>summary:before,.md-typeset .admonition.error>.admonition-title:before,.md-typeset .admonition.error>summary:before,.md-typeset details.danger>.admonition-title:before,.md-typeset details.danger>summary:before,.md-typeset details.error>.admonition-title:before,.md-typeset details.error>summary:before{color:#ff1744;content:"\E3E7"}.md-typeset .admonition.bug,.md-typeset details.bug{border-left:.4rem solid #f50057}.md-typeset .admonition.bug>.admonition-title,.md-typeset .admonition.bug>summary,.md-typeset details.bug>.admonition-title,.md-typeset details.bug>summary{border-bottom:.1rem solid rgba(245,0,87,.1);background-color:rgba(245,0,87,.1)}.md-typeset .admonition.bug>.admonition-title:before,.md-typeset .admonition.bug>summary:before,.md-typeset details.bug>.admonition-title:before,.md-typeset details.bug>summary:before{color:#f50057;content:"\E868"}.md-typeset .admonition.cite,.md-typeset .admonition.quote,.md-typeset details.cite,.md-typeset details.quote{border-left:.4rem solid #9e9e9e}.md-typeset .admonition.cite>.admonition-title,.md-typeset .admonition.cite>summary,.md-typeset .admonition.quote>.admonition-title,.md-typeset .admonition.quote>summary,.md-typeset details.cite>.admonition-title,.md-typeset details.cite>summary,.md-typeset details.quote>.admonition-title,.md-typeset details.quote>summary{border-bottom:.1rem solid hsla(0,0%,62%,.1);background-color:hsla(0,0%,62%,.1)}.md-typeset .admonition.cite>.admonition-title:before,.md-typeset .admonition.cite>summary:before,.md-typeset .admonition.quote>.admonition-title:before,.md-typeset .admonition.quote>summary:before,.md-typeset details.cite>.admonition-title:before,.md-typeset details.cite>summary:before,.md-typeset details.quote>.admonition-title:before,.md-typeset details.quote>summary:before{color:#9e9e9e;content:"\E244"}.codehilite .o,.codehilite .ow,.md-typeset .highlight .o,.md-typeset 
.highlight .ow{color:inherit}.codehilite .ge,.md-typeset .highlight .ge{color:#000}.codehilite .gr,.md-typeset .highlight .gr{color:#a00}.codehilite .gh,.md-typeset .highlight .gh{color:#999}.codehilite .go,.md-typeset .highlight .go{color:#888}.codehilite .gp,.md-typeset .highlight .gp{color:#555}.codehilite .gs,.md-typeset .highlight .gs{color:inherit}.codehilite .gu,.md-typeset .highlight .gu{color:#aaa}.codehilite .gt,.md-typeset .highlight .gt{color:#a00}.codehilite .gd,.md-typeset .highlight .gd{background-color:#fdd}.codehilite .gi,.md-typeset .highlight .gi{background-color:#dfd}.codehilite .k,.md-typeset .highlight .k{color:#3b78e7}.codehilite .kc,.md-typeset .highlight .kc{color:#a71d5d}.codehilite .kd,.codehilite .kn,.md-typeset .highlight .kd,.md-typeset .highlight .kn{color:#3b78e7}.codehilite .kp,.md-typeset .highlight .kp{color:#a71d5d}.codehilite .kr,.codehilite .kt,.md-typeset .highlight .kr,.md-typeset .highlight .kt{color:#3e61a2}.codehilite .c,.codehilite .cm,.md-typeset .highlight .c,.md-typeset .highlight .cm{color:#999}.codehilite .cp,.md-typeset .highlight .cp{color:#666}.codehilite .c1,.codehilite .ch,.codehilite .cs,.md-typeset .highlight .c1,.md-typeset .highlight .ch,.md-typeset .highlight .cs{color:#999}.codehilite .na,.codehilite .nb,.md-typeset .highlight .na,.md-typeset .highlight .nb{color:#c2185b}.codehilite .bp,.md-typeset .highlight .bp{color:#3e61a2}.codehilite .nc,.md-typeset .highlight .nc{color:#c2185b}.codehilite .no,.md-typeset .highlight .no{color:#3e61a2}.codehilite .nd,.codehilite .ni,.md-typeset .highlight .nd,.md-typeset .highlight .ni{color:#666}.codehilite .ne,.codehilite .nf,.md-typeset .highlight .ne,.md-typeset .highlight .nf{color:#c2185b}.codehilite .nl,.md-typeset .highlight .nl{color:#3b5179}.codehilite .nn,.md-typeset .highlight .nn{color:#ec407a}.codehilite .nt,.md-typeset .highlight .nt{color:#3b78e7}.codehilite .nv,.codehilite .vc,.codehilite .vg,.codehilite .vi,.md-typeset .highlight .nv,.md-typeset .highlight .vc,.md-typeset .highlight .vg,.md-typeset .highlight .vi{color:#3e61a2}.codehilite .nx,.md-typeset .highlight .nx{color:#ec407a}.codehilite .il,.codehilite .m,.codehilite .mf,.codehilite .mh,.codehilite .mi,.codehilite .mo,.md-typeset .highlight .il,.md-typeset .highlight .m,.md-typeset .highlight .mf,.md-typeset .highlight .mh,.md-typeset .highlight .mi,.md-typeset .highlight .mo{color:#e74c3c}.codehilite .s,.codehilite .sb,.codehilite .sc,.md-typeset .highlight .s,.md-typeset .highlight .sb,.md-typeset .highlight .sc{color:#0d904f}.codehilite .sd,.md-typeset .highlight .sd{color:#999}.codehilite .s2,.md-typeset .highlight .s2{color:#0d904f}.codehilite .se,.codehilite .sh,.codehilite .si,.codehilite .sx,.md-typeset .highlight .se,.md-typeset .highlight .sh,.md-typeset .highlight .si,.md-typeset .highlight .sx{color:#183691}.codehilite .sr,.md-typeset .highlight .sr{color:#009926}.codehilite .s1,.codehilite .ss,.md-typeset .highlight .s1,.md-typeset .highlight .ss{color:#0d904f}.codehilite .err,.md-typeset .highlight .err{color:#a61717}.codehilite .w,.md-typeset .highlight .w{color:transparent}.codehilite .hll,.md-typeset .highlight .hll{display:block;margin:0 -1.2rem;padding:0 1.2rem;background-color:rgba(255,235,59,.5)}.md-typeset .codehilite,.md-typeset .highlight{position:relative;margin:1em 0;padding:0;border-radius:.2rem;background-color:hsla(0,0%,93%,.5);color:#37474f;line-height:1.4;-webkit-overflow-scrolling:touch}.md-typeset .codehilite code,.md-typeset .codehilite pre,.md-typeset .highlight code,.md-typeset 
.highlight pre{display:block;margin:0;padding:1.05rem 1.2rem;background-color:transparent;overflow:auto;vertical-align:top}.md-typeset .codehilite code::-webkit-scrollbar,.md-typeset .codehilite pre::-webkit-scrollbar,.md-typeset .highlight code::-webkit-scrollbar,.md-typeset .highlight pre::-webkit-scrollbar{width:.4rem;height:.4rem}.md-typeset .codehilite code::-webkit-scrollbar-thumb,.md-typeset .codehilite pre::-webkit-scrollbar-thumb,.md-typeset .highlight code::-webkit-scrollbar-thumb,.md-typeset .highlight pre::-webkit-scrollbar-thumb{background-color:rgba(0,0,0,.26)}.md-typeset .codehilite code::-webkit-scrollbar-thumb:hover,.md-typeset .codehilite pre::-webkit-scrollbar-thumb:hover,.md-typeset .highlight code::-webkit-scrollbar-thumb:hover,.md-typeset .highlight pre::-webkit-scrollbar-thumb:hover{background-color:#536dfe}.md-typeset pre.codehilite,.md-typeset pre.highlight{overflow:visible}.md-typeset pre.codehilite code,.md-typeset pre.highlight code{display:block;padding:1.05rem 1.2rem;overflow:auto}.md-typeset .codehilitetable{display:block;margin:1em 0;border-radius:.2em;font-size:1.6rem;overflow:hidden}.md-typeset .codehilitetable tbody,.md-typeset .codehilitetable td{display:block;padding:0}.md-typeset .codehilitetable tr{display:-webkit-box;display:-ms-flexbox;display:flex}.md-typeset .codehilitetable .codehilite,.md-typeset .codehilitetable .highlight,.md-typeset .codehilitetable .linenodiv{margin:0;border-radius:0}.md-typeset .codehilitetable .linenodiv{padding:1.05rem 1.2rem}.md-typeset .codehilitetable .linenos{background-color:rgba(0,0,0,.07);color:rgba(0,0,0,.26);-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.md-typeset .codehilitetable .linenos pre{margin:0;padding:0;background-color:transparent;color:inherit;text-align:right}.md-typeset .codehilitetable .code{-webkit-box-flex:1;-ms-flex:1;flex:1;overflow:hidden}.md-typeset>.codehilitetable{-webkit-box-shadow:none;box-shadow:none}.md-typeset [id^="fnref:"]{display:inline-block}.md-typeset [id^="fnref:"]:target{margin-top:-7.6rem;padding-top:7.6rem;pointer-events:none}.md-typeset [id^="fn:"]:before{display:none;height:0;content:""}.md-typeset [id^="fn:"]:target:before{display:block;margin-top:-7rem;padding-top:7rem;pointer-events:none}.md-typeset .footnote{color:rgba(0,0,0,.54);font-size:1.28rem}.md-typeset .footnote ol{margin-left:0}.md-typeset .footnote li{-webkit-transition:color .25s;transition:color .25s}.md-typeset .footnote li:target{color:rgba(0,0,0,.87)}.md-typeset .footnote li :first-child{margin-top:0}.md-typeset .footnote li:hover .footnote-backref,.md-typeset .footnote li:target .footnote-backref{-webkit-transform:translateX(0);transform:translateX(0);opacity:1}.md-typeset .footnote li:hover .footnote-backref:hover,.md-typeset .footnote li:target .footnote-backref{color:#536dfe}.md-typeset .footnote-ref{display:inline-block;pointer-events:auto}.md-typeset .footnote-ref:before{display:inline;margin:0 .2em;border-left:.1rem solid rgba(0,0,0,.26);font-size:1.25em;content:"";vertical-align:-.5rem}.md-typeset .footnote-backref{display:inline-block;-webkit-transform:translateX(.5rem);transform:translateX(.5rem);-webkit-transition:color .25s,opacity .125s .125s,-webkit-transform .25s .125s;transition:color .25s,opacity .125s .125s,-webkit-transform .25s .125s;transition:transform .25s .125s,color .25s,opacity .125s .125s;transition:transform .25s .125s,color .25s,opacity .125s .125s,-webkit-transform .25s 
.125s;color:rgba(0,0,0,.26);font-size:0;opacity:0;vertical-align:text-bottom}.md-typeset .footnote-backref:before{font-size:1.6rem;content:"\E31B"}.md-typeset .headerlink{display:inline-block;margin-left:1rem;-webkit-transform:translateY(.5rem);transform:translateY(.5rem);-webkit-transition:color .25s,opacity .125s .25s,-webkit-transform .25s .25s;transition:color .25s,opacity .125s .25s,-webkit-transform .25s .25s;transition:transform .25s .25s,color .25s,opacity .125s .25s;transition:transform .25s .25s,color .25s,opacity .125s .25s,-webkit-transform .25s .25s;opacity:0}html body .md-typeset .headerlink{color:rgba(0,0,0,.26)}.md-typeset h1[id] .headerlink{display:none}.md-typeset h2[id]:before{display:block;margin-top:-.8rem;padding-top:.8rem;content:""}.md-typeset h2[id]:target:before{margin-top:-6.8rem;padding-top:6.8rem}.md-typeset h2[id] .headerlink:focus,.md-typeset h2[id]:hover .headerlink,.md-typeset h2[id]:target .headerlink{-webkit-transform:translate(0);transform:translate(0);opacity:1}.md-typeset h2[id] .headerlink:focus,.md-typeset h2[id]:hover .headerlink:hover,.md-typeset h2[id]:target .headerlink{color:#536dfe}.md-typeset h3[id]:before{display:block;margin-top:-.9rem;padding-top:.9rem;content:""}.md-typeset h3[id]:target:before{margin-top:-6.9rem;padding-top:6.9rem}.md-typeset h3[id] .headerlink:focus,.md-typeset h3[id]:hover .headerlink,.md-typeset h3[id]:target .headerlink{-webkit-transform:translate(0);transform:translate(0);opacity:1}.md-typeset h3[id] .headerlink:focus,.md-typeset h3[id]:hover .headerlink:hover,.md-typeset h3[id]:target .headerlink{color:#536dfe}.md-typeset h4[id]:before{display:block;margin-top:-.9rem;padding-top:.9rem;content:""}.md-typeset h4[id]:target:before{margin-top:-6.9rem;padding-top:6.9rem}.md-typeset h4[id] .headerlink:focus,.md-typeset h4[id]:hover .headerlink,.md-typeset h4[id]:target .headerlink{-webkit-transform:translate(0);transform:translate(0);opacity:1}.md-typeset h4[id] .headerlink:focus,.md-typeset h4[id]:hover .headerlink:hover,.md-typeset h4[id]:target .headerlink{color:#536dfe}.md-typeset h5[id]:before{display:block;margin-top:-1.1rem;padding-top:1.1rem;content:""}.md-typeset h5[id]:target:before{margin-top:-7.1rem;padding-top:7.1rem}.md-typeset h5[id] .headerlink:focus,.md-typeset h5[id]:hover .headerlink,.md-typeset h5[id]:target .headerlink{-webkit-transform:translate(0);transform:translate(0);opacity:1}.md-typeset h5[id] .headerlink:focus,.md-typeset h5[id]:hover .headerlink:hover,.md-typeset h5[id]:target .headerlink{color:#536dfe}.md-typeset h6[id]:before{display:block;margin-top:-1.1rem;padding-top:1.1rem;content:""}.md-typeset h6[id]:target:before{margin-top:-7.1rem;padding-top:7.1rem}.md-typeset h6[id] .headerlink:focus,.md-typeset h6[id]:hover .headerlink,.md-typeset h6[id]:target .headerlink{-webkit-transform:translate(0);transform:translate(0);opacity:1}.md-typeset h6[id] .headerlink:focus,.md-typeset h6[id]:hover .headerlink:hover,.md-typeset h6[id]:target .headerlink{color:#536dfe}.md-typeset .MJXc-display{margin:.75em 0;padding:.75em 0;overflow:auto;-webkit-overflow-scrolling:touch}.md-typeset .MathJax_CHTML{outline:0}.md-typeset .critic.comment,.md-typeset del.critic,.md-typeset ins.critic{margin:0 .25em;padding:.0625em 0;border-radius:.2rem;-webkit-box-decoration-break:clone;box-decoration-break:clone}.md-typeset del.critic{background-color:#fdd;-webkit-box-shadow:.25em 0 0 #fdd,-.25em 0 0 #fdd;box-shadow:.25em 0 0 #fdd,-.25em 0 0 #fdd}.md-typeset ins.critic{background-color:#dfd;-webkit-box-shadow:.25em 0 0 
#dfd,-.25em 0 0 #dfd;box-shadow:.25em 0 0 #dfd,-.25em 0 0 #dfd}.md-typeset .critic.comment{background-color:hsla(0,0%,93%,.5);color:#37474f;-webkit-box-shadow:.25em 0 0 hsla(0,0%,93%,.5),-.25em 0 0 hsla(0,0%,93%,.5);box-shadow:.25em 0 0 hsla(0,0%,93%,.5),-.25em 0 0 hsla(0,0%,93%,.5)}.md-typeset .critic.comment:before{padding-right:.125em;color:rgba(0,0,0,.26);content:"\E0B7";vertical-align:-.125em}.md-typeset .critic.block{display:block;margin:1em 0;padding-right:1.6rem;padding-left:1.6rem;-webkit-box-shadow:none;box-shadow:none}.md-typeset .critic.block :first-child{margin-top:.5em}.md-typeset .critic.block :last-child{margin-bottom:.5em}.md-typeset details{padding-top:0}.md-typeset details[open]>summary:after{-webkit-transform:rotate(180deg);transform:rotate(180deg)}.md-typeset details:not([open]){padding-bottom:0}.md-typeset details:not([open])>summary{border-bottom:none}.md-typeset details summary{padding-right:4rem}.no-details .md-typeset details:not([open])>*{display:none}.no-details .md-typeset details:not([open]) summary{display:block}.md-typeset summary{display:block;outline:none;cursor:pointer}.md-typeset summary::-webkit-details-marker{display:none}.md-typeset summary:after{position:absolute;top:.8rem;right:1.2rem;color:rgba(0,0,0,.26);font-size:2rem;content:"\E313"}.md-typeset .emojione{width:2rem;vertical-align:text-top}.md-typeset code.codehilite,.md-typeset code.highlight{margin:0 .29412em;padding:.07353em 0}.md-typeset .task-list-item{position:relative;list-style-type:none}.md-typeset .task-list-item [type=checkbox]{position:absolute;top:.45em;left:-2em}.md-typeset .task-list-control .task-list-indicator:before{position:absolute;top:.15em;left:-1.25em;color:rgba(0,0,0,.26);font-size:1.25em;content:"\E835";vertical-align:-.25em}.md-typeset .task-list-control [type=checkbox]:checked+.task-list-indicator:before{content:"\E834"}.md-typeset .task-list-control [type=checkbox]{opacity:0;z-index:-1}@media print{.md-typeset a:after{color:rgba(0,0,0,.54);content:" [" attr(href) "]"}.md-typeset code,.md-typeset pre{white-space:pre-wrap}.md-typeset code{-webkit-box-shadow:none;box-shadow:none;-webkit-box-decoration-break:initial;box-decoration-break:slice}.md-clipboard,.md-content__icon,.md-footer,.md-header,.md-sidebar,.md-tabs,.md-typeset .headerlink{display:none}}@media only screen and (max-width:44.9375em){.md-typeset pre{margin:1em -1.6rem;border-radius:0}.md-typeset pre>code{padding:1.05rem 1.6rem}.md-footer-nav__link--prev .md-footer-nav__title{display:none}.md-search-result__teaser{max-height:5rem;-webkit-line-clamp:3}.codehilite .hll,.md-typeset .highlight .hll{margin:0 -1.6rem;padding:0 1.6rem}.md-typeset>.codehilite,.md-typeset>.highlight{margin:1em -1.6rem;border-radius:0}.md-typeset>.codehilite code,.md-typeset>.codehilite pre,.md-typeset>.highlight code,.md-typeset>.highlight pre{padding:1.05rem 1.6rem}.md-typeset>.codehilitetable{margin:1em -1.6rem;border-radius:0}.md-typeset>.codehilitetable .codehilite>code,.md-typeset>.codehilitetable .codehilite>pre,.md-typeset>.codehilitetable .highlight>code,.md-typeset>.codehilitetable .highlight>pre,.md-typeset>.codehilitetable .linenodiv{padding:1rem 1.6rem}.md-typeset>p>.MJXc-display{margin:.75em -1.6rem;padding:.25em 1.6rem}}@media only screen and (min-width:100em){html{font-size:68.75%}}@media only screen and (min-width:125em){html{font-size:75%}}@media only screen and (max-width:59.9375em){body[data-md-state=lock]{overflow:hidden}.ios body[data-md-state=lock] .md-container{display:none}html 
.md-nav__link[for=toc]{display:block;padding-right:4.8rem}html .md-nav__link[for=toc]:after{color:inherit;content:"\E8DE"}html .md-nav__link[for=toc]+.md-nav__link{display:none}html .md-nav__link[for=toc]~.md-nav{display:-webkit-box;display:-ms-flexbox;display:flex}.md-nav__source{display:block;padding:0 .4rem;background-color:rgba(50,64,144,.9675);color:#fff}.md-search__overlay{position:absolute;top:.4rem;left:.4rem;width:3.6rem;height:3.6rem;-webkit-transform-origin:center;transform-origin:center;-webkit-transition:opacity .2s .2s,-webkit-transform .3s .1s;transition:opacity .2s .2s,-webkit-transform .3s .1s;transition:transform .3s .1s,opacity .2s .2s;transition:transform .3s .1s,opacity .2s .2s,-webkit-transform .3s .1s;border-radius:2rem;background-color:#fff;overflow:hidden;pointer-events:none}[data-md-toggle=search]:checked~.md-header .md-search__overlay{-webkit-transition:opacity .1s,-webkit-transform .4s;transition:opacity .1s,-webkit-transform .4s;transition:transform .4s,opacity .1s;transition:transform .4s,opacity .1s,-webkit-transform .4s;opacity:1}.md-search__inner{position:fixed;top:0;left:100%;width:100%;height:100%;-webkit-transform:translateX(5%);transform:translateX(5%);-webkit-transition:left 0s .3s,opacity .15s .15s,-webkit-transform .15s cubic-bezier(.4,0,.2,1) .15s;transition:left 0s .3s,opacity .15s .15s,-webkit-transform .15s cubic-bezier(.4,0,.2,1) .15s;transition:left 0s .3s,transform .15s cubic-bezier(.4,0,.2,1) .15s,opacity .15s .15s;transition:left 0s .3s,transform .15s cubic-bezier(.4,0,.2,1) .15s,opacity .15s .15s,-webkit-transform .15s cubic-bezier(.4,0,.2,1) .15s;opacity:0;z-index:2}[data-md-toggle=search]:checked~.md-header .md-search__inner{left:0;-webkit-transform:translateX(0);transform:translateX(0);-webkit-transition:left 0s 0s,opacity .15s .15s,-webkit-transform .15s cubic-bezier(.1,.7,.1,1) .15s;transition:left 0s 0s,opacity .15s .15s,-webkit-transform .15s cubic-bezier(.1,.7,.1,1) .15s;transition:left 0s 0s,transform .15s cubic-bezier(.1,.7,.1,1) .15s,opacity .15s .15s;transition:left 0s 0s,transform .15s cubic-bezier(.1,.7,.1,1) .15s,opacity .15s .15s,-webkit-transform .15s cubic-bezier(.1,.7,.1,1) .15s;opacity:1}.md-search__input{width:100%;height:4.8rem;font-size:1.8rem}.md-search__icon[for=search]{top:1.2rem;left:1.6rem}.md-search__icon[for=search][for=search]:before{content:"\E5C4"}.md-search__icon[type=reset]{top:1.2rem;right:1.6rem}.md-search__output{top:4.8rem;bottom:0}.md-search-result__article--document:before{display:none}}@media only screen and (max-width:76.1875em){[data-md-toggle=drawer]:checked~.md-overlay{width:100%;height:100%;-webkit-transition:width 0s,height 0s,opacity .25s;transition:width 0s,height 0s,opacity .25s;opacity:1}.md-header-nav__button.md-icon--home,.md-header-nav__button.md-logo{display:none}.md-hero__inner{margin-top:4.8rem;margin-bottom:2.4rem}.md-nav{background-color:#fff}.md-nav--primary,.md-nav--primary .md-nav{display:-webkit-box;display:-ms-flexbox;display:flex;position:absolute;top:0;right:0;left:0;-webkit-box-orient:vertical;-webkit-box-direction:normal;-ms-flex-direction:column;flex-direction:column;height:100%;z-index:1}.md-nav--primary .md-nav__item,.md-nav--primary .md-nav__title{font-size:1.6rem;line-height:1.5}html .md-nav--primary .md-nav__title{position:relative;height:11.2rem;padding:6rem 1.6rem .4rem;background-color:rgba(0,0,0,.07);color:rgba(0,0,0,.54);font-weight:400;line-height:4.8rem;white-space:nowrap;cursor:pointer}html .md-nav--primary 
.md-nav__title:before{display:block;position:absolute;top:.4rem;left:.4rem;width:4rem;height:4rem;color:rgba(0,0,0,.54)}html .md-nav--primary .md-nav__title~.md-nav__list{background-color:#fff;-webkit-box-shadow:0 .1rem 0 rgba(0,0,0,.07) inset;box-shadow:inset 0 .1rem 0 rgba(0,0,0,.07)}html .md-nav--primary .md-nav__title~.md-nav__list>.md-nav__item:first-child{border-top:0}html .md-nav--primary .md-nav__title--site{position:relative;background-color:#3f51b5;color:#fff}html .md-nav--primary .md-nav__title--site .md-nav__button{display:block;position:absolute;top:.4rem;left:.4rem;width:6.4rem;height:6.4rem;font-size:4.8rem}html .md-nav--primary .md-nav__title--site:before{display:none}.md-nav--primary .md-nav__list{-webkit-box-flex:1;-ms-flex:1;flex:1;overflow-y:auto}.md-nav--primary .md-nav__item{padding:0;border-top:.1rem solid rgba(0,0,0,.07)}.md-nav--primary .md-nav__item--nested>.md-nav__link{padding-right:4.8rem}.md-nav--primary .md-nav__item--nested>.md-nav__link:after{content:"\E315"}.md-nav--primary .md-nav__link{position:relative;margin-top:0;padding:1.2rem 1.6rem}.md-nav--primary .md-nav__link:after{position:absolute;top:50%;right:1.2rem;margin-top:-1.2rem;color:inherit;font-size:2.4rem}.md-nav--primary .md-nav--secondary .md-nav__link{position:static}.md-nav--primary .md-nav--secondary .md-nav{position:static;background-color:transparent}.md-nav--primary .md-nav--secondary .md-nav .md-nav__link{padding-left:2.8rem}.md-nav--primary .md-nav--secondary .md-nav .md-nav .md-nav__link{padding-left:4rem}.md-nav--primary .md-nav--secondary .md-nav .md-nav .md-nav .md-nav__link{padding-left:5.2rem}.md-nav--primary .md-nav--secondary .md-nav .md-nav .md-nav .md-nav .md-nav__link{padding-left:6.4rem}.md-nav__toggle~.md-nav{display:-webkit-box;display:-ms-flexbox;display:flex;-webkit-transform:translateX(100%);transform:translateX(100%);-webkit-transition:opacity .125s .05s,-webkit-transform .25s cubic-bezier(.8,0,.6,1);transition:opacity .125s .05s,-webkit-transform .25s cubic-bezier(.8,0,.6,1);transition:transform .25s cubic-bezier(.8,0,.6,1),opacity .125s .05s;transition:transform .25s cubic-bezier(.8,0,.6,1),opacity .125s .05s,-webkit-transform .25s cubic-bezier(.8,0,.6,1);opacity:0}.no-csstransforms3d .md-nav__toggle~.md-nav{display:none}.md-nav__toggle:checked~.md-nav{-webkit-transform:translateX(0);transform:translateX(0);-webkit-transition:opacity .125s .125s,-webkit-transform .25s cubic-bezier(.4,0,.2,1);transition:opacity .125s .125s,-webkit-transform .25s cubic-bezier(.4,0,.2,1);transition:transform .25s cubic-bezier(.4,0,.2,1),opacity .125s .125s;transition:transform .25s cubic-bezier(.4,0,.2,1),opacity .125s .125s,-webkit-transform .25s cubic-bezier(.4,0,.2,1);opacity:1}.no-csstransforms3d .md-nav__toggle:checked~.md-nav{display:-webkit-box;display:-ms-flexbox;display:flex}.md-sidebar--primary{position:fixed;top:0;left:-24.2rem;width:24.2rem;height:100%;-webkit-transform:translateX(0);transform:translateX(0);-webkit-transition:-webkit-transform .25s cubic-bezier(.4,0,.2,1),-webkit-box-shadow .25s;transition:-webkit-transform .25s cubic-bezier(.4,0,.2,1),-webkit-box-shadow .25s;transition:transform .25s cubic-bezier(.4,0,.2,1),box-shadow .25s;transition:transform .25s cubic-bezier(.4,0,.2,1),box-shadow .25s,-webkit-transform .25s cubic-bezier(.4,0,.2,1),-webkit-box-shadow .25s;background-color:#fff;z-index:3}.no-csstransforms3d .md-sidebar--primary{display:none}[data-md-toggle=drawer]:checked~.md-container .md-sidebar--primary{-webkit-box-shadow:0 8px 10px 1px rgba(0,0,0,.14),0 
3px 14px 2px rgba(0,0,0,.12),0 5px 5px -3px rgba(0,0,0,.4);box-shadow:0 8px 10px 1px rgba(0,0,0,.14),0 3px 14px 2px rgba(0,0,0,.12),0 5px 5px -3px rgba(0,0,0,.4);-webkit-transform:translateX(24.2rem);transform:translateX(24.2rem)}.no-csstransforms3d [data-md-toggle=drawer]:checked~.md-container .md-sidebar--primary{display:block}.md-sidebar--primary .md-sidebar__scrollwrap{overflow:hidden;position:absolute;top:0;right:0;bottom:0;left:0;margin:0}.md-tabs{display:none}}@media only screen and (min-width:60em){.md-content{margin-right:24.2rem}.md-header-nav__button.md-icon--search{display:none}.md-header-nav__source{display:block;width:23rem;max-width:23rem;margin-left:2.8rem;padding-right:1.2rem}.md-search{padding:.4rem}.md-search__overlay{position:fixed;top:0;left:0;width:0;height:0;-webkit-transition:width 0s .25s,height 0s .25s,opacity .25s;transition:width 0s .25s,height 0s .25s,opacity .25s;background-color:rgba(0,0,0,.54);cursor:pointer}[data-md-toggle=search]:checked~.md-header .md-search__overlay{width:100%;height:100%;-webkit-transition:width 0s,height 0s,opacity .25s;transition:width 0s,height 0s,opacity .25s;opacity:1}.md-search__inner{position:relative;width:23rem;padding:.2rem 0;float:right;-webkit-transition:width .25s cubic-bezier(.1,.7,.1,1);transition:width .25s cubic-bezier(.1,.7,.1,1)}.md-search__form,.md-search__input{border-radius:.2rem}.md-search__input{width:100%;height:3.6rem;padding-left:4.4rem;-webkit-transition:background-color .25s cubic-bezier(.1,.7,.1,1),color .25s cubic-bezier(.1,.7,.1,1);transition:background-color .25s cubic-bezier(.1,.7,.1,1),color .25s cubic-bezier(.1,.7,.1,1);background-color:rgba(0,0,0,.26);color:inherit;font-size:1.6rem}.md-search__input+.md-search__icon{color:inherit}.md-search__input::-webkit-input-placeholder{color:hsla(0,0%,100%,.7)}.md-search__input:-ms-input-placeholder,.md-search__input::-ms-input-placeholder{color:hsla(0,0%,100%,.7)}.md-search__input::placeholder{color:hsla(0,0%,100%,.7)}.md-search__input:hover{background-color:hsla(0,0%,100%,.12)}[data-md-toggle=search]:checked~.md-header .md-search__input{border-radius:.2rem .2rem 0 0;background-color:#fff;color:rgba(0,0,0,.87);text-overflow:none}[data-md-toggle=search]:checked~.md-header .md-search__input+.md-search__icon,[data-md-toggle=search]:checked~.md-header .md-search__input::-webkit-input-placeholder{color:rgba(0,0,0,.54)}[data-md-toggle=search]:checked~.md-header .md-search__input+.md-search__icon,[data-md-toggle=search]:checked~.md-header .md-search__input:-ms-input-placeholder,[data-md-toggle=search]:checked~.md-header .md-search__input::-ms-input-placeholder{color:rgba(0,0,0,.54)}[data-md-toggle=search]:checked~.md-header .md-search__input+.md-search__icon,[data-md-toggle=search]:checked~.md-header .md-search__input::placeholder{color:rgba(0,0,0,.54)}.md-search__output{top:3.8rem;-webkit-transition:opacity .4s;transition:opacity .4s;opacity:0}[data-md-toggle=search]:checked~.md-header .md-search__output{-webkit-box-shadow:0 6px 10px 0 rgba(0,0,0,.14),0 1px 18px 0 rgba(0,0,0,.12),0 3px 5px -1px rgba(0,0,0,.4);box-shadow:0 6px 10px 0 rgba(0,0,0,.14),0 1px 18px 0 rgba(0,0,0,.12),0 3px 5px -1px rgba(0,0,0,.4);opacity:1}.md-search__scrollwrap{max-height:0}[data-md-toggle=search]:checked~.md-header 
.md-search__scrollwrap{max-height:75vh}.md-search__scrollwrap::-webkit-scrollbar{width:.4rem;height:.4rem}.md-search__scrollwrap::-webkit-scrollbar-thumb{background-color:rgba(0,0,0,.26)}.md-search__scrollwrap::-webkit-scrollbar-thumb:hover{background-color:#536dfe}.md-search-result__article,.md-search-result__meta{padding-left:4.4rem}.md-sidebar--secondary{display:block;margin-left:100%;-webkit-transform:translate(-100%);transform:translate(-100%)}}@media only screen and (min-width:76.25em){.md-content{margin-left:24.2rem}.md-content__inner{margin-right:2.4rem;margin-left:2.4rem}.md-header-nav__button.md-icon--menu{display:none}.md-nav[data-md-state=animate]{-webkit-transition:max-height .25s cubic-bezier(.86,0,.07,1);transition:max-height .25s cubic-bezier(.86,0,.07,1)}.md-nav__toggle~.md-nav{max-height:0;overflow:hidden}.md-nav[data-md-state=expand],.md-nav__toggle:checked~.md-nav{max-height:100%}.md-nav__item--nested>.md-nav>.md-nav__title{display:none}.md-nav__item--nested>.md-nav__link:after{display:inline-block;-webkit-transform-origin:.45em .45em;transform-origin:.45em .45em;-webkit-transform-style:preserve-3d;transform-style:preserve-3d;vertical-align:-.125em}.js .md-nav__item--nested>.md-nav__link:after{-webkit-transition:-webkit-transform .4s;transition:-webkit-transform .4s;transition:transform .4s;transition:transform .4s,-webkit-transform .4s}.md-nav__item--nested .md-nav__toggle:checked~.md-nav__link:after{-webkit-transform:rotateX(180deg);transform:rotateX(180deg)}.md-search__scrollwrap,[data-md-toggle=search]:checked~.md-header .md-search__inner{width:68.8rem}.md-sidebar--secondary{margin-left:122rem}.md-tabs~.md-main .md-nav--primary>.md-nav__list>.md-nav__item--nested{font-size:0}.md-tabs--active~.md-main .md-nav--primary .md-nav__title--site{display:none}.md-tabs--active~.md-main .md-nav--primary>.md-nav__list>.md-nav__item{font-size:0}.md-tabs--active~.md-main .md-nav--primary>.md-nav__list>.md-nav__item--nested{display:none;font-size:1.4rem;overflow:auto}.md-tabs--active~.md-main .md-nav--primary>.md-nav__list>.md-nav__item--nested>.md-nav__link{margin-top:0;font-weight:700;pointer-events:none}.md-tabs--active~.md-main .md-nav--primary>.md-nav__list>.md-nav__item--nested>.md-nav__link:after{display:none}.md-tabs--active~.md-main .md-nav--primary>.md-nav__list>.md-nav__item--active{display:block}.md-tabs--active~.md-main .md-nav[data-md-level="1"]{max-height:none}.md-tabs--active~.md-main .md-nav[data-md-level="1"]>.md-nav__list>.md-nav__item{padding-left:0}}@media only screen and (min-width:45em){.md-footer-nav__link{width:50%}.md-footer-copyright{max-width:75%;float:left}.md-footer-social{padding:1.2rem 0;float:right}}@media only screen and (max-width:29.9375em){[data-md-toggle=search]:checked~.md-header .md-search__overlay{-webkit-transform:scale(45);transform:scale(45)}}@media only screen and (min-width:30em) and (max-width:44.9375em){[data-md-toggle=search]:checked~.md-header .md-search__overlay{-webkit-transform:scale(60);transform:scale(60)}}@media only screen and (min-width:45em) and (max-width:59.9375em){[data-md-toggle=search]:checked~.md-header .md-search__overlay{-webkit-transform:scale(75);transform:scale(75)}}@media only screen and (min-width:60em) and (max-width:76.1875em){.md-search__scrollwrap,[data-md-toggle=search]:checked~.md-header .md-search__inner{width:46.8rem}.md-search-result__teaser{max-height:5rem;-webkit-line-clamp:3}} +/*# 
sourceMappingURL=data:application/json;charset=utf-8;base64,eyJ2ZXJzaW9uIjozLCJzb3VyY2VzIjpbXSwibmFtZXMiOltdLCJtYXBwaW5ncyI6IiIsImZpbGUiOiJhc3NldHMvc3R5bGVzaGVldHMvYXBwbGljYXRpb24uYWM2NDI1MWUuY3NzIiwic291cmNlUm9vdCI6IiJ9*/ diff --git a/docs/mkdocs-material-theme/assets/stylesheets/yandex_fonts.css b/docs/mkdocs-material-theme/assets/stylesheets/custom.css similarity index 77% rename from docs/mkdocs-material-theme/assets/stylesheets/yandex_fonts.css rename to docs/mkdocs-material-theme/assets/stylesheets/custom.css index b6c81f611d4..a908ec2f21a 100644 --- a/docs/mkdocs-material-theme/assets/stylesheets/yandex_fonts.css +++ b/docs/mkdocs-material-theme/assets/stylesheets/custom.css @@ -64,5 +64,58 @@ } body { - font-family: 'Yandex Sans Text Web'; + font: 300 14pt/200% 'Yandex Sans Text Web', Arial, sans-serif; +} + +a:link, a:visited { + color: #08f; + text-decoration: none; +} + +.md-nav__link { + color: #000 !important; +} + +.md-nav__link:hover, .md-nav__link:active { + color: #08f !important; + text-decoration: none; +} + +a:hover, a:active { + color: #f00; + text-decoration: underline; +} + +.md-typeset pre { + font: 13px/18px monospace, "Courier New"; + display: block; + padding: 1rem 3rem 1rem 1rem; + overflow: scroll; +} + +h1, h2, h3, .md-logo { + font-family: 'Yandex Sans Display Web', Arial, sans-serif; + color: #000 !important; +} + +.md-logo { + padding: 0; +} + +.md-header { + border-bottom: 1px solid #efefef; +} + +.md-header-nav__title { + font-size: 3rem; + font-family: 'Yandex Sans Display Web', Arial, sans-serif; +} + +.md-content__icon:hover { + text-decoration: none !important; + color: #08f !important; +} + +.md-search-result__link { + text-decoration: none !important; } diff --git a/docs/mkdocs-material-theme/base.html b/docs/mkdocs-material-theme/base.html index 5dd9d9bca96..a35e61dd354 100644 --- a/docs/mkdocs-material-theme/base.html +++ b/docs/mkdocs-material-theme/base.html @@ -134,8 +134,12 @@
{% block content %} - {% if page.edit_url %} - + {% if config.extra.single_page %} + + {% else %} + {% if page.edit_url %} + + {% endif %} {% endif %} {% if not "\x3ch1" in page.content %}

{{ page.title | default(config.site_name, true) }}

diff --git a/docs/mkdocs-material-theme/partials/footer.html b/docs/mkdocs-material-theme/partials/footer.html index 449d9dfcd5b..5ab451ef554 100644 --- a/docs/mkdocs-material-theme/partials/footer.html +++ b/docs/mkdocs-material-theme/partials/footer.html @@ -44,11 +44,6 @@ {{ config.copyright }}
{% endif %} - powered by - MkDocs - and - - Material for MkDocs {% block social %} {% include "partials/social.html" %} diff --git a/docs/mkdocs-material-theme/partials/header.html b/docs/mkdocs-material-theme/partials/header.html index 764a5b82231..3c954c8c5ac 100644 --- a/docs/mkdocs-material-theme/partials/header.html +++ b/docs/mkdocs-material-theme/partials/header.html @@ -33,11 +33,11 @@ diff --git a/docs/mkdocs_en.yml b/docs/mkdocs_en.yml index 59b500c6c98..ec933308103 100644 --- a/docs/mkdocs_en.yml +++ b/docs/mkdocs_en.yml @@ -1,15 +1,16 @@ -site_name: ClickHouse docs +site_name: ClickHouse Documentation +copyright: ©2016–2018 Yandex LLC docs_dir: en site_dir: build/docs/en -use_directory_urls: false +use_directory_urls: true repo_name: 'yandex/ClickHouse' repo_url: 'https://github.com/yandex/ClickHouse/' edit_uri: 'edit/master/docs/en' extra_css: - - assets/stylesheets/yandex_fonts.css + - assets/stylesheets/custom.css theme: name: null @@ -31,10 +32,12 @@ theme: - 404.html extra: + single_page: false search: language: 'en' - +markdown_extensions: + - codehilite pages: - 'ClickHouse': 'index.md' diff --git a/docs/mkdocs_en_single_page.yml b/docs/mkdocs_en_single_page.yml index 15088124fc3..5025d0cbc3f 100644 --- a/docs/mkdocs_en_single_page.yml +++ b/docs/mkdocs_en_single_page.yml @@ -1,4 +1,5 @@ -site_name: ClickHouse docs +site_name: ClickHouse Documentation +copyright: ©2016–2018 Yandex LLC docs_dir: en_single_page site_dir: build/docs/en/single @@ -8,7 +9,10 @@ repo_name: 'yandex/ClickHouse' repo_url: 'https://github.com/yandex/ClickHouse/' extra_css: - - assets/stylesheets/yandex_fonts.css + - assets/stylesheets/custom.css + +markdown_extensions: + - codehilite theme: name: null @@ -30,10 +34,9 @@ theme: - 404.html extra: + single_page: true search: language: 'en' - - pages: - 'Documentation': 'index.md' diff --git a/docs/mkdocs_ru.yml b/docs/mkdocs_ru.yml index 3d16b0526ed..ad26a510ad9 100644 --- a/docs/mkdocs_ru.yml +++ b/docs/mkdocs_ru.yml @@ -1,15 +1,19 @@ site_name: Документация ClickHouse +copyright: ©2016–2018 Yandex LLC docs_dir: ru site_dir: build/docs/ru -use_directory_urls: false +use_directory_urls: true repo_name: 'yandex/ClickHouse' repo_url: 'https://github.com/yandex/ClickHouse/' edit_uri: 'edit/master/docs/ru' extra_css: - - assets/stylesheets/yandex_fonts.css + - assets/stylesheets/custom.css + +markdown_extensions: + - codehilite theme: name: null @@ -31,6 +35,7 @@ theme: - 404.html extra: + single_page: false search: language: 'en, ru' diff --git a/docs/mkdocs_ru_single_page.yml b/docs/mkdocs_ru_single_page.yml index 9fefab45e0b..c66c4716eda 100644 --- a/docs/mkdocs_ru_single_page.yml +++ b/docs/mkdocs_ru_single_page.yml @@ -1,4 +1,5 @@ site_name: Документация ClickHouse +copyright: ©2016–2018 Yandex LLC docs_dir: ru_single_page site_dir: build/docs/ru/single @@ -8,7 +9,10 @@ repo_name: 'yandex/ClickHouse' repo_url: 'https://github.com/yandex/ClickHouse/' extra_css: - - assets/stylesheets/yandex_fonts.css + - assets/stylesheets/custom.css + +markdown_extensions: + - codehilite theme: name: null @@ -30,6 +34,7 @@ theme: - 404.html extra: + single_page: true search: language: 'en, ru' diff --git a/docs/requirements.txt b/docs/requirements.txt index ceaeff0fcf7..fbe704e75b2 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,20 +1,28 @@ alabaster==0.7.10 Babel==2.5.1 +backports-abc==0.5 certifi==2017.11.5 chardet==3.0.4 +click==6.7 CommonMark==0.5.4 docutils==0.14 idna==2.6 imagesize==0.7.1 Jinja2==2.10 +livereload==2.5.1 +Markdown==2.6.11 
MarkupSafe==1.0 +mkdocs==0.17.2 Pygments==2.2.0 pytz==2017.3 +PyYAML==3.12 recommonmark==0.4.0 requests==2.18.4 +singledispatch==3.4.0.3 six==1.11.0 snowballstemmer==1.2.1 Sphinx==1.6.5 sphinxcontrib-websupport==1.0.1 +tornado==4.5.3 typing==3.6.2 urllib3==1.22 diff --git a/docs/validate_headers_structures_in_md.py b/docs/validate_headers_structures_in_md.py index d4daabd3112..26df66450d5 100644 --- a/docs/validate_headers_structures_in_md.py +++ b/docs/validate_headers_structures_in_md.py @@ -1,58 +1,64 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +# Gets all the files in the SOURCES_TREE directory, shows all level headers +# for each file, and skips or processes files according to the user's selection. - -# Gets all the files in the SOURCES_TREE directory, shows all level headers for each file, and skips or processes files according to the user's selection. - -from os import walk +import os SOURCES_TREE = 'ru' STOP_AT_THE_FIRST_FILE = False -for (dirpath, dirnames, filenames) in walk(SOURCES_TREE): - for filename in filenames : - if filename == 'conf.py': continue +for (dirpath, dirnames, filenames) in os.walk(SOURCES_TREE): + for filename in filenames: + if filename == 'conf.py': + continue - print "=== "+ dirpath+'/'+filename + print '=== ' + dirpath + '/' + filename - f = open(dirpath+'/'+filename) + f = open(dirpath + '/' + filename) content = f.readlines() f.close() - # Showing headers structure in md-file count_lines = 0 for l in content: - if l.startswith('#'): print l - if l.startswith("==="): print content[count_lines - 1] + l - if l.startswith("---"): print content[count_lines - 1] + l + if l.startswith('#'): + print l + if l.startswith('==='): + print content[count_lines - 1] + l + if l.startswith('---'): + print content[count_lines - 1] + l count_lines += 1 # At this stage the user should check the headers structure and choose what to do: # replace the headers markup or not - choise = raw_input("What to do with a file (pass(s) or process(p)): ") + choise = raw_input('What to do with a file (pass(s) or process(p)): ') - if choise == 's': continue + if choise == 's': + continue else: - print "processing..." + print 'processing...' count_lines = 0 for l in content: - if l.startswith("==="): - print count_lines, content[count_lines -1], content[count_lines] - content[count_lines - 1] = "# "+content[count_lines - 1] + if l.startswith('==='): + print count_lines, content[count_lines - 1], content[count_lines] + content[count_lines - 1] = '# ' + content[count_lines - 1] content.pop(count_lines) - if l.startswith("---"): - print count_lines, content[count_lines -1], content[count_lines] - content[count_lines - 1] = "## "+content[count_lines - 1] + if l.startswith('---'): + print count_lines, content[count_lines - 1], content[count_lines] + content[count_lines - 1] = '## ' + content[count_lines - 1] content.pop(count_lines) count_lines += 1 - f=open(dirpath+'/'+filename,"w") - for l in content: f.write(l) + f = open(dirpath + '/' + filename, 'w') + for l in content: + f.write(l) f.close() - if STOP_AT_THE_FIRST_FILE : break + if STOP_AT_THE_FIRST_FILE: + break - if STOP_AT_THE_FIRST_FILE : break + if STOP_AT_THE_FIRST_FILE: + break diff --git a/website/index.html b/website/index.html index e87c7774ba2..6e079bf6d4f 100644 --- a/website/index.html +++ b/website/index.html @@ -90,6 +90,7 @@
+

ClickHouse. Just makes you think faster.

@@ -449,7 +450,7 @@ clickhouse-client ClickHouse source code is published under Apache 2.0 License. Software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

- +
diff --git a/website/tutorial.html b/website/tutorial.html index 65e9f1ae518..da881ddacc5 100644 --- a/website/tutorial.html +++ b/website/tutorial.html @@ -596,7 +596,7 @@ ENGINE = ReplicatedMergeTree( ClickHouse source code is published under Apache 2.0 License. Software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

- + From f36dfe42a5b648b07724f42692f935d197df004e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 21 Feb 2018 22:35:10 +0300 Subject: [PATCH 053/209] Miscellaneous [#CLICKHOUSE-2] --- dbms/src/IO/readFloatText.h | 2 -- .../include/common/iostream_debug_helpers.h | 32 +++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/dbms/src/IO/readFloatText.h b/dbms/src/IO/readFloatText.h index 82a6ea29eec..503203cba21 100644 --- a/dbms/src/IO/readFloatText.h +++ b/dbms/src/IO/readFloatText.h @@ -5,8 +5,6 @@ #include #include -#include - /** Methods for reading floating point numbers from text with decimal representation. * There are "precise", "fast" and "simple" implementations. diff --git a/libs/libcommon/include/common/iostream_debug_helpers.h b/libs/libcommon/include/common/iostream_debug_helpers.h index 09ab4de83e6..739a35d398f 100644 --- a/libs/libcommon/include/common/iostream_debug_helpers.h +++ b/libs/libcommon/include/common/iostream_debug_helpers.h @@ -74,6 +74,14 @@ ostream & operator<<(ostream & stream, const map & what) return stream; } +template +ostream & operator<<(ostream & stream, const multimap & what) +{ + stream << "multimap(size = " << what.size() << ")"; + dumpContainer(stream, what); + return stream; +} + template ostream & operator<<(ostream & stream, const unordered_map & what) { @@ -82,6 +90,14 @@ ostream & operator<<(ostream & stream, const unordered_map & what) return stream; } +template +ostream & operator<<(ostream & stream, const unordered_multimap & what) +{ + stream << "unordered_multimap(size = " << what.size() << ")"; + dumpContainer(stream, what); + return stream; +} + template ostream & operator<<(ostream & stream, const set & what) { @@ -90,6 +106,14 @@ ostream & operator<<(ostream & stream, const set & what) return stream; } +template +ostream & operator<<(ostream & stream, const multiset & what) +{ + stream << "multiset(size = " << what.size() << ")"; + dumpContainer(stream, what); + return stream; +} + template ostream & operator<<(ostream & stream, const unordered_set & what) { @@ -98,6 +122,14 @@ ostream & operator<<(ostream & stream, const unordered_set & what) return stream; } +template +ostream & operator<<(ostream & stream, const unordered_multiset & what) +{ + stream << "unordered_multiset(size = " << what.size() << ")"; + dumpContainer(stream, what); + return stream; +} + template ostream & operator<<(ostream & stream, const list & what) { From 4055da59799b5694f75a52e65b27c0970b6701aa Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 21 Feb 2018 22:42:42 +0300 Subject: [PATCH 054/209] Fixed error with filtering of tables by _table virtual column in StorageMerge [#CLICKHOUSE-3583] --- dbms/src/Storages/StorageMerge.cpp | 4 +- dbms/src/Storages/VirtualColumnUtils.cpp | 11 ++--- dbms/src/Storages/VirtualColumnUtils.h | 2 +- ...e_table_and_table_virtual_column.reference | 8 ++++ ...8_merge_table_and_table_virtual_column.sql | 42 +++++++++++++++++++ 5 files changed, 56 insertions(+), 11 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00578_merge_table_and_table_virtual_column.reference create mode 100644 dbms/tests/queries/0_stateless/00578_merge_table_and_table_virtual_column.sql diff --git a/dbms/src/Storages/StorageMerge.cpp b/dbms/src/Storages/StorageMerge.cpp index 13d82db6fc3..e592aabf90b 100644 --- a/dbms/src/Storages/StorageMerge.cpp +++ b/dbms/src/Storages/StorageMerge.cpp @@ -166,7 +166,6 @@ BlockInputStreams StorageMerge::read( if (!virt_column_names.empty()) { 
VirtualColumnUtils::filterBlockWithQuery(query, virtual_columns_block, context); - auto values = VirtualColumnUtils::extractSingleValueFromBlock(virtual_columns_block, "_table"); /// Remove unused tables from the list @@ -182,6 +181,7 @@ BlockInputStreams StorageMerge::read( Block header = getSampleBlockForColumns(real_column_names); size_t tables_count = selected_tables.size(); + size_t curr_table_number = 0; for (auto it = selected_tables.begin(); it != selected_tables.end(); ++it, ++curr_table_number) { @@ -192,7 +192,7 @@ BlockInputStreams StorageMerge::read( if (real_column_names.size() == 0) real_column_names.push_back(ExpressionActions::getSmallestColumn(table->getColumnsList())); - /// Substitute virtual column for its value + /// Substitute virtual column for its value. NOTE This looks terribly wrong. ASTPtr modified_query_ast = query->clone(); VirtualColumnUtils::rewriteEntityInAst(modified_query_ast, "_table", table->getTableName()); diff --git a/dbms/src/Storages/VirtualColumnUtils.cpp b/dbms/src/Storages/VirtualColumnUtils.cpp index 5864d35da48..29409ce715e 100644 --- a/dbms/src/Storages/VirtualColumnUtils.cpp +++ b/dbms/src/Storages/VirtualColumnUtils.cpp @@ -133,11 +133,11 @@ static ASTPtr buildWhereExpression(const ASTs & functions) return new_query; } -bool filterBlockWithQuery(const ASTPtr & query, Block & block, const Context & context) +void filterBlockWithQuery(const ASTPtr & query, Block & block, const Context & context) { const ASTSelectQuery & select = typeid_cast(*query); if (!select.where_expression && !select.prewhere_expression) - return false; + return; NameSet columns; for (const auto & it : block.getNamesAndTypesList()) @@ -151,7 +151,7 @@ bool filterBlockWithQuery(const ASTPtr & query, Block & block, const Context & c extractFunctions(select.prewhere_expression, columns, functions); ASTPtr expression_ast = buildWhereExpression(functions); if (!expression_ast) - return false; + return; /// Let's analyze and calculate the expression. ExpressionAnalyzer analyzer(expression_ast, context, {}, block.getNamesAndTypesList()); @@ -165,16 +165,11 @@ bool filterBlockWithQuery(const ASTPtr & query, Block & block, const Context & c filter_column = converted; const IColumn::Filter & filter = typeid_cast(*filter_column).getData(); - if (countBytesInFilter(filter) == 0) - return false; - for (size_t i = 0; i < block.columns(); ++i) { ColumnPtr & column = block.safeGetByPosition(i).column; column = column->filter(filter, -1); } - - return true; } } diff --git a/dbms/src/Storages/VirtualColumnUtils.h b/dbms/src/Storages/VirtualColumnUtils.h index bfbd44cf2c8..a1e1db4f04c 100644 --- a/dbms/src/Storages/VirtualColumnUtils.h +++ b/dbms/src/Storages/VirtualColumnUtils.h @@ -30,7 +30,6 @@ void rewriteEntityInAst(ASTPtr ast, const String & column_name, const Field & va /// Leave in the block only the rows that fit under the WHERE clause and the PREWHERE clause of the query. /// Only elements of the outer conjunction are considered, depending only on the columns present in the block. -/// Returns true if at least one row is discarded.
-bool filterBlockWithQuery(const ASTPtr & query, Block & block, const Context & context); +void filterBlockWithQuery(const ASTPtr & query, Block & block, const Context & context); /// Extract from the input stream a set of `name` column values template diff --git a/dbms/tests/queries/0_stateless/00578_merge_table_and_table_virtual_column.reference b/dbms/tests/queries/0_stateless/00578_merge_table_and_table_virtual_column.reference new file mode 100644 index 00000000000..943d6e5f7f4 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00578_merge_table_and_table_virtual_column.reference @@ -0,0 +1,8 @@ +5000 +5 +1000 +1000 +1000 +0 +0 +0 diff --git a/dbms/tests/queries/0_stateless/00578_merge_table_and_table_virtual_column.sql b/dbms/tests/queries/0_stateless/00578_merge_table_and_table_virtual_column.sql new file mode 100644 index 00000000000..66c740f8ee1 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00578_merge_table_and_table_virtual_column.sql @@ -0,0 +1,42 @@ +DROP TABLE IF EXISTS test.numbers1; +DROP TABLE IF EXISTS test.numbers2; +DROP TABLE IF EXISTS test.numbers3; +DROP TABLE IF EXISTS test.numbers4; +DROP TABLE IF EXISTS test.numbers5; + +CREATE TABLE test.numbers1 ENGINE = StripeLog AS SELECT number FROM numbers(1000); +CREATE TABLE test.numbers2 ENGINE = TinyLog AS SELECT number FROM numbers(1000); +CREATE TABLE test.numbers3 ENGINE = Log AS SELECT number FROM numbers(1000); +CREATE TABLE test.numbers4 ENGINE = Memory AS SELECT number FROM numbers(1000); +CREATE TABLE test.numbers5 ENGINE = MergeTree ORDER BY number AS SELECT number FROM numbers(1000); + +SELECT count() FROM merge(test, '^numbers\\d+$'); +SELECT DISTINCT count() FROM merge(test, '^numbers\\d+$') GROUP BY number; + +SET max_rows_to_read = 1000; + +SET max_threads = 'auto'; +SELECT count() FROM merge(test, '^numbers\\d+$') WHERE _table = 'numbers1'; + +SET max_threads = 1; +SELECT count() FROM merge(test, '^numbers\\d+$') WHERE _table = 'numbers2'; + +SET max_threads = 10; +SELECT count() FROM merge(test, '^numbers\\d+$') WHERE _table = 'numbers3'; + +SET max_rows_to_read = 1; + +SET max_threads = 'auto'; +SELECT count() FROM merge(test, '^numbers\\d+$') WHERE _table = 'non_existing'; + +SET max_threads = 1; +SELECT count() FROM merge(test, '^numbers\\d+$') WHERE _table = 'non_existing'; + +SET max_threads = 10; +SELECT count() FROM merge(test, '^numbers\\d+$') WHERE _table = 'non_existing'; + +DROP TABLE test.numbers1; +DROP TABLE test.numbers2; +DROP TABLE test.numbers3; +DROP TABLE test.numbers4; +DROP TABLE test.numbers5; From 222fb2c945093a82cc450a973a11be4f2d4aa803 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 21 Feb 2018 23:23:27 +0300 Subject: [PATCH 055/209] Miscellaneous [#CLICKHOUSE-2] --- dbms/src/DataStreams/FilterColumnsBlockInputStream.h | 2 +- dbms/src/Dictionaries/DictionaryBlockInputStream.h | 2 +- dbms/src/Dictionaries/RangeDictionaryBlockInputStream.h | 2 +- dbms/src/Storages/StorageCatBoostPool.cpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/src/DataStreams/FilterColumnsBlockInputStream.h b/dbms/src/DataStreams/FilterColumnsBlockInputStream.h index b2ac83c8fdf..dc63ec2823f 100644 --- a/dbms/src/DataStreams/FilterColumnsBlockInputStream.h +++ b/dbms/src/DataStreams/FilterColumnsBlockInputStream.h @@ -21,7 +21,7 @@ public: String getName() const override { - return "FilterColumnsBlockInputStream"; + return "FilterColumns"; } Block getHeader() const override; diff --git a/dbms/src/Dictionaries/DictionaryBlockInputStream.h 
b/dbms/src/Dictionaries/DictionaryBlockInputStream.h index 9c961b0fb9a..1b7b536a15c 100644 --- a/dbms/src/Dictionaries/DictionaryBlockInputStream.h +++ b/dbms/src/Dictionaries/DictionaryBlockInputStream.h @@ -43,7 +43,7 @@ public: String getName() const override { - return "DictionaryBlockInputStream"; + return "Dictionary"; } protected: diff --git a/dbms/src/Dictionaries/RangeDictionaryBlockInputStream.h b/dbms/src/Dictionaries/RangeDictionaryBlockInputStream.h index 43c2fcb53c7..857c9b58cfb 100644 --- a/dbms/src/Dictionaries/RangeDictionaryBlockInputStream.h +++ b/dbms/src/Dictionaries/RangeDictionaryBlockInputStream.h @@ -30,7 +30,7 @@ public: String getName() const override { - return "RangeDictionaryBlockInputStream"; + return "RangeDictionary"; } protected: diff --git a/dbms/src/Storages/StorageCatBoostPool.cpp b/dbms/src/Storages/StorageCatBoostPool.cpp index d484158b018..74a40372b79 100644 --- a/dbms/src/Storages/StorageCatBoostPool.cpp +++ b/dbms/src/Storages/StorageCatBoostPool.cpp @@ -36,7 +36,7 @@ public: String getName() const override { - return "CatBoostDatasetBlockInputStream"; + return "CatBoostDataset"; } Block readImpl() override From 02f7cc2d041c9fc17e269355282abb3cf9f77df3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 21 Feb 2018 23:24:29 +0300 Subject: [PATCH 056/209] Fixed error with StorageMerge [#CLICKHOUSE-2]. --- dbms/src/Storages/StorageMerge.cpp | 12 +- .../00579_virtual_column_and_lazy.reference | 3000 +++++++++++++++++ .../00579_virtual_column_and_lazy.sql | 18 + 3 files changed, 3029 insertions(+), 1 deletion(-) create mode 100644 dbms/tests/queries/0_stateless/00579_virtual_column_and_lazy.reference create mode 100644 dbms/tests/queries/0_stateless/00579_virtual_column_and_lazy.sql diff --git a/dbms/src/Storages/StorageMerge.cpp b/dbms/src/Storages/StorageMerge.cpp index e592aabf90b..f503606a742 100644 --- a/dbms/src/Storages/StorageMerge.cpp +++ b/dbms/src/Storages/StorageMerge.cpp @@ -192,7 +192,7 @@ BlockInputStreams StorageMerge::read( if (real_column_names.size() == 0) real_column_names.push_back(ExpressionActions::getSmallestColumn(table->getColumnsList())); - /// Substitute virtual column for its value. NOTE This looks terribly wrong. + /// Substitute virtual column for its value when querying tables. ASTPtr modified_query_ast = query->clone(); VirtualColumnUtils::rewriteEntityInAst(modified_query_ast, "_table", table->getTableName()); @@ -220,6 +220,12 @@ BlockInputStreams StorageMerge::read( throw Exception("Source tables for Merge table are processing data up to different stages", ErrorCodes::INCOMPATIBLE_SOURCE_TABLES); + /// The table may return extra columns if we query only its virtual column. + /// We filter out the extra columns. This is done only if the query was not processed further than FetchColumns. + if (processed_stage_in_source_table == QueryProcessingStage::FetchColumns) + for (auto & stream : source_streams) + stream = std::make_shared(stream, real_column_names, true); + /// Subordinate tables could have different but convertible types, like numeric types of different width. /// We must return streams with structure equal to the structure of the Merge table.
for (auto & stream : source_streams) @@ -249,6 +255,10 @@ BlockInputStreams StorageMerge::read( throw Exception("Source tables for Merge table are processing data up to different stages", ErrorCodes::INCOMPATIBLE_SOURCE_TABLES); + if (processed_stage_in_source_table == QueryProcessingStage::FetchColumns) + for (auto & stream : streams) + stream = std::make_shared(stream, real_column_names, true); + auto stream = streams.empty() ? std::make_shared(header) : streams.front(); if (!streams.empty()) { diff --git a/dbms/tests/queries/0_stateless/00579_virtual_column_and_lazy.reference b/dbms/tests/queries/0_stateless/00579_virtual_column_and_lazy.reference new file mode 100644 index 00000000000..1e17df0ebb4 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00579_virtual_column_and_lazy.reference @@ -0,0 +1,3000 @@ +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 
+1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 
+1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/dbms/tests/queries/0_stateless/00579_virtual_column_and_lazy.sql b/dbms/tests/queries/0_stateless/00579_virtual_column_and_lazy.sql new file mode 100644 index 00000000000..c30133863b5 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00579_virtual_column_and_lazy.sql @@ -0,0 +1,18 @@ +DROP TABLE IF EXISTS test.sample1; +DROP TABLE IF EXISTS test.sample2; +DROP TABLE IF EXISTS test.sample_merge; + +CREATE TABLE test.sample1 (x UInt64, d Date DEFAULT today()) ENGINE = MergeTree(d, intHash64(x), intHash64(x), 10); +CREATE TABLE test.sample2 (x UInt64, d Date DEFAULT today()) ENGINE = MergeTree(d, intHash64(x), intHash64(x), 10); + +INSERT INTO test.sample1 (x) SELECT number AS x FROM system.numbers LIMIT 1000; +INSERT INTO test.sample2 (x) SELECT number AS x FROM system.numbers LIMIT 2000; + +CREATE TABLE 
test.sample_merge AS test.sample1 ENGINE = Merge(test, '^sample\\d$'); + +SET max_threads = 1; +SELECT _sample_factor FROM merge(test, '^sample\\d$'); + +DROP TABLE test.sample1; +DROP TABLE test.sample2; +DROP TABLE test.sample_merge; From 331fca64a91c216afa9da06cb2225531109f95d6 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 22 Feb 2018 02:04:37 +0300 Subject: [PATCH 057/209] Update ReplacingSortedBlockInputStream.h --- dbms/src/DataStreams/ReplacingSortedBlockInputStream.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/DataStreams/ReplacingSortedBlockInputStream.h b/dbms/src/DataStreams/ReplacingSortedBlockInputStream.h index 34d903f49f3..dabc1c7e3af 100644 --- a/dbms/src/DataStreams/ReplacingSortedBlockInputStream.h +++ b/dbms/src/DataStreams/ReplacingSortedBlockInputStream.h @@ -43,7 +43,7 @@ private: RowRef selected_row; /// Last row with maximum version for current primary key. UInt64 max_version = 0; /// Max version for current primary key. - size_t max_pos; + size_t max_pos = 0; PODArray current_row_sources; /// Sources of rows with the current primary key From 4b6bf266bdaea1e7aae9e774e3f7453f9a04475d Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 22 Feb 2018 02:19:56 +0300 Subject: [PATCH 058/209] Update higher_order_functions.md --- docs/ru/functions/higher_order_functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/functions/higher_order_functions.md b/docs/ru/functions/higher_order_functions.md index bc48e9dc663..6c028a95f6a 100644 --- a/docs/ru/functions/higher_order_functions.md +++ b/docs/ru/functions/higher_order_functions.md @@ -65,7 +65,7 @@ SELECT ### arrayCumSum(\[func,\] arr1, ...) -Возвращает совокупную сумму элементов массива, полученную при применении функции `func` к каждому элементу массива `arr`. +Возвращает массив из частичных сумм элементов исходного массива (сумма с накоплением). Если указана функция `func`, то значения элементов массива преобразуются этой функцией перед суммированием. 
Пример: From 3844b129f4986a2a355678585a4393357612c780 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 22 Feb 2018 03:38:05 +0300 Subject: [PATCH 059/209] Update Context.cpp --- dbms/src/Interpreters/Context.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 2bc0aa3c4b7..06993f4306b 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1429,7 +1429,7 @@ QueryLog & Context::getQueryLog() size_t flush_interval_milliseconds = config.getUInt64( "query_log.flush_interval_milliseconds", DEFAULT_QUERY_LOG_FLUSH_INTERVAL_MILLISECONDS); - String engine = "ENGINE = MergeTree() PARTITION BY (" + partition_by + ") ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024"; + String engine = "ENGINE = MergeTree PARTITION BY (" + partition_by + ") ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024"; system_logs->query_log = std::make_unique(*global_context, database, table, engine, flush_interval_milliseconds); } From bcad88c9aeb39540d337cc7c2a72d448a7cca302 Mon Sep 17 00:00:00 2001 From: KochetovNicolai Date: Thu, 22 Feb 2018 11:37:16 +0300 Subject: [PATCH 060/209] Update ReplacingSortedBlockInputStream.h added comment --- .../ReplacingSortedBlockInputStream.h | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/dbms/src/DataStreams/ReplacingSortedBlockInputStream.h b/dbms/src/DataStreams/ReplacingSortedBlockInputStream.h index dabc1c7e3af..e64100b2207 100644 --- a/dbms/src/DataStreams/ReplacingSortedBlockInputStream.h +++ b/dbms/src/DataStreams/ReplacingSortedBlockInputStream.h @@ -37,15 +37,19 @@ private: /// All data has been read. bool finished = false; - RowRef current_key; /// Primary key of current row. - RowRef next_key; /// Primary key of next row. - - RowRef selected_row; /// Last row with maximum version for current primary key. - - UInt64 max_version = 0; /// Max version for current primary key. + /// Primary key of current row. + RowRef current_key; + /// Primary key of next row. + RowRef next_key; + /// Last row with maximum version for current primary key. + RowRef selected_row; + /// Max version for current primary key. + UInt64 max_version = 0; + /// The position (into current_row_sources) of the row with the highest version. size_t max_pos = 0; - PODArray current_row_sources; /// Sources of rows with the current primary key + /// Sources of rows with the current primary key. 
+ PODArray current_row_sources; void merge(MutableColumns & merged_columns, std::priority_queue & queue); From a75e095a80488bd289e93b4c05911e274dd7b55c Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 21 Feb 2018 22:26:59 +0300 Subject: [PATCH 061/209] added data_path and metadata_path to system.tables --- dbms/src/Databases/DatabaseDictionary.cpp | 5 ---- dbms/src/Databases/DatabaseDictionary.h | 2 -- dbms/src/Databases/DatabaseMemory.cpp | 5 ---- dbms/src/Databases/DatabaseMemory.h | 2 -- dbms/src/Databases/DatabaseOrdinary.cpp | 28 +++++++++++++------ dbms/src/Databases/DatabaseOrdinary.h | 4 ++- dbms/src/Databases/IDatabase.h | 6 +++- .../Interpreters/InterpreterCreateQuery.cpp | 2 +- .../src/Interpreters/InterpreterDropQuery.cpp | 4 +-- dbms/src/Storages/IStorage.h | 4 ++- dbms/src/Storages/StorageDistributed.h | 2 ++ dbms/src/Storages/StorageFile.h | 2 ++ dbms/src/Storages/StorageLog.h | 4 +++ dbms/src/Storages/StorageMaterializedView.h | 2 ++ dbms/src/Storages/StorageMergeTree.h | 2 ++ .../src/Storages/StorageReplicatedMergeTree.h | 2 ++ dbms/src/Storages/StorageSet.h | 2 ++ dbms/src/Storages/StorageStripeLog.h | 4 ++- dbms/src/Storages/StorageTinyLog.h | 4 ++- .../Storages/System/StorageSystemTables.cpp | 4 +++ 20 files changed, 60 insertions(+), 30 deletions(-) diff --git a/dbms/src/Databases/DatabaseDictionary.cpp b/dbms/src/Databases/DatabaseDictionary.cpp index 8065f0095c9..8a10535fd09 100644 --- a/dbms/src/Databases/DatabaseDictionary.cpp +++ b/dbms/src/Databases/DatabaseDictionary.cpp @@ -174,9 +174,4 @@ void DatabaseDictionary::drop() /// Additional actions to delete database are not required. } -String DatabaseDictionary::getDataPath(const Context &) const -{ - return {}; -} - } diff --git a/dbms/src/Databases/DatabaseDictionary.h b/dbms/src/Databases/DatabaseDictionary.h index 15269b5b6d9..1308ea20a40 100644 --- a/dbms/src/Databases/DatabaseDictionary.h +++ b/dbms/src/Databases/DatabaseDictionary.h @@ -93,8 +93,6 @@ public: const Context & context, const String & table_name) const override; - String getDataPath(const Context & context) const override; - void shutdown() override; void drop() override; }; diff --git a/dbms/src/Databases/DatabaseMemory.cpp b/dbms/src/Databases/DatabaseMemory.cpp index a602f5e8be4..66b23f07ca6 100644 --- a/dbms/src/Databases/DatabaseMemory.cpp +++ b/dbms/src/Databases/DatabaseMemory.cpp @@ -153,9 +153,4 @@ void DatabaseMemory::drop() /// Additional actions to delete database are not required. 
} -String DatabaseMemory::getDataPath(const Context &) const -{ - return {}; -} - } diff --git a/dbms/src/Databases/DatabaseMemory.h b/dbms/src/Databases/DatabaseMemory.h index 26de132c91f..be095ad3755 100644 --- a/dbms/src/Databases/DatabaseMemory.h +++ b/dbms/src/Databases/DatabaseMemory.h @@ -84,8 +84,6 @@ public: const Context & context, const String & table_name) const override; - String getDataPath(const Context & context) const override; - void shutdown() override; void drop() override; }; diff --git a/dbms/src/Databases/DatabaseOrdinary.cpp b/dbms/src/Databases/DatabaseOrdinary.cpp index 9328634b06d..8e9b5a45c37 100644 --- a/dbms/src/Databases/DatabaseOrdinary.cpp +++ b/dbms/src/Databases/DatabaseOrdinary.cpp @@ -39,10 +39,12 @@ static constexpr size_t PRINT_MESSAGE_EACH_N_SECONDS = 5; static constexpr size_t METADATA_FILE_BUFFER_SIZE = 32768; static constexpr size_t TABLES_PARALLEL_LOAD_BUNCH_SIZE = 100; - -static String getTableMetadataPath(const String & base_path, const String & table_name) +namespace detail { - return base_path + (endsWith(base_path, "/") ? "" : "/") + escapeForFileName(table_name) + ".sql"; + String getTableMetadataPath(const String & base_path, const String & table_name) + { + return base_path + (endsWith(base_path, "/") ? "" : "/") + escapeForFileName(table_name) + ".sql"; + } } static void loadTable( @@ -271,7 +273,7 @@ void DatabaseOrdinary::createTable( throw Exception("Table " + name + "." + table_name + " already exists.", ErrorCodes::TABLE_ALREADY_EXISTS); } - String table_metadata_path = getTableMetadataPath(metadata_path, table_name); + String table_metadata_path = detail::getTableMetadataPath(metadata_path, table_name); String table_metadata_tmp_path = table_metadata_path + ".tmp"; String statement; @@ -314,7 +316,7 @@ void DatabaseOrdinary::removeTable( { StoragePtr res = detachTable(table_name); - String table_metadata_path = getTableMetadataPath(metadata_path, table_name); + String table_metadata_path = detail::getTableMetadataPath(metadata_path, table_name); try { @@ -330,7 +332,7 @@ void DatabaseOrdinary::removeTable( static ASTPtr getCreateQueryImpl(const String & path, const String & table_name) { - String table_metadata_path = getTableMetadataPath(path, table_name); + String table_metadata_path = detail::getTableMetadataPath(path, table_name); String query; { @@ -390,7 +392,7 @@ time_t DatabaseOrdinary::getTableMetadataModificationTime( const Context & /*context*/, const String & table_name) { - String table_metadata_path = getTableMetadataPath(metadata_path, table_name); + String table_metadata_path = detail::getTableMetadataPath(metadata_path, table_name); Poco::File meta_file(table_metadata_path); if (meta_file.exists()) @@ -513,9 +515,19 @@ void DatabaseOrdinary::alterTable( } } -String DatabaseOrdinary::getDataPath(const Context &) const +String DatabaseOrdinary::getDataPath() const { return data_path; } +String DatabaseOrdinary::getMetadataPath() const +{ + return metadata_path; +} + +String DatabaseOrdinary::getTableMetadataPath(const String & table_name) const +{ + return detail::getTableMetadataPath(metadata_path, table_name); +} + } diff --git a/dbms/src/Databases/DatabaseOrdinary.h b/dbms/src/Databases/DatabaseOrdinary.h index efaabc59033..dbd44586c74 100644 --- a/dbms/src/Databases/DatabaseOrdinary.h +++ b/dbms/src/Databases/DatabaseOrdinary.h @@ -59,7 +59,9 @@ public: const Context & context, const String & table_name) const override; - String getDataPath(const Context & context) const override; + String getDataPath() const 
override; + String getMetadataPath() const override; + String getTableMetadataPath(const String & table_name) const override; void shutdown() override; void drop() override; diff --git a/dbms/src/Databases/IDatabase.h b/dbms/src/Databases/IDatabase.h index 1c81c729a9d..74cbe0b2202 100644 --- a/dbms/src/Databases/IDatabase.h +++ b/dbms/src/Databases/IDatabase.h @@ -130,7 +130,11 @@ public: const String & name) const = 0; /// Returns path for persistent data storage if the database supports it, empty string otherwise - virtual String getDataPath(const Context & context) const = 0; + virtual String getDataPath() const { return {}; } + /// Returns metadata path if the database supports it, empty string otherwise + virtual String getMetadataPath() const { return {}; } + /// Returns metadata path of a concrete table if the database supports it, empty string otherwise + virtual String getTableMetadataPath(const String & /*table_name*/) const { return {}; } /// Ask all tables to complete the background threads they are using and delete all table objects. virtual void shutdown() = 0; diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index aa4bf23fc20..0bd8560d3c0 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -510,7 +510,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) if (!create.is_temporary) { database = context.getDatabase(database_name); - data_path = database->getDataPath(context); + data_path = database->getDataPath(); /** If the table already exists, and the request specifies IF NOT EXISTS, * then we allow concurrent CREATE queries (which do nothing). diff --git a/dbms/src/Interpreters/InterpreterDropQuery.cpp b/dbms/src/Interpreters/InterpreterDropQuery.cpp index 09792bdb058..1f3b6c59bc0 100644 --- a/dbms/src/Interpreters/InterpreterDropQuery.cpp +++ b/dbms/src/Interpreters/InterpreterDropQuery.cpp @@ -140,7 +140,7 @@ BlockIO InterpreterDropQuery::execute() table.first->is_dropped = true; - String database_data_path = database->getDataPath(context); + String database_data_path = database->getDataPath(); /// If it is not virtual database like Dictionary then drop remaining data dir if (!database_data_path.empty()) @@ -173,7 +173,7 @@ BlockIO InterpreterDropQuery::execute() database->drop(); /// Remove data directory if it is not virtual database. TODO: should IDatabase::drop() do that? - String database_data_path = database->getDataPath(context); + String database_data_path = database->getDataPath(); if (!database_data_path.empty()) Poco::File(database_data_path).remove(false); diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h index 14c48d489ca..a2970b0de96 100644 --- a/dbms/src/Storages/IStorage.h +++ b/dbms/src/Storages/IStorage.h @@ -288,10 +288,12 @@ public: /// Otherwise - throws an exception with detailed information or returns false virtual bool checkTableCanBeDropped() const { return true; } - /** Notify engine about updated dependencies for this storage. */ virtual void updateDependencies() {} + /// Returns data path if storage supports it, empty string otherwise. 
+ virtual String getDataPath() const { return {}; } + protected: using ITableDeclaration::ITableDeclaration; using std::enable_shared_from_this::shared_from_this; diff --git a/dbms/src/Storages/StorageDistributed.h b/dbms/src/Storages/StorageDistributed.h index c9138c53498..6afb54ba007 100644 --- a/dbms/src/Storages/StorageDistributed.h +++ b/dbms/src/Storages/StorageDistributed.h @@ -72,6 +72,8 @@ public: void startup() override; void shutdown() override; + String getDataPath() const override { return path; } + /// From each replica, get a description of the corresponding local table. BlockInputStreams describe(const Context & context, const Settings & settings); diff --git a/dbms/src/Storages/StorageFile.h b/dbms/src/Storages/StorageFile.h index 4ab458261b2..8501415001c 100644 --- a/dbms/src/Storages/StorageFile.h +++ b/dbms/src/Storages/StorageFile.h @@ -52,6 +52,8 @@ public: void rename(const String & new_path_to_db, const String & new_database_name, const String & new_table_name) override; + String getDataPath() const override { return path; } + protected: friend class StorageFileBlockInputStream; friend class StorageFileBlockOutputStream; diff --git a/dbms/src/Storages/StorageLog.h b/dbms/src/Storages/StorageLog.h index 2d362c09272..4f38f403138 100644 --- a/dbms/src/Storages/StorageLog.h +++ b/dbms/src/Storages/StorageLog.h @@ -40,6 +40,10 @@ public: bool checkData() const override; + std::string full_path() const { return path + escapeForFileName(name) + '/';} + + String getDataPath() const override { return full_path(); } + protected: /** Attach the table with the appropriate name, along the appropriate path (with / at the end), * (the correctness of names and paths is not verified) diff --git a/dbms/src/Storages/StorageMaterializedView.h b/dbms/src/Storages/StorageMaterializedView.h index 9b9b538b31f..4dd0980e874 100644 --- a/dbms/src/Storages/StorageMaterializedView.h +++ b/dbms/src/Storages/StorageMaterializedView.h @@ -43,6 +43,8 @@ public: size_t max_block_size, unsigned num_streams) override; + String getDataPath() const override { return getTargetTable()->getDataPath(); } + private: String select_database_name; String select_table_name; diff --git a/dbms/src/Storages/StorageMergeTree.h b/dbms/src/Storages/StorageMergeTree.h index 5316a4db042..e4e880941ff 100644 --- a/dbms/src/Storages/StorageMergeTree.h +++ b/dbms/src/Storages/StorageMergeTree.h @@ -80,6 +80,8 @@ public: MergeTreeData & getData() { return data; } const MergeTreeData & getData() const { return data; } + String getDataPath() const override { return full_path; } + private: String path; String database_name; diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h index ebe9d29eeac..21c77cfde5d 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.h +++ b/dbms/src/Storages/StorageReplicatedMergeTree.h @@ -170,6 +170,8 @@ public: part_check_thread.enqueuePart(part_name, delay_to_check_seconds); } + String getDataPath() const override { return full_path; } + private: /// Delete old parts from disk and from ZooKeeper. 
void clearOldPartsAndRemoveFromZK(); diff --git a/dbms/src/Storages/StorageSet.h b/dbms/src/Storages/StorageSet.h index 0b08f3af2db..2546967b24a 100644 --- a/dbms/src/Storages/StorageSet.h +++ b/dbms/src/Storages/StorageSet.h @@ -25,6 +25,8 @@ public: BlockOutputStreamPtr write(const ASTPtr & query, const Settings & settings) override; + String getDataPath() const override { return path; } + protected: StorageSetOrJoinBase( const String & path_, diff --git a/dbms/src/Storages/StorageStripeLog.h b/dbms/src/Storages/StorageStripeLog.h index e1e72873794..e1514f8329f 100644 --- a/dbms/src/Storages/StorageStripeLog.h +++ b/dbms/src/Storages/StorageStripeLog.h @@ -49,7 +49,9 @@ public: }; using Files_t = std::map; - std::string full_path() { return path + escapeForFileName(name) + '/';} + std::string full_path() const { return path + escapeForFileName(name) + '/';} + + String getDataPath() const override { return full_path(); } private: String path; diff --git a/dbms/src/Storages/StorageTinyLog.h b/dbms/src/Storages/StorageTinyLog.h index 84e52afe265..e64301227ad 100644 --- a/dbms/src/Storages/StorageTinyLog.h +++ b/dbms/src/Storages/StorageTinyLog.h @@ -48,7 +48,9 @@ public: }; using Files_t = std::map; - std::string full_path() { return path + escapeForFileName(name) + '/';} + std::string full_path() const { return path + escapeForFileName(name) + '/';} + + String getDataPath() const override { return full_path(); } private: String path; diff --git a/dbms/src/Storages/System/StorageSystemTables.cpp b/dbms/src/Storages/System/StorageSystemTables.cpp index 7c8d93d26f0..852392a63d1 100644 --- a/dbms/src/Storages/System/StorageSystemTables.cpp +++ b/dbms/src/Storages/System/StorageSystemTables.cpp @@ -120,6 +120,8 @@ StorageSystemTables::StorageSystemTables(const std::string & name_) {"name", std::make_shared()}, {"engine", std::make_shared()}, {"is_temporary", std::make_shared()}, + {"data_path", std::make_shared()}, + {"metadata_path", std::make_shared()}, }; virtual_columns = @@ -190,6 +192,8 @@ BlockInputStreams StorageSystemTables::read( res_columns[j++]->insert(table_name); res_columns[j++]->insert(iterator->table()->getName()); res_columns[j++]->insert(UInt64(0)); + res_columns[j++]->insert(iterator->table()->getDataPath()); + res_columns[j++]->insert(database->getTableMetadataPath(table_name)); if (has_metadata_modification_time) res_columns[j++]->insert(static_cast(database->getTableMetadataModificationTime(context, table_name))); From c7f5f6f57405fb3e541bda8997b3897874e75df9 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 22 Feb 2018 12:09:14 +0300 Subject: [PATCH 062/209] added data_path and metadata_path to system.databases and path to system.parts and system.parts_columns [#CLICKHOUSE-3580] --- dbms/src/Databases/DatabaseOrdinary.cpp | 8 ++++---- dbms/src/Storages/System/StorageSystemDatabases.cpp | 8 ++++++-- dbms/src/Storages/System/StorageSystemParts.cpp | 4 +++- dbms/src/Storages/System/StorageSystemPartsColumns.cpp | 2 ++ 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/dbms/src/Databases/DatabaseOrdinary.cpp b/dbms/src/Databases/DatabaseOrdinary.cpp index 8e9b5a45c37..fa29cbc6283 100644 --- a/dbms/src/Databases/DatabaseOrdinary.cpp +++ b/dbms/src/Databases/DatabaseOrdinary.cpp @@ -147,7 +147,7 @@ void DatabaseOrdinary::loadTables( size_t total_tables = file_names.size(); LOG_INFO(log, "Total " << total_tables << " tables."); - String data_path = context.getPath() + "/data/" + escapeForFileName(name) + "/"; + String data_path = context.getPath() + "data/" 
+ escapeForFileName(name) + "/"; StopwatchWithLock watch; std::atomic tables_processed {0}; @@ -273,7 +273,7 @@ void DatabaseOrdinary::createTable( throw Exception("Table " + name + "." + table_name + " already exists.", ErrorCodes::TABLE_ALREADY_EXISTS); } - String table_metadata_path = detail::getTableMetadataPath(metadata_path, table_name); + String table_metadata_path = getTableMetadataPath(table_name); String table_metadata_tmp_path = table_metadata_path + ".tmp"; String statement; @@ -316,7 +316,7 @@ void DatabaseOrdinary::removeTable( { StoragePtr res = detachTable(table_name); - String table_metadata_path = detail::getTableMetadataPath(metadata_path, table_name); + String table_metadata_path = getTableMetadataPath(table_name); try { @@ -392,7 +392,7 @@ time_t DatabaseOrdinary::getTableMetadataModificationTime( const Context & /*context*/, const String & table_name) { - String table_metadata_path = detail::getTableMetadataPath(metadata_path, table_name); + String table_metadata_path = getTableMetadataPath(table_name); Poco::File meta_file(table_metadata_path); if (meta_file.exists()) diff --git a/dbms/src/Storages/System/StorageSystemDatabases.cpp b/dbms/src/Storages/System/StorageSystemDatabases.cpp index c4c41f3375d..93917b81000 100644 --- a/dbms/src/Storages/System/StorageSystemDatabases.cpp +++ b/dbms/src/Storages/System/StorageSystemDatabases.cpp @@ -14,8 +14,10 @@ StorageSystemDatabases::StorageSystemDatabases(const std::string & name_) : name(name_) { columns = NamesAndTypesList{ - {"name", std::make_shared()}, - {"engine", std::make_shared()}, + {"name", std::make_shared()}, + {"engine", std::make_shared()}, + {"data_path", std::make_shared()}, + {"metadata_path", std::make_shared()}, }; } @@ -38,6 +40,8 @@ BlockInputStreams StorageSystemDatabases::read( { res_columns[0]->insert(database.first); res_columns[1]->insert(database.second->getEngineName()); + res_columns[2]->insert(database.second->getDataPath()); + res_columns[3]->insert(database.second->getMetadataPath()); } return BlockInputStreams(1, std::make_shared(getSampleBlock().cloneWithColumns(std::move(res_columns)))); diff --git a/dbms/src/Storages/System/StorageSystemParts.cpp b/dbms/src/Storages/System/StorageSystemParts.cpp index bc8588972a5..927ff272dab 100644 --- a/dbms/src/Storages/System/StorageSystemParts.cpp +++ b/dbms/src/Storages/System/StorageSystemParts.cpp @@ -37,7 +37,8 @@ StorageSystemParts::StorageSystemParts(const std::string & name) {"database", std::make_shared()}, {"table", std::make_shared()}, - {"engine", std::make_shared()} + {"engine", std::make_shared()}, + {"path", std::make_shared()} } ) { @@ -81,6 +82,7 @@ void StorageSystemParts::processNextStorage(MutableColumns & columns, const Stor columns[i++]->insert(info.database); columns[i++]->insert(info.table); columns[i++]->insert(info.engine); + columns[i++]->insert(part->getFullPath()); if (has_state_column) columns[i++]->insert(part->stateString()); diff --git a/dbms/src/Storages/System/StorageSystemPartsColumns.cpp b/dbms/src/Storages/System/StorageSystemPartsColumns.cpp index b97178a750d..fe9351323d9 100644 --- a/dbms/src/Storages/System/StorageSystemPartsColumns.cpp +++ b/dbms/src/Storages/System/StorageSystemPartsColumns.cpp @@ -39,6 +39,7 @@ StorageSystemPartsColumns::StorageSystemPartsColumns(const std::string & name) {"database", std::make_shared()}, {"table", std::make_shared()}, {"engine", std::make_shared()}, + {"path", std::make_shared()}, {"column", std::make_shared()}, { "type", std::make_shared() }, { "default_kind", 
std::make_shared() }, @@ -126,6 +127,7 @@ void StorageSystemPartsColumns::processNextStorage(MutableColumns & columns, con columns[j++]->insert(info.database); columns[j++]->insert(info.table); columns[j++]->insert(info.engine); + columns[j++]->insert(part->getFullPath()); columns[j++]->insert(column.name); columns[j++]->insert(column.type->getName()); From 07d01d2dfb5f501314fb5d546254dd482940abc7 Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Thu, 22 Feb 2018 14:40:23 +0300 Subject: [PATCH 063/209] Fixed build after merge. [#CLICKHOUSE-3606] --- dbms/src/DataStreams/RemoteBlockInputStream.h | 2 +- dbms/src/Server/ClusterCopier.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/src/DataStreams/RemoteBlockInputStream.h b/dbms/src/DataStreams/RemoteBlockInputStream.h index 8c3cff4f494..c76b0a03ff1 100644 --- a/dbms/src/DataStreams/RemoteBlockInputStream.h +++ b/dbms/src/DataStreams/RemoteBlockInputStream.h @@ -15,7 +15,7 @@ namespace DB { -/** This class allowes one to launch queries on remote replicas of one shard and get results +/** This class allows one to launch queries on remote replicas of one shard and get results */ class RemoteBlockInputStream : public IProfilingBlockInputStream { diff --git a/dbms/src/Server/ClusterCopier.cpp b/dbms/src/Server/ClusterCopier.cpp index 0f2af436be6..ae1d0643ff1 100644 --- a/dbms/src/Server/ClusterCopier.cpp +++ b/dbms/src/Server/ClusterCopier.cpp @@ -1756,9 +1756,9 @@ protected: try { - /// CREATE TABLE and DROP PARTITION return empty block - RemoteBlockInputStream stream(*connection, query, Block(), context, ¤t_settings); - NullBlockOutputStream output(Block()); + /// CREATE TABLE and DROP PARTITION queries return empty block + RemoteBlockInputStream stream{*connection, query, Block{}, context, ¤t_settings}; + NullBlockOutputStream output{Block{}}; copyData(stream, output); if (increment_and_check_exit()) From e774c48a0730e53b24f000b2a441ea7fd2b4d8ce Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 22 Feb 2018 18:55:52 +0300 Subject: [PATCH 064/209] Update possible_silly_questions.md --- docs/en/introduction/possible_silly_questions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/introduction/possible_silly_questions.md b/docs/en/introduction/possible_silly_questions.md index 3e147a3eb3d..36363ebe247 100644 --- a/docs/en/introduction/possible_silly_questions.md +++ b/docs/en/introduction/possible_silly_questions.md @@ -2,7 +2,7 @@ ## Why not use something like MapReduce? -We can refer to systems like map-reduce as distributed computing systems in which the reduce operation is based on distributed sorting. In this sense, they include YAMR, Hadoop, and YT. +We can refer to systems like map-reduce as distributed computing systems in which the reduce operation is based on distributed sorting. In this sense, they include Hadoop and YT (Yandex proprietary technology). These systems aren't appropriate for online queries due to their high latency. In other words, they can't be used as the back-end for a web interface. These types of systems aren't useful for real-time data updates. 
From 850f612eeb021de8662639b09bcdd8980800a9f5 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 22 Feb 2018 18:56:32 +0300 Subject: [PATCH 065/209] Update possible_silly_questions.md --- docs/ru/introduction/possible_silly_questions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/introduction/possible_silly_questions.md b/docs/ru/introduction/possible_silly_questions.md index c01c3be3e45..fc2eb6f24e4 100644 --- a/docs/ru/introduction/possible_silly_questions.md +++ b/docs/ru/introduction/possible_silly_questions.md @@ -2,7 +2,7 @@ ## Почему бы не использовать системы типа MapReduce? -Системами типа map-reduce будем называть системы распределённых вычислений, в которых операция reduce сделана на основе распределённой сортировки. Таким образом, к ним относятся YAMR, Hadoop, YT. +Системами типа map-reduce будем называть системы распределённых вычислений, в которых операция reduce сделана на основе распределённой сортировки. Таким образом, к ним относятся Hadoop и YT (YT является внутренней разработкой Яндекса). Такие системы не подходят для онлайн запросов в силу слишком большой latency. То есть, не могут быть использованы в качестве бэкенда для веб-интерфейса. Такие системы не подходят для обновления данных в реальном времени. From 03c3b18e961b588df3f99d58bb28aff9df302c87 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 22 Feb 2018 20:32:06 +0300 Subject: [PATCH 066/209] Fixed error with dynamic compilation of quantile family of aggregate functions #1931 --- .../AggregateFunctionQuantile.cpp | 28 --------------- .../AggregateFunctionQuantile.h | 34 +++++++++++++++++++ 2 files changed, 34 insertions(+), 28 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionQuantile.cpp b/dbms/src/AggregateFunctions/AggregateFunctionQuantile.cpp index 1978e15ab41..250ee422e8b 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionQuantile.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionQuantile.cpp @@ -4,13 +4,6 @@ #include -#include -#include -#include -#include -#include -#include - #include #include @@ -55,27 +48,6 @@ AggregateFunctionPtr createAggregateFunctionQuantile(const std::string & name, c #undef FOR_NUMERIC_TYPES - -struct NameQuantile { static constexpr auto name = "quantile"; }; -struct NameQuantiles { static constexpr auto name = "quantiles"; }; -struct NameQuantileDeterministic { static constexpr auto name = "quantileDeterministic"; }; -struct NameQuantilesDeterministic { static constexpr auto name = "quantilesDeterministic"; }; - -struct NameQuantileExact { static constexpr auto name = "quantileExact"; }; -struct NameQuantileExactWeighted { static constexpr auto name = "quantileExactWeighted"; }; -struct NameQuantilesExact { static constexpr auto name = "quantilesExact"; }; -struct NameQuantilesExactWeighted { static constexpr auto name = "quantilesExactWeighted"; }; - -struct NameQuantileTiming { static constexpr auto name = "quantileTiming"; }; -struct NameQuantileTimingWeighted { static constexpr auto name = "quantileTimingWeighted"; }; -struct NameQuantilesTiming { static constexpr auto name = "quantilesTiming"; }; -struct NameQuantilesTimingWeighted { static constexpr auto name = "quantilesTimingWeighted"; }; - -struct NameQuantileTDigest { static constexpr auto name = "quantileTDigest"; }; -struct NameQuantileTDigestWeighted { static constexpr auto name = "quantileTDigestWeighted"; }; -struct NameQuantilesTDigest { static constexpr auto name = "quantilesTDigest"; }; -struct NameQuantilesTDigestWeighted { static 
constexpr auto name = "quantilesTDigestWeighted"; }; - } void registerAggregateFunctionsQuantile(AggregateFunctionFactory & factory) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionQuantile.h b/dbms/src/AggregateFunctions/AggregateFunctionQuantile.h index 7e806f52571..26952b6f7a9 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionQuantile.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionQuantile.h @@ -158,3 +158,37 @@ public: }; } + + +/// These must be exposed in header for the purpose of dynamic compilation. +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +struct NameQuantile { static constexpr auto name = "quantile"; }; +struct NameQuantiles { static constexpr auto name = "quantiles"; }; +struct NameQuantileDeterministic { static constexpr auto name = "quantileDeterministic"; }; +struct NameQuantilesDeterministic { static constexpr auto name = "quantilesDeterministic"; }; + +struct NameQuantileExact { static constexpr auto name = "quantileExact"; }; +struct NameQuantileExactWeighted { static constexpr auto name = "quantileExactWeighted"; }; +struct NameQuantilesExact { static constexpr auto name = "quantilesExact"; }; +struct NameQuantilesExactWeighted { static constexpr auto name = "quantilesExactWeighted"; }; + +struct NameQuantileTiming { static constexpr auto name = "quantileTiming"; }; +struct NameQuantileTimingWeighted { static constexpr auto name = "quantileTimingWeighted"; }; +struct NameQuantilesTiming { static constexpr auto name = "quantilesTiming"; }; +struct NameQuantilesTimingWeighted { static constexpr auto name = "quantilesTimingWeighted"; }; + +struct NameQuantileTDigest { static constexpr auto name = "quantileTDigest"; }; +struct NameQuantileTDigestWeighted { static constexpr auto name = "quantileTDigestWeighted"; }; +struct NameQuantilesTDigest { static constexpr auto name = "quantilesTDigest"; }; +struct NameQuantilesTDigestWeighted { static constexpr auto name = "quantilesTDigestWeighted"; }; + +} From dd52ea834383f40175e0309fec723af2c022e69c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 22 Feb 2018 23:44:10 +0300 Subject: [PATCH 067/209] Fixed typos [#CLICKHOUSE-3] --- libs/libdaemon/src/BaseDaemon.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/libdaemon/src/BaseDaemon.cpp b/libs/libdaemon/src/BaseDaemon.cpp index cde615283ce..f7e04851e24 100644 --- a/libs/libdaemon/src/BaseDaemon.cpp +++ b/libs/libdaemon/src/BaseDaemon.cpp @@ -591,7 +591,7 @@ void BaseDaemon::buildLoggers() if (config().hasProperty("logger.log")) { createDirectory(config().getString("logger.log")); - std::cerr << "Should logs to " << config().getString("logger.log") << std::endl; + std::cerr << "Logging to " << config().getString("logger.log") << std::endl; // Set up two channel chains. 
Poco::AutoPtr pf = new OwnPatternFormatter(this); @@ -613,7 +613,7 @@ void BaseDaemon::buildLoggers() if (config().hasProperty("logger.errorlog")) { createDirectory(config().getString("logger.errorlog")); - std::cerr << "Should error logs to " << config().getString("logger.errorlog") << std::endl; + std::cerr << "Logging errors to " << config().getString("logger.errorlog") << std::endl; Poco::AutoPtr level = new Poco::LevelFilterChannel; level->setLevel(Message::PRIO_NOTICE); Poco::AutoPtr pf = new OwnPatternFormatter(this); From c44108d5eb7acdbf986bc005d5e6da579b90376b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Feb 2018 00:20:02 +0300 Subject: [PATCH 068/209] Allow to CAST Nullable type to non-Nullable as long as there are no NULL values #1947 --- dbms/src/Functions/FunctionsConversion.h | 103 +++++++++--------- ...80_cast_nullable_to_non_nullable.reference | 10 ++ .../00580_cast_nullable_to_non_nullable.sql | 1 + 3 files changed, 64 insertions(+), 50 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00580_cast_nullable_to_non_nullable.reference create mode 100644 dbms/tests/queries/0_stateless/00580_cast_nullable_to_non_nullable.sql diff --git a/dbms/src/Functions/FunctionsConversion.h b/dbms/src/Functions/FunctionsConversion.h index 00884ca0ffb..7e1901f5a05 100644 --- a/dbms/src/Functions/FunctionsConversion.h +++ b/dbms/src/Functions/FunctionsConversion.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -60,6 +61,7 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NOT_IMPLEMENTED; + extern const int CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN; } @@ -1231,7 +1233,7 @@ public: PreparedFunctionPtr prepare(const Block & /*sample_block*/) const override { - return std::make_shared(prepare(getArgumentTypes()[0], getReturnType().get()), name); + return std::make_shared(prepare(getArgumentTypes()[0], getReturnType()), name); } String getName() const override { return name; } @@ -1314,7 +1316,7 @@ private: throw Exception{"CAST AS Array can only be performed between same-dimensional array types or from String", ErrorCodes::TYPE_MISMATCH}; /// Prepare nested type conversion - const auto nested_function = prepare(from_nested_type, to_nested_type.get()); + const auto nested_function = prepare(from_nested_type, to_nested_type); return [nested_function, from_nested_type, to_nested_type]( Block & block, const ColumnNumbers & arguments, const size_t result) @@ -1368,7 +1370,7 @@ private: /// Create conversion wrapper for each element in tuple for (const auto & idx_type : ext::enumerate(from_type->getElements())) - element_wrappers.push_back(prepare(idx_type.second, to_element_types[idx_type.first].get())); + element_wrappers.push_back(prepare(idx_type.second, to_element_types[idx_type.first])); return [element_wrappers, from_element_types, to_element_types] (Block & block, const ColumnNumbers & arguments, const size_t result) @@ -1519,7 +1521,7 @@ private: bool result_is_nullable = false; }; - WrapperType prepare(const DataTypePtr & from_type, const IDataType * to_type) const + WrapperType prepare(const DataTypePtr & from_type, const DataTypePtr & to_type) const { /// Determine whether pre-processing and/or post-processing must take place during conversion. 
@@ -1527,12 +1529,11 @@ private: nullable_conversion.source_is_nullable = from_type->isNullable(); nullable_conversion.result_is_nullable = to_type->isNullable(); - /// Check that the requested conversion is allowed. - if (nullable_conversion.source_is_nullable && !nullable_conversion.result_is_nullable) - throw Exception{"Cannot convert data from a nullable type to a non-nullable type", ErrorCodes::CANNOT_CONVERT_TYPE}; - if (from_type->onlyNull()) { + if (!nullable_conversion.result_is_nullable) + throw Exception{"Cannot convert NULL to a non-nullable type", ErrorCodes::CANNOT_CONVERT_TYPE}; + return [](Block & block, const ColumnNumbers &, const size_t result) { auto & res = block.getByPosition(result); @@ -1540,28 +1541,8 @@ private: }; } - DataTypePtr from_inner_type; - const IDataType * to_inner_type; - - /// Create the requested conversion. - if (nullable_conversion.result_is_nullable) - { - if (nullable_conversion.source_is_nullable) - { - const auto & nullable_type = static_cast(*from_type); - from_inner_type = nullable_type.getNestedType(); - } - else - from_inner_type = from_type; - - const auto & nullable_type = static_cast(*to_type); - to_inner_type = nullable_type.getNestedType().get(); - } - else - { - from_inner_type = from_type; - to_inner_type = to_type; - } + DataTypePtr from_inner_type = removeNullable(from_type); + DataTypePtr to_inner_type = removeNullable(to_type); auto wrapper = prepareImpl(from_inner_type, to_inner_type); @@ -1609,51 +1590,73 @@ private: res.column = ColumnNullable::create(tmp_res.column, null_map); }; } + else if (nullable_conversion.source_is_nullable) + { + /// Conversion from Nullable to non-Nullable. + + return [wrapper] (Block & block, const ColumnNumbers & arguments, const size_t result) + { + Block tmp_block = createBlockWithNestedColumns(block, arguments, result); + + /// Check that all values are not-NULL. 
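+                /// (A single non-zero byte in the null map marks a NULL row, which cannot be represented
+                /// in the non-Nullable result; e.g. CAST(toNullable(1) AS UInt8) succeeds, while a Nullable
+                /// column that actually contains NULL throws.)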
+ + const auto & col = block.getByPosition(arguments[0]).column; + const auto & nullable_col = static_cast(*col); + const auto & null_map = nullable_col.getNullMapData(); + + if (!memoryIsZero(null_map.data(), null_map.size())) + throw Exception{"Cannot convert NULL value to non-Nullable type", + ErrorCodes::CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN}; + + wrapper(tmp_block, arguments, result); + block.getByPosition(result).column = tmp_block.getByPosition(result).column; + }; + } else return wrapper; } - WrapperType prepareImpl(const DataTypePtr & from_type, const IDataType * to_type) const + WrapperType prepareImpl(const DataTypePtr & from_type, const DataTypePtr & to_type) const { if (from_type->equals(*to_type)) return createIdentityWrapper(from_type); else if (checkDataType(from_type.get())) - return createNothingWrapper(to_type); - else if (const auto to_actual_type = checkAndGetDataType(to_type)) + return createNothingWrapper(to_type.get()); + else if (const auto to_actual_type = checkAndGetDataType(to_type.get())) return createWrapper(from_type, to_actual_type); - else if (const auto to_actual_type = checkAndGetDataType(to_type)) + else if (const auto to_actual_type = checkAndGetDataType(to_type.get())) return createWrapper(from_type, to_actual_type); - else if (const auto to_actual_type = checkAndGetDataType(to_type)) + else if (const auto to_actual_type = checkAndGetDataType(to_type.get())) return createWrapper(from_type, to_actual_type); - else if (const auto to_actual_type = checkAndGetDataType(to_type)) + else if (const auto to_actual_type = checkAndGetDataType(to_type.get())) return createWrapper(from_type, to_actual_type); - else if (const auto to_actual_type = checkAndGetDataType(to_type)) + else if (const auto to_actual_type = checkAndGetDataType(to_type.get())) return createWrapper(from_type, to_actual_type); - else if (const auto to_actual_type = checkAndGetDataType(to_type)) + else if (const auto to_actual_type = checkAndGetDataType(to_type.get())) return createWrapper(from_type, to_actual_type); - else if (const auto to_actual_type = checkAndGetDataType(to_type)) + else if (const auto to_actual_type = checkAndGetDataType(to_type.get())) return createWrapper(from_type, to_actual_type); - else if (const auto to_actual_type = checkAndGetDataType(to_type)) + else if (const auto to_actual_type = checkAndGetDataType(to_type.get())) return createWrapper(from_type, to_actual_type); - else if (const auto to_actual_type = checkAndGetDataType(to_type)) + else if (const auto to_actual_type = checkAndGetDataType(to_type.get())) return createWrapper(from_type, to_actual_type); - else if (const auto to_actual_type = checkAndGetDataType(to_type)) + else if (const auto to_actual_type = checkAndGetDataType(to_type.get())) return createWrapper(from_type, to_actual_type); - else if (const auto to_actual_type = checkAndGetDataType(to_type)) + else if (const auto to_actual_type = checkAndGetDataType(to_type.get())) return createWrapper(from_type, to_actual_type); - else if (const auto to_actual_type = checkAndGetDataType(to_type)) + else if (const auto to_actual_type = checkAndGetDataType(to_type.get())) return createWrapper(from_type, to_actual_type); - else if (const auto to_actual_type = checkAndGetDataType(to_type)) + else if (const auto to_actual_type = checkAndGetDataType(to_type.get())) return createWrapper(from_type, to_actual_type); - else if (const auto type_fixed_string = checkAndGetDataType(to_type)) + else if (const auto type_fixed_string = checkAndGetDataType(to_type.get())) return 
createFixedStringWrapper(from_type, type_fixed_string->getN()); - else if (const auto type_array = checkAndGetDataType(to_type)) + else if (const auto type_array = checkAndGetDataType(to_type.get())) return createArrayWrapper(from_type, type_array); - else if (const auto type_tuple = checkAndGetDataType(to_type)) + else if (const auto type_tuple = checkAndGetDataType(to_type.get())) return createTupleWrapper(from_type, type_tuple); - else if (const auto type_enum = checkAndGetDataType(to_type)) + else if (const auto type_enum = checkAndGetDataType(to_type.get())) return createEnumWrapper(from_type, type_enum); - else if (const auto type_enum = checkAndGetDataType(to_type)) + else if (const auto type_enum = checkAndGetDataType(to_type.get())) return createEnumWrapper(from_type, type_enum); /// It's possible to use ConvertImplGenericFromString to convert from String to AggregateFunction, diff --git a/dbms/tests/queries/0_stateless/00580_cast_nullable_to_non_nullable.reference b/dbms/tests/queries/0_stateless/00580_cast_nullable_to_non_nullable.reference new file mode 100644 index 00000000000..8b1acc12b63 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00580_cast_nullable_to_non_nullable.reference @@ -0,0 +1,10 @@ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 diff --git a/dbms/tests/queries/0_stateless/00580_cast_nullable_to_non_nullable.sql b/dbms/tests/queries/0_stateless/00580_cast_nullable_to_non_nullable.sql new file mode 100644 index 00000000000..c50e35d4338 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00580_cast_nullable_to_non_nullable.sql @@ -0,0 +1 @@ +SELECT CAST(number = 999999 ? NULL : number AS UInt64) FROM system.numbers LIMIT 10; From f792ab0824fd87c25f360c739e3a4554d3103570 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Feb 2018 00:35:17 +0300 Subject: [PATCH 069/209] Remove RemoveColumnsBlockInputStream #1947 --- .../RemoveColumnsBlockInputStream.h | 54 ------------------- dbms/src/Storages/StorageMerge.cpp | 33 +++++++++--- 2 files changed, 25 insertions(+), 62 deletions(-) delete mode 100644 dbms/src/DataStreams/RemoveColumnsBlockInputStream.h diff --git a/dbms/src/DataStreams/RemoveColumnsBlockInputStream.h b/dbms/src/DataStreams/RemoveColumnsBlockInputStream.h deleted file mode 100644 index a8b54962209..00000000000 --- a/dbms/src/DataStreams/RemoveColumnsBlockInputStream.h +++ /dev/null @@ -1,54 +0,0 @@ -#pragma once - -#include - - - -namespace DB -{ - -/** Removes the specified columns from the block. 
- */ -class RemoveColumnsBlockInputStream : public IProfilingBlockInputStream -{ -public: - RemoveColumnsBlockInputStream( - BlockInputStreamPtr input_, - const Names & columns_to_remove_) - : columns_to_remove(columns_to_remove_) - { - children.push_back(input_); - } - - String getName() const override { return "RemoveColumns"; } - - Block getHeader() const override - { - Block res = children.back()->getHeader(); - - for (const auto & it : columns_to_remove) - if (res.has(it)) - res.erase(it); - - return res; - } - -protected: - Block readImpl() override - { - Block res = children.back()->read(); - if (!res) - return res; - - for (const auto & it : columns_to_remove) - if (res.has(it)) - res.erase(it); - - return res; - } - -private: - Names columns_to_remove; -}; - -} diff --git a/dbms/src/Storages/StorageMerge.cpp b/dbms/src/Storages/StorageMerge.cpp index f503606a742..d94a41e37ab 100644 --- a/dbms/src/Storages/StorageMerge.cpp +++ b/dbms/src/Storages/StorageMerge.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include @@ -284,6 +283,9 @@ BlockInputStreams StorageMerge::read( if (processed_stage_in_source_tables) processed_stage = *processed_stage_in_source_tables; + if (res.empty()) + return res; + res = narrowBlockInputStreams(res, num_streams); /// Added to avoid different block structure from different sources @@ -294,18 +296,33 @@ BlockInputStreams StorageMerge::read( } else { - /// Blocks from distributed tables may have extra columns. + /// Blocks from distributed tables may have extra columns. TODO Why? /// We need to remove them to make blocks compatible. + + /// Remove columns that are in "column_names" but not in first level of SELECT query. + + Names filtered_columns = res.at(0)->getHeader().getNames(); + std::set filtered_columns_set(filtered_columns.begin(), filtered_columns.end()); + bool need_remove = false; + auto identifiers = collectIdentifiersInFirstLevelOfSelectQuery(query); std::set identifiers_set(identifiers.begin(), identifiers.end()); - Names columns_to_remove; - for (const auto & column : column_names) - if (!identifiers_set.count(column)) - columns_to_remove.push_back(column); - if (!columns_to_remove.empty()) + for (const auto & column : column_names) + { + if (filtered_columns_set.count(column) && !identifiers_set.count(column)) + { + need_remove = true; + filtered_columns_set.erase(column); + } + } + + if (need_remove) + { + filtered_columns.assign(filtered_columns_set.begin(), filtered_columns_set.end()); for (auto & stream : res) - stream = std::make_shared(stream, columns_to_remove); + stream = std::make_shared(stream, filtered_columns, true); + } } return res; From 245fd61b12f5b64d503d87aa786e639078a6c395 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Feb 2018 02:02:35 +0300 Subject: [PATCH 070/209] Unify NullableAdapterBlockInputStream and CastTypeBlockInputStream #1947 --- .../DataStreams/CastTypeBlockInputStream.cpp | 76 ----------- .../DataStreams/CastTypeBlockInputStream.h | 33 ----- .../ConvertingBlockInputStream.cpp | 86 ++++++++++++ .../DataStreams/ConvertingBlockInputStream.h | 44 +++++++ .../NullableAdapterBlockInputStream.cpp | 123 ------------------ .../NullableAdapterBlockInputStream.h | 57 -------- .../Interpreters/InterpreterInsertQuery.cpp | 9 +- dbms/src/Storages/StorageMerge.cpp | 14 +- 8 files changed, 139 insertions(+), 303 deletions(-) delete mode 100644 dbms/src/DataStreams/CastTypeBlockInputStream.cpp delete mode 100644 dbms/src/DataStreams/CastTypeBlockInputStream.h create mode 100644 
dbms/src/DataStreams/ConvertingBlockInputStream.cpp create mode 100644 dbms/src/DataStreams/ConvertingBlockInputStream.h delete mode 100644 dbms/src/DataStreams/NullableAdapterBlockInputStream.cpp delete mode 100644 dbms/src/DataStreams/NullableAdapterBlockInputStream.h diff --git a/dbms/src/DataStreams/CastTypeBlockInputStream.cpp b/dbms/src/DataStreams/CastTypeBlockInputStream.cpp deleted file mode 100644 index 5028799d41d..00000000000 --- a/dbms/src/DataStreams/CastTypeBlockInputStream.cpp +++ /dev/null @@ -1,76 +0,0 @@ -#include -#include - - -namespace DB -{ - - -CastTypeBlockInputStream::CastTypeBlockInputStream( - const Context & context_, - const BlockInputStreamPtr & input, - const Block & reference_definition) - : context(context_) -{ - children.emplace_back(input); - - Block input_header = input->getHeader(); - - for (size_t col_num = 0, num_columns = input_header.columns(); col_num < num_columns; ++col_num) - { - const auto & elem = input_header.getByPosition(col_num); - - if (!reference_definition.has(elem.name)) - { - header.insert(elem); - continue; - } - - const auto & ref_column = reference_definition.getByName(elem.name); - - /// Force conversion if source and destination types is different. - if (ref_column.type->equals(*elem.type)) - { - header.insert(elem); - } - else - { - header.insert({ castColumn(elem, ref_column.type, context), ref_column.type, elem.name }); - cast_description.emplace(col_num, ref_column.type); - } - } -} - -String CastTypeBlockInputStream::getName() const -{ - return "CastType"; -} - -Block CastTypeBlockInputStream::readImpl() -{ - Block block = children.back()->read(); - - if (!block) - return block; - - if (cast_description.empty()) - return block; - - size_t num_columns = block.columns(); - Block res = block; - - for (size_t col = 0; col < num_columns; ++col) - { - auto it = cast_description.find(col); - if (cast_description.end() != it) - { - auto & elem = res.getByPosition(col); - elem.column = castColumn(elem, it->second, context); - elem.type = it->second; - } - } - - return res; -} - -} diff --git a/dbms/src/DataStreams/CastTypeBlockInputStream.h b/dbms/src/DataStreams/CastTypeBlockInputStream.h deleted file mode 100644 index f84f6dacf0e..00000000000 --- a/dbms/src/DataStreams/CastTypeBlockInputStream.h +++ /dev/null @@ -1,33 +0,0 @@ -#pragma once - -#include -#include - - -namespace DB -{ - -/// Implicitly converts types. 
-class CastTypeBlockInputStream : public IProfilingBlockInputStream -{ -public: - CastTypeBlockInputStream(const Context & context, - const BlockInputStreamPtr & input, - const Block & reference_definition); - - String getName() const override; - - Block getHeader() const override { return header; } - -private: - Block readImpl() override; - - const Context & context; - Block header; - - /// Describes required conversions on source block - /// Contains column numbers in source block that should be converted - std::unordered_map cast_description; -}; - -} diff --git a/dbms/src/DataStreams/ConvertingBlockInputStream.cpp b/dbms/src/DataStreams/ConvertingBlockInputStream.cpp new file mode 100644 index 00000000000..fcb3082fcf4 --- /dev/null +++ b/dbms/src/DataStreams/ConvertingBlockInputStream.cpp @@ -0,0 +1,86 @@ +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int THERE_IS_NO_COLUMN; + extern const int BLOCKS_HAVE_DIFFERENT_STRUCTURE; +} + + +ConvertingBlockInputStream::ConvertingBlockInputStream( + const Context & context_, + const BlockInputStreamPtr & input, + const Block & result_header) + : context(context_), header(result_header), conversion(header.columns()) +{ + children.emplace_back(input); + + Block input_header = input->getHeader(); + size_t num_input_columns = input_header.columns(); + + for (size_t result_col_num = 0, num_result_columns = result_header.columns(); result_col_num < num_result_columns; ++result_col_num) + { + const auto & res_elem = result_header.getByPosition(result_col_num); + + if (input_header.has(res_elem.name)) + conversion[result_col_num] = input_header.getPositionByName(res_elem.name); + else if (result_col_num < num_input_columns) + conversion[result_col_num] = result_col_num; + else + throw Exception("Cannot find column " + backQuoteIfNeed(res_elem.name) + " in source stream", + ErrorCodes::THERE_IS_NO_COLUMN); + + const auto & src_elem = input_header.getByPosition(conversion[result_col_num]); + + /// Check constants. + + if (res_elem.column->isColumnConst()) + { + if (!src_elem.column->isColumnConst()) + throw Exception("Cannot convert column " + backQuoteIfNeed(res_elem.name) + + " because it is non constant in source stream but must be constant in result", + ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE); + else if (static_cast(*src_elem.column).getField() != static_cast(*res_elem.column).getField()) + throw Exception("Cannot convert column " + backQuoteIfNeed(res_elem.name) + + " because it is constant but values of constants are different in source and result", + ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE); + } + + /// Check conversion by dry run CAST function. 
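+        /// (The converted column is discarded: the call is made on the sample columns only, so that an
+        /// impossible conversion throws here, at stream construction time, rather than on the first block.)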
+
+        castColumn(src_elem, res_elem.type, context);
+    }
+}
+
+
+Block ConvertingBlockInputStream::readImpl()
+{
+    Block src = children.back()->read();
+
+    if (!src)
+        return src;
+
+    Block res = header.cloneEmpty();
+    for (size_t res_pos = 0, size = conversion.size(); res_pos < size; ++res_pos)
+    {
+        const auto & src_elem = src.getByPosition(conversion[res_pos]);
+        auto & res_elem = res.getByPosition(res_pos);
+
+        ColumnPtr converted = castColumn(src_elem, res_elem.type, context);
+
+        if (src_elem.column->isColumnConst() && !res_elem.column->isColumnConst())
+            converted = converted->convertToFullColumnIfConst();
+
+        res_elem.column = std::move(converted);
+    }
+    return res;
+}
+
+}
diff --git a/dbms/src/DataStreams/ConvertingBlockInputStream.h b/dbms/src/DataStreams/ConvertingBlockInputStream.h
new file mode 100644
index 00000000000..758f57ab833
--- /dev/null
+++ b/dbms/src/DataStreams/ConvertingBlockInputStream.h
@@ -0,0 +1,44 @@
+#pragma once
+
+#include
+#include
+
+
+namespace DB
+{
+
+/** Convert one block structure to another:
+ *
+ * Leaves only the necessary columns;
+ *
+ * Columns are searched in the source first by name,
+ * and if there is no column with the same name, then by position.
+ *
+ * Converting types of matching columns (with the CAST function).
+ *
+ * Materializing columns which are const in the source and non-const in the result;
+ * throwing if they are const in the result but non-const in the source,
+ * or if they are const and have different values.
+ */
+class ConvertingBlockInputStream : public IProfilingBlockInputStream
+{
+public:
+    ConvertingBlockInputStream(const Context & context,
+        const BlockInputStreamPtr & input,
+        const Block & result_header);
+
+    String getName() const override { return "Converting"; }
+    Block getHeader() const override { return header; }
+
+private:
+    Block readImpl() override;
+
+    const Context & context;
+    Block header;
+
+    /// How to construct result block. Position in source block, where to get each column.
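+    /// (For example, conversion[2] == 5 means that result column 2 is filled from source column 5.)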
+ using Conversion = std::vector; + Conversion conversion; +}; + +} diff --git a/dbms/src/DataStreams/NullableAdapterBlockInputStream.cpp b/dbms/src/DataStreams/NullableAdapterBlockInputStream.cpp deleted file mode 100644 index d7d23633b72..00000000000 --- a/dbms/src/DataStreams/NullableAdapterBlockInputStream.cpp +++ /dev/null @@ -1,123 +0,0 @@ -#include -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN; - extern const int TYPE_MISMATCH; - extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; -} - - -static Block transform(const Block & block, const NullableAdapterBlockInputStream::Actions & actions, const std::vector> & rename) -{ - size_t num_columns = block.columns(); - - Block res; - for (size_t i = 0; i < num_columns; ++i) - { - const auto & elem = block.getByPosition(i); - - switch (actions[i]) - { - case NullableAdapterBlockInputStream::TO_ORDINARY: - { - const auto & nullable_col = static_cast(*elem.column); - const auto & nullable_type = static_cast(*elem.type); - - const auto & null_map = nullable_col.getNullMapData(); - bool has_nulls = !memoryIsZero(null_map.data(), null_map.size()); - - if (has_nulls) - throw Exception{"Cannot insert NULL value into non-nullable column", - ErrorCodes::CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN}; - else - res.insert({ - nullable_col.getNestedColumnPtr(), - nullable_type.getNestedType(), - rename[i].value_or(elem.name)}); - break; - } - case NullableAdapterBlockInputStream::TO_NULLABLE: - { - ColumnPtr null_map = ColumnUInt8::create(elem.column->size(), 0); - - res.insert({ - ColumnNullable::create(elem.column, null_map), - std::make_shared(elem.type), - rename[i].value_or(elem.name)}); - break; - } - case NullableAdapterBlockInputStream::NONE: - { - res.insert({elem.column, elem.type, rename[i].value_or(elem.name)}); - break; - } - } - } - - return res; -} - - -NullableAdapterBlockInputStream::NullableAdapterBlockInputStream( - const BlockInputStreamPtr & input, - const Block & src_header, const Block & res_header) -{ - buildActions(src_header, res_header); - children.push_back(input); - header = transform(src_header, actions, rename); -} - - -Block NullableAdapterBlockInputStream::readImpl() -{ - Block block = children.back()->read(); - - if (!block) - return block; - - return transform(block, actions, rename); -} - -void NullableAdapterBlockInputStream::buildActions( - const Block & src_header, - const Block & res_header) -{ - size_t in_size = src_header.columns(); - - if (res_header.columns() != in_size) - throw Exception("Number of columns in INSERT SELECT doesn't match", ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH); - - actions.reserve(in_size); - rename.reserve(in_size); - - for (size_t i = 0; i < in_size; ++i) - { - const auto & in_elem = src_header.getByPosition(i); - const auto & out_elem = res_header.getByPosition(i); - - bool is_in_nullable = in_elem.type->isNullable(); - bool is_out_nullable = out_elem.type->isNullable(); - - if (is_in_nullable && !is_out_nullable) - actions.push_back(TO_ORDINARY); - else if (!is_in_nullable && is_out_nullable) - actions.push_back(TO_NULLABLE); - else - actions.push_back(NONE); - - if (in_elem.name != out_elem.name) - rename.emplace_back(std::make_optional(out_elem.name)); - else - rename.emplace_back(); - } -} - -} diff --git a/dbms/src/DataStreams/NullableAdapterBlockInputStream.h b/dbms/src/DataStreams/NullableAdapterBlockInputStream.h deleted file mode 100644 index 60c2b2ec16e..00000000000 --- 
a/dbms/src/DataStreams/NullableAdapterBlockInputStream.h +++ /dev/null @@ -1,57 +0,0 @@ -#pragma once - -#include -#include - -namespace DB -{ - -/// This stream allows perfoming INSERT requests in which the types of -/// the target and source blocks are compatible up to nullability: -/// -/// - if a target column is nullable while the corresponding source -/// column is not, we embed the source column into a nullable column; -/// - if a source column is nullable while the corresponding target -/// column is not, we extract the nested column from the source -/// while checking that is doesn't actually contain NULLs; -/// - otherwise we just perform an identity mapping. -class NullableAdapterBlockInputStream : public IProfilingBlockInputStream -{ -public: - NullableAdapterBlockInputStream(const BlockInputStreamPtr & input, const Block & src_header, const Block & res_header); - - String getName() const override { return "NullableAdapter"; } - - Block getHeader() const override { return header; } - - - /// Given a column of a block we have just read, - /// how must we process it? - enum Action - { - /// Do nothing. - NONE = 0, - /// Convert nullable column to ordinary column. - TO_ORDINARY, - /// Convert non-nullable column to nullable column. - TO_NULLABLE - }; - - /// Actions to be taken for each column of a block. - using Actions = std::vector; - -private: - Block readImpl() override; - - /// Determine the actions to be taken using the source sample block, - /// which describes the columns from which we fetch data inside an INSERT - /// query, and the target sample block which contains the columns - /// we insert data into. - void buildActions(const Block & src_header, const Block & res_header); - - Block header; - Actions actions; - std::vector> rename; -}; - -} diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.cpp b/dbms/src/Interpreters/InterpreterInsertQuery.cpp index 60cd5ea70cb..1a273d32e5c 100644 --- a/dbms/src/Interpreters/InterpreterInsertQuery.cpp +++ b/dbms/src/Interpreters/InterpreterInsertQuery.cpp @@ -3,11 +3,9 @@ #include #include -#include #include -#include +#include #include -#include #include #include #include @@ -104,8 +102,6 @@ BlockIO InterpreterInsertQuery::execute() out = std::make_shared(query.database, query.table, table, context, query_ptr, query.no_destination); - out = std::make_shared(out, table->getSampleBlock()); - out = std::make_shared( out, getSampleBlock(query, table), required_columns, table->column_defaults, context, static_cast(context.getSettingsRef().strict_insert_defaults)); @@ -127,8 +123,7 @@ BlockIO InterpreterInsertQuery::execute() res.in = interpreter_select.execute().in; - res.in = std::make_shared(res.in, res.in->getHeader(), res.out->getHeader()); - res.in = std::make_shared(context, res.in, res.out->getHeader()); + res.in = std::make_shared(context, res.in, res.out->getHeader()); res.in = std::make_shared(res.in, res.out); res.out = nullptr; diff --git a/dbms/src/Storages/StorageMerge.cpp b/dbms/src/Storages/StorageMerge.cpp index d94a41e37ab..432afab093e 100644 --- a/dbms/src/Storages/StorageMerge.cpp +++ b/dbms/src/Storages/StorageMerge.cpp @@ -2,23 +2,23 @@ #include #include #include +#include +#include #include #include #include +#include #include #include #include -#include #include #include +#include +#include #include #include #include #include -#include -#include -#include -#include namespace DB @@ -230,7 +230,7 @@ BlockInputStreams StorageMerge::read( for (auto & stream : source_streams) { /// will throw if some columns 
not convertible - stream = std::make_shared(context, stream, header); + stream = std::make_shared(context, stream, header); } } else @@ -262,7 +262,7 @@ BlockInputStreams StorageMerge::read( if (!streams.empty()) { /// will throw if some columns not convertible - stream = std::make_shared(context, stream, header); + stream = std::make_shared(context, stream, header); } return stream; })); From 640daaf052b7cf312f40a04c8964db06cb4b4600 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Feb 2018 04:00:47 +0300 Subject: [PATCH 071/209] Fixed test #1947 --- .../ConvertingBlockInputStream.cpp | 34 +++- .../DataStreams/ConvertingBlockInputStream.h | 16 +- .../Interpreters/InterpreterInsertQuery.cpp | 2 +- dbms/src/Storages/StorageMerge.cpp | 179 +++++++----------- .../0_stateless/00550_join_insert_select.sh | 2 +- 5 files changed, 111 insertions(+), 122 deletions(-) diff --git a/dbms/src/DataStreams/ConvertingBlockInputStream.cpp b/dbms/src/DataStreams/ConvertingBlockInputStream.cpp index fcb3082fcf4..8313f5820e5 100644 --- a/dbms/src/DataStreams/ConvertingBlockInputStream.cpp +++ b/dbms/src/DataStreams/ConvertingBlockInputStream.cpp @@ -11,31 +11,45 @@ namespace ErrorCodes { extern const int THERE_IS_NO_COLUMN; extern const int BLOCKS_HAVE_DIFFERENT_STRUCTURE; + extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; } ConvertingBlockInputStream::ConvertingBlockInputStream( const Context & context_, const BlockInputStreamPtr & input, - const Block & result_header) + const Block & result_header, + MatchColumnsMode mode) : context(context_), header(result_header), conversion(header.columns()) { children.emplace_back(input); Block input_header = input->getHeader(); - size_t num_input_columns = input_header.columns(); - for (size_t result_col_num = 0, num_result_columns = result_header.columns(); result_col_num < num_result_columns; ++result_col_num) + size_t num_input_columns = input_header.columns(); + size_t num_result_columns = result_header.columns(); + + if (mode == MatchColumnsMode::Position && num_input_columns != num_result_columns) + throw Exception("Number of columns doesn't match", ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH); + + for (size_t result_col_num = 0; result_col_num < num_result_columns; ++result_col_num) { const auto & res_elem = result_header.getByPosition(result_col_num); - if (input_header.has(res_elem.name)) - conversion[result_col_num] = input_header.getPositionByName(res_elem.name); - else if (result_col_num < num_input_columns) - conversion[result_col_num] = result_col_num; - else - throw Exception("Cannot find column " + backQuoteIfNeed(res_elem.name) + " in source stream", - ErrorCodes::THERE_IS_NO_COLUMN); + switch (mode) + { + case MatchColumnsMode::Position: + conversion[result_col_num] = result_col_num; + break; + + case MatchColumnsMode::Name: + if (input_header.has(res_elem.name)) + conversion[result_col_num] = input_header.getPositionByName(res_elem.name); + else + throw Exception("Cannot find column " + backQuoteIfNeed(res_elem.name) + " in source stream", + ErrorCodes::THERE_IS_NO_COLUMN); + break; + } const auto & src_elem = input_header.getByPosition(conversion[result_col_num]); diff --git a/dbms/src/DataStreams/ConvertingBlockInputStream.h b/dbms/src/DataStreams/ConvertingBlockInputStream.h index 758f57ab833..e4511477a72 100644 --- a/dbms/src/DataStreams/ConvertingBlockInputStream.h +++ b/dbms/src/DataStreams/ConvertingBlockInputStream.h @@ -23,9 +23,19 @@ namespace DB class ConvertingBlockInputStream : public IProfilingBlockInputStream { public: - 
-    ConvertingBlockInputStream(const Context & context,
-                               const BlockInputStreamPtr & input,
-                               const Block & result_header);
+    enum class MatchColumnsMode
+    {
+        /// Require same number of columns in source and result. Match columns by corresponding positions, regardless of names.
+        Position,
+        /// Find columns in source by their names. Allow extra columns in the source.
+        Name
+    };
+
+    ConvertingBlockInputStream(
+        const Context & context,
+        const BlockInputStreamPtr & input,
+        const Block & result_header,
+        MatchColumnsMode mode);
 
     String getName() const override { return "Converting"; }
     Block getHeader() const override { return header; }
diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.cpp b/dbms/src/Interpreters/InterpreterInsertQuery.cpp
index 1a273d32e5c..a01b42761da 100644
--- a/dbms/src/Interpreters/InterpreterInsertQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterInsertQuery.cpp
@@ -123,7 +123,7 @@ BlockIO InterpreterInsertQuery::execute()
 
         res.in = interpreter_select.execute().in;
 
-        res.in = std::make_shared<ConvertingBlockInputStream>(context, res.in, res.out->getHeader());
+        res.in = std::make_shared<ConvertingBlockInputStream>(context, res.in, res.out->getHeader(), ConvertingBlockInputStream::MatchColumnsMode::Position);
         res.in = std::make_shared(res.in, res.out);
 
         res.out = nullptr;
diff --git a/dbms/src/Storages/StorageMerge.cpp b/dbms/src/Storages/StorageMerge.cpp
index 432afab093e..8ba59263f47 100644
--- a/dbms/src/Storages/StorageMerge.cpp
+++ b/dbms/src/Storages/StorageMerge.cpp
@@ -3,7 +3,9 @@
 #include
 #include
 #include
-#include
+#include
+#include
+#include
 #include
 #include
 #include
@@ -11,6 +13,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -47,6 +50,22 @@ StorageMerge::StorageMerge(
 {
 }
 
+
+NameAndTypePair StorageMerge::getColumn(const String & column_name) const
+{
+    auto type = VirtualColumnFactory::tryGetType(column_name);
+    if (type)
+        return NameAndTypePair(column_name, type);
+
+    return IStorage::getColumn(column_name);
+}
+
+bool StorageMerge::hasColumn(const String & column_name) const
+{
+    return VirtualColumnFactory::hasColumn(column_name) || IStorage::hasColumn(column_name);
+}
+
+
 bool StorageMerge::isRemote() const
 {
     auto database = context.getDatabase(source_database);
@@ -67,38 +86,6 @@ bool StorageMerge::isRemote() const
     return false;
 }
 
-NameAndTypePair StorageMerge::getColumn(const String & column_name) const
-{
-    auto type = VirtualColumnFactory::tryGetType(column_name);
-    if (type)
-        return NameAndTypePair(column_name, type);
-
-    return IStorage::getColumn(column_name);
-}
-
-bool StorageMerge::hasColumn(const String & column_name) const
-{
-    return VirtualColumnFactory::hasColumn(column_name) || IStorage::hasColumn(column_name);
-}
-
-static Names collectIdentifiersInFirstLevelOfSelectQuery(ASTPtr ast)
-{
-    ASTSelectQuery & select = typeid_cast<ASTSelectQuery &>(*ast);
-    ASTExpressionList & node = typeid_cast<ASTExpressionList &>(*select.select_expression_list);
-    ASTs & asts = node.children;
-
-    Names names;
-    for (size_t i = 0; i < asts.size(); ++i)
-    {
-        if (const ASTIdentifier * identifier = typeid_cast<const ASTIdentifier *>(&* asts[i]))
-        {
-            if (identifier->kind == ASTIdentifier::Kind::Column)
-                names.push_back(identifier->name);
-        }
-    }
-    return names;
-}
-
 namespace
 {
@@ -137,12 +124,19 @@ BlockInputStreams StorageMerge::read(
 {
     BlockInputStreams res;
 
-    Names virt_column_names, real_column_names;
-    for (const auto & it : column_names)
-        if (it != "_table")
-            real_column_names.push_back(it);
+    bool has_table_virtual_column = false;
+    Names real_column_names;
+    real_column_names.reserve(column_names.size());
+
+    for (const auto &
name : column_names) + { + if (name == "_table") + { + has_table_virtual_column = true; + } else - virt_column_names.push_back(it); + real_column_names.push_back(name); + } std::optional processed_stage_in_source_tables; @@ -161,8 +155,8 @@ BlockInputStreams StorageMerge::read( Block virtual_columns_block = getBlockWithVirtualColumns(selected_tables); - /// If at least one virtual column is requested, try indexing - if (!virt_column_names.empty()) + /// If _table column is requested, try filtering + if (has_table_virtual_column) { VirtualColumnUtils::filterBlockWithQuery(query, virtual_columns_block, context); auto values = VirtualColumnUtils::extractSingleValueFromBlock(virtual_columns_block, "_table"); @@ -177,7 +171,20 @@ BlockInputStreams StorageMerge::read( Context modified_context = context; modified_context.getSettingsRef().optimize_move_to_prewhere = false; - Block header = getSampleBlockForColumns(real_column_names); + /// What will be result structure depending on query processed stage in source tables? + std::map headers; + + headers[QueryProcessingStage::FetchColumns] = getSampleBlockForColumns(column_names); + + headers[QueryProcessingStage::WithMergeableState] + = materializeBlock(InterpreterSelectQuery(query_info.query, context, QueryProcessingStage::WithMergeableState, 0, + std::make_shared(headers[QueryProcessingStage::FetchColumns])).execute().in->getHeader()); + + headers[QueryProcessingStage::Complete] + = materializeBlock(InterpreterSelectQuery(query_info.query, context, QueryProcessingStage::Complete, 0, + std::make_shared(headers[QueryProcessingStage::FetchColumns])).execute().in->getHeader()); + + Block header; size_t tables_count = selected_tables.size(); @@ -219,24 +226,26 @@ BlockInputStreams StorageMerge::read( throw Exception("Source tables for Merge table are processing data up to different stages", ErrorCodes::INCOMPATIBLE_SOURCE_TABLES); - /// The table may return excessive columns if we query only its virtual column. - /// We filter excessive columns. This is done only if query was not processed more than FetchColumns. - if (processed_stage_in_source_table == QueryProcessingStage::FetchColumns) + if (!header) + header = headers[processed_stage_in_source_table]; + + if (has_table_virtual_column) for (auto & stream : source_streams) - stream = std::make_shared(stream, real_column_names, true); + stream = std::make_shared>( + stream, std::make_shared(), table->getTableName(), "_table"); /// Subordinary tables could have different but convertible types, like numeric types of different width. /// We must return streams with structure equals to structure of Merge table. for (auto & stream : source_streams) - { - /// will throw if some columns not convertible - stream = std::make_shared(context, stream, header); - } + stream = std::make_shared(context, stream, header, ConvertingBlockInputStream::MatchColumnsMode::Name); } else { + if (!processed_stage_in_source_tables) + throw Exception("Logical error: unknown processed stage in source tables", ErrorCodes::LOGICAL_ERROR); + /// If many streams, initialize it lazily, to avoid long delay before start of query processing. 
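            /// (The lazy stream runs the generator lambda on its first read() call, so table->read() for a
            /// source table is executed only when that stream is actually pulled.)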
- source_streams.emplace_back(std::make_shared(header, [=] + source_streams.emplace_back(std::make_shared(header, [=]() -> BlockInputStreamPtr { QueryProcessingStage::Enum processed_stage_in_source_table = processed_stage; BlockInputStreams streams = table->read( @@ -247,36 +256,30 @@ BlockInputStreams StorageMerge::read( max_block_size, 1); - if (!processed_stage_in_source_tables) - throw Exception("Logical error: unknown processed stage in source tables", - ErrorCodes::LOGICAL_ERROR); - else if (processed_stage_in_source_table != *processed_stage_in_source_tables) + if (processed_stage_in_source_table != *processed_stage_in_source_tables) throw Exception("Source tables for Merge table are processing data up to different stages", ErrorCodes::INCOMPATIBLE_SOURCE_TABLES); - if (processed_stage_in_source_table == QueryProcessingStage::FetchColumns) - for (auto & stream : streams) - stream = std::make_shared(stream, real_column_names, true); - - auto stream = streams.empty() ? std::make_shared(header) : streams.front(); - if (!streams.empty()) + if (streams.empty()) { - /// will throw if some columns not convertible - stream = std::make_shared(context, stream, header); + return std::make_shared(header); + } + else + { + BlockInputStreamPtr stream = streams.size() > 1 ? std::make_shared(streams) : streams[0]; + + if (has_table_virtual_column) + stream = std::make_shared>( + stream, std::make_shared(), table->getTableName(), "_table"); + + return std::make_shared(context, stream, header, ConvertingBlockInputStream::MatchColumnsMode::Name); } - return stream; })); } for (auto & stream : source_streams) stream->addTableLock(table_lock); - for (auto & virtual_column : virt_column_names) - if (virtual_column == "_table") - for (auto & stream : source_streams) - stream = std::make_shared>( - stream, std::make_shared(), table->getTableName(), "_table"); - res.insert(res.end(), source_streams.begin(), source_streams.end()); } @@ -287,44 +290,6 @@ BlockInputStreams StorageMerge::read( return res; res = narrowBlockInputStreams(res, num_streams); - - /// Added to avoid different block structure from different sources - if (!processed_stage_in_source_tables || *processed_stage_in_source_tables == QueryProcessingStage::FetchColumns) - { - for (auto & stream : res) - stream = std::make_shared(stream, column_names, true); - } - else - { - /// Blocks from distributed tables may have extra columns. TODO Why? - /// We need to remove them to make blocks compatible. - - /// Remove columns that are in "column_names" but not in first level of SELECT query. 
- - Names filtered_columns = res.at(0)->getHeader().getNames(); - std::set filtered_columns_set(filtered_columns.begin(), filtered_columns.end()); - bool need_remove = false; - - auto identifiers = collectIdentifiersInFirstLevelOfSelectQuery(query); - std::set identifiers_set(identifiers.begin(), identifiers.end()); - - for (const auto & column : column_names) - { - if (filtered_columns_set.count(column) && !identifiers_set.count(column)) - { - need_remove = true; - filtered_columns_set.erase(column); - } - } - - if (need_remove) - { - filtered_columns.assign(filtered_columns_set.begin(), filtered_columns_set.end()); - for (auto & stream : res) - stream = std::make_shared(stream, filtered_columns, true); - } - } - return res; } diff --git a/dbms/tests/queries/0_stateless/00550_join_insert_select.sh b/dbms/tests/queries/0_stateless/00550_join_insert_select.sh index 6cc71fedc06..3e78942f989 100755 --- a/dbms/tests/queries/0_stateless/00550_join_insert_select.sh +++ b/dbms/tests/queries/0_stateless/00550_join_insert_select.sh @@ -20,6 +20,6 @@ INSERT INTO test.test1 SELECT id, name FROM test.test2 ANY LEFT OUTER JOIN test. DROP TABLE test.test1; DROP TABLE test.test2; DROP TABLE test.test3; -" 2>&1 | grep -F "Number of columns in INSERT SELECT doesn't match" | wc -l +" 2>&1 | grep -F "Number of columns doesn't match" | wc -l $CLICKHOUSE_CLIENT --query="SELECT 1"; From ce1f9d7feb61b1f9cee10e4fa9193828588298fc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Feb 2018 08:59:12 +0300 Subject: [PATCH 072/209] Fixed error #1947 --- dbms/src/Storages/StorageMerge.cpp | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/dbms/src/Storages/StorageMerge.cpp b/dbms/src/Storages/StorageMerge.cpp index 8ba59263f47..6193ffce779 100644 --- a/dbms/src/Storages/StorageMerge.cpp +++ b/dbms/src/Storages/StorageMerge.cpp @@ -172,18 +172,6 @@ BlockInputStreams StorageMerge::read( modified_context.getSettingsRef().optimize_move_to_prewhere = false; /// What will be result structure depending on query processed stage in source tables? 
- std::map headers; - - headers[QueryProcessingStage::FetchColumns] = getSampleBlockForColumns(column_names); - - headers[QueryProcessingStage::WithMergeableState] - = materializeBlock(InterpreterSelectQuery(query_info.query, context, QueryProcessingStage::WithMergeableState, 0, - std::make_shared(headers[QueryProcessingStage::FetchColumns])).execute().in->getHeader()); - - headers[QueryProcessingStage::Complete] - = materializeBlock(InterpreterSelectQuery(query_info.query, context, QueryProcessingStage::Complete, 0, - std::make_shared(headers[QueryProcessingStage::FetchColumns])).execute().in->getHeader()); - Block header; size_t tables_count = selected_tables.size(); @@ -227,7 +215,22 @@ BlockInputStreams StorageMerge::read( ErrorCodes::INCOMPATIBLE_SOURCE_TABLES); if (!header) - header = headers[processed_stage_in_source_table]; + { + switch (processed_stage_in_source_table) + { + case QueryProcessingStage::FetchColumns: + header = getSampleBlockForColumns(column_names); + break; + case QueryProcessingStage::WithMergeableState: + header = materializeBlock(InterpreterSelectQuery(query_info.query, context, QueryProcessingStage::WithMergeableState, 0, + std::make_shared(getSampleBlockForColumns(column_names))).execute().in->getHeader()); + break; + case QueryProcessingStage::Complete: + header = materializeBlock(InterpreterSelectQuery(query_info.query, context, QueryProcessingStage::Complete, 0, + std::make_shared(getSampleBlockForColumns(column_names))).execute().in->getHeader()); + break; + } + } if (has_table_virtual_column) for (auto & stream : source_streams) From 7063e0cffe87b1a445bfa696aab98af9de63bbb9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Feb 2018 09:00:03 +0300 Subject: [PATCH 073/209] Removed unused method #1947 --- dbms/src/Interpreters/InterpreterSelectQuery.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.h b/dbms/src/Interpreters/InterpreterSelectQuery.h index 14416e5fd46..857b8c31bd6 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.h +++ b/dbms/src/Interpreters/InterpreterSelectQuery.h @@ -150,10 +150,6 @@ private: */ void getDatabaseAndTableNames(String & database_name, String & table_name); - /** Select from the list of columns any, better - with minimum size. - */ - String getAnyColumn(); - /// Different stages of query execution. /// Fetch data from the table. Returns the stage to which the query was processed in Storage. From 1f029c3f15f27c4a0f8590c4988bbf965e594ab2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Feb 2018 09:00:48 +0300 Subject: [PATCH 074/209] Preparations #1947 --- .../Interpreters/InterpreterSelectQuery.cpp | 234 +++++++++--------- .../src/Interpreters/InterpreterSelectQuery.h | 31 +++ 2 files changed, 147 insertions(+), 118 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index da60ef7ada4..07ba3675546 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -186,6 +186,8 @@ void InterpreterSelectQuery::basicInit() } table_lock = storage->lockStructure(false, __PRETTY_FUNCTION__); + + /// TODO This looks weird. 
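+        /// (getSampleBlockNonMaterialized returns the table header without MATERIALIZED columns,
+        /// which are not read unless explicitly requested.)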
source_header = storage->getSampleBlockNonMaterialized(); } } @@ -402,6 +404,87 @@ void InterpreterSelectQuery::executeWithoutUnionImpl(Pipeline & pipeline, const executeSingleQuery(pipeline); } + +InterpreterSelectQuery::AnalysisResult InterpreterSelectQuery::analyzeExpressions(QueryProcessingStage::Enum from_stage) +{ + AnalysisResult res; + + /// Do I need to perform the first part of the pipeline - running on remote servers during distributed processing. + res.first_stage = from_stage < QueryProcessingStage::WithMergeableState + && to_stage >= QueryProcessingStage::WithMergeableState; + /// Do I need to execute the second part of the pipeline - running on the initiating server during distributed processing. + res.second_stage = from_stage <= QueryProcessingStage::WithMergeableState + && to_stage > QueryProcessingStage::WithMergeableState; + + /** First we compose a chain of actions and remember the necessary steps from it. + * Regardless of from_stage and to_stage, we will compose a complete sequence of actions to perform optimization and + * throw out unnecessary columns based on the entire query. In unnecessary parts of the query, we will not execute subqueries. + */ + + { + ExpressionActionsChain chain; + + res.need_aggregate = query_analyzer->hasAggregation(); + + query_analyzer->appendArrayJoin(chain, !res.first_stage); + + if (query_analyzer->appendJoin(chain, !res.first_stage)) + { + res.has_join = true; + res.before_join = chain.getLastActions(); + chain.addStep(); + } + + if (query_analyzer->appendWhere(chain, !res.first_stage)) + { + res.has_where = true; + res.before_where = chain.getLastActions(); + chain.addStep(); + } + + if (res.need_aggregate) + { + query_analyzer->appendGroupBy(chain, !res.first_stage); + query_analyzer->appendAggregateFunctionsArguments(chain, !res.first_stage); + res.before_aggregation = chain.getLastActions(); + + chain.finalize(); + chain.clear(); + + if (query_analyzer->appendHaving(chain, !res.second_stage)) + { + res.has_having = true; + res.before_having = chain.getLastActions(); + chain.addStep(); + } + } + + /// If there is aggregation, we execute expressions in SELECT and ORDER BY on the initiating server, otherwise on the source servers. + query_analyzer->appendSelect(chain, res.need_aggregate ? !res.second_stage : !res.first_stage); + res.selected_columns = chain.getLastStep().required_output; + res.has_order_by = query_analyzer->appendOrderBy(chain, res.need_aggregate ? !res.second_stage : !res.first_stage); + res.before_order_and_select = chain.getLastActions(); + chain.addStep(); + + query_analyzer->appendProjectResult(chain); + res.final_projection = chain.getLastActions(); + + chain.finalize(); + chain.clear(); + } + + /// Before executing WHERE and HAVING, remove the extra columns from the block (mostly the aggregation keys). + if (res.has_where) + res.before_where->prependProjectInput(); + if (res.has_having) + res.before_having->prependProjectInput(); + + res.subqueries_for_sets = query_analyzer->getSubqueriesForSets(); + + return res; +} + + void InterpreterSelectQuery::executeSingleQuery(Pipeline & pipeline) { /** Streams of data. When the query is executed in parallel, we have several data streams. 
@@ -423,101 +506,17 @@ void InterpreterSelectQuery::executeSingleQuery(Pipeline & pipeline) LOG_TRACE(log, QueryProcessingStage::toString(from_stage) << " -> " << QueryProcessingStage::toString(to_stage)); + AnalysisResult expressions = analyzeExpressions(from_stage); + const Settings & settings = context.getSettingsRef(); if (to_stage > QueryProcessingStage::FetchColumns) { - bool has_join = false; - bool has_where = false; - bool need_aggregate = false; - bool has_having = false; - bool has_order_by = false; - - ExpressionActionsPtr before_join; /// including JOIN - ExpressionActionsPtr before_where; - ExpressionActionsPtr before_aggregation; - ExpressionActionsPtr before_having; - ExpressionActionsPtr before_order_and_select; - ExpressionActionsPtr final_projection; - - /// Columns from the SELECT list, before renaming them to aliases. - Names selected_columns; - - /// Do I need to perform the first part of the pipeline - running on remote servers during distributed processing. - bool first_stage = from_stage < QueryProcessingStage::WithMergeableState - && to_stage >= QueryProcessingStage::WithMergeableState; - /// Do I need to execute the second part of the pipeline - running on the initiating server during distributed processing. - bool second_stage = from_stage <= QueryProcessingStage::WithMergeableState - && to_stage > QueryProcessingStage::WithMergeableState; - - /** First we compose a chain of actions and remember the necessary steps from it. - * Regardless of from_stage and to_stage, we will compose a complete sequence of actions to perform optimization and - * throw out unnecessary columns based on the entire query. In unnecessary parts of the query, we will not execute subqueries. - */ - - { - ExpressionActionsChain chain; - - need_aggregate = query_analyzer->hasAggregation(); - - query_analyzer->appendArrayJoin(chain, !first_stage); - - if (query_analyzer->appendJoin(chain, !first_stage)) - { - has_join = true; - before_join = chain.getLastActions(); - chain.addStep(); - } - - if (query_analyzer->appendWhere(chain, !first_stage)) - { - has_where = true; - before_where = chain.getLastActions(); - chain.addStep(); - } - - if (need_aggregate) - { - query_analyzer->appendGroupBy(chain, !first_stage); - query_analyzer->appendAggregateFunctionsArguments(chain, !first_stage); - before_aggregation = chain.getLastActions(); - - chain.finalize(); - chain.clear(); - - if (query_analyzer->appendHaving(chain, !second_stage)) - { - has_having = true; - before_having = chain.getLastActions(); - chain.addStep(); - } - } - - /// If there is aggregation, we execute expressions in SELECT and ORDER BY on the initiating server, otherwise on the source servers. - query_analyzer->appendSelect(chain, need_aggregate ? !second_stage : !first_stage); - selected_columns = chain.getLastStep().required_output; - has_order_by = query_analyzer->appendOrderBy(chain, need_aggregate ? !second_stage : !first_stage); - before_order_and_select = chain.getLastActions(); - chain.addStep(); - - query_analyzer->appendProjectResult(chain); - final_projection = chain.getLastActions(); - - chain.finalize(); - chain.clear(); - } - - /// Before executing WHERE and HAVING, remove the extra columns from the block (mostly the aggregation keys). - if (has_where) - before_where->prependProjectInput(); - if (has_having) - before_having->prependProjectInput(); - /// Now we will compose block streams that perform the necessary actions. /// Do I need to aggregate in a separate row rows that have not passed max_rows_to_group_by. 
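        /// (This is the GROUP BY ... WITH TOTALS case with group_by_overflow_mode = 'any': rows beyond
        /// max_rows_to_group_by are not simply dropped but accumulated into a single overflow row.)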
bool aggregate_overflow_row = - need_aggregate && + expressions.need_aggregate && query.group_by_with_totals && settings.limits.max_rows_to_group_by && settings.limits.group_by_overflow_mode == OverflowMode::ANY && @@ -525,32 +524,32 @@ void InterpreterSelectQuery::executeSingleQuery(Pipeline & pipeline) /// Do I need to immediately finalize the aggregate functions after the aggregation? bool aggregate_final = - need_aggregate && + expressions.need_aggregate && to_stage > QueryProcessingStage::WithMergeableState && !query.group_by_with_totals; - if (first_stage) + if (expressions.first_stage) { - if (has_join) + if (expressions.has_join) { const ASTTableJoin & join = static_cast(*query.join()->table_join); if (join.kind == ASTTableJoin::Kind::Full || join.kind == ASTTableJoin::Kind::Right) - pipeline.stream_with_non_joined_data = before_join->createStreamWithNonJoinedDataIfFullOrRightJoin( + pipeline.stream_with_non_joined_data = expressions.before_join->createStreamWithNonJoinedDataIfFullOrRightJoin( pipeline.firstStream()->getHeader(), settings.max_block_size); for (auto & stream : pipeline.streams) /// Applies to all sources except stream_with_non_joined_data. - stream = std::make_shared(stream, before_join); + stream = std::make_shared(stream, expressions.before_join); } - if (has_where) - executeWhere(pipeline, before_where); + if (expressions.has_where) + executeWhere(pipeline, expressions.before_where); - if (need_aggregate) - executeAggregation(pipeline, before_aggregation, aggregate_overflow_row, aggregate_final); + if (expressions.need_aggregate) + executeAggregation(pipeline, expressions.before_aggregation, aggregate_overflow_row, aggregate_final); else { - executeExpression(pipeline, before_order_and_select); - executeDistinct(pipeline, true, selected_columns); + executeExpression(pipeline, expressions.before_order_and_select); + executeDistinct(pipeline, true, expressions.selected_columns); } /** For distributed query processing, @@ -558,36 +557,36 @@ void InterpreterSelectQuery::executeSingleQuery(Pipeline & pipeline) * but there is an ORDER or LIMIT, * then we will perform the preliminary sorting and LIMIT on the remote server. 
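         * (The initiating server then only merges the pre-sorted, pre-limited streams
         * instead of sorting the whole data set again.)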
*/ - if (!second_stage && !need_aggregate && !has_having) + if (!expressions.second_stage && !expressions.need_aggregate && !expressions.has_having) { - if (has_order_by) + if (expressions.has_order_by) executeOrder(pipeline); - if (has_order_by && query.limit_length) - executeDistinct(pipeline, false, selected_columns); + if (expressions.has_order_by && query.limit_length) + executeDistinct(pipeline, false, expressions.selected_columns); if (query.limit_length) executePreLimit(pipeline); } } - if (second_stage) + if (expressions.second_stage) { bool need_second_distinct_pass; - if (need_aggregate) + if (expressions.need_aggregate) { /// If you need to combine aggregated results from multiple servers - if (!first_stage) + if (!expressions.first_stage) executeMergeAggregated(pipeline, aggregate_overflow_row, aggregate_final); if (!aggregate_final) - executeTotalsAndHaving(pipeline, has_having, before_having, aggregate_overflow_row); - else if (has_having) - executeHaving(pipeline, before_having); + executeTotalsAndHaving(pipeline, expressions.has_having, expressions.before_having, aggregate_overflow_row); + else if (expressions.has_having) + executeHaving(pipeline, expressions.before_having); - executeExpression(pipeline, before_order_and_select); - executeDistinct(pipeline, true, selected_columns); + executeExpression(pipeline, expressions.before_order_and_select); + executeDistinct(pipeline, true, expressions.selected_columns); need_second_distinct_pass = query.distinct && pipeline.hasMoreThanOneStream(); } @@ -599,19 +598,19 @@ void InterpreterSelectQuery::executeSingleQuery(Pipeline & pipeline) executeTotalsAndHaving(pipeline, false, nullptr, aggregate_overflow_row); } - if (has_order_by) + if (expressions.has_order_by) { /** If there is an ORDER BY for distributed query processing, * but there is no aggregation, then on the remote servers ORDER BY was made * - therefore, we merge the sorted streams from remote servers. */ - if (!first_stage && !need_aggregate && !(query.group_by_with_totals && !aggregate_final)) + if (!expressions.first_stage && !expressions.need_aggregate && !(query.group_by_with_totals && !aggregate_final)) executeMergeSorted(pipeline); else /// Otherwise, just sort. executeOrder(pipeline); } - executeProjection(pipeline, final_projection); + executeProjection(pipeline, expressions.final_projection); /// At this stage, we can calculate the minimums and maximums, if necessary. if (settings.extremes) @@ -624,8 +623,8 @@ void InterpreterSelectQuery::executeSingleQuery(Pipeline & pipeline) } /** Optimization - if there are several sources and there is LIMIT, then first apply the preliminary LIMIT, - * limiting the number of entries in each up to `offset + limit`. - */ + * limiting the number of rows in each up to `offset + limit`. 
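+     * (The exact LIMIT with its offset is still applied once more after the streams are united.)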
+ */ if (query.limit_length && pipeline.hasMoreThanOneStream() && !query.distinct && !query.limit_by_expression_list) executePreLimit(pipeline); @@ -653,9 +652,8 @@ void InterpreterSelectQuery::executeSingleQuery(Pipeline & pipeline) } } - SubqueriesForSets subqueries_for_sets = query_analyzer->getSubqueriesForSets(); - if (!subqueries_for_sets.empty()) - executeSubqueriesInSetsAndJoins(pipeline, subqueries_for_sets); + if (!expressions.subqueries_for_sets.empty()) + executeSubqueriesInSetsAndJoins(pipeline, expressions.subqueries_for_sets); } diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.h b/dbms/src/Interpreters/InterpreterSelectQuery.h index 857b8c31bd6..aa87c048af0 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.h +++ b/dbms/src/Interpreters/InterpreterSelectQuery.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -128,6 +129,36 @@ private: /// Execute one SELECT query from the UNION ALL chain. void executeSingleQuery(Pipeline & pipeline); + + struct AnalysisResult + { + bool has_join = false; + bool has_where = false; + bool need_aggregate = false; + bool has_having = false; + bool has_order_by = false; + + ExpressionActionsPtr before_join; /// including JOIN + ExpressionActionsPtr before_where; + ExpressionActionsPtr before_aggregation; + ExpressionActionsPtr before_having; + ExpressionActionsPtr before_order_and_select; + ExpressionActionsPtr final_projection; + + /// Columns from the SELECT list, before renaming them to aliases. + Names selected_columns; + + /// Do I need to perform the first part of the pipeline - running on remote servers during distributed processing. + bool first_stage = false; + /// Do I need to execute the second part of the pipeline - running on the initiating server during distributed processing. + bool second_stage = false; + + SubqueriesForSets subqueries_for_sets; + }; + + AnalysisResult analyzeExpressions(QueryProcessingStage::Enum from_stage); + + /** Leave only the necessary columns of the SELECT section in each query of the UNION ALL chain. * However, if you use at least one DISTINCT in the chain, then all the columns are considered necessary, * since otherwise DISTINCT would work differently. From 0207760c0516e144c32a5ee8a41215747a169994 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Feb 2018 09:37:53 +0300 Subject: [PATCH 075/209] Limit and quota on result must not apply for intermediate data in subqueries and INSERT SELECT [#CLICKHOUSE-3351] --- dbms/src/Interpreters/InterpreterInsertQuery.cpp | 3 ++- dbms/src/Interpreters/InterpreterSelectQuery.cpp | 2 +- dbms/src/Interpreters/InterpreterSelectQuery.h | 3 ++- ...581_limit_on_result_and_subquery_and_insert.reference | 3 +++ .../00581_limit_on_result_and_subquery_and_insert.sql | 9 +++++++++ 5 files changed, 17 insertions(+), 3 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00581_limit_on_result_and_subquery_and_insert.reference create mode 100644 dbms/tests/queries/0_stateless/00581_limit_on_result_and_subquery_and_insert.sql diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.cpp b/dbms/src/Interpreters/InterpreterInsertQuery.cpp index a01b42761da..259948b6c31 100644 --- a/dbms/src/Interpreters/InterpreterInsertQuery.cpp +++ b/dbms/src/Interpreters/InterpreterInsertQuery.cpp @@ -119,7 +119,8 @@ BlockIO InterpreterInsertQuery::execute() /// What type of query: INSERT or INSERT SELECT? 
if (query.select) { - InterpreterSelectQuery interpreter_select{query.select, context}; + /// Passing 1 as subquery_depth will disable limiting size of intermediate result. + InterpreterSelectQuery interpreter_select{query.select, context, QueryProcessingStage::Complete, 1}; res.in = interpreter_select.execute().in; diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 07ba3675546..c7ac52f82bf 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -353,7 +353,7 @@ BlockIO InterpreterSelectQuery::execute() if (IProfilingBlockInputStream * stream = dynamic_cast(pipeline.firstStream().get())) { /// Constraints apply only to the final result. - if (to_stage == QueryProcessingStage::Complete) + if (to_stage == QueryProcessingStage::Complete && subquery_depth == 0) { const Settings & settings = context.getSettingsRef(); diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.h b/dbms/src/Interpreters/InterpreterSelectQuery.h index aa87c048af0..ac735fb6b71 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.h +++ b/dbms/src/Interpreters/InterpreterSelectQuery.h @@ -33,7 +33,8 @@ public: * You can perform till the intermediate aggregation state, which are combined from different servers for distributed query processing. * * subquery_depth - * - to control the restrictions on the depth of nesting of subqueries. For subqueries, a value that is incremented by one is passed. + * - to control the restrictions on the depth of nesting of subqueries. For subqueries, a value that is incremented by one is passed; + * for INSERT SELECT, a value 1 is passed instead of 0. * * input * - if given - read not from the table specified in the query, but from ready source. 
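Taken together, the two hunks above confine result-size limits and quotas to the outermost query: the stream-level check now requires both `to_stage == QueryProcessingStage::Complete` and `subquery_depth == 0`, and INSERT SELECT opts out by constructing its SELECT interpreter with subquery_depth = 1. A minimal self-contained sketch of the guard, with simplified names and exception type (illustrative only, not the actual ClickHouse code):

    #include <cstddef>
    #include <stdexcept>

    /// Sketch: enforce max_result_rows only for the final result of the outer query.
    inline void checkResultRowsSketch(size_t result_rows, size_t max_result_rows,
                                      bool is_complete_stage, size_t subquery_depth)
    {
        if (!is_complete_stage || subquery_depth != 0)
            return;    /// intermediate data (subqueries, INSERT SELECT) is exempt
        if (max_result_rows != 0 && result_rows > max_result_rows)
            throw std::runtime_error("Limit for result rows exceeded");
    }

The test added below exercises exactly this: with max_result_rows = 10, eleven rows may pass through a subquery and through INSERT SELECT, although a top-level 11-row result would be rejected.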
diff --git a/dbms/tests/queries/0_stateless/00581_limit_on_result_and_subquery_and_insert.reference b/dbms/tests/queries/0_stateless/00581_limit_on_result_and_subquery_and_insert.reference new file mode 100644 index 00000000000..c3f3d08511f --- /dev/null +++ b/dbms/tests/queries/0_stateless/00581_limit_on_result_and_subquery_and_insert.reference @@ -0,0 +1,3 @@ +11 +11 +22 diff --git a/dbms/tests/queries/0_stateless/00581_limit_on_result_and_subquery_and_insert.sql b/dbms/tests/queries/0_stateless/00581_limit_on_result_and_subquery_and_insert.sql new file mode 100644 index 00000000000..c25187e8015 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00581_limit_on_result_and_subquery_and_insert.sql @@ -0,0 +1,9 @@ +SET max_result_rows = 10; + +SELECT count() FROM (SELECT * FROM system.numbers LIMIT 11); + +CREATE TEMPORARY TABLE t AS SELECT * FROM system.numbers LIMIT 11; +SELECT count() FROM t; + +INSERT INTO t SELECT * FROM system.numbers LIMIT 11; +SELECT count() FROM t; From 466837b17c505053d18afa52219c1b3069e74f50 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Feb 2018 10:06:59 +0300 Subject: [PATCH 076/209] Removed controversial feature #1896 --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 12 ------------ .../00582_not_aliasing_functions.reference | 1 + .../0_stateless/00582_not_aliasing_functions.sql | 1 + 3 files changed, 2 insertions(+), 12 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00582_not_aliasing_functions.reference create mode 100644 dbms/tests/queries/0_stateless/00582_not_aliasing_functions.sql diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index cfcd7989df5..4332fe30f64 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -961,18 +961,6 @@ void ExpressionAnalyzer::normalizeTreeImpl( if ((func_node = typeid_cast(ast.get()))) { - /** Is there a column in the table whose name fully matches the function entry? - * For example, in the table there is a column "domain(URL)", and we requested domain(URL). - */ - String function_string = func_node->getColumnName(); - auto it = findColumn(function_string); - if (columns.end() != it) - { - ast = std::make_shared(func_node->range, function_string); - current_asts.insert(ast.get()); - replaced = true; - } - /// `IN t` can be specified, where t is a table, which is equivalent to `IN (SELECT * FROM t)`. 
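    /// (Aside on the block removed above: it made a physical column whose name
    ///  coincided with the textual form of a function call, e.g. a column
    ///  literally named "domain(URL)", silently shadow the call domain(URL) by
    ///  substituting an ASTIdentifier for the ASTFunction node. After this
    ///  patch the function is always evaluated, whatever the column names are.)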
if (functionIsInOrGlobalInOperator(func_node->name)) if (ASTIdentifier * right = typeid_cast(func_node->arguments->children.at(1).get())) diff --git a/dbms/tests/queries/0_stateless/00582_not_aliasing_functions.reference b/dbms/tests/queries/0_stateless/00582_not_aliasing_functions.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00582_not_aliasing_functions.reference @@ -0,0 +1 @@ +1 diff --git a/dbms/tests/queries/0_stateless/00582_not_aliasing_functions.sql b/dbms/tests/queries/0_stateless/00582_not_aliasing_functions.sql new file mode 100644 index 00000000000..9c56eb5e5a8 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00582_not_aliasing_functions.sql @@ -0,0 +1 @@ +SELECT count() FROM (SELECT count() FROM numbers(10)); From ec28a1e661f16f34af9e9ba0e95858db26d4ea27 Mon Sep 17 00:00:00 2001 From: "chenxing.xc" Date: Fri, 23 Feb 2018 15:17:43 +0800 Subject: [PATCH 077/209] fix check distribute_table crash issue --- .../ClusterProxy/DescribeStreamFactory.cpp | 36 ++++++++++--------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/dbms/src/Interpreters/ClusterProxy/DescribeStreamFactory.cpp b/dbms/src/Interpreters/ClusterProxy/DescribeStreamFactory.cpp index a61e14763f7..88a4a8199ab 100644 --- a/dbms/src/Interpreters/ClusterProxy/DescribeStreamFactory.cpp +++ b/dbms/src/Interpreters/ClusterProxy/DescribeStreamFactory.cpp @@ -4,7 +4,6 @@ #include #include - namespace DB { @@ -33,24 +32,29 @@ void DescribeStreamFactory::createForShard( const Context & context, const ThrottlerPtr & throttler, BlockInputStreams & res) { - for (const Cluster::Address & local_address : shard_info.local_addresses) + if (shard_info.isLocal()) { - InterpreterDescribeQuery interpreter{query_ast, context}; - BlockInputStreamPtr stream = interpreter.execute().in; + for (const Cluster::Address & local_address : shard_info.local_addresses) + { + InterpreterDescribeQuery interpreter{query_ast, context}; + BlockInputStreamPtr stream = interpreter.execute().in; - /** Materialization is needed, since from remote servers the constants come materialized. - * If you do not do this, different types (Const and non-Const) columns will be produced in different threads, - * And this is not allowed, since all code is based on the assumption that in the block stream all types are the same. - */ - BlockInputStreamPtr materialized_stream = std::make_shared(stream); - res.emplace_back(std::make_shared(materialized_stream, toBlockExtraInfo(local_address))); + /** Materialization is needed, since from remote servers the constants come materialized. + * If you do not do this, different types (Const and non-Const) columns will be produced in different threads, + * And this is not allowed, since all code is based on the assumption that in the block stream all types are the same. 
+ */ + BlockInputStreamPtr materialized_stream = std::make_shared(stream); + res.emplace_back(std::make_shared(materialized_stream, toBlockExtraInfo(local_address))); + } + } + else + { + auto remote_stream = std::make_shared( + shard_info.pool, query, InterpreterDescribeQuery::getSampleBlock(), context, nullptr, throttler); + remote_stream->setPoolMode(PoolMode::GET_ALL); + remote_stream->appendExtraInfo(); + res.emplace_back(std::move(remote_stream)); } - - auto remote_stream = std::make_shared( - shard_info.pool, query, InterpreterDescribeQuery::getSampleBlock(), context, nullptr, throttler); - remote_stream->setPoolMode(PoolMode::GET_ALL); - remote_stream->appendExtraInfo(); - res.emplace_back(std::move(remote_stream)); } } From 36a9502e80760c382b2676816ef4bd5b3d2f4a8a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Feb 2018 11:05:21 +0300 Subject: [PATCH 078/209] Simplification #1896 --- dbms/src/DataStreams/tests/filter_stream.cpp | 1 - .../tests/filter_stream_hitlog.cpp | 1 - dbms/src/Interpreters/ExpressionAnalyzer.cpp | 9 ++++--- dbms/src/Parsers/ASTSelectQuery.cpp | 2 +- dbms/src/Parsers/ASTSubquery.cpp | 5 ++-- dbms/src/Parsers/IAST.cpp | 26 ------------------- dbms/src/Parsers/IAST.h | 6 ----- dbms/src/Parsers/tests/select_parser.cpp | 1 - 8 files changed, 9 insertions(+), 42 deletions(-) diff --git a/dbms/src/DataStreams/tests/filter_stream.cpp b/dbms/src/DataStreams/tests/filter_stream.cpp index da3b2f7d5ad..d03561ea0b4 100644 --- a/dbms/src/DataStreams/tests/filter_stream.cpp +++ b/dbms/src/DataStreams/tests/filter_stream.cpp @@ -37,7 +37,6 @@ try formatAST(*ast, std::cerr); std::cerr << std::endl; - std::cerr << ast->getTreeID() << std::endl; Context context = Context::createGlobal(); diff --git a/dbms/src/DataStreams/tests/filter_stream_hitlog.cpp b/dbms/src/DataStreams/tests/filter_stream_hitlog.cpp index bbfd65b1f93..fe9ce592a43 100644 --- a/dbms/src/DataStreams/tests/filter_stream_hitlog.cpp +++ b/dbms/src/DataStreams/tests/filter_stream_hitlog.cpp @@ -101,7 +101,6 @@ int main(int, char **) formatAST(*ast, std::cerr); std::cerr << std::endl; - std::cerr << ast->getTreeID() << std::endl; /// create an object of an existing hit log table diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 4332fe30f64..2072d499317 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -880,7 +880,7 @@ void ExpressionAnalyzer::addASTAliases(ASTPtr & ast, int ignore_levels) aliases[alias] = ast; } - else if (typeid_cast(ast.get())) + else if (auto subquery = typeid_cast(ast.get())) { /// Set unique aliases for all subqueries. This is needed, because content of subqueries could change after recursive analysis, /// and auto-generated column names could become incorrect. @@ -894,7 +894,8 @@ void ExpressionAnalyzer::addASTAliases(ASTPtr & ast, int ignore_levels) ++subquery_index; } - ast->setAlias(alias); + subquery->setAlias(alias); + subquery->prefer_alias_to_column_name = true; aliases[alias] = ast; } } @@ -2361,9 +2362,9 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty getRootActions(join_params.using_expression_list, only_types, false, step.actions); /// Two JOINs are not supported with the same subquery, but different USINGs. 
- String join_id = join_element.getTreeID(); + auto join_hash = join_element.getTreeHash(); - SubqueryForSet & subquery_for_set = subqueries_for_sets[join_id]; + SubqueryForSet & subquery_for_set = subqueries_for_sets[toString(join_hash.first) + "_" + toString(join_hash.second)]; /// Special case - if table name is specified on the right of JOIN, then the table has the type Join (the previously prepared mapping). /// TODO This syntax does not support specifying a database name. diff --git a/dbms/src/Parsers/ASTSelectQuery.cpp b/dbms/src/Parsers/ASTSelectQuery.cpp index 136a5ed5cf8..d2d5b716d3a 100644 --- a/dbms/src/Parsers/ASTSelectQuery.cpp +++ b/dbms/src/Parsers/ASTSelectQuery.cpp @@ -195,7 +195,7 @@ std::shared_ptr ASTSelectQuery::cloneImpl(bool traverse_union_al /** NOTE Members must clone exactly in the same order, * in which they were inserted into `children` in ParserSelectQuery. - * This is important because of the children's names the identifier (getTreeID) is compiled, + * This is important because of the children's names the identifier (getTreeHash) is compiled, * which can be used for column identifiers in the case of subqueries in the IN statement. * For distributed query processing, in case one of the servers is localhost and the other one is not, * localhost query is executed within the process and is cloned, diff --git a/dbms/src/Parsers/ASTSubquery.cpp b/dbms/src/Parsers/ASTSubquery.cpp index 337dc6cfc31..2db425a8c59 100644 --- a/dbms/src/Parsers/ASTSubquery.cpp +++ b/dbms/src/Parsers/ASTSubquery.cpp @@ -1,12 +1,13 @@ #include +#include namespace DB { String ASTSubquery::getColumnNameImpl() const { - /// This is a hack. We use alias, if available, because otherwise tree could change during analysis. - return alias.empty() ? getTreeID() : alias; + Hash hash = getTreeHash(); + return "__subquery_" + toString(hash.first) + "_" + toString(hash.second); } } diff --git a/dbms/src/Parsers/IAST.cpp b/dbms/src/Parsers/IAST.cpp index b951affd648..8a687b3ad97 100644 --- a/dbms/src/Parsers/IAST.cpp +++ b/dbms/src/Parsers/IAST.cpp @@ -61,32 +61,6 @@ size_t IAST::checkSize(size_t max_size) const } -String IAST::getTreeID() const -{ - WriteBufferFromOwnString out; - getTreeIDImpl(out); - return out.str(); -} - - -void IAST::getTreeIDImpl(WriteBuffer & out) const -{ - out << getID(); - - if (!children.empty()) - { - out << '('; - for (ASTs::const_iterator it = children.begin(); it != children.end(); ++it) - { - if (it != children.begin()) - out << ", "; - (*it)->getTreeIDImpl(out); - } - out << ')'; - } -} - - IAST::Hash IAST::getTreeHash() const { SipHash hash_state; diff --git a/dbms/src/Parsers/IAST.h b/dbms/src/Parsers/IAST.h index c22a6ceb094..56971824548 100644 --- a/dbms/src/Parsers/IAST.h +++ b/dbms/src/Parsers/IAST.h @@ -72,12 +72,6 @@ public: /** Get a deep copy of the tree. */ virtual ASTPtr clone() const = 0; - /** Get text, describing and identifying this element and its subtree. - * Usually it consist of element's id and getTreeID of all children. - */ - String getTreeID() const; - void getTreeIDImpl(WriteBuffer & out) const; - /** Get hash code, identifying this element and its subtree. */ using Hash = std::pair; diff --git a/dbms/src/Parsers/tests/select_parser.cpp b/dbms/src/Parsers/tests/select_parser.cpp index 9c22bffcdab..840d11989ad 100644 --- a/dbms/src/Parsers/tests/select_parser.cpp +++ b/dbms/src/Parsers/tests/select_parser.cpp @@ -28,7 +28,6 @@ try std::cout << "Success." 
<< std::endl; formatAST(*ast, std::cerr); std::cout << std::endl; - std::cout << std::endl << ast->getTreeID() << std::endl; return 0; } From 90ca41f0495cfb7297d170fba012fdf3cfc71b24 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Feb 2018 11:23:31 +0300 Subject: [PATCH 079/209] Fixed test #1896 --- .../queries/0_stateless/00458_merge_type_cast.sql | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00458_merge_type_cast.sql b/dbms/tests/queries/0_stateless/00458_merge_type_cast.sql index d1359cad93f..7a976e379e9 100644 --- a/dbms/tests/queries/0_stateless/00458_merge_type_cast.sql +++ b/dbms/tests/queries/0_stateless/00458_merge_type_cast.sql @@ -1,4 +1,3 @@ - SELECT ' UInt32 | UInt64 '; DROP TABLE IF EXISTS test.u32; @@ -7,7 +6,7 @@ DROP TABLE IF EXISTS test.merge_32_64; CREATE TABLE test.u32 (x UInt32, y UInt32 DEFAULT x) ENGINE = Memory; CREATE TABLE test.u64 (x UInt64, y UInt64 DEFAULT x) ENGINE = Memory; -CREATE TABLE test.merge_32_64 (x UInt64) ENGINE = Merge(test, 'u32|u64'); +CREATE TABLE test.merge_32_64 (x UInt64) ENGINE = Merge(test, '^u32|u64$'); INSERT INTO test.u32 (x) VALUES (1); INSERT INTO test.u64 (x) VALUES (1); @@ -42,7 +41,7 @@ DROP TABLE IF EXISTS test.merge_s64_u64; CREATE TABLE test.s64 (x Int64) ENGINE = Memory; CREATE TABLE test.u64 (x UInt64) ENGINE = Memory; -CREATE TABLE test.merge_s64_u64 (x UInt64) ENGINE = Merge(test, 's64|u64'); +CREATE TABLE test.merge_s64_u64 (x UInt64) ENGINE = Merge(test, '^s64|u64$'); INSERT INTO test.s64 VALUES (1); INSERT INTO test.s64 VALUES (-1); @@ -66,7 +65,7 @@ DROP TABLE IF EXISTS test.merge_one_two; CREATE TABLE test.one (x Int32) ENGINE = Memory; CREATE TABLE test.two (x UInt64) ENGINE = Memory; -CREATE TABLE test.merge_one_two (x UInt64) ENGINE = Merge(test, 'one|two'); +CREATE TABLE test.merge_one_two (x UInt64) ENGINE = Merge(test, '^one|two$'); INSERT INTO test.one VALUES (1); INSERT INTO test.two VALUES (1); @@ -87,7 +86,7 @@ DROP TABLE IF EXISTS test.merge_one_two; CREATE TABLE test.one (x String) ENGINE = Memory; CREATE TABLE test.two (x FixedString(16)) ENGINE = Memory; -CREATE TABLE test.merge_one_two (x String) ENGINE = Merge(test, 'one|two'); +CREATE TABLE test.merge_one_two (x String) ENGINE = Merge(test, '^one|two$'); INSERT INTO test.one VALUES ('1'); INSERT INTO test.two VALUES ('1'); @@ -103,7 +102,7 @@ DROP TABLE IF EXISTS test.merge_one_two; CREATE TABLE test.one (x DateTime) ENGINE = Memory; CREATE TABLE test.two (x UInt64) ENGINE = Memory; -CREATE TABLE test.merge_one_two (x UInt64) ENGINE = Merge(test, 'one|two'); +CREATE TABLE test.merge_one_two (x UInt64) ENGINE = Merge(test, '^one|two$'); INSERT INTO test.one VALUES (1); INSERT INTO test.two VALUES (1); @@ -119,7 +118,7 @@ DROP TABLE IF EXISTS test.merge_one_two; CREATE TABLE test.one (x Array(UInt32), z String DEFAULT '', y Array(UInt32)) ENGINE = Memory; CREATE TABLE test.two (x Array(UInt64), z String DEFAULT '', y Array(UInt64)) ENGINE = Memory; -CREATE TABLE test.merge_one_two (x Array(UInt64), z String, y Array(UInt64)) ENGINE = Merge(test, 'one|two'); +CREATE TABLE test.merge_one_two (x Array(UInt64), z String, y Array(UInt64)) ENGINE = Merge(test, '^one|two$'); INSERT INTO test.one (x, y) VALUES ([1], [0]); INSERT INTO test.two (x, y) VALUES ([1], [0]); From c966bf0b95c63c9d1718dbcad2dbeeafc26b0800 Mon Sep 17 00:00:00 2001 From: "chenxing.xc" Date: Fri, 23 Feb 2018 16:40:39 +0800 Subject: [PATCH 080/209] update fix check table --- 
.../ClusterProxy/DescribeStreamFactory.cpp | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/dbms/src/Interpreters/ClusterProxy/DescribeStreamFactory.cpp b/dbms/src/Interpreters/ClusterProxy/DescribeStreamFactory.cpp index 88a4a8199ab..8f98e660aca 100644 --- a/dbms/src/Interpreters/ClusterProxy/DescribeStreamFactory.cpp +++ b/dbms/src/Interpreters/ClusterProxy/DescribeStreamFactory.cpp @@ -32,22 +32,20 @@ void DescribeStreamFactory::createForShard( const Context & context, const ThrottlerPtr & throttler, BlockInputStreams & res) { - if (shard_info.isLocal()) + for (const Cluster::Address & local_address : shard_info.local_addresses) { - for (const Cluster::Address & local_address : shard_info.local_addresses) - { - InterpreterDescribeQuery interpreter{query_ast, context}; - BlockInputStreamPtr stream = interpreter.execute().in; + InterpreterDescribeQuery interpreter{query_ast, context}; + BlockInputStreamPtr stream = interpreter.execute().in; - /** Materialization is needed, since from remote servers the constants come materialized. - * If you do not do this, different types (Const and non-Const) columns will be produced in different threads, - * And this is not allowed, since all code is based on the assumption that in the block stream all types are the same. - */ - BlockInputStreamPtr materialized_stream = std::make_shared(stream); - res.emplace_back(std::make_shared(materialized_stream, toBlockExtraInfo(local_address))); - } + /** Materialization is needed, since from remote servers the constants come materialized. + * If you do not do this, different types (Const and non-Const) columns will be produced in different threads, + * And this is not allowed, since all code is based on the assumption that in the block stream all types are the same. 
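+         * (Illustrative aside: per block, the conversion amounts to
+         *      for (auto & elem : block)
+         *          elem.column = elem.column->convertToFullColumnIfConst();
+         *  which is essentially what MaterializingBlockInputStream performs,
+         *  so local (Const) and remote (full) streams look alike downstream.)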
+ */ + BlockInputStreamPtr materialized_stream = std::make_shared(stream); + res.emplace_back(std::make_shared(materialized_stream, toBlockExtraInfo(local_address))); } - else + + if (shard_info.hasRemoteConnections()) { auto remote_stream = std::make_shared( shard_info.pool, query, InterpreterDescribeQuery::getSampleBlock(), context, nullptr, throttler); From ecc3ab86363d7662fef6c44e617274d6e04f0f3f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Feb 2018 11:42:10 +0300 Subject: [PATCH 081/209] Fixed test #1896 --- dbms/src/Storages/VirtualColumnUtils.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dbms/src/Storages/VirtualColumnUtils.cpp b/dbms/src/Storages/VirtualColumnUtils.cpp index 29409ce715e..70fa6f8712a 100644 --- a/dbms/src/Storages/VirtualColumnUtils.cpp +++ b/dbms/src/Storages/VirtualColumnUtils.cpp @@ -105,7 +105,7 @@ static bool isValidFunction(const ASTPtr & expression, const NameSet & columns) /// Extract all subfunctions of the main conjunction, but depending only on the specified columns static void extractFunctions(const ASTPtr & expression, const NameSet & columns, std::vector & result) { - const ASTFunction * function = typeid_cast(&*expression); + const ASTFunction * function = typeid_cast(expression.get()); if (function && function->name == "and") { for (size_t i = 0; i < function->arguments->children.size(); ++i) @@ -149,6 +149,7 @@ void filterBlockWithQuery(const ASTPtr & query, Block & block, const Context & c extractFunctions(select.where_expression, columns, functions); if (select.prewhere_expression) extractFunctions(select.prewhere_expression, columns, functions); + ASTPtr expression_ast = buildWhereExpression(functions); if (!expression_ast) return; @@ -156,6 +157,7 @@ void filterBlockWithQuery(const ASTPtr & query, Block & block, const Context & c /// Let's analyze and calculate the expression. ExpressionAnalyzer analyzer(expression_ast, context, {}, block.getNamesAndTypesList()); ExpressionActionsPtr actions = analyzer.getActions(false); + actions->execute(block); /// Filter the block. 
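    /// (Minimal sketch of the filtering step, assuming the computed filter
    ///  column is a ColumnUInt8 of 0/1 flags:
    ///      const IColumn::Filter & filter =
    ///          typeid_cast<const ColumnUInt8 &>(*filter_column).getData();
    ///      column = column->filter(filter, -1);   /// -1: no result size hint
    ///  rows whose flag is zero are dropped from every column of the block.)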
@@ -170,6 +172,8 @@ void filterBlockWithQuery(const ASTPtr & query, Block & block, const Context & c ColumnPtr & column = block.safeGetByPosition(i).column; column = column->filter(filter, -1); } + + block.erase(filter_column_name); } } From ffc1a76d2860088b5a9b69097acb33bbb96c4df5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Feb 2018 12:09:49 +0300 Subject: [PATCH 082/209] Removed unused method #1947 --- dbms/src/Interpreters/InterpreterSelectQuery.cpp | 8 -------- dbms/src/Interpreters/InterpreterSelectQuery.h | 1 - 2 files changed, 9 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index c7ac52f82bf..3002b73297a 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -138,14 +138,6 @@ void InterpreterSelectQuery::init(const Names & required_column_names) } } -bool InterpreterSelectQuery::hasAggregation(const ASTSelectQuery & query_ptr) -{ - for (const ASTSelectQuery * elem = &query_ptr; elem; elem = static_cast(elem->next_union_all.get())) - if (elem->group_expression_list || elem->having_expression) - return true; - - return false; -} void InterpreterSelectQuery::basicInit() { diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.h b/dbms/src/Interpreters/InterpreterSelectQuery.h index ac735fb6b71..bd34517635e 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.h +++ b/dbms/src/Interpreters/InterpreterSelectQuery.h @@ -125,7 +125,6 @@ private: void init(const Names & required_column_names); void basicInit(); void initQueryAnalyzer(); - bool hasAggregation(const ASTSelectQuery & query_ptr); /// Execute one SELECT query from the UNION ALL chain. void executeSingleQuery(Pipeline & pipeline); From cae7f96f34cd88cb3979be9c0a32d600ca91aa0f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Feb 2018 13:02:29 +0300 Subject: [PATCH 083/209] Simplification [#CLICKHOUSE-2] --- .../IProfilingBlockInputStream.cpp | 33 ++++--------------- .../DataStreams/IProfilingBlockInputStream.h | 18 +++------- dbms/src/DataStreams/LazyBlockInputStream.h | 17 ---------- dbms/src/Interpreters/DDLWorker.cpp | 2 +- .../InterpreterKillQueryQuery.cpp | 3 +- .../MergeTree/MergeTreeBlockInputStream.cpp | 2 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 2 +- 7 files changed, 14 insertions(+), 63 deletions(-) diff --git a/dbms/src/DataStreams/IProfilingBlockInputStream.cpp b/dbms/src/DataStreams/IProfilingBlockInputStream.cpp index ec8eb407f9c..f409e4423b2 100644 --- a/dbms/src/DataStreams/IProfilingBlockInputStream.cpp +++ b/dbms/src/DataStreams/IProfilingBlockInputStream.cpp @@ -27,7 +27,11 @@ IProfilingBlockInputStream::IProfilingBlockInputStream() Block IProfilingBlockInputStream::read() { - collectAndSendTotalRowsApprox(); + if (total_rows_approx) + { + progressImpl(Progress(0, 0, total_rows_approx)); + total_rows_approx = 0; + } if (!info.started) { @@ -64,7 +68,7 @@ Block IProfilingBlockInputStream::read() /** If the thread is over, then we will ask all children to abort the execution. * This makes sense when running a query with LIMIT * - there is a situation when all the necessary data has already been read, - * but `children sources are still working, + * but children sources are still working, * herewith they can work in separate threads or even remotely. 
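      * (Concrete example: for a query with LIMIT 10, once ten rows have been
      *  produced the downstream stream stops requesting data, this branch
      *  calls cancel(), and the cancellation is forwarded recursively to every
      *  profiling child source, including ones running in other threads.)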
*/ cancel(); @@ -408,29 +412,4 @@ const Block & IProfilingBlockInputStream::getExtremes() const return extremes; } -void IProfilingBlockInputStream::collectTotalRowsApprox() -{ - bool old_val = false; - if (!collected_total_rows_approx.compare_exchange_strong(old_val, true)) - return; - - for (auto & child : children) - { - if (IProfilingBlockInputStream * p_child = dynamic_cast(&*child)) - { - p_child->collectTotalRowsApprox(); - total_rows_approx += p_child->total_rows_approx; - } - } -} - -void IProfilingBlockInputStream::collectAndSendTotalRowsApprox() -{ - if (collected_total_rows_approx) - return; - - collectTotalRowsApprox(); - progressImpl(Progress(0, 0, total_rows_approx)); -} - } diff --git a/dbms/src/DataStreams/IProfilingBlockInputStream.h b/dbms/src/DataStreams/IProfilingBlockInputStream.h index 048473be30c..ac23926526d 100644 --- a/dbms/src/DataStreams/IProfilingBlockInputStream.h +++ b/dbms/src/DataStreams/IProfilingBlockInputStream.h @@ -98,7 +98,7 @@ public: /** Set the approximate total number of rows to read. */ - void setTotalRowsApprox(size_t value) { total_rows_approx = value; } + void addTotalRowsApprox(size_t value) { total_rows_approx += value; } /** Ask to abort the receipt of data as soon as possible. @@ -180,15 +180,10 @@ protected: Block totals; /// Minimums and maximums. The first row of the block - minimums, the second - the maximums. Block extremes; - /// The approximate total number of rows to read. For progress bar. - size_t total_rows_approx = 0; private: bool enabled_extremes = false; - /// Information about the approximate total number of rows is collected in the parent source. - std::atomic_bool collected_total_rows_approx {false}; - /// The limit on the number of rows/bytes has been exceeded, and you need to stop execution on the next `read` call, as if the thread has run out. bool limit_exceeded_need_break = false; @@ -199,6 +194,9 @@ private: QuotaForIntervals * quota = nullptr; /// If nullptr - the quota is not used. double prev_elapsed = 0; + /// The approximate total number of rows to read. For progress bar. + size_t total_rows_approx = 0; + /// The successors must implement this function. virtual Block readImpl() = 0; @@ -216,14 +214,6 @@ private: bool checkDataSizeLimits(); bool checkTimeLimits(); void checkQuota(Block & block); - - /// Gather information about the approximate total number of rows from all children. - void collectTotalRowsApprox(); - - /** Send information about the approximate total number of rows to the progress bar. - * It is done so that sending occurs only in the upper stream. - */ - void collectAndSendTotalRowsApprox(); }; } diff --git a/dbms/src/DataStreams/LazyBlockInputStream.h b/dbms/src/DataStreams/LazyBlockInputStream.h index 6d05e570f53..07ed7f463d0 100644 --- a/dbms/src/DataStreams/LazyBlockInputStream.h +++ b/dbms/src/DataStreams/LazyBlockInputStream.h @@ -64,23 +64,6 @@ protected: { std::lock_guard lock(cancel_mutex); - /** TODO Data race here. See IProfilingBlockInputStream::collectAndSendTotalRowsApprox. - Assume following pipeline: - - RemoteBlockInputStream - AsynchronousBlockInputStream - LazyBlockInputStream - - RemoteBlockInputStream calls AsynchronousBlockInputStream::readPrefix - and AsynchronousBlockInputStream spawns a thread and returns. 
- - The separate thread will call LazyBlockInputStream::read - LazyBlockInputStream::read will add more children to itself - - In the same moment, in main thread, RemoteBlockInputStream::read is called, - then IProfilingBlockInputStream::collectAndSendTotalRowsApprox is called - and iterates over set of children. - */ children.push_back(input); if (isCancelled() && p_input) diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp index c6bcb23989b..eea11fbb3ff 100644 --- a/dbms/src/Interpreters/DDLWorker.cpp +++ b/dbms/src/Interpreters/DDLWorker.cpp @@ -954,7 +954,7 @@ public: for (const HostID & host: entry.hosts) waiting_hosts.emplace(host.toString()); - setTotalRowsApprox(entry.hosts.size()); + addTotalRowsApprox(entry.hosts.size()); timeout_seconds = context.getSettingsRef().distributed_ddl_task_timeout; } diff --git a/dbms/src/Interpreters/InterpreterKillQueryQuery.cpp b/dbms/src/Interpreters/InterpreterKillQueryQuery.cpp index 64805455368..68f12dc5783 100644 --- a/dbms/src/Interpreters/InterpreterKillQueryQuery.cpp +++ b/dbms/src/Interpreters/InterpreterKillQueryQuery.cpp @@ -102,7 +102,6 @@ static QueryDescriptors extractQueriesExceptMeAndCheckAccess(const Block & proce class SyncKillQueryInputStream : public IProfilingBlockInputStream { public: - SyncKillQueryInputStream(ProcessList & process_list_, QueryDescriptors && processes_to_stop_, Block && processes_block_, const Block & res_sample_block_) : process_list(process_list_), @@ -110,7 +109,7 @@ public: processes_block(std::move(processes_block_)), res_sample_block(res_sample_block_) { - total_rows_approx = processes_to_stop.size(); + addTotalRowsApprox(processes_to_stop.size()); } String getName() const override diff --git a/dbms/src/Storages/MergeTree/MergeTreeBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeBlockInputStream.cpp index 13fa4100a70..e0a8dc94400 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBlockInputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeBlockInputStream.cpp @@ -59,7 +59,7 @@ MergeTreeBlockInputStream::MergeTreeBlockInputStream( : "") << " rows starting from " << all_mark_ranges.front().begin * storage.index_granularity); - setTotalRowsApprox(total_rows); + addTotalRowsApprox(total_rows); header = storage.getSampleBlockForColumns(ordered_names); diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index dc7018bb4d6..8c9655fe68a 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -659,7 +659,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( if (i == 0) { /// Set the approximate number of rows for the first source only - static_cast(*res.front()).setTotalRowsApprox(total_rows); + static_cast(*res.front()).addTotalRowsApprox(total_rows); } } } From 1a3fa45b6f37b907b1affe64273d594fa0553df9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Feb 2018 13:43:24 +0300 Subject: [PATCH 084/209] Fixed race condition in Lazy stream [#CLICKHOUSE-2] --- .../DataStreams/BlockStreamProfileInfo.cpp | 16 ++-- .../CreatingSetsBlockInputStream.cpp | 18 ++--- .../CreatingSetsBlockInputStream.h | 2 +- .../ExpressionBlockInputStream.cpp | 2 +- .../DataStreams/ExpressionBlockInputStream.h | 2 +- .../DataStreams/FilterBlockInputStream.cpp | 2 +- dbms/src/DataStreams/FilterBlockInputStream.h | 2 +- dbms/src/DataStreams/IBlockInputStream.cpp | 43 +--------- 
dbms/src/DataStreams/IBlockInputStream.h | 26 ++++--- dbms/src/DataStreams/IBlockOutputStream.h | 2 +- .../IProfilingBlockInputStream.cpp | 78 ++++++++++--------- .../DataStreams/IProfilingBlockInputStream.h | 24 +++++- dbms/src/DataStreams/LazyBlockInputStream.h | 12 +-- .../TotalsHavingBlockInputStream.cpp | 2 +- .../TotalsHavingBlockInputStream.h | 2 +- 15 files changed, 107 insertions(+), 126 deletions(-) diff --git a/dbms/src/DataStreams/BlockStreamProfileInfo.cpp b/dbms/src/DataStreams/BlockStreamProfileInfo.cpp index 23226f7f7ec..5a31f97a748 100644 --- a/dbms/src/DataStreams/BlockStreamProfileInfo.cpp +++ b/dbms/src/DataStreams/BlockStreamProfileInfo.cpp @@ -77,11 +77,11 @@ void BlockStreamProfileInfo::collectInfosForStreamsWithName(const char * name, B return; } - for (const auto & child_stream : parent->getChildren()) + parent->forEachProfilingChild([&] (IProfilingBlockInputStream & child) { - if (const auto * profiling_child = dynamic_cast(child_stream.get())) - profiling_child->getProfileInfo().collectInfosForStreamsWithName(name, res); - } + child.getProfileInfo().collectInfosForStreamsWithName(name, res); + return false; + }); } @@ -107,11 +107,11 @@ void BlockStreamProfileInfo::calculateRowsBeforeLimit() const for (const BlockStreamProfileInfo * info_limit_or_sort : limits_or_sortings) { - for (const auto & child_stream : info_limit_or_sort->parent->getChildren()) + info_limit_or_sort->parent->forEachProfilingChild([&] (IProfilingBlockInputStream & child) { - if (const auto * profiling_child = dynamic_cast(child_stream.get())) - rows_before_limit += profiling_child->getProfileInfo().rows; - } + rows_before_limit += child.getProfileInfo().rows; + return false; + }); } } else diff --git a/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp b/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp index 5e1d3835d71..ee88c1160c7 100644 --- a/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp +++ b/dbms/src/DataStreams/CreatingSetsBlockInputStream.cpp @@ -35,7 +35,7 @@ void CreatingSetsBlockInputStream::readPrefixImpl() } -const Block & CreatingSetsBlockInputStream::getTotals() +Block CreatingSetsBlockInputStream::getTotals() { auto input = dynamic_cast(children.back().get()); @@ -148,24 +148,20 @@ void CreatingSetsBlockInputStream::createOne(SubqueryForSet & subquery) if (table_out) table_out->writeSuffix(); - /// We will display information about how many rows and bytes are read. - size_t rows = 0; - size_t bytes = 0; - watch.stop(); - subquery.source->getLeafRowsBytes(rows, bytes); - size_t head_rows = 0; if (IProfilingBlockInputStream * profiling_in = dynamic_cast(&*subquery.source)) { - head_rows = profiling_in->getProfileInfo().rows; + const BlockStreamProfileInfo & profile_info = profiling_in->getProfileInfo(); + + head_rows = profile_info.rows; if (subquery.join) subquery.join->setTotals(profiling_in->getTotals()); } - if (rows != 0) + if (head_rows != 0) { std::stringstream msg; msg << std::fixed << std::setprecision(3); @@ -178,9 +174,7 @@ void CreatingSetsBlockInputStream::createOne(SubqueryForSet & subquery) if (subquery.table) msg << "Table with " << head_rows << " rows. 
"; - msg << "Read " << rows << " rows, " << bytes / 1048576.0 << " MiB in " << watch.elapsedSeconds() << " sec., " - << static_cast(rows / watch.elapsedSeconds()) << " rows/sec., " << bytes / 1048576.0 / watch.elapsedSeconds() << " MiB/sec."; - + msg << "In " << watch.elapsedSeconds() << " sec."; LOG_DEBUG(log, msg.rdbuf()); } else diff --git a/dbms/src/DataStreams/CreatingSetsBlockInputStream.h b/dbms/src/DataStreams/CreatingSetsBlockInputStream.h index 85088d1d8fc..d3cf53c034b 100644 --- a/dbms/src/DataStreams/CreatingSetsBlockInputStream.h +++ b/dbms/src/DataStreams/CreatingSetsBlockInputStream.h @@ -38,7 +38,7 @@ public: Block getHeader() const override { return children.back()->getHeader(); } /// Takes `totals` only from the main source, not from subquery sources. - const Block & getTotals() override; + Block getTotals() override; protected: Block readImpl() override; diff --git a/dbms/src/DataStreams/ExpressionBlockInputStream.cpp b/dbms/src/DataStreams/ExpressionBlockInputStream.cpp index d274a456c22..f1840acd023 100644 --- a/dbms/src/DataStreams/ExpressionBlockInputStream.cpp +++ b/dbms/src/DataStreams/ExpressionBlockInputStream.cpp @@ -13,7 +13,7 @@ ExpressionBlockInputStream::ExpressionBlockInputStream(const BlockInputStreamPtr String ExpressionBlockInputStream::getName() const { return "Expression"; } -const Block & ExpressionBlockInputStream::getTotals() +Block ExpressionBlockInputStream::getTotals() { if (IProfilingBlockInputStream * child = dynamic_cast(&*children.back())) { diff --git a/dbms/src/DataStreams/ExpressionBlockInputStream.h b/dbms/src/DataStreams/ExpressionBlockInputStream.h index a509d1e606e..022a573f858 100644 --- a/dbms/src/DataStreams/ExpressionBlockInputStream.h +++ b/dbms/src/DataStreams/ExpressionBlockInputStream.h @@ -22,7 +22,7 @@ public: ExpressionBlockInputStream(const BlockInputStreamPtr & input, const ExpressionActionsPtr & expression_); String getName() const override; - const Block & getTotals() override; + Block getTotals() override; Block getHeader() const override; protected: diff --git a/dbms/src/DataStreams/FilterBlockInputStream.cpp b/dbms/src/DataStreams/FilterBlockInputStream.cpp index 2f6115863cf..19905834468 100644 --- a/dbms/src/DataStreams/FilterBlockInputStream.cpp +++ b/dbms/src/DataStreams/FilterBlockInputStream.cpp @@ -53,7 +53,7 @@ FilterBlockInputStream::FilterBlockInputStream(const BlockInputStreamPtr & input String FilterBlockInputStream::getName() const { return "Filter"; } -const Block & FilterBlockInputStream::getTotals() +Block FilterBlockInputStream::getTotals() { if (IProfilingBlockInputStream * child = dynamic_cast(&*children.back())) { diff --git a/dbms/src/DataStreams/FilterBlockInputStream.h b/dbms/src/DataStreams/FilterBlockInputStream.h index c78e4c0919f..8bebda86fd4 100644 --- a/dbms/src/DataStreams/FilterBlockInputStream.h +++ b/dbms/src/DataStreams/FilterBlockInputStream.h @@ -25,7 +25,7 @@ public: FilterBlockInputStream(const BlockInputStreamPtr & input, const ExpressionActionsPtr & expression_, const String & filter_column_name_); String getName() const override; - const Block & getTotals() override; + Block getTotals() override; Block getHeader() const override; protected: diff --git a/dbms/src/DataStreams/IBlockInputStream.cpp b/dbms/src/DataStreams/IBlockInputStream.cpp index b4cc5444c87..bc0733c6529 100644 --- a/dbms/src/DataStreams/IBlockInputStream.cpp +++ b/dbms/src/DataStreams/IBlockInputStream.cpp @@ -13,6 +13,10 @@ namespace ErrorCodes } +/** It's safe to access children without mutex as long as these 
methods are called before first call to read, readPrefix. + */ + + String IBlockInputStream::getTreeID() const { std::stringstream s; @@ -87,44 +91,5 @@ void IBlockInputStream::dumpTree(std::ostream & ostr, size_t indent, size_t mult } } - -BlockInputStreams IBlockInputStream::getLeaves() -{ - BlockInputStreams res; - getLeavesImpl(res, nullptr); - return res; -} - - -void IBlockInputStream::getLeafRowsBytes(size_t & rows, size_t & bytes) -{ - BlockInputStreams leaves = getLeaves(); - rows = 0; - bytes = 0; - - for (BlockInputStreams::const_iterator it = leaves.begin(); it != leaves.end(); ++it) - { - if (const IProfilingBlockInputStream * profiling = dynamic_cast(&**it)) - { - const BlockStreamProfileInfo & info = profiling->getProfileInfo(); - rows += info.rows; - bytes += info.bytes; - } - } -} - - -void IBlockInputStream::getLeavesImpl(BlockInputStreams & res, const BlockInputStreamPtr & this_shared_ptr) -{ - if (children.empty()) - { - if (this_shared_ptr) - res.push_back(this_shared_ptr); - } - else - for (BlockInputStreams::iterator it = children.begin(); it != children.end(); ++it) - (*it)->getLeavesImpl(res, *it); -} - } diff --git a/dbms/src/DataStreams/IBlockInputStream.h b/dbms/src/DataStreams/IBlockInputStream.h index cf158f9b46c..56de7d6c054 100644 --- a/dbms/src/DataStreams/IBlockInputStream.h +++ b/dbms/src/DataStreams/IBlockInputStream.h @@ -89,18 +89,13 @@ public: /// In case of isGroupedOutput or isSortedOutput, return corresponding SortDescription virtual const SortDescription & getSortDescription() const { throw Exception("Output of " + getName() + " is not sorted", ErrorCodes::OUTPUT_IS_NOT_SORTED); } - BlockInputStreams & getChildren() { return children; } - + /** Must be called before read, readPrefix. + */ void dumpTree(std::ostream & ostr, size_t indent = 0, size_t multiplier = 1); - /// Get leaf sources (not including this one). - BlockInputStreams getLeaves(); - - /// Get the number of rows and bytes read in the leaf sources. - void getLeafRowsBytes(size_t & rows, size_t & bytes); - /** Check the depth of the pipeline. * If max_depth is specified and the `depth` is greater - throw an exception. + * Must be called before read, readPrefix. 
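+     * (Aside on the contract: once reading has started, the set of children
+     *  may grow concurrently, as LazyBlockInputStream does via addChild(), so
+     *  any later traversal must go through the locking helper instead, e.g.
+     *      stream.forEachChild([](IBlockInputStream & child)
+     *      {
+     *          child.readSuffix();
+     *          return false;    /// false = continue iterating
+     *      });
+     *  forEachChild takes children_mutex around the loop.)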
*/ size_t checkDepth(size_t max_depth) const; @@ -108,13 +103,22 @@ public: */ void addTableLock(const TableStructureReadLockPtr & lock) { table_locks.push_back(lock); } -protected: - TableStructureReadLocks table_locks; + template + void forEachChild(F && f) + { + std::lock_guard lock(children_mutex); + for (auto & child : children) + if (f(*child)) + return; + } + +protected: BlockInputStreams children; + std::mutex children_mutex; private: - void getLeavesImpl(BlockInputStreams & res, const BlockInputStreamPtr & this_shared_ptr); + TableStructureReadLocks table_locks; size_t checkDepthImpl(size_t max_depth, size_t level) const; diff --git a/dbms/src/DataStreams/IBlockOutputStream.h b/dbms/src/DataStreams/IBlockOutputStream.h index e33fced86a3..33494422479 100644 --- a/dbms/src/DataStreams/IBlockOutputStream.h +++ b/dbms/src/DataStreams/IBlockOutputStream.h @@ -66,7 +66,7 @@ public: */ void addTableLock(const TableStructureReadLockPtr & lock) { table_locks.push_back(lock); } -protected: +private: TableStructureReadLocks table_locks; }; diff --git a/dbms/src/DataStreams/IProfilingBlockInputStream.cpp b/dbms/src/DataStreams/IProfilingBlockInputStream.cpp index f409e4423b2..70d287b864b 100644 --- a/dbms/src/DataStreams/IProfilingBlockInputStream.cpp +++ b/dbms/src/DataStreams/IProfilingBlockInputStream.cpp @@ -93,15 +93,21 @@ void IProfilingBlockInputStream::readPrefix() { readPrefixImpl(); - for (auto & child : children) - child->readPrefix(); + forEachChild([&] (IBlockInputStream & child) + { + child.readPrefix(); + return false; + }); } void IProfilingBlockInputStream::readSuffix() { - for (auto & child : children) - child->readSuffix(); + forEachChild([&] (IBlockInputStream & child) + { + child.readSuffix(); + return false; + }); readSuffixImpl(); } @@ -350,9 +356,11 @@ void IProfilingBlockInputStream::cancel() if (!is_cancelled.compare_exchange_strong(old_val, true, std::memory_order_seq_cst, std::memory_order_relaxed)) return; - for (auto & child : children) - if (IProfilingBlockInputStream * p_child = dynamic_cast(&*child)) - p_child->cancel(); + forEachProfilingChild([] (IProfilingBlockInputStream & child) + { + child.cancel(); + return false; + }); } @@ -360,9 +368,11 @@ void IProfilingBlockInputStream::setProgressCallback(const ProgressCallback & ca { progress_callback = callback; - for (auto & child : children) - if (IProfilingBlockInputStream * p_child = dynamic_cast(&*child)) - p_child->setProgressCallback(callback); + forEachProfilingChild([&] (IProfilingBlockInputStream & child) + { + child.setProgressCallback(callback); + return false; + }); } @@ -370,46 +380,44 @@ void IProfilingBlockInputStream::setProcessListElement(ProcessListElement * elem { process_list_elem = elem; - for (auto & child : children) - if (IProfilingBlockInputStream * p_child = dynamic_cast(&*child)) - p_child->setProcessListElement(elem); + forEachProfilingChild([&] (IProfilingBlockInputStream & child) + { + child.setProcessListElement(elem); + return false; + }); } -const Block & IProfilingBlockInputStream::getTotals() +Block IProfilingBlockInputStream::getTotals() { if (totals) return totals; - for (auto & child : children) + Block res; + forEachProfilingChild([&] (IProfilingBlockInputStream & child) { - if (IProfilingBlockInputStream * p_child = dynamic_cast(&*child)) - { - const Block & res = p_child->getTotals(); - if (res) - return res; - } - } - - return totals; + res = child.getTotals(); + if (res) + return true; + return false; + }); + return res; } -const Block & 
IProfilingBlockInputStream::getExtremes() const +Block IProfilingBlockInputStream::getExtremes() { if (extremes) return extremes; - for (const auto & child : children) + Block res; + forEachProfilingChild([&] (IProfilingBlockInputStream & child) { - if (const IProfilingBlockInputStream * p_child = dynamic_cast(&*child)) - { - const Block & res = p_child->getExtremes(); - if (res) - return res; - } - } - - return extremes; + res = child.getExtremes(); + if (res) + return true; + return false; + }); + return res; } } diff --git a/dbms/src/DataStreams/IProfilingBlockInputStream.h b/dbms/src/DataStreams/IProfilingBlockInputStream.h index ac23926526d..e11df9a256e 100644 --- a/dbms/src/DataStreams/IProfilingBlockInputStream.h +++ b/dbms/src/DataStreams/IProfilingBlockInputStream.h @@ -26,6 +26,8 @@ using ProfilingBlockInputStreamPtr = std::shared_ptr */ class IProfilingBlockInputStream : public IBlockInputStream { + friend struct BlockStreamProfileInfo; + public: IProfilingBlockInputStream(); @@ -56,10 +58,10 @@ public: * Call this method only after all the data has been retrieved with `read`, * otherwise there will be problems if any data at the same time is computed in another thread. */ - virtual const Block & getTotals(); + virtual Block getTotals(); /// The same for minimums and maximums. - const Block & getExtremes() const; + Block getExtremes(); /** Set the execution progress bar callback. @@ -181,6 +183,13 @@ protected: /// Minimums and maximums. The first row of the block - minimums, the second - the maximums. Block extremes; + + void addChild(BlockInputStreamPtr & child) + { + std::lock_guard lock(children_mutex); + children.push_back(child); + } + private: bool enabled_extremes = false; @@ -214,6 +223,17 @@ private: bool checkDataSizeLimits(); bool checkTimeLimits(); void checkQuota(Block & block); + + + template + void forEachProfilingChild(F && f) + { + std::lock_guard lock(children_mutex); + for (auto & child : children) + if (IProfilingBlockInputStream * p_child = dynamic_cast(child.get())) + if (f(*p_child)) + return; + } }; } diff --git a/dbms/src/DataStreams/LazyBlockInputStream.h b/dbms/src/DataStreams/LazyBlockInputStream.h index 07ed7f463d0..f4faceb3927 100644 --- a/dbms/src/DataStreams/LazyBlockInputStream.h +++ b/dbms/src/DataStreams/LazyBlockInputStream.h @@ -27,12 +27,6 @@ public: String getName() const override { return name; } - void cancel() override - { - std::lock_guard lock(cancel_mutex); - IProfilingBlockInputStream::cancel(); - } - Block getHeader() const override { return header; @@ -62,9 +56,7 @@ protected: input->readPrefix(); { - std::lock_guard lock(cancel_mutex); - - children.push_back(input); + addChild(input); if (isCancelled() && p_input) p_input->cancel(); @@ -80,8 +72,6 @@ private: Generator generator; BlockInputStreamPtr input; - - std::mutex cancel_mutex; }; } diff --git a/dbms/src/DataStreams/TotalsHavingBlockInputStream.cpp b/dbms/src/DataStreams/TotalsHavingBlockInputStream.cpp index b204afc2944..db8b291d5f6 100644 --- a/dbms/src/DataStreams/TotalsHavingBlockInputStream.cpp +++ b/dbms/src/DataStreams/TotalsHavingBlockInputStream.cpp @@ -47,7 +47,7 @@ static void finalize(Block & block) } -const Block & TotalsHavingBlockInputStream::getTotals() +Block TotalsHavingBlockInputStream::getTotals() { if (!totals) { diff --git a/dbms/src/DataStreams/TotalsHavingBlockInputStream.h b/dbms/src/DataStreams/TotalsHavingBlockInputStream.h index 375854c5761..ab8c7d23671 100644 --- a/dbms/src/DataStreams/TotalsHavingBlockInputStream.h +++ 
b/dbms/src/DataStreams/TotalsHavingBlockInputStream.h @@ -27,7 +27,7 @@ public: String getName() const override { return "TotalsHaving"; } - const Block & getTotals() override; + Block getTotals() override; Block getHeader() const override; From 9e14dc18ebfb89962a88df9bbed8f06d1c9e1e0e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Feb 2018 13:49:48 +0300 Subject: [PATCH 085/209] Fixed race condition in Lazy stream [#CLICKHOUSE-2] --- dbms/src/Server/TCPHandler.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/src/Server/TCPHandler.cpp b/dbms/src/Server/TCPHandler.cpp index 345cf118a5b..7dadeef966b 100644 --- a/dbms/src/Server/TCPHandler.cpp +++ b/dbms/src/Server/TCPHandler.cpp @@ -400,7 +400,7 @@ void TCPHandler::processTablesStatusRequest() void TCPHandler::sendProfileInfo() { - if (const IProfilingBlockInputStream * input = dynamic_cast(&*state.io.in)) + if (const IProfilingBlockInputStream * input = dynamic_cast(state.io.in.get())) { writeVarUInt(Protocol::Server::ProfileInfo, *out); input->getProfileInfo().write(*out); @@ -411,7 +411,7 @@ void TCPHandler::sendProfileInfo() void TCPHandler::sendTotals() { - if (IProfilingBlockInputStream * input = dynamic_cast(&*state.io.in)) + if (IProfilingBlockInputStream * input = dynamic_cast(state.io.in.get())) { const Block & totals = input->getTotals(); @@ -432,9 +432,9 @@ void TCPHandler::sendTotals() void TCPHandler::sendExtremes() { - if (const IProfilingBlockInputStream * input = dynamic_cast(&*state.io.in)) + if (IProfilingBlockInputStream * input = dynamic_cast(state.io.in.get())) { - const Block & extremes = input->getExtremes(); + Block extremes = input->getExtremes(); if (extremes) { From c73f5d5e9503a1942580cc3ea316d92a6d8605f1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Feb 2018 14:35:05 +0300 Subject: [PATCH 086/209] Fixed test [#CLICKHOUSE-2] --- dbms/src/Parsers/ASTSubquery.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dbms/src/Parsers/ASTSubquery.cpp b/dbms/src/Parsers/ASTSubquery.cpp index 2db425a8c59..7b90b8a931b 100644 --- a/dbms/src/Parsers/ASTSubquery.cpp +++ b/dbms/src/Parsers/ASTSubquery.cpp @@ -6,6 +6,10 @@ namespace DB String ASTSubquery::getColumnNameImpl() const { + /// This is a hack. We use alias, if available, because otherwise tree could change during analysis. 
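+    /// (The alias is assigned before recursive analysis and thus survives
+    ///  later rewrites of the subquery, whereas the tree hash computed below
+    ///  would change as soon as the tree mutates, for example after unused
+    ///  columns are pruned, invalidating previously generated column names.)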
+ if (!alias.empty()) + return alias; + Hash hash = getTreeHash(); return "__subquery_" + toString(hash.first) + "_" + toString(hash.second); } From d4217d7aac557cf9fd7e61f20c89f324167857f6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Feb 2018 14:37:49 +0300 Subject: [PATCH 087/209] Updated test [#CLICKHOUSE-2] --- .../00416_pocopatch_progress_in_http_headers.reference | 1 - 1 file changed, 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.reference b/dbms/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.reference index 3f978f5d728..afe27bfae32 100644 --- a/dbms/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.reference +++ b/dbms/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.reference @@ -1,4 +1,3 @@ -< X-ClickHouse-Progress: {"read_rows":"0","read_bytes":"0","total_rows":"0"} < X-ClickHouse-Progress: {"read_rows":"1","read_bytes":"8","total_rows":"0"} < X-ClickHouse-Progress: {"read_rows":"2","read_bytes":"16","total_rows":"0"} < X-ClickHouse-Progress: {"read_rows":"3","read_bytes":"24","total_rows":"0"} From 243563d247773e90fc18730722b409c881bccf66 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Feb 2018 21:55:54 +0300 Subject: [PATCH 088/209] Fixed build [#CLICKHOUSE-2] --- dbms/src/DataStreams/IBlockInputStream.h | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/DataStreams/IBlockInputStream.h b/dbms/src/DataStreams/IBlockInputStream.h index 56de7d6c054..988f15bffb7 100644 --- a/dbms/src/DataStreams/IBlockInputStream.h +++ b/dbms/src/DataStreams/IBlockInputStream.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include From 16bef55d0e7bacc54f923911afdadedbe8ca3b3e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Feb 2018 22:07:38 +0300 Subject: [PATCH 089/209] Fixed bad translation [#CLICKHOUSE-2] --- dbms/src/AggregateFunctions/UniquesHashSet.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/src/AggregateFunctions/UniquesHashSet.h b/dbms/src/AggregateFunctions/UniquesHashSet.h index 28958978f0b..61d43316745 100644 --- a/dbms/src/AggregateFunctions/UniquesHashSet.h +++ b/dbms/src/AggregateFunctions/UniquesHashSet.h @@ -59,7 +59,7 @@ /** This hash function is not the most optimal, but UniquesHashSet states counted with it, -  * stored in many places on disks (in the Meter), so it continues to be used. +  * stored in many places on disks (in the Yandex.Metrika), so it continues to be used.   */ struct UniquesHashSetDefaultHash { @@ -337,8 +337,8 @@ public: /** Correction of a systematic error due to collisions during hashing in UInt32. * `fixed_res(res)` formula * - with how many different elements of fixed_res, - * when randomly scattered across 2^32 baskets, - * filled baskets with average of res is obtained. + * when randomly scattered across 2^32 buckets, + * filled buckets with average of res is obtained. */ size_t p32 = 1ULL << 32; size_t fixed_res = round(p32 * (log(p32) - log(p32 - res))); From 077f413a0d36aaa96967f224e4fba297501a7e66 Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Sat, 24 Feb 2018 00:22:52 +0300 Subject: [PATCH 090/209] Added consistent hashing functions. 
[#CLICKHOUSE-3606] --- dbms/src/Functions/CMakeLists.txt | 2 + .../Functions/FunctionsConsistentHashing.cpp | 14 ++ .../Functions/FunctionsConsistentHashing.h | 173 ++++++++++++++++++ dbms/src/Functions/registerFunctions.cpp | 2 + ...580_consistent_hashing_functions.reference | 6 + .../00580_consistent_hashing_functions.sql | 4 + libs/CMakeLists.txt | 2 + libs/yandex-consistent-hashing/CMakeLists.txt | 5 + .../yandex-consistent-hashing/yandex/bitops.h | 55 ++++++ .../yandex/consistent_hashing.cpp | 125 +++++++++++++ .../yandex/consistent_hashing.h | 17 ++ .../yandex/popcount.cpp | 25 +++ .../yandex/popcount.h | 84 +++++++++ 13 files changed, 514 insertions(+) create mode 100644 dbms/src/Functions/FunctionsConsistentHashing.cpp create mode 100644 dbms/src/Functions/FunctionsConsistentHashing.h create mode 100644 dbms/tests/queries/0_stateless/00580_consistent_hashing_functions.reference create mode 100644 dbms/tests/queries/0_stateless/00580_consistent_hashing_functions.sql create mode 100644 libs/yandex-consistent-hashing/CMakeLists.txt create mode 100644 libs/yandex-consistent-hashing/yandex/bitops.h create mode 100644 libs/yandex-consistent-hashing/yandex/consistent_hashing.cpp create mode 100644 libs/yandex-consistent-hashing/yandex/consistent_hashing.h create mode 100644 libs/yandex-consistent-hashing/yandex/popcount.cpp create mode 100644 libs/yandex-consistent-hashing/yandex/popcount.h diff --git a/dbms/src/Functions/CMakeLists.txt b/dbms/src/Functions/CMakeLists.txt index d86e43cb435..347416a7a86 100644 --- a/dbms/src/Functions/CMakeLists.txt +++ b/dbms/src/Functions/CMakeLists.txt @@ -100,3 +100,5 @@ endif () if (ENABLE_TESTS) add_subdirectory (tests) endif () + +target_link_libraries (clickhouse_functions PRIVATE yandex-consistent-hashing) diff --git a/dbms/src/Functions/FunctionsConsistentHashing.cpp b/dbms/src/Functions/FunctionsConsistentHashing.cpp new file mode 100644 index 00000000000..abf789c6073 --- /dev/null +++ b/dbms/src/Functions/FunctionsConsistentHashing.cpp @@ -0,0 +1,14 @@ +#include "FunctionsConsistentHashing.h" +#include + + +namespace DB +{ + +void registerFunctionsConsistentHashing(FunctionFactory & factory) +{ + factory.registerFunction(); + factory.registerFunction(); +} + +} diff --git a/dbms/src/Functions/FunctionsConsistentHashing.h b/dbms/src/Functions/FunctionsConsistentHashing.h new file mode 100644 index 00000000000..8678281cbeb --- /dev/null +++ b/dbms/src/Functions/FunctionsConsistentHashing.h @@ -0,0 +1,173 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_COLUMN; + extern const int BAD_ARGUMENTS; +} + + +struct YandexConsistentHashImpl +{ + static constexpr auto name = "YandexConsistentHash"; + + /// Actually it supports UInt64, but it is effective only if n < 65536 + using ResultType = UInt32; + using BucketsCountType = ResultType; + + static inline ResultType apply(UInt64 hash, BucketsCountType n) + { + return ConsistentHashing(hash, n); + } +}; + + +/// Code from https://arxiv.org/pdf/1406.2294.pdf +static inline int32_t JumpConsistentHash(uint64_t key, int32_t num_buckets) { + int64_t b = -1, j = 0; + while (j < num_buckets) { + b = j; + key = key * 2862933555777941757ULL + 1; + j = static_cast((b + 1) * (double(1LL << 31) / double((key >> 33) + 1))); + } + return static_cast(b); +} + +struct JumpConsistentHashImpl +{ + static 
constexpr auto name = "JumpConsistentHash"; + + using ResultType = Int32; + using BucketsCountType = ResultType; + + static inline ResultType apply(UInt64 hash, BucketsCountType n) + { + return JumpConsistentHash(hash, n); + } +}; + + +template +class FunctionConsistentHashImpl : public IFunction +{ +public: + + static constexpr auto name = Impl::name; + using ResultType = typename Impl::ResultType; + using BucketsType = typename Impl::BucketsCountType; + + static FunctionPtr create(const Context &) { return std::make_shared>(); }; + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 2; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (!arguments[0]->isInteger()) + throw Exception("Illegal type " + arguments[0]->getName() + " of the first argument of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + if (!arguments[1]->isInteger()) + throw Exception("Illegal type " + arguments[1]->getName() + " of the second argument of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return std::make_shared>(); + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override + { + auto buckets_col = block.getByPosition(arguments[1]).column.get(); + if (!buckets_col->isColumnConst()) + throw Exception("The second argument of function " + getName() + " (number of buckets) must be constant", ErrorCodes::BAD_ARGUMENTS); + + constexpr UInt64 max_buckets = static_cast(std::numeric_limits::max()); + UInt64 num_buckets; + + auto check_range = [&] (auto buckets) + { + if (buckets <= 0) + throw Exception("The second argument of function " + getName() + " (number of buckets) must be positive number", + ErrorCodes::BAD_ARGUMENTS); + + if (static_cast(buckets) > max_buckets) + throw Exception("The value of the second argument of function " + getName() + " (number of buckets) is not fit to " + + DataTypeNumber().getName(), ErrorCodes::BAD_ARGUMENTS); + + num_buckets = static_cast(buckets); + }; + + Field buckets_field = (*buckets_col)[0]; + if (buckets_field.getType() == Field::Types::Int64) + check_range(buckets_field.safeGet()); + else if (buckets_field.getType() == Field::Types::UInt64) + check_range(buckets_field.safeGet()); + else + throw Exception("Illegal type " + String(buckets_field.getTypeName()) + " of the second argument of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + + const auto & hash_col_source = block.getByPosition(arguments[0]).column; + ColumnPtr hash_col = (hash_col_source->isColumnConst()) ? 
hash_col_source->convertToFullColumnIfConst() : hash_col_source; + ColumnPtr & res_col = block.getByPosition(result).column; + + + const IDataType * hash_type = block.getByPosition(arguments[0]).type.get(); + + if (checkDataType(hash_type)) executeType(hash_col, res_col, num_buckets); + else if (checkDataType(hash_type)) executeType(hash_col, res_col, num_buckets); + else if (checkDataType(hash_type)) executeType(hash_col, res_col, num_buckets); + else if (checkDataType(hash_type)) executeType(hash_col, res_col, num_buckets); + else if (checkDataType(hash_type)) executeType(hash_col, res_col, num_buckets); + else if (checkDataType(hash_type)) executeType(hash_col, res_col, num_buckets); + else if (checkDataType(hash_type)) executeType(hash_col, res_col, num_buckets); + else if (checkDataType(hash_type)) executeType(hash_col, res_col, num_buckets); + else + throw Exception("Illegal type " + hash_type->getName() + " of the first argument of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + +private: + + template + void executeType(const ColumnPtr & col_hash_ptr, ColumnPtr & out_col_result, const UInt64 num_buckets) + { + auto col_hash = checkAndGetColumn>(col_hash_ptr.get()); + if (!col_hash) + throw Exception("Illegal type of the first argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + auto col_result = ColumnVector::create(); + typename ColumnVector::Container & vec_result = col_result->getData(); + const auto & vec_hash = col_hash->getData(); + + size_t size = vec_hash.size(); + vec_result.resize(size); + for (size_t i = 0; i < size; ++i) + vec_result[i] = Impl::apply(static_cast(vec_hash[i]), static_cast(num_buckets)); + + out_col_result = std::move(col_result); + } +}; + + +using FunctionYandexConsistentHash = FunctionConsistentHashImpl; +using FunctionJumpConsistentHas = FunctionConsistentHashImpl; + + +} diff --git a/dbms/src/Functions/registerFunctions.cpp b/dbms/src/Functions/registerFunctions.cpp index 365b4a730bf..0dcc66bfd77 100644 --- a/dbms/src/Functions/registerFunctions.cpp +++ b/dbms/src/Functions/registerFunctions.cpp @@ -24,6 +24,7 @@ void registerFunctionsExternalDictionaries(FunctionFactory &); void registerFunctionsExternalModels(FunctionFactory &); void registerFunctionsFormatting(FunctionFactory &); void registerFunctionsHashing(FunctionFactory &); +void registerFunctionsConsistentHashing(FunctionFactory &); void registerFunctionsHigherOrder(FunctionFactory &); void registerFunctionsLogical(FunctionFactory &); void registerFunctionsMiscellaneous(FunctionFactory &); @@ -60,6 +61,7 @@ void registerFunctions() registerFunctionsExternalModels(factory); registerFunctionsFormatting(factory); registerFunctionsHashing(factory); + registerFunctionsConsistentHashing(factory); registerFunctionsHigherOrder(factory); registerFunctionsLogical(factory); registerFunctionsMiscellaneous(factory); diff --git a/dbms/tests/queries/0_stateless/00580_consistent_hashing_functions.reference b/dbms/tests/queries/0_stateless/00580_consistent_hashing_functions.reference new file mode 100644 index 00000000000..64458288805 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00580_consistent_hashing_functions.reference @@ -0,0 +1,6 @@ +0 43 520 0 361 237 +0 1 1 3 111 173 +358 +341 +111 +111 diff --git a/dbms/tests/queries/0_stateless/00580_consistent_hashing_functions.sql b/dbms/tests/queries/0_stateless/00580_consistent_hashing_functions.sql new file mode 100644 index 00000000000..1a2303d3072 --- /dev/null +++ 
b/dbms/tests/queries/0_stateless/00580_consistent_hashing_functions.sql @@ -0,0 +1,4 @@ +SELECT JumpConsistentHash(1, 1), JumpConsistentHash(42, 57), JumpConsistentHash(256, 1024), JumpConsistentHash(3735883980, 1), JumpConsistentHash(3735883980, 666), JumpConsistentHash(16045690984833335023, 255); +SELECT YandexConsistentHash(16045690984833335023, 1), YandexConsistentHash(16045690984833335023, 2), YandexConsistentHash(16045690984833335023, 3), YandexConsistentHash(16045690984833335023, 4), YandexConsistentHash(16045690984833335023, 173), YandexConsistentHash(16045690984833335023, 255); +SELECT JumpConsistentHash(intHash64(number), 787) FROM system.numbers LIMIT 1000000, 2; +SELECT YandexConsistentHash(16045690984833335023+number-number, 120) FROM system.numbers LIMIT 1000000, 2; diff --git a/libs/CMakeLists.txt b/libs/CMakeLists.txt index 970d2be15b4..cf2e8464452 100644 --- a/libs/CMakeLists.txt +++ b/libs/CMakeLists.txt @@ -18,3 +18,5 @@ endif () if (USE_MYSQL) add_subdirectory (libmysqlxx) endif () + +add_subdirectory (yandex-consistent-hashing) diff --git a/libs/yandex-consistent-hashing/CMakeLists.txt b/libs/yandex-consistent-hashing/CMakeLists.txt new file mode 100644 index 00000000000..694c2d071d9 --- /dev/null +++ b/libs/yandex-consistent-hashing/CMakeLists.txt @@ -0,0 +1,5 @@ +cmake_minimum_required(VERSION 2.8) +project(yandex-consistent-hashing CXX) + +add_library(yandex-consistent-hashing yandex/consistent_hashing.cpp yandex/popcount.cpp) +target_include_directories(yandex-consistent-hashing PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) \ No newline at end of file diff --git a/libs/yandex-consistent-hashing/yandex/bitops.h b/libs/yandex-consistent-hashing/yandex/bitops.h new file mode 100644 index 00000000000..0ddb7f8024b --- /dev/null +++ b/libs/yandex-consistent-hashing/yandex/bitops.h @@ -0,0 +1,55 @@ +#pragma once +#include +#include +#include + +// Assume little endian + +inline uint16_t & LO_16(uint32_t & x) { return reinterpret_cast(&x)[0]; } +inline uint16_t & HI_16(uint32_t & x) { return reinterpret_cast(&x)[1]; } + +inline uint32_t & LO_32(uint64_t & x) { return reinterpret_cast(&x)[0]; } +inline uint32_t & HI_32(uint64_t & x) { return reinterpret_cast(&x)[1]; } + + +#if defined(__GNUC__) + inline unsigned GetValueBitCountImpl(unsigned int value) noexcept { + // Y_ASSERT(value); // because __builtin_clz* have undefined result for zero. + return std::numeric_limits::digits - __builtin_clz(value); + } + + inline unsigned GetValueBitCountImpl(unsigned long value) noexcept { + // Y_ASSERT(value); // because __builtin_clz* have undefined result for zero. + return std::numeric_limits::digits - __builtin_clzl(value); + } + + inline unsigned GetValueBitCountImpl(unsigned long long value) noexcept { + // Y_ASSERT(value); // because __builtin_clz* have undefined result for zero. + return std::numeric_limits::digits - __builtin_clzll(value); + } +#else + /// Stupid realization for non-GCC. Can use BSR from x86 instructions set. + template + inline unsigned GetValueBitCountImpl(T value) noexcept { + // Y_ASSERT(value); // because __builtin_clz* have undefined result for zero. + unsigned result = 1; // result == 0 - impossible value, see Y_ASSERT(). + value >>= 1; + while (value) { + value >>= 1; + ++result; + } + + return result; + } +#endif + + +/** + * Returns the number of leading 0-bits in `value`, starting at the most significant bit position. 
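+ *
+ * Note: despite the wording above, the implementation actually returns the number of
+ * significant bits, i.e. the index of the highest set bit plus one: for value > 0,
+ * GetValueBitCount(1) == 1 and GetValueBitCount(4) == 3, which equals
+ * floor(log2(value)) + 1, rather than the count of leading zero bits.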
+ */ +template +static inline unsigned GetValueBitCount(T value) noexcept { + // Y_ASSERT(value > 0); + using TCvt = std::make_unsigned_t>; + return GetValueBitCountImpl(static_cast(value)); +} diff --git a/libs/yandex-consistent-hashing/yandex/consistent_hashing.cpp b/libs/yandex-consistent-hashing/yandex/consistent_hashing.cpp new file mode 100644 index 00000000000..347456eede3 --- /dev/null +++ b/libs/yandex-consistent-hashing/yandex/consistent_hashing.cpp @@ -0,0 +1,125 @@ +#include "consistent_hashing.h" + +#include "bitops.h" + +#include "popcount.h" + +#include + +/* + * (all numbers are written in big-endian manner: the least significant digit on the right) + * (only bit representations are used - no hex or octal, leading zeroes are ommited) + * + * Consistent hashing scheme: + * + * (sizeof(TValue) * 8, y] (y, 0] + * a = * ablock + * b = * cblock + * + * (sizeof(TValue) * 8, k] (k, 0] + * c = * cblock + * + * d = * + * + * k - is determined by 2^(k-1) < n <= 2^k inequality + * z - is number of ones in cblock + * y - number of digits after first one in cblock + * + * The cblock determines logic of using a- and b- blocks: + * + * bits of cblock | result of a function + * 0 : 0 + * 1 : 1 (optimization, the next case includes this one) + * 1?..? : 1ablock (z is even) or 1bblock (z is odd) if possible (=n), than smooth moving from n=2^(k-1) to n=2^k is applied. + * Using "*" bits of a-,b-,c-,d- blocks uint64_t value is combined, modulo of which determines + * if the value should be greather than 2^(k-1) or ConsistentHashing(x, 2^(k-1)) should be used. + * The last case is optimized according to previous checks. + */ + +namespace { + +template +TValue PowerOf2(size_t k) { + return (TValue)0x1 << k; +} + +template +TValue SelectAOrBBlock(TValue a, TValue b, TValue cBlock) { + size_t z = PopCount(cBlock); + bool useABlock = z % 2 == 0; + return useABlock ? a : b; +} + +// Gets the exact result for n = k2 = 2 ^ k +template +size_t ConsistentHashingForPowersOf2(TValue a, TValue b, TValue c, TValue k2) { + TValue cBlock = c & (k2 - 1); // (k, 0] bits of c + // Zero and one cases + if (cBlock < 2) { + // First two cases of result function table: 0 if cblock is 0, 1 if cblock is 1. + return cBlock; + } + size_t y = GetValueBitCount(cBlock) - 1; // cblock = 0..01?..? (y = number of digits after 1), y > 0 + TValue y2 = PowerOf2(y); // y2 = 2^y + TValue abBlock = SelectAOrBBlock(a, b, cBlock) & (y2 - 1); + return y2 + abBlock; +} + +template +uint64_t GetAsteriskBits(TValue a, TValue b, TValue c, TValue d, size_t k) { + size_t shift = sizeof(TValue) * 8 - k; + uint64_t res = (d << shift) | (c >> k); + ++shift; + res <<= shift; + res |= b >> (k - 1); + res <<= shift; + res |= a >> (k - 1); + + return res; +} + +template +size_t ConsistentHashingImpl(TValue a, TValue b, TValue c, TValue d, size_t n) { + if (n <= 0) + throw std::runtime_error("Can't map consistently to a zero values."); + + // Uninteresting case + if (n == 1) { + return 0; + } + size_t k = GetValueBitCount(n - 1); // 2^(k-1) < n <= 2^k, k >= 1 + TValue k2 = PowerOf2(k); // k2 = 2^k + size_t largeValue; + { + // Bit determined variant. Large scheme. 
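+        // The power-of-two scheme is exact for n = k2 = 2^k; the algorithm is built so that
+        // whenever its answer already falls below the real n, that answer is final.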
+ largeValue = ConsistentHashingForPowersOf2(a, b, c, k2); + if (largeValue < n) { + return largeValue; + } + } + // Since largeValue is not assigned yet + // Smooth moving from one bit scheme to another + TValue k21 = PowerOf2(k - 1); + { + size_t s = GetAsteriskBits(a, b, c, d, k) % (largeValue * (largeValue + 1)); + size_t largeValue2 = s / k2 + k21; + if (largeValue2 < n) { + return largeValue2; + } + } + // Bit determined variant. Short scheme. + return ConsistentHashingForPowersOf2(a, b, c, k21); // Do not apply checks. It is always less than k21 = 2^(k-1) +} + +} // namespace // anonymous + +std::size_t ConsistentHashing(std::uint64_t x, std::size_t n) { + uint32_t lo = LO_32(x); + uint32_t hi = HI_32(x); + return ConsistentHashingImpl(LO_16(lo), HI_16(lo), LO_16(hi), HI_16(hi), n); +} +std::size_t ConsistentHashing(std::uint64_t lo, std::uint64_t hi, std::size_t n) { + return ConsistentHashingImpl(LO_32(lo), HI_32(lo), LO_32(hi), HI_32(hi), n); +} diff --git a/libs/yandex-consistent-hashing/yandex/consistent_hashing.h b/libs/yandex-consistent-hashing/yandex/consistent_hashing.h new file mode 100644 index 00000000000..0ac2b01fcfb --- /dev/null +++ b/libs/yandex-consistent-hashing/yandex/consistent_hashing.h @@ -0,0 +1,17 @@ +#pragma once + +#include +#include + +/* + * Maps random ui64 x (in fact hash of some string) to n baskets/shards. + * Output value is id of a basket. 0 <= ConsistentHashing(x, n) < n. + * Probability of all baskets must be equal. Also, it should be consistent + * in terms, that with different n_1 < n_2 probability of + * ConsistentHashing(x, n_1) != ConsistentHashing(x, n_2) must be equal to + * (n_2 - n_1) / n_2 - the least possible with previous conditions. + * It requires O(1) memory and cpu to calculate. So, it is faster than classic + * consistent hashing algos with points on circle. 
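+ * For example, growing from n_1 = 8 to n_2 = 10 buckets remaps a given key with
+ * probability (10 - 8) / 10 = 1/5, the theoretical minimum for a consistent mapping.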
+ */ +std::size_t ConsistentHashing(std::uint64_t x, std::size_t n); // Works good for n < 65536 +std::size_t ConsistentHashing(std::uint64_t lo, std::uint64_t hi, std::size_t n); // Works good for n < 4294967296 diff --git a/libs/yandex-consistent-hashing/yandex/popcount.cpp b/libs/yandex-consistent-hashing/yandex/popcount.cpp new file mode 100644 index 00000000000..66edfe65829 --- /dev/null +++ b/libs/yandex-consistent-hashing/yandex/popcount.cpp @@ -0,0 +1,25 @@ +#include "popcount.h" + +static const uint8_t PopCountLUT8Impl[1 << 8] = { +#define B2(n) n, n + 1, n + 1, n + 2 +#define B4(n) B2(n), B2(n + 1), B2(n + 1), B2(n + 2) +#define B6(n) B4(n), B4(n + 1), B4(n + 1), B4(n + 2) + B6(0), B6(1), B6(1), B6(2)}; + +uint8_t const* PopCountLUT8 = PopCountLUT8Impl; + +#if !defined(_MSC_VER) +//ICE here for msvc + +static const uint8_t PopCountLUT16Impl[1 << 16] = { +#define B2(n) n, n + 1, n + 1, n + 2 +#define B4(n) B2(n), B2(n + 1), B2(n + 1), B2(n + 2) +#define B6(n) B4(n), B4(n + 1), B4(n + 1), B4(n + 2) +#define B8(n) B6(n), B6(n + 1), B6(n + 1), B6(n + 2) +#define B10(n) B8(n), B8(n + 1), B8(n + 1), B8(n + 2) +#define B12(n) B10(n), B10(n + 1), B10(n + 1), B10(n + 2) +#define B14(n) B12(n), B12(n + 1), B12(n + 1), B12(n + 2) + B14(0), B14(1), B14(1), B14(2)}; + +uint8_t const* PopCountLUT16 = PopCountLUT16Impl; +#endif diff --git a/libs/yandex-consistent-hashing/yandex/popcount.h b/libs/yandex-consistent-hashing/yandex/popcount.h new file mode 100644 index 00000000000..b49b2fb450a --- /dev/null +++ b/libs/yandex-consistent-hashing/yandex/popcount.h @@ -0,0 +1,84 @@ +#pragma once + +#include +#include +#include +using std::size_t; + +#include "bitops.h" + +#if defined(_MSC_VER) +#include +#endif + +#ifdef __SSE2__ +constexpr bool HavePOPCNTInstr = true; +#else +constexpr bool HavePOPCNTInstr = false; +#pragma GCC warning "SSE2 is not detected, PopCount function will be too slow" +#endif + +static inline uint32_t PopCountImpl(uint8_t n) { + extern uint8_t const* PopCountLUT8; + return PopCountLUT8[n]; +} + +static inline uint32_t PopCountImpl(uint16_t n) { +#if defined(_MSC_VER) + return __popcnt16(n); +#else + extern uint8_t const* PopCountLUT16; + return PopCountLUT16[n]; +#endif +} + +static inline uint32_t PopCountImpl(uint32_t n) { +#if defined(_MSC_VER) + return __popcnt(n); +#else +#if defined(__x86_64__) + + if (HavePOPCNTInstr) { + uint32_t r; + + __asm__("popcnt %1, %0;" + : "=r"(r) + : "r"(n) + :); + + return r; + } +#endif + + return PopCountImpl((uint16_t)LO_16(n)) + PopCountImpl((uint16_t)HI_16(n)); +#endif +} + +static inline uint32_t PopCountImpl(uint64_t n) { +#if defined(_MSC_VER) && !defined(_i386_) + return __popcnt64(n); +#else +#if defined(__x86_64__) + + if (HavePOPCNTInstr) { + uint64_t r; + + __asm__("popcnt %1, %0;" + : "=r"(r) + : "r"(n) + :); + + return r; + } +#endif + + return PopCountImpl((uint32_t)LO_32(n)) + PopCountImpl((uint32_t)HI_32(n)); +#endif +} + +template +static inline uint32_t PopCount(T n) { + using TCvt = std::make_unsigned_t>; + + return PopCountImpl(static_cast(n)); +} From c54c64a34cca1a23cd31cfc5e1cf6febbcb96460 Mon Sep 17 00:00:00 2001 From: Andrew Grigorev Date: Sat, 24 Feb 2018 01:11:21 +0300 Subject: [PATCH 091/209] Fix typo in AggregatingMergeTree docs --- docs/ru/table_engines/aggregatingmergetree.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/table_engines/aggregatingmergetree.md b/docs/ru/table_engines/aggregatingmergetree.md index 7a56e610942..f14bb679cd0 100644 --- 
a/docs/ru/table_engines/aggregatingmergetree.md +++ b/docs/ru/table_engines/aggregatingmergetree.md @@ -84,4 +84,4 @@ ORDER BY StartDate; Вы можете создать такое материализованное представление и навесить на него обычное представление, выполняющее доагрегацию данных. -Заметим, что в большинстве случаев, использование `AggregatingMergeTree` является неоправданным, так как можно достаточно эффективно выполнять запросы по неагрегированным данных. +Заметим, что в большинстве случаев, использование `AggregatingMergeTree` является неоправданным, так как можно достаточно эффективно выполнять запросы по неагрегированным данным. From 11b4cf31632aa9dafe5776c897a02229e012b853 Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Sat, 24 Feb 2018 01:47:35 +0300 Subject: [PATCH 092/209] Updated test. [#CLICKHOUSE-3606] --- .../task_month_to_week_description.xml | 2 +- dbms/tests/integration/test_cluster_copier/test.py | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/dbms/tests/integration/test_cluster_copier/task_month_to_week_description.xml b/dbms/tests/integration/test_cluster_copier/task_month_to_week_description.xml index 82cd16a6b6c..8c0dc7b28ab 100644 --- a/dbms/tests/integration/test_cluster_copier/task_month_to_week_description.xml +++ b/dbms/tests/integration/test_cluster_copier/task_month_to_week_description.xml @@ -29,7 +29,7 @@ ENGINE=ReplicatedMergeTree('/clickhouse/tables/cluster{cluster}/{shard}/b', '{replica}') PARTITION BY toMonday(date) ORDER BY d - d + 1 + JumpConsistentHash(intHash64(d), 2) diff --git a/dbms/tests/integration/test_cluster_copier/test.py b/dbms/tests/integration/test_cluster_copier/test.py index 8ef4e27b913..f3d317d5d1c 100644 --- a/dbms/tests/integration/test_cluster_copier/test.py +++ b/dbms/tests/integration/test_cluster_copier/test.py @@ -129,8 +129,8 @@ class Task2: assert TSV(self.cluster.instances['s0_0_0'].query("SELECT count() FROM cluster(cluster0, default, a)")) == TSV("85\n") assert TSV(self.cluster.instances['s1_0_0'].query("SELECT count(), uniqExact(date) FROM cluster(cluster1, default, b)")) == TSV("85\t85\n") - assert TSV(self.cluster.instances['s1_0_0'].query("SELECT DISTINCT d % 2 FROM b")) == TSV("1\n") - assert TSV(self.cluster.instances['s1_1_0'].query("SELECT DISTINCT d % 2 FROM b")) == TSV("0\n") + assert TSV(self.cluster.instances['s1_0_0'].query("SELECT DISTINCT JumpConsistentHash(intHash64(d), 2) FROM b")) == TSV("0\n") + assert TSV(self.cluster.instances['s1_1_0'].query("SELECT DISTINCT JumpConsistentHash(intHash64(d), 2) FROM b")) == TSV("1\n") assert TSV(self.cluster.instances['s1_0_0'].query("SELECT uniqExact(partition) IN (12, 13) FROM system.parts WHERE active AND database='default' AND table='b'")) == TSV("1\n") assert TSV(self.cluster.instances['s1_1_0'].query("SELECT uniqExact(partition) IN (12, 13) FROM system.parts WHERE active AND database='default' AND table='b'")) == TSV("1\n") @@ -184,20 +184,21 @@ def execute_task(task, cmd_options): zk.delete(zk_task_path, recursive=True) +# Tests + def test_copy1_simple(started_cluster): execute_task(Task1(started_cluster), []) - def test_copy1_with_recovering(started_cluster): execute_task(Task1(started_cluster), ['--copy-fault-probability', str(COPYING_FAIL_PROBABILITY)]) - def test_copy_month_to_week_partition(started_cluster): execute_task(Task2(started_cluster), []) -def test_copy_month_to_week_partition(started_cluster): +def test_copy_month_to_week_partition_with_recovering(started_cluster): execute_task(Task2(started_cluster), 
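    # Same copy as above, but with an injected copy-fault probability of 0.1,
    # so this run also exercises the copier's recovery path.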
['--copy-fault-probability', str(0.1)]) + if __name__ == '__main__': with contextmanager(started_cluster)() as cluster: for name, instance in cluster.instances.items(): From 3772c8847213531307bef4286c99b7d0b8de615f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 24 Feb 2018 04:31:42 +0300 Subject: [PATCH 093/209] Removed bad code that may lead to (harmless) race condition [#CLICKHOUSE-2] --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 372 ++++++++---------- dbms/src/Parsers/ASTFunction.cpp | 4 +- dbms/src/Parsers/ASTFunction.h | 13 - dbms/src/Parsers/ASTSelectQuery.cpp | 2 +- dbms/src/Parsers/ParserInsertQuery.cpp | 1 - .../src/Parsers/ParserTablesInSelectQuery.cpp | 18 +- dbms/tests/instructions/sanitizers.txt | 10 +- 7 files changed, 181 insertions(+), 239 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 2072d499317..b890f2fb33c 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -1108,35 +1108,6 @@ void ExpressionAnalyzer::normalizeTreeImpl( normalizeTreeImpl(select->having_expression, finished_asts, current_asts, current_alias, level + 1); } - /// Actions to be performed from the bottom up. - - if (ASTFunction * node = typeid_cast(ast.get())) - { - if (node->kind == ASTFunction::TABLE_FUNCTION) - { - } - else if (node->name == "lambda") - { - node->kind = ASTFunction::LAMBDA_EXPRESSION; - } - else if (AggregateFunctionFactory::instance().isAggregateFunctionName(node->name)) - { - node->kind = ASTFunction::AGGREGATE_FUNCTION; - } - else if (node->name == "arrayJoin") - { - node->kind = ASTFunction::ARRAY_JOIN; - } - else - { - node->kind = ASTFunction::FUNCTION; - } - - if (node->parameters && node->kind != ASTFunction::AGGREGATE_FUNCTION) - throw Exception("The only parametric functions (functions with two separate parenthesis pairs) are aggregate functions" - ", and '" + node->name + "' is not an aggregate function.", ErrorCodes::FUNCTION_CANNOT_HAVE_PARAMETERS); - } - current_asts.erase(initial_ast.get()); current_asts.erase(ast.get()); finished_asts[initial_ast] = ast; @@ -1181,7 +1152,6 @@ static ASTPtr addTypeConversion(std::unique_ptr && ast, const String func->alias = ast->alias; func->prefer_alias_to_column_name = ast->prefer_alias_to_column_name; ast->alias.clear(); - func->kind = ASTFunction::FUNCTION; func->name = "CAST"; auto exp_list = std::make_shared(ast->range); func->arguments = exp_list; @@ -1260,7 +1230,6 @@ void ExpressionAnalyzer::executeScalarSubqueriesImpl(ASTPtr & ast) auto tuple = std::make_shared(ast->range); tuple->alias = subquery->alias; ast = tuple; - tuple->kind = ASTFunction::FUNCTION; tuple->name = "tuple"; auto exp_list = std::make_shared(ast->range); tuple->arguments = exp_list; @@ -1286,8 +1255,7 @@ void ExpressionAnalyzer::executeScalarSubqueriesImpl(ASTPtr & ast) */ ASTFunction * func = typeid_cast(ast.get()); - if (func && func->kind == ASTFunction::FUNCTION - && functionIsInOrGlobalInOperator(func->name)) + if (func && functionIsInOrGlobalInOperator(func->name)) { for (auto & child : ast->children) { @@ -1495,7 +1463,7 @@ void ExpressionAnalyzer::makeSetsForIndexImpl(const ASTPtr & node, const Block & } const ASTFunction * func = typeid_cast(node.get()); - if (func && func->kind == ASTFunction::FUNCTION && functionIsInOperator(func->name)) + if (func && functionIsInOperator(func->name)) { const IAST & args = *func->arguments; const ASTPtr & arg = args.children.at(1); @@ -1976,11 +1944,11 @@ void 
ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries, } else if (ASTFunction * node = typeid_cast(ast.get())) { - if (node->kind == ASTFunction::LAMBDA_EXPRESSION) + if (node->name == "lambda") throw Exception("Unexpected lambda expression", ErrorCodes::UNEXPECTED_EXPRESSION); /// Function arrayJoin. - if (node->kind == ASTFunction::ARRAY_JOIN) + if (node->name == "arrayJoin") { if (node->arguments->children.size() != 1) throw Exception("arrayJoin requires exactly 1 argument", ErrorCodes::TYPE_MISMATCH); @@ -1999,193 +1967,193 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries, return; } - if (node->kind == ASTFunction::FUNCTION) + if (functionIsInOrGlobalInOperator(node->name)) { - if (functionIsInOrGlobalInOperator(node->name)) + if (!no_subqueries) { - if (!no_subqueries) - { - /// Let's find the type of the first argument (then getActionsImpl will be called again and will not affect anything). - getActionsImpl(node->arguments->children.at(0), no_subqueries, only_consts, actions_stack); + /// Let's find the type of the first argument (then getActionsImpl will be called again and will not affect anything). + getActionsImpl(node->arguments->children.at(0), no_subqueries, only_consts, actions_stack); - /// Transform tuple or subquery into a set. - makeSet(node, actions_stack.getSampleBlock()); + /// Transform tuple or subquery into a set. + makeSet(node, actions_stack.getSampleBlock()); + } + else + { + if (!only_consts) + { + /// We are in the part of the tree that we are not going to compute. You just need to define types. + /// Do not subquery and create sets. We insert an arbitrary column of the correct type. + ColumnWithTypeAndName fake_column; + fake_column.name = node->getColumnName(); + fake_column.type = std::make_shared(); + actions_stack.addAction(ExpressionAction::addColumn(fake_column)); + getActionsImpl(node->arguments->children.at(0), no_subqueries, only_consts, actions_stack); + } + return; + } + } + + /// A special function `indexHint`. Everything that is inside it is not calculated + /// (and is used only for index analysis, see PKCondition). + if (node->name == "indexHint") + { + actions_stack.addAction(ExpressionAction::addColumn(ColumnWithTypeAndName( + ColumnConst::create(ColumnUInt8::create(1, 1), 1), std::make_shared(), node->getColumnName()))); + return; + } + + if (AggregateFunctionFactory::instance().isAggregateFunctionName(node->name)) + return; + + const FunctionBuilderPtr & function_builder = FunctionFactory::instance().get(node->name, context); + + Names argument_names; + DataTypes argument_types; + bool arguments_present = true; + + /// If the function has an argument-lambda expression, you need to determine its type before the recursive call. + bool has_lambda_arguments = false; + + for (auto & child : node->arguments->children) + { + ASTFunction * lambda = typeid_cast(child.get()); + if (lambda && lambda->name == "lambda") + { + /// If the argument is a lambda expression, just remember its approximate type. 
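+                /// The placeholder created below records only the lambda's arity; the concrete
+                /// argument types are filled in later by function_builder->getLambdaArgumentTypes(),
+                /// and the lambda body itself is analyzed on the second pass further down.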
+ if (lambda->arguments->children.size() != 2) + throw Exception("lambda requires two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + ASTFunction * lambda_args_tuple = typeid_cast(lambda->arguments->children.at(0).get()); + + if (!lambda_args_tuple || lambda_args_tuple->name != "tuple") + throw Exception("First argument of lambda must be a tuple", ErrorCodes::TYPE_MISMATCH); + + has_lambda_arguments = true; + argument_types.emplace_back(std::make_shared(DataTypes(lambda_args_tuple->arguments->children.size()))); + /// Select the name in the next cycle. + argument_names.emplace_back(); + } + else if (prepared_sets.count(child.get())) + { + ColumnWithTypeAndName column; + column.type = std::make_shared(); + + const SetPtr & set = prepared_sets[child.get()]; + + /// If the argument is a set given by an enumeration of values (so, the set was already built), give it a unique name, + /// so that sets with the same record do not fuse together (they can have different types). + if (!set->empty()) + column.name = getUniqueName(actions_stack.getSampleBlock(), "__set"); + else + column.name = child->getColumnName(); + + if (!actions_stack.getSampleBlock().has(column.name)) + { + column.column = ColumnSet::create(1, set); + + actions_stack.addAction(ExpressionAction::addColumn(column)); + } + + argument_types.push_back(column.type); + argument_names.push_back(column.name); + } + else + { + /// If the argument is not a lambda expression, call it recursively and find out its type. + getActionsImpl(child, no_subqueries, only_consts, actions_stack); + std::string name = child->getColumnName(); + if (actions_stack.getSampleBlock().has(name)) + { + argument_types.push_back(actions_stack.getSampleBlock().getByName(name).type); + argument_names.push_back(name); } else { - if (!only_consts) + if (only_consts) { - /// We are in the part of the tree that we are not going to compute. You just need to define types. - /// Do not subquery and create sets. We insert an arbitrary column of the correct type. - ColumnWithTypeAndName fake_column; - fake_column.name = node->getColumnName(); - fake_column.type = std::make_shared(); - actions_stack.addAction(ExpressionAction::addColumn(fake_column)); - getActionsImpl(node->arguments->children.at(0), no_subqueries, only_consts, actions_stack); + arguments_present = false; + } + else + { + throw Exception("Unknown identifier: " + name, ErrorCodes::UNKNOWN_IDENTIFIER); } - return; } } + } - /// A special function `indexHint`. Everything that is inside it is not calculated - /// (and is used only for index analysis, see PKCondition). - if (node->name == "indexHint") + if (only_consts && !arguments_present) + return; + + if (has_lambda_arguments && !only_consts) + { + function_builder->getLambdaArgumentTypes(argument_types); + + /// Call recursively for lambda expressions. + for (size_t i = 0; i < node->arguments->children.size(); ++i) { - actions_stack.addAction(ExpressionAction::addColumn(ColumnWithTypeAndName( - ColumnConst::create(ColumnUInt8::create(1, 1), 1), std::make_shared(), node->getColumnName()))); - return; - } + ASTPtr child = node->arguments->children[i]; - const FunctionBuilderPtr & function_builder = FunctionFactory::instance().get(node->name, context); - - Names argument_names; - DataTypes argument_types; - bool arguments_present = true; - - /// If the function has an argument-lambda expression, you need to determine its type before the recursive call. 
- bool has_lambda_arguments = false; - - for (auto & child : node->arguments->children) - { ASTFunction * lambda = typeid_cast(child.get()); if (lambda && lambda->name == "lambda") { - /// If the argument is a lambda expression, just remember its approximate type. - if (lambda->arguments->children.size() != 2) - throw Exception("lambda requires two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - + const DataTypeFunction * lambda_type = typeid_cast(argument_types[i].get()); ASTFunction * lambda_args_tuple = typeid_cast(lambda->arguments->children.at(0).get()); + ASTs lambda_arg_asts = lambda_args_tuple->arguments->children; + NamesAndTypesList lambda_arguments; - if (!lambda_args_tuple || lambda_args_tuple->name != "tuple") - throw Exception("First argument of lambda must be a tuple", ErrorCodes::TYPE_MISMATCH); - - has_lambda_arguments = true; - argument_types.emplace_back(std::make_shared(DataTypes(lambda_args_tuple->arguments->children.size()))); - /// Select the name in the next cycle. - argument_names.emplace_back(); - } - else if (prepared_sets.count(child.get())) - { - ColumnWithTypeAndName column; - column.type = std::make_shared(); - - const SetPtr & set = prepared_sets[child.get()]; - - /// If the argument is a set given by an enumeration of values (so, the set was already built), give it a unique name, - /// so that sets with the same record do not fuse together (they can have different types). - if (!set->empty()) - column.name = getUniqueName(actions_stack.getSampleBlock(), "__set"); - else - column.name = child->getColumnName(); - - if (!actions_stack.getSampleBlock().has(column.name)) + for (size_t j = 0; j < lambda_arg_asts.size(); ++j) { - column.column = ColumnSet::create(1, set); + ASTIdentifier * identifier = typeid_cast(lambda_arg_asts[j].get()); + if (!identifier) + throw Exception("lambda argument declarations must be identifiers", ErrorCodes::TYPE_MISMATCH); - actions_stack.addAction(ExpressionAction::addColumn(column)); + String arg_name = identifier->name; + + lambda_arguments.emplace_back(arg_name, lambda_type->getArgumentTypes()[j]); } - argument_types.push_back(column.type); - argument_names.push_back(column.name); - } - else - { - /// If the argument is not a lambda expression, call it recursively and find out its type. 
- getActionsImpl(child, no_subqueries, only_consts, actions_stack); - std::string name = child->getColumnName(); - if (actions_stack.getSampleBlock().has(name)) - { - argument_types.push_back(actions_stack.getSampleBlock().getByName(name).type); - argument_names.push_back(name); - } - else - { - if (only_consts) - { - arguments_present = false; - } - else - { - throw Exception("Unknown identifier: " + name, ErrorCodes::UNKNOWN_IDENTIFIER); - } - } + actions_stack.pushLevel(lambda_arguments); + getActionsImpl(lambda->arguments->children.at(1), no_subqueries, only_consts, actions_stack); + ExpressionActionsPtr lambda_actions = actions_stack.popLevel(); + + String result_name = lambda->arguments->children.at(1)->getColumnName(); + lambda_actions->finalize(Names(1, result_name)); + DataTypePtr result_type = lambda_actions->getSampleBlock().getByName(result_name).type; + + Names captured; + Names required = lambda_actions->getRequiredColumns(); + for (size_t j = 0; j < required.size(); ++j) + if (findColumn(required[j], lambda_arguments) == lambda_arguments.end()) + captured.push_back(required[j]); + + /// We can not name `getColumnName()`, + /// because it does not uniquely define the expression (the types of arguments can be different). + String lambda_name = getUniqueName(actions_stack.getSampleBlock(), "__lambda"); + + auto function_capture = std::make_shared( + lambda_actions, captured, lambda_arguments, result_type, result_name); + actions_stack.addAction(ExpressionAction::applyFunction(function_capture, captured, lambda_name)); + + argument_types[i] = std::make_shared(lambda_type->getArgumentTypes(), result_type); + argument_names[i] = lambda_name; } } - - if (only_consts && !arguments_present) - return; - - if (has_lambda_arguments && !only_consts) - { - function_builder->getLambdaArgumentTypes(argument_types); - - /// Call recursively for lambda expressions. 
- for (size_t i = 0; i < node->arguments->children.size(); ++i) - { - ASTPtr child = node->arguments->children[i]; - - ASTFunction * lambda = typeid_cast(child.get()); - if (lambda && lambda->name == "lambda") - { - const DataTypeFunction * lambda_type = typeid_cast(argument_types[i].get()); - ASTFunction * lambda_args_tuple = typeid_cast(lambda->arguments->children.at(0).get()); - ASTs lambda_arg_asts = lambda_args_tuple->arguments->children; - NamesAndTypesList lambda_arguments; - - for (size_t j = 0; j < lambda_arg_asts.size(); ++j) - { - ASTIdentifier * identifier = typeid_cast(lambda_arg_asts[j].get()); - if (!identifier) - throw Exception("lambda argument declarations must be identifiers", ErrorCodes::TYPE_MISMATCH); - - String arg_name = identifier->name; - - lambda_arguments.emplace_back(arg_name, lambda_type->getArgumentTypes()[j]); - } - - actions_stack.pushLevel(lambda_arguments); - getActionsImpl(lambda->arguments->children.at(1), no_subqueries, only_consts, actions_stack); - ExpressionActionsPtr lambda_actions = actions_stack.popLevel(); - - String result_name = lambda->arguments->children.at(1)->getColumnName(); - lambda_actions->finalize(Names(1, result_name)); - DataTypePtr result_type = lambda_actions->getSampleBlock().getByName(result_name).type; - - Names captured; - Names required = lambda_actions->getRequiredColumns(); - for (size_t j = 0; j < required.size(); ++j) - if (findColumn(required[j], lambda_arguments) == lambda_arguments.end()) - captured.push_back(required[j]); - - /// We can not name `getColumnName()`, - /// because it does not uniquely define the expression (the types of arguments can be different). - String lambda_name = getUniqueName(actions_stack.getSampleBlock(), "__lambda"); - - auto function_capture = std::make_shared( - lambda_actions, captured, lambda_arguments, result_type, result_name); - actions_stack.addAction(ExpressionAction::applyFunction(function_capture, captured, lambda_name)); - - argument_types[i] = std::make_shared(lambda_type->getArgumentTypes(), result_type); - argument_names[i] = lambda_name; - } - } - } - - if (only_consts) - { - for (size_t i = 0; i < argument_names.size(); ++i) - { - if (!actions_stack.getSampleBlock().has(argument_names[i])) - { - arguments_present = false; - break; - } - } - } - - if (arguments_present) - actions_stack.addAction(ExpressionAction::applyFunction(function_builder, argument_names, node->getColumnName())); } + + if (only_consts) + { + for (size_t i = 0; i < argument_names.size(); ++i) + { + if (!actions_stack.getSampleBlock().has(argument_names[i])) + { + arguments_present = false; + break; + } + } + } + + if (arguments_present) + actions_stack.addAction(ExpressionAction::applyFunction(function_builder, argument_names, node->getColumnName())); } else if (ASTLiteral * node = typeid_cast(ast.get())) { @@ -2223,7 +2191,7 @@ void ExpressionAnalyzer::getAggregates(const ASTPtr & ast, ExpressionActionsPtr } const ASTFunction * node = typeid_cast(ast.get()); - if (node && node->kind == ASTFunction::AGGREGATE_FUNCTION) + if (node && AggregateFunctionFactory::instance().isAggregateFunctionName(node->name)) { has_aggregation = true; AggregateDescription aggregate; @@ -2268,7 +2236,7 @@ void ExpressionAnalyzer::assertNoAggregates(const ASTPtr & ast, const char * des { const ASTFunction * node = typeid_cast(ast.get()); - if (node && node->kind == ASTFunction::AGGREGATE_FUNCTION) + if (node && AggregateFunctionFactory::instance().isAggregateFunctionName(node->name)) throw Exception("Aggregate function " + 
node->getColumnName() + " is found " + String(description) + " in query", ErrorCodes::ILLEGAL_AGGREGATION); @@ -2570,7 +2538,7 @@ void ExpressionAnalyzer::getActionsBeforeAggregation(const ASTPtr & ast, Express { ASTFunction * node = typeid_cast(ast.get()); - if (node && node->kind == ASTFunction::AGGREGATE_FUNCTION) + if (node && AggregateFunctionFactory::instance().isAggregateFunctionName(node->name)) for (auto & argument : node->arguments->children) getRootActions(argument, no_subqueries, false, actions); else @@ -2836,7 +2804,7 @@ void ExpressionAnalyzer::getRequiredColumnsImpl(const ASTPtr & ast, if (ASTFunction * node = typeid_cast(ast.get())) { - if (node->kind == ASTFunction::LAMBDA_EXPRESSION) + if (node->name == "lambda") { if (node->arguments->children.size() != 2) throw Exception("lambda requires two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); diff --git a/dbms/src/Parsers/ASTFunction.cpp b/dbms/src/Parsers/ASTFunction.cpp index 434b14673ba..7d73e48b7ea 100644 --- a/dbms/src/Parsers/ASTFunction.cpp +++ b/dbms/src/Parsers/ASTFunction.cpp @@ -48,8 +48,8 @@ ASTPtr ASTFunction::clone() const auto res = std::make_shared(*this); res->children.clear(); - if (arguments) { res->arguments = arguments->clone(); res->children.push_back(res->arguments); } - if (parameters) { res->parameters = parameters->clone(); res->children.push_back(res->parameters); } + if (arguments) { res->arguments = arguments->clone(); res->children.push_back(res->arguments); } + if (parameters) { res->parameters = parameters->clone(); res->children.push_back(res->parameters); } return res; } diff --git a/dbms/src/Parsers/ASTFunction.h b/dbms/src/Parsers/ASTFunction.h index f4e7c32d139..462dc439329 100644 --- a/dbms/src/Parsers/ASTFunction.h +++ b/dbms/src/Parsers/ASTFunction.h @@ -12,24 +12,11 @@ namespace DB class ASTFunction : public ASTWithAlias { public: - /// TODO This is semantic, not syntax. Remove it. - enum FunctionKind - { - UNKNOWN, - TABLE_FUNCTION, - FUNCTION, - AGGREGATE_FUNCTION, - LAMBDA_EXPRESSION, - ARRAY_JOIN, - }; - String name; ASTPtr arguments; /// parameters - for parametric aggregate function. Example: quantile(0.9)(x) - what in first parens are 'parameters'. 
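    /// For an ordinary call such as f(x, y) there is no separate parameter list,
    /// so `parameters` stays null and only `arguments` is set.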
ASTPtr parameters; - FunctionKind kind{UNKNOWN}; - public: ASTFunction() = default; ASTFunction(const StringRange range_) : ASTWithAlias(range_) {} diff --git a/dbms/src/Parsers/ASTSelectQuery.cpp b/dbms/src/Parsers/ASTSelectQuery.cpp index d2d5b716d3a..aec8d6345d9 100644 --- a/dbms/src/Parsers/ASTSelectQuery.cpp +++ b/dbms/src/Parsers/ASTSelectQuery.cpp @@ -28,7 +28,7 @@ ASTSelectQuery::ASTSelectQuery(const StringRange range_) : ASTQueryWithOutput(ra bool ASTSelectQuery::hasArrayJoin(const ASTPtr & ast) { if (const ASTFunction * function = typeid_cast(&*ast)) - if (function->kind == ASTFunction::ARRAY_JOIN) + if (function->name == "arrayJoin") return true; for (const auto & child : ast->children) diff --git a/dbms/src/Parsers/ParserInsertQuery.cpp b/dbms/src/Parsers/ParserInsertQuery.cpp index 0842c1fbd92..2fe8ca46f68 100644 --- a/dbms/src/Parsers/ParserInsertQuery.cpp +++ b/dbms/src/Parsers/ParserInsertQuery.cpp @@ -58,7 +58,6 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { if (!table_function_p.parse(pos, table_function, expected)) return false; - static_cast(*table_function).kind = ASTFunction::TABLE_FUNCTION; } else { diff --git a/dbms/src/Parsers/ParserTablesInSelectQuery.cpp b/dbms/src/Parsers/ParserTablesInSelectQuery.cpp index aa475fb477c..6b28deeb227 100644 --- a/dbms/src/Parsers/ParserTablesInSelectQuery.cpp +++ b/dbms/src/Parsers/ParserTablesInSelectQuery.cpp @@ -22,21 +22,9 @@ bool ParserTableExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expec { auto res = std::make_shared(); - if (ParserWithOptionalAlias(std::make_unique(), true) - .parse(pos, res->subquery, expected)) - { - } - else if (ParserWithOptionalAlias(std::make_unique(), true) - .parse(pos, res->table_function, expected)) - { - static_cast(*res->table_function).kind = ASTFunction::TABLE_FUNCTION; - } - else if (ParserWithOptionalAlias(std::make_unique(), true) - .parse(pos, res->database_and_table_name, expected)) - { - static_cast(*res->database_and_table_name).kind = ASTIdentifier::Table; - } - else + if (!ParserWithOptionalAlias(std::make_unique(), true).parse(pos, res->subquery, expected) + && !ParserWithOptionalAlias(std::make_unique(), true).parse(pos, res->table_function, expected) + && !ParserWithOptionalAlias(std::make_unique(), true).parse(pos, res->database_and_table_name, expected)) return false; /// FINAL diff --git a/dbms/tests/instructions/sanitizers.txt b/dbms/tests/instructions/sanitizers.txt index 8cc67bae05a..552a313e5e2 100644 --- a/dbms/tests/instructions/sanitizers.txt +++ b/dbms/tests/instructions/sanitizers.txt @@ -13,11 +13,11 @@ make -j24 # Copy binary to your server -scp ./dbms/src/Server/clickhouse yourserver:~/clickhouse-libcxx-asan +scp ./dbms/src/Server/clickhouse yourserver:~/clickhouse-asan # Start ClickHouse and run tests -sudo -u clickhouse ./clickhouse-libcxx-asan --config /etc/clickhouse-server/config.xml +sudo -u clickhouse ./clickhouse-asan --config /etc/clickhouse-server/config.xml # How to use Thread Sanitizer @@ -26,13 +26,13 @@ mkdir build && cd build # Note: All parameters are mandatory. -CC=clang CXX=clang++ cmake -D CCACHE_FOUND=0 -D CMAKE_BUILD_TYPE=TSan -D ENABLE_TCMALLOC=0 .. +CC=clang CXX=clang++ cmake -D CMAKE_BUILD_TYPE=TSan -D ENABLE_TCMALLOC=0 .. 
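# tcmalloc is disabled because the sanitizer runtimes need to intercept the standard
# allocator; a custom allocator can hide allocations (and therefore races) from them.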
make -j24 # Copy binary to your server -scp ./dbms/src/Server/clickhouse yourserver:~/clickhouse-libcxx-tsan +scp ./dbms/src/Server/clickhouse yourserver:~/clickhouse-tsan # Start ClickHouse and run tests -sudo -u clickhouse TSAN_OPTIONS='halt_on_error=1' ./clickhouse-libcxx-tsan --config /etc/clickhouse-server/config.xml +sudo -u clickhouse TSAN_OPTIONS='halt_on_error=1' ./clickhouse-tsan --config /etc/clickhouse-server/config.xml From 2b974554e5092c4cc08e0b1861101393832bd31c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 24 Feb 2018 04:55:43 +0300 Subject: [PATCH 094/209] Fixed test [#CLICKHOUSE-2] --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 11 ++++++++--- dbms/src/Storages/MergeTree/PKCondition.cpp | 3 +-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index b890f2fb33c..b7f67e8566c 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -368,7 +368,7 @@ void ExpressionAnalyzer::translateQualifiedNamesImpl(ASTPtr & ast, const String { /// Do not go to FROM, JOIN, UNION. if (!typeid_cast(child.get()) - && child.get() != select_query->next_union_all.get()) + && !typeid_cast(child.get())) { translateQualifiedNamesImpl(child, database_name, table_name, alias); } @@ -1136,7 +1136,7 @@ void ExpressionAnalyzer::executeScalarSubqueries() { /// Do not go to FROM, JOIN, UNION. if (!typeid_cast(child.get()) - && child.get() != select_query->next_union_all.get()) + && !typeid_cast(child.get())) { executeScalarSubqueriesImpl(child); } @@ -2169,7 +2169,12 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries, else { for (auto & child : ast->children) - getActionsImpl(child, no_subqueries, only_consts, actions_stack); + { + /// Do not go to FROM, JOIN, UNION. + if (!typeid_cast(child.get()) + && !typeid_cast(child.get())) + getActionsImpl(child, no_subqueries, only_consts, actions_stack); + } } } diff --git a/dbms/src/Storages/MergeTree/PKCondition.cpp b/dbms/src/Storages/MergeTree/PKCondition.cpp index a7198d5625d..798266e1d15 100644 --- a/dbms/src/Storages/MergeTree/PKCondition.cpp +++ b/dbms/src/Storages/MergeTree/PKCondition.cpp @@ -248,8 +248,7 @@ Block PKCondition::getBlockWithConstants( { DataTypeUInt8().createColumnConstWithDefaultValue(1), std::make_shared(), "_dummy" } }; - const auto expr_for_constant_folding = ExpressionAnalyzer{query, context, nullptr, all_columns} - .getConstActions(); + const auto expr_for_constant_folding = ExpressionAnalyzer{query, context, nullptr, all_columns}.getConstActions(); expr_for_constant_folding->execute(result); From 97766684d7df02d9f06e26ba0003588d35d037a8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 24 Feb 2018 07:46:14 +0300 Subject: [PATCH 095/209] Fixed (harmless) race condition [#CLICKHOUSE-2] --- dbms/src/Interpreters/Aggregator.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index c0a998a3081..ecd19370ef9 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -2135,6 +2135,7 @@ Block Aggregator::mergeBlocks(BlocksList & blocks, bool final) * Better hash function is needed because during external aggregation, * we may merge partitions of data with total number of keys far greater than 4 billion. 
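     * For example, with noticeably more than 2^32 distinct keys a 32-bit hash can no longer
     * distinguish them, so such variants are switched to their *_hash64 counterparts below.
     * The switch is applied to a local copy (merge_method) rather than to the `method`
     * member, so that the shared member is never written from here (this was the harmless
     * race condition fixed by this commit).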
*/ + auto merge_method = method; #define APPLY_FOR_VARIANTS_THAT_MAY_USE_BETTER_HASH_FUNCTION(M) \ M(key64) \ @@ -2146,8 +2147,8 @@ Block Aggregator::mergeBlocks(BlocksList & blocks, bool final) M(serialized) \ #define M(NAME) \ - if (method == AggregatedDataVariants::Type::NAME) \ - method = AggregatedDataVariants::Type::NAME ## _hash64; \ + if (merge_method == AggregatedDataVariants::Type::NAME) \ + merge_method = AggregatedDataVariants::Type::NAME ## _hash64; \ APPLY_FOR_VARIANTS_THAT_MAY_USE_BETTER_HASH_FUNCTION(M) #undef M @@ -2160,7 +2161,7 @@ Block Aggregator::mergeBlocks(BlocksList & blocks, bool final) /// result will destroy the states of aggregate functions in the destructor result.aggregator = this; - result.init(method); + result.init(merge_method); result.keys_size = params.keys_size; result.key_sizes = key_sizes; From 615877e54456fcf9bd2ae47b8637d90d8bc073e7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 24 Feb 2018 22:17:56 +0300 Subject: [PATCH 096/209] Updated Poco submodule: fixed usage of atomic bool flag, that was shown by TSan [#CLICKHOUSE-2] --- contrib/poco | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/poco b/contrib/poco index 81d4fdfcb88..3d885f5380f 160000 --- a/contrib/poco +++ b/contrib/poco @@ -1 +1 @@ -Subproject commit 81d4fdfcb887f89b0f7b1e9b503cbe63e6d8366b +Subproject commit 3d885f5380f24b4b91d8d4cf18c8cbc083d3ef8d From ab594375aba8d540f6902a192e7384855440f215 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 25 Feb 2018 03:50:53 +0300 Subject: [PATCH 097/209] Better UNION ALL: development #1947 --- .../Interpreters/InterpreterSelectQuery.cpp | 12 +- .../src/Interpreters/InterpreterSelectQuery.h | 24 +-- .../InterpreterSelectWithUnionQuery.cpp | 140 ++++++++++++++++++ .../InterpreterSelectWithUnionQuery.h | 54 +++++++ dbms/src/Parsers/ASTSelectQuery.cpp | 79 +--------- dbms/src/Parsers/ASTSelectQuery.h | 26 +--- dbms/src/Parsers/ExpressionElementParsers.cpp | 5 +- dbms/src/Parsers/ParserCreateQuery.cpp | 5 +- dbms/src/Parsers/ParserQueryWithOutput.cpp | 4 +- dbms/src/Parsers/ParserSelectQuery.cpp | 15 +- dbms/src/Parsers/ParserSelectQuery.h | 2 +- .../Parsers/ParserSelectWithUnionQuery.cpp | 24 +++ dbms/src/Parsers/ParserSelectWithUnionQuery.h | 17 +++ dbms/src/Storages/StorageDistributed.cpp | 4 +- dbms/src/Storages/StorageView.cpp | 2 +- 15 files changed, 260 insertions(+), 153 deletions(-) create mode 100644 dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp create mode 100644 dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h create mode 100644 dbms/src/Parsers/ParserSelectWithUnionQuery.cpp create mode 100644 dbms/src/Parsers/ParserSelectWithUnionQuery.h diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 3002b73297a..fbe021d1bd8 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -323,7 +323,7 @@ void InterpreterSelectQuery::getDatabaseAndTableNames(String & database_name, St Block InterpreterSelectQuery::getSampleBlock() { Pipeline pipeline; - executeWithoutUnionImpl(pipeline, std::make_shared(source_header)); + executeWithMultipleStreamsImpl(pipeline, std::make_shared(source_header)); auto res = pipeline.firstStream()->getHeader(); return res; } @@ -338,7 +338,7 @@ Block InterpreterSelectQuery::getSampleBlock(const ASTPtr & query_ptr_, const Co BlockIO InterpreterSelectQuery::execute() { Pipeline pipeline; - executeWithoutUnionImpl(pipeline, input); + 
executeWithMultipleStreamsImpl(pipeline, input); executeUnion(pipeline); /// Constraints on the result, the quota on the result, and also callback for progress. @@ -365,14 +365,14 @@ BlockIO InterpreterSelectQuery::execute() return res; } -BlockInputStreams InterpreterSelectQuery::executeWithoutUnion() +BlockInputStreams InterpreterSelectQuery::executeWithMultipleStreams() { Pipeline pipeline; - executeWithoutUnionImpl(pipeline, input); + executeWithMultipleStreamsImpl(pipeline, input); return pipeline.streams; } -void InterpreterSelectQuery::executeWithoutUnionImpl(Pipeline & pipeline, const BlockInputStreamPtr & input) +void InterpreterSelectQuery::executeWithMultipleStreamsImpl(Pipeline & pipeline, const BlockInputStreamPtr & input) { if (input) pipeline.streams.push_back(input); @@ -788,7 +788,7 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline else if (interpreter_subquery) { /// Subquery. - interpreter_subquery->executeWithoutUnionImpl(pipeline, {}); + interpreter_subquery->executeWithMultipleStreamsImpl(pipeline, {}); } else if (storage) { diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.h b/dbms/src/Interpreters/InterpreterSelectQuery.h index bd34517635e..202c1b6b8fe 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.h +++ b/dbms/src/Interpreters/InterpreterSelectQuery.h @@ -64,14 +64,11 @@ public: ~InterpreterSelectQuery(); - /** Execute a query, possibly part of UNION ALL chain. - * Get the stream of blocks to read - */ + /// Execute a query. Get the stream of blocks to read. BlockIO execute() override; - /** Execute the query without union of threads, if it is possible. - */ - BlockInputStreams executeWithoutUnion(); + /// Execute the query and return multuple streams for parallel processing. + BlockInputStreams executeWithMultipleStreams(); Block getSampleBlock(); @@ -113,9 +110,6 @@ private: } }; - /** - Optimization if an object is created only to call getSampleBlock(): consider only the first SELECT of the UNION ALL chain, because - * the first SELECT is sufficient to determine the required columns. - */ struct OnlyAnalyzeTag {}; InterpreterSelectQuery( OnlyAnalyzeTag, @@ -126,7 +120,6 @@ private: void basicInit(); void initQueryAnalyzer(); - /// Execute one SELECT query from the UNION ALL chain. void executeSingleQuery(Pipeline & pipeline); @@ -186,7 +179,7 @@ private: /// Fetch data from the table. Returns the stage to which the query was processed in Storage. QueryProcessingStage::Enum executeFetchColumns(Pipeline & pipeline); - void executeWithoutUnionImpl(Pipeline & pipeline, const BlockInputStreamPtr & input); + void executeWithMultipleStreamsImpl(Pipeline & pipeline, const BlockInputStreamPtr & input); void executeWhere(Pipeline & pipeline, const ExpressionActionsPtr & expression); void executeAggregation(Pipeline & pipeline, const ExpressionActionsPtr & expression, bool overflow_row, bool final); void executeMergeAggregated(Pipeline & pipeline, bool overflow_row, bool final); @@ -224,15 +217,9 @@ private: /// How many streams we ask for storage to produce, and in how many threads we will do further processing. size_t max_streams = 1; - /// Is it the first SELECT query of the UNION ALL chain? - bool is_first_select_inside_union_all; - /// The object was created only for query analysis. bool only_analyze = false; - /// The next SELECT query in the UNION ALL chain, if any. - std::unique_ptr next_select_in_union_all; - /// Table from where to read data, if not subquery. 
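    /// Both members below are kept for the lifetime of the query; the read lock prevents
    /// the table structure from being altered, or the table dropped, while we read from it.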
StoragePtr storage; TableStructureReadLockPtr table_lock; @@ -240,9 +227,6 @@ private: /// Used when we read from prepared input, not table or subquery. BlockInputStreamPtr input; - /// Do union of streams within a SELECT query? - bool union_within_single_query = false; - Poco::Logger * log; }; diff --git a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp new file mode 100644 index 00000000000..adc84cc7311 --- /dev/null +++ b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -0,0 +1,140 @@ +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + + +InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( + const ASTPtr & query_ptr_, + const Context & context_, + QueryProcessingStage::Enum to_stage_, + size_t subquery_depth_) + : query_ptr(query_ptr_), + context(context_), + to_stage(to_stage_), + subquery_depth(subquery_depth_) +{ + size_t num_selects = query_ptr->children.size(); + nested_interpreters.reserve(num_selects); + + if (!num_selects) + throw Exception("Logical error: no children in ASTSelectWithUnionQuery", ErrorCodes::LOGICAL_ERROR); + + for (const auto & select : query_ptr->children) + nested_interpreters.emplace_back(std::make_unique(select, context, to_stage, subquery_depth)); +} + + +InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( + const ASTPtr & query_ptr_, + const Context & context_, + const Names & required_column_names, + QueryProcessingStage::Enum to_stage_, + size_t subquery_depth_) + : query_ptr(query_ptr_), + context(context_), + to_stage(to_stage_), + subquery_depth(subquery_depth_) +{ + size_t num_selects = query_ptr->children.size(); + nested_interpreters.reserve(num_selects); + + if (!num_selects) + throw Exception("Logical error: no children in ASTSelectWithUnionQuery", ErrorCodes::LOGICAL_ERROR); + + for (const auto & select : query_ptr->children) + nested_interpreters.emplace_back(std::make_unique(select, context, required_column_names, to_stage, subquery_depth)); +} + + +InterpreterSelectWithUnionQuery::~InterpreterSelectWithUnionQuery() = default; + + +Block InterpreterSelectWithUnionQuery::getSampleBlock() +{ + return nested_interpreters.front()->getSampleBlock(); +} + +Block InterpreterSelectWithUnionQuery::getSampleBlock( + const ASTPtr & query_ptr, + const Context & context) +{ + if (query_ptr->children.empty()) + throw Exception("Logical error: no children in ASTSelectWithUnionQuery", ErrorCodes::LOGICAL_ERROR); + + return InterpreterSelectQuery::getSampleBlock(query_ptr->children.front(), context); +} + + +BlockInputStreams InterpreterSelectWithUnionQuery::executeWithMultipleStreams() +{ + BlockInputStreams nested_streams; + + for (auto & interpreter : nested_interpreters) + { + BlockInputStreams streams = interpreter->executeWithMultipleStreams(); + nested_streams.insert(nested_streams.end(), streams.begin(), streams.end()); + } + + return nested_streams; +} + + +BlockIO InterpreterSelectWithUnionQuery::execute() +{ + BlockInputStreams nested_streams = executeWithMultipleStreams(); + BlockInputStreamPtr result_stream; + + if (nested_streams.empty()) + { + result_stream = std::make_shared(); + } + else if (nested_streams.size() == 1) + { + result_stream = nested_streams.front(); + } + else + { + const Settings & settings = context.getSettingsRef(); + + result_stream = std::make_shared>(nested_streams, nullptr /* TODO stream_with_non_joined_data */, 
settings.max_threads); + nested_streams.clear(); + } + + /// Constraints on the result, the quota on the result, and also callback for progress. + if (IProfilingBlockInputStream * stream = dynamic_cast(result_stream.get())) + { + /// Constraints apply only to the final result. + if (to_stage == QueryProcessingStage::Complete) + { + const Settings & settings = context.getSettingsRef(); + + IProfilingBlockInputStream::LocalLimits limits; + limits.mode = IProfilingBlockInputStream::LIMITS_CURRENT; + limits.max_rows_to_read = settings.limits.max_result_rows; + limits.max_bytes_to_read = settings.limits.max_result_bytes; + limits.read_overflow_mode = settings.limits.result_overflow_mode; + + stream->setLimits(limits); + stream->setQuota(context.getQuota()); + } + } + + BlockIO res; + res.in = result_stream; + res.in_sample = getSampleBlock(); + + return res; +} + +} diff --git a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h new file mode 100644 index 00000000000..cce77240517 --- /dev/null +++ b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h @@ -0,0 +1,54 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ + +class InterpreterSelectQuery; + + +/** Interprets one or multiple SELECT queries inside UNION ALL chain. + */ +class InterpreterSelectWithUnionQuery : public IInterpreter +{ +public: + InterpreterSelectWithUnionQuery( + const ASTPtr & query_ptr_, + const Context & context_, + QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete, + size_t subquery_depth_ = 0); + + InterpreterSelectWithUnionQuery( + const ASTPtr & query_ptr_, + const Context & context_, + const Names & required_column_names, + QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete, + size_t subquery_depth_ = 0); + + ~InterpreterSelectWithUnionQuery(); + + BlockIO execute() override; + + /// Execute the query without union of streams. + BlockInputStreams executeWithMultipleStreams(); + + Block getSampleBlock(); + + static Block getSampleBlock( + const ASTPtr & query_ptr_, + const Context & context_); + +private: + ASTPtr query_ptr; + Context context; + QueryProcessingStage::Enum to_stage; + size_t subquery_depth; + + std::vector> nested_interpreters; +}; + +} diff --git a/dbms/src/Parsers/ASTSelectQuery.cpp b/dbms/src/Parsers/ASTSelectQuery.cpp index aec8d6345d9..08dbc8007dc 100644 --- a/dbms/src/Parsers/ASTSelectQuery.cpp +++ b/dbms/src/Parsers/ASTSelectQuery.cpp @@ -14,14 +14,12 @@ namespace DB namespace ErrorCodes { - extern const int UNION_ALL_COLUMN_ALIAS_MISMATCH; - extern const int UNION_ALL_RESULT_STRUCTURES_MISMATCH; extern const int LOGICAL_ERROR; extern const int THERE_IS_NO_COLUMN; } -ASTSelectQuery::ASTSelectQuery(const StringRange range_) : ASTQueryWithOutput(range_) +ASTSelectQuery::ASTSelectQuery(const StringRange range_) : IAST(range_) { } @@ -47,30 +45,6 @@ bool ASTSelectQuery::hasAsterisk() const return false; } -void ASTSelectQuery::renameColumns(const ASTSelectQuery & source) -{ - const ASTs & from = source.select_expression_list->children; - ASTs & to = select_expression_list->children; - - if (from.size() != to.size()) - throw Exception("Size mismatch in UNION ALL chain", - DB::ErrorCodes::UNION_ALL_RESULT_STRUCTURES_MISMATCH); - - for (size_t i = 0; i < from.size(); ++i) - { - /// If the column has an alias, it must match the name of the original column. - /// Otherwise, we assign it an alias, if required. 
- if (!to[i]->tryGetAlias().empty()) - { - if (to[i]->tryGetAlias() != from[i]->getAliasOrColumnName()) - throw Exception("Column alias mismatch in UNION ALL chain", - DB::ErrorCodes::UNION_ALL_COLUMN_ALIAS_MISMATCH); - } - else if (to[i]->getColumnName() != from[i]->getAliasOrColumnName()) - to[i]->setAlias(from[i]->getAliasOrColumnName()); - } -} - void ASTSelectQuery::rewriteSelectExpressionList(const Names & required_column_names) { /// All columns are kept if we have DISTINCT. @@ -159,34 +133,6 @@ void ASTSelectQuery::rewriteSelectExpressionList(const Names & required_column_n } ASTPtr ASTSelectQuery::clone() const -{ - auto ptr = cloneImpl(true); - - /// Set pointers to previous SELECT queries. - ASTPtr current = ptr; - static_cast(current.get())->prev_union_all = nullptr; - ASTPtr next = static_cast(current.get())->next_union_all; - while (next != nullptr) - { - ASTSelectQuery * next_select_query = static_cast(next.get()); - next_select_query->prev_union_all = current.get(); - current = next; - next = next_select_query->next_union_all; - } - - cloneOutputOptions(*ptr); - - return ptr; -} - -std::shared_ptr ASTSelectQuery::cloneFirstSelect() const -{ - auto res = cloneImpl(false); - res->prev_union_all = nullptr; - return res; -} - -std::shared_ptr ASTSelectQuery::cloneImpl(bool traverse_union_all) const { auto res = std::make_shared(*this); res->children.clear(); @@ -219,20 +165,10 @@ std::shared_ptr ASTSelectQuery::cloneImpl(bool traverse_union_al #undef CLONE - if (traverse_union_all) - { - if (next_union_all) - { - res->next_union_all = static_cast(&*next_union_all)->cloneImpl(true); - res->children.push_back(res->next_union_all); - } - } - else - res->next_union_all = nullptr; - return res; } + void ASTSelectQuery::formatQueryImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const { frame.current_select = this; @@ -323,17 +259,6 @@ void ASTSelectQuery::formatQueryImpl(const FormatSettings & s, FormatState & sta s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "SETTINGS " << (s.hilite ? hilite_none : ""); settings->formatImpl(s, state, frame); } - - if (next_union_all) - { - s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "UNION ALL " << s.nl_or_ws << (s.hilite ? hilite_none : ""); - - // NOTE We can safely apply `static_cast` instead of `typeid_cast` because we know that in the `UNION ALL` chain - // there are only trees of type SELECT. - const ASTSelectQuery & next_ast = static_cast(*next_union_all); - - next_ast.formatImpl(s, state, frame); - } } diff --git a/dbms/src/Parsers/ASTSelectQuery.h b/dbms/src/Parsers/ASTSelectQuery.h index 5f4898d33d9..344f9e4f7a1 100644 --- a/dbms/src/Parsers/ASTSelectQuery.h +++ b/dbms/src/Parsers/ASTSelectQuery.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include @@ -13,7 +12,7 @@ struct ASTTablesInSelectQueryElement; /** SELECT query */ -class ASTSelectQuery : public ASTQueryWithOutput +class ASTSelectQuery : public IAST { public: ASTSelectQuery() = default; @@ -28,23 +27,11 @@ public: /// Does the query contain an asterisk? bool hasAsterisk() const; - /// Rename the query columns to the same names as in the original query. - void renameColumns(const ASTSelectQuery & source); - - /// Rewrites select_expression_list to return only the required columns in the correct order. + /// Rewrites select_expression_list to return only the required columns in the correct order. TODO Wrong comment. 
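/// (Sketch of intent, assuming required_column_names = {"b"}: "SELECT a, b, c FROM t" becomes "SELECT b FROM t"; with DISTINCT all columns are kept, as the implementation in ASTSelectQuery.cpp above shows.)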
void rewriteSelectExpressionList(const Names & required_column_names); - bool isUnionAllHead() const { return (prev_union_all == nullptr) && next_union_all != nullptr; } - ASTPtr clone() const override; - /// Get a deep copy of the first SELECT query tree. - std::shared_ptr cloneFirstSelect() const; - -private: - std::shared_ptr cloneImpl(bool traverse_union_all) const; - -public: bool distinct = false; ASTPtr with_expression_list; ASTPtr select_expression_list; @@ -73,15 +60,6 @@ public: void setDatabaseIfNeeded(const String & database_name); void replaceDatabaseAndTable(const String & database_name, const String & table_name); - /// A double-linked list of SELECT queries inside a UNION ALL query. - - /// The next SELECT query in the UNION ALL chain, if there is one - ASTPtr next_union_all; - /// Previous SELECT query in the UNION ALL chain (not inserted into children and not cloned) - /// The pointer is null for the following reasons: - /// 1. to prevent the occurrence of cyclic dependencies and, hence, memory leaks; - IAST * prev_union_all = nullptr; - protected: void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; }; diff --git a/dbms/src/Parsers/ExpressionElementParsers.cpp b/dbms/src/Parsers/ExpressionElementParsers.cpp index 5e26f42593b..b1054917935 100644 --- a/dbms/src/Parsers/ExpressionElementParsers.cpp +++ b/dbms/src/Parsers/ExpressionElementParsers.cpp @@ -12,12 +12,11 @@ #include #include #include -#include #include #include #include -#include +#include #include #include @@ -108,7 +107,7 @@ bool ParserSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { Pos begin = pos; ASTPtr select_node; - ParserSelectQuery select; + ParserSelectWithUnionQuery select; if (pos->type != TokenType::OpeningRoundBracket) return false; diff --git a/dbms/src/Parsers/ParserCreateQuery.cpp b/dbms/src/Parsers/ParserCreateQuery.cpp index 6f3d3a4db7c..b772fff6976 100644 --- a/dbms/src/Parsers/ParserCreateQuery.cpp +++ b/dbms/src/Parsers/ParserCreateQuery.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include @@ -203,7 +203,7 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserStorage storage_p; ParserIdentifier name_p; ParserColumnDeclarationList columns_p; - ParserSelectQuery select_p; + ParserSelectWithUnionQuery select_p; ASTPtr database; ASTPtr table; @@ -398,7 +398,6 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!s_as.ignore(pos, expected)) return false; - ParserSelectQuery select_p; if (!select_p.parse(pos, select, expected)) return false; } diff --git a/dbms/src/Parsers/ParserQueryWithOutput.cpp b/dbms/src/Parsers/ParserQueryWithOutput.cpp index f331fa6e30c..e7fdc390dd6 100644 --- a/dbms/src/Parsers/ParserQueryWithOutput.cpp +++ b/dbms/src/Parsers/ParserQueryWithOutput.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include @@ -18,7 +18,7 @@ namespace DB bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserShowTablesQuery show_tables_p; - ParserSelectQuery select_p; + ParserSelectWithUnionQuery select_p; ParserTablePropertiesQuery table_p; ParserDescribeTableQuery describe_table_p; ParserShowProcesslistQuery show_processlist_p; diff --git a/dbms/src/Parsers/ParserSelectQuery.cpp b/dbms/src/Parsers/ParserSelectQuery.cpp index 99240a31f76..e56d6e11304 100644 --- a/dbms/src/Parsers/ParserSelectQuery.cpp +++ b/dbms/src/Parsers/ParserSelectQuery.cpp @@ 
-68,7 +68,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; } - /// FROM database.table or FROM table or FROM (subquery) or FROM tableFunction + /// FROM database.table or FROM table or FROM (subquery) or FROM tableFunction(...) if (s_from.ignore(pos, expected)) { if (!ParserTablesInSelectQuery().parse(pos, select_query->tables, expected)) @@ -173,16 +173,6 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; } - // UNION ALL select query - if (ParserKeyword("UNION ALL").ignore(pos, expected)) - { - ParserSelectQuery select_p; - if (!select_p.parse(pos, select_query->next_union_all, expected)) - return false; - auto next_select_query = static_cast(&*select_query->next_union_all); - next_select_query->prev_union_all = node.get(); - } - select_query->range = StringRange(begin, pos); if (select_query->with_expression_list) @@ -211,9 +201,6 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (select_query->settings) select_query->children.push_back(select_query->settings); - if (select_query->next_union_all) - select_query->children.push_back(select_query->next_union_all); - return true; } diff --git a/dbms/src/Parsers/ParserSelectQuery.h b/dbms/src/Parsers/ParserSelectQuery.h index 7a722fc673d..deac25df57d 100644 --- a/dbms/src/Parsers/ParserSelectQuery.h +++ b/dbms/src/Parsers/ParserSelectQuery.h @@ -1,6 +1,6 @@ #pragma once -#include +#include namespace DB diff --git a/dbms/src/Parsers/ParserSelectWithUnionQuery.cpp b/dbms/src/Parsers/ParserSelectWithUnionQuery.cpp new file mode 100644 index 00000000000..b186bbfc460 --- /dev/null +++ b/dbms/src/Parsers/ParserSelectWithUnionQuery.cpp @@ -0,0 +1,24 @@ +#include +#include +#include +#include + + +namespace DB +{ + +bool ParserSelectWithUnionQuery::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_pos, Expected & expected) +{ + ASTPtr list_node; + + ParserList parser(std::make_unique(), std::make_unique("UNION ALL"), false); + if (!parser.parse(pos, end, list_node, max_parsed_pos, expected)) + return false; + + node = std::make_shared(list_node->range); + node->children = list_node->children; + + return true; +} + +} diff --git a/dbms/src/Parsers/ParserSelectWithUnionQuery.h b/dbms/src/Parsers/ParserSelectWithUnionQuery.h new file mode 100644 index 00000000000..1996a28badb --- /dev/null +++ b/dbms/src/Parsers/ParserSelectWithUnionQuery.h @@ -0,0 +1,17 @@ +#pragma once + +#include + + +namespace DB +{ + + +class ParserSelectWithUnionQuery : public IParserBase +{ +protected: + const char * getName() const override { return "SELECT query, possibly with UNION"; } + bool parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_pos, Expected & expected) override; +}; + +} diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp index b191f598a0e..718d04202f0 100644 --- a/dbms/src/Storages/StorageDistributed.cpp +++ b/dbms/src/Storages/StorageDistributed.cpp @@ -62,8 +62,8 @@ namespace /// Creates a copy of query, changes database and table names. 
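/// Example with hypothetical names: "SELECT a FROM default.dist" sent to a shard becomes "SELECT a FROM shard_db.shard_table".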
ASTPtr rewriteSelectQuery(const ASTPtr & query, const std::string & database, const std::string & table) { - auto modified_query_ast = typeid_cast(*query).cloneFirstSelect(); - modified_query_ast->replaceDatabaseAndTable(database, table); + auto modified_query_ast = query->clone(); + typeid_cast(*modified_query_ast)->replaceDatabaseAndTable(database, table); return modified_query_ast; } diff --git a/dbms/src/Storages/StorageView.cpp b/dbms/src/Storages/StorageView.cpp index 3001bc29304..4e498ff1e0b 100644 --- a/dbms/src/Storages/StorageView.cpp +++ b/dbms/src/Storages/StorageView.cpp @@ -44,7 +44,7 @@ BlockInputStreams StorageView::read( const unsigned /*num_streams*/) { processed_stage = QueryProcessingStage::FetchColumns; - return InterpreterSelectQuery(inner_query->clone(), context, column_names).executeWithoutUnion(); + return InterpreterSelectQuery(inner_query->clone(), context, column_names).executeWithMultipleStreams(); } From b326a111d57d4889c119a0d2d6fecaf7d1255eb0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 25 Feb 2018 04:30:03 +0300 Subject: [PATCH 098/209] Fixed bad code that may lead to race condition [#CLICKHOUSE-2] --- dbms/src/Common/Stopwatch.h | 35 +++++++++++------------------------ 1 file changed, 11 insertions(+), 24 deletions(-) diff --git a/dbms/src/Common/Stopwatch.h b/dbms/src/Common/Stopwatch.h index b3a1807941f..e48697019ce 100644 --- a/dbms/src/Common/Stopwatch.h +++ b/dbms/src/Common/Stopwatch.h @@ -2,7 +2,6 @@ #include #include -#include #include #ifdef __APPLE__ @@ -18,38 +17,26 @@ public: /** CLOCK_MONOTONIC works relatively efficient (~15 million calls/sec) and doesn't lead to syscall. * Pass CLOCK_MONOTONIC_COARSE, if you need better performance with acceptable cost of several milliseconds of inaccuracy. */ - Stopwatch(clockid_t clock_type_ = CLOCK_MONOTONIC) : clock_type(clock_type_) { restart(); } + Stopwatch(clockid_t clock_type_ = CLOCK_MONOTONIC) : clock_type(clock_type_) { start(); } - void start() { setStart(); is_running = true; } - void stop() { updateElapsed(); is_running = false; } - void restart() { elapsed_ns = 0; start(); } - UInt64 elapsed() const { updateElapsed(); return elapsed_ns; } - UInt64 elapsedMilliseconds() const { updateElapsed(); return elapsed_ns / 1000000UL; } - double elapsedSeconds() const { updateElapsed(); return static_cast(elapsed_ns) / 1000000000ULL; } + void start() { start_ns = nanoseconds(); is_running = true; } + void stop() { stop_ns = nanoseconds(); is_running = false; } + void restart() { start(); } + UInt64 elapsed() const { return is_running ? 
nanoseconds() - start_ns : stop_ns - start_ns; } + UInt64 elapsedMilliseconds() const { return elapsed() / 1000000UL; } + double elapsedSeconds() const { return static_cast(elapsed()) / 1000000000ULL; } private: - mutable UInt64 start_ns; - mutable UInt64 elapsed_ns; + UInt64 start_ns; + UInt64 stop_ns; clockid_t clock_type; bool is_running; - void setStart() + UInt64 nanoseconds() const { struct timespec ts; clock_gettime(clock_type, &ts); - start_ns = ts.tv_sec * 1000000000ULL + ts.tv_nsec; - } - - void updateElapsed() const - { - if (is_running) - { - struct timespec ts; - clock_gettime(clock_type, &ts); - UInt64 current_ns = ts.tv_sec * 1000000000ULL + ts.tv_nsec; - elapsed_ns += current_ns - start_ns; - start_ns = current_ns; - } + return ts.tv_sec * 1000000000ULL + ts.tv_nsec; } }; From c86fd3109781867b4fc27991aa81d04f43d310a2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 25 Feb 2018 05:23:04 +0300 Subject: [PATCH 099/209] Addition to prev. revision [#CLICKHOUSE-2] --- dbms/src/Client/MultiplexedConnections.h | 1 - dbms/src/Common/CounterInFile.h | 1 - dbms/src/Common/tests/parallel_aggregation.cpp | 2 +- dbms/src/Interpreters/Context.cpp | 6 +++--- libs/libdaemon/src/BaseDaemon.cpp | 1 - 5 files changed, 4 insertions(+), 7 deletions(-) diff --git a/dbms/src/Client/MultiplexedConnections.h b/dbms/src/Client/MultiplexedConnections.h index 817a37b1d93..7955fb4d821 100644 --- a/dbms/src/Client/MultiplexedConnections.h +++ b/dbms/src/Client/MultiplexedConnections.h @@ -3,7 +3,6 @@ #include #include #include -#include #include namespace DB diff --git a/dbms/src/Common/CounterInFile.h b/dbms/src/Common/CounterInFile.h index 99320a1fbfd..2e7afaa79de 100644 --- a/dbms/src/Common/CounterInFile.h +++ b/dbms/src/Common/CounterInFile.h @@ -9,7 +9,6 @@ #include #include #include -#include #include #include diff --git a/dbms/src/Common/tests/parallel_aggregation.cpp b/dbms/src/Common/tests/parallel_aggregation.cpp index 1082a7cf564..8ed0fcedd70 100644 --- a/dbms/src/Common/tests/parallel_aggregation.cpp +++ b/dbms/src/Common/tests/parallel_aggregation.cpp @@ -167,7 +167,7 @@ void aggregate33(Map & local_map, Map & global_map, Mutex & mutex, Source::const if (inserted && local_map.size() == threshold) { - Poco::ScopedLock lock(mutex); + std::lock_guard lock(mutex); for (auto & value_type : local_map) global_map[value_type.first] += value_type.second; diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 06993f4306b..d66265728d2 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -229,7 +229,7 @@ struct ContextShared Databases current_databases; { - Poco::ScopedLock lock(mutex); + std::lock_guard lock(mutex); current_databases = databases; } @@ -237,7 +237,7 @@ struct ContextShared database.second->shutdown(); { - Poco::ScopedLock lock(mutex); + std::lock_guard lock(mutex); databases.clear(); } } @@ -1428,7 +1428,7 @@ QueryLog & Context::getQueryLog() String partition_by = config.getString("query_log.partition_by", "toYYYYMM(event_date)"); size_t flush_interval_milliseconds = config.getUInt64( "query_log.flush_interval_milliseconds", DEFAULT_QUERY_LOG_FLUSH_INTERVAL_MILLISECONDS); - + String engine = "ENGINE = MergeTree PARTITION BY (" + partition_by + ") ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024"; system_logs->query_log = std::make_unique(*global_context, database, table, engine, flush_interval_milliseconds); diff --git a/libs/libdaemon/src/BaseDaemon.cpp 
b/libs/libdaemon/src/BaseDaemon.cpp index f7e04851e24..1629fee9113 100644 --- a/libs/libdaemon/src/BaseDaemon.cpp +++ b/libs/libdaemon/src/BaseDaemon.cpp @@ -49,7 +49,6 @@ #include #include #include -#include #include #include #include From 722b6287ecde29b8c7f4f14827f1d5de533f54d5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 25 Feb 2018 05:43:27 +0300 Subject: [PATCH 100/209] Miscellaneous [#CLICKHOUSE-2] --- dbms/src/Common/Stopwatch.h | 85 ++++++++++++++++--------- dbms/src/Common/tests/CMakeLists.txt | 3 + dbms/src/Common/tests/stopwatch.cpp | 40 ++++++++++++ dbms/src/Databases/DatabaseOrdinary.cpp | 8 +-- dbms/src/Storages/StorageMergeTree.cpp | 2 +- dbms/src/Storages/StorageMergeTree.h | 2 +- 6 files changed, 103 insertions(+), 37 deletions(-) create mode 100644 dbms/src/Common/tests/stopwatch.cpp diff --git a/dbms/src/Common/Stopwatch.h b/dbms/src/Common/Stopwatch.h index e48697019ce..f3c83436a1a 100644 --- a/dbms/src/Common/Stopwatch.h +++ b/dbms/src/Common/Stopwatch.h @@ -8,6 +8,18 @@ #include #endif + +namespace StopWatchDetail +{ + inline UInt64 nanoseconds(clockid_t clock_type) + { + struct timespec ts; + clock_gettime(clock_type, &ts); + return ts.tv_sec * 1000000000ULL + ts.tv_nsec; + } +} + + /** Differs from Poco::Stopwatch only by using 'clock_gettime' instead of 'gettimeofday', * returns nanoseconds instead of microseconds, and also by other minor differencies. */ @@ -32,50 +44,49 @@ private: clockid_t clock_type; bool is_running; - UInt64 nanoseconds() const - { - struct timespec ts; - clock_gettime(clock_type, &ts); - return ts.tv_sec * 1000000000ULL + ts.tv_nsec; - } + UInt64 nanoseconds() const { return StopWatchDetail::nanoseconds(clock_type); } }; -class StopwatchWithLock : public Stopwatch +class AtomicStopwatch { public: + AtomicStopwatch(clockid_t clock_type_ = CLOCK_MONOTONIC) : clock_type(clock_type_) { restart(); } + + void restart() { start_ns = nanoseconds(); } + UInt64 elapsed() const { return nanoseconds() - start_ns; } + UInt64 elapsedMilliseconds() const { return elapsed() / 1000000UL; } + double elapsedSeconds() const { return static_cast(elapsed()) / 1000000000ULL; } + /** If specified amount of time has passed and timer is not locked right now, then restarts timer and returns true. * Otherwise returns false. * This is done atomically. */ - bool lockTestAndRestart(double seconds) + bool compareAndRestart(double seconds) { - std::unique_lock lock(mutex, std::defer_lock); - if (!lock.try_lock()) - return false; + UInt64 threshold = seconds * 1000000000ULL; + UInt64 current_ns = nanoseconds(); + UInt64 current_start_ns = start_ns; - if (elapsedSeconds() >= seconds) + while (true) { - restart(); - return true; + if (current_ns < current_start_ns + threshold) + return false; + + if (start_ns.compare_exchange_weak(current_start_ns, current_ns)) + return true; } - else - return false; } struct Lock { - StopwatchWithLock * parent = nullptr; - std::unique_lock lock; + AtomicStopwatch * parent = nullptr; Lock() {} operator bool() const { return parent != nullptr; } - Lock(StopwatchWithLock * parent, std::unique_lock && lock) - : parent(parent), lock(std::move(lock)) - { - } + Lock(AtomicStopwatch * parent) : parent(parent) {} Lock(Lock &&) = default; @@ -92,21 +103,33 @@ public: * This is done atomically. * * Usage: - * if (auto lock = timer.lockTestAndRestartAfter(1)) + * if (auto lock = timer.compareAndRestartDeferred(1)) * /// do some work, that must be done in one thread and not more frequently than each second. 
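 * /// Otherwise the returned Lock converts to false and the block is skipped: either another thread
 * /// holds the deferred lock, or the interval has not yet elapsed (an editorial note on the semantics).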
*/ - Lock lockTestAndRestartAfter(double seconds) + Lock compareAndRestartDeferred(double seconds) { - std::unique_lock lock(mutex, std::defer_lock); - if (!lock.try_lock()) - return {}; + UInt64 threshold = seconds * 1000000000ULL; + UInt64 current_ns = nanoseconds(); + UInt64 current_start_ns = start_ns; - if (elapsedSeconds() >= seconds) - return Lock(this, std::move(lock)); + while (true) + { + if ((current_start_ns & 0x8000000000000000ULL)) + return {}; - return {}; + if (current_ns < current_start_ns + threshold) + return {}; + + if (start_ns.compare_exchange_weak(current_start_ns, current_ns | 0x8000000000000000ULL)) + return Lock(this); + } } private: - std::mutex mutex; + std::atomic start_ns; + std::atomic lock {false}; + clockid_t clock_type; + + /// Most significant bit is a lock. When it is set, compareAndRestartDeferred method will return false. + UInt64 nanoseconds() const { return StopWatchDetail::nanoseconds(clock_type) & 0x7FFFFFFFFFFFFFFFULL; } }; diff --git a/dbms/src/Common/tests/CMakeLists.txt b/dbms/src/Common/tests/CMakeLists.txt index 6c835222990..f1605a13447 100644 --- a/dbms/src/Common/tests/CMakeLists.txt +++ b/dbms/src/Common/tests/CMakeLists.txt @@ -68,3 +68,6 @@ target_link_libraries (allocator clickhouse_common_io) add_executable (cow_columns cow_columns.cpp) target_link_libraries (cow_columns clickhouse_common_io) + +add_executable (stopwatch stopwatch.cpp) +target_link_libraries (stopwatch clickhouse_common_io) diff --git a/dbms/src/Common/tests/stopwatch.cpp b/dbms/src/Common/tests/stopwatch.cpp new file mode 100644 index 00000000000..acbd253b37f --- /dev/null +++ b/dbms/src/Common/tests/stopwatch.cpp @@ -0,0 +1,40 @@ +#include +#include +#include +#include + + +int main(int, char **) +{ + static constexpr size_t num_threads = 10; + static constexpr size_t num_iterations = 3; + + std::vector threads(num_threads); + + AtomicStopwatch watch; + Stopwatch total_watch; + + for (size_t i = 0; i < num_threads; ++i) + { + threads[i] = std::thread([i, &watch, &total_watch] + { + size_t iteration = 0; + while (iteration < num_iterations) + { + if (auto lock = watch.compareAndRestartDeferred(1)) + { + std::cerr << "Thread " << i << ": begin iteration " << iteration << ", elapsed: " << total_watch.elapsedMilliseconds() << " ms.\n"; + std::this_thread::sleep_for(std::chrono::milliseconds(500)); + std::cerr << "Thread " << i << ": end iteration " << iteration << ", elapsed: " << total_watch.elapsedMilliseconds() << " ms.\n"; + ++iteration; + } + std::this_thread::sleep_for(std::chrono::milliseconds(500)); + } + }); + } + + for (auto & thread : threads) + thread.join(); + + return 0; +} diff --git a/dbms/src/Databases/DatabaseOrdinary.cpp b/dbms/src/Databases/DatabaseOrdinary.cpp index fa29cbc6283..0021f47c4c7 100644 --- a/dbms/src/Databases/DatabaseOrdinary.cpp +++ b/dbms/src/Databases/DatabaseOrdinary.cpp @@ -149,7 +149,7 @@ void DatabaseOrdinary::loadTables( String data_path = context.getPath() + "data/" + escapeForFileName(name) + "/"; - StopwatchWithLock watch; + AtomicStopwatch watch; std::atomic tables_processed {0}; auto task_function = [&](FileNames::const_iterator begin, FileNames::const_iterator end) @@ -160,7 +160,7 @@ void DatabaseOrdinary::loadTables( /// Messages, so that it's not boring to wait for the server to load for a long time. 
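/// (Roughly: a progress line is logged every PRINT_MESSAGE_EACH_N_TABLES tables, or when the shared AtomicStopwatch says PRINT_MESSAGE_EACH_N_SECONDS have passed; a reading of the code below.)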
if ((++tables_processed) % PRINT_MESSAGE_EACH_N_TABLES == 0 - || watch.lockTestAndRestart(PRINT_MESSAGE_EACH_N_SECONDS)) + || watch.compareAndRestart(PRINT_MESSAGE_EACH_N_SECONDS)) { LOG_INFO(log, std::fixed << std::setprecision(2) << tables_processed * 100.0 / total_tables << "%"); watch.restart(); @@ -200,7 +200,7 @@ void DatabaseOrdinary::startupTables(ThreadPool * thread_pool) { LOG_INFO(log, "Starting up tables."); - StopwatchWithLock watch; + AtomicStopwatch watch; std::atomic tables_processed {0}; size_t total_tables = tables.size(); @@ -209,7 +209,7 @@ void DatabaseOrdinary::startupTables(ThreadPool * thread_pool) for (auto it = begin; it != end; ++it) { if ((++tables_processed) % PRINT_MESSAGE_EACH_N_TABLES == 0 - || watch.lockTestAndRestart(PRINT_MESSAGE_EACH_N_SECONDS)) + || watch.compareAndRestart(PRINT_MESSAGE_EACH_N_SECONDS)) { LOG_INFO(log, std::fixed << std::setprecision(2) << tables_processed * 100.0 / total_tables << "%"); watch.restart(); diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index f75810cd930..29d0ba0ba08 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -293,7 +293,7 @@ bool StorageMergeTree::merge( String * out_disable_reason) { /// Clear old parts. It does not matter to do it more frequently than each second. - if (auto lock = time_after_previous_cleanup.lockTestAndRestartAfter(1)) + if (auto lock = time_after_previous_cleanup.compareAndRestartDeferred(1)) { data.clearOldPartsFromFilesystem(); data.clearOldTemporaryDirectories(); diff --git a/dbms/src/Storages/StorageMergeTree.h b/dbms/src/Storages/StorageMergeTree.h index e4e880941ff..e1121ec84d0 100644 --- a/dbms/src/Storages/StorageMergeTree.h +++ b/dbms/src/Storages/StorageMergeTree.h @@ -100,7 +100,7 @@ private: SimpleIncrement increment{0}; /// For clearOldParts, clearOldTemporaryDirectories. - StopwatchWithLock time_after_previous_cleanup; + AtomicStopwatch time_after_previous_cleanup; MergeTreeData::DataParts currently_merging; std::mutex currently_merging_mutex; From f3f2b7467622aa1dfeae4f4fa586a23a893d0501 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 25 Feb 2018 05:45:36 +0300 Subject: [PATCH 101/209] Miscellaneous [#CLICKHOUSE-2] --- dbms/src/Common/Stopwatch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Common/Stopwatch.h b/dbms/src/Common/Stopwatch.h index f3c83436a1a..a697534fa47 100644 --- a/dbms/src/Common/Stopwatch.h +++ b/dbms/src/Common/Stopwatch.h @@ -58,7 +58,7 @@ public: UInt64 elapsedMilliseconds() const { return elapsed() / 1000000UL; } double elapsedSeconds() const { return static_cast(elapsed()) / 1000000000ULL; } - /** If specified amount of time has passed and timer is not locked right now, then restarts timer and returns true. + /** If specified amount of time has passed, then restarts timer and returns true. * Otherwise returns false. * This is done atomically. 
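 * (Per the CAS loop in compareAndRestart above: when several threads race, exactly one compare_exchange_weak succeeds and restarts the timer.)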
*/ From 84caaf9bab0a7e4247790e179d62791431379a61 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Sun, 25 Feb 2018 12:01:13 +0800 Subject: [PATCH 102/209] add doc with toStartOfDay() --- docs/en/functions/date_time_functions.md | 4 ++++ docs/ru/functions/date_time_functions.md | 3 +++ 2 files changed, 7 insertions(+) diff --git a/docs/en/functions/date_time_functions.md b/docs/en/functions/date_time_functions.md index 162b09b8c16..da6ad9b4a7c 100644 --- a/docs/en/functions/date_time_functions.md +++ b/docs/en/functions/date_time_functions.md @@ -85,6 +85,10 @@ Note: If you need to round a date with time to any other number of seconds, minu Rounds down a date with time to the start of the hour. +## toStartOfDay + +Rounds down a date with time to the start of the day. + ## toTime Converts a date with time to a certain fixed date, while preserving the time. diff --git a/docs/ru/functions/date_time_functions.md b/docs/ru/functions/date_time_functions.md index b552255b7fd..c9e0c3eff28 100644 --- a/docs/ru/functions/date_time_functions.md +++ b/docs/ru/functions/date_time_functions.md @@ -74,6 +74,9 @@ SELECT ## toStartOfHour Округляет дату-с-временем вниз до начала часа. +## toStartOfDay +Округляет дату-с-временем вниз до начала дня. + ## toTime Переводит дату-с-временем на некоторую фиксированную дату, сохраняя при этом время. From 9219b3d600eae5fcdf99271bd9897ebab8decceb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 25 Feb 2018 08:01:12 +0300 Subject: [PATCH 103/209] Fixed build [#CLICKHOUSE-2] --- dbms/src/Common/Stopwatch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Common/Stopwatch.h b/dbms/src/Common/Stopwatch.h index a697534fa47..dc3e7e12481 100644 --- a/dbms/src/Common/Stopwatch.h +++ b/dbms/src/Common/Stopwatch.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #ifdef __APPLE__ From 9d3d0410d79349d3c286caaca1938b64900023be Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 25 Feb 2018 09:08:29 +0300 Subject: [PATCH 104/209] Removed useless code #1947 --- dbms/src/Parsers/ParserKillQueryQuery.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/dbms/src/Parsers/ParserKillQueryQuery.cpp b/dbms/src/Parsers/ParserKillQueryQuery.cpp index 7999e211fe5..bbc4eb22ead 100644 --- a/dbms/src/Parsers/ParserKillQueryQuery.cpp +++ b/dbms/src/Parsers/ParserKillQueryQuery.cpp @@ -1,16 +1,9 @@ #include #include -#include -#include #include -#include -#include -#include -#include #include -#include namespace DB { From 6e9363527503560e0d6344d1ef5801b03fd15227 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 25 Feb 2018 09:34:20 +0300 Subject: [PATCH 106/209] Better UNION ALL: development #1947 --- dbms/src/Analyzers/AnalyzeColumns.cpp | 2 +- dbms/src/Interpreters/InterpreterFactory.cpp | 6 + .../Interpreters/InterpreterInsertQuery.cpp | 6 +- .../Interpreters/InterpreterSelectQuery.cpp | 
224 +++--------------- .../src/Interpreters/InterpreterSelectQuery.h | 28 +-- .../InterpreterSelectWithUnionQuery.cpp | 34 +-- .../InterpreterSelectWithUnionQuery.h | 2 + dbms/src/Interpreters/executeQuery.cpp | 14 ++ dbms/src/Parsers/ASTCreateQuery.h | 4 +- dbms/src/Parsers/ASTQueryWithOutput.h | 10 +- dbms/src/Parsers/ASTSelectQuery.cpp | 2 +- dbms/src/Parsers/ASTSelectQuery.h | 2 +- dbms/src/Parsers/ParserInsertQuery.cpp | 6 +- .../Parsers/ParserSelectWithUnionQuery.cpp | 5 +- dbms/src/Parsers/ParserSelectWithUnionQuery.h | 2 +- dbms/src/Server/Client.cpp | 4 +- dbms/src/Storages/StorageDistributed.cpp | 2 +- dbms/src/Storages/StorageMaterializedView.cpp | 9 +- dbms/src/Storages/StorageMergeTree.cpp | 1 - .../Storages/StorageReplicatedMergeTree.cpp | 1 - dbms/src/Storages/StorageView.cpp | 5 +- 21 files changed, 101 insertions(+), 268 deletions(-) diff --git a/dbms/src/Analyzers/AnalyzeColumns.cpp b/dbms/src/Analyzers/AnalyzeColumns.cpp index 1e0883f2d3a..98205ee5777 100644 --- a/dbms/src/Analyzers/AnalyzeColumns.cpp +++ b/dbms/src/Analyzers/AnalyzeColumns.cpp @@ -385,7 +385,7 @@ void AnalyzeColumns::process(ASTPtr & ast, const CollectAliases & aliases, const for (auto & child : ast->children) { - if (select && (child.get() == select->format.get() || child.get() == select->settings.get())) + if (select && child.get() == select->settings.get()) continue; processImpl(child, columns, aliases, tables); diff --git a/dbms/src/Interpreters/InterpreterFactory.cpp b/dbms/src/Interpreters/InterpreterFactory.cpp index d13c509053c..d212d1c63d1 100644 --- a/dbms/src/Interpreters/InterpreterFactory.cpp +++ b/dbms/src/Interpreters/InterpreterFactory.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -25,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -58,6 +60,10 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, Context & { return std::make_unique(query, context, stage); } + else if (typeid_cast(query.get())) + { + return std::make_unique(query, context, stage); + } else if (typeid_cast(query.get())) { /// readonly is checked inside InterpreterInsertQuery diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.cpp b/dbms/src/Interpreters/InterpreterInsertQuery.cpp index 259948b6c31..8892a766356 100644 --- a/dbms/src/Interpreters/InterpreterInsertQuery.cpp +++ b/dbms/src/Interpreters/InterpreterInsertQuery.cpp @@ -12,10 +12,10 @@ #include #include -#include +#include #include -#include +#include #include #include @@ -120,7 +120,7 @@ BlockIO InterpreterInsertQuery::execute() if (query.select) { /// Passing 1 as subquery_depth will disable limiting size of intermediate result. 
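/// Sketch of the effect for a hypothetical table t: "INSERT INTO t SELECT 1 UNION ALL SELECT 2" now builds a single InterpreterSelectWithUnionQuery, and res.in yields both rows to the insert.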
- InterpreterSelectQuery interpreter_select{query.select, context, QueryProcessingStage::Complete, 1}; + InterpreterSelectWithUnionQuery interpreter_select{query.select, context, QueryProcessingStage::Complete, 1}; res.in = interpreter_select.execute().in; diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index fbe021d1bd8..1f0f7178080 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -31,6 +31,7 @@ #include #include +#include #include #include #include @@ -59,7 +60,6 @@ namespace ErrorCodes { extern const int TOO_DEEP_SUBQUERIES; extern const int THERE_IS_NO_COLUMN; - extern const int UNION_ALL_RESULT_STRUCTURES_MISMATCH; extern const int SAMPLING_NOT_SUPPORTED; extern const int ILLEGAL_FINAL; extern const int ILLEGAL_PREWHERE; @@ -71,7 +71,7 @@ namespace ErrorCodes InterpreterSelectQuery::~InterpreterSelectQuery() = default; -void InterpreterSelectQuery::init(const Names & required_column_names) +void InterpreterSelectQuery::init() { ProfileEvents::increment(ProfileEvents::SelectQuery); @@ -84,63 +84,6 @@ void InterpreterSelectQuery::init(const Names & required_column_names) max_streams = settings.max_threads; - if (is_first_select_inside_union_all) - { - /// Create a SELECT query chain. - InterpreterSelectQuery * interpreter = this; - ASTPtr tail = query.next_union_all; - - while (tail) - { - ASTPtr head = tail; - - ASTSelectQuery & head_query = static_cast(*head); - tail = head_query.next_union_all; - - interpreter->next_select_in_union_all = std::make_unique(head, context, to_stage, subquery_depth); - interpreter = interpreter->next_select_in_union_all.get(); - } - } - - if (is_first_select_inside_union_all && hasAsterisk()) - { - basicInit(); - - // We execute this code here, because otherwise the following kind of query would not work - // SELECT X FROM (SELECT * FROM (SELECT 1 AS X, 2 AS Y) UNION ALL SELECT 3, 4) - // because the asterisk is replaced with columns only when query_analyzer objects are created in basicInit(). - renameColumns(); - - if (!required_column_names.empty() && (source_header.columns() != required_column_names.size())) - { - rewriteExpressionList(required_column_names); - /// Now there is obsolete information to execute the query. We update this information. - initQueryAnalyzer(); - } - } - else - { - renameColumns(); - if (!required_column_names.empty()) - { - rewriteExpressionList(required_column_names); - - if (is_first_select_inside_union_all) - { - for (auto p = next_select_in_union_all.get(); p != nullptr; p = p->next_select_in_union_all.get()) - p->query_analyzer = std::make_unique( - p->query_ptr, p->context, p->storage, p->source_header.getNamesAndTypesList(), p->subquery_depth, - false, p->query_analyzer->getSubqueriesForSets()); - } - } - - basicInit(); - } -} - - -void InterpreterSelectQuery::basicInit() -{ /// Read from prepared input. 
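/// (i.e. the BlockInputStreamPtr passed to the constructor; when it is set, no table or subquery is read. A note on intent.)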
if (input) { @@ -204,13 +147,6 @@ void InterpreterSelectQuery::basicInit() context.addExternalTable(it.first, it.second); } -void InterpreterSelectQuery::initQueryAnalyzer() -{ - query_analyzer = std::make_unique(query_ptr, context, storage, source_header.getNamesAndTypesList(), subquery_depth, !only_analyze); - - for (auto p = next_select_in_union_all.get(); p != nullptr; p = p->next_select_in_union_all.get()) - p->query_analyzer = std::make_unique(p->query_ptr, p->context, p->storage, p->source_header.getNamesAndTypesList(), p->subquery_depth, !only_analyze); -} InterpreterSelectQuery::InterpreterSelectQuery( const ASTPtr & query_ptr_, @@ -225,7 +161,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( InterpreterSelectQuery::InterpreterSelectQuery( const ASTPtr & query_ptr_, const Context & context_, - const Names & required_column_names_, + const Names & /*required_column_names_*/, QueryProcessingStage::Enum to_stage_, size_t subquery_depth_, const BlockInputStreamPtr & input) @@ -234,11 +170,10 @@ InterpreterSelectQuery::InterpreterSelectQuery( , context(context_) , to_stage(to_stage_) , subquery_depth(subquery_depth_) - , is_first_select_inside_union_all(query.isUnionAllHead()) , input(input) , log(&Logger::get("InterpreterSelectQuery")) { - init(required_column_names_); + init(); } @@ -248,50 +183,13 @@ InterpreterSelectQuery::InterpreterSelectQuery(OnlyAnalyzeTag, const ASTPtr & qu , context(context_) , to_stage(QueryProcessingStage::Complete) , subquery_depth(0) - , is_first_select_inside_union_all(false), only_analyze(true) + , only_analyze(true) , log(&Logger::get("InterpreterSelectQuery")) { - init({}); + init(); } -bool InterpreterSelectQuery::hasAsterisk() const -{ - if (query.hasAsterisk()) - return true; - - if (is_first_select_inside_union_all) - for (auto p = next_select_in_union_all.get(); p != nullptr; p = p->next_select_in_union_all.get()) - if (p->query.hasAsterisk()) - return true; - - return false; -} - -void InterpreterSelectQuery::renameColumns() -{ - if (is_first_select_inside_union_all) - for (auto p = next_select_in_union_all.get(); p != nullptr; p = p->next_select_in_union_all.get()) - p->query.renameColumns(query); -} - -void InterpreterSelectQuery::rewriteExpressionList(const Names & required_column_names) -{ - if (query.distinct) - return; - - if (is_first_select_inside_union_all) - for (auto p = next_select_in_union_all.get(); p != nullptr; p = p->next_select_in_union_all.get()) - if (p->query.distinct) - return; - - query.rewriteSelectExpressionList(required_column_names); - - if (is_first_select_inside_union_all) - for (auto p = next_select_in_union_all.get(); p != nullptr; p = p->next_select_in_union_all.get()) - p->query.rewriteSelectExpressionList(required_column_names); -} - void InterpreterSelectQuery::getDatabaseAndTableNames(String & database_name, String & table_name) { auto query_database = query.database(); @@ -323,7 +221,7 @@ void InterpreterSelectQuery::getDatabaseAndTableNames(String & database_name, St Block InterpreterSelectQuery::getSampleBlock() { Pipeline pipeline; - executeWithMultipleStreamsImpl(pipeline, std::make_shared(source_header)); + executeImpl(pipeline, std::make_shared(source_header)); auto res = pipeline.firstStream()->getHeader(); return res; } @@ -338,28 +236,9 @@ Block InterpreterSelectQuery::getSampleBlock(const ASTPtr & query_ptr_, const Co BlockIO InterpreterSelectQuery::execute() { Pipeline pipeline; - executeWithMultipleStreamsImpl(pipeline, input); + executeImpl(pipeline, input); executeUnion(pipeline); - /// 
Constraints on the result, the quota on the result, and also callback for progress. - if (IProfilingBlockInputStream * stream = dynamic_cast(pipeline.firstStream().get())) - { - /// Constraints apply only to the final result. - if (to_stage == QueryProcessingStage::Complete && subquery_depth == 0) - { - const Settings & settings = context.getSettingsRef(); - - IProfilingBlockInputStream::LocalLimits limits; - limits.mode = IProfilingBlockInputStream::LIMITS_CURRENT; - limits.max_rows_to_read = settings.limits.max_result_rows; - limits.max_bytes_to_read = settings.limits.max_result_bytes; - limits.read_overflow_mode = settings.limits.result_overflow_mode; - - stream->setLimits(limits); - stream->setQuota(context.getQuota()); - } - } - BlockIO res; res.in = pipeline.firstStream(); return res; @@ -368,34 +247,10 @@ BlockIO InterpreterSelectQuery::execute() BlockInputStreams InterpreterSelectQuery::executeWithMultipleStreams() { Pipeline pipeline; - executeWithMultipleStreamsImpl(pipeline, input); + executeImpl(pipeline, input); return pipeline.streams; } -void InterpreterSelectQuery::executeWithMultipleStreamsImpl(Pipeline & pipeline, const BlockInputStreamPtr & input) -{ - if (input) - pipeline.streams.push_back(input); - - if (is_first_select_inside_union_all) - { - executeSingleQuery(pipeline); - for (auto p = next_select_in_union_all.get(); p != nullptr; p = p->next_select_in_union_all.get()) - { - Pipeline other_pipeline; - p->executeSingleQuery(other_pipeline); - pipeline.streams.insert(pipeline.streams.end(), other_pipeline.streams.begin(), other_pipeline.streams.end()); - } - - pipeline.transform([&](auto & stream) - { - stream = std::make_shared(stream); - }); - } - else - executeSingleQuery(pipeline); -} - InterpreterSelectQuery::AnalysisResult InterpreterSelectQuery::analyzeExpressions(QueryProcessingStage::Enum from_stage) { @@ -477,8 +332,11 @@ InterpreterSelectQuery::AnalysisResult InterpreterSelectQuery::analyzeExpression } -void InterpreterSelectQuery::executeSingleQuery(Pipeline & pipeline) +void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputStreamPtr & input) { + if (input) + pipeline.streams.push_back(input); + /** Streams of data. When the query is executed in parallel, we have several data streams. * If there is no GROUP BY, then perform all operations before ORDER BY and LIMIT in parallel, then * if there is an ORDER BY, then glue the streams using UnionBlockInputStream, and then MergeSortingBlockInputStream, @@ -487,12 +345,8 @@ void InterpreterSelectQuery::executeSingleQuery(Pipeline & pipeline) * If there is GROUP BY, then we will perform all operations up to GROUP BY, inclusive, in parallel; * a parallel GROUP BY will glue streams into one, * then perform the remaining operations with one resulting stream. - * If the query is a member of the UNION ALL chain and does not contain GROUP BY, ORDER BY, DISTINCT, or LIMIT, - * then the data sources are merged not at this level, but at the upper level. */ - union_within_single_query = false; - /** Read the data from Storage. from_stage - to what stage the request was completed in Storage. 
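     * (The stages here are assumed to be the usual QueryProcessingStage values: FetchColumns, WithMergeableState, Complete.)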
*/ QueryProcessingStage::Enum from_stage = executeFetchColumns(pipeline); @@ -564,7 +418,8 @@ void InterpreterSelectQuery::executeSingleQuery(Pipeline & pipeline) if (expressions.second_stage) { - bool need_second_distinct_pass; + bool need_second_distinct_pass = false; + bool need_merge_streams = false; if (expressions.need_aggregate) { @@ -618,20 +473,22 @@ void InterpreterSelectQuery::executeSingleQuery(Pipeline & pipeline) * limiting the number of rows in each up to `offset + limit`. */ if (query.limit_length && pipeline.hasMoreThanOneStream() && !query.distinct && !query.limit_by_expression_list) + { executePreLimit(pipeline); + } - if (pipeline.stream_with_non_joined_data || need_second_distinct_pass) - union_within_single_query = true; + if (need_second_distinct_pass + || query.limit_length + || query.limit_by_expression_list + || pipeline.stream_with_non_joined_data) + { + need_merge_streams = true; + } - /// To execute LIMIT BY we should merge all streams together. - if (query.limit_by_expression_list && pipeline.hasMoreThanOneStream()) - union_within_single_query = true; - - if (union_within_single_query) + if (need_merge_streams) + { executeUnion(pipeline); - if (pipeline.streams.size() == 1) - { /** If there was more than one stream, * then DISTINCT needs to be performed once again after merging all streams. */ @@ -664,7 +521,7 @@ static void getLimitLengthAndOffset(ASTSelectQuery & query, size_t & length, siz QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline & pipeline) { /// The subquery interpreter, if the subquery - std::optional interpreter_subquery; + std::optional interpreter_subquery; /// List of columns to read to execute the query. Names required_columns = query_analyzer->getRequiredColumns(); @@ -741,8 +598,6 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline size_t limit_offset = 0; getLimitLengthAndOffset(query, limit_length, limit_offset); - size_t max_block_size = settings.max_block_size; - /** With distributed query processing, almost no computations are done in the threads, * but wait and receive data from remote servers. * If we have 20 remote servers, and max_threads = 8, then it would not be very good @@ -757,6 +612,8 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline max_streams = settings.max_distributed_connections; } + size_t max_block_size = settings.max_block_size; + /** Optimization - if not specified DISTINCT, WHERE, GROUP, HAVING, ORDER, LIMIT BY but LIMIT is specified, and limit + offset < max_block_size, * then as the block size we will use limit + offset (not to read more from the table than requested), * and also set the number of threads to 1. @@ -770,7 +627,7 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline && !query.limit_by_expression_list && query.limit_length && !query_analyzer->hasAggregation() - && limit_length + limit_offset < settings.max_block_size) + && limit_length + limit_offset < max_block_size) { max_block_size = limit_length + limit_offset; max_streams = 1; @@ -788,7 +645,7 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline else if (interpreter_subquery) { /// Subquery. 
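/// For a hypothetical "SELECT x FROM (SELECT 1 AS x UNION ALL SELECT 2)", interpreter_subquery is now an InterpreterSelectWithUnionQuery and contributes one stream per inner SELECT.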
- interpreter_subquery->executeWithMultipleStreamsImpl(pipeline, {}); + pipeline.streams = interpreter_subquery->executeWithMultipleStreams(); } else if (storage) { @@ -1158,9 +1015,6 @@ void InterpreterSelectQuery::executeDistinct(Pipeline & pipeline, bool before_or else stream = std::make_shared(stream, settings.limits, limit_for_distinct, columns); }); - - if (pipeline.hasMoreThanOneStream()) - union_within_single_query = true; } } @@ -1173,13 +1027,11 @@ void InterpreterSelectQuery::executeUnion(Pipeline & pipeline) pipeline.firstStream() = std::make_shared>(pipeline.streams, pipeline.stream_with_non_joined_data, max_streams); pipeline.stream_with_non_joined_data = nullptr; pipeline.streams.resize(1); - union_within_single_query = false; } else if (pipeline.stream_with_non_joined_data) { pipeline.streams.push_back(pipeline.stream_with_non_joined_data); pipeline.stream_with_non_joined_data = nullptr; - union_within_single_query = false; } } @@ -1198,9 +1050,6 @@ void InterpreterSelectQuery::executePreLimit(Pipeline & pipeline) { stream = std::make_shared(stream, limit_length + limit_offset, 0, false); }); - - if (pipeline.hasMoreThanOneStream()) - union_within_single_query = true; } } @@ -1211,18 +1060,14 @@ void InterpreterSelectQuery::executeLimitBy(Pipeline & pipeline) return; Names columns; - size_t value = safeGet(typeid_cast(*query.limit_by_value).value); - for (const auto & elem : query.limit_by_expression_list->children) - { columns.emplace_back(elem->getAliasOrColumnName()); - } + + size_t value = safeGet(typeid_cast(*query.limit_by_value).value); pipeline.transform([&](auto & stream) { - stream = std::make_shared( - stream, value, columns - ); + stream = std::make_shared(stream, value, columns); }); } @@ -1294,7 +1139,6 @@ void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(Pipeline & pipeline } -/// TODO This is trash. void InterpreterSelectQuery::ignoreWithTotals() { query.group_by_with_totals = false; diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.h b/dbms/src/Interpreters/InterpreterSelectQuery.h index 202c1b6b8fe..77b3d3c4059 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.h +++ b/dbms/src/Interpreters/InterpreterSelectQuery.h @@ -76,6 +76,8 @@ public: const ASTPtr & query_ptr_, const Context & context_); + void ignoreWithTotals(); + private: struct Pipeline { @@ -116,11 +118,9 @@ private: const ASTPtr & query_ptr_, const Context & context_); - void init(const Names & required_column_names); - void basicInit(); - void initQueryAnalyzer(); + void init(); - void executeSingleQuery(Pipeline & pipeline); + void executeImpl(Pipeline & pipeline, const BlockInputStreamPtr & input); struct AnalysisResult @@ -152,24 +152,6 @@ private: AnalysisResult analyzeExpressions(QueryProcessingStage::Enum from_stage); - /** Leave only the necessary columns of the SELECT section in each query of the UNION ALL chain. - * However, if you use at least one DISTINCT in the chain, then all the columns are considered necessary, - * since otherwise DISTINCT would work differently. - * - * Always leave arrayJoin, because it changes number of rows. - * - * TODO If query doesn't have GROUP BY, but have aggregate functions, - * then leave at least one aggregate function, - * In order that fact of aggregation has not been lost. - */ - void rewriteExpressionList(const Names & required_column_names); - - /// Does the request contain at least one asterisk? 
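/// (Asterisks mattered here because SELECT * had to expand identically in every SELECT of the chain; an editorial note on the removed logic.)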
- bool hasAsterisk() const; - - // Rename the columns of each query for the UNION ALL chain into the same names as in the first query. - void renameColumns(); - /** From which table to read. With JOIN, the "left" table is returned. */ void getDatabaseAndTableNames(String & database_name, String & table_name); @@ -196,8 +178,6 @@ private: void executeDistinct(Pipeline & pipeline, bool before_order, Names columns); void executeSubqueriesInSetsAndJoins(Pipeline & pipeline, std::unordered_map & subqueries_for_sets); - void ignoreWithTotals(); - /** If there is a SETTINGS section in the SELECT query, then apply settings from it. * * Section SETTINGS - settings for a specific query. diff --git a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index adc84cc7311..39d1b13e06d 100644 --- a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -97,44 +97,30 @@ BlockIO InterpreterSelectWithUnionQuery::execute() if (nested_streams.empty()) { - result_stream = std::make_shared(); + result_stream = std::make_shared(getSampleBlock()); } else if (nested_streams.size() == 1) { result_stream = nested_streams.front(); + nested_streams.clear(); } else { const Settings & settings = context.getSettingsRef(); - - result_stream = std::make_shared>(nested_streams, nullptr /* TODO stream_with_non_joined_data */, settings.max_threads); + result_stream = std::make_shared>(nested_streams, nullptr, settings.max_threads); nested_streams.clear(); } - /// Constraints on the result, the quota on the result, and also callback for progress. - if (IProfilingBlockInputStream * stream = dynamic_cast(result_stream.get())) - { - /// Constraints apply only to the final result. - if (to_stage == QueryProcessingStage::Complete) - { - const Settings & settings = context.getSettingsRef(); - - IProfilingBlockInputStream::LocalLimits limits; - limits.mode = IProfilingBlockInputStream::LIMITS_CURRENT; - limits.max_rows_to_read = settings.limits.max_result_rows; - limits.max_bytes_to_read = settings.limits.max_result_bytes; - limits.read_overflow_mode = settings.limits.result_overflow_mode; - - stream->setLimits(limits); - stream->setQuota(context.getQuota()); - } - } - BlockIO res; res.in = result_stream; - res.in_sample = getSampleBlock(); - return res; } + +void InterpreterSelectWithUnionQuery::ignoreWithTotals() +{ + for (auto & interpreter : nested_interpreters) + interpreter->ignoreWithTotals(); +} + } diff --git a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h index cce77240517..71a606e1bd4 100644 --- a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h +++ b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h @@ -42,6 +42,8 @@ public: const ASTPtr & query_ptr_, const Context & context_); + void ignoreWithTotals(); + private: ASTPtr query_ptr; Context context; diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index eb31be2a80f..10e20e47cbc 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -223,6 +223,20 @@ static std::tuple executeQueryImpl( { stream->setProgressCallback(context.getProgressCallback()); stream->setProcessListElement(context.getProcessListElement()); + + /// Limits on the result, the quota on the result, and also callback for progress. + /// Limits apply only to the final result. 
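/// (LIMITS_CURRENT is understood to constrain what this stream returns, i.e. the result, not the amount of data read from tables; an assumption consistent with the comment above.)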
+            if (stage == QueryProcessingStage::Complete)
+            {
+                IProfilingBlockInputStream::LocalLimits limits;
+                limits.mode = IProfilingBlockInputStream::LIMITS_CURRENT;
+                limits.max_rows_to_read = settings.limits.max_result_rows;
+                limits.max_bytes_to_read = settings.limits.max_result_bytes;
+                limits.read_overflow_mode = settings.limits.result_overflow_mode;
+
+                stream->setLimits(limits);
+                stream->setQuota(quota);
+            }
         }
     }

diff --git a/dbms/src/Parsers/ASTCreateQuery.h b/dbms/src/Parsers/ASTCreateQuery.h
index 5f1c8c66891..fb96a927d4e 100644
--- a/dbms/src/Parsers/ASTCreateQuery.h
+++ b/dbms/src/Parsers/ASTCreateQuery.h
@@ -3,7 +3,7 @@
 #include
 #include
 #include
-#include
+#include
 #include
 #include

@@ -93,7 +93,7 @@ public:
     ASTStorage * storage = nullptr;
     String as_database;
     String as_table;
-    ASTSelectQuery * select = nullptr;
+    ASTSelectWithUnionQuery * select = nullptr;

     ASTCreateQuery() = default;
     ASTCreateQuery(const StringRange range_) : ASTQueryWithOutput(range_) {}

diff --git a/dbms/src/Parsers/ASTQueryWithOutput.h b/dbms/src/Parsers/ASTQueryWithOutput.h
index 40ac02380e5..5f2c43a50ba 100644
--- a/dbms/src/Parsers/ASTQueryWithOutput.h
+++ b/dbms/src/Parsers/ASTQueryWithOutput.h
@@ -31,17 +31,19 @@ protected:
 };


-template
+/** Helper template for simple queries like SHOW PROCESSLIST.
+  */
+template
 class ASTQueryWithOutputImpl : public ASTQueryWithOutput
 {
 public:
     explicit ASTQueryWithOutputImpl() = default;
     explicit ASTQueryWithOutputImpl(StringRange range_) : ASTQueryWithOutput(range_) {}
-    String getID() const override { return AstIDAndQueryNames::ID; };
+    String getID() const override { return ASTIDAndQueryNames::ID; };

     ASTPtr clone() const override
     {
-        auto res = std::make_shared>(*this);
+        auto res = std::make_shared>(*this);
         res->children.clear();
         cloneOutputOptions(*res);
         return res;
@@ -51,7 +53,7 @@ protected:
     void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override
     {
         settings.ostr << (settings.hilite ? hilite_keyword : "")
-            << AstIDAndQueryNames::Query << (settings.hilite ? hilite_none : "");
+            << ASTIDAndQueryNames::Query << (settings.hilite ? hilite_none : "");
     }
 };

diff --git a/dbms/src/Parsers/ASTSelectQuery.cpp b/dbms/src/Parsers/ASTSelectQuery.cpp
index 08dbc8007dc..9629481bc9a 100644
--- a/dbms/src/Parsers/ASTSelectQuery.cpp
+++ b/dbms/src/Parsers/ASTSelectQuery.cpp
@@ -169,7 +169,7 @@ ASTPtr ASTSelectQuery::clone() const
 }


-void ASTSelectQuery::formatQueryImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const
+void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const
 {
     frame.current_select = this;
     frame.need_parens = false;

diff --git a/dbms/src/Parsers/ASTSelectQuery.h b/dbms/src/Parsers/ASTSelectQuery.h
index 344f9e4f7a1..abde8212ebb 100644
--- a/dbms/src/Parsers/ASTSelectQuery.h
+++ b/dbms/src/Parsers/ASTSelectQuery.h
@@ -61,7 +61,7 @@ public:
     void replaceDatabaseAndTable(const String & database_name, const String & table_name);

 protected:
-    void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
+    void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
 };

 }

diff --git a/dbms/src/Parsers/ParserInsertQuery.cpp b/dbms/src/Parsers/ParserInsertQuery.cpp
index 2fe8ca46f68..59bd7d05e6b 100644
--- a/dbms/src/Parsers/ParserInsertQuery.cpp
+++ b/dbms/src/Parsers/ParserInsertQuery.cpp
@@ -1,12 +1,12 @@
 #include
 #include
-#include
+#include
 #include
 #include
 #include
 #include
-#include
+#include
 #include
 #include

@@ -122,7 +122,7 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
     else if (s_select.ignore(pos, expected) || s_with.ignore(pos,expected))
     {
         pos = before_select;
-        ParserSelectQuery select_p;
+        ParserSelectWithUnionQuery select_p;
         select_p.parse(pos, select, expected);
     }
     else

diff --git a/dbms/src/Parsers/ParserSelectWithUnionQuery.cpp b/dbms/src/Parsers/ParserSelectWithUnionQuery.cpp
index b186bbfc460..e243738e45e 100644
--- a/dbms/src/Parsers/ParserSelectWithUnionQuery.cpp
+++ b/dbms/src/Parsers/ParserSelectWithUnionQuery.cpp
@@ -7,17 +7,16 @@
 namespace DB
 {

-bool ParserSelectWithUnionQuery::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_pos, Expected & expected)
+bool ParserSelectWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
 {
     ASTPtr list_node;

     ParserList parser(std::make_unique(), std::make_unique("UNION ALL"), false);
-    if (!parser.parse(pos, end, list_node, max_parsed_pos, expected))
+    if (!parser.parse(pos, list_node, expected))
         return false;

     node = std::make_shared(list_node->range);
     node->children = list_node->children;
-
     return true;
 }

diff --git a/dbms/src/Parsers/ParserSelectWithUnionQuery.h b/dbms/src/Parsers/ParserSelectWithUnionQuery.h
index 1996a28badb..33857fe33cb 100644
--- a/dbms/src/Parsers/ParserSelectWithUnionQuery.h
+++ b/dbms/src/Parsers/ParserSelectWithUnionQuery.h
@@ -11,7 +11,7 @@ class ParserSelectWithUnionQuery : public IParserBase
 {
 protected:
     const char * getName() const override { return "SELECT query, possibly with UNION"; }
-    bool parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_pos, Expected & expected) override;
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
 };

 }

diff --git a/dbms/src/Server/Client.cpp b/dbms/src/Server/Client.cpp
index 238be504aac..5b38c110bfb 100644
--- a/dbms/src/Server/Client.cpp
+++ b/dbms/src/Server/Client.cpp
@@ -41,7 +41,7 @@
 #include
 #include
 #include
-#include
+#include
 #include
 #include
 #include
@@ -716,7 +716,7 @@ private:
     /// Convert external tables to ExternalTableData and send them using the connection.
     void sendExternalTables()
     {
-        const ASTSelectQuery * select = typeid_cast(&*parsed_query);
+        auto * select = typeid_cast(&*parsed_query);
         if (!select && !external_tables.empty())
             throw Exception("External tables could be sent only with select query", ErrorCodes::BAD_ARGUMENTS);

diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp
index 718d04202f0..894f6f2dd44 100644
--- a/dbms/src/Storages/StorageDistributed.cpp
+++ b/dbms/src/Storages/StorageDistributed.cpp
@@ -63,7 +63,7 @@ namespace
     ASTPtr rewriteSelectQuery(const ASTPtr & query, const std::string & database, const std::string & table)
     {
         auto modified_query_ast = query->clone();
-        typeid_cast(*modified_query_ast)->replaceDatabaseAndTable(database, table);
+        typeid_cast(*modified_query_ast).replaceDatabaseAndTable(database, table);
         return modified_query_ast;
     }

diff --git a/dbms/src/Storages/StorageMaterializedView.cpp b/dbms/src/Storages/StorageMaterializedView.cpp
index 820ccdb969e..6a78f99ee56 100644
--- a/dbms/src/Storages/StorageMaterializedView.cpp
+++ b/dbms/src/Storages/StorageMaterializedView.cpp
@@ -76,7 +76,12 @@ StorageMaterializedView::StorageMaterializedView(
             "You must specify where to save results of a MaterializedView query: either ENGINE or an existing table in a TO clause",
             ErrorCodes::INCORRECT_QUERY);

-    extractDependentTable(*query.select, select_database_name, select_table_name);
+    if (query.select->children.size() != 1)
+        throw Exception("UNION is not supported for MATERIALIZED VIEW", ErrorCodes::INCORRECT_QUERY);
+
+    inner_query = query.select->children.at(0);
+
+    extractDependentTable(typeid_cast(*inner_query), select_database_name, select_table_name);

     if (!select_table_name.empty())
         global_context.addDependency(
@@ -96,8 +101,6 @@ StorageMaterializedView::StorageMaterializedView(
         has_inner_table = true;
     }

-    inner_query = query.select->ptr();
-
     /// If there is an ATTACH request, then the internal table must already be connected.
     if (!attach_ && has_inner_table)
     {

diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp
index 29d0ba0ba08..09b5bc58094 100644
--- a/dbms/src/Storages/StorageMergeTree.cpp
+++ b/dbms/src/Storages/StorageMergeTree.cpp
@@ -11,7 +11,6 @@
 #include
 #include
 #include
-#include
 #include
 #include

diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp
index b08cce6db49..1b692e0397a 100644
--- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp
@@ -14,7 +14,6 @@
 #include
 #include
-#include
 #include
 #include

diff --git a/dbms/src/Storages/StorageView.cpp b/dbms/src/Storages/StorageView.cpp
index 4e498ff1e0b..26583eac9b6 100644
--- a/dbms/src/Storages/StorageView.cpp
+++ b/dbms/src/Storages/StorageView.cpp
@@ -1,7 +1,6 @@
-#include
+#include
 #include
 #include
-#include
 #include
 #include

@@ -44,7 +43,7 @@ BlockInputStreams StorageView::read(
     const unsigned /*num_streams*/)
 {
     processed_stage = QueryProcessingStage::FetchColumns;
-    return InterpreterSelectQuery(inner_query->clone(), context, column_names).executeWithMultipleStreams();
+    return InterpreterSelectWithUnionQuery(inner_query->clone(), context, column_names).executeWithMultipleStreams();
 }

From 935a9cbc4b4fb603543ac3ef5dafb965da3c38ae Mon Sep 17 00:00:00 2001
From: zhang2014
Date: Sun, 25 Feb 2018 15:04:18 +0800
Subject: [PATCH 107/209] Create issue & pull request templates in GitHub

---
 .github/ISSUE_TEMPLATE.md        | 5 +++++
 .github/PULL_REQUEST_TEMPLATE.md | 1 +
 2 files changed, 6 insertions(+)
 create mode 100644 .github/ISSUE_TEMPLATE.md
 create mode 100644 .github/PULL_REQUEST_TEMPLATE.md

diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
new file mode 100644
index 00000000000..3ec7ec9efa6
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE.md
@@ -0,0 +1,5 @@
+### Expected behavior
+
+### Actual behavior
+
+### Steps to reproduce the behavior

diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
new file mode 100644
index 00000000000..98c94cb52f2
--- /dev/null
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1 @@
+- [ ] I hereby agree to the terms of the CLA available at: https://yandex.ru/legal/cla/?lang=en

From 88b01e867d8a23986254328e084c6d527855f020 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 25 Feb 2018 10:39:45 +0300
Subject: [PATCH 108/209] Better UNION ALL: development #1947

---
 .../InterpreterSelectWithUnionQuery.cpp       | 20 +++++++++++++------
 .../Parsers/ParserSelectWithUnionQuery.cpp    |  8 ++++++--
 dbms/src/Storages/StorageMaterializedView.cpp |  4 ++--
 3 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp
index 39d1b13e06d..27397459276 100644
--- a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp
@@ -3,6 +3,7 @@
 #include
 #include
 #include
+#include

 namespace DB
@@ -24,13 +25,15 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
     to_stage(to_stage_),
     subquery_depth(subquery_depth_)
 {
-    size_t num_selects = query_ptr->children.size();
+    const ASTSelectWithUnionQuery & ast = typeid_cast(*query_ptr);
+
+    size_t num_selects = ast.list_of_selects->children.size();
     nested_interpreters.reserve(num_selects);

     if (!num_selects)
         throw Exception("Logical error: no children in ASTSelectWithUnionQuery", ErrorCodes::LOGICAL_ERROR);

-    for (const auto & select : query_ptr->children)
+    for (const auto & select : ast.list_of_selects->children)
         nested_interpreters.emplace_back(std::make_unique(select, context, to_stage, subquery_depth));
 }

@@ -46,13 +49,15 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
     to_stage(to_stage_),
     subquery_depth(subquery_depth_)
 {
-    size_t num_selects = query_ptr->children.size();
+    const ASTSelectWithUnionQuery & ast = typeid_cast(*query_ptr);
+
+    size_t num_selects = ast.list_of_selects->children.size();
     nested_interpreters.reserve(num_selects);

     if (!num_selects)
         throw Exception("Logical error: no children in ASTSelectWithUnionQuery", ErrorCodes::LOGICAL_ERROR);

-    for (const auto & select : query_ptr->children)
+    for (const auto & select : ast.list_of_selects->children)
         nested_interpreters.emplace_back(std::make_unique(select, context, required_column_names, to_stage, subquery_depth));
 }

@@ -69,10 +74,13 @@ Block InterpreterSelectWithUnionQuery::getSampleBlock(
     const ASTPtr & query_ptr,
     const Context & context)
 {
-    if (query_ptr->children.empty())
+    const ASTSelectWithUnionQuery & ast = typeid_cast(*query_ptr);
+
+    size_t num_selects = ast.list_of_selects->children.size();
+    if (!num_selects)
         throw Exception("Logical error: no children in ASTSelectWithUnionQuery", ErrorCodes::LOGICAL_ERROR);

-    return InterpreterSelectQuery::getSampleBlock(query_ptr->children.front(), context);
+    return InterpreterSelectQuery::getSampleBlock(ast.list_of_selects->children.front(), context);
 }

diff --git a/dbms/src/Parsers/ParserSelectWithUnionQuery.cpp b/dbms/src/Parsers/ParserSelectWithUnionQuery.cpp
index e243738e45e..66d39e23196 100644
--- a/dbms/src/Parsers/ParserSelectWithUnionQuery.cpp
+++ b/dbms/src/Parsers/ParserSelectWithUnionQuery.cpp
@@ -15,8 +15,12 @@ bool ParserSelectWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected &
     if (!parser.parse(pos, list_node, expected))
         return false;

-    node = std::make_shared(list_node->range);
-    node->children = list_node->children;
+    auto res = std::make_shared(list_node->range);
+
+    res->list_of_selects = std::move(list_node);
+    res->children.push_back(res->list_of_selects);
+
+    node = res;
     return true;
 }

diff --git a/dbms/src/Storages/StorageMaterializedView.cpp b/dbms/src/Storages/StorageMaterializedView.cpp
index 6a78f99ee56..dfa3a140da4 100644
--- a/dbms/src/Storages/StorageMaterializedView.cpp
+++ b/dbms/src/Storages/StorageMaterializedView.cpp
@@ -76,10 +76,10 @@ StorageMaterializedView::StorageMaterializedView(
             "You must specify where to save results of a MaterializedView query: either ENGINE or an existing table in a TO clause",
             ErrorCodes::INCORRECT_QUERY);

-    if (query.select->children.size() != 1)
+    if (query.select->list_of_selects->children.size() != 1)
         throw Exception("UNION is not supported for MATERIALIZED VIEW", ErrorCodes::INCORRECT_QUERY);

-    inner_query = query.select->children.at(0);
+    inner_query = query.select->list_of_selects->children.at(0);

     extractDependentTable(typeid_cast(*inner_query), select_database_name, select_table_name);

From c12d93a873294b8082c78582402ba2a92cc6c9b8 Mon Sep 17 00:00:00 2001
From: proller
Date: Mon, 26 Feb 2018 03:19:42 +0300
Subject: [PATCH 109/209] Allow build without rt library (#1963)

* Allow build without rt library

* fix
---
 libs/libcommon/CMakeLists.txt            | 5 ++++-
 libs/libcommon/src/tests/CMakeLists.txt  | 4 ++--
 libs/libmysqlxx/src/tests/CMakeLists.txt | 2 +-
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/libs/libcommon/CMakeLists.txt b/libs/libcommon/CMakeLists.txt
index ee6e6af543e..44b3e4767d0 100644
--- a/libs/libcommon/CMakeLists.txt
+++ b/libs/libcommon/CMakeLists.txt
@@ -111,10 +111,13 @@ target_link_libraries (
     ${Boost_FILESYSTEM_LIBRARY}
     ${MALLOC_LIBRARIES}
    ${CMAKE_THREAD_LIBS_INIT}
-    ${RT_LIBRARY}
     ${GLIBC_COMPATIBILITY_LIBRARIES}
     ${MEMCPY_LIBRARIES})

+if (RT_LIBRARY)
+    target_link_libraries (common ${RT_LIBRARY})
+endif ()
+
 if (ENABLE_TESTS)
     add_subdirectory (src/tests)
 endif ()

diff --git a/libs/libcommon/src/tests/CMakeLists.txt b/libs/libcommon/src/tests/CMakeLists.txt
index 7fd59a2a07d..56b9a927e76 100644
--- a/libs/libcommon/src/tests/CMakeLists.txt
+++ b/libs/libcommon/src/tests/CMakeLists.txt
@@ -14,8 +14,8 @@ target_link_libraries (date_lut2 common ${PLATFORM_LIBS})
 target_link_libraries (date_lut3 common ${PLATFORM_LIBS})
 target_link_libraries (date_lut4 common ${PLATFORM_LIBS})
 target_link_libraries (date_lut_default_timezone common ${PLATFORM_LIBS})
-target_link_libraries (multi_version common ${Boost_FILESYSTEM_LIBRARY} ${Boost_SYSTEM_LIBRARY} ${RT_LIBRARY})
+target_link_libraries (multi_version common)

 add_executable (unit_tests_libcommon gtest_json_test.cpp gtest_strong_typedef.cpp)
-target_link_libraries (unit_tests_libcommon gtest_main common ${Boost_FILESYSTEM_LIBRARY} ${Boost_SYSTEM_LIBRARY} ${RT_LIBRARY})
+target_link_libraries (unit_tests_libcommon gtest_main common)
 add_check(unit_tests_libcommon)

diff --git a/libs/libmysqlxx/src/tests/CMakeLists.txt b/libs/libmysqlxx/src/tests/CMakeLists.txt
index d8853c4938d..3646a35c58f 100644
--- a/libs/libmysqlxx/src/tests/CMakeLists.txt
+++ b/libs/libmysqlxx/src/tests/CMakeLists.txt
@@ -2,6 +2,6 @@
 add_executable (mysqlxx_test mysqlxx_test.cpp)
 add_executable (failover failover.cpp)

-target_link_libraries (mysqlxx_test mysqlxx ${RT_LIBRARY})
+target_link_libraries (mysqlxx_test mysqlxx)
 target_link_libraries (failover mysqlxx ${Poco_Util_LIBRARY} ${Poco_Foundation_LIBRARY})
 target_link_rt_by_force (failover)

From c5dacee1be1302455475f32a06faeff3ee03936f Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 26 Feb 2018 03:22:29 +0300
Subject: [PATCH 110/209] Removed issue template (postponed) #1962.
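
The ISSUE_TEMPLATE.md added in PATCH 107 is removed for now; the
PULL_REQUEST_TEMPLATE.md with the CLA checkbox is kept.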
--- .github/ISSUE_TEMPLATE.md | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 .github/ISSUE_TEMPLATE.md diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md deleted file mode 100644 index 3ec7ec9efa6..00000000000 --- a/.github/ISSUE_TEMPLATE.md +++ /dev/null @@ -1,5 +0,0 @@ -### Expected behavior - -### Actual behavior - -### Steps to reproduce the behavior From 601ff1eee4bbc3f2dee7ac2dd23b112f657ba8f9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 26 Feb 2018 06:37:08 +0300 Subject: [PATCH 111/209] Simplification of AST #1947 --- dbms/src/Analyzers/AnalyzeColumns.cpp | 9 ++- dbms/src/Core/iostream_debug_helpers.cpp | 4 +- dbms/src/Interpreters/Aggregator.cpp | 2 +- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 27 +++++---- .../InJoinSubqueriesPreprocessor.cpp | 10 ++-- .../Interpreters/InterpreterAlterQuery.cpp | 8 +-- .../Interpreters/InterpreterCreateQuery.cpp | 10 ++-- .../Interpreters/InterpreterSelectQuery.cpp | 2 +- .../evaluateConstantExpression.cpp | 5 +- dbms/src/Parsers/ASTAlterQuery.cpp | 4 -- dbms/src/Parsers/ASTAlterQuery.h | 2 - dbms/src/Parsers/ASTAsterisk.h | 2 - dbms/src/Parsers/ASTCheckQuery.h | 2 - dbms/src/Parsers/ASTColumnDeclaration.h | 3 - dbms/src/Parsers/ASTCreateQuery.h | 5 -- dbms/src/Parsers/ASTDropQuery.h | 3 - dbms/src/Parsers/ASTEnumElement.h | 6 +- dbms/src/Parsers/ASTExpressionList.h | 3 - dbms/src/Parsers/ASTFunction.h | 23 +------- dbms/src/Parsers/ASTIdentifier.h | 5 +- dbms/src/Parsers/ASTInsertQuery.h | 3 - dbms/src/Parsers/ASTKillQueryQuery.h | 4 -- dbms/src/Parsers/ASTLiteral.h | 3 +- dbms/src/Parsers/ASTNameTypePair.h | 3 - dbms/src/Parsers/ASTOptimizeQuery.h | 3 - dbms/src/Parsers/ASTOrderByElement.h | 9 +-- dbms/src/Parsers/ASTPartition.h | 2 - dbms/src/Parsers/ASTQualifiedAsterisk.h | 2 - dbms/src/Parsers/ASTQueryWithOutput.h | 5 -- dbms/src/Parsers/ASTQueryWithTableAndOutput.h | 7 --- dbms/src/Parsers/ASTRenameQuery.h | 3 - dbms/src/Parsers/ASTSampleRatio.h | 4 +- dbms/src/Parsers/ASTSelectQuery.cpp | 20 +++---- dbms/src/Parsers/ASTSelectQuery.h | 3 - dbms/src/Parsers/ASTSetQuery.h | 3 - dbms/src/Parsers/ASTShowTablesQuery.h | 3 - dbms/src/Parsers/ASTSubquery.h | 4 +- dbms/src/Parsers/ASTSystemQuery.h | 3 - dbms/src/Parsers/ASTUseQuery.h | 3 - dbms/src/Parsers/ExpressionElementParsers.cpp | 58 ++++++------------- dbms/src/Parsers/ExpressionListParsers.cpp | 8 +-- dbms/src/Parsers/IAST.h | 8 +-- dbms/src/Parsers/IParserBase.cpp | 4 +- dbms/src/Parsers/ParserAlterQuery.cpp | 3 - dbms/src/Parsers/ParserCase.cpp | 11 ++-- dbms/src/Parsers/ParserCheckQuery.cpp | 6 +- dbms/src/Parsers/ParserCreateQuery.cpp | 20 ++----- dbms/src/Parsers/ParserCreateQuery.h | 8 +-- dbms/src/Parsers/ParserDescribeTableQuery.cpp | 3 - dbms/src/Parsers/ParserDropQuery.cpp | 4 +- dbms/src/Parsers/ParserInsertQuery.cpp | 4 +- dbms/src/Parsers/ParserKillQueryQuery.cpp | 3 - dbms/src/Parsers/ParserOptimizeQuery.cpp | 4 +- dbms/src/Parsers/ParserPartition.cpp | 1 - dbms/src/Parsers/ParserRenameQuery.cpp | 4 +- dbms/src/Parsers/ParserSampleRatio.cpp | 4 +- dbms/src/Parsers/ParserSelectQuery.cpp | 4 -- .../Parsers/ParserSelectWithUnionQuery.cpp | 2 +- dbms/src/Parsers/ParserSetQuery.cpp | 4 +- dbms/src/Parsers/ParserShowProcesslistQuery.h | 3 - dbms/src/Parsers/ParserShowTablesQuery.cpp | 4 -- dbms/src/Parsers/ParserSystemQuery.cpp | 3 - .../Parsers/ParserTablePropertiesQuery.cpp | 4 -- dbms/src/Parsers/ParserUseQuery.cpp | 6 +- dbms/src/Parsers/StringRange.h | 6 +- dbms/src/Parsers/TablePropertiesQueriesASTs.h | 2 - 
.../parseIdentifierOrStringLiteral.cpp | 4 +- dbms/src/Parsers/queryToString.cpp | 1 - dbms/src/Server/ClusterCopier.cpp | 8 +-- dbms/src/Storages/AlterCommands.cpp | 10 ++-- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 13 ++--- dbms/src/Storages/MergeTree/PKCondition.cpp | 3 +- dbms/src/Storages/StorageBuffer.cpp | 2 +- dbms/src/Storages/StorageCatBoostPool.cpp | 5 +- dbms/src/Storages/StorageDistributed.cpp | 9 +-- dbms/src/Storages/StorageMergeTree.cpp | 4 +- dbms/src/Storages/VirtualColumnUtils.cpp | 2 +- .../transformQueryForExternalDatabase.cpp | 2 +- 78 files changed, 132 insertions(+), 344 deletions(-) diff --git a/dbms/src/Analyzers/AnalyzeColumns.cpp b/dbms/src/Analyzers/AnalyzeColumns.cpp index 98205ee5777..6367699ea8e 100644 --- a/dbms/src/Analyzers/AnalyzeColumns.cpp +++ b/dbms/src/Analyzers/AnalyzeColumns.cpp @@ -135,7 +135,7 @@ ASTPtr createASTIdentifierForColumnInTable(const String & column, const CollectT { ASTPtr database_name_identifier_node; if (!table.database_name.empty()) - database_name_identifier_node = std::make_shared(StringRange(), table.database_name, ASTIdentifier::Column); + database_name_identifier_node = std::make_shared(table.database_name, ASTIdentifier::Column); ASTPtr table_name_identifier_node; String table_name_or_alias; @@ -146,9 +146,9 @@ ASTPtr createASTIdentifierForColumnInTable(const String & column, const CollectT table_name_or_alias = table.alias; if (!table_name_or_alias.empty()) - table_name_identifier_node = std::make_shared(StringRange(), table_name_or_alias, ASTIdentifier::Column); + table_name_identifier_node = std::make_shared(table_name_or_alias, ASTIdentifier::Column); - ASTPtr column_identifier_node = std::make_shared(StringRange(), column, ASTIdentifier::Column); + ASTPtr column_identifier_node = std::make_shared(column, ASTIdentifier::Column); String compound_name; if (database_name_identifier_node) @@ -157,8 +157,7 @@ ASTPtr createASTIdentifierForColumnInTable(const String & column, const CollectT compound_name += table_name_or_alias + "."; compound_name += column; - auto elem = std::make_shared( - StringRange(), compound_name, ASTIdentifier::Column); + auto elem = std::make_shared(compound_name, ASTIdentifier::Column); if (database_name_identifier_node) elem->children.emplace_back(std::move(database_name_identifier_node)); diff --git a/dbms/src/Core/iostream_debug_helpers.cpp b/dbms/src/Core/iostream_debug_helpers.cpp index 57a3d215b69..a4b0cba85b8 100644 --- a/dbms/src/Core/iostream_debug_helpers.cpp +++ b/dbms/src/Core/iostream_debug_helpers.cpp @@ -123,9 +123,7 @@ std::ostream & operator<<(std::ostream & stream, const SubqueryForSet & what) std::ostream & operator<<(std::ostream & stream, const IAST & what) { - stream << "IAST(" - << "query_string = " << what.query_string - <<"){"; + stream << "IAST{"; what.dumpTree(stream); stream << "}"; return stream; diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index ecd19370ef9..eebc95cada9 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -112,7 +112,7 @@ Block Aggregator::getHeader(bool final) const else type = std::make_shared(params.aggregates[i].function, argument_types, params.aggregates[i].parameters); - res.insert({ type->createColumn(), type, params.aggregates[i].column_name }); + res.insert({ type, params.aggregates[i].column_name }); } } else if (params.intermediate_header) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 
b7f67e8566c..a1cf16a1a27 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -359,7 +359,7 @@ void ExpressionAnalyzer::translateQualifiedNamesImpl(ASTPtr & ast, const String || (!alias.empty() && ident->name == alias)))) { /// Replace to plain asterisk. - ast = std::make_shared(ast->range); + ast = std::make_shared(); } } else @@ -688,8 +688,7 @@ static std::shared_ptr interpretSubquery( /// manually substitute column names in place of asterisk for (const auto & column : columns) - select_expression_list->children.emplace_back(std::make_shared( - StringRange{}, column.name)); + select_expression_list->children.emplace_back(std::make_shared(column.name)); select_query->replaceDatabaseAndTable(database_table.first, database_table.second); } @@ -808,7 +807,7 @@ void ExpressionAnalyzer::addExternalStorage(ASTPtr & subquery_or_table_name_or_t * instead of doing a subquery, you just need to read it. */ - auto database_and_table_name = std::make_shared(StringRange(), external_table_name, ASTIdentifier::Table); + auto database_and_table_name = std::make_shared(external_table_name, ASTIdentifier::Table); if (auto ast_table_expr = typeid_cast(subquery_or_table_name_or_table_expression.get())) { @@ -1032,7 +1031,7 @@ void ExpressionAnalyzer::normalizeTreeImpl( { ASTs all_columns; for (const auto & column_name_type : columns) - all_columns.emplace_back(std::make_shared(asterisk->range, column_name_type.name)); + all_columns.emplace_back(std::make_shared(column_name_type.name)); asts.erase(asts.begin() + i); asts.insert(asts.begin() + i, all_columns.begin(), all_columns.end()); @@ -1147,17 +1146,17 @@ void ExpressionAnalyzer::executeScalarSubqueries() static ASTPtr addTypeConversion(std::unique_ptr && ast, const String & type_name) { - auto func = std::make_shared(ast->range); + auto func = std::make_shared(); ASTPtr res = func; func->alias = ast->alias; func->prefer_alias_to_column_name = ast->prefer_alias_to_column_name; ast->alias.clear(); func->name = "CAST"; - auto exp_list = std::make_shared(ast->range); + auto exp_list = std::make_shared(); func->arguments = exp_list; func->children.push_back(func->arguments); exp_list->children.emplace_back(ast.release()); - exp_list->children.emplace_back(std::make_shared(StringRange(), type_name)); + exp_list->children.emplace_back(std::make_shared(type_name)); return res; } @@ -1200,7 +1199,7 @@ void ExpressionAnalyzer::executeScalarSubqueriesImpl(ASTPtr & ast) if (!block) { /// Interpret subquery with empty result as Null literal - auto ast_new = std::make_unique(ast->range, Null()); + auto ast_new = std::make_unique(Null()); ast_new->setAlias(ast->tryGetAlias()); ast = std::move(ast_new); return; @@ -1220,18 +1219,18 @@ void ExpressionAnalyzer::executeScalarSubqueriesImpl(ASTPtr & ast) size_t columns = block.columns(); if (columns == 1) { - auto lit = std::make_unique(ast->range, (*block.safeGetByPosition(0).column)[0]); + auto lit = std::make_unique((*block.safeGetByPosition(0).column)[0]); lit->alias = subquery->alias; lit->prefer_alias_to_column_name = subquery->prefer_alias_to_column_name; ast = addTypeConversion(std::move(lit), block.safeGetByPosition(0).type->getName()); } else { - auto tuple = std::make_shared(ast->range); + auto tuple = std::make_shared(); tuple->alias = subquery->alias; ast = tuple; tuple->name = "tuple"; - auto exp_list = std::make_shared(ast->range); + auto exp_list = std::make_shared(); tuple->arguments = exp_list; tuple->children.push_back(tuple->arguments); @@ 
-1239,7 +1238,7 @@ void ExpressionAnalyzer::executeScalarSubqueriesImpl(ASTPtr & ast) for (size_t i = 0; i < columns; ++i) { exp_list->children[i] = addTypeConversion( - std::make_unique(ast->range, (*block.safeGetByPosition(i).column)[0]), + std::make_unique((*block.safeGetByPosition(i).column)[0]), block.safeGetByPosition(i).type->getName()); } } @@ -1375,7 +1374,7 @@ void ExpressionAnalyzer::optimizeGroupBy() } select_query->group_expression_list = std::make_shared(); - select_query->group_expression_list->children.emplace_back(std::make_shared(StringRange(), UInt64(unused_column))); + select_query->group_expression_list->children.emplace_back(std::make_shared(UInt64(unused_column))); } } diff --git a/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp b/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp index 363db236759..cc89def49ee 100644 --- a/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp +++ b/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp @@ -102,20 +102,18 @@ StoragePtr tryGetTable(const ASTPtr & database_and_table, const Context & contex void replaceDatabaseAndTable(ASTPtr & database_and_table, const String & database_name, const String & table_name) { - ASTPtr table = std::make_shared(StringRange(), table_name, ASTIdentifier::Table); + ASTPtr table = std::make_shared(table_name, ASTIdentifier::Table); if (!database_name.empty()) { - ASTPtr database = std::make_shared(StringRange(), database_name, ASTIdentifier::Database); + ASTPtr database = std::make_shared(database_name, ASTIdentifier::Database); - database_and_table = std::make_shared( - StringRange(), database_name + "." + table_name, ASTIdentifier::Table); + database_and_table = std::make_shared(database_name + "." + table_name, ASTIdentifier::Table); database_and_table->children = {database, table}; } else { - database_and_table = std::make_shared( - StringRange(), table_name, ASTIdentifier::Table); + database_and_table = std::make_shared(table_name, ASTIdentifier::Table); } } diff --git a/dbms/src/Interpreters/InterpreterAlterQuery.cpp b/dbms/src/Interpreters/InterpreterAlterQuery.cpp index 76fef51f98f..f990fd8570f 100644 --- a/dbms/src/Interpreters/InterpreterAlterQuery.cpp +++ b/dbms/src/Interpreters/InterpreterAlterQuery.cpp @@ -106,9 +106,7 @@ void InterpreterAlterQuery::parseAlter( command.column_name = ast_col_decl.name; if (ast_col_decl.type) { - StringRange type_range = ast_col_decl.type->range; - String type_string(type_range.first, type_range.second - type_range.first); - command.data_type = data_type_factory.get(type_string); + command.data_type = data_type_factory.get(ast_col_decl.type); } if (ast_col_decl.default_expression) { @@ -154,9 +152,7 @@ void InterpreterAlterQuery::parseAlter( command.column_name = ast_col_decl.name; if (ast_col_decl.type) { - StringRange type_range = ast_col_decl.type->range; - String type_string(type_range.first, type_range.second - type_range.first); - command.data_type = data_type_factory.get(type_string); + command.data_type = data_type_factory.get(ast_col_decl.type); } if (ast_col_decl.default_expression) diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 17262f3cc56..0444b26671b 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -199,8 +199,8 @@ static ColumnsAndDefaults parseColumns(const ASTExpressionList & column_list_ast const auto data_type_ptr = columns.back().type.get(); 
default_expr_list->children.emplace_back(setAlias( - makeASTFunction("CAST", std::make_shared(StringRange(), tmp_column_name), - std::make_shared(StringRange(), Field(data_type_ptr->getName()))), final_column_name)); + makeASTFunction("CAST", std::make_shared(tmp_column_name), + std::make_shared(Field(data_type_ptr->getName()))), final_column_name)); default_expr_list->children.emplace_back(setAlias(col_decl.default_expression->clone(), tmp_column_name)); } else @@ -233,7 +233,7 @@ static ColumnsAndDefaults parseColumns(const ASTExpressionList & column_list_ast if (!explicit_type->equals(*deduced_type)) { col_decl_ptr->default_expression = makeASTFunction("CAST", col_decl_ptr->default_expression, - std::make_shared(StringRange(), explicit_type->getName())); + std::make_shared(explicit_type->getName())); col_decl_ptr->children.clear(); col_decl_ptr->children.push_back(col_decl_ptr->type); @@ -293,7 +293,7 @@ ASTPtr InterpreterCreateQuery::formatColumns(const NamesAndTypesList & columns) ParserIdentifierWithOptionalParameters storage_p; column_declaration->type = parseQuery(storage_p, pos, end, "data type"); - column_declaration->type->query_string = type_name; + column_declaration->type->owned_string = type_name; columns_list->children.emplace_back(column_declaration); } @@ -321,7 +321,7 @@ ASTPtr InterpreterCreateQuery::formatColumns( ParserIdentifierWithOptionalParameters storage_p; column_declaration->type = parseQuery(storage_p, pos, end, "data type"); - column_declaration->type->query_string = type_name; + column_declaration->type->owned_string = type_name; const auto it = column_defaults.find(column.name); if (it != std::end(column_defaults)) diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 1f0f7178080..dace01ab37d 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -553,7 +553,7 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline if (default_it != std::end(storage->column_defaults) && default_it->second.type == ColumnDefaultType::Alias) required_columns_expr_list->children.emplace_back(setAlias(default_it->second.expression->clone(), column)); else - required_columns_expr_list->children.emplace_back(std::make_shared(StringRange(), column)); + required_columns_expr_list->children.emplace_back(std::make_shared(column)); } alias_actions = ExpressionAnalyzer{required_columns_expr_list, context, storage, source_header.getNamesAndTypesList()}.getActions(true); diff --git a/dbms/src/Interpreters/evaluateConstantExpression.cpp b/dbms/src/Interpreters/evaluateConstantExpression.cpp index 964aae94619..497df93f9bc 100644 --- a/dbms/src/Interpreters/evaluateConstantExpression.cpp +++ b/dbms/src/Interpreters/evaluateConstantExpression.cpp @@ -55,15 +55,14 @@ ASTPtr evaluateConstantExpressionAsLiteral(const ASTPtr & node, const Context & if (typeid_cast(node.get())) return node; - return std::make_shared(node->range, - evaluateConstantExpression(node, context).first); + return std::make_shared(evaluateConstantExpression(node, context).first); } ASTPtr evaluateConstantExpressionOrIdentifierAsLiteral(const ASTPtr & node, const Context & context) { if (auto id = typeid_cast(node.get())) - return std::make_shared(node->range, Field(id->name)); + return std::make_shared(Field(id->name)); return evaluateConstantExpressionAsLiteral(node, context); } diff --git a/dbms/src/Parsers/ASTAlterQuery.cpp b/dbms/src/Parsers/ASTAlterQuery.cpp index 
d9ed5d83fc2..6b439e83dda 100644 --- a/dbms/src/Parsers/ASTAlterQuery.cpp +++ b/dbms/src/Parsers/ASTAlterQuery.cpp @@ -33,10 +33,6 @@ void ASTAlterQuery::addParameters(const Parameters & params) children.push_back(params.primary_key); } -ASTAlterQuery::ASTAlterQuery(StringRange range_) : ASTQueryWithOutput(range_) -{ -} - /** Get the text that identifies this element. */ String ASTAlterQuery::getID() const { diff --git a/dbms/src/Parsers/ASTAlterQuery.h b/dbms/src/Parsers/ASTAlterQuery.h index 5b5e543d64d..dc1c4dde849 100644 --- a/dbms/src/Parsers/ASTAlterQuery.h +++ b/dbms/src/Parsers/ASTAlterQuery.h @@ -87,8 +87,6 @@ public: void addParameters(const Parameters & params); - explicit ASTAlterQuery(StringRange range_ = StringRange()); - /** Get the text that identifies this element. */ String getID() const override; diff --git a/dbms/src/Parsers/ASTAsterisk.h b/dbms/src/Parsers/ASTAsterisk.h index a52885f63cf..3861c992c75 100644 --- a/dbms/src/Parsers/ASTAsterisk.h +++ b/dbms/src/Parsers/ASTAsterisk.h @@ -9,8 +9,6 @@ namespace DB class ASTAsterisk : public IAST { public: - ASTAsterisk() = default; - ASTAsterisk(StringRange range_) : IAST(range_) {} String getID() const override { return "Asterisk"; } ASTPtr clone() const override { return std::make_shared(*this); } String getColumnName() const override { return "*"; } diff --git a/dbms/src/Parsers/ASTCheckQuery.h b/dbms/src/Parsers/ASTCheckQuery.h index 01e9c34b616..74f6249a732 100644 --- a/dbms/src/Parsers/ASTCheckQuery.h +++ b/dbms/src/Parsers/ASTCheckQuery.h @@ -7,8 +7,6 @@ namespace DB struct ASTCheckQuery : public ASTQueryWithOutput { - ASTCheckQuery(StringRange range_ = StringRange()) : ASTQueryWithOutput(range_) {}; - /** Get the text that identifies this element. */ String getID() const override { return ("CheckQuery_" + database + "_" + table); }; diff --git a/dbms/src/Parsers/ASTColumnDeclaration.h b/dbms/src/Parsers/ASTColumnDeclaration.h index 0cf49be00ec..57a1f7695d7 100644 --- a/dbms/src/Parsers/ASTColumnDeclaration.h +++ b/dbms/src/Parsers/ASTColumnDeclaration.h @@ -16,9 +16,6 @@ public: String default_specifier; ASTPtr default_expression; - ASTColumnDeclaration() = default; - ASTColumnDeclaration(const StringRange range) : IAST{range} {} - String getID() const override { return "ColumnDeclaration_" + name; } ASTPtr clone() const override diff --git a/dbms/src/Parsers/ASTCreateQuery.h b/dbms/src/Parsers/ASTCreateQuery.h index fb96a927d4e..216639cab2d 100644 --- a/dbms/src/Parsers/ASTCreateQuery.h +++ b/dbms/src/Parsers/ASTCreateQuery.h @@ -20,8 +20,6 @@ public: IAST * sample_by = nullptr; ASTSetQuery * settings = nullptr; - ASTStorage() = default; - ASTStorage(StringRange range_) : IAST(range_) {} String getID() const override { return "Storage definition"; } ASTPtr clone() const override @@ -95,9 +93,6 @@ public: String as_table; ASTSelectWithUnionQuery * select = nullptr; - ASTCreateQuery() = default; - ASTCreateQuery(const StringRange range_) : ASTQueryWithOutput(range_) {} - /** Get the text that identifies this element. */ String getID() const override { return (attach ? 
"AttachQuery_" : "CreateQuery_") + database + "_" + table; }; diff --git a/dbms/src/Parsers/ASTDropQuery.h b/dbms/src/Parsers/ASTDropQuery.h index 3735a58ab66..41e9b255bf3 100644 --- a/dbms/src/Parsers/ASTDropQuery.h +++ b/dbms/src/Parsers/ASTDropQuery.h @@ -19,9 +19,6 @@ public: String database; String table; - ASTDropQuery() = default; - explicit ASTDropQuery(const StringRange range_) : ASTQueryWithOutput(range_) {} - /** Get the text that identifies this element. */ String getID() const override { return (detach ? "DetachQuery_" : "DropQuery_") + database + "_" + table; }; diff --git a/dbms/src/Parsers/ASTEnumElement.h b/dbms/src/Parsers/ASTEnumElement.h index 1898c5d9172..10b4e1e7482 100644 --- a/dbms/src/Parsers/ASTEnumElement.h +++ b/dbms/src/Parsers/ASTEnumElement.h @@ -14,14 +14,14 @@ public: String name; Field value; - ASTEnumElement(const StringRange range, const String & name, const Field & value) - : IAST{range}, name{name}, value {value} {} + ASTEnumElement(const String & name, const Field & value) + : name{name}, value {value} {} String getID() const override { return "EnumElement"; } ASTPtr clone() const override { - return std::make_shared(StringRange(), name, value); + return std::make_shared(name, value); } protected: diff --git a/dbms/src/Parsers/ASTExpressionList.h b/dbms/src/Parsers/ASTExpressionList.h index 4d49df84ad6..cfe9cb3b714 100644 --- a/dbms/src/Parsers/ASTExpressionList.h +++ b/dbms/src/Parsers/ASTExpressionList.h @@ -11,9 +11,6 @@ namespace DB class ASTExpressionList : public IAST { public: - ASTExpressionList() = default; - ASTExpressionList(const StringRange range_) : IAST(range_) {} - String getID() const override { return "ExpressionList"; } ASTPtr clone() const override; diff --git a/dbms/src/Parsers/ASTFunction.h b/dbms/src/Parsers/ASTFunction.h index 462dc439329..9e78de369a1 100644 --- a/dbms/src/Parsers/ASTFunction.h +++ b/dbms/src/Parsers/ASTFunction.h @@ -18,9 +18,6 @@ public: ASTPtr parameters; public: - ASTFunction() = default; - ASTFunction(const StringRange range_) : ASTWithAlias(range_) {} - /** Get text identifying the AST node. */ String getID() const override; @@ -36,7 +33,6 @@ template ASTPtr makeASTFunction(const String & name, Args &&... args) { const auto function = std::make_shared(); - ASTPtr result{function}; function->name = name; function->arguments = std::make_shared(); @@ -44,24 +40,7 @@ ASTPtr makeASTFunction(const String & name, Args &&... args) function->arguments->children = { std::forward(args)... }; - return result; -} - - -template -ASTPtr makeASTFunction(const String & name, const StringRange & function_range, - const StringRange & arguments_range, Args &&... args) -{ - const auto function = std::make_shared(function_range); - ASTPtr result{function}; - - function->name = name; - function->arguments = std::make_shared(arguments_range); - function->children.push_back(function->arguments); - - function->arguments->children = { std::forward(args)... 
}; - - return result; + return function; } } diff --git a/dbms/src/Parsers/ASTIdentifier.h b/dbms/src/Parsers/ASTIdentifier.h index 1c424f8e50e..017e33af500 100644 --- a/dbms/src/Parsers/ASTIdentifier.h +++ b/dbms/src/Parsers/ASTIdentifier.h @@ -25,9 +25,8 @@ public: /// what this identifier identifies Kind kind; - ASTIdentifier() = default; - ASTIdentifier(const StringRange range_, const String & name_, const Kind kind_ = Column) - : ASTWithAlias(range_), name(name_), kind(kind_) {} + ASTIdentifier(const String & name_, const Kind kind_ = Column) + : name(name_), kind(kind_) {} /** Get the text that identifies this element. */ String getID() const override { return "Identifier_" + name; } diff --git a/dbms/src/Parsers/ASTInsertQuery.h b/dbms/src/Parsers/ASTInsertQuery.h index bbd730ae50c..12e3de2ef42 100644 --- a/dbms/src/Parsers/ASTInsertQuery.h +++ b/dbms/src/Parsers/ASTInsertQuery.h @@ -26,9 +26,6 @@ public: const char * data = nullptr; const char * end = nullptr; - ASTInsertQuery() = default; - explicit ASTInsertQuery(const StringRange range_) : IAST(range_) {} - /** Get the text that identifies this element. */ String getID() const override { return "InsertQuery_" + database + "_" + table; }; diff --git a/dbms/src/Parsers/ASTKillQueryQuery.h b/dbms/src/Parsers/ASTKillQueryQuery.h index e28c97e4baf..23ef73fec3d 100644 --- a/dbms/src/Parsers/ASTKillQueryQuery.h +++ b/dbms/src/Parsers/ASTKillQueryQuery.h @@ -11,10 +11,6 @@ public: bool sync = false; // SYNC or ASYNC mode bool test = false; // does it TEST mode? (doesn't cancel queries just checks and shows them) - ASTKillQueryQuery() = default; - - ASTKillQueryQuery(const StringRange range_) : ASTQueryWithOutput(range_) {} - ASTPtr clone() const override { return std::make_shared(*this); } String getID() const override; diff --git a/dbms/src/Parsers/ASTLiteral.h b/dbms/src/Parsers/ASTLiteral.h index e31ae7f49da..36707a7e950 100644 --- a/dbms/src/Parsers/ASTLiteral.h +++ b/dbms/src/Parsers/ASTLiteral.h @@ -15,8 +15,7 @@ class ASTLiteral : public ASTWithAlias public: Field value; - ASTLiteral() = default; - ASTLiteral(const StringRange range_, const Field & value_) : ASTWithAlias(range_), value(value_) {} + ASTLiteral(const Field & value_) : value(value_) {} /** Get the text that identifies this element. */ String getID() const override { return "Literal_" + applyVisitor(FieldVisitorDump(), value); } diff --git a/dbms/src/Parsers/ASTNameTypePair.h b/dbms/src/Parsers/ASTNameTypePair.h index ae0574f1dac..9dad01df2f5 100644 --- a/dbms/src/Parsers/ASTNameTypePair.h +++ b/dbms/src/Parsers/ASTNameTypePair.h @@ -16,9 +16,6 @@ public: /// type ASTPtr type; - ASTNameTypePair() = default; - ASTNameTypePair(const StringRange range_) : IAST(range_) {} - /** Get the text that identifies this element. */ String getID() const override { return "NameTypePair_" + name; } diff --git a/dbms/src/Parsers/ASTOptimizeQuery.h b/dbms/src/Parsers/ASTOptimizeQuery.h index 3caae258b41..1a4bd260ed3 100644 --- a/dbms/src/Parsers/ASTOptimizeQuery.h +++ b/dbms/src/Parsers/ASTOptimizeQuery.h @@ -22,9 +22,6 @@ public: /// Do deduplicate (default: false) bool deduplicate; - ASTOptimizeQuery() = default; - ASTOptimizeQuery(const StringRange range_) : IAST(range_) {} - /** Get the text that identifies this element. */ String getID() const override { return "OptimizeQuery_" + database + "_" + table + (final ? "_final" : "") + (deduplicate ? 
"_deduplicate" : ""); }; diff --git a/dbms/src/Parsers/ASTOrderByElement.h b/dbms/src/Parsers/ASTOrderByElement.h index bd3d2c5eb06..19d0d723a7c 100644 --- a/dbms/src/Parsers/ASTOrderByElement.h +++ b/dbms/src/Parsers/ASTOrderByElement.h @@ -19,11 +19,12 @@ public: /** Collation for locale-specific string comparison. If empty, then sorting done by bytes. */ ASTPtr collation; - ASTOrderByElement() = default; - ASTOrderByElement(const StringRange range_, - const int direction_, const int nulls_direction_, const bool nulls_direction_was_explicitly_specified_, + ASTOrderByElement( + const int direction_, + const int nulls_direction_, + const bool nulls_direction_was_explicitly_specified_, ASTPtr & collation_) - : IAST(range_), + : direction(direction_), nulls_direction(nulls_direction_), nulls_direction_was_explicitly_specified(nulls_direction_was_explicitly_specified_), collation(collation_) {} diff --git a/dbms/src/Parsers/ASTPartition.h b/dbms/src/Parsers/ASTPartition.h index 9f78d56fca1..b1ed866284a 100644 --- a/dbms/src/Parsers/ASTPartition.h +++ b/dbms/src/Parsers/ASTPartition.h @@ -17,8 +17,6 @@ public: String id; - ASTPartition() = default; - ASTPartition(StringRange range_) : IAST(range_) {} String getID() const override; ASTPtr clone() const override; diff --git a/dbms/src/Parsers/ASTQualifiedAsterisk.h b/dbms/src/Parsers/ASTQualifiedAsterisk.h index 52d9e8b39c6..5baf24686fc 100644 --- a/dbms/src/Parsers/ASTQualifiedAsterisk.h +++ b/dbms/src/Parsers/ASTQualifiedAsterisk.h @@ -12,8 +12,6 @@ namespace DB class ASTQualifiedAsterisk : public IAST { public: - ASTQualifiedAsterisk() = default; - ASTQualifiedAsterisk(StringRange range_) : IAST(range_) {} String getID() const override { return "QualifiedAsterisk"; } ASTPtr clone() const override { return std::make_shared(*this); } String getColumnName() const override; diff --git a/dbms/src/Parsers/ASTQueryWithOutput.h b/dbms/src/Parsers/ASTQueryWithOutput.h index 5f2c43a50ba..07503c3e197 100644 --- a/dbms/src/Parsers/ASTQueryWithOutput.h +++ b/dbms/src/Parsers/ASTQueryWithOutput.h @@ -14,9 +14,6 @@ public: ASTPtr out_file; ASTPtr format; - ASTQueryWithOutput() = default; - explicit ASTQueryWithOutput(const StringRange range_) : IAST(range_) {} - void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const final; /// Remove 'FORMAT and INTO OUTFILE ' if exists @@ -37,8 +34,6 @@ template class ASTQueryWithOutputImpl : public ASTQueryWithOutput { public: - explicit ASTQueryWithOutputImpl() = default; - explicit ASTQueryWithOutputImpl(StringRange range_) : ASTQueryWithOutput(range_) {} String getID() const override { return ASTIDAndQueryNames::ID; }; ASTPtr clone() const override diff --git a/dbms/src/Parsers/ASTQueryWithTableAndOutput.h b/dbms/src/Parsers/ASTQueryWithTableAndOutput.h index abcb7fa26e5..952ea23fd61 100644 --- a/dbms/src/Parsers/ASTQueryWithTableAndOutput.h +++ b/dbms/src/Parsers/ASTQueryWithTableAndOutput.h @@ -16,9 +16,6 @@ public: String database; String table; - ASTQueryWithTableAndOutput() = default; - explicit ASTQueryWithTableAndOutput(const StringRange range_) : ASTQueryWithOutput(range_) {} - protected: void formatHelper(const FormatSettings & settings, const char * name) const { @@ -32,10 +29,6 @@ template class ASTQueryWithTableAndOutputImpl : public ASTQueryWithTableAndOutput { public: - ASTQueryWithTableAndOutputImpl() = default; - - explicit ASTQueryWithTableAndOutputImpl(const StringRange range_) : ASTQueryWithTableAndOutput(range_) {} - String getID() const override { return 
AstIDAndQueryNames::ID + ("_" + database) + "_" + table; }; ASTPtr clone() const override diff --git a/dbms/src/Parsers/ASTRenameQuery.h b/dbms/src/Parsers/ASTRenameQuery.h index 1a2eda8b029..3da772a5552 100644 --- a/dbms/src/Parsers/ASTRenameQuery.h +++ b/dbms/src/Parsers/ASTRenameQuery.h @@ -28,9 +28,6 @@ public: using Elements = std::vector; Elements elements; - ASTRenameQuery() = default; - explicit ASTRenameQuery(const StringRange range_) : ASTQueryWithOutput(range_) {} - /** Get the text that identifies this element. */ String getID() const override { return "Rename"; }; diff --git a/dbms/src/Parsers/ASTSampleRatio.h b/dbms/src/Parsers/ASTSampleRatio.h index 84915cab6cb..ca91d0b6cbb 100644 --- a/dbms/src/Parsers/ASTSampleRatio.h +++ b/dbms/src/Parsers/ASTSampleRatio.h @@ -26,9 +26,7 @@ public: Rational ratio; - ASTSampleRatio() = default; - ASTSampleRatio(const StringRange range_) : IAST(range_) {} - ASTSampleRatio(const StringRange range_, Rational & ratio_) : IAST(range_), ratio(ratio_) {} + ASTSampleRatio(Rational & ratio_) : ratio(ratio_) {} String getID() const override { return "SampleRatio_" + toString(ratio); } diff --git a/dbms/src/Parsers/ASTSelectQuery.cpp b/dbms/src/Parsers/ASTSelectQuery.cpp index 9629481bc9a..6924ec37fba 100644 --- a/dbms/src/Parsers/ASTSelectQuery.cpp +++ b/dbms/src/Parsers/ASTSelectQuery.cpp @@ -19,10 +19,6 @@ namespace ErrorCodes } -ASTSelectQuery::ASTSelectQuery(const StringRange range_) : IAST(range_) -{ -} - bool ASTSelectQuery::hasArrayJoin(const ASTPtr & ast) { if (const ASTFunction * function = typeid_cast(&*ast)) @@ -101,7 +97,7 @@ void ASTSelectQuery::rewriteSelectExpressionList(const Names & required_column_n if (!other_required_columns_in_select.count(name) && !columns_with_array_join.count(name)) { if (asterisk.first) - new_children.push_back({ std::make_shared(asterisk.first->range, name), asterisk.second }); + new_children.push_back({ std::make_shared(name), asterisk.second }); else throw Exception("SELECT query doesn't have required column: " + backQuoteIfNeed(name), ErrorCodes::THERE_IS_NO_COLUMN); } @@ -455,11 +451,11 @@ void ASTSelectQuery::setDatabaseIfNeeded(const String & database_name) if (table_expression->database_and_table_name->children.empty()) { - ASTPtr database = std::make_shared(StringRange(), database_name, ASTIdentifier::Database); + ASTPtr database = std::make_shared(database_name, ASTIdentifier::Database); ASTPtr table = table_expression->database_and_table_name; const String & old_name = static_cast(*table_expression->database_and_table_name).name; - table_expression->database_and_table_name = std::make_shared(StringRange(), database_name + "." + old_name, ASTIdentifier::Table); + table_expression->database_and_table_name = std::make_shared(database_name + "." 
+ old_name, ASTIdentifier::Table); table_expression->database_and_table_name->children = {database, table}; } else if (table_expression->database_and_table_name->children.size() != 2) @@ -486,20 +482,18 @@ void ASTSelectQuery::replaceDatabaseAndTable(const String & database_name, const table_expression = table_expr.get(); } - ASTPtr table = std::make_shared(StringRange(), table_name, ASTIdentifier::Table); + ASTPtr table = std::make_shared(table_name, ASTIdentifier::Table); if (!database_name.empty()) { - ASTPtr database = std::make_shared(StringRange(), database_name, ASTIdentifier::Database); + ASTPtr database = std::make_shared(database_name, ASTIdentifier::Database); - table_expression->database_and_table_name = std::make_shared( - StringRange(), database_name + "." + table_name, ASTIdentifier::Table); + table_expression->database_and_table_name = std::make_shared(database_name + "." + table_name, ASTIdentifier::Table); table_expression->database_and_table_name->children = {database, table}; } else { - table_expression->database_and_table_name = std::make_shared( - StringRange(), table_name, ASTIdentifier::Table); + table_expression->database_and_table_name = std::make_shared(table_name, ASTIdentifier::Table); } } diff --git a/dbms/src/Parsers/ASTSelectQuery.h b/dbms/src/Parsers/ASTSelectQuery.h index abde8212ebb..ebafabd681d 100644 --- a/dbms/src/Parsers/ASTSelectQuery.h +++ b/dbms/src/Parsers/ASTSelectQuery.h @@ -15,9 +15,6 @@ struct ASTTablesInSelectQueryElement; class ASTSelectQuery : public IAST { public: - ASTSelectQuery() = default; - ASTSelectQuery(const StringRange range_); - /** Get the text that identifies this element. */ String getID() const override { return "SelectQuery"; }; diff --git a/dbms/src/Parsers/ASTSetQuery.h b/dbms/src/Parsers/ASTSetQuery.h index 08b617db646..3a41ed0d80c 100644 --- a/dbms/src/Parsers/ASTSetQuery.h +++ b/dbms/src/Parsers/ASTSetQuery.h @@ -25,9 +25,6 @@ public: using Changes = std::vector; Changes changes; - ASTSetQuery() = default; - explicit ASTSetQuery(const StringRange range_) : IAST(range_) {} - /** Get the text that identifies this element. */ String getID() const override { return "Set"; }; diff --git a/dbms/src/Parsers/ASTShowTablesQuery.h b/dbms/src/Parsers/ASTShowTablesQuery.h index 176f9d69697..09ee0475847 100644 --- a/dbms/src/Parsers/ASTShowTablesQuery.h +++ b/dbms/src/Parsers/ASTShowTablesQuery.h @@ -20,9 +20,6 @@ public: String like; bool not_like{false}; - ASTShowTablesQuery() = default; - ASTShowTablesQuery(const StringRange range_) : ASTQueryWithOutput(range_) {} - /** Get the text that identifies this element. */ String getID() const override { return "ShowTables"; }; diff --git a/dbms/src/Parsers/ASTSubquery.h b/dbms/src/Parsers/ASTSubquery.h index 2ec2b4469fa..1b9ba97ac88 100644 --- a/dbms/src/Parsers/ASTSubquery.h +++ b/dbms/src/Parsers/ASTSubquery.h @@ -12,9 +12,6 @@ namespace DB class ASTSubquery : public ASTWithAlias { public: - ASTSubquery() = default; - ASTSubquery(const StringRange range_) : ASTWithAlias(range_) {} - /** Get the text that identifies this element. 
*/ String getID() const override { return "Subquery"; } @@ -44,6 +41,7 @@ protected: children[0]->formatImpl(settings, state, frame_nested); settings.ostr << nl_or_nothing << indent_str << ")"; } + String getColumnNameImpl() const override; }; diff --git a/dbms/src/Parsers/ASTSystemQuery.h b/dbms/src/Parsers/ASTSystemQuery.h index 8f9a4bf2208..1821b73bc12 100644 --- a/dbms/src/Parsers/ASTSystemQuery.h +++ b/dbms/src/Parsers/ASTSystemQuery.h @@ -39,9 +39,6 @@ public: //String target_replica_database; //String target_replica_table; - ASTSystemQuery() = default; - explicit ASTSystemQuery(const StringRange range) : IAST(range) {} - String getID() const override { return "SYSTEM query"; }; ASTPtr clone() const override { return std::make_shared(*this); } diff --git a/dbms/src/Parsers/ASTUseQuery.h b/dbms/src/Parsers/ASTUseQuery.h index dd23b24f41a..71108b200d1 100644 --- a/dbms/src/Parsers/ASTUseQuery.h +++ b/dbms/src/Parsers/ASTUseQuery.h @@ -14,9 +14,6 @@ class ASTUseQuery : public IAST public: String database; - ASTUseQuery() = default; - ASTUseQuery(const StringRange range_) : IAST(range_) {} - /** Get the text that identifies this element. */ String getID() const override { return "UseQuery_" + database; }; diff --git a/dbms/src/Parsers/ExpressionElementParsers.cpp b/dbms/src/Parsers/ExpressionElementParsers.cpp index b1054917935..8300d1ed65f 100644 --- a/dbms/src/Parsers/ExpressionElementParsers.cpp +++ b/dbms/src/Parsers/ExpressionElementParsers.cpp @@ -35,7 +35,6 @@ namespace ErrorCodes bool ParserArray::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; ASTPtr contents_node; ParserExpressionList contents(false); @@ -50,7 +49,7 @@ bool ParserArray::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; ++pos; - auto function_node = std::make_shared(StringRange(begin, pos)); + auto function_node = std::make_shared(); function_node->name = "array"; function_node->arguments = contents_node; function_node->children.push_back(contents_node); @@ -62,7 +61,6 @@ bool ParserArray::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) bool ParserParenthesisExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; ASTPtr contents_node; ParserExpressionList contents(false); @@ -92,7 +90,7 @@ bool ParserParenthesisExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & } else { - auto function_node = std::make_shared(StringRange(begin, pos)); + auto function_node = std::make_shared(); function_node->name = "tuple"; function_node->arguments = contents_node; function_node->children.push_back(contents_node); @@ -105,7 +103,6 @@ bool ParserParenthesisExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & bool ParserSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; ASTPtr select_node; ParserSelectWithUnionQuery select; @@ -120,7 +117,7 @@ bool ParserSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; ++pos; - node = std::make_shared(StringRange(begin, pos)); + node = std::make_shared(); typeid_cast(*node).children.push_back(select_node); return true; } @@ -128,8 +125,6 @@ bool ParserSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) bool ParserIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected &) { - Pos begin = pos; - /// Identifier in backquotes or in double quotes if (pos->type == TokenType::QuotedIdentifier) { @@ -144,14 +139,14 @@ bool ParserIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected &) if (s.empty()) /// 
Identifiers "empty string" are not allowed. return false; + node = std::make_shared(s); ++pos; - node = std::make_shared(StringRange(begin), s); return true; } else if (pos->type == TokenType::BareWord) { + node = std::make_shared(String(pos->begin, pos->end)); ++pos; - node = std::make_shared(StringRange(begin), String(begin->begin, begin->end)); return true; } @@ -161,8 +156,6 @@ bool ParserIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected &) bool ParserCompoundIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; - ASTPtr id_list; if (!ParserList(std::make_unique(), std::make_unique(TokenType::Dot), false) .parse(pos, id_list, expected)) @@ -177,7 +170,7 @@ bool ParserCompoundIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & ex name += static_cast(*child.get()).name; } - node = std::make_shared(StringRange(begin, pos), name); + node = std::make_shared(name); /// In `children`, remember the identifiers-components, if there are more than one. if (list.children.size() > 1) @@ -189,8 +182,6 @@ bool ParserCompoundIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & ex bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; - ParserIdentifier id_parser; ParserKeyword distinct("DISTINCT"); ParserExpressionList contents(false); @@ -266,7 +257,7 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ++pos; } - auto function_node = std::make_shared(StringRange(begin, pos)); + auto function_node = std::make_shared(); function_node->name = typeid_cast(*identifier).name; /// func(DISTINCT ...) is equivalent to funcDistinct(...) @@ -334,10 +325,10 @@ bool ParserCastExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expect return false; } - expr_list_args = std::make_shared(StringRange{contents_begin, pos}); + expr_list_args = std::make_shared(); first_argument->setAlias({}); expr_list_args->children.push_back(first_argument); - expr_list_args->children.emplace_back(std::make_shared(StringRange(), type)); + expr_list_args->children.emplace_back(std::make_shared(type)); } else { @@ -362,7 +353,7 @@ bool ParserCastExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expect return false; } - expr_list_args = std::make_shared(StringRange{contents_begin, pos}); + expr_list_args = std::make_shared(); expr_list_args->children.push_back(first_argument); expr_list_args->children.push_back(type_as_literal); } @@ -371,7 +362,7 @@ bool ParserCastExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expect return false; ++pos; - const auto function_node = std::make_shared(StringRange(begin, pos)); + const auto function_node = std::make_shared(); ASTPtr node_holder{function_node}; function_node->name = name; @@ -385,11 +376,10 @@ bool ParserCastExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expect bool ParserNull::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; ParserKeyword nested_parser("NULL"); if (nested_parser.parse(pos, node, expected)) { - node = std::make_shared(StringRange(StringRange(begin, pos)), Null()); + node = std::make_shared(Null()); return true; } else @@ -411,7 +401,6 @@ bool ParserNumber::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) Field res; - Pos begin = pos; if (!pos.isValid()) return false; @@ -463,7 +452,7 @@ bool ParserNumber::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } ++pos; - node = std::make_shared(StringRange(begin, pos), res); + node = std::make_shared(res); return true; 
} @@ -472,7 +461,6 @@ bool ParserUnsignedInteger::parseImpl(Pos & pos, ASTPtr & node, Expected & expec { Field res; - Pos begin = pos; if (!pos.isValid()) return false; @@ -486,7 +474,7 @@ bool ParserUnsignedInteger::parseImpl(Pos & pos, ASTPtr & node, Expected & expec res = x; ++pos; - node = std::make_shared(StringRange(begin, pos), res); + node = std::make_shared(res); return true; } @@ -496,8 +484,6 @@ bool ParserStringLiteral::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte if (pos->type != TokenType::StringLiteral) return false; - Pos begin = pos; - String s; ReadBufferFromMemory in(pos->begin, pos->size()); @@ -518,7 +504,7 @@ bool ParserStringLiteral::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte } ++pos; - node = std::make_shared(StringRange(begin, pos), s); + node = std::make_shared(s); return true; } @@ -528,7 +514,6 @@ bool ParserArrayOfLiterals::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (pos->type != TokenType::OpeningSquareBracket) return false; - Pos begin = pos; Array arr; ParserLiteral literal_p; @@ -542,7 +527,7 @@ bool ParserArrayOfLiterals::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (pos->type == TokenType::ClosingSquareBracket) { ++pos; - node = std::make_shared(StringRange(begin, pos), arr); + node = std::make_shared(arr); return true; } else if (pos->type == TokenType::Comma) @@ -654,11 +639,10 @@ template class ParserAliasImpl; bool ParserAsterisk::parseImpl(Pos & pos, ASTPtr & node, Expected &) { - Pos begin = pos; if (pos->type == TokenType::Asterisk) { ++pos; - node = std::make_shared(StringRange(begin, pos)); + node = std::make_shared(); return true; } return false; @@ -667,8 +651,6 @@ bool ParserAsterisk::parseImpl(Pos & pos, ASTPtr & node, Expected &) bool ParserQualifiedAsterisk::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; - if (!ParserCompoundIdentifier().parse(pos, node, expected)) return false; @@ -680,7 +662,7 @@ bool ParserQualifiedAsterisk::parseImpl(Pos & pos, ASTPtr & node, Expected & exp return false; ++pos; - auto res = std::make_shared(StringRange(begin, pos)); + auto res = std::make_shared(); res->children.push_back(node); node = std::move(res); return true; @@ -787,8 +769,6 @@ template class ParserWithOptionalAliasImpl; bool ParserOrderByElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; - ParserExpressionWithOptionalAlias elem_p(false); ParserKeyword ascending("ASCENDING"); ParserKeyword descending("DESCENDING"); @@ -833,7 +813,7 @@ bool ParserOrderByElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expect return false; } - node = std::make_shared(StringRange(begin, pos), direction, nulls_direction, nulls_direction_was_explicitly_specified, locale_node); + node = std::make_shared(direction, nulls_direction, nulls_direction_was_explicitly_specified, locale_node); node->children.push_back(expr_elem); if (locale_node) node->children.push_back(locale_node); diff --git a/dbms/src/Parsers/ExpressionListParsers.cpp b/dbms/src/Parsers/ExpressionListParsers.cpp index 50f5ce81e51..5ac3d9f046d 100644 --- a/dbms/src/Parsers/ExpressionListParsers.cpp +++ b/dbms/src/Parsers/ExpressionListParsers.cpp @@ -206,7 +206,6 @@ bool ParserLeftAssociativeBinaryOperatorList::parseImpl(Pos & pos, ASTPtr & node bool ParserVariableArityOperatorList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; ASTPtr arguments; if (!elem_parser->parse(pos, node, expected)) @@ -230,9 +229,6 @@ bool 
ParserVariableArityOperatorList::parseImpl(Pos & pos, ASTPtr & node, Expect arguments->children.push_back(elem); } - if (arguments) - arguments->range = node->range = StringRange(begin, pos); - return true; } @@ -575,8 +571,6 @@ bool ParserNullityChecking::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (!ParserComparisonExpression{}.parse(pos, node_comp, expected)) return false; - Pos begin = pos; - ParserKeyword s_is{"IS"}; ParserKeyword s_not{"NOT"}; ParserKeyword s_null{"NULL"}; @@ -593,7 +587,7 @@ bool ParserNullityChecking::parseImpl(Pos & pos, ASTPtr & node, Expected & expec auto args = std::make_shared(); args->children.push_back(node_comp); - auto function = std::make_shared(StringRange{begin, pos}); + auto function = std::make_shared(); function->name = is_not ? "isNotNull" : "isNull"; function->arguments = args; function->children.push_back(function->arguments); diff --git a/dbms/src/Parsers/IAST.h b/dbms/src/Parsers/IAST.h index 56971824548..9c4f33ebf49 100644 --- a/dbms/src/Parsers/IAST.h +++ b/dbms/src/Parsers/IAST.h @@ -40,13 +40,9 @@ public: ASTs children; StringRange range; - /** A string with a full query. - * This pointer does not allow it to be deleted while the range refers to it. - */ - StringPtr query_string; + /// This pointer does not allow it to be deleted while the range refers to it. + StringPtr owned_string; - IAST() = default; - IAST(const StringRange range_) : range(range_) {} virtual ~IAST() = default; /** Get the canonical name of the column if the element is a column */ diff --git a/dbms/src/Parsers/IParserBase.cpp b/dbms/src/Parsers/IParserBase.cpp index b7e44ace03d..c1a17805068 100644 --- a/dbms/src/Parsers/IParserBase.cpp +++ b/dbms/src/Parsers/IParserBase.cpp @@ -17,13 +17,13 @@ bool IParserBase::parse(Pos & pos, ASTPtr & node, Expected & expected) bool res = parseImpl(pos, node, expected); - /// TODO expected - if (!res) { node = nullptr; pos = begin; } + else + node->range = StringRange(begin, pos); return res; } diff --git a/dbms/src/Parsers/ParserAlterQuery.cpp b/dbms/src/Parsers/ParserAlterQuery.cpp index 2667c2bf247..dbd1805e7b1 100644 --- a/dbms/src/Parsers/ParserAlterQuery.cpp +++ b/dbms/src/Parsers/ParserAlterQuery.cpp @@ -12,8 +12,6 @@ namespace DB bool ParserAlterQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; - ParserKeyword s_alter_table("ALTER TABLE"); ParserKeyword s_add_column("ADD COLUMN"); ParserKeyword s_drop_column("DROP COLUMN"); @@ -218,7 +216,6 @@ bool ParserAlterQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } while (!parsing_finished); - query->range = StringRange(begin, pos); query->cluster = cluster_str; node = query; diff --git a/dbms/src/Parsers/ParserCase.cpp b/dbms/src/Parsers/ParserCase.cpp index 368ccc657ca..e932dfdb04f 100644 --- a/dbms/src/Parsers/ParserCase.cpp +++ b/dbms/src/Parsers/ParserCase.cpp @@ -10,8 +10,6 @@ namespace DB bool ParserCase::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; - ParserKeyword s_case{"CASE"}; ParserKeyword s_when{"WHEN"}; ParserKeyword s_then{"THEN"}; @@ -22,7 +20,6 @@ bool ParserCase::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!s_case.parse(pos, node, expected)) { /// Parse as a simple ASTFunction. 
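The IParserBase.cpp hunk above is the heart of this patch: rather than every parseImpl capturing its own begin position, the base class stamps node->range exactly once after a successful parse, and rewinds pos after a failed one. A self-contained model of that control flow, under simplified hypothetical types:

    #include <memory>

    // Simplified hypothetical declarations; the real Pos, StringRange and
    // IAST carry more state than this.
    struct StringRange
    {
        const char * first = nullptr;
        const char * second = nullptr;
    };

    struct IAST
    {
        StringRange range;
        virtual ~IAST() = default;
    };

    using ASTPtr = std::shared_ptr<IAST>;

    struct ToyParserBase
    {
        virtual ~ToyParserBase() = default;
        virtual bool parseImpl(const char *& pos, ASTPtr & node) = 0;

        bool parse(const char *& pos, ASTPtr & node)
        {
            const char * begin = pos;
            bool res = parseImpl(pos, node);
            if (!res)
            {
                node = nullptr;
                pos = begin;                            // rewind: the caller may try another parser
            }
            else
                node->range = StringRange{begin, pos};  // the single place a range is recorded
            return res;
        }
    };

That central rewind is also why the ParserCase hunk continuing below can drop its manual pos = begin before retrying the input as a plain function.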
- pos = begin; return ParserFunction{}.parse(pos, node, expected); } @@ -80,10 +77,10 @@ bool ParserCase::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!parse_branches()) return false; - auto function_args = std::make_shared(StringRange{begin, pos}); + auto function_args = std::make_shared(); function_args->children = std::move(args); - auto function = std::make_shared(StringRange{begin, pos}); + auto function = std::make_shared(); function->name = "caseWithExpression"; function->arguments = function_args; function->children.push_back(function->arguments); @@ -95,10 +92,10 @@ bool ParserCase::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!parse_branches()) return false; - auto function_args = std::make_shared(StringRange{begin, pos}); + auto function_args = std::make_shared(); function_args->children = std::move(args); - auto function = std::make_shared(StringRange{begin, pos}); + auto function = std::make_shared(); function->name = "multiIf"; function->arguments = function_args; function->children.push_back(function->arguments); diff --git a/dbms/src/Parsers/ParserCheckQuery.cpp b/dbms/src/Parsers/ParserCheckQuery.cpp index b4e243b240b..d9fd46694d6 100644 --- a/dbms/src/Parsers/ParserCheckQuery.cpp +++ b/dbms/src/Parsers/ParserCheckQuery.cpp @@ -12,8 +12,6 @@ namespace DB bool ParserCheckQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; - ParserKeyword s_check_table("CHECK TABLE"); ParserToken s_dot(TokenType::Dot); @@ -32,7 +30,7 @@ bool ParserCheckQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!table_parser.parse(pos, table, expected)) return false; - auto query = std::make_shared(StringRange(begin, pos)); + auto query = std::make_shared(); query->database = typeid_cast(*database).name; query->table = typeid_cast(*table).name; node = query; @@ -40,7 +38,7 @@ bool ParserCheckQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) else { table = database; - auto query = std::make_shared(StringRange(begin, pos)); + auto query = std::make_shared(); query->table = typeid_cast(*table).name; node = query; } diff --git a/dbms/src/Parsers/ParserCreateQuery.cpp b/dbms/src/Parsers/ParserCreateQuery.cpp index b772fff6976..967f5725ca3 100644 --- a/dbms/src/Parsers/ParserCreateQuery.cpp +++ b/dbms/src/Parsers/ParserCreateQuery.cpp @@ -21,8 +21,6 @@ bool ParserNestedTable::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ASTPtr name; ASTPtr columns; - Pos begin = pos; - /// For now `name == 'Nested'`, probably alternative nested data structures will appear if (!name_p.parse(pos, name, expected)) return false; @@ -36,7 +34,7 @@ bool ParserNestedTable::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!close.ignore(pos)) return false; - auto func = std::make_shared(StringRange(begin, pos)); + auto func = std::make_shared(); func->name = typeid_cast(*name).name; func->arguments = columns; func->children.push_back(columns); @@ -65,15 +63,13 @@ bool ParserIdentifierWithOptionalParameters::parseImpl(Pos & pos, ASTPtr & node, ParserIdentifier non_parametric; ParserIdentifierWithParameters parametric; - Pos begin = pos; - if (parametric.parse(pos, node, expected)) return true; ASTPtr ident; if (non_parametric.parse(pos, ident, expected)) { - auto func = std::make_shared(StringRange(begin)); + auto func = std::make_shared(); func->name = typeid_cast(*ident).name; node = func; return true; @@ -87,7 +83,7 @@ bool ParserTypeInCastExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & if 
(ParserIdentifierWithOptionalParameters::parseImpl(pos, node, expected)) { const auto & id_with_params = typeid_cast(*node); - node = std::make_shared(id_with_params.range, String{ id_with_params.range.first, id_with_params.range.second }); + node = std::make_shared(String{ id_with_params.range.first, id_with_params.range.second }); return true; } @@ -120,8 +116,6 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserExpression expression_p; ParserSetQuery settings_p(/* parse_only_internals_ = */ true); - Pos begin = pos; - ASTPtr engine; ASTPtr partition_by; ASTPtr order_by; @@ -171,7 +165,7 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) break; } - auto storage = std::make_shared(StringRange(begin, pos)); + auto storage = std::make_shared(); storage->set(storage->engine, engine); storage->set(storage->partition_by, partition_by); storage->set(storage->order_by, order_by); @@ -185,8 +179,6 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; - ParserKeyword s_create("CREATE"); ParserKeyword s_temporary("TEMPORARY"); ParserKeyword s_attach("ATTACH"); @@ -259,7 +251,7 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) // Shortcut for ATTACH a previously detached table if (attach && (!pos.isValid() || pos.get().type == TokenType::Semicolon)) { - auto query = std::make_shared(StringRange(begin, pos)); + auto query = std::make_shared(); node = query; query->attach = attach; @@ -402,7 +394,7 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; } - auto query = std::make_shared(StringRange(begin, pos)); + auto query = std::make_shared(); node = query; query->attach = attach; diff --git a/dbms/src/Parsers/ParserCreateQuery.h b/dbms/src/Parsers/ParserCreateQuery.h index 4e5261ff8b6..4225fa9b8ca 100644 --- a/dbms/src/Parsers/ParserCreateQuery.h +++ b/dbms/src/Parsers/ParserCreateQuery.h @@ -75,13 +75,11 @@ bool IParserNameTypePair::parseImpl(Pos & pos, ASTPtr & node, Expect NameParser name_parser; ParserIdentifierWithOptionalParameters type_parser; - Pos begin = pos; - ASTPtr name, type; if (name_parser.parse(pos, name, expected) && type_parser.parse(pos, type, expected)) { - auto name_type_pair = std::make_shared(StringRange(begin, pos)); + auto name_type_pair = std::make_shared(); name_type_pair->name = typeid_cast(*name).name; name_type_pair->type = type; name_type_pair->children.push_back(type); @@ -122,8 +120,6 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ParserKeyword s_alias{"ALIAS"}; ParserTernaryOperatorExpression expr_parser; - const auto begin = pos; - /// mandatory column name ASTPtr name; if (!name_parser.parse(pos, name, expected)) @@ -160,7 +156,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E else if (!type) return false; /// reject sole column name without type - const auto column_declaration = std::make_shared(StringRange{begin, pos}); + const auto column_declaration = std::make_shared(); node = column_declaration; column_declaration->name = typeid_cast(*name).name; if (type) diff --git a/dbms/src/Parsers/ParserDescribeTableQuery.cpp b/dbms/src/Parsers/ParserDescribeTableQuery.cpp index 1341f105c89..ebfc3baa33f 100644 --- a/dbms/src/Parsers/ParserDescribeTableQuery.cpp +++ b/dbms/src/Parsers/ParserDescribeTableQuery.cpp @@ -14,8 +14,6 @@ namespace DB bool 
ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; - ParserKeyword s_describe("DESCRIBE"); ParserKeyword s_desc("DESC"); ParserKeyword s_table("TABLE"); @@ -36,7 +34,6 @@ bool ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & ex if (!ParserTableExpression().parse(pos, table_expression, expected)) return false; - query->range = StringRange(begin, pos); query->table_expression = table_expression; node = query; diff --git a/dbms/src/Parsers/ParserDropQuery.cpp b/dbms/src/Parsers/ParserDropQuery.cpp index 0475711a225..fe4249db019 100644 --- a/dbms/src/Parsers/ParserDropQuery.cpp +++ b/dbms/src/Parsers/ParserDropQuery.cpp @@ -13,8 +13,6 @@ namespace DB bool ParserDropQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; - ParserKeyword s_drop("DROP"); ParserKeyword s_detach("DETACH"); ParserKeyword s_temporary("TEMPORARY"); @@ -81,7 +79,7 @@ bool ParserDropQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } } - auto query = std::make_shared(StringRange(begin, pos)); + auto query = std::make_shared(); node = query; query->detach = detach; diff --git a/dbms/src/Parsers/ParserInsertQuery.cpp b/dbms/src/Parsers/ParserInsertQuery.cpp index 59bd7d05e6b..399aa43ea98 100644 --- a/dbms/src/Parsers/ParserInsertQuery.cpp +++ b/dbms/src/Parsers/ParserInsertQuery.cpp @@ -24,8 +24,6 @@ namespace ErrorCodes bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; - ParserKeyword s_insert_into("INSERT INTO"); ParserKeyword s_table("TABLE"); ParserKeyword s_function("FUNCTION"); @@ -130,7 +128,7 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; } - auto query = std::make_shared(StringRange(begin, pos)); + auto query = std::make_shared(); node = query; if (table_function) diff --git a/dbms/src/Parsers/ParserKillQueryQuery.cpp b/dbms/src/Parsers/ParserKillQueryQuery.cpp index bbc4eb22ead..8179a4897fc 100644 --- a/dbms/src/Parsers/ParserKillQueryQuery.cpp +++ b/dbms/src/Parsers/ParserKillQueryQuery.cpp @@ -11,7 +11,6 @@ namespace DB bool ParserKillQueryQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; auto query = std::make_shared(); if (!ParserKeyword{"KILL QUERY"}.ignore(pos, expected)) @@ -31,8 +30,6 @@ bool ParserKillQueryQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expect else if (ParserKeyword{"TEST"}.ignore(pos)) query->test = true; - query->range = StringRange(begin, pos); - node = std::move(query); return true; diff --git a/dbms/src/Parsers/ParserOptimizeQuery.cpp b/dbms/src/Parsers/ParserOptimizeQuery.cpp index 90e9146210a..c01a1a7b5df 100644 --- a/dbms/src/Parsers/ParserOptimizeQuery.cpp +++ b/dbms/src/Parsers/ParserOptimizeQuery.cpp @@ -15,8 +15,6 @@ namespace DB bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; - ParserKeyword s_optimize_table("OPTIMIZE TABLE"); ParserKeyword s_partition("PARTITION"); ParserKeyword s_final("FINAL"); @@ -56,7 +54,7 @@ bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte if (s_deduplicate.ignore(pos, expected)) deduplicate = true; - auto query = std::make_shared(StringRange(begin, pos)); + auto query = std::make_shared(); node = query; if (database) diff --git a/dbms/src/Parsers/ParserPartition.cpp b/dbms/src/Parsers/ParserPartition.cpp index 9ecd03d9cd8..1daf4dead18 100644 --- a/dbms/src/Parsers/ParserPartition.cpp +++ 
b/dbms/src/Parsers/ParserPartition.cpp @@ -73,7 +73,6 @@ bool ParserPartition::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) partition->fields_count = fields_count; } - partition->range = StringRange(begin, pos); node = partition; return true; } diff --git a/dbms/src/Parsers/ParserRenameQuery.cpp b/dbms/src/Parsers/ParserRenameQuery.cpp index 219311b63d1..6eb8d768df9 100644 --- a/dbms/src/Parsers/ParserRenameQuery.cpp +++ b/dbms/src/Parsers/ParserRenameQuery.cpp @@ -40,8 +40,6 @@ static bool parseDatabaseAndTable( bool ParserRenameQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; - ParserKeyword s_rename_table("RENAME TABLE"); ParserKeyword s_to("TO"); ParserToken s_comma(TokenType::Comma); @@ -71,7 +69,7 @@ bool ParserRenameQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; } - auto query = std::make_shared(StringRange(begin, pos)); + auto query = std::make_shared(); query->cluster = cluster_str; node = query; diff --git a/dbms/src/Parsers/ParserSampleRatio.cpp b/dbms/src/Parsers/ParserSampleRatio.cpp index 85cba60a370..3091ed91570 100644 --- a/dbms/src/Parsers/ParserSampleRatio.cpp +++ b/dbms/src/Parsers/ParserSampleRatio.cpp @@ -83,8 +83,6 @@ static bool parseDecimal(const char * pos, const char * end, ASTSampleRatio::Rat */ bool ParserSampleRatio::parseImpl(Pos & pos, ASTPtr & node, Expected &) { - auto begin = pos; - ASTSampleRatio::Rational numerator; ASTSampleRatio::Rational denominator; ASTSampleRatio::Rational res; @@ -111,7 +109,7 @@ bool ParserSampleRatio::parseImpl(Pos & pos, ASTPtr & node, Expected &) res = numerator; } - node = std::make_shared(StringRange(begin, pos), res); + node = std::make_shared(res); return true; } diff --git a/dbms/src/Parsers/ParserSelectQuery.cpp b/dbms/src/Parsers/ParserSelectQuery.cpp index e56d6e11304..4bf89d49984 100644 --- a/dbms/src/Parsers/ParserSelectQuery.cpp +++ b/dbms/src/Parsers/ParserSelectQuery.cpp @@ -22,8 +22,6 @@ namespace ErrorCodes bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; - auto select_query = std::make_shared(); node = select_query; @@ -173,8 +171,6 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; } - select_query->range = StringRange(begin, pos); - if (select_query->with_expression_list) select_query->children.push_back(select_query->with_expression_list); select_query->children.push_back(select_query->select_expression_list); diff --git a/dbms/src/Parsers/ParserSelectWithUnionQuery.cpp b/dbms/src/Parsers/ParserSelectWithUnionQuery.cpp index 66d39e23196..503d92cbcb1 100644 --- a/dbms/src/Parsers/ParserSelectWithUnionQuery.cpp +++ b/dbms/src/Parsers/ParserSelectWithUnionQuery.cpp @@ -15,7 +15,7 @@ bool ParserSelectWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & if (!parser.parse(pos, list_node, expected)) return false; - auto res = std::make_shared(list_node->range); + auto res = std::make_shared(); res->list_of_selects = std::move(list_node); res->children.push_back(res->list_of_selects); diff --git a/dbms/src/Parsers/ParserSetQuery.cpp b/dbms/src/Parsers/ParserSetQuery.cpp index 109c4e5acc1..11f125bb955 100644 --- a/dbms/src/Parsers/ParserSetQuery.cpp +++ b/dbms/src/Parsers/ParserSetQuery.cpp @@ -40,8 +40,6 @@ static bool parseNameValuePair(ASTSetQuery::Change & change, IParser::Pos & pos, bool ParserSetQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; - ParserToken s_comma(TokenType::Comma); if 
(!parse_only_internals)
@@ -65,7 +63,7 @@ bool ParserSetQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
             return false;
     }
 
-    auto query = std::make_shared<ASTSetQuery>(StringRange(begin, pos));
+    auto query = std::make_shared<ASTSetQuery>();
     node = query;
 
     query->is_standalone = !parse_only_internals;
diff --git a/dbms/src/Parsers/ParserShowProcesslistQuery.h b/dbms/src/Parsers/ParserShowProcesslistQuery.h
index 5958259fb76..b7a661516b0 100644
--- a/dbms/src/Parsers/ParserShowProcesslistQuery.h
+++ b/dbms/src/Parsers/ParserShowProcesslistQuery.h
@@ -19,14 +19,11 @@ protected:
     bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
     {
-        Pos begin = pos;
-
         auto query = std::make_shared<ASTShowProcesslistQuery>();
 
         if (!ParserKeyword("SHOW PROCESSLIST").ignore(pos, expected))
             return false;
 
-        query->range = StringRange(begin, pos);
         node = query;
 
         return true;
diff --git a/dbms/src/Parsers/ParserShowTablesQuery.cpp b/dbms/src/Parsers/ParserShowTablesQuery.cpp
index 5399eeef50c..e4d6b5288d2 100644
--- a/dbms/src/Parsers/ParserShowTablesQuery.cpp
+++ b/dbms/src/Parsers/ParserShowTablesQuery.cpp
@@ -15,8 +15,6 @@ namespace DB
 bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
 {
-    Pos begin = pos;
-
     ParserKeyword s_show("SHOW");
     ParserKeyword s_temporary("TEMPORARY");
     ParserKeyword s_tables("TABLES");
@@ -67,8 +65,6 @@ bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
             return false;
     }
 
-    query->range = StringRange(begin, pos);
-
     if (database)
         query->from = typeid_cast<ASTIdentifier &>(*database).name;
     if (like)
diff --git a/dbms/src/Parsers/ParserSystemQuery.cpp b/dbms/src/Parsers/ParserSystemQuery.cpp
index 2ef71b9dcb3..b430e9e7fc7 100644
--- a/dbms/src/Parsers/ParserSystemQuery.cpp
+++ b/dbms/src/Parsers/ParserSystemQuery.cpp
@@ -18,8 +18,6 @@ namespace DB
 bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & expected)
 {
-    auto begin = pos;
-
     if (!ParserKeyword{"SYSTEM"}.ignore(pos))
         return false;
 
@@ -51,7 +49,6 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected &
             throw Exception("SYNC REPLICA is not supported yet", ErrorCodes::NOT_IMPLEMENTED);
     }
 
-    res->range = {begin, pos};
     node = std::move(res);
     return true;
 }
diff --git a/dbms/src/Parsers/ParserTablePropertiesQuery.cpp b/dbms/src/Parsers/ParserTablePropertiesQuery.cpp
index 8ed7046b2b2..4c9383d460a 100644
--- a/dbms/src/Parsers/ParserTablePropertiesQuery.cpp
+++ b/dbms/src/Parsers/ParserTablePropertiesQuery.cpp
@@ -13,8 +13,6 @@ namespace DB
 bool ParserTablePropertiesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
 {
-    Pos begin = pos;
-
     ParserKeyword s_exists("EXISTS");
     ParserKeyword s_describe("DESCRIBE");
     ParserKeyword s_desc("DESC");
@@ -56,8 +54,6 @@ bool ParserTablePropertiesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected &
             return false;
     }
 
-    query->range = StringRange(begin, pos);
-
     if (database)
         query->database = typeid_cast<ASTIdentifier &>(*database).name;
     if (table)
diff --git a/dbms/src/Parsers/ParserUseQuery.cpp b/dbms/src/Parsers/ParserUseQuery.cpp
index 25d41dae213..9e521a0d746 100644
--- a/dbms/src/Parsers/ParserUseQuery.cpp
+++ b/dbms/src/Parsers/ParserUseQuery.cpp
@@ -9,10 +9,9 @@
 namespace DB
 {
+
 bool ParserUseQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
 {
-    Pos begin = pos;
-
     ParserKeyword s_use("USE");
     ParserIdentifier name_p;
 
@@ -24,10 +23,11 @@ bool ParserUseQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
     if (!name_p.parse(pos, database, expected))
         return false;
 
-    auto query = std::make_shared<ASTUseQuery>(StringRange(begin, pos));
+    auto query = std::make_shared<ASTUseQuery>();
     query->database = typeid_cast<ASTIdentifier &>(*database).name;
     node = query;
 
     return true;
 }
+
 }
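With nodes now routinely created before any range is known, StringRange has to tolerate being empty. The StringRange.h hunk just below defaults both pointers to nullptr and guards toString; reconstructed as compilable code (String is assumed here to be an alias of std::string, standing in for ClickHouse's String):

    #include <memory>
    #include <string>

    using String = std::string;  // assumption: stand-in for ClickHouse's String alias

    struct StringRange
    {
        const char * first = nullptr;   // default-initialised, so a node that was
        const char * second = nullptr;  // never parsed from text holds a safe empty range
    };

    using StringPtr = std::shared_ptr<String>;

    inline String toString(const StringRange & range)
    {
        // Without the guard, String(nullptr, nullptr) would be undefined behaviour.
        return range.first ? String(range.first, range.second) : String();
    }

Call sites that still need a node's text move away from the stored range altogether; the PKCondition.cpp hunk later in this patch replaces DB::toString(node->range) with queryToString(node), which re-serializes the AST.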
diff --git a/dbms/src/Parsers/StringRange.h b/dbms/src/Parsers/StringRange.h
index 0c901bf5309..b919a899293 100644
--- a/dbms/src/Parsers/StringRange.h
+++ b/dbms/src/Parsers/StringRange.h
@@ -11,8 +11,8 @@ namespace DB
 struct StringRange
 {
-    const char * first;
-    const char * second;
+    const char * first = nullptr;
+    const char * second = nullptr;
 
     StringRange() {}
     StringRange(const char * begin, const char * end) : first(begin), second(end) {}
@@ -41,7 +41,7 @@ using StringPtr = std::shared_ptr<String>;
 
 inline String toString(const StringRange & range)
 {
-    return String(range.first, range.second);
+    return range.first ? String(range.first, range.second) : String();
 }
 
 }
diff --git a/dbms/src/Parsers/TablePropertiesQueriesASTs.h b/dbms/src/Parsers/TablePropertiesQueriesASTs.h
index 667ef00399b..312b112f494 100644
--- a/dbms/src/Parsers/TablePropertiesQueriesASTs.h
+++ b/dbms/src/Parsers/TablePropertiesQueriesASTs.h
@@ -32,8 +32,6 @@ class ASTDescribeQuery : public ASTQueryWithOutput
 public:
     ASTPtr table_expression;
 
-    ASTDescribeQuery() = default;
-    explicit ASTDescribeQuery(StringRange range_) : ASTQueryWithOutput(range_) {}
     String getID() const override { return "DescribeQuery"; };
 
     ASTPtr clone() const override
diff --git a/dbms/src/Parsers/parseIdentifierOrStringLiteral.cpp b/dbms/src/Parsers/parseIdentifierOrStringLiteral.cpp
index 81281bbc0d6..eaff2e85a9a 100644
--- a/dbms/src/Parsers/parseIdentifierOrStringLiteral.cpp
+++ b/dbms/src/Parsers/parseIdentifierOrStringLiteral.cpp
@@ -10,12 +10,10 @@ namespace DB
 bool parseIdentifierOrStringLiteral(IParser::Pos & pos, Expected & expected, String & result)
 {
-    IParser::Pos begin = pos;
     ASTPtr res;
 
     if (!ParserIdentifier().parse(pos, res, expected))
     {
-        pos = begin;
 
         if (!ParserStringLiteral().parse(pos, res, expected))
             return false;
@@ -27,4 +25,4 @@ bool parseIdentifierOrStringLiteral(IParser::Pos & pos, Expected & expected, Str
     return true;
 }
 
-}
\ No newline at end of file
+}
diff --git a/dbms/src/Parsers/queryToString.cpp b/dbms/src/Parsers/queryToString.cpp
index 35e2edd61d6..d214468c2a9 100644
--- a/dbms/src/Parsers/queryToString.cpp
+++ b/dbms/src/Parsers/queryToString.cpp
@@ -13,7 +13,6 @@ namespace DB
     {
         std::ostringstream out;
         formatAST(query, out, false, true);
-
        return out.str();
     }
 }
diff --git a/dbms/src/Server/ClusterCopier.cpp b/dbms/src/Server/ClusterCopier.cpp
index ae1d0643ff1..571b8a58232 100644
--- a/dbms/src/Server/ClusterCopier.cpp
+++ b/dbms/src/Server/ClusterCopier.cpp
@@ -373,9 +373,9 @@ std::shared_ptr<ASTStorage> createASTStorageDistributed(
     const String & cluster_name, const String & database, const String & table, const ASTPtr & sharding_key_ast = nullptr)
 {
     auto args = std::make_shared<ASTExpressionList>();
-    args->children.emplace_back(std::make_shared<ASTLiteral>(StringRange(nullptr, nullptr), cluster_name));
-    args->children.emplace_back(std::make_shared<ASTLiteral>(StringRange(nullptr, nullptr), database));
-    args->children.emplace_back(std::make_shared<ASTLiteral>(StringRange(nullptr, nullptr), table));
+    args->children.emplace_back(std::make_shared<ASTLiteral>(cluster_name));
+    args->children.emplace_back(std::make_shared<ASTLiteral>(database));
+    args->children.emplace_back(std::make_shared<ASTLiteral>(table));
 
     if (sharding_key_ast)
         args->children.emplace_back(sharding_key_ast);
 
@@ -487,7 +487,7 @@ static ASTPtr extractPartitionKey(const ASTPtr & storage_ast)
         return storage.partition_by->clone();
 
     static const char * all = "all";
-    return std::make_shared<ASTLiteral>(StringRange(all, all + strlen(all)), Field(all,
strlen(all))); + return std::make_shared(Field(all, strlen(all))); } else { diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index b5a726ecc6e..2ad9ae6e27d 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -245,8 +245,8 @@ void AlterCommands::validate(IStorage * table, const Context & context) const auto column_type_raw_ptr = command.data_type.get(); default_expr_list->children.emplace_back(setAlias( - makeASTFunction("CAST", std::make_shared(StringRange(), tmp_column_name), - std::make_shared(StringRange(), Field(column_type_raw_ptr->getName()))), + makeASTFunction("CAST", std::make_shared(tmp_column_name), + std::make_shared(Field(column_type_raw_ptr->getName()))), final_column_name)); default_expr_list->children.emplace_back(setAlias(command.default_expression->clone(), tmp_column_name)); @@ -299,8 +299,8 @@ void AlterCommands::validate(IStorage * table, const Context & context) const auto & column_type_ptr = column_it->type; default_expr_list->children.emplace_back(setAlias( - makeASTFunction("CAST", std::make_shared(StringRange(), tmp_column_name), - std::make_shared(StringRange(), Field(column_type_ptr->getName()))), + makeASTFunction("CAST", std::make_shared(tmp_column_name), + std::make_shared(Field(column_type_ptr->getName()))), column_name)); default_expr_list->children.emplace_back(setAlias(col_def.second.expression->clone(), tmp_column_name)); @@ -345,7 +345,7 @@ void AlterCommands::validate(IStorage * table, const Context & context) } command_ptr->default_expression = makeASTFunction("CAST", command_ptr->default_expression->clone(), - std::make_shared(StringRange(), Field(explicit_type->getName()))); + std::make_shared(Field(explicit_type->getName()))); } } else diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 8c9655fe68a..b6d19da1d89 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -434,7 +434,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::read( ASTPtr args = std::make_shared(); args->children.push_back(data.sampling_expression); - args->children.push_back(std::make_shared(StringRange(), lower)); + args->children.push_back(std::make_shared(lower)); lower_function = std::make_shared(); lower_function->name = "greaterOrEquals"; @@ -451,7 +451,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::read( ASTPtr args = std::make_shared(); args->children.push_back(data.sampling_expression); - args->children.push_back(std::make_shared(StringRange(), upper)); + args->children.push_back(std::make_shared(upper)); upper_function = std::make_shared(); upper_function->name = "less"; @@ -846,8 +846,8 @@ void MergeTreeDataSelectExecutor::createPositiveSignCondition( { auto function = std::make_shared(); auto arguments = std::make_shared(); - auto sign = std::make_shared(); - auto one = std::make_shared(); + auto sign = std::make_shared(data.merging_params.sign_column); + auto one = std::make_shared(Field(static_cast(1))); function->name = "equals"; function->arguments = arguments; @@ -856,11 +856,6 @@ void MergeTreeDataSelectExecutor::createPositiveSignCondition( arguments->children.push_back(sign); arguments->children.push_back(one); - sign->name = data.merging_params.sign_column; - sign->kind = ASTIdentifier::Column; - - one->value = Field(static_cast(1)); - out_expression = ExpressionAnalyzer(function, context, {}, 
data.getColumnsList()).getActions(false); out_column = function->getColumnName(); } diff --git a/dbms/src/Storages/MergeTree/PKCondition.cpp b/dbms/src/Storages/MergeTree/PKCondition.cpp index 798266e1d15..1d80ea38a87 100644 --- a/dbms/src/Storages/MergeTree/PKCondition.cpp +++ b/dbms/src/Storages/MergeTree/PKCondition.cpp @@ -13,6 +13,7 @@ #include #include #include +#include namespace DB @@ -593,7 +594,7 @@ static void castValueToType(const DataTypePtr & desired_type, Field & src_value, { throw Exception("Primary key expression contains comparison between inconvertible types: " + desired_type->getName() + " and " + src_type->getName() + - " inside " + DB::toString(node->range), + " inside " + queryToString(node), ErrorCodes::BAD_TYPE_OF_FIELD); } } diff --git a/dbms/src/Storages/StorageBuffer.cpp b/dbms/src/Storages/StorageBuffer.cpp index 1b32ba197ff..62ab67df163 100644 --- a/dbms/src/Storages/StorageBuffer.cpp +++ b/dbms/src/Storages/StorageBuffer.cpp @@ -558,7 +558,7 @@ void StorageBuffer::writeBlockToDestination(const Block & block, StoragePtr tabl insert->columns = list_of_columns; list_of_columns->children.reserve(columns_intersection.size()); for (const String & column : columns_intersection) - list_of_columns->children.push_back(std::make_shared(StringRange(), column, ASTIdentifier::Column)); + list_of_columns->children.push_back(std::make_shared(column, ASTIdentifier::Column)); InterpreterInsertQuery interpreter{insert, context, allow_materialized}; diff --git a/dbms/src/Storages/StorageCatBoostPool.cpp b/dbms/src/Storages/StorageCatBoostPool.cpp index 74a40372b79..cbaf523e3bf 100644 --- a/dbms/src/Storages/StorageCatBoostPool.cpp +++ b/dbms/src/Storages/StorageCatBoostPool.cpp @@ -243,13 +243,12 @@ void StorageCatBoostPool::createSampleBlockAndColumns() if (!desc.alias.empty()) { - auto alias = std::make_shared(); - alias->name = desc.column_name; + auto alias = std::make_shared(desc.column_name); column_defaults[desc.alias] = {ColumnDefaultType::Alias, alias}; alias_columns.emplace_back(desc.alias, type); } - sample_block.insert(ColumnWithTypeAndName(type->createColumn(), type, desc.column_name)); + sample_block.insert(ColumnWithTypeAndName(type, desc.column_name)); } columns.insert(columns.end(), num_columns.begin(), num_columns.end()); columns.insert(columns.end(), cat_columns.begin(), cat_columns.end()); diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp index 894f6f2dd44..9d4afdfdde6 100644 --- a/dbms/src/Storages/StorageDistributed.cpp +++ b/dbms/src/Storages/StorageDistributed.cpp @@ -267,13 +267,10 @@ BlockInputStreams StorageDistributed::describe(const Context & context, const Se std::string name = remote_database + '.' 
+ remote_table; - auto id = std::make_shared(); - id->name = name; + auto id = std::make_shared(name); - auto desc_database = std::make_shared(); - auto desc_table = std::make_shared(); - desc_database->name = remote_database; - desc_table->name = remote_table; + auto desc_database = std::make_shared(remote_database); + auto desc_table = std::make_shared(remote_table); id->children.push_back(desc_database); id->children.push_back(desc_table); diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 09b5bc58094..97e100585cb 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -201,9 +201,10 @@ void StorageMergeTree::alter( IDatabase::ASTModifier storage_modifier; if (primary_key_is_modified) + { storage_modifier = [&new_primary_key_ast] (IAST & ast) { - auto tuple = std::make_shared(new_primary_key_ast->range); + auto tuple = std::make_shared(); tuple->name = "tuple"; tuple->arguments = new_primary_key_ast; tuple->children.push_back(tuple->arguments); @@ -213,6 +214,7 @@ void StorageMergeTree::alter( auto & storage_ast = typeid_cast(ast); typeid_cast(*storage_ast.engine->arguments).children.at(1) = tuple; }; + } context.getDatabase(database_name)->alterTable( context, table_name, diff --git a/dbms/src/Storages/VirtualColumnUtils.cpp b/dbms/src/Storages/VirtualColumnUtils.cpp index 70fa6f8712a..e9efe406e45 100644 --- a/dbms/src/Storages/VirtualColumnUtils.cpp +++ b/dbms/src/Storages/VirtualColumnUtils.cpp @@ -81,7 +81,7 @@ void rewriteEntityInAst(ASTPtr ast, const String & column_name, const Field & va } ASTExpressionList & with = typeid_cast(*select.with_expression_list); - auto literal = std::make_shared(StringRange(), value); + auto literal = std::make_shared(value); literal->alias = column_name; literal->prefer_alias_to_column_name = true; with.children.push_back(literal); diff --git a/dbms/src/Storages/transformQueryForExternalDatabase.cpp b/dbms/src/Storages/transformQueryForExternalDatabase.cpp index 538c8ede9a4..bf4fd14b23a 100644 --- a/dbms/src/Storages/transformQueryForExternalDatabase.cpp +++ b/dbms/src/Storages/transformQueryForExternalDatabase.cpp @@ -70,7 +70,7 @@ String transformQueryForExternalDatabase( auto select_expr_list = std::make_shared(); for (const auto & name : used_columns) - select_expr_list->children.push_back(std::make_shared(StringRange(), name)); + select_expr_list->children.push_back(std::make_shared(name)); select->select_expression_list = std::move(select_expr_list); From d6121a8544abb8d81e69b554fcc697cfa8fa90ff Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 26 Feb 2018 06:40:20 +0300 Subject: [PATCH 112/209] Simplification of AST #1947 --- dbms/src/Analyzers/AnalyzeColumns.cpp | 9 ++- dbms/src/Core/iostream_debug_helpers.cpp | 4 +- dbms/src/Interpreters/Aggregator.cpp | 2 +- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 27 +++++---- .../InJoinSubqueriesPreprocessor.cpp | 10 ++-- .../Interpreters/InterpreterAlterQuery.cpp | 8 +-- .../Interpreters/InterpreterCreateQuery.cpp | 10 ++-- .../Interpreters/InterpreterSelectQuery.cpp | 2 +- .../evaluateConstantExpression.cpp | 5 +- dbms/src/Parsers/ASTAlterQuery.cpp | 4 -- dbms/src/Parsers/ASTAlterQuery.h | 2 - dbms/src/Parsers/ASTAsterisk.h | 2 - dbms/src/Parsers/ASTCheckQuery.h | 2 - dbms/src/Parsers/ASTColumnDeclaration.h | 3 - dbms/src/Parsers/ASTCreateQuery.h | 5 -- dbms/src/Parsers/ASTDropQuery.h | 3 - dbms/src/Parsers/ASTEnumElement.h | 6 +- dbms/src/Parsers/ASTExpressionList.h | 3 - 
dbms/src/Parsers/ASTFunction.h | 23 +------- dbms/src/Parsers/ASTIdentifier.h | 5 +- dbms/src/Parsers/ASTInsertQuery.h | 3 - dbms/src/Parsers/ASTKillQueryQuery.h | 4 -- dbms/src/Parsers/ASTLiteral.h | 3 +- dbms/src/Parsers/ASTNameTypePair.h | 3 - dbms/src/Parsers/ASTOptimizeQuery.h | 3 - dbms/src/Parsers/ASTOrderByElement.h | 9 +-- dbms/src/Parsers/ASTPartition.h | 2 - dbms/src/Parsers/ASTQualifiedAsterisk.h | 2 - dbms/src/Parsers/ASTQueryWithOutput.h | 13 ++--- dbms/src/Parsers/ASTQueryWithTableAndOutput.h | 7 --- dbms/src/Parsers/ASTRenameQuery.h | 3 - dbms/src/Parsers/ASTSampleRatio.h | 4 +- dbms/src/Parsers/ASTSelectQuery.cpp | 20 +++---- dbms/src/Parsers/ASTSelectQuery.h | 3 - dbms/src/Parsers/ASTSetQuery.h | 3 - dbms/src/Parsers/ASTShowTablesQuery.h | 3 - dbms/src/Parsers/ASTSubquery.h | 4 +- dbms/src/Parsers/ASTSystemQuery.h | 3 - dbms/src/Parsers/ASTUseQuery.h | 3 - dbms/src/Parsers/ExpressionElementParsers.cpp | 58 ++++++------------- dbms/src/Parsers/ExpressionListParsers.cpp | 8 +-- dbms/src/Parsers/IAST.h | 8 +-- dbms/src/Parsers/IParserBase.cpp | 4 +- dbms/src/Parsers/ParserAlterQuery.cpp | 3 - dbms/src/Parsers/ParserCase.cpp | 11 ++-- dbms/src/Parsers/ParserCheckQuery.cpp | 6 +- dbms/src/Parsers/ParserCreateQuery.cpp | 20 ++----- dbms/src/Parsers/ParserCreateQuery.h | 8 +-- dbms/src/Parsers/ParserDescribeTableQuery.cpp | 3 - dbms/src/Parsers/ParserDropQuery.cpp | 4 +- dbms/src/Parsers/ParserInsertQuery.cpp | 4 +- dbms/src/Parsers/ParserKillQueryQuery.cpp | 3 - dbms/src/Parsers/ParserOptimizeQuery.cpp | 4 +- dbms/src/Parsers/ParserPartition.cpp | 1 - dbms/src/Parsers/ParserRenameQuery.cpp | 4 +- dbms/src/Parsers/ParserSampleRatio.cpp | 4 +- dbms/src/Parsers/ParserSelectQuery.cpp | 4 -- dbms/src/Parsers/ParserSetQuery.cpp | 4 +- dbms/src/Parsers/ParserShowProcesslistQuery.h | 3 - dbms/src/Parsers/ParserShowTablesQuery.cpp | 4 -- dbms/src/Parsers/ParserSystemQuery.cpp | 3 - .../Parsers/ParserTablePropertiesQuery.cpp | 4 -- dbms/src/Parsers/ParserUseQuery.cpp | 6 +- dbms/src/Parsers/StringRange.h | 6 +- dbms/src/Parsers/TablePropertiesQueriesASTs.h | 2 - .../parseIdentifierOrStringLiteral.cpp | 4 +- dbms/src/Parsers/queryToString.cpp | 1 - dbms/src/Server/ClusterCopier.cpp | 8 +-- dbms/src/Storages/AlterCommands.cpp | 10 ++-- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 13 ++--- dbms/src/Storages/MergeTree/PKCondition.cpp | 3 +- dbms/src/Storages/StorageBuffer.cpp | 2 +- dbms/src/Storages/StorageCatBoostPool.cpp | 5 +- dbms/src/Storages/StorageDistributed.cpp | 9 +-- dbms/src/Storages/StorageMergeTree.cpp | 4 +- dbms/src/Storages/VirtualColumnUtils.cpp | 2 +- .../transformQueryForExternalDatabase.cpp | 2 +- 77 files changed, 135 insertions(+), 347 deletions(-) diff --git a/dbms/src/Analyzers/AnalyzeColumns.cpp b/dbms/src/Analyzers/AnalyzeColumns.cpp index 1e0883f2d3a..1a8b137a573 100644 --- a/dbms/src/Analyzers/AnalyzeColumns.cpp +++ b/dbms/src/Analyzers/AnalyzeColumns.cpp @@ -135,7 +135,7 @@ ASTPtr createASTIdentifierForColumnInTable(const String & column, const CollectT { ASTPtr database_name_identifier_node; if (!table.database_name.empty()) - database_name_identifier_node = std::make_shared(StringRange(), table.database_name, ASTIdentifier::Column); + database_name_identifier_node = std::make_shared(table.database_name, ASTIdentifier::Column); ASTPtr table_name_identifier_node; String table_name_or_alias; @@ -146,9 +146,9 @@ ASTPtr createASTIdentifierForColumnInTable(const String & column, const CollectT table_name_or_alias = table.alias; if (!table_name_or_alias.empty()) 
- table_name_identifier_node = std::make_shared(StringRange(), table_name_or_alias, ASTIdentifier::Column); + table_name_identifier_node = std::make_shared(table_name_or_alias, ASTIdentifier::Column); - ASTPtr column_identifier_node = std::make_shared(StringRange(), column, ASTIdentifier::Column); + ASTPtr column_identifier_node = std::make_shared(column, ASTIdentifier::Column); String compound_name; if (database_name_identifier_node) @@ -157,8 +157,7 @@ ASTPtr createASTIdentifierForColumnInTable(const String & column, const CollectT compound_name += table_name_or_alias + "."; compound_name += column; - auto elem = std::make_shared( - StringRange(), compound_name, ASTIdentifier::Column); + auto elem = std::make_shared(compound_name, ASTIdentifier::Column); if (database_name_identifier_node) elem->children.emplace_back(std::move(database_name_identifier_node)); diff --git a/dbms/src/Core/iostream_debug_helpers.cpp b/dbms/src/Core/iostream_debug_helpers.cpp index 57a3d215b69..a4b0cba85b8 100644 --- a/dbms/src/Core/iostream_debug_helpers.cpp +++ b/dbms/src/Core/iostream_debug_helpers.cpp @@ -123,9 +123,7 @@ std::ostream & operator<<(std::ostream & stream, const SubqueryForSet & what) std::ostream & operator<<(std::ostream & stream, const IAST & what) { - stream << "IAST(" - << "query_string = " << what.query_string - <<"){"; + stream << "IAST{"; what.dumpTree(stream); stream << "}"; return stream; diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index ecd19370ef9..eebc95cada9 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -112,7 +112,7 @@ Block Aggregator::getHeader(bool final) const else type = std::make_shared(params.aggregates[i].function, argument_types, params.aggregates[i].parameters); - res.insert({ type->createColumn(), type, params.aggregates[i].column_name }); + res.insert({ type, params.aggregates[i].column_name }); } } else if (params.intermediate_header) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index b7f67e8566c..a1cf16a1a27 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -359,7 +359,7 @@ void ExpressionAnalyzer::translateQualifiedNamesImpl(ASTPtr & ast, const String || (!alias.empty() && ident->name == alias)))) { /// Replace to plain asterisk. - ast = std::make_shared(ast->range); + ast = std::make_shared(); } } else @@ -688,8 +688,7 @@ static std::shared_ptr interpretSubquery( /// manually substitute column names in place of asterisk for (const auto & column : columns) - select_expression_list->children.emplace_back(std::make_shared( - StringRange{}, column.name)); + select_expression_list->children.emplace_back(std::make_shared(column.name)); select_query->replaceDatabaseAndTable(database_table.first, database_table.second); } @@ -808,7 +807,7 @@ void ExpressionAnalyzer::addExternalStorage(ASTPtr & subquery_or_table_name_or_t * instead of doing a subquery, you just need to read it. 
*/ - auto database_and_table_name = std::make_shared(StringRange(), external_table_name, ASTIdentifier::Table); + auto database_and_table_name = std::make_shared(external_table_name, ASTIdentifier::Table); if (auto ast_table_expr = typeid_cast(subquery_or_table_name_or_table_expression.get())) { @@ -1032,7 +1031,7 @@ void ExpressionAnalyzer::normalizeTreeImpl( { ASTs all_columns; for (const auto & column_name_type : columns) - all_columns.emplace_back(std::make_shared(asterisk->range, column_name_type.name)); + all_columns.emplace_back(std::make_shared(column_name_type.name)); asts.erase(asts.begin() + i); asts.insert(asts.begin() + i, all_columns.begin(), all_columns.end()); @@ -1147,17 +1146,17 @@ void ExpressionAnalyzer::executeScalarSubqueries() static ASTPtr addTypeConversion(std::unique_ptr && ast, const String & type_name) { - auto func = std::make_shared(ast->range); + auto func = std::make_shared(); ASTPtr res = func; func->alias = ast->alias; func->prefer_alias_to_column_name = ast->prefer_alias_to_column_name; ast->alias.clear(); func->name = "CAST"; - auto exp_list = std::make_shared(ast->range); + auto exp_list = std::make_shared(); func->arguments = exp_list; func->children.push_back(func->arguments); exp_list->children.emplace_back(ast.release()); - exp_list->children.emplace_back(std::make_shared(StringRange(), type_name)); + exp_list->children.emplace_back(std::make_shared(type_name)); return res; } @@ -1200,7 +1199,7 @@ void ExpressionAnalyzer::executeScalarSubqueriesImpl(ASTPtr & ast) if (!block) { /// Interpret subquery with empty result as Null literal - auto ast_new = std::make_unique(ast->range, Null()); + auto ast_new = std::make_unique(Null()); ast_new->setAlias(ast->tryGetAlias()); ast = std::move(ast_new); return; @@ -1220,18 +1219,18 @@ void ExpressionAnalyzer::executeScalarSubqueriesImpl(ASTPtr & ast) size_t columns = block.columns(); if (columns == 1) { - auto lit = std::make_unique(ast->range, (*block.safeGetByPosition(0).column)[0]); + auto lit = std::make_unique((*block.safeGetByPosition(0).column)[0]); lit->alias = subquery->alias; lit->prefer_alias_to_column_name = subquery->prefer_alias_to_column_name; ast = addTypeConversion(std::move(lit), block.safeGetByPosition(0).type->getName()); } else { - auto tuple = std::make_shared(ast->range); + auto tuple = std::make_shared(); tuple->alias = subquery->alias; ast = tuple; tuple->name = "tuple"; - auto exp_list = std::make_shared(ast->range); + auto exp_list = std::make_shared(); tuple->arguments = exp_list; tuple->children.push_back(tuple->arguments); @@ -1239,7 +1238,7 @@ void ExpressionAnalyzer::executeScalarSubqueriesImpl(ASTPtr & ast) for (size_t i = 0; i < columns; ++i) { exp_list->children[i] = addTypeConversion( - std::make_unique(ast->range, (*block.safeGetByPosition(i).column)[0]), + std::make_unique((*block.safeGetByPosition(i).column)[0]), block.safeGetByPosition(i).type->getName()); } } @@ -1375,7 +1374,7 @@ void ExpressionAnalyzer::optimizeGroupBy() } select_query->group_expression_list = std::make_shared(); - select_query->group_expression_list->children.emplace_back(std::make_shared(StringRange(), UInt64(unused_column))); + select_query->group_expression_list->children.emplace_back(std::make_shared(UInt64(unused_column))); } } diff --git a/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp b/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp index 363db236759..cc89def49ee 100644 --- a/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp +++ 
b/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp @@ -102,20 +102,18 @@ StoragePtr tryGetTable(const ASTPtr & database_and_table, const Context & contex void replaceDatabaseAndTable(ASTPtr & database_and_table, const String & database_name, const String & table_name) { - ASTPtr table = std::make_shared(StringRange(), table_name, ASTIdentifier::Table); + ASTPtr table = std::make_shared(table_name, ASTIdentifier::Table); if (!database_name.empty()) { - ASTPtr database = std::make_shared(StringRange(), database_name, ASTIdentifier::Database); + ASTPtr database = std::make_shared(database_name, ASTIdentifier::Database); - database_and_table = std::make_shared( - StringRange(), database_name + "." + table_name, ASTIdentifier::Table); + database_and_table = std::make_shared(database_name + "." + table_name, ASTIdentifier::Table); database_and_table->children = {database, table}; } else { - database_and_table = std::make_shared( - StringRange(), table_name, ASTIdentifier::Table); + database_and_table = std::make_shared(table_name, ASTIdentifier::Table); } } diff --git a/dbms/src/Interpreters/InterpreterAlterQuery.cpp b/dbms/src/Interpreters/InterpreterAlterQuery.cpp index 76fef51f98f..f990fd8570f 100644 --- a/dbms/src/Interpreters/InterpreterAlterQuery.cpp +++ b/dbms/src/Interpreters/InterpreterAlterQuery.cpp @@ -106,9 +106,7 @@ void InterpreterAlterQuery::parseAlter( command.column_name = ast_col_decl.name; if (ast_col_decl.type) { - StringRange type_range = ast_col_decl.type->range; - String type_string(type_range.first, type_range.second - type_range.first); - command.data_type = data_type_factory.get(type_string); + command.data_type = data_type_factory.get(ast_col_decl.type); } if (ast_col_decl.default_expression) { @@ -154,9 +152,7 @@ void InterpreterAlterQuery::parseAlter( command.column_name = ast_col_decl.name; if (ast_col_decl.type) { - StringRange type_range = ast_col_decl.type->range; - String type_string(type_range.first, type_range.second - type_range.first); - command.data_type = data_type_factory.get(type_string); + command.data_type = data_type_factory.get(ast_col_decl.type); } if (ast_col_decl.default_expression) diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 17262f3cc56..0444b26671b 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -199,8 +199,8 @@ static ColumnsAndDefaults parseColumns(const ASTExpressionList & column_list_ast const auto data_type_ptr = columns.back().type.get(); default_expr_list->children.emplace_back(setAlias( - makeASTFunction("CAST", std::make_shared(StringRange(), tmp_column_name), - std::make_shared(StringRange(), Field(data_type_ptr->getName()))), final_column_name)); + makeASTFunction("CAST", std::make_shared(tmp_column_name), + std::make_shared(Field(data_type_ptr->getName()))), final_column_name)); default_expr_list->children.emplace_back(setAlias(col_decl.default_expression->clone(), tmp_column_name)); } else @@ -233,7 +233,7 @@ static ColumnsAndDefaults parseColumns(const ASTExpressionList & column_list_ast if (!explicit_type->equals(*deduced_type)) { col_decl_ptr->default_expression = makeASTFunction("CAST", col_decl_ptr->default_expression, - std::make_shared(StringRange(), explicit_type->getName())); + std::make_shared(explicit_type->getName())); col_decl_ptr->children.clear(); col_decl_ptr->children.push_back(col_decl_ptr->type); @@ -293,7 +293,7 @@ ASTPtr 
InterpreterCreateQuery::formatColumns(const NamesAndTypesList & columns) ParserIdentifierWithOptionalParameters storage_p; column_declaration->type = parseQuery(storage_p, pos, end, "data type"); - column_declaration->type->query_string = type_name; + column_declaration->type->owned_string = type_name; columns_list->children.emplace_back(column_declaration); } @@ -321,7 +321,7 @@ ASTPtr InterpreterCreateQuery::formatColumns( ParserIdentifierWithOptionalParameters storage_p; column_declaration->type = parseQuery(storage_p, pos, end, "data type"); - column_declaration->type->query_string = type_name; + column_declaration->type->owned_string = type_name; const auto it = column_defaults.find(column.name); if (it != std::end(column_defaults)) diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 3002b73297a..56996c1eb5e 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -696,7 +696,7 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline if (default_it != std::end(storage->column_defaults) && default_it->second.type == ColumnDefaultType::Alias) required_columns_expr_list->children.emplace_back(setAlias(default_it->second.expression->clone(), column)); else - required_columns_expr_list->children.emplace_back(std::make_shared(StringRange(), column)); + required_columns_expr_list->children.emplace_back(std::make_shared(column)); } alias_actions = ExpressionAnalyzer{required_columns_expr_list, context, storage, source_header.getNamesAndTypesList()}.getActions(true); diff --git a/dbms/src/Interpreters/evaluateConstantExpression.cpp b/dbms/src/Interpreters/evaluateConstantExpression.cpp index 964aae94619..497df93f9bc 100644 --- a/dbms/src/Interpreters/evaluateConstantExpression.cpp +++ b/dbms/src/Interpreters/evaluateConstantExpression.cpp @@ -55,15 +55,14 @@ ASTPtr evaluateConstantExpressionAsLiteral(const ASTPtr & node, const Context & if (typeid_cast(node.get())) return node; - return std::make_shared(node->range, - evaluateConstantExpression(node, context).first); + return std::make_shared(evaluateConstantExpression(node, context).first); } ASTPtr evaluateConstantExpressionOrIdentifierAsLiteral(const ASTPtr & node, const Context & context) { if (auto id = typeid_cast(node.get())) - return std::make_shared(node->range, Field(id->name)); + return std::make_shared(Field(id->name)); return evaluateConstantExpressionAsLiteral(node, context); } diff --git a/dbms/src/Parsers/ASTAlterQuery.cpp b/dbms/src/Parsers/ASTAlterQuery.cpp index d9ed5d83fc2..6b439e83dda 100644 --- a/dbms/src/Parsers/ASTAlterQuery.cpp +++ b/dbms/src/Parsers/ASTAlterQuery.cpp @@ -33,10 +33,6 @@ void ASTAlterQuery::addParameters(const Parameters & params) children.push_back(params.primary_key); } -ASTAlterQuery::ASTAlterQuery(StringRange range_) : ASTQueryWithOutput(range_) -{ -} - /** Get the text that identifies this element. */ String ASTAlterQuery::getID() const { diff --git a/dbms/src/Parsers/ASTAlterQuery.h b/dbms/src/Parsers/ASTAlterQuery.h index 5b5e543d64d..dc1c4dde849 100644 --- a/dbms/src/Parsers/ASTAlterQuery.h +++ b/dbms/src/Parsers/ASTAlterQuery.h @@ -87,8 +87,6 @@ public: void addParameters(const Parameters & params); - explicit ASTAlterQuery(StringRange range_ = StringRange()); - /** Get the text that identifies this element. 
*/ String getID() const override; diff --git a/dbms/src/Parsers/ASTAsterisk.h b/dbms/src/Parsers/ASTAsterisk.h index a52885f63cf..3861c992c75 100644 --- a/dbms/src/Parsers/ASTAsterisk.h +++ b/dbms/src/Parsers/ASTAsterisk.h @@ -9,8 +9,6 @@ namespace DB class ASTAsterisk : public IAST { public: - ASTAsterisk() = default; - ASTAsterisk(StringRange range_) : IAST(range_) {} String getID() const override { return "Asterisk"; } ASTPtr clone() const override { return std::make_shared(*this); } String getColumnName() const override { return "*"; } diff --git a/dbms/src/Parsers/ASTCheckQuery.h b/dbms/src/Parsers/ASTCheckQuery.h index 01e9c34b616..74f6249a732 100644 --- a/dbms/src/Parsers/ASTCheckQuery.h +++ b/dbms/src/Parsers/ASTCheckQuery.h @@ -7,8 +7,6 @@ namespace DB struct ASTCheckQuery : public ASTQueryWithOutput { - ASTCheckQuery(StringRange range_ = StringRange()) : ASTQueryWithOutput(range_) {}; - /** Get the text that identifies this element. */ String getID() const override { return ("CheckQuery_" + database + "_" + table); }; diff --git a/dbms/src/Parsers/ASTColumnDeclaration.h b/dbms/src/Parsers/ASTColumnDeclaration.h index 0cf49be00ec..57a1f7695d7 100644 --- a/dbms/src/Parsers/ASTColumnDeclaration.h +++ b/dbms/src/Parsers/ASTColumnDeclaration.h @@ -16,9 +16,6 @@ public: String default_specifier; ASTPtr default_expression; - ASTColumnDeclaration() = default; - ASTColumnDeclaration(const StringRange range) : IAST{range} {} - String getID() const override { return "ColumnDeclaration_" + name; } ASTPtr clone() const override diff --git a/dbms/src/Parsers/ASTCreateQuery.h b/dbms/src/Parsers/ASTCreateQuery.h index 5f1c8c66891..e1180de4af0 100644 --- a/dbms/src/Parsers/ASTCreateQuery.h +++ b/dbms/src/Parsers/ASTCreateQuery.h @@ -20,8 +20,6 @@ public: IAST * sample_by = nullptr; ASTSetQuery * settings = nullptr; - ASTStorage() = default; - ASTStorage(StringRange range_) : IAST(range_) {} String getID() const override { return "Storage definition"; } ASTPtr clone() const override @@ -95,9 +93,6 @@ public: String as_table; ASTSelectQuery * select = nullptr; - ASTCreateQuery() = default; - ASTCreateQuery(const StringRange range_) : ASTQueryWithOutput(range_) {} - /** Get the text that identifies this element. */ String getID() const override { return (attach ? "AttachQuery_" : "CreateQuery_") + database + "_" + table; }; diff --git a/dbms/src/Parsers/ASTDropQuery.h b/dbms/src/Parsers/ASTDropQuery.h index 3735a58ab66..41e9b255bf3 100644 --- a/dbms/src/Parsers/ASTDropQuery.h +++ b/dbms/src/Parsers/ASTDropQuery.h @@ -19,9 +19,6 @@ public: String database; String table; - ASTDropQuery() = default; - explicit ASTDropQuery(const StringRange range_) : ASTQueryWithOutput(range_) {} - /** Get the text that identifies this element. */ String getID() const override { return (detach ? 
"DetachQuery_" : "DropQuery_") + database + "_" + table; }; diff --git a/dbms/src/Parsers/ASTEnumElement.h b/dbms/src/Parsers/ASTEnumElement.h index 1898c5d9172..10b4e1e7482 100644 --- a/dbms/src/Parsers/ASTEnumElement.h +++ b/dbms/src/Parsers/ASTEnumElement.h @@ -14,14 +14,14 @@ public: String name; Field value; - ASTEnumElement(const StringRange range, const String & name, const Field & value) - : IAST{range}, name{name}, value {value} {} + ASTEnumElement(const String & name, const Field & value) + : name{name}, value {value} {} String getID() const override { return "EnumElement"; } ASTPtr clone() const override { - return std::make_shared(StringRange(), name, value); + return std::make_shared(name, value); } protected: diff --git a/dbms/src/Parsers/ASTExpressionList.h b/dbms/src/Parsers/ASTExpressionList.h index 4d49df84ad6..cfe9cb3b714 100644 --- a/dbms/src/Parsers/ASTExpressionList.h +++ b/dbms/src/Parsers/ASTExpressionList.h @@ -11,9 +11,6 @@ namespace DB class ASTExpressionList : public IAST { public: - ASTExpressionList() = default; - ASTExpressionList(const StringRange range_) : IAST(range_) {} - String getID() const override { return "ExpressionList"; } ASTPtr clone() const override; diff --git a/dbms/src/Parsers/ASTFunction.h b/dbms/src/Parsers/ASTFunction.h index 462dc439329..9e78de369a1 100644 --- a/dbms/src/Parsers/ASTFunction.h +++ b/dbms/src/Parsers/ASTFunction.h @@ -18,9 +18,6 @@ public: ASTPtr parameters; public: - ASTFunction() = default; - ASTFunction(const StringRange range_) : ASTWithAlias(range_) {} - /** Get text identifying the AST node. */ String getID() const override; @@ -36,7 +33,6 @@ template ASTPtr makeASTFunction(const String & name, Args &&... args) { const auto function = std::make_shared(); - ASTPtr result{function}; function->name = name; function->arguments = std::make_shared(); @@ -44,24 +40,7 @@ ASTPtr makeASTFunction(const String & name, Args &&... args) function->arguments->children = { std::forward(args)... }; - return result; -} - - -template -ASTPtr makeASTFunction(const String & name, const StringRange & function_range, - const StringRange & arguments_range, Args &&... args) -{ - const auto function = std::make_shared(function_range); - ASTPtr result{function}; - - function->name = name; - function->arguments = std::make_shared(arguments_range); - function->children.push_back(function->arguments); - - function->arguments->children = { std::forward(args)... }; - - return result; + return function; } } diff --git a/dbms/src/Parsers/ASTIdentifier.h b/dbms/src/Parsers/ASTIdentifier.h index 1c424f8e50e..017e33af500 100644 --- a/dbms/src/Parsers/ASTIdentifier.h +++ b/dbms/src/Parsers/ASTIdentifier.h @@ -25,9 +25,8 @@ public: /// what this identifier identifies Kind kind; - ASTIdentifier() = default; - ASTIdentifier(const StringRange range_, const String & name_, const Kind kind_ = Column) - : ASTWithAlias(range_), name(name_), kind(kind_) {} + ASTIdentifier(const String & name_, const Kind kind_ = Column) + : name(name_), kind(kind_) {} /** Get the text that identifies this element. 
*/ String getID() const override { return "Identifier_" + name; } diff --git a/dbms/src/Parsers/ASTInsertQuery.h b/dbms/src/Parsers/ASTInsertQuery.h index bbd730ae50c..12e3de2ef42 100644 --- a/dbms/src/Parsers/ASTInsertQuery.h +++ b/dbms/src/Parsers/ASTInsertQuery.h @@ -26,9 +26,6 @@ public: const char * data = nullptr; const char * end = nullptr; - ASTInsertQuery() = default; - explicit ASTInsertQuery(const StringRange range_) : IAST(range_) {} - /** Get the text that identifies this element. */ String getID() const override { return "InsertQuery_" + database + "_" + table; }; diff --git a/dbms/src/Parsers/ASTKillQueryQuery.h b/dbms/src/Parsers/ASTKillQueryQuery.h index e28c97e4baf..23ef73fec3d 100644 --- a/dbms/src/Parsers/ASTKillQueryQuery.h +++ b/dbms/src/Parsers/ASTKillQueryQuery.h @@ -11,10 +11,6 @@ public: bool sync = false; // SYNC or ASYNC mode bool test = false; // does it TEST mode? (doesn't cancel queries just checks and shows them) - ASTKillQueryQuery() = default; - - ASTKillQueryQuery(const StringRange range_) : ASTQueryWithOutput(range_) {} - ASTPtr clone() const override { return std::make_shared(*this); } String getID() const override; diff --git a/dbms/src/Parsers/ASTLiteral.h b/dbms/src/Parsers/ASTLiteral.h index e31ae7f49da..36707a7e950 100644 --- a/dbms/src/Parsers/ASTLiteral.h +++ b/dbms/src/Parsers/ASTLiteral.h @@ -15,8 +15,7 @@ class ASTLiteral : public ASTWithAlias public: Field value; - ASTLiteral() = default; - ASTLiteral(const StringRange range_, const Field & value_) : ASTWithAlias(range_), value(value_) {} + ASTLiteral(const Field & value_) : value(value_) {} /** Get the text that identifies this element. */ String getID() const override { return "Literal_" + applyVisitor(FieldVisitorDump(), value); } diff --git a/dbms/src/Parsers/ASTNameTypePair.h b/dbms/src/Parsers/ASTNameTypePair.h index ae0574f1dac..9dad01df2f5 100644 --- a/dbms/src/Parsers/ASTNameTypePair.h +++ b/dbms/src/Parsers/ASTNameTypePair.h @@ -16,9 +16,6 @@ public: /// type ASTPtr type; - ASTNameTypePair() = default; - ASTNameTypePair(const StringRange range_) : IAST(range_) {} - /** Get the text that identifies this element. */ String getID() const override { return "NameTypePair_" + name; } diff --git a/dbms/src/Parsers/ASTOptimizeQuery.h b/dbms/src/Parsers/ASTOptimizeQuery.h index 3caae258b41..1a4bd260ed3 100644 --- a/dbms/src/Parsers/ASTOptimizeQuery.h +++ b/dbms/src/Parsers/ASTOptimizeQuery.h @@ -22,9 +22,6 @@ public: /// Do deduplicate (default: false) bool deduplicate; - ASTOptimizeQuery() = default; - ASTOptimizeQuery(const StringRange range_) : IAST(range_) {} - /** Get the text that identifies this element. */ String getID() const override { return "OptimizeQuery_" + database + "_" + table + (final ? "_final" : "") + (deduplicate ? "_deduplicate" : ""); }; diff --git a/dbms/src/Parsers/ASTOrderByElement.h b/dbms/src/Parsers/ASTOrderByElement.h index bd3d2c5eb06..19d0d723a7c 100644 --- a/dbms/src/Parsers/ASTOrderByElement.h +++ b/dbms/src/Parsers/ASTOrderByElement.h @@ -19,11 +19,12 @@ public: /** Collation for locale-specific string comparison. If empty, then sorting done by bytes. 
*/ ASTPtr collation; - ASTOrderByElement() = default; - ASTOrderByElement(const StringRange range_, - const int direction_, const int nulls_direction_, const bool nulls_direction_was_explicitly_specified_, + ASTOrderByElement( + const int direction_, + const int nulls_direction_, + const bool nulls_direction_was_explicitly_specified_, ASTPtr & collation_) - : IAST(range_), + : direction(direction_), nulls_direction(nulls_direction_), nulls_direction_was_explicitly_specified(nulls_direction_was_explicitly_specified_), collation(collation_) {} diff --git a/dbms/src/Parsers/ASTPartition.h b/dbms/src/Parsers/ASTPartition.h index 9f78d56fca1..b1ed866284a 100644 --- a/dbms/src/Parsers/ASTPartition.h +++ b/dbms/src/Parsers/ASTPartition.h @@ -17,8 +17,6 @@ public: String id; - ASTPartition() = default; - ASTPartition(StringRange range_) : IAST(range_) {} String getID() const override; ASTPtr clone() const override; diff --git a/dbms/src/Parsers/ASTQualifiedAsterisk.h b/dbms/src/Parsers/ASTQualifiedAsterisk.h index 52d9e8b39c6..5baf24686fc 100644 --- a/dbms/src/Parsers/ASTQualifiedAsterisk.h +++ b/dbms/src/Parsers/ASTQualifiedAsterisk.h @@ -12,8 +12,6 @@ namespace DB class ASTQualifiedAsterisk : public IAST { public: - ASTQualifiedAsterisk() = default; - ASTQualifiedAsterisk(StringRange range_) : IAST(range_) {} String getID() const override { return "QualifiedAsterisk"; } ASTPtr clone() const override { return std::make_shared(*this); } String getColumnName() const override; diff --git a/dbms/src/Parsers/ASTQueryWithOutput.h b/dbms/src/Parsers/ASTQueryWithOutput.h index 40ac02380e5..eea314ecbdf 100644 --- a/dbms/src/Parsers/ASTQueryWithOutput.h +++ b/dbms/src/Parsers/ASTQueryWithOutput.h @@ -14,9 +14,6 @@ public: ASTPtr out_file; ASTPtr format; - ASTQueryWithOutput() = default; - explicit ASTQueryWithOutput(const StringRange range_) : IAST(range_) {} - void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const final; /// Remove 'FORMAT and INTO OUTFILE ' if exists @@ -31,17 +28,15 @@ protected: }; -template +template class ASTQueryWithOutputImpl : public ASTQueryWithOutput { public: - explicit ASTQueryWithOutputImpl() = default; - explicit ASTQueryWithOutputImpl(StringRange range_) : ASTQueryWithOutput(range_) {} - String getID() const override { return AstIDAndQueryNames::ID; }; + String getID() const override { return ASTIDAndQueryNames::ID; }; ASTPtr clone() const override { - auto res = std::make_shared>(*this); + auto res = std::make_shared>(*this); res->children.clear(); cloneOutputOptions(*res); return res; @@ -51,7 +46,7 @@ protected: void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override { settings.ostr << (settings.hilite ? hilite_keyword : "") - << AstIDAndQueryNames::Query << (settings.hilite ? hilite_none : ""); + << ASTIDAndQueryNames::Query << (settings.hilite ? 
hilite_none : ""); } }; diff --git a/dbms/src/Parsers/ASTQueryWithTableAndOutput.h b/dbms/src/Parsers/ASTQueryWithTableAndOutput.h index abcb7fa26e5..952ea23fd61 100644 --- a/dbms/src/Parsers/ASTQueryWithTableAndOutput.h +++ b/dbms/src/Parsers/ASTQueryWithTableAndOutput.h @@ -16,9 +16,6 @@ public: String database; String table; - ASTQueryWithTableAndOutput() = default; - explicit ASTQueryWithTableAndOutput(const StringRange range_) : ASTQueryWithOutput(range_) {} - protected: void formatHelper(const FormatSettings & settings, const char * name) const { @@ -32,10 +29,6 @@ template class ASTQueryWithTableAndOutputImpl : public ASTQueryWithTableAndOutput { public: - ASTQueryWithTableAndOutputImpl() = default; - - explicit ASTQueryWithTableAndOutputImpl(const StringRange range_) : ASTQueryWithTableAndOutput(range_) {} - String getID() const override { return AstIDAndQueryNames::ID + ("_" + database) + "_" + table; }; ASTPtr clone() const override diff --git a/dbms/src/Parsers/ASTRenameQuery.h b/dbms/src/Parsers/ASTRenameQuery.h index 1a2eda8b029..3da772a5552 100644 --- a/dbms/src/Parsers/ASTRenameQuery.h +++ b/dbms/src/Parsers/ASTRenameQuery.h @@ -28,9 +28,6 @@ public: using Elements = std::vector; Elements elements; - ASTRenameQuery() = default; - explicit ASTRenameQuery(const StringRange range_) : ASTQueryWithOutput(range_) {} - /** Get the text that identifies this element. */ String getID() const override { return "Rename"; }; diff --git a/dbms/src/Parsers/ASTSampleRatio.h b/dbms/src/Parsers/ASTSampleRatio.h index 84915cab6cb..ca91d0b6cbb 100644 --- a/dbms/src/Parsers/ASTSampleRatio.h +++ b/dbms/src/Parsers/ASTSampleRatio.h @@ -26,9 +26,7 @@ public: Rational ratio; - ASTSampleRatio() = default; - ASTSampleRatio(const StringRange range_) : IAST(range_) {} - ASTSampleRatio(const StringRange range_, Rational & ratio_) : IAST(range_), ratio(ratio_) {} + ASTSampleRatio(Rational & ratio_) : ratio(ratio_) {} String getID() const override { return "SampleRatio_" + toString(ratio); } diff --git a/dbms/src/Parsers/ASTSelectQuery.cpp b/dbms/src/Parsers/ASTSelectQuery.cpp index aec8d6345d9..5c13e730b66 100644 --- a/dbms/src/Parsers/ASTSelectQuery.cpp +++ b/dbms/src/Parsers/ASTSelectQuery.cpp @@ -21,10 +21,6 @@ namespace ErrorCodes } -ASTSelectQuery::ASTSelectQuery(const StringRange range_) : ASTQueryWithOutput(range_) -{ -} - bool ASTSelectQuery::hasArrayJoin(const ASTPtr & ast) { if (const ASTFunction * function = typeid_cast(&*ast)) @@ -127,7 +123,7 @@ void ASTSelectQuery::rewriteSelectExpressionList(const Names & required_column_n if (!other_required_columns_in_select.count(name) && !columns_with_array_join.count(name)) { if (asterisk.first) - new_children.push_back({ std::make_shared(asterisk.first->range, name), asterisk.second }); + new_children.push_back({ std::make_shared(name), asterisk.second }); else throw Exception("SELECT query doesn't have required column: " + backQuoteIfNeed(name), ErrorCodes::THERE_IS_NO_COLUMN); } @@ -530,11 +526,11 @@ void ASTSelectQuery::setDatabaseIfNeeded(const String & database_name) if (table_expression->database_and_table_name->children.empty()) { - ASTPtr database = std::make_shared(StringRange(), database_name, ASTIdentifier::Database); + ASTPtr database = std::make_shared(database_name, ASTIdentifier::Database); ASTPtr table = table_expression->database_and_table_name; const String & old_name = static_cast(*table_expression->database_and_table_name).name; - table_expression->database_and_table_name = std::make_shared(StringRange(), database_name + "." 
+ old_name, ASTIdentifier::Table); + table_expression->database_and_table_name = std::make_shared(database_name + "." + old_name, ASTIdentifier::Table); table_expression->database_and_table_name->children = {database, table}; } else if (table_expression->database_and_table_name->children.size() != 2) @@ -561,20 +557,18 @@ void ASTSelectQuery::replaceDatabaseAndTable(const String & database_name, const table_expression = table_expr.get(); } - ASTPtr table = std::make_shared(StringRange(), table_name, ASTIdentifier::Table); + ASTPtr table = std::make_shared(table_name, ASTIdentifier::Table); if (!database_name.empty()) { - ASTPtr database = std::make_shared(StringRange(), database_name, ASTIdentifier::Database); + ASTPtr database = std::make_shared(database_name, ASTIdentifier::Database); - table_expression->database_and_table_name = std::make_shared( - StringRange(), database_name + "." + table_name, ASTIdentifier::Table); + table_expression->database_and_table_name = std::make_shared(database_name + "." + table_name, ASTIdentifier::Table); table_expression->database_and_table_name->children = {database, table}; } else { - table_expression->database_and_table_name = std::make_shared( - StringRange(), table_name, ASTIdentifier::Table); + table_expression->database_and_table_name = std::make_shared(table_name, ASTIdentifier::Table); } } diff --git a/dbms/src/Parsers/ASTSelectQuery.h b/dbms/src/Parsers/ASTSelectQuery.h index 5f4898d33d9..a524a79bd79 100644 --- a/dbms/src/Parsers/ASTSelectQuery.h +++ b/dbms/src/Parsers/ASTSelectQuery.h @@ -16,9 +16,6 @@ struct ASTTablesInSelectQueryElement; class ASTSelectQuery : public ASTQueryWithOutput { public: - ASTSelectQuery() = default; - ASTSelectQuery(const StringRange range_); - /** Get the text that identifies this element. */ String getID() const override { return "SelectQuery"; }; diff --git a/dbms/src/Parsers/ASTSetQuery.h b/dbms/src/Parsers/ASTSetQuery.h index 08b617db646..3a41ed0d80c 100644 --- a/dbms/src/Parsers/ASTSetQuery.h +++ b/dbms/src/Parsers/ASTSetQuery.h @@ -25,9 +25,6 @@ public: using Changes = std::vector; Changes changes; - ASTSetQuery() = default; - explicit ASTSetQuery(const StringRange range_) : IAST(range_) {} - /** Get the text that identifies this element. */ String getID() const override { return "Set"; }; diff --git a/dbms/src/Parsers/ASTShowTablesQuery.h b/dbms/src/Parsers/ASTShowTablesQuery.h index 176f9d69697..09ee0475847 100644 --- a/dbms/src/Parsers/ASTShowTablesQuery.h +++ b/dbms/src/Parsers/ASTShowTablesQuery.h @@ -20,9 +20,6 @@ public: String like; bool not_like{false}; - ASTShowTablesQuery() = default; - ASTShowTablesQuery(const StringRange range_) : ASTQueryWithOutput(range_) {} - /** Get the text that identifies this element. */ String getID() const override { return "ShowTables"; }; diff --git a/dbms/src/Parsers/ASTSubquery.h b/dbms/src/Parsers/ASTSubquery.h index 2ec2b4469fa..1b9ba97ac88 100644 --- a/dbms/src/Parsers/ASTSubquery.h +++ b/dbms/src/Parsers/ASTSubquery.h @@ -12,9 +12,6 @@ namespace DB class ASTSubquery : public ASTWithAlias { public: - ASTSubquery() = default; - ASTSubquery(const StringRange range_) : ASTWithAlias(range_) {} - /** Get the text that identifies this element. 
*/ String getID() const override { return "Subquery"; } @@ -44,6 +41,7 @@ protected: children[0]->formatImpl(settings, state, frame_nested); settings.ostr << nl_or_nothing << indent_str << ")"; } + String getColumnNameImpl() const override; }; diff --git a/dbms/src/Parsers/ASTSystemQuery.h b/dbms/src/Parsers/ASTSystemQuery.h index 8f9a4bf2208..1821b73bc12 100644 --- a/dbms/src/Parsers/ASTSystemQuery.h +++ b/dbms/src/Parsers/ASTSystemQuery.h @@ -39,9 +39,6 @@ public: //String target_replica_database; //String target_replica_table; - ASTSystemQuery() = default; - explicit ASTSystemQuery(const StringRange range) : IAST(range) {} - String getID() const override { return "SYSTEM query"; }; ASTPtr clone() const override { return std::make_shared(*this); } diff --git a/dbms/src/Parsers/ASTUseQuery.h b/dbms/src/Parsers/ASTUseQuery.h index dd23b24f41a..71108b200d1 100644 --- a/dbms/src/Parsers/ASTUseQuery.h +++ b/dbms/src/Parsers/ASTUseQuery.h @@ -14,9 +14,6 @@ class ASTUseQuery : public IAST public: String database; - ASTUseQuery() = default; - ASTUseQuery(const StringRange range_) : IAST(range_) {} - /** Get the text that identifies this element. */ String getID() const override { return "UseQuery_" + database; }; diff --git a/dbms/src/Parsers/ExpressionElementParsers.cpp b/dbms/src/Parsers/ExpressionElementParsers.cpp index 5e26f42593b..9463b737d82 100644 --- a/dbms/src/Parsers/ExpressionElementParsers.cpp +++ b/dbms/src/Parsers/ExpressionElementParsers.cpp @@ -36,7 +36,6 @@ namespace ErrorCodes bool ParserArray::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; ASTPtr contents_node; ParserExpressionList contents(false); @@ -51,7 +50,7 @@ bool ParserArray::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; ++pos; - auto function_node = std::make_shared(StringRange(begin, pos)); + auto function_node = std::make_shared(); function_node->name = "array"; function_node->arguments = contents_node; function_node->children.push_back(contents_node); @@ -63,7 +62,6 @@ bool ParserArray::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) bool ParserParenthesisExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; ASTPtr contents_node; ParserExpressionList contents(false); @@ -93,7 +91,7 @@ bool ParserParenthesisExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & } else { - auto function_node = std::make_shared(StringRange(begin, pos)); + auto function_node = std::make_shared(); function_node->name = "tuple"; function_node->arguments = contents_node; function_node->children.push_back(contents_node); @@ -106,7 +104,6 @@ bool ParserParenthesisExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & bool ParserSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; ASTPtr select_node; ParserSelectQuery select; @@ -121,7 +118,7 @@ bool ParserSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; ++pos; - node = std::make_shared(StringRange(begin, pos)); + node = std::make_shared(); typeid_cast(*node).children.push_back(select_node); return true; } @@ -129,8 +126,6 @@ bool ParserSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) bool ParserIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected &) { - Pos begin = pos; - /// Identifier in backquotes or in double quotes if (pos->type == TokenType::QuotedIdentifier) { @@ -145,14 +140,14 @@ bool ParserIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected &) if (s.empty()) /// Identifiers "empty 
string" are not allowed. return false; + node = std::make_shared(s); ++pos; - node = std::make_shared(StringRange(begin), s); return true; } else if (pos->type == TokenType::BareWord) { + node = std::make_shared(String(pos->begin, pos->end)); ++pos; - node = std::make_shared(StringRange(begin), String(begin->begin, begin->end)); return true; } @@ -162,8 +157,6 @@ bool ParserIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected &) bool ParserCompoundIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; - ASTPtr id_list; if (!ParserList(std::make_unique(), std::make_unique(TokenType::Dot), false) .parse(pos, id_list, expected)) @@ -178,7 +171,7 @@ bool ParserCompoundIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & ex name += static_cast(*child.get()).name; } - node = std::make_shared(StringRange(begin, pos), name); + node = std::make_shared(name); /// In `children`, remember the identifiers-components, if there are more than one. if (list.children.size() > 1) @@ -190,8 +183,6 @@ bool ParserCompoundIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & ex bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; - ParserIdentifier id_parser; ParserKeyword distinct("DISTINCT"); ParserExpressionList contents(false); @@ -267,7 +258,7 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ++pos; } - auto function_node = std::make_shared(StringRange(begin, pos)); + auto function_node = std::make_shared(); function_node->name = typeid_cast(*identifier).name; /// func(DISTINCT ...) is equivalent to funcDistinct(...) @@ -335,10 +326,10 @@ bool ParserCastExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expect return false; } - expr_list_args = std::make_shared(StringRange{contents_begin, pos}); + expr_list_args = std::make_shared(); first_argument->setAlias({}); expr_list_args->children.push_back(first_argument); - expr_list_args->children.emplace_back(std::make_shared(StringRange(), type)); + expr_list_args->children.emplace_back(std::make_shared(type)); } else { @@ -363,7 +354,7 @@ bool ParserCastExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expect return false; } - expr_list_args = std::make_shared(StringRange{contents_begin, pos}); + expr_list_args = std::make_shared(); expr_list_args->children.push_back(first_argument); expr_list_args->children.push_back(type_as_literal); } @@ -372,7 +363,7 @@ bool ParserCastExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expect return false; ++pos; - const auto function_node = std::make_shared(StringRange(begin, pos)); + const auto function_node = std::make_shared(); ASTPtr node_holder{function_node}; function_node->name = name; @@ -386,11 +377,10 @@ bool ParserCastExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expect bool ParserNull::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; ParserKeyword nested_parser("NULL"); if (nested_parser.parse(pos, node, expected)) { - node = std::make_shared(StringRange(StringRange(begin, pos)), Null()); + node = std::make_shared(Null()); return true; } else @@ -412,7 +402,6 @@ bool ParserNumber::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) Field res; - Pos begin = pos; if (!pos.isValid()) return false; @@ -464,7 +453,7 @@ bool ParserNumber::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } ++pos; - node = std::make_shared(StringRange(begin, pos), res); + node = std::make_shared(res); return true; } @@ -473,7 +462,6 
@@ bool ParserUnsignedInteger::parseImpl(Pos & pos, ASTPtr & node, Expected & expec { Field res; - Pos begin = pos; if (!pos.isValid()) return false; @@ -487,7 +475,7 @@ bool ParserUnsignedInteger::parseImpl(Pos & pos, ASTPtr & node, Expected & expec res = x; ++pos; - node = std::make_shared(StringRange(begin, pos), res); + node = std::make_shared(res); return true; } @@ -497,8 +485,6 @@ bool ParserStringLiteral::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte if (pos->type != TokenType::StringLiteral) return false; - Pos begin = pos; - String s; ReadBufferFromMemory in(pos->begin, pos->size()); @@ -519,7 +505,7 @@ bool ParserStringLiteral::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte } ++pos; - node = std::make_shared(StringRange(begin, pos), s); + node = std::make_shared(s); return true; } @@ -529,7 +515,6 @@ bool ParserArrayOfLiterals::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (pos->type != TokenType::OpeningSquareBracket) return false; - Pos begin = pos; Array arr; ParserLiteral literal_p; @@ -543,7 +528,7 @@ bool ParserArrayOfLiterals::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (pos->type == TokenType::ClosingSquareBracket) { ++pos; - node = std::make_shared(StringRange(begin, pos), arr); + node = std::make_shared(arr); return true; } else if (pos->type == TokenType::Comma) @@ -655,11 +640,10 @@ template class ParserAliasImpl; bool ParserAsterisk::parseImpl(Pos & pos, ASTPtr & node, Expected &) { - Pos begin = pos; if (pos->type == TokenType::Asterisk) { ++pos; - node = std::make_shared(StringRange(begin, pos)); + node = std::make_shared(); return true; } return false; @@ -668,8 +652,6 @@ bool ParserAsterisk::parseImpl(Pos & pos, ASTPtr & node, Expected &) bool ParserQualifiedAsterisk::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; - if (!ParserCompoundIdentifier().parse(pos, node, expected)) return false; @@ -681,7 +663,7 @@ bool ParserQualifiedAsterisk::parseImpl(Pos & pos, ASTPtr & node, Expected & exp return false; ++pos; - auto res = std::make_shared(StringRange(begin, pos)); + auto res = std::make_shared(); res->children.push_back(node); node = std::move(res); return true; @@ -788,8 +770,6 @@ template class ParserWithOptionalAliasImpl; bool ParserOrderByElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; - ParserExpressionWithOptionalAlias elem_p(false); ParserKeyword ascending("ASCENDING"); ParserKeyword descending("DESCENDING"); @@ -834,7 +814,7 @@ bool ParserOrderByElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expect return false; } - node = std::make_shared(StringRange(begin, pos), direction, nulls_direction, nulls_direction_was_explicitly_specified, locale_node); + node = std::make_shared(direction, nulls_direction, nulls_direction_was_explicitly_specified, locale_node); node->children.push_back(expr_elem); if (locale_node) node->children.push_back(locale_node); diff --git a/dbms/src/Parsers/ExpressionListParsers.cpp b/dbms/src/Parsers/ExpressionListParsers.cpp index 50f5ce81e51..5ac3d9f046d 100644 --- a/dbms/src/Parsers/ExpressionListParsers.cpp +++ b/dbms/src/Parsers/ExpressionListParsers.cpp @@ -206,7 +206,6 @@ bool ParserLeftAssociativeBinaryOperatorList::parseImpl(Pos & pos, ASTPtr & node bool ParserVariableArityOperatorList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; ASTPtr arguments; if (!elem_parser->parse(pos, node, expected)) @@ -230,9 +229,6 @@ bool ParserVariableArityOperatorList::parseImpl(Pos & pos, ASTPtr & 
node, Expect arguments->children.push_back(elem); } - if (arguments) - arguments->range = node->range = StringRange(begin, pos); - return true; } @@ -575,8 +571,6 @@ bool ParserNullityChecking::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (!ParserComparisonExpression{}.parse(pos, node_comp, expected)) return false; - Pos begin = pos; - ParserKeyword s_is{"IS"}; ParserKeyword s_not{"NOT"}; ParserKeyword s_null{"NULL"}; @@ -593,7 +587,7 @@ bool ParserNullityChecking::parseImpl(Pos & pos, ASTPtr & node, Expected & expec auto args = std::make_shared(); args->children.push_back(node_comp); - auto function = std::make_shared(StringRange{begin, pos}); + auto function = std::make_shared(); function->name = is_not ? "isNotNull" : "isNull"; function->arguments = args; function->children.push_back(function->arguments); diff --git a/dbms/src/Parsers/IAST.h b/dbms/src/Parsers/IAST.h index 56971824548..9c4f33ebf49 100644 --- a/dbms/src/Parsers/IAST.h +++ b/dbms/src/Parsers/IAST.h @@ -40,13 +40,9 @@ public: ASTs children; StringRange range; - /** A string with a full query. - * This pointer does not allow it to be deleted while the range refers to it. - */ - StringPtr query_string; + /// This pointer does not allow it to be deleted while the range refers to it. + StringPtr owned_string; - IAST() = default; - IAST(const StringRange range_) : range(range_) {} virtual ~IAST() = default; /** Get the canonical name of the column if the element is a column */ diff --git a/dbms/src/Parsers/IParserBase.cpp b/dbms/src/Parsers/IParserBase.cpp index b7e44ace03d..c1a17805068 100644 --- a/dbms/src/Parsers/IParserBase.cpp +++ b/dbms/src/Parsers/IParserBase.cpp @@ -17,13 +17,13 @@ bool IParserBase::parse(Pos & pos, ASTPtr & node, Expected & expected) bool res = parseImpl(pos, node, expected); - /// TODO expected - if (!res) { node = nullptr; pos = begin; } + else + node->range = StringRange(begin, pos); return res; } diff --git a/dbms/src/Parsers/ParserAlterQuery.cpp b/dbms/src/Parsers/ParserAlterQuery.cpp index 2667c2bf247..dbd1805e7b1 100644 --- a/dbms/src/Parsers/ParserAlterQuery.cpp +++ b/dbms/src/Parsers/ParserAlterQuery.cpp @@ -12,8 +12,6 @@ namespace DB bool ParserAlterQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; - ParserKeyword s_alter_table("ALTER TABLE"); ParserKeyword s_add_column("ADD COLUMN"); ParserKeyword s_drop_column("DROP COLUMN"); @@ -218,7 +216,6 @@ bool ParserAlterQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } while (!parsing_finished); - query->range = StringRange(begin, pos); query->cluster = cluster_str; node = query; diff --git a/dbms/src/Parsers/ParserCase.cpp b/dbms/src/Parsers/ParserCase.cpp index 368ccc657ca..e932dfdb04f 100644 --- a/dbms/src/Parsers/ParserCase.cpp +++ b/dbms/src/Parsers/ParserCase.cpp @@ -10,8 +10,6 @@ namespace DB bool ParserCase::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; - ParserKeyword s_case{"CASE"}; ParserKeyword s_when{"WHEN"}; ParserKeyword s_then{"THEN"}; @@ -22,7 +20,6 @@ bool ParserCase::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!s_case.parse(pos, node, expected)) { /// Parse as a simple ASTFunction. 
- pos = begin; return ParserFunction{}.parse(pos, node, expected); } @@ -80,10 +77,10 @@ bool ParserCase::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!parse_branches()) return false; - auto function_args = std::make_shared(StringRange{begin, pos}); + auto function_args = std::make_shared(); function_args->children = std::move(args); - auto function = std::make_shared(StringRange{begin, pos}); + auto function = std::make_shared(); function->name = "caseWithExpression"; function->arguments = function_args; function->children.push_back(function->arguments); @@ -95,10 +92,10 @@ bool ParserCase::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!parse_branches()) return false; - auto function_args = std::make_shared(StringRange{begin, pos}); + auto function_args = std::make_shared(); function_args->children = std::move(args); - auto function = std::make_shared(StringRange{begin, pos}); + auto function = std::make_shared(); function->name = "multiIf"; function->arguments = function_args; function->children.push_back(function->arguments); diff --git a/dbms/src/Parsers/ParserCheckQuery.cpp b/dbms/src/Parsers/ParserCheckQuery.cpp index b4e243b240b..d9fd46694d6 100644 --- a/dbms/src/Parsers/ParserCheckQuery.cpp +++ b/dbms/src/Parsers/ParserCheckQuery.cpp @@ -12,8 +12,6 @@ namespace DB bool ParserCheckQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; - ParserKeyword s_check_table("CHECK TABLE"); ParserToken s_dot(TokenType::Dot); @@ -32,7 +30,7 @@ bool ParserCheckQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!table_parser.parse(pos, table, expected)) return false; - auto query = std::make_shared(StringRange(begin, pos)); + auto query = std::make_shared(); query->database = typeid_cast(*database).name; query->table = typeid_cast(*table).name; node = query; @@ -40,7 +38,7 @@ bool ParserCheckQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) else { table = database; - auto query = std::make_shared(StringRange(begin, pos)); + auto query = std::make_shared(); query->table = typeid_cast(*table).name; node = query; } diff --git a/dbms/src/Parsers/ParserCreateQuery.cpp b/dbms/src/Parsers/ParserCreateQuery.cpp index 6f3d3a4db7c..24c111afc3d 100644 --- a/dbms/src/Parsers/ParserCreateQuery.cpp +++ b/dbms/src/Parsers/ParserCreateQuery.cpp @@ -21,8 +21,6 @@ bool ParserNestedTable::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ASTPtr name; ASTPtr columns; - Pos begin = pos; - /// For now `name == 'Nested'`, probably alternative nested data structures will appear if (!name_p.parse(pos, name, expected)) return false; @@ -36,7 +34,7 @@ bool ParserNestedTable::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!close.ignore(pos)) return false; - auto func = std::make_shared(StringRange(begin, pos)); + auto func = std::make_shared(); func->name = typeid_cast(*name).name; func->arguments = columns; func->children.push_back(columns); @@ -65,15 +63,13 @@ bool ParserIdentifierWithOptionalParameters::parseImpl(Pos & pos, ASTPtr & node, ParserIdentifier non_parametric; ParserIdentifierWithParameters parametric; - Pos begin = pos; - if (parametric.parse(pos, node, expected)) return true; ASTPtr ident; if (non_parametric.parse(pos, ident, expected)) { - auto func = std::make_shared(StringRange(begin)); + auto func = std::make_shared(); func->name = typeid_cast(*ident).name; node = func; return true; @@ -87,7 +83,7 @@ bool ParserTypeInCastExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & if 
(ParserIdentifierWithOptionalParameters::parseImpl(pos, node, expected)) { const auto & id_with_params = typeid_cast(*node); - node = std::make_shared(id_with_params.range, String{ id_with_params.range.first, id_with_params.range.second }); + node = std::make_shared(String{ id_with_params.range.first, id_with_params.range.second }); return true; } @@ -120,8 +116,6 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserExpression expression_p; ParserSetQuery settings_p(/* parse_only_internals_ = */ true); - Pos begin = pos; - ASTPtr engine; ASTPtr partition_by; ASTPtr order_by; @@ -171,7 +165,7 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) break; } - auto storage = std::make_shared(StringRange(begin, pos)); + auto storage = std::make_shared(); storage->set(storage->engine, engine); storage->set(storage->partition_by, partition_by); storage->set(storage->order_by, order_by); @@ -185,8 +179,6 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; - ParserKeyword s_create("CREATE"); ParserKeyword s_temporary("TEMPORARY"); ParserKeyword s_attach("ATTACH"); @@ -259,7 +251,7 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) // Shortcut for ATTACH a previously detached table if (attach && (!pos.isValid() || pos.get().type == TokenType::Semicolon)) { - auto query = std::make_shared(StringRange(begin, pos)); + auto query = std::make_shared(); node = query; query->attach = attach; @@ -403,7 +395,7 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; } - auto query = std::make_shared(StringRange(begin, pos)); + auto query = std::make_shared(); node = query; query->attach = attach; diff --git a/dbms/src/Parsers/ParserCreateQuery.h b/dbms/src/Parsers/ParserCreateQuery.h index 4e5261ff8b6..4225fa9b8ca 100644 --- a/dbms/src/Parsers/ParserCreateQuery.h +++ b/dbms/src/Parsers/ParserCreateQuery.h @@ -75,13 +75,11 @@ bool IParserNameTypePair::parseImpl(Pos & pos, ASTPtr & node, Expect NameParser name_parser; ParserIdentifierWithOptionalParameters type_parser; - Pos begin = pos; - ASTPtr name, type; if (name_parser.parse(pos, name, expected) && type_parser.parse(pos, type, expected)) { - auto name_type_pair = std::make_shared(StringRange(begin, pos)); + auto name_type_pair = std::make_shared(); name_type_pair->name = typeid_cast(*name).name; name_type_pair->type = type; name_type_pair->children.push_back(type); @@ -122,8 +120,6 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ParserKeyword s_alias{"ALIAS"}; ParserTernaryOperatorExpression expr_parser; - const auto begin = pos; - /// mandatory column name ASTPtr name; if (!name_parser.parse(pos, name, expected)) @@ -160,7 +156,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E else if (!type) return false; /// reject sole column name without type - const auto column_declaration = std::make_shared(StringRange{begin, pos}); + const auto column_declaration = std::make_shared(); node = column_declaration; column_declaration->name = typeid_cast(*name).name; if (type) diff --git a/dbms/src/Parsers/ParserDescribeTableQuery.cpp b/dbms/src/Parsers/ParserDescribeTableQuery.cpp index 1341f105c89..ebfc3baa33f 100644 --- a/dbms/src/Parsers/ParserDescribeTableQuery.cpp +++ b/dbms/src/Parsers/ParserDescribeTableQuery.cpp @@ -14,8 +14,6 @@ namespace DB bool 
ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; - ParserKeyword s_describe("DESCRIBE"); ParserKeyword s_desc("DESC"); ParserKeyword s_table("TABLE"); @@ -36,7 +34,6 @@ bool ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & ex if (!ParserTableExpression().parse(pos, table_expression, expected)) return false; - query->range = StringRange(begin, pos); query->table_expression = table_expression; node = query; diff --git a/dbms/src/Parsers/ParserDropQuery.cpp b/dbms/src/Parsers/ParserDropQuery.cpp index 0475711a225..fe4249db019 100644 --- a/dbms/src/Parsers/ParserDropQuery.cpp +++ b/dbms/src/Parsers/ParserDropQuery.cpp @@ -13,8 +13,6 @@ namespace DB bool ParserDropQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; - ParserKeyword s_drop("DROP"); ParserKeyword s_detach("DETACH"); ParserKeyword s_temporary("TEMPORARY"); @@ -81,7 +79,7 @@ bool ParserDropQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } } - auto query = std::make_shared(StringRange(begin, pos)); + auto query = std::make_shared(); node = query; query->detach = detach; diff --git a/dbms/src/Parsers/ParserInsertQuery.cpp b/dbms/src/Parsers/ParserInsertQuery.cpp index 2fe8ca46f68..ef1aeb6620e 100644 --- a/dbms/src/Parsers/ParserInsertQuery.cpp +++ b/dbms/src/Parsers/ParserInsertQuery.cpp @@ -24,8 +24,6 @@ namespace ErrorCodes bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; - ParserKeyword s_insert_into("INSERT INTO"); ParserKeyword s_table("TABLE"); ParserKeyword s_function("FUNCTION"); @@ -130,7 +128,7 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; } - auto query = std::make_shared(StringRange(begin, pos)); + auto query = std::make_shared(); node = query; if (table_function) diff --git a/dbms/src/Parsers/ParserKillQueryQuery.cpp b/dbms/src/Parsers/ParserKillQueryQuery.cpp index bbc4eb22ead..8179a4897fc 100644 --- a/dbms/src/Parsers/ParserKillQueryQuery.cpp +++ b/dbms/src/Parsers/ParserKillQueryQuery.cpp @@ -11,7 +11,6 @@ namespace DB bool ParserKillQueryQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; auto query = std::make_shared(); if (!ParserKeyword{"KILL QUERY"}.ignore(pos, expected)) @@ -31,8 +30,6 @@ bool ParserKillQueryQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expect else if (ParserKeyword{"TEST"}.ignore(pos)) query->test = true; - query->range = StringRange(begin, pos); - node = std::move(query); return true; diff --git a/dbms/src/Parsers/ParserOptimizeQuery.cpp b/dbms/src/Parsers/ParserOptimizeQuery.cpp index 90e9146210a..c01a1a7b5df 100644 --- a/dbms/src/Parsers/ParserOptimizeQuery.cpp +++ b/dbms/src/Parsers/ParserOptimizeQuery.cpp @@ -15,8 +15,6 @@ namespace DB bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; - ParserKeyword s_optimize_table("OPTIMIZE TABLE"); ParserKeyword s_partition("PARTITION"); ParserKeyword s_final("FINAL"); @@ -56,7 +54,7 @@ bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte if (s_deduplicate.ignore(pos, expected)) deduplicate = true; - auto query = std::make_shared(StringRange(begin, pos)); + auto query = std::make_shared(); node = query; if (database) diff --git a/dbms/src/Parsers/ParserPartition.cpp b/dbms/src/Parsers/ParserPartition.cpp index 9ecd03d9cd8..1daf4dead18 100644 --- a/dbms/src/Parsers/ParserPartition.cpp +++ 
b/dbms/src/Parsers/ParserPartition.cpp @@ -73,7 +73,6 @@ bool ParserPartition::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) partition->fields_count = fields_count; } - partition->range = StringRange(begin, pos); node = partition; return true; } diff --git a/dbms/src/Parsers/ParserRenameQuery.cpp b/dbms/src/Parsers/ParserRenameQuery.cpp index 219311b63d1..6eb8d768df9 100644 --- a/dbms/src/Parsers/ParserRenameQuery.cpp +++ b/dbms/src/Parsers/ParserRenameQuery.cpp @@ -40,8 +40,6 @@ static bool parseDatabaseAndTable( bool ParserRenameQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; - ParserKeyword s_rename_table("RENAME TABLE"); ParserKeyword s_to("TO"); ParserToken s_comma(TokenType::Comma); @@ -71,7 +69,7 @@ bool ParserRenameQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; } - auto query = std::make_shared(StringRange(begin, pos)); + auto query = std::make_shared(); query->cluster = cluster_str; node = query; diff --git a/dbms/src/Parsers/ParserSampleRatio.cpp b/dbms/src/Parsers/ParserSampleRatio.cpp index 85cba60a370..3091ed91570 100644 --- a/dbms/src/Parsers/ParserSampleRatio.cpp +++ b/dbms/src/Parsers/ParserSampleRatio.cpp @@ -83,8 +83,6 @@ static bool parseDecimal(const char * pos, const char * end, ASTSampleRatio::Rat */ bool ParserSampleRatio::parseImpl(Pos & pos, ASTPtr & node, Expected &) { - auto begin = pos; - ASTSampleRatio::Rational numerator; ASTSampleRatio::Rational denominator; ASTSampleRatio::Rational res; @@ -111,7 +109,7 @@ bool ParserSampleRatio::parseImpl(Pos & pos, ASTPtr & node, Expected &) res = numerator; } - node = std::make_shared(StringRange(begin, pos), res); + node = std::make_shared(res); return true; } diff --git a/dbms/src/Parsers/ParserSelectQuery.cpp b/dbms/src/Parsers/ParserSelectQuery.cpp index 99240a31f76..c8c2aec1d50 100644 --- a/dbms/src/Parsers/ParserSelectQuery.cpp +++ b/dbms/src/Parsers/ParserSelectQuery.cpp @@ -22,8 +22,6 @@ namespace ErrorCodes bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; - auto select_query = std::make_shared(); node = select_query; @@ -183,8 +181,6 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) next_select_query->prev_union_all = node.get(); } - select_query->range = StringRange(begin, pos); - if (select_query->with_expression_list) select_query->children.push_back(select_query->with_expression_list); select_query->children.push_back(select_query->select_expression_list); diff --git a/dbms/src/Parsers/ParserSetQuery.cpp b/dbms/src/Parsers/ParserSetQuery.cpp index 109c4e5acc1..11f125bb955 100644 --- a/dbms/src/Parsers/ParserSetQuery.cpp +++ b/dbms/src/Parsers/ParserSetQuery.cpp @@ -40,8 +40,6 @@ static bool parseNameValuePair(ASTSetQuery::Change & change, IParser::Pos & pos, bool ParserSetQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; - ParserToken s_comma(TokenType::Comma); if (!parse_only_internals) @@ -65,7 +63,7 @@ bool ParserSetQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; } - auto query = std::make_shared(StringRange(begin, pos)); + auto query = std::make_shared(); node = query; query->is_standalone = !parse_only_internals; diff --git a/dbms/src/Parsers/ParserShowProcesslistQuery.h b/dbms/src/Parsers/ParserShowProcesslistQuery.h index 5958259fb76..b7a661516b0 100644 --- a/dbms/src/Parsers/ParserShowProcesslistQuery.h +++ b/dbms/src/Parsers/ParserShowProcesslistQuery.h @@ -19,14 +19,11 @@ 
protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; - auto query = std::make_shared(); if (!ParserKeyword("SHOW PROCESSLIST").ignore(pos, expected)) return false; - query->range = StringRange(begin, pos); node = query; return true; diff --git a/dbms/src/Parsers/ParserShowTablesQuery.cpp b/dbms/src/Parsers/ParserShowTablesQuery.cpp index 5399eeef50c..e4d6b5288d2 100644 --- a/dbms/src/Parsers/ParserShowTablesQuery.cpp +++ b/dbms/src/Parsers/ParserShowTablesQuery.cpp @@ -15,8 +15,6 @@ namespace DB bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; - ParserKeyword s_show("SHOW"); ParserKeyword s_temporary("TEMPORARY"); ParserKeyword s_tables("TABLES"); @@ -67,8 +65,6 @@ bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec return false; } - query->range = StringRange(begin, pos); - if (database) query->from = typeid_cast(*database).name; if (like) diff --git a/dbms/src/Parsers/ParserSystemQuery.cpp b/dbms/src/Parsers/ParserSystemQuery.cpp index 2ef71b9dcb3..b430e9e7fc7 100644 --- a/dbms/src/Parsers/ParserSystemQuery.cpp +++ b/dbms/src/Parsers/ParserSystemQuery.cpp @@ -18,8 +18,6 @@ namespace DB bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & expected) { - auto begin = pos; - if (!ParserKeyword{"SYSTEM"}.ignore(pos)) return false; @@ -51,7 +49,6 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & throw Exception("SYNC REPLICA is not supported yet", ErrorCodes::NOT_IMPLEMENTED); } - res->range = {begin, pos}; node = std::move(res); return true; } diff --git a/dbms/src/Parsers/ParserTablePropertiesQuery.cpp b/dbms/src/Parsers/ParserTablePropertiesQuery.cpp index 8ed7046b2b2..4c9383d460a 100644 --- a/dbms/src/Parsers/ParserTablePropertiesQuery.cpp +++ b/dbms/src/Parsers/ParserTablePropertiesQuery.cpp @@ -13,8 +13,6 @@ namespace DB bool ParserTablePropertiesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; - ParserKeyword s_exists("EXISTS"); ParserKeyword s_describe("DESCRIBE"); ParserKeyword s_desc("DESC"); @@ -56,8 +54,6 @@ bool ParserTablePropertiesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & return false; } - query->range = StringRange(begin, pos); - if (database) query->database = typeid_cast(*database).name; if (table) diff --git a/dbms/src/Parsers/ParserUseQuery.cpp b/dbms/src/Parsers/ParserUseQuery.cpp index 25d41dae213..9e521a0d746 100644 --- a/dbms/src/Parsers/ParserUseQuery.cpp +++ b/dbms/src/Parsers/ParserUseQuery.cpp @@ -9,10 +9,9 @@ namespace DB { + bool ParserUseQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - Pos begin = pos; - ParserKeyword s_use("USE"); ParserIdentifier name_p; @@ -24,10 +23,11 @@ bool ParserUseQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!name_p.parse(pos, database, expected)) return false; - auto query = std::make_shared(StringRange(begin, pos)); + auto query = std::make_shared(); query->database = typeid_cast(*database).name; node = query; return true; } + } diff --git a/dbms/src/Parsers/StringRange.h b/dbms/src/Parsers/StringRange.h index 0c901bf5309..b919a899293 100644 --- a/dbms/src/Parsers/StringRange.h +++ b/dbms/src/Parsers/StringRange.h @@ -11,8 +11,8 @@ namespace DB struct StringRange { - const char * first; - const char * second; + const char * first = nullptr; + const char * second = nullptr; StringRange() {} StringRange(const char * begin, const char * end) : first(begin), 
second(end) {} @@ -41,7 +41,7 @@ using StringPtr = std::shared_ptr; inline String toString(const StringRange & range) { - return String(range.first, range.second); + return range.first ? String(range.first, range.second) : String(); } } diff --git a/dbms/src/Parsers/TablePropertiesQueriesASTs.h b/dbms/src/Parsers/TablePropertiesQueriesASTs.h index 667ef00399b..312b112f494 100644 --- a/dbms/src/Parsers/TablePropertiesQueriesASTs.h +++ b/dbms/src/Parsers/TablePropertiesQueriesASTs.h @@ -32,8 +32,6 @@ class ASTDescribeQuery : public ASTQueryWithOutput public: ASTPtr table_expression; - ASTDescribeQuery() = default; - explicit ASTDescribeQuery(StringRange range_) : ASTQueryWithOutput(range_) {} String getID() const override { return "DescribeQuery"; }; ASTPtr clone() const override diff --git a/dbms/src/Parsers/parseIdentifierOrStringLiteral.cpp b/dbms/src/Parsers/parseIdentifierOrStringLiteral.cpp index 81281bbc0d6..eaff2e85a9a 100644 --- a/dbms/src/Parsers/parseIdentifierOrStringLiteral.cpp +++ b/dbms/src/Parsers/parseIdentifierOrStringLiteral.cpp @@ -10,12 +10,10 @@ namespace DB bool parseIdentifierOrStringLiteral(IParser::Pos & pos, Expected & expected, String & result) { - IParser::Pos begin = pos; ASTPtr res; if (!ParserIdentifier().parse(pos, res, expected)) { - pos = begin; if (!ParserStringLiteral().parse(pos, res, expected)) return false; @@ -27,4 +25,4 @@ bool parseIdentifierOrStringLiteral(IParser::Pos & pos, Expected & expected, Str return true; } -} \ No newline at end of file +} diff --git a/dbms/src/Parsers/queryToString.cpp b/dbms/src/Parsers/queryToString.cpp index 35e2edd61d6..d214468c2a9 100644 --- a/dbms/src/Parsers/queryToString.cpp +++ b/dbms/src/Parsers/queryToString.cpp @@ -13,7 +13,6 @@ namespace DB { std::ostringstream out; formatAST(query, out, false, true); - return out.str(); } } diff --git a/dbms/src/Server/ClusterCopier.cpp b/dbms/src/Server/ClusterCopier.cpp index ae1d0643ff1..571b8a58232 100644 --- a/dbms/src/Server/ClusterCopier.cpp +++ b/dbms/src/Server/ClusterCopier.cpp @@ -373,9 +373,9 @@ std::shared_ptr createASTStorageDistributed( const String & cluster_name, const String & database, const String & table, const ASTPtr & sharding_key_ast = nullptr) { auto args = std::make_shared(); - args->children.emplace_back(std::make_shared(StringRange(nullptr, nullptr), cluster_name)); - args->children.emplace_back(std::make_shared(StringRange(nullptr, nullptr), database)); - args->children.emplace_back(std::make_shared(StringRange(nullptr, nullptr), table)); + args->children.emplace_back(std::make_shared(cluster_name)); + args->children.emplace_back(std::make_shared(database)); + args->children.emplace_back(std::make_shared(table)); if (sharding_key_ast) args->children.emplace_back(sharding_key_ast); @@ -487,7 +487,7 @@ static ASTPtr extractPartitionKey(const ASTPtr & storage_ast) return storage.partition_by->clone(); static const char * all = "all"; - return std::make_shared(StringRange(all, all + strlen(all)), Field(all, strlen(all))); + return std::make_shared(Field(all, strlen(all))); } else { diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index b5a726ecc6e..2ad9ae6e27d 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -245,8 +245,8 @@ void AlterCommands::validate(IStorage * table, const Context & context) const auto column_type_raw_ptr = command.data_type.get(); default_expr_list->children.emplace_back(setAlias( - makeASTFunction("CAST", std::make_shared(StringRange(), 
tmp_column_name), - std::make_shared(StringRange(), Field(column_type_raw_ptr->getName()))), + makeASTFunction("CAST", std::make_shared(tmp_column_name), + std::make_shared(Field(column_type_raw_ptr->getName()))), final_column_name)); default_expr_list->children.emplace_back(setAlias(command.default_expression->clone(), tmp_column_name)); @@ -299,8 +299,8 @@ void AlterCommands::validate(IStorage * table, const Context & context) const auto & column_type_ptr = column_it->type; default_expr_list->children.emplace_back(setAlias( - makeASTFunction("CAST", std::make_shared(StringRange(), tmp_column_name), - std::make_shared(StringRange(), Field(column_type_ptr->getName()))), + makeASTFunction("CAST", std::make_shared(tmp_column_name), + std::make_shared(Field(column_type_ptr->getName()))), column_name)); default_expr_list->children.emplace_back(setAlias(col_def.second.expression->clone(), tmp_column_name)); @@ -345,7 +345,7 @@ void AlterCommands::validate(IStorage * table, const Context & context) } command_ptr->default_expression = makeASTFunction("CAST", command_ptr->default_expression->clone(), - std::make_shared(StringRange(), Field(explicit_type->getName()))); + std::make_shared(Field(explicit_type->getName()))); } } else diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 8c9655fe68a..b6d19da1d89 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -434,7 +434,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::read( ASTPtr args = std::make_shared(); args->children.push_back(data.sampling_expression); - args->children.push_back(std::make_shared(StringRange(), lower)); + args->children.push_back(std::make_shared(lower)); lower_function = std::make_shared(); lower_function->name = "greaterOrEquals"; @@ -451,7 +451,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::read( ASTPtr args = std::make_shared(); args->children.push_back(data.sampling_expression); - args->children.push_back(std::make_shared(StringRange(), upper)); + args->children.push_back(std::make_shared(upper)); upper_function = std::make_shared(); upper_function->name = "less"; @@ -846,8 +846,8 @@ void MergeTreeDataSelectExecutor::createPositiveSignCondition( { auto function = std::make_shared(); auto arguments = std::make_shared(); - auto sign = std::make_shared(); - auto one = std::make_shared(); + auto sign = std::make_shared(data.merging_params.sign_column); + auto one = std::make_shared(Field(static_cast(1))); function->name = "equals"; function->arguments = arguments; @@ -856,11 +856,6 @@ void MergeTreeDataSelectExecutor::createPositiveSignCondition( arguments->children.push_back(sign); arguments->children.push_back(one); - sign->name = data.merging_params.sign_column; - sign->kind = ASTIdentifier::Column; - - one->value = Field(static_cast(1)); - out_expression = ExpressionAnalyzer(function, context, {}, data.getColumnsList()).getActions(false); out_column = function->getColumnName(); } diff --git a/dbms/src/Storages/MergeTree/PKCondition.cpp b/dbms/src/Storages/MergeTree/PKCondition.cpp index 798266e1d15..1d80ea38a87 100644 --- a/dbms/src/Storages/MergeTree/PKCondition.cpp +++ b/dbms/src/Storages/MergeTree/PKCondition.cpp @@ -13,6 +13,7 @@ #include #include #include +#include namespace DB @@ -593,7 +594,7 @@ static void castValueToType(const DataTypePtr & desired_type, Field & src_value, { throw Exception("Primary key expression contains 
comparison between inconvertible types: " + desired_type->getName() + " and " + src_type->getName() + - " inside " + DB::toString(node->range), + " inside " + queryToString(node), ErrorCodes::BAD_TYPE_OF_FIELD); } } diff --git a/dbms/src/Storages/StorageBuffer.cpp b/dbms/src/Storages/StorageBuffer.cpp index 1b32ba197ff..62ab67df163 100644 --- a/dbms/src/Storages/StorageBuffer.cpp +++ b/dbms/src/Storages/StorageBuffer.cpp @@ -558,7 +558,7 @@ void StorageBuffer::writeBlockToDestination(const Block & block, StoragePtr tabl insert->columns = list_of_columns; list_of_columns->children.reserve(columns_intersection.size()); for (const String & column : columns_intersection) - list_of_columns->children.push_back(std::make_shared(StringRange(), column, ASTIdentifier::Column)); + list_of_columns->children.push_back(std::make_shared(column, ASTIdentifier::Column)); InterpreterInsertQuery interpreter{insert, context, allow_materialized}; diff --git a/dbms/src/Storages/StorageCatBoostPool.cpp b/dbms/src/Storages/StorageCatBoostPool.cpp index 74a40372b79..cbaf523e3bf 100644 --- a/dbms/src/Storages/StorageCatBoostPool.cpp +++ b/dbms/src/Storages/StorageCatBoostPool.cpp @@ -243,13 +243,12 @@ void StorageCatBoostPool::createSampleBlockAndColumns() if (!desc.alias.empty()) { - auto alias = std::make_shared(); - alias->name = desc.column_name; + auto alias = std::make_shared(desc.column_name); column_defaults[desc.alias] = {ColumnDefaultType::Alias, alias}; alias_columns.emplace_back(desc.alias, type); } - sample_block.insert(ColumnWithTypeAndName(type->createColumn(), type, desc.column_name)); + sample_block.insert(ColumnWithTypeAndName(type, desc.column_name)); } columns.insert(columns.end(), num_columns.begin(), num_columns.end()); columns.insert(columns.end(), cat_columns.begin(), cat_columns.end()); diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp index b191f598a0e..aad4245d39c 100644 --- a/dbms/src/Storages/StorageDistributed.cpp +++ b/dbms/src/Storages/StorageDistributed.cpp @@ -267,13 +267,10 @@ BlockInputStreams StorageDistributed::describe(const Context & context, const Se std::string name = remote_database + '.' 
+ remote_table;
 
-        auto id = std::make_shared<ASTIdentifier>();
-        id->name = name;
+        auto id = std::make_shared<ASTIdentifier>(name);
 
-        auto desc_database = std::make_shared<ASTIdentifier>();
-        auto desc_table = std::make_shared<ASTIdentifier>();
-        desc_database->name = remote_database;
-        desc_table->name = remote_table;
+        auto desc_database = std::make_shared<ASTIdentifier>(remote_database);
+        auto desc_table = std::make_shared<ASTIdentifier>(remote_table);
 
         id->children.push_back(desc_database);
         id->children.push_back(desc_table);
diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp
index 29d0ba0ba08..317891065c4 100644
--- a/dbms/src/Storages/StorageMergeTree.cpp
+++ b/dbms/src/Storages/StorageMergeTree.cpp
@@ -202,9 +202,10 @@ void StorageMergeTree::alter(
     IDatabase::ASTModifier storage_modifier;
     if (primary_key_is_modified)
+    {
         storage_modifier = [&new_primary_key_ast] (IAST & ast)
         {
-            auto tuple = std::make_shared<ASTFunction>(new_primary_key_ast->range);
+            auto tuple = std::make_shared<ASTFunction>();
             tuple->name = "tuple";
             tuple->arguments = new_primary_key_ast;
             tuple->children.push_back(tuple->arguments);
@@ -214,6 +215,7 @@ void StorageMergeTree::alter(
             auto & storage_ast = typeid_cast<ASTStorage &>(ast);
             typeid_cast<ASTExpressionList &>(*storage_ast.engine->arguments).children.at(1) = tuple;
         };
+    }
 
     context.getDatabase(database_name)->alterTable(
         context, table_name,
diff --git a/dbms/src/Storages/VirtualColumnUtils.cpp b/dbms/src/Storages/VirtualColumnUtils.cpp
index 70fa6f8712a..e9efe406e45 100644
--- a/dbms/src/Storages/VirtualColumnUtils.cpp
+++ b/dbms/src/Storages/VirtualColumnUtils.cpp
@@ -81,7 +81,7 @@ void rewriteEntityInAst(ASTPtr ast, const String & column_name, const Field & va
     }
 
     ASTExpressionList & with = typeid_cast<ASTExpressionList &>(*select.with_expression_list);
-    auto literal = std::make_shared<ASTLiteral>(StringRange(), value);
+    auto literal = std::make_shared<ASTLiteral>(value);
     literal->alias = column_name;
     literal->prefer_alias_to_column_name = true;
     with.children.push_back(literal);
diff --git a/dbms/src/Storages/transformQueryForExternalDatabase.cpp b/dbms/src/Storages/transformQueryForExternalDatabase.cpp
index 538c8ede9a4..bf4fd14b23a 100644
--- a/dbms/src/Storages/transformQueryForExternalDatabase.cpp
+++ b/dbms/src/Storages/transformQueryForExternalDatabase.cpp
@@ -70,7 +70,7 @@ String transformQueryForExternalDatabase(
     auto select_expr_list = std::make_shared<ASTExpressionList>();
 
     for (const auto & name : used_columns)
-        select_expr_list->children.push_back(std::make_shared<ASTIdentifier>(StringRange(), name));
+        select_expr_list->children.push_back(std::make_shared<ASTIdentifier>(name));
 
     select->select_expression_list = std::move(select_expr_list);

From 89c0dc2eeaf1dd8eef1362104e6e1e50d8038c6c Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 26 Feb 2018 06:45:46 +0300
Subject: [PATCH 113/209] Fixed build #1947

---
 dbms/src/Interpreters/ExpressionAnalyzer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp
index a1cf16a1a27..93352e7b027 100644
--- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp
+++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp
@@ -1027,7 +1027,7 @@ void ExpressionAnalyzer::normalizeTreeImpl(
     ASTs & asts = node->children;
     for (int i = static_cast<int>(asts.size()) - 1; i >= 0; --i)
     {
-        if (ASTAsterisk * asterisk = typeid_cast<ASTAsterisk *>(asts[i].get()))
+        if (typeid_cast<ASTAsterisk *>(asts[i].get()))
         {
             ASTs all_columns;
             for (const auto & column_name_type : columns)

From a4544ac498830703f8044cd56d0d06fc35d387e5 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 26 Feb 2018 06:47:32 +0300
Subject: [PATCH 114/209] Fixed error [#CLICKHOUSE-2]
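The previous commit made IParserBase::parse assign the range unconditionally on
success, yet some parsers legitimately succeed without producing an AST node.
A minimal illustrative sketch of the failure mode (not part of the patch;
ParserOptionalKeyword is a hypothetical name, while the interfaces match the
ones used elsewhere in this series):

struct ParserOptionalKeyword : public IParserBase
{
    const char * getName() const { return "optional keyword"; }

    bool parseImpl(Pos & pos, ASTPtr & /*node*/, Expected & expected)
    {
        /// Succeeds whether or not the keyword is present and never assigns `node`.
        ParserKeyword("FINAL").ignore(pos, expected);
        return true;
    }
};

/// With the previous unconditional `else node->range = StringRange(begin, pos);`,
/// calling parse() on such a parser dereferenced a null ASTPtr; the
/// `else if (node)` guard below skips the assignment when no node was produced.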
--- dbms/src/Parsers/IParserBase.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Parsers/IParserBase.cpp b/dbms/src/Parsers/IParserBase.cpp index c1a17805068..dffb1d7597d 100644 --- a/dbms/src/Parsers/IParserBase.cpp +++ b/dbms/src/Parsers/IParserBase.cpp @@ -22,7 +22,7 @@ bool IParserBase::parse(Pos & pos, ASTPtr & node, Expected & expected) node = nullptr; pos = begin; } - else + else if (node) node->range = StringRange(begin, pos); return res; From 5c450708e1d32b35e736c66f69f8bf137136245f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 26 Feb 2018 07:47:27 +0300 Subject: [PATCH 115/209] Fixed bad code #1947 --- dbms/src/Parsers/ExpressionElementParsers.cpp | 5 +++-- dbms/src/Parsers/ParserCreateQuery.cpp | 2 +- dbms/src/Parsers/ParserCreateQuery.h | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/dbms/src/Parsers/ExpressionElementParsers.cpp b/dbms/src/Parsers/ExpressionElementParsers.cpp index 9463b737d82..5727d058c22 100644 --- a/dbms/src/Parsers/ExpressionElementParsers.cpp +++ b/dbms/src/Parsers/ExpressionElementParsers.cpp @@ -319,6 +319,7 @@ bool ParserCastExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expect { /// CAST(expression AS type) const auto type = first_argument->tryGetAlias(); + if (type.empty()) { /// there is only one argument and it has no alias @@ -610,7 +611,7 @@ bool ParserAliasImpl::parseImpl(Pos & pos, ASTPtr & node, Expe ParserKeyword s_as("AS"); ParserIdentifier id_p; - bool has_as_word = s_as.parse(pos, node, expected); + bool has_as_word = s_as.ignore(pos, expected); if (!allow_alias_without_as_keyword && !has_as_word) return false; @@ -747,7 +748,7 @@ bool ParserWithOptionalAliasImpl::parseImpl(Pos & pos, ASTPtr & nod ASTPtr alias_node; if (ParserAlias(allow_alias_without_as_keyword_now).parse(pos, alias_node, expected)) { - String alias_name = typeid_cast(*alias_node).name; + String alias_name = typeid_cast(*alias_node).name; if (ASTWithAlias * ast_with_alias = dynamic_cast(node.get())) { diff --git a/dbms/src/Parsers/ParserCreateQuery.cpp b/dbms/src/Parsers/ParserCreateQuery.cpp index 24c111afc3d..84ed3874be3 100644 --- a/dbms/src/Parsers/ParserCreateQuery.cpp +++ b/dbms/src/Parsers/ParserCreateQuery.cpp @@ -80,7 +80,7 @@ bool ParserIdentifierWithOptionalParameters::parseImpl(Pos & pos, ASTPtr & node, bool ParserTypeInCastExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - if (ParserIdentifierWithOptionalParameters::parseImpl(pos, node, expected)) + if (ParserIdentifierWithOptionalParameters().parse(pos, node, expected)) { const auto & id_with_params = typeid_cast(*node); node = std::make_shared(String{ id_with_params.range.first, id_with_params.range.second }); diff --git a/dbms/src/Parsers/ParserCreateQuery.h b/dbms/src/Parsers/ParserCreateQuery.h index 4225fa9b8ca..6c0c7a21d52 100644 --- a/dbms/src/Parsers/ParserCreateQuery.h +++ b/dbms/src/Parsers/ParserCreateQuery.h @@ -48,7 +48,7 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected); }; -class ParserTypeInCastExpression : public ParserIdentifierWithOptionalParameters +class ParserTypeInCastExpression : public IParserBase { protected: const char * getName() const { return "type in cast expression"; } From 6ef9917fe215d1f32852317d94e96178aed54844 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 26 Feb 2018 09:12:59 +0300 Subject: [PATCH 116/209] Better UNION ALL: development #1947 --- .../Interpreters/InterpreterSelectQuery.cpp | 58 ++++++------ 
.../InterpreterSelectWithUnionQuery.cpp | 90 ++++++++++++++++--- .../InterpreterSelectWithUnionQuery.h | 4 + 3 files changed, 114 insertions(+), 38 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index dace01ab37d..264aae5325b 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -94,7 +95,7 @@ void InterpreterSelectQuery::init() auto table_expression = query.table(); /// Read from subquery. - if (table_expression && typeid_cast(table_expression.get())) + if (table_expression && typeid_cast(table_expression.get())) { source_header = InterpreterSelectQuery::getSampleBlock(table_expression, context); } @@ -564,7 +565,7 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline } auto query_table = query.table(); - if (query_table && typeid_cast(query_table.get())) + if (query_table && typeid_cast(query_table.get())) { /** There are no limits on the maximum size of the result for the subquery. * Since the result of the query is not the result of the entire query. @@ -1072,6 +1073,31 @@ void InterpreterSelectQuery::executeLimitBy(Pipeline & pipeline) } +bool hasWithTotalsInAnySubqueryInFromClause(const ASTSelectQuery & query) +{ + if (query.group_by_with_totals) + return true; + + /** NOTE You can also check that the table in the subquery is distributed, and that it only looks at one shard. + * In other cases, totals will be computed on the initiating server of the query, and it is not necessary to read the data to the end. + */ + + auto query_table = query.table(); + if (query_table) + { + auto ast_union = typeid_cast(query_table.get()); + if (ast_union) + { + for (const auto & elem : ast_union->list_of_selects->children) + if (hasWithTotalsInAnySubqueryInFromClause(typeid_cast(*elem))) + return true; + } + } + + return false; +} + + void InterpreterSelectQuery::executeLimit(Pipeline & pipeline) { size_t limit_length = 0; @@ -1093,34 +1119,10 @@ void InterpreterSelectQuery::executeLimit(Pipeline & pipeline) bool always_read_till_end = false; if (query.group_by_with_totals && !query.order_expression_list) - { always_read_till_end = true; - } - auto query_table = query.table(); - if (!query.group_by_with_totals && query_table && typeid_cast(query_table.get())) - { - const ASTSelectQuery * subquery = static_cast(query_table.get()); - - while (subquery->table()) - { - if (subquery->group_by_with_totals) - { - /** NOTE You can also check that the table in the subquery is distributed, and that it only looks at one shard. - * In other cases, totals will be computed on the initiating server of the query, and it is not necessary to read the data to the end. 
- */ - - always_read_till_end = true; - break; - } - - auto subquery_table = subquery->table(); - if (typeid_cast(subquery_table.get())) - subquery = static_cast(subquery_table.get()); - else - break; - } - } + if (!query.group_by_with_totals && hasWithTotalsInAnySubqueryInFromClause(query)) + always_read_till_end = true; pipeline.transform([&](auto & stream) { diff --git a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index 27397459276..b5c7afeca51 100644 --- a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -3,6 +3,9 @@ #include #include #include +#include +#include +#include #include @@ -12,6 +15,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int UNION_ALL_RESULT_STRUCTURES_MISMATCH; } @@ -35,6 +39,8 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( for (const auto & select : ast.list_of_selects->children) nested_interpreters.emplace_back(std::make_unique(select, context, to_stage, subquery_depth)); + + init(); } @@ -59,28 +65,88 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( for (const auto & select : ast.list_of_selects->children) nested_interpreters.emplace_back(std::make_unique(select, context, required_column_names, to_stage, subquery_depth)); + + init(); } InterpreterSelectWithUnionQuery::~InterpreterSelectWithUnionQuery() = default; +void InterpreterSelectWithUnionQuery::init() +{ + size_t num_selects = nested_interpreters.size(); + + if (!num_selects) + throw Exception("Logical error: no children in ASTSelectWithUnionQuery", ErrorCodes::LOGICAL_ERROR); + + if (num_selects == 1) + { + result_header = nested_interpreters.front()->getSampleBlock(); + } + else + { + Blocks headers(num_selects); + for (size_t query_num = 0; query_num < num_selects; ++query_num) + headers[query_num] = nested_interpreters[query_num]->getSampleBlock(); + + result_header = headers.front(); + size_t num_columns = result_header.columns(); + + for (size_t query_num = 1; query_num < num_selects; ++query_num) + if (headers[query_num].columns() != num_columns) + throw Exception("Different number of columns in UNION ALL elements", ErrorCodes::UNION_ALL_RESULT_STRUCTURES_MISMATCH); + + for (size_t column_num = 0; column_num < num_columns; ++column_num) + { + ColumnWithTypeAndName & result_elem = result_header.getByPosition(column_num); + + /// Determine common type. + + DataTypes types(num_selects); + for (size_t query_num = 0; query_num < num_selects; ++query_num) + types[query_num] = headers[query_num].getByPosition(column_num).type; + + result_elem.type = getLeastSupertype(types); + + /// If there are different constness or different values of constants, the result must be non-constant. + + if (result_elem.column->isColumnConst()) + { + bool need_materialize = false; + for (size_t query_num = 1; query_num < num_selects; ++query_num) + { + const ColumnWithTypeAndName & source_elem = headers[query_num].getByPosition(column_num); + + if (!source_elem.column->isColumnConst() + || (static_cast(*result_elem.column).getField() + != static_cast(*source_elem.column).getField())) + { + need_materialize = true; + break; + } + } + + if (need_materialize) + result_elem.column = result_elem.type->createColumn(); + } + + /// BTW, result column names are from first SELECT. 
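
// A self-contained sketch of the unification rule implemented above, with
// hypothetical stand-ins for the type/column machinery (only the decision
// logic mirrors the patch): the result type is the least common supertype of
// the column across all SELECTs, and a constant column survives only if every
// SELECT yields a constant with the same value.

#include <algorithm>
#include <optional>
#include <vector>

struct HeaderColumn
{
    int type;                        // stand-in for a DataTypePtr
    std::optional<int> const_value;  // engaged when the column is constant
};

// Toy stand-in for getLeastSupertype(); the real function walks the type lattice.
static int leastSupertype(const std::vector<int> & types)
{
    return *std::max_element(types.begin(), types.end());
}

static HeaderColumn unifyColumn(const std::vector<HeaderColumn> & per_select)
{
    HeaderColumn result = per_select.front();

    std::vector<int> types;
    for (const auto & col : per_select)
        types.push_back(col.type);
    result.type = leastSupertype(types);

    // Different constness or different constant values => materialize.
    if (result.const_value)
        for (const auto & col : per_select)
            if (!col.const_value || *col.const_value != *result.const_value)
            {
                result.const_value.reset();
                break;
            }

    return result;
}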
+ } + } +} + + Block InterpreterSelectWithUnionQuery::getSampleBlock() { - return nested_interpreters.front()->getSampleBlock(); + return result_header; } Block InterpreterSelectWithUnionQuery::getSampleBlock( const ASTPtr & query_ptr, const Context & context) { - const ASTSelectWithUnionQuery & ast = typeid_cast(*query_ptr); - - size_t num_selects = ast.list_of_selects->children.size(); - if (!num_selects) - throw Exception("Logical error: no children in ASTSelectWithUnionQuery", ErrorCodes::LOGICAL_ERROR); - - return InterpreterSelectQuery::getSampleBlock(ast.list_of_selects->children.front(), context); + return InterpreterSelectWithUnionQuery(query_ptr, context).getSampleBlock(); } @@ -114,8 +180,12 @@ BlockIO InterpreterSelectWithUnionQuery::execute() } else { - const Settings & settings = context.getSettingsRef(); - result_stream = std::make_shared>(nested_streams, nullptr, settings.max_threads); + /// Unify data structure. + if (nested_interpreters.size() > 1) + for (auto & stream : nested_streams) + stream = std::make_shared(context, stream, result_header, ConvertingBlockInputStream::MatchColumnsMode::Position); + + result_stream = std::make_shared>(nested_streams, nullptr, context.getSettingsRef().max_threads); nested_streams.clear(); } diff --git a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h index 71a606e1bd4..c0346065804 100644 --- a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h +++ b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h @@ -51,6 +51,10 @@ private: size_t subquery_depth; std::vector> nested_interpreters; + + Block result_header; + + void init(); }; } From ac37b89b166d7b48023b86037b7c36ec5a52e747 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 26 Feb 2018 12:05:06 +0300 Subject: [PATCH 117/209] Better UNION ALL: development #1947 --- dbms/src/Core/iostream_debug_helpers.cpp | 2 - .../PushingToViewsBlockOutputStream.cpp | 2 +- .../ClusterProxy/SelectStreamFactory.cpp | 2 +- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 159 +++++++++++------- dbms/src/Interpreters/ExpressionAnalyzer.h | 29 +++- dbms/src/Interpreters/InterpreterFactory.cpp | 4 +- .../Interpreters/InterpreterInsertQuery.cpp | 2 +- .../Interpreters/InterpreterSelectQuery.cpp | 26 +-- .../src/Interpreters/InterpreterSelectQuery.h | 11 +- .../InterpreterSelectWithUnionQuery.cpp | 25 --- .../InterpreterSelectWithUnionQuery.h | 8 +- .../evaluateConstantExpression.cpp | 2 +- dbms/src/Parsers/ASTSelectQuery.cpp | 111 +----------- dbms/src/Parsers/ASTSelectQuery.h | 9 - dbms/src/Storages/StorageBuffer.cpp | 2 +- dbms/src/Storages/StorageDistributed.cpp | 2 +- dbms/src/Storages/StorageMerge.cpp | 4 +- .../transformQueryForExternalDatabase.cpp | 2 +- 18 files changed, 140 insertions(+), 262 deletions(-) diff --git a/dbms/src/Core/iostream_debug_helpers.cpp b/dbms/src/Core/iostream_debug_helpers.cpp index a4b0cba85b8..3fbbb02a98f 100644 --- a/dbms/src/Core/iostream_debug_helpers.cpp +++ b/dbms/src/Core/iostream_debug_helpers.cpp @@ -133,10 +133,8 @@ std::ostream & operator<<(std::ostream & stream, const ExpressionAnalyzer & what { stream << "ExpressionAnalyzer{" << "hasAggregation=" << what.hasAggregation() - << ", RequiredColumns=" << what.getRequiredColumns() << ", SubqueriesForSet=" << what.getSubqueriesForSets() << ", ExternalTables=" << what.getExternalTables() - // TODO << "}"; return stream; } diff --git a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp 
b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp index 06296b8b34b..b1f0dc8a9cd 100644 --- a/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp +++ b/dbms/src/DataStreams/PushingToViewsBlockOutputStream.cpp @@ -65,7 +65,7 @@ void PushingToViewsBlockOutputStream::write(const Block & block) try { BlockInputStreamPtr from = std::make_shared(block); - InterpreterSelectQuery select(view.query, *views_context, QueryProcessingStage::Complete, 0, from); + InterpreterSelectQuery select(view.query, *views_context, {}, QueryProcessingStage::Complete, 0, from); BlockInputStreamPtr data = std::make_shared(select.execute().in); copyData(*data, *view.out); } diff --git a/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index 43ef98dfb26..cfda6f171a6 100644 --- a/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -44,7 +44,7 @@ namespace BlockInputStreamPtr createLocalStream(const ASTPtr & query_ast, const Context & context, QueryProcessingStage::Enum processed_stage) { - InterpreterSelectQuery interpreter{query_ast, context, processed_stage}; + InterpreterSelectQuery interpreter{query_ast, context, {}, processed_stage}; BlockInputStreamPtr stream = interpreter.execute().in; /** Materialization is needed, since from remote servers the constants come materialized. diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 93352e7b027..fcdf93844a0 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -25,6 +26,7 @@ #include #include +#include #include #include #include @@ -156,12 +158,14 @@ ExpressionAnalyzer::ExpressionAnalyzer( const ASTPtr & ast_, const Context & context_, const StoragePtr & storage_, - const NamesAndTypesList & columns_, + const NamesAndTypesList & source_columns_, + const Names & required_result_columns_, size_t subquery_depth_, bool do_global_, const SubqueriesForSets & subqueries_for_set_) : ast(ast_), context(context_), settings(context.getSettings()), - subquery_depth(subquery_depth_), columns(columns_), + subquery_depth(subquery_depth_), + source_columns(source_columns_), required_result_columns(required_result_columns_.begin(), required_result_columns_.end()), storage(storage_ ? storage_ : getTable()), do_global(do_global_), subqueries_for_sets(subqueries_for_set_) { @@ -171,7 +175,7 @@ ExpressionAnalyzer::ExpressionAnalyzer( void ExpressionAnalyzer::init() { - removeDuplicateColumns(columns); + removeDuplicateColumns(source_columns); select_query = typeid_cast(ast.get()); @@ -211,7 +215,7 @@ void ExpressionAnalyzer::init() /// array_join_alias_to_name, array_join_result_to_source. getArrayJoinedColumns(); - /// Delete the unnecessary from `columns` list. Create `unknown_required_columns`. Form `columns_added_by_join`. + /// Delete the unnecessary from `columns` list. Create `unknown_required_source_columns`. Form `columns_added_by_join`. collectUsedColumns(); /// external_tables, subqueries_for_sets for global subqueries. @@ -366,9 +370,9 @@ void ExpressionAnalyzer::translateQualifiedNamesImpl(ASTPtr & ast, const String { for (auto & child : ast->children) { - /// Do not go to FROM, JOIN, UNION. + /// Do not go to FROM, JOIN, subqueries. 
if (!typeid_cast(child.get()) - && !typeid_cast(child.get())) + && !typeid_cast(child.get())) { translateQualifiedNamesImpl(child, database_name, table_name, alias); } @@ -483,7 +487,7 @@ void ExpressionAnalyzer::analyzeAggregation() if (select_query && (select_query->group_expression_list || select_query->having_expression)) has_aggregation = true; - ExpressionActionsPtr temp_actions = std::make_shared(columns, settings); + ExpressionActionsPtr temp_actions = std::make_shared(source_columns, settings); if (select_query && select_query->array_join_expression_list()) { @@ -644,8 +648,8 @@ static std::pair getDatabaseAndTableNameFromIdentifier(const AST } -static std::shared_ptr interpretSubquery( - const ASTPtr & subquery_or_table_name, const Context & context, size_t subquery_depth, const Names & required_columns) +static std::shared_ptr interpretSubquery( + const ASTPtr & subquery_or_table_name, const Context & context, size_t subquery_depth, const Names & required_source_columns) { /// Subquery or table name. The name of the table is similar to the subquery `SELECT * FROM t`. const ASTSubquery * subquery = typeid_cast(subquery_or_table_name.get()); @@ -673,8 +677,13 @@ static std::shared_ptr interpretSubquery( if (table) { /// create ASTSelectQuery for "SELECT * FROM table" as if written by hand + const auto select_with_union_query = std::make_shared(); + query = select_with_union_query; + + select_with_union_query->list_of_selects = std::make_shared(); + const auto select_query = std::make_shared(); - query = select_query; + select_with_union_query->list_of_selects->children.push_back(select_query); const auto select_expression_list = std::make_shared(); select_query->select_expression_list = select_expression_list; @@ -732,12 +741,8 @@ static std::shared_ptr interpretSubquery( } } - if (required_columns.empty()) - return std::make_shared( - query, subquery_context, QueryProcessingStage::Complete, subquery_depth + 1); - else - return std::make_shared( - query, subquery_context, required_columns, QueryProcessingStage::Complete, subquery_depth + 1); + return std::make_shared( + query, subquery_context, required_source_columns, QueryProcessingStage::Complete, subquery_depth + 1); } @@ -853,9 +858,9 @@ void ExpressionAnalyzer::addASTAliases(ASTPtr & ast, int ignore_levels) if (typeid_cast(ast.get())) new_ignore_levels = 3; - /// Don't descent into UNION ALL, table functions and subqueries. + /// Don't descent into table functions and subqueries. 
if (!typeid_cast(child.get()) - && !typeid_cast(child.get())) + && !typeid_cast(child.get())) addASTAliases(child, new_ignore_levels); } @@ -1030,7 +1035,7 @@ void ExpressionAnalyzer::normalizeTreeImpl( if (typeid_cast(asts[i].get())) { ASTs all_columns; - for (const auto & column_name_type : columns) + for (const auto & column_name_type : source_columns) all_columns.emplace_back(std::make_shared(column_name_type.name)); asts.erase(asts.begin() + i); @@ -1121,7 +1126,7 @@ void ExpressionAnalyzer::addAliasColumns() if (!storage) return; - columns.insert(std::end(columns), std::begin(storage->alias_columns), std::end(storage->alias_columns)); + source_columns.insert(std::end(source_columns), std::begin(storage->alias_columns), std::end(storage->alias_columns)); } @@ -1189,7 +1194,7 @@ void ExpressionAnalyzer::executeScalarSubqueriesImpl(ASTPtr & ast) subquery_context.setSettings(subquery_settings); ASTPtr query = subquery->children.at(0); - BlockIO res = InterpreterSelectQuery(query, subquery_context, QueryProcessingStage::Complete, subquery_depth + 1).execute(); + BlockIO res = InterpreterSelectQuery(query, subquery_context, {}, QueryProcessingStage::Complete, subquery_depth + 1).execute(); Block block; try @@ -1366,7 +1371,7 @@ void ExpressionAnalyzer::optimizeGroupBy() UInt64 unused_column = 0; String unused_column_name = toString(unused_column); - while (columns.end() != std::find_if(columns.begin(), columns.end(), + while (source_columns.end() != std::find_if(source_columns.begin(), source_columns.end(), [&unused_column_name](const NameAndTypePair & name_type) { return name_type.name == unused_column_name; })) { ++unused_column; @@ -1478,7 +1483,7 @@ void ExpressionAnalyzer::makeSetsForIndexImpl(const ASTPtr & node, const Block & { try { - ExpressionActionsPtr temp_actions = std::make_shared(columns, settings); + ExpressionActionsPtr temp_actions = std::make_shared(source_columns, settings); getRootActions(func->arguments->children.at(0), true, false, temp_actions); makeExplicitSet(func, temp_actions->getSampleBlock(), true); } @@ -1841,14 +1846,14 @@ void ExpressionAnalyzer::getArrayJoinedColumns() String result_name = expr->getAliasOrColumnName(); /// This is an array. - if (!typeid_cast(expr.get()) || findColumn(source_name, columns) != columns.end()) + if (!typeid_cast(expr.get()) || findColumn(source_name, source_columns) != source_columns.end()) { array_join_result_to_source[result_name] = source_name; } else /// This is a nested table. { bool found = false; - for (const auto & column_name_type : columns) + for (const auto & column_name_type : source_columns) { auto splitted = Nested::splitName(column_name_type.name); if (splitted.first == source_name && !splitted.second.empty()) @@ -1932,7 +1937,7 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries, /// If such a column exists in the table, then the user probably forgot to surround it with an aggregate function or add it to GROUP BY. 
bool found = false; - for (const auto & column_name_type : columns) + for (const auto & column_name_type : source_columns) if (column_name_type.name == name) found = true; @@ -2296,7 +2301,7 @@ bool ExpressionAnalyzer::appendArrayJoin(ExpressionActionsChain & chain, bool on if (!select_query->array_join_expression_list()) return false; - initChain(chain, columns); + initChain(chain, source_columns); ExpressionActionsChain::Step & step = chain.steps.back(); getRootActions(select_query->array_join_expression_list(), only_types, false, step.actions); @@ -2323,7 +2328,7 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty if (!select_query->join()) return false; - initChain(chain, columns); + initChain(chain, source_columns); ExpressionActionsChain::Step & step = chain.steps.back(); const ASTTablesInSelectQueryElement & join_element = static_cast(*select_query->join()); @@ -2408,7 +2413,7 @@ bool ExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, bool only_t if (!select_query->where_expression) return false; - initChain(chain, columns); + initChain(chain, source_columns); ExpressionActionsChain::Step & step = chain.steps.back(); step.required_output.push_back(select_query->where_expression->getColumnName()); @@ -2424,7 +2429,7 @@ bool ExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain, bool only if (!select_query->group_expression_list) return false; - initChain(chain, columns); + initChain(chain, source_columns); ExpressionActionsChain::Step & step = chain.steps.back(); ASTs asts = select_query->group_expression_list->children; @@ -2441,7 +2446,7 @@ void ExpressionAnalyzer::appendAggregateFunctionsArguments(ExpressionActionsChai { assertAggregation(); - initChain(chain, columns); + initChain(chain, source_columns); ExpressionActionsChain::Step & step = chain.steps.back(); for (size_t i = 0; i < aggregate_descriptions.size(); ++i) @@ -2530,8 +2535,12 @@ void ExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & chain) con ASTs asts = select_query->select_expression_list->children; for (size_t i = 0; i < asts.size(); ++i) { - result_columns.emplace_back(asts[i]->getColumnName(), asts[i]->getAliasOrColumnName()); - step.required_output.push_back(result_columns.back().second); + String result_name = asts[i]->getAliasOrColumnName(); + if (required_result_columns.empty() || required_result_columns.count(result_name)) + { + result_columns.emplace_back(asts[i]->getColumnName(), result_name); + step.required_output.push_back(result_columns.back().second); + } } step.actions->add(ExpressionAction::project(result_columns)); @@ -2553,7 +2562,7 @@ void ExpressionAnalyzer::getActionsBeforeAggregation(const ASTPtr & ast, Express ExpressionActionsPtr ExpressionAnalyzer::getActions(bool project_result) { - ExpressionActionsPtr actions = std::make_shared(columns, settings); + ExpressionActionsPtr actions = std::make_shared(source_columns, settings); NamesWithAliases result_columns; Names result_names; @@ -2584,7 +2593,7 @@ ExpressionActionsPtr ExpressionAnalyzer::getActions(bool project_result) else { /// We will not delete the original columns. 
- for (const auto & column_name_type : columns) + for (const auto & column_name_type : source_columns) result_names.push_back(column_name_type.name); } @@ -2622,7 +2631,7 @@ void ExpressionAnalyzer::collectUsedColumns() NameSet ignored; NameSet available_columns; - for (const auto & column : columns) + for (const auto & column : source_columns) available_columns.insert(column.name); if (select_query && select_query->array_join_expression_list()) @@ -2640,7 +2649,7 @@ void ExpressionAnalyzer::collectUsedColumns() { /// Nothing needs to be ignored for expressions in ARRAY JOIN. NameSet empty; - getRequiredColumnsImpl(expressions[i], available_columns, required, empty, empty, empty); + getRequiredSourceColumnsImpl(expressions[i], available_columns, required, empty, empty, empty); } ignored.insert(expressions[i]->getAliasOrColumnName()); @@ -2654,7 +2663,7 @@ void ExpressionAnalyzer::collectUsedColumns() collectJoinedColumns(available_joined_columns, columns_added_by_join); NameSet required_joined_columns; - getRequiredColumnsImpl(ast, available_columns, required, ignored, available_joined_columns, required_joined_columns); + getRequiredSourceColumnsInSelectImpl(available_columns, required, ignored, available_joined_columns, required_joined_columns); for (NamesAndTypesList::iterator it = columns_added_by_join.begin(); it != columns_added_by_join.end();) { @@ -2669,22 +2678,22 @@ void ExpressionAnalyzer::collectUsedColumns() for (const auto & result_source : array_join_result_to_source) array_join_sources.insert(result_source.second); - for (const auto & column_name_type : columns) + for (const auto & column_name_type : source_columns) if (array_join_sources.count(column_name_type.name)) required.insert(column_name_type.name); /// You need to read at least one column to find the number of rows. if (required.empty()) - required.insert(ExpressionActions::getSmallestColumn(columns)); + required.insert(ExpressionActions::getSmallestColumn(source_columns)); - unknown_required_columns = required; + unknown_required_source_columns = required; - for (NamesAndTypesList::iterator it = columns.begin(); it != columns.end();) + for (NamesAndTypesList::iterator it = source_columns.begin(); it != source_columns.end();) { - unknown_required_columns.erase(it->name); + unknown_required_source_columns.erase(it->name); if (!required.count(it->name)) - columns.erase(it++); + source_columns.erase(it++); else ++it; } @@ -2693,12 +2702,12 @@ void ExpressionAnalyzer::collectUsedColumns() /// in columns list, so that when further processing the request they are perceived as real. 
if (storage) { - for (auto it = unknown_required_columns.begin(); it != unknown_required_columns.end();) + for (auto it = unknown_required_source_columns.begin(); it != unknown_required_source_columns.end();) { if (storage->hasColumn(*it)) { - columns.push_back(storage->getColumn(*it)); - unknown_required_columns.erase(it++); + source_columns.push_back(storage->getColumn(*it)); + unknown_required_source_columns.erase(it++); } else ++it; @@ -2764,30 +2773,54 @@ void ExpressionAnalyzer::collectJoinedColumns(NameSet & joined_columns, NamesAnd } -Names ExpressionAnalyzer::getRequiredColumns() const +Names ExpressionAnalyzer::getRequiredSourceColumns() const { - if (!unknown_required_columns.empty()) - throw Exception("Unknown identifier: " + *unknown_required_columns.begin(), ErrorCodes::UNKNOWN_IDENTIFIER); + if (!unknown_required_source_columns.empty()) + throw Exception("Unknown identifier: " + *unknown_required_source_columns.begin(), ErrorCodes::UNKNOWN_IDENTIFIER); Names res; - for (const auto & column_name_type : columns) + for (const auto & column_name_type : source_columns) res.push_back(column_name_type.name); return res; } -void ExpressionAnalyzer::getRequiredColumnsImpl(const ASTPtr & ast, - const NameSet & available_columns, NameSet & required_columns, NameSet & ignored_names, +void ExpressionAnalyzer::getRequiredSourceColumnsInSelectImpl( + const NameSet & available_columns, NameSet & required_source_columns, NameSet & ignored_names, + const NameSet & available_joined_columns, NameSet & required_joined_columns) +{ + if (!select_query) + { + getRequiredSourceColumnsImpl(ast, available_columns, required_source_columns, + ignored_names, available_joined_columns, required_joined_columns); + return; + } + + /// TODO: DISTINCT, arrayJoin + for (const auto & child : select_query->select_expression_list->children) + if (required_result_columns.empty() || required_result_columns.count(child->getAliasOrColumnName())) + getRequiredSourceColumnsImpl(child, available_columns, required_source_columns, + ignored_names, available_joined_columns, required_joined_columns); + + for (const auto & child : select_query->children) + if (child != select_query->select_expression_list) + getRequiredSourceColumnsImpl(child, available_columns, required_source_columns, + ignored_names, available_joined_columns, required_joined_columns); +} + + +void ExpressionAnalyzer::getRequiredSourceColumnsImpl(const ASTPtr & ast, + const NameSet & available_columns, NameSet & required_source_columns, NameSet & ignored_names, const NameSet & available_joined_columns, NameSet & required_joined_columns) { /** Find all the identifiers in the query. - * We will look for them recursively, bypassing by depth AST. + * We will use depth first search in AST. * In this case * - for lambda functions we will not take formal parameters; - * - do not go into subqueries (there are their identifiers); - * - is some exception for the ARRAY JOIN section (it has a slightly different identifier); - * - identifiers available from JOIN, we put in required_joined_columns. + * - do not go into subqueries (they have their own identifiers); + * - there is some exception for the ARRAY JOIN clause (it has a slightly different identifiers); + * - we put identifiers available from JOIN in required_joined_columns. 
*/ if (ASTIdentifier * node = typeid_cast(ast.get())) @@ -2798,7 +2831,7 @@ void ExpressionAnalyzer::getRequiredColumnsImpl(const ASTPtr & ast, { if (!available_joined_columns.count(node->name) || available_columns.count(node->name)) /// Read column from left table if has. - required_columns.insert(node->name); + required_source_columns.insert(node->name); else required_joined_columns.insert(node->name); } @@ -2818,7 +2851,7 @@ void ExpressionAnalyzer::getRequiredColumnsImpl(const ASTPtr & ast, if (!lambda_args_tuple || lambda_args_tuple->name != "tuple") throw Exception("First argument of lambda must be a tuple", ErrorCodes::TYPE_MISMATCH); - /// You do not need to add formal parameters of the lambda expression in required_columns. + /// You do not need to add formal parameters of the lambda expression in required_source_columns. Names added_ignored; for (auto & child : lambda_args_tuple->arguments->children) { @@ -2834,8 +2867,8 @@ void ExpressionAnalyzer::getRequiredColumnsImpl(const ASTPtr & ast, } } - getRequiredColumnsImpl(node->arguments->children.at(1), - available_columns, required_columns, ignored_names, + getRequiredSourceColumnsImpl(node->arguments->children.at(1), + available_columns, required_source_columns, ignored_names, available_joined_columns, required_joined_columns); for (size_t i = 0; i < added_ignored.size(); ++i) @@ -2858,8 +2891,8 @@ void ExpressionAnalyzer::getRequiredColumnsImpl(const ASTPtr & ast, */ if (!typeid_cast(child.get()) && !typeid_cast(child.get())) - getRequiredColumnsImpl(child, available_columns, required_columns, - ignored_names, available_joined_columns, required_joined_columns); + getRequiredSourceColumnsImpl(child, available_columns, required_source_columns, + ignored_names, available_joined_columns, required_joined_columns); } } diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h index 41e56c691a5..15be363ac38 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.h +++ b/dbms/src/Interpreters/ExpressionAnalyzer.h @@ -69,7 +69,8 @@ public: const ASTPtr & ast_, const Context & context_, const StoragePtr & storage_, - const NamesAndTypesList & columns_, + const NamesAndTypesList & source_columns_, + const Names & required_result_columns_ = {}, size_t subquery_depth_ = 0, bool do_global_ = false, const SubqueriesForSets & subqueries_for_set_ = {}); @@ -83,7 +84,7 @@ public: /** Get a set of columns that are enough to read from the table to evaluate the expression. * Columns added from another table by JOIN are not counted. */ - Names getRequiredColumns() const; + Names getRequiredSourceColumns() const; /** These methods allow you to build a chain of transformations over a block, that receives values in the desired sections of the query. * @@ -146,12 +147,16 @@ private: size_t subquery_depth; /// Columns that are mentioned in the expression, but were not specified in the constructor. - NameSet unknown_required_columns; + NameSet unknown_required_source_columns; /** Original columns. - * First, all available columns of the table are placed here. Then (when parsing the query), unused columns are deleted. + * First, all available columns of the table are placed here. Then (when analyzing the query), unused columns are deleted. */ - NamesAndTypesList columns; + NamesAndTypesList source_columns; + + /** If non-empty, ignore all expressions in not from this list. + */ + NameSet required_result_columns; /// Columns after ARRAY JOIN, JOIN, and/or aggregation. 
NamesAndTypesList aggregated_columns; @@ -209,10 +214,10 @@ private: void init(); static NamesAndTypesList::iterator findColumn(const String & name, NamesAndTypesList & cols); - NamesAndTypesList::iterator findColumn(const String & name) { return findColumn(name, columns); } + NamesAndTypesList::iterator findColumn(const String & name) { return findColumn(name, source_columns); } /** Remove all unnecessary columns from the list of all available columns of the table (`columns`). - * At the same time, form a set of unknown columns (`unknown_required_columns`), + * At the same time, form a set of unknown columns (`unknown_required_source_columns`), * as well as the columns added by JOIN (`columns_added_by_join`). */ void collectUsedColumns(); @@ -292,8 +297,14 @@ private: * The set of columns available_joined_columns are the columns available from JOIN, they are not needed for reading from the main table. * Put in required_joined_columns the set of columns available from JOIN and needed. */ - void getRequiredColumnsImpl(const ASTPtr & ast, - const NameSet & available_columns, NameSet & required_columns, NameSet & ignored_names, + void getRequiredSourceColumnsImpl(const ASTPtr & ast, + const NameSet & available_columns, NameSet & required_source_columns, NameSet & ignored_names, + const NameSet & available_joined_columns, NameSet & required_joined_columns); + + /** Same as above but skip unnecessary elements in SELECT according to 'required_result_columns'. + */ + void getRequiredSourceColumnsInSelectImpl( + const NameSet & available_columns, NameSet & required_source_columns, NameSet & ignored_names, const NameSet & available_joined_columns, NameSet & required_joined_columns); /// Get the table from which the query is made diff --git a/dbms/src/Interpreters/InterpreterFactory.cpp b/dbms/src/Interpreters/InterpreterFactory.cpp index d212d1c63d1..4361166cb91 100644 --- a/dbms/src/Interpreters/InterpreterFactory.cpp +++ b/dbms/src/Interpreters/InterpreterFactory.cpp @@ -58,11 +58,11 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, Context & { if (typeid_cast(query.get())) { - return std::make_unique(query, context, stage); + return std::make_unique(query, context, Names{}, stage); } else if (typeid_cast(query.get())) { - return std::make_unique(query, context, stage); + return std::make_unique(query, context, Names{}, stage); } else if (typeid_cast(query.get())) { diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.cpp b/dbms/src/Interpreters/InterpreterInsertQuery.cpp index 8892a766356..ef29a6decba 100644 --- a/dbms/src/Interpreters/InterpreterInsertQuery.cpp +++ b/dbms/src/Interpreters/InterpreterInsertQuery.cpp @@ -120,7 +120,7 @@ BlockIO InterpreterInsertQuery::execute() if (query.select) { /// Passing 1 as subquery_depth will disable limiting size of intermediate result. 
- InterpreterSelectWithUnionQuery interpreter_select{query.select, context, QueryProcessingStage::Complete, 1}; + InterpreterSelectWithUnionQuery interpreter_select{query.select, context, {}, QueryProcessingStage::Complete, 1}; res.in = interpreter_select.execute().in; diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 264aae5325b..9e310ec745b 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -72,7 +72,7 @@ namespace ErrorCodes InterpreterSelectQuery::~InterpreterSelectQuery() = default; -void InterpreterSelectQuery::init() +void InterpreterSelectQuery::init(const Names & required_column_names) { ProfileEvents::increment(ProfileEvents::SelectQuery); @@ -97,7 +97,7 @@ void InterpreterSelectQuery::init() /// Read from subquery. if (table_expression && typeid_cast(table_expression.get())) { - source_header = InterpreterSelectQuery::getSampleBlock(table_expression, context); + source_header = InterpreterSelectWithUnionQuery::getSampleBlock(table_expression, context); } else { @@ -131,7 +131,8 @@ void InterpreterSelectQuery::init() if (!source_header) throw Exception("There are no available columns", ErrorCodes::THERE_IS_NO_COLUMN); - query_analyzer = std::make_unique(query_ptr, context, storage, source_header.getNamesAndTypesList(), subquery_depth, !only_analyze); + query_analyzer = std::make_unique( + query_ptr, context, storage, source_header.getNamesAndTypesList(), required_column_names, subquery_depth, !only_analyze); if (query.sample_size() && (input || !storage || !storage->supportsSampling())) throw Exception("Illegal SAMPLE: table doesn't support sampling", ErrorCodes::SAMPLING_NOT_SUPPORTED); @@ -152,17 +153,7 @@ void InterpreterSelectQuery::init() InterpreterSelectQuery::InterpreterSelectQuery( const ASTPtr & query_ptr_, const Context & context_, - QueryProcessingStage::Enum to_stage_, - size_t subquery_depth_, - const BlockInputStreamPtr & input) - : InterpreterSelectQuery(query_ptr_, context_, {}, to_stage_, subquery_depth_, input) -{ -} - -InterpreterSelectQuery::InterpreterSelectQuery( - const ASTPtr & query_ptr_, - const Context & context_, - const Names & /*required_column_names_*/, + const Names & required_column_names_, QueryProcessingStage::Enum to_stage_, size_t subquery_depth_, const BlockInputStreamPtr & input) @@ -174,7 +165,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( , input(input) , log(&Logger::get("InterpreterSelectQuery")) { - init(); + init(required_column_names_); } @@ -187,7 +178,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(OnlyAnalyzeTag, const ASTPtr & qu , only_analyze(true) , log(&Logger::get("InterpreterSelectQuery")) { - init(); + init({}); } @@ -525,7 +516,8 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline std::optional interpreter_subquery; /// List of columns to read to execute the query. - Names required_columns = query_analyzer->getRequiredColumns(); + Names required_columns = query_analyzer->getRequiredSourceColumns(); + /// Actions to calculate ALIAS if required. ExpressionActionsPtr alias_actions; /// Are ALIAS columns required for query execution? 
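
The call-site churn above and the header diff below are two halves of one API
change: the InterpreterSelectQuery constructor that omitted the column list is
folded into the one that takes it, with `required_column_names` defaulted, so
callers pass `{}` (or nothing) instead of choosing between overloads. A
minimal sketch of the pattern with placeholder types (not the real ClickHouse
signatures):

    #include <string>
    #include <vector>

    using Names = std::vector<std::string>;
    struct Query {};

    struct Interpreter
    {
        /// One constructor replaces the former pair of overloads; the defaulted
        /// parameter keeps the short call sites compiling unchanged.
        explicit Interpreter(const Query & query, const Names & required_column_names = Names{})
            : columns(required_column_names) { (void)query; }

        Names columns;
    };

    int main()
    {
        Query q;
        Interpreter a{q};              /// former short call site, still compiles
        Interpreter b{q, {"x", "y"}};  /// explicit column list
        return (a.columns.empty() && b.columns.size() == 2) ? 0 : 1;
    }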
diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.h b/dbms/src/Interpreters/InterpreterSelectQuery.h
index 77b3d3c4059..744d143f4c9 100644
--- a/dbms/src/Interpreters/InterpreterSelectQuery.h
+++ b/dbms/src/Interpreters/InterpreterSelectQuery.h
@@ -50,14 +50,7 @@ public:
     InterpreterSelectQuery(
         const ASTPtr & query_ptr_,
         const Context & context_,
-        QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete,
-        size_t subquery_depth_ = 0,
-        const BlockInputStreamPtr & input = nullptr);
-
-    InterpreterSelectQuery(
-        const ASTPtr & query_ptr_,
-        const Context & context_,
-        const Names & required_column_names,
+        const Names & required_column_names = Names{},
         QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete,
         size_t subquery_depth_ = 0,
         const BlockInputStreamPtr & input = nullptr);
@@ -118,7 +111,7 @@ private:
         const ASTPtr & query_ptr_,
         const Context & context_);
 
-    void init();
+    void init(const Names & required_column_names);
 
     void executeImpl(Pipeline & pipeline, const BlockInputStreamPtr & input);
 
diff --git a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp
index b5c7afeca51..13da3b00839 100644
--- a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp
@@ -19,31 +19,6 @@ namespace ErrorCodes
 }
 
 
-InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
-    const ASTPtr & query_ptr_,
-    const Context & context_,
-    QueryProcessingStage::Enum to_stage_,
-    size_t subquery_depth_)
-    : query_ptr(query_ptr_),
-    context(context_),
-    to_stage(to_stage_),
-    subquery_depth(subquery_depth_)
-{
-    const ASTSelectWithUnionQuery & ast = typeid_cast<const ASTSelectWithUnionQuery &>(*query_ptr);
-
-    size_t num_selects = ast.list_of_selects->children.size();
-    nested_interpreters.reserve(num_selects);
-
-    if (!num_selects)
-        throw Exception("Logical error: no children in ASTSelectWithUnionQuery", ErrorCodes::LOGICAL_ERROR);
-
-    for (const auto & select : ast.list_of_selects->children)
-        nested_interpreters.emplace_back(std::make_unique<InterpreterSelectQuery>(select, context, to_stage, subquery_depth));
-
-    init();
-}
-
-
 InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
     const ASTPtr & query_ptr_,
     const Context & context_,
diff --git a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h
index c0346065804..ec1116e082a 100644
--- a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h
+++ b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h
@@ -19,13 +19,7 @@ public:
     InterpreterSelectWithUnionQuery(
         const ASTPtr & query_ptr_,
         const Context & context_,
-        QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete,
-        size_t subquery_depth_ = 0);
-
-    InterpreterSelectWithUnionQuery(
-        const ASTPtr & query_ptr_,
-        const Context & context_,
-        const Names & required_column_names,
+        const Names & required_column_names = Names{},
         QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete,
         size_t subquery_depth_ = 0);
 
diff --git a/dbms/src/Interpreters/evaluateConstantExpression.cpp b/dbms/src/Interpreters/evaluateConstantExpression.cpp
index 497df93f9bc..8ab3ca7bf1a 100644
--- a/dbms/src/Interpreters/evaluateConstantExpression.cpp
+++ b/dbms/src/Interpreters/evaluateConstantExpression.cpp
@@ -25,7 +25,7 @@ namespace ErrorCodes
 std::pair<Field, std::shared_ptr<const IDataType>> evaluateConstantExpression(const ASTPtr & node, const Context & context)
 {
     ExpressionActionsPtr expr_for_constant_folding = ExpressionAnalyzer(
-        node, context,
nullptr, NamesAndTypesList{{ "_dummy", std::make_shared() }}).getConstActions(); + node, context, nullptr, NamesAndTypesList{{ "_dummy", std::make_shared() }}, Names()).getConstActions(); /// There must be at least one column in the block so that it knows the number of rows. Block block_with_constants{{ ColumnConst::create(ColumnUInt8::create(1, 0), 1), std::make_shared(), "_dummy" }}; diff --git a/dbms/src/Parsers/ASTSelectQuery.cpp b/dbms/src/Parsers/ASTSelectQuery.cpp index 6924ec37fba..f234b0ae4b5 100644 --- a/dbms/src/Parsers/ASTSelectQuery.cpp +++ b/dbms/src/Parsers/ASTSelectQuery.cpp @@ -15,119 +15,10 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int THERE_IS_NO_COLUMN; + extern const int NOT_IMPLEMENTED; } -bool ASTSelectQuery::hasArrayJoin(const ASTPtr & ast) -{ - if (const ASTFunction * function = typeid_cast(&*ast)) - if (function->name == "arrayJoin") - return true; - - for (const auto & child : ast->children) - if (hasArrayJoin(child)) - return true; - - return false; -} - -bool ASTSelectQuery::hasAsterisk() const -{ - for (const auto & ast : select_expression_list->children) - if (typeid_cast(&*ast) != nullptr) - return true; - - return false; -} - -void ASTSelectQuery::rewriteSelectExpressionList(const Names & required_column_names) -{ - /// All columns are kept if we have DISTINCT. - if (distinct) - return; - - /** Always keep columns that contain arrayJoin inside. - * In addition, keep all columns in 'required_column_names'. - * If SELECT has at least one asterisk, replace it with the rest of required_column_names - * and ignore all other asterisks. - * We must keep columns in same related order. - */ - - /// Analyze existing expression list. - - using ASTAndPosition = std::pair; - - std::map columns_with_array_join; - std::map other_required_columns_in_select; - ASTAndPosition asterisk; - - size_t position = 0; - for (const auto & child : select_expression_list->children) - { - if (typeid_cast(child.get())) - { - if (!asterisk.first) - asterisk = { child, position }; - } - else - { - auto name = child->getAliasOrColumnName(); - - if (hasArrayJoin(child)) - columns_with_array_join[name] = { child, position }; - else if (required_column_names.end() != std::find(required_column_names.begin(), required_column_names.end(), name)) - other_required_columns_in_select[name] = { child, position }; - } - ++position; - } - - /// Create a new expression list. - - std::vector new_children; - - for (const auto & name_child : other_required_columns_in_select) - new_children.push_back(name_child.second); - - for (const auto & name_child : columns_with_array_join) - new_children.push_back(name_child.second); - - for (const auto & name : required_column_names) - { - if (!other_required_columns_in_select.count(name) && !columns_with_array_join.count(name)) - { - if (asterisk.first) - new_children.push_back({ std::make_shared(name), asterisk.second }); - else - throw Exception("SELECT query doesn't have required column: " + backQuoteIfNeed(name), ErrorCodes::THERE_IS_NO_COLUMN); - } - } - - std::sort(new_children.begin(), new_children.end(), [](const auto & a, const auto & b) { return a.second < b.second; }); - - ASTPtr result = std::make_shared(); - - for (const auto & child : new_children) - result->children.push_back(child.first); - - /// Replace expression list in the query. 
- - for (auto & child : children) - { - if (child == select_expression_list) - { - child = result; - break; - } - } - select_expression_list = result; - - /** NOTE: It might seem that we could spoil the query by throwing an expression with an alias that is used somewhere else. - * This can not happen, because this method is always called for a query, for which ExpressionAnalyzer was created at least once, - * which ensures that all aliases in it are already set. Not quite obvious logic. - */ -} - ASTPtr ASTSelectQuery::clone() const { auto res = std::make_shared(*this); diff --git a/dbms/src/Parsers/ASTSelectQuery.h b/dbms/src/Parsers/ASTSelectQuery.h index ebafabd681d..c31e91f3194 100644 --- a/dbms/src/Parsers/ASTSelectQuery.h +++ b/dbms/src/Parsers/ASTSelectQuery.h @@ -18,15 +18,6 @@ public: /** Get the text that identifies this element. */ String getID() const override { return "SelectQuery"; }; - /// Check for the presence of the `arrayJoin` function. (Not capital `ARRAY JOIN`.) - static bool hasArrayJoin(const ASTPtr & ast); - - /// Does the query contain an asterisk? - bool hasAsterisk() const; - - /// Rewrites select_expression_list to return only the required columns in the correct order. TODO Wrong comment. - void rewriteSelectExpressionList(const Names & required_column_names); - ASTPtr clone() const override; bool distinct = false; diff --git a/dbms/src/Storages/StorageBuffer.cpp b/dbms/src/Storages/StorageBuffer.cpp index 62ab67df163..f2a4128e514 100644 --- a/dbms/src/Storages/StorageBuffer.cpp +++ b/dbms/src/Storages/StorageBuffer.cpp @@ -137,7 +137,7 @@ BlockInputStreams StorageBuffer::read( */ if (processed_stage > QueryProcessingStage::FetchColumns) for (auto & stream : streams_from_buffers) - stream = InterpreterSelectQuery(query_info.query, context, processed_stage, 0, stream).execute().in; + stream = InterpreterSelectQuery(query_info.query, context, {}, processed_stage, 0, stream).execute().in; streams_from_dst.insert(streams_from_dst.end(), streams_from_buffers.begin(), streams_from_buffers.end()); return streams_from_dst; diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp index 9d4afdfdde6..edeeb59cba6 100644 --- a/dbms/src/Storages/StorageDistributed.cpp +++ b/dbms/src/Storages/StorageDistributed.cpp @@ -191,7 +191,7 @@ BlockInputStreams StorageDistributed::read( const auto & modified_query_ast = rewriteSelectQuery( query_info.query, remote_database, remote_table); - Block header = materializeBlock(InterpreterSelectQuery(query_info.query, context, processed_stage, 0, + Block header = materializeBlock(InterpreterSelectQuery(query_info.query, context, {}, processed_stage, 0, std::make_shared(getSampleBlockForColumns(column_names))).execute().in->getHeader()); ClusterProxy::SelectStreamFactory select_stream_factory( diff --git a/dbms/src/Storages/StorageMerge.cpp b/dbms/src/Storages/StorageMerge.cpp index 6193ffce779..fba95dafda3 100644 --- a/dbms/src/Storages/StorageMerge.cpp +++ b/dbms/src/Storages/StorageMerge.cpp @@ -222,11 +222,11 @@ BlockInputStreams StorageMerge::read( header = getSampleBlockForColumns(column_names); break; case QueryProcessingStage::WithMergeableState: - header = materializeBlock(InterpreterSelectQuery(query_info.query, context, QueryProcessingStage::WithMergeableState, 0, + header = materializeBlock(InterpreterSelectQuery(query_info.query, context, {}, QueryProcessingStage::WithMergeableState, 0, std::make_shared(getSampleBlockForColumns(column_names))).execute().in->getHeader()); break; case 
QueryProcessingStage::Complete: - header = materializeBlock(InterpreterSelectQuery(query_info.query, context, QueryProcessingStage::Complete, 0, + header = materializeBlock(InterpreterSelectQuery(query_info.query, context, {}, QueryProcessingStage::Complete, 0, std::make_shared(getSampleBlockForColumns(column_names))).execute().in->getHeader()); break; } diff --git a/dbms/src/Storages/transformQueryForExternalDatabase.cpp b/dbms/src/Storages/transformQueryForExternalDatabase.cpp index bf4fd14b23a..897fb9fdfa9 100644 --- a/dbms/src/Storages/transformQueryForExternalDatabase.cpp +++ b/dbms/src/Storages/transformQueryForExternalDatabase.cpp @@ -62,7 +62,7 @@ String transformQueryForExternalDatabase( const Context & context) { ExpressionAnalyzer analyzer(query.clone(), context, {}, available_columns); - const Names & used_columns = analyzer.getRequiredColumns(); + const Names & used_columns = analyzer.getRequiredSourceColumns(); auto select = std::make_shared(); From 44d0736138093788c21549bbceea3c48addbd91e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 26 Feb 2018 12:10:11 +0300 Subject: [PATCH 118/209] Better UNION ALL: development #1947 --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 2 +- .../Interpreters/InterpreterSelectWithUnionQuery.cpp | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index fcdf93844a0..36fff0bbdb0 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -913,7 +913,7 @@ StoragePtr ExpressionAnalyzer::getTable() auto select_table = select->table(); if (select_table - && !typeid_cast(select_table.get()) + && !typeid_cast(select_table.get()) && !typeid_cast(select_table.get())) { String database = select_database diff --git a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index 13da3b00839..fafafae4422 100644 --- a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -135,6 +135,11 @@ BlockInputStreams InterpreterSelectWithUnionQuery::executeWithMultipleStreams() nested_streams.insert(nested_streams.end(), streams.begin(), streams.end()); } + /// Unify data structure. + if (nested_interpreters.size() > 1) + for (auto & stream : nested_streams) + stream = std::make_shared(context, stream, result_header, ConvertingBlockInputStream::MatchColumnsMode::Position); + return nested_streams; } @@ -155,11 +160,6 @@ BlockIO InterpreterSelectWithUnionQuery::execute() } else { - /// Unify data structure. 
- if (nested_interpreters.size() > 1) - for (auto & stream : nested_streams) - stream = std::make_shared(context, stream, result_header, ConvertingBlockInputStream::MatchColumnsMode::Position); - result_stream = std::make_shared>(nested_streams, nullptr, context.getSettingsRef().max_threads); nested_streams.clear(); } From 437ba4856b509c3107fd227962c0dfcc8c784066 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 26 Feb 2018 12:55:14 +0300 Subject: [PATCH 119/209] Better UNION ALL: development #1947 --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 42 +++++++++++++++---- dbms/src/Interpreters/ExpressionAnalyzer.h | 6 +++ .../Interpreters/InterpreterSelectQuery.cpp | 7 +++- 3 files changed, 46 insertions(+), 9 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 36fff0bbdb0..ea9caa8bc10 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -215,6 +215,9 @@ void ExpressionAnalyzer::init() /// array_join_alias_to_name, array_join_result_to_source. getArrayJoinedColumns(); + /// All selected columns in case of DISTINCT; columns that contain arrayJoin function inside. + calculateRequiredColumnsBeforeProjection(); + /// Delete the unnecessary from `columns` list. Create `unknown_required_source_columns`. Form `columns_added_by_join`. collectUsedColumns(); @@ -2491,11 +2494,9 @@ void ExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain, bool only_ getRootActions(select_query->select_expression_list, only_types, false, step.actions); - ASTs asts = select_query->select_expression_list->children; - for (size_t i = 0; i < asts.size(); ++i) - { - step.required_output.push_back(asts[i]->getColumnName()); - } + for (const auto & child : select_query->select_expression_list->children) + if (required_columns_before_projection.count(child->getColumnName())) + step.required_output.push_back(child->getColumnName()); } bool ExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, bool only_types) @@ -2797,9 +2798,8 @@ void ExpressionAnalyzer::getRequiredSourceColumnsInSelectImpl( return; } - /// TODO: DISTINCT, arrayJoin for (const auto & child : select_query->select_expression_list->children) - if (required_result_columns.empty() || required_result_columns.count(child->getAliasOrColumnName())) + if (required_columns_before_projection.count(child->getColumnName())) getRequiredSourceColumnsImpl(child, available_columns, required_source_columns, ignored_names, available_joined_columns, required_joined_columns); @@ -2896,4 +2896,32 @@ void ExpressionAnalyzer::getRequiredSourceColumnsImpl(const ASTPtr & ast, } } + +static bool hasArrayJoin(const ASTPtr & ast) +{ + if (const ASTFunction * function = typeid_cast(&*ast)) + if (function->name == "arrayJoin") + return true; + + for (const auto & child : ast->children) + if (!typeid_cast(child.get()) && hasArrayJoin(child)) + return true; + + return false; +} + + +void ExpressionAnalyzer::calculateRequiredColumnsBeforeProjection() +{ + if (!select_query) + return; + + for (const auto & child : select_query->select_expression_list->children) + if (required_result_columns.empty() + || select_query->distinct + || hasArrayJoin(child) + || required_result_columns.count(child->getAliasOrColumnName())) + required_columns_before_projection.insert(child->getColumnName()); +} + } diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h index 15be363ac38..e01871ac141 100644 --- 
a/dbms/src/Interpreters/ExpressionAnalyzer.h +++ b/dbms/src/Interpreters/ExpressionAnalyzer.h @@ -335,6 +335,12 @@ private: */ void translateQualifiedNames(); void translateQualifiedNamesImpl(ASTPtr & node, const String & database_name, const String & table_name, const String & alias); + + /** Sometimes we have to calculate more columns in SELECT clause than will be returned from query. + * This is the case when we have DISTINCT or arrayJoin: we require more columns in SELECT even if we need less columns in result. + */ + NameSet required_columns_before_projection; + void calculateRequiredColumnsBeforeProjection(); }; } diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 9e310ec745b..4361a17dedf 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -449,8 +449,6 @@ void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputSt executeOrder(pipeline); } - executeProjection(pipeline, expressions.final_projection); - /// At this stage, we can calculate the minimums and maximums, if necessary. if (settings.extremes) { @@ -487,9 +485,14 @@ void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputSt if (need_second_distinct_pass) executeDistinct(pipeline, false, Names()); + executeProjection(pipeline, expressions.final_projection); executeLimitBy(pipeline); executeLimit(pipeline); } + else + { + executeProjection(pipeline, expressions.final_projection); + } } } From 4dee4770ebe7db3b2fd71123dfc8bdab0640fae5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 26 Feb 2018 13:00:56 +0300 Subject: [PATCH 120/209] Better UNION ALL: development #1947 --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 38 ++++++++++---------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index ea9caa8bc10..ad65ed1f36d 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -25,7 +25,6 @@ #include #include -#include #include #include #include @@ -718,27 +717,30 @@ static std::shared_ptr interpretSubquery( std::set all_column_names; std::set assigned_column_names; - if (ASTSelectQuery * select = typeid_cast(query.get())) + if (ASTSelectWithUnionQuery * select_with_union = typeid_cast(query.get())) { - for (auto & expr : select->select_expression_list->children) - all_column_names.insert(expr->getAliasOrColumnName()); - - for (auto & expr : select->select_expression_list->children) + if (ASTSelectQuery * select = typeid_cast(select_with_union->list_of_selects->children.at(0).get())) { - auto name = expr->getAliasOrColumnName(); + for (auto & expr : select->select_expression_list->children) + all_column_names.insert(expr->getAliasOrColumnName()); - if (!assigned_column_names.insert(name).second) + for (auto & expr : select->select_expression_list->children) { - size_t i = 1; - while (all_column_names.end() != all_column_names.find(name + "_" + toString(i))) - ++i; + auto name = expr->getAliasOrColumnName(); - name = name + "_" + toString(i); - expr = expr->clone(); /// Cancels fuse of the same expressions in the tree. 
- expr->setAlias(name); + if (!assigned_column_names.insert(name).second) + { + size_t i = 1; + while (all_column_names.end() != all_column_names.find(name + "_" + toString(i))) + ++i; - all_column_names.insert(name); - assigned_column_names.insert(name); + name = name + "_" + toString(i); + expr = expr->clone(); /// Cancels fuse of the same expressions in the tree. + expr->setAlias(name); + + all_column_names.insert(name); + assigned_column_names.insert(name); + } } } } @@ -1197,7 +1199,7 @@ void ExpressionAnalyzer::executeScalarSubqueriesImpl(ASTPtr & ast) subquery_context.setSettings(subquery_settings); ASTPtr query = subquery->children.at(0); - BlockIO res = InterpreterSelectQuery(query, subquery_context, {}, QueryProcessingStage::Complete, subquery_depth + 1).execute(); + BlockIO res = InterpreterSelectWithUnionQuery(query, subquery_context, {}, QueryProcessingStage::Complete, subquery_depth + 1).execute(); Block block; try @@ -2739,7 +2741,7 @@ void ExpressionAnalyzer::collectJoinedColumns(NameSet & joined_columns, NamesAnd else if (table_expression.subquery) { const auto & subquery = table_expression.subquery->children.at(0); - nested_result_sample = InterpreterSelectQuery::getSampleBlock(subquery, context); + nested_result_sample = InterpreterSelectWithUnionQuery::getSampleBlock(subquery, context); } if (table_join.using_expression_list) From b5610b9f81a0d7ff433f9dcdc516f121927d7b19 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 26 Feb 2018 13:12:26 +0300 Subject: [PATCH 121/209] Better UNION ALL: development #1947 --- dbms/src/Parsers/ASTSelectWithUnionQuery.cpp | 42 ++++++++++++++++++++ dbms/src/Parsers/ASTSelectWithUnionQuery.h | 25 ++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 dbms/src/Parsers/ASTSelectWithUnionQuery.cpp create mode 100644 dbms/src/Parsers/ASTSelectWithUnionQuery.h diff --git a/dbms/src/Parsers/ASTSelectWithUnionQuery.cpp b/dbms/src/Parsers/ASTSelectWithUnionQuery.cpp new file mode 100644 index 00000000000..041f9bce8d5 --- /dev/null +++ b/dbms/src/Parsers/ASTSelectWithUnionQuery.cpp @@ -0,0 +1,42 @@ +#include +#include +#include + + +namespace DB +{ + +ASTPtr ASTSelectWithUnionQuery::clone() const +{ + auto res = std::make_shared(*this); + res->children.clear(); + + res->list_of_selects = list_of_selects->clone(); + res->children.push_back(res->list_of_selects); + + cloneOutputOptions(*res); + return res; +} + + +void ASTSelectWithUnionQuery::formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const +{ + std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); + + for (ASTs::const_iterator it = list_of_selects->children.begin(); it != list_of_selects->children.end(); ++it) + { + if (it != list_of_selects->children.begin()) + settings.ostr << settings.nl_or_ws << indent_str << hilite_keyword << "UNION ALL" << hilite_none << settings.nl_or_ws; + + (*it)->formatImpl(settings, state, frame); + } +} + + +void ASTSelectWithUnionQuery::setDatabaseIfNeeded(const String & database_name) +{ + for (auto & child : list_of_selects->children) + typeid_cast(*child).setDatabaseIfNeeded(database_name); +} + +} diff --git a/dbms/src/Parsers/ASTSelectWithUnionQuery.h b/dbms/src/Parsers/ASTSelectWithUnionQuery.h new file mode 100644 index 00000000000..e4e88426ede --- /dev/null +++ b/dbms/src/Parsers/ASTSelectWithUnionQuery.h @@ -0,0 +1,25 @@ +#pragma once + +#include + + +namespace DB +{ + +/** Single SELECT query or multiple SELECT queries with UNION ALL. 
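+  * The per-SELECT children are kept in list_of_selects; clone() re-clones that list and re-attaches it to children, and formatQueryImpl() prints the children joined by the UNION ALL keyword.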
  * Only UNION ALL is possible. No UNION DISTINCT or plain UNION.
  */
class ASTSelectWithUnionQuery : public ASTQueryWithOutput
{
public:
    String getID() const override { return "SelectWithUnionQuery"; }

    ASTPtr clone() const override;
    void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;

    void setDatabaseIfNeeded(const String & database_name);

    ASTPtr list_of_selects;
};

}

From 417cc7f6e20ab6108b6390812e9c30c35af5b6ae Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 26 Feb 2018 13:19:47 +0300
Subject: [PATCH 122/209] Better UNION ALL: development #1947

---
 dbms/src/Interpreters/InterpreterSelectQuery.cpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp
index 4361a17dedf..67e683ee028 100644
--- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp
@@ -1,5 +1,3 @@
-#include
-
 #include
 #include
 #include
@@ -516,7 +514,7 @@ static void getLimitLengthAndOffset(ASTSelectQuery & query, size_t & length, siz
 
 QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline & pipeline)
 {
     /// The subquery interpreter, if the table is a subquery.
-    std::optional<InterpreterSelectQuery> interpreter_subquery;
+    std::unique_ptr<InterpreterSelectWithUnionQuery> interpreter_subquery;
 
     /// List of columns to read to execute the query.
     Names required_columns = query_analyzer->getRequiredSourceColumns();
@@ -573,7 +571,7 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline
         subquery_settings.extremes = 0;
         subquery_context.setSettings(subquery_settings);
 
-        interpreter_subquery.emplace(
+        interpreter_subquery = std::make_unique<InterpreterSelectWithUnionQuery>(
             query_table, subquery_context, required_columns, QueryProcessingStage::Complete, subquery_depth + 1);
 
         /// If there is an aggregation in the outer query, WITH TOTALS is ignored in the subquery.

From 120530e44cf834c22747f2fac8bed0878926eef1 Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko
Date: Mon, 26 Feb 2018 04:27:33 +0300
Subject: [PATCH 123/209] Add requested changes.
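The hunks that follow rename the functions to `yandexConsistentHash` and `jumpConsistentHash` and add `sumburConsistentHash`. For reference, here is a self-contained sketch of the two portable algorithms: the sumbur body is transcribed from the hunk itself, the jump hash body follows the published Lamping-Veach algorithm (the hunk only calls `JumpConsistentHash`, whose body is not shown here), and the helper names plus the `0x9E3779B97F4A7C15` mixing constant are ours, standing in for `intHash64`:

```cpp
#include <cstdint>
#include <iostream>

/// Jump consistent hash (Lamping & Veach): maps a 64-bit key to one of
/// num_buckets buckets in O(log n) time with no extra memory.
static int32_t jumpHash(uint64_t key, int32_t num_buckets)
{
    int64_t b = -1;
    int64_t j = 0;
    while (j < num_buckets)
    {
        b = j;
        key = key * 2862933555777941757ULL + 1;
        j = static_cast<int64_t>((b + 1) * (double(1LL << 31) / double((key >> 33) + 1)));
    }
    return static_cast<int32_t>(b);
}

/// Sumbur consistent hash; body copied from the hunk below.
static uint32_t sumburHash(uint32_t hashed_integer, uint32_t cluster_capacity)
{
    uint32_t l = 0xFFFFFFFF;
    uint32_t part = l / cluster_capacity;

    if (l - hashed_integer < part)
        return 0;

    uint32_t h = hashed_integer;
    uint32_t n = 1;
    uint32_t i = 2;
    while (i <= cluster_capacity)
    {
        uint32_t c = l / (i * (i - 1));
        if (c <= h)
            h -= c;
        else
        {
            h += c * (i - n - 1);
            n = i;
            if (l / n - h < part)
                break;
        }
        i += 1;
    }
    return n - 1;
}

int main()
{
    /// The property both functions provide: growing the cluster from 9 to 10
    /// buckets relocates only about one tenth of the keys.
    const uint64_t keys = 100000;
    uint64_t moved_jump = 0;
    uint64_t moved_sumbur = 0;
    for (uint64_t k = 1; k <= keys; ++k)
    {
        uint64_t h = k * 0x9E3779B97F4A7C15ULL;  /// cheap mixer standing in for intHash64
        moved_jump += jumpHash(h, 9) != jumpHash(h, 10);
        moved_sumbur += sumburHash(static_cast<uint32_t>(h), 9) != sumburHash(static_cast<uint32_t>(h), 10);
    }
    std::cout << "moved by jump:   " << moved_jump << " / " << keys << '\n';
    std::cout << "moved by sumbur: " << moved_sumbur << " / " << keys << '\n';
}
```

Both counters should come out near 10000, i.e. roughly one key in ten moves when a tenth bucket is added, whereas a plain `hash % n` scheme would move about nine keys in ten. This is what makes these functions suitable as a resharding key, as in the `jumpConsistentHash(intHash64(d), 2)` sharding key used by the cluster-copier test below.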
[#CLICKHOUSE-3606] --- .../Functions/FunctionsConsistentHashing.cpp | 3 +- .../Functions/FunctionsConsistentHashing.h | 183 ++++++++++++------ .../task_month_to_week_description.xml | 2 +- .../integration/test_cluster_copier/test.py | 4 +- .../00580_consistent_hashing_functions.sql | 8 +- .../yandex-consistent-hashing/yandex/bitops.h | 21 +- .../yandex/consistent_hashing.h | 2 + .../yandex/popcount.h | 38 +--- 8 files changed, 144 insertions(+), 117 deletions(-) diff --git a/dbms/src/Functions/FunctionsConsistentHashing.cpp b/dbms/src/Functions/FunctionsConsistentHashing.cpp index abf789c6073..7f93257774b 100644 --- a/dbms/src/Functions/FunctionsConsistentHashing.cpp +++ b/dbms/src/Functions/FunctionsConsistentHashing.cpp @@ -8,7 +8,8 @@ namespace DB void registerFunctionsConsistentHashing(FunctionFactory & factory) { factory.registerFunction(); - factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); } } diff --git a/dbms/src/Functions/FunctionsConsistentHashing.h b/dbms/src/Functions/FunctionsConsistentHashing.h index 8678281cbeb..673bce1389d 100644 --- a/dbms/src/Functions/FunctionsConsistentHashing.h +++ b/dbms/src/Functions/FunctionsConsistentHashing.h @@ -6,9 +6,8 @@ #include #include #include - +#include #include -#include namespace DB @@ -23,10 +22,12 @@ namespace ErrorCodes } +/// An O(1) time and space consistent hash algorithm by Konstantin Oblakov struct YandexConsistentHashImpl { - static constexpr auto name = "YandexConsistentHash"; + static constexpr auto name = "yandexConsistentHash"; + using HashType = UInt64; /// Actually it supports UInt64, but it is effective only if n < 65536 using ResultType = UInt32; using BucketsCountType = ResultType; @@ -51,8 +52,9 @@ static inline int32_t JumpConsistentHash(uint64_t key, int32_t num_buckets) { struct JumpConsistentHashImpl { - static constexpr auto name = "JumpConsistentHash"; + static constexpr auto name = "jumpConsistentHash"; + using HashType = UInt64; using ResultType = Int32; using BucketsCountType = ResultType; @@ -63,14 +65,57 @@ struct JumpConsistentHashImpl }; +/// Sumbur algorithm https://github.com/mailru/sumbur-ruby/blob/master/lib/sumbur/pure_ruby.rb +static inline UInt32 sumburConsistentHash(UInt32 hashed_integer, UInt32 cluster_capacity) +{ + UInt32 l = 0xFFFFFFFF; + UInt32 part = l / cluster_capacity; + + if (l - hashed_integer < part) + return 0; + + UInt32 h = hashed_integer; + UInt32 n = 1; + UInt32 i = 2; + while (i <= cluster_capacity) + { + auto c = l / (i * (i - 1)); + if (c <= h) + h -= c; + else + { + h += c * (i - n - 1); + n = i; + if (l / n - h < part) + break; + } + i += 1; + } + + return n - 1; +} + +struct SumburConsistentHashImpl +{ + static constexpr auto name = "sumburConsistentHash"; + + using HashType = UInt32; + using ResultType = UInt32; + using BucketsCountType = ResultType; + + static inline ResultType apply(UInt32 hash, BucketsCountType n) + { + return sumburConsistentHash(hash, n); + } +}; + + template class FunctionConsistentHashImpl : public IFunction { public: static constexpr auto name = Impl::name; - using ResultType = typename Impl::ResultType; - using BucketsType = typename Impl::BucketsCountType; static FunctionPtr create(const Context &) { return std::make_shared>(); }; @@ -84,6 +129,10 @@ public: throw Exception("Illegal type " + arguments[0]->getName() + " of the first argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + if (arguments[0]->getSizeOfValueInMemory() > sizeof(HashType)) + throw Exception("Function " + 
getName() + " accepts " + std::to_string(sizeof(HashType) * 8) + "-bit integers at most" + + ", got " + arguments[0]->getName(), ErrorCodes::BAD_ARGUMENTS); + if (!arguments[1]->isInteger()) throw Exception("Illegal type " + arguments[1]->getName() + " of the second argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); @@ -91,83 +140,91 @@ public: return std::make_shared>(); } + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override { - auto buckets_col = block.getByPosition(arguments[1]).column.get(); - if (!buckets_col->isColumnConst()) + if (block.getByPosition(arguments[1]).column->isColumnConst()) + executeConstBuckets(block, arguments, result); + else throw Exception("The second argument of function " + getName() + " (number of buckets) must be constant", ErrorCodes::BAD_ARGUMENTS); - - constexpr UInt64 max_buckets = static_cast(std::numeric_limits::max()); - UInt64 num_buckets; - - auto check_range = [&] (auto buckets) - { - if (buckets <= 0) - throw Exception("The second argument of function " + getName() + " (number of buckets) must be positive number", - ErrorCodes::BAD_ARGUMENTS); - - if (static_cast(buckets) > max_buckets) - throw Exception("The value of the second argument of function " + getName() + " (number of buckets) is not fit to " + - DataTypeNumber().getName(), ErrorCodes::BAD_ARGUMENTS); - - num_buckets = static_cast(buckets); - }; - - Field buckets_field = (*buckets_col)[0]; - if (buckets_field.getType() == Field::Types::Int64) - check_range(buckets_field.safeGet()); - else if (buckets_field.getType() == Field::Types::UInt64) - check_range(buckets_field.safeGet()); - else - throw Exception("Illegal type " + String(buckets_field.getTypeName()) + " of the second argument of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - - const auto & hash_col_source = block.getByPosition(arguments[0]).column; - ColumnPtr hash_col = (hash_col_source->isColumnConst()) ? 
hash_col_source->convertToFullColumnIfConst() : hash_col_source; - ColumnPtr & res_col = block.getByPosition(result).column; - - - const IDataType * hash_type = block.getByPosition(arguments[0]).type.get(); - - if (checkDataType(hash_type)) executeType(hash_col, res_col, num_buckets); - else if (checkDataType(hash_type)) executeType(hash_col, res_col, num_buckets); - else if (checkDataType(hash_type)) executeType(hash_col, res_col, num_buckets); - else if (checkDataType(hash_type)) executeType(hash_col, res_col, num_buckets); - else if (checkDataType(hash_type)) executeType(hash_col, res_col, num_buckets); - else if (checkDataType(hash_type)) executeType(hash_col, res_col, num_buckets); - else if (checkDataType(hash_type)) executeType(hash_col, res_col, num_buckets); - else if (checkDataType(hash_type)) executeType(hash_col, res_col, num_buckets); - else - throw Exception("Illegal type " + hash_type->getName() + " of the first argument of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } private: - template - void executeType(const ColumnPtr & col_hash_ptr, ColumnPtr & out_col_result, const UInt64 num_buckets) + using HashType = typename Impl::HashType; + using ResultType = typename Impl::ResultType; + using BucketsType = typename Impl::BucketsCountType; + static constexpr auto max_buckets = static_cast(std::numeric_limits::max()); + + template + inline BucketsType checkBucketsRange(T buckets) { - auto col_hash = checkAndGetColumn>(col_hash_ptr.get()); + if (unlikely(buckets <= 0)) + throw Exception("The second argument of function " + getName() + " (number of buckets) must be positive number", + ErrorCodes::BAD_ARGUMENTS); + + if (unlikely(static_cast(buckets) > max_buckets)) + throw Exception("The value of the second argument of function " + getName() + " (number of buckets) is not fit to " + + DataTypeNumber().getName(), ErrorCodes::BAD_ARGUMENTS); + + return static_cast(buckets); + } + + void executeConstBuckets(Block & block, const ColumnNumbers & arguments, size_t result) + { + Field buckets_field = (*block.getByPosition(arguments[1]).column)[0]; + BucketsType num_buckets; + + if (buckets_field.getType() == Field::Types::Int64) + num_buckets = checkBucketsRange(buckets_field.get()); + else if (buckets_field.getType() == Field::Types::UInt64) + num_buckets = checkBucketsRange(buckets_field.get()); + else + throw Exception("Illegal type " + String(buckets_field.getTypeName()) + " of the second argument of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + const auto & hash_col = block.getByPosition(arguments[0]).column; + const IDataType * hash_type = block.getByPosition(arguments[0]).type.get(); + auto res_col = ColumnVector::create(); + + if (checkDataType(hash_type)) executeType(hash_col, num_buckets, res_col.get()); + else if (checkDataType(hash_type)) executeType(hash_col, num_buckets, res_col.get()); + else if (checkDataType(hash_type)) executeType(hash_col, num_buckets, res_col.get()); + else if (checkDataType(hash_type)) executeType(hash_col, num_buckets, res_col.get()); + else if (checkDataType(hash_type)) executeType(hash_col, num_buckets, res_col.get()); + else if (checkDataType(hash_type)) executeType(hash_col, num_buckets, res_col.get()); + else if (checkDataType(hash_type)) executeType(hash_col, num_buckets, res_col.get()); + else if (checkDataType(hash_type)) executeType(hash_col, num_buckets, res_col.get()); + else + throw Exception("Illegal type " + hash_type->getName() + " of the first argument of function " + getName(), + 
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + block.getByPosition(result).column = std::move(res_col); + } + + template + void executeType(const ColumnPtr & col_hash_ptr, BucketsType num_buckets, ColumnVector * col_result) + { + auto col_hash = checkAndGetColumn>(col_hash_ptr.get()); if (!col_hash) throw Exception("Illegal type of the first argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - auto col_result = ColumnVector::create(); - typename ColumnVector::Container & vec_result = col_result->getData(); + + auto & vec_result = col_result->getData(); const auto & vec_hash = col_hash->getData(); size_t size = vec_hash.size(); vec_result.resize(size); for (size_t i = 0; i < size; ++i) - vec_result[i] = Impl::apply(static_cast(vec_hash[i]), static_cast(num_buckets)); - - out_col_result = std::move(col_result); + vec_result[i] = Impl::apply(static_cast(vec_hash[i]), num_buckets); } }; using FunctionYandexConsistentHash = FunctionConsistentHashImpl; -using FunctionJumpConsistentHas = FunctionConsistentHashImpl; +using FunctionJumpConsistentHash = FunctionConsistentHashImpl; +using FunctionSumburConsistentHash = FunctionConsistentHashImpl; } diff --git a/dbms/tests/integration/test_cluster_copier/task_month_to_week_description.xml b/dbms/tests/integration/test_cluster_copier/task_month_to_week_description.xml index 8c0dc7b28ab..2955adab873 100644 --- a/dbms/tests/integration/test_cluster_copier/task_month_to_week_description.xml +++ b/dbms/tests/integration/test_cluster_copier/task_month_to_week_description.xml @@ -29,7 +29,7 @@ ENGINE=ReplicatedMergeTree('/clickhouse/tables/cluster{cluster}/{shard}/b', '{replica}') PARTITION BY toMonday(date) ORDER BY d - JumpConsistentHash(intHash64(d), 2) + jumpConsistentHash(intHash64(d), 2) diff --git a/dbms/tests/integration/test_cluster_copier/test.py b/dbms/tests/integration/test_cluster_copier/test.py index f3d317d5d1c..54b1ff87c50 100644 --- a/dbms/tests/integration/test_cluster_copier/test.py +++ b/dbms/tests/integration/test_cluster_copier/test.py @@ -129,8 +129,8 @@ class Task2: assert TSV(self.cluster.instances['s0_0_0'].query("SELECT count() FROM cluster(cluster0, default, a)")) == TSV("85\n") assert TSV(self.cluster.instances['s1_0_0'].query("SELECT count(), uniqExact(date) FROM cluster(cluster1, default, b)")) == TSV("85\t85\n") - assert TSV(self.cluster.instances['s1_0_0'].query("SELECT DISTINCT JumpConsistentHash(intHash64(d), 2) FROM b")) == TSV("0\n") - assert TSV(self.cluster.instances['s1_1_0'].query("SELECT DISTINCT JumpConsistentHash(intHash64(d), 2) FROM b")) == TSV("1\n") + assert TSV(self.cluster.instances['s1_0_0'].query("SELECT DISTINCT jumpConsistentHash(intHash64(d), 2) FROM b")) == TSV("0\n") + assert TSV(self.cluster.instances['s1_1_0'].query("SELECT DISTINCT jumpConsistentHash(intHash64(d), 2) FROM b")) == TSV("1\n") assert TSV(self.cluster.instances['s1_0_0'].query("SELECT uniqExact(partition) IN (12, 13) FROM system.parts WHERE active AND database='default' AND table='b'")) == TSV("1\n") assert TSV(self.cluster.instances['s1_1_0'].query("SELECT uniqExact(partition) IN (12, 13) FROM system.parts WHERE active AND database='default' AND table='b'")) == TSV("1\n") diff --git a/dbms/tests/queries/0_stateless/00580_consistent_hashing_functions.sql b/dbms/tests/queries/0_stateless/00580_consistent_hashing_functions.sql index 1a2303d3072..20d1892f192 100644 --- a/dbms/tests/queries/0_stateless/00580_consistent_hashing_functions.sql +++ b/dbms/tests/queries/0_stateless/00580_consistent_hashing_functions.sql @@ 
-1,4 +1,4 @@ -SELECT JumpConsistentHash(1, 1), JumpConsistentHash(42, 57), JumpConsistentHash(256, 1024), JumpConsistentHash(3735883980, 1), JumpConsistentHash(3735883980, 666), JumpConsistentHash(16045690984833335023, 255); -SELECT YandexConsistentHash(16045690984833335023, 1), YandexConsistentHash(16045690984833335023, 2), YandexConsistentHash(16045690984833335023, 3), YandexConsistentHash(16045690984833335023, 4), YandexConsistentHash(16045690984833335023, 173), YandexConsistentHash(16045690984833335023, 255); -SELECT JumpConsistentHash(intHash64(number), 787) FROM system.numbers LIMIT 1000000, 2; -SELECT YandexConsistentHash(16045690984833335023+number-number, 120) FROM system.numbers LIMIT 1000000, 2; +SELECT jumpConsistentHash(1, 1), jumpConsistentHash(42, 57), jumpConsistentHash(256, 1024), jumpConsistentHash(3735883980, 1), jumpConsistentHash(3735883980, 666), jumpConsistentHash(16045690984833335023, 255); +SELECT yandexConsistentHash(16045690984833335023, 1), yandexConsistentHash(16045690984833335023, 2), yandexConsistentHash(16045690984833335023, 3), yandexConsistentHash(16045690984833335023, 4), yandexConsistentHash(16045690984833335023, 173), yandexConsistentHash(16045690984833335023, 255); +SELECT jumpConsistentHash(intHash64(number), 787) FROM system.numbers LIMIT 1000000, 2; +SELECT yandexConsistentHash(16045690984833335023+number-number, 120) FROM system.numbers LIMIT 1000000, 2; diff --git a/libs/yandex-consistent-hashing/yandex/bitops.h b/libs/yandex-consistent-hashing/yandex/bitops.h index 0ddb7f8024b..697063ee77e 100644 --- a/libs/yandex-consistent-hashing/yandex/bitops.h +++ b/libs/yandex-consistent-hashing/yandex/bitops.h @@ -3,36 +3,33 @@ #include #include -// Assume little endian -inline uint16_t & LO_16(uint32_t & x) { return reinterpret_cast(&x)[0]; } -inline uint16_t & HI_16(uint32_t & x) { return reinterpret_cast(&x)[1]; } +inline uint16_t LO_16(uint32_t x) { return static_cast(x & 0x0000FFFF); } +inline uint16_t HI_16(uint32_t x) { return static_cast(x >> 16); } -inline uint32_t & LO_32(uint64_t & x) { return reinterpret_cast(&x)[0]; } -inline uint32_t & HI_32(uint64_t & x) { return reinterpret_cast(&x)[1]; } +inline uint32_t LO_32(uint64_t x) { return static_cast(x & 0x00000000FFFFFFFF); } +inline uint32_t HI_32(uint64_t x) { return static_cast(x >> 32); } +/// Clang also defines __GNUC__ #if defined(__GNUC__) inline unsigned GetValueBitCountImpl(unsigned int value) noexcept { - // Y_ASSERT(value); // because __builtin_clz* have undefined result for zero. + // NOTE: __builtin_clz* have undefined result for zero. return std::numeric_limits::digits - __builtin_clz(value); } inline unsigned GetValueBitCountImpl(unsigned long value) noexcept { - // Y_ASSERT(value); // because __builtin_clz* have undefined result for zero. return std::numeric_limits::digits - __builtin_clzl(value); } inline unsigned GetValueBitCountImpl(unsigned long long value) noexcept { - // Y_ASSERT(value); // because __builtin_clz* have undefined result for zero. return std::numeric_limits::digits - __builtin_clzll(value); } #else - /// Stupid realization for non-GCC. Can use BSR from x86 instructions set. + /// Stupid realization for non GCC-like compilers. Can use BSR from x86 instructions set. template inline unsigned GetValueBitCountImpl(T value) noexcept { - // Y_ASSERT(value); // because __builtin_clz* have undefined result for zero. - unsigned result = 1; // result == 0 - impossible value, see Y_ASSERT(). 
+ unsigned result = 1; // result == 0 - impossible value, since value cannot be zero value >>= 1; while (value) { value >>= 1; @@ -46,10 +43,10 @@ inline uint32_t & HI_32(uint64_t & x) { return reinterpret_cast(&x)[ /** * Returns the number of leading 0-bits in `value`, starting at the most significant bit position. + * NOTE: value cannot be zero */ template static inline unsigned GetValueBitCount(T value) noexcept { - // Y_ASSERT(value > 0); using TCvt = std::make_unsigned_t>; return GetValueBitCountImpl(static_cast(value)); } diff --git a/libs/yandex-consistent-hashing/yandex/consistent_hashing.h b/libs/yandex-consistent-hashing/yandex/consistent_hashing.h index 0ac2b01fcfb..fba229c2bd4 100644 --- a/libs/yandex-consistent-hashing/yandex/consistent_hashing.h +++ b/libs/yandex-consistent-hashing/yandex/consistent_hashing.h @@ -4,6 +4,8 @@ #include /* + * Author: Konstantin Oblakov + * * Maps random ui64 x (in fact hash of some string) to n baskets/shards. * Output value is id of a basket. 0 <= ConsistentHashing(x, n) < n. * Probability of all baskets must be equal. Also, it should be consistent diff --git a/libs/yandex-consistent-hashing/yandex/popcount.h b/libs/yandex-consistent-hashing/yandex/popcount.h index b49b2fb450a..fdb56173e44 100644 --- a/libs/yandex-consistent-hashing/yandex/popcount.h +++ b/libs/yandex-consistent-hashing/yandex/popcount.h @@ -11,12 +11,6 @@ using std::size_t; #include #endif -#ifdef __SSE2__ -constexpr bool HavePOPCNTInstr = true; -#else -constexpr bool HavePOPCNTInstr = false; -#pragma GCC warning "SSE2 is not detected, PopCount function will be too slow" -#endif static inline uint32_t PopCountImpl(uint8_t n) { extern uint8_t const* PopCountLUT8; @@ -35,21 +29,9 @@ static inline uint32_t PopCountImpl(uint16_t n) { static inline uint32_t PopCountImpl(uint32_t n) { #if defined(_MSC_VER) return __popcnt(n); +#elif defined(__GNUC__) // it is true for Clang also + return __builtin_popcount(n); #else -#if defined(__x86_64__) - - if (HavePOPCNTInstr) { - uint32_t r; - - __asm__("popcnt %1, %0;" - : "=r"(r) - : "r"(n) - :); - - return r; - } -#endif - return PopCountImpl((uint16_t)LO_16(n)) + PopCountImpl((uint16_t)HI_16(n)); #endif } @@ -57,21 +39,9 @@ static inline uint32_t PopCountImpl(uint32_t n) { static inline uint32_t PopCountImpl(uint64_t n) { #if defined(_MSC_VER) && !defined(_i386_) return __popcnt64(n); +#elif defined(__GNUC__) // it is true for Clang also + return __builtin_popcountll(n); #else -#if defined(__x86_64__) - - if (HavePOPCNTInstr) { - uint64_t r; - - __asm__("popcnt %1, %0;" - : "=r"(r) - : "r"(n) - :); - - return r; - } -#endif - return PopCountImpl((uint32_t)LO_32(n)) + PopCountImpl((uint32_t)HI_32(n)); #endif } From 4e27c268a17bdbd713d28c2590f159aeff8ee2e4 Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Mon, 26 Feb 2018 18:27:36 +0300 Subject: [PATCH 124/209] Add clickhouse-copier description to the docs. 
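Before the documentation patch below, a note on the `bitops.h` and `popcount.h` hunks that close the patch above: they drop the inline `popcnt` assembly, the SSE2 warning and the reinterpret_cast-based LO_/HI_ accessors in favour of compiler builtins and plain shifts. A minimal sketch of the same dispatch; the helper names are ours, and the bit-count part assumes a GCC-compatible compiler since it calls `__builtin_clzll` unconditionally:

```cpp
#include <cstdint>
#include <iostream>
#include <limits>

/// Builtin-based popcount, mirroring the patched popcount.h dispatch
/// (Clang also defines __GNUC__, so one branch covers both compilers).
static inline uint32_t popCount64(uint64_t n)
{
#if defined(__GNUC__)
    return __builtin_popcountll(n);
#else
    /// Portable fallback in the spirit of the LO_32/HI_32 halving path:
    /// clear the lowest set bit until nothing remains.
    uint32_t count = 0;
    for (; n; n &= n - 1)
        ++count;
    return count;
#endif
}

/// GetValueBitCount from bitops.h: index of the highest set bit, plus one.
/// NOTE: as in the patch, the argument must not be zero, because
/// __builtin_clzll has an undefined result for zero.
static inline unsigned getValueBitCount(unsigned long long value)
{
    return std::numeric_limits<unsigned long long>::digits - __builtin_clzll(value);
}

int main()
{
    std::cout << popCount64(0xF0F0F0F0F0F0F0F0ULL) << '\n'; // 32
    std::cout << getValueBitCount(1024) << '\n';            // 11
}
```

On MSVC the patched file keeps the `__popcnt` / `__popcnt64` intrinsics instead, as the hunk shows.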
[#CLICKHOUSE-3606] --- dbms/src/Server/ClusterCopier.h | 128 +------------- .../task_month_to_week_description.xml | 6 +- docs/en/utils/clickhouse-copier.md | 156 ++++++++++++++++++ docs/en/utils/clickhouse-local.md | 0 docs/en/utils/index.md | 6 + docs/mkdocs_en.yml | 5 + docs/mkdocs_ru.yml | 5 + docs/ru/utils/clickhouse-copier.md | 156 ++++++++++++++++++ docs/ru/utils/clickhouse-local.md | 0 docs/ru/utils/index.md | 6 + 10 files changed, 343 insertions(+), 125 deletions(-) create mode 100644 docs/en/utils/clickhouse-copier.md create mode 100644 docs/en/utils/clickhouse-local.md create mode 100644 docs/en/utils/index.md create mode 100644 docs/ru/utils/clickhouse-copier.md create mode 100644 docs/ru/utils/clickhouse-local.md create mode 100644 docs/ru/utils/index.md diff --git a/dbms/src/Server/ClusterCopier.h b/dbms/src/Server/ClusterCopier.h index 1a76a0c8c11..b24b08be0e7 100644 --- a/dbms/src/Server/ClusterCopier.h +++ b/dbms/src/Server/ClusterCopier.h @@ -1,132 +1,12 @@ #pragma once #include -/* = clickhouse-cluster-copier util = +/* clickhouse cluster copier util * Copies tables data from one cluster to new tables of other (possibly the same) cluster in distributed fault-tolerant manner. * - * Configuration of copying tasks is set in special ZooKeeper node (called the description node). - * A ZooKeeper path to the description node is specified via --task-path parameter. - * So, node /task/path/description should contain special XML content describing copying tasks. + * See overview in the docs: docs/en/utils/clickhouse-copier.md * - * Simultaneously many clickhouse-cluster-copier processes located on any servers could execute the same task. - * ZooKeeper node /task/path/ is used by the processes to coordinate their work. - * You must not add additional child nodes to /task/path/. - * - * Currently you are responsible for launching cluster-copier processes. - * You can launch as many processes as you want, whenever and wherever you want. - * Each process try to select nearest available shard of source cluster and copy some part of data (partition) from it to the whole - * destination cluster with resharding. - * Therefore it makes sense to launch cluster-copier processes on the source cluster nodes to reduce the network usage. - * - * Since the workers coordinate their work via ZooKeeper, in addition to --task-path you have to specify ZooKeeper - * configuration via --config-file parameter. Example of zookeeper.xml: - - - - - 127.0.0.1 - 2181 - - - - - * When you run clickhouse-cluster-copier --config-file --task-path - * the process connects to ZooKeeper, reads tasks config from /task/path/description and executes them. - * - * - * = Format of task config = - - - - - - - false - - 127.0.0.1 - 9000 - - - ... - - - - ... - - - - - 2 - - - - 1 - - - - - 0 - - - - - 3 - - 1 - - - - - - - <-- Source cluster name (from section) and tables in it that should be copied --> - source_cluster - test - hits - - <-- Destination cluster name and tables in which the data should be inserted --> - destination_cluster - test - hits2 - - - ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/hits2/{shard}/hits2', '{replica}', EventDate, (CounterID, EventDate), 8192) - - - intHash32(UserID) - - - CounterID != 0 - - - - 201712 - 201801 - ... - - - - - ... - - ... 
-
-
-
-
- * = Implementation details =
+ * Implementation details:
 *
 * cluster-copier workers pull each partition of each shard of the source cluster and push it to the destination cluster through
 * Distributed table (to perform data resharding). So, a worker job is one partition of one source shard.
 *
@@ -144,7 +24,7 @@
 *             /server_fqdn#PID_timestamp - cluster-copier worker ID
 *             ...
 *         /tables - directory with table tasks
- *             /table_hits - directory of table_hits task
+ *             /cluster.db.table - directory of the table task
 *                 /partition1 - directory for partition1
 *                     /shards - directory for source cluster shards
 *                         /1 - worker job for the first shard of partition1 of table test.hits
diff --git a/dbms/tests/integration/test_cluster_copier/task_month_to_week_description.xml b/dbms/tests/integration/test_cluster_copier/task_month_to_week_description.xml
index 2955adab873..fe2d4a71596 100644
--- a/dbms/tests/integration/test_cluster_copier/task_month_to_week_description.xml
+++ b/dbms/tests/integration/test_cluster_copier/task_month_to_week_description.xml
@@ -26,7 +26,11 @@
     -->
-    <engine>ENGINE=ReplicatedMergeTree('/clickhouse/tables/cluster{cluster}/{shard}/b', '{replica}') PARTITION BY toMonday(date) ORDER BY d</engine>
+    <engine>
+    ENGINE=
+    ReplicatedMergeTree('/clickhouse/tables/cluster{cluster}/{shard}/b', '{replica}')
+    PARTITION BY toMonday(date)
+    ORDER BY d
+    </engine>
 
     <sharding_key>jumpConsistentHash(intHash64(d), 2)</sharding_key>
diff --git a/docs/en/utils/clickhouse-copier.md b/docs/en/utils/clickhouse-copier.md
new file mode 100644
index 00000000000..25d22f19222
--- /dev/null
+++ b/docs/en/utils/clickhouse-copier.md
@@ -0,0 +1,156 @@
+# clickhouse-copier util
+
+The utility copies table data from one cluster to new tables of another (possibly the same) cluster in a distributed and fault-tolerant manner.
+
+The configuration of copying tasks is set in a special ZooKeeper node (called the `/description` node).
+The ZooKeeper path to the description node is specified via the `--task-path ` parameter.
+So, node `/task/path/description` should contain special XML content describing copying tasks.
+
+Many `clickhouse-copier` processes located on any servers can execute the same task simultaneously.
+The ZooKeeper node `/task/path/` is used by the processes to coordinate their work.
+You must not add additional child nodes to `/task/path/`.
+
+Currently you are responsible for launching all `clickhouse-copier` processes manually.
+You can launch as many processes as you want, whenever and wherever you want.
+Each process tries to select the nearest available shard of the source cluster and copy a part of the data (a partition) from it to the whole
+destination cluster (with resharding).
+Therefore it makes sense to launch copier processes on the source cluster nodes to reduce network usage.
+
+Since the workers coordinate their work via ZooKeeper, in addition to `--task-path ` you have to specify the ZooKeeper
+cluster configuration via the `--config-file ` parameter. Example of `zookeeper.xml`:
+
+```xml
+<zookeeper>
+    <node>
+        <host>127.0.0.1</host>
+        <port>2181</port>
+    </node>
+</zookeeper>
+```
+
+When you run `clickhouse-copier --config-file --task-path ` the process connects to the ZooKeeper cluster, reads the task config from `/task/path/description` and executes it.
+
+## Format of task config
+
+Here is an example of `/task/path/description` content:
+
+```xml
+<yandex>
+    <remote_servers>
+        <source_cluster>
+            <shard>
+                <internal_replication>false</internal_replication>
+                <replica>
+                    <host>127.0.0.1</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+            ...
+        </source_cluster>
+
+        <destination_cluster>
+        ...
+        </destination_cluster>
+    </remote_servers>
+
+    <max_workers>2</max_workers>
+
+    <settings_pull>
+        <readonly>1</readonly>
+    </settings_pull>
+
+    <settings_push>
+        <readonly>0</readonly>
+    </settings_push>
+
+    <settings>
+        <connect_timeout>3</connect_timeout>
+        <insert_distributed_sync>1</insert_distributed_sync>
+    </settings>
+
+    <tables>
+        <table_hits>
+            <cluster_pull>source_cluster</cluster_pull>
+            <database_pull>test</database_pull>
+            <table_pull>hits</table_pull>
+
+            <cluster_push>destination_cluster</cluster_push>
+            <database_push>test</database_push>
+            <table_push>hits2</table_push>
+
+            <engine>
+            ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/hits2', '{replica}')
+            PARTITION BY toMonday(date)
+            ORDER BY (CounterID, EventDate)
+            </engine>
+
+            <sharding_key>jumpConsistentHash(intHash64(UserID), 2)</sharding_key>
+
+            <where_condition>CounterID != 0</where_condition>
+
+            <enabled_partitions>
+                <partition>'2018-02-26'</partition>
+                <partition>'2018-03-05'</partition>
+                ...
+            </enabled_partitions>
+        </table_hits>
+
+        ...
+    </tables>
+</yandex>
+```
+
+clickhouse-copier processes watch for updates of the `/task/path/description` node.
+So, if you modify the config settings or the `max_workers` param, the tasks will be updated.
+
+## Example
+
+```bash
+clickhouse-copier copier --daemon --config /path/to/copier/zookeeper.xml --task-path /clickhouse-copier/cluster1_tables_hits --base-dir /path/to/copier_logs
+```
+
+`--base-dir /path/to/copier_logs` specifies where auxiliary and log files of the copier process will be saved.
+In this case it will create a `/path/to/copier_logs/clickhouse-copier_YYYYMMHHSS_/` directory with log and status files.
+If it is not specified, it will use the current directory (`/clickhouse-copier_YYYYMMHHSS_/` if it is run as a `--daemon`).
diff --git a/docs/en/utils/clickhouse-local.md b/docs/en/utils/clickhouse-local.md
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/docs/en/utils/index.md b/docs/en/utils/index.md
new file mode 100644
index 00000000000..7a8c5ee5138
--- /dev/null
+++ b/docs/en/utils/index.md
@@ -0,0 +1,6 @@
+# ClickHouse utilities
+
+There are several ClickHouse utilities that are separate executable files:
+
+* `clickhouse-local` allows executing SQL queries on local data, similarly to `awk`
+* `clickhouse-copier` copies (and reshards) immutable data from one cluster to another in a fault-tolerant manner.
diff --git a/docs/mkdocs_en.yml b/docs/mkdocs_en.yml
index ec933308103..c39e8a60242 100644
--- a/docs/mkdocs_en.yml
+++ b/docs/mkdocs_en.yml
@@ -235,6 +235,11 @@ pages:
     - 'Settings': 'operations/settings/settings.md'
     - 'Settings profiles': 'operations/settings/settings_profiles.md'
 
+- 'Utilities':
+  - 'Utilities': 'utils/index.md'
+  - 'clickhouse-copier': 'utils/clickhouse-copier.md'
+  #- 'clickhouse-local' : 'utils/clickhouse-local.md'
+
 - 'ClickHouse Development':
 #  - 'ClickHouse Development': 'development/index.md'
     - 'Overview of ClickHouse architecture': 'development/architecture.md'
diff --git a/docs/mkdocs_ru.yml b/docs/mkdocs_ru.yml
index ad26a510ad9..05d7e9d8eb8 100644
--- a/docs/mkdocs_ru.yml
+++ b/docs/mkdocs_ru.yml
@@ -238,6 +238,11 @@ pages:
     - 'Настройки': 'operations/settings/settings.md'
     - 'Профили настроек': 'operations/settings/settings_profiles.md'
 
+- 'Утилиты':
+  - 'Утилиты': 'utils/index.md'
+  - 'clickhouse-copier': 'utils/clickhouse-copier.md'
+  #- 'clickhouse-local' : 'utils/clickhouse-local.md'
+
 - 'ClickHouse Development':
 #  - 'ClickHouse Development': 'development/index.md'
     - 'Overview of ClickHouse architecture': 'development/architecture.md'
diff --git a/docs/ru/utils/clickhouse-copier.md b/docs/ru/utils/clickhouse-copier.md
new file mode 100644
index 00000000000..25d22f19222
--- /dev/null
+++ b/docs/ru/utils/clickhouse-copier.md
@@ -0,0 +1,156 @@
+# clickhouse-copier util
+
+The utility copies table data from one cluster to new tables of another (possibly the same) cluster in a distributed and fault-tolerant manner.
+
+The configuration of copying tasks is set in a special ZooKeeper node (called the `/description` node).
+The ZooKeeper path to the description node is specified via the `--task-path ` parameter.
+So, node `/task/path/description` should contain special XML content describing copying tasks.
+
+Many `clickhouse-copier` processes located on any servers can execute the same task simultaneously.
+The ZooKeeper node `/task/path/` is used by the processes to coordinate their work.
+You must not add additional child nodes to `/task/path/`.
+
+Currently you are responsible for launching all `clickhouse-copier` processes manually.
+You can launch as many processes as you want, whenever and wherever you want.
+Each process tries to select the nearest available shard of the source cluster and copy a part of the data (a partition) from it to the whole
+destination cluster (with resharding).
+Therefore it makes sense to launch copier processes on the source cluster nodes to reduce network usage.
+
+Since the workers coordinate their work via ZooKeeper, in addition to `--task-path ` you have to specify the ZooKeeper
+cluster configuration via the `--config-file ` parameter. Example of `zookeeper.xml`:
+
+```xml
+<zookeeper>
+    <node>
+        <host>127.0.0.1</host>
+        <port>2181</port>
+    </node>
+</zookeeper>
+```
+
+When you run `clickhouse-copier --config-file --task-path ` the process connects to the ZooKeeper cluster, reads the task config from `/task/path/description` and executes it.
+
+## Format of task config
+
+Here is an example of `/task/path/description` content:
+
+```xml
+<yandex>
+    <remote_servers>
+        <source_cluster>
+            <shard>
+                <internal_replication>false</internal_replication>
+                <replica>
+                    <host>127.0.0.1</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+            ...
+        </source_cluster>
+
+        <destination_cluster>
+        ...
+        </destination_cluster>
+    </remote_servers>
+
+    <max_workers>2</max_workers>
+
+    <settings_pull>
+        <readonly>1</readonly>
+    </settings_pull>
+
+    <settings_push>
+        <readonly>0</readonly>
+    </settings_push>
+
+    <settings>
+        <connect_timeout>3</connect_timeout>
+        <insert_distributed_sync>1</insert_distributed_sync>
+    </settings>
+
+    <tables>
+        <table_hits>
+            <cluster_pull>source_cluster</cluster_pull>
+            <database_pull>test</database_pull>
+            <table_pull>hits</table_pull>
+
+            <cluster_push>destination_cluster</cluster_push>
+            <database_push>test</database_push>
+            <table_push>hits2</table_push>
+
+            <engine>
+            ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/hits2', '{replica}')
+            PARTITION BY toMonday(date)
+            ORDER BY (CounterID, EventDate)
+            </engine>
+
+            <sharding_key>jumpConsistentHash(intHash64(UserID), 2)</sharding_key>
+
+            <where_condition>CounterID != 0</where_condition>
+
+            <enabled_partitions>
+                <partition>'2018-02-26'</partition>
+                <partition>'2018-03-05'</partition>
+                ...
+            </enabled_partitions>
+        </table_hits>
+
+        ...
+    </tables>
+</yandex>
+```
+
+clickhouse-copier processes watch for updates of the `/task/path/description` node.
+So, if you modify the config settings or the `max_workers` param, the tasks will be updated.
+
+## Example
+
+```bash
+clickhouse-copier copier --daemon --config /path/to/copier/zookeeper.xml --task-path /clickhouse-copier/cluster1_tables_hits --base-dir /path/to/copier_logs
+```
+
+`--base-dir /path/to/copier_logs` specifies where auxiliary and log files of the copier process will be saved.
+In this case it will create a `/path/to/copier_logs/clickhouse-copier_YYYYMMHHSS_/` directory with log and status files.
+If it is not specified, it will use the current directory (`/clickhouse-copier_YYYYMMHHSS_/` if it is run as a `--daemon`).
diff --git a/docs/ru/utils/clickhouse-local.md b/docs/ru/utils/clickhouse-local.md
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/docs/ru/utils/index.md b/docs/ru/utils/index.md
new file mode 100644
index 00000000000..760fc0100c3
--- /dev/null
+++ b/docs/ru/utils/index.md
@@ -0,0 +1,6 @@
+# Утилиты ClickHouse
+
+Существует несколько утилит ClickHouse, которые представляют из себя отдельные исполняемые файлы:
+
+* `clickhouse-local` позволяет выполнять SQL-запросы над данными подобно тому, как это делает `awk`
+* `clickhouse-copier` копирует (и перешардирует) неизменяемые данные с одного кластера на другой отказоустойчивым способом.

From 1362e0be6832365152963a4382741ed3a477e4b7 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 26 Feb 2018 19:50:07 +0300
Subject: [PATCH 125/209] Doc: reordered sections; fixed errors in titles.
--- docs/mkdocs_en.yml | 142 ++++++++++++++++++++++----------------------- 1 file changed, 70 insertions(+), 72 deletions(-) diff --git a/docs/mkdocs_en.yml b/docs/mkdocs_en.yml index ec933308103..5966c952a03 100644 --- a/docs/mkdocs_en.yml +++ b/docs/mkdocs_en.yml @@ -22,8 +22,6 @@ theme: primary: 'white' accent: 'white' font: false -# text: Roboto -# code: Roboto Mono logo: 'images/logo.svg' favicon: 'assets/images/favicon.ico' include_search_page: false @@ -53,12 +51,12 @@ pages: - 'Getting started': - 'Deploying and running': 'getting_started/index.md' - 'Testing data': - - 'AMPLab Big Data Benchmark': 'getting_started/example_datasets/amplab_benchmark.md' - - 'Terabyte of click logs from Criteo': 'getting_started/example_datasets/criteo.md' - - 'Data about New York taxis': 'getting_started/example_datasets/nyc_taxi.md' - 'OnTime': 'getting_started/example_datasets/ontime.md' - - 'Star scheme': 'getting_started/example_datasets/star_schema.md' + - 'New York Taxi data': 'getting_started/example_datasets/nyc_taxi.md' + - 'AMPLab Big Data Benchmark': 'getting_started/example_datasets/amplab_benchmark.md' - 'WikiStat': 'getting_started/example_datasets/wikistat.md' + - 'Terabyte click logs from Criteo': 'getting_started/example_datasets/criteo.md' + - 'Star Schema Benchmark': 'getting_started/example_datasets/star_schema.md' - 'Interfaces': - 'Interfaces': 'interfaces/index.md' @@ -77,95 +75,95 @@ pages: - 'Table engines': - 'Introduction': 'table_engines/index.md' - - 'AggregatingMergeTree': 'table_engines/aggregatingmergetree.md' - - 'Buffer': 'table_engines/buffer.md' - - 'CollapsingMergeTree': 'table_engines/collapsingmergetree.md' - - 'Custom partitioning key': 'table_engines/custom_partitioning_key.md' - - 'Distributed': 'table_engines/distributed.md' - - 'External data for query processing': 'table_engines/external_data.md' - - 'File(InputFormat)': 'table_engines/file.md' - - 'GraphiteMergeTree': 'table_engines/graphitemergetree.md' - - 'Join': 'table_engines/join.md' - - 'Kafka': 'table_engines/kafka.md' - - 'Log': 'table_engines/log.md' - - 'MaterializedView': 'table_engines/materializedview.md' - - 'Memory': 'table_engines/memory.md' - - 'Merge': 'table_engines/merge.md' - - 'MergeTree': 'table_engines/mergetree.md' - - 'Null': 'table_engines/null.md' - - 'ReplacingMergeTree': 'table_engines/replacingmergetree.md' - - 'Data replication': 'table_engines/replication.md' - - 'Set': 'table_engines/set.md' - - 'SummingMergeTree': 'table_engines/summingmergetree.md' - 'TinyLog': 'table_engines/tinylog.md' + - 'Log': 'table_engines/log.md' + - 'Memory': 'table_engines/memory.md' + - 'MergeTree': 'table_engines/mergetree.md' + - 'Custom partitioning key': 'table_engines/custom_partitioning_key.md' + - 'ReplacingMergeTree': 'table_engines/replacingmergetree.md' + - 'SummingMergeTree': 'table_engines/summingmergetree.md' + - 'AggregatingMergeTree': 'table_engines/aggregatingmergetree.md' + - 'CollapsingMergeTree': 'table_engines/collapsingmergetree.md' + - 'GraphiteMergeTree': 'table_engines/graphitemergetree.md' + - 'Data replication': 'table_engines/replication.md' + - 'Distributed': 'table_engines/distributed.md' + - 'Merge': 'table_engines/merge.md' + - 'Buffer': 'table_engines/buffer.md' + - 'File': 'table_engines/file.md' + - 'Null': 'table_engines/null.md' + - 'Set': 'table_engines/set.md' + - 'Join': 'table_engines/join.md' - 'View': 'table_engines/view.md' + - 'MaterializedView': 'table_engines/materializedview.md' + - 'Kafka': 'table_engines/kafka.md' + - 'External data for query 
processing': 'table_engines/external_data.md' - 'System tables': - 'Introduction': 'system_tables/index.md' - - 'system.asynchronous_metrics': 'system_tables/system.asynchronous_metrics.md' - - 'system.clusters': 'system_tables/system.clusters.md' - - 'system.columns': 'system_tables/system.columns.md' - - 'system.databases': 'system_tables/system.databases.md' - - 'system.dictionaries': 'system_tables/system.dictionaries.md' - - 'system.events': 'system_tables/system.events.md' - - 'system.functions': 'system_tables/system.functions.md' - - 'system.merges': 'system_tables/system.merges.md' - - 'system.metrics': 'system_tables/system.metrics.md' + - 'system.one': 'system_tables/system.one.md' - 'system.numbers': 'system_tables/system.numbers.md' - 'system.numbers_mt': 'system_tables/system.numbers_mt.md' - - 'system.one': 'system_tables/system.one.md' + - 'system.databases': 'system_tables/system.databases.md' + - 'system.tables': 'system_tables/system.tables.md' + - 'system.columns': 'system_tables/system.columns.md' - 'system.parts': 'system_tables/system.parts.md' - 'system.processes': 'system_tables/system.processes.md' + - 'system.merges': 'system_tables/system.merges.md' + - 'system.events': 'system_tables/system.events.md' + - 'system.metrics': 'system_tables/system.metrics.md' + - 'system.asynchronous_metrics': 'system_tables/system.asynchronous_metrics.md' - 'system.replicas': 'system_tables/system.replicas.md' + - 'system.dictionaries': 'system_tables/system.dictionaries.md' + - 'system.clusters': 'system_tables/system.clusters.md' + - 'system.functions': 'system_tables/system.functions.md' - 'system.settings': 'system_tables/system.settings.md' - - 'system.tables': 'system_tables/system.tables.md' - 'system.zookeeper': 'system_tables/system.zookeeper.md' - 'Table functions': - 'Introduction': 'table_functions/index.md' - - 'merge': 'table_functions/merge.md' - 'remote': 'table_functions/remote.md' + - 'merge': 'table_functions/merge.md' - 'Formats': - 'Introduction': 'formats/index.md' - - 'CapnProto': 'formats/capnproto.md' + - 'TabSeparated': 'formats/tabseparated.md' + - 'TabSeparatedRaw': 'formats/tabseparatedraw.md' + - 'TabSeparatedWithNames': 'formats/tabseparatedwithnames.md' + - 'TabSeparatedWithNamesAndTypes': 'formats/tabseparatedwithnamesandtypes.md' - 'CSV': 'formats/csv.md' - 'CSVWithNames': 'formats/csvwithnames.md' + - 'Values': 'formats/values.md' + - 'Vertical': 'formats/vertical.md' - 'JSON': 'formats/json.md' - 'JSONCompact': 'formats/jsoncompact.md' - 'JSONEachRow': 'formats/jsoneachrow.md' - - 'Native': 'formats/native.md' - - 'Null': 'formats/null.md' + - 'TSKV': 'formats/tskv.md' - 'Pretty': 'formats/pretty.md' - 'PrettyCompact': 'formats/prettycompact.md' - 'PrettyCompactMonoBlock': 'formats/prettycompactmonoblock.md' - 'PrettyNoEscapes': 'formats/prettynoescapes.md' - 'PrettySpace': 'formats/prettyspace.md' - 'RowBinary': 'formats/rowbinary.md' - - 'TabSeparated': 'formats/tabseparated.md' - - 'TabSeparatedRaw': 'formats/tabseparatedraw.md' - - 'TabSeparatedWithNames': 'formats/tabseparatedwithnames.md' - - 'TabSeparatedWithNamesAndTypes': 'formats/tabseparatedwithnamesandtypes.md' - - 'TSKV': 'formats/tskv.md' - - 'Values': 'formats/values.md' - - 'Vertical': 'formats/vertical.md' + - 'Native': 'formats/native.md' + - 'Null': 'formats/null.md' - 'XML': 'formats/xml.md' + - 'CapnProto': 'formats/capnproto.md' - 'Data types': - 'Introduction': 'data_types/index.md' - - 'Array(T)': 'data_types/array.md' + - 'UInt8, UInt16, UInt32, UInt64, Int8, Int16, 
Int32, Int64': 'data_types/int_uint.md' + - 'Float32, Float64': 'data_types/float.md' - 'Boolean values': 'data_types/boolean.md' + - 'String': 'data_types/string.md' + - 'FixedString(N)': 'data_types/fixedstring.md' - 'Date': 'data_types/date.md' - 'DateTime': 'data_types/datetime.md' - 'Enum': 'data_types/enum.md' - - 'FixedString(N)': 'data_types/fixedstring.md' - - 'Float32, Float64': 'data_types/float.md' - - 'UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64': 'data_types/int_uint.md' - - 'String': 'data_types/string.md' + - 'Array(T)': 'data_types/array.md' + - 'AggregateFunction(name, types_of_arguments...)': 'data_types/nested_data_structures/aggregatefunction.md' - 'Tuple(T1, T2, ...)': 'data_types/tuple.md' - 'Nested data structures': # - 'Вложенные структуры данных': 'data_types/nested_data_structures/index.md' - - 'AggregateFunction(name, types_of_arguments...)': 'data_types/nested_data_structures/aggregatefunction.md' - 'Nested(Name1 Type1, Name2 Type2, ...)': 'data_types/nested_data_structures/nested.md' - 'Special data types': # - 'Служебные типы данных': 'data_types/special_data_types/index.md' @@ -177,31 +175,31 @@ pages: - 'Functions': - 'Introduction': 'functions/index.md' - 'Arithmetic functions': 'functions/arithmetic_functions.md' - - 'Functions for working with arrays': 'functions/array_functions.md' - - 'arrayJoin function': 'functions/array_join.md' - - 'Bit functions': 'functions/bit_functions.md' - 'Comparison functions': 'functions/comparison_functions.md' - - 'Conditional functions': 'functions/conditional_functions.md' + - 'Logical functions': 'functions/logical_functions.md' + - 'Type conversion functions': 'functions/type_conversion_functions.md' - 'Functions for working with dates and times': 'functions/date_time_functions.md' - - 'Encoding functions': 'functions/encoding_functions.md' - - 'Functions for working with external dictionaries': 'functions/ext_dict_functions.md' + - 'Functions for working with strings': 'functions/string_functions.md' + - 'Functions for searching strings': 'functions/string_search_functions.md' + - 'Functions for searching and replacing in strings': 'functions/string_replace_functions.md' + - 'Conditional functions': 'functions/conditional_functions.md' + - 'Mathematical functions': 'functions/math_functions.md' + - 'Rounding functions': 'functions/rounding_functions.md' + - 'Functions for working with arrays': 'functions/array_functions.md' + - 'Functions for splitting and merging strings and arrays': 'functions/splitting_merging_functions.md' + - 'Bit functions': 'functions/bit_functions.md' - 'Hash functions': 'functions/hash_functions.md' - - 'Higher-order functions': 'functions/higher_order_functions.md' - - 'Functions for implementing the IN operator': 'functions/in_functions.md' + - 'Functions for generating pseudo-random numbers': 'functions/random_functions.md' + - 'Encoding functions': 'functions/encoding_functions.md' + - 'Functions for working with URLs': 'functions/url_functions.md' - 'Functions for working with IP addresses': 'functions/ip_address_functions.md' - 'Functions for working with JSON.': 'functions/json_functions.md' - - 'Logical functions': 'functions/logical_functions.md' - - 'Mathematical functions': 'functions/math_functions.md' + - 'Higher-order functions': 'functions/higher_order_functions.md' - 'Other functions': 'functions/other_functions.md' - - 'Functions for generating pseudo-random numbers': 'functions/random_functions.md' - - 'Rounding functions': 'functions/rounding_functions.md' - 
- 'Functions for splitting and merging strings and arrays': 'functions/splitting_merging_functions.md' - - 'Functions for working with strings': 'functions/string_functions.md' - - 'Functions for searching and replacing in strings': 'functions/string_replace_functions.md' - - 'Functions for searching strings': 'functions/string_search_functions.md' - - 'Type conversion functions': 'functions/type_conversion_functions.md' - - 'Functions for working with URLs': 'functions/url_functions.md' + - 'Functions for working with external dictionaries': 'functions/ext_dict_functions.md' - 'Functions for working with Yandex.Metrica dictionaries': 'functions/ym_dict_functions.md' + - 'Functions for implementing the IN operator': 'functions/in_functions.md' + - 'arrayJoin function': 'functions/array_join.md' - 'Aggregate functions': - 'Introduction': 'agg_functions/index.md' From 566d66e858f91c5f907bf9af02a08ddd6c72a7f8 Mon Sep 17 00:00:00 2001 From: proller Date: Mon, 26 Feb 2018 19:57:14 +0300 Subject: [PATCH 126/209] Dictionary Library source: Persistent per-dictionary custom data store (#1917) * LibraryDictionarySource: Support numeric types, initial support for strings * Dictionary Library source with string types * clang-format * Dictionary Library source: Persistent per-dictionary custom data store * Fix unused * Allocate -> New * Update examples * fix --- .../Dictionaries/LibraryDictionarySource.cpp | 42 +++--- .../Dictionaries/LibraryDictionarySource.h | 5 +- .../dictionary_library/dictionary_library.cpp | 129 +++++++++++------- .../dictionary_library/dictionary_library_c.c | 34 +++-- 4 files changed, 130 insertions(+), 80 deletions(-) diff --git a/dbms/src/Dictionaries/LibraryDictionarySource.cpp b/dbms/src/Dictionaries/LibraryDictionarySource.cpp index 457996696cc..04ee831bb1b 100644 --- a/dbms/src/Dictionaries/LibraryDictionarySource.cpp +++ b/dbms/src/Dictionaries/LibraryDictionarySource.cpp @@ -61,7 +61,7 @@ namespace if (bracket_pos != std::string::npos && bracket_pos > 0) key_name = key.substr(0, bracket_pos); strings.emplace_back(key_name); - strings.emplace_back(config.getString(config_root + '.' + key)); + strings.emplace_back(config.getString(config_root + "." 
+ key)); } return CStringsHolder(strings); } @@ -118,13 +118,14 @@ LibraryDictionarySource::LibraryDictionarySource(const DictionaryStructure & dic , context(context) { if (!Poco::File(path).exists()) - { throw Exception("LibraryDictionarySource: Can't load lib " + toString() + ": " + Poco::File(path).path() + " - File doesn't exist", ErrorCodes::FILE_DOESNT_EXIST); - } description.init(sample_block); library = std::make_shared(path); settings = std::make_shared(getLibSettings(config, config_prefix + lib_config_settings)); + auto fptr = library->tryGet("ClickHouseDictionary_v2_libNew"); + if (fptr) + lib_data = fptr(); } LibraryDictionarySource::LibraryDictionarySource(const LibraryDictionarySource & other) @@ -137,9 +138,17 @@ LibraryDictionarySource::LibraryDictionarySource(const LibraryDictionarySource & , library{other.library} , description{other.description} , settings{other.settings} + , lib_data{other.lib_data} { } +LibraryDictionarySource::~LibraryDictionarySource() +{ + auto fptr = library->tryGet("ClickHouseDictionary_v2_libDelete"); + if (fptr) + fptr(lib_data); +} + BlockInputStreamPtr LibraryDictionarySource::loadAll() { LOG_TRACE(log, "loadAll " + toString()); @@ -155,13 +164,13 @@ BlockInputStreamPtr LibraryDictionarySource::loadAll() } void * data_ptr = nullptr; - /// Get function pointer before dataAllocate call because library->get may throw. + /// Get function pointer before dataNew call because library->get may throw. auto fptr = library->getstrings), decltype(&columns))>("ClickHouseDictionary_v2_loadAll"); - data_ptr = library->get("ClickHouseDictionary_v2_dataAllocate")(); + data_ptr = library->get("ClickHouseDictionary_v2_dataNew")(lib_data); auto data = fptr(data_ptr, &settings->strings, &columns); auto block = dataToBlock(description.sample_block, data); - library->get("ClickHouseDictionary_v2_dataDelete")(data_ptr); + library->get("ClickHouseDictionary_v2_dataDelete")(lib_data, data_ptr); return std::make_shared(block); } @@ -181,13 +190,13 @@ BlockInputStreamPtr LibraryDictionarySource::loadIds(const std::vector & } void * data_ptr = nullptr; - /// Get function pointer before dataAllocate call because library->get may throw. + /// Get function pointer before dataNew call because library->get may throw. auto fptr = library->getstrings), decltype(&columns_pass), decltype(&ids_data))>( "ClickHouseDictionary_v2_loadIds"); - data_ptr = library->get("ClickHouseDictionary_v2_dataAllocate")(); + data_ptr = library->get("ClickHouseDictionary_v2_dataNew")(lib_data); auto data = fptr(data_ptr, &settings->strings, &columns_pass, &ids_data); auto block = dataToBlock(description.sample_block, data); - library->get("ClickHouseDictionary_v2_dataDelete")(data_ptr); + library->get("ClickHouseDictionary_v2_dataDelete")(lib_data, data_ptr); return std::make_shared(block); } @@ -208,30 +217,31 @@ BlockInputStreamPtr LibraryDictionarySource::loadKeys(const Columns & key_column ext::bit_cast(requested_rows.data()), requested_rows.size()}; void * data_ptr = nullptr; - /// Get function pointer before dataAllocate call because library->get may throw. + /// Get function pointer before dataNew call because library->get may throw. 
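    /// Lifecycle of the v2 interface, as wired up in this patch: the constructor
    /// obtains lib_data from ClickHouseDictionary_v2_libNew() once per dictionary,
    /// each load brackets its work with dataNew(lib_data) / dataDelete(lib_data, data_ptr),
    /// and the destructor releases lib_data through ClickHouseDictionary_v2_libDelete().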
auto fptr = library->getstrings), decltype(&columns_pass), decltype(&requested_rows_c))>( "ClickHouseDictionary_v2_loadKeys"); - data_ptr = library->get("ClickHouseDictionary_v2_dataAllocate")(); + data_ptr = library->get("ClickHouseDictionary_v2_dataNew")(lib_data); auto data = fptr(data_ptr, &settings->strings, &columns_pass, &requested_rows_c); auto block = dataToBlock(description.sample_block, data); - library->get("ClickHouseDictionary_v2_dataDelete")(data_ptr); + library->get("ClickHouseDictionary_v2_dataDelete")(lib_data, data_ptr); return std::make_shared(block); } bool LibraryDictionarySource::isModified() const { - auto fptr = library->tryGetstrings))>("ClickHouseDictionary_v2_isModified"); + auto fptr = library->tryGetstrings))>("ClickHouseDictionary_v2_isModified"); if (fptr) - return fptr(&settings->strings); + return fptr(lib_data, &settings->strings); return true; } bool LibraryDictionarySource::supportsSelectiveLoad() const { - auto fptr = library->tryGetstrings))>("ClickHouseDictionary_v2_supportsSelectiveLoad"); + auto fptr + = library->tryGetstrings))>("ClickHouseDictionary_v2_supportsSelectiveLoad"); if (fptr) - return fptr(&settings->strings); + return fptr(lib_data, &settings->strings); return true; } diff --git a/dbms/src/Dictionaries/LibraryDictionarySource.h b/dbms/src/Dictionaries/LibraryDictionarySource.h index 2ed25ad3430..93742cdaef5 100644 --- a/dbms/src/Dictionaries/LibraryDictionarySource.h +++ b/dbms/src/Dictionaries/LibraryDictionarySource.h @@ -24,7 +24,7 @@ class CStringsHolder; /// Allows loading dictionaries from dynamic libraries (.so) /// Experimental version -/// Now supports only uint64 types +/// Example: dbms/tests/external_dictionaries/dictionary_library/dictionary_library.cpp class LibraryDictionarySource final : public IDictionarySource { public: @@ -36,6 +36,8 @@ public: LibraryDictionarySource(const LibraryDictionarySource & other); + ~LibraryDictionarySource(); + BlockInputStreamPtr loadAll() override; BlockInputStreamPtr loadIds(const std::vector & ids) override; @@ -63,5 +65,6 @@ private: SharedLibraryPtr library; ExternalResultDescription description; std::shared_ptr settings; + void * lib_data = nullptr; }; } diff --git a/dbms/tests/external_dictionaries/dictionary_library/dictionary_library.cpp b/dbms/tests/external_dictionaries/dictionary_library/dictionary_library.cpp index 3c759ba336e..7bb6e2eb726 100644 --- a/dbms/tests/external_dictionaries/dictionary_library/dictionary_library.cpp +++ b/dbms/tests/external_dictionaries/dictionary_library/dictionary_library.cpp @@ -1,5 +1,7 @@ /// c++ sample dictionary library +/// proller: TODO: describe + #include #include #include @@ -10,13 +12,46 @@ //#define DUMPS(VAR) #VAR " = " << VAR //#define DUMP(VAR) std::cerr << __FILE__ << ":" << __LINE__ << " " << DUMPS(VAR) << "\n"; + +struct LibHolder +{ + //Some your data, maybe service connection +}; + struct DataHolder { - std::vector> vector; - std::unique_ptr columnsHolder; - ClickHouseLibrary::ColumnsUInt64 columns; + std::vector> dataHolder; // Actual data storage + std::vector> fieldHolder; // Pointers and sizes of data + std::unique_ptr rowHolder; + ClickHouseLibrary::Table ctable; // Result data prepared for transfer via c-style interface + LibHolder * lib = nullptr; }; + +void MakeColumnsFromVector(DataHolder * ptr) +{ + for (const auto & row : ptr->dataHolder) + { + std::vector fields; + for (const auto & field : row) + fields.push_back({&field, sizeof(field)}); + + ptr->fieldHolder.push_back(fields); + } + + const auto rows_num = 
ptr->fieldHolder.size(); + ptr->rowHolder = std::make_unique(rows_num); + size_t i = 0; + for (auto & row : ptr->fieldHolder) + { + ptr->rowHolder[i].size = row.size(); + ptr->rowHolder[i].data = row.data(); + ++i; + } + ptr->ctable.size = rows_num; + ptr->ctable.data = ptr->rowHolder.get(); +} + extern "C" { void * ClickHouseDictionary_v2_loadIds(void * data_ptr, @@ -29,6 +64,9 @@ void * ClickHouseDictionary_v2_loadIds(void * data_ptr, if (ids) std::cerr << "loadIds lib call ptr=" << data_ptr << " => " << ptr << " size=" << ids->size << "\n"; + if (!ptr) + return nullptr; + if (settings) { std::cerr << "settings passed: " << settings->size << "\n"; @@ -42,42 +80,27 @@ void * ClickHouseDictionary_v2_loadIds(void * data_ptr, for (size_t i = 0; i < columns->size; ++i) std::cerr << "column " << i << " :" << columns->data[i] << "\n"; } - if (ptr) - { - if (ids) - { - std::cerr << "ids passed: " << ids->size << "\n"; - for (size_t i = 0; i < ids->size; ++i) - { - std::cerr << "id " << i << " :" << ids->data[i] << " replying.\n"; - ptr->vector.emplace_back(std::vector{ids->data[i], ids->data[i] + 1, (1 + ids->data[i]) * 10, 65}); - } - } - ptr->columnsHolder = std::make_unique(ptr->vector.size()); - size_t i = 0; - for (auto & col : ptr->vector) + if (ids) + { + std::cerr << "ids passed: " << ids->size << "\n"; + for (size_t i = 0; i < ids->size; ++i) { - //DUMP(i); - //DUMP(col); - ptr->columnsHolder[i].size = col.size(); - ptr->columnsHolder[i].data = col.data(); - ++i; + std::cerr << "id " << i << " :" << ids->data[i] << " replying.\n"; + ptr->dataHolder.emplace_back(std::vector{ids->data[i], ids->data[i] + 1, (1 + ids->data[i]) * 10, 65}); } - ptr->columns.size = ptr->vector.size(); - //DUMP(ptr->columns.size); - ptr->columns.data = ptr->columnsHolder.get(); - //DUMP(ptr->columns.columns); - return static_cast(&ptr->columns); } - return nullptr; + MakeColumnsFromVector(ptr); + return static_cast(&ptr->ctable); } void * ClickHouseDictionary_v2_loadAll(void * data_ptr, ClickHouseLibrary::CStrings * settings, ClickHouseLibrary::CStrings * /*columns*/) { auto ptr = static_cast(data_ptr); std::cerr << "loadAll lib call ptr=" << data_ptr << " => " << ptr << "\n"; + if (!ptr) + return nullptr; if (settings) { std::cerr << "settings passed: " << settings->size << "\n"; @@ -87,31 +110,15 @@ void * ClickHouseDictionary_v2_loadAll(void * data_ptr, ClickHouseLibrary::CStri } } - if (ptr) + for (size_t i = 0; i < 7; ++i) { - for (size_t i = 0; i < 7; ++i) - { - std::cerr << "id " << i << " :" - << " generating.\n"; - ptr->vector.emplace_back(std::vector{i, i + 1, (1 + i) * 10, 65}); - } - - ptr->columnsHolder = std::make_unique(ptr->vector.size()); - size_t i = 0; - for (auto & col : ptr->vector) - { - ptr->columnsHolder[i].size = col.size(); - ptr->columnsHolder[i].data = col.data(); - ++i; - } - ptr->columns.size = ptr->vector.size(); - //DUMP(ptr->columns.size); - ptr->columns.data = ptr->columnsHolder.get(); - //DUMP(ptr->columns.columns); - return static_cast(&ptr->columns); + std::cerr << "id " << i << " :" + << " generating.\n"; + ptr->dataHolder.emplace_back(std::vector{i, i + 1, (1 + i) * 10, 65}); } - //return; - return nullptr; + + MakeColumnsFromVector(ptr); + return static_cast(&ptr->ctable); } void * ClickHouseDictionary_v2_loadKeys(void * data_ptr, @@ -146,16 +153,32 @@ void * ClickHouseDictionary_v2_loadKeys(void * data_ptr, } } + //MakeColumnsFromVector(ptr); + return nullptr; } -void * ClickHouseDictionary_v2_dataAllocate() +void * ClickHouseDictionary_v2_libNew() +{ + auto lib_ptr = 
new LibHolder; + return lib_ptr; +} + +void ClickHouseDictionary_v2_libDelete(void * lib_ptr) +{ + auto ptr = static_cast(lib_ptr); + delete ptr; + return; +} + +void * ClickHouseDictionary_v2_dataNew(void * lib_ptr) { auto data_ptr = new DataHolder; + data_ptr->lib = static_castlib)>(lib_ptr); return data_ptr; } -void ClickHouseDictionary_v2_dataDelete(void * data_ptr) +void ClickHouseDictionary_v2_dataDelete(void * /*lib_ptr*/, void * data_ptr) { auto ptr = static_cast(data_ptr); delete ptr; diff --git a/dbms/tests/external_dictionaries/dictionary_library/dictionary_library_c.c b/dbms/tests/external_dictionaries/dictionary_library/dictionary_library_c.c index 936516aa9fc..9d95894302a 100644 --- a/dbms/tests/external_dictionaries/dictionary_library/dictionary_library_c.c +++ b/dbms/tests/external_dictionaries/dictionary_library/dictionary_library_c.c @@ -19,39 +19,53 @@ typedef struct CString * data; } ClickHouseLibCStrings; -void * ClickHouseDictionary_v1_loadIds(void * data_ptr, ClickHouseLibCStrings * settings, ClickHouseLibCStrings * columns, ClickHouseLibVectorUInt64 * ids) +void * ClickHouseDictionary_v2_loadIds( + void * data_ptr, ClickHouseLibCStrings * settings, ClickHouseLibCStrings * columns, ClickHouseLibVectorUInt64 * ids) { printf("loadIds c lib call ptr=%p size=%" PRIu64 "\n", data_ptr, ids->size); return 0; } -void * ClickHouseDictionary_v1_loadAll(void * data_ptr, ClickHouseLibCStrings * settings, ClickHouseLibCStrings * columns) +void * ClickHouseDictionary_v2_loadAll(void * data_ptr, ClickHouseLibCStrings * settings, ClickHouseLibCStrings * columns) { printf("loadAll c lib call ptr=%p \n", data_ptr); return 0; } -void * ClickHouseDictionary_v1_loadKeys(void * data_ptr, - ClickHouseLibCStrings * settings, - ClickHouseLibCStrings * columns, - const ClickHouseLibVectorUInt64 * requested_rows) +void * ClickHouseDictionary_v2_loadKeys( + void * data_ptr, ClickHouseLibCStrings * settings, ClickHouseLibCStrings * columns, const ClickHouseLibVectorUInt64 * requested_rows) { printf("loadKeys c lib call ptr=%p size=%" PRIu64 "\n", data_ptr, requested_rows->size); return 0; } +void * ClickHouseDictionary_v2_libNew() +{ + int size = 101; + void * lib_ptr = malloc(size); + printf("libNew c lib call lib_ptr=%p \n", lib_ptr); + return lib_ptr; +} -void * ClickHouseDictionary_v1_dataAllocate() +void ClickHouseDictionary_v2_libDelete(void * lib_ptr) +{ + printf("libDelete c lib call lib_ptr=%p \n", lib_ptr); + free(lib_ptr); + return; +} + + +void * ClickHouseDictionary_v2_dataNew(void * lib_ptr) { int size = 100; void * data_ptr = malloc(size); - printf("dataAllocate c lib call ptr=%p \n", data_ptr); + printf("dataNew c lib call lib_ptr=%p data_ptr=%p \n", lib_ptr, data_ptr); return data_ptr; } -void ClickHouseDictionary_v1_dataDelete(void * data_ptr) +void ClickHouseDictionary_v2_dataDelete(void * lib_ptr, void * data_ptr) { - printf("dataDelete c lib call ptr=%p \n", data_ptr); + printf("dataDelete c lib call lib_ptr=%p data_ptr=%p \n", lib_ptr, data_ptr); free(data_ptr); return; } From 1564e316163f1beea754b42ac53d2065c88eb619 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Mon, 26 Feb 2018 19:58:51 +0300 Subject: [PATCH 127/209] check that lengths of arrays passed to arrayReduce() are equal [#CLICKHOUSE-3250] #1142 --- dbms/src/Functions/FunctionsArray.cpp | 28 +++++++++++++++++---------- dbms/src/Functions/FunctionsArray.h | 2 +- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/dbms/src/Functions/FunctionsArray.cpp b/dbms/src/Functions/FunctionsArray.cpp index 
27d5d3688e8..31df15a9c01 100644 --- a/dbms/src/Functions/FunctionsArray.cpp +++ b/dbms/src/Functions/FunctionsArray.cpp @@ -2412,33 +2412,41 @@ void FunctionArrayReduce::executeImpl(Block & block, const ColumnNumbers & argum /// Aggregate functions do not support constant columns. Therefore, we materialize them. std::vector materialized_columns; - std::vector aggregate_arguments_vec(arguments.size() - 1); + const size_t num_arguments_columns = arguments.size() - 1; + + std::vector aggregate_arguments_vec(num_arguments_columns); + const ColumnArray::Offsets * offsets = nullptr; bool is_const = true; - for (size_t i = 0, size = arguments.size() - 1; i < size; ++i) + for (size_t i = 0; i < num_arguments_columns; ++i) { const IColumn * col = block.getByPosition(arguments[i + 1]).column.get(); + const ColumnArray::Offsets * offsets_i = nullptr; if (const ColumnArray * arr = checkAndGetColumn(col)) { aggregate_arguments_vec[i] = &arr->getData(); + offsets_i = &arr->getOffsets(); is_const = false; } - else if (const ColumnConst * arr = checkAndGetColumnConst(col)) + else if (const ColumnConst * const_arr = checkAndGetColumnConst(col)) { - materialized_columns.emplace_back(arr->convertToFullColumn()); - aggregate_arguments_vec[i] = &typeid_cast(*materialized_columns.back().get()).getData(); + materialized_columns.emplace_back(const_arr->convertToFullColumn()); + const auto & arr = typeid_cast(*materialized_columns.back().get()); + aggregate_arguments_vec[i] = &arr.getData(); + offsets_i = &arr.getOffsets(); } else throw Exception("Illegal column " + col->getName() + " as argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + if (i == 0) + offsets = offsets_i; + else if (*offsets_i != *offsets) + throw Exception("Lengths of all arrays passsed to " + getName() + " must be equal.", + ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); } const IColumn ** aggregate_arguments = aggregate_arguments_vec.data(); - const ColumnArray::Offsets & offsets = typeid_cast(!materialized_columns.empty() - ? *materialized_columns.front().get() - : *block.getByPosition(arguments[1]).column.get()).getOffsets(); - MutableColumnPtr result_holder = block.getByPosition(result).type->createColumn(); IColumn & res_col = *result_holder; @@ -2453,7 +2461,7 @@ void FunctionArrayReduce::executeImpl(Block & block, const ColumnNumbers & argum for (size_t i = 0; i < rows; ++i) { agg_func.create(place); - ColumnArray::Offset next_offset = offsets[i]; + ColumnArray::Offset next_offset = (*offsets)[i]; try { diff --git a/dbms/src/Functions/FunctionsArray.h b/dbms/src/Functions/FunctionsArray.h index d9954fe8506..322a5a9affa 100644 --- a/dbms/src/Functions/FunctionsArray.h +++ b/dbms/src/Functions/FunctionsArray.h @@ -48,7 +48,7 @@ namespace ErrorCodes * * arrayEnumerateUniq(arr) * - outputs an array parallel (having same size) to this, where for each element specified - * how much times this element was encountered before (including this element) among elements with the same value. + * how many times this element was encountered before (including this element) among elements with the same value. * For example: arrayEnumerateUniq([10, 20, 10, 30]) = [1, 1, 2, 1] * arrayEnumerateUniq(arr1, arr2...) * - for tuples from elements in the corresponding positions in several arrays. 
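A hedged SQL sketch of the intended behaviour of the change above (assuming a server built
with this patch; the exact error text may differ — this note is not part of the commit itself).
arrayReduce over several array arguments is now expected to fail loudly on ragged input instead
of silently using only the first array's offsets:

    SELECT arrayReduce('maxIf', [3, 5], [1, 0]);  -- OK: both arguments hold two elements per row, returns 3
    SELECT arrayReduce('maxIf', [3, 5], [1]);     -- expected to throw SIZES_OF_ARRAYS_DOESNT_MATCH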
From cf9a33d04f00b121f6ebe533e081f83e35237928 Mon Sep 17 00:00:00 2001 From: proller Date: Mon, 26 Feb 2018 21:34:22 +0300 Subject: [PATCH 128/209] CLICKHOUSE-3600 Use correct lld version, cmake fixes (#1968) * Allow build without rt library * fix * Cmake misc * CLICKHOUSE-3600 Use correct lld version * CLICKHOUSE-3600 Use correct lld version * Use libllvm same version as clang * fix * Fix build with iodbc * Fix iodbc build * Fix gold find * Fix empty lines --- .travis.yml | 4 +- CMakeLists.txt | 66 ++++---------------------- cmake/arch.cmake | 19 ++++++++ cmake/find_llvm.cmake | 21 +++++--- cmake/lib_name.cmake | 1 - cmake/print_flags.cmake | 6 +++ cmake/sanitize.cmake | 27 +++++++++++ dbms/CMakeLists.txt | 1 + dbms/src/CMakeLists.txt | 4 ++ dbms/src/TableFunctions/CMakeLists.txt | 1 + docker/builder/build.sh | 2 +- release | 4 +- utils/travis/normal.sh | 2 +- utils/travis/pbuilder.sh | 4 +- 14 files changed, 89 insertions(+), 73 deletions(-) create mode 100644 cmake/arch.cmake create mode 100644 cmake/print_flags.cmake create mode 100644 cmake/sanitize.cmake diff --git a/.travis.yml b/.travis.yml index 7f867dc10b7..6dab6f478ab 100644 --- a/.travis.yml +++ b/.travis.yml @@ -36,7 +36,7 @@ matrix: sources: - ubuntu-toolchain-r-test - llvm-toolchain-trusty-5.0 - packages: [ g++-7, clang-5.0, libicu-dev, libreadline-dev, libmysqlclient-dev, unixodbc-dev, libltdl-dev, libssl-dev, libboost-dev, zlib1g-dev, libdouble-conversion-dev, libzookeeper-mt-dev, libsparsehash-dev, librdkafka-dev, libcapnp-dev, libsparsehash-dev, libgoogle-perftools-dev, bash, expect, python, python-lxml, python-termcolor, curl, perl, sudo ] + packages: [ g++-7, clang-5.0, lld-5.0, libicu-dev, libreadline-dev, libmysqlclient-dev, unixodbc-dev, libltdl-dev, libssl-dev, libboost-dev, zlib1g-dev, libdouble-conversion-dev, libzookeeper-mt-dev, libsparsehash-dev, librdkafka-dev, libcapnp-dev, libsparsehash-dev, libgoogle-perftools-dev, bash, expect, python, python-lxml, python-termcolor, curl, perl, sudo ] env: - MATRIX_EVAL="export CC=clang-5.0 && export CXX=clang++-5.0" @@ -118,7 +118,7 @@ matrix: # packages: [ pbuilder, fakeroot, debhelper ] # # env: -# - MATRIX_EVAL="export DEB_CC=clang-6.0 && export DEB_CXX=clang++-6.0 && export DIST=bionic && export EXTRAPACKAGES=clang-6.0" +# - MATRIX_EVAL="export DEB_CC=clang-6.0 && export DEB_CXX=clang++-6.0 && export DIST=bionic && export EXTRAPACKAGES='clang-6.0 lld-6.0'" # # script: # - utils/travis/pbuilder.sh diff --git a/CMakeLists.txt b/CMakeLists.txt index cfe48a0d3fb..2bffc353cef 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -35,29 +35,7 @@ message (STATUS "CMAKE_BUILD_TYPE: " ${CMAKE_BUILD_TYPE} ) # TSan is not supported due to false positive errors in libstdc++ and necessity to rebuild libstdc++ with TSan set (CMAKE_CONFIGURATION_TYPES "RelWithDebInfo;Debug;Release;MinSizeRel;ASan;UBSan" CACHE STRING "" FORCE) -if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)") - set (ARCH_AARCH64 1) -endif () -if (ARCH_AARCH64 OR CMAKE_SYSTEM_PROCESSOR MATCHES "arm") - set (ARCH_ARM 1) -endif () -if (CMAKE_LIBRARY_ARCHITECTURE MATCHES "i386") - set (ARCH_I386 1) -endif () -if ( ( ARCH_ARM AND NOT ARCH_AARCH64 ) OR ARCH_I386) - set (ARCH_32 1) - message (WARNING "Support for 32bit platforms is highly experimental") -endif () -if (CMAKE_SYSTEM MATCHES "Linux") - set (ARCH_LINUX 1) -endif () -if (CMAKE_SYSTEM MATCHES "FreeBSD") - set (ARCH_FREEBSD 1) -endif () - -if (ARCH_FREEBSD) - set (PLATFORM_EXTRA_CXX_FLAG "-DCLOCK_MONOTONIC_COARSE=CLOCK_MONOTONIC_FAST") -endif () 
+include (cmake/arch.cmake) set (COMMON_WARNING_FLAGS "${COMMON_WARNING_FLAGS} -Wall") # -Werror is also added inside directories with our own code. set (CXX_WARNING_FLAGS "${CXX_WARNING_FLAGS} -Wnon-virtual-dtor") @@ -111,8 +89,10 @@ endif () set (COMPILER_FLAGS "${COMPILER_FLAGS} ${CXX11_ABI_FLAGS}") -find_program (LLD_PATH NAMES lld) -find_program (GOLD_PATH NAMES gold) +string(REGEX MATCH "-?[0-9]+(.[0-9]+)?$" COMPILER_POSTFIX ${CMAKE_CXX_COMPILER}) + +find_program (LLD_PATH NAMES "lld${COMPILER_POSTFIX}" "lld") +find_program (GOLD_PATH NAMES "gold") if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND LLD_PATH AND NOT LINKER_NAME) set (LINKER_NAME "lld") @@ -121,8 +101,8 @@ elseif (GOLD_PATH) endif () if (LINKER_NAME) - message(STATUS "Using linker: ${LINKER_NAME}") - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=${LINKER_NAME}") + message(STATUS "Using linker: ${LINKER_NAME} (selected from: LLD_PATH=${LLD_PATH}; GOLD_PATH=${GOLD_PATH}; COMPILER_POSTFIX=${COMPILER_POSTFIX})") + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=${LINKER_NAME}") endif () option (PIPE "-pipe compiler option [less /tmp usage, more ram usage]" ON) @@ -196,29 +176,7 @@ if (NOT MAKE_STATIC_LIBRARIES) set(CMAKE_POSITION_INDEPENDENT_CODE ON) endif () -set (SAN_FLAGS "${SAN_FLAGS} -g -fno-omit-frame-pointer") -if (SAN_DEBUG) - set (SAN_FLAGS "${SAN_FLAGS} -O0") -else () - set (SAN_FLAGS "${SAN_FLAGS} -O3") -endif () - -set (CMAKE_CXX_FLAGS_ASAN "${CMAKE_CXX_FLAGS_ASAN} ${SAN_FLAGS} -fsanitize=address") -set (CMAKE_C_FLAGS_ASAN "${CMAKE_C_FLAGS_ASAN} ${SAN_FLAGS} -fsanitize=address") -set (CMAKE_CXX_FLAGS_UBSAN "${CMAKE_CXX_FLAGS_UBSAN} ${SAN_FLAGS} -fsanitize=undefined") -set (CMAKE_C_FLAGS_UBSAN "${CMAKE_C_FLAGS_UBSAN} ${SAN_FLAGS} -fsanitize=undefined") -set (CMAKE_CXX_FLAGS_MSAN "${CMAKE_CXX_FLAGS_MSAN} ${SAN_FLAGS} -fsanitize=memory") -set (CMAKE_C_FLAGS_MSAN "${CMAKE_C_FLAGS_MSAN} ${SAN_FLAGS} -fsanitize=memory") -set (CMAKE_CXX_FLAGS_TSAN "${CMAKE_CXX_FLAGS_TSAN} ${SAN_FLAGS} -fsanitize=thread") -set (CMAKE_C_FLAGS_TSAN "${CMAKE_C_FLAGS_TSAN} ${SAN_FLAGS} -fsanitize=thread") - -# clang use static linking by default -if (MAKE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set (CMAKE_EXE_LINKER_FLAGS_ASAN "${CMAKE_EXE_LINKER_FLAGS_ASAN} -static-libasan") - set (CMAKE_EXE_LINKER_FLAGS_UBSAN "${CMAKE_EXE_LINKER_FLAGS_UBSAN} -static-libubsan") - set (CMAKE_EXE_LINKER_FLAGS_MSAN "${CMAKE_EXE_LINKER_FLAGS_MSAN} -static-libmsan") - set (CMAKE_EXE_LINKER_FLAGS_TSAN "${CMAKE_EXE_LINKER_FLAGS_TSAN} -static-libtsan") -endif () +include (cmake/sanitize.cmake) # Using "include-what-you-use" tool. 
option (USE_INCLUDE_WHAT_YOU_USE "Use 'include-what-you-use' tool" OFF) @@ -307,13 +265,7 @@ include (libs/libcommon/cmake/find_cctz.cmake) include (libs/libmysqlxx/cmake/find_mysqlclient.cmake) include (libs/libdaemon/cmake/find_unwind.cmake) - -set (FULL_C_FLAGS "${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UC}}") -set (FULL_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UC}}") -set (FULL_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS_${CMAKE_BUILD_TYPE_UC}}") -message (STATUS "C_FLAGS = ${FULL_C_FLAGS}") -message (STATUS "CXX_FLAGS = ${FULL_CXX_FLAGS}") -message (STATUS "LINKER_FLAGS = ${FULL_EXE_LINKER_FLAGS}") +include (cmake/print_flags.cmake) # Directory for Yandex specific files set (CLICKHOUSE_PRIVATE_DIR ${ClickHouse_SOURCE_DIR}/private/) diff --git a/cmake/arch.cmake b/cmake/arch.cmake new file mode 100644 index 00000000000..1191d70bd19 --- /dev/null +++ b/cmake/arch.cmake @@ -0,0 +1,19 @@ +if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)") + set (ARCH_AARCH64 1) +endif () +if (ARCH_AARCH64 OR CMAKE_SYSTEM_PROCESSOR MATCHES "arm") + set (ARCH_ARM 1) +endif () +if (CMAKE_LIBRARY_ARCHITECTURE MATCHES "i386") + set (ARCH_I386 1) +endif () +if ( ( ARCH_ARM AND NOT ARCH_AARCH64 ) OR ARCH_I386) + set (ARCH_32 1) + message (WARNING "Support for 32bit platforms is highly experimental") +endif () +if (CMAKE_SYSTEM MATCHES "Linux") + set (ARCH_LINUX 1) +endif () +if (CMAKE_SYSTEM MATCHES "FreeBSD") + set (ARCH_FREEBSD 1) +endif () diff --git a/cmake/find_llvm.cmake b/cmake/find_llvm.cmake index 8945bff451e..dafb533a0f9 100644 --- a/cmake/find_llvm.cmake +++ b/cmake/find_llvm.cmake @@ -1,6 +1,6 @@ -option (USE_EMBEDDED_COMPILER "Set to TRUE to enable support for 'compile' option for query execution" FALSE) +option (ENABLE_EMBEDDED_COMPILER "Set to TRUE to enable support for 'compile' option for query execution" FALSE) -if (USE_EMBEDDED_COMPILER) +if (ENABLE_EMBEDDED_COMPILER) # Based on source code of YT. # Authors: Ivan Puzyrevskiy, Alexey Lukyanchikov, Ruslan Savchenko. @@ -15,10 +15,14 @@ if (USE_EMBEDDED_COMPILER) # llvm_map_components_to_libraries - Maps LLVM used components to required libraries. # Usage: llvm_map_components_to_libraries(REQUIRED_LLVM_LIBRARIES core jit interpreter native ...) - if (ARCH_FREEBSD) - set(LLVM_VERSION_POSTFIX "50" CACHE INTERNAL "") + if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + set(LLVM_VERSION_POSTFIX "${COMPILER_POSTFIX}" CACHE STRING "") else() - set(LLVM_VERSION_POSTFIX "-5.0" CACHE INTERNAL "") + if (ARCH_FREEBSD) + set(LLVM_VERSION_POSTFIX "50" CACHE STRING "") + else() + set(LLVM_VERSION_POSTFIX "-5.0" CACHE STRING "") + endif() endif() find_program(LLVM_CONFIG_EXECUTABLE @@ -28,9 +32,8 @@ if (USE_EMBEDDED_COMPILER) mark_as_advanced(LLVM_CONFIG_EXECUTABLE) if(NOT LLVM_CONFIG_EXECUTABLE) - message(FATAL_ERROR "Cannot find LLVM (looking for `llvm-config`). Please, provide LLVM_ROOT environment variable.") + message(FATAL_ERROR "Cannot find LLVM (looking for `llvm-config${LLVM_VERSION_POSTFIX}`, `llvm-config`, `llvm-config-devel`). 
Please, provide LLVM_ROOT environment variable.") else() - set(LLVM_FOUND TRUE) execute_process( @@ -97,4 +100,8 @@ if (USE_EMBEDDED_COMPILER) message(STATUS "LLVM Library Directory: ${LLVM_LIBRARY_DIRS}") message(STATUS "LLVM C++ Compiler: ${LLVM_CXXFLAGS}") endif() + + if (LLVM_FOUND AND LLVM_INCLUDE_DIRS AND LLVM_LIBRARY_DIRS) + set(USE_EMBEDDED_COMPILER TRUE) + endif() endif() diff --git a/cmake/lib_name.cmake b/cmake/lib_name.cmake index e7cbc777670..79e1b3e19e4 100644 --- a/cmake/lib_name.cmake +++ b/cmake/lib_name.cmake @@ -1,4 +1,3 @@ - set(DIVIDE_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libdivide) set(CITYHASH_CONTRIB_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libcityhash/include) set(COMMON_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/libs/libcommon/include ${ClickHouse_BINARY_DIR}/libs/libcommon/include) diff --git a/cmake/print_flags.cmake b/cmake/print_flags.cmake new file mode 100644 index 00000000000..ad6bca21933 --- /dev/null +++ b/cmake/print_flags.cmake @@ -0,0 +1,6 @@ +set (FULL_C_FLAGS "${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UC}}") +set (FULL_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UC}}") +set (FULL_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS_${CMAKE_BUILD_TYPE_UC}}") +message (STATUS "compiler C = ${CMAKE_C_COMPILER} ${FULL_C_FLAGS}") +message (STATUS "compiler CXX = ${CMAKE_CXX_COMPILER} ${FULL_CXX_FLAGS}") +message (STATUS "LINKER_FLAGS = ${FULL_EXE_LINKER_FLAGS}") diff --git a/cmake/sanitize.cmake b/cmake/sanitize.cmake new file mode 100644 index 00000000000..d54e1f23eed --- /dev/null +++ b/cmake/sanitize.cmake @@ -0,0 +1,27 @@ +set (SAN_FLAGS "${SAN_FLAGS} -g -fno-omit-frame-pointer") +if (SAN_DEBUG) + set (SAN_FLAGS "${SAN_FLAGS} -O0") +else () + set (SAN_FLAGS "${SAN_FLAGS} -O3") +endif () + +set (CMAKE_CXX_FLAGS_ASAN "${CMAKE_CXX_FLAGS_ASAN} ${SAN_FLAGS} -fsanitize=address") +set (CMAKE_C_FLAGS_ASAN "${CMAKE_C_FLAGS_ASAN} ${SAN_FLAGS} -fsanitize=address") +set (CMAKE_EXE_LINKER_FLAGS_ASAN "${CMAKE_EXE_LINKER_FLAGS_ASAN} -fsanitize=address") +set (CMAKE_CXX_FLAGS_UBSAN "${CMAKE_CXX_FLAGS_UBSAN} ${SAN_FLAGS} -fsanitize=undefined") +set (CMAKE_C_FLAGS_UBSAN "${CMAKE_C_FLAGS_UBSAN} ${SAN_FLAGS} -fsanitize=undefined") +set (CMAKE_EXE_LINKER_FLAGS_UBSAN "${CMAKE_EXE_LINKER_FLAGS_UBSAN} -fsanitize=undefined") +set (CMAKE_CXX_FLAGS_MSAN "${CMAKE_CXX_FLAGS_MSAN} ${SAN_FLAGS} -fsanitize=memory") +set (CMAKE_C_FLAGS_MSAN "${CMAKE_C_FLAGS_MSAN} ${SAN_FLAGS} -fsanitize=memory") +set (CMAKE_EXE_LINKER_FLAGS_MSAN "${CMAKE_EXE_LINKER_FLAGS_MSAN} -fsanitize=memory") +set (CMAKE_CXX_FLAGS_TSAN "${CMAKE_CXX_FLAGS_TSAN} ${SAN_FLAGS} -fsanitize=thread") +set (CMAKE_C_FLAGS_TSAN "${CMAKE_C_FLAGS_TSAN} ${SAN_FLAGS} -fsanitize=thread") +set (CMAKE_EXE_LINKER_FLAGS_TSAN "${CMAKE_EXE_LINKER_FLAGS_TSAN} -fsanitize=thread") + +# clang use static linking by default +if (MAKE_STATIC_LIBRARIES AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + set (CMAKE_EXE_LINKER_FLAGS_ASAN "${CMAKE_EXE_LINKER_FLAGS_ASAN} -static-libasan") + set (CMAKE_EXE_LINKER_FLAGS_UBSAN "${CMAKE_EXE_LINKER_FLAGS_UBSAN} -static-libubsan") + set (CMAKE_EXE_LINKER_FLAGS_MSAN "${CMAKE_EXE_LINKER_FLAGS_MSAN} -static-libmsan") + set (CMAKE_EXE_LINKER_FLAGS_TSAN "${CMAKE_EXE_LINKER_FLAGS_TSAN} -static-libtsan") +endif () diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 979ac769385..551ed17cb76 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -162,6 +162,7 @@ endif () if (Poco_DataODBC_FOUND) target_link_libraries (dbms ${Poco_DataODBC_LIBRARY}) + 
target_include_directories (dbms PRIVATE ${ODBC_INCLUDE_DIRECTORIES})
 endif()

 if (Poco_MongoDB_FOUND)
diff --git a/dbms/src/CMakeLists.txt b/dbms/src/CMakeLists.txt
index d52fb965f9e..3f132eddbb5 100644
--- a/dbms/src/CMakeLists.txt
+++ b/dbms/src/CMakeLists.txt
@@ -1,3 +1,7 @@
+if (ARCH_FREEBSD)
+    set (PLATFORM_EXTRA_CXX_FLAG "-DCLOCK_MONOTONIC_COARSE=CLOCK_MONOTONIC_FAST")
+endif ()
+
 add_subdirectory (Columns)
 add_subdirectory (Common)
 add_subdirectory (Core)
diff --git a/dbms/src/TableFunctions/CMakeLists.txt b/dbms/src/TableFunctions/CMakeLists.txt
index 6d25305c133..4551be50c8b 100644
--- a/dbms/src/TableFunctions/CMakeLists.txt
+++ b/dbms/src/TableFunctions/CMakeLists.txt
@@ -8,4 +8,5 @@ add_library(clickhouse_table_functions ${clickhouse_table_functions_sources})
 target_link_libraries(clickhouse_table_functions dbms clickhouse_storages_system ${Poco_Foundation_LIBRARY})
 if (Poco_DataODBC_FOUND)
    target_link_libraries (clickhouse_table_functions ${Poco_DataODBC_LIBRARY})
+    target_include_directories (clickhouse_table_functions PRIVATE ${ODBC_INCLUDE_DIRECTORIES})
 endif ()
diff --git a/docker/builder/build.sh b/docker/builder/build.sh
index 1d091221bc5..a392638a319 100644
--- a/docker/builder/build.sh
+++ b/docker/builder/build.sh
@@ -2,6 +2,6 @@

 mkdir -p /server/build_docker
 cd /server/build_docker
-cmake /server -DUSE_EMBEDDED_COMPILER=1 -DENABLE_TESTS=0
+cmake /server -DENABLE_EMBEDDED_COMPILER=1 -DENABLE_TESTS=0
 make -j $(nproc || grep -c ^processor /proc/cpuinfo)
 #ctest -V -j $(nproc || grep -c ^processor /proc/cpuinfo)
diff --git a/release b/release
index b8b3b33d743..0fba786373b 100755
--- a/release
+++ b/release
@@ -58,7 +58,7 @@ then
    # GLIBC_COMPATIBILITY is disabled by default
    export DEB_CC=clang-5.0
    export DEB_CXX=clang++-5.0
-    EXTRAPACKAGES="$EXTRAPACKAGES clang-5.0 "
+    EXTRAPACKAGES="$EXTRAPACKAGES clang-5.0 lld-5.0"
 elif [[ $BUILD_TYPE == 'valgrind' ]]; then
    LIBTCMALLOC_OPTS="-DENABLE_TCMALLOC=0"
    VERSION_POSTFIX+=-$BUILD_TYPE
@@ -72,7 +72,7 @@ if [ -z "$THREAD_COUNT" ] ; then
    THREAD_COUNT=`nproc || grep -c ^processor /proc/cpuinfo || sysctl -n hw.ncpu || echo 4`
 fi

-CMAKE_FLAGS=" $LIBTCMALLOC_OPTS -DCMAKE_BUILD_TYPE=$CMAKE_BUILD_TYPE -DUSE_EMBEDDED_COMPILER=1 $CMAKE_FLAGS"
+CMAKE_FLAGS=" $LIBTCMALLOC_OPTS -DCMAKE_BUILD_TYPE=$CMAKE_BUILD_TYPE -DENABLE_EMBEDDED_COMPILER=1 $CMAKE_FLAGS"

 export CMAKE_FLAGS
 export EXTRAPACKAGES
diff --git a/utils/travis/normal.sh b/utils/travis/normal.sh
index 54acd478260..6f94981b735 100755
--- a/utils/travis/normal.sh
+++ b/utils/travis/normal.sh
@@ -28,7 +28,7 @@ cmake ..
-DCMAKE_CXX_COMPILER=`which $DEB_CXX $CXX` -DCMAKE_C_COMPILER=`which $D `# Use all possible contrib libs from system` \ -DUNBUNDLED=1 \ `# Disable all features` \ - -DENABLE_CAPNP=0 -DENABLE_RDKAFKA=0 -DUSE_EMBEDDED_COMPILER=0 -DENABLE_TCMALLOC=0 -DENABLE_UNWIND=0 -DENABLE_MYSQL=0 $CMAKE_FLAGS \ + -DENABLE_CAPNP=0 -DENABLE_RDKAFKA=0 -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_TCMALLOC=0 -DENABLE_UNWIND=0 -DENABLE_MYSQL=0 $CMAKE_FLAGS \ && make -j `nproc || grep -c ^processor /proc/cpuinfo || sysctl -n hw.ncpu || echo 4` clickhouse-bundle \ `# Skip tests:` \ `# 00281 requires internal compiler` \ diff --git a/utils/travis/pbuilder.sh b/utils/travis/pbuilder.sh index 8d7478cb8e8..635db59fdaf 100755 --- a/utils/travis/pbuilder.sh +++ b/utils/travis/pbuilder.sh @@ -21,10 +21,10 @@ env TEST_RUN=${TEST_RUN=1} \ DEB_CC=${DEB_CC=$CC} DEB_CXX=${DEB_CXX=$CXX} \ CCACHE_SIZE=${CCACHE_SIZE:=4G} \ `# Disable all features` \ - CMAKE_FLAGS="-DCMAKE_BUILD_TYPE=Debug -DUNBUNDLED=1 -DENABLE_UNWIND=0 -DENABLE_MYSQL=0 -DENABLE_CAPNP=0 -DENABLE_RDKAFKA=0 -DUSE_EMBEDDED_COMPILER=1 -DCMAKE_C_FLAGS_ADD='-O0 -g0' -DCMAKE_CXX_FLAGS_ADD='-O0 -g0' $CMAKE_FLAGS" \ + CMAKE_FLAGS="-DCMAKE_BUILD_TYPE=Debug -DUNBUNDLED=1 -DENABLE_UNWIND=0 -DENABLE_MYSQL=0 -DENABLE_CAPNP=0 -DENABLE_RDKAFKA=0 -DENABLE_EMBEDDED_COMPILER=1 -DCMAKE_C_FLAGS_ADD='-O0 -g0' -DCMAKE_CXX_FLAGS_ADD='-O0 -g0' $CMAKE_FLAGS" \ `# Use all possible contrib libs from system` \ `# psmisc - killall` \ - EXTRAPACKAGES="psmisc clang-5.0 liblld-5.0-dev libclang-5.0-dev liblld-5.0 libc++abi-dev libc++-dev libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libzookeeper-mt-dev libsparsehash-dev librdkafka-dev libpoco-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev $EXTRAPACKAGES" \ + EXTRAPACKAGES="psmisc clang-5.0 lld-5.0 liblld-5.0-dev libclang-5.0-dev liblld-5.0 libc++abi-dev libc++-dev libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libzookeeper-mt-dev libsparsehash-dev librdkafka-dev libpoco-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev $EXTRAPACKAGES" \ ./release --pbuilder $RELEASE_OPT date From dff85044ecbcbce4f25de64864f53b1eb8f93edd Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 26 Feb 2018 21:38:52 +0300 Subject: [PATCH 129/209] Fixed bad test #1947 --- .../0_stateless/00079_defaulted_columns.sql | 72 +++++++++---------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00079_defaulted_columns.sql b/dbms/tests/queries/0_stateless/00079_defaulted_columns.sql index ce9d2c429f2..b1819d8e0c4 100644 --- a/dbms/tests/queries/0_stateless/00079_defaulted_columns.sql +++ b/dbms/tests/queries/0_stateless/00079_defaulted_columns.sql @@ -1,40 +1,40 @@ -drop table if exists defaulted_test; +drop table if exists test.defaulted; -create table defaulted_test (col1 default 0) engine=Memory; -desc table defaulted_test; -drop table defaulted_test; +create table test.defaulted (col1 default 0) engine=Memory; +desc table test.defaulted; +drop table test.defaulted; -create table defaulted_test (col1 UInt32, col2 default col1 + 1, col3 materialized col1 + 2, col4 alias col1 + 3) engine=Memory; -desc table defaulted_test; -insert into defaulted_test (col1) values (10); -select * from defaulted_test; -select col3, col4 from defaulted_test; -drop table defaulted_test; +create table test.defaulted (col1 UInt32, 
col2 default col1 + 1, col3 materialized col1 + 2, col4 alias col1 + 3) engine=Memory;
+desc table test.defaulted;
+insert into test.defaulted (col1) values (10);
+select * from test.defaulted;
+select col3, col4 from test.defaulted;
+drop table test.defaulted;

-create table defaulted_test (col1 Int8, col2 UInt64 default (SELECT dummy+99 from system.one)) engine=Memory;
-insert into defaulted_test (col1) values (0);
-select col2 from defaulted_test;
-drop table defaulted_test;
+create table test.defaulted (col1 Int8, col2 UInt64 default (SELECT dummy+99 from system.one)) engine=Memory;
+insert into test.defaulted (col1) values (0);
+select col2 from test.defaulted;
+drop table test.defaulted;

-create table defaulted_test (payload String, date materialized today(), key materialized 0 * rand()) engine=MergeTree(date, key, 8192);
-desc table defaulted_test;
-insert into defaulted_test (payload) values ('hello clickhouse');
-select * from defaulted_test;
-alter table defaulted_test add column payload_length materialized length(payload);
-desc table defaulted_test;
-select *, payload_length from defaulted_test;
-insert into defaulted_test (payload) values ('some string');
-select *, payload_length from defaulted_test order by payload;
-select *, payload_length from defaulted_test order by payload;
-alter table defaulted_test modify column payload_length default length(payload);
-desc table defaulted_test;
-select * from defaulted_test order by payload;
-alter table defaulted_test modify column payload_length default length(payload) % 65535;
-desc table defaulted_test;
-select * from defaulted_test order by payload;
-alter table defaulted_test modify column payload_length UInt16 default length(payload);
-desc table defaulted_test;
-alter table defaulted_test drop column payload_length;
-desc table defaulted_test;
-select * from defaulted_test order by payload;
-drop table defaulted_test;
+create table test.defaulted (payload String, date materialized today(), key materialized 0 * rand()) engine=MergeTree(date, key, 8192);
+desc table test.defaulted;
+insert into test.defaulted (payload) values ('hello clickhouse');
+select * from test.defaulted;
+alter table test.defaulted add column payload_length materialized length(payload);
+desc table test.defaulted;
+select *, payload_length from test.defaulted;
+insert into test.defaulted (payload) values ('some string');
+select *, payload_length from test.defaulted order by payload;
+select *, payload_length from test.defaulted order by payload;
+alter table test.defaulted modify column payload_length default length(payload);
+desc table test.defaulted;
+select * from test.defaulted order by payload;
+alter table test.defaulted modify column payload_length default length(payload) % 65535;
+desc table test.defaulted;
+select * from test.defaulted order by payload;
+alter table test.defaulted modify column payload_length UInt16 default length(payload);
+desc table test.defaulted;
+alter table test.defaulted drop column payload_length;
+desc table test.defaulted;
+select * from test.defaulted order by payload;
+drop table test.defaulted;
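The test above exercises the three kinds of defaulted columns. A minimal SQL sketch of their
visible behaviour (illustrative only; the table name defaulted_demo is hypothetical and not
part of the patch):

    CREATE TABLE test.defaulted_demo
    (
        x UInt32,
        y UInt32 DEFAULT x + 1,       -- stored; filled in when omitted from the INSERT
        z UInt32 MATERIALIZED x + 2,  -- stored; hidden from SELECT * and cannot be set in INSERT
        w UInt32 ALIAS x + 3          -- not stored; computed on read
    ) ENGINE = Memory;
    INSERT INTO test.defaulted_demo (x) VALUES (10);
    SELECT * FROM test.defaulted_demo;     -- returns (10, 11): MATERIALIZED and ALIAS stay hidden
    SELECT z, w FROM test.defaulted_demo;  -- returns (12, 13): both readable when named explicitly
    DROP TABLE test.defaulted_demo;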
From 8669983ce197a62103ee0f1c083dcbcc0847c389 Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko
Date: Mon, 26 Feb 2018 22:00:40 +0300
Subject: [PATCH 131/209] Add performance test. [#CLICKHOUSE-3606]

---
 .../consistent_hashes/consistent_hashes.xml   | 36 +++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100644 dbms/tests/performance/consistent_hashes/consistent_hashes.xml

diff --git a/dbms/tests/performance/consistent_hashes/consistent_hashes.xml b/dbms/tests/performance/consistent_hashes/consistent_hashes.xml
new file mode 100644
index 00000000000..04526b89bfe
--- /dev/null
+++ b/dbms/tests/performance/consistent_hashes/consistent_hashes.xml
@@ -0,0 +1,36 @@
+
+    consistent_hashes
+    once
+
+
+
+        1000
+        5000
+
+
+
+
+
+
+
+
+    hash_func
+
+        yandexConsistentHash
+        jumpConsistentHash
+
+
+
+    buckets
+
+        2
+        500
+        2000000000
+
+
+
+
+    SELECT {hash_func}(number, {buckets}) FROM system.numbers LIMIT 1000000000
+

From 2a56dd075fe24b810e5317faee2e54b5a018aa49 Mon Sep 17 00:00:00 2001
From: Alexey Zatelepin
Date: Mon, 26 Feb 2018 22:33:16 +0300
Subject: [PATCH 132/209] fix typo

---
 dbms/src/Functions/FunctionsArray.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/dbms/src/Functions/FunctionsArray.cpp b/dbms/src/Functions/FunctionsArray.cpp
index 31df15a9c01..7da348e806b 100644
--- a/dbms/src/Functions/FunctionsArray.cpp
+++ b/dbms/src/Functions/FunctionsArray.cpp
@@ -1024,7 +1024,7 @@ void FunctionArrayUniq::executeImpl(Block & block, const ColumnNumbers & argumen
        if (i == 0)
            offsets = &offsets_i;
        else if (offsets_i != *offsets)
-            throw Exception("Lengths of all arrays passsed to " + getName() + " must be equal.",
+            throw Exception("Lengths of all arrays passed to " + getName() + " must be equal.",
                ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH);

        data_columns[i] = &array->getData();
@@ -1331,7 +1331,7 @@ void FunctionArrayEnumerateUniq::executeImpl(Block & block, const ColumnNumbers
        if (i == 0)
            offsets = &offsets_i;
        else if (offsets_i != *offsets)
-            throw Exception("Lengths of all arrays passsed to " + getName() + " must be equal.",
+            throw Exception("Lengths of all arrays passed to " + getName() + " must be equal.",
                ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH);

        data_columns[i] = &array->getData();
@@ -2442,7 +2442,7 @@ void FunctionArrayReduce::executeImpl(Block & block, const ColumnNumbers & argum
        if (i == 0)
            offsets = offsets_i;
        else if (*offsets_i != *offsets)
-            throw Exception("Lengths of all arrays passsed to " + getName() + " must be equal.",
+            throw Exception("Lengths of all arrays passed to " + getName() + " must be equal.",
                ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH);
    }
    const IColumn ** aggregate_arguments = aggregate_arguments_vec.data();

From 720a11fe4e878345eac1781f1d441a4a98443e44 Mon Sep 17 00:00:00 2001
From: proller
Date: Mon, 26 Feb 2018 22:38:06 +0300
Subject: [PATCH 133/209] Fix gcc "may be used uninitialized" warning

---
 dbms/src/Common/Stopwatch.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/dbms/src/Common/Stopwatch.h
b/dbms/src/Common/Stopwatch.h index dc3e7e12481..1036265f32a 100644 --- a/dbms/src/Common/Stopwatch.h +++ b/dbms/src/Common/Stopwatch.h @@ -39,10 +39,10 @@ public: double elapsedSeconds() const { return static_cast(elapsed()) / 1000000000ULL; } private: - UInt64 start_ns; - UInt64 stop_ns; + UInt64 start_ns = 0; + UInt64 stop_ns = 0; clockid_t clock_type; - bool is_running; + bool is_running = false; UInt64 nanoseconds() const { return StopWatchDetail::nanoseconds(clock_type); } }; From 5d27e43f960122e44e53afe539539bc97795acd1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 27 Feb 2018 00:00:42 +0300 Subject: [PATCH 134/209] Better UNION ALL: development #1947 --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 19 ++-- dbms/src/Interpreters/ExpressionAnalyzer.h | 4 +- .../Interpreters/InterpreterSelectQuery.cpp | 94 ++++++++++--------- .../src/Interpreters/InterpreterSelectQuery.h | 1 + 4 files changed, 68 insertions(+), 50 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index ad65ed1f36d..c5373b489de 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -60,6 +60,8 @@ #include #include +#include + namespace DB { @@ -193,8 +195,10 @@ void ExpressionAnalyzer::init() /// Common subexpression elimination. Rewrite rules. normalizeTree(); - /// ALIAS columns should not be substituted for ASTAsterisk, we will add them now, after normalizeTree. - addAliasColumns(); + /// ALIAS and MATERIALIZED columns should not be substituted for ASTAsterisk, we will add them now, after normalizeTree. + addAliasAndMaterializedColumns(); + + DUMP(source_columns); /// Executing scalar subqueries - replacing them with constant values. executeScalarSubqueries(); @@ -217,9 +221,11 @@ void ExpressionAnalyzer::init() /// All selected columns in case of DISTINCT; columns that contain arrayJoin function inside. calculateRequiredColumnsBeforeProjection(); - /// Delete the unnecessary from `columns` list. Create `unknown_required_source_columns`. Form `columns_added_by_join`. + /// Delete the unnecessary from `source_columns` list. Create `unknown_required_source_columns`. Form `columns_added_by_join`. collectUsedColumns(); + DUMP(source_columns); + /// external_tables, subqueries_for_sets for global subqueries. /// Replaces global subqueries with the generated names of temporary tables that will be sent to remote servers. initGlobalSubqueriesAndExternalTables(); @@ -1123,7 +1129,7 @@ void ExpressionAnalyzer::normalizeTreeImpl( } -void ExpressionAnalyzer::addAliasColumns() +void ExpressionAnalyzer::addAliasAndMaterializedColumns() { if (!select_query) return; @@ -1132,6 +1138,7 @@ void ExpressionAnalyzer::addAliasColumns() return; source_columns.insert(std::end(source_columns), std::begin(storage->alias_columns), std::end(storage->alias_columns)); + source_columns.insert(std::end(source_columns), std::begin(storage->materialized_columns), std::end(storage->materialized_columns)); } @@ -2701,8 +2708,8 @@ void ExpressionAnalyzer::collectUsedColumns() ++it; } - /// Perhaps, there are virtual columns among the unknown columns. Remove them from the list of unknown and add - /// in columns list, so that when further processing the request they are perceived as real. + /// If there are virtual columns among the unknown columns. Remove them from the list of unknown and add + /// in columns list, so that when further processing they are also considered. 
    if (storage)
    {
        for (auto it = unknown_required_source_columns.begin(); it != unknown_required_source_columns.end();)
diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h
index e01871ac141..72ee74cca3c 100644
--- a/dbms/src/Interpreters/ExpressionAnalyzer.h
+++ b/dbms/src/Interpreters/ExpressionAnalyzer.h
@@ -251,8 +251,8 @@ private:

    void makeSet(const ASTFunction * node, const Block & sample_block);

-    /// Adds a list of ALIAS columns from the table
-    void addAliasColumns();
+    /// Adds a list of ALIAS and MATERIALIZED columns from the table.
+    void addAliasAndMaterializedColumns();

    /// Replacing scalar subqueries with constant values.
    void executeScalarSubqueries();
diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp
index 67e683ee028..7b6bdd2c64b 100644
--- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp
@@ -46,6 +46,9 @@
 #include
 #include
+#include <Common/iostream_debug_helpers.h>
+#include <Parsers/queryToString.h>
+

 namespace ProfileEvents
@@ -66,6 +69,36 @@ namespace ErrorCodes
    extern const int LOGICAL_ERROR;
}

+InterpreterSelectQuery::InterpreterSelectQuery(
+    const ASTPtr & query_ptr_,
+    const Context & context_,
+    const Names & required_column_names_,
+    QueryProcessingStage::Enum to_stage_,
+    size_t subquery_depth_,
+    const BlockInputStreamPtr & input)
+    : query_ptr(query_ptr_)
+    , query(typeid_cast<ASTSelectQuery &>(*query_ptr))
+    , context(context_)
+    , to_stage(to_stage_)
+    , subquery_depth(subquery_depth_)
+    , input(input)
+    , log(&Logger::get("InterpreterSelectQuery"))
+{
+    init(required_column_names_);
+}
+
+
+InterpreterSelectQuery::InterpreterSelectQuery(OnlyAnalyzeTag, const ASTPtr & query_ptr_, const Context & context_)
+    : query_ptr(query_ptr_)
+    , query(typeid_cast<ASTSelectQuery &>(*query_ptr))
+    , context(context_)
+    , to_stage(QueryProcessingStage::Complete)
+    , subquery_depth(0)
+    , only_analyze(true)
+    , log(&Logger::get("InterpreterSelectQuery"))
+{
+    init({});
+}

 InterpreterSelectQuery::~InterpreterSelectQuery() = default;

@@ -121,7 +154,8 @@ void InterpreterSelectQuery::init(const Names & required_column_names)

        table_lock = storage->lockStructure(false, __PRETTY_FUNCTION__);

-        /// TODO This looks weird.
+        /// Source header should contain only columns that can be substituted for asterisk.
+        /// Materialized and alias columns will be processed by ExpressionAnalyzer.
        source_header = storage->getSampleBlockNonMaterialized();
    }
}
@@ -148,38 +182,6 @@ void InterpreterSelectQuery::init(const Names & required_column_names)
}

-InterpreterSelectQuery::InterpreterSelectQuery(
-    const ASTPtr & query_ptr_,
-    const Context & context_,
-    const Names & required_column_names_,
-    QueryProcessingStage::Enum to_stage_,
-    size_t subquery_depth_,
-    const BlockInputStreamPtr & input)
-    : query_ptr(query_ptr_)
-    , query(typeid_cast<ASTSelectQuery &>(*query_ptr))
-    , context(context_)
-    , to_stage(to_stage_)
-    , subquery_depth(subquery_depth_)
-    , input(input)
-    , log(&Logger::get("InterpreterSelectQuery"))
-{
-    init(required_column_names_);
-}
-
-
-InterpreterSelectQuery::InterpreterSelectQuery(OnlyAnalyzeTag, const ASTPtr & query_ptr_, const Context & context_)
-    : query_ptr(query_ptr_)
-    , query(typeid_cast<ASTSelectQuery &>(*query_ptr))
-    , context(context_)
-    , to_stage(QueryProcessingStage::Complete)
-    , subquery_depth(0)
-    , only_analyze(true)
-    , log(&Logger::get("InterpreterSelectQuery"))
-{
-    init({});
-}
-
-
 void InterpreterSelectQuery::getDatabaseAndTableNames(String & database_name, String & table_name)
{
    auto query_database = query.database();
@@ -536,6 +538,8 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline
        }
    }

+    DUMP(alias_columns_required);
+
    if (alias_columns_required)
    {
        /// We will create an expression to return all the requested columns, with the calculation of the required ALIAS columns.
@@ -550,11 +554,17 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline
            required_columns_expr_list->children.emplace_back(std::make_shared<ASTIdentifier>(column));
        }

+        DUMP(queryToString(required_columns_expr_list));
+
        alias_actions = ExpressionAnalyzer{required_columns_expr_list, context, storage, source_header.getNamesAndTypesList()}.getActions(true);

+        DUMP(alias_actions->dumpActions());
+
        /// The set of required columns could be added as a result of adding an action to calculate ALIAS.
        required_columns = alias_actions->getRequiredColumns();
    }
+
+    DUMP(required_columns);
 }

    auto query_table = query.table();
@@ -678,15 +688,6 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline

        if (pipeline.streams.empty())
            pipeline.streams.emplace_back(std::make_shared<NullBlockInputStream>(storage->getSampleBlockForColumns(required_columns)));

-        if (alias_actions)
-        {
-            /// Wrap each stream returned from the table to calculate and add ALIAS columns
-            pipeline.transform([&] (auto & stream)
-            {
-                stream = std::make_shared<ExpressionBlockInputStream>(stream, alias_actions);
-            });
-        }
-
        pipeline.transform([&](auto & stream)
        {
            stream->addTableLock(table_lock);
@@ -723,6 +724,15 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline
    else
        throw Exception("Logical error in InterpreterSelectQuery: nowhere to read", ErrorCodes::LOGICAL_ERROR);

+    /// Aliases in table declaration.
+    if (alias_actions)
+    {
+        pipeline.transform([&](auto & stream)
+        {
+            stream = std::make_shared<ExpressionBlockInputStream>(stream, alias_actions);
+        });
+    }
+
    return from_stage;
}

diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.h b/dbms/src/Interpreters/InterpreterSelectQuery.h
index 744d143f4c9..c2109cba211 100644
--- a/dbms/src/Interpreters/InterpreterSelectQuery.h
+++ b/dbms/src/Interpreters/InterpreterSelectQuery.h
@@ -155,6 +155,7 @@ private:
    QueryProcessingStage::Enum executeFetchColumns(Pipeline & pipeline);

    void executeWithMultipleStreamsImpl(Pipeline & pipeline, const BlockInputStreamPtr & input);
+
    void executeWhere(Pipeline & pipeline, const ExpressionActionsPtr & expression);
    void executeAggregation(Pipeline & pipeline, const ExpressionActionsPtr & expression, bool overflow_row, bool final);
    void executeMergeAggregated(Pipeline & pipeline, bool overflow_row, bool final);

From cf5c378ed7be29b838cc5a38b04ce7f4f0e32ed3 Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko
Date: Tue, 27 Feb 2018 03:01:51 +0300
Subject: [PATCH 135/209] Add optimized version of sumbur hash, reduced buckets
 range. [#CLICKHOUSE-3606]

---
 dbms/src/Functions/CMakeLists.txt             |   4 +-
 .../Functions/FunctionsConsistentHashing.h    |  38 +-----
 .../consistent_hashes/consistent_hashes.xml   |   3 +-
 libs/CMakeLists.txt                           |   2 +-
 libs/libconsistent-hashing/CMakeLists.txt     |   5 +
 libs/libconsistent-hashing/mailru/sumbur.cpp  | 113 ++++++++++++++++++
 libs/libconsistent-hashing/mailru/sumbur.h    |  28 +++++
 .../yandex/bitops.h                           |   0
 .../yandex/consistent_hashing.cpp             |   0
 .../yandex/consistent_hashing.h               |   0
 .../yandex/popcount.cpp                       |   0
 .../yandex/popcount.h                         |   0
 libs/yandex-consistent-hashing/CMakeLists.txt |   5 -
 13 files changed, 155 insertions(+), 43 deletions(-)
 create mode 100644 libs/libconsistent-hashing/CMakeLists.txt
 create mode 100644 libs/libconsistent-hashing/mailru/sumbur.cpp
 create mode 100644 libs/libconsistent-hashing/mailru/sumbur.h
 rename libs/{yandex-consistent-hashing => libconsistent-hashing}/yandex/bitops.h (100%)
 rename libs/{yandex-consistent-hashing => libconsistent-hashing}/yandex/consistent_hashing.cpp (100%)
 rename libs/{yandex-consistent-hashing => libconsistent-hashing}/yandex/consistent_hashing.h (100%)
 rename libs/{yandex-consistent-hashing => libconsistent-hashing}/yandex/popcount.cpp (100%)
 rename libs/{yandex-consistent-hashing => libconsistent-hashing}/yandex/popcount.h (100%)
 delete mode 100644 libs/yandex-consistent-hashing/CMakeLists.txt

diff --git a/dbms/src/Functions/CMakeLists.txt b/dbms/src/Functions/CMakeLists.txt
index 347416a7a86..cf0bf00b075 100644
--- a/dbms/src/Functions/CMakeLists.txt
+++ b/dbms/src/Functions/CMakeLists.txt
@@ -78,7 +78,7 @@ list(REMOVE_ITEM clickhouse_functions_sources IFunction.cpp FunctionFactory.cpp
 list(REMOVE_ITEM clickhouse_functions_headers IFunction.h FunctionFactory.h FunctionHelpers.h)

 add_library(clickhouse_functions ${clickhouse_functions_sources})
-target_link_libraries(clickhouse_functions dbms)
+target_link_libraries(clickhouse_functions PUBLIC dbms PRIVATE libconsistent-hashing)
 target_include_directories (clickhouse_functions BEFORE PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/libfarmhash)
 target_include_directories (clickhouse_functions BEFORE PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/libmetrohash/src)
 target_include_directories (clickhouse_functions BEFORE PUBLIC ${DIVIDE_INCLUDE_DIR})
@@ -100,5 +100,3 @@ endif ()
 if (ENABLE_TESTS)
    add_subdirectory (tests)
 endif ()
-
-target_link_libraries (clickhouse_functions PRIVATE yandex-consistent-hashing)
diff --git 
a/dbms/src/Functions/FunctionsConsistentHashing.h b/dbms/src/Functions/FunctionsConsistentHashing.h index 673bce1389d..bd08dce9d31 100644 --- a/dbms/src/Functions/FunctionsConsistentHashing.h +++ b/dbms/src/Functions/FunctionsConsistentHashing.h @@ -7,7 +7,9 @@ #include #include #include + #include +#include namespace DB @@ -65,47 +67,17 @@ struct JumpConsistentHashImpl }; -/// Sumbur algorithm https://github.com/mailru/sumbur-ruby/blob/master/lib/sumbur/pure_ruby.rb -static inline UInt32 sumburConsistentHash(UInt32 hashed_integer, UInt32 cluster_capacity) -{ - UInt32 l = 0xFFFFFFFF; - UInt32 part = l / cluster_capacity; - - if (l - hashed_integer < part) - return 0; - - UInt32 h = hashed_integer; - UInt32 n = 1; - UInt32 i = 2; - while (i <= cluster_capacity) - { - auto c = l / (i * (i - 1)); - if (c <= h) - h -= c; - else - { - h += c * (i - n - 1); - n = i; - if (l / n - h < part) - break; - } - i += 1; - } - - return n - 1; -} - struct SumburConsistentHashImpl { static constexpr auto name = "sumburConsistentHash"; using HashType = UInt32; - using ResultType = UInt32; + using ResultType = UInt16; using BucketsCountType = ResultType; - static inline ResultType apply(UInt32 hash, BucketsCountType n) + static inline ResultType apply(HashType hash, BucketsCountType n) { - return sumburConsistentHash(hash, n); + return static_cast(sumburConsistentHash(hash, n)); } }; diff --git a/dbms/tests/performance/consistent_hashes/consistent_hashes.xml b/dbms/tests/performance/consistent_hashes/consistent_hashes.xml index 04526b89bfe..33930add3b7 100644 --- a/dbms/tests/performance/consistent_hashes/consistent_hashes.xml +++ b/dbms/tests/performance/consistent_hashes/consistent_hashes.xml @@ -27,10 +27,11 @@ 2 500 - 2000000000 + 65535 SELECT {hash_func}(number, {buckets}) FROM system.numbers LIMIT 1000000000 + SELECT sumburConsistentHash(toUInt32(number), {buckets}) FROM system.numbers LIMIT 10000 diff --git a/libs/CMakeLists.txt b/libs/CMakeLists.txt index cf2e8464452..dbd960e16f1 100644 --- a/libs/CMakeLists.txt +++ b/libs/CMakeLists.txt @@ -19,4 +19,4 @@ if (USE_MYSQL) add_subdirectory (libmysqlxx) endif () -add_subdirectory (yandex-consistent-hashing) +add_subdirectory (libconsistent-hashing) diff --git a/libs/libconsistent-hashing/CMakeLists.txt b/libs/libconsistent-hashing/CMakeLists.txt new file mode 100644 index 00000000000..ad520abace0 --- /dev/null +++ b/libs/libconsistent-hashing/CMakeLists.txt @@ -0,0 +1,5 @@ +cmake_minimum_required(VERSION 2.8) +project(libconsistent-hashing CXX) + +add_library(libconsistent-hashing yandex/consistent_hashing.cpp yandex/popcount.cpp mailru/sumbur.cpp) +target_include_directories(libconsistent-hashing PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) \ No newline at end of file diff --git a/libs/libconsistent-hashing/mailru/sumbur.cpp b/libs/libconsistent-hashing/mailru/sumbur.cpp new file mode 100644 index 00000000000..3b905f0adc7 --- /dev/null +++ b/libs/libconsistent-hashing/mailru/sumbur.cpp @@ -0,0 +1,113 @@ +//Copyright (c) 2011-2012 Mail.RU +//Copyright (c) 2011-2012 Maksim Kalinchenko +//Copyright (c) 2012 Sokolov Yura aka funny-falcon +// +//MIT License +// +//Permission is hereby granted, free of charge, to any person obtaining +//a copy of this software and associated documentation files (the +//"Software"), to deal in the Software without restriction, including +//without limitation the rights to use, copy, modify, merge, publish, +//distribute, sublicense, and/or sell copies of the Software, and to +//permit persons to whom the Software is furnished to do so, 
subject to +//the following conditions: +// +//The above copyright notice and this permission notice shall be +//included in all copies or substantial portions of the Software. +// +//THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +//EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +//MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +//NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +//LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +//OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +//WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +#include + + +#define L 0xFFFFFFFF + +static unsigned int L27_38[] = {L / 27, L / 28, L / 29, L / 30, L / 31, L / 32, + L / 33, L / 34, L / 35, L / 36, L / 37, L / 38, + L / 39, L / 40, L / 41, L / 42, L / 43, L / 44, + L / 45, L / 46, L / 47, L / 48, L / 49, L / 50, + L / 51, L / 52, L / 53, L / 54, L / 55, L / 56, + L / 57, L / 58, L / 59, L / 60, L / 61, L / 62 + }; +static unsigned int LL27_38[] = {L/(26*27), L/(27*28), L/(28*29), L/(29*30), L/(30*31), L/(31*32), + L/(32*33), L/(33*34), L/(34*35), L/(35*36), L/(36*37), L/(37*38), + L/(38*39), L/(39*40), L/(40*41), L/(41*42), L/(42*43), L/(43*44), + L/(44*45), L/(45*46), L/(46*47), L/(47*48), L/(48*49), L/(49*50), + L/(50*51), L/(51*52), L/(52*53), L/(53*54), L/(54*55), L/(55*56), + L/(56*57), L/(57*58), L/(58*59), L/(59*60), L/(60*61), L/(61*62) + }; + +unsigned int sumburConsistentHash(unsigned int hashed_int, unsigned int capacity) +{ + unsigned int h = hashed_int; + unsigned int capa = capacity; + unsigned int part, n, i, c; + + if (capa == 0) + throw std::runtime_error("Sumbur is not applicable to empty cluster"); + + part = L / capa; + + if (L - h < part) return 0; + + n = 1; + + do { + if (h >= L / 2) h -= L / 2; + else { + n = 2; + if (L / 2 - h < part) return 1; + } + if (capa == 2) return 1; + +#define curslice(i) (L / (i * (i - 1))) +#define unroll(i) \ + if (curslice(i) <= h) h -= curslice(i); \ + else { \ + h += curslice(i) * (i - n - 1); \ + n = i; \ + if (L / i - h < part) return n-1; \ + } \ + if (capa == i) return (n-1) + + unroll(3); unroll(4); unroll(5); + unroll(6); unroll(7); unroll(8); + unroll(9); unroll(10); unroll(11); + unroll(12); unroll(13); unroll(14); + unroll(15); unroll(16); unroll(17); + unroll(18); unroll(19); unroll(20); + unroll(21); unroll(22); unroll(23); + unroll(24); unroll(25); unroll(26); + + for (i = 27; i <= capa && i <= 62; i++) { + c = LL27_38[i-27]; + if (c <= h) { + h -= c; + } + else { + h += c * (i - n - 1); + n = i; + if (L27_38[i-27] - h < part) return n-1; + } + } + + for(i = 63; i <= capa; i++) { + c = L / (i * (i - 1)); + if (c <= h) { + h -= c; + } + else { + h += c * (i - n - 1); + n = i; + if (L / i - h < part) return n - 1; + } + } + } while(0); + return n - 1; +} diff --git a/libs/libconsistent-hashing/mailru/sumbur.h b/libs/libconsistent-hashing/mailru/sumbur.h new file mode 100644 index 00000000000..1632665a073 --- /dev/null +++ b/libs/libconsistent-hashing/mailru/sumbur.h @@ -0,0 +1,28 @@ +//Copyright (c) 2011-2012 Mail.RU +//Copyright (c) 2011-2012 Maksim Kalinchenko +//Copyright (c) 2012 Sokolov Yura aka funny-falcon +// +//MIT License +// +//Permission is hereby granted, free of charge, to any person obtaining +//a copy of this software and associated documentation files (the +//"Software"), to deal in the Software without restriction, including +//without limitation the rights to use, copy, modify, merge, publish, 
+//distribute, sublicense, and/or sell copies of the Software, and to +//permit persons to whom the Software is furnished to do so, subject to +//the following conditions: +// +//The above copyright notice and this permission notice shall be +//included in all copies or substantial portions of the Software. +// +//THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +//EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +//MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +//NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +//LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +//OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +//WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +/// Source code: https://github.com/mailru/sumbur-ruby/blob/master/ext/sumbur/sumbur.c + +unsigned int sumburConsistentHash(unsigned int hashed_int, unsigned int capacity); diff --git a/libs/yandex-consistent-hashing/yandex/bitops.h b/libs/libconsistent-hashing/yandex/bitops.h similarity index 100% rename from libs/yandex-consistent-hashing/yandex/bitops.h rename to libs/libconsistent-hashing/yandex/bitops.h diff --git a/libs/yandex-consistent-hashing/yandex/consistent_hashing.cpp b/libs/libconsistent-hashing/yandex/consistent_hashing.cpp similarity index 100% rename from libs/yandex-consistent-hashing/yandex/consistent_hashing.cpp rename to libs/libconsistent-hashing/yandex/consistent_hashing.cpp diff --git a/libs/yandex-consistent-hashing/yandex/consistent_hashing.h b/libs/libconsistent-hashing/yandex/consistent_hashing.h similarity index 100% rename from libs/yandex-consistent-hashing/yandex/consistent_hashing.h rename to libs/libconsistent-hashing/yandex/consistent_hashing.h diff --git a/libs/yandex-consistent-hashing/yandex/popcount.cpp b/libs/libconsistent-hashing/yandex/popcount.cpp similarity index 100% rename from libs/yandex-consistent-hashing/yandex/popcount.cpp rename to libs/libconsistent-hashing/yandex/popcount.cpp diff --git a/libs/yandex-consistent-hashing/yandex/popcount.h b/libs/libconsistent-hashing/yandex/popcount.h similarity index 100% rename from libs/yandex-consistent-hashing/yandex/popcount.h rename to libs/libconsistent-hashing/yandex/popcount.h diff --git a/libs/yandex-consistent-hashing/CMakeLists.txt b/libs/yandex-consistent-hashing/CMakeLists.txt deleted file mode 100644 index 694c2d071d9..00000000000 --- a/libs/yandex-consistent-hashing/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -cmake_minimum_required(VERSION 2.8) -project(yandex-consistent-hashing CXX) - -add_library(yandex-consistent-hashing yandex/consistent_hashing.cpp yandex/popcount.cpp) -target_include_directories(yandex-consistent-hashing PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) \ No newline at end of file From 0e7c49fe9bce00e87157e8cfc26ad6f72abfc266 Mon Sep 17 00:00:00 2001 From: proller Date: Tue, 27 Feb 2018 20:17:02 +0300 Subject: [PATCH 136/209] Build fixes (#1970) * Macos test fix * Fix freebsd build --- contrib/CMakeLists.txt | 4 +++- dbms/CMakeLists.txt | 4 ++++ dbms/src/CMakeLists.txt | 4 ---- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 4f3e38c0cbc..28445658fe9 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -90,7 +90,9 @@ if (USE_INTERNAL_RDKAFKA_LIBRARY) endif () if (USE_INTERNAL_CAPNP_LIBRARY) - if (NOT APPLE) # tests never end + if (APPLE) # tests never end + set (BUILD_TESTING 0 CACHE INTERNAL "") + else () set 
(BUILD_TESTING ${ENABLE_TESTS} CACHE INTERNAL "") endif () set (_save ${CMAKE_CXX_EXTENSIONS}) diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 551ed17cb76..f960e82a66a 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -82,6 +82,10 @@ list (APPEND dbms_headers src/TableFunctions/ITableFunction.h src/TableFunctions add_library(clickhouse_common_io ${SPLIT_SHARED} ${clickhouse_common_io_headers} ${clickhouse_common_io_sources}) +if (ARCH_FREEBSD) + target_compile_definitions (clickhouse_common_io PUBLIC CLOCK_MONOTONIC_COARSE=CLOCK_MONOTONIC_FAST) +endif () + add_subdirectory(src/Common/ZooKeeper) add_subdirectory(src/Common/ConfigProcessor) diff --git a/dbms/src/CMakeLists.txt b/dbms/src/CMakeLists.txt index 3f132eddbb5..d52fb965f9e 100644 --- a/dbms/src/CMakeLists.txt +++ b/dbms/src/CMakeLists.txt @@ -1,7 +1,3 @@ -if (ARCH_FREEBSD) - set (PLATFORM_EXTRA_CXX_FLAG "-DCLOCK_MONOTONIC_COARSE=CLOCK_MONOTONIC_FAST") -endif () - add_subdirectory (Columns) add_subdirectory (Common) add_subdirectory (Core) From f89d9dbfb9c30d050c511bc79f5e5a5ed6571e7a Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Tue, 27 Feb 2018 19:52:00 +0300 Subject: [PATCH 137/209] Fixed segfault in an integration test. [#CLICKHOUSE-2] --- dbms/src/Common/ZooKeeper/ZooKeeper.cpp | 3 ++- dbms/src/Common/ZooKeeper/ZooKeeper.h | 8 ++++---- dbms/tests/integration/test_cluster_copier/test.py | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/dbms/src/Common/ZooKeeper/ZooKeeper.cpp b/dbms/src/Common/ZooKeeper/ZooKeeper.cpp index eda88bc52b3..85bbe140d39 100644 --- a/dbms/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/dbms/src/Common/ZooKeeper/ZooKeeper.cpp @@ -641,7 +641,8 @@ int32_t ZooKeeper::tryMulti(const Ops & ops_, OpResultsPtr * out_results_) int32_t ZooKeeper::tryMultiUnsafe(const Ops & ops, MultiTransactionInfo & info) { info.code = multiImpl(ops, &info.op_results); - info.ops = &ops; + for (const OpPtr & op : ops) + info.ops.emplace_back(op->clone()); return info.code; } diff --git a/dbms/src/Common/ZooKeeper/ZooKeeper.h b/dbms/src/Common/ZooKeeper/ZooKeeper.h index ee71873ef15..8d35e37d27f 100644 --- a/dbms/src/Common/ZooKeeper/ZooKeeper.h +++ b/dbms/src/Common/ZooKeeper/ZooKeeper.h @@ -499,13 +499,13 @@ struct MultiTransactionInfo { MultiTransactionInfo() = default; - const Ops * ops = nullptr; + Ops ops; int32_t code = ZOK; OpResultsPtr op_results; bool empty() const { - return ops == nullptr; + return ops.empty(); } bool hasFailedOp() const @@ -515,7 +515,7 @@ struct MultiTransactionInfo const Op & getFailedOp() const { - return *ops->at(getFailedOpIndex(op_results, code)); + return *ops.at(getFailedOpIndex(op_results, code)); } KeeperException getException() const @@ -523,7 +523,7 @@ struct MultiTransactionInfo if (hasFailedOp()) { size_t i = getFailedOpIndex(op_results, code); - return KeeperException("Transaction failed at op #" + std::to_string(i) + ": " + ops->at(i)->describe(), code); + return KeeperException("Transaction failed at op #" + std::to_string(i) + ": " + ops.at(i)->describe(), code); } else return KeeperException(code); diff --git a/dbms/tests/integration/test_cluster_copier/test.py b/dbms/tests/integration/test_cluster_copier/test.py index 54b1ff87c50..731b3a888f5 100644 --- a/dbms/tests/integration/test_cluster_copier/test.py +++ b/dbms/tests/integration/test_cluster_copier/test.py @@ -196,7 +196,7 @@ def test_copy_month_to_week_partition(started_cluster): execute_task(Task2(started_cluster), []) def 
test_copy_month_to_week_partition_with_recovering(started_cluster): - execute_task(Task2(started_cluster), ['--copy-fault-probability', str(0.1)]) + execute_task(Task2(started_cluster), ['--copy-fault-probability', str(0.3)]) if __name__ == '__main__': From 8d72ca25d575a0c2c749a429a5a362b84d1b1bfa Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 27 Feb 2018 22:00:55 +0300 Subject: [PATCH 138/209] Better UNION ALL: development #1947 --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 24 +++++++++----- dbms/src/Interpreters/ExpressionAnalyzer.h | 4 +-- .../Interpreters/InterpreterSelectQuery.cpp | 31 ++++++------------- .../src/Interpreters/InterpreterSelectQuery.h | 14 +++------ 4 files changed, 32 insertions(+), 41 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index c5373b489de..9fd63bb979d 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -176,9 +176,11 @@ ExpressionAnalyzer::ExpressionAnalyzer( void ExpressionAnalyzer::init() { + select_query = typeid_cast(ast.get()); + removeDuplicateColumns(source_columns); - select_query = typeid_cast(ast.get()); + addAliasColumns(); translateQualifiedNames(); @@ -195,9 +197,6 @@ void ExpressionAnalyzer::init() /// Common subexpression elimination. Rewrite rules. normalizeTree(); - /// ALIAS and MATERIALIZED columns should not be substituted for ASTAsterisk, we will add them now, after normalizeTree. - addAliasAndMaterializedColumns(); - DUMP(source_columns); /// Executing scalar subqueries - replacing them with constant values. @@ -1046,8 +1045,18 @@ void ExpressionAnalyzer::normalizeTreeImpl( if (typeid_cast(asts[i].get())) { ASTs all_columns; - for (const auto & column_name_type : source_columns) - all_columns.emplace_back(std::make_shared(column_name_type.name)); + + if (storage) + { + /// If we select from a table, get only not MATERIALIZED, not ALIAS columns. + for (const auto & name_type : storage->getColumnsListNonMaterialized()) + all_columns.emplace_back(std::make_shared(name_type.name)); + } + else + { + for (const auto & name_type : source_columns) + all_columns.emplace_back(std::make_shared(name_type.name)); + } asts.erase(asts.begin() + i); asts.insert(asts.begin() + i, all_columns.begin(), all_columns.end()); @@ -1129,7 +1138,7 @@ void ExpressionAnalyzer::normalizeTreeImpl( } -void ExpressionAnalyzer::addAliasAndMaterializedColumns() +void ExpressionAnalyzer::addAliasColumns() { if (!select_query) return; @@ -1138,7 +1147,6 @@ void ExpressionAnalyzer::addAliasAndMaterializedColumns() return; source_columns.insert(std::end(source_columns), std::begin(storage->alias_columns), std::end(storage->alias_columns)); - source_columns.insert(std::end(source_columns), std::begin(storage->materialized_columns), std::end(storage->materialized_columns)); } diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h index 72ee74cca3c..2461a431d35 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.h +++ b/dbms/src/Interpreters/ExpressionAnalyzer.h @@ -251,8 +251,8 @@ private: void makeSet(const ASTFunction * node, const Block & sample_block); - /// Adds a list of ALIAS and MATERIALIZED columns from the table. - void addAliasAndMaterializedColumns(); + /// Adds a list of ALIAS columns from the table. + void addAliasColumns(); /// Replacing scalar subqueries with constant values. 
void executeScalarSubqueries(); diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 7b6bdd2c64b..5a34bdd3027 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -46,9 +46,6 @@ #include #include -#include -#include - namespace ProfileEvents { @@ -72,7 +69,7 @@ namespace ErrorCodes InterpreterSelectQuery::InterpreterSelectQuery( const ASTPtr & query_ptr_, const Context & context_, - const Names & required_column_names_, + const Names & required_result_column_names_, QueryProcessingStage::Enum to_stage_, size_t subquery_depth_, const BlockInputStreamPtr & input) @@ -84,7 +81,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( , input(input) , log(&Logger::get("InterpreterSelectQuery")) { - init(required_column_names_); + init(required_result_column_names_); } @@ -103,7 +100,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(OnlyAnalyzeTag, const ASTPtr & qu InterpreterSelectQuery::~InterpreterSelectQuery() = default; -void InterpreterSelectQuery::init(const Names & required_column_names) +void InterpreterSelectQuery::init(const Names & required_result_column_names) { ProfileEvents::increment(ProfileEvents::SelectQuery); @@ -154,9 +151,7 @@ void InterpreterSelectQuery::init(const Names & required_column_names) table_lock = storage->lockStructure(false, __PRETTY_FUNCTION__); - /// Source header should contain only columns that can be substituted for asterisk. - /// Materialied and alias columns will be processed by ExpressionAnalyzer. - source_header = storage->getSampleBlockNonMaterialized(); + source_header = storage->getSampleBlock(); } } @@ -164,7 +159,7 @@ void InterpreterSelectQuery::init(const Names & required_column_names) throw Exception("There are no available columns", ErrorCodes::THERE_IS_NO_COLUMN); query_analyzer = std::make_unique( - query_ptr, context, storage, source_header.getNamesAndTypesList(), required_column_names, subquery_depth, !only_analyze); + query_ptr, context, storage, source_header.getNamesAndTypesList(), required_result_column_names, subquery_depth, !only_analyze); if (query.sample_size() && (input || !storage || !storage->supportsSampling())) throw Exception("Illegal SAMPLE: table doesn't support sampling", ErrorCodes::SAMPLING_NOT_SUPPORTED); @@ -515,9 +510,6 @@ static void getLimitLengthAndOffset(ASTSelectQuery & query, size_t & length, siz QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline & pipeline) { - /// The subquery interpreter, if the subquery - std::unique_ptr interpreter_subquery; - /// List of columns to read to execute the query. Names required_columns = query_analyzer->getRequiredSourceColumns(); @@ -538,8 +530,6 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline } } - DUMP(alias_columns_required); - if (alias_columns_required) { /// We will create an expression to return all the requested columns, with the calculation of the required ALIAS columns. @@ -554,19 +544,16 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline required_columns_expr_list->children.emplace_back(std::make_shared(column)); } - DUMP(queryToString(required_columns_expr_list)); - alias_actions = ExpressionAnalyzer{required_columns_expr_list, context, storage, source_header.getNamesAndTypesList()}.getActions(true); - DUMP(alias_actions->dumpActions()); - /// The set of required columns could be added as a result of adding an action to calculate ALIAS. 
required_columns = alias_actions->getRequiredColumns(); } - - DUMP(required_columns); } + /// The subquery interpreter, if the subquery + std::unique_ptr interpreter_subquery; + auto query_table = query.table(); if (query_table && typeid_cast(query_table.get())) { @@ -725,7 +712,7 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline throw Exception("Logical error in InterpreterSelectQuery: nowhere to read", ErrorCodes::LOGICAL_ERROR); /// Aliases in table declaration. - if (alias_actions) + if (from_stage == QueryProcessingStage::FetchColumns && alias_actions) { pipeline.transform([&](auto & stream) { diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.h b/dbms/src/Interpreters/InterpreterSelectQuery.h index c2109cba211..7175d72a638 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.h +++ b/dbms/src/Interpreters/InterpreterSelectQuery.h @@ -37,20 +37,16 @@ public: * for INSERT SELECT, a value 1 is passed instead of 0. * * input - * - if given - read not from the table specified in the query, but from ready source. + * - if given - read not from the table specified in the query, but from prepared source. * - * required_column_names - * - delete all columns except the specified ones from the query - it is used to delete unnecessary columns from subqueries. - * - * table_column_names - * - the list of available columns of the table. - * Used, for example, with reference to `input`. + * required_result_column_names + * - don't calculate all columns except the specified ones from the query - it is used to remove calculation of unnecessary columns from subqueries. */ InterpreterSelectQuery( const ASTPtr & query_ptr_, const Context & context_, - const Names & required_column_names = Names{}, + const Names & required_result_column_names = Names{}, QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete, size_t subquery_depth_ = 0, const BlockInputStreamPtr & input = nullptr); @@ -111,7 +107,7 @@ private: const ASTPtr & query_ptr_, const Context & context_); - void init(const Names & required_column_names); + void init(const Names & required_result_column_names); void executeImpl(Pipeline & pipeline, const BlockInputStreamPtr & input); From d4937621d3394f7b3503b671dda7b892678040ff Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 27 Feb 2018 22:02:13 +0300 Subject: [PATCH 139/209] Better UNION ALL: development #1947 --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 9fd63bb979d..8f7278ec3f5 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -60,8 +60,6 @@ #include #include -#include - namespace DB { @@ -197,8 +195,6 @@ void ExpressionAnalyzer::init() /// Common subexpression elimination. Rewrite rules. normalizeTree(); - DUMP(source_columns); - /// Executing scalar subqueries - replacing them with constant values. executeScalarSubqueries(); @@ -223,8 +219,6 @@ void ExpressionAnalyzer::init() /// Delete the unnecessary from `source_columns` list. Create `unknown_required_source_columns`. Form `columns_added_by_join`. collectUsedColumns(); - DUMP(source_columns); - /// external_tables, subqueries_for_sets for global subqueries. /// Replaces global subqueries with the generated names of temporary tables that will be sent to remote servers. 
initGlobalSubqueriesAndExternalTables(); From 7526d65cff2b056a8bd00415f05bc9c259e452c4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 27 Feb 2018 22:38:59 +0300 Subject: [PATCH 140/209] Better UNION ALL: development #1947 --- dbms/src/Interpreters/InterpreterCreateQuery.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 0444b26671b..54187825310 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -27,8 +27,9 @@ #include #include #include -#include +#include #include +#include #include #include @@ -474,7 +475,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) Block as_select_sample; if (create.select && (!create.attach || !create.columns)) - as_select_sample = InterpreterSelectQuery::getSampleBlock(create.select->clone(), context); + as_select_sample = InterpreterSelectWithUnionQuery::getSampleBlock(create.select->clone(), context); String as_database_name = create.as_database.empty() ? current_database : create.as_database; String as_table_name = create.as_table; From a3386b3ebaf9e93af1a37b3fdf48b54719608063 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 27 Feb 2018 23:16:58 +0300 Subject: [PATCH 141/209] Better UNION ALL: development #1947 --- .../InterpreterSelectWithUnionQuery.cpp | 77 +++++++++++++++---- .../InterpreterSelectWithUnionQuery.h | 4 +- 2 files changed, 63 insertions(+), 18 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index fafafae4422..e7fcd1c62b0 100644 --- a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -22,7 +23,7 @@ namespace ErrorCodes InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( const ASTPtr & query_ptr_, const Context & context_, - const Names & required_column_names, + const Names & required_result_column_names, QueryProcessingStage::Enum to_stage_, size_t subquery_depth_) : query_ptr(query_ptr_), @@ -33,27 +34,70 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( const ASTSelectWithUnionQuery & ast = typeid_cast(*query_ptr); size_t num_selects = ast.list_of_selects->children.size(); + + if (!num_selects) + throw Exception("Logical error: no children in ASTSelectWithUnionQuery", ErrorCodes::LOGICAL_ERROR); + + /// Check number of columns. + + size_t num_columns = 0; + for (const auto & select : ast.list_of_selects->children) + { + size_t current_num_columns = typeid_cast(*select).select_expression_list->children.size(); + + if (!current_num_columns) + throw Exception("Logical error: SELECT query has zero columns in SELECT clause", ErrorCodes::LOGICAL_ERROR); + + if (!num_columns) + num_columns = current_num_columns; + else if (num_columns != current_num_columns) + throw Exception("Different number of columns in UNION ALL elements.", ErrorCodes::UNION_ALL_RESULT_STRUCTURES_MISMATCH); + } + + /// Initialize interpreters for each SELECT query. + /// Note that we pass 'required_result_column_names' to first SELECT. + /// And for the rest, we pass names at the corresponding positions of 'required_result_column_names' of the first SELECT, + /// because names could be different. 
+ nested_interpreters.reserve(num_selects); - if (!num_selects) - throw Exception("Logical error: no children in ASTSelectWithUnionQuery", ErrorCodes::LOGICAL_ERROR); + std::vector positions_of_required_result_columns(required_result_column_names.size()); - for (const auto & select : ast.list_of_selects->children) - nested_interpreters.emplace_back(std::make_unique(select, context, required_column_names, to_stage, subquery_depth)); + { + const auto & first_select = static_cast(*ast.list_of_selects->children.at(0)); - init(); -} + for (size_t required_result_num = 0, size = required_result_column_names.size(); required_result_num < size; ++required_result_num) + { + bool found = false; + for (size_t position_in_select = 0; position_in_select < num_columns; ++position_in_select) + { + if (first_select.select_expression_list->children.at(position_in_select)->getAliasOrColumnName() + == required_result_column_names[required_result_num]) + { + found = true; + positions_of_required_result_columns[required_result_num] = position_in_select; + break; + } + } + if (!found) + throw Exception("Logical error: cannot find result column " + backQuoteIfNeed(required_result_column_names[required_result_num]) + + " in first SELECT query in UNION ALL", ErrorCodes::LOGICAL_ERROR); + } + } + for (size_t query_num = 0; query_num < num_selects; ++query_num) + { + const auto & select = ast.list_of_selects->children.at(query_num); + Names current_required_result_column_names(required_result_column_names.size()); + for (size_t required_result_num = 0, size = required_result_column_names.size(); required_result_num < size; ++required_result_num) + current_required_result_column_names[required_result_num] + = static_cast(*select).select_expression_list + ->children.at(positions_of_required_result_columns[required_result_num])->getAliasOrColumnName(); -InterpreterSelectWithUnionQuery::~InterpreterSelectWithUnionQuery() = default; + nested_interpreters.emplace_back(std::make_unique(select, context, current_required_result_column_names, to_stage, subquery_depth)); + } - -void InterpreterSelectWithUnionQuery::init() -{ - size_t num_selects = nested_interpreters.size(); - - if (!num_selects) - throw Exception("Logical error: no children in ASTSelectWithUnionQuery", ErrorCodes::LOGICAL_ERROR); + /// Determine structure of result. 
if (num_selects == 1) { @@ -112,6 +156,9 @@ void InterpreterSelectWithUnionQuery::init() } +InterpreterSelectWithUnionQuery::~InterpreterSelectWithUnionQuery() = default; + + Block InterpreterSelectWithUnionQuery::getSampleBlock() { return result_header; diff --git a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h index ec1116e082a..732b806903b 100644 --- a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h +++ b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h @@ -19,7 +19,7 @@ public: InterpreterSelectWithUnionQuery( const ASTPtr & query_ptr_, const Context & context_, - const Names & required_column_names = Names{}, + const Names & required_result_column_names = Names{}, QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete, size_t subquery_depth_ = 0); @@ -47,8 +47,6 @@ private: std::vector> nested_interpreters; Block result_header; - - void init(); }; } From 492053bc436c84751a4fc3b7e9574562e4f1c1d7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 27 Feb 2018 23:43:42 +0300 Subject: [PATCH 142/209] Better UNION ALL: development #1947 --- .../InterpreterSelectWithUnionQuery.cpp | 67 +++++++------------ 1 file changed, 26 insertions(+), 41 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index e7fcd1c62b0..2b632e4d8a0 100644 --- a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -38,63 +38,48 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( if (!num_selects) throw Exception("Logical error: no children in ASTSelectWithUnionQuery", ErrorCodes::LOGICAL_ERROR); - /// Check number of columns. - - size_t num_columns = 0; - for (const auto & select : ast.list_of_selects->children) - { - size_t current_num_columns = typeid_cast(*select).select_expression_list->children.size(); - - if (!current_num_columns) - throw Exception("Logical error: SELECT query has zero columns in SELECT clause", ErrorCodes::LOGICAL_ERROR); - - if (!num_columns) - num_columns = current_num_columns; - else if (num_columns != current_num_columns) - throw Exception("Different number of columns in UNION ALL elements.", ErrorCodes::UNION_ALL_RESULT_STRUCTURES_MISMATCH); - } - /// Initialize interpreters for each SELECT query. /// Note that we pass 'required_result_column_names' to first SELECT. - /// And for the rest, we pass names at the corresponding positions of 'required_result_column_names' of the first SELECT, + /// And for the rest, we pass names at the corresponding positions of 'required_result_column_names' in the result of first SELECT, /// because names could be different. nested_interpreters.reserve(num_selects); - std::vector positions_of_required_result_columns(required_result_column_names.size()); - + std::vector required_result_column_names_for_other_selects(num_selects); + if (!required_result_column_names.empty()) { - const auto & first_select = static_cast(*ast.list_of_selects->children.at(0)); + /// Result header if there are no filtering by 'required_result_column_names'. + /// We use it to determine positions of 'required_result_column_names' in SELECT clause. 
+ Block full_result_header = InterpreterSelectQuery( + ast.list_of_selects->children.at(0), context, Names(), to_stage, subquery_depth).getSampleBlock(); + + std::vector positions_of_required_result_columns(required_result_column_names.size()); for (size_t required_result_num = 0, size = required_result_column_names.size(); required_result_num < size; ++required_result_num) + positions_of_required_result_columns[required_result_num] = full_result_header.getPositionByName(required_result_column_names[required_result_num]); + + for (size_t query_num = 1; query_num < num_selects; ++query_num) { - bool found = false; - for (size_t position_in_select = 0; position_in_select < num_columns; ++position_in_select) - { - if (first_select.select_expression_list->children.at(position_in_select)->getAliasOrColumnName() - == required_result_column_names[required_result_num]) - { - found = true; - positions_of_required_result_columns[required_result_num] = position_in_select; - break; - } - } - if (!found) - throw Exception("Logical error: cannot find result column " + backQuoteIfNeed(required_result_column_names[required_result_num]) - + " in first SELECT query in UNION ALL", ErrorCodes::LOGICAL_ERROR); + Block full_result_header_for_current_select = InterpreterSelectQuery( + ast.list_of_selects->children.at(query_num), context, Names(), to_stage, subquery_depth).getSampleBlock(); + + if (full_result_header_for_current_select.columns() != full_result_header.columns()) + throw Exception("Different number of columns in UNION ALL elements", ErrorCodes::UNION_ALL_RESULT_STRUCTURES_MISMATCH); + + required_result_column_names_for_other_selects[query_num].reserve(required_result_column_names.size()); + for (const auto & pos : positions_of_required_result_columns) + required_result_column_names_for_other_selects[query_num].push_back(full_result_header_for_current_select.getByPosition(pos).name); } } for (size_t query_num = 0; query_num < num_selects; ++query_num) { - const auto & select = ast.list_of_selects->children.at(query_num); - Names current_required_result_column_names(required_result_column_names.size()); - for (size_t required_result_num = 0, size = required_result_column_names.size(); required_result_num < size; ++required_result_num) - current_required_result_column_names[required_result_num] - = static_cast(*select).select_expression_list - ->children.at(positions_of_required_result_columns[required_result_num])->getAliasOrColumnName(); + const Names & current_required_result_column_names = query_num == 0 + ? required_result_column_names + : required_result_column_names_for_other_selects[query_num]; - nested_interpreters.emplace_back(std::make_unique(select, context, current_required_result_column_names, to_stage, subquery_depth)); + nested_interpreters.emplace_back(std::make_unique( + ast.list_of_selects->children.at(query_num), context, current_required_result_column_names, to_stage, subquery_depth)); } /// Determine structure of result. 
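
The rewrite above (PATCH 142) replaces the per-name search through the first SELECT's expression list with a position-based mapping: the required result columns are located once in the full header of the first SELECT, and the same positions are then used to pick out the (possibly differently named) columns of every other SELECT in the UNION ALL. Below is a minimal standalone sketch of that mapping, with plain std::vector<std::string> standing in for Block headers; Header and mapRequiredNamesByPosition are illustrative names, not part of the code above.

```cpp
#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

/// Simplified model of a query result header: column names in SELECT-clause order.
using Header = std::vector<std::string>;

/// For each SELECT after the first, translate the required column names of the
/// first SELECT into that SELECT's own column names, matching by position.
std::vector<std::vector<std::string>> mapRequiredNamesByPosition(
    const std::vector<Header> & headers, const std::vector<std::string> & required)
{
    const Header & first = headers.at(0);

    /// Positions of the required columns within the first SELECT's header.
    std::vector<size_t> positions;
    for (const auto & name : required)
    {
        size_t pos = 0;
        while (pos < first.size() && first[pos] != name)
            ++pos;
        if (pos == first.size())
            throw std::runtime_error("Column " + name + " not found in first SELECT");
        positions.push_back(pos);
    }

    std::vector<std::vector<std::string>> result(headers.size());
    for (size_t query_num = 1; query_num < headers.size(); ++query_num)
    {
        if (headers[query_num].size() != first.size())
            throw std::runtime_error("Different number of columns in UNION ALL elements");
        for (size_t pos : positions)
            result[query_num].push_back(headers[query_num][pos]);
    }
    return result;
}

int main()
{
    /// SELECT a, b FROM t1 UNION ALL SELECT x, y FROM t2, where only 'b' is needed.
    std::vector<Header> headers{{"a", "b"}, {"x", "y"}};
    for (const auto & name : mapRequiredNamesByPosition(headers, {"b"}).at(1))
        std::cout << name << '\n';    /// prints "y": position 1 of the second SELECT
    return 0;
}
```

Matching by position rather than by name is what allows unused columns to be pruned in every branch of the UNION ALL even when the branches disagree on column names or aliases.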
From 17ffe77b8cad78464905af7d429509743c902705 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Feb 2018 00:03:28 +0300 Subject: [PATCH 143/209] Better UNION ALL: development #1947 --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 48 ++++++++------------ dbms/src/Interpreters/ExpressionAnalyzer.h | 9 +--- 2 files changed, 20 insertions(+), 37 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 8f7278ec3f5..08aa376d287 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -165,17 +165,28 @@ ExpressionAnalyzer::ExpressionAnalyzer( : ast(ast_), context(context_), settings(context.getSettings()), subquery_depth(subquery_depth_), source_columns(source_columns_), required_result_columns(required_result_columns_.begin(), required_result_columns_.end()), - storage(storage_ ? storage_ : getTable()), + storage(storage_), do_global(do_global_), subqueries_for_sets(subqueries_for_set_) -{ - init(); -} - - -void ExpressionAnalyzer::init() { select_query = typeid_cast(ast.get()); + if (!storage && select_query) + { + auto select_database = select_query->database(); + auto select_table = select_query->table(); + + if (select_table + && !typeid_cast(select_table.get()) + && !typeid_cast(select_table.get())) + { + String database = select_database + ? typeid_cast(*select_database).name + : ""; + const String & table = typeid_cast(*select_table).name; + storage = context.tryGetTable(database, table); + } + } + removeDuplicateColumns(source_columns); addAliasColumns(); @@ -909,29 +920,6 @@ void ExpressionAnalyzer::addASTAliases(ASTPtr & ast, int ignore_levels) } -StoragePtr ExpressionAnalyzer::getTable() -{ - if (const ASTSelectQuery * select = typeid_cast(ast.get())) - { - auto select_database = select->database(); - auto select_table = select->table(); - - if (select_table - && !typeid_cast(select_table.get()) - && !typeid_cast(select_table.get())) - { - String database = select_database - ? typeid_cast(*select_database).name - : ""; - const String & table = typeid_cast(*select_table).name; - return context.tryGetTable(database, table); - } - } - - return StoragePtr(); -} - - void ExpressionAnalyzer::normalizeTree() { SetOfASTs tmp_set; diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h index 2461a431d35..a178f320b11 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.h +++ b/dbms/src/Interpreters/ExpressionAnalyzer.h @@ -161,8 +161,8 @@ private: /// Columns after ARRAY JOIN, JOIN, and/or aggregation. NamesAndTypesList aggregated_columns; - /// The table from which the query is made. - const StoragePtr storage; + /// The main table in FROM clause, if exists. + StoragePtr storage; bool has_aggregation = false; NamesAndTypesList aggregation_keys; @@ -211,8 +211,6 @@ private: Tables external_tables; size_t external_table_id = 1; - void init(); - static NamesAndTypesList::iterator findColumn(const String & name, NamesAndTypesList & cols); NamesAndTypesList::iterator findColumn(const String & name) { return findColumn(name, source_columns); } @@ -307,9 +305,6 @@ private: const NameSet & available_columns, NameSet & required_source_columns, NameSet & ignored_names, const NameSet & available_joined_columns, NameSet & required_joined_columns); - /// Get the table from which the query is made - StoragePtr getTable(); - /// columns - the columns that are present before the transformations begin. 
void initChain(ExpressionActionsChain & chain, const NamesAndTypesList & columns) const; From c35727c7edc2b122e7883e9d3caf8f94b4eefff0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Feb 2018 03:07:39 +0300 Subject: [PATCH 144/209] Miscellaneous #1947 --- dbms/src/Storages/ColumnDefault.h | 18 ------------------ dbms/src/Storages/ITableDeclaration.h | 6 +++--- 2 files changed, 3 insertions(+), 21 deletions(-) diff --git a/dbms/src/Storages/ColumnDefault.h b/dbms/src/Storages/ColumnDefault.h index b7094ab3d64..33f17952d08 100644 --- a/dbms/src/Storages/ColumnDefault.h +++ b/dbms/src/Storages/ColumnDefault.h @@ -16,24 +16,6 @@ enum class ColumnDefaultType Alias }; -} - - -namespace std -{ - template <> struct hash - { - size_t operator()(const DB::ColumnDefaultType type) const - { - return hash{}(static_cast(type)); - } - }; -} - - -namespace DB -{ - ColumnDefaultType columnDefaultTypeFromString(const std::string & str); std::string toString(const ColumnDefaultType type); diff --git a/dbms/src/Storages/ITableDeclaration.h b/dbms/src/Storages/ITableDeclaration.h index 335ca7ace3d..841ebc6c753 100644 --- a/dbms/src/Storages/ITableDeclaration.h +++ b/dbms/src/Storages/ITableDeclaration.h @@ -91,9 +91,9 @@ public: const ColumnDefaults & column_defaults); NamesAndTypesList columns; - NamesAndTypesList materialized_columns{}; - NamesAndTypesList alias_columns{}; - ColumnDefaults column_defaults{}; + NamesAndTypesList materialized_columns; + NamesAndTypesList alias_columns; + ColumnDefaults column_defaults; private: virtual const NamesAndTypesList & getColumnsListImpl() const From 9ea0a603a0c2b5cf89e92daa7a257c470509b4c1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Feb 2018 04:29:55 +0300 Subject: [PATCH 145/209] Better UNION ALL: development #1947 --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 23 +++--- dbms/src/Interpreters/ExpressionAnalyzer.h | 5 +- .../Interpreters/InterpreterSelectQuery.cpp | 81 ++++++++----------- .../src/Interpreters/InterpreterSelectQuery.h | 11 ++- 4 files changed, 54 insertions(+), 66 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 08aa376d287..d77909bbcd6 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -60,6 +60,8 @@ #include #include +#include + namespace DB { @@ -187,7 +189,12 @@ ExpressionAnalyzer::ExpressionAnalyzer( } } - removeDuplicateColumns(source_columns); + if (storage && source_columns.empty()) + source_columns = storage->getSampleBlock().getNamesAndTypesList(); + else + removeDuplicateColumns(source_columns); + + DUMP(source_columns); addAliasColumns(); @@ -2686,7 +2693,7 @@ void ExpressionAnalyzer::collectUsedColumns() if (required.empty()) required.insert(ExpressionActions::getSmallestColumn(source_columns)); - unknown_required_source_columns = required; + NameSet unknown_required_source_columns = required; for (NamesAndTypesList::iterator it = source_columns.begin(); it != source_columns.end();) { @@ -2713,6 +2720,9 @@ void ExpressionAnalyzer::collectUsedColumns() ++it; } } + + if (!unknown_required_source_columns.empty()) + throw Exception("Unknown identifier: " + *unknown_required_source_columns.begin(), ErrorCodes::UNKNOWN_IDENTIFIER); } void ExpressionAnalyzer::collectJoinedColumns(NameSet & joined_columns, NamesAndTypesList & joined_columns_name_type) @@ -2775,14 +2785,7 @@ void ExpressionAnalyzer::collectJoinedColumns(NameSet & joined_columns, NamesAnd Names 
ExpressionAnalyzer::getRequiredSourceColumns() const { - if (!unknown_required_source_columns.empty()) - throw Exception("Unknown identifier: " + *unknown_required_source_columns.begin(), ErrorCodes::UNKNOWN_IDENTIFIER); - - Names res; - for (const auto & column_name_type : source_columns) - res.push_back(column_name_type.name); - - return res; + return source_columns.getNames(); } diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h index a178f320b11..036a94af6ee 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.h +++ b/dbms/src/Interpreters/ExpressionAnalyzer.h @@ -69,7 +69,7 @@ public: const ASTPtr & ast_, const Context & context_, const StoragePtr & storage_, - const NamesAndTypesList & source_columns_, + const NamesAndTypesList & source_columns_ = {}, const Names & required_result_columns_ = {}, size_t subquery_depth_ = 0, bool do_global_ = false, @@ -146,9 +146,6 @@ private: Settings settings; size_t subquery_depth; - /// Columns that are mentioned in the expression, but were not specified in the constructor. - NameSet unknown_required_source_columns; - /** Original columns. * First, all available columns of the table are placed here. Then (when analyzing the query), unused columns are deleted. */ diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 5a34bdd3027..057eee19c8c 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -19,7 +19,6 @@ #include #include #include -#include #include #include @@ -113,53 +112,44 @@ void InterpreterSelectQuery::init(const Names & required_result_column_names) max_streams = settings.max_threads; - /// Read from prepared input. + const auto & table_expression = query.table(); + NamesAndTypesList source_columns; + if (input) { - source_header = input->getHeader(); + /// Read from prepared input. + source_columns = input->getHeader().getNamesAndTypesList(); + } + else if (table_expression && typeid_cast(table_expression.get())) + { + /// Read from subquery. + source_columns = InterpreterSelectWithUnionQuery::getSampleBlock(table_expression, context).getNamesAndTypesList(); + } + else if (table_expression && typeid_cast(table_expression.get())) + { + /// Read from table function. + + TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get( + typeid_cast(table_expression.get())->name, context); + /// Run it and remember the result + storage = table_function_ptr->execute(table_expression, context); } else { - auto table_expression = query.table(); + /// Read from table. Even without table expression (implicit SELECT ... FROM system.one). + String database_name; + String table_name; - /// Read from subquery. - if (table_expression && typeid_cast(table_expression.get())) - { - source_header = InterpreterSelectWithUnionQuery::getSampleBlock(table_expression, context); - } - else - { - /// Read from table function. - if (table_expression && typeid_cast(table_expression.get())) - { - /// Get the table function - TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get( - typeid_cast(table_expression.get())->name, context); - /// Run it and remember the result - storage = table_function_ptr->execute(table_expression, context); - } - else - { - /// Read from table. 
- String database_name; - String table_name; + getDatabaseAndTableNames(database_name, table_name); - getDatabaseAndTableNames(database_name, table_name); - - storage = context.getTable(database_name, table_name); - } - - table_lock = storage->lockStructure(false, __PRETTY_FUNCTION__); - - source_header = storage->getSampleBlock(); - } + storage = context.getTable(database_name, table_name); } - if (!source_header) - throw Exception("There are no available columns", ErrorCodes::THERE_IS_NO_COLUMN); + if (storage) + table_lock = storage->lockStructure(false, __PRETTY_FUNCTION__); query_analyzer = std::make_unique( - query_ptr, context, storage, source_header.getNamesAndTypesList(), required_result_column_names, subquery_depth, !only_analyze); + query_ptr, context, storage, source_columns, required_result_column_names, subquery_depth, !only_analyze); if (query.sample_size() && (input || !storage || !storage->supportsSampling())) throw Exception("Illegal SAMPLE: table doesn't support sampling", ErrorCodes::SAMPLING_NOT_SUPPORTED); @@ -208,7 +198,7 @@ void InterpreterSelectQuery::getDatabaseAndTableNames(String & database_name, St Block InterpreterSelectQuery::getSampleBlock() { Pipeline pipeline; - executeImpl(pipeline, std::make_shared(source_header)); + executeImpl(pipeline, input, true); auto res = pipeline.firstStream()->getHeader(); return res; } @@ -223,7 +213,7 @@ Block InterpreterSelectQuery::getSampleBlock(const ASTPtr & query_ptr_, const Co BlockIO InterpreterSelectQuery::execute() { Pipeline pipeline; - executeImpl(pipeline, input); + executeImpl(pipeline, input, false); executeUnion(pipeline); BlockIO res; @@ -234,7 +224,7 @@ BlockIO InterpreterSelectQuery::execute() BlockInputStreams InterpreterSelectQuery::executeWithMultipleStreams() { Pipeline pipeline; - executeImpl(pipeline, input); + executeImpl(pipeline, input, false); return pipeline.streams; } @@ -319,7 +309,7 @@ InterpreterSelectQuery::AnalysisResult InterpreterSelectQuery::analyzeExpression } -void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputStreamPtr & input) +void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputStreamPtr & input, bool dry_run) { if (input) pipeline.streams.push_back(input); @@ -335,7 +325,7 @@ void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputSt */ /** Read the data from Storage. from_stage - to what stage the request was completed in Storage. */ - QueryProcessingStage::Enum from_stage = executeFetchColumns(pipeline); + QueryProcessingStage::Enum from_stage = executeFetchColumns(pipeline, dry_run); LOG_TRACE(log, QueryProcessingStage::toString(from_stage) << " -> " << QueryProcessingStage::toString(to_stage)); @@ -508,7 +498,7 @@ static void getLimitLengthAndOffset(ASTSelectQuery & query, size_t & length, siz } } -QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline & pipeline) +QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline & pipeline, bool dry_run) { /// List of columns to read to execute the query. 
Names required_columns = query_analyzer->getRequiredSourceColumns(); @@ -544,7 +534,7 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline required_columns_expr_list->children.emplace_back(std::make_shared(column)); } - alias_actions = ExpressionAnalyzer{required_columns_expr_list, context, storage, source_header.getNamesAndTypesList()}.getActions(true); + alias_actions = ExpressionAnalyzer(required_columns_expr_list, context, storage).getActions(true); /// The set of required columns could be added as a result of adding an action to calculate ALIAS. required_columns = alias_actions->getRequiredColumns(); @@ -668,8 +658,7 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline optimize_prewhere(*merge_tree); } - /// If there was no already prepared input. - if (pipeline.streams.empty()) + if (!dry_run) pipeline.streams = storage->read(required_columns, query_info, context, from_stage, max_block_size, max_streams); if (pipeline.streams.empty()) diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.h b/dbms/src/Interpreters/InterpreterSelectQuery.h index 7175d72a638..fb84178ff5b 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.h +++ b/dbms/src/Interpreters/InterpreterSelectQuery.h @@ -33,7 +33,7 @@ public: * You can perform till the intermediate aggregation state, which are combined from different servers for distributed query processing. * * subquery_depth - * - to control the restrictions on the depth of nesting of subqueries. For subqueries, a value that is incremented by one is passed; + * - to control the limit on the depth of nesting of subqueries. For subqueries, a value that is incremented by one is passed; * for INSERT SELECT, a value 1 is passed instead of 0. * * input @@ -109,7 +109,7 @@ private: void init(const Names & required_result_column_names); - void executeImpl(Pipeline & pipeline, const BlockInputStreamPtr & input); + void executeImpl(Pipeline & pipeline, const BlockInputStreamPtr & input, bool dry_run); struct AnalysisResult @@ -147,10 +147,10 @@ private: /// Different stages of query execution. - /// Fetch data from the table. Returns the stage to which the query was processed in Storage. - QueryProcessingStage::Enum executeFetchColumns(Pipeline & pipeline); + void executeWithMultipleStreamsImpl(Pipeline & pipeline, const BlockInputStreamPtr & input, bool dry_run); - void executeWithMultipleStreamsImpl(Pipeline & pipeline, const BlockInputStreamPtr & input); + /// Fetch data from the table. Returns the stage to which the query was processed in Storage. + QueryProcessingStage::Enum executeFetchColumns(Pipeline & pipeline, bool dry_run); void executeWhere(Pipeline & pipeline, const ExpressionActionsPtr & expression); void executeAggregation(Pipeline & pipeline, const ExpressionActionsPtr & expression, bool overflow_row, bool final); @@ -182,7 +182,6 @@ private: QueryProcessingStage::Enum to_stage; size_t subquery_depth; std::unique_ptr query_analyzer; - Block source_header; /// How many streams we ask for storage to produce, and in how many threads we will do further processing. 
size_t max_streams = 1; From 73e208ff97f2d56641688ddbed9b1bb2cf73e0b3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Feb 2018 05:32:34 +0300 Subject: [PATCH 146/209] Better UNION ALL: development #1947 --- .../Interpreters/InterpreterSelectQuery.cpp | 29 ++++++++++++------- .../src/Interpreters/InterpreterSelectQuery.h | 1 + 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 057eee19c8c..67cc5004970 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -434,20 +434,10 @@ void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputSt executeOrder(pipeline); } - /// At this stage, we can calculate the minimums and maximums, if necessary. - if (settings.extremes) - { - pipeline.transform([&](auto & stream) - { - if (IProfilingBlockInputStream * p_stream = dynamic_cast(stream.get())) - p_stream->enableExtremes(); - }); - } - /** Optimization - if there are several sources and there is LIMIT, then first apply the preliminary LIMIT, * limiting the number of rows in each up to `offset + limit`. */ - if (query.limit_length && pipeline.hasMoreThanOneStream() && !query.distinct && !query.limit_by_expression_list) + if (query.limit_length && pipeline.hasMoreThanOneStream() && !query.distinct && !query.limit_by_expression_list && !settings.extremes) { executePreLimit(pipeline); } @@ -470,13 +460,17 @@ void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputSt if (need_second_distinct_pass) executeDistinct(pipeline, false, Names()); + /** We must do projection after DISTINCT because projection may remove some columns. + */ executeProjection(pipeline, expressions.final_projection); + executeExtremes(pipeline); executeLimitBy(pipeline); executeLimit(pipeline); } else { executeProjection(pipeline, expressions.final_projection); + executeExtremes(pipeline); } } } @@ -1111,6 +1105,19 @@ void InterpreterSelectQuery::executeLimit(Pipeline & pipeline) } +void InterpreterSelectQuery::executeExtremes(Pipeline & pipeline) +{ + if (!context.getSettingsRef().extremes) + return; + + pipeline.transform([&](auto & stream) + { + if (IProfilingBlockInputStream * p_stream = dynamic_cast(stream.get())) + p_stream->enableExtremes(); + }); +} + + void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(Pipeline & pipeline, SubqueriesForSets & subqueries_for_sets) { const Settings & settings = context.getSettingsRef(); diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.h b/dbms/src/Interpreters/InterpreterSelectQuery.h index fb84178ff5b..93dcce3645d 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.h +++ b/dbms/src/Interpreters/InterpreterSelectQuery.h @@ -166,6 +166,7 @@ private: void executeLimit(Pipeline & pipeline); void executeProjection(Pipeline & pipeline, const ExpressionActionsPtr & expression); void executeDistinct(Pipeline & pipeline, bool before_order, Names columns); + void executeExtremes(Pipeline & pipeline); void executeSubqueriesInSetsAndJoins(Pipeline & pipeline, std::unordered_map & subqueries_for_sets); /** If there is a SETTINGS section in the SELECT query, then apply settings from it. 
From ca879f368bf90e4711fbc59876449f714cf4e9d2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Feb 2018 05:33:04 +0300 Subject: [PATCH 147/209] Added setting "union_all_sequential" #1947 --- .../src/Interpreters/InterpreterSelectWithUnionQuery.cpp | 9 ++++++++- dbms/src/Interpreters/Settings.h | 4 +++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index 2b632e4d8a0..9c61f83212e 100644 --- a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -178,6 +179,8 @@ BlockInputStreams InterpreterSelectWithUnionQuery::executeWithMultipleStreams() BlockIO InterpreterSelectWithUnionQuery::execute() { + const Settings & settings = context.getSettingsRef(); + BlockInputStreams nested_streams = executeWithMultipleStreams(); BlockInputStreamPtr result_stream; @@ -192,7 +195,11 @@ BlockIO InterpreterSelectWithUnionQuery::execute() } else { - result_stream = std::make_shared>(nested_streams, nullptr, context.getSettingsRef().max_threads); + if (settings.union_all_sequential) + result_stream = std::make_shared(nested_streams); + else + result_stream = std::make_shared>(nested_streams, nullptr, settings.max_threads); + nested_streams.clear(); } diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index 0a725509186..a223c71e425 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -180,7 +180,9 @@ struct Settings M(SettingBool, optimize_throw_if_noop, false, "If setting is enabled and OPTIMIZE query didn't actually assign a merge then an explanatory exception is thrown") \ M(SettingBool, use_index_for_in_with_subqueries, true, "Try using an index if there is a subquery or a table expression on the right side of the IN operator.") \ \ - M(SettingBool, empty_result_for_aggregation_by_empty_set, false, "Return empty result when aggregating without keys on empty set.") + M(SettingBool, empty_result_for_aggregation_by_empty_set, false, "Return empty result when aggregating without keys on empty set.") \ + \ + M(SettingBool, union_all_sequential, false, "Execute SELECTs in UNION ALL sequentially, one after the other. Each SELECT can still be processed in multiple threads.") \ /// Possible limits for query execution. From c4fdfbf287a1f3f2ea59ad354d05d79693621df8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Feb 2018 07:14:18 +0300 Subject: [PATCH 148/209] Fixed error #1947 --- dbms/src/DataTypes/FieldToDataType.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/dbms/src/DataTypes/FieldToDataType.cpp b/dbms/src/DataTypes/FieldToDataType.cpp index 9feecc71914..715ee30baaf 100644 --- a/dbms/src/DataTypes/FieldToDataType.cpp +++ b/dbms/src/DataTypes/FieldToDataType.cpp @@ -64,7 +64,13 @@ DataTypePtr FieldToDataType::operator() (Array & x) const DataTypePtr res = getLeastSupertype(element_types); for (Field & elem : x) - elem = convertFieldToType(elem, *res); + { + Field converted = convertFieldToType(elem, *res); + + /// Otherwise elem must be NaN (convertFieldToType cannot convert NaN and returns NULL instead). 
+ if (!converted.isNull()) + elem = converted; + } return std::make_shared(res); } From f4dbca54a1f89401b0477fc60c8f3042ffc8a93b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Feb 2018 07:30:27 +0300 Subject: [PATCH 149/209] Fixed bad code #1947 --- .../Analyzers/TypeAndConstantInference.cpp | 5 +-- dbms/src/DataTypes/FieldToDataType.cpp | 35 +++++++------------ dbms/src/DataTypes/FieldToDataType.h | 19 +++++----- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 3 +- dbms/src/Interpreters/convertFieldToType.cpp | 6 ++++ 5 files changed, 33 insertions(+), 35 deletions(-) diff --git a/dbms/src/Analyzers/TypeAndConstantInference.cpp b/dbms/src/Analyzers/TypeAndConstantInference.cpp index 657e2d16e06..bc62e0b2c29 100644 --- a/dbms/src/Analyzers/TypeAndConstantInference.cpp +++ b/dbms/src/Analyzers/TypeAndConstantInference.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -72,8 +73,8 @@ void processLiteral(const String & column_name, const ASTPtr & ast, TypeAndConst TypeAndConstantInference::ExpressionInfo expression_info; expression_info.node = ast; expression_info.is_constant_expression = true; - expression_info.value = literal->value; - expression_info.data_type = applyVisitor(FieldToDataType(), expression_info.value); + expression_info.data_type = applyVisitor(FieldToDataType(), literal->value); + expression_info.value = convertFieldToType(literal->value, *expression_info.data_type); info.emplace(column_name, std::move(expression_info)); } diff --git a/dbms/src/DataTypes/FieldToDataType.cpp b/dbms/src/DataTypes/FieldToDataType.cpp index 715ee30baaf..1b4fbd53c6b 100644 --- a/dbms/src/DataTypes/FieldToDataType.cpp +++ b/dbms/src/DataTypes/FieldToDataType.cpp @@ -21,12 +21,12 @@ namespace ErrorCodes } -DataTypePtr FieldToDataType::operator() (Null &) const +DataTypePtr FieldToDataType::operator() (const Null &) const { return std::make_shared(std::make_shared()); } -DataTypePtr FieldToDataType::operator() (UInt64 & x) const +DataTypePtr FieldToDataType::operator() (const UInt64 & x) const { if (x <= std::numeric_limits::max()) return std::make_shared(); if (x <= std::numeric_limits::max()) return std::make_shared(); @@ -34,7 +34,7 @@ DataTypePtr FieldToDataType::operator() (UInt64 & x) const return std::make_shared(); } -DataTypePtr FieldToDataType::operator() (Int64 & x) const +DataTypePtr FieldToDataType::operator() (const Int64 & x) const { if (x <= std::numeric_limits::max() && x >= std::numeric_limits::min()) return std::make_shared(); if (x <= std::numeric_limits::max() && x >= std::numeric_limits::min()) return std::make_shared(); @@ -42,51 +42,40 @@ DataTypePtr FieldToDataType::operator() (Int64 & x) const return std::make_shared(); } -DataTypePtr FieldToDataType::operator() (Float64 &) const +DataTypePtr FieldToDataType::operator() (const Float64 &) const { return std::make_shared(); } -DataTypePtr FieldToDataType::operator() (String &) const +DataTypePtr FieldToDataType::operator() (const String &) const { return std::make_shared(); } -DataTypePtr FieldToDataType::operator() (Array & x) const +DataTypePtr FieldToDataType::operator() (const Array & x) const { DataTypes element_types; element_types.reserve(x.size()); - for (Field & elem : x) + for (const Field & elem : x) element_types.emplace_back(applyVisitor(FieldToDataType(), elem)); - DataTypePtr res = getLeastSupertype(element_types); - - for (Field & elem : x) - { - Field converted = convertFieldToType(elem, *res); - - /// Otherwise elem must be NaN (convertFieldToType cannot 
convert NaN and returns NULL instead).
-        if (!converted.isNull())
-            elem = converted;
-    }
-
-    return std::make_shared<DataTypeArray>(res);
+    return std::make_shared<DataTypeArray>(getLeastSupertype(element_types));
 }
 
-DataTypePtr FieldToDataType::operator() (Tuple & x) const
+DataTypePtr FieldToDataType::operator() (const Tuple & x) const
 {
-    auto & tuple = static_cast<TupleBackend &>(x);
+    auto & tuple = static_cast<const TupleBackend &>(x);
     if (tuple.empty())
         throw Exception("Cannot infer type of an empty tuple", ErrorCodes::EMPTY_DATA_PASSED);
 
     DataTypes element_types;
     element_types.reserve(ext::size(tuple));
 
-    for (auto & element : tuple)
-        element_types.push_back(applyVisitor(FieldToDataType{}, element));
+    for (const auto & element : tuple)
+        element_types.push_back(applyVisitor(FieldToDataType(), element));
 
     return std::make_shared<DataTypeTuple>(element_types);
 }
 
diff --git a/dbms/src/DataTypes/FieldToDataType.h b/dbms/src/DataTypes/FieldToDataType.h
index addb2be2d09..c6256a6f04b 100644
--- a/dbms/src/DataTypes/FieldToDataType.h
+++ b/dbms/src/DataTypes/FieldToDataType.h
@@ -10,19 +10,20 @@
 class IDataType;
 using DataTypePtr = std::shared_ptr<const IDataType>;
 
-/** For a given value, Field returns the minimum data type that allows this value to be stored.
- * In case Field is an array, converts all elements to a common type.
+/** For a given Field returns the minimum data type that allows this value to be stored.
+ * Note that you still have to convert the Field to the corresponding data type before inserting it into columns
+ * (for example, this is necessary to convert elements of an Array to a common type).
  */
 class FieldToDataType : public StaticVisitor<DataTypePtr>
 {
 public:
-    DataTypePtr operator() (Null & x) const;
-    DataTypePtr operator() (UInt64 & x) const;
-    DataTypePtr operator() (Int64 & x) const;
-    DataTypePtr operator() (Float64 & x) const;
-    DataTypePtr operator() (String & x) const;
-    DataTypePtr operator() (Array & x) const;
-    DataTypePtr operator() (Tuple & x) const;
+    DataTypePtr operator() (const Null & x) const;
+    DataTypePtr operator() (const UInt64 & x) const;
+    DataTypePtr operator() (const Int64 & x) const;
+    DataTypePtr operator() (const Float64 & x) const;
+    DataTypePtr operator() (const String & x) const;
+    DataTypePtr operator() (const Array & x) const;
+    DataTypePtr operator() (const Tuple & x) const;
 };
 
 }
diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp
index d77909bbcd6..8efbf318864 100644
--- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp
+++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp
@@ -31,6 +31,7 @@
 #include
 #include
 #include
+#include <Interpreters/convertFieldToType.h>
 
 #include
 #include
@@ -2173,7 +2174,7 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries,
             DataTypePtr type = applyVisitor(FieldToDataType(), node->value);
 
             ColumnWithTypeAndName column;
-            column.column = type->createColumnConst(1, node->value);
+            column.column = type->createColumnConst(1, convertFieldToType(node->value, *type));
             column.type = type;
             column.name = node->getColumnName();
 
diff --git a/dbms/src/Interpreters/convertFieldToType.cpp b/dbms/src/Interpreters/convertFieldToType.cpp
index 7110c418011..361351b659b 100644
--- a/dbms/src/Interpreters/convertFieldToType.cpp
+++ b/dbms/src/Interpreters/convertFieldToType.cpp
@@ -16,8 +16,10 @@
 #include
 #include
 #include
+#include <Common/NaNUtils.h>
 #include
 
+
 namespace DB
 {
 
@@ -45,6 +47,10 @@ static Field convertNumericTypeImpl(const Field & from)
 {
     From value = from.get<From>();
 
+    /// Note that NaNs don't compare equal to anything, but they are still in the range of any Float type.
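+    /// (The early return below keeps NaN as is: accurate::equalsOp(value, To(value)) is false
+    /// for NaN, since nan != nan, so without it a NaN literal would be converted to Null.)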
+ if (isNaN(value) && std::is_floating_point_v) + return value; + if (!accurate::equalsOp(value, To(value))) return {}; From 672fc3a4410283ed688f25d1e22dcd5d23a26177 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Feb 2018 07:49:39 +0300 Subject: [PATCH 150/209] Better UNION ALL: development #1947 --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 8efbf318864..3fb8cbbd124 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -61,8 +61,6 @@ #include #include -#include - namespace DB { @@ -195,8 +193,6 @@ ExpressionAnalyzer::ExpressionAnalyzer( else removeDuplicateColumns(source_columns); - DUMP(source_columns); - addAliasColumns(); translateQualifiedNames(); From 7fc6e0f16ec209f151491ddf2e5697abd8f93060 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Feb 2018 07:51:09 +0300 Subject: [PATCH 151/209] Better UNION ALL: development #1947 --- dbms/src/Interpreters/InterpreterSelectQuery.h | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.h b/dbms/src/Interpreters/InterpreterSelectQuery.h index 93dcce3645d..78651d9cb77 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.h +++ b/dbms/src/Interpreters/InterpreterSelectQuery.h @@ -147,6 +147,7 @@ private: /// Different stages of query execution. + /// dry_run - don't read from table, use empty header block instead. void executeWithMultipleStreamsImpl(Pipeline & pipeline, const BlockInputStreamPtr & input, bool dry_run); /// Fetch data from the table. Returns the stage to which the query was processed in Storage. From 8b2ca20da281b4399e8c06d004847ac48b200d74 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Feb 2018 07:55:43 +0300 Subject: [PATCH 152/209] Better UNION ALL: development #1947 --- .../Interpreters/InterpreterDescribeQuery.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterDescribeQuery.cpp b/dbms/src/Interpreters/InterpreterDescribeQuery.cpp index a01f4e327e8..65b0fea25f1 100644 --- a/dbms/src/Interpreters/InterpreterDescribeQuery.cpp +++ b/dbms/src/Interpreters/InterpreterDescribeQuery.cpp @@ -1,20 +1,18 @@ #include -#include #include #include #include -#include #include +#include +#include +#include +#include #include #include -#include -#include -#include -#include -#include -#include #include +#include #include +#include namespace DB @@ -62,7 +60,9 @@ BlockInputStreamPtr InterpreterDescribeQuery::executeImpl() auto table_expression = typeid_cast(ast.table_expression.get()); if (table_expression->subquery) - columns = InterpreterSelectQuery::getSampleBlock(table_expression->subquery->children[0], context).getNamesAndTypesList(); + { + columns = InterpreterSelectWithUnionQuery::getSampleBlock(table_expression->subquery->children[0], context).getNamesAndTypesList(); + } else { if (table_expression->table_function) From 9c889af882c9dbb14dbbe2be4374a3ad42b2b6ed Mon Sep 17 00:00:00 2001 From: Vitaliy Lyudvichenko Date: Wed, 28 Feb 2018 16:23:40 +0300 Subject: [PATCH 153/209] Add allow_distributed_ddl setting. [#CLICKHOUSE-3611] Clearer exception message. Fixed Int64 settings parsing. 
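For example, with allow_distributed_ddl = 0 a query such as
CREATE TABLE ... ON CLUSTER now fails immediately with a QUERY_IS_PROHIBITED
error instead of being scheduled for distributed execution.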
---
 dbms/src/Common/ErrorCodes.cpp         |  1 +
 dbms/src/Interpreters/DDLWorker.cpp    | 36 +++++++++++++++++++-------
 dbms/src/Interpreters/Settings.h       |  5 ++--
 dbms/src/Interpreters/SettingsCommon.h |  3 ++-
 4 files changed, 33 insertions(+), 12 deletions(-)

diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp
index 632c754a8d1..40a630c566a 100644
--- a/dbms/src/Common/ErrorCodes.cpp
+++ b/dbms/src/Common/ErrorCodes.cpp
@@ -368,6 +368,7 @@ namespace ErrorCodes
     extern const int INSERT_WAS_DEDUPLICATED = 389;
     extern const int CANNOT_GET_CREATE_TABLE_QUERY = 390;
     extern const int EXTERNAL_LIBRARY_ERROR = 391;
+    extern const int QUERY_IS_PROHIBITED = 392;
 
     extern const int KEEPER_EXCEPTION = 999;
 
diff --git a/dbms/src/Interpreters/DDLWorker.cpp b/dbms/src/Interpreters/DDLWorker.cpp
index eea11fbb3ff..3ae9a1d5a06 100644
--- a/dbms/src/Interpreters/DDLWorker.cpp
+++ b/dbms/src/Interpreters/DDLWorker.cpp
@@ -54,6 +54,7 @@ namespace ErrorCodes
     extern const int UNKNOWN_TYPE_OF_QUERY;
     extern const int UNFINISHED;
     extern const int UNKNOWN_STATUS_OF_DISTRIBUTED_DDL_TASK;
+    extern const int QUERY_IS_PROHIBITED;
 }
 
@@ -980,21 +981,32 @@ public:
         if (is_cancelled)
             return res;
 
-        auto elapsed_seconds = watch.elapsedSeconds();
-        if (timeout_seconds >= 0 && elapsed_seconds > timeout_seconds)
+        if (timeout_seconds >= 0 && watch.elapsedSeconds() > timeout_seconds)
         {
-            throw Exception("Watching task " + node_path + " is executing too long (" + toString(std::round(elapsed_seconds)) + " sec.)",
-                ErrorCodes::TIMEOUT_EXCEEDED);
+            size_t num_unfinished_hosts = waiting_hosts.size() - num_hosts_finished;
+            size_t num_active_hosts = current_active_hosts.size();
+
+            std::stringstream msg;
+            msg << "Watching task " << node_path << " is executing longer than distributed_ddl_task_timeout"
+                << " (=" << timeout_seconds << ") seconds."
+                << " There are " << num_unfinished_hosts << " unfinished hosts"
+                << " (" << num_active_hosts << " of them are currently active)"
+                << "; they are going to execute the query in the background";
+
+            throw Exception(msg.str(), ErrorCodes::TIMEOUT_EXCEEDED);
         }
 
         if (num_hosts_finished != 0 || try_number != 0)
-            std::this_thread::sleep_for(std::chrono::milliseconds(50 * std::min(static_cast<size_t>(20), try_number + 1)));
+        {
+            auto current_sleep_for = std::chrono::milliseconds(std::min(static_cast<size_t>(1000), 50 * (try_number + 1)));
+            std::this_thread::sleep_for(current_sleep_for);
+        }
 
         /// TODO: add shared lock
         if (!zookeeper->exists(node_path))
         {
             throw Exception("Cannot provide query execution status. 
The query's node " + node_path
-                + " had been deleted by the cleaner since it was finished (or its lifetime is expired)",
+                + " has been deleted by the cleaner since it was finished (or its lifetime has expired)",
                 ErrorCodes::UNFINISHED);
         }
 
@@ -1003,7 +1015,7 @@ public:
             if (new_hosts.empty())
                 continue;
 
-            Strings cur_active_hosts = getChildrenAllowNoNode(zookeeper, node_path + "/active");
+            current_active_hosts = getChildrenAllowNoNode(zookeeper, node_path + "/active");
 
             MutableColumns columns = sample.cloneEmptyColumns();
             for (const String & host_id : new_hosts)
@@ -1019,12 +1031,14 @@ public:
                 UInt16 port;
                 Cluster::Address::fromString(host_id, host, port);
 
+                ++num_hosts_finished;
+
                 columns[0]->insert(host);
                 columns[1]->insert(static_cast<UInt64>(port));
                 columns[2]->insert(static_cast<Int64>(status.code));
                 columns[3]->insert(status.message);
-                columns[4]->insert(static_cast<UInt64>(waiting_hosts.size() - (++num_hosts_finished)));
-                columns[5]->insert(static_cast<UInt64>(cur_active_hosts.size()));
+                columns[4]->insert(static_cast<UInt64>(waiting_hosts.size() - num_hosts_finished));
+                columns[5]->insert(static_cast<UInt64>(current_active_hosts.size()));
             }
             res = sample.cloneWithColumns(std::move(columns));
         }
@@ -1086,6 +1100,7 @@ private:
     NameSet waiting_hosts;  /// hosts from task host list
     NameSet finished_hosts; /// finished hosts from host list
    NameSet ignoring_hosts;  /// appeared hosts that are not in hosts list
+    Strings current_active_hosts; /// Hosts that were in an active state at the last check
     size_t num_hosts_finished = 0;
 
     Int64 timeout_seconds = 120;
@@ -1104,6 +1119,9 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, const Context & cont
         throw Exception("Distributed execution is not supported for such DDL queries", ErrorCodes::NOT_IMPLEMENTED);
     }
 
+    if (!context.getSettingsRef().allow_distributed_ddl)
+        throw Exception("Distributed DDL queries are prohibited for the user", ErrorCodes::QUERY_IS_PROHIBITED);
+
     if (auto query_alter = dynamic_cast<const ASTAlterQuery *>(query_ptr.get()))
     {
         for (const auto & param : query_alter->parameters)
diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h
index 0a725509186..26a40def4fa 100644
--- a/dbms/src/Interpreters/Settings.h
+++ b/dbms/src/Interpreters/Settings.h
@@ -170,7 +170,7 @@ struct Settings
     \
     M(SettingBool, insert_distributed_sync, false, "If setting is enabled, insert query into distributed waits until data will be sent to all nodes in cluster.") \
     M(SettingUInt64, insert_distributed_timeout, 0, "Timeout for insert query into distributed. Setting is used only with insert_distributed_sync enabled. Zero value means no timeout.") \
-    M(SettingInt64, distributed_ddl_task_timeout, 120, "Timeout for DDL query responses from all hosts in cluster. Negative value means infinite.") \
+    M(SettingInt64, distributed_ddl_task_timeout, 180, "Timeout for DDL query responses from all hosts in cluster. 
Negative value means infinite.") \
     M(SettingMilliseconds, stream_flush_interval_ms, DEFAULT_QUERY_LOG_FLUSH_INTERVAL_MILLISECONDS, "Timeout for flushing data from streaming storages.") \
     M(SettingString, format_schema, "", "Schema identifier (used by schema-based formats)") \
     M(SettingBool, insert_allow_materialized_columns, 0, "If setting is enabled, Allow materialized columns in INSERT.") \
@@ -180,7 +180,8 @@ struct Settings
     M(SettingBool, optimize_throw_if_noop, false, "If setting is enabled and OPTIMIZE query didn't actually assign a merge then an explanatory exception is thrown") \
     M(SettingBool, use_index_for_in_with_subqueries, true, "Try using an index if there is a subquery or a table expression on the right side of the IN operator.") \
     \
-    M(SettingBool, empty_result_for_aggregation_by_empty_set, false, "Return empty result when aggregating without keys on empty set.")
+    M(SettingBool, empty_result_for_aggregation_by_empty_set, false, "Return empty result when aggregating without keys on empty set.") \
+    M(SettingBool, allow_distributed_ddl, true, "If it is set to true, then a user is allowed to execute distributed DDL queries.")
 
 /// Possible limits for query execution.
 
diff --git a/dbms/src/Interpreters/SettingsCommon.h b/dbms/src/Interpreters/SettingsCommon.h
index 59c1c0dac28..68838b43b07 100644
--- a/dbms/src/Interpreters/SettingsCommon.h
+++ b/dbms/src/Interpreters/SettingsCommon.h
@@ -5,6 +5,7 @@
 #include
 #include
+#include <Common/FieldVisitors.h>
 #include
 #include
@@ -57,7 +58,7 @@ struct SettingInt
 
     void set(const Field & x)
     {
-        set(safeGet<Int64>(x));
+        set(applyVisitor(FieldVisitorConvertToNumber<Int64>(), x));
     }
 
     void set(const String & x)

From 72bccacba5b7abf50973a92ad9422ed8ff49986d Mon Sep 17 00:00:00 2001
From: Vitaliy Lyudvichenko
Date: Wed, 28 Feb 2018 16:25:13 +0300
Subject: [PATCH 154/209] Skip editors' temporary config files.
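For example, an editor lock file such as ".#config.xml" still ends in ".xml"
and used to be merged in; files whose names start with a dot are now skipped.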
[#CLICKHOUSE-3] Resolves #1190 --- dbms/src/Common/ConfigProcessor/ConfigProcessor.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dbms/src/Common/ConfigProcessor/ConfigProcessor.cpp b/dbms/src/Common/ConfigProcessor/ConfigProcessor.cpp index 0b4572c4997..01c75d0ea16 100644 --- a/dbms/src/Common/ConfigProcessor/ConfigProcessor.cpp +++ b/dbms/src/Common/ConfigProcessor/ConfigProcessor.cpp @@ -377,7 +377,9 @@ ConfigProcessor::Files ConfigProcessor::getConfigMergeFiles(const std::string & for (Poco::DirectoryIterator it(merge_dir_name); it != Poco::DirectoryIterator(); ++it) { Poco::File & file = *it; - if (file.isFile() && (endsWith(file.path(), ".xml") || endsWith(file.path(), ".conf"))) + if (file.isFile() + && (endsWith(file.path(), ".xml") || endsWith(file.path(), ".conf")) + && !startsWith(file.path(), ".")) // skip temporary files { files.push_back(file.path()); } From 78bceb6e325d1a4582359c8ac4b5e52b929e6c16 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Feb 2018 21:15:53 +0300 Subject: [PATCH 155/209] Miscellaneous [#CLICKHOUSE-2] --- libs/libcommon/include/common/logger_useful.h | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/libs/libcommon/include/common/logger_useful.h b/libs/libcommon/include/common/logger_useful.h index 7ecd6f3bcd2..245a79c7982 100644 --- a/libs/libcommon/include/common/logger_useful.h +++ b/libs/libcommon/include/common/logger_useful.h @@ -31,12 +31,6 @@ using Poco::Logger; oss_internal_rare << message; \ (logger)->information(oss_internal_rare.str());}} while(false) -#define LOG_NOTICE(logger, message) do { \ - if ((logger)->notice()) {\ - std::stringstream oss_internal_rare; \ - oss_internal_rare << message; \ - (logger)->notice(oss_internal_rare.str());}} while(false) - #define LOG_WARNING(logger, message) do { \ if ((logger)->warning()) {\ std::stringstream oss_internal_rare; \ @@ -48,15 +42,3 @@ using Poco::Logger; std::stringstream oss_internal_rare; \ oss_internal_rare << message; \ (logger)->error(oss_internal_rare.str());}} while(false) - -#define LOG_CRITICAL(logger, message) do { \ - if ((logger)->critical()) {\ - std::stringstream oss_internal_rare; \ - oss_internal_rare << message; \ - (logger)->critical(oss_internal_rare.str());}} while(false) - -#define LOG_FATAL(logger, message) do { \ - if ((logger)->fatal()) {\ - std::stringstream oss_internal_rare; \ - oss_internal_rare << message; \ - (logger)->fatal(oss_internal_rare.str());}} while(false) From eca5d9d5cb7f2d2bdd9242158487ce76c4da7d0f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Feb 2018 21:15:53 +0300 Subject: [PATCH 156/209] Miscellaneous [#CLICKHOUSE-2] --- libs/libcommon/include/common/logger_useful.h | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/libs/libcommon/include/common/logger_useful.h b/libs/libcommon/include/common/logger_useful.h index 7ecd6f3bcd2..245a79c7982 100644 --- a/libs/libcommon/include/common/logger_useful.h +++ b/libs/libcommon/include/common/logger_useful.h @@ -31,12 +31,6 @@ using Poco::Logger; oss_internal_rare << message; \ (logger)->information(oss_internal_rare.str());}} while(false) -#define LOG_NOTICE(logger, message) do { \ - if ((logger)->notice()) {\ - std::stringstream oss_internal_rare; \ - oss_internal_rare << message; \ - (logger)->notice(oss_internal_rare.str());}} while(false) - #define LOG_WARNING(logger, message) do { \ if ((logger)->warning()) {\ std::stringstream oss_internal_rare; \ @@ -48,15 +42,3 @@ using Poco::Logger; std::stringstream 
oss_internal_rare; \ oss_internal_rare << message; \ (logger)->error(oss_internal_rare.str());}} while(false) - -#define LOG_CRITICAL(logger, message) do { \ - if ((logger)->critical()) {\ - std::stringstream oss_internal_rare; \ - oss_internal_rare << message; \ - (logger)->critical(oss_internal_rare.str());}} while(false) - -#define LOG_FATAL(logger, message) do { \ - if ((logger)->fatal()) {\ - std::stringstream oss_internal_rare; \ - oss_internal_rare << message; \ - (logger)->fatal(oss_internal_rare.str());}} while(false) From 701a098acf964f3994361b0fa8f17cf8bf2cdbe0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Feb 2018 21:25:07 +0300 Subject: [PATCH 157/209] Miscellaneous [#CLICKHOUSE-2] --- dbms/src/Common/AIO.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Common/AIO.h b/dbms/src/Common/AIO.h index fe5842ab91b..e7287c85d3d 100644 --- a/dbms/src/Common/AIO.h +++ b/dbms/src/Common/AIO.h @@ -160,7 +160,7 @@ class AIOContextPool : public ext::singleton const auto it = promises.find(id); if (it == std::end(promises)) { - LOG_CRITICAL(&Poco::Logger::get("AIOcontextPool"), "Found io_event with unknown id " << id); + LOG_ERROR(&Poco::Logger::get("AIOcontextPool"), "Found io_event with unknown id " << id); continue; } From 063facc769fb4fa7285ced8afb42327644747c77 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Feb 2018 21:25:07 +0300 Subject: [PATCH 158/209] Miscellaneous [#CLICKHOUSE-2] --- dbms/src/Common/AIO.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Common/AIO.h b/dbms/src/Common/AIO.h index fe5842ab91b..e7287c85d3d 100644 --- a/dbms/src/Common/AIO.h +++ b/dbms/src/Common/AIO.h @@ -160,7 +160,7 @@ class AIOContextPool : public ext::singleton const auto it = promises.find(id); if (it == std::end(promises)) { - LOG_CRITICAL(&Poco::Logger::get("AIOcontextPool"), "Found io_event with unknown id " << id); + LOG_ERROR(&Poco::Logger::get("AIOcontextPool"), "Found io_event with unknown id " << id); continue; } From 63467dbcc211587f4e27b92f5e7eb13cd05631b7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Feb 2018 22:36:01 +0300 Subject: [PATCH 159/209] Fixed error #1947 --- dbms/src/Core/Block.cpp | 15 +++++++++++---- dbms/src/Functions/FunctionsMiscellaneous.cpp | 4 +++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/dbms/src/Core/Block.cpp b/dbms/src/Core/Block.cpp index 4d3889da8b9..c969e3256da 100644 --- a/dbms/src/Core/Block.cpp +++ b/dbms/src/Core/Block.cpp @@ -1,4 +1,5 @@ #include +#include #include @@ -409,10 +410,16 @@ static ReturnType checkBlockStructure(const Block & lhs, const Block & rhs, cons return on_error("Block structure mismatch in " + context_description + " stream: different columns:\n" + lhs.dumpStructure() + "\n" + rhs.dumpStructure(), ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE); - if (actual.column->isColumnConst() && expected.column->isColumnConst() - && static_cast(*actual.column).getField() != static_cast(*expected.column).getField()) - return on_error("Block structure mismatch in " + context_description + " stream: different values of constants", - ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE); + if (actual.column->isColumnConst() && expected.column->isColumnConst()) + { + Field actual_value = static_cast(*actual.column).getField(); + Field expected_value = static_cast(*expected.column).getField(); + + if (actual_value != expected_value) + return on_error("Block structure mismatch in " + context_description + " stream: different values of 
constants, actual: " + + applyVisitor(FieldVisitorToString(), actual_value) + ", expected: " + applyVisitor(FieldVisitorToString(), expected_value), + ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE); + } } return ReturnType(true); diff --git a/dbms/src/Functions/FunctionsMiscellaneous.cpp b/dbms/src/Functions/FunctionsMiscellaneous.cpp index 33abc2dfd41..7e9fcf1c5e2 100644 --- a/dbms/src/Functions/FunctionsMiscellaneous.cpp +++ b/dbms/src/Functions/FunctionsMiscellaneous.cpp @@ -338,9 +338,11 @@ public: { const auto & elem = block.getByPosition(arguments[0]); + /// Note that the result is not a constant, because it contains block size. + block.getByPosition(result).column = DataTypeString().createColumnConst(block.rows(), - elem.type->getName() + ", " + elem.column->dumpStructure()); + elem.type->getName() + ", " + elem.column->dumpStructure())->convertToFullColumnIfConst(); } }; From f1de27a21f52a6dc7c63f1b0cccee994853f1706 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Feb 2018 22:36:01 +0300 Subject: [PATCH 160/209] Fixed error #1947 --- dbms/src/Core/Block.cpp | 15 +++++++++++---- dbms/src/Functions/FunctionsMiscellaneous.cpp | 4 +++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/dbms/src/Core/Block.cpp b/dbms/src/Core/Block.cpp index 4d3889da8b9..c969e3256da 100644 --- a/dbms/src/Core/Block.cpp +++ b/dbms/src/Core/Block.cpp @@ -1,4 +1,5 @@ #include +#include #include @@ -409,10 +410,16 @@ static ReturnType checkBlockStructure(const Block & lhs, const Block & rhs, cons return on_error("Block structure mismatch in " + context_description + " stream: different columns:\n" + lhs.dumpStructure() + "\n" + rhs.dumpStructure(), ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE); - if (actual.column->isColumnConst() && expected.column->isColumnConst() - && static_cast(*actual.column).getField() != static_cast(*expected.column).getField()) - return on_error("Block structure mismatch in " + context_description + " stream: different values of constants", - ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE); + if (actual.column->isColumnConst() && expected.column->isColumnConst()) + { + Field actual_value = static_cast(*actual.column).getField(); + Field expected_value = static_cast(*expected.column).getField(); + + if (actual_value != expected_value) + return on_error("Block structure mismatch in " + context_description + " stream: different values of constants, actual: " + + applyVisitor(FieldVisitorToString(), actual_value) + ", expected: " + applyVisitor(FieldVisitorToString(), expected_value), + ErrorCodes::BLOCKS_HAVE_DIFFERENT_STRUCTURE); + } } return ReturnType(true); diff --git a/dbms/src/Functions/FunctionsMiscellaneous.cpp b/dbms/src/Functions/FunctionsMiscellaneous.cpp index 33abc2dfd41..7e9fcf1c5e2 100644 --- a/dbms/src/Functions/FunctionsMiscellaneous.cpp +++ b/dbms/src/Functions/FunctionsMiscellaneous.cpp @@ -338,9 +338,11 @@ public: { const auto & elem = block.getByPosition(arguments[0]); + /// Note that the result is not a constant, because it contains block size. 
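+        /// (Illustration: the dumped structure of a const column includes the row count,
+        /// e.g. "Const(size = 2, UInt8(size = 1))" for a two-row block, so the same expression
+        /// yields different strings for blocks of different sizes.)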
+ block.getByPosition(result).column = DataTypeString().createColumnConst(block.rows(), - elem.type->getName() + ", " + elem.column->dumpStructure()); + elem.type->getName() + ", " + elem.column->dumpStructure())->convertToFullColumnIfConst(); } }; From 9c685842817d11e8f420f1040aa94649a87604d0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Feb 2018 22:47:33 +0300 Subject: [PATCH 161/209] Fixed error #1947 --- dbms/src/Columns/ColumnNullable.h | 1 + dbms/src/Storages/StorageView.cpp | 11 ++++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/dbms/src/Columns/ColumnNullable.h b/dbms/src/Columns/ColumnNullable.h index f39ae421c00..6eff31379ff 100644 --- a/dbms/src/Columns/ColumnNullable.h +++ b/dbms/src/Columns/ColumnNullable.h @@ -78,6 +78,7 @@ public: bool isFixedAndContiguous() const override { return false; } bool valuesHaveFixedSize() const override { return nested_column->valuesHaveFixedSize(); } size_t sizeOfValueIfFixed() const override { return null_map->sizeOfValueIfFixed() + nested_column->sizeOfValueIfFixed(); } + bool onlyNull() const override { return nested_column->isDummy(); } /// Return the column that represents values. diff --git a/dbms/src/Storages/StorageView.cpp b/dbms/src/Storages/StorageView.cpp index 26583eac9b6..4103c4fed31 100644 --- a/dbms/src/Storages/StorageView.cpp +++ b/dbms/src/Storages/StorageView.cpp @@ -6,6 +6,8 @@ #include #include +#include + namespace DB { @@ -43,7 +45,14 @@ BlockInputStreams StorageView::read( const unsigned /*num_streams*/) { processed_stage = QueryProcessingStage::FetchColumns; - return InterpreterSelectWithUnionQuery(inner_query->clone(), context, column_names).executeWithMultipleStreams(); + BlockInputStreams res = InterpreterSelectWithUnionQuery(inner_query->clone(), context, column_names).executeWithMultipleStreams(); + + /// It's expected that the columns read from storage are not constant. + /// Because method 'getSampleBlockForColumns' is used to obtain a structure of result in InterpreterSelectQuery. + for (auto & stream : res) + stream = std::make_shared(stream); + + return res; } From cfe900c9973516f6f05e927b76f7a62708850c09 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Feb 2018 23:05:29 +0300 Subject: [PATCH 162/209] Fixed test #1947. --- .../queries/0_stateless/00570_empty_array_is_const.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/00570_empty_array_is_const.reference b/dbms/tests/queries/0_stateless/00570_empty_array_is_const.reference index 1eedb9f964c..2c45046335d 100644 --- a/dbms/tests/queries/0_stateless/00570_empty_array_is_const.reference +++ b/dbms/tests/queries/0_stateless/00570_empty_array_is_const.reference @@ -1,3 +1,3 @@ Array(Nothing), Const(size = 1, Array(size = 1, UInt64(size = 1), Nothing(size = 0))) Array(Array(Array(Nothing))), Const(size = 1, Array(size = 1, UInt64(size = 1), Array(size = 1, UInt64(size = 1), Array(size = 1, UInt64(size = 1), Nothing(size = 0))))) -Array(Array(UInt8)), Const(size = 1, Array(size = 1, UInt64(size = 1), Array(size = 2, UInt64(size = 2), UInt8(size = 1)))) +Array(Array(UInt8)), Const(size = 2, Array(size = 1, UInt64(size = 1), Array(size = 2, UInt64(size = 2), UInt8(size = 1)))) From 89b2d0d2de1ebca541ab205882a9b36afdc98463 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Feb 2018 23:05:29 +0300 Subject: [PATCH 163/209] Fixed test #1947. 
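(The reference now expects "Const(size = 2, ...)": after the previous change,
the dumped structure of a materialized constant reflects the number of rows
in the block.)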
--- .../queries/0_stateless/00570_empty_array_is_const.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/00570_empty_array_is_const.reference b/dbms/tests/queries/0_stateless/00570_empty_array_is_const.reference index 1eedb9f964c..2c45046335d 100644 --- a/dbms/tests/queries/0_stateless/00570_empty_array_is_const.reference +++ b/dbms/tests/queries/0_stateless/00570_empty_array_is_const.reference @@ -1,3 +1,3 @@ Array(Nothing), Const(size = 1, Array(size = 1, UInt64(size = 1), Nothing(size = 0))) Array(Array(Array(Nothing))), Const(size = 1, Array(size = 1, UInt64(size = 1), Array(size = 1, UInt64(size = 1), Array(size = 1, UInt64(size = 1), Nothing(size = 0))))) -Array(Array(UInt8)), Const(size = 1, Array(size = 1, UInt64(size = 1), Array(size = 2, UInt64(size = 2), UInt8(size = 1)))) +Array(Array(UInt8)), Const(size = 2, Array(size = 1, UInt64(size = 1), Array(size = 2, UInt64(size = 2), UInt8(size = 1)))) From d6a1f8470fd84246ecad9da678e8c743ba1a4291 Mon Sep 17 00:00:00 2001 From: proller Date: Wed, 28 Feb 2018 23:34:25 +0300 Subject: [PATCH 164/209] CLICKHOUSE-3553 Rebuild loggers on every config change (#1973) * Macos test fix * Fix freebsd build * CLICKHOUSE-3553 Rebuild loggers on every config change * rename clickhouse_common_configprocessor -> clickhouse_common_config; move Server/ConfigReloader.cpp -> Common/Config/ * CLICKHOUSE-3553 Set new level to all loggers on reload * Dont use hash * Update BaseDaemon.h --- dbms/CMakeLists.txt | 4 +- dbms/src/Common/Config/CMakeLists.txt | 9 ++ .../ConfigProcessor.cpp | 0 .../ConfigProcessor.h | 0 .../Config}/ConfigReloader.cpp | 5 +- .../Config}/ConfigReloader.h | 3 +- .../src/Common/ConfigProcessor/CMakeLists.txt | 9 -- .../src/Common/ZooKeeper/tests/CMakeLists.txt | 2 +- .../tests/zk_many_watches_reconnect.cpp | 2 +- dbms/src/Interpreters/Context.cpp | 2 +- .../ExternalLoaderConfigRepository.cpp | 2 +- dbms/src/Interpreters/tests/users.cpp | 2 +- dbms/src/Server/CMakeLists.txt | 3 +- dbms/src/Server/ExtractFromConfig.cpp | 2 +- dbms/src/Server/LocalServer.cpp | 2 +- dbms/src/Server/Server.cpp | 8 +- dbms/src/Server/config.xml | 1 + libs/libdaemon/CMakeLists.txt | 2 +- libs/libdaemon/include/daemon/BaseDaemon.h | 9 +- libs/libdaemon/src/BaseDaemon.cpp | 98 +++++++++++-------- utils/config-processor/CMakeLists.txt | 2 +- utils/config-processor/config-processor.cpp | 2 +- 22 files changed, 93 insertions(+), 76 deletions(-) create mode 100644 dbms/src/Common/Config/CMakeLists.txt rename dbms/src/Common/{ConfigProcessor => Config}/ConfigProcessor.cpp (100%) rename dbms/src/Common/{ConfigProcessor => Config}/ConfigProcessor.h (100%) rename dbms/src/{Server => Common/Config}/ConfigReloader.cpp (98%) rename dbms/src/{Server => Common/Config}/ConfigReloader.h (97%) delete mode 100644 dbms/src/Common/ConfigProcessor/CMakeLists.txt diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index f960e82a66a..7725f5f7f85 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -87,7 +87,7 @@ if (ARCH_FREEBSD) endif () add_subdirectory(src/Common/ZooKeeper) -add_subdirectory(src/Common/ConfigProcessor) +add_subdirectory(src/Common/Config) if (MAKE_STATIC_LIBRARIES) add_library(dbms ${dbms_headers} ${dbms_sources}) @@ -141,7 +141,7 @@ target_link_libraries (clickhouse_common_io target_link_libraries (dbms clickhouse_parsers - clickhouse_common_configprocessor + clickhouse_common_config clickhouse_common_io ${MYSQLXX_LIBRARY} ${FARMHASH_LIBRARIES} diff --git a/dbms/src/Common/Config/CMakeLists.txt 
b/dbms/src/Common/Config/CMakeLists.txt new file mode 100644 index 00000000000..ae7d623d457 --- /dev/null +++ b/dbms/src/Common/Config/CMakeLists.txt @@ -0,0 +1,9 @@ + +include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake) + +add_headers_and_sources(clickhouse_common_config .) + +add_library(clickhouse_common_config ${SPLIT_SHARED} ${clickhouse_common_config_headers} ${clickhouse_common_config_sources}) + +target_link_libraries (clickhouse_common_config clickhouse_common_zookeeper string_utils) +target_include_directories (clickhouse_common_config PRIVATE ${DBMS_INCLUDE_DIR}) diff --git a/dbms/src/Common/ConfigProcessor/ConfigProcessor.cpp b/dbms/src/Common/Config/ConfigProcessor.cpp similarity index 100% rename from dbms/src/Common/ConfigProcessor/ConfigProcessor.cpp rename to dbms/src/Common/Config/ConfigProcessor.cpp diff --git a/dbms/src/Common/ConfigProcessor/ConfigProcessor.h b/dbms/src/Common/Config/ConfigProcessor.h similarity index 100% rename from dbms/src/Common/ConfigProcessor/ConfigProcessor.h rename to dbms/src/Common/Config/ConfigProcessor.h diff --git a/dbms/src/Server/ConfigReloader.cpp b/dbms/src/Common/Config/ConfigReloader.cpp similarity index 98% rename from dbms/src/Server/ConfigReloader.cpp rename to dbms/src/Common/Config/ConfigReloader.cpp index e7d6224ea01..53a5aa61cfa 100644 --- a/dbms/src/Server/ConfigReloader.cpp +++ b/dbms/src/Common/Config/ConfigReloader.cpp @@ -2,12 +2,9 @@ #include #include - #include - -#include #include -#include +#include "ConfigProcessor.h" namespace DB diff --git a/dbms/src/Server/ConfigReloader.h b/dbms/src/Common/Config/ConfigReloader.h similarity index 97% rename from dbms/src/Server/ConfigReloader.h rename to dbms/src/Common/Config/ConfigReloader.h index 02301346e13..2dcbea7a8bc 100644 --- a/dbms/src/Server/ConfigReloader.h +++ b/dbms/src/Common/Config/ConfigReloader.h @@ -1,9 +1,8 @@ #pragma once -#include +#include "ConfigProcessor.h" #include #include - #include #include #include diff --git a/dbms/src/Common/ConfigProcessor/CMakeLists.txt b/dbms/src/Common/ConfigProcessor/CMakeLists.txt deleted file mode 100644 index 923955dc8c9..00000000000 --- a/dbms/src/Common/ConfigProcessor/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ - -include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake) - -add_headers_and_sources(clickhouse_common_configprocessor .) 
- -add_library(clickhouse_common_configprocessor ${SPLIT_SHARED} ${clickhouse_common_configprocessor_headers} ${clickhouse_common_configprocessor_sources}) - -target_link_libraries (clickhouse_common_configprocessor clickhouse_common_zookeeper string_utils) -target_include_directories (clickhouse_common_configprocessor PRIVATE ${DBMS_INCLUDE_DIR}) diff --git a/dbms/src/Common/ZooKeeper/tests/CMakeLists.txt b/dbms/src/Common/ZooKeeper/tests/CMakeLists.txt index ed55b348205..a5a035e558b 100644 --- a/dbms/src/Common/ZooKeeper/tests/CMakeLists.txt +++ b/dbms/src/Common/ZooKeeper/tests/CMakeLists.txt @@ -14,4 +14,4 @@ add_executable(zkutil_zookeeper_holder zkutil_zookeeper_holder.cpp) target_link_libraries(zkutil_zookeeper_holder clickhouse_common_zookeeper) add_executable (zk_many_watches_reconnect zk_many_watches_reconnect.cpp) -target_link_libraries (zk_many_watches_reconnect clickhouse_common_zookeeper clickhouse_common_configprocessor) +target_link_libraries (zk_many_watches_reconnect clickhouse_common_zookeeper clickhouse_common_config) diff --git a/dbms/src/Common/ZooKeeper/tests/zk_many_watches_reconnect.cpp b/dbms/src/Common/ZooKeeper/tests/zk_many_watches_reconnect.cpp index 62169096a2d..b1d1d5d3101 100644 --- a/dbms/src/Common/ZooKeeper/tests/zk_many_watches_reconnect.cpp +++ b/dbms/src/Common/ZooKeeper/tests/zk_many_watches_reconnect.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index d66265728d2..bbb2bc5e28a 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -45,7 +45,7 @@ #include #include -#include +#include #include #include diff --git a/dbms/src/Interpreters/ExternalLoaderConfigRepository.cpp b/dbms/src/Interpreters/ExternalLoaderConfigRepository.cpp index a1a6f270ef7..bfc7965a5ad 100644 --- a/dbms/src/Interpreters/ExternalLoaderConfigRepository.cpp +++ b/dbms/src/Interpreters/ExternalLoaderConfigRepository.cpp @@ -1,7 +1,7 @@ #include #include -#include +#include #include #include diff --git a/dbms/src/Interpreters/tests/users.cpp b/dbms/src/Interpreters/tests/users.cpp index 84f09150cff..5da4ed26134 100644 --- a/dbms/src/Interpreters/tests/users.cpp +++ b/dbms/src/Interpreters/tests/users.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include diff --git a/dbms/src/Server/CMakeLists.txt b/dbms/src/Server/CMakeLists.txt index adfd3309f06..d9f2a341237 100644 --- a/dbms/src/Server/CMakeLists.txt +++ b/dbms/src/Server/CMakeLists.txt @@ -15,7 +15,6 @@ option (ENABLE_CLICKHOUSE_COPIER "Enable copier" ${ENABLE_CLICKHOUSE_COPIER}) configure_file (config_tools.h.in ${CMAKE_CURRENT_BINARY_DIR}/config_tools.h) add_library (clickhouse-server-lib - ConfigReloader.cpp HTTPHandler.cpp InterserverIOHTTPHandler.cpp MetricsTransmitter.cpp @@ -35,7 +34,7 @@ add_library (clickhouse-local-lib LocalServer.cpp) target_link_libraries (clickhouse-local-lib clickhouse-server-lib clickhouse_functions clickhouse_aggregate_functions clickhouse_table_functions) add_library (clickhouse-extract-from-config-lib ${SPLIT_SHARED} ExtractFromConfig.cpp) -target_link_libraries (clickhouse-extract-from-config-lib clickhouse_common_configprocessor clickhouse_common_io ${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries (clickhouse-extract-from-config-lib clickhouse_common_config clickhouse_common_io ${Boost_PROGRAM_OPTIONS_LIBRARY}) add_library (clickhouse-client-lib Client.cpp) target_link_libraries (clickhouse-client-lib clickhouse_functions 
clickhouse_aggregate_functions clickhouse_table_functions ${LINE_EDITING_LIBS} ${Boost_PROGRAM_OPTIONS_LIBRARY}) diff --git a/dbms/src/Server/ExtractFromConfig.cpp b/dbms/src/Server/ExtractFromConfig.cpp index 961f9e31463..7f0122256dd 100644 --- a/dbms/src/Server/ExtractFromConfig.cpp +++ b/dbms/src/Server/ExtractFromConfig.cpp @@ -9,7 +9,7 @@ #include #include -#include +#include #include diff --git a/dbms/src/Server/LocalServer.cpp b/dbms/src/Server/LocalServer.cpp index 138f49461b0..ed25725f5d2 100644 --- a/dbms/src/Server/LocalServer.cpp +++ b/dbms/src/Server/LocalServer.cpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index c31f054aaac..44d4bb602c5 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -30,7 +30,7 @@ #include #include #include -#include "ConfigReloader.h" +#include #include "HTTPHandlerFactory.h" #include "MetricsTransmitter.h" #include "StatusFile.h" @@ -228,7 +228,11 @@ int Server::main(const std::vector & /*args*/) auto main_config_reloader = std::make_unique(config_path, include_from_path, std::move(main_config_zk_node_cache), - [&](ConfigurationPtr config) { global_context->setClustersConfig(config); }, + [&](ConfigurationPtr config) + { + buildLoggers(*config); + global_context->setClustersConfig(config); + }, /* already_loaded = */ true); /// Initialize users config reloader. diff --git a/dbms/src/Server/config.xml b/dbms/src/Server/config.xml index e0b4ea563ac..ac871674e1e 100644 --- a/dbms/src/Server/config.xml +++ b/dbms/src/Server/config.xml @@ -1,6 +1,7 @@ + trace /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.err.log diff --git a/libs/libdaemon/CMakeLists.txt b/libs/libdaemon/CMakeLists.txt index 292903b4a47..6f31c4e5b38 100644 --- a/libs/libdaemon/CMakeLists.txt +++ b/libs/libdaemon/CMakeLists.txt @@ -17,4 +17,4 @@ endif () target_include_directories (daemon PUBLIC include) target_include_directories (daemon PRIVATE ${ClickHouse_SOURCE_DIR}/libs/libpocoext/include) -target_link_libraries (daemon clickhouse_common_io clickhouse_common_configprocessor ${EXECINFO_LIBRARY}) +target_link_libraries (daemon clickhouse_common_io clickhouse_common_config ${EXECINFO_LIBRARY}) diff --git a/libs/libdaemon/include/daemon/BaseDaemon.h b/libs/libdaemon/include/daemon/BaseDaemon.h index c59ba187277..20c38469d16 100644 --- a/libs/libdaemon/include/daemon/BaseDaemon.h +++ b/libs/libdaemon/include/daemon/BaseDaemon.h @@ -23,7 +23,7 @@ #include #include #include -#include +#include namespace Poco { class TaskManager; } @@ -67,7 +67,7 @@ public: void reloadConfiguration(); /// Строит необходимые логгеры - void buildLoggers(); + void buildLoggers(Poco::Util::AbstractConfiguration & config); /// Определяет параметр командной строки void defineOptions(Poco::Util::OptionSet & _options) override; @@ -228,6 +228,11 @@ protected: std::string config_path; ConfigProcessor::LoadedConfig loaded_config; Poco::Util::AbstractConfiguration * last_configuration = nullptr; + +private: + + /// Previous value of logger element in config. It is used to reinitialize loggers whenever the value changed. 
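+    /// (buildLoggers() compares this raw string and returns early when it is unchanged,
+    /// so an unrelated config reload does not rebuild the logging channels.)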
+ std::string config_logger; }; diff --git a/libs/libdaemon/src/BaseDaemon.cpp b/libs/libdaemon/src/BaseDaemon.cpp index 1629fee9113..3c0576dcd92 100644 --- a/libs/libdaemon/src/BaseDaemon.cpp +++ b/libs/libdaemon/src/BaseDaemon.cpp @@ -1,6 +1,6 @@ #include -#include +#include #include #include @@ -565,67 +565,58 @@ void BaseDaemon::wakeup() } -void BaseDaemon::buildLoggers() +void BaseDaemon::buildLoggers(Poco::Util::AbstractConfiguration & config) { - bool is_daemon = config().getBool("application.runAsDaemon", false); + auto current_logger = config.getString("logger"); + if (config_logger == current_logger) + return; + config_logger = current_logger; - /// Change path for logging. - if (config().hasProperty("logger.log")) - { - std::string path = createDirectory(config().getString("logger.log")); - if (is_daemon - && chdir(path.c_str()) != 0) - throw Poco::Exception("Cannot change directory to " + path); - } - else - { - if (is_daemon - && chdir("/tmp") != 0) - throw Poco::Exception("Cannot change directory to /tmp"); - } + bool is_daemon = config.getBool("application.runAsDaemon", false); // Split log and error log. Poco::AutoPtr split = new SplitterChannel; - if (config().hasProperty("logger.log")) + auto log_level = config.getString("logger.level", "trace"); + if (config.hasProperty("logger.log")) { - createDirectory(config().getString("logger.log")); - std::cerr << "Logging to " << config().getString("logger.log") << std::endl; + createDirectory(config.getString("logger.log")); + std::cerr << "Logging " << log_level << " to " << config.getString("logger.log") << std::endl; // Set up two channel chains. Poco::AutoPtr pf = new OwnPatternFormatter(this); pf->setProperty("times", "local"); Poco::AutoPtr log = new FormattingChannel(pf); log_file = new FileChannel; - log_file->setProperty(Poco::FileChannel::PROP_PATH, Poco::Path(config().getString("logger.log")).absolute().toString()); - log_file->setProperty(Poco::FileChannel::PROP_ROTATION, config().getRawString("logger.size", "100M")); + log_file->setProperty(Poco::FileChannel::PROP_PATH, Poco::Path(config.getString("logger.log")).absolute().toString()); + log_file->setProperty(Poco::FileChannel::PROP_ROTATION, config.getRawString("logger.size", "100M")); log_file->setProperty(Poco::FileChannel::PROP_ARCHIVE, "number"); - log_file->setProperty(Poco::FileChannel::PROP_COMPRESS, config().getRawString("logger.compress", "true")); - log_file->setProperty(Poco::FileChannel::PROP_PURGECOUNT, config().getRawString("logger.count", "1")); - log_file->setProperty(Poco::FileChannel::PROP_FLUSH, config().getRawString("logger.flush", "true")); - log_file->setProperty(Poco::FileChannel::PROP_ROTATEONOPEN, config().getRawString("logger.rotateOnOpen", "false")); + log_file->setProperty(Poco::FileChannel::PROP_COMPRESS, config.getRawString("logger.compress", "true")); + log_file->setProperty(Poco::FileChannel::PROP_PURGECOUNT, config.getRawString("logger.count", "1")); + log_file->setProperty(Poco::FileChannel::PROP_FLUSH, config.getRawString("logger.flush", "true")); + log_file->setProperty(Poco::FileChannel::PROP_ROTATEONOPEN, config.getRawString("logger.rotateOnOpen", "false")); log->setChannel(log_file); split->addChannel(log); log_file->open(); } - if (config().hasProperty("logger.errorlog")) + if (config.hasProperty("logger.errorlog")) { - createDirectory(config().getString("logger.errorlog")); - std::cerr << "Logging errors to " << config().getString("logger.errorlog") << std::endl; + createDirectory(config.getString("logger.errorlog")); + 
std::cerr << "Logging errors to " << config.getString("logger.errorlog") << std::endl; Poco::AutoPtr level = new Poco::LevelFilterChannel; level->setLevel(Message::PRIO_NOTICE); Poco::AutoPtr pf = new OwnPatternFormatter(this); pf->setProperty("times", "local"); Poco::AutoPtr errorlog = new FormattingChannel(pf); error_log_file = new FileChannel; - error_log_file->setProperty(Poco::FileChannel::PROP_PATH, Poco::Path(config().getString("logger.errorlog")).absolute().toString()); - error_log_file->setProperty(Poco::FileChannel::PROP_ROTATION, config().getRawString("logger.size", "100M")); + error_log_file->setProperty(Poco::FileChannel::PROP_PATH, Poco::Path(config.getString("logger.errorlog")).absolute().toString()); + error_log_file->setProperty(Poco::FileChannel::PROP_ROTATION, config.getRawString("logger.size", "100M")); error_log_file->setProperty(Poco::FileChannel::PROP_ARCHIVE, "number"); - error_log_file->setProperty(Poco::FileChannel::PROP_COMPRESS, config().getRawString("logger.compress", "true")); - error_log_file->setProperty(Poco::FileChannel::PROP_PURGECOUNT, config().getRawString("logger.count", "1")); - error_log_file->setProperty(Poco::FileChannel::PROP_FLUSH, config().getRawString("logger.flush", "true")); - error_log_file->setProperty(Poco::FileChannel::PROP_ROTATEONOPEN, config().getRawString("logger.rotateOnOpen", "false")); + error_log_file->setProperty(Poco::FileChannel::PROP_COMPRESS, config.getRawString("logger.compress", "true")); + error_log_file->setProperty(Poco::FileChannel::PROP_PURGECOUNT, config.getRawString("logger.count", "1")); + error_log_file->setProperty(Poco::FileChannel::PROP_FLUSH, config.getRawString("logger.flush", "true")); + error_log_file->setProperty(Poco::FileChannel::PROP_ROTATEONOPEN, config.getRawString("logger.rotateOnOpen", "false")); errorlog->setChannel(error_log_file); level->setChannel(errorlog); split->addChannel(level); @@ -635,7 +626,7 @@ void BaseDaemon::buildLoggers() /// "dynamic_layer_selection" is needed only for Yandex.Metrika, that share part of ClickHouse code. /// We don't need this configuration parameter. - if (config().getBool("logger.use_syslog", false) || config().getBool("dynamic_layer_selection", false)) + if (config.getBool("logger.use_syslog", false) || config.getBool("dynamic_layer_selection", false)) { Poco::AutoPtr pf = new OwnPatternFormatter(this, OwnPatternFormatter::ADD_LAYER_TAG); pf->setProperty("times", "local"); @@ -646,14 +637,14 @@ void BaseDaemon::buildLoggers() syslog_channel->open(); } - if (config().getBool("logger.console", false) || (!config().hasProperty("logger.console") && !is_daemon && (isatty(STDIN_FILENO) || isatty(STDERR_FILENO)))) + if (config.getBool("logger.console", false) || (!config.hasProperty("logger.console") && !is_daemon && (isatty(STDIN_FILENO) || isatty(STDERR_FILENO)))) { Poco::AutoPtr file = new ConsoleChannel; Poco::AutoPtr pf = new OwnPatternFormatter(this); pf->setProperty("times", "local"); Poco::AutoPtr log = new FormattingChannel(pf); log->setChannel(file); - logger().warning("Logging to console"); + logger().warning("Logging " + log_level + " to console"); split->addChannel(log); } @@ -662,19 +653,25 @@ void BaseDaemon::buildLoggers() logger().setChannel(split); // Global logging level (it can be overridden for specific loggers). 
-    logger().setLevel(config().getString("logger.level", "trace"));
+    logger().setLevel(log_level);
+
+    // Set the new level on all loggers that have already been created.
+    std::vector<std::string> names;
+    Logger::root().names(names);
+    for (const auto & name : names)
+        Logger::root().get(name).setLevel(log_level);
 
     // Attach to the root logger.
-    Logger::root().setLevel(logger().getLevel());
+    Logger::root().setLevel(log_level);
     Logger::root().setChannel(logger().getChannel());
 
     // Explicitly specified log levels for specific loggers.
     AbstractConfiguration::Keys levels;
-    config().keys("logger.levels", levels);
+    config.keys("logger.levels", levels);
     if(!levels.empty())
         for(AbstractConfiguration::Keys::iterator it = levels.begin(); it != levels.end(); ++it)
-            Logger::get(*it).setLevel(config().getString("logger.levels." + *it, "trace"));
+            Logger::get(*it).setLevel(config.getString("logger.levels." + *it, "trace"));
 }
 
@@ -839,7 +836,22 @@ void BaseDaemon::initialize(Application & self)
         pid.seed(config().getString("pid"));
     }
 
-    buildLoggers();
+    /// Change path for logging.
+    if (config().hasProperty("logger.log"))
+    {
+        std::string path = createDirectory(config().getString("logger.log"));
+        if (is_daemon
+            && chdir(path.c_str()) != 0)
+            throw Poco::Exception("Cannot change directory to " + path);
+    }
+    else
+    {
+        if (is_daemon
+            && chdir("/tmp") != 0)
+            throw Poco::Exception("Cannot change directory to /tmp");
+    }
+
+    buildLoggers(config());
 
     if (is_daemon)
     {
diff --git a/utils/config-processor/CMakeLists.txt b/utils/config-processor/CMakeLists.txt
index 965258803fa..d25c25d2f02 100644
--- a/utils/config-processor/CMakeLists.txt
+++ b/utils/config-processor/CMakeLists.txt
@@ -1,4 +1,4 @@
 add_executable (config-processor config-processor.cpp)
-target_link_libraries (config-processor clickhouse_common_configprocessor)
+target_link_libraries (config-processor clickhouse_common_config)
 
 INSTALL(TARGETS config-processor RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT config-processor)
diff --git a/utils/config-processor/config-processor.cpp b/utils/config-processor/config-processor.cpp
index ff3a3e6160c..31eaf8522a4 100644
--- a/utils/config-processor/config-processor.cpp
+++ b/utils/config-processor/config-processor.cpp
@@ -1,4 +1,4 @@
-#include <Common/ConfigProcessor/ConfigProcessor.h>
+#include <Common/Config/ConfigProcessor.h>
 #include
 
 int main(int argc, char ** argv)

From e0798edc09a31c00f44f1afba5b3d78894c95e67 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Thu, 1 Mar 2018 04:25:06 +0300
Subject: [PATCH 165/209] Better UNION ALL: development #1947

---
 dbms/src/Interpreters/ExpressionAnalyzer.cpp  | 54 +++++++-------
 dbms/src/Interpreters/ExpressionAnalyzer.h    |  9 +---
 .../Interpreters/InterpreterSelectQuery.cpp   | 36 ++++++-------
 3 files changed, 36 insertions(+), 63 deletions(-)

diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp
index 3fb8cbbd124..90aa6243f51 100644
--- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp
+++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp
@@ -210,6 +210,12 @@ ExpressionAnalyzer::ExpressionAnalyzer(
     /// Common subexpression elimination. Rewrite rules.
     normalizeTree();
 
+    /// Remove unneeded columns according to 'required_result_columns'.
+    /// Leave all selected columns in case of DISTINCT, as well as columns that contain an arrayJoin function inside.
+    /// Must be after 'normalizeTree' (after expanding aliases, so that aliases do not get lost)
+    /// and before 'executeScalarSubqueries', 'analyzeAggregation', etc. to avoid excessive calculations.
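+    /// (e.g. for "SELECT a, b FROM t" asked to produce only {a}, the "b" element is erased
+    /// from the SELECT list here, so "b" is not read from the table at all.)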
+ removeUnneededColumnsFromSelectClause(); + /// Executing scalar subqueries - replacing them with constant values. executeScalarSubqueries(); @@ -228,9 +234,6 @@ ExpressionAnalyzer::ExpressionAnalyzer( /// array_join_alias_to_name, array_join_result_to_source. getArrayJoinedColumns(); - /// All selected columns in case of DISTINCT; columns that contain arrayJoin function inside. - calculateRequiredColumnsBeforeProjection(); - /// Delete the unnecessary from `source_columns` list. Create `unknown_required_source_columns`. Form `columns_added_by_join`. collectUsedColumns(); @@ -2498,8 +2501,7 @@ void ExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain, bool only_ getRootActions(select_query->select_expression_list, only_types, false, step.actions); for (const auto & child : select_query->select_expression_list->children) - if (required_columns_before_projection.count(child->getColumnName())) - step.required_output.push_back(child->getColumnName()); + step.required_output.push_back(child->getColumnName()); } bool ExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, bool only_types) @@ -2667,7 +2669,7 @@ void ExpressionAnalyzer::collectUsedColumns() collectJoinedColumns(available_joined_columns, columns_added_by_join); NameSet required_joined_columns; - getRequiredSourceColumnsInSelectImpl(available_columns, required, ignored, available_joined_columns, required_joined_columns); + getRequiredSourceColumnsImpl(ast, available_columns, required, ignored, available_joined_columns, required_joined_columns); for (NamesAndTypesList::iterator it = columns_added_by_join.begin(); it != columns_added_by_join.end();) { @@ -2786,29 +2788,6 @@ Names ExpressionAnalyzer::getRequiredSourceColumns() const } -void ExpressionAnalyzer::getRequiredSourceColumnsInSelectImpl( - const NameSet & available_columns, NameSet & required_source_columns, NameSet & ignored_names, - const NameSet & available_joined_columns, NameSet & required_joined_columns) -{ - if (!select_query) - { - getRequiredSourceColumnsImpl(ast, available_columns, required_source_columns, - ignored_names, available_joined_columns, required_joined_columns); - return; - } - - for (const auto & child : select_query->select_expression_list->children) - if (required_columns_before_projection.count(child->getColumnName())) - getRequiredSourceColumnsImpl(child, available_columns, required_source_columns, - ignored_names, available_joined_columns, required_joined_columns); - - for (const auto & child : select_query->children) - if (child != select_query->select_expression_list) - getRequiredSourceColumnsImpl(child, available_columns, required_source_columns, - ignored_names, available_joined_columns, required_joined_columns); -} - - void ExpressionAnalyzer::getRequiredSourceColumnsImpl(const ASTPtr & ast, const NameSet & available_columns, NameSet & required_source_columns, NameSet & ignored_names, const NameSet & available_joined_columns, NameSet & required_joined_columns) @@ -2910,17 +2889,20 @@ static bool hasArrayJoin(const ASTPtr & ast) } -void ExpressionAnalyzer::calculateRequiredColumnsBeforeProjection() +void ExpressionAnalyzer::removeUnneededColumnsFromSelectClause() { if (!select_query) return; - for (const auto & child : select_query->select_expression_list->children) - if (required_result_columns.empty() - || select_query->distinct - || hasArrayJoin(child) - || required_result_columns.count(child->getAliasOrColumnName())) - required_columns_before_projection.insert(child->getColumnName()); + if 
(required_result_columns.empty() || select_query->distinct) + return; + + ASTs & elements = select_query->select_expression_list->children; + + elements.erase(std::remove_if(elements.begin(), elements.end(), [this](const auto & node) + { + return !required_result_columns.count(node->getAliasOrColumnName()) && !hasArrayJoin(node); + }), elements.end()); } } diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h index 036a94af6ee..bdddb169271 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.h +++ b/dbms/src/Interpreters/ExpressionAnalyzer.h @@ -296,12 +296,6 @@ private: const NameSet & available_columns, NameSet & required_source_columns, NameSet & ignored_names, const NameSet & available_joined_columns, NameSet & required_joined_columns); - /** Same as above but skip unnecessary elements in SELECT according to 'required_result_columns'. - */ - void getRequiredSourceColumnsInSelectImpl( - const NameSet & available_columns, NameSet & required_source_columns, NameSet & ignored_names, - const NameSet & available_joined_columns, NameSet & required_joined_columns); - /// columns - the columns that are present before the transformations begin. void initChain(ExpressionActionsChain & chain, const NamesAndTypesList & columns) const; @@ -331,8 +325,7 @@ private: /** Sometimes we have to calculate more columns in SELECT clause than will be returned from query. * This is the case when we have DISTINCT or arrayJoin: we require more columns in SELECT even if we need less columns in result. */ - NameSet required_columns_before_projection; - void calculateRequiredColumnsBeforeProjection(); + void removeUnneededColumnsFromSelectClause(); }; } diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 67cc5004970..a586e6ee1c3 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -327,7 +327,8 @@ void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputSt /** Read the data from Storage. from_stage - to what stage the request was completed in Storage. */ QueryProcessingStage::Enum from_stage = executeFetchColumns(pipeline, dry_run); - LOG_TRACE(log, QueryProcessingStage::toString(from_stage) << " -> " << QueryProcessingStage::toString(to_stage)); + if (!dry_run) + LOG_TRACE(log, QueryProcessingStage::toString(from_stage) << " -> " << QueryProcessingStage::toString(to_stage)); AnalysisResult expressions = analyzeExpressions(from_stage); @@ -451,27 +452,24 @@ void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputSt } if (need_merge_streams) - { executeUnion(pipeline); - /** If there was more than one stream, - * then DISTINCT needs to be performed once again after merging all streams. - */ - if (need_second_distinct_pass) - executeDistinct(pipeline, false, Names()); + /** If there was more than one stream, + * then DISTINCT needs to be performed once again after merging all streams. + */ + if (need_second_distinct_pass) + executeDistinct(pipeline, false, Names()); - /** We must do projection after DISTINCT because projection may remove some columns. - */ - executeProjection(pipeline, expressions.final_projection); - executeExtremes(pipeline); - executeLimitBy(pipeline); - executeLimit(pipeline); - } - else - { - executeProjection(pipeline, expressions.final_projection); - executeExtremes(pipeline); - } + /** We must do projection after DISTINCT because projection may remove some columns. 
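+          * (for example, a column that was kept in the SELECT list only because of DISTINCT
+          * is trimmed away from the final header by this projection)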
+ */ + executeLimitBy(pipeline); + executeProjection(pipeline, expressions.final_projection); + + /** Extremes are calculated before LIMIT, but after LIMIT BY. This is Ok. + */ + executeExtremes(pipeline); + + executeLimit(pipeline); } } From 280b4663c173273174c4a2a48567c3469c07d7c5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 1 Mar 2018 04:49:36 +0300 Subject: [PATCH 166/209] Caching table functions in query context #1947 --- dbms/src/Interpreters/Context.cpp | 54 ++++++++++++------- dbms/src/Interpreters/Context.h | 12 +++-- .../Interpreters/InterpreterSelectQuery.cpp | 6 +-- 3 files changed, 44 insertions(+), 28 deletions(-) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index d66265728d2..f626f6983a8 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -93,7 +94,7 @@ struct ContextShared std::shared_ptr runtime_components_factory; /// For access of most of shared objects. Recursive mutex. - mutable Poco::Mutex mutex; + mutable std::recursive_mutex mutex; /// Separate mutex for access of dictionaries. Separate mutex to avoid locks when server doing request to itself. mutable std::mutex embedded_dictionaries_mutex; mutable std::mutex external_dictionaries_mutex; @@ -282,19 +283,19 @@ Context::~Context() } -InterserverIOHandler & Context::getInterserverIOHandler() { return shared->interserver_io_handler; } +InterserverIOHandler & Context::getInterserverIOHandler() { return shared->interserver_io_handler; } -std::unique_lock Context::getLock() const +std::unique_lock Context::getLock() const { ProfileEvents::increment(ProfileEvents::ContextLock); CurrentMetrics::Increment increment{CurrentMetrics::ContextLockWait}; - return std::unique_lock(shared->mutex); + return std::unique_lock(shared->mutex); } -ProcessList & Context::getProcessList() { return shared->process_list; } -const ProcessList & Context::getProcessList() const { return shared->process_list; } -MergeList & Context::getMergeList() { return shared->merge_list; } -const MergeList & Context::getMergeList() const { return shared->merge_list; } +ProcessList & Context::getProcessList() { return shared->process_list; } +const ProcessList & Context::getProcessList() const { return shared->process_list; } +MergeList & Context::getMergeList() { return shared->merge_list; } +const MergeList & Context::getMergeList() const { return shared->merge_list; } const Databases Context::getDatabases() const @@ -748,13 +749,10 @@ Tables Context::getExternalTables() const StoragePtr Context::tryGetExternalTable(const String & table_name) const { - auto lock = getLock(); - - Tables::const_iterator jt = external_tables.find(table_name); - if (external_tables.end() == jt) + auto it = external_tables.find(table_name); + if (external_tables.end() == it) return StoragePtr(); - - return jt->second; + return it->second; } @@ -824,19 +822,37 @@ void Context::addExternalTable(const String & table_name, const StoragePtr & sto StoragePtr Context::tryRemoveExternalTable(const String & table_name) { - auto lock = getLock(); - - Tables::const_iterator it = external_tables.find(table_name); + auto it = external_tables.find(table_name); if (external_tables.end() == it) return StoragePtr(); auto storage = it->second; external_tables.erase(it); return storage; - - return {}; } + +StoragePtr Context::executeTableFunction(const ASTPtr & table_expression) +{ + /// Slightly suboptimal. 
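+    /// The key is a string form of the AST hash, so identical table function invocations
+    /// within one query resolve to the same cached StoragePtr instead of being executed twice.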
+    auto hash = table_expression->getTreeHash();
+    String key = toString(hash.first) + '_' + toString(hash.second);
+
+    StoragePtr & res = table_function_results[key];
+
+    if (!res)
+    {
+        TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(
+            typeid_cast<const ASTFunction *>(table_expression.get())->name, *this);
+
+        /// Run it and remember the result
+        res = table_function_ptr->execute(table_expression, *this);
+    }
+
+    return res;
+}
+
+
 DDLGuard::DDLGuard(Map & map_, std::mutex & mutex_, std::unique_lock<std::mutex> && /*lock*/, const String & elem, const String & message)
     : map(map_), mutex(mutex_)
 {
diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h
index 66ea075372a..1656f757c2e 100644
--- a/dbms/src/Interpreters/Context.h
+++ b/dbms/src/Interpreters/Context.h
@@ -56,6 +56,7 @@ class IDatabase;
 class DDLGuard;
 class DDLWorker;
 class IStorage;
+class ITableFunction;
 using StoragePtr = std::shared_ptr<IStorage>;
 using Tables = std::map<String, StoragePtr>;
 class IAST;
@@ -102,7 +103,8 @@ private:
     String default_format;  /// Format, used when server formats data by itself and if query does not have FORMAT specification.
                             /// Thus, used in HTTP interface. If not specified - then some globally default format is used.
-    Tables external_tables; /// Temporary tables.
+    Tables external_tables; /// Temporary tables. Keyed by table name.
+    Tables table_function_results;  /// Temporary tables obtained by execution of table functions. Keyed by AST tree id.
     Context * session_context = nullptr;    /// Session context or nullptr. Could be equal to this.
     Context * global_context = nullptr;     /// Global context or nullptr. Could be equal to this.
     SystemLogsPtr system_logs;              /// Used to log queries and operations on parts
@@ -180,6 +182,8 @@ public:
     void addExternalTable(const String & table_name, const StoragePtr & storage);
     StoragePtr tryRemoveExternalTable(const String & table_name);

+    StoragePtr executeTableFunction(const ASTPtr & table_expression);
+
     void addDatabase(const String & database_name, const DatabasePtr & database);
     DatabasePtr detachDatabase(const String & database_name);
@@ -251,7 +255,7 @@ public:
     std::chrono::steady_clock::duration closeSessions() const;

     /// For methods below you may need to acquire a lock by yourself.
-    std::unique_lock<Poco::Mutex> getLock() const;
+    std::unique_lock<std::recursive_mutex> getLock() const;
     const Context & getSessionContext() const;
     Context & getSessionContext();
@@ -261,8 +265,8 @@ public:
     Context & getGlobalContext();
     bool hasGlobalContext() const { return global_context != nullptr; }

-    void setSessionContext(Context & context_) { session_context = &context_; }
-    void setGlobalContext(Context & context_)  { global_context = &context_; }
+    void setSessionContext(Context & context_) { session_context = &context_; }
+    void setGlobalContext(Context & context_)  { global_context = &context_; }

     const Settings & getSettingsRef() const { return settings; };
     Settings & getSettingsRef() { return settings; };
diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp
index a586e6ee1c3..bcc4753b951 100644
--- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp
@@ -128,11 +128,7 @@ void InterpreterSelectQuery::init(const Names & required_result_column_names)
     else if (table_expression && typeid_cast<const ASTFunction *>(table_expression.get()))
     {
         /// Read from table function.
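+        /// Construction of the storage is delegated to the query Context, which caches
+        /// the result by AST hash (see Context::executeTableFunction above).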
- - TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get( - typeid_cast(table_expression.get())->name, context); - /// Run it and remember the result - storage = table_function_ptr->execute(table_expression, context); + storage = context.executeTableFunction(table_expression); } else { From fe2da8a2c70a6d978bf13382a02dc02c7a78c900 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 1 Mar 2018 07:23:32 +0300 Subject: [PATCH 167/209] Added method rename for StorageView [#CLICKHOUSE-2] --- dbms/src/Storages/StorageView.cpp | 15 +++------------ dbms/src/Storages/StorageView.h | 14 ++++++-------- 2 files changed, 9 insertions(+), 20 deletions(-) diff --git a/dbms/src/Storages/StorageView.cpp b/dbms/src/Storages/StorageView.cpp index 4103c4fed31..9351873c752 100644 --- a/dbms/src/Storages/StorageView.cpp +++ b/dbms/src/Storages/StorageView.cpp @@ -1,7 +1,5 @@ #include -#include #include -#include #include #include @@ -20,14 +18,9 @@ namespace ErrorCodes StorageView::StorageView( const String & table_name_, - const String & database_name_, const ASTCreateQuery & query, - const NamesAndTypesList & columns_, - const NamesAndTypesList & materialized_columns_, - const NamesAndTypesList & alias_columns_, - const ColumnDefaults & column_defaults_) - : IStorage{columns_, materialized_columns_, alias_columns_, column_defaults_}, table_name(table_name_), - database_name(database_name_) + const NamesAndTypesList & columns_) + : IStorage{columns_, {}, {}, {}}, table_name(table_name_) { if (!query.select) throw Exception("SELECT query is not specified for " + getName(), ErrorCodes::INCORRECT_QUERY); @@ -63,9 +56,7 @@ void registerStorageView(StorageFactory & factory) if (args.query.storage) throw Exception("Specifying ENGINE is not allowed for a View", ErrorCodes::INCORRECT_QUERY); - return StorageView::create( - args.table_name, args.database_name, args.query, args.columns, - args.materialized_columns, args.alias_columns, args.column_defaults); + return StorageView::create(args.table_name, args.query, args.columns); }); } diff --git a/dbms/src/Storages/StorageView.h b/dbms/src/Storages/StorageView.h index 85442020683..6a16e0d16e6 100644 --- a/dbms/src/Storages/StorageView.h +++ b/dbms/src/Storages/StorageView.h @@ -29,22 +29,20 @@ public: size_t max_block_size, unsigned num_streams) override; + void rename(const String & /*new_path_to_db*/, const String & /*new_database_name*/, const String & new_table_name) override + { + table_name = new_table_name; + } + private: - String select_database_name; - String select_table_name; String table_name; - String database_name; ASTPtr inner_query; protected: StorageView( const String & table_name_, - const String & database_name_, const ASTCreateQuery & query, - const NamesAndTypesList & columns_, - const NamesAndTypesList & materialized_columns_, - const NamesAndTypesList & alias_columns_, - const ColumnDefaults & column_defaults_); + const NamesAndTypesList & columns_); }; } From f159be383226ac7df03182b5303483026689a9fd Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 1 Mar 2018 07:42:18 +0300 Subject: [PATCH 168/209] Fixed error #1947 --- dbms/src/Interpreters/InterpreterSelectQuery.cpp | 4 ++-- dbms/src/Interpreters/InterpreterSelectQuery.h | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index bcc4753b951..d583c9afa23 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ 
b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -72,7 +72,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( QueryProcessingStage::Enum to_stage_, size_t subquery_depth_, const BlockInputStreamPtr & input) - : query_ptr(query_ptr_) + : query_ptr(query_ptr_->clone()) , query(typeid_cast(*query_ptr)) , context(context_) , to_stage(to_stage_) @@ -85,7 +85,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( InterpreterSelectQuery::InterpreterSelectQuery(OnlyAnalyzeTag, const ASTPtr & query_ptr_, const Context & context_) - : query_ptr(query_ptr_) + : query_ptr(query_ptr_->clone()) , query(typeid_cast(*query_ptr)) , context(context_) , to_stage(QueryProcessingStage::Complete) diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.h b/dbms/src/Interpreters/InterpreterSelectQuery.h index 78651d9cb77..2150672e2c7 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.h +++ b/dbms/src/Interpreters/InterpreterSelectQuery.h @@ -26,7 +26,6 @@ public: /** * query_ptr * - A query AST to interpret. - * NOTE: The interpreter can modify the query during the execution. If this is undesirable, clone the query. * * to_stage * - the stage to which the query is to be executed. By default - till to the end. From dc9e7aa82ba3dff371387265a83d91d184c9369c Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Thu, 1 Mar 2018 12:47:20 +0800 Subject: [PATCH 169/209] Remove deprecated docs --- docs/en/introduction/distinctive_features.md | 2 +- docs/ru/introduction/distinctive_features.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/introduction/distinctive_features.md b/docs/en/introduction/distinctive_features.md index 5af3f2d1697..3927405579f 100644 --- a/docs/en/introduction/distinctive_features.md +++ b/docs/en/introduction/distinctive_features.md @@ -28,7 +28,7 @@ In ClickHouse, data can reside on different shards. Each shard can be a group of ## SQL support If you are familiar with standard SQL, we can't really talk about SQL support. -NULL-s are not supported. All the functions have different names. +All the functions have different names. However, this is a declarative query language based on SQL that can't be differentiated from SQL in many instances. Support for JOINs. Subqueries are supported in FROM, IN, and JOIN clauses, as well as scalar subqueries. Dependent subqueries are not supported. diff --git a/docs/ru/introduction/distinctive_features.md b/docs/ru/introduction/distinctive_features.md index 7cc23ed81f5..63ba71d4367 100644 --- a/docs/ru/introduction/distinctive_features.md +++ b/docs/ru/introduction/distinctive_features.md @@ -28,7 +28,7 @@ ## Поддержка SQL Если вы знаете, что такое стандартный SQL, то говорить о поддержке SQL всё-таки нельзя. -Не поддерживаются NULL-ы. Все функции названы по-другому. +Все функции названы по-другому. Тем не менее, это - декларативный язык запросов на основе SQL и во многих случаях не отличимый от SQL. Поддерживаются JOIN-ы. Поддерживаются подзапросы в секциях FROM, IN, JOIN, а также скалярные подзапросы. Зависимые подзапросы не поддерживаются. 
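
The two documentation hunks above drop the claim that NULLs are unsupported, reflecting that
Nullable types had landed by this point. A minimal illustration of the behaviour the updated
docs admit (a sketch against a server of this vintage, not part of the patch itself):

SELECT NULL AS x, toTypeName(x);                              -- \N    Nullable(Nothing)
SELECT if(number % 2 = 0, NULL, number) AS y, toTypeName(y)
FROM system.numbers LIMIT 3;                                  -- NULL for even numbers, Nullable(UInt64)
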
From 58f9d3deccc8b61440482733b6aa02e4860433a6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 1 Mar 2018 07:51:54 +0300 Subject: [PATCH 170/209] Better test [#CLICKHOUSE-2] --- ..._versioned_collapsing_merge_tree.reference | 32 +++++++++---------- .../00564_versioned_collapsing_merge_tree.sql | 32 +++++++++---------- 2 files changed, 32 insertions(+), 32 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00564_versioned_collapsing_merge_tree.reference b/dbms/tests/queries/0_stateless/00564_versioned_collapsing_merge_tree.reference index edc91a8f558..408617f86e5 100644 --- a/dbms/tests/queries/0_stateless/00564_versioned_collapsing_merge_tree.reference +++ b/dbms/tests/queries/0_stateless/00564_versioned_collapsing_merge_tree.reference @@ -2,7 +2,7 @@ table with 2 blocks final table with 2 blocks optimized 2018-01-31 str_8 0 -1 2018-01-31 str_9 0 1 -######################### +------------------------- table with 2 blocks final 2018-01-31 str_0 0 -1 2018-01-31 str_0 0 -1 @@ -45,12 +45,12 @@ table with 2 blocks optimized 2018-01-31 str_8 0 -1 2018-01-31 str_9 0 1 2018-01-31 str_9 0 1 -######################### +------------------------- table with 2 blocks final table with 2 blocks optimized 2018-01-31 str_9 0 1 2018-01-31 str_9 0 -1 -######################### +------------------------- table with 2 blocks final 2018-01-31 str_0 0 -1 2018-01-31 str_0 1 1 @@ -93,12 +93,12 @@ table with 2 blocks optimized 2018-01-31 str_8 1 1 2018-01-31 str_9 0 1 2018-01-31 str_9 1 -1 -######################### +------------------------- table with 4 blocks final table with 4 blocks optimized 2018-01-31 str_9 0 1 2018-01-31 str_9 0 -1 -######################### +------------------------- table with 5 blocks final 2018-01-31 str_0 1 -1 2018-01-31 str_1 1 -1 @@ -121,12 +121,12 @@ table with 5 blocks optimized 2018-01-31 str_7 1 -1 2018-01-31 str_8 1 -1 2018-01-31 str_9 1 -1 -######################### +------------------------- table with 2 blocks final table with 2 blocks optimized 2018-01-31 str_999999 0 1 2018-01-31 str_999999 0 -1 -######################### +------------------------- table with 2 blocks final 2018-01-31 0 0 1 2018-01-31 1 0 1 @@ -263,14 +263,14 @@ table with 2 blocks final table with 2 blocks optimized 2018-01-31 0 0 -1 2018-01-31 127 0 1 -######################### +------------------------- Vertival merge -######################### +------------------------- table with 2 blocks final table with 2 blocks optimized 2018-01-31 str_8 0 -1 2018-01-31 str_9 0 1 -######################### +------------------------- table with 2 blocks final 2018-01-31 str_0 0 -1 2018-01-31 str_0 0 -1 @@ -313,12 +313,12 @@ table with 2 blocks optimized 2018-01-31 str_8 0 -1 2018-01-31 str_9 0 1 2018-01-31 str_9 0 1 -######################### +------------------------- table with 2 blocks final table with 2 blocks optimized 2018-01-31 str_9 0 1 2018-01-31 str_9 0 -1 -######################### +------------------------- table with 2 blocks final 2018-01-31 str_0 0 -1 2018-01-31 str_0 1 1 @@ -361,12 +361,12 @@ table with 2 blocks optimized 2018-01-31 str_8 1 1 2018-01-31 str_9 0 1 2018-01-31 str_9 1 -1 -######################### +------------------------- table with 4 blocks final table with 4 blocks optimized 2018-01-31 str_9 0 1 2018-01-31 str_9 0 -1 -######################### +------------------------- table with 5 blocks final 2018-01-31 str_0 1 -1 2018-01-31 str_1 1 -1 @@ -389,12 +389,12 @@ table with 5 blocks optimized 2018-01-31 str_7 1 -1 2018-01-31 str_8 1 -1 2018-01-31 str_9 1 -1 
-######################### +------------------------- table with 2 blocks final table with 2 blocks optimized 2018-01-31 str_999999 0 1 2018-01-31 str_999999 0 -1 -######################### +------------------------- table with 2 blocks final 2018-01-31 0 0 1 2018-01-31 1 0 1 diff --git a/dbms/tests/queries/0_stateless/00564_versioned_collapsing_merge_tree.sql b/dbms/tests/queries/0_stateless/00564_versioned_collapsing_merge_tree.sql index 25a78001ccc..483257de16a 100644 --- a/dbms/tests/queries/0_stateless/00564_versioned_collapsing_merge_tree.sql +++ b/dbms/tests/queries/0_stateless/00564_versioned_collapsing_merge_tree.sql @@ -8,7 +8,7 @@ optimize table test.mult_tab; select 'table with 2 blocks optimized'; select * from test.mult_tab; -select '#########################'; +select '-------------------------'; drop table if exists test.mult_tab; create table test.mult_tab (date Date, value String, version UInt64, sign Int8) engine = VersionedCollapsingMergeTree(date, (date, value), 8192, sign, version); @@ -20,7 +20,7 @@ optimize table test.mult_tab; select 'table with 2 blocks optimized'; select * from test.mult_tab; -select '#########################'; +select '-------------------------'; drop table if exists test.mult_tab; create table test.mult_tab (date Date, value String, version UInt64, sign Int8) engine = VersionedCollapsingMergeTree(date, (date, value), 8192, sign, version); @@ -32,7 +32,7 @@ optimize table test.mult_tab; select 'table with 2 blocks optimized'; select * from test.mult_tab; -select '#########################'; +select '-------------------------'; drop table if exists test.mult_tab; create table test.mult_tab (date Date, value String, version UInt64, sign Int8) engine = VersionedCollapsingMergeTree(date, (date, value), 8192, sign, version); @@ -44,7 +44,7 @@ optimize table test.mult_tab; select 'table with 2 blocks optimized'; select * from test.mult_tab; -select '#########################'; +select '-------------------------'; drop table if exists test.mult_tab; create table test.mult_tab (date Date, value String, version UInt64, sign Int8) engine = VersionedCollapsingMergeTree(date, (date, value), 8192, sign, version); @@ -58,7 +58,7 @@ optimize table test.mult_tab; select 'table with 4 blocks optimized'; select * from test.mult_tab; -select '#########################'; +select '-------------------------'; drop table if exists test.mult_tab; create table test.mult_tab (date Date, value String, version UInt64, sign Int8) engine = VersionedCollapsingMergeTree(date, (date, value), 8192, sign, version); @@ -73,7 +73,7 @@ optimize table test.mult_tab; select 'table with 5 blocks optimized'; select * from test.mult_tab; -select '#########################'; +select '-------------------------'; drop table if exists test.mult_tab; create table test.mult_tab (date Date, value String, version UInt64, sign Int8) engine = VersionedCollapsingMergeTree(date, (date, value), 8192, sign, version); @@ -85,7 +85,7 @@ optimize table test.mult_tab; select 'table with 2 blocks optimized'; select * from test.mult_tab; -select '#########################'; +select '-------------------------'; drop table if exists test.mult_tab; create table test.mult_tab (date Date, value UInt64, version UInt64, sign Int8) engine = VersionedCollapsingMergeTree(date, (date), 8192, sign, version); @@ -97,9 +97,9 @@ optimize table test.mult_tab; select 'table with 2 blocks optimized'; select * from test.mult_tab; -select '#########################'; +select '-------------------------'; select 'Vertival 
merge'; -select '#########################'; +select '-------------------------'; drop table if exists test.mult_tab; create table test.mult_tab (date Date, value String, version UInt64, sign Int8) engine = VersionedCollapsingMergeTree(sign, version) order by (date) settings enable_vertical_merge_algorithm = 1, vertical_merge_algorithm_min_rows_to_activate = 1, vertical_merge_algorithm_min_columns_to_activate = 0; @@ -111,7 +111,7 @@ optimize table test.mult_tab; select 'table with 2 blocks optimized'; select * from test.mult_tab; -select '#########################'; +select '-------------------------'; drop table if exists test.mult_tab; create table test.mult_tab (date Date, value String, version UInt64, sign Int8) engine = VersionedCollapsingMergeTree(sign, version) order by (date, value) settings enable_vertical_merge_algorithm = 1, vertical_merge_algorithm_min_rows_to_activate = 1, vertical_merge_algorithm_min_columns_to_activate = 0; @@ -123,7 +123,7 @@ optimize table test.mult_tab; select 'table with 2 blocks optimized'; select * from test.mult_tab; -select '#########################'; +select '-------------------------'; drop table if exists test.mult_tab; create table test.mult_tab (date Date, value String, version UInt64, sign Int8) engine = VersionedCollapsingMergeTree(sign, version) order by (date, value) settings enable_vertical_merge_algorithm = 1, vertical_merge_algorithm_min_rows_to_activate = 1, vertical_merge_algorithm_min_columns_to_activate = 0; @@ -135,7 +135,7 @@ optimize table test.mult_tab; select 'table with 2 blocks optimized'; select * from test.mult_tab; -select '#########################'; +select '-------------------------'; drop table if exists test.mult_tab; create table test.mult_tab (date Date, value String, version UInt64, sign Int8) engine = VersionedCollapsingMergeTree(sign, version) order by (date, value) settings enable_vertical_merge_algorithm = 1, vertical_merge_algorithm_min_rows_to_activate = 1, vertical_merge_algorithm_min_columns_to_activate = 0; @@ -147,7 +147,7 @@ optimize table test.mult_tab; select 'table with 2 blocks optimized'; select * from test.mult_tab; -select '#########################'; +select '-------------------------'; drop table if exists test.mult_tab; create table test.mult_tab (date Date, value String, version UInt64, sign Int8) engine = VersionedCollapsingMergeTree(sign, version) order by (date, value) settings enable_vertical_merge_algorithm = 1, vertical_merge_algorithm_min_rows_to_activate = 1, vertical_merge_algorithm_min_columns_to_activate = 0; @@ -161,7 +161,7 @@ optimize table test.mult_tab; select 'table with 4 blocks optimized'; select * from test.mult_tab; -select '#########################'; +select '-------------------------'; drop table if exists test.mult_tab; create table test.mult_tab (date Date, value String, version UInt64, sign Int8) engine = VersionedCollapsingMergeTree(sign, version) order by (date, value) settings enable_vertical_merge_algorithm = 1, vertical_merge_algorithm_min_rows_to_activate = 1, vertical_merge_algorithm_min_columns_to_activate = 0; @@ -176,7 +176,7 @@ optimize table test.mult_tab; select 'table with 5 blocks optimized'; select * from test.mult_tab; -select '#########################'; +select '-------------------------'; drop table if exists test.mult_tab; create table test.mult_tab (date Date, value String, version UInt64, sign Int8) engine = VersionedCollapsingMergeTree(sign, version) order by (date, value) settings enable_vertical_merge_algorithm = 1, 
vertical_merge_algorithm_min_rows_to_activate = 1, vertical_merge_algorithm_min_columns_to_activate = 0; @@ -188,7 +188,7 @@ optimize table test.mult_tab; select 'table with 2 blocks optimized'; select * from test.mult_tab; -select '#########################'; +select '-------------------------'; drop table if exists test.mult_tab; create table test.mult_tab (date Date, value UInt64, version UInt64, sign Int8) engine = VersionedCollapsingMergeTree(sign, version) order by (date) settings enable_vertical_merge_algorithm = 1, vertical_merge_algorithm_min_rows_to_activate = 1, vertical_merge_algorithm_min_columns_to_activate = 0; From e604be27992284f1d599df7803d8c5d4b71b3cb2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 1 Mar 2018 07:57:51 +0300 Subject: [PATCH 171/209] Fixed error #1947 --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 5 +++-- dbms/src/Interpreters/InterpreterSelectQuery.cpp | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 90aa6243f51..10362fc54f6 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -2867,8 +2867,9 @@ void ExpressionAnalyzer::getRequiredSourceColumnsImpl(const ASTPtr & ast, /** We will not go to the ARRAY JOIN section, because we need to look at the names of non-ARRAY-JOIN columns. * There, `collectUsedColumns` will send us separately. */ - if (!typeid_cast(child.get()) - && !typeid_cast(child.get())) + if (!typeid_cast(child.get()) + && !typeid_cast(child.get()) + && !typeid_cast(child.get())) getRequiredSourceColumnsImpl(child, available_columns, required_source_columns, ignored_names, available_joined_columns, required_joined_columns); } diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index d583c9afa23..05f3c856206 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -72,7 +72,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( QueryProcessingStage::Enum to_stage_, size_t subquery_depth_, const BlockInputStreamPtr & input) - : query_ptr(query_ptr_->clone()) + : query_ptr(query_ptr_->clone()) /// Note: the query is cloned because it will be modified during analysis. 
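+      /// (for example, ExpressionAnalyzer::removeUnneededColumnsFromSelectClause() prunes the SELECT list in place)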
     , query(typeid_cast<ASTSelectQuery &>(*query_ptr))
     , context(context_)
     , to_stage(to_stage_)

From 8197574e37768f5981ea1b140f8bbcd280b91b86 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Thu, 1 Mar 2018 08:24:56 +0300
Subject: [PATCH 172/209] Added support for LIMIT BY arbitrary expressions
 [#CLICKHOUSE-3613] #1947

---
 dbms/src/Interpreters/ExpressionAnalyzer.cpp   | 18 ++++++++++++++++++
 dbms/src/Interpreters/ExpressionAnalyzer.h     |  1 +
 .../Interpreters/InterpreterSelectQuery.cpp    | 13 +++++++++++--
 dbms/src/Interpreters/InterpreterSelectQuery.h |  1 +
 .../00583_limit_by_expressions.reference       | 13 +++++++++++++
 .../0_stateless/00583_limit_by_expressions.sql |  7 +++++++
 6 files changed, 51 insertions(+), 2 deletions(-)
 create mode 100644 dbms/tests/queries/0_stateless/00583_limit_by_expressions.reference
 create mode 100644 dbms/tests/queries/0_stateless/00583_limit_by_expressions.sql

diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp
index 10362fc54f6..e0b78a30b0d 100644
--- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp
+++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp
@@ -2529,6 +2529,24 @@ bool ExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, bool only
     return true;
 }

+bool ExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain, bool only_types)
+{
+    assertSelect();
+
+    if (!select_query->limit_by_expression_list)
+        return false;
+
+    initChain(chain, aggregated_columns);
+    ExpressionActionsChain::Step & step = chain.steps.back();
+
+    getRootActions(select_query->limit_by_expression_list, only_types, false, step.actions);
+
+    for (const auto & child : select_query->limit_by_expression_list->children)
+        step.required_output.push_back(child->getColumnName());
+
+    return true;
+}
+
 void ExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & chain) const
 {
     assertSelect();
diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h
index bdddb169271..ccad4b13740 100644
--- a/dbms/src/Interpreters/ExpressionAnalyzer.h
+++ b/dbms/src/Interpreters/ExpressionAnalyzer.h
@@ -111,6 +111,7 @@ public:
     bool appendHaving(ExpressionActionsChain & chain, bool only_types);
     void appendSelect(ExpressionActionsChain & chain, bool only_types);
     bool appendOrderBy(ExpressionActionsChain & chain, bool only_types);
+    bool appendLimitBy(ExpressionActionsChain & chain, bool only_types);

     /// Deletes all columns except mentioned by SELECT, arranges the remaining columns and renames them to aliases.
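     /// After this step the block contains exactly the result columns of the query, under their output names.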
    void appendProjectResult(ExpressionActionsChain & chain) const;
diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp
index 05f3c856206..3fe7896ff58 100644
--- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp
@@ -286,6 +286,10 @@ InterpreterSelectQuery::AnalysisResult InterpreterSelectQuery::analyzeExpression
     res.before_order_and_select = chain.getLastActions();
     chain.addStep();

+    query_analyzer->appendLimitBy(chain, !res.second_stage);
+    res.before_limit_by = chain.getLastActions();
+    chain.addStep();
+
     query_analyzer->appendProjectResult(chain);
     res.final_projection = chain.getLastActions();

@@ -456,9 +460,14 @@ void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputSt
     if (need_second_distinct_pass)
         executeDistinct(pipeline, false, Names());

+    if (query.limit_by_expression_list)
+    {
+        executeExpression(pipeline, expressions.before_limit_by);
+        executeLimitBy(pipeline);
+    }
+
     /** We must do projection after DISTINCT because projection may remove some columns.
      */
-    executeLimitBy(pipeline);
     executeProjection(pipeline, expressions.final_projection);

     /** Extremes are calculated before LIMIT, but after LIMIT BY. This is Ok.
@@ -1029,7 +1038,7 @@ void InterpreterSelectQuery::executeLimitBy(Pipeline & pipeline)
     Names columns;
     for (const auto & elem : query.limit_by_expression_list->children)
-        columns.emplace_back(elem->getAliasOrColumnName());
+        columns.emplace_back(elem->getColumnName());

     size_t value = safeGet<UInt64>(typeid_cast<ASTLiteral &>(*query.limit_by_value).value);

diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.h b/dbms/src/Interpreters/InterpreterSelectQuery.h
index 2150672e2c7..19e312189d8 100644
--- a/dbms/src/Interpreters/InterpreterSelectQuery.h
+++ b/dbms/src/Interpreters/InterpreterSelectQuery.h
@@ -124,6 +124,7 @@ private:
         ExpressionActionsPtr before_aggregation;
         ExpressionActionsPtr before_having;
        ExpressionActionsPtr before_order_and_select;
+        ExpressionActionsPtr before_limit_by;
         ExpressionActionsPtr final_projection;

         /// Columns from the SELECT list, before renaming them to aliases.
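
With the new before_limit_by step above, the LIMIT BY key may be any expression, including one
that never appears in the result: the key is computed by the extra chain step and then removed
by the final projection. A sketch of a query this enables (the tests below pin down the exact
behaviour):

SELECT number
FROM numbers(10)
LIMIT 1 BY number % 3;   -- keeps the first row for each remainder; the key column is not returned
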
diff --git a/dbms/tests/queries/0_stateless/00583_limit_by_expressions.reference b/dbms/tests/queries/0_stateless/00583_limit_by_expressions.reference new file mode 100644 index 00000000000..c9ce7e7c8aa --- /dev/null +++ b/dbms/tests/queries/0_stateless/00583_limit_by_expressions.reference @@ -0,0 +1,13 @@ +1 +1 +1 +1 +1 +0 +1 +2 +3 +0 +1 +5 +6 diff --git a/dbms/tests/queries/0_stateless/00583_limit_by_expressions.sql b/dbms/tests/queries/0_stateless/00583_limit_by_expressions.sql new file mode 100644 index 00000000000..0163a36f5f0 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00583_limit_by_expressions.sql @@ -0,0 +1,7 @@ +SELECT 1 FROM system.one LIMIT 1 BY 1; +SELECT 1 FROM system.one LIMIT 1 BY 1 AS one; +SELECT 1 as one FROM system.one LIMIT 1 BY 1; +SELECT 1 as one FROM system.one LIMIT 1 BY one; +SELECT 1 as one FROM system.one LIMIT 1 BY rand(); +SELECT number FROM numbers(10) LIMIT 2 BY number % 2; +SELECT number FROM numbers(10) LIMIT 2 BY intDiv(number, 5); From b9c8c04d4b33f04b8d539c4f0fc2e37405ffc80b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 1 Mar 2018 08:42:44 +0300 Subject: [PATCH 173/209] Fixed error #1947 --- dbms/src/Interpreters/InterpreterSelectQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 3fe7896ff58..3872dca74ca 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -458,7 +458,7 @@ void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputSt * then DISTINCT needs to be performed once again after merging all streams. */ if (need_second_distinct_pass) - executeDistinct(pipeline, false, Names()); + executeDistinct(pipeline, false, expressions.selected_columns); if (query.limit_by_expression_list) { From 15dc5da1907dc869a3c957e060dd254c0a9110e9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 1 Mar 2018 09:07:04 +0300 Subject: [PATCH 174/209] Fixed error #1947 --- dbms/src/DataStreams/IBlockInputStream.cpp | 2 +- dbms/src/Interpreters/InterpreterSelectQuery.cpp | 13 ++++++++----- dbms/src/Interpreters/InterpreterSelectQuery.h | 1 + 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/dbms/src/DataStreams/IBlockInputStream.cpp b/dbms/src/DataStreams/IBlockInputStream.cpp index bc0733c6529..0f0511b6fef 100644 --- a/dbms/src/DataStreams/IBlockInputStream.cpp +++ b/dbms/src/DataStreams/IBlockInputStream.cpp @@ -68,7 +68,7 @@ void IBlockInputStream::dumpTree(std::ostream & ostr, size_t indent, size_t mult ostr << String(indent, ' ') << getName(); if (multiplier > 1) ostr << " × " << multiplier; - // ostr << ": " << getHeader().dumpStructure(); + ostr << ": " << getHeader().dumpStructure(); ostr << std::endl; ++indent; diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 3872dca74ca..61bef8d3f8f 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -286,9 +286,12 @@ InterpreterSelectQuery::AnalysisResult InterpreterSelectQuery::analyzeExpression res.before_order_and_select = chain.getLastActions(); chain.addStep(); - query_analyzer->appendLimitBy(chain, !res.second_stage); - res.before_limit_by = chain.getLastActions(); - chain.addStep(); + if (query_analyzer->appendLimitBy(chain, !res.second_stage)) + { + res.has_limit_by = true; + res.before_limit_by = chain.getLastActions(); + 
chain.addStep(); + } query_analyzer->appendProjectResult(chain); res.final_projection = chain.getLastActions(); @@ -438,7 +441,7 @@ void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputSt /** Optimization - if there are several sources and there is LIMIT, then first apply the preliminary LIMIT, * limiting the number of rows in each up to `offset + limit`. */ - if (query.limit_length && pipeline.hasMoreThanOneStream() && !query.distinct && !query.limit_by_expression_list && !settings.extremes) + if (query.limit_length && pipeline.hasMoreThanOneStream() && !query.distinct && !expressions.has_limit_by && !settings.extremes) { executePreLimit(pipeline); } @@ -460,7 +463,7 @@ void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputSt if (need_second_distinct_pass) executeDistinct(pipeline, false, expressions.selected_columns); - if (query.limit_by_expression_list) + if (expressions.has_limit_by) { executeExpression(pipeline, expressions.before_limit_by); executeLimitBy(pipeline); diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.h b/dbms/src/Interpreters/InterpreterSelectQuery.h index 19e312189d8..dd000232a44 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.h +++ b/dbms/src/Interpreters/InterpreterSelectQuery.h @@ -118,6 +118,7 @@ private: bool need_aggregate = false; bool has_having = false; bool has_order_by = false; + bool has_limit_by = false; ExpressionActionsPtr before_join; /// including JOIN ExpressionActionsPtr before_where; From 8cb640ac5469334d20265dbfed739cd0fd111193 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 1 Mar 2018 09:08:09 +0300 Subject: [PATCH 175/209] Better UNION ALL: development #1947 --- dbms/src/DataStreams/IBlockInputStream.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/DataStreams/IBlockInputStream.cpp b/dbms/src/DataStreams/IBlockInputStream.cpp index 0f0511b6fef..440a1503d46 100644 --- a/dbms/src/DataStreams/IBlockInputStream.cpp +++ b/dbms/src/DataStreams/IBlockInputStream.cpp @@ -68,7 +68,7 @@ void IBlockInputStream::dumpTree(std::ostream & ostr, size_t indent, size_t mult ostr << String(indent, ' ') << getName(); if (multiplier > 1) ostr << " × " << multiplier; - ostr << ": " << getHeader().dumpStructure(); + //ostr << ": " << getHeader().dumpStructure(); ostr << std::endl; ++indent; From c80a0e549a998b8f4f337610ad75008f9e50129c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 1 Mar 2018 09:18:25 +0300 Subject: [PATCH 176/209] Fixed error in clickhouse-client [#CLICKHOUSE-2] --- dbms/src/Server/Client.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Server/Client.cpp b/dbms/src/Server/Client.cpp index 5b38c110bfb..78820fc142a 100644 --- a/dbms/src/Server/Client.cpp +++ b/dbms/src/Server/Client.cpp @@ -459,7 +459,7 @@ private: while (ws > 0 && isWhitespaceASCII(line[ws - 1])) --ws; - if (ws == 0 && line.empty()) + if (ws == 0 || line.empty()) continue; bool ends_with_semicolon = line[ws - 1] == ';'; From d8fdf33efd518c8d4a9139890856c78365b252c4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 1 Mar 2018 09:18:25 +0300 Subject: [PATCH 177/209] Fixed error in clickhouse-client [#CLICKHOUSE-2] --- dbms/src/Server/Client.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Server/Client.cpp b/dbms/src/Server/Client.cpp index 238be504aac..e4e13b5b9d9 100644 --- a/dbms/src/Server/Client.cpp +++ b/dbms/src/Server/Client.cpp @@ -459,7 +459,7 @@ private: while (ws > 0 && 
isWhitespaceASCII(line[ws - 1])) --ws; - if (ws == 0 && line.empty()) + if (ws == 0 || line.empty()) continue; bool ends_with_semicolon = line[ws - 1] == ';'; From 69e8133fdce30d8d5490eca64ba322a2b033f5d6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 1 Mar 2018 09:21:15 +0300 Subject: [PATCH 178/209] Added a test by Stas Sviridov #320 --- .../00584_view_union_all.reference | 2 ++ .../0_stateless/00584_view_union_all.sql | 29 +++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00584_view_union_all.reference create mode 100644 dbms/tests/queries/0_stateless/00584_view_union_all.sql diff --git a/dbms/tests/queries/0_stateless/00584_view_union_all.reference b/dbms/tests/queries/0_stateless/00584_view_union_all.reference new file mode 100644 index 00000000000..fbffd3a0d8e --- /dev/null +++ b/dbms/tests/queries/0_stateless/00584_view_union_all.reference @@ -0,0 +1,2 @@ +hello A 0 +hello ALL 0 diff --git a/dbms/tests/queries/0_stateless/00584_view_union_all.sql b/dbms/tests/queries/0_stateless/00584_view_union_all.sql new file mode 100644 index 00000000000..3460352e809 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00584_view_union_all.sql @@ -0,0 +1,29 @@ +DROP TABLE IF EXISTS test.Test; + +CREATE TABLE test.Test ( + createdDate Date, + str String, + key Enum8('A' = 0, 'B' = 1, 'ALL' = 2), + a Int64 +) +ENGINE = MergeTree(createdDate, str, 8192); + +INSERT INTO test.Test VALUES ('2000-01-01', 'hello', 'A', 123); + +SET max_threads = 1; + +CREATE VIEW test.TestView AS + SELECT str, key, sumIf(a, 0) AS sum + FROM test.Test + GROUP BY str, key + + UNION ALL + + SELECT str AS str, CAST('ALL' AS Enum8('A' = 0, 'B' = 1, 'ALL' = 2)) AS key, sumIf(a, 0) AS sum + FROM test.Test + GROUP BY str; + +SELECT * FROM test.TestView; + +DROP TABLE test.TestView; +DROP TABLE test.Test; From 322e0ef4ec3ebb24474b9549d37d0736b99f067a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 1 Mar 2018 09:35:54 +0300 Subject: [PATCH 179/209] Added a test from George #728 --- ...query_aggregation_column_removal.reference | 28 ++ ...ll_subquery_aggregation_column_removal.sql | 331 ++++++++++++++++++ 2 files changed, 359 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00585_union_all_subquery_aggregation_column_removal.reference create mode 100644 dbms/tests/queries/0_stateless/00585_union_all_subquery_aggregation_column_removal.sql diff --git a/dbms/tests/queries/0_stateless/00585_union_all_subquery_aggregation_column_removal.reference b/dbms/tests/queries/0_stateless/00585_union_all_subquery_aggregation_column_removal.reference new file mode 100644 index 00000000000..a0265bdb7ed --- /dev/null +++ b/dbms/tests/queries/0_stateless/00585_union_all_subquery_aggregation_column_removal.reference @@ -0,0 +1,28 @@ +{"total":"1","domain":"baidu.com"} +{"total":"2","domain":"facebook.com"} +{"total":"1","domain":"google.com"} +{"total":"2","domain":"yandex.ru"} +{"total":"1","domain":"baidu.com"} +{"total":"2","domain":"facebook.com"} +{"total":"1","domain":"google.com"} +{"total":"2","domain":"yandex.ru"} +1 baidu.com +2 facebook.com +1 google.com +2 yandex.ru +1 baidu.com +2 facebook.com +1 google.com +2 yandex.ru +1 baidu.com +1 google.com +2 facebook.com +2 yandex.ru +1 +1 +2 +2 +baidu.com +google.com +facebook.com +yandex.ru diff --git a/dbms/tests/queries/0_stateless/00585_union_all_subquery_aggregation_column_removal.sql b/dbms/tests/queries/0_stateless/00585_union_all_subquery_aggregation_column_removal.sql new file mode 100644 index 
00000000000..5c2a0b9701d --- /dev/null +++ b/dbms/tests/queries/0_stateless/00585_union_all_subquery_aggregation_column_removal.sql @@ -0,0 +1,331 @@ +DROP TABLE IF EXISTS test.clicks; +DROP TABLE IF EXISTS test.transactions; + +CREATE TABLE test.clicks (domain String) ENGINE = Memory; +CREATE TABLE test.transactions (domain String) ENGINE = Memory; + +INSERT INTO test.clicks VALUES ('facebook.com'), ('yandex.ru'), ('google.com'); +INSERT INTO test.transactions VALUES ('facebook.com'), ('yandex.ru'), ('baidu.com'); + + +SELECT + sum(total_count) AS total, + domain +FROM +( + SELECT + COUNT(*) AS total_count, + toUInt64(0) AS facebookHits, + domain + FROM test.transactions + GROUP BY domain + UNION ALL + SELECT + COUNT(*) AS total_count, + SUM(if(domain = 'facebook.com', 1, 0)) AS facebookHits, + domain + FROM test.clicks + GROUP BY domain +) +GROUP BY domain +ORDER BY domain +LIMIT 10 +FORMAT JSONEachRow; + + +SELECT + sum(total_count) AS total, + domain +FROM +( + SELECT + COUNT(*) AS total_count, + SUM(if(domain = 'facebook.com', 1, 0)) AS facebookHits, + domain + FROM test.clicks + GROUP BY domain +UNION ALL + SELECT + COUNT(*) AS total_count, + toUInt64(0) AS facebookHits, + domain + FROM test.transactions + GROUP BY domain +) +GROUP BY domain +ORDER BY domain +LIMIT 10 +FORMAT JSONEachRow; + + +SELECT DISTINCT * FROM +( +SELECT + sum(total_count) AS total, + domain +FROM +( + SELECT + COUNT(*) AS total_count, + toUInt64(0) AS facebookHits, + domain + FROM test.transactions + GROUP BY domain + UNION ALL + SELECT + COUNT(*) AS total_count, + SUM(if(domain = 'facebook.com', 1, 0)) AS facebookHits, + domain + FROM test.clicks + GROUP BY domain +) +GROUP BY domain +ORDER BY domain +LIMIT 10 + +UNION ALL + +SELECT + sum(total_count) AS total, + domain +FROM +( + SELECT + COUNT(*) AS total_count, + SUM(if(domain = 'facebook.com', 1, 0)) AS facebookHits, + domain + FROM test.clicks + GROUP BY domain +UNION ALL + SELECT + COUNT(*) AS total_count, + toUInt64(0) AS facebookHits, + domain + FROM test.transactions + GROUP BY domain +) +GROUP BY domain +ORDER BY domain +LIMIT 10 +); + + +SELECT DISTINCT total, domain FROM +( +SELECT + sum(total_count) AS total, + sum(facebookHits) AS facebook, + domain +FROM +( + SELECT + COUNT(*) AS total_count, + toUInt64(0) AS facebookHits, + domain + FROM test.transactions + GROUP BY domain + UNION ALL + SELECT + COUNT(*) AS total_count, + SUM(if(domain = 'facebook.com', 1, 0)) AS facebookHits, + domain + FROM test.clicks + GROUP BY domain +) +GROUP BY domain +ORDER BY domain +LIMIT 10 + +UNION ALL + +SELECT + sum(total_count) AS total, + max(facebookHits) AS facebook, + domain +FROM +( + SELECT + COUNT(*) AS total_count, + SUM(if(domain = 'facebook.com', 1, 0)) AS facebookHits, + domain + FROM test.clicks + GROUP BY domain +UNION ALL + SELECT + COUNT(*) AS total_count, + toUInt64(0) AS facebookHits, + domain + FROM test.transactions + GROUP BY domain +) +GROUP BY domain +ORDER BY domain +LIMIT 10 +) +ORDER BY domain, total; + + +SELECT * FROM +( +SELECT + sum(total_count) AS total, + domain +FROM +( + SELECT + COUNT(*) AS total_count, + toUInt64(0) AS facebookHits, + domain + FROM test.transactions + GROUP BY domain + UNION ALL + SELECT + COUNT(*) AS total_count, + SUM(if(domain = 'facebook.com', 1, 0)) AS facebookHits, + domain + FROM test.clicks + GROUP BY domain +) +GROUP BY domain +ORDER BY domain +LIMIT 10 +) +ALL FULL OUTER JOIN +( +SELECT + sum(total_count) AS total, + domain +FROM +( + SELECT + COUNT(*) AS total_count, + SUM(if(domain = 'facebook.com', 
1, 0)) AS facebookHits, + domain + FROM test.clicks + GROUP BY domain +UNION ALL + SELECT + COUNT(*) AS total_count, + toUInt64(0) AS facebookHits, + domain + FROM test.transactions + GROUP BY domain +) +GROUP BY domain +ORDER BY domain +LIMIT 10 +) +USING (total, domain) +ORDER BY total, domain; + + +SELECT total FROM +( +SELECT + sum(total_count) AS total, + domain +FROM +( + SELECT + COUNT(*) AS total_count, + toUInt64(0) AS facebookHits, + domain + FROM test.transactions + GROUP BY domain + UNION ALL + SELECT + COUNT(*) AS total_count, + SUM(if(domain = 'facebook.com', 1, 0)) AS facebookHits, + domain + FROM test.clicks + GROUP BY domain +) +GROUP BY domain +ORDER BY domain +LIMIT 10 +) +ALL FULL OUTER JOIN +( +SELECT + sum(total_count) AS total, + domain +FROM +( + SELECT + COUNT(*) AS total_count, + SUM(if(domain = 'facebook.com', 1, 0)) AS facebookHits, + domain + FROM test.clicks + GROUP BY domain +UNION ALL + SELECT + COUNT(*) AS total_count, + toUInt64(0) AS facebookHits, + domain + FROM test.transactions + GROUP BY domain +) +GROUP BY domain +ORDER BY domain +LIMIT 10 +) +USING (total, domain) +ORDER BY total, domain; + + +SELECT domain FROM +( +SELECT + sum(total_count) AS total, + domain +FROM +( + SELECT + COUNT(*) AS total_count, + toUInt64(0) AS facebookHits, + domain + FROM test.transactions + GROUP BY domain + UNION ALL + SELECT + COUNT(*) AS total_count, + SUM(if(domain = 'facebook.com', 1, 0)) AS facebookHits, + domain + FROM test.clicks + GROUP BY domain +) +GROUP BY domain +ORDER BY domain +LIMIT 10 +) +ALL FULL OUTER JOIN +( +SELECT + sum(total_count) AS total, + domain +FROM +( + SELECT + COUNT(*) AS total_count, + SUM(if(domain = 'facebook.com', 1, 0)) AS facebookHits, + domain + FROM test.clicks + GROUP BY domain +UNION ALL + SELECT + COUNT(*) AS total_count, + toUInt64(0) AS facebookHits, + domain + FROM test.transactions + GROUP BY domain +) +GROUP BY domain +ORDER BY domain +LIMIT 10 +) +USING (total, domain) +ORDER BY total, domain; + + +DROP TABLE test.clicks; +DROP TABLE test.transactions; From 0bd9575125172ac0b1de913fa9a874a4abe56762 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 1 Mar 2018 09:50:46 +0300 Subject: [PATCH 180/209] Added a test from FacedSID #859 --- ...ing_unused_columns_from_subquery.reference | 1 + ..._removing_unused_columns_from_subquery.sql | 48 +++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00586_removing_unused_columns_from_subquery.reference create mode 100644 dbms/tests/queries/0_stateless/00586_removing_unused_columns_from_subquery.sql diff --git a/dbms/tests/queries/0_stateless/00586_removing_unused_columns_from_subquery.reference b/dbms/tests/queries/0_stateless/00586_removing_unused_columns_from_subquery.reference new file mode 100644 index 00000000000..6db331af725 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00586_removing_unused_columns_from_subquery.reference @@ -0,0 +1 @@ +nan diff --git a/dbms/tests/queries/0_stateless/00586_removing_unused_columns_from_subquery.sql b/dbms/tests/queries/0_stateless/00586_removing_unused_columns_from_subquery.sql new file mode 100644 index 00000000000..691cc6000df --- /dev/null +++ b/dbms/tests/queries/0_stateless/00586_removing_unused_columns_from_subquery.sql @@ -0,0 +1,48 @@ +DROP TABLE IF EXISTS test.local_statements; +DROP TABLE IF EXISTS test.statements; + +CREATE TABLE test.local_statements ( statementId String, eventDate Date, eventHour DateTime, eventTime DateTime, verb String, objectId String, onCourse 
UInt8, courseId UInt16, contextRegistration String, resultScoreRaw Float64, resultScoreMin Float64, resultScoreMax Float64, resultSuccess UInt8, resultCompletition UInt8, resultDuration UInt32, resultResponse String, learnerId String, learnerHash String, contextId UInt16) ENGINE = MergeTree ORDER BY tuple(); + +CREATE TABLE test.statements ( statementId String, eventDate Date, eventHour DateTime, eventTime DateTime, verb String, objectId String, onCourse UInt8, courseId UInt16, contextRegistration String, resultScoreRaw Float64, resultScoreMin Float64, resultScoreMax Float64, resultSuccess UInt8, resultCompletition UInt8, resultDuration UInt32, resultResponse String, learnerId String, learnerHash String, contextId UInt16) ENGINE = Distributed(test_shard_localhost, 'test', 'local_statements', sipHash64(learnerHash)); + +INSERT INTO test.local_statements FORMAT CSV "2b3b04ee-0bb8-4200-906f-d47c48e56bd0","2016-08-25","2016-08-25 14:00:00","2016-08-25 14:43:34","http://adlnet.gov/expapi/verbs/passed","https://crmm.ru/xapi/courses/spp/2/0/3/2/8",0,1,"c13d788c-26e0-40e3-bacb-a1ff78ee1518",100,0,0,0,0,0,"","https://sberbank-school.ru/xapi/accounts/userid/94312","6f696f938a69b5e173093718e1c2bbf2",0 + +SELECT avg(diff) +FROM +( + SELECT * + FROM + ( + SELECT + learnerHash, + passed - eventTime AS diff + FROM test.statements + GLOBAL ANY INNER JOIN + ( + SELECT + learnerHash, + argMax(eventTime, resultScoreRaw) AS passed + FROM + ( + SELECT + learnerHash, + eventTime, + resultScoreRaw + FROM test.statements + WHERE (courseId = 1) AND (onCourse = 0) + AND (verb = 'http://adlnet.gov/expapi/verbs/passed') AND (objectId = 'https://crmm.ru/xapi/courses/spp/1/1/0-1') + ORDER BY eventTime ASC + ) + GROUP BY learnerHash + ) USING (learnerHash) + WHERE (courseId = 1) AND (onCourse = 0) + AND (verb = 'http://adlnet.gov/expapi/verbs/interacted') AND (eventTime <= passed) AND (diff > 0) + ORDER BY eventTime DESC + LIMIT 1 BY learnerHash + ) + ORDER BY diff DESC + LIMIT 7, 126 +); + +DROP TABLE test.local_statements; +DROP TABLE test.statements; From 37f27b3e4be88fe7caa9059b3427c57ccc5b64fc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 1 Mar 2018 10:05:03 +0300 Subject: [PATCH 181/209] Added a test for type conversions in UNION ALL #1947 --- .../00587_union_all_type_conversions.reference | 18 ++++++++++++++++++ .../00587_union_all_type_conversions.sql | 12 ++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00587_union_all_type_conversions.reference create mode 100644 dbms/tests/queries/0_stateless/00587_union_all_type_conversions.sql diff --git a/dbms/tests/queries/0_stateless/00587_union_all_type_conversions.reference b/dbms/tests/queries/0_stateless/00587_union_all_type_conversions.reference new file mode 100644 index 00000000000..be82f93ae83 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00587_union_all_type_conversions.reference @@ -0,0 +1,18 @@ +1 +-1 +1 Int16 +-1 Int16 +1 +\N +1 Nullable(UInt8) +\N Nullable(UInt8) +1 +\N +1 +1 Nullable(Float64) 2 +\N Nullable(Float64) 1 +1 +2 +3 +nan +\N diff --git a/dbms/tests/queries/0_stateless/00587_union_all_type_conversions.sql b/dbms/tests/queries/0_stateless/00587_union_all_type_conversions.sql new file mode 100644 index 00000000000..ad7b0a81149 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00587_union_all_type_conversions.sql @@ -0,0 +1,12 @@ +SET max_threads = 1; + +SELECT 1 UNION ALL SELECT -1; +SELECT x, toTypeName(x) FROM (SELECT 1 AS x UNION ALL SELECT -1); + +SELECT 1 UNION ALL SELECT NULL; 
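+-- As the reference file above shows, the branches are unified to a common type:
+-- UInt8 and NULL become Nullable(UInt8) here.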
+SELECT x, toTypeName(x) FROM (SELECT 1 AS x UNION ALL SELECT NULL); + +SELECT 1 AS x UNION ALL SELECT NULL UNION ALL SELECT 1.0; +SELECT x, toTypeName(x), count() FROM (SELECT 1 AS x UNION ALL SELECT NULL UNION ALL SELECT 1.0) GROUP BY x; + +SELECT arrayJoin(x) AS res FROM (SELECT [1, 2, 3] AS x UNION ALL SELECT [nan, NULL]) ORDER BY res; From c42d3ac17fbf73f1e67c1329171f121d24343fdc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 1 Mar 2018 10:11:35 +0300 Subject: [PATCH 182/209] Fixed error (PREWHERE in Distributed table) #1947 --- dbms/src/Storages/StorageDistributed.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp index edeeb59cba6..c1681e6fbea 100644 --- a/dbms/src/Storages/StorageDistributed.cpp +++ b/dbms/src/Storages/StorageDistributed.cpp @@ -171,7 +171,7 @@ StoragePtr StorageDistributed::createWithOwnCluster( BlockInputStreams StorageDistributed::read( - const Names & column_names, + const Names & /*column_names*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum & processed_stage, @@ -191,8 +191,7 @@ BlockInputStreams StorageDistributed::read( const auto & modified_query_ast = rewriteSelectQuery( query_info.query, remote_database, remote_table); - Block header = materializeBlock(InterpreterSelectQuery(query_info.query, context, {}, processed_stage, 0, - std::make_shared(getSampleBlockForColumns(column_names))).execute().in->getHeader()); + Block header = materializeBlock(InterpreterSelectQuery(query_info.query, context, {}, processed_stage).getSampleBlock()); ClusterProxy::SelectStreamFactory select_stream_factory( header, processed_stage, QualifiedTableName{remote_database, remote_table}, context.getExternalTables()); From 4c87ae427dd339deefc4382c13dc745bc0b850d8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 1 Mar 2018 10:18:15 +0300 Subject: [PATCH 183/209] Added test #1947 --- .../00588_distributed_prewhere.reference | 5 +++++ .../0_stateless/00588_distributed_prewhere.sql | 15 +++++++++++++++ 2 files changed, 20 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00588_distributed_prewhere.reference create mode 100644 dbms/tests/queries/0_stateless/00588_distributed_prewhere.sql diff --git a/dbms/tests/queries/0_stateless/00588_distributed_prewhere.reference b/dbms/tests/queries/0_stateless/00588_distributed_prewhere.reference new file mode 100644 index 00000000000..3e3c6d8b378 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00588_distributed_prewhere.reference @@ -0,0 +1,5 @@ +1 hello +1 hello +1 hello +1 hello +1 hello diff --git a/dbms/tests/queries/0_stateless/00588_distributed_prewhere.sql b/dbms/tests/queries/0_stateless/00588_distributed_prewhere.sql new file mode 100644 index 00000000000..41ab4e7d8e7 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00588_distributed_prewhere.sql @@ -0,0 +1,15 @@ +DROP TABLE IF EXISTS test.mergetree; +DROP TABLE IF EXISTS test.distributed; + +CREATE TABLE test.mergetree (x UInt64, s String) ENGINE = MergeTree ORDER BY x; +INSERT INTO test.mergetree VALUES (1, 'hello'), (2, 'world'); + +SELECT * FROM test.mergetree PREWHERE x = 1 WHERE s LIKE '%l%' ORDER BY x, s; +SELECT * FROM remote('127.0.0.{1,2,3}', test.mergetree) PREWHERE x = 1 WHERE s LIKE '%l%' ORDER BY x, s; + +CREATE TABLE test.distributed AS test.mergetree ENGINE = Distributed(test_shard_localhost, test, mergetree); + +SELECT * FROM test.distributed PREWHERE x = 1 WHERE s LIKE '%l%' 
ORDER BY x, s; + +DROP TABLE test.mergetree; +DROP TABLE test.distributed; From 0908ed15615112283762ebef35c472268721668e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 1 Mar 2018 10:27:02 +0300 Subject: [PATCH 184/209] Added test #1947 --- ..._removal_unused_columns_aggregation.reference | 16 ++++++++++++++++ .../00589_removal_unused_columns_aggregation.sql | 13 +++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00589_removal_unused_columns_aggregation.reference create mode 100644 dbms/tests/queries/0_stateless/00589_removal_unused_columns_aggregation.sql diff --git a/dbms/tests/queries/0_stateless/00589_removal_unused_columns_aggregation.reference b/dbms/tests/queries/0_stateless/00589_removal_unused_columns_aggregation.reference new file mode 100644 index 00000000000..c67029cbbe5 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00589_removal_unused_columns_aggregation.reference @@ -0,0 +1,16 @@ +45 4.5 +45 4.5 +45 +4.5 +1 +1 +1 +1 +1 +1 +2 +4 +1 +1 +1 +1 diff --git a/dbms/tests/queries/0_stateless/00589_removal_unused_columns_aggregation.sql b/dbms/tests/queries/0_stateless/00589_removal_unused_columns_aggregation.sql new file mode 100644 index 00000000000..67f8e49f428 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00589_removal_unused_columns_aggregation.sql @@ -0,0 +1,13 @@ +SELECT * FROM (SELECT sum(x) AS a, avg(x) AS b FROM (SELECT number AS x FROM numbers(10))); +SELECT a, b FROM (SELECT sum(x) AS a, avg(x) AS b FROM (SELECT number AS x FROM numbers(10))); +SELECT a FROM (SELECT sum(x) AS a, avg(x) AS b FROM (SELECT number AS x FROM numbers(10))); +SELECT b FROM (SELECT sum(x) AS a, avg(x) AS b FROM (SELECT number AS x FROM numbers(10))); +SELECT 1 FROM (SELECT sum(x) AS a, avg(x) AS b FROM (SELECT number AS x FROM numbers(10))); +SELECT 1 FROM (SELECT sum(x), avg(x) FROM (SELECT number AS x FROM numbers(10))); +SELECT count() FROM (SELECT sum(x) AS a, avg(x) AS b FROM (SELECT number AS x FROM numbers(10))); +SELECT count() FROM (SELECT sum(x), avg(x) FROM (SELECT number AS x FROM numbers(10))); +SELECT count() FROM (SELECT sum(x), avg(x) FROM (SELECT number % 3 AS x FROM numbers(10) GROUP BY x)); +SELECT 1 FROM (SELECT DISTINCT sum(x), avg(x) FROM (SELECT number AS x FROM numbers(10))); +SELECT count() FROM (SELECT sum(x), arrayJoin([min(x), max(x)]) FROM (SELECT number AS x FROM numbers(10))); +SELECT count() FROM (SELECT arrayJoin([sum(x), medianExact(x)]), arrayJoin([min(x), max(x)]) FROM (SELECT number AS x FROM numbers(10))); +SELECT 1 FROM (SELECT arrayJoin([sum(x), medianExact(x)]), arrayJoin([min(x), max(x)]) FROM (SELECT number AS x FROM numbers(10))); From 28df0c37125541e1b020e210bdea4422e3ae2a02 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 1 Mar 2018 10:43:08 +0300 Subject: [PATCH 185/209] Removed unused constant #1947 --- dbms/src/Common/ErrorCodes.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index 40a630c566a..96776c8d30a 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -262,7 +262,6 @@ namespace ErrorCodes extern const int PARTITION_ALREADY_EXISTS = 256; extern const int PARTITION_DOESNT_EXIST = 257; extern const int UNION_ALL_RESULT_STRUCTURES_MISMATCH = 258; - extern const int UNION_ALL_COLUMN_ALIAS_MISMATCH = 259; extern const int CLIENT_OUTPUT_FORMAT_SPECIFIED = 260; extern const int UNKNOWN_BLOCK_INFO_FIELD = 261; extern const int BAD_COLLATION = 262; From 
a27d27ac94647320df294a37d396a094ec7209ee Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 1 Mar 2018 10:47:09 +0300 Subject: [PATCH 186/209] Added test #1947 [#CLICKHOUSE-3264] --- .../00590_limit_by_column_removal.reference | 12 ++++++++++++ .../0_stateless/00590_limit_by_column_removal.sql | 3 +++ 2 files changed, 15 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00590_limit_by_column_removal.reference create mode 100644 dbms/tests/queries/0_stateless/00590_limit_by_column_removal.sql diff --git a/dbms/tests/queries/0_stateless/00590_limit_by_column_removal.reference b/dbms/tests/queries/0_stateless/00590_limit_by_column_removal.reference new file mode 100644 index 00000000000..352a64620be --- /dev/null +++ b/dbms/tests/queries/0_stateless/00590_limit_by_column_removal.reference @@ -0,0 +1,12 @@ +1 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +45 diff --git a/dbms/tests/queries/0_stateless/00590_limit_by_column_removal.sql b/dbms/tests/queries/0_stateless/00590_limit_by_column_removal.sql new file mode 100644 index 00000000000..fd8b7eeed94 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00590_limit_by_column_removal.sql @@ -0,0 +1,3 @@ +SELECT x FROM (SELECT 1 AS x, 2 AS y) LIMIT 1 BY y; +SELECT x FROM (SELECT number AS x, number + 1 AS y FROM system.numbers LIMIT 10) ORDER BY y LIMIT 1 BY y; +SELECT sum(x) FROM (SELECT x, y FROM (SELECT number AS x, number + 1 AS y FROM system.numbers LIMIT 10) ORDER BY y LIMIT 1 BY y); From 8197574e37768f5981ea1b140f8bbcd280b91b86 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 1 Mar 2018 11:07:46 +0300 Subject: [PATCH 187/209] Added test #1947 --- .../00591_columns_removal_union_all.reference | 21 +++++++++++++++++++ .../00591_columns_removal_union_all.sql | 4 ++++ 2 files changed, 25 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00591_columns_removal_union_all.reference create mode 100644 dbms/tests/queries/0_stateless/00591_columns_removal_union_all.sql diff --git a/dbms/tests/queries/0_stateless/00591_columns_removal_union_all.reference b/dbms/tests/queries/0_stateless/00591_columns_removal_union_all.reference new file mode 100644 index 00000000000..680a5020bea --- /dev/null +++ b/dbms/tests/queries/0_stateless/00591_columns_removal_union_all.reference @@ -0,0 +1,21 @@ +0 +0 +1 +1 +1 +2 +2 +3 +3 +4 +4 +5 +5 +6 +6 +7 +7 +8 +8 +9 +9 diff --git a/dbms/tests/queries/0_stateless/00591_columns_removal_union_all.sql b/dbms/tests/queries/0_stateless/00591_columns_removal_union_all.sql new file mode 100644 index 00000000000..cb5771603b2 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00591_columns_removal_union_all.sql @@ -0,0 +1,4 @@ +SELECT * FROM +( + SELECT x FROM (SELECT x, y, arrayJoin(z) FROM (SELECT number AS x, number + 1 AS y, [number % 2, number % 3] AS z FROM numbers(10)) UNION ALL SELECT 1, 2, 3) +) ORDER BY x; From 559da607fce65258709d94a8062ad7b09296689a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 1 Mar 2018 11:33:48 +0300 Subject: [PATCH 188/209] Added test #878 --- .../00592_union_all_different_aliases.reference | 4 ++++ .../0_stateless/00592_union_all_different_aliases.sql | 7 +++++++ 2 files changed, 11 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00592_union_all_different_aliases.reference create mode 100644 dbms/tests/queries/0_stateless/00592_union_all_different_aliases.sql diff --git a/dbms/tests/queries/0_stateless/00592_union_all_different_aliases.reference b/dbms/tests/queries/0_stateless/00592_union_all_different_aliases.reference new file mode 100644 
index 00000000000..98fb6a68656 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00592_union_all_different_aliases.reference @@ -0,0 +1,4 @@ +1 +1 +1 +1 diff --git a/dbms/tests/queries/0_stateless/00592_union_all_different_aliases.sql b/dbms/tests/queries/0_stateless/00592_union_all_different_aliases.sql new file mode 100644 index 00000000000..0c837a5a4f5 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00592_union_all_different_aliases.sql @@ -0,0 +1,7 @@ +SELECT 1 AS a +UNION ALL +SELECT 1 AS b; + +SELECT 1 AS a +UNION ALL +SELECT 1 AS a; From b1628507545d2883fa8b77fdefdf1fd3b93654c1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 1 Mar 2018 11:46:59 +0300 Subject: [PATCH 189/209] Fixed error in checking limit on number of columns to read; added test for unnecessary columns removal inside subqueries with UNION ALL #1947 [#CLICKHOUSE-2210] --- dbms/src/Interpreters/InterpreterSelectQuery.cpp | 9 +++++++-- .../00593_union_all_assert_columns_removed.reference | 4 ++++ .../00593_union_all_assert_columns_removed.sql | 10 ++++++++++ 3 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00593_union_all_assert_columns_removed.reference create mode 100644 dbms/tests/queries/0_stateless/00593_union_all_assert_columns_removed.sql diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 61bef8d3f8f..935c44c743a 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -569,7 +569,8 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline const Settings & settings = context.getSettingsRef(); /// Limitation on the number of columns to read. - if (settings.limits.max_columns_to_read && required_columns.size() > settings.limits.max_columns_to_read) + /// It's not applied in 'dry_run' mode, because the query could be analyzed without removal of unnecessary columns. + if (!dry_run && settings.limits.max_columns_to_read && required_columns.size() > settings.limits.max_columns_to_read) throw Exception("Limit for number of columns to read exceeded. " "Requested: " + toString(required_columns.size()) + ", maximum: " + settings.limits.max_columns_to_read.toString(), @@ -626,7 +627,11 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline else if (interpreter_subquery) { /// Subquery. 
- pipeline.streams = interpreter_subquery->executeWithMultipleStreams(); + + if (!dry_run) + pipeline.streams = interpreter_subquery->executeWithMultipleStreams(); + else + pipeline.streams.emplace_back(std::make_shared(interpreter_subquery->getSampleBlock())); } else if (storage) { diff --git a/dbms/tests/queries/0_stateless/00593_union_all_assert_columns_removed.reference b/dbms/tests/queries/0_stateless/00593_union_all_assert_columns_removed.reference new file mode 100644 index 00000000000..98fb6a68656 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00593_union_all_assert_columns_removed.reference @@ -0,0 +1,4 @@ +1 +1 +1 +1 diff --git a/dbms/tests/queries/0_stateless/00593_union_all_assert_columns_removed.sql b/dbms/tests/queries/0_stateless/00593_union_all_assert_columns_removed.sql new file mode 100644 index 00000000000..7f9a781985b --- /dev/null +++ b/dbms/tests/queries/0_stateless/00593_union_all_assert_columns_removed.sql @@ -0,0 +1,10 @@ +DROP TABLE IF EXISTS test.columns; +CREATE TABLE test.columns (a UInt8, b UInt8, c UInt8) ENGINE = Memory; +INSERT INTO test.columns VALUES (1, 2, 3); +SET max_columns_to_read = 1; + +SELECT a FROM (SELECT * FROM test.columns); +SELECT a FROM (SELECT * FROM (SELECT * FROM test.columns)); +SELECT a FROM (SELECT * FROM test.columns UNION ALL SELECT * FROM test.columns); + +DROP TABLE test.columns; From a2e649f8c0c46d18eaea89ce047fadc5d28bd438 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 1 Mar 2018 11:56:12 +0300 Subject: [PATCH 190/209] Updated test #1947 --- ...where.reference => 00588_shard_distributed_prewhere.reference} | 0 ...tributed_prewhere.sql => 00588_shard_distributed_prewhere.sql} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename dbms/tests/queries/0_stateless/{00588_distributed_prewhere.reference => 00588_shard_distributed_prewhere.reference} (100%) rename dbms/tests/queries/0_stateless/{00588_distributed_prewhere.sql => 00588_shard_distributed_prewhere.sql} (100%) diff --git a/dbms/tests/queries/0_stateless/00588_distributed_prewhere.reference b/dbms/tests/queries/0_stateless/00588_shard_distributed_prewhere.reference similarity index 100% rename from dbms/tests/queries/0_stateless/00588_distributed_prewhere.reference rename to dbms/tests/queries/0_stateless/00588_shard_distributed_prewhere.reference diff --git a/dbms/tests/queries/0_stateless/00588_distributed_prewhere.sql b/dbms/tests/queries/0_stateless/00588_shard_distributed_prewhere.sql similarity index 100% rename from dbms/tests/queries/0_stateless/00588_distributed_prewhere.sql rename to dbms/tests/queries/0_stateless/00588_shard_distributed_prewhere.sql From ca60c0ccdfccd67d621ab4b5783445f11f2db08a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 1 Mar 2018 12:32:00 +0300 Subject: [PATCH 191/209] Added a test from Mikhail Kalashnikov for ALIAS columns in Distributed tables #590 #1947 --- .../00594_alias_in_distributed.reference | 60 +++++++++++++++++++ .../00594_alias_in_distributed.sql | 52 ++++++++++++++++ 2 files changed, 112 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00594_alias_in_distributed.reference create mode 100644 dbms/tests/queries/0_stateless/00594_alias_in_distributed.sql diff --git a/dbms/tests/queries/0_stateless/00594_alias_in_distributed.reference b/dbms/tests/queries/0_stateless/00594_alias_in_distributed.reference new file mode 100644 index 00000000000..12d58133e97 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00594_alias_in_distributed.reference @@ -0,0 +1,60 @@ +1 12345 12345 +2 54321 0 +0 
0 +1 12345 12345 +2 54321 0 +0 0 +1 12345 +2 54321 +0 +1 12345 12345 +2 54321 0 +0 0 +1 12345 12345 +2 54321 0 +0 0 +1 12345 +2 54321 +0 +12345 12345 +54321 0 + 0 +12345 +0 +0 +12345 12345 +54321 0 + 0 +12345 +0 +0 +1 12345 12345 +2 54321 0 +0 0 +1 12345 12345 +2 54321 0 +0 0 +1 12345 +2 54321 +0 +1 12345 12345 +2 54321 0 +0 0 +1 12345 12345 +2 54321 0 +0 0 +1 12345 +2 54321 +0 +12345 12345 +54321 0 + 0 +12345 +0 +0 +12345 12345 +54321 0 + 0 +12345 +0 +0 diff --git a/dbms/tests/queries/0_stateless/00594_alias_in_distributed.sql b/dbms/tests/queries/0_stateless/00594_alias_in_distributed.sql new file mode 100644 index 00000000000..901b90dcb3b --- /dev/null +++ b/dbms/tests/queries/0_stateless/00594_alias_in_distributed.sql @@ -0,0 +1,52 @@ +DROP TABLE IF EXISTS test.alias_local10; +DROP TABLE IF EXISTS test.alias10; + +CREATE TABLE test.alias_local10 ( + Id Int8, + EventDate Date DEFAULT '2000-01-01', + field1 Int8, + field2 String, + field3 ALIAS CASE WHEN field1 = 1 THEN field2 ELSE '0' END +) ENGINE = MergeTree(EventDate, (Id, EventDate), 8192); + +CREATE TABLE test.alias10 AS test.alias_local10 ENGINE = Distributed(test_shard_localhost, test, alias_local10, cityHash64(Id)); + +INSERT INTO test.alias_local10 (Id, EventDate, field1, field2) VALUES (1, '2000-01-01', 1, '12345'), (2, '2000-01-01', 2, '54321'), (3, '2000-01-01', 0, ''); + +SELECT field1, field2, field3 FROM test.alias_local10; +SELECT field1, field2, field3 FROM test.alias_local10 WHERE EventDate='2000-01-01'; +SELECT field1, field2 FROM test.alias_local10 WHERE EventDate='2000-01-01'; + +SELECT field1, field2, field3 FROM test.alias10; +SELECT field1, field2, field3 FROM test.alias10 WHERE EventDate='2000-01-01'; +SELECT field1, field2 FROM test.alias10 WHERE EventDate='2000-01-01'; + +SELECT field2, field3 FROM test.alias10 WHERE EventDate='2000-01-01'; +SELECT field3 FROM test.alias10 WHERE EventDate='2000-01-01'; +SELECT field2, field3 FROM test.alias10; +SELECT field3 FROM test.alias10; + +DROP TABLE test.alias10; +CREATE TABLE test.alias10 ( + Id Int8, + EventDate Date, + field1 Int8, + field2 String, + field3 String +) ENGINE = Distributed(test_shard_localhost, test, alias_local10); + +SELECT field1, field2, field3 FROM test.alias_local10; +SELECT field1, field2, field3 FROM test.alias_local10 WHERE EventDate='2000-01-01'; +SELECT field1, field2 FROM test.alias_local10 WHERE EventDate='2000-01-01'; + +SELECT field1, field2, field3 FROM test.alias10; +SELECT field1, field2, field3 FROM test.alias10 WHERE EventDate='2000-01-01'; +SELECT field1, field2 FROM test.alias10 WHERE EventDate='2000-01-01'; + +SELECT field2, field3 FROM test.alias10 WHERE EventDate='2000-01-01'; +SELECT field3 FROM test.alias10 WHERE EventDate='2000-01-01'; +SELECT field2, field3 FROM test.alias10; +SELECT field3 FROM test.alias10; + +DROP TABLE test.alias_local10; +DROP TABLE test.alias10; From 37f9034575202f58b041b9d8fbbe164546882136 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 1 Mar 2018 12:41:40 +0300 Subject: [PATCH 192/209] Updated test #1550 --- .../0_stateless/00594_alias_in_distributed.reference | 6 ++++++ .../queries/0_stateless/00594_alias_in_distributed.sql | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/dbms/tests/queries/0_stateless/00594_alias_in_distributed.reference b/dbms/tests/queries/0_stateless/00594_alias_in_distributed.reference index 12d58133e97..194cd8cd230 100644 --- a/dbms/tests/queries/0_stateless/00594_alias_in_distributed.reference +++ 
b/dbms/tests/queries/0_stateless/00594_alias_in_distributed.reference @@ -28,6 +28,9 @@ 12345 0 0 +1 +12345 +12345 1 12345 12345 2 54321 0 0 0 @@ -58,3 +61,6 @@ 12345 0 0 +1 +12345 +12345 diff --git a/dbms/tests/queries/0_stateless/00594_alias_in_distributed.sql b/dbms/tests/queries/0_stateless/00594_alias_in_distributed.sql index 901b90dcb3b..54e47c78942 100644 --- a/dbms/tests/queries/0_stateless/00594_alias_in_distributed.sql +++ b/dbms/tests/queries/0_stateless/00594_alias_in_distributed.sql @@ -26,6 +26,10 @@ SELECT field3 FROM test.alias10 WHERE EventDate='2000-01-01'; SELECT field2, field3 FROM test.alias10; SELECT field3 FROM test.alias10; +SELECT field1 FROM test.alias10 WHERE field3 = '12345'; +SELECT field2 FROM test.alias10 WHERE field3 = '12345'; +SELECT field3 FROM test.alias10 WHERE field3 = '12345'; + DROP TABLE test.alias10; CREATE TABLE test.alias10 ( Id Int8, @@ -48,5 +52,9 @@ SELECT field3 FROM test.alias10 WHERE EventDate='2000-01-01'; SELECT field2, field3 FROM test.alias10; SELECT field3 FROM test.alias10; +SELECT field1 FROM test.alias10 WHERE field3 = '12345'; +SELECT field2 FROM test.alias10 WHERE field3 = '12345'; +SELECT field3 FROM test.alias10 WHERE field3 = '12345'; + DROP TABLE test.alias_local10; DROP TABLE test.alias10; From ce2b100deb6e2f81af965bb48098bdd61c916653 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Thu, 1 Mar 2018 22:49:16 +0800 Subject: [PATCH 193/209] ISSUES-66 add insert into view test --- .../00595_insert_into_view.reference | 2 ++ .../0_stateless/00595_insert_into_view.sh | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100755 dbms/tests/queries/0_stateless/00595_insert_into_view.reference create mode 100755 dbms/tests/queries/0_stateless/00595_insert_into_view.sh diff --git a/dbms/tests/queries/0_stateless/00595_insert_into_view.reference b/dbms/tests/queries/0_stateless/00595_insert_into_view.reference new file mode 100755 index 00000000000..53864072aac --- /dev/null +++ b/dbms/tests/queries/0_stateless/00595_insert_into_view.reference @@ -0,0 +1,2 @@ +1 +test_string diff --git a/dbms/tests/queries/0_stateless/00595_insert_into_view.sh b/dbms/tests/queries/0_stateless/00595_insert_into_view.sh new file mode 100755 index 00000000000..030f1931928 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00595_insert_into_view.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + +exception_pattern="Code: 48.*Method write is not supported by storage View" + +${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test.test;" +${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test.test_view;" + +${CLICKHOUSE_CLIENT} --query "CREATE TABLE test.test (s String) ENGINE = Log;" +${CLICKHOUSE_CLIENT} --query "CREATE VIEW test.test_view AS SELECT * FROM test.test;" + +echo `${CLICKHOUSE_CLIENT} --query "INSERT INTO test.test_view VALUES('test_string');" 2>&1 | grep -c "$exception_pattern"` +${CLICKHOUSE_CLIENT} --query "INSERT INTO test.test VALUES('test_string');" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test.test;" + +${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test.test;" +${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test.test_view;" From 54cba2f5a6550f38c61f0e4968d112217cbf1112 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Thu, 1 Mar 2018 23:48:25 +0800 Subject: [PATCH 194/209] Fix subquery handling when creating a materialized view --- dbms/src/Storages/StorageMaterializedView.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/dbms/src/Storages/StorageMaterializedView.cpp b/dbms/src/Storages/StorageMaterializedView.cpp index dfa3a140da4..3f94ff29351 100644 --- a/dbms/src/Storages/StorageMaterializedView.cpp +++ b/dbms/src/Storages/StorageMaterializedView.cpp @@ -44,9 +44,14 @@ static void extractDependentTable(const ASTSelectQuery & query, String & select_ select_database_name = typeid_cast(*query_database).name; select_table_name = ast_id->name; } - else if (auto ast_select = typeid_cast<ASTSelectQuery *>(query_table.get())) + else if (auto ast_select = typeid_cast<ASTSelectWithUnionQuery *>(query_table.get())) { - extractDependentTable(*ast_select, select_database_name, select_table_name); + if (ast_select->list_of_selects->children.size() != 1) + throw Exception("UNION is not supported for MATERIALIZED VIEW", ErrorCodes::INCORRECT_QUERY); + + auto inner_query = ast_select->list_of_selects->children.at(0); + + extractDependentTable(typeid_cast<ASTSelectQuery &>(*inner_query), select_database_name, select_table_name); } else throw Exception("Logical error while creating StorageMaterializedView." From 6d9ba5249362ad52dbfcd5c5d29f97fb97e78ac0 Mon Sep 17 00:00:00 2001 From: proller Date: Thu, 1 Mar 2018 23:16:03 +0300 Subject: [PATCH 195/209] Debian init and cron fixes --- debian/clickhouse-server-base.postinst | 1 + debian/clickhouse-server.cron.d | 2 +- debian/clickhouse-server.init | 27 +++++++++++++++++++------- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/debian/clickhouse-server-base.postinst b/debian/clickhouse-server-base.postinst index e15dbf5c67d..c7aa47b5ef9 100644 --- a/debian/clickhouse-server-base.postinst +++ b/debian/clickhouse-server-base.postinst @@ -6,6 +6,7 @@ CLICKHOUSE_GROUP=${CLICKHOUSE_USER} CLICKHOUSE_DATADIR=/var/lib/clickhouse CLICKHOUSE_LOGDIR=/var/log/clickhouse-server +test -f /etc/default/clickhouse && . 
/etc/default/clickhouse if [ "$1" = configure ]; then if [ -x "/bin/systemctl" ] && [ -f /etc/systemd/system/clickhouse-server.service ] && [ -d /run/systemd/system ]; then diff --git a/debian/clickhouse-server.cron.d b/debian/clickhouse-server.cron.d index df2260b9e8b..90431886613 100644 --- a/debian/clickhouse-server.cron.d +++ b/debian/clickhouse-server.cron.d @@ -1 +1 @@ -#*/10 * * * * root (which service > /dev/null && (service clickhouse-server condstart || true)) || /etc/init.d/clickhouse-server condstart 1>/dev/null 2>&1 +#*/10 * * * * root (which service > /dev/null 2>&1 && (service clickhouse-server condstart || true)) || /etc/init.d/clickhouse-server condstart > /dev/null 2>&1 diff --git a/debian/clickhouse-server.init b/debian/clickhouse-server.init index 4c011c2efae..8d0a75f573e 100755 --- a/debian/clickhouse-server.init +++ b/debian/clickhouse-server.init @@ -14,6 +14,7 @@ CLICKHOUSE_GROUP=${CLICKHOUSE_USER} SHELL=/bin/bash PROGRAM=clickhouse-server GENERIC_PROGRAM=clickhouse +EXTRACT_FROM_CONFIG=${GENERIC_PROGRAM}-extract-from-config SYSCONFDIR=/etc/$PROGRAM CLICKHOUSE_LOGDIR=/var/log/clickhouse-server CLICKHOUSE_LOGDIR_USER=root @@ -87,8 +88,8 @@ die() # Check that configuration file is Ok. check_config() { - if [ -x "$BINDIR/$GENERIC_PROGRAM" ]; then - su -s $SHELL ${CLICKHOUSE_USER} -c "$BINDIR/$GENERIC_PROGRAM extract-from-config --config-file=\"$CLICKHOUSE_CONFIG\" --key=path" >/dev/null || die "Configuration file ${CLICKHOUSE_CONFIG} doesn't parse successfully. Won't restart server. You may use forcerestart if you are sure."; + if [ -x "$BINDIR/$EXTRACT_FROM_CONFIG" ]; then + su -s $SHELL ${CLICKHOUSE_USER} -c "$BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path" >/dev/null || die "Configuration file ${CLICKHOUSE_CONFIG} doesn't parse successfully. Won't restart server. You may use forcerestart if you are sure."; fi } @@ -99,8 +100,8 @@ initdb() su -s /bin/sh ${CLICKHOUSE_USER} -c "test -w ${SYSCONFDIR}" || chown ${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP} ${SYSCONFDIR} fi - if [ -x "$BINDIR/$GENERIC_PROGRAM" ]; then - CLICKHOUSE_DATADIR_FROM_CONFIG=$(su -s $SHELL ${CLICKHOUSE_USER} -c "$BINDIR/$GENERIC_PROGRAM extract-from-config --config-file=\"$CLICKHOUSE_CONFIG\" --key=path") + if [ -x "$BINDIR/$EXTRACT_FROM_CONFIG" ]; then + CLICKHOUSE_DATADIR_FROM_CONFIG=$(su -s $SHELL ${CLICKHOUSE_USER} -c "$BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path") if [ "(" "$?" -ne "0" ")" -o "(" -z "${CLICKHOUSE_DATADIR_FROM_CONFIG}" ")" ]; then die "Cannot obtain value of path from config file: ${CLICKHOUSE_CONFIG}"; fi @@ -231,22 +232,34 @@ forcerestart() start } +use_cron() +{ + # 1. running systemd + if [ -x "/bin/systemctl" ] && [ -f /etc/systemd/system/clickhouse-server.service ] && [ -d /run/systemd/system ]; then + return 1 + fi + # 2. disabled by config + if [ -z "$CLICKHOUSE_CRONFILE" ]; then + return 2 + fi + return 0 +} enable_cron() { - [ ! -z "$CLICKHOUSE_CRONFILE" ] && sed -i 's/^#*//' "$CLICKHOUSE_CRONFILE" + use_cron && sed -i 's/^#*//' "$CLICKHOUSE_CRONFILE" } disable_cron() { - [ ! -z "$CLICKHOUSE_CRONFILE" ] && sed -i 's/^#*/#/' "$CLICKHOUSE_CRONFILE" + use_cron && sed -i 's/^#*/#/' "$CLICKHOUSE_CRONFILE" } is_cron_disabled() { - [ -z "$CLICKHOUSE_CRONFILE" ] && return 0 + use_cron || return 0 # Assumes that either no lines are commented or all lines are commented. # Also please note, that currently cron file for ClickHouse has only one line (but some time ago there was more). 
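A note on the use_cron() guard introduced in the init-script patch above: it folds the two independent reasons to leave the watchdog cron entry untouched into one place — the host runs systemd (the unit file supersedes the cron watchdog), or CLICKHOUSE_CRONFILE has been emptied via /etc/default/clickhouse. The following standalone sketch is not part of the patch; it restates the guard so the return codes can be exercised without installing the package, and the cron file path used here is an illustrative assumption.

    #!/usr/bin/env bash
    # Standalone sketch of the use_cron() guard from clickhouse-server.init above.
    # The path below is assumed for illustration; the real script derives its
    # variables from its own configuration.
    CLICKHOUSE_CRONFILE=/etc/cron.d/clickhouse-server

    use_cron()
    {
        # 1. Running under systemd: restarts are handled by the unit, not cron.
        if [ -x "/bin/systemctl" ] && [ -f /etc/systemd/system/clickhouse-server.service ] && [ -d /run/systemd/system ]; then
            return 1
        fi
        # 2. Explicitly disabled: an empty CLICKHOUSE_CRONFILE opts out of the watchdog.
        if [ -z "$CLICKHOUSE_CRONFILE" ]; then
            return 2
        fi
        return 0
    }

    use_cron
    rc=$?
    if [ "$rc" -eq 0 ]; then
        echo "watchdog cron entry is managed via $CLICKHOUSE_CRONFILE"
    else
        echo "watchdog cron entry is not used (reason code $rc)"
    fi

Returning distinct codes keeps enable_cron and disable_cron as one-liners (use_cron && sed ...) while still letting an operator tell "systemd owns the service" apart from "disabled by configuration".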
From f749405bf0166afa52edb94a55daca8dc7e86b0b Mon Sep 17 00:00:00 2001 From: proller Date: Fri, 2 Mar 2018 03:17:25 +0300 Subject: [PATCH 196/209] libressl as submodule (#1983) * Allow use bundled *ssl library * fix * Add submodule * Fixes * fix * fixes * fixes * fix * fix * update poco * fix warnings * fix * fix --- .gitmodules | 3 +++ CMakeLists.txt | 14 +++++++--- cmake/arch.cmake | 10 +++++++ cmake/find_capnp.cmake | 2 +- cmake/find_openssl.cmake | 23 ---------------- cmake/find_poco.cmake | 4 +-- cmake/find_rdkafka.cmake | 2 +- cmake/find_ssl.cmake | 44 +++++++++++++++++++++++++++++++ cmake/find_zlib.cmake | 11 +++++++- contrib/CMakeLists.txt | 37 +++++++++++++++++++++++--- contrib/libboost/CMakeLists.txt | 5 ++-- contrib/poco | 2 +- contrib/ssl | 1 + dbms/CMakeLists.txt | 4 ++- dbms/src/Interpreters/Cluster.cpp | 1 - 15 files changed, 122 insertions(+), 41 deletions(-) delete mode 100644 cmake/find_openssl.cmake create mode 100644 cmake/find_ssl.cmake create mode 160000 contrib/ssl diff --git a/.gitmodules b/.gitmodules index e076a6a3d78..98e031e8b6f 100644 --- a/.gitmodules +++ b/.gitmodules @@ -31,3 +31,6 @@ [submodule "contrib/re2"] path = contrib/re2 url = https://github.com/google/re2.git +[submodule "contrib/ssl"] + path = contrib/ssl + url = https://github.com/ClickHouse-Extras/ssl.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 2bffc353cef..cf546c72f83 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -37,8 +37,14 @@ set (CMAKE_CONFIGURATION_TYPES "RelWithDebInfo;Debug;Release;MinSizeRel;ASan;UBS include (cmake/arch.cmake) -set (COMMON_WARNING_FLAGS "${COMMON_WARNING_FLAGS} -Wall") # -Werror is also added inside directories with our own code. -set (CXX_WARNING_FLAGS "${CXX_WARNING_FLAGS} -Wnon-virtual-dtor") +if (NOT MSVC) + set (COMMON_WARNING_FLAGS "${COMMON_WARNING_FLAGS} -Wall") # -Werror is also added inside directories with our own code. +endif () + +if (COMPILER_GCC OR COMPILER_CLANG) + set (CXX_WARNING_FLAGS "${CXX_WARNING_FLAGS} -Wnon-virtual-dtor") +endif () + if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") # clang: warning: argument unused during compilation: '-stdlib=libc++' # clang: warning: argument unused during compilation: '-specs=/usr/share/dpkg/no-pie-compile.specs' [-Wunused-command-line-argument] @@ -50,7 +56,7 @@ if (ARCH_LINUX) endif () option (TEST_COVERAGE "Enables flags for test coverage" OFF) -option (ENABLE_TESTS "Enables tests" ON) +option (ENABLE_TESTS "Enables tests" ${NOT_MSVC}) option (USE_STATIC_LIBRARIES "Set to FALSE to use shared libraries" ON) option (MAKE_STATIC_LIBRARIES "Set to FALSE to make shared libraries" ${USE_STATIC_LIBRARIES}) @@ -222,7 +228,7 @@ message (STATUS "Building for: ${CMAKE_SYSTEM} ${CMAKE_SYSTEM_PROCESSOR} ${CMAKE include(GNUInstallDirs) -include (cmake/find_openssl.cmake) +include (cmake/find_ssl.cmake) if (NOT OPENSSL_FOUND) message (FATAL_ERROR "Need openssl for build. 
debian tip: sudo apt install libssl-dev") endif () diff --git a/cmake/arch.cmake b/cmake/arch.cmake index 1191d70bd19..f61bac96ab0 100644 --- a/cmake/arch.cmake +++ b/cmake/arch.cmake @@ -17,3 +17,13 @@ endif () if (CMAKE_SYSTEM MATCHES "FreeBSD") set (ARCH_FREEBSD 1) endif () + +if (NOT MSVC) + set (NOT_MSVC 1) +endif () + +if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + set (COMPILER_GCC 1) +elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + set (COMPILER_CLANG 1) +endif () diff --git a/cmake/find_capnp.cmake b/cmake/find_capnp.cmake index 885b7ec3c94..22301acc3ee 100644 --- a/cmake/find_capnp.cmake +++ b/cmake/find_capnp.cmake @@ -1,4 +1,4 @@ -option (ENABLE_CAPNP "Enable Cap'n Proto" ON) +option (ENABLE_CAPNP "Enable Cap'n Proto" ${NOT_MSVC}) if (ENABLE_CAPNP) diff --git a/cmake/find_openssl.cmake b/cmake/find_openssl.cmake deleted file mode 100644 index 512ca24dfe5..00000000000 --- a/cmake/find_openssl.cmake +++ /dev/null @@ -1,23 +0,0 @@ -set (OPENSSL_USE_STATIC_LIBS ${USE_STATIC_LIBRARIES}) -if (APPLE) - set (OPENSSL_ROOT_DIR "/usr/local/opt/openssl") - # https://rt.openssl.org/Ticket/Display.html?user=guest&pass=guest&id=2232 - if (USE_STATIC_LIBRARIES) - message(WARNING "Disable USE_STATIC_LIBRARIES if you have linking problems with OpenSSL on MacOS") - endif () -endif () -find_package (OpenSSL) -if (NOT OPENSSL_FOUND) - # Try to find manually. - set (OPENSSL_INCLUDE_PATHS "/usr/local/opt/openssl/include") - set (OPENSSL_PATHS "/usr/local/opt/openssl/lib") - find_path (OPENSSL_INCLUDE_DIR NAMES openssl/ssl.h PATHS ${OPENSSL_INCLUDE_PATHS}) - find_library (OPENSSL_SSL_LIBRARY ssl PATHS ${OPENSSL_PATHS}) - find_library (OPENSSL_CRYPTO_LIBRARY crypto PATHS ${OPENSSL_PATHS}) - if (OPENSSL_SSL_LIBRARY AND OPENSSL_CRYPTO_LIBRARY AND OPENSSL_INCLUDE_DIR) - set (OPENSSL_LIBRARIES ${OPENSSL_SSL_LIBRARY} ${OPENSSL_CRYPTO_LIBRARY}) - set (OPENSSL_FOUND 1) - endif () -endif () - -message (STATUS "Using openssl=${OPENSSL_FOUND}: ${OPENSSL_INCLUDE_DIR} : ${OPENSSL_LIBRARIES}") diff --git a/cmake/find_poco.cmake b/cmake/find_poco.cmake index 0750067429a..9051d63ae47 100644 --- a/cmake/find_poco.cmake +++ b/cmake/find_poco.cmake @@ -57,8 +57,8 @@ elseif (NOT MISSING_INTERNAL_POCO_LIBRARY) if (USE_STATIC_LIBRARIES AND USE_INTERNAL_ZLIB_LIBRARY) list (APPEND Poco_INCLUDE_DIRS - "${ClickHouse_SOURCE_DIR}/contrib/zlib-ng/" - "${ClickHouse_BINARY_DIR}/contrib/zlib-ng/" + "${ClickHouse_SOURCE_DIR}/contrib/${INTERNAL_ZLIB_NAME}/" + "${ClickHouse_BINARY_DIR}/contrib/${INTERNAL_ZLIB_NAME}/" ) endif () diff --git a/cmake/find_rdkafka.cmake b/cmake/find_rdkafka.cmake index c662a73cb37..e5964b51dc7 100644 --- a/cmake/find_rdkafka.cmake +++ b/cmake/find_rdkafka.cmake @@ -1,4 +1,4 @@ -option (ENABLE_RDKAFKA "Enable kafka" ON) +option (ENABLE_RDKAFKA "Enable kafka" ${NOT_MSVC}) if (ENABLE_RDKAFKA) diff --git a/cmake/find_ssl.cmake b/cmake/find_ssl.cmake new file mode 100644 index 00000000000..ec40e498da1 --- /dev/null +++ b/cmake/find_ssl.cmake @@ -0,0 +1,44 @@ +option (USE_INTERNAL_SSL_LIBRARY "Set to FALSE to use system *ssl library instead of bundled" ${MSVC}) + +set (OPENSSL_USE_STATIC_LIBS ${USE_STATIC_LIBRARIES}) + +if (NOT USE_INTERNAL_SSL_LIBRARY) + if (APPLE) + set (OPENSSL_ROOT_DIR "/usr/local/opt/openssl") + # https://rt.openssl.org/Ticket/Display.html?user=guest&pass=guest&id=2232 + if (USE_STATIC_LIBRARIES) + message(WARNING "Disable USE_STATIC_LIBRARIES if you have linking problems with OpenSSL on MacOS") + endif () + endif () + find_package (OpenSSL) + + if (NOT OPENSSL_FOUND) + # Try to find 
manually. + set (OPENSSL_INCLUDE_PATHS "/usr/local/opt/openssl/include") + set (OPENSSL_PATHS "/usr/local/opt/openssl/lib") + find_path (OPENSSL_INCLUDE_DIR NAMES openssl/ssl.h PATHS ${OPENSSL_INCLUDE_PATHS}) + find_library (OPENSSL_SSL_LIBRARY ssl PATHS ${OPENSSL_PATHS}) + find_library (OPENSSL_CRYPTO_LIBRARY crypto PATHS ${OPENSSL_PATHS}) + if (OPENSSL_SSL_LIBRARY AND OPENSSL_CRYPTO_LIBRARY AND OPENSSL_INCLUDE_DIR) + set (OPENSSL_LIBRARIES ${OPENSSL_SSL_LIBRARY} ${OPENSSL_CRYPTO_LIBRARY}) + set (OPENSSL_FOUND 1) + endif () + endif () +endif () + +if (NOT OPENSSL_FOUND) + set (USE_INTERNAL_SSL_LIBRARY 1) + set (OPENSSL_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/ssl") + set (OPENSSL_INCLUDE_DIR "${OPENSSL_ROOT_DIR}/include") + if (NOT USE_STATIC_LIBRARIES AND TARGET crypto-shared AND TARGET ssl-shared) + set (OPENSSL_CRYPTO_LIBRARY crypto-shared) + set (OPENSSL_SSL_LIBRARY ssl-shared) + else () + set (OPENSSL_CRYPTO_LIBRARY crypto) + set (OPENSSL_SSL_LIBRARY ssl) + endif () + set (OPENSSL_LIBRARIES ${OPENSSL_SSL_LIBRARY} ${OPENSSL_CRYPTO_LIBRARY}) + set (OPENSSL_FOUND 1) +endif () + +message (STATUS "Using ssl=${OPENSSL_FOUND}: ${OPENSSL_INCLUDE_DIR} : ${OPENSSL_LIBRARIES}") diff --git a/cmake/find_zlib.cmake b/cmake/find_zlib.cmake index bafb1bb75b8..17350f9fd58 100644 --- a/cmake/find_zlib.cmake +++ b/cmake/find_zlib.cmake @@ -5,6 +5,15 @@ if (NOT USE_INTERNAL_ZLIB_LIBRARY) endif () if (NOT ZLIB_FOUND) + if (NOT MSVC) + set (INTERNAL_ZLIB_NAME "zlib-ng") + else () + set (INTERNAL_ZLIB_NAME "zlib") + if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/${INTERNAL_ZLIB_NAME}") + message (WARNING "Will use standard zlib, please clone manually:\n git clone https://github.com/madler/zlib.git ${ClickHouse_SOURCE_DIR}/contrib/${INTERNAL_ZLIB_NAME}") + endif () + endif () + set (USE_INTERNAL_ZLIB_LIBRARY 1) set (ZLIB_COMPAT 1) # for zlib-ng, also enables WITH_GZFILEOP set (WITH_NATIVE_INSTRUCTIONS ${ARCHNATIVE}) @@ -15,7 +24,7 @@ if (NOT ZLIB_FOUND) set(WITH_NEON 1 CACHE INTERNAL "") set(WITH_ACLE 1 CACHE INTERNAL "") endif () - set (ZLIB_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/zlib-ng" "${ClickHouse_BINARY_DIR}/contrib/zlib-ng") # generated zconf.h + set (ZLIB_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/${INTERNAL_ZLIB_NAME}" "${ClickHouse_BINARY_DIR}/contrib/${INTERNAL_ZLIB_NAME}") # generated zconf.h set (ZLIB_INCLUDE_DIRS ${ZLIB_INCLUDE_DIR}) # for poco set (ZLIB_FOUND 1) # for poco if (USE_STATIC_LIBRARIES) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 28445658fe9..d7631a8c9d6 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -1,4 +1,6 @@ -set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-old-style-cast") +if (NOT MSVC) + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-old-style-cast") +endif () if (USE_INTERNAL_BOOST_LIBRARY) add_subdirectory (libboost) @@ -48,7 +50,7 @@ if (USE_INTERNAL_UNWIND_LIBRARY) endif () if (USE_INTERNAL_ZLIB_LIBRARY) - add_subdirectory (zlib-ng) + add_subdirectory (${INTERNAL_ZLIB_NAME}) # todo: make pull to Dead2/zlib-ng and remove: # We should use same defines when including zlib.h as used when zlib compiled target_compile_definitions (zlib PUBLIC ZLIB_COMPAT WITH_GZFILEOP) @@ -81,12 +83,33 @@ if (NOT ARCH_ARM) add_subdirectory (libcpuid) endif () +if (USE_INTERNAL_SSL_LIBRARY) + if (NOT MAKE_STATIC_LIBRARIES) + set (BUILD_SHARED 1) + endif () + set (USE_SHARED ${USE_STATIC_LIBRARIES}) + add_subdirectory (ssl) + target_include_directories(${OPENSSL_CRYPTO_LIBRARY} PUBLIC ${OPENSSL_INCLUDE_DIR}) + 
target_include_directories(${OPENSSL_SSL_LIBRARY} PUBLIC ${OPENSSL_INCLUDE_DIR}) +endif () + if (USE_INTERNAL_RDKAFKA_LIBRARY) set (RDKAFKA_BUILD_EXAMPLES OFF CACHE INTERNAL "") set (RDKAFKA_BUILD_TESTS OFF CACHE INTERNAL "") - set (RDKAFKA_BUILD_STATIC ON CACHE INTERNAL "") + set (RDKAFKA_BUILD_STATIC ${MAKE_STATIC_LIBRARIES} CACHE INTERNAL "") mark_as_advanced (ZLIB_INCLUDE_DIR) + + if (USE_INTERNAL_SSL_LIBRARY) + add_library(bundled-ssl ALIAS ${OPENSSL_SSL_LIBRARY}) + set (WITH_BUNDLED_SSL 1) + endif () + add_subdirectory (librdkafka) + + if (USE_INTERNAL_SSL_LIBRARY) + target_include_directories(rdkafka PRIVATE BEFORE ${OPENSSL_INCLUDE_DIR}) + endif () + target_include_directories(rdkafka PRIVATE BEFORE ${ZLIB_INCLUDE_DIR}) endif () if (USE_INTERNAL_CAPNP_LIBRARY) @@ -104,17 +127,23 @@ endif () if (USE_INTERNAL_POCO_LIBRARY) + set (ALLOW_DUPLICATE_CUSTOM_TARGETS 1) set (save_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) set (save_CMAKE_C_FLAGS ${CMAKE_C_FLAGS}) set (_save ${ENABLE_TESTS}) set (ENABLE_TESTS 0) set (CMAKE_DISABLE_FIND_PACKAGE_ZLIB 1) + if (USE_INTERNAL_SSL_LIBRARY) + set (DISABLE_INTERNAL_OPENSSL 1) + set (ENABLE_NETSSL 0) # TODO! + set (ENABLE_CRYPTO 0) # TODO! + endif () add_subdirectory (poco) unset (CMAKE_DISABLE_FIND_PACKAGE_ZLIB) set (ENABLE_TESTS ${_save}) set (CMAKE_CXX_FLAGS ${save_CMAKE_CXX_FLAGS}) set (CMAKE_C_FLAGS ${save_CMAKE_C_FLAGS}) - if (OPENSSL_FOUND) + if (OPENSSL_FOUND AND TARGET Crypto) # Bug in poco https://github.com/pocoproject/poco/pull/2100 found on macos target_include_directories(Crypto PUBLIC ${OPENSSL_INCLUDE_DIR}) endif () diff --git a/contrib/libboost/CMakeLists.txt b/contrib/libboost/CMakeLists.txt index 8cfe084636a..8533951a14b 100644 --- a/contrib/libboost/CMakeLists.txt +++ b/contrib/libboost/CMakeLists.txt @@ -1,5 +1,6 @@ -add_definitions(-Wno-unused-variable) -add_definitions(-Wno-deprecated-declarations) +if (NOT MSVC) + add_definitions(-Wno-unused-variable -Wno-deprecated-declarations) +endif () add_library(boost_program_options_internal boost_1_65_0/libs/program_options/src/cmdline.cpp diff --git a/contrib/poco b/contrib/poco index 3d885f5380f..cf1ad2e9a30 160000 --- a/contrib/poco +++ b/contrib/poco @@ -1 +1 @@ -Subproject commit 3d885f5380f24b4b91d8d4cf18c8cbc083d3ef8d +Subproject commit cf1ad2e9a30ee9161772dc7bc9bf6e165cc51768 diff --git a/contrib/ssl b/contrib/ssl new file mode 160000 index 00000000000..6fbe1c6f404 --- /dev/null +++ b/contrib/ssl @@ -0,0 +1 @@ +Subproject commit 6fbe1c6f404193989c5f6a63115d80fbe34ce2a3 diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 7725f5f7f85..0a46bf0c665 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -18,7 +18,9 @@ get_property (BUILD_INCLUDE_DIRECTORIES DIRECTORY ${ClickHouse_SOURCE_DIR} PROPE string (TIMESTAMP BUILD_DATE "%Y-%m-%d" UTC) configure_file (${CMAKE_CURRENT_SOURCE_DIR}/src/Common/config_build.cpp.in ${CONFIG_BUILD}) -set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wextra") +if (NOT MSVC) + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wextra") +endif () if (NOT NO_WERROR) set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror") diff --git a/dbms/src/Interpreters/Cluster.cpp b/dbms/src/Interpreters/Cluster.cpp index 995a93e4324..3f8efbb9c64 100644 --- a/dbms/src/Interpreters/Cluster.cpp +++ b/dbms/src/Interpreters/Cluster.cpp @@ -10,7 +10,6 @@ #include #include #include -#include namespace DB { From 915f84a24f1336870857247459be26c12ae407b3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 2 Mar 2018 07:05:20 +0300 Subject: [PATCH 197/209] Miscellaneous #1947 --- 
dbms/src/Interpreters/InterpreterSelectQuery.h | 6 ++++-- dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.h b/dbms/src/Interpreters/InterpreterSelectQuery.h index dd000232a44..0316c4aea4f 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.h +++ b/dbms/src/Interpreters/InterpreterSelectQuery.h @@ -39,7 +39,9 @@ public: * - if given - read not from the table specified in the query, but from prepared source. * * required_result_column_names - * - don't calculate all columns except the specified ones from the query - it is used to remove calculation of unnecessary columns from subqueries. + * - don't calculate all columns except the specified ones from the query + * - it is used to remove calculation (and reading) of unnecessary columns from subqueries. + * empty means - use all columns. */ InterpreterSelectQuery( @@ -78,7 +80,7 @@ private: /** When executing FULL or RIGHT JOIN, there will be a data stream from which you can read "not joined" rows. * It has a special meaning, since reading from it should be done after reading from the main streams. - * It is joined to the main streams in UnionBlockInputStream or ParallelAggregatingBlockInputStream. + * It is appended to the main streams in UnionBlockInputStream or ParallelAggregatingBlockInputStream. */ BlockInputStreamPtr stream_with_non_joined_data; diff --git a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index c5cf660a9ba..2535d71a686 100644 --- a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -47,7 +47,7 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( nested_interpreters.reserve(num_selects); std::vector required_result_column_names_for_other_selects(num_selects); - if (!required_result_column_names.empty()) + if (!required_result_column_names.empty() && num_selects > 1) { /// Result header if there are no filtering by 'required_result_column_names'. /// We use it to determine positions of 'required_result_column_names' in SELECT clause. 
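The comment rewrite above pins down the contract used throughout the preceding patches: a non-empty required_result_column_names restricts analysis of the inner SELECTs (including every UNION ALL branch) to the named columns, so unused columns are neither calculated nor read, while an empty list means all columns. A small hands-on demonstration, assuming a reachable local server; the table name test.pruning is hypothetical, chosen here for illustration (the bundled test 00593 above covers the same behavior):

    #!/usr/bin/env bash
    # Unused-column removal in subqueries and UNION ALL: with pruning, only
    # column 'a' is read from storage, so a limit of one column per read passes
    # even though the inner queries nominally select all three columns.
    clickhouse-client --query "DROP TABLE IF EXISTS test.pruning"
    clickhouse-client --query "CREATE TABLE test.pruning (a UInt8, b UInt8, c UInt8) ENGINE = Memory"
    clickhouse-client --query "INSERT INTO test.pruning VALUES (1, 2, 3)"

    clickhouse-client --max_columns_to_read=1 --query "
        SELECT a FROM (SELECT * FROM test.pruning UNION ALL SELECT * FROM test.pruning)"

    clickhouse-client --query "DROP TABLE test.pruning"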
From 651a3128960065529965f1f4301ba95456729852 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 2 Mar 2018 08:03:28 +0300 Subject: [PATCH 198/209] Added ProfileEvent for table function executions [#CLICKHOUSE-3615] --- dbms/CMakeLists.txt | 2 +- dbms/src/Common/ProfileEvents.cpp | 1 + dbms/src/Interpreters/Context.cpp | 2 +- dbms/src/TableFunctions/CMakeLists.txt | 2 +- dbms/src/TableFunctions/ITableFunction.cpp | 19 +++++++++++++++++++ dbms/src/TableFunctions/ITableFunction.h | 7 +++++-- .../TableFunctionCatBoostPool.cpp | 2 +- .../TableFunctionCatBoostPool.h | 3 ++- .../src/TableFunctions/TableFunctionMerge.cpp | 2 +- dbms/src/TableFunctions/TableFunctionMerge.h | 3 ++- .../src/TableFunctions/TableFunctionMySQL.cpp | 2 +- dbms/src/TableFunctions/TableFunctionMySQL.h | 3 ++- .../TableFunctions/TableFunctionNumbers.cpp | 2 +- .../src/TableFunctions/TableFunctionNumbers.h | 3 ++- dbms/src/TableFunctions/TableFunctionODBC.cpp | 2 +- dbms/src/TableFunctions/TableFunctionODBC.h | 3 ++- .../TableFunctions/TableFunctionRemote.cpp | 2 +- dbms/src/TableFunctions/TableFunctionRemote.h | 4 +--- .../TableFunctionShardByHash.cpp | 2 +- .../TableFunctions/TableFunctionShardByHash.h | 3 ++- 20 files changed, 48 insertions(+), 21 deletions(-) create mode 100644 dbms/src/TableFunctions/ITableFunction.cpp diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 0a46bf0c665..965b9b2bfe1 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -79,7 +79,7 @@ list (APPEND dbms_headers src/AggregateFunctions/FactoryHelpers.h src/AggregateFunctions/parseAggregateFunctionParameters.h) -list (APPEND dbms_sources src/TableFunctions/TableFunctionFactory.cpp) +list (APPEND dbms_sources src/TableFunctions/ITableFunction.cpp src/TableFunctions/TableFunctionFactory.cpp) list (APPEND dbms_headers src/TableFunctions/ITableFunction.h src/TableFunctions/TableFunctionFactory.h) add_library(clickhouse_common_io ${SPLIT_SHARED} ${clickhouse_common_io_headers} ${clickhouse_common_io_sources}) diff --git a/dbms/src/Common/ProfileEvents.cpp b/dbms/src/Common/ProfileEvents.cpp index d225f3631d5..71bf37c1a3a 100644 --- a/dbms/src/Common/ProfileEvents.cpp +++ b/dbms/src/Common/ProfileEvents.cpp @@ -30,6 +30,7 @@ M(ArenaAllocChunks) \ M(ArenaAllocBytes) \ M(FunctionExecute) \ + M(TableFunctionExecute) \ M(MarkCacheHits) \ M(MarkCacheMisses) \ M(CreatedReadBufferOrdinary) \ diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index e144a8c0462..2b050445640 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1036,7 +1036,7 @@ void Context::setDefaultFormat(const String & name) default_format = name; } -const Macros& Context::getMacros() const +const Macros & Context::getMacros() const { return shared->macros; } diff --git a/dbms/src/TableFunctions/CMakeLists.txt b/dbms/src/TableFunctions/CMakeLists.txt index 4551be50c8b..e717359090e 100644 --- a/dbms/src/TableFunctions/CMakeLists.txt +++ b/dbms/src/TableFunctions/CMakeLists.txt @@ -1,7 +1,7 @@ include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake) add_headers_and_sources(clickhouse_table_functions .) 
-list(REMOVE_ITEM clickhouse_table_functions_sources TableFunctionFactory.cpp) +list(REMOVE_ITEM clickhouse_table_functions_sources ITableFunction.cpp TableFunctionFactory.cpp) list(REMOVE_ITEM clickhouse_table_functions_headers ITableFunction.h TableFunctionFactory.h) add_library(clickhouse_table_functions ${clickhouse_table_functions_sources}) diff --git a/dbms/src/TableFunctions/ITableFunction.cpp b/dbms/src/TableFunctions/ITableFunction.cpp new file mode 100644 index 00000000000..b15cbbc9fd9 --- /dev/null +++ b/dbms/src/TableFunctions/ITableFunction.cpp @@ -0,0 +1,19 @@ +#include +#include + + +namespace ProfileEvents +{ + extern const Event TableFunctionExecute; +} + +namespace DB +{ + +StoragePtr ITableFunction::execute(const ASTPtr & ast_function, const Context & context) const +{ + ProfileEvents::increment(ProfileEvents::TableFunctionExecute); + return executeImpl(ast_function, context); +} + +} diff --git a/dbms/src/TableFunctions/ITableFunction.h b/dbms/src/TableFunctions/ITableFunction.h index 912a9ee8642..8e3809464b2 100644 --- a/dbms/src/TableFunctions/ITableFunction.h +++ b/dbms/src/TableFunctions/ITableFunction.h @@ -31,10 +31,13 @@ public: /// Get the main function name. virtual std::string getName() const = 0; - /// Create storage according to the query - virtual StoragePtr execute(const ASTPtr & ast_function, const Context & context) const = 0; + /// Create storage according to the query. + StoragePtr execute(const ASTPtr & ast_function, const Context & context) const; virtual ~ITableFunction() {}; + +private: + virtual StoragePtr executeImpl(const ASTPtr & ast_function, const Context & context) const = 0; }; using TableFunctionPtr = std::shared_ptr; diff --git a/dbms/src/TableFunctions/TableFunctionCatBoostPool.cpp b/dbms/src/TableFunctions/TableFunctionCatBoostPool.cpp index 8185d0149e4..09ab2d3d7bb 100644 --- a/dbms/src/TableFunctions/TableFunctionCatBoostPool.cpp +++ b/dbms/src/TableFunctions/TableFunctionCatBoostPool.cpp @@ -16,7 +16,7 @@ namespace ErrorCodes } -StoragePtr TableFunctionCatBoostPool::execute(const ASTPtr & ast_function, const Context & context) const +StoragePtr TableFunctionCatBoostPool::executeImpl(const ASTPtr & ast_function, const Context & context) const { ASTs & args_func = typeid_cast(*ast_function).children; diff --git a/dbms/src/TableFunctions/TableFunctionCatBoostPool.h b/dbms/src/TableFunctions/TableFunctionCatBoostPool.h index 962d5424413..061b5a735f6 100644 --- a/dbms/src/TableFunctions/TableFunctionCatBoostPool.h +++ b/dbms/src/TableFunctions/TableFunctionCatBoostPool.h @@ -14,7 +14,8 @@ class TableFunctionCatBoostPool : public ITableFunction public: static constexpr auto name = "catBoostPool"; std::string getName() const override { return name; } - StoragePtr execute(const ASTPtr & ast_function, const Context & context) const override; +private: + StoragePtr executeImpl(const ASTPtr & ast_function, const Context & context) const override; }; } diff --git a/dbms/src/TableFunctions/TableFunctionMerge.cpp b/dbms/src/TableFunctions/TableFunctionMerge.cpp index 9c12cf32ae3..d37d2002d5a 100644 --- a/dbms/src/TableFunctions/TableFunctionMerge.cpp +++ b/dbms/src/TableFunctions/TableFunctionMerge.cpp @@ -55,7 +55,7 @@ static NamesAndTypesList chooseColumns(const String & source_database, const Str } -StoragePtr TableFunctionMerge::execute(const ASTPtr & ast_function, const Context & context) const +StoragePtr TableFunctionMerge::executeImpl(const ASTPtr & ast_function, const Context & context) const { ASTs & args_func = 
typeid_cast(*ast_function).children; diff --git a/dbms/src/TableFunctions/TableFunctionMerge.h b/dbms/src/TableFunctions/TableFunctionMerge.h index beea2a9b267..2fb512ac590 100644 --- a/dbms/src/TableFunctions/TableFunctionMerge.h +++ b/dbms/src/TableFunctions/TableFunctionMerge.h @@ -15,7 +15,8 @@ class TableFunctionMerge : public ITableFunction public: static constexpr auto name = "merge"; std::string getName() const override { return name; } - StoragePtr execute(const ASTPtr & ast_function, const Context & context) const override; +private: + StoragePtr executeImpl(const ASTPtr & ast_function, const Context & context) const override; }; diff --git a/dbms/src/TableFunctions/TableFunctionMySQL.cpp b/dbms/src/TableFunctions/TableFunctionMySQL.cpp index 28874112cfe..f1b4e911385 100644 --- a/dbms/src/TableFunctions/TableFunctionMySQL.cpp +++ b/dbms/src/TableFunctions/TableFunctionMySQL.cpp @@ -80,7 +80,7 @@ DataTypePtr getDataType(const String & mysql_data_type, bool is_unsigned, size_t } -StoragePtr TableFunctionMySQL::execute(const ASTPtr & ast_function, const Context & context) const +StoragePtr TableFunctionMySQL::executeImpl(const ASTPtr & ast_function, const Context & context) const { const ASTFunction & args_func = typeid_cast(*ast_function); diff --git a/dbms/src/TableFunctions/TableFunctionMySQL.h b/dbms/src/TableFunctions/TableFunctionMySQL.h index bf16121b7fd..870b3f75624 100644 --- a/dbms/src/TableFunctions/TableFunctionMySQL.h +++ b/dbms/src/TableFunctions/TableFunctionMySQL.h @@ -18,7 +18,8 @@ public: { return name; } - StoragePtr execute(const ASTPtr & ast_function, const Context & context) const override; +private: + StoragePtr executeImpl(const ASTPtr & ast_function, const Context & context) const override; }; } diff --git a/dbms/src/TableFunctions/TableFunctionNumbers.cpp b/dbms/src/TableFunctions/TableFunctionNumbers.cpp index 4582432e43d..60136dccfab 100644 --- a/dbms/src/TableFunctions/TableFunctionNumbers.cpp +++ b/dbms/src/TableFunctions/TableFunctionNumbers.cpp @@ -18,7 +18,7 @@ namespace ErrorCodes } -StoragePtr TableFunctionNumbers::execute(const ASTPtr & ast_function, const Context & context) const +StoragePtr TableFunctionNumbers::executeImpl(const ASTPtr & ast_function, const Context & context) const { ASTs & args_func = typeid_cast(*ast_function).children; diff --git a/dbms/src/TableFunctions/TableFunctionNumbers.h b/dbms/src/TableFunctions/TableFunctionNumbers.h index 2c9b09be17f..6a3ab7f9090 100644 --- a/dbms/src/TableFunctions/TableFunctionNumbers.h +++ b/dbms/src/TableFunctions/TableFunctionNumbers.h @@ -15,7 +15,8 @@ class TableFunctionNumbers : public ITableFunction public: static constexpr auto name = "numbers"; std::string getName() const override { return name; } - StoragePtr execute(const ASTPtr & ast_function, const Context & context) const override; +private: + StoragePtr executeImpl(const ASTPtr & ast_function, const Context & context) const override; }; diff --git a/dbms/src/TableFunctions/TableFunctionODBC.cpp b/dbms/src/TableFunctions/TableFunctionODBC.cpp index 591e7f8af8b..cc4c430ae8f 100644 --- a/dbms/src/TableFunctions/TableFunctionODBC.cpp +++ b/dbms/src/TableFunctions/TableFunctionODBC.cpp @@ -59,7 +59,7 @@ DataTypePtr getDataType(SQLSMALLINT type) } } -StoragePtr TableFunctionODBC::execute(const ASTPtr & ast_function, const Context & context) const +StoragePtr TableFunctionODBC::executeImpl(const ASTPtr & ast_function, const Context & context) const { const ASTFunction & args_func = typeid_cast(*ast_function); diff --git 
a/dbms/src/TableFunctions/TableFunctionODBC.h b/dbms/src/TableFunctions/TableFunctionODBC.h index 90389eb0d96..b0f81749647 100644 --- a/dbms/src/TableFunctions/TableFunctionODBC.h +++ b/dbms/src/TableFunctions/TableFunctionODBC.h @@ -20,7 +20,8 @@ public: { return name; } - StoragePtr execute(const ASTPtr & ast_function, const Context & context) const override; +private: + StoragePtr executeImpl(const ASTPtr & ast_function, const Context & context) const override; }; } diff --git a/dbms/src/TableFunctions/TableFunctionRemote.cpp b/dbms/src/TableFunctions/TableFunctionRemote.cpp index 93eaa8ed549..4a20f116011 100644 --- a/dbms/src/TableFunctions/TableFunctionRemote.cpp +++ b/dbms/src/TableFunctions/TableFunctionRemote.cpp @@ -181,7 +181,7 @@ static std::vector parseDescription(const String & description, size_t l } -StoragePtr TableFunctionRemote::execute(const ASTPtr & ast_function, const Context & context) const +StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & ast_function, const Context & context) const { ASTs & args_func = typeid_cast(*ast_function).children; diff --git a/dbms/src/TableFunctions/TableFunctionRemote.h b/dbms/src/TableFunctions/TableFunctionRemote.h index d1cce599903..526fecf74ed 100644 --- a/dbms/src/TableFunctions/TableFunctionRemote.h +++ b/dbms/src/TableFunctions/TableFunctionRemote.h @@ -16,14 +16,12 @@ namespace DB class TableFunctionRemote : public ITableFunction { public: - explicit TableFunctionRemote(const std::string & name_ = "remote"); std::string getName() const override { return name; } - StoragePtr execute(const ASTPtr & ast_function, const Context & context) const override; - private: + StoragePtr executeImpl(const ASTPtr & ast_function, const Context & context) const override; std::string name; bool is_cluster_function; diff --git a/dbms/src/TableFunctions/TableFunctionShardByHash.cpp b/dbms/src/TableFunctions/TableFunctionShardByHash.cpp index 4872eae026d..d3c6368c4f2 100644 --- a/dbms/src/TableFunctions/TableFunctionShardByHash.cpp +++ b/dbms/src/TableFunctions/TableFunctionShardByHash.cpp @@ -22,7 +22,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -StoragePtr TableFunctionShardByHash::execute(const ASTPtr & ast_function, const Context & context) const +StoragePtr TableFunctionShardByHash::executeImpl(const ASTPtr & ast_function, const Context & context) const { ASTs & args_func = typeid_cast(*ast_function).children; diff --git a/dbms/src/TableFunctions/TableFunctionShardByHash.h b/dbms/src/TableFunctions/TableFunctionShardByHash.h index 85f8d2d89da..d1b1bde7d08 100644 --- a/dbms/src/TableFunctions/TableFunctionShardByHash.h +++ b/dbms/src/TableFunctions/TableFunctionShardByHash.h @@ -16,7 +16,8 @@ class TableFunctionShardByHash : public ITableFunction public: static constexpr auto name = "shardByHash"; std::string getName() const override { return name; } - StoragePtr execute(const ASTPtr & ast_function, const Context & context) const override; +private: + StoragePtr executeImpl(const ASTPtr & ast_function, const Context & context) const override; }; } From 7e7fe3c441ffeaff8e14ac3419d90b40a4f73b94 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 2 Mar 2018 08:44:17 +0300 Subject: [PATCH 199/209] Execute table functions once for a query [#CLICKHOUSE-3615] --- dbms/src/Common/ErrorCodes.cpp | 1 + dbms/src/Interpreters/Context.cpp | 40 +++++++++++++------ dbms/src/Interpreters/Context.h | 6 +++ .../Interpreters/InterpreterSelectQuery.cpp | 5 ++- .../InterpreterSelectWithUnionQuery.cpp | 3 ++ 
dbms/src/Interpreters/executeQuery.cpp | 2 + 6 files changed, 43 insertions(+), 14 deletions(-) diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index 96776c8d30a..d68ba4091cd 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -368,6 +368,7 @@ namespace ErrorCodes extern const int CANNOT_GET_CREATE_TABLE_QUERY = 390; extern const int EXTERNAL_LIBRARY_ERROR = 391; extern const int QUERY_IS_PROHIBITED = 392; + extern const int THERE_IS_NO_QUERY = 393; extern const int KEEPER_EXCEPTION = 999; diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 2b050445640..9945c14e824 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -76,6 +76,7 @@ namespace ErrorCodes extern const int DATABASE_ALREADY_EXISTS; extern const int TABLE_METADATA_DOESNT_EXIST; extern const int THERE_IS_NO_SESSION; + extern const int THERE_IS_NO_QUERY; extern const int NO_ELEMENTS_IN_CONFIG; extern const int DDL_GUARD_IS_ACTIVE; extern const int TABLE_SIZE_EXCEEDS_MAX_DROP_SIZE_LIMIT; @@ -919,30 +920,29 @@ ASTPtr Context::getCreateQuery(const String & database_name, const String & tabl Settings Context::getSettings() const { - auto lock = getLock(); return settings; } Limits Context::getLimits() const { - auto lock = getLock(); return settings.limits; } void Context::setSettings(const Settings & settings_) { - auto lock = getLock(); settings = settings_; } void Context::setSetting(const String & name, const Field & value) { - auto lock = getLock(); if (name == "profile") + { + auto lock = getLock(); settings.setProfile(value.safeGet(), *shared->users_config); + } else settings.set(name, value); } @@ -950,9 +950,11 @@ void Context::setSetting(const String & name, const Field & value) void Context::setSetting(const String & name, const std::string & value) { - auto lock = getLock(); if (name == "profile") + { + auto lock = getLock(); settings.setProfile(value, *shared->users_config); + } else settings.set(name, value); } @@ -960,14 +962,12 @@ void Context::setSetting(const String & name, const std::string & value) String Context::getCurrentDatabase() const { - auto lock = getLock(); return current_database; } String Context::getCurrentQueryId() const { - auto lock = getLock(); return client_info.current_query_id; } @@ -982,8 +982,6 @@ void Context::setCurrentDatabase(const String & name) void Context::setCurrentQueryId(const String & query_id) { - auto lock = getLock(); - if (!client_info.current_query_id.empty()) throw Exception("Logical error: attempt to set query_id twice", ErrorCodes::LOGICAL_ERROR); @@ -1006,8 +1004,12 @@ void Context::setCurrentQueryId(const String & query_id) }; } random; - random.a = shared->rng(); - random.b = shared->rng(); + { + auto lock = getLock(); + + random.a = shared->rng(); + random.b = shared->rng(); + } /// Use protected constructor. struct UUID : Poco::UUID @@ -1025,14 +1027,12 @@ void Context::setCurrentQueryId(const String & query_id) String Context::getDefaultFormat() const { - auto lock = getLock(); return default_format.empty() ? 
"TabSeparated" : default_format; } void Context::setDefaultFormat(const String & name) { - auto lock = getLock(); default_format = name; } @@ -1047,6 +1047,20 @@ void Context::setMacros(Macros && macros) shared->macros = macros; } +const Context & Context::getQueryContext() const +{ + if (!query_context) + throw Exception("There is no query", ErrorCodes::THERE_IS_NO_QUERY); + return *query_context; +} + +Context & Context::getQueryContext() +{ + if (!query_context) + throw Exception("There is no query", ErrorCodes::THERE_IS_NO_QUERY); + return *query_context; +} + const Context & Context::getSessionContext() const { if (!session_context) diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index 1656f757c2e..9204cdd23aa 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -105,6 +105,7 @@ private: /// Thus, used in HTTP interface. If not specified - then some globally default format is used. Tables external_tables; /// Temporary tables. Keyed by table name. Tables table_function_results; /// Temporary tables obtained by execution of table functions. Keyed by AST tree id. + Context * query_context = nullptr; Context * session_context = nullptr; /// Session context or nullptr. Could be equal to this. Context * global_context = nullptr; /// Global context or nullptr. Could be equal to this. SystemLogsPtr system_logs; /// Used to log queries and operations on parts @@ -257,6 +258,10 @@ public: /// For methods below you may need to acquire a lock by yourself. std::unique_lock getLock() const; + const Context & getQueryContext() const; + Context & getQueryContext(); + bool hasQueryContext() const { return query_context != nullptr; } + const Context & getSessionContext() const; Context & getSessionContext(); bool hasSessionContext() const { return session_context != nullptr; } @@ -265,6 +270,7 @@ public: Context & getGlobalContext(); bool hasGlobalContext() const { return global_context != nullptr; } + void setQueryContext(Context & context_) { query_context = &context_; } void setSessionContext(Context & context_) { session_context = &context_; } void setGlobalContext(Context & context_) { global_context = &context_; } diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 935c44c743a..ad515e8a8d6 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -103,6 +103,9 @@ void InterpreterSelectQuery::init(const Names & required_result_column_names) { ProfileEvents::increment(ProfileEvents::SelectQuery); + if (!context.hasQueryContext()) + context.setQueryContext(context); + initSettings(); const Settings & settings = context.getSettingsRef(); @@ -128,7 +131,7 @@ void InterpreterSelectQuery::init(const Names & required_result_column_names) else if (table_expression && typeid_cast(table_expression.get())) { /// Read from table function. 
- storage = context.executeTableFunction(table_expression); + storage = context.getQueryContext().executeTableFunction(table_expression); } else { diff --git a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index 2535d71a686..5752503d0f6 100644 --- a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -32,6 +32,9 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( to_stage(to_stage_), subquery_depth(subquery_depth_) { + if (!context.hasQueryContext()) + context.setQueryContext(context); + const ASTSelectWithUnionQuery & ast = typeid_cast<const ASTSelectWithUnionQuery &>(*query_ptr); size_t num_selects = ast.list_of_selects->children.size(); diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 10e20e47cbc..e0d6c8a3fd6 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -140,6 +140,8 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl( ProfileEvents::increment(ProfileEvents::Query); time_t current_time = time(nullptr); + context.setQueryContext(context); + const Settings & settings = context.getSettingsRef(); ParserQuery parser(end); From 05893c1d4480740787bd1776e8722d1b24814142 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 2 Mar 2018 08:52:11 +0300 Subject: [PATCH 200/209] Miscellaneous [#CLICKHOUSE-2] --- dbms/tests/queries/shell_config.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/queries/shell_config.sh b/dbms/tests/queries/shell_config.sh index 780e19ca598..b060c56877d 100644 --- a/dbms/tests/queries/shell_config.sh +++ b/dbms/tests/queries/shell_config.sh @@ -18,6 +18,6 @@ export CLICKHOUSE_PORT_HTTPS=${CLICKHOUSE_PORT_HTTPS:=`${CLICKHOUSE_EXTRACT_CONF export CLICKHOUSE_PORT_HTTPS=${CLICKHOUSE_PORT_HTTPS:="8443"} export CLICKHOUSE_PORT_HTTP_PROTO=${CLICKHOUSE_PORT_HTTP_PROTO:="http"} export CLICKHOUSE_URL=${CLICKHOUSE_URL:="${CLICKHOUSE_PORT_HTTP_PROTO}://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTP}/"} -export CLICKHOUSE_CURL=${CLICKHOUSE_CURL:="curl --max-time 5"} +export CLICKHOUSE_CURL=${CLICKHOUSE_CURL:="curl --max-time 10"} export CLICKHOUSE_TMP=${CLICKHOUSE_TMP:="."} mkdir -p ${CLICKHOUSE_TMP} From 22bbc9f08ca25642d85a5afd1f4a713dcaf8c29a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 2 Mar 2018 09:33:17 +0300 Subject: [PATCH 201/209] Added limit on size of AST after expansion of aliases [#CLICKHOUSE-18] --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 14 +++++++++++++- dbms/src/Interpreters/Limits.h | 1 + dbms/src/Storages/StorageView.cpp | 2 +- .../00596_limit_on_expanded_ast.reference | 1 + .../0_stateless/00596_limit_on_expanded_ast.sh | 10 ++++++++++ 5 files changed, 26 insertions(+), 2 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00596_limit_on_expanded_ast.reference create mode 100755 dbms/tests/queries/0_stateless/00596_limit_on_expanded_ast.sh diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index e0b78a30b0d..9f49ec096bc 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -83,6 +83,7 @@ namespace ErrorCodes extern const int ILLEGAL_AGGREGATION; extern const int SUPPORT_IS_DISABLED; extern const int TOO_DEEP_AST; + extern const int TOO_BIG_AST; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } @@ -932,6 +933,16 @@ void ExpressionAnalyzer::normalizeTree() SetOfASTs tmp_set;
MapOfASTs tmp_map; normalizeTreeImpl(ast, tmp_map, tmp_set, "", 0); + + try + { + ast->checkSize(settings.limits.max_expanded_ast_elements); + } + catch (Exception & e) + { + e.addMessage("(after expansion of aliases)"); + throw; + } } @@ -942,7 +953,8 @@ void ExpressionAnalyzer::normalizeTreeImpl( ASTPtr & ast, MapOfASTs & finished_asts, SetOfASTs & current_asts, std::string current_alias, size_t level) { if (level > settings.limits.max_ast_depth) - throw Exception("Normalized AST is too deep. Maximum: " + settings.limits.max_ast_depth.toString(), ErrorCodes::TOO_DEEP_AST); + throw Exception("Normalized AST is too deep. Maximum: " + + settings.limits.max_ast_depth.toString(), ErrorCodes::TOO_DEEP_AST); if (finished_asts.count(ast)) { diff --git a/dbms/src/Interpreters/Limits.h b/dbms/src/Interpreters/Limits.h index cd6fc409508..4b6942cf2f3 100644 --- a/dbms/src/Interpreters/Limits.h +++ b/dbms/src/Interpreters/Limits.h @@ -63,6 +63,7 @@ struct Limits M(SettingUInt64, max_pipeline_depth, 1000, "") \ M(SettingUInt64, max_ast_depth, 1000, "") /** Checked not during parsing, */ \ M(SettingUInt64, max_ast_elements, 50000, "") /** but after parsing the request. */ \ + M(SettingUInt64, max_expanded_ast_elements, 500000, "Limit after expansion of aliases.") \ \ /** 0 - everything is allowed. 1 - only read requests. 2 - only read requests, as well as changing settings, except for the readonly setting. */ \ M(SettingUInt64, readonly, 0, "") \ diff --git a/dbms/src/Storages/StorageView.cpp b/dbms/src/Storages/StorageView.cpp index 9351873c752..76cd481a855 100644 --- a/dbms/src/Storages/StorageView.cpp +++ b/dbms/src/Storages/StorageView.cpp @@ -38,7 +38,7 @@ BlockInputStreams StorageView::read( const unsigned /*num_streams*/) { processed_stage = QueryProcessingStage::FetchColumns; - BlockInputStreams res = InterpreterSelectWithUnionQuery(inner_query->clone(), context, column_names).executeWithMultipleStreams(); + BlockInputStreams res = InterpreterSelectWithUnionQuery(inner_query, context, column_names).executeWithMultipleStreams(); /// It's expected that the columns read from storage are not constant. /// Because method 'getSampleBlockForColumns' is used to obtain a structure of result in InterpreterSelectQuery. diff --git a/dbms/tests/queries/0_stateless/00596_limit_on_expanded_ast.reference b/dbms/tests/queries/0_stateless/00596_limit_on_expanded_ast.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00596_limit_on_expanded_ast.reference @@ -0,0 +1 @@ +1 diff --git a/dbms/tests/queries/0_stateless/00596_limit_on_expanded_ast.sh b/dbms/tests/queries/0_stateless/00596_limit_on_expanded_ast.sh new file mode 100755 index 00000000000..85d13cbdb47 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00596_limit_on_expanded_ast.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +exception_pattern="too big" + +${CLICKHOUSE_CLIENT} --max_expanded_ast_elements=500000 --query=" + select 1 as a, a+a as b, b+b as c, c+c as d, d+d as e, e+e as f, f+f as g, g+g as h, h+h as i, i+i as j, j+j as k, k+k as l, l+l as m, m+m as n, n+n as o, o+o as p, p+p as q, q+q as r, r+r as s, s+s as t, t+t as u, u+u as v, v+v as w, w+w as x, x+x as y, y+y as z +" 2>&1 | grep -c "$exception_pattern" From 9024cf5fc81ff7ec959d2ab192294e2cb76762bf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 2 Mar 2018 10:43:47 +0300 Subject: [PATCH 202/209] Fixed test [#CLICKHOUSE-2]. 
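A note on the guard PATCH 201 introduces: `max_ast_depth` alone cannot catch the dangerous case, because alias expansion multiplies nodes without necessarily deepening the tree; the 00596 test's chain `a+a as b, b+b as c, ...` roughly doubles the node count at every step while the query text stays tiny. A compact sketch of a budget-passing recursive size check in the spirit of the `checkSize` call above (hypothetical `Node` type, with `std::runtime_error` in place of `Exception`):

```cpp
#include <memory>
#include <stdexcept>
#include <vector>

/// Hypothetical AST node; in ClickHouse the real check is a method of IAST.
struct Node
{
    std::vector<std::shared_ptr<Node>> children;

    /// Spend one unit of the shared budget per node and pass the rest down;
    /// throw as soon as the budget is exhausted, without visiting the remainder.
    size_t checkSize(size_t max_size) const
    {
        if (max_size == 0)
            throw std::runtime_error("AST is too big");    /// mirrors TOO_BIG_AST
        size_t rest = max_size - 1;
        for (const auto & child : children)
            rest = child->checkSize(rest);
        return rest;
    }
};

int main()
{
    auto root = std::make_shared<Node>();
    root->children = {std::make_shared<Node>(), std::make_shared<Node>()};

    try
    {
        root->checkSize(2);    /// three nodes against a budget of two: must throw
    }
    catch (const std::runtime_error &)
    {
        return 0;
    }
    return 1;
}
```

Because the remaining budget is threaded through the recursion, the walk stays O(limit) even on a pathologically large tree: it aborts at the first node past the limit instead of counting everything first.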
--- .../0_stateless/00564_versioned_collapsing_merge_tree.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/00564_versioned_collapsing_merge_tree.sql b/dbms/tests/queries/0_stateless/00564_versioned_collapsing_merge_tree.sql index 483257de16a..85500dba763 100644 --- a/dbms/tests/queries/0_stateless/00564_versioned_collapsing_merge_tree.sql +++ b/dbms/tests/queries/0_stateless/00564_versioned_collapsing_merge_tree.sql @@ -200,4 +200,4 @@ optimize table test.mult_tab; select 'table with 2 blocks optimized'; select * from test.mult_tab; - +DROP TABLE test.mult_tab; From ce2fa2c4e7bacd746ad921db162b682b6fa0d20f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 2 Mar 2018 11:12:43 +0300 Subject: [PATCH 203/209] Inline documentation for most of Limits [#CLICKHOUSE-2] --- dbms/src/Interpreters/Limits.h | 95 ++++++++++++-------------------- dbms/src/Interpreters/Settings.h | 1 - 2 files changed, 35 insertions(+), 61 deletions(-) diff --git a/dbms/src/Interpreters/Limits.h b/dbms/src/Interpreters/Limits.h index 4b6942cf2f3..e04d1ec7795 100644 --- a/dbms/src/Interpreters/Limits.h +++ b/dbms/src/Interpreters/Limits.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include #include @@ -11,49 +10,36 @@ namespace DB /** Limits during query execution are part of the settings. * Used to provide a more safe execution of queries from the user interface. - * Basically, constraints are checked for each block (not every row). That is, the limits can be slightly violated. + * Basically, limits are checked for each block (not every row). That is, the limits can be slightly violated. * Almost all limits apply only to SELECTs. - * Almost all limits apply to each thread individually. + * Almost all limits apply to each stream individually. */ struct Limits { - /** Enumeration of limits: type, name, default value. - * By default: everything is unlimited, except for rather weak restrictions on the depth of recursion and the size of the expressions. - */ - #define APPLY_FOR_LIMITS(M) \ - /** Limits on reading from the most "deep" sources. \ - * That is, only in the deepest subquery. \ - * When reading from a remote server, it is only checked on a remote server. \ - */ \ - M(SettingUInt64, max_rows_to_read, 0, "") \ - M(SettingUInt64, max_bytes_to_read, 0, "") \ - M(SettingOverflowMode, read_overflow_mode, OverflowMode::THROW, "") \ + M(SettingUInt64, max_rows_to_read, 0, "Limit on read rows from the most 'deep' sources. That is, only in the deepest subquery. When reading from a remote server, it is only checked on a remote server.") \ + M(SettingUInt64, max_bytes_to_read, 0, "Limit on read bytes (after decompression) from the most 'deep' sources. That is, only in the deepest subquery. 
When reading from a remote server, it is only checked on a remote server.") \ + M(SettingOverflowMode, read_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.") \ \ M(SettingUInt64, max_rows_to_group_by, 0, "") \ - M(SettingOverflowMode, group_by_overflow_mode, OverflowMode::THROW, "") \ + M(SettingOverflowMode, group_by_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.") \ M(SettingUInt64, max_bytes_before_external_group_by, 0, "") \ \ M(SettingUInt64, max_rows_to_sort, 0, "") \ M(SettingUInt64, max_bytes_to_sort, 0, "") \ - M(SettingOverflowMode, sort_overflow_mode, OverflowMode::THROW, "") \ + M(SettingOverflowMode, sort_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.") \ M(SettingUInt64, max_bytes_before_external_sort, 0, "") \ \ - /** Limits on result size. \ - * Are also checked for subqueries and on remote servers. \ - */ \ - M(SettingUInt64, max_result_rows, 0, "") \ - M(SettingUInt64, max_result_bytes, 0, "") \ - M(SettingOverflowMode, result_overflow_mode, OverflowMode::THROW, "") \ + M(SettingUInt64, max_result_rows, 0, "Limit on result size in rows. Also checked for intermediate data sent from remote servers.") \ + M(SettingUInt64, max_result_bytes, 0, "Limit on result size in bytes (uncompressed). Also checked for intermediate data sent from remote servers.") \ + M(SettingOverflowMode, result_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.") \ \ /* TODO: Check also when merging and finalizing aggregate functions. */ \ M(SettingSeconds, max_execution_time, 0, "") \ - M(SettingOverflowMode, timeout_overflow_mode, OverflowMode::THROW, "") \ + M(SettingOverflowMode, timeout_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.") \ \ - /** In rows per second. */ \ - M(SettingUInt64, min_execution_speed, 0, "") \ - /** Check that the speed is not too low after the specified time has elapsed. */ \ - M(SettingSeconds, timeout_before_checking_execution_speed, 0, "") \ + M(SettingUInt64, min_execution_speed, 0, "In rows per second.") \ + M(SettingSeconds, timeout_before_checking_execution_speed, 0, "Check that the speed is not too low after the specified time has elapsed.") \ \ M(SettingUInt64, max_columns_to_read, 0, "") \ M(SettingUInt64, max_temporary_columns, 0, "") \ @@ -61,46 +47,35 @@ struct Limits \ M(SettingUInt64, max_subquery_depth, 100, "") \ M(SettingUInt64, max_pipeline_depth, 1000, "") \ - M(SettingUInt64, max_ast_depth, 1000, "") /** Checked not during parsing, */ \ - M(SettingUInt64, max_ast_elements, 50000, "") /** but after parsing the request. */ \ - M(SettingUInt64, max_expanded_ast_elements, 500000, "Limit after expansion of aliases.") \ + M(SettingUInt64, max_ast_depth, 1000, "Maximum depth of query syntax tree. Checked after parsing.") \ + M(SettingUInt64, max_ast_elements, 50000, "Maximum size of query syntax tree in number of nodes. Checked after parsing.") \ + M(SettingUInt64, max_expanded_ast_elements, 500000, "Maximum size of query syntax tree in number of nodes after expansion of aliases and the asterisk.") \ \ - /** 0 - everything is allowed. 1 - only read requests. 2 - only read requests, as well as changing settings, except for the readonly setting. */ \ - M(SettingUInt64, readonly, 0, "") \ + M(SettingUInt64, readonly, 0, "0 - everything is allowed. 1 - only read requests. 
2 - only read requests, as well as changing settings, except for the 'readonly' setting.") \ \ - /** Limits for the maximum size of the set resulting from the execution of the IN section. */ \ - M(SettingUInt64, max_rows_in_set, 0, "") \ - M(SettingUInt64, max_bytes_in_set, 0, "") \ - M(SettingOverflowMode, set_overflow_mode, OverflowMode::THROW, "") \ + M(SettingUInt64, max_rows_in_set, 0, "Maximum size of the set (in number of elements) resulting from the execution of the IN section.") \ + M(SettingUInt64, max_bytes_in_set, 0, "Maximum size of the set (in bytes in memory) resulting from the execution of the IN section.") \ + M(SettingOverflowMode, set_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.") \ \ - /** Limits for the maximum size of the set obtained by executing the IN section. */ \ - M(SettingUInt64, max_rows_in_join, 0, "") \ - M(SettingUInt64, max_bytes_in_join, 0, "") \ - M(SettingOverflowMode, join_overflow_mode, OverflowMode::THROW, "") \ + M(SettingUInt64, max_rows_in_join, 0, "Maximum size of the hash table for JOIN (in number of rows).") \ + M(SettingUInt64, max_bytes_in_join, 0, "Maximum size of the hash table for JOIN (in number of bytes in memory).") \ + M(SettingOverflowMode, join_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.") \ \ - /** Limits for the maximum size of the transmitted external table obtained when the GLOBAL IN/JOIN section is executed. */ \ - M(SettingUInt64, max_rows_to_transfer, 0, "") \ - M(SettingUInt64, max_bytes_to_transfer, 0, "") \ - M(SettingOverflowMode, transfer_overflow_mode, OverflowMode::THROW, "") \ + M(SettingUInt64, max_rows_to_transfer, 0, "Maximum size (in rows) of the transmitted external table obtained when the GLOBAL IN/JOIN section is executed.") \ + M(SettingUInt64, max_bytes_to_transfer, 0, "Maximum size (in uncompressed bytes) of the transmitted external table obtained when the GLOBAL IN/JOIN section is executed.") \ + M(SettingOverflowMode, transfer_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.") \ \ - /** Limits for the maximum size of the stored state when executing DISTINCT. */ \ - M(SettingUInt64, max_rows_in_distinct, 0, "") \ - M(SettingUInt64, max_bytes_in_distinct, 0, "") \ - M(SettingOverflowMode, distinct_overflow_mode, OverflowMode::THROW, "") \ + M(SettingUInt64, max_rows_in_distinct, 0, "Maximum number of elements during execution of DISTINCT.") \ + M(SettingUInt64, max_bytes_in_distinct, 0, "Maximum total size of state (in uncompressed bytes) in memory for the execution of DISTINCT.") \ + M(SettingOverflowMode, distinct_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.") \ \ - /** Maximum memory usage when processing a request. 0 - not bounded. */ \ - M(SettingUInt64, max_memory_usage, 0, "") /* For one query */ \ - /* Totally for concurrently running queries of one user */ \ - M(SettingUInt64, max_memory_usage_for_user, 0, "") \ - /* Totally for all concurrent queries */ \ - M(SettingUInt64, max_memory_usage_for_all_queries, 0, "") \ + M(SettingUInt64, max_memory_usage, 0, "Maximum memory usage for processing of single query. Zero means unlimited.") \ + M(SettingUInt64, max_memory_usage_for_user, 0, "Maximum memory usage for processing all concurrently running queries for the user. Zero means unlimited.") \ + M(SettingUInt64, max_memory_usage_for_all_queries, 0, "Maximum memory usage for processing all concurrently running queries on the server. 
Zero means unlimited.") \ \ - /** The maximum speed of data exchange over the network in bytes per second. 0 - not bounded. */ \ - M(SettingUInt64, max_network_bandwidth, 0, "") \ - /** The maximum number of bytes to receive or transmit over the network, as part of the query. */ \ - M(SettingUInt64, max_network_bytes, 0, "") \ - /** The maximum speed of data exchange over the network for the user in bytes per second. 0 - not bounded. */ \ - M(SettingUInt64, max_network_bandwidth_for_user, 0, "") + M(SettingUInt64, max_network_bandwidth, 0, "The maximum speed of data exchange over the network in bytes per second. Zero means unlimited.") \ + M(SettingUInt64, max_network_bytes, 0, "The maximum number of bytes (compressed) to receive or transmit over the network for execution of the query.") \ + M(SettingUInt64, max_network_bandwidth_for_user, 0, "The maximum speed of data exchange over the network in bytes per second for all concurrently running queries for the user. Zero means unlimited.") #define DECLARE(TYPE, NAME, DEFAULT, DESCRIPTION) \ TYPE NAME {DEFAULT}; diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index 26a40def4fa..414f9dd35bb 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include #include From 430d6a3d174f3150112b5cfac55cfad49764dcd4 Mon Sep 17 00:00:00 2001 From: BayoNet Date: Fri, 2 Mar 2018 12:44:48 +0300 Subject: [PATCH 204/209] Ru contents is synchronized with En one. Utils section is restructured. clickhouse-copier is editted. --- docs/en/query_language/clickhouse_local.md | 4 - docs/en/utils/clickhouse-local.md | 4 + docs/mkdocs_en.yml | 13 +- docs/mkdocs_ru.yml | 153 ++++++++++----------- docs/ru/query_language/clickhouse_local.md | 3 - docs/ru/utils/clickhouse-copier.md | 66 ++++----- docs/ru/utils/clickhouse-local.md | 5 + docs/ru/utils/index.md | 6 +- 8 files changed, 124 insertions(+), 130 deletions(-) delete mode 100644 docs/en/query_language/clickhouse_local.md delete mode 100644 docs/ru/query_language/clickhouse_local.md diff --git a/docs/en/query_language/clickhouse_local.md b/docs/en/query_language/clickhouse_local.md deleted file mode 100644 index d18cc200320..00000000000 --- a/docs/en/query_language/clickhouse_local.md +++ /dev/null @@ -1,4 +0,0 @@ -# The clickhouse-local program - -The `clickhouse-local` program enables you to perform fast processing on local files that store tables, without having to deploy and configure clickhouse-server. - diff --git a/docs/en/utils/clickhouse-local.md b/docs/en/utils/clickhouse-local.md index e69de29bb2d..d18cc200320 100644 --- a/docs/en/utils/clickhouse-local.md +++ b/docs/en/utils/clickhouse-local.md @@ -0,0 +1,4 @@ +# The clickhouse-local program + +The `clickhouse-local` program enables you to perform fast processing on local files that store tables, without having to deploy and configure clickhouse-server. 
+ diff --git a/docs/mkdocs_en.yml b/docs/mkdocs_en.yml index c108052fdd2..1daf36ecfd8 100644 --- a/docs/mkdocs_en.yml +++ b/docs/mkdocs_en.yml @@ -12,6 +12,9 @@ edit_uri: 'edit/master/docs/en' extra_css: - assets/stylesheets/custom.css +markdown_extensions: + - codehilite + theme: name: null custom_dir: 'mkdocs-material-theme' @@ -34,9 +37,6 @@ extra: search: language: 'en' -markdown_extensions: - - codehilite - pages: - 'ClickHouse': 'index.md' @@ -59,7 +59,7 @@ pages: - 'Star Schema Benchmark': 'getting_started/example_datasets/star_schema.md' - 'Interfaces': - - 'Interfaces': 'interfaces/index.md' + - 'Introduction': 'interfaces/index.md' - 'Command-line client': 'interfaces/cli.md' - 'HTTP interface': 'interfaces/http_interface.md' - 'JDBC driver': 'interfaces/jdbc.md' @@ -71,7 +71,6 @@ pages: # - 'Query language': 'query_language/index.md' - 'Queries': 'query_language/queries.md' - 'Syntax': 'query_language/syntax.md' - - 'The clickhouse-local program': 'query_language/clickhouse_local.md' - 'Table engines': - 'Introduction': 'table_engines/index.md' @@ -234,9 +233,9 @@ pages: - 'Settings profiles': 'operations/settings/settings_profiles.md' - 'Utilites': - - 'Utilites': 'utils/index.md' + - 'Introduction': 'utils/index.md' - 'clickhouse-copier': 'utils/clickhouse-copier.md' - #- 'clickhouse-local' : 'utils/clickhouse-local.md' + - 'clickhouse-local': 'utils/clickhouse-local.md' - 'ClickHouse Development': # - 'ClickHouse Development': 'development/index.md' diff --git a/docs/mkdocs_ru.yml b/docs/mkdocs_ru.yml index 05d7e9d8eb8..743a29c6cf9 100644 --- a/docs/mkdocs_ru.yml +++ b/docs/mkdocs_ru.yml @@ -25,8 +25,6 @@ theme: primary: 'white' accent: 'white' font: false -# text: Roboto -# code: Roboto Mono logo: 'images/logo.svg' favicon: 'assets/images/favicon.ico' include_search_page: false @@ -39,8 +37,6 @@ extra: search: language: 'en, ru' - - pages: - 'ClickHouse': 'index.md' @@ -55,12 +51,12 @@ pages: - 'Начало работы': - 'Установка и запуск': 'getting_started/index.md' - 'Тестовые наборы данных': - - 'AMPLab Big Data Benchmark': 'getting_started/example_datasets/amplab_benchmark.md' - - 'Терабайт логов кликов от Criteo': 'getting_started/example_datasets/criteo.md' - - 'Данные о такси в Нью-Йорке': 'getting_started/example_datasets/nyc_taxi.md' - 'OnTime': 'getting_started/example_datasets/ontime.md' - - 'Схема «Звезда»': 'getting_started/example_datasets/star_schema.md' + - 'Данные о такси в Нью-Йорке': 'getting_started/example_datasets/nyc_taxi.md' + - 'AMPLab Big Data Benchmark': 'getting_started/example_datasets/amplab_benchmark.md' - 'WikiStat': 'getting_started/example_datasets/wikistat.md' + - 'Терабайт логов кликов от Criteo': 'getting_started/example_datasets/criteo.md' + - 'Схема «Звезда»': 'getting_started/example_datasets/star_schema.md' - 'Интерфейсы': - 'Введение': 'interfaces/index.md' @@ -75,100 +71,99 @@ pages: # - 'Язык запросов': 'query_language/index.md' - 'Запросы': 'query_language/queries.md' - 'Синтаксис': 'query_language/syntax.md' - - 'Программа clickhouse-local': 'query_language/clickhouse_local.md' - 'Движки таблиц': - 'Введение': 'table_engines/index.md' - - 'AggregatingMergeTree': 'table_engines/aggregatingmergetree.md' - - 'Buffer': 'table_engines/buffer.md' - - 'CollapsingMergeTree': 'table_engines/collapsingmergetree.md' - - 'Произвольный ключ партиционирования': 'table_engines/custom_partitioning_key.md' - - 'Dictionary': 'table_engines/dictionary.md' - - 'Distributed': 'table_engines/distributed.md' - - 'Внешние данные для обработки запроса': 
'table_engines/external_data.md' - - 'File(InputFormat)': 'table_engines/file.md' - - 'GraphiteMergeTree': 'table_engines/graphitemergetree.md' - - 'Join': 'table_engines/join.md' - - 'Kafka': 'table_engines/kafka.md' - - 'Log': 'table_engines/log.md' - - 'MaterializedView': 'table_engines/materializedview.md' - - 'Memory': 'table_engines/memory.md' - - 'Merge': 'table_engines/merge.md' - - 'MergeTree': 'table_engines/mergetree.md' - - 'Null': 'table_engines/null.md' - - 'ReplacingMergeTree': 'table_engines/replacingmergetree.md' - - 'Репликация данных': 'table_engines/replication.md' - - 'Set': 'table_engines/set.md' - - 'SummingMergeTree': 'table_engines/summingmergetree.md' - 'TinyLog': 'table_engines/tinylog.md' + - 'Log': 'table_engines/log.md' + - 'Memory': 'table_engines/memory.md' + - 'MergeTree': 'table_engines/mergetree.md' + - 'Произвольный ключ партиционирования': 'table_engines/custom_partitioning_key.md' + - 'ReplacingMergeTree': 'table_engines/replacingmergetree.md' + - 'SummingMergeTree': 'table_engines/summingmergetree.md' + - 'AggregatingMergeTree': 'table_engines/aggregatingmergetree.md' + - 'CollapsingMergeTree': 'table_engines/collapsingmergetree.md' + - 'GraphiteMergeTree': 'table_engines/graphitemergetree.md' + - 'Data replication': 'table_engines/replication.md' + - 'Distributed': 'table_engines/distributed.md' + - 'Dictionary': 'table_engines/dictionary.md' + - 'Merge': 'table_engines/merge.md' + - 'Buffer': 'table_engines/buffer.md' + - 'File': 'table_engines/file.md' + - 'Null': 'table_engines/null.md' + - 'Set': 'table_engines/set.md' + - 'Join': 'table_engines/join.md' - 'View': 'table_engines/view.md' + - 'MaterializedView': 'table_engines/materializedview.md' + - 'Kafka': 'table_engines/kafka.md' + - 'Внешние данные для обработки запроса': 'table_engines/external_data.md' - 'Системные таблицы': - 'Введение': 'system_tables/index.md' - - 'system.asynchronous_metrics': 'system_tables/system.asynchronous_metrics.md' - - 'system.clusters': 'system_tables/system.clusters.md' - - 'system.columns': 'system_tables/system.columns.md' - - 'system.databases': 'system_tables/system.databases.md' - - 'system.dictionaries': 'system_tables/system.dictionaries.md' - - 'system.events': 'system_tables/system.events.md' - - 'system.functions': 'system_tables/system.functions.md' - - 'system.merges': 'system_tables/system.merges.md' - - 'system.metrics': 'system_tables/system.metrics.md' + - 'system.one': 'system_tables/system.one.md' - 'system.numbers': 'system_tables/system.numbers.md' - 'system.numbers_mt': 'system_tables/system.numbers_mt.md' - - 'system.one': 'system_tables/system.one.md' + - 'system.databases': 'system_tables/system.databases.md' + - 'system.tables': 'system_tables/system.tables.md' + - 'system.columns': 'system_tables/system.columns.md' - 'system.parts': 'system_tables/system.parts.md' - 'system.processes': 'system_tables/system.processes.md' + - 'system.merges': 'system_tables/system.merges.md' + - 'system.events': 'system_tables/system.events.md' + - 'system.metrics': 'system_tables/system.metrics.md' + - 'system.asynchronous_metrics': 'system_tables/system.asynchronous_metrics.md' - 'system.replicas': 'system_tables/system.replicas.md' + - 'system.dictionaries': 'system_tables/system.dictionaries.md' + - 'system.clusters': 'system_tables/system.clusters.md' + - 'system.functions': 'system_tables/system.functions.md' - 'system.settings': 'system_tables/system.settings.md' - - 'system.tables': 'system_tables/system.tables.md' - 'system.zookeeper': 
'system_tables/system.zookeeper.md' - 'Табличные функции': - - 'Общие сведения': 'table_functions/index.md' - - 'merge': 'table_functions/merge.md' + - 'Введение': 'table_functions/index.md' - 'remote': 'table_functions/remote.md' + - 'merge': 'table_functions/merge.md' - 'Форматы': - 'Введение': 'formats/index.md' - - 'CapnProto': 'formats/capnproto.md' + - 'TabSeparated': 'formats/tabseparated.md' + - 'TabSeparatedRaw': 'formats/tabseparatedraw.md' + - 'TabSeparatedWithNames': 'formats/tabseparatedwithnames.md' + - 'TabSeparatedWithNamesAndTypes': 'formats/tabseparatedwithnamesandtypes.md' - 'CSV': 'formats/csv.md' - 'CSVWithNames': 'formats/csvwithnames.md' + - 'Values': 'formats/values.md' + - 'Vertical': 'formats/vertical.md' - 'JSON': 'formats/json.md' - 'JSONCompact': 'formats/jsoncompact.md' - 'JSONEachRow': 'formats/jsoneachrow.md' - - 'Native': 'formats/native.md' - - 'Null': 'formats/null.md' + - 'TSKV': 'formats/tskv.md' - 'Pretty': 'formats/pretty.md' - 'PrettyCompact': 'formats/prettycompact.md' - 'PrettyCompactMonoBlock': 'formats/prettycompactmonoblock.md' - 'PrettyNoEscapes': 'formats/prettynoescapes.md' - 'PrettySpace': 'formats/prettyspace.md' - 'RowBinary': 'formats/rowbinary.md' - - 'TabSeparated': 'formats/tabseparated.md' - - 'TabSeparatedRaw': 'formats/tabseparatedraw.md' - - 'TabSeparatedWithNames': 'formats/tabseparatedwithnames.md' - - 'TabSeparatedWithNamesAndTypes': 'formats/tabseparatedwithnamesandtypes.md' - - 'TSKV': 'formats/tskv.md' - - 'Values': 'formats/values.md' - - 'Vertical': 'formats/vertical.md' + - 'Native': 'formats/native.md' + - 'Null': 'formats/null.md' - 'XML': 'formats/xml.md' + - 'CapnProto': 'formats/capnproto.md' - 'Типы данных': - 'Введение': 'data_types/index.md' - - 'Array(T)': 'data_types/array.md' + - 'UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64': 'data_types/int_uint.md' + - 'Float32, Float64': 'data_types/float.md' - 'Булевы значения': 'data_types/boolean.md' + - 'String': 'data_types/string.md' + - 'FixedString(N)': 'data_types/fixedstring.md' - 'Date': 'data_types/date.md' - 'DateTime': 'data_types/datetime.md' - 'Enum': 'data_types/enum.md' - - 'FixedString(N)': 'data_types/fixedstring.md' - - 'Float32, Float64': 'data_types/float.md' - - 'UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64': 'data_types/int_uint.md' - - 'String': 'data_types/string.md' + - 'Array(T)': 'data_types/array.md' + - 'AggregateFunction(name, types_of_arguments...)': 'data_types/nested_data_structures/aggregatefunction.md' - 'Tuple(T1, T2, ...)': 'data_types/tuple.md' - 'Вложенные структуры данных': # - 'Вложенные структуры данных': 'data_types/nested_data_structures/index.md' - - 'AggregateFunction(name, types_of_arguments...)': 'data_types/nested_data_structures/aggregatefunction.md' - 'Nested(Name1 Type1, Name2 Type2, ...)': 'data_types/nested_data_structures/nested.md' - 'Служебные типы данных': # - 'Служебные типы данных': 'data_types/special_data_types/index.md' @@ -180,31 +175,31 @@ pages: - 'Функции': - 'Общее описание': 'functions/index.md' - 'Арифметические функции': 'functions/arithmetic_functions.md' - - 'Функции по работе с массивами': 'functions/array_functions.md' - - 'Функция arrayJoin': 'functions/array_join.md' - - 'Битовые функции': 'functions/bit_functions.md' - 'Функции сравнения': 'functions/comparison_functions.md' - - 'Условные функции': 'functions/conditional_functions.md' + - 'Логические функции': 'functions/logical_functions.md' + - 'Функции преобразования типов': 'functions/type_conversion_functions.md' - 
'Функции для работы с датами и временем': 'functions/date_time_functions.md' - - 'Функции кодирования': 'functions/encoding_functions.md' - - 'Функции для работы с внешними словарями': 'functions/ext_dict_functions.md' + - 'Функции для работы со строками': 'functions/string_functions.md' + - 'Функции поиска в строках': 'functions/string_search_functions.md' + - 'Функции поиска и замены в строках': 'functions/string_replace_functions.md' + - 'Условные функции': 'functions/conditional_functions.md' + - 'Математические функции': 'functions/math_functions.md' + - 'Функции округления': 'functions/rounding_functions.md' + - 'Функции по работе с массивами': 'functions/array_functions.md' + - 'Функции разбиения и слияния строк и массивов': 'functions/splitting_merging_functions.md' + - 'Битовые функции': 'functions/bit_functions.md' - 'Функции хэширования': 'functions/hash_functions.md' - - 'Функции высшего порядка': 'functions/higher_order_functions.md' - - 'Функции для реализации оператора IN.': 'functions/in_functions.md' + - 'Функции генерации псевдослучайных чисел': 'functions/random_functions.md' + - 'Функции кодирования': 'functions/encoding_functions.md' + - 'Функции для работы с URL': 'functions/url_functions.md' - 'Функции для работы с IP-адресами': 'functions/ip_address_functions.md' - 'Функции для работы с JSON.': 'functions/json_functions.md' - - 'Логические функции': 'functions/logical_functions.md' - - 'Математические функции': 'functions/math_functions.md' + - 'Функции высшего порядка': 'functions/higher_order_functions.md' - 'Прочие функции': 'functions/other_functions.md' - - 'Функции генерации псевдослучайных чисел': 'functions/random_functions.md' - - 'Функции округления': 'functions/rounding_functions.md' - - 'Функции разбиения и слияния строк и массивов': 'functions/splitting_merging_functions.md' - - 'Функции для работы со строками': 'functions/string_functions.md' - - 'Функции поиска и замены в строках': 'functions/string_replace_functions.md' - - 'Функции поиска в строках': 'functions/string_search_functions.md' - - 'Функции преобразования типов': 'functions/type_conversion_functions.md' - - 'Функции для работы с URL': 'functions/url_functions.md' + - 'Функции для работы с внешними словарями': 'functions/ext_dict_functions.md' - 'Функции для работы со словарями Яндекс.Метрики': 'functions/ym_dict_functions.md' + - 'Функции для реализации оператора IN.': 'functions/in_functions.md' + - 'Функция arrayJoin': 'functions/array_join.md' - 'Агрегатные функции': - 'Введение': 'agg_functions/index.md' @@ -239,9 +234,9 @@ pages: - 'Профили настроек': 'operations/settings/settings_profiles.md' - 'Утилиты': - - 'Утилиты': 'utils/index.md' + - 'Введение': 'utils/index.md' - 'clickhouse-copier': 'utils/clickhouse-copier.md' - #- 'clickhouse-local' : 'utils/clickhouse-local.md' + - 'clickhouse-local': 'utils/clickhouse-local.md' - 'ClickHouse Development': # - 'ClickHouse Development': 'development/index.md' diff --git a/docs/ru/query_language/clickhouse_local.md b/docs/ru/query_language/clickhouse_local.md deleted file mode 100644 index ee3f50dfeef..00000000000 --- a/docs/ru/query_language/clickhouse_local.md +++ /dev/null @@ -1,3 +0,0 @@ -# Программа clickhouse-local - -Программа `clickhouse-local` позволяет выполнять быструю обработку локальных файлов, хранящих таблицы, не прибегая к развертыванию и настройке clickhouse-server ... 
diff --git a/docs/ru/utils/clickhouse-copier.md b/docs/ru/utils/clickhouse-copier.md index 25d22f19222..8bcd58c23af 100644 --- a/docs/ru/utils/clickhouse-copier.md +++ b/docs/ru/utils/clickhouse-copier.md @@ -1,23 +1,37 @@ -# clickhouse-copier util + -The util copies tables data from one cluster to new tables of other (possibly the same) cluster in distributed and fault-tolerant manner. +# clickhouse-copier -Configuration of copying tasks is set in special ZooKeeper node (called the `/description` node). -A ZooKeeper path to the description node is specified via `--task-path ` parameter. -So, node `/task/path/description` should contain special XML content describing copying tasks. +Копирует данные из таблиц одного кластера в таблицы другого (или этого же) кластера. -Simultaneously many `clickhouse-copier` processes located on any servers could execute the same task. -ZooKeeper node `/task/path/` is used by the processes to coordinate their work. -You must not add additional child nodes to `/task/path/`. +Можно запустить несколько `clickhouse-copier` на разных серверах для выполнения одного и того же задания. Для синхронизации между процессами используется ZooKeeper. -Currently you are responsible for manual launching of all `cluster-copier` processes. -You can launch as many processes as you want, whenever and wherever you want. -Each process try to select the nearest available shard of source cluster and copy some part of data (partition) from it to the whole -destination cluster (with resharding). -Therefore it makes sense to launch cluster-copier processes on the source cluster nodes to reduce the network usage. +После запуска, `clickhouse-copier`: +- Соединяется с ZooKeeper и получает: + - Задания на копирование. + - Состояние заданий на копирование. +- Выполняет задания. + Каждый запущенный процесс выбирает "ближайший" шард исходного кластера и копирует данные в кластер назначения, при необходимости перешардируя их. -Since the workers coordinate their work via ZooKeeper, in addition to `--task-path ` you have to specify ZooKeeper -cluster configuration via `--config-file ` parameter. Example of `zookeeper.xml`: +`clickhouse-copier` отслеживает изменения в ZooKeeper и применяет их "на лету". + +Для снижения сетевого трафика рекомендуем запускать `clickhouse-copier` на том же сервере, где находятся исходные данные. + +## Запуск clickhouse-copier + +Утилиту следует запускать вручную следующим образом: + +```bash +clickhouse-copier copier --daemon --config zookeeper.xml --task-path /task/path --base-dir /path/to/dir +``` + +Параметры запуска: +- `daemon` - запускает `clickhouse-copier` в режиме демона. +- `config` - путь к файлу `zookeeper.xml` с параметрами соединения с ZooKeeper. +- `task-path` - путь к ноде ZooKeeper. Нода используется для синхронизации между процессами `clickhouse-copier` и для хранения заданий. Задания хранятся в `$task-path/description`. +- `base-dir` - путь к логам и вспомогательным файлам. При запуске `clickhouse-copier` создает в `$base-dir` подкаталоги `clickhouse-copier_YYYYMMHHSS_`. Если параметр не указан, то каталоги будут создаваться в каталоге, где `clickhouse-copier` был запущен. + +## Формат zookeeper.xml ```xml @@ -30,11 +44,7 @@ cluster configuration via `--config-file ` parameter. Example of ``` -When you run `clickhouse-copier --config-file --task-path ` the process connects to ZooKeeper cluster, reads tasks config from `/task/path/description` and executes them.
- -## Format of task config - -Here is an example of `/task/path/description` content: +## Конфигурация заданий на копирование ```xml @@ -103,7 +113,8 @@ Here is an example of `/task/path/description` content: specify partitions that should be copied in , they should be in quoted format like partition column of system.parts table. --> - ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/hits2', '{replica}') + + ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/hits2', '{replica}') PARTITION BY toMonday(date) ORDER BY (CounterID, EventDate) @@ -142,15 +153,4 @@ Here is an example of `/task/path/description` content: ``` -cluster-copier processes watch for `/task/path/description` node update. -So, if you modify the config settings or `max_workers` params, they will be updated. - -## Example - -```bash -clickhouse-copier copier --daemon --config /path/to/copier/zookeeper.xml --task-path /clickhouse-copier/cluster1_tables_hits --base-dir /path/to/copier_logs -``` - -`--base-dir /path/to/copier_logs` specifies where auxilary and log files of the copier process will be saved. -In this case it will create `/path/to/copier_logs/clickhouse-copier_YYYYMMHHSS_/` dir with log and status-files. -If it is not specified it will use current dir (`/clickhouse-copier_YYYYMMHHSS_/` if it is run as a `--daemon`). +`clickhouse-copier` отслеживает изменения `/task/path/description` и применяет их "на лету". Если вы поменяете, например, значение `max_workers`, то количество процессов, выполняющих задания, также изменится. diff --git a/docs/ru/utils/clickhouse-local.md b/docs/ru/utils/clickhouse-local.md index e69de29bb2d..d4b50c08320 100644 --- a/docs/ru/utils/clickhouse-local.md +++ b/docs/ru/utils/clickhouse-local.md @@ -0,0 +1,5 @@ + + +# clickhouse-local + +Программа `clickhouse-local` позволяет выполнять быструю обработку локальных файлов, хранящих таблицы, не прибегая к развертыванию и настройке сервера ClickHouse. diff --git a/docs/ru/utils/index.md b/docs/ru/utils/index.md index 760fc0100c3..75bd574a72e 100644 --- a/docs/ru/utils/index.md +++ b/docs/ru/utils/index.md @@ -1,6 +1,4 @@ # Утилиты ClickHouse -Существует несколько утилит ClickHouse, которые представляют из себя отдельные исполняемые файлы: - -* `clickhouse-local` позволяет выполнять SQL-запросы над данными подобно тому, как это делает `awk` -* `clickhouse-copier` копирует (и перешардирует) неизменяемые данные с одного кластера на другой отказоустойчивым способом. +* [clickhouse-local](clickhouse-local.md#utils-clickhouse-local) - позволяет выполнять SQL-запросы над данными без установки сервера ClickHouse подобно тому, как это делает `awk`. +* [clickhouse-copier](clickhouse-copier.md#utils-clickhouse-copier) - копирует (и перешардирует) данные с одного кластера на другой. From 8ea66f83ee6d6001bd1253c7818925ab66174046 Mon Sep 17 00:00:00 2001 From: BayoNet Date: Fri, 2 Mar 2018 13:08:42 +0300 Subject: [PATCH 205/209] Some bugs are fixed.
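The copier pages above rest on one coordination primitive: every worker tries to create a claim node under the shared task path, and ZooKeeper guarantees that exactly one such creation succeeds. A self-contained sketch of the claim pattern with an in-memory stand-in for ZooKeeper (illustration only; the real copier uses ephemeral znodes, so a crashed worker's claims disappear together with its session):

```cpp
#include <iostream>
#include <mutex>
#include <set>
#include <string>

/// In-memory stand-in for the ZooKeeper namespace the copier workers share.
/// Creating a node fails if it already exists: exactly the primitive needed
/// to hand each partition to one worker.
class FakeZooKeeper
{
public:
    bool tryCreate(const std::string & path)
    {
        std::lock_guard<std::mutex> lock(mutex);
        return nodes.insert(path).second;    /// false: someone claimed it first
    }

private:
    std::mutex mutex;
    std::set<std::string> nodes;
};

/// Each worker scans the shared partition list and processes only
/// the partitions whose claim node it managed to create.
void runWorker(FakeZooKeeper & zk, const std::string & worker_id)
{
    for (const char * partition : {"201801", "201802", "201803"})
        if (zk.tryCreate(std::string("/task/path/partitions/") + partition + "/active"))
            std::cout << worker_id << " copies partition " << partition << '\n';
}

int main()
{
    FakeZooKeeper zk;
    runWorker(zk, "copier-1");
    runWorker(zk, "copier-2");    /// finds every partition already claimed
}
```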
--- docs/mkdocs_en.yml | 2 +- docs/mkdocs_ru.yml | 2 +- docs/ru/utils/clickhouse-copier.md | 15 +++++++++------ docs/ru/utils/clickhouse-local.md | 2 +- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/docs/mkdocs_en.yml b/docs/mkdocs_en.yml index 1daf36ecfd8..5d0723f0042 100644 --- a/docs/mkdocs_en.yml +++ b/docs/mkdocs_en.yml @@ -3,7 +3,7 @@ copyright: ©2016–2018 Yandex LLC docs_dir: en site_dir: build/docs/en -use_directory_urls: true +use_directory_urls: false repo_name: 'yandex/ClickHouse' repo_url: 'https://github.com/yandex/ClickHouse/' diff --git a/docs/mkdocs_ru.yml b/docs/mkdocs_ru.yml index 743a29c6cf9..df06d235c99 100644 --- a/docs/mkdocs_ru.yml +++ b/docs/mkdocs_ru.yml @@ -3,7 +3,7 @@ copyright: ©2016–2018 Yandex LLC docs_dir: ru site_dir: build/docs/ru -use_directory_urls: true +use_directory_urls: false repo_name: 'yandex/ClickHouse' repo_url: 'https://github.com/yandex/ClickHouse/' diff --git a/docs/ru/utils/clickhouse-copier.md b/docs/ru/utils/clickhouse-copier.md index 8bcd58c23af..849fa532d41 100644 --- a/docs/ru/utils/clickhouse-copier.md +++ b/docs/ru/utils/clickhouse-copier.md @@ -1,4 +1,4 @@ - + # clickhouse-copier @@ -7,11 +7,13 @@ Можно запустить несколько `clickhouse-copier` на разных серверах для выполнения одного и того же задания. Для синхронизации между процессами используется ZooKeeper. После запуска, `clickhouse-copier`: + - Соединяется с ZooKeeper и получает: - - Задания на копирование. - - Состояние заданий на копирование. + - Задания на копирование. + - Состояние заданий на копирование. - Выполняет задания. + + Каждый запущенный процесс выбирает "ближайший" шард исходного кластера и копирует данные в кластер назначения, при необходимости перешардируя их. `clickhouse-copier` отслеживает изменения в ZooKeeper и применяет их "на лету". @@ -26,6 +28,7 @@ clickhouse-copier copier --daemon --config zookeeper.xml --task-path /task/path ``` Параметры запуска: + - `daemon` - запускает `clickhouse-copier` в режиме демона. - `config` - путь к файлу `zookeeper.xml` с параметрами соединения с ZooKeeper. - `task-path` - путь к ноде ZooKeeper. Нода используется для синхронизации между процессами `clickhouse-copier` и для хранения заданий. Задания хранятся в `$task-path/description`.
@@ -34,14 +37,14 @@ clickhouse-copier copier --daemon --config zookeeper.xml --task-path /task/path ## Формат zookeeper.xml ```xml - + 127.0.0.1 2181 - + ``` ## Конфигурация заданий на копирование diff --git a/docs/ru/utils/clickhouse-local.md b/docs/ru/utils/clickhouse-local.md index d4b50c08320..0cee8e4ee3c 100644 --- a/docs/ru/utils/clickhouse-local.md +++ b/docs/ru/utils/clickhouse-local.md @@ -1,4 +1,4 @@ - + #clickhouse-local From 5842c35aeb3065bcf603e2f0dddc780b82b70869 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 2 Mar 2018 15:59:41 +0300 Subject: [PATCH 206/209] added odbc_default_field_size setting --- dbms/src/Dictionaries/DictionarySourceFactory.cpp | 2 +- dbms/src/Dictionaries/ODBCDictionarySource.cpp | 11 +++++++++-- dbms/src/Dictionaries/ODBCDictionarySource.h | 2 +- dbms/src/Interpreters/Settings.h | 3 ++- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/dbms/src/Dictionaries/DictionarySourceFactory.cpp b/dbms/src/Dictionaries/DictionarySourceFactory.cpp index 4b2f887e019..463cbee3ac7 100644 --- a/dbms/src/Dictionaries/DictionarySourceFactory.cpp +++ b/dbms/src/Dictionaries/DictionarySourceFactory.cpp @@ -155,7 +155,7 @@ DictionarySourcePtr DictionarySourceFactory::create( else if ("odbc" == source_type) { #if Poco_DataODBC_FOUND - return std::make_unique(dict_struct, config, config_prefix + ".odbc", sample_block); + return std::make_unique(dict_struct, config, config_prefix + ".odbc", sample_block, context); #else throw Exception{"Dictionary source of type `odbc` is disabled because poco library was built without ODBC support.", ErrorCodes::SUPPORT_IS_DISABLED}; diff --git a/dbms/src/Dictionaries/ODBCDictionarySource.cpp b/dbms/src/Dictionaries/ODBCDictionarySource.cpp index b7bc8063d76..7df00154785 100644 --- a/dbms/src/Dictionaries/ODBCDictionarySource.cpp +++ b/dbms/src/Dictionaries/ODBCDictionarySource.cpp @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB @@ -17,7 +18,7 @@ static const size_t max_block_size = 8192; ODBCDictionarySource::ODBCDictionarySource(const DictionaryStructure & dict_struct_, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, - const Block & sample_block) + const Block & sample_block, const Context & context) : log(&Logger::get("ODBCDictionarySource")), dict_struct{dict_struct_}, db{config.getString(config_prefix + ".db", "")}, @@ -28,11 +29,17 @@ ODBCDictionarySource::ODBCDictionarySource(const DictionaryStructure & dict_stru load_all_query{query_builder.composeLoadAllQuery()}, invalidate_query{config.getString(config_prefix + ".invalidate_query", "")} { + std::size_t field_size = context.getSettingsRef().odbc_max_field_size; + pool = createAndCheckResizePocoSessionPool([&] { - return std::make_shared( + auto session = std::make_shared( config.getString(config_prefix + ".connector", "ODBC"), config.getString(config_prefix + ".connection_string")); + + /// Default POCO value is 1024. Set property manually to make possible reading of longer strings. 
+ session->setProperty("maxFieldSize", Poco::Any(field_size)); + return std::move(session); }); } diff --git a/dbms/src/Dictionaries/ODBCDictionarySource.h b/dbms/src/Dictionaries/ODBCDictionarySource.h index 4325e3e0770..c54a05cfb37 100644 --- a/dbms/src/Dictionaries/ODBCDictionarySource.h +++ b/dbms/src/Dictionaries/ODBCDictionarySource.h @@ -31,7 +31,7 @@ class ODBCDictionarySource final : public IDictionarySource public: ODBCDictionarySource(const DictionaryStructure & dict_struct_, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, - const Block & sample_block); + const Block & sample_block, const Context & context); /// copy-constructor is provided in order to support cloneability ODBCDictionarySource(const ODBCDictionarySource & other); diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index 414f9dd35bb..1ba4f1f06e5 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -180,7 +180,8 @@ struct Settings M(SettingBool, use_index_for_in_with_subqueries, true, "Try using an index if there is a subquery or a table expression on the right side of the IN operator.") \ \ M(SettingBool, empty_result_for_aggregation_by_empty_set, false, "Return empty result when aggregating without keys on empty set.") \ - M(SettingBool, allow_distributed_ddl, true, "If it is set to true, then a user is allowed to executed distributed DDL queries.") + M(SettingBool, allow_distributed_ddl, true, "If it is set to true, then a user is allowed to executed distributed DDL queries.") \ + M(SettingUInt64, odbc_max_field_size, 1024, "Max size of filed can be read from ODBC dictionary. Long strings are truncated.") \ /// Possible limits for query execution. From 0d36b8d4ce71663851527cb7a9705626a46c851e Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 2 Mar 2018 19:57:01 +0300 Subject: [PATCH 207/209] Update PULL_REQUEST_TEMPLATE.md Removed checkbox as it is redundant. --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 98c94cb52f2..a41347005bc 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1 +1 @@ -- [ ] I hereby agree to the terms of the CLA available at: https://yandex.ru/legal/cla/?lang=en +I hereby agree to the terms of the CLA available at: https://yandex.ru/legal/cla/?lang=en From bf924d1c9d6e4e2dbe91f6af24bab89fe7aa7071 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 2 Mar 2018 21:51:59 +0300 Subject: [PATCH 208/209] Better name [#CLICKHOUSE-2] --- dbms/src/Common/ActionBlocker.h | 18 +++++++++--------- dbms/src/Interpreters/InterserverIOHandler.h | 2 +- .../Storages/MergeTree/MergeTreeDataMerger.h | 2 +- .../ReplicatedMergeTreeAlterThread.cpp | 4 ++-- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/dbms/src/Common/ActionBlocker.h b/dbms/src/Common/ActionBlocker.h index 12e60989c9c..72876a23e66 100644 --- a/dbms/src/Common/ActionBlocker.h +++ b/dbms/src/Common/ActionBlocker.h @@ -16,8 +16,8 @@ public: bool isCancelled() const { return counter > 0; } /// Temporarily blocks corresponding actions (while the returned object is alive) - struct BlockHolder; - BlockHolder cancel() const { return BlockHolder(this); } + struct LockHolder; + LockHolder cancel() const { return LockHolder(this); } /// Cancel the actions forever. 
void cancelForever() const { ++counter; } @@ -26,30 +26,30 @@ public: auto & getCounter() { return counter; } /// Blocks related action while a BlockerHolder instance exists - struct BlockHolder + struct LockHolder { - explicit BlockHolder(const ActionBlocker * var_ = nullptr) : var(var_) + explicit LockHolder(const ActionBlocker * var_ = nullptr) : var(var_) { if (var) ++var->counter; } - BlockHolder(BlockHolder && other) noexcept + LockHolder(LockHolder && other) noexcept { *this = std::move(other); } - BlockHolder & operator=(BlockHolder && other) noexcept + LockHolder & operator=(LockHolder && other) noexcept { var = other.var; other.var = nullptr; return *this; } - BlockHolder(const BlockHolder & other) = delete; - BlockHolder & operator=(const BlockHolder & other) = delete; + LockHolder(const LockHolder & other) = delete; + LockHolder & operator=(const LockHolder & other) = delete; - ~BlockHolder() + ~LockHolder() { if (var) --var->counter; diff --git a/dbms/src/Interpreters/InterserverIOHandler.h b/dbms/src/Interpreters/InterserverIOHandler.h index e790e311561..9a340337859 100644 --- a/dbms/src/Interpreters/InterserverIOHandler.h +++ b/dbms/src/Interpreters/InterserverIOHandler.h @@ -144,7 +144,7 @@ public: ActionBlocker & getBlocker() { return endpoint->blocker; } void cancelForever() { getBlocker().cancelForever(); } - ActionBlocker::BlockHolder cancel() { return getBlocker().cancel(); } + ActionBlocker::LockHolder cancel() { return getBlocker().cancel(); } private: String name; diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.h b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.h index 25269687d3f..93a883f7567 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.h @@ -109,7 +109,7 @@ private: public: /** Is used to cancel all merges. On cancel() call all currently running 'mergeParts' methods will throw exception soon. - * All new calls to 'mergeParts' will throw exception till all 'BlockHolder' objects will be destroyed. + * All new calls to 'mergeParts' will throw exception till all 'LockHolder' objects will be destroyed. */ ActionBlocker merges_blocker; diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp index c1a666ec84d..ef7582c497c 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp @@ -67,7 +67,7 @@ void ReplicatedMergeTreeAlterThread::run() { /// If you need to lock table structure, then suspend merges. 
- ActionBlocker::BlockHolder merge_blocker; + ActionBlocker::LockHolder merge_blocker; if (changed_version || force_recheck_parts) merge_blocker = storage.merger.merges_blocker.cancel(); @@ -81,7 +81,7 @@ void ReplicatedMergeTreeAlterThread::run() auto temporarily_stop_part_checks = storage.part_check_thread.temporarilyStop(); /// Temporarily cancel parts sending - ActionBlocker::BlockHolder data_parts_exchange_blocker; + ActionBlocker::LockHolder data_parts_exchange_blocker; if (storage.data_parts_exchange_endpoint_holder) data_parts_exchange_blocker = storage.data_parts_exchange_endpoint_holder->cancel(); From 293e28803dfb3051d158307fdebb2ca09fbf73c3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 2 Mar 2018 22:19:00 +0300 Subject: [PATCH 209/209] Fixed wrong modification in docs [#CLICKHOUSE-2] --- docs/mkdocs_en.yml | 2 +- docs/mkdocs_ru.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/mkdocs_en.yml b/docs/mkdocs_en.yml index 5d0723f0042..1daf36ecfd8 100644 --- a/docs/mkdocs_en.yml +++ b/docs/mkdocs_en.yml @@ -3,7 +3,7 @@ copyright: ©2016–2018 Yandex LLC docs_dir: en site_dir: build/docs/en -use_directory_urls: false +use_directory_urls: true repo_name: 'yandex/ClickHouse' repo_url: 'https://github.com/yandex/ClickHouse/' diff --git a/docs/mkdocs_ru.yml b/docs/mkdocs_ru.yml index df06d235c99..743a29c6cf9 100644 --- a/docs/mkdocs_ru.yml +++ b/docs/mkdocs_ru.yml @@ -3,7 +3,7 @@ copyright: ©2016–2018 Yandex LLC docs_dir: ru site_dir: build/docs/ru -use_directory_urls: false +use_directory_urls: true repo_name: 'yandex/ClickHouse' repo_url: 'https://github.com/yandex/ClickHouse/'
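A closing observation on PATCH 208: the renamed `LockHolder` is the classic counter-based RAII guard. Constructing one marks the action as cancelled, moving transfers ownership without touching the counter twice, and destruction re-enables the action once the last holder dies. A compact standalone rendering (simplified sketch with `std::atomic<int>`; the real class additionally offers `cancelForever()` and exposes the counter, as shown above):

```cpp
#include <atomic>
#include <cassert>

/// Sketch of the ActionBlocker idiom: the action counts as cancelled
/// while at least one LockHolder is alive.
class Blocker
{
public:
    bool isCancelled() const { return counter > 0; }

    struct LockHolder
    {
        explicit LockHolder(Blocker * blocker_) : blocker(blocker_) { ++blocker->counter; }

        LockHolder(LockHolder && other) noexcept : blocker(other.blocker) { other.blocker = nullptr; }

        LockHolder(const LockHolder &) = delete;
        LockHolder & operator=(const LockHolder &) = delete;

        ~LockHolder()
        {
            if (blocker)
                --blocker->counter;    /// the last holder re-enables the action
        }

    private:
        Blocker * blocker;
    };

    LockHolder cancel() { return LockHolder(this); }

private:
    std::atomic<int> counter{0};
};

int main()
{
    Blocker merges;
    {
        auto lock = merges.cancel();    /// merges suspended while `lock` lives
        assert(merges.isCancelled());
    }
    assert(!merges.isCancelled());
}
```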