diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a8b3b3407e..ea6955fb33f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,59 @@ +## ClickHouse release 19.6.2.11, 2019-05-13 + +### New Features +* TTL expressions for columns and tables. [#4212](https://github.com/yandex/ClickHouse/pull/4212) ([Anton Popov](https://github.com/CurtizJ)) +* Added support for `brotli` compression for HTTP responses (Accept-Encoding: br) [#4388](https://github.com/yandex/ClickHouse/pull/4388) ([Mikhail](https://github.com/fandyushin)) +* Added new function `isValidUTF8` for checking whether a set of bytes is correctly utf-8 encoded. [#4934](https://github.com/yandex/ClickHouse/pull/4934) ([Danila Kutenin](https://github.com/danlark1)) +* Add new load balancing policy `first_or_random` which sends queries to the first specified host and if it's inaccessible send queries to random hosts of shard. Useful for cross-replication topology setups. [#5012](https://github.com/yandex/ClickHouse/pull/5012) ([nvartolomei](https://github.com/nvartolomei)) + +### Experimental Features +* Add setting `index_granularity_bytes` (adaptive index granularity) for MergeTree* tables family. [#4826](https://github.com/yandex/ClickHouse/pull/4826) ([alesapin](https://github.com/alesapin)) + +### Improvements +* Added support for non-constant and negative size and length arguments for function `substringUTF8`. [#4989](https://github.com/yandex/ClickHouse/pull/4989) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Disable push-down to right table in left join, left table in right join, and both tables in full join. This fixes wrong JOIN results in some cases. [#4846](https://github.com/yandex/ClickHouse/pull/4846) ([Ivan](https://github.com/abyss7)) +* `clickhouse-copier`: auto upload task configuration from `--task-file` option [#4876](https://github.com/yandex/ClickHouse/pull/4876) ([proller](https://github.com/proller)) +* Added typos handler for storage factory and table functions factory. [#4891](https://github.com/yandex/ClickHouse/pull/4891) ([Danila Kutenin](https://github.com/danlark1)) +* Support asterisks and qualified asterisks for multiple joins without subqueries [#4898](https://github.com/yandex/ClickHouse/pull/4898) ([Artem Zuikov](https://github.com/4ertus2)) +* Make missing column error message more user friendly. [#4915](https://github.com/yandex/ClickHouse/pull/4915) ([Artem Zuikov](https://github.com/4ertus2)) + +### Performance Improvements +* Significant speedup of ASOF JOIN [#4924](https://github.com/yandex/ClickHouse/pull/4924) ([Martijn Bakker](https://github.com/Gladdy)) + +### Backward Incompatible Changes +* HTTP header `Query-Id` was renamed to `X-ClickHouse-Query-Id` for consistency. [#4972](https://github.com/yandex/ClickHouse/pull/4972) ([Mikhail](https://github.com/fandyushin)) + +### Bug Fixes +* Fixed potential null pointer dereference in `clickhouse-copier`. [#4900](https://github.com/yandex/ClickHouse/pull/4900) ([proller](https://github.com/proller)) +* Fixed error on query with JOIN + ARRAY JOIN [#4938](https://github.com/yandex/ClickHouse/pull/4938) ([Artem Zuikov](https://github.com/4ertus2)) +* Fixed hanging on start of the server when a dictionary depends on another dictionary via a database with engine=Dictionary. [#4962](https://github.com/yandex/ClickHouse/pull/4962) ([Vitaly Baranov](https://github.com/vitlibar)) +* Partially fix distributed_product_mode = local. It's possible to allow columns of local tables in where/having/order by/... via table aliases. Throw exception if table does not have alias. There's not possible to access to the columns without table aliases yet. [#4986](https://github.com/yandex/ClickHouse/pull/4986) ([Artem Zuikov](https://github.com/4ertus2)) +* Fix potentially wrong result for `SELECT DISTINCT` with `JOIN` [#5001](https://github.com/yandex/ClickHouse/pull/5001) ([Artem Zuikov](https://github.com/4ertus2)) + +### Build/Testing/Packaging Improvements +* Fixed test failures when running clickhouse-server on different host [#4713](https://github.com/yandex/ClickHouse/pull/4713) ([Vasily Nemkov](https://github.com/Enmk)) +* clickhouse-test: Disable color control sequences in non tty environment. [#4937](https://github.com/yandex/ClickHouse/pull/4937) ([alesapin](https://github.com/alesapin)) +* clickhouse-test: Allow use any test database (remove `test.` qualification where it possible) [#5008](https://github.com/yandex/ClickHouse/pull/5008) ([proller](https://github.com/proller)) +* Fix ubsan errors [#5037](https://github.com/yandex/ClickHouse/pull/5037) ([Vitaly Baranov](https://github.com/vitlibar)) +* Yandex LFAlloc was added to ClickHouse to allocate MarkCache and UncompressedCache data in different ways to catch segfaults more reliable [#4995](https://github.com/yandex/ClickHouse/pull/4995) ([Danila Kutenin](https://github.com/danlark1)) +* Python util to help with backports and changelogs. [#4949](https://github.com/yandex/ClickHouse/pull/4949) ([Ivan](https://github.com/abyss7)) + + +## ClickHouse release 19.5.4.22, 2019-05-13 + +### Bug fixes +* Fixed possible crash in bitmap* functions [#5220](https://github.com/yandex/ClickHouse/pull/5220) [#5228](https://github.com/yandex/ClickHouse/pull/5228) ([Andy Yang](https://github.com/andyyzh)) +* Fixed very rare data race condition that could happen when executing a query with UNION ALL involving at least two SELECTs from system.columns, system.tables, system.parts, system.parts_tables or tables of Merge family and performing ALTER of columns of the related tables concurrently. [#5189](https://github.com/yandex/ClickHouse/pull/5189) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed error `Set for IN is not created yet in case of using single LowCardinality column in the left part of IN`. This error happened if LowCardinality column was the part of primary key. #5031 [#5154](https://github.com/yandex/ClickHouse/pull/5154) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) +* Modification of retention function: If a row satisfies both the first and NTH condition, only the first satisfied condition is added to the data state. Now all conditions that satisfy in a row of data are added to the data state. [#5119](https://github.com/yandex/ClickHouse/pull/5119) ([小路](https://github.com/nicelulu)) + + ## ClickHouse release 19.5.3.8, 2019-04-18 ### Bug fixes * Fixed type of setting `max_partitions_per_insert_block` from boolean to UInt64. [#5028](https://github.com/yandex/ClickHouse/pull/5028) ([Mohammad Hossein Sekhavat](https://github.com/mhsekhavat)) + ## ClickHouse release 19.5.2.6, 2019-04-15 ### New Features @@ -294,7 +345,7 @@ * Added support of `Nullable` types in `mysql` table function. [#4198](https://github.com/yandex/ClickHouse/pull/4198) ([Emmanuel Donin de Rosière](https://github.com/edonin)) * Support for arbitrary constant expressions in `LIMIT` clause. [#4246](https://github.com/yandex/ClickHouse/pull/4246) ([k3box](https://github.com/k3box)) * Added `topKWeighted` aggregate function that takes additional argument with (unsigned integer) weight. [#4245](https://github.com/yandex/ClickHouse/pull/4245) ([Andrew Golman](https://github.com/andrewgolman)) -* `StorageJoin` now supports `join_overwrite` setting that allows overwriting existing values of the same key. [#3973](https://github.com/yandex/ClickHouse/pull/3973) ([Amos Bird](https://github.com/amosbird) +* `StorageJoin` now supports `join_any_take_last_row` setting that allows overwriting existing values of the same key. [#3973](https://github.com/yandex/ClickHouse/pull/3973) ([Amos Bird](https://github.com/amosbird) * Added function `toStartOfInterval`. [#4304](https://github.com/yandex/ClickHouse/pull/4304) ([Vitaly Baranov](https://github.com/vitlibar)) * Added `RowBinaryWithNamesAndTypes` format. [#4200](https://github.com/yandex/ClickHouse/pull/4200) ([Oleg V. Kozlyuk](https://github.com/DarkWanderer)) * Added `IPv4` and `IPv6` data types. More effective implementations of `IPv*` functions. [#3669](https://github.com/yandex/ClickHouse/pull/3669) ([Vasily Nemkov](https://github.com/Enmk)) diff --git a/CHANGELOG_RU.md b/CHANGELOG_RU.md index f862563c3ee..2a19bc4724a 100644 --- a/CHANGELOG_RU.md +++ b/CHANGELOG_RU.md @@ -1,3 +1,53 @@ +## ClickHouse release 19.6.2.11, 2019-05-13 + +### Новые возможности +* TTL выражения, позволяющие настроить время жизни и автоматическую очистку данных в таблице или в отдельных её столбцах. [#4212](https://github.com/yandex/ClickHouse/pull/4212) ([Anton Popov](https://github.com/CurtizJ)) +* Добавлена поддержка алгоритма сжатия `brotli` в HTTP ответах (`Accept-Encoding: br`). Для тела POST запросов, эта возможность уже существовала. [#4388](https://github.com/yandex/ClickHouse/pull/4388) ([Mikhail](https://github.com/fandyushin)) +* Добавлена функция `isValidUTF8` для проверки, содержит ли строка валидные данные в кодировке UTF-8. [#4934](https://github.com/yandex/ClickHouse/pull/4934) ([Danila Kutenin](https://github.com/danlark1)) +* Добавлены новое правило балансировки (`load_balancing`) `first_or_random` по которому запросы посылаются на первый заданый хост и если он недоступен - на случайные хосты шарда. Полезно для топологий с кросс-репликацией. [#5012](https://github.com/yandex/ClickHouse/pull/5012) ([nvartolomei](https://github.com/nvartolomei)) + +### Эксперемннтальные возможности +* Добавлена настройка `index_granularity_bytes` (адаптивная гранулярность индекса) для таблиц семейства MergeTree* . [#4826](https://github.com/yandex/ClickHouse/pull/4826) ([alesapin](https://github.com/alesapin)) + +### Улучшения +* Добавлена поддержка для не константных и отрицательных значений аргументов смещения и длины для функции `substringUTF8`. [#4989](https://github.com/yandex/ClickHouse/pull/4989) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Отключение push-down в правую таблицы в left join, левую таблицу в right join, и в обе таблицы в full join. Это исправляет неправильные JOIN результаты в некоторых случаях. [#4846](https://github.com/yandex/ClickHouse/pull/4846) ([Ivan](https://github.com/abyss7)) +* `clickhouse-copier`: Автоматическая загрузка конфигурации задачи в zookeeper из `--task-file` опции [#4876](https://github.com/yandex/ClickHouse/pull/4876) ([proller](https://github.com/proller)) +* Добавлены подсказки с учётом опечаток для имён движков таблиц и табличных функций. [#4891](https://github.com/yandex/ClickHouse/pull/4891) ([Danila Kutenin](https://github.com/danlark1)) +* Поддержка выражений `select *` и `select tablename.*` для множественных join без подзапросов [#4898](https://github.com/yandex/ClickHouse/pull/4898) ([Artem Zuikov](https://github.com/4ertus2)) +* Сообщения об ошибках об отсутствующих столбцах стали более понятными. [#4915](https://github.com/yandex/ClickHouse/pull/4915) ([Artem Zuikov](https://github.com/4ertus2)) + +### Улучшение производительности +* Существенное ускорение ASOF JOIN [#4924](https://github.com/yandex/ClickHouse/pull/4924) ([Martijn Bakker](https://github.com/Gladdy)) + +### Обратно несовместимые изменения +* HTTP заголовок `Query-Id` переименован в `X-ClickHouse-Query-Id` для соответствия. [#4972](https://github.com/yandex/ClickHouse/pull/4972) ([Mikhail](https://github.com/fandyushin)) + +### Исправления ошибок +* Исправлены возможные разыменования нулевого указателя в `clickhouse-copier`. [#4900](https://github.com/yandex/ClickHouse/pull/4900) ([proller](https://github.com/proller)) +* Исправлены ошибки в запросах с JOIN + ARRAY JOIN [#4938](https://github.com/yandex/ClickHouse/pull/4938) ([Artem Zuikov](https://github.com/4ertus2)) +* Исправлено зависание на старте сервера если внешний словарь зависит от другого словаря через использование таблицы из БД с движком `Dictionary`. [#4962](https://github.com/yandex/ClickHouse/pull/4962) ([Vitaly Baranov](https://github.com/vitlibar)) +* При использовании `distributed_product_mode = 'local'` корректно работает использование столбцов локальных таблиц в where/having/order by/... через табличные алиасы. Выкидывает исключение если таблица не имеет алиас. Доступ к столбцам без алиасов пока не возможен. [#4986](https://github.com/yandex/ClickHouse/pull/4986) ([Artem Zuikov](https://github.com/4ertus2)) +* Исправлен потенциально некорректный результат для `SELECT DISTINCT` с `JOIN` [#5001](https://github.com/yandex/ClickHouse/pull/5001) ([Artem Zuikov](https://github.com/4ertus2)) + +### Улучшения сборки/тестирования/пакетирования +* Исправлена неработоспособность тестов, если `clickhouse-server` запущен на удалённом хосте [#4713](https://github.com/yandex/ClickHouse/pull/4713) ([Vasily Nemkov](https://github.com/Enmk)) +* `clickhouse-test`: Отключена раскраска результата, если команда запускается не в терминале. [#4937](https://github.com/yandex/ClickHouse/pull/4937) ([alesapin](https://github.com/alesapin)) +* `clickhouse-test`: Возможность использования не только базы данных test [#5008](https://github.com/yandex/ClickHouse/pull/5008) ([proller](https://github.com/proller)) +* Исправлены ошибки при запуске тестов под UBSan [#5037](https://github.com/yandex/ClickHouse/pull/5037) ([Vitaly Baranov](https://github.com/vitlibar)) +* Добавлен аллокатор Yandex LFAlloc для аллоцирования MarkCache и UncompressedCache данных разными способами для более надежного отлавливания проездов по памяти [#4995](https://github.com/yandex/ClickHouse/pull/4995) ([Danila Kutenin](https://github.com/danlark1)) +* Утилита для упрощения бэкпортирования изменений в старые релизы и составления changelogs. [#4949](https://github.com/yandex/ClickHouse/pull/4949) ([Ivan](https://github.com/abyss7)) + + +## ClickHouse release 19.5.4.22, 2019-05-13 + +### Исправления ошибок +* Исправлены возможные падения в bitmap* функциях [#5220](https://github.com/yandex/ClickHouse/pull/5220) [#5228](https://github.com/yandex/ClickHouse/pull/5228) ([Andy Yang](https://github.com/andyyzh)) +* Исправлен очень редкий data race condition который мог произойти при выполнении запроса с UNION ALL включающего минимум два SELECT из таблиц system.columns, system.tables, system.parts, system.parts_tables или таблиц семейства Merge и одновременно выполняющихся запросов ALTER столбцов соответствующих таблиц. [#5189](https://github.com/yandex/ClickHouse/pull/5189) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Исправлена ошибка `Set for IN is not created yet in case of using single LowCardinality column in the left part of IN`. Эта ошибка возникала когда LowCardinality столбец была частью primary key. #5031 [#5154](https://github.com/yandex/ClickHouse/pull/5154) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) +* Исправление функции retention: только первое соответствующее условие добавлялось в состояние данных. Сейчас все условия которые удовлетворяют в строке данных добавляются в состояние. [#5119](https://github.com/yandex/ClickHouse/pull/5119) ([小路](https://github.com/nicelulu)) + + ## ClickHouse release 19.5.3.8, 2019-04-18 ### Исправления ошибок @@ -286,7 +336,7 @@ * Добавлена поддержка `Nullable` типов в табличной функции `mysql`. [#4198](https://github.com/yandex/ClickHouse/pull/4198) ([Emmanuel Donin de Rosière](https://github.com/edonin)) * Добавлена поддержка произвольных константных выражений в секции `LIMIT`. [#4246](https://github.com/yandex/ClickHouse/pull/4246) ([k3box](https://github.com/k3box)) * Добавлена агрегатная функция `topKWeighted` - вариант `topK`, позволяющий задавать (целый неотрицательный) вес добавляемого значения. [#4245](https://github.com/yandex/ClickHouse/pull/4245) ([Andrew Golman](https://github.com/andrewgolman)) -* Движок `Join` теперь поддерживает настройку `join_overwrite`, которая позволяет перезаписывать значения для существующих ключей. [#3973](https://github.com/yandex/ClickHouse/pull/3973) ([Amos Bird](https://github.com/amosbird)) +* Движок `Join` теперь поддерживает настройку `join_any_take_last_row`, которая позволяет перезаписывать значения для существующих ключей. [#3973](https://github.com/yandex/ClickHouse/pull/3973) ([Amos Bird](https://github.com/amosbird)) * Добавлена функция `toStartOfInterval`. [#4304](https://github.com/yandex/ClickHouse/pull/4304) ([Vitaly Baranov](https://github.com/vitlibar)) * Добавлена функция `toStartOfTenMinutes`. [#4298](https://github.com/yandex/ClickHouse/pull/4298) ([Vitaly Baranov](https://github.com/vitlibar)) * Добавлен формат `RowBinaryWithNamesAndTypes`. [#4200](https://github.com/yandex/ClickHouse/pull/4200) ([Oleg V. Kozlyuk](https://github.com/DarkWanderer)) diff --git a/CMakeLists.txt b/CMakeLists.txt index 79b3b1ddba3..19cf0388d98 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -93,6 +93,11 @@ if (COMPILER_GCC OR COMPILER_CLANG) set (CXX_WARNING_FLAGS "${CXX_WARNING_FLAGS} -Wnon-virtual-dtor") endif () +if (COMPILER_GCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "8.3.0") + # Warnings in protobuf generating + set (CXX_WARNING_FLAGS "${CXX_WARNING_FLAGS} -Wno-array-bounds") +endif () + if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") # clang: warning: argument unused during compilation: '-stdlib=libc++' # clang: warning: argument unused during compilation: '-specs=/usr/share/dpkg/no-pie-compile.specs' [-Wunused-command-line-argument] @@ -226,7 +231,7 @@ if (OS_LINUX AND NOT UNBUNDLED AND (GLIBC_COMPATIBILITY OR USE_LIBCXX)) set (CMAKE_POSTFIX_VARIABLE "CMAKE_${CMAKE_BUILD_TYPE_UC}_POSTFIX") # FIXME: glibc-compatibility may be non-static in some builds! - set (DEFAULT_LIBS "${DEFAULT_LIBS} libs/libglibc-compatibility/libglibc-compatibility${${CMAKE_POSTFIX_VARIABLE}}.a") + set (DEFAULT_LIBS "${DEFAULT_LIBS} ${ClickHouse_BINARY_DIR}/libs/libglibc-compatibility/libglibc-compatibility${${CMAKE_POSTFIX_VARIABLE}}.a") endif () # Add Libc. GLIBC is actually a collection of interdependent libraries. diff --git a/cmake/dbms_glob_sources.cmake b/cmake/dbms_glob_sources.cmake index 2d2200c2c55..5ea9fe88ca3 100644 --- a/cmake/dbms_glob_sources.cmake +++ b/cmake/dbms_glob_sources.cmake @@ -5,7 +5,7 @@ endmacro() macro(add_headers_and_sources prefix common_path) add_glob(${prefix}_headers RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${common_path}/*.h) - add_glob(${prefix}_sources ${common_path}/*.cpp ${common_path}/*.h) + add_glob(${prefix}_sources ${common_path}/*.cpp ${common_path}/*.c ${common_path}/*.h) endmacro() macro(add_headers_only prefix common_path) diff --git a/cmake/find_lfalloc.cmake b/cmake/find_lfalloc.cmake index 81b1827e44c..32cb1e7d5d5 100644 --- a/cmake/find_lfalloc.cmake +++ b/cmake/find_lfalloc.cmake @@ -1,4 +1,5 @@ -if (NOT SANITIZE AND NOT ARCH_ARM AND NOT ARCH_32 AND NOT ARCH_PPC64LE AND NOT OS_FREEBSD AND NOT APPLE) +# TODO(danlark1). Disable LFAlloc for a while to fix mmap count problem +if (NOT OS_LINUX AND NOT SANITIZE AND NOT ARCH_ARM AND NOT ARCH_32 AND NOT ARCH_PPC64LE AND NOT OS_FREEBSD AND NOT APPLE) option (ENABLE_LFALLOC "Set to FALSE to use system libgsasl library instead of bundled" ${NOT_UNBUNDLED}) endif () diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 71c269ad2bc..8618d2b3f89 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -1,8 +1,8 @@ # Third-party libraries may have substandard code. if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-function -Wno-unused-variable -Wno-unused-but-set-variable -Wno-unused-result -Wno-deprecated-declarations -Wno-maybe-uninitialized -Wno-format -Wno-misleading-indentation -Wno-stringop-overflow -Wno-implicit-function-declaration -Wno-return-type -Wno-array-bounds -Wno-bool-compare -Wno-int-conversion -Wno-switch") - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-old-style-cast -Wno-unused-function -Wno-unused-variable -Wno-unused-but-set-variable -Wno-unused-result -Wno-deprecated-declarations -Wno-non-virtual-dtor -Wno-maybe-uninitialized -Wno-format -Wno-misleading-indentation -Wno-implicit-fallthrough -Wno-class-memaccess -Wno-sign-compare -std=c++1z") + set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-function -Wno-unused-variable -Wno-unused-but-set-variable -Wno-unused-result -Wno-deprecated-declarations -Wno-maybe-uninitialized -Wno-format -Wno-misleading-indentation -Wno-stringop-overflow -Wno-implicit-function-declaration -Wno-return-type -Wno-array-bounds -Wno-bool-compare -Wno-int-conversion -Wno-switch -Wno-stringop-truncation") + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-old-style-cast -Wno-unused-function -Wno-unused-variable -Wno-unused-but-set-variable -Wno-unused-result -Wno-deprecated-declarations -Wno-non-virtual-dtor -Wno-maybe-uninitialized -Wno-format -Wno-misleading-indentation -Wno-implicit-fallthrough -Wno-class-memaccess -Wno-sign-compare -Wno-array-bounds -Wno-missing-attributes -Wno-stringop-truncation -std=c++1z") elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-function -Wno-unused-variable -Wno-unused-result -Wno-deprecated-declarations -Wno-format -Wno-parentheses-equality -Wno-tautological-constant-compare -Wno-tautological-constant-out-of-range-compare -Wno-implicit-function-declaration -Wno-return-type -Wno-pointer-bool-conversion -Wno-enum-conversion -Wno-int-conversion -Wno-switch -Wno-string-plus-int") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-old-style-cast -Wno-unused-function -Wno-unused-variable -Wno-unused-result -Wno-deprecated-declarations -Wno-non-virtual-dtor -Wno-format -Wno-inconsistent-missing-override -std=c++1z") diff --git a/contrib/boost b/contrib/boost index 79bf85ea99c..8abda007bfe 160000 --- a/contrib/boost +++ b/contrib/boost @@ -1 +1 @@ -Subproject commit 79bf85ea99c05ba4fb6959474d4464ab126f8973 +Subproject commit 8abda007bfe52d78a51548d4594879d6d82a22fa diff --git a/contrib/hyperscan b/contrib/hyperscan index 05b0f9064cc..ed17d34a7c7 160000 --- a/contrib/hyperscan +++ b/contrib/hyperscan @@ -1 +1 @@ -Subproject commit 05b0f9064cca4bd55548dedb0a32ed9461146c1e +Subproject commit ed17d34a7c786512471946f9105eaa8d925f34c3 diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index 94b84884f41..55d59e254de 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -40,6 +40,10 @@ if(CMAKE_SYSTEM_NAME MATCHES "Darwin") list(APPEND SRCS ${JEMALLOC_SOURCE_DIR}/src/zone.c) endif() +if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -w") +endif () + add_library(jemalloc STATIC ${SRCS}) target_include_directories(jemalloc PUBLIC diff --git a/contrib/simdjson b/contrib/simdjson index 14cd1f7a0b0..2151ad7f34c 160000 --- a/contrib/simdjson +++ b/contrib/simdjson @@ -1 +1 @@ -Subproject commit 14cd1f7a0b0563db78bda8053a9f6ac2ea95a441 +Subproject commit 2151ad7f34cf773a23f086e941d661f8a8873144 diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index a14495dfc19..993b62801a9 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -49,19 +49,20 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") endif () if (NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7) - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") - if (WEVERYTHING) set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-return-std-move-in-c++11") endif () endif () if (NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8) - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wextra-semi-stmt -Wshadow-field -Wstring-plus-int -Wempty-init-stmt") + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wshadow-field -Wstring-plus-int") + if(NOT APPLE) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wextra-semi-stmt -Wempty-init-stmt") + endif() endif () if (NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9) - if (WEVERYTHING) + if (WEVERYTHING AND NOT APPLE) set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-ctad-maybe-unsupported") endif () endif () diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake index 78a4230c9d9..a853ea81913 100644 --- a/dbms/cmake/version.cmake +++ b/dbms/cmake/version.cmake @@ -1,11 +1,11 @@ # This strings autochanged from release_lib.sh: -set(VERSION_REVISION 54420) +set(VERSION_REVISION 54421) set(VERSION_MAJOR 19) -set(VERSION_MINOR 8) +set(VERSION_MINOR 9) set(VERSION_PATCH 1) -set(VERSION_GITHASH a76e504f45ff4a74e8c492bd269f022352d5f6d9) -set(VERSION_DESCRIBE v19.8.1.1-testing) -set(VERSION_STRING 19.8.1.1) +set(VERSION_GITHASH 0c2aa460651a462f14efc7e995840a244531d373) +set(VERSION_DESCRIBE v19.9.1.1-testing) +set(VERSION_STRING 19.9.1.1) # end of autochange set(VERSION_EXTRA "" CACHE STRING "") diff --git a/dbms/programs/benchmark/Benchmark.cpp b/dbms/programs/benchmark/Benchmark.cpp index 534ed415ca0..85206f1330a 100644 --- a/dbms/programs/benchmark/Benchmark.cpp +++ b/dbms/programs/benchmark/Benchmark.cpp @@ -325,8 +325,8 @@ private: double seconds = watch.elapsedSeconds(); std::lock_guard lock(mutex); - info_per_interval.add(seconds, progress.rows, progress.bytes, info.rows, info.bytes); - info_total.add(seconds, progress.rows, progress.bytes, info.rows, info.bytes); + info_per_interval.add(seconds, progress.read_rows, progress.read_bytes, info.rows, info.bytes); + info_total.add(seconds, progress.read_rows, progress.read_bytes, info.rows, info.bytes); } diff --git a/dbms/programs/clang/CMakeLists.txt b/dbms/programs/clang/CMakeLists.txt index 8bf9e3760e2..82f520614f4 100644 --- a/dbms/programs/clang/CMakeLists.txt +++ b/dbms/programs/clang/CMakeLists.txt @@ -26,8 +26,8 @@ elseif (EXISTS ${INTERNAL_COMPILER_BIN_ROOT}${INTERNAL_COMPILER_EXECUTABLE}) set (COPY_HEADERS_COMPILER "${INTERNAL_COMPILER_BIN_ROOT}${INTERNAL_COMPILER_EXECUTABLE}") endif () -if (COPY_HEADERS_COMPILER AND OS_LINUX) - add_custom_target (copy-headers [ -f ${TMP_HEADERS_DIR}/dbms/src/Interpreters/SpecializedAggregator.h ] || env CLANG=${COPY_HEADERS_COMPILER} BUILD_PATH=${ClickHouse_BINARY_DIR} DESTDIR=${ClickHouse_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/copy_headers.sh ${ClickHouse_SOURCE_DIR} ${TMP_HEADERS_DIR} DEPENDS ${COPY_HEADERS_DEPENDS} WORKING_DIRECTORY ${ClickHouse_SOURCE_DIR} SOURCES copy_headers.sh) +if (COPY_HEADERS_COMPILER) + add_custom_target (copy-headers [ -f ${TMP_HEADERS_DIR}/dbms/src/Interpreters/SpecializedAggregator.h ] || env CLANG=${COPY_HEADERS_COMPILER} BUILD_PATH=${ClickHouse_BINARY_DIR} DESTDIR=${ClickHouse_SOURCE_DIR} CMAKE_CXX_COMPILER_VERSION=${CMAKE_CXX_COMPILER_VERSION} ${CMAKE_CURRENT_SOURCE_DIR}/copy_headers.sh ${ClickHouse_SOURCE_DIR} ${TMP_HEADERS_DIR} DEPENDS ${COPY_HEADERS_DEPENDS} WORKING_DIRECTORY ${ClickHouse_SOURCE_DIR} SOURCES copy_headers.sh) if (USE_INTERNAL_LLVM_LIBRARY) set (CLANG_HEADERS_DIR "${ClickHouse_SOURCE_DIR}/contrib/llvm/clang/lib/Headers") diff --git a/dbms/programs/clang/copy_headers.sh b/dbms/programs/clang/copy_headers.sh index 2f7b2f9f5cf..45a58855c91 100755 --- a/dbms/programs/clang/copy_headers.sh +++ b/dbms/programs/clang/copy_headers.sh @@ -38,26 +38,28 @@ for header in $START_HEADERS; do START_HEADERS_INCLUDE+="-include $header " done -# Опция -mcx16 для того, чтобы выбиралось больше заголовочных файлов (с запасом). -# The latter options are the same that are added while building packages. -# TODO: Does not work on macos: GCC_ROOT=`$CLANG -v 2>&1 | grep "Selected GCC installation"| sed -n -e 's/^.*: //p'` -for src_file in $(echo | $CLANG -M -xc++ -std=c++1z -Wall -Werror -msse4 -mcx16 -mpopcnt -O3 -g -fPIC -fstack-protector -D_FORTIFY_SOURCE=2 \ +# TODO: Does not work on macos? +GCC_ROOT=${GCC_ROOT:=/usr/lib/clang/${CMAKE_CXX_COMPILER_VERSION}} + +# Опция -mcx16 для того, чтобы выбиралось больше заголовочных файлов (с запасом). +# The latter options are the same that are added while building packages. +for src_file in $(echo | $CLANG -M -xc++ -std=c++1z -Wall -Werror -msse2 -msse4 -mcx16 -mpopcnt -O3 -g -fPIC -fstack-protector -D_FORTIFY_SOURCE=2 \ -I $GCC_ROOT/include \ -I $GCC_ROOT/include-fixed \ $(cat "$BUILD_PATH/include_directories.txt") \ $START_HEADERS_INCLUDE \ - | tr -d '\\' | - sed --posix -E -e 's/^-\.o://'); + sed -E -e 's/^-\.o://'); do dst_file=$src_file; - [ -n $BUILD_PATH ] && dst_file=$(echo $dst_file | sed --posix -E -e "s!^$BUILD_PATH!!") - [ -n $DESTDIR ] && dst_file=$(echo $dst_file | sed --posix -E -e "s!^$DESTDIR!!") - dst_file=$(echo $dst_file | sed --posix -E -e 's/build\///') # for simplicity reasons, will put generated headers near the rest. - mkdir -p "$DST/$(echo $dst_file | sed --posix -E -e 's/\/[^/]*$/\//')"; + [ -n $BUILD_PATH ] && dst_file=$(echo $dst_file | sed -E -e "s!^$BUILD_PATH!!") + [ -n $DESTDIR ] && dst_file=$(echo $dst_file | sed -E -e "s!^$DESTDIR!!") + dst_file=$(echo $dst_file | sed -E -e 's/build\///') # for simplicity reasons, will put generated headers near the rest. + mkdir -p "$DST/$(echo $dst_file | sed -E -e 's/\/[^/]*$/\//')"; cp "$src_file" "$DST/$dst_file"; done @@ -68,9 +70,9 @@ done for src_file in $(ls -1 $($CLANG -v -xc++ - <<<'' 2>&1 | grep '^ /' | grep 'include' | grep -E '/lib/clang/|/include/clang/')/*.h | grep -vE 'arm|altivec|Intrin'); do dst_file=$src_file; - [ -n $BUILD_PATH ] && dst_file=$(echo $dst_file | sed --posix -E -e "s!^$BUILD_PATH!!") - [ -n $DESTDIR ] && dst_file=$(echo $dst_file | sed --posix -E -e "s!^$DESTDIR!!") - mkdir -p "$DST/$(echo $dst_file | sed --posix -E -e 's/\/[^/]*$/\//')"; + [ -n $BUILD_PATH ] && dst_file=$(echo $dst_file | sed -E -e "s!^$BUILD_PATH!!") + [ -n $DESTDIR ] && dst_file=$(echo $dst_file | sed -E -e "s!^$DESTDIR!!") + mkdir -p "$DST/$(echo $dst_file | sed -E -e 's/\/[^/]*$/\//')"; cp "$src_file" "$DST/$dst_file"; done @@ -79,9 +81,9 @@ if [ -d "$SOURCE_PATH/contrib/boost/libs/smart_ptr/include/boost/smart_ptr/detai for src_file in $(ls -1 $SOURCE_PATH/contrib/boost/libs/smart_ptr/include/boost/smart_ptr/detail/*); do dst_file=$src_file; - [ -n $BUILD_PATH ] && dst_file=$(echo $dst_file | sed --posix -E -e "s!^$BUILD_PATH!!") - [ -n $DESTDIR ] && dst_file=$(echo $dst_file | sed --posix -E -e "s!^$DESTDIR!!") - mkdir -p "$DST/$(echo $dst_file | sed --posix -E -e 's/\/[^/]*$/\//')"; + [ -n $BUILD_PATH ] && dst_file=$(echo $dst_file | sed -E -e "s!^$BUILD_PATH!!") + [ -n $DESTDIR ] && dst_file=$(echo $dst_file | sed -E -e "s!^$DESTDIR!!") + mkdir -p "$DST/$(echo $dst_file | sed -E -e 's/\/[^/]*$/\//')"; cp "$src_file" "$DST/$dst_file"; done fi @@ -90,9 +92,9 @@ if [ -d "$SOURCE_PATH/contrib/boost/boost/smart_ptr/detail" ]; then for src_file in $(ls -1 $SOURCE_PATH/contrib/boost/boost/smart_ptr/detail/*); do dst_file=$src_file; - [ -n $BUILD_PATH ] && dst_file=$(echo $dst_file | sed --posix -E -e "s!^$BUILD_PATH!!") - [ -n $DESTDIR ] && dst_file=$(echo $dst_file | sed --posix -E -e "s!^$DESTDIR!!") - mkdir -p "$DST/$(echo $dst_file | sed --posix -E -e 's/\/[^/]*$/\//')"; + [ -n $BUILD_PATH ] && dst_file=$(echo $dst_file | sed -E -e "s!^$BUILD_PATH!!") + [ -n $DESTDIR ] && dst_file=$(echo $dst_file | sed -E -e "s!^$DESTDIR!!") + mkdir -p "$DST/$(echo $dst_file | sed -E -e 's/\/[^/]*$/\//')"; cp "$src_file" "$DST/$dst_file"; done fi diff --git a/dbms/programs/client/CMakeLists.txt b/dbms/programs/client/CMakeLists.txt index ce02d45d0e0..06b142227d7 100644 --- a/dbms/programs/client/CMakeLists.txt +++ b/dbms/programs/client/CMakeLists.txt @@ -1,6 +1,19 @@ -set(CLICKHOUSE_CLIENT_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/Client.cpp) +set(CLICKHOUSE_CLIENT_SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/Client.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/ConnectionParameters.cpp +) + set(CLICKHOUSE_CLIENT_LINK PRIVATE clickhouse_common_config clickhouse_functions clickhouse_aggregate_functions clickhouse_common_io ${LINE_EDITING_LIBS} ${Boost_PROGRAM_OPTIONS_LIBRARY}) -set(CLICKHOUSE_CLIENT_INCLUDE SYSTEM PRIVATE ${READLINE_INCLUDE_DIR}) +set(CLICKHOUSE_CLIENT_INCLUDE SYSTEM PRIVATE ${READLINE_INCLUDE_DIR} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/include) + +include(CheckSymbolExists) +check_symbol_exists(readpassphrase readpassphrase.h HAVE_READPASSPHRASE) +configure_file(config_client.h.in ${CMAKE_CURRENT_BINARY_DIR}/include/config_client.h) + +if(NOT HAVE_READPASSPHRASE) + add_subdirectory(readpassphrase) + list(APPEND CLICKHOUSE_CLIENT_LINK PRIVATE readpassphrase) +endif() clickhouse_program_add(client) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 349f6690cbe..d8081d7c97c 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -435,7 +435,7 @@ private: #if USE_READLINE int res = read_history(history_file.c_str()); if (res) - throwFromErrno("Cannot read history from file " + history_file, ErrorCodes::CANNOT_READ_HISTORY); + std::cerr << "Cannot read history from file " + history_file + ": "+ errnoToString(ErrorCodes::CANNOT_READ_HISTORY); #endif } else /// Create history file. @@ -612,7 +612,7 @@ private: #if USE_READLINE && HAVE_READLINE_HISTORY if (!history_file.empty() && append_history(1, history_file.c_str())) - throwFromErrno("Cannot append history to file " + history_file, ErrorCodes::CANNOT_APPEND_HISTORY); + std::cerr << "Cannot append history to file " + history_file + ": " + errnoToString(ErrorCodes::CANNOT_APPEND_HISTORY); #endif prev_input = input; @@ -866,7 +866,7 @@ private: std::cout << std::endl << processed_rows << " rows in set. Elapsed: " << watch.elapsedSeconds() << " sec. "; - if (progress.rows >= 1000) + if (progress.read_rows >= 1000) writeFinalProgress(); std::cout << std::endl << std::endl; @@ -1420,23 +1420,23 @@ private: << " Progress: "; message - << formatReadableQuantity(progress.rows) << " rows, " - << formatReadableSizeWithDecimalSuffix(progress.bytes); + << formatReadableQuantity(progress.read_rows) << " rows, " + << formatReadableSizeWithDecimalSuffix(progress.read_bytes); size_t elapsed_ns = watch.elapsed(); if (elapsed_ns) message << " (" - << formatReadableQuantity(progress.rows * 1000000000.0 / elapsed_ns) << " rows/s., " - << formatReadableSizeWithDecimalSuffix(progress.bytes * 1000000000.0 / elapsed_ns) << "/s.) "; + << formatReadableQuantity(progress.read_rows * 1000000000.0 / elapsed_ns) << " rows/s., " + << formatReadableSizeWithDecimalSuffix(progress.read_bytes * 1000000000.0 / elapsed_ns) << "/s.) "; else message << ". "; written_progress_chars = message.count() - prefix_size - (increment % 8 == 7 ? 10 : 13); /// Don't count invisible output (escape sequences). /// If the approximate number of rows to process is known, we can display a progress bar and percentage. - if (progress.total_rows > 0) + if (progress.total_rows_to_read > 0) { - size_t total_rows_corrected = std::max(progress.rows, progress.total_rows); + size_t total_rows_corrected = std::max(progress.read_rows, progress.total_rows_to_read); /// To avoid flicker, display progress bar only if .5 seconds have passed since query execution start /// and the query is less than halfway done. @@ -1444,7 +1444,7 @@ private: if (elapsed_ns > 500000000) { /// Trigger to start displaying progress bar. If query is mostly done, don't display it. - if (progress.rows * 2 < total_rows_corrected) + if (progress.read_rows * 2 < total_rows_corrected) show_progress_bar = true; if (show_progress_bar) @@ -1452,7 +1452,7 @@ private: ssize_t width_of_progress_bar = static_cast(terminal_size.ws_col) - written_progress_chars - strlen(" 99%"); if (width_of_progress_bar > 0) { - std::string bar = UnicodeBar::render(UnicodeBar::getWidth(progress.rows, 0, total_rows_corrected, width_of_progress_bar)); + std::string bar = UnicodeBar::render(UnicodeBar::getWidth(progress.read_rows, 0, total_rows_corrected, width_of_progress_bar)); message << "\033[0;32m" << bar << "\033[0m"; if (width_of_progress_bar > static_cast(bar.size() / UNICODE_BAR_CHAR_SIZE)) message << std::string(width_of_progress_bar - bar.size() / UNICODE_BAR_CHAR_SIZE, ' '); @@ -1461,7 +1461,7 @@ private: } /// Underestimate percentage a bit to avoid displaying 100%. - message << ' ' << (99 * progress.rows / total_rows_corrected) << '%'; + message << ' ' << (99 * progress.read_rows / total_rows_corrected) << '%'; } message << ENABLE_LINE_WRAPPING; @@ -1474,14 +1474,14 @@ private: void writeFinalProgress() { std::cout << "Processed " - << formatReadableQuantity(progress.rows) << " rows, " - << formatReadableSizeWithDecimalSuffix(progress.bytes); + << formatReadableQuantity(progress.read_rows) << " rows, " + << formatReadableSizeWithDecimalSuffix(progress.read_bytes); size_t elapsed_ns = watch.elapsed(); if (elapsed_ns) std::cout << " (" - << formatReadableQuantity(progress.rows * 1000000000.0 / elapsed_ns) << " rows/s., " - << formatReadableSizeWithDecimalSuffix(progress.bytes * 1000000000.0 / elapsed_ns) << "/s.) "; + << formatReadableQuantity(progress.read_rows * 1000000000.0 / elapsed_ns) << " rows/s., " + << formatReadableSizeWithDecimalSuffix(progress.read_bytes * 1000000000.0 / elapsed_ns) << "/s.) "; else std::cout << ". "; } diff --git a/dbms/programs/client/ConnectionParameters.cpp b/dbms/programs/client/ConnectionParameters.cpp new file mode 100644 index 00000000000..e1611af249d --- /dev/null +++ b/dbms/programs/client/ConnectionParameters.cpp @@ -0,0 +1,63 @@ +#include "ConnectionParameters.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfiguration & config) +{ + bool is_secure = config.getBool("secure", false); + security = is_secure ? Protocol::Secure::Enable : Protocol::Secure::Disable; + + host = config.getString("host", "localhost"); + port = config.getInt( + "port", config.getInt(is_secure ? "tcp_port_secure" : "tcp_port", is_secure ? DBMS_DEFAULT_SECURE_PORT : DBMS_DEFAULT_PORT)); + + default_database = config.getString("database", ""); + /// changed the default value to "default" to fix the issue when the user in the prompt is blank + user = config.getString("user", "default"); + bool password_prompt = false; + if (config.getBool("ask-password", false)) + { + if (config.has("password")) + throw Exception("Specified both --password and --ask-password. Remove one of them", ErrorCodes::BAD_ARGUMENTS); + password_prompt = true; + } + else + { + password = config.getString("password", ""); + /// if the value of --password is omitted, the password will be set implicitly to "\n" + if (password == "\n") + password_prompt = true; + } + if (password_prompt) + { + std::string prompt{"Password for user (" + user + "): "}; + char buf[1000] = {}; + if (auto result = readpassphrase(prompt.c_str(), buf, sizeof(buf), 0)) + password = result; + } + compression = config.getBool("compression", true) ? Protocol::Compression::Enable : Protocol::Compression::Disable; + + timeouts = ConnectionTimeouts( + Poco::Timespan(config.getInt("connect_timeout", DBMS_DEFAULT_CONNECT_TIMEOUT_SEC), 0), + Poco::Timespan(config.getInt("send_timeout", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0), + Poco::Timespan(config.getInt("receive_timeout", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0), + Poco::Timespan(config.getInt("tcp_keep_alive_timeout", 0), 0)); +} +} diff --git a/dbms/programs/client/ConnectionParameters.h b/dbms/programs/client/ConnectionParameters.h index d1c055129eb..834f08df9cb 100644 --- a/dbms/programs/client/ConnectionParameters.h +++ b/dbms/programs/client/ConnectionParameters.h @@ -1,90 +1,30 @@ #pragma once -#include - -#include +#include #include -#include -#include #include -#include -#include - -#include - +namespace Poco::Util +{ +class AbstractConfiguration; +} namespace DB { - -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - struct ConnectionParameters { - String host; + std::string host; UInt16 port{}; - String default_database; - String user; - String password; + std::string default_database; + std::string user; + std::string password; Protocol::Secure security = Protocol::Secure::Disable; Protocol::Compression compression = Protocol::Compression::Enable; ConnectionTimeouts timeouts; ConnectionParameters() {} - ConnectionParameters(const Poco::Util::AbstractConfiguration & config) - { - bool is_secure = config.getBool("secure", false); - security = is_secure - ? Protocol::Secure::Enable - : Protocol::Secure::Disable; - - host = config.getString("host", "localhost"); - port = config.getInt("port", - config.getInt(is_secure ? "tcp_port_secure" : "tcp_port", - is_secure ? DBMS_DEFAULT_SECURE_PORT : DBMS_DEFAULT_PORT)); - - default_database = config.getString("database", ""); - /// changed the default value to "default" to fix the issue when the user in the prompt is blank - user = config.getString("user", "default"); - bool password_prompt = false; - if (config.getBool("ask-password", false)) - { - if (config.has("password")) - throw Exception("Specified both --password and --ask-password. Remove one of them", ErrorCodes::BAD_ARGUMENTS); - password_prompt = true; - } - else - { - password = config.getString("password", ""); - /// if the value of --password is omitted, the password will be set implicitly to "\n" - if (password == "\n") - password_prompt = true; - } - if (password_prompt) - { - std::cout << "Password for user (" << user << "): "; - setTerminalEcho(false); - - SCOPE_EXIT({ - setTerminalEcho(true); - }); - std::getline(std::cin, password); - std::cout << std::endl; - } - compression = config.getBool("compression", true) - ? Protocol::Compression::Enable - : Protocol::Compression::Disable; - - timeouts = ConnectionTimeouts( - Poco::Timespan(config.getInt("connect_timeout", DBMS_DEFAULT_CONNECT_TIMEOUT_SEC), 0), - Poco::Timespan(config.getInt("send_timeout", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0), - Poco::Timespan(config.getInt("receive_timeout", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0), - Poco::Timespan(config.getInt("tcp_keep_alive_timeout", 0), 0)); - } + ConnectionParameters(const Poco::Util::AbstractConfiguration & config); }; } diff --git a/dbms/programs/client/config_client.h.in b/dbms/programs/client/config_client.h.in new file mode 100644 index 00000000000..5ad788ff54c --- /dev/null +++ b/dbms/programs/client/config_client.h.in @@ -0,0 +1,3 @@ +#pragma once + +#cmakedefine HAVE_READPASSPHRASE diff --git a/dbms/programs/client/readpassphrase/CMakeLists.txt b/dbms/programs/client/readpassphrase/CMakeLists.txt new file mode 100644 index 00000000000..b7ff6db31ee --- /dev/null +++ b/dbms/programs/client/readpassphrase/CMakeLists.txt @@ -0,0 +1,10 @@ + +# wget https://raw.githubusercontent.com/openssh/openssh-portable/master/openbsd-compat/readpassphrase.c +# wget https://raw.githubusercontent.com/openssh/openssh-portable/master/openbsd-compat/readpassphrase.h + +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-result -Wno-reserved-id-macro") + +configure_file(includes.h.in ${CMAKE_CURRENT_BINARY_DIR}/include/includes.h) +add_library(readpassphrase ${CMAKE_CURRENT_SOURCE_DIR}/readpassphrase.c) +# . to allow #include +target_include_directories(readpassphrase PUBLIC . ${CMAKE_CURRENT_BINARY_DIR}/include ${CMAKE_CURRENT_BINARY_DIR}/../include) diff --git a/dbms/programs/client/readpassphrase/includes.h.in b/dbms/programs/client/readpassphrase/includes.h.in new file mode 100644 index 00000000000..44580d1ed95 --- /dev/null +++ b/dbms/programs/client/readpassphrase/includes.h.in @@ -0,0 +1,9 @@ +#pragma once + +#cmakedefine HAVE_READPASSPHRASE + +#if !defined(HAVE_READPASSPHRASE) +# ifndef _PATH_TTY +# define _PATH_TTY "/dev/tty" +# endif +#endif diff --git a/dbms/programs/client/readpassphrase/readpassphrase.c b/dbms/programs/client/readpassphrase/readpassphrase.c new file mode 100644 index 00000000000..be12c923db2 --- /dev/null +++ b/dbms/programs/client/readpassphrase/readpassphrase.c @@ -0,0 +1,211 @@ +/* $OpenBSD: readpassphrase.c,v 1.26 2016/10/18 12:47:18 millert Exp $ */ + +/* + * Copyright (c) 2000-2002, 2007, 2010 + * Todd C. Miller + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F39502-99-1-0512. + */ + +/* OPENBSD ORIGINAL: lib/libc/gen/readpassphrase.c */ + +#include "includes.h" + +#ifndef HAVE_READPASSPHRASE + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef TCSASOFT +/* If we don't have TCSASOFT define it so that ORing it it below is a no-op. */ +# define TCSASOFT 0 +#endif + +/* SunOS 4.x which lacks _POSIX_VDISABLE, but has VDISABLE */ +#if !defined(_POSIX_VDISABLE) && defined(VDISABLE) +# define _POSIX_VDISABLE VDISABLE +#endif + +static volatile sig_atomic_t signo[_NSIG]; + +static void handler(int); + +char * +readpassphrase(const char *prompt, char *buf, size_t bufsiz, int flags) +{ + ssize_t nr; + int input, output, save_errno, i, need_restart; + char ch, *p, *end; + struct termios term, oterm; + struct sigaction sa, savealrm, saveint, savehup, savequit, saveterm; + struct sigaction savetstp, savettin, savettou, savepipe; + + /* I suppose we could alloc on demand in this case (XXX). */ + if (bufsiz == 0) { + errno = EINVAL; + return(NULL); + } + +restart: + for (i = 0; i < _NSIG; i++) + signo[i] = 0; + nr = -1; + save_errno = 0; + need_restart = 0; + /* + * Read and write to /dev/tty if available. If not, read from + * stdin and write to stderr unless a tty is required. + */ + if ((flags & RPP_STDIN) || + (input = output = open(_PATH_TTY, O_RDWR)) == -1) { + if (flags & RPP_REQUIRE_TTY) { + errno = ENOTTY; + return(NULL); + } + input = STDIN_FILENO; + output = STDERR_FILENO; + } + + /* + * Turn off echo if possible. + * If we are using a tty but are not the foreground pgrp this will + * generate SIGTTOU, so do it *before* installing the signal handlers. + */ + if (input != STDIN_FILENO && tcgetattr(input, &oterm) == 0) { + memcpy(&term, &oterm, sizeof(term)); + if (!(flags & RPP_ECHO_ON)) + term.c_lflag &= ~(ECHO | ECHONL); +#ifdef VSTATUS + if (term.c_cc[VSTATUS] != _POSIX_VDISABLE) + term.c_cc[VSTATUS] = _POSIX_VDISABLE; +#endif + (void)tcsetattr(input, TCSAFLUSH|TCSASOFT, &term); + } else { + memset(&term, 0, sizeof(term)); + term.c_lflag |= ECHO; + memset(&oterm, 0, sizeof(oterm)); + oterm.c_lflag |= ECHO; + } + + /* + * Catch signals that would otherwise cause the user to end + * up with echo turned off in the shell. Don't worry about + * things like SIGXCPU and SIGVTALRM for now. + */ + sigemptyset(&sa.sa_mask); + sa.sa_flags = 0; /* don't restart system calls */ + sa.sa_handler = handler; + (void)sigaction(SIGALRM, &sa, &savealrm); + (void)sigaction(SIGHUP, &sa, &savehup); + (void)sigaction(SIGINT, &sa, &saveint); + (void)sigaction(SIGPIPE, &sa, &savepipe); + (void)sigaction(SIGQUIT, &sa, &savequit); + (void)sigaction(SIGTERM, &sa, &saveterm); + (void)sigaction(SIGTSTP, &sa, &savetstp); + (void)sigaction(SIGTTIN, &sa, &savettin); + (void)sigaction(SIGTTOU, &sa, &savettou); + + if (!(flags & RPP_STDIN)) + (void)write(output, prompt, strlen(prompt)); + end = buf + bufsiz - 1; + p = buf; + while ((nr = read(input, &ch, 1)) == 1 && ch != '\n' && ch != '\r') { + if (p < end) { + if ((flags & RPP_SEVENBIT)) + ch &= 0x7f; + if (isalpha((unsigned char)ch)) { + if ((flags & RPP_FORCELOWER)) + ch = (char)tolower((unsigned char)ch); + if ((flags & RPP_FORCEUPPER)) + ch = (char)toupper((unsigned char)ch); + } + *p++ = ch; + } + } + *p = '\0'; + save_errno = errno; + if (!(term.c_lflag & ECHO)) + (void)write(output, "\n", 1); + + /* Restore old terminal settings and signals. */ + if (memcmp(&term, &oterm, sizeof(term)) != 0) { + const int sigttou = signo[SIGTTOU]; + + /* Ignore SIGTTOU generated when we are not the fg pgrp. */ + while (tcsetattr(input, TCSAFLUSH|TCSASOFT, &oterm) == -1 && + errno == EINTR && !signo[SIGTTOU]) + continue; + signo[SIGTTOU] = sigttou; + } + (void)sigaction(SIGALRM, &savealrm, NULL); + (void)sigaction(SIGHUP, &savehup, NULL); + (void)sigaction(SIGINT, &saveint, NULL); + (void)sigaction(SIGQUIT, &savequit, NULL); + (void)sigaction(SIGPIPE, &savepipe, NULL); + (void)sigaction(SIGTERM, &saveterm, NULL); + (void)sigaction(SIGTSTP, &savetstp, NULL); + (void)sigaction(SIGTTIN, &savettin, NULL); + (void)sigaction(SIGTTOU, &savettou, NULL); + if (input != STDIN_FILENO) + (void)close(input); + + /* + * If we were interrupted by a signal, resend it to ourselves + * now that we have restored the signal handlers. + */ + for (i = 0; i < _NSIG; i++) { + if (signo[i]) { + kill(getpid(), i); + switch (i) { + case SIGTSTP: + case SIGTTIN: + case SIGTTOU: + need_restart = 1; + } + } + } + if (need_restart) + goto restart; + + if (save_errno) + errno = save_errno; + return(nr == -1 ? NULL : buf); +} +//DEF_WEAK(readpassphrase); + +#if 0 +char * +getpass(const char *prompt) +{ + static char buf[_PASSWORD_LEN + 1]; + + return(readpassphrase(prompt, buf, sizeof(buf), RPP_ECHO_OFF)); +} +#endif + +static void handler(int s) +{ + + signo[s] = 1; +} +#endif /* HAVE_READPASSPHRASE */ diff --git a/dbms/programs/client/readpassphrase/readpassphrase.h b/dbms/programs/client/readpassphrase/readpassphrase.h new file mode 100644 index 00000000000..d504cff5f00 --- /dev/null +++ b/dbms/programs/client/readpassphrase/readpassphrase.h @@ -0,0 +1,56 @@ +// /* $OpenBSD: readpassphrase.h,v 1.5 2003/06/17 21:56:23 millert Exp $ */ + +/* + * Copyright (c) 2000, 2002 Todd C. Miller + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F39502-99-1-0512. + */ + +/* OPENBSD ORIGINAL: include/readpassphrase.h */ + +#pragma once +// #ifndef _READPASSPHRASE_H_ +// #define _READPASSPHRASE_H_ + +//#include "includes.h" +#include "config_client.h" + +#ifndef HAVE_READPASSPHRASE + +# ifdef __cplusplus +extern "C" { +# endif + + +# define RPP_ECHO_OFF 0x00 /* Turn off echo (default). */ +# define RPP_ECHO_ON 0x01 /* Leave echo on. */ +# define RPP_REQUIRE_TTY 0x02 /* Fail if there is no tty. */ +# define RPP_FORCELOWER 0x04 /* Force input to lower case. */ +# define RPP_FORCEUPPER 0x08 /* Force input to upper case. */ +# define RPP_SEVENBIT 0x10 /* Strip the high bit from input. */ +# define RPP_STDIN 0x20 /* Read from stdin, not /dev/tty */ + +char * readpassphrase(const char *, char *, size_t, int); + +# ifdef __cplusplus +} +# endif + + +#endif /* HAVE_READPASSPHRASE */ + +// #endif /* !_READPASSPHRASE_H_ */ diff --git a/dbms/programs/performance-test/PerformanceTest.cpp b/dbms/programs/performance-test/PerformanceTest.cpp index 5d7a342799f..56ac11284e1 100644 --- a/dbms/programs/performance-test/PerformanceTest.cpp +++ b/dbms/programs/performance-test/PerformanceTest.cpp @@ -1,6 +1,7 @@ #include "PerformanceTest.h" #include +#include #include #include #include @@ -71,6 +72,7 @@ bool PerformanceTest::checkPreconditions() const Strings preconditions; config->keys("preconditions", preconditions); size_t table_precondition_index = 0; + size_t cpu_precondition_index = 0; for (const std::string & precondition : preconditions) { @@ -136,6 +138,30 @@ bool PerformanceTest::checkPreconditions() const return false; } } + + if (precondition == "cpu") + { + std::string precondition_key = "preconditions.cpu[" + std::to_string(cpu_precondition_index++) + "]"; + std::string flag_to_check = config->getString(precondition_key); + + #define CHECK_CPU_PRECONDITION(OP) \ + if (flag_to_check == #OP) \ + { \ + if (!Cpu::CpuFlagsCache::have_##OP) \ + { \ + LOG_WARNING(log, "CPU doesn't support " << #OP); \ + return false; \ + } \ + } else + + CPU_ID_ENUMERATE(CHECK_CPU_PRECONDITION) + { + LOG_WARNING(log, "CPU doesn't support " << flag_to_check); + return false; + } + + #undef CHECK_CPU_PRECONDITION + } } return true; @@ -159,17 +185,9 @@ UInt64 PerformanceTest::calculateMaxExecTime() const void PerformanceTest::prepare() const { - for (const auto & query : test_info.create_queries) + for (const auto & query : test_info.create_and_fill_queries) { - LOG_INFO(log, "Executing create query \"" << query << '\"'); - connection.sendQuery(query, "", QueryProcessingStage::Complete, &test_info.settings, nullptr, false); - waitQuery(connection); - LOG_INFO(log, "Query finished"); - } - - for (const auto & query : test_info.fill_queries) - { - LOG_INFO(log, "Executing fill query \"" << query << '\"'); + LOG_INFO(log, "Executing create or fill query \"" << query << '\"'); connection.sendQuery(query, "", QueryProcessingStage::Complete, &test_info.settings, nullptr, false); waitQuery(connection); LOG_INFO(log, "Query finished"); diff --git a/dbms/programs/performance-test/PerformanceTest.h b/dbms/programs/performance-test/PerformanceTest.h index 66f758231bc..6368c1f1040 100644 --- a/dbms/programs/performance-test/PerformanceTest.h +++ b/dbms/programs/performance-test/PerformanceTest.h @@ -30,11 +30,6 @@ public: std::vector execute(); void finish() const; - const PerformanceTestInfo & getTestInfo() const - { - return test_info; - } - bool checkSIGINT() const { return got_SIGINT; diff --git a/dbms/programs/performance-test/PerformanceTestInfo.cpp b/dbms/programs/performance-test/PerformanceTestInfo.cpp index 4b114823c6d..509532f418b 100644 --- a/dbms/programs/performance-test/PerformanceTestInfo.cpp +++ b/dbms/programs/performance-test/PerformanceTestInfo.cpp @@ -60,10 +60,10 @@ PerformanceTestInfo::PerformanceTestInfo( applySettings(config); extractQueries(config); + extractAuxiliaryQueries(config); processSubstitutions(config); getExecutionType(config); getStopConditions(config); - extractAuxiliaryQueries(config); } void PerformanceTestInfo::applySettings(XMLConfigurationPtr config) @@ -153,13 +153,29 @@ void PerformanceTestInfo::processSubstitutions(XMLConfigurationPtr config) ConfigurationPtr substitutions_view(config->createView("substitutions")); constructSubstitutions(substitutions_view, substitutions); - auto queries_pre_format = queries; + auto create_and_fill_queries_preformat = create_and_fill_queries; + create_and_fill_queries.clear(); + for (const auto & query : create_and_fill_queries_preformat) + { + auto formatted = formatQueries(query, substitutions); + create_and_fill_queries.insert(create_and_fill_queries.end(), formatted.begin(), formatted.end()); + } + + auto queries_preformat = queries; queries.clear(); - for (const auto & query : queries_pre_format) + for (const auto & query : queries_preformat) { auto formatted = formatQueries(query, substitutions); queries.insert(queries.end(), formatted.begin(), formatted.end()); } + + auto drop_queries_preformat = drop_queries; + drop_queries.clear(); + for (const auto & query : drop_queries_preformat) + { + auto formatted = formatQueries(query, substitutions); + drop_queries.insert(drop_queries.end(), formatted.begin(), formatted.end()); + } } } @@ -203,13 +219,20 @@ void PerformanceTestInfo::getStopConditions(XMLConfigurationPtr config) void PerformanceTestInfo::extractAuxiliaryQueries(XMLConfigurationPtr config) { if (config->has("create_query")) - create_queries = getMultipleValuesFromConfig(*config, "", "create_query"); + { + create_and_fill_queries = getMultipleValuesFromConfig(*config, "", "create_query"); + } if (config->has("fill_query")) - fill_queries = getMultipleValuesFromConfig(*config, "", "fill_query"); + { + auto fill_queries = getMultipleValuesFromConfig(*config, "", "fill_query"); + create_and_fill_queries.insert(create_and_fill_queries.end(), fill_queries.begin(), fill_queries.end()); + } if (config->has("drop_query")) + { drop_queries = getMultipleValuesFromConfig(*config, "", "drop_query"); + } } } diff --git a/dbms/programs/performance-test/PerformanceTestInfo.h b/dbms/programs/performance-test/PerformanceTestInfo.h index 453bf0d30be..4483e56bbfe 100644 --- a/dbms/programs/performance-test/PerformanceTestInfo.h +++ b/dbms/programs/performance-test/PerformanceTestInfo.h @@ -42,8 +42,7 @@ public: std::vector stop_conditions_by_run; - Strings create_queries; - Strings fill_queries; + Strings create_and_fill_queries; Strings drop_queries; private: @@ -52,7 +51,6 @@ private: void processSubstitutions(XMLConfigurationPtr config); void getExecutionType(XMLConfigurationPtr config); void getStopConditions(XMLConfigurationPtr config); - void getMetrics(XMLConfigurationPtr config); void extractAuxiliaryQueries(XMLConfigurationPtr config); }; diff --git a/dbms/programs/performance-test/PerformanceTestSuite.cpp b/dbms/programs/performance-test/PerformanceTestSuite.cpp index 523e6787364..ef0ee715c49 100644 --- a/dbms/programs/performance-test/PerformanceTestSuite.cpp +++ b/dbms/programs/performance-test/PerformanceTestSuite.cpp @@ -202,8 +202,7 @@ private: LOG_INFO(log, "Preconditions for test '" << info.test_name << "' are fullfilled"); LOG_INFO( log, - "Preparing for run, have " << info.create_queries.size() << " create queries and " << info.fill_queries.size() - << " fill queries"); + "Preparing for run, have " << info.create_and_fill_queries.size() << " create and fill queries"); current.prepare(); LOG_INFO(log, "Prepared"); LOG_INFO(log, "Running test '" << info.test_name << "'"); diff --git a/dbms/programs/performance-test/executeQuery.cpp b/dbms/programs/performance-test/executeQuery.cpp index 58f56d0ba10..f12808eac36 100644 --- a/dbms/programs/performance-test/executeQuery.cpp +++ b/dbms/programs/performance-test/executeQuery.cpp @@ -13,7 +13,7 @@ void checkFulfilledConditionsAndUpdate( TestStats & statistics, TestStopConditions & stop_conditions, InterruptListener & interrupt_listener) { - statistics.add(progress.rows, progress.bytes); + statistics.add(progress.read_rows, progress.read_bytes); stop_conditions.reportRowsRead(statistics.total_rows_read); stop_conditions.reportBytesReadUncompressed(statistics.total_bytes_read); diff --git a/dbms/programs/server/CMakeLists.txt b/dbms/programs/server/CMakeLists.txt index 5cb08018065..fe260c1192d 100644 --- a/dbms/programs/server/CMakeLists.txt +++ b/dbms/programs/server/CMakeLists.txt @@ -8,6 +8,8 @@ set(CLICKHOUSE_SERVER_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/RootRequestHandler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/Server.cpp ${CMAKE_CURRENT_SOURCE_DIR}/TCPHandler.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/MySQLHandler.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/MySQLHandlerFactory.cpp ) set(CLICKHOUSE_SERVER_LINK PRIVATE clickhouse_dictionaries clickhouse_common_io PUBLIC daemon PRIVATE clickhouse_storages_system clickhouse_functions clickhouse_aggregate_functions clickhouse_table_functions ${Poco_Net_LIBRARY}) diff --git a/dbms/programs/server/HTTPHandler.cpp b/dbms/programs/server/HTTPHandler.cpp index ee8a50662c9..82f2fa3ccc9 100644 --- a/dbms/programs/server/HTTPHandler.cpp +++ b/dbms/programs/server/HTTPHandler.cpp @@ -453,30 +453,6 @@ void HTTPHandler::processQuery( return false; }; - /// Used in case of POST request with form-data, but it isn't expected to be deleted after that scope. - std::string full_query; - - /// Support for "external data for query processing". - if (startsWith(request.getContentType().data(), "multipart/form-data")) - { - ExternalTablesHandler handler(context, params); - params.load(request, istr, handler); - - /// Skip unneeded parameters to avoid confusing them later with context settings or query parameters. - reserved_param_suffixes.emplace_back("_format"); - reserved_param_suffixes.emplace_back("_types"); - reserved_param_suffixes.emplace_back("_structure"); - - /// Params are of both form params POST and uri (GET params) - for (const auto & it : params) - if (it.first == "query") - full_query += it.second; - - in = std::make_unique(full_query); - } - else - in = std::make_unique(*in_param, *in_post_maybe_compressed); - /// Settings can be overridden in the query. /// Some parameters (database, default_format, everything used in the code above) do not /// belong to the Settings class. @@ -497,30 +473,63 @@ void HTTPHandler::processQuery( settings.readonly = 2; } - SettingsChanges settings_changes; - for (auto it = params.begin(); it != params.end(); ++it) + bool isExternalData = startsWith(request.getContentType().data(), "multipart/form-data"); + + if (isExternalData) { - if (it->first == "database") + /// Skip unneeded parameters to avoid confusing them later with context settings or query parameters. + reserved_param_suffixes.reserve(3); + /// It is a bug and ambiguity with `date_time_input_format` and `low_cardinality_allow_in_native_format` formats/settings. + reserved_param_suffixes.emplace_back("_format"); + reserved_param_suffixes.emplace_back("_types"); + reserved_param_suffixes.emplace_back("_structure"); + } + + SettingsChanges settings_changes; + for (const auto & [key, value] : params) + { + if (key == "database") { - context.setCurrentDatabase(it->second); + context.setCurrentDatabase(value); } - else if (it->first == "default_format") + else if (key == "default_format") { - context.setDefaultFormat(it->second); + context.setDefaultFormat(value); } - else if (param_could_be_skipped(it->first)) + else if (param_could_be_skipped(key)) { } else { /// All other query parameters are treated as settings. - settings_changes.push_back({it->first, it->second}); + settings_changes.push_back({key, value}); } } + /// For external data we also want settings context.checkSettingsConstraints(settings_changes); context.applySettingsChanges(settings_changes); + /// Used in case of POST request with form-data, but it isn't expected to be deleted after that scope. + std::string full_query; + + /// Support for "external data for query processing". + if (isExternalData) + { + ExternalTablesHandler handler(context, params); + params.load(request, istr, handler); + + /// Params are of both form params POST and uri (GET params) + for (const auto & it : params) + if (it.first == "query") + full_query += it.second; + + in = std::make_unique(full_query); + } + else + in = std::make_unique(*in_param, *in_post_maybe_compressed); + + /// HTTP response compression is turned on only if the client signalled that they support it /// (using Accept-Encoding header) and 'enable_http_compression' setting is turned on. used_output.out->setCompression(client_supports_http_compression && settings.enable_http_compression); diff --git a/dbms/programs/server/InterserverIOHTTPHandler.cpp b/dbms/programs/server/InterserverIOHTTPHandler.cpp index 94095365b6a..27e4c7041c4 100644 --- a/dbms/programs/server/InterserverIOHTTPHandler.cpp +++ b/dbms/programs/server/InterserverIOHTTPHandler.cpp @@ -1,17 +1,16 @@ +#include "InterserverIOHTTPHandler.h" + #include #include #include - #include - #include #include #include #include #include #include - -#include "InterserverIOHTTPHandler.h" +#include "IServer.h" namespace DB { @@ -50,7 +49,7 @@ std::pair InterserverIOHTTPHandler::checkAuthentication(Poco::Net: return {"", true}; } -void InterserverIOHTTPHandler::processQuery(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) +void InterserverIOHTTPHandler::processQuery(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response, Output & used_output) { HTMLForm params(request); @@ -61,24 +60,17 @@ void InterserverIOHTTPHandler::processQuery(Poco::Net::HTTPServerRequest & reque ReadBufferFromIStream body(request.stream()); - const auto & config = server.config(); - unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", 10); - - WriteBufferFromHTTPServerResponse out(request, response, keep_alive_timeout); - auto endpoint = server.context().getInterserverIOHandler().getEndpoint(endpoint_name); if (compress) { - CompressedWriteBuffer compressed_out(out); + CompressedWriteBuffer compressed_out(*used_output.out.get()); endpoint->processQuery(params, body, compressed_out, response); } else { - endpoint->processQuery(params, body, out, response); + endpoint->processQuery(params, body, *used_output.out.get(), response); } - - out.finalize(); } @@ -90,30 +82,30 @@ void InterserverIOHTTPHandler::handleRequest(Poco::Net::HTTPServerRequest & requ if (request.getVersion() == Poco::Net::HTTPServerRequest::HTTP_1_1) response.setChunkedTransferEncoding(true); + Output used_output; + const auto & config = server.config(); + unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", 10); + used_output.out = std::make_shared(request, response, keep_alive_timeout); + try { - if (auto [msg, success] = checkAuthentication(request); success) + if (auto [message, success] = checkAuthentication(request); success) { - processQuery(request, response); + processQuery(request, response, used_output); LOG_INFO(log, "Done processing query"); } else { response.setStatusAndReason(Poco::Net::HTTPServerResponse::HTTP_UNAUTHORIZED); if (!response.sent()) - response.send() << msg << std::endl; + writeString(message, *used_output.out); LOG_WARNING(log, "Query processing failed request: '" << request.getURI() << "' authentification failed"); } } catch (Exception & e) { - if (e.code() == ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES) - { - if (!response.sent()) - response.send(); return; - } response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); @@ -122,7 +114,7 @@ void InterserverIOHTTPHandler::handleRequest(Poco::Net::HTTPServerRequest & requ std::string message = getCurrentExceptionMessage(is_real_error); if (!response.sent()) - response.send() << message << std::endl; + writeString(message, *used_output.out); if (is_real_error) LOG_ERROR(log, message); @@ -134,7 +126,8 @@ void InterserverIOHTTPHandler::handleRequest(Poco::Net::HTTPServerRequest & requ response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); std::string message = getCurrentExceptionMessage(false); if (!response.sent()) - response.send() << message << std::endl; + writeString(message, *used_output.out); + LOG_ERROR(log, message); } } diff --git a/dbms/programs/server/InterserverIOHTTPHandler.h b/dbms/programs/server/InterserverIOHTTPHandler.h index fbaf432d4f9..8dc1962664c 100644 --- a/dbms/programs/server/InterserverIOHTTPHandler.h +++ b/dbms/programs/server/InterserverIOHTTPHandler.h @@ -1,12 +1,10 @@ #pragma once +#include #include #include - #include -#include "IServer.h" - namespace CurrentMetrics { @@ -16,6 +14,9 @@ namespace CurrentMetrics namespace DB { +class IServer; +class WriteBufferFromHTTPServerResponse; + class InterserverIOHTTPHandler : public Poco::Net::HTTPRequestHandler { public: @@ -28,12 +29,17 @@ public: void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override; private: + struct Output + { + std::shared_ptr out; + }; + IServer & server; Poco::Logger * log; CurrentMetrics::Increment metric_increment{CurrentMetrics::InterserverConnection}; - void processQuery(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response); + void processQuery(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response, Output & used_output); std::pair checkAuthentication(Poco::Net::HTTPServerRequest & request) const; }; diff --git a/dbms/programs/server/MySQLHandler.cpp b/dbms/programs/server/MySQLHandler.cpp new file mode 100644 index 00000000000..0e832f619fc --- /dev/null +++ b/dbms/programs/server/MySQLHandler.cpp @@ -0,0 +1,370 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "MySQLHandler.h" +#include +#include + + +namespace DB +{ +using namespace MySQLProtocol; +using Poco::Net::SecureStreamSocket; +using Poco::Net::SSLManager; + +namespace ErrorCodes +{ + extern const int MYSQL_CLIENT_INSUFFICIENT_CAPABILITIES; + extern const int OPENSSL_ERROR; +} + +MySQLHandler::MySQLHandler(IServer & server_, const Poco::Net::StreamSocket & socket_, RSA & public_key, RSA & private_key, bool ssl_enabled, size_t connection_id) + : Poco::Net::TCPServerConnection(socket_) + , server(server_) + , log(&Poco::Logger::get("MySQLHandler")) + , connection_context(server.context()) + , connection_id(connection_id) + , public_key(public_key) + , private_key(private_key) +{ + server_capability_flags = CLIENT_PROTOCOL_41 | CLIENT_SECURE_CONNECTION | CLIENT_PLUGIN_AUTH | CLIENT_PLUGIN_AUTH_LENENC_CLIENT_DATA | CLIENT_CONNECT_WITH_DB | CLIENT_DEPRECATE_EOF; + if (ssl_enabled) + server_capability_flags |= CLIENT_SSL; +} + +void MySQLHandler::run() +{ + connection_context = server.context(); + connection_context.setDefaultFormat("MySQLWire"); + + in = std::make_shared(socket()); + out = std::make_shared(socket()); + packet_sender = std::make_shared(*in, *out, connection_context.sequence_id); + + try + { + String scramble = generateScramble(); + + /** Native authentication sent 20 bytes + '\0' character = 21 bytes. + * This plugin must do the same to stay consistent with historical behavior if it is set to operate as a default plugin. + * https://github.com/mysql/mysql-server/blob/8.0/sql/auth/sql_authentication.cc#L3994 + */ + Handshake handshake(server_capability_flags, connection_id, VERSION_STRING + String("-") + VERSION_NAME, scramble + '\0'); + packet_sender->sendPacket(handshake, true); + + LOG_TRACE(log, "Sent handshake"); + + HandshakeResponse handshake_response = finishHandshake(); + connection_context.client_capabilities = handshake_response.capability_flags; + if (handshake_response.max_packet_size) + connection_context.max_packet_size = handshake_response.max_packet_size; + if (!connection_context.max_packet_size) + connection_context.max_packet_size = MAX_PACKET_LENGTH; + + LOG_DEBUG(log, "Capabilities: " << handshake_response.capability_flags + << "\nmax_packet_size: " + << handshake_response.max_packet_size + << "\ncharacter_set: " + << handshake_response.character_set + << "\nuser: " + << handshake_response.username + << "\nauth_response length: " + << handshake_response.auth_response.length() + << "\nauth_response: " + << handshake_response.auth_response + << "\ndatabase: " + << handshake_response.database + << "\nauth_plugin_name: " + << handshake_response.auth_plugin_name); + + client_capability_flags = handshake_response.capability_flags; + if (!(client_capability_flags & CLIENT_PROTOCOL_41)) + throw Exception("Required capability: CLIENT_PROTOCOL_41.", ErrorCodes::MYSQL_CLIENT_INSUFFICIENT_CAPABILITIES); + if (!(client_capability_flags & CLIENT_PLUGIN_AUTH)) + throw Exception("Required capability: CLIENT_PLUGIN_AUTH.", ErrorCodes::MYSQL_CLIENT_INSUFFICIENT_CAPABILITIES); + + authenticate(handshake_response, scramble); + OK_Packet ok_packet(0, handshake_response.capability_flags, 0, 0, 0); + packet_sender->sendPacket(ok_packet, true); + + while (true) + { + packet_sender->resetSequenceId(); + String payload = packet_sender->receivePacketPayload(); + int command = payload[0]; + LOG_DEBUG(log, "Received command: " << std::to_string(command) << ". Connection id: " << connection_id << "."); + try + { + switch (command) + { + case COM_QUIT: + return; + case COM_INIT_DB: + comInitDB(payload); + break; + case COM_QUERY: + comQuery(payload); + break; + case COM_FIELD_LIST: + comFieldList(payload); + break; + case COM_PING: + comPing(); + break; + default: + throw Exception(Poco::format("Command %d is not implemented.", command), ErrorCodes::NOT_IMPLEMENTED); + } + } + catch (const NetException & exc) + { + log->log(exc); + throw; + } + catch (const Exception & exc) + { + log->log(exc); + packet_sender->sendPacket(ERR_Packet(exc.code(), "00000", exc.message()), true); + } + } + } + catch (Poco::Exception & exc) + { + log->log(exc); + } +} + +/** Reads 3 bytes, finds out whether it is SSLRequest or HandshakeResponse packet, starts secure connection, if it is SSLRequest. + * Reading is performed from socket instead of ReadBuffer to prevent reading part of SSL handshake. + * If we read it from socket, it will be impossible to start SSL connection using Poco. Size of SSLRequest packet payload is 32 bytes, thus we can read at most 36 bytes. + */ +MySQLProtocol::HandshakeResponse MySQLHandler::finishHandshake() +{ + HandshakeResponse packet; + size_t packet_size = PACKET_HEADER_SIZE + SSL_REQUEST_PAYLOAD_SIZE; + + /// Buffer for SSLRequest or part of HandshakeResponse. + char buf[packet_size]; + size_t pos = 0; + + /// Reads at least count and at most packet_size bytes. + auto read_bytes = [this, &buf, &pos, &packet_size](size_t count) -> void { + while (pos < count) + { + int ret = socket().receiveBytes(buf + pos, packet_size - pos); + if (ret == 0) + { + throw Exception("Cannot read all data. Bytes read: " + std::to_string(pos) + ". Bytes expected: 3.", ErrorCodes::CANNOT_READ_ALL_DATA); + } + pos += ret; + } + }; + read_bytes(3); /// We can find out whether it is SSLRequest of HandshakeResponse by first 3 bytes. + + size_t payload_size = unalignedLoad(buf) & 0xFFFFFFu; + LOG_TRACE(log, "payload size: " << payload_size); + + if (payload_size == SSL_REQUEST_PAYLOAD_SIZE) + { + read_bytes(packet_size); /// Reading rest SSLRequest. + SSLRequest ssl_request; + ssl_request.readPayload(String(buf + PACKET_HEADER_SIZE, pos - PACKET_HEADER_SIZE)); + connection_context.client_capabilities = ssl_request.capability_flags; + connection_context.max_packet_size = ssl_request.max_packet_size ? ssl_request.max_packet_size : MAX_PACKET_LENGTH; + secure_connection = true; + ss = std::make_shared(SecureStreamSocket::attach(socket(), SSLManager::instance().defaultServerContext())); + in = std::make_shared(*ss); + out = std::make_shared(*ss); + connection_context.sequence_id = 2; + packet_sender = std::make_shared(*in, *out, connection_context.sequence_id); + packet_sender->max_packet_size = connection_context.max_packet_size; + packet_sender->receivePacket(packet); /// Reading HandshakeResponse from secure socket. + } + else + { + /// Reading rest of HandshakeResponse. + packet_size = PACKET_HEADER_SIZE + payload_size; + WriteBufferFromOwnString buf_for_handshake_response; + buf_for_handshake_response.write(buf, pos); + copyData(*packet_sender->in, buf_for_handshake_response, packet_size - pos); + packet.readPayload(buf_for_handshake_response.str().substr(PACKET_HEADER_SIZE)); + packet_sender->sequence_id++; + } + return packet; +} + +String MySQLHandler::generateScramble() +{ + String scramble(MySQLProtocol::SCRAMBLE_LENGTH, 0); + Poco::RandomInputStream generator; + for (size_t i = 0; i < scramble.size(); i++) + { + generator >> scramble[i]; + } + return scramble; +} + +void MySQLHandler::authenticate(const HandshakeResponse & handshake_response, const String & scramble) +{ + + String auth_response; + AuthSwitchResponse response; + if (handshake_response.auth_plugin_name != Authentication::SHA256) + { + packet_sender->sendPacket(AuthSwitchRequest(Authentication::SHA256, scramble + '\0'), true); + if (in->eof()) + throw Exception( + "Client doesn't support authentication method " + String(Authentication::SHA256) + " used by ClickHouse", + ErrorCodes::MYSQL_CLIENT_INSUFFICIENT_CAPABILITIES); + packet_sender->receivePacket(response); + auth_response = response.value; + LOG_TRACE(log, "Authentication method mismatch."); + } + else + { + auth_response = handshake_response.auth_response; + LOG_TRACE(log, "Authentication method match."); + } + + if (auth_response == "\1") + { + LOG_TRACE(log, "Client requests public key."); + + BIO * mem = BIO_new(BIO_s_mem()); + SCOPE_EXIT(BIO_free(mem)); + if (PEM_write_bio_RSA_PUBKEY(mem, &public_key) != 1) + { + throw Exception("Failed to write public key to memory. Error: " + getOpenSSLErrors(), ErrorCodes::OPENSSL_ERROR); + } + char * pem_buf = nullptr; + long pem_size = BIO_get_mem_data(mem, &pem_buf); + String pem(pem_buf, pem_size); + + LOG_TRACE(log, "Key: " << pem); + + AuthMoreData data(pem); + packet_sender->sendPacket(data, true); + packet_sender->receivePacket(response); + auth_response = response.value; + } + else + { + LOG_TRACE(log, "Client didn't request public key."); + } + + String password; + + /** Decrypt password, if it's not empty. + * The original intention was that the password is a string[NUL] but this never got enforced properly so now we have to accept that + * an empty packet is a blank password, thus the check for auth_response.empty() has to be made too. + * https://github.com/mysql/mysql-server/blob/8.0/sql/auth/sql_authentication.cc#L4017 + */ + if (!secure_connection && !auth_response.empty() && auth_response != String("\0", 1)) + { + LOG_TRACE(log, "Received nonempty password"); + auto ciphertext = reinterpret_cast(auth_response.data()); + + unsigned char plaintext[RSA_size(&private_key)]; + int plaintext_size = RSA_private_decrypt(auth_response.size(), ciphertext, plaintext, &private_key, RSA_PKCS1_OAEP_PADDING); + if (plaintext_size == -1) + { + throw Exception("Failed to decrypt auth data. Error: " + getOpenSSLErrors(), ErrorCodes::OPENSSL_ERROR); + } + + password.resize(plaintext_size); + for (int i = 0; i < plaintext_size; i++) + { + password[i] = plaintext[i] ^ static_cast(scramble[i % scramble.size()]); + } + } + else if (secure_connection) + { + password = auth_response; + } + else + { + LOG_TRACE(log, "Received empty password"); + } + + if (!password.empty()) + { + password.pop_back(); /// terminating null byte + } + + try + { + connection_context.setUser(handshake_response.username, password, socket().address(), ""); + connection_context.setCurrentDatabase(handshake_response.database); + connection_context.setCurrentQueryId(""); + LOG_ERROR(log, "Authentication for user " << handshake_response.username << " succeeded."); + } + catch (const Exception & exc) + { + LOG_ERROR(log, "Authentication for user " << handshake_response.username << " failed."); + packet_sender->sendPacket(ERR_Packet(exc.code(), "00000", exc.message()), true); + throw; + } +} + +void MySQLHandler::comInitDB(const String & payload) +{ + String database = payload.substr(1); + LOG_DEBUG(log, "Setting current database to " << database); + connection_context.setCurrentDatabase(database); + packet_sender->sendPacket(OK_Packet(0, client_capability_flags, 0, 0, 1), true); +} + +void MySQLHandler::comFieldList(const String & payload) +{ + ComFieldList packet; + packet.readPayload(payload); + String database = connection_context.getCurrentDatabase(); + StoragePtr tablePtr = connection_context.getTable(database, packet.table); + for (const NameAndTypePair & column: tablePtr->getColumns().getAll()) + { + ColumnDefinition column_definition( + database, packet.table, packet.table, column.name, column.name, CharacterSet::binary, 100, ColumnType::MYSQL_TYPE_STRING, 0, 0 + ); + packet_sender->sendPacket(column_definition); + } + packet_sender->sendPacket(OK_Packet(0xfe, client_capability_flags, 0, 0, 0), true); +} + +void MySQLHandler::comPing() +{ + packet_sender->sendPacket(OK_Packet(0x0, client_capability_flags, 0, 0, 0), true); +} + +void MySQLHandler::comQuery(const String & payload) +{ + bool with_output = false; + std::function set_content_type = [&with_output](const String &) -> void { + with_output = true; + }; + + String query = payload.substr(1); + + // Translate query from MySQL to ClickHouse. + // This is a temporary workaround until ClickHouse supports the syntax "@@var_name". + if (query == "select @@version_comment limit 1") // MariaDB client starts session with that query + query = "select ''"; + + ReadBufferFromString buf(query); + executeQuery(buf, *out, true, connection_context, set_content_type, nullptr); + if (!with_output) + packet_sender->sendPacket(OK_Packet(0x00, client_capability_flags, 0, 0, 0), true); +} + +} diff --git a/dbms/programs/server/MySQLHandler.h b/dbms/programs/server/MySQLHandler.h new file mode 100644 index 00000000000..79cba54e656 --- /dev/null +++ b/dbms/programs/server/MySQLHandler.h @@ -0,0 +1,59 @@ +#pragma once + +#include +#include +#include +#include +#include +#include "IServer.h" + + +namespace DB +{ + +/// Handler for MySQL wire protocol connections. Allows to connect to ClickHouse using MySQL client. +class MySQLHandler : public Poco::Net::TCPServerConnection +{ +public: + MySQLHandler(IServer & server_, const Poco::Net::StreamSocket & socket_, RSA & public_key, RSA & private_key, bool ssl_enabled, size_t connection_id); + + void run() final; + +private: + /// Enables SSL, if client requested. + MySQLProtocol::HandshakeResponse finishHandshake(); + + void comQuery(const String & payload); + + void comFieldList(const String & payload); + + void comPing(); + + void comInitDB(const String & payload); + + static String generateScramble(); + + void authenticate(const MySQLProtocol::HandshakeResponse &, const String & scramble); + + IServer & server; + Poco::Logger * log; + Context connection_context; + + std::shared_ptr packet_sender; + + size_t connection_id = 0; + + size_t server_capability_flags; + size_t client_capability_flags; + + RSA & public_key; + RSA & private_key; + + std::shared_ptr in; + std::shared_ptr out; + + bool secure_connection = false; + std::shared_ptr ss; +}; + +} diff --git a/dbms/programs/server/MySQLHandlerFactory.cpp b/dbms/programs/server/MySQLHandlerFactory.cpp new file mode 100644 index 00000000000..0f949987c54 --- /dev/null +++ b/dbms/programs/server/MySQLHandlerFactory.cpp @@ -0,0 +1,124 @@ +#include +#include +#include +#include +#include +#include +#include +#include "IServer.h" +#include "MySQLHandler.h" +#include "MySQLHandlerFactory.h" + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_OPEN_FILE; + extern const int NO_ELEMENTS_IN_CONFIG; + extern const int OPENSSL_ERROR; + extern const int SYSTEM_ERROR; +} + +MySQLHandlerFactory::MySQLHandlerFactory(IServer & server_) + : server(server_) + , log(&Logger::get("MySQLHandlerFactory")) +{ + try + { + Poco::Net::SSLManager::instance().defaultServerContext(); + } + catch (...) + { + LOG_INFO(log, "Failed to create SSL context. SSL will be disabled. Error: " << getCurrentExceptionMessage(false)); + ssl_enabled = false; + } + + /// Reading rsa keys for SHA256 authentication plugin. + try + { + readRSAKeys(); + } + catch (...) + { + LOG_WARNING(log, "Failed to read RSA keys. Error: " << getCurrentExceptionMessage(false)); + generateRSAKeys(); + } +} + +void MySQLHandlerFactory::readRSAKeys() +{ + const Poco::Util::LayeredConfiguration & config = Poco::Util::Application::instance().config(); + String certificateFileProperty = "openSSL.server.certificateFile"; + String privateKeyFileProperty = "openSSL.server.privateKeyFile"; + + if (!config.has(certificateFileProperty)) + throw Exception("Certificate file is not set.", ErrorCodes::NO_ELEMENTS_IN_CONFIG); + + if (!config.has(privateKeyFileProperty)) + throw Exception("Private key file is not set.", ErrorCodes::NO_ELEMENTS_IN_CONFIG); + + { + String certificateFile = config.getString(certificateFileProperty); + FILE * fp = fopen(certificateFile.data(), "r"); + if (fp == nullptr) + throw Exception("Cannot open certificate file: " + certificateFile + ".", ErrorCodes::CANNOT_OPEN_FILE); + SCOPE_EXIT(fclose(fp)); + + X509 * x509 = PEM_read_X509(fp, nullptr, nullptr, nullptr); + SCOPE_EXIT(X509_free(x509)); + if (x509 == nullptr) + throw Exception("Failed to read PEM certificate from " + certificateFile + ". Error: " + getOpenSSLErrors(), ErrorCodes::OPENSSL_ERROR); + + EVP_PKEY * p = X509_get_pubkey(x509); + if (p == nullptr) + throw Exception("Failed to get RSA key from X509. Error: " + getOpenSSLErrors(), ErrorCodes::OPENSSL_ERROR); + SCOPE_EXIT(EVP_PKEY_free(p)); + + public_key.reset(EVP_PKEY_get1_RSA(p)); + if (public_key.get() == nullptr) + throw Exception("Failed to get RSA key from ENV_PKEY. Error: " + getOpenSSLErrors(), ErrorCodes::OPENSSL_ERROR); + } + + { + String privateKeyFile = config.getString(privateKeyFileProperty); + + FILE * fp = fopen(privateKeyFile.data(), "r"); + if (fp == nullptr) + throw Exception ("Cannot open private key file " + privateKeyFile + ".", ErrorCodes::CANNOT_OPEN_FILE); + SCOPE_EXIT(fclose(fp)); + + private_key.reset(PEM_read_RSAPrivateKey(fp, nullptr, nullptr, nullptr)); + if (!private_key) + throw Exception("Failed to read RSA private key from " + privateKeyFile + ". Error: " + getOpenSSLErrors(), ErrorCodes::OPENSSL_ERROR); + } +} + +void MySQLHandlerFactory::generateRSAKeys() +{ + LOG_INFO(log, "Generating new RSA key."); + public_key.reset(RSA_new()); + if (!public_key) + throw Exception("Failed to allocate RSA key. Error: " + getOpenSSLErrors(), ErrorCodes::OPENSSL_ERROR); + + BIGNUM * e = BN_new(); + if (!e) + throw Exception("Failed to allocate BIGNUM. Error: " + getOpenSSLErrors(), ErrorCodes::OPENSSL_ERROR); + SCOPE_EXIT(BN_free(e)); + + if (!BN_set_word(e, 65537) || !RSA_generate_key_ex(public_key.get(), 2048, e, nullptr)) + throw Exception("Failed to generate RSA key. Error: " + getOpenSSLErrors(), ErrorCodes::OPENSSL_ERROR); + + private_key.reset(RSAPrivateKey_dup(public_key.get())); + if (!private_key) + throw Exception("Failed to copy RSA key. Error: " + getOpenSSLErrors(), ErrorCodes::OPENSSL_ERROR); +} + +Poco::Net::TCPServerConnection * MySQLHandlerFactory::createConnection(const Poco::Net::StreamSocket & socket) +{ + size_t connection_id = last_connection_id++; + LOG_TRACE(log, "MySQL connection. Id: " << connection_id << ". Address: " << socket.peerAddress().toString()); + return new MySQLHandler(server, socket, *public_key, *private_key, ssl_enabled, connection_id); +} + +} diff --git a/dbms/programs/server/MySQLHandlerFactory.h b/dbms/programs/server/MySQLHandlerFactory.h new file mode 100644 index 00000000000..e8fe81ed384 --- /dev/null +++ b/dbms/programs/server/MySQLHandlerFactory.h @@ -0,0 +1,39 @@ +#pragma once + +#include +#include +#include +#include "IServer.h" + +namespace DB +{ + +class MySQLHandlerFactory : public Poco::Net::TCPServerConnectionFactory +{ +private: + IServer & server; + Poco::Logger * log; + + struct RSADeleter + { + void operator()(RSA * ptr) { RSA_free(ptr); } + }; + using RSAPtr = std::unique_ptr; + + RSAPtr public_key; + RSAPtr private_key; + + bool ssl_enabled = true; + + std::atomic last_connection_id = 0; +public: + explicit MySQLHandlerFactory(IServer & server_); + + void readRSAKeys(); + + void generateRSAKeys(); + + Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket) override; +}; + +} diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index e446a88abc5..68520112565 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -49,6 +49,7 @@ #include #include "TCPHandlerFactory.h" #include "Common/config_version.h" +#include "MySQLHandlerFactory.h" #if defined(__linux__) #include @@ -668,7 +669,7 @@ int Server::main(const std::vector & /*args*/) socket, new Poco::Net::TCPServerParams)); - LOG_INFO(log, "Listening tcp: " + address.toString()); + LOG_INFO(log, "Listening for connections with native protocol (tcp): " + address.toString()); } /// TCP with SSL @@ -685,7 +686,7 @@ int Server::main(const std::vector & /*args*/) server_pool, socket, new Poco::Net::TCPServerParams)); - LOG_INFO(log, "Listening tcp_secure: " + address.toString()); + LOG_INFO(log, "Listening for connections with secure native protocol (tcp_secure): " + address.toString()); #else throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", ErrorCodes::SUPPORT_IS_DISABLED}; @@ -710,7 +711,7 @@ int Server::main(const std::vector & /*args*/) socket, http_params)); - LOG_INFO(log, "Listening interserver http: " + address.toString()); + LOG_INFO(log, "Listening for replica communication (interserver) http://" + address.toString()); } if (config().has("interserver_https_port")) @@ -727,12 +728,27 @@ int Server::main(const std::vector & /*args*/) socket, http_params)); - LOG_INFO(log, "Listening interserver https: " + address.toString()); + LOG_INFO(log, "Listening for secure replica communication (interserver) https://" + address.toString()); #else throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", ErrorCodes::SUPPORT_IS_DISABLED}; #endif } + + if (config().has("mysql_port")) + { + Poco::Net::ServerSocket socket; + auto address = socket_bind_listen(socket, listen_host, config().getInt("mysql_port"), /* secure = */ true); + socket.setReceiveTimeout(Poco::Timespan()); + socket.setSendTimeout(settings.send_timeout); + servers.emplace_back(std::make_unique( + new MySQLHandlerFactory(*this), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + + LOG_INFO(log, "Listening for MySQL compatibility protocol: " + address.toString()); + } } catch (const Poco::Exception & e) { diff --git a/dbms/programs/server/data/.gitignore b/dbms/programs/server/data/.gitignore index 5d26e622e3e..3e223bc0920 100644 --- a/dbms/programs/server/data/.gitignore +++ b/dbms/programs/server/data/.gitignore @@ -1,2 +1,5 @@ *.bin *.mrk +*.txt +*.dat +*.idx diff --git a/dbms/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h b/dbms/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h index 7f41e3f4fcf..7b711d712d0 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h @@ -308,16 +308,88 @@ public: /** * Check whether two bitmaps intersect. + * Intersection with an empty set is always 0 (consistent with hasAny). */ - UInt8 rb_intersect(const RoaringBitmapWithSmallSet & r1) + UInt8 rb_intersect(const RoaringBitmapWithSmallSet & r1) const { if (isSmall()) - toLarge(); - roaring_bitmap_t * rb1 = r1.isSmall() ? r1.getNewRbFromSmall() : r1.getRb(); - UInt8 is_true = roaring_bitmap_intersect(rb, rb1); - if (r1.isSmall()) - roaring_bitmap_free(rb1); - return is_true; + { + if (r1.isSmall()) + { + for (const auto & x : r1.small) + if (small.find(x.getValue()) != small.end()) + return 1; + } + else + { + for (const auto & x : small) + if (roaring_bitmap_contains(r1.rb, x.getValue())) + return 1; + } + } + else if (r1.isSmall()) + { + for (const auto & x : r1.small) + if (roaring_bitmap_contains(rb, x.getValue())) + return 1; + } + else if (roaring_bitmap_intersect(rb, r1.rb)) + return 1; + + return 0; + } + + /** + * Check whether the argument is the subset of this set. + * Empty set is a subset of any other set (consistent with hasAll). + */ + UInt8 rb_is_subset(const RoaringBitmapWithSmallSet & r1) const + { + if (isSmall()) + { + if (r1.isSmall()) + { + for (const auto & x : r1.small) + if (small.find(x.getValue()) == small.end()) + return 0; + } + else + { + UInt64 r1_size = r1.size(); + + if (r1_size > small.size()) + return 0; // A bigger set can not be a subset of ours. + + // This is a rare case with a small number of elements on + // both sides: r1 was promoted to large for some reason and + // it is still not larger than our small set. + // If r1 is our subset then our size must be equal to + // r1_size + number of not found elements, if this sum becomes + // greater then r1 is not a subset. + for (const auto & x : small) + if (!roaring_bitmap_contains(r1.rb, x.getValue()) && ++r1_size > small.size()) + return 0; + } + } + else if (r1.isSmall()) + { + for (const auto & x : r1.small) + if (!roaring_bitmap_contains(rb, x.getValue())) + return 0; + } + else if (!roaring_bitmap_is_subset(r1.rb, rb)) + return 0; + + return 1; + } + + /** + * Check whether this bitmap contains the argument. + */ + UInt8 rb_contains(const UInt32 x) const + { + return isSmall() ? small.find(x) != small.end() : + roaring_bitmap_contains(rb, x); } /** diff --git a/dbms/src/AggregateFunctions/AggregateFunctionMLMethod.cpp b/dbms/src/AggregateFunctions/AggregateFunctionMLMethod.cpp index 11daf5ca819..a28974bdbc8 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionMLMethod.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionMLMethod.cpp @@ -3,6 +3,8 @@ #include #include #include +#include +#include #include #include #include "AggregateFunctionFactory.h" @@ -34,7 +36,7 @@ namespace for (size_t i = 0; i < argument_types.size(); ++i) { - if (!isNumber(argument_types[i])) + if (!isNativeNumber(argument_types[i])) throw Exception( "Argument " + std::to_string(i) + " of type " + argument_types[i]->getName() + " must be numeric for aggregate function " + name, @@ -43,11 +45,11 @@ namespace /// Such default parameters were picked because they did good on some tests, /// though it still requires to fit parameters to achieve better result - auto learning_rate = Float64(0.01); - auto l2_reg_coef = Float64(0.01); - UInt32 batch_size = 1; + auto learning_rate = Float64(0.00001); + auto l2_reg_coef = Float64(0.1); + UInt32 batch_size = 15; - std::shared_ptr weights_updater = std::make_shared(); + std::string weights_updater_name = "\'SGD\'"; std::shared_ptr gradient_computer; if (!parameters.empty()) @@ -64,19 +66,8 @@ namespace } if (parameters.size() > 3) { - if (applyVisitor(FieldVisitorToString(), parameters[3]) == "\'SGD\'") - { - weights_updater = std::make_shared(); - } - else if (applyVisitor(FieldVisitorToString(), parameters[3]) == "\'Momentum\'") - { - weights_updater = std::make_shared(); - } - else if (applyVisitor(FieldVisitorToString(), parameters[3]) == "\'Nesterov\'") - { - weights_updater = std::make_shared(); - } - else + weights_updater_name = applyVisitor(FieldVisitorToString(), parameters[3]); + if (weights_updater_name != "\'SGD\'" && weights_updater_name != "\'Momentum\'" && weights_updater_name != "\'Nesterov\'") { throw Exception("Invalid parameter for weights updater", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } @@ -98,20 +89,19 @@ namespace return std::make_shared( argument_types.size() - 1, gradient_computer, - weights_updater, + weights_updater_name, learning_rate, l2_reg_coef, batch_size, argument_types, parameters); } - } void registerAggregateFunctionMLMethod(AggregateFunctionFactory & factory) { - factory.registerFunction("LinearRegression", createAggregateFunctionMLMethod); - factory.registerFunction("LogisticRegression", createAggregateFunctionMLMethod); + factory.registerFunction("stochasticLinearRegression", createAggregateFunctionMLMethod); + factory.registerFunction("stochasticLogisticRegression", createAggregateFunctionMLMethod); } LinearModelData::LinearModelData( @@ -149,6 +139,26 @@ void LinearModelData::predict( gradient_computer->predict(container, block, arguments, weights, bias, context); } +void LinearModelData::returnWeights(IColumn & to) const +{ + size_t size = weights.size() + 1; + + ColumnArray & arr_to = static_cast(to); + ColumnArray::Offsets & offsets_to = arr_to.getOffsets(); + + size_t old_size = offsets_to.back(); + offsets_to.push_back(old_size + size); + + typename ColumnFloat64::Container & val_to + = static_cast(arr_to.getData()).getData(); + + val_to.reserve(old_size + size); + for (size_t i = 0; i + 1 < size; ++i) + val_to.push_back(weights[i]); + + val_to.push_back(bias); +} + void LinearModelData::read(ReadBuffer & buf) { readBinary(bias, buf); @@ -192,7 +202,8 @@ void LinearModelData::merge(const DB::LinearModelData & rhs) void LinearModelData::add(const IColumn ** columns, size_t row_num) { /// first column stores target; features start from (columns + 1) - const auto target = (*columns[0])[row_num].get(); + Float64 target = (*columns[0]).getFloat64(row_num); + /// Here we have columns + 1 as first column corresponds to target value, and others - to features weights_updater->add_to_batch( gradient_batch, *gradient_computer, weights, bias, learning_rate, l2_reg_coef, target, columns + 1, row_num); @@ -345,7 +356,7 @@ void LogisticRegression::predict( for (size_t i = 1; i < arguments.size(); ++i) { const ColumnWithTypeAndName & cur_col = block.getByPosition(arguments[i]); - if (!isNumber(cur_col.type)) + if (!isNativeNumber(cur_col.type)) { throw Exception("Prediction arguments must have numeric type", ErrorCodes::BAD_ARGUMENTS); } @@ -385,7 +396,7 @@ void LogisticRegression::compute( Float64 derivative = bias; for (size_t i = 0; i < weights.size(); ++i) { - auto value = (*columns[i])[row_num].get(); + auto value = (*columns[i]).getFloat64(row_num); derivative += weights[i] * value; } derivative *= target; @@ -394,8 +405,8 @@ void LogisticRegression::compute( batch_gradient[weights.size()] += learning_rate * target / (derivative + 1); for (size_t i = 0; i < weights.size(); ++i) { - auto value = (*columns[i])[row_num].get(); - batch_gradient[i] += learning_rate * target * value / (derivative + 1) - 2 * l2_reg_coef * weights[i]; + auto value = (*columns[i]).getFloat64(row_num); + batch_gradient[i] += learning_rate * target * value / (derivative + 1) - 2 * learning_rate * l2_reg_coef * weights[i]; } } @@ -418,7 +429,7 @@ void LinearRegression::predict( for (size_t i = 1; i < arguments.size(); ++i) { const ColumnWithTypeAndName & cur_col = block.getByPosition(arguments[i]); - if (!isNumber(cur_col.type)) + if (!isNativeNumber(cur_col.type)) { throw Exception("Prediction arguments must have numeric type", ErrorCodes::BAD_ARGUMENTS); } @@ -458,7 +469,7 @@ void LinearRegression::compute( Float64 derivative = (target - bias); for (size_t i = 0; i < weights.size(); ++i) { - auto value = (*columns[i])[row_num].get(); + auto value = (*columns[i]).getFloat64(row_num); derivative -= weights[i] * value; } derivative *= (2 * learning_rate); @@ -466,8 +477,8 @@ void LinearRegression::compute( batch_gradient[weights.size()] += derivative; for (size_t i = 0; i < weights.size(); ++i) { - auto value = (*columns[i])[row_num].get(); - batch_gradient[i] += derivative * value - 2 * l2_reg_coef * weights[i]; + auto value = (*columns[i]).getFloat64(row_num); + batch_gradient[i] += derivative * value - 2 * learning_rate * l2_reg_coef * weights[i]; } } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionMLMethod.h b/dbms/src/AggregateFunctions/AggregateFunctionMLMethod.h index 8160eb2ef7d..b96b2bceed5 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionMLMethod.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionMLMethod.h @@ -4,6 +4,8 @@ #include #include #include +#include +#include #include "IAggregateFunction.h" namespace DB @@ -12,6 +14,7 @@ namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int BAD_ARGUMENTS; + extern const int BAD_CAST; } /** @@ -218,6 +221,7 @@ public: void predict(ColumnVector::Container & container, Block & block, const ColumnNumbers & arguments, const Context & context) const; + void returnWeights(IColumn & to) const; private: std::vector weights; Float64 bias{0.0}; @@ -253,7 +257,7 @@ public: explicit AggregateFunctionMLMethod( UInt32 param_num, std::shared_ptr gradient_computer, - std::shared_ptr weights_updater, + std::string weights_updater_name, Float64 learning_rate, Float64 l2_reg_coef, UInt32 batch_size, @@ -265,15 +269,39 @@ public: , l2_reg_coef(l2_reg_coef) , batch_size(batch_size) , gradient_computer(std::move(gradient_computer)) - , weights_updater(std::move(weights_updater)) + , weights_updater_name(std::move(weights_updater_name)) { } - DataTypePtr getReturnType() const override { return std::make_shared>(); } + /// This function is called when SELECT linearRegression(...) is called + DataTypePtr getReturnType() const override + { + return std::make_shared(std::make_shared()); + } + + /// This function is called from evalMLMethod function for correct predictValues call + DataTypePtr getReturnTypeToPredict() const override + { + return std::make_shared>(); + } void create(AggregateDataPtr place) const override { - new (place) Data(learning_rate, l2_reg_coef, param_num, batch_size, gradient_computer, weights_updater); + std::shared_ptr new_weights_updater; + if (weights_updater_name == "\'SGD\'") + { + new_weights_updater = std::make_shared(); + } else if (weights_updater_name == "\'Momentum\'") + { + new_weights_updater = std::make_shared(); + } else if (weights_updater_name == "\'Nesterov\'") + { + new_weights_updater = std::make_shared(); + } else + { + throw Exception("Illegal name of weights updater (should have been checked earlier)", ErrorCodes::LOGICAL_ERROR); + } + new (place) Data(learning_rate, l2_reg_coef, param_num, batch_size, gradient_computer, new_weights_updater); } void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override @@ -296,16 +324,26 @@ public: + ". Required: " + std::to_string(param_num + 1), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - auto & column = dynamic_cast &>(to); + /// This cast might be correct because column type is based on getReturnTypeToPredict. + ColumnVector * column; + try + { + column = &dynamic_cast &>(to); + } catch (const std::bad_cast &) + { + throw Exception("Cast of column of predictions is incorrect. getReturnTypeToPredict must return same value as it is casted to", + ErrorCodes::BAD_CAST); + } - this->data(place).predict(column.getData(), block, arguments, context); + this->data(place).predict(column->getData(), block, arguments, context); } + /** This function is called if aggregate function without State modifier is selected in a query. + * Inserts all weights of the model into the column 'to', so user may use such information if needed + */ void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override { - std::ignore = place; - std::ignore = to; - throw std::runtime_error("not implemented"); + this->data(place).returnWeights(to); } const char * getHeaderFilePath() const override { return __FILE__; } @@ -316,15 +354,15 @@ private: Float64 l2_reg_coef; UInt32 batch_size; std::shared_ptr gradient_computer; - std::shared_ptr weights_updater; + std::string weights_updater_name; }; struct NameLinearRegression { - static constexpr auto name = "LinearRegression"; + static constexpr auto name = "stochasticLinearRegression"; }; struct NameLogisticRegression { - static constexpr auto name = "LogisticRegression"; + static constexpr auto name = "stochasticLogisticRegression"; }; } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionMaxIntersections.h b/dbms/src/AggregateFunctions/AggregateFunctionMaxIntersections.h index dbb727b7d9a..8b93f15fd9d 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionMaxIntersections.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionMaxIntersections.h @@ -61,10 +61,10 @@ public: AggregateFunctionIntersectionsMax(AggregateFunctionIntersectionsKind kind_, const DataTypes & arguments) : IAggregateFunctionDataHelper, AggregateFunctionIntersectionsMax>(arguments, {}), kind(kind_) { - if (!isNumber(arguments[0])) + if (!isNativeNumber(arguments[0])) throw Exception{getName() + ": first argument must be represented by integer", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; - if (!isNumber(arguments[1])) + if (!isNativeNumber(arguments[1])) throw Exception{getName() + ": second argument must be represented by integer", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; if (!arguments[0]->equals(*arguments[1])) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionSequenceMatch.cpp b/dbms/src/AggregateFunctions/AggregateFunctionSequenceMatch.cpp index be139d9e633..f1a561d6f6e 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionSequenceMatch.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionSequenceMatch.cpp @@ -1,6 +1,12 @@ +#include #include #include +#include +#include + +#include + namespace DB { @@ -12,32 +18,58 @@ namespace ErrorCodes namespace { -AggregateFunctionPtr createAggregateFunctionSequenceCount(const std::string & name, const DataTypes & argument_types, const Array & params) +template