diff --git a/CHANGELOG.md b/CHANGELOG.md
index ea6955fb33f..c3bb2300021 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,65 @@
+## ClickHouse release 19.7.3.9, 2019-05-30
+
+### New Features
+* Allow limiting the range of a setting that can be specified by the user. These constraints can be set up in the user settings profile. [#4931](https://github.com/yandex/ClickHouse/pull/4931) ([Vitaly Baranov](https://github.com/vitlibar))
+* Add a second version of the function `groupUniqArray` with an optional `max_size` parameter that limits the size of the resulting array. This behavior is similar to the `groupArray(max_size)(x)` function. [#5026](https://github.com/yandex/ClickHouse/pull/5026) ([Guillaume Tassery](https://github.com/YiuRULE))
+* For the TSVWithNames/CSVWithNames input file formats, the column order can now be determined from the file header. This is controlled by the `input_format_with_names_use_header` parameter. [#5081](https://github.com/yandex/ClickHouse/pull/5081) ([Alexander](https://github.com/Akazz))
+
+### Bug Fixes
+* Fixed a crash with uncompressed_cache + JOIN during merge (#5197). [#5133](https://github.com/yandex/ClickHouse/pull/5133) ([Danila Kutenin](https://github.com/danlark1))
+* Fixed a segmentation fault on a clickhouse-client query to system tables (#5066). [#5127](https://github.com/yandex/ClickHouse/pull/5127) ([Ivan](https://github.com/abyss7))
+* Fixed data loss on heavy load via KafkaEngine (#4736). [#5080](https://github.com/yandex/ClickHouse/pull/5080) ([Ivan](https://github.com/abyss7))
+* Fixed a very rare data race condition that could happen when executing a query with UNION ALL involving at least two SELECTs from system.columns, system.tables, system.parts, system.parts_tables or tables of the Merge family while concurrently performing ALTER of columns of the related tables. [#5189](https://github.com/yandex/ClickHouse/pull/5189) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+### Performance Improvements
+* Use radix sort for sorting by a single numeric column in `ORDER BY` without `LIMIT` (an illustrative sketch follows this section). [#5106](https://github.com/yandex/ClickHouse/pull/5106), [#4439](https://github.com/yandex/ClickHouse/pull/4439) ([Evgenii Pravda](https://github.com/kvinty), [alexey-milovidov](https://github.com/alexey-milovidov))
+
+### Documentation
+* Translate documentation for some table engines to Chinese. [#5107](https://github.com/yandex/ClickHouse/pull/5107), [#5094](https://github.com/yandex/ClickHouse/pull/5094), [#5087](https://github.com/yandex/ClickHouse/pull/5087) ([张风啸](https://github.com/AlexZFX)), [#5068](https://github.com/yandex/ClickHouse/pull/5068) ([never lee](https://github.com/neverlee))
+
+### Build/Testing/Packaging Improvements
+* Print UTF-8 characters properly in `clickhouse-test`. [#5084](https://github.com/yandex/ClickHouse/pull/5084) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Add a command line parameter for clickhouse-client to always load suggestion data. [#5102](https://github.com/yandex/ClickHouse/pull/5102) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Resolve some PVS-Studio warnings. [#5082](https://github.com/yandex/ClickHouse/pull/5082) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Update LZ4. [#5040](https://github.com/yandex/ClickHouse/pull/5040) ([Danila Kutenin](https://github.com/danlark1))
+* Add gperf to build requirements for the upcoming pull request #5030. [#5110](https://github.com/yandex/ClickHouse/pull/5110) ([proller](https://github.com/proller))
+
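The radix-sort entry above (#5106, #4439) replaces a comparison sort with a distribution sort for single-column numeric `ORDER BY` without `LIMIT`. As a rough illustration of the idea only — this is a textbook sketch, not ClickHouse's actual templated implementation — an LSD radix sort over 32-bit unsigned keys:

```cpp
#include <cstdint>
#include <vector>

/// Illustrative LSD radix sort: four stable counting-sort passes, one byte per pass.
/// Each pass is O(n) regardless of key distribution, which is why it can beat
/// comparison sorts (O(n log n)) on fixed-width numeric keys.
void radixSortUInt32(std::vector<uint32_t> & data)
{
    std::vector<uint32_t> buffer(data.size());
    for (unsigned shift = 0; shift < 32; shift += 8)
    {
        size_t count[256] = {};
        for (uint32_t x : data)
            ++count[(x >> shift) & 0xFF];

        /// Exclusive prefix sums turn per-byte counts into output positions.
        size_t sum = 0;
        for (size_t & c : count)
        {
            size_t tmp = c;
            c = sum;
            sum += tmp;
        }

        /// Stable scatter into the buffer, then swap; after four passes `data` is sorted.
        for (uint32_t x : data)
            buffer[count[(x >> shift) & 0xFF]++] = x;
        data.swap(buffer);
    }
}
```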
 ## ClickHouse release 19.6.2.11, 2019-05-13
 
 ### New Features
@@ -29,6 +91,7 @@
 * Fixed hanging on start of the server when a dictionary depends on another dictionary via a database with engine=Dictionary. [#4962](https://github.com/yandex/ClickHouse/pull/4962) ([Vitaly Baranov](https://github.com/vitlibar))
 * Partially fix distributed_product_mode = local. It's possible to allow columns of local tables in where/having/order by/... via table aliases. Throw exception if table does not have alias. It's not yet possible to access columns without table aliases. [#4986](https://github.com/yandex/ClickHouse/pull/4986) ([Artem Zuikov](https://github.com/4ertus2))
 * Fix potentially wrong result for `SELECT DISTINCT` with `JOIN` [#5001](https://github.com/yandex/ClickHouse/pull/5001) ([Artem Zuikov](https://github.com/4ertus2))
+* Fixed a very rare data race condition that could happen when executing a query with UNION ALL involving at least two SELECTs from system.columns, system.tables, system.parts, system.parts_tables or tables of the Merge family while concurrently performing ALTER of columns of the related tables. [#5189](https://github.com/yandex/ClickHouse/pull/5189) ([alexey-milovidov](https://github.com/alexey-milovidov))
 
 ### Build/Testing/Packaging Improvements
 * Fixed test failures when running clickhouse-server on different host [#4713](https://github.com/yandex/ClickHouse/pull/4713) ([Vasily Nemkov](https://github.com/Enmk))
diff --git a/CHANGELOG_RU.md b/CHANGELOG_RU.md
index 2a19bc4724a..dfb96c348e9 100644
--- a/CHANGELOG_RU.md
+++ b/CHANGELOG_RU.md
@@ -1,3 +1,68 @@
+## ClickHouse release 19.7.3.9, 2019-05-30
+
+### New Features
+* Added the ability to limit the values of configuration settings that a user can specify. These constraints are set up in the user settings profile. [#4931](https://github.com/yandex/ClickHouse/pull/4931) ([Vitaly Baranov](https://github.com/vitlibar))
+* Added a variant of the `groupUniqArray` function with an additional `max_size` parameter that limits the size of the resulting array, similarly to the `groupArray(max_size)(x)` function. [#5026](https://github.com/yandex/ClickHouse/pull/5026) ([Guillaume Tassery](https://github.com/YiuRULE))
+* For input files in the TSVWithNames and CSVWithNames formats, the column order can now be determined from the file header. This is controlled by the `input_format_with_names_use_header` setting. [#5081](https://github.com/yandex/ClickHouse/pull/5081) ([Alexander](https://github.com/Akazz))
+
+### Bug Fixes
+* Fixed a crash during merges when using uncompressed_cache together with JOIN (#5197). [#5133](https://github.com/yandex/ClickHouse/pull/5133) ([Danila Kutenin](https://github.com/danlark1))
+* Fixed a segmentation fault on a query to system tables (#5066). [#5127](https://github.com/yandex/ClickHouse/pull/5127) ([Ivan](https://github.com/abyss7))
+* Fixed loss of ingested data under heavy load via KafkaEngine (#4736). [#5080](https://github.com/yandex/ClickHouse/pull/5080) ([Ivan](https://github.com/abyss7))
+* Fixed a very rare data race condition that could occur when executing a query with UNION ALL involving at least two SELECTs from the system.columns, system.tables, system.parts, system.parts_tables tables or tables of the Merge family, with ALTER queries on columns of the related tables running concurrently. [#5189](https://github.com/yandex/ClickHouse/pull/5189) ([alexey-milovidov](https://github.com/alexey-milovidov))
+
+### Performance Improvements
+* Radix sort is now used for sorting numeric columns in `ORDER BY` without `LIMIT`. [#5106](https://github.com/yandex/ClickHouse/pull/5106), [#4439](https://github.com/yandex/ClickHouse/pull/4439) ([Evgenii Pravda](https://github.com/kvinty), [alexey-milovidov](https://github.com/alexey-milovidov))
+
+### Documentation
+* Documentation for some table engines has been translated to Chinese. [#5107](https://github.com/yandex/ClickHouse/pull/5107), [#5094](https://github.com/yandex/ClickHouse/pull/5094), [#5087](https://github.com/yandex/ClickHouse/pull/5087) ([张风啸](https://github.com/AlexZFX)), [#5068](https://github.com/yandex/ClickHouse/pull/5068) ([never lee](https://github.com/neverlee))
+
+### Build/Testing/Packaging Improvements
+* UTF-8 characters are now displayed correctly in `clickhouse-test`. [#5084](https://github.com/yandex/ClickHouse/pull/5084) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Added a command line parameter for `clickhouse-client` to always load suggestion data. [#5102](https://github.com/yandex/ClickHouse/pull/5102) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Resolved some PVS-Studio warnings. [#5082](https://github.com/yandex/ClickHouse/pull/5082) ([alexey-milovidov](https://github.com/alexey-milovidov))
+* Updated the LZ4 library. [#5040](https://github.com/yandex/ClickHouse/pull/5040) ([Danila Kutenin](https://github.com/danlark1))
+* Added gperf to the build dependencies to support the upcoming pull request #5030. [#5110](https://github.com/yandex/ClickHouse/pull/5110) ([proller](https://github.com/proller))
+
+
 ## ClickHouse release 19.6.2.11, 2019-05-13
 
 ### New Features
@@ -6,7 +71,7 @@
 * Added the `isValidUTF8` function to check whether a string contains valid UTF-8 data. [#4934](https://github.com/yandex/ClickHouse/pull/4934) ([Danila Kutenin](https://github.com/danlark1))
 * Added a new load balancing policy (`load_balancing`), `first_or_random`, which sends queries to the first specified host and, if it is unavailable, to random hosts of the shard. Useful for cross-replication topologies. [#5012](https://github.com/yandex/ClickHouse/pull/5012) ([nvartolomei](https://github.com/nvartolomei))
 
-### Эксперемннтальные возможности
+### Экспериментальные возможности
 * Added the `index_granularity_bytes` setting (adaptive index granularity) for tables of the MergeTree* family. [#4826](https://github.com/yandex/ClickHouse/pull/4826) ([alesapin](https://github.com/alesapin))
 
 ### Improvements
@@ -29,6 +94,7 @@
 * Fixed a hang on server start when an external dictionary depends on another dictionary via a table from a database with the `Dictionary` engine. [#4962](https://github.com/yandex/ClickHouse/pull/4962) ([Vitaly Baranov](https://github.com/vitlibar))
 * With `distributed_product_mode = 'local'`, columns of local tables can now be used correctly in where/having/order by/... via table aliases. An exception is thrown if the table does not have an alias. Access to columns without table aliases is not yet possible. [#4986](https://github.com/yandex/ClickHouse/pull/4986) ([Artem Zuikov](https://github.com/4ertus2))
 * Fixed a potentially incorrect result for `SELECT DISTINCT` with `JOIN` [#5001](https://github.com/yandex/ClickHouse/pull/5001) ([Artem Zuikov](https://github.com/4ertus2))
+* Fixed a very rare data race condition that could occur when executing a query with UNION ALL involving at least two SELECTs from the system.columns, system.tables, system.parts, system.parts_tables tables or tables of the Merge family, with ALTER queries on columns of the related tables running concurrently. [#5189](https://github.com/yandex/ClickHouse/pull/5189) ([alexey-milovidov](https://github.com/alexey-milovidov))
 
 ### Build/Testing/Packaging Improvements
 * Fixed tests not working when `clickhouse-server` runs on a remote host [#4713](https://github.com/yandex/ClickHouse/pull/4713) ([Vasily Nemkov](https://github.com/Enmk))
diff --git a/README.md b/README.md
index 8e7653d60cb..adb1c166f12 100644
--- a/README.md
+++ b/README.md
@@ -12,8 +12,6 @@ ClickHouse is an open-source column-oriented database management system that all
 * You can also [fill this form](https://forms.yandex.com/surveys/meet-yandex-clickhouse-team/) to meet Yandex ClickHouse team in person.
 
 ## Upcoming Events
-* ClickHouse at [Percona Live 2019](https://www.percona.com/live/19/other-open-source-databases-track) in Austin on May 28-30.
-* [ClickHouse Community Meetup in San Francisco](https://www.meetup.com/San-Francisco-Bay-Area-ClickHouse-Meetup/events/261110652/) on June 4.
-* [ClickHouse Community Meetup in Beijing](https://www.huodongxing.com/event/2483759276200) on June 8.
+* [ClickHouse on HighLoad++ Siberia](https://www.highload.ru/siberia/2019/abstracts/5348) on June 24-25.
 * [ClickHouse Community Meetup in Shenzhen](https://www.huodongxing.com/event/3483759917300) on October 20.
 * [ClickHouse Community Meetup in Shanghai](https://www.huodongxing.com/event/4483760336000) on October 27.
diff --git a/cmake/find_rapidjson.cmake b/cmake/find_rapidjson.cmake
index bd8f0fbb449..443e2b02a28 100644
--- a/cmake/find_rapidjson.cmake
+++ b/cmake/find_rapidjson.cmake
@@ -1,9 +1,28 @@
-if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/rapidjson/include/rapidjson/rapidjson.h")
-    message (WARNING "submodule contrib/rapidjson is missing. to fix try run: \n git submodule update --init --recursive")
+option(ENABLE_RAPIDJSON "Use rapidjson" ON)
+if(NOT ENABLE_RAPIDJSON)
     return()
-endif ()
+endif()
 
-option (USE_RAPIDJSON "Use rapidjson" ON)
-set (RAPIDJSON_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/rapidjson/include")
+option(USE_INTERNAL_RAPIDJSON_LIBRARY "Set to FALSE to use system rapidjson library instead of bundled" ${NOT_UNBUNDLED})
+
+if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/rapidjson/include/rapidjson/rapidjson.h")
+    if(USE_INTERNAL_RAPIDJSON_LIBRARY)
+        message(WARNING "submodule contrib/rapidjson is missing. 
to fix try run: \n git submodule update --init --recursive") + set(USE_INTERNAL_RAPIDJSON_LIBRARY 0) + endif() + set(MISSING_INTERNAL_RAPIDJSON_LIBRARY 1) +endif() + +if(NOT USE_INTERNAL_RAPIDJSON_LIBRARY) + find_path(RAPIDJSON_INCLUDE_DIR NAMES rapidjson/rapidjson.h PATHS ${RAPIDJSON_INCLUDE_PATHS}) +endif() + +if(RAPIDJSON_INCLUDE_DIR) + set(USE_RAPIDJSON 1) +elseif(NOT MISSING_INTERNAL_RAPIDJSON_LIBRARY) + set(RAPIDJSON_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/rapidjson/include") + set(USE_INTERNAL_RAPIDJSON_LIBRARY 1) + set(USE_RAPIDJSON 1) +endif() message(STATUS "Using rapidjson=${USE_RAPIDJSON}: ${RAPIDJSON_INCLUDE_DIR}") diff --git a/contrib/boost b/contrib/boost index 8abda007bfe..830e51edb59 160000 --- a/contrib/boost +++ b/contrib/boost @@ -1 +1 @@ -Subproject commit 8abda007bfe52d78a51548d4594879d6d82a22fa +Subproject commit 830e51edb59c4f37a8638138581e1e56c29ac44f diff --git a/contrib/brotli-cmake/CMakeLists.txt b/contrib/brotli-cmake/CMakeLists.txt index cbc290d9e67..00fea50fc43 100644 --- a/contrib/brotli-cmake/CMakeLists.txt +++ b/contrib/brotli-cmake/CMakeLists.txt @@ -1,5 +1,5 @@ -set(BROTLI_SOURCE_DIR ${CMAKE_SOURCE_DIR}/contrib/brotli/c) -set(BROTLI_BINARY_DIR ${CMAKE_BINARY_DIR}/contrib/brotli/c) +set(BROTLI_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/brotli/c) +set(BROTLI_BINARY_DIR ${ClickHouse_BINARY_DIR}/contrib/brotli/c) set(SRCS ${BROTLI_SOURCE_DIR}/dec/bit_reader.c diff --git a/contrib/cppkafka-cmake/CMakeLists.txt b/contrib/cppkafka-cmake/CMakeLists.txt index 064eb0d04ad..2725eaf7a77 100644 --- a/contrib/cppkafka-cmake/CMakeLists.txt +++ b/contrib/cppkafka-cmake/CMakeLists.txt @@ -1,4 +1,4 @@ -set(CPPKAFKA_DIR ${CMAKE_SOURCE_DIR}/contrib/cppkafka) +set(CPPKAFKA_DIR ${ClickHouse_SOURCE_DIR}/contrib/cppkafka) set(SRCS ${CPPKAFKA_DIR}/src/configuration.cpp diff --git a/contrib/hyperscan b/contrib/hyperscan index ed17d34a7c7..01e6b83f9fb 160000 --- a/contrib/hyperscan +++ b/contrib/hyperscan @@ -1 +1 @@ -Subproject commit ed17d34a7c786512471946f9105eaa8d925f34c3 +Subproject commit 01e6b83f9fbdb4020cd68a5287bf3a0471eeb272 diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index 55d59e254de..4840197c2fd 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -1,4 +1,4 @@ -set(JEMALLOC_SOURCE_DIR ${CMAKE_SOURCE_DIR}/contrib/jemalloc) +set(JEMALLOC_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/jemalloc) set(SRCS ${JEMALLOC_SOURCE_DIR}/src/arena.c diff --git a/contrib/libhdfs3-cmake/CMakeLists.txt b/contrib/libhdfs3-cmake/CMakeLists.txt index 398b415e307..8ec14f897b9 100644 --- a/contrib/libhdfs3-cmake/CMakeLists.txt +++ b/contrib/libhdfs3-cmake/CMakeLists.txt @@ -9,7 +9,7 @@ endif() SET(WITH_KERBEROS false) # project and source dir -set(HDFS3_ROOT_DIR ${CMAKE_SOURCE_DIR}/contrib/libhdfs3) +set(HDFS3_ROOT_DIR ${ClickHouse_SOURCE_DIR}/contrib/libhdfs3) set(HDFS3_SOURCE_DIR ${HDFS3_ROOT_DIR}/src) set(HDFS3_COMMON_DIR ${HDFS3_SOURCE_DIR}/common) diff --git a/contrib/libpcg-random/include/pcg_extras.hpp b/contrib/libpcg-random/include/pcg_extras.hpp index 08e8dbacd84..929c756b151 100644 --- a/contrib/libpcg-random/include/pcg_extras.hpp +++ b/contrib/libpcg-random/include/pcg_extras.hpp @@ -81,7 +81,7 @@ #define PCG_128BIT_CONSTANT(high,low) \ ((pcg128_t(high) << 64) + low) #else - #include "pcg_uint128.hpp" // Y_IGNORE + #include "pcg_uint128.hpp" namespace pcg_extras { typedef pcg_extras::uint_x4 pcg128_t; } diff --git a/contrib/librdkafka-cmake/CMakeLists.txt 
b/contrib/librdkafka-cmake/CMakeLists.txt index 3c9c17b1796..626ae8fd6a9 100644 --- a/contrib/librdkafka-cmake/CMakeLists.txt +++ b/contrib/librdkafka-cmake/CMakeLists.txt @@ -1,4 +1,4 @@ -set(RDKAFKA_SOURCE_DIR ${CMAKE_SOURCE_DIR}/contrib/librdkafka/src) +set(RDKAFKA_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/librdkafka/src) set(SRCS ${RDKAFKA_SOURCE_DIR}/crc32c.c diff --git a/contrib/librdkafka-cmake/include/librdkafka/rdkafka.h b/contrib/librdkafka-cmake/include/librdkafka/rdkafka.h index 3387659281a..bcba977e393 100644 --- a/contrib/librdkafka-cmake/include/librdkafka/rdkafka.h +++ b/contrib/librdkafka-cmake/include/librdkafka/rdkafka.h @@ -1,5 +1,5 @@ #if __has_include() // maybe bundled -# include_next // Y_IGNORE +# include_next #else // system # include_next #endif diff --git a/contrib/libxml2-cmake/CMakeLists.txt b/contrib/libxml2-cmake/CMakeLists.txt index ff14698335e..8783fca774e 100644 --- a/contrib/libxml2-cmake/CMakeLists.txt +++ b/contrib/libxml2-cmake/CMakeLists.txt @@ -1,5 +1,5 @@ -set(LIBXML2_SOURCE_DIR ${CMAKE_SOURCE_DIR}/contrib/libxml2) -set(LIBXML2_BINARY_DIR ${CMAKE_BINARY_DIR}/contrib/libxml2) +set(LIBXML2_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libxml2) +set(LIBXML2_BINARY_DIR ${ClickHouse_BINARY_DIR}/contrib/libxml2) set(SRCS ${LIBXML2_SOURCE_DIR}/SAX.c diff --git a/contrib/mariadb-connector-c-cmake/CMakeLists.txt b/contrib/mariadb-connector-c-cmake/CMakeLists.txt index ced97b828cc..1c9b7a5ec9c 100644 --- a/contrib/mariadb-connector-c-cmake/CMakeLists.txt +++ b/contrib/mariadb-connector-c-cmake/CMakeLists.txt @@ -1,5 +1,5 @@ -set(MARIADB_CLIENT_SOURCE_DIR ${CMAKE_SOURCE_DIR}/contrib/mariadb-connector-c) -set(MARIADB_CLIENT_BINARY_DIR ${CMAKE_BINARY_DIR}/contrib/mariadb-connector-c) +set(MARIADB_CLIENT_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/mariadb-connector-c) +set(MARIADB_CLIENT_BINARY_DIR ${ClickHouse_BINARY_DIR}/contrib/mariadb-connector-c) set(SRCS #${MARIADB_CLIENT_SOURCE_DIR}/libmariadb/bmove_upp.c diff --git a/contrib/unixodbc-cmake/CMakeLists.txt b/contrib/unixodbc-cmake/CMakeLists.txt index 4fa54a55a3a..1715747191c 100644 --- a/contrib/unixodbc-cmake/CMakeLists.txt +++ b/contrib/unixodbc-cmake/CMakeLists.txt @@ -1,5 +1,5 @@ -set(ODBC_SOURCE_DIR ${CMAKE_SOURCE_DIR}/contrib/unixodbc) -set(ODBC_BINARY_DIR ${CMAKE_BINARY_DIR}/contrib/unixodbc) +set(ODBC_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/unixodbc) +set(ODBC_BINARY_DIR ${ClickHouse_BINARY_DIR}/contrib/unixodbc) set(SRCS diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake index a853ea81913..024880f40c8 100644 --- a/dbms/cmake/version.cmake +++ b/dbms/cmake/version.cmake @@ -1,11 +1,11 @@ # This strings autochanged from release_lib.sh: -set(VERSION_REVISION 54421) +set(VERSION_REVISION 54422) set(VERSION_MAJOR 19) -set(VERSION_MINOR 9) +set(VERSION_MINOR 10) set(VERSION_PATCH 1) -set(VERSION_GITHASH 0c2aa460651a462f14efc7e995840a244531d373) -set(VERSION_DESCRIBE v19.9.1.1-testing) -set(VERSION_STRING 19.9.1.1) +set(VERSION_GITHASH 864a5830f5b5fa67c6b7126e8f2615d03ee890ce) +set(VERSION_DESCRIBE v19.10.1.1-prestable) +set(VERSION_STRING 19.10.1.1) # end of autochange set(VERSION_EXTRA "" CACHE STRING "") diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index d8081d7c97c..2c421cc20b3 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -65,9 +65,10 @@ #include #include #include +#include #if USE_READLINE -#include "Suggest.h" // Y_IGNORE +#include "Suggest.h" #endif #ifndef __clang__ @@ -1549,7 +1550,7 @@ public: * where 
possible args are file, name, format, structure, types. * Split these groups before processing. */ - using Arguments = std::vector; + using Arguments = std::vector; Arguments common_arguments{""}; /// 0th argument is ignored. std::vector external_tables_arguments; @@ -1671,8 +1672,7 @@ public: ("types", po::value(), "types") ; /// Parse main commandline options. - po::parsed_options parsed = po::command_line_parser( - common_arguments.size(), common_arguments.data()).options(main_description).run(); + po::parsed_options parsed = po::command_line_parser(common_arguments).options(main_description).run(); po::variables_map options; po::store(parsed, options); po::notify(options); @@ -1705,8 +1705,7 @@ public: for (size_t i = 0; i < external_tables_arguments.size(); ++i) { /// Parse commandline options related to external tables. - po::parsed_options parsed_tables = po::command_line_parser( - external_tables_arguments[i].size(), external_tables_arguments[i].data()).options(external_description).run(); + po::parsed_options parsed_tables = po::command_line_parser(external_tables_arguments[i]).options(external_description).run(); po::variables_map external_options; po::store(parsed_tables, external_options); @@ -1802,6 +1801,9 @@ public: } if (options.count("suggestion_limit")) config().setInt("suggestion_limit", options["suggestion_limit"].as()); + + argsToConfig(common_arguments, config(), 100); + } }; diff --git a/dbms/programs/copier/ClusterCopier.cpp b/dbms/programs/copier/ClusterCopier.cpp index 5d388686d55..9cd3629192f 100644 --- a/dbms/programs/copier/ClusterCopier.cpp +++ b/dbms/programs/copier/ClusterCopier.cpp @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/dbms/programs/local/CMakeLists.txt b/dbms/programs/local/CMakeLists.txt index 299458ef913..41780936977 100644 --- a/dbms/programs/local/CMakeLists.txt +++ b/dbms/programs/local/CMakeLists.txt @@ -6,4 +6,4 @@ clickhouse_program_add(local) if(NOT CLICKHOUSE_ONE_SHARED) target_link_libraries(clickhouse-local-lib PRIVATE clickhouse-server-lib) -endif () +endif() diff --git a/dbms/programs/local/LocalServer.cpp b/dbms/programs/local/LocalServer.cpp index 73e43dffec3..3e3b249fe82 100644 --- a/dbms/programs/local/LocalServer.cpp +++ b/dbms/programs/local/LocalServer.cpp @@ -34,6 +34,7 @@ #include #include #include +#include namespace DB @@ -59,11 +60,29 @@ void LocalServer::initialize(Poco::Util::Application & self) { Poco::Util::Application::initialize(self); - // Turn off server logging to stderr - if (!config().has("verbose")) + /// Load config files if exists + if (config().has("config-file") || Poco::File("config.xml").exists()) { - Poco::Logger::root().setLevel("none"); - Poco::Logger::root().setChannel(Poco::AutoPtr(new Poco::NullChannel())); + const auto config_path = config().getString("config-file", "config.xml"); + ConfigProcessor config_processor(config_path, false, true); + config_processor.setConfigPath(Poco::Path(config_path).makeParent().toString()); + auto loaded_config = config_processor.loadConfig(); + config_processor.savePreprocessedConfig(loaded_config, loaded_config.configuration->getString("path", ".")); + config().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false); + } + + if (config().has("logger") || config().has("logger.level") || config().has("logger.log")) + { + buildLoggers(config(), logger()); + } + else + { + // Turn off server logging to stderr + if (!config().has("verbose")) + { + Poco::Logger::root().setLevel("none"); + 
Poco::Logger::root().setChannel(Poco::AutoPtr(new Poco::NullChannel())); + } } } @@ -110,16 +129,6 @@ try return Application::EXIT_OK; } - /// Load config files if exists - if (config().has("config-file") || Poco::File("config.xml").exists()) - { - const auto config_path = config().getString("config-file", "config.xml"); - ConfigProcessor config_processor(config_path, false, true); - config_processor.setConfigPath(Poco::Path(config_path).makeParent().toString()); - auto loaded_config = config_processor.loadConfig(); - config_processor.savePreprocessedConfig(loaded_config, loaded_config.configuration->getString("path", DBMS_DEFAULT_PATH)); - config().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false); - } context = std::make_unique(Context::createGlobal()); context->setGlobalContext(*context); @@ -428,6 +437,8 @@ void LocalServer::init(int argc, char ** argv) ("stacktrace", "print stack traces of exceptions") ("echo", "print query before execution") ("verbose", "print query and other debugging info") + ("logger.log", po::value(), "Log file name") + ("logger.level", po::value(), "Log level") ("ignore-error", "do not stop processing if a query failed") ("version,V", "print version information and exit") ; @@ -481,8 +492,17 @@ void LocalServer::init(int argc, char ** argv) config().setBool("echo", true); if (options.count("verbose")) config().setBool("verbose", true); + if (options.count("logger.log")) + config().setString("logger.log", options["logger.log"].as()); + if (options.count("logger.level")) + config().setString("logger.level", options["logger.level"].as()); if (options.count("ignore-error")) config().setBool("ignore-error", true); + + std::vector arguments; + for (int arg_num = 1; arg_num < argc; ++arg_num) + arguments.emplace_back(argv[arg_num]); + argsToConfig(arguments, config(), 100); } void LocalServer::applyCmdOptions() diff --git a/dbms/programs/local/LocalServer.h b/dbms/programs/local/LocalServer.h index 2f59778490c..a79ab484107 100644 --- a/dbms/programs/local/LocalServer.h +++ b/dbms/programs/local/LocalServer.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB @@ -13,7 +14,7 @@ class Context; /// Lightweight Application for clickhouse-local /// No networking, no extra configs and working directories, no pid and status files, no dictionaries, no logging. 
/// Quiet mode by default -class LocalServer : public Poco::Util::Application +class LocalServer : public Poco::Util::Application, public Loggers { public: LocalServer(); diff --git a/dbms/programs/main.cpp b/dbms/programs/main.cpp index 15f1673985b..9d0a0b4f0a6 100644 --- a/dbms/programs/main.cpp +++ b/dbms/programs/main.cpp @@ -15,7 +15,7 @@ #endif #if USE_TCMALLOC -#include // Y_IGNORE +#include #endif #include diff --git a/dbms/programs/odbc-bridge/ColumnInfoHandler.cpp b/dbms/programs/odbc-bridge/ColumnInfoHandler.cpp index 997ef9cf216..2b46a6cd49f 100644 --- a/dbms/programs/odbc-bridge/ColumnInfoHandler.cpp +++ b/dbms/programs/odbc-bridge/ColumnInfoHandler.cpp @@ -3,9 +3,9 @@ #if USE_POCO_SQLODBC || USE_POCO_DATAODBC #if USE_POCO_SQLODBC -#include // Y_IGNORE -#include // Y_IGNORE -#include // Y_IGNORE +#include +#include +#include #define POCO_SQL_ODBC_CLASS Poco::SQL::ODBC #endif #if USE_POCO_DATAODBC diff --git a/dbms/programs/odbc-bridge/IdentifierQuoteHandler.cpp b/dbms/programs/odbc-bridge/IdentifierQuoteHandler.cpp index 77fd32d80b4..303165a4d45 100644 --- a/dbms/programs/odbc-bridge/IdentifierQuoteHandler.cpp +++ b/dbms/programs/odbc-bridge/IdentifierQuoteHandler.cpp @@ -2,9 +2,9 @@ #if USE_POCO_SQLODBC || USE_POCO_DATAODBC #if USE_POCO_SQLODBC -#include // Y_IGNORE -#include // Y_IGNORE -#include // Y_IGNORE +#include +#include +#include #define POCO_SQL_ODBC_CLASS Poco::SQL::ODBC #endif #if USE_POCO_DATAODBC diff --git a/dbms/programs/odbc-bridge/ODBCBridge.cpp b/dbms/programs/odbc-bridge/ODBCBridge.cpp index a6324b6e229..aaacdfca826 100644 --- a/dbms/programs/odbc-bridge/ODBCBridge.cpp +++ b/dbms/programs/odbc-bridge/ODBCBridge.cpp @@ -123,7 +123,7 @@ void ODBCBridge::initialize(Application & self) config().setString("logger", "ODBCBridge"); - buildLoggers(config()); + buildLoggers(config(), logger()); log = &logger(); hostname = config().getString("listen-host", "localhost"); port = config().getUInt("http-port"); diff --git a/dbms/programs/odbc-bridge/getIdentifierQuote.cpp b/dbms/programs/odbc-bridge/getIdentifierQuote.cpp index 055b211e99d..9ac0a56bfc6 100644 --- a/dbms/programs/odbc-bridge/getIdentifierQuote.cpp +++ b/dbms/programs/odbc-bridge/getIdentifierQuote.cpp @@ -2,9 +2,9 @@ #if USE_POCO_SQLODBC || USE_POCO_DATAODBC #if USE_POCO_SQLODBC -#include // Y_IGNORE -#include // Y_IGNORE -#include // Y_IGNORE +#include +#include +#include #define POCO_SQL_ODBC_CLASS Poco::SQL::ODBC #endif #if USE_POCO_DATAODBC diff --git a/dbms/programs/odbc-bridge/getIdentifierQuote.h b/dbms/programs/odbc-bridge/getIdentifierQuote.h index 09d20937ea3..30371b4060d 100644 --- a/dbms/programs/odbc-bridge/getIdentifierQuote.h +++ b/dbms/programs/odbc-bridge/getIdentifierQuote.h @@ -8,7 +8,7 @@ #if USE_POCO_SQLODBC || USE_POCO_DATAODBC #if USE_POCO_SQLODBC -#include // Y_IGNORE +#include #endif #if USE_POCO_DATAODBC #include diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index 68520112565..46e44477f02 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -310,7 +310,7 @@ int Server::main(const std::vector & /*args*/) /// Initialize DateLUT early, to not interfere with running time of first query. LOG_DEBUG(log, "Initializing DateLUT."); DateLUT::instance(); - LOG_TRACE(log, "Initialized DateLUT with time zone `" << DateLUT::instance().getTimeZone() << "'."); + LOG_TRACE(log, "Initialized DateLUT with time zone '" << DateLUT::instance().getTimeZone() << "'."); /// Directory with temporary data for processing of heavy queries. 
{ @@ -405,7 +405,7 @@ int Server::main(const std::vector & /*args*/) main_config_zk_changed_event, [&](ConfigurationPtr config) { - buildLoggers(*config); + buildLoggers(*config, logger()); global_context->setClustersConfig(config); global_context->setMacros(std::make_unique(*config, "macros")); }, diff --git a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp index fcef7e6a8d1..6ec9ec5d416 100644 --- a/dbms/programs/server/TCPHandler.cpp +++ b/dbms/programs/server/TCPHandler.cpp @@ -1,8 +1,6 @@ #include #include #include -#include - #include #include #include diff --git a/dbms/src/AggregateFunctions/AggregateFunctionMLMethod.cpp b/dbms/src/AggregateFunctions/AggregateFunctionMLMethod.cpp index a28974bdbc8..af6971e3dae 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionMLMethod.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionMLMethod.cpp @@ -45,12 +45,12 @@ namespace /// Such default parameters were picked because they did good on some tests, /// though it still requires to fit parameters to achieve better result - auto learning_rate = Float64(0.00001); + auto learning_rate = Float64(0.01); auto l2_reg_coef = Float64(0.1); UInt32 batch_size = 15; - std::string weights_updater_name = "\'SGD\'"; - std::shared_ptr gradient_computer; + std::string weights_updater_name = "SGD"; + std::unique_ptr gradient_computer; if (!parameters.empty()) { @@ -66,20 +66,19 @@ namespace } if (parameters.size() > 3) { - weights_updater_name = applyVisitor(FieldVisitorToString(), parameters[3]); - if (weights_updater_name != "\'SGD\'" && weights_updater_name != "\'Momentum\'" && weights_updater_name != "\'Nesterov\'") - { - throw Exception("Invalid parameter for weights updater", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - } + weights_updater_name = parameters[3].safeGet(); + if (weights_updater_name != "SGD" && weights_updater_name != "Momentum" && weights_updater_name != "Nesterov") + throw Exception("Invalid parameter for weights updater. 
The only supported are 'SGD', 'Momentum' and 'Nesterov'", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } if (std::is_same::value) { - gradient_computer = std::make_shared(); + gradient_computer = std::make_unique(); } else if (std::is_same::value) { - gradient_computer = std::make_shared(); + gradient_computer = std::make_unique(); } else { @@ -88,7 +87,7 @@ namespace return std::make_shared( argument_types.size() - 1, - gradient_computer, + std::move(gradient_computer), weights_updater_name, learning_rate, l2_reg_coef, @@ -134,9 +133,14 @@ void LinearModelData::update_state() } void LinearModelData::predict( - ColumnVector::Container & container, Block & block, const ColumnNumbers & arguments, const Context & context) const + ColumnVector::Container & container, + Block & block, + size_t offset, + size_t limit, + const ColumnNumbers & arguments, + const Context & context) const { - gradient_computer->predict(container, block, arguments, weights, bias, context); + gradient_computer->predict(container, block, offset, limit, arguments, weights, bias, context); } void LinearModelData::returnWeights(IColumn & to) const @@ -345,42 +349,38 @@ void IWeightsUpdater::add_to_batch( void LogisticRegression::predict( ColumnVector::Container & container, Block & block, + size_t offset, + size_t limit, const ColumnNumbers & arguments, const std::vector & weights, Float64 bias, - const Context & context) const + const Context & /*context*/) const { size_t rows_num = block.rows(); - std::vector results(rows_num, bias); + + if (offset > rows_num || offset + limit > rows_num) + throw Exception("Invalid offset and limit for LogisticRegression::predict. " + "Block has " + toString(rows_num) + " rows, but offset is " + toString(offset) + + " and limit is " + toString(limit), ErrorCodes::LOGICAL_ERROR); + + std::vector results(limit, bias); for (size_t i = 1; i < arguments.size(); ++i) { const ColumnWithTypeAndName & cur_col = block.getByPosition(arguments[i]); + if (!isNativeNumber(cur_col.type)) - { throw Exception("Prediction arguments must have numeric type", ErrorCodes::BAD_ARGUMENTS); - } - /// If column type is already Float64 then castColumn simply returns it - auto features_col_ptr = castColumn(cur_col, std::make_shared(), context); - auto features_column = typeid_cast(features_col_ptr.get()); + auto & features_column = cur_col.column; - if (!features_column) - { - throw Exception("Unexpectedly cannot dynamically cast features column " + std::to_string(i), ErrorCodes::LOGICAL_ERROR); - } - - for (size_t row_num = 0; row_num != rows_num; ++row_num) - { - results[row_num] += weights[i - 1] * features_column->getElement(row_num); - } + for (size_t row_num = 0; row_num < limit; ++row_num) + results[row_num] += weights[i - 1] * features_column->getFloat64(offset + row_num); } - container.reserve(rows_num); - for (size_t row_num = 0; row_num != rows_num; ++row_num) - { + container.reserve(container.size() + limit); + for (size_t row_num = 0; row_num < limit; ++row_num) container.emplace_back(1 / (1 + exp(-results[row_num]))); - } } void LogisticRegression::compute( @@ -413,10 +413,12 @@ void LogisticRegression::compute( void LinearRegression::predict( ColumnVector::Container & container, Block & block, + size_t offset, + size_t limit, const ColumnNumbers & arguments, const std::vector & weights, Float64 bias, - const Context & context) const + const Context & /*context*/) const { if (weights.size() + 1 != arguments.size()) { @@ -424,36 +426,33 @@ void LinearRegression::predict( } size_t rows_num = block.rows(); 
-    std::vector<Float64> results(rows_num, bias);
+
+    if (offset > rows_num || offset + limit > rows_num)
+        throw Exception("Invalid offset and limit for LinearRegression::predict. "
+                        "Block has " + toString(rows_num) + " rows, but offset is " + toString(offset) +
+                        " and limit is " + toString(limit), ErrorCodes::LOGICAL_ERROR);
+
+    std::vector<Float64> results(limit, bias);
 
     for (size_t i = 1; i < arguments.size(); ++i)
     {
         const ColumnWithTypeAndName & cur_col = block.getByPosition(arguments[i]);
-        if (!isNativeNumber(cur_col.type))
-        {
-            throw Exception("Prediction arguments must have numeric type", ErrorCodes::BAD_ARGUMENTS);
-        }
 
-        /// If column type is already Float64 then castColumn simply returns it
-        auto features_col_ptr = castColumn(cur_col, std::make_shared<DataTypeFloat64>(), context);
-        auto features_column = typeid_cast<const ColumnVector<Float64> *>(features_col_ptr.get());
+        if (!isNativeNumber(cur_col.type))
+            throw Exception("Prediction arguments must have numeric type", ErrorCodes::BAD_ARGUMENTS);
+
+        auto features_column = cur_col.column;
 
         if (!features_column)
-        {
             throw Exception("Unexpectedly cannot dynamically cast features column " + std::to_string(i), ErrorCodes::LOGICAL_ERROR);
-        }
 
-        for (size_t row_num = 0; row_num != rows_num; ++row_num)
-        {
-            results[row_num] += weights[i - 1] * features_column->getElement(row_num);
-        }
+        for (size_t row_num = 0; row_num < limit; ++row_num)
+            results[row_num] += weights[i - 1] * features_column->getFloat64(row_num + offset);
     }
 
-    container.reserve(rows_num);
-    for (size_t row_num = 0; row_num != rows_num; ++row_num)
-    {
+    container.reserve(container.size() + limit);
+    for (size_t row_num = 0; row_num < limit; ++row_num)
         container.emplace_back(results[row_num]);
-    }
 }
 
 void LinearRegression::compute(
diff --git a/dbms/src/AggregateFunctions/AggregateFunctionMLMethod.h b/dbms/src/AggregateFunctions/AggregateFunctionMLMethod.h
index b96b2bceed5..90048924173 100644
--- a/dbms/src/AggregateFunctions/AggregateFunctionMLMethod.h
+++ b/dbms/src/AggregateFunctions/AggregateFunctionMLMethod.h
@@ -3,6 +3,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -36,12 +37,13 @@ public:
         Float64 l2_reg_coef,
         Float64 target,
         const IColumn ** columns,
-        size_t row_num)
-        = 0;
+        size_t row_num) = 0;
 
     virtual void predict(
         ColumnVector<Float64>::Container & container,
         Block & block,
+        size_t offset,
+        size_t limit,
         const ColumnNumbers & arguments,
         const std::vector<Float64> & weights,
         Float64 bias,
@@ -67,6 +69,8 @@ public:
     void predict(
         ColumnVector<Float64>::Container & container,
         Block & block,
+        size_t offset,
+        size_t limit,
         const ColumnNumbers & arguments,
         const std::vector<Float64> & weights,
         Float64 bias,
@@ -92,6 +96,8 @@ public:
     void predict(
         ColumnVector<Float64>::Container & container,
         Block & block,
+        size_t offset,
+        size_t limit,
         const ColumnNumbers & arguments,
         const std::vector<Float64> & weights,
         Float64 bias,
@@ -194,9 +200,8 @@ private:
 };
 
-/**
-* LinearModelData is a class which manages current state of learning
-*/
+/** LinearModelData is a class which manages current state of learning
+  */
 class LinearModelData
 {
 public:
@@ -218,8 +223,13 @@ public:
 
     void read(ReadBuffer & buf);
 
-    void
-    predict(ColumnVector<Float64>::Container & container, Block & block, const ColumnNumbers & arguments, const Context & context) const;
+    void predict(
+        ColumnVector<Float64>::Container & container,
+        Block & block,
+        size_t offset,
+        size_t limit,
+        const ColumnNumbers & arguments,
+        const Context & context) const;
 
     void returnWeights(IColumn & to) const;
 
 private:
@@ -228,18 +238,17 @@ private:
     Float64 learning_rate;
     Float64 l2_reg_coef;
-    UInt32 batch_capacity;
+    
UInt64 batch_capacity; - UInt32 iter_num = 0; + UInt64 iter_num = 0; std::vector gradient_batch; - UInt32 batch_size; + UInt64 batch_size; std::shared_ptr gradient_computer; std::shared_ptr weights_updater; - /** - * The function is called when we want to flush current batch and update our weights - */ + /** The function is called when we want to flush current batch and update our weights + */ void update_state(); }; @@ -256,7 +265,7 @@ public: explicit AggregateFunctionMLMethod( UInt32 param_num, - std::shared_ptr gradient_computer, + std::unique_ptr gradient_computer, std::string weights_updater_name, Float64 learning_rate, Float64 l2_reg_coef, @@ -288,19 +297,15 @@ public: void create(AggregateDataPtr place) const override { std::shared_ptr new_weights_updater; - if (weights_updater_name == "\'SGD\'") - { + if (weights_updater_name == "SGD") new_weights_updater = std::make_shared(); - } else if (weights_updater_name == "\'Momentum\'") - { + else if (weights_updater_name == "Momentum") new_weights_updater = std::make_shared(); - } else if (weights_updater_name == "\'Nesterov\'") - { + else if (weights_updater_name == "Nesterov") new_weights_updater = std::make_shared(); - } else - { + else throw Exception("Illegal name of weights updater (should have been checked earlier)", ErrorCodes::LOGICAL_ERROR); - } + new (place) Data(learning_rate, l2_reg_coef, param_num, batch_size, gradient_computer, new_weights_updater); } @@ -316,7 +321,13 @@ public: void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { this->data(place).read(buf); } void predictValues( - ConstAggregateDataPtr place, IColumn & to, Block & block, const ColumnNumbers & arguments, const Context & context) const override + ConstAggregateDataPtr place, + IColumn & to, + Block & block, + size_t offset, + size_t limit, + const ColumnNumbers & arguments, + const Context & context) const override { if (arguments.size() != param_num + 1) throw Exception( @@ -325,17 +336,12 @@ public: ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); /// This cast might be correct because column type is based on getReturnTypeToPredict. - ColumnVector * column; - try - { - column = &dynamic_cast &>(to); - } catch (const std::bad_cast &) - { + auto * column = typeid_cast(&to); + if (!column) throw Exception("Cast of column of predictions is incorrect. getReturnTypeToPredict must return same value as it is casted to", ErrorCodes::BAD_CAST); - } - this->data(place).predict(column->getData(), block, arguments, context); + this->data(place).predict(column->getData(), block, offset, limit, arguments, context); } /** This function is called if aggregate function without State modifier is selected in a query. 
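The hunks above thread a row range `[offset, offset + limit)` through `predictValues`, so one trained model can predict many rows in a single call instead of one call per row. A hedged sketch of the calling pattern this enables (simplified names and a stand-in `states` vector; the real caller is `ColumnAggregateFunction::predictValues` in the next hunk):

```cpp
/// Sketch only: mirrors the logic of ColumnAggregateFunction::predictValues below.
/// `states` stands for the per-row aggregate-state pointers held by the column.
void predictBlock(
    const IAggregateFunction & func,
    const std::vector<ConstAggregateDataPtr> & states,
    IColumn & result,
    Block & block,
    const ColumnNumbers & arguments,
    const Context & context)
{
    if (states.size() == 1)
    {
        /// Constant state column: a single model predicts the whole block at once.
        func.predictValues(states[0], result, block, /* offset = */ 0, /* limit = */ block.rows(), arguments, context);
    }
    else
    {
        /// One independently trained model per row: predict a single row per state.
        for (size_t row = 0; row < states.size(); ++row)
            func.predictValues(states[row], result, block, /* offset = */ row, /* limit = */ 1, arguments, context);
    }
}
```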
diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp index 38982b8130e..3f835fb6033 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp @@ -117,11 +117,11 @@ namespace __builtin_unreachable(); } -} // namespace +} void registerAggregateFunctionUniqCombined(AggregateFunctionFactory & factory) { factory.registerFunction("uniqCombined", createAggregateFunctionUniqCombined); } -} // namespace DB +} diff --git a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h index b82b1f2c198..7206da3a6aa 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h @@ -63,7 +63,7 @@ namespace detail } }; -} // namespace detail +} template @@ -231,4 +231,4 @@ public: } }; -} // namespace DB +} diff --git a/dbms/src/AggregateFunctions/IAggregateFunction.h b/dbms/src/AggregateFunctions/IAggregateFunction.h index e5fb71a374c..94feb2456cf 100644 --- a/dbms/src/AggregateFunctions/IAggregateFunction.h +++ b/dbms/src/AggregateFunctions/IAggregateFunction.h @@ -100,9 +100,16 @@ public: /// Inserts results into a column. virtual void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const = 0; - /// This function is used for machine learning methods - virtual void predictValues(ConstAggregateDataPtr /* place */, IColumn & /*to*/, - Block & /*block*/, const ColumnNumbers & /*arguments*/, const Context & /*context*/) const + /// Used for machine learning methods. Predict result from trained model. + /// Will insert result into `to` column for rows in range [offset, offset + limit). + virtual void predictValues( + ConstAggregateDataPtr /* place */, + IColumn & /*to*/, + Block & /*block*/, + size_t /*offset*/, + size_t /*limit*/, + const ColumnNumbers & /*arguments*/, + const Context & /*context*/) const { throw Exception("Method predictValues is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); } diff --git a/dbms/src/Columns/ColumnAggregateFunction.cpp b/dbms/src/Columns/ColumnAggregateFunction.cpp index f47b9840444..f6a597caba0 100644 --- a/dbms/src/Columns/ColumnAggregateFunction.cpp +++ b/dbms/src/Columns/ColumnAggregateFunction.cpp @@ -92,13 +92,21 @@ MutableColumnPtr ColumnAggregateFunction::predictValues(Block & block, const Col auto ML_function = func.get(); if (ML_function) { - size_t row_num = 0; - for (auto val : data) + if (data.size() == 1) { - ML_function->predictValues(val, *res, block, arguments, context); - ++row_num; + /// Case for const column. Predict using single model. + ML_function->predictValues(data[0], *res, block, 0, block.rows(), arguments, context); + } + else + { + /// Case for non-constant column. Use different aggregate function for each row. 
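+            /// Each element of `data` is an independently trained model (one aggregate state per row),
+            /// so predict exactly one row per state: offset = row_num, limit = 1.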
+ size_t row_num = 0; + for (auto val : data) + { + ML_function->predictValues(val, *res, block, row_num, 1, arguments, context); + ++row_num; + } } - } else { diff --git a/dbms/src/Common/Config/ConfigProcessor.cpp b/dbms/src/Common/Config/ConfigProcessor.cpp index 06c1269bd63..04c8bf15c03 100644 --- a/dbms/src/Common/Config/ConfigProcessor.cpp +++ b/dbms/src/Common/Config/ConfigProcessor.cpp @@ -527,7 +527,7 @@ ConfigProcessor::LoadedConfig ConfigProcessor::loadConfig(bool allow_zk_includes XMLDocumentPtr config_xml = processConfig(&has_zk_includes); if (has_zk_includes && !allow_zk_includes) - throw Poco::Exception("Error while loading config `" + path + "': from_zk includes are not allowed!"); + throw Poco::Exception("Error while loading config '" + path + "': from_zk includes are not allowed!"); ConfigurationPtr configuration(new Poco::Util::XMLConfiguration(config_xml)); diff --git a/dbms/src/Common/Config/ConfigReloader.cpp b/dbms/src/Common/Config/ConfigReloader.cpp index 063fbec8e5b..0e3cf9d1dba 100644 --- a/dbms/src/Common/Config/ConfigReloader.cpp +++ b/dbms/src/Common/Config/ConfigReloader.cpp @@ -87,7 +87,7 @@ void ConfigReloader::reloadIfNewer(bool force, bool throw_on_error, bool fallbac ConfigProcessor::LoadedConfig loaded_config; try { - LOG_DEBUG(log, "Loading config `" << path << "'"); + LOG_DEBUG(log, "Loading config '" << path << "'"); loaded_config = config_processor.loadConfig(/* allow_zk_includes = */ true); if (loaded_config.has_zk_includes) @@ -102,7 +102,7 @@ void ConfigReloader::reloadIfNewer(bool force, bool throw_on_error, bool fallbac if (throw_on_error) throw; - tryLogCurrentException(log, "ZooKeeper error when loading config from `" + path + "'"); + tryLogCurrentException(log, "ZooKeeper error when loading config from '" + path + "'"); return; } catch (...) @@ -110,7 +110,7 @@ void ConfigReloader::reloadIfNewer(bool force, bool throw_on_error, bool fallbac if (throw_on_error) throw; - tryLogCurrentException(log, "Error loading config from `" + path + "'"); + tryLogCurrentException(log, "Error loading config from '" + path + "'"); return; } config_processor.savePreprocessedConfig(loaded_config, preprocessed_dir); @@ -134,7 +134,7 @@ void ConfigReloader::reloadIfNewer(bool force, bool throw_on_error, bool fallbac { if (throw_on_error) throw; - tryLogCurrentException(log, "Error updating configuration from `" + path + "' config."); + tryLogCurrentException(log, "Error updating configuration from '" + path + "' config."); } } } diff --git a/dbms/src/Common/IPv6ToBinary.cpp b/dbms/src/Common/IPv6ToBinary.cpp new file mode 100644 index 00000000000..a17a6f3d9b3 --- /dev/null +++ b/dbms/src/Common/IPv6ToBinary.cpp @@ -0,0 +1,30 @@ +#include "IPv6ToBinary.h" +#include + + +namespace DB +{ + +std::array IPv6ToBinary(const Poco::Net::IPAddress & address) +{ + std::array res; + + if (Poco::Net::IPAddress::IPv6 == address.family()) + { + memcpy(res.data(), address.addr(), 16); + } + else if (Poco::Net::IPAddress::IPv4 == address.family()) + { + /// Convert to IPv6-mapped address. 
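+        /// An IPv4 address a.b.c.d maps to ::ffff:a.b.c.d (RFC 4291): bytes 0-9 are zero,
+        /// bytes 10-11 are 0xFF, and bytes 12-15 hold the four IPv4 octets.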
+ memset(res.data(), 0, 10); + res[10] = '\xFF'; + res[11] = '\xFF'; + memcpy(&res[12], address.addr(), 4); + } + else + memset(res.data(), 0, 16); + + return res; +} + +} diff --git a/dbms/src/Common/IPv6ToBinary.h b/dbms/src/Common/IPv6ToBinary.h new file mode 100644 index 00000000000..7bc91d6e909 --- /dev/null +++ b/dbms/src/Common/IPv6ToBinary.h @@ -0,0 +1,11 @@ +#include + +namespace Poco { namespace Net { class IPAddress; }} + +namespace DB +{ + +/// Convert IP address to 16-byte array with IPv6 data (big endian). If it's an IPv4, map it to IPv6. +std::array IPv6ToBinary(const Poco::Net::IPAddress & address); + +} diff --git a/dbms/src/Common/OptimizedRegularExpression.h b/dbms/src/Common/OptimizedRegularExpression.h index 729711277b1..ccbd8a517d2 100644 --- a/dbms/src/Common/OptimizedRegularExpression.h +++ b/dbms/src/Common/OptimizedRegularExpression.h @@ -6,7 +6,7 @@ #include #include #if USE_RE2_ST - #include // Y_IGNORE + #include #else #define re2_st re2 #endif diff --git a/dbms/src/Common/StringSearcher.h b/dbms/src/Common/StringSearcher.h index 9e1f2413439..f722ebc6c55 100644 --- a/dbms/src/Common/StringSearcher.h +++ b/dbms/src/Common/StringSearcher.h @@ -329,8 +329,7 @@ class StringSearcher : private StringSearcherBase private: /// string to be searched for const UInt8 * const needle; - const size_t needle_size; - const UInt8 * const needle_end = needle + needle_size; + const UInt8 * const needle_end; /// lower and uppercase variants of the first character in `needle` UInt8 l{}; UInt8 u{}; @@ -345,7 +344,7 @@ private: public: StringSearcher(const char * const needle_, const size_t needle_size) - : needle{reinterpret_cast(needle_)}, needle_size{needle_size} + : needle{reinterpret_cast(needle_)}, needle_end{needle + needle_size} { if (0 == needle_size) return; @@ -430,7 +429,7 @@ public: const UInt8 * search(const UInt8 * haystack, const UInt8 * const haystack_end) const { - if (0 == needle_size) + if (needle == needle_end) return haystack; while (haystack < haystack_end) @@ -528,8 +527,7 @@ class StringSearcher : private StringSearcherBase private: /// string to be searched for const UInt8 * const needle; - const size_t needle_size; - const UInt8 * const needle_end = needle + needle_size; + const UInt8 * const needle_end; /// first character in `needle` UInt8 first{}; @@ -543,7 +541,7 @@ private: public: StringSearcher(const char * const needle_, const size_t needle_size) - : needle{reinterpret_cast(needle_)}, needle_size{needle_size} + : needle{reinterpret_cast(needle_)}, needle_end{needle + needle_size} { if (0 == needle_size) return; @@ -616,7 +614,7 @@ public: const UInt8 * search(const UInt8 * haystack, const UInt8 * const haystack_end) const { - if (0 == needle_size) + if (needle == needle_end) return haystack; while (haystack < haystack_end) @@ -715,10 +713,9 @@ using UTF8CaseInsensitiveStringSearcher = StringSearcher; struct LibCASCIICaseSensitiveStringSearcher { const char * const needle; - const size_t needle_size; - LibCASCIICaseSensitiveStringSearcher(const char * const needle, const size_t needle_size) - : needle(needle), needle_size(needle_size) {} + LibCASCIICaseSensitiveStringSearcher(const char * const needle, const size_t /* needle_size */) + : needle(needle) {} const UInt8 * search(const UInt8 * haystack, const UInt8 * const haystack_end) const { @@ -737,10 +734,9 @@ struct LibCASCIICaseSensitiveStringSearcher struct LibCASCIICaseInsensitiveStringSearcher { const char * const needle; - const size_t needle_size; - 
LibCASCIICaseInsensitiveStringSearcher(const char * const needle, const size_t needle_size) - : needle(needle), needle_size(needle_size) {} + LibCASCIICaseInsensitiveStringSearcher(const char * const needle, const size_t /* needle_size */) + : needle(needle) {} const UInt8 * search(const UInt8 * haystack, const UInt8 * const haystack_end) const { diff --git a/dbms/src/Common/TypePromotion.h b/dbms/src/Common/TypePromotion.h index 18ac0821b2c..62d2de90e04 100644 --- a/dbms/src/Common/TypePromotion.h +++ b/dbms/src/Common/TypePromotion.h @@ -60,4 +60,4 @@ public: } }; -} // namespace DB +} diff --git a/dbms/src/Common/Volnitsky.h b/dbms/src/Common/Volnitsky.h index bce37e655cd..646ad57aa1f 100644 --- a/dbms/src/Common/Volnitsky.h +++ b/dbms/src/Common/Volnitsky.h @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include @@ -345,6 +344,7 @@ public: auto callback = [this](const VolnitskyTraits::Ngram ngram, const int offset) { return this->putNGramBase(ngram, offset); }; /// ssize_t is used here because unsigned can't be used with condition like `i >= 0`, unsigned always >= 0 + /// And also adding from the end guarantees that we will find the first occurrence, because we look up bigger offsets first. for (auto i = static_cast(needle_size - sizeof(VolnitskyTraits::Ngram)); i >= 0; --i) VolnitskyTraits::putNGram(this->needle + i, i + 1, this->needle, callback); } @@ -436,94 +436,6 @@ public: fallback_searchers.reserve(needles.size()); } - template - void searchAllPositions( - const ColumnString::Chars & haystack_data, - const ColumnString::Offsets & haystack_offsets, - const AnsCallback & ans_callback, - ResultType & ans) - { - const size_t haystack_string_size = haystack_offsets.size(); - const size_t needles_size = needles.size(); - - /// something can be uninitialized after - std::fill(ans.begin(), ans.end(), 0); - - while (!reset()) - { - size_t fallback_size = fallback_needles.size(); - size_t prev_offset = 0; - for (size_t j = 0, from = 0; j < haystack_string_size; ++j, from += needles_size) - { - const auto * haystack = &haystack_data[prev_offset]; - const auto * haystack_end = haystack + haystack_offsets[j] - prev_offset - 1; - for (size_t i = 0; i < fallback_size; ++i) - { - const UInt8 * ptr = fallback_searchers[fallback_needles[i]].search(haystack, haystack_end); - if (ptr != haystack_end) - ans[from + fallback_needles[i]] = ans_callback(haystack, ptr); - } - - /// check if we have one non empty volnitsky searcher - if (step != std::numeric_limits::max()) - { - const auto * pos = haystack + step - sizeof(VolnitskyTraits::Ngram); - for (; pos <= haystack_end - sizeof(VolnitskyTraits::Ngram); pos += step) - { - for (size_t cell_num = VolnitskyTraits::toNGram(pos) % VolnitskyTraits::hash_size; hash[cell_num].off; - cell_num = (cell_num + 1) % VolnitskyTraits::hash_size) - { - if (pos >= haystack + hash[cell_num].off - 1) - { - const auto * res = pos - (hash[cell_num].off - 1); - const size_t ind = hash[cell_num].id; - if (ans[from + ind] == 0 && res + needles[ind].size <= haystack_end) - { - if (fallback_searchers[ind].compare(res)) - { - ans[from + ind] = ans_callback(haystack, res); - } - } - } - } - } - } - prev_offset = haystack_offsets[j]; - } - } - } - - template - void search(const ColumnString::Chars & haystack_data, const ColumnString::Offsets & haystack_offsets, ResultType & ans) - { - auto callback = [this](const UInt8 * haystack, const UInt8 * haystack_end) -> bool - { - return this->searchOne(haystack, haystack_end); - }; - searchInternal(haystack_data, 
haystack_offsets, callback, ans); - } - - template - void searchIndex(const ColumnString::Chars & haystack_data, const ColumnString::Offsets & haystack_offsets, ResultType & ans) - { - auto callback = [this](const UInt8 * haystack, const UInt8 * haystack_end) -> size_t - { - return this->searchOneIndex(haystack, haystack_end); - }; - searchInternal(haystack_data, haystack_offsets, callback, ans); - } - - template - void searchFirstPosition(const ColumnString::Chars & haystack_data, const ColumnString::Offsets & haystack_offsets, const CountCharsCallback & count_chars_callback, ResultType & ans) - { - auto callback = [this, &count_chars_callback](const UInt8 * haystack, const UInt8 * haystack_end) -> UInt64 - { - return this->searchOneFirstPosition(haystack, haystack_end, count_chars_callback); - }; - searchInternal(haystack_data, haystack_offsets, callback, ans); - } - -private: /** * This function is needed to initialize hash table * Returns `true` if there is nothing to initialize @@ -532,15 +444,15 @@ private: * We actually destroy the hash table and initialize it with uninitialized needles * and search through the haystack again. * The actual usage of this function is like this: - * while (!reset()) + * while (hasMoreToSearch()) * { * search inside the haystack with the known needles * } */ - bool reset() + bool hasMoreToSearch() { if (last == needles.size()) - return true; + return false; memset(hash, 0, sizeof(hash)); fallback_needles.clear(); @@ -585,28 +497,7 @@ private: } fallback_searchers.emplace_back(cur_needle_data, cur_needle_size); } - return false; - } - - template - inline void searchInternal( - const ColumnString::Chars & haystack_data, - const ColumnString::Offsets & haystack_offsets, - const OneSearcher & search_fallback, - ResultType & ans) - { - const size_t haystack_string_size = haystack_offsets.size(); - while (!reset()) - { - size_t prev_offset = 0; - for (size_t j = 0; j < haystack_string_size; ++j) - { - const auto * haystack = &haystack_data[prev_offset]; - const auto * haystack_end = haystack + haystack_offsets[j] - prev_offset - 1; - ans[j] = search_fallback(haystack, haystack_end); - prev_offset = haystack_offsets[j]; - } - } + return true; } inline bool searchOne(const UInt8 * haystack, const UInt8 * haystack_end) const @@ -638,7 +529,7 @@ private: return false; } - inline size_t searchOneIndex(const UInt8 * haystack, const UInt8 * haystack_end) const + inline size_t searchOneFirstIndex(const UInt8 * haystack, const UInt8 * haystack_end) const { const size_t fallback_size = fallback_needles.size(); @@ -676,7 +567,7 @@ private: } template - inline UInt64 searchOneFirstPosition(const UInt8 * haystack, const UInt8 * haystack_end, const CountCharsCallback & callback) const + inline UInt64 searchOneFirstPosition(const UInt8 * haystack, const UInt8 * haystack_end, const CountCharsCallback & count_chars) const { const size_t fallback_size = fallback_needles.size(); @@ -684,7 +575,7 @@ private: for (size_t i = 0; i < fallback_size; ++i) if (auto pos = fallback_searchers[fallback_needles[i]].search(haystack, haystack_end); pos != haystack_end) - ans = std::min(ans, callback(haystack, pos)); + ans = std::min(ans, pos - haystack); /// check if we have one non empty volnitsky searcher if (step != std::numeric_limits::max()) @@ -700,14 +591,46 @@ private: const auto res = pos - (hash[cell_num].off - 1); const size_t ind = hash[cell_num].id; if (res + needles[ind].size <= haystack_end && fallback_searchers[ind].compare(res)) - ans = std::min(ans, callback(haystack, 
res)); + ans = std::min(ans, res - haystack); } } } } if (ans == std::numeric_limits::max()) return 0; - return ans; + return count_chars(haystack, haystack + ans); + } + + template + inline void searchOneAll(const UInt8 * haystack, const UInt8 * haystack_end, AnsType * ans, const CountCharsCallback & count_chars) const + { + const size_t fallback_size = fallback_needles.size(); + for (size_t i = 0; i < fallback_size; ++i) + { + const UInt8 * ptr = fallback_searchers[fallback_needles[i]].search(haystack, haystack_end); + if (ptr != haystack_end) + ans[fallback_needles[i]] = count_chars(haystack, ptr); + } + + /// check if we have one non empty volnitsky searcher + if (step != std::numeric_limits::max()) + { + const auto * pos = haystack + step - sizeof(VolnitskyTraits::Ngram); + for (; pos <= haystack_end - sizeof(VolnitskyTraits::Ngram); pos += step) + { + for (size_t cell_num = VolnitskyTraits::toNGram(pos) % VolnitskyTraits::hash_size; hash[cell_num].off; + cell_num = (cell_num + 1) % VolnitskyTraits::hash_size) + { + if (pos >= haystack + hash[cell_num].off - 1) + { + const auto * res = pos - (hash[cell_num].off - 1); + const size_t ind = hash[cell_num].id; + if (ans[ind] == 0 && res + needles[ind].size <= haystack_end && fallback_searchers[ind].compare(res)) + ans[ind] = count_chars(haystack, res); + } + } + } + } } void putNGramBase(const VolnitskyTraits::Ngram ngram, const int offset, const size_t num) diff --git a/dbms/src/Common/ZooKeeper/ZooKeeperNodeCache.cpp b/dbms/src/Common/ZooKeeper/ZooKeeperNodeCache.cpp index 2fdd27e83ba..ee4f54dec13 100644 --- a/dbms/src/Common/ZooKeeper/ZooKeeperNodeCache.cpp +++ b/dbms/src/Common/ZooKeeper/ZooKeeperNodeCache.cpp @@ -36,7 +36,7 @@ ZooKeeperNodeCache::ZNode ZooKeeperNodeCache::get(const std::string & path, Coor zkutil::ZooKeeperPtr zookeeper = get_zookeeper(); if (!zookeeper) - throw DB::Exception("Could not get znode: `" + path + "'. ZooKeeper not configured.", DB::ErrorCodes::NO_ZOOKEEPER); + throw DB::Exception("Could not get znode: '" + path + "'. 
 
     for (const auto & invalidated_path : invalidated_paths)
         path_to_cached_znode.erase(invalidated_path);
diff --git a/dbms/src/Common/convertMySQLDataType.cpp b/dbms/src/Common/convertMySQLDataType.cpp
new file mode 100644
index 00000000000..0db9b8ab2ef
--- /dev/null
+++ b/dbms/src/Common/convertMySQLDataType.cpp
@@ -0,0 +1,65 @@
+#include <Common/convertMySQLDataType.h>
+
+namespace DB
+{
+
+ASTPtr dataTypeConvertToQuery(const DataTypePtr & data_type)
+{
+    WhichDataType which(data_type);
+
+    if (!which.isNullable())
+        return std::make_shared<ASTIdentifier>(data_type->getName());
+
+    return makeASTFunction("Nullable", dataTypeConvertToQuery(typeid_cast<const DataTypeNullable *>(data_type.get())->getNestedType()));
+}
+
+DataTypePtr convertMySQLDataType(const String & mysql_data_type, bool is_nullable, bool is_unsigned, size_t length)
+{
+    DataTypePtr res;
+    if (mysql_data_type == "tinyint")
+    {
+        if (is_unsigned)
+            res = std::make_shared<DataTypeUInt8>();
+        else
+            res = std::make_shared<DataTypeInt8>();
+    }
+    else if (mysql_data_type == "smallint")
+    {
+        if (is_unsigned)
+            res = std::make_shared<DataTypeUInt16>();
+        else
+            res = std::make_shared<DataTypeInt16>();
+    }
+    else if (mysql_data_type == "int" || mysql_data_type == "mediumint")
+    {
+        if (is_unsigned)
+            res = std::make_shared<DataTypeUInt32>();
+        else
+            res = std::make_shared<DataTypeInt32>();
+    }
+    else if (mysql_data_type == "bigint")
+    {
+        if (is_unsigned)
+            res = std::make_shared<DataTypeUInt64>();
+        else
+            res = std::make_shared<DataTypeInt64>();
+    }
+    else if (mysql_data_type == "float")
+        res = std::make_shared<DataTypeFloat32>();
+    else if (mysql_data_type == "double")
+        res = std::make_shared<DataTypeFloat64>();
+    else if (mysql_data_type == "date")
+        res = std::make_shared<DataTypeDate>();
+    else if (mysql_data_type == "datetime" || mysql_data_type == "timestamp")
+        res = std::make_shared<DataTypeDateTime>();
+    else if (mysql_data_type == "binary")
+        res = std::make_shared<DataTypeFixedString>(length);
+    else
+        /// Also String is the fallback for all unknown types.
+        res = std::make_shared<DataTypeString>();
+    if (is_nullable)
+        res = std::make_shared<DataTypeNullable>(res);
+    return res;
+}
+
+}
diff --git a/dbms/src/Common/convertMySQLDataType.h b/dbms/src/Common/convertMySQLDataType.h
new file mode 100644
index 00000000000..8cff8880353
--- /dev/null
+++ b/dbms/src/Common/convertMySQLDataType.h
@@ -0,0 +1,26 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+
+/// Convert a data type to a query AST. For example:
+///     DataTypeUInt8 -> ASTIdentifier(UInt8)
+///     DataTypeNullable(DataTypeUInt8) -> ASTFunction(ASTIdentifier(UInt8))
+ASTPtr dataTypeConvertToQuery(const DataTypePtr & data_type);
+
+/// Convert a MySQL type to a ClickHouse data type.
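+///
+/// A couple of illustrative examples of the mapping implemented in
+/// convertMySQLDataType.cpp (the calls below are sketches, not part of this API):
+///
+///     convertMySQLDataType("int", /* is_nullable = */ false, /* is_unsigned = */ true, 0)
+///         -> DataTypeUInt32
+///     convertMySQLDataType("varchar", /* is_nullable = */ true, /* is_unsigned = */ false, 0)
+///         -> Nullable(String), because String is the fallback for unknown types
+///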
+DataTypePtr convertMySQLDataType(const String & mysql_data_type, bool is_nullable, bool is_unsigned, size_t length);
+
+}
diff --git a/dbms/src/Common/getNumberOfPhysicalCPUCores.cpp b/dbms/src/Common/getNumberOfPhysicalCPUCores.cpp
index b31ae600f3b..de158a51a77 100644
--- a/dbms/src/Common/getNumberOfPhysicalCPUCores.cpp
+++ b/dbms/src/Common/getNumberOfPhysicalCPUCores.cpp
@@ -7,7 +7,7 @@
 # include
 namespace DB { namespace ErrorCodes { extern const int CPUID_ERROR; }}
 #elif USE_CPUINFO
-# include <cpuinfo.h> // Y_IGNORE
+# include <cpuinfo.h>
 #endif
diff --git a/dbms/src/DataStreams/NullBlockInputStream.h b/dbms/src/DataStreams/NullBlockInputStream.h
index 7598f8a5811..2e4f78899dc 100644
--- a/dbms/src/DataStreams/NullBlockInputStream.h
+++ b/dbms/src/DataStreams/NullBlockInputStream.h
@@ -21,4 +21,4 @@ private:
     Block readImpl() override { return {}; }
 };
 
-} /// namespace DB
+}
diff --git a/dbms/src/DataTypes/DataTypeCustom.h b/dbms/src/DataTypes/DataTypeCustom.h
index 93882361e20..0706803048d 100644
--- a/dbms/src/DataTypes/DataTypeCustom.h
+++ b/dbms/src/DataTypes/DataTypeCustom.h
@@ -85,4 +85,4 @@ public:
     String getName() const override { return name; }
 };
 
-} // namespace DB
+}
diff --git a/dbms/src/DataTypes/DataTypeCustomIPv4AndIPv6.cpp b/dbms/src/DataTypes/DataTypeCustomIPv4AndIPv6.cpp
index 8d12a9847db..559f1f0cc9c 100644
--- a/dbms/src/DataTypes/DataTypeCustomIPv4AndIPv6.cpp
+++ b/dbms/src/DataTypes/DataTypeCustomIPv4AndIPv6.cpp
@@ -98,7 +98,7 @@ public:
     }
 };
 
-} // namespace
+}
 
 void registerDataTypeDomainIPv4AndIPv6(DataTypeFactory & factory)
 {
@@ -115,4 +115,4 @@ void registerDataTypeDomainIPv4AndIPv6(DataTypeFactory & factory)
     });
 }
 
-} // namespace DB
+}
diff --git a/dbms/src/DataTypes/DataTypeCustomSimpleTextSerialization.cpp b/dbms/src/DataTypes/DataTypeCustomSimpleTextSerialization.cpp
index 44ce27a6e88..5f1e2ae5665 100644
--- a/dbms/src/DataTypes/DataTypeCustomSimpleTextSerialization.cpp
+++ b/dbms/src/DataTypes/DataTypeCustomSimpleTextSerialization.cpp
@@ -23,7 +23,7 @@ static void deserializeFromString(const DataTypeCustomSimpleTextSerialization &
     domain.deserializeText(column, istr, settings);
 }
 
-} // namespace
+}
 
 namespace DB
 {
@@ -85,4 +85,4 @@ void DataTypeCustomSimpleTextSerialization::serializeTextXML(const IColumn & col
     writeXMLString(serializeToString(*this, column, row_num, settings), ostr);
 }
 
-} // namespace DB
+}
diff --git a/dbms/src/DataTypes/DataTypeCustomSimpleTextSerialization.h b/dbms/src/DataTypes/DataTypeCustomSimpleTextSerialization.h
index fb9be86d95f..ba483b4ff5c 100644
--- a/dbms/src/DataTypes/DataTypeCustomSimpleTextSerialization.h
+++ b/dbms/src/DataTypes/DataTypeCustomSimpleTextSerialization.h
@@ -50,4 +50,4 @@ public:
     void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
 };
 
-} // namespace DB
+}
diff --git a/dbms/src/DataTypes/DataTypesDecimal.cpp b/dbms/src/DataTypes/DataTypesDecimal.cpp
index 15f410d2530..9dd811e7aec 100644
--- a/dbms/src/DataTypes/DataTypesDecimal.cpp
+++ b/dbms/src/DataTypes/DataTypesDecimal.cpp
@@ -212,7 +212,7 @@ DataTypePtr createDecimal(UInt64 precision_value, UInt64 scale_value)
         throw Exception("Wrong precision", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
 
     if (static_cast<UInt64>(scale_value) > precision_value)
-        throw Exception("Negative scales and scales larger than presicion are not supported", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
+        throw Exception("Negative scales and scales larger than precision are not supported", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
 
     if (precision_value <= maxDecimalPrecision<Decimal32>())
         return std::make_shared<DataTypeDecimal<Decimal32>>(precision_value, scale_value);
diff --git a/dbms/src/DataTypes/DataTypesDecimal.h b/dbms/src/DataTypes/DataTypesDecimal.h
index d8dcc6b9d8e..ea0d777784d 100644
--- a/dbms/src/DataTypes/DataTypesDecimal.h
+++ b/dbms/src/DataTypes/DataTypesDecimal.h
@@ -320,12 +320,27 @@ inline std::enable_if_t<std::is_floating_point_v<FromFieldType> && IsDataTypeDecimal<ToDataType>, typename ToDataType::FieldType>
 {
     if (!std::isfinite(value))
         throw Exception("Decimal convert overflow. Cannot convert infinity or NaN to decimal", ErrorCodes::DECIMAL_OVERFLOW);
-    return value * ToDataType::getScaleMultiplier(scale);
+
+    auto out = value * ToDataType::getScaleMultiplier(scale);
+    if constexpr (std::is_same_v<ToDataType, DataTypeDecimal<Decimal128>>)
+    {
+        static constexpr __int128 min_int128 = __int128(0x8000000000000000ll) << 64;
+        static constexpr __int128 max_int128 = (__int128(0x7fffffffffffffffll) << 64) + 0xffffffffffffffffll;
+        if (out < min_int128 || out > max_int128)
+            throw Exception("Decimal convert overflow. Float is out of Decimal range", ErrorCodes::DECIMAL_OVERFLOW);
+    }
+    else
+    {
+        if (out < std::numeric_limits<typename ToDataType::FieldType>::min() || out > std::numeric_limits<typename ToDataType::FieldType>::max())
+            throw Exception("Decimal convert overflow. Float is out of Decimal range", ErrorCodes::DECIMAL_OVERFLOW);
+    }
+    return out;
 }
 else
 {
diff --git a/dbms/src/DataTypes/Native.h b/dbms/src/DataTypes/Native.h
index 26191c17cbe..e8e54c3a9ab 100644
--- a/dbms/src/DataTypes/Native.h
+++ b/dbms/src/DataTypes/Native.h
@@ -16,7 +16,7 @@
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunused-parameter"
-#include <llvm/IR/IRBuilder.h> // Y_IGNORE
+#include <llvm/IR/IRBuilder.h>
 #pragma GCC diagnostic pop
 
diff --git a/dbms/src/Databases/DatabaseFactory.cpp b/dbms/src/Databases/DatabaseFactory.cpp
index 0b5f8c0643f..4c998e772b9 100644
--- a/dbms/src/Databases/DatabaseFactory.cpp
+++ b/dbms/src/Databases/DatabaseFactory.cpp
@@ -2,22 +2,44 @@
 #include
 #include
 #include
+#include
+#include
+#include
+#include
+#include
+
+#if USE_MYSQL
+
+#include
+
+#endif
+
 namespace DB
 {
 
 namespace ErrorCodes
 {
-    extern const int UNKNOWN_DATABASE_ENGINE;
+extern const int BAD_ARGUMENTS;
+extern const int UNKNOWN_DATABASE_ENGINE;
 }
-
 DatabasePtr DatabaseFactory::get(
-    const String & engine_name,
     const String & database_name,
     const String & metadata_path,
+    const ASTStorage * engine_define,
     Context & context)
 {
+    String engine_name = engine_define->engine->name;
+
+    if (engine_name != "MySQL" && engine_define->engine->arguments)
+        throw Exception("Database engine " + engine_name + " cannot have arguments", ErrorCodes::BAD_ARGUMENTS);
+
+    if (engine_define->engine->parameters || engine_define->partition_by || engine_define->primary_key || engine_define->order_by ||
+        engine_define->sample_by || engine_define->settings)
+        throw Exception("Database engine " + engine_name + " cannot have parameters, primary_key, order_by, sample_by, settings",
+                        ErrorCodes::UNKNOWN_ELEMENT_IN_AST);
+
     if (engine_name == "Ordinary")
         return std::make_shared<DatabaseOrdinary>(database_name, metadata_path, context);
     else if (engine_name == "Memory")
@@ -25,6 +47,29 @@ DatabasePtr DatabaseFactory::get(
     else if (engine_name == "Dictionary")
         return std::make_shared<DatabaseDictionary>(database_name);
 
+#if USE_MYSQL
+
+    else if (engine_name == "MySQL")
+    {
+        const ASTFunction * engine = engine_define->engine;
+        const auto & arguments = engine->arguments->children;
+
+        if (arguments.size() != 4)
+            throw Exception("MySQL Database requires mysql_hostname, mysql_database_name, mysql_username, mysql_password arguments.",
+                            ErrorCodes::BAD_ARGUMENTS);
+
+        const auto & mysql_host_name = arguments[0]->as<ASTLiteral>()->value.safeGet<String>();
+        const auto & mysql_database_name = arguments[1]->as<ASTLiteral>()->value.safeGet<String>();
+        const auto & mysql_user_name = arguments[2]->as<ASTLiteral>()->value.safeGet<String>();
+        const auto & mysql_user_password = arguments[3]->as<ASTLiteral>()->value.safeGet<String>();
+
+        auto parsed_host_port = parseAddress(mysql_host_name, 3306);
+        return std::make_shared<DatabaseMySQL>(context, database_name, parsed_host_port.first, parsed_host_port.second, mysql_database_name,
+            mysql_user_name, mysql_user_password);
+    }
+
+#endif
+
     throw Exception("Unknown database engine: " + engine_name, ErrorCodes::UNKNOWN_DATABASE_ENGINE);
 }
diff --git a/dbms/src/Databases/DatabaseFactory.h b/dbms/src/Databases/DatabaseFactory.h
index bb912ca377b..0fab3e2307a 100644
--- a/dbms/src/Databases/DatabaseFactory.h
+++ b/dbms/src/Databases/DatabaseFactory.h
@@ -3,17 +3,18 @@
 #include
 #include
 
-
 namespace DB
 {
 
+class ASTStorage;
+
 class DatabaseFactory
 {
 public:
     static DatabasePtr get(
-        const String & engine_name,
         const String & database_name,
         const String & metadata_path,
+        const ASTStorage * engine_define,
         Context & context);
 };
 
diff --git a/dbms/src/Databases/DatabaseMySQL.cpp b/dbms/src/Databases/DatabaseMySQL.cpp
new file mode 100644
index 00000000000..f5b2e2aec19
--- /dev/null
+++ b/dbms/src/Databases/DatabaseMySQL.cpp
@@ -0,0 +1,397 @@
+#include <Databases/DatabaseMySQL.h>
+
+#if USE_MYSQL
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int UNKNOWN_TABLE;
+}
+
+static constexpr const std::chrono::seconds cleaner_sleep_time{30};
+
+String toQueryStringWithQuote(const std::vector<String> & quote_list)
+{
+    WriteBufferFromOwnString quote_list_query;
+    quote_list_query << "(";
+
+    for (size_t index = 0; index < quote_list.size(); ++index)
+    {
+        if (index)
+            quote_list_query << ",";
+
+        quote_list_query << quote << quote_list[index];
+    }
+
+    quote_list_query << ")";
+    return quote_list_query.str();
+}
+
+DatabaseMySQL::DatabaseMySQL(
+    const Context & context_, const String & database_name_, const String & mysql_host_name_, const UInt16 & mysql_port_,
+    const String & mysql_database_name_, const String & mysql_user_name_, const String & mysql_user_password_)
+    : global_context(context_), database_name(database_name_), mysql_host_name(mysql_host_name_), mysql_port(mysql_port_),
+      mysql_database_name(mysql_database_name_), mysql_user_name(mysql_user_name_), mysql_user_password(mysql_user_password_),
+      mysql_pool(mysql_database_name, mysql_host_name, mysql_user_name, mysql_user_password, mysql_port)
+{
+}
+
+bool DatabaseMySQL::empty(const Context &) const
+{
+    std::lock_guard lock(mutex);
+
+    fetchTablesIntoLocalCache();
+
+    return local_tables_cache.empty();
+}
+
+DatabaseIteratorPtr DatabaseMySQL::getIterator(const Context &)
+{
+    Tables tables;
+    std::lock_guard lock(mutex);
+
+    fetchTablesIntoLocalCache();
+
+    for (const auto & local_table : local_tables_cache)
+        tables[local_table.first] = local_table.second.storage;
+
+    return std::make_unique<DatabaseSnapshotIterator>(tables);
+}
+
+bool DatabaseMySQL::isTableExist(const Context & context, const String & name) const
+{
+    return bool(tryGetTable(context, name));
+}
+
+StoragePtr DatabaseMySQL::tryGetTable(const Context &, const String & mysql_table_name) const
+{
+    std::lock_guard lock(mutex);
+
+    fetchTablesIntoLocalCache();
+
+    if (local_tables_cache.find(mysql_table_name) != local_tables_cache.end())
+        return local_tables_cache[mysql_table_name].storage;
+
+    return StoragePtr{};
+}
+
+ASTPtr DatabaseMySQL::tryGetCreateTableQuery(const Context &, const String & table_name) const
+{
+    std::lock_guard lock(mutex);
+
+    fetchTablesIntoLocalCache();
+
+    if (local_tables_cache.find(table_name) == local_tables_cache.end())
+        throw Exception("MySQL table " + mysql_database_name + "." + table_name + " doesn't exist.", ErrorCodes::UNKNOWN_TABLE);
+
+    return local_tables_cache[table_name].create_table_query;
+}
+
+time_t DatabaseMySQL::getTableMetadataModificationTime(const Context &, const String & table_name)
+{
+    std::lock_guard lock(mutex);
+
+    fetchTablesIntoLocalCache();
+
+    if (local_tables_cache.find(table_name) == local_tables_cache.end())
+        throw Exception("MySQL table " + mysql_database_name + "." + table_name + " doesn't exist.", ErrorCodes::UNKNOWN_TABLE);
+
+    return time_t(local_tables_cache[table_name].modification_time);
+}
+
+ASTPtr DatabaseMySQL::getCreateDatabaseQuery(const Context &) const
+{
+    const auto & create_query = std::make_shared<ASTCreateQuery>();
+    create_query->database = database_name;
+
+    const auto & storage = std::make_shared<ASTStorage>();
+    storage->set(storage->engine, makeASTFunction("MySQL",
+        std::make_shared<ASTLiteral>(mysql_host_name + ":" + toString(mysql_port)), std::make_shared<ASTLiteral>(mysql_database_name),
+        std::make_shared<ASTLiteral>(mysql_user_name), std::make_shared<ASTLiteral>(mysql_user_password)));
+
+    create_query->set(create_query->storage, storage);
+    return create_query;
+}
+
+void DatabaseMySQL::fetchTablesIntoLocalCache() const
+{
+    const auto & tables_with_modification_time = fetchTablesWithModificationTime();
+
+    destroyLocalCacheExtraTables(tables_with_modification_time);
+    fetchLatestTablesStructureIntoCache(tables_with_modification_time);
+}
+
+void DatabaseMySQL::destroyLocalCacheExtraTables(const std::map<String, UInt64> & tables_with_modification_time) const
+{
+    for (auto iterator = local_tables_cache.begin(); iterator != local_tables_cache.end();)
+    {
+        if (tables_with_modification_time.find(iterator->first) != tables_with_modification_time.end())
+            ++iterator;
+        else
+        {
+            outdated_tables.emplace_back(iterator->second.storage);
+            iterator = local_tables_cache.erase(iterator);
+        }
+    }
+}
+
+void DatabaseMySQL::fetchLatestTablesStructureIntoCache(const std::map<String, UInt64> & tables_modification_time) const
+{
+    std::vector<String> wait_update_tables_name;
+    for (const auto & table_modification_time : tables_modification_time)
+    {
+        const auto & it = local_tables_cache.find(table_modification_time.first);
+
+        /// Outdated or new table structures
+        if (it == local_tables_cache.end() || table_modification_time.second > it->second.modification_time)
+            wait_update_tables_name.emplace_back(table_modification_time.first);
+    }
+
+    std::map<String, NamesAndTypesList> tables_and_columns = fetchTablesColumnsList(wait_update_tables_name);
+
+    for (const auto & table_and_columns : tables_and_columns)
+    {
+        const auto & table_name = table_and_columns.first;
+        const auto & columns_name_and_type = table_and_columns.second;
+        const auto & table_modification_time = tables_modification_time.at(table_name);
+
+        const auto & iterator = local_tables_cache.find(table_name);
+        if (iterator != local_tables_cache.end())
+        {
+            outdated_tables.emplace_back(iterator->second.storage);
+            local_tables_cache.erase(iterator);
+        }
+
+        local_tables_cache[table_name] = createStorageInfo(table_name, columns_name_and_type, table_modification_time);
+    }
+}
+
+static ASTPtr getTableColumnsCreateQuery(const NamesAndTypesList & names_and_types_list)
+{
+    const auto & table_columns_list_ast = std::make_shared<ASTColumns>();
+    const auto & columns_expression_list = std::make_shared<ASTExpressionList>();
+
+    for (const auto & table_column_name_and_type : names_and_types_list)
+    {
+        const auto & column_declaration = std::make_shared<ASTColumnDeclaration>();
+        column_declaration->name = table_column_name_and_type.name;
+        column_declaration->type = dataTypeConvertToQuery(table_column_name_and_type.type);
+        columns_expression_list->children.emplace_back(column_declaration);
+    }
+
+    table_columns_list_ast->set(table_columns_list_ast->columns, columns_expression_list);
+    return table_columns_list_ast;
+}
+
+static ASTPtr getTableStorageCreateQuery(
+    const String & host_name, const UInt16 & port,
+    const String & database_name, const String & table_name,
+    const String & user_name, const String & password)
+{
+    const auto & table_storage = std::make_shared<ASTStorage>();
+    const auto & storage_engine = std::make_shared<ASTFunction>();
+
+    storage_engine->name = "MySQL";
+    storage_engine->arguments = std::make_shared<ASTExpressionList>();
+    storage_engine->children.push_back(storage_engine->arguments);
+
+    storage_engine->arguments->children = {
+        std::make_shared<ASTLiteral>(host_name + ":" + toString(port)),
+        std::make_shared<ASTLiteral>(database_name), std::make_shared<ASTLiteral>(table_name),
+        std::make_shared<ASTLiteral>(user_name), std::make_shared<ASTLiteral>(password)
+    };
+
+
+    table_storage->set(table_storage->engine, storage_engine);
+    return table_storage;
+}
+
+DatabaseMySQL::MySQLStorageInfo DatabaseMySQL::createStorageInfo(
+    const String & table_name, const NamesAndTypesList & columns_name_and_type, const UInt64 & table_modification_time) const
+{
+    const auto & mysql_table = StorageMySQL::create(
+        table_name, std::move(mysql_pool), mysql_database_name, table_name,
+        false, "", ColumnsDescription{columns_name_and_type}, global_context);
+
+    const auto & create_table_query = std::make_shared<ASTCreateQuery>();
+
+    create_table_query->table = table_name;
+    create_table_query->database = database_name;
+    create_table_query->set(create_table_query->columns_list, getTableColumnsCreateQuery(columns_name_and_type));
+    create_table_query->set(create_table_query->storage, getTableStorageCreateQuery(
+        mysql_host_name, mysql_port, mysql_database_name, table_name, mysql_user_name, mysql_user_password));
+
+    MySQLStorageInfo storage_info;
+    storage_info.storage = mysql_table;
+    storage_info.create_table_query = create_table_query;
+    storage_info.modification_time = table_modification_time;
+
+    return storage_info;
+}
+
+std::map<String, UInt64> DatabaseMySQL::fetchTablesWithModificationTime() const
+{
+    Block tables_status_sample_block
+    {
+        { std::make_shared<DataTypeString>(), "table_name" },
+        { std::make_shared<DataTypeDateTime>(), "modification_time" },
+    };
+
+    WriteBufferFromOwnString query;
+    query << "SELECT"
+             " TABLE_NAME AS table_name, "
+             " CREATE_TIME AS modification_time "
+             " FROM INFORMATION_SCHEMA.TABLES "
+             " WHERE TABLE_SCHEMA = " << quote << mysql_database_name;
+
+    std::map<String, UInt64> tables_with_modification_time;
+    MySQLBlockInputStream result(mysql_pool.Get(), query.str(), tables_status_sample_block, DEFAULT_BLOCK_SIZE);
+
+    while (Block block = result.read())
+    {
+        size_t rows = block.rows();
+        for (size_t index = 0; index < rows; ++index)
+        {
+            String table_name = (*block.getByPosition(0).column)[index].safeGet<String>();
+            tables_with_modification_time[table_name] = (*block.getByPosition(1).column)[index].safeGet<UInt64>();
+        }
+    }
+
+    return tables_with_modification_time;
+}
+
+std::map<String, NamesAndTypesList> DatabaseMySQL::fetchTablesColumnsList(const std::vector<String> & tables_name) const
+{
+    std::map<String, NamesAndTypesList> tables_and_columns;
+
+    if (tables_name.empty())
+        return tables_and_columns;
+
+    Block tables_columns_sample_block
+    {
+        { std::make_shared<DataTypeString>(), "table_name" },
+        { std::make_shared<DataTypeString>(), "column_name" },
+        { std::make_shared<DataTypeString>(), "column_type" },
+        { std::make_shared<DataTypeUInt8>(), "is_nullable" },
+        { std::make_shared<DataTypeUInt8>(), "is_unsigned" },
+        { std::make_shared<DataTypeUInt64>(), "length" },
+    };
+
+    WriteBufferFromOwnString query;
+    query << "SELECT "
+             " TABLE_NAME AS table_name,"
+             " COLUMN_NAME AS column_name,"
+             " DATA_TYPE AS column_type,"
+             " IS_NULLABLE = 'YES' AS is_nullable,"
+             " COLUMN_TYPE LIKE '%unsigned' AS is_unsigned,"
+             " CHARACTER_MAXIMUM_LENGTH AS length"
+             " FROM INFORMATION_SCHEMA.COLUMNS"
+             " WHERE TABLE_SCHEMA = " << quote << mysql_database_name
+          << " AND TABLE_NAME IN " << toQueryStringWithQuote(tables_name) << " ORDER BY ORDINAL_POSITION";
+
+    const auto & external_table_functions_use_nulls = global_context.getSettings().external_table_functions_use_nulls;
+    MySQLBlockInputStream result(mysql_pool.Get(), query.str(), tables_columns_sample_block, DEFAULT_BLOCK_SIZE);
+    while (Block block = result.read())
+    {
+        size_t rows = block.rows();
+        for (size_t i = 0; i < rows; ++i)
+        {
+            String table_name = (*block.getByPosition(0).column)[i].safeGet<String>();
+            tables_and_columns[table_name].emplace_back((*block.getByPosition(1).column)[i].safeGet<String>(),
+                convertMySQLDataType(
+                    (*block.getByPosition(2).column)[i].safeGet<String>(),
+                    (*block.getByPosition(3).column)[i].safeGet<UInt64>() &&
+                        external_table_functions_use_nulls,
+                    (*block.getByPosition(4).column)[i].safeGet<UInt64>(),
+                    (*block.getByPosition(5).column)[i].safeGet<UInt64>()));
+        }
+    }
+    return tables_and_columns;
+}
+
+void DatabaseMySQL::shutdown()
+{
+    std::map<String, MySQLStorageInfo> tables_snapshot;
+    {
+        std::lock_guard lock(mutex);
+        tables_snapshot = local_tables_cache;
+    }
+
+    for (const auto & table_snapshot : tables_snapshot)
+        table_snapshot.second.storage->shutdown();
+
+    std::lock_guard lock(mutex);
+    local_tables_cache.clear();
+}
+
+void DatabaseMySQL::cleanOutdatedTables()
+{
+    setThreadName("MySQLDBCleaner");
+
+    std::unique_lock lock{mutex};
+
+    while (!quit.load(std::memory_order_relaxed))
+    {
+        for (auto iterator = outdated_tables.begin(); iterator != outdated_tables.end();)
+        {
+            if (!iterator->unique())
+                ++iterator;
+            else
+            {
+                const auto table_lock = (*iterator)->lockAlterIntention(RWLockImpl::NO_QUERY);
+
+                (*iterator)->shutdown();
+                (*iterator)->is_dropped = true;
+                iterator = outdated_tables.erase(iterator);
+            }
+        }
+
+        cond.wait_for(lock, cleaner_sleep_time);
+    }
+}
+
+DatabaseMySQL::~DatabaseMySQL()
+{
+    try
+    {
+        if (!quit)
+        {
+            {
+                quit = true;
+                std::lock_guard lock{mutex};
+            }
+            cond.notify_one();
+            thread.join();
+        }
+
+        shutdown();
+    }
+    catch (...)
+    {
+        tryLogCurrentException(__PRETTY_FUNCTION__);
+    }
+}
+
+}
+
+#endif
diff --git a/dbms/src/Databases/DatabaseMySQL.h b/dbms/src/Databases/DatabaseMySQL.h
new file mode 100644
index 00000000000..3e89b395208
--- /dev/null
+++ b/dbms/src/Databases/DatabaseMySQL.h
@@ -0,0 +1,125 @@
+#pragma once
+
+#include
+
+#if USE_MYSQL
+
+#include
+#include
+
+namespace DB
+{
+
+/** Real-time access to the table list and table structure of a remote MySQL server.
+  * It does not perform any manipulations with the filesystem.
+  * All tables are created by the calling code, after the table structure is pulled
+  * from the remote MySQL server in real time.
+  */
+class DatabaseMySQL : public IDatabase
+{
+public:
+    ~DatabaseMySQL() override;
+
+    DatabaseMySQL(const Context & context_, const String & database_name_, const String & mysql_host_name_, const UInt16 & mysql_port_,
+        const String & mysql_database_name_, const String & mysql_user_name_, const String & mysql_user_password_);
+
+    String getEngineName() const override { return "MySQL"; }
+
+    String getDatabaseName() const override { return database_name; }
+
+    bool empty(const Context & context) const override;
+
+    DatabaseIteratorPtr getIterator(const Context & context) override;
+
+    ASTPtr getCreateDatabaseQuery(const Context & context) const override;
+
+    bool isTableExist(const Context & context, const String & name) const override;
+
+    StoragePtr tryGetTable(const Context & context, const String & name) const override;
+
+    ASTPtr tryGetCreateTableQuery(const Context & context, const String & name) const override;
+
+    time_t getTableMetadataModificationTime(const Context & context, const String & name) override;
+
+    void shutdown() override;
+
+    StoragePtr detachTable(const String &) override
+    {
+        throw Exception("MySQL database engine does not support detach table.", ErrorCodes::NOT_IMPLEMENTED);
+    }
+
+    void loadTables(Context &, ThreadPool *, bool) override
+    {
+        /// do nothing
+    }
+
+    void removeTable(const Context &, const String &) override
+    {
+        throw Exception("MySQL database engine does not support remove table.", ErrorCodes::NOT_IMPLEMENTED);
+    }
+
+    void attachTable(const String &, const StoragePtr &) override
+    {
+        throw Exception("MySQL database engine does not support attach table.", ErrorCodes::NOT_IMPLEMENTED);
+    }
+
+    void renameTable(const Context &, const String &, IDatabase &, const String &) override
+    {
+        throw Exception("MySQL database engine does not support rename table.", ErrorCodes::NOT_IMPLEMENTED);
+    }
+
+    void createTable(const Context &, const String &, const StoragePtr &, const ASTPtr &) override
+    {
+        throw Exception("MySQL database engine does not support create table.", ErrorCodes::NOT_IMPLEMENTED);
+    }
+
+    void alterTable(const Context &, const String &, const ColumnsDescription &, const IndicesDescription &, const ASTModifier &) override
+    {
+        throw Exception("MySQL database engine does not support alter table.", ErrorCodes::NOT_IMPLEMENTED);
+    }
+
+private:
+    struct MySQLStorageInfo
+    {
+        StoragePtr storage;
+        UInt64 modification_time;
+        ASTPtr create_table_query;
+    };
+
+    const Context global_context;
+    const String database_name;
+    const String mysql_host_name;
+    const UInt16 mysql_port;
+    const String mysql_database_name;
+    const String mysql_user_name;
+    const String mysql_user_password;
+
+    mutable std::mutex mutex;
+    std::atomic<bool> quit{false};
+    std::condition_variable cond;
+
+    mutable mysqlxx::Pool mysql_pool;
+    mutable std::vector<StoragePtr> outdated_tables;
+    mutable std::map<String, MySQLStorageInfo> local_tables_cache;
+
+
+    void cleanOutdatedTables();
+
+    void fetchTablesIntoLocalCache() const;
+
+    std::map<String, UInt64> fetchTablesWithModificationTime() const;
+
+    DatabaseMySQL::MySQLStorageInfo createStorageInfo(
+        const String & table_name, const NamesAndTypesList & columns_name_and_type, const UInt64 & table_modification_time) const;
+
+    std::map<String, NamesAndTypesList> fetchTablesColumnsList(const std::vector<String> & tables_name) const;
+
+    void destroyLocalCacheExtraTables(const std::map<String, UInt64> & tables_with_modification_time) const;
+
+    void fetchLatestTablesStructureIntoCache(const std::map<String, UInt64> & tables_modification_time) const;
+
+    ThreadFromGlobalPool thread{&DatabaseMySQL::cleanOutdatedTables, this};
+};
+
+}
+
+#endif
diff --git a/dbms/src/Dictionaries/DictionaryStructure.cpp b/dbms/src/Dictionaries/DictionaryStructure.cpp
index 4fa8f32024d..d43b749935f 100644
--- a/dbms/src/Dictionaries/DictionaryStructure.cpp
+++ b/dbms/src/Dictionaries/DictionaryStructure.cpp
@@ -37,7 +37,7 @@ namespace
     return DictionaryTypedSpecialAttribute{std::move(name), std::move(expression), DataTypeFactory::instance().get(type_name)};
 }
 
-} // namespace
+}
 
 
 AttributeUnderlyingType getAttributeUnderlyingType(const std::string & type)
diff --git a/dbms/src/Dictionaries/Embedded/GeodataProviders/HierarchiesProvider.cpp b/dbms/src/Dictionaries/Embedded/GeodataProviders/HierarchiesProvider.cpp
index 60ba5ed7408..67180921a93 100644
--- a/dbms/src/Dictionaries/Embedded/GeodataProviders/HierarchiesProvider.cpp
+++ b/dbms/src/Dictionaries/Embedded/GeodataProviders/HierarchiesProvider.cpp
@@ -69,5 +69,5 @@ IRegionsHierarchyDataSourcePtr RegionsHierarchiesDataProvider::getHierarchySourc
         return std::make_shared<RegionsHierarchyDataSource>(hierarchy_file_path);
     }
 
-    throw Poco::Exception("Regions hierarchy `" + name + "` not found");
+    throw Poco::Exception("Regions hierarchy '" + name + "' not found");
 }
diff --git a/dbms/src/Dictionaries/RangeHashedDictionary.cpp b/dbms/src/Dictionaries/RangeHashedDictionary.cpp
index b1cae6956f2..ac509b4d1e5 100644
--- a/dbms/src/Dictionaries/RangeHashedDictionary.cpp
+++ b/dbms/src/Dictionaries/RangeHashedDictionary.cpp
@@ -39,7 +39,7 @@ const DB::IColumn & unwrapNullableColumn(const DB::IColumn & column)
     return column;
 }
 
-} // namespace
+}
 
 namespace DB
 {
diff --git a/dbms/src/Formats/CapnProtoRowInputStream.cpp b/dbms/src/Formats/CapnProtoRowInputStream.cpp
index 414a25cf39c..6770c54a9fd 100644
--- a/dbms/src/Formats/CapnProtoRowInputStream.cpp
+++ b/dbms/src/Formats/CapnProtoRowInputStream.cpp
@@ -3,13 +3,13 @@
 #include
 #include
-#include // Y_IGNORE
+#include
 #include
 #include
 #include
-#include // Y_IGNORE
-#include // Y_IGNORE
-#include // Y_IGNORE
+#include
+#include
+#include
 #include
 #include
 #include
diff --git a/dbms/src/Formats/PrettySpaceBlockOutputStream.cpp b/dbms/src/Formats/PrettySpaceBlockOutputStream.cpp
index 6cad669fd49..b210af2551a 100644
--- a/dbms/src/Formats/PrettySpaceBlockOutputStream.cpp
+++ b/dbms/src/Formats/PrettySpaceBlockOutputStream.cpp
@@ -7,7 +7,6 @@
 namespace DB
 {
-
 void PrettySpaceBlockOutputStream::write(const Block & block)
 {
     UInt64 max_rows = format_settings.pretty.max_rows;
@@ -26,12 +25,6 @@ void PrettySpaceBlockOutputStream::write(const Block & block)
     Widths name_widths;
     calculateWidths(block, widths, max_widths, name_widths, format_settings);
 
-    /// Do not align on too long values.
-    if (terminal_width > 80)
-        for (size_t i = 0; i < columns; ++i)
-            if (max_widths[i] > terminal_width / 2)
-                max_widths[i] = terminal_width / 2;
-
     /// Names
     for (size_t i = 0; i < columns; ++i)
     {
diff --git a/dbms/src/Formats/ProtobufSchemas.cpp b/dbms/src/Formats/ProtobufSchemas.cpp
index 80b71e9797f..c443ea221c6 100644
--- a/dbms/src/Formats/ProtobufSchemas.cpp
+++ b/dbms/src/Formats/ProtobufSchemas.cpp
@@ -2,8 +2,8 @@
 #if USE_PROTOBUF
 
 #include
-#include // Y_IGNORE
-#include // Y_IGNORE
+#include
+#include
 #include
diff --git a/dbms/src/Formats/ProtobufWriter.cpp b/dbms/src/Formats/ProtobufWriter.cpp
index 815786fab0b..dd4ba7ff6df 100644
--- a/dbms/src/Formats/ProtobufWriter.cpp
+++ b/dbms/src/Formats/ProtobufWriter.cpp
@@ -7,8 +7,8 @@
 #include
 #include
 #include
-#include // Y_IGNORE
-#include // Y_IGNORE
+#include
+#include
 #include
 #include
 #include "ProtobufWriter.h"
diff --git a/dbms/src/Functions/FunctionBase64Conversion.h b/dbms/src/Functions/FunctionBase64Conversion.h
index dfd3a05ce47..5ee6ff8f257 100644
--- a/dbms/src/Functions/FunctionBase64Conversion.h
+++ b/dbms/src/Functions/FunctionBase64Conversion.h
@@ -7,7 +7,7 @@
 #include
 #include
 #include
-#include <libbase64.h> // Y_IGNORE
+#include <libbase64.h>
 
 
 namespace DB
diff --git a/dbms/src/Functions/FunctionBinaryArithmetic.h b/dbms/src/Functions/FunctionBinaryArithmetic.h
index 9ccb8fbbd59..9faa2ee51e6 100644
--- a/dbms/src/Functions/FunctionBinaryArithmetic.h
+++ b/dbms/src/Functions/FunctionBinaryArithmetic.h
@@ -24,7 +24,7 @@
 #if USE_EMBEDDED_COMPILER
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunused-parameter"
-#include <llvm/IR/IRBuilder.h> // Y_IGNORE
+#include <llvm/IR/IRBuilder.h>
 #pragma GCC diagnostic pop
 #endif
diff --git a/dbms/src/Functions/FunctionMathBinaryFloat64.h b/dbms/src/Functions/FunctionMathBinaryFloat64.h
index 86766aef2cc..af7c8f97eff 100644
--- a/dbms/src/Functions/FunctionMathBinaryFloat64.h
+++ b/dbms/src/Functions/FunctionMathBinaryFloat64.h
@@ -22,8 +22,8 @@
     #pragma clang diagnostic ignored "-Wshift-negative-value"
 #endif
 
-    #include // Y_IGNORE
-    #include // Y_IGNORE
+    #include
+    #include
 
 #ifdef __clang__
     #pragma clang diagnostic pop
diff --git a/dbms/src/Functions/FunctionMathUnaryFloat64.h b/dbms/src/Functions/FunctionMathUnaryFloat64.h
index 12ecd3e2d0e..82ae17a0bb6 100644
--- a/dbms/src/Functions/FunctionMathUnaryFloat64.h
+++ b/dbms/src/Functions/FunctionMathUnaryFloat64.h
@@ -21,9 +21,9 @@
     #pragma clang diagnostic ignored "-Wshift-negative-value"
 #endif
 
-    #include // Y_IGNORE
-    #include // Y_IGNORE
-    #include // Y_IGNORE
+    #include
+    #include
+    #include
 
 #ifdef __clang__
     #pragma clang diagnostic pop
diff --git a/dbms/src/Functions/FunctionUnaryArithmetic.h b/dbms/src/Functions/FunctionUnaryArithmetic.h
index 8cc02420a26..243b9813086 100644
--- a/dbms/src/Functions/FunctionUnaryArithmetic.h
+++ b/dbms/src/Functions/FunctionUnaryArithmetic.h
@@ -13,7 +13,7 @@
 #if USE_EMBEDDED_COMPILER
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunused-parameter"
-#include <llvm/IR/IRBuilder.h> // Y_IGNORE
+#include <llvm/IR/IRBuilder.h>
 #pragma GCC diagnostic pop
 #endif
diff --git a/dbms/src/Functions/FunctionsBitmap.cpp b/dbms/src/Functions/FunctionsBitmap.cpp
index c3763124bcf..d4367ae2c64 100644
--- a/dbms/src/Functions/FunctionsBitmap.cpp
+++ b/dbms/src/Functions/FunctionsBitmap.cpp
@@ -23,5 +23,6 @@ void registerFunctionsBitmap(FunctionFactory & factory)
     factory.registerFunction();
     factory.registerFunction();
+    factory.registerFunction<FunctionBitmapContains>();
 }
 }
diff --git a/dbms/src/Functions/FunctionsBitmap.h b/dbms/src/Functions/FunctionsBitmap.h
index 17b729ecf12..e4696c69c3a 100644
--- a/dbms/src/Functions/FunctionsBitmap.h
+++ b/dbms/src/Functions/FunctionsBitmap.h
@@ -6,6 +6,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -55,6 +56,15 @@ namespace ErrorCodes
  *
  * Two bitmap andnot calculation, return cardinality:
  * bitmapAndnotCardinality: bitmap,bitmap -> integer
+ *
+ * Determine whether a bitmap contains the given integer:
+ * bitmapContains: bitmap,integer -> bool
+ *
+ * Determine whether one bitmap is a superset of another:
+ * bitmapHasAll: bitmap,bitmap -> bool
+ *
+ * Determine whether the intersection of two bitmaps is non-empty:
+ * bitmapHasAny: bitmap,bitmap -> bool
 */
 
 template
@@ -362,6 +372,95 @@ struct BitmapHasAnyImpl
     }
 };
 
+class FunctionBitmapContains : public IFunction
+{
+public:
+    static constexpr auto name = "bitmapContains";
+
+    static FunctionPtr create(const Context &) { return std::make_shared<FunctionBitmapContains>(); }
+
+    String getName() const override { return name; }
+
+    bool isVariadic() const override { return false; }
+
+    size_t getNumberOfArguments() const override { return 2; }
+
+    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
+    {
+        auto bitmap_type0 = typeid_cast<const DataTypeAggregateFunction *>(arguments[0].get());
+        if (!(bitmap_type0 && bitmap_type0->getFunctionName() == AggregateFunctionGroupBitmapData<UInt32>::name()))
+            throw Exception(
+                "First argument for function " + getName() + " must be a bitmap but it has type " + arguments[0]->getName() + ".",
+                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+        auto arg_type1 = typeid_cast<const DataTypeNumber<UInt32> *>(arguments[1].get());
+        if (!(arg_type1))
+            throw Exception(
+                "Second argument for function " + getName() + " must be UInt32 but it has type " + arguments[1]->getName() + ".",
+                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+
+        return std::make_shared<DataTypeNumber<UInt8>>();
+    }
+
+    bool useDefaultImplementationForConstants() const override { return true; }
+
+    void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
+    {
+        auto col_to = ColumnVector<UInt8>::create(input_rows_count);
+        typename ColumnVector<UInt8>::Container & vec_to = col_to->getData();
+        const IDataType * from_type = block.getByPosition(arguments[0]).type.get();
+
+        const DataTypeAggregateFunction * aggr_type = typeid_cast<const DataTypeAggregateFunction *>(from_type);
+        WhichDataType which(aggr_type->getArgumentsDataTypes()[0]);
+        if (which.isUInt8())
+            executeIntType<UInt8>(block, arguments, input_rows_count, vec_to);
+        else if (which.isUInt16())
+            executeIntType<UInt16>(block, arguments, input_rows_count, vec_to);
+        else if (which.isUInt32())
+            executeIntType<UInt32>(block, arguments, input_rows_count, vec_to);
+        else if (which.isUInt64())
+            executeIntType<UInt64>(block, arguments, input_rows_count, vec_to);
+        else
+            throw Exception(
+                "Unexpected type " + from_type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+
+        block.getByPosition(result).column = std::move(col_to);
+    }
+
+private:
+    template <typename T>
+    void executeIntType(
+        Block & block, const ColumnNumbers & arguments, size_t input_rows_count, typename ColumnVector<UInt8>::Container & vec_to)
+    {
+        const IColumn * columns[2];
+        bool isColumnConst[2];
+        const PaddedPODArray<AggregateDataPtr> * container0;
+        const PaddedPODArray<UInt32> * container1;
+
+        for (size_t i = 0; i < 2; ++i)
+        {
+            columns[i] = block.getByPosition(arguments[i]).column.get();
+            isColumnConst[i] = typeid_cast<const ColumnConst *>(columns[i]) != nullptr;
+        }
+        if (isColumnConst[0])
+            container0 = &typeid_cast<const ColumnAggregateFunction *>(typeid_cast<const ColumnConst *>(columns[0])->getDataColumnPtr().get())->getData();
+        else
+            container0 = &typeid_cast<const ColumnAggregateFunction *>(columns[0])->getData();
+        if (isColumnConst[1])
+            container1 = &typeid_cast<const ColumnUInt32 *>(typeid_cast<const ColumnConst *>(columns[1])->getDataColumnPtr().get())->getData();
+        else
+            container1 = &typeid_cast<const ColumnUInt32 *>(columns[1])->getData();
+
+        for (size_t i = 0; i < input_rows_count; ++i)
+        {
+            const AggregateDataPtr dataPtr0 = isColumnConst[0] ? (*container0)[0] : (*container0)[i];
+            const UInt32 data1 = isColumnConst[1] ? (*container1)[0] : (*container1)[i];
+            const AggregateFunctionGroupBitmapData<T> & bd0
+                = *reinterpret_cast<const AggregateFunctionGroupBitmapData<T> *>(dataPtr0);
+            vec_to[i] = bd0.rbs.rb_contains(data1);
+        }
+    }
+};
+
 template
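/* A minimal usage sketch for the function added above (the table-free query and
   its values are illustrative; bitmapBuild is assumed to be available, like the
   other bitmap functions described in this header):

       SELECT bitmapContains(bitmapBuild([1, 2, 3, 4, 5]), toUInt32(3)) AS res;
       -- res = 1 (UInt8), because 3 is present in the bitmap

   Internally the call ends up in executeIntType<T> above, which reads the
   serialized bitmap state and checks membership via bd0.rbs.rb_contains(). */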