mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 09:32:06 +00:00
Merge branch 'master' into brotli
This commit is contained in:
commit
763b3dcf73
30
.github/ISSUE_TEMPLATE/bug_report.md
vendored
Normal file
30
.github/ISSUE_TEMPLATE/bug_report.md
vendored
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
---
|
||||||
|
name: Bug report
|
||||||
|
about: Create a report to help us improve ClickHouse
|
||||||
|
title: ''
|
||||||
|
labels: bug, issue
|
||||||
|
assignees: ''
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
(you don't have to strictly follow this form)
|
||||||
|
|
||||||
|
**Describe the bug**
|
||||||
|
A clear and concise description of what the bug is.
|
||||||
|
|
||||||
|
**How to reproduce**
|
||||||
|
* Which ClickHouse server version to use
|
||||||
|
* Which interface to use, if matters
|
||||||
|
* Non-default settings, if any
|
||||||
|
* `CREATE TABLE` statements for all tables involved
|
||||||
|
* Sample data for all these tables, use [clickhouse-obfuscator](https://github.com/yandex/ClickHouse/blob/master/dbms/programs/obfuscator/Obfuscator.cpp#L42-L80) if necessary
|
||||||
|
* Queries to run that lead to unexpected result
|
||||||
|
|
||||||
|
**Expected behavior**
|
||||||
|
A clear and concise description of what you expected to happen.
|
||||||
|
|
||||||
|
**Error message and/or stacktrace**
|
||||||
|
If applicable, add screenshots to help explain your problem.
|
||||||
|
|
||||||
|
**Additional context**
|
||||||
|
Add any other context about the problem here.
|
21
.github/ISSUE_TEMPLATE/build-issue.md
vendored
Normal file
21
.github/ISSUE_TEMPLATE/build-issue.md
vendored
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
---
|
||||||
|
name: Build issue
|
||||||
|
about: Report failed ClickHouse build from master
|
||||||
|
title: ''
|
||||||
|
labels: build
|
||||||
|
assignees: ''
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
Make sure that `git diff` result is empty and you've just pulled fresh master. Try cleaning up cmake cache. Just in case, official build instructions are published here: https://clickhouse.yandex/docs/en/development/build/
|
||||||
|
|
||||||
|
**Operating system**
|
||||||
|
OS kind or distribution, specific version/release, non-standard kernel if any. If you are trying to build inside virtual machine, please mention it too.
|
||||||
|
|
||||||
|
**Cmake version**
|
||||||
|
|
||||||
|
**Ninja version**
|
||||||
|
|
||||||
|
**Compiler name and version**
|
||||||
|
|
||||||
|
**Full cmake and/or ninja output**
|
22
.github/ISSUE_TEMPLATE/feature_request.md
vendored
Normal file
22
.github/ISSUE_TEMPLATE/feature_request.md
vendored
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
---
|
||||||
|
name: Feature request
|
||||||
|
about: Suggest an idea for ClickHouse
|
||||||
|
title: ''
|
||||||
|
labels: feature
|
||||||
|
assignees: ''
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
(you don't have to strictly follow this form)
|
||||||
|
|
||||||
|
**Use case**
|
||||||
|
A clear and concise description of what is the intended usage scenario is.
|
||||||
|
|
||||||
|
**Describe the solution you'd like**
|
||||||
|
A clear and concise description of what you want to happen.
|
||||||
|
|
||||||
|
**Describe alternatives you've considered**
|
||||||
|
A clear and concise description of any alternative solutions or features you've considered.
|
||||||
|
|
||||||
|
**Additional context**
|
||||||
|
Add any other context or screenshots about the feature request here.
|
12
.github/ISSUE_TEMPLATE/question.md
vendored
Normal file
12
.github/ISSUE_TEMPLATE/question.md
vendored
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
---
|
||||||
|
name: Question
|
||||||
|
about: Ask question about ClickHouse
|
||||||
|
title: ''
|
||||||
|
labels: question
|
||||||
|
assignees: ''
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
Make sure to check documentation https://clickhouse.yandex/docs/en/ first. If the question is concise and probably has a short answer, asking it in Telegram chat https://telegram.me/clickhouse_en is probably the fastest way to find the answer. For more complicated questions, consider asking them on StackOverflow with "clickhouse" tag https://stackoverflow.com/questions/tagged/clickhouse
|
||||||
|
|
||||||
|
If you still prefer GitHub issues, remove all this text and ask your question here.
|
2
.gitignore
vendored
2
.gitignore
vendored
@ -243,3 +243,5 @@ website/package-lock.json
|
|||||||
|
|
||||||
# ccls cache
|
# ccls cache
|
||||||
/.ccls-cache
|
/.ccls-cache
|
||||||
|
|
||||||
|
/compile_commands.json
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
* Added functions `left`, `right`, `trim`, `ltrim`, `rtrim`, `timestampadd`, `timestampsub` for SQL standard compatibility. [#3826](https://github.com/yandex/ClickHouse/pull/3826) ([Ivan Blinkov](https://github.com/blinkov))
|
* Added functions `left`, `right`, `trim`, `ltrim`, `rtrim`, `timestampadd`, `timestampsub` for SQL standard compatibility. [#3826](https://github.com/yandex/ClickHouse/pull/3826) ([Ivan Blinkov](https://github.com/blinkov))
|
||||||
* Support for write in `HDFS` tables and `hdfs` table function. [#4084](https://github.com/yandex/ClickHouse/pull/4084) ([alesapin](https://github.com/alesapin))
|
* Support for write in `HDFS` tables and `hdfs` table function. [#4084](https://github.com/yandex/ClickHouse/pull/4084) ([alesapin](https://github.com/alesapin))
|
||||||
* Added functions to search for multiple constant strings from big haystack: `multiPosition`, `multiSearch` ,`firstMatch` also with `-UTF8`, `-CaseInsensitive`, and `-CaseInsensitiveUTF8` variants. [#4053](https://github.com/yandex/ClickHouse/pull/4053) ([Danila Kutenin](https://github.com/danlark1))
|
* Added functions to search for multiple constant strings from big haystack: `multiPosition`, `multiSearch` ,`firstMatch` also with `-UTF8`, `-CaseInsensitive`, and `-CaseInsensitiveUTF8` variants. [#4053](https://github.com/yandex/ClickHouse/pull/4053) ([Danila Kutenin](https://github.com/danlark1))
|
||||||
* Pruning of unused shards if `SELECT` query filters by sharding key (setting `distributed_optimize_skip_select_on_unused_shards`). [#3851](https://github.com/yandex/ClickHouse/pull/3851) ([Ivan](https://github.com/abyss7))
|
* Pruning of unused shards if `SELECT` query filters by sharding key (setting `optimize_skip_unused_shards`). [#3851](https://github.com/yandex/ClickHouse/pull/3851) ([Gleb Kanterov](https://github.com/kanterov), [Ivan](https://github.com/abyss7))
|
||||||
* Allow `Kafka` engine to ignore some number of parsing errors per block. [#4094](https://github.com/yandex/ClickHouse/pull/4094) ([Ivan](https://github.com/abyss7))
|
* Allow `Kafka` engine to ignore some number of parsing errors per block. [#4094](https://github.com/yandex/ClickHouse/pull/4094) ([Ivan](https://github.com/abyss7))
|
||||||
* Added support for `CatBoost` multiclass models evaluation. Function `modelEvaluate` returns tuple with per-class raw predictions for multiclass models. `libcatboostmodel.so` should be built with [#607](https://github.com/catboost/catboost/pull/607). [#3959](https://github.com/yandex/ClickHouse/pull/3959) ([KochetovNicolai](https://github.com/KochetovNicolai))
|
* Added support for `CatBoost` multiclass models evaluation. Function `modelEvaluate` returns tuple with per-class raw predictions for multiclass models. `libcatboostmodel.so` should be built with [#607](https://github.com/catboost/catboost/pull/607). [#3959](https://github.com/yandex/ClickHouse/pull/3959) ([KochetovNicolai](https://github.com/KochetovNicolai))
|
||||||
* Added functions `filesystemAvailable`, `filesystemFree`, `filesystemCapacity`. [#4097](https://github.com/yandex/ClickHouse/pull/4097) ([Boris Granveaud](https://github.com/bgranvea))
|
* Added functions `filesystemAvailable`, `filesystemFree`, `filesystemCapacity`. [#4097](https://github.com/yandex/ClickHouse/pull/4097) ([Boris Granveaud](https://github.com/bgranvea))
|
||||||
|
109
CHANGELOG_RU.md
109
CHANGELOG_RU.md
@ -1,3 +1,112 @@
|
|||||||
|
## ClickHouse release 19.1.6, 2019-01-24
|
||||||
|
|
||||||
|
### Новые возможности:
|
||||||
|
|
||||||
|
* Задание формата сжатия для отдельных столбцов. [#3899](https://github.com/yandex/ClickHouse/pull/3899) [#4111](https://github.com/yandex/ClickHouse/pull/4111) ([alesapin](https://github.com/alesapin), [Winter Zhang](https://github.com/zhang2014), [Anatoly](https://github.com/Sindbag))
|
||||||
|
* Формат сжатия `Delta`. [#4052](https://github.com/yandex/ClickHouse/pull/4052) ([alesapin](https://github.com/alesapin))
|
||||||
|
* Изменение формата сжатия запросом `ALTER`. [#4054](https://github.com/yandex/ClickHouse/pull/4054) ([alesapin](https://github.com/alesapin))
|
||||||
|
* Добавлены функции `left`, `right`, `trim`, `ltrim`, `rtrim`, `timestampadd`, `timestampsub` для совместимости со стандартом SQL. [#3826](https://github.com/yandex/ClickHouse/pull/3826) ([Ivan Blinkov](https://github.com/blinkov))
|
||||||
|
* Поддержка записи в движок `HDFS` и табличную функцию `hdfs`. [#4084](https://github.com/yandex/ClickHouse/pull/4084) ([alesapin](https://github.com/alesapin))
|
||||||
|
* Добавлены функции поиска набора константных строк в тексте: `multiPosition`, `multiSearch` ,`firstMatch` также с суффиксами `-UTF8`, `-CaseInsensitive`, и `-CaseInsensitiveUTF8`. [#4053](https://github.com/yandex/ClickHouse/pull/4053) ([Danila Kutenin](https://github.com/danlark1))
|
||||||
|
* Пропуск неиспользуемых шардов в случае, если запрос `SELECT` содержит фильтрацию по ключу шардирования (настройка `optimize_skip_unused_shards`). [#3851](https://github.com/yandex/ClickHouse/pull/3851) ([Gleb Kanterov](https://github.com/kanterov), [Ivan](https://github.com/abyss7))
|
||||||
|
* Пропуск строк в случае ошибки парсинга для движка `Kafka` (настройка `kafka_skip_broken_messages`). [#4094](https://github.com/yandex/ClickHouse/pull/4094) ([Ivan](https://github.com/abyss7))
|
||||||
|
* Поддержка применения мультиклассовых моделей `CatBoost`. Функция `modelEvaluate` возвращает кортеж в случае использования мультиклассовой модели. `libcatboostmodel.so` should be built with [#607](https://github.com/catboost/catboost/pull/607). [#3959](https://github.com/yandex/ClickHouse/pull/3959) ([KochetovNicolai](https://github.com/KochetovNicolai))
|
||||||
|
* Добавлены функции `filesystemAvailable`, `filesystemFree`, `filesystemCapacity`. [#4097](https://github.com/yandex/ClickHouse/pull/4097) ([Boris Granveaud](https://github.com/bgranvea))
|
||||||
|
* Добавлены функции хеширования `xxHash64` и `xxHash32`. [#3905](https://github.com/yandex/ClickHouse/pull/3905) ([filimonov](https://github.com/filimonov))
|
||||||
|
* Добавлена функция хеширования `gccMurmurHash` (GCC flavoured Murmur hash), использующая те же hash seed, что и [gcc](https://github.com/gcc-mirror/gcc/blob/41d6b10e96a1de98e90a7c0378437c3255814b16/libstdc%2B%2B-v3/include/bits/functional_hash.h#L191) [#4000](https://github.com/yandex/ClickHouse/pull/4000) ([sundyli](https://github.com/sundy-li))
|
||||||
|
* Добавлены функции хеширования `javaHash`, `hiveHash`. [#3811](https://github.com/yandex/ClickHouse/pull/3811) ([shangshujie365](https://github.com/shangshujie365))
|
||||||
|
* Добавлена функция `remoteSecure`. Функция работает аналогично `remote`, но использует безопасное соединение. [#4088](https://github.com/yandex/ClickHouse/pull/4088) ([proller](https://github.com/proller))
|
||||||
|
|
||||||
|
|
||||||
|
### Экспериментальные возможности:
|
||||||
|
|
||||||
|
* Эмуляция запросов с несколькими секциями `JOIN` (настройка `allow_experimental_multiple_joins_emulation`). [#3946](https://github.com/yandex/ClickHouse/pull/3946) ([Artem Zuikov](https://github.com/4ertus2))
|
||||||
|
|
||||||
|
### Исправления ошибок:
|
||||||
|
|
||||||
|
* Ограничен размер кеша скомпилированных выражений в случае, если не указана настройка `compiled_expression_cache_size` для экономии потребляемой памяти. [#4041](https://github.com/yandex/ClickHouse/pull/4041) ([alesapin](https://github.com/alesapin))
|
||||||
|
* Исправлена проблема зависания потоков, выполняющих запрос `ALTER` для таблиц семейства `Replicated`, а также потоков, обновляющих конфигурацию из ZooKeeper. [#2947](https://github.com/yandex/ClickHouse/issues/2947) [#3891](https://github.com/yandex/ClickHouse/issues/3891) [#3934](https://github.com/yandex/ClickHouse/pull/3934) ([Alex Zatelepin](https://github.com/ztlpn))
|
||||||
|
* Исправлен race condition в случае выполнения распределенной задачи запроса `ALTER`. Race condition приводил к состоянию, когда более чем одна реплика пыталась выполнить задачу, в результате чего все такие реплики, кроме одной, падали с ошибкой обращения к ZooKeeper. [#3904](https://github.com/yandex/ClickHouse/pull/3904) ([Alex Zatelepin](https://github.com/ztlpn))
|
||||||
|
* Исправлена проблема обновления настройки `from_zk`. Настройка, указанная в файле конфигурации, не обновлялась в случае, если запрос к ZooKeeper падал по timeout. [#2947](https://github.com/yandex/ClickHouse/issues/2947) [#3947](https://github.com/yandex/ClickHouse/pull/3947) ([Alex Zatelepin](https://github.com/ztlpn))
|
||||||
|
* Исправлена ошибка в вычислении сетевого префикса при указании IPv4 маски подсети. [#3945](https://github.com/yandex/ClickHouse/pull/3945) ([alesapin](https://github.com/alesapin))
|
||||||
|
* Исправлено падение (`std::terminate`) в редком сценарии, когда новый поток не мог быть создан из-за нехватки ресурсов. [#3956](https://github.com/yandex/ClickHouse/pull/3956) ([alexey-milovidov](https://github.com/alexey-milovidov))
|
||||||
|
* Исправлено падение табличной функции `remote` в случае, когда не удавалось получить структуру таблицы из-за ограничений пользователя. [#4009](https://github.com/yandex/ClickHouse/pull/4009) ([alesapin](https://github.com/alesapin))
|
||||||
|
* Исправлена утечка сетевых сокетов. Сокеты создавались в пуле и никогда не закрывались. При создании потока, создавались новые сокеты в случае, если все доступные использовались. [#4017](https://github.com/yandex/ClickHouse/pull/4017) ([Alex Zatelepin](https://github.com/ztlpn))
|
||||||
|
* Исправлена проблема закрывания `/proc/self/fd` раньше, чем все файловые дескрипторы были прочитаны из `/proc` после создания процесса `odbc-bridge`. [#4120](https://github.com/yandex/ClickHouse/pull/4120) ([alesapin](https://github.com/alesapin))
|
||||||
|
* Исправлен баг в монотонном преобразовании String в UInt в случае использования String в первичном ключе. [#3870](https://github.com/yandex/ClickHouse/pull/3870) ([Winter Zhang](https://github.com/zhang2014))
|
||||||
|
* Исправлен баг в вычислении монотонности функции преобразования типа целых значений. [#3921](https://github.com/yandex/ClickHouse/pull/3921) ([alexey-milovidov](https://github.com/alexey-milovidov))
|
||||||
|
* Исправлено падение в функциях `arrayEnumerateUniq`, `arrayEnumerateDense` при передаче невалидных аргументов. [#3909](https://github.com/yandex/ClickHouse/pull/3909) ([alexey-milovidov](https://github.com/alexey-milovidov))
|
||||||
|
* Исправлен undefined behavior в StorageMerge. [#3910](https://github.com/yandex/ClickHouse/pull/3910) ([Amos Bird](https://github.com/amosbird))
|
||||||
|
* Исправлено падение в функциях `addDays`, `subtractDays`. [#3913](https://github.com/yandex/ClickHouse/pull/3913) ([alexey-milovidov](https://github.com/alexey-milovidov))
|
||||||
|
* Исправлена проблема, в результате которой функции `round`, `floor`, `trunc`, `ceil` могли возвращать неверный результат для отрицательных целочисленных аргументов с большим значением. [#3914](https://github.com/yandex/ClickHouse/pull/3914) ([alexey-milovidov](https://github.com/alexey-milovidov))
|
||||||
|
* Исправлена проблема, в результате которой 'kill query sync' приводил к падению сервера. [#3916](https://github.com/yandex/ClickHouse/pull/3916) ([muVulDeePecker](https://github.com/fancyqlx))
|
||||||
|
* Исправлен баг, приводящий к большой задержке в случае пустой очереди репликации. [#3928](https://github.com/yandex/ClickHouse/pull/3928) [#3932](https://github.com/yandex/ClickHouse/pull/3932) ([alesapin](https://github.com/alesapin))
|
||||||
|
* Исправлено избыточное использование памяти в случае вставки в таблицу с `LowCardinality` в первичном ключе. [#3955](https://github.com/yandex/ClickHouse/pull/3955) ([KochetovNicolai](https://github.com/KochetovNicolai))
|
||||||
|
* Исправлена сериализация пустых массивов типа `LowCardinality` для формата `Native`. [#3907](https://github.com/yandex/ClickHouse/issues/3907) [#4011](https://github.com/yandex/ClickHouse/pull/4011) ([KochetovNicolai](https://github.com/KochetovNicolai))
|
||||||
|
* Исправлен неверный результат в случае использования distinct для числового столбца `LowCardinality`. [#3895](https://github.com/yandex/ClickHouse/issues/3895) [#4012](https://github.com/yandex/ClickHouse/pull/4012) ([KochetovNicolai](https://github.com/KochetovNicolai))
|
||||||
|
* Исправлена компиляция вычисления агрегатных функций для ключа `LowCardinality` (для случая, когда включена настройка `compile`). [#3886](https://github.com/yandex/ClickHouse/pull/3886) ([KochetovNicolai](https://github.com/KochetovNicolai))
|
||||||
|
* Исправлена передача пользователя и пароля для запросов с реплик. [#3957](https://github.com/yandex/ClickHouse/pull/3957) ([alesapin](https://github.com/alesapin)) ([小路](https://github.com/nicelulu))
|
||||||
|
* Исправлен очень редкий race condition возникающий при перечислении таблиц из базы данных типа `Dictionary` во время перезагрузки словарей. [#3970](https://github.com/yandex/ClickHouse/pull/3970) ([alexey-milovidov](https://github.com/alexey-milovidov))
|
||||||
|
* Исправлен неверный результат в случае использования HAVING с ROLLUP или CUBE. [#3756](https://github.com/yandex/ClickHouse/issues/3756) [#3837](https://github.com/yandex/ClickHouse/pull/3837) ([Sam Chou](https://github.com/reflection))
|
||||||
|
* Исправлена проблема с алиасами столбцов для запросов с `JOIN ON` над распределенными таблицами. [#3980](https://github.com/yandex/ClickHouse/pull/3980) ([Winter Zhang](https://github.com/zhang2014))
|
||||||
|
* Исправлена ошибка в реализации функции `quantileTDigest` (нашел Artem Vakhrushev). Эта ошибка никогда не происходит в ClickHouse и актуальна только для тех, кто использует кодовую базу ClickHouse напрямую в качестве библиотеки. [#3935](https://github.com/yandex/ClickHouse/pull/3935) ([alexey-milovidov](https://github.com/alexey-milovidov))
|
||||||
|
|
||||||
|
### Улучшения:
|
||||||
|
|
||||||
|
* Добавлена поддержка `IF NOT EXISTS` в выражении `ALTER TABLE ADD COLUMN`, `IF EXISTS` в выражении `DROP/MODIFY/CLEAR/COMMENT COLUMN`. [#3900](https://github.com/yandex/ClickHouse/pull/3900) ([Boris Granveaud](https://github.com/bgranvea))
|
||||||
|
* Функция `parseDateTimeBestEffort` теперь поддерживает форматы `DD.MM.YYYY`, `DD.MM.YY`, `DD-MM-YYYY`, `DD-Mon-YYYY`, `DD/Month/YYYY` и аналогичные. [#3922](https://github.com/yandex/ClickHouse/pull/3922) ([alexey-milovidov](https://github.com/alexey-milovidov))
|
||||||
|
* `CapnProtoInputStream` теперь поддерживает jagged структуры. [#4063](https://github.com/yandex/ClickHouse/pull/4063) ([Odin Hultgren Van Der Horst](https://github.com/Miniwoffer))
|
||||||
|
* Улучшение usability: добавлена проверка, что сервер запущен от пользователя, совпадающего с владельцем директории данных. Запрещен запуск от пользователя root в случае, если root не владеет директорией с данными. [#3785](https://github.com/yandex/ClickHouse/pull/3785) ([sergey-v-galtsev](https://github.com/sergey-v-galtsev))
|
||||||
|
* Улучшена логика проверки столбцов, необходимых для JOIN, на стадии анализа запроса. [#3930](https://github.com/yandex/ClickHouse/pull/3930) ([Artem Zuikov](https://github.com/4ertus2))
|
||||||
|
* Уменьшено число поддерживаемых соединений в случае большого числа распределенных таблиц. [#3726](https://github.com/yandex/ClickHouse/pull/3726) ([Winter Zhang](https://github.com/zhang2014))
|
||||||
|
* Добавлена поддержка строки с totals для запроса с `WITH TOTALS` через ODBC драйвер. [#3836](https://github.com/yandex/ClickHouse/pull/3836) ([Maksim Koritckiy](https://github.com/nightweb))
|
||||||
|
* Поддержано использование `Enum` в качестве чисел в функции `if`. [#3875](https://github.com/yandex/ClickHouse/pull/3875) ([Ivan](https://github.com/abyss7))
|
||||||
|
* Добавлена настройка `low_cardinality_allow_in_native_format`. Если она выключена, то тип `LowCadrinality` не используется в формате `Native`. [#3879](https://github.com/yandex/ClickHouse/pull/3879) ([KochetovNicolai](https://github.com/KochetovNicolai))
|
||||||
|
* Удалены некоторые избыточные объекты из кеша скомпилированных выражений для уменьшения потребления памяти. [#4042](https://github.com/yandex/ClickHouse/pull/4042) ([alesapin](https://github.com/alesapin))
|
||||||
|
* Добавлена проверка того, что в запрос `SET send_logs_level = 'value'` передается верное значение. [#3873](https://github.com/yandex/ClickHouse/pull/3873) ([Sabyanin Maxim](https://github.com/s-mx))
|
||||||
|
* Добавлена проверка типов для функций преобразования типов. [#3896](https://github.com/yandex/ClickHouse/pull/3896) ([Winter Zhang](https://github.com/zhang2014))
|
||||||
|
|
||||||
|
### Улучшения производительности:
|
||||||
|
|
||||||
|
* Добавлена настройка `use_minimalistic_part_header_in_zookeeper` для движка MergeTree. Если настройка включена, Replicated таблицы будут хранить метаданные куска в компактном виде (в соответствующем znode для этого куска). Это может значительно уменьшить размер для ZooKeeper snapshot (особенно для таблиц с большим числом столбцов). После включения данной настройки будет невозможно сделать откат к версии, которая эту настройку не поддерживает. [#3960](https://github.com/yandex/ClickHouse/pull/3960) ([Alex Zatelepin](https://github.com/ztlpn))
|
||||||
|
* Добавлена реализация функций `sequenceMatch` и `sequenceCount` на основе конечного автомата в случае, если последовательность событий не содержит условия на время. [#4004](https://github.com/yandex/ClickHouse/pull/4004) ([Léo Ercolanelli](https://github.com/ercolanelli-leo))
|
||||||
|
* Улучшена производительность сериализации целых чисел. [#3968](https://github.com/yandex/ClickHouse/pull/3968) ([Amos Bird](https://github.com/amosbird))
|
||||||
|
* Добавлен zero left padding для PODArray. Теперь элемент с индексом -1 является валидным нулевым значением. Эта особенность используется для удаления условного выражения при вычислении оффсетов массивов. [#3920](https://github.com/yandex/ClickHouse/pull/3920) ([Amos Bird](https://github.com/amosbird))
|
||||||
|
* Откат версии `jemalloc`, приводящей к деградации производительности. [#4018](https://github.com/yandex/ClickHouse/pull/4018) ([alexey-milovidov](https://github.com/alexey-milovidov))
|
||||||
|
|
||||||
|
### Обратно несовместимые изменения:
|
||||||
|
|
||||||
|
* Удалена недокументированная возможность `ALTER MODIFY PRIMARY KEY`, замененная выражением `ALTER MODIFY ORDER BY`. [#3887](https://github.com/yandex/ClickHouse/pull/3887) ([Alex Zatelepin](https://github.com/ztlpn))
|
||||||
|
* Удалена функция `shardByHash`. [#3833](https://github.com/yandex/ClickHouse/pull/3833) ([alexey-milovidov](https://github.com/alexey-milovidov))
|
||||||
|
* Запрещено использование скалярных подзапросов с результатом, имеющим тип `AggregateFunction`. [#3865](https://github.com/yandex/ClickHouse/pull/3865) ([Ivan](https://github.com/abyss7))
|
||||||
|
|
||||||
|
### Улучшения сборки/тестирования/пакетирования:
|
||||||
|
|
||||||
|
* Добавлена поддержка сборки под PowerPC (`ppc64le`). [#4132](https://github.com/yandex/ClickHouse/pull/4132) ([Danila Kutenin](https://github.com/danlark1))
|
||||||
|
* Функциональные stateful тесты запускаются на публично доступных данных. [#3969](https://github.com/yandex/ClickHouse/pull/3969) ([alexey-milovidov](https://github.com/alexey-milovidov))
|
||||||
|
* Исправлена ошибка, при которой сервер не мог запуститься с сообщением `bash: /usr/bin/clickhouse-extract-from-config: Operation not permitted` при использовании Docker или systemd-nspawn. [#4136](https://github.com/yandex/ClickHouse/pull/4136) ([alexey-milovidov](https://github.com/alexey-milovidov))
|
||||||
|
* Обновлена библиотека `rdkafka` до версии v1.0.0-RC5. Использована cppkafka на замену интерфейса языка C. [#4025](https://github.com/yandex/ClickHouse/pull/4025) ([Ivan](https://github.com/abyss7))
|
||||||
|
* Обновлена библиотека `mariadb-client`. Исправлена проблема, обнаруженная с использованием UBSan. [#3924](https://github.com/yandex/ClickHouse/pull/3924) ([alexey-milovidov](https://github.com/alexey-milovidov))
|
||||||
|
* Исправления для сборок с UBSan. [#3926](https://github.com/yandex/ClickHouse/pull/3926) [#3021](https://github.com/yandex/ClickHouse/pull/3021) [#3948](https://github.com/yandex/ClickHouse/pull/3948) ([alexey-milovidov](https://github.com/alexey-milovidov))
|
||||||
|
* Добавлены покоммитные запуски тестов с UBSan сборкой.
|
||||||
|
* Добавлены покоммитные запуски тестов со статическим анализатором PVS-Studio.
|
||||||
|
* Исправлены проблемы, найденные с использованием PVS-Studio. [#4013](https://github.com/yandex/ClickHouse/pull/4013) ([alexey-milovidov](https://github.com/alexey-milovidov))
|
||||||
|
* Исправлены проблемы совместимости glibc. [#4100](https://github.com/yandex/ClickHouse/pull/4100) ([alexey-milovidov](https://github.com/alexey-milovidov))
|
||||||
|
* Docker образы перемещены на Ubuntu 18.10, добавлена совместимость с glibc >= 2.28 [#3965](https://github.com/yandex/ClickHouse/pull/3965) ([alesapin](https://github.com/alesapin))
|
||||||
|
* Добавлена переменная окружения `CLICKHOUSE_DO_NOT_CHOWN`, позволяющая не делать shown директории для Docker образа сервера. [#3967](https://github.com/yandex/ClickHouse/pull/3967) ([alesapin](https://github.com/alesapin))
|
||||||
|
* Включены большинство предупреждений из `-Weverything` для clang. Включено `-Wpedantic`. [#3986](https://github.com/yandex/ClickHouse/pull/3986) ([alexey-milovidov](https://github.com/alexey-milovidov))
|
||||||
|
* Добавлены некоторые предупреждения, специфичные только для clang 8. [#3993](https://github.com/yandex/ClickHouse/pull/3993) ([alexey-milovidov](https://github.com/alexey-milovidov))
|
||||||
|
* При использовании динамической линковки используется `libLLVM` вместо библиотеки `LLVM`. [#3989](https://github.com/yandex/ClickHouse/pull/3989) ([Orivej Desh](https://github.com/orivej))
|
||||||
|
* Добавлены переменные окружения для параметров `TSan`, `UBSan`, `ASan` в тестовом Docker образе. [#4072](https://github.com/yandex/ClickHouse/pull/4072) ([alesapin](https://github.com/alesapin))
|
||||||
|
* Debian пакет `clickhouse-server` будет рекомендовать пакет `libcap2-bin` для того, чтобы использовать утилиту `setcap` для настроек. Данный пакет опционален. [#4093](https://github.com/yandex/ClickHouse/pull/4093) ([alexey-milovidov](https://github.com/alexey-milovidov))
|
||||||
|
* Уменьшено время сборки, убраны ненужные включения заголовочных файлов. [#3898](https://github.com/yandex/ClickHouse/pull/3898) ([proller](https://github.com/proller))
|
||||||
|
* Добавлены тесты производительности для функций хеширования. [#3918](https://github.com/yandex/ClickHouse/pull/3918) ([filimonov](https://github.com/filimonov))
|
||||||
|
* Исправлены циклические зависимости библиотек. [#3958](https://github.com/yandex/ClickHouse/pull/3958) ([proller](https://github.com/proller))
|
||||||
|
* Улучшена компиляция при малом объеме памяти. [#4030](https://github.com/yandex/ClickHouse/pull/4030) ([proller](https://github.com/proller))
|
||||||
|
* Добавлен тестовый скрипт для воспроизведения деградации производительности в `jemalloc`. [#4036](https://github.com/yandex/ClickHouse/pull/4036) ([alexey-milovidov](https://github.com/alexey-milovidov))
|
||||||
|
* Исправления опечаток в комментариях и строковых литералах. [#4122](https://github.com/yandex/ClickHouse/pull/4122) ([maiha](https://github.com/maiha))
|
||||||
|
* Исправления опечаток в комментариях. [#4089](https://github.com/yandex/ClickHouse/pull/4089) ([Evgenii Pravda](https://github.com/kvinty))
|
||||||
|
|
||||||
## ClickHouse release 18.16.1, 2018-12-21
|
## ClickHouse release 18.16.1, 2018-12-21
|
||||||
|
|
||||||
### Исправления ошибок:
|
### Исправления ошибок:
|
||||||
|
@ -221,7 +221,7 @@ if (UNBUNDLED OR NOT (OS_LINUX OR APPLE) OR ARCH_32)
|
|||||||
option (NO_WERROR "Disable -Werror compiler option" ON)
|
option (NO_WERROR "Disable -Werror compiler option" ON)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
message (STATUS "Building for: ${CMAKE_SYSTEM} ${CMAKE_SYSTEM_PROCESSOR} ${CMAKE_LIBRARY_ARCHITECTURE} ; USE_STATIC_LIBRARIES=${USE_STATIC_LIBRARIES} MAKE_STATIC_LIBRARIES=${MAKE_STATIC_LIBRARIES} UNBUNDLED=${UNBUNDLED} CCACHE=${CCACHE_FOUND} ${CCACHE_VERSION}")
|
message (STATUS "Building for: ${CMAKE_SYSTEM} ${CMAKE_SYSTEM_PROCESSOR} ${CMAKE_LIBRARY_ARCHITECTURE} ; USE_STATIC_LIBRARIES=${USE_STATIC_LIBRARIES} MAKE_STATIC_LIBRARIES=${MAKE_STATIC_LIBRARIES} SPLIT_SHARED=${SPLIT_SHARED_LIBRARIES} UNBUNDLED=${UNBUNDLED} CCACHE=${CCACHE_FOUND} ${CCACHE_VERSION}")
|
||||||
|
|
||||||
include(GNUInstallDirs)
|
include(GNUInstallDirs)
|
||||||
|
|
||||||
|
@ -13,4 +13,5 @@ ClickHouse is an open-source column-oriented database management system that all
|
|||||||
|
|
||||||
## Upcoming Events
|
## Upcoming Events
|
||||||
|
|
||||||
* [C++ ClickHouse and CatBoost Sprints](https://events.yandex.ru/events/ClickHouse/2-feb-2019/) in Moscow on February 2.
|
* [ClickHouse Community Meetup](https://www.eventbrite.com/e/meetup-clickhouse-in-the-wild-deployment-success-stories-registration-55305051899) in San Francisco on February 19.
|
||||||
|
* [ClickHouse Community Meetup](https://www.eventbrite.com/e/clickhouse-meetup-in-madrid-registration-55376746339) in Madrid on April 2.
|
||||||
|
@ -3,7 +3,6 @@ set -e -x
|
|||||||
|
|
||||||
source default-config
|
source default-config
|
||||||
|
|
||||||
# TODO Non debian systems
|
|
||||||
./install-os-packages.sh svn
|
./install-os-packages.sh svn
|
||||||
./install-os-packages.sh cmake
|
./install-os-packages.sh cmake
|
||||||
|
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
# ARM: Cannot cpuid_get_raw_data: CPUID instruction is not supported
|
||||||
if (NOT ARCH_ARM)
|
if (NOT ARCH_ARM)
|
||||||
option (USE_INTERNAL_CPUID_LIBRARY "Set to FALSE to use system cpuid library instead of bundled" ${NOT_UNBUNDLED})
|
option (USE_INTERNAL_CPUID_LIBRARY "Set to FALSE to use system cpuid library instead of bundled" ${NOT_UNBUNDLED})
|
||||||
endif ()
|
endif ()
|
||||||
@ -21,7 +22,7 @@ if (CPUID_LIBRARY AND CPUID_INCLUDE_DIR)
|
|||||||
# TODO: make virtual target cpuid:cpuid with COMPILE_DEFINITIONS property
|
# TODO: make virtual target cpuid:cpuid with COMPILE_DEFINITIONS property
|
||||||
endif ()
|
endif ()
|
||||||
set (USE_CPUID 1)
|
set (USE_CPUID 1)
|
||||||
elseif (NOT MISSING_INTERNAL_CPUID_LIBRARY)
|
elseif (NOT ARCH_ARM AND NOT MISSING_INTERNAL_CPUID_LIBRARY)
|
||||||
set (CPUID_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libcpuid/include)
|
set (CPUID_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libcpuid/include)
|
||||||
set (USE_INTERNAL_CPUID_LIBRARY 1)
|
set (USE_INTERNAL_CPUID_LIBRARY 1)
|
||||||
set (CPUID_LIBRARY cpuid)
|
set (CPUID_LIBRARY cpuid)
|
||||||
|
@ -1,5 +1,12 @@
|
|||||||
option(USE_INTERNAL_CPUINFO_LIBRARY "Set to FALSE to use system cpuinfo library instead of bundled" ${NOT_UNBUNDLED})
|
option(USE_INTERNAL_CPUINFO_LIBRARY "Set to FALSE to use system cpuinfo library instead of bundled" ${NOT_UNBUNDLED})
|
||||||
|
|
||||||
|
# Now we have no contrib/libcpuinfo, use from system.
|
||||||
|
if (USE_INTERNAL_CPUINFO_LIBRARY AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libcpuinfo/include")
|
||||||
|
#message (WARNING "submodule contrib/libcpuid is missing. to fix try run: \n git submodule update --init --recursive")
|
||||||
|
set (USE_INTERNAL_CPUINFO_LIBRARY 0)
|
||||||
|
set (MISSING_INTERNAL_CPUINFO_LIBRARY 1)
|
||||||
|
endif ()
|
||||||
|
|
||||||
if(NOT USE_INTERNAL_CPUINFO_LIBRARY)
|
if(NOT USE_INTERNAL_CPUINFO_LIBRARY)
|
||||||
find_library(CPUINFO_LIBRARY cpuinfo)
|
find_library(CPUINFO_LIBRARY cpuinfo)
|
||||||
find_path(CPUINFO_INCLUDE_DIR NAMES cpuinfo.h PATHS ${CPUINFO_INCLUDE_PATHS})
|
find_path(CPUINFO_INCLUDE_DIR NAMES cpuinfo.h PATHS ${CPUINFO_INCLUDE_PATHS})
|
||||||
|
2
contrib/CMakeLists.txt
vendored
2
contrib/CMakeLists.txt
vendored
@ -8,6 +8,8 @@ elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
|
|||||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-old-style-cast -Wno-unused-function -Wno-unused-variable -Wno-unused-result -Wno-deprecated-declarations -Wno-non-virtual-dtor -Wno-format -Wno-inconsistent-missing-override -std=c++1z")
|
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-old-style-cast -Wno-unused-function -Wno-unused-variable -Wno-unused-result -Wno-deprecated-declarations -Wno-non-virtual-dtor -Wno-format -Wno-inconsistent-missing-override -std=c++1z")
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
set_property(DIRECTORY PROPERTY EXCLUDE_FROM_ALL 1)
|
||||||
|
|
||||||
if (USE_INTERNAL_BOOST_LIBRARY)
|
if (USE_INTERNAL_BOOST_LIBRARY)
|
||||||
add_subdirectory (boost-cmake)
|
add_subdirectory (boost-cmake)
|
||||||
endif ()
|
endif ()
|
||||||
|
@ -39,5 +39,20 @@ add_library(base64 ${LINK_MODE}
|
|||||||
${LIBRARY_DIR}/lib/codecs.h
|
${LIBRARY_DIR}/lib/codecs.h
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/config.h)
|
${CMAKE_CURRENT_BINARY_DIR}/config.h)
|
||||||
|
|
||||||
target_compile_options(base64 PRIVATE ${base64_SSSE3_opt} ${base64_SSE41_opt} ${base64_SSE42_opt} ${base64_AVX_opt} ${base64_AVX2_opt})
|
if(HAVE_AVX)
|
||||||
|
set_source_files_properties(${LIBRARY_DIR}/lib/arch/avx/codec.c PROPERTIES COMPILE_FLAGS -mavx)
|
||||||
|
endif()
|
||||||
|
if(HAVE_AVX2)
|
||||||
|
set_source_files_properties(${LIBRARY_DIR}/lib/arch/avx2/codec.c PROPERTIES COMPILE_FLAGS -mavx2)
|
||||||
|
endif()
|
||||||
|
if(HAVE_SSE41)
|
||||||
|
set_source_files_properties(${LIBRARY_DIR}/lib/arch/sse41/codec.c PROPERTIES COMPILE_FLAGS -msse4.1)
|
||||||
|
endif()
|
||||||
|
if(HAVE_SSE42)
|
||||||
|
set_source_files_properties(${LIBRARY_DIR}/lib/arch/sse42/codec.c PROPERTIES COMPILE_FLAGS -msse4.2)
|
||||||
|
endif()
|
||||||
|
if(HAVE_SSSE3)
|
||||||
|
set_source_files_properties(${LIBRARY_DIR}/lib/arch/ssse3/codec.c PROPERTIES COMPILE_FLAGS -mssse3)
|
||||||
|
endif()
|
||||||
|
|
||||||
target_include_directories(base64 PRIVATE ${LIBRARY_DIR}/include ${CMAKE_CURRENT_BINARY_DIR})
|
target_include_directories(base64 PRIVATE ${LIBRARY_DIR}/include ${CMAKE_CURRENT_BINARY_DIR})
|
||||||
|
2
contrib/cppkafka
vendored
2
contrib/cppkafka
vendored
@ -1 +1 @@
|
|||||||
Subproject commit 520465510efef7704346cf8d140967c4abb057c1
|
Subproject commit 860c90e92eee6690aa74a2ca7b7c5c6930dffecd
|
1
contrib/pdqsort
vendored
1
contrib/pdqsort
vendored
@ -1 +0,0 @@
|
|||||||
Subproject commit 08879029ab8dcb80a70142acb709e3df02de5d37
|
|
2
contrib/pdqsort/README
Normal file
2
contrib/pdqsort/README
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
Source from https://github.com/orlp/pdqsort
|
||||||
|
Mandatory for Clickhouse, not available in OS packages, we can't use it as submodule.
|
16
contrib/pdqsort/license.txt
Normal file
16
contrib/pdqsort/license.txt
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
Copyright (c) 2015 Orson Peters <orsonpeters@gmail.com>
|
||||||
|
|
||||||
|
This software is provided 'as-is', without any express or implied warranty. In no event will the
|
||||||
|
authors be held liable for any damages arising from the use of this software.
|
||||||
|
|
||||||
|
Permission is granted to anyone to use this software for any purpose, including commercial
|
||||||
|
applications, and to alter it and redistribute it freely, subject to the following restrictions:
|
||||||
|
|
||||||
|
1. The origin of this software must not be misrepresented; you must not claim that you wrote the
|
||||||
|
original software. If you use this software in a product, an acknowledgment in the product
|
||||||
|
documentation would be appreciated but is not required.
|
||||||
|
|
||||||
|
2. Altered source versions must be plainly marked as such, and must not be misrepresented as
|
||||||
|
being the original software.
|
||||||
|
|
||||||
|
3. This notice may not be removed or altered from any source distribution.
|
544
contrib/pdqsort/pdqsort.h
Normal file
544
contrib/pdqsort/pdqsort.h
Normal file
@ -0,0 +1,544 @@
|
|||||||
|
/*
|
||||||
|
pdqsort.h - Pattern-defeating quicksort.
|
||||||
|
|
||||||
|
Copyright (c) 2015 Orson Peters
|
||||||
|
|
||||||
|
This software is provided 'as-is', without any express or implied warranty. In no event will the
|
||||||
|
authors be held liable for any damages arising from the use of this software.
|
||||||
|
|
||||||
|
Permission is granted to anyone to use this software for any purpose, including commercial
|
||||||
|
applications, and to alter it and redistribute it freely, subject to the following restrictions:
|
||||||
|
|
||||||
|
1. The origin of this software must not be misrepresented; you must not claim that you wrote the
|
||||||
|
original software. If you use this software in a product, an acknowledgment in the product
|
||||||
|
documentation would be appreciated but is not required.
|
||||||
|
|
||||||
|
2. Altered source versions must be plainly marked as such, and must not be misrepresented as
|
||||||
|
being the original software.
|
||||||
|
|
||||||
|
3. This notice may not be removed or altered from any source distribution.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef PDQSORT_H
|
||||||
|
#define PDQSORT_H
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cstddef>
|
||||||
|
#include <functional>
|
||||||
|
#include <utility>
|
||||||
|
#include <iterator>
|
||||||
|
|
||||||
|
#if __cplusplus >= 201103L
|
||||||
|
#include <cstdint>
|
||||||
|
#include <type_traits>
|
||||||
|
#define PDQSORT_PREFER_MOVE(x) std::move(x)
|
||||||
|
#else
|
||||||
|
#define PDQSORT_PREFER_MOVE(x) (x)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
namespace pdqsort_detail {
|
||||||
|
enum {
|
||||||
|
// Partitions below this size are sorted using insertion sort.
|
||||||
|
insertion_sort_threshold = 24,
|
||||||
|
|
||||||
|
// Partitions above this size use Tukey's ninther to select the pivot.
|
||||||
|
ninther_threshold = 128,
|
||||||
|
|
||||||
|
// When we detect an already sorted partition, attempt an insertion sort that allows this
|
||||||
|
// amount of element moves before giving up.
|
||||||
|
partial_insertion_sort_limit = 8,
|
||||||
|
|
||||||
|
// Must be multiple of 8 due to loop unrolling, and < 256 to fit in unsigned char.
|
||||||
|
block_size = 64,
|
||||||
|
|
||||||
|
// Cacheline size, assumes power of two.
|
||||||
|
cacheline_size = 64
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
#if __cplusplus >= 201103L
|
||||||
|
template<class T> struct is_default_compare : std::false_type { };
|
||||||
|
template<class T> struct is_default_compare<std::less<T>> : std::true_type { };
|
||||||
|
template<class T> struct is_default_compare<std::greater<T>> : std::true_type { };
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Returns floor(log2(n)), assumes n > 0.
|
||||||
|
template<class T>
|
||||||
|
inline int log2(T n) {
|
||||||
|
int log = 0;
|
||||||
|
while (n >>= 1) ++log;
|
||||||
|
return log;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sorts [begin, end) using insertion sort with the given comparison function.
|
||||||
|
template<class Iter, class Compare>
|
||||||
|
inline void insertion_sort(Iter begin, Iter end, Compare comp) {
|
||||||
|
typedef typename std::iterator_traits<Iter>::value_type T;
|
||||||
|
if (begin == end) return;
|
||||||
|
|
||||||
|
for (Iter cur = begin + 1; cur != end; ++cur) {
|
||||||
|
Iter sift = cur;
|
||||||
|
Iter sift_1 = cur - 1;
|
||||||
|
|
||||||
|
// Compare first so we can avoid 2 moves for an element already positioned correctly.
|
||||||
|
if (comp(*sift, *sift_1)) {
|
||||||
|
T tmp = PDQSORT_PREFER_MOVE(*sift);
|
||||||
|
|
||||||
|
do { *sift-- = PDQSORT_PREFER_MOVE(*sift_1); }
|
||||||
|
while (sift != begin && comp(tmp, *--sift_1));
|
||||||
|
|
||||||
|
*sift = PDQSORT_PREFER_MOVE(tmp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sorts [begin, end) using insertion sort with the given comparison function. Assumes
|
||||||
|
// *(begin - 1) is an element smaller than or equal to any element in [begin, end).
|
||||||
|
template<class Iter, class Compare>
|
||||||
|
inline void unguarded_insertion_sort(Iter begin, Iter end, Compare comp) {
|
||||||
|
typedef typename std::iterator_traits<Iter>::value_type T;
|
||||||
|
if (begin == end) return;
|
||||||
|
|
||||||
|
for (Iter cur = begin + 1; cur != end; ++cur) {
|
||||||
|
Iter sift = cur;
|
||||||
|
Iter sift_1 = cur - 1;
|
||||||
|
|
||||||
|
// Compare first so we can avoid 2 moves for an element already positioned correctly.
|
||||||
|
if (comp(*sift, *sift_1)) {
|
||||||
|
T tmp = PDQSORT_PREFER_MOVE(*sift);
|
||||||
|
|
||||||
|
do { *sift-- = PDQSORT_PREFER_MOVE(*sift_1); }
|
||||||
|
while (comp(tmp, *--sift_1));
|
||||||
|
|
||||||
|
*sift = PDQSORT_PREFER_MOVE(tmp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Attempts to use insertion sort on [begin, end). Will return false if more than
|
||||||
|
// partial_insertion_sort_limit elements were moved, and abort sorting. Otherwise it will
|
||||||
|
// successfully sort and return true.
|
||||||
|
template<class Iter, class Compare>
|
||||||
|
inline bool partial_insertion_sort(Iter begin, Iter end, Compare comp) {
|
||||||
|
typedef typename std::iterator_traits<Iter>::value_type T;
|
||||||
|
if (begin == end) return true;
|
||||||
|
|
||||||
|
int limit = 0;
|
||||||
|
for (Iter cur = begin + 1; cur != end; ++cur) {
|
||||||
|
if (limit > partial_insertion_sort_limit) return false;
|
||||||
|
|
||||||
|
Iter sift = cur;
|
||||||
|
Iter sift_1 = cur - 1;
|
||||||
|
|
||||||
|
// Compare first so we can avoid 2 moves for an element already positioned correctly.
|
||||||
|
if (comp(*sift, *sift_1)) {
|
||||||
|
T tmp = PDQSORT_PREFER_MOVE(*sift);
|
||||||
|
|
||||||
|
do { *sift-- = PDQSORT_PREFER_MOVE(*sift_1); }
|
||||||
|
while (sift != begin && comp(tmp, *--sift_1));
|
||||||
|
|
||||||
|
*sift = PDQSORT_PREFER_MOVE(tmp);
|
||||||
|
limit += cur - sift;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Iter, class Compare>
|
||||||
|
inline void sort2(Iter a, Iter b, Compare comp) {
|
||||||
|
if (comp(*b, *a)) std::iter_swap(a, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sorts the elements *a, *b and *c using comparison function comp.
|
||||||
|
template<class Iter, class Compare>
|
||||||
|
inline void sort3(Iter a, Iter b, Iter c, Compare comp) {
|
||||||
|
sort2(a, b, comp);
|
||||||
|
sort2(b, c, comp);
|
||||||
|
sort2(a, b, comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class T>
|
||||||
|
inline T* align_cacheline(T* p) {
|
||||||
|
#if defined(UINTPTR_MAX) && __cplusplus >= 201103L
|
||||||
|
std::uintptr_t ip = reinterpret_cast<std::uintptr_t>(p);
|
||||||
|
#else
|
||||||
|
std::size_t ip = reinterpret_cast<std::size_t>(p);
|
||||||
|
#endif
|
||||||
|
ip = (ip + cacheline_size - 1) & -cacheline_size;
|
||||||
|
return reinterpret_cast<T*>(ip);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Iter>
|
||||||
|
inline void swap_offsets(Iter first, Iter last,
|
||||||
|
unsigned char* offsets_l, unsigned char* offsets_r,
|
||||||
|
int num, bool use_swaps) {
|
||||||
|
typedef typename std::iterator_traits<Iter>::value_type T;
|
||||||
|
if (use_swaps) {
|
||||||
|
// This case is needed for the descending distribution, where we need
|
||||||
|
// to have proper swapping for pdqsort to remain O(n).
|
||||||
|
for (int i = 0; i < num; ++i) {
|
||||||
|
std::iter_swap(first + offsets_l[i], last - offsets_r[i]);
|
||||||
|
}
|
||||||
|
} else if (num > 0) {
|
||||||
|
Iter l = first + offsets_l[0]; Iter r = last - offsets_r[0];
|
||||||
|
T tmp(PDQSORT_PREFER_MOVE(*l)); *l = PDQSORT_PREFER_MOVE(*r);
|
||||||
|
for (int i = 1; i < num; ++i) {
|
||||||
|
l = first + offsets_l[i]; *r = PDQSORT_PREFER_MOVE(*l);
|
||||||
|
r = last - offsets_r[i]; *l = PDQSORT_PREFER_MOVE(*r);
|
||||||
|
}
|
||||||
|
*r = PDQSORT_PREFER_MOVE(tmp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Partitions [begin, end) around pivot *begin using comparison function comp. Elements equal
|
||||||
|
// to the pivot are put in the right-hand partition. Returns the position of the pivot after
|
||||||
|
// partitioning and whether the passed sequence already was correctly partitioned. Assumes the
|
||||||
|
// pivot is a median of at least 3 elements and that [begin, end) is at least
|
||||||
|
// insertion_sort_threshold long. Uses branchless partitioning.
|
||||||
|
template<class Iter, class Compare>
|
||||||
|
inline std::pair<Iter, bool> partition_right_branchless(Iter begin, Iter end, Compare comp) {
|
||||||
|
typedef typename std::iterator_traits<Iter>::value_type T;
|
||||||
|
|
||||||
|
// Move pivot into local for speed.
|
||||||
|
T pivot(PDQSORT_PREFER_MOVE(*begin));
|
||||||
|
Iter first = begin;
|
||||||
|
Iter last = end;
|
||||||
|
|
||||||
|
// Find the first element greater than or equal than the pivot (the median of 3 guarantees
|
||||||
|
// this exists).
|
||||||
|
while (comp(*++first, pivot));
|
||||||
|
|
||||||
|
// Find the first element strictly smaller than the pivot. We have to guard this search if
|
||||||
|
// there was no element before *first.
|
||||||
|
if (first - 1 == begin) while (first < last && !comp(*--last, pivot));
|
||||||
|
else while ( !comp(*--last, pivot));
|
||||||
|
|
||||||
|
// If the first pair of elements that should be swapped to partition are the same element,
|
||||||
|
// the passed in sequence already was correctly partitioned.
|
||||||
|
bool already_partitioned = first >= last;
|
||||||
|
if (!already_partitioned) {
|
||||||
|
std::iter_swap(first, last);
|
||||||
|
++first;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The following branchless partitioning is derived from "BlockQuicksort: How Branch
|
||||||
|
// Mispredictions don’t affect Quicksort" by Stefan Edelkamp and Armin Weiss.
|
||||||
|
unsigned char offsets_l_storage[block_size + cacheline_size];
|
||||||
|
unsigned char offsets_r_storage[block_size + cacheline_size];
|
||||||
|
unsigned char* offsets_l = align_cacheline(offsets_l_storage);
|
||||||
|
unsigned char* offsets_r = align_cacheline(offsets_r_storage);
|
||||||
|
int num_l, num_r, start_l, start_r;
|
||||||
|
num_l = num_r = start_l = start_r = 0;
|
||||||
|
|
||||||
|
while (last - first > 2 * block_size) {
|
||||||
|
// Fill up offset blocks with elements that are on the wrong side.
|
||||||
|
if (num_l == 0) {
|
||||||
|
start_l = 0;
|
||||||
|
Iter it = first;
|
||||||
|
for (unsigned char i = 0; i < block_size;) {
|
||||||
|
offsets_l[num_l] = i++; num_l += !comp(*it, pivot); ++it;
|
||||||
|
offsets_l[num_l] = i++; num_l += !comp(*it, pivot); ++it;
|
||||||
|
offsets_l[num_l] = i++; num_l += !comp(*it, pivot); ++it;
|
||||||
|
offsets_l[num_l] = i++; num_l += !comp(*it, pivot); ++it;
|
||||||
|
offsets_l[num_l] = i++; num_l += !comp(*it, pivot); ++it;
|
||||||
|
offsets_l[num_l] = i++; num_l += !comp(*it, pivot); ++it;
|
||||||
|
offsets_l[num_l] = i++; num_l += !comp(*it, pivot); ++it;
|
||||||
|
offsets_l[num_l] = i++; num_l += !comp(*it, pivot); ++it;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (num_r == 0) {
|
||||||
|
start_r = 0;
|
||||||
|
Iter it = last;
|
||||||
|
for (unsigned char i = 0; i < block_size;) {
|
||||||
|
offsets_r[num_r] = ++i; num_r += comp(*--it, pivot);
|
||||||
|
offsets_r[num_r] = ++i; num_r += comp(*--it, pivot);
|
||||||
|
offsets_r[num_r] = ++i; num_r += comp(*--it, pivot);
|
||||||
|
offsets_r[num_r] = ++i; num_r += comp(*--it, pivot);
|
||||||
|
offsets_r[num_r] = ++i; num_r += comp(*--it, pivot);
|
||||||
|
offsets_r[num_r] = ++i; num_r += comp(*--it, pivot);
|
||||||
|
offsets_r[num_r] = ++i; num_r += comp(*--it, pivot);
|
||||||
|
offsets_r[num_r] = ++i; num_r += comp(*--it, pivot);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Swap elements and update block sizes and first/last boundaries.
|
||||||
|
int num = std::min(num_l, num_r);
|
||||||
|
swap_offsets(first, last, offsets_l + start_l, offsets_r + start_r,
|
||||||
|
num, num_l == num_r);
|
||||||
|
num_l -= num; num_r -= num;
|
||||||
|
start_l += num; start_r += num;
|
||||||
|
if (num_l == 0) first += block_size;
|
||||||
|
if (num_r == 0) last -= block_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
int l_size = 0, r_size = 0;
|
||||||
|
int unknown_left = (last - first) - ((num_r || num_l) ? block_size : 0);
|
||||||
|
if (num_r) {
|
||||||
|
// Handle leftover block by assigning the unknown elements to the other block.
|
||||||
|
l_size = unknown_left;
|
||||||
|
r_size = block_size;
|
||||||
|
} else if (num_l) {
|
||||||
|
l_size = block_size;
|
||||||
|
r_size = unknown_left;
|
||||||
|
} else {
|
||||||
|
// No leftover block, split the unknown elements in two blocks.
|
||||||
|
l_size = unknown_left/2;
|
||||||
|
r_size = unknown_left - l_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fill offset buffers if needed.
|
||||||
|
if (unknown_left && !num_l) {
|
||||||
|
start_l = 0;
|
||||||
|
Iter it = first;
|
||||||
|
for (unsigned char i = 0; i < l_size;) {
|
||||||
|
offsets_l[num_l] = i++; num_l += !comp(*it, pivot); ++it;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (unknown_left && !num_r) {
|
||||||
|
start_r = 0;
|
||||||
|
Iter it = last;
|
||||||
|
for (unsigned char i = 0; i < r_size;) {
|
||||||
|
offsets_r[num_r] = ++i; num_r += comp(*--it, pivot);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int num = std::min(num_l, num_r);
|
||||||
|
swap_offsets(first, last, offsets_l + start_l, offsets_r + start_r, num, num_l == num_r);
|
||||||
|
num_l -= num; num_r -= num;
|
||||||
|
start_l += num; start_r += num;
|
||||||
|
if (num_l == 0) first += l_size;
|
||||||
|
if (num_r == 0) last -= r_size;
|
||||||
|
|
||||||
|
// We have now fully identified [first, last)'s proper position. Swap the last elements.
|
||||||
|
if (num_l) {
|
||||||
|
offsets_l += start_l;
|
||||||
|
while (num_l--) std::iter_swap(first + offsets_l[num_l], --last);
|
||||||
|
first = last;
|
||||||
|
}
|
||||||
|
if (num_r) {
|
||||||
|
offsets_r += start_r;
|
||||||
|
while (num_r--) std::iter_swap(last - offsets_r[num_r], first), ++first;
|
||||||
|
last = first;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Put the pivot in the right place.
|
||||||
|
Iter pivot_pos = first - 1;
|
||||||
|
*begin = PDQSORT_PREFER_MOVE(*pivot_pos);
|
||||||
|
*pivot_pos = PDQSORT_PREFER_MOVE(pivot);
|
||||||
|
|
||||||
|
return std::make_pair(pivot_pos, already_partitioned);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Partitions [begin, end) around pivot *begin using comparison function comp. Elements equal
|
||||||
|
// to the pivot are put in the right-hand partition. Returns the position of the pivot after
|
||||||
|
// partitioning and whether the passed sequence already was correctly partitioned. Assumes the
|
||||||
|
// pivot is a median of at least 3 elements and that [begin, end) is at least
|
||||||
|
// insertion_sort_threshold long.
|
||||||
|
template<class Iter, class Compare>
|
||||||
|
inline std::pair<Iter, bool> partition_right(Iter begin, Iter end, Compare comp) {
|
||||||
|
typedef typename std::iterator_traits<Iter>::value_type T;
|
||||||
|
|
||||||
|
// Move pivot into local for speed.
|
||||||
|
T pivot(PDQSORT_PREFER_MOVE(*begin));
|
||||||
|
|
||||||
|
Iter first = begin;
|
||||||
|
Iter last = end;
|
||||||
|
|
||||||
|
// Find the first element greater than or equal than the pivot (the median of 3 guarantees
|
||||||
|
// this exists).
|
||||||
|
while (comp(*++first, pivot));
|
||||||
|
|
||||||
|
// Find the first element strictly smaller than the pivot. We have to guard this search if
|
||||||
|
// there was no element before *first.
|
||||||
|
if (first - 1 == begin) while (first < last && !comp(*--last, pivot));
|
||||||
|
else while ( !comp(*--last, pivot));
|
||||||
|
|
||||||
|
// If the first pair of elements that should be swapped to partition are the same element,
|
||||||
|
// the passed in sequence already was correctly partitioned.
|
||||||
|
bool already_partitioned = first >= last;
|
||||||
|
|
||||||
|
// Keep swapping pairs of elements that are on the wrong side of the pivot. Previously
|
||||||
|
// swapped pairs guard the searches, which is why the first iteration is special-cased
|
||||||
|
// above.
|
||||||
|
while (first < last) {
|
||||||
|
std::iter_swap(first, last);
|
||||||
|
while (comp(*++first, pivot));
|
||||||
|
while (!comp(*--last, pivot));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Put the pivot in the right place.
|
||||||
|
Iter pivot_pos = first - 1;
|
||||||
|
*begin = PDQSORT_PREFER_MOVE(*pivot_pos);
|
||||||
|
*pivot_pos = PDQSORT_PREFER_MOVE(pivot);
|
||||||
|
|
||||||
|
return std::make_pair(pivot_pos, already_partitioned);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Similar function to the one above, except elements equal to the pivot are put to the left of
|
||||||
|
// the pivot and it doesn't check or return if the passed sequence already was partitioned.
|
||||||
|
// Since this is rarely used (the many equal case), and in that case pdqsort already has O(n)
|
||||||
|
// performance, no block quicksort is applied here for simplicity.
|
||||||
|
template<class Iter, class Compare>
|
||||||
|
inline Iter partition_left(Iter begin, Iter end, Compare comp) {
|
||||||
|
typedef typename std::iterator_traits<Iter>::value_type T;
|
||||||
|
|
||||||
|
T pivot(PDQSORT_PREFER_MOVE(*begin));
|
||||||
|
Iter first = begin;
|
||||||
|
Iter last = end;
|
||||||
|
|
||||||
|
while (comp(pivot, *--last));
|
||||||
|
|
||||||
|
if (last + 1 == end) while (first < last && !comp(pivot, *++first));
|
||||||
|
else while ( !comp(pivot, *++first));
|
||||||
|
|
||||||
|
while (first < last) {
|
||||||
|
std::iter_swap(first, last);
|
||||||
|
while (comp(pivot, *--last));
|
||||||
|
while (!comp(pivot, *++first));
|
||||||
|
}
|
||||||
|
|
||||||
|
Iter pivot_pos = last;
|
||||||
|
*begin = PDQSORT_PREFER_MOVE(*pivot_pos);
|
||||||
|
*pivot_pos = PDQSORT_PREFER_MOVE(pivot);
|
||||||
|
|
||||||
|
return pivot_pos;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template<class Iter, class Compare, bool Branchless>
|
||||||
|
inline void pdqsort_loop(Iter begin, Iter end, Compare comp, int bad_allowed, bool leftmost = true) {
|
||||||
|
typedef typename std::iterator_traits<Iter>::difference_type diff_t;
|
||||||
|
|
||||||
|
// Use a while loop for tail recursion elimination.
|
||||||
|
while (true) {
|
||||||
|
diff_t size = end - begin;
|
||||||
|
|
||||||
|
// Insertion sort is faster for small arrays.
|
||||||
|
if (size < insertion_sort_threshold) {
|
||||||
|
if (leftmost) insertion_sort(begin, end, comp);
|
||||||
|
else unguarded_insertion_sort(begin, end, comp);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Choose pivot as median of 3 or pseudomedian of 9.
|
||||||
|
diff_t s2 = size / 2;
|
||||||
|
if (size > ninther_threshold) {
|
||||||
|
sort3(begin, begin + s2, end - 1, comp);
|
||||||
|
sort3(begin + 1, begin + (s2 - 1), end - 2, comp);
|
||||||
|
sort3(begin + 2, begin + (s2 + 1), end - 3, comp);
|
||||||
|
sort3(begin + (s2 - 1), begin + s2, begin + (s2 + 1), comp);
|
||||||
|
std::iter_swap(begin, begin + s2);
|
||||||
|
} else sort3(begin + s2, begin, end - 1, comp);
|
||||||
|
|
||||||
|
// If *(begin - 1) is the end of the right partition of a previous partition operation
|
||||||
|
// there is no element in [begin, end) that is smaller than *(begin - 1). Then if our
|
||||||
|
// pivot compares equal to *(begin - 1) we change strategy, putting equal elements in
|
||||||
|
// the left partition, greater elements in the right partition. We do not have to
|
||||||
|
// recurse on the left partition, since it's sorted (all equal).
|
||||||
|
if (!leftmost && !comp(*(begin - 1), *begin)) {
|
||||||
|
begin = partition_left(begin, end, comp) + 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Partition and get results.
|
||||||
|
std::pair<Iter, bool> part_result =
|
||||||
|
Branchless ? partition_right_branchless(begin, end, comp)
|
||||||
|
: partition_right(begin, end, comp);
|
||||||
|
Iter pivot_pos = part_result.first;
|
||||||
|
bool already_partitioned = part_result.second;
|
||||||
|
|
||||||
|
// Check for a highly unbalanced partition.
|
||||||
|
diff_t l_size = pivot_pos - begin;
|
||||||
|
diff_t r_size = end - (pivot_pos + 1);
|
||||||
|
bool highly_unbalanced = l_size < size / 8 || r_size < size / 8;
|
||||||
|
|
||||||
|
// If we got a highly unbalanced partition we shuffle elements to break many patterns.
|
||||||
|
if (highly_unbalanced) {
|
||||||
|
// If we had too many bad partitions, switch to heapsort to guarantee O(n log n).
|
||||||
|
if (--bad_allowed == 0) {
|
||||||
|
std::make_heap(begin, end, comp);
|
||||||
|
std::sort_heap(begin, end, comp);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (l_size >= insertion_sort_threshold) {
|
||||||
|
std::iter_swap(begin, begin + l_size / 4);
|
||||||
|
std::iter_swap(pivot_pos - 1, pivot_pos - l_size / 4);
|
||||||
|
|
||||||
|
if (l_size > ninther_threshold) {
|
||||||
|
std::iter_swap(begin + 1, begin + (l_size / 4 + 1));
|
||||||
|
std::iter_swap(begin + 2, begin + (l_size / 4 + 2));
|
||||||
|
std::iter_swap(pivot_pos - 2, pivot_pos - (l_size / 4 + 1));
|
||||||
|
std::iter_swap(pivot_pos - 3, pivot_pos - (l_size / 4 + 2));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (r_size >= insertion_sort_threshold) {
|
||||||
|
std::iter_swap(pivot_pos + 1, pivot_pos + (1 + r_size / 4));
|
||||||
|
std::iter_swap(end - 1, end - r_size / 4);
|
||||||
|
|
||||||
|
if (r_size > ninther_threshold) {
|
||||||
|
std::iter_swap(pivot_pos + 2, pivot_pos + (2 + r_size / 4));
|
||||||
|
std::iter_swap(pivot_pos + 3, pivot_pos + (3 + r_size / 4));
|
||||||
|
std::iter_swap(end - 2, end - (1 + r_size / 4));
|
||||||
|
std::iter_swap(end - 3, end - (2 + r_size / 4));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// If we were decently balanced and we tried to sort an already partitioned
|
||||||
|
// sequence try to use insertion sort.
|
||||||
|
if (already_partitioned && partial_insertion_sort(begin, pivot_pos, comp)
|
||||||
|
&& partial_insertion_sort(pivot_pos + 1, end, comp)) return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sort the left partition first using recursion and do tail recursion elimination for
|
||||||
|
// the right-hand partition.
|
||||||
|
pdqsort_loop<Iter, Compare, Branchless>(begin, pivot_pos, comp, bad_allowed, leftmost);
|
||||||
|
begin = pivot_pos + 1;
|
||||||
|
leftmost = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template<class Iter, class Compare>
|
||||||
|
inline void pdqsort(Iter begin, Iter end, Compare comp) {
|
||||||
|
if (begin == end) return;
|
||||||
|
|
||||||
|
#if __cplusplus >= 201103L
|
||||||
|
pdqsort_detail::pdqsort_loop<Iter, Compare,
|
||||||
|
pdqsort_detail::is_default_compare<typename std::decay<Compare>::type>::value &&
|
||||||
|
std::is_arithmetic<typename std::iterator_traits<Iter>::value_type>::value>(
|
||||||
|
begin, end, comp, pdqsort_detail::log2(end - begin));
|
||||||
|
#else
|
||||||
|
pdqsort_detail::pdqsort_loop<Iter, Compare, false>(
|
||||||
|
begin, end, comp, pdqsort_detail::log2(end - begin));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Iter>
|
||||||
|
inline void pdqsort(Iter begin, Iter end) {
|
||||||
|
typedef typename std::iterator_traits<Iter>::value_type T;
|
||||||
|
pdqsort(begin, end, std::less<T>());
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Iter, class Compare>
|
||||||
|
inline void pdqsort_branchless(Iter begin, Iter end, Compare comp) {
|
||||||
|
if (begin == end) return;
|
||||||
|
pdqsort_detail::pdqsort_loop<Iter, Compare, true>(
|
||||||
|
begin, end, comp, pdqsort_detail::log2(end - begin));
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Iter>
|
||||||
|
inline void pdqsort_branchless(Iter begin, Iter end) {
|
||||||
|
typedef typename std::iterator_traits<Iter>::value_type T;
|
||||||
|
pdqsort_branchless(begin, end, std::less<T>());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#undef PDQSORT_PREFER_MOVE
|
||||||
|
|
||||||
|
#endif
|
119
contrib/pdqsort/readme.md
Normal file
119
contrib/pdqsort/readme.md
Normal file
@ -0,0 +1,119 @@
|
|||||||
|
pdqsort
|
||||||
|
-------
|
||||||
|
|
||||||
|
Pattern-defeating quicksort (pdqsort) is a novel sorting algorithm that combines the fast average
|
||||||
|
case of randomized quicksort with the fast worst case of heapsort, while achieving linear time on
|
||||||
|
inputs with certain patterns. pdqsort is an extension and improvement of David Mussers introsort.
|
||||||
|
All code is available for free under the zlib license.
|
||||||
|
|
||||||
|
Best Average Worst Memory Stable Deterministic
|
||||||
|
n n log n n log n log n No Yes
|
||||||
|
|
||||||
|
### Usage
|
||||||
|
|
||||||
|
`pdqsort` is a drop-in replacement for [`std::sort`](http://en.cppreference.com/w/cpp/algorithm/sort).
|
||||||
|
Just replace a call to `std::sort` with `pdqsort` to start using pattern-defeating quicksort. If your
|
||||||
|
comparison function is branchless, you can call `pdqsort_branchless` for a potential big speedup. If
|
||||||
|
you are using C++11, the type you're sorting is arithmetic and your comparison function is not given
|
||||||
|
or is `std::less`/`std::greater`, `pdqsort` automatically delegates to `pdqsort_branchless`.
|
||||||
|
|
||||||
|
### Benchmark
|
||||||
|
|
||||||
|
A comparison of pdqsort and GCC's `std::sort` and `std::stable_sort` with various input
|
||||||
|
distributions:
|
||||||
|
|
||||||
|
![Performance graph](http://i.imgur.com/1RnIGBO.png)
|
||||||
|
|
||||||
|
Compiled with `-std=c++11 -O2 -m64 -march=native`.
|
||||||
|
|
||||||
|
|
||||||
|
### Visualization
|
||||||
|
|
||||||
|
A visualization of pattern-defeating quicksort sorting a ~200 element array with some duplicates.
|
||||||
|
Generated using Timo Bingmann's [The Sound of Sorting](http://panthema.net/2013/sound-of-sorting/)
|
||||||
|
program, a tool that has been invaluable during the development of pdqsort. For the purposes of
|
||||||
|
this visualization the cutoff point for insertion sort was lowered to 8 elements.
|
||||||
|
|
||||||
|
![Visualization](http://i.imgur.com/QzFG09F.gif)
|
||||||
|
|
||||||
|
|
||||||
|
### The best case
|
||||||
|
|
||||||
|
pdqsort is designed to run in linear time for a couple of best-case patterns. Linear time is
|
||||||
|
achieved for inputs that are in strictly ascending or descending order, only contain equal elements,
|
||||||
|
or are strictly in ascending order followed by one out-of-place element. There are two separate
|
||||||
|
mechanisms at play to achieve this.
|
||||||
|
|
||||||
|
For equal elements a smart partitioning scheme is used that always puts equal elements in the
|
||||||
|
partition containing elements greater than the pivot. When a new pivot is chosen it's compared to
|
||||||
|
the greatest element in the partition before it. If they compare equal we can derive that there are
|
||||||
|
no elements smaller than the chosen pivot. When this happens we switch strategy for this partition,
|
||||||
|
and filter out all elements equal to the pivot.
|
||||||
|
|
||||||
|
To get linear time for the other patterns we check after every partition if any swaps were made. If
|
||||||
|
no swaps were made and the partition was decently balanced we will optimistically attempt to use
|
||||||
|
insertion sort. This insertion sort aborts if more than a constant amount of moves are required to
|
||||||
|
sort.
|
||||||
|
|
||||||
|
|
||||||
|
### The average case
|
||||||
|
|
||||||
|
On average case data where no patterns are detected pdqsort is effectively a quicksort that uses
|
||||||
|
median-of-3 pivot selection, switching to insertion sort if the number of elements to be
|
||||||
|
(recursively) sorted is small. The overhead associated with detecting the patterns for the best case
|
||||||
|
is so small it lies within the error of measurement.
|
||||||
|
|
||||||
|
pdqsort gets a great speedup over the traditional way of implementing quicksort when sorting large
|
||||||
|
arrays (1000+ elements). This is due to a new technique described in "BlockQuicksort: How Branch
|
||||||
|
Mispredictions don't affect Quicksort" by Stefan Edelkamp and Armin Weiss. In short, we bypass the
|
||||||
|
branch predictor by using small buffers (entirely in L1 cache) of the indices of elements that need
|
||||||
|
to be swapped. We fill these buffers in a branch-free way that's quite elegant (in pseudocode):
|
||||||
|
|
||||||
|
```cpp
|
||||||
|
buffer_num = 0; buffer_max_size = 64;
|
||||||
|
for (int i = 0; i < buffer_max_size; ++i) {
|
||||||
|
// With branch:
|
||||||
|
if (elements[i] < pivot) { buffer[buffer_num] = i; buffer_num++; }
|
||||||
|
// Without:
|
||||||
|
buffer[buffer_num] = i; buffer_num += (elements[i] < pivot);
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
This is only a speedup if the comparison function itself is branchless, however. By default pdqsort
|
||||||
|
will detect this if you're using C++11 or higher, the type you're sorting is arithmetic (e.g.
|
||||||
|
`int`), and you're using either `std::less` or `std::greater`. You can explicitly request branchless
|
||||||
|
partitioning by calling `pdqsort_branchless` instead of `pdqsort`.
|
||||||
|
|
||||||
|
|
||||||
|
### The worst case
|
||||||
|
|
||||||
|
Quicksort naturally performs bad on inputs that form patterns, due to it being a partition-based
|
||||||
|
sort. Choosing a bad pivot will result in many comparisons that give little to no progress in the
|
||||||
|
sorting process. If the pattern does not get broken up, this can happen many times in a row. Worse,
|
||||||
|
real world data is filled with these patterns.
|
||||||
|
|
||||||
|
Traditionally the solution to this is to randomize the pivot selection of quicksort. While this
|
||||||
|
technically still allows for a quadratic worst case, the chances of it happening are astronomically
|
||||||
|
small. Later, in introsort, pivot selection is kept deterministic, instead switching to the
|
||||||
|
guaranteed O(n log n) heapsort if the recursion depth becomes too big. In pdqsort we adopt a hybrid
|
||||||
|
approach, (deterministically) shuffling some elements to break up patterns when we encounter a "bad"
|
||||||
|
partition. If we encounter too many "bad" partitions we switch to heapsort.
|
||||||
|
|
||||||
|
|
||||||
|
### Bad partitions
|
||||||
|
|
||||||
|
A bad partition occurs when the position of the pivot after partitioning is under 12.5% (1/8th)
|
||||||
|
percentile or over 87,5% percentile - the partition is highly unbalanced. When this happens we will
|
||||||
|
shuffle four elements at fixed locations for both partitions. This effectively breaks up many
|
||||||
|
patterns. If we encounter more than log(n) bad partitions we will switch to heapsort.
|
||||||
|
|
||||||
|
The 1/8th percentile is not chosen arbitrarily. An upper bound of quicksorts worst case runtime can
|
||||||
|
be approximated within a constant factor by the following recurrence:
|
||||||
|
|
||||||
|
T(n, p) = n + T(p(n-1), p) + T((1-p)(n-1), p)
|
||||||
|
|
||||||
|
Where n is the number of elements, and p is the percentile of the pivot after partitioning.
|
||||||
|
`T(n, 1/2)` is the best case for quicksort. On modern systems heapsort is profiled to be
|
||||||
|
approximately 1.8 to 2 times as slow as quicksort. Choosing p such that `T(n, 1/2) / T(n, p) ~= 1.9`
|
||||||
|
as n gets big will ensure that we will only switch to heapsort if it would speed up the sorting.
|
||||||
|
p = 1/8 is a reasonably close value and is cheap to compute on every platform using a bitshift.
|
@ -102,7 +102,9 @@ add_headers_and_sources(dbms src/Interpreters/ClusterProxy)
|
|||||||
add_headers_and_sources(dbms src/Columns)
|
add_headers_and_sources(dbms src/Columns)
|
||||||
add_headers_and_sources(dbms src/Storages)
|
add_headers_and_sources(dbms src/Storages)
|
||||||
add_headers_and_sources(dbms src/Storages/Distributed)
|
add_headers_and_sources(dbms src/Storages/Distributed)
|
||||||
add_headers_and_sources(dbms src/Storages/Kafka)
|
if(USE_RDKAFKA)
|
||||||
|
add_headers_and_sources(dbms src/Storages/Kafka)
|
||||||
|
endif()
|
||||||
add_headers_and_sources(dbms src/Storages/MergeTree)
|
add_headers_and_sources(dbms src/Storages/MergeTree)
|
||||||
add_headers_and_sources(dbms src/Client)
|
add_headers_and_sources(dbms src/Client)
|
||||||
add_headers_and_sources(dbms src/Formats)
|
add_headers_and_sources(dbms src/Formats)
|
||||||
|
@ -1,11 +1,11 @@
|
|||||||
# This strings autochanged from release_lib.sh:
|
# This strings autochanged from release_lib.sh:
|
||||||
set(VERSION_REVISION 54413)
|
set(VERSION_REVISION 54415)
|
||||||
set(VERSION_MAJOR 19)
|
set(VERSION_MAJOR 19)
|
||||||
set(VERSION_MINOR 1)
|
set(VERSION_MINOR 3)
|
||||||
set(VERSION_PATCH 6)
|
set(VERSION_PATCH 0)
|
||||||
set(VERSION_GITHASH f73b337a93d534671b2187660398b8573fc1d464)
|
set(VERSION_GITHASH 1db4bd8c2a1a0cd610c8a6564e8194dca5265562)
|
||||||
set(VERSION_DESCRIBE v19.1.6-testing)
|
set(VERSION_DESCRIBE v19.3.0-testing)
|
||||||
set(VERSION_STRING 19.1.6)
|
set(VERSION_STRING 19.3.0)
|
||||||
# end of autochange
|
# end of autochange
|
||||||
|
|
||||||
set(VERSION_EXTRA "" CACHE STRING "")
|
set(VERSION_EXTRA "" CACHE STRING "")
|
||||||
|
@ -12,6 +12,7 @@
|
|||||||
#include <unordered_set>
|
#include <unordered_set>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
|
#include <ext/scope_guard.h>
|
||||||
#include <boost/program_options.hpp>
|
#include <boost/program_options.hpp>
|
||||||
#include <boost/algorithm/string/replace.hpp>
|
#include <boost/algorithm/string/replace.hpp>
|
||||||
#include <Poco/String.h>
|
#include <Poco/String.h>
|
||||||
@ -400,6 +401,7 @@ private:
|
|||||||
throw Exception("time option could be specified only in non-interactive mode", ErrorCodes::BAD_ARGUMENTS);
|
throw Exception("time option could be specified only in non-interactive mode", ErrorCodes::BAD_ARGUMENTS);
|
||||||
|
|
||||||
#if USE_READLINE
|
#if USE_READLINE
|
||||||
|
SCOPE_EXIT({ Suggest::instance().finalize(); });
|
||||||
if (server_revision >= Suggest::MIN_SERVER_REVISION
|
if (server_revision >= Suggest::MIN_SERVER_REVISION
|
||||||
&& !config().getBool("disable_suggestion", false))
|
&& !config().getBool("disable_suggestion", false))
|
||||||
{
|
{
|
||||||
@ -722,7 +724,11 @@ private:
|
|||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
if (!processSingleQuery(str, ast) && !ignore_error)
|
auto ast_to_process = ast;
|
||||||
|
if (insert && insert->data)
|
||||||
|
ast_to_process = nullptr;
|
||||||
|
|
||||||
|
if (!processSingleQuery(str, ast_to_process) && !ignore_error)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
catch (...)
|
catch (...)
|
||||||
@ -1029,6 +1035,17 @@ private:
|
|||||||
InterruptListener interrupt_listener;
|
InterruptListener interrupt_listener;
|
||||||
bool cancelled = false;
|
bool cancelled = false;
|
||||||
|
|
||||||
|
// TODO: get the poll_interval from commandline.
|
||||||
|
const auto receive_timeout = connection->getTimeouts().receive_timeout;
|
||||||
|
constexpr size_t default_poll_interval = 1000000; /// in microseconds
|
||||||
|
constexpr size_t min_poll_interval = 5000; /// in microseconds
|
||||||
|
const size_t poll_interval
|
||||||
|
= std::max(min_poll_interval, std::min<size_t>(receive_timeout.totalMicroseconds(), default_poll_interval));
|
||||||
|
|
||||||
|
while (true)
|
||||||
|
{
|
||||||
|
Stopwatch receive_watch(CLOCK_MONOTONIC_COARSE);
|
||||||
|
|
||||||
while (true)
|
while (true)
|
||||||
{
|
{
|
||||||
/// Has the Ctrl+C been pressed and thus the query should be cancelled?
|
/// Has the Ctrl+C been pressed and thus the query should be cancelled?
|
||||||
@ -1036,8 +1053,7 @@ private:
|
|||||||
/// to avoid losing sync.
|
/// to avoid losing sync.
|
||||||
if (!cancelled)
|
if (!cancelled)
|
||||||
{
|
{
|
||||||
if (interrupt_listener.check())
|
auto cancelQuery = [&] {
|
||||||
{
|
|
||||||
connection->sendCancel();
|
connection->sendCancel();
|
||||||
cancelled = true;
|
cancelled = true;
|
||||||
if (is_interactive)
|
if (is_interactive)
|
||||||
@ -1045,9 +1061,30 @@ private:
|
|||||||
|
|
||||||
/// Pressing Ctrl+C twice results in shut down.
|
/// Pressing Ctrl+C twice results in shut down.
|
||||||
interrupt_listener.unblock();
|
interrupt_listener.unblock();
|
||||||
|
};
|
||||||
|
|
||||||
|
if (interrupt_listener.check())
|
||||||
|
{
|
||||||
|
cancelQuery();
|
||||||
}
|
}
|
||||||
else if (!connection->poll(1000000))
|
else
|
||||||
continue; /// If there is no new data, continue checking whether the query was cancelled after a timeout.
|
{
|
||||||
|
double elapsed = receive_watch.elapsedSeconds();
|
||||||
|
if (elapsed > receive_timeout.totalSeconds())
|
||||||
|
{
|
||||||
|
std::cout << "Timeout exceeded while receiving data from server."
|
||||||
|
<< " Waited for " << static_cast<size_t>(elapsed) << " seconds,"
|
||||||
|
<< " timeout is " << receive_timeout.totalSeconds() << " seconds." << std::endl;
|
||||||
|
|
||||||
|
cancelQuery();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Poll for changes after a cancellation check, otherwise it never reached
|
||||||
|
/// because of progress updates from server.
|
||||||
|
if (connection->poll(poll_interval))
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!receiveAndProcessPacket())
|
if (!receiveAndProcessPacket())
|
||||||
@ -1303,7 +1340,11 @@ private:
|
|||||||
|
|
||||||
void onProgress(const Progress & value)
|
void onProgress(const Progress & value)
|
||||||
{
|
{
|
||||||
progress.incrementPiecewiseAtomically(value);
|
if (!progress.incrementPiecewiseAtomically(value))
|
||||||
|
{
|
||||||
|
// Just a keep-alive update.
|
||||||
|
return;
|
||||||
|
}
|
||||||
if (block_out_stream)
|
if (block_out_stream)
|
||||||
block_out_stream->onProgress(value);
|
block_out_stream->onProgress(value);
|
||||||
writeProgress();
|
writeProgress();
|
||||||
@ -1542,12 +1583,19 @@ public:
|
|||||||
po::options_description main_description("Main options", line_length, min_description_length);
|
po::options_description main_description("Main options", line_length, min_description_length);
|
||||||
main_description.add_options()
|
main_description.add_options()
|
||||||
("help", "produce help message")
|
("help", "produce help message")
|
||||||
("config-file,c", po::value<std::string>(), "config-file path")
|
("config-file,C", po::value<std::string>(), "config-file path")
|
||||||
|
("config,c", po::value<std::string>(), "config-file path (another shorthand)")
|
||||||
("host,h", po::value<std::string>()->default_value("localhost"), "server host")
|
("host,h", po::value<std::string>()->default_value("localhost"), "server host")
|
||||||
("port", po::value<int>()->default_value(9000), "server port")
|
("port", po::value<int>()->default_value(9000), "server port")
|
||||||
("secure,s", "Use TLS connection")
|
("secure,s", "Use TLS connection")
|
||||||
("user,u", po::value<std::string>()->default_value("default"), "user")
|
("user,u", po::value<std::string>()->default_value("default"), "user")
|
||||||
("password", po::value<std::string>(), "password")
|
/** If "--password [value]" is used but the value is omitted, the bad argument exception will be thrown.
|
||||||
|
* implicit_value is used to avoid this exception (to allow user to type just "--password")
|
||||||
|
* Since currently boost provides no way to check if a value has been set implicitly for an option,
|
||||||
|
* the "\n" is used to distinguish this case because there is hardly a chance an user would use "\n"
|
||||||
|
* as the password.
|
||||||
|
*/
|
||||||
|
("password", po::value<std::string>()->implicit_value("\n"), "password")
|
||||||
("ask-password", "ask-password")
|
("ask-password", "ask-password")
|
||||||
("query_id", po::value<std::string>(), "query_id")
|
("query_id", po::value<std::string>(), "query_id")
|
||||||
("query,q", po::value<std::string>(), "query")
|
("query,q", po::value<std::string>(), "query")
|
||||||
@ -1585,13 +1633,11 @@ public:
|
|||||||
("structure", po::value<std::string>(), "structure")
|
("structure", po::value<std::string>(), "structure")
|
||||||
("types", po::value<std::string>(), "types")
|
("types", po::value<std::string>(), "types")
|
||||||
;
|
;
|
||||||
|
|
||||||
/// Parse main commandline options.
|
/// Parse main commandline options.
|
||||||
po::parsed_options parsed = po::command_line_parser(
|
po::parsed_options parsed = po::command_line_parser(
|
||||||
common_arguments.size(), common_arguments.data()).options(main_description).run();
|
common_arguments.size(), common_arguments.data()).options(main_description).run();
|
||||||
po::variables_map options;
|
po::variables_map options;
|
||||||
po::store(parsed, options);
|
po::store(parsed, options);
|
||||||
|
|
||||||
if (options.count("version") || options.count("V"))
|
if (options.count("version") || options.count("V"))
|
||||||
{
|
{
|
||||||
showClientVersion();
|
showClientVersion();
|
||||||
@ -1645,13 +1691,21 @@ public:
|
|||||||
/// Extract settings from the options.
|
/// Extract settings from the options.
|
||||||
#define EXTRACT_SETTING(TYPE, NAME, DEFAULT, DESCRIPTION) \
|
#define EXTRACT_SETTING(TYPE, NAME, DEFAULT, DESCRIPTION) \
|
||||||
if (options.count(#NAME)) \
|
if (options.count(#NAME)) \
|
||||||
context.setSetting(#NAME, options[#NAME].as<std::string>());
|
{ \
|
||||||
|
context.setSetting(#NAME, options[#NAME].as<std::string>()); \
|
||||||
|
config().setString(#NAME, options[#NAME].as<std::string>()); \
|
||||||
|
}
|
||||||
APPLY_FOR_SETTINGS(EXTRACT_SETTING)
|
APPLY_FOR_SETTINGS(EXTRACT_SETTING)
|
||||||
#undef EXTRACT_SETTING
|
#undef EXTRACT_SETTING
|
||||||
|
|
||||||
|
if (options.count("config-file") && options.count("config"))
|
||||||
|
throw Exception("Two or more configuration files referenced in arguments", ErrorCodes::BAD_ARGUMENTS);
|
||||||
|
|
||||||
/// Save received data into the internal config.
|
/// Save received data into the internal config.
|
||||||
if (options.count("config-file"))
|
if (options.count("config-file"))
|
||||||
config().setString("config-file", options["config-file"].as<std::string>());
|
config().setString("config-file", options["config-file"].as<std::string>());
|
||||||
|
if (options.count("config"))
|
||||||
|
config().setString("config-file", options["config"].as<std::string>());
|
||||||
if (options.count("host") && !options["host"].defaulted())
|
if (options.count("host") && !options["host"].defaulted())
|
||||||
config().setString("host", options["host"].as<std::string>());
|
config().setString("host", options["host"].as<std::string>());
|
||||||
if (options.count("query_id"))
|
if (options.count("query_id"))
|
||||||
@ -1710,11 +1764,11 @@ public:
|
|||||||
|
|
||||||
int mainEntryClickHouseClient(int argc, char ** argv)
|
int mainEntryClickHouseClient(int argc, char ** argv)
|
||||||
{
|
{
|
||||||
DB::Client client;
|
|
||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
|
DB::Client client;
|
||||||
client.init(argc, argv);
|
client.init(argc, argv);
|
||||||
|
return client.run();
|
||||||
}
|
}
|
||||||
catch (const boost::program_options::error & e)
|
catch (const boost::program_options::error & e)
|
||||||
{
|
{
|
||||||
@ -1726,6 +1780,4 @@ int mainEntryClickHouseClient(int argc, char ** argv)
|
|||||||
std::cerr << DB::getCurrentExceptionMessage(true) << std::endl;
|
std::cerr << DB::getCurrentExceptionMessage(true) << std::endl;
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
return client.run();
|
|
||||||
}
|
}
|
||||||
|
@ -48,14 +48,25 @@ struct ConnectionParameters
|
|||||||
is_secure ? DBMS_DEFAULT_SECURE_PORT : DBMS_DEFAULT_PORT));
|
is_secure ? DBMS_DEFAULT_SECURE_PORT : DBMS_DEFAULT_PORT));
|
||||||
|
|
||||||
default_database = config.getString("database", "");
|
default_database = config.getString("database", "");
|
||||||
user = config.getString("user", "");
|
/// changed the default value to "default" to fix the issue when the user in the prompt is blank
|
||||||
|
user = config.getString("user", "default");
|
||||||
|
bool password_prompt = false;
|
||||||
if (config.getBool("ask-password", false))
|
if (config.getBool("ask-password", false))
|
||||||
{
|
{
|
||||||
if (config.has("password"))
|
if (config.has("password"))
|
||||||
throw Exception("Specified both --password and --ask-password. Remove one of them", ErrorCodes::BAD_ARGUMENTS);
|
throw Exception("Specified both --password and --ask-password. Remove one of them", ErrorCodes::BAD_ARGUMENTS);
|
||||||
|
password_prompt = true;
|
||||||
std::cout << "Password for user " << user << ": ";
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
password = config.getString("password", "");
|
||||||
|
/// if the value of --password is omitted, the password will be set implicitly to "\n"
|
||||||
|
if (password == "\n")
|
||||||
|
password_prompt = true;
|
||||||
|
}
|
||||||
|
if (password_prompt)
|
||||||
|
{
|
||||||
|
std::cout << "Password for user (" << user << "): ";
|
||||||
setTerminalEcho(false);
|
setTerminalEcho(false);
|
||||||
|
|
||||||
SCOPE_EXIT({
|
SCOPE_EXIT({
|
||||||
@ -64,19 +75,14 @@ struct ConnectionParameters
|
|||||||
std::getline(std::cin, password);
|
std::getline(std::cin, password);
|
||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
password = config.getString("password", "");
|
|
||||||
}
|
|
||||||
|
|
||||||
compression = config.getBool("compression", true)
|
compression = config.getBool("compression", true)
|
||||||
? Protocol::Compression::Enable
|
? Protocol::Compression::Enable
|
||||||
: Protocol::Compression::Disable;
|
: Protocol::Compression::Disable;
|
||||||
|
|
||||||
timeouts = ConnectionTimeouts(
|
timeouts = ConnectionTimeouts(
|
||||||
Poco::Timespan(config.getInt("connect_timeout", DBMS_DEFAULT_CONNECT_TIMEOUT_SEC), 0),
|
Poco::Timespan(config.getInt("connect_timeout", DBMS_DEFAULT_CONNECT_TIMEOUT_SEC), 0),
|
||||||
Poco::Timespan(config.getInt("receive_timeout", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0),
|
|
||||||
Poco::Timespan(config.getInt("send_timeout", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0),
|
Poco::Timespan(config.getInt("send_timeout", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0),
|
||||||
|
Poco::Timespan(config.getInt("receive_timeout", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0),
|
||||||
Poco::Timespan(config.getInt("tcp_keep_alive_timeout", 0), 0));
|
Poco::Timespan(config.getInt("tcp_keep_alive_timeout", 0), 0));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -39,7 +39,7 @@ private:
|
|||||||
"DATABASES", "LIKE", "PROCESSLIST", "CASE", "WHEN", "THEN", "ELSE", "END", "DESCRIBE", "DESC", "USE", "SET", "OPTIMIZE", "FINAL", "DEDUPLICATE",
|
"DATABASES", "LIKE", "PROCESSLIST", "CASE", "WHEN", "THEN", "ELSE", "END", "DESCRIBE", "DESC", "USE", "SET", "OPTIMIZE", "FINAL", "DEDUPLICATE",
|
||||||
"INSERT", "VALUES", "SELECT", "DISTINCT", "SAMPLE", "ARRAY", "JOIN", "GLOBAL", "LOCAL", "ANY", "ALL", "INNER", "LEFT", "RIGHT", "FULL", "OUTER",
|
"INSERT", "VALUES", "SELECT", "DISTINCT", "SAMPLE", "ARRAY", "JOIN", "GLOBAL", "LOCAL", "ANY", "ALL", "INNER", "LEFT", "RIGHT", "FULL", "OUTER",
|
||||||
"CROSS", "USING", "PREWHERE", "WHERE", "GROUP", "BY", "WITH", "TOTALS", "HAVING", "ORDER", "COLLATE", "LIMIT", "UNION", "AND", "OR", "ASC", "IN",
|
"CROSS", "USING", "PREWHERE", "WHERE", "GROUP", "BY", "WITH", "TOTALS", "HAVING", "ORDER", "COLLATE", "LIMIT", "UNION", "AND", "OR", "ASC", "IN",
|
||||||
"KILL", "QUERY", "SYNC", "ASYNC", "TEST"
|
"KILL", "QUERY", "SYNC", "ASYNC", "TEST", "BETWEEN"
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Words are fetched asynchonously.
|
/// Words are fetched asynchonously.
|
||||||
@ -194,6 +194,12 @@ public:
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void finalize()
|
||||||
|
{
|
||||||
|
if (loading_thread.joinable())
|
||||||
|
loading_thread.join();
|
||||||
|
}
|
||||||
|
|
||||||
/// A function for readline.
|
/// A function for readline.
|
||||||
static char * generator(const char * text, int state)
|
static char * generator(const char * text, int state)
|
||||||
{
|
{
|
||||||
@ -211,8 +217,7 @@ public:
|
|||||||
|
|
||||||
~Suggest()
|
~Suggest()
|
||||||
{
|
{
|
||||||
if (loading_thread.joinable())
|
finalize();
|
||||||
loading_thread.join();
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -67,7 +67,6 @@
|
|||||||
#include <Storages/StorageDistributed.h>
|
#include <Storages/StorageDistributed.h>
|
||||||
#include <Databases/DatabaseMemory.h>
|
#include <Databases/DatabaseMemory.h>
|
||||||
#include <Common/StatusFile.h>
|
#include <Common/StatusFile.h>
|
||||||
#include <daemon/OwnPatternFormatter.h>
|
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
@ -817,7 +816,7 @@ public:
|
|||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
type->deserializeTextQuoted(*column_dummy, rb, FormatSettings());
|
type->deserializeAsTextQuoted(*column_dummy, rb, FormatSettings());
|
||||||
}
|
}
|
||||||
catch (Exception & e)
|
catch (Exception & e)
|
||||||
{
|
{
|
||||||
@ -1179,7 +1178,7 @@ protected:
|
|||||||
/// Removes MATERIALIZED and ALIAS columns from create table query
|
/// Removes MATERIALIZED and ALIAS columns from create table query
|
||||||
static ASTPtr removeAliasColumnsFromCreateQuery(const ASTPtr & query_ast)
|
static ASTPtr removeAliasColumnsFromCreateQuery(const ASTPtr & query_ast)
|
||||||
{
|
{
|
||||||
const ASTs & column_asts = typeid_cast<ASTCreateQuery &>(*query_ast).columns->children;
|
const ASTs & column_asts = typeid_cast<ASTCreateQuery &>(*query_ast).columns_list->columns->children;
|
||||||
auto new_columns = std::make_shared<ASTExpressionList>();
|
auto new_columns = std::make_shared<ASTExpressionList>();
|
||||||
|
|
||||||
for (const ASTPtr & column_ast : column_asts)
|
for (const ASTPtr & column_ast : column_asts)
|
||||||
@ -1198,8 +1197,13 @@ protected:
|
|||||||
|
|
||||||
ASTPtr new_query_ast = query_ast->clone();
|
ASTPtr new_query_ast = query_ast->clone();
|
||||||
ASTCreateQuery & new_query = typeid_cast<ASTCreateQuery &>(*new_query_ast);
|
ASTCreateQuery & new_query = typeid_cast<ASTCreateQuery &>(*new_query_ast);
|
||||||
new_query.columns = new_columns.get();
|
|
||||||
new_query.children.at(0) = std::move(new_columns);
|
auto new_columns_list = std::make_shared<ASTColumns>();
|
||||||
|
new_columns_list->set(new_columns_list->columns, new_columns);
|
||||||
|
new_columns_list->set(
|
||||||
|
new_columns_list->indices, typeid_cast<ASTCreateQuery &>(*query_ast).columns_list->indices->clone());
|
||||||
|
|
||||||
|
new_query.replace(new_query.columns_list, new_columns_list);
|
||||||
|
|
||||||
return new_query_ast;
|
return new_query_ast;
|
||||||
}
|
}
|
||||||
@ -1217,7 +1221,7 @@ protected:
|
|||||||
res->table = new_table.second;
|
res->table = new_table.second;
|
||||||
|
|
||||||
res->children.clear();
|
res->children.clear();
|
||||||
res->set(res->columns, create.columns->clone());
|
res->set(res->columns_list, create.columns_list->clone());
|
||||||
res->set(res->storage, new_storage_ast->clone());
|
res->set(res->storage, new_storage_ast->clone());
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
@ -1877,7 +1881,7 @@ protected:
|
|||||||
for (size_t i = 0; i < column.column->size(); ++i)
|
for (size_t i = 0; i < column.column->size(); ++i)
|
||||||
{
|
{
|
||||||
WriteBufferFromOwnString wb;
|
WriteBufferFromOwnString wb;
|
||||||
column.type->serializeTextQuoted(*column.column, i, wb, FormatSettings());
|
column.type->serializeAsTextQuoted(*column.column, i, wb, FormatSettings());
|
||||||
res.emplace(wb.str());
|
res.emplace(wb.str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1037,7 +1037,7 @@ try
|
|||||||
|
|
||||||
Obfuscator obfuscator(header, seed, markov_model_params);
|
Obfuscator obfuscator(header, seed, markov_model_params);
|
||||||
|
|
||||||
size_t max_block_size = 8192;
|
UInt64 max_block_size = 8192;
|
||||||
|
|
||||||
/// Train step
|
/// Train step
|
||||||
{
|
{
|
||||||
|
@ -75,7 +75,7 @@ void ODBCHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Ne
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t max_block_size = DEFAULT_BLOCK_SIZE;
|
UInt64 max_block_size = DEFAULT_BLOCK_SIZE;
|
||||||
if (params.has("max_block_size"))
|
if (params.has("max_block_size"))
|
||||||
{
|
{
|
||||||
std::string max_block_size_str = params.get("max_block_size", "");
|
std::string max_block_size_str = params.get("max_block_size", "");
|
||||||
|
@ -18,6 +18,32 @@ namespace ErrorCodes
|
|||||||
extern const int NOT_IMPLEMENTED;
|
extern const int NOT_IMPLEMENTED;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
void waitQuery(Connection & connection)
|
||||||
|
{
|
||||||
|
bool finished = false;
|
||||||
|
while (true)
|
||||||
|
{
|
||||||
|
if (!connection.poll(1000000))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
Connection::Packet packet = connection.receivePacket();
|
||||||
|
switch (packet.type)
|
||||||
|
{
|
||||||
|
case Protocol::Server::EndOfStream:
|
||||||
|
finished = true;
|
||||||
|
break;
|
||||||
|
case Protocol::Server::Exception:
|
||||||
|
throw *packet.exception;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (finished)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
namespace fs = boost::filesystem;
|
namespace fs = boost::filesystem;
|
||||||
|
|
||||||
PerformanceTest::PerformanceTest(
|
PerformanceTest::PerformanceTest(
|
||||||
@ -25,12 +51,14 @@ PerformanceTest::PerformanceTest(
|
|||||||
Connection & connection_,
|
Connection & connection_,
|
||||||
InterruptListener & interrupt_listener_,
|
InterruptListener & interrupt_listener_,
|
||||||
const PerformanceTestInfo & test_info_,
|
const PerformanceTestInfo & test_info_,
|
||||||
Context & context_)
|
Context & context_,
|
||||||
|
const std::vector<size_t> & queries_to_run_)
|
||||||
: config(config_)
|
: config(config_)
|
||||||
, connection(connection_)
|
, connection(connection_)
|
||||||
, interrupt_listener(interrupt_listener_)
|
, interrupt_listener(interrupt_listener_)
|
||||||
, test_info(test_info_)
|
, test_info(test_info_)
|
||||||
, context(context_)
|
, context(context_)
|
||||||
|
, queries_to_run(queries_to_run_)
|
||||||
, log(&Poco::Logger::get("PerformanceTest"))
|
, log(&Poco::Logger::get("PerformanceTest"))
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
@ -133,14 +161,18 @@ void PerformanceTest::prepare() const
|
|||||||
{
|
{
|
||||||
for (const auto & query : test_info.create_queries)
|
for (const auto & query : test_info.create_queries)
|
||||||
{
|
{
|
||||||
LOG_INFO(log, "Executing create query '" << query << "'");
|
LOG_INFO(log, "Executing create query \"" << query << '\"');
|
||||||
connection.sendQuery(query);
|
connection.sendQuery(query, "", QueryProcessingStage::Complete, &test_info.settings, nullptr, false);
|
||||||
|
waitQuery(connection);
|
||||||
|
LOG_INFO(log, "Query finished");
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const auto & query : test_info.fill_queries)
|
for (const auto & query : test_info.fill_queries)
|
||||||
{
|
{
|
||||||
LOG_INFO(log, "Executing fill query '" << query << "'");
|
LOG_INFO(log, "Executing fill query \"" << query << '\"');
|
||||||
connection.sendQuery(query);
|
connection.sendQuery(query, "", QueryProcessingStage::Complete, &test_info.settings, nullptr, false);
|
||||||
|
waitQuery(connection);
|
||||||
|
LOG_INFO(log, "Query finished");
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -149,17 +181,24 @@ void PerformanceTest::finish() const
|
|||||||
{
|
{
|
||||||
for (const auto & query : test_info.drop_queries)
|
for (const auto & query : test_info.drop_queries)
|
||||||
{
|
{
|
||||||
LOG_INFO(log, "Executing drop query '" << query << "'");
|
LOG_INFO(log, "Executing drop query \"" << query << '\"');
|
||||||
connection.sendQuery(query);
|
connection.sendQuery(query, "", QueryProcessingStage::Complete, &test_info.settings, nullptr, false);
|
||||||
|
waitQuery(connection);
|
||||||
|
LOG_INFO(log, "Query finished");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<TestStats> PerformanceTest::execute()
|
std::vector<TestStats> PerformanceTest::execute()
|
||||||
{
|
{
|
||||||
std::vector<TestStats> statistics_by_run;
|
std::vector<TestStats> statistics_by_run;
|
||||||
|
size_t query_count;
|
||||||
|
if (queries_to_run.empty())
|
||||||
|
query_count = test_info.queries.size();
|
||||||
|
else
|
||||||
|
query_count = queries_to_run.size();
|
||||||
size_t total_runs = test_info.times_to_run * test_info.queries.size();
|
size_t total_runs = test_info.times_to_run * test_info.queries.size();
|
||||||
statistics_by_run.resize(total_runs);
|
statistics_by_run.resize(total_runs);
|
||||||
LOG_INFO(log, "Totally will run cases " << total_runs << " times");
|
LOG_INFO(log, "Totally will run cases " << test_info.times_to_run * query_count << " times");
|
||||||
UInt64 max_exec_time = calculateMaxExecTime();
|
UInt64 max_exec_time = calculateMaxExecTime();
|
||||||
if (max_exec_time != 0)
|
if (max_exec_time != 0)
|
||||||
LOG_INFO(log, "Test will be executed for a maximum of " << max_exec_time / 1000. << " seconds");
|
LOG_INFO(log, "Test will be executed for a maximum of " << max_exec_time / 1000. << " seconds");
|
||||||
@ -171,11 +210,15 @@ std::vector<TestStats> PerformanceTest::execute()
|
|||||||
QueriesWithIndexes queries_with_indexes;
|
QueriesWithIndexes queries_with_indexes;
|
||||||
|
|
||||||
for (size_t query_index = 0; query_index < test_info.queries.size(); ++query_index)
|
for (size_t query_index = 0; query_index < test_info.queries.size(); ++query_index)
|
||||||
|
{
|
||||||
|
if (queries_to_run.empty() || std::find(queries_to_run.begin(), queries_to_run.end(), query_index) != queries_to_run.end())
|
||||||
{
|
{
|
||||||
size_t statistic_index = number_of_launch * test_info.queries.size() + query_index;
|
size_t statistic_index = number_of_launch * test_info.queries.size() + query_index;
|
||||||
|
|
||||||
queries_with_indexes.push_back({test_info.queries[query_index], statistic_index});
|
queries_with_indexes.push_back({test_info.queries[query_index], statistic_index});
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
LOG_INFO(log, "Will skip query " << test_info.queries[query_index] << " by index");
|
||||||
|
}
|
||||||
|
|
||||||
if (got_SIGINT)
|
if (got_SIGINT)
|
||||||
break;
|
break;
|
||||||
|
@ -22,7 +22,8 @@ public:
|
|||||||
Connection & connection_,
|
Connection & connection_,
|
||||||
InterruptListener & interrupt_listener_,
|
InterruptListener & interrupt_listener_,
|
||||||
const PerformanceTestInfo & test_info_,
|
const PerformanceTestInfo & test_info_,
|
||||||
Context & context_);
|
Context & context_,
|
||||||
|
const std::vector<size_t> & queries_to_run_);
|
||||||
|
|
||||||
bool checkPreconditions() const;
|
bool checkPreconditions() const;
|
||||||
void prepare() const;
|
void prepare() const;
|
||||||
@ -54,6 +55,7 @@ private:
|
|||||||
PerformanceTestInfo test_info;
|
PerformanceTestInfo test_info;
|
||||||
Context & context;
|
Context & context;
|
||||||
|
|
||||||
|
std::vector<size_t> queries_to_run;
|
||||||
Poco::Logger * log;
|
Poco::Logger * log;
|
||||||
|
|
||||||
bool got_SIGINT = false;
|
bool got_SIGINT = false;
|
||||||
|
@ -36,42 +36,6 @@ void extractSettings(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void checkMetricsInput(const Strings & metrics, ExecutionType exec_type)
|
|
||||||
{
|
|
||||||
Strings loop_metrics = {
|
|
||||||
"min_time", "quantiles", "total_time",
|
|
||||||
"queries_per_second", "rows_per_second",
|
|
||||||
"bytes_per_second"};
|
|
||||||
|
|
||||||
Strings non_loop_metrics = {
|
|
||||||
"max_rows_per_second", "max_bytes_per_second",
|
|
||||||
"avg_rows_per_second", "avg_bytes_per_second"};
|
|
||||||
|
|
||||||
if (exec_type == ExecutionType::Loop)
|
|
||||||
{
|
|
||||||
for (const std::string & metric : metrics)
|
|
||||||
{
|
|
||||||
auto non_loop_pos =
|
|
||||||
std::find(non_loop_metrics.begin(), non_loop_metrics.end(), metric);
|
|
||||||
|
|
||||||
if (non_loop_pos != non_loop_metrics.end())
|
|
||||||
throw Exception("Wrong type of metric for loop execution type (" + metric + ")",
|
|
||||||
ErrorCodes::BAD_ARGUMENTS);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for (const std::string & metric : metrics)
|
|
||||||
{
|
|
||||||
auto loop_pos = std::find(loop_metrics.begin(), loop_metrics.end(), metric);
|
|
||||||
if (loop_pos != loop_metrics.end())
|
|
||||||
throw Exception(
|
|
||||||
"Wrong type of metric for non-loop execution type (" + metric + ")",
|
|
||||||
ErrorCodes::BAD_ARGUMENTS);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -84,12 +48,19 @@ PerformanceTestInfo::PerformanceTestInfo(
|
|||||||
{
|
{
|
||||||
test_name = config->getString("name");
|
test_name = config->getString("name");
|
||||||
path = config->getString("path");
|
path = config->getString("path");
|
||||||
|
if (config->has("main_metric"))
|
||||||
|
{
|
||||||
|
Strings main_metrics;
|
||||||
|
config->keys("main_metric", main_metrics);
|
||||||
|
if (main_metrics.size())
|
||||||
|
main_metric = main_metrics[0];
|
||||||
|
}
|
||||||
|
|
||||||
applySettings(config);
|
applySettings(config);
|
||||||
extractQueries(config);
|
extractQueries(config);
|
||||||
processSubstitutions(config);
|
processSubstitutions(config);
|
||||||
getExecutionType(config);
|
getExecutionType(config);
|
||||||
getStopConditions(config);
|
getStopConditions(config);
|
||||||
getMetrics(config);
|
|
||||||
extractAuxiliaryQueries(config);
|
extractAuxiliaryQueries(config);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -239,37 +210,6 @@ void PerformanceTestInfo::getStopConditions(XMLConfigurationPtr config)
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void PerformanceTestInfo::getMetrics(XMLConfigurationPtr config)
|
|
||||||
{
|
|
||||||
ConfigurationPtr metrics_view(config->createView("metrics"));
|
|
||||||
metrics_view->keys(metrics);
|
|
||||||
|
|
||||||
if (config->has("main_metric"))
|
|
||||||
{
|
|
||||||
Strings main_metrics;
|
|
||||||
config->keys("main_metric", main_metrics);
|
|
||||||
if (main_metrics.size())
|
|
||||||
main_metric = main_metrics[0];
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!main_metric.empty())
|
|
||||||
{
|
|
||||||
if (std::find(metrics.begin(), metrics.end(), main_metric) == metrics.end())
|
|
||||||
metrics.push_back(main_metric);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (metrics.empty())
|
|
||||||
throw Exception("You shoud specify at least one metric",
|
|
||||||
ErrorCodes::BAD_ARGUMENTS);
|
|
||||||
main_metric = metrics[0];
|
|
||||||
}
|
|
||||||
|
|
||||||
if (metrics.size() > 0)
|
|
||||||
checkMetricsInput(metrics, exec_type);
|
|
||||||
}
|
|
||||||
|
|
||||||
void PerformanceTestInfo::extractAuxiliaryQueries(XMLConfigurationPtr config)
|
void PerformanceTestInfo::extractAuxiliaryQueries(XMLConfigurationPtr config)
|
||||||
{
|
{
|
||||||
if (config->has("create_query"))
|
if (config->has("create_query"))
|
||||||
|
@ -33,7 +33,6 @@ public:
|
|||||||
std::string main_metric;
|
std::string main_metric;
|
||||||
|
|
||||||
Strings queries;
|
Strings queries;
|
||||||
Strings metrics;
|
|
||||||
|
|
||||||
Settings settings;
|
Settings settings;
|
||||||
ExecutionType exec_type;
|
ExecutionType exec_type;
|
||||||
|
@ -11,12 +11,13 @@
|
|||||||
#include <boost/filesystem.hpp>
|
#include <boost/filesystem.hpp>
|
||||||
#include <boost/program_options.hpp>
|
#include <boost/program_options.hpp>
|
||||||
|
|
||||||
#include <Poco/Util/XMLConfiguration.h>
|
#include <Poco/AutoPtr.h>
|
||||||
#include <Poco/Logger.h>
|
|
||||||
#include <Poco/ConsoleChannel.h>
|
#include <Poco/ConsoleChannel.h>
|
||||||
#include <Poco/FormattingChannel.h>
|
#include <Poco/FormattingChannel.h>
|
||||||
|
#include <Poco/Logger.h>
|
||||||
|
#include <Poco/Path.h>
|
||||||
#include <Poco/PatternFormatter.h>
|
#include <Poco/PatternFormatter.h>
|
||||||
|
#include <Poco/Util/XMLConfiguration.h>
|
||||||
|
|
||||||
#include <common/logger_useful.h>
|
#include <common/logger_useful.h>
|
||||||
#include <Client/Connection.h>
|
#include <Client/Connection.h>
|
||||||
@ -25,7 +26,6 @@
|
|||||||
#include <IO/ConnectionTimeouts.h>
|
#include <IO/ConnectionTimeouts.h>
|
||||||
#include <IO/UseSSL.h>
|
#include <IO/UseSSL.h>
|
||||||
#include <Interpreters/Settings.h>
|
#include <Interpreters/Settings.h>
|
||||||
#include <Poco/AutoPtr.h>
|
|
||||||
#include <Common/Exception.h>
|
#include <Common/Exception.h>
|
||||||
#include <Common/InterruptListener.h>
|
#include <Common/InterruptListener.h>
|
||||||
|
|
||||||
@ -70,6 +70,7 @@ public:
|
|||||||
Strings && skip_names_,
|
Strings && skip_names_,
|
||||||
Strings && tests_names_regexp_,
|
Strings && tests_names_regexp_,
|
||||||
Strings && skip_names_regexp_,
|
Strings && skip_names_regexp_,
|
||||||
|
const std::unordered_map<std::string, std::vector<size_t>> query_indexes_,
|
||||||
const ConnectionTimeouts & timeouts)
|
const ConnectionTimeouts & timeouts)
|
||||||
: connection(host_, port_, default_database_, user_,
|
: connection(host_, port_, default_database_, user_,
|
||||||
password_, timeouts, "performance-test", Protocol::Compression::Enable,
|
password_, timeouts, "performance-test", Protocol::Compression::Enable,
|
||||||
@ -80,6 +81,7 @@ public:
|
|||||||
, skip_tags(std::move(skip_tags_))
|
, skip_tags(std::move(skip_tags_))
|
||||||
, skip_names(std::move(skip_names_))
|
, skip_names(std::move(skip_names_))
|
||||||
, skip_names_regexp(std::move(skip_names_regexp_))
|
, skip_names_regexp(std::move(skip_names_regexp_))
|
||||||
|
, query_indexes(query_indexes_)
|
||||||
, lite_output(lite_output_)
|
, lite_output(lite_output_)
|
||||||
, profiles_file(profiles_file_)
|
, profiles_file(profiles_file_)
|
||||||
, input_files(input_files_)
|
, input_files(input_files_)
|
||||||
@ -128,6 +130,7 @@ private:
|
|||||||
const Strings & skip_tags;
|
const Strings & skip_tags;
|
||||||
const Strings & skip_names;
|
const Strings & skip_names;
|
||||||
const Strings & skip_names_regexp;
|
const Strings & skip_names_regexp;
|
||||||
|
std::unordered_map<std::string, std::vector<size_t>> query_indexes;
|
||||||
|
|
||||||
Context global_context = Context::createGlobal();
|
Context global_context = Context::createGlobal();
|
||||||
std::shared_ptr<ReportBuilder> report_builder;
|
std::shared_ptr<ReportBuilder> report_builder;
|
||||||
@ -167,11 +170,13 @@ private:
|
|||||||
for (auto & test_config : tests_configurations)
|
for (auto & test_config : tests_configurations)
|
||||||
{
|
{
|
||||||
auto [output, signal] = runTest(test_config);
|
auto [output, signal] = runTest(test_config);
|
||||||
|
if (!output.empty())
|
||||||
|
{
|
||||||
if (lite_output)
|
if (lite_output)
|
||||||
std::cout << output;
|
std::cout << output;
|
||||||
else
|
else
|
||||||
outputs.push_back(output);
|
outputs.push_back(output);
|
||||||
|
}
|
||||||
if (signal)
|
if (signal)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -198,12 +203,15 @@ private:
|
|||||||
{
|
{
|
||||||
PerformanceTestInfo info(test_config, profiles_file);
|
PerformanceTestInfo info(test_config, profiles_file);
|
||||||
LOG_INFO(log, "Config for test '" << info.test_name << "' parsed");
|
LOG_INFO(log, "Config for test '" << info.test_name << "' parsed");
|
||||||
PerformanceTest current(test_config, connection, interrupt_listener, info, global_context);
|
PerformanceTest current(test_config, connection, interrupt_listener, info, global_context, query_indexes[info.path]);
|
||||||
|
|
||||||
current.checkPreconditions();
|
if (current.checkPreconditions())
|
||||||
|
{
|
||||||
LOG_INFO(log, "Preconditions for test '" << info.test_name << "' are fullfilled");
|
LOG_INFO(log, "Preconditions for test '" << info.test_name << "' are fullfilled");
|
||||||
LOG_INFO(log, "Preparing for run, have " << info.create_queries.size()
|
LOG_INFO(
|
||||||
<< " create queries and " << info.fill_queries.size() << " fill queries");
|
log,
|
||||||
|
"Preparing for run, have " << info.create_queries.size() << " create queries and " << info.fill_queries.size()
|
||||||
|
<< " fill queries");
|
||||||
current.prepare();
|
current.prepare();
|
||||||
LOG_INFO(log, "Prepared");
|
LOG_INFO(log, "Prepared");
|
||||||
LOG_INFO(log, "Running test '" << info.test_name << "'");
|
LOG_INFO(log, "Running test '" << info.test_name << "'");
|
||||||
@ -213,13 +221,16 @@ private:
|
|||||||
LOG_INFO(log, "Running post run queries");
|
LOG_INFO(log, "Running post run queries");
|
||||||
current.finish();
|
current.finish();
|
||||||
LOG_INFO(log, "Postqueries finished");
|
LOG_INFO(log, "Postqueries finished");
|
||||||
|
|
||||||
if (lite_output)
|
if (lite_output)
|
||||||
return {report_builder->buildCompactReport(info, result), current.checkSIGINT()};
|
return {report_builder->buildCompactReport(info, result, query_indexes[info.path]), current.checkSIGINT()};
|
||||||
else
|
else
|
||||||
return {report_builder->buildFullReport(info, result), current.checkSIGINT()};
|
return {report_builder->buildFullReport(info, result, query_indexes[info.path]), current.checkSIGINT()};
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
LOG_INFO(log, "Preconditions for test '" << info.test_name << "' are not fullfilled, skip run");
|
||||||
|
|
||||||
|
return {"", current.checkSIGINT()};
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -289,6 +300,29 @@ static std::vector<std::string> getInputFiles(const po::variables_map & options,
|
|||||||
return input_files;
|
return input_files;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::unordered_map<std::string, std::vector<std::size_t>> getTestQueryIndexes(const po::basic_parsed_options<char> & parsed_opts)
|
||||||
|
{
|
||||||
|
std::unordered_map<std::string, std::vector<std::size_t>> result;
|
||||||
|
const auto & options = parsed_opts.options;
|
||||||
|
for (size_t i = 0; i < options.size() - 1; ++i)
|
||||||
|
{
|
||||||
|
const auto & opt = options[i];
|
||||||
|
if (opt.string_key == "input-files")
|
||||||
|
{
|
||||||
|
if (options[i + 1].string_key == "query-indexes")
|
||||||
|
{
|
||||||
|
const std::string & test_path = Poco::Path(opt.value[0]).absolute().toString();
|
||||||
|
for (const auto & query_num_str : options[i + 1].value)
|
||||||
|
{
|
||||||
|
size_t query_num = std::stoul(query_num_str);
|
||||||
|
result[test_path].push_back(query_num);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
int mainEntryClickHousePerformanceTest(int argc, char ** argv)
|
int mainEntryClickHousePerformanceTest(int argc, char ** argv)
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
@ -314,24 +348,18 @@ try
|
|||||||
("skip-names", value<Strings>()->multitoken(), "Do not run tests with name")
|
("skip-names", value<Strings>()->multitoken(), "Do not run tests with name")
|
||||||
("names-regexp", value<Strings>()->multitoken(), "Run tests with names matching regexp")
|
("names-regexp", value<Strings>()->multitoken(), "Run tests with names matching regexp")
|
||||||
("skip-names-regexp", value<Strings>()->multitoken(), "Do not run tests with names matching regexp")
|
("skip-names-regexp", value<Strings>()->multitoken(), "Do not run tests with names matching regexp")
|
||||||
|
("input-files", value<Strings>()->multitoken(), "Input .xml files")
|
||||||
|
("query-indexes", value<std::vector<size_t>>()->multitoken(), "Input query indexes")
|
||||||
("recursive,r", "Recurse in directories to find all xml's");
|
("recursive,r", "Recurse in directories to find all xml's");
|
||||||
|
|
||||||
/// These options will not be displayed in --help
|
|
||||||
po::options_description hidden("Hidden options");
|
|
||||||
hidden.add_options()
|
|
||||||
("input-files", value<std::vector<std::string>>(), "");
|
|
||||||
|
|
||||||
/// But they will be legit, though. And they must be given without name
|
|
||||||
po::positional_options_description positional;
|
|
||||||
positional.add("input-files", -1);
|
|
||||||
|
|
||||||
po::options_description cmdline_options;
|
po::options_description cmdline_options;
|
||||||
cmdline_options.add(desc).add(hidden);
|
cmdline_options.add(desc);
|
||||||
|
|
||||||
po::variables_map options;
|
po::variables_map options;
|
||||||
po::store(
|
po::basic_parsed_options<char> parsed = po::command_line_parser(argc, argv).options(cmdline_options).run();
|
||||||
po::command_line_parser(argc, argv).
|
auto queries_with_indexes = getTestQueryIndexes(parsed);
|
||||||
options(cmdline_options).positional(positional).run(), options);
|
po::store(parsed, options);
|
||||||
|
|
||||||
po::notify(options);
|
po::notify(options);
|
||||||
|
|
||||||
Poco::AutoPtr<Poco::PatternFormatter> formatter(new Poco::PatternFormatter("%Y.%m.%d %H:%M:%S.%F <%p> %s: %t"));
|
Poco::AutoPtr<Poco::PatternFormatter> formatter(new Poco::PatternFormatter("%Y.%m.%d %H:%M:%S.%F <%p> %s: %t"));
|
||||||
@ -378,6 +406,7 @@ try
|
|||||||
std::move(skip_names),
|
std::move(skip_names),
|
||||||
std::move(tests_names_regexp),
|
std::move(tests_names_regexp),
|
||||||
std::move(skip_names_regexp),
|
std::move(skip_names_regexp),
|
||||||
|
queries_with_indexes,
|
||||||
timeouts);
|
timeouts);
|
||||||
return performance_test_suite.run();
|
return performance_test_suite.run();
|
||||||
}
|
}
|
||||||
|
@ -17,6 +17,18 @@ namespace DB
|
|||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
const std::regex QUOTE_REGEX{"\""};
|
const std::regex QUOTE_REGEX{"\""};
|
||||||
|
std::string getMainMetric(const PerformanceTestInfo & test_info)
|
||||||
|
{
|
||||||
|
std::string main_metric;
|
||||||
|
if (test_info.main_metric.empty())
|
||||||
|
if (test_info.exec_type == ExecutionType::Loop)
|
||||||
|
main_metric = "min_time";
|
||||||
|
else
|
||||||
|
main_metric = "rows_per_second";
|
||||||
|
else
|
||||||
|
main_metric = test_info.main_metric;
|
||||||
|
return main_metric;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ReportBuilder::ReportBuilder(const std::string & server_version_)
|
ReportBuilder::ReportBuilder(const std::string & server_version_)
|
||||||
@ -35,7 +47,8 @@ std::string ReportBuilder::getCurrentTime() const
|
|||||||
|
|
||||||
std::string ReportBuilder::buildFullReport(
|
std::string ReportBuilder::buildFullReport(
|
||||||
const PerformanceTestInfo & test_info,
|
const PerformanceTestInfo & test_info,
|
||||||
std::vector<TestStats> & stats) const
|
std::vector<TestStats> & stats,
|
||||||
|
const std::vector<std::size_t> & queries_to_run) const
|
||||||
{
|
{
|
||||||
JSONString json_output;
|
JSONString json_output;
|
||||||
|
|
||||||
@ -47,13 +60,7 @@ std::string ReportBuilder::buildFullReport(
|
|||||||
json_output.set("time", getCurrentTime());
|
json_output.set("time", getCurrentTime());
|
||||||
json_output.set("test_name", test_info.test_name);
|
json_output.set("test_name", test_info.test_name);
|
||||||
json_output.set("path", test_info.path);
|
json_output.set("path", test_info.path);
|
||||||
json_output.set("main_metric", test_info.main_metric);
|
json_output.set("main_metric", getMainMetric(test_info));
|
||||||
|
|
||||||
auto has_metric = [&test_info] (const std::string & metric_name)
|
|
||||||
{
|
|
||||||
return std::find(test_info.metrics.begin(),
|
|
||||||
test_info.metrics.end(), metric_name) != test_info.metrics.end();
|
|
||||||
};
|
|
||||||
|
|
||||||
if (test_info.substitutions.size())
|
if (test_info.substitutions.size())
|
||||||
{
|
{
|
||||||
@ -85,6 +92,9 @@ std::string ReportBuilder::buildFullReport(
|
|||||||
std::vector<JSONString> run_infos;
|
std::vector<JSONString> run_infos;
|
||||||
for (size_t query_index = 0; query_index < test_info.queries.size(); ++query_index)
|
for (size_t query_index = 0; query_index < test_info.queries.size(); ++query_index)
|
||||||
{
|
{
|
||||||
|
if (!queries_to_run.empty() && std::find(queries_to_run.begin(), queries_to_run.end(), query_index) == queries_to_run.end())
|
||||||
|
continue;
|
||||||
|
|
||||||
for (size_t number_of_launch = 0; number_of_launch < test_info.times_to_run; ++number_of_launch)
|
for (size_t number_of_launch = 0; number_of_launch < test_info.times_to_run; ++number_of_launch)
|
||||||
{
|
{
|
||||||
size_t stat_index = number_of_launch * test_info.queries.size() + query_index;
|
size_t stat_index = number_of_launch * test_info.queries.size() + query_index;
|
||||||
@ -97,16 +107,16 @@ std::string ReportBuilder::buildFullReport(
|
|||||||
|
|
||||||
auto query = std::regex_replace(test_info.queries[query_index], QUOTE_REGEX, "\\\"");
|
auto query = std::regex_replace(test_info.queries[query_index], QUOTE_REGEX, "\\\"");
|
||||||
runJSON.set("query", query);
|
runJSON.set("query", query);
|
||||||
|
runJSON.set("query_index", query_index);
|
||||||
if (!statistics.exception.empty())
|
if (!statistics.exception.empty())
|
||||||
runJSON.set("exception", statistics.exception);
|
runJSON.set("exception", statistics.exception);
|
||||||
|
|
||||||
if (test_info.exec_type == ExecutionType::Loop)
|
if (test_info.exec_type == ExecutionType::Loop)
|
||||||
{
|
{
|
||||||
/// in seconds
|
/// in seconds
|
||||||
if (has_metric("min_time"))
|
|
||||||
runJSON.set("min_time", statistics.min_time / double(1000));
|
runJSON.set("min_time", statistics.min_time / double(1000));
|
||||||
|
|
||||||
if (has_metric("quantiles"))
|
if (statistics.sampler.size() != 0)
|
||||||
{
|
{
|
||||||
JSONString quantiles(4); /// here, 4 is the size of \t padding
|
JSONString quantiles(4); /// here, 4 is the size of \t padding
|
||||||
for (double percent = 10; percent <= 90; percent += 10)
|
for (double percent = 10; percent <= 90; percent += 10)
|
||||||
@ -130,33 +140,20 @@ std::string ReportBuilder::buildFullReport(
|
|||||||
runJSON.set("quantiles", quantiles.asString());
|
runJSON.set("quantiles", quantiles.asString());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (has_metric("total_time"))
|
|
||||||
runJSON.set("total_time", statistics.total_time);
|
runJSON.set("total_time", statistics.total_time);
|
||||||
|
|
||||||
if (has_metric("queries_per_second"))
|
if (statistics.total_time != 0)
|
||||||
runJSON.set("queries_per_second",
|
{
|
||||||
double(statistics.queries) / statistics.total_time);
|
runJSON.set("queries_per_second", static_cast<double>(statistics.queries) / statistics.total_time);
|
||||||
|
runJSON.set("rows_per_second", static_cast<double>(statistics.total_rows_read) / statistics.total_time);
|
||||||
if (has_metric("rows_per_second"))
|
runJSON.set("bytes_per_second", static_cast<double>(statistics.total_bytes_read) / statistics.total_time);
|
||||||
runJSON.set("rows_per_second",
|
}
|
||||||
double(statistics.total_rows_read) / statistics.total_time);
|
|
||||||
|
|
||||||
if (has_metric("bytes_per_second"))
|
|
||||||
runJSON.set("bytes_per_second",
|
|
||||||
double(statistics.total_bytes_read) / statistics.total_time);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (has_metric("max_rows_per_second"))
|
|
||||||
runJSON.set("max_rows_per_second", statistics.max_rows_speed);
|
runJSON.set("max_rows_per_second", statistics.max_rows_speed);
|
||||||
|
|
||||||
if (has_metric("max_bytes_per_second"))
|
|
||||||
runJSON.set("max_bytes_per_second", statistics.max_bytes_speed);
|
runJSON.set("max_bytes_per_second", statistics.max_bytes_speed);
|
||||||
|
|
||||||
if (has_metric("avg_rows_per_second"))
|
|
||||||
runJSON.set("avg_rows_per_second", statistics.avg_rows_speed_value);
|
runJSON.set("avg_rows_per_second", statistics.avg_rows_speed_value);
|
||||||
|
|
||||||
if (has_metric("avg_bytes_per_second"))
|
|
||||||
runJSON.set("avg_bytes_per_second", statistics.avg_bytes_speed_value);
|
runJSON.set("avg_bytes_per_second", statistics.avg_bytes_speed_value);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -171,26 +168,32 @@ std::string ReportBuilder::buildFullReport(
|
|||||||
|
|
||||||
std::string ReportBuilder::buildCompactReport(
|
std::string ReportBuilder::buildCompactReport(
|
||||||
const PerformanceTestInfo & test_info,
|
const PerformanceTestInfo & test_info,
|
||||||
std::vector<TestStats> & stats) const
|
std::vector<TestStats> & stats,
|
||||||
|
const std::vector<std::size_t> & queries_to_run) const
|
||||||
{
|
{
|
||||||
|
|
||||||
std::ostringstream output;
|
std::ostringstream output;
|
||||||
|
|
||||||
for (size_t query_index = 0; query_index < test_info.queries.size(); ++query_index)
|
for (size_t query_index = 0; query_index < test_info.queries.size(); ++query_index)
|
||||||
{
|
{
|
||||||
|
if (!queries_to_run.empty() && std::find(queries_to_run.begin(), queries_to_run.end(), query_index) == queries_to_run.end())
|
||||||
|
continue;
|
||||||
|
|
||||||
for (size_t number_of_launch = 0; number_of_launch < test_info.times_to_run; ++number_of_launch)
|
for (size_t number_of_launch = 0; number_of_launch < test_info.times_to_run; ++number_of_launch)
|
||||||
{
|
{
|
||||||
if (test_info.queries.size() > 1)
|
if (test_info.queries.size() > 1)
|
||||||
output << "query \"" << test_info.queries[query_index] << "\", ";
|
output << "query \"" << test_info.queries[query_index] << "\", ";
|
||||||
|
|
||||||
output << "run " << std::to_string(number_of_launch + 1) << ": ";
|
output << "run " << std::to_string(number_of_launch + 1) << ": ";
|
||||||
output << test_info.main_metric << " = ";
|
|
||||||
|
std::string main_metric = getMainMetric(test_info);
|
||||||
|
|
||||||
|
output << main_metric << " = ";
|
||||||
size_t index = number_of_launch * test_info.queries.size() + query_index;
|
size_t index = number_of_launch * test_info.queries.size() + query_index;
|
||||||
output << stats[index].getStatisticByName(test_info.main_metric);
|
output << stats[index].getStatisticByName(main_metric);
|
||||||
output << "\n";
|
output << "\n";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return output.str();
|
return output.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -9,14 +9,18 @@ namespace DB
|
|||||||
class ReportBuilder
|
class ReportBuilder
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
explicit ReportBuilder(const std::string & server_version_);
|
ReportBuilder(const std::string & server_version_);
|
||||||
std::string buildFullReport(
|
std::string buildFullReport(
|
||||||
const PerformanceTestInfo & test_info,
|
const PerformanceTestInfo & test_info,
|
||||||
std::vector<TestStats> & stats) const;
|
std::vector<TestStats> & stats,
|
||||||
|
const std::vector<std::size_t> & queries_to_run) const;
|
||||||
|
|
||||||
|
|
||||||
std::string buildCompactReport(
|
std::string buildCompactReport(
|
||||||
const PerformanceTestInfo & test_info,
|
const PerformanceTestInfo & test_info,
|
||||||
std::vector<TestStats> & stats) const;
|
std::vector<TestStats> & stats,
|
||||||
|
const std::vector<std::size_t> & queries_to_run) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::string server_version;
|
std::string server_version;
|
||||||
std::string hostname;
|
std::string hostname;
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
#include <Poco/File.h>
|
#include <Poco/File.h>
|
||||||
#include <Poco/Net/HTTPBasicCredentials.h>
|
#include <Poco/Net/HTTPBasicCredentials.h>
|
||||||
#include <Poco/Net/HTTPServerRequest.h>
|
#include <Poco/Net/HTTPServerRequest.h>
|
||||||
|
#include <Poco/Net/HTTPServerRequestImpl.h>
|
||||||
#include <Poco/Net/HTTPServerResponse.h>
|
#include <Poco/Net/HTTPServerResponse.h>
|
||||||
#include <Poco/Net/NetException.h>
|
#include <Poco/Net/NetException.h>
|
||||||
|
|
||||||
@ -15,13 +16,12 @@
|
|||||||
#include <Common/getFQDNOrHostName.h>
|
#include <Common/getFQDNOrHostName.h>
|
||||||
#include <Common/CurrentThread.h>
|
#include <Common/CurrentThread.h>
|
||||||
#include <Common/setThreadName.h>
|
#include <Common/setThreadName.h>
|
||||||
|
#include <Compression/CompressedReadBuffer.h>
|
||||||
|
#include <Compression/CompressedWriteBuffer.h>
|
||||||
#include <IO/ReadBufferFromIStream.h>
|
#include <IO/ReadBufferFromIStream.h>
|
||||||
#include <IO/ZlibInflatingReadBuffer.h>
|
#include <IO/ZlibInflatingReadBuffer.h>
|
||||||
#include <IO/BrotliReadBuffer.h>
|
#include <IO/BrotliReadBuffer.h>
|
||||||
#include <IO/ReadBufferFromString.h>
|
#include <IO/ReadBufferFromString.h>
|
||||||
#include <IO/ConcatReadBuffer.h>
|
|
||||||
#include <Compression/CompressedReadBuffer.h>
|
|
||||||
#include <Compression/CompressedWriteBuffer.h>
|
|
||||||
#include <IO/WriteBufferFromString.h>
|
#include <IO/WriteBufferFromString.h>
|
||||||
#include <IO/WriteBufferFromHTTPServerResponse.h>
|
#include <IO/WriteBufferFromHTTPServerResponse.h>
|
||||||
#include <IO/WriteBufferFromFile.h>
|
#include <IO/WriteBufferFromFile.h>
|
||||||
@ -563,9 +563,47 @@ void HTTPHandler::processQuery(
|
|||||||
client_info.http_method = http_method;
|
client_info.http_method = http_method;
|
||||||
client_info.http_user_agent = request.get("User-Agent", "");
|
client_info.http_user_agent = request.get("User-Agent", "");
|
||||||
|
|
||||||
|
auto appendCallback = [&context] (ProgressCallback callback)
|
||||||
|
{
|
||||||
|
auto prev = context.getProgressCallback();
|
||||||
|
|
||||||
|
context.setProgressCallback([prev, callback] (const Progress & progress)
|
||||||
|
{
|
||||||
|
if (prev)
|
||||||
|
prev(progress);
|
||||||
|
|
||||||
|
callback(progress);
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
/// While still no data has been sent, we will report about query execution progress by sending HTTP headers.
|
/// While still no data has been sent, we will report about query execution progress by sending HTTP headers.
|
||||||
if (settings.send_progress_in_http_headers)
|
if (settings.send_progress_in_http_headers)
|
||||||
context.setProgressCallback([&used_output] (const Progress & progress) { used_output.out->onProgress(progress); });
|
appendCallback([&used_output] (const Progress & progress) { used_output.out->onProgress(progress); });
|
||||||
|
|
||||||
|
if (settings.readonly > 0 && settings.cancel_http_readonly_queries_on_client_close)
|
||||||
|
{
|
||||||
|
Poco::Net::StreamSocket & socket = dynamic_cast<Poco::Net::HTTPServerRequestImpl &>(request).socket();
|
||||||
|
|
||||||
|
appendCallback([&context, &socket](const Progress &)
|
||||||
|
{
|
||||||
|
/// Assume that at the point this method is called no one is reading data from the socket any more.
|
||||||
|
/// True for read-only queries.
|
||||||
|
try
|
||||||
|
{
|
||||||
|
char b;
|
||||||
|
int status = socket.receiveBytes(&b, 1, MSG_DONTWAIT | MSG_PEEK);
|
||||||
|
if (status == 0)
|
||||||
|
context.killCurrentQuery();
|
||||||
|
}
|
||||||
|
catch (Poco::TimeoutException &)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
catch (...)
|
||||||
|
{
|
||||||
|
context.killCurrentQuery();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
executeQuery(*in, *used_output.out_maybe_delayed_and_compressed, /* allow_into_outfile = */ false, context,
|
executeQuery(*in, *used_output.out_maybe_delayed_and_compressed, /* allow_into_outfile = */ false, context,
|
||||||
[&response] (const String & content_type) { response.setContentType(content_type); },
|
[&response] (const String & content_type) { response.setContentType(content_type); },
|
||||||
|
@ -41,7 +41,7 @@ void MetricsTransmitter::run()
|
|||||||
const auto & config = context.getConfigRef();
|
const auto & config = context.getConfigRef();
|
||||||
auto interval = config.getInt(config_name + ".interval", 60);
|
auto interval = config.getInt(config_name + ".interval", 60);
|
||||||
|
|
||||||
const std::string thread_name = "MericsTrns " + std::to_string(interval) + "s";
|
const std::string thread_name = "MetrTx" + std::to_string(interval);
|
||||||
setThreadName(thread_name.c_str());
|
setThreadName(thread_name.c_str());
|
||||||
|
|
||||||
const auto get_next_time = [](size_t seconds)
|
const auto get_next_time = [](size_t seconds)
|
||||||
|
@ -11,6 +11,7 @@
|
|||||||
#include <Poco/DirectoryIterator.h>
|
#include <Poco/DirectoryIterator.h>
|
||||||
#include <Poco/Net/HTTPServer.h>
|
#include <Poco/Net/HTTPServer.h>
|
||||||
#include <Poco/Net/NetException.h>
|
#include <Poco/Net/NetException.h>
|
||||||
|
#include <Poco/Util/HelpFormatter.h>
|
||||||
#include <ext/scope_guard.h>
|
#include <ext/scope_guard.h>
|
||||||
#include <common/logger_useful.h>
|
#include <common/logger_useful.h>
|
||||||
#include <common/ErrorHandlers.h>
|
#include <common/ErrorHandlers.h>
|
||||||
@ -47,6 +48,7 @@
|
|||||||
#include "MetricsTransmitter.h"
|
#include "MetricsTransmitter.h"
|
||||||
#include <Common/StatusFile.h>
|
#include <Common/StatusFile.h>
|
||||||
#include "TCPHandlerFactory.h"
|
#include "TCPHandlerFactory.h"
|
||||||
|
#include "Common/config_version.h"
|
||||||
|
|
||||||
#if defined(__linux__)
|
#if defined(__linux__)
|
||||||
#include <Common/hasLinuxCapability.h>
|
#include <Common/hasLinuxCapability.h>
|
||||||
@ -116,6 +118,26 @@ void Server::uninitialize()
|
|||||||
BaseDaemon::uninitialize();
|
BaseDaemon::uninitialize();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int Server::run()
|
||||||
|
{
|
||||||
|
if (config().hasOption("help"))
|
||||||
|
{
|
||||||
|
Poco::Util::HelpFormatter helpFormatter(Server::options());
|
||||||
|
std::stringstream header;
|
||||||
|
header << commandName() << " [OPTION] [-- [ARG]...]\n";
|
||||||
|
header << "positional arguments can be used to rewrite config.xml properties, for example, --http_port=8010";
|
||||||
|
helpFormatter.setHeader(header.str());
|
||||||
|
helpFormatter.format(std::cout);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if (config().hasOption("version"))
|
||||||
|
{
|
||||||
|
std::cout << DBMS_NAME << " server version " << VERSION_STRING << "." << std::endl;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return Application::run();
|
||||||
|
}
|
||||||
|
|
||||||
void Server::initialize(Poco::Util::Application & self)
|
void Server::initialize(Poco::Util::Application & self)
|
||||||
{
|
{
|
||||||
BaseDaemon::initialize(self);
|
BaseDaemon::initialize(self);
|
||||||
@ -127,6 +149,21 @@ std::string Server::getDefaultCorePath() const
|
|||||||
return getCanonicalPath(config().getString("path", DBMS_DEFAULT_PATH)) + "cores";
|
return getCanonicalPath(config().getString("path", DBMS_DEFAULT_PATH)) + "cores";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Server::defineOptions(Poco::Util::OptionSet & _options)
|
||||||
|
{
|
||||||
|
_options.addOption(
|
||||||
|
Poco::Util::Option("help", "h", "show help and exit")
|
||||||
|
.required(false)
|
||||||
|
.repeatable(false)
|
||||||
|
.binding("help"));
|
||||||
|
_options.addOption(
|
||||||
|
Poco::Util::Option("version", "V", "show version and exit")
|
||||||
|
.required(false)
|
||||||
|
.repeatable(false)
|
||||||
|
.binding("version"));
|
||||||
|
BaseDaemon::defineOptions(_options);
|
||||||
|
}
|
||||||
|
|
||||||
int Server::main(const std::vector<std::string> & /*args*/)
|
int Server::main(const std::vector<std::string> & /*args*/)
|
||||||
{
|
{
|
||||||
Logger * log = &logger();
|
Logger * log = &logger();
|
||||||
@ -398,18 +435,36 @@ int Server::main(const std::vector<std::string> & /*args*/)
|
|||||||
if (config().has("max_partition_size_to_drop"))
|
if (config().has("max_partition_size_to_drop"))
|
||||||
global_context->setMaxPartitionSizeToDrop(config().getUInt64("max_partition_size_to_drop"));
|
global_context->setMaxPartitionSizeToDrop(config().getUInt64("max_partition_size_to_drop"));
|
||||||
|
|
||||||
|
/// Set up caches.
|
||||||
|
|
||||||
|
/// Lower cache size on low-memory systems.
|
||||||
|
double cache_size_to_ram_max_ratio = config().getDouble("cache_size_to_ram_max_ratio", 0.5);
|
||||||
|
size_t max_cache_size = memory_amount * cache_size_to_ram_max_ratio;
|
||||||
|
|
||||||
/// Size of cache for uncompressed blocks. Zero means disabled.
|
/// Size of cache for uncompressed blocks. Zero means disabled.
|
||||||
size_t uncompressed_cache_size = config().getUInt64("uncompressed_cache_size", 0);
|
size_t uncompressed_cache_size = config().getUInt64("uncompressed_cache_size", 0);
|
||||||
if (uncompressed_cache_size)
|
if (uncompressed_cache_size > max_cache_size)
|
||||||
|
{
|
||||||
|
uncompressed_cache_size = max_cache_size;
|
||||||
|
LOG_INFO(log, "Uncompressed cache size was lowered to " << formatReadableSizeWithBinarySuffix(uncompressed_cache_size)
|
||||||
|
<< " because the system has low amount of memory");
|
||||||
|
}
|
||||||
global_context->setUncompressedCache(uncompressed_cache_size);
|
global_context->setUncompressedCache(uncompressed_cache_size);
|
||||||
|
|
||||||
/// Load global settings from default_profile and system_profile.
|
/// Load global settings from default_profile and system_profile.
|
||||||
global_context->setDefaultProfiles(config());
|
global_context->setDefaultProfiles(config());
|
||||||
Settings & settings = global_context->getSettingsRef();
|
Settings & settings = global_context->getSettingsRef();
|
||||||
|
|
||||||
/// Size of cache for marks (index of MergeTree family of tables). It is necessary.
|
/// Size of cache for marks (index of MergeTree family of tables). It is mandatory.
|
||||||
size_t mark_cache_size = config().getUInt64("mark_cache_size");
|
size_t mark_cache_size = config().getUInt64("mark_cache_size");
|
||||||
if (mark_cache_size)
|
if (!mark_cache_size)
|
||||||
|
LOG_ERROR(log, "Too low mark cache size will lead to severe performance degradation.");
|
||||||
|
if (mark_cache_size > max_cache_size)
|
||||||
|
{
|
||||||
|
mark_cache_size = max_cache_size;
|
||||||
|
LOG_INFO(log, "Mark cache size was lowered to " << formatReadableSizeWithBinarySuffix(uncompressed_cache_size)
|
||||||
|
<< " because the system has low amount of memory");
|
||||||
|
}
|
||||||
global_context->setMarkCache(mark_cache_size);
|
global_context->setMarkCache(mark_cache_size);
|
||||||
|
|
||||||
#if USE_EMBEDDED_COMPILER
|
#if USE_EMBEDDED_COMPILER
|
||||||
@ -697,10 +752,10 @@ int Server::main(const std::vector<std::string> & /*args*/)
|
|||||||
|
|
||||||
{
|
{
|
||||||
std::stringstream message;
|
std::stringstream message;
|
||||||
message << "Available RAM = " << formatReadableSizeWithBinarySuffix(memory_amount) << ";"
|
message << "Available RAM: " << formatReadableSizeWithBinarySuffix(memory_amount) << ";"
|
||||||
<< " physical cores = " << getNumberOfPhysicalCPUCores() << ";"
|
<< " physical cores: " << getNumberOfPhysicalCPUCores() << ";"
|
||||||
// on ARM processors it can show only enabled at current moment cores
|
// on ARM processors it can show only enabled at current moment cores
|
||||||
<< " threads = " << std::thread::hardware_concurrency() << ".";
|
<< " logical cores: " << std::thread::hardware_concurrency() << ".";
|
||||||
LOG_INFO(log, message.str());
|
LOG_INFO(log, message.str());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -21,6 +21,8 @@ namespace DB
|
|||||||
class Server : public BaseDaemon, public IServer
|
class Server : public BaseDaemon, public IServer
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
using ServerApplication::run;
|
||||||
|
|
||||||
Poco::Util::LayeredConfiguration & config() const override
|
Poco::Util::LayeredConfiguration & config() const override
|
||||||
{
|
{
|
||||||
return BaseDaemon::config();
|
return BaseDaemon::config();
|
||||||
@ -41,7 +43,10 @@ public:
|
|||||||
return BaseDaemon::isCancelled();
|
return BaseDaemon::isCancelled();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void defineOptions(Poco::Util::OptionSet & _options) override;
|
||||||
protected:
|
protected:
|
||||||
|
int run() override;
|
||||||
|
|
||||||
void initialize(Application & self) override;
|
void initialize(Application & self) override;
|
||||||
|
|
||||||
void uninitialize() override;
|
void uninitialize() override;
|
||||||
|
@ -6,8 +6,6 @@
|
|||||||
#include <Common/ClickHouseRevision.h>
|
#include <Common/ClickHouseRevision.h>
|
||||||
#include <Common/CurrentThread.h>
|
#include <Common/CurrentThread.h>
|
||||||
#include <Common/Stopwatch.h>
|
#include <Common/Stopwatch.h>
|
||||||
#include <Common/ClickHouseRevision.h>
|
|
||||||
#include <Common/Stopwatch.h>
|
|
||||||
#include <Common/NetException.h>
|
#include <Common/NetException.h>
|
||||||
#include <Common/setThreadName.h>
|
#include <Common/setThreadName.h>
|
||||||
#include <Common/config_version.h>
|
#include <Common/config_version.h>
|
||||||
@ -302,10 +300,10 @@ void TCPHandler::runImpl()
|
|||||||
|
|
||||||
void TCPHandler::readData(const Settings & global_settings)
|
void TCPHandler::readData(const Settings & global_settings)
|
||||||
{
|
{
|
||||||
auto receive_timeout = query_context.getSettingsRef().receive_timeout.value;
|
const auto receive_timeout = query_context.getSettingsRef().receive_timeout.value;
|
||||||
|
|
||||||
/// Poll interval should not be greater than receive_timeout
|
/// Poll interval should not be greater than receive_timeout
|
||||||
size_t default_poll_interval = global_settings.poll_interval.value * 1000000;
|
const size_t default_poll_interval = global_settings.poll_interval.value * 1000000;
|
||||||
size_t current_poll_interval = static_cast<size_t>(receive_timeout.totalMicroseconds());
|
size_t current_poll_interval = static_cast<size_t>(receive_timeout.totalMicroseconds());
|
||||||
constexpr size_t min_poll_interval = 5000; // 5 ms
|
constexpr size_t min_poll_interval = 5000; // 5 ms
|
||||||
size_t poll_interval = std::max(min_poll_interval, std::min(default_poll_interval, current_poll_interval));
|
size_t poll_interval = std::max(min_poll_interval, std::min(default_poll_interval, current_poll_interval));
|
||||||
@ -409,7 +407,7 @@ void TCPHandler::processOrdinaryQuery()
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (state.progress.rows && after_send_progress.elapsed() / 1000 >= query_context.getSettingsRef().interactive_delay)
|
if (after_send_progress.elapsed() / 1000 >= query_context.getSettingsRef().interactive_delay)
|
||||||
{
|
{
|
||||||
/// Some time passed and there is a progress.
|
/// Some time passed and there is a progress.
|
||||||
after_send_progress.restart();
|
after_send_progress.restart();
|
||||||
|
@ -1,12 +1,12 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <IO/VarInt.h>
|
#include <IO/VarInt.h>
|
||||||
|
#include <IO/WriteHelpers.h>
|
||||||
|
|
||||||
#include <array>
|
#include <array>
|
||||||
#include <DataTypes/DataTypesNumber.h>
|
#include <DataTypes/DataTypesNumber.h>
|
||||||
#include <Columns/ColumnNullable.h>
|
#include <Columns/ColumnNullable.h>
|
||||||
#include <AggregateFunctions/IAggregateFunction.h>
|
#include <AggregateFunctions/IAggregateFunction.h>
|
||||||
#include <IO/WriteHelpers.h>
|
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
|
44
dbms/src/AggregateFunctions/AggregateFunctionEntropy.cpp
Normal file
44
dbms/src/AggregateFunctions/AggregateFunctionEntropy.cpp
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||||
|
#include <AggregateFunctions/AggregateFunctionEntropy.h>
|
||||||
|
#include <AggregateFunctions/FactoryHelpers.h>
|
||||||
|
#include <AggregateFunctions/Helpers.h>
|
||||||
|
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
|
||||||
|
namespace ErrorCodes
|
||||||
|
{
|
||||||
|
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
|
||||||
|
AggregateFunctionPtr createAggregateFunctionEntropy(const std::string & name, const DataTypes & argument_types, const Array & parameters)
|
||||||
|
{
|
||||||
|
assertNoParameters(name, parameters);
|
||||||
|
if (argument_types.empty())
|
||||||
|
throw Exception("Incorrect number of arguments for aggregate function " + name,
|
||||||
|
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||||
|
|
||||||
|
size_t num_args = argument_types.size();
|
||||||
|
if (num_args == 1)
|
||||||
|
{
|
||||||
|
/// Specialized implementation for single argument of numeric type.
|
||||||
|
if (auto res = createWithNumericBasedType<AggregateFunctionEntropy>(*argument_types[0], num_args))
|
||||||
|
return AggregateFunctionPtr(res);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Generic implementation for other types or for multiple arguments.
|
||||||
|
return std::make_shared<AggregateFunctionEntropy<UInt128>>(num_args);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void registerAggregateFunctionEntropy(AggregateFunctionFactory & factory)
|
||||||
|
{
|
||||||
|
factory.registerFunction("entropy", createAggregateFunctionEntropy);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
149
dbms/src/AggregateFunctions/AggregateFunctionEntropy.h
Normal file
149
dbms/src/AggregateFunctions/AggregateFunctionEntropy.h
Normal file
@ -0,0 +1,149 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <Common/HashTable/HashMap.h>
|
||||||
|
#include <Common/NaNUtils.h>
|
||||||
|
|
||||||
|
#include <AggregateFunctions/IAggregateFunction.h>
|
||||||
|
#include <AggregateFunctions/UniqVariadicHash.h>
|
||||||
|
#include <DataTypes/DataTypesNumber.h>
|
||||||
|
#include <Columns/ColumnVector.h>
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
|
||||||
|
/** Calculates Shannon Entropy, using HashMap and computing empirical distribution function.
|
||||||
|
* Entropy is measured in bits (base-2 logarithm is used).
|
||||||
|
*/
|
||||||
|
template <typename Value>
|
||||||
|
struct EntropyData
|
||||||
|
{
|
||||||
|
using Weight = UInt64;
|
||||||
|
|
||||||
|
using HashingMap = HashMap<
|
||||||
|
Value, Weight,
|
||||||
|
HashCRC32<Value>,
|
||||||
|
HashTableGrower<4>,
|
||||||
|
HashTableAllocatorWithStackMemory<sizeof(std::pair<Value, Weight>) * (1 << 3)>>;
|
||||||
|
|
||||||
|
/// For the case of pre-hashed values.
|
||||||
|
using TrivialMap = HashMap<
|
||||||
|
Value, Weight,
|
||||||
|
UInt128TrivialHash,
|
||||||
|
HashTableGrower<4>,
|
||||||
|
HashTableAllocatorWithStackMemory<sizeof(std::pair<Value, Weight>) * (1 << 3)>>;
|
||||||
|
|
||||||
|
using Map = std::conditional_t<std::is_same_v<UInt128, Value>, TrivialMap, HashingMap>;
|
||||||
|
|
||||||
|
Map map;
|
||||||
|
|
||||||
|
void add(const Value & x)
|
||||||
|
{
|
||||||
|
if (!isNaN(x))
|
||||||
|
++map[x];
|
||||||
|
}
|
||||||
|
|
||||||
|
void add(const Value & x, const Weight & weight)
|
||||||
|
{
|
||||||
|
if (!isNaN(x))
|
||||||
|
map[x] += weight;
|
||||||
|
}
|
||||||
|
|
||||||
|
void merge(const EntropyData & rhs)
|
||||||
|
{
|
||||||
|
for (const auto & pair : rhs.map)
|
||||||
|
map[pair.first] += pair.second;
|
||||||
|
}
|
||||||
|
|
||||||
|
void serialize(WriteBuffer & buf) const
|
||||||
|
{
|
||||||
|
map.write(buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
void deserialize(ReadBuffer & buf)
|
||||||
|
{
|
||||||
|
typename Map::Reader reader(buf);
|
||||||
|
while (reader.next())
|
||||||
|
{
|
||||||
|
const auto & pair = reader.get();
|
||||||
|
map[pair.first] = pair.second;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Float64 get() const
|
||||||
|
{
|
||||||
|
UInt64 total_value = 0;
|
||||||
|
for (const auto & pair : map)
|
||||||
|
total_value += pair.second;
|
||||||
|
|
||||||
|
Float64 shannon_entropy = 0;
|
||||||
|
for (const auto & pair : map)
|
||||||
|
{
|
||||||
|
Float64 frequency = Float64(pair.second) / total_value;
|
||||||
|
shannon_entropy -= frequency * log2(frequency);
|
||||||
|
}
|
||||||
|
|
||||||
|
return shannon_entropy;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
template <typename Value>
|
||||||
|
class AggregateFunctionEntropy final : public IAggregateFunctionDataHelper<EntropyData<Value>, AggregateFunctionEntropy<Value>>
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
size_t num_args;
|
||||||
|
|
||||||
|
public:
|
||||||
|
AggregateFunctionEntropy(size_t num_args) : num_args(num_args)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
String getName() const override { return "entropy"; }
|
||||||
|
|
||||||
|
DataTypePtr getReturnType() const override
|
||||||
|
{
|
||||||
|
return std::make_shared<DataTypeNumber<Float64>>();
|
||||||
|
}
|
||||||
|
|
||||||
|
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
|
||||||
|
{
|
||||||
|
if constexpr (!std::is_same_v<UInt128, Value>)
|
||||||
|
{
|
||||||
|
/// Here we manage only with numerical types
|
||||||
|
const auto & column = static_cast<const ColumnVector <Value> &>(*columns[0]);
|
||||||
|
this->data(place).add(column.getData()[row_num]);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
this->data(place).add(UniqVariadicHash<true, false>::apply(num_args, columns, row_num));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||||
|
{
|
||||||
|
this->data(place).merge(this->data(rhs));
|
||||||
|
}
|
||||||
|
|
||||||
|
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
|
||||||
|
{
|
||||||
|
this->data(const_cast<AggregateDataPtr>(place)).serialize(buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
|
||||||
|
{
|
||||||
|
this->data(place).deserialize(buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override
|
||||||
|
{
|
||||||
|
auto & column = static_cast<ColumnVector<Float64> &>(to);
|
||||||
|
column.getData().push_back(this->data(place).get());
|
||||||
|
}
|
||||||
|
|
||||||
|
const char * getHeaderFilePath() const override { return __FILE__; }
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
@ -5,15 +5,17 @@
|
|||||||
#include <DataTypes/DataTypeArray.h>
|
#include <DataTypes/DataTypeArray.h>
|
||||||
#include <DataTypes/DataTypeNullable.h>
|
#include <DataTypes/DataTypeNullable.h>
|
||||||
#include <DataTypes/DataTypesNumber.h>
|
#include <DataTypes/DataTypesNumber.h>
|
||||||
|
#include <DataTypes/DataTypeLowCardinality.h>
|
||||||
|
|
||||||
#include <IO/WriteBuffer.h>
|
#include <IO/WriteBuffer.h>
|
||||||
#include <IO/WriteHelpers.h>
|
#include <IO/WriteHelpers.h>
|
||||||
|
|
||||||
#include <Interpreters/Context.h>
|
#include <Interpreters/Context.h>
|
||||||
|
|
||||||
#include <Common/StringUtils/StringUtils.h>
|
#include <Common/StringUtils/StringUtils.h>
|
||||||
#include <Common/typeid_cast.h>
|
#include <Common/typeid_cast.h>
|
||||||
|
|
||||||
#include <Poco/String.h>
|
#include <Poco/String.h>
|
||||||
#include <DataTypes/DataTypeLowCardinality.h>
|
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
@ -128,6 +130,10 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
|
|||||||
return combinator->transformAggregateFunction(nested_function, argument_types, parameters);
|
return combinator->transformAggregateFunction(nested_function, argument_types, parameters);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto hints = this->getHints(name);
|
||||||
|
if (!hints.empty())
|
||||||
|
throw Exception("Unknown aggregate function " + name + ". Maybe you meant: " + toString(hints), ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION);
|
||||||
|
else
|
||||||
throw Exception("Unknown aggregate function " + name, ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION);
|
throw Exception("Unknown aggregate function " + name, ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -13,6 +13,8 @@
|
|||||||
|
|
||||||
#include <IO/WriteBuffer.h>
|
#include <IO/WriteBuffer.h>
|
||||||
#include <IO/ReadBuffer.h>
|
#include <IO/ReadBuffer.h>
|
||||||
|
#include <IO/WriteHelpers.h>
|
||||||
|
#include <IO/ReadHelpers.h>
|
||||||
#include <IO/VarInt.h>
|
#include <IO/VarInt.h>
|
||||||
|
|
||||||
#include <AggregateFunctions/IAggregateFunction.h>
|
#include <AggregateFunctions/IAggregateFunction.h>
|
||||||
@ -268,15 +270,13 @@ public:
|
|||||||
lower_bound = std::min(lower_bound, other.lower_bound);
|
lower_bound = std::min(lower_bound, other.lower_bound);
|
||||||
upper_bound = std::max(lower_bound, other.upper_bound);
|
upper_bound = std::max(lower_bound, other.upper_bound);
|
||||||
for (size_t i = 0; i < other.size; i++)
|
for (size_t i = 0; i < other.size; i++)
|
||||||
{
|
|
||||||
add(other.points[i].mean, other.points[i].weight, max_bins);
|
add(other.points[i].mean, other.points[i].weight, max_bins);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
void write(WriteBuffer & buf) const
|
void write(WriteBuffer & buf) const
|
||||||
{
|
{
|
||||||
buf.write(reinterpret_cast<const char *>(&lower_bound), sizeof(lower_bound));
|
writeBinary(lower_bound, buf);
|
||||||
buf.write(reinterpret_cast<const char *>(&upper_bound), sizeof(upper_bound));
|
writeBinary(upper_bound, buf);
|
||||||
|
|
||||||
writeVarUInt(size, buf);
|
writeVarUInt(size, buf);
|
||||||
buf.write(reinterpret_cast<const char *>(points), size * sizeof(WeightedValue));
|
buf.write(reinterpret_cast<const char *>(points), size * sizeof(WeightedValue));
|
||||||
@ -284,11 +284,10 @@ public:
|
|||||||
|
|
||||||
void read(ReadBuffer & buf, UInt32 max_bins)
|
void read(ReadBuffer & buf, UInt32 max_bins)
|
||||||
{
|
{
|
||||||
buf.read(reinterpret_cast<char *>(&lower_bound), sizeof(lower_bound));
|
readBinary(lower_bound, buf);
|
||||||
buf.read(reinterpret_cast<char *>(&upper_bound), sizeof(upper_bound));
|
readBinary(upper_bound, buf);
|
||||||
|
|
||||||
readVarUInt(size, buf);
|
readVarUInt(size, buf);
|
||||||
|
|
||||||
if (size > max_bins * 2)
|
if (size > max_bins * 2)
|
||||||
throw Exception("Too many bins", ErrorCodes::TOO_LARGE_ARRAY_SIZE);
|
throw Exception("Too many bins", ErrorCodes::TOO_LARGE_ARRAY_SIZE);
|
||||||
|
|
||||||
|
@ -3,7 +3,6 @@
|
|||||||
#include <DataTypes/DataTypeAggregateFunction.h>
|
#include <DataTypes/DataTypeAggregateFunction.h>
|
||||||
#include <AggregateFunctions/IAggregateFunction.h>
|
#include <AggregateFunctions/IAggregateFunction.h>
|
||||||
#include <Columns/ColumnAggregateFunction.h>
|
#include <Columns/ColumnAggregateFunction.h>
|
||||||
#include <DataTypes/DataTypeAggregateFunction.h>
|
|
||||||
#include <Common/typeid_cast.h>
|
#include <Common/typeid_cast.h>
|
||||||
|
|
||||||
|
|
||||||
|
@ -41,7 +41,7 @@ template <typename T> using FuncQuantilesTDigestWeighted = AggregateFunctionQuan
|
|||||||
|
|
||||||
|
|
||||||
template <template <typename> class Function>
|
template <template <typename> class Function>
|
||||||
static constexpr bool SupportDecimal()
|
static constexpr bool supportDecimal()
|
||||||
{
|
{
|
||||||
return std::is_same_v<Function<Float32>, FuncQuantileExact<Float32>> ||
|
return std::is_same_v<Function<Float32>, FuncQuantileExact<Float32>> ||
|
||||||
std::is_same_v<Function<Float32>, FuncQuantilesExact<Float32>>;
|
std::is_same_v<Function<Float32>, FuncQuantilesExact<Float32>>;
|
||||||
@ -61,11 +61,10 @@ AggregateFunctionPtr createAggregateFunctionQuantile(const std::string & name, c
|
|||||||
if (which.idx == TypeIndex::TYPE) return std::make_shared<Function<TYPE>>(argument_type, params);
|
if (which.idx == TypeIndex::TYPE) return std::make_shared<Function<TYPE>>(argument_type, params);
|
||||||
FOR_NUMERIC_TYPES(DISPATCH)
|
FOR_NUMERIC_TYPES(DISPATCH)
|
||||||
#undef DISPATCH
|
#undef DISPATCH
|
||||||
#undef FOR_NUMERIC_TYPES
|
|
||||||
if (which.idx == TypeIndex::Date) return std::make_shared<Function<DataTypeDate::FieldType>>(argument_type, params);
|
if (which.idx == TypeIndex::Date) return std::make_shared<Function<DataTypeDate::FieldType>>(argument_type, params);
|
||||||
if (which.idx == TypeIndex::DateTime) return std::make_shared<Function<DataTypeDateTime::FieldType>>(argument_type, params);
|
if (which.idx == TypeIndex::DateTime) return std::make_shared<Function<DataTypeDateTime::FieldType>>(argument_type, params);
|
||||||
|
|
||||||
if constexpr (SupportDecimal<Function>())
|
if constexpr (supportDecimal<Function>())
|
||||||
{
|
{
|
||||||
if (which.idx == TypeIndex::Decimal32) return std::make_shared<Function<Decimal32>>(argument_type, params);
|
if (which.idx == TypeIndex::Decimal32) return std::make_shared<Function<Decimal32>>(argument_type, params);
|
||||||
if (which.idx == TypeIndex::Decimal64) return std::make_shared<Function<Decimal64>>(argument_type, params);
|
if (which.idx == TypeIndex::Decimal64) return std::make_shared<Function<Decimal64>>(argument_type, params);
|
||||||
|
@ -15,6 +15,7 @@ namespace ErrorCodes
|
|||||||
{
|
{
|
||||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||||
extern const int ARGUMENT_OUT_OF_BOUND;
|
extern const int ARGUMENT_OUT_OF_BOUND;
|
||||||
|
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -22,42 +23,59 @@ namespace
|
|||||||
{
|
{
|
||||||
|
|
||||||
/// Substitute return type for Date and DateTime
|
/// Substitute return type for Date and DateTime
|
||||||
class AggregateFunctionTopKDate : public AggregateFunctionTopK<DataTypeDate::FieldType>
|
template <bool is_weighted>
|
||||||
|
class AggregateFunctionTopKDate : public AggregateFunctionTopK<DataTypeDate::FieldType, is_weighted>
|
||||||
{
|
{
|
||||||
using AggregateFunctionTopK<DataTypeDate::FieldType>::AggregateFunctionTopK;
|
using AggregateFunctionTopK<DataTypeDate::FieldType, is_weighted>::AggregateFunctionTopK;
|
||||||
DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(std::make_shared<DataTypeDate>()); }
|
DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(std::make_shared<DataTypeDate>()); }
|
||||||
};
|
};
|
||||||
|
|
||||||
class AggregateFunctionTopKDateTime : public AggregateFunctionTopK<DataTypeDateTime::FieldType>
|
template <bool is_weighted>
|
||||||
|
class AggregateFunctionTopKDateTime : public AggregateFunctionTopK<DataTypeDateTime::FieldType, is_weighted>
|
||||||
{
|
{
|
||||||
using AggregateFunctionTopK<DataTypeDateTime::FieldType>::AggregateFunctionTopK;
|
using AggregateFunctionTopK<DataTypeDateTime::FieldType, is_weighted>::AggregateFunctionTopK;
|
||||||
DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(std::make_shared<DataTypeDateTime>()); }
|
DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(std::make_shared<DataTypeDateTime>()); }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
template <bool is_weighted>
|
||||||
static IAggregateFunction * createWithExtraTypes(const DataTypePtr & argument_type, UInt64 threshold)
|
static IAggregateFunction * createWithExtraTypes(const DataTypePtr & argument_type, UInt64 threshold)
|
||||||
{
|
{
|
||||||
WhichDataType which(argument_type);
|
WhichDataType which(argument_type);
|
||||||
if (which.idx == TypeIndex::Date) return new AggregateFunctionTopKDate(threshold);
|
if (which.idx == TypeIndex::Date)
|
||||||
if (which.idx == TypeIndex::DateTime) return new AggregateFunctionTopKDateTime(threshold);
|
return new AggregateFunctionTopKDate<is_weighted>(threshold);
|
||||||
|
if (which.idx == TypeIndex::DateTime)
|
||||||
|
return new AggregateFunctionTopKDateTime<is_weighted>(threshold);
|
||||||
|
|
||||||
/// Check that we can use plain version of AggregateFunctionTopKGeneric
|
/// Check that we can use plain version of AggregateFunctionTopKGeneric
|
||||||
if (argument_type->isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
|
if (argument_type->isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
|
||||||
return new AggregateFunctionTopKGeneric<true>(threshold, argument_type);
|
return new AggregateFunctionTopKGeneric<true, is_weighted>(threshold, argument_type);
|
||||||
else
|
else
|
||||||
return new AggregateFunctionTopKGeneric<false>(threshold, argument_type);
|
return new AggregateFunctionTopKGeneric<false, is_weighted>(threshold, argument_type);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <bool is_weighted>
|
||||||
AggregateFunctionPtr createAggregateFunctionTopK(const std::string & name, const DataTypes & argument_types, const Array & params)
|
AggregateFunctionPtr createAggregateFunctionTopK(const std::string & name, const DataTypes & argument_types, const Array & params)
|
||||||
{
|
{
|
||||||
|
if (!is_weighted)
|
||||||
|
{
|
||||||
assertUnary(name, argument_types);
|
assertUnary(name, argument_types);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
assertBinary(name, argument_types);
|
||||||
|
if (!isNumber(argument_types[1]))
|
||||||
|
throw Exception("The second argument for aggregate function 'topKWeighted' must have numeric type", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||||
|
}
|
||||||
|
|
||||||
UInt64 threshold = 10; /// default value
|
UInt64 threshold = 10; /// default value
|
||||||
|
|
||||||
if (!params.empty())
|
if (!params.empty())
|
||||||
{
|
{
|
||||||
if (params.size() != 1)
|
if (params.size() != 1)
|
||||||
throw Exception("Aggregate function " + name + " requires one parameter or less.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
throw Exception("Aggregate function " + name + " requires one parameter or less.",
|
||||||
|
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||||
|
|
||||||
UInt64 k = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), params[0]);
|
UInt64 k = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), params[0]);
|
||||||
|
|
||||||
@ -72,10 +90,10 @@ AggregateFunctionPtr createAggregateFunctionTopK(const std::string & name, const
|
|||||||
threshold = k;
|
threshold = k;
|
||||||
}
|
}
|
||||||
|
|
||||||
AggregateFunctionPtr res(createWithNumericType<AggregateFunctionTopK>(*argument_types[0], threshold));
|
AggregateFunctionPtr res(createWithNumericType<AggregateFunctionTopK, is_weighted>(*argument_types[0], threshold));
|
||||||
|
|
||||||
if (!res)
|
if (!res)
|
||||||
res = AggregateFunctionPtr(createWithExtraTypes(argument_types[0], threshold));
|
res = AggregateFunctionPtr(createWithExtraTypes<is_weighted>(argument_types[0], threshold));
|
||||||
|
|
||||||
if (!res)
|
if (!res)
|
||||||
throw Exception("Illegal type " + argument_types[0]->getName() +
|
throw Exception("Illegal type " + argument_types[0]->getName() +
|
||||||
@ -88,7 +106,8 @@ AggregateFunctionPtr createAggregateFunctionTopK(const std::string & name, const
|
|||||||
|
|
||||||
void registerAggregateFunctionTopK(AggregateFunctionFactory & factory)
|
void registerAggregateFunctionTopK(AggregateFunctionFactory & factory)
|
||||||
{
|
{
|
||||||
factory.registerFunction("topK", createAggregateFunctionTopK);
|
factory.registerFunction("topK", createAggregateFunctionTopK<false>);
|
||||||
|
factory.registerFunction("topKWeighted", createAggregateFunctionTopK<true>);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -38,13 +38,12 @@ struct AggregateFunctionTopKData
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T, bool is_weighted>
|
||||||
class AggregateFunctionTopK
|
class AggregateFunctionTopK
|
||||||
: public IAggregateFunctionDataHelper<AggregateFunctionTopKData<T>, AggregateFunctionTopK<T>>
|
: public IAggregateFunctionDataHelper<AggregateFunctionTopKData<T>, AggregateFunctionTopK<T, is_weighted>>
|
||||||
{
|
{
|
||||||
private:
|
protected:
|
||||||
using State = AggregateFunctionTopKData<T>;
|
using State = AggregateFunctionTopKData<T>;
|
||||||
|
|
||||||
UInt64 threshold;
|
UInt64 threshold;
|
||||||
UInt64 reserved;
|
UInt64 reserved;
|
||||||
|
|
||||||
@ -52,7 +51,7 @@ public:
|
|||||||
AggregateFunctionTopK(UInt64 threshold)
|
AggregateFunctionTopK(UInt64 threshold)
|
||||||
: threshold(threshold), reserved(TOP_K_LOAD_FACTOR * threshold) {}
|
: threshold(threshold), reserved(TOP_K_LOAD_FACTOR * threshold) {}
|
||||||
|
|
||||||
String getName() const override { return "topK"; }
|
String getName() const override { return is_weighted ? "topKWeighted" : "topK"; }
|
||||||
|
|
||||||
DataTypePtr getReturnType() const override
|
DataTypePtr getReturnType() const override
|
||||||
{
|
{
|
||||||
@ -64,6 +63,10 @@ public:
|
|||||||
auto & set = this->data(place).value;
|
auto & set = this->data(place).value;
|
||||||
if (set.capacity() != reserved)
|
if (set.capacity() != reserved)
|
||||||
set.resize(reserved);
|
set.resize(reserved);
|
||||||
|
|
||||||
|
if constexpr (is_weighted)
|
||||||
|
set.insert(static_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num], columns[1]->getUInt(row_num));
|
||||||
|
else
|
||||||
set.insert(static_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num]);
|
set.insert(static_cast<const ColumnVector<T> &>(*columns[0]).getData()[row_num]);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -125,8 +128,8 @@ struct AggregateFunctionTopKGenericData
|
|||||||
/** Template parameter with true value should be used for columns that store their elements in memory continuously.
|
/** Template parameter with true value should be used for columns that store their elements in memory continuously.
|
||||||
* For such columns topK() can be implemented more efficiently (especially for small numeric arrays).
|
* For such columns topK() can be implemented more efficiently (especially for small numeric arrays).
|
||||||
*/
|
*/
|
||||||
template <bool is_plain_column = false>
|
template <bool is_plain_column, bool is_weighted>
|
||||||
class AggregateFunctionTopKGeneric : public IAggregateFunctionDataHelper<AggregateFunctionTopKGenericData, AggregateFunctionTopKGeneric<is_plain_column>>
|
class AggregateFunctionTopKGeneric : public IAggregateFunctionDataHelper<AggregateFunctionTopKGenericData, AggregateFunctionTopKGeneric<is_plain_column, is_weighted>>
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
using State = AggregateFunctionTopKGenericData;
|
using State = AggregateFunctionTopKGenericData;
|
||||||
@ -141,7 +144,7 @@ public:
|
|||||||
AggregateFunctionTopKGeneric(UInt64 threshold, const DataTypePtr & input_data_type)
|
AggregateFunctionTopKGeneric(UInt64 threshold, const DataTypePtr & input_data_type)
|
||||||
: threshold(threshold), reserved(TOP_K_LOAD_FACTOR * threshold), input_data_type(input_data_type) {}
|
: threshold(threshold), reserved(TOP_K_LOAD_FACTOR * threshold), input_data_type(input_data_type) {}
|
||||||
|
|
||||||
String getName() const override { return "topK"; }
|
String getName() const override { return is_weighted ? "topKWeighted" : "topK"; }
|
||||||
|
|
||||||
DataTypePtr getReturnType() const override
|
DataTypePtr getReturnType() const override
|
||||||
{
|
{
|
||||||
@ -189,12 +192,18 @@ public:
|
|||||||
|
|
||||||
if constexpr (is_plain_column)
|
if constexpr (is_plain_column)
|
||||||
{
|
{
|
||||||
|
if constexpr (is_weighted)
|
||||||
|
set.insert(columns[0]->getDataAt(row_num), columns[1]->getUInt(row_num));
|
||||||
|
else
|
||||||
set.insert(columns[0]->getDataAt(row_num));
|
set.insert(columns[0]->getDataAt(row_num));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
const char * begin = nullptr;
|
const char * begin = nullptr;
|
||||||
StringRef str_serialized = columns[0]->serializeValueIntoArena(row_num, *arena, begin);
|
StringRef str_serialized = columns[0]->serializeValueIntoArena(row_num, *arena, begin);
|
||||||
|
if constexpr (is_weighted)
|
||||||
|
set.insert(str_serialized, columns[1]->getUInt(row_num));
|
||||||
|
else
|
||||||
set.insert(str_serialized);
|
set.insert(str_serialized);
|
||||||
arena->rollback(str_serialized.size);
|
arena->rollback(str_serialized.size);
|
||||||
}
|
}
|
||||||
@ -226,7 +235,6 @@ public:
|
|||||||
const char * getHeaderFilePath() const override { return __FILE__; }
|
const char * getHeaderFilePath() const override { return __FILE__; }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
#undef TOP_K_LOAD_FACTOR
|
#undef TOP_K_LOAD_FACTOR
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -5,8 +5,6 @@
|
|||||||
|
|
||||||
#include <ext/bit_cast.h>
|
#include <ext/bit_cast.h>
|
||||||
|
|
||||||
#include <AggregateFunctions/UniquesHashSet.h>
|
|
||||||
|
|
||||||
#include <IO/WriteHelpers.h>
|
#include <IO/WriteHelpers.h>
|
||||||
#include <IO/ReadHelpers.h>
|
#include <IO/ReadHelpers.h>
|
||||||
|
|
||||||
@ -14,13 +12,14 @@
|
|||||||
#include <DataTypes/DataTypeTuple.h>
|
#include <DataTypes/DataTypeTuple.h>
|
||||||
|
|
||||||
#include <Interpreters/AggregationCommon.h>
|
#include <Interpreters/AggregationCommon.h>
|
||||||
|
|
||||||
#include <Common/HashTable/HashSet.h>
|
#include <Common/HashTable/HashSet.h>
|
||||||
#include <Common/HyperLogLogWithSmallSetOptimization.h>
|
#include <Common/HyperLogLogWithSmallSetOptimization.h>
|
||||||
#include <Common/CombinedCardinalityEstimator.h>
|
#include <Common/CombinedCardinalityEstimator.h>
|
||||||
#include <Common/MemoryTracker.h>
|
#include <Common/MemoryTracker.h>
|
||||||
|
|
||||||
#include <Common/typeid_cast.h>
|
#include <Common/typeid_cast.h>
|
||||||
|
|
||||||
|
#include <AggregateFunctions/UniquesHashSet.h>
|
||||||
#include <AggregateFunctions/IAggregateFunction.h>
|
#include <AggregateFunctions/IAggregateFunction.h>
|
||||||
#include <AggregateFunctions/UniqVariadicHash.h>
|
#include <AggregateFunctions/UniqVariadicHash.h>
|
||||||
|
|
||||||
|
@ -1,6 +1,9 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <Common/CombinedCardinalityEstimator.h>
|
#include <Common/CombinedCardinalityEstimator.h>
|
||||||
|
#include <Common/FieldVisitors.h>
|
||||||
|
#include <Common/SipHash.h>
|
||||||
|
#include <Common/typeid_cast.h>
|
||||||
|
|
||||||
#include <DataTypes/DataTypeTuple.h>
|
#include <DataTypes/DataTypeTuple.h>
|
||||||
#include <DataTypes/DataTypeUUID.h>
|
#include <DataTypes/DataTypeUUID.h>
|
||||||
@ -14,10 +17,7 @@
|
|||||||
|
|
||||||
#include <Columns/ColumnVector.h>
|
#include <Columns/ColumnVector.h>
|
||||||
#include <Columns/ColumnsNumber.h>
|
#include <Columns/ColumnsNumber.h>
|
||||||
#include <Common/FieldVisitors.h>
|
|
||||||
#include <Common/SipHash.h>
|
|
||||||
|
|
||||||
#include <Common/typeid_cast.h>
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
@ -1,15 +1,19 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <Common/FieldVisitors.h>
|
#include <Common/FieldVisitors.h>
|
||||||
|
#include <Common/typeid_cast.h>
|
||||||
|
|
||||||
#include <AggregateFunctions/IAggregateFunction.h>
|
#include <AggregateFunctions/IAggregateFunction.h>
|
||||||
#include <AggregateFunctions/UniqVariadicHash.h>
|
#include <AggregateFunctions/UniqVariadicHash.h>
|
||||||
|
|
||||||
#include <DataTypes/DataTypesNumber.h>
|
#include <DataTypes/DataTypesNumber.h>
|
||||||
#include <DataTypes/DataTypeTuple.h>
|
#include <DataTypes/DataTypeTuple.h>
|
||||||
#include <DataTypes/DataTypeUUID.h>
|
#include <DataTypes/DataTypeUUID.h>
|
||||||
|
|
||||||
#include <Columns/ColumnsNumber.h>
|
#include <Columns/ColumnsNumber.h>
|
||||||
|
|
||||||
#include <IO/ReadHelpers.h>
|
#include <IO/ReadHelpers.h>
|
||||||
#include <IO/WriteHelpers.h>
|
#include <IO/WriteHelpers.h>
|
||||||
#include <Common/typeid_cast.h>
|
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
|
@ -20,7 +20,7 @@ namespace DB
|
|||||||
|
|
||||||
/** Create an aggregate function with a numeric type in the template parameter, depending on the type of the argument.
|
/** Create an aggregate function with a numeric type in the template parameter, depending on the type of the argument.
|
||||||
*/
|
*/
|
||||||
template <template <typename> class AggregateFunctionTemplate, typename ... TArgs>
|
template <template <typename> class AggregateFunctionTemplate, typename... TArgs>
|
||||||
static IAggregateFunction * createWithNumericType(const IDataType & argument_type, TArgs && ... args)
|
static IAggregateFunction * createWithNumericType(const IDataType & argument_type, TArgs && ... args)
|
||||||
{
|
{
|
||||||
WhichDataType which(argument_type);
|
WhichDataType which(argument_type);
|
||||||
@ -33,7 +33,20 @@ static IAggregateFunction * createWithNumericType(const IDataType & argument_typ
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <template <typename, typename> class AggregateFunctionTemplate, typename Data, typename ... TArgs>
|
template <template <typename, bool> class AggregateFunctionTemplate, bool bool_param, typename... TArgs>
|
||||||
|
static IAggregateFunction * createWithNumericType(const IDataType & argument_type, TArgs && ... args)
|
||||||
|
{
|
||||||
|
WhichDataType which(argument_type);
|
||||||
|
#define DISPATCH(TYPE) \
|
||||||
|
if (which.idx == TypeIndex::TYPE) return new AggregateFunctionTemplate<TYPE, bool_param>(std::forward<TArgs>(args)...);
|
||||||
|
FOR_NUMERIC_TYPES(DISPATCH)
|
||||||
|
#undef DISPATCH
|
||||||
|
if (which.idx == TypeIndex::Enum8) return new AggregateFunctionTemplate<Int8, bool_param>(std::forward<TArgs>(args)...);
|
||||||
|
if (which.idx == TypeIndex::Enum16) return new AggregateFunctionTemplate<Int16, bool_param>(std::forward<TArgs>(args)...);
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <template <typename, typename> class AggregateFunctionTemplate, typename Data, typename... TArgs>
|
||||||
static IAggregateFunction * createWithNumericType(const IDataType & argument_type, TArgs && ... args)
|
static IAggregateFunction * createWithNumericType(const IDataType & argument_type, TArgs && ... args)
|
||||||
{
|
{
|
||||||
WhichDataType which(argument_type);
|
WhichDataType which(argument_type);
|
||||||
@ -46,7 +59,7 @@ static IAggregateFunction * createWithNumericType(const IDataType & argument_typ
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <template <typename, typename> class AggregateFunctionTemplate, template <typename> class Data, typename ... TArgs>
|
template <template <typename, typename> class AggregateFunctionTemplate, template <typename> class Data, typename... TArgs>
|
||||||
static IAggregateFunction * createWithNumericType(const IDataType & argument_type, TArgs && ... args)
|
static IAggregateFunction * createWithNumericType(const IDataType & argument_type, TArgs && ... args)
|
||||||
{
|
{
|
||||||
WhichDataType which(argument_type);
|
WhichDataType which(argument_type);
|
||||||
@ -59,7 +72,7 @@ static IAggregateFunction * createWithNumericType(const IDataType & argument_typ
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <template <typename, typename> class AggregateFunctionTemplate, template <typename> class Data, typename ... TArgs>
|
template <template <typename, typename> class AggregateFunctionTemplate, template <typename> class Data, typename... TArgs>
|
||||||
static IAggregateFunction * createWithUnsignedIntegerType(const IDataType & argument_type, TArgs && ... args)
|
static IAggregateFunction * createWithUnsignedIntegerType(const IDataType & argument_type, TArgs && ... args)
|
||||||
{
|
{
|
||||||
WhichDataType which(argument_type);
|
WhichDataType which(argument_type);
|
||||||
@ -70,7 +83,7 @@ static IAggregateFunction * createWithUnsignedIntegerType(const IDataType & argu
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <template <typename> class AggregateFunctionTemplate, typename ... TArgs>
|
template <template <typename> class AggregateFunctionTemplate, typename... TArgs>
|
||||||
static IAggregateFunction * createWithNumericBasedType(const IDataType & argument_type, TArgs && ... args)
|
static IAggregateFunction * createWithNumericBasedType(const IDataType & argument_type, TArgs && ... args)
|
||||||
{
|
{
|
||||||
IAggregateFunction * f = createWithNumericType<AggregateFunctionTemplate>(argument_type, std::forward<TArgs>(args)...);
|
IAggregateFunction * f = createWithNumericType<AggregateFunctionTemplate>(argument_type, std::forward<TArgs>(args)...);
|
||||||
@ -85,7 +98,7 @@ static IAggregateFunction * createWithNumericBasedType(const IDataType & argumen
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <template <typename> class AggregateFunctionTemplate, typename ... TArgs>
|
template <template <typename> class AggregateFunctionTemplate, typename... TArgs>
|
||||||
static IAggregateFunction * createWithDecimalType(const IDataType & argument_type, TArgs && ... args)
|
static IAggregateFunction * createWithDecimalType(const IDataType & argument_type, TArgs && ... args)
|
||||||
{
|
{
|
||||||
WhichDataType which(argument_type);
|
WhichDataType which(argument_type);
|
||||||
@ -98,7 +111,7 @@ static IAggregateFunction * createWithDecimalType(const IDataType & argument_typ
|
|||||||
|
|
||||||
/** For template with two arguments.
|
/** For template with two arguments.
|
||||||
*/
|
*/
|
||||||
template <typename FirstType, template <typename, typename> class AggregateFunctionTemplate, typename ... TArgs>
|
template <typename FirstType, template <typename, typename> class AggregateFunctionTemplate, typename... TArgs>
|
||||||
static IAggregateFunction * createWithTwoNumericTypesSecond(const IDataType & second_type, TArgs && ... args)
|
static IAggregateFunction * createWithTwoNumericTypesSecond(const IDataType & second_type, TArgs && ... args)
|
||||||
{
|
{
|
||||||
WhichDataType which(second_type);
|
WhichDataType which(second_type);
|
||||||
@ -111,7 +124,7 @@ static IAggregateFunction * createWithTwoNumericTypesSecond(const IDataType & se
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <template <typename, typename> class AggregateFunctionTemplate, typename ... TArgs>
|
template <template <typename, typename> class AggregateFunctionTemplate, typename... TArgs>
|
||||||
static IAggregateFunction * createWithTwoNumericTypes(const IDataType & first_type, const IDataType & second_type, TArgs && ... args)
|
static IAggregateFunction * createWithTwoNumericTypes(const IDataType & first_type, const IDataType & second_type, TArgs && ... args)
|
||||||
{
|
{
|
||||||
WhichDataType which(first_type);
|
WhichDataType which(first_type);
|
||||||
|
@ -2,9 +2,9 @@
|
|||||||
|
|
||||||
#include <Common/PODArray.h>
|
#include <Common/PODArray.h>
|
||||||
#include <Common/NaNUtils.h>
|
#include <Common/NaNUtils.h>
|
||||||
|
#include <Core/Types.h>
|
||||||
#include <IO/WriteBuffer.h>
|
#include <IO/WriteBuffer.h>
|
||||||
#include <IO/ReadBuffer.h>
|
#include <IO/ReadBuffer.h>
|
||||||
#include <Core/Types.h>
|
|
||||||
#include <IO/VarInt.h>
|
#include <IO/VarInt.h>
|
||||||
|
|
||||||
|
|
||||||
@ -19,7 +19,7 @@ namespace ErrorCodes
|
|||||||
/** Calculates quantile by collecting all values into array
|
/** Calculates quantile by collecting all values into array
|
||||||
* and applying n-th element (introselect) algorithm for the resulting array.
|
* and applying n-th element (introselect) algorithm for the resulting array.
|
||||||
*
|
*
|
||||||
* It use O(N) memory and it is very inefficient in case of high amount of identical values.
|
* It uses O(N) memory and it is very inefficient in case of high amount of identical values.
|
||||||
* But it is very CPU efficient for not large datasets.
|
* But it is very CPU efficient for not large datasets.
|
||||||
*/
|
*/
|
||||||
template <typename Value>
|
template <typename Value>
|
||||||
|
@ -14,7 +14,7 @@ namespace ErrorCodes
|
|||||||
|
|
||||||
/** Calculates quantile by counting number of occurrences for each value in a hash map.
|
/** Calculates quantile by counting number of occurrences for each value in a hash map.
|
||||||
*
|
*
|
||||||
* It use O(distinct(N)) memory. Can be naturally applied for values with weight.
|
* It uses O(distinct(N)) memory. Can be naturally applied for values with weight.
|
||||||
* In case of many identical values, it can be more efficient than QuantileExact even when weight is not used.
|
* In case of many identical values, it can be more efficient than QuantileExact even when weight is not used.
|
||||||
*/
|
*/
|
||||||
template <typename Value>
|
template <typename Value>
|
||||||
|
@ -27,6 +27,7 @@ void registerAggregateFunctionUniqUpTo(AggregateFunctionFactory &);
|
|||||||
void registerAggregateFunctionTopK(AggregateFunctionFactory &);
|
void registerAggregateFunctionTopK(AggregateFunctionFactory &);
|
||||||
void registerAggregateFunctionsBitwise(AggregateFunctionFactory &);
|
void registerAggregateFunctionsBitwise(AggregateFunctionFactory &);
|
||||||
void registerAggregateFunctionsMaxIntersections(AggregateFunctionFactory &);
|
void registerAggregateFunctionsMaxIntersections(AggregateFunctionFactory &);
|
||||||
|
void registerAggregateFunctionEntropy(AggregateFunctionFactory &);
|
||||||
|
|
||||||
void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
|
void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
|
||||||
void registerAggregateFunctionCombinatorArray(AggregateFunctionCombinatorFactory &);
|
void registerAggregateFunctionCombinatorArray(AggregateFunctionCombinatorFactory &);
|
||||||
@ -65,6 +66,7 @@ void registerAggregateFunctions()
|
|||||||
registerAggregateFunctionsMaxIntersections(factory);
|
registerAggregateFunctionsMaxIntersections(factory);
|
||||||
registerAggregateFunctionHistogram(factory);
|
registerAggregateFunctionHistogram(factory);
|
||||||
registerAggregateFunctionRetention(factory);
|
registerAggregateFunctionRetention(factory);
|
||||||
|
registerAggregateFunctionEntropy(factory);
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
|
@ -1,7 +1,5 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <optional>
|
|
||||||
|
|
||||||
#include <common/logger_useful.h>
|
#include <common/logger_useful.h>
|
||||||
|
|
||||||
#include <Poco/Net/StreamSocket.h>
|
#include <Poco/Net/StreamSocket.h>
|
||||||
@ -121,6 +119,12 @@ public:
|
|||||||
UInt16 getPort() const;
|
UInt16 getPort() const;
|
||||||
const String & getDefaultDatabase() const;
|
const String & getDefaultDatabase() const;
|
||||||
|
|
||||||
|
/// For proper polling.
|
||||||
|
inline const auto & getTimeouts() const
|
||||||
|
{
|
||||||
|
return timeouts;
|
||||||
|
}
|
||||||
|
|
||||||
/// If last flag is true, you need to call sendExternalTablesData after.
|
/// If last flag is true, you need to call sendExternalTablesData after.
|
||||||
void sendQuery(
|
void sendQuery(
|
||||||
const String & query,
|
const String & query,
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
#include <Columns/ColumnAggregateFunction.h>
|
#include <Columns/ColumnAggregateFunction.h>
|
||||||
|
#include <Columns/ColumnsCommon.h>
|
||||||
#include <AggregateFunctions/AggregateFunctionState.h>
|
#include <AggregateFunctions/AggregateFunctionState.h>
|
||||||
#include <DataStreams/ColumnGathererStream.h>
|
#include <DataStreams/ColumnGathererStream.h>
|
||||||
#include <IO/WriteBufferFromArena.h>
|
#include <IO/WriteBufferFromArena.h>
|
||||||
@ -6,7 +7,6 @@
|
|||||||
#include <Common/AlignedBuffer.h>
|
#include <Common/AlignedBuffer.h>
|
||||||
#include <Common/typeid_cast.h>
|
#include <Common/typeid_cast.h>
|
||||||
#include <Common/Arena.h>
|
#include <Common/Arena.h>
|
||||||
#include <Columns/ColumnsCommon.h>
|
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
@ -182,7 +182,7 @@ ColumnPtr ColumnAggregateFunction::filter(const Filter & filter, ssize_t result_
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
ColumnPtr ColumnAggregateFunction::permute(const Permutation & perm, size_t limit) const
|
ColumnPtr ColumnAggregateFunction::permute(const Permutation & perm, UInt64 limit) const
|
||||||
{
|
{
|
||||||
size_t size = data.size();
|
size_t size = data.size();
|
||||||
|
|
||||||
@ -203,13 +203,13 @@ ColumnPtr ColumnAggregateFunction::permute(const Permutation & perm, size_t limi
|
|||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
ColumnPtr ColumnAggregateFunction::index(const IColumn & indexes, size_t limit) const
|
ColumnPtr ColumnAggregateFunction::index(const IColumn & indexes, UInt64 limit) const
|
||||||
{
|
{
|
||||||
return selectIndexImpl(*this, indexes, limit);
|
return selectIndexImpl(*this, indexes, limit);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Type>
|
template <typename Type>
|
||||||
ColumnPtr ColumnAggregateFunction::indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const
|
ColumnPtr ColumnAggregateFunction::indexImpl(const PaddedPODArray<Type> & indexes, UInt64 limit) const
|
||||||
{
|
{
|
||||||
auto res = createView();
|
auto res = createView();
|
||||||
|
|
||||||
|
@ -161,12 +161,12 @@ public:
|
|||||||
|
|
||||||
ColumnPtr filter(const Filter & filter, ssize_t result_size_hint) const override;
|
ColumnPtr filter(const Filter & filter, ssize_t result_size_hint) const override;
|
||||||
|
|
||||||
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
|
ColumnPtr permute(const Permutation & perm, UInt64 limit) const override;
|
||||||
|
|
||||||
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
|
ColumnPtr index(const IColumn & indexes, UInt64 limit) const override;
|
||||||
|
|
||||||
template <typename Type>
|
template <typename Type>
|
||||||
ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;
|
ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, UInt64 limit) const;
|
||||||
|
|
||||||
ColumnPtr replicate(const Offsets & offsets) const override;
|
ColumnPtr replicate(const Offsets & offsets) const override;
|
||||||
|
|
||||||
@ -179,7 +179,7 @@ public:
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
|
void getPermutation(bool reverse, UInt64 limit, int nan_direction_hint, Permutation & res) const override;
|
||||||
|
|
||||||
/** More efficient manipulation methods */
|
/** More efficient manipulation methods */
|
||||||
Container & getData()
|
Container & getData()
|
||||||
|
@ -589,7 +589,7 @@ ColumnPtr ColumnArray::filterTuple(const Filter & filt, ssize_t result_size_hint
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
ColumnPtr ColumnArray::permute(const Permutation & perm, size_t limit) const
|
ColumnPtr ColumnArray::permute(const Permutation & perm, UInt64 limit) const
|
||||||
{
|
{
|
||||||
size_t size = getOffsets().size();
|
size_t size = getOffsets().size();
|
||||||
|
|
||||||
@ -626,13 +626,13 @@ ColumnPtr ColumnArray::permute(const Permutation & perm, size_t limit) const
|
|||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
ColumnPtr ColumnArray::index(const IColumn & indexes, size_t limit) const
|
ColumnPtr ColumnArray::index(const IColumn & indexes, UInt64 limit) const
|
||||||
{
|
{
|
||||||
return selectIndexImpl(*this, indexes, limit);
|
return selectIndexImpl(*this, indexes, limit);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
ColumnPtr ColumnArray::indexImpl(const PaddedPODArray<T> & indexes, size_t limit) const
|
ColumnPtr ColumnArray::indexImpl(const PaddedPODArray<T> & indexes, UInt64 limit) const
|
||||||
{
|
{
|
||||||
if (limit == 0)
|
if (limit == 0)
|
||||||
return ColumnArray::create(data);
|
return ColumnArray::create(data);
|
||||||
@ -664,7 +664,7 @@ ColumnPtr ColumnArray::indexImpl(const PaddedPODArray<T> & indexes, size_t limit
|
|||||||
|
|
||||||
INSTANTIATE_INDEX_IMPL(ColumnArray)
|
INSTANTIATE_INDEX_IMPL(ColumnArray)
|
||||||
|
|
||||||
void ColumnArray::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
|
void ColumnArray::getPermutation(bool reverse, UInt64 limit, int nan_direction_hint, Permutation & res) const
|
||||||
{
|
{
|
||||||
size_t s = size();
|
size_t s = size();
|
||||||
if (limit >= s)
|
if (limit >= s)
|
||||||
|
@ -70,11 +70,11 @@ public:
|
|||||||
void insertDefault() override;
|
void insertDefault() override;
|
||||||
void popBack(size_t n) override;
|
void popBack(size_t n) override;
|
||||||
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
|
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
|
||||||
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
|
ColumnPtr permute(const Permutation & perm, UInt64 limit) const override;
|
||||||
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
|
ColumnPtr index(const IColumn & indexes, UInt64 limit) const override;
|
||||||
template <typename Type> ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;
|
template <typename Type> ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, UInt64 limit) const;
|
||||||
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override;
|
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override;
|
||||||
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
|
void getPermutation(bool reverse, UInt64 limit, int nan_direction_hint, Permutation & res) const override;
|
||||||
void reserve(size_t n) override;
|
void reserve(size_t n) override;
|
||||||
size_t byteSize() const override;
|
size_t byteSize() const override;
|
||||||
size_t allocatedBytes() const override;
|
size_t allocatedBytes() const override;
|
||||||
|
@ -54,7 +54,7 @@ ColumnPtr ColumnConst::replicate(const Offsets & offsets) const
|
|||||||
return ColumnConst::create(data, replicated_size);
|
return ColumnConst::create(data, replicated_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
ColumnPtr ColumnConst::permute(const Permutation & perm, size_t limit) const
|
ColumnPtr ColumnConst::permute(const Permutation & perm, UInt64 limit) const
|
||||||
{
|
{
|
||||||
if (limit == 0)
|
if (limit == 0)
|
||||||
limit = s;
|
limit = s;
|
||||||
@ -68,7 +68,7 @@ ColumnPtr ColumnConst::permute(const Permutation & perm, size_t limit) const
|
|||||||
return ColumnConst::create(data, limit);
|
return ColumnConst::create(data, limit);
|
||||||
}
|
}
|
||||||
|
|
||||||
ColumnPtr ColumnConst::index(const IColumn & indexes, size_t limit) const
|
ColumnPtr ColumnConst::index(const IColumn & indexes, UInt64 limit) const
|
||||||
{
|
{
|
||||||
if (limit == 0)
|
if (limit == 0)
|
||||||
limit = indexes.size();
|
limit = indexes.size();
|
||||||
|
@ -154,9 +154,9 @@ public:
|
|||||||
|
|
||||||
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
|
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
|
||||||
ColumnPtr replicate(const Offsets & offsets) const override;
|
ColumnPtr replicate(const Offsets & offsets) const override;
|
||||||
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
|
ColumnPtr permute(const Permutation & perm, UInt64 limit) const override;
|
||||||
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
|
ColumnPtr index(const IColumn & indexes, UInt64 limit) const override;
|
||||||
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
|
void getPermutation(bool reverse, UInt64 limit, int nan_direction_hint, Permutation & res) const override;
|
||||||
|
|
||||||
size_t byteSize() const override
|
size_t byteSize() const override
|
||||||
{
|
{
|
||||||
|
@ -63,7 +63,7 @@ void ColumnDecimal<T>::updateHashWithValue(size_t n, SipHash & hash) const
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void ColumnDecimal<T>::getPermutation(bool reverse, size_t limit, int , IColumn::Permutation & res) const
|
void ColumnDecimal<T>::getPermutation(bool reverse, UInt64 limit, int , IColumn::Permutation & res) const
|
||||||
{
|
{
|
||||||
#if 1 /// TODO: perf test
|
#if 1 /// TODO: perf test
|
||||||
if (data.size() <= std::numeric_limits<UInt32>::max())
|
if (data.size() <= std::numeric_limits<UInt32>::max())
|
||||||
@ -82,7 +82,7 @@ void ColumnDecimal<T>::getPermutation(bool reverse, size_t limit, int , IColumn:
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
ColumnPtr ColumnDecimal<T>::permute(const IColumn::Permutation & perm, size_t limit) const
|
ColumnPtr ColumnDecimal<T>::permute(const IColumn::Permutation & perm, UInt64 limit) const
|
||||||
{
|
{
|
||||||
size_t size = limit ? std::min(data.size(), limit) : data.size();
|
size_t size = limit ? std::min(data.size(), limit) : data.size();
|
||||||
if (perm.size() < size)
|
if (perm.size() < size)
|
||||||
@ -173,7 +173,7 @@ ColumnPtr ColumnDecimal<T>::filter(const IColumn::Filter & filt, ssize_t result_
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
ColumnPtr ColumnDecimal<T>::index(const IColumn & indexes, size_t limit) const
|
ColumnPtr ColumnDecimal<T>::index(const IColumn & indexes, UInt64 limit) const
|
||||||
{
|
{
|
||||||
return selectIndexImpl(*this, indexes, limit);
|
return selectIndexImpl(*this, indexes, limit);
|
||||||
}
|
}
|
||||||
|
@ -101,7 +101,7 @@ public:
|
|||||||
const char * deserializeAndInsertFromArena(const char * pos) override;
|
const char * deserializeAndInsertFromArena(const char * pos) override;
|
||||||
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
||||||
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override;
|
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override;
|
||||||
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override;
|
void getPermutation(bool reverse, UInt64 limit, int nan_direction_hint, IColumn::Permutation & res) const override;
|
||||||
|
|
||||||
MutableColumnPtr cloneResized(size_t size) const override;
|
MutableColumnPtr cloneResized(size_t size) const override;
|
||||||
|
|
||||||
@ -116,11 +116,11 @@ public:
|
|||||||
bool isDefaultAt(size_t n) const override { return data[n] == 0; }
|
bool isDefaultAt(size_t n) const override { return data[n] == 0; }
|
||||||
|
|
||||||
ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override;
|
ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override;
|
||||||
ColumnPtr permute(const IColumn::Permutation & perm, size_t limit) const override;
|
ColumnPtr permute(const IColumn::Permutation & perm, UInt64 limit) const override;
|
||||||
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
|
ColumnPtr index(const IColumn & indexes, UInt64 limit) const override;
|
||||||
|
|
||||||
template <typename Type>
|
template <typename Type>
|
||||||
ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;
|
ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, UInt64 limit) const;
|
||||||
|
|
||||||
ColumnPtr replicate(const IColumn::Offsets & offsets) const override;
|
ColumnPtr replicate(const IColumn::Offsets & offsets) const override;
|
||||||
void getExtremes(Field & min, Field & max) const override;
|
void getExtremes(Field & min, Field & max) const override;
|
||||||
@ -144,7 +144,7 @@ protected:
|
|||||||
UInt32 scale;
|
UInt32 scale;
|
||||||
|
|
||||||
template <typename U>
|
template <typename U>
|
||||||
void permutation(bool reverse, size_t limit, PaddedPODArray<U> & res) const
|
void permutation(bool reverse, UInt64 limit, PaddedPODArray<U> & res) const
|
||||||
{
|
{
|
||||||
size_t s = data.size();
|
size_t s = data.size();
|
||||||
res.resize(s);
|
res.resize(s);
|
||||||
@ -164,7 +164,7 @@ protected:
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
template <typename Type>
|
template <typename Type>
|
||||||
ColumnPtr ColumnDecimal<T>::indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const
|
ColumnPtr ColumnDecimal<T>::indexImpl(const PaddedPODArray<Type> & indexes, UInt64 limit) const
|
||||||
{
|
{
|
||||||
size_t size = indexes.size();
|
size_t size = indexes.size();
|
||||||
|
|
||||||
|
@ -112,7 +112,7 @@ struct ColumnFixedString::less
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
void ColumnFixedString::getPermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res) const
|
void ColumnFixedString::getPermutation(bool reverse, UInt64 limit, int /*nan_direction_hint*/, Permutation & res) const
|
||||||
{
|
{
|
||||||
size_t s = size();
|
size_t s = size();
|
||||||
res.resize(s);
|
res.resize(s);
|
||||||
@ -231,7 +231,7 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result
|
|||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
ColumnPtr ColumnFixedString::permute(const Permutation & perm, size_t limit) const
|
ColumnPtr ColumnFixedString::permute(const Permutation & perm, UInt64 limit) const
|
||||||
{
|
{
|
||||||
size_t col_size = size();
|
size_t col_size = size();
|
||||||
|
|
||||||
@ -260,14 +260,14 @@ ColumnPtr ColumnFixedString::permute(const Permutation & perm, size_t limit) con
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
ColumnPtr ColumnFixedString::index(const IColumn & indexes, size_t limit) const
|
ColumnPtr ColumnFixedString::index(const IColumn & indexes, UInt64 limit) const
|
||||||
{
|
{
|
||||||
return selectIndexImpl(*this, indexes, limit);
|
return selectIndexImpl(*this, indexes, limit);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template <typename Type>
|
template <typename Type>
|
||||||
ColumnPtr ColumnFixedString::indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const
|
ColumnPtr ColumnFixedString::indexImpl(const PaddedPODArray<Type> & indexes, UInt64 limit) const
|
||||||
{
|
{
|
||||||
if (limit == 0)
|
if (limit == 0)
|
||||||
return ColumnFixedString::create(n);
|
return ColumnFixedString::create(n);
|
||||||
|
@ -101,18 +101,18 @@ public:
|
|||||||
return memcmp(&chars[p1 * n], &rhs.chars[p2 * n], n);
|
return memcmp(&chars[p1 * n], &rhs.chars[p2 * n], n);
|
||||||
}
|
}
|
||||||
|
|
||||||
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
|
void getPermutation(bool reverse, UInt64 limit, int nan_direction_hint, Permutation & res) const override;
|
||||||
|
|
||||||
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
|
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
|
||||||
|
|
||||||
ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override;
|
ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override;
|
||||||
|
|
||||||
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
|
ColumnPtr permute(const Permutation & perm, UInt64 limit) const override;
|
||||||
|
|
||||||
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
|
ColumnPtr index(const IColumn & indexes, UInt64 limit) const override;
|
||||||
|
|
||||||
template <typename Type>
|
template <typename Type>
|
||||||
ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;
|
ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, UInt64 limit) const;
|
||||||
|
|
||||||
ColumnPtr replicate(const Offsets & offsets) const override;
|
ColumnPtr replicate(const Offsets & offsets) const override;
|
||||||
|
|
||||||
@ -138,7 +138,7 @@ public:
|
|||||||
StringRef getRawData() const override { return StringRef(chars.data(), chars.size()); }
|
StringRef getRawData() const override { return StringRef(chars.data(), chars.size()); }
|
||||||
|
|
||||||
/// Specialized part of interface, not from IColumn.
|
/// Specialized part of interface, not from IColumn.
|
||||||
|
void insertString(const String & string) { insertData(string.c_str(), string.size()); }
|
||||||
Chars & getChars() { return chars; }
|
Chars & getChars() { return chars; }
|
||||||
const Chars & getChars() const { return chars; }
|
const Chars & getChars() const { return chars; }
|
||||||
|
|
||||||
|
@ -70,7 +70,7 @@ ColumnPtr ColumnFunction::filter(const Filter & filt, ssize_t result_size_hint)
|
|||||||
return ColumnFunction::create(filtered_size, function, capture);
|
return ColumnFunction::create(filtered_size, function, capture);
|
||||||
}
|
}
|
||||||
|
|
||||||
ColumnPtr ColumnFunction::permute(const Permutation & perm, size_t limit) const
|
ColumnPtr ColumnFunction::permute(const Permutation & perm, UInt64 limit) const
|
||||||
{
|
{
|
||||||
if (limit == 0)
|
if (limit == 0)
|
||||||
limit = size_;
|
limit = size_;
|
||||||
@ -88,7 +88,7 @@ ColumnPtr ColumnFunction::permute(const Permutation & perm, size_t limit) const
|
|||||||
return ColumnFunction::create(limit, function, capture);
|
return ColumnFunction::create(limit, function, capture);
|
||||||
}
|
}
|
||||||
|
|
||||||
ColumnPtr ColumnFunction::index(const IColumn & indexes, size_t limit) const
|
ColumnPtr ColumnFunction::index(const IColumn & indexes, UInt64 limit) const
|
||||||
{
|
{
|
||||||
ColumnsWithTypeAndName capture = captured_columns;
|
ColumnsWithTypeAndName capture = captured_columns;
|
||||||
for (auto & column : capture)
|
for (auto & column : capture)
|
||||||
|
@ -32,8 +32,8 @@ public:
|
|||||||
ColumnPtr cut(size_t start, size_t length) const override;
|
ColumnPtr cut(size_t start, size_t length) const override;
|
||||||
ColumnPtr replicate(const Offsets & offsets) const override;
|
ColumnPtr replicate(const Offsets & offsets) const override;
|
||||||
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
|
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
|
||||||
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
|
ColumnPtr permute(const Permutation & perm, UInt64 limit) const override;
|
||||||
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
|
ColumnPtr index(const IColumn & indexes, UInt64 limit) const override;
|
||||||
void insertDefault() override;
|
void insertDefault() override;
|
||||||
void popBack(size_t n) override;
|
void popBack(size_t n) override;
|
||||||
std::vector<MutableColumnPtr> scatter(IColumn::ColumnIndex num_columns,
|
std::vector<MutableColumnPtr> scatter(IColumn::ColumnIndex num_columns,
|
||||||
|
@ -250,7 +250,7 @@ int ColumnLowCardinality::compareAt(size_t n, size_t m, const IColumn & rhs, int
|
|||||||
return getDictionary().compareAt(n_index, m_index, low_cardinality_column.getDictionary(), nan_direction_hint);
|
return getDictionary().compareAt(n_index, m_index, low_cardinality_column.getDictionary(), nan_direction_hint);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ColumnLowCardinality::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
|
void ColumnLowCardinality::getPermutation(bool reverse, UInt64 limit, int nan_direction_hint, Permutation & res) const
|
||||||
{
|
{
|
||||||
if (limit == 0)
|
if (limit == 0)
|
||||||
limit = size();
|
limit = size();
|
||||||
@ -343,7 +343,7 @@ void ColumnLowCardinality::compactIfSharedDictionary()
|
|||||||
|
|
||||||
|
|
||||||
ColumnLowCardinality::DictionaryEncodedColumn
|
ColumnLowCardinality::DictionaryEncodedColumn
|
||||||
ColumnLowCardinality::getMinimalDictionaryEncodedColumn(size_t offset, size_t limit) const
|
ColumnLowCardinality::getMinimalDictionaryEncodedColumn(UInt64 offset, UInt64 limit) const
|
||||||
{
|
{
|
||||||
MutableColumnPtr sub_indexes = (*std::move(idx.getPositions()->cut(offset, limit))).mutate();
|
MutableColumnPtr sub_indexes = (*std::move(idx.getPositions()->cut(offset, limit))).mutate();
|
||||||
auto indexes_map = mapUniqueIndex(*sub_indexes);
|
auto indexes_map = mapUniqueIndex(*sub_indexes);
|
||||||
@ -527,7 +527,7 @@ void ColumnLowCardinality::Index::insertPosition(UInt64 position)
|
|||||||
checkSizeOfType();
|
checkSizeOfType();
|
||||||
}
|
}
|
||||||
|
|
||||||
void ColumnLowCardinality::Index::insertPositionsRange(const IColumn & column, size_t offset, size_t limit)
|
void ColumnLowCardinality::Index::insertPositionsRange(const IColumn & column, UInt64 offset, UInt64 limit)
|
||||||
{
|
{
|
||||||
auto insertForType = [&](auto type)
|
auto insertForType = [&](auto type)
|
||||||
{
|
{
|
||||||
@ -550,10 +550,10 @@ void ColumnLowCardinality::Index::insertPositionsRange(const IColumn & column, s
|
|||||||
auto & positions_data = getPositionsData<CurIndexType>();
|
auto & positions_data = getPositionsData<CurIndexType>();
|
||||||
const auto & column_data = column_ptr->getData();
|
const auto & column_data = column_ptr->getData();
|
||||||
|
|
||||||
size_t size = positions_data.size();
|
UInt64 size = positions_data.size();
|
||||||
positions_data.resize(size + limit);
|
positions_data.resize(size + limit);
|
||||||
|
|
||||||
for (size_t i = 0; i < limit; ++i)
|
for (UInt64 i = 0; i < limit; ++i)
|
||||||
positions_data[size + i] = column_data[offset + i];
|
positions_data[size + i] = column_data[offset + i];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -90,19 +90,19 @@ public:
|
|||||||
return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().filter(filt, result_size_hint));
|
return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().filter(filt, result_size_hint));
|
||||||
}
|
}
|
||||||
|
|
||||||
ColumnPtr permute(const Permutation & perm, size_t limit) const override
|
ColumnPtr permute(const Permutation & perm, UInt64 limit) const override
|
||||||
{
|
{
|
||||||
return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().permute(perm, limit));
|
return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().permute(perm, limit));
|
||||||
}
|
}
|
||||||
|
|
||||||
ColumnPtr index(const IColumn & indexes_, size_t limit) const override
|
ColumnPtr index(const IColumn & indexes_, UInt64 limit) const override
|
||||||
{
|
{
|
||||||
return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().index(indexes_, limit));
|
return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().index(indexes_, limit));
|
||||||
}
|
}
|
||||||
|
|
||||||
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
|
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
|
||||||
|
|
||||||
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
|
void getPermutation(bool reverse, UInt64 limit, int nan_direction_hint, Permutation & res) const override;
|
||||||
|
|
||||||
ColumnPtr replicate(const Offsets & offsets) const override
|
ColumnPtr replicate(const Offsets & offsets) const override
|
||||||
{
|
{
|
||||||
@ -180,7 +180,7 @@ public:
|
|||||||
ColumnPtr indexes;
|
ColumnPtr indexes;
|
||||||
};
|
};
|
||||||
|
|
||||||
DictionaryEncodedColumn getMinimalDictionaryEncodedColumn(size_t offset, size_t limit) const;
|
DictionaryEncodedColumn getMinimalDictionaryEncodedColumn(UInt64 offset, UInt64 limit) const;
|
||||||
|
|
||||||
ColumnPtr countKeys() const;
|
ColumnPtr countKeys() const;
|
||||||
|
|
||||||
@ -196,7 +196,7 @@ public:
|
|||||||
ColumnPtr & getPositionsPtr() { return positions; }
|
ColumnPtr & getPositionsPtr() { return positions; }
|
||||||
size_t getPositionAt(size_t row) const;
|
size_t getPositionAt(size_t row) const;
|
||||||
void insertPosition(UInt64 position);
|
void insertPosition(UInt64 position);
|
||||||
void insertPositionsRange(const IColumn & column, size_t offset, size_t limit);
|
void insertPositionsRange(const IColumn & column, UInt64 offset, UInt64 limit);
|
||||||
|
|
||||||
void popBack(size_t n) { positions->assumeMutableRef().popBack(n); }
|
void popBack(size_t n) { positions->assumeMutableRef().popBack(n); }
|
||||||
void reserve(size_t n) { positions->assumeMutableRef().reserve(n); }
|
void reserve(size_t n) { positions->assumeMutableRef().reserve(n); }
|
||||||
|
@ -158,14 +158,14 @@ ColumnPtr ColumnNullable::filter(const Filter & filt, ssize_t result_size_hint)
|
|||||||
return ColumnNullable::create(filtered_data, filtered_null_map);
|
return ColumnNullable::create(filtered_data, filtered_null_map);
|
||||||
}
|
}
|
||||||
|
|
||||||
ColumnPtr ColumnNullable::permute(const Permutation & perm, size_t limit) const
|
ColumnPtr ColumnNullable::permute(const Permutation & perm, UInt64 limit) const
|
||||||
{
|
{
|
||||||
ColumnPtr permuted_data = getNestedColumn().permute(perm, limit);
|
ColumnPtr permuted_data = getNestedColumn().permute(perm, limit);
|
||||||
ColumnPtr permuted_null_map = getNullMapColumn().permute(perm, limit);
|
ColumnPtr permuted_null_map = getNullMapColumn().permute(perm, limit);
|
||||||
return ColumnNullable::create(permuted_data, permuted_null_map);
|
return ColumnNullable::create(permuted_data, permuted_null_map);
|
||||||
}
|
}
|
||||||
|
|
||||||
ColumnPtr ColumnNullable::index(const IColumn & indexes, size_t limit) const
|
ColumnPtr ColumnNullable::index(const IColumn & indexes, UInt64 limit) const
|
||||||
{
|
{
|
||||||
ColumnPtr indexed_data = getNestedColumn().index(indexes, limit);
|
ColumnPtr indexed_data = getNestedColumn().index(indexes, limit);
|
||||||
ColumnPtr indexed_null_map = getNullMapColumn().index(indexes, limit);
|
ColumnPtr indexed_null_map = getNullMapColumn().index(indexes, limit);
|
||||||
@ -197,7 +197,7 @@ int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null
|
|||||||
return getNestedColumn().compareAt(n, m, nested_rhs, null_direction_hint);
|
return getNestedColumn().compareAt(n, m, nested_rhs, null_direction_hint);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ColumnNullable::getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const
|
void ColumnNullable::getPermutation(bool reverse, UInt64 limit, int null_direction_hint, Permutation & res) const
|
||||||
{
|
{
|
||||||
/// Cannot pass limit because of unknown amount of NULLs.
|
/// Cannot pass limit because of unknown amount of NULLs.
|
||||||
getNestedColumn().getPermutation(reverse, 0, null_direction_hint, res);
|
getNestedColumn().getPermutation(reverse, 0, null_direction_hint, res);
|
||||||
|
@ -64,10 +64,10 @@ public:
|
|||||||
|
|
||||||
void popBack(size_t n) override;
|
void popBack(size_t n) override;
|
||||||
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
|
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
|
||||||
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
|
ColumnPtr permute(const Permutation & perm, UInt64 limit) const override;
|
||||||
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
|
ColumnPtr index(const IColumn & indexes, UInt64 limit) const override;
|
||||||
int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override;
|
int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override;
|
||||||
void getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const override;
|
void getPermutation(bool reverse, UInt64 limit, int null_direction_hint, Permutation & res) const override;
|
||||||
void reserve(size_t n) override;
|
void reserve(size_t n) override;
|
||||||
size_t byteSize() const override;
|
size_t byteSize() const override;
|
||||||
size_t allocatedBytes() const override;
|
size_t allocatedBytes() const override;
|
||||||
|
@ -111,7 +111,7 @@ ColumnPtr ColumnString::filter(const Filter & filt, ssize_t result_size_hint) co
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
ColumnPtr ColumnString::permute(const Permutation & perm, size_t limit) const
|
ColumnPtr ColumnString::permute(const Permutation & perm, UInt64 limit) const
|
||||||
{
|
{
|
||||||
size_t size = offsets.size();
|
size_t size = offsets.size();
|
||||||
|
|
||||||
@ -191,13 +191,13 @@ const char * ColumnString::deserializeAndInsertFromArena(const char * pos)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
ColumnPtr ColumnString::index(const IColumn & indexes, size_t limit) const
|
ColumnPtr ColumnString::index(const IColumn & indexes, UInt64 limit) const
|
||||||
{
|
{
|
||||||
return selectIndexImpl(*this, indexes, limit);
|
return selectIndexImpl(*this, indexes, limit);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Type>
|
template <typename Type>
|
||||||
ColumnPtr ColumnString::indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const
|
ColumnPtr ColumnString::indexImpl(const PaddedPODArray<Type> & indexes, UInt64 limit) const
|
||||||
{
|
{
|
||||||
if (limit == 0)
|
if (limit == 0)
|
||||||
return ColumnString::create();
|
return ColumnString::create();
|
||||||
@ -251,7 +251,7 @@ struct ColumnString::less
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
void ColumnString::getPermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res) const
|
void ColumnString::getPermutation(bool reverse, UInt64 limit, int /*nan_direction_hint*/, Permutation & res) const
|
||||||
{
|
{
|
||||||
size_t s = offsets.size();
|
size_t s = offsets.size();
|
||||||
res.resize(s);
|
res.resize(s);
|
||||||
@ -389,7 +389,7 @@ struct ColumnString::lessWithCollation
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
void ColumnString::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, Permutation & res) const
|
void ColumnString::getPermutationWithCollation(const Collator & collator, bool reverse, UInt64 limit, Permutation & res) const
|
||||||
{
|
{
|
||||||
size_t s = offsets.size();
|
size_t s = offsets.size();
|
||||||
res.resize(s);
|
res.resize(s);
|
||||||
|
@ -194,12 +194,12 @@ public:
|
|||||||
|
|
||||||
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
|
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
|
||||||
|
|
||||||
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
|
ColumnPtr permute(const Permutation & perm, UInt64 limit) const override;
|
||||||
|
|
||||||
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
|
ColumnPtr index(const IColumn & indexes, UInt64 limit) const override;
|
||||||
|
|
||||||
template <typename Type>
|
template <typename Type>
|
||||||
ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;
|
ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, UInt64 limit) const;
|
||||||
|
|
||||||
void insertDefault() override
|
void insertDefault() override
|
||||||
{
|
{
|
||||||
@ -225,10 +225,10 @@ public:
|
|||||||
/// Variant of compareAt for string comparison with respect of collation.
|
/// Variant of compareAt for string comparison with respect of collation.
|
||||||
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, const Collator & collator) const;
|
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, const Collator & collator) const;
|
||||||
|
|
||||||
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
|
void getPermutation(bool reverse, UInt64 limit, int nan_direction_hint, Permutation & res) const override;
|
||||||
|
|
||||||
/// Sorting with respect of collation.
|
/// Sorting with respect of collation.
|
||||||
void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, Permutation & res) const;
|
void getPermutationWithCollation(const Collator & collator, bool reverse, UInt64 limit, Permutation & res) const;
|
||||||
|
|
||||||
ColumnPtr replicate(const Offsets & replicate_offsets) const override;
|
ColumnPtr replicate(const Offsets & replicate_offsets) const override;
|
||||||
|
|
||||||
|
@ -170,7 +170,7 @@ ColumnPtr ColumnTuple::filter(const Filter & filt, ssize_t result_size_hint) con
|
|||||||
return ColumnTuple::create(new_columns);
|
return ColumnTuple::create(new_columns);
|
||||||
}
|
}
|
||||||
|
|
||||||
ColumnPtr ColumnTuple::permute(const Permutation & perm, size_t limit) const
|
ColumnPtr ColumnTuple::permute(const Permutation & perm, UInt64 limit) const
|
||||||
{
|
{
|
||||||
const size_t tuple_size = columns.size();
|
const size_t tuple_size = columns.size();
|
||||||
Columns new_columns(tuple_size);
|
Columns new_columns(tuple_size);
|
||||||
@ -181,7 +181,7 @@ ColumnPtr ColumnTuple::permute(const Permutation & perm, size_t limit) const
|
|||||||
return ColumnTuple::create(new_columns);
|
return ColumnTuple::create(new_columns);
|
||||||
}
|
}
|
||||||
|
|
||||||
ColumnPtr ColumnTuple::index(const IColumn & indexes, size_t limit) const
|
ColumnPtr ColumnTuple::index(const IColumn & indexes, UInt64 limit) const
|
||||||
{
|
{
|
||||||
const size_t tuple_size = columns.size();
|
const size_t tuple_size = columns.size();
|
||||||
Columns new_columns(tuple_size);
|
Columns new_columns(tuple_size);
|
||||||
@ -261,7 +261,7 @@ struct ColumnTuple::Less
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
void ColumnTuple::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
|
void ColumnTuple::getPermutation(bool reverse, UInt64 limit, int nan_direction_hint, Permutation & res) const
|
||||||
{
|
{
|
||||||
size_t rows = size();
|
size_t rows = size();
|
||||||
res.resize(rows);
|
res.resize(rows);
|
||||||
|
@ -60,14 +60,14 @@ public:
|
|||||||
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
void updateHashWithValue(size_t n, SipHash & hash) const override;
|
||||||
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
|
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
|
||||||
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
|
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
|
||||||
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
|
ColumnPtr permute(const Permutation & perm, UInt64 limit) const override;
|
||||||
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
|
ColumnPtr index(const IColumn & indexes, UInt64 limit) const override;
|
||||||
ColumnPtr replicate(const Offsets & offsets) const override;
|
ColumnPtr replicate(const Offsets & offsets) const override;
|
||||||
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
|
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
|
||||||
void gather(ColumnGathererStream & gatherer_stream) override;
|
void gather(ColumnGathererStream & gatherer_stream) override;
|
||||||
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
|
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
|
||||||
void getExtremes(Field & min, Field & max) const override;
|
void getExtremes(Field & min, Field & max) const override;
|
||||||
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
|
void getPermutation(bool reverse, UInt64 limit, int nan_direction_hint, Permutation & res) const override;
|
||||||
void reserve(size_t n) override;
|
void reserve(size_t n) override;
|
||||||
size_t byteSize() const override;
|
size_t byteSize() const override;
|
||||||
size_t allocatedBytes() const override;
|
size_t allocatedBytes() const override;
|
||||||
|
@ -16,8 +16,6 @@
|
|||||||
|
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
#include <emmintrin.h>
|
#include <emmintrin.h>
|
||||||
#include <Columns/ColumnsCommon.h>
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
@ -71,7 +69,7 @@ struct ColumnVector<T>::greater
|
|||||||
};
|
};
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void ColumnVector<T>::getPermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res) const
|
void ColumnVector<T>::getPermutation(bool reverse, UInt64 limit, int nan_direction_hint, IColumn::Permutation & res) const
|
||||||
{
|
{
|
||||||
size_t s = data.size();
|
size_t s = data.size();
|
||||||
res.resize(s);
|
res.resize(s);
|
||||||
@ -211,7 +209,7 @@ ColumnPtr ColumnVector<T>::filter(const IColumn::Filter & filt, ssize_t result_s
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
ColumnPtr ColumnVector<T>::permute(const IColumn::Permutation & perm, size_t limit) const
|
ColumnPtr ColumnVector<T>::permute(const IColumn::Permutation & perm, UInt64 limit) const
|
||||||
{
|
{
|
||||||
size_t size = data.size();
|
size_t size = data.size();
|
||||||
|
|
||||||
@ -232,7 +230,7 @@ ColumnPtr ColumnVector<T>::permute(const IColumn::Permutation & perm, size_t lim
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
ColumnPtr ColumnVector<T>::index(const IColumn & indexes, size_t limit) const
|
ColumnPtr ColumnVector<T>::index(const IColumn & indexes, UInt64 limit) const
|
||||||
{
|
{
|
||||||
return selectIndexImpl(*this, indexes, limit);
|
return selectIndexImpl(*this, indexes, limit);
|
||||||
}
|
}
|
||||||
|
@ -174,7 +174,7 @@ public:
|
|||||||
return CompareHelper<T>::compare(data[n], static_cast<const Self &>(rhs_).data[m], nan_direction_hint);
|
return CompareHelper<T>::compare(data[n], static_cast<const Self &>(rhs_).data[m], nan_direction_hint);
|
||||||
}
|
}
|
||||||
|
|
||||||
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override;
|
void getPermutation(bool reverse, UInt64 limit, int nan_direction_hint, IColumn::Permutation & res) const override;
|
||||||
|
|
||||||
void reserve(size_t n) override
|
void reserve(size_t n) override
|
||||||
{
|
{
|
||||||
@ -221,12 +221,12 @@ public:
|
|||||||
|
|
||||||
ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override;
|
ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override;
|
||||||
|
|
||||||
ColumnPtr permute(const IColumn::Permutation & perm, size_t limit) const override;
|
ColumnPtr permute(const IColumn::Permutation & perm, UInt64 limit) const override;
|
||||||
|
|
||||||
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
|
ColumnPtr index(const IColumn & indexes, UInt64 limit) const override;
|
||||||
|
|
||||||
template <typename Type>
|
template <typename Type>
|
||||||
ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;
|
ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, UInt64 limit) const;
|
||||||
|
|
||||||
ColumnPtr replicate(const IColumn::Offsets & offsets) const override;
|
ColumnPtr replicate(const IColumn::Offsets & offsets) const override;
|
||||||
|
|
||||||
@ -273,7 +273,7 @@ protected:
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
template <typename Type>
|
template <typename Type>
|
||||||
ColumnPtr ColumnVector<T>::indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const
|
ColumnPtr ColumnVector<T>::indexImpl(const PaddedPODArray<Type> & indexes, UInt64 limit) const
|
||||||
{
|
{
|
||||||
size_t size = indexes.size();
|
size_t size = indexes.size();
|
||||||
|
|
||||||
|
@ -3,8 +3,8 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <Columns/IColumn.h>
|
#include <Columns/IColumn.h>
|
||||||
#include <Common/typeid_cast.h>
|
|
||||||
#include <Columns/ColumnVector.h>
|
#include <Columns/ColumnVector.h>
|
||||||
|
#include <Common/typeid_cast.h>
|
||||||
#include <Common/HashTable/HashSet.h>
|
#include <Common/HashTable/HashSet.h>
|
||||||
#include <Common/HashTable/HashMap.h>
|
#include <Common/HashTable/HashMap.h>
|
||||||
|
|
||||||
|
@ -44,9 +44,9 @@ namespace detail
|
|||||||
const PaddedPODArray<T> * getIndexesData(const IColumn & indexes);
|
const PaddedPODArray<T> * getIndexesData(const IColumn & indexes);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Check limit <= indexes->size() and call column.indexImpl(const PaddedPodArray<Type> & indexes, size_t limit).
|
/// Check limit <= indexes->size() and call column.indexImpl(const PaddedPodArray<Type> & indexes, UInt64 limit).
|
||||||
template <typename Column>
|
template <typename Column>
|
||||||
ColumnPtr selectIndexImpl(const Column & column, const IColumn & indexes, size_t limit)
|
ColumnPtr selectIndexImpl(const Column & column, const IColumn & indexes, UInt64 limit)
|
||||||
{
|
{
|
||||||
if (limit == 0)
|
if (limit == 0)
|
||||||
limit = indexes.size();
|
limit = indexes.size();
|
||||||
@ -68,8 +68,8 @@ ColumnPtr selectIndexImpl(const Column & column, const IColumn & indexes, size_t
|
|||||||
}
|
}
|
||||||
|
|
||||||
#define INSTANTIATE_INDEX_IMPL(Column) \
|
#define INSTANTIATE_INDEX_IMPL(Column) \
|
||||||
template ColumnPtr Column::indexImpl<UInt8>(const PaddedPODArray<UInt8> & indexes, size_t limit) const; \
|
template ColumnPtr Column::indexImpl<UInt8>(const PaddedPODArray<UInt8> & indexes, UInt64 limit) const; \
|
||||||
template ColumnPtr Column::indexImpl<UInt16>(const PaddedPODArray<UInt16> & indexes, size_t limit) const; \
|
template ColumnPtr Column::indexImpl<UInt16>(const PaddedPODArray<UInt16> & indexes, UInt64 limit) const; \
|
||||||
template ColumnPtr Column::indexImpl<UInt32>(const PaddedPODArray<UInt32> & indexes, size_t limit) const; \
|
template ColumnPtr Column::indexImpl<UInt32>(const PaddedPODArray<UInt32> & indexes, UInt64 limit) const; \
|
||||||
template ColumnPtr Column::indexImpl<UInt64>(const PaddedPODArray<UInt64> & indexes, size_t limit) const;
|
template ColumnPtr Column::indexImpl<UInt64>(const PaddedPODArray<UInt64> & indexes, UInt64 limit) const;
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
#include <Columns/FilterDescription.h>
|
|
||||||
|
|
||||||
#include <Common/typeid_cast.h>
|
#include <Common/typeid_cast.h>
|
||||||
|
#include <Columns/FilterDescription.h>
|
||||||
#include <Columns/ColumnsNumber.h>
|
#include <Columns/ColumnsNumber.h>
|
||||||
#include <Columns/ColumnNullable.h>
|
#include <Columns/ColumnNullable.h>
|
||||||
#include <Columns/ColumnConst.h>
|
#include <Columns/ColumnConst.h>
|
||||||
|
@ -184,11 +184,11 @@ public:
|
|||||||
/// Permutes elements using specified permutation. Is used in sortings.
|
/// Permutes elements using specified permutation. Is used in sortings.
|
||||||
/// limit - if it isn't 0, puts only first limit elements in the result.
|
/// limit - if it isn't 0, puts only first limit elements in the result.
|
||||||
using Permutation = PaddedPODArray<size_t>;
|
using Permutation = PaddedPODArray<size_t>;
|
||||||
virtual Ptr permute(const Permutation & perm, size_t limit) const = 0;
|
virtual Ptr permute(const Permutation & perm, UInt64 limit) const = 0;
|
||||||
|
|
||||||
/// Creates new column with values column[indexes[:limit]]. If limit is 0, all indexes are used.
|
/// Creates new column with values column[indexes[:limit]]. If limit is 0, all indexes are used.
|
||||||
/// Indexes must be one of the ColumnUInt. For default implementation, see selectIndexImpl from ColumnsCommon.h
|
/// Indexes must be one of the ColumnUInt. For default implementation, see selectIndexImpl from ColumnsCommon.h
|
||||||
virtual Ptr index(const IColumn & indexes, size_t limit) const = 0;
|
virtual Ptr index(const IColumn & indexes, UInt64 limit) const = 0;
|
||||||
|
|
||||||
/** Compares (*this)[n] and rhs[m]. Column rhs should have the same type.
|
/** Compares (*this)[n] and rhs[m]. Column rhs should have the same type.
|
||||||
* Returns negative number, 0, or positive number (*this)[n] is less, equal, greater than rhs[m] respectively.
|
* Returns negative number, 0, or positive number (*this)[n] is less, equal, greater than rhs[m] respectively.
|
||||||
@ -209,7 +209,7 @@ public:
|
|||||||
* limit - if isn't 0, then only first limit elements of the result column could be sorted.
|
* limit - if isn't 0, then only first limit elements of the result column could be sorted.
|
||||||
* nan_direction_hint - see above.
|
* nan_direction_hint - see above.
|
||||||
*/
|
*/
|
||||||
virtual void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const = 0;
|
virtual void getPermutation(bool reverse, UInt64 limit, int nan_direction_hint, Permutation & res) const = 0;
|
||||||
|
|
||||||
/** Copies each element according offsets parameter.
|
/** Copies each element according offsets parameter.
|
||||||
* (i-th element should be copied offsets[i] - offsets[i - 1] times.)
|
* (i-th element should be copied offsets[i] - offsets[i - 1] times.)
|
||||||
|
@ -79,7 +79,7 @@ public:
|
|||||||
return cloneDummy(countBytesInFilter(filt));
|
return cloneDummy(countBytesInFilter(filt));
|
||||||
}
|
}
|
||||||
|
|
||||||
ColumnPtr permute(const Permutation & perm, size_t limit) const override
|
ColumnPtr permute(const Permutation & perm, UInt64 limit) const override
|
||||||
{
|
{
|
||||||
if (s != perm.size())
|
if (s != perm.size())
|
||||||
throw Exception("Size of permutation doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
|
throw Exception("Size of permutation doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
|
||||||
@ -87,7 +87,7 @@ public:
|
|||||||
return cloneDummy(limit ? std::min(s, limit) : s);
|
return cloneDummy(limit ? std::min(s, limit) : s);
|
||||||
}
|
}
|
||||||
|
|
||||||
ColumnPtr index(const IColumn & indexes, size_t limit) const override
|
ColumnPtr index(const IColumn & indexes, UInt64 limit) const override
|
||||||
{
|
{
|
||||||
if (indexes.size() < limit)
|
if (indexes.size() < limit)
|
||||||
throw Exception("Size of indexes is less than required.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
|
throw Exception("Size of indexes is less than required.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
|
||||||
|
557
dbms/src/Common/ColumnsHashing.h
Normal file
557
dbms/src/Common/ColumnsHashing.h
Normal file
@ -0,0 +1,557 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
|
||||||
|
#include <Common/ColumnsHashingImpl.h>
|
||||||
|
#include <Common/Arena.h>
|
||||||
|
#include <Common/LRUCache.h>
|
||||||
|
#include <common/unaligned.h>
|
||||||
|
|
||||||
|
#include <Columns/ColumnString.h>
|
||||||
|
#include <Columns/ColumnFixedString.h>
|
||||||
|
#include <Columns/ColumnLowCardinality.h>
|
||||||
|
|
||||||
|
#include <Core/Defines.h>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
|
||||||
|
namespace ColumnsHashing
|
||||||
|
{
|
||||||
|
|
||||||
|
/// For the case when there is one numeric key.
/// UInt8/16/32/64 for any type with corresponding bit width.
template <typename Value, typename Mapped, typename FieldType, bool use_cache = true>
struct HashMethodOneNumber
    : public columns_hashing_impl::HashMethodBase<HashMethodOneNumber<Value, Mapped, FieldType, use_cache>, Value, Mapped, use_cache>
{
    using Self = HashMethodOneNumber<Value, Mapped, FieldType, use_cache>;
    using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;

    /// Raw byte pointer to the key column's flat data.
    const char * vec;

    /// If the keys of a fixed length then key_sizes contains their lengths, empty otherwise.
    HashMethodOneNumber(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &)
    {
        vec = key_columns[0]->getRawData().data;
    }

    /// Creates context. Method is called once and result context is used in all threads.
    using Base::createContext; /// (const HashMethodContext::Settings &) -> HashMethodContextPtr

    /// Emplace key into HashTable or HashMap. If Data is HashMap, returns ptr to value, otherwise nullptr.
    /// Data is a HashTable where to insert key from column's row.
    /// For Serialized method, key may be placed in pool.
    using Base::emplaceKey; /// (Data & data, size_t row, Arena & pool) -> EmplaceResult

    /// Find key into HashTable or HashMap. If Data is HashMap and key was found, returns ptr to value, otherwise nullptr.
    using Base::findKey;  /// (Data & data, size_t row, Arena & pool) -> FindResult

    /// Get hash value of row.
    using Base::getHash; /// (const Data & data, size_t row, Arena & pool) -> size_t

    /// Is used for default implementation in HashMethodBase.
    /// unalignedLoad is used because the raw column data is not guaranteed to be aligned for FieldType.
    FieldType getKey(size_t row, Arena &) const { return unalignedLoad<FieldType>(vec + row * sizeof(FieldType)); }

    /// Get StringRef from value which can be inserted into column.
    static StringRef getValueRef(const Value & value)
    {
        return StringRef(reinterpret_cast<const char *>(&value.first), sizeof(value.first));
    }
};
|
||||||
|
|
||||||
|
|
||||||
|
/// For the case when there is one string key.
template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true>
struct HashMethodString
    : public columns_hashing_impl::HashMethodBase<HashMethodString<Value, Mapped, place_string_to_arena, use_cache>, Value, Mapped, use_cache>
{
    using Self = HashMethodString<Value, Mapped, place_string_to_arena, use_cache>;
    using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;

    /// End offsets of each string inside `chars`; string i occupies [offsets[i-1], offsets[i]).
    const IColumn::Offset * offsets;
    const UInt8 * chars;

    HashMethodString(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &)
    {
        const IColumn & column = *key_columns[0];
        const ColumnString & column_string = static_cast<const ColumnString &>(column);
        offsets = column_string.getOffsets().data();
        chars = column_string.getChars().data();
    }

    /// NOTE(review): row is ssize_t so that offsets[row - 1] at row == 0 reads the element before
    /// the array — presumably zero-valued padding of the offsets container; confirm against ColumnString.
    /// The "- 1" excludes the string's trailing zero byte from the key.
    auto getKey(ssize_t row, Arena &) const
    {
        return StringRef(chars + offsets[row - 1], offsets[row] - offsets[row - 1] - 1);
    }

    static StringRef getValueRef(const Value & value) { return StringRef(value.first.data, value.first.size); }

protected:
    friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;

    /// Copy the key bytes into the arena (when requested by template flag) so the stored key
    /// outlives the source column.
    static ALWAYS_INLINE void onNewKey([[maybe_unused]] StringRef & key, [[maybe_unused]] Arena & pool)
    {
        if constexpr (place_string_to_arena)
        {
            if (key.size)
                key.data = pool.insert(key.data, key.size);
        }
    }
};
|
||||||
|
|
||||||
|
|
||||||
|
/// For the case when there is one fixed-length string key.
template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true>
struct HashMethodFixedString
    : public columns_hashing_impl::HashMethodBase<HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache>, Value, Mapped, use_cache>
{
    using Self = HashMethodFixedString<Value, Mapped, place_string_to_arena, use_cache>;
    using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;

    /// Fixed length of each value, and the column's shared character buffer.
    size_t n;
    const ColumnFixedString::Chars * chars;

    HashMethodFixedString(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &)
    {
        const IColumn & column = *key_columns[0];
        const ColumnFixedString & column_string = static_cast<const ColumnFixedString &>(column);
        n = column_string.getN();
        chars = &column_string.getChars();
    }

    /// The key is the row-th fixed-size slice of the character buffer.
    StringRef getKey(size_t row, Arena &) const { return StringRef(&(*chars)[row * n], n); }

    static StringRef getValueRef(const Value & value) { return StringRef(value.first.data, value.first.size); }

protected:
    friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
    /// Copy key bytes into the arena (when requested by template flag) so the stored key
    /// outlives the source column.
    static ALWAYS_INLINE void onNewKey([[maybe_unused]] StringRef & key, [[maybe_unused]] Arena & pool)
    {
        if constexpr (place_string_to_arena)
            key.data = pool.insert(key.data, key.size);
    }
};
|
||||||
|
|
||||||
|
|
||||||
|
/// Cache stores dictionaries and saved_hash per dictionary key.
/// Shared between threads (see HashMethodContext), backed by an LRU cache.
class LowCardinalityDictionaryCache : public HashMethodContext
{
public:
    /// Will assume that dictionaries with same hash has the same keys.
    /// Just in case, check that they have also the same size.
    struct DictionaryKey
    {
        UInt128 hash;
        UInt64 size;

        bool operator== (const DictionaryKey & other) const { return hash == other.hash && size == other.size; }
    };

    /// Folds the 128-bit dictionary hash plus its size into a single 64-bit hash for the LRU map.
    struct DictionaryKeyHash
    {
        size_t operator()(const DictionaryKey & key) const
        {
            SipHash hash;
            hash.update(key.hash.low);
            hash.update(key.hash.high);
            hash.update(key.size);
            return hash.get64();
        }
    };

    struct CachedValues
    {
        /// Store ptr to dictionary to be sure it won't be deleted.
        ColumnPtr dictionary_holder;
        /// Hashes for dictionary keys.
        const UInt64 * saved_hash = nullptr;
    };

    using CachedValuesPtr = std::shared_ptr<CachedValues>;

    /// NOTE(review): cache capacity is settings.max_threads — presumably one entry per
    /// concurrent consumer; confirm this sizing is intentional.
    explicit LowCardinalityDictionaryCache(const HashMethodContext::Settings & settings) : cache(settings.max_threads) {}

    CachedValuesPtr get(const DictionaryKey & key) { return cache.get(key); }
    void set(const DictionaryKey & key, const CachedValuesPtr & mapped) { cache.set(key, mapped); }

private:
    using Cache = LRUCache<DictionaryKey, CachedValues, DictionaryKeyHash>;
    Cache cache;
};
|
||||||
|
|
||||||
|
|
||||||
|
/// Single low cardinality column.
|
||||||
|
template <typename SingleColumnMethod, typename Mapped, bool use_cache>
|
||||||
|
struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod
|
||||||
|
{
|
||||||
|
using Base = SingleColumnMethod;
|
||||||
|
|
||||||
|
enum class VisitValue
|
||||||
|
{
|
||||||
|
Empty = 0,
|
||||||
|
Found = 1,
|
||||||
|
NotFound = 2,
|
||||||
|
};
|
||||||
|
|
||||||
|
static constexpr bool has_mapped = !std::is_same<Mapped, void>::value;
|
||||||
|
using EmplaceResult = columns_hashing_impl::EmplaceResultImpl<Mapped>;
|
||||||
|
using FindResult = columns_hashing_impl::FindResultImpl<Mapped>;
|
||||||
|
|
||||||
|
static HashMethodContextPtr createContext(const HashMethodContext::Settings & settings)
|
||||||
|
{
|
||||||
|
return std::make_shared<LowCardinalityDictionaryCache>(settings);
|
||||||
|
}
|
||||||
|
|
||||||
|
ColumnRawPtrs key_columns;
|
||||||
|
const IColumn * positions = nullptr;
|
||||||
|
size_t size_of_index_type = 0;
|
||||||
|
|
||||||
|
/// saved hash is from current column or from cache.
|
||||||
|
const UInt64 * saved_hash = nullptr;
|
||||||
|
/// Hold dictionary in case saved_hash is from cache to be sure it won't be deleted.
|
||||||
|
ColumnPtr dictionary_holder;
|
||||||
|
|
||||||
|
/// Cache AggregateDataPtr for current column in order to decrease the number of hash table usages.
|
||||||
|
columns_hashing_impl::MappedCache<Mapped> mapped_cache;
|
||||||
|
PaddedPODArray<VisitValue> visit_cache;
|
||||||
|
|
||||||
|
/// If initialized column is nullable.
|
||||||
|
bool is_nullable = false;
|
||||||
|
|
||||||
|
static const ColumnLowCardinality & getLowCardinalityColumn(const IColumn * low_cardinality_column)
|
||||||
|
{
|
||||||
|
auto column = typeid_cast<const ColumnLowCardinality *>(low_cardinality_column);
|
||||||
|
if (!column)
|
||||||
|
throw Exception("Invalid aggregation key type for HashMethodSingleLowCardinalityColumn method. "
|
||||||
|
"Excepted LowCardinality, got " + column->getName(), ErrorCodes::LOGICAL_ERROR);
|
||||||
|
return *column;
|
||||||
|
}
|
||||||
|
|
||||||
|
HashMethodSingleLowCardinalityColumn(
|
||||||
|
const ColumnRawPtrs & key_columns_low_cardinality, const Sizes & key_sizes, const HashMethodContextPtr & context)
|
||||||
|
: Base({getLowCardinalityColumn(key_columns_low_cardinality[0]).getDictionary().getNestedNotNullableColumn().get()}, key_sizes, context)
|
||||||
|
{
|
||||||
|
auto column = &getLowCardinalityColumn(key_columns_low_cardinality[0]);
|
||||||
|
|
||||||
|
if (!context)
|
||||||
|
throw Exception("Cache wasn't created for HashMethodSingleLowCardinalityColumn",
|
||||||
|
ErrorCodes::LOGICAL_ERROR);
|
||||||
|
|
||||||
|
LowCardinalityDictionaryCache * cache;
|
||||||
|
if constexpr (use_cache)
|
||||||
|
{
|
||||||
|
cache = typeid_cast<LowCardinalityDictionaryCache *>(context.get());
|
||||||
|
if (!cache)
|
||||||
|
{
|
||||||
|
const auto & cached_val = *context;
|
||||||
|
throw Exception("Invalid type for HashMethodSingleLowCardinalityColumn cache: "
|
||||||
|
+ demangle(typeid(cached_val).name()), ErrorCodes::LOGICAL_ERROR);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
auto * dict = column->getDictionary().getNestedNotNullableColumn().get();
|
||||||
|
is_nullable = column->getDictionary().nestedColumnIsNullable();
|
||||||
|
key_columns = {dict};
|
||||||
|
bool is_shared_dict = column->isSharedDictionary();
|
||||||
|
|
||||||
|
typename LowCardinalityDictionaryCache::DictionaryKey dictionary_key;
|
||||||
|
typename LowCardinalityDictionaryCache::CachedValuesPtr cached_values;
|
||||||
|
|
||||||
|
if (is_shared_dict)
|
||||||
|
{
|
||||||
|
dictionary_key = {column->getDictionary().getHash(), dict->size()};
|
||||||
|
if constexpr (use_cache)
|
||||||
|
cached_values = cache->get(dictionary_key);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cached_values)
|
||||||
|
{
|
||||||
|
saved_hash = cached_values->saved_hash;
|
||||||
|
dictionary_holder = cached_values->dictionary_holder;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
saved_hash = column->getDictionary().tryGetSavedHash();
|
||||||
|
dictionary_holder = column->getDictionaryPtr();
|
||||||
|
|
||||||
|
if constexpr (use_cache)
|
||||||
|
{
|
||||||
|
if (is_shared_dict)
|
||||||
|
{
|
||||||
|
cached_values = std::make_shared<typename LowCardinalityDictionaryCache::CachedValues>();
|
||||||
|
cached_values->saved_hash = saved_hash;
|
||||||
|
cached_values->dictionary_holder = dictionary_holder;
|
||||||
|
|
||||||
|
cache->set(dictionary_key, cached_values);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if constexpr (has_mapped)
|
||||||
|
mapped_cache.resize(key_columns[0]->size());
|
||||||
|
|
||||||
|
VisitValue empty(VisitValue::Empty);
|
||||||
|
visit_cache.assign(key_columns[0]->size(), empty);
|
||||||
|
|
||||||
|
size_of_index_type = column->getSizeOfIndexType();
|
||||||
|
positions = column->getIndexesPtr().get();
|
||||||
|
}
|
||||||
|
|
||||||
|
ALWAYS_INLINE size_t getIndexAt(size_t row) const
|
||||||
|
{
|
||||||
|
switch (size_of_index_type)
|
||||||
|
{
|
||||||
|
case sizeof(UInt8): return static_cast<const ColumnUInt8 *>(positions)->getElement(row);
|
||||||
|
case sizeof(UInt16): return static_cast<const ColumnUInt16 *>(positions)->getElement(row);
|
||||||
|
case sizeof(UInt32): return static_cast<const ColumnUInt32 *>(positions)->getElement(row);
|
||||||
|
case sizeof(UInt64): return static_cast<const ColumnUInt64 *>(positions)->getElement(row);
|
||||||
|
default: throw Exception("Unexpected size of index type for low cardinality column.", ErrorCodes::LOGICAL_ERROR);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the key from the key columns for insertion into the hash table.
|
||||||
|
ALWAYS_INLINE auto getKey(size_t row, Arena & pool) const
|
||||||
|
{
|
||||||
|
return Base::getKey(getIndexAt(row), pool);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Data>
|
||||||
|
ALWAYS_INLINE EmplaceResult emplaceKey(Data & data, size_t row_, Arena & pool)
|
||||||
|
{
|
||||||
|
size_t row = getIndexAt(row_);
|
||||||
|
|
||||||
|
if (is_nullable && row == 0)
|
||||||
|
{
|
||||||
|
visit_cache[row] = VisitValue::Found;
|
||||||
|
bool has_null_key = data.hasNullKeyData();
|
||||||
|
data.hasNullKeyData() = true;
|
||||||
|
|
||||||
|
if constexpr (has_mapped)
|
||||||
|
return EmplaceResult(data.getNullKeyData(), mapped_cache[0], !has_null_key);
|
||||||
|
else
|
||||||
|
return EmplaceResult(!has_null_key);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (visit_cache[row] == VisitValue::Found)
|
||||||
|
{
|
||||||
|
if constexpr (has_mapped)
|
||||||
|
return EmplaceResult(mapped_cache[row], mapped_cache[row], false);
|
||||||
|
else
|
||||||
|
return EmplaceResult(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto key = getKey(row_, pool);
|
||||||
|
|
||||||
|
bool inserted = false;
|
||||||
|
typename Data::iterator it;
|
||||||
|
if (saved_hash)
|
||||||
|
data.emplace(key, it, inserted, saved_hash[row]);
|
||||||
|
else
|
||||||
|
data.emplace(key, it, inserted);
|
||||||
|
|
||||||
|
visit_cache[row] = VisitValue::Found;
|
||||||
|
|
||||||
|
if (inserted)
|
||||||
|
{
|
||||||
|
if constexpr (has_mapped)
|
||||||
|
{
|
||||||
|
new(&it->second) Mapped();
|
||||||
|
Base::onNewKey(it->first, pool);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
Base::onNewKey(*it, pool);
|
||||||
|
}
|
||||||
|
|
||||||
|
if constexpr (has_mapped)
|
||||||
|
return EmplaceResult(it->second, mapped_cache[row], inserted);
|
||||||
|
else
|
||||||
|
return EmplaceResult(inserted);
|
||||||
|
}
|
||||||
|
|
||||||
|
ALWAYS_INLINE bool isNullAt(size_t i)
|
||||||
|
{
|
||||||
|
if (!is_nullable)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return getIndexAt(i) == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Data>
|
||||||
|
ALWAYS_INLINE FindResult findFromRow(Data & data, size_t row_, Arena & pool)
|
||||||
|
{
|
||||||
|
size_t row = getIndexAt(row_);
|
||||||
|
|
||||||
|
if (is_nullable && row == 0)
|
||||||
|
{
|
||||||
|
if constexpr (has_mapped)
|
||||||
|
return FindResult(data.hasNullKeyData() ? &data.getNullKeyData() : nullptr, data.hasNullKeyData());
|
||||||
|
else
|
||||||
|
return FindResult(data.hasNullKeyData());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (visit_cache[row] != VisitValue::Empty)
|
||||||
|
{
|
||||||
|
if constexpr (has_mapped)
|
||||||
|
return FindResult(&mapped_cache[row], visit_cache[row] == VisitValue::Found);
|
||||||
|
else
|
||||||
|
return FindResult(visit_cache[row] == VisitValue::Found);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto key = getKey(row_, pool);
|
||||||
|
|
||||||
|
typename Data::iterator it;
|
||||||
|
if (saved_hash)
|
||||||
|
it = data.find(key, saved_hash[row]);
|
||||||
|
else
|
||||||
|
it = data.find(key);
|
||||||
|
|
||||||
|
bool found = it != data.end();
|
||||||
|
visit_cache[row] = found ? VisitValue::Found : VisitValue::NotFound;
|
||||||
|
|
||||||
|
if constexpr (has_mapped)
|
||||||
|
{
|
||||||
|
if (found)
|
||||||
|
mapped_cache[row] = it->second;
|
||||||
|
}
|
||||||
|
|
||||||
|
if constexpr (has_mapped)
|
||||||
|
return FindResult(&mapped_cache[row], found);
|
||||||
|
else
|
||||||
|
return FindResult(found);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Data>
|
||||||
|
ALWAYS_INLINE size_t getHash(const Data & data, size_t row, Arena & pool)
|
||||||
|
{
|
||||||
|
row = getIndexAt(row);
|
||||||
|
if (saved_hash)
|
||||||
|
return saved_hash[row];
|
||||||
|
|
||||||
|
return Base::getHash(data, row, pool);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// Optional mask for low cardinality columns.
template <bool has_low_cardinality>
struct LowCardinalityKeys
{
    ColumnRawPtrs nested_columns;   /// Dictionary (nested) columns for low cardinality keys, the key column itself otherwise.
    ColumnRawPtrs positions;        /// Index columns (positions into the dictionary); nullptr for ordinary keys.
    Sizes position_sizes;           /// Byte width of each index column's element type.
};

/// Empty shell when no key column is low cardinality.
template <>
struct LowCardinalityKeys<false> {};
|
||||||
|
|
||||||
|
/// For the case when all keys are of fixed length, and they fit in N (for example, 128) bits.
|
||||||
|
template <typename Value, typename Key, typename Mapped, bool has_nullable_keys_ = false, bool has_low_cardinality_ = false, bool use_cache = true>
|
||||||
|
struct HashMethodKeysFixed
|
||||||
|
: private columns_hashing_impl::BaseStateKeysFixed<Key, has_nullable_keys_>
|
||||||
|
, public columns_hashing_impl::HashMethodBase<HashMethodKeysFixed<Value, Key, Mapped, has_nullable_keys_, has_low_cardinality_, use_cache>, Value, Mapped, use_cache>
|
||||||
|
{
|
||||||
|
using Self = HashMethodKeysFixed<Value, Key, Mapped, has_nullable_keys_, has_low_cardinality_, use_cache>;
|
||||||
|
using BaseHashed = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
|
||||||
|
using Base = columns_hashing_impl::BaseStateKeysFixed<Key, has_nullable_keys_>;
|
||||||
|
|
||||||
|
static constexpr bool has_nullable_keys = has_nullable_keys_;
|
||||||
|
static constexpr bool has_low_cardinality = has_low_cardinality_;
|
||||||
|
|
||||||
|
LowCardinalityKeys<has_low_cardinality> low_cardinality_keys;
|
||||||
|
Sizes key_sizes;
|
||||||
|
size_t keys_size;
|
||||||
|
|
||||||
|
HashMethodKeysFixed(const ColumnRawPtrs & key_columns, const Sizes & key_sizes, const HashMethodContextPtr &)
|
||||||
|
: Base(key_columns), key_sizes(std::move(key_sizes)), keys_size(key_columns.size())
|
||||||
|
{
|
||||||
|
if constexpr (has_low_cardinality)
|
||||||
|
{
|
||||||
|
low_cardinality_keys.nested_columns.resize(key_columns.size());
|
||||||
|
low_cardinality_keys.positions.assign(key_columns.size(), nullptr);
|
||||||
|
low_cardinality_keys.position_sizes.resize(key_columns.size());
|
||||||
|
for (size_t i = 0; i < key_columns.size(); ++i)
|
||||||
|
{
|
||||||
|
if (auto * low_cardinality_col = typeid_cast<const ColumnLowCardinality *>(key_columns[i]))
|
||||||
|
{
|
||||||
|
low_cardinality_keys.nested_columns[i] = low_cardinality_col->getDictionary().getNestedColumn().get();
|
||||||
|
low_cardinality_keys.positions[i] = &low_cardinality_col->getIndexes();
|
||||||
|
low_cardinality_keys.position_sizes[i] = low_cardinality_col->getSizeOfIndexType();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
low_cardinality_keys.nested_columns[i] = key_columns[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ALWAYS_INLINE Key getKey(size_t row, Arena &) const
|
||||||
|
{
|
||||||
|
if constexpr (has_nullable_keys)
|
||||||
|
{
|
||||||
|
auto bitmap = Base::createBitmap(row);
|
||||||
|
return packFixed<Key>(row, keys_size, Base::getActualColumns(), key_sizes, bitmap);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if constexpr (has_low_cardinality)
|
||||||
|
return packFixed<Key, true>(row, keys_size, low_cardinality_keys.nested_columns, key_sizes,
|
||||||
|
&low_cardinality_keys.positions, &low_cardinality_keys.position_sizes);
|
||||||
|
|
||||||
|
return packFixed<Key>(row, keys_size, Base::getActualColumns(), key_sizes);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/** Hash by concatenating serialized key values.
  * The serialized value differs in that it uniquely allows to deserialize it, having only the position with which it starts.
  * That is, for example, for strings, it contains first the serialized length of the string, and then the bytes.
  * Therefore, when aggregating by several strings, there is no ambiguity.
  */
template <typename Value, typename Mapped>
struct HashMethodSerialized
    : public columns_hashing_impl::HashMethodBase<HashMethodSerialized<Value, Mapped>, Value, Mapped, false>
{
    using Self = HashMethodSerialized<Value, Mapped>;
    using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, false>;

    ColumnRawPtrs key_columns;
    size_t keys_size;

    HashMethodSerialized(const ColumnRawPtrs & key_columns, const Sizes & /*key_sizes*/, const HashMethodContextPtr &)
        : key_columns(key_columns), keys_size(key_columns.size()) {}

protected:
    friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, false>;

    /// Serializes all key columns at `row` into the arena; the returned key points into the arena.
    ALWAYS_INLINE StringRef getKey(size_t row, Arena & pool) const
    {
        return serializeKeysToPoolContiguous(row, keys_size, key_columns, pool);
    }

    /// The key is already in the table, so the bytes serialized by getKey are not needed:
    /// give them back to the arena.
    static ALWAYS_INLINE void onExistingKey(StringRef & key, Arena & pool) { pool.rollback(key.size); }
};
|
||||||
|
|
||||||
|
/// For the case of multiple keys: all key columns at a row are hashed into a single
/// 128-bit value which is then used as the key.
/// (The previous comment, "one string key", was a copy-paste leftover.)
template <typename Value, typename Mapped, bool use_cache = true>
struct HashMethodHashed
    : public columns_hashing_impl::HashMethodBase<HashMethodHashed<Value, Mapped, use_cache>, Value, Mapped, use_cache>
{
    using Key = UInt128;
    using Self = HashMethodHashed<Value, Mapped, use_cache>;
    using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;

    ColumnRawPtrs key_columns;

    HashMethodHashed(ColumnRawPtrs key_columns, const Sizes &, const HashMethodContextPtr &)
        : key_columns(std::move(key_columns)) {}

    /// The key is the combined hash of all key columns at the row.
    ALWAYS_INLINE Key getKey(size_t row, Arena &) const { return hash128(row, key_columns.size(), key_columns); }

    static ALWAYS_INLINE StringRef getValueRef(const Value & value)
    {
        return StringRef(reinterpret_cast<const char *>(&value.first), sizeof(value.first));
    }
};
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
356
dbms/src/Common/ColumnsHashingImpl.h
Normal file
356
dbms/src/Common/ColumnsHashingImpl.h
Normal file
@ -0,0 +1,356 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <Columns/IColumn.h>
|
||||||
|
#include <Interpreters/AggregationCommon.h>
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
|
||||||
|
namespace ColumnsHashing
|
||||||
|
{
|
||||||
|
|
||||||
|
/// Generic context for HashMethod. Context is shared between multiple threads, all methods must be thread-safe.
|
||||||
|
/// Is used for caching.
|
||||||
|
class HashMethodContext
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
virtual ~HashMethodContext() = default;
|
||||||
|
|
||||||
|
struct Settings
|
||||||
|
{
|
||||||
|
size_t max_threads;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
using HashMethodContextPtr = std::shared_ptr<HashMethodContext>;
|
||||||
|
|
||||||
|
|
||||||
|
namespace columns_hashing_impl
|
||||||
|
{
|
||||||
|
|
||||||
|
/// Caches the most recently emplaced/found element so that a run of equal consecutive
/// keys touches the hash table only once.
template <typename Value, bool consecutive_keys_optimization_>
struct LastElementCache
{
    static constexpr bool consecutive_keys_optimization = consecutive_keys_optimization_;
    Value value;
    bool empty = true;
    bool found = false;

    /// True if the cached element equals the whole value.
    bool check(const Value & value_) { return !empty && value == value_; }

    /// True if the cached element's key part equals the given key.
    template <typename Key>
    bool check(const Key & key) { return !empty && value.first == key; }
};

/// Empty shell when the consecutive-keys optimization is disabled.
template <typename Data>
struct LastElementCache<Data, false>
{
    static constexpr bool consecutive_keys_optimization = false;
};
|
||||||
|
|
||||||
|
/// Result of emplaceKey: whether the key was newly inserted, plus access to the mapped value.
template <typename Mapped>
class EmplaceResultImpl
{
    Mapped & value;
    Mapped & cached_value;
    bool inserted;

public:
    EmplaceResultImpl(Mapped & value, Mapped & cached_value, bool inserted)
        : value(value), cached_value(cached_value), inserted(inserted) {}

    bool isInserted() const { return inserted; }
    auto & getMapped() const { return value; }

    /// Write through to both the hash table cell and the cached copy so they stay in sync.
    void setMapped(const Mapped & mapped)
    {
        cached_value = mapped;
        value = mapped;
    }
};

/// Specialization for hash sets (no mapped value): only the insertion flag.
template <>
class EmplaceResultImpl<void>
{
    bool inserted;

public:
    explicit EmplaceResultImpl(bool inserted) : inserted(inserted) {}
    bool isInserted() const { return inserted; }
};
|
||||||
|
|
||||||
|
/// Result of findKey: whether the key was found, plus access to the mapped value.
/// NOTE(review): getMapped() dereferences `value`, which is nullptr when not found —
/// callers must check isFound() first.
template <typename Mapped>
class FindResultImpl
{
    Mapped * value;
    bool found;

public:
    FindResultImpl(Mapped * value, bool found) : value(value), found(found) {}
    bool isFound() const { return found; }
    Mapped & getMapped() const { return *value; }
};

/// Specialization for hash sets (no mapped value): only the found flag.
template <>
class FindResultImpl<void>
{
    bool found;

public:
    explicit FindResultImpl(bool found) : found(found) {}
    bool isFound() const { return found; }
};
|
||||||
|
|
||||||
|
/// CRTP base providing emplaceKey/findKey/getHash on top of Derived::getKey,
/// with an optional last-element cache (consecutive keys optimization).
/// Derived may override onNewKey/onExistingKey to manage per-key arena memory.
template <typename Derived, typename Value, typename Mapped, bool consecutive_keys_optimization>
class HashMethodBase
{
public:
    using EmplaceResult = EmplaceResultImpl<Mapped>;
    using FindResult = FindResultImpl<Mapped>;
    static constexpr bool has_mapped = !std::is_same<Mapped, void>::value;
    using Cache = LastElementCache<Value, consecutive_keys_optimization>;

    /// Default: no shared context is needed.
    static HashMethodContextPtr createContext(const HashMethodContext::Settings &) { return nullptr; }

    template <typename Data>
    ALWAYS_INLINE EmplaceResult emplaceKey(Data & data, size_t row, Arena & pool)
    {
        auto key = static_cast<Derived &>(*this).getKey(row, pool);
        return emplaceKeyImpl(key, data, pool);
    }

    template <typename Data>
    ALWAYS_INLINE FindResult findKey(Data & data, size_t row, Arena & pool)
    {
        auto key = static_cast<Derived &>(*this).getKey(row, pool);
        auto res = findKeyImpl(key, data);
        /// The key was not stored: let Derived release per-key resources (e.g. arena rollback).
        static_cast<Derived &>(*this).onExistingKey(key, pool);
        return res;
    }

    template <typename Data>
    ALWAYS_INLINE size_t getHash(const Data & data, size_t row, Arena & pool)
    {
        auto key = static_cast<Derived &>(*this).getKey(row, pool);
        auto res = data.hash(key);
        static_cast<Derived &>(*this).onExistingKey(key, pool);
        return res;
    }

protected:
    Cache cache;

    HashMethodBase()
    {
        if constexpr (consecutive_keys_optimization)
        {
            if constexpr (has_mapped)
            {
                /// Init PairNoInit elements.
                cache.value.second = Mapped();
                using Key = decltype(cache.value.first);
                cache.value.first = Key();
            }
            else
                cache.value = Value();
        }
    }

    /// Default hooks do nothing; Derived shadows them as needed.
    template <typename Key>
    static ALWAYS_INLINE void onNewKey(Key & /*key*/, Arena & /*pool*/) {}
    template <typename Key>
    static ALWAYS_INLINE void onExistingKey(Key & /*key*/, Arena & /*pool*/) {}

    template <typename Data, typename Key>
    ALWAYS_INLINE EmplaceResult emplaceKeyImpl(Key key, Data & data, Arena & pool)
    {
        if constexpr (Cache::consecutive_keys_optimization)
        {
            /// Fast path: same key as the previous row and it is already in the table.
            if (cache.found && cache.check(key))
            {
                static_cast<Derived &>(*this).onExistingKey(key, pool);

                if constexpr (has_mapped)
                    return EmplaceResult(cache.value.second, cache.value.second, false);
                else
                    return EmplaceResult(false);
            }
        }

        typename Data::iterator it;
        bool inserted = false;
        data.emplace(key, it, inserted);

        [[maybe_unused]] Mapped * cached = nullptr;
        if constexpr (has_mapped)
            cached = &it->second;

        if (inserted)
        {
            if constexpr (has_mapped)
            {
                /// Placement-construct the mapped value in the freshly emplaced cell.
                new(&it->second) Mapped();
                static_cast<Derived &>(*this).onNewKey(it->first, pool);
            }
            else
                static_cast<Derived &>(*this).onNewKey(*it, pool);
        }
        else
            static_cast<Derived &>(*this).onExistingKey(key, pool);

        if constexpr (consecutive_keys_optimization)
        {
            cache.value = *it;
            cache.found = true;
            cache.empty = false;

            /// Hand back the cached copy rather than the table cell — presumably so the
            /// pointer stays valid across table rehashes; confirm with the hash table impl.
            if constexpr (has_mapped)
                cached = &cache.value.second;
        }

        if constexpr (has_mapped)
            return EmplaceResult(it->second, *cached, inserted);
        else
            return EmplaceResult(inserted);
    }

    template <typename Data, typename Key>
    ALWAYS_INLINE FindResult findKeyImpl(Key key, Data & data)
    {
        if constexpr (Cache::consecutive_keys_optimization)
        {
            /// Fast path: same key as the previous lookup (found or not).
            if (cache.check(key))
            {
                if constexpr (has_mapped)
                    return FindResult(&cache.value.second, cache.found);
                else
                    return FindResult(cache.found);
            }
        }

        auto it = data.find(key);
        bool found = it != data.end();

        if constexpr (consecutive_keys_optimization)
        {
            cache.found = found;
            cache.empty = false;

            if (found)
                cache.value = *it;
            else
            {
                /// Remember the missing key so repeated lookups of it also hit the cache.
                if constexpr (has_mapped)
                    cache.value.first = key;
                else
                    cache.value = key;
            }
        }

        if constexpr (has_mapped)
            return FindResult(found ? &it->second : nullptr, found);
        else
            return FindResult(found);
    }
};
|
||||||
|
|
||||||
|
|
||||||
|
/// Per-dictionary-position cache of mapped values (used by the low cardinality method).
template <typename T>
struct MappedCache : public PaddedPODArray<T> {};

/// Empty shell when there is no mapped type (hash set case).
template <>
struct MappedCache<void> {};
|
||||||
|
|
||||||
|
|
||||||
|
/// This class is designed to provide the functionality that is required for
/// supporting nullable keys in HashMethodKeysFixed. If there are
/// no nullable keys, this class is merely implemented as an empty shell.
template <typename Key, bool has_nullable_keys>
class BaseStateKeysFixed;

/// Case where nullable keys are supported.
template <typename Key>
class BaseStateKeysFixed<Key, true>
{
protected:
    BaseStateKeysFixed(const ColumnRawPtrs & key_columns)
    {
        null_maps.reserve(key_columns.size());
        actual_columns.reserve(key_columns.size());

        /// Split each key column into its data part and, for nullable columns, its null map.
        /// The two vectors stay index-aligned; null_maps[i] is nullptr for non-nullable keys.
        for (const auto & col : key_columns)
        {
            if (col->isColumnNullable())
            {
                const auto & nullable_col = static_cast<const ColumnNullable &>(*col);
                actual_columns.push_back(&nullable_col.getNestedColumn());
                null_maps.push_back(&nullable_col.getNullMapColumn());
            }
            else
            {
                actual_columns.push_back(col);
                null_maps.push_back(nullptr);
            }
        }
    }

    /// Return the columns which actually contain the values of the keys.
    /// For a given key column, if it is nullable, we return its nested
    /// column. Otherwise we return the key column itself.
    inline const ColumnRawPtrs & getActualColumns() const
    {
        return actual_columns;
    }

    /// Create a bitmap that indicates whether, for a particular row,
    /// a key column bears a null value or not.
    KeysNullMap<Key> createBitmap(size_t row) const
    {
        KeysNullMap<Key> bitmap{};

        for (size_t k = 0; k < null_maps.size(); ++k)
        {
            if (null_maps[k] != nullptr)
            {
                const auto & null_map = static_cast<const ColumnUInt8 &>(*null_maps[k]).getData();
                if (null_map[row] == 1)
                {
                    /// Bit k of the bitmap marks "key column k is NULL at this row".
                    size_t bucket = k / 8;
                    size_t offset = k % 8;
                    bitmap[bucket] |= UInt8(1) << offset;
                }
            }
        }

        return bitmap;
    }

private:
    ColumnRawPtrs actual_columns;
    ColumnRawPtrs null_maps;
};
|
||||||
|
|
||||||
|
/// Case where nullable keys are not supported.
|
||||||
|
template <typename Key>
|
||||||
|
class BaseStateKeysFixed<Key, false>
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
BaseStateKeysFixed(const ColumnRawPtrs & columns) : actual_columns(columns) {}
|
||||||
|
|
||||||
|
const ColumnRawPtrs & getActualColumns() const { return actual_columns; }
|
||||||
|
|
||||||
|
KeysNullMap<Key> createBitmap(size_t) const
|
||||||
|
{
|
||||||
|
throw Exception{"Internal error: calling createBitmap() for non-nullable keys"
|
||||||
|
" is forbidden", ErrorCodes::LOGICAL_ERROR};
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
ColumnRawPtrs actual_columns;
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -69,7 +69,7 @@ public:
|
|||||||
static void finalizePerformanceCounters();
|
static void finalizePerformanceCounters();
|
||||||
|
|
||||||
/// Returns a non-empty string if the thread is attached to a query
|
/// Returns a non-empty string if the thread is attached to a query
|
||||||
static std::string getCurrentQueryID();
|
static const std::string & getQueryId();
|
||||||
|
|
||||||
/// Non-master threads call this method in destructor automatically
|
/// Non-master threads call this method in destructor automatically
|
||||||
static void detachQuery();
|
static void detachQuery();
|
||||||
|
@ -414,6 +414,9 @@ namespace ErrorCodes
|
|||||||
extern const int PROTOBUF_FIELD_NOT_REPEATED = 437;
|
extern const int PROTOBUF_FIELD_NOT_REPEATED = 437;
|
||||||
extern const int DATA_TYPE_CANNOT_BE_PROMOTED = 438;
|
extern const int DATA_TYPE_CANNOT_BE_PROMOTED = 438;
|
||||||
extern const int CANNOT_SCHEDULE_TASK = 439;
|
extern const int CANNOT_SCHEDULE_TASK = 439;
|
||||||
|
extern const int INVALID_LIMIT_EXPRESSION = 440;
|
||||||
|
extern const int CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING = 441;
|
||||||
|
extern const int BAD_DATABASE_FOR_TEMPORARY_TABLE = 442;
|
||||||
|
|
||||||
extern const int KEEPER_EXCEPTION = 999;
|
extern const int KEEPER_EXCEPTION = 999;
|
||||||
extern const int POCO_EXCEPTION = 1000;
|
extern const int POCO_EXCEPTION = 1000;
|
||||||
|
@ -38,10 +38,10 @@ std::string errnoToString(int code, int e)
|
|||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
std::string tmp = std::to_string(code);
|
std::string tmp = std::to_string(code);
|
||||||
const char * code = tmp.c_str();
|
const char * code_str = tmp.c_str();
|
||||||
const char * unknown_message = "Unknown error ";
|
const char * unknown_message = "Unknown error ";
|
||||||
strcpy(buf, unknown_message);
|
strcpy(buf, unknown_message);
|
||||||
strcpy(buf + strlen(unknown_message), code);
|
strcpy(buf + strlen(unknown_message), code_str);
|
||||||
}
|
}
|
||||||
return "errno: " + toString(e) + ", strerror: " + std::string(buf);
|
return "errno: " + toString(e) + ", strerror: " + std::string(buf);
|
||||||
#else
|
#else
|
||||||
@ -88,7 +88,7 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded
|
|||||||
try
|
try
|
||||||
{
|
{
|
||||||
stream << "Poco::Exception. Code: " << ErrorCodes::POCO_EXCEPTION << ", e.code() = " << e.code()
|
stream << "Poco::Exception. Code: " << ErrorCodes::POCO_EXCEPTION << ", e.code() = " << e.code()
|
||||||
<< ", e.displayText() = " << e.displayText() << ", e.what() = " << e.what();
|
<< ", e.displayText() = " << e.displayText();
|
||||||
}
|
}
|
||||||
catch (...) {}
|
catch (...) {}
|
||||||
}
|
}
|
||||||
@ -202,7 +202,7 @@ std::string getExceptionMessage(const Exception & e, bool with_stacktrace, bool
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
stream << "Code: " << e.code() << ", e.displayText() = " << text << ", e.what() = " << e.what();
|
stream << "Code: " << e.code() << ", e.displayText() = " << text;
|
||||||
|
|
||||||
if (with_stacktrace && !has_embedded_stack_trace)
|
if (with_stacktrace && !has_embedded_stack_trace)
|
||||||
stream << ", Stack trace:\n\n" << e.getStackTrace().toString();
|
stream << ", Stack trace:\n\n" << e.getStackTrace().toString();
|
||||||
|
@ -33,6 +33,7 @@ public:
|
|||||||
Exception * clone() const override { return new Exception(*this); }
|
Exception * clone() const override { return new Exception(*this); }
|
||||||
void rethrow() const override { throw *this; }
|
void rethrow() const override { throw *this; }
|
||||||
const char * name() const throw() override { return "DB::Exception"; }
|
const char * name() const throw() override { return "DB::Exception"; }
|
||||||
|
const char * what() const throw() override { return message().data(); }
|
||||||
|
|
||||||
/// Add something to the existing message.
|
/// Add something to the existing message.
|
||||||
void addMessage(const std::string & arg) { extendedMessage(arg); }
|
void addMessage(const std::string & arg) { extendedMessage(arg); }
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <Common/Exception.h>
|
#include <Common/Exception.h>
|
||||||
|
#include <Common/NamePrompter.h>
|
||||||
#include <Core/Types.h>
|
#include <Core/Types.h>
|
||||||
#include <Poco/String.h>
|
#include <Poco/String.h>
|
||||||
|
|
||||||
@ -105,6 +106,12 @@ public:
|
|||||||
return aliases.count(name) || case_insensitive_aliases.count(name);
|
return aliases.count(name) || case_insensitive_aliases.count(name);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::vector<String> getHints(const String & name) const
|
||||||
|
{
|
||||||
|
static const auto registered_names = getAllRegisteredNames();
|
||||||
|
return prompter.getHints(name, registered_names);
|
||||||
|
}
|
||||||
|
|
||||||
virtual ~IFactoryWithAliases() {}
|
virtual ~IFactoryWithAliases() {}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@ -120,6 +127,13 @@ private:
|
|||||||
|
|
||||||
/// Case insensitive aliases
|
/// Case insensitive aliases
|
||||||
AliasMap case_insensitive_aliases;
|
AliasMap case_insensitive_aliases;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* prompter for names, if a person makes a typo for some function or type, it
|
||||||
|
* helps to find best possible match (in particular, edit distance is done like in clang
|
||||||
|
* (max edit distance is (typo.size() + 2) / 3)
|
||||||
|
*/
|
||||||
|
NamePrompter</*MaxNumHints=*/2> prompter;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
92
dbms/src/Common/NamePrompter.h
Normal file
92
dbms/src/Common/NamePrompter.h
Normal file
@ -0,0 +1,92 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <Core/Types.h>
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cctype>
|
||||||
|
#include <cmath>
|
||||||
|
#include <queue>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
template <size_t MaxNumHints>
|
||||||
|
class NamePrompter
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
using DistanceIndex = std::pair<size_t, size_t>;
|
||||||
|
using DistanceIndexQueue = std::priority_queue<DistanceIndex>;
|
||||||
|
|
||||||
|
static std::vector<String> getHints(const String & name, const std::vector<String> & prompting_strings)
|
||||||
|
{
|
||||||
|
DistanceIndexQueue queue;
|
||||||
|
for (size_t i = 0; i < prompting_strings.size(); ++i)
|
||||||
|
appendToQueue(i, name, queue, prompting_strings);
|
||||||
|
return release(queue, prompting_strings);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
static size_t levenshteinDistance(const String & lhs, const String & rhs)
|
||||||
|
{
|
||||||
|
size_t n = lhs.size();
|
||||||
|
size_t m = rhs.size();
|
||||||
|
std::vector<std::vector<size_t>> dp(n + 1, std::vector<size_t>(m + 1));
|
||||||
|
|
||||||
|
for (size_t i = 1; i <= n; ++i)
|
||||||
|
dp[i][0] = i;
|
||||||
|
|
||||||
|
for (size_t i = 1; i <= m; ++i)
|
||||||
|
dp[0][i] = i;
|
||||||
|
|
||||||
|
for (size_t j = 1; j <= m; ++j)
|
||||||
|
{
|
||||||
|
for (size_t i = 1; i <= n; ++i)
|
||||||
|
{
|
||||||
|
if (std::tolower(lhs[i - 1]) == std::tolower(rhs[j - 1]))
|
||||||
|
dp[i][j] = dp[i - 1][j - 1];
|
||||||
|
else
|
||||||
|
dp[i][j] = std::min(dp[i - 1][j] + 1, std::min(dp[i][j - 1] + 1, dp[i - 1][j - 1] + 1));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return dp[n][m];
|
||||||
|
}
|
||||||
|
|
||||||
|
static void appendToQueue(size_t ind, const String & name, DistanceIndexQueue & queue, const std::vector<String> & prompting_strings)
|
||||||
|
{
|
||||||
|
const String & prompt = prompting_strings[ind];
|
||||||
|
|
||||||
|
/// Clang SimpleTypoCorrector logic
|
||||||
|
const size_t min_possible_edit_distance = std::abs(static_cast<int64_t>(name.size()) - static_cast<int64_t>(prompt.size()));
|
||||||
|
const size_t mistake_factor = (name.size() + 2) / 3;
|
||||||
|
if (min_possible_edit_distance > 0 && name.size() / min_possible_edit_distance < 3)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (prompt.size() <= name.size() + mistake_factor && prompt.size() + mistake_factor >= name.size())
|
||||||
|
{
|
||||||
|
size_t distance = levenshteinDistance(prompt, name);
|
||||||
|
if (distance <= mistake_factor)
|
||||||
|
{
|
||||||
|
queue.emplace(distance, ind);
|
||||||
|
if (queue.size() > MaxNumHints)
|
||||||
|
queue.pop();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::vector<String> release(DistanceIndexQueue & queue, const std::vector<String> & prompting_strings)
|
||||||
|
{
|
||||||
|
std::vector<String> ans;
|
||||||
|
ans.reserve(queue.size());
|
||||||
|
while (!queue.empty())
|
||||||
|
{
|
||||||
|
auto top = queue.top();
|
||||||
|
queue.pop();
|
||||||
|
ans.push_back(prompting_strings[top.second]);
|
||||||
|
}
|
||||||
|
std::reverse(ans.begin(), ans.end());
|
||||||
|
return ans;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
@ -17,6 +17,7 @@
|
|||||||
#include <common/unaligned.h>
|
#include <common/unaligned.h>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
|
#include <Core/Defines.h>
|
||||||
|
|
||||||
#define ROTL(x, b) static_cast<UInt64>(((x) << (b)) | ((x) >> (64 - (b))))
|
#define ROTL(x, b) static_cast<UInt64>(((x) << (b)) | ((x) >> (64 - (b))))
|
||||||
|
|
||||||
@ -49,7 +50,7 @@ private:
|
|||||||
UInt8 current_bytes[8];
|
UInt8 current_bytes[8];
|
||||||
};
|
};
|
||||||
|
|
||||||
void finalize()
|
ALWAYS_INLINE void finalize()
|
||||||
{
|
{
|
||||||
/// In the last free byte, we write the remainder of the division by 256.
|
/// In the last free byte, we write the remainder of the division by 256.
|
||||||
current_bytes[7] = cnt;
|
current_bytes[7] = cnt;
|
||||||
@ -156,7 +157,7 @@ public:
|
|||||||
|
|
||||||
/// template for avoiding 'unsigned long long' vs 'unsigned long' problem on old poco in macos
|
/// template for avoiding 'unsigned long long' vs 'unsigned long' problem on old poco in macos
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void get128(T & lo, T & hi)
|
ALWAYS_INLINE void get128(T & lo, T & hi)
|
||||||
{
|
{
|
||||||
static_assert(sizeof(T) == 8);
|
static_assert(sizeof(T) == 8);
|
||||||
finalize();
|
finalize();
|
||||||
@ -199,8 +200,6 @@ std::enable_if_t<std::/*has_unique_object_representations_v*/is_standard_layout_
|
|||||||
return hash.get64();
|
return hash.get64();
|
||||||
}
|
}
|
||||||
|
|
||||||
#include <string>
|
|
||||||
|
|
||||||
inline UInt64 sipHash64(const std::string & s)
|
inline UInt64 sipHash64(const std::string & s)
|
||||||
{
|
{
|
||||||
return sipHash64(s.data(), s.size());
|
return sipHash64(s.data(), s.size());
|
||||||
|
@ -21,7 +21,7 @@ namespace ErrorCodes
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
thread_local ThreadStatusPtr current_thread = nullptr;
|
thread_local ThreadStatus * current_thread = nullptr;
|
||||||
|
|
||||||
|
|
||||||
TasksStatsCounters TasksStatsCounters::current()
|
TasksStatsCounters TasksStatsCounters::current()
|
||||||
@ -124,7 +124,7 @@ void ThreadStatus::attachInternalTextLogsQueue(const InternalTextLogsQueuePtr &
|
|||||||
if (!thread_group)
|
if (!thread_group)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
std::unique_lock lock(thread_group->mutex);
|
std::lock_guard lock(thread_group->mutex);
|
||||||
thread_group->logs_queue_ptr = logs_queue;
|
thread_group->logs_queue_ptr = logs_queue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -25,7 +25,6 @@ namespace DB
|
|||||||
class Context;
|
class Context;
|
||||||
class QueryStatus;
|
class QueryStatus;
|
||||||
class ThreadStatus;
|
class ThreadStatus;
|
||||||
using ThreadStatusPtr = ThreadStatus*;
|
|
||||||
class QueryThreadLog;
|
class QueryThreadLog;
|
||||||
struct TasksStatsCounters;
|
struct TasksStatsCounters;
|
||||||
struct RUsageCounters;
|
struct RUsageCounters;
|
||||||
@ -46,7 +45,7 @@ using InternalTextLogsQueueWeakPtr = std::weak_ptr<InternalTextLogsQueue>;
|
|||||||
class ThreadGroupStatus
|
class ThreadGroupStatus
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
mutable std::shared_mutex mutex;
|
mutable std::mutex mutex;
|
||||||
|
|
||||||
ProfileEvents::Counters performance_counters{VariableContext::Process};
|
ProfileEvents::Counters performance_counters{VariableContext::Process};
|
||||||
MemoryTracker memory_tracker{VariableContext::Process};
|
MemoryTracker memory_tracker{VariableContext::Process};
|
||||||
@ -56,12 +55,11 @@ public:
|
|||||||
|
|
||||||
InternalTextLogsQueueWeakPtr logs_queue_ptr;
|
InternalTextLogsQueueWeakPtr logs_queue_ptr;
|
||||||
|
|
||||||
/// Key is Poco's thread_id
|
std::vector<UInt32> thread_numbers;
|
||||||
using QueryThreadStatuses = std::map<UInt32, ThreadStatusPtr>;
|
|
||||||
QueryThreadStatuses thread_statuses;
|
|
||||||
|
|
||||||
/// The first thread created this thread group
|
/// The first thread created this thread group
|
||||||
ThreadStatusPtr master_thread;
|
UInt32 master_thread_number = 0;
|
||||||
|
Int32 master_thread_os_id = -1;
|
||||||
|
|
||||||
String query;
|
String query;
|
||||||
};
|
};
|
||||||
@ -69,7 +67,7 @@ public:
|
|||||||
using ThreadGroupStatusPtr = std::shared_ptr<ThreadGroupStatus>;
|
using ThreadGroupStatusPtr = std::shared_ptr<ThreadGroupStatus>;
|
||||||
|
|
||||||
|
|
||||||
extern thread_local ThreadStatusPtr current_thread;
|
extern thread_local ThreadStatus * current_thread;
|
||||||
|
|
||||||
/** Encapsulates all per-thread info (ProfileEvents, MemoryTracker, query_id, query context, etc.).
|
/** Encapsulates all per-thread info (ProfileEvents, MemoryTracker, query_id, query context, etc.).
|
||||||
* The object must be created in thread function and destroyed in the same thread before the exit.
|
* The object must be created in thread function and destroyed in the same thread before the exit.
|
||||||
@ -116,7 +114,7 @@ public:
|
|||||||
return thread_state.load(std::memory_order_relaxed);
|
return thread_state.load(std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
|
|
||||||
String getQueryID();
|
const std::string & getQueryId() const;
|
||||||
|
|
||||||
/// Starts new query and create new thread group for it, current thread becomes master thread of the query
|
/// Starts new query and create new thread group for it, current thread becomes master thread of the query
|
||||||
void initializeQuery();
|
void initializeQuery();
|
||||||
@ -160,6 +158,8 @@ protected:
|
|||||||
/// Use it only from current thread
|
/// Use it only from current thread
|
||||||
Context * query_context = nullptr;
|
Context * query_context = nullptr;
|
||||||
|
|
||||||
|
String query_id;
|
||||||
|
|
||||||
/// A logs queue used by TCPHandler to pass logs to a client
|
/// A logs queue used by TCPHandler to pass logs to a client
|
||||||
InternalTextLogsQueueWeakPtr logs_queue_ptr;
|
InternalTextLogsQueueWeakPtr logs_queue_ptr;
|
||||||
|
|
||||||
|
@ -108,7 +108,7 @@ public:
|
|||||||
private:
|
private:
|
||||||
size_t count = 0;
|
size_t count = 0;
|
||||||
const size_t max_speed = 0;
|
const size_t max_speed = 0;
|
||||||
const size_t limit = 0; /// 0 - not limited.
|
const UInt64 limit = 0; /// 0 - not limited.
|
||||||
const char * limit_exceeded_exception_message = nullptr;
|
const char * limit_exceeded_exception_message = nullptr;
|
||||||
Stopwatch watch {CLOCK_MONOTONIC_COARSE};
|
Stopwatch watch {CLOCK_MONOTONIC_COARSE};
|
||||||
std::mutex mutex;
|
std::mutex mutex;
|
||||||
|
@ -33,7 +33,7 @@ class IXDBCBridgeHelper
|
|||||||
public:
|
public:
|
||||||
static constexpr inline auto DEFAULT_FORMAT = "RowBinary";
|
static constexpr inline auto DEFAULT_FORMAT = "RowBinary";
|
||||||
|
|
||||||
virtual std::vector<std::pair<std::string, std::string>> getURLParams(const std::string & cols, size_t max_block_size) const = 0;
|
virtual std::vector<std::pair<std::string, std::string>> getURLParams(const std::string & cols, UInt64 max_block_size) const = 0;
|
||||||
virtual void startBridgeSync() const = 0;
|
virtual void startBridgeSync() const = 0;
|
||||||
virtual Poco::URI getMainURI() const = 0;
|
virtual Poco::URI getMainURI() const = 0;
|
||||||
virtual Poco::URI getColumnsInfoURI() const = 0;
|
virtual Poco::URI getColumnsInfoURI() const = 0;
|
||||||
@ -127,7 +127,7 @@ public:
|
|||||||
/**
|
/**
|
||||||
* @todo leaky abstraction - used by external API's
|
* @todo leaky abstraction - used by external API's
|
||||||
*/
|
*/
|
||||||
std::vector<std::pair<std::string, std::string>> getURLParams(const std::string & cols, size_t max_block_size) const override
|
std::vector<std::pair<std::string, std::string>> getURLParams(const std::string & cols, UInt64 max_block_size) const override
|
||||||
{
|
{
|
||||||
std::vector<std::pair<std::string, std::string>> result;
|
std::vector<std::pair<std::string, std::string>> result;
|
||||||
|
|
||||||
|
@ -1,12 +1,44 @@
|
|||||||
#include <Common/formatIPv6.h>
|
#include <Common/formatIPv6.h>
|
||||||
#include <Common/hex.h>
|
#include <Common/hex.h>
|
||||||
|
#include <Common/StringUtils/StringUtils.h>
|
||||||
|
|
||||||
#include <ext/range.h>
|
#include <ext/range.h>
|
||||||
#include <array>
|
#include <array>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
|
// To be used in formatIPv4, maps a byte to it's string form prefixed with length (so save strlen call).
|
||||||
|
extern const char one_byte_to_string_lookup_table[256][4] = {
|
||||||
|
{1, '0'}, {1, '1'}, {1, '2'}, {1, '3'}, {1, '4'}, {1, '5'}, {1, '6'}, {1, '7'}, {1, '8'}, {1, '9'},
|
||||||
|
{2, '1', '0'}, {2, '1', '1'}, {2, '1', '2'}, {2, '1', '3'}, {2, '1', '4'}, {2, '1', '5'}, {2, '1', '6'}, {2, '1', '7'}, {2, '1', '8'}, {2, '1', '9'},
|
||||||
|
{2, '2', '0'}, {2, '2', '1'}, {2, '2', '2'}, {2, '2', '3'}, {2, '2', '4'}, {2, '2', '5'}, {2, '2', '6'}, {2, '2', '7'}, {2, '2', '8'}, {2, '2', '9'},
|
||||||
|
{2, '3', '0'}, {2, '3', '1'}, {2, '3', '2'}, {2, '3', '3'}, {2, '3', '4'}, {2, '3', '5'}, {2, '3', '6'}, {2, '3', '7'}, {2, '3', '8'}, {2, '3', '9'},
|
||||||
|
{2, '4', '0'}, {2, '4', '1'}, {2, '4', '2'}, {2, '4', '3'}, {2, '4', '4'}, {2, '4', '5'}, {2, '4', '6'}, {2, '4', '7'}, {2, '4', '8'}, {2, '4', '9'},
|
||||||
|
{2, '5', '0'}, {2, '5', '1'}, {2, '5', '2'}, {2, '5', '3'}, {2, '5', '4'}, {2, '5', '5'}, {2, '5', '6'}, {2, '5', '7'}, {2, '5', '8'}, {2, '5', '9'},
|
||||||
|
{2, '6', '0'}, {2, '6', '1'}, {2, '6', '2'}, {2, '6', '3'}, {2, '6', '4'}, {2, '6', '5'}, {2, '6', '6'}, {2, '6', '7'}, {2, '6', '8'}, {2, '6', '9'},
|
||||||
|
{2, '7', '0'}, {2, '7', '1'}, {2, '7', '2'}, {2, '7', '3'}, {2, '7', '4'}, {2, '7', '5'}, {2, '7', '6'}, {2, '7', '7'}, {2, '7', '8'}, {2, '7', '9'},
|
||||||
|
{2, '8', '0'}, {2, '8', '1'}, {2, '8', '2'}, {2, '8', '3'}, {2, '8', '4'}, {2, '8', '5'}, {2, '8', '6'}, {2, '8', '7'}, {2, '8', '8'}, {2, '8', '9'},
|
||||||
|
{2, '9', '0'}, {2, '9', '1'}, {2, '9', '2'}, {2, '9', '3'}, {2, '9', '4'}, {2, '9', '5'}, {2, '9', '6'}, {2, '9', '7'}, {2, '9', '8'}, {2, '9', '9'},
|
||||||
|
{3, '1', '0', '0'}, {3, '1', '0', '1'}, {3, '1', '0', '2'}, {3, '1', '0', '3'}, {3, '1', '0', '4'}, {3, '1', '0', '5'}, {3, '1', '0', '6'}, {3, '1', '0', '7'}, {3, '1', '0', '8'}, {3, '1', '0', '9'},
|
||||||
|
{3, '1', '1', '0'}, {3, '1', '1', '1'}, {3, '1', '1', '2'}, {3, '1', '1', '3'}, {3, '1', '1', '4'}, {3, '1', '1', '5'}, {3, '1', '1', '6'}, {3, '1', '1', '7'}, {3, '1', '1', '8'}, {3, '1', '1', '9'},
|
||||||
|
{3, '1', '2', '0'}, {3, '1', '2', '1'}, {3, '1', '2', '2'}, {3, '1', '2', '3'}, {3, '1', '2', '4'}, {3, '1', '2', '5'}, {3, '1', '2', '6'}, {3, '1', '2', '7'}, {3, '1', '2', '8'}, {3, '1', '2', '9'},
|
||||||
|
{3, '1', '3', '0'}, {3, '1', '3', '1'}, {3, '1', '3', '2'}, {3, '1', '3', '3'}, {3, '1', '3', '4'}, {3, '1', '3', '5'}, {3, '1', '3', '6'}, {3, '1', '3', '7'}, {3, '1', '3', '8'}, {3, '1', '3', '9'},
|
||||||
|
{3, '1', '4', '0'}, {3, '1', '4', '1'}, {3, '1', '4', '2'}, {3, '1', '4', '3'}, {3, '1', '4', '4'}, {3, '1', '4', '5'}, {3, '1', '4', '6'}, {3, '1', '4', '7'}, {3, '1', '4', '8'}, {3, '1', '4', '9'},
|
||||||
|
{3, '1', '5', '0'}, {3, '1', '5', '1'}, {3, '1', '5', '2'}, {3, '1', '5', '3'}, {3, '1', '5', '4'}, {3, '1', '5', '5'}, {3, '1', '5', '6'}, {3, '1', '5', '7'}, {3, '1', '5', '8'}, {3, '1', '5', '9'},
|
||||||
|
{3, '1', '6', '0'}, {3, '1', '6', '1'}, {3, '1', '6', '2'}, {3, '1', '6', '3'}, {3, '1', '6', '4'}, {3, '1', '6', '5'}, {3, '1', '6', '6'}, {3, '1', '6', '7'}, {3, '1', '6', '8'}, {3, '1', '6', '9'},
|
||||||
|
{3, '1', '7', '0'}, {3, '1', '7', '1'}, {3, '1', '7', '2'}, {3, '1', '7', '3'}, {3, '1', '7', '4'}, {3, '1', '7', '5'}, {3, '1', '7', '6'}, {3, '1', '7', '7'}, {3, '1', '7', '8'}, {3, '1', '7', '9'},
|
||||||
|
{3, '1', '8', '0'}, {3, '1', '8', '1'}, {3, '1', '8', '2'}, {3, '1', '8', '3'}, {3, '1', '8', '4'}, {3, '1', '8', '5'}, {3, '1', '8', '6'}, {3, '1', '8', '7'}, {3, '1', '8', '8'}, {3, '1', '8', '9'},
|
||||||
|
{3, '1', '9', '0'}, {3, '1', '9', '1'}, {3, '1', '9', '2'}, {3, '1', '9', '3'}, {3, '1', '9', '4'}, {3, '1', '9', '5'}, {3, '1', '9', '6'}, {3, '1', '9', '7'}, {3, '1', '9', '8'}, {3, '1', '9', '9'},
|
||||||
|
{3, '2', '0', '0'}, {3, '2', '0', '1'}, {3, '2', '0', '2'}, {3, '2', '0', '3'}, {3, '2', '0', '4'}, {3, '2', '0', '5'}, {3, '2', '0', '6'}, {3, '2', '0', '7'}, {3, '2', '0', '8'}, {3, '2', '0', '9'},
|
||||||
|
{3, '2', '1', '0'}, {3, '2', '1', '1'}, {3, '2', '1', '2'}, {3, '2', '1', '3'}, {3, '2', '1', '4'}, {3, '2', '1', '5'}, {3, '2', '1', '6'}, {3, '2', '1', '7'}, {3, '2', '1', '8'}, {3, '2', '1', '9'},
|
||||||
|
{3, '2', '2', '0'}, {3, '2', '2', '1'}, {3, '2', '2', '2'}, {3, '2', '2', '3'}, {3, '2', '2', '4'}, {3, '2', '2', '5'}, {3, '2', '2', '6'}, {3, '2', '2', '7'}, {3, '2', '2', '8'}, {3, '2', '2', '9'},
|
||||||
|
{3, '2', '3', '0'}, {3, '2', '3', '1'}, {3, '2', '3', '2'}, {3, '2', '3', '3'}, {3, '2', '3', '4'}, {3, '2', '3', '5'}, {3, '2', '3', '6'}, {3, '2', '3', '7'}, {3, '2', '3', '8'}, {3, '2', '3', '9'},
|
||||||
|
{3, '2', '4', '0'}, {3, '2', '4', '1'}, {3, '2', '4', '2'}, {3, '2', '4', '3'}, {3, '2', '4', '4'}, {3, '2', '4', '5'}, {3, '2', '4', '6'}, {3, '2', '4', '7'}, {3, '2', '4', '8'}, {3, '2', '4', '9'},
|
||||||
|
{3, '2', '5', '0'}, {3, '2', '5', '1'}, {3, '2', '5', '2'}, {3, '2', '5', '3'}, {3, '2', '5', '4'}, {3, '2', '5', '5'},
|
||||||
|
};
|
||||||
|
|
||||||
/// integer logarithm, return ceil(log(value, base)) (the smallest integer greater or equal than log(value, base)
|
/// integer logarithm, return ceil(log(value, base)) (the smallest integer greater or equal than log(value, base)
|
||||||
static constexpr UInt32 intLog(const UInt32 value, const UInt32 base, const bool carry)
|
static constexpr UInt32 intLog(const UInt32 value, const UInt32 base, const bool carry)
|
||||||
{
|
{
|
||||||
@ -45,22 +77,6 @@ static void printInteger(char *& out, T value)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// print IPv4 address as %u.%u.%u.%u
|
|
||||||
static void formatIPv4(const unsigned char * src, char *& dst, UInt8 zeroed_tail_bytes_count)
|
|
||||||
{
|
|
||||||
const auto limit = IPV4_BINARY_LENGTH - zeroed_tail_bytes_count;
|
|
||||||
|
|
||||||
for (const auto i : ext::range(0, IPV4_BINARY_LENGTH))
|
|
||||||
{
|
|
||||||
UInt8 byte = (i < limit) ? src[i] : 0;
|
|
||||||
printInteger<10, UInt8>(dst, byte);
|
|
||||||
|
|
||||||
if (i != IPV4_BINARY_LENGTH - 1)
|
|
||||||
*dst++ = '.';
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void formatIPv6(const unsigned char * src, char *& dst, UInt8 zeroed_tail_bytes_count)
|
void formatIPv6(const unsigned char * src, char *& dst, UInt8 zeroed_tail_bytes_count)
|
||||||
{
|
{
|
||||||
struct { int base, len; } best{-1, 0}, cur{-1, 0};
|
struct { int base, len; } best{-1, 0}, cur{-1, 0};
|
||||||
@ -122,8 +138,14 @@ void formatIPv6(const unsigned char * src, char *& dst, UInt8 zeroed_tail_bytes_
|
|||||||
/// Is this address an encapsulated IPv4?
|
/// Is this address an encapsulated IPv4?
|
||||||
if (i == 6 && best.base == 0 && (best.len == 6 || (best.len == 5 && words[5] == 0xffffu)))
|
if (i == 6 && best.base == 0 && (best.len == 6 || (best.len == 5 && words[5] == 0xffffu)))
|
||||||
{
|
{
|
||||||
formatIPv4(src + 12, dst, std::min(zeroed_tail_bytes_count, static_cast<UInt8>(IPV4_BINARY_LENGTH)));
|
UInt8 ipv4_buffer[IPV4_BINARY_LENGTH] = {0};
|
||||||
break;
|
memcpy(ipv4_buffer, src + 12, IPV4_BINARY_LENGTH);
|
||||||
|
// Due to historical reasons formatIPv4() takes ipv4 in BE format, but inside ipv6 we store it in LE-format.
|
||||||
|
std::reverse(std::begin(ipv4_buffer), std::end(ipv4_buffer));
|
||||||
|
|
||||||
|
formatIPv4(ipv4_buffer, dst, std::min(zeroed_tail_bytes_count, static_cast<UInt8>(IPV4_BINARY_LENGTH)), "0");
|
||||||
|
// formatIPv4 has already added a null-terminator for us.
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
printInteger<16>(dst, words[i]);
|
printInteger<16>(dst, words[i]);
|
||||||
|
@ -1,12 +1,17 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <common/Types.h>
|
#include <common/Types.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <utility>
|
||||||
|
#include <ext/range.h>
|
||||||
|
#include <Common/hex.h>
|
||||||
|
#include <Common/StringUtils/StringUtils.h>
|
||||||
|
|
||||||
#define IPV4_BINARY_LENGTH 4
|
constexpr size_t IPV4_BINARY_LENGTH = 4;
|
||||||
#define IPV6_BINARY_LENGTH 16
|
constexpr size_t IPV6_BINARY_LENGTH = 16;
|
||||||
#define IPV4_MAX_TEXT_LENGTH 15 /// Does not count tail zero byte.
|
constexpr size_t IPV4_MAX_TEXT_LENGTH = 15; /// Does not count tail zero byte.
|
||||||
#define IPV6_MAX_TEXT_LENGTH 39
|
constexpr size_t IPV6_MAX_TEXT_LENGTH = 39;
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
@ -18,4 +23,205 @@ namespace DB
|
|||||||
*/
|
*/
|
||||||
void formatIPv6(const unsigned char * src, char *& dst, UInt8 zeroed_tail_bytes_count = 0);
|
void formatIPv6(const unsigned char * src, char *& dst, UInt8 zeroed_tail_bytes_count = 0);
|
||||||
|
|
||||||
|
/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv4 string.
|
||||||
|
*
|
||||||
|
* Parses the input string `src` and stores binary BE value into buffer pointed by `dst`,
|
||||||
|
* which should be long enough.
|
||||||
|
* That is "127.0.0.1" becomes 0x7f000001.
|
||||||
|
*
|
||||||
|
* In case of failure returns false and doesn't modify buffer pointed by `dst`.
|
||||||
|
*
|
||||||
|
* @param src - input string, expected to be non-null and null-terminated right after the IPv4 string value.
|
||||||
|
* @param dst - where to put output bytes, expected to be non-null and atleast IPV4_BINARY_LENGTH-long.
|
||||||
|
* @return false if parsing failed, true otherwise.
|
||||||
|
*/
|
||||||
|
inline bool parseIPv4(const char * src, unsigned char * dst)
|
||||||
|
{
|
||||||
|
UInt32 result = 0;
|
||||||
|
for (int offset = 24; offset >= 0; offset -= 8)
|
||||||
|
{
|
||||||
|
UInt32 value = 0;
|
||||||
|
size_t len = 0;
|
||||||
|
while (isNumericASCII(*src) && len <= 3)
|
||||||
|
{
|
||||||
|
value = value * 10 + (*src - '0');
|
||||||
|
++len;
|
||||||
|
++src;
|
||||||
|
}
|
||||||
|
if (len == 0 || value > 255 || (offset > 0 && *src != '.'))
|
||||||
|
return false;
|
||||||
|
result |= value << offset;
|
||||||
|
++src;
|
||||||
|
}
|
||||||
|
if (*(src - 1) != '\0')
|
||||||
|
return false;
|
||||||
|
|
||||||
|
memcpy(dst, &result, sizeof(result));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv6 string.
|
||||||
|
*
|
||||||
|
* Slightly altered implementation from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c
|
||||||
|
* Parses the input string `src` and stores binary LE value into buffer pointed by `dst`,
|
||||||
|
* which should be long enough. In case of failure zeroes
|
||||||
|
* IPV6_BINARY_LENGTH bytes of buffer pointed by `dst`.
|
||||||
|
*
|
||||||
|
* @param src - input string, expected to be non-null and null-terminated right after the IPv6 string value.
|
||||||
|
* @param dst - where to put output bytes, expected to be non-null and atleast IPV6_BINARY_LENGTH-long.
|
||||||
|
* @return false if parsing failed, true otherwise.
|
||||||
|
*/
|
||||||
|
inline bool parseIPv6(const char * src, unsigned char * dst)
|
||||||
|
{
|
||||||
|
const auto clear_dst = [dst]()
|
||||||
|
{
|
||||||
|
memset(dst, '\0', IPV6_BINARY_LENGTH);
|
||||||
|
return false;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Leading :: requires some special handling.
|
||||||
|
if (*src == ':')
|
||||||
|
if (*++src != ':')
|
||||||
|
return clear_dst();
|
||||||
|
|
||||||
|
unsigned char tmp[IPV6_BINARY_LENGTH]{};
|
||||||
|
auto tp = tmp;
|
||||||
|
auto endp = tp + IPV6_BINARY_LENGTH;
|
||||||
|
auto curtok = src;
|
||||||
|
auto saw_xdigit = false;
|
||||||
|
UInt32 val{};
|
||||||
|
unsigned char * colonp = nullptr;
|
||||||
|
|
||||||
|
/// Assuming zero-terminated string.
|
||||||
|
while (const auto ch = *src++)
|
||||||
|
{
|
||||||
|
const auto num = unhex(ch);
|
||||||
|
|
||||||
|
if (num != '\xff')
|
||||||
|
{
|
||||||
|
val <<= 4;
|
||||||
|
val |= num;
|
||||||
|
if (val > 0xffffu)
|
||||||
|
return clear_dst();
|
||||||
|
|
||||||
|
saw_xdigit = 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ch == ':')
|
||||||
|
{
|
||||||
|
curtok = src;
|
||||||
|
if (!saw_xdigit)
|
||||||
|
{
|
||||||
|
if (colonp)
|
||||||
|
return clear_dst();
|
||||||
|
|
||||||
|
colonp = tp;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (tp + sizeof(UInt16) > endp)
|
||||||
|
return clear_dst();
|
||||||
|
|
||||||
|
*tp++ = static_cast<unsigned char>((val >> 8) & 0xffu);
|
||||||
|
*tp++ = static_cast<unsigned char>(val & 0xffu);
|
||||||
|
saw_xdigit = false;
|
||||||
|
val = 0;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ch == '.' && (tp + IPV4_BINARY_LENGTH) <= endp)
|
||||||
|
{
|
||||||
|
if (!parseIPv4(curtok, tp))
|
||||||
|
return clear_dst();
|
||||||
|
std::reverse(tp, tp + IPV4_BINARY_LENGTH);
|
||||||
|
|
||||||
|
tp += IPV4_BINARY_LENGTH;
|
||||||
|
saw_xdigit = false;
|
||||||
|
break; /* '\0' was seen by ipv4_scan(). */
|
||||||
|
}
|
||||||
|
|
||||||
|
return clear_dst();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (saw_xdigit)
|
||||||
|
{
|
||||||
|
if (tp + sizeof(UInt16) > endp)
|
||||||
|
return clear_dst();
|
||||||
|
|
||||||
|
*tp++ = static_cast<unsigned char>((val >> 8) & 0xffu);
|
||||||
|
*tp++ = static_cast<unsigned char>(val & 0xffu);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (colonp)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Since some memmove()'s erroneously fail to handle
|
||||||
|
* overlapping regions, we'll do the shift by hand.
|
||||||
|
*/
|
||||||
|
const auto n = tp - colonp;
|
||||||
|
|
||||||
|
for (int i = 1; i <= n; ++i)
|
||||||
|
{
|
||||||
|
endp[- i] = colonp[n - i];
|
||||||
|
colonp[n - i] = 0;
|
||||||
|
}
|
||||||
|
tp = endp;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (tp != endp)
|
||||||
|
return clear_dst();
|
||||||
|
|
||||||
|
memcpy(dst, tmp, sizeof(tmp));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Format 4-byte binary sequesnce as IPv4 text: 'aaa.bbb.ccc.ddd',
|
||||||
|
* expects inout to be in BE-format, that is 0x7f000001 => "127.0.0.1".
|
||||||
|
*
|
||||||
|
* Any number of the tail bytes can be masked with given mask string.
|
||||||
|
*
|
||||||
|
* Assumptions:
|
||||||
|
* src is IPV4_BINARY_LENGTH long,
|
||||||
|
* dst is IPV4_MAX_TEXT_LENGTH long,
|
||||||
|
* mask_tail_octets <= IPV4_BINARY_LENGTH
|
||||||
|
* mask_string is NON-NULL, if mask_tail_octets > 0.
|
||||||
|
*
|
||||||
|
* Examples:
|
||||||
|
* formatIPv4(&0x7f000001, dst, mask_tail_octets = 0, nullptr);
|
||||||
|
* > dst == "127.0.0.1"
|
||||||
|
* formatIPv4(&0x7f000001, dst, mask_tail_octets = 1, "xxx");
|
||||||
|
* > dst == "127.0.0.xxx"
|
||||||
|
* formatIPv4(&0x7f000001, dst, mask_tail_octets = 1, "0");
|
||||||
|
* > dst == "127.0.0.0"
|
||||||
|
*/
|
||||||
|
inline void formatIPv4(const unsigned char * src, char *& dst, UInt8 mask_tail_octets = 0, const char * mask_string = "xxx")
|
||||||
|
{
|
||||||
|
extern const char one_byte_to_string_lookup_table[256][4];
|
||||||
|
|
||||||
|
const size_t mask_length = mask_string ? strlen(mask_string) : 0;
|
||||||
|
const size_t limit = std::min(IPV4_BINARY_LENGTH, IPV4_BINARY_LENGTH - mask_tail_octets);
|
||||||
|
for (size_t octet = 0; octet < limit; ++octet)
|
||||||
|
{
|
||||||
|
const UInt8 value = static_cast<UInt8>(src[IPV4_BINARY_LENGTH - octet - 1]);
|
||||||
|
auto rep = one_byte_to_string_lookup_table[value];
|
||||||
|
const UInt8 len = rep[0];
|
||||||
|
const char* str = rep + 1;
|
||||||
|
|
||||||
|
memcpy(dst, str, len);
|
||||||
|
dst += len;
|
||||||
|
*dst++ = '.';
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t mask = 0; mask < mask_tail_octets; ++mask)
|
||||||
|
{
|
||||||
|
memcpy(dst, mask_string, mask_length);
|
||||||
|
dst += mask_length;
|
||||||
|
|
||||||
|
*dst++ = '.';
|
||||||
|
}
|
||||||
|
|
||||||
|
dst[-1] = '\0';
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -1,17 +1,18 @@
|
|||||||
|
#include <pthread.h>
|
||||||
|
|
||||||
#if defined(__APPLE__)
|
#if defined(__APPLE__)
|
||||||
#include <pthread.h>
|
|
||||||
#elif defined(__FreeBSD__)
|
#elif defined(__FreeBSD__)
|
||||||
#include <pthread.h>
|
#include <pthread_np.h>
|
||||||
#include <pthread_np.h>
|
|
||||||
#else
|
#else
|
||||||
#include <sys/prctl.h>
|
#include <sys/prctl.h>
|
||||||
#endif
|
#endif
|
||||||
#include <pthread.h>
|
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
|
||||||
#include <Common/Exception.h>
|
#include <Common/Exception.h>
|
||||||
#include <Common/setThreadName.h>
|
#include <Common/setThreadName.h>
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
namespace ErrorCodes
|
namespace ErrorCodes
|
||||||
@ -23,6 +24,11 @@ namespace ErrorCodes
|
|||||||
|
|
||||||
void setThreadName(const char * name)
|
void setThreadName(const char * name)
|
||||||
{
|
{
|
||||||
|
#ifndef NDEBUG
|
||||||
|
if (strlen(name) > 15)
|
||||||
|
throw DB::Exception("Thread name cannot be longer than 15 bytes", DB::ErrorCodes::PTHREAD_ERROR);
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined(__FreeBSD__)
|
#if defined(__FreeBSD__)
|
||||||
pthread_set_name_np(pthread_self(), name);
|
pthread_set_name_np(pthread_self(), name);
|
||||||
return;
|
return;
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user