rebase from master

Guillaume Tassery 2019-11-07 08:38:55 +01:00
commit 5994d551a6
614 changed files with 16072 additions and 5062 deletions

.github/CODEOWNERS

@ -1,3 +1,4 @@
dbms/* @ClickHouse/core-assigner
utils/* @ClickHouse/core-assigner
docs/* @ClickHouse/docs
docs/zh/* @ClickHouse/docs-zh


@ -1,8 +1,6 @@
I hereby agree to the terms of the CLA available at: https://yandex.ru/legal/cla/?lang=en
For changelog. Remove if this is a non-significant change.
Category (leave one):
Changelog category (leave one):
- New Feature
- Bug Fix
- Improvement
@ -11,11 +9,14 @@ Category (leave one):
- Build/Testing/Packaging Improvement
- Documentation
- Other
- Non-significant (changelog entry is not needed)
Short description (up to a few sentences):
Changelog entry (up to a few sentences, not needed for non-significant PRs):
...
Detailed description (optional):
...


@ -1 +0,0 @@
pr-feature: "New Feature"

.github/labeler.yml

@ -1,19 +0,0 @@
# Documentation PRs
documentation:
- "**/*.md"
- "docs/**/*"
pr-documentation:
- "**/*.md"
- "docs/**/*"
# Component labels
comp-mutations:
- "**/*Mutation*"
comp-matview:
- "**/*MaterializedView*"
comp-skipidx:
- "**/*Indices*"
comp-kafka:
- "dbms/src/Storages/Kafka/**/*"
- "dbms/tests/integration/test_storage_kafka/**/*"
- "utils/kafka/**/*"

.github/stale.yml

@ -1,67 +0,0 @@
# Configuration for probot-stale - https://github.com/probot/stale
# Number of days of inactivity before an Issue or Pull Request becomes stale
daysUntilStale: 45
# Number of days of inactivity before an Issue or Pull Request with the stale label is closed.
# Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale.
daysUntilClose: 30
# Only issues or pull requests with all of these labels are checked if stale. Defaults to `[]` (disabled)
onlyLabels: []
# Issues or Pull Requests with these labels will never be considered stale. Set to `[]` to disable
exemptLabels:
- bug
- feature
- memory
- performance
- prio-crit
- prio-major
- st-accepted
- st-in-progress
- st-waiting-for-fix
# Set to true to ignore issues in a project (defaults to false)
exemptProjects: false
# Set to true to ignore issues in a milestone (defaults to false)
exemptMilestones: false
# Set to true to ignore issues with an assignee (defaults to false)
exemptAssignees: false
# Label to use when marking as stale
staleLabel: stale
# Comment to post when marking as stale. Set to `false` to disable
markComment: >
This issue has been automatically marked as stale because it has not had
recent activity. It will be closed if no further activity occurs. Thank you
for your contributions.
# Comment to post when removing the stale label.
# unmarkComment: >
# Your comment here.
# Comment to post when closing a stale Issue or Pull Request.
# closeComment: >
# Your comment here.
# Limit the number of actions per hour, from 1-30. Default is 30
limitPerRun: 30
# Limit to only `issues` or `pulls`
# only: issues
# Optionally, specify configuration settings that are specific to just 'issues' or 'pulls':
pulls:
daysUntilStale: 365
markComment: >
This pull request has been automatically marked as stale because it has not had
any activity for over a year. It will be closed if no further activity occurs. Thank you
for your contributions.
# issues:
# exemptLabels:
# - confirmed


@ -1,11 +0,0 @@
name: "Pull Request Labeler"
on:
pull_request
jobs:
by-filename:
runs-on: ubuntu-latest
steps:
- uses: "actions/labeler@v2"
with:
repo-token: "${{ secrets.GITHUB_TOKEN }}"

.gitignore

@ -245,3 +245,6 @@ website/package-lock.json
/.ccls-cache
/compile_commands.json
# Toolchains
/cmake/toolchain/*


@ -1,3 +1,294 @@
## ClickHouse release v19.16.2.2, 2019-10-30
### Backward Incompatible Change
* Add missing arity validation for count/countIf.
[#7095](https://github.com/ClickHouse/ClickHouse/issues/7095)
[#7298](https://github.com/ClickHouse/ClickHouse/pull/7298) ([Vdimir](https://github.com/Vdimir))
* Remove legacy `asterisk_left_columns_only` setting (it was disabled by default).
[#7335](https://github.com/ClickHouse/ClickHouse/pull/7335) ([Artem
Zuikov](https://github.com/4ertus2))
* Format strings for Template data format are now specified in files.
[#7118](https://github.com/ClickHouse/ClickHouse/pull/7118)
([tavplubix](https://github.com/tavplubix))
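A minimal sketch of the new behavior (the file paths are hypothetical; the format strings are read from the files named by these settings):
```sql
SELECT number, number * 2 FROM numbers(3)
FORMAT Template
SETTINGS format_template_row = '/tmp/row.format',
         format_template_resultset = '/tmp/resultset.format'
```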
### New Feature
* Introduce uniqCombined64() to calculate cardinality greater than UINT_MAX.
[#7213](https://github.com/ClickHouse/ClickHouse/pull/7213),
[#7222](https://github.com/ClickHouse/ClickHouse/pull/7222) ([Azat
Khuzhin](https://github.com/azat))
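A minimal usage sketch:
```sql
-- 64-bit variant of uniqCombined; does not saturate above UINT_MAX distinct values
SELECT uniqCombined64(number) FROM numbers(10000000)
```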
* Support Bloom filter indexes on Array columns.
[#6984](https://github.com/ClickHouse/ClickHouse/pull/6984)
([achimbab](https://github.com/achimbab))
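A sketch of such an index (table and column names are illustrative):
```sql
-- may require SET allow_experimental_data_skipping_indices = 1 in this release
CREATE TABLE bf_demo
(
    id UInt64,
    tags Array(String),
    INDEX tags_bf tags TYPE bloom_filter(0.01) GRANULARITY 4
)
ENGINE = MergeTree ORDER BY id;

-- the index can serve membership checks over the array column:
SELECT count() FROM bf_demo WHERE has(tags, 'clickhouse');
```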
* Add a function `getMacro(name)` that returns a String with the value of the corresponding `<macros>`
entry from the server configuration. [#7240](https://github.com/ClickHouse/ClickHouse/pull/7240)
([alexey-milovidov](https://github.com/alexey-milovidov))
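For example, with `<macros><replica>r1</replica></macros>` in the server config:
```sql
SELECT getMacro('replica')  -- returns 'r1'
```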
* Set two configuration options for a dictionary based on an HTTP source: `credentials` and
`http-headers`. [#7092](https://github.com/ClickHouse/ClickHouse/pull/7092) ([Guillaume
Tassery](https://github.com/YiuRULE))
* Add a new ProfileEvent `Merge` that counts the number of launched background merges.
[#7093](https://github.com/ClickHouse/ClickHouse/pull/7093) ([Mikhail
Korotov](https://github.com/millb))
* Add fullHostName function that returns a fully qualified domain name.
[#7263](https://github.com/ClickHouse/ClickHouse/issues/7263)
[#7291](https://github.com/ClickHouse/ClickHouse/pull/7291) ([sundyli](https://github.com/sundy-li))
* Add functions `arraySplit` and `arrayReverseSplit`, which split an array by "cut off"
conditions. They are useful in time sequence handling.
[#7294](https://github.com/ClickHouse/ClickHouse/pull/7294) ([hcz](https://github.com/hczhcz))
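A sketch of `arraySplit`, where the second array drives the "cut off" condition:
```sql
SELECT arraySplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0])
-- [[1, 2, 3], [4, 5]]: the array is split before each element where the lambda is non-zero
```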
* Add new functions in the multiMatch family that return the array of indices of all matched patterns.
[#7299](https://github.com/ClickHouse/ClickHouse/pull/7299) ([Danila
Kutenin](https://github.com/danlark1))
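A sketch, assuming the `multiMatchAllIndices` name from this PR:
```sql
SELECT multiMatchAllIndices('hello, world', ['world', 'hello'])
-- returns the 1-based indices of all patterns that matched, e.g. [1, 2]
```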
* Add a new database engine `Lazy` that is optimized for storing a large number of small -Log
tables. [#7171](https://github.com/ClickHouse/ClickHouse/pull/7171) ([Nikita
Vasilev](https://github.com/nikvas0))
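A minimal example:
```sql
-- tables are kept in RAM only for 60 seconds after the last access
CREATE DATABASE small_logs ENGINE = Lazy(60);
CREATE TABLE small_logs.events (d Date, msg String) ENGINE = Log;
```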
* Add aggregate functions groupBitmapAnd, -Or, -Xor for bitmap columns. [#7109](https://github.com/ClickHouse/ClickHouse/pull/7109) ([Zhichang
Yu](https://github.com/yuzhichang))
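A sketch, assuming a precomputed bitmap column (table and column names are illustrative):
```sql
-- z is an AggregateFunction(groupBitmap, UInt32) column
SELECT groupBitmapAnd(z) FROM bitmap_demo  -- cardinality of the intersection of all bitmaps
```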
* Add aggregate function combinators -OrNull and -OrDefault, which return null
or default values when there is nothing to aggregate.
[#7331](https://github.com/ClickHouse/ClickHouse/pull/7331)
([hcz](https://github.com/hczhcz))
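For example:
```sql
SELECT sumOrNull(number), sumOrDefault(number) FROM numbers(10) WHERE number > 100
-- NULL, 0: with nothing to aggregate, -OrNull yields NULL and -OrDefault yields the type's default
```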
* Introduce CustomSeparated data format that supports custom escaping and
delimiter rules. [#7118](https://github.com/ClickHouse/ClickHouse/pull/7118)
([tavplubix](https://github.com/tavplubix))
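A sketch using the accompanying settings (delimiter values are illustrative):
```sql
SELECT number, toString(number) FROM numbers(3)
FORMAT CustomSeparated
SETTINGS format_custom_field_delimiter = ';',
         format_custom_escaping_rule = 'CSV'
```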
### Bug Fix
* Fix wrong query result when the query has a `WHERE IN (SELECT ...)` clause and `optimize_read_in_order` is
used. [#7371](https://github.com/ClickHouse/ClickHouse/pull/7371) ([Anton
Popov](https://github.com/CurtizJ))
* Disabled MariaDB authentication plugin, which depends on files outside of the project.
[#7140](https://github.com/ClickHouse/ClickHouse/pull/7140) ([Yuriy
Baranov](https://github.com/yurriy))
* Fix exception `Cannot convert column ... because it is constant but values of constants are
different in source and result` which could rarely happen when functions `now()`, `today()`,
`yesterday()`, `randConstant()` are used.
[#7156](https://github.com/ClickHouse/ClickHouse/pull/7156) ([Nikolai
Kochetov](https://github.com/KochetovNicolai))
* Fixed the use of the HTTP keep-alive timeout instead of the TCP keep-alive timeout.
[#7351](https://github.com/ClickHouse/ClickHouse/pull/7351) ([Vasily
Nemkov](https://github.com/Enmk))
* Fixed a segmentation fault in groupBitmapOr (issue [#7109](https://github.com/ClickHouse/ClickHouse/issues/7109)).
[#7289](https://github.com/ClickHouse/ClickHouse/pull/7289) ([Zhichang
Yu](https://github.com/yuzhichang))
* For materialized views, the Kafka commit is now called after all data has been written.
[#7175](https://github.com/ClickHouse/ClickHouse/pull/7175) ([Ivan](https://github.com/abyss7))
* Fixed wrong `duration_ms` value in `system.part_log` table. It was ten times off.
[#7172](https://github.com/ClickHouse/ClickHouse/pull/7172) ([Vladimir
Chebotarev](https://github.com/excitoon))
* A quick fix to resolve a crash in LIVE VIEW tables and re-enable all LIVE VIEW tests.
[#7201](https://github.com/ClickHouse/ClickHouse/pull/7201)
([vzakaznikov](https://github.com/vzakaznikov))
* Serialize NULL values correctly in min/max indexes of MergeTree parts.
[#7234](https://github.com/ClickHouse/ClickHouse/pull/7234) ([Alexander
Kuzmenkov](https://github.com/akuzm))
* Don't put virtual columns into .sql metadata when a table is created with `CREATE TABLE AS`.
[#7183](https://github.com/ClickHouse/ClickHouse/pull/7183) ([Ivan](https://github.com/abyss7))
* Fix segmentation fault in `ATTACH PART` query.
[#7185](https://github.com/ClickHouse/ClickHouse/pull/7185)
([alesapin](https://github.com/alesapin))
* Fix wrong results for some queries caused by the optimization of empty IN subqueries and empty
INNER/RIGHT JOIN. [#7284](https://github.com/ClickHouse/ClickHouse/pull/7284) ([Nikolai
Kochetov](https://github.com/KochetovNicolai))
* Fixed an AddressSanitizer error in the LIVE VIEW getHeader() method.
[#7271](https://github.com/ClickHouse/ClickHouse/pull/7271)
([vzakaznikov](https://github.com/vzakaznikov))
### Improvement
* Add a log message when a queue_wait_max_ms wait takes place.
[#7390](https://github.com/ClickHouse/ClickHouse/pull/7390) ([Azat
Khuzhin](https://github.com/azat))
* Made setting `s3_min_upload_part_size` table-level.
[#7059](https://github.com/ClickHouse/ClickHouse/pull/7059) ([Vladimir
Chebotarev](https://github.com/excitoon))
* Check TTL in StorageFactory. [#7304](https://github.com/ClickHouse/ClickHouse/pull/7304)
([sundyli](https://github.com/sundy-li))
* Squash left-hand blocks in partial merge join (optimization).
[#7122](https://github.com/ClickHouse/ClickHouse/pull/7122) ([Artem
Zuikov](https://github.com/4ertus2))
* Do not allow non-deterministic functions in mutations of Replicated table engines, because this
can introduce inconsistencies between replicas.
[#7247](https://github.com/ClickHouse/ClickHouse/pull/7247) ([Alexander
Kazakov](https://github.com/Akazz))
* Disable the memory tracker while converting an exception stack trace to a string. This can prevent the loss
of `Memory limit exceeded` error messages on the server, which caused the `Attempt to read
after eof` exception on the client. [#7264](https://github.com/ClickHouse/ClickHouse/pull/7264)
([Nikolai Kochetov](https://github.com/KochetovNicolai))
* Miscellaneous format improvements. Resolves
[#6033](https://github.com/ClickHouse/ClickHouse/issues/6033),
[#2633](https://github.com/ClickHouse/ClickHouse/issues/2633),
[#6611](https://github.com/ClickHouse/ClickHouse/issues/6611),
[#6742](https://github.com/ClickHouse/ClickHouse/issues/6742)
[#7215](https://github.com/ClickHouse/ClickHouse/pull/7215)
([tavplubix](https://github.com/tavplubix))
* ClickHouse ignores values on the right side of IN operator that are not convertible to the left
side type. Make it work properly for compound types -- Array and Tuple.
[#7283](https://github.com/ClickHouse/ClickHouse/pull/7283) ([Alexander
Kuzmenkov](https://github.com/akuzm))
* Support missing inequalities for ASOF JOIN. It is now possible to join on the less-or-equal variant and
on the strict greater and less variants for the ASOF column in ON syntax.
[#7282](https://github.com/ClickHouse/ClickHouse/pull/7282) ([Artem
Zuikov](https://github.com/4ertus2))
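For example, besides the original greater-or-equal form, the other variants now work on the ASOF column (table names are illustrative):
```sql
SELECT * FROM trades t ASOF JOIN quotes q
ON t.symbol = q.symbol AND t.time >= q.time  -- <=, > and < are accepted here as well
```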
* Optimize partial merge join. [#7070](https://github.com/ClickHouse/ClickHouse/pull/7070)
([Artem Zuikov](https://github.com/4ertus2))
* Do not use more than 98K of memory in uniqCombined functions.
[#7236](https://github.com/ClickHouse/ClickHouse/pull/7236),
[#7270](https://github.com/ClickHouse/ClickHouse/pull/7270) ([Azat
Khuzhin](https://github.com/azat))
* Flush parts of the right-hand joining table to disk in PartialMergeJoin (if there is not enough
memory) and load the data back when needed. [#7186](https://github.com/ClickHouse/ClickHouse/pull/7186)
([Artem Zuikov](https://github.com/4ertus2))
### Performance Improvement
* Speed up joinGet with const arguments by avoiding data duplication.
[#7359](https://github.com/ClickHouse/ClickHouse/pull/7359) ([Amos
Bird](https://github.com/amosbird))
* Return early if the subquery is empty.
[#7007](https://github.com/ClickHouse/ClickHouse/pull/7007) ([小路](https://github.com/nicelulu))
* Optimize parsing of SQL expressions in Values.
[#6781](https://github.com/ClickHouse/ClickHouse/pull/6781)
([tavplubix](https://github.com/tavplubix))
### Build/Testing/Packaging Improvement
* Disable some contribs for cross-compilation to Mac OS.
[#7101](https://github.com/ClickHouse/ClickHouse/pull/7101) ([Ivan](https://github.com/abyss7))
* Add missing linking with PocoXML for clickhouse_common_io.
[#7200](https://github.com/ClickHouse/ClickHouse/pull/7200) ([Azat
Khuzhin](https://github.com/azat))
* Accept multiple test filter arguments in clickhouse-test.
[#7226](https://github.com/ClickHouse/ClickHouse/pull/7226) ([Alexander
Kuzmenkov](https://github.com/akuzm))
* Enable musl and jemalloc for ARM. [#7300](https://github.com/ClickHouse/ClickHouse/pull/7300)
([Amos Bird](https://github.com/amosbird))
* Added `--client-option` parameter to `clickhouse-test` to pass additional parameters to client.
[#7277](https://github.com/ClickHouse/ClickHouse/pull/7277) ([Nikolai
Kochetov](https://github.com/KochetovNicolai))
* Preserve existing configs on rpm package upgrade.
[#7103](https://github.com/ClickHouse/ClickHouse/pull/7103)
([filimonov](https://github.com/filimonov))
* Fix errors detected by PVS. [#7153](https://github.com/ClickHouse/ClickHouse/pull/7153) ([Artem
Zuikov](https://github.com/4ertus2))
* Fix build for Darwin. [#7149](https://github.com/ClickHouse/ClickHouse/pull/7149)
([Ivan](https://github.com/abyss7))
* glibc 2.29 compatibility. [#7142](https://github.com/ClickHouse/ClickHouse/pull/7142) ([Amos
Bird](https://github.com/amosbird))
* Make sure dh_clean does not touch potential source files.
[#7205](https://github.com/ClickHouse/ClickHouse/pull/7205) ([Amos
Bird](https://github.com/amosbird))
* Attempt to avoid a conflict when updating from the Altinity rpm, which has the config file packaged
separately in clickhouse-server-common. [#7073](https://github.com/ClickHouse/ClickHouse/pull/7073)
([filimonov](https://github.com/filimonov))
* Optimize some header files for faster rebuilds.
[#7212](https://github.com/ClickHouse/ClickHouse/pull/7212),
[#7231](https://github.com/ClickHouse/ClickHouse/pull/7231) ([Alexander
Kuzmenkov](https://github.com/akuzm))
* Add performance tests for Date and DateTime. [#7332](https://github.com/ClickHouse/ClickHouse/pull/7332) ([Vasily
Nemkov](https://github.com/Enmk))
* Fix some tests that contained non-deterministic mutations.
[#7132](https://github.com/ClickHouse/ClickHouse/pull/7132) ([Alexander
Kazakov](https://github.com/Akazz))
* Add build with MemorySanitizer to CI. [#7066](https://github.com/ClickHouse/ClickHouse/pull/7066)
([Alexander Kuzmenkov](https://github.com/akuzm))
* Avoid use of uninitialized values in MetricsTransmitter.
[#7158](https://github.com/ClickHouse/ClickHouse/pull/7158) ([Azat
Khuzhin](https://github.com/azat))
* Fix some issues in Fields found by MemorySanitizer.
[#7135](https://github.com/ClickHouse/ClickHouse/pull/7135),
[#7179](https://github.com/ClickHouse/ClickHouse/pull/7179) ([Alexander
Kuzmenkov](https://github.com/akuzm)), [#7376](https://github.com/ClickHouse/ClickHouse/pull/7376)
([Amos Bird](https://github.com/amosbird))
* Fix undefined behavior in murmurhash32. [#7388](https://github.com/ClickHouse/ClickHouse/pull/7388) ([Amos
Bird](https://github.com/amosbird))
* Fix undefined behavior in StoragesInfoStream. [#7384](https://github.com/ClickHouse/ClickHouse/pull/7384)
([tavplubix](https://github.com/tavplubix))
* Fixed constant expression folding for external database engines (MySQL, ODBC, JDBC). In previous
versions it wasn't working for multiple constant expressions and was not working at all for Date,
DateTime and UUID. This fixes [#7245](https://github.com/ClickHouse/ClickHouse/issues/7245)
[#7252](https://github.com/ClickHouse/ClickHouse/pull/7252)
([alexey-milovidov](https://github.com/alexey-milovidov))
* Fixed a ThreadSanitizer data race error in LIVE VIEW when accessing the no_users_thread variable.
[#7353](https://github.com/ClickHouse/ClickHouse/pull/7353)
([vzakaznikov](https://github.com/vzakaznikov))
* Get rid of malloc symbols in libcommon.
[#7134](https://github.com/ClickHouse/ClickHouse/pull/7134),
[#7065](https://github.com/ClickHouse/ClickHouse/pull/7065) ([Amos
Bird](https://github.com/amosbird))
* Add global flag ENABLE_LIBRARIES for disabling all libraries.
[#7063](https://github.com/ClickHouse/ClickHouse/pull/7063)
([proller](https://github.com/proller))
### Code cleanup
* Generalize configuration repository to prepare for DDL for Dictionaries. [#7155](https://github.com/ClickHouse/ClickHouse/pull/7155)
([alesapin](https://github.com/alesapin))
* Parser for dictionary DDL without any semantic analysis.
[#7209](https://github.com/ClickHouse/ClickHouse/pull/7209)
([alesapin](https://github.com/alesapin))
* Split ParserCreateQuery into different smaller parsers.
[#7253](https://github.com/ClickHouse/ClickHouse/pull/7253)
([alesapin](https://github.com/alesapin))
* Small refactoring and renaming near external dictionaries.
[#7111](https://github.com/ClickHouse/ClickHouse/pull/7111)
([alesapin](https://github.com/alesapin))
* Refactor some code to prepare for role-based access control. [#7235](https://github.com/ClickHouse/ClickHouse/pull/7235) ([Vitaly
Baranov](https://github.com/vitlibar))
* Some improvements in DatabaseOrdinary code.
[#7086](https://github.com/ClickHouse/ClickHouse/pull/7086) ([Nikita
Vasilev](https://github.com/nikvas0))
* Do not use iterators in find() and emplace() methods of hash tables.
[#7026](https://github.com/ClickHouse/ClickHouse/pull/7026) ([Alexander
Kuzmenkov](https://github.com/akuzm))
* Fix getMultipleValuesFromConfig for the case when the root parameter is not empty. [#7374](https://github.com/ClickHouse/ClickHouse/pull/7374)
([Mikhail Korotov](https://github.com/millb))
* Remove some copy-paste (TemporaryFile and TemporaryFileStream).
[#7166](https://github.com/ClickHouse/ClickHouse/pull/7166) ([Artem
Zuikov](https://github.com/4ertus2))
* Improved code readability a little bit (`MergeTreeData::getActiveContainingPart`).
[#7361](https://github.com/ClickHouse/ClickHouse/pull/7361) ([Vladimir
Chebotarev](https://github.com/excitoon))
* Wait for all scheduled jobs, which are using local objects, if `ThreadPool::schedule(...)` throws
an exception. Rename `ThreadPool::schedule(...)` to `ThreadPool::scheduleOrThrowOnError(...)` and
fix comments to make obvious that it may throw.
[#7350](https://github.com/ClickHouse/ClickHouse/pull/7350)
([tavplubix](https://github.com/tavplubix))
## ClickHouse release 19.15.4.10, 2019-10-31
### Bug Fix
* Added handling of SQL_TINYINT and SQL_BIGINT, and fixed handling of SQL_FLOAT data source types in ODBC Bridge.
[#7491](https://github.com/ClickHouse/ClickHouse/pull/7491) ([Denis Glazachev](https://github.com/traceon))
* Allow some parts to already exist on the destination disk or volume in MOVE PARTITION.
[#7434](https://github.com/ClickHouse/ClickHouse/pull/7434) ([Vladimir Chebotarev](https://github.com/excitoon))
* Fixed NULL-values in nullable columns through ODBC-bridge.
[#7402](https://github.com/ClickHouse/ClickHouse/pull/7402) ([Vasily Nemkov](https://github.com/Enmk))
* Fixed INSERT into a non-local node of a Distributed table with MATERIALIZED columns.
[#7377](https://github.com/ClickHouse/ClickHouse/pull/7377) ([Azat Khuzhin](https://github.com/azat))
* Fixed function getMultipleValuesFromConfig.
[#7374](https://github.com/ClickHouse/ClickHouse/pull/7374) ([Mikhail Korotov](https://github.com/millb))
* Fixed the use of the HTTP keep-alive timeout instead of the TCP keep-alive timeout.
[#7351](https://github.com/ClickHouse/ClickHouse/pull/7351) ([Vasily Nemkov](https://github.com/Enmk))
* Wait for all jobs to finish on exception (fixes rare segfaults).
[#7350](https://github.com/ClickHouse/ClickHouse/pull/7350) ([tavplubix](https://github.com/tavplubix))
* Don't push to MVs when inserting into Kafka table.
[#7265](https://github.com/ClickHouse/ClickHouse/pull/7265) ([Ivan](https://github.com/abyss7))
* Disable memory tracker for exception stack.
[#7264](https://github.com/ClickHouse/ClickHouse/pull/7264) ([Nikolai Kochetov](https://github.com/KochetovNicolai))
* Fixed bad code in transforming query for external database.
[#7252](https://github.com/ClickHouse/ClickHouse/pull/7252) ([alexey-milovidov](https://github.com/alexey-milovidov))
* Avoid use of uninitialized values in MetricsTransmitter.
[#7158](https://github.com/ClickHouse/ClickHouse/pull/7158) ([Azat Khuzhin](https://github.com/azat))
* Added example config with macros for tests ([alexey-milovidov](https://github.com/alexey-milovidov))
## ClickHouse release 19.15.3.6, 2019-10-09
### Bug Fix
* Fixed bad_variant in hashed dictionary.
([alesapin](https://github.com/alesapin))
* Fixed a bug with a segmentation fault in the ATTACH PART query.
([alesapin](https://github.com/alesapin))
* Fixed time calculation in `MergeTreeData`.
([Vladimir Chebotarev](https://github.com/excitoon))
* Commit to Kafka explicitly after the writing is finalized.
[#7175](https://github.com/ClickHouse/ClickHouse/pull/7175) ([Ivan](https://github.com/abyss7))
* Serialize NULL values correctly in min/max indexes of MergeTree parts.
[#7234](https://github.com/ClickHouse/ClickHouse/pull/7234) ([Alexander Kuzmenkov](https://github.com/akuzm))
## ClickHouse release 19.15.2.2, 2019-10-01
### New Feature
@ -345,6 +636,13 @@
### Security Fix
* Fix two vulnerabilities in codecs in the decompression phase (a malicious user can fabricate compressed data that leads to a buffer overflow during decompression). [#6670](https://github.com/ClickHouse/ClickHouse/pull/6670) ([Artem Zuikov](https://github.com/4ertus2))
## ClickHouse release 19.11.13.74, 2019-11-01
### Bug Fix
* Fixed rare crash in `ALTER MODIFY COLUMN` and vertical merge when one of merged/altered parts is empty (0 rows). [#6780](https://github.com/ClickHouse/ClickHouse/pull/6780) ([alesapin](https://github.com/alesapin))
* Manual update of `SIMDJSON`. This fixes possible flooding of stderr files with bogus json diagnostic messages. [#7548](https://github.com/ClickHouse/ClickHouse/pull/7548) ([Alexander Kazakov](https://github.com/Akazz))
* Fixed a bug with the `mrk` file extension for mutations ([alesapin](https://github.com/alesapin))
## ClickHouse release 19.11.12.69, 2019-10-02
### Bug Fix


@ -13,7 +13,10 @@ foreach(policy
endforeach()
project(ClickHouse)
include (cmake/arch.cmake)
include (cmake/target.cmake)
include (cmake/tools.cmake)
# Ignore export() since we don't use it,
# but it gets broken with global targets via link_libraries()
@ -26,8 +29,6 @@ set(CMAKE_LINK_DEPENDS_NO_SHARED 1) # Do not relink all depended targets on .so
set(CMAKE_CONFIGURATION_TYPES "RelWithDebInfo;Debug;Release;MinSizeRel" CACHE STRING "" FORCE)
set(CMAKE_DEBUG_POSTFIX "d" CACHE STRING "Generate debug library name with a postfix.") # To be consistent with CMakeLists from contrib libs.
include (cmake/arch.cmake)
option(ENABLE_IPO "Enable inter-procedural optimization (aka LTO)" OFF) # need cmake 3.9+
if(ENABLE_IPO)
cmake_policy(SET CMP0069 NEW)
@ -230,7 +231,6 @@ include(cmake/dbms_glob_sources.cmake)
if (OS_LINUX)
include(cmake/linux/default_libs.cmake)
elseif (OS_DARWIN)
include(cmake/darwin/sdk.cmake)
include(cmake/darwin/default_libs.cmake)
endif ()


@ -13,8 +13,9 @@ ClickHouse is an open-source column-oriented database management system that all
* You can also [fill this form](https://forms.yandex.com/surveys/meet-yandex-clickhouse-team/) to meet Yandex ClickHouse team in person.
## Upcoming Events
* [ClickHouse Meetup in Shanghai](https://www.huodongxing.com/event/4483760336000) on October 27.
* [ClickHouse Meetup in Tokyo](https://clickhouse.connpass.com/event/147001/) on November 14.
* [ClickHouse Meetup in Istanbul](https://www.eventbrite.com/e/clickhouse-meetup-istanbul-create-blazing-fast-experiences-w-clickhouse-tickets-73101120419) on November 19.
* [ClickHouse Meetup in Ankara](https://www.eventbrite.com/e/clickhouse-meetup-ankara-create-blazing-fast-experiences-w-clickhouse-tickets-73100530655) on November 21.
* [ClickHouse Meetup in Singapore](https://www.meetup.com/Singapore-Clickhouse-Meetup-Group/events/265085331/) on November 23.
* [ClickHouse Meetup in San Francisco](https://www.eventbrite.com/e/clickhouse-december-meetup-registration-78642047481) on December 3.


@ -17,6 +17,7 @@ endif ()
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(ppc64le.*|PPC64LE.*)")
set (ARCH_PPC64LE 1)
# FIXME: move this check into tools.cmake
if (COMPILER_CLANG OR (COMPILER_GCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8))
message(FATAL_ERROR "Only gcc-8 is supported for powerpc architecture")
endif ()


@ -11,6 +11,14 @@ message(STATUS "Default libraries: ${DEFAULT_LIBS}")
set(CMAKE_CXX_STANDARD_LIBRARIES ${DEFAULT_LIBS})
set(CMAKE_C_STANDARD_LIBRARIES ${DEFAULT_LIBS})
# Minimal supported SDK version
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mmacosx-version-min=10.14")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mmacosx-version-min=10.14")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -mmacosx-version-min=10.14")
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -mmacosx-version-min=10.14")
# Global libraries
add_library(global-libs INTERFACE)


@ -1,11 +0,0 @@
option (SDK_PATH "Path to the SDK to build with" "")
if (NOT EXISTS "${SDK_PATH}/SDKSettings.plist")
message (FATAL_ERROR "Wrong SDK path provided: ${SDK_PATH}")
endif ()
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -isysroot ${SDK_PATH} -mmacosx-version-min=10.14")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -isysroot ${SDK_PATH} -mmacosx-version-min=10.14")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -isysroot ${SDK_PATH} -mmacosx-version-min=10.14")
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -isysroot ${SDK_PATH} -mmacosx-version-min=10.14")


@ -0,0 +1,13 @@
set (CMAKE_SYSTEM_NAME "Darwin")
set (CMAKE_SYSTEM_PROCESSOR "x86_64")
set (CMAKE_C_COMPILER_TARGET "x86_64-apple-darwin")
set (CMAKE_CXX_COMPILER_TARGET "x86_64-apple-darwin")
set (CMAKE_OSX_SYSROOT "${CMAKE_CURRENT_LIST_DIR}/../toolchain/darwin-x86_64")
set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) # disable linkage check - it doesn't work in CMake
set (HAS_PRE_1970_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
set (HAS_PRE_1970_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)
set (HAS_POST_2038_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
set (HAS_POST_2038_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)


@ -1,13 +1,20 @@
macro(add_glob cur_list)
file(GLOB __tmp RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${ARGN})
list(APPEND ${cur_list} ${__tmp})
endmacro()
if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.12")
macro(add_glob cur_list)
file(GLOB __tmp RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} CONFIGURE_DEPENDS ${ARGN})
list(APPEND ${cur_list} ${__tmp})
endmacro()
else ()
macro(add_glob cur_list)
file(GLOB __tmp RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${ARGN})
list(APPEND ${cur_list} ${__tmp})
endmacro()
endif ()
macro(add_headers_and_sources prefix common_path)
add_glob(${prefix}_headers RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${common_path}/*.h)
add_glob(${prefix}_headers ${CMAKE_CURRENT_SOURCE_DIR} ${common_path}/*.h)
add_glob(${prefix}_sources ${common_path}/*.cpp ${common_path}/*.c ${common_path}/*.h)
endmacro()
macro(add_headers_only prefix common_path)
add_glob(${prefix}_headers RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${common_path}/*.h)
add_glob(${prefix}_headers ${CMAKE_CURRENT_SOURCE_DIR} ${common_path}/*.h)
endmacro()


@ -4,6 +4,14 @@ if (ENABLE_CAPNP)
option (USE_INTERNAL_CAPNP_LIBRARY "Set to FALSE to use system capnproto library instead of bundled" ${NOT_UNBUNDLED})
if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/capnproto/CMakeLists.txt")
if(USE_INTERNAL_CAPNP_LIBRARY)
message(WARNING "submodule contrib/capnproto is missing. to fix try run: \n git submodule update --init --recursive")
endif()
set(MISSING_INTERNAL_CAPNP_LIBRARY 1)
set(USE_INTERNAL_CAPNP_LIBRARY 0)
endif()
# FIXME: refactor to use `add_library( IMPORTED)` if possible.
if (NOT USE_INTERNAL_CAPNP_LIBRARY)
find_library (KJ kj)
@ -11,7 +19,7 @@ if (NOT USE_INTERNAL_CAPNP_LIBRARY)
find_library (CAPNPC capnpc)
set (CAPNP_LIBRARIES ${CAPNPC} ${CAPNP} ${KJ})
else ()
elseif(NOT MISSING_INTERNAL_CAPNP_LIBRARY)
add_subdirectory(contrib/capnproto-cmake)
set (CAPNP_LIBRARIES capnpc)
@ -23,4 +31,4 @@ endif ()
endif ()
message (STATUS "Using capnp: ${CAPNP_LIBRARIES}")
message (STATUS "Using capnp=${USE_CAPNP}: ${CAPNP_LIBRARIES}")


@ -1,7 +1,8 @@
option (ENABLE_ORC "Enable ORC" ${ENABLE_LIBRARIES})
if(ENABLE_ORC)
option (USE_INTERNAL_ORC_LIBRARY "Set to FALSE to use system ORC instead of bundled" ${NOT_UNBUNDLED})
include(cmake/find/snappy.cmake)
option(USE_INTERNAL_ORC_LIBRARY "Set to FALSE to use system ORC instead of bundled" ${NOT_UNBUNDLED})
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/orc/c++/include/orc/OrcFile.hh")
if(USE_INTERNAL_ORC_LIBRARY)
@ -25,7 +26,7 @@ endif ()
if (ORC_LIBRARY AND ORC_INCLUDE_DIR)
set(USE_ORC 1)
elseif(NOT MISSING_INTERNAL_ORC_LIBRARY AND ARROW_LIBRARY) # (LIBGSASL_LIBRARY AND LIBXML2_LIBRARY)
elseif(NOT MISSING_INTERNAL_ORC_LIBRARY AND ARROW_LIBRARY AND SNAPPY_LIBRARY) # (LIBGSASL_LIBRARY AND LIBXML2_LIBRARY)
set(ORC_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/orc/c++/include")
set(ORC_LIBRARY orc)
set(USE_ORC 1)


@ -24,7 +24,10 @@ endif()
if(ARROW_INCLUDE_DIR AND PARQUET_INCLUDE_DIR)
elseif(NOT MISSING_INTERNAL_PARQUET_LIBRARY AND NOT OS_FREEBSD)
include(cmake/find/snappy.cmake)
set(CAN_USE_INTERNAL_PARQUET_LIBRARY 1)
if(SNAPPY_LIBRARY)
set(CAN_USE_INTERNAL_PARQUET_LIBRARY 1)
endif()
include(CheckCXXSourceCompiles)
if(NOT USE_INTERNAL_DOUBLE_CONVERSION_LIBRARY)
set(CMAKE_REQUIRED_LIBRARIES ${DOUBLE_CONVERSION_LIBRARIES})


@ -8,6 +8,14 @@ if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/poco/CMakeLists.txt")
set (MISSING_INTERNAL_POCO_LIBRARY 1)
endif ()
if (NOT ENABLE_LIBRARIES)
set (ENABLE_POCO_NETSSL ${ENABLE_LIBRARIES} CACHE BOOL "")
set (ENABLE_POCO_MONGODB ${ENABLE_LIBRARIES} CACHE BOOL "")
set (ENABLE_POCO_REDIS ${ENABLE_LIBRARIES} CACHE BOOL "")
set (ENABLE_POCO_ODBC ${ENABLE_LIBRARIES} CACHE BOOL "")
set (ENABLE_POCO_SQL ${ENABLE_LIBRARIES} CACHE BOOL "")
endif ()
set (POCO_COMPONENTS Net XML SQL Data)
if (NOT DEFINED ENABLE_POCO_NETSSL OR ENABLE_POCO_NETSSL)
list (APPEND POCO_COMPONENTS Crypto NetSSL)


@ -4,6 +4,11 @@ if (NOT CMAKE_SYSTEM MATCHES "Linux" OR ARCH_ARM OR ARCH_32)
set (USE_UNWIND OFF)
endif ()
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libunwind/CMakeLists.txt")
message(WARNING "submodule contrib/libunwind is missing. to fix try run: \n git submodule update --init --recursive")
set (USE_UNWIND OFF)
endif ()
if (USE_UNWIND)
add_subdirectory(contrib/libunwind-cmake)
set (UNWIND_LIBRARIES unwind)


@ -5,7 +5,7 @@ set (DEFAULT_LIBS "-nodefaultlibs")
# We need builtins from Clang's RT even without libcxx - for ubsan+int128.
# See https://bugs.llvm.org/show_bug.cgi?id=16404
if (COMPILER_CLANG)
if (COMPILER_CLANG AND NOT (CMAKE_CROSSCOMPILING AND ARCH_AARCH64))
execute_process (COMMAND ${CMAKE_CXX_COMPILER} --print-file-name=libclang_rt.builtins-${CMAKE_SYSTEM_PROCESSOR}.a OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE)
else ()
set (BUILTINS_LIBRARY "-lgcc")


@ -0,0 +1,25 @@
set (CMAKE_SYSTEM_NAME "Linux")
set (CMAKE_SYSTEM_PROCESSOR "aarch64")
set (CMAKE_C_COMPILER_TARGET "aarch64-linux-gnu")
set (CMAKE_CXX_COMPILER_TARGET "aarch64-linux-gnu")
set (CMAKE_ASM_COMPILER_TARGET "aarch64-linux-gnu")
set (CMAKE_SYSROOT "${CMAKE_CURRENT_LIST_DIR}/../toolchain/linux-aarch64/aarch64-linux-gnu/libc")
# We don't use compiler from toolchain because it's gcc-8, and we provide support only for gcc-9.
set (CMAKE_AR "${CMAKE_CURRENT_LIST_DIR}/../toolchain/linux-aarch64/bin/aarch64-linux-gnu-ar" CACHE FILEPATH "" FORCE)
set (CMAKE_RANLIB "${CMAKE_CURRENT_LIST_DIR}/../toolchain/linux-aarch64/bin/aarch64-linux-gnu-ranlib" CACHE FILEPATH "" FORCE)
set (CMAKE_C_FLAGS_INIT "${CMAKE_C_FLAGS} --gcc-toolchain=${CMAKE_CURRENT_LIST_DIR}/../toolchain/linux-aarch64")
set (CMAKE_CXX_FLAGS_INIT "${CMAKE_CXX_FLAGS} --gcc-toolchain=${CMAKE_CURRENT_LIST_DIR}/../toolchain/linux-aarch64")
set (CMAKE_ASM_FLAGS_INIT "${CMAKE_ASM_FLAGS} --gcc-toolchain=${CMAKE_CURRENT_LIST_DIR}/../toolchain/linux-aarch64")
set (LINKER_NAME "lld" CACHE STRING "" FORCE)
set (CMAKE_EXE_LINKER_FLAGS_INIT "-fuse-ld=lld")
set (CMAKE_SHARED_LINKER_FLAGS_INIT "-fuse-ld=lld")
set (HAS_PRE_1970_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
set (HAS_PRE_1970_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)
set (HAS_POST_2038_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
set (HAS_POST_2038_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)


@ -9,62 +9,8 @@ elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin")
add_definitions(-D OS_DARWIN)
endif ()
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (COMPILER_GCC 1)
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
set (COMPILER_CLANG 1)
endif ()
if (COMPILER_GCC)
# Require minimum version of gcc
set (GCC_MINIMUM_VERSION 8)
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS ${GCC_MINIMUM_VERSION} AND NOT CMAKE_VERSION VERSION_LESS 2.8.9)
message (FATAL_ERROR "GCC version must be at least ${GCC_MINIMUM_VERSION}. For example, if GCC ${GCC_MINIMUM_VERSION} is available under gcc-${GCC_MINIMUM_VERSION}, g++-${GCC_MINIMUM_VERSION} names, do the following: export CC=gcc-${GCC_MINIMUM_VERSION} CXX=g++-${GCC_MINIMUM_VERSION}; rm -rf CMakeCache.txt CMakeFiles; and re run cmake or ./release.")
endif ()
elseif (COMPILER_CLANG)
# Require minimum version of clang
set (CLANG_MINIMUM_VERSION 7)
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS ${CLANG_MINIMUM_VERSION})
message (FATAL_ERROR "Clang version must be at least ${CLANG_MINIMUM_VERSION}.")
endif ()
else ()
message (WARNING "You are using an unsupported compiler. Compilation has only been tested with Clang 6+ and GCC 7+.")
endif ()
string(REGEX MATCH "-?[0-9]+(.[0-9]+)?$" COMPILER_POSTFIX ${CMAKE_CXX_COMPILER})
if (OS_LINUX)
find_program (LLD_PATH NAMES "lld${COMPILER_POSTFIX}" "lld")
find_program (GOLD_PATH NAMES "ld.gold" "gold")
endif()
option (LINKER_NAME "Linker name or full path")
if (NOT LINKER_NAME)
if (COMPILER_CLANG AND LLD_PATH)
set (LINKER_NAME "lld")
elseif (GOLD_PATH)
set (LINKER_NAME "gold")
endif ()
endif ()
if (LINKER_NAME)
message(STATUS "Using linker: ${LINKER_NAME} (selected from: LLD_PATH=${LLD_PATH}; GOLD_PATH=${GOLD_PATH}; COMPILER_POSTFIX=${COMPILER_POSTFIX})")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=${LINKER_NAME}")
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=${LINKER_NAME}")
endif ()
if (CMAKE_CROSSCOMPILING)
if (OS_DARWIN)
set (CMAKE_SYSTEM_PROCESSOR x86_64)
set (CMAKE_C_COMPILER_TARGET x86_64-apple-darwin)
set (CMAKE_CXX_COMPILER_TARGET x86_64-apple-darwin)
set (HAS_PRE_1970_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
set (HAS_PRE_1970_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)
set (HAS_POST_2038_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
set (HAS_POST_2038_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)
# FIXME: broken dependencies
set (USE_SNAPPY OFF CACHE INTERNAL "")
set (ENABLE_SSL OFF CACHE INTERNAL "")
@ -73,12 +19,19 @@ if (CMAKE_CROSSCOMPILING)
set (ENABLE_READLINE OFF CACHE INTERNAL "")
set (ENABLE_ICU OFF CACHE INTERNAL "")
set (ENABLE_FASTOPS OFF CACHE INTERNAL "")
message (STATUS "Cross-compiling for Darwin")
elseif (OS_LINUX)
if (ARCH_AARCH64)
# FIXME: broken dependencies
set (ENABLE_PROTOBUF OFF CACHE INTERNAL "")
set (ENABLE_PARQUET OFF CACHE INTERNAL "")
set (ENABLE_MYSQL OFF CACHE INTERNAL "")
endif ()
else ()
message (FATAL_ERROR "Trying to cross-compile to unsupported target: ${CMAKE_SYSTEM_NAME}!")
message (FATAL_ERROR "Trying to cross-compile to unsupported system: ${CMAKE_SYSTEM_NAME}!")
endif ()
# Don't know why but CXX_STANDARD doesn't work for cross-compilation
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17")
message (STATUS "Cross-compiling for target: ${CMAKE_CXX_COMPILE_TARGET}")
endif ()


@ -0,0 +1,2 @@
wget https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz
tar --strip-components=1 -xJf MacOSX10.14.sdk.tar.xz


@ -0,0 +1,2 @@
wget 'https://developer.arm.com/-/media/Files/downloads/gnu-a/8.3-2019.03/binrel/gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz?revision=2e88a73f-d233-4f96-b1f4-d8b36e9bb0b9&la=en' -O gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz
tar --strip-components=1 -xJf gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz

cmake/tools.cmake (new file)

@ -0,0 +1,41 @@
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (COMPILER_GCC 1)
elseif (CMAKE_CXX_COMPILER_ID MATCHES "Clang|AppleClang")
set (COMPILER_CLANG 1)
endif ()
if (COMPILER_GCC)
# Require minimum version of gcc
set (GCC_MINIMUM_VERSION 8)
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS ${GCC_MINIMUM_VERSION} AND NOT CMAKE_VERSION VERSION_LESS 2.8.9)
message (FATAL_ERROR "GCC version must be at least ${GCC_MINIMUM_VERSION}. For example, if GCC ${GCC_MINIMUM_VERSION} is available under gcc-${GCC_MINIMUM_VERSION}, g++-${GCC_MINIMUM_VERSION} names, do the following: export CC=gcc-${GCC_MINIMUM_VERSION} CXX=g++-${GCC_MINIMUM_VERSION}; rm -rf CMakeCache.txt CMakeFiles; and re run cmake or ./release.")
endif ()
elseif (COMPILER_CLANG)
# Require minimum version of clang
set (CLANG_MINIMUM_VERSION 7)
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS ${CLANG_MINIMUM_VERSION})
message (FATAL_ERROR "Clang version must be at least ${CLANG_MINIMUM_VERSION}.")
endif ()
else ()
message (WARNING "You are using an unsupported compiler. Compilation has only been tested with Clang 6+ and GCC 7+.")
endif ()
option (LINKER_NAME "Linker name or full path")
find_program (LLD_PATH NAMES "ld.lld" "lld")
find_program (GOLD_PATH NAMES "ld.gold" "gold")
if (NOT LINKER_NAME)
if (LLD_PATH)
set (LINKER_NAME "lld")
elseif (GOLD_PATH)
set (LINKER_NAME "gold")
endif ()
endif ()
if (LINKER_NAME)
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=${LINKER_NAME}")
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=${LINKER_NAME}")
message(STATUS "Using custom linker by name: ${LINKER_NAME}")
endif ()


@ -124,7 +124,7 @@ if (USE_INTERNAL_SSL_LIBRARY)
add_library(OpenSSL::SSL ALIAS ${OPENSSL_SSL_LIBRARY})
endif ()
if (ENABLE_MYSQL AND USE_INTERNAL_MYSQL_LIBRARY)
function(mysql_support)
set(CLIENT_PLUGIN_CACHING_SHA2_PASSWORD STATIC)
set(CLIENT_PLUGIN_SHA256_PASSWORD STATIC)
set(CLIENT_PLUGIN_REMOTE_IO OFF)
@ -136,7 +136,15 @@ if (ENABLE_MYSQL AND USE_INTERNAL_MYSQL_LIBRARY)
if (GLIBC_COMPATIBILITY)
set(LIBM glibc-compatibility)
endif()
if (USE_INTERNAL_ZLIB_LIBRARY)
set(ZLIB_FOUND ON)
set(ZLIB_LIBRARY zlibstatic)
set(WITH_EXTERNAL_ZLIB ON)
endif()
add_subdirectory (mariadb-connector-c)
endfunction()
if (ENABLE_MYSQL AND USE_INTERNAL_MYSQL_LIBRARY)
mysql_support()
endif ()
if (USE_INTERNAL_RDKAFKA_LIBRARY)


@ -70,6 +70,14 @@ add_custom_command(OUTPUT orc_proto.pb.h orc_proto.pb.cc
--cpp_out="${CMAKE_CURRENT_BINARY_DIR}"
"${PROTO_DIR}/orc_proto.proto")
# The arrow-cmake file calls an ORC cmake subroutine which detects certain compiler features.
# The Apple Clang compiler failed to compile this code without specifying the C++11 standard.
# As a result, these compiler features were detected as absent, and ORC itself failed to compile.
# The ORC makefile contains code that sets these flags, but arrow-cmake ignores them.
if (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
set (CXX11_FLAGS "-std=c++0x")
endif()
include(${ClickHouse_SOURCE_DIR}/contrib/orc/cmake_modules/CheckSourceCompiles.cmake)
include(orc_check.cmake)
configure_file("${ORC_INCLUDE_DIR}/orc/orc-config.hh.in" "${ORC_BUILD_INCLUDE_DIR}/orc/orc-config.hh")


@ -44,7 +44,7 @@ target_include_directories(cxx SYSTEM BEFORE PUBLIC $<BUILD_INTERFACE:${LIBCXX_S
target_compile_definitions(cxx PRIVATE -D_LIBCPP_BUILDING_LIBRARY -DLIBCXX_BUILDING_LIBCXXABI)
target_compile_options(cxx PUBLIC -nostdinc++ -Wno-reserved-id-macro)
if (OS_DARWIN AND NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9)
if (OS_DARWIN AND (NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9) AND (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 11))
target_compile_options(cxx PUBLIC -Wno-ctad-maybe-unsupported)
endif ()


@ -130,8 +130,8 @@ list (APPEND dbms_headers
list (APPEND dbms_sources src/TableFunctions/ITableFunction.cpp src/TableFunctions/TableFunctionFactory.cpp)
list (APPEND dbms_headers src/TableFunctions/ITableFunction.h src/TableFunctions/TableFunctionFactory.h)
list (APPEND dbms_sources src/Dictionaries/DictionaryFactory.cpp src/Dictionaries/DictionarySourceFactory.cpp src/Dictionaries/DictionaryStructure.cpp)
list (APPEND dbms_headers src/Dictionaries/DictionaryFactory.h src/Dictionaries/DictionarySourceFactory.h src/Dictionaries/DictionaryStructure.h)
list (APPEND dbms_sources src/Dictionaries/DictionaryFactory.cpp src/Dictionaries/DictionarySourceFactory.cpp src/Dictionaries/DictionaryStructure.cpp src/Dictionaries/getDictionaryConfigurationFromAST.cpp)
list (APPEND dbms_headers src/Dictionaries/DictionaryFactory.h src/Dictionaries/DictionarySourceFactory.h src/Dictionaries/DictionaryStructure.h src/Dictionaries/getDictionaryConfigurationFromAST.h)
if (NOT ENABLE_SSL)
list (REMOVE_ITEM clickhouse_common_io_sources src/Common/OpenSSLHelpers.cpp)
@ -153,12 +153,10 @@ add_subdirectory(src/Common/Config)
set (all_modules)
macro(add_object_library name common_path)
if (MAKE_STATIC_LIBRARIES OR NOT SPLIT_SHARED_LIBRARIES)
add_glob(dbms_headers RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${common_path}/*.h)
add_glob(dbms_sources ${common_path}/*.cpp ${common_path}/*.c ${common_path}/*.h)
add_headers_and_sources(dbms ${common_path})
else ()
list (APPEND all_modules ${name})
add_glob(${name}_headers RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${common_path}/*.h)
add_glob(${name}_sources ${common_path}/*.cpp ${common_path}/*.c ${common_path}/*.h)
add_headers_and_sources(${name} ${common_path})
add_library(${name} SHARED ${${name}_sources} ${${name}_headers})
target_link_libraries (${name} PRIVATE -Wl,--unresolved-symbols=ignore-all)
endif ()
@ -427,6 +425,11 @@ endif()
if (USE_JEMALLOC)
dbms_target_include_directories (SYSTEM BEFORE PRIVATE ${JEMALLOC_INCLUDE_DIR}) # used in Interpreters/AsynchronousMetrics.cpp
target_include_directories (clickhouse_new_delete SYSTEM BEFORE PRIVATE ${JEMALLOC_INCLUDE_DIR})
if(NOT MAKE_STATIC_LIBRARIES AND ${JEMALLOC_LIBRARIES} MATCHES "${CMAKE_STATIC_LIBRARY_SUFFIX}$")
# mallctl in dbms/src/Interpreters/AsynchronousMetrics.cpp
target_link_libraries(clickhouse_interpreters PRIVATE ${JEMALLOC_LIBRARIES})
endif()
endif ()
dbms_target_include_directories (PUBLIC ${DBMS_INCLUDE_DIR} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/src/Formats/include)


@ -365,7 +365,7 @@ private:
Stopwatch watch;
RemoteBlockInputStream stream(
*(*connection_entries[connection_index]),
query, {}, global_context, &settings, nullptr, Tables(), query_processing_stage);
query, {}, global_context, &settings, nullptr, Scalars(), Tables(), query_processing_stage);
Progress progress;
stream.setProgressCallback([&progress](const Progress & value) { progress.incrementPiecewiseAtomically(value); });


@ -89,6 +89,40 @@
#define DISABLE_LINE_WRAPPING "\033[?7l"
#define ENABLE_LINE_WRAPPING "\033[?7h"
#if USE_READLINE && RL_VERSION_MAJOR >= 7
#define BRACK_PASTE_PREF "\033[200~"
#define BRACK_PASTE_SUFF "\033[201~"
#define BRACK_PASTE_LAST '~'
#define BRACK_PASTE_SLEN 6
/// Make sure we don't get ^J for the enter character.
/// This handler also bypasses some unused macro/event checks.
static int clickhouse_rl_bracketed_paste_begin(int /* count */, int /* key */)
{
std::string buf;
buf.reserve(128);
RL_SETSTATE(RL_STATE_MOREINPUT);
SCOPE_EXIT(RL_UNSETSTATE(RL_STATE_MOREINPUT));
char c;
while ((c = rl_read_key()) >= 0)
{
if (c == '\r' || c == '\n')
c = '\n';
buf.push_back(c);
if (buf.size() >= BRACK_PASTE_SLEN && c == BRACK_PASTE_LAST && buf.substr(buf.size() - BRACK_PASTE_SLEN) == BRACK_PASTE_SUFF)
{
buf.resize(buf.size() - BRACK_PASTE_SLEN);
break;
}
}
return static_cast<size_t>(rl_insert_text(buf.c_str())) == buf.size() ? 0 : 1;
}
#endif
namespace DB
{
@ -462,6 +496,18 @@ private:
if (rl_initialize())
throw Exception("Cannot initialize readline", ErrorCodes::CANNOT_READLINE);
#if RL_VERSION_MAJOR >= 7
/// When bracketed paste mode is set, pasted text is bracketed with control sequences so
/// that the program can differentiate pasted text from typed-in text. This helps
/// clickhouse-client so that without the -m flag, one can still paste multiline queries, and
/// possibly get better pasting performance. See https://cirw.in/blog/bracketed-paste for
/// more details.
rl_variable_bind("enable-bracketed-paste", "on");
/// Use our bracketed paste handler to get better user experience. See comments above.
rl_bind_keyseq(BRACK_PASTE_PREF, clickhouse_rl_bracketed_paste_begin);
#endif
auto clear_prompt_or_exit = [](int)
{
/// This is signal safe.
@ -632,7 +678,8 @@ private:
/// If the user restarts the client then after pressing the "up" button
/// every line of the query will be displayed separately.
std::string logged_query = input;
std::replace(logged_query.begin(), logged_query.end(), '\n', ' ');
if (config().has("multiline"))
std::replace(logged_query.begin(), logged_query.end(), '\n', ' ');
add_history(logged_query.c_str());
#if USE_READLINE && HAVE_READLINE_HISTORY


@ -579,7 +579,7 @@ public:
{
for (auto & elem : table)
{
Histogram & histogram = elem.getSecond();
Histogram & histogram = elem.getMapped();
if (histogram.buckets.size() < params.num_buckets_cutoff)
{
@ -593,7 +593,7 @@ public:
{
for (auto & elem : table)
{
Histogram & histogram = elem.getSecond();
Histogram & histogram = elem.getMapped();
if (!histogram.total)
continue;
@ -625,7 +625,7 @@ public:
{
for (auto & elem : table)
{
Histogram & histogram = elem.getSecond();
Histogram & histogram = elem.getMapped();
if (!histogram.total)
continue;
@ -641,7 +641,7 @@ public:
{
for (auto & elem : table)
{
Histogram & histogram = elem.getSecond();
Histogram & histogram = elem.getMapped();
if (!histogram.total)
continue;
@ -676,7 +676,7 @@ public:
while (true)
{
it = table.find(hashContext(code_points.data() + code_points.size() - context_size, code_points.data() + code_points.size()));
if (it && lookupResultGetMapped(it)->total + lookupResultGetMapped(it)->count_end != 0)
if (it && it->getMapped().total + it->getMapped().count_end != 0)
break;
if (context_size == 0)
@ -710,7 +710,7 @@ public:
if (num_bytes_after_desired_size > 0)
end_probability_multiplier = std::pow(1.25, num_bytes_after_desired_size);
CodePoint code = lookupResultGetMapped(it)->sample(determinator, end_probability_multiplier);
CodePoint code = it->getMapped().sample(determinator, end_probability_multiplier);
if (code == END)
break;


@ -38,12 +38,16 @@ namespace
switch (type)
{
case SQL_TINYINT:
return factory.get("Int8");
case SQL_INTEGER:
return factory.get("Int32");
case SQL_SMALLINT:
return factory.get("Int16");
case SQL_BIGINT:
return factory.get("Int64");
case SQL_FLOAT:
return factory.get("Float32");
return factory.get("Float64");
case SQL_REAL:
return factory.get("Float32");
case SQL_DOUBLE:


@ -22,7 +22,7 @@ public:
void set(const std::string & key, std::string value, bool wrap = true);
template <typename T>
std::enable_if_t<std::is_arithmetic_v<T>> set(const std::string key, T value)
std::enable_if_t<is_arithmetic_v<T>> set(const std::string key, T value)
{
set(key, std::to_string(value), /*wrap= */ false);
}


@ -10,13 +10,11 @@ set(CLICKHOUSE_SERVER_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/TCPHandler.cpp
)
if (USE_SSL)
set(CLICKHOUSE_SERVER_SOURCES
${CLICKHOUSE_SERVER_SOURCES}
${CMAKE_CURRENT_SOURCE_DIR}/MySQLHandler.cpp
${CMAKE_CURRENT_SOURCE_DIR}/MySQLHandlerFactory.cpp
)
endif ()
set(CLICKHOUSE_SERVER_SOURCES
${CLICKHOUSE_SERVER_SOURCES}
${CMAKE_CURRENT_SOURCE_DIR}/MySQLHandler.cpp
${CMAKE_CURRENT_SOURCE_DIR}/MySQLHandlerFactory.cpp
)
set(CLICKHOUSE_SERVER_LINK PRIVATE clickhouse_dictionaries clickhouse_common_io clickhouse_common_config clickhouse_common_zookeeper clickhouse_parsers string_utils PUBLIC daemon PRIVATE clickhouse_storages_system clickhouse_functions clickhouse_aggregate_functions clickhouse_table_functions ${Poco_Net_LIBRARY})
if (USE_POCO_NETSSL)


@ -1,7 +1,6 @@
#include <Common/config.h>
#if USE_SSL
#include "MySQLHandler.h"
#include "MySQLHandler.h"
#include <limits>
#include <ext/scope_guard.h>
#include <Columns/ColumnVector.h>
@ -15,37 +14,39 @@
#include <IO/ReadBufferFromPocoSocket.h>
#include <IO/ReadBufferFromString.h>
#include <IO/WriteBufferFromPocoSocket.h>
#include <Poco/Crypto/CipherFactory.h>
#include <Poco/Crypto/RSAKey.h>
#include <Poco/Net/SecureStreamSocket.h>
#include <Poco/Net/SSLManager.h>
#include <Storages/IStorage.h>
#if USE_POCO_NETSSL
#include <Poco/Net/SecureStreamSocket.h>
#include <Poco/Net/SSLManager.h>
#include <Poco/Crypto/CipherFactory.h>
#include <Poco/Crypto/RSAKey.h>
#endif
namespace DB
{
using namespace MySQLProtocol;
#if USE_POCO_NETSSL
using Poco::Net::SecureStreamSocket;
using Poco::Net::SSLManager;
#endif
namespace ErrorCodes
{
extern const int MYSQL_CLIENT_INSUFFICIENT_CAPABILITIES;
extern const int OPENSSL_ERROR;
extern const int SUPPORT_IS_DISABLED;
}
MySQLHandler::MySQLHandler(IServer & server_, const Poco::Net::StreamSocket & socket_, RSA & public_key_, RSA & private_key_, bool ssl_enabled, size_t connection_id_)
MySQLHandler::MySQLHandler(IServer & server_, const Poco::Net::StreamSocket & socket_,
bool ssl_enabled, size_t connection_id_)
: Poco::Net::TCPServerConnection(socket_)
, server(server_)
, log(&Poco::Logger::get("MySQLHandler"))
, connection_context(server.context())
, connection_id(connection_id_)
, public_key(public_key_)
, private_key(private_key_)
, auth_plugin(new MySQLProtocol::Authentication::Native41())
{
server_capability_flags = CLIENT_PROTOCOL_41 | CLIENT_SECURE_CONNECTION | CLIENT_PLUGIN_AUTH | CLIENT_PLUGIN_AUTH_LENENC_CLIENT_DATA | CLIENT_CONNECT_WITH_DB | CLIENT_DEPRECATE_EOF;
@ -197,21 +198,7 @@ void MySQLHandler::finishHandshake(MySQLProtocol::HandshakeResponse & packet)
if (payload_size == SSL_REQUEST_PAYLOAD_SIZE)
{
read_bytes(packet_size); /// Read the rest of the SSLRequest.
SSLRequest ssl_request;
ReadBufferFromMemory payload(buf, pos);
payload.ignore(PACKET_HEADER_SIZE);
ssl_request.readPayload(payload);
connection_context.mysql.client_capabilities = ssl_request.capability_flags;
connection_context.mysql.max_packet_size = ssl_request.max_packet_size ? ssl_request.max_packet_size : MAX_PACKET_LENGTH;
secure_connection = true;
ss = std::make_shared<SecureStreamSocket>(SecureStreamSocket::attach(socket(), SSLManager::instance().defaultServerContext()));
in = std::make_shared<ReadBufferFromPocoSocket>(*ss);
out = std::make_shared<WriteBufferFromPocoSocket>(*ss);
connection_context.mysql.sequence_id = 2;
packet_sender = std::make_shared<PacketSender>(*in, *out, connection_context.mysql.sequence_id);
packet_sender->max_packet_size = connection_context.mysql.max_packet_size;
packet_sender->receivePacket(packet); /// Reading HandshakeResponse from secure socket.
finishHandshakeSSL(packet_size, buf, pos, read_bytes, packet);
}
else
{
@ -232,7 +219,9 @@ void MySQLHandler::authenticate(const String & user_name, const String & auth_pl
// For compatibility with JavaScript MySQL client, Native41 authentication plugin is used when possible (if password is specified using double SHA1). Otherwise SHA256 plugin is used.
auto user = connection_context.getUser(user_name);
if (user->authentication.getType() != DB::Authentication::DOUBLE_SHA1_PASSWORD)
auth_plugin = std::make_unique<MySQLProtocol::Authentication::Sha256Password>(public_key, private_key, log);
{
authPluginSSL();
}
try {
std::optional<String> auth_response = auth_plugin_name == auth_plugin->getName() ? std::make_optional<String>(initial_auth_response) : std::nullopt;
@ -302,5 +291,47 @@ void MySQLHandler::comQuery(ReadBuffer & payload)
packet_sender->sendPacket(OK_Packet(0x00, client_capability_flags, 0, 0, 0), true);
}
void MySQLHandler::authPluginSSL()
{
throw Exception("Compiled without SSL", ErrorCodes::SUPPORT_IS_DISABLED);
}
void MySQLHandler::finishHandshakeSSL([[maybe_unused]] size_t packet_size, [[maybe_unused]] char * buf, [[maybe_unused]] size_t pos, [[maybe_unused]] std::function<void(size_t)> read_bytes, [[maybe_unused]] MySQLProtocol::HandshakeResponse & packet)
{
throw Exception("Compiled without SSL", ErrorCodes::SUPPORT_IS_DISABLED);
}
#if USE_SSL && USE_POCO_NETSSL
MySQLHandlerSSL::MySQLHandlerSSL(IServer & server_, const Poco::Net::StreamSocket & socket_, bool ssl_enabled, size_t connection_id_, RSA & public_key_, RSA & private_key_)
: MySQLHandler(server_, socket_, ssl_enabled, connection_id_)
, public_key(public_key_)
, private_key(private_key_)
{}
void MySQLHandlerSSL::authPluginSSL()
{
auth_plugin = std::make_unique<MySQLProtocol::Authentication::Sha256Password>(public_key, private_key, log);
}
void MySQLHandlerSSL::finishHandshakeSSL(size_t packet_size, char * buf, size_t pos, std::function<void(size_t)> read_bytes, MySQLProtocol::HandshakeResponse & packet)
{
read_bytes(packet_size); /// Read the rest of the SSLRequest.
SSLRequest ssl_request;
ReadBufferFromMemory payload(buf, pos);
payload.ignore(PACKET_HEADER_SIZE);
ssl_request.readPayload(payload);
connection_context.mysql.client_capabilities = ssl_request.capability_flags;
connection_context.mysql.max_packet_size = ssl_request.max_packet_size ? ssl_request.max_packet_size : MAX_PACKET_LENGTH;
secure_connection = true;
ss = std::make_shared<SecureStreamSocket>(SecureStreamSocket::attach(socket(), SSLManager::instance().defaultServerContext()));
in = std::make_shared<ReadBufferFromPocoSocket>(*ss);
out = std::make_shared<WriteBufferFromPocoSocket>(*ss);
connection_context.mysql.sequence_id = 2;
packet_sender = std::make_shared<PacketSender>(*in, *out, connection_context.mysql.sequence_id);
packet_sender->max_packet_size = connection_context.mysql.max_packet_size;
packet_sender->receivePacket(packet); /// Reading HandshakeResponse from secure socket.
}
#endif
}


@ -1,13 +1,13 @@
#pragma once
#include <Common/config.h>
#if USE_SSL
#include <Poco/Net/TCPServerConnection.h>
#include <Poco/Net/SecureStreamSocket.h>
#include <Common/getFQDNOrHostName.h>
#include <Core/MySQLProtocol.h>
#include "IServer.h"
#if USE_POCO_NETSSL
#include <Poco/Net/SecureStreamSocket.h>
#endif
namespace DB
{
@ -16,7 +16,7 @@ namespace DB
class MySQLHandler : public Poco::Net::TCPServerConnection
{
public:
MySQLHandler(IServer & server_, const Poco::Net::StreamSocket & socket_, RSA & public_key_, RSA & private_key_, bool ssl_enabled, size_t connection_id_);
MySQLHandler(IServer & server_, const Poco::Net::StreamSocket & socket_, bool ssl_enabled, size_t connection_id_);
void run() final;
@ -34,28 +34,47 @@ private:
void authenticate(const String & user_name, const String & auth_plugin_name, const String & auth_response);
virtual void authPluginSSL();
virtual void finishHandshakeSSL(size_t packet_size, char * buf, size_t pos, std::function<void(size_t)> read_bytes, MySQLProtocol::HandshakeResponse & packet);
IServer & server;
protected:
Poco::Logger * log;
Context connection_context;
std::shared_ptr<MySQLProtocol::PacketSender> packet_sender;
private:
size_t connection_id = 0;
size_t server_capability_flags = 0;
size_t client_capability_flags = 0;
RSA & public_key;
RSA & private_key;
protected:
std::unique_ptr<MySQLProtocol::Authentication::IPlugin> auth_plugin;
std::shared_ptr<Poco::Net::SecureStreamSocket> ss;
std::shared_ptr<ReadBuffer> in;
std::shared_ptr<WriteBuffer> out;
bool secure_connection = false;
};
}
#if USE_SSL && USE_POCO_NETSSL
class MySQLHandlerSSL : public MySQLHandler
{
public:
MySQLHandlerSSL(IServer & server_, const Poco::Net::StreamSocket & socket_, bool ssl_enabled, size_t connection_id_, RSA & public_key_, RSA & private_key_);
private:
void authPluginSSL() override;
void finishHandshakeSSL(size_t packet_size, char * buf, size_t pos, std::function<void(size_t)> read_bytes, MySQLProtocol::HandshakeResponse & packet) override;
RSA & public_key;
RSA & private_key;
std::shared_ptr<Poco::Net::SecureStreamSocket> ss;
};
#endif
}

View File

@ -1,7 +1,5 @@
#include "MySQLHandlerFactory.h"
#if USE_POCO_NETSSL && USE_SSL
#include <Common/OpenSSLHelpers.h>
#include <Poco/Net/SSLManager.h>
#include <Poco/Net/TCPServerConnectionFactory.h>
#include <Poco/Util/Application.h>
#include <common/logger_useful.h>
@ -9,6 +7,10 @@
#include "IServer.h"
#include "MySQLHandler.h"
#if USE_POCO_NETSSL
#include <Poco/Net/SSLManager.h>
#endif
namespace DB
{
@ -24,6 +26,8 @@ MySQLHandlerFactory::MySQLHandlerFactory(IServer & server_)
: server(server_)
, log(&Logger::get("MySQLHandlerFactory"))
{
#if USE_POCO_NETSSL
try
{
Poco::Net::SSLManager::instance().defaultServerContext();
@ -33,7 +37,9 @@ MySQLHandlerFactory::MySQLHandlerFactory(IServer & server_)
LOG_INFO(log, "Failed to create SSL context. SSL will be disabled. Error: " << getCurrentExceptionMessage(false));
ssl_enabled = false;
}
#endif
#if USE_SSL
/// Read RSA keys for the SHA256 authentication plugin.
try
{
@ -44,8 +50,10 @@ MySQLHandlerFactory::MySQLHandlerFactory(IServer & server_)
LOG_WARNING(log, "Failed to read RSA keys. Error: " << getCurrentExceptionMessage(false));
generateRSAKeys();
}
#endif
}
#if USE_SSL
void MySQLHandlerFactory::readRSAKeys()
{
const Poco::Util::LayeredConfiguration & config = Poco::Util::Application::instance().config();
@ -113,13 +121,18 @@ void MySQLHandlerFactory::generateRSAKeys()
if (!private_key)
throw Exception("Failed to copy RSA key. Error: " + getOpenSSLErrors(), ErrorCodes::OPENSSL_ERROR);
}
#endif
Poco::Net::TCPServerConnection * MySQLHandlerFactory::createConnection(const Poco::Net::StreamSocket & socket)
{
size_t connection_id = last_connection_id++;
LOG_TRACE(log, "MySQL connection. Id: " << connection_id << ". Address: " << socket.peerAddress().toString());
return new MySQLHandler(server, socket, *public_key, *private_key, ssl_enabled, connection_id);
#if USE_POCO_NETSSL && USE_SSL
return new MySQLHandlerSSL(server, socket, ssl_enabled, connection_id, *public_key, *private_key);
#else
return new MySQLHandler(server, socket, ssl_enabled, connection_id);
#endif
}
}
#endif
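// A minimal usage sketch, assuming a bound server socket and a concrete
// IServer implementation (names and arguments are illustrative; Server.cpp
// wires the factory in essentially this way):
//
//     Poco::Net::ServerSocket socket(/* mysql_port */);
//     Poco::Net::TCPServer tcp_server(new MySQLHandlerFactory(server), server_pool, socket, new Poco::Net::TCPServerParams);
//     tcp_server.start();
//
// Each accepted connection then gets a MySQLHandlerSSL when both
// USE_POCO_NETSSL and USE_SSL are defined, and a plain MySQLHandler otherwise.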

View File

@ -1,12 +1,12 @@
#pragma once
#include <Common/config.h>
#if USE_POCO_NETSSL && USE_SSL
#include <Poco/Net/TCPServerConnectionFactory.h>
#include <atomic>
#include <openssl/rsa.h>
#include "IServer.h"
#if USE_SSL
#include <openssl/rsa.h>
#endif
namespace DB
{
@ -17,6 +17,7 @@ private:
IServer & server;
Poco::Logger * log;
#if USE_SSL
struct RSADeleter
{
void operator()(RSA * ptr) { RSA_free(ptr); }
@ -27,6 +28,9 @@ private:
RSAPtr private_key;
bool ssl_enabled = true;
#else
bool ssl_enabled = false;
#endif
std::atomic<size_t> last_connection_id = 0;
public:
@ -40,4 +44,3 @@ public:
};
}
#endif

View File

@ -44,7 +44,7 @@ void ReplicasStatusHandler::handleRequest(Poco::Net::HTTPServerRequest & request
if (db.second->getEngineName() == "Lazy")
continue;
for (auto iterator = db.second->getIterator(context); iterator->isValid(); iterator->next())
for (auto iterator = db.second->getTablesIterator(context); iterator->isValid(); iterator->next())
{
auto & table = iterator->table();
StorageReplicatedMergeTree * table_replicated = dynamic_cast<StorageReplicatedMergeTree *>(table.get());

View File

@ -37,10 +37,12 @@
#include <Interpreters/AsynchronousMetrics.h>
#include <Interpreters/DDLWorker.h>
#include <Interpreters/ExternalDictionariesLoader.h>
#include <Interpreters/ExternalModelsLoader.h>
#include <Interpreters/ProcessList.h>
#include <Interpreters/loadMetadata.h>
#include <Interpreters/DNSCacheUpdater.h>
#include <Interpreters/SystemLog.cpp>
#include <Interpreters/ExternalLoaderXMLConfigRepository.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/System/attachSystemTables.h>
#include <AggregateFunctions/registerAggregateFunctions.h>
@ -55,7 +57,7 @@
#include "TCPHandlerFactory.h"
#include "Common/config_version.h"
#include <Common/SensitiveDataMasker.h>
#include "MySQLHandlerFactory.h"
#if defined(OS_LINUX)
#include <Common/hasLinuxCapability.h>
@ -63,7 +65,6 @@
#endif
#if USE_POCO_NETSSL
#include "MySQLHandlerFactory.h"
#include <Poco/Net/Context.h>
#include <Poco/Net/SecureServerSocket.h>
#endif
@ -920,6 +921,12 @@ int Server::main(const std::vector<std::string> & /*args*/)
global_context->tryCreateEmbeddedDictionaries();
global_context->getExternalDictionariesLoader().enableAlwaysLoadEverything(true);
}
auto dictionaries_repository = std::make_unique<ExternalLoaderXMLConfigRepository>(config(), "dictionaries_config");
global_context->getExternalDictionariesLoader().addConfigRepository("", std::move(dictionaries_repository));
auto models_repository = std::make_unique<ExternalLoaderXMLConfigRepository>(config(), "models_config");
global_context->getExternalModelsLoader().addConfigRepository("", std::move(models_repository));
}
catch (...)
{

View File

@ -530,7 +530,8 @@ void TCPHandler::processOrdinaryQuery()
sendLogs();
}
sendData(block);
if (!block || !state.io.null_format)
sendData(block);
if (!block)
break;
}
@ -850,9 +851,10 @@ bool TCPHandler::receivePacket()
return true;
case Protocol::Client::Data:
case Protocol::Client::Scalar:
if (state.empty())
receiveUnexpectedData();
return receiveData();
return receiveData(packet_type == Protocol::Client::Scalar);
case Protocol::Client::Ping:
writeVarUInt(Protocol::Server::Pong, *out);
@ -957,39 +959,44 @@ void TCPHandler::receiveUnexpectedQuery()
throw NetException("Unexpected packet Query received from client", ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT);
}
bool TCPHandler::receiveData()
bool TCPHandler::receiveData(bool scalar)
{
initBlockInput();
/// The name of the temporary table for writing data; defaults to an empty string.
String external_table_name;
readStringBinary(external_table_name, *in);
String name;
readStringBinary(name, *in);
/// Read one block from the network and write it to its destination.
Block block = state.block_in->read();
if (block)
{
/// If there is an insert request, then the data should be written directly to `state.io.out`.
/// Otherwise, we write the blocks in the temporary `external_table_name` table.
if (!state.need_receive_data_for_insert && !state.need_receive_data_for_input)
{
StoragePtr storage;
/// If such a table does not exist, create it.
if (!(storage = query_context->tryGetExternalTable(external_table_name)))
{
NamesAndTypesList columns = block.getNamesAndTypesList();
storage = StorageMemory::create("_external", external_table_name, ColumnsDescription{columns}, ConstraintsDescription{});
storage->startup();
query_context->addExternalTable(external_table_name, storage);
}
/// The data will be written directly to the table.
state.io.out = storage->write(ASTPtr(), *query_context);
}
if (state.need_receive_data_for_input)
state.block_for_input = block;
if (scalar)
query_context->addScalar(name, block);
else
state.io.out->write(block);
{
/// If there is an insert request, then the data should be written directly to `state.io.out`.
/// Otherwise, we write the blocks to the temporary table `name`.
if (!state.need_receive_data_for_insert && !state.need_receive_data_for_input)
{
StoragePtr storage;
/// If such a table does not exist, create it.
if (!(storage = query_context->tryGetExternalTable(name)))
{
NamesAndTypesList columns = block.getNamesAndTypesList();
storage = StorageMemory::create("_external", name, ColumnsDescription{columns}, ConstraintsDescription{});
storage->startup();
query_context->addExternalTable(name, storage);
}
/// The data will be written directly to the table.
state.io.out = storage->write(ASTPtr(), *query_context);
}
if (state.need_receive_data_for_input)
state.block_for_input = block;
else
state.io.out->write(block);
}
return true;
}
else

View File

@ -153,7 +153,7 @@ private:
void receiveHello();
bool receivePacket();
void receiveQuery();
bool receiveData();
bool receiveData(bool scalar);
bool readDataNext(const size_t & poll_interval, const int & receive_timeout);
void readData(const Settings & global_settings);
std::tuple<size_t, int> getReadTimeouts(const Settings & global_settings);

View File

@ -180,7 +180,21 @@
<port>9000</port>
</replica>
</shard>
</test_cluster_two_shards_localhost>
</test_cluster_two_shards_localhost>
<test_cluster_two_shards>
<shard>
<replica>
<host>127.0.0.1</host>
<port>9000</port>
</replica>
</shard>
<shard>
<replica>
<host>127.0.0.2</host>
<port>9000</port>
</replica>
</shard>
</test_cluster_two_shards>
<test_shard_localhost_secure>
<shard>
<replica>

View File

@ -1,4 +1,4 @@
#include <Interpreters/SettingsConstraints.h>
#include <Access/SettingsConstraints.h>
#include <Core/Settings.h>
#include <Common/FieldVisitors.h>
#include <IO/WriteHelpers.h>
@ -29,22 +29,118 @@ void SettingsConstraints::clear()
}
void SettingsConstraints::setReadOnly(const String & name, bool read_only)
void SettingsConstraints::setMinValue(const StringRef & name, const Field & min_value)
{
size_t setting_index = Settings::findIndexStrict(name);
getConstraintRef(setting_index).min_value = Settings::valueToCorrespondingType(setting_index, min_value);
}
Field SettingsConstraints::getMinValue(const StringRef & name) const
{
size_t setting_index = Settings::findIndexStrict(name);
const auto * ptr = tryGetConstraint(setting_index);
if (ptr)
return ptr->min_value;
else
return {};
}
void SettingsConstraints::setMaxValue(const StringRef & name, const Field & max_value)
{
size_t setting_index = Settings::findIndexStrict(name);
getConstraintRef(setting_index).max_value = Settings::valueToCorrespondingType(setting_index, max_value);
}
Field SettingsConstraints::getMaxValue(const StringRef & name) const
{
size_t setting_index = Settings::findIndexStrict(name);
const auto * ptr = tryGetConstraint(setting_index);
if (ptr)
return ptr->max_value;
else
return {};
}
void SettingsConstraints::setReadOnly(const StringRef & name, bool read_only)
{
size_t setting_index = Settings::findIndexStrict(name);
getConstraintRef(setting_index).read_only = read_only;
}
void SettingsConstraints::setMinValue(const String & name, const Field & min_value)
bool SettingsConstraints::isReadOnly(const StringRef & name) const
{
size_t setting_index = Settings::findIndexStrict(name);
getConstraintRef(setting_index).min_value = Settings::castValueWithoutApplying(setting_index, min_value);
const auto * ptr = tryGetConstraint(setting_index);
if (ptr)
return ptr->read_only;
else
return false;
}
void SettingsConstraints::setMaxValue(const String & name, const Field & max_value)
void SettingsConstraints::set(const StringRef & name, const Field & min_value, const Field & max_value, bool read_only)
{
size_t setting_index = Settings::findIndexStrict(name);
getConstraintRef(setting_index).max_value = Settings::castValueWithoutApplying(setting_index, max_value);
auto & ref = getConstraintRef(setting_index);
ref.min_value = min_value;
ref.max_value = max_value;
ref.read_only = read_only;
}
void SettingsConstraints::get(const StringRef & name, Field & min_value, Field & max_value, bool & read_only) const
{
size_t setting_index = Settings::findIndexStrict(name);
const auto * ptr = tryGetConstraint(setting_index);
if (ptr)
{
min_value = ptr->min_value;
max_value = ptr->max_value;
read_only = ptr->read_only;
}
else
{
min_value = Field{};
max_value = Field{};
read_only = false;
}
}
void SettingsConstraints::merge(const SettingsConstraints & other)
{
for (const auto & [setting_index, other_constraint] : other.constraints_by_index)
{
auto & constraint = constraints_by_index[setting_index];
if (!other_constraint.min_value.isNull())
constraint.min_value = other_constraint.min_value;
if (!other_constraint.max_value.isNull())
constraint.max_value = other_constraint.max_value;
if (other_constraint.read_only)
constraint.read_only = true;
}
}
SettingsConstraints::Infos SettingsConstraints::getInfo() const
{
Infos result;
result.reserve(constraints_by_index.size());
for (const auto & [setting_index, constraint] : constraints_by_index)
{
result.emplace_back();
Info & info = result.back();
info.name = Settings::getName(setting_index);
info.min = constraint.min_value;
info.max = constraint.max_value;
info.read_only = constraint.read_only;
}
return result;
}
@ -55,7 +151,7 @@ void SettingsConstraints::check(const Settings & current_settings, const Setting
if (setting_index == Settings::npos)
return;
Field new_value = Settings::castValueWithoutApplying(setting_index, change.value);
Field new_value = Settings::valueToCorrespondingType(setting_index, change.value);
Field current_value = current_settings.get(setting_index);
/// Setting isn't checked if value wasn't changed.
@ -159,4 +255,15 @@ void SettingsConstraints::loadFromConfig(const String & path_to_constraints, con
}
}
bool SettingsConstraints::Constraint::operator==(const Constraint & rhs) const
{
return (read_only == rhs.read_only) && (min_value == rhs.min_value) && (max_value == rhs.max_value);
}
bool operator ==(const SettingsConstraints & lhs, const SettingsConstraints & rhs)
{
return lhs.constraints_by_index == rhs.constraints_by_index;
}
}
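// A short usage sketch of the reworked API (values are illustrative; the
// setting names must exist in Settings, otherwise findIndexStrict() throws):
//
//     SettingsConstraints constraints;
//     constraints.setMaxValue("max_memory_usage", Field(UInt64(20000000000)));
//     constraints.setReadOnly("readonly", true);
//
//     SettingsConstraints profile_constraints;
//     profile_constraints.setMinValue("max_memory_usage", Field(UInt64(1000000)));
//
//     /// merge(): non-null bounds and read_only == true from the argument
//     /// override the corresponding fields of the target.
//     constraints.merge(profile_constraints);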

View File

@ -58,10 +58,32 @@ public:
~SettingsConstraints();
void clear();
bool empty() const { return constraints_by_index.empty(); }
void setMinValue(const String & name, const Field & min_value);
void setMaxValue(const String & name, const Field & max_value);
void setReadOnly(const String & name, bool read_only);
void setMinValue(const StringRef & name, const Field & min_value);
Field getMinValue(const StringRef & name) const;
void setMaxValue(const StringRef & name, const Field & max_value);
Field getMaxValue(const StringRef & name) const;
void setReadOnly(const StringRef & name, bool read_only);
bool isReadOnly(const StringRef & name) const;
void set(const StringRef & name, const Field & min_value, const Field & max_value, bool read_only);
void get(const StringRef & name, Field & min_value, Field & max_value, bool & read_only) const;
void merge(const SettingsConstraints & other);
struct Info
{
StringRef name;
Field min;
Field max;
bool read_only = false;
};
using Infos = std::vector<Info>;
Infos getInfo() const;
void check(const Settings & current_settings, const SettingChange & change) const;
void check(const Settings & current_settings, const SettingsChanges & changes) const;
@ -74,12 +96,18 @@ public:
/// Loads the constraints from configuration file, at "path" prefix in configuration.
void loadFromConfig(const String & path, const Poco::Util::AbstractConfiguration & config);
friend bool operator ==(const SettingsConstraints & lhs, const SettingsConstraints & rhs);
friend bool operator !=(const SettingsConstraints & lhs, const SettingsConstraints & rhs) { return !(lhs == rhs); }
private:
struct Constraint
{
bool read_only = false;
Field min_value;
Field max_value;
bool operator ==(const Constraint & rhs) const;
bool operator !=(const Constraint & rhs) const { return !(*this == rhs); }
};
Constraint & getConstraintRef(size_t index);

View File

@ -31,9 +31,9 @@ struct AggregateFunctionAvgData
if constexpr (std::numeric_limits<ResultT>::is_iec559)
return static_cast<ResultT>(sum) / count; /// allow division by zero
if (!count)
throw Exception("AggregateFunctionAvg with zero values", ErrorCodes::LOGICAL_ERROR);
return static_cast<ResultT>(sum) / count;
if (count == 0)
return static_cast<ResultT>(0);
return static_cast<ResultT>(sum / count);
}
};
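// A worked example of the decimal branch above (the division happens on the
// scaled integer representation, so the input scale is preserved):
//
//     values 1.00 and 2.00 as Decimal32 with scale 2  ->  raw 100 and 200
//     sum = 300 (raw), count = 2
//     result = static_cast<Decimal32>(300 / 2) = raw 150  ->  rendered as 1.50
//
// For count == 0 this branch now returns 0 instead of throwing, matching the
// is_iec559 (floating point) branch that deliberately allows division by zero.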
@ -43,10 +43,10 @@ template <typename T, typename Data>
class AggregateFunctionAvg final : public IAggregateFunctionDataHelper<Data, AggregateFunctionAvg<T, Data>>
{
public:
using ResultType = std::conditional_t<IsDecimalNumber<T>, Decimal128, Float64>;
using ResultDataType = std::conditional_t<IsDecimalNumber<T>, DataTypeDecimal<Decimal128>, DataTypeNumber<Float64>>;
using ResultType = std::conditional_t<IsDecimalNumber<T>, T, Float64>;
using ResultDataType = std::conditional_t<IsDecimalNumber<T>, DataTypeDecimal<T>, DataTypeNumber<Float64>>;
using ColVecType = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<T>>;
using ColVecResult = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<Decimal128>, ColumnVector<Float64>>;
using ColVecResult = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<Float64>>;
/// ctor for native types
AggregateFunctionAvg(const DataTypes & argument_types_)

View File

@ -64,6 +64,12 @@ public:
}
const char * getHeaderFilePath() const override { return __FILE__; }
/// Reset the state to the specified value. This function is not part of the common interface.
void set(AggregateDataPtr place, UInt64 new_count)
{
data(place).count = new_count;
}
};

View File

@ -55,7 +55,7 @@ struct EntropyData
void merge(const EntropyData & rhs)
{
for (const auto & pair : rhs.map)
map[pair.getFirst()] += pair.getSecond();
map[pair.getKey()] += pair.getMapped();
}
void serialize(WriteBuffer & buf) const
@ -77,12 +77,12 @@ struct EntropyData
{
UInt64 total_value = 0;
for (const auto & pair : map)
total_value += pair.getSecond();
total_value += pair.getMapped();
Float64 shannon_entropy = 0;
for (const auto & pair : map)
{
Float64 frequency = Float64(pair.getSecond()) / total_value;
Float64 frequency = Float64(pair.getMapped()) / total_value;
shannon_entropy -= frequency * log2(frequency);
}
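        // In formula form: H = -sum_i p_i * log2(p_i), where p_i = c_i / N,
        // c_i is the accumulated weight of key i and N is total_value.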

View File

@ -90,6 +90,10 @@ public:
{
Data & data_lhs = this->data(place);
const Data & data_rhs = this->data(rhs);
if (!data_rhs.doneFirst)
return;
if (!data_lhs.doneFirst)
{
data_lhs.doneFirst = true;

View File

@ -581,6 +581,23 @@ public:
return max_val;
}
/** Replace, for each i, the value from_vals[i] with to_vals[i]
  * if from_vals[i] is currently present in the bitmap.
  */
void rb_replace(const UInt32 * from_vals, const UInt32 * to_vals, size_t num)
{
if (isSmall())
toLarge();
for (size_t i = 0; i < num; ++i)
{
if (from_vals[i] == to_vals[i])
continue;
bool changed = roaring_bitmap_remove_checked(rb, from_vals[i]);
if (changed)
roaring_bitmap_add(rb, to_vals[i]);
}
}
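    // Semantics sketch: with from_vals = {1, 5} and to_vals = {2, 5},
    //   1 -> removed and 2 added, but only if 1 was actually present
    //        (roaring_bitmap_remove_checked reports whether anything was removed);
    //   5 -> skipped entirely, since from == to.
    // Values absent from the bitmap are left untouched, so rb_replace never
    // introduces elements that were not there before.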
private:
/// To read and write the DB Buffer directly, migrate code from CRoaring
void db_roaring_bitmap_add_many(DB::ReadBuffer & dbBuf, roaring_bitmap_t * r, size_t n_args)

View File

@ -673,15 +673,15 @@ struct AggregateFunctionAnyHeavyData : Data
};
template <typename Data, bool AllocatesMemoryInArena>
class AggregateFunctionsSingleValue final : public IAggregateFunctionDataHelper<Data, AggregateFunctionsSingleValue<Data, AllocatesMemoryInArena>>
template <typename Data, bool use_arena>
class AggregateFunctionsSingleValue final : public IAggregateFunctionDataHelper<Data, AggregateFunctionsSingleValue<Data, use_arena>>
{
private:
DataTypePtr & type;
public:
AggregateFunctionsSingleValue(const DataTypePtr & type_)
: IAggregateFunctionDataHelper<Data, AggregateFunctionsSingleValue<Data, AllocatesMemoryInArena>>({type_}, {})
: IAggregateFunctionDataHelper<Data, AggregateFunctionsSingleValue<Data, use_arena>>({type_}, {})
, type(this->argument_types[0])
{
if (StringRef(Data::name()) == StringRef("min")
@ -722,7 +722,7 @@ public:
bool allocatesMemoryInArena() const override
{
return AllocatesMemoryInArena;
return use_arena;
}
void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override

View File

@ -16,11 +16,11 @@ namespace ErrorCodes
namespace
{
template <typename Value, bool FloatReturn> using FuncQuantile = AggregateFunctionQuantile<Value, QuantileReservoirSampler<Value>, NameQuantile, false, std::conditional_t<FloatReturn, Float64, void>, false>;
template <typename Value, bool FloatReturn> using FuncQuantiles = AggregateFunctionQuantile<Value, QuantileReservoirSampler<Value>, NameQuantiles, false, std::conditional_t<FloatReturn, Float64, void>, true>;
template <typename Value, bool float_return> using FuncQuantile = AggregateFunctionQuantile<Value, QuantileReservoirSampler<Value>, NameQuantile, false, std::conditional_t<float_return, Float64, void>, false>;
template <typename Value, bool float_return> using FuncQuantiles = AggregateFunctionQuantile<Value, QuantileReservoirSampler<Value>, NameQuantiles, false, std::conditional_t<float_return, Float64, void>, true>;
template <typename Value, bool FloatReturn> using FuncQuantileDeterministic = AggregateFunctionQuantile<Value, QuantileReservoirSamplerDeterministic<Value>, NameQuantileDeterministic, true, std::conditional_t<FloatReturn, Float64, void>, false>;
template <typename Value, bool FloatReturn> using FuncQuantilesDeterministic = AggregateFunctionQuantile<Value, QuantileReservoirSamplerDeterministic<Value>, NameQuantilesDeterministic, true, std::conditional_t<FloatReturn, Float64, void>, true>;
template <typename Value, bool float_return> using FuncQuantileDeterministic = AggregateFunctionQuantile<Value, QuantileReservoirSamplerDeterministic<Value>, NameQuantileDeterministic, true, std::conditional_t<float_return, Float64, void>, false>;
template <typename Value, bool float_return> using FuncQuantilesDeterministic = AggregateFunctionQuantile<Value, QuantileReservoirSamplerDeterministic<Value>, NameQuantilesDeterministic, true, std::conditional_t<float_return, Float64, void>, true>;
template <typename Value, bool _> using FuncQuantileExact = AggregateFunctionQuantile<Value, QuantileExact<Value>, NameQuantileExact, false, void, false>;
template <typename Value, bool _> using FuncQuantilesExact = AggregateFunctionQuantile<Value, QuantileExact<Value>, NameQuantilesExact, false, void, true>;
@ -40,11 +40,11 @@ template <typename Value, bool _> using FuncQuantilesTiming = AggregateFunctionQ
template <typename Value, bool _> using FuncQuantileTimingWeighted = AggregateFunctionQuantile<Value, QuantileTiming<Value>, NameQuantileTimingWeighted, true, Float32, false>;
template <typename Value, bool _> using FuncQuantilesTimingWeighted = AggregateFunctionQuantile<Value, QuantileTiming<Value>, NameQuantilesTimingWeighted, true, Float32, true>;
template <typename Value, bool FloatReturn> using FuncQuantileTDigest = AggregateFunctionQuantile<Value, QuantileTDigest<Value>, NameQuantileTDigest, false, std::conditional_t<FloatReturn, Float32, void>, false>;
template <typename Value, bool FloatReturn> using FuncQuantilesTDigest = AggregateFunctionQuantile<Value, QuantileTDigest<Value>, NameQuantilesTDigest, false, std::conditional_t<FloatReturn, Float32, void>, true>;
template <typename Value, bool float_return> using FuncQuantileTDigest = AggregateFunctionQuantile<Value, QuantileTDigest<Value>, NameQuantileTDigest, false, std::conditional_t<float_return, Float32, void>, false>;
template <typename Value, bool float_return> using FuncQuantilesTDigest = AggregateFunctionQuantile<Value, QuantileTDigest<Value>, NameQuantilesTDigest, false, std::conditional_t<float_return, Float32, void>, true>;
template <typename Value, bool FloatReturn> using FuncQuantileTDigestWeighted = AggregateFunctionQuantile<Value, QuantileTDigest<Value>, NameQuantileTDigestWeighted, true, std::conditional_t<FloatReturn, Float32, void>, false>;
template <typename Value, bool FloatReturn> using FuncQuantilesTDigestWeighted = AggregateFunctionQuantile<Value, QuantileTDigest<Value>, NameQuantilesTDigestWeighted, true, std::conditional_t<FloatReturn, Float32, void>, true>;
template <typename Value, bool float_return> using FuncQuantileTDigestWeighted = AggregateFunctionQuantile<Value, QuantileTDigest<Value>, NameQuantileTDigestWeighted, true, std::conditional_t<float_return, Float32, void>, false>;
template <typename Value, bool float_return> using FuncQuantilesTDigestWeighted = AggregateFunctionQuantile<Value, QuantileTDigest<Value>, NameQuantilesTDigestWeighted, true, std::conditional_t<float_return, Float32, void>, true>;
template <template <typename, bool> class Function>

View File

@ -58,7 +58,7 @@ struct QuantileExactWeighted
void merge(const QuantileExactWeighted & rhs)
{
for (const auto & pair : rhs.map)
map[pair.getFirst()] += pair.getSecond();
map[pair.getKey()] += pair.getMapped();
}
void serialize(WriteBuffer & buf) const
@ -93,7 +93,7 @@ struct QuantileExactWeighted
UInt64 sum_weight = 0;
for (const auto & pair : map)
{
sum_weight += pair.getSecond();
sum_weight += pair.getMapped();
array[i] = pair.getValue();
++i;
}
@ -143,7 +143,7 @@ struct QuantileExactWeighted
UInt64 sum_weight = 0;
for (const auto & pair : map)
{
sum_weight += pair.getSecond();
sum_weight += pair.getMapped();
array[i] = pair.getValue();
++i;
}

View File

@ -31,7 +31,7 @@ namespace ReservoirSamplerOnEmpty
};
}
template <typename ResultType, bool IsFloatingPoint>
template <typename ResultType, bool is_float>
struct NanLikeValueConstructor
{
static ResultType getValue()
@ -109,8 +109,11 @@ public:
double quantileInterpolated(double level)
{
if (samples.empty())
{
if (DB::IsDecimalNumber<T>)
return 0;
return onEmpty<double>();
}
sortIfNeeded();
double index = std::max(0., std::min(samples.size() - 1., level * (samples.size() - 1)));

View File

@ -30,6 +30,7 @@
namespace CurrentMetrics
{
extern const Metric SendScalars;
extern const Metric SendExternalTables;
}
@ -441,7 +442,7 @@ void Connection::sendCancel()
}
void Connection::sendData(const Block & block, const String & name)
void Connection::sendData(const Block & block, const String & name, bool scalar)
{
//LOG_TRACE(log_wrapper.get(), "Sending data");
@ -455,7 +456,10 @@ void Connection::sendData(const Block & block, const String & name)
block_out = std::make_shared<NativeBlockOutputStream>(*maybe_compressed_out, server_revision, block.cloneEmpty());
}
writeVarUInt(Protocol::Client::Data, *out);
if (scalar)
writeVarUInt(Protocol::Client::Scalar, *out);
else
writeVarUInt(Protocol::Client::Data, *out);
writeStringBinary(name, *out);
size_t prev_bytes = out->count();
@ -484,6 +488,44 @@ void Connection::sendPreparedData(ReadBuffer & input, size_t size, const String
}
void Connection::sendScalarsData(Scalars & data)
{
if (data.empty())
return;
Stopwatch watch;
size_t out_bytes = out ? out->count() : 0;
size_t maybe_compressed_out_bytes = maybe_compressed_out ? maybe_compressed_out->count() : 0;
size_t rows = 0;
CurrentMetrics::Increment metric_increment{CurrentMetrics::SendScalars};
for (auto & elem : data)
{
rows += elem.second.rows();
sendData(elem.second, elem.first, true /* scalar */);
}
out_bytes = out->count() - out_bytes;
maybe_compressed_out_bytes = maybe_compressed_out->count() - maybe_compressed_out_bytes;
double elapsed = watch.elapsedSeconds();
std::stringstream msg;
msg << std::fixed << std::setprecision(3);
msg << "Sent data for " << data.size() << " scalars, total " << rows << " rows in " << elapsed << " sec., "
<< static_cast<size_t>(rows / watch.elapsedSeconds()) << " rows/sec., "
<< maybe_compressed_out_bytes / 1048576.0 << " MiB (" << maybe_compressed_out_bytes / 1048576.0 / watch.elapsedSeconds() << " MiB/sec.)";
if (compression == Protocol::Compression::Enable)
msg << ", compressed " << static_cast<double>(maybe_compressed_out_bytes) / out_bytes << " times to "
<< out_bytes / 1048576.0 << " MiB (" << out_bytes / 1048576.0 / watch.elapsedSeconds() << " MiB/sec.)";
else
msg << ", no compression.";
LOG_DEBUG(log_wrapper.get(), msg.rdbuf());
}
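// How the two sides fit together (sketch): scalar blocks reuse the Native
// format of external-table blocks but are framed with Protocol::Client::Scalar
// instead of Protocol::Client::Data. On the server, TCPHandler::receiveData()
// is therefore called with scalar = true and stores the block via
// query_context->addScalar(name, block) instead of writing it into a
// StorageMemory table.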
void Connection::sendExternalTablesData(ExternalTablesData & data)
{
if (data.empty())

View File

@ -133,7 +133,9 @@ public:
void sendCancel();
/// Send block of data; if name is specified, server will write it to external (temporary) table of that name.
void sendData(const Block & block, const String & name = "");
void sendData(const Block & block, const String & name = "", bool scalar = false);
/// Send all scalars.
void sendScalarsData(Scalars & data);
/// Send all contents of external (temporary) tables.
void sendExternalTablesData(ExternalTablesData & data);

View File

@ -51,6 +51,21 @@ MultiplexedConnections::MultiplexedConnections(
active_connection_count = connections.size();
}
void MultiplexedConnections::sendScalarsData(Scalars & data)
{
std::lock_guard lock(cancel_mutex);
if (!sent_query)
throw Exception("Cannot send scalars data: query not yet sent.", ErrorCodes::LOGICAL_ERROR);
for (ReplicaState & state : replica_states)
{
Connection * connection = state.connection;
if (connection != nullptr)
connection->sendScalarsData(data);
}
}
void MultiplexedConnections::sendExternalTablesData(std::vector<ExternalTablesData> & data)
{
std::lock_guard lock(cancel_mutex);

View File

@ -27,6 +27,8 @@ public:
std::vector<IConnectionPool::Entry> && connections,
const Settings & settings_, const ThrottlerPtr & throttler_);
/// Send all scalars to replicas.
void sendScalarsData(Scalars & data);
/// Send all content of external tables to replicas.
void sendExternalTablesData(std::vector<ExternalTablesData> & data);

View File

@ -35,7 +35,7 @@ namespace
data.resize(hash_map.size());
for (const auto & val : hash_map)
data[val.getSecond()] = val.getFirst();
data[val.getMapped()] = val.getKey();
for (auto & ind : index)
ind = hash_map[ind];

View File

@ -1,6 +1,7 @@
#pragma once
#include <Columns/IColumnDummy.h>
#include <Core/Field.h>
namespace DB
@ -28,6 +29,9 @@ public:
ConstSetPtr getData() const { return data; }
// Used only for debugging: makes the column dumpable.
Field operator[](size_t) const override { return {}; }
private:
ConstSetPtr data;
};

View File

@ -112,7 +112,7 @@ void ColumnVector<T>::getPermutation(bool reverse, size_t limit, int nan_directi
else
{
/// A case for radix sort
if constexpr (std::is_arithmetic_v<T> && !std::is_same_v<T, UInt128>)
if constexpr (is_arithmetic_v<T> && !std::is_same_v<T, UInt128>)
{
/// Thresholds on size. The lower threshold is arbitrary; the upper one is dictated by the type used for the histogram counters.
if (s >= 256 && s <= std::numeric_limits<UInt32>::max())

View File

@ -359,7 +359,7 @@ struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod
if constexpr (has_mapped)
{
auto & mapped = *lookupResultGetMapped(it);
auto & mapped = it->getMapped();
if (inserted)
{
new (&mapped) Mapped();

View File

@ -174,13 +174,13 @@ protected:
[[maybe_unused]] Mapped * cached = nullptr;
if constexpr (has_mapped)
cached = lookupResultGetMapped(it);
cached = &it->getMapped();
if (inserted)
{
if constexpr (has_mapped)
{
new(lookupResultGetMapped(it)) Mapped();
new (&it->getMapped()) Mapped();
}
}
@ -191,18 +191,18 @@ protected:
if constexpr (has_mapped)
{
cache.value.first = *lookupResultGetKey(it);
cache.value.second = *lookupResultGetMapped(it);
cache.value.first = it->getKey();
cache.value.second = it->getMapped();
cached = &cache.value.second;
}
else
{
cache.value = *lookupResultGetKey(it);
cache.value = it->getKey();
}
}
if constexpr (has_mapped)
return EmplaceResult(*lookupResultGetMapped(it), *cached, inserted);
return EmplaceResult(it->getMapped(), *cached, inserted);
else
return EmplaceResult(inserted);
}
@ -233,7 +233,7 @@ protected:
cache.value.first = key;
if (it)
{
cache.value.second = *lookupResultGetMapped(it);
cache.value.second = it->getMapped();
}
}
else
@ -243,7 +243,7 @@ protected:
}
if constexpr (has_mapped)
return FindResult(it ? lookupResultGetMapped(it) : nullptr, it != nullptr);
return FindResult(it ? &it->getMapped() : nullptr, it != nullptr);
else
return FindResult(it != nullptr);
}

View File

@ -21,6 +21,7 @@
M(OpenFileForWrite, "Number of files open for writing") \
M(Read, "Number of read (read, pread, io_getevents, etc.) syscalls in fly") \
M(Write, "Number of write (write, pwrite, io_getevents, etc.) syscalls in fly") \
M(SendScalars, "Number of connections that are sending data for scalars to remote servers.") \
M(SendExternalTables, "Number of connections that are sending data for external tables to remote servers. External tables are used to implement GLOBAL IN and GLOBAL JOIN operators with distributed subqueries.") \
M(QueryThread, "Number of query processing threads") \
M(ReadonlyReplica, "Number of Replicated tables that are currently in readonly state due to re-initialization after ZooKeeper session loss or due to startup without ZooKeeper configured.") \

View File

@ -193,7 +193,7 @@ DiskSelector::DiskSelector(const Poco::Util::AbstractConfiguration & config, con
if (has_space_ratio)
{
auto ratio = config.getDouble(config_prefix + ".keep_free_space_ratio");
auto ratio = config.getDouble(disk_config_prefix + ".keep_free_space_ratio");
if (ratio < 0 || ratio > 1)
throw Exception("'keep_free_space_ratio' have to be between 0 and 1",
ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);
@ -292,7 +292,7 @@ Volume::Volume(
formatReadableSizeWithBinarySuffix(max_data_part_size) << ")");
}
constexpr UInt64 MIN_PART_SIZE = 8u * 1024u * 1024u;
if (max_data_part_size < MIN_PART_SIZE)
if (max_data_part_size != 0 && max_data_part_size < MIN_PART_SIZE)
LOG_WARNING(logger, "Volume " << backQuote(name) << " max_data_part_size is too low ("
<< formatReadableSizeWithBinarySuffix(max_data_part_size) << " < "
<< formatReadableSizeWithBinarySuffix(MIN_PART_SIZE) << ")");

View File

@ -155,7 +155,7 @@ namespace ErrorCodes
extern const int NOT_FOUND_FUNCTION_ELEMENT_FOR_AGGREGATE = 147;
extern const int NOT_FOUND_RELATION_ELEMENT_FOR_CONDITION = 148;
extern const int NOT_FOUND_RHS_ELEMENT_FOR_CONDITION = 149;
extern const int NO_ATTRIBUTES_LISTED = 150;
extern const int EMPTY_LIST_OF_ATTRIBUTES_PASSED = 150;
extern const int INDEX_OF_COLUMN_IN_SORT_CLAUSE_IS_OUT_OF_RANGE = 151;
extern const int UNKNOWN_DIRECTION_OF_SORTING = 152;
extern const int ILLEGAL_DIVISION = 153;
@ -361,7 +361,7 @@ namespace ErrorCodes
extern const int PART_IS_TEMPORARILY_LOCKED = 384;
extern const int MULTIPLE_STREAMS_REQUIRED = 385;
extern const int NO_COMMON_TYPE = 386;
extern const int EXTERNAL_LOADABLE_ALREADY_EXISTS = 387;
extern const int DICTIONARY_ALREADY_EXISTS = 387;
extern const int CANNOT_ASSIGN_OPTIMIZE = 388;
extern const int INSERT_WAS_DEDUPLICATED = 389;
extern const int CANNOT_GET_CREATE_TABLE_QUERY = 390;
@ -459,6 +459,11 @@ namespace ErrorCodes
extern const int DICTIONARY_ACCESS_DENIED = 482;
extern const int TOO_MANY_REDIRECTS = 483;
extern const int INTERNAL_REDIS_ERROR = 484;
extern const int SCALAR_ALREADY_EXISTS = 485;
extern const int UNKNOWN_SCALAR = 486;
extern const int CANNOT_GET_CREATE_DICTIONARY_QUERY = 487;
extern const int UNKNOWN_DICTIONARY = 488;
extern const int INCORRECT_DICTIONARY_DEFINITION = 489;
extern const int KEEPER_EXCEPTION = 999;
extern const int POCO_EXCEPTION = 1000;

View File

@ -14,12 +14,6 @@ struct ClearableHashMapCell : public ClearableHashTableCell<Key, HashMapCell<Key
: Base::BaseCell(value_, state), Base::version(state.version) {}
};
template<typename Key, typename Mapped, typename Hash>
ALWAYS_INLINE inline auto lookupResultGetKey(ClearableHashMapCell<Key, Mapped, Hash> * cell) { return &cell->getFirst(); }
template<typename Key, typename Mapped, typename Hash>
ALWAYS_INLINE inline auto lookupResultGetMapped(ClearableHashMapCell<Key, Mapped, Hash> * cell) { return &cell->getSecond(); }
template
<
typename Key,
@ -31,20 +25,16 @@ template
class ClearableHashMap : public HashTable<Key, ClearableHashMapCell<Key, Mapped, Hash>, Hash, Grower, Allocator>
{
public:
using key_type = Key;
using mapped_type = Mapped;
using value_type = typename ClearableHashMap::cell_type::value_type;
mapped_type & operator[](Key x)
Mapped & operator[](const Key & x)
{
typename ClearableHashMap::LookupResult it;
bool inserted;
this->emplace(x, it, inserted);
if (inserted)
new(lookupResultGetMapped(it)) mapped_type();
new (&it->getMapped()) Mapped();
return *lookupResultGetMapped(it);
return it->getMapped();
}
void clear()

View File

@ -48,12 +48,6 @@ struct ClearableHashTableCell : public BaseCell
ClearableHashTableCell(const Key & key_, const State & state) : BaseCell(key_, state), version(state.version) {}
};
template<typename Key, typename BaseCell>
ALWAYS_INLINE inline auto lookupResultGetKey(ClearableHashTableCell<Key, BaseCell> * cell) { return &cell->key; }
template<typename Key, typename BaseCell>
ALWAYS_INLINE inline void * lookupResultGetMapped(ClearableHashTableCell<Key, BaseCell> *) { return nullptr; }
template
<
typename Key,
@ -64,9 +58,6 @@ template
class ClearableHashSet : public HashTable<Key, ClearableHashTableCell<Key, HashTableCell<Key, Hash, ClearableHashSetState>>, Hash, Grower, Allocator>
{
public:
using key_type = Key;
using value_type = typename ClearableHashSet::cell_type::value_type;
using Base = HashTable<Key, ClearableHashTableCell<Key, HashTableCell<Key, Hash, ClearableHashSetState>>, Hash, Grower, Allocator>;
using typename Base::LookupResult;
@ -87,9 +78,6 @@ template
class ClearableHashSetWithSavedHash: public HashTable<Key, ClearableHashTableCell<Key, HashSetCellWithSavedHash<Key, Hash, ClearableHashSetState>>, Hash, Grower, Allocator>
{
public:
using key_type = Key;
using value_type = typename ClearableHashSetWithSavedHash::cell_type::value_type;
void clear()
{
++this->version;

View File

@ -11,6 +11,8 @@ struct FixedClearableHashMapCell
using State = ClearableHashSetState;
using value_type = PairNoInit<Key, Mapped>;
using mapped_type = Mapped;
UInt32 version;
Mapped mapped;
@ -18,11 +20,12 @@ struct FixedClearableHashMapCell
FixedClearableHashMapCell(const Key &, const State & state) : version(state.version) {}
FixedClearableHashMapCell(const value_type & value_, const State & state) : version(state.version), mapped(value_.second) {}
Mapped & getSecond() { return mapped; }
const Mapped & getSecond() const { return mapped; }
const VoidKey getKey() const { return {}; }
Mapped & getMapped() { return mapped; }
const Mapped & getMapped() const { return mapped; }
bool isZero(const State & state) const { return version != state.version; }
void setZero() { version = 0; }
static constexpr bool need_zero_value_storage = false;
struct CellExt
{
@ -35,32 +38,33 @@ struct FixedClearableHashMapCell
}
Key key;
FixedClearableHashMapCell * ptr;
const Key & getFirst() const { return key; }
Mapped & getSecond() { return ptr->mapped; }
const Mapped & getSecond() const { return *ptr->mapped; }
const Key & getKey() const { return key; }
Mapped & getMapped() { return ptr->mapped; }
const Mapped & getMapped() const { return ptr->mapped; }
const value_type getValue() const { return {key, ptr->mapped}; }
};
};
template <typename Key, typename Mapped, typename Allocator = HashTableAllocator>
class FixedClearableHashMap : public FixedHashMap<Key, FixedClearableHashMapCell<Key, Mapped>, Allocator>
class FixedClearableHashMap : public FixedHashMap<Key, Mapped, FixedClearableHashMapCell<Key, Mapped>, Allocator>
{
public:
using key_type = Key;
using mapped_type = Mapped;
using value_type = typename FixedClearableHashMap::cell_type::value_type;
using Base = FixedHashMap<Key, Mapped, FixedClearableHashMapCell<Key, Mapped>, Allocator>;
using Self = FixedClearableHashMap;
using LookupResult = typename Base::LookupResult;
mapped_type & operator[](Key x)
using Base::Base;
Mapped & operator[](const Key & x)
{
typename FixedClearableHashMap::iterator it;
LookupResult it;
bool inserted;
this->emplace(x, it, inserted);
if (inserted)
new (&it->second) mapped_type();
new (&it->getMapped()) Mapped();
return it->second;
return it->getMapped();
}
void clear()

View File

@ -10,19 +10,23 @@ struct FixedClearableHashTableCell
using State = ClearableHashSetState;
using value_type = Key;
using mapped_type = void;
using mapped_type = VoidMapped;
UInt32 version;
FixedClearableHashTableCell() {}
FixedClearableHashTableCell(const Key &, const State & state) : version(state.version) {}
const VoidKey getKey() const { return {}; }
VoidMapped getMapped() const { return {}; }
bool isZero(const State & state) const { return version != state.version; }
void setZero() { version = 0; }
static constexpr bool need_zero_value_storage = false;
struct CellExt
{
Key key;
const VoidKey getKey() const { return {}; }
VoidMapped getMapped() const { return {}; }
const value_type & getValue() const { return key; }
void update(Key && key_, FixedClearableHashTableCell *) { key = key_; }
};
@ -34,8 +38,6 @@ class FixedClearableHashSet : public FixedHashTable<Key, FixedClearableHashTable
{
public:
using Base = FixedHashTable<Key, FixedClearableHashTableCell<Key>, Allocator>;
using key_type = Key;
using value_type = typename FixedClearableHashSet::cell_type::value_type;
using LookupResult = typename Base::LookupResult;
void clear()

View File

@ -13,18 +13,19 @@ struct FixedHashMapCell
using value_type = PairNoInit<Key, Mapped>;
using mapped_type = TMapped;
Mapped mapped;
bool full;
Mapped mapped;
FixedHashMapCell() {}
FixedHashMapCell(const Key &, const State &) : full(true) {}
FixedHashMapCell(const value_type & value_, const State &) : full(true), mapped(value_.second) {}
Mapped & getSecond() { return mapped; }
const Mapped & getSecond() const { return mapped; }
const VoidKey getKey() const { return {}; }
Mapped & getMapped() { return mapped; }
const Mapped & getMapped() const { return mapped; }
bool isZero(const State &) const { return !full; }
void setZero() { full = false; }
static constexpr bool need_zero_value_storage = false;
/// Similar to FixedHashSetCell except that we need to contain a pointer to the Mapped field.
/// Note that we have to assemble a contiguous layout for the value_type on each call of getValue().
@ -40,36 +41,23 @@ struct FixedHashMapCell
Key key;
FixedHashMapCell * ptr;
const Key & getFirst() const { return key; }
Mapped & getSecond() { return ptr->mapped; }
const Mapped & getSecond() const { return ptr->mapped; }
const Key & getKey() const { return key; }
Mapped & getMapped() { return ptr->mapped; }
const Mapped & getMapped() const { return ptr->mapped; }
const value_type getValue() const { return {key, ptr->mapped}; }
};
};
template<typename Key, typename Mapped, typename State>
ALWAYS_INLINE inline void * lookupResultGetKey(FixedHashMapCell<Key, Mapped, State> *)
{ return nullptr; }
template<typename Key, typename Mapped, typename State>
ALWAYS_INLINE inline auto lookupResultGetMapped(FixedHashMapCell<Key, Mapped, State> * cell)
{ return &cell->getSecond(); }
template <typename Key, typename Mapped, typename Allocator = HashTableAllocator>
class FixedHashMap : public FixedHashTable<Key, FixedHashMapCell<Key, Mapped>, Allocator>
template <typename Key, typename Mapped, typename Cell = FixedHashMapCell<Key, Mapped>, typename Allocator = HashTableAllocator>
class FixedHashMap : public FixedHashTable<Key, Cell, Allocator>
{
public:
using Base = FixedHashTable<Key, FixedHashMapCell<Key, Mapped>, Allocator>;
using Base = FixedHashTable<Key, Cell, Allocator>;
using Self = FixedHashMap;
using key_type = Key;
using Cell = typename Base::cell_type;
using value_type = typename Cell::value_type;
using mapped_type = typename Cell::Mapped;
using LookupResult = typename Base::LookupResult;
using Base::Base;
using LookupResult = typename Base::LookupResult;
template <typename Func>
void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func)
{
@ -77,8 +65,8 @@ public:
{
typename Self::LookupResult res_it;
bool inserted;
that.emplace(it->getFirst(), res_it, inserted, it.getHash());
func(*lookupResultGetMapped(res_it), it->getSecond(), inserted);
that.emplace(it->getKey(), res_it, inserted, it.getHash());
func(res_it->getMapped(), it->getMapped(), inserted);
}
}
@ -87,11 +75,11 @@ public:
{
for (auto it = this->begin(), end = this->end(); it != end; ++it)
{
auto res_it = that.find(it->getFirst(), it.getHash());
auto res_it = that.find(it->getKey(), it.getHash());
if (!res_it)
func(it->getSecond(), it->getSecond(), false);
func(it->getMapped(), it->getMapped(), false);
else
func(*lookupResultGetMapped(res_it), it->getSecond(), true);
func(res_it->getMapped(), it->getMapped(), true);
}
}
@ -99,24 +87,24 @@ public:
void forEachValue(Func && func)
{
for (auto & v : *this)
func(v.getFirst(), v.getSecond());
func(v.getKey(), v.getMapped());
}
template <typename Func>
void forEachMapped(Func && func)
{
for (auto & v : *this)
func(v.getSecond());
func(v.getMapped());
}
mapped_type & ALWAYS_INLINE operator[](Key x)
Mapped & ALWAYS_INLINE operator[](const Key & x)
{
typename Base::LookupResult it;
LookupResult it;
bool inserted;
this->emplace(x, it, inserted);
if (inserted)
new (it) mapped_type();
new (&it->getMapped()) Mapped();
return it;
return it->getMapped();
}
};
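// A short usage sketch (illustrative, not from the patch): FixedHashMap
// indexes its buffer directly with the integer key, so it only makes sense
// for small key types -- the table allocates 2^(8 * sizeof(Key)) cells up front.
//
//     FixedHashMap<UInt8, UInt64> counts;
//     for (size_t i = 0; i < n; ++i)
//         ++counts[data[i]];   /// operator[] default-constructs the mapped value on first use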

View File

@ -6,14 +6,15 @@ template <typename Key, typename Allocator = HashTableAllocator>
class FixedHashSet : public FixedHashTable<Key, FixedHashTableCell<Key>, Allocator>
{
public:
using Base = FixedHashTable<Key, FixedHashTableCell<Key>, Allocator>;
using Cell = FixedHashTableCell<Key>;
using Base = FixedHashTable<Key, Cell, Allocator>;
using Self = FixedHashSet;
void merge(const Self & rhs)
{
for (size_t i = 0; i < Base::BUFFER_SIZE; ++i)
if (Base::buf[i].isZero(*this) && !rhs.buf[i].isZero(*this))
Base::buf[i] = rhs.buf[i];
new (&Base::buf[i]) Cell(rhs.buf[i]);
}
/// NOTE: Currently this method isn't used. When it is, the ReadBuffer should

View File

@ -8,12 +8,15 @@ struct FixedHashTableCell
using State = TState;
using value_type = Key;
using mapped_type = void;
using mapped_type = VoidMapped;
bool full;
FixedHashTableCell() {}
FixedHashTableCell(const Key &, const State &) : full(true) {}
const VoidKey getKey() const { return {}; }
VoidMapped getMapped() const { return {}; }
bool isZero(const State &) const { return !full; }
void setZero() { full = false; }
static constexpr bool need_zero_value_storage = false;
@ -28,6 +31,8 @@ struct FixedHashTableCell
{
Key key;
const VoidKey getKey() const { return {}; }
VoidMapped getMapped() const { return {}; }
const value_type & getValue() const { return key; }
void update(Key && key_, FixedHashTableCell *) { key = key_; }
};
@ -53,7 +58,7 @@ struct FixedHashTableCell
template <typename Key, typename Cell, typename Allocator>
class FixedHashTable : private boost::noncopyable, protected Allocator, protected Cell::State
{
static constexpr size_t BUFFER_SIZE = 1ULL << (sizeof(Key) * 8);
static constexpr size_t NUM_CELLS = 1ULL << (sizeof(Key) * 8);
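    /// Worked sizes: a UInt8 key gives 1 << 8 = 256 cells, UInt16 gives
    /// 1 << 16 = 65536. This is a cell count, not a byte size; the byte size
    /// is NUM_CELLS * sizeof(Cell), as reported by getBufferSizeInBytes().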
protected:
friend class const_iterator;
@ -61,12 +66,11 @@ protected:
friend class Reader;
using Self = FixedHashTable;
using cell_type = Cell;
size_t m_size = 0; /// Amount of elements
Cell * buf; /// A piece of memory for all elements except the element with zero key.
Cell * buf; /// A piece of memory for all elements.
void alloc() { buf = reinterpret_cast<Cell *>(Allocator::alloc(BUFFER_SIZE * sizeof(Cell))); }
void alloc() { buf = reinterpret_cast<Cell *>(Allocator::alloc(NUM_CELLS * sizeof(Cell))); }
void free()
{
@ -111,7 +115,7 @@ protected:
++ptr;
/// Skip empty cells in the main buffer.
auto buf_end = container->buf + container->BUFFER_SIZE;
auto buf_end = container->buf + container->NUM_CELLS;
while (ptr < buf_end && ptr->isZero(*container))
++ptr;
@ -140,8 +144,9 @@ protected:
public:
using key_type = Key;
using value_type = typename Cell::value_type;
using mapped_type = typename Cell::mapped_type;
using value_type = typename Cell::value_type;
using cell_type = Cell;
using LookupResult = Cell *;
using ConstLookupResult = const Cell *;
@ -239,7 +244,7 @@ public:
return end();
const Cell * ptr = buf;
auto buf_end = buf + BUFFER_SIZE;
auto buf_end = buf + NUM_CELLS;
while (ptr < buf_end && ptr->isZero(*this))
++ptr;
@ -254,21 +259,21 @@ public:
return end();
Cell * ptr = buf;
auto buf_end = buf + BUFFER_SIZE;
auto buf_end = buf + NUM_CELLS;
while (ptr < buf_end && ptr->isZero(*this))
++ptr;
return iterator(this, ptr);
}
const_iterator end() const { return const_iterator(this, buf + BUFFER_SIZE); }
const_iterator end() const { return const_iterator(this, buf + NUM_CELLS); }
const_iterator cend() const { return end(); }
iterator end() { return iterator(this, buf + BUFFER_SIZE); }
iterator end() { return iterator(this, buf + NUM_CELLS); }
public:
/// The last parameter is unused but exists for compatibility with HashTable interface.
void ALWAYS_INLINE emplace(Key x, LookupResult & it, bool & inserted, size_t /* hash */ = 0)
void ALWAYS_INLINE emplace(const Key & x, LookupResult & it, bool & inserted, size_t /* hash */ = 0)
{
it = &buf[x];
@ -288,40 +293,31 @@ public:
std::pair<LookupResult, bool> res;
emplace(Cell::getKey(x), res.first, res.second);
if (res.second)
insertSetMapped(lookupResultGetMapped(res.first), x);
insertSetMapped(res.first->getMapped(), x);
return res;
}
LookupResult ALWAYS_INLINE find(Key x)
{
return !buf[x].isZero(*this) ? &buf[x] : nullptr;
}
LookupResult ALWAYS_INLINE find(const Key & x) { return !buf[x].isZero(*this) ? &buf[x] : nullptr; }
ConstLookupResult ALWAYS_INLINE find(Key x) const
{
return const_cast<std::decay_t<decltype(*this)> *>(this)->find(x);
}
ConstLookupResult ALWAYS_INLINE find(const Key & x) const { return const_cast<std::decay_t<decltype(*this)> *>(this)->find(x); }
LookupResult ALWAYS_INLINE find(Key, size_t hash_value)
{
return !buf[hash_value].isZero(*this) ? &buf[hash_value] : nullptr;
}
LookupResult ALWAYS_INLINE find(const Key &, size_t hash_value) { return !buf[hash_value].isZero(*this) ? &buf[hash_value] : nullptr; }
ConstLookupResult ALWAYS_INLINE find(Key key, size_t hash_value) const
ConstLookupResult ALWAYS_INLINE find(const Key & key, size_t hash_value) const
{
return const_cast<std::decay_t<decltype(*this)> *>(this)->find(key, hash_value);
}
bool ALWAYS_INLINE has(Key x) const { return !buf[x].isZero(*this); }
bool ALWAYS_INLINE has(Key, size_t hash_value) const { return !buf[hash_value].isZero(*this); }
bool ALWAYS_INLINE has(const Key & x) const { return !buf[x].isZero(*this); }
bool ALWAYS_INLINE has(const Key &, size_t hash_value) const { return !buf[hash_value].isZero(*this); }
void write(DB::WriteBuffer & wb) const
{
Cell::State::write(wb);
DB::writeVarUInt(m_size, wb);
for (auto ptr = buf, buf_end = buf + BUFFER_SIZE; ptr < buf_end; ++ptr)
for (auto ptr = buf, buf_end = buf + NUM_CELLS; ptr < buf_end; ++ptr)
if (!ptr->isZero(*this))
{
DB::writeVarUInt(ptr - buf, wb);
@ -334,7 +330,7 @@ public:
Cell::State::writeText(wb);
DB::writeText(m_size, wb);
for (auto ptr = buf, buf_end = buf + BUFFER_SIZE; ptr < buf_end; ++ptr)
for (auto ptr = buf, buf_end = buf + NUM_CELLS; ptr < buf_end; ++ptr)
{
if (!ptr->isZero(*this))
{
@ -393,7 +389,7 @@ public:
destroyElements();
m_size = 0;
memset(static_cast<void *>(buf), 0, BUFFER_SIZE * sizeof(*buf));
memset(static_cast<void *>(buf), 0, NUM_CELLS * sizeof(*buf));
}
/// After executing this function, the table can only be destroyed,
@ -405,9 +401,9 @@ public:
free();
}
size_t getBufferSizeInBytes() const { return BUFFER_SIZE * sizeof(Cell); }
size_t getBufferSizeInBytes() const { return NUM_CELLS * sizeof(Cell); }
size_t getBufferSizeInCells() const { return BUFFER_SIZE; }
size_t getBufferSizeInCells() const { return NUM_CELLS; }
#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
size_t getCollisions() const { return 0; }

View File

@ -76,7 +76,7 @@ template <typename T, typename Enable = void>
struct DefaultHash;
template <typename T>
struct DefaultHash<T, std::enable_if_t<std::is_arithmetic_v<T>>>
struct DefaultHash<T, std::enable_if_t<is_arithmetic_v<T>>>
{
size_t operator() (T key) const
{

View File

@ -52,12 +52,13 @@ struct HashMapCell
HashMapCell(const Key & key_, const State &) : value(key_, NoInitTag()) {}
HashMapCell(const value_type & value_, const State &) : value(value_) {}
const Key & getFirst() const { return value.first; }
Mapped & getSecond() { return value.second; }
const Mapped & getSecond() const { return value.second; }
/// Get the key (externally).
const Key & getKey() const { return value.first; }
Mapped & getMapped() { return value.second; }
const Mapped & getMapped() const { return value.second; }
const value_type & getValue() const { return value; }
/// Get the key (internally).
static const Key & getKey(const value_type & value) { return value.first; }
bool keyEquals(const Key & key_) const { return value.first == key_; }
@ -110,15 +111,6 @@ struct HashMapCell
}
};
template<typename Key, typename Mapped, typename Hash, typename State>
ALWAYS_INLINE inline auto lookupResultGetKey(HashMapCell<Key, Mapped, Hash, State> * cell)
{ return &cell->getFirst(); }
template<typename Key, typename Mapped, typename Hash, typename State>
ALWAYS_INLINE inline auto lookupResultGetMapped(HashMapCell<Key, Mapped, Hash, State> * cell)
{ return &cell->getSecond(); }
template <typename Key, typename TMapped, typename Hash, typename TState = HashTableNoState>
struct HashMapCellWithSavedHash : public HashMapCell<Key, TMapped, Hash, TState>
{
@ -136,15 +128,6 @@ struct HashMapCellWithSavedHash : public HashMapCell<Key, TMapped, Hash, TState>
size_t getHash(const Hash & /*hash_function*/) const { return saved_hash; }
};
template<typename Key, typename Mapped, typename Hash, typename State>
ALWAYS_INLINE inline auto lookupResultGetKey(HashMapCellWithSavedHash<Key, Mapped, Hash, State> * cell)
{ return &cell->getFirst(); }
template<typename Key, typename Mapped, typename Hash, typename State>
ALWAYS_INLINE inline auto lookupResultGetMapped(HashMapCellWithSavedHash<Key, Mapped, Hash, State> * cell)
{ return &cell->getSecond(); }
template <
typename Key,
typename Cell,
@ -156,14 +139,9 @@ class HashMapTable : public HashTable<Key, Cell, Hash, Grower, Allocator>
public:
using Self = HashMapTable;
using Base = HashTable<Key, Cell, Hash, Grower, Allocator>;
using key_type = Key;
using value_type = typename Cell::value_type;
using mapped_type = typename Cell::Mapped;
using LookupResult = typename Base::LookupResult;
using HashTable<Key, Cell, Hash, Grower, Allocator>::HashTable;
using Base::Base;
/// Merge every cell's value of current map into the destination map via emplace.
/// Func should have signature void(Mapped & dst, Mapped & src, bool emplaced).
@ -178,8 +156,8 @@ public:
{
typename Self::LookupResult res_it;
bool inserted;
that.emplace(it->getFirst(), res_it, inserted, it.getHash());
func(*lookupResultGetMapped(res_it), it->getSecond(), inserted);
that.emplace(Cell::getKey(it->getValue()), res_it, inserted, it.getHash());
func(res_it->getMapped(), it->getMapped(), inserted);
}
}
@ -193,11 +171,11 @@ public:
{
for (auto it = this->begin(), end = this->end(); it != end; ++it)
{
auto res_it = that.find(it->getFirst(), it.getHash());
auto res_it = that.find(Cell::getKey(it->getValue()), it.getHash());
if (!res_it)
func(it->getSecond(), it->getSecond(), false);
func(it->getMapped(), it->getMapped(), false);
else
func(*lookupResultGetMapped(res_it), it->getSecond(), true);
func(res_it->getMapped(), it->getMapped(), true);
}
}
@ -206,7 +184,7 @@ public:
void forEachValue(Func && func)
{
for (auto & v : *this)
func(v.getFirst(), v.getSecond());
func(v.getKey(), v.getMapped());
}
/// Call func(Mapped &) for each hash map element.
@ -214,12 +192,12 @@ public:
void forEachMapped(Func && func)
{
for (auto & v : *this)
func(v.getSecond());
func(v.getMapped());
}
mapped_type & ALWAYS_INLINE operator[](Key x)
typename Cell::Mapped & ALWAYS_INLINE operator[](const Key & x)
{
typename HashMapTable::LookupResult it;
LookupResult it;
bool inserted;
this->emplace(x, it, inserted);
@ -238,9 +216,9 @@ public:
* the compiler cannot guess this, and generates the `load`, `increment`, `store` code.
*/
if (inserted)
new(lookupResultGetMapped(it)) mapped_type();
new (&it->getMapped()) typename Cell::Mapped();
return *lookupResultGetMapped(it);
return it->getMapped();
}
};
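// A short merge sketch (assuming two HashMap<UInt64, UInt64> instances named
// source and destination; the callback receives (dst_mapped, src_mapped, emplaced)):
//
//     source.mergeToViaEmplace(destination,
//         [](UInt64 & dst, UInt64 & src, bool emplaced)
//         {
//             if (emplaced)
//                 dst = src;   /// freshly created cell: initialize it
//             else
//                 dst += src;  /// existing cell: accumulate
//         });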

View File

@ -84,14 +84,6 @@ struct HashSetCellWithSavedHash : public HashTableCell<Key, Hash, TState>
size_t getHash(const Hash & /*hash_function*/) const { return saved_hash; }
};
template<typename Key, typename Hash, typename State>
ALWAYS_INLINE inline auto lookupResultGetKey(HashSetCellWithSavedHash<Key, Hash, State> * cell)
{ return &cell->key; }
template<typename Key, typename Hash, typename State>
ALWAYS_INLINE inline void * lookupResultGetMapped(HashSetCellWithSavedHash<Key, Hash, State> *)
{ return nullptr; }
template
<
typename Key,

View File

@ -78,66 +78,48 @@ void set(T & x) { x = 0; }
}
/**
* lookupResultGetKey/Mapped -- functions to get key/"mapped" values from the
* LookupResult returned by find() and emplace() methods of HashTable.
* Must not be called for a null LookupResult.
* getKey/Mapped -- methods to get key/"mapped" values from the LookupResult returned by find() and
* emplace() methods of HashTable. Must not be called for a null LookupResult.
*
* We don't use iterators for lookup result to avoid creating temporary
* objects. Instead, LookupResult is a pointer of some kind. There are global
* functions lookupResultGetKey/Mapped, overloaded for this pointer type, that
* return pointers to key/"mapped" values. They are implemented as global
* functions and not as methods, because they have to be overloaded for POD
* types, e.g. in StringHashTable where different components have different
* Cell format.
* We don't use iterators for lookup result. Instead, LookupResult is a pointer of some kind. There
* are methods getKey/Mapped, that return references or values to key/"mapped" values.
*
* Different hash table implementations support this interface to a varying
* degree:
* Different hash table implementations support this interface to a varying degree:
*
* 1) Hash tables that store neither the key in its original form, nor a
* "mapped" value: FixedHashTable or StringHashTable.
* Neither GetKey nor GetMapped are supported, the only valid operation is
* checking LookupResult for null.
* 1) Hash tables that store neither the key in its original form, nor a "mapped" value:
* FixedHashTable or StringHashTable. Neither GetKey nor GetMapped are supported, the only valid
* operation is checking LookupResult for null.
*
* 2) Hash maps that do not store the key, e.g. FixedHashMap or StringHashMap.
* Only GetMapped is supported.
* 2) Hash maps that do not store the key, e.g. FixedHashMap or StringHashMap. Only GetMapped is
* supported.
*
* 3) Hash tables that store the key and do not have a "mapped" value, e.g. the
* normal HashTable.
* GetKey returns the key, and GetMapped returns a zero void pointer. This
* simplifies generic code that works with mapped values: it can overload
* on the return type of GetMapped(), and doesn't need other parameters. One
* example is insertSetMapped() function.
* 3) Hash tables that store the key and do not have a "mapped" value, e.g. the normal HashTable.
* GetKey returns the key, and GetMapped returns a zero void pointer. This simplifies generic
* code that works with mapped values: it can overload on the return type of GetMapped(), and
* doesn't need other parameters. One example is insertSetMapped() function.
*
* 4) Hash tables that store both the key and the "mapped" value, e.g. HashMap.
* Both GetKey and GetMapped are supported.
* 4) Hash tables that store both the key and the "mapped" value, e.g. HashMap. Both GetKey and
* GetMapped are supported.
*
* The implementation side goes as follows:
* for (1), LookupResult = void *, no getters;
* for (2), LookupResult = Mapped *, GetMapped is a default implementation that
* takes any pointer-like object;
* for (3) and (4), LookupResult = Cell *, and both getters are implemented.
* They have to be specialized for each particular Cell class to supersede the
* default version that takes a generic pointer-like object.
*
* for (1), LookupResult->getKey = const VoidKey, LookupResult->getMapped = VoidMapped;
*
* for (2), LookupResult->getKey = const VoidKey, LookupResult->getMapped = Mapped &;
*
* for (3) and (4), LookupResult->getKey = const Key [&], LookupResult->getMapped = Mapped &;
* VoidKey and VoidMapped may have specialized function overloads for generic code.
*/
/**
* The default implementation of GetMapped that is used for the above case (2).
*/
template<typename PointerLike>
ALWAYS_INLINE inline auto lookupResultGetMapped(PointerLike && ptr) { return &*ptr; }
/**
* Generic const wrapper for lookupResultGetMapped, that calls a non-const
* version. Should be safe, given that these functions only do pointer
* arithmetics.
*/
template<typename T>
ALWAYS_INLINE inline auto lookupResultGetMapped(const T * obj)
struct VoidKey {};
struct VoidMapped
{
auto mapped_ptr = lookupResultGetMapped(const_cast<T *>(obj));
const auto const_mapped_ptr = mapped_ptr;
return const_mapped_ptr;
}
template <typename T>
auto & operator=(const T &)
{
return *this;
}
};
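A sketch of how generic code can exploit this: an overload set (hypothetical helper, in the spirit of insertSetMapped below) treats sets and maps uniformly by overloading on VoidMapped:

template <typename Mapped>
void mergeMapped(Mapped & dst, const Mapped & src) { dst += src; }
inline void mergeMapped(VoidMapped, VoidMapped) {} /// sets carry no mapped value, so merging is a no-op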
/** Compile-time interface for cell of the hash table.
* Different cell types are used to implement different hash tables.
@ -152,7 +134,7 @@ struct HashTableCell
using key_type = Key;
using value_type = Key;
using mapped_type = void;
using mapped_type = VoidMapped;
Key key;
@ -161,10 +143,12 @@ struct HashTableCell
/// Create a cell with the given key / key and value.
HashTableCell(const Key & key_, const State &) : key(key_) {}
/// Get what the value_type of the container will be.
/// Get the key (externally).
const Key & getKey() const { return key; }
VoidMapped getMapped() const { return {}; }
const value_type & getValue() const { return key; }
/// Get the key.
/// Get the key (internally).
static const Key & getKey(const value_type & value) { return value; }
/// Are the keys at the cells equal?
@ -207,23 +191,15 @@ struct HashTableCell
void readText(DB::ReadBuffer & rb) { DB::readDoubleQuoted(key, rb); }
};
template<typename Key, typename Hash, typename State>
ALWAYS_INLINE inline auto lookupResultGetKey(HashTableCell<Key, Hash, State> * cell)
{ return &cell->key; }
template<typename Key, typename Hash, typename State>
ALWAYS_INLINE inline void * lookupResultGetMapped(HashTableCell<Key, Hash, State> *)
{ return nullptr; }
/**
* A helper function for HashTable::insert() to set the "mapped" value.
* Overloaded on the mapped type, does nothing if it's void.
* Overloaded on the mapped type, does nothing if it's VoidMapped.
*/
template <typename ValueType>
void insertSetMapped(void * /* dest */, const ValueType & /* src */) {}
void insertSetMapped(VoidMapped /* dest */, const ValueType & /* src */) {}
template <typename MappedType, typename ValueType>
void insertSetMapped(MappedType * dest, const ValueType & src) { *dest = src.second; }
void insertSetMapped(MappedType & dest, const ValueType & src) { dest = src.second; }
/** Determines the size of the hash table, and when and how much it should be resized.
@ -276,7 +252,7 @@ struct HashTableGrower
/** When used as a Grower, it turns a hash table into something like a lookup table.
* It remains non-optimal - the cells store the keys.
* Also, the compiler can not completely remove the code of passing through the collision resolution chain, although it is not needed.
* TODO Make a proper lookup table.
* NOTE: Better to use FixedHashTable instead.
*/
template <size_t key_bits>
struct HashTableFixedGrower
@ -358,9 +334,14 @@ protected:
template <typename, typename, typename, typename, typename, typename, size_t>
friend class TwoLevelHashTable;
template <typename, typename, size_t>
friend class TwoLevelStringHashTable;
template <typename SubMaps>
friend class StringHashTable;
using HashValue = size_t;
using Self = HashTable;
using cell_type = Cell;
size_t m_size = 0; /// Amount of elements
Cell * buf; /// A piece of memory for all elements except the element with zero key.
@ -580,9 +561,10 @@ protected:
public:
using key_type = Key;
using mapped_type = typename Cell::mapped_type;
using value_type = typename Cell::value_type;
using cell_type = Cell;
// Use lookupResultGetMapped/Key to work with these values.
using LookupResult = Cell *;
using ConstLookupResult = const Cell *;
@ -745,7 +727,7 @@ protected:
/// If the key is zero, insert it into a special place and return true.
/// We don't have to persist a zero key, because it's not actually inserted.
/// That's why we just take a Key by value, and not a key holder.
bool ALWAYS_INLINE emplaceIfZero(Key x, LookupResult & it, bool & inserted, size_t hash_value)
bool ALWAYS_INLINE emplaceIfZero(const Key & x, LookupResult & it, bool & inserted, size_t hash_value)
{
/// If it is claimed that the zero key can not be inserted into the table.
if (!Cell::need_zero_value_storage)
@ -787,7 +769,7 @@ protected:
keyHolderPersistKey(key_holder);
const auto & key = keyHolderGetKey(key_holder);
new(&buf[place_value]) Cell(key, *this);
new (&buf[place_value]) Cell(key, *this);
buf[place_value].setHash(hash_value);
inserted = true;
++m_size;
@ -840,7 +822,7 @@ public:
}
if (res.second)
insertSetMapped(lookupResultGetMapped(res.first), x);
insertSetMapped(res.first->getMapped(), x);
return res;
}
@ -863,11 +845,11 @@ public:
*
* Example usage:
*
* Map::iterator it;
* Map::LookupResult it;
* bool inserted;
* map.emplace(key, it, inserted);
* if (inserted)
* new(&it->second) Mapped(value);
* new (&it->getMapped()) Mapped(value);
*/
template <typename KeyHolder>
void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted)
@ -897,7 +879,7 @@ public:
resize();
}
LookupResult ALWAYS_INLINE find(Key x)
LookupResult ALWAYS_INLINE find(const Key & x)
{
if (Cell::isZero(x, *this))
return this->hasZero() ? this->zeroValue() : nullptr;
@ -907,12 +889,12 @@ public:
return !buf[place_value].isZero(*this) ? &buf[place_value] : nullptr;
}
ConstLookupResult ALWAYS_INLINE find(Key x) const
ConstLookupResult ALWAYS_INLINE find(const Key & x) const
{
return const_cast<std::decay_t<decltype(*this)> *>(this)->find(x);
}
LookupResult ALWAYS_INLINE find(Key x, size_t hash_value)
LookupResult ALWAYS_INLINE find(const Key & x, size_t hash_value)
{
if (Cell::isZero(x, *this))
return this->hasZero() ? this->zeroValue() : nullptr;
@ -921,7 +903,12 @@ public:
return !buf[place_value].isZero(*this) ? &buf[place_value] : nullptr;
}
bool ALWAYS_INLINE has(Key x) const
ConstLookupResult ALWAYS_INLINE find(const Key & x, size_t hash_value) const
{
return const_cast<std::decay_t<decltype(*this)> *>(this)->find(x, hash_value);
}
bool ALWAYS_INLINE has(const Key & x) const
{
if (Cell::isZero(x, *this))
return this->hasZero();
@ -932,7 +919,7 @@ public:
}
bool ALWAYS_INLINE has(Key x, size_t hash_value) const
bool ALWAYS_INLINE has(const Key & x, size_t hash_value) const
{
if (Cell::isZero(x, *this))
return this->hasZero();

View File

@ -38,7 +38,6 @@ protected:
friend class Reader;
using Self = SmallTable;
using cell_type = Cell;
size_t m_size = 0; /// Amount of elements.
Cell buf[capacity]; /// A piece of memory for all elements.
@ -72,8 +71,9 @@ protected:
public:
using key_type = Key;
using mapped_type = typename Cell::mapped_type;
using value_type = typename Cell::value_type;
using cell_type = Cell;
class Reader final : private Cell::State
{
@ -391,16 +391,17 @@ class SmallMapTable : public SmallTable<Key, Cell, capacity>
{
public:
using key_type = Key;
using mapped_type = typename Cell::Mapped;
using mapped_type = typename Cell::mapped_type;
using value_type = typename Cell::value_type;
using cell_type = Cell;
mapped_type & ALWAYS_INLINE operator[](Key x)
{
typename SmallMapTable::iterator it;
bool inserted;
this->emplace(x, it, inserted);
new(&it->getSecond()) mapped_type();
return it->getSecond();
if (inserted)
new (&it->getMapped()) mapped_type();
return it->getMapped();
}
};

View File

@ -0,0 +1,182 @@
#pragma once
#include <Common/HashTable/HashMap.h>
#include <Common/HashTable/HashTableAllocator.h>
#include <Common/HashTable/StringHashTable.h>
template <typename Key, typename TMapped>
struct StringHashMapCell : public HashMapCell<Key, TMapped, StringHashTableHash, HashTableNoState>
{
using Base = HashMapCell<Key, TMapped, StringHashTableHash, HashTableNoState>;
using value_type = typename Base::value_type;
using Base::Base;
static constexpr bool need_zero_value_storage = false;
// external
const StringRef getKey() const { return toStringRef(this->value.first); }
// internal
static const Key & getKey(const value_type & value_) { return value_.first; }
};
template <typename TMapped>
struct StringHashMapCell<StringKey16, TMapped> : public HashMapCell<StringKey16, TMapped, StringHashTableHash, HashTableNoState>
{
using Base = HashMapCell<StringKey16, TMapped, StringHashTableHash, HashTableNoState>;
using value_type = typename Base::value_type;
using Base::Base;
static constexpr bool need_zero_value_storage = false;
bool isZero(const HashTableNoState & state) const { return isZero(this->value.first, state); }
// Assuming String does not contain zero bytes. NOTE: Cannot be used in serialized method
static bool isZero(const StringKey16 & key, const HashTableNoState & /*state*/) { return key.low == 0; }
void setZero() { this->value.first.low = 0; }
// external
const StringRef getKey() const { return toStringRef(this->value.first); }
// internal
static const StringKey16 & getKey(const value_type & value_) { return value_.first; }
};
template <typename TMapped>
struct StringHashMapCell<StringKey24, TMapped> : public HashMapCell<StringKey24, TMapped, StringHashTableHash, HashTableNoState>
{
using Base = HashMapCell<StringKey24, TMapped, StringHashTableHash, HashTableNoState>;
using value_type = typename Base::value_type;
using Base::Base;
static constexpr bool need_zero_value_storage = false;
bool isZero(const HashTableNoState & state) const { return isZero(this->value.first, state); }
// Assuming String does not contain zero bytes. NOTE: Cannot be used in serialized method
static bool isZero(const StringKey24 & key, const HashTableNoState & /*state*/) { return key.a == 0; }
void setZero() { this->value.first.a = 0; }
// external
const StringRef getKey() const { return toStringRef(this->value.first); }
// internal
static const StringKey24 & getKey(const value_type & value_) { return value_.first; }
};
template <typename TMapped>
struct StringHashMapCell<StringRef, TMapped> : public HashMapCellWithSavedHash<StringRef, TMapped, StringHashTableHash, HashTableNoState>
{
using Base = HashMapCellWithSavedHash<StringRef, TMapped, StringHashTableHash, HashTableNoState>;
using value_type = typename Base::value_type;
using Base::Base;
static constexpr bool need_zero_value_storage = false;
// external
using Base::getKey;
// internal
static const StringRef & getKey(const value_type & value_) { return value_.first; }
};
template <typename TMapped, typename Allocator>
struct StringHashMapSubMaps
{
using T0 = StringHashTableEmpty<StringHashMapCell<StringRef, TMapped>>;
using T1 = HashMapTable<StringKey8, StringHashMapCell<StringKey8, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>;
using T2 = HashMapTable<StringKey16, StringHashMapCell<StringKey16, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>;
using T3 = HashMapTable<StringKey24, StringHashMapCell<StringKey24, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>;
using Ts = HashMapTable<StringRef, StringHashMapCell<StringRef, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>;
};
template <typename TMapped, typename Allocator = HashTableAllocator>
class StringHashMap : public StringHashTable<StringHashMapSubMaps<TMapped, Allocator>>
{
public:
using Key = StringRef;
using Base = StringHashTable<StringHashMapSubMaps<TMapped, Allocator>>;
using Self = StringHashMap;
using LookupResult = typename Base::LookupResult;
using Base::Base;
/// Merge every cell's value of the current map into the destination map.
/// Func should have signature void(Mapped & dst, Mapped & src, bool emplaced).
/// Func is invoked once for each filled cell of the current map. If the destination map
/// doesn't contain a cell with the same key, a new cell is emplaced into it and func is
/// called with emplaced set to true; otherwise emplaced is set to false.
template <typename Func>
void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func)
{
if (this->m0.hasZero() && that.m0.hasZero())
func(that.m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), false);
else if (this->m0.hasZero())
{
that.m0.setHasZero();
func(that.m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), true);
}
this->m1.mergeToViaEmplace(that.m1, func);
this->m2.mergeToViaEmplace(that.m2, func);
this->m3.mergeToViaEmplace(that.m3, func);
this->ms.mergeToViaEmplace(that.ms, func);
}
/// Merge every cell's value of the current map into the destination map via find.
/// Func should have signature void(Mapped & dst, Mapped & src, bool exist).
/// Func is invoked once for each filled cell of the current map. If the destination map
/// doesn't contain a cell with the same key, func is called with exist set to false;
/// otherwise exist is set to true.
template <typename Func>
void ALWAYS_INLINE mergeToViaFind(Self & that, Func && func)
{
if (this->m0.size() && that.m0.size())
func(that.m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), true);
else if (this->m0.size())
func(this->m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), false);
this->m1.mergeToViaFind(that.m1, func);
this->m2.mergeToViaFind(that.m2, func);
this->m3.mergeToViaFind(that.m3, func);
this->ms.mergeToViaFind(that.ms, func);
}
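Both merge flavors take the same callback shape. A minimal sketch of folding a per-thread map into a shared one (hypothetical variable names, assuming UInt64 counts):

StringHashMap<UInt64> global;
StringHashMap<UInt64> local;
local.mergeToViaEmplace(global, [](UInt64 & dst, UInt64 & src, bool emplaced)
{
    if (emplaced)
        dst = src; /// freshly emplaced cell: the mapped slot was raw memory
    else
        dst += src;
});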
TMapped & ALWAYS_INLINE operator[](const Key & x)
{
LookupResult it;
bool inserted;
this->emplace(x, it, inserted);
if (inserted)
new (&it->getMapped()) TMapped();
return it->getMapped();
}
template <typename Func>
void ALWAYS_INLINE forEachValue(Func && func)
{
if (this->m0.size())
{
func(StringRef{}, this->m0.zeroValue()->getMapped());
}
for (auto & v : this->m1)
{
func(v.getKey(), v.getMapped());
}
for (auto & v : this->m2)
{
func(v.getKey(), v.getMapped());
}
for (auto & v : this->m3)
{
func(v.getKey(), v.getMapped());
}
for (auto & v : this->ms)
{
func(v.getKey(), v.getMapped());
}
}
template <typename Func>
void ALWAYS_INLINE forEachMapped(Func && func)
{
if (this->m0.size())
func(this->m0.zeroValue()->getMapped());
for (auto & v : this->m1)
func(v.getMapped());
for (auto & v : this->m2)
func(v.getMapped());
for (auto & v : this->m3)
func(v.getMapped());
for (auto & v : this->ms)
func(v.getMapped());
}
};
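A usage sketch for the traversal helpers above (hypothetical names; std::cerr only for illustration):

StringHashMap<UInt64> word_count;
++word_count[StringRef("hello", 5)];
word_count.forEachValue([](StringRef key, UInt64 & count)
{
    std::cerr << key.toString() << " -> " << count << std::endl;
});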

View File

@ -0,0 +1,407 @@
#pragma once
#include <Common/HashTable/HashMap.h>
#include <Common/HashTable/HashTable.h>
#include <variant>
using StringKey8 = UInt64;
using StringKey16 = DB::UInt128;
struct StringKey24
{
UInt64 a;
UInt64 b;
UInt64 c;
bool operator==(const StringKey24 rhs) const { return a == rhs.a && b == rhs.b && c == rhs.c; }
};
inline StringRef ALWAYS_INLINE toStringRef(const StringKey8 & n)
{
return {reinterpret_cast<const char *>(&n), 8ul - (__builtin_clzll(n) >> 3)};
}
inline StringRef ALWAYS_INLINE toStringRef(const StringKey16 & n)
{
return {reinterpret_cast<const char *>(&n), 16ul - (__builtin_clzll(n.high) >> 3)};
}
inline StringRef ALWAYS_INLINE toStringRef(const StringKey24 & n)
{
return {reinterpret_cast<const char *>(&n), 24ul - (__builtin_clzll(n.c) >> 3)};
}
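The length recovery above relies on little-endian layout and on short keys being zero-padded past their last byte (empty strings never reach these helpers; they are routed to the dedicated zero submap). A self-contained check of the arithmetic, assuming GCC/Clang builtins:

#include <cassert>
#include <cstdint>
#include <cstring>

int main()
{
    uint64_t n = 0;
    std::memcpy(&n, "abc", 3); /// a 3-byte key packed into a zero-padded StringKey8
    /// 40 leading zero bits == 5 zero bytes, hence length == 8 - 5 == 3
    assert(8 - (__builtin_clzll(n) >> 3) == 3);
}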
struct StringHashTableHash
{
#if defined(__SSE4_2__)
size_t ALWAYS_INLINE operator()(StringKey8 key) const
{
size_t res = -1ULL;
res = _mm_crc32_u64(res, key);
return res;
}
size_t ALWAYS_INLINE operator()(StringKey16 key) const
{
size_t res = -1ULL;
res = _mm_crc32_u64(res, key.low);
res = _mm_crc32_u64(res, key.high);
return res;
}
size_t ALWAYS_INLINE operator()(StringKey24 key) const
{
size_t res = -1ULL;
res = _mm_crc32_u64(res, key.a);
res = _mm_crc32_u64(res, key.b);
res = _mm_crc32_u64(res, key.c);
return res;
}
#else
size_t ALWAYS_INLINE operator()(StringKey8 key) const
{
return CityHash_v1_0_2::CityHash64(reinterpret_cast<const char *>(&key), 8);
}
size_t ALWAYS_INLINE operator()(StringKey16 key) const
{
return CityHash_v1_0_2::CityHash64(reinterpret_cast<const char *>(&key), 16);
}
size_t ALWAYS_INLINE operator()(StringKey24 key) const
{
return CityHash_v1_0_2::CityHash64(reinterpret_cast<const char *>(&key), 24);
}
#endif
size_t ALWAYS_INLINE operator()(StringRef key) const
{
return StringRefHash()(key);
}
};
template <typename Cell>
struct StringHashTableEmpty
{
using Self = StringHashTableEmpty;
bool has_zero = false;
std::aligned_storage_t<sizeof(Cell), alignof(Cell)> zero_value_storage; /// Storage of element with zero key.
public:
bool hasZero() const { return has_zero; }
void setHasZero()
{
has_zero = true;
new (zeroValue()) Cell();
}
void setHasZero(const Cell & other)
{
has_zero = true;
new (zeroValue()) Cell(other);
}
void clearHasZero()
{
has_zero = false;
if (!std::is_trivially_destructible_v<Cell>)
zeroValue()->~Cell();
}
Cell * zeroValue() { return reinterpret_cast<Cell *>(&zero_value_storage); }
const Cell * zeroValue() const { return reinterpret_cast<const Cell *>(&zero_value_storage); }
using LookupResult = Cell *;
using ConstLookupResult = const Cell *;
template <typename KeyHolder>
void ALWAYS_INLINE emplace(KeyHolder &&, LookupResult & it, bool & inserted, size_t = 0)
{
if (!hasZero())
{
setHasZero();
inserted = true;
}
else
inserted = false;
it = zeroValue();
}
template <typename Key>
LookupResult ALWAYS_INLINE find(const Key &, size_t = 0)
{
return hasZero() ? zeroValue() : nullptr;
}
template <typename Key>
ConstLookupResult ALWAYS_INLINE find(const Key &, size_t = 0) const
{
return hasZero() ? zeroValue() : nullptr;
}
void write(DB::WriteBuffer & wb) const { zeroValue()->write(wb); }
void writeText(DB::WriteBuffer & wb) const { zeroValue()->writeText(wb); }
void read(DB::ReadBuffer & rb) { zeroValue()->read(rb); }
void readText(DB::ReadBuffer & rb) { zeroValue()->readText(rb); }
size_t size() const { return hasZero() ? 1 : 0; }
bool empty() const { return !hasZero(); }
size_t getBufferSizeInBytes() const { return sizeof(Cell); }
size_t getCollisions() const { return 0; }
};
template <size_t initial_size_degree = 8>
struct StringHashTableGrower : public HashTableGrower<initial_size_degree>
{
// Smooth growing for string maps
void increaseSize() { this->size_degree += 1; }
};
template <typename Mapped>
struct StringHashTableLookupResult
{
Mapped * mapped_ptr;
StringHashTableLookupResult() {}
StringHashTableLookupResult(Mapped * mapped_ptr_) : mapped_ptr(mapped_ptr_) {}
StringHashTableLookupResult(std::nullptr_t) : mapped_ptr(nullptr) {}
const VoidKey getKey() const { return {}; }
auto & getMapped() { return *mapped_ptr; }
auto & operator*() { return *this; }
auto & operator*() const { return *this; }
auto * operator->() { return this; }
auto * operator->() const { return this; }
operator bool() const { return mapped_ptr; }
friend bool operator==(const StringHashTableLookupResult & a, const std::nullptr_t &) { return !a.mapped_ptr; }
friend bool operator==(const std::nullptr_t &, const StringHashTableLookupResult & b) { return !b.mapped_ptr; }
friend bool operator!=(const StringHashTableLookupResult & a, const std::nullptr_t &) { return a.mapped_ptr; }
friend bool operator!=(const std::nullptr_t &, const StringHashTableLookupResult & b) { return b.mapped_ptr; }
};
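The proxy keeps call sites pointer-like. A minimal sketch (hypothetical map and key):

StringHashMap<UInt64> map;
StringRef key("hello", 5);
if (auto it = map.find(key); it != nullptr)
    ++it->getMapped();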
template <typename SubMaps>
class StringHashTable : private boost::noncopyable
{
protected:
static constexpr size_t NUM_MAPS = 5;
// Map for storing empty string
using T0 = typename SubMaps::T0;
// Short strings are stored as numbers
using T1 = typename SubMaps::T1;
using T2 = typename SubMaps::T2;
using T3 = typename SubMaps::T3;
// Long strings are stored as StringRef along with saved hash
using Ts = typename SubMaps::Ts;
using Self = StringHashTable;
template <typename, typename, size_t>
friend class TwoLevelStringHashTable;
T0 m0;
T1 m1;
T2 m2;
T3 m3;
Ts ms;
public:
using Key = StringRef;
using key_type = Key;
using mapped_type = typename Ts::mapped_type;
using value_type = typename Ts::value_type;
using cell_type = typename Ts::cell_type;
using LookupResult = StringHashTableLookupResult<typename cell_type::mapped_type>;
using ConstLookupResult = StringHashTableLookupResult<const typename cell_type::mapped_type>;
StringHashTable() {}
StringHashTable(size_t reserve_for_num_elements)
: m1{reserve_for_num_elements / 4}
, m2{reserve_for_num_elements / 4}
, m3{reserve_for_num_elements / 4}
, ms{reserve_for_num_elements / 4}
{
}
StringHashTable(StringHashTable && rhs) { *this = std::move(rhs); }
~StringHashTable() {}
public:
// Dispatch is written in a way that maximizes the performance:
// 1. Always memcpy in multiples of 8 bytes
// 2. Use switch case extension to generate fast dispatching table
// 3. Funcs are named callables that can be force_inlined
// NOTE: It relies on Little Endianness
template <typename Self, typename KeyHolder, typename Func>
static auto ALWAYS_INLINE dispatch(Self & self, KeyHolder && key_holder, Func && func)
{
const StringRef & x = keyHolderGetKey(key_holder);
const size_t sz = x.size;
if (sz == 0)
{
keyHolderDiscardKey(key_holder);
return func(self.m0, VoidKey{}, 0);
}
const char * p = x.data;
// pending bits that need to be shifted out (e.g. sz == 3 -> 5 pad bytes -> s == 40)
const char s = (-sz & 7) * 8;
union
{
StringKey8 k8;
StringKey16 k16;
StringKey24 k24;
UInt64 n[3];
};
StringHashTableHash hash;
switch ((sz - 1) >> 3)
{
case 0: // 1..8 bytes
{
// first half of a 4 KiB page: an 8-byte forward read cannot cross a page boundary
if ((reinterpret_cast<uintptr_t>(p) & 2048) == 0)
{
memcpy(&n[0], p, 8);
n[0] &= -1ul >> s;
}
else
{
const char * lp = x.data + x.size - 8;
memcpy(&n[0], lp, 8);
n[0] >>= s;
}
keyHolderDiscardKey(key_holder);
return func(self.m1, k8, hash(k8));
}
case 1: // 9..16 bytes
{
memcpy(&n[0], p, 8);
const char * lp = x.data + x.size - 8;
memcpy(&n[1], lp, 8);
n[1] >>= s;
keyHolderDiscardKey(key_holder);
return func(self.m2, k16, hash(k16));
}
case 2: // 17..24 bytes
{
memcpy(&n[0], p, 16);
const char * lp = x.data + x.size - 8;
memcpy(&n[2], lp, 8);
n[2] >>= s;
keyHolderDiscardKey(key_holder);
return func(self.m3, k24, hash(k24));
}
default: // >= 25 bytes
{
return func(self.ms, std::forward<KeyHolder>(key_holder), hash(x));
}
}
}
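A worked trace of the dispatch above for a hypothetical 11-byte key "hello world":

/// (sz - 1) >> 3 == 1            -> case 1: 9..16 bytes, submap m2
/// s == (-11 & 7) * 8 == 40      -> 5 pad bytes, i.e. 40 bits to shift out
/// n[0] = bytes 0..7; n[1] = bytes 3..10 shifted right by 40, leaving
/// bytes 8..10 zero-padded; k16 is then hashed and looked up in m2.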
struct EmplaceCallable
{
LookupResult & mapped;
bool & inserted;
EmplaceCallable(LookupResult & mapped_, bool & inserted_)
: mapped(mapped_), inserted(inserted_) {}
template <typename Map, typename KeyHolder>
void ALWAYS_INLINE operator()(Map & map, KeyHolder && key_holder, size_t hash)
{
typename Map::LookupResult result;
map.emplace(key_holder, result, inserted, hash);
mapped = &result->getMapped();
}
};
template <typename KeyHolder>
void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted)
{
this->dispatch(*this, key_holder, EmplaceCallable(it, inserted));
}
struct FindCallable
{
// find() doesn't need any key memory management, so we don't work with
// any key holders here, only with normal keys. The key type is still
// different for every subtable, this is why it is a template parameter.
template <typename Submap, typename SubmapKey>
auto ALWAYS_INLINE operator()(Submap & map, const SubmapKey & key, size_t hash)
{
auto it = map.find(key, hash);
return it ? &it->getMapped() : nullptr;
}
};
LookupResult ALWAYS_INLINE find(const Key & x)
{
return dispatch(*this, x, FindCallable{});
}
ConstLookupResult ALWAYS_INLINE find(const Key & x) const
{
return dispatch(*this, x, FindCallable{});
}
bool ALWAYS_INLINE has(const Key & x, size_t = 0) const
{
return dispatch(*this, x, FindCallable{}) != nullptr;
}
void write(DB::WriteBuffer & wb) const
{
m0.write(wb);
m1.write(wb);
m2.write(wb);
m3.write(wb);
ms.write(wb);
}
void writeText(DB::WriteBuffer & wb) const
{
m0.writeText(wb);
DB::writeChar(',', wb);
m1.writeText(wb);
DB::writeChar(',', wb);
m2.writeText(wb);
DB::writeChar(',', wb);
m3.writeText(wb);
DB::writeChar(',', wb);
ms.writeText(wb);
}
void read(DB::ReadBuffer & rb)
{
m0.read(rb);
m1.read(rb);
m2.read(rb);
m3.read(rb);
ms.read(rb);
}
void readText(DB::ReadBuffer & rb)
{
m0.readText(rb);
DB::assertChar(',', rb);
m1.readText(rb);
DB::assertChar(',', rb);
m2.readText(rb);
DB::assertChar(',', rb);
m3.readText(rb);
DB::assertChar(',', rb);
ms.readText(rb);
}
size_t size() const { return m0.size() + m1.size() + m2.size() + m3.size() + ms.size(); }
bool empty() const { return m0.empty() && m1.empty() && m2.empty() && m3.empty() && ms.empty(); }
size_t getBufferSizeInBytes() const
{
return m0.getBufferSizeInBytes() + m1.getBufferSizeInBytes() + m2.getBufferSizeInBytes() + m3.getBufferSizeInBytes()
+ ms.getBufferSizeInBytes();
}
void clearAndShrink()
{
m0.clearHasZero();
m1.clearAndShrink();
m2.clearAndShrink();
m3.clearAndShrink();
ms.clearAndShrink();
}
};

View File

@ -16,10 +16,6 @@ template
class TwoLevelHashMapTable : public TwoLevelHashTable<Key, Cell, Hash, Grower, Allocator, ImplTable<Key, Cell, Hash, Grower, Allocator>>
{
public:
using key_type = Key;
using mapped_type = typename Cell::Mapped;
using value_type = typename Cell::value_type;
using Impl = ImplTable<Key, Cell, Hash, Grower, Allocator>;
using LookupResult = typename Impl::LookupResult;
@ -32,16 +28,16 @@ public:
this->impls[i].forEachMapped(func);
}
mapped_type & ALWAYS_INLINE operator[](Key x)
typename Cell::Mapped & ALWAYS_INLINE operator[](const Key & x)
{
typename TwoLevelHashMapTable::LookupResult it;
LookupResult it;
bool inserted;
this->emplace(x, it, inserted);
if (inserted)
new(lookupResultGetMapped(it)) mapped_type();
new (&it->getMapped()) typename Cell::Mapped();
return *lookupResultGetMapped(it);
return it->getMapped();
}
};

View File

@ -82,7 +82,9 @@ protected:
public:
using key_type = typename Impl::key_type;
using mapped_type = typename Impl::mapped_type;
using value_type = typename Impl::value_type;
using cell_type = typename Impl::cell_type;
using LookupResult = typename Impl::LookupResult;
using ConstLookupResult = typename Impl::ConstLookupResult;
@ -217,7 +219,7 @@ public:
emplace(Cell::getKey(x), res.first, res.second, hash_value);
if (res.second)
insertSetMapped(lookupResultGetMapped(res.first), x);
insertSetMapped(res.first->getMapped(), x);
return res;
}

View File

@ -0,0 +1,33 @@
#pragma once
#include <Common/HashTable/StringHashMap.h>
#include <Common/HashTable/TwoLevelStringHashTable.h>
template <typename TMapped, typename Allocator = HashTableAllocator, template <typename...> typename ImplTable = StringHashMap>
class TwoLevelStringHashMap : public TwoLevelStringHashTable<StringHashMapSubMaps<TMapped, Allocator>, ImplTable<TMapped, Allocator>>
{
public:
using Key = StringRef;
using Self = TwoLevelStringHashMap;
using Base = TwoLevelStringHashTable<StringHashMapSubMaps<TMapped, Allocator>, StringHashMap<TMapped, Allocator>>;
using LookupResult = typename Base::LookupResult;
using Base::Base;
template <typename Func>
void ALWAYS_INLINE forEachMapped(Func && func)
{
for (auto i = 0u; i < this->NUM_BUCKETS; ++i)
this->impls[i].forEachMapped(func);
}
TMapped & ALWAYS_INLINE operator[](const Key & x)
{
bool inserted;
LookupResult it;
this->emplace(x, it, inserted);
if (inserted)
new (&it->getMapped()) TMapped();
return it->getMapped();
}
};

View File

@ -0,0 +1,225 @@
#pragma once
#include <Common/HashTable/StringHashTable.h>
template <typename SubMaps, typename ImplTable = StringHashTable<SubMaps>, size_t BITS_FOR_BUCKET = 8>
class TwoLevelStringHashTable : private boost::noncopyable
{
protected:
using HashValue = size_t;
using Self = TwoLevelStringHashTable;
public:
using Key = StringRef;
using Impl = ImplTable;
static constexpr size_t NUM_BUCKETS = 1ULL << BITS_FOR_BUCKET;
static constexpr size_t MAX_BUCKET = NUM_BUCKETS - 1;
// TODO: currently hashing contains redundant computations when doing distributed or external aggregations
size_t hash(const Key & x) const
{
return const_cast<Self &>(*this).dispatch(*this, x, [&](const auto &, const auto &, size_t hash) { return hash; });
}
size_t operator()(const Key & x) const { return hash(x); }
/// NOTE Bad for hash tables with more than 2^32 cells.
static size_t getBucketFromHash(size_t hash_value) { return (hash_value >> (32 - BITS_FOR_BUCKET)) & MAX_BUCKET; }
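// Worked example with BITS_FOR_BUCKET == 8: getBucketFromHash(0xABCD1234)
// == (0xABCD1234 >> 24) & 0xFF == 0xAB, i.e. bits 24..31 of the hash select the bucket.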
public:
using key_type = typename Impl::key_type;
using mapped_type = typename Impl::mapped_type;
using value_type = typename Impl::value_type;
using cell_type = typename Impl::cell_type;
using LookupResult = typename Impl::LookupResult;
using ConstLookupResult = typename Impl::ConstLookupResult;
Impl impls[NUM_BUCKETS];
TwoLevelStringHashTable() {}
template <typename Source>
TwoLevelStringHashTable(const Source & src)
{
if (src.m0.hasZero())
impls[0].m0.setHasZero(*src.m0.zeroValue());
for (auto & v : src.m1)
{
size_t hash_value = v.getHash(src.m1);
size_t buck = getBucketFromHash(hash_value);
impls[buck].m1.insertUniqueNonZero(&v, hash_value);
}
for (auto & v : src.m2)
{
size_t hash_value = v.getHash(src.m2);
size_t buck = getBucketFromHash(hash_value);
impls[buck].m2.insertUniqueNonZero(&v, hash_value);
}
for (auto & v : src.m3)
{
size_t hash_value = v.getHash(src.m3);
size_t buck = getBucketFromHash(hash_value);
impls[buck].m3.insertUniqueNonZero(&v, hash_value);
}
for (auto & v : src.ms)
{
size_t hash_value = v.getHash(src.ms);
size_t buck = getBucketFromHash(hash_value);
impls[buck].ms.insertUniqueNonZero(&v, hash_value);
}
}
// This function is mostly the same as StringHashTable::dispatch, but with
// added bucket computation. See the comments there.
template <typename Self, typename Func, typename KeyHolder>
static auto ALWAYS_INLINE dispatch(Self & self, KeyHolder && key_holder, Func && func)
{
const StringRef & x = keyHolderGetKey(key_holder);
const size_t sz = x.size;
if (sz == 0)
{
keyHolderDiscardKey(key_holder);
return func(self.impls[0].m0, VoidKey{}, 0);
}
const char * p = x.data;
// pending bits that need to be shifted out (e.g. sz == 3 -> 5 pad bytes -> s == 40)
const char s = (-sz & 7) * 8;
union
{
StringKey8 k8;
StringKey16 k16;
StringKey24 k24;
UInt64 n[3];
};
StringHashTableHash hash;
switch ((sz - 1) >> 3)
{
case 0:
{
// first half of a 4 KiB page: an 8-byte forward read cannot cross a page boundary
if ((reinterpret_cast<uintptr_t>(p) & 2048) == 0)
{
memcpy(&n[0], p, 8);
n[0] &= -1ul >> s;
}
else
{
const char * lp = x.data + x.size - 8;
memcpy(&n[0], lp, 8);
n[0] >>= s;
}
auto res = hash(k8);
auto buck = getBucketFromHash(res);
keyHolderDiscardKey(key_holder);
return func(self.impls[buck].m1, k8, res);
}
case 1:
{
memcpy(&n[0], p, 8);
const char * lp = x.data + x.size - 8;
memcpy(&n[1], lp, 8);
n[1] >>= s;
auto res = hash(k16);
auto buck = getBucketFromHash(res);
keyHolderDiscardKey(key_holder);
return func(self.impls[buck].m2, k16, res);
}
case 2:
{
memcpy(&n[0], p, 16);
const char * lp = x.data + x.size - 8;
memcpy(&n[2], lp, 8);
n[2] >>= s;
auto res = hash(k24);
auto buck = getBucketFromHash(res);
keyHolderDiscardKey(key_holder);
return func(self.impls[buck].m3, k24, res);
}
default:
{
auto res = hash(x);
auto buck = getBucketFromHash(res);
return func(self.impls[buck].ms, std::forward<KeyHolder>(key_holder), res);
}
}
}
template <typename KeyHolder>
void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted)
{
dispatch(*this, key_holder, typename Impl::EmplaceCallable{it, inserted});
}
LookupResult ALWAYS_INLINE find(const Key x)
{
return dispatch(*this, x, typename Impl::FindCallable{});
}
ConstLookupResult ALWAYS_INLINE find(const Key x) const
{
return dispatch(*this, x, typename Impl::FindCallable{});
}
void write(DB::WriteBuffer & wb) const
{
for (size_t i = 0; i < NUM_BUCKETS; ++i)
impls[i].write(wb);
}
void writeText(DB::WriteBuffer & wb) const
{
for (size_t i = 0; i < NUM_BUCKETS; ++i)
{
if (i != 0)
DB::writeChar(',', wb);
impls[i].writeText(wb);
}
}
void read(DB::ReadBuffer & rb)
{
for (size_t i = 0; i < NUM_BUCKETS; ++i)
impls[i].read(rb);
}
void readText(DB::ReadBuffer & rb)
{
for (size_t i = 0; i < NUM_BUCKETS; ++i)
{
if (i != 0)
DB::assertChar(',', rb);
impls[i].readText(rb);
}
}
size_t size() const
{
size_t res = 0;
for (size_t i = 0; i < NUM_BUCKETS; ++i)
res += impls[i].size();
return res;
}
bool empty() const
{
for (size_t i = 0; i < NUM_BUCKETS; ++i)
if (!impls[i].empty())
return false;
return true;
}
size_t getBufferSizeInBytes() const
{
size_t res = 0;
for (size_t i = 0; i < NUM_BUCKETS; ++i)
res += impls[i].getBufferSizeInBytes();
return res;
}
};
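A sketch of the converting constructor in use, as when an aggregation map is switched to two-level (hypothetical names):

StringHashMap<UInt64> flat;
++flat[StringRef("hello", 5)];
TwoLevelStringHashMap<UInt64> two_level(flat); /// cells are redistributed into NUM_BUCKETS buckets by hash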

View File

@ -1,6 +1,7 @@
#include <Poco/Util/AbstractConfiguration.h>
#include <Common/Macros.h>
#include <Common/Exception.h>
#include <IO/WriteHelpers.h>
namespace DB
@ -66,7 +67,9 @@ String Macros::expand(const String & s, size_t level, const String & database_na
else if (macro_name == "table" && !table_name.empty())
res += table_name;
else
throw Exception("No macro " + macro_name + " in config", ErrorCodes::SYNTAX_ERROR);
throw Exception("No macro '" + macro_name +
"' in config while processing substitutions in '" + s + "' at "
+ toString(begin), ErrorCodes::SYNTAX_ERROR);
pos = end + 1;
}

View File

@ -165,12 +165,10 @@ struct RadixSortIntTraits
template <typename T>
using RadixSortNumTraits =
std::conditional_t<std::is_integral_v<T>,
std::conditional_t<std::is_unsigned_v<T>,
RadixSortUIntTraits<T>,
RadixSortIntTraits<T>>,
RadixSortFloatTraits<T>>;
using RadixSortNumTraits = std::conditional_t<
is_integral_v<T>,
std::conditional_t<is_unsigned_v<T>, RadixSortUIntTraits<T>, RadixSortIntTraits<T>>,
RadixSortFloatTraits<T>>;
template <typename Traits>

View File

@ -10,6 +10,9 @@ struct SettingChange
{
String name;
Field value;
friend bool operator ==(const SettingChange & lhs, const SettingChange & rhs) { return (lhs.name == rhs.name) && (lhs.value == rhs.value); }
friend bool operator !=(const SettingChange & lhs, const SettingChange & rhs) { return !(lhs == rhs); }
};
using SettingsChanges = std::vector<SettingChange>;

View File

@ -369,7 +369,7 @@ private:
if (!it)
return nullptr;
return *lookupResultGetMapped(it);
return it->getMapped();
}
void rebuildCounterMap()

View File

@ -30,7 +30,7 @@ std::string signalToErrorMessage(int sig, const siginfo_t & info, const ucontext
else
error << "Address: " << info.si_addr;
#if defined(__x86_64__) && !defined(__FreeBSD__) && !defined(__APPLE__)
#if defined(__x86_64__) && !defined(__FreeBSD__) && !defined(__APPLE__) && !defined(__arm__)
auto err_mask = context.uc_mcontext.gregs[REG_ERR];
if ((err_mask & 0x02))
error << " Access: write.";

View File

@ -182,18 +182,8 @@ struct UInt256HashCRC32
struct UInt256HashCRC32 : public UInt256Hash {};
#endif
}
/// Overload hash for type casting
namespace std
{
template <> struct hash<DB::UInt128>
{
size_t operator()(const DB::UInt128 & u) const
{
return CityHash_v1_0_2::Hash128to64({u.low, u.high});
}
};
}
template <> struct is_signed<DB::UInt128>
{
@ -215,4 +205,16 @@ template <> struct is_arithmetic<DB::UInt128>
{
static constexpr bool value = false;
};
/// Overload hash for type casting
namespace std
{
template <> struct hash<DB::UInt128>
{
size_t operator()(const DB::UInt128 & u) const
{
return CityHash_v1_0_2::Hash128to64({u.low, u.high});
}
};
}

View File

@ -68,7 +68,7 @@ protected:
public:
using Configuration = Poco::Util::AbstractConfiguration;
Context & context;
const Context & context;
const Configuration & config;
static constexpr inline auto DEFAULT_HOST = "localhost";
@ -79,7 +79,7 @@ public:
static constexpr inline auto IDENTIFIER_QUOTE_HANDLER = "/identifier_quote";
static constexpr inline auto PING_OK_ANSWER = "Ok.";
XDBCBridgeHelper(Context & global_context_, const Poco::Timespan & http_timeout_, const std::string & connection_string_)
XDBCBridgeHelper(const Context & global_context_, const Poco::Timespan & http_timeout_, const std::string & connection_string_)
: http_timeout(http_timeout_), connection_string(connection_string_), context(global_context_), config(context.getConfigRef())
{
size_t bridge_port = config.getUInt(BridgeHelperMixin::configPrefix() + ".port", DEFAULT_PORT);

View File

@ -155,10 +155,10 @@ int main(int argc, char ** argv)
map.emplace(rand(), it, inserted);
if (inserted)
{
new(lookupResultGetMapped(it)) Arr(n);
new (&it->getMapped()) Arr(n);
for (size_t j = 0; j < n; ++j)
(*lookupResultGetMapped(it))[j] = field;
(it->getMapped())[j] = field;
}
}

View File

@ -31,7 +31,7 @@ void setAffinity()
static inline ALWAYS_INLINE UInt64 rdtsc()
{
#if __x86_64__
#if defined(__x86_64__)
UInt32 a, d;
__asm__ volatile ("rdtsc" : "=a" (a), "=d" (d));
return static_cast<UInt64>(a) | (static_cast<UInt64>(d) << 32);
@ -109,7 +109,7 @@ static inline size_t murmurMix(UInt64 x)
}
#if __x86_64__
#if defined(__x86_64__)
static inline size_t crc32Hash(UInt64 x)
{
UInt64 crc = -1ULL;
@ -309,7 +309,7 @@ int main(int argc, char ** argv)
if (!method || method == 8) test<mulShift> (n, data.data(), "7: mulShift");
if (!method || method == 9) test<tabulation>(n, data.data(), "8: tabulation");
#if __x86_64__
#if defined(__x86_64__)
if (!method || method == 10) test<crc32Hash> (n, data.data(), "9: crc32");
#endif

View File

@ -82,14 +82,14 @@ void aggregate12(Map & map, Source::const_iterator begin, Source::const_iterator
{
if (prev_it != end && *it == *prev_it)
{
++*lookupResultGetMapped(found);
++found->getMapped();
continue;
}
prev_it = it;
bool inserted;
map.emplace(*it, found, inserted);
++*lookupResultGetMapped(found);
++found->getMapped();
}
}
@ -107,14 +107,14 @@ void aggregate22(MapTwoLevel & map, Source::const_iterator begin, Source::const_
{
if (*it == *prev_it)
{
++*lookupResultGetMapped(found);
++found->getMapped();
continue;
}
prev_it = it;
bool inserted;
map.emplace(*it, found, inserted);
++*lookupResultGetMapped(found);
++found->getMapped();
}
}
@ -126,7 +126,7 @@ void merge2(MapTwoLevel * maps, size_t num_threads, size_t bucket)
{
for (size_t i = 1; i < num_threads; ++i)
for (auto it = maps[i].impls[bucket].begin(); it != maps[i].impls[bucket].end(); ++it)
maps[0].impls[bucket][it->getFirst()] += it->getSecond();
maps[0].impls[bucket][it->getKey()] += it->getMapped();
}
void aggregate3(Map & local_map, Map & global_map, Mutex & mutex, Source::const_iterator begin, Source::const_iterator end)
@ -138,7 +138,7 @@ void aggregate3(Map & local_map, Map & global_map, Mutex & mutex, Source::const_
auto found = local_map.find(*it);
if (found)
++*lookupResultGetMapped(found);
++found->getMapped();
else if (local_map.size() < threshold)
++local_map[*it]; /// TODO You could do one lookup, not two.
else
@ -163,13 +163,13 @@ void aggregate33(Map & local_map, Map & global_map, Mutex & mutex, Source::const
Map::LookupResult found;
bool inserted;
local_map.emplace(*it, found, inserted);
++*lookupResultGetMapped(found);
++found->getMapped();
if (inserted && local_map.size() == threshold)
{
std::lock_guard<Mutex> lock(mutex);
for (auto & value_type : local_map)
global_map[value_type.getFirst()] += value_type.getSecond();
global_map[value_type.getKey()] += value_type.getMapped();
local_map.clear();
}
@ -198,7 +198,7 @@ void aggregate4(Map & local_map, MapTwoLevel & global_map, Mutex * mutexes, Sour
auto found = local_map.find(*it);
if (found)
++*lookupResultGetMapped(found);
++found->getMapped();
else
{
size_t hash_value = global_map.hash(*it);
@ -311,7 +311,7 @@ int main(int argc, char ** argv)
for (size_t i = 1; i < num_threads; ++i)
for (auto it = maps[i].begin(); it != maps[i].end(); ++it)
maps[0][it->getFirst()] += it->getSecond();
maps[0][it->getKey()] += it->getMapped();
watch.stop();
double time_merged = watch.elapsedSeconds();
@ -365,7 +365,7 @@ int main(int argc, char ** argv)
for (size_t i = 1; i < num_threads; ++i)
for (auto it = maps[i].begin(); it != maps[i].end(); ++it)
maps[0][it->getFirst()] += it->getSecond();
maps[0][it->getKey()] += it->getMapped();
watch.stop();
@ -435,7 +435,7 @@ int main(int argc, char ** argv)
continue;
finish = false;
maps[0][iterators[i]->getFirst()] += iterators[i]->getSecond();
maps[0][iterators[i]->getKey()] += iterators[i]->getMapped();
++iterators[i];
}
@ -623,7 +623,7 @@ int main(int argc, char ** argv)
for (size_t i = 0; i < num_threads; ++i)
for (auto it = local_maps[i].begin(); it != local_maps[i].end(); ++it)
global_map[it->getFirst()] += it->getSecond();
global_map[it->getKey()] += it->getMapped();
pool.wait();
@ -689,7 +689,7 @@ int main(int argc, char ** argv)
for (size_t i = 0; i < num_threads; ++i)
for (auto it = local_maps[i].begin(); it != local_maps[i].end(); ++it)
global_map[it->getFirst()] += it->getSecond();
global_map[it->getKey()] += it->getMapped();
pool.wait();
@ -760,7 +760,7 @@ int main(int argc, char ** argv)
for (size_t i = 0; i < num_threads; ++i)
for (auto it = local_maps[i].begin(); it != local_maps[i].end(); ++it)
global_map[it->getFirst()] += it->getSecond();
global_map[it->getKey()] += it->getMapped();
pool.wait();

View File

@ -51,9 +51,9 @@ struct AggregateIndependent
map.emplace(*it, place, inserted);
if (inserted)
creator(*lookupResultGetMapped(place));
creator(place->getMapped());
else
updater(*lookupResultGetMapped(place));
updater(place->getMapped());
}
});
}
@ -93,7 +93,7 @@ struct AggregateIndependentWithSequentialKeysOptimization
{
if (it != begin && *it == prev_key)
{
updater(*lookupResultGetMapped(place));
updater(place->getMapped());
continue;
}
prev_key = *it;
@ -102,9 +102,9 @@ struct AggregateIndependentWithSequentialKeysOptimization
map.emplace(*it, place, inserted);
if (inserted)
creator(*lookupResultGetMapped(place));
creator(place->getMapped());
else
updater(*lookupResultGetMapped(place));
updater(place->getMapped());
}
});
}
@ -131,7 +131,7 @@ struct MergeSequential
auto begin = source_maps[i]->begin();
auto end = source_maps[i]->end();
for (auto it = begin; it != end; ++it)
merger((*source_maps[0])[it->getFirst()], it->getSecond());
merger((*source_maps[0])[it->getKey()], it->getMapped());
}
result_map = source_maps[0];
@ -161,7 +161,7 @@ struct MergeSequentialTransposed /// In practice not better than usual.
continue;
finish = false;
merger((*result_map)[iterators[i]->getFirst()], iterators[i]->getSecond());
merger((*result_map)[iterators[i]->getKey()], iterators[i]->getMapped());
++iterators[i];
}

View File

@ -42,7 +42,7 @@ int main(int, char **)
cont[1] = "Goodbye.";
for (auto x : cont)
std::cerr << x.getFirst() << " -> " << x.getSecond() << std::endl;
std::cerr << x.getKey() << " -> " << x.getMapped() << std::endl;
DB::WriteBufferFromOwnString wb;
cont.writeText(wb);

View File

@ -109,7 +109,7 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest)
{
// Since only unsigned integers have guaranteed two's-complement overflow behaviour, we do the math here on unsigned types.
// To simplify and bulletproof the code, we enforce that ValueType is unsigned too.
static_assert(std::is_unsigned_v<ValueType>, "ValueType must be unsigned.");
static_assert(is_unsigned_v<ValueType>, "ValueType must be unsigned.");
using UnsignedDeltaType = ValueType;
// We use signed delta type to turn huge unsigned values into smaller signed:
@ -189,7 +189,7 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest)
template <typename ValueType>
void decompressDataForType(const char * source, UInt32 source_size, char * dest)
{
static_assert(std::is_unsigned_v<ValueType>, "ValueType must be unsigned.");
static_assert(is_unsigned_v<ValueType>, "ValueType must be unsigned.");
using UnsignedDeltaType = ValueType;
using SignedDeltaType = typename std::make_signed<UnsignedDeltaType>::type;

View File

@ -262,10 +262,10 @@ void reverseTranspose(const char * src, T * buf, UInt32 num_bits, UInt32 tail =
reverseTransposeBytes(matrix, col, buf[col]);
}
template <typename T, typename MinMaxT = std::conditional_t<std::is_signed_v<T>, Int64, UInt64>>
template <typename T, typename MinMaxT = std::conditional_t<is_signed_v<T>, Int64, UInt64>>
void restoreUpperBits(T * buf, T upper_min, T upper_max [[maybe_unused]], T sign_bit [[maybe_unused]], UInt32 tail = 64)
{
if constexpr (std::is_signed_v<T>)
if constexpr (is_signed_v<T>)
{
/// Restore some data as negatives and others as positives
if (sign_bit)
@ -334,7 +334,7 @@ using Variant = CompressionCodecT64::Variant;
template <typename T, bool full>
UInt32 compressData(const char * src, UInt32 bytes_size, char * dst)
{
using MinMaxType = std::conditional_t<std::is_signed_v<T>, Int64, UInt64>;
using MinMaxType = std::conditional_t<is_signed_v<T>, Int64, UInt64>;
static constexpr const UInt32 matrix_size = 64;
static constexpr const UInt32 header_size = 2 * sizeof(UInt64);
@ -389,7 +389,7 @@ UInt32 compressData(const char * src, UInt32 bytes_size, char * dst)
template <typename T, bool full>
void decompressData(const char * src, UInt32 bytes_size, char * dst, UInt32 uncompressed_size)
{
using MinMaxType = std::conditional_t<std::is_signed_v<T>, Int64, UInt64>;
using MinMaxType = std::conditional_t<is_signed_v<T>, Int64, UInt64>;
static constexpr const UInt32 matrix_size = 64;
static constexpr const UInt32 header_size = 2 * sizeof(UInt64);
@ -441,7 +441,7 @@ void decompressData(const char * src, UInt32 bytes_size, char * dst, UInt32 unco
if (num_bits < 64)
upper_min = UInt64(min) >> num_bits << num_bits;
if constexpr (std::is_signed_v<T>)
if constexpr (is_signed_v<T>)
{
if (min < 0 && max >= 0 && num_bits < 64)
{

Some files were not shown because too many files have changed in this diff.