Compare commits

...

208 Commits

Author SHA1 Message Date
Nikolai Kochetov
6933b95a99
Merge 4ffae801be into b94a7167a8 2024-09-18 23:54:17 +03:00
Robert Schulze
b94a7167a8
Merge pull request #69580 from rschu1ze/bump-libpqxx
Bump libpqxx to v7.7.5
2024-09-18 18:56:12 +00:00
Konstantin Bogdanov
64e58baba1
Merge pull request #69682 from ClickHouse/more-asserts-for-hashjoin
Try fix asserts failure in `HashJoin`
2024-09-18 18:20:27 +00:00
max-vostrikov
a3fe155579
Merge pull request #69737 from ClickHouse/test_printf
added some edge cases for printf tests
2024-09-18 17:49:57 +00:00
Nikolai Kochetov
4ffae801be Fixing another test. 2024-09-18 17:09:23 +00:00
Nikolai Kochetov
de30ec1d1d Fixing prewhere. 2024-09-18 16:51:18 +00:00
Nikolai Kochetov
79a62c99c5 Fixing style. 2024-09-18 16:39:58 +00:00
Antonio Andelic
a997cfad2b
Merge pull request #68108 from ClickHouse/keeper-some-improvement2
Keeper improvements package
2024-09-18 16:35:57 +00:00
maxvostrikov
f4b4b3cc35 added some edge cases for printf tests
added some edge cases for printf tests
2024-09-18 17:22:36 +02:00
Konstantin Bogdanov
cb24849396
Move assert 2024-09-18 15:24:48 +02:00
Kseniia Sumarokova
7fd2207626
Merge pull request #68504 from ClickHouse/miscellaneous-3
Miscellaneous
2024-09-18 11:21:26 +00:00
Antonio Andelic
4f73c677ac Merge branch 'master' into keeper-some-improvement2 2024-09-18 13:19:24 +02:00
Kseniia Sumarokova
69f45acfd7
Merge pull request #69672 from ClickHouse/s3queue-refactor-2
S3Queue small refactoring
2024-09-18 10:47:14 +00:00
Yarik Briukhovetskyi
4c78206d0a
Merge pull request #69718 from arruw/patch-1
Improve QuantileDD docs
2024-09-18 10:34:46 +00:00
Mikhail Artemenko
429e8ada79
Merge pull request #69690 from ClickHouse/remove_recursive_small_fixes
Remove recursive small fixes
2024-09-18 10:23:27 +00:00
mmav
06b49d18d9
Update quantileddsketch.md
Update function syntax
2024-09-18 10:45:10 +01:00
Kseniia Sumarokova
a17a8febf7
Merge pull request #69714 from tbragin/patch-15
Update README.md - Meetups
2024-09-18 09:20:05 +00:00
Robert Schulze
55529ec5a2
Merge pull request #69674 from rschu1ze/bump-pg
Bump libpq from v14.3 to v15.8
2024-09-18 09:13:13 +00:00
Antonio Andelic
3106653852 Fix watches 2024-09-18 10:47:40 +02:00
Yakov Olkhovskiy
82dbb3bb32
Merge pull request #69615 from ClickHouse/refactor-secret-finder
Unification of FunctionSecretArgumentsFinder
2024-09-18 08:17:52 +00:00
Tanya Bragin
1bcdde3e62
Update README.md - Meetups 2024-09-17 19:48:48 -07:00
Alexey Katsman
2cef99c311
Merge pull request #69576 from bigo-sg/arrayzip_allow_empty
Allow empty arguments for arrayZip/arrayZipUnaligned
2024-09-17 21:25:29 +00:00
Robert Schulze
cd7a1a9288
Merge pull request #69684 from rschu1ze/disallow-alter-table-add-inv-idx
Prohibit `ALTER TABLE ... ADD INDEX ... TYPE` inverted if setting = 0
2024-09-17 21:18:02 +00:00
Alexander Gololobov
6597a8ed04
Merge pull request #69596 from ClickHouse/fix_dedup_in_parallel_replicas_announcement
Optimize complexity of part deduplication in parallel replicas announcement
2024-09-17 20:48:12 +00:00
Kseniia Sumarokova
3b901f49e5
Merge pull request #69673 from ClickHouse/update-assert
Update assert
2024-09-17 20:39:07 +00:00
Raúl Marín
958c3effae
Merge pull request #69705 from ClickHouse/revert-69376-marco-vb/setting-stop-insert-on-full-disk
Revert "Add user-level settings min_free_diskspace_bytes_to_throw_insert and min_free_diskspace_ratio_to_throw_insert"
2024-09-17 19:49:35 +00:00
Raúl Marín
474499d240
Revert "Add user-level settings min_free_diskspace_bytes_to_throw_insert and min_free_diskspace_ratio_to_throw_insert" 2024-09-17 21:48:19 +02:00
jsc0218
839f06035f
Merge pull request #69376 from marco-vb/marco-vb/setting-stop-insert-on-full-disk
Add user-level settings min_free_diskspace_bytes_to_throw_insert and min_free_diskspace_ratio_to_throw_insert
2024-09-17 18:46:43 +00:00
Vitaly Baranov
4f88ccb6a8
Merge pull request #69201 from NikBarykin/allow_arguments_in_custom_database_engines
Allow custom settings in database engine
2024-09-17 18:26:57 +00:00
Kruglov Pavel
a226567bc2
Merge pull request #69560 from Avogar/fix-prewhere-reorder
Keep original order of conditions during move to prewhere
2024-09-17 18:22:19 +00:00
Vitaly Baranov
fcda762a27
Merge pull request #69346 from vitlibar/restore-access-dependencies
Improve restoring of access entities' dependencies
2024-09-17 18:21:55 +00:00
Mikhail Artemenko
9c185374e4 fix level sorting 2024-09-17 18:14:47 +00:00
Mikhail Artemenko
13e82d6439 fix double visit of uncommitted changes 2024-09-17 17:45:04 +00:00
Mikhail f. Shiryaev
fdee35cccc
Merge pull request #69557 from ClickHouse/integration-prepull-kill-runner
Kill runner when integration tests fail to pre-pull
2024-09-17 17:27:20 +00:00
Konstantin Bogdanov
b08e727aef
Count allocated bytes from scratch after rerange 2024-09-17 19:02:10 +02:00
Miсhael Stetsyuk
9eba103c5e
Merge pull request #69670 from ClickHouse/sync-executeToDatabaseImpl-with-private-fork
sync changes to `InterpreterDropQuery::executeToDatabaseImpl` from the private fork
2024-09-17 16:54:15 +00:00
Konstantin Bogdanov
a210f98819
Lint 2024-09-17 18:28:27 +02:00
kssenii
e574c49e25 Fix 2024-09-17 18:19:05 +02:00
Konstantin Bogdanov
7c5d55c6b2
Lint 2024-09-17 18:10:51 +02:00
Robert Schulze
665f362601
Prohibit ALTER TABLE ... ADD INDEX ... TYPE inverted if setting = 0 2024-09-17 16:10:03 +00:00
Konstantin Bogdanov
80259659ff
More asserts 2024-09-17 18:03:19 +02:00
Alexander Gololobov
574a26c63b Use adjacent_find to check adjacent parts 2024-09-17 17:56:44 +02:00
Alexander Gololobov
3674c97ebb Fix for using part after std::move from it 2024-09-17 17:49:02 +02:00
vdimir
8508b1ba37
Merge pull request #67966 from ClickHouse/vdimir/datetime64_constant_to_ast_f
Add test cases to 03217_datetime64_constant_to_ast
2024-09-17 14:56:32 +00:00
Alexander Gololobov
190d3f04c9 More optimal check for intrsecting parts in DefaultCoordinator init 2024-09-17 16:54:49 +02:00
Alexander Gololobov
aba7de5091 Verify that there are no intersecting parts in the resulting all_parts_to_read 2024-09-17 16:53:32 +02:00
Antonio Andelic
8db3dddb3d Fix watches count and list request 2024-09-17 16:15:55 +02:00
Nikita Taranov
ffaf97a390
Merge pull request #68424 from ClickHouse/adaptive_parallel_replicas
Adaptive mark_segment_size for parallel replicas
2024-09-17 13:52:42 +00:00
Antonio Andelic
452fde78c7
Merge pull request #69582 from ClickHouse/keeper-better-ssl-support
Support more advanced SSL options for Keeper internal communication
2024-09-17 13:32:18 +00:00
Kseniia Sumarokova
51fa9ebf8a
Merge pull request #68520 from ClickHouse/fix-bad-exception-messages
Fix bad exception messages
2024-09-17 13:29:53 +00:00
kssenii
e30ebfa23e Add mode validation 2024-09-17 15:24:02 +02:00
Kruglov Pavel
b21be2bc54
Merge pull request #68591 from bigo-sg/orc_dict_encode
Add settings `output_format_orc_dictionary_key_size_threshold` to allow user to enable dict encoding for string column in ORC output format
2024-09-17 13:22:44 +00:00
Alexander Gololobov
14736d95c5
Merge pull request #69606 from ClickHouse/check_time_limit_in_index_analysis
Check time limits while analyzing indexes
2024-09-17 13:14:08 +00:00
NikBarykin
4b69d8e2ca Fix CE 2024-09-17 15:52:20 +03:00
robot-clickhouse
5ce8604869 Automatic style fix 2024-09-17 12:37:31 +00:00
Robert Schulze
813bcd896f
Bump to v18.8 2024-09-17 12:30:12 +00:00
kssenii
3a05282bce Update assert 2024-09-17 14:26:31 +02:00
Yakov Olkhovskiy
fd0c7a1c18 Merge branch 'master' into refactor-secret-finder 2024-09-17 12:16:19 +00:00
Kseniia Sumarokova
4704fb8a3b
Merge branch 'master' into miscellaneous-3 2024-09-17 13:32:01 +02:00
Vitaly Baranov
f768717be8 Fix test. 2024-09-17 13:05:02 +02:00
Vitaly Baranov
983b061b58 Corrections after review. 2024-09-17 12:56:10 +02:00
kssenii
3a299f382d Refactor 2024-09-17 12:52:45 +02:00
Vitaly Baranov
f8f72ccb00 Add test. 2024-09-17 12:10:31 +02:00
Vitaly Baranov
1ccd461c97 Fix restoring access entities dependant on existing ones. 2024-09-17 12:10:31 +02:00
Michael Stetsyuk
5aaff37b36 sync changes to InterpreterDropQuery::executeToDatabaseImpl from the private fork 2024-09-17 09:16:52 +00:00
Antonio Andelic
9f932fb453 Merge branch 'master' into keeper-better-ssl-support 2024-09-17 10:52:35 +02:00
Antonio Andelic
f3654b8fc8 Merge branch 'master' into keeper-some-improvement2 2024-09-17 10:35:38 +02:00
Antonio Andelic
676b6238d0 Update comments 2024-09-17 10:30:39 +02:00
Antonio Andelic
e876997ebb Merge branch 'master' into keeper-some-improvement2 2024-09-17 10:28:02 +02:00
Yakov Olkhovskiy
19e2197582
fix 2024-09-16 10:38:28 -04:00
Yakov Olkhovskiy
d223c4547f
fix after master merge 2024-09-16 08:35:05 -04:00
Yakov Olkhovskiy
58993d3f3b
Merge branch 'master' into refactor-secret-finder 2024-09-16 08:33:16 -04:00
Alexander Gololobov
8507d209c0
Merge branch 'master' into check_time_limit_in_index_analysis 2024-09-16 13:36:51 +02:00
Alexander Gololobov
f5b9d5ad34 Test for checking time limit in index analysis 2024-09-16 13:34:40 +02:00
Alexander Gololobov
4af369fbc4 Failpoint for testing slow index analysis 2024-09-16 13:34:01 +02:00
Antonio Andelic
8cdcc431fe Fix 2024-09-16 13:30:17 +02:00
Antonio Andelic
187a717872 Merge branch 'master' into keeper-better-ssl-support 2024-09-16 09:17:30 +02:00
李扬
4412946532
Merge branch 'master' into orc_dict_encode 2024-09-15 17:25:20 +08:00
marco-vb
03737ddcab Reduced disk size on test for faster execution. 2024-09-14 22:24:17 +00:00
marco-vb
038f56cb5e Only make checks to stop inserts if settings are being used. 2024-09-14 21:04:12 +00:00
Nikita Taranov
63577507c9 fix build 2024-09-14 21:43:27 +01:00
Nikita Taranov
9eb78773a6 Merge branch 'master' into adaptive_parallel_replicas 2024-09-14 19:31:02 +01:00
Yakov Olkhovskiy
6f63a7b213 fix tidy 2024-09-14 16:46:48 +00:00
Yakov Olkhovskiy
56cfa74a14 fix 2024-09-14 13:32:52 +00:00
Yakov Olkhovskiy
dbb1d043fe unification of FunctionSecretArgumentsFinder 2024-09-14 05:46:08 +00:00
marco-vb
56f3030b17 Black formatting python test. 2024-09-13 17:32:33 +00:00
Alexander Gololobov
31ddfc6f5f Check time limit while analyzing indexes 2024-09-13 19:19:21 +02:00
Marco Vilas Boas
ddf2e07fd0
Merge branch 'ClickHouse:master' into marco-vb/setting-stop-insert-on-full-disk 2024-09-13 18:17:44 +01:00
marco-vb
5cc12ca9ee Added integration testing for newly implemented settings. 2024-09-13 17:16:16 +00:00
Alexander Gololobov
e13247b67e Fix clang-18 build 2024-09-13 16:50:43 +02:00
Alexander Gololobov
2650a20628 Make dedup logic O(n*log(n)) instead of O(n^2) 2024-09-13 16:21:17 +02:00
Antonio Andelic
9a31fc385d Fixes 2024-09-13 15:58:17 +02:00
marco-vb
ddc506a677 Corrected implementation for check of new settings and fix lint of settings change history. 2024-09-13 13:48:42 +00:00
avogar
2812953a8a Try to fix tests 2024-09-13 13:37:42 +00:00
Antonio Andelic
492461271b Merge branch 'master' into keeper-better-ssl-support 2024-09-13 14:44:12 +02:00
Antonio Andelic
3c47f3df4b Support more advanced SSL options for Keeper internal communication 2024-09-13 14:23:01 +02:00
李扬
11c7cdabf8
Merge branch 'ClickHouse:master' into orc_dict_encode 2024-09-13 18:26:20 +08:00
李扬
71553022e0
fix 03230_array_zip_unaligned 2024-09-13 18:16:13 +08:00
李扬
53e1975833
fix 01045_array_zip 2024-09-13 18:15:47 +08:00
Marco Vilas Boas
8299b31d47
Merge branch 'master' into marco-vb/setting-stop-insert-on-full-disk 2024-09-13 10:44:04 +01:00
李扬
11d2963497
fix style 2024-09-13 11:56:47 +08:00
taiyang-li
f9335a2fd5 update uts 2024-09-13 10:50:50 +08:00
taiyang-li
8a89d7b2b9 allow empty inputs for arrayZip or arrayZipUnaligned 2024-09-13 10:46:38 +08:00
Robert Schulze
aab0d3dd9e
Bump to 7.7.5 2024-09-12 19:42:32 +00:00
Robert Schulze
5a34b9f24e
Bump to 7.6.1 2024-09-12 19:14:41 +00:00
Robert Schulze
a0a4858e00
Scratch build of libpqxx at 7.5.3 + patches 2024-09-12 18:55:35 +00:00
avogar
9c1f4f4545 Remove bad files 2024-09-12 17:21:28 +00:00
avogar
2e82e06330 Update tests 2024-09-12 16:59:25 +00:00
Nikita Taranov
7b2810bea2 Merge branch 'master' into adaptive_parallel_replicas 2024-09-12 16:51:15 +01:00
avogar
401a3d0931 Add test 2024-09-12 15:10:29 +00:00
avogar
beffb92411 Keep original order of conditions during move to prewhere 2024-09-12 14:52:09 +00:00
Nikita Taranov
16f93ea1b3 revive separate protocol versioning for PRs 2024-09-12 15:40:51 +01:00
Nikita Taranov
1e3bc6d359 log mark_segment_size on initiator 2024-09-12 15:15:57 +01:00
marco-vb
562c23eac6 Add new settings to settings change history. 2024-09-12 13:28:49 +00:00
Mikhail f. Shiryaev
8d5babf65f
Kill the runner process if integration tests fail to pre-pull 2024-09-12 15:26:21 +02:00
Mikhail f. Shiryaev
99ede620be
Add kill_ci_runner to ci_utils, will allow restarts 2024-09-12 15:24:25 +02:00
Marco Vilas Boas
f292767778
Merge branch 'master' into marco-vb/setting-stop-insert-on-full-disk 2024-09-12 10:56:32 +01:00
marco-vb
7d36f3b764 Implemented checks for new settings. 2024-09-12 09:53:56 +00:00
marco-vb
21bd47f09e Add settings min_free_disk_bytes_to_throw_insert and min_free_disk_ratio_to_throw_insert and update documentation. 2024-09-12 09:45:43 +00:00
Nikita Taranov
fc83c1c7a2 use final task size in segment size calculation 2024-09-11 20:20:18 +01:00
李扬
0de3b1dacb
Merge branch 'ClickHouse:master' into orc_dict_encode 2024-09-11 12:08:06 +08:00
Nikita Taranov
8d5d7dd83a fix wording 2024-09-10 17:18:27 +01:00
Nikita Taranov
61ebcdc2ed fix 2024-09-10 12:07:44 +01:00
Nikita Taranov
1df897db27 Merge branch 'master' into adaptive_parallel_replicas 2024-09-10 12:03:46 +01:00
Nikita Taranov
8cdc10cf65 fix settings changes 2024-09-09 18:11:03 +01:00
Antonio Andelic
65019c4b9b Merge branch 'master' into keeper-some-improvement2 2024-09-07 20:59:04 +02:00
Antonio Andelic
190339c4e6 Fix snapshot sync 2024-09-07 17:34:59 +02:00
Antonio Andelic
5a86371b02 Merge branch 'master' into keeper-some-improvement2 2024-09-07 11:32:44 +02:00
Igor Nikonov
f5d49f8e10
Merge branch 'master' into adaptive_parallel_replicas 2024-09-06 23:08:30 +02:00
Antonio Andelic
03c7f3817b Correct lock order 2024-09-06 15:41:04 +02:00
Antonio Andelic
f44eaa808d Merge branch 'master' into keeper-some-improvement2 2024-09-06 09:35:56 +02:00
Antonio Andelic
e388f6f99b Remove useless log 2024-09-06 09:35:02 +02:00
Antonio Andelic
a3e233a537 Fix watch 2024-09-04 15:19:56 +02:00
Antonio Andelic
955412888c Merge branch 'master' into keeper-some-improvement2 2024-09-04 11:30:29 +02:00
Antonio Andelic
9633563fbd Fix 2024-09-04 11:30:05 +02:00
NikBarykin
83854cf293 Make method of DatabaseFactory 2024-09-03 19:13:05 +03:00
NikBarykin
e874c6e1de Fix typo 2024-09-03 18:58:39 +03:00
NikBarykin
03ccf05d14 Allow custom settings in database engine 2024-09-03 16:14:15 +03:00
Antonio Andelic
79fc8d67ad More fixes 2024-09-02 15:46:04 +02:00
Antonio Andelic
596ba574e3 Merge branch 'master' into keeper-some-improvement2 2024-09-02 09:31:02 +02:00
Antonio Andelic
e968984d17 More changes 2024-09-02 08:25:17 +02:00
李扬
3d04f3d33a
Merge branch 'ClickHouse:master' into orc_dict_encode 2024-08-29 10:16:06 +08:00
李扬
553c309477
Merge branch 'master' into orc_dict_encode 2024-08-28 21:00:18 +08:00
taiyang-li
ae582120ae change as request 2024-08-28 20:56:33 +08:00
taiyang-li
aa4688a982 fix style 2024-08-27 12:25:22 +08:00
taiyang-li
7aaa0289e1 revert files 2024-08-26 14:58:57 +08:00
taiyang-li
d6df83d561 add uts about orc string encode 2024-08-26 14:57:51 +08:00
taiyang-li
1011f8ef9c add uts about orc string encode 2024-08-26 14:45:41 +08:00
taiyang-li
b0a0988c5b change as request 2024-08-22 10:46:44 +08:00
taiyang-li
03ab625265 enable string dict encoding in orc output format 2024-08-20 15:47:26 +08:00
taiyang-li
dbd4ee44ed enable dict encoding in orc writer 2024-08-20 14:09:14 +08:00
Nikita Taranov
d4a3a033b0
Merge branch 'master' into adaptive_parallel_replicas 2024-08-19 12:48:39 +02:00
Alexey Milovidov
165d08f088 Fix bad exception messages 2024-08-19 05:54:37 +02:00
Alexey Milovidov
e0dbc53b58 Merge branch 'master' into miscellaneous-3 2024-08-19 01:18:48 +02:00
Alexey Milovidov
f97551e2ad Fix tests 2024-08-18 22:17:16 +02:00
Nikita Taranov
c252b3c8b0 fix build 2024-08-18 18:29:48 +01:00
Nikita Taranov
30229a3bfd better 2024-08-18 17:44:16 +01:00
Nikita Taranov
8a0f41da7a
Merge branch 'master' into adaptive_parallel_replicas 2024-08-18 17:55:29 +02:00
Nikita Taranov
628a4300ba fix 2024-08-18 16:53:00 +01:00
Alexey Milovidov
f88b5988c1 Update test 2024-08-18 09:44:39 +02:00
Alexey Milovidov
4bb2f7b3f6 Miscellaneous 2024-08-18 09:09:58 +02:00
Alexey Milovidov
95edca513c Fix tests 2024-08-18 05:43:01 +02:00
Alexey Milovidov
5004e4d2cc Miscellaneous 2024-08-18 03:27:42 +02:00
Nikita Taranov
e7fc89ba26 add bw-compatibility test 2024-08-16 23:23:03 +01:00
vdimir
49ce2c7619
Merge branch 'master' into vdimir/datetime64_constant_to_ast_f 2024-08-15 21:41:56 +02:00
Nikita Taranov
80d985a690 add setting change 2024-08-15 19:11:43 +01:00
Nikita Taranov
891f9c5358 fix typo 2024-08-15 18:44:37 +01:00
Nikita Taranov
cb0335446e impl 2024-08-15 18:34:06 +01:00
vdimir
64e10b2dda
Merge branch 'master' into vdimir/datetime64_constant_to_ast_f 2024-08-13 17:00:51 +02:00
Antonio Andelic
c61fc591c4 Use functions instead of classes 2024-08-13 11:33:17 +02:00
Antonio Andelic
dcbc590302 Merge branch 'master' into keeper-some-improvement2 2024-08-13 09:01:10 +02:00
Antonio Andelic
b6c3619543 Whitespace 2024-08-09 15:41:11 +02:00
Antonio Andelic
b2172af817 Merge branch 'master' into keeper-some-improvement2 2024-08-09 14:50:52 +02:00
vdimir
ef40cc3bae
Merge branch 'master' into vdimir/datetime64_constant_to_ast_f 2024-08-08 12:12:22 +02:00
vdimir
f5c07b8938
Add test cases to 03217_datetime64_constant_to_ast 2024-08-07 09:43:13 +00:00
Antonio Andelic
5ea4844d69 Merge branch 'master' into keeper-some-improvement2 2024-08-07 11:26:33 +02:00
Antonio Andelic
48e7057200 Merge branch 'master' into keeper-some-improvement2 2024-07-22 16:51:20 +02:00
Antonio Andelic
5a96290cce Merge branch 'master' into keeper-some-improvement2 2024-07-10 12:45:43 +02:00
Antonio Andelic
7e22af06f1 Merge branch 'master' into keeper-some-improvement2 2024-07-02 09:01:48 +02:00
Antonio Andelic
ac78184fe7 Merge branch 'tracing-try-2' into keeper-some-improvement2 2024-06-18 11:04:00 +02:00
Antonio Andelic
1777ff37c0 Merge branch 'master' into keeper-some-improvement2 2024-06-18 11:03:38 +02:00
Antonio Andelic
7dca59da56 Revert "Merge branch 'use-thread-from-global-pool-in-poco-threadpool' into keeper-some-improvement"
This reverts commit 737d7484c5, reversing
changes made to b3a742304e.
2024-06-17 09:03:49 +02:00
Antonio Andelic
0fa45c3954 More parallel storage 2024-06-11 16:39:35 +02:00
Antonio Andelic
c802d7d58a Writing improvements 2024-06-11 14:35:26 +02:00
Antonio Andelic
5ab06caffc Merge branch 'keeper-parallel-storage' into keeper-some-improvement2 2024-06-11 10:18:27 +02:00
Antonio Andelic
737d7484c5 Merge branch 'use-thread-from-global-pool-in-poco-threadpool' into keeper-some-improvement 2024-06-11 09:46:58 +02:00
Antonio Andelic
b3a742304e Merge branch 'master' into keeper-some-improvement 2024-06-11 09:46:41 +02:00
kssenii
6514d72fea Move servers pool back 2024-06-10 18:53:51 +02:00
kssenii
c3d4b429d9 Fix merge 2024-06-10 15:39:54 +02:00
kssenii
7ff848c2c8 Merge remote-tracking branch 'origin/master' into use-thread-from-global-pool-in-poco-threadpool 2024-06-10 15:20:03 +02:00
kssenii
a11ba3f437 Fix shutdown 2024-06-10 15:19:03 +02:00
kssenii
6604d94271 Ping CI: skip fast test to see all stateless runs 2024-06-07 17:11:49 +02:00
kssenii
e30fa1da4d Fix ThreadStatus 2024-06-07 15:03:13 +02:00
kssenii
7ea3345e0d Use ThreadFromGlobalPool in Poco::ThreadPool 2024-06-06 17:25:15 +02:00
kssenii
1e97d73bd0 Squashed commit of the following:
commit 27fe0439fa
Merge: bfb1c4c793 bb469e0d45
Author: Antonio Andelic <antonio@clickhouse.com>
Date:   Thu Jun 6 14:36:02 2024 +0200

    Merge branch 'master' into fix-global-trace-collector

commit bfb1c4c793
Author: Antonio Andelic <antonio@clickhouse.com>
Date:   Thu Jun 6 11:29:42 2024 +0200

    better

commit fcee260b25
Author: Antonio Andelic <antonio2368@users.noreply.github.com>
Date:   Thu Jun 6 11:22:48 2024 +0200

    Update src/Interpreters/TraceCollector.h

    Co-authored-by: alesapin <alesapin@clickhouse.com>

commit 1d3cf17053
Author: Antonio Andelic <antonio@clickhouse.com>
Date:   Thu Jun 6 11:11:08 2024 +0200

    Fix global trace collector
2024-06-06 17:13:37 +02:00
Antonio Andelic
f0e9703384 Some small improvements 2024-06-06 09:45:07 +02:00
Antonio Andelic
514941627b Merge branch 'master' into keeper-parallel-storage 2024-06-05 15:31:57 +02:00
Antonio Andelic
acc08c65d9 Add stopwatch 2024-05-22 11:56:45 +02:00
Antonio Andelic
f1e4403f98 Merge branch 'master' into keeper-parallel-storage 2024-05-22 11:39:57 +02:00
Antonio Andelic
b1d53f0472 Merge branch 'master' into keeper-parallel-storage 2024-04-29 15:13:19 +02:00
Antonio Andelic
bc3cfb008e Merge branch 'master' into keeper-parallel-storage 2024-03-25 13:14:57 +01:00
Antonio Andelic
9791a2ea40 Merge branch 'keeper-batch-flushes' into keeper-parallel-storage 2023-09-08 16:26:12 +00:00
Antonio Andelic
9fb9d16737 Merge branch 'keeper-batch-flushes' into keeper-parallel-storage 2023-09-06 13:30:05 +00:00
Antonio Andelic
6be1d0724a More mutex 2023-09-06 13:04:08 +00:00
Antonio Andelic
9238520490 Merge branch 'master' into keeper-parallel-storage 2023-09-06 10:57:33 +00:00
Antonio Andelic
dd1bb579df Better 2023-09-05 12:05:37 +00:00
Antonio Andelic
57943798b7 Merge branch 'master' into keeper-parallel-storage 2023-09-05 08:46:38 +00:00
Antonio Andelic
b43c3d75a2 Initial implementation 2023-09-04 14:49:49 +00:00
167 changed files with 5909 additions and 4173 deletions

View File

@ -40,17 +40,8 @@ Every month we get together with the community (users, contributors, customers,
Keep an eye out for upcoming meetups and events around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `<at>` clickhouse `<dot>` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc. Keep an eye out for upcoming meetups and events around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `<at>` clickhouse `<dot>` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc.
The following upcoming meetups are featuring creator of ClickHouse & CTO, Alexey Milovidov: Upcoming meetups
* [Raleigh Meetup (Deutsche Bank)](https://www.meetup.com/triangletechtalks/events/302723486/) - September 9
* [New York Meetup (Rokt)](https://www.meetup.com/clickhouse-new-york-user-group/events/302575342) - September 10
* [Chicago Meetup (Jump Capital)](https://lu.ma/43tvmrfw) - September 12
Other upcoming meetups
* [Toronto Meetup (Shopify)](https://www.meetup.com/clickhouse-toronto-user-group/events/301490855/) - September 10
* [Austin Meetup](https://www.meetup.com/clickhouse-austin-user-group/events/302558689/) - September 17
* [London Meetup](https://www.meetup.com/clickhouse-london-user-group/events/302977267) - September 17
* [Bangalore Meetup](https://www.meetup.com/clickhouse-bangalore-user-group/events/303208274/) - September 18 * [Bangalore Meetup](https://www.meetup.com/clickhouse-bangalore-user-group/events/303208274/) - September 18
* [Tel Aviv Meetup](https://www.meetup.com/clickhouse-meetup-israel/events/303095121) - September 22 * [Tel Aviv Meetup](https://www.meetup.com/clickhouse-meetup-israel/events/303095121) - September 22
* [Jakarta Meetup](https://www.meetup.com/clickhouse-indonesia-user-group/events/303191359/) - October 1 * [Jakarta Meetup](https://www.meetup.com/clickhouse-indonesia-user-group/events/303191359/) - October 1
@ -62,13 +53,20 @@ Other upcoming meetups
* [Dubai Meetup](https://www.meetup.com/clickhouse-dubai-meetup-group/events/303096989/) - November 21 * [Dubai Meetup](https://www.meetup.com/clickhouse-dubai-meetup-group/events/303096989/) - November 21
* [Paris Meetup](https://www.meetup.com/clickhouse-france-user-group/events/303096434) - November 26 * [Paris Meetup](https://www.meetup.com/clickhouse-france-user-group/events/303096434) - November 26
Recently completed events Recently completed meetups
* [ClickHouse Guangzhou User Group Meetup](https://mp.weixin.qq.com/s/GSvo-7xUoVzCsuUvlLTpCw) - August 25 * [ClickHouse Guangzhou User Group Meetup](https://mp.weixin.qq.com/s/GSvo-7xUoVzCsuUvlLTpCw) - August 25
* [Seattle Meetup (Statsig)](https://www.meetup.com/clickhouse-seattle-user-group/events/302518075/) - August 27 * [Seattle Meetup (Statsig)](https://www.meetup.com/clickhouse-seattle-user-group/events/302518075/) - August 27
* [Melbourne Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/302732666/) - August 27 * [Melbourne Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/302732666/) - August 27
* [Sydney Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/302862966/) - September 5 * [Sydney Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/302862966/) - September 5
* [Zurich Meetup](https://www.meetup.com/clickhouse-switzerland-meetup-group/events/302267429/) - September 5 * [Zurich Meetup](https://www.meetup.com/clickhouse-switzerland-meetup-group/events/302267429/) - September 5
* [San Francisco Meetup (Cloudflare)](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/302540575) - September 5 * [San Francisco Meetup (Cloudflare)](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/302540575) - September 5
* [Raleigh Meetup (Deutsche Bank)](https://www.meetup.com/triangletechtalks/events/302723486/) - September 9
* [New York Meetup (Rokt)](https://www.meetup.com/clickhouse-new-york-user-group/events/302575342) - September 10
* [Toronto Meetup (Shopify)](https://www.meetup.com/clickhouse-toronto-user-group/events/301490855/) - September 10
* [Chicago Meetup (Jump Capital)](https://lu.ma/43tvmrfw) - September 12
* [London Meetup](https://www.meetup.com/clickhouse-london-user-group/events/302977267) - September 17
* [Austin Meetup](https://www.meetup.com/clickhouse-austin-user-group/events/302558689/) - September 17
## Recent Recordings ## Recent Recordings
* **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible, recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Currently featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments" * **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible, recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Currently featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments"

View File

@ -188,8 +188,9 @@ namespace Crypto
pFile = fopen(keyFile.c_str(), "r"); pFile = fopen(keyFile.c_str(), "r");
if (pFile) if (pFile)
{ {
pem_password_cb * pCB = pass.empty() ? (pem_password_cb *)0 : &passCB; pem_password_cb * pCB = &passCB;
void * pPassword = pass.empty() ? (void *)0 : (void *)pass.c_str(); static constexpr char * no_password = "";
void * pPassword = pass.empty() ? (void *)no_password : (void *)pass.c_str();
if (readFunc(pFile, &pKey, pCB, pPassword)) if (readFunc(pFile, &pKey, pCB, pPassword))
{ {
fclose(pFile); fclose(pFile);
@ -225,6 +226,13 @@ namespace Crypto
error: error:
if (pFile) if (pFile)
fclose(pFile); fclose(pFile);
if (*ppKey)
{
if constexpr (std::is_same_v<K, EVP_PKEY>)
EVP_PKEY_free(*ppKey);
else
EC_KEY_free(*ppKey);
}
throw OpenSSLException("EVPKey::loadKey(string)"); throw OpenSSLException("EVPKey::loadKey(string)");
} }
@ -286,6 +294,13 @@ namespace Crypto
error: error:
if (pBIO) if (pBIO)
BIO_free(pBIO); BIO_free(pBIO);
if (*ppKey)
{
if constexpr (std::is_same_v<K, EVP_PKEY>)
EVP_PKEY_free(*ppKey);
else
EC_KEY_free(*ppKey);
}
throw OpenSSLException("EVPKey::loadKey(stream)"); throw OpenSSLException("EVPKey::loadKey(stream)");
} }

View File

@ -248,6 +248,9 @@ namespace Net
SSL_CTX * sslContext() const; SSL_CTX * sslContext() const;
/// Returns the underlying OpenSSL SSL Context object. /// Returns the underlying OpenSSL SSL Context object.
SSL_CTX * takeSslContext();
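/// Releases ownership of the underlying OpenSSL SSL Context object to the caller:
/// returns the pointer and resets the internal one to null, so this Context will no longer free it.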
Usage usage() const; Usage usage() const;
/// Returns whether the context is for use by a client or by a server /// Returns whether the context is for use by a client or by a server
/// and whether TLSv1 is required. /// and whether TLSv1 is required.
@ -401,6 +404,13 @@ namespace Net
return _pSSLContext; return _pSSLContext;
} }
inline SSL_CTX * Context::takeSslContext()
{
auto * result = _pSSLContext;
_pSSLContext = nullptr;
return result;
}
inline bool Context::extendedCertificateVerificationEnabled() const inline bool Context::extendedCertificateVerificationEnabled() const
{ {

View File

@ -106,6 +106,11 @@ Context::Context(
Context::~Context() Context::~Context()
{ {
if (_pSSLContext == nullptr)
{
return;
}
try try
{ {
SSL_CTX_free(_pSSLContext); SSL_CTX_free(_pSSLContext);

2
contrib/libpqxx vendored

@ -1 +1 @@
Subproject commit c995193a3a14d71f4711f1f421f65a1a1db64640 Subproject commit 41e4c331564167cca97ad6eccbd5b8879c2ca044

View File

@ -1,9 +1,9 @@
set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/libpqxx") set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/libpqxx")
set (SRCS set (SRCS
"${LIBRARY_DIR}/src/strconv.cxx"
"${LIBRARY_DIR}/src/array.cxx" "${LIBRARY_DIR}/src/array.cxx"
"${LIBRARY_DIR}/src/binarystring.cxx" "${LIBRARY_DIR}/src/binarystring.cxx"
"${LIBRARY_DIR}/src/blob.cxx"
"${LIBRARY_DIR}/src/connection.cxx" "${LIBRARY_DIR}/src/connection.cxx"
"${LIBRARY_DIR}/src/cursor.cxx" "${LIBRARY_DIR}/src/cursor.cxx"
"${LIBRARY_DIR}/src/encodings.cxx" "${LIBRARY_DIR}/src/encodings.cxx"
@ -12,59 +12,25 @@ set (SRCS
"${LIBRARY_DIR}/src/field.cxx" "${LIBRARY_DIR}/src/field.cxx"
"${LIBRARY_DIR}/src/largeobject.cxx" "${LIBRARY_DIR}/src/largeobject.cxx"
"${LIBRARY_DIR}/src/notification.cxx" "${LIBRARY_DIR}/src/notification.cxx"
"${LIBRARY_DIR}/src/params.cxx"
"${LIBRARY_DIR}/src/pipeline.cxx" "${LIBRARY_DIR}/src/pipeline.cxx"
"${LIBRARY_DIR}/src/result.cxx" "${LIBRARY_DIR}/src/result.cxx"
"${LIBRARY_DIR}/src/robusttransaction.cxx" "${LIBRARY_DIR}/src/robusttransaction.cxx"
"${LIBRARY_DIR}/src/row.cxx"
"${LIBRARY_DIR}/src/sql_cursor.cxx" "${LIBRARY_DIR}/src/sql_cursor.cxx"
"${LIBRARY_DIR}/src/strconv.cxx"
"${LIBRARY_DIR}/src/stream_from.cxx" "${LIBRARY_DIR}/src/stream_from.cxx"
"${LIBRARY_DIR}/src/stream_to.cxx" "${LIBRARY_DIR}/src/stream_to.cxx"
"${LIBRARY_DIR}/src/subtransaction.cxx" "${LIBRARY_DIR}/src/subtransaction.cxx"
"${LIBRARY_DIR}/src/time.cxx"
"${LIBRARY_DIR}/src/transaction.cxx" "${LIBRARY_DIR}/src/transaction.cxx"
"${LIBRARY_DIR}/src/transaction_base.cxx" "${LIBRARY_DIR}/src/transaction_base.cxx"
"${LIBRARY_DIR}/src/row.cxx"
"${LIBRARY_DIR}/src/params.cxx"
"${LIBRARY_DIR}/src/util.cxx" "${LIBRARY_DIR}/src/util.cxx"
"${LIBRARY_DIR}/src/version.cxx" "${LIBRARY_DIR}/src/version.cxx"
"${LIBRARY_DIR}/src/wait.cxx"
) )
# Need to explicitly include each header file, because in the directory include/pqxx there are also files add_library(_libpqxx ${SRCS})
# like just 'array'. So if including the whole directory with `target_include_directories`, it will make
# conflicts with all includes of <array>.
set (HDRS
"${LIBRARY_DIR}/include/pqxx/array.hxx"
"${LIBRARY_DIR}/include/pqxx/params.hxx"
"${LIBRARY_DIR}/include/pqxx/binarystring.hxx"
"${LIBRARY_DIR}/include/pqxx/composite.hxx"
"${LIBRARY_DIR}/include/pqxx/connection.hxx"
"${LIBRARY_DIR}/include/pqxx/cursor.hxx"
"${LIBRARY_DIR}/include/pqxx/dbtransaction.hxx"
"${LIBRARY_DIR}/include/pqxx/errorhandler.hxx"
"${LIBRARY_DIR}/include/pqxx/except.hxx"
"${LIBRARY_DIR}/include/pqxx/field.hxx"
"${LIBRARY_DIR}/include/pqxx/isolation.hxx"
"${LIBRARY_DIR}/include/pqxx/largeobject.hxx"
"${LIBRARY_DIR}/include/pqxx/nontransaction.hxx"
"${LIBRARY_DIR}/include/pqxx/notification.hxx"
"${LIBRARY_DIR}/include/pqxx/pipeline.hxx"
"${LIBRARY_DIR}/include/pqxx/prepared_statement.hxx"
"${LIBRARY_DIR}/include/pqxx/result.hxx"
"${LIBRARY_DIR}/include/pqxx/robusttransaction.hxx"
"${LIBRARY_DIR}/include/pqxx/row.hxx"
"${LIBRARY_DIR}/include/pqxx/separated_list.hxx"
"${LIBRARY_DIR}/include/pqxx/strconv.hxx"
"${LIBRARY_DIR}/include/pqxx/stream_from.hxx"
"${LIBRARY_DIR}/include/pqxx/stream_to.hxx"
"${LIBRARY_DIR}/include/pqxx/subtransaction.hxx"
"${LIBRARY_DIR}/include/pqxx/transaction.hxx"
"${LIBRARY_DIR}/include/pqxx/transaction_base.hxx"
"${LIBRARY_DIR}/include/pqxx/types.hxx"
"${LIBRARY_DIR}/include/pqxx/util.hxx"
"${LIBRARY_DIR}/include/pqxx/version.hxx"
"${LIBRARY_DIR}/include/pqxx/zview.hxx"
)
add_library(_libpqxx ${SRCS} ${HDRS})
target_link_libraries(_libpqxx PUBLIC ch_contrib::libpq) target_link_libraries(_libpqxx PUBLIC ch_contrib::libpq)
target_include_directories (_libpqxx SYSTEM BEFORE PUBLIC "${LIBRARY_DIR}/include") target_include_directories (_libpqxx SYSTEM BEFORE PUBLIC "${LIBRARY_DIR}/include")

2
contrib/postgres vendored

@ -1 +1 @@
Subproject commit 665ff8c164d56d012e359735efe4d400c0564b44 Subproject commit cfd77000af28469fcb650485bad65a35e7649e41

View File

@ -1,18 +1,6 @@
/* src/include/pg_config.h. Generated from pg_config.h.in by configure. */ /* src/include/pg_config.h. Generated from pg_config.h.in by configure. */
/* src/include/pg_config.h.in. Generated from configure.in by autoheader. */ /* src/include/pg_config.h.in. Generated from configure.in by autoheader. */
/* Define to the type of arg 1 of 'accept' */
#define ACCEPT_TYPE_ARG1 int
/* Define to the type of arg 2 of 'accept' */
#define ACCEPT_TYPE_ARG2 struct sockaddr *
/* Define to the type of arg 3 of 'accept' */
#define ACCEPT_TYPE_ARG3 size_t
/* Define to the return type of 'accept' */
#define ACCEPT_TYPE_RETURN int
/* Define if building universal (internal helper macro) */ /* Define if building universal (internal helper macro) */
/* #undef AC_APPLE_UNIVERSAL_BUILD */ /* #undef AC_APPLE_UNIVERSAL_BUILD */
@ -49,6 +37,9 @@
/* Define to the default TCP port number as a string constant. */ /* Define to the default TCP port number as a string constant. */
#define DEF_PGPORT_STR "5432" #define DEF_PGPORT_STR "5432"
/* Define to the file name extension of dynamically-loadable modules. */
#define DLSUFFIX ".so"
/* Define to build with GSSAPI support. (--with-gssapi) */ /* Define to build with GSSAPI support. (--with-gssapi) */
//#define ENABLE_GSS 0 //#define ENABLE_GSS 0
@ -122,6 +113,9 @@
don't. */ don't. */
#define HAVE_DECL_SNPRINTF 1 #define HAVE_DECL_SNPRINTF 1
/* Define to 1 if you have the declaration of `sigwait', and to 0 if you don't. */
#define HAVE_DECL_SIGWAIT 1
/* Define to 1 if you have the declaration of `strlcat', and to 0 if you /* Define to 1 if you have the declaration of `strlcat', and to 0 if you
don't. */ don't. */
#if OS_DARWIN #if OS_DARWIN
@ -257,6 +251,9 @@
/* Define to 1 if you have the `inet_aton' function. */ /* Define to 1 if you have the `inet_aton' function. */
#define HAVE_INET_ATON 1 #define HAVE_INET_ATON 1
/* Define to 1 if you have the `inet_pton' function. */
#define HAVE_INET_PTON 1
/* Define to 1 if the system has the type `int64'. */ /* Define to 1 if the system has the type `int64'. */
/* #undef HAVE_INT64 */ /* #undef HAVE_INT64 */
@ -323,6 +320,9 @@
/* Define to 1 if you have the `z' library (-lz). */ /* Define to 1 if you have the `z' library (-lz). */
#define HAVE_LIBZ 1 #define HAVE_LIBZ 1
/* Define to 1 if you have the `zstd' library (-lzstd). */
/* #undef HAVE_LIBZSTD */
/* Define to 1 if constants of type 'long long int' should have the suffix LL. /* Define to 1 if constants of type 'long long int' should have the suffix LL.
*/ */
#define HAVE_LL_CONSTANTS 1 #define HAVE_LL_CONSTANTS 1
@ -378,6 +378,9 @@
/* Define to 1 if you have the <poll.h> header file. */ /* Define to 1 if you have the <poll.h> header file. */
#define HAVE_POLL_H 1 #define HAVE_POLL_H 1
/* Define to 1 if you have a POSIX-conforming sigwait declaration. */
/* #undef HAVE_POSIX_DECL_SIGWAIT */
/* Define to 1 if you have the `posix_fadvise' function. */ /* Define to 1 if you have the `posix_fadvise' function. */
#define HAVE_POSIX_FADVISE 1 #define HAVE_POSIX_FADVISE 1
@ -408,9 +411,6 @@
/* Define to 1 if you have the <pwd.h> header file. */ /* Define to 1 if you have the <pwd.h> header file. */
#define HAVE_PWD_H 1 #define HAVE_PWD_H 1
/* Define to 1 if you have the `random' function. */
#define HAVE_RANDOM 1
/* Define to 1 if you have the <readline.h> header file. */ /* Define to 1 if you have the <readline.h> header file. */
/* #undef HAVE_READLINE_H */ /* #undef HAVE_READLINE_H */
@ -426,10 +426,6 @@
/* Define to 1 if you have the `rint' function. */ /* Define to 1 if you have the `rint' function. */
#define HAVE_RINT 1 #define HAVE_RINT 1
/* Define to 1 if you have the global variable
'rl_completion_append_character'. */
/* #undef HAVE_RL_COMPLETION_APPEND_CHARACTER */
/* Define to 1 if you have the `rl_completion_matches' function. */ /* Define to 1 if you have the `rl_completion_matches' function. */
#define HAVE_RL_COMPLETION_MATCHES 1 #define HAVE_RL_COMPLETION_MATCHES 1
@ -439,6 +435,9 @@
/* Define to 1 if you have the `rl_reset_screen_size' function. */ /* Define to 1 if you have the `rl_reset_screen_size' function. */
/* #undef HAVE_RL_RESET_SCREEN_SIZE */ /* #undef HAVE_RL_RESET_SCREEN_SIZE */
/* Define to 1 if you have the `rl_variable_bind' function. */
#define HAVE_RL_VARIABLE_BIND 1
/* Define to 1 if you have the <security/pam_appl.h> header file. */ /* Define to 1 if you have the <security/pam_appl.h> header file. */
#define HAVE_SECURITY_PAM_APPL_H 1 #define HAVE_SECURITY_PAM_APPL_H 1
@ -451,6 +450,9 @@
/* Define to 1 if you have the `shm_open' function. */ /* Define to 1 if you have the `shm_open' function. */
#define HAVE_SHM_OPEN 1 #define HAVE_SHM_OPEN 1
/* Define to 1 if the system has the type `socklen_t'. */
#define HAVE_SOCKLEN_T 1
/* Define to 1 if you have the `sigprocmask' function. */ /* Define to 1 if you have the `sigprocmask' function. */
#define HAVE_SIGPROCMASK 1 #define HAVE_SIGPROCMASK 1
@ -466,9 +468,6 @@
/* Define to 1 if you have spinlocks. */ /* Define to 1 if you have spinlocks. */
#define HAVE_SPINLOCKS 1 #define HAVE_SPINLOCKS 1
/* Define to 1 if you have the `srandom' function. */
#define HAVE_SRANDOM 1
/* Define to 1 if you have the `SSL_CTX_set_num_tickets' function. */ /* Define to 1 if you have the `SSL_CTX_set_num_tickets' function. */
/* #define HAVE_SSL_CTX_SET_NUM_TICKETS */ /* #define HAVE_SSL_CTX_SET_NUM_TICKETS */
@ -885,6 +884,9 @@
/* Define to select Win32-style shared memory. */ /* Define to select Win32-style shared memory. */
/* #undef USE_WIN32_SHARED_MEMORY */ /* #undef USE_WIN32_SHARED_MEMORY */
/* Define to 1 to build with ZSTD support. (--with-zstd) */
/* #undef USE_ZSTD */
/* Define to 1 if `wcstombs_l' requires <xlocale.h>. */ /* Define to 1 if `wcstombs_l' requires <xlocale.h>. */
/* #undef WCSTOMBS_L_IN_XLOCALE */ /* #undef WCSTOMBS_L_IN_XLOCALE */

View File

@ -9,7 +9,7 @@ Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a
**Syntax** **Syntax**
``` sql ``` sql
quantileDDsketch[relative_accuracy, (level)](expr) quantileDD(relative_accuracy, [level])(expr)
``` ```
**Arguments** **Arguments**

View File

@ -29,6 +29,7 @@ namespace DB
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int CANNOT_RESTORE_TABLE; extern const int CANNOT_RESTORE_TABLE;
extern const int ACCESS_ENTITY_ALREADY_EXISTS;
extern const int LOGICAL_ERROR; extern const int LOGICAL_ERROR;
} }
@ -175,9 +176,46 @@ namespace
return res; return res;
} }
std::unordered_map<UUID, UUID> resolveDependencies(const std::unordered_map<UUID, std::pair<String, AccessEntityType>> & dependencies, const AccessControl & access_control, bool allow_unresolved_dependencies) /// Checks if new entities (which we're going to restore) already exist,
/// and either skips them or throws an exception depending on the restore settings.
void checkExistingEntities(std::vector<std::pair<UUID, AccessEntityPtr>> & entities,
std::unordered_map<UUID, UUID> & old_to_new_id,
const AccessControl & access_control,
RestoreAccessCreationMode creation_mode)
{
if (creation_mode == RestoreAccessCreationMode::kReplace)
return;
auto should_skip = [&](const std::pair<UUID, AccessEntityPtr> & id_and_entity)
{
const auto & id = id_and_entity.first;
const auto & entity = *id_and_entity.second;
auto existing_id = access_control.find(entity.getType(), entity.getName());
if (!existing_id)
{
return false;
}
else if (creation_mode == RestoreAccessCreationMode::kCreateIfNotExists)
{
old_to_new_id[id] = *existing_id;
return true;
}
else
{
throw Exception(ErrorCodes::ACCESS_ENTITY_ALREADY_EXISTS, "Cannot restore {} because it already exists", entity.formatTypeWithName());
}
};
std::erase_if(entities, should_skip);
}
/// If new entities (which we're going to restore) depend on other entities which are not going to be restored or not present in the backup
/// then we should try to replace those dependencies with already existing entities.
void resolveDependencies(const std::unordered_map<UUID, std::pair<String, AccessEntityType>> & dependencies,
std::unordered_map<UUID, UUID> & old_to_new_ids,
const AccessControl & access_control,
bool allow_unresolved_dependencies)
{ {
std::unordered_map<UUID, UUID> old_to_new_ids;
for (const auto & [id, name_and_type] : dependencies) for (const auto & [id, name_and_type] : dependencies)
{ {
std::optional<UUID> new_id; std::optional<UUID> new_id;
@ -188,9 +226,9 @@ namespace
if (new_id) if (new_id)
old_to_new_ids.emplace(id, *new_id); old_to_new_ids.emplace(id, *new_id);
} }
return old_to_new_ids;
} }
/// Generates random IDs for the new entities.
void generateRandomIDs(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, std::unordered_map<UUID, UUID> & old_to_new_ids) void generateRandomIDs(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, std::unordered_map<UUID, UUID> & old_to_new_ids)
{ {
Poco::UUIDGenerator generator; Poco::UUIDGenerator generator;
@ -203,27 +241,12 @@ namespace
} }
} }
void replaceDependencies(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, const std::unordered_map<UUID, UUID> & old_to_new_ids) /// Updates dependencies of the new entities using a specified map.
void replaceDependencies(std::vector<std::pair<UUID, AccessEntityPtr>> & entities,
const std::unordered_map<UUID, UUID> & old_to_new_ids)
{ {
for (auto & entity : entities | boost::adaptors::map_values) for (auto & entity : entities | boost::adaptors::map_values)
{ IAccessEntity::replaceDependencies(entity, old_to_new_ids);
bool need_replace = false;
for (const auto & dependency : entity->findDependencies())
{
if (old_to_new_ids.contains(dependency))
{
need_replace = true;
break;
}
}
if (!need_replace)
continue;
auto new_entity = entity->clone();
new_entity->replaceDependencies(old_to_new_ids);
entity = new_entity;
}
} }
AccessRightsElements getRequiredAccessToRestore(const std::vector<std::pair<UUID, AccessEntityPtr>> & entities) AccessRightsElements getRequiredAccessToRestore(const std::vector<std::pair<UUID, AccessEntityPtr>> & entities)
@ -314,7 +337,9 @@ std::pair<String, BackupEntryPtr> makeBackupEntryForAccess(
AccessRestorerFromBackup::AccessRestorerFromBackup( AccessRestorerFromBackup::AccessRestorerFromBackup(
const BackupPtr & backup_, const RestoreSettings & restore_settings_) const BackupPtr & backup_, const RestoreSettings & restore_settings_)
: backup(backup_), allow_unresolved_access_dependencies(restore_settings_.allow_unresolved_access_dependencies) : backup(backup_)
, creation_mode(restore_settings_.create_access)
, allow_unresolved_dependencies(restore_settings_.allow_unresolved_access_dependencies)
{ {
} }
@ -362,7 +387,9 @@ std::vector<std::pair<UUID, AccessEntityPtr>> AccessRestorerFromBackup::getAcces
{ {
auto new_entities = entities; auto new_entities = entities;
auto old_to_new_ids = resolveDependencies(dependencies, access_control, allow_unresolved_access_dependencies); std::unordered_map<UUID, UUID> old_to_new_ids;
checkExistingEntities(new_entities, old_to_new_ids, access_control, creation_mode);
resolveDependencies(dependencies, old_to_new_ids, access_control, allow_unresolved_dependencies);
generateRandomIDs(new_entities, old_to_new_ids); generateRandomIDs(new_entities, old_to_new_ids);
replaceDependencies(new_entities, old_to_new_ids); replaceDependencies(new_entities, old_to_new_ids);

View File

@ -17,6 +17,7 @@ using BackupPtr = std::shared_ptr<const IBackup>;
class IBackupEntry; class IBackupEntry;
using BackupEntryPtr = std::shared_ptr<const IBackupEntry>; using BackupEntryPtr = std::shared_ptr<const IBackupEntry>;
struct RestoreSettings; struct RestoreSettings;
enum class RestoreAccessCreationMode : uint8_t;
/// Makes a backup of access entities of a specified type. /// Makes a backup of access entities of a specified type.
@ -45,7 +46,8 @@ public:
private: private:
BackupPtr backup; BackupPtr backup;
bool allow_unresolved_access_dependencies = false; RestoreAccessCreationMode creation_mode;
bool allow_unresolved_dependencies = false;
std::vector<std::pair<UUID, AccessEntityPtr>> entities; std::vector<std::pair<UUID, AccessEntityPtr>> entities;
std::unordered_map<UUID, std::pair<String, AccessEntityType>> dependencies; std::unordered_map<UUID, std::pair<String, AccessEntityType>> dependencies;
std::unordered_set<String> data_paths; std::unordered_set<String> data_paths;

View File

@ -544,9 +544,9 @@ scope_guard AccessControl::subscribeForChanges(const std::vector<UUID> & ids, co
return changes_notifier->subscribeForChanges(ids, handler); return changes_notifier->subscribeForChanges(ids, handler);
} }
bool AccessControl::insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) bool AccessControl::insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id)
{ {
if (MultipleAccessStorage::insertImpl(id, entity, replace_if_exists, throw_if_exists)) if (MultipleAccessStorage::insertImpl(id, entity, replace_if_exists, throw_if_exists, conflicting_id))
{ {
changes_notifier->sendNotifications(); changes_notifier->sendNotifications();
return true; return true;

View File

@ -243,7 +243,7 @@ private:
class CustomSettingsPrefixes; class CustomSettingsPrefixes;
class PasswordComplexityRules; class PasswordComplexityRules;
bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) override; bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id) override;
bool removeImpl(const UUID & id, bool throw_if_not_exists) override; bool removeImpl(const UUID & id, bool throw_if_not_exists) override;
bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override; bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override;

View File

@ -1,8 +1,6 @@
#include <Access/DiskAccessStorage.h> #include <Access/DiskAccessStorage.h>
#include <Access/AccessEntityIO.h> #include <Access/AccessEntityIO.h>
#include <Access/AccessChangesNotifier.h> #include <Access/AccessChangesNotifier.h>
#include <Backups/RestorerFromBackup.h>
#include <Backups/RestoreSettings.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h> #include <IO/ReadHelpers.h>
#include <IO/ReadBufferFromFile.h> #include <IO/ReadBufferFromFile.h>
@ -418,7 +416,7 @@ void DiskAccessStorage::setAllInMemory(const std::vector<std::pair<UUID, AccessE
/// Insert or update entities. /// Insert or update entities.
for (const auto & [id, entity] : entities_without_conflicts) for (const auto & [id, entity] : entities_without_conflicts)
insertNoLock(id, entity, /* replace_if_exists = */ true, /* throw_if_exists = */ false, /* write_on_disk= */ false); insertNoLock(id, entity, /* replace_if_exists = */ true, /* throw_if_exists = */ false, /* conflicting_id = */ nullptr, /* write_on_disk= */ false);
} }
void DiskAccessStorage::removeAllExceptInMemory(const boost::container::flat_set<UUID> & ids_to_keep) void DiskAccessStorage::removeAllExceptInMemory(const boost::container::flat_set<UUID> & ids_to_keep)
@ -507,14 +505,14 @@ std::optional<std::pair<String, AccessEntityType>> DiskAccessStorage::readNameWi
} }
bool DiskAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists) bool DiskAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id)
{ {
std::lock_guard lock{mutex}; std::lock_guard lock{mutex};
return insertNoLock(id, new_entity, replace_if_exists, throw_if_exists, /* write_on_disk = */ true); return insertNoLock(id, new_entity, replace_if_exists, throw_if_exists, conflicting_id, /* write_on_disk = */ true);
} }
bool DiskAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, bool write_on_disk) bool DiskAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id, bool write_on_disk)
{ {
const String & name = new_entity->getName(); const String & name = new_entity->getName();
AccessEntityType type = new_entity->getType(); AccessEntityType type = new_entity->getType();
@ -533,9 +531,15 @@ bool DiskAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & ne
if (name_collision && !replace_if_exists) if (name_collision && !replace_if_exists)
{ {
if (throw_if_exists) if (throw_if_exists)
{
throwNameCollisionCannotInsert(type, name); throwNameCollisionCannotInsert(type, name);
}
else else
{
if (conflicting_id)
*conflicting_id = id_by_name;
return false; return false;
}
} }
auto it_by_id = entries_by_id.find(id); auto it_by_id = entries_by_id.find(id);
@ -548,7 +552,11 @@ bool DiskAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & ne
throwIDCollisionCannotInsert(id, type, name, existing_entry.type, existing_entry.name); throwIDCollisionCannotInsert(id, type, name, existing_entry.type, existing_entry.name);
} }
else else
{
if (conflicting_id)
*conflicting_id = id;
return false; return false;
}
} }
if (write_on_disk) if (write_on_disk)
@ -727,25 +735,4 @@ void DiskAccessStorage::deleteAccessEntityOnDisk(const UUID & id) const
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Couldn't delete {}", file_path); throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Couldn't delete {}", file_path);
} }
void DiskAccessStorage::restoreFromBackup(RestorerFromBackup & restorer)
{
if (!isRestoreAllowed())
throwRestoreNotAllowed();
auto entities = restorer.getAccessEntitiesToRestore();
if (entities.empty())
return;
auto create_access = restorer.getRestoreSettings().create_access;
bool replace_if_exists = (create_access == RestoreAccessCreationMode::kReplace);
bool throw_if_exists = (create_access == RestoreAccessCreationMode::kCreate);
restorer.addDataRestoreTask([this, my_entities = std::move(entities), replace_if_exists, throw_if_exists]
{
for (const auto & [id, entity] : my_entities)
insert(id, entity, replace_if_exists, throw_if_exists);
});
}
} }

View File

@ -34,14 +34,13 @@ public:
bool exists(const UUID & id) const override; bool exists(const UUID & id) const override;
bool isBackupAllowed() const override { return backup_allowed; } bool isBackupAllowed() const override { return backup_allowed; }
void restoreFromBackup(RestorerFromBackup & restorer) override;
private: private:
std::optional<UUID> findImpl(AccessEntityType type, const String & name) const override; std::optional<UUID> findImpl(AccessEntityType type, const String & name) const override;
std::vector<UUID> findAllImpl(AccessEntityType type) const override; std::vector<UUID> findAllImpl(AccessEntityType type) const override;
AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override; AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override;
std::optional<std::pair<String, AccessEntityType>> readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const override; std::optional<std::pair<String, AccessEntityType>> readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const override;
bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) override; bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id) override;
bool removeImpl(const UUID & id, bool throw_if_not_exists) override; bool removeImpl(const UUID & id, bool throw_if_not_exists) override;
bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override; bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override;
@ -55,7 +54,7 @@ private:
void listsWritingThreadFunc() TSA_NO_THREAD_SAFETY_ANALYSIS; void listsWritingThreadFunc() TSA_NO_THREAD_SAFETY_ANALYSIS;
void stopListsWritingThread(); void stopListsWritingThread();
bool insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, bool write_on_disk) TSA_REQUIRES(mutex); bool insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id, bool write_on_disk) TSA_REQUIRES(mutex);
bool updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists, bool write_on_disk) TSA_REQUIRES(mutex); bool updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists, bool write_on_disk) TSA_REQUIRES(mutex);
bool removeNoLock(const UUID & id, bool throw_if_not_exists, bool write_on_disk) TSA_REQUIRES(mutex); bool removeNoLock(const UUID & id, bool throw_if_not_exists, bool write_on_disk) TSA_REQUIRES(mutex);

View File

@ -9,4 +9,28 @@ bool IAccessEntity::equal(const IAccessEntity & other) const
return (name == other.name) && (getType() == other.getType()); return (name == other.name) && (getType() == other.getType());
} }
/// Replaces `entity` with an updated clone if at least one of its dependencies is a key of `old_to_new_ids`;
/// otherwise `entity` is left untouched.
void IAccessEntity::replaceDependencies(std::shared_ptr<const IAccessEntity> & entity, const std::unordered_map<UUID, UUID> & old_to_new_ids)
{
    /// With an empty map there is nothing to replace, so the dependencies are not even collected.
    if (old_to_new_ids.empty())
        return;

    for (const auto & dependency_id : entity->findDependencies())
    {
        if (!old_to_new_ids.contains(dependency_id))
            continue;

        /// `entity` points to a const object, so the replacement is applied to a clone which then takes its place.
        auto updated_entity = entity->clone();
        updated_entity->replaceDependencies(old_to_new_ids);
        entity = updated_entity;
        return;
    }
}
} }

View File

@ -50,7 +50,8 @@ struct IAccessEntity
virtual std::vector<UUID> findDependencies() const { return {}; } virtual std::vector<UUID> findDependencies() const { return {}; }
/// Replaces dependencies according to a specified map. /// Replaces dependencies according to a specified map.
virtual void replaceDependencies(const std::unordered_map<UUID, UUID> & /* old_to_new_ids */) {} void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) { doReplaceDependencies(old_to_new_ids); }
static void replaceDependencies(std::shared_ptr<const IAccessEntity> & entity, const std::unordered_map<UUID, UUID> & old_to_new_ids);
/// Whether this access entity should be written to a backup. /// Whether this access entity should be written to a backup.
virtual bool isBackupAllowed() const { return false; } virtual bool isBackupAllowed() const { return false; }
@ -66,6 +67,8 @@ protected:
{ {
return std::make_shared<EntityClassT>(typeid_cast<const EntityClassT &>(*this)); return std::make_shared<EntityClassT>(typeid_cast<const EntityClassT &>(*this));
} }
virtual void doReplaceDependencies(const std::unordered_map<UUID, UUID> & /* old_to_new_ids */) {}
}; };
using AccessEntityPtr = std::shared_ptr<const IAccessEntity>; using AccessEntityPtr = std::shared_ptr<const IAccessEntity>;

View File

@ -4,6 +4,8 @@
#include <Access/User.h> #include <Access/User.h>
#include <Access/AccessBackup.h> #include <Access/AccessBackup.h>
#include <Backups/BackupEntriesCollector.h> #include <Backups/BackupEntriesCollector.h>
#include <Backups/RestorerFromBackup.h>
#include <Backups/RestoreSettings.h>
#include <Common/Exception.h> #include <Common/Exception.h>
#include <Common/quoteString.h> #include <Common/quoteString.h>
#include <Common/callOnce.h> #include <Common/callOnce.h>
@ -14,10 +16,11 @@
#include <base/FnTraits.h> #include <base/FnTraits.h>
#include <boost/algorithm/string/join.hpp> #include <boost/algorithm/string/join.hpp>
#include <boost/algorithm/string/replace.hpp> #include <boost/algorithm/string/replace.hpp>
#include <boost/range/adaptor/map.hpp>
#include <boost/range/adaptor/reversed.hpp> #include <boost/range/adaptor/reversed.hpp>
#include <boost/range/algorithm/copy.hpp>
#include <boost/range/algorithm_ext/erase.hpp> #include <boost/range/algorithm_ext/erase.hpp>
namespace DB namespace DB
{ {
namespace ErrorCodes namespace ErrorCodes
@ -178,20 +181,20 @@ UUID IAccessStorage::insert(const AccessEntityPtr & entity)
return *insert(entity, /* replace_if_exists = */ false, /* throw_if_exists = */ true); return *insert(entity, /* replace_if_exists = */ false, /* throw_if_exists = */ true);
} }
std::optional<UUID> IAccessStorage::insert(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) std::optional<UUID> IAccessStorage::insert(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id)
{ {
auto id = generateRandomID(); auto id = generateRandomID();
if (insert(id, entity, replace_if_exists, throw_if_exists)) if (insert(id, entity, replace_if_exists, throw_if_exists, conflicting_id))
return id; return id;
return std::nullopt; return std::nullopt;
} }
bool IAccessStorage::insert(const DB::UUID & id, const DB::AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) bool IAccessStorage::insert(const DB::UUID & id, const DB::AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id)
{ {
return insertImpl(id, entity, replace_if_exists, throw_if_exists); return insertImpl(id, entity, replace_if_exists, throw_if_exists, conflicting_id);
} }
@ -285,7 +288,7 @@ std::vector<UUID> IAccessStorage::insertOrReplace(const std::vector<AccessEntity
} }
bool IAccessStorage::insertImpl(const UUID &, const AccessEntityPtr & entity, bool, bool) bool IAccessStorage::insertImpl(const UUID &, const AccessEntityPtr & entity, bool, bool, UUID *)
{ {
if (isReadOnly()) if (isReadOnly())
throwReadonlyCannotInsert(entity->getType(), entity->getName()); throwReadonlyCannotInsert(entity->getType(), entity->getName());
@ -611,12 +614,51 @@ void IAccessStorage::backup(BackupEntriesCollector & backup_entries_collector, c
} }
void IAccessStorage::restoreFromBackup(RestorerFromBackup &) void IAccessStorage::restoreFromBackup(RestorerFromBackup & restorer)
{ {
if (!isRestoreAllowed()) if (!isRestoreAllowed())
throwRestoreNotAllowed(); throwRestoreNotAllowed();
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "restoreFromBackup() is not implemented in {}", getStorageType()); if (isReplicated() && !acquireReplicatedRestore(restorer))
return;
auto entities = restorer.getAccessEntitiesToRestore();
if (entities.empty())
return;
auto create_access = restorer.getRestoreSettings().create_access;
bool replace_if_exists = (create_access == RestoreAccessCreationMode::kReplace);
bool throw_if_exists = (create_access == RestoreAccessCreationMode::kCreate);
restorer.addDataRestoreTask([this, entities_to_restore = std::move(entities), replace_if_exists, throw_if_exists] mutable
{
std::unordered_map<UUID, UUID> new_to_existing_ids;
for (auto & [id, entity] : entities_to_restore)
{
UUID existing_entity_id;
if (!insert(id, entity, replace_if_exists, throw_if_exists, &existing_entity_id))
{
/// Couldn't insert `entity` because there is an existing entity with the same name.
new_to_existing_ids[id] = existing_entity_id;
}
}
if (!new_to_existing_ids.empty())
{
/// If new entities restored from backup have dependencies on other entities from backup which were not restored because they existed,
/// then we should correct those dependencies.
auto update_func = [&](const AccessEntityPtr & entity) -> AccessEntityPtr
{
auto res = entity;
IAccessEntity::replaceDependencies(res, new_to_existing_ids);
return res;
};
std::vector<UUID> ids;
ids.reserve(entities_to_restore.size());
boost::copy(entities_to_restore | boost::adaptors::map_keys, std::back_inserter(ids));
tryUpdate(ids, update_func);
}
});
} }

View File

@ -64,6 +64,9 @@ public:
/// Returns true if this entity is readonly. /// Returns true if this entity is readonly.
virtual bool isReadOnly(const UUID &) const { return isReadOnly(); } virtual bool isReadOnly(const UUID &) const { return isReadOnly(); }
/// Returns true if this storage is replicated.
virtual bool isReplicated() const { return false; }
/// Starts periodic reloading and updating of entities in this storage. /// Starts periodic reloading and updating of entities in this storage.
virtual void startPeriodicReloading() {} virtual void startPeriodicReloading() {}
@ -153,8 +156,8 @@ public:
/// Inserts an entity to the storage. Returns ID of a new entry in the storage. /// Inserts an entity to the storage. Returns ID of a new entry in the storage.
/// Throws an exception if the specified name already exists. /// Throws an exception if the specified name already exists.
UUID insert(const AccessEntityPtr & entity); UUID insert(const AccessEntityPtr & entity);
std::optional<UUID> insert(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists); std::optional<UUID> insert(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id = nullptr);
bool insert(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists); bool insert(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id = nullptr);
std::vector<UUID> insert(const std::vector<AccessEntityPtr> & multiple_entities, bool replace_if_exists = false, bool throw_if_exists = true); std::vector<UUID> insert(const std::vector<AccessEntityPtr> & multiple_entities, bool replace_if_exists = false, bool throw_if_exists = true);
std::vector<UUID> insert(const std::vector<AccessEntityPtr> & multiple_entities, const std::vector<UUID> & ids, bool replace_if_exists = false, bool throw_if_exists = true); std::vector<UUID> insert(const std::vector<AccessEntityPtr> & multiple_entities, const std::vector<UUID> & ids, bool replace_if_exists = false, bool throw_if_exists = true);
@ -218,7 +221,7 @@ protected:
virtual std::vector<UUID> findAllImpl(AccessEntityType type) const = 0; virtual std::vector<UUID> findAllImpl(AccessEntityType type) const = 0;
virtual AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const = 0; virtual AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const = 0;
virtual std::optional<std::pair<String, AccessEntityType>> readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const; virtual std::optional<std::pair<String, AccessEntityType>> readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const;
virtual bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists); virtual bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id);
virtual bool removeImpl(const UUID & id, bool throw_if_not_exists); virtual bool removeImpl(const UUID & id, bool throw_if_not_exists);
virtual bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists); virtual bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists);
virtual std::optional<AuthResult> authenticateImpl( virtual std::optional<AuthResult> authenticateImpl(
@ -240,6 +243,7 @@ protected:
LoggerPtr getLogger() const; LoggerPtr getLogger() const;
static String formatEntityTypeWithName(AccessEntityType type, const String & name) { return AccessEntityTypeInfo::get(type).formatEntityNameWithType(name); } static String formatEntityTypeWithName(AccessEntityType type, const String & name) { return AccessEntityTypeInfo::get(type).formatEntityNameWithType(name); }
static void clearConflictsInEntitiesList(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, LoggerPtr log_); static void clearConflictsInEntitiesList(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, LoggerPtr log_);
virtual bool acquireReplicatedRestore(RestorerFromBackup &) const { return false; }
[[noreturn]] void throwNotFound(const UUID & id) const; [[noreturn]] void throwNotFound(const UUID & id) const;
[[noreturn]] void throwNotFound(AccessEntityType type, const String & name) const; [[noreturn]] void throwNotFound(AccessEntityType type, const String & name) const;
[[noreturn]] static void throwBadCast(const UUID & id, AccessEntityType type, const String & name, AccessEntityType required_type); [[noreturn]] static void throwBadCast(const UUID & id, AccessEntityType type, const String & name, AccessEntityType required_type);

View File

@ -1,7 +1,5 @@
#include <Access/MemoryAccessStorage.h> #include <Access/MemoryAccessStorage.h>
#include <Access/AccessChangesNotifier.h> #include <Access/AccessChangesNotifier.h>
#include <Backups/RestorerFromBackup.h>
#include <Backups/RestoreSettings.h>
#include <base/scope_guard.h> #include <base/scope_guard.h>
#include <boost/container/flat_set.hpp> #include <boost/container/flat_set.hpp>
#include <boost/range/adaptor/map.hpp> #include <boost/range/adaptor/map.hpp>
@ -63,14 +61,14 @@ AccessEntityPtr MemoryAccessStorage::readImpl(const UUID & id, bool throw_if_not
} }
bool MemoryAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists) bool MemoryAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id)
{ {
std::lock_guard lock{mutex}; std::lock_guard lock{mutex};
return insertNoLock(id, new_entity, replace_if_exists, throw_if_exists); return insertNoLock(id, new_entity, replace_if_exists, throw_if_exists, conflicting_id);
} }
bool MemoryAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists) bool MemoryAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id)
{ {
const String & name = new_entity->getName(); const String & name = new_entity->getName();
AccessEntityType type = new_entity->getType(); AccessEntityType type = new_entity->getType();
@ -86,9 +84,15 @@ bool MemoryAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr &
if (name_collision && !replace_if_exists) if (name_collision && !replace_if_exists)
{ {
if (throw_if_exists) if (throw_if_exists)
{
throwNameCollisionCannotInsert(type, name); throwNameCollisionCannotInsert(type, name);
}
else else
{
if (conflicting_id)
*conflicting_id = id_by_name;
return false; return false;
}
} }
auto it_by_id = entries_by_id.find(id); auto it_by_id = entries_by_id.find(id);
@ -97,9 +101,15 @@ bool MemoryAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr &
{ {
const auto & existing_entry = it_by_id->second; const auto & existing_entry = it_by_id->second;
if (throw_if_exists) if (throw_if_exists)
{
throwIDCollisionCannotInsert(id, type, name, existing_entry.entity->getType(), existing_entry.entity->getName()); throwIDCollisionCannotInsert(id, type, name, existing_entry.entity->getType(), existing_entry.entity->getName());
}
else else
{
if (conflicting_id)
*conflicting_id = id;
return false; return false;
}
} }
/// Remove collisions if necessary. /// Remove collisions if necessary.
@ -270,28 +280,7 @@ void MemoryAccessStorage::setAll(const std::vector<std::pair<UUID, AccessEntityP
/// Insert or update entities. /// Insert or update entities.
for (const auto & [id, entity] : entities_without_conflicts) for (const auto & [id, entity] : entities_without_conflicts)
insertNoLock(id, entity, /* replace_if_exists = */ true, /* throw_if_exists = */ false); insertNoLock(id, entity, /* replace_if_exists = */ true, /* throw_if_exists = */ false, /* conflicting_id = */ nullptr);
}
void MemoryAccessStorage::restoreFromBackup(RestorerFromBackup & restorer)
{
if (!isRestoreAllowed())
throwRestoreNotAllowed();
auto entities = restorer.getAccessEntitiesToRestore();
if (entities.empty())
return;
auto create_access = restorer.getRestoreSettings().create_access;
bool replace_if_exists = (create_access == RestoreAccessCreationMode::kReplace);
bool throw_if_exists = (create_access == RestoreAccessCreationMode::kCreate);
restorer.addDataRestoreTask([this, my_entities = std::move(entities), replace_if_exists, throw_if_exists]
{
for (const auto & [id, entity] : my_entities)
insert(id, entity, replace_if_exists, throw_if_exists);
});
} }
} }

View File

@ -34,17 +34,16 @@ public:
bool exists(const UUID & id) const override; bool exists(const UUID & id) const override;
bool isBackupAllowed() const override { return backup_allowed; } bool isBackupAllowed() const override { return backup_allowed; }
void restoreFromBackup(RestorerFromBackup & restorer) override;
private: private:
std::optional<UUID> findImpl(AccessEntityType type, const String & name) const override; std::optional<UUID> findImpl(AccessEntityType type, const String & name) const override;
std::vector<UUID> findAllImpl(AccessEntityType type) const override; std::vector<UUID> findAllImpl(AccessEntityType type) const override;
AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override; AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override;
bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) override; bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id) override;
bool removeImpl(const UUID & id, bool throw_if_not_exists) override; bool removeImpl(const UUID & id, bool throw_if_not_exists) override;
bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override; bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override;
bool insertNoLock(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists); bool insertNoLock(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id);
bool removeNoLock(const UUID & id, bool throw_if_not_exists); bool removeNoLock(const UUID & id, bool throw_if_not_exists);
bool updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists); bool updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists);

View File

@ -353,7 +353,7 @@ void MultipleAccessStorage::reload(ReloadMode reload_mode)
} }
bool MultipleAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) bool MultipleAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id)
{ {
std::shared_ptr<IAccessStorage> storage_for_insertion; std::shared_ptr<IAccessStorage> storage_for_insertion;
@ -376,7 +376,7 @@ bool MultipleAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr &
getStorageName()); getStorageName());
} }
if (storage_for_insertion->insert(id, entity, replace_if_exists, throw_if_exists)) if (storage_for_insertion->insert(id, entity, replace_if_exists, throw_if_exists, conflicting_id))
{ {
std::lock_guard lock{mutex}; std::lock_guard lock{mutex};
ids_cache.set(id, storage_for_insertion); ids_cache.set(id, storage_for_insertion);

View File

@ -67,7 +67,7 @@ protected:
std::vector<UUID> findAllImpl(AccessEntityType type) const override; std::vector<UUID> findAllImpl(AccessEntityType type) const override;
AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override; AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override;
std::optional<std::pair<String, AccessEntityType>> readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const override; std::optional<std::pair<String, AccessEntityType>> readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const override;
bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) override; bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id) override;
bool removeImpl(const UUID & id, bool throw_if_not_exists) override; bool removeImpl(const UUID & id, bool throw_if_not_exists) override;
bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override; bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override;
std::optional<AuthResult> authenticateImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool throw_if_user_not_exists, bool allow_no_password, bool allow_plaintext_password) const override; std::optional<AuthResult> authenticateImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool throw_if_user_not_exists, bool allow_no_password, bool allow_plaintext_password) const override;

View File

@ -24,7 +24,7 @@ std::vector<UUID> Quota::findDependencies() const
return to_roles.findDependencies(); return to_roles.findDependencies();
} }
void Quota::replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) void Quota::doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
{ {
to_roles.replaceDependencies(old_to_new_ids); to_roles.replaceDependencies(old_to_new_ids);
} }

View File

@ -47,7 +47,7 @@ struct Quota : public IAccessEntity
AccessEntityType getType() const override { return TYPE; } AccessEntityType getType() const override { return TYPE; }
std::vector<UUID> findDependencies() const override; std::vector<UUID> findDependencies() const override;
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override; void doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
bool isBackupAllowed() const override { return true; } bool isBackupAllowed() const override { return true; }
}; };

View File

@ -5,10 +5,9 @@
#include <Access/AccessChangesNotifier.h> #include <Access/AccessChangesNotifier.h>
#include <Access/AccessBackup.h> #include <Access/AccessBackup.h>
#include <Backups/BackupEntriesCollector.h> #include <Backups/BackupEntriesCollector.h>
#include <Backups/RestorerFromBackup.h>
#include <Backups/RestoreSettings.h>
#include <Backups/IBackupCoordination.h> #include <Backups/IBackupCoordination.h>
#include <Backups/IRestoreCoordination.h> #include <Backups/IRestoreCoordination.h>
#include <Backups/RestorerFromBackup.h>
#include <IO/ReadHelpers.h> #include <IO/ReadHelpers.h>
#include <Interpreters/Context.h> #include <Interpreters/Context.h>
#include <Common/ZooKeeper/KeeperException.h> #include <Common/ZooKeeper/KeeperException.h>
@ -120,7 +119,7 @@ static void retryOnZooKeeperUserError(size_t attempts, Func && function)
} }
} }
bool ReplicatedAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists) bool ReplicatedAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id)
{ {
const AccessEntityTypeInfo type_info = AccessEntityTypeInfo::get(new_entity->getType()); const AccessEntityTypeInfo type_info = AccessEntityTypeInfo::get(new_entity->getType());
const String & name = new_entity->getName(); const String & name = new_entity->getName();
@ -128,7 +127,7 @@ bool ReplicatedAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr
auto zookeeper = getZooKeeper(); auto zookeeper = getZooKeeper();
bool ok = false; bool ok = false;
retryOnZooKeeperUserError(10, [&]{ ok = insertZooKeeper(zookeeper, id, new_entity, replace_if_exists, throw_if_exists); }); retryOnZooKeeperUserError(10, [&]{ ok = insertZooKeeper(zookeeper, id, new_entity, replace_if_exists, throw_if_exists, conflicting_id); });
if (!ok) if (!ok)
return false; return false;
@ -143,7 +142,8 @@ bool ReplicatedAccessStorage::insertZooKeeper(
const UUID & id, const UUID & id,
const AccessEntityPtr & new_entity, const AccessEntityPtr & new_entity,
bool replace_if_exists, bool replace_if_exists,
bool throw_if_exists) bool throw_if_exists,
UUID * conflicting_id)
{ {
const String & name = new_entity->getName(); const String & name = new_entity->getName();
const AccessEntityType type = new_entity->getType(); const AccessEntityType type = new_entity->getType();
@ -167,27 +167,52 @@ bool ReplicatedAccessStorage::insertZooKeeper(
if (res == Coordination::Error::ZNODEEXISTS) if (res == Coordination::Error::ZNODEEXISTS)
{ {
if (!throw_if_exists && !replace_if_exists) if (!replace_if_exists)
return false; /// Couldn't insert a new entity.
if (throw_if_exists)
{ {
if (responses[0]->error == Coordination::Error::ZNODEEXISTS) if (responses[0]->error == Coordination::Error::ZNODEEXISTS)
{ {
/// To fail with a nice error message, we need info about what already exists. /// Couldn't insert the new entity because there is an existing entity with such UUID.
/// This itself could fail if the conflicting uuid disappears in the meantime. if (throw_if_exists)
/// If that happens, then we'll just retry from the start. {
String existing_entity_definition = zookeeper->get(entity_path); /// To fail with a nice error message, we need info about what already exists.
/// This itself can fail if the conflicting uuid disappears in the meantime.
/// If that happens, then retryOnZooKeeperUserError() will just retry the operation from the start.
String existing_entity_definition = zookeeper->get(entity_path);
AccessEntityPtr existing_entity = deserializeAccessEntity(existing_entity_definition, entity_path); AccessEntityPtr existing_entity = deserializeAccessEntity(existing_entity_definition, entity_path);
AccessEntityType existing_type = existing_entity->getType(); AccessEntityType existing_type = existing_entity->getType();
String existing_name = existing_entity->getName(); String existing_name = existing_entity->getName();
throwIDCollisionCannotInsert(id, type, name, existing_type, existing_name); throwIDCollisionCannotInsert(id, type, name, existing_type, existing_name);
}
else
{
if (conflicting_id)
*conflicting_id = id;
return false;
}
}
else if (responses[1]->error == Coordination::Error::ZNODEEXISTS)
{
/// Couldn't insert the new entity because there is an existing entity with the same name.
if (throw_if_exists)
{
throwNameCollisionCannotInsert(type, name);
}
else
{
if (conflicting_id)
{
/// Get UUID of the existing entry with the same name.
/// This itself can fail if the conflicting name disappears in the meantime.
/// If that happens, then retryOnZooKeeperUserError() will just retry the operation from the start.
*conflicting_id = parseUUID(zookeeper->get(name_path));
}
return false;
}
} }
else else
{ {
/// Couldn't insert the new entity because there is an existing entity with such name. zkutil::KeeperMultiException::check(res, ops, responses);
throwNameCollisionCannotInsert(type, name);
} }
} }
@ -693,28 +718,10 @@ void ReplicatedAccessStorage::backup(BackupEntriesCollector & backup_entries_col
} }
void ReplicatedAccessStorage::restoreFromBackup(RestorerFromBackup & restorer) bool ReplicatedAccessStorage::acquireReplicatedRestore(RestorerFromBackup & restorer) const
{ {
if (!isRestoreAllowed())
throwRestoreNotAllowed();
auto restore_coordination = restorer.getRestoreCoordination(); auto restore_coordination = restorer.getRestoreCoordination();
if (!restore_coordination->acquireReplicatedAccessStorage(zookeeper_path)) return restore_coordination->acquireReplicatedAccessStorage(zookeeper_path);
return;
auto entities = restorer.getAccessEntitiesToRestore();
if (entities.empty())
return;
auto create_access = restorer.getRestoreSettings().create_access;
bool replace_if_exists = (create_access == RestoreAccessCreationMode::kReplace);
bool throw_if_exists = (create_access == RestoreAccessCreationMode::kCreate);
restorer.addDataRestoreTask([this, my_entities = std::move(entities), replace_if_exists, throw_if_exists]
{
for (const auto & [id, entity] : my_entities)
insert(id, entity, replace_if_exists, throw_if_exists);
});
} }
} }

View File

@ -26,6 +26,7 @@ public:
void shutdown() override; void shutdown() override;
const char * getStorageType() const override { return STORAGE_TYPE; } const char * getStorageType() const override { return STORAGE_TYPE; }
bool isReplicated() const override { return true; }
void startPeriodicReloading() override { startWatchingThread(); } void startPeriodicReloading() override { startWatchingThread(); }
void stopPeriodicReloading() override { stopWatchingThread(); } void stopPeriodicReloading() override { stopWatchingThread(); }
@ -35,7 +36,6 @@ public:
bool isBackupAllowed() const override { return backup_allowed; } bool isBackupAllowed() const override { return backup_allowed; }
void backup(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, AccessEntityType type) const override; void backup(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, AccessEntityType type) const override;
void restoreFromBackup(RestorerFromBackup & restorer) override;
private: private:
String zookeeper_path; String zookeeper_path;
@ -48,11 +48,11 @@ private:
std::unique_ptr<ThreadFromGlobalPool> watching_thread; std::unique_ptr<ThreadFromGlobalPool> watching_thread;
std::shared_ptr<ConcurrentBoundedQueue<UUID>> watched_queue; std::shared_ptr<ConcurrentBoundedQueue<UUID>> watched_queue;
bool insertImpl(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists) override; bool insertImpl(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id) override;
bool removeImpl(const UUID & id, bool throw_if_not_exists) override; bool removeImpl(const UUID & id, bool throw_if_not_exists) override;
bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override; bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override;
bool insertZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists); bool insertZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id);
bool removeZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, bool throw_if_not_exists); bool removeZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, bool throw_if_not_exists);
bool updateZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists); bool updateZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists);
@ -80,6 +80,7 @@ private:
std::optional<UUID> findImpl(AccessEntityType type, const String & name) const override; std::optional<UUID> findImpl(AccessEntityType type, const String & name) const override;
std::vector<UUID> findAllImpl(AccessEntityType type) const override; std::vector<UUID> findAllImpl(AccessEntityType type) const override;
AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override; AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override;
bool acquireReplicatedRestore(RestorerFromBackup & restorer) const override;
mutable std::mutex mutex; mutable std::mutex mutex;
MemoryAccessStorage memory_storage TSA_GUARDED_BY(mutex); MemoryAccessStorage memory_storage TSA_GUARDED_BY(mutex);

View File

@ -21,7 +21,7 @@ std::vector<UUID> Role::findDependencies() const
return res; return res;
} }
void Role::replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) void Role::doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
{ {
granted_roles.replaceDependencies(old_to_new_ids); granted_roles.replaceDependencies(old_to_new_ids);
settings.replaceDependencies(old_to_new_ids); settings.replaceDependencies(old_to_new_ids);

View File

@ -21,7 +21,7 @@ struct Role : public IAccessEntity
AccessEntityType getType() const override { return TYPE; } AccessEntityType getType() const override { return TYPE; }
std::vector<UUID> findDependencies() const override; std::vector<UUID> findDependencies() const override;
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override; void doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
bool isBackupAllowed() const override { return settings.isBackupAllowed(); } bool isBackupAllowed() const override { return settings.isBackupAllowed(); }
}; };

View File

@ -63,7 +63,7 @@ std::vector<UUID> RowPolicy::findDependencies() const
return to_roles.findDependencies(); return to_roles.findDependencies();
} }
void RowPolicy::replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) void RowPolicy::doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
{ {
to_roles.replaceDependencies(old_to_new_ids); to_roles.replaceDependencies(old_to_new_ids);
} }

View File

@ -50,7 +50,7 @@ struct RowPolicy : public IAccessEntity
AccessEntityType getType() const override { return TYPE; } AccessEntityType getType() const override { return TYPE; }
std::vector<UUID> findDependencies() const override; std::vector<UUID> findDependencies() const override;
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override; void doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
bool isBackupAllowed() const override { return true; } bool isBackupAllowed() const override { return true; }
/// Which roles or users should use this row policy. /// Which roles or users should use this row policy.

View File

@ -21,7 +21,7 @@ std::vector<UUID> SettingsProfile::findDependencies() const
return res; return res;
} }
void SettingsProfile::replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) void SettingsProfile::doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
{ {
elements.replaceDependencies(old_to_new_ids); elements.replaceDependencies(old_to_new_ids);
to_roles.replaceDependencies(old_to_new_ids); to_roles.replaceDependencies(old_to_new_ids);

View File

@ -22,7 +22,7 @@ struct SettingsProfile : public IAccessEntity
AccessEntityType getType() const override { return TYPE; } AccessEntityType getType() const override { return TYPE; }
std::vector<UUID> findDependencies() const override; std::vector<UUID> findDependencies() const override;
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override; void doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
bool isBackupAllowed() const override { return elements.isBackupAllowed(); } bool isBackupAllowed() const override { return elements.isBackupAllowed(); }
}; };

View File

@ -49,7 +49,7 @@ std::vector<UUID> User::findDependencies() const
return res; return res;
} }
void User::replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) void User::doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
{ {
default_roles.replaceDependencies(old_to_new_ids); default_roles.replaceDependencies(old_to_new_ids);
granted_roles.replaceDependencies(old_to_new_ids); granted_roles.replaceDependencies(old_to_new_ids);

View File

@ -32,7 +32,7 @@ struct User : public IAccessEntity
void setName(const String & name_) override; void setName(const String & name_) override;
std::vector<UUID> findDependencies() const override; std::vector<UUID> findDependencies() const override;
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override; void doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
bool isBackupAllowed() const override { return settings.isBackupAllowed(); } bool isBackupAllowed() const override { return settings.isBackupAllowed(); }
}; };

View File

@ -3,370 +3,89 @@
#include <Parsers/FunctionSecretArgumentsFinder.h> #include <Parsers/FunctionSecretArgumentsFinder.h>
#include <Analyzer/ConstantNode.h> #include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h> #include <Analyzer/FunctionNode.h>
#include <Analyzer/IQueryTreeNode.h>
#include <Analyzer/IdentifierNode.h> #include <Analyzer/IdentifierNode.h>
#include <Analyzer/ListNode.h>
#include <Common/KnownObjectNames.h>
#include <Core/QualifiedTableName.h>
#include <boost/algorithm/string/predicate.hpp>
namespace DB namespace DB
{ {
class FunctionTreeNode : public AbstractFunction
{
public:
class ArgumentTreeNode : public Argument
{
public:
explicit ArgumentTreeNode(const IQueryTreeNode * argument_) : argument(argument_) {}
std::unique_ptr<AbstractFunction> getFunction() const override
{
if (const auto * f = argument->as<FunctionNode>())
return std::make_unique<FunctionTreeNode>(*f);
return nullptr;
}
bool isIdentifier() const override { return argument->as<IdentifierNode>(); }
bool tryGetString(String * res, bool allow_identifier) const override
{
if (const auto * literal = argument->as<ConstantNode>())
{
if (literal->getValue().getType() != Field::Types::String)
return false;
if (res)
*res = literal->getValue().safeGet<String>();
return true;
}
if (allow_identifier)
{
if (const auto * id = argument->as<IdentifierNode>())
{
if (res)
*res = id->getIdentifier().getFullName();
return true;
}
}
return false;
}
private:
const IQueryTreeNode * argument = nullptr;
};
class ArgumentsTreeNode : public Arguments
{
public:
explicit ArgumentsTreeNode(const QueryTreeNodes * arguments_) : arguments(arguments_) {}
size_t size() const override { return arguments ? arguments->size() : 0; }
std::unique_ptr<Argument> at(size_t n) const override { return std::make_unique<ArgumentTreeNode>(arguments->at(n).get()); }
private:
const QueryTreeNodes * arguments = nullptr;
};
explicit FunctionTreeNode(const FunctionNode & function_) : function(&function_)
{
if (const auto & nodes = function->getArguments().getNodes(); !nodes.empty())
arguments = std::make_unique<ArgumentsTreeNode>(&nodes);
}
String name() const override { return function->getFunctionName(); }
private:
const FunctionNode * function = nullptr;
};
/// Finds arguments of a specified function which should not be displayed for most users for security reasons. /// Finds arguments of a specified function which should not be displayed for most users for security reasons.
/// That involves passwords and secret keys. /// That involves passwords and secret keys.
class FunctionSecretArgumentsFinderTreeNode class FunctionSecretArgumentsFinderTreeNode : public FunctionSecretArgumentsFinder
{ {
public: public:
explicit FunctionSecretArgumentsFinderTreeNode(const FunctionNode & function_) : function(function_), arguments(function.getArguments()) explicit FunctionSecretArgumentsFinderTreeNode(const FunctionNode & function_)
: FunctionSecretArgumentsFinder(std::make_unique<FunctionTreeNode>(function_))
{ {
if (arguments.getNodes().empty()) if (!function->hasArguments())
return; return;
findFunctionSecretArguments(); findOrdinaryFunctionSecretArguments();
} }
/// Describes which arguments of a function have to be hidden.
struct Result
{
    /// A default-constructed Result means that no arguments will be hidden.
    size_t start = static_cast<size_t>(-1);

    /// Mostly it's either 0 or 1. There are only a few cases where `count` can be greater than 1
    /// (e.g. see `encrypt`). In all known cases secret arguments are consecutive.
    size_t count = 0;

    /// Arguments like `password = 'password'` are considered as named arguments.
    bool are_named = false;

    /// E.g. "headers" in `url('..', headers('foo' = '[HIDDEN]'))`
    std::vector<std::string> nested_maps;

    /// True if there is at least one secret argument or at least one nested map to hide.
    bool hasSecrets() const
    {
        const bool nothing_to_hide = (count == 0) && nested_maps.empty();
        return !nothing_to_hide;
    }
};
FunctionSecretArgumentsFinder::Result getResult() const { return result; } FunctionSecretArgumentsFinder::Result getResult() const { return result; }
private:
const FunctionNode & function;
const ListNode & arguments;
FunctionSecretArgumentsFinder::Result result;
/// Extends the range of hidden arguments stored in `result` so that it ends at `index`.
/// An index outside of the argument list is ignored.
/// The range stays "named" only while every marked argument is named.
void markSecretArgument(size_t index, bool argument_is_named = false)
{
    const size_t total_arguments = arguments.getNodes().size();
    if (index >= total_arguments)
        return;

    const bool is_first_secret = (result.count == 0);
    if (is_first_secret)
    {
        result.start = index;
        result.are_named = argument_is_named;
    }

    chassert(index >= result.start); /// We always check arguments consecutively
    result.count = index + 1 - result.start;

    /// A single unnamed argument makes the whole range unnamed.
    result.are_named = result.are_named && argument_is_named;
}
/// Dispatches to the function-specific search for secret arguments based on the function name.
/// Functions with other names are left untouched.
void findFunctionSecretArguments()
{
    const auto & name = function.getFunctionName();

    /// mysql('host:port', 'database', 'table', 'user', 'password', ...)
    /// postgresql('host:port', 'database', 'table', 'user', 'password', ...)
    /// mongodb('host:port', 'database', 'collection', 'user', 'password', ...)
    if (name == "mysql" || name == "postgresql" || name == "mongodb")
    {
        findMySQLFunctionSecretArguments();
        return;
    }

    /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
    if (name == "s3" || name == "cosn" || name == "oss" || name == "deltaLake" || name == "hudi" || name == "iceberg")
    {
        findS3FunctionSecretArguments(/* is_cluster_function= */ false);
        return;
    }

    /// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...)
    if (name == "s3Cluster")
    {
        findS3FunctionSecretArguments(/* is_cluster_function= */ true);
        return;
    }

    /// remote('addresses_expr', 'db', 'table', 'user', 'password', ...)
    if (name == "remote" || name == "remoteSecure")
    {
        findRemoteFunctionSecretArguments();
        return;
    }

    /// encrypt('mode', 'plaintext', 'key' [, iv, aad])
    if (name == "encrypt" || name == "decrypt" || name == "aes_encrypt_mysql" || name == "aes_decrypt_mysql" || name == "tryDecrypt")
    {
        findEncryptionFunctionSecretArguments();
        return;
    }

    if (name == "url")
        findURLSecretArguments();
}
/// Finds the password among the arguments of mysql()-like table functions.
void findMySQLFunctionSecretArguments()
{
    if (!isNamedCollectionName(0))
    {
        /// mysql('host:port', 'database', 'table', 'user', 'password', ...)
        markSecretArgument(4);
        return;
    }

    /// mysql(named_collection, ..., password = 'password', ...)
    findSecretNamedArgument("password", 1);
}
/// Returns the number of arguments excluding "headers" and "extra_credentials" (which should
/// always be at the end). Marks "headers" as secret, if found.
size_t excludeS3OrURLNestedMaps()
{
const auto & nodes = arguments.getNodes();
size_t count = nodes.size();
while (count > 0)
{
const FunctionNode * f = nodes.at(count - 1)->as<FunctionNode>();
if (!f)
break;
if (f->getFunctionName() == "headers")
result.nested_maps.push_back(f->getFunctionName());
else if (f->getFunctionName() != "extra_credentials")
break;
count -= 1;
}
return count;
}
void findS3FunctionSecretArguments(bool is_cluster_function)
{
/// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument.
size_t url_arg_idx = is_cluster_function ? 1 : 0;
if (!is_cluster_function && isNamedCollectionName(0))
{
/// s3(named_collection, ..., secret_access_key = 'secret_access_key', ...)
findSecretNamedArgument("secret_access_key", 1);
return;
}
/// We should check other arguments first because we don't need to do any replacement in case of
/// s3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
/// s3('url', 'format', 'structure' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
size_t count = excludeS3OrURLNestedMaps();
if ((url_arg_idx + 3 <= count) && (count <= url_arg_idx + 4))
{
String second_arg;
if (tryGetStringFromArgument(url_arg_idx + 1, &second_arg))
{
if (boost::iequals(second_arg, "NOSIGN"))
return; /// The argument after 'url' is "NOSIGN".
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
}
}
/// We're going to replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
if (url_arg_idx + 2 < count)
markSecretArgument(url_arg_idx + 2);
}
/// Handles url(): unless the first argument is an identifier (a possible named collection),
/// scans the trailing headers(..) / extra_credentials(..) arguments so that "headers" gets recorded.
/// The returned argument count is not needed here.
void findURLSecretArguments()
{
    if (isNamedCollectionName(0))
        return;

    excludeS3OrURLNestedMaps();
}
/// Index-based convenience overload: returns false when `arg_idx` is past the end of the argument list,
/// otherwise forwards the argument node to the node-based overload.
bool tryGetStringFromArgument(size_t arg_idx, String * res, bool allow_identifier = true) const
{
    const auto & nodes = arguments.getNodes();
    return arg_idx < nodes.size() && tryGetStringFromArgument(nodes[arg_idx], res, allow_identifier);
}
/// Tries to interpret `argument` as a string.
///  - A ConstantNode yields its value only if that value has type String; any other constant is rejected.
///  - An IdentifierNode yields its full name, but only when `allow_identifier` is true.
/// `res` may be nullptr when the caller only needs to know whether the argument is string-like.
/// Returns true if a string could be obtained.
///
/// The node pointer is taken by const reference: the function only inspects the node, so copying
/// the pointer on every call (presumably a shared_ptr, i.e. a reference-count update) is unnecessary.
static bool tryGetStringFromArgument(const QueryTreeNodePtr & argument, String * res, bool allow_identifier = true)
{
    if (const auto * literal = argument->as<ConstantNode>())
    {
        const auto & value = literal->getValue();
        if (value.getType() != Field::Types::String)
            return false;
        if (res)
            *res = value.safeGet<String>();
        return true;
    }

    if (allow_identifier)
    {
        if (const auto * id = argument->as<IdentifierNode>())
        {
            if (res)
                *res = id->getIdentifier().getFullName();
            return true;
        }
    }

    return false;
}
void findRemoteFunctionSecretArguments()
{
if (isNamedCollectionName(0))
{
/// remote(named_collection, ..., password = 'password', ...)
findSecretNamedArgument("password", 1);
return;
}
/// We're going to replace 'password' with '[HIDDEN'] for the following signatures:
/// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key])
/// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key])
/// remote('addresses_expr', table_function(), 'user' [, 'password'] [, sharding_key])
/// But we should check the number of arguments first because we don't need to do any replacements in case of
/// remote('addresses_expr', db.table)
if (arguments.getNodes().size() < 3)
return;
size_t arg_num = 1;
/// Skip 1 or 2 arguments with table_function() or db.table or 'db', 'table'.
const auto * table_function = arguments.getNodes()[arg_num]->as<FunctionNode>();
if (table_function && KnownTableFunctionNames::instance().exists(table_function->getFunctionName()))
{
++arg_num;
}
else
{
std::optional<String> database;
std::optional<QualifiedTableName> qualified_table_name;
if (!tryGetDatabaseNameOrQualifiedTableName(arg_num, database, qualified_table_name))
{
/// We couldn't evaluate the argument so we don't know whether it is 'db.table' or just 'db'.
/// Hence we can't figure out whether we should skip one argument 'user' or two arguments 'table', 'user'
/// before the argument 'password'. So it's safer to wipe two arguments just in case.
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
/// before wiping it (because the `password` argument is always a literal string).
if (tryGetStringFromArgument(arg_num + 2, nullptr, /* allow_identifier= */ false))
{
/// Wipe either `password` or `user`.
markSecretArgument(arg_num + 2);
}
if (tryGetStringFromArgument(arg_num + 3, nullptr, /* allow_identifier= */ false))
{
/// Wipe either `password` or `sharding_key`.
markSecretArgument(arg_num + 3);
}
return;
}
/// Skip the current argument (which is either a database name or a qualified table name).
++arg_num;
if (database)
{
/// Skip the 'table' argument if the previous argument was a database name.
++arg_num;
}
}
/// Skip username.
++arg_num;
/// Do our replacement:
/// remote('addresses_expr', db.table, 'user', 'password', ...) -> remote('addresses_expr', db.table, 'user', '[HIDDEN]', ...)
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
/// before wiping it (because the `password` argument is always a literal string).
bool can_be_password = tryGetStringFromArgument(arg_num, nullptr, /* allow_identifier= */ false);
if (can_be_password)
markSecretArgument(arg_num);
}
/// Tries to get either a database name or a qualified table name from an argument.
/// Empty string is also allowed (it means the default database).
/// The function is used by findRemoteFunctionSecretArguments() to determine how many arguments to skip before a password.
bool tryGetDatabaseNameOrQualifiedTableName(
size_t arg_idx,
std::optional<String> & res_database,
std::optional<QualifiedTableName> & res_qualified_table_name) const
{
res_database.reset();
res_qualified_table_name.reset();
String str;
if (!tryGetStringFromArgument(arg_idx, &str, /* allow_identifier= */ true))
return false;
if (str.empty())
{
res_database = "";
return true;
}
auto qualified_table_name = QualifiedTableName::tryParseFromString(str);
if (!qualified_table_name)
return false;
if (qualified_table_name->database.empty())
res_database = std::move(qualified_table_name->table);
else
res_qualified_table_name = std::move(qualified_table_name);
return true;
}
/// Handles encrypt()/decrypt()-like functions: everything after the first argument ('mode') is secret.
/// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]')
/// Does nothing when there are no arguments at all.
void findEncryptionFunctionSecretArguments()
{
    const size_t total_arguments = arguments.getNodes().size();
    if (total_arguments == 0)
        return;

    result.start = 1;
    result.count = total_arguments - 1;
}
/// Whether a specified argument can be the name of a named collection?
bool isNamedCollectionName(size_t arg_idx) const
{
if (arguments.getNodes().size() <= arg_idx)
return false;
const auto * identifier = arguments.getNodes()[arg_idx]->as<IdentifierNode>();
return identifier != nullptr;
}
/// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified.
void findSecretNamedArgument(const std::string_view & key, size_t start = 0)
{
for (size_t i = start; i < arguments.getNodes().size(); ++i)
{
const auto & argument = arguments.getNodes()[i];
const auto * equals_func = argument->as<FunctionNode>();
if (!equals_func || (equals_func->getFunctionName() != "equals"))
continue;
const auto * expr_list = equals_func->getArguments().as<ListNode>();
if (!expr_list)
continue;
const auto & equal_args = expr_list->getNodes();
if (equal_args.size() != 2)
continue;
String found_key;
if (!tryGetStringFromArgument(equal_args[0], &found_key))
continue;
if (found_key == key)
markSecretArgument(i, /* argument_is_named= */ true);
}
}
}; };
} }

View File

@ -2564,8 +2564,8 @@ void checkFunctionNodeHasEmptyNullsAction(FunctionNode const & node)
if (node.getNullsAction() != NullsAction::EMPTY) if (node.getNullsAction() != NullsAction::EMPTY)
throw Exception( throw Exception(
ErrorCodes::SYNTAX_ERROR, ErrorCodes::SYNTAX_ERROR,
"Function with name '{}' cannot use {} NULLS", "Function with name {} cannot use {} NULLS",
node.getFunctionName(), backQuote(node.getFunctionName()),
node.getNullsAction() == NullsAction::IGNORE_NULLS ? "IGNORE" : "RESPECT"); node.getNullsAction() == NullsAction::IGNORE_NULLS ? "IGNORE" : "RESPECT");
} }
} }
@ -3228,16 +3228,16 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
auto hints = NamePrompter<2>::getHints(function_name, possible_function_names); auto hints = NamePrompter<2>::getHints(function_name, possible_function_names);
throw Exception(ErrorCodes::UNKNOWN_FUNCTION, throw Exception(ErrorCodes::UNKNOWN_FUNCTION,
"Function with name '{}' does not exist. In scope {}{}", "Function with name {} does not exist. In scope {}{}",
function_name, backQuote(function_name),
scope.scope_node->formatASTForErrorMessage(), scope.scope_node->formatASTForErrorMessage(),
getHintsErrorMessageSuffix(hints)); getHintsErrorMessageSuffix(hints));
} }
if (!function_lambda_arguments_indexes.empty()) if (!function_lambda_arguments_indexes.empty())
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
"Aggregate function '{}' does not support lambda arguments", "Aggregate function {} does not support lambda arguments",
function_name); backQuote(function_name));
auto action = function_node_ptr->getNullsAction(); auto action = function_node_ptr->getNullsAction();
std::string aggregate_function_name = rewriteAggregateFunctionNameIfNeeded(function_name, action, scope.context); std::string aggregate_function_name = rewriteAggregateFunctionNameIfNeeded(function_name, action, scope.context);
@ -3674,10 +3674,10 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(
auto hints = IdentifierResolver::collectIdentifierTypoHints(unresolved_identifier, valid_identifiers); auto hints = IdentifierResolver::collectIdentifierTypoHints(unresolved_identifier, valid_identifiers);
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown {}{} identifier '{}' in scope {}{}", throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown {}{} identifier {} in scope {}{}",
toStringLowercase(IdentifierLookupContext::EXPRESSION), toStringLowercase(IdentifierLookupContext::EXPRESSION),
message_clarification, message_clarification,
unresolved_identifier.getFullName(), backQuote(unresolved_identifier.getFullName()),
scope.scope_node->formatASTForErrorMessage(), scope.scope_node->formatASTForErrorMessage(),
getHintsErrorMessageSuffix(hints)); getHintsErrorMessageSuffix(hints));
} }

View File

@ -456,6 +456,9 @@ void Connection::sendAddendum()
writeStringBinary(proto_recv_chunked, *out); writeStringBinary(proto_recv_chunked, *out);
} }
if (server_revision >= DBMS_MIN_REVISION_WITH_VERSIONED_PARALLEL_REPLICAS_PROTOCOL)
writeVarUInt(DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION, *out);
out->next(); out->next();
} }
@ -526,6 +529,8 @@ void Connection::receiveHello(const Poco::Timespan & handshake_timeout)
readVarUInt(server_version_major, *in); readVarUInt(server_version_major, *in);
readVarUInt(server_version_minor, *in); readVarUInt(server_version_minor, *in);
readVarUInt(server_revision, *in); readVarUInt(server_revision, *in);
if (server_revision >= DBMS_MIN_REVISION_WITH_VERSIONED_PARALLEL_REPLICAS_PROTOCOL)
readVarUInt(server_parallel_replicas_protocol_version, *in);
if (server_revision >= DBMS_MIN_REVISION_WITH_SERVER_TIMEZONE) if (server_revision >= DBMS_MIN_REVISION_WITH_SERVER_TIMEZONE)
readStringBinary(server_timezone, *in); readStringBinary(server_timezone, *in);
if (server_revision >= DBMS_MIN_REVISION_WITH_SERVER_DISPLAY_NAME) if (server_revision >= DBMS_MIN_REVISION_WITH_SERVER_DISPLAY_NAME)
@ -966,7 +971,7 @@ void Connection::sendReadTaskResponse(const String & response)
void Connection::sendMergeTreeReadTaskResponse(const ParallelReadResponse & response) void Connection::sendMergeTreeReadTaskResponse(const ParallelReadResponse & response)
{ {
writeVarUInt(Protocol::Client::MergeTreeReadTaskResponse, *out); writeVarUInt(Protocol::Client::MergeTreeReadTaskResponse, *out);
response.serialize(*out); response.serialize(*out, server_parallel_replicas_protocol_version);
out->finishChunk(); out->finishChunk();
out->next(); out->next();
} }
@ -1420,7 +1425,7 @@ ParallelReadRequest Connection::receiveParallelReadRequest() const
InitialAllRangesAnnouncement Connection::receiveInitialParallelReadAnnouncement() const InitialAllRangesAnnouncement Connection::receiveInitialParallelReadAnnouncement() const
{ {
return InitialAllRangesAnnouncement::deserialize(*in); return InitialAllRangesAnnouncement::deserialize(*in, server_parallel_replicas_protocol_version);
} }

View File

@ -212,6 +212,7 @@ private:
UInt64 server_version_minor = 0; UInt64 server_version_minor = 0;
UInt64 server_version_patch = 0; UInt64 server_version_patch = 0;
UInt64 server_revision = 0; UInt64 server_revision = 0;
UInt64 server_parallel_replicas_protocol_version = 0;
String server_timezone; String server_timezone;
String server_display_name; String server_display_name;

View File

@ -200,7 +200,6 @@ void MultiplexedConnections::sendQuery(
} }
void MultiplexedConnections::sendIgnoredPartUUIDs(const std::vector<UUID> & uuids) void MultiplexedConnections::sendIgnoredPartUUIDs(const std::vector<UUID> & uuids)
{ {
std::lock_guard lock(cancel_mutex); std::lock_guard lock(cancel_mutex);

View File

@ -64,6 +64,7 @@ static struct InitFiu
REGULAR(lazy_pipe_fds_fail_close) \ REGULAR(lazy_pipe_fds_fail_close) \
PAUSEABLE(infinite_sleep) \ PAUSEABLE(infinite_sleep) \
PAUSEABLE(stop_moving_part_before_swap_with_active) \ PAUSEABLE(stop_moving_part_before_swap_with_active) \
REGULAR(slowdown_index_analysis) \
namespace FailPoints namespace FailPoints

View File

@ -376,6 +376,7 @@ The server successfully detected this situation and will download merged part fr
M(ParallelReplicasReadAssignedMarks, "Sum across all replicas of how many of scheduled marks were assigned by consistent hash") \ M(ParallelReplicasReadAssignedMarks, "Sum across all replicas of how many of scheduled marks were assigned by consistent hash") \
M(ParallelReplicasReadUnassignedMarks, "Sum across all replicas of how many unassigned marks were scheduled") \ M(ParallelReplicasReadUnassignedMarks, "Sum across all replicas of how many unassigned marks were scheduled") \
M(ParallelReplicasReadAssignedForStealingMarks, "Sum across all replicas of how many of scheduled marks were assigned for stealing by consistent hash") \ M(ParallelReplicasReadAssignedForStealingMarks, "Sum across all replicas of how many of scheduled marks were assigned for stealing by consistent hash") \
M(ParallelReplicasReadMarks, "How many marks were read by the given replica") \
\ \
M(ParallelReplicasStealingByHashMicroseconds, "Time spent collecting segments meant for stealing by hash") \ M(ParallelReplicasStealingByHashMicroseconds, "Time spent collecting segments meant for stealing by hash") \
M(ParallelReplicasProcessingPartsMicroseconds, "Time spent processing data parts") \ M(ParallelReplicasProcessingPartsMicroseconds, "Time spent processing data parts") \
@ -529,6 +530,7 @@ The server successfully detected this situation and will download merged part fr
M(CachedReadBufferReadFromCacheMicroseconds, "Time reading from filesystem cache") \ M(CachedReadBufferReadFromCacheMicroseconds, "Time reading from filesystem cache") \
M(CachedReadBufferReadFromSourceBytes, "Bytes read from filesystem cache source (from remote fs, etc)") \ M(CachedReadBufferReadFromSourceBytes, "Bytes read from filesystem cache source (from remote fs, etc)") \
M(CachedReadBufferReadFromCacheBytes, "Bytes read from filesystem cache") \ M(CachedReadBufferReadFromCacheBytes, "Bytes read from filesystem cache") \
M(CachedReadBufferPredownloadedBytes, "Bytes read from filesystem cache source. Cache segments are read from left to right as a whole, it might be that we need to predownload some part of the segment irrelevant for the current task just to get to the needed data") \
M(CachedReadBufferCacheWriteBytes, "Bytes written from source (remote fs, etc) to filesystem cache") \ M(CachedReadBufferCacheWriteBytes, "Bytes written from source (remote fs, etc) to filesystem cache") \
M(CachedReadBufferCacheWriteMicroseconds, "Time spent writing data into filesystem cache") \ M(CachedReadBufferCacheWriteMicroseconds, "Time spent writing data into filesystem cache") \
M(CachedReadBufferCreateBufferMicroseconds, "Prepare buffer time") \ M(CachedReadBufferCreateBufferMicroseconds, "Prepare buffer time") \

View File

@ -181,12 +181,6 @@ void SetACLRequest::addRootPath(const String & root_path) { Coordination::addRoo
void GetACLRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); } void GetACLRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); }
void SyncRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); } void SyncRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); }
void MultiRequest::addRootPath(const String & root_path)
{
for (auto & request : requests)
request->addRootPath(root_path);
}
void CreateResponse::removeRootPath(const String & root_path) { Coordination::removeRootPath(path_created, root_path); } void CreateResponse::removeRootPath(const String & root_path) { Coordination::removeRootPath(path_created, root_path); }
void WatchResponse::removeRootPath(const String & root_path) { Coordination::removeRootPath(path, root_path); } void WatchResponse::removeRootPath(const String & root_path) { Coordination::removeRootPath(path, root_path); }

View File

@ -408,11 +408,17 @@ struct ReconfigResponse : virtual Response
size_t bytesSize() const override { return value.size() + sizeof(stat); } size_t bytesSize() const override { return value.size() + sizeof(stat); }
}; };
template <typename T>
struct MultiRequest : virtual Request struct MultiRequest : virtual Request
{ {
Requests requests; std::vector<T> requests;
void addRootPath(const String & root_path) override
{
for (auto & request : requests)
request->addRootPath(root_path);
}
void addRootPath(const String & root_path) override;
String getPath() const override { return {}; } String getPath() const override { return {}; }
size_t bytesSize() const override size_t bytesSize() const override

View File

@ -184,7 +184,7 @@ struct TestKeeperReconfigRequest final : ReconfigRequest, TestKeeperRequest
std::pair<ResponsePtr, Undo> process(TestKeeper::Container & container, int64_t zxid) const override; std::pair<ResponsePtr, Undo> process(TestKeeper::Container & container, int64_t zxid) const override;
}; };
struct TestKeeperMultiRequest final : MultiRequest, TestKeeperRequest struct TestKeeperMultiRequest final : MultiRequest<RequestPtr>, TestKeeperRequest
{ {
explicit TestKeeperMultiRequest(const Requests & generic_requests) explicit TestKeeperMultiRequest(const Requests & generic_requests)
: TestKeeperMultiRequest(std::span(generic_requests)) : TestKeeperMultiRequest(std::span(generic_requests))

View File

@ -18,14 +18,16 @@ using namespace DB;
void ZooKeeperResponse::write(WriteBuffer & out) const void ZooKeeperResponse::write(WriteBuffer & out) const
{ {
/// Excessive copy to calculate length. auto response_size = Coordination::size(xid) + Coordination::size(zxid) + Coordination::size(error);
WriteBufferFromOwnString buf;
Coordination::write(xid, buf);
Coordination::write(zxid, buf);
Coordination::write(error, buf);
if (error == Error::ZOK) if (error == Error::ZOK)
writeImpl(buf); response_size += sizeImpl();
Coordination::write(buf.str(), out);
Coordination::write(static_cast<int32_t>(response_size), out);
Coordination::write(xid, out);
Coordination::write(zxid, out);
Coordination::write(error, out);
if (error == Error::ZOK)
writeImpl(out);
} }
std::string ZooKeeperRequest::toString(bool short_format) const std::string ZooKeeperRequest::toString(bool short_format) const
@ -41,12 +43,12 @@ std::string ZooKeeperRequest::toString(bool short_format) const
void ZooKeeperRequest::write(WriteBuffer & out) const void ZooKeeperRequest::write(WriteBuffer & out) const
{ {
/// Excessive copy to calculate length. auto request_size = Coordination::size(xid) + Coordination::size(getOpNum()) + sizeImpl();
WriteBufferFromOwnString buf;
Coordination::write(xid, buf); Coordination::write(static_cast<int32_t>(request_size), out);
Coordination::write(getOpNum(), buf); Coordination::write(xid, out);
writeImpl(buf); Coordination::write(getOpNum(), out);
Coordination::write(buf.str(), out); writeImpl(out);
} }
void ZooKeeperSyncRequest::writeImpl(WriteBuffer & out) const void ZooKeeperSyncRequest::writeImpl(WriteBuffer & out) const
@ -54,6 +56,11 @@ void ZooKeeperSyncRequest::writeImpl(WriteBuffer & out) const
Coordination::write(path, out); Coordination::write(path, out);
} }
/// Size of the request body as reported by Coordination::size(): only `path` is counted.
size_t ZooKeeperSyncRequest::sizeImpl() const
{
    const auto path_size = Coordination::size(path);
    return path_size;
}
void ZooKeeperSyncRequest::readImpl(ReadBuffer & in) void ZooKeeperSyncRequest::readImpl(ReadBuffer & in)
{ {
Coordination::read(path, in); Coordination::read(path, in);
@ -74,6 +81,11 @@ void ZooKeeperSyncResponse::writeImpl(WriteBuffer & out) const
Coordination::write(path, out); Coordination::write(path, out);
} }
/// Size of the response body as reported by Coordination::size(): only `path` is counted.
size_t ZooKeeperSyncResponse::sizeImpl() const
{
    const auto path_size = Coordination::size(path);
    return path_size;
}
void ZooKeeperReconfigRequest::writeImpl(WriteBuffer & out) const void ZooKeeperReconfigRequest::writeImpl(WriteBuffer & out) const
{ {
Coordination::write(joining, out); Coordination::write(joining, out);
@ -82,6 +94,11 @@ void ZooKeeperReconfigRequest::writeImpl(WriteBuffer & out) const
Coordination::write(version, out); Coordination::write(version, out);
} }
size_t ZooKeeperReconfigRequest::sizeImpl() const
{
return Coordination::size(joining) + Coordination::size(leaving) + Coordination::size(new_members) + Coordination::size(version);
}
void ZooKeeperReconfigRequest::readImpl(ReadBuffer & in) void ZooKeeperReconfigRequest::readImpl(ReadBuffer & in)
{ {
Coordination::read(joining, in); Coordination::read(joining, in);
@ -109,6 +126,11 @@ void ZooKeeperReconfigResponse::writeImpl(WriteBuffer & out) const
Coordination::write(stat, out); Coordination::write(stat, out);
} }
/// Size of the response body: Coordination::size() summed over `value` and `stat`.
size_t ZooKeeperReconfigResponse::sizeImpl() const
{
    const auto value_size = Coordination::size(value);
    const auto stat_size = Coordination::size(stat);
    return value_size + stat_size;
}
void ZooKeeperWatchResponse::readImpl(ReadBuffer & in) void ZooKeeperWatchResponse::readImpl(ReadBuffer & in)
{ {
Coordination::read(type, in); Coordination::read(type, in);
@ -123,6 +145,11 @@ void ZooKeeperWatchResponse::writeImpl(WriteBuffer & out) const
Coordination::write(path, out); Coordination::write(path, out);
} }
/// Size of the response body: Coordination::size() summed over `type`, `state` and `path`.
size_t ZooKeeperWatchResponse::sizeImpl() const
{
    const auto type_size = Coordination::size(type);
    const auto state_size = Coordination::size(state);
    const auto path_size = Coordination::size(path);
    return type_size + state_size + path_size;
}
void ZooKeeperWatchResponse::write(WriteBuffer & out) const void ZooKeeperWatchResponse::write(WriteBuffer & out) const
{ {
if (error == Error::ZOK) if (error == Error::ZOK)
@ -137,6 +164,11 @@ void ZooKeeperAuthRequest::writeImpl(WriteBuffer & out) const
Coordination::write(data, out); Coordination::write(data, out);
} }
/// Size of the request body: Coordination::size() summed over `type`, `scheme` and `data`.
size_t ZooKeeperAuthRequest::sizeImpl() const
{
    const auto type_size = Coordination::size(type);
    const auto scheme_size = Coordination::size(scheme);
    const auto data_size = Coordination::size(data);
    return type_size + scheme_size + data_size;
}
void ZooKeeperAuthRequest::readImpl(ReadBuffer & in) void ZooKeeperAuthRequest::readImpl(ReadBuffer & in)
{ {
Coordination::read(type, in); Coordination::read(type, in);
@ -175,6 +207,12 @@ void ZooKeeperCreateRequest::writeImpl(WriteBuffer & out) const
Coordination::write(flags, out); Coordination::write(flags, out);
} }
/// Size of the request body: Coordination::size() summed over `path`, `data`, `acls` and an int32_t flags field.
/// A zero placeholder stands in for the flags; presumably only the width of the field matters for the size
/// (NOTE(review): confirm against Coordination::size for int32_t).
size_t ZooKeeperCreateRequest::sizeImpl() const
{
    int32_t flags_placeholder = 0;

    const auto path_size = Coordination::size(path);
    const auto data_size = Coordination::size(data);
    const auto acls_size = Coordination::size(acls);
    const auto flags_size = Coordination::size(flags_placeholder);
    return path_size + data_size + acls_size + flags_size;
}
void ZooKeeperCreateRequest::readImpl(ReadBuffer & in) void ZooKeeperCreateRequest::readImpl(ReadBuffer & in)
{ {
Coordination::read(path, in); Coordination::read(path, in);
@ -211,12 +249,22 @@ void ZooKeeperCreateResponse::writeImpl(WriteBuffer & out) const
Coordination::write(path_created, out); Coordination::write(path_created, out);
} }
size_t ZooKeeperCreateResponse::sizeImpl() const
{
return Coordination::size(path_created);
}
void ZooKeeperRemoveRequest::writeImpl(WriteBuffer & out) const void ZooKeeperRemoveRequest::writeImpl(WriteBuffer & out) const
{ {
Coordination::write(path, out); Coordination::write(path, out);
Coordination::write(version, out); Coordination::write(version, out);
} }
/// Size of the request body: Coordination::size() summed over `path` and `version`.
size_t ZooKeeperRemoveRequest::sizeImpl() const
{
    const auto path_size = Coordination::size(path);
    const auto version_size = Coordination::size(version);
    return path_size + version_size;
}
std::string ZooKeeperRemoveRequest::toStringImpl(bool /*short_format*/) const std::string ZooKeeperRemoveRequest::toStringImpl(bool /*short_format*/) const
{ {
return fmt::format( return fmt::format(
@ -244,6 +292,11 @@ void ZooKeeperRemoveRecursiveRequest::readImpl(ReadBuffer & in)
Coordination::read(remove_nodes_limit, in); Coordination::read(remove_nodes_limit, in);
} }
/// Size of the request body: Coordination::size() summed over `path` and `remove_nodes_limit`.
size_t ZooKeeperRemoveRecursiveRequest::sizeImpl() const
{
    const auto path_size = Coordination::size(path);
    const auto limit_size = Coordination::size(remove_nodes_limit);
    return path_size + limit_size;
}
std::string ZooKeeperRemoveRecursiveRequest::toStringImpl(bool /*short_format*/) const std::string ZooKeeperRemoveRecursiveRequest::toStringImpl(bool /*short_format*/) const
{ {
return fmt::format( return fmt::format(
@ -259,6 +312,11 @@ void ZooKeeperExistsRequest::writeImpl(WriteBuffer & out) const
Coordination::write(has_watch, out); Coordination::write(has_watch, out);
} }
/// Size of the request body: Coordination::size() summed over `path` and `has_watch`.
size_t ZooKeeperExistsRequest::sizeImpl() const
{
    const auto path_size = Coordination::size(path);
    const auto watch_flag_size = Coordination::size(has_watch);
    return path_size + watch_flag_size;
}
void ZooKeeperExistsRequest::readImpl(ReadBuffer & in) void ZooKeeperExistsRequest::readImpl(ReadBuffer & in)
{ {
Coordination::read(path, in); Coordination::read(path, in);
@ -280,12 +338,22 @@ void ZooKeeperExistsResponse::writeImpl(WriteBuffer & out) const
Coordination::write(stat, out); Coordination::write(stat, out);
} }
/// Size of the response body as reported by Coordination::size(): only `stat` is counted.
size_t ZooKeeperExistsResponse::sizeImpl() const
{
    const auto stat_size = Coordination::size(stat);
    return stat_size;
}
void ZooKeeperGetRequest::writeImpl(WriteBuffer & out) const void ZooKeeperGetRequest::writeImpl(WriteBuffer & out) const
{ {
Coordination::write(path, out); Coordination::write(path, out);
Coordination::write(has_watch, out); Coordination::write(has_watch, out);
} }
/// Size of the request body: Coordination::size() summed over `path` and `has_watch`.
size_t ZooKeeperGetRequest::sizeImpl() const
{
    const auto path_size = Coordination::size(path);
    const auto watch_flag_size = Coordination::size(has_watch);
    return path_size + watch_flag_size;
}
void ZooKeeperGetRequest::readImpl(ReadBuffer & in) void ZooKeeperGetRequest::readImpl(ReadBuffer & in)
{ {
Coordination::read(path, in); Coordination::read(path, in);
@ -309,6 +377,11 @@ void ZooKeeperGetResponse::writeImpl(WriteBuffer & out) const
Coordination::write(stat, out); Coordination::write(stat, out);
} }
/// Size of the response body: Coordination::size() summed over `data` and `stat`.
size_t ZooKeeperGetResponse::sizeImpl() const
{
    const auto data_size = Coordination::size(data);
    const auto stat_size = Coordination::size(stat);
    return data_size + stat_size;
}
void ZooKeeperSetRequest::writeImpl(WriteBuffer & out) const void ZooKeeperSetRequest::writeImpl(WriteBuffer & out) const
{ {
Coordination::write(path, out); Coordination::write(path, out);
@ -316,6 +389,11 @@ void ZooKeeperSetRequest::writeImpl(WriteBuffer & out) const
Coordination::write(version, out); Coordination::write(version, out);
} }
/// Size of the request body: Coordination::size() summed over `path`, `data` and `version`.
size_t ZooKeeperSetRequest::sizeImpl() const
{
    const auto path_size = Coordination::size(path);
    const auto data_size = Coordination::size(data);
    const auto version_size = Coordination::size(version);
    return path_size + data_size + version_size;
}
void ZooKeeperSetRequest::readImpl(ReadBuffer & in) void ZooKeeperSetRequest::readImpl(ReadBuffer & in)
{ {
Coordination::read(path, in); Coordination::read(path, in);
@ -342,12 +420,22 @@ void ZooKeeperSetResponse::writeImpl(WriteBuffer & out) const
Coordination::write(stat, out); Coordination::write(stat, out);
} }
/// Size of the response body as reported by Coordination::size(): only `stat` is counted.
size_t ZooKeeperSetResponse::sizeImpl() const
{
    const auto stat_size = Coordination::size(stat);
    return stat_size;
}
void ZooKeeperListRequest::writeImpl(WriteBuffer & out) const void ZooKeeperListRequest::writeImpl(WriteBuffer & out) const
{ {
Coordination::write(path, out); Coordination::write(path, out);
Coordination::write(has_watch, out); Coordination::write(has_watch, out);
} }
/// Size of the request body: Coordination::size() summed over `path` and `has_watch`.
size_t ZooKeeperListRequest::sizeImpl() const
{
    const auto path_size = Coordination::size(path);
    const auto watch_flag_size = Coordination::size(has_watch);
    return path_size + watch_flag_size;
}
void ZooKeeperListRequest::readImpl(ReadBuffer & in) void ZooKeeperListRequest::readImpl(ReadBuffer & in)
{ {
Coordination::read(path, in); Coordination::read(path, in);
@ -366,6 +454,11 @@ void ZooKeeperFilteredListRequest::writeImpl(WriteBuffer & out) const
Coordination::write(static_cast<uint8_t>(list_request_type), out); Coordination::write(static_cast<uint8_t>(list_request_type), out);
} }
/// Size of the request body: Coordination::size() summed over `path`, `has_watch`
/// and `list_request_type` converted to uint8_t.
size_t ZooKeeperFilteredListRequest::sizeImpl() const
{
    const auto path_size = Coordination::size(path);
    const auto watch_flag_size = Coordination::size(has_watch);
    const auto list_type_size = Coordination::size(static_cast<uint8_t>(list_request_type));
    return path_size + watch_flag_size + list_type_size;
}
void ZooKeeperFilteredListRequest::readImpl(ReadBuffer & in) void ZooKeeperFilteredListRequest::readImpl(ReadBuffer & in)
{ {
Coordination::read(path, in); Coordination::read(path, in);
@ -397,6 +490,11 @@ void ZooKeeperListResponse::writeImpl(WriteBuffer & out) const
Coordination::write(stat, out); Coordination::write(stat, out);
} }
/// Size of the response body: Coordination::size() summed over `names` and `stat`.
size_t ZooKeeperListResponse::sizeImpl() const
{
    const auto names_size = Coordination::size(names);
    const auto stat_size = Coordination::size(stat);
    return names_size + stat_size;
}
void ZooKeeperSimpleListResponse::readImpl(ReadBuffer & in) void ZooKeeperSimpleListResponse::readImpl(ReadBuffer & in)
{ {
Coordination::read(names, in); Coordination::read(names, in);
@ -407,6 +505,11 @@ void ZooKeeperSimpleListResponse::writeImpl(WriteBuffer & out) const
Coordination::write(names, out); Coordination::write(names, out);
} }
/// Size of the response body as reported by Coordination::size(): only `names` is counted.
size_t ZooKeeperSimpleListResponse::sizeImpl() const
{
    const auto names_size = Coordination::size(names);
    return names_size;
}
void ZooKeeperSetACLRequest::writeImpl(WriteBuffer & out) const void ZooKeeperSetACLRequest::writeImpl(WriteBuffer & out) const
{ {
Coordination::write(path, out); Coordination::write(path, out);
@ -414,6 +517,11 @@ void ZooKeeperSetACLRequest::writeImpl(WriteBuffer & out) const
Coordination::write(version, out); Coordination::write(version, out);
} }
/// Size of the request body: Coordination::size() summed over `path`, `acls` and `version`.
size_t ZooKeeperSetACLRequest::sizeImpl() const
{
    const auto path_size = Coordination::size(path);
    const auto acls_size = Coordination::size(acls);
    const auto version_size = Coordination::size(version);
    return path_size + acls_size + version_size;
}
void ZooKeeperSetACLRequest::readImpl(ReadBuffer & in) void ZooKeeperSetACLRequest::readImpl(ReadBuffer & in)
{ {
Coordination::read(path, in); Coordination::read(path, in);
@ -431,6 +539,11 @@ void ZooKeeperSetACLResponse::writeImpl(WriteBuffer & out) const
Coordination::write(stat, out); Coordination::write(stat, out);
} }
/// Size of the response body as reported by Coordination::size(): only `stat` is counted.
size_t ZooKeeperSetACLResponse::sizeImpl() const
{
    const auto stat_size = Coordination::size(stat);
    return stat_size;
}
void ZooKeeperSetACLResponse::readImpl(ReadBuffer & in) void ZooKeeperSetACLResponse::readImpl(ReadBuffer & in)
{ {
Coordination::read(stat, in); Coordination::read(stat, in);
@ -446,6 +559,11 @@ void ZooKeeperGetACLRequest::writeImpl(WriteBuffer & out) const
Coordination::write(path, out); Coordination::write(path, out);
} }
/// Size of the request body as reported by Coordination::size(): only `path` is counted.
size_t ZooKeeperGetACLRequest::sizeImpl() const
{
    const auto path_size = Coordination::size(path);
    return path_size;
}
std::string ZooKeeperGetACLRequest::toStringImpl(bool /*short_format*/) const std::string ZooKeeperGetACLRequest::toStringImpl(bool /*short_format*/) const
{ {
return fmt::format("path = {}", path); return fmt::format("path = {}", path);
@ -457,6 +575,11 @@ void ZooKeeperGetACLResponse::writeImpl(WriteBuffer & out) const
Coordination::write(stat, out); Coordination::write(stat, out);
} }
/// Size of the response body: Coordination::size() summed over `acl` and `stat`.
size_t ZooKeeperGetACLResponse::sizeImpl() const
{
    const auto acl_size = Coordination::size(acl);
    const auto stat_size = Coordination::size(stat);
    return acl_size + stat_size;
}
void ZooKeeperGetACLResponse::readImpl(ReadBuffer & in) void ZooKeeperGetACLResponse::readImpl(ReadBuffer & in)
{ {
Coordination::read(acl, in); Coordination::read(acl, in);
@ -469,6 +592,11 @@ void ZooKeeperCheckRequest::writeImpl(WriteBuffer & out) const
Coordination::write(version, out); Coordination::write(version, out);
} }
/// Size of the request body: Coordination::size() summed over `path` and `version`.
size_t ZooKeeperCheckRequest::sizeImpl() const
{
    const auto path_size = Coordination::size(path);
    const auto version_size = Coordination::size(version);
    return path_size + version_size;
}
void ZooKeeperCheckRequest::readImpl(ReadBuffer & in) void ZooKeeperCheckRequest::readImpl(ReadBuffer & in)
{ {
Coordination::read(path, in); Coordination::read(path, in);
@ -494,6 +622,11 @@ void ZooKeeperErrorResponse::writeImpl(WriteBuffer & out) const
Coordination::write(error, out); Coordination::write(error, out);
} }
size_t ZooKeeperErrorResponse::sizeImpl() const
{
return Coordination::size(error);
}
void ZooKeeperMultiRequest::checkOperationType(OperationType type) void ZooKeeperMultiRequest::checkOperationType(OperationType type)
{ {
chassert(!operation_type.has_value() || *operation_type == type); chassert(!operation_type.has_value() || *operation_type == type);
@ -596,6 +729,27 @@ void ZooKeeperMultiRequest::writeImpl(WriteBuffer & out) const
Coordination::write(error, out); Coordination::write(error, out);
} }
size_t ZooKeeperMultiRequest::sizeImpl() const
{
size_t total_size = 0;
for (const auto & request : requests)
{
const auto & zk_request = dynamic_cast<const ZooKeeperRequest &>(*request);
bool done = false;
int32_t error = -1;
total_size
+= Coordination::size(zk_request.getOpNum()) + Coordination::size(done) + Coordination::size(error) + zk_request.sizeImpl();
}
OpNum op_num = OpNum::Error;
bool done = true;
int32_t error = -1;
return total_size + Coordination::size(op_num) + Coordination::size(done) + Coordination::size(error);
}
void ZooKeeperMultiRequest::readImpl(ReadBuffer & in) void ZooKeeperMultiRequest::readImpl(ReadBuffer & in)
{ {
while (true) while (true)
@ -729,31 +883,54 @@ void ZooKeeperMultiResponse::writeImpl(WriteBuffer & out) const
} }
} }
ZooKeeperResponsePtr ZooKeeperHeartbeatRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperHeartbeatResponse>()); } size_t ZooKeeperMultiResponse::sizeImpl() const
ZooKeeperResponsePtr ZooKeeperSyncRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperSyncResponse>()); } {
ZooKeeperResponsePtr ZooKeeperAuthRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperAuthResponse>()); } size_t total_size = 0;
ZooKeeperResponsePtr ZooKeeperRemoveRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperRemoveResponse>()); } for (const auto & response : responses)
ZooKeeperResponsePtr ZooKeeperRemoveRecursiveRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperRemoveRecursiveResponse>()); } {
ZooKeeperResponsePtr ZooKeeperExistsRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperExistsResponse>()); } const ZooKeeperResponse & zk_response = dynamic_cast<const ZooKeeperResponse &>(*response);
ZooKeeperResponsePtr ZooKeeperGetRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperGetResponse>()); } OpNum op_num = zk_response.getOpNum();
ZooKeeperResponsePtr ZooKeeperSetRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperSetResponse>()); } bool done = false;
ZooKeeperResponsePtr ZooKeeperReconfigRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperReconfigResponse>()); } Error op_error = zk_response.error;
ZooKeeperResponsePtr ZooKeeperListRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperListResponse>()); }
ZooKeeperResponsePtr ZooKeeperSimpleListRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperSimpleListResponse>()); } total_size += Coordination::size(op_num) + Coordination::size(done) + Coordination::size(op_error);
if (op_error == Error::ZOK || op_num == OpNum::Error)
total_size += zk_response.sizeImpl();
}
/// Footer.
OpNum op_num = OpNum::Error;
bool done = true;
int32_t error_read = - 1;
return total_size + Coordination::size(op_num) + Coordination::size(done) + Coordination::size(error_read);
}
/// Response factories: each request type creates a default-constructed response of the matching type.

ZooKeeperResponsePtr ZooKeeperHeartbeatRequest::makeResponse() const
{
    return std::make_shared<ZooKeeperHeartbeatResponse>();
}

ZooKeeperResponsePtr ZooKeeperSyncRequest::makeResponse() const
{
    return std::make_shared<ZooKeeperSyncResponse>();
}

ZooKeeperResponsePtr ZooKeeperAuthRequest::makeResponse() const
{
    return std::make_shared<ZooKeeperAuthResponse>();
}

ZooKeeperResponsePtr ZooKeeperRemoveRequest::makeResponse() const
{
    return std::make_shared<ZooKeeperRemoveResponse>();
}

ZooKeeperResponsePtr ZooKeeperRemoveRecursiveRequest::makeResponse() const
{
    return std::make_shared<ZooKeeperRemoveRecursiveResponse>();
}

ZooKeeperResponsePtr ZooKeeperExistsRequest::makeResponse() const
{
    return std::make_shared<ZooKeeperExistsResponse>();
}

ZooKeeperResponsePtr ZooKeeperGetRequest::makeResponse() const
{
    return std::make_shared<ZooKeeperGetResponse>();
}

ZooKeeperResponsePtr ZooKeeperSetRequest::makeResponse() const
{
    return std::make_shared<ZooKeeperSetResponse>();
}

ZooKeeperResponsePtr ZooKeeperReconfigRequest::makeResponse() const
{
    return std::make_shared<ZooKeeperReconfigResponse>();
}

ZooKeeperResponsePtr ZooKeeperListRequest::makeResponse() const
{
    return std::make_shared<ZooKeeperListResponse>();
}

ZooKeeperResponsePtr ZooKeeperSimpleListRequest::makeResponse() const
{
    return std::make_shared<ZooKeeperSimpleListResponse>();
}
ZooKeeperResponsePtr ZooKeeperCreateRequest::makeResponse() const ZooKeeperResponsePtr ZooKeeperCreateRequest::makeResponse() const
{ {
if (not_exists) if (not_exists)
return setTime(std::make_shared<ZooKeeperCreateIfNotExistsResponse>()); return std::make_shared<ZooKeeperCreateIfNotExistsResponse>();
return setTime(std::make_shared<ZooKeeperCreateResponse>()); return std::make_shared<ZooKeeperCreateResponse>();
} }
ZooKeeperResponsePtr ZooKeeperCheckRequest::makeResponse() const ZooKeeperResponsePtr ZooKeeperCheckRequest::makeResponse() const
{ {
if (not_exists) if (not_exists)
return setTime(std::make_shared<ZooKeeperCheckNotExistsResponse>()); return std::make_shared<ZooKeeperCheckNotExistsResponse>();
return setTime(std::make_shared<ZooKeeperCheckResponse>()); return std::make_shared<ZooKeeperCheckResponse>();
} }
ZooKeeperResponsePtr ZooKeeperMultiRequest::makeResponse() const ZooKeeperResponsePtr ZooKeeperMultiRequest::makeResponse() const
@ -764,11 +941,12 @@ ZooKeeperResponsePtr ZooKeeperMultiRequest::makeResponse() const
else else
response = std::make_shared<ZooKeeperMultiReadResponse>(requests); response = std::make_shared<ZooKeeperMultiReadResponse>(requests);
return setTime(std::move(response)); return std::move(response);
} }
ZooKeeperResponsePtr ZooKeeperCloseRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperCloseResponse>()); }
ZooKeeperResponsePtr ZooKeeperSetACLRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperSetACLResponse>()); } ZooKeeperResponsePtr ZooKeeperCloseRequest::makeResponse() const { return std::make_shared<ZooKeeperCloseResponse>(); }
ZooKeeperResponsePtr ZooKeeperGetACLRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperGetACLResponse>()); } ZooKeeperResponsePtr ZooKeeperSetACLRequest::makeResponse() const { return std::make_shared<ZooKeeperSetACLResponse>(); }
ZooKeeperResponsePtr ZooKeeperGetACLRequest::makeResponse() const { return std::make_shared<ZooKeeperGetACLResponse>(); }
void ZooKeeperSessionIDRequest::writeImpl(WriteBuffer & out) const void ZooKeeperSessionIDRequest::writeImpl(WriteBuffer & out) const
{ {
@ -777,6 +955,11 @@ void ZooKeeperSessionIDRequest::writeImpl(WriteBuffer & out) const
Coordination::write(server_id, out); Coordination::write(server_id, out);
} }
/// Returns the combined Coordination::size() of `internal_id`, `session_timeout_ms` and `server_id`.
size_t ZooKeeperSessionIDRequest::sizeImpl() const
{
    size_t payload_size = Coordination::size(internal_id);
    payload_size += Coordination::size(session_timeout_ms);
    payload_size += Coordination::size(server_id);
    return payload_size;
}
void ZooKeeperSessionIDRequest::readImpl(ReadBuffer & in) void ZooKeeperSessionIDRequest::readImpl(ReadBuffer & in)
{ {
Coordination::read(internal_id, in); Coordination::read(internal_id, in);
@ -803,6 +986,11 @@ void ZooKeeperSessionIDResponse::writeImpl(WriteBuffer & out) const
Coordination::write(server_id, out); Coordination::write(server_id, out);
} }
/// Returns the combined Coordination::size() of `internal_id`, `session_id` and `server_id`.
size_t ZooKeeperSessionIDResponse::sizeImpl() const
{
    size_t payload_size = Coordination::size(internal_id);
    payload_size += Coordination::size(session_id);
    payload_size += Coordination::size(server_id);
    return payload_size;
}
void ZooKeeperRequest::createLogElements(LogElements & elems) const void ZooKeeperRequest::createLogElements(LogElements & elems) const
{ {
@ -960,40 +1148,6 @@ std::shared_ptr<ZooKeeperRequest> ZooKeeperRequest::read(ReadBuffer & in)
return request; return request;
} }
/// On destruction, emits a test-level log line if more than one second has passed
/// since `request_created_time_ns`. Does nothing when that timestamp is zero.
ZooKeeperRequest::~ZooKeeperRequest()
{
    if (request_created_time_ns != 0)
    {
        constexpr UInt64 slow_threshold_ns = 1000000000ULL; /// 1 sec
        const UInt64 lifetime_ns = clock_gettime_ns() - request_created_time_ns;

        if (lifetime_ns > slow_threshold_ns)
        {
            LOG_TEST(getLogger(__PRETTY_FUNCTION__), "Processing of request xid={} took {} ms", xid, lifetime_ns / 1000000UL);
        }
    }
}
/// Stamps `response` with the current clock_gettime_ns() value, but only when this request
/// itself has a non-zero creation timestamp. The same pointer is returned in either case.
ZooKeeperResponsePtr ZooKeeperRequest::setTime(ZooKeeperResponsePtr response) const
{
    const bool timing_enabled = request_created_time_ns != 0;
    if (timing_enabled)
        response->response_created_time_ns = clock_gettime_ns();

    return response;
}
/// On destruction, emits a test-level log line if more than one second has passed
/// since `response_created_time_ns`. Does nothing when that timestamp is zero.
ZooKeeperResponse::~ZooKeeperResponse()
{
    if (response_created_time_ns != 0)
    {
        constexpr UInt64 slow_threshold_ns = 1000000000ULL; /// 1 sec
        const UInt64 lifetime_ns = clock_gettime_ns() - response_created_time_ns;

        if (lifetime_ns > slow_threshold_ns)
        {
            LOG_TEST(getLogger(__PRETTY_FUNCTION__), "Processing of response xid={} took {} ms", xid, lifetime_ns / 1000000UL);
        }
    }
}
ZooKeeperRequestPtr ZooKeeperRequestFactory::get(OpNum op_num) const ZooKeeperRequestPtr ZooKeeperRequestFactory::get(OpNum op_num) const
{ {
auto it = op_num_to_request.find(op_num); auto it = op_num_to_request.find(op_num);
@ -1015,7 +1169,6 @@ void registerZooKeeperRequest(ZooKeeperRequestFactory & factory)
factory.registerRequest(num, [] factory.registerRequest(num, []
{ {
auto res = std::make_shared<RequestT>(); auto res = std::make_shared<RequestT>();
res->request_created_time_ns = clock_gettime_ns();
if constexpr (num == OpNum::MultiRead) if constexpr (num == OpNum::MultiRead)
res->operation_type = ZooKeeperMultiRequest::OperationType::Read; res->operation_type = ZooKeeperMultiRequest::OperationType::Read;

View File

@ -7,13 +7,11 @@
#include <boost/noncopyable.hpp> #include <boost/noncopyable.hpp>
#include <IO/ReadBuffer.h> #include <IO/ReadBuffer.h>
#include <IO/WriteBuffer.h> #include <IO/WriteBuffer.h>
#include <unordered_map>
#include <vector> #include <vector>
#include <memory> #include <memory>
#include <cstdint> #include <cstdint>
#include <optional> #include <optional>
#include <functional> #include <functional>
#include <span>
namespace Coordination namespace Coordination
@ -25,13 +23,11 @@ struct ZooKeeperResponse : virtual Response
{ {
XID xid = 0; XID xid = 0;
UInt64 response_created_time_ns = 0;
ZooKeeperResponse() = default; ZooKeeperResponse() = default;
ZooKeeperResponse(const ZooKeeperResponse &) = default; ZooKeeperResponse(const ZooKeeperResponse &) = default;
~ZooKeeperResponse() override;
virtual void readImpl(ReadBuffer &) = 0; virtual void readImpl(ReadBuffer &) = 0;
virtual void writeImpl(WriteBuffer &) const = 0; virtual void writeImpl(WriteBuffer &) const = 0;
virtual size_t sizeImpl() const = 0;
virtual void write(WriteBuffer & out) const; virtual void write(WriteBuffer & out) const;
virtual OpNum getOpNum() const = 0; virtual OpNum getOpNum() const = 0;
virtual void fillLogElements(LogElements & elems, size_t idx) const; virtual void fillLogElements(LogElements & elems, size_t idx) const;
@ -51,13 +47,11 @@ struct ZooKeeperRequest : virtual Request
bool restored_from_zookeeper_log = false; bool restored_from_zookeeper_log = false;
UInt64 request_created_time_ns = 0;
UInt64 thread_id = 0; UInt64 thread_id = 0;
String query_id; String query_id;
ZooKeeperRequest() = default; ZooKeeperRequest() = default;
ZooKeeperRequest(const ZooKeeperRequest &) = default; ZooKeeperRequest(const ZooKeeperRequest &) = default;
~ZooKeeperRequest() override;
virtual OpNum getOpNum() const = 0; virtual OpNum getOpNum() const = 0;
@ -66,6 +60,7 @@ struct ZooKeeperRequest : virtual Request
std::string toString(bool short_format = false) const; std::string toString(bool short_format = false) const;
virtual void writeImpl(WriteBuffer &) const = 0; virtual void writeImpl(WriteBuffer &) const = 0;
virtual size_t sizeImpl() const = 0;
virtual void readImpl(ReadBuffer &) = 0; virtual void readImpl(ReadBuffer &) = 0;
virtual std::string toStringImpl(bool /*short_format*/) const { return ""; } virtual std::string toStringImpl(bool /*short_format*/) const { return ""; }
@ -73,7 +68,6 @@ struct ZooKeeperRequest : virtual Request
static std::shared_ptr<ZooKeeperRequest> read(ReadBuffer & in); static std::shared_ptr<ZooKeeperRequest> read(ReadBuffer & in);
virtual ZooKeeperResponsePtr makeResponse() const = 0; virtual ZooKeeperResponsePtr makeResponse() const = 0;
ZooKeeperResponsePtr setTime(ZooKeeperResponsePtr response) const;
virtual bool isReadRequest() const = 0; virtual bool isReadRequest() const = 0;
virtual void createLogElements(LogElements & elems) const; virtual void createLogElements(LogElements & elems) const;
@ -86,6 +80,7 @@ struct ZooKeeperHeartbeatRequest final : ZooKeeperRequest
String getPath() const override { return {}; } String getPath() const override { return {}; }
OpNum getOpNum() const override { return OpNum::Heartbeat; } OpNum getOpNum() const override { return OpNum::Heartbeat; }
void writeImpl(WriteBuffer &) const override {} void writeImpl(WriteBuffer &) const override {}
size_t sizeImpl() const override { return 0; }
void readImpl(ReadBuffer &) override {} void readImpl(ReadBuffer &) override {}
ZooKeeperResponsePtr makeResponse() const override; ZooKeeperResponsePtr makeResponse() const override;
bool isReadRequest() const override { return false; } bool isReadRequest() const override { return false; }
@ -97,6 +92,7 @@ struct ZooKeeperSyncRequest final : ZooKeeperRequest
String getPath() const override { return path; } String getPath() const override { return path; }
OpNum getOpNum() const override { return OpNum::Sync; } OpNum getOpNum() const override { return OpNum::Sync; }
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
size_t sizeImpl() const override;
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
std::string toStringImpl(bool short_format) const override; std::string toStringImpl(bool short_format) const override;
ZooKeeperResponsePtr makeResponse() const override; ZooKeeperResponsePtr makeResponse() const override;
@ -109,6 +105,7 @@ struct ZooKeeperSyncResponse final : SyncResponse, ZooKeeperResponse
{ {
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
size_t sizeImpl() const override;
OpNum getOpNum() const override { return OpNum::Sync; } OpNum getOpNum() const override { return OpNum::Sync; }
}; };
@ -122,6 +119,7 @@ struct ZooKeeperReconfigRequest final : ZooKeeperRequest
String getPath() const override { return keeper_config_path; } String getPath() const override { return keeper_config_path; }
OpNum getOpNum() const override { return OpNum::Reconfig; } OpNum getOpNum() const override { return OpNum::Reconfig; }
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
size_t sizeImpl() const override;
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
std::string toStringImpl(bool short_format) const override; std::string toStringImpl(bool short_format) const override;
ZooKeeperResponsePtr makeResponse() const override; ZooKeeperResponsePtr makeResponse() const override;
@ -138,6 +136,7 @@ struct ZooKeeperReconfigResponse final : ReconfigResponse, ZooKeeperResponse
{ {
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
size_t sizeImpl() const override;
OpNum getOpNum() const override { return OpNum::Reconfig; } OpNum getOpNum() const override { return OpNum::Reconfig; }
}; };
@ -145,6 +144,7 @@ struct ZooKeeperHeartbeatResponse final : ZooKeeperResponse
{ {
void readImpl(ReadBuffer &) override {} void readImpl(ReadBuffer &) override {}
void writeImpl(WriteBuffer &) const override {} void writeImpl(WriteBuffer &) const override {}
size_t sizeImpl() const override { return 0; }
OpNum getOpNum() const override { return OpNum::Heartbeat; } OpNum getOpNum() const override { return OpNum::Heartbeat; }
}; };
@ -153,6 +153,7 @@ struct ZooKeeperWatchResponse final : WatchResponse, ZooKeeperResponse
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
size_t sizeImpl() const override;
void write(WriteBuffer & out) const override; void write(WriteBuffer & out) const override;
@ -175,6 +176,7 @@ struct ZooKeeperAuthRequest final : ZooKeeperRequest
String getPath() const override { return {}; } String getPath() const override { return {}; }
OpNum getOpNum() const override { return OpNum::Auth; } OpNum getOpNum() const override { return OpNum::Auth; }
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
size_t sizeImpl() const override;
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
std::string toStringImpl(bool short_format) const override; std::string toStringImpl(bool short_format) const override;
@ -189,6 +191,7 @@ struct ZooKeeperAuthResponse final : ZooKeeperResponse
{ {
void readImpl(ReadBuffer &) override {} void readImpl(ReadBuffer &) override {}
void writeImpl(WriteBuffer &) const override {} void writeImpl(WriteBuffer &) const override {}
size_t sizeImpl() const override { return 0; }
OpNum getOpNum() const override { return OpNum::Auth; } OpNum getOpNum() const override { return OpNum::Auth; }
@ -200,6 +203,7 @@ struct ZooKeeperCloseRequest final : ZooKeeperRequest
String getPath() const override { return {}; } String getPath() const override { return {}; }
OpNum getOpNum() const override { return OpNum::Close; } OpNum getOpNum() const override { return OpNum::Close; }
void writeImpl(WriteBuffer &) const override {} void writeImpl(WriteBuffer &) const override {}
size_t sizeImpl() const override { return 0; }
void readImpl(ReadBuffer &) override {} void readImpl(ReadBuffer &) override {}
ZooKeeperResponsePtr makeResponse() const override; ZooKeeperResponsePtr makeResponse() const override;
@ -214,6 +218,7 @@ struct ZooKeeperCloseResponse final : ZooKeeperResponse
} }
void writeImpl(WriteBuffer &) const override {} void writeImpl(WriteBuffer &) const override {}
size_t sizeImpl() const override { return 0; }
OpNum getOpNum() const override { return OpNum::Close; } OpNum getOpNum() const override { return OpNum::Close; }
}; };
@ -228,6 +233,7 @@ struct ZooKeeperCreateRequest final : public CreateRequest, ZooKeeperRequest
OpNum getOpNum() const override { return not_exists ? OpNum::CreateIfNotExists : OpNum::Create; } OpNum getOpNum() const override { return not_exists ? OpNum::CreateIfNotExists : OpNum::Create; }
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
size_t sizeImpl() const override;
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
std::string toStringImpl(bool short_format) const override; std::string toStringImpl(bool short_format) const override;
@ -244,6 +250,7 @@ struct ZooKeeperCreateResponse : CreateResponse, ZooKeeperResponse
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
size_t sizeImpl() const override;
OpNum getOpNum() const override { return OpNum::Create; } OpNum getOpNum() const override { return OpNum::Create; }
@ -265,6 +272,7 @@ struct ZooKeeperRemoveRequest final : RemoveRequest, ZooKeeperRequest
OpNum getOpNum() const override { return OpNum::Remove; } OpNum getOpNum() const override { return OpNum::Remove; }
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
size_t sizeImpl() const override;
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
std::string toStringImpl(bool short_format) const override; std::string toStringImpl(bool short_format) const override;
@ -280,6 +288,7 @@ struct ZooKeeperRemoveResponse final : RemoveResponse, ZooKeeperResponse
{ {
void readImpl(ReadBuffer &) override {} void readImpl(ReadBuffer &) override {}
void writeImpl(WriteBuffer &) const override {} void writeImpl(WriteBuffer &) const override {}
size_t sizeImpl() const override { return 0; }
OpNum getOpNum() const override { return OpNum::Remove; } OpNum getOpNum() const override { return OpNum::Remove; }
size_t bytesSize() const override { return RemoveResponse::bytesSize() + sizeof(xid) + sizeof(zxid); } size_t bytesSize() const override { return RemoveResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
@ -293,6 +302,7 @@ struct ZooKeeperRemoveRecursiveRequest final : RemoveRecursiveRequest, ZooKeeper
OpNum getOpNum() const override { return OpNum::RemoveRecursive; } OpNum getOpNum() const override { return OpNum::RemoveRecursive; }
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
size_t sizeImpl() const override;
std::string toStringImpl(bool short_format) const override; std::string toStringImpl(bool short_format) const override;
ZooKeeperResponsePtr makeResponse() const override; ZooKeeperResponsePtr makeResponse() const override;
@ -305,6 +315,7 @@ struct ZooKeeperRemoveRecursiveResponse : RemoveRecursiveResponse, ZooKeeperResp
{ {
void readImpl(ReadBuffer &) override {} void readImpl(ReadBuffer &) override {}
void writeImpl(WriteBuffer &) const override {} void writeImpl(WriteBuffer &) const override {}
size_t sizeImpl() const override { return 0; }
OpNum getOpNum() const override { return OpNum::RemoveRecursive; } OpNum getOpNum() const override { return OpNum::RemoveRecursive; }
size_t bytesSize() const override { return RemoveRecursiveResponse::bytesSize() + sizeof(xid) + sizeof(zxid); } size_t bytesSize() const override { return RemoveRecursiveResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
@ -317,6 +328,7 @@ struct ZooKeeperExistsRequest final : ExistsRequest, ZooKeeperRequest
OpNum getOpNum() const override { return OpNum::Exists; } OpNum getOpNum() const override { return OpNum::Exists; }
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
size_t sizeImpl() const override;
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
std::string toStringImpl(bool short_format) const override; std::string toStringImpl(bool short_format) const override;
@ -330,6 +342,7 @@ struct ZooKeeperExistsResponse final : ExistsResponse, ZooKeeperResponse
{ {
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
size_t sizeImpl() const override;
OpNum getOpNum() const override { return OpNum::Exists; } OpNum getOpNum() const override { return OpNum::Exists; }
size_t bytesSize() const override { return ExistsResponse::bytesSize() + sizeof(xid) + sizeof(zxid); } size_t bytesSize() const override { return ExistsResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
@ -344,6 +357,7 @@ struct ZooKeeperGetRequest final : GetRequest, ZooKeeperRequest
OpNum getOpNum() const override { return OpNum::Get; } OpNum getOpNum() const override { return OpNum::Get; }
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
size_t sizeImpl() const override;
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
std::string toStringImpl(bool short_format) const override; std::string toStringImpl(bool short_format) const override;
@ -357,6 +371,7 @@ struct ZooKeeperGetResponse final : GetResponse, ZooKeeperResponse
{ {
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
size_t sizeImpl() const override;
OpNum getOpNum() const override { return OpNum::Get; } OpNum getOpNum() const override { return OpNum::Get; }
size_t bytesSize() const override { return GetResponse::bytesSize() + sizeof(xid) + sizeof(zxid); } size_t bytesSize() const override { return GetResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
@ -371,6 +386,7 @@ struct ZooKeeperSetRequest final : SetRequest, ZooKeeperRequest
OpNum getOpNum() const override { return OpNum::Set; } OpNum getOpNum() const override { return OpNum::Set; }
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
size_t sizeImpl() const override;
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
std::string toStringImpl(bool short_format) const override; std::string toStringImpl(bool short_format) const override;
ZooKeeperResponsePtr makeResponse() const override; ZooKeeperResponsePtr makeResponse() const override;
@ -385,6 +401,7 @@ struct ZooKeeperSetResponse final : SetResponse, ZooKeeperResponse
{ {
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
size_t sizeImpl() const override;
OpNum getOpNum() const override { return OpNum::Set; } OpNum getOpNum() const override { return OpNum::Set; }
size_t bytesSize() const override { return SetResponse::bytesSize() + sizeof(xid) + sizeof(zxid); } size_t bytesSize() const override { return SetResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
@ -399,6 +416,7 @@ struct ZooKeeperListRequest : ListRequest, ZooKeeperRequest
OpNum getOpNum() const override { return OpNum::List; } OpNum getOpNum() const override { return OpNum::List; }
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
size_t sizeImpl() const override;
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
std::string toStringImpl(bool short_format) const override; std::string toStringImpl(bool short_format) const override;
ZooKeeperResponsePtr makeResponse() const override; ZooKeeperResponsePtr makeResponse() const override;
@ -419,6 +437,7 @@ struct ZooKeeperFilteredListRequest final : ZooKeeperListRequest
OpNum getOpNum() const override { return OpNum::FilteredList; } OpNum getOpNum() const override { return OpNum::FilteredList; }
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
size_t sizeImpl() const override;
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
std::string toStringImpl(bool short_format) const override; std::string toStringImpl(bool short_format) const override;
@ -429,6 +448,7 @@ struct ZooKeeperListResponse : ListResponse, ZooKeeperResponse
{ {
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
size_t sizeImpl() const override;
OpNum getOpNum() const override { return OpNum::List; } OpNum getOpNum() const override { return OpNum::List; }
size_t bytesSize() const override { return ListResponse::bytesSize() + sizeof(xid) + sizeof(zxid); } size_t bytesSize() const override { return ListResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
@ -440,6 +460,7 @@ struct ZooKeeperSimpleListResponse final : ZooKeeperListResponse
{ {
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
size_t sizeImpl() const override;
OpNum getOpNum() const override { return OpNum::SimpleList; } OpNum getOpNum() const override { return OpNum::SimpleList; }
size_t bytesSize() const override { return ZooKeeperListResponse::bytesSize() - sizeof(stat); } size_t bytesSize() const override { return ZooKeeperListResponse::bytesSize() - sizeof(stat); }
@ -452,6 +473,7 @@ struct ZooKeeperCheckRequest : CheckRequest, ZooKeeperRequest
OpNum getOpNum() const override { return not_exists ? OpNum::CheckNotExists : OpNum::Check; } OpNum getOpNum() const override { return not_exists ? OpNum::CheckNotExists : OpNum::Check; }
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
size_t sizeImpl() const override;
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
std::string toStringImpl(bool short_format) const override; std::string toStringImpl(bool short_format) const override;
@ -467,6 +489,7 @@ struct ZooKeeperCheckResponse : CheckResponse, ZooKeeperResponse
{ {
void readImpl(ReadBuffer &) override {} void readImpl(ReadBuffer &) override {}
void writeImpl(WriteBuffer &) const override {} void writeImpl(WriteBuffer &) const override {}
size_t sizeImpl() const override { return 0; }
OpNum getOpNum() const override { return OpNum::Check; } OpNum getOpNum() const override { return OpNum::Check; }
size_t bytesSize() const override { return CheckResponse::bytesSize() + sizeof(xid) + sizeof(zxid); } size_t bytesSize() const override { return CheckResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
@ -483,6 +506,7 @@ struct ZooKeeperErrorResponse final : ErrorResponse, ZooKeeperResponse
{ {
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
size_t sizeImpl() const override;
OpNum getOpNum() const override { return OpNum::Error; } OpNum getOpNum() const override { return OpNum::Error; }
@ -493,6 +517,7 @@ struct ZooKeeperSetACLRequest final : SetACLRequest, ZooKeeperRequest
{ {
OpNum getOpNum() const override { return OpNum::SetACL; } OpNum getOpNum() const override { return OpNum::SetACL; }
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
size_t sizeImpl() const override;
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
std::string toStringImpl(bool short_format) const override; std::string toStringImpl(bool short_format) const override;
ZooKeeperResponsePtr makeResponse() const override; ZooKeeperResponsePtr makeResponse() const override;
@ -505,6 +530,7 @@ struct ZooKeeperSetACLResponse final : SetACLResponse, ZooKeeperResponse
{ {
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
size_t sizeImpl() const override;
OpNum getOpNum() const override { return OpNum::SetACL; } OpNum getOpNum() const override { return OpNum::SetACL; }
size_t bytesSize() const override { return SetACLResponse::bytesSize() + sizeof(xid) + sizeof(zxid); } size_t bytesSize() const override { return SetACLResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
@ -514,6 +540,7 @@ struct ZooKeeperGetACLRequest final : GetACLRequest, ZooKeeperRequest
{ {
OpNum getOpNum() const override { return OpNum::GetACL; } OpNum getOpNum() const override { return OpNum::GetACL; }
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
size_t sizeImpl() const override;
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
std::string toStringImpl(bool short_format) const override; std::string toStringImpl(bool short_format) const override;
ZooKeeperResponsePtr makeResponse() const override; ZooKeeperResponsePtr makeResponse() const override;
@ -526,12 +553,13 @@ struct ZooKeeperGetACLResponse final : GetACLResponse, ZooKeeperResponse
{ {
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
size_t sizeImpl() const override;
OpNum getOpNum() const override { return OpNum::GetACL; } OpNum getOpNum() const override { return OpNum::GetACL; }
size_t bytesSize() const override { return GetACLResponse::bytesSize() + sizeof(xid) + sizeof(zxid); } size_t bytesSize() const override { return GetACLResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
}; };
struct ZooKeeperMultiRequest final : MultiRequest, ZooKeeperRequest struct ZooKeeperMultiRequest final : MultiRequest<ZooKeeperRequestPtr>, ZooKeeperRequest
{ {
OpNum getOpNum() const override; OpNum getOpNum() const override;
ZooKeeperMultiRequest() = default; ZooKeeperMultiRequest() = default;
@ -540,6 +568,7 @@ struct ZooKeeperMultiRequest final : MultiRequest, ZooKeeperRequest
ZooKeeperMultiRequest(std::span<const Coordination::RequestPtr> generic_requests, const ACLs & default_acls); ZooKeeperMultiRequest(std::span<const Coordination::RequestPtr> generic_requests, const ACLs & default_acls);
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
size_t sizeImpl() const override;
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
std::string toStringImpl(bool short_format) const override; std::string toStringImpl(bool short_format) const override;
@ -563,12 +592,14 @@ private:
struct ZooKeeperMultiResponse : MultiResponse, ZooKeeperResponse struct ZooKeeperMultiResponse : MultiResponse, ZooKeeperResponse
{ {
explicit ZooKeeperMultiResponse(const Requests & requests) ZooKeeperMultiResponse() = default;
explicit ZooKeeperMultiResponse(const std::vector<ZooKeeperRequestPtr> & requests)
{ {
responses.reserve(requests.size()); responses.reserve(requests.size());
for (const auto & request : requests) for (const auto & request : requests)
responses.emplace_back(dynamic_cast<const ZooKeeperRequest &>(*request).makeResponse()); responses.emplace_back(request->makeResponse());
} }
explicit ZooKeeperMultiResponse(const Responses & responses_) explicit ZooKeeperMultiResponse(const Responses & responses_)
@ -579,6 +610,7 @@ struct ZooKeeperMultiResponse : MultiResponse, ZooKeeperResponse
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
size_t sizeImpl() const override;
size_t bytesSize() const override { return MultiResponse::bytesSize() + sizeof(xid) + sizeof(zxid); } size_t bytesSize() const override { return MultiResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
@ -609,6 +641,7 @@ struct ZooKeeperSessionIDRequest final : ZooKeeperRequest
Coordination::OpNum getOpNum() const override { return OpNum::SessionID; } Coordination::OpNum getOpNum() const override { return OpNum::SessionID; }
String getPath() const override { return {}; } String getPath() const override { return {}; }
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
size_t sizeImpl() const override;
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
Coordination::ZooKeeperResponsePtr makeResponse() const override; Coordination::ZooKeeperResponsePtr makeResponse() const override;
@ -627,6 +660,7 @@ struct ZooKeeperSessionIDResponse final : ZooKeeperResponse
void readImpl(ReadBuffer & in) override; void readImpl(ReadBuffer & in) override;
void writeImpl(WriteBuffer & out) const override; void writeImpl(WriteBuffer & out) const override;
size_t sizeImpl() const override;
Coordination::OpNum getOpNum() const override { return OpNum::SessionID; } Coordination::OpNum getOpNum() const override { return OpNum::SessionID; }
}; };

View File

@ -42,6 +42,32 @@ void write(const Error & x, WriteBuffer & out)
write(static_cast<int32_t>(x), out); write(static_cast<int32_t>(x), out);
} }
/// Size in bytes of an operation code, computed as the size of its value cast to int32_t.
size_t size(OpNum x)
{
    const auto raw_op_num = static_cast<int32_t>(x);
    return size(raw_op_num);
}
/// Size in bytes of a string: an int32_t length field followed by the string's bytes.
size_t size(const std::string & s)
{
    const size_t payload_bytes = s.size();
    return size(static_cast<int32_t>(payload_bytes)) + payload_bytes;
}
/// Size in bytes of an ACL: the sum of the sizes of its permissions, scheme and id fields.
size_t size(const ACL & acl)
{
    size_t total = size(acl.permissions);
    total += size(acl.scheme);
    total += size(acl.id);
    return total;
}
/// Size in bytes of a Stat: the sum of the sizes of all eleven fields listed below.
size_t size(const Stat & stat)
{
    size_t total = 0;
    total += size(stat.czxid);
    total += size(stat.mzxid);
    total += size(stat.ctime);
    total += size(stat.mtime);
    total += size(stat.version);
    total += size(stat.cversion);
    total += size(stat.aversion);
    total += size(stat.ephemeralOwner);
    total += size(stat.dataLength);
    total += size(stat.numChildren);
    total += size(stat.pzxid);
    return total;
}
/// Size in bytes of an error code, computed as the size of its value cast to int32_t.
size_t size(const Error & x)
{
    const auto raw_error = static_cast<int32_t>(x);
    return size(raw_error);
}
void read(OpNum & x, ReadBuffer & in) void read(OpNum & x, ReadBuffer & in)
{ {
int32_t raw_op_num; int32_t raw_op_num;

View File

@ -43,6 +43,36 @@ void write(const std::vector<T> & arr, WriteBuffer & out)
write(elem, out); write(elem, out);
} }
/// Size in bytes of an arithmetic value: simply the size of its type.
template <typename T>
requires is_arithmetic_v<T>
size_t size(T /*x*/)
{
    /// T is deduced from a by-value parameter, so sizeof(T) equals sizeof of the argument.
    return sizeof(T);
}
/// Declarations of the size() overloads for non-arithmetic types.
/// Only the signatures are given here; the bodies are not part of this header.
size_t size(OpNum x);
size_t size(const std::string & s);
size_t size(const ACL & acl);
size_t size(const Stat & stat);
size_t size(const Error & x);
/// Size in bytes of a fixed-size char array: an int32_t length field followed by N bytes.
/// The array contents are irrelevant, so the parameter is left unnamed.
template <size_t N>
size_t size(const std::array<char, N>)
{
    const size_t length_field = size(static_cast<int32_t>(N));
    return length_field + N;
}
template <typename T>
size_t size(const std::vector<T> & arr)
{
size_t total_size = size(static_cast<int32_t>(arr.size()));
for (const auto & elem : arr)
total_size += size(elem);
return total_size;
}
template <typename T> template <typename T>
requires is_arithmetic_v<T> requires is_arithmetic_v<T>
void read(T & x, ReadBuffer & in) void read(T & x, ReadBuffer & in)

View File

@ -1,4 +1,4 @@
clickhouse_add_executable(integer_hash_tables_and_hashes integer_hash_tables_and_hashes.cpp) clickhouse_add_executable(integer_hash_tables_and_hashes integer_hash_tables_and_hashes.cpp orc_string_dictionary.cpp)
target_link_libraries (integer_hash_tables_and_hashes PRIVATE target_link_libraries (integer_hash_tables_and_hashes PRIVATE
ch_contrib::gbenchmark_all ch_contrib::gbenchmark_all
dbms dbms
@ -7,3 +7,8 @@ target_link_libraries (integer_hash_tables_and_hashes PRIVATE
ch_contrib::wyhash ch_contrib::wyhash
ch_contrib::farmhash ch_contrib::farmhash
ch_contrib::xxHash) ch_contrib::xxHash)
clickhouse_add_executable(orc_string_dictionary orc_string_dictionary.cpp)
target_link_libraries (orc_string_dictionary PRIVATE
ch_contrib::gbenchmark_all
dbms)

View File

@ -0,0 +1,311 @@
#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include <base/defines.h>
#include <benchmark/benchmark.h>
/**
 * Baseline ("old") string dictionary used by the benchmark.
 *
 * Distinct strings are the keys of an ordered std::map, so the dictionary is
 * kept in value order on every insert; the mapped value is the insertion
 * order of the string. The bytes of each distinct string are copied into
 * `data`, and the corresponding map key points at that private copy.
 */
class OldSortedStringDictionary
{
public:
    /// Pointer-plus-length view of one dictionary string.
    struct DictEntry
    {
        DictEntry(const char * str, size_t len) : data(str), length(len) { }
        const char * data;
        size_t length;
    };

    OldSortedStringDictionary() : totalLength(0) { }

    /// Add a string; returns the insertion order of the (possibly pre-existing) entry.
    size_t insert(const char * str, size_t len);

    /// Rewrite the indexes in the buffer from insertion order to dictionary (value) order.
    void reorder(std::vector<int64_t> & idxBuffer) const;

    /// Fill the output vector with pointers to the entries, positioned by insertion order.
    void getEntriesInInsertionOrder(std::vector<const DictEntry *> &) const;

    /// Number of distinct strings.
    size_t size() const;

    /// Sum of the lengths of all distinct strings.
    uint64_t length() const;

    /// Drop all entries and reset the total length.
    void clear();

    /// Indexes (insertion order) of the not-null rows; owned by the caller's workflow.
    std::vector<int64_t> idxInDictBuffer;

private:
    /// Byte-wise lexicographic order; on a common prefix the shorter string sorts first.
    struct LessThan
    {
        bool operator()(const DictEntry & left, const DictEntry & right) const
        {
            const size_t common = std::min(left.length, right.length);
            const int cmp = memcmp(left.data, right.data, common);
            return cmp != 0 ? cmp < 0 : left.length < right.length;
        }
    };

    std::map<DictEntry, size_t, LessThan> dict;
    std::vector<std::vector<char>> data;
    uint64_t totalLength;
};

/// Add a string; returns the insertion order of the (possibly pre-existing) entry.
size_t OldSortedStringDictionary::insert(const char * str, size_t len)
{
    const auto inserted = dict.insert({DictEntry(str, len), dict.size()});
    const auto & position = inserted.first;
    if (!inserted.second)
        return position->second;

    /// New string: take a private copy of its bytes.
    data.push_back(std::vector<char>(len));
    char * owned = data.back().data();
    memcpy(owned, str, len);

    /// Re-point the map key at the private copy. The bytes are identical,
    /// so the key's position in the ordered map does not change.
    const_cast<DictEntry &>(position->first).data = owned;
    totalLength += len;
    return position->second;
}

/**
 * Translate indexes from insertion order to dictionary (value) order.
 *
 * Rows are buffered as insertion-order indexes while the dictionary is still
 * growing; the position of a string in value order is only known once the
 * dictionary is complete. This function performs that final translation
 * in place.
 */
void OldSortedStringDictionary::reorder(std::vector<int64_t> & idxBuffer) const
{
    /// The map iterates in value order, so the iteration rank is the dictionary index.
    std::vector<size_t> mapping(dict.size());
    size_t rank = 0;
    for (const auto & kv : dict)
        mapping[kv.second] = rank++;

    for (auto & idx : idxBuffer)
        idx = static_cast<int64_t>(mapping[static_cast<size_t>(idx)]);
}

/// Fill `entries` with pointers to the map keys, positioned by insertion order.
void OldSortedStringDictionary::getEntriesInInsertionOrder(std::vector<const DictEntry *> & entries) const
{
    entries.resize(dict.size());
    for (const auto & kv : dict)
        entries[kv.second] = &kv.first;
}

/// Number of distinct strings.
size_t OldSortedStringDictionary::size() const
{
    return dict.size();
}

/// Sum of the lengths of all distinct strings.
uint64_t OldSortedStringDictionary::length() const
{
    return totalLength;
}

/// Drop all entries and reset the total length.
void OldSortedStringDictionary::clear()
{
    dict.clear();
    data.clear();
    totalLength = 0;
}
/**
 * Implementation of increasing sorted string dictionary.
 *
 * Every distinct string is stored once as a key of `keyToIndex`, mapped to
 * its insertion order. `flatDict_` holds one pointer-plus-length view per
 * distinct string together with that insertion order. `flatDict_` is
 * `mutable` because const member functions re-sort it in place.
 */
class NewSortedStringDictionary
{
public:
    /// Pointer-plus-length view of one dictionary string.
    struct DictEntry
    {
        DictEntry(const char * str, size_t len) : data(str), length(len) { }
        const char * data;
        size_t length;
    };

    /// A dictionary string together with its insertion order.
    struct DictEntryWithIndex
    {
        DictEntryWithIndex(const char * str, size_t len, size_t index_) : entry(str, len), index(index_) { }
        DictEntry entry;
        size_t index;
    };

    NewSortedStringDictionary() : totalLength_(0) { }

    // insert a new string into dictionary, return its insertion order
    size_t insert(const char * str, size_t len);

    // reorder input index buffer from insertion order to dictionary order
    void reorder(std::vector<int64_t> & idxBuffer) const;

    // get dict entries in insertion order
    void getEntriesInInsertionOrder(std::vector<const DictEntry *> &) const;

    // return count of entries
    size_t size() const;

    // return total length of strings in the dictionary
    uint64_t length() const;

    void clear();

    // store indexes of insertion order in the dictionary for not-null rows
    std::vector<int64_t> idxInDictBuffer;

private:
    /// Byte-wise lexicographic order; on a common prefix the shorter string sorts first.
    struct LessThan
    {
        bool operator()(const DictEntryWithIndex & l, const DictEntryWithIndex & r) const
        {
            const auto & left = l.entry;
            const auto & right = r.entry;
            int ret = memcmp(left.data, right.data, std::min(left.length, right.length));
            if (ret != 0)
            {
                return ret < 0;
            }
            return left.length < right.length;
        }
    };

    mutable std::vector<DictEntryWithIndex> flatDict_;
    std::unordered_map<std::string, size_t> keyToIndex;
    uint64_t totalLength_;
};

// insert a new string into dictionary, return its insertion order
size_t NewSortedStringDictionary::insert(const char * str, size_t len)
{
    size_t index = flatDict_.size();
    auto ret = keyToIndex.emplace(std::string(str, len), index);
    if (ret.second)
    {
        /// The view points at the key stored inside the map node.
        flatDict_.emplace_back(ret.first->first.data(), ret.first->first.size(), index);
        totalLength_ += len;
    }
    return ret.first->second;
}

/**
 * Reorder input index buffer from insertion order to dictionary order.
 *
 * String values are buffered as indexes in insertion order. Their positions
 * in the sorted dictionary are only known once the dictionary is complete,
 * so this function sorts the dictionary by value and then rewrites every
 * index in the buffer from insertion order to value order.
 */
void NewSortedStringDictionary::reorder(std::vector<int64_t> & idxBuffer) const
{
    /// insert() appends in insertion order, so the entries must be brought into
    /// value order first; without this the mapping below would be the identity.
    std::sort(flatDict_.begin(), flatDict_.end(), LessThan());

    // mapping from insertion order to value order
    std::vector<size_t> mapping(flatDict_.size());
    for (size_t i = 0; i < flatDict_.size(); ++i)
    {
        mapping[flatDict_[i].index] = i;
    }

    // do the transformation
    for (auto & idx : idxBuffer)
    {
        idx = static_cast<int64_t>(mapping[static_cast<size_t>(idx)]);
    }
}

// get dict entries in insertion order
void NewSortedStringDictionary::getEntriesInInsertionOrder(std::vector<const DictEntry *> & entries) const
{
    /// Restores insertion order in flatDict_ itself; the returned pointers refer to its elements.
    std::sort(
        flatDict_.begin(),
        flatDict_.end(),
        [](const DictEntryWithIndex & left, const DictEntryWithIndex & right) { return left.index < right.index; });

    entries.resize(flatDict_.size());
    for (size_t i = 0; i < flatDict_.size(); ++i)
    {
        entries[i] = &(flatDict_[i].entry);
    }
}

// return count of entries
size_t NewSortedStringDictionary::size() const
{
    return flatDict_.size();
}

// return total length of strings in the dictionary
uint64_t NewSortedStringDictionary::length() const
{
    return totalLength_;
}

void NewSortedStringDictionary::clear()
{
    totalLength_ = 0;
    keyToIndex.clear();
    flatDict_.clear();
}
/// Generate one million strings of the form "test string dictionary <k>",
/// where k is rand() % cardinality. rand() is not seeded here.
template <size_t cardinality>
static std::vector<std::string> mockStrings()
{
    constexpr size_t rows = 1000000;
    std::vector<std::string> res(rows);
    for (size_t row = 0; row < rows; ++row)
        res[row] = "test string dictionary " + std::to_string(rand() % cardinality);
    return res;
}
/// Build a dictionary of type DictionaryImpl from the given strings: insert every string,
/// record the returned index per row, then reorder the recorded indexes.
template <typename DictionaryImpl>
static NO_INLINE std::unique_ptr<DictionaryImpl> createAndWriteStringDictionary(const std::vector<std::string> & strs)
{
    auto dict = std::make_unique<DictionaryImpl>();
    auto & row_indexes = dict->idxInDictBuffer;

    for (const auto & str : strs)
        row_indexes.push_back(dict->insert(str.data(), str.size()));

    dict->reorder(row_indexes);
    return dict;
}
/// Benchmark body: the input strings are generated once, before the measured loop;
/// each iteration builds a fresh dictionary from them.
template <typename DictionaryImpl, size_t cardinality>
static void BM_writeStringDictionary(benchmark::State & state)
{
    const auto input = mockStrings<cardinality>();
    for (auto _ : state)
    {
        auto built = createAndWriteStringDictionary<DictionaryImpl>(input);
        /// Keep the result observable so the construction is not optimized away.
        benchmark::DoNotOptimize(built);
    }
}
/// Register the benchmark for both dictionary implementations at each cardinality
/// (the second template argument, passed to mockStrings as the modulus for rand()):
/// 10, 100, 1000, 10000 and 100000.
BENCHMARK_TEMPLATE(BM_writeStringDictionary, OldSortedStringDictionary, 10);
BENCHMARK_TEMPLATE(BM_writeStringDictionary, NewSortedStringDictionary, 10);
BENCHMARK_TEMPLATE(BM_writeStringDictionary, OldSortedStringDictionary, 100);
BENCHMARK_TEMPLATE(BM_writeStringDictionary, NewSortedStringDictionary, 100);
BENCHMARK_TEMPLATE(BM_writeStringDictionary, OldSortedStringDictionary, 1000);
BENCHMARK_TEMPLATE(BM_writeStringDictionary, NewSortedStringDictionary, 1000);
BENCHMARK_TEMPLATE(BM_writeStringDictionary, OldSortedStringDictionary, 10000);
BENCHMARK_TEMPLATE(BM_writeStringDictionary, NewSortedStringDictionary, 10000);
BENCHMARK_TEMPLATE(BM_writeStringDictionary, OldSortedStringDictionary, 100000);
BENCHMARK_TEMPLATE(BM_writeStringDictionary, NewSortedStringDictionary, 100000);

View File

@ -45,6 +45,7 @@ uint64_t ACLMap::convertACLs(const Coordination::ACLs & acls)
if (acls.empty()) if (acls.empty())
return 0; return 0;
std::lock_guard lock(map_mutex);
if (acl_to_num.contains(acls)) if (acl_to_num.contains(acls))
return acl_to_num[acls]; return acl_to_num[acls];
@ -62,6 +63,7 @@ Coordination::ACLs ACLMap::convertNumber(uint64_t acls_id) const
if (acls_id == 0) if (acls_id == 0)
return Coordination::ACLs{}; return Coordination::ACLs{};
std::lock_guard lock(map_mutex);
if (!num_to_acl.contains(acls_id)) if (!num_to_acl.contains(acls_id))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown ACL id {}. It's a bug", acls_id); throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown ACL id {}. It's a bug", acls_id);
@ -70,6 +72,7 @@ Coordination::ACLs ACLMap::convertNumber(uint64_t acls_id) const
void ACLMap::addMapping(uint64_t acls_id, const Coordination::ACLs & acls) void ACLMap::addMapping(uint64_t acls_id, const Coordination::ACLs & acls)
{ {
std::lock_guard lock(map_mutex);
num_to_acl[acls_id] = acls; num_to_acl[acls_id] = acls;
acl_to_num[acls] = acls_id; acl_to_num[acls] = acls_id;
max_acl_id = std::max(acls_id + 1, max_acl_id); /// max_acl_id pointer next slot max_acl_id = std::max(acls_id + 1, max_acl_id); /// max_acl_id pointer next slot
@ -77,11 +80,13 @@ void ACLMap::addMapping(uint64_t acls_id, const Coordination::ACLs & acls)
void ACLMap::addUsage(uint64_t acl_id) void ACLMap::addUsage(uint64_t acl_id)
{ {
std::lock_guard lock(map_mutex);
usage_counter[acl_id]++; usage_counter[acl_id]++;
} }
void ACLMap::removeUsage(uint64_t acl_id) void ACLMap::removeUsage(uint64_t acl_id)
{ {
std::lock_guard lock(map_mutex);
if (!usage_counter.contains(acl_id)) if (!usage_counter.contains(acl_id))
return; return;

View File

@ -32,6 +32,8 @@ private:
NumToACLMap num_to_acl; NumToACLMap num_to_acl;
UsageCounter usage_counter; UsageCounter usage_counter;
uint64_t max_acl_id{1}; uint64_t max_acl_id{1};
mutable std::mutex map_mutex;
public: public:
/// Convert ACL to number. If it's new ACL than adds it to map /// Convert ACL to number. If it's new ACL than adds it to map

View File

@ -301,11 +301,13 @@ String MonitorCommand::run()
print(ret, "server_state", keeper_info.getRole()); print(ret, "server_state", keeper_info.getRole());
print(ret, "znode_count", state_machine.getNodesCount()); const auto & storage_stats = state_machine.getStorageStats();
print(ret, "watch_count", state_machine.getTotalWatchesCount());
print(ret, "ephemerals_count", state_machine.getTotalEphemeralNodesCount()); print(ret, "znode_count", storage_stats.nodes_count.load(std::memory_order_relaxed));
print(ret, "approximate_data_size", state_machine.getApproximateDataSize()); print(ret, "watch_count", storage_stats.total_watches_count.load(std::memory_order_relaxed));
print(ret, "key_arena_size", state_machine.getKeyArenaSize()); print(ret, "ephemerals_count", storage_stats.total_emphemeral_nodes_count.load(std::memory_order_relaxed));
print(ret, "approximate_data_size", storage_stats.approximate_data_size.load(std::memory_order_relaxed));
print(ret, "key_arena_size", 0);
print(ret, "latest_snapshot_size", state_machine.getLatestSnapshotSize()); print(ret, "latest_snapshot_size", state_machine.getLatestSnapshotSize());
#if defined(OS_LINUX) || defined(OS_DARWIN) #if defined(OS_LINUX) || defined(OS_DARWIN)
@ -387,6 +389,7 @@ String ServerStatCommand::run()
auto & stats = keeper_dispatcher.getKeeperConnectionStats(); auto & stats = keeper_dispatcher.getKeeperConnectionStats();
Keeper4LWInfo keeper_info = keeper_dispatcher.getKeeper4LWInfo(); Keeper4LWInfo keeper_info = keeper_dispatcher.getKeeper4LWInfo();
const auto & storage_stats = keeper_dispatcher.getStateMachine().getStorageStats();
write("ClickHouse Keeper version", String(VERSION_DESCRIBE) + "-" + VERSION_GITHASH); write("ClickHouse Keeper version", String(VERSION_DESCRIBE) + "-" + VERSION_GITHASH);
@ -398,9 +401,9 @@ String ServerStatCommand::run()
write("Sent", toString(stats.getPacketsSent())); write("Sent", toString(stats.getPacketsSent()));
write("Connections", toString(keeper_info.alive_connections_count)); write("Connections", toString(keeper_info.alive_connections_count));
write("Outstanding", toString(keeper_info.outstanding_requests_count)); write("Outstanding", toString(keeper_info.outstanding_requests_count));
write("Zxid", formatZxid(keeper_info.last_zxid)); write("Zxid", formatZxid(storage_stats.last_zxid.load(std::memory_order_relaxed)));
write("Mode", keeper_info.getRole()); write("Mode", keeper_info.getRole());
write("Node count", toString(keeper_info.total_nodes_count)); write("Node count", toString(storage_stats.nodes_count.load(std::memory_order_relaxed)));
return buf.str(); return buf.str();
} }
@ -416,6 +419,7 @@ String StatCommand::run()
auto & stats = keeper_dispatcher.getKeeperConnectionStats(); auto & stats = keeper_dispatcher.getKeeperConnectionStats();
Keeper4LWInfo keeper_info = keeper_dispatcher.getKeeper4LWInfo(); Keeper4LWInfo keeper_info = keeper_dispatcher.getKeeper4LWInfo();
const auto & storage_stats = keeper_dispatcher.getStateMachine().getStorageStats();
write("ClickHouse Keeper version", String(VERSION_DESCRIBE) + "-" + VERSION_GITHASH); write("ClickHouse Keeper version", String(VERSION_DESCRIBE) + "-" + VERSION_GITHASH);
@ -431,9 +435,9 @@ String StatCommand::run()
write("Sent", toString(stats.getPacketsSent())); write("Sent", toString(stats.getPacketsSent()));
write("Connections", toString(keeper_info.alive_connections_count)); write("Connections", toString(keeper_info.alive_connections_count));
write("Outstanding", toString(keeper_info.outstanding_requests_count)); write("Outstanding", toString(keeper_info.outstanding_requests_count));
write("Zxid", formatZxid(keeper_info.last_zxid)); write("Zxid", formatZxid(storage_stats.last_zxid.load(std::memory_order_relaxed)));
write("Mode", keeper_info.getRole()); write("Mode", keeper_info.getRole());
write("Node count", toString(keeper_info.total_nodes_count)); write("Node count", toString(storage_stats.nodes_count.load(std::memory_order_relaxed)));
return buf.str(); return buf.str();
} }

View File

@ -1,7 +1,5 @@
#pragma once #pragma once
#include <string>
#include <base/types.h> #include <base/types.h>
#include <Common/Exception.h> #include <Common/Exception.h>
@ -30,9 +28,6 @@ struct Keeper4LWInfo
uint64_t follower_count; uint64_t follower_count;
uint64_t synced_follower_count; uint64_t synced_follower_count;
uint64_t total_nodes_count;
int64_t last_zxid;
String getRole() const String getRole() const
{ {
if (is_standalone) if (is_standalone)

View File

@ -38,15 +38,16 @@ void updateKeeperInformation(KeeperDispatcher & keeper_dispatcher, AsynchronousM
is_follower = static_cast<size_t>(keeper_info.is_follower); is_follower = static_cast<size_t>(keeper_info.is_follower);
is_exceeding_mem_soft_limit = static_cast<size_t>(keeper_info.is_exceeding_mem_soft_limit); is_exceeding_mem_soft_limit = static_cast<size_t>(keeper_info.is_exceeding_mem_soft_limit);
zxid = keeper_info.last_zxid;
const auto & state_machine = keeper_dispatcher.getStateMachine(); const auto & state_machine = keeper_dispatcher.getStateMachine();
znode_count = state_machine.getNodesCount(); const auto & storage_stats = state_machine.getStorageStats();
watch_count = state_machine.getTotalWatchesCount(); zxid = storage_stats.last_zxid.load(std::memory_order_relaxed);
ephemerals_count = state_machine.getTotalEphemeralNodesCount(); znode_count = storage_stats.nodes_count.load(std::memory_order_relaxed);
approximate_data_size = state_machine.getApproximateDataSize(); watch_count = storage_stats.total_watches_count.load(std::memory_order_relaxed);
key_arena_size = state_machine.getKeyArenaSize(); ephemerals_count = storage_stats.total_emphemeral_nodes_count.load(std::memory_order_relaxed);
session_with_watches = state_machine.getSessionsWithWatchesCount(); approximate_data_size = storage_stats.approximate_data_size.load(std::memory_order_relaxed);
paths_watched = state_machine.getWatchedPathsCount(); key_arena_size = 0;
session_with_watches = storage_stats.sessions_with_watches_count.load(std::memory_order_relaxed);
paths_watched = storage_stats.watched_paths_count.load(std::memory_order_relaxed);
# if defined(__linux__) || defined(__APPLE__) # if defined(__linux__) || defined(__APPLE__)
open_file_descriptor_count = getCurrentProcessFDCount(); open_file_descriptor_count = getCurrentProcessFDCount();

View File

@ -305,7 +305,7 @@ void KeeperDispatcher::requestThread()
if (has_read_request) if (has_read_request)
{ {
if (server->isLeaderAlive()) if (server->isLeaderAlive())
server->putLocalReadRequest(request); server->putLocalReadRequest({request});
else else
addErrorResponses({request}, Coordination::Error::ZCONNECTIONLOSS); addErrorResponses({request}, Coordination::Error::ZCONNECTIONLOSS);
} }

View File

@ -28,6 +28,16 @@
#include <Common/getMultipleKeysFromConfig.h> #include <Common/getMultipleKeysFromConfig.h>
#include <Common/getNumberOfPhysicalCPUCores.h> #include <Common/getNumberOfPhysicalCPUCores.h>
#if USE_SSL
# include <Server/CertificateReloader.h>
# include <openssl/ssl.h>
# include <Poco/Crypto/EVPPKey.h>
# include <Poco/Net/Context.h>
# include <Poco/Net/SSLManager.h>
# include <Poco/Net/Utility.h>
# include <Poco/StringTokenizer.h>
#endif
#include <chrono> #include <chrono>
#include <mutex> #include <mutex>
#include <string> #include <string>
@ -48,6 +58,7 @@ namespace ErrorCodes
extern const int SUPPORT_IS_DISABLED; extern const int SUPPORT_IS_DISABLED;
extern const int LOGICAL_ERROR; extern const int LOGICAL_ERROR;
extern const int INVALID_CONFIG_PARAMETER; extern const int INVALID_CONFIG_PARAMETER;
extern const int BAD_ARGUMENTS;
} }
using namespace std::chrono_literals; using namespace std::chrono_literals;
@ -56,6 +67,16 @@ namespace
{ {
#if USE_SSL #if USE_SSL
int callSetCertificate(SSL * ssl, void * arg)
{
if (!arg)
return -1;
const CertificateReloader::Data * data = reinterpret_cast<CertificateReloader::Data *>(arg);
return setCertificateCallback(ssl, data, getLogger("SSLContext"));
}
void setSSLParams(nuraft::asio_service::options & asio_opts) void setSSLParams(nuraft::asio_service::options & asio_opts)
{ {
const Poco::Util::LayeredConfiguration & config = Poco::Util::Application::instance().config(); const Poco::Util::LayeredConfiguration & config = Poco::Util::Application::instance().config();
@ -69,18 +90,55 @@ void setSSLParams(nuraft::asio_service::options & asio_opts)
if (!config.has(private_key_file_property)) if (!config.has(private_key_file_property))
throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Server private key file is not set."); throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Server private key file is not set.");
asio_opts.enable_ssl_ = true; Poco::Net::Context::Params params;
asio_opts.server_cert_file_ = config.getString(certificate_file_property); params.certificateFile = config.getString(certificate_file_property);
asio_opts.server_key_file_ = config.getString(private_key_file_property); if (params.certificateFile.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Server certificate file in config '{}' is empty", certificate_file_property);
params.privateKeyFile = config.getString(private_key_file_property);
if (params.privateKeyFile.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Server key file in config '{}' is empty", private_key_file_property);
auto pass_phrase = config.getString("openSSL.server.privateKeyPassphraseHandler.options.password", "");
auto certificate_data = std::make_shared<CertificateReloader::Data>(params.certificateFile, params.privateKeyFile, pass_phrase);
if (config.has(root_ca_file_property)) if (config.has(root_ca_file_property))
asio_opts.root_cert_file_ = config.getString(root_ca_file_property); params.caLocation = config.getString(root_ca_file_property);
if (config.getBool("openSSL.server.loadDefaultCAFile", false)) params.loadDefaultCAs = config.getBool("openSSL.server.loadDefaultCAFile", false);
asio_opts.load_default_ca_file_ = true; params.verificationMode = Poco::Net::Utility::convertVerificationMode(config.getString("openSSL.server.verificationMode", "none"));
if (config.getString("openSSL.server.verificationMode", "none") == "none") std::string disabled_protocols_list = config.getString("openSSL.server.disableProtocols", "");
asio_opts.skip_verification_ = true; Poco::StringTokenizer dp_tok(disabled_protocols_list, ";,", Poco::StringTokenizer::TOK_TRIM | Poco::StringTokenizer::TOK_IGNORE_EMPTY);
int disabled_protocols = 0;
for (const auto & token : dp_tok)
{
if (token == "sslv2")
disabled_protocols |= Poco::Net::Context::PROTO_SSLV2;
else if (token == "sslv3")
disabled_protocols |= Poco::Net::Context::PROTO_SSLV3;
else if (token == "tlsv1")
disabled_protocols |= Poco::Net::Context::PROTO_TLSV1;
else if (token == "tlsv1_1")
disabled_protocols |= Poco::Net::Context::PROTO_TLSV1_1;
else if (token == "tlsv1_2")
disabled_protocols |= Poco::Net::Context::PROTO_TLSV1_2;
}
asio_opts.ssl_context_provider_server_ = [params, certificate_data, disabled_protocols]
{
Poco::Net::Context context(Poco::Net::Context::Usage::TLSV1_2_SERVER_USE, params);
context.disableProtocols(disabled_protocols);
SSL_CTX * ssl_ctx = context.takeSslContext();
SSL_CTX_set_cert_cb(ssl_ctx, callSetCertificate, reinterpret_cast<void *>(certificate_data.get()));
return ssl_ctx;
};
asio_opts.ssl_context_provider_client_ = [ctx_params = std::move(params)]
{
Poco::Net::Context context(Poco::Net::Context::Usage::TLSV1_2_CLIENT_USE, ctx_params);
return context.takeSslContext();
};
} }
#endif #endif
@ -1149,8 +1207,6 @@ Keeper4LWInfo KeeperServer::getPartiallyFilled4LWInfo() const
result.synced_follower_count = getSyncedFollowerCount(); result.synced_follower_count = getSyncedFollowerCount();
} }
result.is_exceeding_mem_soft_limit = isExceedingMemorySoftLimit(); result.is_exceeding_mem_soft_limit = isExceedingMemorySoftLimit();
result.total_nodes_count = getKeeperStateMachine()->getNodesCount();
result.last_zxid = getKeeperStateMachine()->getLastProcessedZxid();
return result; return result;
} }

View File

@ -78,20 +78,20 @@ namespace
writeBinary(false, out); writeBinary(false, out);
/// Serialize stat /// Serialize stat
writeBinary(node.czxid, out); writeBinary(node.stats.czxid, out);
writeBinary(node.mzxid, out); writeBinary(node.stats.mzxid, out);
writeBinary(node.ctime(), out); writeBinary(node.stats.ctime(), out);
writeBinary(node.mtime, out); writeBinary(node.stats.mtime, out);
writeBinary(node.version, out); writeBinary(node.stats.version, out);
writeBinary(node.cversion, out); writeBinary(node.stats.cversion, out);
writeBinary(node.aversion, out); writeBinary(node.stats.aversion, out);
writeBinary(node.ephemeralOwner(), out); writeBinary(node.stats.ephemeralOwner(), out);
if (version < SnapshotVersion::V6) if (version < SnapshotVersion::V6)
writeBinary(static_cast<int32_t>(node.getData().size()), out); writeBinary(static_cast<int32_t>(node.stats.data_size), out);
writeBinary(node.numChildren(), out); writeBinary(node.stats.numChildren(), out);
writeBinary(node.pzxid, out); writeBinary(node.stats.pzxid, out);
writeBinary(node.seqNum(), out); writeBinary(node.stats.seqNum(), out);
if (version >= SnapshotVersion::V4 && version <= SnapshotVersion::V5) if (version >= SnapshotVersion::V4 && version <= SnapshotVersion::V5)
writeBinary(node.sizeInBytes(), out); writeBinary(node.sizeInBytes(), out);
@ -100,11 +100,11 @@ namespace
template<typename Node> template<typename Node>
void readNode(Node & node, ReadBuffer & in, SnapshotVersion version, ACLMap & acl_map) void readNode(Node & node, ReadBuffer & in, SnapshotVersion version, ACLMap & acl_map)
{ {
readVarUInt(node.data_size, in); readVarUInt(node.stats.data_size, in);
if (node.data_size != 0) if (node.stats.data_size != 0)
{ {
node.data = std::unique_ptr<char[]>(new char[node.data_size]); node.data = std::unique_ptr<char[]>(new char[node.stats.data_size]);
in.readStrict(node.data.get(), node.data_size); in.readStrict(node.data.get(), node.stats.data_size);
} }
if (version >= SnapshotVersion::V1) if (version >= SnapshotVersion::V1)
@ -141,19 +141,19 @@ namespace
} }
/// Deserialize stat /// Deserialize stat
readBinary(node.czxid, in); readBinary(node.stats.czxid, in);
readBinary(node.mzxid, in); readBinary(node.stats.mzxid, in);
int64_t ctime; int64_t ctime;
readBinary(ctime, in); readBinary(ctime, in);
node.setCtime(ctime); node.stats.setCtime(ctime);
readBinary(node.mtime, in); readBinary(node.stats.mtime, in);
readBinary(node.version, in); readBinary(node.stats.version, in);
readBinary(node.cversion, in); readBinary(node.stats.cversion, in);
readBinary(node.aversion, in); readBinary(node.stats.aversion, in);
int64_t ephemeral_owner = 0; int64_t ephemeral_owner = 0;
readBinary(ephemeral_owner, in); readBinary(ephemeral_owner, in);
if (ephemeral_owner != 0) if (ephemeral_owner != 0)
node.setEphemeralOwner(ephemeral_owner); node.stats.setEphemeralOwner(ephemeral_owner);
if (version < SnapshotVersion::V6) if (version < SnapshotVersion::V6)
{ {
@ -163,14 +163,14 @@ namespace
int32_t num_children = 0; int32_t num_children = 0;
readBinary(num_children, in); readBinary(num_children, in);
if (ephemeral_owner == 0) if (ephemeral_owner == 0)
node.setNumChildren(num_children); node.stats.setNumChildren(num_children);
readBinary(node.pzxid, in); readBinary(node.stats.pzxid, in);
int32_t seq_num = 0; int32_t seq_num = 0;
readBinary(seq_num, in); readBinary(seq_num, in);
if (ephemeral_owner == 0) if (ephemeral_owner == 0)
node.setSeqNum(seq_num); node.stats.setSeqNum(seq_num);
if (version >= SnapshotVersion::V4 && version <= SnapshotVersion::V5) if (version >= SnapshotVersion::V4 && version <= SnapshotVersion::V5)
{ {
@ -256,7 +256,7 @@ void KeeperStorageSnapshot<Storage>::serialize(const KeeperStorageSnapshot<Stora
/// Benign race condition possible while taking snapshot: NuRaft decide to create snapshot at some log id /// Benign race condition possible while taking snapshot: NuRaft decide to create snapshot at some log id
/// and only after some time we lock storage and enable snapshot mode. So snapshot_container_size can be /// and only after some time we lock storage and enable snapshot mode. So snapshot_container_size can be
/// slightly bigger than required. /// slightly bigger than required.
if (node.mzxid > snapshot.zxid) if (node.stats.mzxid > snapshot.zxid)
break; break;
writeBinary(path, out); writeBinary(path, out);
writeNode(node, snapshot.version, out); writeNode(node, snapshot.version, out);
@ -306,7 +306,7 @@ void KeeperStorageSnapshot<Storage>::serialize(const KeeperStorageSnapshot<Stora
} }
template<typename Storage> template<typename Storage>
void KeeperStorageSnapshot<Storage>::deserialize(SnapshotDeserializationResult<Storage> & deserialization_result, ReadBuffer & in, KeeperContextPtr keeper_context) void KeeperStorageSnapshot<Storage>::deserialize(SnapshotDeserializationResult<Storage> & deserialization_result, ReadBuffer & in, KeeperContextPtr keeper_context) TSA_NO_THREAD_SAFETY_ANALYSIS
{ {
uint8_t version; uint8_t version;
readBinary(version, in); readBinary(version, in);
@ -435,13 +435,13 @@ void KeeperStorageSnapshot<Storage>::deserialize(SnapshotDeserializationResult<S
} }
} }
auto ephemeral_owner = node.ephemeralOwner(); auto ephemeral_owner = node.stats.ephemeralOwner();
if constexpr (!use_rocksdb) if constexpr (!use_rocksdb)
if (!node.isEphemeral() && node.numChildren() > 0) if (!node.stats.isEphemeral() && node.stats.numChildren() > 0)
node.getChildren().reserve(node.numChildren()); node.getChildren().reserve(node.stats.numChildren());
if (ephemeral_owner != 0) if (ephemeral_owner != 0)
storage.ephemerals[node.ephemeralOwner()].insert(std::string{path}); storage.committed_ephemerals[node.stats.ephemeralOwner()].insert(std::string{path});
if (recalculate_digest) if (recalculate_digest)
storage.nodes_digest += node.getDigest(path); storage.nodes_digest += node.getDigest(path);
@ -467,16 +467,25 @@ void KeeperStorageSnapshot<Storage>::deserialize(SnapshotDeserializationResult<S
{ {
if (itr.key != "/") if (itr.key != "/")
{ {
if (itr.value.numChildren() != static_cast<int32_t>(itr.value.getChildren().size())) if (itr.value.stats.numChildren() != static_cast<int32_t>(itr.value.getChildren().size()))
{ {
#ifdef NDEBUG #ifdef NDEBUG
/// TODO (alesapin) remove this, it should be always CORRUPTED_DATA. /// TODO (alesapin) remove this, it should be always CORRUPTED_DATA.
LOG_ERROR(getLogger("KeeperSnapshotManager"), "Children counter in stat.numChildren {}" LOG_ERROR(
" is different from actual children size {} for node {}", itr.value.numChildren(), itr.value.getChildren().size(), itr.key); getLogger("KeeperSnapshotManager"),
"Children counter in stat.numChildren {}"
" is different from actual children size {} for node {}",
itr.value.stats.numChildren(),
itr.value.getChildren().size(),
itr.key);
#else #else
throw Exception(ErrorCodes::LOGICAL_ERROR, "Children counter in stat.numChildren {}" throw Exception(
" is different from actual children size {} for node {}", ErrorCodes::LOGICAL_ERROR,
itr.value.numChildren(), itr.value.getChildren().size(), itr.key); "Children counter in stat.numChildren {}"
" is different from actual children size {} for node {}",
itr.value.stats.numChildren(),
itr.value.getChildren().size(),
itr.key);
#endif #endif
} }
} }
@ -511,7 +520,7 @@ void KeeperStorageSnapshot<Storage>::deserialize(SnapshotDeserializationResult<S
session_auth_counter++; session_auth_counter++;
} }
if (!ids.empty()) if (!ids.empty())
storage.session_and_auth[active_session_id] = ids; storage.committed_session_and_auth[active_session_id] = ids;
} }
current_session_size++; current_session_size++;
} }
@ -527,6 +536,8 @@ void KeeperStorageSnapshot<Storage>::deserialize(SnapshotDeserializationResult<S
buffer->pos(0); buffer->pos(0);
deserialization_result.cluster_config = ClusterConfig::deserialize(*buffer); deserialization_result.cluster_config = ClusterConfig::deserialize(*buffer);
} }
storage.updateStats();
} }
template<typename Storage> template<typename Storage>
@ -544,7 +555,7 @@ KeeperStorageSnapshot<Storage>::KeeperStorageSnapshot(Storage * storage_, uint64
begin = storage->getSnapshotIteratorBegin(); begin = storage->getSnapshotIteratorBegin();
session_and_timeout = storage->getActiveSessions(); session_and_timeout = storage->getActiveSessions();
acl_map = storage->acl_map.getMapping(); acl_map = storage->acl_map.getMapping();
session_and_auth = storage->session_and_auth; session_and_auth = storage->committed_session_and_auth;
} }
template<typename Storage> template<typename Storage>
@ -563,7 +574,7 @@ KeeperStorageSnapshot<Storage>::KeeperStorageSnapshot(
begin = storage->getSnapshotIteratorBegin(); begin = storage->getSnapshotIteratorBegin();
session_and_timeout = storage->getActiveSessions(); session_and_timeout = storage->getActiveSessions();
acl_map = storage->acl_map.getMapping(); acl_map = storage->acl_map.getMapping();
session_and_auth = storage->session_and_auth; session_and_auth = storage->committed_session_and_auth;
} }
template<typename Storage> template<typename Storage>

View File

@ -36,6 +36,11 @@ namespace ProfileEvents
extern const Event KeeperStorageLockWaitMicroseconds; extern const Event KeeperStorageLockWaitMicroseconds;
} }
namespace CurrentMetrics
{
extern const Metric KeeperAliveConnections;
}
namespace DB namespace DB
{ {
@ -56,6 +61,7 @@ IKeeperStateMachine::IKeeperStateMachine(
, snapshots_queue(snapshots_queue_) , snapshots_queue(snapshots_queue_)
, min_request_size_to_cache(keeper_context_->getCoordinationSettings()->min_request_size_for_cache) , min_request_size_to_cache(keeper_context_->getCoordinationSettings()->min_request_size_for_cache)
, log(getLogger("KeeperStateMachine")) , log(getLogger("KeeperStateMachine"))
, read_pool(CurrentMetrics::KeeperAliveConnections, CurrentMetrics::KeeperAliveConnections, CurrentMetrics::KeeperAliveConnections, 100, 10000, 10000)
, superdigest(superdigest_) , superdigest(superdigest_)
, keeper_context(keeper_context_) , keeper_context(keeper_context_)
, snapshot_manager_s3(snapshot_manager_s3_) , snapshot_manager_s3(snapshot_manager_s3_)
@ -175,18 +181,20 @@ void assertDigest(
} }
} }
struct TSA_SCOPED_LOCKABLE LockGuardWithStats final template <bool shared = false>
struct LockGuardWithStats final
{ {
std::unique_lock<std::mutex> lock; using LockType = std::conditional_t<shared, std::shared_lock<SharedMutex>, std::unique_lock<SharedMutex>>;
explicit LockGuardWithStats(std::mutex & mutex) TSA_ACQUIRE(mutex) LockType lock;
explicit LockGuardWithStats(SharedMutex & mutex)
{ {
Stopwatch watch; Stopwatch watch;
std::unique_lock l(mutex); LockType l(mutex);
ProfileEvents::increment(ProfileEvents::KeeperStorageLockWaitMicroseconds, watch.elapsedMicroseconds()); ProfileEvents::increment(ProfileEvents::KeeperStorageLockWaitMicroseconds, watch.elapsedMicroseconds());
lock = std::move(l); lock = std::move(l);
} }
~LockGuardWithStats() TSA_RELEASE() = default; ~LockGuardWithStats() = default;
}; };
} }
@ -312,13 +320,12 @@ bool KeeperStateMachine<Storage>::preprocess(const KeeperStorageBase::RequestFor
if (op_num == Coordination::OpNum::SessionID || op_num == Coordination::OpNum::Reconfig) if (op_num == Coordination::OpNum::SessionID || op_num == Coordination::OpNum::Reconfig)
return true; return true;
LockGuardWithStats lock(storage_and_responses_lock);
if (storage->isFinalized()) if (storage->isFinalized())
return false; return false;
try try
{ {
LockGuardWithStats<true> lock(storage_mutex);
storage->preprocessRequest( storage->preprocessRequest(
request_for_session.request, request_for_session.request,
request_for_session.session_id, request_for_session.session_id,
@ -335,7 +342,12 @@ bool KeeperStateMachine<Storage>::preprocess(const KeeperStorageBase::RequestFor
} }
if (keeper_context->digestEnabled() && request_for_session.digest) if (keeper_context->digestEnabled() && request_for_session.digest)
assertDigest(*request_for_session.digest, storage->getNodesDigest(false), *request_for_session.request, request_for_session.log_idx, false); assertDigest(
*request_for_session.digest,
storage->getNodesDigest(false, /*lock_transaction_mutex=*/true),
*request_for_session.request,
request_for_session.log_idx,
false);
return true; return true;
} }
@ -343,7 +355,7 @@ bool KeeperStateMachine<Storage>::preprocess(const KeeperStorageBase::RequestFor
template<typename Storage> template<typename Storage>
void KeeperStateMachine<Storage>::reconfigure(const KeeperStorageBase::RequestForSession& request_for_session) void KeeperStateMachine<Storage>::reconfigure(const KeeperStorageBase::RequestForSession& request_for_session)
{ {
LockGuardWithStats lock(storage_and_responses_lock); LockGuardWithStats lock(storage_mutex);
KeeperStorageBase::ResponseForSession response = processReconfiguration(request_for_session); KeeperStorageBase::ResponseForSession response = processReconfiguration(request_for_session);
if (!responses_queue.push(response)) if (!responses_queue.push(response))
{ {
@ -461,7 +473,7 @@ nuraft::ptr<nuraft::buffer> KeeperStateMachine<Storage>::commit(const uint64_t l
response_for_session.response = response; response_for_session.response = response;
response_for_session.request = request_for_session->request; response_for_session.request = request_for_session->request;
LockGuardWithStats lock(storage_and_responses_lock); LockGuardWithStats lock(storage_mutex);
session_id = storage->getSessionID(session_id_request.session_timeout_ms); session_id = storage->getSessionID(session_id_request.session_timeout_ms);
LOG_DEBUG(log, "Session ID response {} with timeout {}", session_id, session_id_request.session_timeout_ms); LOG_DEBUG(log, "Session ID response {} with timeout {}", session_id, session_id_request.session_timeout_ms);
response->session_id = session_id; response->session_id = session_id;
@ -472,24 +484,31 @@ nuraft::ptr<nuraft::buffer> KeeperStateMachine<Storage>::commit(const uint64_t l
if (op_num == Coordination::OpNum::Close) if (op_num == Coordination::OpNum::Close)
{ {
std::lock_guard lock(request_cache_mutex); std::lock_guard cache_lock(request_cache_mutex);
parsed_request_cache.erase(request_for_session->session_id); parsed_request_cache.erase(request_for_session->session_id);
} }
LockGuardWithStats lock(storage_and_responses_lock);
KeeperStorageBase::ResponsesForSessions responses_for_sessions
= storage->processRequest(request_for_session->request, request_for_session->session_id, request_for_session->zxid);
for (auto & response_for_session : responses_for_sessions)
{ {
if (response_for_session.response->xid != Coordination::WATCH_XID) LockGuardWithStats<true> lock(storage_mutex);
response_for_session.request = request_for_session->request; std::lock_guard response_lock(process_and_responses_lock);
KeeperStorageBase::ResponsesForSessions responses_for_sessions
= storage->processRequest(request_for_session->request, request_for_session->session_id, request_for_session->zxid);
for (auto & response_for_session : responses_for_sessions)
{
if (response_for_session.response->xid != Coordination::WATCH_XID)
response_for_session.request = request_for_session->request;
try_push(response_for_session); try_push(response_for_session);
}
} }
if (keeper_context->digestEnabled() && request_for_session->digest) if (keeper_context->digestEnabled() && request_for_session->digest)
assertDigest(*request_for_session->digest, storage->getNodesDigest(true), *request_for_session->request, request_for_session->log_idx, true); assertDigest(
*request_for_session->digest,
storage->getNodesDigest(true, /*lock_transaction_mutex=*/true),
*request_for_session->request,
request_for_session->log_idx,
true);
} }
ProfileEvents::increment(ProfileEvents::KeeperCommits); ProfileEvents::increment(ProfileEvents::KeeperCommits);
@ -534,8 +553,6 @@ bool KeeperStateMachine<Storage>::apply_snapshot(nuraft::snapshot & s)
} }
{ /// deserialize and apply snapshot to storage { /// deserialize and apply snapshot to storage
LockGuardWithStats lock(storage_and_responses_lock);
SnapshotDeserializationResult<Storage> snapshot_deserialization_result; SnapshotDeserializationResult<Storage> snapshot_deserialization_result;
if (latest_snapshot_ptr) if (latest_snapshot_ptr)
snapshot_deserialization_result = snapshot_manager.deserializeSnapshotFromBuffer(latest_snapshot_ptr); snapshot_deserialization_result = snapshot_manager.deserializeSnapshotFromBuffer(latest_snapshot_ptr);
@ -543,6 +560,7 @@ bool KeeperStateMachine<Storage>::apply_snapshot(nuraft::snapshot & s)
snapshot_deserialization_result snapshot_deserialization_result
= snapshot_manager.deserializeSnapshotFromBuffer(snapshot_manager.deserializeSnapshotBufferFromDisk(s.get_last_log_idx())); = snapshot_manager.deserializeSnapshotFromBuffer(snapshot_manager.deserializeSnapshotBufferFromDisk(s.get_last_log_idx()));
LockGuardWithStats storage_lock(storage_mutex);
/// maybe some logs were preprocessed with log idx larger than the snapshot idx /// maybe some logs were preprocessed with log idx larger than the snapshot idx
/// we have to apply them to the new storage /// we have to apply them to the new storage
storage->applyUncommittedState(*snapshot_deserialization_result.storage, snapshot_deserialization_result.snapshot_meta->get_last_log_idx()); storage->applyUncommittedState(*snapshot_deserialization_result.storage, snapshot_deserialization_result.snapshot_meta->get_last_log_idx());
@ -587,16 +605,7 @@ void KeeperStateMachine<Storage>::rollbackRequest(const KeeperStorageBase::Reque
if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID) if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID)
return; return;
LockGuardWithStats lock(storage_and_responses_lock); LockGuardWithStats lock(storage_mutex);
storage->rollbackRequest(request_for_session.zxid, allow_missing);
}
template<typename Storage>
void KeeperStateMachine<Storage>::rollbackRequestNoLock(const KeeperStorageBase::RequestForSession & request_for_session, bool allow_missing)
{
if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID)
return;
storage->rollbackRequest(request_for_session.zxid, allow_missing); storage->rollbackRequest(request_for_session.zxid, allow_missing);
} }
@ -616,7 +625,7 @@ void KeeperStateMachine<Storage>::create_snapshot(nuraft::snapshot & s, nuraft::
auto snapshot_meta_copy = nuraft::snapshot::deserialize(*snp_buf); auto snapshot_meta_copy = nuraft::snapshot::deserialize(*snp_buf);
CreateSnapshotTask snapshot_task; CreateSnapshotTask snapshot_task;
{ /// lock storage for a short period time to turn on "snapshot mode". After that we can read consistent storage state without locking. { /// lock storage for a short period time to turn on "snapshot mode". After that we can read consistent storage state without locking.
LockGuardWithStats lock(storage_and_responses_lock); LockGuardWithStats lock(storage_mutex);
snapshot_task.snapshot = std::make_shared<KeeperStorageSnapshot<Storage>>(storage.get(), snapshot_meta_copy, getClusterConfig()); snapshot_task.snapshot = std::make_shared<KeeperStorageSnapshot<Storage>>(storage.get(), snapshot_meta_copy, getClusterConfig());
} }
@ -681,7 +690,7 @@ void KeeperStateMachine<Storage>::create_snapshot(nuraft::snapshot & s, nuraft::
} }
{ {
/// Destroy snapshot with lock /// Destroy snapshot with lock
LockGuardWithStats lock(storage_and_responses_lock); LockGuardWithStats lock(storage_mutex);
LOG_TRACE(log, "Clearing garbage after snapshot"); LOG_TRACE(log, "Clearing garbage after snapshot");
/// Turn off "snapshot mode" and clear outdate part of storage state /// Turn off "snapshot mode" and clear outdate part of storage state
storage->clearGarbageAfterSnapshot(); storage->clearGarbageAfterSnapshot();
@ -824,10 +833,10 @@ template<typename Storage>
void KeeperStateMachine<Storage>::processReadRequest(const KeeperStorageBase::RequestForSession & request_for_session) void KeeperStateMachine<Storage>::processReadRequest(const KeeperStorageBase::RequestForSession & request_for_session)
{ {
/// Pure local request, just process it with storage /// Pure local request, just process it with storage
LockGuardWithStats lock(storage_and_responses_lock); LockGuardWithStats<true> storage_lock(storage_mutex);
std::lock_guard response_lock(process_and_responses_lock);
auto responses = storage->processRequest( auto responses = storage->processRequest(
request_for_session.request, request_for_session.session_id, std::nullopt, true /*check_acl*/, true /*is_local*/); request_for_session.request, request_for_session.session_id, std::nullopt, true /*check_acl*/, true /*is_local*/);
for (auto & response_for_session : responses) for (auto & response_for_session : responses)
{ {
if (response_for_session.response->xid != Coordination::WATCH_XID) if (response_for_session.response->xid != Coordination::WATCH_XID)
@ -840,112 +849,116 @@ void KeeperStateMachine<Storage>::processReadRequest(const KeeperStorageBase::Re
template<typename Storage> template<typename Storage>
void KeeperStateMachine<Storage>::shutdownStorage() void KeeperStateMachine<Storage>::shutdownStorage()
{ {
LockGuardWithStats lock(storage_and_responses_lock); LockGuardWithStats lock(storage_mutex);
storage->finalize(); storage->finalize();
} }
template<typename Storage> template<typename Storage>
std::vector<int64_t> KeeperStateMachine<Storage>::getDeadSessions() std::vector<int64_t> KeeperStateMachine<Storage>::getDeadSessions()
{ {
LockGuardWithStats lock(storage_and_responses_lock); LockGuardWithStats lock(storage_mutex);
return storage->getDeadSessions(); return storage->getDeadSessions();
} }
template<typename Storage> template<typename Storage>
int64_t KeeperStateMachine<Storage>::getNextZxid() const int64_t KeeperStateMachine<Storage>::getNextZxid() const
{ {
LockGuardWithStats lock(storage_and_responses_lock);
return storage->getNextZXID(); return storage->getNextZXID();
} }
template<typename Storage> template<typename Storage>
KeeperStorageBase::Digest KeeperStateMachine<Storage>::getNodesDigest() const KeeperStorageBase::Digest KeeperStateMachine<Storage>::getNodesDigest() const
{ {
LockGuardWithStats lock(storage_and_responses_lock); LockGuardWithStats lock(storage_mutex);
return storage->getNodesDigest(false); return storage->getNodesDigest(false, /*lock_transaction_mutex=*/true);
} }
template<typename Storage> template<typename Storage>
uint64_t KeeperStateMachine<Storage>::getLastProcessedZxid() const uint64_t KeeperStateMachine<Storage>::getLastProcessedZxid() const
{ {
LockGuardWithStats lock(storage_and_responses_lock);
return storage->getZXID(); return storage->getZXID();
} }
template<typename Storage>
const KeeperStorageBase::Stats & KeeperStateMachine<Storage>::getStorageStats() const TSA_NO_THREAD_SAFETY_ANALYSIS
{
return storage->getStorageStats();
}
template<typename Storage> template<typename Storage>
uint64_t KeeperStateMachine<Storage>::getNodesCount() const uint64_t KeeperStateMachine<Storage>::getNodesCount() const
{ {
LockGuardWithStats lock(storage_and_responses_lock); LockGuardWithStats lock(storage_mutex);
return storage->getNodesCount(); return storage->getNodesCount();
} }
template<typename Storage> template<typename Storage>
uint64_t KeeperStateMachine<Storage>::getTotalWatchesCount() const uint64_t KeeperStateMachine<Storage>::getTotalWatchesCount() const
{ {
LockGuardWithStats lock(storage_and_responses_lock); LockGuardWithStats lock(storage_mutex);
return storage->getTotalWatchesCount(); return storage->getTotalWatchesCount();
} }
template<typename Storage> template<typename Storage>
uint64_t KeeperStateMachine<Storage>::getWatchedPathsCount() const uint64_t KeeperStateMachine<Storage>::getWatchedPathsCount() const
{ {
LockGuardWithStats lock(storage_and_responses_lock); LockGuardWithStats lock(storage_mutex);
return storage->getWatchedPathsCount(); return storage->getWatchedPathsCount();
} }
template<typename Storage> template<typename Storage>
uint64_t KeeperStateMachine<Storage>::getSessionsWithWatchesCount() const uint64_t KeeperStateMachine<Storage>::getSessionsWithWatchesCount() const
{ {
LockGuardWithStats lock(storage_and_responses_lock); LockGuardWithStats lock(storage_mutex);
return storage->getSessionsWithWatchesCount(); return storage->getSessionsWithWatchesCount();
} }
template<typename Storage> template<typename Storage>
uint64_t KeeperStateMachine<Storage>::getTotalEphemeralNodesCount() const uint64_t KeeperStateMachine<Storage>::getTotalEphemeralNodesCount() const
{ {
LockGuardWithStats lock(storage_and_responses_lock); LockGuardWithStats lock(storage_mutex);
return storage->getTotalEphemeralNodesCount(); return storage->getTotalEphemeralNodesCount();
} }
template<typename Storage> template<typename Storage>
uint64_t KeeperStateMachine<Storage>::getSessionWithEphemeralNodesCount() const uint64_t KeeperStateMachine<Storage>::getSessionWithEphemeralNodesCount() const
{ {
LockGuardWithStats lock(storage_and_responses_lock); LockGuardWithStats lock(storage_mutex);
return storage->getSessionWithEphemeralNodesCount(); return storage->getSessionWithEphemeralNodesCount();
} }
template<typename Storage> template<typename Storage>
void KeeperStateMachine<Storage>::dumpWatches(WriteBufferFromOwnString & buf) const void KeeperStateMachine<Storage>::dumpWatches(WriteBufferFromOwnString & buf) const
{ {
LockGuardWithStats lock(storage_and_responses_lock); LockGuardWithStats lock(storage_mutex);
storage->dumpWatches(buf); storage->dumpWatches(buf);
} }
template<typename Storage> template<typename Storage>
void KeeperStateMachine<Storage>::dumpWatchesByPath(WriteBufferFromOwnString & buf) const void KeeperStateMachine<Storage>::dumpWatchesByPath(WriteBufferFromOwnString & buf) const
{ {
LockGuardWithStats lock(storage_and_responses_lock); LockGuardWithStats lock(storage_mutex);
storage->dumpWatchesByPath(buf); storage->dumpWatchesByPath(buf);
} }
template<typename Storage> template<typename Storage>
void KeeperStateMachine<Storage>::dumpSessionsAndEphemerals(WriteBufferFromOwnString & buf) const void KeeperStateMachine<Storage>::dumpSessionsAndEphemerals(WriteBufferFromOwnString & buf) const
{ {
LockGuardWithStats lock(storage_and_responses_lock); LockGuardWithStats lock(storage_mutex);
storage->dumpSessionsAndEphemerals(buf); storage->dumpSessionsAndEphemerals(buf);
} }
template<typename Storage> template<typename Storage>
uint64_t KeeperStateMachine<Storage>::getApproximateDataSize() const uint64_t KeeperStateMachine<Storage>::getApproximateDataSize() const
{ {
LockGuardWithStats lock(storage_and_responses_lock); LockGuardWithStats lock(storage_mutex);
return storage->getApproximateDataSize(); return storage->getApproximateDataSize();
} }
template<typename Storage> template<typename Storage>
uint64_t KeeperStateMachine<Storage>::getKeyArenaSize() const uint64_t KeeperStateMachine<Storage>::getKeyArenaSize() const
{ {
LockGuardWithStats lock(storage_and_responses_lock); LockGuardWithStats lock(storage_mutex);
return storage->getArenaDataSize(); return storage->getArenaDataSize();
} }
@ -988,7 +1001,7 @@ ClusterConfigPtr IKeeperStateMachine::getClusterConfig() const
template<typename Storage> template<typename Storage>
void KeeperStateMachine<Storage>::recalculateStorageStats() void KeeperStateMachine<Storage>::recalculateStorageStats()
{ {
LockGuardWithStats lock(storage_and_responses_lock); LockGuardWithStats lock(storage_mutex);
LOG_INFO(log, "Recalculating storage stats"); LOG_INFO(log, "Recalculating storage stats");
storage->recalculateStats(); storage->recalculateStats();
LOG_INFO(log, "Done recalculating storage stats"); LOG_INFO(log, "Done recalculating storage stats");

View File

@ -85,6 +85,8 @@ public:
/// Introspection functions for 4lw commands /// Introspection functions for 4lw commands
virtual uint64_t getLastProcessedZxid() const = 0; virtual uint64_t getLastProcessedZxid() const = 0;
virtual const KeeperStorageBase::Stats & getStorageStats() const = 0;
virtual uint64_t getNodesCount() const = 0; virtual uint64_t getNodesCount() const = 0;
virtual uint64_t getTotalWatchesCount() const = 0; virtual uint64_t getTotalWatchesCount() const = 0;
virtual uint64_t getWatchedPathsCount() const = 0; virtual uint64_t getWatchedPathsCount() const = 0;
@ -124,12 +126,16 @@ protected:
/// Mutex for snapshots /// Mutex for snapshots
mutable std::mutex snapshots_lock; mutable std::mutex snapshots_lock;
/// Lock for storage and responses_queue. It's important to process requests /// Lock for the storage
/// Storage works in thread-safe way ONLY for preprocessing/processing
/// In any other case, unique storage lock needs to be taken
mutable SharedMutex storage_mutex;
/// Lock for processing and responses_queue. It's important to process requests
/// and push them to the responses queue while holding this lock. Otherwise /// and push them to the responses queue while holding this lock. Otherwise
/// we can get strange cases when, for example client send read request with /// we can get strange cases when, for example client send read request with
/// watch and after that receive watch response and only receive response /// watch and after that receive watch response and only receive response
/// for request. /// for request.
mutable std::mutex storage_and_responses_lock; mutable std::mutex process_and_responses_lock;
std::unordered_map<int64_t, std::unordered_map<Coordination::XID, std::shared_ptr<KeeperStorageBase::RequestForSession>>> parsed_request_cache; std::unordered_map<int64_t, std::unordered_map<Coordination::XID, std::shared_ptr<KeeperStorageBase::RequestForSession>>> parsed_request_cache;
uint64_t min_request_size_to_cache{0}; uint64_t min_request_size_to_cache{0};
@ -146,6 +152,7 @@ protected:
mutable std::mutex cluster_config_lock; mutable std::mutex cluster_config_lock;
ClusterConfigPtr cluster_config; ClusterConfigPtr cluster_config;
ThreadPool read_pool;
/// Special part of ACL system -- superdigest specified in server config. /// Special part of ACL system -- superdigest specified in server config.
const std::string superdigest; const std::string superdigest;
@ -153,10 +160,8 @@ protected:
KeeperSnapshotManagerS3 * snapshot_manager_s3; KeeperSnapshotManagerS3 * snapshot_manager_s3;
virtual KeeperStorageBase::ResponseForSession processReconfiguration( virtual KeeperStorageBase::ResponseForSession processReconfiguration(const KeeperStorageBase::RequestForSession & request_for_session)
const KeeperStorageBase::RequestForSession& request_for_session) = 0;
TSA_REQUIRES(storage_and_responses_lock) = 0;
}; };
/// ClickHouse Keeper state machine. Wrapper for KeeperStorage. /// ClickHouse Keeper state machine. Wrapper for KeeperStorage.
@ -189,10 +194,6 @@ public:
// (can happen in case of exception during preprocessing) // (can happen in case of exception during preprocessing)
void rollbackRequest(const KeeperStorageBase::RequestForSession & request_for_session, bool allow_missing) override; void rollbackRequest(const KeeperStorageBase::RequestForSession & request_for_session, bool allow_missing) override;
void rollbackRequestNoLock(
const KeeperStorageBase::RequestForSession & request_for_session,
bool allow_missing) TSA_NO_THREAD_SAFETY_ANALYSIS;
/// Apply preliminarily saved (save_logical_snp_obj) snapshot to our state. /// Apply preliminarily saved (save_logical_snp_obj) snapshot to our state.
bool apply_snapshot(nuraft::snapshot & s) override; bool apply_snapshot(nuraft::snapshot & s) override;
@ -205,7 +206,7 @@ public:
// This should be used only for tests or keeper-data-dumper because it violates // This should be used only for tests or keeper-data-dumper because it violates
// TSA -- we can't acquire the lock outside of this class or return a storage under lock // TSA -- we can't acquire the lock outside of this class or return a storage under lock
// in a reasonable way. // in a reasonable way.
Storage & getStorageUnsafe() TSA_NO_THREAD_SAFETY_ANALYSIS Storage & getStorageUnsafe()
{ {
return *storage; return *storage;
} }
@ -224,6 +225,8 @@ public:
/// Introspection functions for 4lw commands /// Introspection functions for 4lw commands
uint64_t getLastProcessedZxid() const override; uint64_t getLastProcessedZxid() const override;
const KeeperStorageBase::Stats & getStorageStats() const override;
uint64_t getNodesCount() const override; uint64_t getNodesCount() const override;
uint64_t getTotalWatchesCount() const override; uint64_t getTotalWatchesCount() const override;
uint64_t getWatchedPathsCount() const override; uint64_t getWatchedPathsCount() const override;
@ -245,12 +248,12 @@ public:
private: private:
/// Main state machine logic /// Main state machine logic
std::unique_ptr<Storage> storage; //TSA_PT_GUARDED_BY(storage_and_responses_lock); std::unique_ptr<Storage> storage;
/// Save/Load and Serialize/Deserialize logic for snapshots. /// Save/Load and Serialize/Deserialize logic for snapshots.
KeeperSnapshotManager<Storage> snapshot_manager; KeeperSnapshotManager<Storage> snapshot_manager;
KeeperStorageBase::ResponseForSession processReconfiguration(const KeeperStorageBase::RequestForSession & request_for_session) KeeperStorageBase::ResponseForSession processReconfiguration(const KeeperStorageBase::RequestForSession & request_for_session) override;
TSA_REQUIRES(storage_and_responses_lock) override;
}; };
} }

File diff suppressed because it is too large Load Diff

View File

@ -1,10 +1,16 @@
#pragma once #pragma once
#include <unordered_map> #include <unordered_map>
#include <unordered_set>
#include <vector> #include <vector>
#include <Coordination/ACLMap.h> #include <Coordination/ACLMap.h>
#include <Coordination/SessionExpiryQueue.h> #include <Coordination/SessionExpiryQueue.h>
#include <Coordination/SnapshotableHashTable.h> #include <Coordination/SnapshotableHashTable.h>
#include "Common/StringHashForHeterogeneousLookup.h"
#include <Common/SharedMutex.h>
#include <Common/Concepts.h>
#include <base/defines.h>
#include <absl/container/flat_hash_set.h> #include <absl/container/flat_hash_set.h>
@ -23,14 +29,11 @@ using ResponseCallback = std::function<void(const Coordination::ZooKeeperRespons
using ChildrenSet = absl::flat_hash_set<StringRef, StringRefHash>; using ChildrenSet = absl::flat_hash_set<StringRef, StringRefHash>;
using SessionAndTimeout = std::unordered_map<int64_t, int64_t>; using SessionAndTimeout = std::unordered_map<int64_t, int64_t>;
/// KeeperRocksNodeInfo is used in RocksDB keeper. struct NodeStats
/// It is serialized directly as POD to RocksDB.
struct KeeperRocksNodeInfo
{ {
int64_t czxid{0}; int64_t czxid{0};
int64_t mzxid{0}; int64_t mzxid{0};
int64_t pzxid{0}; int64_t pzxid{0};
uint64_t acl_id = 0; /// 0 -- no ACL by default
int64_t mtime{0}; int64_t mtime{0};
@ -38,225 +41,9 @@ struct KeeperRocksNodeInfo
int32_t cversion{0}; int32_t cversion{0};
int32_t aversion{0}; int32_t aversion{0};
int32_t seq_num = 0; uint32_t data_size{0};
mutable UInt64 digest = 0; /// we cached digest for this node.
/// as ctime can't be negative because it stores the timestamp when the
/// node was created, we can use the MSB for a bool
struct
{
bool is_ephemeral : 1;
int64_t ctime : 63;
} is_ephemeral_and_ctime{false, 0};
/// ephemeral notes cannot have children so a node can set either
/// ephemeral_owner OR seq_num + num_children
union
{
int64_t ephemeral_owner;
struct
{
int32_t seq_num;
int32_t num_children;
} children_info;
} ephemeral_or_children_data{0};
bool isEphemeral() const
{
return is_ephemeral_and_ctime.is_ephemeral;
}
int64_t ephemeralOwner() const
{
if (isEphemeral())
return ephemeral_or_children_data.ephemeral_owner;
return 0;
}
void setEphemeralOwner(int64_t ephemeral_owner)
{
is_ephemeral_and_ctime.is_ephemeral = ephemeral_owner != 0;
ephemeral_or_children_data.ephemeral_owner = ephemeral_owner;
}
int32_t numChildren() const
{
if (isEphemeral())
return 0;
return ephemeral_or_children_data.children_info.num_children;
}
void setNumChildren(int32_t num_children)
{
ephemeral_or_children_data.children_info.num_children = num_children;
}
/// dummy interface for test
void addChild(StringRef) {}
auto getChildren() const
{
return std::vector<int>(numChildren());
}
void increaseNumChildren()
{
chassert(!isEphemeral());
++ephemeral_or_children_data.children_info.num_children;
}
void decreaseNumChildren()
{
chassert(!isEphemeral());
--ephemeral_or_children_data.children_info.num_children;
}
int32_t seqNum() const
{
if (isEphemeral())
return 0;
return ephemeral_or_children_data.children_info.seq_num;
}
void setSeqNum(int32_t seq_num_)
{
ephemeral_or_children_data.children_info.seq_num = seq_num_;
}
void increaseSeqNum()
{
chassert(!isEphemeral());
++ephemeral_or_children_data.children_info.seq_num;
}
int64_t ctime() const
{
return is_ephemeral_and_ctime.ctime;
}
void setCtime(uint64_t ctime)
{
is_ephemeral_and_ctime.ctime = ctime;
}
void copyStats(const Coordination::Stat & stat); void copyStats(const Coordination::Stat & stat);
};
/// KeeperRocksNode is the memory structure used by RocksDB
struct KeeperRocksNode : public KeeperRocksNodeInfo
{
#if USE_ROCKSDB
friend struct RocksDBContainer<KeeperRocksNode>;
#endif
using Meta = KeeperRocksNodeInfo;
uint64_t size_bytes = 0; // only for compatible, should be deprecated
uint64_t sizeInBytes() const { return data_size + sizeof(KeeperRocksNodeInfo); }
void setData(String new_data)
{
data_size = static_cast<uint32_t>(new_data.size());
if (data_size != 0)
{
data = std::unique_ptr<char[]>(new char[new_data.size()]);
memcpy(data.get(), new_data.data(), data_size);
}
}
void shallowCopy(const KeeperRocksNode & other)
{
czxid = other.czxid;
mzxid = other.mzxid;
pzxid = other.pzxid;
acl_id = other.acl_id; /// 0 -- no ACL by default
mtime = other.mtime;
is_ephemeral_and_ctime = other.is_ephemeral_and_ctime;
ephemeral_or_children_data = other.ephemeral_or_children_data;
data_size = other.data_size;
if (data_size != 0)
{
data = std::unique_ptr<char[]>(new char[data_size]);
memcpy(data.get(), other.data.get(), data_size);
}
version = other.version;
cversion = other.cversion;
aversion = other.aversion;
/// cached_digest = other.cached_digest;
}
void invalidateDigestCache() const;
UInt64 getDigest(std::string_view path) const;
String getEncodedString();
void decodeFromString(const String & buffer_str);
void recalculateSize() {}
std::string_view getData() const noexcept { return {data.get(), data_size}; }
void setResponseStat(Coordination::Stat & response_stat) const
{
response_stat.czxid = czxid;
response_stat.mzxid = mzxid;
response_stat.ctime = ctime();
response_stat.mtime = mtime;
response_stat.version = version;
response_stat.cversion = cversion;
response_stat.aversion = aversion;
response_stat.ephemeralOwner = ephemeralOwner();
response_stat.dataLength = static_cast<int32_t>(data_size);
response_stat.numChildren = numChildren();
response_stat.pzxid = pzxid;
}
void reset()
{
serialized = false;
}
bool empty() const
{
return data_size == 0 && mzxid == 0;
}
std::unique_ptr<char[]> data{nullptr};
uint32_t data_size{0};
private:
bool serialized = false;
};
/// KeeperMemNode should have as minimal size as possible to reduce memory footprint
/// of stored nodes
/// New fields should be added to the struct only if it's really necessary
struct KeeperMemNode
{
int64_t czxid{0};
int64_t mzxid{0};
int64_t pzxid{0};
uint64_t acl_id = 0; /// 0 -- no ACL by default
int64_t mtime{0};
std::unique_ptr<char[]> data{nullptr};
uint32_t data_size{0};
int32_t version{0};
int32_t cversion{0};
int32_t aversion{0};
mutable uint64_t cached_digest = 0;
KeeperMemNode() = default;
KeeperMemNode & operator=(const KeeperMemNode & other);
KeeperMemNode(const KeeperMemNode & other);
KeeperMemNode & operator=(KeeperMemNode && other) noexcept;
KeeperMemNode(KeeperMemNode && other) noexcept;
bool empty() const;
bool isEphemeral() const bool isEphemeral() const
{ {
@ -287,6 +74,7 @@ struct KeeperMemNode
void setNumChildren(int32_t num_children) void setNumChildren(int32_t num_children)
{ {
is_ephemeral_and_ctime.is_ephemeral = false;
ephemeral_or_children_data.children_info.num_children = num_children; ephemeral_or_children_data.children_info.num_children = num_children;
} }
@ -331,34 +119,6 @@ struct KeeperMemNode
is_ephemeral_and_ctime.ctime = ctime; is_ephemeral_and_ctime.ctime = ctime;
} }
void copyStats(const Coordination::Stat & stat);
void setResponseStat(Coordination::Stat & response_stat) const;
/// Object memory size
uint64_t sizeInBytes() const;
void setData(const String & new_data);
std::string_view getData() const noexcept { return {data.get(), data_size}; }
void addChild(StringRef child_path);
void removeChild(StringRef child_path);
const auto & getChildren() const noexcept { return children; }
auto & getChildren() { return children; }
// Invalidate the calculated digest so it's recalculated again on the next
// getDigest call
void invalidateDigestCache() const;
// get the calculated digest of the node
UInt64 getDigest(std::string_view path) const;
// copy only necessary information for preprocessing and digest calculation
// (e.g. we don't need to copy list of children)
void shallowCopy(const KeeperMemNode & other);
private: private:
/// as ctime can't be negative because it stores the timestamp when the /// as ctime can't be negative because it stores the timestamp when the
/// node was created, we can use the MSB for a bool /// node was created, we can use the MSB for a bool
@ -379,7 +139,132 @@ private:
int32_t num_children; int32_t num_children;
} children_info; } children_info;
} ephemeral_or_children_data{0}; } ephemeral_or_children_data{0};
};
/// KeeperRocksNodeInfo is used in RocksDB keeper.
/// It is serialized directly as POD to RocksDB.
/// NOTE(review): because the struct is stored as raw bytes, changing the order, type or number
/// of its fields presumably changes the stored format -- verify before touching the layout.
struct KeeperRocksNodeInfo
{
/// Node statistics (zxids, versions, data size, children/ephemeral information).
NodeStats stats;
uint64_t acl_id = 0; /// 0 -- no ACL by default

/// dummy interface for test
void addChild(StringRef) {}

/// Returns a placeholder vector holding one zero per child; only its size is meaningful.
auto getChildren() const
{
return std::vector<int>(stats.numChildren());
}

/// Declared here, defined elsewhere; presumably fills this node from `stat` -- confirm in the definition.
void copyStats(const Coordination::Stat & stat);
};
/// KeeperRocksNode is the memory structure used by RocksDB.
/// It extends the POD metadata part (KeeperRocksNodeInfo) with the node payload
/// and a cached digest.
struct KeeperRocksNode : public KeeperRocksNodeInfo
{
#if USE_ROCKSDB
    friend struct RocksDBContainer<KeeperRocksNode>;
#endif
    using Meta = KeeperRocksNodeInfo;

    uint64_t size_bytes = 0; // kept only for compatibility, should be deprecated

    /// Payload size plus the size of the fixed metadata part.
    uint64_t sizeInBytes() const { return stats.data_size + sizeof(KeeperRocksNodeInfo); }

    /// Replaces the payload with a copy of `new_data`.
    void setData(String new_data)
    {
        stats.data_size = static_cast<uint32_t>(new_data.size());
        if (stats.data_size != 0)
        {
            data = std::unique_ptr<char[]>(new char[new_data.size()]);
            memcpy(data.get(), new_data.data(), stats.data_size);
        }
        else
        {
            /// Do not keep the previous payload allocated once the node has no data.
            data.reset();
        }
    }

    /// Copies stats, ACL id and payload of `other`. The cached digest is not copied.
    void shallowCopy(const KeeperRocksNode & other)
    {
        /// Copying a node onto itself would free the buffer we are about to read from.
        if (this == &other)
            return;

        stats = other.stats;
        acl_id = other.acl_id;
        if (stats.data_size != 0)
        {
            data = std::unique_ptr<char[]>(new char[stats.data_size]);
            memcpy(data.get(), other.data.get(), stats.data_size);
        }
        else
        {
            /// `other` has no payload, so drop whatever this node was holding before.
            data.reset();
        }

        /// cached_digest = other.cached_digest;
    }

    void invalidateDigestCache() const;
    UInt64 getDigest(std::string_view path) const;
    String getEncodedString();
    void decodeFromString(const String & buffer_str);

    /// Nothing to recalculate: the size is derived from `stats.data_size` on demand.
    void recalculateSize() {}

    /// Non-owning view of the payload; valid until the payload is replaced or the node is destroyed.
    std::string_view getData() const noexcept { return {data.get(), stats.data_size}; }

    void setResponseStat(Coordination::Stat & response_stat) const;

    void reset()
    {
        serialized = false;
    }

    /// A node is considered empty when it has no payload and was never modified (mzxid == 0).
    bool empty() const
    {
        return stats.data_size == 0 && stats.mzxid == 0;
    }

    std::unique_ptr<char[]> data{nullptr};
    mutable UInt64 cached_digest = 0; /// cached digest of this node.
private:
    bool serialized = false;
};
/// KeeperMemNode should have as minimal size as possible to reduce memory footprint
/// of stored nodes
/// New fields should be added to the struct only if it's really necessary
struct KeeperMemNode
{
NodeStats stats;
std::unique_ptr<char[]> data{nullptr};
mutable uint64_t cached_digest = 0;
uint64_t acl_id = 0; /// 0 -- no ACL by default
KeeperMemNode() = default;
KeeperMemNode & operator=(const KeeperMemNode & other);
KeeperMemNode(const KeeperMemNode & other);
KeeperMemNode & operator=(KeeperMemNode && other) noexcept;
KeeperMemNode(KeeperMemNode && other) noexcept;
bool empty() const;
void copyStats(const Coordination::Stat & stat);
void setResponseStat(Coordination::Stat & response_stat) const;
/// Object memory size
uint64_t sizeInBytes() const;
void setData(const String & new_data);
std::string_view getData() const noexcept { return {data.get(), stats.data_size}; }
void addChild(StringRef child_path);
void removeChild(StringRef child_path);
const auto & getChildren() const noexcept { return children; }
auto & getChildren() { return children; }
// Invalidate the calculated digest so it's recalculated again on the next
// getDigest call
void invalidateDigestCache() const;
// get the calculated digest of the node
UInt64 getDigest(std::string_view path) const;
// copy only necessary information for preprocessing and digest calculation
// (e.g. we don't need to copy list of children)
void shallowCopy(const KeeperMemNode & other);
private:
ChildrenSet children{}; ChildrenSet children{};
}; };
@ -430,18 +315,187 @@ public:
}; };
using Ephemerals = std::unordered_map<int64_t, std::unordered_set<std::string>>; using Ephemerals = std::unordered_map<int64_t, std::unordered_set<std::string>>;
using SessionAndWatcher = std::unordered_map<int64_t, std::unordered_set<std::string>>; struct WatchInfo
{
std::string_view path;
bool is_list_watch;
bool operator==(const WatchInfo &) const = default;
};
struct WatchInfoHash
{
auto operator()(WatchInfo info) const
{
SipHash hash;
hash.update(info.path);
hash.update(info.is_list_watch);
return hash.get64();
}
};
using SessionAndWatcher = std::unordered_map<int64_t, std::unordered_set<WatchInfo, WatchInfoHash>>;
using SessionIDs = std::unordered_set<int64_t>; using SessionIDs = std::unordered_set<int64_t>;
/// Just vector of SHA1 from user:password /// Just vector of SHA1 from user:password
using AuthIDs = std::vector<AuthID>; using AuthIDs = std::vector<AuthID>;
using SessionAndAuth = std::unordered_map<int64_t, AuthIDs>; using SessionAndAuth = std::unordered_map<int64_t, AuthIDs>;
using Watches = std::unordered_map<String /* path, relative of root_path */, SessionIDs>; using Watches = std::unordered_map<
String /* path, relative of root_path */,
SessionIDs,
StringHashForHeterogeneousLookup,
StringHashForHeterogeneousLookup::transparent_key_equal>;
// Applying ZooKeeper request to storage consists of two steps:
// - preprocessing which, instead of applying the changes directly to storage,
// generates deltas with those changes, denoted with the request ZXID
// - processing which applies deltas with the correct ZXID to the storage
//
// Delta objects allow us two things:
// - fetch the latest, uncommitted state of an object by getting the committed
// state of that same object from the storage and applying the deltas
// in the same order as they are defined
// - quickly commit the changes to the storage
/// Delta describing the creation of a node; carries its stat, ACLs and data.
/// NOTE(review): presumably these are the values the created node receives -- confirm at the use site.
struct CreateNodeDelta
{
Coordination::Stat stat;
Coordination::ACLs acls;
String data;
};
/// Delta describing the removal of a node.
/// Besides `version` it keeps the stats, ACLs and data of the node
/// (NOTE(review): presumably so that the removal can be rolled back -- confirm).
struct RemoveNodeDelta
{
int32_t version{-1}; /// -1 by default; NOTE(review): looks like "no version check" -- confirm
NodeStats stat;
Coordination::ACLs acls;
String data;
};
/// Delta describing a change of node stats; keeps the stats before and after the change.
struct UpdateNodeStatDelta
{
    /// Both snapshots start as a copy of the current stats of `node`.
    template <is_any_of<KeeperMemNode, KeeperRocksNode> Node>
    explicit UpdateNodeStatDelta(const Node & node) : old_stats(node.stats), new_stats(node.stats) { }

    NodeStats old_stats;
    NodeStats new_stats;
    int32_t version = -1;
};
/// Delta describing a change of node data; keeps both the previous and the new payload.
struct UpdateNodeDataDelta
{
std::string old_data;
std::string new_data;
int32_t version{-1}; /// -1 by default; NOTE(review): looks like "no version check" -- confirm
};
/// Delta describing a change of node ACLs; keeps both the previous and the new ACL list.
struct SetACLDelta
{
Coordination::ACLs old_acls;
Coordination::ACLs new_acls;
int32_t version{-1}; /// -1 by default; NOTE(review): looks like "no version check" -- confirm
};
/// Delta carrying only an error code instead of a change.
struct ErrorDelta
{
Coordination::Error error;
};
/// Delta describing a failed multi request.
/// NOTE(review): `error_codes` presumably holds one code per subrequest -- confirm at the use site.
struct FailedMultiDelta
{
std::vector<Coordination::Error> error_codes;
Coordination::Error global_error{Coordination::Error::ZOK}; /// ZOK unless set explicitly
};
// Denotes end of a subrequest in multi request.
// Empty marker type: it carries no data.
struct SubDeltaEnd
{
};
/// Delta that associates an auth id with a session.
struct AddAuthDelta
{
int64_t session_id;
std::shared_ptr<AuthID> auth_id; /// shared, so the delta does not own the only copy of the AuthID
};
/// Delta marking the session with the given id as closed.
struct CloseSessionDelta
{
int64_t session_id;
};
/// Every kind of payload a Delta can carry: node changes, auth/session changes,
/// errors and the marker that ends a subrequest of a multi request.
using Operation = std::variant<
CreateNodeDelta,
RemoveNodeDelta,
UpdateNodeStatDelta,
UpdateNodeDataDelta,
SetACLDelta,
AddAuthDelta,
ErrorDelta,
SubDeltaEnd,
FailedMultiDelta,
CloseSessionDelta>;
struct Delta
{
Delta(String path_, int64_t zxid_, Operation operation_) : path(std::move(path_)), zxid(zxid_), operation(std::move(operation_)) { }
Delta(int64_t zxid_, Coordination::Error error) : Delta("", zxid_, ErrorDelta{error}) { }
Delta(int64_t zxid_, Operation subdelta) : Delta("", zxid_, subdelta) { }
String path;
int64_t zxid;
Operation operation;
};
using DeltaIterator = std::list<KeeperStorageBase::Delta>::const_iterator;
/// Pair of iterators [begin_it, end_it) over deltas; provides begin()/end()
/// so it can be used in a range-based for loop.
struct DeltaRange
{
    DeltaIterator begin_it;
    DeltaIterator end_it;

    auto begin() const { return begin_it; }

    auto end() const { return end_it; }

    /// True when the range contains no deltas.
    bool empty() const { return begin_it == end_it; }

    /// First delta of the range; dereferences `begin_it`, so the range must not be empty.
    const auto & front() const { return *begin_it; }
};
/// Counters describing the storage. Every field is an atomic that starts at zero,
/// so individual values can be read and written without additional locking.
struct Stats
{
    std::atomic<uint64_t> nodes_count{0};
    std::atomic<uint64_t> approximate_data_size{0};
    std::atomic<uint64_t> total_watches_count{0};
    std::atomic<uint64_t> watched_paths_count{0};
    std::atomic<uint64_t> sessions_with_watches_count{0};
    std::atomic<uint64_t> session_with_ephemeral_nodes_count{0};
    std::atomic<uint64_t> total_emphemeral_nodes_count{0};
    std::atomic<int64_t> last_zxid{0};
};
Stats stats;
static bool checkDigest(const Digest & first, const Digest & second); static bool checkDigest(const Digest & first, const Digest & second);
}; };
/// Keeper state machine almost equal to the ZooKeeper's state machine. /// Keeper state machine almost equal to the ZooKeeper's state machine.
/// Implements all logic of operations, data changes, sessions allocation. /// Implements all logic of operations, data changes, sessions allocation.
/// In-memory and not thread safe. /// In-memory and not thread safe.
@ -472,160 +526,73 @@ public:
int64_t session_id_counter{1}; int64_t session_id_counter{1};
SessionAndAuth session_and_auth; mutable SharedMutex auth_mutex;
SessionAndAuth committed_session_and_auth;
mutable SharedMutex storage_mutex;
/// Main hashtable with nodes. Contain all information about data. /// Main hashtable with nodes. Contain all information about data.
/// All other structures except session_and_timeout can be restored from /// All other structures except session_and_timeout can be restored from
/// container. /// container.
Container container; Container container;
// Applying ZooKeeper request to storage consists of two steps:
// - preprocessing which, instead of applying the changes directly to storage,
// generates deltas with those changes, denoted with the request ZXID
// - processing which applies deltas with the correct ZXID to the storage
//
// Delta objects allow us two things:
// - fetch the latest, uncommitted state of an object by getting the committed
// state of that same object from the storage and applying the deltas
// in the same order as they are defined
// - quickly commit the changes to the storage
struct CreateNodeDelta
{
Coordination::Stat stat;
Coordination::ACLs acls;
String data;
};
struct RemoveNodeDelta
{
int32_t version{-1};
int64_t ephemeral_owner{0};
};
struct UpdateNodeDelta
{
std::function<void(Node &)> update_fn;
int32_t version{-1};
};
struct SetACLDelta
{
Coordination::ACLs acls;
int32_t version{-1};
};
struct ErrorDelta
{
Coordination::Error error;
};
struct FailedMultiDelta
{
std::vector<Coordination::Error> error_codes;
Coordination::Error global_error{Coordination::Error::ZOK};
};
// Denotes end of a subrequest in multi request
struct SubDeltaEnd
{
};
struct AddAuthDelta
{
int64_t session_id;
AuthID auth_id;
};
struct CloseSessionDelta
{
int64_t session_id;
};
using Operation = std::
variant<CreateNodeDelta, RemoveNodeDelta, UpdateNodeDelta, SetACLDelta, AddAuthDelta, ErrorDelta, SubDeltaEnd, FailedMultiDelta, CloseSessionDelta>;
struct Delta
{
Delta(String path_, int64_t zxid_, Operation operation_) : path(std::move(path_)), zxid(zxid_), operation(std::move(operation_)) { }
Delta(int64_t zxid_, Coordination::Error error) : Delta("", zxid_, ErrorDelta{error}) { }
Delta(int64_t zxid_, Operation subdelta) : Delta("", zxid_, subdelta) { }
String path;
int64_t zxid;
Operation operation;
};
struct UncommittedState struct UncommittedState
{ {
explicit UncommittedState(KeeperStorage & storage_) : storage(storage_) { } explicit UncommittedState(KeeperStorage & storage_) : storage(storage_) { }
void addDelta(Delta new_delta); void addDeltas(std::list<Delta> new_deltas);
void addDeltas(std::vector<Delta> new_deltas); void cleanup(int64_t commit_zxid);
void commit(int64_t commit_zxid);
void rollback(int64_t rollback_zxid); void rollback(int64_t rollback_zxid);
void rollback(std::list<Delta> rollback_deltas);
std::shared_ptr<Node> getNode(StringRef path) const; std::shared_ptr<Node> getNode(StringRef path, bool should_lock_storage = true) const;
const Node * getActualNodeView(StringRef path, const Node & storage_node) const; const Node * getActualNodeView(StringRef path, const Node & storage_node) const;
Coordination::ACLs getACLs(StringRef path) const; Coordination::ACLs getACLs(StringRef path) const;
void applyDeltas(const std::list<Delta> & new_deltas);
void applyDelta(const Delta & delta); void applyDelta(const Delta & delta);
void rollbackDelta(const Delta & delta);
bool hasACL(int64_t session_id, bool is_local, std::function<bool(const AuthID &)> predicate) const; bool hasACL(int64_t session_id, bool is_local, std::function<bool(const AuthID &)> predicate) const;
void forEachAuthInSession(int64_t session_id, std::function<void(const AuthID &)> func) const; void forEachAuthInSession(int64_t session_id, std::function<void(const AuthID &)> func) const;
std::shared_ptr<Node> tryGetNodeFromStorage(StringRef path) const; std::shared_ptr<Node> tryGetNodeFromStorage(StringRef path, bool should_lock_storage = true) const;
std::unordered_map<int64_t, std::list<const AuthID *>> session_and_auth;
std::unordered_set<int64_t> closed_sessions; std::unordered_set<int64_t> closed_sessions;
using ZxidToNodes = std::map<int64_t, std::unordered_set<std::string_view>>;
struct UncommittedNode struct UncommittedNode
{ {
std::shared_ptr<Node> node{nullptr}; std::shared_ptr<Node> node{nullptr};
Coordination::ACLs acls{}; std::optional<Coordination::ACLs> acls{};
int64_t zxid{0}; std::unordered_set<uint64_t> applied_zxids{};
};
struct Hash void materializeACL(const ACLMap & current_acl_map);
{
auto operator()(const std::string_view view) const
{
SipHash hash;
hash.update(view);
return hash.get64();
}
using is_transparent = void; // required to make find() work with different type than key_type
};
struct Equal
{
auto operator()(const std::string_view a,
const std::string_view b) const
{
return a == b;
}
using is_transparent = void; // required to make find() work with different type than key_type
}; };
struct PathCmp struct PathCmp
{ {
using is_transparent = std::true_type;
auto operator()(const std::string_view a, auto operator()(const std::string_view a,
const std::string_view b) const const std::string_view b) const
{ {
return a.size() < b.size() || (a.size() == b.size() && a < b); size_t level_a = std::count(a.begin(), a.end(), '/');
size_t level_b = std::count(b.begin(), b.end(), '/');
return level_a < level_b || (level_a == level_b && a < b);
} }
using is_transparent = void; // required to make find() work with different type than key_type
}; };
mutable std::map<std::string, UncommittedNode, PathCmp> nodes; Ephemerals ephemerals;
std::unordered_map<std::string, std::list<const Delta *>, Hash, Equal> deltas_for_path;
std::list<Delta> deltas; std::unordered_map<int64_t, std::list<std::pair<int64_t, std::shared_ptr<AuthID>>>> session_and_auth;
mutable std::map<std::string, UncommittedNode, PathCmp> nodes;
mutable ZxidToNodes zxid_to_nodes;
mutable std::mutex deltas_mutex;
std::list<Delta> deltas TSA_GUARDED_BY(deltas_mutex);
KeeperStorage<Container> & storage; KeeperStorage<Container> & storage;
}; };
@ -635,7 +602,7 @@ public:
// with zxid > last_zxid // with zxid > last_zxid
void applyUncommittedState(KeeperStorage & other, int64_t last_log_idx); void applyUncommittedState(KeeperStorage & other, int64_t last_log_idx);
Coordination::Error commit(int64_t zxid); Coordination::Error commit(DeltaRange deltas);
// Create node in the storage // Create node in the storage
// Returns false if it failed to create the node, true otherwise // Returns false if it failed to create the node, true otherwise
@ -653,12 +620,11 @@ public:
bool checkACL(StringRef path, int32_t permissions, int64_t session_id, bool is_local); bool checkACL(StringRef path, int32_t permissions, int64_t session_id, bool is_local);
void unregisterEphemeralPath(int64_t session_id, const std::string & path); std::mutex ephemeral_mutex;
/// Mapping session_id -> set of ephemeral nodes paths /// Mapping session_id -> set of ephemeral nodes paths
Ephemerals ephemerals; Ephemerals committed_ephemerals;
/// Mapping session_id -> set of watched nodes paths size_t committed_ephemeral_nodes{0};
SessionAndWatcher sessions_and_watchers;
/// Expiration queue for session, allows to get dead sessions at some point of time /// Expiration queue for session, allows to get dead sessions at some point of time
SessionExpiryQueue session_expiry_queue; SessionExpiryQueue session_expiry_queue;
/// All active sessions with timeout /// All active sessions with timeout
@ -667,8 +633,10 @@ public:
/// ACLMap for more compact ACLs storage inside nodes. /// ACLMap for more compact ACLs storage inside nodes.
ACLMap acl_map; ACLMap acl_map;
mutable std::mutex transaction_mutex;
/// Global id of all requests applied to storage /// Global id of all requests applied to storage
int64_t zxid{0}; int64_t zxid TSA_GUARDED_BY(transaction_mutex) = 0;
// older Keeper node (pre V5 snapshots) can create snapshots and receive logs from newer Keeper nodes // older Keeper node (pre V5 snapshots) can create snapshots and receive logs from newer Keeper nodes
// this can lead to some inconsistencies, e.g. from snapshot it will use log_idx as zxid // this can lead to some inconsistencies, e.g. from snapshot it will use log_idx as zxid
@ -685,11 +653,16 @@ public:
int64_t log_idx = 0; int64_t log_idx = 0;
}; };
std::deque<TransactionInfo> uncommitted_transactions; std::list<TransactionInfo> uncommitted_transactions TSA_GUARDED_BY(transaction_mutex);
uint64_t nodes_digest{0}; uint64_t nodes_digest = 0;
bool finalized{false}; std::atomic<bool> finalized{false};
/// Mapping session_id -> set of watched nodes paths
SessionAndWatcher sessions_and_watchers;
size_t total_watches_count = 0;
/// Currently active watches (node_path -> subscribed sessions) /// Currently active watches (node_path -> subscribed sessions)
Watches watches; Watches watches;
@ -698,45 +671,30 @@ public:
void clearDeadWatches(int64_t session_id); void clearDeadWatches(int64_t session_id);
/// Get current committed zxid /// Get current committed zxid
int64_t getZXID() const { return zxid; } int64_t getZXID() const;
int64_t getNextZXID() const int64_t getNextZXID() const;
{ int64_t getNextZXIDLocked() const TSA_REQUIRES(transaction_mutex);
if (uncommitted_transactions.empty())
return zxid + 1;
return uncommitted_transactions.back().zxid + 1; Digest getNodesDigest(bool committed, bool lock_transaction_mutex) const;
}
Digest getNodesDigest(bool committed) const;
KeeperContextPtr keeper_context; KeeperContextPtr keeper_context;
const String superdigest; const String superdigest;
bool initialized{false}; std::atomic<bool> initialized{false};
KeeperStorage(int64_t tick_time_ms, const String & superdigest_, const KeeperContextPtr & keeper_context_, bool initialize_system_nodes = true); KeeperStorage(int64_t tick_time_ms, const String & superdigest_, const KeeperContextPtr & keeper_context_, bool initialize_system_nodes = true);
void initializeSystemNodes(); void initializeSystemNodes() TSA_NO_THREAD_SAFETY_ANALYSIS;
/// Allocate new session id with the specified timeouts /// Allocate new session id with the specified timeouts
int64_t getSessionID(int64_t session_timeout_ms) int64_t getSessionID(int64_t session_timeout_ms);
{
auto result = session_id_counter++;
session_and_timeout.emplace(result, session_timeout_ms);
session_expiry_queue.addNewSessionOrUpdate(result, session_timeout_ms);
return result;
}
/// Add session id. Used when restoring KeeperStorage from snapshot. /// Add session id. Used when restoring KeeperStorage from snapshot.
void addSessionID(int64_t session_id, int64_t session_timeout_ms) void addSessionID(int64_t session_id, int64_t session_timeout_ms) TSA_NO_THREAD_SAFETY_ANALYSIS;
{
session_and_timeout.emplace(session_id, session_timeout_ms);
session_expiry_queue.addNewSessionOrUpdate(session_id, session_timeout_ms);
}
UInt64 calculateNodesDigest(UInt64 current_digest, const std::vector<Delta> & new_deltas) const; UInt64 calculateNodesDigest(UInt64 current_digest, const std::list<Delta> & new_deltas) const;
/// Process user request and return response. /// Process user request and return response.
/// check_acl = false only when converting data from ZooKeeper. /// check_acl = false only when converting data from ZooKeeper.
@ -763,42 +721,39 @@ public:
/// Set of methods for creating snapshots /// Set of methods for creating snapshots
/// Turn on snapshot mode, so data inside Container is not deleted, but replaced with new version. /// Turn on snapshot mode, so data inside Container is not deleted, but replaced with new version.
void enableSnapshotMode(size_t up_to_version) void enableSnapshotMode(size_t up_to_version);
{
container.enableSnapshotMode(up_to_version);
}
/// Turn off snapshot mode. /// Turn off snapshot mode.
void disableSnapshotMode() void disableSnapshotMode();
{
container.disableSnapshotMode();
}
Container::const_iterator getSnapshotIteratorBegin() const { return container.begin(); } Container::const_iterator getSnapshotIteratorBegin() const;
/// Clear outdated data from internal container. /// Clear outdated data from internal container.
void clearGarbageAfterSnapshot() { container.clearOutdatedNodes(); } void clearGarbageAfterSnapshot();
/// Get all active sessions /// Get all active sessions
const SessionAndTimeout & getActiveSessions() const { return session_and_timeout; } SessionAndTimeout getActiveSessions() const;
/// Get all dead sessions /// Get all dead sessions
std::vector<int64_t> getDeadSessions() const { return session_expiry_queue.getExpiredSessions(); } std::vector<int64_t> getDeadSessions() const;
void updateStats();
const Stats & getStorageStats() const;
/// Introspection functions mostly used in 4-letter commands /// Introspection functions mostly used in 4-letter commands
uint64_t getNodesCount() const { return container.size(); } uint64_t getNodesCount() const;
uint64_t getApproximateDataSize() const { return container.getApproximateDataSize(); } uint64_t getApproximateDataSize() const;
uint64_t getArenaDataSize() const { return container.keyArenaSize(); } uint64_t getArenaDataSize() const;
uint64_t getTotalWatchesCount() const; uint64_t getTotalWatchesCount() const;
uint64_t getWatchedPathsCount() const { return watches.size() + list_watches.size(); } uint64_t getWatchedPathsCount() const;
uint64_t getSessionsWithWatchesCount() const; uint64_t getSessionsWithWatchesCount() const;
uint64_t getSessionWithEphemeralNodesCount() const { return ephemerals.size(); } uint64_t getSessionWithEphemeralNodesCount() const;
uint64_t getTotalEphemeralNodesCount() const; uint64_t getTotalEphemeralNodesCount() const;
void dumpWatches(WriteBufferFromOwnString & buf) const; void dumpWatches(WriteBufferFromOwnString & buf) const;

View File

@ -155,11 +155,11 @@ public:
ReadBufferFromOwnString buffer(iter->value().ToStringView()); ReadBufferFromOwnString buffer(iter->value().ToStringView());
typename Node::Meta & meta = new_pair->value; typename Node::Meta & meta = new_pair->value;
readPODBinary(meta, buffer); readPODBinary(meta, buffer);
readVarUInt(new_pair->value.data_size, buffer); readVarUInt(new_pair->value.stats.data_size, buffer);
if (new_pair->value.data_size) if (new_pair->value.stats.data_size)
{ {
new_pair->value.data = std::unique_ptr<char[]>(new char[new_pair->value.data_size]); new_pair->value.data = std::unique_ptr<char[]>(new char[new_pair->value.stats.data_size]);
buffer.readStrict(new_pair->value.data.get(), new_pair->value.data_size); buffer.readStrict(new_pair->value.data.get(), new_pair->value.stats.data_size);
} }
pair = new_pair; pair = new_pair;
} }
@ -211,7 +211,7 @@ public:
} }
} }
std::vector<std::pair<std::string, Node>> getChildren(const std::string & key_) std::vector<std::pair<std::string, Node>> getChildren(const std::string & key_, bool read_data = false)
{ {
rocksdb::ReadOptions read_options; rocksdb::ReadOptions read_options;
read_options.total_order_seek = true; read_options.total_order_seek = true;
@ -232,6 +232,15 @@ public:
typename Node::Meta & meta = node; typename Node::Meta & meta = node;
/// We do not read data here /// We do not read data here
readPODBinary(meta, buffer); readPODBinary(meta, buffer);
if (read_data)
{
readVarUInt(meta.stats.data_size, buffer);
if (meta.stats.data_size)
{
node.data = std::unique_ptr<char[]>(new char[meta.stats.data_size]);
buffer.readStrict(node.data.get(), meta.stats.data_size);
}
}
std::string real_key(iter->key().data() + len, iter->key().size() - len); std::string real_key(iter->key().data() + len, iter->key().size() - len);
// std::cout << "real key: " << real_key << std::endl; // std::cout << "real key: " << real_key << std::endl;
result.emplace_back(std::move(real_key), std::move(node)); result.emplace_back(std::move(real_key), std::move(node));
@ -268,11 +277,11 @@ public:
typename Node::Meta & meta = kv->value; typename Node::Meta & meta = kv->value;
readPODBinary(meta, buffer); readPODBinary(meta, buffer);
/// TODO: Sometimes we don't need to load data. /// TODO: Sometimes we don't need to load data.
readVarUInt(kv->value.data_size, buffer); readVarUInt(kv->value.stats.data_size, buffer);
if (kv->value.data_size) if (kv->value.stats.data_size)
{ {
kv->value.data = std::unique_ptr<char[]>(new char[kv->value.data_size]); kv->value.data = std::unique_ptr<char[]>(new char[kv->value.stats.data_size]);
buffer.readStrict(kv->value.data.get(), kv->value.data_size); buffer.readStrict(kv->value.data.get(), kv->value.stats.data_size);
} }
return const_iterator(kv); return const_iterator(kv);
} }
@ -281,7 +290,7 @@ public:
{ {
auto it = find(key); auto it = find(key);
chassert(it != end()); chassert(it != end());
return MockNode(it->value.numChildren(), it->value.getData()); return MockNode(it->value.stats.numChildren(), it->value.getData());
} }
const_iterator updateValue(StringRef key_, ValueUpdater updater) const_iterator updateValue(StringRef key_, ValueUpdater updater)

View File

@ -93,7 +93,7 @@ void deserializeACLMap(Storage & storage, ReadBuffer & in)
} }
template<typename Storage> template<typename Storage>
int64_t deserializeStorageData(Storage & storage, ReadBuffer & in, LoggerPtr log) int64_t deserializeStorageData(Storage & storage, ReadBuffer & in, LoggerPtr log) TSA_NO_THREAD_SAFETY_ANALYSIS
{ {
int64_t max_zxid = 0; int64_t max_zxid = 0;
std::string path; std::string path;
@ -108,33 +108,33 @@ int64_t deserializeStorageData(Storage & storage, ReadBuffer & in, LoggerPtr log
Coordination::read(node.acl_id, in); Coordination::read(node.acl_id, in);
/// Deserialize stat /// Deserialize stat
Coordination::read(node.czxid, in); Coordination::read(node.stats.czxid, in);
Coordination::read(node.mzxid, in); Coordination::read(node.stats.mzxid, in);
/// For some reason the ZXID specified in the filename can be smaller /// For some reason the ZXID specified in the filename can be smaller
/// than the actual zxid from nodes. In this case we will use the zxid from nodes. /// than the actual zxid from nodes. In this case we will use the zxid from nodes.
max_zxid = std::max(max_zxid, node.mzxid); max_zxid = std::max(max_zxid, node.stats.mzxid);
int64_t ctime; int64_t ctime;
Coordination::read(ctime, in); Coordination::read(ctime, in);
node.setCtime(ctime); node.stats.setCtime(ctime);
Coordination::read(node.mtime, in); Coordination::read(node.stats.mtime, in);
Coordination::read(node.version, in); Coordination::read(node.stats.version, in);
Coordination::read(node.cversion, in); Coordination::read(node.stats.cversion, in);
Coordination::read(node.aversion, in); Coordination::read(node.stats.aversion, in);
int64_t ephemeral_owner; int64_t ephemeral_owner;
Coordination::read(ephemeral_owner, in); Coordination::read(ephemeral_owner, in);
if (ephemeral_owner != 0) if (ephemeral_owner != 0)
node.setEphemeralOwner(ephemeral_owner); node.stats.setEphemeralOwner(ephemeral_owner);
Coordination::read(node.pzxid, in); Coordination::read(node.stats.pzxid, in);
if (!path.empty()) if (!path.empty())
{ {
if (ephemeral_owner == 0) if (ephemeral_owner == 0)
node.setSeqNum(node.cversion); node.stats.setSeqNum(node.stats.cversion);
storage.container.insertOrReplace(path, node); storage.container.insertOrReplace(path, node);
if (ephemeral_owner != 0) if (ephemeral_owner != 0)
storage.ephemerals[ephemeral_owner].insert(path); storage.committed_ephemerals[ephemeral_owner].insert(path);
storage.acl_map.addUsage(node.acl_id); storage.acl_map.addUsage(node.acl_id);
} }
@ -149,7 +149,13 @@ int64_t deserializeStorageData(Storage & storage, ReadBuffer & in, LoggerPtr log
if (itr.key != "/") if (itr.key != "/")
{ {
auto parent_path = parentNodePath(itr.key); auto parent_path = parentNodePath(itr.key);
storage.container.updateValue(parent_path, [my_path = itr.key] (typename Storage::Node & value) { value.addChild(getBaseNodeName(my_path)); value.increaseNumChildren(); }); storage.container.updateValue(
parent_path,
[my_path = itr.key](typename Storage::Node & value)
{
value.addChild(getBaseNodeName(my_path));
value.stats.increaseNumChildren();
});
} }
} }
@ -157,7 +163,7 @@ int64_t deserializeStorageData(Storage & storage, ReadBuffer & in, LoggerPtr log
} }
template<typename Storage> template<typename Storage>
void deserializeKeeperStorageFromSnapshot(Storage & storage, const std::string & snapshot_path, LoggerPtr log) void deserializeKeeperStorageFromSnapshot(Storage & storage, const std::string & snapshot_path, LoggerPtr log) TSA_NO_THREAD_SAFETY_ANALYSIS
{ {
LOG_INFO(log, "Deserializing storage snapshot {}", snapshot_path); LOG_INFO(log, "Deserializing storage snapshot {}", snapshot_path);
int64_t zxid = getZxidFromName(snapshot_path); int64_t zxid = getZxidFromName(snapshot_path);
@ -487,7 +493,7 @@ bool hasErrorsInMultiRequest(Coordination::ZooKeeperRequestPtr request)
} }
template<typename Storage> template<typename Storage>
bool deserializeTxn(Storage & storage, ReadBuffer & in, LoggerPtr /*log*/) bool deserializeTxn(Storage & storage, ReadBuffer & in, LoggerPtr /*log*/) TSA_NO_THREAD_SAFETY_ANALYSIS
{ {
int64_t checksum; int64_t checksum;
Coordination::read(checksum, in); Coordination::read(checksum, in);
@ -568,7 +574,7 @@ void deserializeLogAndApplyToStorage(Storage & storage, const std::string & log_
} }
template<typename Storage> template<typename Storage>
void deserializeLogsAndApplyToStorage(Storage & storage, const std::string & path, LoggerPtr log) void deserializeLogsAndApplyToStorage(Storage & storage, const std::string & path, LoggerPtr log) TSA_NO_THREAD_SAFETY_ANALYSIS
{ {
std::map<int64_t, std::string> existing_logs; std::map<int64_t, std::string> existing_logs;
for (const auto & p : fs::directory_iterator(path)) for (const auto & p : fs::directory_iterator(path))

View File

@ -1,6 +1,7 @@
#include <chrono> #include <chrono>
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include "base/defines.h"
#include "config.h" #include "config.h"
#if USE_NURAFT #if USE_NURAFT
@ -1540,7 +1541,7 @@ void addNode(Storage & storage, const std::string & path, const std::string & da
using Node = typename Storage::Node; using Node = typename Storage::Node;
Node node{}; Node node{};
node.setData(data); node.setData(data);
node.setEphemeralOwner(ephemeral_owner); node.stats.setEphemeralOwner(ephemeral_owner);
storage.container.insertOrReplace(path, node); storage.container.insertOrReplace(path, node);
auto child_it = storage.container.find(path); auto child_it = storage.container.find(path);
auto child_path = DB::getBaseNodeName(child_it->key); auto child_path = DB::getBaseNodeName(child_it->key);
@ -1549,7 +1550,7 @@ void addNode(Storage & storage, const std::string & path, const std::string & da
[&](auto & parent) [&](auto & parent)
{ {
parent.addChild(child_path); parent.addChild(child_path);
parent.increaseNumChildren(); parent.stats.increaseNumChildren();
}); });
} }
@ -1570,9 +1571,9 @@ TYPED_TEST(CoordinationTest, TestStorageSnapshotSimple)
addNode(storage, "/hello1", "world", 1); addNode(storage, "/hello1", "world", 1);
addNode(storage, "/hello2", "somedata", 3); addNode(storage, "/hello2", "somedata", 3);
storage.session_id_counter = 5; storage.session_id_counter = 5;
storage.zxid = 2; TSA_SUPPRESS_WARNING_FOR_WRITE(storage.zxid) = 2;
storage.ephemerals[3] = {"/hello2"}; storage.committed_ephemerals[3] = {"/hello2"};
storage.ephemerals[1] = {"/hello1"}; storage.committed_ephemerals[1] = {"/hello1"};
storage.getSessionID(130); storage.getSessionID(130);
storage.getSessionID(130); storage.getSessionID(130);
@ -1601,10 +1602,10 @@ TYPED_TEST(CoordinationTest, TestStorageSnapshotSimple)
EXPECT_EQ(restored_storage->container.getValue("/hello1").getData(), "world"); EXPECT_EQ(restored_storage->container.getValue("/hello1").getData(), "world");
EXPECT_EQ(restored_storage->container.getValue("/hello2").getData(), "somedata"); EXPECT_EQ(restored_storage->container.getValue("/hello2").getData(), "somedata");
EXPECT_EQ(restored_storage->session_id_counter, 7); EXPECT_EQ(restored_storage->session_id_counter, 7);
EXPECT_EQ(restored_storage->zxid, 2); EXPECT_EQ(restored_storage->getZXID(), 2);
EXPECT_EQ(restored_storage->ephemerals.size(), 2); EXPECT_EQ(restored_storage->committed_ephemerals.size(), 2);
EXPECT_EQ(restored_storage->ephemerals[3].size(), 1); EXPECT_EQ(restored_storage->committed_ephemerals[3].size(), 1);
EXPECT_EQ(restored_storage->ephemerals[1].size(), 1); EXPECT_EQ(restored_storage->committed_ephemerals[1].size(), 1);
EXPECT_EQ(restored_storage->session_and_timeout.size(), 2); EXPECT_EQ(restored_storage->session_and_timeout.size(), 2);
} }
@ -2027,7 +2028,7 @@ TYPED_TEST(CoordinationTest, TestEphemeralNodeRemove)
state_machine->commit(1, entry_c->get_buf()); state_machine->commit(1, entry_c->get_buf());
const auto & storage = state_machine->getStorageUnsafe(); const auto & storage = state_machine->getStorageUnsafe();
EXPECT_EQ(storage.ephemerals.size(), 1); EXPECT_EQ(storage.committed_ephemerals.size(), 1);
std::shared_ptr<ZooKeeperRemoveRequest> request_d = std::make_shared<ZooKeeperRemoveRequest>(); std::shared_ptr<ZooKeeperRemoveRequest> request_d = std::make_shared<ZooKeeperRemoveRequest>();
request_d->path = "/hello"; request_d->path = "/hello";
/// Delete from other session /// Delete from other session
@ -2035,7 +2036,7 @@ TYPED_TEST(CoordinationTest, TestEphemeralNodeRemove)
state_machine->pre_commit(2, entry_d->get_buf()); state_machine->pre_commit(2, entry_d->get_buf());
state_machine->commit(2, entry_d->get_buf()); state_machine->commit(2, entry_d->get_buf());
EXPECT_EQ(storage.ephemerals.size(), 0); EXPECT_EQ(storage.committed_ephemerals.size(), 0);
} }
@ -2590,9 +2591,9 @@ TYPED_TEST(CoordinationTest, TestStorageSnapshotDifferentCompressions)
addNode(storage, "/hello1", "world", 1); addNode(storage, "/hello1", "world", 1);
addNode(storage, "/hello2", "somedata", 3); addNode(storage, "/hello2", "somedata", 3);
storage.session_id_counter = 5; storage.session_id_counter = 5;
storage.zxid = 2; TSA_SUPPRESS_WARNING_FOR_WRITE(storage.zxid) = 2;
storage.ephemerals[3] = {"/hello2"}; storage.committed_ephemerals[3] = {"/hello2"};
storage.ephemerals[1] = {"/hello1"}; storage.committed_ephemerals[1] = {"/hello1"};
storage.getSessionID(130); storage.getSessionID(130);
storage.getSessionID(130); storage.getSessionID(130);
@ -2617,10 +2618,10 @@ TYPED_TEST(CoordinationTest, TestStorageSnapshotDifferentCompressions)
EXPECT_EQ(restored_storage->container.getValue("/hello1").getData(), "world"); EXPECT_EQ(restored_storage->container.getValue("/hello1").getData(), "world");
EXPECT_EQ(restored_storage->container.getValue("/hello2").getData(), "somedata"); EXPECT_EQ(restored_storage->container.getValue("/hello2").getData(), "somedata");
EXPECT_EQ(restored_storage->session_id_counter, 7); EXPECT_EQ(restored_storage->session_id_counter, 7);
EXPECT_EQ(restored_storage->zxid, 2); EXPECT_EQ(restored_storage->getZXID(), 2);
EXPECT_EQ(restored_storage->ephemerals.size(), 2); EXPECT_EQ(restored_storage->committed_ephemerals.size(), 2);
EXPECT_EQ(restored_storage->ephemerals[3].size(), 1); EXPECT_EQ(restored_storage->committed_ephemerals[3].size(), 1);
EXPECT_EQ(restored_storage->ephemerals[1].size(), 1); EXPECT_EQ(restored_storage->committed_ephemerals[1].size(), 1);
EXPECT_EQ(restored_storage->session_and_timeout.size(), 2); EXPECT_EQ(restored_storage->session_and_timeout.size(), 2);
} }
@ -2805,13 +2806,13 @@ TYPED_TEST(CoordinationTest, TestStorageSnapshotEqual)
storage.session_id_counter = 5; storage.session_id_counter = 5;
storage.ephemerals[3] = {"/hello"}; storage.committed_ephemerals[3] = {"/hello"};
storage.ephemerals[1] = {"/hello/somepath"}; storage.committed_ephemerals[1] = {"/hello/somepath"};
for (size_t j = 0; j < 3333; ++j) for (size_t j = 0; j < 3333; ++j)
storage.getSessionID(130 * j); storage.getSessionID(130 * j);
DB::KeeperStorageSnapshot<Storage> snapshot(&storage, storage.zxid); DB::KeeperStorageSnapshot<Storage> snapshot(&storage, storage.getZXID());
auto buf = manager.serializeSnapshotToBuffer(snapshot); auto buf = manager.serializeSnapshotToBuffer(snapshot);
@ -3315,7 +3316,7 @@ TYPED_TEST(CoordinationTest, TestCheckNotExistsRequest)
create_path("/test_node"); create_path("/test_node");
auto node_it = storage.container.find("/test_node"); auto node_it = storage.container.find("/test_node");
ASSERT_NE(node_it, storage.container.end()); ASSERT_NE(node_it, storage.container.end());
auto node_version = node_it->value.version; auto node_version = node_it->value.stats.version;
{ {
SCOPED_TRACE("CheckNotExists returns ZNODEEXISTS"); SCOPED_TRACE("CheckNotExists returns ZNODEEXISTS");
@ -3566,12 +3567,12 @@ TYPED_TEST(CoordinationTest, TestRemoveRecursiveRequest)
{ {
SCOPED_TRACE("Recursive Remove Ephemeral"); SCOPED_TRACE("Recursive Remove Ephemeral");
create("/T7", zkutil::CreateMode::Ephemeral); create("/T7", zkutil::CreateMode::Ephemeral);
ASSERT_EQ(storage.ephemerals.size(), 1); ASSERT_EQ(storage.committed_ephemerals.size(), 1);
auto responses = remove_recursive("/T7", 100); auto responses = remove_recursive("/T7", 100);
ASSERT_EQ(responses.size(), 1); ASSERT_EQ(responses.size(), 1);
ASSERT_EQ(responses[0].response->error, Coordination::Error::ZOK); ASSERT_EQ(responses[0].response->error, Coordination::Error::ZOK);
ASSERT_EQ(storage.ephemerals.size(), 0); ASSERT_EQ(storage.committed_ephemerals.size(), 0);
ASSERT_FALSE(exists("/T7")); ASSERT_FALSE(exists("/T7"));
} }
@ -3581,12 +3582,12 @@ TYPED_TEST(CoordinationTest, TestRemoveRecursiveRequest)
create("/T8/A", zkutil::CreateMode::Persistent); create("/T8/A", zkutil::CreateMode::Persistent);
create("/T8/B", zkutil::CreateMode::Ephemeral); create("/T8/B", zkutil::CreateMode::Ephemeral);
create("/T8/A/C", zkutil::CreateMode::Ephemeral); create("/T8/A/C", zkutil::CreateMode::Ephemeral);
ASSERT_EQ(storage.ephemerals.size(), 1); ASSERT_EQ(storage.committed_ephemerals.size(), 1);
auto responses = remove_recursive("/T8", 4); auto responses = remove_recursive("/T8", 4);
ASSERT_EQ(responses.size(), 1); ASSERT_EQ(responses.size(), 1);
ASSERT_EQ(responses[0].response->error, Coordination::Error::ZOK); ASSERT_EQ(responses[0].response->error, Coordination::Error::ZOK);
ASSERT_EQ(storage.ephemerals.size(), 0); ASSERT_EQ(storage.committed_ephemerals.size(), 0);
ASSERT_FALSE(exists("/T8")); ASSERT_FALSE(exists("/T8"));
ASSERT_FALSE(exists("/T8/A")); ASSERT_FALSE(exists("/T8/A"));
ASSERT_FALSE(exists("/T8/B")); ASSERT_FALSE(exists("/T8/B"));
@ -3738,6 +3739,72 @@ TYPED_TEST(CoordinationTest, TestRemoveRecursiveInMultiRequest)
ASSERT_FALSE(exists("/A/B")); ASSERT_FALSE(exists("/A/B"));
ASSERT_FALSE(exists("/A/B/D")); ASSERT_FALSE(exists("/A/B/D"));
} }
{
SCOPED_TRACE("Recursive Remove For Subtree With Updated Node");
int create_zxid = ++zxid;
auto ops = prepare_create_tree();
/// First create nodes
const auto create_request = std::make_shared<ZooKeeperMultiRequest>(ops, ACLs{});
storage.preprocessRequest(create_request, 1, 0, create_zxid);
auto create_responses = storage.processRequest(create_request, 1, create_zxid);
ASSERT_EQ(create_responses.size(), 1);
ASSERT_TRUE(is_multi_ok(create_responses[0].response));
/// Small limit
int remove_zxid = ++zxid;
ops = {
zkutil::makeSetRequest("/A/B", "", -1),
zkutil::makeRemoveRecursiveRequest("/A", 3),
};
auto remove_request = std::make_shared<ZooKeeperMultiRequest>(ops, ACLs{});
storage.preprocessRequest(remove_request, 1, 0, remove_zxid);
auto remove_responses = storage.processRequest(remove_request, 1, remove_zxid);
ASSERT_EQ(remove_responses.size(), 1);
ASSERT_FALSE(is_multi_ok(remove_responses[0].response));
/// Big limit
remove_zxid = ++zxid;
ops[1] = zkutil::makeRemoveRecursiveRequest("/A", 4);
remove_request = std::make_shared<ZooKeeperMultiRequest>(ops, ACLs{});
storage.preprocessRequest(remove_request, 1, 0, remove_zxid);
remove_responses = storage.processRequest(remove_request, 1, remove_zxid);
ASSERT_EQ(remove_responses.size(), 1);
ASSERT_TRUE(is_multi_ok(remove_responses[0].response));
ASSERT_FALSE(exists("/A"));
ASSERT_FALSE(exists("/A/C"));
ASSERT_FALSE(exists("/A/B"));
ASSERT_FALSE(exists("/A/B/D"));
}
{
SCOPED_TRACE("[BUG] Recursive Remove Level Sorting");
int new_zxid = ++zxid;
Coordination::Requests ops = {
zkutil::makeCreateRequest("/a", "", zkutil::CreateMode::Persistent),
zkutil::makeCreateRequest("/a/bbbbbb", "", zkutil::CreateMode::Persistent),
zkutil::makeCreateRequest("/A", "", zkutil::CreateMode::Persistent),
zkutil::makeCreateRequest("/A/B", "", zkutil::CreateMode::Persistent),
zkutil::makeCreateRequest("/A/CCCCCCCCCCCC", "", zkutil::CreateMode::Persistent),
zkutil::makeRemoveRecursiveRequest("/A", 3),
};
auto remove_request = std::make_shared<ZooKeeperMultiRequest>(ops, ACLs{});
storage.preprocessRequest(remove_request, 1, 0, new_zxid);
auto remove_responses = storage.processRequest(remove_request, 1, new_zxid);
ASSERT_EQ(remove_responses.size(), 1);
ASSERT_TRUE(is_multi_ok(remove_responses[0].response));
ASSERT_TRUE(exists("/a"));
ASSERT_TRUE(exists("/a/bbbbbb"));
ASSERT_FALSE(exists("/A"));
ASSERT_FALSE(exists("/A/B"));
ASSERT_FALSE(exists("/A/CCCCCCCCCCCC"));
}
} }
TYPED_TEST(CoordinationTest, TestRemoveRecursiveWatches) TYPED_TEST(CoordinationTest, TestRemoveRecursiveWatches)
@ -3823,14 +3890,26 @@ TYPED_TEST(CoordinationTest, TestRemoveRecursiveWatches)
auto responses = storage.processRequest(remove_request, 1, new_zxid); auto responses = storage.processRequest(remove_request, 1, new_zxid);
ASSERT_EQ(responses.size(), 7); ASSERT_EQ(responses.size(), 7);
/// request response is last
ASSERT_EQ(dynamic_cast<Coordination::ZooKeeperWatchResponse *>(responses.back().response.get()), nullptr);
for (size_t i = 0; i < 7; ++i) std::unordered_map<std::string, std::vector<Coordination::Event>> expected_watch_responses
{
{"/A/B/D", {Coordination::Event::DELETED}},
{"/A/B", {Coordination::Event::CHILD, Coordination::Event::DELETED}},
{"/A/C", {Coordination::Event::DELETED}},
{"/A", {Coordination::Event::CHILD, Coordination::Event::DELETED}},
};
std::unordered_map<std::string, std::vector<Coordination::Event>> actual_watch_responses;
for (size_t i = 0; i < 6; ++i)
{ {
ASSERT_EQ(responses[i].response->error, Coordination::Error::ZOK); ASSERT_EQ(responses[i].response->error, Coordination::Error::ZOK);
if (const auto * watch_response = dynamic_cast<Coordination::ZooKeeperWatchResponse *>(responses[i].response.get())) const auto & watch_response = dynamic_cast<Coordination::ZooKeeperWatchResponse &>(*responses[i].response);
ASSERT_EQ(watch_response->type, Coordination::Event::DELETED); actual_watch_responses[watch_response.path].push_back(static_cast<Coordination::Event>(watch_response.type));
} }
ASSERT_EQ(expected_watch_responses, actual_watch_responses);
ASSERT_EQ(storage.watches.size(), 0); ASSERT_EQ(storage.watches.size(), 0);
ASSERT_EQ(storage.list_watches.size(), 0); ASSERT_EQ(storage.list_watches.size(), 0);

View File

@ -33,7 +33,8 @@ static constexpr auto DBMS_MIN_REVISION_WITH_AGGREGATE_FUNCTIONS_VERSIONING = 54
static constexpr auto DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION = 1; static constexpr auto DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION = 1;
static constexpr auto DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION = 3; static constexpr auto DBMS_MIN_SUPPORTED_PARALLEL_REPLICAS_PROTOCOL_VERSION = 3;
static constexpr auto DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION = 4;
static constexpr auto DBMS_MIN_REVISION_WITH_PARALLEL_REPLICAS = 54453; static constexpr auto DBMS_MIN_REVISION_WITH_PARALLEL_REPLICAS = 54453;
static constexpr auto DBMS_MERGE_TREE_PART_INFO_VERSION = 1; static constexpr auto DBMS_MERGE_TREE_PART_INFO_VERSION = 1;
@ -86,6 +87,8 @@ static constexpr auto DBMS_MIN_REVISION_WITH_ROWS_BEFORE_AGGREGATION = 54469;
/// Packets size header /// Packets size header
static constexpr auto DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS = 54470; static constexpr auto DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS = 54470;
static constexpr auto DBMS_MIN_REVISION_WITH_VERSIONED_PARALLEL_REPLICAS_PROTOCOL = 54471;
/// Version of ClickHouse TCP protocol. /// Version of ClickHouse TCP protocol.
/// ///
/// Should be incremented manually on protocol changes. /// Should be incremented manually on protocol changes.
@ -93,6 +96,6 @@ static constexpr auto DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS = 54470;
/// NOTE: DBMS_TCP_PROTOCOL_VERSION has nothing in common with VERSION_REVISION; /// NOTE: DBMS_TCP_PROTOCOL_VERSION has nothing in common with VERSION_REVISION;
/// the latter is just a number for the server version (one number instead of a commit SHA), /// the latter is just a number for the server version (one number instead of a commit SHA),
/// kept for simplicity (it may be more convenient in some use cases). /// kept for simplicity (it may be more convenient in some use cases).
static constexpr auto DBMS_TCP_PROTOCOL_VERSION = 54470; static constexpr auto DBMS_TCP_PROTOCOL_VERSION = 54471;
} }

View File

@ -947,7 +947,7 @@ class IColumn;
M(Bool, parallel_replicas_for_non_replicated_merge_tree, false, "If true, ClickHouse will use parallel replicas algorithm also for non-replicated MergeTree tables", 0) \ M(Bool, parallel_replicas_for_non_replicated_merge_tree, false, "If true, ClickHouse will use parallel replicas algorithm also for non-replicated MergeTree tables", 0) \
M(UInt64, parallel_replicas_min_number_of_rows_per_replica, 0, "Limit the number of replicas used in a query to (estimated rows to read / min_number_of_rows_per_replica). The max is still limited by 'max_parallel_replicas'", 0) \ M(UInt64, parallel_replicas_min_number_of_rows_per_replica, 0, "Limit the number of replicas used in a query to (estimated rows to read / min_number_of_rows_per_replica). The max is still limited by 'max_parallel_replicas'", 0) \
M(Bool, parallel_replicas_prefer_local_join, true, "If true, and JOIN can be executed with parallel replicas algorithm, and all storages of right JOIN part are *MergeTree, local JOIN will be used instead of GLOBAL JOIN.", 0) \ M(Bool, parallel_replicas_prefer_local_join, true, "If true, and JOIN can be executed with parallel replicas algorithm, and all storages of right JOIN part are *MergeTree, local JOIN will be used instead of GLOBAL JOIN.", 0) \
M(UInt64, parallel_replicas_mark_segment_size, 128, "Parts virtually divided into segments to be distributed between replicas for parallel reading. This setting controls the size of these segments. Not recommended to change until you're absolutely sure in what you're doing", 0) \ M(UInt64, parallel_replicas_mark_segment_size, 0, "Parts virtually divided into segments to be distributed between replicas for parallel reading. This setting controls the size of these segments. Not recommended to change until you're absolutely sure in what you're doing. Value should be in range [128; 16384]", 0) \
M(Bool, allow_archive_path_syntax, true, "File/S3 engines/table function will parse paths with '::' as '<archive> :: <file>' if archive has correct extension", 0) \ M(Bool, allow_archive_path_syntax, true, "File/S3 engines/table function will parse paths with '::' as '<archive> :: <file>' if archive has correct extension", 0) \
M(Bool, parallel_replicas_local_plan, false, "Build local plan for local replica", 0) \ M(Bool, parallel_replicas_local_plan, false, "Build local plan for local replica", 0) \
\ \
@ -1272,6 +1272,7 @@ class IColumn;
M(Bool, output_format_orc_string_as_string, true, "Use ORC String type instead of Binary for String columns", 0) \ M(Bool, output_format_orc_string_as_string, true, "Use ORC String type instead of Binary for String columns", 0) \
M(ORCCompression, output_format_orc_compression_method, "zstd", "Compression method for ORC output format. Supported codecs: lz4, snappy, zlib, zstd, none (uncompressed)", 0) \ M(ORCCompression, output_format_orc_compression_method, "zstd", "Compression method for ORC output format. Supported codecs: lz4, snappy, zlib, zstd, none (uncompressed)", 0) \
M(UInt64, output_format_orc_row_index_stride, 10'000, "Target row index stride in ORC output format", 0) \ M(UInt64, output_format_orc_row_index_stride, 10'000, "Target row index stride in ORC output format", 0) \
M(Double, output_format_orc_dictionary_key_size_threshold, 0.0, "For a string column in ORC output format, if the number of distinct values is greater than this fraction of the total number of non-null rows, turn off dictionary encoding. Otherwise dictionary encoding is enabled", 0) \
\ \
M(CapnProtoEnumComparingMode, format_capn_proto_enum_comparising_mode, FormatSettings::CapnProtoEnumComparingMode::BY_VALUES, "How to map ClickHouse Enum and CapnProto Enum", 0) \ M(CapnProtoEnumComparingMode, format_capn_proto_enum_comparising_mode, FormatSettings::CapnProtoEnumComparingMode::BY_VALUES, "How to map ClickHouse Enum and CapnProto Enum", 0) \
\ \

View File

@ -71,6 +71,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
}, },
{"24.9", {"24.9",
{ {
{"output_format_orc_dictionary_key_size_threshold", 0.0, 0.0, "For a string column in ORC output format, if the number of distinct values is greater than this fraction of the total number of non-null rows, turn off dictionary encoding. Otherwise dictionary encoding is enabled"},
{"input_format_json_empty_as_default", false, false, "Added new setting to allow to treat empty fields in JSON input as default values."}, {"input_format_json_empty_as_default", false, false, "Added new setting to allow to treat empty fields in JSON input as default values."},
{"input_format_try_infer_variants", false, false, "Try to infer Variant type in text formats when there is more than one possible type for column/array elements"}, {"input_format_try_infer_variants", false, false, "Try to infer Variant type in text formats when there is more than one possible type for column/array elements"},
{"join_output_by_rowlist_perkey_rows_threshold", 0, 5, "The lower limit of per-key average rows in the right table to determine whether to output by row list in hash join."}, {"join_output_by_rowlist_perkey_rows_threshold", 0, 5, "The lower limit of per-key average rows in the right table to determine whether to output by row list in hash join."},
@ -78,6 +79,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"allow_materialized_view_with_bad_select", true, true, "Support (but not enable yet) stricter validation in CREATE MATERIALIZED VIEW"}, {"allow_materialized_view_with_bad_select", true, true, "Support (but not enable yet) stricter validation in CREATE MATERIALIZED VIEW"},
{"output_format_always_quote_identifiers", false, false, "New setting."}, {"output_format_always_quote_identifiers", false, false, "New setting."},
{"output_format_identifier_quoting_style", "Backticks", "Backticks", "New setting."}, {"output_format_identifier_quoting_style", "Backticks", "Backticks", "New setting."},
{"parallel_replicas_mark_segment_size", 128, 0, "Value for this setting now determined automatically"},
{"database_replicated_allow_replicated_engine_arguments", 1, 0, "Don't allow explicit arguments by default"}, {"database_replicated_allow_replicated_engine_arguments", 1, 0, "Don't allow explicit arguments by default"},
{"database_replicated_allow_explicit_uuid", 0, 0, "Added a new setting to disallow explicitly specifying table UUID"}, {"database_replicated_allow_explicit_uuid", 0, 0, "Added a new setting to disallow explicitly specifying table UUID"},
{"parallel_replicas_local_plan", false, false, "Use local plan for local replica in a query with parallel replicas"}, {"parallel_replicas_local_plan", false, false, "Use local plan for local replica in a query with parallel replicas"},

View File

@ -59,35 +59,27 @@ void cckMetadataPathForOrdinary(const ASTCreateQuery & create, const String & me
} }
/// validate validates the database engine that's specified in the create query for void DatabaseFactory::validate(const ASTCreateQuery & create_query) const
/// engine arguments, settings and table overrides.
void validate(const ASTCreateQuery & create_query)
{ {
auto * storage = create_query.storage; auto * storage = create_query.storage;
/// Check engine may have arguments
static const std::unordered_set<std::string_view> engines_with_arguments{"MySQL", "MaterializeMySQL", "MaterializedMySQL",
"Lazy", "Replicated", "PostgreSQL", "MaterializedPostgreSQL", "SQLite", "Filesystem", "S3", "HDFS"};
const String & engine_name = storage->engine->name; const String & engine_name = storage->engine->name;
bool engine_may_have_arguments = engines_with_arguments.contains(engine_name); const EngineFeatures & engine_features = database_engines.at(engine_name).features;
if (storage->engine->arguments && !engine_may_have_arguments) /// Check engine may have arguments
if (storage->engine->arguments && !engine_features.supports_arguments)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Database engine `{}` cannot have arguments", engine_name); throw Exception(ErrorCodes::BAD_ARGUMENTS, "Database engine `{}` cannot have arguments", engine_name);
/// Check engine may have settings /// Check engine may have settings
bool may_have_settings = endsWith(engine_name, "MySQL") || engine_name == "Replicated" || engine_name == "MaterializedPostgreSQL";
bool has_unexpected_element = storage->engine->parameters || storage->partition_by || bool has_unexpected_element = storage->engine->parameters || storage->partition_by ||
storage->primary_key || storage->order_by || storage->primary_key || storage->order_by ||
storage->sample_by; storage->sample_by;
if (has_unexpected_element || (!may_have_settings && storage->settings)) if (has_unexpected_element || (!engine_features.supports_settings && storage->settings))
throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_AST, throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_AST,
"Database engine `{}` cannot have parameters, primary_key, order_by, sample_by, settings", engine_name); "Database engine `{}` cannot have parameters, primary_key, order_by, sample_by, settings", engine_name);
/// Check engine with table overrides /// Check engine with table overrides
static const std::unordered_set<std::string_view> engines_with_table_overrides{"MaterializeMySQL", "MaterializedMySQL", "MaterializedPostgreSQL"}; if (create_query.table_overrides && !engine_features.supports_table_overrides)
if (create_query.table_overrides && !engines_with_table_overrides.contains(engine_name))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Database engine `{}` cannot have table overrides", engine_name); throw Exception(ErrorCodes::BAD_ARGUMENTS, "Database engine `{}` cannot have table overrides", engine_name);
} }
@ -121,9 +113,9 @@ DatabasePtr DatabaseFactory::get(const ASTCreateQuery & create, const String & m
return impl; return impl;
} }
void DatabaseFactory::registerDatabase(const std::string & name, CreatorFn creator_fn) void DatabaseFactory::registerDatabase(const std::string & name, CreatorFn creator_fn, EngineFeatures features)
{ {
if (!database_engines.emplace(name, std::move(creator_fn)).second) if (!database_engines.emplace(name, Creator{std::move(creator_fn), features}).second)
throw Exception(ErrorCodes::LOGICAL_ERROR, "DatabaseFactory: the database engine name '{}' is not unique", name); throw Exception(ErrorCodes::LOGICAL_ERROR, "DatabaseFactory: the database engine name '{}' is not unique", name);
} }
@ -154,7 +146,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
.context = context}; .context = context};
// creator_fn creates and returns a DatabasePtr with the supplied arguments // creator_fn creates and returns a DatabasePtr with the supplied arguments
auto creator_fn = database_engines.at(engine_name); auto creator_fn = database_engines.at(engine_name).creator_fn;
return creator_fn(arguments); return creator_fn(arguments);
} }

View File

@ -43,13 +43,30 @@ public:
ContextPtr & context; ContextPtr & context;
}; };
DatabasePtr get(const ASTCreateQuery & create, const String & metadata_path, ContextPtr context); struct EngineFeatures
{
bool supports_arguments = false;
bool supports_settings = false;
bool supports_table_overrides = false;
};
using CreatorFn = std::function<DatabasePtr(const Arguments & arguments)>; using CreatorFn = std::function<DatabasePtr(const Arguments & arguments)>;
using DatabaseEngines = std::unordered_map<std::string, CreatorFn>; struct Creator
{
CreatorFn creator_fn;
EngineFeatures features;
};
void registerDatabase(const std::string & name, CreatorFn creator_fn); DatabasePtr get(const ASTCreateQuery & create, const String & metadata_path, ContextPtr context);
using DatabaseEngines = std::unordered_map<std::string, Creator>;
void registerDatabase(const std::string & name, CreatorFn creator_fn, EngineFeatures features = EngineFeatures{
.supports_arguments = false,
.supports_settings = false,
.supports_table_overrides = false,
});
const DatabaseEngines & getDatabaseEngines() const { return database_engines; } const DatabaseEngines & getDatabaseEngines() const { return database_engines; }
@ -65,6 +82,10 @@ private:
DatabaseEngines database_engines; DatabaseEngines database_engines;
DatabasePtr getImpl(const ASTCreateQuery & create, const String & metadata_path, ContextPtr context); DatabasePtr getImpl(const ASTCreateQuery & create, const String & metadata_path, ContextPtr context);
/// validate validates the database engine that's specified in the create query for
/// engine arguments, settings and table overrides.
void validate(const ASTCreateQuery & create_query) const;
}; };
} }

View File

@ -257,6 +257,6 @@ void registerDatabaseFilesystem(DatabaseFactory & factory)
return std::make_shared<DatabaseFilesystem>(args.database_name, init_path, args.context); return std::make_shared<DatabaseFilesystem>(args.database_name, init_path, args.context);
}; };
factory.registerDatabase("Filesystem", create_fn); factory.registerDatabase("Filesystem", create_fn, {.supports_arguments = true});
} }
} }

View File

@ -253,7 +253,7 @@ void registerDatabaseHDFS(DatabaseFactory & factory)
return std::make_shared<DatabaseHDFS>(args.database_name, source_url, args.context); return std::make_shared<DatabaseHDFS>(args.database_name, source_url, args.context);
}; };
factory.registerDatabase("HDFS", create_fn); factory.registerDatabase("HDFS", create_fn, {.supports_arguments = true});
} }
} // DB } // DB

View File

@ -398,6 +398,6 @@ void registerDatabaseLazy(DatabaseFactory & factory)
cache_expiration_time_seconds, cache_expiration_time_seconds,
args.context); args.context);
}; };
factory.registerDatabase("Lazy", create_fn); factory.registerDatabase("Lazy", create_fn, {.supports_arguments = true});
} }
} }

View File

@ -2001,6 +2001,6 @@ void registerDatabaseReplicated(DatabaseFactory & factory)
replica_name, replica_name,
std::move(database_replicated_settings), args.context); std::move(database_replicated_settings), args.context);
}; };
factory.registerDatabase("Replicated", create_fn); factory.registerDatabase("Replicated", create_fn, {.supports_arguments = true, .supports_settings = true});
} }
} }

View File

@ -326,7 +326,7 @@ void registerDatabaseS3(DatabaseFactory & factory)
return std::make_shared<DatabaseS3>(args.database_name, config, args.context); return std::make_shared<DatabaseS3>(args.database_name, config, args.context);
}; };
factory.registerDatabase("S3", create_fn); factory.registerDatabase("S3", create_fn, {.supports_arguments = true});
} }
} }
#endif #endif

View File

@ -290,8 +290,14 @@ void registerDatabaseMaterializedMySQL(DatabaseFactory & factory)
binlog_client, binlog_client,
std::move(materialize_mode_settings)); std::move(materialize_mode_settings));
}; };
factory.registerDatabase("MaterializeMySQL", create_fn);
factory.registerDatabase("MaterializedMySQL", create_fn); DatabaseFactory::EngineFeatures features{
.supports_arguments = true,
.supports_settings = true,
.supports_table_overrides = true,
};
factory.registerDatabase("MaterializeMySQL", create_fn, features);
factory.registerDatabase("MaterializedMySQL", create_fn, features);
} }
} }

View File

@ -584,7 +584,7 @@ void registerDatabaseMySQL(DatabaseFactory & factory)
throw Exception(ErrorCodes::CANNOT_CREATE_DATABASE, "Cannot create MySQL database, because {}", exception_message); throw Exception(ErrorCodes::CANNOT_CREATE_DATABASE, "Cannot create MySQL database, because {}", exception_message);
} }
}; };
factory.registerDatabase("MySQL", create_fn); factory.registerDatabase("MySQL", create_fn, {.supports_arguments = true, .supports_settings = true});
} }
} }

View File

@ -546,7 +546,11 @@ void registerDatabaseMaterializedPostgreSQL(DatabaseFactory & factory)
args.database_name, configuration.database, connection_info, args.database_name, configuration.database, connection_info,
std::move(postgresql_replica_settings)); std::move(postgresql_replica_settings));
}; };
factory.registerDatabase("MaterializedPostgreSQL", create_fn); factory.registerDatabase("MaterializedPostgreSQL", create_fn, {
.supports_arguments = true,
.supports_settings = true,
.supports_table_overrides = true,
});
} }
} }

View File

@ -558,7 +558,7 @@ void registerDatabasePostgreSQL(DatabaseFactory & factory)
pool, pool,
use_table_cache); use_table_cache);
}; };
factory.registerDatabase("PostgreSQL", create_fn); factory.registerDatabase("PostgreSQL", create_fn, {.supports_arguments = true});
} }
} }

View File

@ -220,7 +220,7 @@ void registerDatabaseSQLite(DatabaseFactory & factory)
return std::make_shared<DatabaseSQLite>(args.context, engine_define, args.create_query.attach, database_path); return std::make_shared<DatabaseSQLite>(args.context, engine_define, args.create_query.attach, database_path);
}; };
factory.registerDatabase("SQLite", create_fn); factory.registerDatabase("SQLite", create_fn, {.supports_arguments = true});
} }
} }

View File

@ -28,6 +28,7 @@ extern const Event CachedReadBufferReadFromCacheMicroseconds;
extern const Event CachedReadBufferCacheWriteMicroseconds; extern const Event CachedReadBufferCacheWriteMicroseconds;
extern const Event CachedReadBufferReadFromSourceBytes; extern const Event CachedReadBufferReadFromSourceBytes;
extern const Event CachedReadBufferReadFromCacheBytes; extern const Event CachedReadBufferReadFromCacheBytes;
extern const Event CachedReadBufferPredownloadedBytes;
extern const Event CachedReadBufferCacheWriteBytes; extern const Event CachedReadBufferCacheWriteBytes;
extern const Event CachedReadBufferCreateBufferMicroseconds; extern const Event CachedReadBufferCreateBufferMicroseconds;
@ -644,6 +645,7 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegment & file_segment)
size_t current_predownload_size = std::min(current_impl_buffer_size, bytes_to_predownload); size_t current_predownload_size = std::min(current_impl_buffer_size, bytes_to_predownload);
ProfileEvents::increment(ProfileEvents::CachedReadBufferReadFromSourceBytes, current_impl_buffer_size); ProfileEvents::increment(ProfileEvents::CachedReadBufferReadFromSourceBytes, current_impl_buffer_size);
ProfileEvents::increment(ProfileEvents::CachedReadBufferPredownloadedBytes, current_impl_buffer_size);
std::string failure_reason; std::string failure_reason;
bool continue_predownload = file_segment.reserve( bool continue_predownload = file_segment.reserve(

View File

@ -244,6 +244,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
format_settings.orc.output_string_as_string = settings.output_format_orc_string_as_string; format_settings.orc.output_string_as_string = settings.output_format_orc_string_as_string;
format_settings.orc.output_compression_method = settings.output_format_orc_compression_method; format_settings.orc.output_compression_method = settings.output_format_orc_compression_method;
format_settings.orc.output_row_index_stride = settings.output_format_orc_row_index_stride; format_settings.orc.output_row_index_stride = settings.output_format_orc_row_index_stride;
format_settings.orc.output_dictionary_key_size_threshold = settings.output_format_orc_dictionary_key_size_threshold;
format_settings.orc.use_fast_decoder = settings.input_format_orc_use_fast_decoder; format_settings.orc.use_fast_decoder = settings.input_format_orc_use_fast_decoder;
format_settings.orc.filter_push_down = settings.input_format_orc_filter_push_down; format_settings.orc.filter_push_down = settings.input_format_orc_filter_push_down;
format_settings.orc.reader_time_zone_name = settings.input_format_orc_reader_time_zone_name; format_settings.orc.reader_time_zone_name = settings.input_format_orc_reader_time_zone_name;

View File

@ -415,6 +415,7 @@ struct FormatSettings
bool filter_push_down = true; bool filter_push_down = true;
UInt64 output_row_index_stride = 10'000; UInt64 output_row_index_stride = 10'000;
String reader_time_zone_name = "GMT"; String reader_time_zone_name = "GMT";
double output_dictionary_key_size_threshold = 0.0;
} orc{}; } orc{};
/// For capnProto format we should determine how to /// For capnProto format we should determine how to

View File

@ -15,7 +15,6 @@ namespace ErrorCodes
{ {
extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int SIZES_OF_ARRAYS_DONT_MATCH; extern const int SIZES_OF_ARRAYS_DONT_MATCH;
extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_COLUMN;
} }
@ -38,13 +37,6 @@ public:
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{ {
if (arguments.empty())
throw Exception(
ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION,
"Function {} needs at least one argument; passed {}.",
getName(),
arguments.size());
DataTypes arguments_types; DataTypes arguments_types;
for (size_t index = 0; index < arguments.size(); ++index) for (size_t index = 0; index < arguments.size(); ++index)
{ {
@ -68,9 +60,16 @@ public:
} }
ColumnPtr ColumnPtr
executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const override executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
{ {
size_t num_arguments = arguments.size(); size_t num_arguments = arguments.size();
if (num_arguments == 0)
{
auto res_col = result_type->createColumn();
res_col->insertDefault();
return ColumnConst::create(std::move(res_col), input_rows_count);
}
Columns holders(num_arguments); Columns holders(num_arguments);
Columns tuple_columns(num_arguments); Columns tuple_columns(num_arguments);

View File

@ -718,7 +718,12 @@ FileCache::getOrSet(
} }
} }
chassert(file_segments_limit ? file_segments.back()->range().left <= result_range.right : file_segments.back()->range().contains(result_range.right)); chassert(file_segments_limit
? file_segments.back()->range().left <= result_range.right
: file_segments.back()->range().contains(result_range.right),
fmt::format("Unexpected state. Back: {}, result range: {}, limit: {}",
file_segments.back()->range().toString(), result_range.toString(), file_segments_limit));
chassert(!file_segments_limit || file_segments.size() <= file_segments_limit); chassert(!file_segments_limit || file_segments.size() <= file_segments_limit);
return std::make_unique<FileSegmentsHolder>(std::move(file_segments)); return std::make_unique<FileSegmentsHolder>(std::move(file_segments));

View File

@ -532,7 +532,7 @@ void executeQueryWithParallelReplicas(
max_replicas_to_use = shard.getAllNodeCount(); max_replicas_to_use = shard.getAllNodeCount();
} }
auto coordinator = std::make_shared<ParallelReplicasReadingCoordinator>(max_replicas_to_use, settings.parallel_replicas_mark_segment_size); auto coordinator = std::make_shared<ParallelReplicasReadingCoordinator>(max_replicas_to_use);
auto external_tables = new_context->getExternalTables(); auto external_tables = new_context->getExternalTables();

View File

@ -338,11 +338,8 @@ size_t HashJoin::getTotalRowCount() const
return res; return res;
} }
size_t HashJoin::getTotalByteCount() const void HashJoin::doDebugAsserts() const
{ {
if (!data)
return 0;
#ifndef NDEBUG #ifndef NDEBUG
size_t debug_blocks_allocated_size = 0; size_t debug_blocks_allocated_size = 0;
for (const auto & block : data->blocks) for (const auto & block : data->blocks)
@ -360,6 +357,14 @@ size_t HashJoin::getTotalByteCount() const
throw Exception(ErrorCodes::LOGICAL_ERROR, "data->blocks_nullmaps_allocated_size != debug_blocks_nullmaps_allocated_size ({} != {})", throw Exception(ErrorCodes::LOGICAL_ERROR, "data->blocks_nullmaps_allocated_size != debug_blocks_nullmaps_allocated_size ({} != {})",
data->blocks_nullmaps_allocated_size, debug_blocks_nullmaps_allocated_size); data->blocks_nullmaps_allocated_size, debug_blocks_nullmaps_allocated_size);
#endif #endif
}
size_t HashJoin::getTotalByteCount() const
{
if (!data)
return 0;
doDebugAsserts();
size_t res = 0; size_t res = 0;
@ -544,9 +549,11 @@ bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits)
have_compressed = true; have_compressed = true;
} }
doDebugAsserts();
data->blocks_allocated_size += block_to_save.allocatedBytes(); data->blocks_allocated_size += block_to_save.allocatedBytes();
data->blocks.emplace_back(std::move(block_to_save)); data->blocks.emplace_back(std::move(block_to_save));
Block * stored_block = &data->blocks.back(); Block * stored_block = &data->blocks.back();
doDebugAsserts();
if (rows) if (rows)
data->empty = false; data->empty = false;
@ -634,9 +641,11 @@ bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits)
if (!flag_per_row && !is_inserted) if (!flag_per_row && !is_inserted)
{ {
doDebugAsserts();
LOG_TRACE(log, "Skipping inserting block with {} rows", rows); LOG_TRACE(log, "Skipping inserting block with {} rows", rows);
data->blocks_allocated_size -= stored_block->allocatedBytes(); data->blocks_allocated_size -= stored_block->allocatedBytes();
data->blocks.pop_back(); data->blocks.pop_back();
doDebugAsserts();
} }
if (!check_limits) if (!check_limits)
@ -683,6 +692,8 @@ void HashJoin::shrinkStoredBlocksToFit(size_t & total_bytes_in_join, bool force_
for (auto & stored_block : data->blocks) for (auto & stored_block : data->blocks)
{ {
doDebugAsserts();
size_t old_size = stored_block.allocatedBytes(); size_t old_size = stored_block.allocatedBytes();
stored_block = stored_block.shrinkToFit(); stored_block = stored_block.shrinkToFit();
size_t new_size = stored_block.allocatedBytes(); size_t new_size = stored_block.allocatedBytes();
@ -700,6 +711,8 @@ void HashJoin::shrinkStoredBlocksToFit(size_t & total_bytes_in_join, bool force_
else else
/// Sometimes after clone resized block can be bigger than original /// Sometimes after clone resized block can be bigger than original
data->blocks_allocated_size += new_size - old_size; data->blocks_allocated_size += new_size - old_size;
doDebugAsserts();
} }
auto new_total_bytes_in_join = getTotalByteCount(); auto new_total_bytes_in_join = getTotalByteCount();
@ -1416,7 +1429,13 @@ void HashJoin::tryRerangeRightTableDataImpl(Map & map [[maybe_unused]])
}; };
BlocksList sorted_blocks; BlocksList sorted_blocks;
visit_rows_map(sorted_blocks, map); visit_rows_map(sorted_blocks, map);
doDebugAsserts();
data->blocks.swap(sorted_blocks); data->blocks.swap(sorted_blocks);
size_t new_blocks_allocated_size = 0;
for (const auto & block : data->blocks)
new_blocks_allocated_size += block.allocatedBytes();
data->blocks_allocated_size = new_blocks_allocated_size;
doDebugAsserts();
} }
} }

View File

@ -470,6 +470,7 @@ private:
void tryRerangeRightTableData() override; void tryRerangeRightTableData() override;
template <JoinKind KIND, typename Map, JoinStrictness STRICTNESS> template <JoinKind KIND, typename Map, JoinStrictness STRICTNESS>
void tryRerangeRightTableDataImpl(Map & map); void tryRerangeRightTableDataImpl(Map & map);
void doDebugAsserts() const;
}; };
} }

View File

@ -380,100 +380,99 @@ BlockIO InterpreterDropQuery::executeToDatabase(const ASTDropQuery & query)
BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query, DatabasePtr & database, std::vector<UUID> & uuids_to_wait) BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query, DatabasePtr & database, std::vector<UUID> & uuids_to_wait)
{ {
if (query.kind != ASTDropQuery::Kind::Detach && query.kind != ASTDropQuery::Kind::Drop && query.kind != ASTDropQuery::Kind::Truncate)
return {};
const auto & database_name = query.getDatabase(); const auto & database_name = query.getDatabase();
auto ddl_guard = DatabaseCatalog::instance().getDDLGuard(database_name, ""); auto ddl_guard = DatabaseCatalog::instance().getDDLGuard(database_name, "");
database = tryGetDatabase(database_name, query.if_exists); database = tryGetDatabase(database_name, query.if_exists);
if (database) if (!database)
return {};
bool drop = query.kind == ASTDropQuery::Kind::Drop;
bool truncate = query.kind == ASTDropQuery::Kind::Truncate;
getContext()->checkAccess(AccessType::DROP_DATABASE, database_name);
if (query.kind == ASTDropQuery::Kind::Detach && query.permanently)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DETACH PERMANENTLY is not implemented for databases");
if (query.if_empty)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DROP IF EMPTY is not implemented for databases");
if (!truncate && database->hasReplicationThread())
database->stopReplication();
if (database->shouldBeEmptyOnDetach())
{ {
if (query.kind == ASTDropQuery::Kind::Detach || query.kind == ASTDropQuery::Kind::Drop /// Cancel restarting replicas in that database, wait for remaining RESTART queries to finish.
|| query.kind == ASTDropQuery::Kind::Truncate) /// So it will not startup tables concurrently with the flushAndPrepareForShutdown call below.
auto restart_replica_lock = DatabaseCatalog::instance().getLockForDropDatabase(database_name);
ASTDropQuery query_for_table;
query_for_table.kind = query.kind;
// For truncate operation on database, drop the tables
if (truncate)
query_for_table.kind = query.has_all_tables ? ASTDropQuery::Kind::Truncate : ASTDropQuery::Kind::Drop;
query_for_table.if_exists = true;
query_for_table.if_empty = false;
query_for_table.setDatabase(database_name);
query_for_table.sync = query.sync;
/// Flush should not be done if shouldBeEmptyOnDetach() == false,
/// since in this case getTablesIterator() may do some additional work,
/// see DatabaseMaterializedMySQL::getTablesIterator()
auto table_context = Context::createCopy(getContext());
table_context->setInternalQuery(true);
/// Do not hold extra shared pointers to tables
std::vector<std::pair<StorageID, bool>> tables_to_drop;
// NOTE: This means we wait for all tables to be loaded inside getTablesIterator() call in case of `async_load_databases = true`.
for (auto iterator = database->getTablesIterator(table_context); iterator->isValid(); iterator->next())
{ {
bool drop = query.kind == ASTDropQuery::Kind::Drop; auto table_ptr = iterator->table();
bool truncate = query.kind == ASTDropQuery::Kind::Truncate; tables_to_drop.push_back({table_ptr->getStorageID(), table_ptr->isDictionary()});
}
getContext()->checkAccess(AccessType::DROP_DATABASE, database_name); /// Prepare tables for shutdown in parallel.
ThreadPoolCallbackRunnerLocal<void> runner(getDatabaseCatalogDropTablesThreadPool().get(), "DropTables");
if (query.kind == ASTDropQuery::Kind::Detach && query.permanently) for (const auto & [name, _] : tables_to_drop)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DETACH PERMANENTLY is not implemented for databases"); {
auto table_ptr = DatabaseCatalog::instance().getTable(name, table_context);
if (query.if_empty) runner([my_table_ptr = std::move(table_ptr)]()
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DROP IF EMPTY is not implemented for databases");
if (!truncate && database->hasReplicationThread())
database->stopReplication();
if (database->shouldBeEmptyOnDetach())
{ {
/// Cancel restarting replicas in that database, wait for remaining RESTART queries to finish. my_table_ptr->flushAndPrepareForShutdown();
/// So it will not startup tables concurrently with the flushAndPrepareForShutdown call below. });
auto restart_replica_lock = DatabaseCatalog::instance().getLockForDropDatabase(database_name); }
runner.waitForAllToFinishAndRethrowFirstError();
ASTDropQuery query_for_table; for (const auto & table : tables_to_drop)
query_for_table.kind = query.kind; {
// For truncate operation on database, drop the tables query_for_table.setTable(table.first.getTableName());
if (truncate) query_for_table.is_dictionary = table.second;
query_for_table.kind = query.has_all_tables ? ASTDropQuery::Kind::Truncate : ASTDropQuery::Kind::Drop; DatabasePtr db;
query_for_table.if_exists = true; UUID table_to_wait = UUIDHelpers::Nil;
query_for_table.if_empty = false; executeToTableImpl(table_context, query_for_table, db, table_to_wait);
query_for_table.setDatabase(database_name); uuids_to_wait.push_back(table_to_wait);
query_for_table.sync = query.sync;
/// Flush should not be done if shouldBeEmptyOnDetach() == false,
/// since in this case getTablesIterator() may do some additional work,
/// see DatabaseMaterializedMySQL::getTablesIterator()
auto table_context = Context::createCopy(getContext());
table_context->setInternalQuery(true);
/// Do not hold extra shared pointers to tables
std::vector<std::pair<StorageID, bool>> tables_to_drop;
// NOTE: This means we wait for all tables to be loaded inside getTablesIterator() call in case of `async_load_databases = true`.
for (auto iterator = database->getTablesIterator(table_context); iterator->isValid(); iterator->next())
{
auto table_ptr = iterator->table();
tables_to_drop.push_back({table_ptr->getStorageID(), table_ptr->isDictionary()});
}
/// Prepare tables for shutdown in parallel.
ThreadPoolCallbackRunnerLocal<void> runner(getDatabaseCatalogDropTablesThreadPool().get(), "DropTables");
for (const auto & [name, _] : tables_to_drop)
{
auto table_ptr = DatabaseCatalog::instance().getTable(name, table_context);
runner([my_table_ptr = std::move(table_ptr)]()
{
my_table_ptr->flushAndPrepareForShutdown();
});
}
runner.waitForAllToFinishAndRethrowFirstError();
for (const auto & table : tables_to_drop)
{
query_for_table.setTable(table.first.getTableName());
query_for_table.is_dictionary = table.second;
DatabasePtr db;
UUID table_to_wait = UUIDHelpers::Nil;
executeToTableImpl(table_context, query_for_table, db, table_to_wait);
uuids_to_wait.push_back(table_to_wait);
}
}
// only if operation is DETACH
if ((!drop || !truncate) && query.sync)
{
/// Avoid "some tables are still in use" when sync mode is enabled
for (const auto & table_uuid : uuids_to_wait)
database->waitDetachedTableNotInUse(table_uuid);
}
/// Protects from concurrent CREATE TABLE queries
auto db_guard = DatabaseCatalog::instance().getExclusiveDDLGuardForDatabase(database_name);
// only if operation is DETACH
if (!drop || !truncate)
database->assertCanBeDetached(true);
/// DETACH or DROP database itself. If TRUNCATE skip dropping/erasing the database.
if (!truncate)
DatabaseCatalog::instance().detachDatabase(getContext(), database_name, drop, database->shouldBeEmptyOnDetach());
} }
} }
// only if operation is DETACH
if ((!drop || !truncate) && query.sync)
{
/// Avoid "some tables are still in use" when sync mode is enabled
for (const auto & table_uuid : uuids_to_wait)
database->waitDetachedTableNotInUse(table_uuid);
}
/// Protects from concurrent CREATE TABLE queries
auto db_guard = DatabaseCatalog::instance().getExclusiveDDLGuardForDatabase(database_name);
// only if operation is DETACH
if (!drop || !truncate)
database->assertCanBeDetached(true);
/// DETACH or DROP database itself. If TRUNCATE skip dropping/erasing the database.
if (!truncate)
DatabaseCatalog::instance().detachDatabase(getContext(), database_name, drop, database->shouldBeEmptyOnDetach());
return {}; return {};
} }

View File

@ -1,10 +1,42 @@
#pragma once #pragma once
#include <vector> #include <Common/KnownObjectNames.h>
#include <Core/QualifiedTableName.h>
#include <base/defines.h>
#include <boost/algorithm/string/predicate.hpp>
namespace DB namespace DB
{ {
/// Minimal, representation-independent view of a function call: a name plus an
/// optional list of arguments. FunctionSecretArgumentsFinder works only through
/// this interface, so it does not depend on how the call is actually stored.
class AbstractFunction
{
    friend class FunctionSecretArgumentsFinder;

public:
    /// A single argument of the call.
    class Argument
    {
    public:
        virtual ~Argument() = default;

        /// The argument viewed as a nested function call; callers in this file
        /// treat a null result as "this argument is not a function".
        virtual std::unique_ptr<AbstractFunction> getFunction() const = 0;

        virtual bool isIdentifier() const = 0;

        /// Tries to read the argument as a string and reports success.
        /// NOTE(review): callers in this file also pass `res == nullptr` to merely probe
        /// whether a string can be obtained - implementations presumably must accept that.
        virtual bool tryGetString(String * res, bool allow_identifier) const = 0;
    };

    /// The indexable list of arguments of the call.
    class Arguments
    {
    public:
        virtual ~Arguments() = default;

        virtual size_t size() const = 0;
        virtual std::unique_ptr<Argument> at(size_t n) const = 0;
    };

    virtual ~AbstractFunction() = default;

    virtual String name() const = 0;

    /// True when an argument list has been attached to this function.
    bool hasArguments() const { return arguments != nullptr; }

protected:
    /// Expected to be populated by implementations; stays null when there is no argument list.
    std::unique_ptr<Arguments> arguments;
};
class FunctionSecretArgumentsFinder class FunctionSecretArgumentsFinder
{ {
public: public:
@ -23,6 +55,485 @@ public:
return count != 0 || !nested_maps.empty(); return count != 0 || !nested_maps.empty();
} }
}; };
/// Takes ownership of the function description whose arguments are going to be inspected.
explicit FunctionSecretArgumentsFinder(std::unique_ptr<AbstractFunction> && function_) : function(std::move(function_)) {}
/// Returns (by value) the result accumulated so far.
FunctionSecretArgumentsFinder::Result getResult() const { return result; }
protected:
/// The function being analysed; the pointer is fixed at construction.
const std::unique_ptr<AbstractFunction> function;
/// Description of the secret arguments; written by markSecretArgument() and excludeS3OrURLNestedMaps().
Result result;
/// Extends the secret range in `result` so that it ends at argument `index`.
/// Indices beyond the argument list are ignored. The range is reported as "named"
/// only while every argument marked so far was passed as a named one.
void markSecretArgument(size_t index, bool argument_is_named = false)
{
    const bool out_of_range = index >= function->arguments->size();
    if (out_of_range)
        return;

    const bool nothing_marked_yet = !result.count;
    if (nothing_marked_yet)
    {
        /// The first secret found defines where the range starts.
        result.start = index;
        result.are_named = argument_is_named;
    }

    /// Arguments are always checked consecutively, so the range can only grow to the right.
    chassert(index >= result.start);
    result.count = index + 1 - result.start;

    /// A single positional secret makes the whole range positional.
    if (!argument_is_named)
        result.are_named = false;
}
void findOrdinaryFunctionSecretArguments()
{
if ((function->name() == "mysql") || (function->name() == "postgresql") || (function->name() == "mongodb"))
{
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
/// postgresql('host:port', 'database', 'table', 'user', 'password', ...)
/// mongodb('host:port', 'database', 'collection', 'user', 'password', ...)
findMySQLFunctionSecretArguments();
}
else if ((function->name() == "s3") || (function->name() == "cosn") || (function->name() == "oss") ||
(function->name() == "deltaLake") || (function->name() == "hudi") || (function->name() == "iceberg") ||
(function->name() == "gcs"))
{
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
findS3FunctionSecretArguments(/* is_cluster_function= */ false);
}
else if (function->name() == "s3Cluster")
{
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...)
findS3FunctionSecretArguments(/* is_cluster_function= */ true);
}
else if (function->name() == "azureBlobStorage")
{
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure)
findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ false);
}
else if (function->name() == "azureBlobStorageCluster")
{
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ true);
}
else if ((function->name() == "remote") || (function->name() == "remoteSecure"))
{
/// remote('addresses_expr', 'db', 'table', 'user', 'password', ...)
findRemoteFunctionSecretArguments();
}
else if ((function->name() == "encrypt") || (function->name() == "decrypt") ||
(function->name() == "aes_encrypt_mysql") || (function->name() == "aes_decrypt_mysql") ||
(function->name() == "tryDecrypt"))
{
/// encrypt('mode', 'plaintext', 'key' [, iv, aad])
findEncryptionFunctionSecretArguments();
}
else if (function->name() == "url")
{
findURLSecretArguments();
}
}
/// Secret search shared by the mysql-like signatures: a positional password at
/// index 4, or a `password = ...` named argument when a named collection is used.
void findMySQLFunctionSecretArguments()
{
    if (!isNamedCollectionName(0))
    {
        /// mysql('host:port', 'database', 'table', 'user', 'password', ...)
        markSecretArgument(4);
        return;
    }

    /// mysql(named_collection, ..., password = 'password', ...)
    findSecretNamedArgument("password", 1);
}
/// Walks the argument list from the end and strips trailing `headers(...)` and
/// `extra_credentials(...)` calls. Every `headers` encountered is recorded in
/// `result.nested_maps` (i.e. marked as secret).
/// Returns how many leading arguments remain once those trailing maps are excluded.
size_t excludeS3OrURLNestedMaps()
{
    size_t remaining = function->arguments->size();
    for (; remaining > 0; --remaining)
    {
        const auto nested = function->arguments->at(remaining - 1)->getFunction();
        if (!nested)
            break; /// Not a function call, so the trailing maps are over.

        const auto nested_name = nested->name();
        const bool is_headers = (nested_name == "headers");
        if (!is_headers && nested_name != "extra_credentials")
            break; /// Some other function, so it is an ordinary argument.

        if (is_headers)
            result.nested_maps.push_back(nested_name);
    }
    return remaining;
}
void findS3FunctionSecretArguments(bool is_cluster_function)
{
/// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument.
size_t url_arg_idx = is_cluster_function ? 1 : 0;
if (!is_cluster_function && isNamedCollectionName(0))
{
/// s3(named_collection, ..., secret_access_key = 'secret_access_key', ...)
findSecretNamedArgument("secret_access_key", 1);
return;
}
/// We should check other arguments first because we don't need to do any replacement in case of
/// s3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
/// s3('url', 'format', 'structure' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
size_t count = excludeS3OrURLNestedMaps();
if ((url_arg_idx + 3 <= count) && (count <= url_arg_idx + 4))
{
String second_arg;
if (tryGetStringFromArgument(url_arg_idx + 1, &second_arg))
{
if (boost::iequals(second_arg, "NOSIGN"))
return; /// The argument after 'url' is "NOSIGN".
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
}
}
/// We're going to replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
if (url_arg_idx + 2 < count)
markSecretArgument(url_arg_idx + 2);
}
void findAzureBlobStorageFunctionSecretArguments(bool is_cluster_function)
{
/// azureBlobStorage('cluster_name', 'conn_string/storage_account_url', ...) has 'conn_string/storage_account_url' as its second argument.
size_t url_arg_idx = is_cluster_function ? 1 : 0;
if (!is_cluster_function && isNamedCollectionName(0))
{
/// azureBlobStorage(named_collection, ..., account_key = 'account_key', ...)
findSecretNamedArgument("account_key", 1);
return;
}
else if (is_cluster_function && isNamedCollectionName(1))
{
/// azureBlobStorageCluster(cluster, named_collection, ..., account_key = 'account_key', ...)
findSecretNamedArgument("account_key", 2);
return;
}
/// We should check other arguments first because we don't need to do any replacement in case storage_account_url is not used
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure)
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
size_t count = function->arguments->size();
if ((url_arg_idx + 4 <= count) && (count <= url_arg_idx + 7))
{
String second_arg;
if (tryGetStringFromArgument(url_arg_idx + 3, &second_arg))
{
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
}
}
/// We're going to replace 'account_key' with '[HIDDEN]' if account_key is used in the signature
if (url_arg_idx + 4 < count)
markSecretArgument(url_arg_idx + 4);
}
/// url(...) carries no positional secret; unless a named collection is used, only the
/// trailing headers(...) map gets recorded (the returned count is deliberately unused).
void findURLSecretArguments()
{
    if (isNamedCollectionName(0))
        return;

    excludeS3OrURLNestedMaps();
}
/// Tries to read the argument at position `arg_idx` as a string.
/// Returns false when the index is past the end of the argument list or
/// when the argument itself cannot provide a string.
bool tryGetStringFromArgument(size_t arg_idx, String * res, bool allow_identifier = true) const
{
    const bool in_range = arg_idx < function->arguments->size();
    if (!in_range)
        return false;

    const auto argument = function->arguments->at(arg_idx);
    return tryGetStringFromArgument(*argument, res, allow_identifier);
}

/// Same as above for an already extracted argument; simply forwards to Argument::tryGetString().
static bool tryGetStringFromArgument(const AbstractFunction::Argument & argument, String * res, bool allow_identifier = true)
{
    const bool got_string = argument.tryGetString(res, allow_identifier);
    return got_string;
}
/// Marks the 'password' argument of remote() / remoteSecure().
/// The position of the password depends on how the table is specified (table function,
/// 'db.table' or 'db', 'table'), so the arguments before it are skipped one by one.
void findRemoteFunctionSecretArguments()
{
if (isNamedCollectionName(0))
{
/// remote(named_collection, ..., password = 'password', ...)
findSecretNamedArgument("password", 1);
return;
}
/// We're going to replace 'password' with '[HIDDEN]' for the following signatures:
/// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key])
/// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key])
/// remote('addresses_expr', table_function(), 'user' [, 'password'] [, sharding_key])
/// But we should check the number of arguments first because we don't need to do any replacements in case of
/// remote('addresses_expr', db.table)
if (function->arguments->size() < 3)
return;
/// Index of the argument currently being examined; argument 0 is 'addresses_expr'.
size_t arg_num = 1;
/// Skip 1 or 2 arguments with table_function() or db.table or 'db', 'table'.
auto table_function = function->arguments->at(arg_num)->getFunction();
if (table_function && KnownTableFunctionNames::instance().exists(table_function->name()))
{
++arg_num;
}
else
{
std::optional<String> database;
std::optional<QualifiedTableName> qualified_table_name;
if (!tryGetDatabaseNameOrQualifiedTableName(arg_num, database, qualified_table_name))
{
/// We couldn't evaluate the argument so we don't know whether it is 'db.table' or just 'db'.
/// Hence we can't figure out whether we should skip one argument 'user' or two arguments 'table', 'user'
/// before the argument 'password'. So it's safer to wipe two arguments just in case.
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
/// before wiping it (because the `password` argument is always a literal string).
if (tryGetStringFromArgument(arg_num + 2, nullptr, /* allow_identifier= */ false))
{
/// Wipe either `password` or `user`.
markSecretArgument(arg_num + 2);
}
if (tryGetStringFromArgument(arg_num + 3, nullptr, /* allow_identifier= */ false))
{
/// Wipe either `password` or `sharding_key`.
markSecretArgument(arg_num + 3);
}
return;
}
/// Skip the current argument (which is either a database name or a qualified table name).
++arg_num;
if (database)
{
/// Skip the 'table' argument if the previous argument was a database name.
++arg_num;
}
}
/// Skip username.
++arg_num;
/// Do our replacement:
/// remote('addresses_expr', db.table, 'user', 'password', ...) -> remote('addresses_expr', db.table, 'user', '[HIDDEN]', ...)
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
/// before wiping it (because the `password` argument is always a literal string).
bool can_be_password = tryGetStringFromArgument(arg_num, nullptr, /* allow_identifier= */ false);
if (can_be_password)
markSecretArgument(arg_num);
}
/// Tries to get either a database name or a qualified table name from an argument.
/// Empty string is also allowed (it means the default database).
/// The function is used by findRemoteFunctionSecretArguments() to determine how many arguments to skip before a password.
bool tryGetDatabaseNameOrQualifiedTableName(
size_t arg_idx,
std::optional<String> & res_database,
std::optional<QualifiedTableName> & res_qualified_table_name) const
{
res_database.reset();
res_qualified_table_name.reset();
String str;
if (!tryGetStringFromArgument(arg_idx, &str, /* allow_identifier= */ true))
return false;
if (str.empty())
{
res_database = "";
return true;
}
auto qualified_table_name = QualifiedTableName::tryParseFromString(str);
if (!qualified_table_name)
return false;
if (qualified_table_name->database.empty())
res_database = std::move(qualified_table_name->table);
else
res_qualified_table_name = std::move(qualified_table_name);
return true;
}
/// Hides every argument that follows 'mode':
/// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]')
void findEncryptionFunctionSecretArguments()
{
    const size_t total = function->arguments->size();
    if (total == 0)
        return;

    result.start = 1;
    result.count = total - 1;
}
void findTableEngineSecretArguments()
{
const String & engine_name = function->name();
if (engine_name == "ExternalDistributed")
{
/// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password')
findExternalDistributedTableEngineSecretArguments();
}
else if ((engine_name == "MySQL") || (engine_name == "PostgreSQL") ||
(engine_name == "MaterializedPostgreSQL") || (engine_name == "MongoDB"))
{
/// MySQL('host:port', 'database', 'table', 'user', 'password', ...)
/// PostgreSQL('host:port', 'database', 'table', 'user', 'password', ...)
/// MaterializedPostgreSQL('host:port', 'database', 'table', 'user', 'password', ...)
/// MongoDB('host:port', 'database', 'collection', 'user', 'password', ...)
findMySQLFunctionSecretArguments();
}
else if ((engine_name == "S3") || (engine_name == "COSN") || (engine_name == "OSS") ||
(engine_name == "DeltaLake") || (engine_name == "Hudi") || (engine_name == "Iceberg") || (engine_name == "S3Queue"))
{
/// S3('url', ['aws_access_key_id', 'aws_secret_access_key',] ...)
findS3TableEngineSecretArguments();
}
else if (engine_name == "URL")
{
findURLSecretArguments();
}
}
/// Marks the password of the ExternalDistributed table engine.
void findExternalDistributedTableEngineSecretArguments()
{
    if (isNamedCollectionName(1))
    {
        /// ExternalDistributed('engine', named_collection, ..., password = 'password', ...)
        findSecretNamedArgument("password", 2);
        return;
    }

    /// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password')
    markSecretArgument(5);
}
void findS3TableEngineSecretArguments()
{
if (isNamedCollectionName(0))
{
/// S3(named_collection, ..., secret_access_key = 'secret_access_key')
findSecretNamedArgument("secret_access_key", 1);
return;
}
/// We should check other arguments first because we don't need to do any replacement in case of
/// S3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
/// S3('url', 'format', 'compression' [, extra_credentials(..)] [, headers(..)])
size_t count = excludeS3OrURLNestedMaps();
if ((3 <= count) && (count <= 4))
{
String second_arg;
if (tryGetStringFromArgument(1, &second_arg))
{
if (boost::iequals(second_arg, "NOSIGN"))
return; /// The argument after 'url' is "NOSIGN".
if (count == 3)
{
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
return; /// The argument after 'url' is a format: S3('url', 'format', ...)
}
}
}
/// We replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key')
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format')
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
if (2 < count)
markSecretArgument(2);
}
void findDatabaseEngineSecretArguments()
{
const String & engine_name = function->name();
if ((engine_name == "MySQL") || (engine_name == "MaterializeMySQL") ||
(engine_name == "MaterializedMySQL") || (engine_name == "PostgreSQL") ||
(engine_name == "MaterializedPostgreSQL"))
{
/// MySQL('host:port', 'database', 'user', 'password')
/// PostgreSQL('host:port', 'database', 'user', 'password')
findMySQLDatabaseSecretArguments();
}
else if (engine_name == "S3")
{
/// S3('url', 'access_key_id', 'secret_access_key')
findS3DatabaseSecretArguments();
}
}
/// Marks the password of a MySQL-style database engine.
void findMySQLDatabaseSecretArguments()
{
    if (isNamedCollectionName(0))
    {
        /// MySQL(named_collection, ..., password = 'password', ...)
        findSecretNamedArgument("password", 1);
        return;
    }

    /// MySQL('host:port', 'database', 'user', 'password')
    markSecretArgument(3);
}
/// Marks the secret access key of the S3 database engine.
void findS3DatabaseSecretArguments()
{
    if (isNamedCollectionName(0))
    {
        /// S3(named_collection, ..., secret_access_key = 'secret_access_key', ...)
        findSecretNamedArgument("secret_access_key", 1);
        return;
    }

    /// S3('url', 'access_key_id', 'secret_access_key')
    markSecretArgument(2);
}
void findBackupNameSecretArguments()
{
const String & engine_name = function->name();
if (engine_name == "S3")
{
/// BACKUP ... TO S3(url, [aws_access_key_id, aws_secret_access_key])
markSecretArgument(2);
}
}
/// Tells whether argument `arg_idx` can be the name of a named collection:
/// the index must be in range and the argument must be an identifier.
bool isNamedCollectionName(size_t arg_idx) const
{
    const bool in_range = arg_idx < function->arguments->size();
    return in_range && function->arguments->at(arg_idx)->isIdentifier();
}
/// Scans the arguments from position `start` onwards for `key = value` pairs (an `equals` function with
/// exactly two arguments) and marks every pair whose left-hand side equals `key` as a named secret.
void findSecretNamedArgument(const std::string_view & key, size_t start = 0)
{
    for (size_t pos = start; pos < function->arguments->size(); ++pos)
    {
        const auto & candidate = function->arguments->at(pos);
        const auto pair_func = candidate->getFunction();

        const bool is_key_value_pair = pair_func && (pair_func->name() == "equals")
            && pair_func->arguments && (pair_func->arguments->size() == 2);
        if (!is_key_value_pair)
            continue;

        String found_key;
        const bool has_key = tryGetStringFromArgument(*pair_func->arguments->at(0), &found_key);
        if (has_key && found_key == key)
            markSecretArgument(pos, /* argument_is_named= */ true);
    }
}
}; };
} }

View File

@ -1,35 +1,97 @@
#pragma once #pragma once
#include <Parsers/FunctionSecretArgumentsFinder.h> #include <Parsers/FunctionSecretArgumentsFinder.h>
#include <Core/QualifiedTableName.h>
#include <Parsers/ASTFunction.h> #include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h> #include <Parsers/ASTLiteral.h>
#include <Parsers/ASTIdentifier.h> #include <Parsers/ASTIdentifier.h>
#include <Common/KnownObjectNames.h>
#include <boost/algorithm/string/predicate.hpp>
namespace DB namespace DB
{ {
class FunctionAST : public AbstractFunction
/// Finds arguments of a specified function which should not be displayed for most users for security reasons.
/// That involves passwords and secret keys.
class FunctionSecretArgumentsFinderAST
{ {
public: public:
explicit FunctionSecretArgumentsFinderAST(const ASTFunction & function_) : function(function_) class ArgumentAST : public Argument
{ {
if (!function.arguments) public:
explicit ArgumentAST(const IAST * argument_) : argument(argument_) {}
std::unique_ptr<AbstractFunction> getFunction() const override
{
if (const auto * f = argument->as<ASTFunction>())
return std::make_unique<FunctionAST>(*f);
return nullptr;
}
bool isIdentifier() const override { return argument->as<ASTIdentifier>(); }
bool tryGetString(String * res, bool allow_identifier) const override
{
if (const auto * literal = argument->as<ASTLiteral>())
{
if (literal->value.getType() != Field::Types::String)
return false;
if (res)
*res = literal->value.safeGet<String>();
return true;
}
if (allow_identifier)
{
if (const auto * id = argument->as<ASTIdentifier>())
{
if (res)
*res = id->name();
return true;
}
}
return false;
}
private:
const IAST * argument = nullptr;
};
class ArgumentsAST : public Arguments
{
public:
explicit ArgumentsAST(const ASTs * arguments_) : arguments(arguments_) {}
size_t size() const override { return arguments ? arguments->size() : 0; }
std::unique_ptr<Argument> at(size_t n) const override
{
return std::make_unique<ArgumentAST>(arguments->at(n).get());
}
private:
const ASTs * arguments = nullptr;
};
explicit FunctionAST(const ASTFunction & function_) : function(&function_)
{
if (!function->arguments)
return; return;
const auto * expr_list = function.arguments->as<ASTExpressionList>(); const auto * expr_list = function->arguments->as<ASTExpressionList>();
if (!expr_list) if (!expr_list)
return; return;
arguments = &expr_list->children; arguments = std::make_unique<ArgumentsAST>(&expr_list->children);
switch (function.kind) }
String name() const override { return function->name; }
private:
const ASTFunction * function = nullptr;
};
/// Finds arguments of a specified function which should not be displayed for most users for security reasons.
/// That involves passwords and secret keys.
class FunctionSecretArgumentsFinderAST : public FunctionSecretArgumentsFinder
{
public:
explicit FunctionSecretArgumentsFinderAST(const ASTFunction & function_)
: FunctionSecretArgumentsFinder(std::make_unique<FunctionAST>(function_))
{
if (!function->hasArguments())
return;
switch (function_.kind)
{ {
case ASTFunction::Kind::ORDINARY_FUNCTION: findOrdinaryFunctionSecretArguments(); break; case ASTFunction::Kind::ORDINARY_FUNCTION: findOrdinaryFunctionSecretArguments(); break;
case ASTFunction::Kind::WINDOW_FUNCTION: break; case ASTFunction::Kind::WINDOW_FUNCTION: break;
@ -43,507 +105,7 @@ public:
} }
FunctionSecretArgumentsFinder::Result getResult() const { return result; } FunctionSecretArgumentsFinder::Result getResult() const { return result; }
private:
const ASTFunction & function;
const ASTs * arguments = nullptr;
FunctionSecretArgumentsFinder::Result result;
/// Extends the secret range in `result` so that it ends at argument `index`.
/// Out-of-range indices are ignored. The first marked argument also sets the start of the range.
/// `result.are_named` stays true only while every marked argument was named.
void markSecretArgument(size_t index, bool argument_is_named = false)
{
    if (index >= arguments->size())
        return;

    const bool is_first_secret = !result.count;
    if (is_first_secret)
    {
        result.start = index;
        result.are_named = argument_is_named;
    }

    /// Arguments are always examined in increasing order.
    chassert(index >= result.start);

    result.count = index + 1 - result.start;

    if (!argument_is_named)
        result.are_named = false;
}
void findOrdinaryFunctionSecretArguments()
{
if ((function.name == "mysql") || (function.name == "postgresql") || (function.name == "mongodb"))
{
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
/// postgresql('host:port', 'database', 'table', 'user', 'password', ...)
/// mongodb('host:port', 'database', 'collection', 'user', 'password', ...)
findMySQLFunctionSecretArguments();
}
else if ((function.name == "s3") || (function.name == "cosn") || (function.name == "oss") ||
(function.name == "deltaLake") || (function.name == "hudi") || (function.name == "iceberg") ||
(function.name == "gcs"))
{
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
findS3FunctionSecretArguments(/* is_cluster_function= */ false);
}
else if (function.name == "s3Cluster")
{
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...)
findS3FunctionSecretArguments(/* is_cluster_function= */ true);
}
else if (function.name == "azureBlobStorage")
{
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure)
findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ false);
}
else if (function.name == "azureBlobStorageCluster")
{
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ true);
}
else if ((function.name == "remote") || (function.name == "remoteSecure"))
{
/// remote('addresses_expr', 'db', 'table', 'user', 'password', ...)
findRemoteFunctionSecretArguments();
}
else if ((function.name == "encrypt") || (function.name == "decrypt") ||
(function.name == "aes_encrypt_mysql") || (function.name == "aes_decrypt_mysql") ||
(function.name == "tryDecrypt"))
{
/// encrypt('mode', 'plaintext', 'key' [, iv, aad])
findEncryptionFunctionSecretArguments();
}
else if (function.name == "url")
{
findURLSecretArguments();
}
}
/// Marks the password of mysql()-style functions and engines.
void findMySQLFunctionSecretArguments()
{
    if (isNamedCollectionName(0))
    {
        /// mysql(named_collection, ..., password = 'password', ...)
        findSecretNamedArgument("password", 1);
        return;
    }

    /// mysql('host:port', 'database', 'table', 'user', 'password', ...)
    markSecretArgument(4);
}
/// Returns the number of arguments excluding "headers" and "extra_credentials" (which should
/// always be at the end). Marks "headers" as secret, if found.
size_t excludeS3OrURLNestedMaps()
{
size_t count = arguments->size();
while (count > 0)
{
const ASTFunction * f = arguments->at(count - 1)->as<ASTFunction>();
if (!f)
break;
if (f->name == "headers")
result.nested_maps.push_back(f->name);
else if (f->name != "extra_credentials")
break;
count -= 1;
}
return count;
}
void findS3FunctionSecretArguments(bool is_cluster_function)
{
/// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument.
size_t url_arg_idx = is_cluster_function ? 1 : 0;
if (!is_cluster_function && isNamedCollectionName(0))
{
/// s3(named_collection, ..., secret_access_key = 'secret_access_key', ...)
findSecretNamedArgument("secret_access_key", 1);
return;
}
/// We should check other arguments first because we don't need to do any replacement in case of
/// s3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
/// s3('url', 'format', 'structure' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
size_t count = excludeS3OrURLNestedMaps();
if ((url_arg_idx + 3 <= count) && (count <= url_arg_idx + 4))
{
String second_arg;
if (tryGetStringFromArgument(url_arg_idx + 1, &second_arg))
{
if (boost::iequals(second_arg, "NOSIGN"))
return; /// The argument after 'url' is "NOSIGN".
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
}
}
/// We're going to replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
if (url_arg_idx + 2 < count)
markSecretArgument(url_arg_idx + 2);
}
void findAzureBlobStorageFunctionSecretArguments(bool is_cluster_function)
{
/// azureBlobStorage('cluster_name', 'conn_string/storage_account_url', ...) has 'conn_string/storage_account_url' as its second argument.
size_t url_arg_idx = is_cluster_function ? 1 : 0;
if (!is_cluster_function && isNamedCollectionName(0))
{
/// azureBlobStorage(named_collection, ..., account_key = 'account_key', ...)
findSecretNamedArgument("account_key", 1);
return;
}
else if (is_cluster_function && isNamedCollectionName(1))
{
/// azureBlobStorageCluster(cluster, named_collection, ..., account_key = 'account_key', ...)
findSecretNamedArgument("account_key", 2);
return;
}
/// We should check other arguments first because we don't need to do any replacement in case storage_account_url is not used
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure)
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
size_t count = arguments->size();
if ((url_arg_idx + 4 <= count) && (count <= url_arg_idx + 7))
{
String second_arg;
if (tryGetStringFromArgument(url_arg_idx + 3, &second_arg))
{
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
}
}
/// We're going to replace 'account_key' with '[HIDDEN]' if account_key is used in the signature
if (url_arg_idx + 4 < count)
markSecretArgument(url_arg_idx + 4);
}
/// For url(...) / URL(...) only the trailing headers(...) map is treated as secret, and only when the
/// first argument is not a named collection.
void findURLSecretArguments()
{
    if (isNamedCollectionName(0))
        return;

    /// Called for its side effect of recording "headers"; the returned count is not needed here.
    excludeS3OrURLNestedMaps();
}
/// Index-based overload: returns false for an out-of-range `arg_idx`, otherwise defers to the
/// overload that takes the argument node itself.
bool tryGetStringFromArgument(size_t arg_idx, String * res, bool allow_identifier = true) const
{
    const bool in_range = arg_idx < arguments->size();
    return in_range && tryGetStringFromArgument(*(*arguments)[arg_idx], res, allow_identifier);
}
static bool tryGetStringFromArgument(const IAST & argument, String * res, bool allow_identifier = true)
{
if (const auto * literal = argument.as<ASTLiteral>())
{
if (literal->value.getType() != Field::Types::String)
return false;
if (res)
*res = literal->value.safeGet<String>();
return true;
}
if (allow_identifier)
{
if (const auto * id = argument.as<ASTIdentifier>())
{
if (res)
*res = id->name();
return true;
}
}
return false;
}
/// Marks the password of remote() / remoteSecure() as secret, i.e. 'password' gets replaced with '[HIDDEN]'.
///
/// Supported call shapes:
///     remote(named_collection, ..., password = 'password', ...)
///     remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key])
///     remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key])
///     remote('addresses_expr', table_function(), 'user' [, 'password'] [, sharding_key])
void findRemoteFunctionSecretArguments()
{
    if (isNamedCollectionName(0))
    {
        /// The password is looked up as a `password = '...'` pair after the collection name.
        findSecretNamedArgument("password", 1);
        return;
    }

    /// Nothing to replace in the short form remote('addresses_expr', db.table).
    if (arguments->size() < 3)
        return;

    /// Position currently being examined; position 0 holds 'addresses_expr'.
    size_t pos = 1;

    /// Step over table_function(), or db.table, or the pair 'db', 'table'.
    const auto * nested_function = (*arguments)[pos]->as<ASTFunction>();
    const bool is_table_function = nested_function && KnownTableFunctionNames::instance().exists(nested_function->name);
    if (is_table_function)
    {
        pos += 1;
    }
    else
    {
        std::optional<String> database;
        std::optional<QualifiedTableName> qualified_table_name;
        if (!tryGetDatabaseNameOrQualifiedTableName(pos, database, qualified_table_name))
        {
            /// The argument could not be evaluated, so it is unknown whether it is 'db.table' or just 'db',
            /// and therefore whether one argument ('user') or two ('table', 'user') precede the password.
            /// To stay on the safe side both candidate positions are wiped: the first is either `user` or
            /// `password`, the second is either `password` or `sharding_key`. Only literal strings are wiped
            /// because `password` is always a literal string while `sharding_key` may be an expression.
            for (size_t candidate = pos + 2; candidate <= pos + 3; ++candidate)
            {
                if (tryGetStringFromArgument(candidate, nullptr, /* allow_identifier= */ false))
                    markSecretArgument(candidate);
            }
            return;
        }

        /// A bare database name is followed by a separate 'table' argument; a qualified table name is not.
        pos += database ? 2 : 1;
    }

    /// Step over the user name.
    pos += 1;

    /// remote('addresses_expr', db.table, 'user', 'password', ...) -> remote('addresses_expr', db.table, 'user', '[HIDDEN]', ...)
    /// This position may also hold `sharding_key`, so it is wiped only when it is a literal string.
    if (tryGetStringFromArgument(pos, nullptr, /* allow_identifier= */ false))
        markSecretArgument(pos);
}
/// Tries to get either a database name or a qualified table name from an argument.
/// Empty string is also allowed (it means the default database).
/// The function is used by findRemoteFunctionSecretArguments() to determine how many arguments to skip before a password.
bool tryGetDatabaseNameOrQualifiedTableName(
size_t arg_idx,
std::optional<String> & res_database,
std::optional<QualifiedTableName> & res_qualified_table_name) const
{
res_database.reset();
res_qualified_table_name.reset();
String str;
if (!tryGetStringFromArgument(arg_idx, &str, /* allow_identifier= */ true))
return false;
if (str.empty())
{
res_database = "";
return true;
}
auto qualified_table_name = QualifiedTableName::tryParseFromString(str);
if (!qualified_table_name)
return false;
if (qualified_table_name->database.empty())
res_database = std::move(qualified_table_name->table);
else
res_qualified_table_name = std::move(qualified_table_name);
return true;
}
void findEncryptionFunctionSecretArguments()
{
if (arguments->empty())
return;
/// We replace all arguments after 'mode' with '[HIDDEN]':
/// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]')
result.start = 1;
result.count = arguments->size() - 1;
}
void findTableEngineSecretArguments()
{
const String & engine_name = function.name;
if (engine_name == "ExternalDistributed")
{
/// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password')
findExternalDistributedTableEngineSecretArguments();
}
else if ((engine_name == "MySQL") || (engine_name == "PostgreSQL") ||
(engine_name == "MaterializedPostgreSQL") || (engine_name == "MongoDB"))
{
/// MySQL('host:port', 'database', 'table', 'user', 'password', ...)
/// PostgreSQL('host:port', 'database', 'table', 'user', 'password', ...)
/// MaterializedPostgreSQL('host:port', 'database', 'table', 'user', 'password', ...)
/// MongoDB('host:port', 'database', 'collection', 'user', 'password', ...)
findMySQLFunctionSecretArguments();
}
else if ((engine_name == "S3") || (engine_name == "COSN") || (engine_name == "OSS") ||
(engine_name == "DeltaLake") || (engine_name == "Hudi") || (engine_name == "Iceberg") || (engine_name == "S3Queue"))
{
/// S3('url', ['aws_access_key_id', 'aws_secret_access_key',] ...)
findS3TableEngineSecretArguments();
}
else if (engine_name == "URL")
{
findURLSecretArguments();
}
}
/// Marks the password of the ExternalDistributed table engine.
void findExternalDistributedTableEngineSecretArguments()
{
    if (isNamedCollectionName(1))
    {
        /// ExternalDistributed('engine', named_collection, ..., password = 'password', ...)
        findSecretNamedArgument("password", 2);
        return;
    }

    /// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password')
    markSecretArgument(5);
}
void findS3TableEngineSecretArguments()
{
if (isNamedCollectionName(0))
{
/// S3(named_collection, ..., secret_access_key = 'secret_access_key')
findSecretNamedArgument("secret_access_key", 1);
return;
}
/// We should check other arguments first because we don't need to do any replacement in case of
/// S3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
/// S3('url', 'format', 'compression' [, extra_credentials(..)] [, headers(..)])
size_t count = excludeS3OrURLNestedMaps();
if ((3 <= count) && (count <= 4))
{
String second_arg;
if (tryGetStringFromArgument(1, &second_arg))
{
if (boost::iequals(second_arg, "NOSIGN"))
return; /// The argument after 'url' is "NOSIGN".
if (count == 3)
{
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
return; /// The argument after 'url' is a format: S3('url', 'format', ...)
}
}
}
/// We replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key')
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format')
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
if (2 < count)
markSecretArgument(2);
}
void findDatabaseEngineSecretArguments()
{
const String & engine_name = function.name;
if ((engine_name == "MySQL") || (engine_name == "MaterializeMySQL") ||
(engine_name == "MaterializedMySQL") || (engine_name == "PostgreSQL") ||
(engine_name == "MaterializedPostgreSQL"))
{
/// MySQL('host:port', 'database', 'user', 'password')
/// PostgreSQL('host:port', 'database', 'user', 'password')
findMySQLDatabaseSecretArguments();
}
else if (engine_name == "S3")
{
/// S3('url', 'access_key_id', 'secret_access_key')
findS3DatabaseSecretArguments();
}
}
/// Marks the password of a MySQL-style database engine.
void findMySQLDatabaseSecretArguments()
{
    if (isNamedCollectionName(0))
    {
        /// MySQL(named_collection, ..., password = 'password', ...)
        findSecretNamedArgument("password", 1);
        return;
    }

    /// MySQL('host:port', 'database', 'user', 'password')
    markSecretArgument(3);
}
/// Marks the secret access key of the S3 database engine.
void findS3DatabaseSecretArguments()
{
    if (isNamedCollectionName(0))
    {
        /// S3(named_collection, ..., secret_access_key = 'secret_access_key', ...)
        findSecretNamedArgument("secret_access_key", 1);
        return;
    }

    /// S3('url', 'access_key_id', 'secret_access_key')
    markSecretArgument(2);
}
void findBackupNameSecretArguments()
{
const String & engine_name = function.name;
if (engine_name == "S3")
{
/// BACKUP ... TO S3(url, [aws_access_key_id, aws_secret_access_key])
markSecretArgument(2);
}
}
/// Whether a specified argument can be the name of a named collection?
bool isNamedCollectionName(size_t arg_idx) const
{
if (arguments->size() <= arg_idx)
return false;
const auto * identifier = (*arguments)[arg_idx]->as<ASTIdentifier>();
return identifier != nullptr;
}
/// Looks for a secret argument with a specified name.
/// Scans the arguments from position `start` onwards for `key = value` pairs (an `equals` function with
/// exactly two arguments) and marks every pair whose left-hand side equals `key` as a named secret.
/// Arguments that are not such pairs are skipped.
void findSecretNamedArgument(const std::string_view & key, size_t start = 0)
{
    for (size_t i = start; i < arguments->size(); ++i)
    {
        const auto & argument = (*arguments)[i];
        const auto * equals_func = argument->as<ASTFunction>();
        if (!equals_func || (equals_func->name != "equals"))
            continue;

        /// A function node may carry no argument list at all (the constructor guards the same case for
        /// the outer function), so check before dereferencing.
        if (!equals_func->arguments)
            continue;

        const auto * expr_list = equals_func->arguments->as<ASTExpressionList>();
        if (!expr_list)
            continue;

        const auto & equal_args = expr_list->children;
        if (equal_args.size() != 2)
            continue;

        String found_key;
        if (!tryGetStringFromArgument(*equal_args[0], &found_key))
            continue;

        if (found_key == key)
            markSecretArgument(i, /* argument_is_named= */ true);
    }
}
}; };
} }

View File

@ -778,7 +778,18 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres
auto & prewhere_info = table_expression_query_info.prewhere_info; auto & prewhere_info = table_expression_query_info.prewhere_info;
const auto & prewhere_actions = table_expression_data.getPrewhereFilterActions(); const auto & prewhere_actions = table_expression_data.getPrewhereFilterActions();
if (prewhere_actions) std::vector<std::pair<FilterDAGInfo, std::string>> where_filters;
if (prewhere_actions && select_query_options.build_logical_plan)
{
where_filters.emplace_back(
FilterDAGInfo{
prewhere_actions->clone(),
prewhere_actions->getOutputs().at(0)->result_name,
true},
"Prewhere");
}
else if (prewhere_actions)
{ {
prewhere_info = std::make_shared<PrewhereInfo>(); prewhere_info = std::make_shared<PrewhereInfo>();
prewhere_info->prewhere_actions = prewhere_actions->clone(); prewhere_info->prewhere_actions = prewhere_actions->clone();
@ -791,7 +802,6 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres
const auto & columns_names = table_expression_data.getColumnNames(); const auto & columns_names = table_expression_data.getColumnNames();
std::vector<std::pair<FilterDAGInfo, std::string>> where_filters;
const auto add_filter = [&](FilterDAGInfo & filter_info, std::string description) const auto add_filter = [&](FilterDAGInfo & filter_info, std::string description)
{ {
bool is_final = table_expression_query_info.table_expression_modifiers bool is_final = table_expression_query_info.table_expression_modifiers

View File

@ -78,7 +78,9 @@ void ORCOutputStream::write(const void* buf, size_t length)
} }
ORCBlockOutputFormat::ORCBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) ORCBlockOutputFormat::ORCBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_)
: IOutputFormat(header_, out_), format_settings{format_settings_}, output_stream(out_) : IOutputFormat(header_, out_)
, format_settings{format_settings_}
, output_stream(out_)
{ {
for (const auto & type : header_.getDataTypes()) for (const auto & type : header_.getDataTypes())
data_types.push_back(recursiveRemoveLowCardinality(type)); data_types.push_back(recursiveRemoveLowCardinality(type));
@ -565,6 +567,7 @@ void ORCBlockOutputFormat::prepareWriter()
schema = orc::createStructType(); schema = orc::createStructType();
options.setCompression(getORCCompression(format_settings.orc.output_compression_method)); options.setCompression(getORCCompression(format_settings.orc.output_compression_method));
options.setRowIndexStride(format_settings.orc.output_row_index_stride); options.setRowIndexStride(format_settings.orc.output_row_index_stride);
options.setDictionaryKeySizeThreshold(format_settings.orc.output_dictionary_key_size_threshold);
size_t columns_count = header.columns(); size_t columns_count = header.columns();
for (size_t i = 0; i != columns_count; ++i) for (size_t i = 0; i != columns_count; ++i)
schema->addStructField(header.safeGetByPosition(i).name, getORCType(recursiveRemoveLowCardinality(data_types[i]))); schema->addStructField(header.safeGetByPosition(i).name, getORCType(recursiveRemoveLowCardinality(data_types[i])));

View File

@ -1,6 +1,8 @@
#include <Processors/QueryPlan/ReadFromMergeTree.h> #include <Processors/QueryPlan/ReadFromMergeTree.h>
#include <Core/Settings.h>
#include <IO/Operators.h> #include <IO/Operators.h>
#include <Interpreters/Cluster.h>
#include <Interpreters/Context.h> #include <Interpreters/Context.h>
#include <Interpreters/ExpressionAnalyzer.h> #include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/InterpreterSelectQuery.h> #include <Interpreters/InterpreterSelectQuery.h>
@ -8,6 +10,8 @@
#include <Parsers/ASTFunction.h> #include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h> #include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTSelectQuery.h> #include <Parsers/ASTSelectQuery.h>
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/parseIdentifierOrStringLiteral.h>
#include <Processors/ConcatProcessor.h> #include <Processors/ConcatProcessor.h>
#include <Processors/Merges/AggregatingSortedTransform.h> #include <Processors/Merges/AggregatingSortedTransform.h>
#include <Processors/Merges/CollapsingSortedTransform.h> #include <Processors/Merges/CollapsingSortedTransform.h>
@ -16,6 +20,7 @@
#include <Processors/Merges/ReplacingSortedTransform.h> #include <Processors/Merges/ReplacingSortedTransform.h>
#include <Processors/Merges/SummingSortedTransform.h> #include <Processors/Merges/SummingSortedTransform.h>
#include <Processors/Merges/VersionedCollapsingTransform.h> #include <Processors/Merges/VersionedCollapsingTransform.h>
#include <Processors/QueryPlan/IQueryPlanStep.h>
#include <Processors/QueryPlan/PartsSplitter.h> #include <Processors/QueryPlan/PartsSplitter.h>
#include <Processors/Sources/NullSource.h> #include <Processors/Sources/NullSource.h>
#include <Processors/Transforms/ExpressionTransform.h> #include <Processors/Transforms/ExpressionTransform.h>
@ -24,10 +29,11 @@
#include <Processors/Transforms/SelectByIndicesTransform.h> #include <Processors/Transforms/SelectByIndicesTransform.h>
#include <QueryPipeline/QueryPipelineBuilder.h> #include <QueryPipeline/QueryPipelineBuilder.h>
#include <Storages/MergeTree/MergeTreeDataSelectExecutor.h> #include <Storages/MergeTree/MergeTreeDataSelectExecutor.h>
#include <Storages/MergeTree/MergeTreeIndexVectorSimilarity.h>
#include <Storages/MergeTree/MergeTreeIndexLegacyVectorSimilarity.h> #include <Storages/MergeTree/MergeTreeIndexLegacyVectorSimilarity.h>
#include <Storages/MergeTree/MergeTreeReadPool.h> #include <Storages/MergeTree/MergeTreeIndexMinMax.h>
#include <Storages/MergeTree/MergeTreeIndexVectorSimilarity.h>
#include <Storages/MergeTree/MergeTreePrefetchedReadPool.h> #include <Storages/MergeTree/MergeTreePrefetchedReadPool.h>
#include <Storages/MergeTree/MergeTreeReadPool.h>
#include <Storages/MergeTree/MergeTreeReadPoolInOrder.h> #include <Storages/MergeTree/MergeTreeReadPoolInOrder.h>
#include <Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h> #include <Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h>
#include <Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h> #include <Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h>
@ -41,11 +47,6 @@
#include <Common/JSONBuilder.h> #include <Common/JSONBuilder.h>
#include <Common/isLocalAddress.h> #include <Common/isLocalAddress.h>
#include <Common/logger_useful.h> #include <Common/logger_useful.h>
#include <Core/Settings.h>
#include <Processors/QueryPlan/IQueryPlanStep.h>
#include <Parsers/parseIdentifierOrStringLiteral.h>
#include <Parsers/ExpressionListParsers.h>
#include <Storages/MergeTree/MergeTreeIndexMinMax.h>
#include <algorithm> #include <algorithm>
#include <iterator> #include <iterator>
@ -381,6 +382,7 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas(RangesInDataParts parts_wit
.all_callback = all_ranges_callback.value(), .all_callback = all_ranges_callback.value(),
.callback = read_task_callback.value(), .callback = read_task_callback.value(),
.number_of_current_replica = number_of_current_replica.value_or(client_info.number_of_current_replica), .number_of_current_replica = number_of_current_replica.value_or(client_info.number_of_current_replica),
.total_nodes_count = context->getClusterForParallelReplicas()->getShardsInfo().at(0).getAllNodeCount(),
}; };
/// We have a special logic for local replica. It has to read less data, because in some cases it should /// We have a special logic for local replica. It has to read less data, because in some cases it should
@ -563,6 +565,7 @@ Pipe ReadFromMergeTree::readInOrder(
.all_callback = all_ranges_callback.value(), .all_callback = all_ranges_callback.value(),
.callback = read_task_callback.value(), .callback = read_task_callback.value(),
.number_of_current_replica = number_of_current_replica.value_or(client_info.number_of_current_replica), .number_of_current_replica = number_of_current_replica.value_or(client_info.number_of_current_replica),
.total_nodes_count = context->getClusterForParallelReplicas()->getShardsInfo().at(0).getAllNodeCount(),
}; };
auto multiplier = context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier; auto multiplier = context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier;

View File

@ -552,7 +552,7 @@ static QueryPlanResourceHolder replaceReadingFromTable(QueryPlan::Node & node, Q
{ {
SelectQueryOptions options(QueryProcessingStage::FetchColumns); SelectQueryOptions options(QueryProcessingStage::FetchColumns);
options.ignore_rename_columns = true; options.ignore_rename_columns = true;
InterpreterSelectQueryAnalyzer interpreter(wrapWithUnion(std::move(query)), context, options, column_names); InterpreterSelectQueryAnalyzer interpreter(wrapWithUnion(std::move(query)), context, options);
reading_plan = std::move(interpreter).extractQueryPlan(); reading_plan = std::move(interpreter).extractQueryPlan();
} }
else else

View File

@ -34,8 +34,12 @@ int CertificateReloader::setCertificate(SSL * ssl, const CertificateReloader::Mu
auto current = pdata->data.get(); auto current = pdata->data.get();
if (!current) if (!current)
return -1; return -1;
return setCertificateCallback(ssl, current.get(), log);
}
if (current->certs_chain.empty()) int setCertificateCallback(SSL * ssl, const CertificateReloader::Data * current_data, LoggerPtr log)
{
if (current_data->certs_chain.empty())
return -1; return -1;
if (auto err = SSL_clear_chain_certs(ssl); err != 1) if (auto err = SSL_clear_chain_certs(ssl); err != 1)
@ -43,12 +47,12 @@ int CertificateReloader::setCertificate(SSL * ssl, const CertificateReloader::Mu
LOG_ERROR(log, "Clear certificates {}", Poco::Net::Utility::getLastError()); LOG_ERROR(log, "Clear certificates {}", Poco::Net::Utility::getLastError());
return -1; return -1;
} }
if (auto err = SSL_use_certificate(ssl, const_cast<X509 *>(current->certs_chain[0].certificate())); err != 1) if (auto err = SSL_use_certificate(ssl, const_cast<X509 *>(current_data->certs_chain[0].certificate())); err != 1)
{ {
LOG_ERROR(log, "Use certificate {}", Poco::Net::Utility::getLastError()); LOG_ERROR(log, "Use certificate {}", Poco::Net::Utility::getLastError());
return -1; return -1;
} }
for (auto cert = current->certs_chain.begin() + 1; cert != current->certs_chain.end(); cert++) for (auto cert = current_data->certs_chain.begin() + 1; cert != current_data->certs_chain.end(); cert++)
{ {
if (auto err = SSL_add1_chain_cert(ssl, const_cast<X509 *>(cert->certificate())); err != 1) if (auto err = SSL_add1_chain_cert(ssl, const_cast<X509 *>(cert->certificate())); err != 1)
{ {
@ -56,7 +60,7 @@ int CertificateReloader::setCertificate(SSL * ssl, const CertificateReloader::Mu
return -1; return -1;
} }
} }
if (auto err = SSL_use_PrivateKey(ssl, const_cast<EVP_PKEY *>(static_cast<const EVP_PKEY *>(current->key))); err != 1) if (auto err = SSL_use_PrivateKey(ssl, const_cast<EVP_PKEY *>(static_cast<const EVP_PKEY *>(current_data->key))); err != 1)
{ {
LOG_ERROR(log, "Use private key {}", Poco::Net::Utility::getLastError()); LOG_ERROR(log, "Use private key {}", Poco::Net::Utility::getLastError());
return -1; return -1;

View File

@ -104,6 +104,9 @@ private:
mutable std::mutex data_mutex; mutable std::mutex data_mutex;
}; };
/// A callback for OpenSSL
int setCertificateCallback(SSL * ssl, const CertificateReloader::Data * current_data, LoggerPtr log);
} }
#endif #endif

View File

@ -1271,7 +1271,7 @@ void TCPHandler::sendReadTaskRequestAssumeLocked()
void TCPHandler::sendMergeTreeAllRangesAnnouncementAssumeLocked(InitialAllRangesAnnouncement announcement) void TCPHandler::sendMergeTreeAllRangesAnnouncementAssumeLocked(InitialAllRangesAnnouncement announcement)
{ {
writeVarUInt(Protocol::Server::MergeTreeAllRangesAnnouncement, *out); writeVarUInt(Protocol::Server::MergeTreeAllRangesAnnouncement, *out);
announcement.serialize(*out); announcement.serialize(*out, client_parallel_replicas_protocol_version);
out->finishChunk(); out->finishChunk();
out->next(); out->next();
@ -1281,7 +1281,7 @@ void TCPHandler::sendMergeTreeAllRangesAnnouncementAssumeLocked(InitialAllRanges
void TCPHandler::sendMergeTreeReadTaskRequestAssumeLocked(ParallelReadRequest request) void TCPHandler::sendMergeTreeReadTaskRequestAssumeLocked(ParallelReadRequest request)
{ {
writeVarUInt(Protocol::Server::MergeTreeReadTaskRequest, *out); writeVarUInt(Protocol::Server::MergeTreeReadTaskRequest, *out);
request.serialize(*out); request.serialize(*out, client_parallel_replicas_protocol_version);
out->finishChunk(); out->finishChunk();
out->next(); out->next();
@ -1663,6 +1663,9 @@ void TCPHandler::receiveAddendum()
readStringBinary(proto_send_chunked_cl, *in); readStringBinary(proto_send_chunked_cl, *in);
readStringBinary(proto_recv_chunked_cl, *in); readStringBinary(proto_recv_chunked_cl, *in);
} }
if (client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_VERSIONED_PARALLEL_REPLICAS_PROTOCOL)
readVarUInt(client_parallel_replicas_protocol_version, *in);
} }
@ -1690,6 +1693,8 @@ void TCPHandler::sendHello()
writeVarUInt(VERSION_MAJOR, *out); writeVarUInt(VERSION_MAJOR, *out);
writeVarUInt(VERSION_MINOR, *out); writeVarUInt(VERSION_MINOR, *out);
writeVarUInt(DBMS_TCP_PROTOCOL_VERSION, *out); writeVarUInt(DBMS_TCP_PROTOCOL_VERSION, *out);
if (client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_VERSIONED_PARALLEL_REPLICAS_PROTOCOL)
writeVarUInt(DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION, *out);
if (client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_SERVER_TIMEZONE) if (client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_SERVER_TIMEZONE)
writeStringBinary(DateLUT::instance().getTimeZone(), *out); writeStringBinary(DateLUT::instance().getTimeZone(), *out);
if (client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_SERVER_DISPLAY_NAME) if (client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_SERVER_DISPLAY_NAME)

View File

@ -191,6 +191,7 @@ private:
UInt64 client_version_minor = 0; UInt64 client_version_minor = 0;
UInt64 client_version_patch = 0; UInt64 client_version_patch = 0;
UInt32 client_tcp_protocol_version = 0; UInt32 client_tcp_protocol_version = 0;
UInt32 client_parallel_replicas_protocol_version = 0;
String proto_send_chunked_cl = "notchunked"; String proto_send_chunked_cl = "notchunked";
String proto_recv_chunked_cl = "notchunked"; String proto_recv_chunked_cl = "notchunked";
String quota_key; String quota_key;

Some files were not shown because too many files have changed in this diff Show More